tracing_throttle/
lib.rs

1//! # tracing-throttle
2//!
3//! High-performance log deduplication and rate limiting for the `tracing` ecosystem.
4//!
5//! This crate provides a `tracing::Layer` that suppresses repetitive log events based on
6//! configurable policies. Events are deduplicated by their signature (level, target, and message).
7//! Event field **values** are NOT included in signatures by default - use
8//! `.with_event_fields()` to include specific fields.
9//!
10//!
11//! ## Quick Start
12//!
13//! ```rust,no_run
14//! use tracing_throttle::{TracingRateLimitLayer, Policy};
15//! use tracing_subscriber::prelude::*;
16//! use std::time::Duration;
17//!
18//! // Use sensible defaults: 50 burst capacity, 1 token/sec (60/min), 10k signature limit
19//! let rate_limit = TracingRateLimitLayer::new();
20//!
21//! // Or customize for high-volume applications:
22//! let rate_limit = TracingRateLimitLayer::builder()
23//!     .with_policy(Policy::token_bucket(100.0, 10.0).unwrap())  // 100 burst, 600/min
24//!     .with_max_signatures(50_000)  // Custom limit
25//!     .with_summary_interval(Duration::from_secs(30))
26//!     .build()
27//!     .unwrap();
28//!
29//! // Apply the rate limit as a filter to your fmt layer
30//! tracing_subscriber::registry()
31//!     .with(tracing_subscriber::fmt::layer().with_filter(rate_limit))
32//!     .init();
33//! ```
34//!
35//! ## Features
36//!
37//! ### Rate Limiting Policies
38//! - **Token bucket limiting**: Burst tolerance with smooth recovery (recommended default)
39//! - **Time-window limiting**: Allow K events per time period with natural reset
40//! - **Count-based limiting**: Allow N events, then suppress the rest (no recovery)
41//! - **Exponential backoff**: Emit at exponentially increasing intervals (1st, 2nd, 4th, 8th...)
42//! - **Custom policies**: Implement your own rate limiting logic
43//!
44//! ### Eviction Strategies
45//! - **LRU eviction**: Evict least recently used signatures (default)
46//! - **Priority-based**: Custom priority functions to keep important events (ERROR over INFO)
47//! - **Memory-based**: Enforce byte limits with automatic memory tracking
48//! - **Combined**: Use both priority and memory constraints together
49//!
50//! ### Other Features
51//! - **Per-signature throttling**: Different messages are throttled independently
52//! - **Observability metrics**: Built-in tracking of allowed, suppressed, and evicted events
53//! - **Fail-safe circuit breaker**: Fails open during errors to preserve observability
54//!
55//! ## Event Signatures
56//!
57//! Events are deduplicated based on their **signature**. By default, signatures include:
58//! - Event level (INFO, WARN, ERROR, etc.)
59//! - Target (module path)
60//! - Message text
61//!
62//! **Event field VALUES are NOT included by default.** This means:
63//!
64//! ```rust,no_run
65//! # use tracing::info;
66//! info!(user_id = 1, "Login");  // Signature: (INFO, target, "Login")
67//! info!(user_id = 2, "Login");  // SAME signature - will be rate limited together!
68//! ```
69//!
70//! To rate-limit events per field value, use `.with_event_fields()`:
71//!
72//! ```rust,no_run
73//! # use tracing_throttle::TracingRateLimitLayer;
74//! let layer = TracingRateLimitLayer::builder()
75//!     .with_event_fields(vec!["user_id".to_string()])  // Include user_id in signature
76//!     .build()
77//!     .unwrap();
78//! ```
79//!
80//! Now each user_id gets its own rate limit:
81//!
82//! ```rust,no_run
83//! # use tracing::info;
84//! info!(user_id = 1, "Login");  // Signature: (INFO, target, "Login", user_id=1)
85//! info!(user_id = 2, "Login");  // Signature: (INFO, target, "Login", user_id=2)
86//! ```
87//!
88//! **See `tests/event_fields.rs` for complete examples.**
89//!
90//! ## Observability
91//!
92//! Monitor rate limiting behavior with built-in metrics:
93//!
94//! ```rust,no_run
95//! # use tracing_throttle::{TracingRateLimitLayer, Policy};
96//! # let rate_limit = TracingRateLimitLayer::builder()
97//! #     .with_policy(Policy::count_based(100).unwrap())
98//! #     .build()
99//! #     .unwrap();
100//! // Get current metrics
101//! let metrics = rate_limit.metrics();
102//! println!("Events allowed: {}", metrics.events_allowed());
103//! println!("Events suppressed: {}", metrics.events_suppressed());
104//! println!("Signatures evicted: {}", metrics.signatures_evicted());
105//!
106//! // Get snapshot for calculations
107//! let snapshot = metrics.snapshot();
108//! println!("Suppression rate: {:.2}%", snapshot.suppression_rate() * 100.0);
109//! ```
110//!
111//! ## Eviction Strategies
112//!
113//! Control which event signatures are kept when storage limits are reached:
114//!
115//! ### LRU (Default)
116//!
117//! ```rust,no_run
118//! # use tracing_throttle::TracingRateLimitLayer;
119//! let layer = TracingRateLimitLayer::builder()
120//!     .with_max_signatures(10_000)  // Uses LRU eviction by default
121//!     .build()
122//!     .unwrap();
123//! ```
124//!
125//! ### Priority-Based
126//!
127//! Keep important events (ERROR) over less important ones (INFO):
128//!
129//! ```rust,no_run
130//! # use tracing_throttle::{TracingRateLimitLayer, EvictionStrategy};
131//! # use std::sync::Arc;
132//! let layer = TracingRateLimitLayer::builder()
133//!     .with_max_signatures(5_000)
134//!     .with_eviction_strategy(EvictionStrategy::Priority {
135//!         max_entries: 5_000,
136//!         priority_fn: Arc::new(|_sig, state| {
137//!             match state.metadata.as_ref().map(|m| m.level.as_str()) {
138//!                 Some("ERROR") => 100,
139//!                 Some("WARN") => 50,
140//!                 Some("INFO") => 10,
141//!                 _ => 5,
142//!             }
143//!         }),
144//!     })
145//!     .build()
146//!     .unwrap();
147//! ```
148//!
149//! ### Memory-Based
150//!
151//! Enforce memory limits with automatic tracking:
152//!
153//! ```rust,no_run
154//! # use tracing_throttle::{TracingRateLimitLayer, EvictionStrategy};
155//! let layer = TracingRateLimitLayer::builder()
156//!     .with_eviction_strategy(EvictionStrategy::Memory {
157//!         max_bytes: 5 * 1024 * 1024,  // 5MB limit
158//!     })
159//!     .build()
160//!     .unwrap();
161//! ```
162//!
163//! ### Combined
164//!
165//! Use both priority and memory constraints:
166//!
167//! ```rust,no_run
168//! # use tracing_throttle::{TracingRateLimitLayer, EvictionStrategy};
169//! # use std::sync::Arc;
170//! let layer = TracingRateLimitLayer::builder()
171//!     .with_eviction_strategy(EvictionStrategy::PriorityWithMemory {
172//!         max_entries: 10_000,
173//!         priority_fn: Arc::new(|_sig, state| {
174//!             match state.metadata.as_ref().map(|m| m.level.as_str()) {
175//!                 Some("ERROR") => 100,
176//!                 _ => 10,
177//!             }
178//!         }),
179//!         max_bytes: 10 * 1024 * 1024,
180//!     })
181//!     .build()
182//!     .unwrap();
183//! ```
184//!
185//! See `examples/eviction.rs` for complete working examples.
186//!
187//! ## Fail-Safe Operation
188//!
189//! The library uses a circuit breaker to fail open during errors, preserving
190//! observability over strict rate limiting:
191//!
192//! ```rust,no_run
193//! # use tracing_throttle::{TracingRateLimitLayer, CircuitState};
194//! # let rate_limit = TracingRateLimitLayer::new();
195//! // Check circuit breaker state
196//! let cb = rate_limit.circuit_breaker();
197//! match cb.state() {
198//!     CircuitState::Closed => println!("Normal operation"),
199//!     CircuitState::Open => println!("Failing open - allowing all events"),
200//!     CircuitState::HalfOpen => println!("Testing recovery"),
201//! }
202//! ```
203//!
204//! ## Memory Management
205//!
206//! By default, tracks up to 10,000 unique event signatures with LRU eviction.
207//! Each signature uses approximately 200-400 bytes (includes event metadata for summaries).
208//!
209//! **Typical memory usage:**
210//! - 10,000 signatures (default): ~2-4 MB
211//! - 50,000 signatures: ~10-20 MB
212//! - 100,000 signatures: ~20-40 MB
213//!
214//! **Configuration:**
215//! ```rust,no_run
216//! # use tracing_throttle::TracingRateLimitLayer;
217//! // Increase limit for high-cardinality applications
218//! let rate_limit = TracingRateLimitLayer::builder()
219//!     .with_max_signatures(50_000)
220//!     .build()
221//!     .unwrap();
222//!
223//! // Monitor usage
224//! let sig_count = rate_limit.signature_count();
225//! let evictions = rate_limit.metrics().signatures_evicted();
226//! ```
227//!
228//! ### Memory Usage Breakdown
229//!
230//! Each tracked signature consumes memory for:
231//!
232//! ```text
233//! Per-Signature Memory:
234//! ├─ EventSignature (hash key)      ~32 bytes  (u64 hash)
235//! ├─ EventState (value)              ~170-370 bytes
236//! │  ├─ Policy state                 ~40-80 bytes (depends on policy type)
237//! │  ├─ SuppressionCounter           ~40 bytes (atomic counters + timestamp)
238//! │  ├─ EventMetadata (Optional)     ~50-200 bytes (level, message, target, fields)
239//! │  │  ├─ Level string              ~8 bytes
240//! │  │  ├─ Message string            ~20-100 bytes (depends on message length)
241//! │  │  ├─ Target string             ~20-50 bytes (module path)
242//! │  │  └─ Fields (BTreeMap)         ~0-50 bytes (depends on field count)
243//! │  └─ Metadata overhead            ~40 bytes (DashMap internals)
244//! └─ Total per signature             ~200-400 bytes (varies with policy & message length)
245//! ```
246//!
247//! **Estimated memory usage at different signature limits:**
248//!
249//! | Signatures | Memory (typical) | Memory (worst case) | Use Case |
250//! |------------|------------------|---------------------|----------|
251//! | 1,000      | ~200 KB          | ~400 KB             | Small apps, few event types |
252//! | 10,000 (default) | ~2 MB      | ~4 MB               | Most applications |
253//! | 50,000     | ~10 MB           | ~20 MB              | High-cardinality apps |
254//! | 100,000    | ~20 MB           | ~40 MB              | Very large systems |
255//!
256//! **Additional overhead:**
257//! - Metrics: ~100 bytes (atomic counters)
258//! - Circuit breaker: ~200 bytes (state tracking)
259//! - Layer structure: ~500 bytes
260//! - **Total fixed overhead: ~800 bytes**
261//!
262//! ### Signature Cardinality Analysis
263//!
264//! **What affects signature cardinality?**
265//!
266//! By default, signatures are computed from `(level, target, message)` only.
267//! Field values are NOT included unless configured with `.with_event_fields()`.
268//!
269//! ```rust,no_run
270//! # use tracing::info;
271//! // Low cardinality (good) - same signature for all occurrences
272//! info!("User login successful");  // Always same signature
273//! info!(user_id = 123, "User login");  // SAME signature (user_id not included by default)
274//!
275//! // Medium cardinality - if you configure .with_event_fields(vec!["user_id".to_string()])
276//! # let id = 123;
277//! info!(user_id = %id, "User login");  // One signature per unique user_id
278//!
279//! // High cardinality (danger) - if you configure .with_event_fields(vec!["request_id".to_string()])
280//! # let uuid = "abc";
281//! info!(request_id = %uuid, "Processing");  // New signature every time!
282//! ```
283//!
284//! **Cardinality examples:**
285//!
286//! | Pattern | Config | Unique Signatures | Memory Impact |
287//! |---------|--------|-------------------|---------------|
288//! | Static messages only | Default | ~10-100 | Minimal (~10 KB) |
289//! | Messages with fields | Default (fields ignored) | ~10-100 | Minimal (~10 KB) |
290//! | Stable IDs | `.with_event_fields(["user_id"])` | ~1,000-10,000 | Low (1-2 MB) |
291//! | Session IDs | `.with_event_fields(["session_id"])` | ~10,000-100,000 | Medium (10-25 MB) |
292//! | UUIDs | `.with_event_fields(["request_id"])` | Unbounded | **High risk** |
293//!
294//! **How to estimate your cardinality:**
295//!
296//! 1. **Count unique log templates** in your codebase
297//! 2. **Multiply by field cardinality** (unique values per field)
298//! 3. **Example calculation:**
299//!    - 50 unique log messages
300//!    - Each message is emitted at a single, fixed level (so level adds no multiplier)
301//!    - Average 20 unique user IDs per message
302//!    - **Estimated: 50 × 20 = 1,000 signatures** (✓ well below default)
303//!
304//! ### Configuration Guidelines
305//!
306//! **When to use the default (10k signatures):**
307//! - ✅ Most applications with structured logging
308//! - ✅ Log messages use stable identifiers (user_id, tenant_id, service_name)
309//! - ✅ You're unsure about cardinality
310//! - ✅ Memory is not severely constrained
311//!
312//! **When to increase the limit:**
313//!
314//! ```rust,no_run
315//! # use tracing_throttle::TracingRateLimitLayer;
316//! let rate_limit = TracingRateLimitLayer::builder()
317//!     .with_max_signatures(50_000)  // ~10-20 MB overhead
318//!     .build()
319//!     .expect("valid config");
320//! ```
321//!
322//! - ✅ High log volume with many unique event types (>10k)
323//! - ✅ Large distributed system with many services/endpoints
324//! - ✅ You've measured cardinality and need more capacity
325//! - ✅ Memory is available (10+ MB is acceptable)
326//!
327//! **When to use unlimited signatures:**
328//!
329//! ```rust,no_run
330//! # use tracing_throttle::TracingRateLimitLayer;
331//! let rate_limit = TracingRateLimitLayer::builder()
332//!     .with_unlimited_signatures()  // ⚠️ Unbounded memory growth
333//!     .build()
334//!     .expect("valid config");
335//! ```
336//!
337//! - ⚠️ **Use with extreme caution** - can cause unbounded memory growth
338//! - ✅ Controlled environments (short-lived processes, tests)
339//! - ✅ Known bounded cardinality with monitoring in place
340//! - ✅ Memory constraints are not a concern
341//! - ❌ **Never use** if logging includes UUIDs, timestamps, or other high-cardinality data
342//!
343//! ### Monitoring Memory Usage
344//!
345//! **Check signature count in production:**
346//!
347//! ```rust,no_run
348//! # use tracing_throttle::TracingRateLimitLayer;
349//! # use tracing::warn;
350//! # let rate_limit = TracingRateLimitLayer::new();
351//! // In a periodic health check or metrics reporter:
352//! let sig_count = rate_limit.signature_count();
353//! let evictions = rate_limit.metrics().signatures_evicted();
354//!
355//! if sig_count > 8000 {
356//!     warn!("Approaching signature limit: {}/10000", sig_count);
357//! }
358//!
359//! if evictions > 1000 {
360//!     warn!("High eviction count: {} signatures evicted", evictions);
361//! }
362//! ```
363//!
364//! **Integrate with memory profilers:**
365//!
366//! ```bash
367//! # Use Valgrind Massif for heap profiling
368//! valgrind --tool=massif --massif-out-file=massif.out ./your-app
369//!
370//! # Analyze with ms_print
371//! ms_print massif.out
372//!
373//! # Look for DashMap and EventState allocations
374//! ```
375//!
376//! **Signs you need to adjust signature limits:**
377//!
378//! | Symptom | Likely Cause | Action |
379//! |---------|--------------|--------|
380//! | High eviction rate (>1000/min) | Cardinality > limit | Increase `max_signatures` |
381//! | Memory growth over time | Unbounded cardinality | Fix logging (remove UUIDs), add limit |
382//! | Low signature count (<100) | Over-provisioned | Can reduce limit safely |
383//! | Frequent evictions + suppression | Limit too low | Increase limit or reduce cardinality |
384
385// Domain layer - pure business logic
386pub mod domain;
387
388// Application layer - orchestration
389pub mod application;
390
391// Infrastructure layer - external adapters
392pub mod infrastructure;
393
394// Re-export commonly used types for convenience
395pub use domain::{
396    policy::{
397        CountBasedPolicy, ExponentialBackoffPolicy, Policy, PolicyDecision, PolicyError,
398        RateLimitPolicy, TimeWindowPolicy, TokenBucketPolicy,
399    },
400    signature::EventSignature,
401    summary::{SuppressionCounter, SuppressionSummary},
402};
403
404pub use application::{
405    circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState},
406    emitter::EmitterConfigError,
407    limiter::RateLimiter,
408    metrics::{Metrics, MetricsSnapshot},
409    ports::{Clock, EvictionCandidate, EvictionPolicy, Storage},
410    registry::SuppressionRegistry,
411};
412
413#[cfg(feature = "async")]
414pub use application::emitter::{EmitterHandle, ShutdownError};
415
416pub use infrastructure::{
417    clock::SystemClock,
418    eviction::{
419        LruEviction, MemoryEviction, PriorityEviction, PriorityFn, PriorityWithMemoryEviction,
420    },
421    layer::{BuildError, EvictionStrategy, TracingRateLimitLayer, TracingRateLimitLayerBuilder},
422    storage::ShardedStorage,
423};
424
425#[cfg(feature = "async")]
426pub use infrastructure::layer::SummaryFormatter;
427
428#[cfg(feature = "redis-storage")]
429pub use infrastructure::redis_storage::{RedisStorage, RedisStorageConfig};