tracing_throttle/lib.rs
1//! # tracing-throttle
2//!
3//! High-performance log deduplication and rate limiting for the `tracing` ecosystem.
4//!
5//! This crate provides a `tracing_subscriber::Layer` that suppresses repetitive log events based on
6//! configurable policies. Events are deduplicated by their signature (level, target, and message).
7//! Event field **values** are NOT included in signatures by default - use
8//! `.with_event_fields()` to include specific fields.
9//!
10//!
11//! ## Quick Start
12//!
13//! ```rust,no_run
14//! use tracing_throttle::{TracingRateLimitLayer, Policy};
15//! use tracing_subscriber::prelude::*;
16//! use std::time::Duration;
17//!
18//! // Use sensible defaults: 50 burst capacity, 1 token/sec (60/min), 10k signature limit
19//! let rate_limit = TracingRateLimitLayer::new();
20//!
21//! // Or customize for high-volume applications:
22//! let rate_limit = TracingRateLimitLayer::builder()
23//! .with_policy(Policy::token_bucket(100.0, 10.0).unwrap()) // 100 burst, 600/min
24//! .with_max_signatures(50_000) // Custom limit
25//! .with_summary_interval(Duration::from_secs(30))
26//! .build()
27//! .unwrap();
28//!
29//! // Apply the rate limit as a filter to your fmt layer
30//! tracing_subscriber::registry()
31//! .with(tracing_subscriber::fmt::layer().with_filter(rate_limit))
32//! .init();
33//! ```
34//!
35//! ## Event Signatures
36//!
37//! Events are deduplicated based on their **signature**. By default, signatures include:
38//! - Event level (INFO, WARN, ERROR, etc.)
39//! - Target (module path)
40//! - Message text
41//!
42//! **Event field VALUES are NOT included by default.** This means:
43//!
44//! ```rust,no_run
45//! # use tracing::info;
46//! info!(user_id = 1, "Login"); // Signature: (INFO, target, "Login")
47//! info!(user_id = 2, "Login"); // SAME signature - will be rate limited together!
48//! ```
49//!
50//! To rate-limit events per field value, use `.with_event_fields()`:
51//!
52//! ```rust,no_run
53//! # use tracing_throttle::TracingRateLimitLayer;
54//! let layer = TracingRateLimitLayer::builder()
55//! .with_event_fields(vec!["user_id".to_string()]) // Include user_id in signature
56//! .build()
57//! .unwrap();
58//! ```
59//!
60//! Now each user_id gets its own rate limit:
61//!
62//! ```rust,no_run
63//! # use tracing::info;
64//! info!(user_id = 1, "Login"); // Signature: (INFO, target, "Login", user_id=1)
65//! info!(user_id = 2, "Login"); // Signature: (INFO, target, "Login", user_id=2)
66//! ```
67//!
68//! **See `tests/event_fields.rs` for complete examples.**
69//!
70//! ## Features
71//!
72//! - **Token bucket limiting**: Burst tolerance with smooth recovery (recommended default)
73//! - **Time-window limiting**: Allow K events per time period with natural reset
74//! - **Count-based limiting**: Allow N events, then suppress the rest (no recovery)
75//! - **Exponential backoff**: Emit at exponentially increasing intervals (1st, 2nd, 4th, 8th...)
76//! - **Custom policies**: Implement your own rate limiting logic
77//! - **Per-signature throttling**: Different messages are throttled independently
78//! - **LRU eviction**: Optional memory limits with automatic eviction of least recently used signatures
79//! - **Observability metrics**: Built-in tracking of allowed, suppressed, and evicted events
80//! - **Fail-safe circuit breaker**: Fails open during errors to preserve observability
81//!
82//! ## Observability
83//!
84//! Monitor rate limiting behavior with built-in metrics:
85//!
86//! ```rust,no_run
87//! # use tracing_throttle::{TracingRateLimitLayer, Policy};
88//! # let rate_limit = TracingRateLimitLayer::builder()
89//! # .with_policy(Policy::count_based(100).unwrap())
90//! # .build()
91//! # .unwrap();
92//! // Get current metrics
93//! let metrics = rate_limit.metrics();
94//! println!("Events allowed: {}", metrics.events_allowed());
95//! println!("Events suppressed: {}", metrics.events_suppressed());
96//! println!("Signatures evicted: {}", metrics.signatures_evicted());
97//!
98//! // Get snapshot for calculations
99//! let snapshot = metrics.snapshot();
100//! println!("Suppression rate: {:.2}%", snapshot.suppression_rate() * 100.0);
101//! ```
102//!
103//! ## Fail-Safe Operation
104//!
105//! The library uses a circuit breaker to fail open during errors, preserving
106//! observability over strict rate limiting:
107//!
108//! ```rust,no_run
109//! # use tracing_throttle::{TracingRateLimitLayer, CircuitState};
110//! # let rate_limit = TracingRateLimitLayer::new();
111//! // Check circuit breaker state
112//! let cb = rate_limit.circuit_breaker();
113//! match cb.state() {
114//! CircuitState::Closed => println!("Normal operation"),
115//! CircuitState::Open => println!("Failing open - allowing all events"),
116//! CircuitState::HalfOpen => println!("Testing recovery"),
117//! }
118//! ```
119//!
120//! ## Memory Management
121//!
122//! By default, tracks up to 10,000 unique event signatures with LRU eviction.
123//! Each signature uses approximately 150-250 bytes.
124//!
125//! **Typical memory usage:**
126//! - 10,000 signatures (default): ~1.5-2.5 MB
127//! - 50,000 signatures: ~7.5-12.5 MB
128//! - 100,000 signatures: ~15-25 MB
129//!
130//! **Configuration:**
131//! ```rust,no_run
132//! # use tracing_throttle::TracingRateLimitLayer;
133//! // Increase limit for high-cardinality applications
134//! let rate_limit = TracingRateLimitLayer::builder()
135//! .with_max_signatures(50_000)
136//! .build()
137//! .unwrap();
138//!
139//! // Monitor usage
140//! let sig_count = rate_limit.signature_count();
141//! let evictions = rate_limit.metrics().signatures_evicted();
142//! ```
143//!
144//! ### Memory Usage Breakdown
145//!
146//! Each tracked signature consumes memory for:
147//!
148//! ```text
149//! Per-Signature Memory:
150//! ├─ EventSignature (hash key) ~32 bytes (u64 hash)
151//! ├─ EventState (value) ~120-200 bytes
152//! │ ├─ Policy state ~40-80 bytes (depends on policy type)
153//! │ ├─ SuppressionCounter ~40 bytes (atomic counters + timestamp)
154//! │ └─ Metadata overhead ~40 bytes (DashMap internals)
155//! └─ Total per signature ~150-250 bytes (varies with policy)
156//! ```
157//!
158//! **Estimated memory usage at different signature limits:**
159//!
160//! | Signatures | Memory (typical) | Memory (worst case) | Use Case |
161//! |------------|------------------|---------------------|----------|
162//! | 1,000 | ~150 KB | ~250 KB | Small apps, few event types |
163//! | 10,000 (default) | ~1.5 MB | ~2.5 MB | Most applications |
164//! | 50,000 | ~7.5 MB | ~12.5 MB | High-cardinality apps |
165//! | 100,000 | ~15 MB | ~25 MB | Very large systems |
166//!
167//! **Additional overhead:**
168//! - Metrics: ~100 bytes (atomic counters)
169//! - Circuit breaker: ~200 bytes (state tracking)
170//! - Layer structure: ~500 bytes
171//! - **Total fixed overhead: ~800 bytes**
172//!
173//! ### Signature Cardinality Analysis
174//!
175//! **What affects signature cardinality?**
176//!
177//! By default, signatures are computed from `(level, target, message)` only.
178//! Field values are NOT included unless configured with `.with_event_fields()`.
179//!
180//! ```rust,no_run
181//! # use tracing::info;
182//! // Low cardinality (good) - same signature for all occurrences
183//! info!("User login successful"); // Always same signature
184//! info!(user_id = 123, "User login"); // SAME signature (user_id not included by default)
185//!
186//! // Medium cardinality - if you configure .with_event_fields(vec!["user_id".to_string()])
187//! # let id = 123;
188//! info!(user_id = %id, "User login"); // One signature per unique user_id
189//!
190//! // High cardinality (danger) - if you configure .with_event_fields(vec!["request_id".to_string()])
191//! # let uuid = "abc";
192//! info!(request_id = %uuid, "Processing"); // New signature every time!
193//! ```
194//!
195//! **Cardinality examples:**
196//!
197//! | Pattern | Config | Unique Signatures | Memory Impact |
198//! |---------|--------|-------------------|---------------|
199//! | Static messages only | Default | ~10-100 | Minimal (~10 KB) |
200//! | Messages with fields | Default (fields ignored) | ~10-100 | Minimal (~10 KB) |
201//! | Stable IDs | `.with_event_fields(["user_id"])` | ~1,000-10,000 | Low (1-2 MB) |
202//! | Session IDs | `.with_event_fields(["session_id"])` | ~10,000-100,000 | Medium (10-25 MB) |
203//! | UUIDs | `.with_event_fields(["request_id"])` | Unbounded | **High risk** |
204//!
205//! **How to estimate your cardinality:**
206//!
207//! 1. **Count unique log templates** in your codebase
208//! 2. **Multiply by field cardinality** (unique values per field)
209//! 3. **Example calculation:**
210//! - 50 unique log messages
211//! - Each message logged at a single fixed level (levels do not multiply the count)
212//! - Average 20 unique user IDs per message
213//! - **Estimated: 50 × 20 = 1,000 signatures** (✓ well below default)
214//!
215//! ### Configuration Guidelines
216//!
217//! **When to use the default (10k signatures):**
218//! - ✅ Most applications with structured logging
219//! - ✅ Log messages use stable identifiers (user_id, tenant_id, service_name)
220//! - ✅ You're unsure about cardinality
221//! - ✅ Memory is not severely constrained
222//!
223//! **When to increase the limit:**
224//!
225//! ```rust,no_run
226//! # use tracing_throttle::TracingRateLimitLayer;
227//! let rate_limit = TracingRateLimitLayer::builder()
228//! .with_max_signatures(50_000) // ~7.5-12.5 MB overhead
229//! .build()
230//! .expect("valid config");
231//! ```
232//!
233//! - ✅ High log volume with many unique event types (>10k)
234//! - ✅ Large distributed system with many services/endpoints
235//! - ✅ You've measured cardinality and need more capacity
236//! - ✅ Memory is available (10+ MB is acceptable)
237//!
238//! **When to use unlimited signatures:**
239//!
240//! ```rust,no_run
241//! # use tracing_throttle::TracingRateLimitLayer;
242//! let rate_limit = TracingRateLimitLayer::builder()
243//! .with_unlimited_signatures() // ⚠️ Unbounded memory growth
244//! .build()
245//! .expect("valid config");
246//! ```
247//!
248//! - ⚠️ **Use with extreme caution** - can cause unbounded memory growth
249//! - ✅ Controlled environments (short-lived processes, tests)
250//! - ✅ Known bounded cardinality with monitoring in place
251//! - ✅ Memory constraints are not a concern
252//! - ❌ **Never use** if logging includes UUIDs, timestamps, or other high-cardinality data
253//!
254//! ### Monitoring Memory Usage
255//!
256//! **Check signature count in production:**
257//!
258//! ```rust,no_run
259//! # use tracing_throttle::TracingRateLimitLayer;
260//! # use tracing::warn;
261//! # let rate_limit = TracingRateLimitLayer::new();
262//! // In a periodic health check or metrics reporter:
263//! let sig_count = rate_limit.signature_count();
264//! let evictions = rate_limit.metrics().signatures_evicted();
265//!
266//! if sig_count > 8000 {
267//! warn!("Approaching signature limit: {}/10000", sig_count);
268//! }
269//!
270//! if evictions > 1000 {
271//! warn!("High eviction count: {} signatures evicted", evictions);
272//! }
273//! ```
274//!
275//! **Integrate with memory profilers:**
276//!
277//! ```bash
278//! # Use Valgrind Massif for heap profiling
279//! valgrind --tool=massif --massif-out-file=massif.out ./your-app
280//!
281//! # Analyze with ms_print
282//! ms_print massif.out
283//!
284//! # Look for DashMap and EventState allocations
285//! ```
286//!
287//! **Signs you need to adjust signature limits:**
288//!
289//! | Symptom | Likely Cause | Action |
290//! |---------|--------------|--------|
291//! | High eviction rate (>1000/min) | Cardinality > limit | Increase `max_signatures` |
292//! | Memory growth over time | Unbounded cardinality | Fix logging (remove UUIDs), add limit |
293//! | Low signature count (<100) | Over-provisioned | Can reduce limit safely |
294//! | Frequent evictions + suppression | Limit too low | Increase limit or reduce cardinality |
295
296// Domain layer - pure business logic
// Submodules referenced by the re-exports in this file: `policy`, `signature`, `summary`.
297pub mod domain;
298
299// Application layer - orchestration
// Submodules referenced by the re-exports in this file: `circuit_breaker`, `emitter`,
// `limiter`, `metrics`, `ports`, `registry`.
300pub mod application;
301
302// Infrastructure layer - external adapters
// Submodules referenced by the re-exports in this file: `clock`, `layer`, `storage`, and
// `redis_storage` (re-exported only when the `redis-storage` feature is enabled).
303pub mod infrastructure;
304
305// Re-export commonly used types for convenience
306pub use domain::{
307 policy::{
308 CountBasedPolicy, ExponentialBackoffPolicy, Policy, PolicyDecision, PolicyError,
309 RateLimitPolicy, TimeWindowPolicy, TokenBucketPolicy,
310 },
311 signature::EventSignature,
312 summary::{SuppressionCounter, SuppressionSummary},
313};
314
315pub use application::{
316 circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState},
317 emitter::EmitterConfigError,
318 limiter::RateLimiter,
319 metrics::{Metrics, MetricsSnapshot},
320 ports::{Clock, Storage},
321 registry::SuppressionRegistry,
322};
323
324#[cfg(feature = "async")]
325pub use application::emitter::{EmitterHandle, ShutdownError};
326
327pub use infrastructure::{
328 clock::SystemClock,
329 layer::{BuildError, TracingRateLimitLayer, TracingRateLimitLayerBuilder},
330 storage::ShardedStorage,
331};
332
333#[cfg(feature = "async")]
334pub use infrastructure::layer::SummaryFormatter;
335
336#[cfg(feature = "redis-storage")]
337pub use infrastructure::redis_storage::{RedisStorage, RedisStorageConfig};