rater/rate_limiter/
metrics.rs

1//! This module provides comprehensive performance monitoring and health analysis
2//! for rate limiters. It helps you understand how your rate limiting is performing
3//! and detect when your system is under stress.
4//!
5//! ## Metrics Overview
6//!
7//! ```text
8//!     Metrics Dashboard:
9//!     ┌─────────────────────────────────────┐
10//!     │  Success Rate: 85%                 │
11//!     │  ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓░░░  (85/100)     │
12//!     │                                     │
13//!     │  Token Usage: 70%                   │
14//!     │  ▓▓▓▓▓▓▓▓▓▓▓▓▓▓░░░░░░  (70/100)   │
15//!     │                                     │
16//!     │  Health: ✅ Healthy                 │
17//!     │  Pressure: Low                      │
18//!     │  Max Wait: 1.5ms                    │
19//!     └─────────────────────────────────────┘
20//! ```
21
22use std::fmt;
23
/// Point-in-time snapshot of a rate limiter's counters and capacity.
///
/// The struct only stores raw values. Derived figures such as the success
/// rate, bucket utilization and overall health are computed on demand by the
/// methods implemented on this type.
///
/// ## Fields at a Glance
///
/// ### Outcome counters
/// - **total_acquired**: requests that obtained a token
/// - **total_rejected**: requests that were rate limited
/// - **total_refills**: refill operations performed so far
///
/// ### Capacity
/// - **current_tokens**: tokens available right now
/// - **max_tokens**: configured burst capacity
///
/// ### Pressure indicators
/// - **consecutive_rejections**: length of the current rejection streak
/// - **pressure_ratio**: rejected / total requests
/// - **max_wait_time_ns**: longest wait observed, in nanoseconds
///
/// ## Example Usage
///
/// ```rust
/// use rater::RateLimiter;
///
/// let limiter = RateLimiter::new(100, 10);
/// // ... use the limiter ...
///
/// let metrics = limiter.metrics();
///
/// // Check health
/// if metrics.is_under_pressure() {
///     println!("⚠️ System under pressure!");
///     println!("Success rate: {:.1}%", metrics.success_rate() * 100.0);
/// }
///
/// // Display comprehensive report
/// println!("{}", metrics.summary());
/// ```
#[derive(Clone, Debug)]
pub struct RateLimiterMetrics {
    /// Number of token acquisitions that succeeded, i.e. allowed requests.
    pub total_acquired: u64,

    /// Number of token acquisition attempts that were turned away, i.e.
    /// rate-limited requests.
    pub total_rejected: u64,

    /// Number of refill operations performed so far.
    pub total_refills: u64,

    /// Tokens currently sitting in the bucket (capacity available right now).
    pub current_tokens: u64,

    /// Upper bound on the number of tokens the bucket can hold (burst limit).
    pub max_tokens: u64,

    /// Rejections seen in a row without an intervening successful acquisition.
    /// More than 10 is treated as sustained pressure.
    pub consecutive_rejections: u32,

    /// Longest wait observed, in nanoseconds.
    pub max_wait_time_ns: u64,

    /// Fraction of all requests that were rejected (0.0 to 1.0).
    /// Values above 0.3 are treated as sustained pressure.
    pub pressure_ratio: f64,
}
100
101impl RateLimiterMetrics {
102    /// Calculates the success rate of token acquisitions.
103    ///
104    /// # Returns
105    ///
106    /// A value between 0.0 and 1.0, where:
107    /// - 1.0 = 100% success (no rejections)
108    /// - 0.5 = 50% success (half rejected)
109    /// - 0.0 = 0% success (all rejected)
110    ///
111    /// # Example
112    ///
113    /// ```rust
114    /// use rater::RateLimiter;
115    ///
116    /// let limiter = RateLimiter::new(100, 10);
117    /// let metrics = limiter.metrics();
118    /// if metrics.success_rate() < 0.8 {
119    ///     println!("Warning: High rejection rate!");
120    /// }
121    /// ```
122    #[inline]
123    pub fn success_rate(&self) -> f64 {
124        let total = self.total_acquired + self.total_rejected;
125        if total == 0 {
126            1.0 // No requests yet, assume success
127        } else {
128            self.total_acquired as f64 / total as f64
129        }
130    }
131
132    /// Calculates the rejection rate (inverse of success rate).
133    ///
134    /// # Returns
135    ///
136    /// A value between 0.0 and 1.0 representing the fraction of rejected requests.
137    #[inline]
138    pub fn rejection_rate(&self) -> f64 {
139        1.0 - self.success_rate()
140    }
141
142    /// Determines if the rate limiter is under immediate pressure.
143    ///
144    /// Immediate pressure means:
145    /// - Success rate below 50%, OR
146    /// - No tokens currently available
147    ///
148    /// # Example
149    ///
150    /// ```rust
151    /// use rater::RateLimiter;
152    ///
153    /// let limiter = RateLimiter::new(100, 10);
154    /// let metrics = limiter.metrics();
155    /// if metrics.is_under_pressure() {
156    ///     // Consider backing off or queueing requests
157    /// }
158    /// ```
159    #[inline]
160    pub fn is_under_pressure(&self) -> bool {
161        self.success_rate() < 0.5 || self.current_tokens == 0
162    }
163
164    /// Calculates the current utilization of the token bucket.
165    ///
166    /// Utilization shows how much of the capacity is being used:
167    /// - 0.0 = Bucket is full (no usage)
168    /// - 0.5 = Half capacity used
169    /// - 1.0 = Bucket is empty (full usage)
170    ///
171    /// # Example
172    ///
173    /// ```rust
174    /// use rater::RateLimiter;
175    ///
176    /// let limiter = RateLimiter::new(100, 10);
177    /// let metrics = limiter.metrics();
178    /// if metrics.utilization() > 0.9 {
179    ///     println!("Running at high utilization!");
180    /// }
181    /// ```
182    #[inline]
183    pub fn utilization(&self) -> f64 {
184        if self.max_tokens == 0 {
185            0.0
186        } else {
187            1.0 - (self.current_tokens as f64 / self.max_tokens as f64)
188        }
189    }
190
191    /// Returns the percentage of available tokens.
192    ///
193    /// This is the inverse of utilization, showing remaining capacity:
194    /// - 100% = Bucket is full
195    /// - 50% = Half capacity available
196    /// - 0% = No tokens available
197    ///
198    /// # Example
199    ///
200    /// ```rust
201    /// use rater::RateLimiter;
202    ///
203    /// let limiter = RateLimiter::new(100, 10);
204    /// let metrics = limiter.metrics();
205    /// println!("Available capacity: {:.1}%", metrics.availability_percentage());
206    /// ```
207    #[inline]
208    pub fn availability_percentage(&self) -> f64 {
209        if self.max_tokens == 0 {
210            0.0
211        } else {
212            (self.current_tokens as f64 / self.max_tokens as f64) * 100.0
213        }
214    }
215
216    /// Determines if the rate limiter is under sustained pressure.
217    ///
218    /// Sustained pressure indicates ongoing high demand that exceeds capacity.
219    /// This is detected when:
220    /// - More than 10 consecutive rejections, OR
221    /// - Overall rejection ratio above 30%
222    ///
223    /// # Example
224    ///
225    /// ```rust
226    /// use rater::RateLimiter;
227    ///
228    /// let limiter = RateLimiter::new(100, 10);
229    /// let metrics = limiter.metrics();
230    /// if metrics.is_under_sustained_pressure() {
231    ///     // Consider scaling up capacity or implementing backpressure
232    ///     println!("System needs intervention!");
233    /// }
234    /// ```
235    #[inline]
236    pub fn is_under_sustained_pressure(&self) -> bool {
237        self.consecutive_rejections > 10 || self.pressure_ratio > 0.3
238    }
239
240    /// Returns the maximum wait time in microseconds.
241    ///
242    /// Converts nanoseconds to microseconds for easier reading.
243    #[inline]
244    pub fn max_wait_time_us(&self) -> f64 {
245        self.max_wait_time_ns as f64 / 1000.0
246    }
247
248    /// Returns the maximum wait time in milliseconds.
249    ///
250    /// Converts nanoseconds to milliseconds for easier reading.
251    ///
252    /// # Example
253    ///
254    /// ```rust
255    /// use rater::RateLimiter;
256    ///
257    /// let limiter = RateLimiter::new(100, 10);
258    /// let metrics = limiter.metrics();
259    /// if metrics.max_wait_time_ms() > 10.0 {
260    ///     println!("High contention detected: {:.2}ms max wait",
261    ///              metrics.max_wait_time_ms());
262    /// }
263    /// ```
264    #[inline]
265    pub fn max_wait_time_ms(&self) -> f64 {
266        self.max_wait_time_ns as f64 / 1_000_000.0
267    }
268
269    /// Returns the total number of requests (acquired + rejected).
270    #[inline]
271    pub fn total_requests(&self) -> u64 {
272        self.total_acquired + self.total_rejected
273    }
274
275    /// Determines the health status of the rate limiter.
276    ///
277    /// Health status provides a quick assessment of the limiter's state:
278    /// - **Healthy**: Operating normally
279    /// - **Degraded**: Under some pressure but functional
280    /// - **Critical**: Severe pressure, intervention needed
281    ///
282    /// # Example
283    ///
284    /// ```rust
285    /// use rater::{HealthStatus, RateLimiter};
286    ///
287    /// let limiter = RateLimiter::new(100, 10);
288    /// let metrics = limiter.metrics();
289    /// match metrics.health_status() {
290    ///     HealthStatus::Healthy => println!("✅ All good"),
291    ///     HealthStatus::Degraded => println!("⚠️ Monitor closely"),
292    ///     HealthStatus::Critical => println!("🔴 Take action!"),
293    /// }
294    /// ```
295    pub fn health_status(&self) -> HealthStatus {
296        if self.is_under_sustained_pressure() {
297            HealthStatus::Critical
298        } else if self.is_under_pressure() {
299            HealthStatus::Degraded
300        } else {
301            HealthStatus::Healthy
302        }
303    }
304
305    /// Generates a human-readable summary of the metrics.
306    ///
307    /// This provides a comprehensive report suitable for logging or display.
308    ///
309    /// # Example Output
310    ///
311    /// ```text
312    /// RateLimiter Metrics:
313    /// ├─ Performance:
314    /// │  ├─ Success Rate: 85.50%
315    /// │  ├─ Rejection Rate: 14.50%
316    /// │  └─ Max Wait Time: 1.234ms
317    /// ├─ Capacity:
318    /// │  ├─ Available Tokens: 75/100
319    /// │  ├─ Utilization: 25.00%
320    /// │  └─ Availability: 75.00%
321    /// └─ Health:
322    ///    ├─ Status: Healthy
323    ///    └─ Under Pressure: false
324    /// ```
325    pub fn summary(&self) -> String {
326        format!(
327            "RateLimiter Metrics:\n\
328             ├─ Performance:\n\
329             │  ├─ Success Rate: {:.2}%\n\
330             │  ├─ Rejection Rate: {:.2}%\n\
331             │  └─ Max Wait Time: {:.3}ms\n\
332             ├─ Capacity:\n\
333             │  ├─ Available Tokens: {}/{}\n\
334             │  ├─ Utilization: {:.2}%\n\
335             │  └─ Availability: {:.2}%\n\
336             ├─ Counters:\n\
337             │  ├─ Total Acquired: {}\n\
338             │  ├─ Total Rejected: {}\n\
339             │  ├─ Total Refills: {}\n\
340             │  └─ Consecutive Rejections: {}\n\
341             └─ Health:\n\
342                ├─ Status: {:?}\n\
343                ├─ Under Pressure: {}\n\
344                └─ Under Sustained Pressure: {}",
345            self.success_rate() * 100.0,
346            self.rejection_rate() * 100.0,
347            self.max_wait_time_ms(),
348            self.current_tokens,
349            self.max_tokens,
350            self.utilization() * 100.0,
351            self.availability_percentage(),
352            self.total_acquired,
353            self.total_rejected,
354            self.total_refills,
355            self.consecutive_rejections,
356            self.health_status(),
357            self.is_under_pressure(),
358            self.is_under_sustained_pressure()
359        )
360    }
361}
362
363impl fmt::Display for RateLimiterMetrics {
364    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
365        write!(f, "{}", self.summary())
366    }
367}
368
/// Three-level health classification for a rate limiter.
///
/// The levels are ordered by severity, which makes the value convenient for
/// driving alerts or automated reactions.
///
/// ## Status Levels
///
/// ```text
///     Healthy ──────► Normal operation, plenty of capacity
///        │
///     Degraded ─────► Some pressure, monitor closely
///        │
///     Critical ─────► Severe pressure, immediate action needed
/// ```
///
/// ## Example Usage
///
/// ```rust
/// use rater::{RateLimiter, HealthStatus};
///
/// let limiter = RateLimiter::new(100, 10);
/// // ... heavy usage ...
///
/// let metrics = limiter.metrics();
/// let health = metrics.health_status();
///
/// // Take action based on health
/// match health {
///     HealthStatus::Healthy => {
///         // Normal operation
///     }
///     HealthStatus::Degraded => {
///         // Log a warning, consider scaling
///         eprintln!("Rate limiter degraded: {}", health.suggested_action());
///     }
///     HealthStatus::Critical => {
///         // Alert on-call, scale immediately
///         eprintln!("Rate limiter critical: {}", health.suggested_action());
///     }
/// }
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HealthStatus {
    /// Operating normally.
    ///
    /// Reported when none of the pressure conditions hold:
    /// - Success rate is at least 50%
    /// - Tokens are available
    /// - No sustained rejection pattern
    Healthy,

    /// Under some pressure but still functional.
    ///
    /// Reported when the limiter is not critical and either:
    /// - Success rate is below 50%, OR
    /// - No tokens are currently available
    Degraded,

    /// Under severe pressure, intervention recommended.
    ///
    /// Reported when either:
    /// - The overall rejection ratio is above 30%, OR
    /// - There are more than 10 consecutive rejections
    Critical,
}
437
438impl HealthStatus {
439    /// Returns true if the status indicates any problems.
440    ///
441    /// Useful for simple health checks.
442    ///
443    /// # Example
444    ///
445    /// ```rust
446    /// use rater::RateLimiter;
447    ///
448    /// let rater = RateLimiter::new(100, 10);
449    /// let health  = rater.metrics().health_status();
450    /// if health.is_unhealthy() {
451    ///     // Take corrective action
452    ///     true;
453    /// }
454    /// ```
455    pub fn is_unhealthy(&self) -> bool {
456        !matches!(self, Self::Healthy)
457    }
458
459    /// Returns a suggested action based on the health status.
460    ///
461    /// Provides actionable guidance for operators.
462    ///
463    /// # Example
464    ///
465    /// ```rust
466    /// use rater::RateLimiter;
467    ///
468    /// let rater = RateLimiter::new(100, 10);
469    /// let health  = rater.metrics().health_status();
470    /// println!("Recommendation: {}", health.suggested_action());
471    /// ```
472    pub fn suggested_action(&self) -> &'static str {
473        match self {
474            Self::Healthy => "No action needed",
475            Self::Degraded => "Monitor closely, consider increasing capacity",
476            Self::Critical => "Immediate action required: scale up or reduce load",
477        }
478    }
479}
480
481impl fmt::Display for HealthStatus {
482    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
483        match self {
484            Self::Healthy => write!(f, "✅ Healthy"),
485            Self::Degraded => write!(f, "⚠️ Degraded"),
486            Self::Critical => write!(f, "🔴 Critical"),
487        }
488    }
489}
490
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a snapshot from the four values most tests vary. Every other
    /// field starts at zero and can be overridden with struct-update syntax.
    fn snapshot(acquired: u64, rejected: u64, tokens: u64, capacity: u64) -> RateLimiterMetrics {
        RateLimiterMetrics {
            total_acquired: acquired,
            total_rejected: rejected,
            total_refills: 0,
            current_tokens: tokens,
            max_tokens: capacity,
            consecutive_rejections: 0,
            max_wait_time_ns: 0,
            pressure_ratio: 0.0,
        }
    }

    #[test]
    fn test_metrics_calculations() {
        let m = RateLimiterMetrics {
            total_refills: 10,
            consecutive_rejections: 5,
            max_wait_time_ns: 1_000_000,
            pressure_ratio: 0.2,
            ..snapshot(80, 20, 25, 100)
        };

        assert_eq!(m.success_rate(), 0.8);
        assert_eq!(m.utilization(), 0.75);
        assert!(!m.is_under_pressure());
        assert_eq!(m.health_status(), HealthStatus::Healthy);
    }

    #[test]
    fn test_health_status() {
        let m = RateLimiterMetrics {
            total_refills: 10,
            consecutive_rejections: 15,
            pressure_ratio: 0.6,
            ..snapshot(40, 60, 0, 100)
        };

        assert!(m.is_under_pressure());
        assert!(m.is_under_sustained_pressure());
        assert_eq!(m.health_status(), HealthStatus::Critical);
    }

    #[test]
    fn test_edge_cases() {
        // No requests recorded yet.
        let idle = snapshot(0, 0, 50, 100);
        assert_eq!(idle.success_rate(), 1.0);
        assert_eq!(idle.utilization(), 0.5);
        assert!(!idle.is_under_pressure());

        // Bucket without any capacity (max_tokens = 0).
        let no_capacity = snapshot(0, 0, 0, 0);
        assert_eq!(no_capacity.utilization(), 0.0);
        assert_eq!(no_capacity.availability_percentage(), 0.0);
    }

    #[test]
    fn test_health_status_methods() {
        assert!(!HealthStatus::Healthy.is_unhealthy());
        assert!(HealthStatus::Degraded.is_unhealthy());
        assert!(HealthStatus::Critical.is_unhealthy());

        assert_eq!(HealthStatus::Healthy.suggested_action(), "No action needed");

        let degraded_advice = HealthStatus::Degraded.suggested_action();
        assert!(degraded_advice.contains("Monitor"));

        let critical_advice = HealthStatus::Critical.suggested_action();
        assert!(critical_advice.contains("Immediate"));
    }

    #[test]
    fn test_health_status_display() {
        let cases = [
            (HealthStatus::Healthy, "Healthy"),
            (HealthStatus::Degraded, "Degraded"),
            (HealthStatus::Critical, "Critical"),
        ];

        for (status, expected_name) in cases {
            let rendered = format!("{}", status);
            assert!(rendered.contains(expected_name));
        }
    }

    #[test]
    fn test_metrics_display() {
        let m = RateLimiterMetrics {
            total_refills: 5,
            max_wait_time_ns: 1_500_000,
            pressure_ratio: 0.1,
            ..snapshot(100, 20, 30, 100)
        };

        let rendered = format!("{}", m);
        assert!(rendered.contains("RateLimiter Metrics"));
        assert!(rendered.contains("Success Rate"));

        let report = m.summary();
        for section in ["Performance", "Capacity", "Health"] {
            assert!(report.contains(section));
        }
    }

    #[test]
    fn test_metrics_time_conversions() {
        let m = RateLimiterMetrics {
            max_wait_time_ns: 1_500_000_000, // 1.5 seconds
            ..snapshot(0, 0, 0, 100)
        };

        assert_eq!(m.max_wait_time_us(), 1_500_000.0);
        assert_eq!(m.max_wait_time_ms(), 1_500.0);
    }

    #[test]
    fn test_total_requests() {
        let m = snapshot(75, 25, 0, 100);

        assert_eq!(m.total_requests(), 100);
    }
}