oxify_authz/metrics.rs

//! Performance Metrics Tracking
//!
//! Track and export performance metrics for the authorization engine.
//!
//! ## Features
//!
//! - Request latency tracking (min / average / max)
//! - Cache hit rate monitoring
//! - Throughput measurement
//! - Error rate tracking
//! - Export metrics in JSON format for CI integration
//!
//! ## Example
//!
//! ```rust
//! use oxify_authz::metrics::{PerformanceMetrics, MetricsSnapshot};
//!
//! let metrics = PerformanceMetrics::new();
//!
//! // Record a request
//! metrics.record_check_latency(125); // 125 microseconds
//! metrics.record_cache_hit(true);
//!
//! // Get snapshot
//! let snapshot = metrics.snapshot();
//! println!("Cache hit rate: {:.2}%", snapshot.cache_hit_rate() * 100.0);
//! ```
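//!
//! A minimal sketch of exporting a snapshot for CI, using the `to_json` and
//! `meets_targets` helpers defined in this module:
//!
//! ```rust
//! use oxify_authz::metrics::PerformanceMetrics;
//!
//! let metrics = PerformanceMetrics::new();
//! metrics.record_check_latency(80);
//! metrics.record_check_success();
//! metrics.record_cache_hit(true);
//!
//! let snapshot = metrics.snapshot();
//! let json = snapshot.to_json().expect("serialize metrics");
//! let targets = snapshot.meets_targets();
//! println!("{}", json);
//! println!("all targets met: {}", targets.all_met());
//! ```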

use serde::{Deserialize, Serialize};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Instant;

/// Performance metrics for the authorization engine
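///
/// Cloning this handle is cheap: all clones share the same underlying
/// counters through an `Arc`, so a clone can be handed to each worker.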
#[derive(Clone)]
pub struct PerformanceMetrics {
    inner: Arc<MetricsInner>,
}

struct MetricsInner {
    // Request counts
    total_checks: AtomicU64,
    successful_checks: AtomicU64,
    failed_checks: AtomicU64,

    // Cache metrics
    cache_hits: AtomicU64,
    cache_misses: AtomicU64,

    // Latency tracking (in microseconds)
    total_latency_us: AtomicU64,
    min_latency_us: AtomicU64,
    max_latency_us: AtomicU64,

    // Delegation metrics
    delegation_checks: AtomicU64,
    delegation_grants: AtomicU64,

    // Tenant metrics
    tenant_quota_exceeded: AtomicU64,

    // Start time for throughput calculation
    start_time: Instant,
}

impl PerformanceMetrics {
    /// Create a new performance metrics tracker
    pub fn new() -> Self {
        Self {
            inner: Arc::new(MetricsInner {
                total_checks: AtomicU64::new(0),
                successful_checks: AtomicU64::new(0),
                failed_checks: AtomicU64::new(0),
                cache_hits: AtomicU64::new(0),
                cache_misses: AtomicU64::new(0),
                total_latency_us: AtomicU64::new(0),
                min_latency_us: AtomicU64::new(u64::MAX),
                max_latency_us: AtomicU64::new(0),
                delegation_checks: AtomicU64::new(0),
                delegation_grants: AtomicU64::new(0),
                tenant_quota_exceeded: AtomicU64::new(0),
                start_time: Instant::now(),
            }),
        }
    }

    /// Record a permission check latency (in microseconds)
    pub fn record_check_latency(&self, latency_us: u64) {
        self.inner.total_checks.fetch_add(1, Ordering::Relaxed);
        self.inner
            .total_latency_us
            .fetch_add(latency_us, Ordering::Relaxed);

        // Update min latency
        let mut current_min = self.inner.min_latency_us.load(Ordering::Relaxed);
        while latency_us < current_min {
            match self.inner.min_latency_us.compare_exchange_weak(
                current_min,
                latency_us,
                Ordering::Relaxed,
                Ordering::Relaxed,
            ) {
                Ok(_) => break,
                Err(x) => current_min = x,
            }
        }

        // Update max latency
        let mut current_max = self.inner.max_latency_us.load(Ordering::Relaxed);
        while latency_us > current_max {
            match self.inner.max_latency_us.compare_exchange_weak(
                current_max,
                latency_us,
                Ordering::Relaxed,
                Ordering::Relaxed,
            ) {
                Ok(_) => break,
                Err(x) => current_max = x,
            }
        }
    }

    /// Record a successful permission check
    pub fn record_check_success(&self) {
        self.inner.successful_checks.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a failed permission check
    pub fn record_check_failure(&self) {
        self.inner.failed_checks.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a cache hit or miss
    pub fn record_cache_hit(&self, hit: bool) {
        if hit {
            self.inner.cache_hits.fetch_add(1, Ordering::Relaxed);
        } else {
            self.inner.cache_misses.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Record a delegation check
    pub fn record_delegation_check(&self, granted: bool) {
        self.inner.delegation_checks.fetch_add(1, Ordering::Relaxed);
        if granted {
            self.inner.delegation_grants.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Record a tenant quota exceeded event
    pub fn record_quota_exceeded(&self) {
        self.inner
            .tenant_quota_exceeded
            .fetch_add(1, Ordering::Relaxed);
    }

    /// Get a snapshot of current metrics
    pub fn snapshot(&self) -> MetricsSnapshot {
        let total_checks = self.inner.total_checks.load(Ordering::Relaxed);
        let total_latency = self.inner.total_latency_us.load(Ordering::Relaxed);
        let cache_hits = self.inner.cache_hits.load(Ordering::Relaxed);
        let cache_misses = self.inner.cache_misses.load(Ordering::Relaxed);

        MetricsSnapshot {
            total_checks,
            successful_checks: self.inner.successful_checks.load(Ordering::Relaxed),
            failed_checks: self.inner.failed_checks.load(Ordering::Relaxed),
            cache_hits,
            cache_misses,
            avg_latency_us: if total_checks > 0 {
                total_latency / total_checks
            } else {
                0
            },
            min_latency_us: self.inner.min_latency_us.load(Ordering::Relaxed),
            max_latency_us: self.inner.max_latency_us.load(Ordering::Relaxed),
            delegation_checks: self.inner.delegation_checks.load(Ordering::Relaxed),
            delegation_grants: self.inner.delegation_grants.load(Ordering::Relaxed),
            tenant_quota_exceeded: self.inner.tenant_quota_exceeded.load(Ordering::Relaxed),
            uptime_seconds: self.inner.start_time.elapsed().as_secs(),
        }
    }

    /// Reset all metrics
    pub fn reset(&self) {
        self.inner.total_checks.store(0, Ordering::Relaxed);
        self.inner.successful_checks.store(0, Ordering::Relaxed);
        self.inner.failed_checks.store(0, Ordering::Relaxed);
        self.inner.cache_hits.store(0, Ordering::Relaxed);
        self.inner.cache_misses.store(0, Ordering::Relaxed);
        self.inner.total_latency_us.store(0, Ordering::Relaxed);
        self.inner.min_latency_us.store(u64::MAX, Ordering::Relaxed);
        self.inner.max_latency_us.store(0, Ordering::Relaxed);
        self.inner.delegation_checks.store(0, Ordering::Relaxed);
        self.inner.delegation_grants.store(0, Ordering::Relaxed);
        self.inner.tenant_quota_exceeded.store(0, Ordering::Relaxed);
    }
}

impl Default for PerformanceMetrics {
    fn default() -> Self {
        Self::new()
    }
}

/// Snapshot of performance metrics at a point in time
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    pub total_checks: u64,
    pub successful_checks: u64,
    pub failed_checks: u64,
    pub cache_hits: u64,
    pub cache_misses: u64,
    pub avg_latency_us: u64,
    pub min_latency_us: u64,
    pub max_latency_us: u64,
    pub delegation_checks: u64,
    pub delegation_grants: u64,
    pub tenant_quota_exceeded: u64,
    pub uptime_seconds: u64,
}

impl MetricsSnapshot {
    /// Calculate cache hit rate (0.0 to 1.0)
    pub fn cache_hit_rate(&self) -> f64 {
        let total = self.cache_hits + self.cache_misses;
        if total == 0 {
            0.0
        } else {
            self.cache_hits as f64 / total as f64
        }
    }

    /// Calculate success rate (0.0 to 1.0)
    pub fn success_rate(&self) -> f64 {
        if self.total_checks == 0 {
            0.0
        } else {
            self.successful_checks as f64 / self.total_checks as f64
        }
    }

    /// Calculate average latency in milliseconds
    pub fn avg_latency_ms(&self) -> f64 {
        self.avg_latency_us as f64 / 1000.0
    }

    /// Calculate throughput (requests per second)
    pub fn throughput_rps(&self) -> f64 {
        if self.uptime_seconds == 0 {
            0.0
        } else {
            self.total_checks as f64 / self.uptime_seconds as f64
        }
    }

    /// Calculate delegation grant rate (0.0 to 1.0)
    pub fn delegation_grant_rate(&self) -> f64 {
        if self.delegation_checks == 0 {
            0.0
        } else {
            self.delegation_grants as f64 / self.delegation_checks as f64
        }
    }

    /// Export metrics as a pretty-printed JSON string
    pub fn to_json(&self) -> Result<String, serde_json::Error> {
        serde_json::to_string_pretty(self)
    }

    /// Check if metrics meet performance targets
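    ///
    /// A minimal sketch of a CI-style gate built on this check (the thresholds
    /// are the ones hardcoded below):
    ///
    /// ```rust
    /// # use oxify_authz::metrics::PerformanceMetrics;
    /// let metrics = PerformanceMetrics::new();
    /// // ... drive traffic and record latencies, hits, and successes ...
    /// let targets = metrics.snapshot().meets_targets();
    /// if !targets.all_met() {
    ///     eprintln!("performance targets missed");
    /// }
    /// ```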
    pub fn meets_targets(&self) -> PerformanceTargets {
        PerformanceTargets {
            avg_latency_target_met: self.avg_latency_us < 100, // <100μs for cached
            cache_hit_rate_target_met: self.cache_hit_rate() > 0.95, // >95% hit rate
            success_rate_target_met: self.success_rate() > 0.99, // >99% success
        }
    }
}

/// Performance targets evaluation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTargets {
    pub avg_latency_target_met: bool,
    pub cache_hit_rate_target_met: bool,
    pub success_rate_target_met: bool,
}

impl PerformanceTargets {
    /// Check if all targets are met
    pub fn all_met(&self) -> bool {
        self.avg_latency_target_met
            && self.cache_hit_rate_target_met
            && self.success_rate_target_met
    }
}

/// Timer for measuring operation duration
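///
/// A minimal usage sketch (the elided work stands in for an actual
/// permission check):
///
/// ```rust
/// # use oxify_authz::metrics::{MetricsTimer, PerformanceMetrics};
/// let metrics = PerformanceMetrics::new();
/// let timer = MetricsTimer::start(metrics.clone());
/// // ... perform the permission check ...
/// timer.stop(); // records the elapsed time as check latency
/// assert_eq!(metrics.snapshot().total_checks, 1);
/// ```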
pub struct MetricsTimer {
    start: Instant,
    metrics: PerformanceMetrics,
}

impl MetricsTimer {
    /// Start a new timer
    pub fn start(metrics: PerformanceMetrics) -> Self {
        Self {
            start: Instant::now(),
            metrics,
        }
    }

    /// Stop the timer and record the latency
    pub fn stop(self) {
        let elapsed_us = self.start.elapsed().as_micros() as u64;
        self.metrics.record_check_latency(elapsed_us);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_metrics_basic() {
        let metrics = PerformanceMetrics::new();

        metrics.record_check_latency(100);
        metrics.record_check_latency(200);
        metrics.record_check_latency(150);

        metrics.record_cache_hit(true);
        metrics.record_cache_hit(true);
        metrics.record_cache_hit(false);

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.total_checks, 3);
        assert_eq!(snapshot.avg_latency_us, 150); // (100+200+150)/3
        assert_eq!(snapshot.min_latency_us, 100);
        assert_eq!(snapshot.max_latency_us, 200);
        assert_eq!(snapshot.cache_hits, 2);
        assert_eq!(snapshot.cache_misses, 1);
    }

    #[test]
    fn test_cache_hit_rate() {
        let metrics = PerformanceMetrics::new();

        for _ in 0..95 {
            metrics.record_cache_hit(true);
        }
        for _ in 0..5 {
            metrics.record_cache_hit(false);
        }

        let snapshot = metrics.snapshot();
        assert!((snapshot.cache_hit_rate() - 0.95).abs() < 0.01);
    }

    #[test]
    fn test_delegation_metrics() {
        let metrics = PerformanceMetrics::new();

        metrics.record_delegation_check(true);
        metrics.record_delegation_check(true);
        metrics.record_delegation_check(false);

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.delegation_checks, 3);
        assert_eq!(snapshot.delegation_grants, 2);
        assert!((snapshot.delegation_grant_rate() - 0.666).abs() < 0.01);
    }

    #[test]
    fn test_metrics_reset() {
        let metrics = PerformanceMetrics::new();

        metrics.record_check_latency(100);
        metrics.record_cache_hit(true);

        assert_eq!(metrics.snapshot().total_checks, 1);

        metrics.reset();

        let snapshot = metrics.snapshot();
        assert_eq!(snapshot.total_checks, 0);
        assert_eq!(snapshot.cache_hits, 0);
    }

    #[test]
    fn test_json_export() {
        let snapshot = MetricsSnapshot {
            total_checks: 1000,
            successful_checks: 995,
            failed_checks: 5,
            cache_hits: 950,
            cache_misses: 50,
            avg_latency_us: 75,
            min_latency_us: 10,
            max_latency_us: 500,
            delegation_checks: 100,
            delegation_grants: 80,
            tenant_quota_exceeded: 2,
            uptime_seconds: 3600,
        };

        let json = snapshot.to_json().unwrap();
        assert!(json.contains("total_checks"));
        assert!(json.contains("1000"));
    }
}