// goldrush_sdk/metrics.rs

use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use tracing::{debug, info, instrument};

8/// Metrics collector for SDK operations.
9#[derive(Debug)]
10pub struct MetricsCollector {
11    /// Total number of requests made.
12    request_count: AtomicU64,
13    /// Total number of successful requests.
14    success_count: AtomicU64,
15    /// Total number of failed requests.
16    error_count: AtomicU64,
17    /// Cache hit count.
18    cache_hits: AtomicU64,
19    /// Cache miss count.
20    cache_misses: AtomicU64,
21    /// Rate limit hit count.
22    rate_limit_hits: AtomicU64,
23    /// Response time tracking.
24    response_times: Arc<RwLock<ResponseTimeTracker>>,
25    /// Error breakdown by type.
26    error_breakdown: Arc<RwLock<HashMap<String, u64>>>,
27    /// Request breakdown by endpoint.
28    endpoint_stats: Arc<RwLock<HashMap<String, EndpointStats>>>,
29}
30
31impl Default for MetricsCollector {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37impl MetricsCollector {
38    pub fn new() -> Self {
39        Self {
40            request_count: AtomicU64::new(0),
41            success_count: AtomicU64::new(0),
42            error_count: AtomicU64::new(0),
43            cache_hits: AtomicU64::new(0),
44            cache_misses: AtomicU64::new(0),
45            rate_limit_hits: AtomicU64::new(0),
46            response_times: Arc::new(RwLock::new(ResponseTimeTracker::new())),
47            error_breakdown: Arc::new(RwLock::new(HashMap::new())),
48            endpoint_stats: Arc::new(RwLock::new(HashMap::new())),
49        }
50    }
51    
52    /// Record a successful request.
53    #[instrument(skip(self), fields(endpoint = %endpoint, response_time_ms = %response_time.as_millis()))]
54    pub async fn record_success(&self, endpoint: &str, response_time: Duration) {
55        self.request_count.fetch_add(1, Ordering::Relaxed);
56        self.success_count.fetch_add(1, Ordering::Relaxed);
57        
58        // Update response times
59        self.response_times.write().await.add_sample(response_time);
60        
61        // Update endpoint stats
62        let mut endpoint_stats = self.endpoint_stats.write().await;
63        let stats = endpoint_stats.entry(endpoint.to_string()).or_insert_with(EndpointStats::new);
64        stats.request_count += 1;
65        stats.success_count += 1;
66        stats.total_response_time += response_time;
67        
68        debug!("Success metrics recorded");
69    }
70    
71    /// Record a failed request.
72    #[instrument(skip(self), fields(endpoint = %endpoint, error_type = %error_type))]
73    pub async fn record_error(&self, endpoint: &str, error_type: &str) {
74        self.request_count.fetch_add(1, Ordering::Relaxed);
75        self.error_count.fetch_add(1, Ordering::Relaxed);
76        
77        // Update error breakdown
78        let mut error_breakdown = self.error_breakdown.write().await;
79        *error_breakdown.entry(error_type.to_string()).or_insert(0) += 1;
80        
81        // Update endpoint stats
82        let mut endpoint_stats = self.endpoint_stats.write().await;
83        let stats = endpoint_stats.entry(endpoint.to_string()).or_insert_with(EndpointStats::new);
84        stats.request_count += 1;
85        stats.error_count += 1;
86        
87        debug!("Error metrics recorded");
88    }
89    
90    /// Record a cache hit.
91    pub fn record_cache_hit(&self) {
92        self.cache_hits.fetch_add(1, Ordering::Relaxed);
93        debug!("Cache hit recorded");
94    }
95    
96    /// Record a cache miss.
97    pub fn record_cache_miss(&self) {
98        self.cache_misses.fetch_add(1, Ordering::Relaxed);
99        debug!("Cache miss recorded");
100    }
101    
102    /// Record a rate limit hit.
103    pub fn record_rate_limit_hit(&self) {
104        self.rate_limit_hits.fetch_add(1, Ordering::Relaxed);
105        debug!("Rate limit hit recorded");
106    }
107    
108    /// Get comprehensive metrics summary.
109    pub async fn get_metrics(&self) -> MetricsSummary {
110        let response_times = self.response_times.read().await;
111        let error_breakdown = self.error_breakdown.read().await.clone();
112        let endpoint_stats = self.endpoint_stats.read().await.clone();
113        
114        MetricsSummary {
115            request_count: self.request_count.load(Ordering::Relaxed),
116            success_count: self.success_count.load(Ordering::Relaxed),
117            error_count: self.error_count.load(Ordering::Relaxed),
118            cache_hits: self.cache_hits.load(Ordering::Relaxed),
119            cache_misses: self.cache_misses.load(Ordering::Relaxed),
120            rate_limit_hits: self.rate_limit_hits.load(Ordering::Relaxed),
121            avg_response_time: response_times.average(),
122            p95_response_time: response_times.p95(),
123            p99_response_time: response_times.p99(),
124            error_breakdown,
125            endpoint_stats,
126        }
127    }
128    
129    /// Reset all metrics.
130    pub async fn reset(&self) {
131        self.request_count.store(0, Ordering::Relaxed);
132        self.success_count.store(0, Ordering::Relaxed);
133        self.error_count.store(0, Ordering::Relaxed);
134        self.cache_hits.store(0, Ordering::Relaxed);
135        self.cache_misses.store(0, Ordering::Relaxed);
136        self.rate_limit_hits.store(0, Ordering::Relaxed);
137        
138        self.response_times.write().await.reset();
139        self.error_breakdown.write().await.clear();
140        self.endpoint_stats.write().await.clear();
141        
142        info!("Metrics reset");
143    }
144}
145
/// Track response times with percentile calculations.
///
/// Keeps a sliding window of the most recent `max_samples` durations; once
/// the window is full, each new sample evicts the oldest one.
#[derive(Debug)]
struct ResponseTimeTracker {
    /// Retained samples in arrival order (front = oldest).
    samples: VecDeque<Duration>,
    /// Maximum number of samples kept in the window.
    max_samples: usize,
}

impl ResponseTimeTracker {
    /// Creates an empty tracker that keeps the last 1000 samples.
    fn new() -> Self {
        const MAX_SAMPLES: usize = 1000;
        Self {
            // One spare slot: a sample is pushed before the oldest is evicted.
            samples: VecDeque::with_capacity(MAX_SAMPLES + 1),
            max_samples: MAX_SAMPLES,
        }
    }

    /// Appends `duration`, evicting the oldest sample if the window overflows.
    fn add_sample(&mut self, duration: Duration) {
        self.samples.push_back(duration);

        // A ring buffer makes dropping the oldest sample O(1); removing the
        // first element of a `Vec` would shift the whole window every time.
        if self.samples.len() > self.max_samples {
            self.samples.pop_front();
        }
    }

    /// Mean of the retained samples, or `Duration::ZERO` when there are none.
    fn average(&self) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }

        let total: Duration = self.samples.iter().sum();
        // The window is bounded by `max_samples`; saturate instead of silently
        // truncating should that bound ever be raised past `u32::MAX`.
        let count = u32::try_from(self.samples.len()).unwrap_or(u32::MAX);
        total / count
    }

    /// 95th-percentile response time of the retained samples.
    fn p95(&self) -> Duration {
        self.percentile(0.95)
    }

    /// 99th-percentile response time of the retained samples.
    fn p99(&self) -> Duration {
        self.percentile(0.99)
    }

    /// Returns the sample at position `floor((n - 1) * percentile)` of the
    /// sorted window, where `n` is the number of retained samples.
    ///
    /// Yields `Duration::ZERO` when the window is empty or the computed
    /// position falls outside it (i.e. `percentile` greater than 1.0).
    fn percentile(&self, percentile: f64) -> Duration {
        if self.samples.is_empty() {
            return Duration::ZERO;
        }

        // Sort a scratch copy so the arrival order of the window is preserved.
        let mut sorted: Vec<Duration> = self.samples.iter().copied().collect();
        sorted.sort_unstable();

        let index = ((sorted.len() as f64 - 1.0) * percentile) as usize;
        sorted.get(index).copied().unwrap_or(Duration::ZERO)
    }

    /// Discards every retained sample.
    fn reset(&mut self) {
        self.samples.clear();
    }
}

/// Statistics for individual endpoints.
#[derive(Debug, Clone)]
pub struct EndpointStats {
    /// Number of requests recorded for the endpoint (successes plus errors).
    pub request_count: u64,
    /// Number of successful requests.
    pub success_count: u64,
    /// Number of failed requests.
    pub error_count: u64,
    /// Sum of the response times of the successful requests; failed requests
    /// are recorded without a duration and contribute nothing here.
    pub total_response_time: Duration,
}

impl EndpointStats {
    /// Creates an entry with all counters at zero.
    fn new() -> Self {
        Self {
            request_count: 0,
            success_count: 0,
            error_count: 0,
            total_response_time: Duration::ZERO,
        }
    }

    /// Mean response time of the successful requests.
    ///
    /// The divisor is `success_count`, not `request_count`: only successes
    /// add to `total_response_time`, so dividing by all requests would
    /// understate the average for any endpoint that has seen errors.
    /// Returns `Duration::ZERO` when no success has been recorded.
    pub fn average_response_time(&self) -> Duration {
        if self.success_count == 0 {
            return Duration::ZERO;
        }

        match u32::try_from(self.success_count) {
            Ok(divisor) => self.total_response_time / divisor,
            // `Duration` only divides by `u32`. A plain `as u32` cast would
            // truncate (and divide by zero at multiples of 2^32), so fall back
            // to 128-bit nanosecond arithmetic for very large counts.
            Err(_) => {
                let nanos = self.total_response_time.as_nanos() / u128::from(self.success_count);
                Duration::from_nanos(u64::try_from(nanos).unwrap_or(u64::MAX))
            }
        }
    }

    /// Percentage (0.0 to 100.0) of requests that succeeded; 0.0 when no
    /// request has been recorded.
    pub fn success_rate(&self) -> f64 {
        if self.request_count == 0 {
            0.0
        } else {
            (self.success_count as f64) / (self.request_count as f64) * 100.0
        }
    }
}

240/// Complete metrics summary.
241#[derive(Debug, Clone)]
242pub struct MetricsSummary {
243    pub request_count: u64,
244    pub success_count: u64,
245    pub error_count: u64,
246    pub cache_hits: u64,
247    pub cache_misses: u64,
248    pub rate_limit_hits: u64,
249    pub avg_response_time: Duration,
250    pub p95_response_time: Duration,
251    pub p99_response_time: Duration,
252    pub error_breakdown: HashMap<String, u64>,
253    pub endpoint_stats: HashMap<String, EndpointStats>,
254}
255
256impl MetricsSummary {
257    /// Calculate success rate percentage.
258    pub fn success_rate(&self) -> f64 {
259        if self.request_count == 0 {
260            0.0
261        } else {
262            (self.success_count as f64) / (self.request_count as f64) * 100.0
263        }
264    }
265    
266    /// Calculate cache hit rate percentage.
267    pub fn cache_hit_rate(&self) -> f64 {
268        let total_cache_requests = self.cache_hits + self.cache_misses;
269        if total_cache_requests == 0 {
270            0.0
271        } else {
272            (self.cache_hits as f64) / (total_cache_requests as f64) * 100.0
273        }
274    }
275    
276    /// Get formatted metrics string for logging.
277    pub fn format_for_logging(&self) -> String {
278        format!(
279            "SDK Metrics - Requests: {} ({}% success), Cache Hit Rate: {:.1}%, Avg Response: {}ms",
280            self.request_count,
281            self.success_rate(),
282            self.cache_hit_rate(),
283            self.avg_response_time.as_millis()
284        )
285    }
286}
287
/// Timer utility for measuring operation duration.
#[derive(Debug)]
pub struct Timer {
    /// Instant captured when the timer was created.
    start: Instant,
}

impl Timer {
    /// Starts a timer at the current instant.
    pub fn new() -> Self {
        Self {
            start: Instant::now(),
        }
    }

    /// Time elapsed since the timer was created.
    pub fn elapsed(&self) -> Duration {
        self.start.elapsed()
    }
}

/// `Default` is equivalent to [`Timer::new`]: the timer starts immediately.
impl Default for Timer {
    fn default() -> Self {
        Self::new()
    }
}