edb_rpc_proxy/metrics.rs

// EDB - Ethereum Debugger
// Copyright (C) 2024 Zhuo Zhang and Wuqi Zhang
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! Comprehensive metrics collection for RPC proxy performance monitoring
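//!
//! The collector is designed to be shared across request handlers. A minimal
//! usage sketch follows (illustrative only and not compiled as a doc test; the
//! import path and provider URL are assumptions, not taken from this crate's
//! public API):
//!
//! ```ignore
//! use std::sync::Arc;
//!
//! // Assumed path; adjust to wherever MetricsCollector is actually exported.
//! use edb_rpc_proxy::metrics::MetricsCollector;
//!
//! let metrics = Arc::new(MetricsCollector::new());
//!
//! // A request served from the cache.
//! metrics.record_cache_hit("eth_getBalance", 3);
//!
//! // A cacheable request that missed and was forwarded upstream.
//! metrics.record_cache_miss();
//! metrics.record_request("eth_getBalance", "https://example-rpc.invalid", 120, true);
//!
//! // 1 hit out of 2 total requests.
//! assert_eq!(metrics.cache_hit_rate(), 50.0);
//! ```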

use serde::{Deserialize, Serialize};
use std::{
    collections::{HashMap, VecDeque},
    sync::{
        atomic::{AtomicU64, Ordering},
        Arc, RwLock,
    },
    time::{SystemTime, UNIX_EPOCH},
};

/// Method-level performance statistics
///
/// Tracks comprehensive performance metrics for individual RPC methods,
/// including cache performance, response times, and error rates.
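///
/// The derived rates are computed from the raw counters. An illustrative
/// sketch (not compiled as a doc test; assumes the type is in scope):
///
/// ```ignore
/// let stats = MethodStats {
///     hits: 8,
///     misses: 2,
///     total_requests: 10,
///     avg_response_time_ms: 45.0,
///     total_response_time_ms: 450,
///     errors: 1,
/// };
/// assert_eq!(stats.hit_rate(), 80.0);   // 8 hits / 10 requests
/// assert_eq!(stats.error_rate(), 10.0); // 1 error / 10 requests
/// ```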
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MethodStats {
    /// Number of cache hits for this method (served from cache)
    pub hits: u64,
    /// Number of requests forwarded to upstream providers (cache misses + non-cacheable)
    pub misses: u64,
    /// Total number of requests for this method (hits + misses)
    pub total_requests: u64,
    /// Average response time in milliseconds
    pub avg_response_time_ms: f64,
    /// Total response time in milliseconds (used for average calculation)
    pub total_response_time_ms: u64,
    /// Number of errors encountered for this method
    pub errors: u64,
}

impl Default for MethodStats {
    fn default() -> Self {
        Self {
            hits: 0,
            misses: 0,
            total_requests: 0,
            avg_response_time_ms: 0.0,
            total_response_time_ms: 0,
            errors: 0,
        }
    }
}

impl MethodStats {
    /// Calculate the cache hit rate as a percentage (0.0 to 100.0)
    #[allow(dead_code)]
    pub fn hit_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            (self.hits as f64 / self.total_requests as f64) * 100.0
        }
    }

    /// Calculate the error rate as a percentage (0.0 to 100.0)
    #[allow(dead_code)]
    pub fn error_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            (self.errors as f64 / self.total_requests as f64) * 100.0
        }
    }

    /// Update the average response time based on current totals
    pub fn update_avg_response_time(&mut self) {
        if self.total_requests > 0 {
            self.avg_response_time_ms =
                self.total_response_time_ms as f64 / self.total_requests as f64;
        }
    }
}

/// Provider usage analytics
///
/// Tracks detailed usage statistics for individual RPC providers,
/// including performance metrics and historical response times.
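///
/// Counters are updated through [`ProviderUsage::record_request`]; the rate and
/// average helpers are derived from them. An illustrative sketch (not compiled
/// as a doc test; assumes the type is in scope):
///
/// ```ignore
/// let mut usage = ProviderUsage::default();
/// usage.record_request(100, true);  // successful, 100 ms
/// usage.record_request(300, false); // failed, 300 ms
///
/// assert_eq!(usage.request_count, 2);
/// assert_eq!(usage.avg_response_time_ms(), 200.0); // (100 + 300) / 2
/// assert_eq!(usage.success_rate(), 50.0);          // 1 success out of 2
/// ```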
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderUsage {
    /// Total number of requests sent to this provider
    pub request_count: u64,
    /// Total response time in milliseconds (used for average calculation)
    pub total_response_time_ms: u64,
    /// Number of successful requests
    pub success_count: u64,
    /// Number of failed requests
    pub error_count: u64,
    /// Unix timestamp of the last request to this provider
    pub last_used_timestamp: u64,
    /// Recent response times (limited to last 100) for histogram analysis
    pub response_time_history: VecDeque<u64>,
}

impl Default for ProviderUsage {
    fn default() -> Self {
        Self {
            request_count: 0,
            total_response_time_ms: 0,
            success_count: 0,
            error_count: 0,
            last_used_timestamp: 0,
            response_time_history: VecDeque::with_capacity(100),
        }
    }
}

impl ProviderUsage {
    /// Calculate the average response time in milliseconds
    pub fn avg_response_time_ms(&self) -> f64 {
        if self.request_count == 0 {
            0.0
        } else {
            self.total_response_time_ms as f64 / self.request_count as f64
        }
    }

    /// Calculate the success rate as a percentage (0.0 to 100.0)
    pub fn success_rate(&self) -> f64 {
        if self.request_count == 0 {
            0.0
        } else {
            (self.success_count as f64 / self.request_count as f64) * 100.0
        }
    }

    /// Calculate this provider's load as a percentage of total requests
    pub fn load_percentage(&self, total_requests: u64) -> f64 {
        if total_requests == 0 {
            0.0
        } else {
            (self.request_count as f64 / total_requests as f64) * 100.0
        }
    }

    /// Record a request to this provider with response time and success status
    pub fn record_request(&mut self, response_time_ms: u64, success: bool) {
        self.request_count += 1;
        self.total_response_time_ms += response_time_ms;
        self.last_used_timestamp =
            SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs();

        if success {
            self.success_count += 1;
        } else {
            self.error_count += 1;
        }

        // Keep only last 100 response times for histogram analysis
        self.response_time_history.push_back(response_time_ms);
        if self.response_time_history.len() > 100 {
            self.response_time_history.pop_front();
        }
    }
}

/// Historical metric data point for time-series analysis
///
/// Represents a snapshot of system metrics at a specific point in time,
/// used for trend analysis and historical monitoring.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistoricalMetric {
    /// Unix timestamp when this metric was recorded
    pub timestamp: u64,
    /// Number of cache hits at this point in time
    pub cache_hits: u64,
    /// Number of cache misses at this point in time
    pub cache_misses: u64,
    /// Current cache size (number of entries)
    pub cache_size: u64,
    /// Number of healthy providers at this point in time
    pub healthy_providers: u64,
    /// Total number of configured providers
    pub total_providers: u64,
    /// Request rate (requests per minute) at this point in time
    pub requests_per_minute: u64,
    /// Average response time across all methods and providers
    pub avg_response_time_ms: f64,
    /// Number of active EDB instances connected
    pub active_instances: usize,
}

/// Comprehensive metrics collector for the RPC proxy
///
/// Thread-safe metrics collection system that tracks cache performance,
/// provider usage, method-level statistics, and historical trends.
/// Uses atomic operations and read-write locks for concurrent access.
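///
/// The collector is typically wrapped in an [`Arc`] and shared between request
/// handlers. A minimal concurrency sketch (illustrative only; not compiled as a
/// doc test):
///
/// ```ignore
/// use std::sync::Arc;
/// use std::thread;
///
/// let metrics = Arc::new(MetricsCollector::new());
///
/// // Each handler thread records into the same collector.
/// let handles: Vec<_> = (0..4)
///     .map(|_| {
///         let metrics = Arc::clone(&metrics);
///         thread::spawn(move || metrics.record_cache_hit("eth_chainId", 1))
///     })
///     .collect();
///
/// for handle in handles {
///     handle.join().unwrap();
/// }
///
/// // All four requests were cache hits.
/// assert_eq!(metrics.cache_hit_rate(), 100.0);
/// ```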
#[derive(Debug)]
pub struct MetricsCollector {
    // Cache metrics - atomic for high-performance concurrent access
    /// Total number of cache hits across all methods (served from cache)
    pub cache_hits: AtomicU64,
    /// Total number of cacheable requests that missed the cache (non-cacheable requests are excluded)
    pub cache_misses: AtomicU64,
    /// Total number of requests processed (all requests)
    pub total_requests: AtomicU64,

    // Provider metrics - protected by RwLock for complex operations
    /// Per-provider usage statistics and performance metrics
    pub provider_usage: Arc<RwLock<HashMap<String, ProviderUsage>>>,

    // Method-level metrics - protected by RwLock for complex operations
    /// Per-method performance statistics and cache effectiveness
    pub method_stats: Arc<RwLock<HashMap<String, MethodStats>>>,

    // Historical data (limited to 1000 points to prevent memory leaks)
    /// Time-series data for trend analysis and monitoring dashboards
    pub metrics_history: Arc<RwLock<VecDeque<HistoricalMetric>>>,

    // Request rate tracking (timestamps of last 1000 requests)
    /// Recent request timestamps for calculating requests-per-minute
    pub request_timestamps: Arc<RwLock<VecDeque<u64>>>,

    // Error tracking - atomic for high-performance concurrent access
    /// Total number of errors across all request types
    pub total_errors: AtomicU64,
    /// Number of rate limiting errors encountered
    pub rate_limit_errors: AtomicU64,
    /// Number of user-caused errors (4xx responses)
    pub user_errors: AtomicU64,
}

impl Default for MetricsCollector {
    fn default() -> Self {
        Self::new()
    }
}

impl MetricsCollector {
    /// Create a new metrics collector
    pub fn new() -> Self {
        Self {
            cache_hits: AtomicU64::new(0),
            cache_misses: AtomicU64::new(0),
            total_requests: AtomicU64::new(0),
            provider_usage: Arc::new(RwLock::new(HashMap::new())),
            method_stats: Arc::new(RwLock::new(HashMap::new())),
            metrics_history: Arc::new(RwLock::new(VecDeque::with_capacity(1000))),
            request_timestamps: Arc::new(RwLock::new(VecDeque::with_capacity(1000))),
            total_errors: AtomicU64::new(0),
            rate_limit_errors: AtomicU64::new(0),
            user_errors: AtomicU64::new(0),
        }
    }

    /// Record a cache hit for a specific method
    pub fn record_cache_hit(&self, method: &str, response_time_ms: u64) {
        self.cache_hits.fetch_add(1, Ordering::Relaxed);

        // XXX (ZZ): cache hits do not go through forward_request, so the
        // request-level metrics below must be updated here as well.
        self.total_requests.fetch_add(1, Ordering::Relaxed);
        self.record_request_timestamp();

        // Update method stats
        if let Ok(mut stats) = self.method_stats.write() {
            let method_stat = stats.entry(method.to_string()).or_default();
            method_stat.hits += 1;
            method_stat.total_requests += 1;
            method_stat.total_response_time_ms += response_time_ms;
            method_stat.update_avg_response_time();
        }
    }

    /// Record a cache miss (per-method miss counts are updated in [`Self::record_request`])
    pub fn record_cache_miss(&self) {
        self.cache_misses.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a request that was forwarded to an upstream provider
    /// This includes both cache misses and non-cacheable requests
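    ///
    /// For a cacheable request that misses the cache, callers are expected to
    /// pair this with [`Self::record_cache_miss`], as the tests below do. An
    /// illustrative call sequence (not compiled as a doc test; `metrics` is an
    /// existing collector and the provider URL is a placeholder):
    ///
    /// ```ignore
    /// let provider_url = "https://example-rpc.invalid";
    ///
    /// // Cacheable request that was not found in the cache:
    /// metrics.record_cache_miss();
    /// metrics.record_request("eth_getBlockByNumber", provider_url, 180, true);
    ///
    /// // Non-cacheable request (e.g. eth_sendRawTransaction), forwarded directly:
    /// metrics.record_request("eth_sendRawTransaction", provider_url, 95, true);
    /// ```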
    pub fn record_request(
        &self,
        method: &str,
        provider_url: &str,
        response_time_ms: u64,
        success: bool,
    ) {
        self.total_requests.fetch_add(1, Ordering::Relaxed);
        self.record_request_timestamp();

        if !success {
            self.total_errors.fetch_add(1, Ordering::Relaxed);
        }

        // Update method stats
        if let Ok(mut stats) = self.method_stats.write() {
            let method_stat = stats.entry(method.to_string()).or_default();
            method_stat.misses += 1; // Any forwarded request counts as a miss
            method_stat.total_requests += 1;
            method_stat.total_response_time_ms += response_time_ms;
            if !success {
                method_stat.errors += 1;
            }
            method_stat.update_avg_response_time();
        }

        // Update provider usage
        if let Ok(mut usage) = self.provider_usage.write() {
            let provider_usage = usage.entry(provider_url.to_string()).or_default();
            provider_usage.record_request(response_time_ms, success);
        }
    }

    /// Record an error by type
    pub fn record_error(&self, error_type: ErrorType) {
        match error_type {
            ErrorType::RateLimit => self.rate_limit_errors.fetch_add(1, Ordering::Relaxed),
            ErrorType::UserError => self.user_errors.fetch_add(1, Ordering::Relaxed),
            ErrorType::Other => self.total_errors.fetch_add(1, Ordering::Relaxed),
        };
    }

    /// Record a request timestamp for rate calculation
    fn record_request_timestamp(&self) {
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs();

        if let Ok(mut timestamps) = self.request_timestamps.write() {
            timestamps.push_back(now);
            // Keep only the last 1000 timestamps (roughly 17 minutes of history at 1 req/sec)
            if timestamps.len() > 1000 {
                timestamps.pop_front();
            }
        }
    }

    /// Calculate requests per minute based on recent timestamps
    pub fn requests_per_minute(&self) -> u64 {
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs();
        let one_minute_ago = now - 60;

        if let Ok(timestamps) = self.request_timestamps.read() {
            timestamps.iter().filter(|&&ts| ts >= one_minute_ago).count() as u64
        } else {
            0
        }
    }

    /// Get overall cache hit rate as percentage
    pub fn cache_hit_rate(&self) -> f64 {
        let hits = self.cache_hits.load(Ordering::Relaxed);
        let total = self.total_requests.load(Ordering::Relaxed);

        if total == 0 {
            0.0
        } else {
            (hits as f64 / total as f64) * 100.0
        }
    }

    /// Get error rate as percentage
    pub fn error_rate(&self) -> f64 {
        let errors = self.total_errors.load(Ordering::Relaxed);
        let total = self.total_requests.load(Ordering::Relaxed);

        if total == 0 {
            0.0
        } else {
            (errors as f64 / total as f64) * 100.0
        }
    }

    /// Add a historical data point
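    ///
    /// Typically called periodically (for example from a background task) so
    /// that [`Self::get_metrics_history`] yields a time series. A rough sketch
    /// of such a loop (illustrative only; the snapshot values are placeholders
    /// rather than real cache or provider state):
    ///
    /// ```ignore
    /// use std::{thread, time::Duration};
    ///
    /// loop {
    ///     // Placeholder snapshot inputs; a real caller would query the cache
    ///     // manager and provider health tracker here.
    ///     metrics.add_historical_point(0, 0, 0, 0);
    ///     thread::sleep(Duration::from_secs(60));
    /// }
    /// ```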
    pub fn add_historical_point(
        &self,
        cache_size: u64,
        healthy_providers: u64,
        total_providers: u64,
        active_instances: usize,
    ) {
        let metric = HistoricalMetric {
            timestamp: SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(),
            cache_hits: self.cache_hits.load(Ordering::Relaxed),
            cache_misses: self.cache_misses.load(Ordering::Relaxed),
            cache_size,
            healthy_providers,
            total_providers,
            requests_per_minute: self.requests_per_minute(),
            avg_response_time_ms: self.overall_avg_response_time(),
            active_instances,
        };

        if let Ok(mut history) = self.metrics_history.write() {
            history.push_back(metric);
            // Keep only last 1000 data points to prevent memory growth
            if history.len() > 1000 {
                history.pop_front();
            }
        }
    }

    /// Calculate overall average response time
    fn overall_avg_response_time(&self) -> f64 {
        if let Ok(stats) = self.method_stats.read() {
            let total_time: u64 = stats.values().map(|s| s.total_response_time_ms).sum();
            let total_requests: u64 = stats.values().map(|s| s.total_requests).sum();

            if total_requests > 0 {
                total_time as f64 / total_requests as f64
            } else {
                0.0
            }
        } else {
            0.0
        }
    }

    /// Get method statistics as a cloned HashMap
    pub fn get_method_stats(&self) -> HashMap<String, MethodStats> {
        self.method_stats
            .read()
            .unwrap_or_else(|_| {
                std::thread::yield_now();
                self.method_stats.read().expect("Failed to acquire method stats lock")
            })
            .clone()
    }

    /// Get provider usage statistics as a cloned HashMap
    pub fn get_provider_usage(&self) -> HashMap<String, ProviderUsage> {
        self.provider_usage
            .read()
            .unwrap_or_else(|_| {
                std::thread::yield_now();
                self.provider_usage.read().expect("Failed to acquire provider usage lock")
            })
            .clone()
    }

    /// Get historical metrics as a cloned VecDeque
    pub fn get_metrics_history(&self) -> VecDeque<HistoricalMetric> {
        self.metrics_history
            .read()
            .unwrap_or_else(|_| {
                std::thread::yield_now();
                self.metrics_history.read().expect("Failed to acquire metrics history lock")
            })
            .clone()
    }
}

/// Error type classification for metrics
///
/// Categorizes different types of errors for detailed error analysis
/// and monitoring. This helps identify systemic issues vs user errors.
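///
/// A sketch of how an HTTP status code might be mapped onto these variants,
/// following the variant descriptions below (the helper itself is hypothetical
/// and not part of this module):
///
/// ```ignore
/// fn classify_status(status: u16) -> ErrorType {
///     match status {
///         429 => ErrorType::RateLimit,
///         400..=499 => ErrorType::UserError,
///         _ => ErrorType::Other,
///     }
/// }
/// ```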
#[derive(Debug, Clone, Copy)]
pub enum ErrorType {
    /// Rate limiting errors (429 responses)
    RateLimit,
    /// User-caused errors (4xx responses except 429)
    UserError,
    /// Other system errors (5xx responses, network errors, etc.)
    Other,
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    #[test]
    fn test_method_stats_default() {
        let stats = MethodStats::default();
        assert_eq!(stats.hits, 0);
        assert_eq!(stats.misses, 0);
        assert_eq!(stats.total_requests, 0);
        assert_eq!(stats.avg_response_time_ms, 0.0);
        assert_eq!(stats.total_response_time_ms, 0);
        assert_eq!(stats.errors, 0);
    }

    #[test]
    fn test_method_stats_hit_rate() {
        let mut stats = MethodStats::default();

        // No requests - should return 0%
        assert_eq!(stats.hit_rate(), 0.0);

        // 50% hit rate
        stats.hits = 5;
        stats.total_requests = 10;
        assert_eq!(stats.hit_rate(), 50.0);

        // 100% hit rate
        stats.hits = 10;
        stats.total_requests = 10;
        assert_eq!(stats.hit_rate(), 100.0);

        // 0% hit rate
        stats.hits = 0;
        stats.total_requests = 10;
        assert_eq!(stats.hit_rate(), 0.0);
    }

    #[test]
    fn test_method_stats_error_rate() {
        let mut stats = MethodStats::default();

        // No requests - should return 0%
        assert_eq!(stats.error_rate(), 0.0);

        // 20% error rate
        stats.errors = 2;
        stats.total_requests = 10;
        assert_eq!(stats.error_rate(), 20.0);

        // 100% error rate
        stats.errors = 10;
        stats.total_requests = 10;
        assert_eq!(stats.error_rate(), 100.0);
    }

    #[test]
    fn test_method_stats_update_avg_response_time() {
        let mut stats = MethodStats::default();

        // No requests - average should remain 0
        stats.update_avg_response_time();
        assert_eq!(stats.avg_response_time_ms, 0.0);

        // With requests - should calculate correct average
        stats.total_response_time_ms = 500;
        stats.total_requests = 5;
        stats.update_avg_response_time();
        assert_eq!(stats.avg_response_time_ms, 100.0);

        // Updated totals
        stats.total_response_time_ms = 1200;
        stats.total_requests = 4;
        stats.update_avg_response_time();
        assert_eq!(stats.avg_response_time_ms, 300.0);
    }

    #[test]
    fn test_provider_usage_default() {
        let usage = ProviderUsage::default();
        assert_eq!(usage.request_count, 0);
        assert_eq!(usage.total_response_time_ms, 0);
        assert_eq!(usage.success_count, 0);
        assert_eq!(usage.error_count, 0);
        assert_eq!(usage.last_used_timestamp, 0);
        assert_eq!(usage.response_time_history.len(), 0);
        // `with_capacity` only guarantees a lower bound on capacity
        assert!(usage.response_time_history.capacity() >= 100);
    }

    #[test]
    fn test_provider_usage_avg_response_time() {
        let mut usage = ProviderUsage::default();

        // No requests - should return 0
        assert_eq!(usage.avg_response_time_ms(), 0.0);

        // With requests
        usage.total_response_time_ms = 1000;
        usage.request_count = 5;
        assert_eq!(usage.avg_response_time_ms(), 200.0);
    }

    #[test]
    fn test_provider_usage_success_rate() {
        let mut usage = ProviderUsage::default();

        // No requests - should return 0%
        assert_eq!(usage.success_rate(), 0.0);

        // 80% success rate
        usage.success_count = 8;
        usage.request_count = 10;
        assert_eq!(usage.success_rate(), 80.0);

        // 100% success rate
        usage.success_count = 10;
        usage.request_count = 10;
        assert_eq!(usage.success_rate(), 100.0);
    }

    #[test]
    fn test_provider_usage_load_percentage() {
        let mut usage = ProviderUsage::default();

        // No total requests - should return 0%
        assert_eq!(usage.load_percentage(0), 0.0);

        // 25% of total load
        usage.request_count = 25;
        assert_eq!(usage.load_percentage(100), 25.0);

        // 100% of total load
        usage.request_count = 100;
        assert_eq!(usage.load_percentage(100), 100.0);
    }

    #[test]
    fn test_provider_usage_record_request() {
        let mut usage = ProviderUsage::default();
        let initial_timestamp = usage.last_used_timestamp;

        // Record successful request
        usage.record_request(150, true);
        assert_eq!(usage.request_count, 1);
        assert_eq!(usage.total_response_time_ms, 150);
        assert_eq!(usage.success_count, 1);
        assert_eq!(usage.error_count, 0);
        assert!(usage.last_used_timestamp > initial_timestamp);
        assert_eq!(usage.response_time_history.len(), 1);
        assert_eq!(usage.response_time_history[0], 150);

        // Record failed request
        usage.record_request(300, false);
        assert_eq!(usage.request_count, 2);
        assert_eq!(usage.total_response_time_ms, 450);
        assert_eq!(usage.success_count, 1);
        assert_eq!(usage.error_count, 1);
        assert_eq!(usage.response_time_history.len(), 2);
        assert_eq!(usage.response_time_history[1], 300);
    }

    #[test]
    fn test_provider_usage_response_time_history_limit() {
        let mut usage = ProviderUsage::default();

        // Fill history beyond capacity
        for i in 0..150 {
            usage.record_request(i, true);
        }

        // Should be limited to 100 entries
        assert_eq!(usage.response_time_history.len(), 100);

        // Should contain the most recent 100 entries (50-149)
        assert_eq!(usage.response_time_history.front(), Some(&50));
        assert_eq!(usage.response_time_history.back(), Some(&149));
    }

    #[test]
    fn test_metrics_collector_new() {
        let collector = MetricsCollector::new();

        assert_eq!(collector.cache_hits.load(Ordering::Relaxed), 0);
        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 0);
        assert_eq!(collector.total_requests.load(Ordering::Relaxed), 0);
        assert_eq!(collector.total_errors.load(Ordering::Relaxed), 0);
        assert_eq!(collector.rate_limit_errors.load(Ordering::Relaxed), 0);
        assert_eq!(collector.user_errors.load(Ordering::Relaxed), 0);

        assert!(collector.provider_usage.read().unwrap().is_empty());
        assert!(collector.method_stats.read().unwrap().is_empty());
        assert!(collector.metrics_history.read().unwrap().is_empty());
        assert!(collector.request_timestamps.read().unwrap().is_empty());
    }

    #[test]
    fn test_metrics_collector_record_cache_hit() {
        let collector = MetricsCollector::new();

        collector.record_cache_hit("eth_getBalance", 50);

        assert_eq!(collector.cache_hits.load(Ordering::Relaxed), 1);
        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 0);
        assert_eq!(collector.total_requests.load(Ordering::Relaxed), 1);

        // Check method stats
        let stats = collector.get_method_stats();
        assert_eq!(stats.len(), 1);
        let method_stat = stats.get("eth_getBalance").unwrap();
        assert_eq!(method_stat.hits, 1);
        assert_eq!(method_stat.misses, 0);
        assert_eq!(method_stat.total_requests, 1);
        assert_eq!(method_stat.total_response_time_ms, 50);
        assert_eq!(method_stat.avg_response_time_ms, 50.0);
        assert_eq!(method_stat.errors, 0);

        // Check request timestamp recorded
        assert_eq!(collector.request_timestamps.read().unwrap().len(), 1);
    }

    #[test]
    fn test_metrics_collector_record_cache_miss() {
        let collector = MetricsCollector::new();
        let provider_url = "https://eth.llamarpc.com";

        // Record a cache miss (just increments the counter)
        collector.record_cache_miss();

        // Then record the forwarded request
        collector.record_request("eth_getBlockByNumber", provider_url, 200, true);

        assert_eq!(collector.cache_hits.load(Ordering::Relaxed), 0);
        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 1);
        assert_eq!(collector.total_requests.load(Ordering::Relaxed), 1);
        assert_eq!(collector.total_errors.load(Ordering::Relaxed), 0);

        // Check method stats
        let stats = collector.get_method_stats();
        let method_stat = stats.get("eth_getBlockByNumber").unwrap();
        assert_eq!(method_stat.hits, 0);
        assert_eq!(method_stat.misses, 1);
        assert_eq!(method_stat.total_requests, 1);
        assert_eq!(method_stat.total_response_time_ms, 200);
        assert_eq!(method_stat.errors, 0);

        // Check provider usage
        let usage = collector.get_provider_usage();
        let provider_usage = usage.get(provider_url).unwrap();
        assert_eq!(provider_usage.request_count, 1);
        assert_eq!(provider_usage.success_count, 1);
        assert_eq!(provider_usage.error_count, 0);
        assert_eq!(provider_usage.total_response_time_ms, 200);
    }

    #[test]
    fn test_metrics_collector_record_cache_miss_with_error() {
        let collector = MetricsCollector::new();
        let provider_url = "https://failing-provider.com";

        // Record a cache miss
        collector.record_cache_miss();

        // Then record the failed forwarded request
        collector.record_request("eth_getBalance", provider_url, 5000, false);

        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 1);
        assert_eq!(collector.total_errors.load(Ordering::Relaxed), 1);

        // Check method stats include error
        let stats = collector.get_method_stats();
        let method_stat = stats.get("eth_getBalance").unwrap();
        assert_eq!(method_stat.errors, 1);

        // Check provider usage includes error
        let usage = collector.get_provider_usage();
        let provider_usage = usage.get(provider_url).unwrap();
        assert_eq!(provider_usage.error_count, 1);
        assert_eq!(provider_usage.success_count, 0);
    }

    #[test]
    fn test_metrics_collector_record_request() {
        let collector = MetricsCollector::new();
        let provider_url = "https://eth.llamarpc.com";

        // Record a non-cacheable request (directly forwarded)
        collector.record_request("eth_sendRawTransaction", provider_url, 100, true);

        assert_eq!(collector.cache_hits.load(Ordering::Relaxed), 0);
        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 0);
        assert_eq!(collector.total_requests.load(Ordering::Relaxed), 1);

        // Method should be tracked with misses incremented (forwarded request)
        let stats = collector.get_method_stats();
        let method_stat = stats.get("eth_sendRawTransaction").unwrap();
        assert_eq!(method_stat.hits, 0);
        assert_eq!(method_stat.misses, 1); // Forwarded requests count as misses
        assert_eq!(method_stat.total_requests, 1);
    }

    #[test]
    fn test_metrics_collector_record_error() {
        let collector = MetricsCollector::new();

        collector.record_error(ErrorType::RateLimit);
        collector.record_error(ErrorType::UserError);
        collector.record_error(ErrorType::Other);

        assert_eq!(collector.rate_limit_errors.load(Ordering::Relaxed), 1);
        assert_eq!(collector.user_errors.load(Ordering::Relaxed), 1);
        assert_eq!(collector.total_errors.load(Ordering::Relaxed), 1);
    }

    #[test]
    fn test_metrics_collector_cache_hit_rate() {
        let collector = MetricsCollector::new();

        // No requests - should return 0%
        assert_eq!(collector.cache_hit_rate(), 0.0);

        // Record some hits and misses
        collector.record_cache_hit("method1", 50);
        collector.record_cache_hit("method2", 75);

        // Record cache misses followed by forwarded requests
        collector.record_cache_miss();
        collector.record_request("method3", "provider1", 100, true);

        collector.record_cache_miss();
        collector.record_request("method4", "provider1", 125, true);

        // Direct forwarded request (non-cacheable)
        collector.record_request("method5", "provider1", 150, true);

        // 2 hits out of 5 total requests = 40%
        assert_eq!(collector.cache_hit_rate(), 40.0);
    }

    #[test]
    fn test_metrics_collector_error_rate() {
        let collector = MetricsCollector::new();

        // No requests - should return 0%
        assert_eq!(collector.error_rate(), 0.0);

        // Record some requests with errors
        collector.record_cache_hit("method1", 50); // Success

        // Cache miss with error
        collector.record_cache_miss();
        collector.record_request("method2", "provider1", 100, false); // Error

        collector.record_request("method3", "provider1", 150, true); // Success
        collector.record_request("method4", "provider1", 200, false); // Error

        // 2 errors out of 4 total requests = 50%
        assert_eq!(collector.error_rate(), 50.0);
    }

    #[test]
    fn test_metrics_collector_requests_per_minute() {
        let collector = MetricsCollector::new();

        // No requests initially
        assert_eq!(collector.requests_per_minute(), 0);

        // Record some requests
        collector.record_cache_hit("method1", 50);

        collector.record_cache_miss();
        collector.record_request("method2", "provider1", 100, true);

        // Should count recent requests (within last minute)
        assert_eq!(collector.requests_per_minute(), 2);
    }

    #[test]
    fn test_metrics_collector_add_historical_point() {
        let collector = MetricsCollector::new();

        // Add some metrics first
        collector.record_cache_hit("method1", 100);

        collector.record_cache_miss();
        collector.record_request("method2", "provider1", 200, true);

        collector.add_historical_point(1000, 3, 5, 2);

        let history = collector.get_metrics_history();
        assert_eq!(history.len(), 1);

        let point = &history[0];
        assert_eq!(point.cache_hits, 1);
        assert_eq!(point.cache_misses, 1);
        assert_eq!(point.cache_size, 1000);
        assert_eq!(point.healthy_providers, 3);
        assert_eq!(point.total_providers, 5);
        assert_eq!(point.active_instances, 2);
        assert!(point.timestamp > 0);
        assert!(point.avg_response_time_ms > 0.0);
    }

    #[test]
    fn test_metrics_collector_historical_point_limit() {
        let collector = MetricsCollector::new();

        // Add more than the limit (1000)
        for i in 0..1100 {
            collector.add_historical_point(i, 1, 1, 1);
        }

        let history = collector.get_metrics_history();
        assert_eq!(history.len(), 1000);

        // Should have the most recent 1000 points
        assert_eq!(history.front().unwrap().cache_size, 100); // 1100 - 1000
        assert_eq!(history.back().unwrap().cache_size, 1099);
    }

    #[test]
    fn test_metrics_collector_request_timestamps_limit() {
        let collector = MetricsCollector::new();

        // Record more requests than the timestamp limit (1000)
        for _i in 0..1100 {
            collector.record_cache_hit("method", 50);
        }

        let timestamps = collector.request_timestamps.read().unwrap();
        assert_eq!(timestamps.len(), 1000);
    }

    #[test]
    fn test_metrics_collector_concurrent_access() {
        let collector = Arc::new(MetricsCollector::new());
        let mut handles = vec![];

        // Spawn multiple threads that record metrics concurrently
        for i in 0..10 {
            let collector_clone = Arc::clone(&collector);
            let handle = thread::spawn(move || {
                for j in 0..100 {
                    collector_clone.record_cache_hit(&format!("method_{}", i), j as u64);

                    // Record cache miss and forwarded request
                    collector_clone.record_cache_miss();
                    collector_clone.record_request(
                        &format!("method_{}", i),
                        &format!("provider_{}", i),
                        (j * 2) as u64,
                        true,
                    );
                }
            });
            handles.push(handle);
        }

        // Wait for all threads to complete
        for handle in handles {
            handle.join().unwrap();
        }

        // Verify final counts
        assert_eq!(collector.cache_hits.load(Ordering::Relaxed), 1000);
        assert_eq!(collector.cache_misses.load(Ordering::Relaxed), 1000);
        assert_eq!(collector.total_requests.load(Ordering::Relaxed), 2000);

        // Verify method stats were updated correctly
        let stats = collector.get_method_stats();
        assert_eq!(stats.len(), 10); // 10 different methods

        for i in 0..10 {
            let method_name = format!("method_{}", i);
            let method_stat = stats.get(&method_name).unwrap();
            assert_eq!(method_stat.hits, 100);
            assert_eq!(method_stat.misses, 100);
            assert_eq!(method_stat.total_requests, 200);
        }

        // Verify provider stats were updated correctly
        let usage = collector.get_provider_usage();
        assert_eq!(usage.len(), 10); // 10 different providers

        for i in 0..10 {
            let provider_name = format!("provider_{}", i);
            let provider_usage = usage.get(&provider_name).unwrap();
            assert_eq!(provider_usage.request_count, 100);
            assert_eq!(provider_usage.success_count, 100);
        }
    }

    #[test]
    fn test_metrics_collector_overall_avg_response_time() {
        let collector = MetricsCollector::new();

        // No requests initially
        assert_eq!(collector.overall_avg_response_time(), 0.0);

        // Add requests with different response times
        collector.record_cache_hit("method1", 100); // 100ms
        collector.record_cache_hit("method2", 200); // 200ms

        collector.record_cache_miss();
        collector.record_request("method3", "provider1", 300, true); // 300ms

        // Should calculate weighted average across all methods
        // method1: 100ms total, 1 request
        // method2: 200ms total, 1 request
        // method3: 300ms total, 1 request
        // Overall: (100 + 200 + 300) / 3 = 200ms
        assert_eq!(collector.overall_avg_response_time(), 200.0);
    }

    #[test]
    fn test_metrics_serialization() {
        let mut method_stats = MethodStats::default();
        method_stats.hits = 10;
        method_stats.total_requests = 15;
        method_stats.avg_response_time_ms = 150.5;

        // Test serialization to JSON
        let json = serde_json::to_string(&method_stats).unwrap();
        assert!(json.contains("\"hits\":10"));
        assert!(json.contains("\"total_requests\":15"));
        assert!(json.contains("\"avg_response_time_ms\":150.5"));

        // Test deserialization from JSON
        let deserialized: MethodStats = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.hits, method_stats.hits);
        assert_eq!(deserialized.total_requests, method_stats.total_requests);
        assert_eq!(deserialized.avg_response_time_ms, method_stats.avg_response_time_ms);
    }

    #[test]
    fn test_edge_cases() {
        let collector = MetricsCollector::new();

        // Test with empty method name
        collector.record_cache_hit("", 100);
        let stats = collector.get_method_stats();
        assert!(stats.contains_key(""));

        // Test with very long method name
        let long_method = "a".repeat(1000);
        collector.record_cache_hit(&long_method, 200);
        let stats = collector.get_method_stats();
        assert!(stats.contains_key(&long_method));

        // Test with zero response time
        collector.record_cache_hit("zero_time", 0);
        let stats = collector.get_method_stats();
        let method_stat = stats.get("zero_time").unwrap();
        assert_eq!(method_stat.avg_response_time_ms, 0.0);

        // Test with very large response time
        collector.record_cache_hit("large_time", u64::MAX);
        let stats = collector.get_method_stats();
        let method_stat = stats.get("large_time").unwrap();
        assert_eq!(method_stat.total_response_time_ms, u64::MAX);
    }
}