oxify_connect_vision/
metrics.rs

1//! Metrics collection and reporting module.
2//!
3//! This module provides comprehensive metrics tracking for OCR operations:
4//! - Processing latency (histograms)
5//! - Cache hit/miss rates
6//! - Error rates by type
7//! - Provider usage statistics
8//! - Throughput metrics
9//!
10//! Metrics are designed to be compatible with Prometheus format.
11
12use std::collections::HashMap;
13use std::sync::atomic::{AtomicU64, Ordering};
14use std::sync::{Arc, RwLock};
15use std::time::{Duration, Instant};
16
/// Classifies a metric by the way its value is expected to evolve.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum MetricType {
    /// Cumulative value that is only expected to grow.
    Counter,
    /// Value that may move in either direction.
    Gauge,
    /// Distribution of observed values.
    Histogram,
}
27
28/// A single metric value
29#[derive(Debug, Clone)]
30pub struct MetricValue {
31    /// Metric type
32    pub metric_type: MetricType,
33    /// Current value
34    pub value: f64,
35    /// Optional help text
36    pub help: String,
37    /// Labels for this metric
38    pub labels: HashMap<String, String>,
39}
40
/// Atomic counter intended to grow monotonically.
///
/// All operations take `&self` and use relaxed atomic accesses on an
/// [`AtomicU64`]; [`Counter::reset`] is the only operation that moves the
/// value backwards.
#[derive(Debug, Default)]
pub struct Counter {
    value: AtomicU64,
}

impl Counter {
    /// Creates a counter that starts at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Bumps the counter by one.
    pub fn inc(&self) {
        self.value.fetch_add(1, Ordering::Relaxed);
    }

    /// Bumps the counter by `value`.
    pub fn add(&self, value: u64) {
        self.value.fetch_add(value, Ordering::Relaxed);
    }

    /// Returns the value currently stored.
    pub fn get(&self) -> u64 {
        self.value.load(Ordering::Relaxed)
    }

    /// Puts the counter back to zero.
    pub fn reset(&self) {
        self.value.store(0, Ordering::Relaxed);
    }
}
75
/// Atomic gauge holding an `f64` that can go up or down.
///
/// The float is stored as its raw bit pattern inside an [`AtomicU64`], so an
/// all-zero word (the default) reads back as `0.0`.
#[derive(Debug, Default)]
pub struct Gauge {
    value: AtomicU64,
}

impl Gauge {
    /// Creates a gauge reading `0.0`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Overwrites the gauge with `value`.
    pub fn set(&self, value: f64) {
        let bits = value.to_bits();
        self.value.store(bits, Ordering::Relaxed);
    }

    /// Raises the gauge by one.
    pub fn inc(&self) {
        self.add(1.0);
    }

    /// Lowers the gauge by one.
    pub fn dec(&self) {
        self.add(-1.0);
    }

    /// Adds `value` to the gauge.
    ///
    /// Implemented as a compare-and-swap retry loop because there is no
    /// native atomic floating-point addition.
    pub fn add(&self, value: f64) {
        // The closure always yields `Some`, so the update cannot be rejected.
        let _ = self
            .value
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bits| {
                Some((f64::from_bits(bits) + value).to_bits())
            });
    }

    /// Subtracts `value` from the gauge.
    pub fn sub(&self, value: f64) {
        self.add(-value);
    }

    /// Returns the value currently stored.
    pub fn get(&self) -> f64 {
        let bits = self.value.load(Ordering::Relaxed);
        f64::from_bits(bits)
    }
}
131
/// One cumulative bucket of a [`Histogram`].
#[derive(Debug, Clone)]
pub struct HistogramBucket {
    /// Inclusive upper bound for this bucket.
    pub upper_bound: f64,
    /// Number of observations less than or equal to `upper_bound`.
    pub count: Arc<AtomicU64>,
}

/// Histogram metric (distribution of values) with cumulative buckets.
///
/// All state is kept in `Arc`-wrapped atomics, so a clone is another handle
/// onto the same underlying counters rather than an independent copy.
#[derive(Debug, Clone)]
pub struct Histogram {
    /// Buckets in ascending bound order; the last one is always `+Inf`.
    buckets: Vec<HistogramBucket>,
    /// Running sum of observations, stored as the bit pattern of an `f64`.
    sum: Arc<AtomicU64>,
    /// Number of recorded observations.
    count: Arc<AtomicU64>,
}

impl Histogram {
    /// Create a new histogram with the default bucket bounds
    /// (0.001 up to 10.0, plus the implicit `+Inf` bucket).
    pub fn new() -> Self {
        Self::with_buckets(vec![
            0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10.0,
        ])
    }

    /// Create a histogram with custom bucket upper bounds.
    ///
    /// The bounds are normalised before use: NaN entries are dropped (no
    /// value can ever fall into them), explicit `+Inf` entries are dropped
    /// (that bucket is always appended exactly once), and the remainder is
    /// sorted ascending with duplicates removed. An already sorted, finite,
    /// duplicate-free list is used unchanged.
    pub fn with_buckets(mut bounds: Vec<f64>) -> Self {
        bounds.retain(|bound| !bound.is_nan() && *bound != f64::INFINITY);
        bounds.sort_by(|a, b| a.partial_cmp(b).expect("NaN bounds were removed above"));
        bounds.dedup();

        let buckets = bounds
            .into_iter()
            // Catch-all bucket so every observation lands somewhere.
            .chain(std::iter::once(f64::INFINITY))
            .map(|upper_bound| HistogramBucket {
                upper_bound,
                count: Arc::new(AtomicU64::new(0)),
            })
            .collect();

        Self {
            buckets,
            sum: Arc::new(AtomicU64::new(0)),
            count: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Observe a value.
    ///
    /// Adds `value` to the running sum, increments the observation count and
    /// increments every bucket whose upper bound is at least `value`.
    /// The three updates are individually atomic but not atomic as a group.
    ///
    /// NaN observations are ignored: they would turn the sum into NaN for
    /// good and would be counted without landing in any bucket, leaving the
    /// `+Inf` bucket out of step with the total count.
    pub fn observe(&self, value: f64) {
        if value.is_nan() {
            return;
        }

        // Float addition via a compare-and-swap retry loop; the closure
        // always yields `Some`, so the update cannot be rejected.
        let _ = self
            .sum
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bits| {
                Some((f64::from_bits(bits) + value).to_bits())
            });

        self.count.fetch_add(1, Ordering::Relaxed);

        // Buckets are cumulative, so every bucket at or above the value counts it.
        for bucket in self.buckets.iter().filter(|b| value <= b.upper_bound) {
            bucket.count.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Get the sum of all observed values.
    pub fn sum(&self) -> f64 {
        f64::from_bits(self.sum.load(Ordering::Relaxed))
    }

    /// Get the count of observed values.
    pub fn count(&self) -> u64 {
        self.count.load(Ordering::Relaxed)
    }

    /// Get the buckets, in ascending bound order ending with `+Inf`.
    pub fn buckets(&self) -> &[HistogramBucket] {
        &self.buckets
    }

    /// Get the average observed value, or `0.0` when nothing was observed.
    pub fn avg(&self) -> f64 {
        match self.count() {
            0 => 0.0,
            observed => self.sum() / observed as f64,
        }
    }
}

impl Default for Histogram {
    /// Equivalent to [`Histogram::new`].
    fn default() -> Self {
        Self::new()
    }
}
239
240/// Timer for measuring durations
241pub struct Timer {
242    histogram: Histogram,
243    start: Instant,
244}
245
246impl Timer {
247    /// Start a new timer
248    pub fn start(histogram: Histogram) -> Self {
249        Self {
250            histogram,
251            start: Instant::now(),
252        }
253    }
254
255    /// Stop the timer and record the duration
256    pub fn stop(self) {
257        let duration = self.start.elapsed();
258        self.histogram.observe(duration.as_secs_f64());
259    }
260}
261
262/// Metrics collector for OCR operations
263#[derive(Debug, Clone)]
264pub struct OcrMetrics {
265    // Processing metrics
266    pub requests_total: Arc<Counter>,
267    pub requests_success: Arc<Counter>,
268    pub requests_failed: Arc<Counter>,
269    pub processing_duration: Histogram,
270
271    // Cache metrics
272    pub cache_hits: Arc<Counter>,
273    pub cache_misses: Arc<Counter>,
274    pub cache_size: Arc<Gauge>,
275
276    // Provider metrics
277    provider_usage: Arc<RwLock<HashMap<String, u64>>>,
278
279    // Error metrics
280    error_counts: Arc<RwLock<HashMap<String, u64>>>,
281
282    // Throughput metrics
283    pub bytes_processed: Arc<Counter>,
284    pub images_processed: Arc<Counter>,
285}
286
287impl OcrMetrics {
288    /// Create a new metrics collector
289    pub fn new() -> Self {
290        Self {
291            requests_total: Arc::new(Counter::new()),
292            requests_success: Arc::new(Counter::new()),
293            requests_failed: Arc::new(Counter::new()),
294            processing_duration: Histogram::new(),
295            cache_hits: Arc::new(Counter::new()),
296            cache_misses: Arc::new(Counter::new()),
297            cache_size: Arc::new(Gauge::new()),
298            provider_usage: Arc::new(RwLock::new(HashMap::new())),
299            error_counts: Arc::new(RwLock::new(HashMap::new())),
300            bytes_processed: Arc::new(Counter::new()),
301            images_processed: Arc::new(Counter::new()),
302        }
303    }
304
305    /// Start timing a request
306    pub fn start_timer(&self) -> Timer {
307        self.requests_total.inc();
308        Timer::start(self.processing_duration.clone())
309    }
310
311    /// Record a successful request
312    pub fn record_success(&self) {
313        self.requests_success.inc();
314    }
315
316    /// Record a failed request
317    pub fn record_failure(&self, error_type: &str) {
318        self.requests_failed.inc();
319        let mut errors = self.error_counts.write().unwrap();
320        *errors.entry(error_type.to_string()).or_insert(0) += 1;
321    }
322
323    /// Record a cache hit
324    pub fn record_cache_hit(&self) {
325        self.cache_hits.inc();
326    }
327
328    /// Record a cache miss
329    pub fn record_cache_miss(&self) {
330        self.cache_misses.inc();
331    }
332
333    /// Update cache size
334    pub fn update_cache_size(&self, size: usize) {
335        self.cache_size.set(size as f64);
336    }
337
338    /// Record provider usage
339    pub fn record_provider_usage(&self, provider: &str) {
340        let mut usage = self.provider_usage.write().unwrap();
341        *usage.entry(provider.to_string()).or_insert(0) += 1;
342    }
343
344    /// Record bytes processed
345    pub fn record_bytes(&self, bytes: usize) {
346        self.bytes_processed.add(bytes as u64);
347    }
348
349    /// Record an image processed
350    pub fn record_image(&self) {
351        self.images_processed.inc();
352    }
353
354    /// Get cache hit rate
355    pub fn cache_hit_rate(&self) -> f64 {
356        let hits = self.cache_hits.get() as f64;
357        let misses = self.cache_misses.get() as f64;
358        let total = hits + misses;
359        if total == 0.0 {
360            0.0
361        } else {
362            hits / total
363        }
364    }
365
366    /// Get error rate
367    pub fn error_rate(&self) -> f64 {
368        let total = self.requests_total.get() as f64;
369        if total == 0.0 {
370            0.0
371        } else {
372            self.requests_failed.get() as f64 / total
373        }
374    }
375
376    /// Get success rate
377    pub fn success_rate(&self) -> f64 {
378        let total = self.requests_total.get() as f64;
379        if total == 0.0 {
380            0.0
381        } else {
382            self.requests_success.get() as f64 / total
383        }
384    }
385
386    /// Get average processing duration
387    pub fn avg_duration(&self) -> Duration {
388        Duration::from_secs_f64(self.processing_duration.avg())
389    }
390
391    /// Get provider usage statistics
392    pub fn provider_stats(&self) -> HashMap<String, u64> {
393        self.provider_usage.read().unwrap().clone()
394    }
395
396    /// Get error statistics
397    pub fn error_stats(&self) -> HashMap<String, u64> {
398        self.error_counts.read().unwrap().clone()
399    }
400
401    /// Get a summary of all metrics
402    pub fn summary(&self) -> MetricsSummary {
403        MetricsSummary {
404            total_requests: self.requests_total.get(),
405            successful_requests: self.requests_success.get(),
406            failed_requests: self.requests_failed.get(),
407            cache_hits: self.cache_hits.get(),
408            cache_misses: self.cache_misses.get(),
409            cache_hit_rate: self.cache_hit_rate(),
410            error_rate: self.error_rate(),
411            success_rate: self.success_rate(),
412            avg_duration: self.avg_duration(),
413            bytes_processed: self.bytes_processed.get(),
414            images_processed: self.images_processed.get(),
415            provider_stats: self.provider_stats(),
416            error_stats: self.error_stats(),
417        }
418    }
419
420    /// Export metrics in Prometheus format
421    pub fn prometheus_format(&self) -> String {
422        let mut output = String::new();
423
424        // Requests
425        output.push_str("# HELP ocr_requests_total Total number of OCR requests\n");
426        output.push_str("# TYPE ocr_requests_total counter\n");
427        output.push_str(&format!(
428            "ocr_requests_total {}\n",
429            self.requests_total.get()
430        ));
431
432        output.push_str("# HELP ocr_requests_success Number of successful OCR requests\n");
433        output.push_str("# TYPE ocr_requests_success counter\n");
434        output.push_str(&format!(
435            "ocr_requests_success {}\n",
436            self.requests_success.get()
437        ));
438
439        output.push_str("# HELP ocr_requests_failed Number of failed OCR requests\n");
440        output.push_str("# TYPE ocr_requests_failed counter\n");
441        output.push_str(&format!(
442            "ocr_requests_failed {}\n",
443            self.requests_failed.get()
444        ));
445
446        // Cache
447        output.push_str("# HELP ocr_cache_hits Number of cache hits\n");
448        output.push_str("# TYPE ocr_cache_hits counter\n");
449        output.push_str(&format!("ocr_cache_hits {}\n", self.cache_hits.get()));
450
451        output.push_str("# HELP ocr_cache_misses Number of cache misses\n");
452        output.push_str("# TYPE ocr_cache_misses counter\n");
453        output.push_str(&format!("ocr_cache_misses {}\n", self.cache_misses.get()));
454
455        output.push_str("# HELP ocr_cache_size Current cache size\n");
456        output.push_str("# TYPE ocr_cache_size gauge\n");
457        output.push_str(&format!("ocr_cache_size {}\n", self.cache_size.get()));
458
459        // Processing duration histogram
460        output.push_str("# HELP ocr_processing_duration_seconds OCR processing duration\n");
461        output.push_str("# TYPE ocr_processing_duration_seconds histogram\n");
462        for bucket in self.processing_duration.buckets() {
463            output.push_str(&format!(
464                "ocr_processing_duration_seconds_bucket{{le=\"{}\"}} {}\n",
465                bucket.upper_bound,
466                bucket.count.load(Ordering::Relaxed)
467            ));
468        }
469        output.push_str(&format!(
470            "ocr_processing_duration_seconds_sum {}\n",
471            self.processing_duration.sum()
472        ));
473        output.push_str(&format!(
474            "ocr_processing_duration_seconds_count {}\n",
475            self.processing_duration.count()
476        ));
477
478        // Provider usage
479        output.push_str("# HELP ocr_provider_usage Usage count by provider\n");
480        output.push_str("# TYPE ocr_provider_usage counter\n");
481        for (provider, count) in self.provider_stats() {
482            output.push_str(&format!(
483                "ocr_provider_usage{{provider=\"{}\"}} {}\n",
484                provider, count
485            ));
486        }
487
488        // Error counts
489        output.push_str("# HELP ocr_errors_total Error count by type\n");
490        output.push_str("# TYPE ocr_errors_total counter\n");
491        for (error_type, count) in self.error_stats() {
492            output.push_str(&format!(
493                "ocr_errors_total{{type=\"{}\"}} {}\n",
494                error_type, count
495            ));
496        }
497
498        output
499    }
500
501    /// Reset all metrics
502    pub fn reset(&self) {
503        self.requests_total.reset();
504        self.requests_success.reset();
505        self.requests_failed.reset();
506        self.cache_hits.reset();
507        self.cache_misses.reset();
508        self.bytes_processed.reset();
509        self.images_processed.reset();
510        self.provider_usage.write().unwrap().clear();
511        self.error_counts.write().unwrap().clear();
512    }
513}
514
515impl Default for OcrMetrics {
516    fn default() -> Self {
517        Self::new()
518    }
519}
520
/// Plain-data summary of the collected metrics.
#[derive(Clone, Debug)]
pub struct MetricsSummary {
    /// Total number of requests.
    pub total_requests: u64,
    /// Number of successful requests.
    pub successful_requests: u64,
    /// Number of failed requests.
    pub failed_requests: u64,
    /// Number of cache hits.
    pub cache_hits: u64,
    /// Number of cache misses.
    pub cache_misses: u64,
    /// Cache hit rate as a fraction (printed as a percentage).
    pub cache_hit_rate: f64,
    /// Error rate as a fraction (printed as a percentage).
    pub error_rate: f64,
    /// Success rate as a fraction (printed as a percentage).
    pub success_rate: f64,
    /// Average processing duration.
    pub avg_duration: Duration,
    /// Total bytes processed.
    pub bytes_processed: u64,
    /// Total images processed.
    pub images_processed: u64,
    /// Usage count per provider.
    pub provider_stats: HashMap<String, u64>,
    /// Count per error type.
    pub error_stats: HashMap<String, u64>,
}

impl std::fmt::Display for MetricsSummary {
    /// Renders the summary as an aligned, human-readable report.
    ///
    /// The provider and error sections appear only when their map is
    /// non-empty; entries follow the map's iteration order.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Prints a heading plus one indented line per entry, or nothing at
        // all for an empty map.
        fn section(
            f: &mut std::fmt::Formatter<'_>,
            heading: &str,
            entries: &HashMap<String, u64>,
        ) -> std::fmt::Result {
            if entries.is_empty() {
                return Ok(());
            }
            writeln!(f, "{}", heading)?;
            entries
                .iter()
                .try_for_each(|(name, count)| writeln!(f, "  {}: {}", name, count))
        }

        let success_pct = self.success_rate * 100.0;
        let error_pct = self.error_rate * 100.0;
        let hit_pct = self.cache_hit_rate * 100.0;

        f.write_str("OCR Metrics Summary\n")?;
        f.write_str("===================\n")?;
        writeln!(f, "Total Requests:      {}", self.total_requests)?;
        writeln!(f, "Successful:          {}", self.successful_requests)?;
        writeln!(f, "Failed:              {}", self.failed_requests)?;
        writeln!(f, "Success Rate:        {:.2}%", success_pct)?;
        writeln!(f, "Error Rate:          {:.2}%", error_pct)?;
        writeln!(f, "Cache Hits:          {}", self.cache_hits)?;
        writeln!(f, "Cache Misses:        {}", self.cache_misses)?;
        writeln!(f, "Cache Hit Rate:      {:.2}%", hit_pct)?;
        writeln!(f, "Avg Duration:        {:?}", self.avg_duration)?;
        writeln!(f, "Images Processed:    {}", self.images_processed)?;
        writeln!(f, "Bytes Processed:     {}", self.bytes_processed)?;

        section(f, "\nProvider Usage:", &self.provider_stats)?;
        section(f, "\nError Statistics:", &self.error_stats)
    }
}
576
#[cfg(test)]
mod tests {
    use super::*;

    /// A counter starts at zero, accumulates increments and can be reset.
    #[test]
    fn test_counter() {
        let c = Counter::new();
        assert_eq!(c.get(), 0);

        c.inc();
        assert_eq!(c.get(), 1);

        c.add(5);
        assert_eq!(c.get(), 6);

        c.reset();
        assert_eq!(c.get(), 0);
    }

    /// A gauge can be set, stepped by one and adjusted by arbitrary amounts.
    #[test]
    fn test_gauge() {
        let g = Gauge::new();
        assert_eq!(g.get(), 0.0);

        g.set(10.0);
        assert_eq!(g.get(), 10.0);

        g.inc();
        assert_eq!(g.get(), 11.0);

        g.dec();
        assert_eq!(g.get(), 10.0);

        g.add(5.5);
        assert_eq!(g.get(), 15.5);

        g.sub(2.5);
        assert_eq!(g.get(), 13.0);
    }

    /// Sum, count and average track the observed values.
    #[test]
    fn test_histogram() {
        let h = Histogram::new();
        assert_eq!(h.count(), 0);
        assert_eq!(h.sum(), 0.0);

        for sample in [0.5, 1.5, 2.5] {
            h.observe(sample);
        }

        assert_eq!(h.count(), 3);
        assert_eq!(h.sum(), 4.5);
        assert_eq!(h.avg(), 1.5);
    }

    /// Request, failure, cache and provider bookkeeping on the collector.
    #[test]
    fn test_ocr_metrics() {
        let m = OcrMetrics::new();

        // Requests
        m.requests_total.inc();
        m.record_success();
        assert_eq!(m.requests_total.get(), 1);
        assert_eq!(m.requests_success.get(), 1);

        // Failures
        m.record_failure("timeout");
        assert_eq!(m.requests_failed.get(), 1);

        // Cache: two hits, one miss
        m.record_cache_hit();
        m.record_cache_hit();
        m.record_cache_miss();
        assert_eq!(m.cache_hits.get(), 2);
        assert_eq!(m.cache_misses.get(), 1);
        assert_eq!(m.cache_hit_rate(), 2.0 / 3.0);

        // Provider usage
        for provider in ["tesseract", "tesseract", "surya"] {
            m.record_provider_usage(provider);
        }
        let usage = m.provider_stats();
        assert_eq!(usage.get("tesseract").copied(), Some(2));
        assert_eq!(usage.get("surya").copied(), Some(1));
    }

    /// The summary reflects the counters at the time it is taken.
    #[test]
    fn test_metrics_summary() {
        let m = OcrMetrics::new();
        m.requests_total.inc();
        m.record_success();
        m.record_cache_hit();

        let snapshot = m.summary();
        assert_eq!(snapshot.total_requests, 1);
        assert_eq!(snapshot.successful_requests, 1);
        assert_eq!(snapshot.cache_hits, 1);
    }

    /// The Prometheus export mentions the request metrics.
    #[test]
    fn test_prometheus_format() {
        let m = OcrMetrics::new();
        m.requests_total.inc();
        m.record_success();

        let exported = m.prometheus_format();
        for name in ["ocr_requests_total", "ocr_requests_success"] {
            assert!(exported.contains(name));
        }
    }

    /// Stopping a timer records one positive observation on the shared histogram.
    #[test]
    fn test_timer() {
        let h = Histogram::new();
        let running = Timer::start(h.clone());
        std::thread::sleep(Duration::from_millis(10));
        running.stop();

        assert!(h.count() > 0);
        assert!(h.sum() > 0.0);
    }

    /// Resetting zeroes the counters.
    #[test]
    fn test_metrics_reset() {
        let m = OcrMetrics::new();
        m.requests_total.inc();
        m.record_success();
        m.record_cache_hit();

        m.reset();

        assert_eq!(m.requests_total.get(), 0);
        assert_eq!(m.requests_success.get(), 0);
        assert_eq!(m.cache_hits.get(), 0);
    }

    /// Error and success rates are fractions of the total request count.
    #[test]
    fn test_error_rate() {
        let m = OcrMetrics::new();
        m.requests_total.add(10);
        m.requests_success.add(7);
        m.requests_failed.add(3);

        assert_eq!(m.error_rate(), 0.3);
        assert_eq!(m.success_rate(), 0.7);
    }

    /// Buckets are cumulative and an infinity bucket is appended.
    #[test]
    fn test_histogram_buckets() {
        let h = Histogram::with_buckets(vec![1.0, 5.0, 10.0]);
        for sample in [0.5, 2.0, 7.0, 15.0] {
            h.observe(sample);
        }

        let per_bucket: Vec<u64> = h
            .buckets()
            .iter()
            .map(|bucket| bucket.count.load(Ordering::Relaxed))
            .collect();

        // 3 explicit bounds + infinity
        assert_eq!(per_bucket.len(), 4);
        assert_eq!(per_bucket, vec![1, 2, 3, 4]);
    }
}