sayr-engine 0.3.0

A high-performance Rust AI agent runtime inspired by the Agno framework
Documentation
//! Metrics tracking and evaluation.
#![allow(dead_code)]

use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use opentelemetry::global;
use opentelemetry::metrics::{Counter, Histogram, Meter};
use opentelemetry_prometheus::exporter;
use prometheus::Registry as PromRegistry;
use serde::{Deserialize, Serialize};
use sysinfo::System;

use crate::telemetry::TelemetryLabels;

/// Summary of a single finished run.
///
/// Built by `RunGuard::finish`, which both returns the report to the caller
/// and appends a copy to the owning tracker's shared report list.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EvaluationReport {
    /// Time elapsed between `MetricsTracker::start_run` and `RunGuard::finish`.
    pub duration: Duration,
    /// Memory figure computed in `RunGuard::finish` from a `sysinfo`
    /// `used_memory()` reading taken when the run ends. Despite the name it
    /// is one sample at finish time, not a tracked maximum.
    pub peak_memory_bytes: u64,
    /// Number of `RunGuard::record_tool_call` invocations during the run.
    pub tool_calls: usize,
    /// Number of `RunGuard::record_failure` invocations during the run.
    pub failures: usize,
    /// Outcome flag passed by the caller to `RunGuard::finish`.
    pub success: bool,
    /// Labels the run was started with.
    pub labels: TelemetryLabels,
}

impl EvaluationReport {
    /// Returns the fraction of `reports` whose `success` flag is set.
    ///
    /// The result lies in `0.0..=1.0`. An empty slice yields `0.0` rather
    /// than dividing by zero.
    pub fn success_rate(reports: &[Self]) -> f32 {
        match reports.len() {
            0 => 0.0,
            total => {
                let passed = reports.iter().filter(|report| report.success).count();
                passed as f32 / total as f32
            }
        }
    }
}

/// Collects per-run evaluation reports and emits OpenTelemetry metrics.
///
/// The report list sits behind an `Arc<Mutex<..>>`, so clones of a tracker
/// append to and read from the same list.
#[derive(Clone)]
pub struct MetricsTracker {
    /// Reports pushed by `RunGuard::finish`, shared between clones.
    reports: Arc<Mutex<Vec<EvaluationReport>>>,
    /// Meter the instruments below were created from (see the `Default` impl).
    meter: Meter,
    /// Incremented once per `start_run` call.
    run_counter: Counter<u64>,
    /// Incremented once per `RunGuard::record_tool_call` call.
    tool_call_counter: Counter<u64>,
    /// Incremented once per `RunGuard::record_failure` call.
    failure_counter: Counter<u64>,
    /// Receives the run duration (milliseconds) from `RunGuard::finish`.
    duration_histogram: Histogram<f64>,
}

impl Default for MetricsTracker {
    fn default() -> Self {
        let meter = global::meter("agno-metrics");
        let run_counter = meter
            .u64_counter("run_total")
            .with_description("Total runs")
            .init();
        let tool_call_counter = meter
            .u64_counter("tool_call_total")
            .with_description("Tool calls")
            .init();
        let failure_counter = meter
            .u64_counter("failure_total")
            .with_description("Failures")
            .init();
        let duration_histogram = meter
            .f64_histogram("run_duration_ms")
            .with_description("Run durations in milliseconds")
            .init();
        Self {
            reports: Arc::new(Mutex::new(Vec::new())),
            meter,
            run_counter,
            tool_call_counter,
            failure_counter,
            duration_histogram,
        }
    }
}

impl MetricsTracker {
    /// Begins tracking a run and returns the guard that accumulates its
    /// statistics.
    ///
    /// Increments the run counter once, tagged with the attributes derived
    /// from `labels`. The guard holds a clone of this tracker, so the report
    /// produced by [`RunGuard::finish`] is appended to the same shared list
    /// that [`MetricsTracker::reports`] reads.
    pub fn start_run(&self, labels: TelemetryLabels) -> RunGuard {
        self.run_counter.add(1, &labels.as_attributes());
        RunGuard {
            start: Instant::now(),
            tool_calls: 0,
            failures: 0,
            metrics: self.clone(),
            // Start with nothing loaded. Only memory is ever read from this
            // handle and `finish` refreshes it right before reading, so the
            // full scan done by `System::new_all()` was wasted work on every
            // run.
            system: System::new(),
            labels,
        }
    }

    /// Returns a snapshot (clone) of every report recorded so far.
    ///
    /// # Panics
    ///
    /// Panics if the report mutex is poisoned, i.e. another thread panicked
    /// while holding it.
    pub fn reports(&self) -> Vec<EvaluationReport> {
        self.reports
            .lock()
            .expect("metrics report mutex poisoned")
            .clone()
    }
}

/// In-flight state for one run, created by `MetricsTracker::start_run` and
/// consumed by `RunGuard::finish`.
pub struct RunGuard {
    /// Instant captured when the run was started.
    start: Instant,
    /// Running count of `record_tool_call` invocations.
    tool_calls: usize,
    /// Running count of `record_failure` invocations.
    failures: usize,
    /// Clone of the tracker that started the run; its counters, histogram
    /// and report list are what this guard writes to.
    metrics: MetricsTracker,
    /// `sysinfo` handle used by `finish` to read memory usage.
    system: System,
    /// Labels attached to every metric emitted for this run.
    labels: TelemetryLabels,
}

impl RunGuard {
    /// Records one tool invocation.
    ///
    /// Bumps the run-local tool-call count and increments the tracker's
    /// tool-call counter, tagged with the run labels plus `tool`.
    pub fn record_tool_call(&mut self, tool: impl Into<String>) {
        self.tool_calls += 1;
        let labels = self.labels.clone().with_tool(tool.into());
        self.metrics
            .tool_call_counter
            .add(1, &labels.as_attributes());
    }

    /// Records one failure.
    ///
    /// Bumps the run-local failure count and increments the tracker's failure
    /// counter. When `tool` is `Some`, the metric is tagged with that tool
    /// name; otherwise the run labels are used unchanged.
    pub fn record_failure(&mut self, tool: Option<String>) {
        self.failures += 1;
        let labels = match tool {
            Some(name) => self.labels.clone().with_tool(name),
            None => self.labels.clone(),
        };
        self.metrics.failure_counter.add(1, &labels.as_attributes());
    }

    /// Ends the run, emits its duration metric and returns the final report.
    ///
    /// The report is also appended to the tracker's shared report list.
    /// `peak_memory_bytes` is the value of `System::used_memory()` sampled
    /// once, right after a memory refresh, at the moment of finishing.
    ///
    /// # Panics
    ///
    /// Panics if the report mutex is poisoned, i.e. another thread panicked
    /// while holding it.
    pub fn finish(mut self, success: bool) -> EvaluationReport {
        let duration = self.start.elapsed();
        self.system.refresh_memory();
        // `used_memory()` already reports bytes (sysinfo >= 0.26). The former
        // `* 1024` KiB-to-bytes scaling inflated the figure 1024-fold.
        let peak_memory_bytes = self.system.used_memory();

        // Keep the fractional part: `as_millis()` truncated sub-millisecond
        // runs to 0 before they reached the f64 histogram.
        let elapsed_ms = duration.as_secs_f64() * 1000.0;
        let attributes = self.labels.as_attributes();
        self.metrics.duration_histogram.record(elapsed_ms, &attributes);

        let report = EvaluationReport {
            duration,
            peak_memory_bytes,
            tool_calls: self.tool_calls,
            failures: self.failures,
            success,
            // The guard is consumed here, so the labels can be moved out
            // instead of cloned.
            labels: self.labels,
        };
        self.metrics
            .reports
            .lock()
            .expect("metrics report mutex poisoned")
            .push(report.clone());
        report
    }
}

/// Creates a fresh Prometheus registry, builds an OpenTelemetry Prometheus
/// exporter against a clone of it, and returns the registry.
///
/// The outcome of the exporter build, whether the exporter or an error, is
/// discarded.
// NOTE(review): the built exporter is dropped immediately; presumably it has
// to be attached to a meter provider before any metrics reach the returned
// registry. Confirm against the opentelemetry_prometheus documentation.
pub fn init_prometheus_registry() -> PromRegistry {
    let registry = PromRegistry::new();
    let exporter_registry = registry.clone();
    drop(exporter().with_registry(exporter_registry).build());
    registry
}

#[cfg(test)]
mod tests {
    use super::*;

    /// One successful run should echo its labels back, be stored on the
    /// tracker, and give a success rate of exactly 1.0.
    #[test]
    fn tracks_success_rate_and_labels() {
        let expected_labels = TelemetryLabels {
            tenant: Some("tenant-a".into()),
            tool: Some("metrics".into()),
            workflow: Some("test".into()),
        };
        let tracker = MetricsTracker::default();

        let guard = tracker.start_run(expected_labels.clone());
        let report = guard.finish(true);
        assert!(report.duration >= Duration::from_millis(0));
        assert_eq!(report.labels, expected_labels);

        let stored = tracker.reports();
        assert_eq!(stored.len(), 1);
        assert_eq!(EvaluationReport::success_rate(&stored), 1.0);
    }
}