Skip to main content

codetether_agent/telemetry/
metrics.rs

1//! Per-instance rolling telemetry metrics.
2//!
3//! Prefer the global [`crate::telemetry::TOKEN_USAGE`] / `TOOL_EXECUTIONS`
4//! singletons for process-wide counts. Use [`Telemetry`] when you need a
5//! scoped collector whose lifetime is tied to a specific agent instance.
6
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use tokio::sync::Mutex;
10
11/// Snapshot-able metrics for an agent instance.
12///
13/// # Examples
14///
15/// ```rust
16/// use codetether_agent::telemetry::TelemetryMetrics;
17///
18/// let m = TelemetryMetrics::default();
19/// assert_eq!(m.tool_invocations, 0);
20/// ```
21#[derive(Debug, Clone, Default, Serialize, Deserialize)]
22pub struct TelemetryMetrics {
23    /// Total tool invocations.
24    pub tool_invocations: u64,
25    /// Tool invocations that returned `Ok`.
26    pub successful_operations: u64,
27    /// Tool invocations that returned an error.
28    pub failed_operations: u64,
29    /// Sum of tokens consumed across all invocations.
30    pub total_tokens: u64,
31    /// Running mean latency in ms.
32    pub avg_latency_ms: f64,
33}
34
35/// Per-instance telemetry tracker with an async-friendly rolling mean.
36#[derive(Debug)]
37pub struct Telemetry {
38    metrics: Mutex<TelemetryMetrics>,
39    /// Free-form instance metadata (agent id, tenant, etc).
40    pub metadata: HashMap<String, String>,
41}
42
43impl Telemetry {
44    /// Build an empty tracker.
45    pub fn new() -> Self {
46        Self {
47            metrics: Mutex::new(TelemetryMetrics::default()),
48            metadata: HashMap::new(),
49        }
50    }
51
52    /// Record one invocation, updating the rolling mean latency.
53    pub async fn record_tool_invocation(&self, success: bool, latency_ms: u64, tokens: u64) {
54        let mut metrics = self.metrics.lock().await;
55        metrics.tool_invocations += 1;
56        if success {
57            metrics.successful_operations += 1;
58        } else {
59            metrics.failed_operations += 1;
60        }
61        metrics.total_tokens += tokens;
62        let n = metrics.tool_invocations as f64;
63        metrics.avg_latency_ms = metrics.avg_latency_ms * (n - 1.0) / n + latency_ms as f64 / n;
64    }
65
66    /// Clone the current metrics.
67    pub async fn get_metrics(&self) -> TelemetryMetrics {
68        self.metrics.lock().await.clone()
69    }
70
71    /// Placeholder for future per-instance swarm telemetry.
72    pub async fn start_swarm(&self, _task_id: &str, _agent_count: usize) {}
73
74    /// Placeholder for future per-instance swarm progress tracking.
75    pub async fn record_swarm_progress(&self, _task_id: &str, _completed: usize, _total: usize) {}
76
77    /// Placeholder: returns the current metrics regardless of `_success`.
78    pub async fn complete_swarm(&self, _success: bool) -> TelemetryMetrics {
79        self.metrics.lock().await.clone()
80    }
81}
82
83impl Default for Telemetry {
84    fn default() -> Self {
85        Self::new()
86    }
87}