Skip to main content

vtcode_core/core/
telemetry.rs

1use anyhow::{Context, Result};
2use hashbrown::HashMap;
3use std::sync::atomic::{AtomicU64, Ordering};
4use std::sync::{Arc, Mutex, TryLockError};
5use std::time::{Duration, Instant};
6
7/// Aggregates telemetry data for the agent session.
8#[derive(Debug, Clone, Default)]
9pub struct TelemetryManager {
10    stats: Arc<Mutex<TelemetryStats>>,
11    start_time: Option<Instant>,
12    dropped_metric_updates: Arc<AtomicU64>,
13}
14
/// Running usage totals for a single model.
#[derive(Clone, Debug, Default)]
pub struct ModelUsageStats {
    /// Sum of the request durations recorded for this model.
    pub api_time: Duration,
    /// Sum of the prompt token counts reported for this model.
    pub prompt_tokens: u64,
    /// Sum of the completion token counts reported for this model.
    pub completion_tokens: u64,
    /// Sum of the reported cached prompt tokens (a missing value counts as zero).
    pub cached_prompt_tokens: u64,
    /// Sum of the cache-read token counts derived from each usage report.
    pub cache_read_tokens: u64,
    /// Sum of the cache-creation token counts derived from each usage report.
    pub cache_creation_tokens: u64,
}
24
25#[derive(Debug, Clone, Default)]
26pub struct TelemetryStats {
27    pub total_turns: usize,
28    pub total_tool_calls: usize,
29    pub total_tokens: usize, // Placeholder if we get token usage
30    pub tool_counts: HashMap<String, usize>,
31    pub tool_errors: HashMap<String, usize>,
32    pub session_duration: Duration,
33    pub api_time_spent: Duration,
34    pub model_usage: HashMap<String, ModelUsageStats>,
35    pub dropped_metric_updates: u64,
36}
37
38impl TelemetryManager {
39    pub fn new() -> Self {
40        Self {
41            stats: Arc::new(Mutex::new(TelemetryStats::default())),
42            start_time: Some(Instant::now()),
43            dropped_metric_updates: Arc::new(AtomicU64::new(0)),
44        }
45    }
46
47    pub fn record_turn(&self) {
48        self.with_stats_mut_non_blocking(|stats| {
49            stats.total_turns += 1;
50            if let Some(start) = self.start_time {
51                stats.session_duration = start.elapsed();
52            }
53        });
54    }
55
56    pub fn record_tool_usage(&self, tool: &str, success: bool) {
57        self.with_stats_mut_non_blocking(|stats| {
58            stats.total_tool_calls += 1;
59            if let Some(count) = stats.tool_counts.get_mut(tool) {
60                *count += 1;
61            } else {
62                stats.tool_counts.insert(tool.to_owned(), 1);
63            }
64            if !success {
65                if let Some(count) = stats.tool_errors.get_mut(tool) {
66                    *count += 1;
67                } else {
68                    stats.tool_errors.insert(tool.to_owned(), 1);
69                }
70            }
71        });
72    }
73
74    pub fn record_llm_request(
75        &self,
76        model: &str,
77        duration: Duration,
78        usage: Option<&crate::llm::provider::Usage>,
79    ) {
80        self.with_stats_mut_non_blocking(|stats| {
81            stats.api_time_spent = stats.api_time_spent.saturating_add(duration);
82            let model_stats = if let Some(existing) = stats.model_usage.get_mut(model) {
83                existing
84            } else {
85                stats.model_usage.entry(model.to_owned()).or_default()
86            };
87            model_stats.api_time = model_stats.api_time.saturating_add(duration);
88
89            if let Some(usage) = usage {
90                model_stats.prompt_tokens = model_stats
91                    .prompt_tokens
92                    .saturating_add(usage.prompt_tokens as u64);
93                model_stats.completion_tokens = model_stats
94                    .completion_tokens
95                    .saturating_add(usage.completion_tokens as u64);
96                model_stats.cached_prompt_tokens = model_stats
97                    .cached_prompt_tokens
98                    .saturating_add(usage.cached_prompt_tokens.unwrap_or(0) as u64);
99                model_stats.cache_read_tokens = model_stats
100                    .cache_read_tokens
101                    .saturating_add(usage.cache_read_tokens_or_fallback() as u64);
102                model_stats.cache_creation_tokens = model_stats
103                    .cache_creation_tokens
104                    .saturating_add(usage.cache_creation_tokens_or_zero() as u64);
105            }
106        });
107    }
108
109    fn with_stats_mut_non_blocking<F>(&self, update: F)
110    where
111        F: FnOnce(&mut TelemetryStats),
112    {
113        match self.stats.try_lock() {
114            Ok(mut stats) => update(&mut stats),
115            Err(TryLockError::WouldBlock) | Err(TryLockError::Poisoned(_)) => {
116                self.dropped_metric_updates.fetch_add(1, Ordering::Relaxed);
117            }
118        }
119    }
120
121    pub fn get_snapshot(&self) -> Result<TelemetryStats> {
122        let stats = self
123            .stats
124            .lock()
125            .map_err(|err| anyhow::anyhow!("telemetry stats lock poisoned: {err}"))
126            .context("Failed to read telemetry snapshot")?;
127        let mut snapshot = stats.clone();
128        snapshot.dropped_metric_updates = self.dropped_metric_updates.load(Ordering::Relaxed);
129        Ok(snapshot)
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::TelemetryManager;
    use std::time::Duration;

    /// Two requests for the same model accumulate into one per-model entry; the
    /// second request carries no usage and must only add to the API time.
    #[test]
    fn records_llm_request_usage_per_model() {
        let reported = crate::llm::provider::Usage {
            prompt_tokens: 100,
            completion_tokens: 200,
            total_tokens: 300,
            cached_prompt_tokens: Some(50),
            cache_creation_tokens: None,
            cache_read_tokens: None,
        };

        let manager = TelemetryManager::new();
        manager.record_llm_request("gpt-5", Duration::from_secs(30), Some(&reported));
        manager.record_llm_request("gpt-5", Duration::from_secs(10), None);

        let stats = manager.get_snapshot().expect("snapshot");
        let expected_api_time = Duration::from_secs(40);
        assert_eq!(stats.api_time_spent, expected_api_time);

        let per_model = stats.model_usage.get("gpt-5").expect("model usage");
        assert_eq!(per_model.api_time, expected_api_time);
        assert_eq!(per_model.prompt_tokens, 100);
        assert_eq!(per_model.completion_tokens, 200);
        assert_eq!(per_model.cached_prompt_tokens, 50);
        // No explicit cache-read count was reported, so the cached prompt count is used.
        assert_eq!(per_model.cache_read_tokens, 50);
        assert_eq!(per_model.cache_creation_tokens, 0);
        assert_eq!(stats.dropped_metric_updates, 0);
    }

    /// Cache-read tokens fall back to the cached prompt count while an explicit
    /// cache-creation count is recorded as reported.
    #[test]
    fn records_cache_read_fallback_and_creation_tokens() {
        let reported = crate::llm::provider::Usage {
            prompt_tokens: 500,
            completion_tokens: 100,
            total_tokens: 600,
            cached_prompt_tokens: Some(320),
            cache_creation_tokens: Some(80),
            cache_read_tokens: None,
        };

        let manager = TelemetryManager::new();
        manager.record_llm_request("gpt-5", Duration::from_secs(5), Some(&reported));

        let stats = manager.get_snapshot().expect("snapshot");
        let per_model = stats.model_usage.get("gpt-5").expect("model usage");
        assert_eq!(per_model.cache_read_tokens, 320);
        assert_eq!(per_model.cache_creation_tokens, 80);
    }
}