Skip to main content

vtcode_core/metrics/
mod.rs

//! Metrics collection and observability for the MCP execution system.
//!
//! Tracks performance, effectiveness, and security across all execution steps:
//! - Tool discovery (hit rate, response time)
//! - Code execution (duration, success rate, memory)
//! - SDK generation (overhead, caching)
//! - Data filtering (reduction ratio, token savings)
//! - Skill usage (adoption, reuse patterns)
//! - PII detection (pattern matches, audit trail)
10
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13use std::fmt::Write;
14use std::sync::{Arc, Mutex};
15use std::time::Instant;
16
17pub mod discovery_metrics;
18pub mod execution_metrics;
19pub mod filtering_metrics;
20pub mod sdk_metrics;
21pub mod security_metrics;
22pub mod skill_metrics;
23
24pub use discovery_metrics::DiscoveryMetrics;
25pub use execution_metrics::ExecutionMetrics;
26pub use filtering_metrics::FilteringMetrics;
27pub use sdk_metrics::SdkMetrics;
28pub use security_metrics::SecurityMetrics;
29pub use skill_metrics::SkillMetrics;
30
31/// Central metrics collector for all MCP execution activities
32#[derive(Clone)]
33pub struct MetricsCollector {
34    discovery: Arc<Mutex<DiscoveryMetrics>>,
35    execution: Arc<Mutex<ExecutionMetrics>>,
36    sdk: Arc<Mutex<SdkMetrics>>,
37    filtering: Arc<Mutex<FilteringMetrics>>,
38    skills: Arc<Mutex<SkillMetrics>>,
39    security: Arc<Mutex<SecurityMetrics>>,
40    start_time: Instant,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct MetricsSummary {
45    pub timestamp: DateTime<Utc>,
46    pub session_duration_ms: u64,
47    pub discovery: DiscoveryMetrics,
48    pub execution: ExecutionMetrics,
49    pub sdk: SdkMetrics,
50    pub filtering: FilteringMetrics,
51    pub skills: SkillMetrics,
52    pub security: SecurityMetrics,
53}
54
55impl MetricsCollector {
56    /// Create a new metrics collector
57    pub fn new() -> Self {
58        Self {
59            discovery: Arc::new(Mutex::new(DiscoveryMetrics::new())),
60            execution: Arc::new(Mutex::new(ExecutionMetrics::new())),
61            sdk: Arc::new(Mutex::new(SdkMetrics::new())),
62            filtering: Arc::new(Mutex::new(FilteringMetrics::new())),
63            skills: Arc::new(Mutex::new(SkillMetrics::new())),
64            security: Arc::new(Mutex::new(SecurityMetrics::new())),
65            start_time: Instant::now(),
66        }
67    }
68
69    // ========== Discovery Metrics ==========
70
71    /// Record a tool discovery query
72    pub fn record_discovery_query(
73        &self,
74        keyword: String,
75        result_count: u64,
76        response_time_ms: u64,
77    ) {
78        if let Ok(mut metrics) = self.discovery.lock() {
79            metrics.record_query(keyword, result_count, response_time_ms);
80        }
81    }
82
83    /// Record a failed discovery query
84    pub fn record_discovery_failure(&self, keyword: String) {
85        if let Ok(mut metrics) = self.discovery.lock() {
86            metrics.record_failure(keyword);
87        }
88    }
89
90    /// Record a discovery cache hit
91    pub fn record_discovery_cache_hit(&self) {
92        if let Ok(mut metrics) = self.discovery.lock() {
93            metrics.record_cache_hit();
94        }
95    }
96
97    // ========== Execution Metrics ==========
98
99    /// Record the start of a code execution
100    pub fn record_execution_start(&self, language: String) {
101        if let Ok(mut metrics) = self.execution.lock() {
102            metrics.record_start(language);
103        }
104    }
105
106    /// Record successful execution completion
107    pub fn record_execution_complete(&self, language: String, duration_ms: u64, memory_mb: u64) {
108        if let Ok(mut metrics) = self.execution.lock() {
109            metrics.record_complete(language, duration_ms, memory_mb, true);
110        }
111    }
112
113    /// Record failed execution
114    pub fn record_execution_failure(&self, language: String, duration_ms: u64) {
115        if let Ok(mut metrics) = self.execution.lock() {
116            metrics.record_failure(language, duration_ms);
117        }
118    }
119
120    /// Record execution timeout
121    pub fn record_execution_timeout(&self, language: String, duration_ms: u64) {
122        if let Ok(mut metrics) = self.execution.lock() {
123            metrics.record_timeout(language, duration_ms);
124        }
125    }
126
127    /// Record a retry attempt for execution-related workflows.
128    pub fn record_retry_attempt(&self) {
129        if let Ok(mut metrics) = self.execution.lock() {
130            metrics.record_retry_attempt();
131        }
132    }
133
134    /// Record a successful retry that eventually recovered.
135    pub fn record_retry_success(&self) {
136        if let Ok(mut metrics) = self.execution.lock() {
137            metrics.record_retry_success();
138        }
139    }
140
141    /// Record an execution that exhausted all retry attempts.
142    pub fn record_retry_exhausted(&self) {
143        if let Ok(mut metrics) = self.execution.lock() {
144            metrics.record_retry_exhausted();
145        }
146    }
147
148    /// Record that a circuit breaker entered the open state.
149    pub fn record_circuit_open(&self) {
150        if let Ok(mut metrics) = self.execution.lock() {
151            metrics.record_circuit_open();
152        }
153    }
154
155    /// Record that a circuit breaker transitioned to half-open.
156    pub fn record_half_open(&self) {
157        if let Ok(mut metrics) = self.execution.lock() {
158            metrics.record_half_open();
159        }
160    }
161
162    /// Record a denied request caused by an open circuit breaker.
163    pub fn record_breaker_denial(&self) {
164        if let Ok(mut metrics) = self.execution.lock() {
165            metrics.record_breaker_denial();
166        }
167    }
168
169    /// Record result size for filtering calculation
170    pub fn record_result_size(&self, size_bytes: usize) {
171        if let Ok(mut metrics) = self.execution.lock() {
172            metrics.record_result_size(size_bytes);
173        }
174    }
175
176    // ========== SDK Metrics ==========
177
178    /// Record SDK generation
179    pub fn record_sdk_generation(&self, generation_time_ms: u64, tools_count: u64) {
180        if let Ok(mut metrics) = self.sdk.lock() {
181            metrics.record_generation(generation_time_ms, tools_count);
182        }
183    }
184
185    /// Record SDK cache utilization
186    pub fn record_sdk_cache_hit(&self) {
187        if let Ok(mut metrics) = self.sdk.lock() {
188            metrics.record_cache_hit();
189        }
190    }
191
192    // ========== Filtering Metrics ==========
193
194    /// Record a filtering operation
195    pub fn record_filtering_operation(
196        &self,
197        operation_type: String,
198        input_size: u64,
199        output_size: u64,
200        duration_ms: u64,
201    ) {
202        if let Ok(mut metrics) = self.filtering.lock() {
203            metrics.record_operation(operation_type, input_size, output_size, duration_ms);
204        }
205    }
206
207    // ========== Skill Metrics ==========
208
209    /// Record skill execution
210    pub fn record_skill_execution(&self, skill_name: String, duration_ms: u64, success: bool) {
211        if let Ok(mut metrics) = self.skills.lock() {
212            metrics.record_execution(skill_name, duration_ms, success);
213        }
214    }
215
216    /// Record skill creation
217    pub fn record_skill_created(&self, skill_name: String, language: String) {
218        if let Ok(mut metrics) = self.skills.lock() {
219            metrics.record_created(skill_name, language);
220        }
221    }
222
223    /// Record skill deletion
224    pub fn record_skill_deleted(&self, skill_name: String) {
225        if let Ok(mut metrics) = self.skills.lock() {
226            metrics.record_deleted(skill_name);
227        }
228    }
229
230    // ========== Security Metrics ==========
231
232    /// Record PII pattern detection
233    pub fn record_pii_detection(&self, pattern_type: String) {
234        if let Ok(mut metrics) = self.security.lock() {
235            metrics.record_detection(pattern_type);
236        }
237    }
238
239    /// Record tokenization
240    pub fn record_pii_tokenization(&self, token_count: usize) {
241        if let Ok(mut metrics) = self.security.lock() {
242            metrics.record_tokenization(token_count);
243        }
244    }
245
246    /// Record audit event
247    pub fn record_audit_event(&self, event_type: String, severity: String) {
248        if let Ok(mut metrics) = self.security.lock() {
249            metrics.record_audit_event(event_type, severity);
250        }
251    }
252
253    // ========== Queries ==========
254
255    /// Get current discovery metrics snapshot
256    pub fn get_discovery_metrics(&self) -> DiscoveryMetrics {
257        self.discovery
258            .lock()
259            .map(|m| m.clone())
260            .unwrap_or_else(|_| DiscoveryMetrics::new())
261    }
262
263    /// Get current execution metrics snapshot
264    pub fn get_execution_metrics(&self) -> ExecutionMetrics {
265        self.execution
266            .lock()
267            .map(|m| m.clone())
268            .unwrap_or_else(|_| ExecutionMetrics::new())
269    }
270
271    /// Get current SDK metrics snapshot
272    pub fn get_sdk_metrics(&self) -> SdkMetrics {
273        self.sdk
274            .lock()
275            .map(|m| m.clone())
276            .unwrap_or_else(|_| SdkMetrics::new())
277    }
278
279    /// Get current filtering metrics snapshot
280    pub fn get_filtering_metrics(&self) -> FilteringMetrics {
281        self.filtering
282            .lock()
283            .map(|m| m.clone())
284            .unwrap_or_else(|_| FilteringMetrics::new())
285    }
286
287    /// Get current skill metrics snapshot
288    pub fn get_skill_metrics(&self) -> SkillMetrics {
289        self.skills
290            .lock()
291            .map(|m| m.clone())
292            .unwrap_or_else(|_| SkillMetrics::new())
293    }
294
295    /// Get current security metrics snapshot
296    pub fn get_security_metrics(&self) -> SecurityMetrics {
297        self.security
298            .lock()
299            .map(|m| m.clone())
300            .unwrap_or_else(|_| SecurityMetrics::new())
301    }
302
303    /// Get comprehensive summary of all metrics
304    pub fn get_summary(&self) -> MetricsSummary {
305        MetricsSummary {
306            timestamp: Utc::now(),
307            session_duration_ms: self.start_time.elapsed().as_millis().min(u64::MAX as u128) as u64,
308            discovery: self.get_discovery_metrics(),
309            execution: self.get_execution_metrics(),
310            sdk: self.get_sdk_metrics(),
311            filtering: self.get_filtering_metrics(),
312            skills: self.get_skill_metrics(),
313            security: self.get_security_metrics(),
314        }
315    }
316
317    // ========== Export ==========
318
319    /// Export metrics as JSON
320    pub fn export_json(&self) -> anyhow::Result<serde_json::Value> {
321        let summary = self.get_summary();
322        Ok(serde_json::to_value(summary)?)
323    }
324
325    /// Export metrics in Prometheus format
326    pub fn export_prometheus(&self) -> String {
327        let discovery = self.get_discovery_metrics();
328        let execution = self.get_execution_metrics();
329        let filtering = self.get_filtering_metrics();
330        let skills = self.get_skill_metrics();
331        let security = self.get_security_metrics();
332
333        let mut output = String::new();
334
335        // Discovery metrics
336        let _ = write!(
337            output,
338            "# HELP vtcode_discovery_queries_total Total tool discovery queries\n\
339             # TYPE vtcode_discovery_queries_total counter\n\
340             vtcode_discovery_queries_total {}\n\n",
341            discovery.total_queries
342        );
343
344        let _ = write!(
345            output,
346            "# HELP vtcode_discovery_hit_rate Hit rate of discovery queries\n\
347             # TYPE vtcode_discovery_hit_rate gauge\n\
348             vtcode_discovery_hit_rate {}\n\n",
349            discovery.hit_rate()
350        );
351
352        // Execution metrics
353        let _ = write!(
354            output,
355            "# HELP vtcode_execution_total Total code executions\n\
356             # TYPE vtcode_execution_total counter\n\
357             vtcode_execution_total {}\n\n",
358            execution.total_executions
359        );
360
361        let _ = write!(
362            output,
363            "# HELP vtcode_execution_duration_ms Code execution average duration\n\
364             # TYPE vtcode_execution_duration_ms gauge\n\
365             vtcode_execution_duration_ms {}\n\n",
366            execution.avg_duration_ms()
367        );
368
369        let _ = write!(
370            output,
371            "# HELP vtcode_retry_attempts_total Total retry attempts\n\
372             # TYPE vtcode_retry_attempts_total counter\n\
373             vtcode_retry_attempts_total {}\n\n",
374            execution.retry_attempts
375        );
376
377        let _ = write!(
378            output,
379            "# HELP vtcode_retry_successes_total Total retries that later succeeded\n\
380             # TYPE vtcode_retry_successes_total counter\n\
381             vtcode_retry_successes_total {}\n\n",
382            execution.retry_successes
383        );
384
385        let _ = write!(
386            output,
387            "# HELP vtcode_retry_exhausted_total Total operations that exhausted retries\n\
388             # TYPE vtcode_retry_exhausted_total counter\n\
389             vtcode_retry_exhausted_total {}\n\n",
390            execution.retry_exhausted
391        );
392
393        let _ = write!(
394            output,
395            "# HELP vtcode_circuit_open_total Total circuit breaker open transitions\n\
396             # TYPE vtcode_circuit_open_total counter\n\
397             vtcode_circuit_open_total {}\n\n",
398            execution.circuit_open_events
399        );
400
401        let _ = write!(
402            output,
403            "# HELP vtcode_circuit_half_open_total Total circuit breaker half-open transitions\n\
404             # TYPE vtcode_circuit_half_open_total counter\n\
405             vtcode_circuit_half_open_total {}\n\n",
406            execution.half_open_events
407        );
408
409        let _ = write!(
410            output,
411            "# HELP vtcode_circuit_breaker_denials_total Total circuit breaker denials\n\
412             # TYPE vtcode_circuit_breaker_denials_total counter\n\
413             vtcode_circuit_breaker_denials_total {}\n\n",
414            execution.breaker_denials
415        );
416
417        // Filtering metrics
418        let _ = write!(
419            output,
420            "# HELP vtcode_filtering_operations_total Total filtering operations\n\
421             # TYPE vtcode_filtering_operations_total counter\n\
422             vtcode_filtering_operations_total {}\n\n",
423            filtering.total_operations
424        );
425
426        let _ = write!(
427            output,
428            "# HELP vtcode_context_tokens_saved Estimated tokens saved by filtering\n\
429             # TYPE vtcode_context_tokens_saved counter\n\
430             vtcode_context_tokens_saved {}\n\n",
431            filtering.estimated_tokens_saved()
432        );
433
434        // Skills metrics
435        let _ = write!(
436            output,
437            "# HELP vtcode_skills_total Total saved skills\n\
438             # TYPE vtcode_skills_total gauge\n\
439             vtcode_skills_total {}\n\n",
440            skills.total_skills
441        );
442
443        let _ = write!(
444            output,
445            "# HELP vtcode_skill_reuse_ratio Ratio of skill reuse\n\
446             # TYPE vtcode_skill_reuse_ratio gauge\n\
447             vtcode_skill_reuse_ratio {}\n\n",
448            skills.reuse_ratio()
449        );
450
451        // Security metrics
452        let _ = write!(
453            output,
454            "# HELP vtcode_pii_detections_total Total PII patterns detected\n\
455             # TYPE vtcode_pii_detections_total counter\n\
456             vtcode_pii_detections_total {}\n\n",
457            security.pii_detections
458        );
459
460        let _ = write!(
461            output,
462            "# HELP vtcode_tokens_created_total Total PII tokens created\n\
463             # TYPE vtcode_tokens_created_total counter\n\
464             vtcode_tokens_created_total {}\n\n",
465            security.tokens_created
466        );
467
468        output
469    }
470}
471
472impl Default for MetricsCollector {
473    fn default() -> Self {
474        Self::new()
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed collector reports zero queries and executions.
    #[test]
    fn test_metrics_collector_creation() {
        let snapshot = MetricsCollector::new().get_summary();

        assert_eq!(snapshot.discovery.total_queries, 0);
        assert_eq!(snapshot.execution.total_executions, 0);
    }

    /// One recorded discovery query shows up in the discovery snapshot.
    #[test]
    fn test_discovery_metrics_recording() {
        let hub = MetricsCollector::new();
        hub.record_discovery_query(String::from("file"), 5, 50);

        let discovery = hub.get_discovery_metrics();
        assert_eq!(discovery.total_queries, 1);
        assert!(discovery.avg_response_time_ms() > 0);
    }

    /// A start followed by a completion counts as one successful execution.
    #[test]
    fn test_execution_metrics_recording() {
        let hub = MetricsCollector::new();
        hub.record_execution_start(String::from("python3"));
        hub.record_execution_complete(String::from("python3"), 1000, 50);

        let execution = hub.get_execution_metrics();
        assert_eq!(execution.total_executions, 1);
        assert_eq!(execution.successful_executions, 1);
        assert_eq!(execution.avg_duration_ms(), 1000);
    }

    /// The summary reflects recordings made in different categories.
    #[test]
    fn test_metrics_summary_export() {
        let hub = MetricsCollector::new();
        hub.record_discovery_query(String::from("test"), 3, 30);
        hub.record_pii_detection(String::from("email"));

        let snapshot = hub.get_summary();
        assert_eq!(snapshot.discovery.total_queries, 1);
        assert_eq!(snapshot.security.pii_detections, 1);
    }

    /// Retry and circuit-breaker events land on the execution metrics.
    #[test]
    fn test_reliability_metrics_recording() {
        let hub = MetricsCollector::new();
        hub.record_retry_attempt();
        hub.record_retry_success();
        hub.record_circuit_open();
        hub.record_half_open();
        hub.record_breaker_denial();

        let execution = hub.get_execution_metrics();
        assert_eq!(execution.retry_attempts, 1);
        assert_eq!(execution.retry_successes, 1);
        assert_eq!(execution.circuit_open_events, 1);
        assert_eq!(execution.half_open_events, 1);
        assert_eq!(execution.breaker_denials, 1);
    }

    /// The Prometheus export contains the expected metric names.
    #[test]
    fn test_prometheus_export() {
        let hub = MetricsCollector::new();
        hub.record_execution_complete(String::from("python3"), 500, 40);

        let exposition = hub.export_prometheus();
        for metric_name in [
            "vtcode_execution_total",
            "vtcode_execution_duration_ms",
            "vtcode_retry_attempts_total",
        ] {
            assert!(exposition.contains(metric_name));
        }
    }

    /// The JSON export exposes the top-level summary keys.
    #[test]
    fn test_json_export() {
        let hub = MetricsCollector::new();
        hub.record_discovery_query(String::from("test"), 2, 25);

        let exported = hub.export_json().unwrap();
        assert!(exported.get("timestamp").is_some());
        assert!(exported.get("discovery").is_some());
    }
}