Skip to main content

aster/agents/monitor/
metrics.rs

1//! Agent Monitor
2//!
3//! Tracks agent execution metrics including duration,
4//! tokens, API calls, tool calls, cost, and errors.
5//!
6//! This module provides:
7//! - Agent execution tracking with start/stop lifecycle
8//! - Token usage recording
9//! - API call tracking with latency
10//! - Tool call metrics with input/output sizes
11//! - Cost tracking
12//! - Error recording with context
13//! - Metrics persistence to disk
14//! - Aggregated statistics across all agents
15
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use std::path::PathBuf;
20use std::time::Duration;
21
22use super::alerts::{AgentExecutionStatus, ErrorRecord, TokenUsage};
23
24/// Tool call metric for tracking individual tool executions
25#[derive(Debug, Clone, Serialize, Deserialize)]
26#[serde(rename_all = "camelCase")]
27pub struct ToolCallMetric {
28    /// Unique identifier for this tool call
29    pub id: String,
30    /// Name of the tool
31    pub tool_name: String,
32    /// Start time of the tool call
33    pub start_time: DateTime<Utc>,
34    /// End time of the tool call (if completed)
35    pub end_time: Option<DateTime<Utc>>,
36    /// Duration of the tool call (if completed)
37    pub duration: Option<Duration>,
38    /// Whether the tool call succeeded
39    pub success: bool,
40    /// Error message if failed
41    pub error: Option<String>,
42    /// Input size in bytes
43    pub input_size: Option<usize>,
44    /// Output size in bytes
45    pub output_size: Option<usize>,
46}
47
48impl ToolCallMetric {
49    /// Create a new tool call metric
50    pub fn new(tool_name: impl Into<String>) -> Self {
51        Self {
52            id: uuid::Uuid::new_v4().to_string(),
53            tool_name: tool_name.into(),
54            start_time: Utc::now(),
55            end_time: None,
56            duration: None,
57            success: false,
58            error: None,
59            input_size: None,
60            output_size: None,
61        }
62    }
63
64    /// Set input size
65    pub fn with_input_size(mut self, size: usize) -> Self {
66        self.input_size = Some(size);
67        self
68    }
69
70    /// Complete the tool call
71    pub fn complete(&mut self, success: bool, error: Option<String>) {
72        self.end_time = Some(Utc::now());
73        self.success = success;
74        self.error = error;
75        if let Some(end) = self.end_time {
76            let elapsed = end.signed_duration_since(self.start_time);
77            self.duration = elapsed.to_std().ok();
78        }
79    }
80
81    /// Set output size
82    pub fn set_output_size(&mut self, size: usize) {
83        self.output_size = Some(size);
84    }
85}
86
87/// Performance metrics for an agent
88#[derive(Debug, Clone, Default, Serialize, Deserialize)]
89#[serde(rename_all = "camelCase")]
90pub struct PerformanceMetrics {
91    /// Average API call latency
92    pub avg_api_latency: Option<Duration>,
93    /// Average tool call duration
94    pub avg_tool_duration: Option<Duration>,
95    /// Tokens per second
96    pub tokens_per_second: Option<f64>,
97    /// API calls per minute
98    pub api_calls_per_minute: Option<f64>,
99}
100
101/// Full agent metrics for monitoring
102#[derive(Debug, Clone, Serialize, Deserialize)]
103#[serde(rename_all = "camelCase")]
104pub struct FullAgentMetrics {
105    /// Agent ID
106    pub agent_id: String,
107    /// Agent type
108    pub agent_type: String,
109    /// Optional description
110    pub description: Option<String>,
111    /// Start time
112    pub start_time: DateTime<Utc>,
113    /// End time (if completed)
114    pub end_time: Option<DateTime<Utc>>,
115    /// Duration (if completed)
116    #[serde(with = "optional_duration_serde")]
117    pub duration: Option<Duration>,
118    /// Execution status
119    pub status: AgentExecutionStatus,
120    /// Token usage
121    pub tokens_used: TokenUsage,
122    /// Number of API calls
123    pub api_calls: usize,
124    /// Number of successful API calls
125    pub api_calls_successful: usize,
126    /// Tool call metrics
127    pub tool_calls: Vec<ToolCallMetric>,
128    /// Total cost
129    pub cost: f64,
130    /// Errors encountered
131    pub errors: Vec<ErrorRecord>,
132    /// Performance metrics
133    pub performance: PerformanceMetrics,
134    /// Configured timeout
135    #[serde(with = "optional_duration_serde")]
136    pub timeout: Option<Duration>,
137    /// API call latencies for calculating averages
138    #[serde(skip)]
139    api_latencies: Vec<Duration>,
140}
141
142/// Custom serialization for Option<Duration>
143mod optional_duration_serde {
144    use serde::{Deserialize, Deserializer, Serialize, Serializer};
145    use std::time::Duration;
146
147    #[derive(Serialize, Deserialize)]
148    struct DurationMs(u64);
149
150    pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
151    where
152        S: Serializer,
153    {
154        match duration {
155            Some(d) => serializer.serialize_some(&DurationMs(d.as_millis() as u64)),
156            None => serializer.serialize_none(),
157        }
158    }
159
160    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
161    where
162        D: Deserializer<'de>,
163    {
164        let opt: Option<DurationMs> = Option::deserialize(deserializer)?;
165        Ok(opt.map(|d| Duration::from_millis(d.0)))
166    }
167}
168
169impl FullAgentMetrics {
170    /// Create new agent metrics
171    pub fn new(agent_id: impl Into<String>, agent_type: impl Into<String>) -> Self {
172        Self {
173            agent_id: agent_id.into(),
174            agent_type: agent_type.into(),
175            description: None,
176            start_time: Utc::now(),
177            end_time: None,
178            duration: None,
179            status: AgentExecutionStatus::Running,
180            tokens_used: TokenUsage::default(),
181            api_calls: 0,
182            api_calls_successful: 0,
183            tool_calls: Vec::new(),
184            cost: 0.0,
185            errors: Vec::new(),
186            performance: PerformanceMetrics::default(),
187            timeout: None,
188            api_latencies: Vec::new(),
189        }
190    }
191
192    /// Set description
193    pub fn with_description(mut self, description: impl Into<String>) -> Self {
194        self.description = Some(description.into());
195        self
196    }
197
198    /// Set timeout
199    pub fn with_timeout(mut self, timeout: Duration) -> Self {
200        self.timeout = Some(timeout);
201        self
202    }
203
204    /// Record token usage
205    pub fn record_tokens(&mut self, input: usize, output: usize) {
206        self.tokens_used.input += input;
207        self.tokens_used.output += output;
208        self.tokens_used.total = self.tokens_used.input + self.tokens_used.output;
209    }
210
211    /// Record an API call
212    pub fn record_api_call(&mut self, success: bool, latency: Option<Duration>) {
213        self.api_calls += 1;
214        if success {
215            self.api_calls_successful += 1;
216        }
217        if let Some(lat) = latency {
218            self.api_latencies.push(lat);
219        }
220    }
221
222    /// Record cost
223    pub fn record_cost(&mut self, cost: f64) {
224        self.cost += cost;
225    }
226
227    /// Record an error
228    pub fn record_error(&mut self, message: impl Into<String>, phase: Option<&str>) {
229        let mut error = ErrorRecord::new(message);
230        if let Some(p) = phase {
231            error = error.with_phase(p);
232        }
233        self.errors.push(error);
234    }
235
236    /// Add a tool call metric
237    pub fn add_tool_call(&mut self, metric: ToolCallMetric) {
238        self.tool_calls.push(metric);
239    }
240
241    /// Complete the metrics tracking
242    pub fn complete(&mut self, status: AgentExecutionStatus) {
243        self.end_time = Some(Utc::now());
244        self.status = status;
245        if let Some(end) = self.end_time {
246            let elapsed = end.signed_duration_since(self.start_time);
247            self.duration = elapsed.to_std().ok();
248        }
249        self.calculate_performance();
250    }
251
252    /// Calculate performance metrics
253    fn calculate_performance(&mut self) {
254        // Average API latency
255        if !self.api_latencies.is_empty() {
256            let total: Duration = self.api_latencies.iter().sum();
257            self.performance.avg_api_latency = Some(total / self.api_latencies.len() as u32);
258        }
259
260        // Average tool duration
261        let completed_tools: Vec<_> = self.tool_calls.iter().filter_map(|t| t.duration).collect();
262        if !completed_tools.is_empty() {
263            let total: Duration = completed_tools.iter().sum();
264            self.performance.avg_tool_duration = Some(total / completed_tools.len() as u32);
265        }
266
267        // Tokens per second
268        if let Some(duration) = self.duration {
269            let secs = duration.as_secs_f64();
270            if secs > 0.0 {
271                self.performance.tokens_per_second = Some(self.tokens_used.total as f64 / secs);
272            }
273        }
274
275        // API calls per minute
276        if let Some(duration) = self.duration {
277            let mins = duration.as_secs_f64() / 60.0;
278            if mins > 0.0 {
279                self.performance.api_calls_per_minute = Some(self.api_calls as f64 / mins);
280            }
281        }
282    }
283
284    /// Calculate error rate
285    pub fn error_rate(&self) -> f32 {
286        if self.api_calls == 0 {
287            0.0
288        } else {
289            (self.api_calls - self.api_calls_successful) as f32 / self.api_calls as f32
290        }
291    }
292
293    /// Check if the agent has timed out
294    pub fn is_timed_out(&self) -> bool {
295        if let Some(timeout) = self.timeout {
296            if let Some(duration) = self.duration {
297                return duration > timeout;
298            }
299            let elapsed = Utc::now().signed_duration_since(self.start_time);
300            if let Ok(elapsed_std) = elapsed.to_std() {
301                return elapsed_std > timeout;
302            }
303        }
304        false
305    }
306}
307
308/// Monitor configuration
309#[derive(Debug, Clone, Serialize, Deserialize)]
310#[serde(rename_all = "camelCase")]
311pub struct MonitorConfig {
312    /// Whether to track tool calls
313    pub track_tool_calls: bool,
314    /// Whether to track API latencies
315    pub track_api_latencies: bool,
316    /// Whether to persist metrics automatically
317    pub auto_persist: bool,
318    /// Maximum number of metrics to keep in memory
319    pub max_metrics_in_memory: usize,
320    /// Directory for persisting metrics
321    pub metrics_dir: Option<PathBuf>,
322}
323
324impl Default for MonitorConfig {
325    fn default() -> Self {
326        Self {
327            track_tool_calls: true,
328            track_api_latencies: true,
329            auto_persist: false,
330            max_metrics_in_memory: 1000,
331            metrics_dir: None,
332        }
333    }
334}
335
336/// Aggregated statistics across all agents
337#[derive(Debug, Clone, Default, Serialize, Deserialize)]
338#[serde(rename_all = "camelCase")]
339pub struct AggregatedStats {
340    /// Total number of agents tracked
341    pub total_agents: usize,
342    /// Number of completed agents
343    pub completed_agents: usize,
344    /// Number of failed agents
345    pub failed_agents: usize,
346    /// Number of running agents
347    pub running_agents: usize,
348    /// Total tokens used
349    pub total_tokens: usize,
350    /// Total API calls
351    pub total_api_calls: usize,
352    /// Total tool calls
353    pub total_tool_calls: usize,
354    /// Total cost
355    pub total_cost: f64,
356    /// Total errors
357    pub total_errors: usize,
358    /// Average duration (for completed agents)
359    pub avg_duration: Option<Duration>,
360    /// Average tokens per agent
361    pub avg_tokens_per_agent: f64,
362    /// Overall error rate
363    pub overall_error_rate: f32,
364}
365
366/// Agent Monitor for tracking agent execution metrics
367#[derive(Debug)]
368pub struct AgentMonitor {
369    /// Configuration
370    config: MonitorConfig,
371    /// Metrics indexed by agent ID
372    metrics: HashMap<String, FullAgentMetrics>,
373    /// Active tool calls indexed by tool call ID
374    active_tool_calls: HashMap<String, (String, ToolCallMetric)>, // (agent_id, metric)
375    /// Directory for persisting metrics
376    metrics_dir: PathBuf,
377}
378
379impl Default for AgentMonitor {
380    fn default() -> Self {
381        Self::new(None)
382    }
383}
384
385impl AgentMonitor {
386    /// Create a new AgentMonitor
387    pub fn new(config: Option<MonitorConfig>) -> Self {
388        let config = config.unwrap_or_default();
389        let metrics_dir = config
390            .metrics_dir
391            .clone()
392            .unwrap_or_else(|| PathBuf::from(".aster/metrics"));
393
394        Self {
395            config,
396            metrics: HashMap::new(),
397            active_tool_calls: HashMap::new(),
398            metrics_dir,
399        }
400    }
401
402    /// Start tracking an agent
403    pub fn start_tracking(&mut self, agent_id: &str, agent_type: &str, description: Option<&str>) {
404        let mut metrics = FullAgentMetrics::new(agent_id, agent_type);
405        if let Some(desc) = description {
406            metrics = metrics.with_description(desc);
407        }
408        self.metrics.insert(agent_id.to_string(), metrics);
409    }
410
411    /// Start tracking an agent with timeout
412    pub fn start_tracking_with_timeout(
413        &mut self,
414        agent_id: &str,
415        agent_type: &str,
416        description: Option<&str>,
417        timeout: Duration,
418    ) {
419        let mut metrics = FullAgentMetrics::new(agent_id, agent_type).with_timeout(timeout);
420        if let Some(desc) = description {
421            metrics = metrics.with_description(desc);
422        }
423        self.metrics.insert(agent_id.to_string(), metrics);
424    }
425
426    /// Start a tool call and return its ID
427    pub fn start_tool_call(
428        &mut self,
429        agent_id: &str,
430        tool_name: &str,
431        input_size: Option<usize>,
432    ) -> String {
433        if !self.config.track_tool_calls {
434            return String::new();
435        }
436
437        let mut metric = ToolCallMetric::new(tool_name);
438        if let Some(size) = input_size {
439            metric = metric.with_input_size(size);
440        }
441        let id = metric.id.clone();
442        self.active_tool_calls
443            .insert(id.clone(), (agent_id.to_string(), metric));
444        id
445    }
446
447    /// End a tool call
448    pub fn end_tool_call(
449        &mut self,
450        agent_id: &str,
451        tool_call_id: &str,
452        success: bool,
453        error: Option<&str>,
454        output_size: Option<usize>,
455    ) {
456        if !self.config.track_tool_calls {
457            return;
458        }
459
460        if let Some((stored_agent_id, mut metric)) = self.active_tool_calls.remove(tool_call_id) {
461            if stored_agent_id != agent_id {
462                // Mismatch, put it back
463                self.active_tool_calls
464                    .insert(tool_call_id.to_string(), (stored_agent_id, metric));
465                return;
466            }
467
468            metric.complete(success, error.map(String::from));
469            if let Some(size) = output_size {
470                metric.set_output_size(size);
471            }
472
473            if let Some(agent_metrics) = self.metrics.get_mut(agent_id) {
474                agent_metrics.add_tool_call(metric);
475            }
476        }
477    }
478
479    /// Record token usage for an agent
480    pub fn record_tokens(&mut self, agent_id: &str, input: usize, output: usize) {
481        if let Some(metrics) = self.metrics.get_mut(agent_id) {
482            metrics.record_tokens(input, output);
483        }
484    }
485
486    /// Record an API call for an agent
487    pub fn record_api_call(&mut self, agent_id: &str, success: bool, latency: Option<Duration>) {
488        if let Some(metrics) = self.metrics.get_mut(agent_id) {
489            let lat = if self.config.track_api_latencies {
490                latency
491            } else {
492                None
493            };
494            metrics.record_api_call(success, lat);
495        }
496    }
497
498    /// Record cost for an agent
499    pub fn record_cost(&mut self, agent_id: &str, cost: f64) {
500        if let Some(metrics) = self.metrics.get_mut(agent_id) {
501            metrics.record_cost(cost);
502        }
503    }
504
505    /// Record an error for an agent
506    pub fn record_error(&mut self, agent_id: &str, error: &str, phase: Option<&str>) {
507        if let Some(metrics) = self.metrics.get_mut(agent_id) {
508            metrics.record_error(error, phase);
509        }
510    }
511
512    /// Stop tracking an agent
513    pub fn stop_tracking(&mut self, agent_id: &str, status: AgentExecutionStatus) {
514        if let Some(metrics) = self.metrics.get_mut(agent_id) {
515            metrics.complete(status);
516
517            if self.config.auto_persist {
518                let _ = self.persist_metrics(agent_id);
519            }
520        }
521    }
522
523    /// Get metrics for an agent
524    pub fn get_metrics(&self, agent_id: &str) -> Option<&FullAgentMetrics> {
525        self.metrics.get(agent_id)
526    }
527
528    /// Get mutable metrics for an agent
529    pub fn get_metrics_mut(&mut self, agent_id: &str) -> Option<&mut FullAgentMetrics> {
530        self.metrics.get_mut(agent_id)
531    }
532
533    /// Get all metrics
534    pub fn get_all_metrics(&self) -> Vec<&FullAgentMetrics> {
535        self.metrics.values().collect()
536    }
537
538    /// Get metrics by status
539    pub fn get_metrics_by_status(&self, status: AgentExecutionStatus) -> Vec<&FullAgentMetrics> {
540        self.metrics
541            .values()
542            .filter(|m| m.status == status)
543            .collect()
544    }
545
546    /// Remove metrics for an agent
547    pub fn remove_metrics(&mut self, agent_id: &str) -> Option<FullAgentMetrics> {
548        self.metrics.remove(agent_id)
549    }
550
551    /// Clear all metrics
552    pub fn clear(&mut self) {
553        self.metrics.clear();
554        self.active_tool_calls.clear();
555    }
556
557    /// Get aggregated statistics
558    pub fn get_aggregated_stats(&self) -> AggregatedStats {
559        let mut stats = AggregatedStats {
560            total_agents: self.metrics.len(),
561            ..Default::default()
562        };
563
564        let mut total_duration = Duration::ZERO;
565        let mut completed_count = 0usize;
566
567        for metrics in self.metrics.values() {
568            match metrics.status {
569                AgentExecutionStatus::Completed => {
570                    stats.completed_agents += 1;
571                    if let Some(d) = metrics.duration {
572                        total_duration += d;
573                        completed_count += 1;
574                    }
575                }
576                AgentExecutionStatus::Failed | AgentExecutionStatus::TimedOut => {
577                    stats.failed_agents += 1;
578                }
579                AgentExecutionStatus::Running => {
580                    stats.running_agents += 1;
581                }
582                AgentExecutionStatus::Cancelled => {}
583            }
584
585            stats.total_tokens += metrics.tokens_used.total;
586            stats.total_api_calls += metrics.api_calls;
587            stats.total_tool_calls += metrics.tool_calls.len();
588            stats.total_cost += metrics.cost;
589            stats.total_errors += metrics.errors.len();
590        }
591
592        if completed_count > 0 {
593            stats.avg_duration = Some(total_duration / completed_count as u32);
594        }
595
596        if stats.total_agents > 0 {
597            stats.avg_tokens_per_agent = stats.total_tokens as f64 / stats.total_agents as f64;
598        }
599
600        let total_successful: usize = self.metrics.values().map(|m| m.api_calls_successful).sum();
601        if stats.total_api_calls > 0 {
602            stats.overall_error_rate =
603                (stats.total_api_calls - total_successful) as f32 / stats.total_api_calls as f32;
604        }
605
606        stats
607    }
608
609    /// Persist metrics for an agent to disk
610    pub fn persist_metrics(&self, agent_id: &str) -> std::io::Result<()> {
611        let metrics = match self.metrics.get(agent_id) {
612            Some(m) => m,
613            None => return Ok(()),
614        };
615
616        std::fs::create_dir_all(&self.metrics_dir)?;
617
618        let file_path = self.metrics_dir.join(format!("{}.json", agent_id));
619        let json = serde_json::to_string_pretty(metrics)
620            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
621        std::fs::write(file_path, json)?;
622
623        Ok(())
624    }
625
626    /// Load metrics for an agent from disk
627    pub fn load_metrics(&mut self, agent_id: &str) -> std::io::Result<Option<FullAgentMetrics>> {
628        let file_path = self.metrics_dir.join(format!("{}.json", agent_id));
629
630        if !file_path.exists() {
631            return Ok(None);
632        }
633
634        let json = std::fs::read_to_string(&file_path)?;
635        let metrics: FullAgentMetrics = serde_json::from_str(&json)
636            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
637
638        self.metrics.insert(agent_id.to_string(), metrics.clone());
639        Ok(Some(metrics))
640    }
641
642    /// List all persisted metrics
643    pub fn list_persisted_metrics(&self) -> std::io::Result<Vec<String>> {
644        if !self.metrics_dir.exists() {
645            return Ok(Vec::new());
646        }
647
648        let mut agent_ids = Vec::new();
649        for entry in std::fs::read_dir(&self.metrics_dir)? {
650            let entry = entry?;
651            let path = entry.path();
652            if path.extension().is_some_and(|ext| ext == "json") {
653                if let Some(stem) = path.file_stem() {
654                    agent_ids.push(stem.to_string_lossy().to_string());
655                }
656            }
657        }
658
659        Ok(agent_ids)
660    }
661
662    /// Delete persisted metrics for an agent
663    pub fn delete_persisted_metrics(&self, agent_id: &str) -> std::io::Result<bool> {
664        let file_path = self.metrics_dir.join(format!("{}.json", agent_id));
665
666        if file_path.exists() {
667            std::fs::remove_file(file_path)?;
668            Ok(true)
669        } else {
670            Ok(false)
671        }
672    }
673
674    /// Get the number of tracked agents
675    pub fn agent_count(&self) -> usize {
676        self.metrics.len()
677    }
678
679    /// Get the number of active tool calls
680    pub fn active_tool_call_count(&self) -> usize {
681        self.active_tool_calls.len()
682    }
683
684    /// Update configuration
685    pub fn set_config(&mut self, config: MonitorConfig) {
686        if let Some(dir) = &config.metrics_dir {
687            self.metrics_dir = dir.clone();
688        }
689        self.config = config;
690    }
691
692    /// Get current configuration
693    pub fn config(&self) -> &MonitorConfig {
694        &self.config
695    }
696
697    /// Set metrics directory
698    pub fn set_metrics_dir(&mut self, dir: PathBuf) {
699        self.metrics_dir = dir;
700    }
701
702    /// Get metrics directory
703    pub fn metrics_dir(&self) -> &PathBuf {
704        &self.metrics_dir
705    }
706}
707
708#[cfg(test)]
709mod tests {
710    use super::*;
711
712    #[test]
713    fn test_tool_call_metric_creation() {
714        let metric = ToolCallMetric::new("test_tool");
715
716        assert!(!metric.id.is_empty());
717        assert_eq!(metric.tool_name, "test_tool");
718        assert!(!metric.success);
719        assert!(metric.end_time.is_none());
720        assert!(metric.duration.is_none());
721    }
722
723    #[test]
724    fn test_tool_call_metric_complete() {
725        let mut metric = ToolCallMetric::new("test_tool");
726        std::thread::sleep(std::time::Duration::from_millis(10));
727        metric.complete(true, None);
728
729        assert!(metric.success);
730        assert!(metric.end_time.is_some());
731        assert!(metric.duration.is_some());
732        assert!(metric.error.is_none());
733    }
734
735    #[test]
736    fn test_tool_call_metric_with_error() {
737        let mut metric = ToolCallMetric::new("test_tool");
738        metric.complete(false, Some("Test error".to_string()));
739
740        assert!(!metric.success);
741        assert_eq!(metric.error, Some("Test error".to_string()));
742    }
743
744    #[test]
745    fn test_full_agent_metrics_creation() {
746        let metrics = FullAgentMetrics::new("agent-1", "test_agent");
747
748        assert_eq!(metrics.agent_id, "agent-1");
749        assert_eq!(metrics.agent_type, "test_agent");
750        assert_eq!(metrics.status, AgentExecutionStatus::Running);
751        assert_eq!(metrics.tokens_used.total, 0);
752        assert_eq!(metrics.api_calls, 0);
753        assert!(metrics.tool_calls.is_empty());
754        assert_eq!(metrics.cost, 0.0);
755        assert!(metrics.errors.is_empty());
756    }
757
758    #[test]
759    fn test_full_agent_metrics_record_tokens() {
760        let mut metrics = FullAgentMetrics::new("agent-1", "test");
761
762        metrics.record_tokens(100, 50);
763        assert_eq!(metrics.tokens_used.input, 100);
764        assert_eq!(metrics.tokens_used.output, 50);
765        assert_eq!(metrics.tokens_used.total, 150);
766
767        metrics.record_tokens(50, 25);
768        assert_eq!(metrics.tokens_used.input, 150);
769        assert_eq!(metrics.tokens_used.output, 75);
770        assert_eq!(metrics.tokens_used.total, 225);
771    }
772
773    #[test]
774    fn test_full_agent_metrics_record_api_call() {
775        let mut metrics = FullAgentMetrics::new("agent-1", "test");
776
777        metrics.record_api_call(true, Some(Duration::from_millis(100)));
778        metrics.record_api_call(true, Some(Duration::from_millis(200)));
779        metrics.record_api_call(false, None);
780
781        assert_eq!(metrics.api_calls, 3);
782        assert_eq!(metrics.api_calls_successful, 2);
783    }
784
785    #[test]
786    fn test_full_agent_metrics_error_rate() {
787        let mut metrics = FullAgentMetrics::new("agent-1", "test");
788
789        metrics.api_calls = 10;
790        metrics.api_calls_successful = 8;
791
792        assert!((metrics.error_rate() - 0.2).abs() < 0.001);
793    }
794
795    #[test]
796    fn test_full_agent_metrics_error_rate_zero_calls() {
797        let metrics = FullAgentMetrics::new("agent-1", "test");
798        assert_eq!(metrics.error_rate(), 0.0);
799    }
800
801    #[test]
802    fn test_full_agent_metrics_record_error() {
803        let mut metrics = FullAgentMetrics::new("agent-1", "test");
804
805        metrics.record_error("Test error 1", None);
806        metrics.record_error("Test error 2", Some("api_call"));
807
808        assert_eq!(metrics.errors.len(), 2);
809        assert_eq!(metrics.errors[0].message, "Test error 1");
810        assert!(metrics.errors[0].phase.is_none());
811        assert_eq!(metrics.errors[1].message, "Test error 2");
812        assert_eq!(metrics.errors[1].phase, Some("api_call".to_string()));
813    }
814
815    #[test]
816    fn test_full_agent_metrics_complete() {
817        let mut metrics = FullAgentMetrics::new("agent-1", "test");
818        std::thread::sleep(std::time::Duration::from_millis(10));
819        metrics.complete(AgentExecutionStatus::Completed);
820
821        assert_eq!(metrics.status, AgentExecutionStatus::Completed);
822        assert!(metrics.end_time.is_some());
823        assert!(metrics.duration.is_some());
824    }
825
826    #[test]
827    fn test_agent_monitor_creation() {
828        let monitor = AgentMonitor::new(None);
829
830        assert_eq!(monitor.agent_count(), 0);
831        assert_eq!(monitor.active_tool_call_count(), 0);
832    }
833
834    #[test]
835    fn test_agent_monitor_start_tracking() {
836        let mut monitor = AgentMonitor::new(None);
837
838        monitor.start_tracking("agent-1", "test_agent", Some("Test description"));
839
840        assert_eq!(monitor.agent_count(), 1);
841        let metrics = monitor.get_metrics("agent-1").unwrap();
842        assert_eq!(metrics.agent_id, "agent-1");
843        assert_eq!(metrics.agent_type, "test_agent");
844        assert_eq!(metrics.description, Some("Test description".to_string()));
845    }
846
847    #[test]
848    fn test_agent_monitor_record_tokens() {
849        let mut monitor = AgentMonitor::new(None);
850        monitor.start_tracking("agent-1", "test", None);
851
852        monitor.record_tokens("agent-1", 100, 50);
853
854        let metrics = monitor.get_metrics("agent-1").unwrap();
855        assert_eq!(metrics.tokens_used.total, 150);
856    }
857
858    #[test]
859    fn test_agent_monitor_record_api_call() {
860        let mut monitor = AgentMonitor::new(None);
861        monitor.start_tracking("agent-1", "test", None);
862
863        monitor.record_api_call("agent-1", true, Some(Duration::from_millis(100)));
864        monitor.record_api_call("agent-1", false, None);
865
866        let metrics = monitor.get_metrics("agent-1").unwrap();
867        assert_eq!(metrics.api_calls, 2);
868        assert_eq!(metrics.api_calls_successful, 1);
869    }
870
871    #[test]
872    fn test_agent_monitor_record_cost() {
873        let mut monitor = AgentMonitor::new(None);
874        monitor.start_tracking("agent-1", "test", None);
875
876        monitor.record_cost("agent-1", 0.5);
877        monitor.record_cost("agent-1", 0.3);
878
879        let metrics = monitor.get_metrics("agent-1").unwrap();
880        assert!((metrics.cost - 0.8).abs() < 0.001);
881    }
882
883    #[test]
884    fn test_agent_monitor_record_error() {
885        let mut monitor = AgentMonitor::new(None);
886        monitor.start_tracking("agent-1", "test", None);
887
888        monitor.record_error("agent-1", "Test error", Some("tool_call"));
889
890        let metrics = monitor.get_metrics("agent-1").unwrap();
891        assert_eq!(metrics.errors.len(), 1);
892        assert_eq!(metrics.errors[0].message, "Test error");
893    }
894
895    #[test]
896    fn test_agent_monitor_tool_call_tracking() {
897        let mut monitor = AgentMonitor::new(None);
898        monitor.start_tracking("agent-1", "test", None);
899
900        let tool_call_id = monitor.start_tool_call("agent-1", "test_tool", Some(100));
901        assert!(!tool_call_id.is_empty());
902        assert_eq!(monitor.active_tool_call_count(), 1);
903
904        monitor.end_tool_call("agent-1", &tool_call_id, true, None, Some(200));
905        assert_eq!(monitor.active_tool_call_count(), 0);
906
907        let metrics = monitor.get_metrics("agent-1").unwrap();
908        assert_eq!(metrics.tool_calls.len(), 1);
909        assert_eq!(metrics.tool_calls[0].tool_name, "test_tool");
910        assert!(metrics.tool_calls[0].success);
911        assert_eq!(metrics.tool_calls[0].input_size, Some(100));
912        assert_eq!(metrics.tool_calls[0].output_size, Some(200));
913    }
914
915    #[test]
916    fn test_agent_monitor_stop_tracking() {
917        let mut monitor = AgentMonitor::new(None);
918        monitor.start_tracking("agent-1", "test", None);
919
920        monitor.stop_tracking("agent-1", AgentExecutionStatus::Completed);
921
922        let metrics = monitor.get_metrics("agent-1").unwrap();
923        assert_eq!(metrics.status, AgentExecutionStatus::Completed);
924        assert!(metrics.end_time.is_some());
925    }
926
927    #[test]
928    fn test_agent_monitor_get_metrics_by_status() {
929        let mut monitor = AgentMonitor::new(None);
930
931        monitor.start_tracking("agent-1", "test", None);
932        monitor.start_tracking("agent-2", "test", None);
933        monitor.start_tracking("agent-3", "test", None);
934
935        monitor.stop_tracking("agent-1", AgentExecutionStatus::Completed);
936        monitor.stop_tracking("agent-2", AgentExecutionStatus::Failed);
937
938        let running = monitor.get_metrics_by_status(AgentExecutionStatus::Running);
939        assert_eq!(running.len(), 1);
940
941        let completed = monitor.get_metrics_by_status(AgentExecutionStatus::Completed);
942        assert_eq!(completed.len(), 1);
943
944        let failed = monitor.get_metrics_by_status(AgentExecutionStatus::Failed);
945        assert_eq!(failed.len(), 1);
946    }
947
948    #[test]
949    fn test_agent_monitor_aggregated_stats() {
950        let mut monitor = AgentMonitor::new(None);
951
952        monitor.start_tracking("agent-1", "test", None);
953        monitor.record_tokens("agent-1", 100, 50);
954        monitor.record_api_call("agent-1", true, None);
955        monitor.record_cost("agent-1", 0.5);
956        monitor.stop_tracking("agent-1", AgentExecutionStatus::Completed);
957
958        monitor.start_tracking("agent-2", "test", None);
959        monitor.record_tokens("agent-2", 200, 100);
960        monitor.record_api_call("agent-2", false, None);
961        monitor.record_cost("agent-2", 0.3);
962        monitor.stop_tracking("agent-2", AgentExecutionStatus::Failed);
963
964        let stats = monitor.get_aggregated_stats();
965
966        assert_eq!(stats.total_agents, 2);
967        assert_eq!(stats.completed_agents, 1);
968        assert_eq!(stats.failed_agents, 1);
969        assert_eq!(stats.total_tokens, 450);
970        assert_eq!(stats.total_api_calls, 2);
971        assert!((stats.total_cost - 0.8).abs() < 0.001);
972        assert!((stats.overall_error_rate - 0.5).abs() < 0.001);
973    }
974
975    #[test]
976    fn test_agent_monitor_remove_metrics() {
977        let mut monitor = AgentMonitor::new(None);
978        monitor.start_tracking("agent-1", "test", None);
979
980        assert_eq!(monitor.agent_count(), 1);
981
982        let removed = monitor.remove_metrics("agent-1");
983        assert!(removed.is_some());
984        assert_eq!(monitor.agent_count(), 0);
985    }
986
987    #[test]
988    fn test_agent_monitor_clear() {
989        let mut monitor = AgentMonitor::new(None);
990        monitor.start_tracking("agent-1", "test", None);
991        monitor.start_tracking("agent-2", "test", None);
992        monitor.start_tool_call("agent-1", "tool", None);
993
994        monitor.clear();
995
996        assert_eq!(monitor.agent_count(), 0);
997        assert_eq!(monitor.active_tool_call_count(), 0);
998    }
999
1000    #[test]
1001    fn test_monitor_config_default() {
1002        let config = MonitorConfig::default();
1003
1004        assert!(config.track_tool_calls);
1005        assert!(config.track_api_latencies);
1006        assert!(!config.auto_persist);
1007        assert_eq!(config.max_metrics_in_memory, 1000);
1008    }
1009
1010    #[test]
1011    fn test_agent_monitor_with_config() {
1012        let config = MonitorConfig {
1013            track_tool_calls: false,
1014            track_api_latencies: false,
1015            auto_persist: false,
1016            max_metrics_in_memory: 100,
1017            metrics_dir: Some(PathBuf::from("/tmp/test_metrics")),
1018        };
1019
1020        let mut monitor = AgentMonitor::new(Some(config));
1021        monitor.start_tracking("agent-1", "test", None);
1022
1023        // Tool calls should not be tracked
1024        let tool_call_id = monitor.start_tool_call("agent-1", "test_tool", None);
1025        assert!(tool_call_id.is_empty());
1026        assert_eq!(monitor.active_tool_call_count(), 0);
1027    }
1028
1029    #[test]
1030    fn test_full_agent_metrics_is_timed_out() {
1031        let mut metrics =
1032            FullAgentMetrics::new("agent-1", "test").with_timeout(Duration::from_millis(100));
1033
1034        // Not timed out yet
1035        assert!(!metrics.is_timed_out());
1036
1037        // Simulate completion with timeout exceeded
1038        metrics.duration = Some(Duration::from_millis(200));
1039        assert!(metrics.is_timed_out());
1040    }
1041
1042    #[test]
1043    fn test_performance_metrics_calculation() {
1044        let mut metrics = FullAgentMetrics::new("agent-1", "test");
1045
1046        // Record some API calls with latencies
1047        metrics.record_api_call(true, Some(Duration::from_millis(100)));
1048        metrics.record_api_call(true, Some(Duration::from_millis(200)));
1049
1050        // Add some tool calls
1051        let mut tool1 = ToolCallMetric::new("tool1");
1052        tool1.complete(true, None);
1053        tool1.duration = Some(Duration::from_millis(50));
1054        metrics.add_tool_call(tool1);
1055
1056        let mut tool2 = ToolCallMetric::new("tool2");
1057        tool2.complete(true, None);
1058        tool2.duration = Some(Duration::from_millis(150));
1059        metrics.add_tool_call(tool2);
1060
1061        // Record tokens
1062        metrics.record_tokens(1000, 500);
1063
1064        // Complete the metrics
1065        metrics.duration = Some(Duration::from_secs(1));
1066        metrics.complete(AgentExecutionStatus::Completed);
1067
1068        // Check performance metrics
1069        assert!(metrics.performance.avg_api_latency.is_some());
1070        assert!(metrics.performance.avg_tool_duration.is_some());
1071        assert!(metrics.performance.tokens_per_second.is_some());
1072
1073        // Average API latency should be 150ms
1074        let avg_api = metrics.performance.avg_api_latency.unwrap();
1075        assert!((avg_api.as_millis() as i64 - 150).abs() < 10);
1076
1077        // Average tool duration should be 100ms
1078        let avg_tool = metrics.performance.avg_tool_duration.unwrap();
1079        assert!((avg_tool.as_millis() as i64 - 100).abs() < 10);
1080    }
1081}