Skip to main content

aster/agents/monitor/
analyzer.rs

1//! Performance Analyzer
2//!
3//! Analyzes agent performance, identifies bottlenecks,
4//! and provides optimization suggestions.
5//!
6//! This module provides:
7//! - Performance scoring across multiple dimensions
8//! - Bottleneck identification
9//! - Optimization suggestions
10//! - Performance ratings (excellent, good, fair, poor)
11
12use chrono::{DateTime, Utc};
13use serde::{Deserialize, Serialize};
14
15use super::metrics::FullAgentMetrics;
16
/// Performance rating levels.
///
/// Buckets derived from an overall 0-100 score via
/// [`PerformanceRating::from_score`]. Serialized as lowercase strings,
/// matching the `Display` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum PerformanceRating {
    /// Excellent performance (score >= 80)
    Excellent,
    /// Good performance (score >= 60)
    Good,
    /// Fair performance (score >= 40)
    Fair,
    /// Poor performance (score < 40)
    Poor,
}
30
31impl PerformanceRating {
32    /// Get rating from score (0-100)
33    pub fn from_score(score: f32) -> Self {
34        if score >= 80.0 {
35            Self::Excellent
36        } else if score >= 60.0 {
37            Self::Good
38        } else if score >= 40.0 {
39            Self::Fair
40        } else {
41            Self::Poor
42        }
43    }
44}
45
46impl std::fmt::Display for PerformanceRating {
47    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48        match self {
49            Self::Excellent => write!(f, "excellent"),
50            Self::Good => write!(f, "good"),
51            Self::Fair => write!(f, "fair"),
52            Self::Poor => write!(f, "poor"),
53        }
54    }
55}
56
/// Bottleneck category.
///
/// Serialized as snake_case strings, matching the `Display`
/// implementation below.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BottleneckCategory {
    /// High API latency
    HighLatency,
    /// Slow tool execution
    SlowTools,
    /// High error rate
    HighErrorRate,
    /// High cost
    HighCost,
    /// Low throughput
    LowThroughput,
    /// Timeout issues
    TimeoutRisk,
}
74
75impl std::fmt::Display for BottleneckCategory {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        match self {
78            Self::HighLatency => write!(f, "high_latency"),
79            Self::SlowTools => write!(f, "slow_tools"),
80            Self::HighErrorRate => write!(f, "high_error_rate"),
81            Self::HighCost => write!(f, "high_cost"),
82            Self::LowThroughput => write!(f, "low_throughput"),
83            Self::TimeoutRisk => write!(f, "timeout_risk"),
84        }
85    }
86}
87
/// A performance bottleneck identified by the analyzer.
///
/// Built via [`Bottleneck::new`] plus the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Bottleneck {
    /// Category of the bottleneck
    pub category: BottleneckCategory,
    /// Severity (0-100, higher is worse); clamped on construction
    pub severity: f32,
    /// Human-readable description of the bottleneck
    pub description: String,
    /// Affected component (e.g. tool name), when identifiable
    pub affected_component: Option<String>,
    /// Pre-formatted current value that triggered the bottleneck (e.g. "2500ms")
    pub current_value: Option<String>,
    /// Pre-formatted threshold that was exceeded (e.g. "2000ms")
    pub threshold: Option<String>,
}
105
106impl Bottleneck {
107    /// Create a new bottleneck
108    pub fn new(
109        category: BottleneckCategory,
110        severity: f32,
111        description: impl Into<String>,
112    ) -> Self {
113        Self {
114            category,
115            severity: severity.clamp(0.0, 100.0),
116            description: description.into(),
117            affected_component: None,
118            current_value: None,
119            threshold: None,
120        }
121    }
122
123    /// Set affected component
124    pub fn with_component(mut self, component: impl Into<String>) -> Self {
125        self.affected_component = Some(component.into());
126        self
127    }
128
129    /// Set current value
130    pub fn with_current_value(mut self, value: impl Into<String>) -> Self {
131        self.current_value = Some(value.into());
132        self
133    }
134
135    /// Set threshold
136    pub fn with_threshold(mut self, threshold: impl Into<String>) -> Self {
137        self.threshold = Some(threshold.into());
138        self
139    }
140}
141
/// Suggestion priority.
///
/// Variant order matters: the derived `Ord` gives Low < Medium < High,
/// which `suggest_optimizations` relies on to sort highest-first.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SuggestionPriority {
    /// Low priority suggestion
    Low,
    /// Medium priority suggestion
    Medium,
    /// High priority suggestion
    High,
}
153
/// An optimization suggestion produced by the analyzer.
///
/// Built via [`Suggestion::new`] plus the `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Suggestion {
    /// Priority of the suggestion
    pub priority: SuggestionPriority,
    /// Short title of the suggestion
    pub title: String,
    /// Detailed description of the recommended change
    pub description: String,
    /// Free-text expected improvement (e.g. "Could reduce latency by 30-50%")
    pub expected_improvement: Option<String>,
    /// Bottleneck category this suggestion addresses, if any
    pub related_to: Option<BottleneckCategory>,
}
169
170impl Suggestion {
171    /// Create a new suggestion
172    pub fn new(
173        priority: SuggestionPriority,
174        title: impl Into<String>,
175        description: impl Into<String>,
176    ) -> Self {
177        Self {
178            priority,
179            title: title.into(),
180            description: description.into(),
181            expected_improvement: None,
182            related_to: None,
183        }
184    }
185
186    /// Set expected improvement
187    pub fn with_improvement(mut self, improvement: impl Into<String>) -> Self {
188        self.expected_improvement = Some(improvement.into());
189        self
190    }
191
192    /// Set related bottleneck
193    pub fn with_related_to(mut self, category: BottleneckCategory) -> Self {
194        self.related_to = Some(category);
195        self
196    }
197}
198
/// Performance scores across dimensions.
///
/// Each dimension is scored 0-100 (higher is better); the weighted
/// overall score is computed by [`PerformanceScores::overall`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PerformanceScores {
    /// Latency score (0-100)
    pub latency_score: f32,
    /// Throughput score (0-100)
    pub throughput_score: f32,
    /// Error rate score (0-100)
    pub error_rate_score: f32,
    /// Cost efficiency score (0-100)
    pub cost_efficiency_score: f32,
    /// Tool efficiency score (0-100)
    pub tool_efficiency_score: f32,
}
214
215impl PerformanceScores {
216    /// Calculate overall score as weighted average
217    pub fn overall(&self) -> f32 {
218        let weights = [0.25, 0.20, 0.25, 0.15, 0.15];
219        let scores = [
220            self.latency_score,
221            self.throughput_score,
222            self.error_rate_score,
223            self.cost_efficiency_score,
224            self.tool_efficiency_score,
225        ];
226
227        let weighted_sum: f32 = scores.iter().zip(weights.iter()).map(|(s, w)| s * w).sum();
228        weighted_sum.clamp(0.0, 100.0)
229    }
230}
231
/// Performance report for a single agent.
///
/// Produced by `PerformanceAnalyzer::analyze_agent`; the overall score
/// and rating are derived from `scores` at construction time.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PerformanceReport {
    /// Agent ID
    pub agent_id: String,
    /// Overall performance score (0-100), from `PerformanceScores::overall`
    pub overall_score: f32,
    /// Performance rating derived from the overall score
    pub rating: PerformanceRating,
    /// Detailed per-dimension scores
    pub scores: PerformanceScores,
    /// Identified bottlenecks, sorted by severity (highest first)
    pub bottlenecks: Vec<Bottleneck>,
    /// Optimization suggestions, sorted by priority (highest first)
    pub suggestions: Vec<Suggestion>,
    /// Report creation timestamp (UTC)
    pub timestamp: DateTime<Utc>,
}
251
252impl PerformanceReport {
253    /// Create a new performance report
254    pub fn new(agent_id: impl Into<String>, scores: PerformanceScores) -> Self {
255        let overall_score = scores.overall();
256        Self {
257            agent_id: agent_id.into(),
258            overall_score,
259            rating: PerformanceRating::from_score(overall_score),
260            scores,
261            bottlenecks: Vec::new(),
262            suggestions: Vec::new(),
263            timestamp: Utc::now(),
264        }
265    }
266
267    /// Add a bottleneck
268    pub fn add_bottleneck(&mut self, bottleneck: Bottleneck) {
269        self.bottlenecks.push(bottleneck);
270    }
271
272    /// Add a suggestion
273    pub fn add_suggestion(&mut self, suggestion: Suggestion) {
274        self.suggestions.push(suggestion);
275    }
276}
277
/// Thresholds for performance analysis.
///
/// Each dimension has a "good" and a "poor" bound; values between the
/// two are scored linearly by `PerformanceAnalyzer::score_from_range`.
#[derive(Debug, Clone)]
pub struct AnalysisThresholds {
    /// Good API latency threshold (ms) — at or below scores 100
    pub good_latency_ms: u64,
    /// Poor API latency threshold (ms) — at or above scores 0
    pub poor_latency_ms: u64,
    /// Good tool duration threshold (ms)
    pub good_tool_duration_ms: u64,
    /// Poor tool duration threshold (ms)
    pub poor_tool_duration_ms: u64,
    /// Good error rate threshold (fraction, e.g. 0.05 = 5%)
    pub good_error_rate: f32,
    /// Poor error rate threshold (fraction)
    pub poor_error_rate: f32,
    /// Good tokens per second (higher is better)
    pub good_tokens_per_second: f64,
    /// Poor tokens per second
    pub poor_tokens_per_second: f64,
    /// Cost per 1000 tokens (good) — formatted as dollars in reports
    pub good_cost_per_1k_tokens: f64,
    /// Cost per 1000 tokens (poor)
    pub poor_cost_per_1k_tokens: f64,
}
302
impl Default for AnalysisThresholds {
    /// Default thresholds used by `PerformanceAnalyzer::new`.
    fn default() -> Self {
        Self {
            good_latency_ms: 500,
            poor_latency_ms: 2000,
            good_tool_duration_ms: 1000,
            poor_tool_duration_ms: 5000,
            good_error_rate: 0.05,  // 5%
            poor_error_rate: 0.20,  // 20%
            good_tokens_per_second: 50.0,
            poor_tokens_per_second: 10.0,
            good_cost_per_1k_tokens: 0.01,
            poor_cost_per_1k_tokens: 0.05,
        }
    }
}
319
/// Performance Analyzer.
///
/// Scores agent metrics, identifies bottlenecks, and generates
/// optimization suggestions, driven entirely by its thresholds.
#[derive(Debug, Clone)]
pub struct PerformanceAnalyzer {
    /// Thresholds that drive scoring and bottleneck detection
    thresholds: AnalysisThresholds,
}
326
327impl Default for PerformanceAnalyzer {
328    fn default() -> Self {
329        Self::new()
330    }
331}
332
impl PerformanceAnalyzer {
    /// Create a new PerformanceAnalyzer with default thresholds.
    pub fn new() -> Self {
        Self {
            thresholds: AnalysisThresholds::default(),
        }
    }

    /// Create with custom thresholds.
    pub fn with_thresholds(thresholds: AnalysisThresholds) -> Self {
        Self { thresholds }
    }

    /// Analyze multiple agents, producing one report per metrics entry.
    pub fn analyze(&self, metrics: &[FullAgentMetrics]) -> Vec<PerformanceReport> {
        metrics.iter().map(|m| self.analyze_agent(m)).collect()
    }

    /// Analyze a single agent: compute scores, then attach bottlenecks
    /// and suggestions to the report.
    pub fn analyze_agent(&self, metrics: &FullAgentMetrics) -> PerformanceReport {
        let scores = self.calculate_scores(metrics);
        let mut report = PerformanceReport::new(&metrics.agent_id, scores);

        // Identify bottlenecks
        let bottlenecks = self.identify_bottlenecks(metrics);
        for bottleneck in bottlenecks {
            report.add_bottleneck(bottleneck);
        }

        // Generate suggestions
        let suggestions = self.suggest_optimizations(metrics);
        for suggestion in suggestions {
            report.add_suggestion(suggestion);
        }

        report
    }

    /// Calculate all five performance score dimensions.
    fn calculate_scores(&self, metrics: &FullAgentMetrics) -> PerformanceScores {
        PerformanceScores {
            latency_score: self.calculate_latency_score(metrics),
            throughput_score: self.calculate_throughput_score(metrics),
            error_rate_score: self.calculate_error_rate_score(metrics),
            cost_efficiency_score: self.calculate_cost_efficiency_score(metrics),
            tool_efficiency_score: self.calculate_tool_efficiency_score(metrics),
        }
    }

    /// Calculate latency score (0-100). Lower latency is better.
    fn calculate_latency_score(&self, metrics: &FullAgentMetrics) -> f32 {
        // NOTE(review): as_millis truncates, so a sub-millisecond average
        // latency also yields 0 here and is treated as "no data" below.
        let avg_latency_ms = metrics
            .performance
            .avg_api_latency
            .map(|d| d.as_millis() as u64)
            .unwrap_or(0);

        if avg_latency_ms == 0 {
            return 100.0; // No latency data, assume good
        }

        self.score_from_range(
            avg_latency_ms as f64,
            self.thresholds.good_latency_ms as f64,
            self.thresholds.poor_latency_ms as f64,
            true, // Lower is better
        )
    }

    /// Calculate throughput score (0-100). Higher tokens/sec is better.
    fn calculate_throughput_score(&self, metrics: &FullAgentMetrics) -> f32 {
        let tokens_per_second = metrics.performance.tokens_per_second.unwrap_or(0.0);

        if tokens_per_second == 0.0 {
            return 50.0; // No data, neutral score
        }

        // Note the argument order: for higher-is-better, the "good"
        // parameter of score_from_range receives the high threshold.
        self.score_from_range(
            tokens_per_second,
            self.thresholds.poor_tokens_per_second,
            self.thresholds.good_tokens_per_second,
            false, // Higher is better
        )
    }

    /// Calculate error rate score (0-100). Lower error rate is better.
    fn calculate_error_rate_score(&self, metrics: &FullAgentMetrics) -> f32 {
        // error_rate() is a fraction in [0, 1] (formatted as a percentage
        // elsewhere in this file).
        let error_rate = metrics.error_rate();

        self.score_from_range(
            error_rate as f64,
            self.thresholds.good_error_rate as f64,
            self.thresholds.poor_error_rate as f64,
            true, // Lower is better
        )
    }

    /// Calculate cost efficiency score (0-100). Lower cost per 1K tokens
    /// is better.
    fn calculate_cost_efficiency_score(&self, metrics: &FullAgentMetrics) -> f32 {
        let total_tokens = metrics.tokens_used.total;
        if total_tokens == 0 {
            return 100.0; // No tokens used, no cost
        }

        // Normalize total cost to a per-1000-token figure.
        let cost_per_1k = (metrics.cost / total_tokens as f64) * 1000.0;

        self.score_from_range(
            cost_per_1k,
            self.thresholds.good_cost_per_1k_tokens,
            self.thresholds.poor_cost_per_1k_tokens,
            true, // Lower is better
        )
    }

    /// Calculate tool efficiency score (0-100). Lower average tool
    /// duration is better.
    fn calculate_tool_efficiency_score(&self, metrics: &FullAgentMetrics) -> f32 {
        // NOTE(review): as with latency, a sub-millisecond average
        // truncates to 0 and is scored as 100 below.
        let avg_tool_duration_ms = metrics
            .performance
            .avg_tool_duration
            .map(|d| d.as_millis() as u64)
            .unwrap_or(0);

        if avg_tool_duration_ms == 0 {
            return 100.0; // No tool calls or very fast
        }

        self.score_from_range(
            avg_tool_duration_ms as f64,
            self.thresholds.good_tool_duration_ms as f64,
            self.thresholds.poor_tool_duration_ms as f64,
            true, // Lower is better
        )
    }

    /// Calculate score from a range, interpolating linearly between the
    /// two thresholds.
    /// If lower_is_better is true, values <= good get 100, values >= poor get 0
    /// If lower_is_better is false, values >= good get 100, values <= poor get 0
    fn score_from_range(&self, value: f64, good: f64, poor: f64, lower_is_better: bool) -> f32 {
        if lower_is_better {
            if value <= good {
                100.0
            } else if value >= poor {
                0.0
            } else {
                // Linear falloff from 100 at `good` down to 0 at `poor`.
                let range = poor - good;
                let position = value - good;
                (100.0 * (1.0 - position / range)) as f32
            }
        } else if value >= good {
            100.0
        } else if value <= poor {
            0.0
        } else {
            // Linear rise from 0 at `poor` up to 100 at `good`.
            let range = good - poor;
            let position = value - poor;
            (100.0 * (position / range)) as f32
        }
    }

    /// Identify performance bottlenecks.
    ///
    /// Each check fires only when the corresponding "poor" threshold is
    /// exceeded. Severity is proportional to the overshoot (roughly 50 at
    /// the threshold, capped at 100). The result is sorted by severity,
    /// highest first.
    pub fn identify_bottlenecks(&self, metrics: &FullAgentMetrics) -> Vec<Bottleneck> {
        let mut bottlenecks = Vec::new();

        // Check API latency
        if let Some(avg_latency) = metrics.performance.avg_api_latency {
            let latency_ms = avg_latency.as_millis() as u64;
            if latency_ms > self.thresholds.poor_latency_ms {
                // ratio * 50: just over threshold ~= 50, 2x threshold = 100.
                let severity = ((latency_ms as f32 / self.thresholds.poor_latency_ms as f32)
                    * 50.0)
                    .min(100.0);
                bottlenecks.push(
                    Bottleneck::new(
                        BottleneckCategory::HighLatency,
                        severity,
                        format!("API latency is {}ms, exceeding threshold", latency_ms),
                    )
                    .with_current_value(format!("{}ms", latency_ms))
                    .with_threshold(format!("{}ms", self.thresholds.poor_latency_ms)),
                );
            }
        }

        // Check tool duration
        if let Some(avg_tool_duration) = metrics.performance.avg_tool_duration {
            let duration_ms = avg_tool_duration.as_millis() as u64;
            if duration_ms > self.thresholds.poor_tool_duration_ms {
                let severity =
                    ((duration_ms as f32 / self.thresholds.poor_tool_duration_ms as f32) * 50.0)
                        .min(100.0);

                // Find the slowest tool to name as the affected component.
                let slowest_tool = metrics
                    .tool_calls
                    .iter()
                    .filter_map(|t| t.duration.map(|d| (t.tool_name.clone(), d)))
                    .max_by_key(|(_, d)| d.as_millis());

                let mut bottleneck = Bottleneck::new(
                    BottleneckCategory::SlowTools,
                    severity,
                    format!(
                        "Average tool duration is {}ms, exceeding threshold",
                        duration_ms
                    ),
                )
                .with_current_value(format!("{}ms", duration_ms))
                .with_threshold(format!("{}ms", self.thresholds.poor_tool_duration_ms));

                if let Some((tool_name, _)) = slowest_tool {
                    bottleneck = bottleneck.with_component(tool_name);
                }

                bottlenecks.push(bottleneck);
            }
        }

        // Check error rate
        let error_rate = metrics.error_rate();
        if error_rate > self.thresholds.poor_error_rate {
            let severity = ((error_rate / self.thresholds.poor_error_rate) * 50.0).min(100.0);
            bottlenecks.push(
                Bottleneck::new(
                    BottleneckCategory::HighErrorRate,
                    severity,
                    format!(
                        "Error rate is {:.1}%, exceeding threshold",
                        error_rate * 100.0
                    ),
                )
                .with_current_value(format!("{:.1}%", error_rate * 100.0))
                .with_threshold(format!("{:.1}%", self.thresholds.poor_error_rate * 100.0)),
            );
        }

        // Check cost
        let total_tokens = metrics.tokens_used.total;
        if total_tokens > 0 {
            let cost_per_1k = (metrics.cost / total_tokens as f64) * 1000.0;
            if cost_per_1k > self.thresholds.poor_cost_per_1k_tokens {
                let severity = ((cost_per_1k / self.thresholds.poor_cost_per_1k_tokens) * 50.0)
                    .min(100.0) as f32;
                bottlenecks.push(
                    Bottleneck::new(
                        BottleneckCategory::HighCost,
                        severity,
                        format!(
                            "Cost per 1K tokens is ${:.4}, exceeding threshold",
                            cost_per_1k
                        ),
                    )
                    .with_current_value(format!("${:.4}", cost_per_1k))
                    .with_threshold(format!("${:.4}", self.thresholds.poor_cost_per_1k_tokens)),
                );
            }
        }

        // Check throughput (only when there is a non-zero measurement)
        if let Some(tokens_per_second) = metrics.performance.tokens_per_second {
            if tokens_per_second < self.thresholds.poor_tokens_per_second && tokens_per_second > 0.0
            {
                // Inverted ratio * 25: just under threshold ~= 25, grows as
                // throughput drops, capped at 100.
                let severity = ((self.thresholds.poor_tokens_per_second / tokens_per_second) * 25.0)
                    .min(100.0) as f32;
                bottlenecks.push(
                    Bottleneck::new(
                        BottleneckCategory::LowThroughput,
                        severity,
                        format!(
                            "Throughput is {:.1} tokens/sec, below threshold",
                            tokens_per_second
                        ),
                    )
                    .with_current_value(format!("{:.1} tokens/sec", tokens_per_second))
                    .with_threshold(format!(
                        "{:.1} tokens/sec",
                        self.thresholds.poor_tokens_per_second
                    )),
                );
            }
        }

        // Check timeout risk: flag when execution consumed more than 80%
        // of the timeout budget.
        if let (Some(timeout), Some(duration)) = (metrics.timeout, metrics.duration) {
            let usage_ratio = duration.as_millis() as f64 / timeout.as_millis() as f64;
            if usage_ratio > 0.8 {
                // (ratio - 0.8) * 500: 80% usage = 0 severity, 100% = 100.
                let severity = ((usage_ratio - 0.8) * 500.0).min(100.0) as f32;
                bottlenecks.push(
                    Bottleneck::new(
                        BottleneckCategory::TimeoutRisk,
                        severity,
                        format!(
                            "Execution used {:.0}% of timeout budget",
                            usage_ratio * 100.0
                        ),
                    )
                    .with_current_value(format!("{:.0}%", usage_ratio * 100.0))
                    .with_threshold("80%".to_string()),
                );
            }
        }

        // Sort by severity (highest first)
        bottlenecks.sort_by(|a, b| {
            b.severity
                .partial_cmp(&a.severity)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        bottlenecks
    }

    /// Generate optimization suggestions.
    ///
    /// Suggestions fire at the "good" threshold (earlier than bottleneck
    /// detection, which fires at "poor"); priority is High past the
    /// "poor" threshold, Medium otherwise. Sorted highest priority first.
    pub fn suggest_optimizations(&self, metrics: &FullAgentMetrics) -> Vec<Suggestion> {
        let mut suggestions = Vec::new();

        // Latency suggestions
        if let Some(avg_latency) = metrics.performance.avg_api_latency {
            let latency_ms = avg_latency.as_millis() as u64;
            if latency_ms > self.thresholds.good_latency_ms {
                let priority = if latency_ms > self.thresholds.poor_latency_ms {
                    SuggestionPriority::High
                } else {
                    SuggestionPriority::Medium
                };
                suggestions.push(
                    Suggestion::new(
                        priority,
                        "Reduce API latency",
                        "Consider batching API calls or using a faster model for simple tasks",
                    )
                    .with_improvement("Could reduce latency by 30-50%")
                    .with_related_to(BottleneckCategory::HighLatency),
                );
            }
        }

        // Tool efficiency suggestions
        if let Some(avg_tool_duration) = metrics.performance.avg_tool_duration {
            let duration_ms = avg_tool_duration.as_millis() as u64;
            if duration_ms > self.thresholds.good_tool_duration_ms {
                let priority = if duration_ms > self.thresholds.poor_tool_duration_ms {
                    SuggestionPriority::High
                } else {
                    SuggestionPriority::Medium
                };

                // Find slow tools (one entry per call exceeding the "good"
                // duration threshold).
                // NOTE(review): a tool called several times slowly appears
                // once per slow call in this list — confirm if intended.
                let slow_tools: Vec<_> = metrics
                    .tool_calls
                    .iter()
                    .filter(|t| {
                        t.duration
                            .map(|d| d.as_millis() as u64 > self.thresholds.good_tool_duration_ms)
                            .unwrap_or(false)
                    })
                    .map(|t| t.tool_name.clone())
                    .collect();

                let description = if slow_tools.is_empty() {
                    "Optimize tool execution by caching results or parallelizing calls".to_string()
                } else {
                    format!(
                        "Optimize slow tools: {}. Consider caching or parallelization",
                        slow_tools.join(", ")
                    )
                };

                suggestions.push(
                    Suggestion::new(priority, "Optimize tool execution", description)
                        .with_improvement("Could reduce tool execution time by 20-40%")
                        .with_related_to(BottleneckCategory::SlowTools),
                );
            }
        }

        // Error rate suggestions
        let error_rate = metrics.error_rate();
        if error_rate > self.thresholds.good_error_rate {
            let priority = if error_rate > self.thresholds.poor_error_rate {
                SuggestionPriority::High
            } else {
                SuggestionPriority::Medium
            };
            suggestions.push(
                Suggestion::new(
                    priority,
                    "Reduce error rate",
                    "Implement retry logic with exponential backoff, or improve input validation",
                )
                .with_improvement("Could reduce errors by 50-70%")
                .with_related_to(BottleneckCategory::HighErrorRate),
            );
        }

        // Cost suggestions
        let total_tokens = metrics.tokens_used.total;
        if total_tokens > 0 {
            let cost_per_1k = (metrics.cost / total_tokens as f64) * 1000.0;
            if cost_per_1k > self.thresholds.good_cost_per_1k_tokens {
                let priority = if cost_per_1k > self.thresholds.poor_cost_per_1k_tokens {
                    SuggestionPriority::High
                } else {
                    SuggestionPriority::Medium
                };
                suggestions.push(
                    Suggestion::new(
                        priority,
                        "Reduce costs",
                        "Consider using a smaller model for simple tasks, or implement prompt caching",
                    )
                    .with_improvement("Could reduce costs by 30-60%")
                    .with_related_to(BottleneckCategory::HighCost),
                );
            }
        }

        // Throughput suggestions (only when there is a non-zero measurement)
        if let Some(tokens_per_second) = metrics.performance.tokens_per_second {
            if tokens_per_second < self.thresholds.good_tokens_per_second && tokens_per_second > 0.0
            {
                let priority = if tokens_per_second < self.thresholds.poor_tokens_per_second {
                    SuggestionPriority::High
                } else {
                    SuggestionPriority::Medium
                };
                suggestions.push(
                    Suggestion::new(
                        priority,
                        "Improve throughput",
                        "Consider streaming responses or parallel processing for independent tasks",
                    )
                    .with_improvement("Could improve throughput by 2-3x")
                    .with_related_to(BottleneckCategory::LowThroughput),
                );
            }
        }

        // Timeout risk suggestions (same 80%-of-budget trigger as the
        // bottleneck check; always High priority)
        if let (Some(timeout), Some(duration)) = (metrics.timeout, metrics.duration) {
            let usage_ratio = duration.as_millis() as f64 / timeout.as_millis() as f64;
            if usage_ratio > 0.8 {
                suggestions.push(
                    Suggestion::new(
                        SuggestionPriority::High,
                        "Address timeout risk",
                        "Increase timeout or optimize execution to reduce duration",
                    )
                    .with_improvement("Prevent potential timeout failures")
                    .with_related_to(BottleneckCategory::TimeoutRisk),
                );
            }
        }

        // General suggestions based on overall performance: more than 10
        // tool calls with more than 2 failures.
        if metrics.tool_calls.len() > 10 {
            let failed_tools = metrics.tool_calls.iter().filter(|t| !t.success).count();
            if failed_tools > 2 {
                suggestions.push(
                    Suggestion::new(
                        SuggestionPriority::Medium,
                        "Review tool call patterns",
                        format!(
                            "{} out of {} tool calls failed. Review tool usage patterns",
                            failed_tools,
                            metrics.tool_calls.len()
                        ),
                    )
                    .with_improvement("Could improve reliability"),
                );
            }
        }

        // Sort by priority (highest first); relies on the derived Ord of
        // SuggestionPriority (Low < Medium < High).
        suggestions.sort_by(|a, b| b.priority.cmp(&a.priority));

        suggestions
    }
}
810
811#[cfg(test)]
812mod tests {
813    use super::*;
814    #[allow(unused_imports)]
815    use crate::agents::monitor::alerts::AgentExecutionStatus;
816
    /// Build a minimal metrics fixture for the given agent id.
    fn create_test_metrics(agent_id: &str) -> FullAgentMetrics {
        FullAgentMetrics::new(agent_id, "test")
    }
820
821    #[test]
822    fn test_performance_rating_from_score() {
823        assert_eq!(
824            PerformanceRating::from_score(100.0),
825            PerformanceRating::Excellent
826        );
827        assert_eq!(
828            PerformanceRating::from_score(80.0),
829            PerformanceRating::Excellent
830        );
831        assert_eq!(PerformanceRating::from_score(79.9), PerformanceRating::Good);
832        assert_eq!(PerformanceRating::from_score(60.0), PerformanceRating::Good);
833        assert_eq!(PerformanceRating::from_score(59.9), PerformanceRating::Fair);
834        assert_eq!(PerformanceRating::from_score(40.0), PerformanceRating::Fair);
835        assert_eq!(PerformanceRating::from_score(39.9), PerformanceRating::Poor);
836        assert_eq!(PerformanceRating::from_score(0.0), PerformanceRating::Poor);
837    }
838
    /// Exercises the Bottleneck builder chain and checks every field lands.
    #[test]
    fn test_bottleneck_creation() {
        let bottleneck = Bottleneck::new(
            BottleneckCategory::HighLatency,
            75.0,
            "High latency detected",
        )
        .with_component("api_call")
        .with_current_value("2500ms")
        .with_threshold("2000ms");

        assert_eq!(bottleneck.category, BottleneckCategory::HighLatency);
        assert_eq!(bottleneck.severity, 75.0);
        assert_eq!(bottleneck.affected_component, Some("api_call".to_string()));
        assert_eq!(bottleneck.current_value, Some("2500ms".to_string()));
        assert_eq!(bottleneck.threshold, Some("2000ms".to_string()));
    }
856
857    #[test]
858    fn test_suggestion_creation() {
859        let suggestion = Suggestion::new(SuggestionPriority::High, "Reduce latency", "Use caching")
860            .with_improvement("30% improvement")
861            .with_related_to(BottleneckCategory::HighLatency);
862
863        assert_eq!(suggestion.priority, SuggestionPriority::High);
864        assert_eq!(suggestion.title, "Reduce latency");
865        assert_eq!(
866            suggestion.expected_improvement,
867            Some("30% improvement".to_string())
868        );
869        assert_eq!(suggestion.related_to, Some(BottleneckCategory::HighLatency));
870    }
871
872    #[test]
873    fn test_performance_scores_overall() {
874        let scores = PerformanceScores {
875            latency_score: 80.0,
876            throughput_score: 60.0,
877            error_rate_score: 100.0,
878            cost_efficiency_score: 70.0,
879            tool_efficiency_score: 50.0,
880        };
881
882        // Weighted: 80*0.25 + 60*0.20 + 100*0.25 + 70*0.15 + 50*0.15
883        // = 20 + 12 + 25 + 10.5 + 7.5 = 75
884        let overall = scores.overall();
885        assert!((overall - 75.0).abs() < 0.1);
886    }
887
888    #[test]
889    fn test_analyzer_creation() {
890        let analyzer = PerformanceAnalyzer::new();
891        assert_eq!(analyzer.thresholds.good_latency_ms, 500);
892        assert_eq!(analyzer.thresholds.poor_latency_ms, 2000);
893    }
894
895    #[test]
896    fn test_analyze_agent_basic() {
897        let analyzer = PerformanceAnalyzer::new();
898        let metrics = create_test_metrics("agent-1");
899
900        let report = analyzer.analyze_agent(&metrics);
901
902        assert_eq!(report.agent_id, "agent-1");
903        assert!(report.overall_score >= 0.0 && report.overall_score <= 100.0);
904    }
905
906    #[test]
907    fn test_analyze_multiple_agents() {
908        let analyzer = PerformanceAnalyzer::new();
909        let metrics = vec![
910            create_test_metrics("agent-1"),
911            create_test_metrics("agent-2"),
912            create_test_metrics("agent-3"),
913        ];
914
915        let reports = analyzer.analyze(&metrics);
916
917        assert_eq!(reports.len(), 3);
918        assert_eq!(reports[0].agent_id, "agent-1");
919        assert_eq!(reports[1].agent_id, "agent-2");
920        assert_eq!(reports[2].agent_id, "agent-3");
921    }
922
923    #[test]
924    fn test_identify_bottlenecks_high_error_rate() {
925        let analyzer = PerformanceAnalyzer::new();
926        let mut metrics = create_test_metrics("agent-1");
927        metrics.api_calls = 10;
928        metrics.api_calls_successful = 5; // 50% error rate
929
930        let bottlenecks = analyzer.identify_bottlenecks(&metrics);
931
932        assert!(!bottlenecks.is_empty());
933        assert!(bottlenecks
934            .iter()
935            .any(|b| b.category == BottleneckCategory::HighErrorRate));
936    }
937
938    #[test]
939    fn test_identify_bottlenecks_high_cost() {
940        let analyzer = PerformanceAnalyzer::new();
941        let mut metrics = create_test_metrics("agent-1");
942        metrics.tokens_used.total = 1000;
943        metrics.cost = 1.0; // $1 per 1000 tokens = very high
944
945        let bottlenecks = analyzer.identify_bottlenecks(&metrics);
946
947        assert!(!bottlenecks.is_empty());
948        assert!(bottlenecks
949            .iter()
950            .any(|b| b.category == BottleneckCategory::HighCost));
951    }
952
953    #[test]
954    fn test_suggest_optimizations_high_error_rate() {
955        let analyzer = PerformanceAnalyzer::new();
956        let mut metrics = create_test_metrics("agent-1");
957        metrics.api_calls = 10;
958        metrics.api_calls_successful = 5;
959
960        let suggestions = analyzer.suggest_optimizations(&metrics);
961
962        assert!(!suggestions.is_empty());
963        assert!(suggestions.iter().any(|s| s.title.contains("error")));
964    }
965
966    #[test]
967    fn test_score_from_range_lower_is_better() {
968        let analyzer = PerformanceAnalyzer::new();
969
970        // Value at good threshold = 100
971        assert_eq!(analyzer.score_from_range(500.0, 500.0, 2000.0, true), 100.0);
972
973        // Value at poor threshold = 0
974        assert_eq!(analyzer.score_from_range(2000.0, 500.0, 2000.0, true), 0.0);
975
976        // Value below good = 100
977        assert_eq!(analyzer.score_from_range(100.0, 500.0, 2000.0, true), 100.0);
978
979        // Value above poor = 0
980        assert_eq!(analyzer.score_from_range(3000.0, 500.0, 2000.0, true), 0.0);
981
982        // Value in middle
983        let mid_score = analyzer.score_from_range(1250.0, 500.0, 2000.0, true);
984        assert!((mid_score - 50.0).abs() < 1.0);
985    }
986
987    #[test]
988    fn test_score_from_range_higher_is_better() {
989        let analyzer = PerformanceAnalyzer::new();
990
991        // For higher_is_better=false: good is the high value, poor is the low value
992        // Value at good threshold (50) = 100
993        assert_eq!(analyzer.score_from_range(50.0, 50.0, 10.0, false), 100.0);
994
995        // Value at poor threshold (10) = 0
996        assert_eq!(analyzer.score_from_range(10.0, 50.0, 10.0, false), 0.0);
997
998        // Value above good = 100
999        assert_eq!(analyzer.score_from_range(100.0, 50.0, 10.0, false), 100.0);
1000
1001        // Value below poor = 0
1002        assert_eq!(analyzer.score_from_range(5.0, 50.0, 10.0, false), 0.0);
1003
1004        // Value in middle (30 is halfway between 10 and 50)
1005        let mid_score = analyzer.score_from_range(30.0, 50.0, 10.0, false);
1006        assert!((mid_score - 50.0).abs() < 1.0);
1007    }
1008
1009    #[test]
1010    fn test_performance_report_creation() {
1011        let scores = PerformanceScores {
1012            latency_score: 80.0,
1013            throughput_score: 80.0,
1014            error_rate_score: 80.0,
1015            cost_efficiency_score: 80.0,
1016            tool_efficiency_score: 80.0,
1017        };
1018
1019        let report = PerformanceReport::new("agent-1", scores);
1020
1021        assert_eq!(report.agent_id, "agent-1");
1022        assert_eq!(report.overall_score, 80.0);
1023        assert_eq!(report.rating, PerformanceRating::Excellent);
1024    }
1025
1026    #[test]
1027    fn test_bottleneck_severity_clamping() {
1028        let bottleneck = Bottleneck::new(BottleneckCategory::HighLatency, 150.0, "Test");
1029        assert_eq!(bottleneck.severity, 100.0);
1030
1031        let bottleneck = Bottleneck::new(BottleneckCategory::HighLatency, -10.0, "Test");
1032        assert_eq!(bottleneck.severity, 0.0);
1033    }
1034}