1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3
4#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
7#[serde(rename_all = "lowercase")]
8pub enum Severity {
9 Info,
10 Warning,
11 Critical,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
15#[serde(rename_all = "lowercase")]
16pub enum RuleCategory {
17 Budget,
18 Memory,
19 Prompt,
20 Tools,
21 Cost,
22 Quality,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct Tip {
27 pub severity: Severity,
28 pub category: RuleCategory,
29 pub rule_name: String,
30 pub message: String,
31 pub suggestion: String,
32}
33
/// Metrics describing a single turn, consumed by the per-turn analysis rules.
///
/// `Default` yields an all-zero / empty turn, which is convenient as a base for
/// struct-update syntax (`TurnData { cost: 0.1, ..Default::default() }`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TurnData {
    /// Identifier of the turn; session grades refer to turns by this id.
    pub turn_id: String,
    /// Token budget used as the denominator for utilization ratios.
    /// Rules skip turns where this is not positive.
    pub token_budget: i64,
    /// Tokens taken by the system prompt.
    pub system_prompt_tokens: i64,
    /// Tokens taken by retrieved memory.
    pub memory_tokens: i64,
    /// Tokens taken by conversation history.
    pub history_tokens: i64,
    /// Number of messages in the conversation history.
    pub history_depth: i64,
    /// Complexity label; the session rules treat `"L2"` and `"L3"` as high complexity.
    pub complexity_level: String,
    /// Name of the model that served the turn.
    pub model: String,
    /// Cost of the turn (rendered with a `$` prefix in tip messages).
    pub cost: f64,
    /// Input token count.
    pub tokens_in: i64,
    /// Output token count.
    pub tokens_out: i64,
    /// Number of tool calls made during the turn.
    pub tool_call_count: i64,
    /// Number of those tool calls that failed.
    pub tool_failure_count: i64,
    /// Length of the thinking trace; `0` means no trace was produced.
    pub thinking_length: i64,
    /// Whether reasoning applies to this turn (checked together with `thinking_length`).
    pub has_reasoning: bool,
    /// Whether the turn was served from cache.
    pub cached: bool,
}

/// A session: its turns plus any grades recorded for them.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct SessionData {
    /// Turns of the session; the trend rules split this list into a first and second half.
    pub turns: Vec<TurnData>,
    /// Identifier of the session.
    pub session_id: String,
    /// `(turn_id, grade)` pairs; the `turn_id` is matched against [`TurnData::turn_id`].
    pub grades: Vec<(String, i32)>,
}
58
59pub trait AnalysisRule: Send + Sync {
62 fn name(&self) -> &str;
63 fn category(&self) -> RuleCategory;
64 fn evaluate_turn(&self, turn: &TurnData, session_avg_cost: Option<f64>) -> Option<Tip>;
65}
66
67pub trait SessionAnalysisRule: Send + Sync {
68 fn name(&self) -> &str;
69 fn category(&self) -> RuleCategory;
70 fn evaluate_session(&self, session: &SessionData) -> Option<Tip>;
71}
72
73pub struct BudgetPressure;
76
77impl AnalysisRule for BudgetPressure {
78 fn name(&self) -> &str {
79 "budget_pressure"
80 }
81 fn category(&self) -> RuleCategory {
82 RuleCategory::Budget
83 }
84 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
85 if turn.token_budget <= 0 {
86 return None;
87 }
88 let used = turn.system_prompt_tokens + turn.memory_tokens + turn.history_tokens;
89 let util = used as f64 / turn.token_budget as f64;
90 if util > 0.90 {
91 Some(Tip {
92 severity: if util > 0.95 {
93 Severity::Critical
94 } else {
95 Severity::Warning
96 },
97 category: RuleCategory::Budget,
98 rule_name: self.name().into(),
99 message: format!(
100 "Token utilization at {:.0}% of budget ({} / {})",
101 util * 100.0,
102 used,
103 turn.token_budget
104 ),
105 suggestion: "Consider reducing system prompt size, pruning history, or increasing the token budget.".into(),
106 })
107 } else {
108 None
109 }
110 }
111}
112
113pub struct SystemPromptHeavy;
114
115impl AnalysisRule for SystemPromptHeavy {
116 fn name(&self) -> &str {
117 "system_prompt_heavy"
118 }
119 fn category(&self) -> RuleCategory {
120 RuleCategory::Prompt
121 }
122 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
123 if turn.token_budget <= 0 {
124 return None;
125 }
126 let pct = turn.system_prompt_tokens as f64 / turn.token_budget as f64;
127 if pct > 0.40 {
128 Some(Tip {
129 severity: if pct > 0.60 {
130 Severity::Critical
131 } else {
132 Severity::Warning
133 },
134 category: RuleCategory::Prompt,
135 rule_name: self.name().into(),
136 message: format!(
137 "System prompt consumes {:.0}% of the token budget ({} tokens)",
138 pct * 100.0,
139 turn.system_prompt_tokens
140 ),
141 suggestion: "Audit the system prompt for redundancy. Move static instructions to a retrieval layer or condense with structured formatting.".into(),
142 })
143 } else {
144 None
145 }
146 }
147}
148
149pub struct MemoryStarvation;
150
151impl AnalysisRule for MemoryStarvation {
152 fn name(&self) -> &str {
153 "memory_starvation"
154 }
155 fn category(&self) -> RuleCategory {
156 RuleCategory::Memory
157 }
158 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
159 if turn.token_budget <= 0 {
160 return None;
161 }
162 let pct = turn.memory_tokens as f64 / turn.token_budget as f64;
163 if pct < 0.10 && turn.memory_tokens >= 0 {
164 Some(Tip {
165 severity: Severity::Info,
166 category: RuleCategory::Memory,
167 rule_name: self.name().into(),
168 message: format!(
169 "Memory allocation is only {:.0}% of budget ({} tokens)",
170 pct * 100.0,
171 turn.memory_tokens
172 ),
173 suggestion: "The agent may lack long-term context. Check that memory retrieval is configured and returning relevant results.".into(),
174 })
175 } else {
176 None
177 }
178 }
179}
180
181pub struct ShallowHistory;
182
183impl AnalysisRule for ShallowHistory {
184 fn name(&self) -> &str {
185 "shallow_history"
186 }
187 fn category(&self) -> RuleCategory {
188 RuleCategory::Quality
189 }
190 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
191 if turn.history_depth < 3 && turn.history_depth >= 0 {
192 Some(Tip {
193 severity: Severity::Info,
194 category: RuleCategory::Quality,
195 rule_name: self.name().into(),
196 message: format!(
197 "Only {} messages in conversation history",
198 turn.history_depth
199 ),
200 suggestion: "With shallow history the model may lack conversational context. This is normal for early turns in a session.".into(),
201 })
202 } else {
203 None
204 }
205 }
206}
207
208pub struct HighToolDensity;
209
210impl AnalysisRule for HighToolDensity {
211 fn name(&self) -> &str {
212 "high_tool_density"
213 }
214 fn category(&self) -> RuleCategory {
215 RuleCategory::Tools
216 }
217 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
218 if turn.tool_call_count > 3 {
219 Some(Tip {
220 severity: if turn.tool_call_count > 8 {
221 Severity::Warning
222 } else {
223 Severity::Info
224 },
225 category: RuleCategory::Tools,
226 rule_name: self.name().into(),
227 message: format!(
228 "{} tool calls in a single turn",
229 turn.tool_call_count
230 ),
231 suggestion: "High tool density increases latency and cost. Consider whether the agent could batch operations or use a more targeted approach.".into(),
232 })
233 } else {
234 None
235 }
236 }
237}
238
239pub struct ToolFailures;
240
241impl AnalysisRule for ToolFailures {
242 fn name(&self) -> &str {
243 "tool_failures"
244 }
245 fn category(&self) -> RuleCategory {
246 RuleCategory::Tools
247 }
248 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
249 if turn.tool_failure_count > 0 {
250 let rate = if turn.tool_call_count > 0 {
251 turn.tool_failure_count as f64 / turn.tool_call_count as f64
252 } else {
253 1.0
254 };
255 Some(Tip {
256 severity: if rate > 0.5 {
257 Severity::Critical
258 } else {
259 Severity::Warning
260 },
261 category: RuleCategory::Tools,
262 rule_name: self.name().into(),
263 message: format!(
264 "{} of {} tool calls failed ({:.0}% failure rate)",
265 turn.tool_failure_count,
266 turn.tool_call_count,
267 rate * 100.0
268 ),
269 suggestion: "Investigate tool errors. Frequent failures waste tokens on retry loops and degrade response quality.".into(),
270 })
271 } else {
272 None
273 }
274 }
275}
276
277pub struct ExpensiveTurn;
278
279impl AnalysisRule for ExpensiveTurn {
280 fn name(&self) -> &str {
281 "expensive_turn"
282 }
283 fn category(&self) -> RuleCategory {
284 RuleCategory::Cost
285 }
286 fn evaluate_turn(&self, turn: &TurnData, session_avg_cost: Option<f64>) -> Option<Tip> {
287 let avg = session_avg_cost?;
288 if avg <= 0.0 {
289 return None;
290 }
291 let ratio = turn.cost / avg;
292 if ratio > 2.0 {
293 Some(Tip {
294 severity: if ratio > 5.0 {
295 Severity::Critical
296 } else {
297 Severity::Warning
298 },
299 category: RuleCategory::Cost,
300 rule_name: self.name().into(),
301 message: format!(
302 "Turn cost ${:.4} is {:.1}x the session average (${:.4})",
303 turn.cost, ratio, avg
304 ),
305 suggestion: "This turn was unusually expensive. Check for large context windows, expensive model selection, or excessive tool usage.".into(),
306 })
307 } else {
308 None
309 }
310 }
311}
312
313pub struct EmptyReasoning;
314
315impl AnalysisRule for EmptyReasoning {
316 fn name(&self) -> &str {
317 "empty_reasoning"
318 }
319 fn category(&self) -> RuleCategory {
320 RuleCategory::Quality
321 }
322 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
323 if turn.has_reasoning && turn.thinking_length == 0 {
324 Some(Tip {
325 severity: Severity::Info,
326 category: RuleCategory::Quality,
327 rule_name: self.name().into(),
328 message: "Model supports reasoning but produced no thinking trace".into(),
329 suggestion: "The model may have skipped reasoning for a simple query, or the thinking budget may be too low. No action needed if the response was adequate.".into(),
330 })
331 } else {
332 None
333 }
334 }
335}
336
337pub struct SystemPromptTax;
338
339impl AnalysisRule for SystemPromptTax {
340 fn name(&self) -> &str {
341 "system_prompt_tax"
342 }
343 fn category(&self) -> RuleCategory {
344 RuleCategory::Cost
345 }
346 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
347 if turn.tokens_in <= 0 {
348 return None;
349 }
350 let sys_fraction = turn.system_prompt_tokens as f64 / turn.tokens_in as f64;
351 let estimated_sys_cost = turn.cost * sys_fraction;
352 if estimated_sys_cost > 0.01 && sys_fraction > 0.30 {
353 Some(Tip {
354 severity: if estimated_sys_cost > 0.05 {
355 Severity::Warning
356 } else {
357 Severity::Info
358 },
359 category: RuleCategory::Cost,
360 rule_name: self.name().into(),
361 message: format!(
362 "System prompt accounts for ~${:.4} ({:.0}% of input tokens)",
363 estimated_sys_cost,
364 sys_fraction * 100.0
365 ),
366 suggestion: "Repeated system prompt tokens add up. Consider prompt caching, compression, or moving static content to retrieval.".into(),
367 })
368 } else {
369 None
370 }
371 }
372}
373
374pub struct HistoryCostDominant;
375
376impl AnalysisRule for HistoryCostDominant {
377 fn name(&self) -> &str {
378 "history_cost_dominant"
379 }
380 fn category(&self) -> RuleCategory {
381 RuleCategory::Cost
382 }
383 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
384 if turn.tokens_in <= 0 {
385 return None;
386 }
387 let hist_fraction = turn.history_tokens as f64 / turn.tokens_in as f64;
388 if hist_fraction > 0.60 {
389 Some(Tip {
390 severity: if hist_fraction > 0.80 {
391 Severity::Warning
392 } else {
393 Severity::Info
394 },
395 category: RuleCategory::Cost,
396 rule_name: self.name().into(),
397 message: format!(
398 "History tokens consume {:.0}% of input tokens ({} / {})",
399 hist_fraction * 100.0,
400 turn.history_tokens,
401 turn.tokens_in
402 ),
403 suggestion: "Conversation history dominates input cost. Consider summarizing older messages or reducing history window depth.".into(),
404 })
405 } else {
406 None
407 }
408 }
409}
410
411pub struct LargeOutputRatio;
412
413impl AnalysisRule for LargeOutputRatio {
414 fn name(&self) -> &str {
415 "large_output_ratio"
416 }
417 fn category(&self) -> RuleCategory {
418 RuleCategory::Cost
419 }
420 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
421 if turn.tokens_in <= 0 || turn.tokens_out <= 0 {
422 return None;
423 }
424 let ratio = turn.tokens_out as f64 / turn.tokens_in as f64;
425 if ratio > 2.0 && turn.tokens_out > 2000 {
426 Some(Tip {
427 severity: Severity::Info,
428 category: RuleCategory::Cost,
429 rule_name: self.name().into(),
430 message: format!(
431 "Output ({} tokens) is {:.1}x the input ({} tokens)",
432 turn.tokens_out, ratio, turn.tokens_in
433 ),
434 suggestion: "Large output may indicate verbose responses. Check if the model is being asked for overly detailed answers where conciseness would suffice.".into(),
435 })
436 } else {
437 None
438 }
439 }
440}
441
442pub struct CachedTurnSavings;
443
444impl AnalysisRule for CachedTurnSavings {
445 fn name(&self) -> &str {
446 "cached_turn_savings"
447 }
448 fn category(&self) -> RuleCategory {
449 RuleCategory::Cost
450 }
451 fn evaluate_turn(&self, turn: &TurnData, _avg: Option<f64>) -> Option<Tip> {
452 if turn.cached {
453 Some(Tip {
454 severity: Severity::Info,
455 category: RuleCategory::Cost,
456 rule_name: self.name().into(),
457 message: "This turn was served from cache".into(),
458 suggestion: "Cache hit saved inference cost. Frequently cached queries may indicate the system prompt or user patterns are repetitive.".into(),
459 })
460 } else {
461 None
462 }
463 }
464}
465
466pub struct ContextDrift;
469
470impl SessionAnalysisRule for ContextDrift {
471 fn name(&self) -> &str {
472 "context_drift"
473 }
474 fn category(&self) -> RuleCategory {
475 RuleCategory::Budget
476 }
477 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
478 let turns = &session.turns;
479 if turns.len() < 4 {
480 return None;
481 }
482 let utils: Vec<f64> = turns
483 .iter()
484 .filter(|t| t.token_budget > 0)
485 .map(|t| {
486 let used = t.system_prompt_tokens + t.memory_tokens + t.history_tokens;
487 used as f64 / t.token_budget as f64
488 })
489 .collect();
490 if utils.len() < 4 {
491 return None;
492 }
493 let half = utils.len() / 2;
494 let first_half_avg: f64 = utils[..half].iter().sum::<f64>() / half as f64;
495 let second_half_avg: f64 = utils[half..].iter().sum::<f64>() / (utils.len() - half) as f64;
496 if second_half_avg > first_half_avg + 0.15 {
497 Some(Tip {
498 severity: Severity::Warning,
499 category: RuleCategory::Budget,
500 rule_name: self.name().into(),
501 message: format!(
502 "Budget utilization trending upward: {:.0}% → {:.0}%",
503 first_half_avg * 100.0,
504 second_half_avg * 100.0
505 ),
506 suggestion: "Context is growing across turns. Consider more aggressive history pruning or a summarization step.".into(),
507 })
508 } else {
509 None
510 }
511 }
512}
513
514pub struct FrequentEscalation;
515
516impl SessionAnalysisRule for FrequentEscalation {
517 fn name(&self) -> &str {
518 "frequent_escalation"
519 }
520 fn category(&self) -> RuleCategory {
521 RuleCategory::Quality
522 }
523 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
524 let turns = &session.turns;
525 if turns.is_empty() {
526 return None;
527 }
528 let high_complexity = turns
529 .iter()
530 .filter(|t| t.complexity_level == "L2" || t.complexity_level == "L3")
531 .count();
532 let pct = high_complexity as f64 / turns.len() as f64;
533 if pct > 0.40 {
534 Some(Tip {
535 severity: Severity::Warning,
536 category: RuleCategory::Quality,
537 rule_name: self.name().into(),
538 message: format!(
539 "{:.0}% of turns ({}/{}) at L2/L3 complexity",
540 pct * 100.0,
541 high_complexity,
542 turns.len()
543 ),
544 suggestion: "Frequent complexity escalation drives up cost. Evaluate whether the escalation triggers are too sensitive or the base model could handle more queries.".into(),
545 })
546 } else {
547 None
548 }
549 }
550}
551
552pub struct CostAcceleration;
553
554impl SessionAnalysisRule for CostAcceleration {
555 fn name(&self) -> &str {
556 "cost_acceleration"
557 }
558 fn category(&self) -> RuleCategory {
559 RuleCategory::Cost
560 }
561 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
562 let turns = &session.turns;
563 if turns.len() < 4 {
564 return None;
565 }
566 let costs: Vec<f64> = turns.iter().map(|t| t.cost).collect();
567 let half = costs.len() / 2;
568 let first_avg: f64 = costs[..half].iter().sum::<f64>() / half as f64;
569 let second_avg: f64 = costs[half..].iter().sum::<f64>() / (costs.len() - half) as f64;
570 if first_avg > 0.0 && second_avg > first_avg * 1.5 {
571 Some(Tip {
572 severity: Severity::Warning,
573 category: RuleCategory::Cost,
574 rule_name: self.name().into(),
575 message: format!(
576 "Per-turn cost increasing: ${:.4} avg (first half) → ${:.4} avg (second half)",
577 first_avg, second_avg
578 ),
579 suggestion: "Costs are accelerating across the session. This often signals growing context windows or model escalation. Consider resetting the session or pruning aggressively.".into(),
580 })
581 } else {
582 None
583 }
584 }
585}
586
587pub struct UnderutilizedMemory;
588
589impl SessionAnalysisRule for UnderutilizedMemory {
590 fn name(&self) -> &str {
591 "underutilized_memory"
592 }
593 fn category(&self) -> RuleCategory {
594 RuleCategory::Memory
595 }
596 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
597 let turns = &session.turns;
598 if turns.is_empty() {
599 return None;
600 }
601 let all_zero = turns.iter().all(|t| t.memory_tokens == 0);
602 if all_zero {
603 Some(Tip {
604 severity: Severity::Info,
605 category: RuleCategory::Memory,
606 rule_name: self.name().into(),
607 message: "No memory tokens used across the entire session".into(),
608 suggestion: "Memory retrieval produced no content for any turn. Verify that memories exist and the retrieval pipeline is functioning.".into(),
609 })
610 } else {
611 None
612 }
613 }
614}
615
616pub struct ToolSuccessRate;
617
618impl SessionAnalysisRule for ToolSuccessRate {
619 fn name(&self) -> &str {
620 "tool_success_rate"
621 }
622 fn category(&self) -> RuleCategory {
623 RuleCategory::Tools
624 }
625 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
626 let total_calls: i64 = session.turns.iter().map(|t| t.tool_call_count).sum();
627 let total_failures: i64 = session.turns.iter().map(|t| t.tool_failure_count).sum();
628 if total_calls < 5 {
629 return None;
630 }
631 let success_rate = 1.0 - (total_failures as f64 / total_calls as f64);
632 if success_rate < 0.80 {
633 Some(Tip {
634 severity: if success_rate < 0.50 {
635 Severity::Critical
636 } else {
637 Severity::Warning
638 },
639 category: RuleCategory::Tools,
640 rule_name: self.name().into(),
641 message: format!(
642 "Session-wide tool success rate is {:.0}% ({} failures / {} total calls)",
643 success_rate * 100.0,
644 total_failures,
645 total_calls
646 ),
647 suggestion: "Chronic tool failures waste tokens and degrade quality. Investigate the most common failure modes.".into(),
648 })
649 } else {
650 None
651 }
652 }
653}
654
655pub struct ModelChurn;
656
657impl SessionAnalysisRule for ModelChurn {
658 fn name(&self) -> &str {
659 "model_churn"
660 }
661 fn category(&self) -> RuleCategory {
662 RuleCategory::Quality
663 }
664 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
665 let models: HashSet<&str> = session
666 .turns
667 .iter()
668 .filter(|t| !t.model.is_empty())
669 .map(|t| t.model.as_str())
670 .collect();
671 if models.len() > 3 {
672 Some(Tip {
673 severity: Severity::Info,
674 category: RuleCategory::Quality,
675 rule_name: self.name().into(),
676 message: format!(
677 "{} different models used across {} turns: {}",
678 models.len(),
679 session.turns.len(),
680 models.into_iter().collect::<Vec<_>>().join(", ")
681 ),
682 suggestion: "Frequent model switching can cause inconsistent tone and behavior. Consider stabilizing the model selection unless complexity-based routing is intentional.".into(),
683 })
684 } else {
685 None
686 }
687 }
688}
689
690pub struct QualityDeclining;
693
694impl SessionAnalysisRule for QualityDeclining {
695 fn name(&self) -> &str {
696 "quality_declining"
697 }
698 fn category(&self) -> RuleCategory {
699 RuleCategory::Quality
700 }
701 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
702 let grades = &session.grades;
703 if grades.len() < 4 {
704 return None;
705 }
706 let half = grades.len() / 2;
707 let first_avg = grades[..half].iter().map(|(_, g)| *g as f64).sum::<f64>() / half as f64;
708 let second_avg = grades[half..].iter().map(|(_, g)| *g as f64).sum::<f64>()
709 / (grades.len() - half) as f64;
710 if first_avg - second_avg > 0.5 {
711 Some(Tip {
712 severity: Severity::Warning,
713 category: RuleCategory::Quality,
714 rule_name: self.name().into(),
715 message: format!(
716 "Average grade declined from {:.1} to {:.1} over the session",
717 first_avg, second_avg
718 ),
719 suggestion: "Quality is dropping as the conversation progresses. This may indicate context degradation, model fatigue, or increasingly complex queries. Consider resetting or adjusting the model.".into(),
720 })
721 } else {
722 None
723 }
724 }
725}
726
727pub struct CostQualityMismatch;
728
729impl SessionAnalysisRule for CostQualityMismatch {
730 fn name(&self) -> &str {
731 "cost_quality_mismatch"
732 }
733 fn category(&self) -> RuleCategory {
734 RuleCategory::Cost
735 }
736 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
737 if session.grades.is_empty() || session.turns.len() < 2 {
738 return None;
739 }
740 let grade_map: std::collections::HashMap<&str, Vec<i32>> = {
741 let mut m: std::collections::HashMap<&str, Vec<i32>> = std::collections::HashMap::new();
742 for (turn_id, grade) in &session.grades {
743 if let Some(turn) = session.turns.iter().find(|t| t.turn_id == *turn_id) {
744 m.entry(turn.model.as_str()).or_default().push(*grade);
745 }
746 }
747 m
748 };
749 let cost_map: std::collections::HashMap<&str, f64> = {
750 let mut m: std::collections::HashMap<&str, f64> = std::collections::HashMap::new();
751 for t in &session.turns {
752 *m.entry(t.model.as_str()).or_default() += t.cost;
753 }
754 m
755 };
756
757 if grade_map.len() < 2 {
758 return None;
759 }
760
761 let most_expensive = cost_map
762 .iter()
763 .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
764 .map(|(m, _)| *m)?;
765
766 let exp_grades = grade_map.get(most_expensive)?;
767 let exp_avg = exp_grades.iter().map(|g| *g as f64).sum::<f64>() / exp_grades.len() as f64;
768
769 let best_quality = grade_map
770 .iter()
771 .filter(|(m, _)| **m != most_expensive)
772 .map(|(_, gs)| gs.iter().map(|g| *g as f64).sum::<f64>() / gs.len() as f64)
773 .fold(0.0_f64, f64::max);
774
775 if best_quality > exp_avg {
776 Some(Tip {
777 severity: Severity::Warning,
778 category: RuleCategory::Cost,
779 rule_name: self.name().into(),
780 message: format!(
781 "Most expensive model ({}) has lower quality ({:.1}) than a cheaper alternative ({:.1})",
782 most_expensive, exp_avg, best_quality
783 ),
784 suggestion: "The highest-cost model isn't producing the best grades. Consider routing more queries to the higher-quality, lower-cost model.".into(),
785 })
786 } else {
787 None
788 }
789 }
790}
791
792pub struct MemoryHelps;
793
794impl SessionAnalysisRule for MemoryHelps {
795 fn name(&self) -> &str {
796 "memory_helps"
797 }
798 fn category(&self) -> RuleCategory {
799 RuleCategory::Memory
800 }
801 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
802 if session.grades.is_empty() {
803 return None;
804 }
805 let mut with_mem: Vec<f64> = Vec::new();
806 let mut without_mem: Vec<f64> = Vec::new();
807 for (turn_id, grade) in &session.grades {
808 if let Some(turn) = session.turns.iter().find(|t| t.turn_id == *turn_id) {
809 if turn.memory_tokens > 0 {
810 with_mem.push(*grade as f64);
811 } else {
812 without_mem.push(*grade as f64);
813 }
814 }
815 }
816 if with_mem.len() < 2 || without_mem.len() < 2 {
817 return None;
818 }
819 let with_avg = with_mem.iter().sum::<f64>() / with_mem.len() as f64;
820 let without_avg = without_mem.iter().sum::<f64>() / without_mem.len() as f64;
821 if with_avg > without_avg + 0.5 {
822 Some(Tip {
823 severity: Severity::Info,
824 category: RuleCategory::Memory,
825 rule_name: self.name().into(),
826 message: format!(
827 "Quality is {:.1} with memory vs {:.1} without — memory retrieval is helping",
828 with_avg, without_avg
829 ),
830 suggestion: "Memory-augmented turns produce higher quality. Ensure memory retrieval is consistently available and consider expanding memory coverage.".into(),
831 })
832 } else {
833 None
834 }
835 }
836}
837
838pub struct LowCoverageWarning;
839
840impl SessionAnalysisRule for LowCoverageWarning {
841 fn name(&self) -> &str {
842 "low_coverage_warning"
843 }
844 fn category(&self) -> RuleCategory {
845 RuleCategory::Quality
846 }
847 fn evaluate_session(&self, session: &SessionData) -> Option<Tip> {
848 if session.turns.len() < 50 {
849 return None;
850 }
851 let coverage = session.grades.len() as f64 / session.turns.len() as f64;
852 if coverage < 0.20 {
853 Some(Tip {
854 severity: Severity::Info,
855 category: RuleCategory::Quality,
856 rule_name: self.name().into(),
857 message: format!(
858 "Only {:.0}% of turns have been graded ({}/{})",
859 coverage * 100.0,
860 session.grades.len(),
861 session.turns.len()
862 ),
863 suggestion: "Grade coverage is low. Quality metrics may not be representative. Consider grading more turns to get reliable quality signals.".into(),
864 })
865 } else {
866 None
867 }
868 }
869}
870
871pub struct ContextAnalyzer {
874 turn_rules: Vec<Box<dyn AnalysisRule>>,
875 session_rules: Vec<Box<dyn SessionAnalysisRule>>,
876}
877
878impl Default for ContextAnalyzer {
879 fn default() -> Self {
880 Self::new()
881 }
882}
883
884impl ContextAnalyzer {
885 pub fn new() -> Self {
886 Self {
887 turn_rules: vec![
888 Box::new(BudgetPressure),
889 Box::new(SystemPromptHeavy),
890 Box::new(MemoryStarvation),
891 Box::new(ShallowHistory),
892 Box::new(HighToolDensity),
893 Box::new(ToolFailures),
894 Box::new(ExpensiveTurn),
895 Box::new(EmptyReasoning),
896 Box::new(SystemPromptTax),
897 Box::new(HistoryCostDominant),
898 Box::new(LargeOutputRatio),
899 Box::new(CachedTurnSavings),
900 ],
901 session_rules: vec![
902 Box::new(ContextDrift),
903 Box::new(FrequentEscalation),
904 Box::new(CostAcceleration),
905 Box::new(UnderutilizedMemory),
906 Box::new(ToolSuccessRate),
907 Box::new(ModelChurn),
908 Box::new(QualityDeclining),
909 Box::new(CostQualityMismatch),
910 Box::new(MemoryHelps),
911 Box::new(LowCoverageWarning),
912 ],
913 }
914 }
915
916 pub fn analyze_turn(&self, turn: &TurnData, session_avg_cost: Option<f64>) -> Vec<Tip> {
917 self.turn_rules
918 .iter()
919 .filter_map(|r| r.evaluate_turn(turn, session_avg_cost))
920 .collect()
921 }
922
923 pub fn analyze_session(&self, session: &SessionData) -> Vec<Tip> {
924 self.session_rules
925 .iter()
926 .filter_map(|r| r.evaluate_session(session))
927 .collect()
928 }
929}
930
931pub struct LlmAnalyzer;
934
935impl LlmAnalyzer {
936 pub fn build_analysis_prompt(turn: &TurnData, heuristic_tips: &[Tip]) -> String {
937 let budget_util = if turn.token_budget > 0 {
938 let used = turn.system_prompt_tokens + turn.memory_tokens + turn.history_tokens;
939 (used as f64 / turn.token_budget as f64) * 100.0
940 } else {
941 0.0
942 };
943
944 let sys_pct = if turn.token_budget > 0 {
945 (turn.system_prompt_tokens as f64 / turn.token_budget as f64) * 100.0
946 } else {
947 0.0
948 };
949
950 let mem_pct = if turn.token_budget > 0 {
951 (turn.memory_tokens as f64 / turn.token_budget as f64) * 100.0
952 } else {
953 0.0
954 };
955
956 let tips_text = if heuristic_tips.is_empty() {
957 " (none)\n".to_string()
958 } else {
959 heuristic_tips
960 .iter()
961 .map(|t| format!(" - [{:?}] {}: {}", t.severity, t.rule_name, t.message))
962 .collect::<Vec<_>>()
963 .join("\n")
964 };
965
966 format!(
967 "Analyze this LLM context turn:\n\
968 - Model: {}\n\
969 - Tokens in: {}, out: {}\n\
970 - Budget utilization: {:.0}%\n\
971 - System prompt: {:.0}% of budget\n\
972 - Memory: {:.0}% of budget\n\
973 - History depth: {} messages\n\
974 - Tool calls: {} ({} failed)\n\
975 - Complexity level: {}\n\
976 - Cached: {}\n\
977 \nExisting analysis tips:\n{}\n\
978 \nProvide additional insights about:\n\
979 1. System prompt clarity and specificity\n\
980 2. Whether retrieved memories seem relevant\n\
981 3. Suggestions for improving the configuration\n\
982 4. Whether the model selection was appropriate",
983 turn.model,
984 turn.tokens_in,
985 turn.tokens_out,
986 budget_util,
987 sys_pct,
988 mem_pct,
989 turn.history_depth,
990 turn.tool_call_count,
991 turn.tool_failure_count,
992 turn.complexity_level,
993 turn.cached,
994 tips_text,
995 )
996 }
997
998 pub fn build_session_prompt(session: &SessionData, heuristic_tips: &[Tip]) -> String {
999 let total_cost: f64 = session.turns.iter().map(|t| t.cost).sum();
1000 let total_tokens: i64 = session
1001 .turns
1002 .iter()
1003 .map(|t| t.tokens_in + t.tokens_out)
1004 .sum();
1005 let models: HashSet<&str> = session.turns.iter().map(|t| t.model.as_str()).collect();
1006
1007 let tips_text = if heuristic_tips.is_empty() {
1008 " (none)\n".to_string()
1009 } else {
1010 heuristic_tips
1011 .iter()
1012 .map(|t| format!(" - [{:?}] {}: {}", t.severity, t.rule_name, t.message))
1013 .collect::<Vec<_>>()
1014 .join("\n")
1015 };
1016
1017 format!(
1018 "Analyze this LLM session:\n\
1019 - Session ID: {}\n\
1020 - Total turns: {}\n\
1021 - Total tokens: {}\n\
1022 - Total cost: ${:.4}\n\
1023 - Models used: {}\n\
1024 \nExisting analysis tips:\n{}\n\
1025 \nProvide insights about:\n\
1026 1. Session-level cost efficiency\n\
1027 2. Context management patterns\n\
1028 3. Opportunities for optimization\n\
1029 4. Overall session health assessment",
1030 session.session_id,
1031 session.turns.len(),
1032 total_tokens,
1033 total_cost,
1034 models.into_iter().collect::<Vec<_>>().join(", "),
1035 tips_text,
1036 )
1037 }
1038}
1039
1040#[cfg(test)]
1043mod tests {
1044 use super::*;
1045
1046 fn make_turn(overrides: impl FnOnce(&mut TurnData)) -> TurnData {
1047 let mut t = TurnData {
1048 turn_id: "turn-1".into(),
1049 token_budget: 128_000,
1050 system_prompt_tokens: 10_000,
1051 memory_tokens: 5_000,
1052 history_tokens: 20_000,
1053 history_depth: 10,
1054 complexity_level: "L1".into(),
1055 model: "gpt-4".into(),
1056 cost: 0.05,
1057 tokens_in: 35_000,
1058 tokens_out: 2_000,
1059 tool_call_count: 1,
1060 tool_failure_count: 0,
1061 thinking_length: 500,
1062 has_reasoning: true,
1063 cached: false,
1064 };
1065 overrides(&mut t);
1066 t
1067 }
1068
1069 #[test]
1070 fn budget_pressure_fires_above_90_pct() {
1071 let turn = make_turn(|t| {
1072 t.token_budget = 100;
1073 t.system_prompt_tokens = 50;
1074 t.memory_tokens = 20;
1075 t.history_tokens = 25;
1076 });
1077 let tip = BudgetPressure.evaluate_turn(&turn, None);
1078 assert!(tip.is_some());
1079 assert_eq!(tip.unwrap().severity, Severity::Warning);
1080 }
1081
1082 #[test]
1083 fn budget_pressure_critical_above_95_pct() {
1084 let turn = make_turn(|t| {
1085 t.token_budget = 100;
1086 t.system_prompt_tokens = 50;
1087 t.memory_tokens = 20;
1088 t.history_tokens = 27;
1089 });
1090 let tip = BudgetPressure.evaluate_turn(&turn, None);
1091 assert!(tip.is_some());
1092 assert_eq!(tip.unwrap().severity, Severity::Critical);
1093 }
1094
1095 #[test]
1096 fn budget_pressure_silent_below_90_pct() {
1097 let turn = make_turn(|t| {
1098 t.token_budget = 100;
1099 t.system_prompt_tokens = 20;
1100 t.memory_tokens = 10;
1101 t.history_tokens = 30;
1102 });
1103 assert!(BudgetPressure.evaluate_turn(&turn, None).is_none());
1104 }
1105
1106 #[test]
1107 fn system_prompt_heavy_fires_above_40_pct() {
1108 let turn = make_turn(|t| {
1109 t.token_budget = 100;
1110 t.system_prompt_tokens = 45;
1111 });
1112 let tip = SystemPromptHeavy.evaluate_turn(&turn, None);
1113 assert!(tip.is_some());
1114 assert_eq!(tip.unwrap().category, RuleCategory::Prompt);
1115 }
1116
1117 #[test]
1118 fn memory_starvation_fires_below_10_pct() {
1119 let turn = make_turn(|t| {
1120 t.token_budget = 1000;
1121 t.memory_tokens = 50;
1122 });
1123 let tip = MemoryStarvation.evaluate_turn(&turn, None);
1124 assert!(tip.is_some());
1125 }
1126
1127 #[test]
1128 fn shallow_history_fires_below_3() {
1129 let turn = make_turn(|t| {
1130 t.history_depth = 2;
1131 });
1132 let tip = ShallowHistory.evaluate_turn(&turn, None);
1133 assert!(tip.is_some());
1134 }
1135
1136 #[test]
1137 fn high_tool_density_fires_above_3() {
1138 let turn = make_turn(|t| {
1139 t.tool_call_count = 5;
1140 });
1141 let tip = HighToolDensity.evaluate_turn(&turn, None);
1142 assert!(tip.is_some());
1143 }
1144
1145 #[test]
1146 fn tool_failures_fires_on_failure() {
1147 let turn = make_turn(|t| {
1148 t.tool_call_count = 4;
1149 t.tool_failure_count = 3;
1150 });
1151 let tip = ToolFailures.evaluate_turn(&turn, None);
1152 assert!(tip.is_some());
1153 assert_eq!(tip.unwrap().severity, Severity::Critical);
1154 }
1155
/// A turn costing 0.10 against a session average of 0.03 must make
/// `ExpensiveTurn` return a tip.
#[test]
fn expensive_turn_fires_above_2x_avg() {
    let session_avg = Some(0.03);
    let pricey_turn = make_turn(|td| td.cost = 0.10);
    assert!(ExpensiveTurn.evaluate_turn(&pricey_turn, session_avg).is_some());
}
1164
/// Without a session average cost, `ExpensiveTurn` must not return a tip
/// for an unmodified fixture turn.
#[test]
fn expensive_turn_silent_without_avg() {
    let default_turn = make_turn(|_| {});
    let outcome = ExpensiveTurn.evaluate_turn(&default_turn, None);
    assert!(outcome.is_none());
}
1170
/// A turn flagged as having reasoning but with a thinking length of zero
/// must make `EmptyReasoning` return a tip.
#[test]
fn empty_reasoning_fires_when_has_reasoning_but_empty() {
    let hollow_turn = make_turn(|td| {
        td.thinking_length = 0;
        td.has_reasoning = true;
    });
    assert!(EmptyReasoning.evaluate_turn(&hollow_turn, None).is_some());
}
1180
/// A turn marked as cached must make `CachedTurnSavings` return a tip.
#[test]
fn cached_turn_savings_fires_on_cache_hit() {
    let cached_turn = make_turn(|td| td.cached = true);
    assert!(CachedTurnSavings.evaluate_turn(&cached_turn, None).is_some());
}
1189
/// Running the full `ContextAnalyzer` over a turn that is deliberately
/// unhealthy in several dimensions (budget, memory, history depth, tools)
/// must produce at least three tips.
#[test]
fn context_analyzer_produces_mixed_tips() {
    let analyzer = ContextAnalyzer::new();
    let troubled_turn = make_turn(|td| {
        // Token accounting: 95 of 100 budget tokens consumed.
        td.token_budget = 100;
        td.system_prompt_tokens = 50;
        td.memory_tokens = 5;
        td.history_tokens = 40;
        // Conversation and tool usage.
        td.history_depth = 2;
        td.tool_call_count = 6;
        td.tool_failure_count = 2;
    });
    let tips = analyzer.analyze_turn(&troubled_turn, Some(0.01));
    let tip_count = tips.len();
    assert!(tip_count >= 3, "expected multiple tips, got {}", tip_count);
}
1209
1210 fn make_session_turns(count: usize, modifier: impl Fn(usize, &mut TurnData)) -> SessionData {
1211 let turns: Vec<TurnData> = (0..count)
1212 .map(|i| {
1213 make_turn(|t| {
1214 t.turn_id = format!("turn-{i}");
1215 modifier(i, t);
1216 })
1217 })
1218 .collect();
1219 SessionData {
1220 turns,
1221 session_id: "session-1".into(),
1222 grades: vec![],
1223 }
1224 }
1225
/// Eight turns whose history tokens jump from 20 to 70 (out of a 100-token
/// budget) halfway through the session must make `ContextDrift` return a tip.
#[test]
fn context_drift_fires_when_utilization_increases() {
    let drifting = make_session_turns(8, |i, td| {
        let history = if i < 4 { 20 } else { 70 };
        td.token_budget = 100;
        td.system_prompt_tokens = 10;
        td.memory_tokens = 5;
        td.history_tokens = history;
    });
    assert!(ContextDrift.evaluate_session(&drifting).is_some());
}
1237
/// Six turns whose cost rises from 0.01 to 0.10 after the third turn must
/// make `CostAcceleration` return a tip.
#[test]
fn cost_acceleration_fires_when_costs_increase() {
    let accelerating = make_session_turns(6, |i, td| {
        let cost = if i < 3 { 0.01 } else { 0.10 };
        td.cost = cost;
    });
    assert!(CostAcceleration.evaluate_session(&accelerating).is_some());
}
1246
/// A four-turn session in which no turn carries memory tokens must make
/// `UnderutilizedMemory` return a tip.
#[test]
fn underutilized_memory_fires_when_all_zero() {
    let memoryless = make_session_turns(4, |_, td| td.memory_tokens = 0);
    assert!(UnderutilizedMemory.evaluate_session(&memoryless).is_some());
}
1255
/// Three turns with 3 failures out of 5 tool calls each must make
/// `ToolSuccessRate` return a tip.
#[test]
fn tool_success_rate_fires_below_80_pct() {
    let flaky = make_session_turns(3, |_, td| {
        td.tool_failure_count = 3;
        td.tool_call_count = 5;
    });
    assert!(ToolSuccessRate.evaluate_session(&flaky).is_some());
}
1265
/// A session in which each of four turns uses a different model must make
/// `ModelChurn` return a tip.
#[test]
fn model_churn_fires_above_3_models() {
    let model_names = ["gpt-4", "claude-3", "gemini-1.5", "llama-3"];
    let churning = make_session_turns(4, |i, td| {
        td.model = String::from(model_names[i]);
    });
    assert!(ModelChurn.evaluate_session(&churning).is_some());
}
1275
/// Running every session rule over a six-turn session with rising cost and
/// no memory tokens must produce at least one tip.
#[test]
fn full_session_analysis_returns_tips() {
    let analyzer = ContextAnalyzer::new();
    let session = make_session_turns(6, |i, td| {
        td.memory_tokens = 0;
        td.cost = if i < 3 { 0.01 } else { 0.10 };
    });
    let found_any = !analyzer.analyze_session(&session).is_empty();
    assert!(found_any);
}
1286
/// The per-turn analysis prompt must mention both the turn's model and the
/// message of every tip passed in.
///
/// The model is set explicitly here so the `"gpt-4"` assertion does not
/// silently depend on whatever default the `make_turn` fixture happens to use.
#[test]
fn llm_analyzer_build_prompt_not_empty() {
    let turn = make_turn(|t| {
        t.model = "gpt-4".into();
    });
    let tips = vec![Tip {
        severity: Severity::Warning,
        category: RuleCategory::Budget,
        rule_name: "test".into(),
        message: "test message".into(),
        suggestion: "test suggestion".into(),
    }];
    let prompt = LlmAnalyzer::build_analysis_prompt(&turn, &tips);
    assert!(prompt.contains("gpt-4"));
    assert!(prompt.contains("test message"));
}
1301
/// The session prompt for a three-turn fixture with no tips must include
/// the session id and the total turn count.
#[test]
fn llm_analyzer_session_prompt_not_empty() {
    let no_tips: Vec<Tip> = Vec::new();
    let three_turns = make_session_turns(3, |_, _| {});
    let rendered = LlmAnalyzer::build_session_prompt(&three_turns, &no_tips);
    assert!(rendered.contains("session-1"));
    assert!(rendered.contains("Total turns: 3"));
}
1310
/// A `Tip` serialized to JSON and parsed back must keep its severity,
/// category and rule name.
#[test]
fn tip_serialization_roundtrip() {
    let original = Tip {
        severity: Severity::Warning,
        category: RuleCategory::Cost,
        rule_name: String::from("test_rule"),
        message: String::from("test message"),
        suggestion: String::from("test suggestion"),
    };
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: Tip = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.severity, Severity::Warning);
    assert_eq!(decoded.category, RuleCategory::Cost);
    assert_eq!(decoded.rule_name, "test_rule");
}
1326
/// The default `ContextAnalyzer` must register 12 turn rules and 10 session
/// rules.
#[test]
fn analyzer_default_has_all_rules() {
    let analyzer = ContextAnalyzer::default();
    let turn_rule_count = analyzer.turn_rules.len();
    let session_rule_count = analyzer.session_rules.len();
    assert_eq!(turn_rule_count, 12);
    assert_eq!(session_rule_count, 10);
}
1333
1334 fn make_graded_session(
1335 count: usize,
1336 modifier: impl Fn(usize, &mut TurnData),
1337 grades: Vec<(String, i32)>,
1338 ) -> SessionData {
1339 let turns: Vec<TurnData> = (0..count)
1340 .map(|i| {
1341 make_turn(|t| {
1342 t.turn_id = format!("turn-{i}");
1343 modifier(i, t);
1344 })
1345 })
1346 .collect();
1347 SessionData {
1348 turns,
1349 session_id: "session-1".into(),
1350 grades,
1351 }
1352 }
1353
/// Eight graded turns whose grade drops from 5 to 3 halfway through must
/// make `QualityDeclining` return a tip.
#[test]
fn quality_declining_fires_when_grades_drop() {
    let dropping_grades: Vec<(String, i32)> = (0..8)
        .map(|i| (format!("turn-{i}"), if i < 4 { 5 } else { 3 }))
        .collect();
    let session = make_graded_session(8, |_, _| {}, dropping_grades);
    assert!(QualityDeclining.evaluate_session(&session).is_some());
}
1366
/// Six turns all graded 4 must not make `QualityDeclining` return a tip.
#[test]
fn quality_declining_silent_when_stable() {
    let mut flat_grades: Vec<(String, i32)> = Vec::new();
    for i in 0..6 {
        flat_grades.push((format!("turn-{i}"), 4));
    }
    let session = make_graded_session(6, |_, _| {}, flat_grades);
    let outcome = QualityDeclining.evaluate_session(&session);
    assert!(outcome.is_none());
}
1373
/// When the model named `expensive` (cost 0.10, grade 3) is graded worse
/// than the model named `cheap` (cost 0.01, grade 5), `CostQualityMismatch`
/// must return a tip.
#[test]
fn cost_quality_mismatch_fires() {
    let grades: Vec<(String, i32)> = vec![
        (String::from("turn-0"), 3),
        (String::from("turn-1"), 3),
        (String::from("turn-2"), 5),
        (String::from("turn-3"), 5),
    ];
    let assign_model = |i: usize, td: &mut TurnData| {
        let (model, cost) = if i < 2 {
            ("expensive", 0.10)
        } else {
            ("cheap", 0.01)
        };
        td.model = model.into();
        td.cost = cost;
    };
    let session = make_graded_session(4, assign_model, grades);
    assert!(CostQualityMismatch.evaluate_session(&session).is_some());
}
1398
/// Four memory-free turns graded 2 followed by four turns with 500 memory
/// tokens graded 5 must make `MemoryHelps` return a tip.
#[test]
fn memory_helps_fires_when_significant() {
    let grades: Vec<(String, i32)> = (0..8)
        .map(|i| (format!("turn-{i}"), if i < 4 { 2 } else { 5 }))
        .collect();
    let session = make_graded_session(
        8,
        |i, td| {
            let memory = if i < 4 { 0 } else { 500 };
            td.memory_tokens = memory;
        },
        grades,
    );
    assert!(MemoryHelps.evaluate_session(&session).is_some());
}
1417
/// Only 5 graded turns in a 60-turn session must make `LowCoverageWarning`
/// return a tip.
#[test]
fn low_coverage_warning_fires_below_20_pct() {
    let mut sparse_grades: Vec<(String, i32)> = Vec::new();
    for i in 0..5 {
        sparse_grades.push((format!("turn-{i}"), 4));
    }
    let session = make_graded_session(60, |_, _| {}, sparse_grades);
    assert!(LowCoverageWarning.evaluate_session(&session).is_some());
}
1425
/// A ten-turn session without any grades must not make `LowCoverageWarning`
/// return a tip.
#[test]
fn low_coverage_warning_silent_for_small_sessions() {
    let small_session = make_graded_session(10, |_, _| {}, Vec::new());
    let outcome = LowCoverageWarning.evaluate_session(&small_session);
    assert!(outcome.is_none());
}
1431
/// `BudgetPressure` must report the name `budget_pressure` and the `Budget`
/// category.
#[test]
fn budget_pressure_name_and_category() {
    let rule = BudgetPressure;
    assert_eq!(rule.name(), "budget_pressure");
    assert_eq!(rule.category(), RuleCategory::Budget);
}
1439
/// `SystemPromptHeavy` must report the name `system_prompt_heavy` and the
/// `Prompt` category.
#[test]
fn system_prompt_heavy_name_and_category() {
    let rule = SystemPromptHeavy;
    assert_eq!(rule.name(), "system_prompt_heavy");
    assert_eq!(rule.category(), RuleCategory::Prompt);
}
1445
/// `MemoryStarvation` must report the name `memory_starvation` and the
/// `Memory` category.
#[test]
fn memory_starvation_name_and_category() {
    let rule = MemoryStarvation;
    assert_eq!(rule.name(), "memory_starvation");
    assert_eq!(rule.category(), RuleCategory::Memory);
}
1451
/// `ShallowHistory` must report the name `shallow_history` and the
/// `Quality` category.
#[test]
fn shallow_history_name_and_category() {
    let rule = ShallowHistory;
    assert_eq!(rule.name(), "shallow_history");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1457
/// `HighToolDensity` must report the name `high_tool_density` and the
/// `Tools` category.
#[test]
fn high_tool_density_name_and_category() {
    let rule = HighToolDensity;
    assert_eq!(rule.name(), "high_tool_density");
    assert_eq!(rule.category(), RuleCategory::Tools);
}
1463
/// `ToolFailures` must report the name `tool_failures` and the `Tools`
/// category.
#[test]
fn tool_failures_name_and_category() {
    let rule = ToolFailures;
    assert_eq!(rule.name(), "tool_failures");
    assert_eq!(rule.category(), RuleCategory::Tools);
}
1469
/// `ExpensiveTurn` must report the name `expensive_turn` and the `Cost`
/// category.
#[test]
fn expensive_turn_name_and_category() {
    let rule = ExpensiveTurn;
    assert_eq!(rule.name(), "expensive_turn");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1475
/// `EmptyReasoning` must report the name `empty_reasoning` and the
/// `Quality` category.
#[test]
fn empty_reasoning_name_and_category() {
    let rule = EmptyReasoning;
    assert_eq!(rule.name(), "empty_reasoning");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1481
/// `SystemPromptTax` must report the name `system_prompt_tax` and the
/// `Cost` category.
#[test]
fn system_prompt_tax_name_and_category() {
    let rule = SystemPromptTax;
    assert_eq!(rule.name(), "system_prompt_tax");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1487
/// `HistoryCostDominant` must report the name `history_cost_dominant` and
/// the `Cost` category.
#[test]
fn history_cost_dominant_name_and_category() {
    let rule = HistoryCostDominant;
    assert_eq!(rule.name(), "history_cost_dominant");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1493
/// `LargeOutputRatio` must report the name `large_output_ratio` and the
/// `Cost` category.
#[test]
fn large_output_ratio_name_and_category() {
    let rule = LargeOutputRatio;
    assert_eq!(rule.name(), "large_output_ratio");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1499
/// `CachedTurnSavings` must report the name `cached_turn_savings` and the
/// `Cost` category.
#[test]
fn cached_turn_savings_name_and_category() {
    let rule = CachedTurnSavings;
    assert_eq!(rule.name(), "cached_turn_savings");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1505
/// `ContextDrift` must report the name `context_drift` and the `Budget`
/// category.
#[test]
fn context_drift_name_and_category() {
    let rule = ContextDrift;
    assert_eq!(rule.name(), "context_drift");
    assert_eq!(rule.category(), RuleCategory::Budget);
}
1513
/// `FrequentEscalation` must report the name `frequent_escalation` and the
/// `Quality` category.
#[test]
fn frequent_escalation_name_and_category() {
    let rule = FrequentEscalation;
    assert_eq!(rule.name(), "frequent_escalation");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1519
/// `CostAcceleration` must report the name `cost_acceleration` and the
/// `Cost` category.
#[test]
fn cost_acceleration_name_and_category() {
    let rule = CostAcceleration;
    assert_eq!(rule.name(), "cost_acceleration");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1525
/// `UnderutilizedMemory` must report the name `underutilized_memory` and
/// the `Memory` category.
#[test]
fn underutilized_memory_name_and_category() {
    let rule = UnderutilizedMemory;
    assert_eq!(rule.name(), "underutilized_memory");
    assert_eq!(rule.category(), RuleCategory::Memory);
}
1531
/// `ToolSuccessRate` must report the name `tool_success_rate` and the
/// `Tools` category.
#[test]
fn tool_success_rate_name_and_category() {
    let rule = ToolSuccessRate;
    assert_eq!(rule.name(), "tool_success_rate");
    assert_eq!(rule.category(), RuleCategory::Tools);
}
1537
/// `ModelChurn` must report the name `model_churn` and the `Quality`
/// category.
#[test]
fn model_churn_name_and_category() {
    let rule = ModelChurn;
    assert_eq!(rule.name(), "model_churn");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1543
/// `QualityDeclining` must report the name `quality_declining` and the
/// `Quality` category.
#[test]
fn quality_declining_name_and_category() {
    let rule = QualityDeclining;
    assert_eq!(rule.name(), "quality_declining");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1549
/// `CostQualityMismatch` must report the name `cost_quality_mismatch` and
/// the `Cost` category.
#[test]
fn cost_quality_mismatch_name_and_category() {
    let rule = CostQualityMismatch;
    assert_eq!(rule.name(), "cost_quality_mismatch");
    assert_eq!(rule.category(), RuleCategory::Cost);
}
1555
/// `MemoryHelps` must report the name `memory_helps` and the `Memory`
/// category.
#[test]
fn memory_helps_name_and_category() {
    let rule = MemoryHelps;
    assert_eq!(rule.name(), "memory_helps");
    assert_eq!(rule.category(), RuleCategory::Memory);
}
1561
/// `LowCoverageWarning` must report the name `low_coverage_warning` and the
/// `Quality` category.
#[test]
fn low_coverage_warning_name_and_category() {
    let rule = LowCoverageWarning;
    assert_eq!(rule.name(), "low_coverage_warning");
    assert_eq!(rule.category(), RuleCategory::Quality);
}
1567
/// A 5,000-token system prompt within 10,000 input tokens on a turn costing
/// 0.10 must make `SystemPromptTax` return a tip carrying its own rule name
/// and a message that mentions the system prompt.
#[test]
fn system_prompt_tax_fires_when_expensive() {
    let turn = make_turn(|t| {
        t.tokens_in = 10_000;
        t.system_prompt_tokens = 5_000;
        t.cost = 0.10;
    });
    // `expect` replaces the `is_some()` + `unwrap()` pair so that a missing
    // tip fails with a message describing what was expected.
    let tip = SystemPromptTax
        .evaluate_turn(&turn, None)
        .expect("SystemPromptTax should return a tip for this turn");
    assert_eq!(tip.rule_name, "system_prompt_tax");
    assert!(tip.message.contains("System prompt"));
}
1583
/// A 5,000-token system prompt within 10,000 input tokens on a turn costing
/// 0.20 must be reported by `SystemPromptTax` with `Warning` severity.
#[test]
fn system_prompt_tax_warning_for_very_expensive() {
    let costly_turn = make_turn(|td| {
        td.cost = 0.20;
        td.tokens_in = 10_000;
        td.system_prompt_tokens = 5_000;
    });
    let severity = SystemPromptTax
        .evaluate_turn(&costly_turn, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Warning));
}
1595
/// With zero input tokens, `SystemPromptTax` must not return a tip.
#[test]
fn system_prompt_tax_silent_for_zero_input() {
    let empty_input = make_turn(|td| td.tokens_in = 0);
    let outcome = SystemPromptTax.evaluate_turn(&empty_input, None);
    assert!(outcome.is_none());
}
1603
/// A 100-token system prompt within 10,000 input tokens on a turn costing
/// 0.005 must not make `SystemPromptTax` return a tip.
#[test]
fn system_prompt_tax_silent_for_low_cost() {
    let cheap_turn = make_turn(|td| {
        td.cost = 0.005;
        td.tokens_in = 10_000;
        td.system_prompt_tokens = 100;
    });
    let outcome = SystemPromptTax.evaluate_turn(&cheap_turn, None);
    assert!(outcome.is_none());
}
1613
/// History taking 7,000 of 10,000 input tokens must make
/// `HistoryCostDominant` return a tip carrying its own rule name.
#[test]
fn history_cost_dominant_fires_above_60_pct() {
    let history_heavy = make_turn(|td| {
        td.history_tokens = 7_000;
        td.tokens_in = 10_000;
    });
    let rule_name = HistoryCostDominant
        .evaluate_turn(&history_heavy, None)
        .map(|tip| tip.rule_name);
    assert_eq!(rule_name.as_deref(), Some("history_cost_dominant"));
}
1626
/// History taking 8,500 of 10,000 input tokens must be reported by
/// `HistoryCostDominant` with `Warning` severity.
#[test]
fn history_cost_dominant_warning_above_80_pct() {
    let history_heavy = make_turn(|td| {
        td.history_tokens = 8_500;
        td.tokens_in = 10_000;
    });
    let severity = HistoryCostDominant
        .evaluate_turn(&history_heavy, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Warning));
}
1637
/// With zero input tokens, `HistoryCostDominant` must not return a tip.
#[test]
fn history_cost_dominant_silent_for_zero_input() {
    let empty_input = make_turn(|td| td.tokens_in = 0);
    let outcome = HistoryCostDominant.evaluate_turn(&empty_input, None);
    assert!(outcome.is_none());
}
1645
/// History taking 3,000 of 10,000 input tokens must not make
/// `HistoryCostDominant` return a tip.
#[test]
fn history_cost_dominant_silent_for_normal() {
    let balanced_turn = make_turn(|td| {
        td.history_tokens = 3_000;
        td.tokens_in = 10_000;
    });
    let outcome = HistoryCostDominant.evaluate_turn(&balanced_turn, None);
    assert!(outcome.is_none());
}
1654
/// A turn producing 3,000 output tokens from 1,000 input tokens must make
/// `LargeOutputRatio` return a tip carrying its own rule name.
#[test]
fn large_output_ratio_fires_for_verbose() {
    let verbose_turn = make_turn(|td| {
        td.tokens_out = 3_000;
        td.tokens_in = 1_000;
    });
    let rule_name = LargeOutputRatio
        .evaluate_turn(&verbose_turn, None)
        .map(|tip| tip.rule_name);
    assert_eq!(rule_name.as_deref(), Some("large_output_ratio"));
}
1667
/// A turn producing 1,000 output tokens from 5,000 input tokens must not
/// make `LargeOutputRatio` return a tip.
#[test]
fn large_output_ratio_silent_for_small_output() {
    let terse_turn = make_turn(|td| {
        td.tokens_out = 1_000;
        td.tokens_in = 5_000;
    });
    let outcome = LargeOutputRatio.evaluate_turn(&terse_turn, None);
    assert!(outcome.is_none());
}
1676
/// With zero input tokens (and 5,000 output tokens), `LargeOutputRatio`
/// must not return a tip.
#[test]
fn large_output_ratio_silent_for_zero_in() {
    let no_input = make_turn(|td| {
        td.tokens_out = 5_000;
        td.tokens_in = 0;
    });
    let outcome = LargeOutputRatio.evaluate_turn(&no_input, None);
    assert!(outcome.is_none());
}
1685
/// With zero output tokens (and 5,000 input tokens), `LargeOutputRatio`
/// must not return a tip.
#[test]
fn large_output_ratio_silent_for_zero_out() {
    let no_output = make_turn(|td| {
        td.tokens_out = 0;
        td.tokens_in = 5_000;
    });
    let outcome = LargeOutputRatio.evaluate_turn(&no_output, None);
    assert!(outcome.is_none());
}
1694
/// Ten turns split evenly between complexity levels `L2` and `L3` must be
/// reported by `FrequentEscalation` with `Warning` severity.
#[test]
fn frequent_escalation_fires_above_40_pct() {
    let escalating = make_session_turns(10, |i, td| {
        let level = if i < 5 { "L2" } else { "L3" };
        td.complexity_level = level.into();
    });
    let severity = FrequentEscalation
        .evaluate_session(&escalating)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Warning));
}
1706
/// Ten turns of which two are `L2` and eight are `L0` must not make
/// `FrequentEscalation` return a tip.
#[test]
fn frequent_escalation_silent_below_40_pct() {
    let mostly_simple = make_session_turns(10, |i, td| {
        let level = if i < 2 { "L2" } else { "L0" };
        td.complexity_level = level.into();
    });
    let outcome = FrequentEscalation.evaluate_session(&mostly_simple);
    assert!(outcome.is_none());
}
1714
/// A session without any turns must not make `FrequentEscalation` return a
/// tip.
#[test]
fn frequent_escalation_silent_for_empty_session() {
    let empty_session = SessionData {
        session_id: "s".into(),
        turns: Vec::new(),
        grades: Vec::new(),
    };
    let outcome = FrequentEscalation.evaluate_session(&empty_session);
    assert!(outcome.is_none());
}
1724
/// A session using exactly three distinct models must not make `ModelChurn`
/// return a tip.
#[test]
fn model_churn_silent_for_3_or_fewer_models() {
    let model_names = ["gpt-4", "claude-3", "gemini"];
    let session = make_session_turns(3, |i, td| {
        td.model = String::from(model_names[i]);
    });
    let outcome = ModelChurn.evaluate_session(&session);
    assert!(outcome.is_none());
}
1735
/// Five turns whose model name is the empty string must not make
/// `ModelChurn` return a tip.
#[test]
fn model_churn_ignores_empty_model_names() {
    let unnamed = make_session_turns(5, |_, td| td.model = String::new());
    let outcome = ModelChurn.evaluate_session(&unnamed);
    assert!(outcome.is_none());
}
1743
/// A turn costing 0.60 against a session average of 0.10 must be reported
/// by `ExpensiveTurn` with `Critical` severity.
#[test]
fn expensive_turn_critical_above_5x() {
    let session_avg = Some(0.10);
    let pricey_turn = make_turn(|td| td.cost = 0.60);
    let severity = ExpensiveTurn
        .evaluate_turn(&pricey_turn, session_avg)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Critical));
}
1755
/// A session average of exactly 0.0 must not make `ExpensiveTurn` return a
/// tip, even for a turn costing 0.10.
#[test]
fn expensive_turn_silent_for_zero_avg() {
    let zero_avg = Some(0.0);
    let turn = make_turn(|td| td.cost = 0.10);
    let outcome = ExpensiveTurn.evaluate_turn(&turn, zero_avg);
    assert!(outcome.is_none());
}
1763
/// A turn costing 0.05 against a session average of 0.04 must not make
/// `ExpensiveTurn` return a tip.
#[test]
fn expensive_turn_silent_below_2x() {
    let session_avg = Some(0.04);
    let turn = make_turn(|td| td.cost = 0.05);
    let outcome = ExpensiveTurn.evaluate_turn(&turn, session_avg);
    assert!(outcome.is_none());
}
1771
/// With a token budget of zero, `BudgetPressure` must not return a tip.
#[test]
fn budget_pressure_silent_for_zero_budget() {
    let no_budget = make_turn(|td| td.token_budget = 0);
    let outcome = BudgetPressure.evaluate_turn(&no_budget, None);
    assert!(outcome.is_none());
}
1781
/// A system prompt taking 65 of 100 budget tokens must be reported by
/// `SystemPromptHeavy` with `Critical` severity.
#[test]
fn system_prompt_heavy_critical_above_60_pct() {
    let bloated_turn = make_turn(|td| {
        td.system_prompt_tokens = 65;
        td.token_budget = 100;
    });
    let severity = SystemPromptHeavy
        .evaluate_turn(&bloated_turn, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Critical));
}
1794
/// With a token budget of zero, `SystemPromptHeavy` must not return a tip.
#[test]
fn system_prompt_heavy_silent_for_zero_budget() {
    let no_budget = make_turn(|td| td.token_budget = 0);
    let outcome = SystemPromptHeavy.evaluate_turn(&no_budget, None);
    assert!(outcome.is_none());
}
1802
/// With a token budget of zero, `MemoryStarvation` must not return a tip.
#[test]
fn memory_starvation_silent_for_zero_budget() {
    let no_budget = make_turn(|td| td.token_budget = 0);
    let outcome = MemoryStarvation.evaluate_turn(&no_budget, None);
    assert!(outcome.is_none());
}
1812
/// With 200 memory tokens against a 1000-token budget, `MemoryStarvation`
/// must not return a tip.
#[test]
fn memory_starvation_silent_for_high_memory() {
    let well_fed = make_turn(|td| {
        td.memory_tokens = 200;
        td.token_budget = 1000;
    });
    let outcome = MemoryStarvation.evaluate_turn(&well_fed, None);
    assert!(outcome.is_none());
}
1821
/// A history depth of 3 must not make `ShallowHistory` return a tip.
#[test]
fn shallow_history_silent_for_depth_3_or_more() {
    let deep_enough = make_turn(|td| td.history_depth = 3);
    let outcome = ShallowHistory.evaluate_turn(&deep_enough, None);
    assert!(outcome.is_none());
}
1831
/// Ten tool calls in a single turn must be reported by `HighToolDensity`
/// with `Warning` severity.
#[test]
fn high_tool_density_warning_above_8() {
    let busy_turn = make_turn(|td| td.tool_call_count = 10);
    let severity = HighToolDensity
        .evaluate_turn(&busy_turn, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Warning));
}
1843
/// Exactly three tool calls in a turn must not make `HighToolDensity`
/// return a tip.
#[test]
fn high_tool_density_silent_for_3_or_fewer() {
    let calm_turn = make_turn(|td| td.tool_call_count = 3);
    let outcome = HighToolDensity.evaluate_turn(&calm_turn, None);
    assert!(outcome.is_none());
}
1851
/// One failure out of four tool calls must be reported by `ToolFailures`
/// with `Warning` severity.
#[test]
fn tool_failures_warning_below_50_pct() {
    let mostly_ok = make_turn(|td| {
        td.tool_failure_count = 1;
        td.tool_call_count = 4;
    });
    let severity = ToolFailures
        .evaluate_turn(&mostly_ok, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Warning));
}
1864
/// Five tool calls with zero failures must not make `ToolFailures` return a
/// tip.
#[test]
fn tool_failures_silent_for_no_failures() {
    let clean_turn = make_turn(|td| {
        td.tool_failure_count = 0;
        td.tool_call_count = 5;
    });
    let outcome = ToolFailures.evaluate_turn(&clean_turn, None);
    assert!(outcome.is_none());
}
1873
/// An inconsistent turn reporting one failure but zero tool calls must still
/// be reported by `ToolFailures`, with `Critical` severity.
#[test]
fn tool_failures_handles_zero_total_calls() {
    let inconsistent = make_turn(|td| {
        td.tool_failure_count = 1;
        td.tool_call_count = 0;
    });
    let severity = ToolFailures
        .evaluate_turn(&inconsistent, None)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Critical));
}
1884
/// A turn not flagged as having reasoning must not make `EmptyReasoning`
/// return a tip, even with a thinking length of zero.
#[test]
fn empty_reasoning_silent_when_no_reasoning() {
    let plain_turn = make_turn(|td| {
        td.thinking_length = 0;
        td.has_reasoning = false;
    });
    let outcome = EmptyReasoning.evaluate_turn(&plain_turn, None);
    assert!(outcome.is_none());
}
1895
/// A reasoning turn with a thinking length of 500 must not make
/// `EmptyReasoning` return a tip.
#[test]
fn empty_reasoning_silent_when_thinking_present() {
    let thoughtful_turn = make_turn(|td| {
        td.thinking_length = 500;
        td.has_reasoning = true;
    });
    let outcome = EmptyReasoning.evaluate_turn(&thoughtful_turn, None);
    assert!(outcome.is_none());
}
1904
/// A turn that is not marked as cached must not make `CachedTurnSavings`
/// return a tip.
#[test]
fn cached_turn_savings_silent_when_not_cached() {
    let uncached_turn = make_turn(|td| td.cached = false);
    let outcome = CachedTurnSavings.evaluate_turn(&uncached_turn, None);
    assert!(outcome.is_none());
}
1914
/// A three-turn session of unmodified fixture turns must not make
/// `ContextDrift` return a tip.
#[test]
fn context_drift_silent_for_short_sessions() {
    let short_session = make_session_turns(3, |_, _| {});
    let outcome = ContextDrift.evaluate_session(&short_session);
    assert!(outcome.is_none());
}
1922
/// Six turns that all have a token budget of zero must not make
/// `ContextDrift` return a tip.
#[test]
fn context_drift_silent_with_zero_budgets() {
    let budgetless = make_session_turns(6, |_, td| td.token_budget = 0);
    let outcome = ContextDrift.evaluate_session(&budgetless);
    assert!(outcome.is_none());
}
1930
/// A three-turn session of unmodified fixture turns must not make
/// `CostAcceleration` return a tip.
#[test]
fn cost_acceleration_silent_for_short_sessions() {
    let short_session = make_session_turns(3, |_, _| {});
    let outcome = CostAcceleration.evaluate_session(&short_session);
    assert!(outcome.is_none());
}
1938
/// Six turns that each cost 0.05 must not make `CostAcceleration` return a
/// tip.
#[test]
fn cost_acceleration_silent_when_stable() {
    let steady = make_session_turns(6, |_, td| td.cost = 0.05);
    let outcome = CostAcceleration.evaluate_session(&steady);
    assert!(outcome.is_none());
}
1946
/// Three turns with 4 failures out of 5 tool calls each must be reported by
/// `ToolSuccessRate` with `Critical` severity.
#[test]
fn tool_success_rate_critical_below_50_pct() {
    let failing = make_session_turns(3, |_, td| {
        td.tool_failure_count = 4;
        td.tool_call_count = 5;
    });
    let severity = ToolSuccessRate
        .evaluate_session(&failing)
        .map(|tip| tip.severity);
    assert_eq!(severity, Some(Severity::Critical));
}
1959
/// Two turns with a single (failed) tool call each must not make
/// `ToolSuccessRate` return a tip.
#[test]
fn tool_success_rate_silent_for_few_calls() {
    let sparse = make_session_turns(2, |_, td| {
        td.tool_failure_count = 1;
        td.tool_call_count = 1;
    });
    let outcome = ToolSuccessRate.evaluate_session(&sparse);
    assert!(outcome.is_none());
}
1968
/// A session without any turns must not make `UnderutilizedMemory` return a
/// tip.
#[test]
fn underutilized_memory_silent_for_empty_session() {
    let empty_session = SessionData {
        session_id: "s".into(),
        turns: Vec::new(),
        grades: Vec::new(),
    };
    let outcome = UnderutilizedMemory.evaluate_session(&empty_session);
    assert!(outcome.is_none());
}
1980
/// If at least one turn (here the first, with 100 tokens) carries memory,
/// `UnderutilizedMemory` must not return a tip.
#[test]
fn underutilized_memory_silent_when_some_memory() {
    let session = make_session_turns(4, |i, td| {
        let memory = if i == 0 { 100 } else { 0 };
        td.memory_tokens = memory;
    });
    let outcome = UnderutilizedMemory.evaluate_session(&session);
    assert!(outcome.is_none());
}
1988
/// Two grades (5 then 1) in a four-turn session must not make
/// `QualityDeclining` return a tip.
#[test]
fn quality_declining_silent_for_few_grades() {
    let two_grades: Vec<(String, i32)> =
        vec![(String::from("turn-0"), 5), (String::from("turn-1"), 1)];
    let session = make_graded_session(4, |_, _| {}, two_grades);
    let outcome = QualityDeclining.evaluate_session(&session);
    assert!(outcome.is_none());
}
1997
/// A four-turn session without grades must not make `CostQualityMismatch`
/// return a tip.
#[test]
fn cost_quality_mismatch_silent_for_empty_grades() {
    let ungraded = make_graded_session(4, |_, _| {}, Vec::new());
    let outcome = CostQualityMismatch.evaluate_session(&ungraded);
    assert!(outcome.is_none());
}
2005
/// A session consisting of one graded turn must not make
/// `CostQualityMismatch` return a tip.
#[test]
fn cost_quality_mismatch_silent_for_single_turn() {
    let single_grade: Vec<(String, i32)> = vec![(String::from("turn-0"), 5)];
    let session = make_graded_session(1, |_, _| {}, single_grade);
    let outcome = CostQualityMismatch.evaluate_session(&session);
    assert!(outcome.is_none());
}
2012
/// Two graded turns built from the unmodified fixture (so neither turn's
/// model is changed) must not make `CostQualityMismatch` return a tip.
#[test]
fn cost_quality_mismatch_silent_for_single_model() {
    let grades: Vec<(String, i32)> =
        vec![(String::from("turn-0"), 5), (String::from("turn-1"), 4)];
    let session = make_graded_session(2, |_, _| {}, grades);
    let outcome = CostQualityMismatch.evaluate_session(&session);
    assert!(outcome.is_none());
}
2019
/// A four-turn session without grades must not make `MemoryHelps` return a
/// tip.
#[test]
fn memory_helps_silent_for_empty_grades() {
    let ungraded = make_graded_session(4, |_, _| {}, Vec::new());
    let outcome = MemoryHelps.evaluate_session(&ungraded);
    assert!(outcome.is_none());
}
2027
/// With only one graded turn in a two-turn session (one turn with memory,
/// one without), `MemoryHelps` must not return a tip.
#[test]
fn memory_helps_silent_when_insufficient_samples() {
    let single_grade: Vec<(String, i32)> = vec![(String::from("turn-0"), 5)];
    let with_some_memory = |i: usize, td: &mut TurnData| {
        td.memory_tokens = if i == 0 { 100 } else { 0 };
    };
    let session = make_graded_session(2, with_some_memory, single_grade);
    let outcome = MemoryHelps.evaluate_session(&session);
    assert!(outcome.is_none());
}
2040
/// For a turn with a zero token budget, the analysis prompt must report
/// `Budget utilization: 0%`.
#[test]
fn llm_analyzer_build_prompt_zero_budget() {
    let no_budget = make_turn(|td| td.token_budget = 0);
    let rendered = LlmAnalyzer::build_analysis_prompt(&no_budget, &[]);
    assert!(rendered.contains("Budget utilization: 0%"));
}
2051
/// With an empty tip list, the analysis prompt must contain the `(none)`
/// placeholder.
#[test]
fn llm_analyzer_build_prompt_with_no_tips() {
    let default_turn = make_turn(|_| {});
    let rendered = LlmAnalyzer::build_analysis_prompt(&default_turn, &[]);
    assert!(rendered.contains("(none)"));
}
2058
/// The session prompt must include the rule name and message of a tip that
/// is passed in.
#[test]
fn llm_analyzer_session_prompt_with_tips() {
    let sample_tip = Tip {
        severity: Severity::Warning,
        category: RuleCategory::Cost,
        rule_name: String::from("test_rule"),
        message: String::from("test msg"),
        suggestion: String::from("test sugg"),
    };
    let tips = vec![sample_tip];
    let session = make_session_turns(3, |_, _| {});
    let rendered = LlmAnalyzer::build_session_prompt(&session, &tips);
    assert!(rendered.contains("test_rule"));
    assert!(rendered.contains("test msg"));
}
2073
/// With an empty tip list, the session prompt must contain the `(none)`
/// placeholder.
#[test]
fn llm_analyzer_session_prompt_no_tips() {
    let three_turns = make_session_turns(3, |_, _| {});
    let rendered = LlmAnalyzer::build_session_prompt(&three_turns, &[]);
    assert!(rendered.contains("(none)"));
}
2080
/// With 50 graded turns in a 60-turn session, `LowCoverageWarning` must not
/// return a tip.
#[test]
fn low_coverage_warning_silent_when_good_coverage() {
    let mut dense_grades: Vec<(String, i32)> = Vec::new();
    for i in 0..50 {
        dense_grades.push((format!("turn-{i}"), 4));
    }
    let session = make_graded_session(60, |_, _| {}, dense_grades);
    let outcome = LowCoverageWarning.evaluate_session(&session);
    assert!(outcome.is_none());
}
2089}