1use std::collections::VecDeque;
7
/// How serious a detected anomaly is.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalySeverity {
    /// The failure ratio reached the detector's warning (`error_threshold`) level.
    Warning,
    /// The failure ratio reached the detector's `critical_threshold` level.
    Critical,
}
15
16#[derive(Debug, Clone)]
18pub struct Anomaly {
19 pub severity: AnomalySeverity,
20 pub description: String,
21}
22
23#[derive(Debug)]
25pub struct AnomalyDetector {
26 window: VecDeque<Outcome>,
27 window_size: usize,
28 error_threshold: f64,
29 critical_threshold: f64,
30}
31
/// Result of a single recorded operation.
#[derive(Clone, Copy, Debug)]
enum Outcome {
    /// The operation completed normally.
    Success,
    /// The operation failed; counted as a failure by the detector.
    Error,
    /// The operation was blocked; also counted as a failure by the detector.
    Blocked,
}
38
39impl AnomalyDetector {
40 #[must_use]
41 pub fn new(window_size: usize, error_threshold: f64, critical_threshold: f64) -> Self {
42 Self {
43 window: VecDeque::with_capacity(window_size),
44 window_size,
45 error_threshold,
46 critical_threshold,
47 }
48 }
49
50 pub fn record_success(&mut self) {
52 self.push(Outcome::Success);
53 }
54
55 pub fn record_error(&mut self) {
57 self.push(Outcome::Error);
58 }
59
60 pub fn record_blocked(&mut self) {
62 self.push(Outcome::Blocked);
63 }
64
65 pub fn record_reasoning_quality_failure(&mut self, model_name: &str, tool_name: &str) {
72 self.push(Outcome::Error);
73 tracing::warn!(
74 model = model_name,
75 tool = tool_name,
76 category = "reasoning_amplification",
77 "quality failure from reasoning model — CoT may amplify tool hallucination (arXiv:2510.22977)"
78 );
79 }
80
81 fn push(&mut self, outcome: Outcome) {
82 if self.window.len() >= self.window_size {
83 self.window.pop_front();
84 }
85 self.window.push_back(outcome);
86 }
87
88 #[must_use]
90 #[allow(clippy::cast_precision_loss)]
91 pub fn check(&self) -> Option<Anomaly> {
92 if self.window.len() < 3 {
93 return None;
94 }
95
96 let total = self.window.len();
97 let errors = self
98 .window
99 .iter()
100 .filter(|o| matches!(o, Outcome::Error | Outcome::Blocked))
101 .count();
102
103 let ratio = errors as f64 / total as f64;
104
105 if ratio >= self.critical_threshold {
106 Some(Anomaly {
107 severity: AnomalySeverity::Critical,
108 description: format!(
109 "error rate {:.0}% ({errors}/{total}) exceeds critical threshold",
110 ratio * 100.0,
111 ),
112 })
113 } else if ratio >= self.error_threshold {
114 Some(Anomaly {
115 severity: AnomalySeverity::Warning,
116 description: format!(
117 "error rate {:.0}% ({errors}/{total}) exceeds warning threshold",
118 ratio * 100.0,
119 ),
120 })
121 } else {
122 None
123 }
124 }
125
126 pub fn reset(&mut self) {
128 self.window.clear();
129 }
130}
131
132impl Default for AnomalyDetector {
133 fn default() -> Self {
134 Self::new(10, 0.5, 0.8)
135 }
136}
137
/// Heuristically decides whether `model_name` names a reasoning model.
///
/// Matching is ASCII case-insensitive. A name is accepted when it
/// - starts with `o1`, `o3` or `o4`,
/// - contains `qwq`, `deepseek-r1` or `deepseek_r1`, or
/// - starts with `claude` and contains `think`.
#[must_use]
pub fn is_reasoning_model(model_name: &str) -> bool {
    const O_SERIES_PREFIXES: [&str; 3] = ["o1", "o3", "o4"];
    const FAMILY_MARKERS: [&str; 3] = ["qwq", "deepseek-r1", "deepseek_r1"];

    let name = model_name.to_ascii_lowercase();

    O_SERIES_PREFIXES
        .iter()
        .any(|&prefix| name.starts_with(prefix))
        || FAMILY_MARKERS.iter().any(|&marker| name.contains(marker))
        || (name.starts_with("claude") && name.contains("think"))
}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds `successes` successful outcomes followed by `errors` failed ones.
    fn feed(det: &mut AnomalyDetector, successes: usize, errors: usize) {
        (0..successes).for_each(|_| det.record_success());
        (0..errors).for_each(|_| det.record_error());
    }

    /// Severity of the anomaly currently reported by `det`; panics if there is none.
    fn severity_of(det: &AnomalyDetector) -> AnomalySeverity {
        det.check().expect("an anomaly should be reported").severity
    }

    #[test]
    fn no_anomaly_on_success() {
        let mut det = AnomalyDetector::default();
        feed(&mut det, 10, 0);
        assert!(det.check().is_none());
    }

    #[test]
    fn warning_on_half_errors() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        feed(&mut det, 5, 5);
        assert_eq!(severity_of(&det), AnomalySeverity::Warning);
    }

    #[test]
    fn critical_on_high_errors() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        feed(&mut det, 2, 8);
        assert_eq!(severity_of(&det), AnomalySeverity::Critical);
    }

    #[test]
    fn blocked_counts_as_error() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        feed(&mut det, 2, 0);
        (0..8).for_each(|_| det.record_blocked());
        assert_eq!(severity_of(&det), AnomalySeverity::Critical);
    }

    #[test]
    fn window_slides() {
        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
        feed(&mut det, 0, 5);
        assert!(det.check().is_some());

        // Five successes push every earlier error out of the five-slot window.
        feed(&mut det, 5, 0);
        assert!(det.check().is_none());
    }

    #[test]
    fn too_few_samples_returns_none() {
        let mut det = AnomalyDetector::default();
        feed(&mut det, 0, 2);
        assert!(det.check().is_none());
    }

    #[test]
    fn reset_clears_window() {
        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
        feed(&mut det, 0, 5);
        assert!(det.check().is_some());
        det.reset();
        assert!(det.check().is_none());
    }

    #[test]
    fn default_thresholds() {
        let det = AnomalyDetector::default();
        assert_eq!(det.window_size, 10);
        assert!((det.error_threshold - 0.5).abs() < f64::EPSILON);
        assert!((det.critical_threshold - 0.8).abs() < f64::EPSILON);
    }

    #[test]
    fn is_reasoning_model_openai_o_series() {
        for name in ["o1", "o1-mini", "o1-preview", "o3", "o3-mini", "o4-mini"] {
            assert!(is_reasoning_model(name));
        }
        for name in ["gpt-4o", "gpt-4o-mini"] {
            assert!(!is_reasoning_model(name));
        }
    }

    #[test]
    fn is_reasoning_model_other_families() {
        for name in [
            "QwQ-32B",
            "deepseek-r1",
            "deepseek-r1-distill-qwen-14b",
            "claude-3-opus-think",
        ] {
            assert!(is_reasoning_model(name));
        }
        for name in ["claude-3-opus", "qwen2.5:14b"] {
            assert!(!is_reasoning_model(name));
        }
    }

    #[test]
    fn record_reasoning_quality_failure_increments_error_count() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        // Six failures and no successes give a 100% failure ratio.
        (0..6).for_each(|_| det.record_reasoning_quality_failure("o1", "shell"));
        assert_eq!(severity_of(&det), AnomalySeverity::Critical);
    }
}