1use std::collections::VecDeque;
7
/// How serious a detected anomaly is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AnomalySeverity {
    /// The failure ratio reached the detector's warning (`error_threshold`)
    /// level but stayed below the critical one.
    Warning,
    /// The failure ratio reached the detector's `critical_threshold`.
    Critical,
}
14
15#[derive(Debug, Clone)]
17pub struct Anomaly {
18 pub severity: AnomalySeverity,
19 pub description: String,
20}
21
22#[derive(Debug)]
24pub struct AnomalyDetector {
25 window: VecDeque<Outcome>,
26 window_size: usize,
27 error_threshold: f64,
28 critical_threshold: f64,
29}
30
/// Result of a single recorded event, as stored in the detector's window.
#[derive(Clone, Copy, Debug)]
enum Outcome {
    /// The event completed normally.
    Success,
    /// The event failed; counted as a failure by the detector.
    Error,
    /// The event was blocked; counted as a failure by the detector, same as
    /// `Error`.
    Blocked,
}
37
38impl AnomalyDetector {
39 #[must_use]
40 pub fn new(window_size: usize, error_threshold: f64, critical_threshold: f64) -> Self {
41 Self {
42 window: VecDeque::with_capacity(window_size),
43 window_size,
44 error_threshold,
45 critical_threshold,
46 }
47 }
48
49 pub fn record_success(&mut self) {
51 self.push(Outcome::Success);
52 }
53
54 pub fn record_error(&mut self) {
56 self.push(Outcome::Error);
57 }
58
59 pub fn record_blocked(&mut self) {
61 self.push(Outcome::Blocked);
62 }
63
64 pub fn record_reasoning_quality_failure(&mut self, model_name: &str, tool_name: &str) {
71 self.push(Outcome::Error);
72 tracing::warn!(
73 model = model_name,
74 tool = tool_name,
75 category = "reasoning_amplification",
76 "quality failure from reasoning model — CoT may amplify tool hallucination (arXiv:2510.22977)"
77 );
78 }
79
80 fn push(&mut self, outcome: Outcome) {
81 if self.window.len() >= self.window_size {
82 self.window.pop_front();
83 }
84 self.window.push_back(outcome);
85 }
86
87 #[must_use]
89 #[allow(clippy::cast_precision_loss)]
90 pub fn check(&self) -> Option<Anomaly> {
91 if self.window.len() < 3 {
92 return None;
93 }
94
95 let total = self.window.len();
96 let errors = self
97 .window
98 .iter()
99 .filter(|o| matches!(o, Outcome::Error | Outcome::Blocked))
100 .count();
101
102 let ratio = errors as f64 / total as f64;
103
104 if ratio >= self.critical_threshold {
105 Some(Anomaly {
106 severity: AnomalySeverity::Critical,
107 description: format!(
108 "error rate {:.0}% ({errors}/{total}) exceeds critical threshold",
109 ratio * 100.0,
110 ),
111 })
112 } else if ratio >= self.error_threshold {
113 Some(Anomaly {
114 severity: AnomalySeverity::Warning,
115 description: format!(
116 "error rate {:.0}% ({errors}/{total}) exceeds warning threshold",
117 ratio * 100.0,
118 ),
119 })
120 } else {
121 None
122 }
123 }
124
125 pub fn reset(&mut self) {
127 self.window.clear();
128 }
129}
130
131impl Default for AnomalyDetector {
132 fn default() -> Self {
133 Self::new(10, 0.5, 0.8)
134 }
135}
136
/// Returns `true` when `model_name` matches one of the known reasoning-model
/// naming patterns.
///
/// Matching is ASCII case-insensitive. A name matches when it:
/// * starts with `o1`, `o3` or `o4`;
/// * contains `qwq`;
/// * contains `deepseek-r1` or `deepseek_r1`; or
/// * starts with `claude` and contains `think`.
#[must_use]
pub fn is_reasoning_model(model_name: &str) -> bool {
    const OPENAI_O_PREFIXES: [&str; 3] = ["o1", "o3", "o4"];
    const DEEPSEEK_R1_MARKERS: [&str; 2] = ["deepseek-r1", "deepseek_r1"];

    let name = model_name.to_ascii_lowercase();

    // Prefix match only: names such as "gpt-4o" must not be caught.
    if OPENAI_O_PREFIXES.iter().any(|&prefix| name.starts_with(prefix)) {
        return true;
    }
    if name.contains("qwq") {
        return true;
    }
    if DEEPSEEK_R1_MARKERS.iter().any(|&marker| name.contains(marker)) {
        return true;
    }
    name.starts_with("claude") && name.contains("think")
}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Calls `record` on `det` exactly `times` times.
    fn repeat(det: &mut AnomalyDetector, times: usize, record: fn(&mut AnomalyDetector)) {
        for _ in 0..times {
            record(det);
        }
    }

    /// Severity currently reported by `det`, or `None` when nothing is flagged.
    fn severity(det: &AnomalyDetector) -> Option<AnomalySeverity> {
        det.check().map(|anomaly| anomaly.severity)
    }

    #[test]
    fn no_anomaly_on_success() {
        let mut det = AnomalyDetector::default();
        repeat(&mut det, 10, AnomalyDetector::record_success);
        assert!(det.check().is_none());
    }

    #[test]
    fn warning_on_half_errors() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        repeat(&mut det, 5, AnomalyDetector::record_success);
        repeat(&mut det, 5, AnomalyDetector::record_error);
        assert_eq!(severity(&det), Some(AnomalySeverity::Warning));
    }

    #[test]
    fn critical_on_high_errors() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        repeat(&mut det, 2, AnomalyDetector::record_success);
        repeat(&mut det, 8, AnomalyDetector::record_error);
        assert_eq!(severity(&det), Some(AnomalySeverity::Critical));
    }

    #[test]
    fn blocked_counts_as_error() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        repeat(&mut det, 2, AnomalyDetector::record_success);
        repeat(&mut det, 8, AnomalyDetector::record_blocked);
        assert_eq!(severity(&det), Some(AnomalySeverity::Critical));
    }

    #[test]
    fn window_slides() {
        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
        repeat(&mut det, 5, AnomalyDetector::record_error);
        assert!(det.check().is_some());

        // Five successes push every earlier error out of the 5-slot window.
        repeat(&mut det, 5, AnomalyDetector::record_success);
        assert!(det.check().is_none());
    }

    #[test]
    fn too_few_samples_returns_none() {
        let mut det = AnomalyDetector::default();
        repeat(&mut det, 2, AnomalyDetector::record_error);
        assert!(det.check().is_none());
    }

    #[test]
    fn reset_clears_window() {
        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
        repeat(&mut det, 5, AnomalyDetector::record_error);
        assert!(det.check().is_some());

        det.reset();
        assert!(det.check().is_none());
    }

    #[test]
    fn default_thresholds() {
        let det = AnomalyDetector::default();
        assert_eq!(det.window_size, 10);
        assert!((det.error_threshold - 0.5).abs() < f64::EPSILON);
        assert!((det.critical_threshold - 0.8).abs() < f64::EPSILON);
    }

    #[test]
    fn is_reasoning_model_openai_o_series() {
        for name in ["o1", "o1-mini", "o1-preview", "o3", "o3-mini", "o4-mini"].iter() {
            assert!(is_reasoning_model(name));
        }
        for name in ["gpt-4o", "gpt-4o-mini"].iter() {
            assert!(!is_reasoning_model(name));
        }
    }

    #[test]
    fn is_reasoning_model_other_families() {
        let reasoning = [
            "QwQ-32B",
            "deepseek-r1",
            "deepseek-r1-distill-qwen-14b",
            "claude-3-opus-think",
        ];
        for name in reasoning.iter() {
            assert!(is_reasoning_model(name));
        }
        for name in ["claude-3-opus", "qwen2.5:14b"].iter() {
            assert!(!is_reasoning_model(name));
        }
    }

    #[test]
    fn record_reasoning_quality_failure_increments_error_count() {
        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
        // Six failures and nothing else: the ratio is 6/6, above critical.
        for _ in 0..6 {
            det.record_reasoning_quality_failure("o1", "shell");
        }
        assert_eq!(severity(&det), Some(AnomalySeverity::Critical));
    }
}