1use super::diagnostics::{
13 Anomaly, AnomalyCategory, ComponentMetrics, ComponentNode, StackDiagnostics,
14};
15use serde::{Deserialize, Serialize};
16
/// Index into the `extract_features` vector: demo/quality score.
const FEAT_DEMO_SCORE: usize = 0;
/// Feature index: test coverage percentage.
const FEAT_COVERAGE: usize = 1;
// Indices 2 (mutation_score) and 4 (satd_count) have no named constants
// because no CategoryRule inspects them directly.
/// Feature index: average cyclomatic complexity.
const FEAT_COMPLEXITY: usize = 3;
/// Feature index: dead-code percentage.
const FEAT_DEAD_CODE: usize = 5;
26
27fn extract_features(metrics: &ComponentMetrics) -> Vec<f64> {
31 vec![
32 metrics.demo_score,
33 metrics.coverage,
34 metrics.mutation_score,
35 metrics.complexity_avg,
36 metrics.satd_count as f64,
37 metrics.dead_code_pct,
38 ]
39}
40
/// A single-feature threshold rule mapping a metric to an anomaly category,
/// plus the human-readable text attached to matching anomalies.
struct CategoryRule {
    /// Index into the `extract_features` vector this rule inspects.
    feature_index: usize,
    /// Value the feature is compared against.
    threshold: f64,
    /// When true the rule fires if the feature is strictly below `threshold`;
    /// otherwise it fires when strictly above.
    below: bool,
    /// Category assigned to anomalies matched by this rule.
    category: AnomalyCategory,
    /// Description containing a `{val:.1}` placeholder filled by `render_description`.
    description_template: &'static str,
    /// Suggested remediation for matching anomalies.
    recommendation: &'static str,
}
55
/// Rules checked first-to-last by `find_matching_rule`; order encodes
/// priority, so quality-score regressions win over coverage, complexity,
/// and dead-code findings.
const CATEGORY_RULES: &[CategoryRule] = &[
    CategoryRule {
        feature_index: FEAT_DEMO_SCORE,
        threshold: 70.0,
        below: true,
        category: AnomalyCategory::QualityRegression,
        description_template: "Quality score {val:.1} is significantly below healthy threshold",
        recommendation: "Review recent changes for quality regressions",
    },
    CategoryRule {
        feature_index: FEAT_COVERAGE,
        threshold: 50.0,
        below: true,
        category: AnomalyCategory::CoverageDrop,
        description_template: "Test coverage {val:.1}% is dangerously low",
        recommendation: "Run `cargo tarpaulin` and add tests for uncovered paths",
    },
    CategoryRule {
        feature_index: FEAT_COMPLEXITY,
        threshold: 15.0,
        below: false,
        category: AnomalyCategory::ComplexityIncrease,
        description_template: "Average complexity {val:.1} indicates maintainability risk",
        recommendation: "Consider refactoring complex functions (>10 cyclomatic complexity)",
    },
    CategoryRule {
        feature_index: FEAT_DEAD_CODE,
        threshold: 10.0,
        below: false,
        category: AnomalyCategory::DependencyRisk,
        description_template: "Dead code {val:.1}% suggests technical debt accumulation",
        recommendation: "Run `cargo udeps` to identify and remove dead code",
    },
];
91
92fn rule_matches(rule: &CategoryRule, features: &[f64]) -> bool {
94 let val = features[rule.feature_index];
95 if rule.below {
96 val < rule.threshold
97 } else {
98 val > rule.threshold
99 }
100}
101
102fn find_matching_rule(features: &[f64]) -> Option<&'static CategoryRule> {
104 CATEGORY_RULES.iter().find(|r| rule_matches(r, features))
105}
106
/// Substitutes the `{val:.1}` placeholder in `template` with the feature at
/// `feature_index`, formatted to one decimal place.
///
/// # Panics
/// Panics if `feature_index` is out of bounds; callers pass an index taken
/// from a matched `CategoryRule`, which is valid for a full feature vector.
fn render_description(template: &str, features: &[f64], feature_index: usize) -> String {
    let formatted = format!("{:.1}", features[feature_index]);
    template.replace("{val:.1}", &formatted)
}
112
/// Minimal deterministic PRNG (64-bit LCG) so training is reproducible
/// without pulling in the `rand` crate. Not cryptographically secure.
#[derive(Debug, Clone)]
struct SimpleRng {
    state: u64,
}

impl SimpleRng {
    /// Builds a generator whose output sequence is fully determined by `seed`.
    fn seed_from_u64(seed: u64) -> Self {
        // XOR with a fixed constant so seed 0 does not start at state 0.
        const SEED_MASK: u64 = 0x5DEECE66D;
        SimpleRng { state: seed ^ SEED_MASK }
    }

    /// Advances the LCG one step and returns the new state.
    fn next_u64(&mut self) -> u64 {
        // Knuth's MMIX multiplier/increment pair.
        const MULTIPLIER: u64 = 6364136223846793005;
        const INCREMENT: u64 = 1442695040888963407;
        self.state = self.state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        self.state
    }

    /// Uniform index in `range`; an empty range returns `range.start`
    /// without consuming any randomness.
    fn gen_range(&mut self, range: std::ops::Range<usize>) -> usize {
        let span = range.end.saturating_sub(range.start);
        if span == 0 {
            range.start
        } else {
            // Modulo bias is acceptable for this internal sampling use.
            range.start + (self.next_u64() as usize % span)
        }
    }

    /// Uniform float in `range`, derived from one `next_u64` draw.
    fn gen_range_f64(&mut self, range: std::ops::Range<f64>) -> f64 {
        let unit = (self.next_u64() as f64) / (u64::MAX as f64);
        range.start + unit * (range.end - range.start)
    }
}
147
/// Isolation-forest anomaly detector: an ensemble of randomized partition
/// trees where anomalous points isolate at shallower depths.
#[derive(Debug)]
pub struct IsolationForest {
    /// Number of trees built by `fit`.
    n_trees: usize,
    /// Rows sampled (with replacement) per tree; also used as the
    /// normalization size in `score`.
    sample_size: usize,
    /// Seed for the deterministic internal RNG.
    seed: u64,
    /// Trees produced by the most recent `fit` call; empty until trained.
    trees: Vec<IsolationTree>,
    /// Optional feature names — informational only; not read by training
    /// or scoring in this file.
    feature_names: Vec<String>,
}
167
168impl IsolationForest {
169 pub fn new(n_trees: usize, sample_size: usize, seed: u64) -> Self {
171 Self { n_trees, sample_size, seed, trees: Vec::new(), feature_names: Vec::new() }
172 }
173
174 pub fn default_forest() -> Self {
176 Self::new(100, 256, 42)
177 }
178
179 pub fn with_feature_names(mut self, names: Vec<String>) -> Self {
181 self.feature_names = names;
182 self
183 }
184
185 pub fn fit(&mut self, data: &[Vec<f64>]) {
188 if data.is_empty() {
189 return;
190 }
191
192 let mut rng = SimpleRng::seed_from_u64(self.seed);
193 let n_samples = data.len();
194 let max_depth = (self.sample_size as f64).log2().ceil() as usize;
195
196 self.trees.clear();
197
198 for _ in 0..self.n_trees {
199 let sample: Vec<Vec<f64>> = (0..self.sample_size.min(n_samples))
201 .map(|_| {
202 let idx = rng.gen_range(0..n_samples);
203 data[idx].clone()
204 })
205 .collect();
206
207 let tree = IsolationTree::build(&sample, max_depth, &mut rng);
209 self.trees.push(tree);
210 }
211 }
212
213 pub fn score(&self, data: &[Vec<f64>]) -> Vec<f64> {
216 if self.trees.is_empty() || data.is_empty() {
217 return vec![0.0; data.len()];
218 }
219
220 let n = self.sample_size as f64;
221 let c_n = average_path_length(n);
222
223 data.iter()
224 .map(|point| {
225 let avg_path_length: f64 =
226 self.trees.iter().map(|tree| tree.path_length(point, 0) as f64).sum::<f64>()
227 / self.trees.len() as f64;
228
229 2.0_f64.powf(-avg_path_length / c_n)
232 })
233 .collect()
234 }
235
236 pub fn predict(&self, data: &[Vec<f64>], threshold: f64) -> Vec<bool> {
238 self.score(data).into_iter().map(|s| s > threshold).collect()
239 }
240
241 pub fn detect_anomalies(&self, diagnostics: &StackDiagnostics, threshold: f64) -> Vec<Anomaly> {
243 let components: Vec<_> = diagnostics.components().collect();
244 if components.is_empty() {
245 return Vec::new();
246 }
247
248 let data: Vec<Vec<f64>> = components.iter().map(|c| extract_features(&c.metrics)).collect();
250
251 let scores = self.score(&data);
252 let mut anomalies = Vec::new();
253
254 for (i, (component, score)) in components.iter().zip(scores.iter()).enumerate() {
255 if *score > threshold {
256 let category = self.categorize_anomaly(&data[i]);
257 let description = self.describe_anomaly(&data[i], &category);
258
259 let mut anomaly =
260 Anomaly::new(component.name.clone(), *score, category, description);
261
262 anomaly = anomaly
264 .with_evidence(format!("Isolation score: {:.3}", score))
265 .with_evidence(format!("Demo score: {:.1}", component.metrics.demo_score))
266 .with_evidence(format!("Coverage: {:.1}%", component.metrics.coverage));
267
268 let rec = self.recommend_action(&category, &data[i]);
270 anomaly = anomaly.with_recommendation(rec);
271
272 anomalies.push(anomaly);
273 }
274 }
275
276 anomalies
278 .sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
279 anomalies
280 }
281
282 fn categorize_anomaly(&self, features: &[f64]) -> AnomalyCategory {
284 if features.len() < 6 {
285 return AnomalyCategory::Other;
286 }
287 find_matching_rule(features).map(|r| r.category).unwrap_or(AnomalyCategory::Other)
288 }
289
290 fn describe_anomaly(&self, features: &[f64], category: &AnomalyCategory) -> String {
292 if features.len() < 6 {
293 return "Unusual metric combination detected".to_string();
294 }
295 find_matching_rule(features)
296 .filter(|r| r.category == *category)
297 .map(|r| render_description(r.description_template, features, r.feature_index))
298 .unwrap_or_else(|| "Unusual metric combination detected".to_string())
299 }
300
301 fn recommend_action(&self, category: &AnomalyCategory, features: &[f64]) -> String {
303 if *category == AnomalyCategory::QualityRegression
305 && features.len() >= 6
306 && features[FEAT_COVERAGE] < 80.0
307 {
308 return "Add tests to improve coverage above 80%".to_string();
309 }
310
311 find_matching_rule(features)
312 .filter(|r| r.category == *category)
313 .map(|r| r.recommendation.to_string())
314 .unwrap_or_else(|| "Review component metrics for unusual patterns".to_string())
315 }
316}
317
/// A single randomized binary partition tree in the isolation forest.
#[derive(Debug)]
enum IsolationTree {
    /// Split node: points with `feature < split_value` descend left,
    /// everything else (including rows missing the feature) goes right.
    Internal {
        split_feature: usize,
        split_value: f64,
        left: Box<IsolationTree>,
        right: Box<IsolationTree>,
    },
    /// Leaf recording how many training rows ended up here.
    External { size: usize },
}
331
impl IsolationTree {
    /// Recursively builds an isolation tree over `data`: pick a random
    /// feature, split at a random value between its min and max, and recurse
    /// until the depth budget is spent or the partition cannot be split.
    ///
    /// Consumes randomness in a fixed order (one `gen_range` per node, one
    /// `gen_range_f64` per split), so rebuilding with the same RNG state is
    /// deterministic.
    fn build(data: &[Vec<f64>], max_depth: usize, rng: &mut SimpleRng) -> Self {
        if data.is_empty() {
            return IsolationTree::External { size: 0 };
        }

        // Depth budget exhausted, or a single row is already isolated.
        if max_depth == 0 || data.len() <= 1 {
            return IsolationTree::External { size: data.len() };
        }

        let n_features = data[0].len();
        if n_features == 0 {
            return IsolationTree::External { size: data.len() };
        }

        // Split feature chosen uniformly at random.
        let feature = rng.gen_range(0..n_features);

        // Ragged rows are tolerated: rows missing this feature contribute no
        // value here and are routed right by the partition below.
        let values: Vec<f64> = data.iter().filter_map(|row| row.get(feature).copied()).collect();
        if values.is_empty() {
            return IsolationTree::External { size: data.len() };
        }

        let min_val = values.iter().copied().fold(f64::INFINITY, f64::min);
        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);

        // Constant feature on this branch: no useful split exists.
        if (max_val - min_val).abs() < f64::EPSILON {
            return IsolationTree::External { size: data.len() };
        }

        let split_value = rng.gen_range_f64(min_val..max_val);

        // Rows without this feature fail `is_some_and` and fall right.
        let (left_data, right_data): (Vec<_>, Vec<_>) = data
            .iter()
            .cloned()
            .partition(|row| row.get(feature).is_some_and(|&v| v < split_value));

        // Degenerate split (all rows on one side): stop and make a leaf.
        if left_data.is_empty() || right_data.is_empty() {
            return IsolationTree::External { size: data.len() };
        }

        IsolationTree::Internal {
            split_feature: feature,
            split_value,
            left: Box::new(IsolationTree::build(&left_data, max_depth - 1, rng)),
            right: Box::new(IsolationTree::build(&right_data, max_depth - 1, rng)),
        }
    }

    /// Depth at which `point` is isolated, starting from `current_depth`.
    /// Leaves add c(size) — the expected extra depth for rows grouped there —
    /// truncated to usize, so the fractional part is dropped.
    fn path_length(&self, point: &[f64], current_depth: usize) -> usize {
        match self {
            IsolationTree::External { size } => {
                current_depth + average_path_length(*size as f64) as usize
            }
            IsolationTree::Internal { split_feature, split_value, left, right } => {
                // A point missing the split feature defaults to 0.0, which
                // sends it down whichever side 0.0 falls on.
                let value = point.get(*split_feature).copied().unwrap_or(0.0);
                if value < *split_value {
                    left.path_length(point, current_depth + 1)
                } else {
                    right.path_length(point, current_depth + 1)
                }
            }
        }
    }
}
403
/// c(n): the expected path length of an unsuccessful binary-search-tree
/// lookup over `n` points — the normalization constant from the
/// isolation-forest paper. Uses the Euler–Mascheroni approximation of the
/// harmonic number H(n-1) ≈ ln(n) + γ. Returns 0 for n <= 1.
fn average_path_length(n: f64) -> f64 {
    const EULER_MASCHERONI: f64 = 0.5772156649;
    if n <= 1.0 {
        0.0
    } else {
        let harmonic = n.ln() + EULER_MASCHERONI;
        2.0 * harmonic - (2.0 * (n - 1.0) / n)
    }
}
411
/// Simple exponential smoothing (SES) over a scalar series, used to track
/// and forecast an error signal.
#[derive(Debug, Clone)]
pub struct ErrorForecaster {
    /// Smoothing factor in [0, 1]; higher weights recent observations more.
    alpha: f64,
    /// Every observed value, oldest first.
    history: Vec<f64>,
    /// Current smoothed level; this is also the flat forecast value.
    level: f64,
}
426
427impl ErrorForecaster {
428 pub fn new(alpha: f64) -> Self {
430 Self { alpha: alpha.clamp(0.0, 1.0), history: Vec::new(), level: 0.0 }
431 }
432
433 pub fn default_forecaster() -> Self {
435 Self::new(0.3)
436 }
437
438 pub fn observe(&mut self, value: f64) {
440 if self.history.is_empty() {
441 self.level = value;
442 } else {
443 self.level = self.alpha * value + (1.0 - self.alpha) * self.level;
445 }
446 self.history.push(value);
447 }
448
449 pub fn forecast(&self, n: usize) -> Vec<f64> {
451 vec![self.level; n]
453 }
454
455 pub fn error_metrics(&self) -> ForecastMetrics {
457 if self.history.len() < 2 {
458 return ForecastMetrics::default();
459 }
460
461 let mut errors = Vec::new();
463 let mut level = self.history[0];
464
465 for &actual in self.history.iter().skip(1) {
466 let forecast = level;
467 errors.push(actual - forecast);
468 level = self.alpha * actual + (1.0 - self.alpha) * level;
469 }
470
471 let n = errors.len() as f64;
472 let mae = errors.iter().map(|e| e.abs()).sum::<f64>() / n;
473 let mse = errors.iter().map(|e| e * e).sum::<f64>() / n;
474 let rmse = mse.sqrt();
475
476 let mape = if self.history.iter().skip(1).all(|&v| v.abs() > f64::EPSILON) {
478 let sum: f64 =
479 errors.iter().zip(self.history.iter().skip(1)).map(|(e, a)| (e / a).abs()).sum();
480 sum / n * 100.0
481 } else {
482 f64::NAN
483 };
484
485 ForecastMetrics { mae, mse, rmse, mape }
486 }
487
488 pub fn history(&self) -> &[f64] {
490 &self.history
491 }
492
493 pub fn current_level(&self) -> f64 {
495 self.level
496 }
497}
498
/// Summary error statistics for `ErrorForecaster`'s one-step-ahead forecasts.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ForecastMetrics {
    /// Mean absolute error.
    pub mae: f64,
    /// Mean squared error.
    pub mse: f64,
    /// Root mean squared error.
    pub rmse: f64,
    /// Mean absolute percentage error; NaN when any actual is (near) zero.
    pub mape: f64,
}
511
// Unit tests live in a sibling file to keep this module focused.
#[cfg(test)]
#[path = "diagnostics_ml_tests.rs"]
mod tests;