1use crate::error::Result;
2use std::collections::HashMap;
3
/// Identifies an advanced outlier-detection strategy together with its parameters.
///
/// The parameter names of the first three variants mirror the arguments of
/// `detect_outliers_lof`, `detect_outliers_isolation` and `detect_outliers_dbscan`.
#[derive(Clone, Debug)]
pub enum AdvancedOutlierMethod {
    /// Local Outlier Factor evaluated over `k` nearest neighbours.
    LOF {
        k: usize,
    },
    /// Isolation-style scoring that performs at most `max_depth` splits.
    IsolationScore {
        max_depth: usize,
    },
    /// Density-based detection with neighbourhood radius `eps` and a minimum
    /// neighbour count of `min_pts`.
    DBSCANOutlier {
        eps: f64,
        min_pts: usize,
    },
    /// Mahalanobis-distance strategy; carries no parameters.
    Mahalanobis,
    /// Consensus of several strategies; carries no parameters.
    Ensemble,
}
18
/// Describes a single data point that a detector reported as an outlier.
#[derive(Clone, Debug)]
pub struct AdvancedOutlierInfo {
    /// Position of the point in the input slice handed to the detector.
    pub index: usize,
    /// The data value found at `index`.
    pub value: f64,
    /// Detector-specific score; its scale depends on the method that produced it.
    pub outlier_score: f64,
    /// Confidence attached to the verdict by the detector.
    pub confidence: f64,
    /// Named diagnostic values recorded by the detector (for example `"lof"`
    /// or `"path_length"`).
    pub method_scores: HashMap<String, f64>,
    /// Whether the point is considered an outlier.
    pub is_outlier: bool,
}
29
30#[derive(Debug, Clone)]
32pub struct AdvancedOutlierResult {
33 pub method_name: String,
34 pub outliers: Vec<AdvancedOutlierInfo>,
35 pub threshold: f64,
36 pub detection_rate: f64,
37 pub method_params: HashMap<String, f64>,
38}
39
40pub fn detect_outliers_lof(numbers: &[f64], k: usize) -> Result<AdvancedOutlierResult> {
42 if numbers.len() < k + 1 {
43 return Err(crate::error::BenfError::InsufficientData(numbers.len()));
44 }
45
46 let mut outliers = Vec::new();
47 let mut lof_scores = Vec::new();
48
49 for (i, &value) in numbers.iter().enumerate() {
50 let mut distances: Vec<f64> = numbers
52 .iter()
53 .enumerate()
54 .filter(|(j, _)| *j != i)
55 .map(|(_, &other)| (value - other).abs())
56 .collect();
57
58 distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
59
60 if distances.len() >= k {
61 let k_distance = distances[k - 1];
62
63 let reachability_distances: Vec<f64> =
65 distances[..k].iter().map(|&d| d.max(k_distance)).collect();
66
67 let lrd = k as f64 / reachability_distances.iter().sum::<f64>();
68
69 let lof_score = if lrd > 0.0 {
71 let neighbor_lrds: f64 = distances[..k]
72 .iter()
73 .map(|_| lrd) .sum();
75
76 (neighbor_lrds / (k as f64)) / lrd
77 } else {
78 1.0
79 };
80
81 lof_scores.push(lof_score);
82
83 if lof_score > 1.5 {
85 outliers.push(AdvancedOutlierInfo {
86 index: i,
87 value,
88 outlier_score: lof_score,
89 confidence: ((lof_score - 1.0).min(2.0) / 2.0).clamp(0.0, 1.0),
90 method_scores: {
91 let mut scores = HashMap::new();
92 scores.insert("lof".to_string(), lof_score);
93 scores.insert("k_distance".to_string(), k_distance);
94 scores
95 },
96 is_outlier: true,
97 });
98 }
99 }
100 }
101
102 let detection_rate = outliers.len() as f64 / numbers.len() as f64;
103
104 Ok(AdvancedOutlierResult {
105 method_name: format!("LOF (k={k})"),
106 outliers,
107 threshold: 1.5,
108 detection_rate,
109 method_params: {
110 let mut params = HashMap::new();
111 params.insert("k".to_string(), k as f64);
112 params.insert("threshold".to_string(), 1.5);
113 params
114 },
115 })
116}
117
118pub fn detect_outliers_isolation(
120 numbers: &[f64],
121 max_depth: usize,
122) -> Result<AdvancedOutlierResult> {
123 let mut outliers = Vec::new();
124 let avg_path_length = calculate_average_path_length(numbers.len());
125
126 for (i, &value) in numbers.iter().enumerate() {
127 let path_length = calculate_isolation_path_length(value, numbers, max_depth);
129
130 let anomaly_score = 2.0_f64.powf(-path_length / avg_path_length);
132
133 if anomaly_score > 0.6 {
135 outliers.push(AdvancedOutlierInfo {
136 index: i,
137 value,
138 outlier_score: anomaly_score,
139 confidence: ((anomaly_score - 0.5) * 2.0).clamp(0.0, 1.0),
140 method_scores: {
141 let mut scores = HashMap::new();
142 scores.insert("anomaly_score".to_string(), anomaly_score);
143 scores.insert("path_length".to_string(), path_length);
144 scores
145 },
146 is_outlier: true,
147 });
148 }
149 }
150
151 let detection_rate = outliers.len() as f64 / numbers.len() as f64;
152
153 Ok(AdvancedOutlierResult {
154 method_name: format!("Isolation Score (depth={max_depth})"),
155 outliers,
156 threshold: 0.6,
157 detection_rate,
158 method_params: {
159 let mut params = HashMap::new();
160 params.insert("max_depth".to_string(), max_depth as f64);
161 params.insert("threshold".to_string(), 0.6);
162 params
163 },
164 })
165}
166
167pub fn detect_outliers_dbscan(
169 numbers: &[f64],
170 eps: f64,
171 min_pts: usize,
172) -> Result<AdvancedOutlierResult> {
173 let mut outliers = Vec::new();
174 let mut visited = vec![false; numbers.len()];
175 let mut clusters = Vec::new();
176
177 for (i, &value) in numbers.iter().enumerate() {
178 if visited[i] {
179 continue;
180 }
181 visited[i] = true;
182
183 let neighbors: Vec<usize> = numbers
185 .iter()
186 .enumerate()
187 .filter(|(j, &other)| *j != i && (value - other).abs() <= eps)
188 .map(|(j, _)| j)
189 .collect();
190
191 if neighbors.len() >= min_pts {
192 let mut cluster = vec![i];
194 let mut queue = neighbors;
195
196 while let Some(neighbor_idx) = queue.pop() {
197 if !visited[neighbor_idx] {
198 visited[neighbor_idx] = true;
199 cluster.push(neighbor_idx);
200
201 let neighbor_neighbors: Vec<usize> = numbers
203 .iter()
204 .enumerate()
205 .filter(|(j, &other)| {
206 *j != neighbor_idx && (numbers[neighbor_idx] - other).abs() <= eps
207 })
208 .map(|(j, _)| j)
209 .collect();
210
211 if neighbor_neighbors.len() >= min_pts {
212 queue.extend(neighbor_neighbors);
213 }
214 }
215 }
216
217 clusters.push(cluster);
218 } else {
219 let density_score = neighbors.len() as f64 / min_pts as f64;
221
222 outliers.push(AdvancedOutlierInfo {
223 index: i,
224 value,
225 outlier_score: 1.0 - density_score,
226 confidence: (1.0 - density_score).clamp(0.0, 1.0),
227 method_scores: {
228 let mut scores = HashMap::new();
229 scores.insert("density_score".to_string(), density_score);
230 scores.insert("neighbor_count".to_string(), neighbors.len() as f64);
231 scores
232 },
233 is_outlier: true,
234 });
235 }
236 }
237
238 let detection_rate = outliers.len() as f64 / numbers.len() as f64;
239
240 Ok(AdvancedOutlierResult {
241 method_name: format!("DBSCAN Outlier (eps={eps:.2}, min_pts={min_pts})"),
242 outliers,
243 threshold: 1.0 - (min_pts as f64 / 10.0),
244 detection_rate,
245 method_params: {
246 let mut params = HashMap::new();
247 params.insert("eps".to_string(), eps);
248 params.insert("min_pts".to_string(), min_pts as f64);
249 params
250 },
251 })
252}
253
254pub fn detect_outliers_ensemble(numbers: &[f64]) -> Result<AdvancedOutlierResult> {
256 let lof_result = detect_outliers_lof(numbers, 5)?;
258 let isolation_result = detect_outliers_isolation(numbers, 8)?;
259
260 let std_dev = {
262 let mean = numbers.iter().sum::<f64>() / numbers.len() as f64;
263 let variance =
264 numbers.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / numbers.len() as f64;
265 variance.sqrt()
266 };
267 let eps = std_dev * 0.5;
268 let min_pts = (numbers.len() as f64).sqrt() as usize;
269
270 let dbscan_result = detect_outliers_dbscan(numbers, eps, min_pts)?;
271
272 let mut ensemble_scores: HashMap<usize, (f64, f64, usize)> = HashMap::new();
274
275 for outlier in &lof_result.outliers {
277 let entry = ensemble_scores
278 .entry(outlier.index)
279 .or_insert((0.0, 0.0, 0));
280 entry.0 += outlier.outlier_score;
281 entry.1 += outlier.confidence;
282 entry.2 += 1;
283 }
284
285 for outlier in &isolation_result.outliers {
286 let entry = ensemble_scores
287 .entry(outlier.index)
288 .or_insert((0.0, 0.0, 0));
289 entry.0 += outlier.outlier_score;
290 entry.1 += outlier.confidence;
291 entry.2 += 1;
292 }
293
294 for outlier in &dbscan_result.outliers {
295 let entry = ensemble_scores
296 .entry(outlier.index)
297 .or_insert((0.0, 0.0, 0));
298 entry.0 += outlier.outlier_score;
299 entry.1 += outlier.confidence;
300 entry.2 += 1;
301 }
302
303 let mut outliers = Vec::new();
305 for (&index, &(total_score, total_confidence, method_count)) in &ensemble_scores {
306 let avg_score = total_score / method_count as f64;
307 let avg_confidence = total_confidence / method_count as f64;
308 let consensus_strength = method_count as f64 / 3.0; if method_count >= 2 {
312 outliers.push(AdvancedOutlierInfo {
313 index,
314 value: numbers[index],
315 outlier_score: avg_score * consensus_strength,
316 confidence: avg_confidence * consensus_strength,
317 method_scores: {
318 let mut scores = HashMap::new();
319 scores.insert("ensemble_score".to_string(), avg_score);
320 scores.insert("consensus_strength".to_string(), consensus_strength);
321 scores.insert("method_count".to_string(), method_count as f64);
322 scores
323 },
324 is_outlier: true,
325 });
326 }
327 }
328
329 outliers.sort_by(|a, b| b.outlier_score.partial_cmp(&a.outlier_score).unwrap());
331
332 let detection_rate = outliers.len() as f64 / numbers.len() as f64;
333
334 Ok(AdvancedOutlierResult {
335 method_name: "Ensemble (LOF + Isolation + DBSCAN)".to_string(),
336 outliers,
337 threshold: 0.5,
338 detection_rate,
339 method_params: {
340 let mut params = HashMap::new();
341 params.insert("min_consensus".to_string(), 2.0);
342 params.insert("lof_k".to_string(), 5.0);
343 params.insert("isolation_depth".to_string(), 8.0);
344 params.insert("dbscan_eps".to_string(), eps);
345 params.insert("dbscan_min_pts".to_string(), min_pts as f64);
346 params
347 },
348 })
349}
350
/// Returns the normalising constant `c(n)` used by isolation-based scoring:
/// the average path length of an unsuccessful search in a binary search tree
/// built from `n` values.
///
/// `c(n) = 2 * H(n - 1) - 2 * (n - 1) / n`, where the harmonic number `H(i)` is
/// approximated by `ln(i) + γ` (γ being the Euler–Mascheroni constant).
/// Leaving γ out makes the result negative for `n == 2` and close to zero for
/// `n == 3`, which breaks any score that divides by it.
///
/// Returns `0.0` for `n <= 1` and exactly `1.0` for `n == 2`.
fn calculate_average_path_length(n: usize) -> f64 {
    const EULER_MASCHERONI: f64 = 0.577_215_664_901_532_9;

    match n {
        0 | 1 => 0.0,
        // Two values are always separated by a single split.
        2 => 1.0,
        _ => {
            let count = n as f64;
            let harmonic = (count - 1.0).ln() + EULER_MASCHERONI;
            2.0 * harmonic - 2.0 * (count - 1.0) / count
        }
    }
}
358
/// Counts how many midpoint splits are needed to isolate `value` within `numbers`.
///
/// Starting from a copy of `numbers`, the working set is repeatedly cut at the
/// midpoint between its minimum and maximum, keeping the half on `value`'s side
/// (values equal to the midpoint go to the upper half). Splitting stops when at
/// most one element is left, when all remaining elements are equal, or after
/// `max_depth` splits. The number of splits performed is returned as `f64`.
fn calculate_isolation_path_length(value: f64, numbers: &[f64], max_depth: usize) -> f64 {
    let mut remaining: Vec<f64> = numbers.to_vec();
    let mut splits: usize = 0;

    while splits < max_depth && remaining.len() > 1 {
        // Single pass for both extremes of the current working set.
        let (lowest, highest) = remaining.iter().fold(
            (f64::INFINITY, f64::NEG_INFINITY),
            |(lowest, highest), &x| (lowest.min(x), highest.max(x)),
        );

        // Identical values cannot be separated any further.
        if lowest == highest {
            break;
        }

        let pivot = (lowest + highest) / 2.0;
        remaining = if value < pivot {
            remaining.into_iter().filter(|&x| x < pivot).collect()
        } else {
            remaining.into_iter().filter(|&x| x >= pivot).collect()
        };

        splits += 1;
    }

    splits as f64
}
393
#[cfg(test)]
mod tests {
    use super::*;

    /// Five evenly spaced values followed by one value far away from them.
    const WITH_FAR_VALUE: [f64; 6] = [1.0, 2.0, 3.0, 4.0, 5.0, 100.0];

    /// LOF reports its label and a consistent, non-negative detection rate.
    #[test]
    fn test_lof_outlier_detection() {
        let result = detect_outliers_lof(&WITH_FAR_VALUE, 3).unwrap();

        assert_eq!(result.method_name, "LOF (k=3)");
        assert!(result.detection_rate >= 0.0);
        // Any reported outlier must be reflected in the rate.
        assert!(result.outliers.is_empty() || result.detection_rate > 0.0);
    }

    /// The isolation detector reports its label and a non-negative rate.
    #[test]
    fn test_isolation_outlier_detection() {
        let result = detect_outliers_isolation(&WITH_FAR_VALUE, 8).unwrap();

        assert!(result.detection_rate >= 0.0);
        assert_eq!(result.method_name, "Isolation Score (depth=8)");
    }

    /// The DBSCAN detector reports its label and a non-negative rate.
    #[test]
    fn test_dbscan_outlier_detection() {
        let result = detect_outliers_dbscan(&WITH_FAR_VALUE, 2.0, 2).unwrap();

        assert!(result.detection_rate >= 0.0);
        assert!(result.method_name.contains("DBSCAN"));
    }

    /// The ensemble reports its label and a non-negative rate.
    #[test]
    fn test_ensemble_outlier_detection() {
        let result = detect_outliers_ensemble(&WITH_FAR_VALUE).unwrap();

        assert_eq!(result.method_name, "Ensemble (LOF + Isolation + DBSCAN)");
        assert!(result.detection_rate >= 0.0);
    }

    /// LOF rejects samples that are smaller than the requested neighbourhood.
    #[test]
    fn test_insufficient_data_error() {
        let too_small = [1.0, 2.0];

        assert!(detect_outliers_lof(&too_small, 5).is_err());
    }

    /// Tightly grouped data must not be flagged for the most part.
    #[test]
    fn test_normal_data_low_detection_rate() {
        let tight = [1.0, 1.1, 0.9, 1.05, 0.95, 1.02, 0.98];
        let result = detect_outliers_ensemble(&tight).unwrap();

        assert!(result.detection_rate < 0.5);
    }
}