Skip to main content

oxicuda_anomaly/
lib.rs

1//! `oxicuda-anomaly` — Anomaly Detection primitives for OxiCUDA.
2//!
3//! Pure-Rust implementation of canonical anomaly detection algorithms,
4//! suitable for CPU simulation and PTX kernel generation for GPU execution.
5//!
6//! # Architecture
7//!
8//! ```text
//! oxicuda-anomaly
//! ├── svdd/           — DeepSVDD (Ruff et al. 2018)
//! ├── reconstruction/ — Autoencoder & VAE anomaly detection
//! ├── distance/       — LOF, k-NN distance scorer
//! ├── density/        — COPOD, Mahalanobis
//! ├── isolation/      — Isolation Forest random-projection scorer
//! ├── forest/         — Robust Random Cut Forest (CoDisp, streaming)
//! ├── streaming/      — xStream (StreamHash + half-space chains)
//! ├── subspace/       — RS-Hash (randomized subspace hashing)
//! ├── statistical/    — MAD, Z-score, percentile threshold
//! ├── ensemble/       — Ensemble scoring (Average / Maximum / Weighted)
//! ├── metrics/        — AUC-ROC, AUC-PR, F1, detection metrics
//! ├── graph/          — graph-oriented detectors (see module docs)
//! ├── time_series/    — Spectral Residual
//! ├── error           — AnomalyError / AnomalyResult
//! ├── handle          — AnomalyHandle (SmVersion + LcgRng)
//! └── ptx_kernels     — GPU PTX kernel strings (7 kernels × 6 SM versions)
24//! ```
25
26pub mod density;
27pub mod distance;
28pub mod ensemble;
29pub mod error;
30pub mod forest;
31pub mod graph;
32pub mod handle;
33pub mod isolation;
34pub mod metrics;
35pub mod ptx_kernels;
36pub mod reconstruction;
37pub mod statistical;
38pub mod streaming;
39pub mod subspace;
40pub mod svdd;
41pub mod time_series;
42
43// ─── Prelude ─────────────────────────────────────────────────────────────────
44
45/// Convenience re-exports of the most-used anomaly detection types.
46pub mod prelude {
47    pub use crate::density::copod::Copod;
48    pub use crate::density::fast_mcd::{
49        FastMcdConfig, FastMcdFit, fast_mcd_fit, fast_mcd_predict, fast_mcd_score,
50        fast_mcd_score_batch,
51    };
52    pub use crate::density::gmm_detector::{
53        GmmConfig, GmmModel, gmm_fit, gmm_log_likelihood, gmm_predict, gmm_sample, gmm_score,
54    };
55    pub use crate::density::kde_detector::{Bandwidth, KdeConfig, KdeDetector, KdeKernel};
56    pub use crate::density::mahalanobis::MahalanobisDetector;
57    pub use crate::distance::abod::Abod;
58    pub use crate::distance::abod_approx::AbodApprox;
59    pub use crate::distance::cblof::{
60        CblofConfig, CblofFit, cblof_fit, cblof_predict, cblof_score,
61    };
62    pub use crate::distance::cof::Cof;
63    pub use crate::distance::knn_score::KnnAnomalyScorer;
64    pub use crate::distance::lof::Lof;
65    pub use crate::distance::lof_kdtree::{
66        KdNode, KdTree, LofKdConfig, LofKdFit, kd_build, kd_knn, kd_knn_ex, lof_kd_fit,
67        lof_kd_predict, lof_kd_score,
68    };
69    pub use crate::distance::sod::{Sod, SodConfig};
70    pub use crate::ensemble::ensemble::{AnomalyEnsemble, EnsembleMethod};
71    pub use crate::ensemble::ext_iforest::{
72        ExtIforestConfig, ExtIforestModel, ExtNode, ext_iforest_fit, ext_iforest_predict,
73        ext_iforest_score,
74    };
75    pub use crate::ensemble::loda::{Loda, LodaConfig};
76    pub use crate::ensemble::lscp::{LscpConfig, LscpEnsemble, LscpStrategy, LscpTarget};
77    pub use crate::ensemble::suod::{SuodConfig, SuodFit, suod_fit, suod_predict, suod_score};
78    pub use crate::error::{AnomalyError, AnomalyResult};
79    pub use crate::forest::rrcf::{
80        BoundingBox, RobustRandomCutForest, RrcNode, RrcTree, RrcfConfig,
81    };
82    pub use crate::handle::{AnomalyHandle, LcgRng, SmVersion};
83    pub use crate::isolation::iforest_score::{
84        IsolationScorer, c_factor, isolation_score_from_path,
85    };
86    pub use crate::isolation::inne::{InneConfig, InneDetector};
87    pub use crate::metrics::anomaly_metrics::{
88        AnomalyDetectionMetrics, auc_pr, auc_roc_anomaly, compute_detection_metrics,
89        f1_at_threshold,
90    };
91    pub use crate::ptx_kernels::{
92        copod_ecdf_ptx, ensemble_normalize_ptx, f32_hex, iforest_score_ptx, lof_reach_dist_ptx,
93        mahal_dist_ptx, recon_score_ptx, svdd_loss_ptx,
94    };
95    pub use crate::reconstruction::autoencoder::{AeConfig, AutoencoderAnomaly};
96    pub use crate::reconstruction::dagmm::{
97        DagmmConfig, DagmmFit, dagmm_fit, dagmm_predict, dagmm_score,
98    };
99    pub use crate::reconstruction::diffusion_anomaly::{
100        DiffusionAnomalyConfig, DiffusionAnomalyFit, diffusion_anomaly_fit,
101        diffusion_anomaly_predict, diffusion_anomaly_score,
102    };
103    pub use crate::reconstruction::mem_ae::{
104        MemAeConfig, MemAeFit, mem_ae_attention, mem_ae_fit, mem_ae_predict, mem_ae_score,
105    };
106    pub use crate::reconstruction::norm_flow::{
107        NormFlowConfig, NormFlowFit, norm_flow_fit, norm_flow_predict, norm_flow_score,
108    };
109    pub use crate::reconstruction::pca_anomaly::{PcaAnomaly, PcaAnomalyConfig};
110    pub use crate::reconstruction::self_supervised::{
111        SelfSupervisedConfig, SelfSupervisedFit, self_supervised_confidence_gap,
112        self_supervised_fit, self_supervised_predict, self_supervised_score,
113    };
114    pub use crate::reconstruction::vae_anomaly::VaeAnomaly;
115    pub use crate::statistical::concept_drift::{
116        AdwinDetector, CusumDetector, DdmDetector, DdmStatus, PageHinkleyDetector, adwin_add,
117        adwin_mean, adwin_new, adwin_window_size, cusum_add, cusum_new, cusum_reset, ddm_add,
118        ddm_new, ph_add, ph_detector_new, ph_reset,
119    };
120    pub use crate::statistical::conformal::{
121        ConformalConfig, ConformalDetector, ConformalResult, OnlineConformalDetector,
122        conformal_calibrate, conformal_p_value, conformal_predict, mondrian_conformal_predict,
123        online_conformal_detector_new, online_conformal_update,
124    };
125    pub use crate::statistical::ecod::Ecod;
126    pub use crate::statistical::extreme_value::{GpdDetector, GpdFit};
127    pub use crate::statistical::hbos::{Hbos, HbosConfig};
128    pub use crate::statistical::rock_idec::{IdecConfig, IdecDetector, RockConfig, RockDetector};
129    pub use crate::statistical::stats::{MadDetector, ZScoreDetector, percentile_threshold};
130    pub use crate::streaming::xstream::{HalfSpaceChain, StreamHash, XStream, XStreamConfig};
131    pub use crate::subspace::rs_hash::{HashComponent, RsHash, RsHashConfig};
132    pub use crate::svdd::deep_sad::{DeepSad, DeepSadConfig};
133    pub use crate::svdd::deep_svdd::DeepSvdd;
134    pub use crate::svdd::ocsvm::{OcsvmConfig, OcsvmFit, ocsvm_fit, ocsvm_predict, ocsvm_score};
135    pub use crate::svdd::trainable_svdd::{
136        TrainableSvddConfig, TrainableSvddFit, trainable_svdd_fit, trainable_svdd_loss_history,
137        trainable_svdd_predict, trainable_svdd_score,
138    };
139    pub use crate::time_series::spectral_residual::{
140        SpectralResidualConfig, SpectralResidualResult, spectral_residual,
141    };
142}
143
144// ─── End-to-end integration tests ────────────────────────────────────────────
145
146#[cfg(test)]
147mod e2e_tests {
148    use crate::prelude::*;
149
150    // ── Test 1: AE score is finite for training data and random noise ─────────
151
152    #[test]
153    fn e2e_autoencoder_normal_low_score() {
154        let cfg = AeConfig {
155            encoder_dims: vec![4, 2],
156            decoder_dims: vec![2, 4],
157        };
158        let mut rng = LcgRng::new(1);
159        let ae = AutoencoderAnomaly::new(cfg, &mut rng).expect("autoencoder should initialize");
160
161        let train = vec![0.5_f32; 4];
162        let s_train = ae
163            .score(&train)
164            .expect("autoencoder score on training data should succeed");
165
166        let mut noise = vec![0.0_f32; 4];
167        let mut rng2 = LcgRng::new(999);
168        rng2.fill_normal(&mut noise);
169        let s_noise = ae
170            .score(&noise)
171            .expect("autoencoder score on noise should succeed");
172
173        assert!(s_train.is_finite(), "train_score={s_train}");
174        assert!(s_noise.is_finite(), "noise_score={s_noise}");
175    }
176
177    // ── Test 2: AE score is finite ───────────────────────────────────────────
178
179    #[test]
180    fn e2e_autoencoder_score_finite() {
181        let cfg = AeConfig {
182            encoder_dims: vec![8, 4, 2],
183            decoder_dims: vec![2, 4, 8],
184        };
185        let mut rng = LcgRng::new(2);
186        let ae = AutoencoderAnomaly::new(cfg, &mut rng)
187            .expect("autoencoder should initialize with valid config");
188        let s = ae
189            .score(&[0.3_f32; 8])
190            .expect("autoencoder score should succeed on valid input");
191        assert!(s.is_finite() && s >= 0.0, "s={s}");
192    }
193
194    // ── Test 3: VAE score is finite and non-negative ──────────────────────────
195
196    #[test]
197    fn e2e_vae_score_finite() {
198        let mut rng = LcgRng::new(3);
199        let vae = VaeAnomaly::new(&[8, 4], 2, &[2, 4, 8], &mut rng)
200            .expect("VAE should initialize with valid architecture");
201        let s = vae
202            .anomaly_score(&[0.2_f32; 8], &mut rng)
203            .expect("VAE anomaly score should succeed");
204        assert!(s.is_finite(), "s={s}");
205    }
206
207    // ── Test 4: DeepSVDD score increases for far-away point ───────────────────
208
209    #[test]
210    fn e2e_deep_svdd_score_increases_for_outlier() {
211        let mut rng = LcgRng::new(4);
212        let mut svdd = DeepSvdd::new(&[4, 8, 4], &mut rng)
213            .expect("DeepSVDD should initialize with valid layers");
214
215        let train = vec![0.1_f32; 4 * 20];
216        svdd.fit(&train, 20).expect("DeepSVDD fit should succeed");
217
218        let close = [0.1_f32, 0.1, 0.1, 0.1];
219        let far = [100.0_f32, 100.0, 100.0, 100.0];
220
221        let s_close = svdd
222            .score(&close)
223            .expect("DeepSVDD score for close point should succeed");
224        let s_far = svdd
225            .score(&far)
226            .expect("DeepSVDD score for far point should succeed");
227
228        assert!(
229            s_far > s_close,
230            "far score {s_far} should > close score {s_close}"
231        );
232    }
233
234    // ── Test 5: LOF ≈ 1 for uniform data (trivial normal case) ───────────────
235
236    #[test]
237    fn e2e_lof_trivial_normal_case() {
238        let n = 20_usize;
239        let data: Vec<f32> = (0..n).map(|i| i as f32).collect();
240        let mut lof = Lof::new(3);
241        lof.fit(&data, n, 1).expect("LOF fit should succeed");
242        let s = lof.score(&[10.0_f32]).expect("LOF score should succeed");
243        assert!(s.is_finite(), "lof={s}");
244        assert!(s > 0.0, "lof > 0");
245    }
246
247    // ── Test 6: COPOD higher for extreme outlier ──────────────────────────────
248
249    #[test]
250    fn e2e_copod_known_outlier() {
251        let n = 30_usize;
252        let data: Vec<f32> = (0..n).map(|i| i as f32 * 0.1).collect();
253        let mut copod = Copod::new();
254        copod.fit(&data, n, 1).expect("COPOD fit should succeed");
255
256        let s_normal = copod
257            .score(&[1.5_f32])
258            .expect("COPOD normal score should succeed");
259        let s_outlier = copod
260            .score(&[100.0_f32])
261            .expect("COPOD outlier score should succeed");
262
263        assert!(
264            s_outlier > s_normal,
265            "outlier {s_outlier} should > normal {s_normal}"
266        );
267    }
268
269    // ── Test 7: Mahalanobis higher for OOD point ─────────────────────────────
270
271    #[test]
272    fn e2e_mahalanobis_known_outlier() {
273        let data = vec![
274            1.0_f32, 2.0, 1.1, 1.9, 0.9, 2.1, 1.05, 1.95, 0.95, 2.05, 1.0_f32, 2.0, 1.1, 1.9, 0.9,
275            2.1, 1.05, 1.95, 0.95, 2.05,
276        ];
277        let mut det = MahalanobisDetector::new();
278        det.fit(&data, 10, 2)
279            .expect("Mahalanobis fit should succeed");
280
281        let s_normal = det
282            .score(&[1.0_f32, 2.0])
283            .expect("Mahalanobis normal score should succeed");
284        let s_outlier = det
285            .score(&[50.0_f32, 100.0])
286            .expect("Mahalanobis outlier score should succeed");
287
288        assert!(
289            s_outlier > s_normal,
290            "outlier {s_outlier} > normal {s_normal}"
291        );
292    }
293
294    // ── Test 8: Isolation score in (0, 1) ────────────────────────────────────
295
296    #[test]
297    fn e2e_iforest_score_in_range() {
298        let mut rng = LcgRng::new(8);
299        let n = 100_usize;
300        let data: Vec<f32> = (0..n)
301            .flat_map(|i| vec![i as f32 * 0.1, i as f32 * 0.05])
302            .collect();
303        let mut scorer = IsolationScorer::new(50, &mut rng);
304        scorer
305            .fit(&data, n, 2, &mut rng)
306            .expect("IsolationScorer fit should succeed");
307        let s = scorer
308            .score(&[5.0_f32, 2.5])
309            .expect("IsolationScorer score should succeed");
310        assert!((0.0..=1.0).contains(&s), "s={s}");
311    }
312
313    // ── Test 9: Z-score extreme sample gets highest score ────────────────────
314
315    #[test]
316    fn e2e_zscore_known_outlier() {
317        let n = 20_usize;
318        let data: Vec<f32> = (0..n).map(|i| i as f32 * 0.1).collect();
319        let mut det = ZScoreDetector::new();
320        det.fit(&data, n, 1)
321            .expect("ZScoreDetector fit should succeed");
322
323        let s_normal = det
324            .score(&[1.0_f32])
325            .expect("ZScoreDetector normal score should succeed");
326        let s_outlier = det
327            .score(&[1000.0_f32])
328            .expect("ZScoreDetector outlier score should succeed");
329
330        assert!(
331            s_outlier > s_normal,
332            "outlier {s_outlier} > normal {s_normal}"
333        );
334    }
335
336    // ── Test 10: MAD detector returns finite scores ───────────────────────────
337
338    #[test]
339    fn e2e_mad_detector_finite() {
340        let n = 20_usize;
341        let data: Vec<f32> = (0..n)
342            .flat_map(|i| vec![i as f32, (i * 2) as f32])
343            .collect();
344        let mut det = MadDetector::new();
345        det.fit(&data, n, 2)
346            .expect("MadDetector fit should succeed");
347
348        let scores = det
349            .score_batch(&data, n)
350            .expect("MadDetector batch score should succeed");
351        assert!(scores.iter().all(|s| s.is_finite()), "not all finite");
352    }
353
354    // ── Test 11: Ensemble combine returns finite score in [0, 1] ─────────────
355
356    #[test]
357    fn e2e_ensemble_combine_finite() {
358        let n_det = 3_usize;
359        let n = 20_usize;
360        let mut rng = LcgRng::new(11);
361        let train_scores: Vec<f32> = (0..n * n_det).map(|_| rng.next_f32()).collect();
362        let mut ens = AnomalyEnsemble::new(EnsembleMethod::Average, n_det);
363        ens.fit(&train_scores, n)
364            .expect("AnomalyEnsemble fit should succeed");
365
366        let test = [0.5_f32, 0.8, 0.3];
367        let s = ens
368            .combine(&test)
369            .expect("AnomalyEnsemble combine should succeed");
370        assert!(s.is_finite(), "s={s}");
371        assert!((0.0..=1.0).contains(&s), "s={s} not in [0,1]");
372    }
373
374    // ── Test 12: All 7 × 6 SM versions produce valid PTX ─────────────────────
375
376    #[test]
377    #[allow(clippy::type_complexity)]
378    fn e2e_ptx_kernels_all_sm_versions() {
379        let sm_versions = [75_u32, 80, 86, 90, 100, 120];
380        let kernel_fns: &[(&str, fn(u32) -> String)] = &[
381            ("svdd_loss_kernel", svdd_loss_ptx),
382            ("recon_score_kernel", recon_score_ptx),
383            ("lof_reach_dist_kernel", lof_reach_dist_ptx),
384            ("copod_ecdf_kernel", copod_ecdf_ptx),
385            ("mahal_dist_kernel", mahal_dist_ptx),
386            ("iforest_score_kernel", iforest_score_ptx),
387            ("ensemble_normalize_kernel", ensemble_normalize_ptx),
388        ];
389        for sm in sm_versions {
390            for (kernel_name, gen_fn) in kernel_fns {
391                let ptx = gen_fn(sm);
392                assert!(
393                    ptx.contains(&format!("sm_{sm}")),
394                    "PTX for {kernel_name} sm={sm} missing sm target"
395                );
396                assert!(
397                    ptx.contains(".version"),
398                    "PTX for {kernel_name} sm={sm} missing .version"
399                );
400                assert!(
401                    ptx.contains(".visible .entry"),
402                    "PTX for {kernel_name} sm={sm} missing .visible .entry"
403                );
404                assert!(
405                    ptx.contains(kernel_name),
406                    "PTX for {kernel_name} sm={sm} missing kernel name"
407                );
408            }
409        }
410        assert_eq!(f32_hex(1.0_f32), "0F3F800000");
411    }
412}