Skip to main content

libspot_rs/
spot.rs

//! Main SPOT detector implementation
//!
//! This module implements the main SPOT (Streaming Peaks Over Threshold) detector
//! that provides real-time anomaly detection for time series data.
//!
//! # Serialization
//!
//! When the `serde` feature is enabled, the [`SpotDetector`] can be serialized and
//! deserialized. This is particularly useful for:
//!
//! - **Model persistence**: Save a trained model to disk and load it later
//! - **Model deployment**: Export models for use in production systems
//! - **Model sharing**: Share trained models between different applications
//! - **Checkpointing**: Save model state during long-running processes
//!
//! ## Example
//!
//! ```ignore
//! use libspot_rs::{SpotConfig, SpotDetector};
//! use serde_json;
//!
//! // Train a model
//! let config = SpotConfig::default();
//! let mut spot = SpotDetector::new(config).unwrap();
//! let training_data: Vec<f64> = (0..1000).map(|i| i as f64 / 100.0).collect();
//! spot.fit(&training_data).unwrap();
//!
//! // Serialize the trained model
//! let json = serde_json::to_string(&spot).unwrap();
//!
//! // Later, deserialize and continue using the model.
//! // `step` takes `&mut self`, so the restored detector must be mutable.
//! let mut loaded: SpotDetector = serde_json::from_str(&json).unwrap();
//! let status = loaded.step(50.0);
//! ```
35
36use crate::config::SpotConfig;
37
38use crate::error::{SpotError, SpotResult};
39use crate::p2::p2_quantile;
40use crate::status::SpotStatus;
41use crate::tail::Tail;
42
43/// Main SPOT detector for streaming anomaly detection
44///
45/// The `SpotDetector` implements the SPOT (Streaming Peaks Over Threshold) algorithm
46/// for real-time anomaly detection in streaming time series data.
47///
48/// # Serialization
49///
50/// When the `serde` feature is enabled, the detector can be serialized and deserialized,
51/// allowing you to save trained models and restore them later without re-training.
52///
53/// # Example
54///
55/// ```
56/// use libspot_rs::{SpotConfig, SpotDetector, SpotStatus};
57///
58/// let config = SpotConfig::default();
59/// let mut spot = SpotDetector::new(config).unwrap();
60///
61/// // Fit with training data
62/// let data: Vec<f64> = (0..1000).map(|i| (i as f64) / 100.0).collect();
63/// spot.fit(&data).unwrap();
64///
65/// // Process new data points
66/// match spot.step(15.0).unwrap() {
67///     SpotStatus::Normal => println!("Normal"),
68///     SpotStatus::Excess => println!("Excess"),
69///     SpotStatus::Anomaly => println!("Anomaly detected!"),
70/// }
71/// ```
72#[derive(Debug)]
73#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
74pub struct SpotDetector {
75    /// Probability of an anomaly
76    q: f64,
77    /// Location of the tail (high quantile)
78    level: f64,
79    /// Flag anomalies (true = flag, false = don't flag)
80    discard_anomalies: bool,
81    /// Upper/Lower tail choice (true = lower tail, false = upper tail)
82    low: bool,
83    /// Internal constant (+/- 1.0)
84    up_down: f64,
85    /// Normal/abnormal threshold
86    #[cfg_attr(feature = "serde", serde(with = "crate::ser::nan_safe_f64"))]
87    anomaly_threshold: f64,
88    /// Tail threshold
89    #[cfg_attr(feature = "serde", serde(with = "crate::ser::nan_safe_f64"))]
90    excess_threshold: f64,
91    /// Total number of excesses
92    nt: usize,
93    /// Total number of seen data
94    n: usize,
95    /// GPD Tail
96    tail: Tail,
97}
98
99impl SpotDetector {
100    /// Create a new SPOT detector with the given configuration
101    pub fn new(config: SpotConfig) -> SpotResult<Self> {
102        // Validate parameters
103        if config.level < 0.0 || config.level >= 1.0 {
104            return Err(SpotError::LevelOutOfBounds);
105        }
106        if config.q >= (1.0 - config.level) || config.q <= 0.0 {
107            return Err(SpotError::QOutOfBounds);
108        }
109
110        let up_down = if config.low_tail { -1.0 } else { 1.0 };
111
112        Ok(Self {
113            q: config.q,
114            level: config.level,
115            discard_anomalies: config.discard_anomalies,
116            low: config.low_tail,
117            up_down,
118            anomaly_threshold: f64::NAN,
119            excess_threshold: f64::NAN,
120            nt: 0,
121            n: 0,
122            tail: Tail::new(config.max_excess)?,
123        })
124    }
125
126    /// Fit the model using initial training data
127    pub fn fit(&mut self, data: &[f64]) -> SpotResult<()> {
128        // Reset counters
129        self.nt = 0;
130        self.n = data.len();
131
132        // Compute excess threshold using P2 quantile estimator
133        let et = if self.low {
134            // Take the low quantile (1 - level)
135            p2_quantile(1.0 - self.level, data)
136        } else {
137            p2_quantile(self.level, data)
138        };
139
140        if et.is_nan() {
141            return Err(SpotError::ExcessThresholdIsNaN);
142        }
143
144        self.excess_threshold = et;
145
146        // Fill the tail with excesses
147        for &value in data {
148            // Positive excess
149            let excess = self.up_down * (value - et);
150            if excess > 0.0 {
151                // It's a real excess
152                self.nt += 1;
153                self.tail.push(excess);
154            }
155        }
156
157        // Fit the tail with the pushed data
158        self.tail.fit();
159
160        // Compute first anomaly threshold
161        self.anomaly_threshold = self.quantile(self.q);
162        if self.anomaly_threshold.is_nan() {
163            return Err(SpotError::AnomalyThresholdIsNaN);
164        }
165
166        Ok(())
167    }
168
169    /// Process a single data point and return its classification
170    pub fn step(&mut self, value: f64) -> SpotResult<SpotStatus> {
171        if value.is_nan() {
172            return Err(SpotError::DataIsNaN);
173        }
174
175        if self.discard_anomalies && (self.up_down * (value - self.anomaly_threshold) > 0.0) {
176            return Ok(SpotStatus::Anomaly);
177        }
178
179        // Increment number of data (without the anomalies)
180        self.n += 1;
181
182        let ex = self.up_down * (value - self.excess_threshold);
183        if ex >= 0.0 {
184            // Increment number of excesses
185            self.nt += 1;
186            self.tail.push(ex);
187            self.tail.fit();
188            // Update threshold
189            self.anomaly_threshold = self.quantile(self.q);
190            return Ok(SpotStatus::Excess);
191        }
192
193        Ok(SpotStatus::Normal)
194    }
195
196    /// Get the quantile for a given probability
197    pub fn quantile(&self, q: f64) -> f64 {
198        if self.n == 0 {
199            return f64::NAN;
200        }
201
202        let s = (self.nt as f64) / (self.n as f64);
203        self.excess_threshold + self.up_down * self.tail.quantile(s, q)
204    }
205
206    /// Get the probability for a given value
207    pub fn probability(&self, z: f64) -> f64 {
208        if self.n == 0 {
209            return f64::NAN;
210        }
211
212        let s = (self.nt as f64) / (self.n as f64);
213        self.tail
214            .probability(s, self.up_down * (z - self.excess_threshold))
215    }
216
217    /// Get the current anomaly threshold
218    pub fn anomaly_threshold(&self) -> f64 {
219        self.anomaly_threshold
220    }
221
222    /// Get the current excess threshold
223    pub fn excess_threshold(&self) -> f64 {
224        self.excess_threshold
225    }
226
227    /// Get the current configuration (reconstructed)
228    pub fn config(&self) -> Option<SpotConfig> {
229        Some(SpotConfig {
230            q: self.q,
231            low_tail: self.low,
232            discard_anomalies: self.discard_anomalies,
233            level: self.level,
234            max_excess: self.tail.peaks().container().capacity(),
235        })
236    }
237
238    /// Get the total number of data points seen
239    pub fn n(&self) -> usize {
240        self.n
241    }
242
243    /// Get the total number of excesses
244    pub fn nt(&self) -> usize {
245        self.nt
246    }
247
248    /// Get the current tail parameters
249    pub fn tail_parameters(&self) -> (f64, f64) {
250        (self.tail.gamma(), self.tail.sigma())
251    }
252
253    /// Reset the detector's internal state, keeping the configuration and the
254    /// backing buffer. After calling this, [`fit`](Self::fit) must be called
255    /// again before further [`step`](Self::step) calls.
256    ///
257    /// This mirrors the `spot_reset` C API exposed by the FFI wrapper crate.
258    pub fn reset(&mut self) {
259        self.anomaly_threshold = f64::NAN;
260        self.excess_threshold = f64::NAN;
261        self.nt = 0;
262        self.n = 0;
263        self.tail.reset();
264    }
265
266    /// Get the current size of the tail data
267    pub fn tail_size(&self) -> usize {
268        self.tail.size()
269    }
270
271    /// Get the minimum value in the peaks
272    pub fn peaks_min(&self) -> f64 {
273        self.tail.peaks().min()
274    }
275
276    /// Get the maximum value in the peaks
277    pub fn peaks_max(&self) -> f64 {
278        self.tail.peaks().max()
279    }
280
281    /// Get the mean of the peaks
282    pub fn peaks_mean(&self) -> f64 {
283        self.tail.peaks().mean()
284    }
285
286    /// Get the variance of the peaks
287    pub fn peaks_variance(&self) -> f64 {
288        self.tail.peaks().variance()
289    }
290
291    /// Get the peaks data as a vector (for debugging and export)
292    pub fn peaks_data(&self) -> Vec<f64> {
293        self.tail.peaks().container().data()
294    }
295}
296
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// Feed `value` to `spot` and check that the `n` / `nt` counters moved
    /// exactly as the returned status implies. Returns the status so callers
    /// can make further assertions.
    fn step_and_check_counters(spot: &mut SpotDetector, value: f64) -> SpotStatus {
        let n_before = spot.n();
        let nt_before = spot.nt();

        let status = spot.step(value).unwrap();

        match status {
            // A normal value is counted but is not an excess.
            SpotStatus::Normal => {
                assert_eq!(spot.n(), n_before + 1);
                assert_eq!(spot.nt(), nt_before);
            }
            // An excess is counted both as data and as an excess.
            SpotStatus::Excess => {
                assert_eq!(spot.n(), n_before + 1);
                assert_eq!(spot.nt(), nt_before + 1);
            }
            // A discarded anomaly must leave the counters untouched.
            SpotStatus::Anomaly => {
                assert_eq!(spot.n(), n_before);
                assert_eq!(spot.nt(), nt_before);
            }
        }

        status
    }

    #[test]
    fn test_spot_creation_valid_config() {
        let config = SpotConfig::default();
        let spot = SpotDetector::new(config).unwrap();

        assert_relative_eq!(spot.q, 0.0001);
        assert!(!spot.low);
        assert!(spot.discard_anomalies);
        assert_relative_eq!(spot.level, 0.998);
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
    }

    #[test]
    fn test_spot_invalid_level() {
        let config = SpotConfig {
            level: 1.5, // Invalid
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::LevelOutOfBounds);
    }

    #[test]
    fn test_spot_invalid_q() {
        let config = SpotConfig {
            q: 0.5, // Too high for level 0.998
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::QOutOfBounds);
    }

    #[test]
    fn test_spot_fit_basic() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Create simple training data
        let data: Vec<f64> = (0..1000).map(|i| (i as f64 / 1000.0) * 2.0 - 1.0).collect();

        let result = spot.fit(&data);
        assert!(result.is_ok());

        // After fit, thresholds should be valid
        assert!(!spot.anomaly_threshold().is_nan());
        assert!(!spot.excess_threshold().is_nan());
        assert!(spot.anomaly_threshold().is_finite());
        assert!(spot.excess_threshold().is_finite());
        assert_eq!(spot.n(), 1000);
        assert!(spot.nt() > 0); // Should have some excesses
    }

    #[test]
    fn test_spot_step_normal() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with simple data
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // The exact classification depends on the fitted thresholds, but
        // whatever it is, the counters must agree with it.
        let _status = step_and_check_counters(&mut spot, 50.0);

        // A single step must never invalidate the thresholds.
        assert!(!spot.anomaly_threshold().is_nan());
        assert!(!spot.excess_threshold().is_nan());
    }

    #[test]
    fn test_spot_step_nan() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        let result = spot.step(f64::NAN);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::DataIsNaN);
    }

    #[test]
    fn test_spot_reset_returns_to_pristine_state() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config.clone()).unwrap();

        let data: Vec<f64> = (0..1000).map(|i| (i as f64 / 1000.0) * 2.0 - 1.0).collect();
        spot.fit(&data).unwrap();
        for v in &data {
            let _ = spot.step(*v).unwrap();
        }
        assert!(spot.n() > 0);
        assert!(!spot.anomaly_threshold().is_nan());

        spot.reset();

        // Looks like a freshly constructed detector.
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
        assert_eq!(spot.tail_size(), 0);
        assert_eq!(spot.config(), Some(config.clone()));

        // Re-fit produces identical numbers to a fresh detector.
        let mut fresh = SpotDetector::new(config).unwrap();
        spot.fit(&data).unwrap();
        fresh.fit(&data).unwrap();
        assert_relative_eq!(spot.anomaly_threshold(), fresh.anomaly_threshold());
        assert_relative_eq!(spot.excess_threshold(), fresh.excess_threshold());
        assert_eq!(spot.nt(), fresh.nt());
        assert_eq!(spot.n(), fresh.n());
    }

    #[test]
    fn test_spot_reset_before_fit_is_noop_safe() {
        // Calling reset on a freshly constructed detector must not panic
        // and must leave the detector in the same observable state.
        let mut spot = SpotDetector::new(SpotConfig::default()).unwrap();
        spot.reset();
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
        assert_eq!(spot.tail_size(), 0);

        // Fit still works normally afterwards.
        let data: Vec<f64> = (0..500).map(|i| (i as f64 / 500.0) * 2.0 - 1.0).collect();
        spot.fit(&data).unwrap();
        assert!(!spot.anomaly_threshold().is_nan());
    }

    #[test]
    fn test_spot_reset_is_idempotent() {
        let mut spot = SpotDetector::new(SpotConfig::default()).unwrap();
        let data: Vec<f64> = (0..500).map(|i| (i as f64 / 500.0) * 2.0 - 1.0).collect();
        spot.fit(&data).unwrap();
        for v in &data {
            let _ = spot.step(*v).unwrap();
        }

        spot.reset();
        let after_first_n = spot.n();
        let after_first_nt = spot.nt();
        let after_first_size = spot.tail_size();

        spot.reset();
        assert_eq!(spot.n(), after_first_n);
        assert_eq!(spot.nt(), after_first_nt);
        assert_eq!(spot.tail_size(), after_first_size);
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
    }

    #[test]
    fn test_spot_reset_then_fit_then_step_full_cycle() {
        // Full lifecycle: fit -> step -> reset -> fit again -> step again must
        // produce the same step classifications as a fresh detector running
        // the same fit+step sequence.
        let config = SpotConfig::default();
        let train: Vec<f64> = (0..1000).map(|i| (i as f64 / 1000.0) * 2.0 - 1.0).collect();
        let probe: Vec<f64> = (0..200).map(|i| (i as f64 / 100.0) - 1.0).collect();

        let mut reused = SpotDetector::new(config.clone()).unwrap();
        reused.fit(&train).unwrap();
        for v in &probe {
            let _ = reused.step(*v).unwrap();
        }
        reused.reset();
        reused.fit(&train).unwrap();
        let reused_classifications: Vec<SpotStatus> =
            probe.iter().map(|&v| reused.step(v).unwrap()).collect();

        let mut fresh = SpotDetector::new(config).unwrap();
        fresh.fit(&train).unwrap();
        let fresh_classifications: Vec<SpotStatus> =
            probe.iter().map(|&v| fresh.step(v).unwrap()).collect();

        assert_eq!(reused_classifications, fresh_classifications);
        assert_relative_eq!(reused.anomaly_threshold(), fresh.anomaly_threshold());
        assert_relative_eq!(reused.excess_threshold(), fresh.excess_threshold());
        assert_eq!(reused.nt(), fresh.nt());
        assert_eq!(reused.n(), fresh.n());
    }

    #[test]
    fn test_spot_low_tail() {
        let config = SpotConfig {
            low_tail: true,
            ..SpotConfig::default()
        };
        let spot = SpotDetector::new(config).unwrap();

        assert!(spot.low);
        assert_relative_eq!(spot.up_down, -1.0);
    }

    #[test]
    fn test_spot_config_roundtrip() {
        let original_config = SpotConfig {
            q: 0.001,
            low_tail: true,
            discard_anomalies: false,
            level: 0.99,
            max_excess: 100,
        };

        let spot = SpotDetector::new(original_config.clone()).unwrap();
        let retrieved_config = spot.config().unwrap();

        assert_relative_eq!(retrieved_config.q, original_config.q);
        assert_eq!(retrieved_config.low_tail, original_config.low_tail);
        assert_eq!(
            retrieved_config.discard_anomalies,
            original_config.discard_anomalies
        );
        assert_relative_eq!(retrieved_config.level, original_config.level);
        assert_eq!(retrieved_config.max_excess, original_config.max_excess);
    }

    #[test]
    fn test_spot_quantile_probability_consistency() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with some data
        let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // Test quantile function
        let q = spot.quantile(0.01);
        assert!(!q.is_nan());
        assert!(q.is_finite());

        // Test probability function
        let p = spot.probability(q);
        assert!(!p.is_nan());
        assert!(p >= 0.0);
    }

    #[test]
    fn test_spot_excess_detection() {
        let config = SpotConfig {
            level: 0.9, // Lower level for easier testing
            ..SpotConfig::default()
        };
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with data range 0-100
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        let tail_size_before = spot.tail_size();

        // Feed a value near the top of the training range. Which class it
        // lands in depends on the fitted thresholds, so the counters are
        // checked against whatever status comes back.
        let status = step_and_check_counters(&mut spot, 95.0);

        // Only an excess is pushed into the tail; the other outcomes must
        // leave it alone.
        if status != SpotStatus::Excess {
            assert_eq!(spot.tail_size(), tail_size_before);
        }
    }
}