Skip to main content

libspot_rs/
spot.rs

1//! Main SPOT detector implementation
2//!
3//! This module implements the main SPOT (Streaming Peaks Over Threshold) detector
4//! that provides real-time anomaly detection for time series data.
5//!
6//! # Serialization
7//!
8//! When the `serde` feature is enabled, the [`SpotDetector`] can be serialized and
9//! deserialized. This is particularly useful for:
10//!
11//! - **Model persistence**: Save a trained model to disk and load it later
12//! - **Model deployment**: Export models for use in production systems
13//! - **Model sharing**: Share trained models between different applications
14//! - **Checkpointing**: Save model state during long-running processes
15//!
16//! ## Example
17//!
18//! ```ignore
19//! use libspot_rs::{SpotConfig, SpotDetector};
20//! use serde_json;
21//!
22//! // Train a model
23//! let config = SpotConfig::default();
24//! let mut spot = SpotDetector::new(config).unwrap();
25//! let training_data: Vec<f64> = (0..1000).map(|i| i as f64 / 100.0).collect();
26//! spot.fit(&training_data).unwrap();
27//!
28//! // Serialize the trained model
29//! let json = serde_json::to_string(&spot).unwrap();
30//!
31//! // Later, deserialize and continue using
32//! let mut loaded: SpotDetector = serde_json::from_str(&json).unwrap();
33//! let status = loaded.step(50.0);
34//! ```
35
36use crate::config::SpotConfig;
37
38use crate::error::{SpotError, SpotResult};
39use crate::p2::p2_quantile;
40use crate::status::SpotStatus;
41use crate::tail::Tail;
42
43/// Main SPOT detector for streaming anomaly detection
44///
45/// The `SpotDetector` implements the SPOT (Streaming Peaks Over Threshold) algorithm
46/// for real-time anomaly detection in streaming time series data.
47///
48/// # Serialization
49///
50/// When the `serde` feature is enabled, the detector can be serialized and deserialized,
51/// allowing you to save trained models and restore them later without re-training.
52///
53/// # Example
54///
55/// ```
56/// use libspot_rs::{SpotConfig, SpotDetector, SpotStatus};
57///
58/// let config = SpotConfig::default();
59/// let mut spot = SpotDetector::new(config).unwrap();
60///
61/// // Fit with training data
62/// let data: Vec<f64> = (0..1000).map(|i| (i as f64) / 100.0).collect();
63/// spot.fit(&data).unwrap();
64///
65/// // Process new data points
66/// match spot.step(15.0).unwrap() {
67///     SpotStatus::Normal => println!("Normal"),
68///     SpotStatus::Excess => println!("Excess"),
69///     SpotStatus::Anomaly => println!("Anomaly detected!"),
70/// }
71/// ```
72#[derive(Debug)]
73#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
74pub struct SpotDetector {
75    /// Probability of an anomaly
76    q: f64,
77    /// Location of the tail (high quantile)
78    level: f64,
79    /// Flag anomalies (true = flag, false = don't flag)
80    discard_anomalies: bool,
81    /// Upper/Lower tail choice (true = lower tail, false = upper tail)
82    low: bool,
83    /// Internal constant (+/- 1.0)
84    up_down: f64,
85    /// Normal/abnormal threshold
86    #[cfg_attr(feature = "serde", serde(with = "crate::ser::nan_safe_f64"))]
87    anomaly_threshold: f64,
88    /// Tail threshold
89    #[cfg_attr(feature = "serde", serde(with = "crate::ser::nan_safe_f64"))]
90    excess_threshold: f64,
91    /// Total number of excesses
92    nt: usize,
93    /// Total number of seen data
94    n: usize,
95    /// GPD Tail
96    tail: Tail,
97}
98
99impl SpotDetector {
100    /// Create a new SPOT detector with the given configuration
101    pub fn new(config: SpotConfig) -> SpotResult<Self> {
102        // Validate parameters
103        if config.level < 0.0 || config.level >= 1.0 {
104            return Err(SpotError::LevelOutOfBounds);
105        }
106        if config.q >= (1.0 - config.level) || config.q <= 0.0 {
107            return Err(SpotError::QOutOfBounds);
108        }
109
110        let up_down = if config.low_tail { -1.0 } else { 1.0 };
111
112        Ok(Self {
113            q: config.q,
114            level: config.level,
115            discard_anomalies: config.discard_anomalies,
116            low: config.low_tail,
117            up_down,
118            anomaly_threshold: f64::NAN,
119            excess_threshold: f64::NAN,
120            nt: 0,
121            n: 0,
122            tail: Tail::new(config.max_excess)?,
123        })
124    }
125
126    /// Fit the model using initial training data
127    pub fn fit(&mut self, data: &[f64]) -> SpotResult<()> {
128        // Reset counters
129        self.nt = 0;
130        self.n = data.len();
131
132        // Compute excess threshold using P2 quantile estimator
133        let et = if self.low {
134            // Take the low quantile (1 - level)
135            p2_quantile(1.0 - self.level, data)
136        } else {
137            p2_quantile(self.level, data)
138        };
139
140        if et.is_nan() {
141            return Err(SpotError::ExcessThresholdIsNaN);
142        }
143
144        self.excess_threshold = et;
145
146        // Fill the tail with excesses
147        for &value in data {
148            // Positive excess
149            let excess = self.up_down * (value - et);
150            if excess > 0.0 {
151                // It's a real excess
152                self.nt += 1;
153                self.tail.push(excess);
154            }
155        }
156
157        // Fit the tail with the pushed data
158        self.tail.fit();
159
160        // Compute first anomaly threshold
161        self.anomaly_threshold = self.quantile(self.q);
162        if self.anomaly_threshold.is_nan() {
163            return Err(SpotError::AnomalyThresholdIsNaN);
164        }
165
166        Ok(())
167    }
168
169    /// Process a single data point and return its classification
170    pub fn step(&mut self, value: f64) -> SpotResult<SpotStatus> {
171        if value.is_nan() {
172            return Err(SpotError::DataIsNaN);
173        }
174
175        if self.discard_anomalies && (self.up_down * (value - self.anomaly_threshold) > 0.0) {
176            return Ok(SpotStatus::Anomaly);
177        }
178
179        // Increment number of data (without the anomalies)
180        self.n += 1;
181
182        let ex = self.up_down * (value - self.excess_threshold);
183        if ex >= 0.0 {
184            // Increment number of excesses
185            self.nt += 1;
186            self.tail.push(ex);
187            self.tail.fit();
188            // Update threshold
189            self.anomaly_threshold = self.quantile(self.q);
190            return Ok(SpotStatus::Excess);
191        }
192
193        Ok(SpotStatus::Normal)
194    }
195
196    /// Get the quantile for a given probability
197    pub fn quantile(&self, q: f64) -> f64 {
198        if self.n == 0 {
199            return f64::NAN;
200        }
201
202        let s = (self.nt as f64) / (self.n as f64);
203        self.excess_threshold + self.up_down * self.tail.quantile(s, q)
204    }
205
206    /// Get the probability for a given value
207    pub fn probability(&self, z: f64) -> f64 {
208        if self.n == 0 {
209            return f64::NAN;
210        }
211
212        let s = (self.nt as f64) / (self.n as f64);
213        self.tail
214            .probability(s, self.up_down * (z - self.excess_threshold))
215    }
216
217    /// Get the current anomaly threshold
218    pub fn anomaly_threshold(&self) -> f64 {
219        self.anomaly_threshold
220    }
221
222    /// Get the current excess threshold
223    pub fn excess_threshold(&self) -> f64 {
224        self.excess_threshold
225    }
226
227    /// Get the current configuration (reconstructed)
228    pub fn config(&self) -> Option<SpotConfig> {
229        Some(SpotConfig {
230            q: self.q,
231            low_tail: self.low,
232            discard_anomalies: self.discard_anomalies,
233            level: self.level,
234            max_excess: self.tail.peaks().container().capacity(),
235        })
236    }
237
238    /// Get the total number of data points seen
239    pub fn n(&self) -> usize {
240        self.n
241    }
242
243    /// Get the total number of excesses
244    pub fn nt(&self) -> usize {
245        self.nt
246    }
247
248    /// Get the current tail parameters
249    pub fn tail_parameters(&self) -> (f64, f64) {
250        (self.tail.gamma(), self.tail.sigma())
251    }
252
253    /// Get the current size of the tail data
254    pub fn tail_size(&self) -> usize {
255        self.tail.size()
256    }
257
258    /// Get the minimum value in the peaks
259    pub fn peaks_min(&self) -> f64 {
260        self.tail.peaks().min()
261    }
262
263    /// Get the maximum value in the peaks
264    pub fn peaks_max(&self) -> f64 {
265        self.tail.peaks().max()
266    }
267
268    /// Get the mean of the peaks
269    pub fn peaks_mean(&self) -> f64 {
270        self.tail.peaks().mean()
271    }
272
273    /// Get the variance of the peaks
274    pub fn peaks_variance(&self) -> f64 {
275        self.tail.peaks().variance()
276    }
277
278    /// Get the peaks data as a vector (for debugging and export)
279    pub fn peaks_data(&self) -> Vec<f64> {
280        self.tail.peaks().container().data()
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// A default configuration yields an unfitted detector: NaN thresholds, zero counters.
    #[test]
    fn test_spot_creation_valid_config() {
        let config = SpotConfig::default();
        let spot = SpotDetector::new(config).unwrap();

        assert_relative_eq!(spot.q, 0.0001);
        assert!(!spot.low);
        assert!(spot.discard_anomalies);
        assert_relative_eq!(spot.level, 0.998);
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
    }

    #[test]
    fn test_spot_invalid_level() {
        let config = SpotConfig {
            level: 1.5, // Invalid
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::LevelOutOfBounds);
    }

    /// The level interval is `[0, 1)`: negative values and exactly 1.0 are rejected.
    #[test]
    fn test_spot_level_boundaries() {
        for level in [-0.1, 1.0] {
            let config = SpotConfig {
                level,
                ..SpotConfig::default()
            };
            assert_eq!(
                SpotDetector::new(config).unwrap_err(),
                SpotError::LevelOutOfBounds
            );
        }
    }

    #[test]
    fn test_spot_invalid_q() {
        let config = SpotConfig {
            q: 0.5, // Too high for level 0.998
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::QOutOfBounds);
    }

    /// The q interval is open at zero: q == 0 and negative q are rejected.
    #[test]
    fn test_spot_q_lower_boundary() {
        for q in [0.0, -0.001] {
            let config = SpotConfig {
                q,
                ..SpotConfig::default()
            };
            assert_eq!(
                SpotDetector::new(config).unwrap_err(),
                SpotError::QOutOfBounds
            );
        }
    }

    #[test]
    fn test_spot_fit_basic() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Create simple training data
        let data: Vec<f64> = (0..1000).map(|i| (i as f64 / 1000.0) * 2.0 - 1.0).collect();

        let result = spot.fit(&data);
        assert!(result.is_ok());

        // After fit, thresholds should be valid
        assert!(!spot.anomaly_threshold().is_nan());
        assert!(!spot.excess_threshold().is_nan());
        assert!(spot.anomaly_threshold().is_finite());
        assert!(spot.excess_threshold().is_finite());
        assert_eq!(spot.n(), 1000);
        assert!(spot.nt() > 0); // Should have some excesses
    }

    /// Whatever the classification, `step` must update the counters consistently.
    #[test]
    fn test_spot_step_normal() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with simple data
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        let n_before = spot.n();
        let nt_before = spot.nt();

        // The exact status depends on the fitted thresholds, so check the
        // bookkeeping that goes with each possible outcome instead.
        match spot.step(50.0).unwrap() {
            SpotStatus::Anomaly => {
                assert_eq!(spot.n(), n_before);
                assert_eq!(spot.nt(), nt_before);
            }
            SpotStatus::Excess => {
                assert_eq!(spot.n(), n_before + 1);
                assert_eq!(spot.nt(), nt_before + 1);
            }
            SpotStatus::Normal => {
                assert_eq!(spot.n(), n_before + 1);
                assert_eq!(spot.nt(), nt_before);
            }
        }
    }

    #[test]
    fn test_spot_step_nan() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        let result = spot.step(f64::NAN);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::DataIsNaN);

        // A rejected value must not be counted.
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
    }

    #[test]
    fn test_spot_low_tail() {
        let config = SpotConfig {
            low_tail: true,
            ..SpotConfig::default()
        };
        let spot = SpotDetector::new(config).unwrap();

        assert!(spot.low);
        assert_relative_eq!(spot.up_down, -1.0);
    }

    #[test]
    fn test_spot_config_roundtrip() {
        let original_config = SpotConfig {
            q: 0.001,
            low_tail: true,
            discard_anomalies: false,
            level: 0.99,
            max_excess: 100,
        };

        let spot = SpotDetector::new(original_config.clone()).unwrap();
        let retrieved_config = spot.config().unwrap();

        assert_relative_eq!(retrieved_config.q, original_config.q);
        assert_eq!(retrieved_config.low_tail, original_config.low_tail);
        assert_eq!(
            retrieved_config.discard_anomalies,
            original_config.discard_anomalies
        );
        assert_relative_eq!(retrieved_config.level, original_config.level);
        assert_eq!(retrieved_config.max_excess, original_config.max_excess);
    }

    #[test]
    fn test_spot_quantile_probability_consistency() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with some data
        let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // Test quantile function
        let q = spot.quantile(0.01);
        assert!(!q.is_nan());
        assert!(q.is_finite());

        // Test probability function
        let p = spot.probability(q);
        assert!(!p.is_nan());
        assert!(p >= 0.0);
    }

    /// Stepping after a fit must keep `n` and `nt` in line with the reported status.
    #[test]
    fn test_spot_excess_detection() {
        let config = SpotConfig {
            level: 0.9, // Lower level for easier testing
            ..SpotConfig::default()
        };
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with data range 0-100
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        let initial_n = spot.n();
        let initial_nt = spot.nt();

        // Add a value near the top of the training range
        let status = spot.step(95.0).unwrap();

        match status {
            SpotStatus::Anomaly => {
                // Anomalies are discarded: nothing is counted.
                assert_eq!(spot.n(), initial_n);
                assert_eq!(spot.nt(), initial_nt);
            }
            SpotStatus::Excess => {
                // Excesses are counted both as data and as excesses.
                assert_eq!(spot.n(), initial_n + 1);
                assert_eq!(spot.nt(), initial_nt + 1);
                assert!(!spot.anomaly_threshold().is_nan());
            }
            SpotStatus::Normal => {
                // Normal values only grow the data counter.
                assert_eq!(spot.n(), initial_n + 1);
                assert_eq!(spot.nt(), initial_nt);
            }
        }
    }
}
451}