libspot_rs/
spot.rs

1//! Main SPOT detector implementation
2//!
3//! This module implements the main SPOT (Streaming Peaks Over Threshold) detector
4//! that provides real-time anomaly detection for time series data.
5
6use crate::config::SpotConfig;
7
8use crate::error::{SpotError, SpotResult};
9use crate::p2::p2_quantile;
10use crate::status::SpotStatus;
11use crate::tail::Tail;
12
/// Main SPOT detector for streaming anomaly detection.
///
/// A detector is created unfitted by `new` (both thresholds are NaN and the
/// counters are zero), calibrated on a batch of data by `fit`, and then fed one
/// value at a time through `step`.
#[derive(Debug)]
pub struct SpotDetector {
    /// Probability of an anomaly.
    ///
    /// This is the probability passed to `quantile` whenever the anomaly
    /// threshold is (re)computed.
    q: f64,
    /// Location of the tail (high quantile).
    ///
    /// `fit` estimates the excess threshold as the `level` quantile of the
    /// training data (or the `1 - level` quantile when `low` is set).
    level: f64,
    /// Flag anomalies (true = flag, false = don't flag).
    ///
    /// When true, `step` returns `SpotStatus::Anomaly` for values beyond the
    /// anomaly threshold and leaves the counters and the tail untouched for
    /// them. When false, `step` never returns `SpotStatus::Anomaly`.
    discard_anomalies: bool,
    /// Upper/Lower tail choice (true = lower tail, false = upper tail)
    low: bool,
    /// Internal constant (+/- 1.0): `-1.0` for the lower tail, `1.0` for the
    /// upper tail. Multiplying `value - threshold` by it makes "beyond the
    /// threshold" a positive number for both tails.
    up_down: f64,
    /// Normal/abnormal threshold. NaN until `fit` has run.
    anomaly_threshold: f64,
    /// Tail threshold: values at or beyond it are treated as excesses by
    /// `step`. NaN until `fit` has run.
    excess_threshold: f64,
    /// Total number of excesses
    nt: usize,
    /// Total number of seen data (values returned as anomalies by `step` are
    /// not counted).
    n: usize,
    /// GPD Tail: stores the excesses and provides the fitted tail's
    /// quantile/probability functions.
    tail: Tail,
}
37
38impl SpotDetector {
39    /// Create a new SPOT detector with the given configuration
40    pub fn new(config: SpotConfig) -> SpotResult<Self> {
41        // Validate parameters
42        if config.level < 0.0 || config.level >= 1.0 {
43            return Err(SpotError::LevelOutOfBounds);
44        }
45        if config.q >= (1.0 - config.level) || config.q <= 0.0 {
46            return Err(SpotError::QOutOfBounds);
47        }
48
49        let up_down = if config.low_tail { -1.0 } else { 1.0 };
50
51        Ok(Self {
52            q: config.q,
53            level: config.level,
54            discard_anomalies: config.discard_anomalies,
55            low: config.low_tail,
56            up_down,
57            anomaly_threshold: f64::NAN,
58            excess_threshold: f64::NAN,
59            nt: 0,
60            n: 0,
61            tail: Tail::new(config.max_excess)?,
62        })
63    }
64
65    /// Fit the model using initial training data
66    pub fn fit(&mut self, data: &[f64]) -> SpotResult<()> {
67        // Reset counters
68        self.nt = 0;
69        self.n = data.len();
70
71        // Compute excess threshold using P2 quantile estimator
72        let et = if self.low {
73            // Take the low quantile (1 - level)
74            p2_quantile(1.0 - self.level, data)
75        } else {
76            p2_quantile(self.level, data)
77        };
78
79        if et.is_nan() {
80            return Err(SpotError::ExcessThresholdIsNaN);
81        }
82
83        self.excess_threshold = et;
84
85        // Fill the tail with excesses
86        for &value in data {
87            // Positive excess
88            let excess = self.up_down * (value - et);
89            if excess > 0.0 {
90                // It's a real excess
91                self.nt += 1;
92                self.tail.push(excess);
93            }
94        }
95
96        // Fit the tail with the pushed data
97        self.tail.fit();
98
99        // Compute first anomaly threshold
100        self.anomaly_threshold = self.quantile(self.q);
101        if self.anomaly_threshold.is_nan() {
102            return Err(SpotError::AnomalyThresholdIsNaN);
103        }
104
105        Ok(())
106    }
107
108    /// Process a single data point and return its classification
109    pub fn step(&mut self, value: f64) -> SpotResult<SpotStatus> {
110        if value.is_nan() {
111            return Err(SpotError::DataIsNaN);
112        }
113
114        if self.discard_anomalies && (self.up_down * (value - self.anomaly_threshold) > 0.0) {
115            return Ok(SpotStatus::Anomaly);
116        }
117
118        // Increment number of data (without the anomalies)
119        self.n += 1;
120
121        let ex = self.up_down * (value - self.excess_threshold);
122        if ex >= 0.0 {
123            // Increment number of excesses
124            self.nt += 1;
125            self.tail.push(ex);
126            self.tail.fit();
127            // Update threshold
128            self.anomaly_threshold = self.quantile(self.q);
129            return Ok(SpotStatus::Excess);
130        }
131
132        Ok(SpotStatus::Normal)
133    }
134
135    /// Get the quantile for a given probability
136    pub fn quantile(&self, q: f64) -> f64 {
137        if self.n == 0 {
138            return f64::NAN;
139        }
140
141        let s = (self.nt as f64) / (self.n as f64);
142        self.excess_threshold + self.up_down * self.tail.quantile(s, q)
143    }
144
145    /// Get the probability for a given value
146    pub fn probability(&self, z: f64) -> f64 {
147        if self.n == 0 {
148            return f64::NAN;
149        }
150
151        let s = (self.nt as f64) / (self.n as f64);
152        self.tail
153            .probability(s, self.up_down * (z - self.excess_threshold))
154    }
155
156    /// Get the current anomaly threshold
157    pub fn anomaly_threshold(&self) -> f64 {
158        self.anomaly_threshold
159    }
160
161    /// Get the current excess threshold
162    pub fn excess_threshold(&self) -> f64 {
163        self.excess_threshold
164    }
165
166    /// Get the current configuration (reconstructed)
167    pub fn config(&self) -> Option<SpotConfig> {
168        Some(SpotConfig {
169            q: self.q,
170            low_tail: self.low,
171            discard_anomalies: self.discard_anomalies,
172            level: self.level,
173            max_excess: self.tail.peaks().container().capacity(),
174        })
175    }
176
177    /// Get the total number of data points seen
178    pub fn n(&self) -> usize {
179        self.n
180    }
181
182    /// Get the total number of excesses
183    pub fn nt(&self) -> usize {
184        self.nt
185    }
186
187    /// Get the current tail parameters
188    pub fn tail_parameters(&self) -> (f64, f64) {
189        (self.tail.gamma(), self.tail.sigma())
190    }
191
192    /// Get the current size of the tail data
193    pub fn tail_size(&self) -> usize {
194        self.tail.size()
195    }
196
197    /// Get the minimum value in the peaks
198    pub fn peaks_min(&self) -> f64 {
199        self.tail.peaks().min()
200    }
201
202    /// Get the maximum value in the peaks
203    pub fn peaks_max(&self) -> f64 {
204        self.tail.peaks().max()
205    }
206
207    /// Get the mean of the peaks
208    pub fn peaks_mean(&self) -> f64 {
209        self.tail.peaks().mean()
210    }
211
212    /// Get the variance of the peaks
213    pub fn peaks_variance(&self) -> f64 {
214        self.tail.peaks().variance()
215    }
216
217    /// Get the peaks data as a vector (for debugging and export)
218    pub fn peaks_data(&self) -> Vec<f64> {
219        self.tail.peaks().container().data()
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// Feed `value` to `spot` and assert that the counters moved the way
    /// `step` handles the returned status: an anomaly changes neither
    /// counter, a normal value bumps only `n`, an excess bumps both.
    fn step_and_check_counters(spot: &mut SpotDetector, value: f64) {
        let n_before = spot.n();
        let nt_before = spot.nt();

        let status = spot.step(value).expect("non-NaN input must be accepted");

        // Exhaustive on purpose: a new status variant must be handled here.
        let (expected_n, expected_nt) = match status {
            SpotStatus::Anomaly => (n_before, nt_before),
            SpotStatus::Normal => (n_before + 1, nt_before),
            SpotStatus::Excess => (n_before + 1, nt_before + 1),
        };

        assert_eq!(spot.n(), expected_n);
        assert_eq!(spot.nt(), expected_nt);
    }

    #[test]
    fn test_spot_creation_valid_config() {
        let config = SpotConfig::default();
        let spot = SpotDetector::new(config).unwrap();

        assert_relative_eq!(spot.q, 0.0001);
        assert!(!spot.low);
        assert!(spot.discard_anomalies);
        assert_relative_eq!(spot.level, 0.998);
        assert!(spot.anomaly_threshold().is_nan());
        assert!(spot.excess_threshold().is_nan());
        assert_eq!(spot.n(), 0);
        assert_eq!(spot.nt(), 0);
    }

    #[test]
    fn test_spot_invalid_level() {
        let config = SpotConfig {
            level: 1.5, // Invalid
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::LevelOutOfBounds);
    }

    #[test]
    fn test_spot_invalid_q() {
        let config = SpotConfig {
            q: 0.5, // Too high for level 0.998
            ..SpotConfig::default()
        };
        let result = SpotDetector::new(config);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::QOutOfBounds);
    }

    #[test]
    fn test_spot_fit_basic() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Create simple training data
        let data: Vec<f64> = (0..1000).map(|i| (i as f64 / 1000.0) * 2.0 - 1.0).collect();

        let result = spot.fit(&data);
        assert!(result.is_ok());

        // After fit, thresholds should be valid
        assert!(!spot.anomaly_threshold().is_nan());
        assert!(!spot.excess_threshold().is_nan());
        assert!(spot.anomaly_threshold().is_finite());
        assert!(spot.excess_threshold().is_finite());
        assert_eq!(spot.n(), 1000);
        assert!(spot.nt() > 0); // Should have some excesses
    }

    #[test]
    fn test_spot_step_normal() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with simple data
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // The exact status depends on the fitted thresholds, but the step must
        // succeed and keep the counters consistent with the status it reports.
        step_and_check_counters(&mut spot, 50.0);
    }

    #[test]
    fn test_spot_step_nan() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        let result = spot.step(f64::NAN);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), SpotError::DataIsNaN);
    }

    #[test]
    fn test_spot_low_tail() {
        let config = SpotConfig {
            low_tail: true,
            ..SpotConfig::default()
        };
        let spot = SpotDetector::new(config).unwrap();

        assert!(spot.low);
        assert_relative_eq!(spot.up_down, -1.0);
    }

    #[test]
    fn test_spot_config_roundtrip() {
        let original_config = SpotConfig {
            q: 0.001,
            low_tail: true,
            discard_anomalies: false,
            level: 0.99,
            max_excess: 100,
        };

        let spot = SpotDetector::new(original_config.clone()).unwrap();
        let retrieved_config = spot.config().unwrap();

        assert_relative_eq!(retrieved_config.q, original_config.q);
        assert_eq!(retrieved_config.low_tail, original_config.low_tail);
        assert_eq!(
            retrieved_config.discard_anomalies,
            original_config.discard_anomalies
        );
        assert_relative_eq!(retrieved_config.level, original_config.level);
        assert_eq!(retrieved_config.max_excess, original_config.max_excess);
    }

    #[test]
    fn test_spot_quantile_probability_consistency() {
        let config = SpotConfig::default();
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with some data
        let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // Test quantile function
        let q = spot.quantile(0.01);
        assert!(!q.is_nan());
        assert!(q.is_finite());

        // Test probability function
        let p = spot.probability(q);
        assert!(!p.is_nan());
        assert!(p >= 0.0);
    }

    #[test]
    fn test_spot_excess_detection() {
        let config = SpotConfig {
            level: 0.9, // Lower level for easier testing
            ..SpotConfig::default()
        };
        let mut spot = SpotDetector::new(config).unwrap();

        // Fit with data range 0-100
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        spot.fit(&data).unwrap();

        // Feed a value near the top of the training range. Whatever status the
        // fitted thresholds produce, the excess and data counters must move
        // accordingly.
        step_and_check_counters(&mut spot, 95.0);
    }
}