1#![doc = include_str!("../README.md")]
2
3use std::collections::BTreeSet;
4
5mod dbscan;
6mod error;
7mod mad;
8mod sensitivity;
9#[cfg(test)]
10mod testing;
11
12pub use dbscan::{Data as DbscanData, DbscanDetector};
13pub use error::Error;
14pub use mad::{MADDetector, PreprocessedData as MADData};
15use sensitivity::Sensitivity;
16
/// A pair of per-timestamp bounds reported alongside detection results.
///
/// `min` and `max` are parallel vectors; the crate-internal constructor
/// creates both with the same length, one entry per timestamp.
#[derive(Clone, Debug)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize),
    serde(rename_all = "camelCase")
)]
pub struct Band {
    /// Lower edge of the band, indexed by timestamp.
    pub min: Vec<f64>,
    /// Upper edge of the band, indexed by timestamp.
    pub max: Vec<f64>,
}
28
29impl Band {
30 fn new(n_timestamps: usize) -> Self {
31 Self {
32 min: vec![f64::NAN; n_timestamps],
33 max: vec![f64::NAN; n_timestamps],
34 }
35 }
36}
37
38#[derive(Debug, Clone)]
40#[cfg_attr(feature = "serde", derive(serde::Serialize))]
41#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
42pub struct OutlierOutput {
43 pub outlying_series: BTreeSet<usize>,
47
48 pub series_results: Vec<Series>,
50
51 pub cluster_band: Option<Band>,
58}
59
60impl OutlierOutput {
61 pub fn new(series_results: Vec<Series>, cluster_band: Option<Band>) -> Self {
63 Self {
64 outlying_series: series_results
65 .iter()
66 .enumerate()
67 .filter_map(|(i, s)| s.is_outlier.then_some(i))
68 .collect(),
69 series_results,
70 cluster_band,
71 }
72 }
73
74 pub fn is_outlier(&self, i: usize) -> bool {
76 self.outlying_series.contains(&i)
77 }
78}
79
80#[derive(Debug, Clone)]
82#[cfg_attr(feature = "serde", derive(serde::Serialize))]
83#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
84pub struct Series {
85 pub is_outlier: bool,
87 pub outlier_intervals: OutlierIntervals,
89 pub scores: Vec<f64>,
96}
97
98impl Series {
99 pub fn empty() -> Self {
101 Self {
102 is_outlier: false,
103 scores: Vec::new(),
104 outlier_intervals: OutlierIntervals::empty(),
105 }
106 }
107
108 pub fn with_capacity(n: usize) -> Self {
111 Self {
112 is_outlier: false,
113 scores: Vec::with_capacity(n),
114 outlier_intervals: OutlierIntervals::empty(),
115 }
116 }
117
118 pub fn preallocated(n_series: usize, n_timestamps: usize) -> Vec<Self> {
121 std::iter::repeat_with(|| {
122 let mut s = Series::with_capacity(n_timestamps);
123 s.scores.resize(n_timestamps, 0.0);
124 s
125 })
126 .take(n_series)
127 .collect()
128 }
129}
130
131#[derive(Debug, Clone)]
133#[cfg_attr(feature = "serde", derive(serde::Serialize))]
134#[cfg_attr(feature = "serde", serde(rename_all = "camelCase", transparent))]
135pub struct OutlierIntervals {
136 pub intervals: Vec<OutlierInterval>,
138
139 #[cfg_attr(feature = "serde", serde(skip))]
141 expecting_end: bool,
142}
143
144impl OutlierIntervals {
145 fn empty() -> Self {
146 Self {
147 intervals: Vec::new(),
148 expecting_end: false,
149 }
150 }
151
152 fn add_start(&mut self, ts: usize) {
153 debug_assert!(
154 !self.expecting_end,
155 "Expected end of outlier interval, got start"
156 );
157
158 self.intervals.push(OutlierInterval {
159 start: ts,
160 end: None,
161 });
162 self.expecting_end = true;
163 }
164
165 fn add_end(&mut self, ts: usize) {
166 debug_assert!(
167 self.expecting_end,
168 "Expected start of outlier interval, got end"
169 );
170
171 match self.intervals.last_mut() {
172 Some(x @ OutlierInterval { end: None, .. }) => {
173 x.end = Some(ts);
174 }
175 _ => unreachable!("tried to add end to an open-ended interval"),
176 };
177 self.expecting_end = false;
178 }
179}
180
/// A single interval over which a series was outlying.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize),
    serde(rename_all = "camelCase")
)]
pub struct OutlierInterval {
    /// The timestamp index at which the interval starts.
    pub start: usize,
    /// The timestamp index at which the interval ends, or `None` while the
    /// interval has been opened but not yet closed.
    pub end: Option<usize>,
}
196
197pub trait OutlierDetector {
199 type PreprocessedData;
210
211 fn preprocess(&self, y: &[&[f64]]) -> Result<Self::PreprocessedData, Error>;
227
228 fn detect(&self, y: &Self::PreprocessedData) -> Result<OutlierOutput, Error>;
235}
236
#[cfg(test)]
mod test {
    use super::*;

    /// Minimal detector used to exercise the `OutlierDetector` contract.
    ///
    /// It copies the input verbatim when preprocessing and flags any series
    /// containing a value greater than `10.0`.
    struct DummyDetector;

    impl OutlierDetector for DummyDetector {
        type PreprocessedData = Vec<Vec<f64>>;

        fn preprocess(&self, y: &[&[f64]]) -> Result<Self::PreprocessedData, Error> {
            Ok(y.iter().map(|x| x.to_vec()).collect())
        }

        fn detect(&self, y: &Self::PreprocessedData) -> Result<OutlierOutput, Error> {
            let serieses = y
                .iter()
                .map(|series| {
                    // Every series gets one interval that is opened and never closed.
                    let mut intervals = OutlierIntervals::empty();
                    intervals.add_start(1);
                    Series {
                        is_outlier: series.iter().any(|&x| x > 10.0),
                        scores: series.to_vec(),
                        outlier_intervals: intervals,
                    }
                })
                .collect();
            // `first()` instead of `y[0]` so that an empty input produces an
            // empty band rather than an out-of-bounds panic.
            let n_timestamps = y.first().map_or(0, Vec::len);
            let band = Band {
                min: vec![-1.0; n_timestamps],
                max: vec![1.0; n_timestamps],
            };
            Ok(OutlierOutput::new(serieses, Some(band)))
        }
    }

    #[test]
    fn dummy_detector_flags_series_above_threshold() {
        let quiet = [1.0, 2.0, 3.0];
        let loud = [1.0, 20.0, 3.0];
        let detector = DummyDetector;
        let data = detector
            .preprocess(&[&quiet[..], &loud[..]])
            .unwrap_or_else(|_| panic!("preprocessing the fixture failed"));
        let output = detector
            .detect(&data)
            .unwrap_or_else(|_| panic!("detecting on the fixture failed"));
        assert!(!output.is_outlier(0));
        assert!(output.is_outlier(1));
        assert_eq!(output.series_results.len(), 2);
        assert_eq!(output.cluster_band.map(|band| band.min.len()), Some(3));
    }

    #[test]
    fn dummy_detector_accepts_empty_input() {
        let output = DummyDetector
            .detect(&Vec::new())
            .unwrap_or_else(|_| panic!("detecting on empty input failed"));
        assert!(output.outlying_series.is_empty());
        assert!(output.series_results.is_empty());
        assert_eq!(output.cluster_band.map(|band| band.max.len()), Some(0));
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serialize() {
        let mut outlier_intervals = OutlierIntervals::empty();
        outlier_intervals.add_start(1);
        let series = Series {
            is_outlier: true,
            scores: vec![1.0, 2.0, 3.0],
            outlier_intervals,
        };
        // Built field by field (not via `OutlierOutput::new`) so the test
        // checks only the serialized shape of the given values.
        let output = OutlierOutput {
            outlying_series: BTreeSet::from([0, 1]),
            series_results: vec![series],
            cluster_band: None,
        };
        let serialized = serde_json::to_string(&output).unwrap();
        assert_eq!(
            serialized,
            r#"{"outlyingSeries":[0,1],"seriesResults":[{"isOutlier":true,"outlierIntervals":[{"start":1,"end":null}],"scores":[1.0,2.0,3.0]}],"clusterBand":null}"#
        );
    }
}