Skip to main content

wickra_core/indicators/
sample_entropy.rs

1//! Sample Entropy (`SampEn`) — the regularity / predictability of a window.
2
3use std::collections::VecDeque;
4
5use crate::error::{Error, Result};
6use crate::traits::Indicator;
7
8/// Population standard deviation of a slice (used for the matching tolerance).
9fn population_stddev(window: &[f64]) -> f64 {
10    let n = window.len() as f64;
11    let mean = window.iter().sum::<f64>() / n;
12    let var = window.iter().map(|&v| (v - mean) * (v - mean)).sum::<f64>() / n;
13    var.max(0.0).sqrt()
14}
15
16/// Whether two length-`len` templates starting at `i` and `j` match within the
17/// Chebyshev tolerance `tol`.
18fn templates_match(window: &[f64], i: usize, j: usize, len: usize, tol: f64) -> bool {
19    for k in 0..len {
20        if (window[i + k] - window[j + k]).abs() > tol {
21            return false;
22        }
23    }
24    true
25}
26
27/// Sample Entropy (`SampEn`) — Richman & Moorman's measure of how *regular* (i.e.
28/// predictable) a series is: the negative log conditional probability that two
29/// sub-sequences similar for `m` points stay similar at the next point.
30///
31/// ```text
32/// tol = r_factor · stddev(window)
33/// B   = # template pairs of length m   within tol   (i < j)
34/// A   = # template pairs of length m+1 within tol   (i < j)
35/// `SampEn` = − ln(A / B)
36/// ```
37///
38/// Low `SampEn` means the window is **regular** — patterns of length `m` reliably
39/// extend to length `m + 1`, the fingerprint of a trending or cyclic market. High
40/// `SampEn` means the series is **irregular** — knowing the last `m` points tells
41/// you little about the next, the fingerprint of noise. Unlike the older
42/// approximate entropy (`ApEn`), `SampEn` excludes self-matches, so it is far less
43/// biased on short windows.
44///
45/// The tolerance is `r_factor` times the window's standard deviation, so the
46/// measure self-scales. A perfectly flat window (`stddev == 0`) is maximally
47/// regular and returns `0`. If no length-`m` pairs match, the entropy is
48/// undefined and `0` is returned; if length-`m` pairs match but none extend, the
49/// estimator falls back to treating the unseen count as one (`−ln(1/B) = ln(B)`).
50/// The first value lands after `period` inputs; each `update` is O(`period²`).
51///
52/// # Example
53///
54/// ```
55/// use wickra_core::{Indicator, SampleEntropy};
56///
57/// let mut indicator = SampleEntropy::new(50, 2, 0.2).unwrap();
58/// let mut last = None;
59/// for i in 0..80 {
60///     last = indicator.update((f64::from(i) * 0.3).sin() * 5.0);
61/// }
62/// assert!(last.is_some());
63/// ```
64#[derive(Debug, Clone)]
65pub struct SampleEntropy {
66    period: usize,
67    emb_dim: usize,
68    r_factor: f64,
69    window: VecDeque<f64>,
70    last: Option<f64>,
71}
72
73impl SampleEntropy {
74    /// Construct a Sample Entropy over `period` values with embedding dimension
75    /// `m` and tolerance factor `r_factor`.
76    ///
77    /// # Errors
78    ///
79    /// Returns [`Error::PeriodZero`] if `period` or `m` is `0`,
80    /// [`Error::InvalidPeriod`] if `period < m + 2` (no length-`m+1` template
81    /// pairs otherwise), and [`Error::InvalidParameter`] if `r_factor` is not
82    /// finite and positive.
83    pub fn new(period: usize, m: usize, r_factor: f64) -> Result<Self> {
84        if period == 0 || m == 0 {
85            return Err(Error::PeriodZero);
86        }
87        if period < m + 2 {
88            return Err(Error::InvalidPeriod {
89                message: "sample entropy needs period >= m + 2",
90            });
91        }
92        if !r_factor.is_finite() || r_factor <= 0.0 {
93            return Err(Error::InvalidParameter {
94                message: "sample entropy r_factor must be finite and positive",
95            });
96        }
97        Ok(Self {
98            period,
99            emb_dim: m,
100            r_factor,
101            window: VecDeque::with_capacity(period),
102            last: None,
103        })
104    }
105
106    /// Configured `(period, m, r_factor)`.
107    pub const fn params(&self) -> (usize, usize, f64) {
108        (self.period, self.emb_dim, self.r_factor)
109    }
110
111    /// Current value if available.
112    pub const fn value(&self) -> Option<f64> {
113        self.last
114    }
115
116    fn compute(&self) -> f64 {
117        let window: Vec<f64> = self.window.iter().copied().collect();
118        let std = population_stddev(&window);
119        if std == 0.0 {
120            return 0.0;
121        }
122        let tol = self.r_factor * std;
123        let m = self.emb_dim;
124        // Restrict both template lengths to the same index range so A and B share
125        // their candidate pairs: there are `period − m` length-(m+1) templates.
126        let count = self.period - m;
127        let mut matches_m = 0u64;
128        let mut matches_m1 = 0u64;
129        for i in 0..count {
130            for j in (i + 1)..count {
131                if templates_match(&window, i, j, m, tol) {
132                    matches_m += 1;
133                    if templates_match(&window, i, j, m + 1, tol) {
134                        matches_m1 += 1;
135                    }
136                }
137            }
138        }
139        if matches_m == 0 {
140            return 0.0;
141        }
142        if matches_m1 == 0 {
143            // No length-(m+1) matches: fall back to one unseen count.
144            return (matches_m as f64).ln();
145        }
146        -((matches_m1 as f64) / (matches_m as f64)).ln()
147    }
148}
149
150impl Indicator for SampleEntropy {
151    type Input = f64;
152    type Output = f64;
153
154    fn update(&mut self, input: f64) -> Option<f64> {
155        if !input.is_finite() {
156            return self.last;
157        }
158        if self.window.len() == self.period {
159            self.window.pop_front();
160        }
161        self.window.push_back(input);
162        if self.window.len() < self.period {
163            return None;
164        }
165        let out = self.compute();
166        self.last = Some(out);
167        Some(out)
168    }
169
170    fn reset(&mut self) {
171        self.window.clear();
172        self.last = None;
173    }
174
175    fn warmup_period(&self) -> usize {
176        self.period
177    }
178
179    fn is_ready(&self) -> bool {
180        self.last.is_some()
181    }
182
183    fn name(&self) -> &'static str {
184        "SampleEntropy"
185    }
186}
187
188#[cfg(test)]
189mod tests {
190    use super::*;
191    use crate::traits::BatchExt;
192    use approx::assert_relative_eq;
193
194    #[test]
195    fn rejects_invalid_params() {
196        assert!(matches!(
197            SampleEntropy::new(0, 2, 0.2),
198            Err(Error::PeriodZero)
199        ));
200        assert!(matches!(
201            SampleEntropy::new(50, 0, 0.2),
202            Err(Error::PeriodZero)
203        ));
204        assert!(matches!(
205            SampleEntropy::new(3, 2, 0.2),
206            Err(Error::InvalidPeriod { .. })
207        ));
208        assert!(matches!(
209            SampleEntropy::new(50, 2, 0.0),
210            Err(Error::InvalidParameter { .. })
211        ));
212    }
213
214    #[test]
215    fn accessors_and_metadata() {
216        let s = SampleEntropy::new(50, 2, 0.2).unwrap();
217        assert_eq!(s.params(), (50, 2, 0.2));
218        assert_eq!(s.warmup_period(), 50);
219        assert_eq!(s.name(), "SampleEntropy");
220        assert!(!s.is_ready());
221        assert_eq!(s.value(), None);
222    }
223
224    #[test]
225    fn first_emission_at_warmup_period() {
226        let mut s = SampleEntropy::new(10, 2, 0.2).unwrap();
227        let xs: Vec<f64> = (0..14).map(|i| (f64::from(i) * 0.5).sin()).collect();
228        let out = s.batch(&xs);
229        for v in out.iter().take(9) {
230            assert!(v.is_none());
231        }
232        assert!(out[9].is_some());
233    }
234
235    #[test]
236    fn constant_window_is_zero() {
237        let mut s = SampleEntropy::new(20, 2, 0.2).unwrap();
238        let last = s.batch(&[5.0; 30]).into_iter().flatten().last().unwrap();
239        assert_relative_eq!(last, 0.0, epsilon = 1e-12);
240    }
241
242    #[test]
243    fn output_is_non_negative() {
244        let mut s = SampleEntropy::new(40, 2, 0.2).unwrap();
245        for v in s
246            .batch(
247                &(0..200)
248                    .map(|i| (f64::from(i) * 0.3).sin() * 5.0)
249                    .collect::<Vec<_>>(),
250            )
251            .into_iter()
252            .flatten()
253        {
254            assert!(v >= 0.0, "sample entropy must be non-negative, got {v}");
255        }
256    }
257
258    #[test]
259    fn regular_below_irregular() {
260        // A smooth sine is far more regular (lower `SampEn`) than a chaotic
261        // logistic-map series. (An *alternating* series would be periodic, hence
262        // regular too -- chaos is what makes the window genuinely unpredictable.)
263        let smooth: Vec<f64> = (0..60).map(|i| (f64::from(i) * 0.2).sin() * 5.0).collect();
264        let mut x = 0.37_f64;
265        let chaotic: Vec<f64> = (0..60)
266            .map(|_| {
267                x = 3.99 * x * (1.0 - x);
268                x * 5.0
269            })
270            .collect();
271        let s_smooth = SampleEntropy::new(50, 2, 0.2)
272            .unwrap()
273            .batch(&smooth)
274            .into_iter()
275            .flatten()
276            .last()
277            .unwrap();
278        let s_chaotic = SampleEntropy::new(50, 2, 0.2)
279            .unwrap()
280            .batch(&chaotic)
281            .into_iter()
282            .flatten()
283            .last()
284            .unwrap();
285        assert!(
286            s_smooth <= s_chaotic,
287            "smooth ({s_smooth}) should be <= chaotic ({s_chaotic})"
288        );
289    }
290
291    #[test]
292    fn ignores_non_finite() {
293        let mut s = SampleEntropy::new(10, 2, 0.2).unwrap();
294        let xs: Vec<f64> = (0..10).map(|i| (f64::from(i) * 0.5).sin()).collect();
295        let ready = s.batch(&xs).into_iter().flatten().last().unwrap();
296        assert_eq!(s.update(f64::NAN), Some(ready));
297    }
298
299    #[test]
300    fn reset_clears_state() {
301        let mut s = SampleEntropy::new(10, 2, 0.2).unwrap();
302        let xs: Vec<f64> = (0..10).map(|i| (f64::from(i) * 0.5).sin()).collect();
303        s.batch(&xs);
304        assert!(s.is_ready());
305        s.reset();
306        assert!(!s.is_ready());
307        assert_eq!(s.value(), None);
308        assert_eq!(s.update(1.0), None);
309    }
310
311    #[test]
312    fn batch_equals_streaming() {
313        let xs: Vec<f64> = (0..120)
314            .map(|i| (f64::from(i) * 0.25).sin() * 9.0)
315            .collect();
316        let batch = SampleEntropy::new(40, 2, 0.2).unwrap().batch(&xs);
317        let mut b = SampleEntropy::new(40, 2, 0.2).unwrap();
318        let streamed: Vec<_> = xs.iter().map(|x| b.update(*x)).collect();
319        assert_eq!(batch, streamed);
320    }
321
322    #[test]
323    fn falls_back_when_no_m_plus_one_matches() {
324        // `[1, 1, 1, 5]` with m = 2: the length-2 template `(1, 1)` repeats
325        // (matches_m > 0) but no length-3 template repeats (matches_m1 == 0),
326        // so SampEn takes the `ln(matches_m)` fallback branch.
327        let xs = [1.0, 1.0, 1.0, 5.0];
328        let v = SampleEntropy::new(4, 2, 0.2)
329            .unwrap()
330            .batch(&xs)
331            .into_iter()
332            .flatten()
333            .last()
334            .unwrap();
335        assert!(v.is_finite() && v >= 0.0, "got {v}");
336    }
337}