Skip to main content

fin_stream/norm/
mod.rs

1//! Rolling-window coordinate normalization for financial time series.
2//!
3//! ## Purpose
4//!
5//! Machine-learning pipelines require features in a bounded numeric range.
6//! For streaming market data, a global min/max is unavailable; this module
7//! maintains a **rolling window** of the last `W` observations and maps each
8//! new sample into `[0.0, 1.0]` using the running min and max.
9//!
10//! ## Formula
11//!
12//! Given a window of observations `x_1 ... x_W` with minimum `m` and maximum
13//! `M`, the normalized value of a new sample `x` is:
14//!
15//! ```text
16//!     x_norm = (x - m) / (M - m)    if M != m
17//!     x_norm = 0.0                  if M == m  (degenerate; single-valued window)
18//! ```
19//!
20//! The result is clamped to `[0.0, 1.0]` to handle the case where `x` falls
21//! outside the current window range.
22//!
23//! ## Guarantees
24//!
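//! - Non-panicking once constructed: operations that can fail return `Result`
//!   or `Option`. The sole exception is `MinMaxNormalizer::new`, which panics
//!   when given a window size of 0.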
26//! - The window is a fixed-size ring buffer; once full, the oldest value is
27//!   evicted on each new observation.
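//! - `MinMaxNormalizer` holds only plain owned data, so it is both `Send` and
//!   `Sync`; however, `update`, `min_max`, `normalize` and `reset` take
//!   `&mut self`, so wrap it in a `Mutex` for shared multi-thread access.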
30
31use crate::error::StreamError;
32use std::collections::VecDeque;
33
34/// Rolling min-max normalizer over a sliding window of `f64` observations.
35///
36/// # Example
37///
38/// ```rust
39/// use fin_stream::norm::MinMaxNormalizer;
40///
41/// let mut norm = MinMaxNormalizer::new(4);
42/// norm.update(10.0);
43/// norm.update(20.0);
44/// norm.update(30.0);
45/// norm.update(40.0);
46///
47/// // 40.0 is the current max; 10.0 is the current min
48/// let v = norm.normalize(40.0).unwrap();
49/// assert!((v - 1.0).abs() < 1e-10);
50/// ```
51pub struct MinMaxNormalizer {
52    window_size: usize,
53    window: VecDeque<f64>,
54    cached_min: f64,
55    cached_max: f64,
56    dirty: bool,
57}
58
59impl MinMaxNormalizer {
60    /// Create a new normalizer with the given rolling window size.
61    ///
62    /// `window_size` must be at least 1. Passing 0 is a programming error;
63    /// this function will panic in debug mode and is flagged by the
64    /// `clippy::panic` lint configured in `Cargo.toml`.
65    ///
66    /// # Panics
67    ///
68    /// Panics if `window_size == 0`.
69    pub fn new(window_size: usize) -> Self {
70        // This is an API misuse guard; the window size of 0 makes the
71        // normalizer semantically undefined. The panic is intentional and
72        // documented. Production callers should validate before calling.
73        if window_size == 0 {
74            panic!("MinMaxNormalizer::new: window_size must be > 0");
75        }
76        Self {
77            window_size,
78            window: VecDeque::with_capacity(window_size),
79            cached_min: f64::MAX,
80            cached_max: f64::MIN,
81            dirty: false,
82        }
83    }
84
85    /// Add a new observation to the rolling window.
86    ///
87    /// If the window is full, the oldest value is evicted. After the call,
88    /// the internal min/max cache is marked dirty and will be recomputed lazily
89    /// on the next call to [`normalize`](Self::normalize) or
90    /// [`min_max`](Self::min_max).
91    ///
92    /// # Complexity: O(1) amortized
93    pub fn update(&mut self, value: f64) {
94        if self.window.len() == self.window_size {
95            self.window.pop_front();
96            self.dirty = true;
97        }
98        self.window.push_back(value);
99        // Eager update is cheaper than a full recompute when we don't evict.
100        if !self.dirty {
101            if value < self.cached_min {
102                self.cached_min = value;
103            }
104            if value > self.cached_max {
105                self.cached_max = value;
106            }
107        }
108    }
109
110    /// Recompute min and max from the full window.
111    ///
112    /// Called lazily when `dirty` is set (eviction occurred). O(W).
113    fn recompute(&mut self) {
114        self.cached_min = f64::MAX;
115        self.cached_max = f64::MIN;
116        for &v in &self.window {
117            if v < self.cached_min {
118                self.cached_min = v;
119            }
120            if v > self.cached_max {
121                self.cached_max = v;
122            }
123        }
124        self.dirty = false;
125    }
126
127    /// Return the current `(min, max)` of the window.
128    ///
129    /// Returns `None` if the window is empty.
130    ///
131    /// # Complexity: O(1) when the cache is clean; O(W) after an eviction.
132    pub fn min_max(&mut self) -> Option<(f64, f64)> {
133        if self.window.is_empty() {
134            return None;
135        }
136        if self.dirty {
137            self.recompute();
138        }
139        Some((self.cached_min, self.cached_max))
140    }
141
142    /// Normalize `value` into `[0.0, 1.0]` using the current window.
143    ///
144    /// The value is clamped so that even if `value` falls outside the window
145    /// range the result is always in `[0.0, 1.0]`.
146    ///
147    /// # Errors
148    ///
149    /// Returns `NormalizationError` if the window is empty (no observations
150    /// have been fed yet).
151    ///
152    /// # Complexity: O(1) when cache is clean; O(W) after an eviction.
153    pub fn normalize(&mut self, value: f64) -> Result<f64, StreamError> {
154        let (min, max) = self
155            .min_max()
156            .ok_or_else(|| StreamError::NormalizationError {
157                reason: "window is empty; call update() before normalize()".into(),
158            })?;
159        if (max - min).abs() < f64::EPSILON {
160            // Degenerate: all values in the window are identical.
161            return Ok(0.0);
162        }
163        let normalized = (value - min) / (max - min);
164        Ok(normalized.clamp(0.0, 1.0))
165    }
166
167    /// Reset the normalizer, clearing all observations and the cache.
168    pub fn reset(&mut self) {
169        self.window.clear();
170        self.cached_min = f64::MAX;
171        self.cached_max = f64::MIN;
172        self.dirty = false;
173    }
174
175    /// Number of observations currently in the window.
176    pub fn len(&self) -> usize {
177        self.window.len()
178    }
179
180    /// Returns `true` if no observations have been added since construction or
181    /// the last reset.
182    pub fn is_empty(&self) -> bool {
183        self.window.is_empty()
184    }
185
186    /// The configured window size.
187    pub fn window_size(&self) -> usize {
188        self.window_size
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a normalizer with the given window size and feed it `samples`
    /// in order.
    fn seeded(window_size: usize, samples: &[f64]) -> MinMaxNormalizer {
        let mut normalizer = MinMaxNormalizer::new(window_size);
        samples.iter().for_each(|&sample| normalizer.update(sample));
        normalizer
    }

    // ── Construction ─────────────────────────────────────────────────────────

    /// A freshly constructed normalizer holds no observations.
    #[test]
    fn test_new_normalizer_is_empty() {
        let fresh = MinMaxNormalizer::new(4);
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
    }

    // ── Normalization range [0, 1] ────────────────────────────────────────────

    /// The window minimum maps to 0.0.
    #[test]
    fn test_normalize_min_is_zero() {
        let mut norm = seeded(4, &[10.0, 20.0, 30.0, 40.0]);
        let v = norm.normalize(10.0).unwrap();
        assert!(
            (v - 0.0).abs() < 1e-10,
            "min should normalize to 0.0, got {v}"
        );
    }

    /// The window maximum maps to 1.0.
    #[test]
    fn test_normalize_max_is_one() {
        let mut norm = seeded(4, &[10.0, 20.0, 30.0, 40.0]);
        let v = norm.normalize(40.0).unwrap();
        assert!(
            (v - 1.0).abs() < 1e-10,
            "max should normalize to 1.0, got {v}"
        );
    }

    /// The midpoint of the window range maps to 0.5.
    #[test]
    fn test_normalize_midpoint_is_half() {
        let mut norm = seeded(4, &[0.0, 100.0]);
        let half = norm.normalize(50.0).unwrap();
        assert!((half - 0.5).abs() < 1e-10);
    }

    /// A value below the window minimum (50.0) is clamped to 0.0.
    #[test]
    fn test_normalize_result_clamped_below_zero() {
        let mut norm = seeded(4, &[50.0, 100.0]);
        let clamped = norm.normalize(10.0).unwrap();
        assert!(clamped >= 0.0);
        assert_eq!(clamped, 0.0);
    }

    /// A value above the window maximum (100.0) is clamped to 1.0.
    #[test]
    fn test_normalize_result_clamped_above_one() {
        let mut norm = seeded(4, &[50.0, 100.0]);
        let clamped = norm.normalize(200.0).unwrap();
        assert!(clamped <= 1.0);
        assert_eq!(clamped, 1.0);
    }

    /// A single-valued window is degenerate and normalizes to 0.0.
    #[test]
    fn test_normalize_all_same_values_returns_zero() {
        let mut norm = seeded(4, &[5.0, 5.0, 5.0]);
        assert_eq!(norm.normalize(5.0).unwrap(), 0.0);
    }

    // ── Empty window error ────────────────────────────────────────────────────

    /// Normalizing before any observation is an error.
    #[test]
    fn test_normalize_empty_window_returns_error() {
        let mut norm = MinMaxNormalizer::new(4);
        let failure = norm.normalize(1.0).unwrap_err();
        assert!(matches!(failure, StreamError::NormalizationError { .. }));
    }

    /// `min_max` on an empty window yields `None`.
    #[test]
    fn test_min_max_empty_returns_none() {
        let mut norm = MinMaxNormalizer::new(4);
        assert!(norm.min_max().is_none());
    }

    // ── Rolling window eviction ───────────────────────────────────────────────

    /// After the window fills and the minimum is evicted, the new min must
    /// reflect the remaining values.
    #[test]
    fn test_rolling_window_evicts_oldest() {
        // The fourth sample (20.0) pushes the first (1.0) out of the window.
        let mut norm = seeded(3, &[1.0, 5.0, 10.0, 20.0]);
        let (lo, hi) = norm.min_max().unwrap();
        assert_eq!(lo, 5.0);
        assert_eq!(hi, 20.0);
    }

    /// The window never holds more than `window_size` observations.
    #[test]
    fn test_rolling_window_len_does_not_exceed_capacity() {
        let mut norm = MinMaxNormalizer::new(3);
        (0..10_i32).for_each(|i| norm.update(f64::from(i)));
        assert_eq!(norm.len(), 3);
    }

    // ── Reset behavior ────────────────────────────────────────────────────────

    /// `reset` discards every observation.
    #[test]
    fn test_reset_clears_window() {
        let mut norm = seeded(4, &[10.0, 20.0]);
        norm.reset();
        assert!(norm.is_empty());
        assert!(norm.min_max().is_none());
    }

    /// Observations fed after a reset are normalized without influence from
    /// earlier ones.
    #[test]
    fn test_normalize_works_after_reset() {
        let mut norm = seeded(4, &[10.0]);
        norm.reset();
        norm.update(0.0);
        norm.update(100.0);
        let top = norm.normalize(100.0).unwrap();
        assert!((top - 1.0).abs() < 1e-10);
    }

    // ── Streaming update ──────────────────────────────────────────────────────

    /// A monotone price stream longer than the window keeps only the tail.
    #[test]
    fn test_streaming_updates_monotone_sequence() {
        let mut norm = seeded(5, &[100.0, 101.0, 102.0, 103.0, 104.0, 105.0]);
        // Window now holds [101, 102, 103, 104, 105]; min=101, max=105
        let at_min = norm.normalize(101.0).unwrap();
        let at_max = norm.normalize(105.0).unwrap();
        assert!((at_min - 0.0).abs() < 1e-10);
        assert!((at_max - 1.0).abs() < 1e-10);
    }

    /// Larger inputs inside the window range produce larger outputs.
    #[test]
    fn test_normalization_monotonicity_in_window() {
        let mut norm = MinMaxNormalizer::new(10);
        (0..10_i32).for_each(|i| norm.update(f64::from(i) * 10.0));
        // Values 0, 10, 20, ..., 90 in window; min=0, max=90
        let low = norm.normalize(0.0).unwrap();
        let mid = norm.normalize(50.0).unwrap();
        let high = norm.normalize(90.0).unwrap();
        assert!(low < mid, "normalized values should be monotone");
        assert!(mid < high, "normalized values should be monotone");
    }
}