fin_stream/norm/mod.rs
1//! Rolling-window coordinate normalization for financial time series.
2//!
3//! ## Purpose
4//!
5//! Machine-learning pipelines require features in a bounded numeric range.
6//! For streaming market data, a global min/max is unavailable; this module
7//! maintains a **rolling window** of the last `W` observations and maps each
8//! new sample into `[0.0, 1.0]` using the running min and max.
9//!
10//! ## Formula
11//!
12//! Given a window of observations `x_1 ... x_W` with minimum `m` and maximum
13//! `M`, the normalized value of a new sample `x` is:
14//!
15//! ```text
16//! x_norm = (x - m) / (M - m) if M != m
17//! x_norm = 0.0 if M == m (degenerate; single-valued window)
18//! ```
19//!
20//! The result is clamped to `[0.0, 1.0]` to handle the case where `x` falls
21//! outside the current window range.
22//!
23//! ## Guarantees
24//!
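//! - Non-panicking after construction: `normalize` returns `Result` and
//!   `min_max` returns `Option`. The one exception is
//!   `MinMaxNormalizer::new`, which panics when `window_size == 0`.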
26//! - The window is a fixed-size ring buffer; once full, the oldest value is
27//! evicted on each new observation.
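//! - `MinMaxNormalizer` is `Send` and, having no interior mutability, also
//!   `Sync`; however `update`, `min_max` and `normalize` all take `&mut self`,
//!   so wrap it in a `Mutex` for shared multi-thread access.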
30
31use crate::error::StreamError;
32use std::collections::VecDeque;
33
/// Rolling min-max normalizer over a sliding window of `f64` observations.
///
/// The normalizer retains at most `window_size` of the most recent
/// observations and caches their minimum and maximum so that repeated
/// normalization does not have to rescan the window every time.
///
/// # Example
///
/// ```rust
/// use fin_stream::norm::MinMaxNormalizer;
///
/// let mut norm = MinMaxNormalizer::new(4);
/// norm.update(10.0);
/// norm.update(20.0);
/// norm.update(30.0);
/// norm.update(40.0);
///
/// // 40.0 is the current max; 10.0 is the current min
/// let v = norm.normalize(40.0).unwrap();
/// assert!((v - 1.0).abs() < 1e-10);
/// ```
#[derive(Debug, Clone)]
pub struct MinMaxNormalizer {
    /// Maximum number of observations retained in `window`.
    window_size: usize,
    /// Retained observations, oldest at the front and newest at the back.
    window: VecDeque<f64>,
    /// Cached minimum of `window`; only meaningful while `dirty` is `false`.
    cached_min: f64,
    /// Cached maximum of `window`; only meaningful while `dirty` is `false`.
    cached_max: f64,
    /// Set when the cached extremes may be stale and must be recomputed
    /// from `window` before they are used.
    dirty: bool,
}
58
59impl MinMaxNormalizer {
60 /// Create a new normalizer with the given rolling window size.
61 ///
62 /// `window_size` must be at least 1. Passing 0 is a programming error;
63 /// this function will panic in debug mode and is flagged by the
64 /// `clippy::panic` lint configured in `Cargo.toml`.
65 ///
66 /// # Panics
67 ///
68 /// Panics if `window_size == 0`.
69 pub fn new(window_size: usize) -> Self {
70 // This is an API misuse guard; the window size of 0 makes the
71 // normalizer semantically undefined. The panic is intentional and
72 // documented. Production callers should validate before calling.
73 if window_size == 0 {
74 panic!("MinMaxNormalizer::new: window_size must be > 0");
75 }
76 Self {
77 window_size,
78 window: VecDeque::with_capacity(window_size),
79 cached_min: f64::MAX,
80 cached_max: f64::MIN,
81 dirty: false,
82 }
83 }
84
85 /// Add a new observation to the rolling window.
86 ///
87 /// If the window is full, the oldest value is evicted. After the call,
88 /// the internal min/max cache is marked dirty and will be recomputed lazily
89 /// on the next call to [`normalize`](Self::normalize) or
90 /// [`min_max`](Self::min_max).
91 ///
92 /// # Complexity: O(1) amortized
93 pub fn update(&mut self, value: f64) {
94 if self.window.len() == self.window_size {
95 self.window.pop_front();
96 self.dirty = true;
97 }
98 self.window.push_back(value);
99 // Eager update is cheaper than a full recompute when we don't evict.
100 if !self.dirty {
101 if value < self.cached_min {
102 self.cached_min = value;
103 }
104 if value > self.cached_max {
105 self.cached_max = value;
106 }
107 }
108 }
109
110 /// Recompute min and max from the full window.
111 ///
112 /// Called lazily when `dirty` is set (eviction occurred). O(W).
113 fn recompute(&mut self) {
114 self.cached_min = f64::MAX;
115 self.cached_max = f64::MIN;
116 for &v in &self.window {
117 if v < self.cached_min {
118 self.cached_min = v;
119 }
120 if v > self.cached_max {
121 self.cached_max = v;
122 }
123 }
124 self.dirty = false;
125 }
126
127 /// Return the current `(min, max)` of the window.
128 ///
129 /// Returns `None` if the window is empty.
130 ///
131 /// # Complexity: O(1) when the cache is clean; O(W) after an eviction.
132 pub fn min_max(&mut self) -> Option<(f64, f64)> {
133 if self.window.is_empty() {
134 return None;
135 }
136 if self.dirty {
137 self.recompute();
138 }
139 Some((self.cached_min, self.cached_max))
140 }
141
142 /// Normalize `value` into `[0.0, 1.0]` using the current window.
143 ///
144 /// The value is clamped so that even if `value` falls outside the window
145 /// range the result is always in `[0.0, 1.0]`.
146 ///
147 /// # Errors
148 ///
149 /// Returns `NormalizationError` if the window is empty (no observations
150 /// have been fed yet).
151 ///
152 /// # Complexity: O(1) when cache is clean; O(W) after an eviction.
153 pub fn normalize(&mut self, value: f64) -> Result<f64, StreamError> {
154 let (min, max) = self
155 .min_max()
156 .ok_or_else(|| StreamError::NormalizationError {
157 reason: "window is empty; call update() before normalize()".into(),
158 })?;
159 if (max - min).abs() < f64::EPSILON {
160 // Degenerate: all values in the window are identical.
161 return Ok(0.0);
162 }
163 let normalized = (value - min) / (max - min);
164 Ok(normalized.clamp(0.0, 1.0))
165 }
166
167 /// Reset the normalizer, clearing all observations and the cache.
168 pub fn reset(&mut self) {
169 self.window.clear();
170 self.cached_min = f64::MAX;
171 self.cached_max = f64::MIN;
172 self.dirty = false;
173 }
174
175 /// Number of observations currently in the window.
176 pub fn len(&self) -> usize {
177 self.window.len()
178 }
179
180 /// Returns `true` if no observations have been added since construction or
181 /// the last reset.
182 pub fn is_empty(&self) -> bool {
183 self.window.is_empty()
184 }
185
186 /// The configured window size.
187 pub fn window_size(&self) -> usize {
188 self.window_size
189 }
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance for approximate floating-point comparisons.
    const TOL: f64 = 1e-10;

    /// Builds a normalizer with the given window size and feeds it `samples`
    /// in order.
    fn fed(window_size: usize, samples: &[f64]) -> MinMaxNormalizer {
        let mut normalizer = MinMaxNormalizer::new(window_size);
        for &sample in samples {
            normalizer.update(sample);
        }
        normalizer
    }

    // ── Construction ─────────────────────────────────────────────────────────

    #[test]
    fn test_new_normalizer_is_empty() {
        let fresh = fed(4, &[]);
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
    }

    // ── Normalization range [0, 1] ────────────────────────────────────────────

    #[test]
    fn test_normalize_min_is_zero() {
        let mut norm = fed(4, &[10.0, 20.0, 30.0, 40.0]);
        let v = norm.normalize(10.0).unwrap();
        assert!(
            (v - 0.0).abs() < TOL,
            "min should normalize to 0.0, got {v}"
        );
    }

    #[test]
    fn test_normalize_max_is_one() {
        let mut norm = fed(4, &[10.0, 20.0, 30.0, 40.0]);
        let v = norm.normalize(40.0).unwrap();
        assert!(
            (v - 1.0).abs() < TOL,
            "max should normalize to 1.0, got {v}"
        );
    }

    #[test]
    fn test_normalize_midpoint_is_half() {
        let mut norm = fed(4, &[0.0, 100.0]);
        let mid = norm.normalize(50.0).unwrap();
        assert!((mid - 0.5).abs() < TOL);
    }

    #[test]
    fn test_normalize_result_clamped_below_zero() {
        let mut norm = fed(4, &[50.0, 100.0]);
        // 10.0 sits below the window minimum of 50.0.
        let below = norm.normalize(10.0).unwrap();
        assert!(below >= 0.0);
        assert_eq!(below, 0.0);
    }

    #[test]
    fn test_normalize_result_clamped_above_one() {
        let mut norm = fed(4, &[50.0, 100.0]);
        // 200.0 sits above the window maximum of 100.0.
        let above = norm.normalize(200.0).unwrap();
        assert!(above <= 1.0);
        assert_eq!(above, 1.0);
    }

    #[test]
    fn test_normalize_all_same_values_returns_zero() {
        let mut norm = fed(4, &[5.0, 5.0, 5.0]);
        assert_eq!(norm.normalize(5.0).unwrap(), 0.0);
    }

    // ── Empty window error ────────────────────────────────────────────────────

    #[test]
    fn test_normalize_empty_window_returns_error() {
        let mut norm = fed(4, &[]);
        let failure = norm.normalize(1.0).unwrap_err();
        assert!(matches!(failure, StreamError::NormalizationError { .. }));
    }

    #[test]
    fn test_min_max_empty_returns_none() {
        let mut norm = fed(4, &[]);
        assert!(norm.min_max().is_none());
    }

    // ── Rolling window eviction ───────────────────────────────────────────────

    /// Once the window is full and the minimum has been pushed out, the
    /// reported extremes must come from the values that remain.
    #[test]
    fn test_rolling_window_evicts_oldest() {
        // 1.0 is the oldest sample and is evicted when 20.0 arrives.
        let mut norm = fed(3, &[1.0, 5.0, 10.0, 20.0]);
        let (lo, hi) = norm.min_max().unwrap();
        assert_eq!(lo, 5.0);
        assert_eq!(hi, 20.0);
    }

    #[test]
    fn test_rolling_window_len_does_not_exceed_capacity() {
        let samples: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let norm = fed(3, &samples);
        assert_eq!(norm.len(), 3);
    }

    // ── Reset behavior ────────────────────────────────────────────────────────

    #[test]
    fn test_reset_clears_window() {
        let mut norm = fed(4, &[10.0, 20.0]);
        norm.reset();
        assert!(norm.is_empty());
        assert!(norm.min_max().is_none());
    }

    #[test]
    fn test_normalize_works_after_reset() {
        let mut norm = fed(4, &[10.0]);
        norm.reset();
        norm.update(0.0);
        norm.update(100.0);
        let top = norm.normalize(100.0).unwrap();
        assert!((top - 1.0).abs() < TOL);
    }

    // ── Streaming update ──────────────────────────────────────────────────────

    #[test]
    fn test_streaming_updates_monotone_sequence() {
        let prices = [100.0, 101.0, 102.0, 103.0, 104.0, 105.0];
        let mut norm = fed(5, &prices);
        // The window now holds [101, 102, 103, 104, 105]: min=101, max=105.
        let at_min = norm.normalize(101.0).unwrap();
        let at_max = norm.normalize(105.0).unwrap();
        assert!((at_min - 0.0).abs() < TOL);
        assert!((at_max - 1.0).abs() < TOL);
    }

    #[test]
    fn test_normalization_monotonicity_in_window() {
        let samples: Vec<f64> = (0..10).map(|i| i as f64 * 10.0).collect();
        let mut norm = fed(10, &samples);
        // The window holds 0, 10, 20, ..., 90: min=0, max=90.
        let v0 = norm.normalize(0.0).unwrap();
        let v50 = norm.normalize(50.0).unwrap();
        let v90 = norm.normalize(90.0).unwrap();
        assert!(v0 < v50, "normalized values should be monotone");
        assert!(v50 < v90, "normalized values should be monotone");
    }
}