Skip to main content

opendeviationbar_core/intrabar/
normalize.rs

//! Normalization functions for intra-bar ITH metrics.
//!
//! Issue #59: Intra-bar microstructure features for large open deviation bars.
//!
//! ORIGIN: trading-fitness/packages/metrics-rust/src/ith_normalize.rs
//! COPIED: 2026-02-02
//! MODIFICATIONS: Extracted only the functions needed for intra-bar features
//!
//! All outputs are bounded to [0, 1] for LSTM/BiLSTM consumption.
//!
//! Issue #96 Task #197: Uses precomputed lookup tables for sigmoid and tanh
//! to replace expensive transcendental function calls (~100-200 CPU cycles each).
13
14use super::normalization_lut::{cv_sigmoid_lut, sigmoid_lut, tanh_lut};
15
/// Evaluates the logistic curve `1 / (1 + e^(-(x - center) * scale))`.
///
/// This is the exact (non-table) form of the sigmoid, computed with `exp`.
/// For a positive `scale` the curve:
/// - rises monotonically from 0 towards 1
/// - passes through exactly 0.5 at `x == center`
/// - is smooth everywhere, which suits gradient-based learning
///
/// In `f64` arithmetic the result saturates to exactly 0.0 or 1.0 once the
/// exponent is far enough from zero.
///
/// # Arguments
/// * `x` - Input value
/// * `center` - Input value that maps to exactly 0.5
/// * `scale` - Steepness of the transition (higher = sharper)
///
/// # Returns
/// The sigmoid value; NaN if any argument is NaN
#[inline]
pub fn logistic_sigmoid(x: f64, center: f64, scale: f64) -> f64 {
    // Negated, scaled distance from the midpoint.
    let exponent = -(x - center) * scale;
    let denominator = 1.0 + exponent.exp();
    denominator.recip()
}
31
32/// Normalize epoch count to [0, 1] using rank-based transform.
33///
34/// Uses a precomputed lookup table for sigmoid applied to epoch density (epochs/lookback).
35/// The sigmoid naturally maps any density to (0, 1) without hardcoded thresholds.
36///
37/// The function is: sigmoid_lut(density) ≈ sigmoid(10 * (density - 0.5))
38/// - density=0 → ~0.007 (near zero, distinguishable)
39/// - density=0.5 → 0.5 (exactly half)
40/// - density=1 → ~0.993 (near one)
41///
42/// Issue #96 Task #197: Uses precomputed LUT instead of exp() (100-200 CPU cycles → <1 CPU cycle).
43///
44/// # Arguments
45/// * `epochs` - Number of ITH epochs detected
46/// * `lookback` - Window size (trade count for intra-bar)
47///
48/// # Returns
49/// Normalized value in (0, 1)
50#[inline]
51pub fn normalize_epochs(epochs: usize, lookback: usize) -> f64 {
52    if lookback == 0 {
53        return 0.5; // Degenerate case
54    }
55
56    // Epoch density: fraction of observations that are epochs
57    let density = epochs as f64 / lookback as f64;
58
59    // Precomputed sigmoid LUT in 0.01 steps [0, 1] density range
60    // Replaces expensive exp() call with O(1) table lookup
61    sigmoid_lut(density)
62}
63
64/// Normalize excess gain/loss to [0, 1] using precomputed tanh lookup table.
65///
66/// Tanh is mathematically natural for this purpose:
67/// - Maps [0, ∞) → [0, 1)
68/// - Zero input → zero output
69/// - Monotonically increasing
70/// - Smooth gradients for backpropagation
71///
72/// The scaling factor (5.0) is derived from the observation that
73/// typical ITH excess gains range from 0 to 20%, and we want
74/// this range to occupy most of the [0, 0.8] output space.
75///
76/// Issue #96 Task #197: Uses precomputed LUT in 0.1 steps [0, 5] range
77/// instead of exp() (50-100 CPU cycles → <1 CPU cycle).
78///
79/// # Arguments
80/// * `value` - Raw excess gain or loss (absolute value used)
81///
82/// # Returns
83/// Normalized value in [0, 1)
84#[inline]
85pub fn normalize_excess(value: f64) -> f64 {
86    // tanh_lut(x * 5) provides (from precomputed table):
87    // - 1% (0.05 scaled) → ~0.05
88    // - 5% (0.25 scaled) → ~0.24
89    // - 10% (0.50 scaled) → ~0.46
90    // - 20% (1.00 scaled) → ~0.76
91    // - 100% (5.00 scaled) → ~0.9999 (saturates at 1.0)
92    tanh_lut(value.abs() * 5.0)
93}
94
95/// Normalize coefficient of variation (CV) to [0, 1] using logistic sigmoid.
96///
97/// CV = std / mean of epoch intervals. This ratio is naturally unbounded
98/// and heavy-tailed in practice.
99///
100/// The sigmoid is centered at CV=0.5 (moderate regularity) because:
101/// - CV=0 means perfectly regular intervals
102/// - CV=0.5 is typical for many stochastic processes
103/// - CV=1 means std equals mean (high irregularity)
104/// - CV>1 is very irregular (common in financial data)
105///
106/// Special handling: NaN (no epochs) maps to ~0.12, making it
107/// distinguishable from real CV values.
108///
109/// # Arguments
110/// * `cv` - Coefficient of variation of epoch intervals (std/mean)
111///
112/// # Returns
113/// Normalized value in (0, 1)
114#[inline]
115pub fn normalize_cv(cv: f64) -> f64 {
116    // NaN handling: treat as CV=0 (would be perfectly regular if epochs existed)
117    let cv_effective = if cv.is_nan() { 0.0 } else { cv };
118
119    // Task #10: Precomputed LUT replaces exp() call (~50-100 CPU cycles → <1 cycle)
120    cv_sigmoid_lut(cv_effective)
121}
122
/// Normalize max drawdown to [0, 1].
///
/// Drawdown is bounded to [0, 1] by definition:
/// DD = (peak - current) / peak
///
/// This function enforces that bound against numerical noise and invalid
/// inputs:
/// - values below 0 become 0.0
/// - values above 1 (including +∞) become 1.0
/// - NaN becomes 0.0; `f64::clamp` would otherwise pass NaN straight
///   through, breaking the guarantee that the output lies in [0, 1]
///
/// # Arguments
/// * `drawdown` - Max drawdown as fraction (nominally 0.0 to 1.0)
///
/// # Returns
/// Value in [0, 1]; never NaN
#[inline]
pub fn normalize_drawdown(drawdown: f64) -> f64 {
    // `clamp` propagates NaN, so handle it before clamping.
    if drawdown.is_nan() {
        return 0.0;
    }

    drawdown.clamp(0.0, 1.0)
}
139
/// Normalize max runup to [0, 1].
///
/// Runup is bounded to [0, 1] by definition:
/// RU = (current - trough) / current
///
/// This function enforces that bound against numerical noise and invalid
/// inputs:
/// - values below 0 become 0.0
/// - values above 1 (including +∞) become 1.0
/// - NaN becomes 0.0; `f64::clamp` would otherwise pass NaN straight
///   through, breaking the guarantee that the output lies in [0, 1]
///
/// # Arguments
/// * `runup` - Max runup as fraction (nominally 0.0 to 1.0)
///
/// # Returns
/// Value in [0, 1]; never NaN
#[inline]
pub fn normalize_runup(runup: f64) -> f64 {
    // `clamp` propagates NaN, so handle it before clamping.
    if runup.is_nan() {
        return 0.0;
    }

    runup.clamp(0.0, 1.0)
}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Closed unit interval every normalized feature must fall into.
    const UNIT: std::ops::RangeInclusive<f64> = 0.0..=1.0;

    #[test]
    fn test_normalize_epochs_bounded() {
        // Property: output stays in [0, 1] over a grid of epoch counts and
        // window sizes, including densities well above 1.
        let grid = (0..=100usize).flat_map(|e| (1..=200usize).map(move |l| (e, l)));
        for (epochs, lookback) in grid {
            let result = normalize_epochs(epochs, lookback);
            assert!(
                UNIT.contains(&result),
                "normalize_epochs({}, {}) = {} not in [0, 1]",
                epochs,
                lookback,
                result
            );
        }
    }

    #[test]
    fn test_normalize_epochs_monotonic() {
        // Property: adding epochs never lowers the normalized value.
        const LOOKBACK: usize = 50;
        let series: Vec<f64> = (0..=LOOKBACK)
            .map(|epochs| normalize_epochs(epochs, LOOKBACK))
            .collect();

        for (idx, pair) in series.windows(2).enumerate() {
            let (prev, curr) = (pair[0], pair[1]);
            assert!(
                curr >= prev,
                "normalize_epochs not monotonic: {} gave {}, {} gave {}",
                idx,
                prev,
                idx + 1,
                curr
            );
        }
    }

    #[test]
    fn test_normalize_excess_bounded() {
        const SAMPLES: [f64; 8] = [0.0, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 100.0];
        for value in SAMPLES.iter().copied() {
            let result = normalize_excess(value);
            assert!(
                UNIT.contains(&result),
                "normalize_excess({}) = {} not in [0, 1]",
                value,
                result
            );
        }
    }

    #[test]
    fn test_normalize_cv_bounded() {
        const SAMPLES: [f64; 7] = [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0];
        for cv in SAMPLES.iter().copied() {
            let result = normalize_cv(cv);
            assert!(
                UNIT.contains(&result),
                "normalize_cv({}) = {} not in [0, 1]",
                cv,
                result
            );
        }
    }

    #[test]
    fn test_normalize_cv_nan_handling() {
        // NaN (no epochs) must come back as a finite value in the low range.
        let nan_result = normalize_cv(f64::NAN);
        assert!(nan_result.is_finite(), "NaN should map to finite value");
        assert!(nan_result < 0.3, "NaN should map to low value");
    }

    #[test]
    fn test_normalize_drawdown_clamped() {
        // (input, expected): below range, inside range, above range.
        for &(input, expected) in &[(-0.1, 0.0), (0.5, 0.5), (1.5, 1.0)] {
            assert_eq!(normalize_drawdown(input), expected);
        }
    }

    #[test]
    fn test_normalize_runup_clamped() {
        // (input, expected): below range, inside range, above range.
        for &(input, expected) in &[(-0.1, 0.0), (0.5, 0.5), (1.5, 1.0)] {
            assert_eq!(normalize_runup(input), expected);
        }
    }

    // Issue #96: Edge case coverage for normalization functions

    #[test]
    fn test_normalize_epochs_zero_lookback() {
        // Degenerate window: must return the midpoint instead of panicking,
        // whatever the epoch count.
        for &epochs in &[0usize, 5] {
            assert_eq!(normalize_epochs(epochs, 0), 0.5);
        }
    }

    #[test]
    fn test_normalize_epochs_zero_epochs() {
        let result = normalize_epochs(0, 100);
        assert!(result < 0.1, "Zero epochs should map to low value, got {}", result);
        assert!(result > 0.0, "Zero epochs should be distinguishable from 0");
    }

    #[test]
    fn test_normalize_epochs_full_density() {
        // Only the side of the midpoint and the upper bound are asserted; the
        // exact table value at density 1.0 is defined by the LUT.
        let result = normalize_epochs(100, 100);
        assert!(result > 0.5, "All-epochs should map above midpoint, got {}", result);
        assert!(result <= 1.0, "Must be bounded by 1.0");
    }

    #[test]
    fn test_normalize_excess_zero() {
        let result = normalize_excess(0.0);
        assert!(result.abs() < 0.01, "Zero excess should map near 0, got {}", result);
    }

    #[test]
    fn test_normalize_excess_negative_uses_abs() {
        // Gains and losses of equal magnitude must normalize identically.
        let magnitude = 0.1;
        let pos = normalize_excess(magnitude);
        let neg = normalize_excess(-magnitude);
        assert_eq!(pos, neg, "normalize_excess should use absolute value");
    }

    #[test]
    fn test_normalize_excess_large_saturates() {
        let result = normalize_excess(100.0);
        assert!(result > 0.999, "Large excess should saturate near 1.0, got {}", result);
    }

    #[test]
    fn test_logistic_sigmoid_center() {
        let result = logistic_sigmoid(0.5, 0.5, 4.0);
        let deviation = (result - 0.5).abs();
        assert!(deviation < 0.001, "At center, sigmoid should be 0.5");
    }

    #[test]
    fn test_logistic_sigmoid_extremes() {
        // Ten units either side of the center with unit scale.
        let (low, high) = (
            logistic_sigmoid(-10.0, 0.0, 1.0),
            logistic_sigmoid(10.0, 0.0, 1.0),
        );
        assert!(low < 0.001, "Far below center should be near 0");
        assert!(high > 0.999, "Far above center should be near 1");
    }
}