opendeviationbar_core/intrabar/normalize.rs
1//! Normalization functions for intra-bar ITH metrics.
2//!
3//! Issue #59: Intra-bar microstructure features for large open deviation bars.
4//!
5//! ORIGIN: trading-fitness/packages/metrics-rust/src/ith_normalize.rs
6//! COPIED: 2026-02-02
7//! MODIFICATIONS: Extracted only the functions needed for intra-bar features
8//!
9//! All outputs are bounded to [0, 1] for LSTM/BiLSTM consumption.
10//!
11//! Issue #96 Task #197: Uses precomputed lookup tables for sigmoid and tanh
12//! to replace expensive transcendental function calls (~100-200 CPU cycles each).
13
14use super::normalization_lut::{cv_sigmoid_lut, sigmoid_lut, tanh_lut};
15
/// Shifted and scaled logistic function: `1 / (1 + exp(-(x - center) * scale))`.
///
/// Properties of the underlying mathematical function:
/// - monotonically increasing in `x` for a positive `scale`
/// - smooth (continuously differentiable), which suits gradient-based learners
/// - output in the open interval (0, 1); in `f64` arithmetic the result
///   saturates to exactly `0.0` or `1.0` once the exponential overflows or
///   underflows
///
/// # Arguments
/// * `x` - Input value
/// * `center` - Input value that maps to 0.5
/// * `scale` - Steepness of the transition (higher = sharper)
///
/// # Returns
/// The sigmoid of `x`; NaN if any argument makes the exponent NaN.
#[inline]
pub fn logistic_sigmoid(x: f64, center: f64, scale: f64) -> f64 {
    // Negated, scaled distance from the midpoint; this is the argument of exp().
    let exponent = -(x - center) * scale;
    let denominator = 1.0 + exponent.exp();
    denominator.recip()
}
31
32/// Normalize epoch count to [0, 1] using rank-based transform.
33///
34/// Uses a precomputed lookup table for sigmoid applied to epoch density (epochs/lookback).
35/// The sigmoid naturally maps any density to (0, 1) without hardcoded thresholds.
36///
37/// The function is: sigmoid_lut(density) ≈ sigmoid(10 * (density - 0.5))
38/// - density=0 → ~0.007 (near zero, distinguishable)
39/// - density=0.5 → 0.5 (exactly half)
40/// - density=1 → ~0.993 (near one)
41///
42/// Issue #96 Task #197: Uses precomputed LUT instead of exp() (100-200 CPU cycles → <1 CPU cycle).
43///
44/// # Arguments
45/// * `epochs` - Number of ITH epochs detected
46/// * `lookback` - Window size (trade count for intra-bar)
47///
48/// # Returns
49/// Normalized value in (0, 1)
50#[inline]
51pub fn normalize_epochs(epochs: usize, lookback: usize) -> f64 {
52 if lookback == 0 {
53 return 0.5; // Degenerate case
54 }
55
56 // Epoch density: fraction of observations that are epochs
57 let density = epochs as f64 / lookback as f64;
58
59 // Precomputed sigmoid LUT in 0.01 steps [0, 1] density range
60 // Replaces expensive exp() call with O(1) table lookup
61 sigmoid_lut(density)
62}
63
64/// Normalize excess gain/loss to [0, 1] using precomputed tanh lookup table.
65///
66/// Tanh is mathematically natural for this purpose:
67/// - Maps [0, ∞) → [0, 1)
68/// - Zero input → zero output
69/// - Monotonically increasing
70/// - Smooth gradients for backpropagation
71///
72/// The scaling factor (5.0) is derived from the observation that
73/// typical ITH excess gains range from 0 to 20%, and we want
74/// this range to occupy most of the [0, 0.8] output space.
75///
76/// Issue #96 Task #197: Uses precomputed LUT in 0.1 steps [0, 5] range
77/// instead of exp() (50-100 CPU cycles → <1 CPU cycle).
78///
79/// # Arguments
80/// * `value` - Raw excess gain or loss (absolute value used)
81///
82/// # Returns
83/// Normalized value in [0, 1)
84#[inline]
85pub fn normalize_excess(value: f64) -> f64 {
86 // tanh_lut(x * 5) provides (from precomputed table):
87 // - 1% (0.05 scaled) → ~0.05
88 // - 5% (0.25 scaled) → ~0.24
89 // - 10% (0.50 scaled) → ~0.46
90 // - 20% (1.00 scaled) → ~0.76
91 // - 100% (5.00 scaled) → ~0.9999 (saturates at 1.0)
92 tanh_lut(value.abs() * 5.0)
93}
94
95/// Normalize coefficient of variation (CV) to [0, 1] using logistic sigmoid.
96///
97/// CV = std / mean of epoch intervals. This ratio is naturally unbounded
98/// and heavy-tailed in practice.
99///
100/// The sigmoid is centered at CV=0.5 (moderate regularity) because:
101/// - CV=0 means perfectly regular intervals
102/// - CV=0.5 is typical for many stochastic processes
103/// - CV=1 means std equals mean (high irregularity)
104/// - CV>1 is very irregular (common in financial data)
105///
106/// Special handling: NaN (no epochs) maps to ~0.12, making it
107/// distinguishable from real CV values.
108///
109/// # Arguments
110/// * `cv` - Coefficient of variation of epoch intervals (std/mean)
111///
112/// # Returns
113/// Normalized value in (0, 1)
114#[inline]
115pub fn normalize_cv(cv: f64) -> f64 {
116 // NaN handling: treat as CV=0 (would be perfectly regular if epochs existed)
117 let cv_effective = if cv.is_nan() { 0.0 } else { cv };
118
119 // Task #10: Precomputed LUT replaces exp() call (~50-100 CPU cycles → <1 cycle)
120 cv_sigmoid_lut(cv_effective)
121}
122
/// Normalize max drawdown to [0, 1].
///
/// Drawdown is bounded to [0, 1] by definition:
///   DD = (peak - current) / peak
///
/// This function enforces that bound against numerical noise and non-finite
/// inputs:
/// - values below 0 (including negative infinity) become 0.0
/// - values above 1 (including positive infinity) become 1.0
/// - NaN becomes 0.0. `f64::clamp` alone would pass NaN through, breaking the
///   module's promise that every output lies in [0, 1]; mapping it to 0.0
///   follows `normalize_cv`, which also substitutes 0.0 for NaN.
///
/// # Arguments
/// * `drawdown` - Max drawdown as fraction (0.0 to 1.0)
///
/// # Returns
/// Clamped value in [0, 1]; never NaN.
#[inline]
pub fn normalize_drawdown(drawdown: f64) -> f64 {
    // clamp() returns NaN for a NaN receiver, so handle that case explicitly.
    if drawdown.is_nan() {
        return 0.0;
    }
    drawdown.clamp(0.0, 1.0)
}
139
/// Normalize max runup to [0, 1].
///
/// Runup is bounded to [0, 1] by definition:
///   RU = (current - trough) / current
///
/// This function enforces that bound against numerical noise and non-finite
/// inputs:
/// - values below 0 (including negative infinity) become 0.0
/// - values above 1 (including positive infinity) become 1.0
/// - NaN becomes 0.0. `f64::clamp` alone would pass NaN through, breaking the
///   module's promise that every output lies in [0, 1]; mapping it to 0.0
///   follows `normalize_cv`, which also substitutes 0.0 for NaN.
///
/// # Arguments
/// * `runup` - Max runup as fraction (0.0 to 1.0)
///
/// # Returns
/// Clamped value in [0, 1]; never NaN.
#[inline]
pub fn normalize_runup(runup: f64) -> f64 {
    // clamp() returns NaN for a NaN receiver, so handle that case explicitly.
    if runup.is_nan() {
        return 0.0;
    }
    runup.clamp(0.0, 1.0)
}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `v` lies in the closed unit interval (false for NaN).
    fn in_unit_interval(v: f64) -> bool {
        (0.0..=1.0).contains(&v)
    }

    // ---- Boundedness and ordering properties ----

    #[test]
    fn test_normalize_epochs_bounded() {
        // Every (epochs, lookback) pair in the grid must land in [0, 1],
        // including densities far above 1.
        let grid = (0..=100usize).flat_map(|e| (1..=200usize).map(move |w| (e, w)));
        for (epochs, lookback) in grid {
            let result = normalize_epochs(epochs, lookback);
            assert!(
                in_unit_interval(result),
                "normalize_epochs({}, {}) = {} not in [0, 1]",
                epochs,
                lookback,
                result
            );
        }
    }

    #[test]
    fn test_normalize_epochs_monotonic() {
        // Adding epochs at a fixed lookback must never lower the output.
        let lookback: usize = 50;
        let outputs: Vec<f64> = (0..=lookback)
            .map(|epochs| normalize_epochs(epochs, lookback))
            .collect();
        for (idx, pair) in outputs.windows(2).enumerate() {
            assert!(
                pair[1] >= pair[0],
                "normalize_epochs not monotonic: {} gave {}, {} gave {}",
                idx,
                pair[0],
                idx + 1,
                pair[1]
            );
        }
    }

    #[test]
    fn test_normalize_excess_bounded() {
        for value in [0.0, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 100.0] {
            let result = normalize_excess(value);
            assert!(
                in_unit_interval(result),
                "normalize_excess({}) = {} not in [0, 1]",
                value,
                result
            );
        }
    }

    #[test]
    fn test_normalize_cv_bounded() {
        for cv in [0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0] {
            let result = normalize_cv(cv);
            assert!(
                in_unit_interval(result),
                "normalize_cv({}) = {} not in [0, 1]",
                cv,
                result
            );
        }
    }

    #[test]
    fn test_normalize_cv_nan_handling() {
        let mapped = normalize_cv(f64::NAN);
        assert!(mapped.is_finite(), "NaN should map to finite value");
        assert!(mapped < 0.3, "NaN should map to low value");
    }

    #[test]
    fn test_normalize_drawdown_clamped() {
        // (input, expected) pairs: below range, inside range, above range.
        for (input, expected) in [(-0.1, 0.0), (0.5, 0.5), (1.5, 1.0)] {
            assert_eq!(normalize_drawdown(input), expected);
        }
    }

    #[test]
    fn test_normalize_runup_clamped() {
        // (input, expected) pairs: below range, inside range, above range.
        for (input, expected) in [(-0.1, 0.0), (0.5, 0.5), (1.5, 1.0)] {
            assert_eq!(normalize_runup(input), expected);
        }
    }

    // Issue #96: Edge case coverage for normalization functions

    #[test]
    fn test_normalize_epochs_zero_lookback() {
        // A zero-length window must yield the midpoint instead of dividing by zero.
        for epochs in [0, 5] {
            assert_eq!(normalize_epochs(epochs, 0), 0.5);
        }
    }

    #[test]
    fn test_normalize_epochs_zero_epochs() {
        let out = normalize_epochs(0, 100);
        assert!(out < 0.1, "Zero epochs should map to low value, got {}", out);
        assert!(out > 0.0, "Zero epochs should be distinguishable from 0");
    }

    #[test]
    fn test_normalize_epochs_full_density() {
        // sigmoid_lut(1.0) ≈ 0.70 (LUT-specific scaling)
        let out = normalize_epochs(100, 100);
        assert!(out > 0.5, "All-epochs should map above midpoint, got {}", out);
        assert!(out <= 1.0, "Must be bounded by 1.0");
    }

    #[test]
    fn test_normalize_excess_zero() {
        let out = normalize_excess(0.0);
        assert!(out.abs() < 0.01, "Zero excess should map near 0, got {}", out);
    }

    #[test]
    fn test_normalize_excess_negative_uses_abs() {
        // Mirror-image inputs must be indistinguishable after normalization.
        let magnitude = 0.1;
        assert_eq!(
            normalize_excess(magnitude),
            normalize_excess(-magnitude),
            "normalize_excess should use absolute value"
        );
    }

    #[test]
    fn test_normalize_excess_large_saturates() {
        let out = normalize_excess(100.0);
        assert!(out > 0.999, "Large excess should saturate near 1.0, got {}", out);
    }

    #[test]
    fn test_logistic_sigmoid_center() {
        let at_center = logistic_sigmoid(0.5, 0.5, 4.0);
        assert!((at_center - 0.5).abs() < 0.001, "At center, sigmoid should be 0.5");
    }

    #[test]
    fn test_logistic_sigmoid_extremes() {
        let far_below = logistic_sigmoid(-10.0, 0.0, 1.0);
        assert!(far_below < 0.001, "Far below center should be near 0");

        let far_above = logistic_sigmoid(10.0, 0.0, 1.0);
        assert!(far_above > 0.999, "Far above center should be near 1");
    }
}