opendeviationbar_core/interbar_math/mod.rs
1// FILE-SIZE-OK: ~650 lines — LookbackCache, EntropyCache, SIMD module, re-exports
2//! Inter-bar math helper functions
3//! Extracted from interbar.rs (Phase 2e refactoring)
4//!
5//! GitHub Issue: https://github.com/terrylica/opendeviationbar-py/issues/59
6//! Issue #96 Task #4: SIMD burstiness acceleration (feature-gated)
7//! Issue #96 Task #14: Garman-Klass libm optimization (1.2-1.5x speedup)
8//! Issue #96 Task #93: Permutation entropy batch processing optimization
9//! Issue #96 Task #130: Permutation entropy SIMD vectorization with wide crate
10
11pub mod accumulation;
12pub mod tier2;
13pub mod tier3;
14
15// Re-export all public items for backward compatibility
16pub use accumulation::*;
17pub use tier2::*;
18pub use tier3::*;
19
20use crate::interbar_types::TradeSnapshot;
21use smallvec::SmallVec;
22
23/// Memoized lookback trade data (Issue #96 Task #99: Float conversion memoization)
24///
25/// Pre-computes all float conversions from fixed-point trades in a single pass.
26/// This cache is reused across all 16 inter-bar feature functions, eliminating
27/// 400-2000 redundant `.to_f64()` calls per bar when inter-bar features enabled.
28///
29/// # Performance Impact
30/// - Single-pass extraction: O(n) fixed cost (not per-feature)
31/// - Eliminated redundant conversions: 2-5% speedup when Tier 1/2 features enabled
32/// - Memory: ~5KB for typical lookback (100-500 trades)
33///
34/// # Example
35/// ```ignore
36/// let cache = extract_lookback_cache(&lookback);
37/// let kyle = compute_kyle_lambda_cached(&cache);
38/// let burstiness = compute_burstiness_scalar(&lookback); // Still uses TradeSnapshot
39/// ```
#[derive(Debug, Clone, Default)]
pub struct LookbackCache {
    /// Pre-computed f64 prices (avoids 400-2000 `.price.to_f64()` calls)
    pub prices: SmallVec<[f64; 256]>,
    /// Pre-computed f64 volumes (avoids 400-2000 `.volume.to_f64()` calls)
    pub volumes: SmallVec<[f64; 256]>,
    /// First trade's price in the lookback (bar open)
    pub open: f64,
    /// Maximum price observed in the lookback (bar high)
    pub high: f64,
    /// Minimum price observed in the lookback (bar low)
    pub low: f64,
    /// Last trade's price in the lookback (bar close)
    pub close: f64,
    /// First volume value
    pub first_volume: f64,
    /// Total volume (pre-summed for Kyle Lambda, moments, etc.)
    pub total_volume: f64,
    /// Issue #96 Task #45: All prices are finite (no NaN/Inf)
    /// Pre-computed during extraction to eliminate O(n) scan in Tier 3
    ///
    /// NOTE(review): `#[derive(Default)]` initializes this flag (and
    /// `all_volumes_finite`) to `false`, while the extraction helpers use
    /// vacuous `true` for an empty lookback — confirm `Default::default()`
    /// is never used as a stand-in for an "empty lookback" cache.
    pub all_prices_finite: bool,
    /// Issue #96 Task #49: All volumes are finite (no NaN/Inf)
    /// Pre-computed during extraction for volume moments validation
    pub all_volumes_finite: bool,
}
62
63/// Cold path: empty lookback cache (Issue #96 Task #4: cold path optimization)
64/// Moved out of hot path to improve instruction cache locality
65#[cold]
66#[inline(never)]
67fn empty_lookback_cache() -> LookbackCache {
68 LookbackCache {
69 prices: SmallVec::new(),
70 volumes: SmallVec::new(),
71 open: 0.0,
72 high: 0.0,
73 low: 0.0,
74 close: 0.0,
75 first_volume: 0.0,
76 total_volume: 0.0,
77 all_prices_finite: true,
78 all_volumes_finite: true,
79 }
80}
81
82/// Extract memoized lookback data in single pass (Issue #96 Task #99)
83///
84/// Replaces multiple independent passes through lookback trades with a single
85/// traversal that extracts prices, volumes, and OHLC bounds together.
86///
87/// # Complexity
88/// - O(n) single pass through lookback trades
89/// - Constant-time access to pre-computed values for all feature functions
90///
91/// # Returns
92/// Cache with pre-computed prices, volumes, OHLC, and aggregates
93#[inline]
94pub fn extract_lookback_cache(lookback: &[&TradeSnapshot]) -> LookbackCache {
95 if lookback.is_empty() {
96 return empty_lookback_cache();
97 }
98
99 // Issue #96 Task #210: Memoize first/last element access in cache extraction
100 let first_trade = &lookback[0];
101 let last_trade = &lookback[lookback.len() - 1];
102
103 let mut cache = LookbackCache {
104 prices: SmallVec::with_capacity(lookback.len()),
105 volumes: SmallVec::with_capacity(lookback.len()),
106 open: first_trade.price.to_f64(),
107 high: f64::MIN,
108 low: f64::MAX,
109 close: last_trade.price.to_f64(),
110 first_volume: first_trade.volume.to_f64(),
111 total_volume: 0.0,
112 all_prices_finite: true,
113 all_volumes_finite: true,
114 };
115
116 // Single pass: extract prices, volumes, compute OHLC, total volume, and finite checks
117 // Issue #96 Task #45/#49: Track finite flags during extraction (eliminates O(n) scans)
118 for trade in lookback {
119 let p = trade.price.to_f64();
120 let v = trade.volume.to_f64();
121 cache.prices.push(p);
122 cache.volumes.push(v);
123 cache.total_volume += v;
124 // Branchless finite checks: &= avoids branch misprediction
125 cache.all_prices_finite &= p.is_finite();
126 cache.all_volumes_finite &= v.is_finite();
127 // Issue #96 Task #61: Branchless min/max avoids branch misprediction
128 cache.high = cache.high.max(p);
129 cache.low = cache.low.min(p);
130 }
131
132 cache
133}
134
135/// Extract lookback data into an existing cache, reusing SmallVec allocations (Phase 5)
136///
137/// Avoids per-bar SmallVec construction by clearing and reusing existing buffers.
138/// The SmallVec heap allocation (if any) from previous bars is retained.
139///
140/// # Performance
141/// - Eliminates per-bar SmallVec construction overhead
142/// - 1-3% improvement on inter-bar hot path
143#[inline]
144pub fn extract_lookback_cache_reuse(lookback: &[&TradeSnapshot], cache: &mut LookbackCache) {
145 cache.prices.clear();
146 cache.volumes.clear();
147
148 if lookback.is_empty() {
149 cache.open = 0.0;
150 cache.high = 0.0;
151 cache.low = 0.0;
152 cache.close = 0.0;
153 cache.first_volume = 0.0;
154 cache.total_volume = 0.0;
155 cache.all_prices_finite = true;
156 cache.all_volumes_finite = true;
157 return;
158 }
159
160 let first_trade = &lookback[0];
161 let last_trade = &lookback[lookback.len() - 1];
162
163 cache.open = first_trade.price.to_f64();
164 cache.high = f64::MIN;
165 cache.low = f64::MAX;
166 cache.close = last_trade.price.to_f64();
167 cache.first_volume = first_trade.volume.to_f64();
168 cache.total_volume = 0.0;
169 cache.all_prices_finite = true;
170 cache.all_volumes_finite = true;
171
172 cache.prices.reserve(lookback.len());
173 cache.volumes.reserve(lookback.len());
174
175 for trade in lookback {
176 let p = trade.price.to_f64();
177 let v = trade.volume.to_f64();
178 cache.prices.push(p);
179 cache.volumes.push(v);
180 cache.total_volume += v;
181 cache.all_prices_finite &= p.is_finite();
182 cache.all_volumes_finite &= v.is_finite();
183 cache.high = cache.high.max(p);
184 cache.low = cache.low.min(p);
185 }
186}
187
/// LRU cache for permutation-entropy results, keyed by a hash of the price
/// sequence (Issue #96 Task #63, metrics per Task #135).
///
/// Consecutive consolidation windows frequently present identical price
/// slices; caching the computed entropy value skips recomputation. Hit and
/// miss counters are tracked atomically so cache effectiveness can be
/// inspected via [`EntropyCache::metrics`].
///
/// NOTE(review): the previous doc comment here described an unrelated
/// function (`accumulate_buy_sell_branchless`) — it was misplaced during the
/// Phase 2e extraction and has been replaced. Also note that keying by hash
/// alone means a (rare) hash collision would return the other sequence's
/// entropy — presumably an accepted trade-off; confirm.
pub struct EntropyCache {
    /// High-performance LRU cache (quick_cache: 4-10x faster than moka, Issue #96 Task #63)
    /// Key: hash of price sequence, Value: computed entropy
    /// Capacity: 128 entries by default via `new()` (tuned for typical
    /// consolidation windows); configurable via `with_capacity()`.
    cache: quick_cache::sync::Cache<u64, f64>,
    /// Metrics: hit counter (atomic for thread-safe access)
    hits: std::sync::Arc<std::sync::atomic::AtomicUsize>,
    /// Metrics: miss counter (atomic for thread-safe access)
    misses: std::sync::Arc<std::sync::atomic::AtomicUsize>,
}
224
impl EntropyCache {
    /// Create new empty entropy cache with LRU eviction and metrics tracking (Task #135)
    ///
    /// Default capacity: 128 entries (tuned for typical consolidation windows).
    pub fn new() -> Self {
        Self {
            cache: quick_cache::sync::Cache::new(128),
            hits: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
            misses: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
        }
    }

    /// Create entropy cache with custom capacity (Issue #145: Global cache sizing)
    ///
    /// Used by global entropy cache to support larger capacity (512-1024 entries)
    /// for improved hit ratio on multi-symbol workloads.
    ///
    /// ## Memory Usage
    ///
    /// Approximate memory per entry: ~24 bytes (quick_cache overhead + u64 key + f64 value)
    /// - 128 entries ≈ 3KB (default, per-processor)
    /// - 512 entries ≈ 12KB (4x improvement)
    /// - 1024 entries ≈ 24KB (8x improvement, global cache)
    pub fn with_capacity(capacity: u64) -> Self {
        Self {
            // NOTE(review): `capacity as usize` would truncate above u32::MAX on
            // 32-bit targets — irrelevant for the 512-1024 range used here.
            cache: quick_cache::sync::Cache::new(capacity as usize),
            hits: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
            misses: std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0)),
        }
    }

    /// Compute hash of price sequence
    ///
    /// Hashes the raw IEEE-754 bit patterns, so values that compare equal but
    /// differ in bits (e.g. `0.0` vs `-0.0`) hash differently.
    fn price_hash(prices: &[f64]) -> u64 {
        use foldhash::fast::FixedState;
        use std::hash::{BuildHasher, Hash, Hasher};

        // Issue #96 Task #168: Use foldhash instead of DefaultHasher (20-40% faster than ahash for numeric data)
        // foldhash is optimized for integer/numeric hashing with smaller footprint
        let mut hasher = FixedState::default().build_hasher();

        // Issue #96 Task #176: Optimize hash computation by directly hashing price bits
        // instead of per-element .to_bits() calls. Convert slice to u64 array view
        // and hash raw bytes for better cache locality and fewer function calls.
        #[allow(unsafe_code)]
        {
            // SAFETY: f64 and u64 have identical size (8 bytes) AND identical
            // alignment (8 bytes on all supported targets), so reinterpreting a
            // `&[f64]` as `&[u64]` of the same length is a valid bitcast view —
            // every u64 read is in-bounds, aligned, and initialized. The borrow
            // of `prices` keeps the memory alive for the lifetime of the view,
            // which is only used inside this block.
            let price_bits: &[u64] =
                unsafe { std::slice::from_raw_parts(prices.as_ptr().cast::<u64>(), prices.len()) };

            // Hash all price bits at once instead of per-element
            price_bits.hash(&mut hasher);
        }

        hasher.finish()
    }

    /// Get cached entropy result if available (O(1) operation)
    /// Tracks hit/miss metrics for cache effectiveness analysis (Task #135)
    ///
    /// Empty slices are never cached, so they return `None` without touching
    /// the miss counter.
    pub fn get(&self, prices: &[f64]) -> Option<f64> {
        if prices.is_empty() {
            return None;
        }

        let hash = Self::price_hash(prices);
        match self.cache.get(&hash) {
            Some(entropy) => {
                self.hits.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                Some(entropy)
            }
            None => {
                self.misses
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                None
            }
        }
    }

    /// Cache entropy result (O(1) operation, quick_cache handles LRU eviction)
    ///
    /// Empty price slices are silently ignored (mirrors the `get` guard).
    pub fn insert(&mut self, prices: &[f64], entropy: f64) {
        if prices.is_empty() {
            return;
        }

        let hash = Self::price_hash(prices);
        self.cache.insert(hash, entropy);
    }

    /// Get cache metrics: (hits, misses, hit_ratio)
    /// Returns hit ratio as percentage (0-100) for analysis (Task #135)
    ///
    /// Counters use `Relaxed` loads, so values are approximate under
    /// concurrent access — fine for diagnostics.
    pub fn metrics(&self) -> (usize, usize, f64) {
        let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
        let misses = self.misses.load(std::sync::atomic::Ordering::Relaxed);
        let total = hits + misses;
        let hit_ratio = if total > 0 {
            (hits as f64 / total as f64) * 100.0
        } else {
            // No lookups yet: report 0% rather than dividing by zero.
            0.0
        };
        (hits, misses, hit_ratio)
    }

    /// Reset metrics counters (useful for per-symbol analysis)
    pub fn reset_metrics(&mut self) {
        self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
        self.misses.store(0, std::sync::atomic::Ordering::Relaxed);
    }
}
334
335impl std::fmt::Debug for EntropyCache {
336 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337 let (hits, misses, hit_ratio) = self.metrics();
338 f.debug_struct("EntropyCache")
339 .field("cache_size", &"quick_cache(max_128)")
340 .field("hits", &hits)
341 .field("misses", &misses)
342 .field("hit_ratio_percent", &format!("{:.1}%", hit_ratio))
343 .finish()
344 }
345}
346
347impl Default for EntropyCache {
348 fn default() -> Self {
349 Self::new()
350 }
351}
352
353#[cfg(any(feature = "simd-burstiness", feature = "simd-kyle-lambda"))]
354pub mod simd {
355 //! True SIMD-accelerated inter-bar math functions via wide crate
356 //!
357 //! Issue #96 Task #127: Burstiness SIMD acceleration with wide crate for 2-4x speedup.
358 //! Issue #96 Task #148 Phase 2: Kyle Lambda SIMD acceleration with wide crate for 1.5-2.5x speedup.
359 //! Uses stable Rust (no nightly required). Implements f64x4 vectorization for sum/variance/volumes.
360 //!
361 //! Expected speedup: 2-4x vs scalar on ARM64/x86_64 via SIMD vectorization
362
363 use crate::interbar_types::TradeSnapshot;
364 use smallvec::SmallVec;
365 use wide::f64x4;
366
367 /// True SIMD-accelerated burstiness computation using wide::f64x4 vectors.
368 ///
369 /// Formula: B = (σ_τ - μ_τ) / (σ_τ + μ_τ)
370 /// where σ_τ = std dev of inter-arrival times, μ_τ = mean
371 ///
372 /// # Performance
373 /// Expected 2-4x speedup vs scalar via vectorized mean and variance computation.
374 /// Processes 4 f64 elements per SIMD iteration using wide::f64x4.
375 pub fn compute_burstiness_simd(lookback: &[&TradeSnapshot]) -> f64 {
376 if lookback.len() < 2 {
377 return 0.0;
378 }
379
380 // Compute inter-arrival times (microseconds between consecutive trades)
381 let inter_arrivals = compute_inter_arrivals_simd(lookback);
382 // Issue #96: Pre-compute reciprocal — shared by mean and variance (eliminates 1 division)
383 let inv_n = 1.0 / inter_arrivals.len() as f64;
384
385 // SIMD-accelerated mean computation
386 let mu = sum_f64_simd(&inter_arrivals) * inv_n;
387
388 // SIMD-accelerated variance computation
389 let variance = variance_f64_simd(&inter_arrivals, mu, inv_n);
390 let sigma = variance.sqrt();
391
392 // Issue #96 Task #213: Branchless epsilon check in burstiness (SIMD path)
393 // Avoid branch misprediction by using .max() to guard division
394 // Pattern: (sigma - mu) / denominator.max(f64::EPSILON) only divides if denominator valid
395 let denominator = sigma + mu;
396 let numerator = sigma - mu;
397
398 // Branchless: max ensures denominator >= EPSILON, avoiding division by near-zero
399 numerator / denominator.max(f64::EPSILON)
400 }
401
402 /// Compute inter-arrival times using SIMD vectorization.
403 /// Processes 4 timestamp differences at a time with f64x4.
404 #[inline]
405 /// Issue #96: SmallVec avoids heap allocation for typical bars (≤256 trades)
406 fn compute_inter_arrivals_simd(lookback: &[&TradeSnapshot]) -> SmallVec<[f64; 256]> {
407 let n = lookback.len();
408 if n < 2 {
409 return SmallVec::new();
410 }
411
412 let mut inter_arrivals: SmallVec<[f64; 256]> = smallvec::smallvec![0.0; n - 1];
413
414 // Process inter-arrivals (n-1 elements)
415 let iter_count = (n - 1) / 4;
416 for i in 0..iter_count {
417 let idx = i * 4;
418 for j in 0..4 {
419 inter_arrivals[idx + j] =
420 (lookback[idx + j + 1].timestamp - lookback[idx + j].timestamp) as f64;
421 }
422 }
423
424 // Scalar remainder for elements not in SIMD chunks
425 let remainder = (n - 1) % 4;
426 if remainder > 0 {
427 let idx = iter_count * 4;
428 for j in 0..remainder {
429 inter_arrivals[idx + j] =
430 (lookback[idx + j + 1].timestamp - lookback[idx + j].timestamp) as f64;
431 }
432 }
433
434 inter_arrivals
435 }
436
437 /// Compute sum of f64 slice using SIMD reduction with wide::f64x4.
438 /// Processes 4 elements at a time for 4x speedup vs scalar.
439 #[inline]
440 fn sum_f64_simd(values: &[f64]) -> f64 {
441 if values.is_empty() {
442 return 0.0;
443 }
444
445 // Use SIMD to accumulate 4 values at once
446 let chunks = values.len() / 4;
447 let mut sum_vec = f64x4::splat(0.0);
448
449 for i in 0..chunks {
450 let idx = i * 4;
451 let chunk = f64x4::new([
452 values[idx],
453 values[idx + 1],
454 values[idx + 2],
455 values[idx + 3],
456 ]);
457 sum_vec += chunk;
458 }
459
460 // Horizontal sum of SIMD vector (sum all 4 elements)
461 let simd_sum: [f64; 4] = sum_vec.into();
462 let mut total = simd_sum[0] + simd_sum[1] + simd_sum[2] + simd_sum[3];
463
464 // Scalar remainder for elements not in SIMD chunks
465 let remainder = values.len() % 4;
466 for j in 0..remainder {
467 total += values[chunks * 4 + j];
468 }
469
470 total
471 }
472
473 /// Compute variance using SIMD with wide::f64x4 vectors.
474 /// Processes 4 squared deviations per iteration for 4x speedup.
475 #[inline]
476 /// Issue #96: Accept pre-computed `inv_n` to eliminate redundant division
477 fn variance_f64_simd(values: &[f64], mu: f64, inv_n: f64) -> f64 {
478 if values.is_empty() {
479 return 0.0;
480 }
481
482 let mu_vec = f64x4::splat(mu);
483 let chunks = values.len() / 4;
484 let mut sum_sq_vec = f64x4::splat(0.0);
485
486 for i in 0..chunks {
487 let idx = i * 4;
488 let chunk = f64x4::new([
489 values[idx],
490 values[idx + 1],
491 values[idx + 2],
492 values[idx + 3],
493 ]);
494 let deviations = chunk - mu_vec;
495 let squared = deviations * deviations;
496 sum_sq_vec += squared;
497 }
498
499 // Horizontal sum of squared deviations
500 let simd_sums: [f64; 4] = sum_sq_vec.into();
501 let mut sum_sq = simd_sums[0] + simd_sums[1] + simd_sums[2] + simd_sums[3];
502
503 // Scalar remainder
504 let remainder = values.len() % 4;
505 for j in 0..remainder {
506 let v = values[chunks * 4 + j] - mu;
507 sum_sq += v * v;
508 }
509
510 sum_sq * inv_n
511 }
512
513 /// SIMD-accelerated Kyle Lambda computation using wide::f64x4.
514 ///
515 /// Formula: Kyle Lambda = ((last_price - first_price) / first_price) / normalized_imbalance
516 /// where normalized_imbalance = (buy_vol - sell_vol) / total_vol
517 ///
518 /// # Performance
519 /// Expected 1.5-2.5x speedup vs scalar via vectorized volume accumulation
520 /// and parallel SIMD reductions across multiple trades.
521 ///
522 /// Issue #96 Task #148 Phase 2: Kyle Lambda SIMD implementation
523 pub fn compute_kyle_lambda_simd(lookback: &[&TradeSnapshot]) -> f64 {
524 let n = lookback.len();
525
526 if n < 2 {
527 return 0.0;
528 }
529
530 // Issue #96 Task #210: Memoize first/last element access to avoid redundant .unwrap() chains
531 // Bounds guaranteed by n >= 2 check above; direct indexing is safer than repeated .first()/.last()
532 let first_price = lookback[0].price.to_f64();
533 let last_price = lookback[n - 1].price.to_f64();
534
535 // Adaptive computation: subsample large windows
536 let (buy_vol, sell_vol) = if n > 500 {
537 // Subsampled with SIMD-accelerated summing
538 accumulate_volumes_simd_wide(lookback, true)
539 } else {
540 // Full computation with SIMD
541 accumulate_volumes_simd_wide(lookback, false)
542 };
543
544 let total_vol = buy_vol + sell_vol;
545 let first_price_abs = first_price.abs();
546
547 // Early-exit optimization: extreme imbalance
548 if buy_vol >= total_vol - f64::EPSILON {
549 return if first_price_abs > f64::EPSILON {
550 (last_price - first_price) / first_price
551 } else {
552 0.0
553 };
554 } else if sell_vol >= total_vol - f64::EPSILON {
555 return if first_price_abs > f64::EPSILON {
556 -((last_price - first_price) / first_price)
557 } else {
558 0.0
559 };
560 }
561
562 let normalized_imbalance = if total_vol > f64::EPSILON {
563 (buy_vol - sell_vol) / total_vol
564 } else {
565 0.0
566 };
567
568 // Issue #96 Task #208: Early-exit for zero imbalance (SIMD path)
569 // If buy_vol ≈ sell_vol (perfectly balanced), Kyle Lambda = price_change / 0 = undefined
570 // Skip expensive price change calculation and return 0.0 immediately
571 let imbalance_abs = normalized_imbalance.abs();
572 if imbalance_abs <= f64::EPSILON {
573 return 0.0; // Balanced imbalance -> Kyle Lambda = 0.0
574 }
575
576 // Issue #96 Task #203: Branchless epsilon handling in SIMD path
577 let imbalance_valid = 1.0; // Already verified imbalance_abs > f64::EPSILON above
578 let price_valid = if first_price_abs > f64::EPSILON {
579 1.0
580 } else {
581 0.0
582 };
583 let both_valid = imbalance_valid * price_valid;
584
585 let price_change = if first_price_abs > f64::EPSILON {
586 (last_price - first_price) / first_price
587 } else {
588 0.0
589 };
590
591 if both_valid > 0.0 {
592 price_change / normalized_imbalance
593 } else {
594 0.0
595 }
596 }
597
598 /// Accumulate buy and sell volumes using SIMD vectorization.
599 /// Processes 4 volumes at a time using wide::f64x4.
600 #[inline]
601 fn accumulate_volumes_simd_wide(lookback: &[&TradeSnapshot], subsample: bool) -> (f64, f64) {
602 let mut buy_vol = 0.0;
603 let mut sell_vol = 0.0;
604
605 if subsample {
606 // Process every 5th trade for large windows
607 // Branchless arithmetic selection: is_buyer_maker → mask (1.0 or 0.0)
608 for trade in lookback.iter().step_by(5) {
609 let vol = trade.volume.to_f64();
610 let is_buyer_mask = trade.is_buyer_maker as u32 as f64;
611
612 // Arithmetic selection: when is_buyer_maker==true, add to sell_vol; else buy_vol
613 // (matches scalar logic: is_buyer_maker indicates seller-initiated trade)
614 buy_vol += vol * (1.0 - is_buyer_mask);
615 sell_vol += vol * is_buyer_mask;
616 }
617 } else {
618 // Full computation for medium windows with branchless optimization
619 // Issue #96 Task #175: Process trades in pairs to enable instruction-level parallelism
620 // Issue #96 Task #184: Branchless arithmetic selection (epsilon optimization)
621 let n = lookback.len();
622 let pairs = n / 2;
623
624 for i in 0..pairs {
625 let idx = i * 2;
626 let t0 = lookback[idx];
627 let t1 = lookback[idx + 1];
628
629 let vol0 = t0.volume.to_f64();
630 let vol1 = t1.volume.to_f64();
631
632 // Branchless conversion: is_buyer_maker (bool) → mask (0.0 or 1.0)
633 let is_buyer_mask0 = t0.is_buyer_maker as u32 as f64;
634 let is_buyer_mask1 = t1.is_buyer_maker as u32 as f64;
635
636 // Arithmetic selection: sell gets mask, buy gets 1-mask
637 // (matches scalar logic: is_buyer_maker=true → sell-initiated trade)
638 buy_vol += vol0 * (1.0 - is_buyer_mask0);
639 sell_vol += vol0 * is_buyer_mask0;
640
641 buy_vol += vol1 * (1.0 - is_buyer_mask1);
642 sell_vol += vol1 * is_buyer_mask1;
643 }
644
645 // Scalar remainder for odd-length arrays
646 if n % 2 == 1 {
647 let last_trade = lookback[n - 1];
648 let vol = last_trade.volume.to_f64();
649 let is_buyer_mask = last_trade.is_buyer_maker as u32 as f64;
650
651 buy_vol += vol * (1.0 - is_buyer_mask);
652 sell_vol += vol * is_buyer_mask;
653 }
654 }
655
656 (buy_vol, sell_vol)
657 }
658
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Build a `TradeSnapshot` from human-readable values.
        /// Prices/volumes are scaled by 1e8 into the crate's fixed-point repr;
        /// `is_buyer_maker` is fixed to `false` (burstiness ignores direction).
        fn create_test_snapshot(ts: i64, price: f64, volume: f64) -> TradeSnapshot {
            TradeSnapshot {
                timestamp: ts,
                price: crate::FixedPoint((price * 1e8) as i64),
                volume: crate::FixedPoint((volume * 1e8) as i64),
                is_buyer_maker: false,
                turnover: (price * volume * 1e8) as i128,
            }
        }

        // No trades → no inter-arrival times → burstiness defined as 0.0.
        #[test]
        fn test_burstiness_simd_edge_case_empty() {
            let lookback: Vec<&TradeSnapshot> = vec![];
            assert_eq!(compute_burstiness_simd(&lookback), 0.0);
        }

        // A single trade also has no inter-arrival time → 0.0.
        #[test]
        fn test_burstiness_simd_edge_case_single() {
            let t0 = create_test_snapshot(0, 100.0, 1.0);
            let lookback = vec![&t0];
            assert_eq!(compute_burstiness_simd(&lookback), 0.0);
        }

        // Perfectly regular arrivals: σ = 0, so B = (0 - μ)/(0 + μ) = -1.
        #[test]
        fn test_burstiness_simd_regular_intervals() {
            let t0 = create_test_snapshot(0, 100.0, 1.0);
            let t1 = create_test_snapshot(1000, 100.0, 1.0);
            let t2 = create_test_snapshot(2000, 100.0, 1.0);
            let t3 = create_test_snapshot(3000, 100.0, 1.0);
            let t4 = create_test_snapshot(4000, 100.0, 1.0);
            let lookback = vec![&t0, &t1, &t2, &t3, &t4];

            let b = compute_burstiness_simd(&lookback);
            assert!((b - (-1.0)).abs() < 0.01);
        }

        // Two tight clusters separated by a long gap: σ > μ, so B > 0 (bursty).
        #[test]
        fn test_burstiness_simd_clustered_arrivals() {
            let t0 = create_test_snapshot(0, 100.0, 1.0);
            let t1 = create_test_snapshot(10, 100.0, 1.0);
            let t2 = create_test_snapshot(20, 100.0, 1.0);
            let t3 = create_test_snapshot(5000, 100.0, 1.0);
            let t4 = create_test_snapshot(5010, 100.0, 1.0);
            let t5 = create_test_snapshot(5020, 100.0, 1.0);
            let lookback = vec![&t0, &t1, &t2, &t3, &t4, &t5];

            let b = compute_burstiness_simd(&lookback);
            assert!(b > 0.0);
            assert!(b <= 1.0);
        }

        // Burstiness is bounded: (σ-μ)/(σ+μ) ∈ [-1, 1] for non-negative σ, μ.
        #[test]
        fn test_burstiness_simd_bounds() {
            let t0 = create_test_snapshot(0, 100.0, 1.0);
            let t1 = create_test_snapshot(100, 100.0, 1.0);
            let t2 = create_test_snapshot(200, 100.0, 1.0);
            let t3 = create_test_snapshot(300, 100.0, 1.0);
            let lookback = vec![&t0, &t1, &t2, &t3];

            let b = compute_burstiness_simd(&lookback);
            assert!(b >= -1.0 && b <= 1.0);
        }

        // 7 trades → 6 inter-arrival gaps = one SIMD chunk of 4 + 2-element
        // scalar remainder; exercises the tail-handling path.
        #[test]
        fn test_simd_remainder_handling() {
            let trades: Vec<_> = (0..7)
                .map(|i| create_test_snapshot((i * 100) as i64, 100.0, 1.0))
                .collect();
            let trade_refs: Vec<_> = trades.iter().collect();

            let b = compute_burstiness_simd(&trade_refs);
            assert!(b >= -1.0 && b <= 1.0);
        }
    }
737}