// quantwave_core/features/mod.rs

1//! ML Feature Engineering Toolkit (ta.features.*)
2//!
3//! This module provides rich, multi-dimensional feature extractors built on top of
4//! QuantWave's existing high-quality indicators (especially Ehlers DSP and Regimes).
5//! The goal is to make it trivial to build stable, no-lookahead feature matrices
6//! for ML pipelines and strategy research.
7//!
//! All extractors follow the Universal Indicator pattern where possible:
//! - Implement `Next<Input>` for streaming use
//! - Provide an equivalent batch (Polars) path in `quantwave-polars`
//! - Prove batch == streaming parity via proptests (not yet complete; see task quantwave-tha)
12//!
13//! Design principles (from quantwave-4ub research notes):
14//! - Rich outputs (structs or tuples) over single scalars when useful
15//! - Strong metadata (reuse/extend IndicatorMetadata)
16//! - Easy composition with regimes and (future) PA events
17//! - Zero lookahead by construction
18//!
19//! Sources recorded (per AGENTS.md):
20//! - cyber_cycle.rs:35 (returns (cycle, trigger))
21//! - hurst.rs (persistence value, excellent regime feature)
22//! - regimes/mod.rs + 12 submodules (HMM probs, GMM, PELT, etc. as meta-features)
23//! - Ehlers papers in references/Ehlers Papers/implemented/
24//! - Prado "Advances in Financial Machine Learning" (for future fractional differencing / entropy)
25
26pub mod cyber_cycle;
27pub mod ehlers_autocorrelation;
28pub mod griffiths_dominant_cycle;
29pub mod hurst;
30pub mod instantaneous_trendline;
31pub mod regime;
32pub mod regime_probs;
33pub mod trendflex;
34
35// Re-export common feature types for convenience
36pub use cyber_cycle::{CyberCycleFeatureExtractor, CyberCycleFeatures};
37pub use ehlers_autocorrelation::{
38    EhlersAutocorrelationFeatureExtractor, EhlersAutocorrelationFeatures,
39};
40pub use griffiths_dominant_cycle::{
41    GriffithsDominantCycleFeatureExtractor, GriffithsDominantCycleFeatures,
42};
43pub use hurst::{HurstFeatureExtractor, HurstFeatures};
44pub use instantaneous_trendline::{
45    InstantaneousTrendlineFeatureExtractor, InstantaneousTrendlineFeatures,
46};
47pub use regime::{RegimeFeatures, regime_to_features};
48pub use regime_probs::{RegimeProbFeatureExtractor, RegimeProbFeatures, regime_to_prob_features};
49pub use trendflex::{TrendflexFeatureExtractor, TrendflexFeatures};
50
51// === wlx (Polars layer) preparation note (2026-05-30) ===
52// Planned public API surface in quantwave-polars (to be implemented in wlx task):
53// - Extension trait on LazyFrame/Series: .ta.features.hurst(period) -> Struct or multiple columns
54// - .ta.features.ehlers_autocorrelation(length, num_lags) -> Struct with "correlations" List<f64> + "dominant_lag"
55// - Convenience: .ta.features.build_core_matrix() or .ta.features.build_matrix(["cyber_cycle", "hurst", "regime", "autocorrelation"])
56// - Rich returns: Struct for multi-value (cycle+trigger, autocorr vec), or exploded columns.
57// - All features must be causal (no lookahead) — enforced by using the Next<T> impls here or equivalent Polars exprs.
58// - Full integration with regimes (e.g. append hmm_regime probs as features) and future PA rich events (from cu03).
// - Python exposure: `from quantwave import ta; df.ta.features.hurst(20)` etc. (a minimal version is already prototyped in the gw7s notebook via the Python bindings).
60//
61// See: quantwave-4ps epic + children (tha + wlx + gw7s), quantwave-4ub research notes (P0 list + validation strategy), this module's proptest skeleton (the parity contract wlx must honor).
62// Once the core extractors here are stable (more P0 + full proptests), wlx can wire the Polars expressions + builders.
63// The gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) already demonstrates the intended usage pattern with the current extractors.
64
#[cfg(test)]
mod proptest_parity {
    use super::*;
    use crate::traits::Next;
    use proptest::prelude::*;

    // Skeleton for batch vs streaming parity (per quantwave-tha + 4ub research).
    // Once the wlx Polars layer exists, the "batch" path will use actual .ta.features.* exprs on LazyFrame.
    // Current skeleton: determinism of Next + simple "batch re-compute" equivalence for stateless views.
    // Full rich parity (including regime + feature filters) will be exercised in backtester (ug9t/06sz).
    //
    // Every determinism test below drives two freshly constructed extractors with the
    // same input sequence and requires their outputs to agree at every step.

    proptest! {
        /// Two identically configured Hurst extractors must report the same persistence
        /// at every step of the same input sequence.
        #[test]
        fn hurst_streaming_is_deterministic(data in prop::collection::vec(-100f64..100.0, 5..100)) {
            let mut ext1 = HurstFeatureExtractor::new(20);
            let mut ext2 = HurstFeatureExtractor::new(20);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                // NaN != NaN under IEEE 754, so "both NaN" has to be accepted explicitly
                // as agreement on degenerate input; a one-sided NaN still fails below.
                if f1.persistence.is_nan() && f2.persistence.is_nan() {
                    continue;
                }
                prop_assert_eq!(f1.persistence, f2.persistence);
            }
        }

        /// Cyber-cycle momentum must be reproducible across two identical extractors and
        /// stay within a loose magnitude bound for inputs in (-50, 50).
        #[test]
        fn cybercycle_streaming_deterministic_and_momentum_sane(data in prop::collection::vec(-50f64..50.0, 10..80)) {
            let mut ext1 = CyberCycleFeatureExtractor::new(14);
            let mut ext2 = CyberCycleFeatureExtractor::new(14);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);

                if f1.cycle_momentum.is_nan() {
                    // Determinism on degenerate input: the twin must be NaN as well.
                    prop_assert!(f2.cycle_momentum.is_nan());
                    continue;
                }

                // Determinism: same inputs, same momentum.
                prop_assert_eq!(f1.cycle_momentum, f2.cycle_momentum);
                // Momentum should be cycle delta (sanity, not strict property)
                prop_assert!(f1.cycle_momentum.abs() < 100.0);
            }
        }

        /// Two identically configured autocorrelation extractors must agree on the
        /// dominant lag and the maximum correlation at every step.
        #[test]
        fn autocorrelation_streaming_deterministic(data in prop::collection::vec(-100f64..100.0, 20..120)) {
            let mut ext1 = EhlersAutocorrelationFeatureExtractor::new(30, 10);
            let mut ext2 = EhlersAutocorrelationFeatureExtractor::new(30, 10);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                // NOTE(review): if `max_correlation` is a float that can be NaN on
                // degenerate windows, this needs the same both-NaN handling as the
                // Hurst test — confirm against the extractor.
                prop_assert_eq!(f1.dominant_lag, f2.dominant_lag);
                prop_assert_eq!(f1.max_correlation, f2.max_correlation);
            }
        }

        /// Griffiths dominant-cycle output must be reproducible, and the regime
        /// probability features for a fixed label must carry (almost) full mass.
        #[test]
        fn griffiths_and_regime_prob_streaming_stable(data in prop::collection::vec(-80f64..80.0, 30..150)) {
            // regime prob is deterministic transform of label; it does not depend on
            // `data`, so it is checked once up front instead of on every iteration.
            let rf = regime_probs::regime_to_prob_features(crate::regimes::MarketRegime::Steady);
            prop_assert!(rf.probs.iter().sum::<f64>() > 0.99);

            let mut g1 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);
            let mut g2 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);

            for &val in &data {
                let gf1 = g1.next(val);
                let gf2 = g2.next(val);
                prop_assert_eq!(gf1.dominant_cycle, gf2.dominant_cycle);
            }
        }

        // TODO (wlx integration): Once Polars exposure exists,
        // add property:
        // "batch Polars .ta.features.hurst(...) on LazyFrame(series) == streaming collect(Next) on same series"
        // using existing crate check_batch_streaming_parity helper pattern from indicators.
    }
}
138
/// Common trait for feature extractors that want to expose a stable "feature vector" view.
///
/// Implementors hand out their current feature values as a single borrowed `f64`
/// slice, so callers can consume them without knowing the concrete feature struct.
/// The trait itself does not fix the length or ordering of the slice; both are
/// defined by the implementor.
///
/// (Future expansion point for a unified FeatureVector trait.)
pub trait AsFeatures {
    /// Returns a slice of the current feature values (for quick ML consumption).
    ///
    /// The returned slice borrows from `self`; no values are copied by the trait
    /// contract itself.
    fn as_features(&self) -> &[f64];
}