quantwave_core/features/mod.rs
1//! ML Feature Engineering Toolkit (ta.features.*)
2//!
3//! This module provides rich, multi-dimensional feature extractors built on top of
4//! QuantWave's existing high-quality indicators (especially Ehlers DSP and Regimes).
5//! The goal is to make it trivial to build stable, no-lookahead feature matrices
6//! for ML pipelines and strategy research.
7//!
8//! All extractors follow the Universal Indicator pattern where possible:
9//! - Implement `Next<Input>` for streaming use
10//! - Provide equivalent batch (Polars) paths in `quantwave-polars`
11//! - Must eventually prove batch == streaming via proptests (see task quantwave-tha)
12//!
13//! Design principles (from quantwave-4ub research notes):
14//! - Rich outputs (structs or tuples) over single scalars when useful
15//! - Strong metadata (reuse/extend IndicatorMetadata)
16//! - Easy composition with regimes and (future) PA events
17//! - Zero lookahead by construction
18//!
19//! Sources recorded (per AGENTS.md):
20//! - cyber_cycle.rs:35 (returns (cycle, trigger))
21//! - hurst.rs (persistence value, excellent regime feature)
22//! - regimes/mod.rs + 12 submodules (HMM probs, GMM, PELT, etc. as meta-features)
23//! - Ehlers papers in references/Ehlers Papers/implemented/
24//! - Prado "Advances in Financial Machine Learning" (for future fractional differencing / entropy)
25
26pub mod cyber_cycle;
27pub mod ehlers_autocorrelation;
28pub mod griffiths_dominant_cycle;
29pub mod hurst;
30pub mod instantaneous_trendline;
31pub mod regime;
32pub mod regime_probs;
33pub mod trendflex;
34
35// Re-export common feature types for convenience
36pub use cyber_cycle::{CyberCycleFeatureExtractor, CyberCycleFeatures};
37pub use ehlers_autocorrelation::{
38 EhlersAutocorrelationFeatureExtractor, EhlersAutocorrelationFeatures,
39};
40pub use griffiths_dominant_cycle::{
41 GriffithsDominantCycleFeatureExtractor, GriffithsDominantCycleFeatures,
42};
43pub use hurst::{HurstFeatureExtractor, HurstFeatures};
44pub use instantaneous_trendline::{
45 InstantaneousTrendlineFeatureExtractor, InstantaneousTrendlineFeatures,
46};
47pub use regime::{RegimeFeatures, regime_to_features};
48pub use regime_probs::{RegimeProbFeatureExtractor, RegimeProbFeatures, regime_to_prob_features};
49pub use trendflex::{TrendflexFeatureExtractor, TrendflexFeatures};
50
51// === wlx (Polars layer) preparation note (2026-05-30) ===
52// Planned public API surface in quantwave-polars (to be implemented in wlx task):
53// - Extension trait on LazyFrame/Series: .ta.features.hurst(period) -> Struct or multiple columns
54// - .ta.features.ehlers_autocorrelation(length, num_lags) -> Struct with "correlations" List<f64> + "dominant_lag"
55// - Convenience: .ta.features.build_core_matrix() or .ta.features.build_matrix(["cyber_cycle", "hurst", "regime", "autocorrelation"])
56// - Rich returns: Struct for multi-value (cycle+trigger, autocorr vec), or exploded columns.
57// - All features must be causal (no lookahead) — enforced by using the Next<T> impls here or equivalent Polars exprs.
58// - Full integration with regimes (e.g. append hmm_regime probs as features) and future PA rich events (from cu03).
// - Python exposure: `from quantwave import ta; df.ta.features.hurst(20)` etc. (a minimal version is already prototyped in the gw7s notebook via the Python bindings).
60//
61// See: quantwave-4ps epic + children (tha + wlx + gw7s), quantwave-4ub research notes (P0 list + validation strategy), this module's proptest skeleton (the parity contract wlx must honor).
62// Once the core extractors here are stable (more P0 + full proptests), wlx can wire the Polars expressions + builders.
63// The gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) already demonstrates the intended usage pattern with the current extractors.
64
#[cfg(test)]
mod proptest_parity {
    use super::*;
    use crate::traits::Next;
    use proptest::prelude::*;

    // Skeleton for batch vs streaming parity (per quantwave-tha + 4ub research).
    // Once the wlx Polars layer exists, the "batch" path will use actual .ta.features.* exprs on LazyFrame.
    // Current skeleton: determinism of Next + simple "batch re-compute" equivalence for stateless views.
    // Full rich parity (including regime + feature filters) will be exercised in backtester (ug9t/06sz).

    /// Equality check that additionally treats "both sides are NaN" as equal.
    ///
    /// Two independently constructed extractors fed the same degenerate input may
    /// both report NaN; that is still deterministic behaviour, but plain `==`
    /// would reject it because NaN never equals itself. The check is expressed
    /// through `PartialEq` only, so it works for any field type the feature
    /// structs expose (floats, integers, ...) without assuming which one it is.
    // The self-comparison is intentional: only NaN-like values differ from themselves.
    #[allow(clippy::eq_op)]
    fn same_or_both_nan<T: PartialEq>(lhs: &T, rhs: &T) -> bool {
        lhs == rhs || (lhs != lhs && rhs != rhs)
    }

    proptest! {
        /// Two Hurst extractors with identical parameters must agree sample by sample.
        #[test]
        fn hurst_streaming_is_deterministic(data in prop::collection::vec(-100f64..100.0, 5..100)) {
            let mut ext1 = HurstFeatureExtractor::new(20);
            let mut ext2 = HurstFeatureExtractor::new(20);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                // NaN on both sides counts as agreement (degenerate input).
                prop_assert!(
                    same_or_both_nan(&f1.persistence, &f2.persistence),
                    "hurst persistence diverged: {:?} vs {:?}",
                    f1.persistence,
                    f2.persistence
                );
            }
        }

        /// Cyber-cycle momentum must be reproducible across two identical extractors
        /// and stay within a loose magnitude bound for bounded input.
        #[test]
        fn cybercycle_streaming_deterministic_and_momentum_sane(data in prop::collection::vec(-50f64..50.0, 10..80)) {
            let mut ext1 = CyberCycleFeatureExtractor::new(14);
            let mut ext2 = CyberCycleFeatureExtractor::new(14);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);

                // Determinism: same input stream, same parameters => same momentum.
                prop_assert!(
                    same_or_both_nan(&f1.cycle_momentum, &f2.cycle_momentum),
                    "cycle_momentum diverged: {:?} vs {:?}",
                    f1.cycle_momentum,
                    f2.cycle_momentum
                );

                // Momentum should be cycle delta (sanity bound, not a strict property).
                if !f1.cycle_momentum.is_nan() {
                    prop_assert!(f1.cycle_momentum.abs() < 100.0);
                }
            }
        }

        /// Two autocorrelation extractors with identical parameters must agree on
        /// both the dominant lag and the peak correlation at every step.
        #[test]
        fn autocorrelation_streaming_deterministic(data in prop::collection::vec(-100f64..100.0, 20..120)) {
            let mut ext1 = EhlersAutocorrelationFeatureExtractor::new(30, 10);
            let mut ext2 = EhlersAutocorrelationFeatureExtractor::new(30, 10);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                prop_assert!(
                    same_or_both_nan(&f1.dominant_lag, &f2.dominant_lag),
                    "dominant_lag diverged: {:?} vs {:?}",
                    f1.dominant_lag,
                    f2.dominant_lag
                );
                prop_assert!(
                    same_or_both_nan(&f1.max_correlation, &f2.max_correlation),
                    "max_correlation diverged: {:?} vs {:?}",
                    f1.max_correlation,
                    f2.max_correlation
                );
            }
        }

        /// Griffiths dominant-cycle output must be reproducible, and the regime
        /// probability view of a fixed label must carry (almost) all of its mass.
        #[test]
        fn griffiths_and_regime_prob_streaming_stable(data in prop::collection::vec(-80f64..80.0, 30..150)) {
            // The regime prob is a deterministic transform of the label and does not
            // depend on the sampled data, so it is checked once rather than per sample.
            let rf = regime_probs::regime_to_prob_features(crate::regimes::MarketRegime::Steady);
            prop_assert!(rf.probs.iter().sum::<f64>() > 0.99);

            let mut g1 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);
            let mut g2 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);

            for &val in &data {
                let gf1 = g1.next(val);
                let gf2 = g2.next(val);
                prop_assert!(
                    same_or_both_nan(&gf1.dominant_cycle, &gf2.dominant_cycle),
                    "dominant_cycle diverged: {:?} vs {:?}",
                    gf1.dominant_cycle,
                    gf2.dominant_cycle
                );
            }
        }

        // TODO (wlx integration): Once Polars exposure exists,
        // add property:
        //   "batch Polars .ta.features.hurst(...) on LazyFrame(series) == streaming collect(Next) on same series"
        // using existing crate check_batch_streaming_parity helper pattern from indicators.
    }
}
138
/// Uniform, read-only "feature vector" view over a feature extractor's output.
///
/// An implementor hands out its current feature values as a borrowed `f64`
/// slice; the borrow is tied to `&self`, so the caller never takes ownership.
/// (Reserved as the expansion point for a future unified `FeatureVector` trait.)
pub trait AsFeatures {
    /// Borrows the current feature values as an `f64` slice for quick ML consumption.
    fn as_features(&self) -> &[f64];
}