quantwave_core/features/mod.rs
1//! ML Feature Engineering Toolkit (ta.features.*)
2//!
3//! This module provides rich, multi-dimensional feature extractors built on top of
4//! QuantWave's existing high-quality indicators (especially Ehlers DSP and Regimes).
5//! The goal is to make it trivial to build stable, no-lookahead feature matrices
6//! for ML pipelines and strategy research.
7//!
8//! All extractors follow the Universal Indicator pattern where possible:
9//! - Implement `Next<Input>` for streaming use
10//! - Provide equivalent batch (Polars) paths in `quantwave-polars`
//! - Prove batch == streaming parity via proptests (an eventual requirement; see task quantwave-tha)
12//!
13//! Design principles (from quantwave-4ub research notes):
14//! - Rich outputs (structs or tuples) over single scalars when useful
15//! - Strong metadata (reuse/extend IndicatorMetadata)
16//! - Easy composition with regimes and (future) PA events
17//! - Zero lookahead by construction
18//!
19//! Sources recorded (per AGENTS.md):
20//! - cyber_cycle.rs:35 (returns (cycle, trigger))
21//! - hurst.rs (persistence value, excellent regime feature)
22//! - regimes/mod.rs + 12 submodules (HMM probs, GMM, PELT, etc. as meta-features)
23//! - Ehlers papers in references/Ehlers Papers/implemented/
24//! - Prado "Advances in Financial Machine Learning" (for future fractional differencing / entropy)
25
26pub mod cyber_cycle;
27pub mod ehlers_autocorrelation;
28pub mod griffiths_dominant_cycle;
29pub mod hurst;
30pub mod instantaneous_trendline;
31pub mod regime;
32pub mod regime_probs;
33pub mod trendflex;
34
35// Re-export common feature types for convenience
36pub use cyber_cycle::{CyberCycleFeatureExtractor, CyberCycleFeatures};
37pub use ehlers_autocorrelation::{EhlersAutocorrelationFeatureExtractor, EhlersAutocorrelationFeatures};
38pub use griffiths_dominant_cycle::{GriffithsDominantCycleFeatureExtractor, GriffithsDominantCycleFeatures};
39pub use hurst::{HurstFeatureExtractor, HurstFeatures};
40pub use instantaneous_trendline::{InstantaneousTrendlineFeatureExtractor, InstantaneousTrendlineFeatures};
41pub use regime::{regime_to_features, RegimeFeatures};
42pub use regime_probs::{regime_to_prob_features, RegimeProbFeatures};
43pub use trendflex::{TrendflexFeatureExtractor, TrendflexFeatures};
44
45// === wlx (Polars layer) preparation note (2026-05-30) ===
46// Planned public API surface in quantwave-polars (to be implemented in wlx task):
47// - Extension trait on LazyFrame/Series: .ta.features.hurst(period) -> Struct or multiple columns
48// - .ta.features.ehlers_autocorrelation(length, num_lags) -> Struct with "correlations" List<f64> + "dominant_lag"
49// - Convenience: .ta.features.build_core_matrix() or .ta.features.build_matrix(["cyber_cycle", "hurst", "regime", "autocorrelation"])
50// - Rich returns: Struct for multi-value (cycle+trigger, autocorr vec), or exploded columns.
51// - All features must be causal (no lookahead) — enforced by using the Next<T> impls here or equivalent Polars exprs.
52// - Full integration with regimes (e.g. append hmm_regime probs as features) and future PA rich events (from cu03).
// - Python exposure: `from quantwave import ta; df.ta.features.hurst(20)` etc. (a minimal version is already prototyped in the gw7s notebook via the Python bindings).
54//
55// See: quantwave-4ps epic + children (tha + wlx + gw7s), quantwave-4ub research notes (P0 list + validation strategy), this module's proptest skeleton (the parity contract wlx must honor).
56// Once the core extractors here are stable (more P0 + full proptests), wlx can wire the Polars expressions + builders.
57// The gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) already demonstrates the intended usage pattern with the current extractors.
58
#[cfg(test)]
mod proptest_parity {
    use super::*;
    use crate::traits::Next;
    use proptest::prelude::*;

    // Skeleton for batch vs streaming parity (per quantwave-tha + 4ub research).
    // Once the wlx Polars layer exists, the "batch" path will use actual .ta.features.* exprs on LazyFrame.
    // Current skeleton: determinism of `Next` (two identically-configured extractors fed the
    // same series must emit the same features) plus a few cheap sanity bounds.
    // Full rich parity (including regime + feature filters) will be exercised in backtester (ug9t/06sz).
    //
    // NOTE(review): the `prop_assert_eq!` checks below compare feature fields exactly. If any of
    // those fields is a float that can be NaN during warm-up, the equality would fail
    // (NaN != NaN) — confirm warm-up outputs are finite, or switch to a NaN-aware comparison
    // like the one used in the cyber-cycle test.

    proptest! {
        // Two Hurst extractors with the same period, fed the same values, must report
        // the same persistence at every step.
        #[test]
        fn hurst_streaming_is_deterministic(data in prop::collection::vec(-100f64..100.0, 5..100)) {
            let mut ext1 = HurstFeatureExtractor::new(20);
            let mut ext2 = HurstFeatureExtractor::new(20);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                prop_assert_eq!(f1.persistence, f2.persistence);
            }
        }

        // Cyber-cycle momentum must be reproducible across two identical extractors and,
        // whenever it is defined (non-NaN), stay within a loose magnitude bound.
        #[test]
        fn cybercycle_streaming_deterministic_and_momentum_sane(data in prop::collection::vec(-50f64..50.0, 10..80)) {
            let mut ext1 = CyberCycleFeatureExtractor::new(14);
            let mut ext2 = CyberCycleFeatureExtractor::new(14);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);

                // Determinism: both streams agree, treating "both NaN" as agreement because
                // NaN never compares equal to itself.
                prop_assert!(
                    f1.cycle_momentum == f2.cycle_momentum
                        || (f1.cycle_momentum.is_nan() && f2.cycle_momentum.is_nan())
                );

                // Momentum should be cycle delta (sanity, not strict property)
                if !f1.cycle_momentum.is_nan() {
                    prop_assert!(f1.cycle_momentum.abs() < 100.0);
                }
            }
        }

        // Two autocorrelation extractors with identical parameters must agree on both the
        // dominant lag and the maximum correlation at every step.
        #[test]
        fn autocorrelation_streaming_deterministic(data in prop::collection::vec(-100f64..100.0, 20..120)) {
            let mut ext1 = EhlersAutocorrelationFeatureExtractor::new(30, 10);
            let mut ext2 = EhlersAutocorrelationFeatureExtractor::new(30, 10);

            for &val in &data {
                let f1 = ext1.next(val);
                let f2 = ext2.next(val);
                prop_assert_eq!(f1.dominant_lag, f2.dominant_lag);
                prop_assert_eq!(f1.max_correlation, f2.max_correlation);
            }
        }

        // Griffiths dominant-cycle output must be reproducible, and the probability features
        // derived from a fixed regime label must sum to (approximately) at least one.
        #[test]
        fn griffiths_and_regime_prob_streaming_stable(data in prop::collection::vec(-80f64..80.0, 30..150)) {
            // The regime-probability check does not depend on `data`, so it is evaluated once
            // up front instead of on every loop iteration.
            // regime prob is deterministic transform of label
            let rf = regime_probs::regime_to_prob_features(crate::regimes::MarketRegime::Steady);
            prop_assert!(rf.probs.iter().sum::<f64>() > 0.99);

            let mut g1 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);
            let mut g2 = GriffithsDominantCycleFeatureExtractor::new(8, 50, 40);

            for &val in &data {
                let gf1 = g1.next(val);
                let gf2 = g2.next(val);
                prop_assert_eq!(gf1.dominant_cycle, gf2.dominant_cycle);
            }
        }

        // TODO (wlx integration): Once Polars exposure exists,
        // add property:
        // "batch Polars .ta.features.hurst(...) on LazyFrame(series) == streaming collect(Next) on same series"
        // using existing crate check_batch_streaming_parity helper pattern from indicators.
    }
}
128
129
// NOTE(review): this trait is declared after the `#[cfg(test)]` module in this file; clippy's
// `items_after_test_module` lint prefers test modules to come last — consider moving it up.

/// Common trait for feature extractors that want to expose a stable "feature vector" view.
/// (Future expansion point for a unified FeatureVector trait.)
///
/// Implementors hand out their current feature values as a borrowed `f64` slice, so a
/// consumer can read them without knowing the concrete feature struct.
pub trait AsFeatures {
    /// Returns a slice of the current feature values (for quick ML consumption).
    ///
    /// The returned slice borrows from `self`; no ownership is transferred. Calling this
    /// and discarding the result has no effect, hence `#[must_use]`.
    #[must_use]
    fn as_features(&self) -> &[f64];
}