Skip to main content

quantwave_polars/
features.rs

1//! ML Feature Engineering Polars layer (ta.features.*)
2//!
3//! Wires the rich Rust feature extractors from `quantwave_core::features` into
4//! the .ta. namespace on LazyFrame, following the exact patterns from
5//! quantwave-polars/src/lib.rs (UDF map closures + StructChunked::from_series
6//! for rich multi-outputs + with_columns for lazy exprs).
7//!
8//! This delivers the **minimal locked surface** required for the cross-epic
9//! deliverable (ML Features → Realistic Backtest with Rich Metadata) that
10//! closes quantwave-4ps + quantwave-gwx.
11//!
12//! The canonical executable demonstration + parity verification is the notebook:
13//! docs/examples/notebooks/ml_feature_backtest_parity.py
14//! (uses this surface in documented Rust batch path + equivalent Python streaming generators
15//!  + FeatureToSignal adapter + full rich metadata preservation in trades).
16//!
17//! LOCKED SURFACE (per quantwave-4ps notes, "DETAILED WLX SURFACE REQUIRED..." section, 2026-05-31 IST):
18//! 1. .ta.features.hurst(period) -> column "hurst_{period}" (f64 persistence)
19//! 2. .ta.features.cyber_cycle(length) -> Struct column "cyber_cycle" with fields [cycle, trigger, momentum, signal]
20//! 3. .ta.features.griffiths_dominant_cycle(lower, upper, length) -> column "griffiths_dc" (f64)
21//! 4. .ta.features.regime_features() -> column "regime_label" (u32, from HMM bull_bear for MVP usability)
22//!
23//! All are lazy (exprs built with with_columns + map UDFs; execution deferred to collect).
24//! All delegate directly to the Next<T> wrappers in quantwave-core (zero lookahead by construction).
25//! No build_matrix yet (per instructions; kept minimal).
26//!
27//! Sources recorded (per AGENTS.md + 4ps spec):
28//! - quantwave-core/src/features/hurst.rs (HurstFeatureExtractor + HurstFeatures; wraps indicators/hurst.rs)
29//! - quantwave-core/src/features/cyber_cycle.rs (CyberCycleFeatureExtractor + CyberCycleFeatures; primary source indicators/cyber_cycle.rs:35 per Ehlers "Cybernetic Analysis...")
30//! - quantwave-core/src/features/griffiths_dominant_cycle.rs (GriffithsDominantCycleFeatureExtractor + ...Features; wraps indicators/griffiths_dominant_cycle.rs)
31//! - quantwave-core/src/features/regime.rs + regimes/hmm.rs (regime_to_features + HMM::bull_bear for label; MarketRegime)
32//! - quantwave-core/src/features/mod.rs (wlx prep note 2026-05-30 + AsFeatures skeleton + proptest parity contract)
33//! - quantwave-4ps epic (parent) + wlx child design notes (this surface is the exact contract for the "smoking gun" notebook)
34//! - Existing .ta. patterns in quantwave-polars/src/lib.rs (macd/bbands/supertrend/gap_momentum struct returns, adosc etc. stateful maps, regimes_conditioned_metrics)
35//! - gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) + quantwave-4ub research (P0 feature list)
36//! - quantwave-backtest (future consumer of the metadata columns from these exprs)
37//!
38//! Decision: CyberCycle returns a Struct (matching every other rich multi-output in this crate on Polars 0.46; callers can .unnest("cyber_cycle") when flat columns are needed). Regime uses a simple but real HMM label (usable in MVP notebook/backtester filters) rather than a pure placeholder.
39
40use polars::prelude::*;
41use quantwave_core::features::{self as rust_features};
42use quantwave_core::traits::Next;
43
44// Bring parent crate type into scope for the inherent impl that extends the .ta. namespace.
45use crate::QuantWaveNamespace;
46
47/// Sub-namespace returned by .ta().features().
48/// Methods here implement the exact locked surface for the 4ps/gwx cross-epic deliverable.
49pub struct TaFeaturesNamespace<'a>(pub(crate) &'a LazyFrame);
50
51impl<'a> QuantWaveNamespace<'a> {
52    /// Entry point for the ML features namespace.
53    /// Usage: df.lazy().ta().features().hurst(20) etc.
54    pub fn features(self) -> TaFeaturesNamespace<'a> {
55        TaFeaturesNamespace(self.0)
56    }
57}
58
59impl<'a> TaFeaturesNamespace<'a> {
60    /// Hurst persistence feature (plus internal regime label in the core extractor).
61    /// Output column: "hurst_{period}" (f64).
62    ///
63    /// Delegates to quantwave_core::features::HurstFeatureExtractor (Next<f64, Output=HurstFeatures>).
64    pub fn hurst(self, period: usize) -> LazyFrame {
65        self.0.clone().with_columns([col("close")
66            .map(
67                move |s| {
68                    let mut extractor = rust_features::HurstFeatureExtractor::new(period);
69                    let ca: &Float64Chunked = s.f64()?;
70                    let mut values = Vec::with_capacity(s.len());
71                    for i in 0..s.len() {
72                        let val = ca.get(i).unwrap_or(f64::NAN);
73                        values.push(extractor.next(val).persistence);
74                    }
75                    Ok(Some(Column::from(Series::new(
76                        format!("hurst_{}", period).into(),
77                        values,
78                    ))))
79                },
80                GetOutput::from_type(DataType::Float64),
81            )
82            .alias(&format!("hurst_{}", period))])
83    }
84
85    /// Cyber Cycle rich features (cycle + trigger + derived momentum + signal).
86    /// Returns Struct column named "cyber_cycle" with fields:
87    ///   cycle, trigger, momentum, signal (all f64).
88    ///
89    /// Delegates to quantwave_core::features::CyberCycleFeatureExtractor.
90    /// Struct return matches project convention for multi-output (see macd, bbands, supertrend etc in lib.rs).
91    pub fn cyber_cycle(self, length: usize) -> LazyFrame {
92        self.0.clone().with_columns([col("close")
93            .map(
94                move |s| {
95                    let mut extractor = rust_features::CyberCycleFeatureExtractor::new(length);
96                    let ca: &Float64Chunked = s.f64()?;
97                    let mut cycles = Vec::with_capacity(s.len());
98                    let mut triggers = Vec::with_capacity(s.len());
99                    let mut momenta = Vec::with_capacity(s.len());
100                    let mut signals = Vec::with_capacity(s.len());
101
102                    for i in 0..s.len() {
103                        let val = ca.get(i).unwrap_or(f64::NAN);
104                        let f = extractor.next(val);
105                        cycles.push(f.cycle);
106                        triggers.push(f.trigger);
107                        momenta.push(f.cycle_momentum);
108                        signals.push(f.trigger_signal);
109                    }
110
111                    let s_cycle = Series::new("cycle".into(), cycles);
112                    let s_trigger = Series::new("trigger".into(), triggers);
113                    let s_mom = Series::new("momentum".into(), momenta);
114                    let s_sig = Series::new("signal".into(), signals);
115
116                    let struct_series = StructChunked::from_series(
117                        "cyber_cycle_result".into(),
118                        s.len(),
119                        [s_cycle, s_trigger, s_mom, s_sig].iter(),
120                    )?;
121                    Ok(Some(Column::from(struct_series.into_series())))
122                },
123                GetOutput::from_type(DataType::Struct(vec![
124                    Field::new("cycle".into(), DataType::Float64),
125                    Field::new("trigger".into(), DataType::Float64),
126                    Field::new("momentum".into(), DataType::Float64),
127                    Field::new("signal".into(), DataType::Float64),
128                ])),
129            )
130            .alias("cyber_cycle")])
131    }
132
133    /// Griffiths Dominant Cycle estimate (high-value stationary cycle feature).
134    /// Output column: "griffiths_dc" (f64) — name fixed per locked 4ps deliverable spec (params not encoded in col name).
135    ///
136    /// Delegates to quantwave_core::features::GriffithsDominantCycleFeatureExtractor.
137    pub fn griffiths_dominant_cycle(self, lower: usize, upper: usize, length: usize) -> LazyFrame {
138        self.0.clone().with_columns([col("close")
139            .map(
140                move |s| {
141                    let mut extractor =
142                        rust_features::GriffithsDominantCycleFeatureExtractor::new(lower, upper, length);
143                    let ca: &Float64Chunked = s.f64()?;
144                    let mut values = Vec::with_capacity(s.len());
145                    for i in 0..s.len() {
146                        let val = ca.get(i).unwrap_or(f64::NAN);
147                        values.push(extractor.next(val).dominant_cycle);
148                    }
149                    Ok(Some(Column::from(Series::new("griffiths_dc".into(), values))))
150                },
151                GetOutput::from_type(DataType::Float64),
152            )
153            .alias("griffiths_dc")])
154    }
155
156    /// Basic regime label feature (usable for filters/sizing in backtester + MVP notebook).
157    /// Output column: "regime_label" (u32).
158    ///
159    /// For this minimal surface we compute a real label using the HMM bull_bear detector
160    /// on close (consistent with existing regime exprs in lib.rs). Simple label satisfies
161    /// the locked 4ps deliverable spec; richer probs/one-hot can layer on later.
162    ///
163    /// Delegates to quantwave_core::regimes::hmm::HMM + MarketRegime (see also regime.rs helpers).
164    pub fn regime_features(self) -> LazyFrame {
165        self.0.clone().with_columns([col("close")
166            .map(
167                move |s| {
168                    let mut hmm = quantwave_core::regimes::hmm::HMM::bull_bear();
169                    let ca = s.f64()?;
170                    let mut labels = Vec::with_capacity(s.len());
171                    for i in 0..s.len() {
172                        let val = ca.get(i).unwrap_or(f64::NAN);
173                        let regime = if val.is_nan() {
174                            quantwave_core::regimes::MarketRegime::Steady
175                        } else {
176                            hmm.next(val)
177                        };
178                        let label: u32 = match regime {
179                            quantwave_core::regimes::MarketRegime::Bull => 1,
180                            quantwave_core::regimes::MarketRegime::Bear => 2,
181                            quantwave_core::regimes::MarketRegime::Crisis => 3,
182                            quantwave_core::regimes::MarketRegime::Steady => 0,
183                            quantwave_core::regimes::MarketRegime::Cluster(c) => 4 + (c as u32),
184                        };
185                        labels.push(label);
186                    }
187                    Ok(Some(Column::from(Series::new("regime_label".into(), labels))))
188                },
189                GetOutput::from_type(DataType::UInt32),
190            )
191            .alias("regime_label")])
192    }
193}
194
195// The struct is pub so it is reachable as quantwave_polars::features::TaFeaturesNamespace if needed for turbofish/docs.
196// No additional re-export required here; the .ta().features() chaining works via the impl on QuantWaveNamespace
197// (the mod features; declaration in lib.rs ensures the impl is linked).
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::QuantWaveExt; // provides the `.ta()` extension method used below

    /// End-to-end smoke test of the four `.ta().features()` methods.
    ///
    /// Chains the methods over a 40-row `close` series, collecting after each step,
    /// and checks that every output column exists with the declared dtype. Numeric
    /// correctness of the extractors is not asserted here.
    #[test]
    fn smoke_ta_features_surface() -> PolarsResult<()> {
        // Sine wave on top of a gentle upward drift.
        let prices: Vec<f64> = (0..40)
            .map(|i| {
                let t = i as f64;
                100.0 + 3.0 * (t * 0.4).sin() + t * 0.1
            })
            .collect();
        let base = df!["close" => prices]?.lazy();

        // Step 1: hurst -> "hurst_8" (Float64)
        let frame = base.clone().ta().features().hurst(8).collect()?;
        let hurst_col = frame.column("hurst_8");
        assert!(hurst_col.is_ok());
        assert_eq!(hurst_col?.dtype(), &DataType::Float64);

        // Step 2: cyber_cycle -> "cyber_cycle" (Struct of four Float64 fields)
        let frame = frame.lazy().ta().features().cyber_cycle(12).collect()?;
        let expected_struct = DataType::Struct(vec![
            Field::new("cycle".into(), DataType::Float64),
            Field::new("trigger".into(), DataType::Float64),
            Field::new("momentum".into(), DataType::Float64),
            Field::new("signal".into(), DataType::Float64),
        ]);
        let cc = frame.column("cyber_cycle")?;
        assert_eq!(cc.dtype().clone(), expected_struct);
        let last_cycle = cc
            .struct_()?
            .field_by_name("cycle".into())?
            .f64()?
            .get(39);
        assert!(last_cycle.is_some());

        // Step 3: griffiths_dominant_cycle -> "griffiths_dc" (Float64)
        let frame = frame
            .lazy()
            .ta()
            .features()
            .griffiths_dominant_cycle(6, 40, 25)
            .collect()?;
        let griffiths_col = frame.column("griffiths_dc");
        assert!(griffiths_col.is_ok());
        assert_eq!(griffiths_col?.dtype(), &DataType::Float64);

        // Step 4: regime_features -> "regime_label" (UInt32)
        let frame = frame.lazy().ta().features().regime_features().collect()?;
        let regime_col = frame.column("regime_label");
        assert!(regime_col.is_ok());
        assert_eq!(regime_col?.dtype(), &DataType::UInt32);

        // Every feature column must still be present on the final frame.
        for name in ["hurst_8", "cyber_cycle", "griffiths_dc", "regime_label"] {
            assert!(frame.column(name).is_ok());
        }

        Ok(())
    }
}