Skip to main content

quantwave_polars/
features.rs

1//! ML Feature Engineering Polars layer (ta.features.*)
2//!
3//! Wires the rich Rust feature extractors from `quantwave_core::features` into
4//! the .ta. namespace on LazyFrame, following the exact patterns from
5//! quantwave-polars/src/lib.rs (UDF map closures + StructChunked::from_series
6//! for rich multi-outputs + with_columns for lazy exprs).
7//!
8//! This delivers the **minimal locked surface** required for the cross-epic
9//! deliverable (ML Features → Realistic Backtest with Rich Metadata) that
10//! closes quantwave-4ps + quantwave-gwx.
11//!
12//! The canonical executable demonstration + parity verification is the notebook:
13//! docs/examples/notebooks/ml_feature_backtest_parity.py
14//! (uses this surface in documented Rust batch path + equivalent Python streaming generators
15//!  + FeatureToSignal adapter + full rich metadata preservation in trades).
16//!
17//! LOCKED SURFACE (per quantwave-4ps notes, "DETAILED WLX SURFACE REQUIRED..." section, 2026-05-31 IST):
18//! 1. .ta.features.hurst(period) -> column "hurst_{period}" (f64 persistence)
19//! 2. .ta.features.cyber_cycle(length) -> Struct column "cyber_cycle" with fields [cycle, trigger, momentum, signal]
20//! 3. .ta.features.griffiths_dominant_cycle(lower, upper, length) -> column "griffiths_dc" (f64)
21//! 4. .ta.features.regime_features() -> column "regime_label" (u32, from HMM bull_bear for MVP usability)
22//! 5. .ta.features.instantaneous_trendline() -> Struct "itl" {trend, strength}
23//! 6. .ta.features.regime_probs() -> Struct "regime_probs" {prob_bull, prob_bear, prob_steady, prob_crisis, prob_other}
24//! 7. .ta.features.trendflex(length) -> column "trendflex_{length}"
//! 8. .ta.features.ehlers_autocorrelation(length, num_lags) -> Struct "ehlers_autocorr" {dominant_lag, max_correlation}
26//!
27//! All are lazy (exprs built with with_columns + map UDFs; execution deferred to collect).
28//! All delegate directly to the Next<T> wrappers in quantwave-core (zero lookahead by construction).
//! No general build_matrix yet (per instructions; kept minimal); `recommended_matrix()` only chains the locked surface above with fixed parameters.
30//!
31//! Sources recorded (per AGENTS.md + 4ps spec):
32//! - quantwave-core/src/features/hurst.rs (HurstFeatureExtractor + HurstFeatures; wraps indicators/hurst.rs)
33//! - quantwave-core/src/features/cyber_cycle.rs (CyberCycleFeatureExtractor + CyberCycleFeatures; primary source indicators/cyber_cycle.rs:35 per Ehlers "Cybernetic Analysis...")
34//! - quantwave-core/src/features/griffiths_dominant_cycle.rs (GriffithsDominantCycleFeatureExtractor + ...Features; wraps indicators/griffiths_dominant_cycle.rs)
35//! - quantwave-core/src/features/regime.rs + regimes/hmm.rs (regime_to_features + HMM::bull_bear for label; MarketRegime)
36//! - quantwave-core/src/features/mod.rs (wlx prep note 2026-05-30 + AsFeatures skeleton + proptest parity contract)
37//! - quantwave-4ps epic (parent) + wlx child design notes (this surface is the exact contract for the "smoking gun" notebook)
38//! - Existing .ta. patterns in quantwave-polars/src/lib.rs (macd/bbands/supertrend/gap_momentum struct returns, adosc etc. stateful maps, regimes_conditioned_metrics)
39//! - gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) + quantwave-4ub research (P0 feature list)
40//! - quantwave-backtest (future consumer of the metadata columns from these exprs)
41//!
42//! Decision: CyberCycle uses Struct (matches all rich outputs in this crate on Polars 0.46; users .unnest("cyber_cycle") if needed). Regime uses simple but real HMM label (usable in MVP notebook/backtester filters) rather than pure placeholder.
43
44use polars::prelude::*;
45use quantwave_core::features::{self as rust_features};
46use quantwave_core::traits::Next;
47
48// Bring parent crate type into scope for the inherent impl that extends the .ta. namespace.
49use crate::QuantWaveNamespace;
50
51/// Sub-namespace returned by .ta().features().
52/// Methods here implement the exact locked surface for the 4ps/gwx cross-epic deliverable.
53pub struct TaFeaturesNamespace<'a>(pub(crate) &'a LazyFrame);
54
55impl<'a> QuantWaveNamespace<'a> {
56    /// Entry point for the ML features namespace.
57    /// Usage: df.lazy().ta().features().hurst(20) etc.
58    pub fn features(self) -> TaFeaturesNamespace<'a> {
59        TaFeaturesNamespace(self.0)
60    }
61}
62
63impl<'a> TaFeaturesNamespace<'a> {
64    /// Hurst persistence feature (plus internal regime label in the core extractor).
65    /// Output column: "hurst_{period}" (f64).
66    ///
67    /// Delegates to quantwave_core::features::HurstFeatureExtractor (Next<f64, Output=HurstFeatures>).
68    pub fn hurst(self, period: usize) -> LazyFrame {
69        self.0.clone().with_columns([col("close")
70            .map(
71                move |s| {
72                    let mut extractor = rust_features::HurstFeatureExtractor::new(period);
73                    let ca: &Float64Chunked = s.f64()?;
74                    let mut values = Vec::with_capacity(s.len());
75                    for i in 0..s.len() {
76                        let val = ca.get(i).unwrap_or(f64::NAN);
77                        values.push(extractor.next(val).persistence);
78                    }
79                    Ok(Some(Column::from(Series::new(
80                        format!("hurst_{}", period).into(),
81                        values,
82                    ))))
83                },
84                GetOutput::from_type(DataType::Float64),
85            )
86            .alias(&format!("hurst_{}", period))])
87    }
88
89    /// Cyber Cycle rich features (cycle + trigger + derived momentum + signal).
90    /// Returns Struct column named "cyber_cycle" with fields:
91    ///   cycle, trigger, momentum, signal (all f64).
92    ///
93    /// Delegates to quantwave_core::features::CyberCycleFeatureExtractor.
94    /// Struct return matches project convention for multi-output (see macd, bbands, supertrend etc in lib.rs).
95    pub fn cyber_cycle(self, length: usize) -> LazyFrame {
96        self.0.clone().with_columns([col("close")
97            .map(
98                move |s| {
99                    let mut extractor = rust_features::CyberCycleFeatureExtractor::new(length);
100                    let ca: &Float64Chunked = s.f64()?;
101                    let mut cycles = Vec::with_capacity(s.len());
102                    let mut triggers = Vec::with_capacity(s.len());
103                    let mut momenta = Vec::with_capacity(s.len());
104                    let mut signals = Vec::with_capacity(s.len());
105
106                    for i in 0..s.len() {
107                        let val = ca.get(i).unwrap_or(f64::NAN);
108                        let f = extractor.next(val);
109                        cycles.push(f.cycle);
110                        triggers.push(f.trigger);
111                        momenta.push(f.cycle_momentum);
112                        signals.push(f.trigger_signal);
113                    }
114
115                    let s_cycle = Series::new("cycle".into(), cycles);
116                    let s_trigger = Series::new("trigger".into(), triggers);
117                    let s_mom = Series::new("momentum".into(), momenta);
118                    let s_sig = Series::new("signal".into(), signals);
119
120                    let struct_series = StructChunked::from_series(
121                        "cyber_cycle_result".into(),
122                        s.len(),
123                        [s_cycle, s_trigger, s_mom, s_sig].iter(),
124                    )?;
125                    Ok(Some(Column::from(struct_series.into_series())))
126                },
127                GetOutput::from_type(DataType::Struct(vec![
128                    Field::new("cycle".into(), DataType::Float64),
129                    Field::new("trigger".into(), DataType::Float64),
130                    Field::new("momentum".into(), DataType::Float64),
131                    Field::new("signal".into(), DataType::Float64),
132                ])),
133            )
134            .alias("cyber_cycle")])
135    }
136
137    /// Griffiths Dominant Cycle estimate (high-value stationary cycle feature).
138    /// Output column: "griffiths_dc" (f64) — name fixed per locked 4ps deliverable spec (params not encoded in col name).
139    ///
140    /// Delegates to quantwave_core::features::GriffithsDominantCycleFeatureExtractor.
141    pub fn griffiths_dominant_cycle(self, lower: usize, upper: usize, length: usize) -> LazyFrame {
142        self.0.clone().with_columns([col("close")
143            .map(
144                move |s| {
145                    let mut extractor =
146                        rust_features::GriffithsDominantCycleFeatureExtractor::new(lower, upper, length);
147                    let ca: &Float64Chunked = s.f64()?;
148                    let mut values = Vec::with_capacity(s.len());
149                    for i in 0..s.len() {
150                        let val = ca.get(i).unwrap_or(f64::NAN);
151                        values.push(extractor.next(val).dominant_cycle);
152                    }
153                    Ok(Some(Column::from(Series::new("griffiths_dc".into(), values))))
154                },
155                GetOutput::from_type(DataType::Float64),
156            )
157            .alias("griffiths_dc")])
158    }
159
160    /// Basic regime label feature (usable for filters/sizing in backtester + MVP notebook).
161    /// Output column: "regime_label" (u32).
162    ///
163    /// For this minimal surface we compute a real label using the HMM bull_bear detector
164    /// on close (consistent with existing regime exprs in lib.rs). Simple label satisfies
165    /// the locked 4ps deliverable spec; richer probs/one-hot can layer on later.
166    ///
167    /// Delegates to quantwave_core::regimes::hmm::HMM + MarketRegime (see also regime.rs helpers).
168    pub fn regime_features(self) -> LazyFrame {
169        self.0.clone().with_columns([col("close")
170            .map(
171                move |s| {
172                    let mut hmm = quantwave_core::regimes::hmm::HMM::bull_bear();
173                    let ca = s.f64()?;
174                    let mut labels = Vec::with_capacity(s.len());
175                    for i in 0..s.len() {
176                        let val = ca.get(i).unwrap_or(f64::NAN);
177                        let regime = if val.is_nan() {
178                            quantwave_core::regimes::MarketRegime::Steady
179                        } else {
180                            hmm.next(val)
181                        };
182                        let label: u32 = match regime {
183                            quantwave_core::regimes::MarketRegime::Bull => 1,
184                            quantwave_core::regimes::MarketRegime::Bear => 2,
185                            quantwave_core::regimes::MarketRegime::Crisis => 3,
186                            quantwave_core::regimes::MarketRegime::Steady => 0,
187                            quantwave_core::regimes::MarketRegime::Cluster(c) => 4 + (c as u32),
188                        };
189                        labels.push(label);
190                    }
191                    Ok(Some(Column::from(Series::new("regime_label".into(), labels))))
192                },
193                GetOutput::from_type(DataType::UInt32),
194            )
195            .alias("regime_label")])
196    }
197
198    /// Instantaneous Trendline (Ehlers) with derived trend-strength feature.
199    /// Returns Struct column "itl" with fields: trend, strength (f64).
200    pub fn instantaneous_trendline(self) -> LazyFrame {
201        self.0.clone().with_columns([col("close")
202            .map(
203                move |s| {
204                    let mut extractor = rust_features::InstantaneousTrendlineFeatureExtractor::new();
205                    let ca: &Float64Chunked = s.f64()?;
206                    let mut trends = Vec::with_capacity(s.len());
207                    let mut strengths = Vec::with_capacity(s.len());
208                    for i in 0..s.len() {
209                        let val = ca.get(i).unwrap_or(f64::NAN);
210                        let f = extractor.next(val);
211                        trends.push(f.trend);
212                        strengths.push(f.strength);
213                    }
214                    let struct_series = StructChunked::from_series(
215                        "itl_result".into(),
216                        s.len(),
217                        [
218                            Series::new("trend".into(), trends),
219                            Series::new("strength".into(), strengths),
220                        ]
221                        .iter(),
222                    )?;
223                    Ok(Some(Column::from(struct_series.into_series())))
224                },
225                GetOutput::from_type(DataType::Struct(vec![
226                    Field::new("trend".into(), DataType::Float64),
227                    Field::new("strength".into(), DataType::Float64),
228                ])),
229            )
230            .alias("itl")])
231    }
232
233    /// HMM soft regime probabilities (bull/bear forward probs from Viterbi deltas).
234    /// Returns Struct column "regime_probs" with prob_bull, prob_bear, prob_steady, prob_crisis, prob_other.
235    pub fn regime_probs(self) -> LazyFrame {
236        self.0.clone().with_columns([col("close")
237            .map(
238                move |s| {
239                    let mut extractor = rust_features::RegimeProbFeatureExtractor::bull_bear();
240                    let ca = s.f64()?;
241                    let mut bull = Vec::with_capacity(s.len());
242                    let mut bear = Vec::with_capacity(s.len());
243                    let mut steady = Vec::with_capacity(s.len());
244                    let mut crisis = Vec::with_capacity(s.len());
245                    let mut other = Vec::with_capacity(s.len());
246                    for i in 0..s.len() {
247                        let val = ca.get(i).unwrap_or(f64::NAN);
248                        let f = extractor.next(val);
249                        bull.push(f.probs[0]);
250                        bear.push(f.probs[1]);
251                        crisis.push(f.probs[2]);
252                        steady.push(f.probs[3]);
253                        other.push(f.probs[4]);
254                    }
255                    let struct_series = StructChunked::from_series(
256                        "regime_probs_result".into(),
257                        s.len(),
258                        [
259                            Series::new("prob_bull".into(), bull),
260                            Series::new("prob_bear".into(), bear),
261                            Series::new("prob_steady".into(), steady),
262                            Series::new("prob_crisis".into(), crisis),
263                            Series::new("prob_other".into(), other),
264                        ]
265                        .iter(),
266                    )?;
267                    Ok(Some(Column::from(struct_series.into_series())))
268                },
269                GetOutput::from_type(DataType::Struct(vec![
270                    Field::new("prob_bull".into(), DataType::Float64),
271                    Field::new("prob_bear".into(), DataType::Float64),
272                    Field::new("prob_steady".into(), DataType::Float64),
273                    Field::new("prob_crisis".into(), DataType::Float64),
274                    Field::new("prob_other".into(), DataType::Float64),
275                ])),
276            )
277            .alias("regime_probs")])
278    }
279
280    /// Trendflex zero-lag trend component.
281    /// Output column: "trendflex_{length}" (f64).
282    pub fn trendflex(self, length: usize) -> LazyFrame {
283        self.0.clone().with_columns([col("close")
284            .map(
285                move |s| {
286                    let mut extractor = rust_features::TrendflexFeatureExtractor::new(length);
287                    let ca: &Float64Chunked = s.f64()?;
288                    let mut values = Vec::with_capacity(s.len());
289                    for i in 0..s.len() {
290                        let val = ca.get(i).unwrap_or(f64::NAN);
291                        values.push(extractor.next(val).trendflex);
292                    }
293                    Ok(Some(Column::from(Series::new(
294                        format!("trendflex_{}", length).into(),
295                        values,
296                    ))))
297                },
298                GetOutput::from_type(DataType::Float64),
299            )
300            .alias(&format!("trendflex_{}", length))])
301    }
302
303    /// Ehlers Autocorrelation summary features (dominant lag + max correlation).
304    /// Returns Struct column "ehlers_autocorr" with dominant_lag (u32), max_correlation (f64).
305    pub fn ehlers_autocorrelation(self, length: usize, num_lags: usize) -> LazyFrame {
306        self.0.clone().with_columns([col("close")
307            .map(
308                move |s| {
309                    let mut extractor =
310                        rust_features::EhlersAutocorrelationFeatureExtractor::new(length, num_lags);
311                    let ca: &Float64Chunked = s.f64()?;
312                    let mut lags = Vec::with_capacity(s.len());
313                    let mut max_corrs = Vec::with_capacity(s.len());
314                    for i in 0..s.len() {
315                        let val = ca.get(i).unwrap_or(f64::NAN);
316                        let f = extractor.next(val);
317                        lags.push(f.dominant_lag as u32);
318                        max_corrs.push(f.max_correlation);
319                    }
320                    let struct_series = StructChunked::from_series(
321                        "ehlers_autocorr_result".into(),
322                        s.len(),
323                        [
324                            Series::new("dominant_lag".into(), lags),
325                            Series::new("max_correlation".into(), max_corrs),
326                        ]
327                        .iter(),
328                    )?;
329                    Ok(Some(Column::from(struct_series.into_series())))
330                },
331                GetOutput::from_type(DataType::Struct(vec![
332                    Field::new("dominant_lag".into(), DataType::UInt32),
333                    Field::new("max_correlation".into(), DataType::Float64),
334                ])),
335            )
336            .alias("ehlers_autocorr")])
337    }
338
339    /// Build the recommended ML feature matrix (all locked `.ta.features.*` outputs).
340    ///
341    /// Chains: hurst, cyber_cycle (struct), griffiths_dc, regime_label, itl (struct),
342    /// regime_probs (struct), trendflex, ehlers_autocorr (struct).
343    /// Matches the `recommended` preset in `quantwave.build_feature_matrix()` (rdpk).
344    pub fn recommended_matrix(self) -> LazyFrame {
345        use crate::QuantWaveExt;
346        self.hurst(100)
347            .ta()
348            .features()
349            .cyber_cycle(30)
350            .ta()
351            .features()
352            .griffiths_dominant_cycle(6, 50, 30)
353            .ta()
354            .features()
355            .regime_features()
356            .ta()
357            .features()
358            .instantaneous_trendline()
359            .ta()
360            .features()
361            .regime_probs()
362            .ta()
363            .features()
364            .trendflex(30)
365            .ta()
366            .features()
367            .ehlers_autocorrelation(30, 10)
368    }
369}
370
371// The struct is pub so it is reachable as quantwave_polars::features::TaFeaturesNamespace if needed for turbofish/docs.
372// No additional re-export required here; the .ta().features() chaining works via the impl on QuantWaveNamespace
373// (the mod features; declaration in lib.rs ensures the impl is linked).
374
#[cfg(test)]
mod tests {
    use super::*;
    use crate::QuantWaveExt; // brings the .ta() extension method into scope for the smoke test

    /// Number of synthetic bars pushed through every feature.
    const ROWS: usize = 40;

    /// Expected dtype of a struct column whose fields are all `Float64`, in the given order.
    fn float_struct(field_names: &[&str]) -> DataType {
        DataType::Struct(
            field_names
                .iter()
                .map(|name| Field::new((*name).into(), DataType::Float64))
                .collect(),
        )
    }

    /// Smoke test for the locked `.ta.features.*` surface (quantwave-4ps wlx slice).
    ///
    /// Exercises all eight feature methods on a tiny close series, chaining each call onto the
    /// frame produced by the previous one, and verifies column names, dtypes and that every
    /// step collects. At the end it checks that no rows were added or dropped and that all
    /// feature columns coexist in the final frame.
    /// (Full numeric parity + proptests live in quantwave-core/tests/ per project rules.)
    #[test]
    fn smoke_ta_features_surface() -> PolarsResult<()> {
        // Small oscillatory + trending price series. This is a shape/dtype smoke test only;
        // it makes no claim that the extractors are warmed up for the parameters used below.
        let prices: Vec<f64> = (0..ROWS)
            .map(|i| 100.0 + 3.0 * (i as f64 * 0.4).sin() + (i as f64) * 0.1)
            .collect();

        let df = df!["close" => prices]?;
        let lf = df.lazy();

        // 1. hurst -> "hurst_8"
        let out = lf.clone().ta().features().hurst(8).collect()?;
        assert!(out.column("hurst_8").is_ok());
        assert_eq!(out.column("hurst_8")?.dtype(), &DataType::Float64);

        // 2. cyber_cycle -> struct "cyber_cycle"
        let out = out.lazy().ta().features().cyber_cycle(12).collect()?;
        let cc = out.column("cyber_cycle")?;
        assert_eq!(
            cc.dtype(),
            &float_struct(&["cycle", "trigger", "momentum", "signal"])
        );
        let ca = cc.struct_()?;
        // Nulls are never emitted by the UDF, so the last row must hold a value.
        assert!(ca
            .field_by_name("cycle".into())?
            .f64()?
            .get(ROWS - 1)
            .is_some());

        // 3. griffiths_dominant_cycle -> "griffiths_dc"
        let out = out
            .lazy()
            .ta()
            .features()
            .griffiths_dominant_cycle(6, 40, 25)
            .collect()?;
        assert!(out.column("griffiths_dc").is_ok());
        assert_eq!(out.column("griffiths_dc")?.dtype(), &DataType::Float64);

        // 4. regime_features -> "regime_label"
        let out = out.lazy().ta().features().regime_features().collect()?;
        assert!(out.column("regime_label").is_ok());
        assert_eq!(out.column("regime_label")?.dtype(), &DataType::UInt32);

        // 5. instantaneous_trendline -> struct "itl"
        let out = out
            .lazy()
            .ta()
            .features()
            .instantaneous_trendline()
            .collect()?;
        assert_eq!(
            out.column("itl")?.dtype(),
            &float_struct(&["trend", "strength"])
        );

        // 6. regime_probs -> struct "regime_probs"
        let out = out.lazy().ta().features().regime_probs().collect()?;
        assert_eq!(
            out.column("regime_probs")?.dtype(),
            &float_struct(&[
                "prob_bull",
                "prob_bear",
                "prob_steady",
                "prob_crisis",
                "prob_other",
            ])
        );

        // 7. trendflex -> "trendflex_20"
        let out = out.lazy().ta().features().trendflex(20).collect()?;
        assert!(out.column("trendflex_20").is_ok());
        assert_eq!(out.column("trendflex_20")?.dtype(), &DataType::Float64);

        // 8. ehlers_autocorrelation -> struct "ehlers_autocorr"
        let out = out
            .lazy()
            .ta()
            .features()
            .ehlers_autocorrelation(30, 10)
            .collect()?;
        assert_eq!(
            out.column("ehlers_autocorr")?.dtype(),
            &DataType::Struct(vec![
                Field::new("dominant_lag".into(), DataType::UInt32),
                Field::new("max_correlation".into(), DataType::Float64),
            ])
        );

        // Every feature is row-aligned with the input and earlier columns survive later calls.
        assert_eq!(out.height(), ROWS);
        for name in [
            "close",
            "hurst_8",
            "cyber_cycle",
            "griffiths_dc",
            "regime_label",
            "itl",
            "regime_probs",
            "trendflex_20",
            "ehlers_autocorr",
        ] {
            assert!(out.column(name).is_ok(), "missing column {name}");
        }

        Ok(())
    }
}