quantwave_polars/features.rs
1//! ML Feature Engineering Polars layer (ta.features.*)
2//!
3//! Wires the rich Rust feature extractors from `quantwave_core::features` into
4//! the .ta. namespace on LazyFrame, following the exact patterns from
5//! quantwave-polars/src/lib.rs (UDF map closures + StructChunked::from_series
6//! for rich multi-outputs + with_columns for lazy exprs).
7//!
8//! This delivers the **minimal locked surface** required for the cross-epic
9//! deliverable (ML Features → Realistic Backtest with Rich Metadata) that
10//! closes quantwave-4ps + quantwave-gwx.
11//!
12//! The canonical executable demonstration + parity verification is the notebook:
13//! docs/examples/notebooks/ml_feature_backtest_parity.py
14//! (uses this surface in documented Rust batch path + equivalent Python streaming generators
15//! + FeatureToSignal adapter + full rich metadata preservation in trades).
16//!
17//! LOCKED SURFACE (per quantwave-4ps notes, "DETAILED WLX SURFACE REQUIRED..." section, 2026-05-31 IST):
18//! 1. .ta.features.hurst(period) -> column "hurst_{period}" (f64 persistence)
19//! 2. .ta.features.cyber_cycle(length) -> Struct column "cyber_cycle" with fields [cycle, trigger, momentum, signal]
20//! 3. .ta.features.griffiths_dominant_cycle(lower, upper, length) -> column "griffiths_dc" (f64)
21//! 4. .ta.features.regime_features() -> column "regime_label" (u32, from HMM bull_bear for MVP usability)
22//!
23//! All are lazy (exprs built with with_columns + map UDFs; execution deferred to collect).
24//! All delegate directly to the Next<T> wrappers in quantwave-core (zero lookahead by construction).
25//! No build_matrix yet (per instructions; kept minimal).
26//!
27//! Sources recorded (per AGENTS.md + 4ps spec):
28//! - quantwave-core/src/features/hurst.rs (HurstFeatureExtractor + HurstFeatures; wraps indicators/hurst.rs)
29//! - quantwave-core/src/features/cyber_cycle.rs (CyberCycleFeatureExtractor + CyberCycleFeatures; primary source indicators/cyber_cycle.rs:35 per Ehlers "Cybernetic Analysis...")
30//! - quantwave-core/src/features/griffiths_dominant_cycle.rs (GriffithsDominantCycleFeatureExtractor + ...Features; wraps indicators/griffiths_dominant_cycle.rs)
31//! - quantwave-core/src/features/regime.rs + regimes/hmm.rs (regime_to_features + HMM::bull_bear for label; MarketRegime)
32//! - quantwave-core/src/features/mod.rs (wlx prep note 2026-05-30 + AsFeatures skeleton + proptest parity contract)
33//! - quantwave-4ps epic (parent) + wlx child design notes (this surface is the exact contract for the "smoking gun" notebook)
34//! - Existing .ta. patterns in quantwave-polars/src/lib.rs (macd/bbands/supertrend/gap_momentum struct returns, adosc etc. stateful maps, regimes_conditioned_metrics)
35//! - gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) + quantwave-4ub research (P0 feature list)
36//! - quantwave-backtest (future consumer of the metadata columns from these exprs)
37//!
38//! Decision: CyberCycle uses Struct (matches all rich outputs in this crate on Polars 0.46; users .unnest("cyber_cycle") if needed). Regime uses simple but real HMM label (usable in MVP notebook/backtester filters) rather than pure placeholder.
39
40use polars::prelude::*;
41use quantwave_core::features::{self as rust_features};
42use quantwave_core::traits::Next;
43
44// Bring parent crate type into scope for the inherent impl that extends the .ta. namespace.
45use crate::QuantWaveNamespace;
46
/// Sub-namespace returned by `.ta().features()`.
///
/// A thin tuple wrapper around a borrowed [`LazyFrame`]. The methods implemented on it
/// form the locked surface for the 4ps/gwx cross-epic deliverable; each one clones the
/// borrowed frame and returns a new [`LazyFrame`] with one feature column appended.
///
/// The inner reference is `pub(crate)`, so values of this type are constructed inside
/// this crate only (see `QuantWaveNamespace::features`).
pub struct TaFeaturesNamespace<'a>(pub(crate) &'a LazyFrame);
50
51impl<'a> QuantWaveNamespace<'a> {
52 /// Entry point for the ML features namespace.
53 /// Usage: df.lazy().ta().features().hurst(20) etc.
54 pub fn features(self) -> TaFeaturesNamespace<'a> {
55 TaFeaturesNamespace(self.0)
56 }
57}
58
59impl<'a> TaFeaturesNamespace<'a> {
60 /// Hurst persistence feature (plus internal regime label in the core extractor).
61 /// Output column: "hurst_{period}" (f64).
62 ///
63 /// Delegates to quantwave_core::features::HurstFeatureExtractor (Next<f64, Output=HurstFeatures>).
64 pub fn hurst(self, period: usize) -> LazyFrame {
65 self.0.clone().with_columns([col("close")
66 .map(
67 move |s| {
68 let mut extractor = rust_features::HurstFeatureExtractor::new(period);
69 let ca: &Float64Chunked = s.f64()?;
70 let mut values = Vec::with_capacity(s.len());
71 for i in 0..s.len() {
72 let val = ca.get(i).unwrap_or(f64::NAN);
73 values.push(extractor.next(val).persistence);
74 }
75 Ok(Some(Column::from(Series::new(
76 format!("hurst_{}", period).into(),
77 values,
78 ))))
79 },
80 GetOutput::from_type(DataType::Float64),
81 )
82 .alias(&format!("hurst_{}", period))])
83 }
84
85 /// Cyber Cycle rich features (cycle + trigger + derived momentum + signal).
86 /// Returns Struct column named "cyber_cycle" with fields:
87 /// cycle, trigger, momentum, signal (all f64).
88 ///
89 /// Delegates to quantwave_core::features::CyberCycleFeatureExtractor.
90 /// Struct return matches project convention for multi-output (see macd, bbands, supertrend etc in lib.rs).
91 pub fn cyber_cycle(self, length: usize) -> LazyFrame {
92 self.0.clone().with_columns([col("close")
93 .map(
94 move |s| {
95 let mut extractor = rust_features::CyberCycleFeatureExtractor::new(length);
96 let ca: &Float64Chunked = s.f64()?;
97 let mut cycles = Vec::with_capacity(s.len());
98 let mut triggers = Vec::with_capacity(s.len());
99 let mut momenta = Vec::with_capacity(s.len());
100 let mut signals = Vec::with_capacity(s.len());
101
102 for i in 0..s.len() {
103 let val = ca.get(i).unwrap_or(f64::NAN);
104 let f = extractor.next(val);
105 cycles.push(f.cycle);
106 triggers.push(f.trigger);
107 momenta.push(f.cycle_momentum);
108 signals.push(f.trigger_signal);
109 }
110
111 let s_cycle = Series::new("cycle".into(), cycles);
112 let s_trigger = Series::new("trigger".into(), triggers);
113 let s_mom = Series::new("momentum".into(), momenta);
114 let s_sig = Series::new("signal".into(), signals);
115
116 let struct_series = StructChunked::from_series(
117 "cyber_cycle_result".into(),
118 s.len(),
119 [s_cycle, s_trigger, s_mom, s_sig].iter(),
120 )?;
121 Ok(Some(Column::from(struct_series.into_series())))
122 },
123 GetOutput::from_type(DataType::Struct(vec![
124 Field::new("cycle".into(), DataType::Float64),
125 Field::new("trigger".into(), DataType::Float64),
126 Field::new("momentum".into(), DataType::Float64),
127 Field::new("signal".into(), DataType::Float64),
128 ])),
129 )
130 .alias("cyber_cycle")])
131 }
132
133 /// Griffiths Dominant Cycle estimate (high-value stationary cycle feature).
134 /// Output column: "griffiths_dc" (f64) — name fixed per locked 4ps deliverable spec (params not encoded in col name).
135 ///
136 /// Delegates to quantwave_core::features::GriffithsDominantCycleFeatureExtractor.
137 pub fn griffiths_dominant_cycle(self, lower: usize, upper: usize, length: usize) -> LazyFrame {
138 self.0.clone().with_columns([col("close")
139 .map(
140 move |s| {
141 let mut extractor =
142 rust_features::GriffithsDominantCycleFeatureExtractor::new(lower, upper, length);
143 let ca: &Float64Chunked = s.f64()?;
144 let mut values = Vec::with_capacity(s.len());
145 for i in 0..s.len() {
146 let val = ca.get(i).unwrap_or(f64::NAN);
147 values.push(extractor.next(val).dominant_cycle);
148 }
149 Ok(Some(Column::from(Series::new("griffiths_dc".into(), values))))
150 },
151 GetOutput::from_type(DataType::Float64),
152 )
153 .alias("griffiths_dc")])
154 }
155
156 /// Basic regime label feature (usable for filters/sizing in backtester + MVP notebook).
157 /// Output column: "regime_label" (u32).
158 ///
159 /// For this minimal surface we compute a real label using the HMM bull_bear detector
160 /// on close (consistent with existing regime exprs in lib.rs). Simple label satisfies
161 /// the locked 4ps deliverable spec; richer probs/one-hot can layer on later.
162 ///
163 /// Delegates to quantwave_core::regimes::hmm::HMM + MarketRegime (see also regime.rs helpers).
164 pub fn regime_features(self) -> LazyFrame {
165 self.0.clone().with_columns([col("close")
166 .map(
167 move |s| {
168 let mut hmm = quantwave_core::regimes::hmm::HMM::bull_bear();
169 let ca = s.f64()?;
170 let mut labels = Vec::with_capacity(s.len());
171 for i in 0..s.len() {
172 let val = ca.get(i).unwrap_or(f64::NAN);
173 let regime = if val.is_nan() {
174 quantwave_core::regimes::MarketRegime::Steady
175 } else {
176 hmm.next(val)
177 };
178 let label: u32 = match regime {
179 quantwave_core::regimes::MarketRegime::Bull => 1,
180 quantwave_core::regimes::MarketRegime::Bear => 2,
181 quantwave_core::regimes::MarketRegime::Crisis => 3,
182 quantwave_core::regimes::MarketRegime::Steady => 0,
183 quantwave_core::regimes::MarketRegime::Cluster(c) => 4 + (c as u32),
184 };
185 labels.push(label);
186 }
187 Ok(Some(Column::from(Series::new("regime_label".into(), labels))))
188 },
189 GetOutput::from_type(DataType::UInt32),
190 )
191 .alias("regime_label")])
192 }
193}
194
195// The struct is pub so it is reachable as quantwave_polars::features::TaFeaturesNamespace if needed for turbofish/docs.
196// No additional re-export required here; the .ta().features() chaining works via the impl on QuantWaveNamespace
197// (the mod features; declaration in lib.rs ensures the impl is linked).
198
#[cfg(test)]
mod tests {
    use super::*;
    use crate::QuantWaveExt; // provides the `.ta()` extension method used below

    /// Asserts that `frame` contains a column `name` whose dtype equals `expected`.
    fn assert_column_dtype(
        frame: &DataFrame,
        name: &str,
        expected: &DataType,
    ) -> PolarsResult<()> {
        assert!(frame.column(name).is_ok());
        assert_eq!(frame.column(name)?.dtype(), expected);
        Ok(())
    }

    /// Smoke test for the minimal locked `.ta.features.*` surface (quantwave-4ps wlx slice).
    ///
    /// Runs the four feature methods one after another on a 40-row `close` series,
    /// collecting after each step, and checks only column names and dtypes.
    /// (Numeric parity and proptests live in quantwave-core/tests/ per project rules.)
    #[test]
    fn smoke_ta_features_surface() -> PolarsResult<()> {
        // Sine wave on top of a slow upward drift: oscillating and trending at once.
        let prices: Vec<f64> = (0..40)
            .map(|i| {
                let t = i as f64;
                100.0 + 3.0 * (t * 0.4).sin() + t * 0.1
            })
            .collect();
        let base = df!["close" => prices]?.lazy();

        // 1. hurst -> "hurst_8"
        let with_hurst = base.ta().features().hurst(8).collect()?;
        assert_column_dtype(&with_hurst, "hurst_8", &DataType::Float64)?;

        // 2. cyber_cycle -> Struct column "cyber_cycle"
        let with_cycle = with_hurst.lazy().ta().features().cyber_cycle(12).collect()?;
        let expected_struct = DataType::Struct(vec![
            Field::new("cycle".into(), DataType::Float64),
            Field::new("trigger".into(), DataType::Float64),
            Field::new("momentum".into(), DataType::Float64),
            Field::new("signal".into(), DataType::Float64),
        ]);
        let cycle_column = with_cycle.column("cyber_cycle")?;
        assert_eq!(cycle_column.dtype().clone(), expected_struct);
        let last_cycle = cycle_column
            .struct_()?
            .field_by_name("cycle")?
            .f64()?
            .get(39);
        assert!(last_cycle.is_some());

        // 3. griffiths_dominant_cycle -> "griffiths_dc"
        let with_griffiths = with_cycle
            .lazy()
            .ta()
            .features()
            .griffiths_dominant_cycle(6, 40, 25)
            .collect()?;
        assert_column_dtype(&with_griffiths, "griffiths_dc", &DataType::Float64)?;

        // 4. regime_features -> "regime_label"
        let with_regime = with_griffiths
            .lazy()
            .ta()
            .features()
            .regime_features()
            .collect()?;
        assert_column_dtype(&with_regime, "regime_label", &DataType::UInt32)?;

        // Every feature column added along the way must survive on the final frame.
        for name in ["hurst_8", "cyber_cycle", "griffiths_dc", "regime_label"] {
            assert!(with_regime.column(name).is_ok());
        }

        Ok(())
    }
}