quantwave_polars/features.rs
1//! ML Feature Engineering Polars layer (ta.features.*)
2//!
3//! Wires the rich Rust feature extractors from `quantwave_core::features` into
4//! the .ta. namespace on LazyFrame, following the exact patterns from
5//! quantwave-polars/src/lib.rs (UDF map closures + StructChunked::from_series
6//! for rich multi-outputs + with_columns for lazy exprs).
7//!
8//! This delivers the **minimal locked surface** required for the cross-epic
9//! deliverable (ML Features → Realistic Backtest with Rich Metadata) that
10//! closes quantwave-4ps + quantwave-gwx.
11//!
12//! The canonical executable demonstration + parity verification is the notebook:
13//! docs/examples/notebooks/ml_feature_backtest_parity.py
14//! (uses this surface in documented Rust batch path + equivalent Python streaming generators
15//! + FeatureToSignal adapter + full rich metadata preservation in trades).
16//!
17//! LOCKED SURFACE (per quantwave-4ps notes, "DETAILED WLX SURFACE REQUIRED..." section, 2026-05-31 IST):
18//! 1. .ta.features.hurst(period) -> column "hurst_{period}" (f64 persistence)
19//! 2. .ta.features.cyber_cycle(length) -> Struct column "cyber_cycle" with fields [cycle, trigger, momentum, signal]
20//! 3. .ta.features.griffiths_dominant_cycle(lower, upper, length) -> column "griffiths_dc" (f64)
21//! 4. .ta.features.regime_features() -> column "regime_label" (u32, from HMM bull_bear for MVP usability)
22//! 5. .ta.features.instantaneous_trendline() -> Struct "itl" {trend, strength}
23//! 6. .ta.features.regime_probs() -> Struct "regime_probs" {prob_bull, prob_bear, prob_steady, prob_crisis, prob_other}
24//! 7. .ta.features.trendflex(length) -> column "trendflex_{length}"
//! 8. .ta.features.ehlers_autocorrelation(length, num_lags) -> Struct "ehlers_autocorr" {dominant_lag, max_correlation}
26//!
27//! All are lazy (exprs built with with_columns + map UDFs; execution deferred to collect).
28//! All delegate directly to the Next<T> wrappers in quantwave-core (zero lookahead by construction).
//! No general build_matrix yet (per instructions; kept minimal); `recommended_matrix()` only chains the eight locked outputs with fixed parameters.
30//!
31//! Sources recorded (per AGENTS.md + 4ps spec):
32//! - quantwave-core/src/features/hurst.rs (HurstFeatureExtractor + HurstFeatures; wraps indicators/hurst.rs)
33//! - quantwave-core/src/features/cyber_cycle.rs (CyberCycleFeatureExtractor + CyberCycleFeatures; primary source indicators/cyber_cycle.rs:35 per Ehlers "Cybernetic Analysis...")
34//! - quantwave-core/src/features/griffiths_dominant_cycle.rs (GriffithsDominantCycleFeatureExtractor + ...Features; wraps indicators/griffiths_dominant_cycle.rs)
35//! - quantwave-core/src/features/regime.rs + regimes/hmm.rs (regime_to_features + HMM::bull_bear for label; MarketRegime)
36//! - quantwave-core/src/features/mod.rs (wlx prep note 2026-05-30 + AsFeatures skeleton + proptest parity contract)
37//! - quantwave-4ps epic (parent) + wlx child design notes (this surface is the exact contract for the "smoking gun" notebook)
38//! - Existing .ta. patterns in quantwave-polars/src/lib.rs (macd/bbands/supertrend/gap_momentum struct returns, adosc etc. stateful maps, regimes_conditioned_metrics)
39//! - gw7s notebook (docs/examples/notebooks/ml_feature_stability.py) + quantwave-4ub research (P0 feature list)
40//! - quantwave-backtest (future consumer of the metadata columns from these exprs)
41//!
42//! Decision: CyberCycle uses Struct (matches all rich outputs in this crate on Polars 0.46; users .unnest("cyber_cycle") if needed). Regime uses simple but real HMM label (usable in MVP notebook/backtester filters) rather than pure placeholder.
43
44use polars::prelude::*;
45use quantwave_core::features::{self as rust_features};
46use quantwave_core::traits::Next;
47
48// Bring parent crate type into scope for the inherent impl that extends the .ta. namespace.
49use crate::QuantWaveNamespace;
50
/// Handle for the ML-feature sub-namespace, obtained through `.ta().features()`.
///
/// Holds a borrowed [`LazyFrame`]; the field is crate-visible so the parent
/// namespace can construct the handle. The methods on this type implement the
/// locked surface of the 4ps/gwx cross-epic deliverable.
pub struct TaFeaturesNamespace<'a>(pub(crate) &'a LazyFrame);
54
55impl<'a> QuantWaveNamespace<'a> {
56 /// Entry point for the ML features namespace.
57 /// Usage: df.lazy().ta().features().hurst(20) etc.
58 pub fn features(self) -> TaFeaturesNamespace<'a> {
59 TaFeaturesNamespace(self.0)
60 }
61}
62
63impl<'a> TaFeaturesNamespace<'a> {
64 /// Hurst persistence feature (plus internal regime label in the core extractor).
65 /// Output column: "hurst_{period}" (f64).
66 ///
67 /// Delegates to quantwave_core::features::HurstFeatureExtractor (Next<f64, Output=HurstFeatures>).
68 pub fn hurst(self, period: usize) -> LazyFrame {
69 self.0.clone().with_columns([col("close")
70 .map(
71 move |s| {
72 let mut extractor = rust_features::HurstFeatureExtractor::new(period);
73 let ca: &Float64Chunked = s.f64()?;
74 let mut values = Vec::with_capacity(s.len());
75 for i in 0..s.len() {
76 let val = ca.get(i).unwrap_or(f64::NAN);
77 values.push(extractor.next(val).persistence);
78 }
79 Ok(Some(Column::from(Series::new(
80 format!("hurst_{}", period).into(),
81 values,
82 ))))
83 },
84 GetOutput::from_type(DataType::Float64),
85 )
86 .alias(&format!("hurst_{}", period))])
87 }
88
89 /// Cyber Cycle rich features (cycle + trigger + derived momentum + signal).
90 /// Returns Struct column named "cyber_cycle" with fields:
91 /// cycle, trigger, momentum, signal (all f64).
92 ///
93 /// Delegates to quantwave_core::features::CyberCycleFeatureExtractor.
94 /// Struct return matches project convention for multi-output (see macd, bbands, supertrend etc in lib.rs).
95 pub fn cyber_cycle(self, length: usize) -> LazyFrame {
96 self.0.clone().with_columns([col("close")
97 .map(
98 move |s| {
99 let mut extractor = rust_features::CyberCycleFeatureExtractor::new(length);
100 let ca: &Float64Chunked = s.f64()?;
101 let mut cycles = Vec::with_capacity(s.len());
102 let mut triggers = Vec::with_capacity(s.len());
103 let mut momenta = Vec::with_capacity(s.len());
104 let mut signals = Vec::with_capacity(s.len());
105
106 for i in 0..s.len() {
107 let val = ca.get(i).unwrap_or(f64::NAN);
108 let f = extractor.next(val);
109 cycles.push(f.cycle);
110 triggers.push(f.trigger);
111 momenta.push(f.cycle_momentum);
112 signals.push(f.trigger_signal);
113 }
114
115 let s_cycle = Series::new("cycle".into(), cycles);
116 let s_trigger = Series::new("trigger".into(), triggers);
117 let s_mom = Series::new("momentum".into(), momenta);
118 let s_sig = Series::new("signal".into(), signals);
119
120 let struct_series = StructChunked::from_series(
121 "cyber_cycle_result".into(),
122 s.len(),
123 [s_cycle, s_trigger, s_mom, s_sig].iter(),
124 )?;
125 Ok(Some(Column::from(struct_series.into_series())))
126 },
127 GetOutput::from_type(DataType::Struct(vec![
128 Field::new("cycle".into(), DataType::Float64),
129 Field::new("trigger".into(), DataType::Float64),
130 Field::new("momentum".into(), DataType::Float64),
131 Field::new("signal".into(), DataType::Float64),
132 ])),
133 )
134 .alias("cyber_cycle")])
135 }
136
137 /// Griffiths Dominant Cycle estimate (high-value stationary cycle feature).
138 /// Output column: "griffiths_dc" (f64) — name fixed per locked 4ps deliverable spec (params not encoded in col name).
139 ///
140 /// Delegates to quantwave_core::features::GriffithsDominantCycleFeatureExtractor.
141 pub fn griffiths_dominant_cycle(self, lower: usize, upper: usize, length: usize) -> LazyFrame {
142 self.0.clone().with_columns([col("close")
143 .map(
144 move |s| {
145 let mut extractor =
146 rust_features::GriffithsDominantCycleFeatureExtractor::new(lower, upper, length);
147 let ca: &Float64Chunked = s.f64()?;
148 let mut values = Vec::with_capacity(s.len());
149 for i in 0..s.len() {
150 let val = ca.get(i).unwrap_or(f64::NAN);
151 values.push(extractor.next(val).dominant_cycle);
152 }
153 Ok(Some(Column::from(Series::new("griffiths_dc".into(), values))))
154 },
155 GetOutput::from_type(DataType::Float64),
156 )
157 .alias("griffiths_dc")])
158 }
159
160 /// Basic regime label feature (usable for filters/sizing in backtester + MVP notebook).
161 /// Output column: "regime_label" (u32).
162 ///
163 /// For this minimal surface we compute a real label using the HMM bull_bear detector
164 /// on close (consistent with existing regime exprs in lib.rs). Simple label satisfies
165 /// the locked 4ps deliverable spec; richer probs/one-hot can layer on later.
166 ///
167 /// Delegates to quantwave_core::regimes::hmm::HMM + MarketRegime (see also regime.rs helpers).
168 pub fn regime_features(self) -> LazyFrame {
169 self.0.clone().with_columns([col("close")
170 .map(
171 move |s| {
172 let mut hmm = quantwave_core::regimes::hmm::HMM::bull_bear();
173 let ca = s.f64()?;
174 let mut labels = Vec::with_capacity(s.len());
175 for i in 0..s.len() {
176 let val = ca.get(i).unwrap_or(f64::NAN);
177 let regime = if val.is_nan() {
178 quantwave_core::regimes::MarketRegime::Steady
179 } else {
180 hmm.next(val)
181 };
182 let label: u32 = match regime {
183 quantwave_core::regimes::MarketRegime::Bull => 1,
184 quantwave_core::regimes::MarketRegime::Bear => 2,
185 quantwave_core::regimes::MarketRegime::Crisis => 3,
186 quantwave_core::regimes::MarketRegime::Steady => 0,
187 quantwave_core::regimes::MarketRegime::Cluster(c) => 4 + (c as u32),
188 };
189 labels.push(label);
190 }
191 Ok(Some(Column::from(Series::new("regime_label".into(), labels))))
192 },
193 GetOutput::from_type(DataType::UInt32),
194 )
195 .alias("regime_label")])
196 }
197
198 /// Instantaneous Trendline (Ehlers) with derived trend-strength feature.
199 /// Returns Struct column "itl" with fields: trend, strength (f64).
200 pub fn instantaneous_trendline(self) -> LazyFrame {
201 self.0.clone().with_columns([col("close")
202 .map(
203 move |s| {
204 let mut extractor = rust_features::InstantaneousTrendlineFeatureExtractor::new();
205 let ca: &Float64Chunked = s.f64()?;
206 let mut trends = Vec::with_capacity(s.len());
207 let mut strengths = Vec::with_capacity(s.len());
208 for i in 0..s.len() {
209 let val = ca.get(i).unwrap_or(f64::NAN);
210 let f = extractor.next(val);
211 trends.push(f.trend);
212 strengths.push(f.strength);
213 }
214 let struct_series = StructChunked::from_series(
215 "itl_result".into(),
216 s.len(),
217 [
218 Series::new("trend".into(), trends),
219 Series::new("strength".into(), strengths),
220 ]
221 .iter(),
222 )?;
223 Ok(Some(Column::from(struct_series.into_series())))
224 },
225 GetOutput::from_type(DataType::Struct(vec![
226 Field::new("trend".into(), DataType::Float64),
227 Field::new("strength".into(), DataType::Float64),
228 ])),
229 )
230 .alias("itl")])
231 }
232
233 /// HMM soft regime probabilities (bull/bear forward probs from Viterbi deltas).
234 /// Returns Struct column "regime_probs" with prob_bull, prob_bear, prob_steady, prob_crisis, prob_other.
235 pub fn regime_probs(self) -> LazyFrame {
236 self.0.clone().with_columns([col("close")
237 .map(
238 move |s| {
239 let mut extractor = rust_features::RegimeProbFeatureExtractor::bull_bear();
240 let ca = s.f64()?;
241 let mut bull = Vec::with_capacity(s.len());
242 let mut bear = Vec::with_capacity(s.len());
243 let mut steady = Vec::with_capacity(s.len());
244 let mut crisis = Vec::with_capacity(s.len());
245 let mut other = Vec::with_capacity(s.len());
246 for i in 0..s.len() {
247 let val = ca.get(i).unwrap_or(f64::NAN);
248 let f = extractor.next(val);
249 bull.push(f.probs[0]);
250 bear.push(f.probs[1]);
251 crisis.push(f.probs[2]);
252 steady.push(f.probs[3]);
253 other.push(f.probs[4]);
254 }
255 let struct_series = StructChunked::from_series(
256 "regime_probs_result".into(),
257 s.len(),
258 [
259 Series::new("prob_bull".into(), bull),
260 Series::new("prob_bear".into(), bear),
261 Series::new("prob_steady".into(), steady),
262 Series::new("prob_crisis".into(), crisis),
263 Series::new("prob_other".into(), other),
264 ]
265 .iter(),
266 )?;
267 Ok(Some(Column::from(struct_series.into_series())))
268 },
269 GetOutput::from_type(DataType::Struct(vec![
270 Field::new("prob_bull".into(), DataType::Float64),
271 Field::new("prob_bear".into(), DataType::Float64),
272 Field::new("prob_steady".into(), DataType::Float64),
273 Field::new("prob_crisis".into(), DataType::Float64),
274 Field::new("prob_other".into(), DataType::Float64),
275 ])),
276 )
277 .alias("regime_probs")])
278 }
279
280 /// Trendflex zero-lag trend component.
281 /// Output column: "trendflex_{length}" (f64).
282 pub fn trendflex(self, length: usize) -> LazyFrame {
283 self.0.clone().with_columns([col("close")
284 .map(
285 move |s| {
286 let mut extractor = rust_features::TrendflexFeatureExtractor::new(length);
287 let ca: &Float64Chunked = s.f64()?;
288 let mut values = Vec::with_capacity(s.len());
289 for i in 0..s.len() {
290 let val = ca.get(i).unwrap_or(f64::NAN);
291 values.push(extractor.next(val).trendflex);
292 }
293 Ok(Some(Column::from(Series::new(
294 format!("trendflex_{}", length).into(),
295 values,
296 ))))
297 },
298 GetOutput::from_type(DataType::Float64),
299 )
300 .alias(&format!("trendflex_{}", length))])
301 }
302
303 /// Ehlers Autocorrelation summary features (dominant lag + max correlation).
304 /// Returns Struct column "ehlers_autocorr" with dominant_lag (u32), max_correlation (f64).
305 pub fn ehlers_autocorrelation(self, length: usize, num_lags: usize) -> LazyFrame {
306 self.0.clone().with_columns([col("close")
307 .map(
308 move |s| {
309 let mut extractor =
310 rust_features::EhlersAutocorrelationFeatureExtractor::new(length, num_lags);
311 let ca: &Float64Chunked = s.f64()?;
312 let mut lags = Vec::with_capacity(s.len());
313 let mut max_corrs = Vec::with_capacity(s.len());
314 for i in 0..s.len() {
315 let val = ca.get(i).unwrap_or(f64::NAN);
316 let f = extractor.next(val);
317 lags.push(f.dominant_lag as u32);
318 max_corrs.push(f.max_correlation);
319 }
320 let struct_series = StructChunked::from_series(
321 "ehlers_autocorr_result".into(),
322 s.len(),
323 [
324 Series::new("dominant_lag".into(), lags),
325 Series::new("max_correlation".into(), max_corrs),
326 ]
327 .iter(),
328 )?;
329 Ok(Some(Column::from(struct_series.into_series())))
330 },
331 GetOutput::from_type(DataType::Struct(vec![
332 Field::new("dominant_lag".into(), DataType::UInt32),
333 Field::new("max_correlation".into(), DataType::Float64),
334 ])),
335 )
336 .alias("ehlers_autocorr")])
337 }
338
339 /// Build the recommended ML feature matrix (all locked `.ta.features.*` outputs).
340 ///
341 /// Chains: hurst, cyber_cycle (struct), griffiths_dc, regime_label, itl (struct),
342 /// regime_probs (struct), trendflex, ehlers_autocorr (struct).
343 /// Matches the `recommended` preset in `quantwave.build_feature_matrix()` (rdpk).
344 pub fn recommended_matrix(self) -> LazyFrame {
345 use crate::QuantWaveExt;
346 self.hurst(100)
347 .ta()
348 .features()
349 .cyber_cycle(30)
350 .ta()
351 .features()
352 .griffiths_dominant_cycle(6, 50, 30)
353 .ta()
354 .features()
355 .regime_features()
356 .ta()
357 .features()
358 .instantaneous_trendline()
359 .ta()
360 .features()
361 .regime_probs()
362 .ta()
363 .features()
364 .trendflex(30)
365 .ta()
366 .features()
367 .ehlers_autocorrelation(30, 10)
368 }
369}
370
371// The struct is pub so it is reachable as quantwave_polars::features::TaFeaturesNamespace if needed for turbofish/docs.
372// No additional re-export required here; the .ta().features() chaining works via the impl on QuantWaveNamespace
373// (the mod features; declaration in lib.rs ensures the impl is linked).
374
#[cfg(test)]
mod tests {
    use super::*;
    use crate::QuantWaveExt; // brings the `.ta()` extension method into scope for the smoke test

    /// Smoke test for the locked `.ta.features.*` surface (quantwave-4ps wlx slice).
    ///
    /// Runs all eight feature methods one after another on a short close series
    /// and checks that every output column exists. Dtypes are additionally
    /// checked for the first four outputs, and the final frame is checked for
    /// the expected row and column counts.
    /// (Full numeric parity + proptests live in quantwave-core/tests/ per project rules.)
    #[test]
    fn smoke_ta_features_surface() -> PolarsResult<()> {
        const N_BARS: usize = 40;

        // Small oscillating + trending price series. The windows used below
        // range from 8 to 30 bars, so this checks names/dtypes/collect only,
        // not numeric values.
        let prices: Vec<f64> = (0..N_BARS)
            .map(|i| 100.0 + 3.0 * (i as f64 * 0.4).sin() + (i as f64) * 0.1)
            .collect();

        let df = df!["close" => prices]?;
        let lf = df.lazy();

        // 1. hurst -> "hurst_8"
        let out = lf.clone().ta().features().hurst(8).collect()?;
        assert!(out.column("hurst_8").is_ok());
        assert_eq!(out.column("hurst_8")?.dtype(), &DataType::Float64);

        // 2. cyber_cycle -> struct "cyber_cycle"
        let out = out.lazy().ta().features().cyber_cycle(12).collect()?;
        let cc = out.column("cyber_cycle")?;
        assert_eq!(
            cc.dtype().clone(),
            DataType::Struct(vec![
                Field::new("cycle".into(), DataType::Float64),
                Field::new("trigger".into(), DataType::Float64),
                Field::new("momentum".into(), DataType::Float64),
                Field::new("signal".into(), DataType::Float64),
            ])
        );
        let ca = cc.struct_()?;
        // The last row must hold a value (not null) in the `cycle` field.
        assert!(ca
            .field_by_name("cycle".into())?
            .f64()?
            .get(N_BARS - 1)
            .is_some());

        // 3. griffiths_dominant_cycle -> "griffiths_dc"
        let out = out
            .lazy()
            .ta()
            .features()
            .griffiths_dominant_cycle(6, 40, 25)
            .collect()?;
        assert!(out.column("griffiths_dc").is_ok());
        assert_eq!(out.column("griffiths_dc")?.dtype(), &DataType::Float64);

        // 4. regime_features -> "regime_label"
        let out = out.lazy().ta().features().regime_features().collect()?;
        assert!(out.column("regime_label").is_ok());
        assert_eq!(out.column("regime_label")?.dtype(), &DataType::UInt32);

        // 5. instantaneous_trendline -> struct "itl"
        let out = out.lazy().ta().features().instantaneous_trendline().collect()?;
        assert!(out.column("itl").is_ok());

        // 6. regime_probs -> struct "regime_probs"
        let out = out.lazy().ta().features().regime_probs().collect()?;
        assert!(out.column("regime_probs").is_ok());

        // 7. trendflex -> "trendflex_20"
        let out = out.lazy().ta().features().trendflex(20).collect()?;
        assert!(out.column("trendflex_20").is_ok());

        // 8. ehlers_autocorrelation -> struct "ehlers_autocorr"
        let out = out
            .lazy()
            .ta()
            .features()
            .ehlers_autocorrelation(30, 10)
            .collect()?;
        assert!(out.column("ehlers_autocorr").is_ok());

        // Every step adds exactly one column and must keep the row count:
        // `close` + eight feature columns over N_BARS rows.
        assert_eq!(out.height(), N_BARS);
        assert_eq!(out.width(), 9);

        Ok(())
    }
}
459}