// vector_ta/indicators/macd.rs

1#[cfg(all(feature = "python", feature = "cuda"))]
2use crate::utilities::dlpack_cuda::export_f32_cuda_dlpack_2d;
3#[cfg(feature = "python")]
4use numpy::{IntoPyArray, PyArray1, PyArrayMethods, PyReadonlyArray1};
5#[cfg(feature = "python")]
6use pyo3::exceptions::PyValueError;
7#[cfg(feature = "python")]
8use pyo3::prelude::*;
9#[cfg(feature = "python")]
10use pyo3::types::PyDict;
11
12#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
13use serde::{Deserialize, Serialize};
14#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
15use wasm_bindgen::prelude::*;
16
17use crate::utilities::data_loader::{source_type, Candles};
18use crate::utilities::enums::Kernel;
19use crate::utilities::helpers::{
20    alloc_with_nan_prefix, detect_best_batch_kernel, detect_best_kernel, init_matrix_prefixes,
21    make_uninit_matrix,
22};
23#[cfg(feature = "python")]
24use crate::utilities::kernel_validation::validate_kernel;
25use aligned_vec::{AVec, CACHELINE_ALIGN};
26#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
27use core::arch::x86_64::*;
28#[cfg(not(target_arch = "wasm32"))]
29use rayon::prelude::*;
30use std::error::Error;
31use thiserror::Error;
32
33#[derive(Debug, Clone)]
34pub enum MacdData<'a> {
35    Candles {
36        candles: &'a Candles,
37        source: &'a str,
38    },
39    Slice(&'a [f64]),
40}
41
/// The three MACD series produced by a batch calculation.
///
/// The batch functions in this module allocate all three vectors with the
/// same length as the input series.
#[derive(Clone, Debug)]
pub struct MacdOutput {
    /// MACD line: fast moving average minus slow moving average.
    pub macd: Vec<f64>,
    /// Signal line: moving average of the MACD line.
    pub signal: Vec<f64>,
    /// Histogram: MACD line minus signal line.
    pub hist: Vec<f64>,
}
48
/// Tunable MACD parameters.
///
/// Every field is optional; the `MacdInput` getters substitute
/// 12 / 26 / 9 / `"ema"` for fields left as `None`.
#[derive(Clone, Debug)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct MacdParams {
    /// Period of the fast moving average.
    pub fast_period: Option<usize>,
    /// Period of the slow moving average.
    pub slow_period: Option<usize>,
    /// Period of the signal-line moving average.
    pub signal_period: Option<usize>,
    /// Name of the moving-average flavour (for example `"ema"`).
    pub ma_type: Option<String>,
}

impl Default for MacdParams {
    /// Returns the 12 / 26 / 9 EMA configuration with every field populated.
    fn default() -> Self {
        let ma_type = Some(String::from("ema"));
        Self {
            fast_period: Some(12),
            slow_period: Some(26),
            signal_period: Some(9),
            ma_type,
        }
    }
}
71
72#[derive(Debug, Clone)]
73pub struct MacdInput<'a> {
74    pub data: MacdData<'a>,
75    pub params: MacdParams,
76}
77
78impl<'a> MacdInput<'a> {
79    #[inline]
80    pub fn from_candles(c: &'a Candles, s: &'a str, p: MacdParams) -> Self {
81        Self {
82            data: MacdData::Candles {
83                candles: c,
84                source: s,
85            },
86            params: p,
87        }
88    }
89    #[inline]
90    pub fn from_slice(sl: &'a [f64], p: MacdParams) -> Self {
91        Self {
92            data: MacdData::Slice(sl),
93            params: p,
94        }
95    }
96    #[inline]
97    pub fn with_default_candles(c: &'a Candles) -> Self {
98        Self::from_candles(c, "close", MacdParams::default())
99    }
100    #[inline]
101    pub fn get_fast_period(&self) -> usize {
102        self.params.fast_period.unwrap_or(12)
103    }
104    #[inline]
105    pub fn get_slow_period(&self) -> usize {
106        self.params.slow_period.unwrap_or(26)
107    }
108    #[inline]
109    pub fn get_signal_period(&self) -> usize {
110        self.params.signal_period.unwrap_or(9)
111    }
112    #[inline]
113    pub fn get_ma_type(&self) -> String {
114        self.params
115            .ma_type
116            .clone()
117            .unwrap_or_else(|| "ema".to_string())
118    }
119}
120
121impl<'a> AsRef<[f64]> for MacdInput<'a> {
122    #[inline(always)]
123    fn as_ref(&self) -> &[f64] {
124        match &self.data {
125            MacdData::Slice(s) => s,
126            MacdData::Candles { candles, source } => source_type(candles, source),
127        }
128    }
129}
130
131#[derive(Clone, Debug)]
132pub struct MacdBuilder {
133    fast_period: Option<usize>,
134    slow_period: Option<usize>,
135    signal_period: Option<usize>,
136    ma_type: Option<String>,
137    kernel: Kernel,
138}
139
140impl Default for MacdBuilder {
141    fn default() -> Self {
142        Self {
143            fast_period: None,
144            slow_period: None,
145            signal_period: None,
146            ma_type: None,
147            kernel: Kernel::Auto,
148        }
149    }
150}
151
152impl MacdBuilder {
153    #[inline]
154    pub fn new() -> Self {
155        Self::default()
156    }
157    #[inline]
158    pub fn fast_period(mut self, n: usize) -> Self {
159        self.fast_period = Some(n);
160        self
161    }
162    #[inline]
163    pub fn slow_period(mut self, n: usize) -> Self {
164        self.slow_period = Some(n);
165        self
166    }
167    #[inline]
168    pub fn signal_period(mut self, n: usize) -> Self {
169        self.signal_period = Some(n);
170        self
171    }
172    #[inline]
173    pub fn ma_type<S: Into<String>>(mut self, s: S) -> Self {
174        self.ma_type = Some(s.into());
175        self
176    }
177    #[inline]
178    pub fn kernel(mut self, k: Kernel) -> Self {
179        self.kernel = k;
180        self
181    }
182
183    #[inline]
184    pub fn apply(self, c: &Candles) -> Result<MacdOutput, MacdError> {
185        let p = MacdParams {
186            fast_period: self.fast_period,
187            slow_period: self.slow_period,
188            signal_period: self.signal_period,
189            ma_type: self.ma_type,
190        };
191        let i = MacdInput::from_candles(c, "close", p);
192        macd_with_kernel(&i, self.kernel)
193    }
194
195    #[inline]
196    pub fn apply_slice(self, d: &[f64]) -> Result<MacdOutput, MacdError> {
197        let p = MacdParams {
198            fast_period: self.fast_period,
199            slow_period: self.slow_period,
200            signal_period: self.signal_period,
201            ma_type: self.ma_type,
202        };
203        let i = MacdInput::from_slice(d, p);
204        macd_with_kernel(&i, self.kernel)
205    }
206}
207
208#[derive(Debug, Error)]
209pub enum MacdError {
210    #[error("macd: input data slice is empty")]
211    EmptyInputData,
212    #[error("macd: All values are NaN.")]
213    AllValuesNaN,
214    #[error("macd: Invalid period: fast = {fast}, slow = {slow}, signal = {signal}, data length = {data_len}")]
215    InvalidPeriod {
216        fast: usize,
217        slow: usize,
218        signal: usize,
219        data_len: usize,
220    },
221    #[error("macd: Not enough valid data: needed = {needed}, valid = {valid}")]
222    NotEnoughValidData { needed: usize, valid: usize },
223    #[error("macd: Output length mismatch: expected = {expected}, got = {got}")]
224    OutputLengthMismatch { expected: usize, got: usize },
225    #[error("macd: Invalid range: start = {start}, end = {end}, step = {step}")]
226    InvalidRange {
227        start: usize,
228        end: usize,
229        step: usize,
230    },
231    #[error("macd: Unknown MA type: {0}")]
232    UnknownMA(String),
233    #[error("macd: Invalid kernel for batch operation: {0:?}")]
234    InvalidKernelForBatch(Kernel),
235}
236
237#[derive(Debug, Clone)]
238pub struct MacdStream {
239    fast: usize,
240    slow: usize,
241    signal: usize,
242    kind: MaKind,
243    inner: StreamImpl,
244}
245
/// Moving-average flavours the streaming implementation distinguishes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum MaKind {
    Ema,
    Rma,
    Sma,
    Wma,
    /// Any name that is not one of the recognised aliases.
    Unknown,
}

impl MaKind {
    /// Parses a moving-average name, ignoring ASCII case.
    ///
    /// Recognised aliases: `ema`; `rma` / `wilders` / `smma`; `sma`;
    /// `wma` / `lwma`. Anything else yields [`MaKind::Unknown`].
    #[inline]
    fn from_str(s: &str) -> Self {
        const ALIASES: [(&str, MaKind); 7] = [
            ("ema", MaKind::Ema),
            ("rma", MaKind::Rma),
            ("wilders", MaKind::Rma),
            ("smma", MaKind::Rma),
            ("sma", MaKind::Sma),
            ("wma", MaKind::Wma),
            ("lwma", MaKind::Wma),
        ];
        ALIASES
            .iter()
            .find(|entry| s.eq_ignore_ascii_case(entry.0))
            .map_or(MaKind::Unknown, |entry| entry.1)
    }
}
267
268#[derive(Debug, Clone)]
269enum StreamImpl {
270    Ema(EmaState),
271
272    Sma(SmaState),
273
274    Wma(WmaState),
275
276    Unsupported,
277}
278
/// Running state for the exponentially smoothed (EMA / RMA) stream.
#[derive(Clone, Debug)]
struct EmaState {
    /// Smoothing factor of the fast average.
    af: f64,
    /// `1 - af`.
    omf: f64,
    /// Smoothing factor of the slow average.
    aslow: f64,
    /// `1 - aslow`.
    oms: f64,
    /// Smoothing factor of the signal line.
    asig: f64,
    /// `1 - asig`.
    omsi: f64,
    /// `1 / fast`, used to turn the seed sum into a mean.
    inv_fast: f64,
    /// `1 / slow`, used to turn the seed sum into a mean.
    inv_slow: f64,
    /// `1 / signal`, used to turn the seed sum into a mean.
    inv_sig: f64,

    /// Sum of the samples collected so far for the fast seed.
    fsum: f64,
    /// Sum of the samples collected so far for the slow seed.
    ssum: f64,
    /// Number of samples collected for the fast seed.
    fcnt: usize,
    /// Number of samples collected for the slow seed.
    scnt: usize,

    /// Current fast average; `None` until seeded.
    fast_ema: Option<f64>,
    /// Current slow average; `None` until seeded.
    slow_ema: Option<f64>,

    /// Sum of the MACD values collected so far for the signal seed.
    sig_accum: f64,
    /// Number of MACD values collected for the signal seed.
    sig_cnt: usize,
    /// Current signal value; `None` until seeded.
    sig_ema: Option<f64>,
}
303
304#[derive(Debug, Clone)]
305struct SmaState {
306    fast: RollingSma,
307    slow: RollingSma,
308    sig: RollingSma,
309}
310
311#[derive(Debug, Clone)]
312struct WmaState {
313    fast: RollingWma,
314    slow: RollingWma,
315    sig: RollingWma,
316}
317
/// Fixed-window simple moving average over a ring buffer.
#[derive(Debug, Clone)]
struct RollingSma {
    /// Window length.
    n: usize,
    /// `1 / n`, cached so each update multiplies instead of dividing.
    inv_n: f64,
    /// Ring buffer holding the most recent `n` accepted samples.
    buf: Vec<f64>,
    /// Running sum of the samples currently in the window.
    sum: f64,
    /// Next slot to write (the oldest sample once the window is full).
    idx: usize,
    /// Number of samples accepted so far, capped at `n`.
    cnt: usize,
}

impl RollingSma {
    /// Creates an empty window of length `n`.
    ///
    /// A window of length zero is permitted; it never produces a value.
    #[inline]
    fn new(n: usize) -> Self {
        Self {
            n,
            inv_n: 1.0 / n as f64,
            buf: vec![0.0; n],
            sum: 0.0,
            idx: 0,
            cnt: 0,
        }
    }

    /// Feeds one sample and returns the window mean once `n` samples are held.
    ///
    /// Non-finite samples are ignored: they leave the state untouched and
    /// yield `None`. A zero-length window always yields `None` (previously
    /// this path indexed an empty buffer and panicked).
    #[inline]
    fn push(&mut self, x: f64) -> Option<f64> {
        if self.n == 0 || !x.is_finite() {
            return None;
        }

        let slot = self.idx;
        self.idx = (slot + 1) % self.n;

        if self.cnt == self.n {
            // Steady state: replace the oldest sample and adjust the sum.
            let evicted = self.buf[slot];
            self.buf[slot] = x;
            self.sum += x - evicted;
            return Some(self.sum * self.inv_n);
        }

        // Warm-up: keep accumulating until the window is full.
        self.sum += x;
        self.buf[slot] = x;
        self.cnt += 1;
        (self.cnt == self.n).then(|| self.sum * self.inv_n)
    }
}
364
/// Fixed-window linearly weighted moving average over a ring buffer.
///
/// The newest sample carries weight `n`, the oldest weight `1`.
#[derive(Debug, Clone)]
struct RollingWma {
    /// Window length.
    n: usize,
    /// Reciprocal of the weight total `n * (n + 1) / 2`.
    inv_denom: f64,
    /// Ring buffer holding the most recent `n` accepted samples.
    buf: Vec<f64>,
    /// Next slot to write (the oldest sample once the window is full).
    idx: usize,
    /// Number of samples accepted so far, capped at `n`.
    cnt: usize,
    /// Plain sum of the samples currently in the window.
    sum: f64,
    /// Weighted sum of the samples currently in the window.
    wsum: f64,
}

impl RollingWma {
    /// Creates an empty window of length `n`.
    ///
    /// A window of length zero is permitted; it never produces a value.
    #[inline]
    fn new(n: usize) -> Self {
        let denom = (n as f64) * (n as f64 + 1.0) * 0.5;
        Self {
            n,
            inv_denom: 1.0 / denom,
            buf: vec![0.0; n],
            idx: 0,
            cnt: 0,
            sum: 0.0,
            wsum: 0.0,
        }
    }

    /// Feeds one sample and returns the weighted mean once `n` samples are held.
    ///
    /// Non-finite samples are ignored: they leave the state untouched and
    /// yield `None`. A zero-length window always yields `None` (previously
    /// this path indexed an empty buffer and panicked).
    #[inline]
    fn push(&mut self, x: f64) -> Option<f64> {
        if self.n == 0 || !x.is_finite() {
            return None;
        }

        let slot = self.idx;
        self.idx = (slot + 1) % self.n;

        if self.cnt < self.n {
            // Warm-up: the k-th accepted sample enters with weight k.
            self.cnt += 1;
            self.sum += x;
            self.wsum += (self.cnt as f64) * x;
            self.buf[slot] = x;
            return (self.cnt == self.n).then(|| self.wsum * self.inv_denom);
        }

        // Steady state: every held sample loses one unit of weight (subtract
        // the previous plain sum) and the new sample enters with weight n.
        let evicted = self.buf[slot];
        self.buf[slot] = x;
        let prev_sum = self.sum;
        self.wsum = self.wsum + (self.n as f64) * x - prev_sum;
        self.sum = prev_sum + x - evicted;
        Some(self.wsum * self.inv_denom)
    }
}
418
419impl MacdStream {
420    pub fn new(fast: usize, slow: usize, signal: usize, ma_type: &str) -> Self {
421        let kind = MaKind::from_str(ma_type);
422        let inner = match kind {
423            MaKind::Ema | MaKind::Rma => {
424                let (af, aslow) = match kind {
425                    MaKind::Ema => (2.0 / (fast as f64 + 1.0), 2.0 / (slow as f64 + 1.0)),
426                    MaKind::Rma => (1.0 / fast as f64, 1.0 / slow as f64),
427                    _ => unreachable!(),
428                };
429                let asig = match kind {
430                    MaKind::Ema => 2.0 / (signal as f64 + 1.0),
431                    MaKind::Rma => 1.0 / signal as f64,
432                    _ => unreachable!(),
433                };
434                StreamImpl::Ema(EmaState {
435                    af,
436                    omf: 1.0 - af,
437                    aslow,
438                    oms: 1.0 - aslow,
439                    asig,
440                    omsi: 1.0 - asig,
441                    inv_fast: 1.0 / fast as f64,
442                    inv_slow: 1.0 / slow as f64,
443                    inv_sig: 1.0 / signal as f64,
444                    fsum: 0.0,
445                    ssum: 0.0,
446                    fcnt: 0,
447                    scnt: 0,
448                    fast_ema: None,
449                    slow_ema: None,
450                    sig_accum: 0.0,
451                    sig_cnt: 0,
452                    sig_ema: None,
453                })
454            }
455            MaKind::Sma => StreamImpl::Sma(SmaState {
456                fast: RollingSma::new(fast),
457                slow: RollingSma::new(slow),
458                sig: RollingSma::new(signal),
459            }),
460            MaKind::Wma => StreamImpl::Wma(WmaState {
461                fast: RollingWma::new(fast),
462                slow: RollingWma::new(slow),
463                sig: RollingWma::new(signal),
464            }),
465            MaKind::Unknown => StreamImpl::Unsupported,
466        };
467
468        Self {
469            fast,
470            slow,
471            signal,
472            kind,
473            inner,
474        }
475    }
476
477    pub fn update(&mut self, x: f64) -> Option<(f64, f64, f64)> {
478        if !x.is_finite() {
479            return None;
480        }
481
482        match &mut self.inner {
483            StreamImpl::Ema(st) => {
484                if st.fcnt < self.fast {
485                    st.fcnt += 1;
486                    st.fsum += x;
487                    if st.fcnt == self.fast {
488                        st.fast_ema = Some(st.fsum * st.inv_fast);
489                    }
490                } else {
491                    let fe = st.fast_ema.unwrap();
492                    st.fast_ema = Some(x.mul_add(st.af, st.omf * fe));
493                }
494
495                if st.scnt < self.slow {
496                    st.scnt += 1;
497                    st.ssum += x;
498                    if st.scnt == self.slow {
499                        st.slow_ema = Some(st.ssum * st.inv_slow);
500                    }
501                } else {
502                    let se = st.slow_ema.unwrap();
503                    st.slow_ema = Some(x.mul_add(st.aslow, st.oms * se));
504                }
505
506                if st.scnt >= self.slow {
507                    let m = st.fast_ema.unwrap() - st.slow_ema.unwrap();
508
509                    if st.sig_ema.is_none() {
510                        st.sig_cnt += 1;
511                        st.sig_accum += m;
512                        if st.sig_cnt == self.signal {
513                            let se = st.sig_accum * st.inv_sig;
514                            st.sig_ema = Some(se);
515                            let hist = m - se;
516                            return Some((m, se, hist));
517                        }
518                        return None;
519                    } else {
520                        let prev = st.sig_ema.unwrap();
521                        let se = m.mul_add(st.asig, st.omsi * prev);
522                        st.sig_ema = Some(se);
523                        let hist = m - se;
524                        return Some((m, se, hist));
525                    }
526                }
527                None
528            }
529
530            StreamImpl::Sma(st) => {
531                let f = st.fast.push(x)?;
532                let s = st.slow.push(x)?;
533                let m = f - s;
534                if let Some(se) = st.sig.push(m) {
535                    Some((m, se, m - se))
536                } else {
537                    None
538                }
539            }
540
541            StreamImpl::Wma(st) => {
542                let f = st.fast.push(x)?;
543                let s = st.slow.push(x)?;
544                let m = f - s;
545                if let Some(se) = st.sig.push(m) {
546                    Some((m, se, m - se))
547                } else {
548                    None
549                }
550            }
551
552            StreamImpl::Unsupported => None,
553        }
554    }
555}
556
557#[inline]
558pub fn macd(input: &MacdInput) -> Result<MacdOutput, MacdError> {
559    macd_with_kernel(input, Kernel::Auto)
560}
561
562#[inline(always)]
563fn macd_prepare<'a>(
564    input: &'a MacdInput,
565    kernel: Kernel,
566) -> Result<(&'a [f64], usize, usize, usize, String, usize, usize, Kernel), MacdError> {
567    let data = input.as_ref();
568    let len = data.len();
569    if len == 0 {
570        return Err(MacdError::EmptyInputData);
571    }
572    let fast = input.get_fast_period();
573    let slow = input.get_slow_period();
574    let signal = input.get_signal_period();
575    let ma_type = input.get_ma_type();
576
577    let first = data
578        .iter()
579        .position(|x| !x.is_nan())
580        .ok_or(MacdError::AllValuesNaN)?;
581    if fast == 0 || slow == 0 || signal == 0 || fast > len || slow > len || signal > len {
582        return Err(MacdError::InvalidPeriod {
583            fast,
584            slow,
585            signal,
586            data_len: len,
587        });
588    }
589    if len - first < slow {
590        return Err(MacdError::NotEnoughValidData {
591            needed: slow,
592            valid: len - first,
593        });
594    }
595
596    let macd_warmup = first + slow - 1;
597    let signal_warmup = first + slow + signal - 2;
598
599    let chosen = match kernel {
600        Kernel::Auto => Kernel::Scalar,
601        k => k,
602    };
603    Ok((
604        data,
605        fast,
606        slow,
607        signal,
608        ma_type,
609        macd_warmup,
610        signal_warmup,
611        chosen,
612    ))
613}
614
615#[inline(always)]
616
617fn macd_compute_into_classic_ema(
618    data: &[f64],
619    fast: usize,
620    slow: usize,
621    signal: usize,
622    first: usize,
623    macd_out: &mut [f64],
624    signal_out: &mut [f64],
625    hist_out: &mut [f64],
626) -> Result<(), MacdError> {
627    let len = data.len();
628    let macd_warmup = first + slow - 1;
629    let signal_warmup = first + slow + signal - 2;
630
631    let af = 2.0 / (fast as f64 + 1.0);
632    let aslow = 2.0 / (slow as f64 + 1.0);
633    let asig = 2.0 / (signal as f64 + 1.0);
634    let omf = 1.0 - af;
635    let oms = 1.0 - aslow;
636    let omsi = 1.0 - asig;
637
638    let fast_seed_idx = first + fast - 1;
639    let slow_seed_idx = macd_warmup;
640
641    let mut fsum = 0.0f64;
642    let mut ssum = 0.0f64;
643
644    let mut fast_ema = 0.0f64;
645    let mut slow_ema = 0.0f64;
646    let mut fast_ready = false;
647    let mut slow_ready = false;
648
649    let mut have_seed = false;
650    let mut se = 0.0f64;
651    let mut sig_accum = 0.0f64;
652
653    let mut i = first;
654    while i < len {
655        let x = data[i];
656
657        if !fast_ready {
658            fsum += x;
659            if i >= first + fast {
660                fsum -= data[i - fast];
661            }
662        }
663        if !slow_ready {
664            ssum += x;
665            if i >= first + slow {
666                ssum -= data[i - slow];
667            }
668        }
669
670        if !fast_ready {
671            if i == fast_seed_idx {
672                fast_ema = fsum / fast as f64;
673                fast_ready = true;
674            }
675        } else {
676            fast_ema = x.mul_add(af, omf * fast_ema);
677        }
678
679        if !slow_ready {
680            if i == slow_seed_idx {
681                slow_ema = ssum / slow as f64;
682                slow_ready = true;
683            }
684        } else {
685            slow_ema = x.mul_add(aslow, oms * slow_ema);
686        }
687
688        if slow_ready {
689            let m = fast_ema - slow_ema;
690            macd_out[i] = m;
691
692            if !have_seed {
693                if signal == 1 {
694                    if i == signal_warmup {
695                        se = m;
696                        have_seed = true;
697                        signal_out[i] = se;
698                        hist_out[i] = m - se;
699                    }
700                } else {
701                    if i <= signal_warmup {
702                        sig_accum += m;
703                        if i == signal_warmup {
704                            se = sig_accum / (signal as f64);
705                            have_seed = true;
706                            signal_out[i] = se;
707                            hist_out[i] = m - se;
708                        }
709                    }
710                }
711            } else {
712                se = m.mul_add(asig, omsi * se);
713                if i >= signal_warmup {
714                    signal_out[i] = se;
715                    hist_out[i] = m - se;
716                }
717            }
718        }
719
720        i += 1;
721    }
722
723    Ok(())
724}
725
726fn macd_compute_into(
727    data: &[f64],
728    fast: usize,
729    slow: usize,
730    signal: usize,
731    ma_type: &str,
732    first: usize,
733    macd_out: &mut [f64],
734    signal_out: &mut [f64],
735    hist_out: &mut [f64],
736) -> Result<(), MacdError> {
737    if ma_type.eq_ignore_ascii_case("ema") {
738        return macd_compute_into_classic_ema(
739            data, fast, slow, signal, first, macd_out, signal_out, hist_out,
740        );
741    }
742
743    use crate::indicators::moving_averages::ma::{ma, MaData};
744
745    debug_assert_eq!(macd_out.len(), data.len());
746    debug_assert_eq!(signal_out.len(), data.len());
747    debug_assert_eq!(hist_out.len(), data.len());
748
749    let fast_ma = ma(&ma_type, MaData::Slice(data), fast).map_err(|e| {
750        if e.to_string().contains("Unknown moving average type")
751            || e.to_string().contains("Unsupported")
752        {
753            MacdError::UnknownMA(ma_type.to_string())
754        } else if e.to_string().contains("All values are NaN") {
755            MacdError::AllValuesNaN
756        } else {
757            MacdError::UnknownMA(format!("{}: {}", ma_type, e))
758        }
759    })?;
760    let slow_ma = ma(&ma_type, MaData::Slice(data), slow).map_err(|e| {
761        if e.to_string().contains("Unknown moving average type")
762            || e.to_string().contains("Unsupported")
763        {
764            MacdError::UnknownMA(ma_type.to_string())
765        } else if e.to_string().contains("All values are NaN") {
766            MacdError::AllValuesNaN
767        } else {
768            MacdError::UnknownMA(format!("{}: {}", ma_type, e))
769        }
770    })?;
771
772    let macd_warmup = first + slow - 1;
773    for i in macd_warmup..data.len() {
774        let f = fast_ma[i];
775        let s = slow_ma[i];
776        if f.is_nan() || s.is_nan() {
777            continue;
778        }
779        macd_out[i] = f - s;
780    }
781
782    let signal_warmup = first + slow + signal - 2;
783    if ma_type.eq_ignore_ascii_case("ema") {
784        let alpha = 2.0 / (signal as f64 + 1.0);
785
786        let signal_start = macd_warmup + signal - 1;
787        if signal_start < data.len() {
788            let mut seed_idx = signal_start;
789            while seed_idx < data.len() && macd_out[seed_idx].is_nan() {
790                seed_idx += 1;
791            }
792
793            if seed_idx < data.len() {
794                let mut prev = macd_out[seed_idx];
795                signal_out[seed_idx] = prev;
796
797                for i in (seed_idx + 1)..data.len() {
798                    let x = macd_out[i];
799                    if !x.is_nan() {
800                        prev = alpha * x + (1.0 - alpha) * prev;
801                        signal_out[i] = prev;
802                    }
803                }
804            }
805        }
806    } else {
807        let sig_tmp = ma(&ma_type, MaData::Slice(macd_out), signal).map_err(|e| {
808            if e.to_string().contains("Unknown moving average type")
809                || e.to_string().contains("Unsupported")
810            {
811                MacdError::UnknownMA(ma_type.to_string())
812            } else if e.to_string().contains("All values are NaN") {
813                MacdError::AllValuesNaN
814            } else {
815                MacdError::UnknownMA(format!("{}: {}", ma_type, e))
816            }
817        })?;
818
819        signal_out[signal_warmup..].copy_from_slice(&sig_tmp[signal_warmup..]);
820    }
821
822    for i in signal_warmup..data.len() {
823        let m = macd_out[i];
824        let s = signal_out[i];
825        if m.is_nan() || s.is_nan() {
826            continue;
827        }
828        hist_out[i] = m - s;
829    }
830    Ok(())
831}
832
833pub fn macd_with_kernel(input: &MacdInput, kernel: Kernel) -> Result<MacdOutput, MacdError> {
834    let (data, fast, slow, signal, ma_type, macd_warmup, signal_warmup, chosen) =
835        macd_prepare(input, kernel)?;
836    let len = data.len();
837
838    if ma_type.eq_ignore_ascii_case("ema") {
839        let first = macd_warmup + 1 - slow;
840
841        unsafe {
842            match chosen {
843                Kernel::Scalar | Kernel::ScalarBatch => {
844                    return macd_scalar(data, fast, slow, signal, &ma_type, first);
845                }
846                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
847                Kernel::Avx2 | Kernel::Avx2Batch => {
848                    return macd_avx2(data, fast, slow, signal, &ma_type, first);
849                }
850                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
851                Kernel::Avx512 | Kernel::Avx512Batch => {
852                    return macd_avx512(data, fast, slow, signal, &ma_type, first);
853                }
854                #[cfg(not(all(feature = "nightly-avx", target_arch = "x86_64")))]
855                Kernel::Avx2 | Kernel::Avx2Batch | Kernel::Avx512 | Kernel::Avx512Batch => {
856                    return macd_scalar(data, fast, slow, signal, &ma_type, first);
857                }
858                _ => unreachable!(),
859            }
860        }
861    }
862
863    let mut macd = alloc_with_nan_prefix(len, macd_warmup);
864    let mut signal_vec = alloc_with_nan_prefix(len, signal_warmup);
865    let mut hist = alloc_with_nan_prefix(len, signal_warmup);
866    let first = macd_warmup + 1 - slow;
867    macd_compute_into(
868        data,
869        fast,
870        slow,
871        signal,
872        &ma_type,
873        first,
874        &mut macd,
875        &mut signal_vec,
876        &mut hist,
877    )?;
878    Ok(MacdOutput {
879        macd,
880        signal: signal_vec,
881        hist,
882    })
883}
884
885#[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
886pub fn macd_into(
887    input: &MacdInput,
888    macd_out: &mut [f64],
889    signal_out: &mut [f64],
890    hist_out: &mut [f64],
891) -> Result<(), MacdError> {
892    let (data, fast, slow, signal, ma_type, macd_warmup, signal_warmup, chosen) =
893        macd_prepare(input, Kernel::Auto)?;
894
895    let expected = data.len();
896    if macd_out.len() != expected || signal_out.len() != expected || hist_out.len() != expected {
897        let got = macd_out.len().max(signal_out.len()).max(hist_out.len());
898        return Err(MacdError::OutputLengthMismatch { expected, got });
899    }
900
901    let qnan = f64::from_bits(0x7ff8_0000_0000_0000);
902    let mw = macd_warmup.min(macd_out.len());
903    for v in &mut macd_out[..mw] {
904        *v = qnan;
905    }
906    let sw = signal_warmup.min(signal_out.len());
907    for v in &mut signal_out[..sw] {
908        *v = qnan;
909    }
910    let hw = signal_warmup.min(hist_out.len());
911    for v in &mut hist_out[..hw] {
912        *v = qnan;
913    }
914
915    if ma_type.eq_ignore_ascii_case("ema") {
916        let first = macd_warmup + 1 - slow;
917
918        unsafe {
919            match chosen {
920                Kernel::Scalar | Kernel::ScalarBatch => {
921                    return macd_compute_into_classic_ema(
922                        data, fast, slow, signal, first, macd_out, signal_out, hist_out,
923                    );
924                }
925                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
926                Kernel::Avx2 | Kernel::Avx2Batch | Kernel::Avx512 | Kernel::Avx512Batch => {
927                    return macd_compute_into_classic_ema(
928                        data, fast, slow, signal, first, macd_out, signal_out, hist_out,
929                    );
930                }
931                _ => unreachable!(),
932            }
933        }
934    } else {
935        let first = macd_warmup + 1 - slow;
936        return macd_compute_into(
937            data, fast, slow, signal, &ma_type, first, macd_out, signal_out, hist_out,
938        );
939    }
940}
941
942#[inline(always)]
943
944pub unsafe fn macd_scalar_classic_ema(
945    data: &[f64],
946    fast: usize,
947    slow: usize,
948    signal: usize,
949    first: usize,
950) -> Result<MacdOutput, MacdError> {
951    let len = data.len();
952    let macd_warmup = first + slow - 1;
953    let signal_warmup = first + slow + signal - 2;
954
955    let mut macd = alloc_with_nan_prefix(len, macd_warmup);
956    let mut signal_vec = alloc_with_nan_prefix(len, signal_warmup);
957    let mut hist = alloc_with_nan_prefix(len, signal_warmup);
958
959    macd_compute_into_classic_ema(
960        data,
961        fast,
962        slow,
963        signal,
964        first,
965        &mut macd,
966        &mut signal_vec,
967        &mut hist,
968    )?;
969
970    Ok(MacdOutput {
971        macd,
972        signal: signal_vec,
973        hist,
974    })
975}
976
977pub unsafe fn macd_scalar(
978    data: &[f64],
979    fast: usize,
980    slow: usize,
981    signal: usize,
982    ma_type: &str,
983    first: usize,
984) -> Result<MacdOutput, MacdError> {
985    if ma_type.eq_ignore_ascii_case("ema") {
986        return macd_scalar_classic_ema(data, fast, slow, signal, first);
987    }
988
989    use crate::indicators::moving_averages::ma::{ma, MaData};
990    let len = data.len();
991    let fast_ma = ma(ma_type, MaData::Slice(data), fast).map_err(|_| MacdError::AllValuesNaN)?;
992    let slow_ma = ma(ma_type, MaData::Slice(data), slow).map_err(|_| MacdError::AllValuesNaN)?;
993
994    let warmup = first + slow - 1;
995    let mut macd = alloc_with_nan_prefix(len, warmup);
996    for i in warmup..len {
997        if fast_ma[i].is_nan() || slow_ma[i].is_nan() {
998            continue;
999        }
1000        macd[i] = fast_ma[i] - slow_ma[i];
1001    }
1002    let signal_ma =
1003        ma(ma_type, MaData::Slice(&macd), signal).map_err(|_| MacdError::AllValuesNaN)?;
1004
1005    let signal_warmup = warmup + signal - 1;
1006    let mut signal_vec = alloc_with_nan_prefix(len, signal_warmup);
1007    let mut hist = alloc_with_nan_prefix(len, signal_warmup);
1008    for i in first..len {
1009        if macd[i].is_nan() || signal_ma[i].is_nan() {
1010            continue;
1011        }
1012        signal_vec[i] = signal_ma[i];
1013        hist[i] = macd[i] - signal_ma[i];
1014    }
1015    Ok(MacdOutput {
1016        macd,
1017        signal: signal_vec,
1018        hist,
1019    })
1020}
1021
/// MACD kernel whose EMA seeding sums are computed with 256-bit AVX loads.
///
/// Any `ma_type` other than `"ema"` (ASCII case-insensitive) is forwarded to
/// [`macd_scalar`]. For EMA, the fast and slow averages are seeded with the
/// arithmetic mean of the first `fast` / `slow` samples starting at `first`;
/// the signal line is seeded with the mean of the first `signal` MACD values
/// (or with the first MACD value itself when `signal == 1`).
///
/// # Safety
///
/// Elements are read and written without bounds checks. The caller must ensure
/// that `first + fast <= data.len()` and `first + slow <= data.len()` with
/// `slow >= 1`, and that the CPU supports the AVX instructions used here.
/// NOTE(review): this also relies on `alloc_with_nan_prefix(len, _)` returning
/// a buffer of exactly `len` elements — confirm against the helper.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn macd_avx2(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    /// Horizontal sum of the four lanes of `v`.
    #[inline(always)]
    unsafe fn hsum256_pd(v: __m256d) -> f64 {
        let upper = _mm256_extractf128_pd(v, 1);
        let lower = _mm256_castpd256_pd128(v);
        let pair = _mm_add_pd(lower, upper);
        let high_lane = _mm_unpackhi_pd(pair, pair);
        _mm_cvtsd_f64(_mm_add_sd(pair, high_lane))
    }

    /// Sum of `n` doubles starting at `ptr`, using two 4-lane accumulators.
    #[inline(always)]
    unsafe fn avx2_sum(ptr: *const f64, n: usize) -> f64 {
        let mut off = 0usize;
        let mut acc_a = _mm256_setzero_pd();
        let mut acc_b = _mm256_setzero_pd();
        while off + 8 <= n {
            acc_a = _mm256_add_pd(acc_a, _mm256_loadu_pd(ptr.add(off)));
            acc_b = _mm256_add_pd(acc_b, _mm256_loadu_pd(ptr.add(off + 4)));
            off += 8;
        }
        let mut acc = _mm256_add_pd(acc_a, acc_b);
        if off + 4 <= n {
            acc = _mm256_add_pd(acc, _mm256_loadu_pd(ptr.add(off)));
            off += 4;
        }
        // Scalar tail for the remaining (< 4) elements.
        let mut total = hsum256_pd(acc);
        for k in off..n {
            total += *ptr.add(k);
        }
        total
    }

    if !ma_type.eq_ignore_ascii_case("ema") {
        return macd_scalar(data, fast, slow, signal, ma_type, first);
    }

    let len = data.len();
    let macd_warmup = first + slow - 1;
    let signal_warmup = first + slow + signal - 2;

    let mut macd = alloc_with_nan_prefix(len, macd_warmup);
    let mut signal_vec = alloc_with_nan_prefix(len, signal_warmup);
    let mut hist = alloc_with_nan_prefix(len, signal_warmup);

    // Smoothing factors and their complements.
    let alpha_fast = 2.0 / (fast as f64 + 1.0);
    let alpha_slow = 2.0 / (slow as f64 + 1.0);
    let alpha_sig = 2.0 / (signal as f64 + 1.0);
    let keep_fast = 1.0 - alpha_fast;
    let keep_slow = 1.0 - alpha_slow;
    let keep_sig = 1.0 - alpha_sig;

    let window = data.as_ptr().add(first);
    let mut fast_ema = avx2_sum(window, fast) / fast as f64;
    let mut slow_ema = avx2_sum(window, slow) / slow as f64;

    // Advance the fast EMA up to the index where the slow EMA seed lives.
    for t in (first + fast)..=macd_warmup {
        let px = *data.get_unchecked(t);
        fast_ema = px.mul_add(alpha_fast, keep_fast * fast_ema);
    }

    let m0 = fast_ema - slow_ema;
    *macd.get_unchecked_mut(macd_warmup) = m0;

    let mut sig_ema = 0.0f64;
    let mut seeded = false;
    if signal == 1 {
        // A one-period signal line is seeded directly by the first MACD value.
        sig_ema = m0;
        seeded = true;
        if signal_warmup < len {
            *signal_vec.get_unchecked_mut(signal_warmup) = sig_ema;
            *hist.get_unchecked_mut(signal_warmup) = m0 - sig_ema;
        }
    }
    let mut seed_sum = if signal > 1 { m0 } else { 0.0 };

    for i in (macd_warmup + 1)..len {
        let px = *data.get_unchecked(i);
        fast_ema = px.mul_add(alpha_fast, keep_fast * fast_ema);
        slow_ema = px.mul_add(alpha_slow, keep_slow * slow_ema);
        let diff = fast_ema - slow_ema;
        *macd.get_unchecked_mut(i) = diff;

        if seeded {
            sig_ema = diff.mul_add(alpha_sig, keep_sig * sig_ema);
            if i >= signal_warmup {
                *signal_vec.get_unchecked_mut(i) = sig_ema;
                *hist.get_unchecked_mut(i) = diff - sig_ema;
            }
        } else if signal > 1 && i <= signal_warmup {
            // Still accumulating the simple-average seed of the signal line.
            seed_sum += diff;
            if i == signal_warmup {
                sig_ema = seed_sum / (signal as f64);
                seeded = true;
                *signal_vec.get_unchecked_mut(i) = sig_ema;
                *hist.get_unchecked_mut(i) = diff - sig_ema;
            }
        }
    }

    Ok(MacdOutput {
        macd,
        signal: signal_vec,
        hist,
    })
}
1146
/// MACD kernel whose EMA seeding sums are computed with 512-bit AVX-512 loads.
///
/// Any `ma_type` other than `"ema"` (ASCII case-insensitive) is forwarded to
/// [`macd_scalar`]. For EMA, the fast and slow averages are seeded with the
/// arithmetic mean of the first `fast` / `slow` samples starting at `first`;
/// the signal line is seeded with the mean of the first `signal` MACD values
/// (or with the first MACD value itself when `signal == 1`).
///
/// # Safety
///
/// Elements are read and written without bounds checks. The caller must ensure
/// that `first + fast <= data.len()` and `first + slow <= data.len()` with
/// `slow >= 1`, and that the CPU supports the AVX-512 instructions used here.
/// NOTE(review): this also relies on `alloc_with_nan_prefix(len, _)` returning
/// a buffer of exactly `len` elements — confirm against the helper.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn macd_avx512(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    /// Sum of `n` doubles starting at `ptr`, using two 8-lane accumulators.
    #[inline(always)]
    unsafe fn avx512_sum(ptr: *const f64, n: usize) -> f64 {
        let mut off = 0usize;
        let mut acc_a = _mm512_setzero_pd();
        let mut acc_b = _mm512_setzero_pd();
        while off + 16 <= n {
            acc_a = _mm512_add_pd(acc_a, _mm512_loadu_pd(ptr.add(off)));
            acc_b = _mm512_add_pd(acc_b, _mm512_loadu_pd(ptr.add(off + 8)));
            off += 16;
        }
        let mut acc = _mm512_add_pd(acc_a, acc_b);
        if off + 8 <= n {
            acc = _mm512_add_pd(acc, _mm512_loadu_pd(ptr.add(off)));
            off += 8;
        }
        // Scalar tail for the remaining (< 8) elements.
        let mut total = _mm512_reduce_add_pd(acc);
        for k in off..n {
            total += *ptr.add(k);
        }
        total
    }

    if !ma_type.eq_ignore_ascii_case("ema") {
        return macd_scalar(data, fast, slow, signal, ma_type, first);
    }

    let len = data.len();
    let macd_warmup = first + slow - 1;
    let signal_warmup = first + slow + signal - 2;

    let mut macd = alloc_with_nan_prefix(len, macd_warmup);
    let mut signal_vec = alloc_with_nan_prefix(len, signal_warmup);
    let mut hist = alloc_with_nan_prefix(len, signal_warmup);

    // Smoothing factors and their complements.
    let alpha_fast = 2.0 / (fast as f64 + 1.0);
    let alpha_slow = 2.0 / (slow as f64 + 1.0);
    let alpha_sig = 2.0 / (signal as f64 + 1.0);
    let keep_fast = 1.0 - alpha_fast;
    let keep_slow = 1.0 - alpha_slow;
    let keep_sig = 1.0 - alpha_sig;

    let window = data.as_ptr().add(first);
    let mut fast_ema = avx512_sum(window, fast) / fast as f64;
    let mut slow_ema = avx512_sum(window, slow) / slow as f64;

    // Advance the fast EMA up to the index where the slow EMA seed lives.
    for t in (first + fast)..=macd_warmup {
        let px = *data.get_unchecked(t);
        fast_ema = px.mul_add(alpha_fast, keep_fast * fast_ema);
    }

    let m0 = fast_ema - slow_ema;
    *macd.get_unchecked_mut(macd_warmup) = m0;

    let mut sig_ema = 0.0f64;
    let mut seeded = false;
    if signal == 1 {
        // A one-period signal line is seeded directly by the first MACD value.
        sig_ema = m0;
        seeded = true;
        if signal_warmup < len {
            *signal_vec.get_unchecked_mut(signal_warmup) = sig_ema;
            *hist.get_unchecked_mut(signal_warmup) = m0 - sig_ema;
        }
    }
    let mut seed_sum = if signal > 1 { m0 } else { 0.0 };

    for i in (macd_warmup + 1)..len {
        let px = *data.get_unchecked(i);
        fast_ema = px.mul_add(alpha_fast, keep_fast * fast_ema);
        slow_ema = px.mul_add(alpha_slow, keep_slow * slow_ema);
        let diff = fast_ema - slow_ema;
        *macd.get_unchecked_mut(i) = diff;

        if seeded {
            sig_ema = diff.mul_add(alpha_sig, keep_sig * sig_ema);
            if i >= signal_warmup {
                *signal_vec.get_unchecked_mut(i) = sig_ema;
                *hist.get_unchecked_mut(i) = diff - sig_ema;
            }
        } else if signal > 1 && i <= signal_warmup {
            // Still accumulating the simple-average seed of the signal line.
            seed_sum += diff;
            if i == signal_warmup {
                sig_ema = seed_sum / (signal as f64);
                seeded = true;
                *signal_vec.get_unchecked_mut(i) = sig_ema;
                *hist.get_unchecked_mut(i) = diff - sig_ema;
            }
        }
    }

    Ok(MacdOutput {
        macd,
        signal: signal_vec,
        hist,
    })
}
1262
/// "Short window" AVX-512 entry point.
///
/// Forwards every argument unchanged to [`macd_avx512`]; there is no
/// separate short-period implementation.
///
/// # Safety
///
/// Same contract as [`macd_avx512`], which performs the actual work.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn macd_avx512_short(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // Plain delegation: the general kernel handles all period lengths.
    macd_avx512(data, fast, slow, signal, ma_type, first)
}
1275
/// "Long window" AVX-512 entry point.
///
/// Forwards every argument unchanged to [`macd_avx512`]; there is no
/// separate long-period implementation.
///
/// # Safety
///
/// Same contract as [`macd_avx512`], which performs the actual work.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn macd_avx512_long(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // Plain delegation: the general kernel handles all period lengths.
    macd_avx512(data, fast, slow, signal, ma_type, first)
}
1288
1289#[inline(always)]
1290pub fn macd_row_scalar(
1291    data: &[f64],
1292    fast: usize,
1293    slow: usize,
1294    signal: usize,
1295    ma_type: &str,
1296    first: usize,
1297) -> Result<MacdOutput, MacdError> {
1298    unsafe { macd_scalar(data, fast, slow, signal, ma_type, first) }
1299}
/// Row-level entry point for the AVX2 kernel.
///
/// Calls [`macd_avx2`] with the arguments unchanged and returns its result.
/// No validation of `first` or the periods against `data.len()` happens here.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub fn macd_row_avx2(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // NOTE(review): `macd_avx2` indexes without bounds checks; callers are
    // presumably expected to have validated the inputs already — confirm.
    unsafe { macd_avx2(data, fast, slow, signal, ma_type, first) }
}
/// Row-level entry point for the AVX-512 kernel.
///
/// Calls [`macd_avx512`] with the arguments unchanged and returns its result.
/// No validation of `first` or the periods against `data.len()` happens here.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub fn macd_row_avx512(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // NOTE(review): `macd_avx512` indexes without bounds checks; callers are
    // presumably expected to have validated the inputs already — confirm.
    unsafe { macd_avx512(data, fast, slow, signal, ma_type, first) }
}
/// Row-level entry point for the "short" AVX-512 variant.
///
/// Calls [`macd_avx512_short`] with the arguments unchanged and returns its
/// result. No input validation happens here.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub fn macd_row_avx512_short(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // NOTE(review): the callee indexes without bounds checks; callers are
    // presumably expected to have validated the inputs already — confirm.
    unsafe { macd_avx512_short(data, fast, slow, signal, ma_type, first) }
}
/// Row-level entry point for the "long" AVX-512 variant.
///
/// Calls [`macd_avx512_long`] with the arguments unchanged and returns its
/// result. No input validation happens here.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub fn macd_row_avx512_long(
    data: &[f64],
    fast: usize,
    slow: usize,
    signal: usize,
    ma_type: &str,
    first: usize,
) -> Result<MacdOutput, MacdError> {
    // NOTE(review): the callee indexes without bounds checks; callers are
    // presumably expected to have validated the inputs already — confirm.
    unsafe { macd_avx512_long(data, fast, slow, signal, ma_type, first) }
}
1348
/// Parameter sweep for a batch MACD run.
///
/// Each numeric axis is a `(start, end, step)` triple that `expand_grid`
/// turns into a list of period values.
#[derive(Debug, Clone)]
pub struct MacdBatchRange {
    /// `(start, end, step)` for the fast period.
    pub fast_period: (usize, usize, usize),
    /// `(start, end, step)` for the slow period.
    pub slow_period: (usize, usize, usize),
    /// `(start, end, step)` for the signal period.
    pub signal_period: (usize, usize, usize),
    /// Moving-average type triple; `expand_grid` reads only the first element.
    pub ma_type: (String, String, String),
}
1356
1357impl Default for MacdBatchRange {
1358    fn default() -> Self {
1359        Self {
1360            fast_period: (12, 12, 0),
1361            slow_period: (26, 275, 1),
1362            signal_period: (9, 9, 0),
1363            ma_type: ("ema".to_string(), "ema".to_string(), "".to_string()),
1364        }
1365    }
1366}
1367
1368#[derive(Clone, Debug, Default)]
1369pub struct MacdBatchBuilder {
1370    range: MacdBatchRange,
1371    kernel: Kernel,
1372}
1373
1374impl MacdBatchBuilder {
1375    pub fn new() -> Self {
1376        Self::default()
1377    }
1378    pub fn kernel(mut self, k: Kernel) -> Self {
1379        self.kernel = k;
1380        self
1381    }
1382    #[inline]
1383    pub fn fast_period_range(mut self, start: usize, end: usize, step: usize) -> Self {
1384        self.range.fast_period = (start, end, step);
1385        self
1386    }
1387    #[inline]
1388    pub fn slow_period_range(mut self, start: usize, end: usize, step: usize) -> Self {
1389        self.range.slow_period = (start, end, step);
1390        self
1391    }
1392    #[inline]
1393    pub fn signal_period_range(mut self, start: usize, end: usize, step: usize) -> Self {
1394        self.range.signal_period = (start, end, step);
1395        self
1396    }
1397    #[inline]
1398    pub fn ma_type_static(mut self, s: &str) -> Self {
1399        self.range.ma_type = (s.to_string(), s.to_string(), "".to_string());
1400        self
1401    }
1402
1403    pub fn apply_slice(self, data: &[f64]) -> Result<MacdBatchOutput, MacdError> {
1404        macd_batch_with_kernel(data, &self.range, self.kernel)
1405    }
1406    pub fn with_default_slice(data: &[f64], k: Kernel) -> Result<MacdBatchOutput, MacdError> {
1407        MacdBatchBuilder::new().kernel(k).apply_slice(data)
1408    }
1409    pub fn apply_candles(self, c: &Candles, src: &str) -> Result<MacdBatchOutput, MacdError> {
1410        let slice = source_type(c, src);
1411        self.apply_slice(slice)
1412    }
1413    pub fn with_default_candles(c: &Candles) -> Result<MacdBatchOutput, MacdError> {
1414        MacdBatchBuilder::new()
1415            .kernel(Kernel::Auto)
1416            .apply_candles(c, "close")
1417    }
1418}
1419
1420pub fn macd_batch_with_kernel(
1421    data: &[f64],
1422    sweep: &MacdBatchRange,
1423    k: Kernel,
1424) -> Result<MacdBatchOutput, MacdError> {
1425    let kernel = match k {
1426        Kernel::Auto => detect_best_batch_kernel(),
1427        other if other.is_batch() => other,
1428        _ => {
1429            return Err(MacdError::InvalidKernelForBatch(k));
1430        }
1431    };
1432    let simd = match kernel {
1433        Kernel::Avx512Batch => Kernel::Avx512,
1434        Kernel::Avx2Batch => Kernel::Avx2,
1435        Kernel::ScalarBatch => Kernel::Scalar,
1436        _ => unreachable!(),
1437    };
1438    macd_batch_par_slice(data, sweep, simd)
1439}
1440
1441#[derive(Clone, Debug)]
1442pub struct MacdBatchOutput {
1443    pub macd: Vec<f64>,
1444    pub signal: Vec<f64>,
1445    pub hist: Vec<f64>,
1446    pub combos: Vec<MacdParams>,
1447    pub rows: usize,
1448    pub cols: usize,
1449}
1450
1451#[inline(always)]
1452pub fn expand_grid(r: &MacdBatchRange) -> Result<Vec<MacdParams>, MacdError> {
1453    fn axis_usize((start, end, step): (usize, usize, usize)) -> Result<Vec<usize>, MacdError> {
1454        let (start, end, step) = (start, end, step);
1455        if step == 0 || start == end {
1456            return Ok(vec![start]);
1457        }
1458        if start < end {
1459            let mut out = Vec::new();
1460            let mut v = start;
1461            loop {
1462                out.push(v);
1463                match v.checked_add(step) {
1464                    Some(next) if next <= end => {
1465                        v = next;
1466                    }
1467                    Some(_) | None => break,
1468                }
1469            }
1470            if out.is_empty() {
1471                return Err(MacdError::InvalidRange { start, end, step });
1472            }
1473            Ok(out)
1474        } else {
1475            let mut out = Vec::new();
1476            let mut v = start;
1477            loop {
1478                out.push(v);
1479                if v <= end {
1480                    break;
1481                }
1482                if v < step {
1483                    break;
1484                }
1485                v -= step;
1486                if v < end {
1487                    break;
1488                }
1489            }
1490            if out.is_empty() {
1491                return Err(MacdError::InvalidRange { start, end, step });
1492            }
1493            Ok(out)
1494        }
1495    }
1496    let fasts = axis_usize(r.fast_period)?;
1497    let slows = axis_usize(r.slow_period)?;
1498    let signals = axis_usize(r.signal_period)?;
1499    let ma_types = vec![r.ma_type.0.clone()];
1500
1501    let mut combos = vec![];
1502    for &f in &fasts {
1503        for &s in &slows {
1504            for &g in &signals {
1505                for t in &ma_types {
1506                    combos.push(MacdParams {
1507                        fast_period: Some(f),
1508                        slow_period: Some(s),
1509                        signal_period: Some(g),
1510                        ma_type: Some(t.clone()),
1511                    });
1512                }
1513            }
1514        }
1515    }
1516    if combos.is_empty() {
1517        return Err(MacdError::InvalidRange {
1518            start: r.fast_period.0,
1519            end: r.fast_period.1,
1520            step: r.fast_period.2,
1521        });
1522    }
1523    Ok(combos)
1524}
1525
1526pub fn macd_batch_par_slice(
1527    data: &[f64],
1528    sweep: &MacdBatchRange,
1529    _simd: Kernel,
1530) -> Result<MacdBatchOutput, MacdError> {
1531    let combos = expand_grid(sweep)?;
1532    let rows = combos.len();
1533    let cols = data.len();
1534    if cols == 0 {
1535        return Err(MacdError::EmptyInputData);
1536    }
1537
1538    let mut macd_mu = make_uninit_matrix(rows, cols);
1539    let mut sig_mu = make_uninit_matrix(rows, cols);
1540    let mut hist_mu = make_uninit_matrix(rows, cols);
1541
1542    let first = data.iter().position(|x| !x.is_nan()).unwrap_or(0);
1543    let macd_warm: Vec<usize> = combos
1544        .iter()
1545        .map(|p| {
1546            let slow = p.slow_period.unwrap_or(26);
1547            first + slow - 1
1548        })
1549        .collect();
1550    let warm: Vec<usize> = combos
1551        .iter()
1552        .map(|p| {
1553            let slow = p.slow_period.unwrap_or(26);
1554            let signal = p.signal_period.unwrap_or(9);
1555            first + slow + signal - 2
1556        })
1557        .collect();
1558
1559    init_matrix_prefixes(&mut macd_mu, cols, &macd_warm);
1560    init_matrix_prefixes(&mut sig_mu, cols, &warm);
1561    init_matrix_prefixes(&mut hist_mu, cols, &warm);
1562
1563    let mut macd_guard = core::mem::ManuallyDrop::new(macd_mu);
1564    let mut sig_guard = core::mem::ManuallyDrop::new(sig_mu);
1565    let mut hist_guard = core::mem::ManuallyDrop::new(hist_mu);
1566
1567    let macd_out: &mut [f64] = unsafe {
1568        core::slice::from_raw_parts_mut(macd_guard.as_mut_ptr() as *mut f64, macd_guard.len())
1569    };
1570    let sig_out: &mut [f64] = unsafe {
1571        core::slice::from_raw_parts_mut(sig_guard.as_mut_ptr() as *mut f64, sig_guard.len())
1572    };
1573    let hist_out: &mut [f64] = unsafe {
1574        core::slice::from_raw_parts_mut(hist_guard.as_mut_ptr() as *mut f64, hist_guard.len())
1575    };
1576
1577    for (row, prm) in combos.iter().enumerate() {
1578        let fast = prm.fast_period.unwrap_or(12);
1579        let slow = prm.slow_period.unwrap_or(26);
1580        let signal = prm.signal_period.unwrap_or(9);
1581        let ma_t = prm.ma_type.as_deref().unwrap_or("ema");
1582
1583        let r0 = row * cols;
1584        let r1 = r0 + cols;
1585
1586        let _ = macd_compute_into(
1587            data,
1588            fast,
1589            slow,
1590            signal,
1591            ma_t,
1592            first,
1593            &mut macd_out[r0..r1],
1594            &mut sig_out[r0..r1],
1595            &mut hist_out[r0..r1],
1596        );
1597    }
1598
1599    let macd = unsafe {
1600        Vec::from_raw_parts(
1601            macd_guard.as_mut_ptr() as *mut f64,
1602            macd_guard.len(),
1603            macd_guard.capacity(),
1604        )
1605    };
1606    let signal = unsafe {
1607        Vec::from_raw_parts(
1608            sig_guard.as_mut_ptr() as *mut f64,
1609            sig_guard.len(),
1610            sig_guard.capacity(),
1611        )
1612    };
1613    let hist = unsafe {
1614        Vec::from_raw_parts(
1615            hist_guard.as_mut_ptr() as *mut f64,
1616            hist_guard.len(),
1617            hist_guard.capacity(),
1618        )
1619    };
1620
1621    Ok(MacdBatchOutput {
1622        macd,
1623        signal,
1624        hist,
1625        combos,
1626        rows,
1627        cols,
1628    })
1629}
1630
/// Computes the batch MACD sweep directly into caller-provided buffers.
///
/// Each output slice must hold exactly `rows * cols` elements, where `rows`
/// is the number of combinations produced by `expand_grid(sweep)` and `cols`
/// is `data.len()`; row `r` occupies `[r * cols, (r + 1) * cols)`.
///
/// The warm-up prefix of every row is set to NaN before the row is computed.
/// When `fill_invalid` is `true`, a row whose computation fails is filled
/// entirely with NaN, so buffers handed in uninitialised end up fully
/// written. When it is `false`, a failed row keeps whatever the caller had
/// stored past the warm-up prefix, as before. `_simd` is currently not
/// consulted.
///
/// # Errors
///
/// * `MacdError::OutputLengthMismatch` if any output slice has the wrong
///   length.
/// * `MacdError::InvalidRange` if `rows * cols` overflows `usize`.
/// * Whatever `expand_grid` returns.
#[cfg(any(feature = "python", feature = "wasm"))]
pub fn macd_batch_inner_into(
    data: &[f64],
    sweep: &MacdBatchRange,
    _simd: Kernel,
    fill_invalid: bool,
    macd_out: &mut [f64],
    signal_out: &mut [f64],
    hist_out: &mut [f64],
) -> Result<Vec<MacdParams>, MacdError> {
    let combos = expand_grid(sweep)?;
    let rows = combos.len();
    let cols = data.len();

    let Some(expected) = rows.checked_mul(cols) else {
        return Err(MacdError::InvalidRange {
            start: sweep.fast_period.0,
            end: sweep.fast_period.1,
            step: sweep.fast_period.2,
        });
    };
    if macd_out.len() != expected || signal_out.len() != expected || hist_out.len() != expected {
        let got = macd_out.len().max(signal_out.len()).max(hist_out.len());
        return Err(MacdError::OutputLengthMismatch { expected, got });
    }

    let first = data.iter().position(|x| !x.is_nan()).unwrap_or(0);

    for (row, prm) in combos.iter().enumerate() {
        let r0 = row * cols;
        let r1 = r0 + cols;

        let fast_period = prm.fast_period.unwrap_or(12);
        let slow_period = prm.slow_period.unwrap_or(26);
        let signal_period = prm.signal_period.unwrap_or(9);

        // Saturating arithmetic keeps a period of 0 from underflowing; the
        // clamp keeps the prefix inside the row.
        let macd_warmup = first
            .saturating_add(slow_period)
            .saturating_sub(1)
            .min(cols);
        let signal_warmup = first
            .saturating_add(slow_period)
            .saturating_add(signal_period)
            .saturating_sub(2)
            .min(cols);

        macd_out[r0..r0 + macd_warmup].fill(f64::NAN);
        signal_out[r0..r0 + signal_warmup].fill(f64::NAN);
        hist_out[r0..r0 + signal_warmup].fill(f64::NAN);

        let computed = macd_compute_into(
            data,
            fast_period,
            slow_period,
            signal_period,
            prm.ma_type.as_deref().unwrap_or("ema"),
            first,
            &mut macd_out[r0..r1],
            &mut signal_out[r0..r1],
            &mut hist_out[r0..r1],
        );

        // Mark rows that could not be computed instead of silently leaving
        // stale or uninitialised values behind.
        if computed.is_err() && fill_invalid {
            macd_out[r0..r1].fill(f64::NAN);
            signal_out[r0..r1].fill(f64::NAN);
            hist_out[r0..r1].fill(f64::NAN);
        }
    }
    Ok(combos)
}
1691
/// Python binding `macd(data, fast_period, slow_period, signal_period, ma_type, kernel=None)`.
///
/// Runs `macd_with_kernel` on the NumPy input with the GIL released and
/// returns the `(macd, signal, hist)` arrays. The result vectors are moved
/// into NumPy arrays directly, so no intermediate output buffers are
/// allocated or pre-filled and no warm-up arithmetic is performed on the
/// user-supplied periods here.
///
/// # Errors
///
/// * Propagates the error from `data.as_slice()` and from `validate_kernel`.
/// * Converts any error from `macd_with_kernel` into a `ValueError` carrying
///   the error's message.
#[cfg(feature = "python")]
#[pyfunction(name = "macd")]
#[pyo3(signature = (data, fast_period, slow_period, signal_period, ma_type, kernel=None))]
pub fn macd_py<'py>(
    py: Python<'py>,
    data: PyReadonlyArray1<'py, f64>,
    fast_period: usize,
    slow_period: usize,
    signal_period: usize,
    ma_type: &str,
    kernel: Option<&str>,
) -> PyResult<(
    Bound<'py, PyArray1<f64>>,
    Bound<'py, PyArray1<f64>>,
    Bound<'py, PyArray1<f64>>,
)> {
    use numpy::IntoPyArray;

    let slice_in = data.as_slice()?;
    let kern = validate_kernel(kernel, false)?;

    let params = MacdParams {
        fast_period: Some(fast_period),
        slow_period: Some(slow_period),
        signal_period: Some(signal_period),
        ma_type: Some(ma_type.to_string()),
    };
    let input = MacdInput::from_slice(slice_in, params);

    // The computation does not touch Python objects, so release the GIL.
    let result = py
        .allow_threads(|| macd_with_kernel(&input, kern))
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    // Hand the vectors to NumPy without copying.
    Ok((
        result.macd.into_pyarray(py),
        result.signal.into_pyarray(py),
        result.hist.into_pyarray(py),
    ))
}
1758
/// Python-facing streaming MACD, exposed to Python as `MacdStream`.
#[cfg(feature = "python")]
#[pyclass(name = "MacdStream")]
pub struct MacdStreamPy {
    /// Underlying stream; it is consulted first on every update.
    stream: MacdStream,
    /// Values collected for the non-EMA path, which recomputes over the
    /// whole buffer.
    data_buffer: Vec<f64>,
    /// Fast period passed at construction.
    fast_period: usize,
    /// Slow period passed at construction.
    slow_period: usize,
    /// Signal period passed at construction.
    signal_period: usize,
    /// Moving-average type passed at construction.
    ma_type: String,
}
1769
#[cfg(feature = "python")]
#[pymethods]
impl MacdStreamPy {
    /// Creates the stream and remembers the parameters for the buffered path.
    #[new]
    fn new(
        fast_period: usize,
        slow_period: usize,
        signal_period: usize,
        ma_type: &str,
    ) -> PyResult<Self> {
        let stream = MacdStream::new(fast_period, slow_period, signal_period, ma_type);
        Ok(Self {
            stream,
            data_buffer: Vec::new(),
            fast_period,
            slow_period,
            signal_period,
            ma_type: ma_type.to_string(),
        })
    }

    /// Feeds one value and returns `(macd, signal, hist)` when available.
    ///
    /// The underlying stream is tried first. If it yields nothing and the
    /// moving-average type is not `"ema"` (ASCII case-insensitive), the value
    /// is appended to the internal buffer. Once the buffer holds at least
    /// `slow_period + signal_period - 1` values, MACD is recomputed over the
    /// whole buffer and the last element of each output is returned. Any
    /// computation error yields `None`.
    fn update(&mut self, value: f64) -> Option<(f64, f64, f64)> {
        let streamed = self.stream.update(value);
        if streamed.is_some() {
            return streamed;
        }

        if self.ma_type.eq_ignore_ascii_case("ema") {
            return None;
        }

        self.data_buffer.push(value);

        let min_needed = self.slow_period + self.signal_period - 1;
        if self.data_buffer.len() < min_needed {
            return None;
        }

        let params = MacdParams {
            fast_period: Some(self.fast_period),
            slow_period: Some(self.slow_period),
            signal_period: Some(self.signal_period),
            ma_type: Some(self.ma_type.clone()),
        };
        let input = MacdInput::from_slice(&self.data_buffer, params);

        let output = macd(&input).ok()?;
        let last_idx = output.macd.len() - 1;
        Some((
            output.macd[last_idx],
            output.signal[last_idx],
            output.hist[last_idx],
        ))
    }
}
1827
/// Python binding `macd_batch(data, fast_period_range, slow_period_range,
/// signal_period_range, ma_type, kernel=None)`.
///
/// Each range is a `(start, end, step)` triple expanded by `expand_grid`.
/// Returns a dict with `"macd"`, `"signal"` and `"hist"` as `(rows, cols)`
/// arrays, plus `"fast_periods"`, `"slow_periods"` and `"signal_periods"`
/// describing the parameters of each row.
///
/// # Errors
///
/// Raises `ValueError` when the input is empty or all-NaN, when the range
/// cannot be expanded, when `rows * cols` overflows, when the resolved kernel
/// is not a batch kernel, or when the batch computation reports an error.
/// Errors from `validate_kernel` and from NumPy are propagated as they are.
#[cfg(feature = "python")]
#[pyfunction(name = "macd_batch")]
#[pyo3(signature = (data, fast_period_range, slow_period_range, signal_period_range, ma_type, kernel=None))]
pub fn macd_batch_py<'py>(
    py: Python<'py>,
    data: PyReadonlyArray1<'py, f64>,
    fast_period_range: (usize, usize, usize),
    slow_period_range: (usize, usize, usize),
    signal_period_range: (usize, usize, usize),
    ma_type: &str,
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyDict>> {
    use numpy::{IntoPyArray, PyArray1, PyArrayMethods};
    use pyo3::types::PyDict;

    let slice_in = data.as_slice()?;

    if slice_in.is_empty() {
        return Err(PyValueError::new_err("macd: Input data slice is empty"));
    }

    if slice_in.iter().all(|x| x.is_nan()) {
        return Err(PyValueError::new_err("macd: All values are NaN"));
    }

    let kern = validate_kernel(kernel, true)?;

    let sweep = MacdBatchRange {
        fast_period: fast_period_range,
        slow_period: slow_period_range,
        signal_period: signal_period_range,
        ma_type: (ma_type.to_string(), ma_type.to_string(), String::new()),
    };

    // Only the row count is needed up front to size the output arrays.
    let rows = expand_grid(&sweep)
        .map_err(|e| PyValueError::new_err(e.to_string()))?
        .len();
    let cols = slice_in.len();
    let total = rows
        .checked_mul(cols)
        .ok_or_else(|| PyValueError::new_err("macd: rows * cols overflows usize"))?;

    // SAFETY (review note): the arrays start uninitialised; the batch call
    // below is invoked with `fill_invalid = true` and is relied upon to write
    // them before they are exposed to Python.
    let macd_arr = unsafe { PyArray1::<f64>::new(py, [total], false) };
    let signal_arr = unsafe { PyArray1::<f64>::new(py, [total], false) };
    let hist_arr = unsafe { PyArray1::<f64>::new(py, [total], false) };

    let macd_slice = unsafe { macd_arr.as_slice_mut()? };
    let signal_slice = unsafe { signal_arr.as_slice_mut()? };
    let hist_slice = unsafe { hist_arr.as_slice_mut()? };

    let combos = py
        .allow_threads(|| {
            let kernel = match kern {
                Kernel::Auto => detect_best_batch_kernel(),
                k => k,
            };
            // Report an unexpected kernel as an error instead of panicking,
            // matching `macd_batch_with_kernel`.
            let simd = match kernel {
                Kernel::Avx512Batch => Kernel::Avx512,
                Kernel::Avx2Batch => Kernel::Avx2,
                Kernel::ScalarBatch => Kernel::Scalar,
                other => return Err(MacdError::InvalidKernelForBatch(other)),
            };
            macd_batch_inner_into(
                slice_in,
                &sweep,
                simd,
                true,
                macd_slice,
                signal_slice,
                hist_slice,
            )
        })
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    let dict = PyDict::new(py);
    dict.set_item("macd", macd_arr.reshape((rows, cols))?)?;
    dict.set_item("signal", signal_arr.reshape((rows, cols))?)?;
    dict.set_item("hist", hist_arr.reshape((rows, cols))?)?;
    dict.set_item(
        "fast_periods",
        combos
            .iter()
            .map(|p| p.fast_period.expect("expand_grid always sets fast_period") as u64)
            .collect::<Vec<_>>()
            .into_pyarray(py),
    )?;
    dict.set_item(
        "slow_periods",
        combos
            .iter()
            .map(|p| p.slow_period.expect("expand_grid always sets slow_period") as u64)
            .collect::<Vec<_>>()
            .into_pyarray(py),
    )?;
    dict.set_item(
        "signal_periods",
        combos
            .iter()
            .map(|p| p.signal_period.expect("expand_grid always sets signal_period") as u64)
            .collect::<Vec<_>>()
            .into_pyarray(py),
    )?;

    Ok(dict)
}
1929
/// Adds the `macd` and `macd_batch` Python functions to module `m`.
///
/// NOTE(review): the `MacdStream` class is not added here; presumably it is
/// registered elsewhere — confirm.
#[cfg(feature = "python")]
pub fn register_macd_module(m: &Bound<'_, pyo3::types::PyModule>) -> PyResult<()> {
    let single = wrap_pyfunction!(macd_py, m)?;
    m.add_function(single)?;

    let batch = wrap_pyfunction!(macd_batch_py, m)?;
    m.add_function(batch)?;

    Ok(())
}
1936
/// Python wrapper around a CUDA-side MACD result array.
///
/// Declared `unsendable` and placed in the `ta_indicators.cuda` Python
/// module.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyclass(module = "ta_indicators.cuda", unsendable)]
pub struct DeviceArrayF32MacdPy {
    /// Wrapped device array from the CUDA MACD wrapper.
    pub(crate) inner: crate::cuda::oscillators::macd_wrapper::DeviceArrayF32Macd,
}
1942
// Python-facing methods exposing the wrapped device array through the CUDA Array Interface
// and the DLPack protocol.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pymethods]
impl DeviceArrayF32MacdPy {
    // Builds the `__cuda_array_interface__` dictionary (version 3) describing a row-major
    // `rows x cols` array of little-endian f32 values.
    #[getter]
    fn __cuda_array_interface__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
        let elem_bytes = std::mem::size_of::<f32>();
        let n_rows = self.inner.rows;
        let n_cols = self.inner.cols;

        let iface = PyDict::new(py);
        iface.set_item("shape", (n_rows, n_cols))?;
        iface.set_item("typestr", "<f4")?;
        iface.set_item("strides", (n_cols * elem_bytes, elem_bytes))?;

        // An empty array reports address 0 instead of querying the device buffer.
        let address = if n_rows != 0 && n_cols != 0 {
            self.inner.device_ptr() as usize
        } else {
            0usize
        };
        // Second tuple element is the interface's read-only flag.
        iface.set_item("data", (address, false))?;
        iface.set_item("version", 3)?;
        Ok(iface)
    }

    // Returns `(device_type, device_id)`; device type 2 is the DLPack code for CUDA.
    fn __dlpack_device__(&self) -> (i32, i32) {
        let device_type = 2;
        (device_type, self.inner.device_id as i32)
    }

    // Exports the buffer as a DLPack capsule. The device buffer is moved out of `self`,
    // leaving an empty 0 x 0 placeholder behind, so the array can be exported only once.
    //
    // Errors (all `ValueError`):
    // - `dl_device` names a different device (message depends on whether `copy` is truthy),
    // - `stream` is the integer 0,
    // - the placeholder buffer cannot be created.
    #[pyo3(signature = (stream=None, max_version=None, dl_device=None, copy=None))]
    fn __dlpack__<'py>(
        &mut self,
        py: Python<'py>,
        stream: Option<pyo3::PyObject>,
        max_version: Option<pyo3::PyObject>,
        dl_device: Option<pyo3::PyObject>,
        copy: Option<pyo3::PyObject>,
    ) -> PyResult<PyObject> {
        let (kdl, alloc_dev) = self.__dlpack_device__();

        // A `dl_device` that is absent or is not an `(int, int)` tuple is ignored.
        let requested = dl_device
            .as_ref()
            .and_then(|dev| dev.extract::<(i32, i32)>(py).ok());
        if let Some((dev_ty, dev_id)) = requested {
            let same_device = dev_ty == kdl && dev_id == alloc_dev;
            if !same_device {
                // `copy` only influences which message is reported; no copy is performed.
                let wants_copy = match copy.as_ref() {
                    Some(flag) => flag.extract::<bool>(py).unwrap_or(false),
                    None => false,
                };
                let reason = if wants_copy {
                    "device copy not implemented for __dlpack__"
                } else {
                    "dl_device mismatch for __dlpack__"
                };
                return Err(PyValueError::new_err(reason));
            }
        }

        // Non-integer stream objects are ignored; only the integer 0 is rejected.
        let stream_id = stream.as_ref().and_then(|s| s.extract::<i64>(py).ok());
        if stream_id == Some(0) {
            return Err(PyValueError::new_err(
                "__dlpack__: stream 0 is disallowed for CUDA",
            ));
        }

        let placeholder = cust::memory::DeviceBuffer::from_slice(&[])
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        let emptied = crate::cuda::oscillators::macd_wrapper::DeviceArrayF32Macd {
            buf: placeholder,
            rows: 0,
            cols: 0,
            ctx: self.inner.ctx.clone(),
            device_id: self.inner.device_id,
        };
        let taken = std::mem::replace(&mut self.inner, emptied);

        let (rows, cols) = (taken.rows, taken.cols);
        let buf = taken.buf;

        let max_version_bound = max_version.map(|obj| obj.into_bound(py));

        export_f32_cuda_dlpack_2d(py, buf, rows, cols, alloc_dev, max_version_bound)
    }
}
2034
// Python entry point `macd_cuda_batch_dev`: runs a MACD parameter sweep on the GPU for one
// f32 series and returns a dict holding the three device arrays ("macd", "signal", "hist"),
// the per-row period arrays, the per-row "ma_types" list, and the "rows"/"cols" sizes.
//
// Raises `ValueError` when CUDA is unavailable, when `ma_type` is not "ema"
// (ASCII case-insensitive), or when the CUDA wrapper reports an error.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "macd_cuda_batch_dev")]
#[pyo3(signature = (data_f32, fast_range, slow_range, signal_range, ma_type="ema", device_id=0))]
pub fn macd_cuda_batch_dev_py<'py>(
    py: Python<'py>,
    data_f32: numpy::PyReadonlyArray1<'py, f32>,
    fast_range: (usize, usize, usize),
    slow_range: (usize, usize, usize),
    signal_range: (usize, usize, usize),
    ma_type: &str,
    device_id: usize,
) -> PyResult<Bound<'py, pyo3::types::PyDict>> {
    use crate::cuda::cuda_available;
    use crate::cuda::oscillators::macd_wrapper::DeviceMacdTriplet;
    use crate::cuda::oscillators::CudaMacd;
    use numpy::IntoPyArray;
    use pyo3::types::PyList;

    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    if !ma_type.eq_ignore_ascii_case("ema") {
        return Err(PyValueError::new_err(
            "macd_cuda: only ma_type=\"ema\" is supported on CUDA",
        ));
    }

    let slice = data_f32.as_slice()?;
    let ma_owned = ma_type.to_string();
    let sweep = MacdBatchRange {
        fast_period: fast_range,
        slow_period: slow_range,
        signal_period: signal_range,
        ma_type: (ma_owned.clone(), ma_owned, String::new()),
    };

    // The GPU work runs with the GIL released.
    let (outputs, combos) = py.allow_threads(|| {
        CudaMacd::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))
            .and_then(|cuda| {
                cuda.macd_batch_dev(slice, &sweep)
                    .map_err(|e| PyValueError::new_err(e.to_string()))
            })
    })?;

    let DeviceMacdTriplet { macd, signal, hist } = outputs;
    let dict = pyo3::types::PyDict::new(py);
    for (key, device_array) in [("macd", macd), ("signal", signal), ("hist", hist)] {
        let wrapped = Py::new(
            py,
            DeviceArrayF32MacdPy {
                inner: device_array,
            },
        )?;
        dict.set_item(key, wrapped)?;
    }

    // One entry per parameter combination, in the order the CUDA wrapper returned them.
    let n_combos = combos.len();
    let mut fasts: Vec<u64> = Vec::with_capacity(n_combos);
    let mut slows: Vec<u64> = Vec::with_capacity(n_combos);
    let mut signals: Vec<u64> = Vec::with_capacity(n_combos);
    for combo in combos.iter() {
        fasts.push(combo.fast_period.unwrap() as u64);
        slows.push(combo.slow_period.unwrap() as u64);
        signals.push(combo.signal_period.unwrap() as u64);
    }
    let ma_types = PyList::new(py, vec![ma_type; n_combos])?;

    dict.set_item("fast_periods", fasts.into_pyarray(py))?;
    dict.set_item("slow_periods", slows.into_pyarray(py))?;
    dict.set_item("signal_periods", signals.into_pyarray(py))?;
    dict.set_item("ma_types", ma_types)?;
    dict.set_item("rows", n_combos)?;
    dict.set_item("cols", slice.len())?;
    Ok(dict)
}
2105
// Python entry point `macd_cuda_many_series_one_param_dev`: computes MACD on the GPU for a
// 2D f32 array using one parameter set and returns a dict with the three device arrays plus
// the "rows"/"cols" sizes and the parameters that were used.
//
// Raises `ValueError` when CUDA is unavailable, when `ma_type` is not "ema"
// (ASCII case-insensitive), when the array is not 2D, or when the CUDA wrapper fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "macd_cuda_many_series_one_param_dev")]
#[pyo3(signature = (data_tm_f32, fast_period, slow_period, signal_period, ma_type="ema", device_id=0))]
pub fn macd_cuda_many_series_one_param_dev_py<'py>(
    py: Python<'py>,
    data_tm_f32: numpy::PyReadonlyArray2<'py, f32>,
    fast_period: usize,
    slow_period: usize,
    signal_period: usize,
    ma_type: &str,
    device_id: usize,
) -> PyResult<Bound<'py, pyo3::types::PyDict>> {
    use crate::cuda::cuda_available;
    use crate::cuda::oscillators::macd_wrapper::DeviceMacdTriplet;
    use crate::cuda::oscillators::CudaMacd;
    use numpy::PyUntypedArrayMethods;

    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    if !ma_type.eq_ignore_ascii_case("ema") {
        return Err(PyValueError::new_err(
            "macd_cuda: only ma_type=\"ema\" is supported on CUDA",
        ));
    }

    let (rows, cols) = match data_tm_f32.shape() {
        &[r, c] => (r, c),
        _ => return Err(PyValueError::new_err("expected 2D array")),
    };
    let flat = data_tm_f32.as_slice()?;
    let params = MacdParams {
        fast_period: Some(fast_period),
        slow_period: Some(slow_period),
        signal_period: Some(signal_period),
        ma_type: Some(ma_type.to_string()),
    };

    // The GPU work runs with the GIL released. Note the wrapper receives `cols` before `rows`.
    let triplet = py.allow_threads(|| {
        CudaMacd::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))
            .and_then(|cuda| {
                cuda.macd_many_series_one_param_time_major_dev(flat, cols, rows, &params)
                    .map_err(|e| PyValueError::new_err(e.to_string()))
            })
    })?;
    let DeviceMacdTriplet { macd, signal, hist } = triplet;

    let dict = pyo3::types::PyDict::new(py);
    for (key, device_array) in [("macd", macd), ("signal", signal), ("hist", hist)] {
        let wrapped = Py::new(
            py,
            DeviceArrayF32MacdPy {
                inner: device_array,
            },
        )?;
        dict.set_item(key, wrapped)?;
    }
    dict.set_item("rows", rows)?;
    dict.set_item("cols", cols)?;
    dict.set_item("fast_period", fast_period)?;
    dict.set_item("slow_period", slow_period)?;
    dict.set_item("signal_period", signal_period)?;
    dict.set_item("ma_type", ma_type)?;
    Ok(dict)
}
2164
/// Flattened single-run MACD result handed to JavaScript.
///
/// As produced by `macd_js`, `values` is the MACD line, the signal line and the histogram
/// stored back to back, so `rows` is 3 and `cols` is the input length.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
#[derive(Serialize, Deserialize)]
pub struct MacdResult {
    /// Row-major payload of `rows * cols` values.
    values: Vec<f64>,
    /// Number of stacked series.
    rows: usize,
    /// Length of each series.
    cols: usize,
}
2173
/// JavaScript-visible read-only accessors for [`MacdResult`].
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
impl MacdResult {
    /// Returns a fresh copy of the flattened values.
    #[wasm_bindgen(getter)]
    pub fn values(&self) -> Vec<f64> {
        self.values.to_vec()
    }

    /// Number of stacked series in `values`.
    #[wasm_bindgen(getter)]
    pub fn rows(&self) -> usize {
        let Self { rows, .. } = self;
        *rows
    }

    /// Length of each series in `values`.
    #[wasm_bindgen(getter)]
    pub fn cols(&self) -> usize {
        let Self { cols, .. } = self;
        *cols
    }
}
2192
/// Computes MACD for `data` and returns the three series flattened into one [`MacdResult`]
/// (`rows = 3`, `cols = data.len()`, ordered MACD, signal, histogram).
///
/// # Errors
/// Returns the stringified `MacdError` when the input is empty, contains only NaN values,
/// any period is zero or longer than the input, or fewer than `slow_period` values remain
/// from the first non-NaN value onwards. Errors from `macd_compute_into` are forwarded too.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn macd_js(
    data: &[f64],
    fast_period: usize,
    slow_period: usize,
    signal_period: usize,
    ma_type: &str,
) -> Result<MacdResult, JsValue> {
    let to_js = |err: MacdError| JsValue::from_str(&err.to_string());

    let len = data.len();
    if len == 0 {
        return Err(to_js(MacdError::EmptyInputData));
    }

    let first = match data.iter().position(|v| !v.is_nan()) {
        Some(idx) => idx,
        None => return Err(to_js(MacdError::AllValuesNaN)),
    };

    let periods = [fast_period, slow_period, signal_period];
    if periods.iter().any(|&p| p == 0 || p > len) {
        return Err(to_js(MacdError::InvalidPeriod {
            fast: fast_period,
            slow: slow_period,
            signal: signal_period,
            data_len: len,
        }));
    }

    let valid = len - first;
    if valid < slow_period {
        return Err(to_js(MacdError::NotEnoughValidData {
            needed: slow_period,
            valid,
        }));
    }

    // Length of the NaN prefix for the MACD line and for the signal/histogram lines.
    let macd_warmup = first + slow_period - 1;
    let signal_warmup = first + slow_period + signal_period - 2;

    let mut macd = alloc_with_nan_prefix(len, macd_warmup);
    let mut signal = alloc_with_nan_prefix(len, signal_warmup);
    let mut hist = alloc_with_nan_prefix(len, signal_warmup);

    macd_compute_into(
        data,
        fast_period,
        slow_period,
        signal_period,
        ma_type,
        first,
        &mut macd,
        &mut signal,
        &mut hist,
    )
    .map_err(|e| JsValue::from_str(&e.to_string()))?;

    let mut values = Vec::with_capacity(3 * len);
    for series in [&macd, &signal, &hist] {
        values.extend_from_slice(series);
    }

    Ok(MacdResult {
        values,
        rows: 3,
        cols: len,
    })
}
2268
/// Allocates room for `len` `f64` values and returns the raw pointer to it.
///
/// The allocation is deliberately not freed here; hand the pointer back to `macd_free`
/// with the same `len` to release it.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn macd_alloc(len: usize) -> *mut f64 {
    // `ManuallyDrop` suppresses the Vec destructor so the buffer outlives this call.
    let mut buffer = core::mem::ManuallyDrop::new(Vec::<f64>::with_capacity(len));
    buffer.as_mut_ptr()
}
2277
/// Releases a buffer by rebuilding a `Vec` of length and capacity `len` from `ptr` and
/// dropping it. A null `ptr` is ignored.
///
/// The caller is expected to pass a pointer obtained from `macd_alloc` together with the
/// `len` that was used to allocate it.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn macd_free(ptr: *mut f64, len: usize) {
    if ptr.is_null() {
        return;
    }
    // SAFETY: relies on the caller contract above (pointer and length come from `macd_alloc`).
    unsafe {
        drop(Vec::from_raw_parts(ptr, len, len));
    }
}
2287
/// Computes MACD for the `len` values at `in_ptr` and writes the MACD line, signal line and
/// histogram to `macd_ptr`, `signal_ptr` and `hist_ptr`.
///
/// An output pointer may equal `in_ptr`: every result is first computed into an owned
/// temporary buffer and only then copied to the destinations.
///
/// # Safety
/// Although not declared `unsafe`, the function dereferences all four pointers: each must be
/// valid for `len` `f64` values (for example a buffer obtained from `macd_alloc(len)`).
///
/// # Errors
/// - `"Null pointer provided"` if any pointer is null.
/// - The stringified `MacdError` for empty input, all-NaN input, a period that is zero or
///   longer than `len`, or fewer than `slow_period` values from the first non-NaN value on.
///   The aliased path performs these checks itself, mirroring `macd_js`; previously it
///   skipped them, so `slow_period == 0` wrapped the warm-up arithmetic and all-NaN input
///   was silently treated as starting at index 0.
/// - Any error reported by `macd` / `macd_compute_into`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn macd_into(
    in_ptr: *const f64,
    macd_ptr: *mut f64,
    signal_ptr: *mut f64,
    hist_ptr: *mut f64,
    len: usize,
    fast_period: usize,
    slow_period: usize,
    signal_period: usize,
    ma_type: &str,
) -> Result<(), JsValue> {
    if in_ptr.is_null() || macd_ptr.is_null() || signal_ptr.is_null() || hist_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    // SAFETY: all pointers are non-null (checked above); the caller guarantees each one is
    // valid for `len` f64 values. The shared `data` slice is no longer used once the mutable
    // output slices are created, so an output aliasing the input is never read and written
    // at the same time.
    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);

        let needs_temp = in_ptr == macd_ptr as *const f64
            || in_ptr == signal_ptr as *const f64
            || in_ptr == hist_ptr as *const f64;

        if needs_temp {
            let to_js = |err: MacdError| JsValue::from_str(&err.to_string());

            // Same validation sequence as `macd_js`, done before any warm-up arithmetic.
            if len == 0 {
                return Err(to_js(MacdError::EmptyInputData));
            }
            let first = data
                .iter()
                .position(|x| !x.is_nan())
                .ok_or_else(|| to_js(MacdError::AllValuesNaN))?;
            if fast_period == 0
                || slow_period == 0
                || signal_period == 0
                || fast_period > len
                || slow_period > len
                || signal_period > len
            {
                return Err(to_js(MacdError::InvalidPeriod {
                    fast: fast_period,
                    slow: slow_period,
                    signal: signal_period,
                    data_len: len,
                }));
            }
            if len - first < slow_period {
                return Err(to_js(MacdError::NotEnoughValidData {
                    needed: slow_period,
                    valid: len - first,
                }));
            }

            // Cannot underflow: every period is at least 1 at this point.
            let macd_warmup = first + slow_period - 1;
            let signal_warmup = first + slow_period + signal_period - 2;

            let mut temp_macd = alloc_with_nan_prefix(len, macd_warmup);
            let mut temp_signal = alloc_with_nan_prefix(len, signal_warmup);
            let mut temp_hist = alloc_with_nan_prefix(len, signal_warmup);

            macd_compute_into(
                data,
                fast_period,
                slow_period,
                signal_period,
                ma_type,
                first,
                &mut temp_macd,
                &mut temp_signal,
                &mut temp_hist,
            )
            .map_err(|e| JsValue::from_str(&e.to_string()))?;

            let macd_out = std::slice::from_raw_parts_mut(macd_ptr, len);
            let signal_out = std::slice::from_raw_parts_mut(signal_ptr, len);
            let hist_out = std::slice::from_raw_parts_mut(hist_ptr, len);

            macd_out.copy_from_slice(&temp_macd);
            signal_out.copy_from_slice(&temp_signal);
            hist_out.copy_from_slice(&temp_hist);
        } else {
            let params = MacdParams {
                fast_period: Some(fast_period),
                slow_period: Some(slow_period),
                signal_period: Some(signal_period),
                ma_type: Some(ma_type.to_string()),
            };
            let input = MacdInput::from_slice(data, params);
            let result = macd(&input).map_err(|e| JsValue::from_str(&e.to_string()))?;

            let macd_out = std::slice::from_raw_parts_mut(macd_ptr, len);
            let signal_out = std::slice::from_raw_parts_mut(signal_ptr, len);
            let hist_out = std::slice::from_raw_parts_mut(hist_ptr, len);

            macd_out.copy_from_slice(&result.macd);
            signal_out.copy_from_slice(&result.signal);
            hist_out.copy_from_slice(&result.hist);
        }

        Ok(())
    }
}
2363
/// Sweep configuration deserialized from the JavaScript `config` argument of `macd_batch`.
///
/// Each `*_range` is copied verbatim into the matching `MacdBatchRange` field.
/// NOTE(review): the triples are presumably `(start, end, step)` — confirm against
/// `expand_grid`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MacdBatchConfig {
    /// Range triple for the fast period.
    pub fast_period_range: (usize, usize, usize),
    /// Range triple for the slow period.
    pub slow_period_range: (usize, usize, usize),
    /// Range triple for the signal period.
    pub signal_period_range: (usize, usize, usize),
    /// Moving-average type; `macd_batch_js` places it in the first two slots of
    /// `MacdBatchRange::ma_type`.
    pub ma_type: String,
}
2372
/// Batch MACD result serialized back to JavaScript by `macd_batch_js`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MacdBatchJsOutput {
    /// The MACD, signal and histogram matrices stored back to back, each `rows * cols` long.
    pub values: Vec<f64>,
    /// Number of parameter combinations.
    pub rows: usize,
    /// Length of the input series.
    pub cols: usize,
    /// Fast period of each combination, in row order.
    pub fast_periods: Vec<usize>,
    /// Slow period of each combination, in row order.
    pub slow_periods: Vec<usize>,
    /// Signal period of each combination, in row order.
    pub signal_periods: Vec<usize>,
}
2383
/// Runs a MACD parameter sweep over `data` and returns a serialized [`MacdBatchJsOutput`].
///
/// `config` must deserialize into [`MacdBatchConfig`]. The output's `values` holds the
/// MACD, signal and histogram matrices back to back (one row per parameter combination).
///
/// # Errors
/// Returns a string `JsValue` when the input is empty or all NaN, when `config` cannot be
/// deserialized, when `expand_grid` rejects the ranges, when the batch computation fails,
/// or when the result cannot be serialized.
///
/// The three work matrices are released on the computation-error path as well; they used to
/// be leaked there because they were already wrapped in `ManuallyDrop` when `?` returned.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen(js_name = macd_batch)]
pub fn macd_batch_js(data: &[f64], config: JsValue) -> Result<JsValue, JsValue> {
    if data.is_empty() {
        return Err(JsValue::from_str("macd: Input data slice is empty"));
    }

    // Index of the first non-NaN value; its absence is the all-NaN error.
    let first = match data.iter().position(|x| !x.is_nan()) {
        Some(idx) => idx,
        None => return Err(JsValue::from_str("macd: All values are NaN")),
    };

    let config: MacdBatchConfig = serde_wasm_bindgen::from_value(config)
        .map_err(|e| JsValue::from_str(&format!("Invalid config: {}", e)))?;
    let sweep = MacdBatchRange {
        fast_period: config.fast_period_range,
        slow_period: config.slow_period_range,
        signal_period: config.signal_period_range,
        ma_type: (
            config.ma_type.clone(),
            config.ma_type.clone(),
            String::new(),
        ),
    };
    let combos =
        expand_grid(&sweep).map_err(|e| JsValue::from_str(&format!("Invalid range: {}", e)))?;
    let rows = combos.len();
    let cols = data.len();

    // NaN-prefix length per row. Saturating and clamped to the row length so this
    // bookkeeping can neither wrap around (zero periods) nor exceed a row; validating the
    // periods themselves is left to `macd_batch_inner_into`.
    let macd_warm: Vec<usize> = combos
        .iter()
        .map(|p| {
            (first + p.slow_period.unwrap_or(26))
                .saturating_sub(1)
                .min(cols)
        })
        .collect();
    let sig_warm: Vec<usize> = combos
        .iter()
        .map(|p| {
            (first + p.slow_period.unwrap_or(26) + p.signal_period.unwrap_or(9))
                .saturating_sub(2)
                .min(cols)
        })
        .collect();

    let mut macd_mu = make_uninit_matrix(rows, cols);
    let mut sig_mu = make_uninit_matrix(rows, cols);
    let mut hist_mu = make_uninit_matrix(rows, cols);

    init_matrix_prefixes(&mut macd_mu, cols, &macd_warm);
    init_matrix_prefixes(&mut sig_mu, cols, &sig_warm);
    init_matrix_prefixes(&mut hist_mu, cols, &sig_warm);

    let mut macd_guard = core::mem::ManuallyDrop::new(macd_mu);
    let mut sig_guard = core::mem::ManuallyDrop::new(sig_mu);
    let mut hist_guard = core::mem::ManuallyDrop::new(hist_mu);

    // SAFETY: each slice views exactly the `len()` elements owned by its guard, and the
    // guards stay alive (and untouched) while the slices are in use.
    let macd_out: &mut [f64] = unsafe {
        core::slice::from_raw_parts_mut(macd_guard.as_mut_ptr() as *mut f64, macd_guard.len())
    };
    let sig_out: &mut [f64] = unsafe {
        core::slice::from_raw_parts_mut(sig_guard.as_mut_ptr() as *mut f64, sig_guard.len())
    };
    let hist_out: &mut [f64] = unsafe {
        core::slice::from_raw_parts_mut(hist_guard.as_mut_ptr() as *mut f64, hist_guard.len())
    };

    let outcome = macd_batch_inner_into(
        data,
        &sweep,
        detect_best_kernel(),
        true,
        macd_out,
        sig_out,
        hist_out,
    );
    if let Err(e) = outcome {
        // Unwrap the guards so the matrices are freed instead of leaked.
        drop(core::mem::ManuallyDrop::into_inner(macd_guard));
        drop(core::mem::ManuallyDrop::into_inner(sig_guard));
        drop(core::mem::ManuallyDrop::into_inner(hist_guard));
        return Err(JsValue::from_str(&format!(
            "Batch computation error: {}",
            e
        )));
    }

    // SAFETY: pointer, length and capacity come from the guarded Vecs, which are never
    // dropped themselves; ownership of each allocation moves to the rebuilt `Vec<f64>`.
    let macd = unsafe {
        Vec::from_raw_parts(
            macd_guard.as_mut_ptr() as *mut f64,
            macd_guard.len(),
            macd_guard.capacity(),
        )
    };
    let sig = unsafe {
        Vec::from_raw_parts(
            sig_guard.as_mut_ptr() as *mut f64,
            sig_guard.len(),
            sig_guard.capacity(),
        )
    };
    let hist = unsafe {
        Vec::from_raw_parts(
            hist_guard.as_mut_ptr() as *mut f64,
            hist_guard.len(),
            hist_guard.capacity(),
        )
    };

    let mut values = Vec::with_capacity(3 * rows * cols);
    values.extend_from_slice(&macd);
    values.extend_from_slice(&sig);
    values.extend_from_slice(&hist);

    let out = MacdBatchJsOutput {
        values,
        rows,
        cols,
        fast_periods: combos.iter().map(|p| p.fast_period.unwrap()).collect(),
        slow_periods: combos.iter().map(|p| p.slow_period.unwrap()).collect(),
        signal_periods: combos.iter().map(|p| p.signal_period.unwrap()).collect(),
    };
    serde_wasm_bindgen::to_value(&out)
        .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
2493
2494#[cfg(test)]
2495mod tests {
2496    use super::*;
2497    use crate::skip_if_unsupported;
2498    use crate::utilities::data_loader::read_candles_from_csv;
2499    use crate::utilities::enums::Kernel;
2500
/// Returns `true` when both values are NaN or when they differ by at most `eps`.
#[inline]
fn eq_or_both_nan_eps(a: f64, b: f64, eps: f64) -> bool {
    if a.is_nan() && b.is_nan() {
        return true;
    }
    // With exactly one NaN the difference is NaN, so this comparison is false.
    (a - b).abs() <= eps
}
2505
/// Checks that the slice-based `macd_into` produces the same three series as `macd`.
///
/// The whole test is compiled out on wasm builds. Previously only the `macd_into` call was
/// gated, so on those builds the assertions compared the zero-filled output buffers with
/// the NaN-prefixed baseline and could never pass.
#[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
#[test]
fn test_macd_into_matches_api() -> Result<(), Box<dyn Error>> {
    let len = 512usize;
    // Deterministic synthetic series: linear trend plus a sine wave.
    let data: Vec<f64> = (0..len)
        .map(|i| {
            let t = i as f64;
            0.01 * t + (t * 0.07).sin()
        })
        .collect();

    let params = MacdParams::default();
    let input = MacdInput::from_slice(&data, params);

    let baseline = macd(&input)?;

    let mut macd_out = vec![0.0f64; len];
    let mut signal_out = vec![0.0f64; len];
    let mut hist_out = vec![0.0f64; len];
    macd_into(&input, &mut macd_out, &mut signal_out, &mut hist_out)?;

    assert_eq!(baseline.macd.len(), len);
    assert_eq!(baseline.signal.len(), len);
    assert_eq!(baseline.hist.len(), len);

    for i in 0..len {
        let pairs = [
            ("MACD", baseline.macd[i], macd_out[i]),
            ("Signal", baseline.signal[i], signal_out[i]),
            ("Hist", baseline.hist[i], hist_out[i]),
        ];
        for (label, want, got) in pairs {
            assert!(
                eq_or_both_nan_eps(want, got, 1e-12),
                "{} mismatch at index {}: baseline={} into={}",
                label,
                i,
                want,
                got
            );
        }
    }

    Ok(())
}
2556
2557    fn check_macd_partial_params(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2558        skip_if_unsupported!(kernel, test);
2559        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2560        let candles = read_candles_from_csv(file)?;
2561
2562        let default_params = MacdParams {
2563            fast_period: None,
2564            slow_period: None,
2565            signal_period: None,
2566            ma_type: None,
2567        };
2568        let input = MacdInput::from_candles(&candles, "close", default_params);
2569        let output = macd_with_kernel(&input, kernel)?;
2570        assert_eq!(output.macd.len(), candles.close.len());
2571        assert_eq!(output.signal.len(), candles.close.len());
2572        assert_eq!(output.hist.len(), candles.close.len());
2573        Ok(())
2574    }
2575
2576    fn check_macd_accuracy(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2577        skip_if_unsupported!(kernel, test);
2578        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2579        let candles = read_candles_from_csv(file)?;
2580
2581        let params = MacdParams::default();
2582        let input = MacdInput::from_candles(&candles, "close", params);
2583        let result = macd_with_kernel(&input, kernel)?;
2584
2585        let expected_macd = [
2586            -629.8674025082801,
2587            -600.2986584356258,
2588            -581.6188884820076,
2589            -551.1020443476082,
2590            -560.798510688488,
2591        ];
2592        let expected_signal = [
2593            -721.9744591891067,
2594            -697.6392990384105,
2595            -674.4352169271299,
2596            -649.7685824112256,
2597            -631.9745680666781,
2598        ];
2599        let expected_hist = [
2600            92.10705668082664,
2601            97.34064060278467,
2602            92.81632844512228,
2603            98.6665380636174,
2604            71.17605737819008,
2605        ];
2606        let len = result.macd.len();
2607        let start = len - 5;
2608        for i in 0..5 {
2609            assert!((result.macd[start + i] - expected_macd[i]).abs() < 1e-1);
2610            assert!((result.signal[start + i] - expected_signal[i]).abs() < 1e-1);
2611            assert!((result.hist[start + i] - expected_hist[i]).abs() < 1e-1);
2612        }
2613        Ok(())
2614    }
2615
2616    fn check_macd_zero_period(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2617        skip_if_unsupported!(kernel, test);
2618        let input_data = [10.0, 20.0, 30.0];
2619        let params = MacdParams {
2620            fast_period: Some(0),
2621            slow_period: Some(26),
2622            signal_period: Some(9),
2623            ma_type: Some("ema".to_string()),
2624        };
2625        let input = MacdInput::from_slice(&input_data, params);
2626        let res = macd_with_kernel(&input, kernel);
2627        assert!(
2628            res.is_err(),
2629            "[{}] MACD should fail with zero fast period",
2630            test
2631        );
2632        Ok(())
2633    }
2634
2635    fn check_macd_period_exceeds_length(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2636        skip_if_unsupported!(kernel, test);
2637        let data = [10.0, 20.0, 30.0];
2638        let params = MacdParams {
2639            fast_period: Some(12),
2640            slow_period: Some(26),
2641            signal_period: Some(9),
2642            ma_type: Some("ema".to_string()),
2643        };
2644        let input = MacdInput::from_slice(&data, params);
2645        let res = macd_with_kernel(&input, kernel);
2646        assert!(
2647            res.is_err(),
2648            "[{}] MACD should fail with period exceeding length",
2649            test
2650        );
2651        Ok(())
2652    }
2653
2654    fn check_macd_very_small_dataset(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2655        skip_if_unsupported!(kernel, test);
2656        let data = [42.0];
2657        let params = MacdParams {
2658            fast_period: Some(12),
2659            slow_period: Some(26),
2660            signal_period: Some(9),
2661            ma_type: Some("ema".to_string()),
2662        };
2663        let input = MacdInput::from_slice(&data, params);
2664        let res = macd_with_kernel(&input, kernel);
2665        assert!(
2666            res.is_err(),
2667            "[{}] MACD should fail with insufficient data",
2668            test
2669        );
2670        Ok(())
2671    }
2672
2673    fn check_macd_reinput(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2674        skip_if_unsupported!(kernel, test);
2675        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2676        let candles = read_candles_from_csv(file)?;
2677
2678        let params = MacdParams::default();
2679        let input = MacdInput::from_candles(&candles, "close", params.clone());
2680        let first_result = macd_with_kernel(&input, kernel)?;
2681
2682        let reinput = MacdInput::from_slice(&first_result.macd, params);
2683        let re_result = macd_with_kernel(&reinput, kernel)?;
2684
2685        assert_eq!(re_result.macd.len(), first_result.macd.len());
2686        for i in 52..re_result.macd.len() {
2687            assert!(!re_result.macd[i].is_nan());
2688        }
2689        Ok(())
2690    }
2691
2692    fn check_macd_nan_handling(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2693        skip_if_unsupported!(kernel, test);
2694        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2695        let candles = read_candles_from_csv(file)?;
2696
2697        let params = MacdParams::default();
2698        let input = MacdInput::from_candles(&candles, "close", params);
2699        let res = macd_with_kernel(&input, kernel)?;
2700        let n = res.macd.len();
2701        if n > 240 {
2702            for i in 240..n {
2703                assert!(!res.macd[i].is_nan());
2704                assert!(!res.signal[i].is_nan());
2705                assert!(!res.hist[i].is_nan());
2706            }
2707        }
2708        Ok(())
2709    }
2710
2711    #[cfg(debug_assertions)]
2712    fn check_macd_no_poison(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2713        skip_if_unsupported!(kernel, test_name);
2714
2715        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2716        let candles = read_candles_from_csv(file_path)?;
2717
2718        let test_params = vec![
2719            MacdParams::default(),
2720            MacdParams {
2721                fast_period: Some(2),
2722                slow_period: Some(3),
2723                signal_period: Some(2),
2724                ma_type: Some("ema".to_string()),
2725            },
2726            MacdParams {
2727                fast_period: Some(5),
2728                slow_period: Some(10),
2729                signal_period: Some(5),
2730                ma_type: Some("ema".to_string()),
2731            },
2732            MacdParams {
2733                fast_period: Some(8),
2734                slow_period: Some(21),
2735                signal_period: Some(8),
2736                ma_type: Some("sma".to_string()),
2737            },
2738            MacdParams {
2739                fast_period: Some(20),
2740                slow_period: Some(50),
2741                signal_period: Some(15),
2742                ma_type: Some("ema".to_string()),
2743            },
2744            MacdParams {
2745                fast_period: Some(50),
2746                slow_period: Some(100),
2747                signal_period: Some(20),
2748                ma_type: Some("wma".to_string()),
2749            },
2750            MacdParams {
2751                fast_period: Some(3),
2752                slow_period: Some(6),
2753                signal_period: Some(3),
2754                ma_type: Some("sma".to_string()),
2755            },
2756            MacdParams {
2757                fast_period: Some(10),
2758                slow_period: Some(30),
2759                signal_period: Some(10),
2760                ma_type: Some("wma".to_string()),
2761            },
2762            MacdParams {
2763                fast_period: Some(15),
2764                slow_period: Some(35),
2765                signal_period: Some(12),
2766                ma_type: Some("ema".to_string()),
2767            },
2768            MacdParams {
2769                fast_period: Some(25),
2770                slow_period: Some(75),
2771                signal_period: Some(18),
2772                ma_type: Some("sma".to_string()),
2773            },
2774            MacdParams {
2775                fast_period: Some(6),
2776                slow_period: Some(13),
2777                signal_period: Some(4),
2778                ma_type: Some("ema".to_string()),
2779            },
2780            MacdParams {
2781                fast_period: Some(9),
2782                slow_period: Some(18),
2783                signal_period: Some(7),
2784                ma_type: Some("wma".to_string()),
2785            },
2786        ];
2787
2788        for (param_idx, params) in test_params.iter().enumerate() {
2789            let input = MacdInput::from_candles(&candles, "close", params.clone());
2790            let output = macd_with_kernel(&input, kernel)?;
2791
2792            for (i, &val) in output.macd.iter().enumerate() {
2793                if val.is_nan() {
2794                    continue;
2795                }
2796
2797                let bits = val.to_bits();
2798
2799                if bits == 0x11111111_11111111 {
2800                    panic!(
2801                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2802						in MACD output with params: {:?} (param set {})",
2803                        test_name, val, bits, i, params, param_idx
2804                    );
2805                }
2806
2807                if bits == 0x22222222_22222222 {
2808                    panic!(
2809                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2810						in MACD output with params: {:?} (param set {})",
2811                        test_name, val, bits, i, params, param_idx
2812                    );
2813                }
2814
2815                if bits == 0x33333333_33333333 {
2816                    panic!(
2817                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2818						in MACD output with params: {:?} (param set {})",
2819                        test_name, val, bits, i, params, param_idx
2820                    );
2821                }
2822            }
2823
2824            for (i, &val) in output.signal.iter().enumerate() {
2825                if val.is_nan() {
2826                    continue;
2827                }
2828
2829                let bits = val.to_bits();
2830
2831                if bits == 0x11111111_11111111 {
2832                    panic!(
2833                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2834						in signal output with params: {:?} (param set {})",
2835                        test_name, val, bits, i, params, param_idx
2836                    );
2837                }
2838
2839                if bits == 0x22222222_22222222 {
2840                    panic!(
2841                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2842						in signal output with params: {:?} (param set {})",
2843                        test_name, val, bits, i, params, param_idx
2844                    );
2845                }
2846
2847                if bits == 0x33333333_33333333 {
2848                    panic!(
2849                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2850						in signal output with params: {:?} (param set {})",
2851                        test_name, val, bits, i, params, param_idx
2852                    );
2853                }
2854            }
2855
2856            for (i, &val) in output.hist.iter().enumerate() {
2857                if val.is_nan() {
2858                    continue;
2859                }
2860
2861                let bits = val.to_bits();
2862
2863                if bits == 0x11111111_11111111 {
2864                    panic!(
2865                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2866						in histogram output with params: {:?} (param set {})",
2867                        test_name, val, bits, i, params, param_idx
2868                    );
2869                }
2870
2871                if bits == 0x22222222_22222222 {
2872                    panic!(
2873                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2874						in histogram output with params: {:?} (param set {})",
2875                        test_name, val, bits, i, params, param_idx
2876                    );
2877                }
2878
2879                if bits == 0x33333333_33333333 {
2880                    panic!(
2881                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2882						in histogram output with params: {:?} (param set {})",
2883                        test_name, val, bits, i, params, param_idx
2884                    );
2885                }
2886            }
2887        }
2888
2889        Ok(())
2890    }
2891
    /// Stand-in for the poison-value scan, compiled only when debug assertions are
    /// disabled. It ignores both arguments, performs no checks, and always returns
    /// `Ok(())`, so the generated test wrappers have a function to call in every
    /// build configuration.
    #[cfg(not(debug_assertions))]
    fn check_macd_no_poison(_test_name: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
        Ok(())
    }
2896
2897    #[cfg(feature = "proptest")]
2898    #[allow(clippy::float_cmp)]
2899    fn check_macd_property(
2900        test_name: &str,
2901        kernel: Kernel,
2902    ) -> Result<(), Box<dyn std::error::Error>> {
2903        use proptest::prelude::*;
2904        skip_if_unsupported!(kernel, test_name);
2905
2906        let strat = (2usize..=20).prop_flat_map(|fast_period| {
2907            (fast_period + 1..=50).prop_flat_map(move |slow_period| {
2908                (2usize..=20).prop_flat_map(move |signal_period| {
2909                    (100f64..10000f64, 0.0001f64..0.1f64).prop_flat_map(
2910                        move |(base_price, volatility)| {
2911                            let min_len = slow_period + signal_period + 10;
2912                            (min_len..400).prop_flat_map(move |data_len| {
2913                                let price_changes = prop::collection::vec(
2914                                    prop_oneof![
2915
2916                                        6 => (-volatility..volatility),
2917
2918                                        1 => Just(0.0),
2919
2920                                        15 => (0.0..volatility * 2.0),
2921
2922                                        15 => (-volatility * 2.0..0.0),
2923                                    ],
2924                                    data_len,
2925                                );
2926
2927                                price_changes.prop_map(move |changes| {
2928                                    let mut data = Vec::with_capacity(data_len);
2929                                    data.push(base_price);
2930
2931                                    for i in 1..data_len {
2932                                        let prev = data[i - 1];
2933                                        let change = changes[i];
2934                                        let new_price = prev * (1.0 + change);
2935
2936                                        data.push(new_price.max(1.0).min(1e6));
2937                                    }
2938
2939                                    (data, fast_period, slow_period, signal_period)
2940                                })
2941                            })
2942                        },
2943                    )
2944                })
2945            })
2946        });
2947
2948        proptest::test_runner::TestRunner::default().run(
2949            &strat,
2950            |(data, fast_period, slow_period, signal_period)| {
2951                let params = MacdParams {
2952                    fast_period: Some(fast_period),
2953                    slow_period: Some(slow_period),
2954                    signal_period: Some(signal_period),
2955                    ma_type: Some("ema".to_string()),
2956                };
2957                let input = MacdInput::from_slice(&data, params.clone());
2958
2959                let result = macd_with_kernel(&input, kernel)?;
2960
2961                let reference = macd_with_kernel(&input, Kernel::Scalar)?;
2962
2963                let len = data.len();
2964
2965                prop_assert_eq!(result.macd.len(), len, "MACD output length mismatch");
2966                prop_assert_eq!(result.signal.len(), len, "Signal output length mismatch");
2967                prop_assert_eq!(result.hist.len(), len, "Histogram output length mismatch");
2968
2969                let macd_warmup = slow_period - 1;
2970                let signal_warmup = slow_period + signal_period - 2;
2971
2972                for i in 0..macd_warmup.min(len) {
2973                    prop_assert!(
2974                        result.macd[i].is_nan(),
2975                        "MACD[{}] should be NaN during warmup (< {})",
2976                        i,
2977                        macd_warmup
2978                    );
2979                }
2980
2981                for i in 0..signal_warmup.min(len) {
2982                    prop_assert!(
2983                        result.signal[i].is_nan(),
2984                        "Signal[{}] should be NaN during warmup (< {})",
2985                        i,
2986                        signal_warmup
2987                    );
2988                    prop_assert!(
2989                        result.hist[i].is_nan(),
2990                        "Histogram[{}] should be NaN during warmup (< {})",
2991                        i,
2992                        signal_warmup
2993                    );
2994                }
2995
2996                for i in signal_warmup..len {
2997                    if !result.macd[i].is_nan() && !result.signal[i].is_nan() {
2998                        let expected_hist = result.macd[i] - result.signal[i];
2999                        prop_assert!(
3000                            (result.hist[i] - expected_hist).abs() < 1e-10,
3001                            "Histogram[{}] = {} != MACD - Signal = {} - {} = {}",
3002                            i,
3003                            result.hist[i],
3004                            result.macd[i],
3005                            result.signal[i],
3006                            expected_hist
3007                        );
3008                    }
3009                }
3010
3011                if data.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10) {
3012                    for i in signal_warmup..len {
3013                        if !result.macd[i].is_nan() {
3014                            prop_assert!(
3015                                result.macd[i].abs() < 1e-3,
3016                                "MACD[{}] = {} should be near 0 for constant data",
3017                                i,
3018                                result.macd[i]
3019                            );
3020                        }
3021                    }
3022                }
3023
3024                let data_range = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max)
3025                    - data.iter().cloned().fold(f64::INFINITY, f64::min);
3026                for i in macd_warmup..len {
3027                    if !result.macd[i].is_nan() {
3028                        prop_assert!(
3029                            result.macd[i].abs() <= data_range,
3030                            "MACD[{}] = {} exceeds data range {}",
3031                            i,
3032                            result.macd[i],
3033                            data_range
3034                        );
3035                    }
3036                }
3037
3038                let is_monotonic_inc = data.windows(2).all(|w| w[1] >= w[0] - 1e-10);
3039                let is_monotonic_dec = data.windows(2).all(|w| w[1] <= w[0] + 1e-10);
3040
3041                if is_monotonic_inc && !data.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10) {
3042                    let stable_start = (signal_warmup + 10).min(len - 1);
3043                    if stable_start < len {
3044                        let stable_macd = &result.macd[stable_start..len];
3045                        let positive_count = stable_macd
3046                            .iter()
3047                            .filter(|&&v| !v.is_nan() && v > -1e-10)
3048                            .count();
3049                        let total_valid = stable_macd.iter().filter(|&&v| !v.is_nan()).count();
3050                        if total_valid > 0 {
3051                            prop_assert!(
3052                                positive_count as f64 / total_valid as f64 > 0.9,
3053                                "MACD should be mostly positive for monotonic increasing data"
3054                            );
3055                        }
3056                    }
3057                } else if is_monotonic_dec && !data.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10)
3058                {
3059                    let stable_start = (signal_warmup + 10).min(len - 1);
3060                    if stable_start < len {
3061                        let stable_macd = &result.macd[stable_start..len];
3062                        let negative_count = stable_macd
3063                            .iter()
3064                            .filter(|&&v| !v.is_nan() && v < 1e-10)
3065                            .count();
3066                        let total_valid = stable_macd.iter().filter(|&&v| !v.is_nan()).count();
3067                        if total_valid > 0 {
3068                            prop_assert!(
3069                                negative_count as f64 / total_valid as f64 > 0.9,
3070                                "MACD should be mostly negative for monotonic decreasing data"
3071                            );
3072                        }
3073                    }
3074                }
3075
3076                if fast_period == slow_period - 1 {
3077                    let data_scale = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max).abs();
3078                    for i in signal_warmup..len {
3079                        if !result.macd[i].is_nan() && data_scale > 1e-10 {
3080                            let relative_macd = result.macd[i].abs() / data_scale;
3081                            prop_assert!(
3082								relative_macd < 0.1,
3083								"MACD[{}] relative magnitude {} too large for minimum period difference",
3084								i, relative_macd
3085							);
3086                        }
3087                    }
3088                }
3089
3090                for i in 0..len {
3091                    let macd_y = result.macd[i];
3092                    let macd_r = reference.macd[i];
3093                    let signal_y = result.signal[i];
3094                    let signal_r = reference.signal[i];
3095                    let hist_y = result.hist[i];
3096                    let hist_r = reference.hist[i];
3097
3098                    if !macd_y.is_finite() || !macd_r.is_finite() {
3099                        prop_assert_eq!(
3100                            macd_y.to_bits(),
3101                            macd_r.to_bits(),
3102                            "MACD NaN/finite mismatch at index {}",
3103                            i
3104                        );
3105                    } else {
3106                        let ulp_diff = macd_y.to_bits().abs_diff(macd_r.to_bits());
3107                        prop_assert!(
3108                            (macd_y - macd_r).abs() <= 1e-9 || ulp_diff <= 5,
3109                            "MACD mismatch at index {}: {} vs {} (ULP={})",
3110                            i,
3111                            macd_y,
3112                            macd_r,
3113                            ulp_diff
3114                        );
3115                    }
3116
3117                    if !signal_y.is_finite() || !signal_r.is_finite() {
3118                        prop_assert_eq!(
3119                            signal_y.to_bits(),
3120                            signal_r.to_bits(),
3121                            "Signal NaN/finite mismatch at index {}",
3122                            i
3123                        );
3124                    } else {
3125                        let ulp_diff = signal_y.to_bits().abs_diff(signal_r.to_bits());
3126                        prop_assert!(
3127                            (signal_y - signal_r).abs() <= 1e-9 || ulp_diff <= 5,
3128                            "Signal mismatch at index {}: {} vs {} (ULP={})",
3129                            i,
3130                            signal_y,
3131                            signal_r,
3132                            ulp_diff
3133                        );
3134                    }
3135
3136                    if !hist_y.is_finite() || !hist_r.is_finite() {
3137                        prop_assert_eq!(
3138                            hist_y.to_bits(),
3139                            hist_r.to_bits(),
3140                            "Histogram NaN/finite mismatch at index {}",
3141                            i
3142                        );
3143                    } else {
3144                        let ulp_diff = hist_y.to_bits().abs_diff(hist_r.to_bits());
3145                        prop_assert!(
3146                            (hist_y - hist_r).abs() <= 1e-9 || ulp_diff <= 5,
3147                            "Histogram mismatch at index {}: {} vs {} (ULP={})",
3148                            i,
3149                            hist_y,
3150                            hist_r,
3151                            ulp_diff
3152                        );
3153                    }
3154                }
3155
3156                Ok(())
3157            },
3158        )?;
3159
3160        Ok(())
3161    }
3162
3163    macro_rules! generate_all_macd_tests {
3164        ($($test_fn:ident),*) => {
3165            paste::paste! {
3166                $(
3167                    #[test]
3168                    fn [<$test_fn _scalar_f64>]() {
3169                        let _ = $test_fn(stringify!([<$test_fn _scalar_f64>]), Kernel::Scalar);
3170                    }
3171                )*
3172                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
3173                $(
3174                    #[test]
3175                    fn [<$test_fn _avx2_f64>]() {
3176                        let _ = $test_fn(stringify!([<$test_fn _avx2_f64>]), Kernel::Avx2);
3177                    }
3178                    #[test]
3179                    fn [<$test_fn _avx512_f64>]() {
3180                        let _ = $test_fn(stringify!([<$test_fn _avx512_f64>]), Kernel::Avx512);
3181                    }
3182                )*
3183            }
3184        }
3185    }
3186    generate_all_macd_tests!(
3187        check_macd_partial_params,
3188        check_macd_accuracy,
3189        check_macd_zero_period,
3190        check_macd_period_exceeds_length,
3191        check_macd_very_small_dataset,
3192        check_macd_reinput,
3193        check_macd_nan_handling,
3194        check_macd_no_poison
3195    );
3196
3197    #[cfg(feature = "proptest")]
3198    generate_all_macd_tests!(check_macd_property);
3199
3200    fn check_batch_default_row(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
3201        skip_if_unsupported!(kernel, test);
3202        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
3203        let c = read_candles_from_csv(file)?;
3204        let output = MacdBatchBuilder::new()
3205            .kernel(kernel)
3206            .apply_candles(&c, "close")?;
3207        let def = MacdParams::default();
3208        let row = output
3209            .combos
3210            .iter()
3211            .position(|prm| {
3212                prm.fast_period == def.fast_period
3213                    && prm.slow_period == def.slow_period
3214                    && prm.signal_period == def.signal_period
3215                    && prm.ma_type == def.ma_type
3216            })
3217            .expect("default row missing");
3218        let start = row * output.cols;
3219        let macd = &output.macd[start..start + output.cols];
3220        let signal = &output.signal[start..start + output.cols];
3221        let hist = &output.hist[start..start + output.cols];
3222        let expected_macd = [
3223            -629.8674025082801,
3224            -600.2986584356258,
3225            -581.6188884820076,
3226            -551.1020443476082,
3227            -560.798510688488,
3228        ];
3229        let len = macd.len();
3230        let s = len - 5;
3231        for i in 0..5 {
3232            assert!((macd[s + i] - expected_macd[i]).abs() < 1e-1);
3233        }
3234        Ok(())
3235    }
3236
3237    #[cfg(debug_assertions)]
3238    fn check_batch_no_poison(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
3239        skip_if_unsupported!(kernel, test);
3240
3241        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
3242        let c = read_candles_from_csv(file)?;
3243
3244        let test_configs = vec![
3245            (2, 10, 2, 10, 20, 2, 2, 6, 2),
3246            (5, 25, 5, 26, 50, 5, 5, 15, 5),
3247            (20, 40, 10, 50, 100, 10, 10, 20, 5),
3248            (2, 5, 1, 6, 10, 1, 2, 4, 1),
3249            (10, 15, 1, 20, 30, 2, 8, 12, 1),
3250            (3, 30, 3, 26, 52, 13, 5, 20, 5),
3251            (2, 6, 1, 8, 12, 1, 3, 5, 1),
3252        ];
3253
3254        for (cfg_idx, &(f_start, f_end, f_step, s_start, s_end, s_step, g_start, g_end, g_step)) in
3255            test_configs.iter().enumerate()
3256        {
3257            let output = MacdBatchBuilder::new()
3258                .kernel(kernel)
3259                .fast_period_range(f_start, f_end, f_step)
3260                .slow_period_range(s_start, s_end, s_step)
3261                .signal_period_range(g_start, g_end, g_step)
3262                .ma_type_static("ema")
3263                .apply_candles(&c, "close")?;
3264
3265            for (idx, &val) in output.macd.iter().enumerate() {
3266                if val.is_nan() {
3267                    continue;
3268                }
3269
3270                let bits = val.to_bits();
3271                let row = idx / output.cols;
3272                let col = idx % output.cols;
3273                let combo = &output.combos[row];
3274
3275                if bits == 0x11111111_11111111 {
3276                    panic!(
3277                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
3278						at row {} col {} (flat index {}) in MACD output with params: {:?}",
3279                        test, cfg_idx, val, bits, row, col, idx, combo
3280                    );
3281                }
3282
3283                if bits == 0x22222222_22222222 {
3284                    panic!(
3285                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
3286						at row {} col {} (flat index {}) in MACD output with params: {:?}",
3287                        test, cfg_idx, val, bits, row, col, idx, combo
3288                    );
3289                }
3290
3291                if bits == 0x33333333_33333333 {
3292                    panic!(
3293                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
3294						at row {} col {} (flat index {}) in MACD output with params: {:?}",
3295                        test, cfg_idx, val, bits, row, col, idx, combo
3296                    );
3297                }
3298            }
3299
3300            for (idx, &val) in output.signal.iter().enumerate() {
3301                if val.is_nan() {
3302                    continue;
3303                }
3304
3305                let bits = val.to_bits();
3306                let row = idx / output.cols;
3307                let col = idx % output.cols;
3308                let combo = &output.combos[row];
3309
3310                if bits == 0x11111111_11111111 {
3311                    panic!(
3312                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
3313						at row {} col {} (flat index {}) in signal output with params: {:?}",
3314                        test, cfg_idx, val, bits, row, col, idx, combo
3315                    );
3316                }
3317
3318                if bits == 0x22222222_22222222 {
3319                    panic!(
3320                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
3321						at row {} col {} (flat index {}) in signal output with params: {:?}",
3322                        test, cfg_idx, val, bits, row, col, idx, combo
3323                    );
3324                }
3325
3326                if bits == 0x33333333_33333333 {
3327                    panic!(
3328                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
3329						at row {} col {} (flat index {}) in signal output with params: {:?}",
3330                        test, cfg_idx, val, bits, row, col, idx, combo
3331                    );
3332                }
3333            }
3334
3335            for (idx, &val) in output.hist.iter().enumerate() {
3336                if val.is_nan() {
3337                    continue;
3338                }
3339
3340                let bits = val.to_bits();
3341                let row = idx / output.cols;
3342                let col = idx % output.cols;
3343                let combo = &output.combos[row];
3344
3345                if bits == 0x11111111_11111111 {
3346                    panic!(
3347                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
3348						at row {} col {} (flat index {}) in histogram output with params: {:?}",
3349                        test, cfg_idx, val, bits, row, col, idx, combo
3350                    );
3351                }
3352
3353                if bits == 0x22222222_22222222 {
3354                    panic!(
3355                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
3356						at row {} col {} (flat index {}) in histogram output with params: {:?}",
3357                        test, cfg_idx, val, bits, row, col, idx, combo
3358                    );
3359                }
3360
3361                if bits == 0x33333333_33333333 {
3362                    panic!(
3363                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
3364						at row {} col {} (flat index {}) in histogram output with params: {:?}",
3365                        test, cfg_idx, val, bits, row, col, idx, combo
3366                    );
3367                }
3368            }
3369        }
3370
3371        Ok(())
3372    }
3373
    /// Stand-in for the batch poison-value scan, compiled only when debug assertions
    /// are disabled. It ignores both arguments, performs no checks, and always
    /// returns `Ok(())`.
    #[cfg(not(debug_assertions))]
    fn check_batch_no_poison(_test: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
        Ok(())
    }
3378
3379    macro_rules! gen_batch_tests {
3380        ($fn_name:ident) => {
3381            paste::paste! {
3382                #[test] fn [<$fn_name _scalar>]()      {
3383                    let _ = $fn_name(stringify!([<$fn_name _scalar>]), Kernel::ScalarBatch);
3384                }
3385                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
3386                #[test] fn [<$fn_name _avx2>]()        {
3387                    let _ = $fn_name(stringify!([<$fn_name _avx2>]), Kernel::Avx2Batch);
3388                }
3389                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
3390                #[test] fn [<$fn_name _avx512>]()      {
3391                    let _ = $fn_name(stringify!([<$fn_name _avx512>]), Kernel::Avx512Batch);
3392                }
3393                #[test] fn [<$fn_name _auto_detect>]() {
3394                    let _ = $fn_name(stringify!([<$fn_name _auto_detect>]), Kernel::Auto);
3395                }
3396            }
3397        };
3398    }
3399    gen_batch_tests!(check_batch_default_row);
3400    gen_batch_tests!(check_batch_no_poison);
3401}