// Source: vector_ta/indicators/vlma.rs

1#[cfg(all(feature = "python", feature = "cuda"))]
2use numpy::PyUntypedArrayMethods;
3#[cfg(feature = "python")]
4use numpy::{IntoPyArray, PyArray1, PyArrayMethods, PyReadonlyArray1};
5#[cfg(feature = "python")]
6use pyo3::exceptions::PyValueError;
7#[cfg(feature = "python")]
8use pyo3::prelude::*;
9#[cfg(feature = "python")]
10use pyo3::types::PyDict;
11
12#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
13use serde::{Deserialize, Serialize};
14#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
15use wasm_bindgen::prelude::*;
16
17#[cfg(all(feature = "python", feature = "cuda"))]
18use crate::cuda::{cuda_available, CudaVlma};
19use crate::indicators::deviation::{deviation, DevInput, DevParams};
20use crate::indicators::moving_averages::ma::{ma, MaData};
21use crate::utilities::data_loader::{source_type, Candles};
22#[cfg(all(feature = "python", feature = "cuda"))]
23use crate::utilities::dlpack_cuda::{make_device_array_py, DeviceArrayF32Py};
24use crate::utilities::enums::Kernel;
25use crate::utilities::helpers::{
26    alloc_with_nan_prefix, detect_best_batch_kernel, detect_best_kernel, init_matrix_prefixes,
27    make_uninit_matrix,
28};
29#[cfg(feature = "python")]
30use crate::utilities::kernel_validation::validate_kernel;
31use aligned_vec::{AVec, CACHELINE_ALIGN};
32#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
33use core::arch::x86_64::*;
34#[cfg(not(target_arch = "wasm32"))]
35use rayon::prelude::*;
36use std::convert::AsRef;
37use std::error::Error;
38use thiserror::Error;
39
40impl<'a> AsRef<[f64]> for VlmaInput<'a> {
41    #[inline(always)]
42    fn as_ref(&self) -> &[f64] {
43        match &self.data {
44            VlmaData::Slice(sl) => sl,
45            VlmaData::Candles { candles, source } => source_type(candles, source),
46        }
47    }
48}
49
/// Performs one exponential-smoothing step.
///
/// Moves `last` toward `x` by the fraction `sc`, i.e. `last + sc * (x - last)`,
/// evaluated with a single fused multiply-add.
#[inline(always)]
fn fast_ema_update(last: f64, x: f64, sc: f64) -> f64 {
    let step = x - last;
    f64::mul_add(step, sc, last)
}
54
/// Restricts a signed candidate period `p` to the range `[min_p, max_p]`.
///
/// Values below `min_p` yield `min_p`, values above `max_p` yield `max_p`,
/// anything else is returned unchanged as `usize`. The lower bound is tested
/// first.
#[inline(always)]
fn fast_clamp_period(p: isize, min_p: usize, max_p: usize) -> usize {
    let floor = min_p as isize;
    let ceil = max_p as isize;
    match p {
        below if below < floor => min_p,
        above if above > ceil => max_p,
        inside => inside as usize,
    }
}
67
/// Derives `(mean, standard deviation)` from a running sum and sum of squares.
///
/// `inv_n` is the reciprocal of the sample count. The variance is computed as
/// `sumsq * inv_n - mean^2` using a fused multiply-add; a variance that is zero
/// or negative (rounding) yields a deviation of `0.0`. A NaN variance propagates
/// as a NaN deviation.
#[inline(always)]
fn fast_std_from_sums(sum: f64, sumsq: f64, inv_n: f64) -> (f64, f64) {
    let mean = sum * inv_n;
    let mean_sq = sumsq * inv_n;
    let variance = (-mean).mul_add(mean, mean_sq);

    // The comparison is kept as `<= 0.0` so that NaN falls through to `sqrt`.
    let deviation = match variance <= 0.0 {
        true => 0.0,
        false => variance.sqrt(),
    };
    (mean, deviation)
}
76
77#[derive(Debug, Clone)]
78pub enum VlmaData<'a> {
79    Candles {
80        candles: &'a Candles,
81        source: &'a str,
82    },
83    Slice(&'a [f64]),
84}
85
/// Result of a VLMA computation.
#[derive(Clone, Debug)]
pub struct VlmaOutput {
    /// One output value per input sample.
    pub values: Vec<f64>,
}
90
/// Optional tuning parameters for VLMA.
///
/// Every field may be left as `None`; the accessors on `VlmaInput` substitute
/// 5, 50, `"sma"` and 0 respectively in that case.
#[derive(Clone, Debug)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct VlmaParams {
    /// Lower bound of the adaptive smoothing period.
    pub min_period: Option<usize>,
    /// Upper bound of the adaptive smoothing period; also the statistics window length.
    pub max_period: Option<usize>,
    /// Name of the moving average used for the reference mean.
    pub matype: Option<String>,
    /// Numeric selector for the deviation measure.
    pub devtype: Option<usize>,
}
102
103impl Default for VlmaParams {
104    fn default() -> Self {
105        Self {
106            min_period: Some(5),
107            max_period: Some(50),
108            matype: Some("sma".to_string()),
109            devtype: Some(0),
110        }
111    }
112}
113
114#[derive(Debug, Clone)]
115pub struct VlmaInput<'a> {
116    pub data: VlmaData<'a>,
117    pub params: VlmaParams,
118}
119
120impl<'a> VlmaInput<'a> {
121    #[inline]
122    pub fn from_candles(c: &'a Candles, s: &'a str, p: VlmaParams) -> Self {
123        Self {
124            data: VlmaData::Candles {
125                candles: c,
126                source: s,
127            },
128            params: p,
129        }
130    }
131    #[inline]
132    pub fn from_slice(sl: &'a [f64], p: VlmaParams) -> Self {
133        Self {
134            data: VlmaData::Slice(sl),
135            params: p,
136        }
137    }
138    #[inline]
139    pub fn with_default_candles(c: &'a Candles) -> Self {
140        Self::from_candles(c, "close", VlmaParams::default())
141    }
142    #[inline]
143    pub fn get_min_period(&self) -> usize {
144        self.params.min_period.unwrap_or(5)
145    }
146    #[inline]
147    pub fn get_max_period(&self) -> usize {
148        self.params.max_period.unwrap_or(50)
149    }
150    #[inline]
151    pub fn get_matype(&self) -> String {
152        self.params
153            .matype
154            .clone()
155            .unwrap_or_else(|| "sma".to_string())
156    }
157    #[inline]
158    pub fn get_devtype(&self) -> usize {
159        self.params.devtype.unwrap_or(0)
160    }
161}
162
163#[derive(Clone, Debug)]
164pub struct VlmaBuilder {
165    min_period: Option<usize>,
166    max_period: Option<usize>,
167    matype: Option<String>,
168    devtype: Option<usize>,
169    kernel: Kernel,
170}
171
172impl Default for VlmaBuilder {
173    fn default() -> Self {
174        Self {
175            min_period: None,
176            max_period: None,
177            matype: None,
178            devtype: None,
179            kernel: Kernel::Auto,
180        }
181    }
182}
183
184impl VlmaBuilder {
185    #[inline(always)]
186    pub fn new() -> Self {
187        Self::default()
188    }
189    #[inline(always)]
190    pub fn min_period(mut self, n: usize) -> Self {
191        self.min_period = Some(n);
192        self
193    }
194    #[inline(always)]
195    pub fn max_period(mut self, n: usize) -> Self {
196        self.max_period = Some(n);
197        self
198    }
199    #[inline(always)]
200    pub fn matype<S: Into<String>>(mut self, t: S) -> Self {
201        self.matype = Some(t.into());
202        self
203    }
204    #[inline(always)]
205    pub fn devtype(mut self, d: usize) -> Self {
206        self.devtype = Some(d);
207        self
208    }
209    #[inline(always)]
210    pub fn kernel(mut self, k: Kernel) -> Self {
211        self.kernel = k;
212        self
213    }
214    #[inline(always)]
215    pub fn apply(self, c: &Candles) -> Result<VlmaOutput, VlmaError> {
216        let p = VlmaParams {
217            min_period: self.min_period,
218            max_period: self.max_period,
219            matype: self.matype,
220            devtype: self.devtype,
221        };
222        let i = VlmaInput::from_candles(c, "close", p);
223        vlma_with_kernel(&i, self.kernel)
224    }
225    #[inline(always)]
226    pub fn apply_slice(self, d: &[f64]) -> Result<VlmaOutput, VlmaError> {
227        let p = VlmaParams {
228            min_period: self.min_period,
229            max_period: self.max_period,
230            matype: self.matype,
231            devtype: self.devtype,
232        };
233        let i = VlmaInput::from_slice(d, p);
234        vlma_with_kernel(&i, self.kernel)
235    }
236    #[inline(always)]
237    pub fn into_stream(self) -> Result<VlmaStream, VlmaError> {
238        let p = VlmaParams {
239            min_period: self.min_period,
240            max_period: self.max_period,
241            matype: self.matype,
242            devtype: self.devtype,
243        };
244        VlmaStream::try_new(p)
245    }
246}
247
248#[derive(Debug, Error)]
249pub enum VlmaError {
250    #[error("vlma: Empty data provided.")]
251    EmptyInputData,
252    #[error("vlma: min_period={min_period} is greater than max_period={max_period}.")]
253    InvalidPeriodRange {
254        min_period: usize,
255        max_period: usize,
256    },
257    #[error("vlma: All values are NaN.")]
258    AllValuesNaN,
259    #[error("vlma: Not enough valid data: needed={needed}, valid={valid}.")]
260    NotEnoughValidData { needed: usize, valid: usize },
261    #[error("vlma: Invalid period: period = {period}, data length = {data_len}")]
262    InvalidPeriod { period: usize, data_len: usize },
263    #[error("vlma: Output length mismatch: expected {expected}, got {got}")]
264    OutputLengthMismatch { expected: usize, got: usize },
265    #[error("vlma: Invalid range: start={start}, end={end}, step={step}")]
266    InvalidRange {
267        start: String,
268        end: String,
269        step: String,
270    },
271    #[error("vlma: Invalid kernel for batch: {0:?}")]
272    InvalidKernelForBatch(crate::utilities::enums::Kernel),
273    #[error("vlma: Error in MA calculation: {0}")]
274    MaError(String),
275    #[error("vlma: Error in Deviation calculation: {0}")]
276    DevError(String),
277}
278
279#[inline]
280pub fn vlma(input: &VlmaInput) -> Result<VlmaOutput, VlmaError> {
281    vlma_with_kernel(input, Kernel::Auto)
282}
283
284pub fn vlma_with_kernel(input: &VlmaInput, kernel: Kernel) -> Result<VlmaOutput, VlmaError> {
285    let (data, min_p, max_p, matype, devtype, first, chosen) = vlma_prepare(input, kernel)?;
286    let mut out = alloc_with_nan_prefix(data.len(), first + max_p - 1);
287    vlma_compute_into(
288        data, min_p, max_p, &matype, devtype, first, chosen, &mut out,
289    )?;
290    Ok(VlmaOutput { values: out })
291}
292
293#[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
294pub fn vlma_into(input: &VlmaInput, out: &mut [f64]) -> Result<(), VlmaError> {
295    vlma_into_slice(out, input, Kernel::Auto)
296}
297
298#[inline]
299pub fn vlma_into_slice(dst: &mut [f64], input: &VlmaInput, kern: Kernel) -> Result<(), VlmaError> {
300    let (data, min_p, max_p, matype, devtype, first, chosen) = vlma_prepare(input, kern)?;
301    if dst.len() != data.len() {
302        return Err(VlmaError::OutputLengthMismatch {
303            expected: data.len(),
304            got: dst.len(),
305        });
306    }
307    vlma_compute_into(data, min_p, max_p, &matype, devtype, first, chosen, dst)?;
308
309    let warm_end = first + max_p - 1;
310    for i in 0..warm_end {
311        if i != first {
312            dst[i] = f64::NAN;
313        }
314    }
315    Ok(())
316}
317
318#[inline(always)]
319fn vlma_prepare<'a>(
320    input: &'a VlmaInput,
321    kernel: Kernel,
322) -> Result<(&'a [f64], usize, usize, String, usize, usize, Kernel), VlmaError> {
323    let data: &[f64] = input.as_ref();
324
325    if data.is_empty() {
326        return Err(VlmaError::EmptyInputData);
327    }
328
329    let min_period = input.get_min_period();
330    let max_period = input.get_max_period();
331    if min_period > max_period {
332        return Err(VlmaError::InvalidPeriodRange {
333            min_period,
334            max_period,
335        });
336    }
337
338    if max_period == 0 || max_period > data.len() {
339        return Err(VlmaError::InvalidPeriod {
340            period: max_period,
341            data_len: data.len(),
342        });
343    }
344
345    let first = data
346        .iter()
347        .position(|&x| !x.is_nan())
348        .ok_or(VlmaError::AllValuesNaN)?;
349
350    if (data.len() - first) < max_period {
351        return Err(VlmaError::NotEnoughValidData {
352            needed: max_period,
353            valid: data.len() - first,
354        });
355    }
356
357    let matype = input.get_matype();
358    let devtype = input.get_devtype();
359
360    let chosen = match kernel {
361        Kernel::Auto => Kernel::Scalar,
362        k => k,
363    };
364
365    Ok((data, min_period, max_period, matype, devtype, first, chosen))
366}
367
368#[inline(always)]
369fn vlma_compute_into(
370    data: &[f64],
371    min_period: usize,
372    max_period: usize,
373    matype: &str,
374    devtype: usize,
375    first: usize,
376    kernel: Kernel,
377    out: &mut [f64],
378) -> Result<(), VlmaError> {
379    unsafe {
380        match kernel {
381            Kernel::Scalar | Kernel::ScalarBatch => {
382                if matype == "sma" && devtype == 0 {
383                    vlma_scalar_sma_stddev_into(data, min_period, max_period, first, out)?;
384                } else {
385                    vlma_scalar_into(data, min_period, max_period, matype, devtype, first, out)?;
386                }
387            }
388            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
389            Kernel::Avx2 | Kernel::Avx2Batch => {
390                vlma_avx2_into(data, min_period, max_period, matype, devtype, first, out)?;
391            }
392            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
393            Kernel::Avx512 | Kernel::Avx512Batch => {
394                vlma_avx512_into(data, min_period, max_period, matype, devtype, first, out)?;
395            }
396            _ => unreachable!(),
397        }
398    }
399    Ok(())
400}
401
402pub unsafe fn vlma_scalar_classic(
403    data: &[f64],
404    min_period: usize,
405    max_period: usize,
406    matype: &str,
407    devtype: usize,
408    first_valid: usize,
409    out: &mut [f64],
410) -> Result<(), VlmaError> {
411    if matype == "sma" && devtype == 0 {
412        return vlma_scalar_sma_stddev_into(data, min_period, max_period, first_valid, out);
413    }
414    vlma_scalar_into(
415        data,
416        min_period,
417        max_period,
418        matype,
419        devtype,
420        first_valid,
421        out,
422    )
423}
424
425#[inline(always)]
426pub unsafe fn vlma_scalar_sma_stddev_into(
427    data: &[f64],
428    min_period: usize,
429    max_period: usize,
430    first_valid: usize,
431    out: &mut [f64],
432) -> Result<(), VlmaError> {
433    debug_assert_eq!(out.len(), data.len());
434    let len = data.len();
435    if len == 0 {
436        return Ok(());
437    }
438
439    let warm_end = first_valid + max_period - 1;
440    let x0 = *data.get_unchecked(first_valid);
441    *out.get_unchecked_mut(first_valid) = x0;
442
443    let min_pi = if min_period == 0 { 1 } else { min_period };
444    let max_pi = core::cmp::max(max_period, min_pi);
445    let mut last_p: usize = max_pi;
446    let mut sc_lut = Vec::with_capacity(max_pi + 1);
447    sc_lut.push(0.0);
448    for p in 1..=max_pi {
449        sc_lut.push(2.0 / (p as f64 + 1.0));
450    }
451    let sc_ptr = sc_lut.as_ptr();
452
453    const D175: f64 = 1.75;
454    const D025: f64 = 0.25;
455
456    let mut last_val = x0;
457
458    let mut i = first_valid + 1;
459    while i < len && i < warm_end {
460        let x = *data.get_unchecked(i);
461        if !x.is_nan() {
462            let sc = *sc_ptr.add(last_p);
463            last_val = fast_ema_update(last_val, x, sc);
464        }
465        i += 1;
466    }
467
468    if warm_end >= len {
469        return Ok(());
470    }
471
472    let mut sum = 0.0_f64;
473    let mut sumsq = 0.0_f64;
474    let mut nan_count: usize = 0;
475    for k in 0..max_period {
476        let v = *data.get_unchecked(first_valid + k);
477        if v.is_finite() {
478            sum += v;
479            sumsq += v * v;
480        } else {
481            nan_count += 1;
482        }
483    }
484    let inv_n = 1.0 / (max_period as f64);
485
486    i = warm_end;
487    while i < len {
488        let x = *data.get_unchecked(i);
489
490        if x.is_nan() {
491            *out.get_unchecked_mut(i) = f64::NAN;
492        } else {
493            let (m, dv) = if nan_count == 0 {
494                let m = sum * inv_n;
495                let var = (sumsq * inv_n) - m * m;
496                let dv = if var < 0.0 { 0.0 } else { var.sqrt() };
497                (m, dv)
498            } else {
499                (f64::NAN, f64::NAN)
500            };
501
502            let prev_p = if last_p == 0 { max_pi } else { last_p };
503            let mut next_p = prev_p;
504            if m.is_finite() && dv.is_finite() {
505                let d175 = dv * D175;
506                let d025 = dv * D025;
507                let a = m - d175;
508                let b = m - d025;
509                let c = m + d025;
510                let d = m + d175;
511                let inc_fast = ((x < a) as i32) | ((x > d) as i32);
512                let inc_slow = ((x >= b) as i32) & ((x <= c) as i32);
513                let delta = inc_slow - inc_fast;
514                let p_tmp = prev_p as isize + delta as isize;
515                next_p = if p_tmp < min_pi as isize {
516                    min_pi
517                } else if p_tmp > max_pi as isize {
518                    max_pi
519                } else {
520                    p_tmp as usize
521                };
522            }
523
524            let sc = *sc_ptr.add(next_p);
525            last_val = fast_ema_update(last_val, x, sc);
526            last_p = next_p;
527            *out.get_unchecked_mut(i) = last_val;
528        }
529
530        let next = i + 1;
531        if next < len {
532            let out_idx = next - max_period;
533            let v_out = *data.get_unchecked(out_idx);
534            if v_out.is_finite() {
535                sum -= v_out;
536                sumsq -= v_out * v_out;
537            } else {
538                nan_count = nan_count.saturating_sub(1);
539            }
540            let v_in = *data.get_unchecked(next);
541            if v_in.is_finite() {
542                sum += v_in;
543                sumsq += v_in * v_in;
544            } else {
545                nan_count += 1;
546            }
547        }
548
549        i = next;
550    }
551
552    Ok(())
553}
554
555#[inline(always)]
556unsafe fn vlma_scalar_into(
557    data: &[f64],
558    min_period: usize,
559    max_period: usize,
560    matype: &str,
561    devtype: usize,
562    first_valid: usize,
563    out: &mut [f64],
564) -> Result<(), VlmaError> {
565    debug_assert_eq!(out.len(), data.len());
566
567    let mean = ma(matype, MaData::Slice(data), max_period)
568        .map_err(|e| VlmaError::MaError(e.to_string()))?;
569    let dev = deviation(&DevInput::from_slice(
570        data,
571        DevParams {
572            period: Some(max_period),
573            devtype: Some(devtype),
574        },
575    ))
576    .map_err(|e| VlmaError::DevError(e.to_string()))?;
577
578    let len = data.len();
579    if len == 0 {
580        return Ok(());
581    }
582
583    let warm_end = first_valid + max_period - 1;
584
585    let x0 = *data.get_unchecked(first_valid);
586    *out.get_unchecked_mut(first_valid) = x0;
587
588    let min_pi = if min_period == 0 { 1 } else { min_period };
589    let max_pi = core::cmp::max(max_period, min_pi);
590    let mut last_p: usize = max_pi;
591
592    let mut sc_lut = Vec::with_capacity(max_pi + 1);
593    sc_lut.push(0.0);
594    for p in 1..=max_pi {
595        sc_lut.push(2.0 / (p as f64 + 1.0));
596    }
597    debug_assert_eq!(sc_lut.len(), max_pi + 1);
598    let sc_ptr = sc_lut.as_ptr();
599
600    const D175: f64 = 1.75;
601    const D025: f64 = 0.25;
602
603    let mut last_val = x0;
604
605    let mut i = first_valid + 1;
606    while i < len && i < warm_end {
607        let x = *data.get_unchecked(i);
608        if x.is_nan() {
609            i += 1;
610            continue;
611        }
612
613        let m = mean[i];
614        let dv = dev[i];
615
616        let prev_p = if last_p == 0 { max_pi } else { last_p };
617        let mut next_p = prev_p;
618
619        if m.is_finite() && dv.is_finite() {
620            let d175 = dv * D175;
621            let d025 = dv * D025;
622
623            let a = m - d175;
624            let b = m - d025;
625            let c = m + d025;
626            let d = m + d175;
627
628            let inc_fast = ((x < a) as i32) | ((x > d) as i32);
629            let inc_slow = ((x >= b) as i32) & ((x <= c) as i32);
630            let delta = inc_slow - inc_fast;
631
632            let p_tmp = prev_p as isize + delta as isize;
633            next_p = if p_tmp < min_pi as isize {
634                min_pi
635            } else if p_tmp > max_pi as isize {
636                max_pi
637            } else {
638                p_tmp as usize
639            };
640        }
641
642        let sc = *sc_ptr.add(next_p);
643        last_val = (x - last_val).mul_add(sc, last_val);
644        last_p = next_p;
645
646        i += 1;
647    }
648
649    while i < len {
650        let x = *data.get_unchecked(i);
651
652        if x.is_nan() {
653            *out.get_unchecked_mut(i) = f64::NAN;
654            i += 1;
655            continue;
656        }
657
658        let m = mean[i];
659        let dv = dev[i];
660
661        let prev_p = if last_p == 0 { max_pi } else { last_p };
662        let mut next_p = prev_p;
663
664        if m.is_finite() && dv.is_finite() {
665            let d175 = dv * D175;
666            let d025 = dv * D025;
667
668            let a = m - d175;
669            let b = m - d025;
670            let c = m + d025;
671            let d = m + d175;
672
673            let inc_fast = ((x < a) as i32) | ((x > d) as i32);
674            let inc_slow = ((x >= b) as i32) & ((x <= c) as i32);
675            let delta = inc_slow - inc_fast;
676
677            let p_tmp = prev_p as isize + delta as isize;
678            next_p = if p_tmp < min_pi as isize {
679                min_pi
680            } else if p_tmp > max_pi as isize {
681                max_pi
682            } else {
683                p_tmp as usize
684            };
685        }
686
687        let sc = *sc_ptr.add(next_p);
688        last_val = (x - last_val).mul_add(sc, last_val);
689        last_p = next_p;
690
691        *out.get_unchecked_mut(i) = last_val;
692        i += 1;
693    }
694
695    Ok(())
696}
697
698#[inline(always)]
699unsafe fn vlma_row_scalar(
700    data: &[f64],
701    min_period: usize,
702    max_period: usize,
703    matype: &str,
704    devtype: usize,
705    first_valid: usize,
706    out: &mut [f64],
707) -> Result<(), VlmaError> {
708    vlma_scalar_into(
709        data,
710        min_period,
711        max_period,
712        matype,
713        devtype,
714        first_valid,
715        out,
716    )
717}
718
719#[inline(always)]
720unsafe fn vlma_row_fast_sma_std_prefix(
721    data: &[f64],
722    min_period: usize,
723    max_period: usize,
724    first_valid: usize,
725    ps_sum: &[f64],
726    ps_sumsq: &[f64],
727    ps_cnt: &[usize],
728    out: &mut [f64],
729) -> Result<(), VlmaError> {
730    debug_assert_eq!(out.len(), data.len());
731    let len = data.len();
732    if len == 0 {
733        return Ok(());
734    }
735
736    let warm_end = first_valid + max_period - 1;
737    let x0 = *data.get_unchecked(first_valid);
738
739    *out.get_unchecked_mut(first_valid) = x0;
740
741    let min_pi = if min_period == 0 { 1 } else { min_period };
742    let max_pi = core::cmp::max(max_period, min_pi);
743    let mut last_p: usize = max_pi;
744
745    let mut sc_lut = Vec::with_capacity(max_pi + 1);
746    sc_lut.push(0.0);
747    for p in 1..=max_pi {
748        sc_lut.push(2.0 / (p as f64 + 1.0));
749    }
750    let sc_ptr = sc_lut.as_ptr();
751
752    const D175: f64 = 1.75;
753    const D025: f64 = 0.25;
754
755    let mut last_val = x0;
756
757    let mut i = first_valid + 1;
758    while i < len && i < warm_end {
759        let x = *data.get_unchecked(i);
760        if x.is_finite() {
761            let sc = *sc_ptr.add(last_p);
762            last_val = (x - last_val).mul_add(sc, last_val);
763        }
764        i += 1;
765    }
766    if warm_end >= len {
767        return Ok(());
768    }
769
770    while i < len {
771        let x = *data.get_unchecked(i);
772        if !x.is_finite() {
773            *out.get_unchecked_mut(i) = f64::NAN;
774        } else {
775            let start = i + 1 - max_period;
776            let cnt = *ps_cnt.get_unchecked(i + 1) - *ps_cnt.get_unchecked(start);
777            let (m, dv) = if cnt == max_period {
778                let sum = *ps_sum.get_unchecked(i + 1) - *ps_sum.get_unchecked(start);
779                let sumsq = *ps_sumsq.get_unchecked(i + 1) - *ps_sumsq.get_unchecked(start);
780                let inv = 1.0 / (max_period as f64);
781                let m = sum * inv;
782                let var = (sumsq * inv) - m * m;
783                let dv = if var < 0.0 { 0.0 } else { var.sqrt() };
784                (m, dv)
785            } else {
786                (f64::NAN, f64::NAN)
787            };
788
789            let prev_p = if last_p == 0 { max_pi } else { last_p };
790            let mut next_p = prev_p;
791            if m.is_finite() && dv.is_finite() {
792                let d175 = dv * D175;
793                let d025 = dv * D025;
794                let a = m - d175;
795                let b = m - d025;
796                let c = m + d025;
797                let d = m + d175;
798                let inc_fast = ((x < a) as i32) | ((x > d) as i32);
799                let inc_slow = ((x >= b) as i32) & ((x <= c) as i32);
800                let delta = inc_slow - inc_fast;
801                let p_tmp = prev_p as isize + delta as isize;
802                next_p = if p_tmp < min_pi as isize {
803                    min_pi
804                } else if p_tmp > max_pi as isize {
805                    max_pi
806                } else {
807                    p_tmp as usize
808                };
809            }
810            let sc = *sc_ptr.add(next_p);
811            last_val = (x - last_val).mul_add(sc, last_val);
812            last_p = next_p;
813            *out.get_unchecked_mut(i) = last_val;
814        }
815        i += 1;
816    }
817
818    Ok(())
819}
820
/// AVX2 kernel entry point. It currently forwards every argument to
/// `vlma_scalar_into`, including for the SMA / deviation-type-0 case.
///
/// # Safety
/// Same contract as `vlma_scalar_into`: `first_valid` must index into `data`
/// and `out` must be as long as `data`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_avx2_into(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    vlma_scalar_into(data, min_period, max_period, matype, devtype, first_valid, out)
}
842
/// Row-level AVX2 entry point; forwards every argument to `vlma_avx2_into`.
///
/// # Safety
/// Same contract as `vlma_avx2_into`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_row_avx2(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    vlma_avx2_into(data, min_period, max_period, matype, devtype, first_valid, out)
}
864
/// AVX-512 kernel entry point. Routes to the "short" variant when
/// `max_period <= 32` and to the "long" variant otherwise.
///
/// # Safety
/// Same contract as the variants it calls: `first_valid` must index into
/// `data` and `out` must be as long as `data`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_avx512_into(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    match max_period {
        0..=32 => vlma_avx512_short_into(
            data,
            min_period,
            max_period,
            matype,
            devtype,
            first_valid,
            out,
        ),
        _ => vlma_avx512_long_into(
            data,
            min_period,
            max_period,
            matype,
            devtype,
            first_valid,
            out,
        ),
    }
}
898
/// Row-level AVX-512 entry point; forwards every argument to `vlma_avx512_into`.
///
/// # Safety
/// Same contract as `vlma_avx512_into`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_row_avx512(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    vlma_avx512_into(data, min_period, max_period, matype, devtype, first_valid, out)
}
920
/// AVX-512 variant selected for `max_period <= 32`. It currently forwards
/// every argument to `vlma_scalar_into`.
///
/// # Safety
/// Same contract as `vlma_scalar_into`: `first_valid` must index into `data`
/// and `out` must be as long as `data`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_avx512_short_into(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    vlma_scalar_into(data, min_period, max_period, matype, devtype, first_valid, out)
}
942
/// AVX-512 variant selected for `max_period > 32`. It currently forwards
/// every argument to `vlma_scalar_into`.
///
/// # Safety
/// Same contract as `vlma_scalar_into`: `first_valid` must index into `data`
/// and `out` must be as long as `data`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn vlma_avx512_long_into(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    first_valid: usize,
    out: &mut [f64],
) -> Result<(), VlmaError> {
    vlma_scalar_into(data, min_period, max_period, matype, devtype, first_valid, out)
}
964
/// Incremental VLMA calculator that consumes one sample at a time.
#[derive(Clone, Debug)]
pub struct VlmaStream {
    /// Lower bound of the adaptive period.
    min_period: usize,
    /// Upper bound of the adaptive period; also the ring-buffer length.
    max_period: usize,
    /// Moving-average name.
    matype: String,
    /// Deviation selector.
    devtype: usize,
    /// Ring buffer of the most recent `max_period` samples, initially all NaN.
    buffer: Vec<f64>,
    /// Next write position in `buffer`.
    head: usize,
    /// Set once `head` has wrapped around to 0 for the first time.
    filled: bool,
    /// Current period as a float; starts at `max_period`.
    period: f64,
    /// Most recent smoothed value; NaN until the first finite sample is seen.
    last_val: f64,

    /// Running sum of the finite samples in the window.
    sum: f64,
    /// Running sum of squares of the finite samples in the window.
    sumsq: f64,
    /// Number of non-finite samples currently counted in the window.
    nan_count: usize,
    /// Precomputed `1 / max_period`.
    inv_n: f64,
    /// Current period as an index into `sc_lut`; starts at `max_period`.
    last_p: usize,
    /// Smoothing factors: `sc_lut[p] = 2 / (p + 1)` for `p >= 1`, with `sc_lut[0] = 0`.
    sc_lut: Vec<f64>,
}
984
985impl VlmaStream {
986    pub fn try_new(params: VlmaParams) -> Result<Self, VlmaError> {
987        let min_period = params.min_period.unwrap_or(5);
988        let max_period = params.max_period.unwrap_or(50);
989        let matype = params.matype.unwrap_or_else(|| "sma".to_string());
990        let devtype = params.devtype.unwrap_or(0);
991
992        if min_period > max_period {
993            return Err(VlmaError::InvalidPeriodRange {
994                min_period,
995                max_period,
996            });
997        }
998        if max_period == 0 {
999            return Err(VlmaError::InvalidPeriod {
1000                period: max_period,
1001                data_len: 0,
1002            });
1003        }
1004
1005        let mut sc_lut = Vec::with_capacity(max_period + 1);
1006        sc_lut.push(0.0);
1007        for p in 1..=max_period {
1008            sc_lut.push(2.0 / (p as f64 + 1.0));
1009        }
1010
1011        Ok(Self {
1012            min_period,
1013            max_period,
1014            matype,
1015            devtype,
1016            buffer: vec![f64::NAN; max_period],
1017            head: 0,
1018            filled: false,
1019            period: max_period as f64,
1020            last_val: f64::NAN,
1021
1022            sum: 0.0,
1023            sumsq: 0.0,
1024            nan_count: 0,
1025            inv_n: 1.0 / (max_period as f64),
1026            last_p: max_period,
1027            sc_lut,
1028        })
1029    }
1030
1031    #[inline(always)]
1032    pub fn update(&mut self, value: f64) -> Option<f64> {
1033        let out_idx = self.head;
1034        let v_out = self.buffer[out_idx];
1035        self.buffer[out_idx] = value;
1036        self.head = (self.head + 1) % self.max_period;
1037
1038        if !self.filled && self.head == 0 {
1039            self.filled = true;
1040        }
1041
1042        if self.filled {
1043            if v_out.is_finite() {
1044                self.sum -= v_out;
1045                self.sumsq -= v_out * v_out;
1046            } else {
1047                self.nan_count = self.nan_count.saturating_sub(1);
1048            }
1049        }
1050        if value.is_finite() {
1051            self.sum += value;
1052            self.sumsq += value * value;
1053        } else {
1054            self.nan_count += 1;
1055        }
1056
1057        if self.matype == "sma" && self.devtype == 0 {
1058            if !self.filled {
1059                if self.last_val.is_nan() {
1060                    if value.is_finite() {
1061                        self.last_val = value;
1062                        return Some(value);
1063                    } else {
1064                        return None;
1065                    }
1066                }
1067                if value.is_finite() {
1068                    let sc = self.sc_lut[self.last_p];
1069                    self.last_val = fast_ema_update(self.last_val, value, sc);
1070                }
1071                return None;
1072            }
1073
1074            if !value.is_finite() {
1075                return Some(f64::NAN);
1076            }
1077
1078            let (m, dv) = if self.nan_count == 0 {
1079                let mean = self.sum * self.inv_n;
1080                let var = (self.sumsq * self.inv_n) - mean * mean;
1081                let std = if var <= 0.0 { 0.0 } else { var.sqrt() };
1082                (mean, std)
1083            } else {
1084                (f64::NAN, f64::NAN)
1085            };
1086
1087            let mut next_p = self.last_p;
1088            if m.is_finite() && dv.is_finite() {
1089                let d175 = dv * 1.75;
1090                let d025 = dv * 0.25;
1091                let a = m - d175;
1092                let b = m - d025;
1093                let c = m + d025;
1094                let d = m + d175;
1095
1096                let inc_fast = ((value < a) as i32) | ((value > d) as i32);
1097                let inc_slow = ((value >= b) as i32) & ((value <= c) as i32);
1098                let delta = inc_slow - inc_fast;
1099                let p_tmp = self.last_p as isize + delta as isize;
1100                next_p = if p_tmp < self.min_period as isize {
1101                    self.min_period
1102                } else if p_tmp > self.max_period as isize {
1103                    self.max_period
1104                } else {
1105                    p_tmp as usize
1106                };
1107            }
1108
1109            let sc = self.sc_lut[next_p];
1110            self.last_val = fast_ema_update(self.last_val, value, sc);
1111            self.last_p = next_p;
1112            self.period = next_p as f64;
1113            return Some(self.last_val);
1114        }
1115
1116        let mut window: Vec<f64> = Vec::with_capacity(self.max_period);
1117        for i in 0..self.max_period {
1118            let idx = (self.head + i) % self.max_period;
1119            let v = self.buffer[idx];
1120            if v.is_finite() {
1121                window.push(v);
1122            }
1123        }
1124        if window.len() < self.max_period {
1125            if self.last_val.is_nan() && value.is_finite() {
1126                self.last_val = value;
1127                return Some(value);
1128            }
1129            if value.is_finite() {
1130                let sc = 2.0 / (self.period + 1.0);
1131                self.last_val = fast_ema_update(self.last_val, value, sc);
1132            }
1133            return None;
1134        }
1135
1136        let mean = match ma(&self.matype, MaData::Slice(&window), self.max_period) {
1137            Ok(v) => *v.last().unwrap_or(&f64::NAN),
1138            Err(_) => return None,
1139        };
1140        let dev_params = DevParams {
1141            period: Some(self.max_period),
1142            devtype: Some(self.devtype),
1143        };
1144        let dv = match deviation(&DevInput::from_slice(&window, dev_params)) {
1145            Ok(v) => *v.last().unwrap_or(&f64::NAN),
1146            Err(_) => return None,
1147        };
1148
1149        if value.is_finite() {
1150            let prev = if self.period == 0.0 {
1151                self.max_period as f64
1152            } else {
1153                self.period
1154            };
1155            let mut new_p = prev;
1156            if mean.is_finite() && dv.is_finite() {
1157                let a = mean - 1.75 * dv;
1158                let b = mean - 0.25 * dv;
1159                let c = mean + 0.25 * dv;
1160                let d = mean + 1.75 * dv;
1161                if value < a || value > d {
1162                    new_p = (prev - 1.0).max(self.min_period as f64);
1163                } else if value >= b && value <= c {
1164                    new_p = (prev + 1.0).min(self.max_period as f64);
1165                }
1166            }
1167            let sc = 2.0 / (new_p + 1.0);
1168            if !self.last_val.is_nan() {
1169                self.last_val = fast_ema_update(self.last_val, value, sc);
1170            } else {
1171                self.last_val = value;
1172            }
1173            self.period = new_p;
1174            return Some(self.last_val);
1175        }
1176
1177        Some(f64::NAN)
1178    }
1179}
1180
/// Parameter sweep for a batch VLMA run.
///
/// The numeric axes are `(start, end, step)` triples. The `matype` axis carries
/// two names plus a third slot that the grid expansion in this file ignores.
#[derive(Clone, Debug)]
pub struct VlmaBatchRange {
    /// `(start, end, step)` sweep of the minimum adaptive period.
    pub min_period: (usize, usize, usize),
    /// `(start, end, step)` sweep of the maximum adaptive period.
    pub max_period: (usize, usize, usize),
    /// Moving-average names to sweep; the third element is unused.
    pub matype: (String, String, String),
    /// `(start, end, step)` sweep of the deviation type selector.
    pub devtype: (usize, usize, usize),
}

impl Default for VlmaBatchRange {
    /// Default sweep: `min_period` fixed at 5, `max_period` from 50 to 299 in
    /// steps of 1, `"sma"` only, `devtype` fixed at 0.
    fn default() -> Self {
        let sma = || String::from("sma");
        VlmaBatchRange {
            min_period: (5, 5, 0),
            max_period: (50, 299, 1),
            matype: (sma(), sma(), String::new()),
            devtype: (0, 0, 0),
        }
    }
}
1199
1200#[derive(Clone, Debug, Default)]
1201pub struct VlmaBatchBuilder {
1202    range: VlmaBatchRange,
1203    kernel: Kernel,
1204}
1205
1206impl VlmaBatchBuilder {
1207    pub fn new() -> Self {
1208        Self::default()
1209    }
1210    pub fn kernel(mut self, k: Kernel) -> Self {
1211        self.kernel = k;
1212        self
1213    }
1214    #[inline]
1215    pub fn min_period_range(mut self, start: usize, end: usize, step: usize) -> Self {
1216        self.range.min_period = (start, end, step);
1217        self
1218    }
1219    #[inline]
1220    pub fn max_period_range(mut self, start: usize, end: usize, step: usize) -> Self {
1221        self.range.max_period = (start, end, step);
1222        self
1223    }
1224    pub fn matype_static<S: Into<String>>(mut self, v: S) -> Self {
1225        let s = v.into();
1226        self.range.matype = (s.clone(), s, "".to_string());
1227        self
1228    }
1229    #[inline]
1230    pub fn devtype_range(mut self, start: usize, end: usize, step: usize) -> Self {
1231        self.range.devtype = (start, end, step);
1232        self
1233    }
1234    pub fn apply_slice(self, data: &[f64]) -> Result<VlmaBatchOutput, VlmaError> {
1235        vlma_batch_with_kernel(data, &self.range, self.kernel)
1236    }
1237    pub fn with_default_slice(data: &[f64], k: Kernel) -> Result<VlmaBatchOutput, VlmaError> {
1238        VlmaBatchBuilder::new().kernel(k).apply_slice(data)
1239    }
1240    pub fn apply_candles(self, c: &Candles, src: &str) -> Result<VlmaBatchOutput, VlmaError> {
1241        let slice = source_type(c, src);
1242        self.apply_slice(slice)
1243    }
1244    pub fn with_default_candles(c: &Candles) -> Result<VlmaBatchOutput, VlmaError> {
1245        VlmaBatchBuilder::new()
1246            .kernel(Kernel::Auto)
1247            .apply_candles(c, "close")
1248    }
1249}
1250
1251#[derive(Clone, Debug)]
1252pub struct VlmaBatchOutput {
1253    pub values: Vec<f64>,
1254    pub combos: Vec<VlmaParams>,
1255    pub rows: usize,
1256    pub cols: usize,
1257}
1258
1259impl VlmaBatchOutput {
1260    pub fn row_for_params(&self, p: &VlmaParams) -> Option<usize> {
1261        self.combos.iter().position(|c| {
1262            c.min_period.unwrap_or(5) == p.min_period.unwrap_or(5)
1263                && c.max_period.unwrap_or(50) == p.max_period.unwrap_or(50)
1264                && c.matype.as_ref().unwrap_or(&"sma".to_string())
1265                    == p.matype.as_ref().unwrap_or(&"sma".to_string())
1266                && c.devtype.unwrap_or(0) == p.devtype.unwrap_or(0)
1267        })
1268    }
1269    pub fn values_for(&self, p: &VlmaParams) -> Option<&[f64]> {
1270        self.row_for_params(p).map(|row| {
1271            let start = row.checked_mul(self.cols).unwrap_or(0);
1272            &self.values[start..start + self.cols]
1273        })
1274    }
1275}
1276
/// Expands a `(start, end, step)` triple into the list of values it denotes.
///
/// * A zero step, or `start == end`, yields just `[start]`.
/// * `start < end` counts upwards from `start`, never exceeding `end`.
/// * `start > end` counts downwards from `start`, never going below `end`.
fn axis_usize((start, end, step): (usize, usize, usize)) -> Vec<usize> {
    if step == 0 || start == end {
        return vec![start];
    }

    if start < end {
        // `step` is known to be non-zero here.
        return (start..=end).step_by(step).collect();
    }

    // Descending sweep, done in signed arithmetic so the cursor can drop below
    // `end` (possibly below zero) to terminate the loop.
    let stride = (step as isize).max(1);
    let floor = end as isize;
    let mut cursor = start as isize;
    let mut values = Vec::new();
    while cursor >= floor {
        values.push(cursor as usize);
        cursor -= stride;
    }
    values
}
/// Expands the string axis: one entry when both names are equal, otherwise
/// both names in order. The third tuple element is ignored.
fn axis_string((start, end, _): (String, String, String)) -> Vec<String> {
    let mut names = vec![start];
    if names[0] != end {
        names.push(end);
    }
    names
}
1302fn axis_usize_step((start, end, step): (usize, usize, usize)) -> Vec<usize> {
1303    axis_usize((start, end, step))
1304}
1305fn axis_devtype((start, end, step): (usize, usize, usize)) -> Vec<usize> {
1306    axis_usize((start, end, step))
1307}
1308
1309fn expand_grid(r: &VlmaBatchRange) -> Result<Vec<VlmaParams>, VlmaError> {
1310    let min_periods = axis_usize(r.min_period);
1311    let max_periods = axis_usize(r.max_period);
1312    let matypes = axis_string(r.matype.clone());
1313    let devtypes = axis_devtype(r.devtype);
1314
1315    if min_periods.is_empty() || max_periods.is_empty() || matypes.is_empty() || devtypes.is_empty()
1316    {
1317        return Err(VlmaError::InvalidRange {
1318            start: format!("{:?}", r.min_period),
1319            end: format!("{:?}", r.max_period),
1320            step: format!("{:?}", r.devtype),
1321        });
1322    }
1323
1324    let cap = min_periods
1325        .len()
1326        .checked_mul(max_periods.len())
1327        .and_then(|x| x.checked_mul(matypes.len()))
1328        .and_then(|x| x.checked_mul(devtypes.len()))
1329        .ok_or_else(|| VlmaError::InvalidRange {
1330            start: "cap".into(),
1331            end: "overflow".into(),
1332            step: "mul".into(),
1333        })?;
1334
1335    let mut out = Vec::with_capacity(cap);
1336    for &mn in &min_periods {
1337        for &mx in &max_periods {
1338            for mt in &matypes {
1339                for &dt in &devtypes {
1340                    out.push(VlmaParams {
1341                        min_period: Some(mn),
1342                        max_period: Some(mx),
1343                        matype: Some(mt.clone()),
1344                        devtype: Some(dt),
1345                    });
1346                }
1347            }
1348        }
1349    }
1350    Ok(out)
1351}
1352
1353#[inline(always)]
1354pub fn vlma_batch_with_kernel(
1355    data: &[f64],
1356    sweep: &VlmaBatchRange,
1357    k: Kernel,
1358) -> Result<VlmaBatchOutput, VlmaError> {
1359    let kernel = match k {
1360        Kernel::Auto => detect_best_batch_kernel(),
1361        other if other.is_batch() => other,
1362        _ => {
1363            return Err(VlmaError::InvalidKernelForBatch(k));
1364        }
1365    };
1366    let simd = match kernel {
1367        Kernel::Avx512Batch => Kernel::Avx512,
1368        Kernel::Avx2Batch => Kernel::Avx2,
1369        Kernel::ScalarBatch => Kernel::Scalar,
1370        _ => unreachable!(),
1371    };
1372    vlma_batch_par_slice(data, sweep, simd)
1373}
1374
1375#[inline(always)]
1376pub fn vlma_batch_slice(
1377    data: &[f64],
1378    sweep: &VlmaBatchRange,
1379    kern: Kernel,
1380) -> Result<VlmaBatchOutput, VlmaError> {
1381    vlma_batch_inner(data, sweep, kern, false)
1382}
1383
1384#[inline(always)]
1385pub fn vlma_batch_par_slice(
1386    data: &[f64],
1387    sweep: &VlmaBatchRange,
1388    kern: Kernel,
1389) -> Result<VlmaBatchOutput, VlmaError> {
1390    vlma_batch_inner(data, sweep, kern, true)
1391}
1392
1393fn vlma_batch_inner(
1394    data: &[f64],
1395    sweep: &VlmaBatchRange,
1396    kern: Kernel,
1397    parallel: bool,
1398) -> Result<VlmaBatchOutput, VlmaError> {
1399    let combos = expand_grid(sweep)?;
1400
1401    let first = data
1402        .iter()
1403        .position(|x| !x.is_nan())
1404        .ok_or(VlmaError::AllValuesNaN)?;
1405    let max_p = combos.iter().map(|c| c.max_period.unwrap()).max().unwrap();
1406    if data.len() - first < max_p {
1407        return Err(VlmaError::NotEnoughValidData {
1408            needed: max_p,
1409            valid: data.len() - first,
1410        });
1411    }
1412
1413    let rows = combos.len();
1414    let cols = data.len();
1415
1416    let mut buf_mu = make_uninit_matrix(rows, cols);
1417    let warms: Vec<usize> = combos
1418        .iter()
1419        .map(|c| first + c.max_period.unwrap() - 1)
1420        .collect();
1421    init_matrix_prefixes(&mut buf_mu, cols, &warms);
1422
1423    let mut guard = core::mem::ManuallyDrop::new(buf_mu);
1424    let out: &mut [f64] =
1425        unsafe { core::slice::from_raw_parts_mut(guard.as_mut_ptr() as *mut f64, guard.len()) };
1426
1427    for row in 0..rows {
1428        let row_start = row * cols;
1429        out[row_start + first] = data[first];
1430    }
1431
1432    let simd_kern = match kern {
1433        Kernel::Auto => match detect_best_batch_kernel() {
1434            Kernel::Avx512Batch => Kernel::Avx512,
1435            Kernel::Avx2Batch => Kernel::Avx2,
1436            Kernel::ScalarBatch => Kernel::Scalar,
1437            _ => Kernel::Scalar,
1438        },
1439        Kernel::Avx512Batch => Kernel::Avx512,
1440        Kernel::Avx2Batch => Kernel::Avx2,
1441        Kernel::ScalarBatch => Kernel::Scalar,
1442        k => k,
1443    };
1444    vlma_batch_inner_into(data, sweep, simd_kern, parallel, out)?;
1445
1446    let values = unsafe {
1447        Vec::from_raw_parts(
1448            guard.as_mut_ptr() as *mut f64,
1449            guard.len(),
1450            guard.capacity(),
1451        )
1452    };
1453
1454    Ok(VlmaBatchOutput {
1455        values,
1456        combos,
1457        rows,
1458        cols,
1459    })
1460}
1461
1462#[inline(always)]
1463pub fn vlma_batch_inner_into(
1464    data: &[f64],
1465    sweep: &VlmaBatchRange,
1466    kern: Kernel,
1467    parallel: bool,
1468    out: &mut [f64],
1469) -> Result<Vec<VlmaParams>, VlmaError> {
1470    let combos = expand_grid(sweep)?;
1471    let first = data
1472        .iter()
1473        .position(|x| !x.is_nan())
1474        .ok_or(VlmaError::AllValuesNaN)?;
1475    let max_p = combos.iter().map(|c| c.max_period.unwrap()).max().unwrap();
1476    if data.len() - first < max_p {
1477        return Err(VlmaError::NotEnoughValidData {
1478            needed: max_p,
1479            valid: data.len() - first,
1480        });
1481    }
1482    let rows = combos.len();
1483    let cols = data.len();
1484
1485    let any_sma_std = combos
1486        .iter()
1487        .any(|c| c.matype.as_deref() == Some("sma") && c.devtype == Some(0));
1488
1489    let (ps_sum, ps_sumsq, ps_cnt);
1490    let ps_sum_ref: Option<&[f64]>;
1491    let ps_sumsq_ref: Option<&[f64]>;
1492    let ps_cnt_ref: Option<&[usize]>;
1493    if any_sma_std {
1494        let mut sum = 0.0_f64;
1495        let mut sumsq = 0.0_f64;
1496        let mut cnt = 0_usize;
1497        let mut ps_s = Vec::with_capacity(cols + 1);
1498        let mut ps_q = Vec::with_capacity(cols + 1);
1499        let mut ps_c = Vec::with_capacity(cols + 1);
1500        ps_s.push(0.0);
1501        ps_q.push(0.0);
1502        ps_c.push(0);
1503        for &v in data.iter() {
1504            if v.is_finite() {
1505                sum += v;
1506                sumsq += v * v;
1507                cnt += 1;
1508            }
1509            ps_s.push(sum);
1510            ps_q.push(sumsq);
1511            ps_c.push(cnt);
1512        }
1513        ps_sum = ps_s;
1514        ps_sumsq = ps_q;
1515        ps_cnt = ps_c;
1516        ps_sum_ref = Some(&ps_sum);
1517        ps_sumsq_ref = Some(&ps_sumsq);
1518        ps_cnt_ref = Some(&ps_cnt);
1519    } else {
1520        ps_sum = Vec::new();
1521        ps_sumsq = Vec::new();
1522        ps_cnt = Vec::new();
1523        ps_sum_ref = None;
1524        ps_sumsq_ref = None;
1525        ps_cnt_ref = None;
1526    }
1527
1528    let do_row = |row: usize, out_row: &mut [f64]| unsafe {
1529        let min_period = combos[row].min_period.unwrap();
1530        let max_period = combos[row].max_period.unwrap();
1531        let matype = combos[row].matype.as_ref().unwrap();
1532        let devtype = combos[row].devtype.unwrap();
1533        match kern {
1534            Kernel::Scalar => {
1535                if matype == "sma" && devtype == 0 {
1536                    vlma_row_fast_sma_std_prefix(
1537                        data,
1538                        min_period,
1539                        max_period,
1540                        first,
1541                        ps_sum_ref.unwrap(),
1542                        ps_sumsq_ref.unwrap(),
1543                        ps_cnt_ref.unwrap(),
1544                        out_row,
1545                    )
1546                    .unwrap();
1547                } else {
1548                    vlma_row_scalar(
1549                        data, min_period, max_period, matype, devtype, first, out_row,
1550                    )
1551                    .unwrap();
1552                }
1553            }
1554            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1555            Kernel::Avx2 => {
1556                vlma_row_avx2(
1557                    data, min_period, max_period, matype, devtype, first, out_row,
1558                )
1559                .unwrap();
1560            }
1561            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1562            Kernel::Avx512 => {
1563                vlma_row_avx512(
1564                    data, min_period, max_period, matype, devtype, first, out_row,
1565                )
1566                .unwrap();
1567            }
1568            #[cfg(not(all(feature = "nightly-avx", target_arch = "x86_64")))]
1569            Kernel::Avx2 | Kernel::Avx512 => {
1570                vlma_row_scalar(
1571                    data, min_period, max_period, matype, devtype, first, out_row,
1572                )
1573                .unwrap();
1574            }
1575            _ => unreachable!(),
1576        }
1577    };
1578
1579    if parallel {
1580        #[cfg(not(target_arch = "wasm32"))]
1581        {
1582            out.par_chunks_mut(cols)
1583                .enumerate()
1584                .for_each(|(row, slice)| do_row(row, slice));
1585        }
1586
1587        #[cfg(target_arch = "wasm32")]
1588        {
1589            for (row, slice) in out.chunks_mut(cols).enumerate() {
1590                do_row(row, slice);
1591            }
1592        }
1593    } else {
1594        for (row, slice) in out.chunks_mut(cols).enumerate() {
1595            do_row(row, slice);
1596        }
1597    }
1598
1599    Ok(combos)
1600}
1601
/// Public entry point to the parameter-grid expansion.
///
/// Forwards `r` unchanged to the private `expand_grid` and returns its result,
/// including any error it reports.
#[inline(always)]
pub fn expand_grid_vlma(r: &VlmaBatchRange) -> Result<Vec<VlmaParams>, VlmaError> {
    expand_grid(r)
}
1606
// Python binding `vlma(data, min_period, max_period, matype, devtype, kernel)`.
//
// Borrows the NumPy input as a contiguous slice, validates the kernel name,
// runs the indicator with the GIL released, and returns the values as a new
// 1-D NumPy array. Any indicator error is surfaced as `ValueError`.
#[cfg(feature = "python")]
#[pyfunction(name = "vlma")]
#[pyo3(signature = (data, min_period=5, max_period=50, matype="sma", devtype=0, kernel=None))]
pub fn vlma_py<'py>(
    py: Python<'py>,
    data: PyReadonlyArray1<'py, f64>,
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyArray1<f64>>> {
    use numpy::{IntoPyArray, PyArrayMethods};

    let prices = data.as_slice()?;
    let kern = validate_kernel(kernel, false)?;

    let input = VlmaInput::from_slice(
        prices,
        VlmaParams {
            min_period: Some(min_period),
            max_period: Some(max_period),
            matype: Some(matype.to_string()),
            devtype: Some(devtype),
        },
    );

    let computed = py.allow_threads(|| vlma_with_kernel(&input, kern).map(|o| o.values));
    match computed {
        Ok(values) => Ok(values.into_pyarray(py)),
        Err(e) => Err(PyValueError::new_err(e.to_string())),
    }
}
1638
// Python-visible wrapper (`VlmaStream`) around the streaming VLMA state.
#[cfg(feature = "python")]
#[pyclass(name = "VlmaStream")]
pub struct VlmaStreamPy {
    stream: VlmaStream,
}

#[cfg(feature = "python")]
#[pymethods]
impl VlmaStreamPy {
    // Builds the stream from explicit parameters; construction errors are
    // raised as `ValueError`.
    #[new]
    fn new(min_period: usize, max_period: usize, matype: &str, devtype: usize) -> PyResult<Self> {
        VlmaStream::try_new(VlmaParams {
            min_period: Some(min_period),
            max_period: Some(max_period),
            matype: Some(matype.to_string()),
            devtype: Some(devtype),
        })
        .map(|stream| VlmaStreamPy { stream })
        .map_err(|e| PyValueError::new_err(e.to_string()))
    }

    // Pushes one sample and returns whatever the underlying stream returns.
    fn update(&mut self, value: f64) -> Option<f64> {
        let stream = &mut self.stream;
        stream.update(value)
    }
}
1665
// Python binding `vlma_batch(...)`.
//
// Allocates the flat `rows * cols` NumPy output up front, pre-fills each row's
// warm-up prefix with NaN (keeping the first valid sample as seed), computes
// all rows with the GIL released, and returns a dict with the reshaped
// `values` matrix plus one parameter column per swept axis.
#[cfg(feature = "python")]
#[pyfunction(name = "vlma_batch")]
#[pyo3(signature = (data, min_period_range=(5, 5, 0), max_period_range=(50, 50, 0), devtype_range=(0, 0, 0), matype="sma", kernel=None))]
pub fn vlma_batch_py<'py>(
    py: Python<'py>,
    data: PyReadonlyArray1<'py, f64>,
    min_period_range: (usize, usize, usize),
    max_period_range: (usize, usize, usize),
    devtype_range: (usize, usize, usize),
    matype: &str,
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyDict>> {
    use numpy::{IntoPyArray, PyArrayMethods};

    let slice_in = data.as_slice()?;

    let ma_name = matype.to_string();
    let sweep = VlmaBatchRange {
        min_period: min_period_range,
        max_period: max_period_range,
        matype: (ma_name.clone(), ma_name, "".to_string()),
        devtype: devtype_range,
    };

    let grid = expand_grid(&sweep).map_err(|e| PyValueError::new_err(e.to_string()))?;
    let rows = grid.len();
    let cols = slice_in.len();

    let out_arr = unsafe { PyArray1::<f64>::new(py, [rows * cols], false) };
    let slice_out = unsafe { out_arr.as_slice_mut()? };

    // Index of the first non-NaN input (0 when there is none).
    let first = slice_in.iter().position(|x| !x.is_nan()).unwrap_or(0);
    for (row, combo) in grid.iter().enumerate() {
        let warmup = first + combo.max_period.unwrap() - 1;
        let base = row * cols;
        let prefix_len = warmup.min(cols);

        for (i, cell) in slice_out[base..base + prefix_len].iter_mut().enumerate() {
            if i != first {
                *cell = f64::NAN;
            }
        }

        if first < cols {
            slice_out[base + first] = slice_in[first];
        }
    }

    let kern = validate_kernel(kernel, true)?;

    let combos = py
        .allow_threads(|| {
            let batch_kernel = if matches!(kern, Kernel::Auto) {
                detect_best_batch_kernel()
            } else {
                kern
            };
            let row_kernel = match batch_kernel {
                Kernel::Avx512Batch => Kernel::Avx512,
                Kernel::Avx2Batch => Kernel::Avx2,
                Kernel::ScalarBatch => Kernel::Scalar,
                _ => unreachable!(),
            };
            vlma_batch_inner_into(slice_in, &sweep, row_kernel, true, slice_out)
        })
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    let dict = PyDict::new(py);
    dict.set_item("values", out_arr.reshape((rows, cols))?)?;

    let min_periods: Vec<_> = combos.iter().map(|p| p.min_period.unwrap()).collect();
    dict.set_item("min_periods", min_periods.into_pyarray(py))?;

    let max_periods: Vec<_> = combos.iter().map(|p| p.max_period.unwrap()).collect();
    dict.set_item("max_periods", max_periods.into_pyarray(py))?;

    let devtypes: Vec<_> = combos.iter().map(|p| p.devtype.unwrap()).collect();
    dict.set_item("devtypes", devtypes.into_pyarray(py))?;

    let matypes: Vec<_> = combos
        .iter()
        .map(|p| p.matype.as_ref().unwrap().clone())
        .collect();
    dict.set_item("matypes", matypes)?;

    Ok(dict)
}
1765
// Python binding `vlma_cuda_batch_dev(...)`: runs the batch sweep on a CUDA
// device and returns the result wrapped by `make_device_array_py`.
// Raises `ValueError` when CUDA is unavailable or the CUDA call fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "vlma_cuda_batch_dev")]
#[pyo3(signature = (data_f32, min_period_range=(5, 5, 0), max_period_range=(50, 50, 0), devtype_range=(0, 0, 0), matype="sma", device_id=0))]
pub fn vlma_cuda_batch_dev_py(
    py: Python<'_>,
    data_f32: numpy::PyReadonlyArray1<'_, f32>,
    min_period_range: (usize, usize, usize),
    max_period_range: (usize, usize, usize),
    devtype_range: (usize, usize, usize),
    matype: &str,
    device_id: usize,
) -> PyResult<DeviceArrayF32Py> {
    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    let slice_in = data_f32.as_slice()?;

    let ma_name = matype.to_string();
    let sweep = VlmaBatchRange {
        min_period: min_period_range,
        max_period: max_period_range,
        matype: (ma_name.clone(), ma_name, "".to_string()),
        devtype: devtype_range,
    };

    let inner = py.allow_threads(|| {
        CudaVlma::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))
            .and_then(|mut cuda| {
                cuda.vlma_batch_dev(slice_in, &sweep)
                    .map_err(|e| PyValueError::new_err(e.to_string()))
            })
    })?;
    make_device_array_py(device_id, inner)
}
1797
// Python binding `vlma_cuda_many_series_one_param_dev(...)`: applies a single
// parameter set to a 2-D f32 array on a CUDA device.
// The array's first dimension is passed to the CUDA routine as `rows`, the
// second as `cols`. Raises `ValueError` when CUDA is unavailable or the call fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "vlma_cuda_many_series_one_param_dev")]
#[pyo3(signature = (data_tm_f32, min_period, max_period, devtype=0, matype="sma", device_id=0))]
pub fn vlma_cuda_many_series_one_param_dev_py(
    py: Python<'_>,
    data_tm_f32: numpy::PyReadonlyArray2<'_, f32>,
    min_period: usize,
    max_period: usize,
    devtype: usize,
    matype: &str,
    device_id: usize,
) -> PyResult<DeviceArrayF32Py> {
    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    let flat: &[f32] = data_tm_f32.as_slice()?;
    let shape = data_tm_f32.shape();
    let (rows, cols) = (shape[0], shape[1]);

    let params = VlmaParams {
        min_period: Some(min_period),
        max_period: Some(max_period),
        matype: Some(matype.to_string()),
        devtype: Some(devtype),
    };

    let inner = py.allow_threads(|| {
        CudaVlma::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))
            .and_then(|mut cuda| {
                cuda.vlma_many_series_one_param_time_major_dev(flat, cols, rows, &params)
                    .map_err(|e| PyValueError::new_err(e.to_string()))
            })
    })?;
    make_device_array_py(device_id, inner)
}
1830
/// WASM binding: computes VLMA over `data` and returns a freshly allocated
/// vector of the same length. Indicator errors are returned as a JS string.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn vlma_js(
    data: &[f64],
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
) -> Result<Vec<f64>, JsValue> {
    let input = VlmaInput::from_slice(
        data,
        VlmaParams {
            min_period: Some(min_period),
            max_period: Some(max_period),
            matype: Some(matype.to_string()),
            devtype: Some(devtype),
        },
    );

    let mut out = vec![0.0; data.len()];
    match vlma_into_slice(&mut out, &input, detect_best_kernel()) {
        Ok(_) => Ok(out),
        Err(e) => Err(JsValue::from_str(&e.to_string())),
    }
}
1852
/// WASM binding: computes VLMA from `len` values at `in_ptr` into `len` values
/// at `out_ptr`.
///
/// When both pointers are equal the result is computed into a scratch buffer
/// first and then copied over the input.
///
/// # Errors
/// Returns a JS string for a null pointer or any indicator error.
///
/// # Safety
/// Not marked `unsafe`, but the caller must ensure both pointers reference
/// `len` valid `f64` values for the duration of the call.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn vlma_into(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    min_period: usize,
    max_period: usize,
    matype: &str,
    devtype: usize,
) -> Result<(), JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    let to_js = |e: VlmaError| JsValue::from_str(&e.to_string());

    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);
        let input = VlmaInput::from_slice(
            data,
            VlmaParams {
                min_period: Some(min_period),
                max_period: Some(max_period),
                matype: Some(matype.to_string()),
                devtype: Some(devtype),
            },
        );

        let in_place = in_ptr == out_ptr as *const f64;
        if in_place {
            // Input and output alias: compute aside, then overwrite.
            let mut scratch = vec![0.0; len];
            vlma_into_slice(&mut scratch, &input, detect_best_kernel()).map_err(to_js)?;
            std::slice::from_raw_parts_mut(out_ptr, len).copy_from_slice(&scratch);
        } else {
            let dst = std::slice::from_raw_parts_mut(out_ptr, len);
            vlma_into_slice(dst, &input, detect_best_kernel()).map_err(to_js)?;
        }
    }
    Ok(())
}
1891
/// WASM binding: allocates room for `len` `f64` values and returns the raw
/// pointer. The allocation is intentionally not freed here; release it with
/// `vlma_free` using the same `len`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn vlma_alloc(len: usize) -> *mut f64 {
    // `ManuallyDrop` keeps the Vec's destructor from running, so the buffer
    // outlives this function.
    let mut buf = std::mem::ManuallyDrop::new(Vec::<f64>::with_capacity(len));
    buf.as_mut_ptr()
}
1900
/// WASM binding: releases a buffer of `len` `f64` values at `ptr`.
/// A null `ptr` is ignored.
///
/// # Safety
/// Not marked `unsafe`, but `ptr` and `len` must describe an allocation made
/// with a matching capacity (presumably by `vlma_alloc`) that has not been
/// freed yet.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn vlma_free(ptr: *mut f64, len: usize) {
    if ptr.is_null() {
        return;
    }
    unsafe {
        drop(Vec::from_raw_parts(ptr, len, len));
    }
}
1910
/// Batch configuration deserialized from the JS `config` object passed to the
/// `vlma_batch` WASM entry point.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct VlmaBatchConfig {
    /// `(start, end, step)` sweep of the minimum period.
    pub min_period_range: (usize, usize, usize),
    /// `(start, end, step)` sweep of the maximum period.
    pub max_period_range: (usize, usize, usize),
    /// `(start, end, step)` sweep of the deviation type.
    pub devtype_range: (usize, usize, usize),
    /// Single moving-average name used for every combination.
    pub matype: String,
}
1919
/// Serializable batch result returned to JS by the `vlma_batch` WASM entry point.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct VlmaBatchJsOutput {
    /// Flattened output values, copied from the batch output.
    pub values: Vec<f64>,
    /// Parameter combinations, copied from the batch output.
    pub combos: Vec<VlmaParams>,
    /// Row count reported by the batch output.
    pub rows: usize,
    /// Column count reported by the batch output.
    pub cols: usize,
}
1928
/// WASM binding (`vlma_batch` in JS): deserializes `config`, runs the batch
/// sweep serially with `Kernel::Auto`, and returns the serialized result.
///
/// # Errors
/// Returns a JS string when `config` cannot be deserialized, the batch fails,
/// or the result cannot be serialized.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen(js_name = vlma_batch)]
pub fn vlma_batch_unified_js(data: &[f64], config: JsValue) -> Result<JsValue, JsValue> {
    let VlmaBatchConfig {
        min_period_range,
        max_period_range,
        devtype_range,
        matype,
    } = serde_wasm_bindgen::from_value(config)
        .map_err(|e| JsValue::from_str(&format!("Invalid config: {}", e)))?;

    let sweep = VlmaBatchRange {
        min_period: min_period_range,
        max_period: max_period_range,
        matype: (matype.clone(), matype, "".to_string()),
        devtype: devtype_range,
    };

    let VlmaBatchOutput {
        values,
        combos,
        rows,
        cols,
    } = vlma_batch_inner(data, &sweep, Kernel::Auto, false)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    let js_output = VlmaBatchJsOutput {
        values,
        combos,
        rows,
        cols,
    };

    serde_wasm_bindgen::to_value(&js_output)
        .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
1955
/// WASM binding: runs the batch sweep from `len` values at `in_ptr` into a
/// caller-provided buffer at `out_ptr` and returns the number of rows written.
///
/// The output is treated as `rows * len` values, where `rows` is the number
/// of parameter combinations in the sweep. Rows are computed serially with
/// the scalar kernel.
///
/// # Errors
/// Returns a JS string for a null pointer, an invalid sweep, an output size
/// that overflows `usize`, or any batch error.
///
/// # Safety
/// Not marked `unsafe`, but the caller must ensure `in_ptr` references `len`
/// valid values and `out_ptr` references `rows * len` writable values.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn vlma_batch_into(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    min_period_start: usize,
    min_period_end: usize,
    min_period_step: usize,
    max_period_start: usize,
    max_period_end: usize,
    max_period_step: usize,
    devtype_start: usize,
    devtype_end: usize,
    devtype_step: usize,
    matype: &str,
) -> Result<usize, JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("null pointer passed to vlma_batch_into"));
    }

    let ma_name = matype.to_string();
    let sweep = VlmaBatchRange {
        min_period: (min_period_start, min_period_end, min_period_step),
        max_period: (max_period_start, max_period_end, max_period_step),
        matype: (ma_name.clone(), ma_name, "".to_string()),
        devtype: (devtype_start, devtype_end, devtype_step),
    };

    // The grid is expanded here only to size the output buffer.
    let rows = expand_grid(&sweep)
        .map_err(|e| JsValue::from_str(&e.to_string()))?
        .len();
    let total_len = rows
        .checked_mul(len)
        .ok_or_else(|| JsValue::from_str("vlma_batch_into: output size overflow"))?;

    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);
        let out_slice = std::slice::from_raw_parts_mut(out_ptr, total_len);

        vlma_batch_inner_into(data, &sweep, Kernel::Scalar, false, out_slice)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;
    }

    Ok(rows)
}
2000
2001#[cfg(test)]
2002mod tests {
2003    use super::*;
2004    use crate::skip_if_unsupported;
2005    use crate::utilities::data_loader::read_candles_from_csv;
2006
2007    #[test]
2008    fn test_vlma_into_matches_api() -> Result<(), Box<dyn Error>> {
2009        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2010        let candles = read_candles_from_csv(file_path)?;
2011        let input = VlmaInput::with_default_candles(&candles);
2012
2013        let base = vlma(&input)?.values;
2014
2015        let mut out = vec![0.0f64; base.len()];
2016        super::vlma_into(&input, &mut out)?;
2017
2018        assert_eq!(base.len(), out.len());
2019
2020        fn eq_or_both_nan(a: f64, b: f64) -> bool {
2021            (a.is_nan() && b.is_nan()) || (a == b) || ((a - b).abs() <= 1e-12)
2022        }
2023
2024        for i in 0..out.len() {
2025            assert!(
2026                eq_or_both_nan(base[i], out[i]),
2027                "Mismatch at index {i}: base={:?}, into={:?}",
2028                base[i],
2029                out[i]
2030            );
2031        }
2032        Ok(())
2033    }
2034    fn check_vlma_partial_params(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2035        skip_if_unsupported!(kernel, test_name);
2036        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2037        let candles = read_candles_from_csv(file_path)?;
2038        let default_params = VlmaParams {
2039            min_period: None,
2040            max_period: None,
2041            matype: None,
2042            devtype: None,
2043        };
2044        let input_default = VlmaInput::from_candles(&candles, "close", default_params);
2045        let output_default = vlma_with_kernel(&input_default, kernel)?;
2046        assert_eq!(output_default.values.len(), candles.close.len());
2047        Ok(())
2048    }
2049    fn check_vlma_accuracy(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2050        skip_if_unsupported!(kernel, test_name);
2051        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2052        let candles = read_candles_from_csv(file_path)?;
2053        let close_prices = candles.select_candle_field("close")?;
2054        let params = VlmaParams {
2055            min_period: Some(5),
2056            max_period: Some(50),
2057            matype: Some("sma".to_string()),
2058            devtype: Some(0),
2059        };
2060        let input = VlmaInput::from_candles(&candles, "close", params);
2061        let vlma_result = vlma_with_kernel(&input, kernel)?;
2062        assert_eq!(vlma_result.values.len(), close_prices.len());
2063        let required_len = 5;
2064        assert!(
2065            vlma_result.values.len() >= required_len,
2066            "VLMA length is too short"
2067        );
2068        let test_vals = [
2069            59376.252799490234,
2070            59343.71066624187,
2071            59292.92555520155,
2072            59269.93796266796,
2073            59167.4483022233,
2074        ];
2075        let start_idx = vlma_result.values.len() - test_vals.len();
2076        let actual_slice = &vlma_result.values[start_idx..];
2077        for (i, &val) in actual_slice.iter().enumerate() {
2078            let expected = test_vals[i];
2079            if !val.is_nan() {
2080                assert!(
2081                    (val - expected).abs() < 1e-1,
2082                    "Mismatch at index {}: expected {}, got {}",
2083                    i,
2084                    expected,
2085                    val
2086                );
2087            }
2088        }
2089        Ok(())
2090    }
2091    fn check_vlma_zero_or_inverted_periods(
2092        test: &str,
2093        kernel: Kernel,
2094    ) -> Result<(), Box<dyn Error>> {
2095        skip_if_unsupported!(kernel, test);
2096        let input_data = [10.0, 20.0, 30.0, 40.0];
2097        let params_min_greater = VlmaParams {
2098            min_period: Some(10),
2099            max_period: Some(5),
2100            matype: Some("sma".to_string()),
2101            devtype: Some(0),
2102        };
2103        let input_min_greater = VlmaInput::from_slice(&input_data, params_min_greater);
2104        let result = vlma_with_kernel(&input_min_greater, kernel);
2105        assert!(result.is_err());
2106        let params_zero_max = VlmaParams {
2107            min_period: Some(5),
2108            max_period: Some(0),
2109            matype: Some("sma".to_string()),
2110            devtype: Some(0),
2111        };
2112        let input_zero_max = VlmaInput::from_slice(&input_data, params_zero_max);
2113        let result2 = vlma_with_kernel(&input_zero_max, kernel);
2114        assert!(result2.is_err());
2115        Ok(())
2116    }
2117    fn check_vlma_not_enough_data(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2118        skip_if_unsupported!(kernel, test);
2119        let input_data = [10.0, 20.0, 30.0];
2120        let params = VlmaParams {
2121            min_period: Some(5),
2122            max_period: Some(10),
2123            matype: Some("sma".to_string()),
2124            devtype: Some(0),
2125        };
2126        let input = VlmaInput::from_slice(&input_data, params);
2127        let result = vlma_with_kernel(&input, kernel);
2128        assert!(result.is_err());
2129        Ok(())
2130    }
2131    fn check_vlma_all_nan(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2132        skip_if_unsupported!(kernel, test);
2133        let input_data = [f64::NAN, f64::NAN, f64::NAN];
2134        let params = VlmaParams {
2135            min_period: Some(2),
2136            max_period: Some(3),
2137            matype: Some("sma".to_string()),
2138            devtype: Some(0),
2139        };
2140        let input = VlmaInput::from_slice(&input_data, params);
2141        let result = vlma_with_kernel(&input, kernel);
2142        assert!(result.is_err());
2143        Ok(())
2144    }
2145    fn check_vlma_slice_reinput(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2146        skip_if_unsupported!(kernel, test);
2147        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2148        let candles = read_candles_from_csv(file_path)?;
2149        let first_params = VlmaParams {
2150            min_period: Some(5),
2151            max_period: Some(20),
2152            matype: Some("ema".to_string()),
2153            devtype: Some(1),
2154        };
2155        let first_input = VlmaInput::from_candles(&candles, "close", first_params);
2156        let first_result = vlma_with_kernel(&first_input, kernel)?;
2157        let second_params = VlmaParams {
2158            min_period: Some(5),
2159            max_period: Some(20),
2160            matype: Some("ema".to_string()),
2161            devtype: Some(1),
2162        };
2163        let second_input = VlmaInput::from_slice(&first_result.values, second_params);
2164        let second_result = vlma_with_kernel(&second_input, kernel)?;
2165        assert_eq!(second_result.values.len(), first_result.values.len());
2166        Ok(())
2167    }
2168    fn check_vlma_streaming(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2169        skip_if_unsupported!(kernel, test_name);
2170        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2171        let candles = read_candles_from_csv(file_path)?;
2172        let params = VlmaParams {
2173            min_period: Some(5),
2174            max_period: Some(50),
2175            matype: Some("sma".to_string()),
2176            devtype: Some(0),
2177        };
2178        let input = VlmaInput::from_candles(&candles, "close", params.clone());
2179        let batch_output = vlma_with_kernel(&input, kernel)?.values;
2180        let mut stream = VlmaStream::try_new(params)?;
2181        let mut stream_values = Vec::with_capacity(candles.close.len());
2182        for &price in &candles.close {
2183            match stream.update(price) {
2184                Some(v) => stream_values.push(v),
2185                None => stream_values.push(f64::NAN),
2186            }
2187        }
2188        assert_eq!(batch_output.len(), stream_values.len());
2189        for (i, (&b, &s)) in batch_output.iter().zip(stream_values.iter()).enumerate() {
2190            if b.is_nan() && s.is_nan() {
2191                continue;
2192            }
2193            let diff = (b - s).abs();
2194            assert!(
2195                diff < 1e-9,
2196                "[{}] VLMA streaming f64 mismatch at idx {}: batch={}, stream={}, diff={}",
2197                test_name,
2198                i,
2199                b,
2200                s,
2201                diff
2202            );
2203        }
2204        Ok(())
2205    }
2206
    /// Property-based test of `vlma_with_kernel` for the given `kernel`.
    ///
    /// Generated cases: `min_period` in 2..=20, `max_period` in
    /// `min_period + 1 ..= 50`, a vector of values from `0.001..1e6` whose length
    /// lies in `min_period + 1 .. 400`, a moving-average name out of
    /// "sma"/"ema"/"wma", and a `devtype` in 0..=2. Cases whose `max_period`
    /// exceeds the data length are skipped.
    ///
    /// Each case asserts, in order: the warmup layout, output bounds, behaviour on
    /// constant input, a variance bound on the final window, a volatility
    /// sanity check, agreement with `Kernel::Scalar`, absence of debug poison
    /// bit patterns, near-monotonicity on monotonic input, and determinism.
    #[cfg(feature = "proptest")]
    #[allow(clippy::float_cmp)]
    fn check_vlma_property(
        test_name: &str,
        kernel: Kernel,
    ) -> Result<(), Box<dyn std::error::Error>> {
        use proptest::prelude::*;
        skip_if_unsupported!(kernel, test_name);

        // `scalar` is generated and threaded through the strategy, but none of the
        // assertions below read it.
        let strat = (2usize..=20, 0.001f64..1e6f64).prop_flat_map(|(min_period, scalar)| {
            let max_period_start = min_period + 1;
            (
                prop::collection::vec(
                    (0.001f64..1e6f64)
                        .prop_filter("positive finite", |x| x.is_finite() && *x > 0.0),
                    max_period_start..400,
                ),
                Just(min_period),
                (max_period_start..=50),
                prop::sample::select(vec!["sma", "ema", "wma"]),
                (0usize..=2),
                Just(scalar),
            )
        });

        proptest::test_runner::TestRunner::default()
			.run(&strat, |(data, min_period, max_period, matype, devtype, scalar)| {

				// Skip cases where the longest window does not fit in the data.
				if max_period > data.len() {
					return Ok(());
				}

				let params = VlmaParams {
					min_period: Some(min_period),
					max_period: Some(max_period),
					matype: Some(matype.to_string()),
					devtype: Some(devtype),
				};
				let input = VlmaInput::from_slice(&data, params.clone());

				// Output of the kernel under test.
				let VlmaOutput { values: out } = vlma_with_kernel(&input, kernel).unwrap();

				// Reference output from the scalar kernel.
				let VlmaOutput { values: ref_out } = vlma_with_kernel(&input, Kernel::Scalar).unwrap();

				// Warmup layout: a value at `first_valid`, NaN up to (but excluding)
				// `expected_warmup`, and a value again at `expected_warmup`.
				let first_valid = data.iter().position(|&x| !x.is_nan()).unwrap_or(0);
				let expected_warmup = first_valid + max_period - 1;

				// The first valid position must carry the first data point itself.
				if first_valid < out.len() {
					prop_assert!(
						!out[first_valid].is_nan(),
						"Expected initial value at first_valid index {}, got NaN",
						first_valid
					);


					prop_assert!(
						(out[first_valid] - data[first_valid]).abs() < 1e-9,
						"Initial VLMA value {} should equal first data point {} at index {}",
						out[first_valid],
						data[first_valid],
						first_valid
					);
				}

				// Everything strictly between `first_valid` and the warmup end is NaN.
				for i in (first_valid + 1)..expected_warmup.min(out.len()) {
					prop_assert!(
						out[i].is_nan(),
						"Expected NaN during warmup at index {}, got {}",
						i,
						out[i]
					);
				}

				// The first post-warmup position must hold a value.
				if expected_warmup < out.len() {
					prop_assert!(
						!out[expected_warmup].is_nan(),
						"Expected valid value at warmup end (index {}), got NaN",
						expected_warmup
					);
				}

				// Bounds: every non-NaN output (other than the seed at `first_valid`)
				// must lie within [min(data), max(data)] up to 1e-9.
				let data_min = data.iter().cloned().fold(f64::INFINITY, f64::min);
				let data_max = data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);

				for (i, &val) in out.iter().enumerate() {
					if !val.is_nan() && i != first_valid {
						prop_assert!(
							val >= data_min - 1e-9 && val <= data_max + 1e-9,
							"VLMA at index {} = {} is outside data range [{}, {}]",
							i,
							val,
							data_min,
							data_max
						);
					}
				}

				// Constant input: once 10 samples past the warmup, the output must sit
				// within 1e-6 of the constant.
				if data.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10) {

					for (i, &val) in out.iter().enumerate() {
						if !val.is_nan() && i >= expected_warmup + 10 {
							prop_assert!(
								(val - data[0]).abs() < 1e-6,
								"VLMA should converge to constant value {} but got {} at index {}",
								data[0],
								val,
								i
							);
						}
					}
				}

				// Variance bound on the last `max_period` samples. Only evaluated when
				// every non-NaN output in that window lies inside the window's own
				// input range and the input variance is not effectively zero.
				if data.len() >= max_period * 2 {
					let stable_end = data.len();
					let stable_start = stable_end - max_period;
					let input_segment = &data[stable_start..stable_end];
					let output_segment = &out[stable_start..stable_end];

					let seg_min = input_segment.iter().cloned().fold(f64::INFINITY, f64::min);
					let seg_max = input_segment
						.iter()
						.cloned()
						.fold(f64::NEG_INFINITY, f64::max);

					// Collect non-NaN outputs; stop at the first one outside the
					// segment range, which disables the variance comparison.
					let mut valid_outputs = Vec::with_capacity(output_segment.len());
					let mut outputs_within_segment_range = true;
					for &v in output_segment {
						if v.is_nan() {
							continue;
						}
						if v < seg_min - 1e-9 || v > seg_max + 1e-9 {
							outputs_within_segment_range = false;
							break;
						}
						valid_outputs.push(v);
					}

					if outputs_within_segment_range && valid_outputs.len() > 1 {
						// Population variance (divides by the element count).
						let input_mean: f64 = input_segment.iter().sum::<f64>() / input_segment.len() as f64;
						let input_var: f64 = input_segment
							.iter()
							.map(|x| (x - input_mean).powi(2))
							.sum::<f64>()
							/ input_segment.len() as f64;

						// Skip segments whose input is (numerically) constant.
						if input_var > 1e-18 {
							let output_mean: f64 =
								valid_outputs.iter().sum::<f64>() / valid_outputs.len() as f64;
							let output_var: f64 = valid_outputs
								.iter()
								.map(|x| (x - output_mean).powi(2))
								.sum::<f64>()
								/ valid_outputs.len() as f64;

							// Allows 1% relative slack plus a 1e-12 absolute term.
							prop_assert!(
								output_var <= input_var * 1.01 + 1e-12,
								"Output variance {} should not exceed input variance {} (smoothing property)",
								output_var,
								input_var
							);
						}
					}
				}

				// Volatility sanity check: pick one region after the warmup and one
				// after the midpoint; when their input standard deviations differ by
				// more than 2x, the output standard deviations must not coincide.
				if data.len() >= max_period * 3 {

					let mid_point = data.len() / 2;
					let region1_start = expected_warmup + max_period;


					if mid_point > region1_start && data.len() > mid_point + max_period {
						let region1_end = region1_start + max_period.min((mid_point - region1_start) / 2);
						let region2_start = mid_point + max_period;
						let region2_end = region2_start + max_period.min((data.len() - region2_start) / 2);

						if region1_end > region1_start && region2_end > region2_start {

							// Population standard deviation of a slice.
							let calc_std = |segment: &[f64]| -> f64 {
								let mean = segment.iter().sum::<f64>() / segment.len() as f64;
								let variance = segment.iter()
									.map(|x| (x - mean).powi(2))
									.sum::<f64>() / segment.len() as f64;
								variance.sqrt()
							};

							let region1_data = &data[region1_start..region1_end.min(data.len())];
							let region2_data = &data[region2_start..region2_end.min(data.len())];

							if region1_data.len() > 2 && region2_data.len() > 2 {
								let std1 = calc_std(region1_data);
								let std2 = calc_std(region2_data);

								// Only compare when one region is more than twice as
								// volatile as the other and neither is near-constant.
								if (std1 > std2 * 2.0 || std2 > std1 * 2.0) && std1 > 1e-6 && std2 > 1e-6 {

									let out1: Vec<f64> = out[region1_start..region1_end.min(out.len())]
										.iter()
										.filter(|x| !x.is_nan())
										.cloned()
										.collect();
									let out2: Vec<f64> = out[region2_start..region2_end.min(out.len())]
										.iter()
										.filter(|x| !x.is_nan())
										.cloned()
										.collect();

									if out1.len() > 2 && out2.len() > 2 {
										let out_std1 = calc_std(&out1);
										let out_std2 = calc_std(&out2);

										// Fails only if the two output deviations are
										// equal to within 1e-10 while the inputs differ.
										prop_assert!(
											(out_std1 - out_std2).abs() > 1e-10 || (std1 - std2).abs() < 1e-6,
											"VLMA should show adaptive behavior: region1 std={}, region2 std={}, but outputs are too similar",
											std1,
											std2
										);
									}
								}
							}
						}
					}
				}

				// Kernel agreement from the warmup end onwards: non-finite values must
				// match bit-for-bit; finite ones within 1e-9 absolute or 4 ULPs.
				for i in expected_warmup..out.len().min(ref_out.len()) {
					let y = out[i];
					let r = ref_out[i];

					if !y.is_finite() || !r.is_finite() {
						prop_assert!(
							y.to_bits() == r.to_bits(),
							"NaN/Inf mismatch at index {}: {} vs {}",
							i,
							y,
							r
						);
						continue;
					}

					// ULP distance taken as the difference of the raw bit patterns.
					let y_bits = y.to_bits();
					let r_bits = r.to_bits();
					let ulp_diff: u64 = y_bits.abs_diff(r_bits);

					prop_assert!(
						(y - r).abs() <= 1e-9 || ulp_diff <= 4,
						"Kernel mismatch at index {}: {} vs {} (ULP={})",
						i,
						y,
						r,
						ulp_diff
					);
				}

				// Debug builds only: no output may carry one of the three repeated-byte
				// poison bit patterns (0x11.., 0x22.., 0x33..).
				#[cfg(debug_assertions)]
				for (i, &val) in out.iter().enumerate() {
					if !val.is_nan() {
						let bits = val.to_bits();
						prop_assert!(
							bits != 0x11111111_11111111 &&
							bits != 0x22222222_22222222 &&
							bits != 0x33333333_33333333,
							"Found poison value {} (0x{:016X}) at index {}",
							val,
							bits,
							i
						);
					}
				}

				// Monotonic input: the last five non-NaN outputs must follow the input
				// direction, with a 0.1% relative tolerance per step.
				let is_increasing = data.windows(2).all(|w| w[1] >= w[0]);
				let is_decreasing = data.windows(2).all(|w| w[1] <= w[0]);

				if is_increasing || is_decreasing {
					let valid_outputs: Vec<(usize, f64)> = out.iter()
						.enumerate()
						.filter(|(_, x)| !x.is_nan())
						.map(|(i, &x)| (i, x))
						.collect();

					if valid_outputs.len() >= 10 {

						let last_5 = &valid_outputs[valid_outputs.len() - 5..];
						if is_increasing {
							for w in last_5.windows(2) {
								prop_assert!(
									w[1].1 >= w[0].1 * 0.999,
									"VLMA should be non-decreasing for increasing data at indices {}-{}: {} > {}",
									w[0].0,
									w[1].0,
									w[0].1,
									w[1].1
								);
							}
						} else if is_decreasing {
							for w in last_5.windows(2) {
								prop_assert!(
									w[1].1 <= w[0].1 * 1.001,
									"VLMA should be non-increasing for decreasing data at indices {}-{}: {} < {}",
									w[0].0,
									w[1].0,
									w[0].1,
									w[1].1
								);
							}
						}
					}
				}

				// Determinism: a second run with identical input and parameters must
				// reproduce the first (finite values within f64::EPSILON, others
				// bit-for-bit).
				let input2 = VlmaInput::from_slice(&data, params);
				let VlmaOutput { values: out2 } = vlma_with_kernel(&input2, kernel).unwrap();

				for i in 0..out.len().min(out2.len()) {
					if out[i].is_finite() && out2[i].is_finite() {
						prop_assert!(
							(out[i] - out2[i]).abs() < f64::EPSILON,
							"Non-deterministic output at index {}: {} vs {}",
							i,
							out[i],
							out2[i]
						);
					} else {
						prop_assert!(
							out[i].to_bits() == out2[i].to_bits(),
							"Non-deterministic NaN/Inf at index {}: {:016X} vs {:016X}",
							i,
							out[i].to_bits(),
							out2[i].to_bits()
						);
					}
				}

				Ok(())
			})
			.unwrap();

        Ok(())
    }
2565
2566    #[cfg(debug_assertions)]
2567    fn check_vlma_no_poison(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2568        skip_if_unsupported!(kernel, test_name);
2569
2570        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2571        let candles = read_candles_from_csv(file_path)?;
2572
2573        let test_params = vec![
2574            VlmaParams::default(),
2575            VlmaParams {
2576                min_period: Some(1),
2577                max_period: Some(2),
2578                matype: Some("sma".to_string()),
2579                devtype: Some(0),
2580            },
2581            VlmaParams {
2582                min_period: Some(2),
2583                max_period: Some(10),
2584                matype: Some("sma".to_string()),
2585                devtype: Some(0),
2586            },
2587            VlmaParams {
2588                min_period: Some(10),
2589                max_period: Some(30),
2590                matype: Some("ema".to_string()),
2591                devtype: Some(0),
2592            },
2593            VlmaParams {
2594                min_period: Some(20),
2595                max_period: Some(100),
2596                matype: Some("sma".to_string()),
2597                devtype: Some(0),
2598            },
2599            VlmaParams {
2600                min_period: Some(50),
2601                max_period: Some(200),
2602                matype: Some("wma".to_string()),
2603                devtype: Some(0),
2604            },
2605            VlmaParams {
2606                min_period: Some(5),
2607                max_period: Some(25),
2608                matype: Some("sma".to_string()),
2609                devtype: Some(1),
2610            },
2611            VlmaParams {
2612                min_period: Some(5),
2613                max_period: Some(25),
2614                matype: Some("ema".to_string()),
2615                devtype: Some(2),
2616            },
2617            VlmaParams {
2618                min_period: Some(19),
2619                max_period: Some(20),
2620                matype: Some("sma".to_string()),
2621                devtype: Some(0),
2622            },
2623            VlmaParams {
2624                min_period: Some(3),
2625                max_period: Some(15),
2626                matype: Some("wma".to_string()),
2627                devtype: Some(1),
2628            },
2629            VlmaParams {
2630                min_period: Some(5),
2631                max_period: Some(100),
2632                matype: Some("ema".to_string()),
2633                devtype: Some(2),
2634            },
2635        ];
2636
2637        for (param_idx, params) in test_params.iter().enumerate() {
2638            let input = VlmaInput::from_candles(&candles, "close", params.clone());
2639            let output = vlma_with_kernel(&input, kernel)?;
2640
2641            for (i, &val) in output.values.iter().enumerate() {
2642                if val.is_nan() {
2643                    continue;
2644                }
2645
2646                let bits = val.to_bits();
2647
2648                if bits == 0x11111111_11111111 {
2649                    panic!(
2650                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2651						 with params: min_period={}, max_period={}, matype={}, devtype={} (param set {})",
2652                        test_name,
2653                        val,
2654                        bits,
2655                        i,
2656                        params.min_period.unwrap_or(5),
2657                        params.max_period.unwrap_or(50),
2658                        params.matype.as_deref().unwrap_or("sma"),
2659                        params.devtype.unwrap_or(0),
2660                        param_idx
2661                    );
2662                }
2663
2664                if bits == 0x22222222_22222222 {
2665                    panic!(
2666                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2667						 with params: min_period={}, max_period={}, matype={}, devtype={} (param set {})",
2668                        test_name,
2669                        val,
2670                        bits,
2671                        i,
2672                        params.min_period.unwrap_or(5),
2673                        params.max_period.unwrap_or(50),
2674                        params.matype.as_deref().unwrap_or("sma"),
2675                        params.devtype.unwrap_or(0),
2676                        param_idx
2677                    );
2678                }
2679
2680                if bits == 0x33333333_33333333 {
2681                    panic!(
2682                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2683						 with params: min_period={}, max_period={}, matype={}, devtype={} (param set {})",
2684                        test_name,
2685                        val,
2686                        bits,
2687                        i,
2688                        params.min_period.unwrap_or(5),
2689                        params.max_period.unwrap_or(50),
2690                        params.matype.as_deref().unwrap_or("sma"),
2691                        params.devtype.unwrap_or(0),
2692                        param_idx
2693                    );
2694                }
2695            }
2696        }
2697
2698        Ok(())
2699    }
2700
2701    #[cfg(not(debug_assertions))]
2702    fn check_vlma_no_poison(_test_name: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
2703        Ok(())
2704    }
2705
2706    macro_rules! generate_all_vlma_tests {
2707        ($($test_fn:ident),*) => {
2708            paste::paste! {
2709                $(
2710                    #[test]
2711                    fn [<$test_fn _scalar_f64>]() {
2712                        let _ = $test_fn(stringify!([<$test_fn _scalar_f64>]), Kernel::Scalar);
2713                    }
2714                )*
2715                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2716                $(
2717                    #[test]
2718                    fn [<$test_fn _avx2_f64>]() {
2719                        let _ = $test_fn(stringify!([<$test_fn _avx2_f64>]), Kernel::Avx2);
2720                    }
2721                    #[test]
2722                    fn [<$test_fn _avx512_f64>]() {
2723                        let _ = $test_fn(stringify!([<$test_fn _avx512_f64>]), Kernel::Avx512);
2724                    }
2725                )*
2726            }
2727        }
2728    }
    // Instantiate the `_scalar_f64` / `_avx2_f64` / `_avx512_f64` test wrappers
    // for every single-series check function defined above.
    generate_all_vlma_tests!(
        check_vlma_partial_params,
        check_vlma_accuracy,
        check_vlma_zero_or_inverted_periods,
        check_vlma_not_enough_data,
        check_vlma_all_nan,
        check_vlma_slice_reinput,
        check_vlma_streaming,
        check_vlma_no_poison
    );

    // `check_vlma_property` is only defined with the `proptest` feature, so its
    // wrappers are generated under the same gate.
    #[cfg(feature = "proptest")]
    generate_all_vlma_tests!(check_vlma_property);
2742    fn check_batch_default_row(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2743        skip_if_unsupported!(kernel, test);
2744        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2745        let c = read_candles_from_csv(file)?;
2746        let output = VlmaBatchBuilder::new()
2747            .kernel(kernel)
2748            .apply_candles(&c, "close")?;
2749        let def = VlmaParams::default();
2750        let row = output.values_for(&def).expect("default row missing");
2751        assert_eq!(row.len(), c.close.len());
2752        let expected = [
2753            59376.252799490234,
2754            59343.71066624187,
2755            59292.92555520155,
2756            59269.93796266796,
2757            59167.4483022233,
2758        ];
2759        let start = row.len() - 5;
2760        for (i, &v) in row[start..].iter().enumerate() {
2761            assert!(
2762                (v - expected[i]).abs() < 1e-1,
2763                "[{test}] default-row mismatch at idx {i}: {v} vs {expected:?}"
2764            );
2765        }
2766        Ok(())
2767    }
2768
2769    #[cfg(debug_assertions)]
2770    fn check_batch_no_poison(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2771        skip_if_unsupported!(kernel, test);
2772
2773        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2774        let c = read_candles_from_csv(file)?;
2775
2776        let test_configs = vec![
2777            (2, 10, 2, 10, 20, 2, "sma", 0, 0, 0),
2778            (5, 25, 5, 25, 50, 5, "sma", 0, 2, 1),
2779            (10, 50, 10, 50, 100, 10, "ema", 0, 0, 0),
2780            (1, 5, 1, 5, 10, 1, "sma", 0, 0, 0),
2781            (5, 5, 0, 20, 100, 20, "wma", 0, 2, 2),
2782            (2, 10, 4, 20, 20, 0, "sma", 1, 1, 0),
2783            (3, 15, 3, 15, 30, 3, "ema", 2, 2, 0),
2784            (20, 50, 15, 60, 150, 30, "sma", 0, 2, 1),
2785            (5, 5, 0, 50, 50, 0, "sma", 0, 2, 1),
2786        ];
2787
2788        for (
2789            cfg_idx,
2790            &(
2791                min_start,
2792                min_end,
2793                min_step,
2794                max_start,
2795                max_end,
2796                max_step,
2797                matype,
2798                dev_start,
2799                dev_end,
2800                dev_step,
2801            ),
2802        ) in test_configs.iter().enumerate()
2803        {
2804            let mut builder = VlmaBatchBuilder::new().kernel(kernel);
2805
2806            if min_step > 0 {
2807                builder = builder.min_period_range(min_start, min_end, min_step);
2808            } else {
2809                builder = builder.min_period_range(min_start, min_start, 0);
2810            }
2811
2812            if max_step > 0 {
2813                builder = builder.max_period_range(max_start, max_end, max_step);
2814            } else {
2815                builder = builder.max_period_range(max_start, max_start, 0);
2816            }
2817
2818            builder = builder.matype_static(matype);
2819
2820            if dev_step > 0 {
2821                builder = builder.devtype_range(dev_start, dev_end, dev_step);
2822            } else {
2823                builder = builder.devtype_range(dev_start, dev_start, 0);
2824            }
2825
2826            let output = builder.apply_candles(&c, "close")?;
2827
2828            for (idx, &val) in output.values.iter().enumerate() {
2829                if val.is_nan() {
2830                    continue;
2831                }
2832
2833                let bits = val.to_bits();
2834                let row = idx / output.cols;
2835                let col = idx % output.cols;
2836                let combo = &output.combos[row];
2837
2838                if bits == 0x11111111_11111111 {
2839                    panic!(
2840                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
2841						 at row {} col {} (flat index {}) with params: \
2842						 min_period={}, max_period={}, matype={}, devtype={}",
2843                        test,
2844                        cfg_idx,
2845                        val,
2846                        bits,
2847                        row,
2848                        col,
2849                        idx,
2850                        combo.min_period.unwrap_or(5),
2851                        combo.max_period.unwrap_or(50),
2852                        combo.matype.as_deref().unwrap_or("sma"),
2853                        combo.devtype.unwrap_or(0)
2854                    );
2855                }
2856
2857                if bits == 0x22222222_22222222 {
2858                    panic!(
2859                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
2860						 at row {} col {} (flat index {}) with params: \
2861						 min_period={}, max_period={}, matype={}, devtype={}",
2862                        test,
2863                        cfg_idx,
2864                        val,
2865                        bits,
2866                        row,
2867                        col,
2868                        idx,
2869                        combo.min_period.unwrap_or(5),
2870                        combo.max_period.unwrap_or(50),
2871                        combo.matype.as_deref().unwrap_or("sma"),
2872                        combo.devtype.unwrap_or(0)
2873                    );
2874                }
2875
2876                if bits == 0x33333333_33333333 {
2877                    panic!(
2878                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
2879						 at row {} col {} (flat index {}) with params: \
2880						 min_period={}, max_period={}, matype={}, devtype={}",
2881                        test,
2882                        cfg_idx,
2883                        val,
2884                        bits,
2885                        row,
2886                        col,
2887                        idx,
2888                        combo.min_period.unwrap_or(5),
2889                        combo.max_period.unwrap_or(50),
2890                        combo.matype.as_deref().unwrap_or("sma"),
2891                        combo.devtype.unwrap_or(0)
2892                    );
2893                }
2894            }
2895        }
2896
2897        Ok(())
2898    }
2899
2900    #[cfg(not(debug_assertions))]
2901    fn check_batch_no_poison(_test: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
2902        Ok(())
2903    }
2904
2905    macro_rules! gen_batch_tests {
2906        ($fn_name:ident) => {
2907            paste::paste! {
2908                #[test] fn [<$fn_name _scalar>]()      {
2909                    let _ = $fn_name(stringify!([<$fn_name _scalar>]), Kernel::ScalarBatch);
2910                }
2911                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2912                #[test] fn [<$fn_name _avx2>]()        {
2913                    let _ = $fn_name(stringify!([<$fn_name _avx2>]), Kernel::Avx2Batch);
2914                }
2915                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2916                #[test] fn [<$fn_name _avx512>]()      {
2917                    let _ = $fn_name(stringify!([<$fn_name _avx512>]), Kernel::Avx512Batch);
2918                }
2919                #[test] fn [<$fn_name _auto_detect>]() {
2920                    let _ = $fn_name(stringify!([<$fn_name _auto_detect>]), Kernel::Auto);
2921                }
2922            }
2923        };
2924    }
2925    gen_batch_tests!(check_batch_default_row);
2926    gen_batch_tests!(check_batch_no_poison);
2927}