Skip to main content

vector_ta/indicators/
cg.rs

1#[cfg(feature = "python")]
2use numpy::{IntoPyArray, PyArray1};
3#[cfg(feature = "python")]
4use pyo3::exceptions::PyValueError;
5#[cfg(feature = "python")]
6use pyo3::prelude::*;
7#[cfg(feature = "python")]
8use pyo3::types::{PyDict, PyList};
9
10#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
11use serde::{Deserialize, Serialize};
12#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
13use wasm_bindgen::prelude::*;
14
15use crate::utilities::data_loader::{source_type, Candles};
16use crate::utilities::enums::Kernel;
17use crate::utilities::helpers::{
18    alloc_with_nan_prefix, detect_best_batch_kernel, detect_best_kernel, init_matrix_prefixes,
19    make_uninit_matrix,
20};
21#[cfg(feature = "python")]
22use crate::utilities::kernel_validation::validate_kernel;
23#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
24use core::arch::x86_64::*;
25#[cfg(not(target_arch = "wasm32"))]
26use rayon::prelude::*;
27use std::convert::AsRef;
28use std::error::Error;
29use std::mem::{ManuallyDrop, MaybeUninit};
30use thiserror::Error;
31
32impl<'a> AsRef<[f64]> for CgInput<'a> {
33    #[inline(always)]
34    fn as_ref(&self) -> &[f64] {
35        match &self.data {
36            CgData::Slice(slice) => slice,
37            CgData::Candles { candles, source } => source_type(candles, source),
38        }
39    }
40}
41
/// Where the CG input series comes from.
#[derive(Debug, Clone)]
pub enum CgData<'a> {
    /// A candle set plus the name of the field to read; the series is
    /// resolved through `source_type(candles, source)`.
    Candles {
        candles: &'a Candles,
        source: &'a str,
    },
    /// A caller-provided series used as-is.
    Slice(&'a [f64]),
}
50
/// Result of a single-series CG computation.
#[derive(Debug, Clone)]
pub struct CgOutput {
    /// One value per input element; `cg_with_kernel` allocates it with a NaN
    /// prefix of `first + period` elements (the warm-up region).
    pub values: Vec<f64>,
}
55
56impl std::ops::Deref for CgOutput {
57    type Target = [f64];
58
59    fn deref(&self) -> &Self::Target {
60        &self.values
61    }
62}
63
64impl std::ops::DerefMut for CgOutput {
65    fn deref_mut(&mut self) -> &mut Self::Target {
66        &mut self.values
67    }
68}
69
/// Tunable parameters of the CG indicator.
#[derive(Debug, Clone)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct CgParams {
    /// Look-back period. `None` is resolved to 10 by the consumers in this
    /// file (`CgInput::get_period`, `CgStream::try_new`).
    pub period: Option<usize>,
}
78
79impl Default for CgParams {
80    fn default() -> Self {
81        Self { period: Some(10) }
82    }
83}
84
/// Everything a CG computation needs: the data source and the parameters.
#[derive(Debug, Clone)]
pub struct CgInput<'a> {
    /// Borrowed price data (candles + field name, or a raw slice).
    pub data: CgData<'a>,
    /// Indicator parameters; see [`CgParams`].
    pub params: CgParams,
}
90
91impl<'a> CgInput<'a> {
92    #[inline]
93    pub fn from_candles(c: &'a Candles, s: &'a str, p: CgParams) -> Self {
94        Self {
95            data: CgData::Candles {
96                candles: c,
97                source: s,
98            },
99            params: p,
100        }
101    }
102    #[inline]
103    pub fn from_slice(sl: &'a [f64], p: CgParams) -> Self {
104        Self {
105            data: CgData::Slice(sl),
106            params: p,
107        }
108    }
109    #[inline]
110    pub fn with_default_candles(c: &'a Candles) -> Self {
111        Self::from_candles(c, "close", CgParams::default())
112    }
113    #[inline]
114    pub fn get_period(&self) -> usize {
115        self.params.period.unwrap_or(10)
116    }
117}
118
/// Fluent builder for single-series CG runs and streams.
#[derive(Copy, Clone, Debug)]
pub struct CgBuilder {
    /// Period forwarded into [`CgParams`]; `None` leaves the default in effect.
    period: Option<usize>,
    /// Kernel handed to `cg_with_kernel` by the `apply*` methods.
    kernel: Kernel,
}
124
125impl Default for CgBuilder {
126    fn default() -> Self {
127        Self {
128            period: None,
129            kernel: Kernel::Auto,
130        }
131    }
132}
133
134impl CgBuilder {
135    #[inline(always)]
136    pub fn new() -> Self {
137        Self::default()
138    }
139    #[inline(always)]
140    pub fn period(mut self, n: usize) -> Self {
141        self.period = Some(n);
142        self
143    }
144    #[inline(always)]
145    pub fn kernel(mut self, k: Kernel) -> Self {
146        self.kernel = k;
147        self
148    }
149    #[inline(always)]
150    pub fn apply(self, c: &Candles) -> Result<CgOutput, CgError> {
151        let p = CgParams {
152            period: self.period,
153        };
154        let i = CgInput::from_candles(c, "close", p);
155        cg_with_kernel(&i, self.kernel)
156    }
157    #[inline(always)]
158    pub fn apply_slice(self, d: &[f64]) -> Result<CgOutput, CgError> {
159        let p = CgParams {
160            period: self.period,
161        };
162        let i = CgInput::from_slice(d, p);
163        cg_with_kernel(&i, self.kernel)
164    }
165    #[inline(always)]
166    pub fn into_stream(self) -> Result<CgStream, CgError> {
167        let p = CgParams {
168            period: self.period,
169        };
170        CgStream::try_new(p)
171    }
172}
173
/// Errors reported by the CG entry points in this module.
#[derive(Debug, Error)]
pub enum CgError {
    /// The input series has no elements.
    #[error("CG: Empty data provided for CG.")]
    EmptyData,
    /// The period is zero or larger than the input length.
    #[error("CG: Invalid period: period = {period}, data length = {data_len}")]
    InvalidPeriod { period: usize, data_len: usize },
    /// Every input element is NaN.
    #[error("CG: All values are NaN.")]
    AllValuesNaN,
    /// Fewer than `period + 1` elements remain from the first non-NaN value on.
    #[error("CG: Not enough valid data: needed = {needed}, valid = {valid}")]
    NotEnoughValidData { needed: usize, valid: usize },
    /// A caller-provided output buffer differs in length from the input.
    #[error("CG: output length mismatch: expected={expected}, got={got}")]
    OutputLengthMismatch { expected: usize, got: usize },
    /// A `(start, end, step)` sweep could not be expanded or sized.
    #[error("CG: invalid range expansion: start={start}, end={end}, step={step}")]
    InvalidRange {
        start: usize,
        end: usize,
        step: usize,
    },
    /// A non-batch kernel was passed to a batch entry point.
    #[error("CG: invalid kernel for batch: {0:?}")]
    InvalidKernelForBatch(crate::utilities::enums::Kernel),
}
195
196#[inline]
197pub fn cg(input: &CgInput) -> Result<CgOutput, CgError> {
198    cg_with_kernel(input, Kernel::Auto)
199}
200
201pub fn cg_with_kernel(input: &CgInput, kernel: Kernel) -> Result<CgOutput, CgError> {
202    let data: &[f64] = match &input.data {
203        CgData::Candles { candles, source } => source_type(candles, source),
204        CgData::Slice(sl) => sl,
205    };
206
207    if data.is_empty() {
208        return Err(CgError::EmptyData);
209    }
210    let first = data
211        .iter()
212        .position(|x| !x.is_nan())
213        .ok_or(CgError::AllValuesNaN)?;
214    let len = data.len();
215    let period = input.get_period();
216
217    if period == 0 || period > len {
218        return Err(CgError::InvalidPeriod {
219            period,
220            data_len: len,
221        });
222    }
223
224    if (len - first) < (period + 1) {
225        return Err(CgError::NotEnoughValidData {
226            needed: period + 1,
227            valid: len - first,
228        });
229    }
230
231    let mut out = alloc_with_nan_prefix(len, first + period);
232
233    let chosen = match kernel {
234        Kernel::Auto => Kernel::Scalar,
235        other => other,
236    };
237
238    unsafe {
239        match chosen {
240            Kernel::Scalar | Kernel::ScalarBatch => cg_scalar(data, period, first, &mut out),
241            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
242            Kernel::Avx2 | Kernel::Avx2Batch => cg_avx2(data, period, first, &mut out),
243            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
244            Kernel::Avx512 | Kernel::Avx512Batch => cg_avx512(data, period, first, &mut out),
245            _ => unreachable!(),
246        }
247    }
248
249    Ok(CgOutput { values: out })
250}
251
/// Pre-computed weights `1.0..=64.0`; entry `k` is the weight of the value
/// `k` steps behind the newest element of a window.
const CG_WEIGHTS: [f64; 64] = [
    1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
    18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0,
    34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0,
    50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0,
];

/// Portable CG kernel.
///
/// For every `i >= first + period` writes
/// `out[i] = -Σ (k + 1) * data[i - k] / Σ data[i - k]` with `k` running over
/// `0..period - 1` (newest value first), or `0.0` when the absolute
/// denominator does not exceed `f64::EPSILON` (this includes a NaN
/// denominator). Elements of `out` before `first + period` are not touched.
///
/// The call is a no-op when `period == 0` or when `first + period` is not a
/// valid index into `data`.
///
/// # Panics
/// Panics if `out` is shorter than `data` while there is something to write.
#[inline(always)]
pub fn cg_scalar(data: &[f64], period: usize, first: usize, out: &mut [f64]) {
    // Weighted sum and plain sum of `window`, accumulated newest-first so the
    // floating-point rounding order is fixed.
    #[inline(always)]
    fn weighted_sums(window: &[f64]) -> (f64, f64) {
        let newest_first = window.iter().rev();
        if window.len() <= CG_WEIGHTS.len() {
            // Short windows read their weights from the table.
            newest_first
                .zip(CG_WEIGHTS.iter())
                .fold((0.0, 0.0), |(num, den), (&p, &w)| (num + w * p, den + p))
        } else {
            // Longer windows generate the weights on the fly.
            let mut w = 0.0f64;
            newest_first.fold((0.0, 0.0), |(num, den), &p| {
                w += 1.0;
                (num + w * p, den + p)
            })
        }
    }

    // A zero period has no window; without this guard `period - 1` would wrap.
    if period == 0 {
        return;
    }
    let len = data.len();
    let start = match first.checked_add(period) {
        Some(s) if s < len => s,
        _ => return,
    };
    let n_items = period - 1;

    for (offset, slot) in out[start..len].iter_mut().enumerate() {
        let i = start + offset;
        // `i >= period > n_items`, so the window start cannot underflow.
        let (num, den) = weighted_sums(&data[i + 1 - n_items..i + 1]);
        *slot = if den.abs() > f64::EPSILON {
            -num / den
        } else {
            0.0
        };
    }
}
393
/// AVX-512 entry point: periods up to 32 go to the "short" kernel, longer
/// ones to the "long" kernel.
///
/// NOTE(review): this is a safe function that forwards to `unsafe` kernels;
/// callers are presumably expected to have validated the arguments and the
/// CPU features beforehand — confirm against the dispatchers.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub fn cg_avx512(data: &[f64], period: usize, first: usize, out: &mut [f64]) {
    unsafe {
        match period {
            0..=32 => cg_avx512_short(data, period, first, out),
            _ => cg_avx512_long(data, period, first, out),
        }
    }
}
403
/// 256-bit SIMD CG kernel (four `f64` lanes per step, FMA accumulation).
///
/// Writes `out[i]` for every `i >= first + period` using the same formula as
/// the scalar kernel; the remainder of each window that does not fill a full
/// vector is handled with scalar code.
///
/// # Safety
/// The CPU must support the enabled target features, `period` must be at
/// least 1, and `first + period` must not be smaller than `period - 1` so that
/// all backwards reads stay inside `data`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "fma")]
pub unsafe fn cg_avx2(data: &[f64], period: usize, first: usize, out: &mut [f64]) {
    const LANES: usize = 4;

    // Horizontal sum of the four lanes.
    #[inline(always)]
    unsafe fn reduce_add(v: __m256d) -> f64 {
        let upper = _mm256_extractf128_pd(v, 1);
        let lower = _mm256_castpd256_pd128(v);
        let pair = _mm_add_pd(lower, upper);
        let swapped = _mm_unpackhi_pd(pair, pair);
        _mm_cvtsd_f64(_mm_add_sd(pair, swapped))
    }

    let begin = first + period;
    let n = data.len();
    if begin >= n {
        return;
    }

    let taps = period - 1;
    let vec_taps = taps & !(LANES - 1);
    // A load covers data[i-k-3 ..= i-k]; the highest lane holds the newest
    // value, which gets the smallest weight.
    let lane_offsets = _mm256_setr_pd(3.0, 2.0, 1.0, 0.0);
    let src = data.as_ptr();

    for i in begin..n {
        let newest = src.add(i);
        let mut acc_num = _mm256_setzero_pd();
        let mut acc_den = _mm256_setzero_pd();
        let mut k = 0usize;

        while k < vec_taps {
            let prices = _mm256_loadu_pd(newest.sub(k + (LANES - 1)));
            let weights = _mm256_add_pd(_mm256_set1_pd(k as f64 + 1.0), lane_offsets);
            acc_num = _mm256_fmadd_pd(prices, weights, acc_num);
            acc_den = _mm256_add_pd(acc_den, prices);
            k += LANES;
        }

        let mut num = reduce_add(acc_num);
        let mut den = reduce_add(acc_den);

        // Scalar tail for the taps that did not fill a whole vector.
        let mut weight = 1.0 + k as f64;
        while k < taps {
            let price = *newest.sub(k);
            num += weight * price;
            den += price;
            weight += 1.0;
            k += 1;
        }

        out[i] = if den.abs() > f64::EPSILON {
            -num / den
        } else {
            0.0
        };
    }
}
464
/// 512-bit SIMD CG kernel (eight `f64` lanes per step, FMA accumulation).
///
/// Writes `out[i]` for every `i >= first + period` using the same formula as
/// the scalar kernel; window remainders shorter than one vector are handled
/// with scalar code.
///
/// The function is compiled with `avx512f` enabled (in addition to `fma`):
/// every vector operation in the body is an AVX-512F intrinsic, and without
/// the feature on the enclosing function those intrinsics cannot be inlined
/// into it.
///
/// # Safety
/// The CPU must support AVX-512F and FMA, `period` must be at least 1, and
/// all backwards reads `data[i - k]` for `k < period - 1` must be in bounds
/// (guaranteed when `first + period >= period - 1`).
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "avx512f,fma")]
pub unsafe fn cg_avx512_short(data: &[f64], period: usize, first: usize, out: &mut [f64]) {
    const VL: usize = 8;
    let start = first + period;
    let len = data.len();
    if start >= len {
        return;
    }

    let n_items = period - 1;

    // Horizontal sum of the eight lanes.
    #[inline(always)]
    unsafe fn hsum_m512d(x: __m512d) -> f64 {
        let lo = _mm512_castpd512_pd256(x);
        let hi = _mm512_extractf64x4_pd::<1>(x);
        let sum256 = _mm256_add_pd(lo, hi);
        let hi128 = _mm256_extractf128_pd(sum256, 1);
        let lo128 = _mm256_castpd256_pd128(sum256);
        let sum2 = _mm_add_pd(lo128, hi128);
        let hi64 = _mm_unpackhi_pd(sum2, sum2);
        let sum = _mm_add_sd(sum2, hi64);
        _mm_cvtsd_f64(sum)
    }

    // Loop-invariant: number of taps covered by whole vectors, and the
    // per-lane weight offsets (highest lane = newest value = smallest weight).
    let blocks = n_items & !(VL - 1);
    let step_r = _mm512_setr_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);

    for i in start..len {
        let base_ptr = data.as_ptr().add(i);
        let mut vnum = _mm512_setzero_pd();
        let mut vden = _mm512_setzero_pd();
        let mut k = 0usize;

        while k < blocks {
            let p = _mm512_loadu_pd(base_ptr.sub(k + (VL - 1)));
            let basew = _mm512_set1_pd(k as f64 + 1.0);
            let w = _mm512_add_pd(basew, step_r);
            vnum = _mm512_fmadd_pd(p, w, vnum);
            vden = _mm512_add_pd(vden, p);
            k += VL;
        }

        let mut num = hsum_m512d(vnum);
        let mut den = hsum_m512d(vden);

        // Scalar tail for the taps that did not fill a whole vector.
        let mut w = 1.0 + k as f64;
        while k < n_items {
            let p = *base_ptr.sub(k);
            num += w * p;
            den += p;
            w += 1.0;
            k += 1;
        }

        out[i] = if den.abs() > f64::EPSILON {
            -num / den
        } else {
            0.0
        };
    }
}
528
/// AVX-512 kernel for long periods; currently identical to
/// [`cg_avx512_short`], to which it forwards.
///
/// # Safety
/// Same requirements as [`cg_avx512_short`].
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub unsafe fn cg_avx512_long(data: &[f64], period: usize, first: usize, out: &mut [f64]) {
    let (series, window, offset) = (data, period, first);
    cg_avx512_short(series, window, offset, out)
}
534
535#[inline]
536pub fn cg_into_slice(dst: &mut [f64], input: &CgInput, kern: Kernel) -> Result<(), CgError> {
537    let data: &[f64] = match &input.data {
538        CgData::Candles { candles, source } => source_type(candles, source),
539        CgData::Slice(sl) => sl,
540    };
541
542    if data.is_empty() {
543        return Err(CgError::EmptyData);
544    }
545    let first = data
546        .iter()
547        .position(|x| !x.is_nan())
548        .ok_or(CgError::AllValuesNaN)?;
549    let len = data.len();
550    let period = input.get_period();
551
552    if period == 0 || period > len {
553        return Err(CgError::InvalidPeriod {
554            period,
555            data_len: len,
556        });
557    }
558
559    if (len - first) < (period + 1) {
560        return Err(CgError::NotEnoughValidData {
561            needed: period + 1,
562            valid: len - first,
563        });
564    }
565
566    if dst.len() != data.len() {
567        return Err(CgError::OutputLengthMismatch {
568            expected: data.len(),
569            got: dst.len(),
570        });
571    }
572
573    let chosen = match kern {
574        Kernel::Auto => Kernel::Scalar,
575        other => other,
576    };
577
578    unsafe {
579        match chosen {
580            Kernel::Scalar | Kernel::ScalarBatch => cg_scalar(data, period, first, dst),
581            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
582            Kernel::Avx2 | Kernel::Avx2Batch => cg_avx2(data, period, first, dst),
583            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
584            Kernel::Avx512 | Kernel::Avx512Batch => cg_avx512(data, period, first, dst),
585            _ => unreachable!(),
586        }
587    }
588
589    for v in &mut dst[..first + period] {
590        *v = f64::NAN;
591    }
592    Ok(())
593}
594
595#[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
596#[inline]
597pub fn cg_into(input: &CgInput, out: &mut [f64]) -> Result<(), CgError> {
598    cg_into_slice(out, input, Kernel::Auto)
599}
600
/// Incremental CG calculator fed one value at a time via `update`.
#[derive(Debug, Clone)]
pub struct CgStream {
    /// Look-back period; also the capacity of `buffer`.
    period: usize,
    /// Ring buffer holding the last `period` values.
    buffer: Vec<f64>,
    /// Index in `buffer` that the next value will be written to.
    head: usize,
    /// Set once `period` values have been received and the sums are seeded.
    filled: bool,
    /// Running weighted sum (the numerator before negation).
    weighted_sum: f64,
    /// Running plain sum (the denominator).
    price_sum: f64,
}
610
611impl CgStream {
612    pub fn try_new(params: CgParams) -> Result<Self, CgError> {
613        let period = params.period.unwrap_or(10);
614        if period == 0 {
615            return Err(CgError::InvalidPeriod {
616                period,
617                data_len: 0,
618            });
619        }
620        Ok(Self {
621            period,
622            buffer: vec![f64::NAN; period],
623            head: 0,
624            filled: false,
625            weighted_sum: 0.0,
626            price_sum: 0.0,
627        })
628    }
629
630    #[inline(always)]
631    pub fn update(&mut self, value: f64) -> Option<f64> {
632        debug_assert!(self.period >= 2);
633
634        let pos = self.head;
635        self.buffer[pos] = value;
636        let next = if pos + 1 == self.period { 0 } else { pos + 1 };
637
638        if !self.filled {
639            self.head = next;
640
641            if self.head == 0 {
642                let mut num = 0.0;
643                let mut den = 0.0;
644                let mut idx = self.head;
645
646                for k in 0..(self.period - 1) {
647                    idx = if idx == 0 { self.period - 1 } else { idx - 1 };
648                    let p = self.buffer[idx];
649                    num += (1.0 + k as f64) * p;
650                    den += p;
651                }
652                self.weighted_sum = num;
653                self.price_sum = den;
654                self.filled = true;
655            }
656            return None;
657        }
658
659        let last_old = self.buffer[next];
660
661        let den_old = self.price_sum;
662        let num_old = self.weighted_sum;
663
664        let den_new = den_old - last_old + value;
665
666        let num_new = num_old + den_old + value - (self.period as f64) * last_old;
667
668        self.price_sum = den_new;
669        self.weighted_sum = num_new;
670        self.head = next;
671
672        let out = if den_new.abs() > f64::EPSILON {
673            -num_new / den_new
674        } else {
675            0.0
676        };
677        Some(out)
678    }
679}
680
681#[cfg(all(feature = "python", feature = "cuda"))]
682use crate::cuda::moving_averages::DeviceArrayF32 as CudaDeviceArrayF32;
683#[cfg(all(feature = "python", feature = "cuda"))]
684use crate::utilities::dlpack_cuda::export_f32_cuda_dlpack_2d;
685#[cfg(all(feature = "python", feature = "cuda"))]
686use cust::{context::Context, memory::DeviceBuffer};
687#[cfg(all(feature = "python", feature = "cuda"))]
688use numpy::PyReadonlyArray1;
689#[cfg(all(feature = "python", feature = "cuda"))]
690use std::sync::Arc;
691
/// Python-visible handle to a 2-D `f32` result living in CUDA device memory.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyclass(module = "ta_indicators.cuda", name = "CgDeviceArrayF32", unsendable)]
pub struct CgDeviceArrayF32Py {
    /// Device buffer together with its `rows` x `cols` shape.
    pub inner: CudaDeviceArrayF32,
    /// Held but never read in this file — presumably keeps the CUDA context
    /// alive for as long as the buffer exists (TODO confirm).
    _ctx: Arc<Context>,
    /// Device index reported by `__dlpack_device__`.
    device_id: u32,
}
699
#[cfg(all(feature = "python", feature = "cuda"))]
#[pymethods]
impl CgDeviceArrayF32Py {
    /// Builds the `__cuda_array_interface__` dictionary (version 3) describing
    /// the buffer as a row-major `rows` x `cols` array of little-endian `f32`.
    #[getter]
    fn __cuda_array_interface__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
        let d = PyDict::new(py);
        let itemsize = std::mem::size_of::<f32>();
        d.set_item("shape", (self.inner.rows, self.inner.cols))?;
        d.set_item("typestr", "<f4")?;
        // Byte strides: one full row, then one element.
        d.set_item("strides", (self.inner.cols * itemsize, itemsize))?;
        // (device pointer, flag) — the flag is presumably the interface's
        // read-only marker (TODO confirm against the CUDA Array Interface spec).
        d.set_item("data", (self.inner.device_ptr() as usize, false))?;

        d.set_item("version", 3)?;
        Ok(d)
    }

    /// Returns `(device_type, device_id)`; the constant 2 is presumably
    /// DLPack's CUDA device type (TODO confirm).
    fn __dlpack_device__(&self) -> (i32, i32) {
        (2, self.device_id as i32)
    }

    /// Exports the buffer through DLPack.
    ///
    /// A `dl_device` that parses as `(i32, i32)` and differs from this
    /// array's device is rejected with `ValueError`; `stream` is ignored.
    /// The device buffer is moved out of `self`, which is left holding an
    /// empty 0 x 0 placeholder.
    #[pyo3(signature = (stream=None, max_version=None, dl_device=None, copy=None))]
    fn __dlpack__<'py>(
        &mut self,
        py: Python<'py>,
        stream: Option<pyo3::PyObject>,
        max_version: Option<pyo3::PyObject>,
        dl_device: Option<pyo3::PyObject>,
        copy: Option<pyo3::PyObject>,
    ) -> PyResult<PyObject> {
        let (kdl, alloc_dev) = self.__dlpack_device__();
        if let Some(dev_obj) = dl_device.as_ref() {
            if let Ok((dev_ty, dev_id)) = dev_obj.extract::<(i32, i32)>(py) {
                if dev_ty != kdl || dev_id != alloc_dev {
                    // `copy` only changes which error message is reported.
                    let wants_copy = copy
                        .as_ref()
                        .and_then(|c| c.extract::<bool>(py).ok())
                        .unwrap_or(false);
                    if wants_copy {
                        return Err(PyValueError::new_err(
                            "device copy not implemented for __dlpack__",
                        ));
                    } else {
                        return Err(PyValueError::new_err("dl_device mismatch for __dlpack__"));
                    }
                }
            }
        }
        let _ = stream;

        // Swap an empty buffer into `self` so the real one can be handed to
        // the exporter by value.
        let dummy =
            DeviceBuffer::from_slice(&[]).map_err(|e| PyValueError::new_err(e.to_string()))?;
        let inner = std::mem::replace(
            &mut self.inner,
            CudaDeviceArrayF32 {
                buf: dummy,
                rows: 0,
                cols: 0,
            },
        );

        let rows = inner.rows;
        let cols = inner.cols;
        let buf = inner.buf;

        let max_version_bound = max_version.map(|obj| obj.into_bound(py));

        export_f32_cuda_dlpack_2d(py, buf, rows, cols, alloc_dev, max_version_bound)
    }
}
/// Python entry point `cg_cuda_batch_dev`: runs a period sweep over one `f32`
/// series on the GPU and returns the result as a device array.
///
/// Raises `ValueError` when CUDA is unavailable or any CUDA step fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "cg_cuda_batch_dev")]
#[pyo3(signature = (data, period_range, device_id=0))]
pub fn cg_cuda_batch_dev_py(
    py: Python<'_>,
    data: PyReadonlyArray1<'_, f32>,
    period_range: (usize, usize, usize),
    device_id: usize,
) -> PyResult<CgDeviceArrayF32Py> {
    // Converts any stringifiable error into a Python `ValueError`.
    fn py_err<E: ToString>(e: E) -> PyErr {
        PyValueError::new_err(e.to_string())
    }

    if !crate::cuda::cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }

    let prices = data.as_slice()?;
    let sweep = CgBatchRange {
        period: period_range,
    };

    // The GIL is released for the duration of the GPU work.
    let (inner, ctx, dev_id) = py.allow_threads(|| -> PyResult<_> {
        let cuda = crate::cuda::oscillators::cg_wrapper::CudaCg::new(device_id).map_err(py_err)?;
        let dev = cuda.cg_batch_dev(prices, &sweep).map_err(py_err)?;
        cuda.synchronize().map_err(py_err)?;
        Ok((dev, cuda.context_arc_clone(), cuda.device_id()))
    })?;

    Ok(CgDeviceArrayF32Py {
        inner,
        _ctx: ctx,
        device_id: dev_id,
    })
}
802
/// Python entry point `cg_cuda_many_series_one_param_dev`: runs one period
/// over many series stored in a flat time-major `f32` buffer on the GPU.
///
/// `time_major` must contain exactly `cols * rows` elements. The product is
/// computed with overflow checking so that a wrapped value can never make a
/// mismatched buffer pass the length check.
///
/// Raises `ValueError` when CUDA is unavailable, the dimensions overflow or
/// do not match the buffer, or any CUDA step fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "cg_cuda_many_series_one_param_dev")]
#[pyo3(signature = (time_major, cols, rows, period, device_id=0))]
pub fn cg_cuda_many_series_one_param_dev_py(
    py: Python<'_>,
    time_major: PyReadonlyArray1<'_, f32>,
    cols: usize,
    rows: usize,
    period: usize,
    device_id: usize,
) -> PyResult<CgDeviceArrayF32Py> {
    use crate::cuda::cuda_available;
    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    let tm = time_major.as_slice()?;
    let expected_len = cols
        .checked_mul(rows)
        .ok_or_else(|| PyValueError::new_err("cols*rows overflows usize"))?;
    if tm.len() != expected_len {
        return Err(PyValueError::new_err(
            "time-major slice length != cols*rows",
        ));
    }
    let params = CgParams {
        period: Some(period),
    };
    // The GIL is released for the duration of the GPU work.
    let (inner, ctx, dev_id) = py.allow_threads(|| {
        let cuda = crate::cuda::oscillators::cg_wrapper::CudaCg::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        let dev = cuda
            .cg_many_series_one_param_time_major_dev(tm, cols, rows, &params)
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        cuda.synchronize()
            .map_err(|e| PyValueError::new_err(e.to_string()))?;
        Ok::<_, PyErr>((dev, cuda.context_arc_clone(), cuda.device_id()))
    })?;
    Ok(CgDeviceArrayF32Py {
        inner,
        _ctx: ctx,
        device_id: dev_id,
    })
}
843
/// Parameter sweep description for batch CG runs.
#[derive(Clone, Debug)]
pub struct CgBatchRange {
    /// `(start, end, step)` of the period axis, expanded by `expand_grid`.
    pub period: (usize, usize, usize),
}
848
849impl Default for CgBatchRange {
850    fn default() -> Self {
851        Self {
852            period: (10, 259, 1),
853        }
854    }
855}
856
/// Fluent builder for batch (multi-period) CG runs.
#[derive(Clone, Debug, Default)]
pub struct CgBatchBuilder {
    /// Period sweep to evaluate.
    range: CgBatchRange,
    /// Kernel passed to `cg_batch_with_kernel`.
    kernel: Kernel,
}
862
863impl CgBatchBuilder {
864    pub fn new() -> Self {
865        Self::default()
866    }
867    pub fn kernel(mut self, k: Kernel) -> Self {
868        self.kernel = k;
869        self
870    }
871    #[inline]
872    pub fn period_range(mut self, start: usize, end: usize, step: usize) -> Self {
873        self.range.period = (start, end, step);
874        self
875    }
876    #[inline]
877    pub fn period_static(mut self, p: usize) -> Self {
878        self.range.period = (p, p, 0);
879        self
880    }
881    pub fn apply_slice(self, data: &[f64]) -> Result<CgBatchOutput, CgError> {
882        cg_batch_with_kernel(data, &self.range, self.kernel)
883    }
884    pub fn with_default_slice(data: &[f64], k: Kernel) -> Result<CgBatchOutput, CgError> {
885        CgBatchBuilder::new().kernel(k).apply_slice(data)
886    }
887    pub fn apply_candles(self, c: &Candles, src: &str) -> Result<CgBatchOutput, CgError> {
888        let slice = source_type(c, src);
889        self.apply_slice(slice)
890    }
891    pub fn with_default_candles(c: &Candles) -> Result<CgBatchOutput, CgError> {
892        CgBatchBuilder::new()
893            .kernel(Kernel::Auto)
894            .apply_candles(c, "close")
895    }
896}
897
898pub fn cg_batch_with_kernel(
899    data: &[f64],
900    sweep: &CgBatchRange,
901    k: Kernel,
902) -> Result<CgBatchOutput, CgError> {
903    let kernel = match k {
904        Kernel::Auto => detect_best_batch_kernel(),
905        other if other.is_batch() => other,
906        _ => return Err(CgError::InvalidKernelForBatch(k)),
907    };
908    let simd = match kernel {
909        Kernel::Avx512Batch => Kernel::Avx512,
910        Kernel::Avx2Batch => Kernel::Avx2,
911        Kernel::ScalarBatch => Kernel::Scalar,
912        _ => unreachable!(),
913    };
914    cg_batch_par_slice(data, sweep, simd)
915}
916
/// Result of a batch CG run: one output row per parameter combination.
#[derive(Clone, Debug)]
pub struct CgBatchOutput {
    /// Row-major matrix of `rows * cols` values.
    pub values: Vec<f64>,
    /// Parameters of each row, in row order.
    pub combos: Vec<CgParams>,
    /// Number of parameter combinations.
    pub rows: usize,
    /// Length of the input series.
    pub cols: usize,
}
924
925impl CgBatchOutput {
926    pub fn row_for_params(&self, p: &CgParams) -> Option<usize> {
927        self.combos
928            .iter()
929            .position(|c| c.period.unwrap_or(10) == p.period.unwrap_or(10))
930    }
931    pub fn values_for(&self, p: &CgParams) -> Option<&[f64]> {
932        self.row_for_params(p).map(|row| {
933            let start = row * self.cols;
934            &self.values[start..start + self.cols]
935        })
936    }
937}
938
939#[inline(always)]
940fn expand_grid(r: &CgBatchRange) -> Result<Vec<CgParams>, CgError> {
941    fn axis_usize((start, end, step): (usize, usize, usize)) -> Result<Vec<usize>, CgError> {
942        if step == 0 || start == end {
943            return Ok(vec![start]);
944        }
945        if step == 0 {
946            return Ok(vec![start]);
947        }
948        let mut vals = Vec::new();
949        if start < end {
950            let mut v = start;
951            while v <= end {
952                vals.push(v);
953                match v.checked_add(step) {
954                    Some(next) if next > v => v = next,
955                    _ => break,
956                }
957            }
958        } else {
959            let mut v = start;
960            while v >= end {
961                vals.push(v);
962
963                match v.checked_sub(step) {
964                    Some(next) if next < v => v = next,
965                    _ => break,
966                }
967                if v == 0 {
968                    break;
969                }
970            }
971        }
972        if vals.is_empty() {
973            return Err(CgError::InvalidRange { start, end, step });
974        }
975        Ok(vals)
976    }
977    let periods = axis_usize(r.period)?;
978    let mut out = Vec::with_capacity(periods.len());
979    for &p in &periods {
980        out.push(CgParams { period: Some(p) });
981    }
982    Ok(out)
983}
984
985#[inline(always)]
986pub fn cg_batch_slice(
987    data: &[f64],
988    sweep: &CgBatchRange,
989    kern: Kernel,
990) -> Result<CgBatchOutput, CgError> {
991    cg_batch_inner(data, sweep, kern, false)
992}
993
994#[inline(always)]
995pub fn cg_batch_par_slice(
996    data: &[f64],
997    sweep: &CgBatchRange,
998    kern: Kernel,
999) -> Result<CgBatchOutput, CgError> {
1000    cg_batch_inner(data, sweep, kern, true)
1001}
1002
1003#[inline(always)]
1004fn cg_batch_inner(
1005    data: &[f64],
1006    sweep: &CgBatchRange,
1007    kern: Kernel,
1008    parallel: bool,
1009) -> Result<CgBatchOutput, CgError> {
1010    let combos = expand_grid(sweep)?;
1011    if combos.is_empty() {
1012        return Err(CgError::InvalidRange {
1013            start: sweep.period.0,
1014            end: sweep.period.1,
1015            step: sweep.period.2,
1016        });
1017    }
1018    let first = data
1019        .iter()
1020        .position(|x| !x.is_nan())
1021        .ok_or(CgError::AllValuesNaN)?;
1022    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
1023    if data.len() - first < max_p + 1 {
1024        return Err(CgError::NotEnoughValidData {
1025            needed: max_p + 1,
1026            valid: data.len() - first,
1027        });
1028    }
1029    let rows = combos.len();
1030    let cols = data.len();
1031
1032    let _ = rows.checked_mul(cols).ok_or(CgError::InvalidRange {
1033        start: sweep.period.0,
1034        end: sweep.period.1,
1035        step: sweep.period.2,
1036    })?;
1037
1038    let mut buf_mu = make_uninit_matrix(rows, cols);
1039
1040    let warm_prefixes: Vec<usize> = combos.iter().map(|c| first + c.period.unwrap()).collect();
1041
1042    init_matrix_prefixes(&mut buf_mu, cols, &warm_prefixes);
1043
1044    let mut buf_guard = ManuallyDrop::new(buf_mu);
1045    let out: &mut [f64] = unsafe {
1046        core::slice::from_raw_parts_mut(buf_guard.as_mut_ptr() as *mut f64, buf_guard.len())
1047    };
1048
1049    cg_batch_inner_into(data, sweep, kern, parallel, out)?;
1050
1051    let values = unsafe {
1052        Vec::from_raw_parts(
1053            buf_guard.as_mut_ptr() as *mut f64,
1054            buf_guard.len(),
1055            buf_guard.capacity(),
1056        )
1057    };
1058
1059    Ok(CgBatchOutput {
1060        values,
1061        combos,
1062        rows,
1063        cols,
1064    })
1065}
1066
1067#[inline(always)]
1068fn cg_batch_inner_into(
1069    data: &[f64],
1070    sweep: &CgBatchRange,
1071    kern: Kernel,
1072    parallel: bool,
1073    out: &mut [f64],
1074) -> Result<Vec<CgParams>, CgError> {
1075    let combos = expand_grid(sweep)?;
1076    if combos.is_empty() {
1077        return Err(CgError::InvalidRange {
1078            start: sweep.period.0,
1079            end: sweep.period.1,
1080            step: sweep.period.2,
1081        });
1082    }
1083
1084    let first = data
1085        .iter()
1086        .position(|x| !x.is_nan())
1087        .ok_or(CgError::AllValuesNaN)?;
1088    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
1089    if data.len() - first < max_p + 1 {
1090        return Err(CgError::NotEnoughValidData {
1091            needed: max_p + 1,
1092            valid: data.len() - first,
1093        });
1094    }
1095
1096    let cols = data.len();
1097
1098    let expected = combos
1099        .len()
1100        .checked_mul(cols)
1101        .ok_or(CgError::InvalidRange {
1102            start: sweep.period.0,
1103            end: sweep.period.1,
1104            step: sweep.period.2,
1105        })?;
1106    if out.len() != expected {
1107        return Err(CgError::OutputLengthMismatch {
1108            expected,
1109            got: out.len(),
1110        });
1111    }
1112
1113    let out_uninit = unsafe {
1114        std::slice::from_raw_parts_mut(out.as_mut_ptr() as *mut MaybeUninit<f64>, out.len())
1115    };
1116
1117    let actual = match kern {
1118        Kernel::Auto => match detect_best_batch_kernel() {
1119            Kernel::Avx512Batch => Kernel::Avx512,
1120            Kernel::Avx2Batch => Kernel::Avx2,
1121            Kernel::ScalarBatch => Kernel::Scalar,
1122            _ => unreachable!(),
1123        },
1124        other => other,
1125    };
1126
1127    let do_row = |row: usize, dst_mu: &mut [MaybeUninit<f64>]| unsafe {
1128        let period = combos[row].period.unwrap();
1129
1130        let dst = core::slice::from_raw_parts_mut(dst_mu.as_mut_ptr() as *mut f64, dst_mu.len());
1131        match actual {
1132            Kernel::Scalar => cg_row_scalar(data, first, period, dst),
1133            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1134            Kernel::Avx2 => cg_row_avx2(data, first, period, dst),
1135            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1136            Kernel::Avx512 => cg_row_avx512(data, first, period, dst),
1137            _ => unreachable!(),
1138        }
1139    };
1140
1141    if parallel {
1142        #[cfg(not(target_arch = "wasm32"))]
1143        {
1144            out_uninit
1145                .par_chunks_mut(cols)
1146                .enumerate()
1147                .for_each(|(row, slice)| do_row(row, slice));
1148        }
1149        #[cfg(target_arch = "wasm32")]
1150        {
1151            for (row, slice) in out_uninit.chunks_mut(cols).enumerate() {
1152                do_row(row, slice);
1153            }
1154        }
1155    } else {
1156        for (row, slice) in out_uninit.chunks_mut(cols).enumerate() {
1157            do_row(row, slice);
1158        }
1159    }
1160
1161    Ok(combos)
1162}
1163
1164#[inline(always)]
1165pub unsafe fn cg_row_scalar(data: &[f64], first: usize, period: usize, out: &mut [f64]) {
1166    cg_scalar(data, period, first, out)
1167}
1168
/// Computes one batch row with the AVX2 kernel.
///
/// Forwards to `cg_avx2`; note that `first` and `period` are passed there in the
/// opposite order (`period` first).
///
/// # Safety
/// The caller must uphold whatever `cg_avx2` requires of its arguments.
/// NOTE(review): presumably this includes AVX2 being available on the running CPU —
/// confirm against `cg_avx2`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn cg_row_avx2(data: &[f64], first: usize, period: usize, out: &mut [f64]) {
    cg_avx2(data, period, first, out);
}
1174
/// Computes one batch row with an AVX-512 kernel, choosing the variant by window size.
///
/// Periods up to and including 32 go to `cg_row_avx512_short`; larger periods go to
/// `cg_row_avx512_long`.
///
/// # Safety
/// The caller must uphold the requirements of the selected row kernel.
/// NOTE(review): presumably this includes AVX-512 being available on the running CPU —
/// confirm against the underlying kernels.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn cg_row_avx512(data: &[f64], first: usize, period: usize, out: &mut [f64]) {
    if period > 32 {
        return cg_row_avx512_long(data, first, period, out);
    }
    cg_row_avx512_short(data, first, period, out)
}
1184
/// Computes one batch row with the short-window AVX-512 kernel.
///
/// Forwards to `cg_avx512_short`; note that `first` and `period` are passed there in the
/// opposite order (`period` first).
///
/// # Safety
/// The caller must uphold whatever `cg_avx512_short` requires of its arguments.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn cg_row_avx512_short(data: &[f64], first: usize, period: usize, out: &mut [f64]) {
    cg_avx512_short(data, period, first, out);
}
1190
/// Computes one batch row with the long-window AVX-512 kernel.
///
/// Forwards to `cg_avx512_long`; note that `first` and `period` are passed there in the
/// opposite order (`period` first).
///
/// # Safety
/// The caller must uphold whatever `cg_avx512_long` requires of its arguments.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
pub unsafe fn cg_row_avx512_long(data: &[f64], first: usize, period: usize, out: &mut [f64]) {
    cg_avx512_long(data, period, first, out);
}
1196
1197#[cfg(test)]
1198mod tests {
1199    use super::*;
1200    use crate::skip_if_unsupported;
1201    use crate::utilities::data_loader::read_candles_from_csv;
1202
1203    fn check_cg_partial_params(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1204        skip_if_unsupported!(kernel, test_name);
1205        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1206        let candles = read_candles_from_csv(file_path)?;
1207        let partial_params = CgParams { period: Some(12) };
1208        let input = CgInput::from_candles(&candles, "close", partial_params);
1209        let output = cg_with_kernel(&input, kernel)?;
1210        assert_eq!(output.values.len(), candles.close.len());
1211        Ok(())
1212    }
1213
1214    fn check_cg_accuracy(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1215        skip_if_unsupported!(kernel, test_name);
1216        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1217        let candles = read_candles_from_csv(file_path)?;
1218        let params = CgParams { period: Some(10) };
1219        let input = CgInput::from_candles(&candles, "close", params);
1220        let result = cg_with_kernel(&input, kernel)?;
1221        let expected_last_five = [
1222            -4.99905186931943,
1223            -4.998559827254377,
1224            -4.9970065675119555,
1225            -4.9928483984587295,
1226            -5.004210799262688,
1227        ];
1228        assert!(
1229            result.values.len() >= 5,
1230            "Not enough data for final 5-values check"
1231        );
1232        let start = result.values.len() - 5;
1233        for (i, &exp) in expected_last_five.iter().enumerate() {
1234            let got = result.values[start + i];
1235            assert!(
1236                (got - exp).abs() < 1e-4,
1237                "Mismatch in CG at idx {}: expected={}, got={}",
1238                start + i,
1239                exp,
1240                got
1241            );
1242        }
1243        Ok(())
1244    }
1245
1246    fn check_cg_default_candles(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1247        skip_if_unsupported!(kernel, test_name);
1248        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1249        let candles = read_candles_from_csv(file_path)?;
1250        let input = CgInput::with_default_candles(&candles);
1251        match input.data {
1252            CgData::Candles { source, .. } => assert_eq!(source, "close"),
1253            _ => panic!("Expected CgData::Candles"),
1254        }
1255        let output = cg_with_kernel(&input, kernel)?;
1256        assert_eq!(output.values.len(), candles.close.len());
1257        Ok(())
1258    }
1259
1260    fn check_cg_zero_period(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1261        skip_if_unsupported!(kernel, test_name);
1262        let data = [1.0, 2.0, 3.0];
1263        let params = CgParams { period: Some(0) };
1264        let input = CgInput::from_slice(&data, params);
1265        let result = cg_with_kernel(&input, kernel);
1266        assert!(result.is_err(), "Expected error for zero period");
1267        Ok(())
1268    }
1269
1270    fn check_cg_period_exceeds_length(
1271        test_name: &str,
1272        kernel: Kernel,
1273    ) -> Result<(), Box<dyn Error>> {
1274        skip_if_unsupported!(kernel, test_name);
1275        let data = [10.0, 20.0, 30.0];
1276        let params = CgParams { period: Some(10) };
1277        let input = CgInput::from_slice(&data, params);
1278        let result = cg_with_kernel(&input, kernel);
1279        assert!(result.is_err(), "Expected error for period > data.len()");
1280        Ok(())
1281    }
1282
1283    fn check_cg_very_small_dataset(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1284        skip_if_unsupported!(kernel, test_name);
1285        let data = [42.0];
1286        let params = CgParams { period: Some(10) };
1287        let input = CgInput::from_slice(&data, params);
1288        let result = cg_with_kernel(&input, kernel);
1289        assert!(
1290            result.is_err(),
1291            "Expected error for data smaller than period=10"
1292        );
1293        Ok(())
1294    }
1295
1296    fn check_cg_nan_handling(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1297        skip_if_unsupported!(kernel, test_name);
1298        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1299        let candles = read_candles_from_csv(file_path)?;
1300        let params = CgParams { period: Some(10) };
1301        let input = CgInput::from_candles(&candles, "close", params);
1302        let result = cg_with_kernel(&input, kernel)?;
1303        let check_idx = 240;
1304        if result.values.len() > check_idx {
1305            for i in check_idx..result.values.len() {
1306                if !result.values[i].is_nan() {
1307                    break;
1308                }
1309                if i == result.values.len() - 1 {
1310                    panic!("All CG values from index {} onward are NaN.", check_idx);
1311                }
1312            }
1313        }
1314        Ok(())
1315    }
1316
1317    fn check_cg_streaming(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1318        skip_if_unsupported!(kernel, test_name);
1319        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1320        let candles = read_candles_from_csv(file_path)?;
1321        let period = 10;
1322        let input = CgInput::from_candles(
1323            &candles,
1324            "close",
1325            CgParams {
1326                period: Some(period),
1327            },
1328        );
1329        let batch_output = cg_with_kernel(&input, kernel)?.values;
1330        let mut stream = CgStream::try_new(CgParams {
1331            period: Some(period),
1332        })?;
1333        let mut stream_values = Vec::with_capacity(candles.close.len());
1334        for &price in &candles.close {
1335            match stream.update(price) {
1336                Some(val) => stream_values.push(val),
1337                None => stream_values.push(f64::NAN),
1338            }
1339        }
1340        assert_eq!(batch_output.len(), stream_values.len());
1341        for (i, (&b, &s)) in batch_output.iter().zip(stream_values.iter()).enumerate() {
1342            if b.is_nan() && s.is_nan() {
1343                continue;
1344            }
1345            let diff = (b - s).abs();
1346            let tol = match kernel {
1347                Kernel::Avx2 | Kernel::Avx512 => 1e-6,
1348                _ => 1e-9,
1349            };
1350            assert!(
1351                diff <= tol,
1352                "[{}] CG streaming mismatch at idx {}: batch={}, stream={}, diff={} (tol={})",
1353                test_name,
1354                i,
1355                b,
1356                s,
1357                diff,
1358                tol
1359            );
1360        }
1361        Ok(())
1362    }
1363
1364    #[cfg(debug_assertions)]
1365    fn check_cg_no_poison(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1366        skip_if_unsupported!(kernel, test_name);
1367
1368        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1369        let candles = read_candles_from_csv(file_path)?;
1370
1371        let test_periods = vec![5, 10, 20, 50];
1372
1373        for period in test_periods {
1374            let params = CgParams {
1375                period: Some(period),
1376            };
1377            let input = CgInput::from_candles(&candles, "close", params);
1378            let output = cg_with_kernel(&input, kernel)?;
1379
1380            for (i, &val) in output.values.iter().enumerate() {
1381                if val.is_nan() {
1382                    continue;
1383                }
1384
1385                let bits = val.to_bits();
1386
1387                if bits == 0x11111111_11111111 {
1388                    panic!(
1389						"[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} with period {}",
1390						test_name, val, bits, i, period
1391					);
1392                }
1393
1394                if bits == 0x22222222_22222222 {
1395                    panic!(
1396						"[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} with period {}",
1397						test_name, val, bits, i, period
1398					);
1399                }
1400
1401                if bits == 0x33333333_33333333 {
1402                    panic!(
1403						"[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} with period {}",
1404						test_name, val, bits, i, period
1405					);
1406                }
1407            }
1408        }
1409
1410        Ok(())
1411    }
1412
1413    #[cfg(not(debug_assertions))]
1414    fn check_cg_no_poison(_test_name: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
1415        Ok(())
1416    }
1417
1418    macro_rules! generate_all_cg_tests {
1419        ($($test_fn:ident),*) => {
1420            paste::paste! {
1421                $(
1422                    #[test]
1423                    fn [<$test_fn _scalar_f64>]() {
1424                        let _ = $test_fn(stringify!([<$test_fn _scalar_f64>]), Kernel::Scalar);
1425                    }
1426                )*
1427                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1428                $(
1429                    #[test]
1430                    fn [<$test_fn _avx2_f64>]() {
1431                        let _ = $test_fn(stringify!([<$test_fn _avx2_f64>]), Kernel::Avx2);
1432                    }
1433                    #[test]
1434                    fn [<$test_fn _avx512_f64>]() {
1435                        let _ = $test_fn(stringify!([<$test_fn _avx512_f64>]), Kernel::Avx512);
1436                    }
1437                )*
1438            }
1439        }
1440    }
1441
1442    generate_all_cg_tests!(
1443        check_cg_partial_params,
1444        check_cg_accuracy,
1445        check_cg_default_candles,
1446        check_cg_zero_period,
1447        check_cg_period_exceeds_length,
1448        check_cg_very_small_dataset,
1449        check_cg_nan_handling,
1450        check_cg_streaming,
1451        check_cg_no_poison
1452    );
1453
1454    #[cfg(feature = "proptest")]
1455    generate_all_cg_tests!(check_cg_property);
1456
1457    fn check_batch_default_row(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1458        skip_if_unsupported!(kernel, test);
1459
1460        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1461        let c = read_candles_from_csv(file)?;
1462
1463        let output = CgBatchBuilder::new()
1464            .kernel(kernel)
1465            .apply_candles(&c, "close")?;
1466
1467        let def = CgParams::default();
1468        let row = output.values_for(&def).expect("default row missing");
1469        assert_eq!(row.len(), c.close.len());
1470        Ok(())
1471    }
1472
1473    #[cfg(debug_assertions)]
1474    fn check_batch_no_poison(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1475        skip_if_unsupported!(kernel, test);
1476
1477        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1478        let c = read_candles_from_csv(file)?;
1479
1480        let output = CgBatchBuilder::new()
1481            .kernel(kernel)
1482            .period_range(5, 50, 5)
1483            .apply_candles(&c, "close")?;
1484
1485        for (idx, &val) in output.values.iter().enumerate() {
1486            if val.is_nan() {
1487                continue;
1488            }
1489
1490            let bits = val.to_bits();
1491            let row = idx / output.cols;
1492            let col = idx % output.cols;
1493            let period = output.combos[row].period.unwrap_or(10);
1494
1495            if bits == 0x11111111_11111111 {
1496                panic!(
1497                    "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at row {} col {} (flat index {}) with period {}",
1498                    test, val, bits, row, col, idx, period
1499                );
1500            }
1501
1502            if bits == 0x22222222_22222222 {
1503                panic!(
1504                    "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at row {} col {} (flat index {}) with period {}",
1505                    test, val, bits, row, col, idx, period
1506                );
1507            }
1508
1509            if bits == 0x33333333_33333333 {
1510                panic!(
1511                    "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at row {} col {} (flat index {}) with period {}",
1512                    test, val, bits, row, col, idx, period
1513                );
1514            }
1515        }
1516
1517        Ok(())
1518    }
1519
1520    #[cfg(not(debug_assertions))]
1521    fn check_batch_no_poison(_test: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
1522        Ok(())
1523    }
1524
1525    macro_rules! gen_batch_tests {
1526        ($fn_name:ident) => {
1527            paste::paste! {
1528                #[test] fn [<$fn_name _scalar>]()      {
1529                    let _ = $fn_name(stringify!([<$fn_name _scalar>]), Kernel::ScalarBatch);
1530                }
1531                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1532                #[test] fn [<$fn_name _avx2>]()        {
1533                    let _ = $fn_name(stringify!([<$fn_name _avx2>]), Kernel::Avx2Batch);
1534                }
1535                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1536                #[test] fn [<$fn_name _avx512>]()      {
1537                    let _ = $fn_name(stringify!([<$fn_name _avx512>]), Kernel::Avx512Batch);
1538                }
1539                #[test] fn [<$fn_name _auto_detect>]() {
1540                    let _ = $fn_name(stringify!([<$fn_name _auto_detect>]), Kernel::Auto);
1541                }
1542            }
1543        };
1544    }
1545    gen_batch_tests!(check_batch_default_row);
1546    gen_batch_tests!(check_batch_no_poison);
1547
1548    #[cfg(feature = "proptest")]
1549    fn check_cg_property(
1550        test_name: &str,
1551        kernel: Kernel,
1552    ) -> Result<(), Box<dyn std::error::Error>> {
1553        use proptest::prelude::*;
1554
1555        skip_if_unsupported!(kernel, test_name);
1556
1557        let random_data_strat = (2usize..=30).prop_flat_map(|period| {
1558            (
1559                prop::collection::vec(
1560                    (-1e6f64..1e6f64).prop_filter("finite", |x| x.is_finite()),
1561                    period + 10..400,
1562                ),
1563                Just(period),
1564            )
1565        });
1566
1567        let constant_data_strat = (2usize..=20).prop_flat_map(|period| {
1568            (
1569                (1f64..1000f64).prop_flat_map(move |value| Just(vec![value; period + 50])),
1570                Just(period),
1571            )
1572        });
1573
1574        let trending_data_strat = (2usize..=25).prop_flat_map(|period| {
1575            (
1576                (-100f64..100f64).prop_flat_map(move |start| {
1577                    (-10f64..10f64).prop_map(move |slope| {
1578                        (0..period + 100)
1579                            .map(|i| start + slope * i as f64)
1580                            .collect::<Vec<_>>()
1581                    })
1582                }),
1583                Just(period),
1584            )
1585        });
1586
1587        let edge_case_strat = (2usize..=5).prop_flat_map(|period| {
1588            (
1589                prop::collection::vec(
1590                    (-1e3f64..1e3f64).prop_filter("finite", |x| x.is_finite()),
1591                    period + 5..50,
1592                ),
1593                Just(period),
1594            )
1595        });
1596
1597        let combined_strat = prop_oneof![
1598            random_data_strat.clone(),
1599            constant_data_strat,
1600            trending_data_strat,
1601            edge_case_strat,
1602        ];
1603
1604        proptest::test_runner::TestRunner::default()
1605            .run(&combined_strat, |(data, period)| {
1606                let params = CgParams {
1607                    period: Some(period),
1608                };
1609                let input = CgInput::from_slice(&data, params);
1610
1611                let CgOutput { values: out } = cg_with_kernel(&input, kernel).unwrap();
1612
1613                let CgOutput { values: ref_out } = cg_with_kernel(&input, Kernel::Scalar).unwrap();
1614
1615                for i in 0..period {
1616                    prop_assert!(
1617                        out[i].is_nan(),
1618                        "Expected NaN during warmup at index {}, got {}",
1619                        i,
1620                        out[i]
1621                    );
1622                }
1623
1624                for i in period..data.len() {
1625                    let y = out[i];
1626                    let r = ref_out[i];
1627
1628                    if !y.is_nan() {
1629                        prop_assert!(
1630                            y.is_finite(),
1631                            "CG output at index {} is not finite: {}",
1632                            i,
1633                            y
1634                        );
1635                    }
1636
1637                    if i >= period
1638                        && data[i - period + 1..=i]
1639                            .windows(2)
1640                            .all(|w| (w[0] - w[1]).abs() < 1e-10)
1641                    {
1642                        let constant_val = data[i];
1643                        if constant_val.abs() > f64::EPSILON {
1644                            let weight_sum = ((period - 1) * period) as f64 / 2.0;
1645                            let expected_cg = -weight_sum / (period - 1) as f64;
1646                            prop_assert!(
1647                                (y - expected_cg).abs() < 1e-9,
1648                                "For constant data, CG at index {} should be {}, got {}",
1649                                i,
1650                                expected_cg,
1651                                y
1652                            );
1653                        }
1654                    }
1655
1656                    if period == 2 && i >= 2 {
1657                        let p0 = data[i];
1658                        if p0.abs() > f64::EPSILON {
1659                            prop_assert!(
1660                                (y - (-1.0)).abs() < 1e-9,
1661                                "Period=2 should always yield -1.0, got {} at index {}",
1662                                y,
1663                                i
1664                            );
1665                        } else {
1666                            prop_assert!(
1667                                y.abs() < 1e-9,
1668                                "Period=2 with zero price should yield 0, got {} at index {}",
1669                                y,
1670                                i
1671                            );
1672                        }
1673                    }
1674
1675                    if period > 2 && i >= period + 2 {
1676                        let window = &data[i - period + 1..=i];
1677                        let all_nonzero = window.iter().all(|&x| x.abs() > f64::EPSILON);
1678
1679                        if all_nonzero && !y.is_nan() {
1680                            prop_assert!(
1681								y.abs() > f64::EPSILON,
1682								"CG should be non-zero when all input values are non-zero at index {}, got {}", i, y
1683							);
1684                        }
1685                    }
1686
1687                    let y_bits = y.to_bits();
1688                    let r_bits = r.to_bits();
1689
1690                    if !y.is_finite() || !r.is_finite() {
1691                        prop_assert!(
1692                            y_bits == r_bits,
1693                            "NaN/infinity mismatch at index {}: {} vs {}",
1694                            i,
1695                            y,
1696                            r
1697                        );
1698                        continue;
1699                    }
1700
1701                    let ulp_diff: u64 = y_bits.abs_diff(r_bits);
1702                    let tol = match kernel {
1703                        Kernel::Avx2 | Kernel::Avx512 => 1e-5,
1704                        _ => 1e-9,
1705                    };
1706                    prop_assert!(
1707                        (y - r).abs() <= tol,
1708                        "Kernel mismatch at index {}: {} vs {} (ULP={}), tol={}",
1709                        i,
1710                        y,
1711                        r,
1712                        ulp_diff,
1713                        tol
1714                    );
1715                }
1716
1717                Ok(())
1718            })
1719            .unwrap();
1720
1721        let math_test_strat = (2usize..=10, prop::collection::vec(1f64..100f64, 20..50));
1722
1723        proptest::test_runner::TestRunner::default()
1724            .run(&math_test_strat, |(period, data)| {
1725                let params = CgParams {
1726                    period: Some(period),
1727                };
1728                let input = CgInput::from_slice(&data, params);
1729                let CgOutput { values: out } = cg_with_kernel(&input, kernel).unwrap();
1730
1731                for i in period..data.len() {
1732                    if out[i].is_nan() {
1733                        continue;
1734                    }
1735
1736                    let mut num = 0.0;
1737                    let mut denom = 0.0;
1738                    for count in 0..(period - 1) {
1739                        let price = data[i - count];
1740                        let weight = (1 + count) as f64;
1741                        num += weight * price;
1742                        denom += price;
1743                    }
1744
1745                    if denom.abs() > f64::EPSILON {
1746                        let expected = -num / denom;
1747                        prop_assert!(
1748                            (out[i] - expected).abs() < 1e-9,
1749                            "Manual calculation mismatch at index {}: expected {}, got {}",
1750                            i,
1751                            expected,
1752                            out[i]
1753                        );
1754                    }
1755                }
1756
1757                Ok(())
1758            })
1759            .unwrap();
1760
1761        let volatility_test_strat = (3usize..=15).prop_flat_map(|period| {
1762            (
1763                (10f64..100f64).prop_flat_map(move |base| {
1764                    (1f64..50f64).prop_map(move |amplitude| {
1765                        let mut data = Vec::with_capacity(period + 50);
1766                        for i in 0..(period + 50) {
1767                            if i % 2 == 0 {
1768                                data.push(base + amplitude);
1769                            } else {
1770                                data.push(base - amplitude);
1771                            }
1772                        }
1773                        data
1774                    })
1775                }),
1776                Just(period),
1777            )
1778        });
1779
1780        proptest::test_runner::TestRunner::default()
1781            .run(&volatility_test_strat, |(data, period)| {
1782                let params = CgParams {
1783                    period: Some(period),
1784                };
1785                let input = CgInput::from_slice(&data, params);
1786                let CgOutput { values: out } = cg_with_kernel(&input, kernel).unwrap();
1787
1788                for i in (period + 2)..data.len() {
1789                    if out[i].is_nan() {
1790                        continue;
1791                    }
1792
1793                    if period % 2 == 0 {
1794                        if i >= period + 4 {
1795                            let variation = (out[i] - out[i - 1]).abs();
1796                            prop_assert!(
1797								variation < 2.0,
1798								"CG variation too large for alternating data with even period at index {}: {}", i, variation
1799							);
1800                        }
1801                    }
1802
1803                    let base = (data[i] + data[i - 1]) / 2.0;
1804                    let relative_cg = (out[i] / base).abs();
1805                    prop_assert!(
1806                        relative_cg < 10.0,
1807                        "CG magnitude too large relative to data at index {}: CG={}, base={}",
1808                        i,
1809                        out[i],
1810                        base
1811                    );
1812                }
1813
1814                Ok(())
1815            })
1816            .unwrap();
1817
1818        Ok(())
1819    }
1820
1821    #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
1822    #[test]
1823    fn test_cg_into_matches_api() -> Result<(), Box<dyn Error>> {
1824        let mut data = vec![f64::NAN; 3];
1825        data.extend((0..256).map(|i| (i as f64).sin() * 0.5 + (i as f64) * 0.01));
1826
1827        let input = CgInput::from_slice(&data, CgParams::default());
1828
1829        let baseline = cg_with_kernel(&input, Kernel::Auto)?.values;
1830
1831        let mut out = vec![0.0; data.len()];
1832        cg_into(&input, &mut out)?;
1833
1834        assert_eq!(baseline.len(), out.len());
1835
1836        fn eq_or_both_nan(a: f64, b: f64) -> bool {
1837            (a.is_nan() && b.is_nan()) || (a == b) || ((a - b).abs() <= 1e-12)
1838        }
1839
1840        for i in 0..out.len() {
1841            assert!(
1842                eq_or_both_nan(baseline[i], out[i]),
1843                "mismatch at {}: baseline={} out={}",
1844                i,
1845                baseline[i],
1846                out[i]
1847            );
1848        }
1849
1850        Ok(())
1851    }
1852}
1853
/// Python entry point `cg(data, period=None, *, kernel=None)`.
///
/// Borrows `data` as a slice, resolves `kernel` through `validate_kernel`, and runs
/// `cg_with_kernel` with the GIL released. The resulting values are handed back as a
/// new NumPy array.
///
/// # Errors
/// Propagates the error from `data.as_slice()` or `validate_kernel`; any error from
/// the computation itself is converted to a Python `ValueError` carrying its message.
#[cfg(feature = "python")]
#[pyfunction(name = "cg")]
#[pyo3(signature = (data, period=None, *, kernel=None))]
pub fn cg_py<'py>(
    py: Python<'py>,
    data: numpy::PyReadonlyArray1<'py, f64>,
    period: Option<usize>,
    kernel: Option<&str>,
) -> PyResult<Bound<'py, numpy::PyArray1<f64>>> {
    use numpy::{IntoPyArray, PyArrayMethods};

    let prices = data.as_slice()?;
    let selected = validate_kernel(kernel, false)?;

    let input = CgInput::from_slice(prices, CgParams { period });

    // The computation does not touch Python objects, so it runs without the GIL.
    let computed = py.allow_threads(|| cg_with_kernel(&input, selected).map(|o| o.values));

    match computed {
        Ok(values) => Ok(values.into_pyarray(py)),
        Err(err) => Err(PyValueError::new_err(err.to_string())),
    }
}
1877
/// Python class `CgStream`: a thin wrapper that owns a `CgStream` and forwards to it.
#[cfg(feature = "python")]
#[pyclass(name = "CgStream")]
pub struct CgStreamPy {
    stream: CgStream,
}

#[cfg(feature = "python")]
#[pymethods]
impl CgStreamPy {
    /// Creates the wrapper by calling `CgStream::try_new` with `period`.
    ///
    /// A construction failure is surfaced as a Python `ValueError` carrying the
    /// underlying error's message.
    #[new]
    fn new(period: usize) -> PyResult<Self> {
        CgStream::try_new(CgParams {
            period: Some(period),
        })
        .map(|stream| Self { stream })
        .map_err(|e| PyValueError::new_err(e.to_string()))
    }

    /// Feeds `value` to the wrapped stream and returns whatever its `update` returns.
    fn update(&mut self, value: f64) -> Option<f64> {
        let inner = &mut self.stream;
        inner.update(value)
    }
}
1900
/// Python entry point `cg_batch(data, period_range, kernel=None)`.
///
/// Expands `period_range` into parameter combinations with `expand_grid`, allocates a
/// flat `rows * cols` NumPy buffer (one row per combination, one column per input
/// sample), pre-fills each row's warm-up prefix with NaN, and lets
/// `cg_batch_inner_into` write the results with the GIL released.
///
/// Returns a dict with:
/// * `"values"`: the buffer reshaped to `(rows, cols)`;
/// * `"periods"`: the period of every row as a `u64` array.
///
/// # Errors
/// Raises `ValueError` when the grid cannot be expanded, when `rows * cols` overflows
/// `usize`, when every input value is NaN, or when the batch computation fails.
/// Errors from `data.as_slice()` and `validate_kernel` are propagated unchanged.
#[cfg(feature = "python")]
#[pyfunction(name = "cg_batch")]
#[pyo3(signature = (data, period_range, kernel=None))]
pub fn cg_batch_py<'py>(
    py: Python<'py>,
    data: numpy::PyReadonlyArray1<'py, f64>,
    period_range: (usize, usize, usize),
    kernel: Option<&str>,
) -> PyResult<Bound<'py, pyo3::types::PyDict>> {
    use numpy::{IntoPyArray, PyArray1, PyArrayMethods};
    use pyo3::types::PyDict;

    let slice_in = data.as_slice()?;
    let kern = validate_kernel(kernel, true)?;
    let sweep = CgBatchRange {
        period: period_range,
    };

    let combos = expand_grid(&sweep).map_err(|e| PyValueError::new_err(e.to_string()))?;
    let rows = combos.len();
    let cols = slice_in.len();

    // Guard the allocation size: an unchecked `rows * cols` could wrap and leave the
    // output buffer smaller than the row slices taken from it below.
    let total_elems = rows
        .checked_mul(cols)
        .ok_or_else(|| PyValueError::new_err("cg_batch: rows*cols overflow"))?;

    // Reject all-NaN input before allocating the (potentially large) output buffer.
    let first = slice_in
        .iter()
        .position(|x| !x.is_nan())
        .ok_or_else(|| PyValueError::new_err("CG: All values are NaN."))?;

    // SAFETY: the array is created uninitialised; the warm-up prefix of every row is
    // filled right below and the remainder is expected to be written by
    // `cg_batch_inner_into` before the array is handed to Python.
    let out_arr = unsafe { PyArray1::<f64>::new(py, [total_elems], false) };
    // SAFETY: `out_arr` was just created here and has no other borrows.
    let slice_out = unsafe { out_arr.as_slice_mut()? };

    for (r, p) in combos.iter().enumerate() {
        let warm = (first + p.period.unwrap()).min(cols);
        slice_out[r * cols..r * cols + warm].fill(f64::NAN);
    }

    let combos = py
        .allow_threads(|| {
            let kernel = match kern {
                Kernel::Auto => detect_best_batch_kernel(),
                k => k,
            };
            // Map the batch kernel onto the per-row kernel passed to the inner routine.
            let simd = match kernel {
                Kernel::Avx512Batch => Kernel::Avx512,
                Kernel::Avx2Batch => Kernel::Avx2,
                Kernel::ScalarBatch => Kernel::Scalar,
                _ => unreachable!(),
            };
            cg_batch_inner_into(slice_in, &sweep, simd, true, slice_out)
        })
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    let dict = PyDict::new(py);
    dict.set_item("values", out_arr.reshape((rows, cols))?)?;
    dict.set_item(
        "periods",
        combos
            .iter()
            .map(|p| p.period.unwrap() as u64)
            .collect::<Vec<_>>()
            .into_pyarray(py),
    )?;

    Ok(dict)
}
1966
/// WASM entry point: computes CG over `data` with the given `period` and returns a
/// freshly allocated vector of the same length as `data`.
///
/// # Errors
/// Any error reported by `cg_into_slice` is returned as a `JsValue` string.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn cg_js(data: &[f64], period: usize) -> Result<Vec<f64>, JsValue> {
    let input = CgInput::from_slice(
        data,
        CgParams {
            period: Some(period),
        },
    );

    // Start from a fully initialised buffer. Growing the length of an uninitialised
    // `Vec<f64>` with `set_len` and then handing it out as `&mut [f64]` violates the
    // `Vec::set_len` contract, so the buffer is NaN-filled up front instead.
    let mut output = vec![f64::NAN; data.len()];

    cg_into_slice(&mut output, &input, Kernel::Auto)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;

    Ok(output)
}
1985
/// Configuration object deserialized from the JS `config` argument of `cg_batch`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Deserialize, Serialize)]
pub struct CgBatchConfig {
    /// Period sweep triple; it is passed unchanged as `CgBatchRange::period`.
    /// NOTE(review): presumably `(start, end, step)` — confirm against `expand_grid`.
    pub period_range: (usize, usize, usize),
}
1991
/// Serializable result object returned to JS by `cg_batch`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Deserialize, Serialize)]
pub struct CgBatchJsOutput {
    /// Flat value buffer copied from the batch output.
    /// NOTE(review): presumably row-major with `rows * cols` entries — confirm.
    pub values: Vec<f64>,
    /// Parameter combinations copied from the batch output.
    pub combos: Vec<CgParams>,
    /// Row count reported by the batch output.
    pub rows: usize,
    /// Column count reported by the batch output.
    pub cols: usize,
}
2000
/// WASM entry point exported to JS as `cg_batch`.
///
/// Deserializes `config` into a `CgBatchConfig`, runs `cg_batch_inner` over `data`
/// with the kernel chosen by `detect_best_kernel()` (parallel flag `false`), and
/// serializes the result as a `CgBatchJsOutput`.
///
/// # Errors
/// Returns a `JsValue` string for an undecodable config, a failed computation, or a
/// serialization failure.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen(js_name = cg_batch)]
pub fn cg_batch_unified_js(data: &[f64], config: JsValue) -> Result<JsValue, JsValue> {
    let CgBatchConfig { period_range } = serde_wasm_bindgen::from_value::<CgBatchConfig>(config)
        .map_err(|e| JsValue::from_str(&format!("Invalid config: {}", e)))?;

    let sweep = CgBatchRange {
        period: period_range,
    };

    let batch = match cg_batch_inner(data, &sweep, detect_best_kernel(), false) {
        Ok(batch) => batch,
        Err(e) => return Err(JsValue::from_str(&e.to_string())),
    };

    let payload = CgBatchJsOutput {
        values: batch.values,
        combos: batch.combos,
        rows: batch.rows,
        cols: batch.cols,
    };

    serde_wasm_bindgen::to_value(&payload)
        .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
2024
/// Allocates room for `len` `f64` values and returns the raw pointer to it.
///
/// Ownership of the allocation is deliberately leaked to the caller; it is meant to
/// be released by passing the same pointer and `len` to `cg_free`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn cg_alloc(len: usize) -> *mut f64 {
    // `ManuallyDrop` suppresses the destructor so the buffer outlives this call.
    let mut buffer = std::mem::ManuallyDrop::new(Vec::<f64>::with_capacity(len));
    buffer.as_mut_ptr()
}
2033
/// Releases a buffer by rebuilding a `Vec<f64>` of length and capacity `len` from
/// `ptr` and dropping it. A null `ptr` is ignored.
///
/// The caller must pass a pointer/length pair that describes an allocation made with
/// capacity `len` (as `cg_alloc` does) and must not use the pointer afterwards.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn cg_free(ptr: *mut f64, len: usize) {
    if ptr.is_null() {
        return;
    }
    // SAFETY: relies on the caller contract above; `f64` has no destructor, so
    // dropping the vector only returns the allocation.
    drop(unsafe { Vec::from_raw_parts(ptr, len, len) });
}
2043
/// WASM entry point: computes CG for the `len` values at `in_ptr` and writes `len`
/// results to `out_ptr`.
///
/// The input and output ranges may be identical or overlap; in that case the result
/// is computed into a temporary buffer first and then copied to `out_ptr`.
///
/// # Errors
/// Returns a `JsValue` string when either pointer is null, when `period` is zero or
/// larger than `len`, or when `cg_into_slice` reports an error.
///
/// # Safety contract (caller side)
/// `in_ptr` must be valid for reading `len` `f64` values and `out_ptr` must be valid
/// for writing `len` `f64` values.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn cg_into(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period: usize,
) -> Result<(), JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer passed to cg_into"));
    }
    if period == 0 || period > len {
        return Err(JsValue::from_str("Invalid period"));
    }

    // Treat any overlap of the two byte ranges as aliasing, not just equal pointers:
    // a shared and a mutable slice over overlapping memory must never coexist.
    let byte_len = len.saturating_mul(std::mem::size_of::<f64>());
    let in_addr = in_ptr as usize;
    let out_addr = out_ptr as usize;
    let overlaps = in_addr < out_addr.saturating_add(byte_len)
        && out_addr < in_addr.saturating_add(byte_len);

    // SAFETY: `in_ptr` is non-null and, per the caller contract, valid for `len` reads.
    let data = unsafe { std::slice::from_raw_parts(in_ptr, len) };
    let input = CgInput::from_slice(
        data,
        CgParams {
            period: Some(period),
        },
    );

    if overlaps {
        // Fully initialised scratch buffer (no `set_len` over uninitialised memory).
        let mut scratch = vec![f64::NAN; len];
        cg_into_slice(&mut scratch, &input, Kernel::Auto)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;
        // SAFETY: `scratch` is a fresh allocation, so it cannot overlap `out_ptr`,
        // which the caller guarantees is valid for `len` writes. The input slice is
        // not read again after this point.
        unsafe { std::ptr::copy_nonoverlapping(scratch.as_ptr(), out_ptr, len) };
    } else {
        // SAFETY: `out_ptr` is non-null, valid for `len` writes per the caller
        // contract, and was just shown not to overlap the input range.
        let out = unsafe { std::slice::from_raw_parts_mut(out_ptr, len) };
        cg_into_slice(out, &input, Kernel::Auto)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;
    }

    Ok(())
}
2086
/// WASM entry point: runs a period sweep over the `len` values at `in_ptr` and writes
/// one row of `len` results per period combination to `out_ptr`.
///
/// The sweep `(period_start, period_end, period_step)` is expanded with
/// `expand_grid`. The warm-up prefix of every row is filled with NaN before
/// `cg_batch_inner_into` writes the results. If the input range overlaps the output
/// range, the input is copied first so the computation never reads memory it is
/// concurrently overwriting.
///
/// Returns the number of rows (period combinations) written.
///
/// # Errors
/// Returns a `JsValue` string when either pointer is null, when the grid cannot be
/// expanded, when `rows * len` overflows, when every input value is NaN, or when the
/// batch computation fails.
///
/// # Safety contract (caller side)
/// `in_ptr` must be valid for reading `len` `f64` values and `out_ptr` must be valid
/// for writing `rows * len` `f64` values.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn cg_batch_into(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<usize, JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("null pointer passed to cg_batch_into"));
    }

    let sweep = CgBatchRange {
        period: (period_start, period_end, period_step),
    };
    let combos = expand_grid(&sweep).map_err(|e| JsValue::from_str(&e.to_string()))?;
    let rows = combos.len();
    let cols = len;

    let total_elems = rows
        .checked_mul(cols)
        .ok_or_else(|| JsValue::from_str("cg_batch_into: rows*cols overflow"))?;

    // Detect overlap between the input range and the (possibly larger) output range.
    let elem = std::mem::size_of::<f64>();
    let in_addr = in_ptr as usize;
    let out_addr = out_ptr as usize;
    let in_end = in_addr.saturating_add(len.saturating_mul(elem));
    let out_end = out_addr.saturating_add(total_elems.saturating_mul(elem));
    let overlaps = in_addr < out_end && out_addr < in_end;

    // When the ranges overlap, snapshot the input before any mutable view of the
    // output exists; otherwise borrow the caller's memory directly.
    let snapshot: Option<Vec<f64>> = if overlaps {
        // SAFETY: `in_ptr` is non-null and valid for `len` reads per the caller
        // contract; the temporary slice is dropped before `out` is created.
        Some(unsafe { std::slice::from_raw_parts(in_ptr, len) }.to_vec())
    } else {
        None
    };
    let data: &[f64] = match snapshot.as_deref() {
        Some(copy) => copy,
        // SAFETY: as above, and the range was shown not to overlap the output.
        None => unsafe { std::slice::from_raw_parts(in_ptr, len) },
    };

    // SAFETY: `out_ptr` is non-null and valid for `total_elems` writes per the caller
    // contract; `data` does not alias it (either disjoint or an owned copy).
    let out = unsafe { std::slice::from_raw_parts_mut(out_ptr, total_elems) };

    let first = data
        .iter()
        .position(|x| !x.is_nan())
        .ok_or_else(|| JsValue::from_str("CG: All values are NaN."))?;
    for (r, p) in combos.iter().enumerate() {
        let warm = (first + p.period.unwrap()).min(cols);
        out[r * cols..r * cols + warm].fill(f64::NAN);
    }

    cg_batch_inner_into(data, &sweep, detect_best_kernel(), false, out)
        .map_err(|e| JsValue::from_str(&e.to_string()))?;
    Ok(rows)
}