Skip to main content

vector_ta/indicators/
qstick.rs

1use crate::utilities::data_loader::{source_type, Candles};
2use crate::utilities::enums::Kernel;
3use crate::utilities::helpers::{
4    alloc_with_nan_prefix, detect_best_batch_kernel, detect_best_kernel, init_matrix_prefixes,
5    make_uninit_matrix,
6};
7use aligned_vec::{AVec, CACHELINE_ALIGN};
8#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
9use core::arch::x86_64::*;
10#[cfg(not(target_arch = "wasm32"))]
11use rayon::prelude::*;
12use std::error::Error;
13use std::mem::MaybeUninit;
14use thiserror::Error;
15
16#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
17use serde::{Deserialize, Serialize};
18#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
19use wasm_bindgen::prelude::*;
20
21#[derive(Debug, Clone)]
22pub enum QstickData<'a> {
23    Candles {
24        candles: &'a Candles,
25        open_source: &'a str,
26        close_source: &'a str,
27    },
28    Slices {
29        open: &'a [f64],
30        close: &'a [f64],
31    },
32}
33
/// Result of a single QStick run.
#[derive(Clone, Debug)]
pub struct QstickOutput {
    /// One value per input element; the warmup prefix holds NaN.
    pub values: Vec<f64>,
}
38
/// Tunable parameters of the QStick indicator.
#[derive(Clone, Debug)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct QstickParams {
    /// Length of the averaging window; `None` means "use the default of 5".
    pub period: Option<usize>,
}
47
48impl Default for QstickParams {
49    fn default() -> Self {
50        Self { period: Some(5) }
51    }
52}
53
54#[derive(Debug, Clone)]
55pub struct QstickInput<'a> {
56    pub data: QstickData<'a>,
57    pub params: QstickParams,
58}
59
60impl<'a> QstickInput<'a> {
61    #[inline]
62    pub fn from_candles(
63        candles: &'a Candles,
64        open_source: &'a str,
65        close_source: &'a str,
66        params: QstickParams,
67    ) -> Self {
68        Self {
69            data: QstickData::Candles {
70                candles,
71                open_source,
72                close_source,
73            },
74            params,
75        }
76    }
77
78    #[inline]
79    pub fn from_slices(open: &'a [f64], close: &'a [f64], params: QstickParams) -> Self {
80        Self {
81            data: QstickData::Slices { open, close },
82            params,
83        }
84    }
85
86    #[inline]
87    pub fn with_default_candles(candles: &'a Candles) -> Self {
88        Self {
89            data: QstickData::Candles {
90                candles,
91                open_source: "open",
92                close_source: "close",
93            },
94            params: QstickParams::default(),
95        }
96    }
97
98    #[inline]
99    pub fn get_period(&self) -> usize {
100        self.params.period.unwrap_or(5)
101    }
102}
103
104#[derive(Copy, Clone, Debug)]
105pub struct QstickBuilder {
106    period: Option<usize>,
107    kernel: Kernel,
108}
109
110impl Default for QstickBuilder {
111    fn default() -> Self {
112        Self {
113            period: None,
114            kernel: Kernel::Auto,
115        }
116    }
117}
118
119impl QstickBuilder {
120    #[inline(always)]
121    pub fn new() -> Self {
122        Self::default()
123    }
124    #[inline(always)]
125    pub fn period(mut self, n: usize) -> Self {
126        self.period = Some(n);
127        self
128    }
129    #[inline(always)]
130    pub fn kernel(mut self, k: Kernel) -> Self {
131        self.kernel = k;
132        self
133    }
134    #[inline(always)]
135    pub fn apply(self, candles: &Candles) -> Result<QstickOutput, QstickError> {
136        let params = QstickParams {
137            period: self.period,
138        };
139        let input = QstickInput::from_candles(candles, "open", "close", params);
140        qstick_with_kernel(&input, self.kernel)
141    }
142    #[inline(always)]
143    pub fn apply_slices(self, open: &[f64], close: &[f64]) -> Result<QstickOutput, QstickError> {
144        let params = QstickParams {
145            period: self.period,
146        };
147        let input = QstickInput::from_slices(open, close, params);
148        qstick_with_kernel(&input, self.kernel)
149    }
150    #[inline(always)]
151    pub fn into_stream(self) -> Result<QstickStream, QstickError> {
152        let params = QstickParams {
153            period: self.period,
154        };
155        QstickStream::try_new(params)
156    }
157}
158
159#[derive(Debug, Error)]
160pub enum QstickError {
161    #[error("qstick: Input data slice is empty.")]
162    EmptyInputData,
163    #[error("qstick: All values are NaN.")]
164    AllValuesNaN,
165    #[error("qstick: Invalid period: period = {period}, data length = {data_len}")]
166    InvalidPeriod { period: usize, data_len: usize },
167    #[error("qstick: Not enough valid data: needed = {needed}, valid = {valid}")]
168    NotEnoughValidData { needed: usize, valid: usize },
169    #[error("qstick: output length mismatch: expected = {expected}, got = {got}")]
170    OutputLengthMismatch { expected: usize, got: usize },
171    #[error("qstick: invalid kernel for batch: {0:?}")]
172    InvalidKernelForBatch(Kernel),
173    #[error("qstick: invalid range: start={start}, end={end}, step={step}")]
174    InvalidRange {
175        start: usize,
176        end: usize,
177        step: usize,
178    },
179    #[error("qstick: invalid input: {0}")]
180    InvalidInput(String),
181}
182
183#[inline]
184pub fn qstick(input: &QstickInput) -> Result<QstickOutput, QstickError> {
185    qstick_with_kernel(input, Kernel::Auto)
186}
187
188pub fn qstick_with_kernel(
189    input: &QstickInput,
190    kernel: Kernel,
191) -> Result<QstickOutput, QstickError> {
192    let (open, close) = match &input.data {
193        QstickData::Candles {
194            candles,
195            open_source,
196            close_source,
197        } => {
198            let open = source_type(candles, open_source);
199            let close = source_type(candles, close_source);
200            (open, close)
201        }
202        QstickData::Slices { open, close } => (*open, *close),
203    };
204
205    let len = open.len().min(close.len());
206    let period = input.get_period();
207
208    if len == 0 {
209        return Err(QstickError::EmptyInputData);
210    }
211    if period == 0 || period > len {
212        return Err(QstickError::InvalidPeriod {
213            period,
214            data_len: len,
215        });
216    }
217
218    let mut first = 0;
219    for i in 0..len {
220        if !open[i].is_nan() && !close[i].is_nan() {
221            first = i;
222            break;
223        }
224        if i == len - 1 {
225            return Err(QstickError::AllValuesNaN);
226        }
227    }
228
229    if (len - first) < period {
230        return Err(QstickError::NotEnoughValidData {
231            needed: period,
232            valid: len - first,
233        });
234    }
235
236    let warmup_end = first
237        .checked_add(period)
238        .and_then(|v| v.checked_sub(1))
239        .ok_or_else(|| QstickError::InvalidInput("warmup index overflow".into()))?;
240
241    let mut out = alloc_with_nan_prefix(len, warmup_end);
242
243    let chosen = match kernel {
244        Kernel::Auto => Kernel::Scalar,
245        other => other,
246    };
247
248    unsafe {
249        match chosen {
250            Kernel::Scalar | Kernel::ScalarBatch => {
251                qstick_scalar(open, close, period, first, &mut out)
252            }
253            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
254            Kernel::Avx2 | Kernel::Avx2Batch => qstick_avx2(open, close, period, first, &mut out),
255            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
256            Kernel::Avx512 | Kernel::Avx512Batch => {
257                qstick_avx512(open, close, period, first, &mut out)
258            }
259            _ => unreachable!(),
260        }
261    }
262
263    Ok(QstickOutput { values: out })
264}
265
266#[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
267pub fn qstick_into(input: &QstickInput, out: &mut [f64]) -> Result<(), QstickError> {
268    let (open, close) = match &input.data {
269        QstickData::Candles {
270            candles,
271            open_source,
272            close_source,
273        } => {
274            let open = source_type(candles, open_source);
275            let close = source_type(candles, close_source);
276            (open, close)
277        }
278        QstickData::Slices { open, close } => (*open, *close),
279    };
280
281    let len = open.len().min(close.len());
282    let period = input.get_period();
283
284    if len == 0 {
285        return Err(QstickError::EmptyInputData);
286    }
287    if period == 0 || period > len {
288        return Err(QstickError::InvalidPeriod {
289            period,
290            data_len: len,
291        });
292    }
293    if out.len() != len {
294        return Err(QstickError::OutputLengthMismatch {
295            expected: len,
296            got: out.len(),
297        });
298    }
299
300    let mut first = 0usize;
301    for i in 0..len {
302        if !open[i].is_nan() && !close[i].is_nan() {
303            first = i;
304            break;
305        }
306        if i == len - 1 {
307            return Err(QstickError::AllValuesNaN);
308        }
309    }
310
311    if (len - first) < period {
312        return Err(QstickError::NotEnoughValidData {
313            needed: period,
314            valid: len - first,
315        });
316    }
317
318    let warm = first
319        .checked_add(period)
320        .and_then(|v| v.checked_sub(1))
321        .ok_or_else(|| QstickError::InvalidInput("warmup index overflow".into()))?
322        .min(len);
323    for v in &mut out[..warm] {
324        *v = f64::from_bits(0x7ff8_0000_0000_0000);
325    }
326
327    let chosen = match Kernel::Auto {
328        Kernel::Auto => Kernel::Scalar,
329        other => other,
330    };
331
332    unsafe {
333        match chosen {
334            Kernel::Scalar | Kernel::ScalarBatch => qstick_scalar(open, close, period, first, out),
335            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
336            Kernel::Avx2 | Kernel::Avx2Batch => qstick_avx2(open, close, period, first, out),
337            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
338            Kernel::Avx512 | Kernel::Avx512Batch => qstick_avx512(open, close, period, first, out),
339            _ => unreachable!(),
340        }
341    }
342
343    Ok(())
344}
345
/// Scalar QStick kernel: writes the rolling mean of `close - open` into `out`.
///
/// The series are truncated to the shorter of `open` and `close` (`len`). The
/// first value is written at `first_valid + period - 1` and every later index up
/// to `len - 1` is filled; elements before that index are left untouched, so the
/// caller is responsible for the warmup prefix.
///
/// Nothing is written when there is no complete window, i.e. when the input is
/// empty, `period` is zero, or `first_valid + period` exceeds `len`.
///
/// # Panics
///
/// Panics if a complete window exists and `out` is shorter than `len`.
#[inline]
pub fn qstick_scalar(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    let len = open.len().min(close.len());

    // One past the last element of the initial window. Bail out when the window is
    // empty or does not fit; this also covers `len == 0` and arithmetic overflow.
    let end_init = match first_valid.checked_add(period) {
        Some(end) if period > 0 && end <= len => end,
        _ => return,
    };

    // Slice once so the compiler sees a common length for all three buffers.
    let open = &open[..len];
    let close = &close[..len];
    let out = &mut out[..len];
    let diff = |i: usize| close[i] - open[i];

    if period == 1 {
        // A window of one is just the element-wise difference.
        for i in first_valid..len {
            out[i] = diff(i);
        }
        return;
    }

    let inv_p = 1.0 / (period as f64);

    // Initial window sum. The four-at-a-time grouping is kept so that the
    // floating-point rounding matches the previous implementation exactly.
    let mut sum = 0.0f64;
    let mut k = first_valid;
    while k + 4 <= end_init {
        sum += diff(k) + diff(k + 1) + diff(k + 2) + diff(k + 3);
        k += 4;
    }
    for i in k..end_init {
        sum += diff(i);
    }
    out[end_init - 1] = sum * inv_p;

    // Slide the window: add the entering element, then remove the leaving one.
    for i in end_init..len {
        sum = (sum + diff(i)) - diff(i - period);
        out[i] = sum * inv_p;
    }
}
424
/// AVX-512 QStick kernel behind a safe, validated entry point.
///
/// The series are truncated to the shorter of `open` and `close` (`len`). Values
/// are written from index `first_valid + period - 1` up to `len - 1`; earlier
/// elements of `out` are left untouched.
///
/// Nothing is written when there is no complete window (empty input, zero
/// `period`, or `first_valid + period > len`). If the running CPU does not
/// report AVX-512F support, the scalar kernel is used instead.
///
/// # Panics
///
/// Panics if a complete window exists and `out` is shorter than `len`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub fn qstick_avx512(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    let len = open.len().min(close.len());
    let window_fits = period > 0
        && first_valid
            .checked_add(period)
            .map_or(false, |end| end <= len);
    if !window_fits {
        return;
    }
    assert!(
        out.len() >= len,
        "qstick_avx512: output holds {} values, {} required",
        out.len(),
        len
    );
    if !std::arch::is_x86_feature_detected!("avx512f") {
        return qstick_scalar(open, close, period, first_valid, out);
    }

    // SAFETY: AVX-512F availability was checked above. The implementation indexes
    // `open`/`close` below `len`, and `out` from `first_valid + period - 1` up to
    // `len - 1`; the checks above guarantee `1 <= period`,
    // `first_valid + period <= len` and `out.len() >= len`.
    unsafe { qstick_avx512_impl(open, close, period, first_valid, out) }
}
436
/// AVX2 QStick kernel behind a safe, validated entry point.
///
/// The series are truncated to the shorter of `open` and `close` (`len`). Values
/// are written from index `first_valid + period - 1` up to `len - 1`; earlier
/// elements of `out` are left untouched.
///
/// Nothing is written when there is no complete window (empty input, zero
/// `period`, or `first_valid + period > len`). If the running CPU does not
/// report AVX2 support, the scalar kernel is used instead.
///
/// # Panics
///
/// Panics if a complete window exists and `out` is shorter than `len`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub fn qstick_avx2(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    let len = open.len().min(close.len());
    let window_fits = period > 0
        && first_valid
            .checked_add(period)
            .map_or(false, |end| end <= len);
    if !window_fits {
        return;
    }
    assert!(
        out.len() >= len,
        "qstick_avx2: output holds {} values, {} required",
        out.len(),
        len
    );
    if !std::arch::is_x86_feature_detected!("avx2") {
        return qstick_scalar(open, close, period, first_valid, out);
    }

    // SAFETY: AVX2 availability was checked above. The implementation indexes
    // `open`/`close` below `len`, and `out` from `first_valid + period - 1` up to
    // `len - 1`; the checks above guarantee `1 <= period`,
    // `first_valid + period <= len` and `out.len() >= len`.
    unsafe { qstick_avx2_impl(open, close, period, first_valid, out) }
}
448
/// Short-period AVX-512 entry point.
///
/// There is no period-specific specialisation here: the call is forwarded
/// unchanged to [`qstick_avx512`].
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub fn qstick_avx512_short(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    qstick_avx512(open, close, period, first_valid, out);
}
460
/// Long-period AVX-512 entry point.
///
/// There is no period-specific specialisation here: the call is forwarded
/// unchanged to [`qstick_avx512`].
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub fn qstick_avx512_long(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    qstick_avx512(open, close, period, first_valid, out);
}
472
/// AVX2 implementation of the QStick kernel.
///
/// Uses 4-wide vectors for the `period == 1` difference pass and for the initial
/// window sum; the sliding-window phase is scalar because each step depends on
/// the previous sum.
///
/// # Safety
///
/// All element accesses are unchecked. With `len = min(open.len(), close.len())`
/// the caller must guarantee that:
/// * the CPU supports AVX2,
/// * `period >= 1` and `first_valid + period <= len`,
/// * `out.len() >= len`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn qstick_avx2_impl(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    let n = open.len().min(close.len());
    if n == 0 {
        return;
    }
    let head = first_valid;
    let first_out = head + period - 1;
    let scale = 1.0 / (period as f64);

    let src_open = open.as_ptr();
    let src_close = close.as_ptr();
    let dst = out.as_mut_ptr();

    if period == 1 {
        // Window of one: plain element-wise difference, four lanes at a time.
        let mut pos = head;
        while pos + 4 <= n {
            let diff = _mm256_sub_pd(
                _mm256_loadu_pd(src_close.add(pos)),
                _mm256_loadu_pd(src_open.add(pos)),
            );
            _mm256_storeu_pd(dst.add(pos), diff);
            pos += 4;
        }
        for tail in pos..n {
            *dst.add(tail) = *src_close.add(tail) - *src_open.add(tail);
        }
        return;
    }

    // Vector part of the initial window sum.
    let lanes_end = period & !3usize;
    let mut acc = _mm256_setzero_pd();
    let mut off = 0usize;
    while off < lanes_end {
        let at = head + off;
        let diff = _mm256_sub_pd(
            _mm256_loadu_pd(src_close.add(at)),
            _mm256_loadu_pd(src_open.add(at)),
        );
        acc = _mm256_add_pd(acc, diff);
        off += 4;
    }

    // Horizontal reduction: (low half + high half), then the two remaining lanes.
    let upper = _mm256_extractf128_pd(acc, 1);
    let lower = _mm256_castpd256_pd128(acc);
    let pair = _mm_add_pd(lower, upper);
    let mut sum = _mm_cvtsd_f64(_mm_hadd_pd(pair, pair));

    // Scalar remainder of the initial window.
    for at in (head + lanes_end)..(head + period) {
        sum += *src_close.add(at) - *src_open.add(at);
    }

    *dst.add(first_out) = sum * scale;

    // Slide the window: add the entering element, then remove the leaving one.
    for incoming in (first_out + 1)..n {
        let outgoing = incoming - period;
        sum = (sum + (*src_close.add(incoming) - *src_open.add(incoming)))
            - (*src_close.add(outgoing) - *src_open.add(outgoing));
        *dst.add(incoming) = sum * scale;
    }
}
562
/// AVX-512 implementation of the QStick kernel.
///
/// Uses 8-wide vectors for the `period == 1` difference pass and for the initial
/// window sum; the sliding-window phase is scalar because each step depends on
/// the previous sum.
///
/// # Safety
///
/// All element accesses are unchecked. With `len = min(open.len(), close.len())`
/// the caller must guarantee that:
/// * the CPU supports AVX-512F,
/// * `period >= 1` and `first_valid + period <= len`,
/// * `out.len() >= len`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[target_feature(enable = "avx512f")]
unsafe fn qstick_avx512_impl(
    open: &[f64],
    close: &[f64],
    period: usize,
    first_valid: usize,
    out: &mut [f64],
) {
    let n = open.len().min(close.len());
    if n == 0 {
        return;
    }
    let head = first_valid;
    let first_out = head + period - 1;
    let scale = 1.0 / (period as f64);

    let src_open = open.as_ptr();
    let src_close = close.as_ptr();
    let dst = out.as_mut_ptr();

    if period == 1 {
        // Window of one: plain element-wise difference, eight lanes at a time.
        let mut pos = head;
        while pos + 8 <= n {
            let diff = _mm512_sub_pd(
                _mm512_loadu_pd(src_close.add(pos)),
                _mm512_loadu_pd(src_open.add(pos)),
            );
            _mm512_storeu_pd(dst.add(pos), diff);
            pos += 8;
        }
        for tail in pos..n {
            *dst.add(tail) = *src_close.add(tail) - *src_open.add(tail);
        }
        return;
    }

    // Vector part of the initial window sum.
    let lanes_end = period & !7usize;
    let mut acc = _mm512_setzero_pd();
    let mut off = 0usize;
    while off < lanes_end {
        let at = head + off;
        let diff = _mm512_sub_pd(
            _mm512_loadu_pd(src_close.add(at)),
            _mm512_loadu_pd(src_open.add(at)),
        );
        acc = _mm512_add_pd(acc, diff);
        off += 8;
    }

    // Horizontal reduction: each 256-bit half is reduced on its own, then the two
    // half totals are added.
    let low_half = _mm512_castpd512_pd256(acc);
    let high_half = _mm512_extractf64x4_pd(acc, 1);

    let low_upper = _mm256_extractf128_pd(low_half, 1);
    let low_lower = _mm256_castpd256_pd128(low_half);
    let low_pair = _mm_add_pd(low_lower, low_upper);
    let low_total = _mm_cvtsd_f64(_mm_hadd_pd(low_pair, low_pair));

    let high_upper = _mm256_extractf128_pd(high_half, 1);
    let high_lower = _mm256_castpd256_pd128(high_half);
    let high_pair = _mm_add_pd(high_lower, high_upper);
    let high_total = _mm_cvtsd_f64(_mm_hadd_pd(high_pair, high_pair));

    let mut sum = low_total + high_total;

    // Scalar remainder of the initial window.
    for at in (head + lanes_end)..(head + period) {
        sum += *src_close.add(at) - *src_open.add(at);
    }

    *dst.add(first_out) = sum * scale;

    // Slide the window: add the entering element, then remove the leaving one.
    for incoming in (first_out + 1)..n {
        let outgoing = incoming - period;
        sum = (sum + (*src_close.add(incoming) - *src_open.add(incoming)))
            - (*src_close.add(outgoing) - *src_open.add(outgoing));
        *dst.add(incoming) = sum * scale;
    }
}
661
662#[inline]
663pub fn qstick_batch_with_kernel(
664    open: &[f64],
665    close: &[f64],
666    sweep: &QstickBatchRange,
667    kernel: Kernel,
668) -> Result<QstickBatchOutput, QstickError> {
669    let kern = match kernel {
670        Kernel::Auto => detect_best_batch_kernel(),
671        other if other.is_batch() => other,
672        _ => return Err(QstickError::InvalidKernelForBatch(kernel)),
673    };
674    let simd = match kern {
675        Kernel::Avx512Batch => Kernel::Avx512,
676        Kernel::Avx2Batch => Kernel::Avx2,
677        Kernel::ScalarBatch => Kernel::Scalar,
678        _ => unreachable!(),
679    };
680    qstick_batch_par_slice(open, close, sweep, simd)
681}
682
/// Period sweep description for batch runs.
#[derive(Debug, Clone)]
pub struct QstickBatchRange {
    /// `(start, end, step)` of the period axis; a zero step or `start == end`
    /// expands to the single period `start`.
    pub period: (usize, usize, usize),
}
687
688impl Default for QstickBatchRange {
689    fn default() -> Self {
690        Self {
691            period: (5, 254, 1),
692        }
693    }
694}
695
696#[derive(Clone, Debug, Default)]
697pub struct QstickBatchBuilder {
698    range: QstickBatchRange,
699    kernel: Kernel,
700}
701
702impl QstickBatchBuilder {
703    pub fn new() -> Self {
704        Self::default()
705    }
706    pub fn kernel(mut self, k: Kernel) -> Self {
707        self.kernel = k;
708        self
709    }
710    pub fn period_range(mut self, start: usize, end: usize, step: usize) -> Self {
711        self.range.period = (start, end, step);
712        self
713    }
714    pub fn period_static(mut self, p: usize) -> Self {
715        self.range.period = (p, p, 0);
716        self
717    }
718    pub fn apply_slices(
719        self,
720        open: &[f64],
721        close: &[f64],
722    ) -> Result<QstickBatchOutput, QstickError> {
723        qstick_batch_with_kernel(open, close, &self.range, self.kernel)
724    }
725    pub fn apply_candles(
726        self,
727        c: &Candles,
728        open_src: &str,
729        close_src: &str,
730    ) -> Result<QstickBatchOutput, QstickError> {
731        let open = source_type(c, open_src);
732        let close = source_type(c, close_src);
733        self.apply_slices(open, close)
734    }
735    pub fn with_default_candles(c: &Candles) -> Result<QstickBatchOutput, QstickError> {
736        QstickBatchBuilder::new()
737            .kernel(Kernel::Auto)
738            .apply_candles(c, "open", "close")
739    }
740}
741
742#[derive(Clone, Debug)]
743pub struct QstickBatchOutput {
744    pub values: Vec<f64>,
745    pub combos: Vec<QstickParams>,
746    pub rows: usize,
747    pub cols: usize,
748}
749
750impl QstickBatchOutput {
751    pub fn row_for_params(&self, p: &QstickParams) -> Option<usize> {
752        self.combos
753            .iter()
754            .position(|c| c.period.unwrap_or(5) == p.period.unwrap_or(5))
755    }
756    pub fn values_for(&self, p: &QstickParams) -> Option<&[f64]> {
757        self.row_for_params(p).map(|row| {
758            let start = row * self.cols;
759            &self.values[start..start + self.cols]
760        })
761    }
762}
763
764#[inline(always)]
765fn expand_grid(r: &QstickBatchRange) -> Result<Vec<QstickParams>, QstickError> {
766    fn axis_usize((start, end, step): (usize, usize, usize)) -> Result<Vec<usize>, QstickError> {
767        if step == 0 || start == end {
768            return Ok(vec![start]);
769        }
770        let mut v = Vec::new();
771        if start < end {
772            let mut cur = start;
773            while cur <= end {
774                v.push(cur);
775                let next = cur.saturating_add(step);
776                if next == cur {
777                    break;
778                }
779                cur = next;
780            }
781        } else {
782            let mut cur = start;
783            while cur >= end {
784                v.push(cur);
785                let next = cur.saturating_sub(step);
786                if next == cur {
787                    break;
788                }
789                cur = next;
790                if cur == 0 && end > 0 {
791                    break;
792                }
793            }
794        }
795        if v.is_empty() {
796            return Err(QstickError::InvalidRange { start, end, step });
797        }
798        Ok(v)
799    }
800
801    let (start, end, step) = r.period;
802    let periods = axis_usize((start, end, step))?;
803    let mut out = Vec::with_capacity(periods.len());
804    for p in periods {
805        out.push(QstickParams { period: Some(p) });
806    }
807    Ok(out)
808}
809
810#[inline(always)]
811pub fn qstick_batch_slice(
812    open: &[f64],
813    close: &[f64],
814    sweep: &QstickBatchRange,
815    kern: Kernel,
816) -> Result<QstickBatchOutput, QstickError> {
817    qstick_batch_inner(open, close, sweep, kern, false)
818}
819
820#[inline(always)]
821pub fn qstick_batch_par_slice(
822    open: &[f64],
823    close: &[f64],
824    sweep: &QstickBatchRange,
825    kern: Kernel,
826) -> Result<QstickBatchOutput, QstickError> {
827    qstick_batch_inner(open, close, sweep, kern, true)
828}
829
830#[inline(always)]
831fn qstick_batch_inner(
832    open: &[f64],
833    close: &[f64],
834    sweep: &QstickBatchRange,
835    kern: Kernel,
836    parallel: bool,
837) -> Result<QstickBatchOutput, QstickError> {
838    let combos = expand_grid(sweep)?;
839    let len = open.len().min(close.len());
840    if len == 0 {
841        return Err(QstickError::EmptyInputData);
842    }
843
844    let mut first = 0;
845    for i in 0..len {
846        if !open[i].is_nan() && !close[i].is_nan() {
847            first = i;
848            break;
849        }
850        if i == len - 1 {
851            return Err(QstickError::AllValuesNaN);
852        }
853    }
854
855    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
856    if len - first < max_p {
857        return Err(QstickError::NotEnoughValidData {
858            needed: max_p,
859            valid: len - first,
860        });
861    }
862    let rows = combos.len();
863    let cols = len;
864
865    let total_elems = rows
866        .checked_mul(cols)
867        .ok_or_else(|| QstickError::InvalidInput("rows*cols overflow".into()))?;
868
869    let mut buf_mu = make_uninit_matrix(rows, cols);
870
871    let warm: Vec<usize> = combos
872        .iter()
873        .map(|c| {
874            first
875                .checked_add(c.period.unwrap_or(0))
876                .and_then(|v| v.checked_sub(1))
877                .unwrap_or(first)
878        })
879        .collect();
880    init_matrix_prefixes(&mut buf_mu, cols, &warm);
881
882    let mut buf_guard = core::mem::ManuallyDrop::new(buf_mu);
883    let out: &mut [f64] = unsafe {
884        core::slice::from_raw_parts_mut(buf_guard.as_mut_ptr() as *mut f64, buf_guard.len())
885    };
886
887    qstick_batch_inner_into(open, close, sweep, kern, parallel, out)?;
888
889    let values = unsafe {
890        Vec::from_raw_parts(
891            buf_guard.as_mut_ptr() as *mut f64,
892            total_elems,
893            buf_guard.capacity(),
894        )
895    };
896
897    Ok(QstickBatchOutput {
898        values,
899        combos,
900        rows,
901        cols,
902    })
903}
904
905#[inline(always)]
906fn qstick_batch_inner_into(
907    open: &[f64],
908    close: &[f64],
909    sweep: &QstickBatchRange,
910    kern: Kernel,
911    parallel: bool,
912    out: &mut [f64],
913) -> Result<Vec<QstickParams>, QstickError> {
914    let combos = expand_grid(sweep)?;
915
916    let len = open.len().min(close.len());
917    if len == 0 {
918        return Err(QstickError::EmptyInputData);
919    }
920    let cols = len;
921
922    let first = (0..len)
923        .find(|&i| !open[i].is_nan() && !close[i].is_nan())
924        .ok_or(QstickError::AllValuesNaN)?;
925
926    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
927    if len - first < max_p {
928        return Err(QstickError::NotEnoughValidData {
929            needed: max_p,
930            valid: len - first,
931        });
932    }
933
934    for (row, combo) in combos.iter().enumerate() {
935        let warmup = first
936            .checked_add(combo.period.unwrap_or(0))
937            .and_then(|v| v.checked_sub(1))
938            .ok_or_else(|| QstickError::InvalidInput("warmup index overflow".into()))?;
939        let row_start = row
940            .checked_mul(cols)
941            .ok_or_else(|| QstickError::InvalidInput("row*cols overflow".into()))?;
942        for i in 0..warmup.min(cols) {
943            out[row_start + i] = f64::NAN;
944        }
945    }
946
947    match kern {
948        Kernel::Avx2Batch | Kernel::Avx512Batch => {
949            qstick_batch_shared_prefix_into(open, close, &combos, first, cols, out);
950            return Ok(combos);
951        }
952        _ => {}
953    }
954
955    let out_mu: &mut [MaybeUninit<f64>] = unsafe {
956        std::slice::from_raw_parts_mut(out.as_mut_ptr() as *mut MaybeUninit<f64>, out.len())
957    };
958
959    let do_row = |row: usize, dst_mu: &mut [MaybeUninit<f64>]| unsafe {
960        let period = combos[row].period.unwrap();
961
962        let dst: &mut [f64] =
963            std::slice::from_raw_parts_mut(dst_mu.as_mut_ptr() as *mut f64, dst_mu.len());
964
965        match kern {
966            Kernel::Scalar | Kernel::ScalarBatch | Kernel::Auto => {
967                qstick_scalar(open, close, period, first, dst)
968            }
969            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
970            Kernel::Avx2 | Kernel::Avx2Batch => qstick_avx2(open, close, period, first, dst),
971            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
972            Kernel::Avx512 | Kernel::Avx512Batch => qstick_avx512(open, close, period, first, dst),
973            #[cfg(not(all(feature = "nightly-avx", target_arch = "x86_64")))]
974            _ => qstick_scalar(open, close, period, first, dst),
975        }
976    };
977
978    if parallel {
979        #[cfg(not(target_arch = "wasm32"))]
980        {
981            use rayon::prelude::*;
982            out_mu
983                .par_chunks_mut(cols)
984                .enumerate()
985                .for_each(|(row, slice)| do_row(row, slice));
986        }
987        #[cfg(target_arch = "wasm32")]
988        {
989            for (row, slice) in out_mu.chunks_mut(cols).enumerate() {
990                do_row(row, slice);
991            }
992        }
993    } else {
994        for (row, slice) in out_mu.chunks_mut(cols).enumerate() {
995            do_row(row, slice);
996        }
997    }
998
999    Ok(combos)
1000}
1001
1002#[inline(always)]
1003fn qstick_batch_shared_prefix_into(
1004    open: &[f64],
1005    close: &[f64],
1006    combos: &[QstickParams],
1007    first: usize,
1008    cols: usize,
1009    out: &mut [f64],
1010) {
1011    let len = cols;
1012    if len == 0 {
1013        return;
1014    }
1015
1016    let cap = len.checked_add(1).unwrap_or(len);
1017    let mut prefix = Vec::with_capacity(cap);
1018    prefix.push(0.0);
1019    let mut acc = 0.0f64;
1020
1021    let mut i = 0usize;
1022    while i < first && i < len {
1023        prefix.push(acc);
1024        i += 1;
1025    }
1026    while i + 3 < len {
1027        let d0 = close[i] - open[i];
1028        let d1 = close[i + 1] - open[i + 1];
1029        let d2 = close[i + 2] - open[i + 2];
1030        let d3 = close[i + 3] - open[i + 3];
1031        acc += d0;
1032        prefix.push(acc);
1033        acc += d1;
1034        prefix.push(acc);
1035        acc += d2;
1036        prefix.push(acc);
1037        acc += d3;
1038        prefix.push(acc);
1039        i += 4;
1040    }
1041    while i < len {
1042        acc += close[i] - open[i];
1043        prefix.push(acc);
1044        i += 1;
1045    }
1046
1047    for (row, combo) in combos.iter().enumerate() {
1048        let p = combo.period.unwrap_or(5);
1049        let warm = first
1050            .checked_add(p)
1051            .and_then(|v| v.checked_sub(1))
1052            .unwrap_or(first);
1053        if warm >= len {
1054            continue;
1055        }
1056        let row_start = row.checked_mul(cols).unwrap_or(0);
1057        let inv_p = 1.0 / (p as f64);
1058        let mut j = warm;
1059        while j + 3 < len {
1060            let s0 = prefix[j + 1] - prefix[j + 1 - p];
1061            let s1 = prefix[j + 2] - prefix[j + 2 - p];
1062            let s2 = prefix[j + 3] - prefix[j + 3 - p];
1063            let s3 = prefix[j + 4] - prefix[j + 4 - p];
1064            out[row_start + j] = s0 * inv_p;
1065            out[row_start + j + 1] = s1 * inv_p;
1066            out[row_start + j + 2] = s2 * inv_p;
1067            out[row_start + j + 3] = s3 * inv_p;
1068            j += 4;
1069        }
1070        while j < len {
1071            let s = prefix[j + 1] - prefix[j + 1 - p];
1072            out[row_start + j] = s * inv_p;
1073            j += 1;
1074        }
1075    }
1076}
1077
1078#[inline(always)]
1079unsafe fn qstick_row_scalar(
1080    open: &[f64],
1081    close: &[f64],
1082    first: usize,
1083    period: usize,
1084    out: &mut [f64],
1085) {
1086    qstick_scalar(open, close, period, first, out);
1087}
1088
/// Row adapter for the AVX2 kernel: forwards to `qstick_avx2`.
///
/// Compiled only with the `nightly-avx` feature on `x86_64`. This signature
/// takes `first` before `period`; the call swaps them into the order
/// `qstick_avx2` expects.
///
/// # Safety
///
/// NOTE(review): presumably the CPU must support AVX2 and the slices must
/// meet `qstick_avx2`'s requirements; confirm against that function.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn qstick_row_avx2(
    open: &[f64],
    close: &[f64],
    first: usize,
    period: usize,
    out: &mut [f64],
) {
    qstick_avx2(open, close, period, first, out)
}
1100
/// Row adapter for the AVX-512 kernel: forwards to `qstick_avx512`.
///
/// Compiled only with the `nightly-avx` feature on `x86_64`. This signature
/// takes `first` before `period`; the call swaps them into the order
/// `qstick_avx512` expects.
///
/// # Safety
///
/// NOTE(review): presumably the CPU must support AVX-512 and the slices must
/// meet `qstick_avx512`'s requirements; confirm against that function.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn qstick_row_avx512(
    open: &[f64],
    close: &[f64],
    first: usize,
    period: usize,
    out: &mut [f64],
) {
    qstick_avx512(open, close, period, first, out)
}
1112
/// Row adapter for the short-window AVX-512 kernel: forwards to
/// `qstick_avx512_short`.
///
/// Compiled only with the `nightly-avx` feature on `x86_64`. `first` and
/// `period` are swapped on the call to match the kernel's parameter order.
///
/// # Safety
///
/// NOTE(review): presumably the CPU must support AVX-512 and the slices must
/// meet `qstick_avx512_short`'s requirements; confirm against that function.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn qstick_row_avx512_short(
    open: &[f64],
    close: &[f64],
    first: usize,
    period: usize,
    out: &mut [f64],
) {
    let (window, start) = (period, first);
    qstick_avx512_short(open, close, window, start, out)
}
1124
/// Row adapter for the long-window AVX-512 kernel: forwards to
/// `qstick_avx512_long`.
///
/// Compiled only with the `nightly-avx` feature on `x86_64`. `first` and
/// `period` are swapped on the call to match the kernel's parameter order.
///
/// # Safety
///
/// NOTE(review): presumably the CPU must support AVX-512 and the slices must
/// meet `qstick_avx512_long`'s requirements; confirm against that function.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn qstick_row_avx512_long(
    open: &[f64],
    close: &[f64],
    first: usize,
    period: usize,
    out: &mut [f64],
) {
    let (window, start) = (period, first);
    qstick_avx512_long(open, close, window, start, out)
}
1136
1137#[derive(Debug, Clone)]
1138pub struct QstickStream {
1139    period: usize,
1140    inv_p: f64,
1141    buffer: Vec<f64>,
1142    head: usize,
1143    len: usize,
1144    sum: f64,
1145    mask: usize,
1146}
1147
1148impl QstickStream {
1149    #[inline(always)]
1150    pub fn try_new(params: QstickParams) -> Result<Self, QstickError> {
1151        let period = params.period.unwrap_or(5);
1152        if period == 0 {
1153            return Err(QstickError::InvalidPeriod {
1154                period,
1155                data_len: 0,
1156            });
1157        }
1158        let mask = if period.is_power_of_two() {
1159            period - 1
1160        } else {
1161            0
1162        };
1163        Ok(Self {
1164            period,
1165            inv_p: 1.0 / (period as f64),
1166            buffer: vec![0.0; period],
1167            head: 0,
1168            len: 0,
1169            sum: 0.0,
1170            mask,
1171        })
1172    }
1173
1174    #[inline(always)]
1175    pub fn update(&mut self, open: f64, close: f64) -> Option<f64> {
1176        let diff = close - open;
1177        let h = self.head;
1178
1179        if self.len < self.period {
1180            self.buffer[h] = diff;
1181            self.sum += diff;
1182            self.head = if self.mask != 0 {
1183                (h + 1) & self.mask
1184            } else if h + 1 == self.period {
1185                0
1186            } else {
1187                h + 1
1188            };
1189            self.len += 1;
1190            if self.len == self.period {
1191                Some(self.sum * self.inv_p)
1192            } else {
1193                None
1194            }
1195        } else {
1196            let old = self.buffer[h];
1197            self.sum += diff - old;
1198            self.buffer[h] = diff;
1199            self.head = if self.mask != 0 {
1200                (h + 1) & self.mask
1201            } else if h + 1 == self.period {
1202                0
1203            } else {
1204                h + 1
1205            };
1206            Some(self.sum * self.inv_p)
1207        }
1208    }
1209
1210    #[inline(always)]
1211    pub fn reset(&mut self) {
1212        self.head = 0;
1213        self.len = 0;
1214        self.sum = 0.0;
1215    }
1216
1217    #[inline(always)]
1218    pub fn update_diff(&mut self, diff: f64) -> Option<f64> {
1219        let h = self.head;
1220
1221        if self.len < self.period {
1222            self.buffer[h] = diff;
1223            self.sum += diff;
1224            self.head = if self.mask != 0 {
1225                (h + 1) & self.mask
1226            } else if h + 1 == self.period {
1227                0
1228            } else {
1229                h + 1
1230            };
1231            self.len += 1;
1232            if self.len == self.period {
1233                Some(self.sum * self.inv_p)
1234            } else {
1235                None
1236            }
1237        } else {
1238            let old = self.buffer[h];
1239            self.sum += diff - old;
1240            self.buffer[h] = diff;
1241            self.head = if self.mask != 0 {
1242                (h + 1) & self.mask
1243            } else if h + 1 == self.period {
1244                0
1245            } else {
1246                h + 1
1247            };
1248            Some(self.sum * self.inv_p)
1249        }
1250    }
1251}
1252
1253#[cfg(test)]
1254mod tests {
1255    use super::*;
1256    use crate::skip_if_unsupported;
1257    use crate::utilities::data_loader::read_candles_from_csv;
1258    #[cfg(feature = "proptest")]
1259    use proptest::prelude::*;
1260
1261    #[test]
1262    fn test_qstick_into_matches_api() -> Result<(), Box<dyn Error>> {
1263        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1264        let candles = read_candles_from_csv(file_path)?;
1265        let params = QstickParams { period: Some(5) };
1266        let input = QstickInput::from_candles(&candles, "open", "close", params);
1267
1268        let baseline = qstick(&input)?.values;
1269
1270        let mut into_out = vec![0.0; baseline.len()];
1271        #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
1272        qstick_into(&input, &mut into_out)?;
1273
1274        assert_eq!(baseline.len(), into_out.len());
1275        for (a, b) in baseline.iter().zip(into_out.iter()) {
1276            let equal = (a.is_nan() && b.is_nan()) || (a == b);
1277            assert!(equal, "qstick_into mismatch: a={}, b={}", a, b);
1278        }
1279        Ok(())
1280    }
1281    fn check_qstick_partial_params(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1282        skip_if_unsupported!(kernel, test_name);
1283        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1284        let candles = read_candles_from_csv(file_path)?;
1285        let default_params = QstickParams { period: None };
1286        let input_default = QstickInput::from_candles(&candles, "open", "close", default_params);
1287        let output_default = qstick_with_kernel(&input_default, kernel)?;
1288        assert_eq!(output_default.values.len(), candles.close.len());
1289        let params_period_7 = QstickParams { period: Some(7) };
1290        let input_period_7 = QstickInput::from_candles(&candles, "open", "close", params_period_7);
1291        let output_period_7 = qstick_with_kernel(&input_period_7, kernel)?;
1292        assert_eq!(output_period_7.values.len(), candles.close.len());
1293        Ok(())
1294    }
1295    fn check_qstick_accuracy(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1296        skip_if_unsupported!(kernel, test_name);
1297        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1298        let candles = read_candles_from_csv(file_path)?;
1299        let params = QstickParams { period: Some(5) };
1300        let input = QstickInput::from_candles(&candles, "open", "close", params);
1301        let result = qstick_with_kernel(&input, kernel)?;
1302        let expected_last_five_qstick = [219.4, 61.6, -51.8, -53.4, -123.2];
1303        let start_index = result.values.len() - 5;
1304        let result_last_five = &result.values[start_index..];
1305        for (i, &value) in result_last_five.iter().enumerate() {
1306            let expected_value = expected_last_five_qstick[i];
1307            assert!(
1308                (value - expected_value).abs() < 1e-1,
1309                "[{}] Qstick mismatch at idx {}: got {}, expected {}",
1310                test_name,
1311                i,
1312                value,
1313                expected_value
1314            );
1315        }
1316        Ok(())
1317    }
1318    fn check_qstick_zero_period(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1319        skip_if_unsupported!(kernel, test_name);
1320        let open_data = [10.0, 20.0, 30.0];
1321        let close_data = [15.0, 25.0, 35.0];
1322        let params = QstickParams { period: Some(0) };
1323        let input = QstickInput::from_slices(&open_data, &close_data, params);
1324        let res = qstick_with_kernel(&input, kernel);
1325        assert!(
1326            res.is_err(),
1327            "[{}] Qstick should fail with zero period",
1328            test_name
1329        );
1330        Ok(())
1331    }
1332    fn check_qstick_period_exceeds_length(
1333        test_name: &str,
1334        kernel: Kernel,
1335    ) -> Result<(), Box<dyn Error>> {
1336        skip_if_unsupported!(kernel, test_name);
1337        let open_data = [10.0, 20.0, 30.0];
1338        let close_data = [15.0, 25.0, 35.0];
1339        let params = QstickParams { period: Some(10) };
1340        let input = QstickInput::from_slices(&open_data, &close_data, params);
1341        let res = qstick_with_kernel(&input, kernel);
1342        assert!(
1343            res.is_err(),
1344            "[{}] Qstick should fail with period exceeding length",
1345            test_name
1346        );
1347        Ok(())
1348    }
1349    fn check_qstick_very_small_dataset(
1350        test_name: &str,
1351        kernel: Kernel,
1352    ) -> Result<(), Box<dyn Error>> {
1353        skip_if_unsupported!(kernel, test_name);
1354        let open_data = [50.0];
1355        let close_data = [55.0];
1356        let params = QstickParams { period: Some(5) };
1357        let input = QstickInput::from_slices(&open_data, &close_data, params);
1358        let res = qstick_with_kernel(&input, kernel);
1359        assert!(
1360            res.is_err(),
1361            "[{}] Qstick should fail with insufficient data",
1362            test_name
1363        );
1364        Ok(())
1365    }
1366    fn check_qstick_reinput(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1367        skip_if_unsupported!(kernel, test_name);
1368        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1369        let candles = read_candles_from_csv(file_path)?;
1370        let first_params = QstickParams { period: Some(5) };
1371        let first_input = QstickInput::from_candles(&candles, "open", "close", first_params);
1372        let first_result = qstick_with_kernel(&first_input, kernel)?;
1373        let second_params = QstickParams { period: Some(5) };
1374        let second_input =
1375            QstickInput::from_slices(&first_result.values, &first_result.values, second_params);
1376        let second_result = qstick_with_kernel(&second_input, kernel)?;
1377        assert_eq!(second_result.values.len(), first_result.values.len());
1378        for i in 10..second_result.values.len() {
1379            assert!(
1380                !second_result.values[i].is_nan(),
1381                "[{}] Qstick Slice Reinput: Expected no NaN after idx 10, found NaN at idx {}",
1382                test_name,
1383                i
1384            );
1385        }
1386        Ok(())
1387    }
1388    fn check_qstick_nan_handling(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1389        skip_if_unsupported!(kernel, test_name);
1390        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1391        let candles = read_candles_from_csv(file_path)?;
1392        let params = QstickParams { period: Some(5) };
1393        let input = QstickInput::from_candles(&candles, "open", "close", params);
1394        let qstick_result = qstick_with_kernel(&input, kernel)?;
1395        if qstick_result.values.len() > 50 {
1396            for i in 50..qstick_result.values.len() {
1397                assert!(
1398                    !qstick_result.values[i].is_nan(),
1399                    "[{}] Expected no NaN after index 50, found NaN at index {}",
1400                    test_name,
1401                    i
1402                );
1403            }
1404        }
1405        Ok(())
1406    }
1407
1408    #[cfg(debug_assertions)]
1409    fn check_qstick_no_poison(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1410        skip_if_unsupported!(kernel, test_name);
1411
1412        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1413        let candles = read_candles_from_csv(file_path)?;
1414
1415        let test_params = vec![
1416            QstickParams::default(),
1417            QstickParams { period: Some(2) },
1418            QstickParams { period: Some(3) },
1419            QstickParams { period: Some(7) },
1420            QstickParams { period: Some(10) },
1421            QstickParams { period: Some(20) },
1422            QstickParams { period: Some(30) },
1423            QstickParams { period: Some(50) },
1424            QstickParams { period: Some(100) },
1425        ];
1426
1427        for (param_idx, params) in test_params.iter().enumerate() {
1428            let input = QstickInput::from_candles(&candles, "open", "close", params.clone());
1429            let output = qstick_with_kernel(&input, kernel)?;
1430
1431            for (i, &val) in output.values.iter().enumerate() {
1432                if val.is_nan() {
1433                    continue;
1434                }
1435
1436                let bits = val.to_bits();
1437
1438                if bits == 0x11111111_11111111 {
1439                    panic!(
1440                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
1441						 with params: period={} (param set {})",
1442                        test_name,
1443                        val,
1444                        bits,
1445                        i,
1446                        params.period.unwrap_or(5),
1447                        param_idx
1448                    );
1449                }
1450
1451                if bits == 0x22222222_22222222 {
1452                    panic!(
1453                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
1454						 with params: period={} (param set {})",
1455                        test_name,
1456                        val,
1457                        bits,
1458                        i,
1459                        params.period.unwrap_or(5),
1460                        param_idx
1461                    );
1462                }
1463
1464                if bits == 0x33333333_33333333 {
1465                    panic!(
1466                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
1467						 with params: period={} (param set {})",
1468                        test_name,
1469                        val,
1470                        bits,
1471                        i,
1472                        params.period.unwrap_or(5),
1473                        param_idx
1474                    );
1475                }
1476            }
1477        }
1478
1479        Ok(())
1480    }
1481
1482    #[cfg(not(debug_assertions))]
1483    fn check_qstick_no_poison(_test_name: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
1484        Ok(())
1485    }
1486
1487    #[cfg(feature = "proptest")]
1488    #[allow(clippy::float_cmp)]
1489    fn check_qstick_property(
1490        test_name: &str,
1491        kernel: Kernel,
1492    ) -> Result<(), Box<dyn std::error::Error>> {
1493        use proptest::prelude::*;
1494        skip_if_unsupported!(kernel, test_name);
1495
1496        let strat = (1usize..=64).prop_flat_map(|period| {
1497            (period..=400usize).prop_flat_map(move |len| {
1498                (
1499                    prop::collection::vec(
1500                        (1.0f64..10000.0f64).prop_filter("finite", |x| x.is_finite()),
1501                        len,
1502                    ),
1503                    prop::collection::vec(
1504                        (-100.0f64..100.0f64).prop_filter("finite", |x| x.is_finite()),
1505                        len,
1506                    ),
1507                    Just(period),
1508                )
1509            })
1510        });
1511
1512        proptest::test_runner::TestRunner::default()
1513            .run(&strat, |(open_prices, close_deltas, period)| {
1514                let close_prices: Vec<f64> = open_prices
1515                    .iter()
1516                    .zip(close_deltas.iter())
1517                    .map(|(o, d)| o + d)
1518                    .collect();
1519
1520                let params = QstickParams {
1521                    period: Some(period),
1522                };
1523                let input = QstickInput::from_slices(&open_prices, &close_prices, params);
1524
1525                let QstickOutput { values: out } = qstick_with_kernel(&input, kernel).unwrap();
1526                let QstickOutput { values: ref_out } =
1527                    qstick_with_kernel(&input, Kernel::Scalar).unwrap();
1528
1529                for i in 0..(period - 1) {
1530                    prop_assert!(
1531                        out[i].is_nan(),
1532                        "Expected NaN during warmup at index {}, got {}",
1533                        i,
1534                        out[i]
1535                    );
1536                }
1537
1538                for i in (period - 1)..open_prices.len() {
1539                    let window_start = i + 1 - period;
1540                    let window_end = i + 1;
1541
1542                    let diffs: Vec<f64> = (window_start..window_end)
1543                        .map(|j| close_prices[j] - open_prices[j])
1544                        .collect();
1545
1546                    let min_diff = diffs.iter().cloned().fold(f64::INFINITY, f64::min);
1547                    let max_diff = diffs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1548                    let y = out[i];
1549
1550                    prop_assert!(
1551                        y.is_nan() || (y >= min_diff - 1e-9 && y <= max_diff + 1e-9),
1552                        "idx {}: QStick {} not in bounds [{}, {}]",
1553                        i,
1554                        y,
1555                        min_diff,
1556                        max_diff
1557                    );
1558
1559                    if period == 1 {
1560                        let expected = close_prices[i] - open_prices[i];
1561                        prop_assert!(
1562                            (y - expected).abs() <= 1e-10,
1563                            "Period=1: expected {}, got {} at index {}",
1564                            expected,
1565                            y,
1566                            i
1567                        );
1568                    }
1569
1570                    if diffs.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10) {
1571                        let expected = diffs[0];
1572                        prop_assert!(
1573                            (y - expected).abs() <= 1e-9,
1574                            "Constant diff: expected {}, got {} at index {}",
1575                            expected,
1576                            y,
1577                            i
1578                        );
1579                    }
1580
1581                    if diffs.iter().all(|&d| d.abs() < 1e-10) {
1582                        prop_assert!(
1583                            y.abs() <= 1e-9,
1584                            "Zero diff: expected 0, got {} at index {}",
1585                            y,
1586                            i
1587                        );
1588                    }
1589
1590                    let expected_qstick = diffs.iter().sum::<f64>() / (period as f64);
1591                    prop_assert!(
1592                        (y - expected_qstick).abs() <= 1e-9,
1593                        "Manual calc: expected {}, got {} at index {}",
1594                        expected_qstick,
1595                        y,
1596                        i
1597                    );
1598
1599                    let r = ref_out[i];
1600                    let y_bits = y.to_bits();
1601                    let r_bits = r.to_bits();
1602
1603                    if !y.is_finite() || !r.is_finite() {
1604                        prop_assert!(
1605                            y.to_bits() == r.to_bits(),
1606                            "finite/NaN mismatch idx {}: {} vs {}",
1607                            i,
1608                            y,
1609                            r
1610                        );
1611                        continue;
1612                    }
1613
1614                    let ulp_diff: u64 = y_bits.abs_diff(r_bits);
1615                    prop_assert!(
1616                        (y - r).abs() <= 1e-9 || ulp_diff <= 4,
1617                        "Kernel mismatch idx {}: {} vs {} (ULP={})",
1618                        i,
1619                        y,
1620                        r,
1621                        ulp_diff
1622                    );
1623                }
1624
1625                Ok(())
1626            })
1627            .unwrap();
1628
1629        Ok(())
1630    }
1631
1632    macro_rules! generate_all_qstick_tests {
1633        ($($test_fn:ident),*) => {
1634            paste::paste! {
1635                $(
1636                    #[test]
1637                    fn [<$test_fn _scalar_f64>]() {
1638                        let _ = $test_fn(stringify!([<$test_fn _scalar_f64>]), Kernel::Scalar);
1639                    }
1640                )*
1641                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1642                $(
1643                    #[test]
1644                    fn [<$test_fn _avx2_f64>]() {
1645                        let _ = $test_fn(stringify!([<$test_fn _avx2_f64>]), Kernel::Avx2);
1646                    }
1647                    #[test]
1648                    fn [<$test_fn _avx512_f64>]() {
1649                        let _ = $test_fn(stringify!([<$test_fn _avx512_f64>]), Kernel::Avx512);
1650                    }
1651                )*
1652            }
1653        }
1654    }
1655    generate_all_qstick_tests!(
1656        check_qstick_partial_params,
1657        check_qstick_accuracy,
1658        check_qstick_zero_period,
1659        check_qstick_period_exceeds_length,
1660        check_qstick_very_small_dataset,
1661        check_qstick_reinput,
1662        check_qstick_nan_handling,
1663        check_qstick_no_poison
1664    );
1665
1666    #[cfg(feature = "proptest")]
1667    generate_all_qstick_tests!(check_qstick_property);
1668    fn check_batch_default_row(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1669        skip_if_unsupported!(kernel, test);
1670        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1671        let c = read_candles_from_csv(file)?;
1672        let output = QstickBatchBuilder::new()
1673            .kernel(kernel)
1674            .apply_candles(&c, "open", "close")?;
1675        let def = QstickParams::default();
1676        let row = output.values_for(&def).expect("default row missing");
1677        assert_eq!(row.len(), c.close.len());
1678        let expected = [219.4, 61.6, -51.8, -53.4, -123.2];
1679        let start = row.len() - 5;
1680        for (i, &v) in row[start..].iter().enumerate() {
1681            assert!(
1682                (v - expected[i]).abs() < 1e-1,
1683                "[{test}] default-row mismatch at idx {i}: {v} vs {expected:?}"
1684            );
1685        }
1686        Ok(())
1687    }
1688
1689    #[cfg(debug_assertions)]
1690    fn check_batch_no_poison(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1691        skip_if_unsupported!(kernel, test);
1692
1693        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1694        let c = read_candles_from_csv(file)?;
1695
1696        let test_configs = vec![
1697            (2, 10, 2),
1698            (5, 25, 5),
1699            (30, 60, 15),
1700            (2, 5, 1),
1701            (10, 50, 10),
1702            (15, 30, 5),
1703            (2, 100, 20),
1704        ];
1705
1706        for (cfg_idx, &(p_start, p_end, p_step)) in test_configs.iter().enumerate() {
1707            let output = QstickBatchBuilder::new()
1708                .kernel(kernel)
1709                .period_range(p_start, p_end, p_step)
1710                .apply_candles(&c, "open", "close")?;
1711
1712            for (idx, &val) in output.values.iter().enumerate() {
1713                if val.is_nan() {
1714                    continue;
1715                }
1716
1717                let bits = val.to_bits();
1718                let row = idx / output.cols;
1719                let col = idx % output.cols;
1720                let combo = &output.combos[row];
1721
1722                if bits == 0x11111111_11111111 {
1723                    panic!(
1724                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
1725						 at row {} col {} (flat index {}) with params: period={}",
1726                        test,
1727                        cfg_idx,
1728                        val,
1729                        bits,
1730                        row,
1731                        col,
1732                        idx,
1733                        combo.period.unwrap_or(5)
1734                    );
1735                }
1736
1737                if bits == 0x22222222_22222222 {
1738                    panic!(
1739                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
1740						 at row {} col {} (flat index {}) with params: period={}",
1741                        test,
1742                        cfg_idx,
1743                        val,
1744                        bits,
1745                        row,
1746                        col,
1747                        idx,
1748                        combo.period.unwrap_or(5)
1749                    );
1750                }
1751
1752                if bits == 0x33333333_33333333 {
1753                    panic!(
1754                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
1755						 at row {} col {} (flat index {}) with params: period={}",
1756                        test,
1757                        cfg_idx,
1758                        val,
1759                        bits,
1760                        row,
1761                        col,
1762                        idx,
1763                        combo.period.unwrap_or(5)
1764                    );
1765                }
1766            }
1767        }
1768
1769        Ok(())
1770    }
1771
1772    #[cfg(not(debug_assertions))]
1773    fn check_batch_no_poison(_test: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
1774        Ok(())
1775    }
1776
1777    macro_rules! gen_batch_tests {
1778        ($fn_name:ident) => {
1779            paste::paste! {
1780                #[test] fn [<$fn_name _scalar>]()      {
1781                    let _ = $fn_name(stringify!([<$fn_name _scalar>]), Kernel::ScalarBatch);
1782                }
1783                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1784                #[test] fn [<$fn_name _avx2>]()        {
1785                    let _ = $fn_name(stringify!([<$fn_name _avx2>]), Kernel::Avx2Batch);
1786                }
1787                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1788                #[test] fn [<$fn_name _avx512>]()      {
1789                    let _ = $fn_name(stringify!([<$fn_name _avx512>]), Kernel::Avx512Batch);
1790                }
1791                #[test] fn [<$fn_name _auto_detect>]() {
1792                    let _ = $fn_name(stringify!([<$fn_name _auto_detect>]), Kernel::Auto);
1793                }
1794            }
1795        };
1796    }
1797    gen_batch_tests!(check_batch_default_row);
1798    gen_batch_tests!(check_batch_no_poison);
1799}
1800
1801#[cfg(all(feature = "python", feature = "cuda"))]
1802use crate::utilities::dlpack_cuda::export_f32_cuda_dlpack_2d;
1803#[cfg(all(feature = "python", feature = "cuda"))]
1804use cust::context::Context;
1805#[cfg(all(feature = "python", feature = "cuda"))]
1806use cust::memory::DeviceBuffer;
1807#[cfg(all(feature = "python", feature = "cuda"))]
1808use std::sync::Arc;
1809
// Python-visible owner of a 2-D `f32` CUDA device buffer, exposed through
// `__cuda_array_interface__` and the DLPack protocol methods.
// (Plain `//` comments are used so the Python-visible docstrings stay unchanged.)
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyclass(name = "QstickDeviceArrayF32Py")]
pub struct QstickDeviceArrayF32Py {
    // Device allocation; becomes `None` once `__dlpack__` has taken it.
    pub(crate) buf: Option<DeviceBuffer<f32>>,
    pub(crate) rows: usize,
    pub(crate) cols: usize,
    // NOTE(review): presumably held so the CUDA context outlives `buf` — confirm.
    pub(crate) _ctx: Arc<Context>,
    pub(crate) device_id: u32,
}

#[cfg(all(feature = "python", feature = "cuda"))]
#[pymethods]
impl QstickDeviceArrayF32Py {
    // Builds the interface dict: shape `(rows, cols)`, little-endian f32,
    // byte strides for a row-major layout, the raw device pointer with the
    // read-only flag set to false, and version 3.
    // Fails with `ValueError` once the buffer has been exported via DLPack.
    #[getter]
    fn __cuda_array_interface__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyDict>> {
        let elem_bytes = std::mem::size_of::<f32>();
        let iface = PyDict::new(py);
        iface.set_item("shape", (self.rows, self.cols))?;
        iface.set_item("typestr", "<f4")?;
        iface.set_item("strides", (self.cols * elem_bytes, elem_bytes))?;

        let device_ptr = match self.buf.as_ref() {
            Some(buf) => buf.as_device_ptr().as_raw() as usize,
            None => {
                return Err(PyValueError::new_err(
                    "buffer already exported via __dlpack__",
                ))
            }
        };
        iface.set_item("data", (device_ptr, false))?;

        iface.set_item("version", 3)?;
        Ok(iface)
    }

    // Returns `(device_type, device_id)`.
    // NOTE(review): `2` is presumably DLPack's CUDA device type — confirm.
    fn __dlpack_device__(&self) -> (i32, i32) {
        (2, self.device_id as i32)
    }

    // Hands the device buffer to a DLPack consumer. May be called only once:
    // the buffer is moved out of `self`.
    //
    // A `dl_device` that parses as `(i32, i32)` and differs from this array's
    // device is rejected; a `dl_device` that does not parse is ignored.
    // `stream` is accepted and ignored.
    #[pyo3(signature = (stream=None, max_version=None, dl_device=None, copy=None))]
    fn __dlpack__<'py>(
        &mut self,
        py: Python<'py>,
        stream: Option<pyo3::PyObject>,
        max_version: Option<pyo3::PyObject>,
        dl_device: Option<pyo3::PyObject>,
        copy: Option<pyo3::PyObject>,
    ) -> PyResult<PyObject> {
        let (kdl, alloc_dev) = self.__dlpack_device__();

        let requested = dl_device
            .as_ref()
            .and_then(|dev_obj| dev_obj.extract::<(i32, i32)>(py).ok());
        if let Some((dev_ty, dev_id)) = requested {
            if dev_ty != kdl || dev_id != alloc_dev {
                // `copy` counts as requested only when it extracts to `true`.
                let wants_copy = matches!(
                    copy.as_ref().map(|c| c.extract::<bool>(py)),
                    Some(Ok(true))
                );
                let reason = if wants_copy {
                    "device copy not implemented for __dlpack__"
                } else {
                    "dl_device mismatch for __dlpack__"
                };
                return Err(PyValueError::new_err(reason));
            }
        }
        let _ = stream;

        let buf = match self.buf.take() {
            Some(buf) => buf,
            None => {
                return Err(PyValueError::new_err(
                    "__dlpack__ may only be called once",
                ))
            }
        };

        let rows = self.rows;
        let cols = self.cols;
        let max_version_bound = max_version.map(|obj| obj.into_bound(py));

        export_f32_cuda_dlpack_2d(py, buf, rows, cols, alloc_dev, max_version_bound)
    }
}
1893#[cfg(feature = "python")]
1894use crate::utilities::kernel_validation::validate_kernel;
1895#[cfg(feature = "python")]
1896use numpy::{IntoPyArray, PyArray1, PyArrayMethods, PyReadonlyArray1};
1897#[cfg(feature = "python")]
1898use pyo3::exceptions::PyValueError;
1899#[cfg(feature = "python")]
1900use pyo3::prelude::*;
1901#[cfg(feature = "python")]
1902use pyo3::types::PyDict;
1903
/// Python entry point `qstick(open, close, period, kernel=None)`.
///
/// Validates the optional kernel name, runs the indicator over the two input arrays with the
/// GIL released, and returns the result as a new 1-D NumPy array. Any indicator error is
/// surfaced as a Python `ValueError` carrying the error's display text.
#[cfg(feature = "python")]
#[pyfunction(name = "qstick")]
#[pyo3(signature = (open, close, period, kernel=None))]
pub fn qstick_py<'py>(
    py: Python<'py>,
    open: PyReadonlyArray1<'py, f64>,
    close: PyReadonlyArray1<'py, f64>,
    period: usize,
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyArray1<f64>>> {
    let opens = open.as_slice()?;
    let closes = close.as_slice()?;
    let selected = validate_kernel(kernel, false)?;

    let input = QstickInput::from_slices(
        opens,
        closes,
        QstickParams {
            period: Some(period),
        },
    );

    // The computation touches no Python state, so it runs without holding the GIL.
    let computed = py.allow_threads(|| qstick_with_kernel(&input, selected).map(|o| o.values));

    match computed {
        Ok(values) => Ok(values.into_pyarray(py)),
        Err(err) => Err(PyValueError::new_err(err.to_string())),
    }
}
1929
/// Python class `QstickStream`: a thin wrapper around the Rust [`QstickStream`] state.
#[cfg(feature = "python")]
#[pyclass(name = "QstickStream")]
pub struct QstickStreamPy {
    /// Wrapped streaming state that `update` delegates to.
    stream: QstickStream,
}
1935
#[cfg(feature = "python")]
#[pymethods]
impl QstickStreamPy {
    /// Creates a stream for the given `period`.
    ///
    /// Raises `ValueError` with the underlying error text when `QstickStream::try_new`
    /// rejects the parameters.
    #[new]
    pub fn new(period: usize) -> PyResult<Self> {
        let requested = QstickParams {
            period: Some(period),
        };
        match QstickStream::try_new(requested) {
            Ok(stream) => Ok(Self { stream }),
            Err(err) => Err(PyValueError::new_err(err.to_string())),
        }
    }

    /// Feeds one `(open, close)` pair to the wrapped stream and returns whatever it yields.
    pub fn update(&mut self, open: f64, close: f64) -> Option<f64> {
        let Self { stream } = self;
        stream.update(open, close)
    }
}
1953
/// Python entry point `qstick_batch(open, close, period_range, kernel=None)`.
///
/// Expands `period_range` into a grid of parameter combinations, computes every combination
/// into one freshly allocated NumPy buffer with the GIL released, and returns a dict with:
///
/// * `"values"`  – the buffer reshaped to `(rows, cols)`, where `rows` is the number of
///   combinations and `cols` is `len(open)`;
/// * `"periods"` – the period of each combination as a `u64` array.
///
/// # Errors
///
/// Raises `ValueError` when the kernel name is invalid, the grid cannot be expanded,
/// `rows * cols` does not fit in `usize`, or the batch computation fails.
#[cfg(feature = "python")]
#[pyfunction(name = "qstick_batch")]
#[pyo3(signature = (open, close, period_range, kernel=None))]
pub fn qstick_batch_py<'py>(
    py: Python<'py>,
    open: PyReadonlyArray1<'py, f64>,
    close: PyReadonlyArray1<'py, f64>,
    period_range: (usize, usize, usize),
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyDict>> {
    let open_slice = open.as_slice()?;
    let close_slice = close.as_slice()?;
    let kern = validate_kernel(kernel, true)?;

    let sweep = QstickBatchRange {
        period: period_range,
    };

    let combos = expand_grid(&sweep).map_err(|e| PyValueError::new_err(e.to_string()))?;
    let rows = combos.len();
    let cols = open_slice.len();
    // The product sizes an uninitialised allocation, so a wrapped value must never reach it.
    let total = rows
        .checked_mul(cols)
        .ok_or_else(|| PyValueError::new_err("size overflow"))?;

    // SAFETY: the array is created uninitialised and is not visible to Python until it is
    // returned below. NOTE(review): this relies on `qstick_batch_inner_into` writing every
    // element on success — confirm against its implementation.
    let out_arr = unsafe { PyArray1::<f64>::new(py, [total], false) };
    // SAFETY: `out_arr` was created just above and no other reference to its data exists yet.
    let slice_out = unsafe { out_arr.as_slice_mut()? };

    let combos = py
        .allow_threads(|| {
            let kernel = match kern {
                Kernel::Auto => detect_best_batch_kernel(),
                k => k,
            };
            // The inner routine is handed the per-row kernel matching the batch kernel.
            let simd = match kernel {
                Kernel::Avx512Batch => Kernel::Avx512,
                Kernel::Avx2Batch => Kernel::Avx2,
                Kernel::ScalarBatch => Kernel::Scalar,
                _ => kernel,
            };

            qstick_batch_inner_into(open_slice, close_slice, &sweep, simd, true, slice_out)
        })
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    let dict = PyDict::new(py);
    dict.set_item("values", out_arr.reshape((rows, cols))?)?;
    dict.set_item(
        "periods",
        combos
            .iter()
            .map(|p| p.period.unwrap() as u64)
            .collect::<Vec<_>>()
            .into_pyarray(py),
    )?;

    Ok(dict)
}
2009
/// Registers the Qstick Python API on module `m`.
///
/// Always adds `qstick`, `qstick_batch` and the `QstickStream` class. When the `cuda` feature
/// is enabled it additionally adds the device-array class and the two CUDA entry points.
#[cfg(feature = "python")]
pub fn register_qstick_module(m: &Bound<'_, pyo3::types::PyModule>) -> PyResult<()> {
    let single = wrap_pyfunction!(qstick_py, m)?;
    m.add_function(single)?;
    let batch = wrap_pyfunction!(qstick_batch_py, m)?;
    m.add_function(batch)?;
    m.add_class::<QstickStreamPy>()?;

    // This function is already gated on `python`, so `cuda` alone selects the CUDA items.
    #[cfg(feature = "cuda")]
    {
        m.add_class::<QstickDeviceArrayF32Py>()?;
        m.add_function(wrap_pyfunction!(qstick_cuda_batch_dev_py, m)?)?;
        m.add_function(wrap_pyfunction!(
            qstick_cuda_many_series_one_param_dev_py,
            m
        )?)?;
    }

    Ok(())
}
2029
/// Python entry point `qstick_cuda_batch_dev(open_f32, close_f32, period_range, device_id=0)`.
///
/// Runs the batch computation through `CudaQstick` on the requested device with the GIL
/// released and returns the result wrapped in a [`QstickDeviceArrayF32Py`], together with the
/// context handle and device id reported by the `CudaQstick` instance.
///
/// Raises `ValueError` when CUDA is unavailable or when any CUDA-side step fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "qstick_cuda_batch_dev")]
#[pyo3(signature = (open_f32, close_f32, period_range, device_id=0))]
pub fn qstick_cuda_batch_dev_py(
    py: Python<'_>,
    open_f32: numpy::PyReadonlyArray1<'_, f32>,
    close_f32: numpy::PyReadonlyArray1<'_, f32>,
    period_range: (usize, usize, usize),
    device_id: usize,
) -> PyResult<QstickDeviceArrayF32Py> {
    use crate::cuda::cuda_available;
    use crate::cuda::moving_averages::alma_wrapper::DeviceArrayF32;
    use crate::cuda::CudaQstick;
    use cust::context::Context;
    use cust::memory::DeviceBuffer;
    use std::sync::Arc;

    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }

    let opens = open_f32.as_slice()?;
    let closes = close_f32.as_slice()?;
    let sweep = QstickBatchRange {
        period: period_range,
    };

    let (buf, rows, cols, ctx, dev_id) = py.allow_threads(|| -> PyResult<_> {
        let cuda = match CudaQstick::new(device_id) {
            Ok(handle) => handle,
            Err(e) => return Err(PyValueError::new_err(e.to_string())),
        };
        let result: DeviceArrayF32 = match cuda.qstick_batch_dev(opens, closes, &sweep) {
            Ok(arr) => arr,
            Err(e) => return Err(PyValueError::new_err(e.to_string())),
        };
        let shared_ctx: Arc<Context> = cuda.context_arc();
        Ok((
            result.buf,
            result.rows,
            result.cols,
            shared_ctx,
            cuda.device_id(),
        ))
    })?;

    Ok(QstickDeviceArrayF32Py {
        buf: Some(buf),
        rows,
        cols,
        _ctx: ctx,
        device_id: dev_id,
    })
}
2071
/// Python entry point
/// `qstick_cuda_many_series_one_param_dev(open_tm_f32, close_tm_f32, period, device_id=0)`.
///
/// Takes two 2-D `f32` arrays of identical shape, flattens them, and runs the time-major
/// many-series kernel through `CudaQstick` with the GIL released. The first array dimension is
/// passed as `rows` and the second as `cols`. The result is returned as a
/// [`QstickDeviceArrayF32Py`].
///
/// Raises `ValueError` when CUDA is unavailable, the shapes differ, or a CUDA step fails.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "qstick_cuda_many_series_one_param_dev")]
#[pyo3(signature = (open_tm_f32, close_tm_f32, period, device_id=0))]
pub fn qstick_cuda_many_series_one_param_dev_py(
    py: Python<'_>,
    open_tm_f32: numpy::PyReadonlyArray2<'_, f32>,
    close_tm_f32: numpy::PyReadonlyArray2<'_, f32>,
    period: usize,
    device_id: usize,
) -> PyResult<QstickDeviceArrayF32Py> {
    use crate::cuda::cuda_available;
    use crate::cuda::moving_averages::alma_wrapper::DeviceArrayF32;
    use crate::cuda::CudaQstick;
    use cust::context::Context;
    use cust::memory::DeviceBuffer;
    use numpy::PyUntypedArrayMethods;
    use std::sync::Arc;

    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }

    let dims = open_tm_f32.shape();
    if dims != close_tm_f32.shape() {
        return Err(PyValueError::new_err("open/close shapes differ"));
    }

    let flat_open: &[f32] = open_tm_f32.as_slice()?;
    let flat_close: &[f32] = close_tm_f32.as_slice()?;
    let (rows, cols) = (dims[0], dims[1]);

    let (buf, r_out, c_out, ctx, dev_id) = py.allow_threads(|| -> PyResult<_> {
        let cuda = match CudaQstick::new(device_id) {
            Ok(handle) => handle,
            Err(e) => return Err(PyValueError::new_err(e.to_string())),
        };
        let result: DeviceArrayF32 = match cuda
            .qstick_many_series_one_param_time_major_dev(flat_open, flat_close, cols, rows, period)
        {
            Ok(arr) => arr,
            Err(e) => return Err(PyValueError::new_err(e.to_string())),
        };
        let shared_ctx: Arc<Context> = cuda.context_arc();
        Ok((
            result.buf,
            result.rows,
            result.cols,
            shared_ctx,
            cuda.device_id(),
        ))
    })?;

    Ok(QstickDeviceArrayF32Py {
        buf: Some(buf),
        rows: r_out,
        cols: c_out,
        _ctx: ctx,
        device_id: dev_id,
    })
}
2117
2118pub fn qstick_into_slice(
2119    dst: &mut [f64],
2120    open: &[f64],
2121    close: &[f64],
2122    period: usize,
2123    kern: Kernel,
2124) -> Result<(), QstickError> {
2125    let len = open.len().min(close.len());
2126    if len == 0 {
2127        return Err(QstickError::InvalidPeriod {
2128            period,
2129            data_len: len,
2130        });
2131    }
2132    if dst.len() != len {
2133        return Err(QstickError::OutputLengthMismatch {
2134            expected: len,
2135            got: dst.len(),
2136        });
2137    }
2138    if period == 0 || period > len {
2139        return Err(QstickError::InvalidPeriod {
2140            period,
2141            data_len: len,
2142        });
2143    }
2144
2145    let mut first_valid = 0;
2146    for i in 0..len {
2147        if !open[i].is_nan() && !close[i].is_nan() {
2148            first_valid = i;
2149            break;
2150        }
2151        if i == len - 1 {
2152            return Err(QstickError::AllValuesNaN);
2153        }
2154    }
2155
2156    if len - first_valid < period {
2157        return Err(QstickError::NotEnoughValidData {
2158            needed: period,
2159            valid: len - first_valid,
2160        });
2161    }
2162
2163    let kernel = match kern {
2164        Kernel::Auto => Kernel::Scalar,
2165        k => k,
2166    };
2167
2168    match kernel {
2169        Kernel::Scalar | Kernel::ScalarBatch => {
2170            qstick_scalar(open, close, period, first_valid, dst)
2171        }
2172        #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2173        Kernel::Avx2 | Kernel::Avx2Batch => qstick_avx2(open, close, period, first_valid, dst),
2174        #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2175        Kernel::Avx512 | Kernel::Avx512Batch => {
2176            qstick_avx512(open, close, period, first_valid, dst)
2177        }
2178        _ => unreachable!(),
2179    }
2180
2181    let warmup_end = first_valid + period - 1;
2182    for v in &mut dst[..warmup_end] {
2183        *v = f64::NAN;
2184    }
2185
2186    Ok(())
2187}
2188
/// WebAssembly entry point: computes Qstick and returns a newly allocated result vector.
///
/// Fails with a string `JsValue` when the inputs differ in length or when
/// `qstick_into_slice` reports an error.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn qstick_js(open: &[f64], close: &[f64], period: usize) -> Result<Vec<f64>, JsValue> {
    if open.len() != close.len() {
        return Err(JsValue::from_str(
            "Open and close arrays must have the same length",
        ));
    }

    let mut values = vec![0.0; open.len()];
    match qstick_into_slice(&mut values, open, close, period, Kernel::Auto) {
        Ok(()) => Ok(values),
        Err(err) => Err(JsValue::from_str(&err.to_string())),
    }
}
2206
/// WebAssembly entry point: computes Qstick from raw pointers into a caller-provided buffer.
///
/// `open_ptr`, `close_ptr` and `out_ptr` must each reference `len` `f64` values. The output
/// may overlap either input, fully or partially: in that case the result is computed into a
/// temporary vector and copied out afterwards.
///
/// Fails with a string `JsValue` when any pointer is null or when `qstick_into_slice`
/// reports an error.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn qstick_into(
    open_ptr: *const f64,
    close_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period: usize,
) -> Result<(), JsValue> {
    if open_ptr.is_null() || close_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    // Compare byte ranges rather than base pointers, so an output that starts inside an
    // input buffer is also treated as aliased.
    let span = len.saturating_mul(std::mem::size_of::<f64>());
    let out_start = out_ptr as usize;
    let overlaps_out = |src: *const f64| {
        let src_start = src as usize;
        src_start < out_start.saturating_add(span) && out_start < src_start.saturating_add(span)
    };
    let aliased = overlaps_out(open_ptr) || overlaps_out(close_ptr);

    // SAFETY: the pointers are non-null (checked above); the caller guarantees each one is
    // valid for `len` f64 values. The mutable output slice is only created while the input
    // slices are live when the ranges were found to be disjoint.
    unsafe {
        let open = std::slice::from_raw_parts(open_ptr, len);
        let close = std::slice::from_raw_parts(close_ptr, len);

        if aliased {
            let mut temp = vec![0.0; len];
            qstick_into_slice(&mut temp, open, close, period, Kernel::Auto)
                .map_err(|e| JsValue::from_str(&e.to_string()))?;
            // The inputs are no longer read past this point.
            let out = std::slice::from_raw_parts_mut(out_ptr, len);
            out.copy_from_slice(&temp);
        } else {
            let out = std::slice::from_raw_parts_mut(out_ptr, len);
            qstick_into_slice(out, open, close, period, Kernel::Auto)
                .map_err(|e| JsValue::from_str(&e.to_string()))?;
        }
    }

    Ok(())
}
2238
/// WebAssembly helper: allocates room for `len` `f64` values and leaks it to the caller.
///
/// The memory is uninitialised and is never freed by Rust on its own; `qstick_free` in this
/// file rebuilds a `Vec` from such a pointer to release it.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn qstick_alloc(len: usize) -> *mut f64 {
    // `ManuallyDrop` suppresses the destructor, so the allocation outlives this call.
    let mut storage = std::mem::ManuallyDrop::new(Vec::<f64>::with_capacity(len));
    storage.as_mut_ptr()
}
2247
/// WebAssembly helper: releases a buffer of capacity `len` by rebuilding and dropping a `Vec`.
///
/// A null `ptr` is ignored. `ptr` and `len` must describe an allocation made by Rust's `Vec`
/// with exactly that capacity (as `qstick_alloc` in this file produces), and the buffer must
/// not be used afterwards.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn qstick_free(ptr: *mut f64, len: usize) {
    if ptr.is_null() {
        return;
    }
    // SAFETY: the caller guarantees `ptr` came from a `Vec<f64>` with capacity `len`.
    // The vector is rebuilt with length 0 because the buffer may never have been written,
    // and `Vec::from_raw_parts` requires the first `length` elements to be initialised.
    // `f64` has no destructor, so the length does not affect what is released.
    unsafe {
        drop(Vec::from_raw_parts(ptr, 0, len));
    }
}
2257
/// Configuration object deserialised from JavaScript by the `qstick_batch` wasm entry point.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct QstickBatchConfig {
    /// Period sweep forwarded unchanged into `QstickBatchRange::period`.
    /// The pointer-based batch API in this file fills the same field as `(start, end, step)`.
    pub period_range: (usize, usize, usize),
}
2263
/// Result object serialised back to JavaScript by the `qstick_batch` wasm entry point.
///
/// Each field is copied from the batch output of the same name.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct QstickBatchJsOutput {
    /// Flattened result values.
    /// NOTE(review): presumably `rows * cols` entries in row-major order — confirm.
    pub values: Vec<f64>,
    /// Parameter combinations reported by the batch computation.
    pub combos: Vec<QstickParams>,
    /// Row count reported by the batch computation.
    pub rows: usize,
    /// Column count reported by the batch computation.
    pub cols: usize,
}
2272
/// WebAssembly entry point exported as `qstick_batch`.
///
/// Deserialises `config` into a [`QstickBatchConfig`], runs the batch computation with
/// `Kernel::Auto`, and returns a serialised [`QstickBatchJsOutput`].
///
/// Fails with a string `JsValue` when the config cannot be parsed, the inputs differ in
/// length, the computation fails, or the result cannot be serialised.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen(js_name = qstick_batch)]
pub fn qstick_batch_unified_js(
    open: &[f64],
    close: &[f64],
    config: JsValue,
) -> Result<JsValue, JsValue> {
    let parsed: QstickBatchConfig = match serde_wasm_bindgen::from_value(config) {
        Ok(cfg) => cfg,
        Err(e) => return Err(JsValue::from_str(&format!("Invalid config: {}", e))),
    };

    if open.len() != close.len() {
        return Err(JsValue::from_str(
            "Open and close arrays must have the same length",
        ));
    }

    let sweep = QstickBatchRange {
        period: parsed.period_range,
    };

    let batch = match qstick_batch_inner(open, close, &sweep, Kernel::Auto, false) {
        Ok(result) => result,
        Err(e) => return Err(JsValue::from_str(&e.to_string())),
    };

    let payload = QstickBatchJsOutput {
        values: batch.values,
        combos: batch.combos,
        rows: batch.rows,
        cols: batch.cols,
    };

    match serde_wasm_bindgen::to_value(&payload) {
        Ok(js) => Ok(js),
        Err(e) => Err(JsValue::from_str(&format!("Serialization error: {}", e))),
    }
}
2307
/// WebAssembly entry point: batch Qstick over a period sweep, written through raw pointers.
///
/// `open_ptr` and `close_ptr` must each reference `len` `f64` values. `out_ptr` must reference
/// `rows * len` values, where `rows` is the number of combinations produced by expanding
/// `(period_start, period_end, period_step)`. Returns `rows` on success.
///
/// Fails with a string `JsValue` when a pointer is null, the grid cannot be expanded,
/// `rows * len` overflows, or the batch computation fails.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn qstick_batch_into(
    open_ptr: *const f64,
    close_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<usize, JsValue> {
    if open_ptr.is_null() || close_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    // SAFETY: both pointers are non-null (checked above); the caller guarantees each is valid
    // for `len` f64 values.
    let (open, close) = unsafe {
        (
            std::slice::from_raw_parts(open_ptr, len),
            std::slice::from_raw_parts(close_ptr, len),
        )
    };

    let sweep = QstickBatchRange {
        period: (period_start, period_end, period_step),
    };

    let rows = match expand_grid(&sweep) {
        Ok(combos) => combos.len(),
        Err(e) => return Err(JsValue::from_str(&e.to_string())),
    };
    let total_size = match rows.checked_mul(len) {
        Some(n) => n,
        None => return Err(JsValue::from_str("size overflow")),
    };

    // SAFETY: `out_ptr` is non-null; the caller guarantees it is valid for `total_size`
    // f64 values and does not overlap the inputs.
    let out = unsafe { std::slice::from_raw_parts_mut(out_ptr, total_size) };

    match qstick_batch_inner_into(open, close, &sweep, Kernel::Auto, false, out) {
        Ok(_) => Ok(rows),
        Err(e) => Err(JsValue::from_str(&e.to_string())),
    }
}