Skip to main content

vector_ta/indicators/
msw.rs

1#[cfg(feature = "python")]
2use numpy::{IntoPyArray, PyArray1};
3#[cfg(feature = "python")]
4use pyo3::exceptions::PyValueError;
5#[cfg(feature = "python")]
6use pyo3::prelude::*;
7#[cfg(feature = "python")]
8use pyo3::types::PyDict;
9
10#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
11use serde::{Deserialize, Serialize};
12#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
13use wasm_bindgen::prelude::*;
14
15#[cfg(all(feature = "python", feature = "cuda"))]
16use crate::cuda::{cuda_available, CudaMsw};
17#[cfg(all(feature = "python", feature = "cuda"))]
18use crate::indicators::moving_averages::alma::{make_device_array_py, DeviceArrayF32Py};
19use crate::utilities::data_loader::{source_type, Candles};
20use crate::utilities::enums::Kernel;
21use crate::utilities::helpers::{
22    alloc_with_nan_prefix, detect_best_batch_kernel, detect_best_kernel, init_matrix_prefixes,
23    make_uninit_matrix,
24};
25#[cfg(feature = "python")]
26use crate::utilities::kernel_validation::validate_kernel;
27use aligned_vec::{AVec, CACHELINE_ALIGN};
28#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
29use core::arch::x86_64::*;
30#[cfg(not(target_arch = "wasm32"))]
31use rayon::prelude::*;
32use std::convert::AsRef;
33use std::error::Error;
34use thiserror::Error;
35
// Truncated value of pi (seven decimals) used for all phase arithmetic in this file.
// NOTE(review): the `TULIP_` prefix suggests the truncation mirrors a reference
// implementation's constant — confirm before "fixing" it to `std::f64::consts::PI`.
36#[allow(clippy::approx_constant)]
37const TULIP_PI: f64 = 3.1415926;
// Twice `TULIP_PI`; used as the full-turn angle for the weight tables and phase wrapping.
38const TULIP_TPI: f64 = 2.0 * TULIP_PI;
39
40impl<'a> AsRef<[f64]> for MswInput<'a> {
41    #[inline(always)]
42    fn as_ref(&self) -> &[f64] {
43        match &self.data {
44            MswData::Slice(slice) => slice,
45            MswData::Candles { candles, source } => source_type(candles, source),
46        }
47    }
48}
49
50#[derive(Debug, Clone)]
51pub enum MswData<'a> {
52    Candles {
53        candles: &'a Candles,
54        source: &'a str,
55    },
56    Slice(&'a [f64]),
57}
58
/// Result of a single-series MSW computation.
///
/// Both vectors are produced by the kernels with the same length as the input;
/// positions before the warm-up index hold the NaN prefix written at allocation.
#[derive(Debug, Clone)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct MswOutput {
    /// Sine of the computed phase, one value per input position.
    pub sine: Vec<f64>,
    /// `(sin + cos) / sqrt(2)` of the same phase, one value per input position.
    pub lead: Vec<f64>,
}
68
/// Tunable parameters of the MSW indicator.
#[derive(Debug, Clone)]
#[cfg_attr(
    all(target_arch = "wasm32", feature = "wasm"),
    derive(Serialize, Deserialize)
)]
pub struct MswParams {
    /// Window length; `None` is resolved to 5 by the code that reads it.
    pub period: Option<usize>,
}
77
78impl Default for MswParams {
79    fn default() -> Self {
80        Self { period: Some(5) }
81    }
82}
83
84#[derive(Debug, Clone)]
85pub struct MswInput<'a> {
86    pub data: MswData<'a>,
87    pub params: MswParams,
88}
89
90impl<'a> MswInput<'a> {
91    #[inline]
92    pub fn from_candles(c: &'a Candles, s: &'a str, p: MswParams) -> Self {
93        Self {
94            data: MswData::Candles {
95                candles: c,
96                source: s,
97            },
98            params: p,
99        }
100    }
101    #[inline]
102    pub fn from_slice(sl: &'a [f64], p: MswParams) -> Self {
103        Self {
104            data: MswData::Slice(sl),
105            params: p,
106        }
107    }
108    #[inline]
109    pub fn with_default_candles(c: &'a Candles) -> Self {
110        Self::from_candles(c, "close", MswParams::default())
111    }
112    #[inline]
113    pub fn get_period(&self) -> usize {
114        self.params.period.unwrap_or(5)
115    }
116}
117
118#[derive(Copy, Clone, Debug)]
119pub struct MswBuilder {
120    period: Option<usize>,
121    kernel: Kernel,
122}
123
124impl Default for MswBuilder {
125    fn default() -> Self {
126        Self {
127            period: None,
128            kernel: Kernel::Auto,
129        }
130    }
131}
132
133impl MswBuilder {
134    #[inline(always)]
135    pub fn new() -> Self {
136        Self::default()
137    }
138    #[inline(always)]
139    pub fn period(mut self, n: usize) -> Self {
140        self.period = Some(n);
141        self
142    }
143    #[inline(always)]
144    pub fn kernel(mut self, k: Kernel) -> Self {
145        self.kernel = k;
146        self
147    }
148    #[inline(always)]
149    pub fn apply(self, c: &Candles) -> Result<MswOutput, MswError> {
150        let p = MswParams {
151            period: self.period,
152        };
153        let i = MswInput::from_candles(c, "close", p);
154        msw_with_kernel(&i, self.kernel)
155    }
156    #[inline(always)]
157    pub fn apply_slice(self, d: &[f64]) -> Result<MswOutput, MswError> {
158        let p = MswParams {
159            period: self.period,
160        };
161        let i = MswInput::from_slice(d, p);
162        msw_with_kernel(&i, self.kernel)
163    }
164    #[inline(always)]
165    pub fn into_stream(self) -> Result<MswStream, MswError> {
166        let p = MswParams {
167            period: self.period,
168        };
169        MswStream::try_new(p)
170    }
171}
172
173#[derive(Debug, Error)]
174pub enum MswError {
175    #[error("msw: Empty data provided for MSW.")]
176    EmptyInputData,
177    #[error("msw: All values are NaN.")]
178    AllValuesNaN,
179    #[error("msw: Invalid period: period = {period}, data length = {data_len}")]
180    InvalidPeriod { period: usize, data_len: usize },
181    #[error("msw: Not enough valid data: needed = {needed}, valid = {valid}")]
182    NotEnoughValidData { needed: usize, valid: usize },
183    #[error("msw: Output length mismatch: expected = {expected}, got = {got}")]
184    OutputLengthMismatch { expected: usize, got: usize },
185    #[error("msw: Invalid range expansion: start={start}, end={end}, step={step}")]
186    InvalidRange {
187        start: usize,
188        end: usize,
189        step: usize,
190    },
191    #[error("msw: Invalid kernel for batch: {0:?}")]
192    InvalidKernelForBatch(crate::utilities::enums::Kernel),
193
194    #[error("msw: Empty data provided for MSW.")]
195    EmptyData,
196}
197
198#[inline]
199pub fn msw(input: &MswInput) -> Result<MswOutput, MswError> {
200    msw_with_kernel(input, Kernel::Auto)
201}
202
203pub fn msw_with_kernel(input: &MswInput, kernel: Kernel) -> Result<MswOutput, MswError> {
204    let data: &[f64] = match &input.data {
205        MswData::Candles { candles, source } => source_type(candles, source),
206        MswData::Slice(sl) => sl,
207    };
208    if data.is_empty() {
209        return Err(MswError::EmptyInputData);
210    }
211    let period = input.get_period();
212    let first = data
213        .iter()
214        .position(|x| !x.is_nan())
215        .ok_or(MswError::AllValuesNaN)?;
216    let len = data.len();
217    if period == 0 || period > len {
218        return Err(MswError::InvalidPeriod {
219            period,
220            data_len: len,
221        });
222    }
223    if (len - first) < period {
224        return Err(MswError::NotEnoughValidData {
225            needed: period,
226            valid: len - first,
227        });
228    }
229    let mut chosen = match kernel {
230        Kernel::Auto => Kernel::Scalar,
231        other => other,
232    };
233
234    #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
235    if matches!(kernel, Kernel::Auto) && matches!(chosen, Kernel::Avx512 | Kernel::Avx512Batch) {
236        chosen = Kernel::Avx2;
237    }
238    unsafe {
239        match chosen {
240            Kernel::Scalar | Kernel::ScalarBatch => msw_scalar(data, period, first, len),
241            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
242            Kernel::Avx2 | Kernel::Avx2Batch => msw_avx2(data, period, first, len),
243            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
244            Kernel::Avx512 | Kernel::Avx512Batch => msw_avx512(data, period, first, len),
245            _ => unreachable!(),
246        }
247    }
248}
249
250#[inline]
251pub unsafe fn msw_scalar(
252    data: &[f64],
253    period: usize,
254    first: usize,
255    len: usize,
256) -> Result<MswOutput, MswError> {
257    let warm = first + period - 1;
258    let mut sine = alloc_with_nan_prefix(len, warm);
259    let mut lead = alloc_with_nan_prefix(len, warm);
260
261    let step = TULIP_TPI / period as f64;
262    let mut cos_table = Vec::with_capacity(period);
263    let mut sin_table = Vec::with_capacity(period);
264    let mut ang = 0.0f64;
265    for _ in 0..period {
266        let (s, c) = ang.sin_cos();
267        sin_table.push(s);
268        cos_table.push(c);
269        ang += step;
270    }
271
272    for i in warm..len {
273        let mut rp = 0.0f64;
274        let mut ip = 0.0f64;
275        for j in 0..period {
276            let w = *data.get_unchecked(i - j);
277            rp += cos_table[j] * w;
278            ip += sin_table[j] * w;
279        }
280
281        let mut phase = if rp.abs() > 0.001 {
282            atan(ip / rp)
283        } else {
284            TULIP_PI * if ip < 0.0 { -1.0 } else { 1.0 }
285        };
286        if rp < 0.0 {
287            phase += TULIP_PI;
288        }
289        phase += TULIP_PI * 0.5;
290        if phase < 0.0 {
291            phase += TULIP_TPI;
292        }
293        if phase > TULIP_TPI {
294            phase -= TULIP_TPI;
295        }
296
297        let (s, c) = phase.sin_cos();
298        *sine.get_unchecked_mut(i) = s;
299        *lead.get_unchecked_mut(i) = (s + c) * 0.707106781186547524400844362104849039_f64;
300    }
301    Ok(MswOutput { sine, lead })
302}
303
/// AVX2/FMA MSW kernel: processes four consecutive output positions per
/// iteration, then finishes the remainder with the scalar formula.
///
/// # Safety
/// The CPU must support AVX2 and FMA. The caller must also ensure
/// `period >= 1`, `len <= data.len()` and `first + period <= len`; samples are
/// read through raw pointers without bounds checks.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "avx2,fma")]
pub unsafe fn msw_avx2(
    data: &[f64],
    period: usize,
    first: usize,
    len: usize,
) -> Result<MswOutput, MswError> {
    use core::arch::x86_64::*;

    const LANES: usize = 4;

    /// Converts one (real, imaginary) correlation pair into `(sine, lead)`.
    #[inline]
    fn finish(re: f64, im: f64) -> (f64, f64) {
        let mut phase = if re.abs() > 0.001 {
            atan(im / re)
        } else if im < 0.0 {
            -TULIP_PI
        } else {
            TULIP_PI
        };
        if re < 0.0 {
            phase += TULIP_PI;
        }
        phase += TULIP_PI * 0.5;
        if phase < 0.0 {
            phase += TULIP_TPI;
        }
        if phase > TULIP_TPI {
            phase -= TULIP_TPI;
        }
        let (sn, cs) = phase.sin_cos();
        (sn, (sn + cs) * core::f64::consts::FRAC_1_SQRT_2)
    }

    let warm = first + period - 1;
    let mut sine = alloc_with_nan_prefix(len, warm);
    let mut lead = alloc_with_nan_prefix(len, warm);

    // (cos, sin) weight per lag; the angle is accumulated step by step.
    let step = TULIP_TPI / period as f64;
    let mut weights: Vec<(f64, f64)> = Vec::with_capacity(period);
    let mut angle = 0.0f64;
    for _ in 0..period {
        let (sn, cs) = angle.sin_cos();
        weights.push((cs, sn));
        angle += step;
    }

    let src = data.as_ptr();
    let mut i = warm;

    // Vector body: lane `l` accumulates the sums for output position `i + l`.
    while i + LANES <= len {
        let mut re_acc = _mm256_set1_pd(0.0);
        let mut im_acc = _mm256_set1_pd(0.0);

        for lag in 0..period {
            // Four consecutive samples starting at `i - lag`.
            let window = _mm256_loadu_pd(src.add(i - lag));
            let &(cs, sn) = weights.get_unchecked(lag);
            re_acc = _mm256_fmadd_pd(_mm256_set1_pd(cs), window, re_acc);
            im_acc = _mm256_fmadd_pd(_mm256_set1_pd(sn), window, im_acc);
        }

        let mut re = [0.0f64; LANES];
        let mut im = [0.0f64; LANES];
        _mm256_storeu_pd(re.as_mut_ptr(), re_acc);
        _mm256_storeu_pd(im.as_mut_ptr(), im_acc);

        for lane in 0..LANES {
            let (sn, ld) = finish(re[lane], im[lane]);
            *sine.get_unchecked_mut(i + lane) = sn;
            *lead.get_unchecked_mut(i + lane) = ld;
        }

        i += LANES;
    }

    // Scalar tail for the positions that do not fill a whole vector.
    while i < len {
        let (mut re, mut im) = (0.0f64, 0.0f64);
        for (lag, &(cs, sn)) in weights.iter().enumerate() {
            let sample = *data.get_unchecked(i - lag);
            re += cs * sample;
            im += sn * sample;
        }
        let (sn, ld) = finish(re, im);
        *sine.get_unchecked_mut(i) = sn;
        *lead.get_unchecked_mut(i) = ld;
        i += 1;
    }

    Ok(MswOutput { sine, lead })
}
410
/// AVX-512F/FMA MSW kernel: processes eight consecutive output positions per
/// iteration, then finishes the remainder with the scalar formula.
///
/// # Safety
/// The CPU must support AVX-512F and FMA. The caller must also ensure
/// `period >= 1`, `len <= data.len()` and `first + period <= len`; samples are
/// read through raw pointers without bounds checks.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
#[target_feature(enable = "avx512f,fma")]
pub unsafe fn msw_avx512(
    data: &[f64],
    period: usize,
    first: usize,
    len: usize,
) -> Result<MswOutput, MswError> {
    use core::arch::x86_64::*;

    const LANES: usize = 8;

    /// Converts one (real, imaginary) correlation pair into `(sine, lead)`.
    #[inline]
    fn finish(re: f64, im: f64) -> (f64, f64) {
        let mut phase = if re.abs() > 0.001 {
            atan(im / re)
        } else if im < 0.0 {
            -TULIP_PI
        } else {
            TULIP_PI
        };
        if re < 0.0 {
            phase += TULIP_PI;
        }
        phase += TULIP_PI * 0.5;
        if phase < 0.0 {
            phase += TULIP_TPI;
        }
        if phase > TULIP_TPI {
            phase -= TULIP_TPI;
        }
        let (sn, cs) = phase.sin_cos();
        (sn, (sn + cs) * core::f64::consts::FRAC_1_SQRT_2)
    }

    let warm = first + period - 1;
    let mut sine = alloc_with_nan_prefix(len, warm);
    let mut lead = alloc_with_nan_prefix(len, warm);

    // (cos, sin) weight per lag; the angle is accumulated step by step.
    let step = TULIP_TPI / period as f64;
    let mut weights: Vec<(f64, f64)> = Vec::with_capacity(period);
    let mut angle = 0.0f64;
    for _ in 0..period {
        let (sn, cs) = angle.sin_cos();
        weights.push((cs, sn));
        angle += step;
    }

    let src = data.as_ptr();
    let mut i = warm;

    // Vector body: lane `l` accumulates the sums for output position `i + l`.
    while i + LANES <= len {
        let mut re_acc = _mm512_set1_pd(0.0);
        let mut im_acc = _mm512_set1_pd(0.0);

        for lag in 0..period {
            // Eight consecutive samples starting at `i - lag`.
            let window = _mm512_loadu_pd(src.add(i - lag));
            let &(cs, sn) = weights.get_unchecked(lag);
            re_acc = _mm512_fmadd_pd(_mm512_set1_pd(cs), window, re_acc);
            im_acc = _mm512_fmadd_pd(_mm512_set1_pd(sn), window, im_acc);
        }

        let mut re = [0.0f64; LANES];
        let mut im = [0.0f64; LANES];
        _mm512_storeu_pd(re.as_mut_ptr(), re_acc);
        _mm512_storeu_pd(im.as_mut_ptr(), im_acc);

        for lane in 0..LANES {
            let (sn, ld) = finish(re[lane], im[lane]);
            *sine.get_unchecked_mut(i + lane) = sn;
            *lead.get_unchecked_mut(i + lane) = ld;
        }

        i += LANES;
    }

    // Scalar tail for the positions that do not fill a whole vector.
    while i < len {
        let (mut re, mut im) = (0.0f64, 0.0f64);
        for (lag, &(cs, sn)) in weights.iter().enumerate() {
            let sample = *data.get_unchecked(i - lag);
            re += cs * sample;
            im += sn * sample;
        }
        let (sn, ld) = finish(re, im);
        *sine.get_unchecked_mut(i) = sn;
        *lead.get_unchecked_mut(i) = ld;
        i += 1;
    }

    Ok(MswOutput { sine, lead })
}
517
/// "Short period" AVX-512 entry point; currently just forwards to [`msw_scalar`].
///
/// # Safety
/// Same contract as [`msw_scalar`], to which all arguments are passed unchanged.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub unsafe fn msw_avx512_short(
    data: &[f64],
    period: usize,
    first: usize,
    len: usize,
) -> Result<MswOutput, MswError> {
    let result = msw_scalar(data, period, first, len);
    result
}
528
/// "Long period" AVX-512 entry point; currently just forwards to [`msw_scalar`].
///
/// # Safety
/// Same contract as [`msw_scalar`], to which all arguments are passed unchanged.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline]
pub unsafe fn msw_avx512_long(
    data: &[f64],
    period: usize,
    first: usize,
    len: usize,
) -> Result<MswOutput, MswError> {
    let result = msw_scalar(data, period, first, len);
    result
}
539
/// Arctangent of `x` in radians; a free-function wrapper around `f64::atan`.
pub fn atan(x: f64) -> f64 {
    f64::atan(x)
}
543
/// Incremental MSW calculator fed one value at a time.
#[derive(Debug, Clone)]
pub struct MswStream {
    /// Window length (always at least 1 for a stream built by `try_new`).
    period: usize,
    /// Ring buffer holding the most recent `period` values.
    buffer: Vec<f64>,
    /// `cos` weight for each lag `0..period`.
    cos_table: Vec<f64>,
    /// `sin` weight for each lag `0..period`.
    sin_table: Vec<f64>,
    /// Ring-buffer slot the next value will be written to.
    head: usize,
    /// Becomes `true` once the ring buffer has wrapped for the first time.
    filled: bool,
}
553
554impl MswStream {
555    pub fn try_new(params: MswParams) -> Result<Self, MswError> {
556        let period = params.period.unwrap_or(5);
557        if period == 0 {
558            return Err(MswError::InvalidPeriod {
559                period,
560                data_len: 0,
561            });
562        }
563        let mut cos_table = Vec::with_capacity(period);
564        let mut sin_table = Vec::with_capacity(period);
565        for j in 0..period {
566            let angle = TULIP_TPI * j as f64 / period as f64;
567            cos_table.push(angle.cos());
568            sin_table.push(angle.sin());
569        }
570        Ok(Self {
571            period,
572            buffer: vec![f64::NAN; period],
573            cos_table,
574            sin_table,
575            head: 0,
576            filled: false,
577        })
578    }
579    #[inline(always)]
580    pub fn update(&mut self, value: f64) -> Option<(f64, f64)> {
581        self.buffer[self.head] = value;
582        self.head = (self.head + 1) % self.period;
583        if !self.filled && self.head == 0 {
584            self.filled = true;
585        }
586        if !self.filled {
587            return None;
588        }
589        Some(self.dot_ring())
590    }
591    #[inline(always)]
592    fn dot_ring(&self) -> (f64, f64) {
593        let mut rp = 0.0;
594        let mut ip = 0.0;
595
596        let mut idx = (self.head + self.period - 1) % self.period;
597        for j in 0..self.period {
598            rp += self.cos_table[j] * self.buffer[idx];
599            ip += self.sin_table[j] * self.buffer[idx];
600            idx = if idx == 0 { self.period - 1 } else { idx - 1 };
601        }
602        let mut phase = if rp.abs() > 0.001 {
603            atan(ip / rp)
604        } else {
605            TULIP_PI * if ip < 0.0 { -1.0 } else { 1.0 }
606        };
607        if rp < 0.0 {
608            phase += TULIP_PI;
609        }
610        phase += TULIP_PI / 2.0;
611        if phase < 0.0 {
612            phase += TULIP_TPI;
613        }
614        if phase > TULIP_TPI {
615            phase -= TULIP_TPI;
616        }
617        let (s, c) = phase.sin_cos();
618
619        let lead = (s + c) * 0.707106781186547524400844362104849039_f64;
620        (s, lead)
621    }
622}
623
/// Parameter sweep for batch runs.
#[derive(Clone, Debug)]
pub struct MswBatchRange {
    /// `(start, end, step)` of the period axis, expanded by `expand_grid`.
    pub period: (usize, usize, usize),
}
628
629impl Default for MswBatchRange {
630    fn default() -> Self {
631        Self {
632            period: (5, 254, 1),
633        }
634    }
635}
636
637#[derive(Clone, Debug, Default)]
638pub struct MswBatchBuilder {
639    range: MswBatchRange,
640    kernel: Kernel,
641}
642
643impl MswBatchBuilder {
644    pub fn new() -> Self {
645        Self::default()
646    }
647    pub fn kernel(mut self, k: Kernel) -> Self {
648        self.kernel = k;
649        self
650    }
651    #[inline]
652    pub fn period_range(mut self, start: usize, end: usize, step: usize) -> Self {
653        self.range.period = (start, end, step);
654        self
655    }
656    #[inline]
657    pub fn period_static(mut self, p: usize) -> Self {
658        self.range.period = (p, p, 0);
659        self
660    }
661    pub fn apply_slice(self, data: &[f64]) -> Result<MswBatchOutput, MswError> {
662        msw_batch_with_kernel(data, &self.range, self.kernel)
663    }
664    pub fn with_default_slice(data: &[f64], k: Kernel) -> Result<MswBatchOutput, MswError> {
665        MswBatchBuilder::new().kernel(k).apply_slice(data)
666    }
667    pub fn apply_candles(self, c: &Candles, src: &str) -> Result<MswBatchOutput, MswError> {
668        let slice = source_type(c, src);
669        self.apply_slice(slice)
670    }
671    pub fn with_default_candles(c: &Candles) -> Result<MswBatchOutput, MswError> {
672        MswBatchBuilder::new()
673            .kernel(Kernel::Auto)
674            .apply_candles(c, "close")
675    }
676}
677
678pub fn msw_batch_with_kernel(
679    data: &[f64],
680    sweep: &MswBatchRange,
681    k: Kernel,
682) -> Result<MswBatchOutput, MswError> {
683    let kernel = match k {
684        Kernel::Auto => detect_best_batch_kernel(),
685        other if other.is_batch() => other,
686        other => return Err(MswError::InvalidKernelForBatch(other)),
687    };
688    let simd = match kernel {
689        Kernel::Avx512Batch => Kernel::Avx512,
690        Kernel::Avx2Batch => Kernel::Avx2,
691        Kernel::ScalarBatch => Kernel::Scalar,
692        _ => unreachable!(),
693    };
694    msw_batch_par_slice(data, sweep, simd)
695}
696
697#[derive(Clone, Debug)]
698pub struct MswBatchOutput {
699    pub sine: Vec<f64>,
700    pub lead: Vec<f64>,
701    pub combos: Vec<MswParams>,
702    pub rows: usize,
703    pub cols: usize,
704}
705
706impl MswBatchOutput {
707    pub fn row_for_params(&self, p: &MswParams) -> Option<usize> {
708        self.combos
709            .iter()
710            .position(|c| c.period.unwrap_or(5) == p.period.unwrap_or(5))
711    }
712    pub fn sine_for(&self, p: &MswParams) -> Option<&[f64]> {
713        self.row_for_params(p).map(|row| {
714            let start = row * self.cols;
715            &self.sine[start..start + self.cols]
716        })
717    }
718    pub fn lead_for(&self, p: &MswParams) -> Option<&[f64]> {
719        self.row_for_params(p).map(|row| {
720            let start = row * self.cols;
721            &self.lead[start..start + self.cols]
722        })
723    }
724}
725
726#[inline(always)]
727fn expand_grid(r: &MswBatchRange) -> Result<Vec<MswParams>, MswError> {
728    #[inline]
729    fn axis_usize((start, end, step): (usize, usize, usize)) -> Result<Vec<usize>, MswError> {
730        if step == 0 || start == end {
731            return Ok(vec![start]);
732        }
733        if start < end {
734            let v: Vec<usize> = (start..=end).step_by(step).collect();
735            return if v.is_empty() {
736                Err(MswError::InvalidRange { start, end, step })
737            } else {
738                Ok(v)
739            };
740        }
741
742        let mut v = Vec::new();
743        let mut cur = start;
744        loop {
745            v.push(cur);
746            if cur <= end {
747                break;
748            }
749            match cur.checked_sub(step) {
750                Some(next) => {
751                    cur = next;
752                    if cur <= end {
753                        break;
754                    }
755                }
756                None => break,
757            }
758        }
759        if v.is_empty() {
760            Err(MswError::InvalidRange { start, end, step })
761        } else {
762            Ok(v)
763        }
764    }
765
766    let periods = axis_usize(r.period)?;
767    if periods.is_empty() {
768        return Err(MswError::InvalidRange {
769            start: r.period.0,
770            end: r.period.1,
771            step: r.period.2,
772        });
773    }
774    Ok(periods
775        .into_iter()
776        .map(|p| MswParams { period: Some(p) })
777        .collect())
778}
779
780#[inline(always)]
781pub fn msw_batch_slice(
782    data: &[f64],
783    sweep: &MswBatchRange,
784    kern: Kernel,
785) -> Result<MswBatchOutput, MswError> {
786    msw_batch_inner(data, sweep, kern, false)
787}
788
789#[inline(always)]
790pub fn msw_batch_par_slice(
791    data: &[f64],
792    sweep: &MswBatchRange,
793    kern: Kernel,
794) -> Result<MswBatchOutput, MswError> {
795    msw_batch_inner(data, sweep, kern, true)
796}
797
798#[inline(always)]
799fn msw_batch_inner(
800    data: &[f64],
801    sweep: &MswBatchRange,
802    kern: Kernel,
803    parallel: bool,
804) -> Result<MswBatchOutput, MswError> {
805    let combos = expand_grid(sweep)?;
806    let first = data
807        .iter()
808        .position(|x| !x.is_nan())
809        .ok_or(MswError::AllValuesNaN)?;
810    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
811    if data.len() - first < max_p {
812        return Err(MswError::NotEnoughValidData {
813            needed: max_p,
814            valid: data.len() - first,
815        });
816    }
817    let rows = combos.len();
818    let cols = data.len();
819
820    let mut sine_buf = make_uninit_matrix(rows, cols);
821    let mut lead_buf = make_uninit_matrix(rows, cols);
822
823    let warmup_periods: Vec<usize> = combos
824        .iter()
825        .map(|c| {
826            let period = c.period.unwrap();
827            first + period - 1
828        })
829        .collect();
830    init_matrix_prefixes(&mut sine_buf, cols, &warmup_periods);
831    init_matrix_prefixes(&mut lead_buf, cols, &warmup_periods);
832
833    let mut sine_guard = core::mem::ManuallyDrop::new(sine_buf);
834    let mut lead_guard = core::mem::ManuallyDrop::new(lead_buf);
835    let sine: &mut [f64] = unsafe {
836        core::slice::from_raw_parts_mut(sine_guard.as_mut_ptr() as *mut f64, sine_guard.len())
837    };
838    let lead: &mut [f64] = unsafe {
839        core::slice::from_raw_parts_mut(lead_guard.as_mut_ptr() as *mut f64, lead_guard.len())
840    };
841    let do_row = |row: usize, sine_row: &mut [f64], lead_row: &mut [f64]| unsafe {
842        let period = combos[row].period.unwrap();
843        match kern {
844            Kernel::Scalar => msw_row_scalar(data, first, period, sine_row, lead_row),
845            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
846            Kernel::Avx2 => msw_row_avx2(data, first, period, sine_row, lead_row),
847            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
848            Kernel::Avx512 => msw_row_avx512(data, first, period, sine_row, lead_row),
849            _ => unreachable!(),
850        }
851    };
852    if parallel {
853        #[cfg(not(target_arch = "wasm32"))]
854        {
855            sine.par_chunks_mut(cols)
856                .zip(lead.par_chunks_mut(cols))
857                .enumerate()
858                .for_each(|(row, (sine_row, lead_row))| do_row(row, sine_row, lead_row));
859        }
860
861        #[cfg(target_arch = "wasm32")]
862        {
863            for (row, (sine_row, lead_row)) in
864                sine.chunks_mut(cols).zip(lead.chunks_mut(cols)).enumerate()
865            {
866                do_row(row, sine_row, lead_row);
867            }
868        }
869    } else {
870        for (row, (sine_row, lead_row)) in
871            sine.chunks_mut(cols).zip(lead.chunks_mut(cols)).enumerate()
872        {
873            do_row(row, sine_row, lead_row);
874        }
875    }
876
877    let sine_vec = unsafe {
878        Vec::from_raw_parts(
879            sine_guard.as_mut_ptr() as *mut f64,
880            sine_guard.len(),
881            sine_guard.capacity(),
882        )
883    };
884    let lead_vec = unsafe {
885        Vec::from_raw_parts(
886            lead_guard.as_mut_ptr() as *mut f64,
887            lead_guard.len(),
888            lead_guard.capacity(),
889        )
890    };
891
892    Ok(MswBatchOutput {
893        sine: sine_vec,
894        lead: lead_vec,
895        combos,
896        rows,
897        cols,
898    })
899}
900
901#[inline(always)]
902fn msw_batch_inner_into(
903    data: &[f64],
904    sweep: &MswBatchRange,
905    kern: Kernel,
906    parallel: bool,
907    sine_out: &mut [f64],
908    lead_out: &mut [f64],
909) -> Result<Vec<MswParams>, MswError> {
910    use std::mem::MaybeUninit;
911
912    let combos = expand_grid(sweep)?;
913
914    let first = data
915        .iter()
916        .position(|x| !x.is_nan())
917        .ok_or(MswError::AllValuesNaN)?;
918    let max_p = combos.iter().map(|c| c.period.unwrap()).max().unwrap();
919
920    if data.len() - first < max_p {
921        return Err(MswError::NotEnoughValidData {
922            needed: max_p,
923            valid: data.len() - first,
924        });
925    }
926
927    let rows = combos.len();
928    let cols = data.len();
929    let expected = rows.checked_mul(cols).ok_or(MswError::InvalidRange {
930        start: sweep.period.0,
931        end: sweep.period.1,
932        step: sweep.period.2,
933    })?;
934    if sine_out.len() != expected || lead_out.len() != expected {
935        return Err(MswError::OutputLengthMismatch {
936            expected,
937            got: sine_out.len().max(lead_out.len()),
938        });
939    }
940
941    let sine_mu = unsafe {
942        std::slice::from_raw_parts_mut(
943            sine_out.as_mut_ptr() as *mut MaybeUninit<f64>,
944            sine_out.len(),
945        )
946    };
947    let lead_mu = unsafe {
948        std::slice::from_raw_parts_mut(
949            lead_out.as_mut_ptr() as *mut MaybeUninit<f64>,
950            lead_out.len(),
951        )
952    };
953
954    let warm: Vec<usize> = combos
955        .iter()
956        .map(|c| first + c.period.unwrap() - 1)
957        .collect();
958    init_matrix_prefixes(sine_mu, cols, &warm);
959    init_matrix_prefixes(lead_mu, cols, &warm);
960
961    let do_row = |row: usize,
962                  sine_row_mu: &mut [MaybeUninit<f64>],
963                  lead_row_mu: &mut [MaybeUninit<f64>]| unsafe {
964        let period = combos[row].period.unwrap();
965        let sine_row =
966            std::slice::from_raw_parts_mut(sine_row_mu.as_mut_ptr() as *mut f64, sine_row_mu.len());
967        let lead_row =
968            std::slice::from_raw_parts_mut(lead_row_mu.as_mut_ptr() as *mut f64, lead_row_mu.len());
969
970        match kern {
971            Kernel::Scalar => msw_row_scalar(data, first, period, sine_row, lead_row),
972            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
973            Kernel::Avx2 => msw_row_avx2(data, first, period, sine_row, lead_row),
974            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
975            Kernel::Avx512 => msw_row_avx512(data, first, period, sine_row, lead_row),
976            _ => unreachable!(),
977        }
978    };
979
980    if parallel {
981        #[cfg(not(target_arch = "wasm32"))]
982        {
983            use rayon::prelude::*;
984            sine_mu
985                .par_chunks_mut(cols)
986                .zip(lead_mu.par_chunks_mut(cols))
987                .enumerate()
988                .for_each(|(row, (sine_row_mu, lead_row_mu))| {
989                    do_row(row, sine_row_mu, lead_row_mu)
990                });
991        }
992        #[cfg(target_arch = "wasm32")]
993        {
994            for (row, (sine_row_mu, lead_row_mu)) in sine_mu
995                .chunks_mut(cols)
996                .zip(lead_mu.chunks_mut(cols))
997                .enumerate()
998            {
999                do_row(row, sine_row_mu, lead_row_mu);
1000            }
1001        }
1002    } else {
1003        for (row, (sine_row_mu, lead_row_mu)) in sine_mu
1004            .chunks_mut(cols)
1005            .zip(lead_mu.chunks_mut(cols))
1006            .enumerate()
1007        {
1008            do_row(row, sine_row_mu, lead_row_mu);
1009        }
1010    }
1011
1012    Ok(combos)
1013}
1014
1015#[inline(always)]
1016unsafe fn msw_row_scalar(
1017    data: &[f64],
1018    first: usize,
1019    period: usize,
1020    sine: &mut [f64],
1021    lead: &mut [f64],
1022) {
1023    let step = TULIP_TPI / period as f64;
1024    let mut cos_table = Vec::with_capacity(period);
1025    let mut sin_table = Vec::with_capacity(period);
1026    let mut ang = 0.0f64;
1027    for _ in 0..period {
1028        let (s, c) = ang.sin_cos();
1029        sin_table.push(s);
1030        cos_table.push(c);
1031        ang += step;
1032    }
1033
1034    let warm = first + period - 1;
1035    for i in warm..data.len() {
1036        let mut rp = 0.0f64;
1037        let mut ip = 0.0f64;
1038        for j in 0..period {
1039            let w = *data.get_unchecked(i - j);
1040            rp += cos_table[j] * w;
1041            ip += sin_table[j] * w;
1042        }
1043
1044        let mut phase = if rp.abs() > 0.001 {
1045            atan(ip / rp)
1046        } else {
1047            TULIP_PI * if ip < 0.0 { -1.0 } else { 1.0 }
1048        };
1049        if rp < 0.0 {
1050            phase += TULIP_PI;
1051        }
1052        phase += TULIP_PI * 0.5;
1053        if phase < 0.0 {
1054            phase += TULIP_TPI;
1055        }
1056        if phase > TULIP_TPI {
1057            phase -= TULIP_TPI;
1058        }
1059        let (s, c) = phase.sin_cos();
1060        sine[i] = s;
1061        lead[i] = (s + c) * 0.707106781186547524400844362104849039_f64;
1062    }
1063}
1064
/// AVX2/FMA MSW kernel for one output row.
///
/// Processes four consecutive output positions per iteration: for each lag
/// the four-sample window starting at `t - lag` is loaded unaligned and
/// accumulated against the broadcast cosine/sine weight with fused
/// multiply-add. The per-lane phase resolution is done in scalar code.
/// Positions that do not fill a whole vector are handled by a scalar tail
/// that also uses fused multiply-add.
///
/// # Safety
///
/// The CPU must support AVX2 and FMA, and `period` must be at least 1.
/// `sine` and `lead` are written with checked indexing.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[target_feature(enable = "avx2,fma")]
unsafe fn msw_row_avx2(
    data: &[f64],
    first: usize,
    period: usize,
    sine: &mut [f64],
    lead: &mut [f64],
) {
    use core::arch::x86_64::*;

    const LANES: usize = 4;

    let n = data.len();
    let start = first + period - 1;

    // Weight tables; the angle is accumulated by repeated addition.
    let delta = TULIP_TPI / period as f64;
    let mut cos_w = Vec::with_capacity(period);
    let mut sin_w = Vec::with_capacity(period);
    let mut theta = 0.0f64;
    for _ in 0..period {
        let (sv, cv) = theta.sin_cos();
        sin_w.push(sv);
        cos_w.push(cv);
        theta += delta;
    }
    let src = data.as_ptr();

    // Maps the (real, imaginary) correlation pair to the (sine, lead) outputs.
    let resolve = |real: f64, imag: f64| -> (f64, f64) {
        let mut phase = if real.abs() > 0.001 {
            atan(imag / real)
        } else if imag < 0.0 {
            -TULIP_PI
        } else {
            TULIP_PI
        };
        if real < 0.0 {
            phase += TULIP_PI;
        }
        phase += TULIP_PI * 0.5;
        if phase < 0.0 {
            phase += TULIP_TPI;
        }
        if phase > TULIP_TPI {
            phase -= TULIP_TPI;
        }
        let (sin_p, cos_p) = phase.sin_cos();
        (sin_p, (sin_p + cos_p) * std::f64::consts::FRAC_1_SQRT_2)
    };

    let mut t = start;

    // Vector body: outputs t, t+1, t+2, t+3 are produced together.
    while t + (LANES - 1) < n {
        let mut real_v = _mm256_setzero_pd();
        let mut imag_v = _mm256_setzero_pd();

        for lag in 0..period {
            // Lane l holds data[t + l - lag]; t >= lag because t >= period - 1.
            let window = _mm256_loadu_pd(src.add(t - lag));
            let cw = _mm256_set1_pd(*cos_w.get_unchecked(lag));
            let sw = _mm256_set1_pd(*sin_w.get_unchecked(lag));
            real_v = _mm256_fmadd_pd(cw, window, real_v);
            imag_v = _mm256_fmadd_pd(sw, window, imag_v);
        }

        let mut real_lanes = [0.0f64; LANES];
        let mut imag_lanes = [0.0f64; LANES];
        _mm256_storeu_pd(real_lanes.as_mut_ptr(), real_v);
        _mm256_storeu_pd(imag_lanes.as_mut_ptr(), imag_v);

        for lane in 0..LANES {
            let (sine_v, lead_v) = resolve(real_lanes[lane], imag_lanes[lane]);
            sine[t + lane] = sine_v;
            lead[t + lane] = lead_v;
        }

        t += LANES;
    }

    // Scalar tail for the remaining (< LANES) positions.
    while t < n {
        let mut real = 0.0;
        let mut imag = 0.0;
        for lag in 0..period {
            let sample = *src.add(t - lag);
            real = (*cos_w.get_unchecked(lag)).mul_add(sample, real);
            imag = (*sin_w.get_unchecked(lag)).mul_add(sample, imag);
        }
        let (sine_v, lead_v) = resolve(real, imag);
        sine[t] = sine_v;
        lead[t] = lead_v;
        t += 1;
    }
}
1166
/// AVX-512 row kernel entry point: picks the short-period or long-period
/// variant based on whether `period` is at most 32.
///
/// # Safety
///
/// Same requirements as the variant it forwards to.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[inline(always)]
unsafe fn msw_row_avx512(
    data: &[f64],
    first: usize,
    period: usize,
    sine: &mut [f64],
    lead: &mut [f64],
) {
    match period {
        0..=32 => msw_row_avx512_short(data, first, period, sine, lead),
        _ => msw_row_avx512_long(data, first, period, sine, lead),
    }
}
1182
/// AVX-512F/FMA MSW kernel for one output row.
///
/// Processes eight consecutive output positions per iteration: for each lag
/// the eight-sample window starting at `t - lag` is loaded unaligned and
/// accumulated against the broadcast cosine/sine weight with fused
/// multiply-add. The per-lane phase resolution is done in scalar code, and
/// leftover positions go through a scalar tail that also uses fused
/// multiply-add.
///
/// # Safety
///
/// The CPU must support AVX-512F and FMA, and `period` must be at least 1.
/// `sine` and `lead` are written with checked indexing.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[target_feature(enable = "avx512f,fma")]
unsafe fn msw_row_avx512_short(
    data: &[f64],
    first: usize,
    period: usize,
    sine: &mut [f64],
    lead: &mut [f64],
) {
    use core::arch::x86_64::*;

    const LANES: usize = 8;

    let n = data.len();
    let start = first + period - 1;

    // Weight tables; the angle is accumulated by repeated addition.
    let delta = TULIP_TPI / period as f64;
    let mut cos_w = Vec::with_capacity(period);
    let mut sin_w = Vec::with_capacity(period);
    let mut theta = 0.0f64;
    for _ in 0..period {
        let (sv, cv) = theta.sin_cos();
        sin_w.push(sv);
        cos_w.push(cv);
        theta += delta;
    }
    let src = data.as_ptr();

    // Maps the (real, imaginary) correlation pair to the (sine, lead) outputs.
    let resolve = |real: f64, imag: f64| -> (f64, f64) {
        let mut phase = if real.abs() > 0.001 {
            atan(imag / real)
        } else if imag < 0.0 {
            -TULIP_PI
        } else {
            TULIP_PI
        };
        if real < 0.0 {
            phase += TULIP_PI;
        }
        phase += TULIP_PI * 0.5;
        if phase < 0.0 {
            phase += TULIP_TPI;
        }
        if phase > TULIP_TPI {
            phase -= TULIP_TPI;
        }
        let (sin_p, cos_p) = phase.sin_cos();
        (sin_p, (sin_p + cos_p) * std::f64::consts::FRAC_1_SQRT_2)
    };

    let mut t = start;

    // Vector body: outputs t ..= t + 7 are produced together.
    while t + (LANES - 1) < n {
        let mut real_v = _mm512_setzero_pd();
        let mut imag_v = _mm512_setzero_pd();

        for lag in 0..period {
            // Lane l holds data[t + l - lag]; t >= lag because t >= period - 1.
            let window = _mm512_loadu_pd(src.add(t - lag));
            let cw = _mm512_set1_pd(*cos_w.get_unchecked(lag));
            let sw = _mm512_set1_pd(*sin_w.get_unchecked(lag));
            real_v = _mm512_fmadd_pd(cw, window, real_v);
            imag_v = _mm512_fmadd_pd(sw, window, imag_v);
        }

        let mut real_lanes = [0.0f64; LANES];
        let mut imag_lanes = [0.0f64; LANES];
        _mm512_storeu_pd(real_lanes.as_mut_ptr(), real_v);
        _mm512_storeu_pd(imag_lanes.as_mut_ptr(), imag_v);

        for lane in 0..LANES {
            let (sine_v, lead_v) = resolve(real_lanes[lane], imag_lanes[lane]);
            sine[t + lane] = sine_v;
            lead[t + lane] = lead_v;
        }

        t += LANES;
    }

    // Scalar tail for the remaining (< LANES) positions.
    while t < n {
        let mut real = 0.0;
        let mut imag = 0.0;
        for lag in 0..period {
            let sample = *src.add(t - lag);
            real = (*cos_w.get_unchecked(lag)).mul_add(sample, real);
            imag = (*sin_w.get_unchecked(lag)).mul_add(sample, imag);
        }
        let (sine_v, lead_v) = resolve(real, imag);
        sine[t] = sine_v;
        lead[t] = lead_v;
        t += 1;
    }
}
1284
/// Long-period AVX-512 row kernel.
///
/// Currently has no dedicated implementation: it forwards every argument
/// unchanged to `msw_row_avx512_short`, so both variants produce the same
/// output.
///
/// # Safety
///
/// Same requirements as `msw_row_avx512_short`.
#[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
#[target_feature(enable = "avx512f,fma")]
unsafe fn msw_row_avx512_long(
    data: &[f64],
    first: usize,
    period: usize,
    sine: &mut [f64],
    lead: &mut [f64],
) {
    msw_row_avx512_short(data, first, period, sine, lead)
}
1296
// Python binding `msw(data, period, kernel=None)`.
//
// Borrows the NumPy input as a contiguous slice, validates the kernel name
// (non-batch), runs the computation with the GIL released, and returns the
// `(sine, lead)` pair as two new NumPy arrays. Computation errors surface as
// `ValueError` carrying the error's display text.
#[cfg(feature = "python")]
#[pyfunction(name = "msw")]
#[pyo3(signature = (data, period, kernel=None))]
pub fn msw_py<'py>(
    py: Python<'py>,
    data: numpy::PyReadonlyArray1<'py, f64>,
    period: usize,
    kernel: Option<&str>,
) -> PyResult<(Bound<'py, PyArray1<f64>>, Bound<'py, PyArray1<f64>>)> {
    use numpy::PyArrayMethods;

    let prices = data.as_slice()?;
    let chosen = validate_kernel(kernel, false)?;

    let input = MswInput::from_slice(
        prices,
        MswParams {
            period: Some(period),
        },
    );

    // Release the GIL while the indicator is computed.
    let computed = py.allow_threads(|| msw_with_kernel(&input, chosen));

    match computed {
        Ok(out) => Ok((out.sine.into_pyarray(py), out.lead.into_pyarray(py))),
        Err(err) => Err(PyValueError::new_err(err.to_string())),
    }
}
1321
// Python-visible wrapper (exposed as `MswStream`) around the native
// `MswStream`; it only holds the wrapped stream.
#[cfg(feature = "python")]
#[pyclass(name = "MswStream")]
pub struct MswStreamPy {
    // The wrapped native stream that receives every `update` call.
    stream: MswStream,
}
1327
#[cfg(feature = "python")]
#[pymethods]
impl MswStreamPy {
    // Constructor: builds the native stream for `period`; a construction
    // failure is raised as `ValueError` with the error's display text.
    #[new]
    fn new(period: usize) -> PyResult<Self> {
        let params = MswParams {
            period: Some(period),
        };
        match MswStream::try_new(params) {
            Ok(stream) => Ok(Self { stream }),
            Err(err) => Err(PyValueError::new_err(err.to_string())),
        }
    }

    // Feeds one value to the wrapped stream and returns whatever it yields
    // (an optional pair of floats).
    fn update(&mut self, value: f64) -> Option<(f64, f64)> {
        self.stream.update(value)
    }
}
1345
// Python binding `msw_batch(data, period_range, kernel=None)`.
//
// Expands `period_range` into parameter combinations, allocates two flat
// NumPy buffers of `rows * cols` elements, fills them with the GIL released,
// and returns a dict with keys "sine" and "lead" (each reshaped to
// `(rows, cols)`) and "periods" (the period of every row as u64).
#[cfg(feature = "python")]
#[pyfunction(name = "msw_batch")]
#[pyo3(signature = (data, period_range, kernel=None))]
pub fn msw_batch_py<'py>(
    py: Python<'py>,
    data: numpy::PyReadonlyArray1<'py, f64>,
    period_range: (usize, usize, usize),
    kernel: Option<&str>,
) -> PyResult<Bound<'py, PyDict>> {
    use numpy::{IntoPyArray, PyArray1, PyArrayMethods};

    let prices = data.as_slice()?;
    let requested = validate_kernel(kernel, true)?;

    let sweep = MswBatchRange {
        period: period_range,
    };

    // Only the row count is needed up front; the combos themselves come back
    // from the batch routine below.
    let rows = expand_grid(&sweep)
        .map_err(|e| PyValueError::new_err(e.to_string()))?
        .len();
    let cols = prices.len();

    let total = match rows.checked_mul(cols) {
        Some(n) => n,
        None => return Err(PyValueError::new_err("msw_batch_py: rows*cols overflow")),
    };

    // Uninitialized output buffers; the batch routine is relied on to fill them.
    let out_sine = unsafe { PyArray1::<f64>::new(py, [total], false) };
    let out_lead = unsafe { PyArray1::<f64>::new(py, [total], false) };
    let sine_buf = unsafe { out_sine.as_slice_mut()? };
    let lead_buf = unsafe { out_lead.as_slice_mut()? };

    let combos = py
        .allow_threads(|| {
            let batch_kernel = if matches!(requested, Kernel::Auto) {
                detect_best_batch_kernel()
            } else {
                requested
            };
            // Translate the batch kernel into the per-row kernel it implies.
            let row_kernel = match batch_kernel {
                Kernel::Avx512Batch => Kernel::Avx512,
                Kernel::Avx2Batch => Kernel::Avx2,
                Kernel::ScalarBatch => Kernel::Scalar,
                _ => unreachable!(),
            };

            msw_batch_inner_into(prices, &sweep, row_kernel, true, sine_buf, lead_buf)
        })
        .map_err(|e| PyValueError::new_err(e.to_string()))?;

    let periods: Vec<u64> = combos.iter().map(|p| p.period.unwrap() as u64).collect();

    let dict = PyDict::new(py);
    dict.set_item("sine", out_sine.reshape((rows, cols))?)?;
    dict.set_item("lead", out_lead.reshape((rows, cols))?)?;
    dict.set_item("periods", periods.into_pyarray(py))?;

    Ok(dict)
}
1407
1408#[inline]
1409pub fn msw_into_slice(
1410    sine_dst: &mut [f64],
1411    lead_dst: &mut [f64],
1412    input: &MswInput,
1413    kern: Kernel,
1414) -> Result<(), MswError> {
1415    let data: &[f64] = match &input.data {
1416        MswData::Candles { candles, source } => source_type(candles, source),
1417        MswData::Slice(sl) => sl,
1418    };
1419
1420    if data.is_empty() {
1421        return Err(MswError::EmptyInputData);
1422    }
1423
1424    let period = input.get_period();
1425    let first = data
1426        .iter()
1427        .position(|x| !x.is_nan())
1428        .ok_or(MswError::AllValuesNaN)?;
1429    let len = data.len();
1430
1431    if period == 0 || period > len {
1432        return Err(MswError::InvalidPeriod {
1433            period,
1434            data_len: len,
1435        });
1436    }
1437
1438    if (len - first) < period {
1439        return Err(MswError::NotEnoughValidData {
1440            needed: period,
1441            valid: len - first,
1442        });
1443    }
1444
1445    let expected = data.len();
1446    if sine_dst.len() != expected || lead_dst.len() != expected {
1447        return Err(MswError::OutputLengthMismatch {
1448            expected,
1449            got: sine_dst.len().max(lead_dst.len()),
1450        });
1451    }
1452
1453    let chosen = match kern {
1454        Kernel::Auto => Kernel::Scalar,
1455        other => other,
1456    };
1457
1458    unsafe {
1459        match chosen {
1460            Kernel::Scalar | Kernel::ScalarBatch => {
1461                msw_scalar_into(data, period, first, len, sine_dst, lead_dst)
1462            }
1463            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1464            Kernel::Avx2 | Kernel::Avx2Batch => {
1465                msw_scalar_into(data, period, first, len, sine_dst, lead_dst)
1466            }
1467            #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
1468            Kernel::Avx512 | Kernel::Avx512Batch => {
1469                msw_scalar_into(data, period, first, len, sine_dst, lead_dst)
1470            }
1471            _ => unreachable!(),
1472        }
1473    }?;
1474
1475    let warmup = first + period - 1;
1476    for v in &mut sine_dst[..warmup] {
1477        *v = f64::NAN;
1478    }
1479    for v in &mut lead_dst[..warmup] {
1480        *v = f64::NAN;
1481    }
1482
1483    Ok(())
1484}
1485
1486#[inline]
1487unsafe fn msw_scalar_into(
1488    data: &[f64],
1489    period: usize,
1490    first: usize,
1491    len: usize,
1492    sine: &mut [f64],
1493    lead: &mut [f64],
1494) -> Result<(), MswError> {
1495    let step = TULIP_TPI / period as f64;
1496    let mut cos_table = Vec::with_capacity(period);
1497    let mut sin_table = Vec::with_capacity(period);
1498    let mut ang = 0.0f64;
1499    for _ in 0..period {
1500        let (s, c) = ang.sin_cos();
1501        sin_table.push(s);
1502        cos_table.push(c);
1503        ang += step;
1504    }
1505
1506    let warm = first + period - 1;
1507
1508    for i in warm..len {
1509        let mut rp = 0.0f64;
1510        let mut ip = 0.0f64;
1511        for j in 0..period {
1512            let w = *data.get_unchecked(i - j);
1513            rp += cos_table[j] * w;
1514            ip += sin_table[j] * w;
1515        }
1516
1517        let mut phase = if rp.abs() > 0.001 {
1518            atan(ip / rp)
1519        } else {
1520            TULIP_PI * if ip < 0.0 { -1.0 } else { 1.0 }
1521        };
1522        if rp < 0.0 {
1523            phase += TULIP_PI;
1524        }
1525        phase += TULIP_PI * 0.5;
1526        if phase < 0.0 {
1527            phase += TULIP_TPI;
1528        }
1529        if phase > TULIP_TPI {
1530            phase -= TULIP_TPI;
1531        }
1532
1533        let (s, c) = phase.sin_cos();
1534        *sine.get_unchecked_mut(i) = s;
1535        *lead.get_unchecked_mut(i) = (s + c) * 0.707106781186547524400844362104849039_f64;
1536    }
1537    Ok(())
1538}
1539
/// Serializable pair of MSW output series for the WASM bindings.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MswJsOutput {
    /// Sine series.
    pub sine: Vec<f64>,
    /// Lead series.
    pub lead: Vec<f64>,
}
1546
/// Flat, serializable MSW result returned by `msw_js`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MswResult {
    /// Row-major payload; `msw_js` stores the sine series first, followed by
    /// the lead series.
    pub values: Vec<f64>,
    /// Number of rows in `values` (`msw_js` sets this to 2).
    pub rows: usize,
    /// Number of columns per row (`msw_js` sets this to the input length).
    pub cols: usize,
}
1554
/// WASM binding: computes MSW for `data` and returns a serialized
/// `MswResult` whose `values` hold the sine series followed by the lead
/// series (`rows` = 2, `cols` = input length).
///
/// Computation and serialization errors are returned as string `JsValue`s.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_js(data: &[f64], period: usize) -> Result<JsValue, JsValue> {
    let cols = data.len();
    let input = MswInput::from_slice(
        data,
        MswParams {
            period: Some(period),
        },
    );

    // One allocation for both series: the first half is sine, the second lead.
    let mut values = vec![f64::NAN; 2 * cols];
    {
        let (sine_half, lead_half) = values.split_at_mut(cols);
        if let Err(err) = msw_into_slice(sine_half, lead_half, &input, Kernel::Auto) {
            return Err(JsValue::from_str(&err.to_string()));
        }
    }

    let payload = MswResult {
        values,
        rows: 2,
        cols,
    };
    match serde_wasm_bindgen::to_value(&payload) {
        Ok(js) => Ok(js),
        Err(e) => Err(JsValue::from_str(&format!("Serialization error: {}", e))),
    }
}
1578
/// Deprecated alias kept for older callers; forwards directly to `msw_js`
/// and returns its result unchanged.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
#[deprecated(since = "1.0.0", note = "Use msw_js instead")]
pub fn msw_wasm(data: &[f64], period: usize) -> Result<JsValue, JsValue> {
    msw_js(data, period)
}
1585
/// WASM binding: computes MSW from a raw input buffer into one flat output
/// buffer laid out as `len` sine values followed by `len` lead values.
///
/// `in_ptr` must point to `len` readable `f64`s and `out_ptr` to `2 * len`
/// writable `f64`s. The input may overlap the output (for example when a
/// caller reuses one buffer): in that case the result is computed into a
/// temporary buffer and copied out afterwards, so the input is never
/// overwritten while it is still being read.
///
/// # Errors
///
/// Returns a string `JsValue` when a pointer is null, when `2 * len`
/// overflows, or when the computation itself fails.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_into_flat(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period: usize,
) -> Result<(), JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("null pointer"));
    }
    let out_len = len
        .checked_mul(2)
        .ok_or_else(|| JsValue::from_str("2*len overflow"))?;

    // Byte-range overlap test between [in, in + len) and [out, out + 2*len).
    let elem = std::mem::size_of::<f64>();
    let in_start = in_ptr as usize;
    let in_end = in_start.saturating_add(len.saturating_mul(elem));
    let out_start = out_ptr as usize;
    let out_end = out_start.saturating_add(out_len.saturating_mul(elem));
    let overlaps = in_start < out_end && out_start < in_end;

    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);
        let input = MswInput::from_slice(
            data,
            MswParams {
                period: Some(period),
            },
        );

        if overlaps {
            // Compute off to the side, then publish with a raw copy so no
            // mutable slice ever aliases the input slice.
            let mut scratch = vec![f64::NAN; out_len];
            {
                let (sine, lead) = scratch.split_at_mut(len);
                msw_into_slice(sine, lead, &input, Kernel::Auto)
                    .map_err(|e| JsValue::from_str(&e.to_string()))?;
            }
            std::ptr::copy_nonoverlapping(scratch.as_ptr(), out_ptr, out_len);
            Ok(())
        } else {
            let sine = std::slice::from_raw_parts_mut(out_ptr, len);
            let lead = std::slice::from_raw_parts_mut(out_ptr.add(len), len);
            msw_into_slice(sine, lead, &input, Kernel::Auto)
                .map_err(|e| JsValue::from_str(&e.to_string()))
        }
    }
}
1613
/// WASM binding: computes MSW from a raw input buffer into two separate raw
/// output buffers of `len` elements each.
///
/// Any overlap between the three buffers (not just identical base pointers)
/// is detected. In that case the result is computed into temporary vectors
/// and then copied out, sine first and lead second, so if the two outputs
/// overlap each other the lead values win in the shared region.
///
/// # Errors
///
/// Returns a string `JsValue` when a pointer is null, when `period` is 0 or
/// greater than `len`, or when the computation itself fails.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_into(
    in_ptr: *const f64,
    sine_ptr: *mut f64,
    lead_ptr: *mut f64,
    len: usize,
    period: usize,
) -> Result<(), JsValue> {
    if in_ptr.is_null() || sine_ptr.is_null() || lead_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);

        if period == 0 || period > len {
            return Err(JsValue::from_str("Invalid period"));
        }

        let params = MswParams {
            period: Some(period),
        };
        let input = MswInput::from_slice(data, params);

        // Two equally sized buffers overlap when each starts before the other
        // ends. `len >= 1` here, so identical pointers always count.
        let span = len.saturating_mul(std::mem::size_of::<f64>());
        let overlaps =
            |a: usize, b: usize| a < b.saturating_add(span) && b < a.saturating_add(span);
        let src_addr = in_ptr as usize;
        let sine_addr = sine_ptr as usize;
        let lead_addr = lead_ptr as usize;
        let aliasing = overlaps(src_addr, sine_addr)
            || overlaps(src_addr, lead_addr)
            || overlaps(sine_addr, lead_addr);

        if aliasing {
            let mut temp_sine = vec![0.0; len];
            let mut temp_lead = vec![0.0; len];
            msw_into_slice(&mut temp_sine, &mut temp_lead, &input, Kernel::Auto)
                .map_err(|e| JsValue::from_str(&e.to_string()))?;

            // Raw copies instead of two simultaneous `&mut` slices: the
            // destinations may overlap each other, the temporaries never do.
            std::ptr::copy_nonoverlapping(temp_sine.as_ptr(), sine_ptr, len);
            std::ptr::copy_nonoverlapping(temp_lead.as_ptr(), lead_ptr, len);
        } else {
            let sine_out = std::slice::from_raw_parts_mut(sine_ptr, len);
            let lead_out = std::slice::from_raw_parts_mut(lead_ptr, len);
            msw_into_slice(sine_out, lead_out, &input, Kernel::Auto)
                .map_err(|e| JsValue::from_str(&e.to_string()))?;
        }

        Ok(())
    }
}
1663
/// WASM binding: allocates an uninitialized buffer with capacity for `len`
/// `f64` values and hands ownership of it to the caller as a raw pointer.
///
/// The buffer is intentionally not dropped here; it is meant to be released
/// with `msw_free` using the same `len`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_alloc(len: usize) -> *mut f64 {
    // `ManuallyDrop` keeps the allocation alive after this function returns.
    let mut buffer = std::mem::ManuallyDrop::new(Vec::<f64>::with_capacity(len));
    buffer.as_mut_ptr()
}
1672
/// WASM binding: releases a buffer previously obtained from `msw_alloc`.
///
/// `ptr` must be a pointer returned by `msw_alloc(len)` with the same `len`
/// and must not be used afterwards. A null `ptr` is ignored.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_free(ptr: *mut f64, len: usize) {
    if ptr.is_null() {
        return;
    }
    // SAFETY: per the contract above, `ptr` came from a `Vec<f64>` with
    // capacity `len`. The vector is rebuilt with length 0 because the buffer
    // may never have been written, and `Vec::from_raw_parts` requires the
    // first `length` elements to be initialized. Only the capacity matters
    // for deallocation.
    unsafe {
        drop(Vec::from_raw_parts(ptr, 0, len));
    }
}
1682
/// Configuration object deserialized from JS by the `msw_batch` binding.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MswBatchConfig {
    /// Period sweep, copied verbatim into `MswBatchRange::period`
    /// (presumably `(start, end, step)`, matching the other batch bindings).
    pub period_range: (usize, usize, usize),
}
1688
/// Serializable batch result returned by `msw_batch_js`, with the sine and
/// lead matrices kept in separate flat vectors.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MswBatchJsOutput {
    /// Flat sine values for all parameter combinations.
    pub sine: Vec<f64>,
    /// Flat lead values for all parameter combinations.
    pub lead: Vec<f64>,
    /// Parameter combinations that were evaluated.
    pub combos: Vec<MswParams>,
    /// Row count reported by the batch computation.
    pub rows: usize,
    /// Column count reported by the batch computation.
    pub cols: usize,
}
1698
/// Serializable batch result returned by the `msw_batch` binding, with both
/// output matrices packed into a single flat vector.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[derive(Serialize, Deserialize)]
pub struct MswBatchFlatJsOutput {
    /// All sine rows followed by all lead rows.
    pub values: Vec<f64>,
    /// Parameter combinations that were evaluated.
    pub combos: Vec<MswParams>,
    /// Total row count of `values` (twice the number of combinations).
    pub rows: usize,
    /// Number of columns per row (the input length).
    pub cols: usize,
}
1707
/// WASM binding exported as `msw_batch`: runs a period sweep described by a
/// JS config object and returns a serialized `MswBatchFlatJsOutput` whose
/// `values` contain all sine rows followed by all lead rows.
///
/// Config, overflow, computation and serialization failures are returned as
/// string `JsValue`s.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen(js_name = msw_batch)]
pub fn msw_batch_unified_js(data: &[f64], config: JsValue) -> Result<JsValue, JsValue> {
    let parsed: MswBatchConfig = match serde_wasm_bindgen::from_value(config) {
        Ok(cfg) => cfg,
        Err(e) => return Err(JsValue::from_str(&format!("Invalid config: {}", e))),
    };
    let sweep = MswBatchRange {
        period: parsed.period_range,
    };

    let combos = match expand_grid(&sweep) {
        Ok(list) => list,
        Err(_) => return Err(JsValue::from_str("No parameter combinations")),
    };
    let rows = combos.len();
    let cols = data.len();

    // Space for two rows-by-cols matrices stored back to back.
    let plane = rows.checked_mul(cols);
    let total = match plane.and_then(|n| n.checked_mul(2)) {
        Some(n) => n,
        None => return Err(JsValue::from_str("rows*cols overflow")),
    };
    let mut values = vec![f64::NAN; total];
    {
        // `total / 2` equals `rows * cols`, the boundary between the matrices.
        let (sine_out, lead_out) = values.split_at_mut(total / 2);
        msw_batch_inner_into(data, &sweep, Kernel::Auto, false, sine_out, lead_out)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;
    }

    let payload = MswBatchFlatJsOutput {
        values,
        combos,
        rows: 2 * rows,
        cols,
    };
    serde_wasm_bindgen::to_value(&payload)
        .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
1740
/// WASM binding: runs a period sweep given as explicit start/end/step values
/// and returns a serialized `MswBatchJsOutput` with separate sine and lead
/// vectors.
///
/// Computation and serialization failures are returned as string `JsValue`s.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_batch_js(
    data: &[f64],
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<JsValue, JsValue> {
    let sweep = MswBatchRange {
        period: (period_start, period_end, period_step),
    };

    let batch = match msw_batch_inner(data, &sweep, Kernel::Auto, false) {
        Ok(result) => result,
        Err(err) => return Err(JsValue::from_str(&err.to_string())),
    };

    // Repackage the native result field by field into the JS-facing struct.
    let payload = MswBatchJsOutput {
        sine: batch.sine,
        lead: batch.lead,
        combos: batch.combos,
        rows: batch.rows,
        cols: batch.cols,
    };

    match serde_wasm_bindgen::to_value(&payload) {
        Ok(js) => Ok(js),
        Err(e) => Err(JsValue::from_str(&format!("Serialization error: {}", e))),
    }
}
1767
/// WASM binding: expands a period sweep and returns the period of every
/// resulting combination as `f64`, in expansion order.
///
/// Returns the string error "No parameter combinations" when the sweep
/// cannot be expanded.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_batch_metadata_js(
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<Vec<f64>, JsValue> {
    let sweep = MswBatchRange {
        period: (period_start, period_end, period_step),
    };

    match expand_grid(&sweep) {
        Ok(combos) => {
            let periods = combos.iter().map(|c| c.period.unwrap() as f64).collect();
            Ok(periods)
        }
        Err(_) => Err(JsValue::from_str("No parameter combinations")),
    }
}
1787
/// WASM binding: runs a period sweep from a raw input buffer into one flat
/// raw output buffer holding all sine rows followed by all lead rows.
///
/// `in_ptr` must point to `len` readable `f64`s and `out_ptr` to
/// `2 * rows * len` writable `f64`s, where `rows` is the number of parameter
/// combinations in the sweep. Returns the total number of output rows
/// (`2 * rows`).
///
/// # Errors
///
/// Returns a string `JsValue` when a pointer is null, when the sweep cannot
/// be expanded, when the output size overflows `usize`, or when the
/// computation fails.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_batch_into_flat(
    in_ptr: *const f64,
    out_ptr: *mut f64,
    len: usize,
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<usize, JsValue> {
    if in_ptr.is_null() || out_ptr.is_null() {
        return Err(JsValue::from_str("null pointer"));
    }
    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);
        let sweep = MswBatchRange {
            period: (period_start, period_end, period_step),
        };
        let combos =
            expand_grid(&sweep).map_err(|_| JsValue::from_str("No parameter combinations"))?;
        let rows = combos.len();
        let cols = len;

        // Checked sizes: a wrapped product would make the raw slices below
        // shorter than the region the caller believes it handed over.
        let plane = rows
            .checked_mul(cols)
            .ok_or_else(|| JsValue::from_str("rows*cols overflow"))?;
        plane
            .checked_mul(2)
            .ok_or_else(|| JsValue::from_str("rows*cols overflow"))?;

        let sine_out = std::slice::from_raw_parts_mut(out_ptr, plane);
        let lead_out = std::slice::from_raw_parts_mut(out_ptr.add(plane), plane);

        msw_batch_inner_into(data, &sweep, Kernel::Auto, false, sine_out, lead_out)
            .map_err(|e| JsValue::from_str(&e.to_string()))?;

        Ok(2 * rows)
    }
}
1820
/// WASM binding: runs a period sweep from a raw input buffer into two
/// separate raw output buffers, one row of `len` values per parameter
/// combination, and returns the number of rows written.
///
/// Each row is produced by an independent call to `msw_into_slice`; the
/// first failing row aborts the sweep and its error is returned as a string
/// `JsValue`.
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
#[wasm_bindgen]
pub fn msw_batch_into(
    in_ptr: *const f64,
    sine_ptr: *mut f64,
    lead_ptr: *mut f64,
    len: usize,
    period_start: usize,
    period_end: usize,
    period_step: usize,
) -> Result<usize, JsValue> {
    if in_ptr.is_null() || sine_ptr.is_null() || lead_ptr.is_null() {
        return Err(JsValue::from_str("Null pointer provided"));
    }

    unsafe {
        let data = std::slice::from_raw_parts(in_ptr, len);

        let sweep = MswBatchRange {
            period: (period_start, period_end, period_step),
        };

        let combos = match expand_grid(&sweep) {
            Ok(list) => list,
            Err(_) => return Err(JsValue::from_str("No valid parameter combinations")),
        };

        let rows = combos.len();
        let cols = len;
        let total_len = match rows.checked_mul(cols) {
            Some(n) => n,
            None => return Err(JsValue::from_str("rows*cols overflow")),
        };

        let sine_out = std::slice::from_raw_parts_mut(sine_ptr, total_len);
        let lead_out = std::slice::from_raw_parts_mut(lead_ptr, total_len);

        // One independent single-series computation per output row.
        combos.iter().enumerate().try_for_each(|(row, params)| {
            let begin = row * cols;
            let end = begin + cols;
            let input = MswInput::from_slice(data, params.clone());

            msw_into_slice(
                &mut sine_out[begin..end],
                &mut lead_out[begin..end],
                &input,
                Kernel::Auto,
            )
            .map_err(|e| JsValue::from_str(&e.to_string()))
        })?;

        Ok(rows)
    }
}
1873
1874#[cfg(test)]
1875mod tests {
1876    use super::*;
1877    use crate::skip_if_unsupported;
1878    use crate::utilities::data_loader::read_candles_from_csv;
1879
1880    fn check_msw_partial_params(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1881        skip_if_unsupported!(kernel, test_name);
1882        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1883        let candles = read_candles_from_csv(file_path)?;
1884        let default_params = MswParams { period: None };
1885        let input_default = MswInput::from_candles(&candles, "close", default_params);
1886        let output_default = msw_with_kernel(&input_default, kernel)?;
1887        assert_eq!(output_default.sine.len(), candles.close.len());
1888        assert_eq!(output_default.lead.len(), candles.close.len());
1889        Ok(())
1890    }
1891
1892    fn check_msw_accuracy(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1893        skip_if_unsupported!(kernel, test_name);
1894        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1895        let candles = read_candles_from_csv(file_path)?;
1896        let params = MswParams { period: Some(5) };
1897        let input = MswInput::from_candles(&candles, "close", params);
1898        let msw_result = msw_with_kernel(&input, kernel)?;
1899        let expected_last_five_sine = [
1900            -0.49733966449848194,
1901            -0.8909425976991894,
1902            -0.709353328514554,
1903            -0.40483478076837887,
1904            -0.8817006719953886,
1905        ];
1906        let expected_last_five_lead = [
1907            -0.9651269132969991,
1908            -0.30888310410390457,
1909            -0.003182174183612666,
1910            0.36030983330963545,
1911            -0.28983704937461496,
1912        ];
1913        let start = msw_result.sine.len().saturating_sub(5);
1914        for (i, &val) in msw_result.sine[start..].iter().enumerate() {
1915            let diff = (val - expected_last_five_sine[i]).abs();
1916            assert!(
1917                diff < 1e-1,
1918                "[{}] MSW sine mismatch at idx {}: got {}, expected {}",
1919                test_name,
1920                i,
1921                val,
1922                expected_last_five_sine[i]
1923            );
1924        }
1925        for (i, &val) in msw_result.lead[start..].iter().enumerate() {
1926            let diff = (val - expected_last_five_lead[i]).abs();
1927            assert!(
1928                diff < 1e-1,
1929                "[{}] MSW lead mismatch at idx {}: got {}, expected {}",
1930                test_name,
1931                i,
1932                val,
1933                expected_last_five_lead[i]
1934            );
1935        }
1936        Ok(())
1937    }
1938
1939    fn check_msw_default_candles(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1940        skip_if_unsupported!(kernel, test_name);
1941        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1942        let candles = read_candles_from_csv(file_path)?;
1943        let input = MswInput::with_default_candles(&candles);
1944        let output = msw_with_kernel(&input, kernel)?;
1945        assert_eq!(output.sine.len(), candles.close.len());
1946        assert_eq!(output.lead.len(), candles.close.len());
1947        Ok(())
1948    }
1949
1950    fn check_msw_zero_period(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1951        skip_if_unsupported!(kernel, test_name);
1952        let input_data = [10.0, 20.0, 30.0];
1953        let params = MswParams { period: Some(0) };
1954        let input = MswInput::from_slice(&input_data, params);
1955        let res = msw_with_kernel(&input, kernel);
1956        assert!(
1957            res.is_err(),
1958            "[{}] MSW should fail with zero period",
1959            test_name
1960        );
1961        Ok(())
1962    }
1963
1964    fn check_msw_period_exceeds_length(
1965        test_name: &str,
1966        kernel: Kernel,
1967    ) -> Result<(), Box<dyn Error>> {
1968        skip_if_unsupported!(kernel, test_name);
1969        let data_small = [10.0, 20.0, 30.0];
1970        let params = MswParams { period: Some(10) };
1971        let input = MswInput::from_slice(&data_small, params);
1972        let res = msw_with_kernel(&input, kernel);
1973        assert!(
1974            res.is_err(),
1975            "[{}] MSW should fail with period exceeding length",
1976            test_name
1977        );
1978        Ok(())
1979    }
1980
1981    fn check_msw_very_small_dataset(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1982        skip_if_unsupported!(kernel, test_name);
1983        let single_point = [42.0];
1984        let params = MswParams { period: Some(5) };
1985        let input = MswInput::from_slice(&single_point, params);
1986        let res = msw_with_kernel(&input, kernel);
1987        assert!(
1988            res.is_err(),
1989            "[{}] MSW should fail with insufficient data",
1990            test_name
1991        );
1992        Ok(())
1993    }
1994
1995    fn check_msw_nan_handling(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
1996        skip_if_unsupported!(kernel, test_name);
1997        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
1998        let candles = read_candles_from_csv(file_path)?;
1999        let params = MswParams { period: Some(5) };
2000        let input = MswInput::from_candles(&candles, "close", params);
2001        let res = msw_with_kernel(&input, kernel)?;
2002        assert_eq!(res.sine.len(), candles.close.len());
2003        assert_eq!(res.lead.len(), candles.close.len());
2004        Ok(())
2005    }
2006
2007    fn check_msw_streaming(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2008        skip_if_unsupported!(kernel, test_name);
2009        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2010        let candles = read_candles_from_csv(file_path)?;
2011        let period = 5;
2012        let input = MswInput::from_candles(
2013            &candles,
2014            "close",
2015            MswParams {
2016                period: Some(period),
2017            },
2018        );
2019        let batch_output = msw_with_kernel(&input, kernel)?;
2020        let mut stream = MswStream::try_new(MswParams {
2021            period: Some(period),
2022        })?;
2023        let mut sine_stream = Vec::with_capacity(candles.close.len());
2024        let mut lead_stream = Vec::with_capacity(candles.close.len());
2025        for &price in &candles.close {
2026            match stream.update(price) {
2027                Some((s, l)) => {
2028                    sine_stream.push(s);
2029                    lead_stream.push(l);
2030                }
2031                None => {
2032                    sine_stream.push(f64::NAN);
2033                    lead_stream.push(f64::NAN);
2034                }
2035            }
2036        }
2037        assert_eq!(batch_output.sine.len(), sine_stream.len());
2038        assert_eq!(batch_output.lead.len(), lead_stream.len());
2039        for (i, (&b, &s)) in batch_output.sine.iter().zip(sine_stream.iter()).enumerate() {
2040            if b.is_nan() && s.is_nan() {
2041                continue;
2042            }
2043            let diff = (b - s).abs();
2044            assert!(
2045                diff < 1e-9,
2046                "[{}] MSW streaming sine mismatch at idx {}: batch={}, stream={}, diff={}",
2047                test_name,
2048                i,
2049                b,
2050                s,
2051                diff
2052            );
2053        }
2054        for (i, (&b, &l)) in batch_output.lead.iter().zip(lead_stream.iter()).enumerate() {
2055            if b.is_nan() && l.is_nan() {
2056                continue;
2057            }
2058            let diff = (b - l).abs();
2059            assert!(
2060                diff < 1e-9,
2061                "[{}] MSW streaming lead mismatch at idx {}: batch={}, stream={}, diff={}",
2062                test_name,
2063                i,
2064                b,
2065                l,
2066                diff
2067            );
2068        }
2069        Ok(())
2070    }
2071
2072    #[cfg(debug_assertions)]
2073    fn check_msw_no_poison(test_name: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2074        skip_if_unsupported!(kernel, test_name);
2075
2076        let file_path = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2077        let candles = read_candles_from_csv(file_path)?;
2078
2079        let test_params = vec![
2080            MswParams::default(),
2081            MswParams { period: Some(2) },
2082            MswParams { period: Some(3) },
2083            MswParams { period: Some(7) },
2084            MswParams { period: Some(10) },
2085            MswParams { period: Some(20) },
2086            MswParams { period: Some(50) },
2087            MswParams { period: Some(100) },
2088        ];
2089
2090        for (param_idx, params) in test_params.iter().enumerate() {
2091            let input = MswInput::from_candles(&candles, "close", params.clone());
2092            let output = msw_with_kernel(&input, kernel)?;
2093
2094            for (i, &val) in output.sine.iter().enumerate() {
2095                if val.is_nan() {
2096                    continue;
2097                }
2098
2099                let bits = val.to_bits();
2100
2101                if bits == 0x11111111_11111111 {
2102                    panic!(
2103                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2104						 in sine output with params: period={} (param set {})",
2105                        test_name,
2106                        val,
2107                        bits,
2108                        i,
2109                        params.period.unwrap_or(5),
2110                        param_idx
2111                    );
2112                }
2113
2114                if bits == 0x22222222_22222222 {
2115                    panic!(
2116                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2117						 in sine output with params: period={} (param set {})",
2118                        test_name,
2119                        val,
2120                        bits,
2121                        i,
2122                        params.period.unwrap_or(5),
2123                        param_idx
2124                    );
2125                }
2126
2127                if bits == 0x33333333_33333333 {
2128                    panic!(
2129                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2130						 in sine output with params: period={} (param set {})",
2131                        test_name,
2132                        val,
2133                        bits,
2134                        i,
2135                        params.period.unwrap_or(5),
2136                        param_idx
2137                    );
2138                }
2139            }
2140
2141            for (i, &val) in output.lead.iter().enumerate() {
2142                if val.is_nan() {
2143                    continue;
2144                }
2145
2146                let bits = val.to_bits();
2147
2148                if bits == 0x11111111_11111111 {
2149                    panic!(
2150                        "[{}] Found alloc_with_nan_prefix poison value {} (0x{:016X}) at index {} \
2151						 in lead output with params: period={} (param set {})",
2152                        test_name,
2153                        val,
2154                        bits,
2155                        i,
2156                        params.period.unwrap_or(5),
2157                        param_idx
2158                    );
2159                }
2160
2161                if bits == 0x22222222_22222222 {
2162                    panic!(
2163                        "[{}] Found init_matrix_prefixes poison value {} (0x{:016X}) at index {} \
2164						 in lead output with params: period={} (param set {})",
2165                        test_name,
2166                        val,
2167                        bits,
2168                        i,
2169                        params.period.unwrap_or(5),
2170                        param_idx
2171                    );
2172                }
2173
2174                if bits == 0x33333333_33333333 {
2175                    panic!(
2176                        "[{}] Found make_uninit_matrix poison value {} (0x{:016X}) at index {} \
2177						 in lead output with params: period={} (param set {})",
2178                        test_name,
2179                        val,
2180                        bits,
2181                        i,
2182                        params.period.unwrap_or(5),
2183                        param_idx
2184                    );
2185                }
2186            }
2187        }
2188
2189        Ok(())
2190    }
2191
2192    #[cfg(not(debug_assertions))]
2193    fn check_msw_no_poison(_test_name: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
2194        Ok(())
2195    }
2196
2197    macro_rules! generate_all_msw_tests {
2198        ($($test_fn:ident),*) => {
2199            paste::paste! {
2200                $(
2201                    #[test]
2202                    fn [<$test_fn _scalar_f64>]() {
2203                        let _ = $test_fn(stringify!([<$test_fn _scalar_f64>]), Kernel::Scalar);
2204                    }
2205                )*
2206                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2207                $(
2208                    #[test]
2209                    fn [<$test_fn _avx2_f64>]() {
2210                        let _ = $test_fn(stringify!([<$test_fn _avx2_f64>]), Kernel::Avx2);
2211                    }
2212                    #[test]
2213                    fn [<$test_fn _avx512_f64>]() {
2214                        let _ = $test_fn(stringify!([<$test_fn _avx512_f64>]), Kernel::Avx512);
2215                    }
2216                )*
2217            }
2218        }
2219    }
2220    generate_all_msw_tests!(
2221        check_msw_partial_params,
2222        check_msw_accuracy,
2223        check_msw_default_candles,
2224        check_msw_zero_period,
2225        check_msw_period_exceeds_length,
2226        check_msw_very_small_dataset,
2227        check_msw_nan_handling,
2228        check_msw_streaming,
2229        check_msw_no_poison
2230    );
2231
2232    #[cfg(feature = "proptest")]
2233    fn check_msw_property(
2234        test_name: &str,
2235        kernel: Kernel,
2236    ) -> Result<(), Box<dyn std::error::Error>> {
2237        use proptest::prelude::*;
2238        skip_if_unsupported!(kernel, test_name);
2239
2240        let strat = (2usize..=50).prop_flat_map(|period| {
2241            (period..=400).prop_flat_map(move |data_len| {
2242                prop_oneof![
2243
2244                    6 => prop::collection::vec(
2245                        (10.0f64..10000.0f64).prop_filter("finite", |x| x.is_finite()),
2246                        data_len
2247                    ).prop_map(move |v| (v, period)),
2248
2249                    3 => prop::collection::vec(
2250                        Just(100.0f64),
2251                        data_len
2252                    ).prop_map(move |v| (v, period)),
2253
2254                    3 => (0.0f64..100.0f64, 0.01f64..1.0f64).prop_map(move |(start, step)| {
2255                        let data: Vec<f64> = (0..data_len)
2256                            .map(|i| start + (i as f64) * step)
2257                            .collect();
2258                        (data, period)
2259                    }),
2260
2261                    2 => prop_oneof![
2262                        prop::collection::vec(Just(0.0f64), data_len),
2263                        prop::collection::vec((0.0001f64..0.01f64), data_len),
2264                    ].prop_map(move |v| (v, period))
2265                ]
2266            })
2267        });
2268
2269        proptest::test_runner::TestRunner::default()
2270            .run(&strat, |(data, period)| {
2271                let params = MswParams {
2272                    period: Some(period),
2273                };
2274                let input = MswInput::from_slice(&data, params.clone());
2275
2276                let output = msw_with_kernel(&input, kernel).unwrap();
2277                let ref_output = msw_with_kernel(&input, Kernel::Scalar).unwrap();
2278
2279                prop_assert_eq!(output.sine.len(), data.len(), "Sine output length mismatch");
2280                prop_assert_eq!(output.lead.len(), data.len(), "Lead output length mismatch");
2281
2282                let first_valid = data.iter().position(|x| !x.is_nan()).unwrap_or(0);
2283                let warmup_end = first_valid + period - 1;
2284
2285                for i in 0..warmup_end.min(data.len()) {
2286                    prop_assert!(
2287                        output.sine[i].is_nan(),
2288                        "Sine[{}] should be NaN during warmup (first_valid={}, period={})",
2289                        i,
2290                        first_valid,
2291                        period
2292                    );
2293                    prop_assert!(
2294                        output.lead[i].is_nan(),
2295                        "Lead[{}] should be NaN during warmup (first_valid={}, period={})",
2296                        i,
2297                        first_valid,
2298                        period
2299                    );
2300                }
2301
2302                if warmup_end < data.len() {
2303                    prop_assert!(
2304                        !output.sine[warmup_end].is_nan(),
2305                        "Sine[{}] should be valid after warmup",
2306                        warmup_end
2307                    );
2308                    prop_assert!(
2309                        !output.lead[warmup_end].is_nan(),
2310                        "Lead[{}] should be valid after warmup",
2311                        warmup_end
2312                    );
2313                }
2314
2315                for i in warmup_end..data.len() {
2316                    let sine_val = output.sine[i];
2317                    let lead_val = output.lead[i];
2318
2319                    if !sine_val.is_nan() {
2320                        prop_assert!(
2321                            sine_val >= -1.0 - 1e-9 && sine_val <= 1.0 + 1e-9,
2322                            "Sine[{}] = {} is outside [-1, 1] bounds",
2323                            i,
2324                            sine_val
2325                        );
2326                    }
2327
2328                    if !lead_val.is_nan() {
2329                        prop_assert!(
2330                            lead_val >= -1.0 - 1e-9 && lead_val <= 1.0 + 1e-9,
2331                            "Lead[{}] = {} is outside [-1, 1] bounds",
2332                            i,
2333                            lead_val
2334                        );
2335                    }
2336                }
2337
2338                if data.windows(2).all(|w| (w[0] - w[1]).abs() < 1e-10)
2339                    && warmup_end + 5 < data.len()
2340                {
2341                    let first_sine = output.sine[warmup_end];
2342                    let first_lead = output.lead[warmup_end];
2343
2344                    for i in (warmup_end + 1)..(warmup_end + 5).min(data.len()) {
2345                        let sine_val = output.sine[i];
2346                        let lead_val = output.lead[i];
2347
2348                        if !sine_val.is_nan() && !first_sine.is_nan() {
2349                            prop_assert!(
2350                                (sine_val - first_sine).abs() < 1e-9,
2351                                "Constant data: Sine[{}] = {} differs from first = {}",
2352                                i,
2353                                sine_val,
2354                                first_sine
2355                            );
2356                        }
2357                        if !lead_val.is_nan() && !first_lead.is_nan() {
2358                            prop_assert!(
2359                                (lead_val - first_lead).abs() < 1e-9,
2360                                "Constant data: Lead[{}] = {} differs from first = {}",
2361                                i,
2362                                lead_val,
2363                                first_lead
2364                            );
2365                        }
2366                    }
2367                }
2368
2369                if warmup_end + 10 < data.len() {
2370                    const COS_PI4: f64 = 0.7071067811865476;
2371
2372                    for i in (warmup_end + 5)..(warmup_end + 10).min(data.len()) {
2373                        let sine_val = output.sine[i];
2374                        let lead_val = output.lead[i];
2375
2376                        if !sine_val.is_nan() && !lead_val.is_nan() && sine_val.abs() < 0.999 {
2377                            let cos_phase = (1.0 - sine_val * sine_val).sqrt();
2378
2379                            let expected_lead_pos = sine_val * COS_PI4 + cos_phase * COS_PI4;
2380                            let expected_lead_neg = sine_val * COS_PI4 - cos_phase * COS_PI4;
2381
2382                            let diff_pos = (lead_val - expected_lead_pos).abs();
2383                            let diff_neg = (lead_val - expected_lead_neg).abs();
2384                            let min_diff = diff_pos.min(diff_neg);
2385
2386                            prop_assert!(
2387								min_diff < 0.01,
2388								"Phase relationship incorrect at [{}]: sine={}, lead={}, expected ≈ {} or {}",
2389								i, sine_val, lead_val, expected_lead_pos, expected_lead_neg
2390							);
2391                        }
2392                    }
2393                }
2394
2395                if data.iter().all(|&x| x.abs() < 1e-10) && warmup_end < data.len() {
2396                    const EXPECTED_SINE: f64 = -1.0;
2397                    const EXPECTED_LEAD: f64 = -0.7071067811865476;
2398
2399                    for i in warmup_end..(warmup_end + 3).min(data.len()) {
2400                        let sine_val = output.sine[i];
2401                        let lead_val = output.lead[i];
2402
2403                        prop_assert!(
2404                            (sine_val - EXPECTED_SINE).abs() < 1e-7,
2405                            "Zero data: Sine[{}] = {}, expected {}",
2406                            i,
2407                            sine_val,
2408                            EXPECTED_SINE
2409                        );
2410                        prop_assert!(
2411                            (lead_val - EXPECTED_LEAD).abs() < 1e-7,
2412                            "Zero data: Lead[{}] = {}, expected {}",
2413                            i,
2414                            lead_val,
2415                            EXPECTED_LEAD
2416                        );
2417                    }
2418                }
2419
2420                if period == 2 && warmup_end < data.len() {
2421                    for i in warmup_end..(warmup_end + 3).min(data.len()) {
2422                        let sine_val = output.sine[i];
2423                        let lead_val = output.lead[i];
2424
2425                        prop_assert!(
2426                            !sine_val.is_nan(),
2427                            "Period=2: Sine[{}] should not be NaN",
2428                            i
2429                        );
2430                        prop_assert!(
2431                            !lead_val.is_nan(),
2432                            "Period=2: Lead[{}] should not be NaN",
2433                            i
2434                        );
2435
2436                        prop_assert!(
2437                            sine_val >= -1.0 - 1e-9 && sine_val <= 1.0 + 1e-9,
2438                            "Period=2: Sine[{}] = {} out of bounds",
2439                            i,
2440                            sine_val
2441                        );
2442                        prop_assert!(
2443                            lead_val >= -1.0 - 1e-9 && lead_val <= 1.0 + 1e-9,
2444                            "Period=2: Lead[{}] = {} out of bounds",
2445                            i,
2446                            lead_val
2447                        );
2448
2449                        if i >= 3
2450                            && i >= warmup_end + 2
2451                            && (data[i] - data[i - 2]).abs() < 1e-10
2452                            && (data[i - 1] - data[i - 3]).abs() < 1e-10
2453                        {
2454                            let prev_sine = output.sine[i - 2];
2455                            prop_assert!(
2456								(sine_val - prev_sine).abs() < 1e-6,
2457								"Period=2 with alternating data: Sine should repeat every 2 samples"
2458							);
2459                        }
2460                    }
2461                }
2462
2463                for i in 0..data.len() {
2464                    let sine_val = output.sine[i];
2465                    let ref_sine = ref_output.sine[i];
2466                    let lead_val = output.lead[i];
2467                    let ref_lead = ref_output.lead[i];
2468
2469                    if sine_val.is_nan() && ref_sine.is_nan() {
2470                        continue;
2471                    }
2472
2473                    if sine_val.is_finite() && ref_sine.is_finite() {
2474                        let sine_bits = sine_val.to_bits();
2475                        let ref_sine_bits = ref_sine.to_bits();
2476                        let ulp_diff = sine_bits.abs_diff(ref_sine_bits);
2477
2478                        prop_assert!(
2479                            (sine_val - ref_sine).abs() <= 1e-9 || ulp_diff <= 5,
2480                            "Kernel mismatch for sine at [{}]: {} vs {} (ULP={})",
2481                            i,
2482                            sine_val,
2483                            ref_sine,
2484                            ulp_diff
2485                        );
2486                    } else {
2487                        prop_assert_eq!(
2488                            sine_val.is_nan(),
2489                            ref_sine.is_nan(),
2490                            "Kernel NaN mismatch for sine at [{}]",
2491                            i
2492                        );
2493                    }
2494
2495                    if lead_val.is_nan() && ref_lead.is_nan() {
2496                        continue;
2497                    }
2498
2499                    if lead_val.is_finite() && ref_lead.is_finite() {
2500                        let lead_bits = lead_val.to_bits();
2501                        let ref_lead_bits = ref_lead.to_bits();
2502                        let ulp_diff = lead_bits.abs_diff(ref_lead_bits);
2503
2504                        prop_assert!(
2505                            (lead_val - ref_lead).abs() <= 1e-9 || ulp_diff <= 5,
2506                            "Kernel mismatch for lead at [{}]: {} vs {} (ULP={})",
2507                            i,
2508                            lead_val,
2509                            ref_lead,
2510                            ulp_diff
2511                        );
2512                    } else {
2513                        prop_assert_eq!(
2514                            lead_val.is_nan(),
2515                            ref_lead.is_nan(),
2516                            "Kernel NaN mismatch for lead at [{}]",
2517                            i
2518                        );
2519                    }
2520                }
2521
2522                Ok(())
2523            })
2524            .unwrap();
2525
2526        Ok(())
2527    }
2528
2529    #[cfg(feature = "proptest")]
2530    generate_all_msw_tests!(check_msw_property);
2531
2532    fn check_batch_default_row(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2533        skip_if_unsupported!(kernel, test);
2534        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2535        let c = read_candles_from_csv(file)?;
2536        let output = MswBatchBuilder::new()
2537            .kernel(kernel)
2538            .apply_candles(&c, "close")?;
2539        let def = MswParams::default();
2540        let row = output.sine_for(&def).expect("default row missing");
2541        assert_eq!(row.len(), c.close.len());
2542        Ok(())
2543    }
2544
2545    #[cfg(debug_assertions)]
2546    fn check_batch_no_poison(test: &str, kernel: Kernel) -> Result<(), Box<dyn Error>> {
2547        skip_if_unsupported!(kernel, test);
2548
2549        let file = "src/data/2018-09-01-2024-Bitfinex_Spot-4h.csv";
2550        let c = read_candles_from_csv(file)?;
2551
2552        let test_configs = vec![
2553            (2, 10, 2),
2554            (5, 25, 5),
2555            (30, 60, 15),
2556            (2, 5, 1),
2557            (10, 20, 2),
2558            (15, 30, 3),
2559            (50, 100, 10),
2560        ];
2561
2562        for (cfg_idx, &(period_start, period_end, period_step)) in test_configs.iter().enumerate() {
2563            let output = MswBatchBuilder::new()
2564                .kernel(kernel)
2565                .period_range(period_start, period_end, period_step)
2566                .apply_candles(&c, "close")?;
2567
2568            for (idx, &val) in output.sine.iter().enumerate() {
2569                if val.is_nan() {
2570                    continue;
2571                }
2572
2573                let bits = val.to_bits();
2574                let row = idx / output.cols;
2575                let col = idx % output.cols;
2576                let combo = &output.combos[row];
2577
2578                if bits == 0x11111111_11111111 {
2579                    panic!(
2580                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
2581						 at row {} col {} (flat index {}) in sine output with params: period={}",
2582                        test,
2583                        cfg_idx,
2584                        val,
2585                        bits,
2586                        row,
2587                        col,
2588                        idx,
2589                        combo.period.unwrap_or(5)
2590                    );
2591                }
2592
2593                if bits == 0x22222222_22222222 {
2594                    panic!(
2595                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
2596						 at row {} col {} (flat index {}) in sine output with params: period={}",
2597                        test,
2598                        cfg_idx,
2599                        val,
2600                        bits,
2601                        row,
2602                        col,
2603                        idx,
2604                        combo.period.unwrap_or(5)
2605                    );
2606                }
2607
2608                if bits == 0x33333333_33333333 {
2609                    panic!(
2610                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
2611						 at row {} col {} (flat index {}) in sine output with params: period={}",
2612                        test,
2613                        cfg_idx,
2614                        val,
2615                        bits,
2616                        row,
2617                        col,
2618                        idx,
2619                        combo.period.unwrap_or(5)
2620                    );
2621                }
2622            }
2623
2624            for (idx, &val) in output.lead.iter().enumerate() {
2625                if val.is_nan() {
2626                    continue;
2627                }
2628
2629                let bits = val.to_bits();
2630                let row = idx / output.cols;
2631                let col = idx % output.cols;
2632                let combo = &output.combos[row];
2633
2634                if bits == 0x11111111_11111111 {
2635                    panic!(
2636                        "[{}] Config {}: Found alloc_with_nan_prefix poison value {} (0x{:016X}) \
2637						 at row {} col {} (flat index {}) in lead output with params: period={}",
2638                        test,
2639                        cfg_idx,
2640                        val,
2641                        bits,
2642                        row,
2643                        col,
2644                        idx,
2645                        combo.period.unwrap_or(5)
2646                    );
2647                }
2648
2649                if bits == 0x22222222_22222222 {
2650                    panic!(
2651                        "[{}] Config {}: Found init_matrix_prefixes poison value {} (0x{:016X}) \
2652						 at row {} col {} (flat index {}) in lead output with params: period={}",
2653                        test,
2654                        cfg_idx,
2655                        val,
2656                        bits,
2657                        row,
2658                        col,
2659                        idx,
2660                        combo.period.unwrap_or(5)
2661                    );
2662                }
2663
2664                if bits == 0x33333333_33333333 {
2665                    panic!(
2666                        "[{}] Config {}: Found make_uninit_matrix poison value {} (0x{:016X}) \
2667						 at row {} col {} (flat index {}) in lead output with params: period={}",
2668                        test,
2669                        cfg_idx,
2670                        val,
2671                        bits,
2672                        row,
2673                        col,
2674                        idx,
2675                        combo.period.unwrap_or(5)
2676                    );
2677                }
2678            }
2679        }
2680
2681        Ok(())
2682    }
2683
2684    #[cfg(not(debug_assertions))]
2685    fn check_batch_no_poison(_test: &str, _kernel: Kernel) -> Result<(), Box<dyn Error>> {
2686        Ok(())
2687    }
2688
2689    macro_rules! gen_batch_tests {
2690        ($fn_name:ident) => {
2691            paste::paste! {
2692                #[test] fn [<$fn_name _scalar>]()      {
2693                    let _ = $fn_name(stringify!([<$fn_name _scalar>]), Kernel::ScalarBatch);
2694                }
2695                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2696                #[test] fn [<$fn_name _avx2>]()        {
2697                    let _ = $fn_name(stringify!([<$fn_name _avx2>]), Kernel::Avx2Batch);
2698                }
2699                #[cfg(all(feature = "nightly-avx", target_arch = "x86_64"))]
2700                #[test] fn [<$fn_name _avx512>]()      {
2701                    let _ = $fn_name(stringify!([<$fn_name _avx512>]), Kernel::Avx512Batch);
2702                }
2703                #[test] fn [<$fn_name _auto_detect>]() {
2704                    let _ = $fn_name(stringify!([<$fn_name _auto_detect>]), Kernel::Auto);
2705                }
2706            }
2707        };
2708    }
2709    gen_batch_tests!(check_batch_default_row);
2710    gen_batch_tests!(check_batch_no_poison);
2711}
2712
/// Python entry point `msw_cuda_batch_dev`: runs the CUDA batch MSW over a
/// 1-D `f32` array for a `(start, end, step)` period sweep.
///
/// Returns the device array handle together with a dict holding `periods`
/// (one `u64` per parameter combination), `rows` (twice the number of
/// combinations) and `cols` (the input length).
///
/// Raises `ValueError` when CUDA is unavailable or when the CUDA wrapper
/// reports an error; the input must be viewable as a contiguous slice.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "msw_cuda_batch_dev")]
#[pyo3(signature = (close_f32, period_range, device_id=0))]
pub fn msw_cuda_batch_dev_py<'py>(
    py: Python<'py>,
    close_f32: numpy::PyReadonlyArray1<'py, f32>,
    period_range: (usize, usize, usize),
    device_id: usize,
) -> PyResult<(DeviceArrayF32Py, Bound<'py, pyo3::types::PyDict>)> {
    use numpy::IntoPyArray;
    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }
    let prices = close_f32.as_slice()?;
    let sweep = MswBatchRange {
        period: period_range,
    };

    // The GPU work runs with the GIL released.
    let (inner, combos) = py.allow_threads(|| {
        CudaMsw::new(device_id)
            .map_err(|e| PyValueError::new_err(e.to_string()))
            .and_then(|cuda| {
                cuda.msw_batch_dev(prices, &sweep)
                    .map_err(|e| PyValueError::new_err(e.to_string()))
            })
    })?;

    let handle = make_device_array_py(device_id, inner)?;

    let meta = pyo3::types::PyDict::new(py);
    let periods: Vec<u64> = combos.iter().map(|p| p.period.unwrap() as u64).collect();
    meta.set_item("periods", periods.into_pyarray(py))?;
    meta.set_item("rows", 2 * combos.len())?;
    meta.set_item("cols", prices.len())?;
    Ok((handle, meta))
}
2749
/// Python binding `msw_cuda_many_series_one_param_dev`: runs MSW on the GPU
/// for a 2-D `f32` array with a single `period` and returns the
/// device-resident result.
///
/// # Arguments
/// * `data_tm_f32` - 2-D `f32` array; `shape[0]` is taken as `rows` and
///   `shape[1]` as `cols` (presumably time-major, i.e. rows = time steps and
///   cols = series - confirm against the CUDA wrapper). It is read through
///   `as_slice`, whose error is propagated if a slice view cannot be obtained.
/// * `period` - wrapped as `MswParams { period: Some(period) }`.
/// * `device_id` - CUDA device index; defaults to `0` from Python.
///
/// # Errors
/// `ValueError("CUDA not available")` when `cuda_available()` is false,
/// `ValueError("expected 2D array (rows x cols)")` when the shape does not
/// have exactly two dimensions, and `ValueError` carrying the message of any
/// error from `CudaMsw`.
#[cfg(all(feature = "python", feature = "cuda"))]
#[pyfunction(name = "msw_cuda_many_series_one_param_dev")]
#[pyo3(signature = (data_tm_f32, period, device_id=0))]
pub fn msw_cuda_many_series_one_param_dev_py<'py>(
    py: Python<'py>,
    data_tm_f32: numpy::PyReadonlyArray2<'py, f32>,
    period: usize,
    device_id: usize,
) -> PyResult<DeviceArrayF32Py> {
    use numpy::PyUntypedArrayMethods;

    if !cuda_available() {
        return Err(PyValueError::new_err("CUDA not available"));
    }

    // Destructure the shape; anything other than exactly two dimensions is rejected.
    let (rows, cols) = match data_tm_f32.shape() {
        &[r, c] => (r, c),
        _ => return Err(PyValueError::new_err("expected 2D array (rows x cols)")),
    };

    let samples = data_tm_f32.as_slice()?;
    let params = MswParams {
        period: Some(period),
    };

    // The CUDA work runs inside `allow_threads`, i.e. with the GIL released.
    // Note the wrapper is called with `cols` before `rows`.
    let device_buf = py.allow_threads(|| {
        let ctx = CudaMsw::new(device_id).map_err(|e| PyValueError::new_err(e.to_string()))?;
        ctx.msw_many_series_one_param_time_major_dev(samples, cols, rows, &params)
            .map_err(|e| PyValueError::new_err(e.to_string()))
    })?;

    let handle = make_device_array_py(device_id, device_buf)?;
    Ok(handle)
}