1include!(concat!(env!("OUT_DIR"), "/simd_lanes.rs"));
34
35#[cfg(feature = "simd")]
36use std::simd::{Mask, Select, Simd, num::SimdFloat};
37
38use minarrow::{Bitmask, FloatArray, Vec64};
39
40#[cfg(feature = "simd")]
41use crate::kernels::aggregate::neumaier_simd_add;
42use crate::kernels::aggregate::{neumaier_add, sum_squares};
43use crate::utils::has_nulls;
44use minarrow::enums::error::KernelError;
45
46#[cfg(feature = "simd")]
47use crate::utils::bitmask_to_simd_mask;
48
49#[cfg(feature = "simd")]
50use minarrow::utils::is_simd_aligned;
51
/// Null-aware dot product of two equal-length `f64` slices.
///
/// Lanes whose validity bit is unset in `null_mask` contribute nothing to the
/// sum. Accumulation uses Neumaier (compensated) summation in both the SIMD
/// and scalar paths, so large-magnitude cancellation stays accurate.
///
/// # Panics
/// Panics if `v1.len() != v2.len()`, or (when nulls are present) if
/// `null_mask` is `None`.
#[inline(always)]
pub fn dot(v1: &[f64], v2: &[f64], null_mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
    assert_eq!(v1.len(), v2.len(), "dot: input length mismatch");

    let len = v1.len();

    // SIMD fast path: only taken when both inputs satisfy the crate's
    // alignment requirement, so `from_slice` chunks load efficiently.
    #[cfg(feature = "simd")]
    if is_simd_aligned(v1) && is_simd_aligned(v2) {
        let needs_nulls = has_nulls(null_count, null_mask);
        // N is the build-time lane count generated into simd_lanes.rs
        // (see the include! at the top of this file).
        const N: usize = W64;
        // Running vector sum plus Neumaier compensation vector.
        let mut sum_v = Simd::<f64, N>::splat(0.0);
        let mut comp_v = Simd::<f64, N>::splat(0.0);
        let mut i = 0;
        if !needs_nulls {
            while i + N <= len {
                let a = Simd::<f64, N>::from_slice(&v1[i..i + N]);
                let b = Simd::<f64, N>::from_slice(&v2[i..i + N]);
                neumaier_simd_add(&mut sum_v, &mut comp_v, a * b);
                i += N;
            }
        } else {
            let mask_bytes = null_mask.unwrap().as_bytes();
            while i + N <= len {
                let a = Simd::<f64, N>::from_slice(&v1[i..i + N]);
                let b = Simd::<f64, N>::from_slice(&v2[i..i + N]);
                let prod = a * b;
                // Expand the packed validity bits for this chunk into a lane mask.
                let lane_mask: Mask<i64, N> = bitmask_to_simd_mask::<N, i64>(mask_bytes, i, len);
                // Null lanes contribute 0.0 to the compensated sum.
                neumaier_simd_add(
                    &mut sum_v,
                    &mut comp_v,
                    lane_mask.select(prod, Simd::splat(0.0)),
                );
                i += N;
            }
        }
        // Fold the per-lane sums (sum + compensation) into a scalar, then
        // handle the ragged tail with plain accumulation.
        let mut acc = (sum_v + comp_v).reduce_sum();
        for idx in i..len {
            if !needs_nulls || unsafe { null_mask.unwrap().get_unchecked(idx) } {
                acc += v1[idx] * v2[idx];
            }
        }
        return acc;
    }

    // Scalar fallback: Neumaier-compensated accumulation element by element.
    let needs_nulls = has_nulls(null_count, null_mask);
    let mut acc = 0.0_f64;
    let mut comp = 0.0_f64;
    if !needs_nulls {
        for i in 0..len {
            neumaier_add(&mut acc, &mut comp, v1[i] * v2[i]);
        }
    } else {
        let mb = null_mask.unwrap();
        for i in 0..len {
            if unsafe { mb.get_unchecked(i) } {
                neumaier_add(&mut acc, &mut comp, v1[i] * v2[i]);
            }
        }
    }
    acc + comp
}
120
121#[inline(always)]
123pub fn argsort(
124 v: &[f64],
125 mask: Option<&Bitmask>,
126 null_count: Option<usize>,
127 descending: bool,
128) -> Vec64<usize> {
129 let mut idx: Vec64<usize> = (0..v.len()).collect();
130 if !has_nulls(null_count, mask) {
131 if descending {
132 idx.sort_unstable_by(|&i, &j| {
133 v[j].partial_cmp(&v[i]).unwrap_or(std::cmp::Ordering::Equal)
134 });
135 } else {
136 idx.sort_unstable_by(|&i, &j| {
137 v[i].partial_cmp(&v[j]).unwrap_or(std::cmp::Ordering::Equal)
138 });
139 }
140 } else {
141 let mb = mask.expect("argsort: mask required when nulls present");
142 idx.sort_unstable_by(|&i, &j| {
143 let vi_null = !unsafe { mb.get_unchecked(i) };
144 let vj_null = !unsafe { mb.get_unchecked(j) };
145 match (vi_null, vj_null) {
146 (true, true) => std::cmp::Ordering::Equal,
147 (true, false) => std::cmp::Ordering::Greater, (false, true) => std::cmp::Ordering::Less,
149 (false, false) => {
150 if descending {
151 v[j].partial_cmp(&v[i]).unwrap_or(std::cmp::Ordering::Equal)
152 } else {
153 v[i].partial_cmp(&v[j]).unwrap_or(std::cmp::Ordering::Equal)
154 }
155 }
156 }
157 });
158 }
159 idx
160}
161
162#[inline(always)]
164pub fn histogram(
165 v: &[f64],
166 bins: &[f64],
167 mask: Option<&Bitmask>,
168 null_count: Option<usize>,
169) -> Vec<usize> {
170 assert!(!bins.is_empty(), "histogram: bins must be non-empty");
171 let has_nulls = match null_count {
172 Some(n) => n > 0,
173 None => mask.is_some(),
174 };
175
176 let bins_sorted = bins.windows(2).all(|w| w[0] <= w[1]);
177 let mut counts = vec![0; bins.len() + 1];
178 if !has_nulls {
179 if bins_sorted {
180 for &x in v {
181 match bins.binary_search_by(|b| b.partial_cmp(&x).unwrap()) {
182 Ok(pos) | Err(pos) => counts[pos] += 1,
183 }
184 }
185 } else {
186 for &x in v {
187 let mut placed = false;
188 for (i, &b) in bins.iter().enumerate() {
189 if x < b {
190 counts[i] += 1;
191 placed = true;
192 break;
193 }
194 }
195 if !placed {
196 counts[bins.len()] += 1;
197 }
198 }
199 }
200 } else {
201 let mb = mask.expect("histogram: mask required when nulls present");
202 if bins_sorted {
203 for (i, &x) in v.iter().enumerate() {
204 if unsafe { mb.get_unchecked(i) } {
205 match bins.binary_search_by(|b| b.partial_cmp(&x).unwrap()) {
206 Ok(pos) | Err(pos) => counts[pos] += 1,
207 }
208 }
209 }
210 } else {
211 for (i, &x) in v.iter().enumerate() {
212 if unsafe { mb.get_unchecked(i) } {
213 let mut placed = false;
214 for (j, &b) in bins.iter().enumerate() {
215 if x < b {
216 counts[j] += 1;
217 placed = true;
218 break;
219 }
220 }
221 if !placed {
222 counts[bins.len()] += 1;
223 }
224 }
225 }
226 }
227 }
228 counts
229}
230
231#[inline(always)]
233pub fn reservoir_sample(
234 v: &[f64],
235 k: usize,
236 mask: Option<&Bitmask>,
237 null_count: Option<usize>,
238) -> FloatArray<f64> {
239 use rand::prelude::*;
240 assert!(k > 0, "reservoir_sample: k must be positive");
241 let has_nulls = match null_count {
242 Some(n) => n > 0,
243 None => mask.is_some(),
244 };
245
246 let mut rng = rand::rng();
247 let mut out = Vec64::with_capacity(k);
248 let mut seen = 0usize;
249
250 for (idx, &val) in v.iter().enumerate() {
251 if has_nulls && !unsafe { mask.unwrap().get_unchecked(idx) } {
252 continue;
253 }
254 if out.len() < k {
255 out.push(val);
256 } else {
257 let j = rng.random_range(0..=seen);
258 if j < k {
259 out[j] = val;
260 }
261 }
262 seen += 1;
263 }
264 assert_eq!(out.len(), k, "reservoir_sample: not enough valid values");
265 FloatArray::from_vec64(out, None)
266}
267
/// Multiplies every valid element of `v` by `alpha`, in place.
///
/// Elements whose validity bit is unset in `null_mask` are left untouched.
/// Takes a SIMD fast path when the buffer satisfies the crate's alignment
/// requirement, with a scalar loop for the ragged tail.
#[inline(always)]
pub fn scale(v: &mut [f64], alpha: f64, null_mask: Option<&Bitmask>, null_count: Option<usize>) {
    #[cfg(feature = "simd")]
    if is_simd_aligned(v) {
        // Build-time lane count from simd_lanes.rs (include! at file top).
        const N: usize = W64;
        let len = v.len();
        let alpha_v = Simd::<f64, N>::splat(alpha);
        let mut i = 0;

        if !has_nulls(null_count, null_mask) {
            // Dense path: scale full chunks, then the scalar tail.
            while i + N <= len {
                let chunk = Simd::from_slice(&v[i..i + N]);
                let scaled = chunk * alpha_v;
                v[i..i + N].copy_from_slice(&scaled.to_array());
                i += N;
            }
            for x in &mut v[i..] {
                *x *= alpha;
            }
        } else {
            let mb = null_mask.expect("scale: mask required when nulls present");
            let mask_bytes = mb.as_bytes();

            while i + N <= len {
                let chunk = Simd::from_slice(&v[i..i + N]);
                // Expand packed validity bits for this chunk into a lane mask.
                let lane_mask: Mask<i64, N> = bitmask_to_simd_mask::<N, i64>(mask_bytes, i, len);
                // Blend: scaled value for valid lanes, original for null lanes.
                let result = lane_mask.select(chunk * alpha_v, chunk);
                v[i..i + N].copy_from_slice(&result.to_array());
                i += N;
            }
            // Scalar tail, still honouring the mask.
            for idx in i..len {
                if unsafe { mb.get_unchecked(idx) } {
                    v[idx] *= alpha;
                }
            }
        }
        return;
    }

    // Scalar fallback (also used when the buffer is not SIMD-aligned).
    if !has_nulls(null_count, null_mask) {
        for x in v {
            *x *= alpha;
        }
    } else {
        let mb = null_mask.expect("scale: mask required when nulls present");
        for (i, x) in v.iter_mut().enumerate() {
            if unsafe { mb.get_unchecked(i) } {
                *x *= alpha;
            }
        }
    }
}
332
333#[inline(always)]
341pub fn scale_vec(
342 n: i32,
343 alpha: f64,
344 x: &mut [f64],
345 incx: i32,
346 null_mask: Option<&Bitmask>,
347 null_count: Option<usize>,
348) -> Result<(), KernelError> {
349 if n < 0 {
350 return Err(KernelError::InvalidArguments(
351 "n must be non-negative".into(),
352 ));
353 }
354 let n = n as usize;
355 let incx = incx as usize;
356 if incx == 0 {
357 return Err(KernelError::InvalidArguments(
358 "incx must be positive".into(),
359 ));
360 }
361 if n == 0 {
362 return Ok(());
363 }
364 if (n - 1).saturating_mul(incx) >= x.len() {
365 return Err(KernelError::InvalidArguments(
366 "indexing out of bounds".into(),
367 ));
368 }
369
370 let mask_present = has_nulls(null_count, null_mask);
371
372 #[cfg(feature = "simd")]
373 if is_simd_aligned(x) {
374 const LANES: usize = W64;
375 let alpha_v = Simd::<f64, LANES>::splat(alpha);
376
377 if incx == 1 && !mask_present {
379 let mut i = 0;
381 while i + LANES <= n {
382 let chunk = Simd::from_slice(&x[i..i + LANES]);
383 let out_v = chunk * alpha_v;
384 x[i..i + LANES].copy_from_slice(&out_v.to_array());
385 i += LANES;
386 }
387 for xi in &mut x[i..n] {
388 *xi *= alpha;
389 }
390 return Ok(());
391 }
392 if incx == 1 && mask_present {
393 let mb = null_mask.unwrap();
395 let bytes = mb.as_bytes();
396 let mut i = 0;
397 while i + LANES <= n {
398 let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
400 let chunk = Simd::from_slice(&x[i..i + LANES]);
401 let scaled = chunk * alpha_v;
402 let out_v = lane_mask.select(scaled, chunk);
404 x[i..i + LANES].copy_from_slice(&out_v.to_array());
405 i += LANES;
406 }
407 for idx in i..n {
409 if unsafe { mb.get_unchecked(idx) } {
410 x[idx] *= alpha;
411 }
412 }
413 return Ok(());
414 }
415 }
416
417 if !mask_present {
419 let mut idx = 0;
420 for _ in 0..n {
421 x[idx] *= alpha;
422 idx += incx;
423 }
424 } else {
425 let mb = null_mask.unwrap();
426 let mut idx = 0;
427 for _ in 0..n {
428 if unsafe { mb.get_unchecked(idx) } {
429 x[idx] *= alpha;
430 }
431 idx += incx;
432 }
433 }
434 Ok(())
435}
436
437#[inline(always)]
442pub fn axpy(
443 n: i32,
444 alpha: f64,
445 x: &[f64],
446 incx: i32,
447 y: &mut [f64],
448 incy: i32,
449 null_mask: Option<&Bitmask>,
450 null_count: Option<usize>,
451) -> Result<(), KernelError> {
452 if n < 0 {
453 return Err(KernelError::InvalidArguments(
454 "n must be non-negative".into(),
455 ));
456 }
457 let n = n as usize;
458 let incx = incx as usize;
459 let incy = incy as usize;
460 if incx == 0 || incy == 0 {
461 return Err(KernelError::InvalidArguments(
462 "increments must be positive".into(),
463 ));
464 }
465 if n == 0 {
466 return Ok(());
467 }
468 if (n - 1).saturating_mul(incx) >= x.len() {
469 return Err(KernelError::InvalidArguments("x out of bounds".into()));
470 }
471 if (n - 1).saturating_mul(incy) >= y.len() {
472 return Err(KernelError::InvalidArguments("y out of bounds".into()));
473 }
474
475 let mask_present = has_nulls(null_count, null_mask);
476
477 #[cfg(feature = "simd")]
478 if is_simd_aligned(x) {
479 const LANES: usize = W64;
480 let alpha_v = Simd::<f64, LANES>::splat(alpha);
481
482 if incx == 1 && incy == 1 && !mask_present {
484 let mut i = 0;
486 while i + LANES <= n {
487 let xv = Simd::from_slice(&x[i..i + LANES]);
488 let yv = Simd::from_slice(&y[i..i + LANES]);
489 let out_v = alpha_v * xv + yv;
490 y[i..i + LANES].copy_from_slice(&out_v.to_array());
491 i += LANES;
492 }
493 for j in i..n {
494 y[j] += alpha * x[j];
495 }
496 return Ok(());
497 }
498 if incx == 1 && incy == 1 && mask_present {
499 let mb = null_mask.unwrap();
501 let bytes = mb.as_bytes();
502 let mut i = 0;
503 while i + LANES <= n {
504 let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
505 let xv = Simd::from_slice(&x[i..i + LANES]);
506 let yv = Simd::from_slice(&y[i..i + LANES]);
507 let computed = alpha_v * xv + yv;
508 let out_v = lane_mask.select(computed, yv);
509 y[i..i + LANES].copy_from_slice(&out_v.to_array());
510 i += LANES;
511 }
512 for idx in i..n {
513 if unsafe { mb.get_unchecked(idx) } {
514 y[idx] += alpha * x[idx];
515 }
516 }
517 return Ok(());
518 }
519 }
520
521 if !mask_present {
523 let mut ix = 0;
524 let mut iy = 0;
525 for _ in 0..n {
526 y[iy] += alpha * x[ix];
527 ix += incx;
528 iy += incy;
529 }
530 } else {
531 let mb = null_mask.unwrap();
532 let mut ix = 0;
533 let mut iy = 0;
534 for _ in 0..n {
535 if unsafe { mb.get_unchecked(ix) } {
536 y[iy] += alpha * x[ix];
537 }
538 ix += incx;
539 iy += incy;
540 }
541 }
542 Ok(())
543}
544
545#[inline(always)]
547pub fn l2_norm(v: &[f64], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
548 sum_squares(v, mask, null_count).sqrt()
549}
550
/// BLAS-style `nrm2`: Euclidean norm of `n` elements of `x` stepped by
/// `incx`, skipping elements whose validity bit is unset in `null_mask`.
/// Accumulation is Neumaier-compensated in both SIMD and scalar paths.
///
/// # Errors
/// Returns `KernelError::InvalidArguments` when `n` is negative, `incx` is
/// not strictly positive, or the last touched index `(n - 1) * incx` falls
/// outside `x`.
#[inline(always)]
pub fn vector_norm(
    n: i32,
    x: &[f64],
    incx: i32,
    null_mask: Option<&Bitmask>,
    null_count: Option<usize>,
) -> Result<f64, KernelError> {
    if n < 0 {
        return Err(KernelError::InvalidArguments(
            "n must be non-negative".into(),
        ));
    }
    // Sign is checked before the usize cast so a negative incx cannot wrap.
    if incx <= 0 {
        return Err(KernelError::InvalidArguments(
            "incx must be positive".into(),
        ));
    }
    let n = n as usize;
    let incx = incx as usize;
    if n == 0 {
        return Ok(0.0);
    }
    if (n - 1).saturating_mul(incx) >= x.len() {
        return Err(KernelError::InvalidArguments(
            "indexing out of bounds for x".into(),
        ));
    }

    let mask_present = has_nulls(null_count, null_mask);

    #[cfg(feature = "simd")]
    if is_simd_aligned(x) {
        use crate::utils::bitmask_to_simd_mask;
        use core::simd::Simd;

        // Build-time lane count from simd_lanes.rs (include! at file top).
        const LANES: usize = W64;

        // Unit-stride fast path, no nulls: compensated sum of squares.
        if incx == 1 && !mask_present {
            let mut sum_v = Simd::<f64, LANES>::splat(0.0);
            let mut comp_v = Simd::<f64, LANES>::splat(0.0);
            let mut i = 0;
            while i + LANES <= n {
                let v = Simd::from_slice(&x[i..i + LANES]);
                neumaier_simd_add(&mut sum_v, &mut comp_v, v * v);
                i += LANES;
            }
            // Fold lanes (sum + compensation), then add the scalar tail.
            let mut sumsq = (sum_v + comp_v).reduce_sum();
            for &xi in &x[i..n] {
                sumsq += xi * xi;
            }
            return Ok(sumsq.sqrt());
        }

        // Unit-stride path with nulls: zero out invalid lanes before squaring.
        if incx == 1 && mask_present {
            let mb = null_mask.unwrap();
            let bytes = mb.as_bytes();
            let mut sum_v = Simd::<f64, LANES>::splat(0.0);
            let mut comp_v = Simd::<f64, LANES>::splat(0.0);
            let mut i = 0;
            while i + LANES <= n {
                let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
                let v = Simd::from_slice(&x[i..i + LANES]);
                let v = lane_mask.select(v, Simd::splat(0.0));
                neumaier_simd_add(&mut sum_v, &mut comp_v, v * v);
                i += LANES;
            }
            let mut sumsq = (sum_v + comp_v).reduce_sum();
            // Scalar tail, still honouring the mask.
            for idx in i..n {
                if unsafe { mb.get_unchecked(idx) } {
                    sumsq += x[idx] * x[idx];
                }
            }
            return Ok(sumsq.sqrt());
        }
    }

    // General strided scalar path with Neumaier compensation.
    let mut sumsq = 0.0_f64;
    let mut comp = 0.0_f64;
    if !mask_present {
        let mut idx = 0;
        for _ in 0..n {
            let xi = x[idx];
            neumaier_add(&mut sumsq, &mut comp, xi * xi);
            idx += incx;
        }
    } else {
        let mb = null_mask.unwrap();
        let mut idx = 0;
        for _ in 0..n {
            if unsafe { mb.get_unchecked(idx) } {
                let xi = x[idx];
                neumaier_add(&mut sumsq, &mut comp, xi * xi);
            }
            idx += incx;
        }
    }
    Ok((sumsq + comp).sqrt())
}
660
// Unit tests: each kernel is exercised with and without a validity mask,
// plus large-input accuracy checks for the compensated-summation paths.
#[cfg(test)]
mod tests {
    use minarrow::{Bitmask, vec64};

    use super::*;

    // Builds a validity mask from bools (true = valid, false = null).
    fn make_mask(bits: &[bool]) -> Bitmask {
        Bitmask::from_bools(bits)
    }

    #[test]
    fn test_dot_no_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let expected = 1.0 * 2.0 + 2.0 * 0.5 + 3.0 * 1.0 + 4.0 * (-1.0);
        assert_eq!(dot(v1.as_slice(), v2.as_slice(), None, None), expected);

        // null_count of Some(0) must behave the same as None.
        assert_eq!(dot(&v1, &v2, None, Some(0)), expected);
    }

    #[test]
    fn test_dot_with_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let mask = make_mask(&[true, false, true, false]);
        let null_count = 2;
        // Only positions 0 and 2 are valid.
        let expected = 1.0 * 2.0 + 3.0 * 1.0;
        assert_eq!(dot(&v1, &v2, Some(&mask), Some(null_count)), expected);

        // With null_count unknown (None), the mask presence alone must trigger
        // the null-aware path.
        assert_eq!(dot(&v1, &v2, Some(&mask), None), expected);
    }

    #[test]
    fn test_dot_all_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let mask = make_mask(&[false, false, false, false]);
        let expected = 0.0;
        assert_eq!(dot(&v1, &v2, Some(&mask), Some(4)), expected);
    }

    #[test]
    fn test_argsort_no_nulls() {
        let v = vec64![5.0, 2.0, 1.0, 4.0];
        assert_eq!(argsort(&v, None, None, false), vec64![2, 1, 3, 0]);
        assert_eq!(argsort(&v, None, None, true), vec64![0, 3, 1, 2]);
    }

    #[test]
    fn test_argsort_with_nulls() {
        let v = vec64![5.0, 2.0, 1.0, 4.0, 3.0];
        let mask = make_mask(&[true, false, true, true, false]);
        // Null indices (1 and 4) must sort after all valid ones,
        // in both directions.
        assert_eq!(
            argsort(&v, Some(&mask), Some(2), false),
            vec64![2, 3, 0, 1, 4]
        );
        assert_eq!(
            argsort(&v, Some(&mask), Some(2), true),
            vec64![0, 3, 2, 1, 4]
        );
    }

    #[test]
    fn test_histogram_no_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        // Buckets: (-inf, 2.5), [2.5, 5.0), [5.0, +inf).
        assert_eq!(histogram(&v, &bins, None, None), vec![2, 1, 1]);
    }

    #[test]
    fn test_histogram_with_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        let mask = make_mask(&[false, true, true, false]);
        assert_eq!(histogram(&v, &bins, Some(&mask), Some(2)), vec![1, 1, 0]);
    }

    #[test]
    fn test_histogram_all_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        let mask = make_mask(&[false, false, false, false]);
        assert_eq!(histogram(&v, &bins, Some(&mask), Some(4)), vec![0, 0, 0]);
    }

    #[test]
    fn test_reservoir_sample_basic() {
        let v = vec64![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = reservoir_sample(&v, 3, None, None);
        assert_eq!(arr.data.len(), 3);
        // Sampling is random, but every drawn value must come from the input.
        for &x in arr.data.iter() {
            assert!(v.contains(&x));
        }
    }

    #[test]
    fn test_reservoir_sample_with_nulls() {
        let v = vec64![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let mask = make_mask(&[false, true, false, true, false, true]);
        let arr = reservoir_sample(&v, 2, Some(&mask), Some(3));
        // Only the valid positions (values 2, 4, 6) may be drawn.
        for &x in arr.data.iter() {
            assert!(x == 2.0 || x == 4.0 || x == 6.0);
        }
    }

    #[test]
    fn test_scale_no_nulls() {
        let mut v = vec64![1.0, 2.0, 3.0];
        scale(&mut v, 3.0, None, None);
        assert_eq!(*v, [3.0, 6.0, 9.0]);
    }

    #[test]
    fn test_scale_with_nulls() {
        let mut v = vec64![1.0, 2.0, 3.0];
        let mask = make_mask(&[false, true, true]);
        scale(&mut v, 4.0, Some(&mask), Some(1));
        // The null slot (index 0) must be left untouched.
        assert_eq!(*v, [1.0, 8.0, 12.0]);
    }

    #[test]
    fn test_scale_vec_no_nulls() {
        let mut v = vec64![2.0, 4.0, 8.0, 16.0];
        // n = 2, stride 2: only indices 0 and 2 are scaled.
        let r = scale_vec(2, 0.5, &mut v, 2, None, None);
        assert!(r.is_ok());
        assert_eq!(*v, [1.0, 4.0, 4.0, 16.0]);
    }

    #[test]
    fn test_scale_vec_with_nulls() {
        let mut v = vec64![2.0, 4.0, 8.0, 16.0];
        let mask = make_mask(&[false, true, true, false]);
        let r = scale_vec(2, 0.5, &mut v, 2, Some(&mask), Some(2));
        assert!(r.is_ok());
        // Index 0 is null (untouched); index 2 is valid and scaled.
        assert_eq!(*v, [2.0, 4.0, 4.0, 16.0]);
    }

    #[test]
    fn test_axpy_no_nulls() {
        let mut y = vec64![10.0, 20.0, 30.0, 40.0];
        let x = vec64![1.0, 2.0, 3.0, 4.0];
        let r = axpy(4, 2.0, &x, 1, &mut y, 1, None, None);
        assert!(r.is_ok());
        assert_eq!(*y, [12.0, 24.0, 36.0, 48.0]);
    }

    #[test]
    fn test_axpy_with_nulls() {
        let mut y = vec64![10.0, 20.0, 30.0, 40.0];
        let x = vec64![1.0, 2.0, 3.0, 4.0];
        let mask = make_mask(&[false, true, false, true]);
        let r = axpy(4, 2.0, &x, 1, &mut y, 1, Some(&mask), Some(2));
        assert!(r.is_ok());
        // y slots whose x position is null must keep their original value.
        assert_eq!(*y, [10.0, 24.0, 30.0, 48.0]);
    }

    #[test]
    fn test_l2_norm_no_nulls() {
        let v = vec64![3.0, 4.0];
        let n = l2_norm(&v, None, None);
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_l2_norm_with_nulls() {
        let v = vec64![3.0, 4.0];
        let mask = make_mask(&[false, true]);
        let n = l2_norm(&v, Some(&mask), Some(1));
        assert!((n - 4.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_no_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let n = vector_norm(3, &v, 1, None, None).unwrap();
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_with_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let mask = make_mask(&[true, false, true]);
        let n = vector_norm(3, &v, 1, Some(&mask), Some(1)).unwrap();
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_all_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let mask = make_mask(&[false, false, false]);
        let n = vector_norm(3, &v, 1, Some(&mask), Some(3)).unwrap();
        assert_eq!(n, 0.0);
    }

    // Accuracy check for the compensated (Neumaier) summation: a naive f64
    // sum would lose low-order bits against the 1e12 offset.
    #[test]
    fn test_dot_large_compensated_accuracy() {
        let n = 1_000_000;
        let offset = 1e12;

        let v1: Vec64<f64> = (0..n).map(|_| offset + 1.0).collect();
        let v2: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = dot(&v1, &v2, None, None);
        let exact = n as f64 * (offset + 1.0);

        let ulps = ((result - exact) / exact).abs() / f64::EPSILON;
        assert!(
            ulps < 4.0,
            "dot product off by {ulps:.1} ULPs: got {result}, expected {exact}",
        );
    }

    // Alternating +/-1e8 terms: the exact dot product is 0, so any residual
    // is pure accumulation error.
    #[test]
    fn test_dot_large_cancellation() {
        let n = 1_000_000;

        let v1: Vec64<f64> = (0..n)
            .map(|i| if i % 2 == 0 { 1e8 } else { -1e8 })
            .collect();
        let v2: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = dot(&v1, &v2, None, None);
        assert!(
            result.abs() < 1e-4,
            "dot cancellation failed: got {result}, expected 0.0",
        );
    }

    // One million ones: norm must be sqrt(n) to within a few ULPs.
    #[test]
    fn test_l2_norm_large_compensated_accuracy() {
        let n = 1_000_000;

        let v: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = l2_norm(&v, None, None);
        let exact = (n as f64).sqrt();

        let ulps = ((result - exact) / exact).abs() / f64::EPSILON;
        assert!(
            ulps < 4.0,
            "l2_norm off by {ulps:.1} ULPs: got {result}, expected {exact}",
        );
    }
}