1include!(concat!(env!("OUT_DIR"), "/simd_lanes.rs"));
34
35#[cfg(feature = "simd")]
36use std::simd::{Mask, Select, Simd, num::SimdFloat};
37
38use minarrow::{Bitmask, FloatArray, Vec64};
39
40use crate::kernels::aggregate::{neumaier_add, sum_squares};
41#[cfg(feature = "simd")]
42use crate::kernels::aggregate::neumaier_simd_add;
43use crate::utils::has_nulls;
44use minarrow::enums::error::KernelError;
45
46#[cfg(feature = "simd")]
47use crate::utils::bitmask_to_simd_mask;
48
49#[cfg(feature = "simd")]
50use minarrow::utils::is_simd_aligned;
51
#[inline(always)]
/// Dot product of `v1` and `v2` using Neumaier-compensated summation,
/// skipping elements whose validity bit in `null_mask` is unset
/// (a set bit marks a *valid* element).
///
/// With the `simd` feature enabled and both slices SIMD-aligned, a
/// vectorised path accumulates per-lane compensated sums; otherwise a
/// compensated scalar loop runs.
///
/// # Panics
/// - if `v1.len() != v2.len()`
/// - if nulls are indicated (via `null_count`/`null_mask`) but
///   `null_mask` is `None`
pub fn dot(v1: &[f64], v2: &[f64], null_mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
    assert_eq!(v1.len(), v2.len(), "dot: input length mismatch");

    let len = v1.len();

    #[cfg(feature = "simd")]
    if is_simd_aligned(v1) && is_simd_aligned(v2) {
        let needs_nulls = has_nulls(null_count, null_mask);
        // W64 is the f64 lane width generated into OUT_DIR/simd_lanes.rs.
        const N: usize = W64;
        let mut sum_v = Simd::<f64, N>::splat(0.0);
        let mut comp_v = Simd::<f64, N>::splat(0.0);
        let mut i = 0;
        if !needs_nulls {
            while i + N <= len {
                let a = Simd::<f64, N>::from_slice(&v1[i..i + N]);
                let b = Simd::<f64, N>::from_slice(&v2[i..i + N]);
                neumaier_simd_add(&mut sum_v, &mut comp_v, a * b);
                i += N;
            }
        } else {
            let mask_bytes = null_mask.unwrap().as_bytes();
            while i + N <= len {
                let a = Simd::<f64, N>::from_slice(&v1[i..i + N]);
                let b = Simd::<f64, N>::from_slice(&v2[i..i + N]);
                let prod = a * b;
                // Null lanes are zeroed so they drop out of the sum.
                let lane_mask: Mask<i64, N> = bitmask_to_simd_mask::<N, i64>(mask_bytes, i, len);
                neumaier_simd_add(&mut sum_v, &mut comp_v, lane_mask.select(prod, Simd::splat(0.0)));
                i += N;
            }
        }
        // Fold per-lane compensation in before the horizontal reduction.
        let mut acc = (sum_v + comp_v).reduce_sum();
        // Scalar tail for the remainder (uncompensated: at most N-1 adds).
        for idx in i..len {
            if !needs_nulls || unsafe { null_mask.unwrap().get_unchecked(idx) } {
                acc += v1[idx] * v2[idx];
            }
        }
        return acc;
    }

    // Scalar fallback with Neumaier compensation.
    let needs_nulls = has_nulls(null_count, null_mask);
    let mut acc = 0.0_f64;
    let mut comp = 0.0_f64;
    if !needs_nulls {
        for i in 0..len {
            neumaier_add(&mut acc, &mut comp, v1[i] * v2[i]);
        }
    } else {
        let mb = null_mask.unwrap();
        for i in 0..len {
            // SAFETY: i < len; the mask is assumed to cover v1/v2
            // (caller contract — TODO confirm mask length invariant).
            if unsafe { mb.get_unchecked(i) } {
                neumaier_add(&mut acc, &mut comp, v1[i] * v2[i]);
            }
        }
    }
    acc + comp
}
116
117#[inline(always)]
119pub fn argsort(
120 v: &[f64],
121 mask: Option<&Bitmask>,
122 null_count: Option<usize>,
123 descending: bool,
124) -> Vec64<usize> {
125 let mut idx: Vec64<usize> = (0..v.len()).collect();
126 if !has_nulls(null_count, mask) {
127 if descending {
128 idx.sort_unstable_by(|&i, &j| {
129 v[j].partial_cmp(&v[i]).unwrap_or(std::cmp::Ordering::Equal)
130 });
131 } else {
132 idx.sort_unstable_by(|&i, &j| {
133 v[i].partial_cmp(&v[j]).unwrap_or(std::cmp::Ordering::Equal)
134 });
135 }
136 } else {
137 let mb = mask.expect("argsort: mask required when nulls present");
138 idx.sort_unstable_by(|&i, &j| {
139 let vi_null = !unsafe { mb.get_unchecked(i) };
140 let vj_null = !unsafe { mb.get_unchecked(j) };
141 match (vi_null, vj_null) {
142 (true, true) => std::cmp::Ordering::Equal,
143 (true, false) => std::cmp::Ordering::Greater, (false, true) => std::cmp::Ordering::Less,
145 (false, false) => {
146 if descending {
147 v[j].partial_cmp(&v[i]).unwrap_or(std::cmp::Ordering::Equal)
148 } else {
149 v[i].partial_cmp(&v[j]).unwrap_or(std::cmp::Ordering::Equal)
150 }
151 }
152 }
153 });
154 }
155 idx
156}
157
158#[inline(always)]
160pub fn histogram(
161 v: &[f64],
162 bins: &[f64],
163 mask: Option<&Bitmask>,
164 null_count: Option<usize>,
165) -> Vec<usize> {
166 assert!(!bins.is_empty(), "histogram: bins must be non-empty");
167 let has_nulls = match null_count {
168 Some(n) => n > 0,
169 None => mask.is_some(),
170 };
171
172 let bins_sorted = bins.windows(2).all(|w| w[0] <= w[1]);
173 let mut counts = vec![0; bins.len() + 1];
174 if !has_nulls {
175 if bins_sorted {
176 for &x in v {
177 match bins.binary_search_by(|b| b.partial_cmp(&x).unwrap()) {
178 Ok(pos) | Err(pos) => counts[pos] += 1,
179 }
180 }
181 } else {
182 for &x in v {
183 let mut placed = false;
184 for (i, &b) in bins.iter().enumerate() {
185 if x < b {
186 counts[i] += 1;
187 placed = true;
188 break;
189 }
190 }
191 if !placed {
192 counts[bins.len()] += 1;
193 }
194 }
195 }
196 } else {
197 let mb = mask.expect("histogram: mask required when nulls present");
198 if bins_sorted {
199 for (i, &x) in v.iter().enumerate() {
200 if unsafe { mb.get_unchecked(i) } {
201 match bins.binary_search_by(|b| b.partial_cmp(&x).unwrap()) {
202 Ok(pos) | Err(pos) => counts[pos] += 1,
203 }
204 }
205 }
206 } else {
207 for (i, &x) in v.iter().enumerate() {
208 if unsafe { mb.get_unchecked(i) } {
209 let mut placed = false;
210 for (j, &b) in bins.iter().enumerate() {
211 if x < b {
212 counts[j] += 1;
213 placed = true;
214 break;
215 }
216 }
217 if !placed {
218 counts[bins.len()] += 1;
219 }
220 }
221 }
222 }
223 }
224 counts
225}
226
227#[inline(always)]
229pub fn reservoir_sample(
230 v: &[f64],
231 k: usize,
232 mask: Option<&Bitmask>,
233 null_count: Option<usize>,
234) -> FloatArray<f64> {
235 use rand::prelude::*;
236 assert!(k > 0, "reservoir_sample: k must be positive");
237 let has_nulls = match null_count {
238 Some(n) => n > 0,
239 None => mask.is_some(),
240 };
241
242 let mut rng = rand::rng();
243 let mut out = Vec64::with_capacity(k);
244 let mut seen = 0usize;
245
246 for (idx, &val) in v.iter().enumerate() {
247 if has_nulls && !unsafe { mask.unwrap().get_unchecked(idx) } {
248 continue;
249 }
250 if out.len() < k {
251 out.push(val);
252 } else {
253 let j = rng.random_range(0..=seen);
254 if j < k {
255 out[j] = val;
256 }
257 }
258 seen += 1;
259 }
260 assert_eq!(out.len(), k, "reservoir_sample: not enough valid values");
261 FloatArray::from_vec64(out, None)
262}
263
#[inline(always)]
/// Multiplies every valid element of `v` by `alpha` in place, leaving
/// elements with an unset validity bit in `null_mask` untouched.
///
/// With the `simd` feature enabled and `v` SIMD-aligned, a vectorised
/// path is used; null lanes are preserved via a masked select.
///
/// # Panics
/// Panics if nulls are indicated but `null_mask` is `None`.
pub fn scale(v: &mut [f64], alpha: f64, null_mask: Option<&Bitmask>, null_count: Option<usize>) {
    #[cfg(feature = "simd")]
    if is_simd_aligned(v) {
        // W64 is the f64 lane width generated into OUT_DIR/simd_lanes.rs.
        const N: usize = W64;
        let len = v.len();
        let alpha_v = Simd::<f64, N>::splat(alpha);
        let mut i = 0;

        if !has_nulls(null_count, null_mask) {
            while i + N <= len {
                let chunk = Simd::from_slice(&v[i..i + N]);
                let scaled = chunk * alpha_v;
                v[i..i + N].copy_from_slice(&scaled.to_array());
                i += N;
            }
            // Scalar tail for the remainder.
            for x in &mut v[i..] {
                *x *= alpha;
            }
        } else {
            let mb = null_mask.expect("scale: mask required when nulls present");
            let mask_bytes = mb.as_bytes();

            while i + N <= len {
                let chunk = Simd::from_slice(&v[i..i + N]);
                let lane_mask: Mask<i64, N> = bitmask_to_simd_mask::<N, i64>(mask_bytes, i, len);
                // Null lanes keep their original value.
                let result = lane_mask.select(chunk * alpha_v, chunk);
                v[i..i + N].copy_from_slice(&result.to_array());
                i += N;
            }
            // Scalar tail, honouring the validity mask.
            for idx in i..len {
                if unsafe { mb.get_unchecked(idx) } {
                    v[idx] *= alpha;
                }
            }
        }
        return;
    }

    // Scalar fallback.
    if !has_nulls(null_count, null_mask) {
        for x in v {
            *x *= alpha;
        }
    } else {
        let mb = null_mask.expect("scale: mask required when nulls present");
        for (i, x) in v.iter_mut().enumerate() {
            // SAFETY: i < v.len(); the mask is assumed to cover v
            // (caller contract).
            if unsafe { mb.get_unchecked(i) } {
                *x *= alpha;
            }
        }
    }
}
328
329#[inline(always)]
337pub fn scale_vec(
338 n: i32,
339 alpha: f64,
340 x: &mut [f64],
341 incx: i32,
342 null_mask: Option<&Bitmask>,
343 null_count: Option<usize>,
344) -> Result<(), KernelError> {
345 if n < 0 {
346 return Err(KernelError::InvalidArguments(
347 "n must be non-negative".into(),
348 ));
349 }
350 let n = n as usize;
351 let incx = incx as usize;
352 if incx == 0 {
353 return Err(KernelError::InvalidArguments(
354 "incx must be positive".into(),
355 ));
356 }
357 if n == 0 {
358 return Ok(());
359 }
360 if (n - 1).saturating_mul(incx) >= x.len() {
361 return Err(KernelError::InvalidArguments(
362 "indexing out of bounds".into(),
363 ));
364 }
365
366 let mask_present = has_nulls(null_count, null_mask);
367
368 #[cfg(feature = "simd")]
369 if is_simd_aligned(x) {
370 const LANES: usize = W64;
371 let alpha_v = Simd::<f64, LANES>::splat(alpha);
372
373 if incx == 1 && !mask_present {
375 let mut i = 0;
377 while i + LANES <= n {
378 let chunk = Simd::from_slice(&x[i..i + LANES]);
379 let out_v = chunk * alpha_v;
380 x[i..i + LANES].copy_from_slice(&out_v.to_array());
381 i += LANES;
382 }
383 for xi in &mut x[i..n] {
384 *xi *= alpha;
385 }
386 return Ok(());
387 }
388 if incx == 1 && mask_present {
389 let mb = null_mask.unwrap();
391 let bytes = mb.as_bytes();
392 let mut i = 0;
393 while i + LANES <= n {
394 let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
396 let chunk = Simd::from_slice(&x[i..i + LANES]);
397 let scaled = chunk * alpha_v;
398 let out_v = lane_mask.select(scaled, chunk);
400 x[i..i + LANES].copy_from_slice(&out_v.to_array());
401 i += LANES;
402 }
403 for idx in i..n {
405 if unsafe { mb.get_unchecked(idx) } {
406 x[idx] *= alpha;
407 }
408 }
409 return Ok(());
410 }
411 }
412
413 if !mask_present {
415 let mut idx = 0;
416 for _ in 0..n {
417 x[idx] *= alpha;
418 idx += incx;
419 }
420 } else {
421 let mb = null_mask.unwrap();
422 let mut idx = 0;
423 for _ in 0..n {
424 if unsafe { mb.get_unchecked(idx) } {
425 x[idx] *= alpha;
426 }
427 idx += incx;
428 }
429 }
430 Ok(())
431}
432
433#[inline(always)]
438pub fn axpy(
439 n: i32,
440 alpha: f64,
441 x: &[f64],
442 incx: i32,
443 y: &mut [f64],
444 incy: i32,
445 null_mask: Option<&Bitmask>,
446 null_count: Option<usize>,
447) -> Result<(), KernelError> {
448 if n < 0 {
449 return Err(KernelError::InvalidArguments(
450 "n must be non-negative".into(),
451 ));
452 }
453 let n = n as usize;
454 let incx = incx as usize;
455 let incy = incy as usize;
456 if incx == 0 || incy == 0 {
457 return Err(KernelError::InvalidArguments(
458 "increments must be positive".into(),
459 ));
460 }
461 if n == 0 {
462 return Ok(());
463 }
464 if (n - 1).saturating_mul(incx) >= x.len() {
465 return Err(KernelError::InvalidArguments("x out of bounds".into()));
466 }
467 if (n - 1).saturating_mul(incy) >= y.len() {
468 return Err(KernelError::InvalidArguments("y out of bounds".into()));
469 }
470
471 let mask_present = has_nulls(null_count, null_mask);
472
473 #[cfg(feature = "simd")]
474 if is_simd_aligned(x) {
475 const LANES: usize = W64;
476 let alpha_v = Simd::<f64, LANES>::splat(alpha);
477
478 if incx == 1 && incy == 1 && !mask_present {
480 let mut i = 0;
482 while i + LANES <= n {
483 let xv = Simd::from_slice(&x[i..i + LANES]);
484 let yv = Simd::from_slice(&y[i..i + LANES]);
485 let out_v = alpha_v * xv + yv;
486 y[i..i + LANES].copy_from_slice(&out_v.to_array());
487 i += LANES;
488 }
489 for j in i..n {
490 y[j] += alpha * x[j];
491 }
492 return Ok(());
493 }
494 if incx == 1 && incy == 1 && mask_present {
495 let mb = null_mask.unwrap();
497 let bytes = mb.as_bytes();
498 let mut i = 0;
499 while i + LANES <= n {
500 let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
501 let xv = Simd::from_slice(&x[i..i + LANES]);
502 let yv = Simd::from_slice(&y[i..i + LANES]);
503 let computed = alpha_v * xv + yv;
504 let out_v = lane_mask.select(computed, yv);
505 y[i..i + LANES].copy_from_slice(&out_v.to_array());
506 i += LANES;
507 }
508 for idx in i..n {
509 if unsafe { mb.get_unchecked(idx) } {
510 y[idx] += alpha * x[idx];
511 }
512 }
513 return Ok(());
514 }
515 }
516
517 if !mask_present {
519 let mut ix = 0;
520 let mut iy = 0;
521 for _ in 0..n {
522 y[iy] += alpha * x[ix];
523 ix += incx;
524 iy += incy;
525 }
526 } else {
527 let mb = null_mask.unwrap();
528 let mut ix = 0;
529 let mut iy = 0;
530 for _ in 0..n {
531 if unsafe { mb.get_unchecked(ix) } {
532 y[iy] += alpha * x[ix];
533 }
534 ix += incx;
535 iy += incy;
536 }
537 }
538 Ok(())
539}
540
541#[inline(always)]
543pub fn l2_norm(v: &[f64], mask: Option<&Bitmask>, null_count: Option<usize>) -> f64 {
544 sum_squares(v, mask, null_count).sqrt()
545}
546
#[inline(always)]
/// BLAS-style `NRM2`: Euclidean norm of the strided vector
/// `x[0], x[incx], ..., x[(n-1)*incx]`, using Neumaier-compensated
/// accumulation of squares and skipping elements whose validity bit in
/// `null_mask` is unset.
///
/// # Errors
/// Returns `KernelError::InvalidArguments` when `n < 0`, `incx <= 0`,
/// or the strided indexing would reach past `x.len()`.
pub fn vector_norm(
    n: i32,
    x: &[f64],
    incx: i32,
    null_mask: Option<&Bitmask>,
    null_count: Option<usize>,
) -> Result<f64, KernelError> {
    if n < 0 {
        return Err(KernelError::InvalidArguments(
            "n must be non-negative".into(),
        ));
    }
    // Sign is checked before the usize cast so negative strides cannot wrap.
    if incx <= 0 {
        return Err(KernelError::InvalidArguments(
            "incx must be positive".into(),
        ));
    }
    let n = n as usize;
    let incx = incx as usize;
    if n == 0 {
        return Ok(0.0);
    }
    if (n - 1).saturating_mul(incx) >= x.len() {
        return Err(KernelError::InvalidArguments(
            "indexing out of bounds for x".into(),
        ));
    }

    let mask_present = has_nulls(null_count, null_mask);

    #[cfg(feature = "simd")]
    if is_simd_aligned(x) {
        use crate::utils::bitmask_to_simd_mask;
        use core::simd::Simd;

        // W64 is the f64 lane width generated into OUT_DIR/simd_lanes.rs.
        const LANES: usize = W64;

        // Unit-stride fast path, no nulls.
        if incx == 1 && !mask_present {
            let mut sum_v = Simd::<f64, LANES>::splat(0.0);
            let mut comp_v = Simd::<f64, LANES>::splat(0.0);
            let mut i = 0;
            while i + LANES <= n {
                let v = Simd::from_slice(&x[i..i + LANES]);
                neumaier_simd_add(&mut sum_v, &mut comp_v, v * v);
                i += LANES;
            }
            // Fold compensation in, then handle the scalar tail.
            let mut sumsq = (sum_v + comp_v).reduce_sum();
            for &xi in &x[i..n] {
                sumsq += xi * xi;
            }
            return Ok(sumsq.sqrt());
        }

        // Unit-stride path with validity mask: null lanes contribute 0.
        if incx == 1 && mask_present {
            let mb = null_mask.unwrap();
            let bytes = mb.as_bytes();
            let mut sum_v = Simd::<f64, LANES>::splat(0.0);
            let mut comp_v = Simd::<f64, LANES>::splat(0.0);
            let mut i = 0;
            while i + LANES <= n {
                let lane_mask: Mask<i64, LANES> = bitmask_to_simd_mask::<LANES, i64>(bytes, i, n);
                let v = Simd::from_slice(&x[i..i + LANES]);
                let v = lane_mask.select(v, Simd::splat(0.0));
                neumaier_simd_add(&mut sum_v, &mut comp_v, v * v);
                i += LANES;
            }
            let mut sumsq = (sum_v + comp_v).reduce_sum();
            // Scalar tail, honouring the mask.
            for idx in i..n {
                if unsafe { mb.get_unchecked(idx) } {
                    sumsq += x[idx] * x[idx];
                }
            }
            return Ok(sumsq.sqrt());
        }
    }

    // General strided scalar path with Neumaier compensation.
    let mut sumsq = 0.0_f64;
    let mut comp = 0.0_f64;
    if !mask_present {
        let mut idx = 0;
        for _ in 0..n {
            let xi = x[idx];
            neumaier_add(&mut sumsq, &mut comp, xi * xi);
            idx += incx;
        }
    } else {
        let mb = null_mask.unwrap();
        let mut idx = 0;
        for _ in 0..n {
            // Validity is looked up at the strided position `idx`.
            if unsafe { mb.get_unchecked(idx) } {
                let xi = x[idx];
                neumaier_add(&mut sumsq, &mut comp, xi * xi);
            }
            idx += incx;
        }
    }
    Ok((sumsq + comp).sqrt())
}
656
#[cfg(test)]
mod tests {
    use minarrow::{Bitmask, vec64};

    use super::*;

    // Builds a validity bitmask from bools: true = valid, false = null.
    fn make_mask(bits: &[bool]) -> Bitmask {
        Bitmask::from_bools(bits)
    }

    #[test]
    fn test_dot_no_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let expected = 1.0 * 2.0 + 2.0 * 0.5 + 3.0 * 1.0 + 4.0 * (-1.0);
        assert_eq!(dot(v1.as_slice(), v2.as_slice(), None, None), expected);

        // An explicit null_count of 0 must take the same (unmasked) path.
        assert_eq!(dot(&v1, &v2, None, Some(0)), expected);
    }

    #[test]
    fn test_dot_with_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let mask = make_mask(&[true, false, true, false]);
        let null_count = 2;
        // Only positions 0 and 2 are valid.
        let expected = 1.0 * 2.0 + 3.0 * 1.0;
        assert_eq!(dot(&v1, &v2, Some(&mask), Some(null_count)), expected);

        // null_count of None with a mask present must also mask.
        assert_eq!(dot(&v1, &v2, Some(&mask), None), expected);
    }

    #[test]
    fn test_dot_all_nulls() {
        let v1 = vec64![1.0, 2.0, 3.0, 4.0];
        let v2 = vec64![2.0, 0.5, 1.0, -1.0];
        let mask = make_mask(&[false, false, false, false]);
        let expected = 0.0;
        assert_eq!(dot(&v1, &v2, Some(&mask), Some(4)), expected);
    }

    #[test]
    fn test_argsort_no_nulls() {
        let v = vec64![5.0, 2.0, 1.0, 4.0];
        assert_eq!(argsort(&v, None, None, false), vec64![2, 1, 3, 0]);
        assert_eq!(argsort(&v, None, None, true), vec64![0, 3, 1, 2]);
    }

    #[test]
    fn test_argsort_with_nulls() {
        let v = vec64![5.0, 2.0, 1.0, 4.0, 3.0];
        let mask = make_mask(&[true, false, true, true, false]);
        // Nulls (indices 1 and 4) sort last in both directions.
        assert_eq!(
            argsort(&v, Some(&mask), Some(2), false),
            vec64![2, 3, 0, 1, 4]
        );
        assert_eq!(
            argsort(&v, Some(&mask), Some(2), true),
            vec64![0, 3, 2, 1, 4]
        );
    }

    #[test]
    fn test_histogram_no_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        // Buckets: x < 2.5, 2.5 <= x < 5.0, overflow.
        assert_eq!(histogram(&v, &bins, None, None), vec![2, 1, 1]);
    }

    #[test]
    fn test_histogram_with_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        let mask = make_mask(&[false, true, true, false]);
        assert_eq!(histogram(&v, &bins, Some(&mask), Some(2)), vec![1, 1, 0]);
    }

    #[test]
    fn test_histogram_all_nulls() {
        let v = vec64![1.0, 2.0, 4.0, 6.0];
        let bins = [2.5, 5.0];
        let mask = make_mask(&[false, false, false, false]);
        assert_eq!(histogram(&v, &bins, Some(&mask), Some(4)), vec![0, 0, 0]);
    }

    #[test]
    fn test_reservoir_sample_basic() {
        let v = vec64![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = reservoir_sample(&v, 3, None, None);
        assert_eq!(arr.data.len(), 3);
        // Sampling is random: only membership can be asserted.
        for &x in arr.data.iter() {
            assert!(v.contains(&x));
        }
    }

    #[test]
    fn test_reservoir_sample_with_nulls() {
        let v = vec64![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let mask = make_mask(&[false, true, false, true, false, true]);
        let arr = reservoir_sample(&v, 2, Some(&mask), Some(3));
        // Only valid values may appear in the sample.
        for &x in arr.data.iter() {
            assert!(x == 2.0 || x == 4.0 || x == 6.0);
        }
    }

    #[test]
    fn test_scale_no_nulls() {
        let mut v = vec64![1.0, 2.0, 3.0];
        scale(&mut v, 3.0, None, None);
        assert_eq!(*v, [3.0, 6.0, 9.0]);
    }

    #[test]
    fn test_scale_with_nulls() {
        let mut v = vec64![1.0, 2.0, 3.0];
        let mask = make_mask(&[false, true, true]);
        scale(&mut v, 4.0, Some(&mask), Some(1));
        // The null position (index 0) must be left untouched.
        assert_eq!(*v, [1.0, 8.0, 12.0]);
    }

    #[test]
    fn test_scale_vec_no_nulls() {
        let mut v = vec64![2.0, 4.0, 8.0, 16.0];
        // Stride 2: only indices 0 and 2 are touched.
        let r = scale_vec(2, 0.5, &mut v, 2, None, None);
        assert!(r.is_ok());
        assert_eq!(*v, [1.0, 4.0, 4.0, 16.0]);
    }

    #[test]
    fn test_scale_vec_with_nulls() {
        let mut v = vec64![2.0, 4.0, 8.0, 16.0];
        let mask = make_mask(&[false, true, true, false]);
        let r = scale_vec(2, 0.5, &mut v, 2, Some(&mask), Some(2));
        assert!(r.is_ok());
        // Index 0 is null, index 2 is valid.
        assert_eq!(*v, [2.0, 4.0, 4.0, 16.0]);
    }

    #[test]
    fn test_axpy_no_nulls() {
        let mut y = vec64![10.0, 20.0, 30.0, 40.0];
        let x = vec64![1.0, 2.0, 3.0, 4.0];
        let r = axpy(4, 2.0, &x, 1, &mut y, 1, None, None);
        assert!(r.is_ok());
        assert_eq!(*y, [12.0, 24.0, 36.0, 48.0]);
    }

    #[test]
    fn test_axpy_with_nulls() {
        let mut y = vec64![10.0, 20.0, 30.0, 40.0];
        let x = vec64![1.0, 2.0, 3.0, 4.0];
        let mask = make_mask(&[false, true, false, true]);
        let r = axpy(4, 2.0, &x, 1, &mut y, 1, Some(&mask), Some(2));
        assert!(r.is_ok());
        // Null positions leave y unchanged.
        assert_eq!(*y, [10.0, 24.0, 30.0, 48.0]);
    }

    #[test]
    fn test_l2_norm_no_nulls() {
        let v = vec64![3.0, 4.0];
        let n = l2_norm(&v, None, None);
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_l2_norm_with_nulls() {
        let v = vec64![3.0, 4.0];
        let mask = make_mask(&[false, true]);
        let n = l2_norm(&v, Some(&mask), Some(1));
        assert!((n - 4.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_no_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let n = vector_norm(3, &v, 1, None, None).unwrap();
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_with_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let mask = make_mask(&[true, false, true]);
        let n = vector_norm(3, &v, 1, Some(&mask), Some(1)).unwrap();
        assert!((n - 5.0).abs() < 1e-12);
    }

    #[test]
    fn test_vector_norm_all_nulls() {
        let v = vec64![3.0, 0.0, 4.0];
        let mask = make_mask(&[false, false, false]);
        let n = vector_norm(3, &v, 1, Some(&mask), Some(3)).unwrap();
        assert_eq!(n, 0.0);
    }

    // Compensated summation must keep a large-offset dot product accurate
    // where a naive accumulation would lose low-order bits.
    #[test]
    fn test_dot_large_compensated_accuracy() {
        let n = 1_000_000;
        let offset = 1e12;

        let v1: Vec64<f64> = (0..n).map(|_| offset + 1.0).collect();
        let v2: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = dot(&v1, &v2, None, None);
        let exact = n as f64 * (offset + 1.0);

        let ulps = ((result - exact) / exact).abs() / f64::EPSILON;
        assert!(
            ulps < 4.0,
            "dot product off by {ulps:.1} ULPs: got {result}, expected {exact}",
        );
    }

    // Alternating +/- terms should cancel to ~0 under compensation.
    #[test]
    fn test_dot_large_cancellation() {
        let n = 1_000_000;

        let v1: Vec64<f64> = (0..n)
            .map(|i| if i % 2 == 0 { 1e8 } else { -1e8 })
            .collect();
        let v2: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = dot(&v1, &v2, None, None);
        assert!(
            result.abs() < 1e-4,
            "dot cancellation failed: got {result}, expected 0.0",
        );
    }

    // sqrt(sum of 1e6 ones) must stay within a few ULPs of sqrt(n).
    #[test]
    fn test_l2_norm_large_compensated_accuracy() {
        let n = 1_000_000;

        let v: Vec64<f64> = (0..n).map(|_| 1.0).collect();

        let result = l2_norm(&v, None, None);
        let exact = (n as f64).sqrt();

        let ulps = ((result - exact) / exact).abs() / f64::EPSILON;
        assert!(
            ulps < 4.0,
            "l2_norm off by {ulps:.1} ULPs: got {result}, expected {exact}",
        );
    }

}