//! Parallel and SIMD-accelerated vector kernels: dot products, norms,
//! element-wise arithmetic, and sparse (CSR) matrix-vector products.

use scirs2_core::ndarray::ArrayView1;
use scirs2_core::numeric::{Float, NumAssign};
use std::iter::Sum;

use scirs2_core::parallel_ops::*;
use scirs2_core::simd_ops::SimdUnifiedOps;

/// Tuning options for the parallel vector kernels in this module.
#[derive(Debug, Clone)]
pub struct ParallelVectorOptions {
    /// Use multi-threaded execution when inputs are large enough.
    pub use_parallel: bool,
    /// Minimum number of elements before the parallel path is taken.
    pub parallel_threshold: usize,
    /// Number of elements processed per work chunk.
    pub chunk_size: usize,
    /// Use SIMD acceleration when chunks are large enough.
    pub use_simd: bool,
    /// Minimum number of elements before the SIMD path is taken.
    pub simd_threshold: usize,
}

impl Default for ParallelVectorOptions {
    fn default() -> Self {
        Self {
            use_parallel: true,
            parallel_threshold: 10000,
            chunk_size: 1024,
            use_simd: true,
            simd_threshold: 32,
        }
    }
}

#[allow(dead_code)]
/// Computes the dot product of `x` and `y`.
///
/// The execution strategy (scalar, SIMD, or chunked parallel) is chosen from
/// `options` and the input length.
///
/// # Panics
///
/// Panics if `x` and `y` have different lengths.
pub fn parallel_dot<T>(x: &[T], y: &[T], options: Option<ParallelVectorOptions>) -> T
where
    T: Float + NumAssign + Sum + Copy + Send + Sync + SimdUnifiedOps,
{
    assert_eq!(
        x.len(),
        y.len(),
        "Vector lengths must be equal for dot product"
    );

    if x.is_empty() {
        return T::zero();
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        // Pair matching chunks of x and y, then reduce each pair in parallel.
        let chunks = x
            .chunks(opts.chunk_size)
            .zip(y.chunks(opts.chunk_size))
            .collect::<Vec<_>>();

        let partial_sums: Vec<T> = parallel_map(&chunks, |(x_chunk, y_chunk)| {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(*x_chunk);
                let y_view = ArrayView1::from(*y_chunk);
                T::simd_dot(&x_view, &y_view)
            } else {
                x_chunk
                    .iter()
                    .zip(y_chunk.iter())
                    .map(|(&xi, &yi)| xi * yi)
                    .sum()
            }
        });

        partial_sums.into_iter().sum()
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let y_view = ArrayView1::from(y);
        T::simd_dot(&x_view, &y_view)
    } else {
        x.iter().zip(y).map(|(&xi, &yi)| xi * yi).sum()
    }
}

#[allow(dead_code)]
/// Computes the Euclidean (L2) norm of `x`.
pub fn parallel_norm2<T>(x: &[T], options: Option<ParallelVectorOptions>) -> T
where
    T: Float + NumAssign + Sum + Copy + Send + Sync + SimdUnifiedOps,
{
    if x.is_empty() {
        return T::zero();
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        // Sum of squares per chunk in parallel, then a single final sqrt.
        let chunks = x.chunks(opts.chunk_size).collect::<Vec<_>>();

        let partial_sums: Vec<T> = parallel_map(&chunks, |chunk| {
            if opts.use_simd && chunk.len() >= opts.simd_threshold {
                let chunk_view = ArrayView1::from(*chunk);
                T::simd_dot(&chunk_view, &chunk_view)
            } else {
                chunk.iter().map(|&xi| xi * xi).sum()
            }
        });

        partial_sums.into_iter().sum::<T>().sqrt()
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        T::simd_dot(&x_view, &x_view).sqrt()
    } else {
        x.iter().map(|&xi| xi * xi).sum::<T>().sqrt()
    }
}

#[allow(dead_code)]
/// Computes the element-wise sum `z = x + y`.
pub fn parallel_vector_add<T>(x: &[T], y: &[T], z: &mut [T], options: Option<ParallelVectorOptions>)
where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(x.len(), y.len(), "Input vector lengths must be equal");
    assert_eq!(x.len(), z.len(), "Output vector length must match input");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        // As in the other element-wise kernels below, chunks are processed
        // sequentially because the mutable output cannot be shared across
        // tasks; SIMD still applies within each chunk.
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks(opts.chunk_size))
            .zip(z.chunks_mut(opts.chunk_size))
            .collect();

        for ((x_chunk, y_chunk), z_chunk) in chunks {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(x_chunk);
                let y_view = ArrayView1::from(y_chunk);
                let result = T::simd_add(&x_view, &y_view);
                z_chunk.copy_from_slice(result.as_slice().unwrap());
            } else {
                for ((xi, yi), zi) in x_chunk.iter().zip(y_chunk).zip(z_chunk) {
                    *zi = *xi + *yi;
                }
            }
        }
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let y_view = ArrayView1::from(y);
        let result = T::simd_add(&x_view, &y_view);
        z.copy_from_slice(result.as_slice().unwrap());
    } else {
        for ((xi, yi), zi) in x.iter().zip(y).zip(z) {
            *zi = *xi + *yi;
        }
    }
}

#[allow(dead_code)]
/// Computes the element-wise difference `z = x - y`.
pub fn parallel_vector_sub<T>(x: &[T], y: &[T], z: &mut [T], options: Option<ParallelVectorOptions>)
where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(x.len(), y.len(), "Input vector lengths must be equal");
    assert_eq!(x.len(), z.len(), "Output vector length must match input");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks(opts.chunk_size))
            .zip(z.chunks_mut(opts.chunk_size))
            .collect();

        for ((x_chunk, y_chunk), z_chunk) in chunks {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(x_chunk);
                let y_view = ArrayView1::from(y_chunk);
                let result = T::simd_sub(&x_view, &y_view);
                z_chunk.copy_from_slice(result.as_slice().unwrap());
            } else {
                for ((xi, yi), zi) in x_chunk.iter().zip(y_chunk).zip(z_chunk) {
                    *zi = *xi - *yi;
                }
            }
        }
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let y_view = ArrayView1::from(y);
        let result = T::simd_sub(&x_view, &y_view);
        z.copy_from_slice(result.as_slice().unwrap());
    } else {
        for ((xi, yi), zi) in x.iter().zip(y).zip(z) {
            *zi = *xi - *yi;
        }
    }
}

#[allow(dead_code)]
/// Computes the AXPY update `y = a * x + y` in place.
pub fn parallel_axpy<T>(a: T, x: &[T], y: &mut [T], options: Option<ParallelVectorOptions>)
where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(x.len(), y.len(), "Vector lengths must be equal for AXPY");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks_mut(opts.chunk_size))
            .collect();

        for (x_chunk, y_chunk) in chunks {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(x_chunk);
                let scaled = T::simd_scalar_mul(&x_view, a);
                let y_view = ArrayView1::from(&y_chunk[..]);
                let result = T::simd_add(&scaled.view(), &y_view);
                y_chunk.copy_from_slice(result.as_slice().unwrap());
            } else {
                for (xi, yi) in x_chunk.iter().zip(y_chunk) {
                    *yi = a * *xi + *yi;
                }
            }
        }
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let scaled = T::simd_scalar_mul(&x_view, a);
        let y_view = ArrayView1::from(&y[..]);
        let result = T::simd_add(&scaled.view(), &y_view);
        y.copy_from_slice(result.as_slice().unwrap());
    } else {
        for (xi, yi) in x.iter().zip(y) {
            *yi = a * *xi + *yi;
        }
    }
}

#[allow(dead_code)]
/// Computes the scaled copy `y = a * x`.
pub fn parallel_vector_scale<T>(a: T, x: &[T], y: &mut [T], options: Option<ParallelVectorOptions>)
where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(x.len(), y.len(), "Vector lengths must be equal for scaling");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks_mut(opts.chunk_size))
            .collect();

        for (x_chunk, y_chunk) in chunks {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(x_chunk);
                let result = T::simd_scalar_mul(&x_view, a);
                y_chunk.copy_from_slice(result.as_slice().unwrap());
            } else {
                for (xi, yi) in x_chunk.iter().zip(y_chunk) {
                    *yi = a * *xi;
                }
            }
        }
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let result = T::simd_scalar_mul(&x_view, a);
        y.copy_from_slice(result.as_slice().unwrap());
    } else {
        for (xi, yi) in x.iter().zip(y) {
            *yi = a * *xi;
        }
    }
}

#[allow(dead_code)]
/// Copies `x` into `y`.
pub fn parallel_vector_copy<T>(x: &[T], y: &mut [T], options: Option<ParallelVectorOptions>)
where
    T: Float + NumAssign + Send + Sync + Copy,
{
    assert_eq!(x.len(), y.len(), "Vector lengths must be equal for copy");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        // Chunked copies, processed sequentially; `copy_from_slice` already
        // lowers to memcpy, so chunking mainly bounds the working-set size.
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks_mut(opts.chunk_size))
            .collect();

        for (x_chunk, y_chunk) in chunks {
            y_chunk.copy_from_slice(x_chunk);
        }
    } else {
        y.copy_from_slice(x);
    }
}

#[allow(dead_code)]
#[allow(clippy::too_many_arguments)]
/// Computes the linear combination `z = a * x + b * y`.
pub fn parallel_linear_combination<T>(
    a: T,
    x: &[T],
    b: T,
    y: &[T],
    z: &mut [T],
    options: Option<ParallelVectorOptions>,
) where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(x.len(), y.len(), "Input vector lengths must be equal");
    assert_eq!(x.len(), z.len(), "Output vector length must match input");

    if x.is_empty() {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && x.len() >= opts.parallel_threshold {
        let chunks: Vec<_> = x
            .chunks(opts.chunk_size)
            .zip(y.chunks(opts.chunk_size))
            .zip(z.chunks_mut(opts.chunk_size))
            .collect();

        for ((x_chunk, y_chunk), z_chunk) in chunks {
            if opts.use_simd && x_chunk.len() >= opts.simd_threshold {
                let x_view = ArrayView1::from(x_chunk);
                let y_view = ArrayView1::from(y_chunk);
                let ax = T::simd_scalar_mul(&x_view, a);
                let by = T::simd_scalar_mul(&y_view, b);
                let result = T::simd_add(&ax.view(), &by.view());
                z_chunk.copy_from_slice(result.as_slice().unwrap());
            } else {
                for ((xi, yi), zi) in x_chunk.iter().zip(y_chunk).zip(z_chunk) {
                    *zi = a * *xi + b * *yi;
                }
            }
        }
    } else if opts.use_simd && x.len() >= opts.simd_threshold {
        let x_view = ArrayView1::from(x);
        let y_view = ArrayView1::from(y);
        let ax = T::simd_scalar_mul(&x_view, a);
        let by = T::simd_scalar_mul(&y_view, b);
        let result = T::simd_add(&ax.view(), &by.view());
        z.copy_from_slice(result.as_slice().unwrap());
    } else {
        for ((xi, yi), zi) in x.iter().zip(y).zip(z) {
            *zi = a * *xi + b * *yi;
        }
    }
}

#[allow(dead_code)]
#[allow(clippy::too_many_arguments)]
/// Computes the sparse matrix-vector product `y = A * x`, where `A` is a
/// `rows`-row matrix in CSR form: `indptr[r]..indptr[r + 1]` delimits row
/// `r`'s entries in `indices` (column ids) and `data` (values).
pub fn parallel_sparse_matvec_csr<T>(
    y: &mut [T],
    rows: usize,
    indptr: &[usize],
    indices: &[usize],
    data: &[T],
    x: &[T],
    options: Option<ParallelVectorOptions>,
) where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    assert_eq!(
        y.len(),
        rows,
        "Output vector length must match number of rows"
    );
    assert_eq!(indptr.len(), rows + 1, "indptr length must be rows + 1");
    assert_eq!(
        indices.len(),
        data.len(),
        "indices and data must have same length"
    );

    if rows == 0 {
        return;
    }

    let opts = options.unwrap_or_default();

    if opts.use_parallel && rows >= opts.parallel_threshold {
        // Row chunks are processed sequentially here; parallelizing would
        // require gathering per-chunk results before writing them to `y`.
        let row_chunks: Vec<_> = (0..rows).step_by(opts.chunk_size).collect();

        for start_row in row_chunks {
            let end_row = (start_row + opts.chunk_size).min(rows);

            for row in start_row..end_row {
                let start_idx = indptr[row];
                let end_idx = indptr[row + 1];

                if end_idx > start_idx {
                    let row_indices = &indices[start_idx..end_idx];
                    let row_data = &data[start_idx..end_idx];

                    if opts.use_simd && (end_idx - start_idx) >= opts.simd_threshold {
                        // Manually unrolled 8-wide accumulation; the gather
                        // from `x` via `indices` prevents a plain SIMD load.
                        let mut sum = T::zero();
                        // Round the row length down to a multiple of 8.
                        let simd_len = (end_idx - start_idx) & !7;
                        for i in (0..simd_len).step_by(8) {
                            let data_chunk = &row_data[i..i + 8];
                            let mut x_values = [T::zero(); 8];
                            for (j, &col_idx) in row_indices[i..i + 8].iter().enumerate() {
                                x_values[j] = x[col_idx];
                            }

                            for k in 0..8 {
                                sum += data_chunk[k] * x_values[k];
                            }
                        }

                        // Scalar tail for the remaining entries.
                        for i in simd_len..(end_idx - start_idx) {
                            sum += row_data[i] * x[row_indices[i]];
                        }

                        y[row] = sum;
                    } else {
                        let mut sum = T::zero();
                        for (&col_idx, &value) in row_indices.iter().zip(row_data.iter()) {
                            sum += value * x[col_idx];
                        }
                        y[row] = sum;
                    }
                } else {
                    y[row] = T::zero();
                }
            }
        }
    } else {
        for row in 0..rows {
            let start_idx = indptr[row];
            let end_idx = indptr[row + 1];

            let mut sum = T::zero();
            for idx in start_idx..end_idx {
                let col = indices[idx];
                sum += data[idx] * x[col];
            }
            y[row] = sum;
        }
    }
}

#[allow(dead_code)]
#[allow(clippy::too_many_arguments)]
/// Adaptive CSR SpMV: inspects the per-row non-zero distribution and
/// dispatches to a load-balanced, unrolled, or cache-blocked kernel.
pub fn advanced_sparse_matvec_csr<T>(
    y: &mut [T],
    rows: usize,
    indptr: &[usize],
    indices: &[usize],
    data: &[T],
    x: &[T],
) where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    let total_nnz = data.len();
    let avg_nnz_per_row = if rows > 0 { total_nnz / rows } else { 0 };

    let mut row_nnz_counts = Vec::with_capacity(rows);
    for row in 0..rows {
        row_nnz_counts.push(indptr[row + 1] - indptr[row]);
    }

    // Sample standard deviation of non-zeros per row, as a measure of how
    // unevenly the work is spread across rows.
    let nnz_std_dev = if rows > 1 {
        let mean = avg_nnz_per_row as f64;
        let variance: f64 = row_nnz_counts
            .iter()
            .map(|&x| (x as f64 - mean).powi(2))
            .sum::<f64>()
            / (rows - 1) as f64;
        variance.sqrt()
    } else {
        0.0
    };

    if nnz_std_dev > (avg_nnz_per_row as f64) * 2.0 {
        // Highly irregular rows: balance work by non-zero count.
        advanced_adaptive_load_balanced_spmv(y, rows, indptr, indices, data, x, &row_nnz_counts);
    } else if avg_nnz_per_row > 64 {
        // Dense rows: the unrolled kernel amortizes loop overhead.
        advanced_vectorized_spmv(y, rows, indptr, indices, data, x);
    } else {
        // Short rows: favor cache locality over per-row throughput.
        advanced_cache_optimized_spmv(y, rows, indptr, indices, data, x);
    }
}

#[allow(dead_code)]
fn advanced_adaptive_load_balanced_spmv<T>(
    y: &mut [T],
    rows: usize,
    indptr: &[usize],
    indices: &[usize],
    data: &[T],
    x: &[T],
    row_nnz_counts: &[usize],
) where
    T: Float + NumAssign + Send + Sync + Copy,
{
    // Partition the rows into chunks of roughly equal non-zero counts so
    // that each task receives a similar amount of work.
    let total_nnz = data.len();
    let target_nnz_per_chunk = total_nnz / num_cpus::get().max(1);

    let mut work_chunks = Vec::new();
    let mut current_chunk_start = 0;
    let mut current_chunk_nnz = 0;

    for (row, &nnz) in row_nnz_counts.iter().enumerate() {
        current_chunk_nnz += nnz;

        if current_chunk_nnz >= target_nnz_per_chunk || row == rows - 1 {
            work_chunks.push((current_chunk_start, row + 1));
            current_chunk_start = row + 1;
            current_chunk_nnz = 0;
        }
    }

    // Compute each chunk's row sums in parallel, then scatter the gathered
    // results into `y`, which cannot be mutably shared across tasks.
    let chunk_results: Vec<(usize, Vec<T>)> =
        parallel_map(&work_chunks, |(start_row, end_row)| {
            let sums: Vec<T> = (*start_row..*end_row)
                .map(|row| {
                    let mut sum = T::zero();
                    for idx in indptr[row]..indptr[row + 1] {
                        sum += data[idx] * x[indices[idx]];
                    }
                    sum
                })
                .collect();
            (*start_row, sums)
        });

    for (start_row, sums) in chunk_results {
        y[start_row..start_row + sums.len()].copy_from_slice(&sums);
    }
}

#[allow(dead_code)]
/// CSR SpMV with an 8-way unrolled inner loop, intended for rows with many
/// non-zeros; the indexed gather from `x` keeps this scalar rather than
/// truly vectorized.
fn advanced_vectorized_spmv<T>(
    y: &mut [T],
    rows: usize,
    indptr: &[usize],
    indices: &[usize],
    data: &[T],
    x: &[T],
) where
    T: Float + NumAssign + Send + Sync + Copy + SimdUnifiedOps,
{
    for row in 0..rows {
        let start_idx = indptr[row];
        let end_idx = indptr[row + 1];
        let nnz = end_idx - start_idx;

        if nnz == 0 {
            y[row] = T::zero();
            continue;
        }

        let row_data = &data[start_idx..end_idx];
        let row_indices = &indices[start_idx..end_idx];

        if nnz >= 8 {
            let mut sum = T::zero();
            let unroll_iterations = nnz / 8;

            for chunk in 0..unroll_iterations {
                let base_idx = chunk * 8;
                let mut chunk_sum = T::zero();

                chunk_sum += row_data[base_idx] * x[row_indices[base_idx]];
                chunk_sum += row_data[base_idx + 1] * x[row_indices[base_idx + 1]];
                chunk_sum += row_data[base_idx + 2] * x[row_indices[base_idx + 2]];
                chunk_sum += row_data[base_idx + 3] * x[row_indices[base_idx + 3]];
                chunk_sum += row_data[base_idx + 4] * x[row_indices[base_idx + 4]];
                chunk_sum += row_data[base_idx + 5] * x[row_indices[base_idx + 5]];
                chunk_sum += row_data[base_idx + 6] * x[row_indices[base_idx + 6]];
                chunk_sum += row_data[base_idx + 7] * x[row_indices[base_idx + 7]];

                sum += chunk_sum;
            }

            // Scalar tail for the remaining entries.
            for i in (unroll_iterations * 8)..nnz {
                sum += row_data[i] * x[row_indices[i]];
            }

            y[row] = sum;
        } else {
            let mut sum = T::zero();
            for (&value, &col_idx) in row_data.iter().zip(row_indices) {
                sum += value * x[col_idx];
            }
            y[row] = sum;
        }
    }
}

#[allow(dead_code)]
/// CSR SpMV processed in blocks of rows, keeping the touched portions of
/// `indptr`, `indices`, and `data` cache-resident.
fn advanced_cache_optimized_spmv<T>(
    y: &mut [T],
    rows: usize,
    indptr: &[usize],
    indices: &[usize],
    data: &[T],
    x: &[T],
) where
    T: Float + NumAssign + Send + Sync + Copy,
{
    const CACHE_BLOCK_SIZE: usize = 64;

    for row_block_start in (0..rows).step_by(CACHE_BLOCK_SIZE) {
        let row_block_end = (row_block_start + CACHE_BLOCK_SIZE).min(rows);

        for row in row_block_start..row_block_end {
            let start_idx = indptr[row];
            let end_idx = indptr[row + 1];

            let mut sum = T::zero();

            for idx in start_idx..end_idx {
                let col = indices[idx];
                sum += data[idx] * x[col];
            }

            y[row] = sum;
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    #[test]
    fn test_parallel_dot() {
        let x = vec![1.0, 2.0, 3.0, 4.0];
        let y = vec![2.0, 3.0, 4.0, 5.0];

        let result = parallel_dot(&x, &y, None);
        let expected = 1.0 * 2.0 + 2.0 * 3.0 + 3.0 * 4.0 + 4.0 * 5.0;
        assert_relative_eq!(result, expected);
    }

    #[test]
    fn test_parallel_norm2() {
        let x = vec![3.0, 4.0];
        let result = parallel_norm2(&x, None);
        assert_relative_eq!(result, 5.0);
    }

    #[test]
    fn test_parallel_vector_add() {
        let x = vec![1.0, 2.0, 3.0];
        let y = vec![4.0, 5.0, 6.0];
        let mut z = vec![0.0; 3];

        parallel_vector_add(&x, &y, &mut z, None);

        assert_relative_eq!(z[0], 5.0);
        assert_relative_eq!(z[1], 7.0);
        assert_relative_eq!(z[2], 9.0);
    }
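
    // Companion to test_parallel_vector_add, covering the subtraction kernel
    // (which previously had no test of its own).
    #[test]
    fn test_parallel_vector_sub() {
        let x = vec![5.0, 7.0, 9.0];
        let y = vec![1.0, 2.0, 3.0];
        let mut z = vec![0.0; 3];

        parallel_vector_sub(&x, &y, &mut z, None);

        assert_relative_eq!(z[0], 4.0);
        assert_relative_eq!(z[1], 5.0);
        assert_relative_eq!(z[2], 6.0);
    }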

    #[test]
    fn test_parallel_axpy() {
        let a = 2.0;
        let x = vec![1.0, 2.0, 3.0];
        let mut y = vec![1.0, 1.0, 1.0];

        parallel_axpy(a, &x, &mut y, None);

        assert_relative_eq!(y[0], 3.0);
        assert_relative_eq!(y[1], 5.0);
        assert_relative_eq!(y[2], 7.0);
    }
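
    // Forces the SIMD path (threading disabled) with a tiny SIMD threshold
    // and checks it against the scalar formula; like the tests above, this
    // assumes f64 implements SimdUnifiedOps.
    #[test]
    fn test_simd_path_matches_scalar() {
        let opts = ParallelVectorOptions {
            use_parallel: false,
            simd_threshold: 2,
            ..Default::default()
        };

        let x: Vec<f64> = (0..64).map(|i| i as f64).collect();
        let y: Vec<f64> = (0..64).map(|i| (i + 1) as f64).collect();

        let result = parallel_dot(&x, &y, Some(opts));
        let expected: f64 = (0..64).map(|i| (i * (i + 1)) as f64).sum();
        assert_relative_eq!(result, expected, epsilon = 1e-10);
    }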

    #[test]
    fn test_parallel_vector_scale() {
        let a = 3.0;
        let x = vec![1.0, 2.0, 3.0];
        let mut y = vec![0.0; 3];

        parallel_vector_scale(a, &x, &mut y, None);

        assert_relative_eq!(y[0], 3.0);
        assert_relative_eq!(y[1], 6.0);
        assert_relative_eq!(y[2], 9.0);
    }
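
    // Round-trip check for the copy kernel, which previously had no test.
    #[test]
    fn test_parallel_vector_copy() {
        let x = vec![1.0, 2.0, 3.0, 4.0];
        let mut y = vec![0.0; 4];

        parallel_vector_copy(&x, &mut y, None);

        assert_eq!(y, x);
    }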

    #[test]
    fn test_parallel_linear_combination() {
        let a = 2.0;
        let x = vec![1.0, 2.0];
        let b = 3.0;
        let y = vec![1.0, 1.0];
        let mut z = vec![0.0; 2];

        parallel_linear_combination(a, &x, b, &y, &mut z, None);

        assert_relative_eq!(z[0], 5.0);
        assert_relative_eq!(z[1], 7.0);
    }
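
    // Worked CSR example: the 3x3 matrix [[1, 0, 2], [0, 3, 0], [4, 5, 6]]
    // stored as indptr/indices/data, multiplied by x = [1, 2, 3].
    #[test]
    fn test_parallel_sparse_matvec_csr() {
        let indptr = vec![0, 2, 3, 6];
        let indices = vec![0, 2, 1, 0, 1, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let x = vec![1.0, 2.0, 3.0];
        let mut y = vec![0.0; 3];

        parallel_sparse_matvec_csr(&mut y, 3, &indptr, &indices, &data, &x, None);

        assert_relative_eq!(y[0], 7.0); // 1*1 + 2*3
        assert_relative_eq!(y[1], 6.0); // 3*2
        assert_relative_eq!(y[2], 32.0); // 4*1 + 5*2 + 6*3
    }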

    #[test]
    fn test_empty_vectors() {
        let x: Vec<f64> = vec![];
        let y: Vec<f64> = vec![];

        assert_eq!(parallel_dot(&x, &y, None), 0.0);
        assert_eq!(parallel_norm2(&x, None), 0.0);
    }
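
    // The adaptive entry point should agree with the plain CSR kernel on the
    // same 3x3 example; with these row statistics it takes the cache-blocked
    // path.
    #[test]
    fn test_advanced_sparse_matvec_csr() {
        let indptr = vec![0, 2, 3, 6];
        let indices = vec![0, 2, 1, 0, 1, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let x = vec![1.0, 2.0, 3.0];
        let mut y = vec![0.0; 3];

        advanced_sparse_matvec_csr(&mut y, 3, &indptr, &indices, &data, &x);

        assert_relative_eq!(y[0], 7.0);
        assert_relative_eq!(y[1], 6.0);
        assert_relative_eq!(y[2], 32.0);
    }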

    #[test]
    #[ignore = "timeout"]
    fn test_large_vectors_trigger_parallel() {
        let opts = ParallelVectorOptions {
            parallel_threshold: 100,
            ..Default::default()
        };

        let x: Vec<f64> = (0..1000).map(|i| i as f64).collect();
        let y: Vec<f64> = (0..1000).map(|i| (i + 1) as f64).collect();

        let result = parallel_dot(&x, &y, Some(opts));

        let expected: f64 = (0..1000).map(|i| (i * (i + 1)) as f64).sum();
        assert_relative_eq!(result, expected, epsilon = 1e-10);
    }
}