Skip to main content

oxiphysics_core/parallel/
functions_2.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5#[allow(unused_imports)]
6use super::functions::*;
// Unit tests for the helpers brought into scope by `use super::*` (the parent's
// `super::functions::*` glob) together with the `crate::parallel` items imported below.
7#[cfg(test)]
8mod tests {
9    use super::*;
10
11    use crate::parallel::ParallelFor;
12    use crate::parallel::SerialWorkQueue;
13
14    use crate::parallel::ThreadPoolStats;
15    use crate::parallel::WorkRange;
16
17    use crate::parallel::WorkStealingPool;
18    use crate::parallel::parallel_dot_product;
19
20    use crate::parallel::prefix_sum;
21
22    use std::sync::Arc;
23    use std::sync::atomic::{AtomicUsize, Ordering};
    // --- f64 map/reduce, scatter/gather, dot product and matrix-vector helpers ---
24    #[test]
25    fn test_parallel_map_f64_square() {
26        let data = vec![1.0_f64, 2.0, 3.0, 4.0];
27        let result = parallel_map_f64(&data, |x| x * x);
28        assert_eq!(result, vec![1.0, 4.0, 9.0, 16.0]);
29    }
30    #[test]
31    fn test_parallel_reduce_f64_sum() {
32        let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
33        let result = parallel_reduce_f64(&data, 0.0, |a, b| a + b);
34        assert!((result - 15.0).abs() < f64::EPSILON);
35    }
    // Ten items split three ways are expected as parts of 4/3/3, and `gather`
    // must restore the original order.
36    #[test]
37    fn test_scatter_gather_roundtrip() {
38        let data: Vec<i32> = (0..10).collect();
39        let parts = scatter_gather(&data, 3);
40        assert_eq!(parts.len(), 3);
41        assert_eq!(parts[0].len(), 4);
42        assert_eq!(parts[1].len(), 3);
43        assert_eq!(parts[2].len(), 3);
44        let reassembled = gather(parts);
45        assert_eq!(reassembled, data);
46    }
47    #[test]
48    fn test_parallel_dot_product() {
49        let a = [1.0_f64, 2.0, 3.0];
50        let b = [4.0_f64, 5.0, 6.0];
51        assert!((parallel_dot_product(&a, &b) - 32.0).abs() < f64::EPSILON);
52    }
53    #[test]
54    fn test_parallel_matrix_vec_multiply_identity() {
55        let matrix = vec![vec![1.0_f64, 0.0], vec![0.0_f64, 1.0]];
56        let vector = [3.0_f64, 4.0];
57        let result = parallel_matrix_vec_multiply(&matrix, &vector);
58        assert!((result[0] - 3.0).abs() < f64::EPSILON);
59        assert!((result[1] - 4.0).abs() < f64::EPSILON);
60    }
    // WorkRange::new(0, 10, 3) is expected to yield three full chunks plus a
    // one-element tail chunk.
61    #[test]
62    fn test_work_range_chunks() {
63        let wr = WorkRange::new(0, 10, 3);
64        let chunks = wr.chunks();
65        assert_eq!(chunks, vec![(0, 3), (3, 6), (6, 9), (9, 10)]);
66        assert_eq!(wr.n_chunks(), 4);
67        assert_eq!(wr.total_work(), 10);
68    }
    // --- generic parallel_map / parallel_filter / parallel_for_each / parallel_reduce ---
69    #[test]
70    fn test_parallel_map_square() {
71        let data: Vec<i32> = (1..=10).collect();
72        let result = parallel_map(&data, |x| x * x);
73        let expected: Vec<i32> = (1..=10).map(|x| x * x).collect();
74        assert_eq!(result, expected);
75    }
76    #[test]
77    fn test_parallel_map_preserves_order() {
78        let data: Vec<usize> = (0..100).collect();
79        let result = parallel_map(&data, |&x| x * 2);
80        let expected: Vec<usize> = (0..100).map(|x| x * 2).collect();
81        assert_eq!(result, expected, "parallel_map must preserve order");
82    }
83    #[test]
84    fn test_parallel_map_empty() {
85        let data: Vec<i32> = vec![];
86        let result = parallel_map(&data, |x| x * x);
87        assert!(result.is_empty());
88    }
89    #[test]
90    fn test_parallel_filter_even() {
91        let data: Vec<i32> = (0..20).collect();
92        let result = parallel_filter(&data, |x| x % 2 == 0);
93        let expected: Vec<i32> = (0..20).filter(|x| x % 2 == 0).collect();
94        assert_eq!(result, expected);
95    }
96    #[test]
97    fn test_parallel_filter_empty() {
98        let data: Vec<i32> = vec![];
99        let result = parallel_filter(&data, |_| true);
100        assert!(result.is_empty());
101    }
102    #[test]
103    fn test_parallel_filter_none_match() {
104        let data: Vec<i32> = (0..10).collect();
105        let result = parallel_filter(&data, |_| false);
106        assert!(result.is_empty());
107    }
    // The closure bumps a shared atomic counter; the final count must equal
    // the number of input items.
108    #[test]
109    fn test_parallel_for_each_counter() {
110        let counter = Arc::new(AtomicUsize::new(0));
111        let data: Vec<i32> = (0..50).collect();
112        let cc = Arc::clone(&counter);
113        parallel_for_each(&data, move |_| {
114            cc.fetch_add(1, Ordering::Relaxed);
115        });
116        assert_eq!(counter.load(Ordering::Relaxed), 50);
117    }
118    #[test]
119    fn test_parallel_reduce_sum() {
120        let data: Vec<i64> = (1..=100).collect();
121        let result = parallel_reduce(&data, |a, b| a + b, 0_i64);
122        assert_eq!(result, 5050);
123    }
    // With no items to combine, the supplied initial value (42) is expected back unchanged.
124    #[test]
125    fn test_parallel_reduce_empty() {
126        let data: Vec<i32> = vec![];
127        let result = parallel_reduce(&data, |a, b| a + b, 42_i32);
128        assert_eq!(result, 42);
129    }
    /// Test reducer that sums `f64` items: identity is `0.0`, `fold` and
    /// `combine` both add, and `finalize` returns the accumulator unchanged.
130    pub(super) struct SumOp;
131    impl ReduceOperator for SumOp {
132        type Acc = f64;
133        type Item = f64;
134        type Result = f64;
135        fn identity(&self) -> f64 {
136            0.0
137        }
138        fn fold(&self, acc: f64, item: f64) -> f64 {
139            acc + item
140        }
141        fn combine(&self, left: f64, right: f64) -> f64 {
142            left + right
143        }
144        fn finalize(&self, acc: f64) -> f64 {
145            acc
146        }
147    }
148    #[test]
149    fn test_reduce_with_op() {
150        let data: Vec<f64> = (1..=10).map(|x| x as f64).collect();
151        let result = parallel_reduce_with_op(&data, &SumOp);
152        assert!((result - 55.0).abs() < 1e-10);
153    }
    /// Test reducer that accumulates a `(sum, count)` pair and finalizes it to
    /// the arithmetic mean.
154    pub(super) struct MeanOp;
155    impl ReduceOperator for MeanOp {
156        type Acc = (f64, usize);
157        type Item = f64;
158        type Result = f64;
159        fn identity(&self) -> (f64, usize) {
160            (0.0, 0)
161        }
162        fn fold(&self, acc: (f64, usize), item: f64) -> (f64, usize) {
163            (acc.0 + item, acc.1 + 1)
164        }
165        fn combine(&self, left: (f64, usize), right: (f64, usize)) -> (f64, usize) {
166            (left.0 + right.0, left.1 + right.1)
167        }
168        fn finalize(&self, acc: (f64, usize)) -> f64 {
            // A zero count would divide by zero; report 0.0 for that case instead.
169            if acc.1 == 0 {
170                0.0
171            } else {
172                acc.0 / acc.1 as f64
173            }
174        }
175    }
176    #[test]
177    fn test_reduce_with_mean_op() {
178        let data: Vec<f64> = vec![2.0, 4.0, 6.0, 8.0];
179        let result = parallel_reduce_with_op(&data, &MeanOp);
180        assert!((result - 5.0).abs() < 1e-10);
181    }
    // --- ParallelFor: the closure must be invoked exactly once per index in the range ---
182    #[test]
183    fn test_parallel_for_coverage() {
184        let counter = Arc::new(AtomicUsize::new(0));
185        let counter_clone = Arc::clone(&counter);
186        let pf = ParallelFor::with_chunks(4);
187        pf.run(0, 100, move |_i| {
188            counter_clone.fetch_add(1, Ordering::Relaxed);
189        });
190        assert_eq!(counter.load(Ordering::Relaxed), 100);
191    }
192    #[test]
193    fn test_parallel_for_run_n() {
194        let counter = Arc::new(AtomicUsize::new(0));
195        let counter_clone = Arc::clone(&counter);
196        let pf = ParallelFor::with_chunks(2);
197        pf.run_n(50, move |_| {
198            counter_clone.fetch_add(1, Ordering::Relaxed);
199        });
200        assert_eq!(counter.load(Ordering::Relaxed), 50);
201    }
    // start == end is an empty range, so the closure must never run.
202    #[test]
203    fn test_parallel_for_empty_range() {
204        let counter = Arc::new(AtomicUsize::new(0));
205        let cc = Arc::clone(&counter);
206        let pf = ParallelFor::new();
207        pf.run(5, 5, move |_| {
208            cc.fetch_add(1, Ordering::Relaxed);
209        });
210        assert_eq!(counter.load(Ordering::Relaxed), 0);
211    }
    // --- WorkStealingPool and ThreadPoolStats ---
212    #[test]
213    fn test_work_stealing_pool_basic() {
214        let pool = WorkStealingPool::new(4);
215        let counter = Arc::new(AtomicUsize::new(0));
216        for _ in 0..10 {
217            let cc = Arc::clone(&counter);
218            pool.submit(move || {
219                cc.fetch_add(1, Ordering::Relaxed);
220            });
221        }
222        pool.join();
223        assert_eq!(counter.load(Ordering::Relaxed), 10);
224    }
    // Joining a pool that never received a task must simply return.
225    #[test]
226    fn test_work_stealing_pool_empty() {
227        let pool = WorkStealingPool::new(2);
228        pool.join();
229    }
230    #[test]
231    fn test_work_stealing_pool_stats() {
232        let pool = WorkStealingPool::new(2);
233        for _ in 0..5 {
234            pool.submit(|| {});
235        }
236        let stats = pool.stats();
237        assert_eq!(stats.tasks_submitted, 5);
238        pool.join();
239    }
240    #[test]
241    fn test_thread_pool_stats() {
242        let mut stats = ThreadPoolStats::new(4);
243        stats.tasks_submitted = 100;
244        stats.tasks_completed = 100;
245        assert!((stats.tasks_per_worker() - 25.0).abs() < 1e-10);
246        assert!((stats.completion_rate() - 1.0).abs() < 1e-10);
247    }
    // Zero workers must give a per-worker figure of (approximately) zero.
248    #[test]
249    fn test_thread_pool_stats_zero_workers() {
250        let stats = ThreadPoolStats::new(0);
251        assert!(stats.tasks_per_worker().abs() < 1e-10);
252    }
    // --- sorting, prefix sums, min/max and merging ---
253    #[test]
254    fn test_parallel_merge_sort_basic() {
255        let mut data = vec![5.0, 3.0, 1.0, 4.0, 2.0];
256        parallel_merge_sort(&mut data);
257        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
258    }
259    #[test]
260    fn test_parallel_merge_sort_large() {
261        let mut data: Vec<f64> = (0..200).rev().map(|x| x as f64).collect();
262        parallel_merge_sort(&mut data);
263        let expected: Vec<f64> = (0..200).map(|x| x as f64).collect();
264        assert_eq!(data, expected);
265    }
266    #[test]
267    fn test_parallel_merge_sort_empty() {
268        let mut data: Vec<f64> = vec![];
269        parallel_merge_sort(&mut data);
270        assert!(data.is_empty());
271    }
272    #[test]
273    fn test_parallel_merge_sort_single() {
274        let mut data = vec![42.0];
275        parallel_merge_sort(&mut data);
276        assert_eq!(data, vec![42.0]);
277    }
278    #[test]
279    fn test_parallel_merge_sort_already_sorted() {
280        let mut data: Vec<f64> = (0..100).map(|x| x as f64).collect();
281        parallel_merge_sort(&mut data);
282        let expected: Vec<f64> = (0..100).map(|x| x as f64).collect();
283        assert_eq!(data, expected);
284    }
    // Inclusive scan: element i of the result includes data[i].
285    #[test]
286    fn test_prefix_sum() {
287        let data = [1.0, 2.0, 3.0, 4.0];
288        let result = prefix_sum(&data);
289        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0]);
290    }
    // Exclusive scan: element i of the result is the sum of everything before data[i].
291    #[test]
292    fn test_exclusive_prefix_sum() {
293        let data = [1.0, 2.0, 3.0, 4.0];
294        let result = exclusive_prefix_sum(&data);
295        assert_eq!(result, vec![0.0, 1.0, 3.0, 6.0]);
296    }
297    #[test]
298    fn test_parallel_min() {
299        let data = vec![5.0, 2.0, 8.0, 1.0, 9.0];
300        assert!((parallel_min(&data) - 1.0).abs() < 1e-10);
301    }
302    #[test]
303    fn test_parallel_max() {
304        let data = vec![5.0, 2.0, 8.0, 1.0, 9.0];
305        assert!((parallel_max(&data) - 9.0).abs() < 1e-10);
306    }
    // Empty input is expected to yield +infinity for min and -infinity for max.
307    #[test]
308    fn test_parallel_min_empty() {
309        let data: Vec<f64> = vec![];
310        assert_eq!(parallel_min(&data), f64::INFINITY);
311    }
312    #[test]
313    fn test_parallel_max_empty() {
314        let data: Vec<f64> = vec![];
315        assert_eq!(parallel_max(&data), f64::NEG_INFINITY);
316    }
317    #[test]
318    fn test_merge_sorted() {
319        let a = [1.0, 3.0, 5.0];
320        let b = [2.0, 4.0, 6.0];
321        let merged = merge_sorted(&a, &b);
322        assert_eq!(merged, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
323    }
    // An empty side must be accepted in either argument position.
324    #[test]
325    fn test_merge_sorted_empty() {
326        let a: Vec<f64> = vec![];
327        let b = vec![1.0, 2.0];
328        assert_eq!(merge_sorted(&a, &b), vec![1.0, 2.0]);
329        assert_eq!(merge_sorted(&b, &a), vec![1.0, 2.0]);
330    }
331    #[test]
332    fn test_insertion_sort() {
333        let mut data = vec![5.0, 1.0, 3.0, 2.0, 4.0];
334        insertion_sort(&mut data);
335        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
336    }
    // --- thread-count queries: both must report at least one thread ---
337    #[test]
338    fn test_available_threads_positive() {
339        assert!(available_threads() >= 1, "should report at least 1 thread");
340    }
341    #[test]
342    fn test_suggested_thread_count_at_least_one() {
343        assert!(suggested_thread_count() >= 1);
344    }
    // --- chunked helpers: chunk_process / chunk_zip_map / chunk_dot_product ---
    // 1..=12 in chunks of 4 sums to 10, 26 and 42; per-chunk outputs are
    // expected back concatenated in chunk order.
345    #[test]
346    fn test_chunk_process_sum_chunks() {
347        let data: Vec<f64> = (1..=12).map(|x| x as f64).collect();
348        let result = chunk_process(&data, 4, |chunk| vec![chunk.iter().sum::<f64>()]);
349        assert_eq!(result, vec![10.0, 26.0, 42.0]);
350    }
351    #[test]
352    fn test_chunk_process_empty() {
353        let data: Vec<f64> = vec![];
354        let result = chunk_process(&data, 4, |c| vec![c.iter().sum::<f64>()]);
355        assert!(result.is_empty());
356    }
357    #[test]
358    fn test_chunk_process_single_element_chunks() {
359        let data = vec![1.0, 2.0, 3.0];
360        let result = chunk_process(&data, 1, |c| vec![c[0] * 2.0]);
361        assert_eq!(result, vec![2.0, 4.0, 6.0]);
362    }
363    #[test]
364    fn test_chunk_zip_map_add() {
365        let a = [1.0, 2.0, 3.0, 4.0];
366        let b = [10.0, 20.0, 30.0, 40.0];
367        let result = chunk_zip_map(&a, &b, 2, |x, y| x + y);
368        assert_eq!(result, vec![11.0, 22.0, 33.0, 44.0]);
369    }
    // A chunk size (8) larger than the input (3 elements) must still process every element.
370    #[test]
371    fn test_chunk_zip_map_multiply() {
372        let a = [2.0, 3.0, 4.0];
373        let b = [5.0, 6.0, 7.0];
374        let result = chunk_zip_map(&a, &b, 8, |x, y| x * y);
375        assert_eq!(result, vec![10.0, 18.0, 28.0]);
376    }
377    #[test]
378    fn test_chunk_dot_product_basic() {
379        let a = [1.0, 2.0, 3.0];
380        let b = [4.0, 5.0, 6.0];
381        let result = chunk_dot_product(&a, &b, 2);
382        assert!((result - 32.0).abs() < 1e-10);
383    }
384    #[test]
385    fn test_chunk_dot_product_chunk_size_1() {
386        let a = [1.0, 2.0, 3.0, 4.0];
387        let b = [1.0, 1.0, 1.0, 1.0];
388        let result = chunk_dot_product(&a, &b, 1);
389        assert!((result - 10.0).abs() < 1e-10);
390    }
391    #[test]
392    fn test_chunk_dot_product_empty() {
393        assert!((chunk_dot_product(&[], &[], 4)).abs() < 1e-10);
394    }
    // --- SerialWorkQueue: these tests expect `pop` to return the oldest item
    // and `steal` to return the most recently pushed one ---
395    #[test]
396    fn test_serial_work_queue_push_pop() {
397        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
398        q.push(1);
399        q.push(2);
400        q.push(3);
401        assert_eq!(q.pop(), Some(1));
402        assert_eq!(q.pop(), Some(2));
403        assert_eq!(q.len(), 1);
404    }
405    #[test]
406    fn test_serial_work_queue_steal() {
407        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
408        q.push(1);
409        q.push(2);
410        q.push(3);
411        assert_eq!(q.steal(), Some(3));
412        assert_eq!(q.len(), 2);
413    }
414    #[test]
415    fn test_serial_work_queue_empty() {
416        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
417        assert!(q.is_empty());
418        assert_eq!(q.pop(), None);
419        assert_eq!(q.steal(), None);
420    }
    // Draining must visit items in push order and leave the queue empty.
421    #[test]
422    fn test_serial_work_queue_drain_and_run() {
423        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
424        for i in 0..5 {
425            q.push(i);
426        }
427        let mut collected = Vec::new();
428        q.drain_and_run(|x| collected.push(x));
429        assert_eq!(collected, vec![0, 1, 2, 3, 4]);
430        assert!(q.is_empty());
431    }
    // --- parallel_sort / sorted_copy ---
432    #[test]
433    fn test_parallel_sort_basic() {
434        let mut data = vec![5.0, 3.0, 1.0, 4.0, 2.0];
435        parallel_sort(&mut data);
436        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
437    }
438    #[test]
439    fn test_parallel_sort_large() {
440        let mut data: Vec<f64> = (0..500).rev().map(|x| x as f64).collect();
441        parallel_sort(&mut data);
442        let expected: Vec<f64> = (0..500).map(|x| x as f64).collect();
443        assert_eq!(data, expected);
444    }
445    #[test]
446    fn test_parallel_sort_empty() {
447        let mut data: Vec<f64> = vec![];
448        parallel_sort(&mut data);
449        assert!(data.is_empty());
450    }
451    #[test]
452    fn test_parallel_sort_already_sorted() {
453        let mut data: Vec<f64> = (0..100).map(|x| x as f64).collect();
454        parallel_sort(&mut data);
455        assert_eq!(data, (0..100).map(|x| x as f64).collect::<Vec<_>>());
456    }
457    #[test]
458    fn test_parallel_sort_duplicates() {
459        let mut data = vec![3.0, 1.0, 3.0, 2.0, 1.0];
460        parallel_sort(&mut data);
461        assert_eq!(data, vec![1.0, 1.0, 2.0, 3.0, 3.0]);
462    }
463    #[test]
464    fn test_sorted_copy_does_not_mutate_input() {
465        let original = vec![5.0, 3.0, 1.0, 4.0, 2.0];
466        let sorted = sorted_copy(&original);
467        assert_eq!(
468            original,
469            vec![5.0, 3.0, 1.0, 4.0, 2.0],
470            "original must be unchanged"
471        );
472        assert_eq!(sorted, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
473    }
    // --- parallel_histogram: these tests check only the bin count and that the
    // per-bin counts add up to the number of samples ---
474    #[test]
475    fn test_parallel_histogram_uniform() {
476        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
477        let counts = parallel_histogram(&data, 10);
478        assert_eq!(counts.len(), 10);
479        let total: usize = counts.iter().sum();
480        assert_eq!(total, 100);
481    }
482    #[test]
483    fn test_parallel_histogram_empty() {
484        assert!(parallel_histogram(&[], 5).is_empty());
485    }
486    #[test]
487    fn test_parallel_histogram_zero_bins() {
488        let data = vec![1.0, 2.0, 3.0];
489        assert!(parallel_histogram(&data, 0).is_empty());
490    }
    // All samples identical (zero-width value range): every sample must still be counted.
491    #[test]
492    fn test_parallel_histogram_single_value() {
493        let data = vec![5.0; 10];
494        let counts = parallel_histogram(&data, 4);
495        let total: usize = counts.iter().sum();
496        assert_eq!(total, 10);
497    }
498}
/// Compute a chunked (SIMD-mimic) dot product of `a` and `b`.
///
/// The bulk of the input is consumed four elements at a time, with one
/// independent accumulator per lane to mimic 4-wide SIMD. The (at most three)
/// leftover elements are then folded in with a scalar pass.
///
/// Returns `0.0` when both slices are empty.
///
/// # Panics
///
/// Panics if `a` and `b` differ in length.
#[allow(dead_code)]
pub fn vectorized_dot_product(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "vectorized_dot_product: length mismatch");

    let quads_a = a.chunks_exact(4);
    let quads_b = b.chunks_exact(4);
    // `remainder()` is known up front, so grab the scalar tail before the
    // iterators are consumed by the lane loop.
    let (tail_a, tail_b) = (quads_a.remainder(), quads_b.remainder());

    // One accumulator per lane; `chunks_exact` guarantees four elements per
    // chunk, so the inner zip always touches every lane exactly once.
    let mut lanes = [0.0_f64; 4];
    for (qa, qb) in quads_a.zip(quads_b) {
        for ((lane, x), y) in lanes.iter_mut().zip(qa).zip(qb) {
            *lane += x * y;
        }
    }

    // Horizontal add of the lanes (left to right), then the scalar tail in
    // element order.
    let lane_total = lanes[0] + lanes[1] + lanes[2] + lanes[3];
    tail_a
        .iter()
        .zip(tail_b)
        .fold(lane_total, |sum, (x, y)| sum + x * y)
}
/// Parallel-style inclusive prefix sum using a two-pass approach.
///
/// Phase 1: Compute a local prefix sum inside each chunk (the step that could
/// run in parallel).
/// Phase 2: Shift each chunk by the cumulative total of all preceding chunks.
///
/// In practice both phases run sequentially here (correctness reference).
///
/// `n_chunks` is clamped to `1..=data.len()`, so `0` behaves like a single
/// chunk and values larger than the input behave like one element per chunk.
/// An empty `data` yields an empty vector.
#[allow(dead_code)]
pub fn parallel_prefix_scan(data: &[f64], n_chunks: usize) -> Vec<f64> {
    if data.is_empty() {
        return Vec::new();
    }
    // `data` is non-empty here, so the clamp bounds are always ordered.
    let n_chunks = n_chunks.clamp(1, data.len());
    let chunk_size = data.len().div_ceil(n_chunks);

    // Work in a single output buffer instead of one allocation per chunk.
    let mut scanned = data.to_vec();

    // Phase 1: independent inclusive scan inside every chunk.
    for chunk in scanned.chunks_mut(chunk_size) {
        let mut acc = 0.0;
        for value in chunk.iter_mut() {
            acc += *value;
            *value = acc;
        }
    }

    // Phase 2: carry the running total of earlier chunks into later ones.
    let mut offset = 0.0_f64;
    for chunk in scanned.chunks_mut(chunk_size) {
        // Read the chunk-local total before it gets shifted by `offset`.
        let local_total = chunk.last().copied().unwrap_or(0.0);
        for value in chunk.iter_mut() {
            *value += offset;
        }
        offset += local_total;
    }
    scanned
}
// Tests for `SoaVec3`, `vectorized_dot_product`, `parallel_prefix_scan`,
// `ExtendedPoolStats` and `WorkStealingDeque`, all reached through `crate::parallel`.
562#[cfg(test)]
563mod tests_new_parallel {
564
565    use crate::parallel::ExtendedPoolStats;
566
567    use crate::parallel::SoaVec3;
568
569    use crate::parallel::WorkStealingDeque;
570
571    use crate::parallel::parallel_dot_product;
572    use crate::parallel::parallel_prefix_scan;
573    use crate::parallel::prefix_sum;
574    use crate::parallel::vectorized_dot_product;
575    use std::sync::{Arc, Mutex};
    // --- SoaVec3: container of (x, y, z) triples ---
576    #[test]
577    fn test_soa_vec3_push_get() {
578        let mut soa = SoaVec3::new();
579        soa.push(1.0, 2.0, 3.0);
580        soa.push(4.0, 5.0, 6.0);
581        assert_eq!(soa.len(), 2);
582        assert_eq!(soa.get(0), (1.0, 2.0, 3.0));
583        assert_eq!(soa.get(1), (4.0, 5.0, 6.0));
584    }
    // Dotting the three unit axes with (1, 2, 3) must pick out each component in turn.
585    #[test]
586    fn test_soa_vec3_dot_with() {
587        let mut soa = SoaVec3::new();
588        soa.push(1.0, 0.0, 0.0);
589        soa.push(0.0, 1.0, 0.0);
590        soa.push(0.0, 0.0, 1.0);
591        let dots = soa.dot_with(1.0, 2.0, 3.0);
592        assert!((dots[0] - 1.0).abs() < 1e-10);
593        assert!((dots[1] - 2.0).abs() < 1e-10);
594        assert!((dots[2] - 3.0).abs() < 1e-10);
595    }
596    #[test]
597    fn test_soa_vec3_norms_sq() {
598        let mut soa = SoaVec3::new();
599        soa.push(3.0, 4.0, 0.0);
600        let norms = soa.norms_sq();
601        assert!(
602            (norms[0] - 25.0).abs() < 1e-10,
603            "3-4-0 vector norm²=25, got {}",
604            norms[0]
605        );
606    }
607    #[test]
608    fn test_soa_vec3_empty() {
609        let soa = SoaVec3::new();
610        assert!(soa.is_empty());
611        assert_eq!(soa.len(), 0);
612    }
    // Requesting capacity must not add any elements.
613    #[test]
614    fn test_soa_vec3_with_capacity() {
615        let soa = SoaVec3::with_capacity(100);
616        assert!(soa.is_empty());
617    }
    // --- vectorized_dot_product ---
618    #[test]
619    fn test_vectorized_dot_product_basic() {
620        let a = [1.0, 2.0, 3.0, 4.0, 5.0];
621        let b = [1.0, 1.0, 1.0, 1.0, 1.0];
622        let result = vectorized_dot_product(&a, &b);
623        assert!((result - 15.0).abs() < 1e-10);
624    }
625    #[test]
626    fn test_vectorized_dot_product_orthogonal() {
627        let a = [1.0, 0.0, 0.0, 0.0];
628        let b = [0.0, 1.0, 0.0, 0.0];
629        let result = vectorized_dot_product(&a, &b);
630        assert!(
631            result.abs() < 1e-10,
632            "orthogonal vectors: dot=0, got {result}"
633        );
634    }
    // Cross-check against `parallel_dot_product` on the same 100-element input.
635    #[test]
636    fn test_vectorized_dot_product_matches_parallel_dot() {
637        let a: Vec<f64> = (1..=100).map(|x| x as f64).collect();
638        let b: Vec<f64> = (1..=100).map(|x| x as f64).collect();
639        let v = vectorized_dot_product(&a, &b);
640        let p = parallel_dot_product(&a, &b);
641        assert!(
642            (v - p).abs() < 1e-6,
643            "vectorized and parallel dot should match"
644        );
645    }
    // Seven elements: one full group of four plus a three-element scalar tail.
646    #[test]
647    fn test_vectorized_dot_product_non_multiple_of_4() {
648        let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
649        let b = [1.0; 7];
650        let result = vectorized_dot_product(&a, &b);
651        assert!((result - 28.0).abs() < 1e-10);
652    }
    // --- parallel_prefix_scan ---
653    #[test]
654    fn test_parallel_prefix_scan_basic() {
655        let data = [1.0, 2.0, 3.0, 4.0];
656        let result = parallel_prefix_scan(&data, 2);
657        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0]);
658    }
659    #[test]
660    fn test_parallel_prefix_scan_single_chunk() {
661        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
662        let result = parallel_prefix_scan(&data, 1);
663        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0, 15.0]);
664    }
    // Cross-check against `prefix_sum` with a chunk count that does not divide the length.
665    #[test]
666    fn test_parallel_prefix_scan_matches_prefix_sum() {
667        let data: Vec<f64> = (1..=10).map(|x| x as f64).collect();
668        let scan = parallel_prefix_scan(&data, 4);
669        let psum = prefix_sum(&data);
670        for (a, b) in scan.iter().zip(psum.iter()) {
671            assert!((a - b).abs() < 1e-10, "parallel scan {a} vs prefix_sum {b}");
672        }
673    }
674    #[test]
675    fn test_parallel_prefix_scan_empty() {
676        let result = parallel_prefix_scan(&[], 4);
677        assert!(result.is_empty());
678    }
    // --- ExtendedPoolStats ---
    // total_ns = 1_000_000 ns is 1000 µs, so 1000 completed tasks should give 1 task/µs.
679    #[test]
680    fn test_extended_pool_stats_throughput() {
681        let mut stats = ExtendedPoolStats::new(4);
682        stats.base.tasks_completed = 1000;
683        stats.total_ns = 1_000_000;
684        let tp = stats.throughput_tasks_per_us();
685        assert!(
686            (tp - 1.0).abs() < 1e-10,
687            "1000 tasks / 1000µs = 1 task/µs, got {tp}"
688        );
689    }
    // Freshly constructed stats are expected to report zero throughput.
690    #[test]
691    fn test_extended_pool_stats_zero_ns() {
692        let stats = ExtendedPoolStats::new(2);
693        assert_eq!(stats.throughput_tasks_per_us(), 0.0);
694    }
    // peak_memory_bytes = 1024 is 1 KB, so 100 completed tasks should give 100 tasks/KB.
695    #[test]
696    fn test_extended_pool_stats_memory_efficiency() {
697        let mut stats = ExtendedPoolStats::new(4);
698        stats.base.tasks_completed = 100;
699        stats.peak_memory_bytes = 1024;
700        let eff = stats.memory_efficiency();
701        assert!(
702            (eff - 100.0).abs() < 1e-10,
703            "100 tasks / 1 KB = 100 tasks/KB, got {eff}"
704        );
705    }
    // --- WorkStealingDeque: these tests expect `pop_bottom` to return the most
    // recently pushed item and `steal_top` to return the oldest one ---
706    #[test]
707    fn test_work_stealing_deque_push_pop_bottom() {
708        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
709        deque.push_bottom(1);
710        deque.push_bottom(2);
711        deque.push_bottom(3);
712        assert_eq!(deque.pop_bottom(), Some(3));
713        assert_eq!(deque.pop_bottom(), Some(2));
714        assert_eq!(deque.len(), 1);
715    }
716    #[test]
717    fn test_work_stealing_deque_steal_top() {
718        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
719        for i in 0..5 {
720            deque.push_bottom(i);
721        }
722        assert_eq!(deque.steal_top(), Some(0));
723        assert_eq!(deque.steal_top(), Some(1));
724        assert_eq!(deque.steal_count(), 2);
725    }
726    #[test]
727    fn test_work_stealing_deque_empty_returns_none() {
728        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
729        assert_eq!(deque.pop_bottom(), None);
730        assert_eq!(deque.steal_top(), None);
731    }
732    #[test]
733    fn test_work_stealing_deque_is_empty() {
734        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
735        assert!(deque.is_empty());
736        deque.push_bottom(42);
737        assert!(!deque.is_empty());
738    }
739    #[test]
740    fn test_work_stealing_deque_pop_count() {
741        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
742        for i in 0..5 {
743            deque.push_bottom(i);
744        }
745        let _ = deque.pop_bottom();
746        let _ = deque.pop_bottom();
747        assert_eq!(deque.pop_count(), 2);
748    }
    // --- additional edge cases ---
    // Eight ones over four chunks: the scan must come out as 1, 2, ..., 8.
749    #[test]
750    fn test_parallel_prefix_scan_all_ones() {
751        let data = vec![1.0; 8];
752        let result = parallel_prefix_scan(&data, 4);
753        let expected: Vec<f64> = (1..=8).map(|i| i as f64).collect();
754        for (a, b) in result.iter().zip(expected.iter()) {
755            assert!((a - b).abs() < 1e-10, "scan[i]={a} expected {b}");
756        }
757    }
    // A single element is shorter than one group of four, so only the scalar tail runs.
758    #[test]
759    fn test_vectorized_dot_single_element() {
760        let a = [3.0];
761        let b = [4.0];
762        let result = vectorized_dot_product(&a, &b);
763        assert!((result - 12.0).abs() < 1e-10);
764    }
765    #[test]
766    fn test_soa_vec3_large_push() {
767        let mut soa = SoaVec3::with_capacity(1000);
768        for i in 0..1000 {
769            soa.push(i as f64, i as f64 * 2.0, i as f64 * 3.0);
770        }
771        assert_eq!(soa.len(), 1000);
772        let norms = soa.norms_sq();
773        for (idx, &n) in norms.iter().enumerate() {
            // (i, 2i, 3i) has squared norm i² + 4i² + 9i² = 14·i².
774            let expected = 14.0 * (idx as f64).powi(2);
            // Relative tolerance; the +1.0 keeps the bound non-zero when expected == 0.
775            assert!(
776                (n - expected).abs() < 1e-6 * (expected.abs() + 1.0),
777                "norm²[{idx}]={n}, expected {expected}"
778            );
779        }
780    }
781    #[test]
782    fn test_extended_pool_stats_zero_memory() {
783        let stats = ExtendedPoolStats::new(4);
784        assert_eq!(stats.memory_efficiency(), 0.0);
785    }
    // Single-threaded simulation of stealing: the deque sits behind `Arc<Mutex<_>>`,
    // and a poisoned lock is recovered with `into_inner` instead of panicking.
786    #[test]
787    fn test_work_stealing_simulation() {
788        let shared = Arc::new(Mutex::new(WorkStealingDeque::<i32>::new()));
789        for i in 0..10 {
790            shared
791                .lock()
792                .unwrap_or_else(|e| e.into_inner())
793                .push_bottom(i);
794        }
795        let mut stolen = Vec::new();
796        for _ in 0..3 {
797            if let Some(v) = shared.lock().unwrap_or_else(|e| e.into_inner()).steal_top() {
798                stolen.push(v);
799            }
800        }
801        assert_eq!(stolen.len(), 3);
802        assert_eq!(stolen, vec![0, 1, 2], "steals should come from top (FIFO)");
803        assert_eq!(
804            shared.lock().unwrap_or_else(|e| e.into_inner()).len(),
805            7,
806            "7 items should remain"
807        );
808    }
809}