1#[allow(unused_imports)]
6use super::functions::*;
#[cfg(test)]
mod tests {
    //! Unit tests for the serial reference implementations in the
    //! `parallel` module: map/filter/reduce, sorting, prefix sums,
    //! chunked processing, histograms, and the thread-pool / work-queue
    //! primitives.
    use super::*;

    use crate::parallel::ParallelFor;
    use crate::parallel::SerialWorkQueue;

    use crate::parallel::ThreadPoolStats;
    use crate::parallel::WorkRange;

    use crate::parallel::WorkStealingPool;
    use crate::parallel::parallel_dot_product;

    use crate::parallel::prefix_sum;

    use std::sync::Arc;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // ---- f64 map/reduce and basic linear algebra ----

    #[test]
    fn test_parallel_map_f64_square() {
        let data = vec![1.0_f64, 2.0, 3.0, 4.0];
        let result = parallel_map_f64(&data, |x| x * x);
        assert_eq!(result, vec![1.0, 4.0, 9.0, 16.0]);
    }
    #[test]
    fn test_parallel_reduce_f64_sum() {
        let data = vec![1.0_f64, 2.0, 3.0, 4.0, 5.0];
        let result = parallel_reduce_f64(&data, 0.0, |a, b| a + b);
        assert!((result - 15.0).abs() < f64::EPSILON);
    }
    #[test]
    fn test_scatter_gather_roundtrip() {
        let data: Vec<i32> = (0..10).collect();
        let parts = scatter_gather(&data, 3);
        assert_eq!(parts.len(), 3);
        // 10 items over 3 parts: the first part absorbs the remainder.
        assert_eq!(parts[0].len(), 4);
        assert_eq!(parts[1].len(), 3);
        assert_eq!(parts[2].len(), 3);
        let reassembled = gather(parts);
        assert_eq!(reassembled, data);
    }
    #[test]
    fn test_parallel_dot_product() {
        let a = [1.0_f64, 2.0, 3.0];
        let b = [4.0_f64, 5.0, 6.0];
        // 1*4 + 2*5 + 3*6 = 32.
        assert!((parallel_dot_product(&a, &b) - 32.0).abs() < f64::EPSILON);
    }
    #[test]
    fn test_parallel_matrix_vec_multiply_identity() {
        // Multiplying by the 2x2 identity must return the input vector.
        let matrix = vec![vec![1.0_f64, 0.0], vec![0.0_f64, 1.0]];
        let vector = [3.0_f64, 4.0];
        let result = parallel_matrix_vec_multiply(&matrix, &vector);
        assert!((result[0] - 3.0).abs() < f64::EPSILON);
        assert!((result[1] - 4.0).abs() < f64::EPSILON);
    }

    // ---- WorkRange chunking ----

    #[test]
    fn test_work_range_chunks() {
        // Range [0, 10) in chunks of 3: three full chunks plus a tail.
        let wr = WorkRange::new(0, 10, 3);
        let chunks = wr.chunks();
        assert_eq!(chunks, vec![(0, 3), (3, 6), (6, 9), (9, 10)]);
        assert_eq!(wr.n_chunks(), 4);
        assert_eq!(wr.total_work(), 10);
    }

    // ---- generic map / filter / for_each / reduce ----

    #[test]
    fn test_parallel_map_square() {
        let data: Vec<i32> = (1..=10).collect();
        let result = parallel_map(&data, |x| x * x);
        let expected: Vec<i32> = (1..=10).map(|x| x * x).collect();
        assert_eq!(result, expected);
    }
    #[test]
    fn test_parallel_map_preserves_order() {
        let data: Vec<usize> = (0..100).collect();
        let result = parallel_map(&data, |&x| x * 2);
        let expected: Vec<usize> = (0..100).map(|x| x * 2).collect();
        assert_eq!(result, expected, "parallel_map must preserve order");
    }
    #[test]
    fn test_parallel_map_empty() {
        let data: Vec<i32> = vec![];
        let result = parallel_map(&data, |x| x * x);
        assert!(result.is_empty());
    }
    #[test]
    fn test_parallel_filter_even() {
        let data: Vec<i32> = (0..20).collect();
        let result = parallel_filter(&data, |x| x % 2 == 0);
        let expected: Vec<i32> = (0..20).filter(|x| x % 2 == 0).collect();
        assert_eq!(result, expected);
    }
    #[test]
    fn test_parallel_filter_empty() {
        let data: Vec<i32> = vec![];
        let result = parallel_filter(&data, |_| true);
        assert!(result.is_empty());
    }
    #[test]
    fn test_parallel_filter_none_match() {
        let data: Vec<i32> = (0..10).collect();
        let result = parallel_filter(&data, |_| false);
        assert!(result.is_empty());
    }
    #[test]
    fn test_parallel_for_each_counter() {
        // Atomic counter verifies the callback fired once per element.
        let counter = Arc::new(AtomicUsize::new(0));
        let data: Vec<i32> = (0..50).collect();
        let cc = Arc::clone(&counter);
        parallel_for_each(&data, move |_| {
            cc.fetch_add(1, Ordering::Relaxed);
        });
        assert_eq!(counter.load(Ordering::Relaxed), 50);
    }
    #[test]
    fn test_parallel_reduce_sum() {
        let data: Vec<i64> = (1..=100).collect();
        let result = parallel_reduce(&data, |a, b| a + b, 0_i64);
        // Gauss sum: 100 * 101 / 2.
        assert_eq!(result, 5050);
    }
    #[test]
    fn test_parallel_reduce_empty() {
        // Reducing an empty slice must yield the identity unchanged.
        let data: Vec<i32> = vec![];
        let result = parallel_reduce(&data, |a, b| a + b, 42_i32);
        assert_eq!(result, 42);
    }

    // ---- ReduceOperator trait implementations ----

    /// Simple sum operator: identity 0, fold/combine are plain addition.
    pub(super) struct SumOp;
    impl ReduceOperator for SumOp {
        type Acc = f64;
        type Item = f64;
        type Result = f64;
        fn identity(&self) -> f64 {
            0.0
        }
        fn fold(&self, acc: f64, item: f64) -> f64 {
            acc + item
        }
        fn combine(&self, left: f64, right: f64) -> f64 {
            left + right
        }
        fn finalize(&self, acc: f64) -> f64 {
            acc
        }
    }
    #[test]
    fn test_reduce_with_op() {
        let data: Vec<f64> = (1..=10).map(|x| x as f64).collect();
        let result = parallel_reduce_with_op(&data, &SumOp);
        assert!((result - 55.0).abs() < 1e-10);
    }
    /// Mean operator: accumulates (sum, count) pairs so partial results
    /// can be combined associatively; finalize divides sum by count.
    pub(super) struct MeanOp;
    impl ReduceOperator for MeanOp {
        type Acc = (f64, usize);
        type Item = f64;
        type Result = f64;
        fn identity(&self) -> (f64, usize) {
            (0.0, 0)
        }
        fn fold(&self, acc: (f64, usize), item: f64) -> (f64, usize) {
            (acc.0 + item, acc.1 + 1)
        }
        fn combine(&self, left: (f64, usize), right: (f64, usize)) -> (f64, usize) {
            (left.0 + right.0, left.1 + right.1)
        }
        fn finalize(&self, acc: (f64, usize)) -> f64 {
            // Guard against division by zero on empty input.
            if acc.1 == 0 {
                0.0
            } else {
                acc.0 / acc.1 as f64
            }
        }
    }
    #[test]
    fn test_reduce_with_mean_op() {
        let data: Vec<f64> = vec![2.0, 4.0, 6.0, 8.0];
        let result = parallel_reduce_with_op(&data, &MeanOp);
        assert!((result - 5.0).abs() < 1e-10);
    }

    // ---- ParallelFor loop runner ----

    #[test]
    fn test_parallel_for_coverage() {
        // Every index in [0, 100) must be visited exactly once.
        let counter = Arc::new(AtomicUsize::new(0));
        let counter_clone = Arc::clone(&counter);
        let pf = ParallelFor::with_chunks(4);
        pf.run(0, 100, move |_i| {
            counter_clone.fetch_add(1, Ordering::Relaxed);
        });
        assert_eq!(counter.load(Ordering::Relaxed), 100);
    }
    #[test]
    fn test_parallel_for_run_n() {
        let counter = Arc::new(AtomicUsize::new(0));
        let counter_clone = Arc::clone(&counter);
        let pf = ParallelFor::with_chunks(2);
        pf.run_n(50, move |_| {
            counter_clone.fetch_add(1, Ordering::Relaxed);
        });
        assert_eq!(counter.load(Ordering::Relaxed), 50);
    }
    #[test]
    fn test_parallel_for_empty_range() {
        // start == end: the body must never run.
        let counter = Arc::new(AtomicUsize::new(0));
        let cc = Arc::clone(&counter);
        let pf = ParallelFor::new();
        pf.run(5, 5, move |_| {
            cc.fetch_add(1, Ordering::Relaxed);
        });
        assert_eq!(counter.load(Ordering::Relaxed), 0);
    }

    // ---- WorkStealingPool and stats ----

    #[test]
    fn test_work_stealing_pool_basic() {
        let pool = WorkStealingPool::new(4);
        let counter = Arc::new(AtomicUsize::new(0));
        for _ in 0..10 {
            let cc = Arc::clone(&counter);
            pool.submit(move || {
                cc.fetch_add(1, Ordering::Relaxed);
            });
        }
        // join() must not return until all submitted tasks have run.
        pool.join();
        assert_eq!(counter.load(Ordering::Relaxed), 10);
    }
    #[test]
    fn test_work_stealing_pool_empty() {
        // join() on a pool with no tasks must not hang or panic.
        let pool = WorkStealingPool::new(2);
        pool.join();
    }
    #[test]
    fn test_work_stealing_pool_stats() {
        let pool = WorkStealingPool::new(2);
        for _ in 0..5 {
            pool.submit(|| {});
        }
        let stats = pool.stats();
        assert_eq!(stats.tasks_submitted, 5);
        pool.join();
    }
    #[test]
    fn test_thread_pool_stats() {
        let mut stats = ThreadPoolStats::new(4);
        stats.tasks_submitted = 100;
        stats.tasks_completed = 100;
        // 100 tasks / 4 workers = 25 each; all completed => rate 1.0.
        assert!((stats.tasks_per_worker() - 25.0).abs() < 1e-10);
        assert!((stats.completion_rate() - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_thread_pool_stats_zero_workers() {
        // Zero workers must not divide by zero.
        let stats = ThreadPoolStats::new(0);
        assert!(stats.tasks_per_worker().abs() < 1e-10);
    }

    // ---- merge sort ----

    #[test]
    fn test_parallel_merge_sort_basic() {
        let mut data = vec![5.0, 3.0, 1.0, 4.0, 2.0];
        parallel_merge_sort(&mut data);
        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }
    #[test]
    fn test_parallel_merge_sort_large() {
        // Reverse-sorted input is the classic worst case for naive sorts.
        let mut data: Vec<f64> = (0..200).rev().map(|x| x as f64).collect();
        parallel_merge_sort(&mut data);
        let expected: Vec<f64> = (0..200).map(|x| x as f64).collect();
        assert_eq!(data, expected);
    }
    #[test]
    fn test_parallel_merge_sort_empty() {
        let mut data: Vec<f64> = vec![];
        parallel_merge_sort(&mut data);
        assert!(data.is_empty());
    }
    #[test]
    fn test_parallel_merge_sort_single() {
        let mut data = vec![42.0];
        parallel_merge_sort(&mut data);
        assert_eq!(data, vec![42.0]);
    }
    #[test]
    fn test_parallel_merge_sort_already_sorted() {
        let mut data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        parallel_merge_sort(&mut data);
        let expected: Vec<f64> = (0..100).map(|x| x as f64).collect();
        assert_eq!(data, expected);
    }

    // ---- prefix sums and min/max reductions ----

    #[test]
    fn test_prefix_sum() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let result = prefix_sum(&data);
        // Inclusive scan: result[i] includes data[i].
        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0]);
    }
    #[test]
    fn test_exclusive_prefix_sum() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let result = exclusive_prefix_sum(&data);
        // Exclusive scan: result[i] excludes data[i], starts at 0.
        assert_eq!(result, vec![0.0, 1.0, 3.0, 6.0]);
    }
    #[test]
    fn test_parallel_min() {
        let data = vec![5.0, 2.0, 8.0, 1.0, 9.0];
        assert!((parallel_min(&data) - 1.0).abs() < 1e-10);
    }
    #[test]
    fn test_parallel_max() {
        let data = vec![5.0, 2.0, 8.0, 1.0, 9.0];
        assert!((parallel_max(&data) - 9.0).abs() < 1e-10);
    }
    #[test]
    fn test_parallel_min_empty() {
        // Empty min yields the identity for min: +infinity.
        let data: Vec<f64> = vec![];
        assert_eq!(parallel_min(&data), f64::INFINITY);
    }
    #[test]
    fn test_parallel_max_empty() {
        // Empty max yields the identity for max: -infinity.
        let data: Vec<f64> = vec![];
        assert_eq!(parallel_max(&data), f64::NEG_INFINITY);
    }

    // ---- merge / insertion-sort helpers ----

    #[test]
    fn test_merge_sorted() {
        let a = [1.0, 3.0, 5.0];
        let b = [2.0, 4.0, 6.0];
        let merged = merge_sorted(&a, &b);
        assert_eq!(merged, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }
    #[test]
    fn test_merge_sorted_empty() {
        let a: Vec<f64> = vec![];
        let b = vec![1.0, 2.0];
        // Merging with an empty side must work in either argument order.
        assert_eq!(merge_sorted(&a, &b), vec![1.0, 2.0]);
        assert_eq!(merge_sorted(&b, &a), vec![1.0, 2.0]);
    }
    #[test]
    fn test_insertion_sort() {
        let mut data = vec![5.0, 1.0, 3.0, 2.0, 4.0];
        insertion_sort(&mut data);
        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }

    // ---- thread-count queries ----

    #[test]
    fn test_available_threads_positive() {
        assert!(available_threads() >= 1, "should report at least 1 thread");
    }
    #[test]
    fn test_suggested_thread_count_at_least_one() {
        assert!(suggested_thread_count() >= 1);
    }

    // ---- chunked processing ----

    #[test]
    fn test_chunk_process_sum_chunks() {
        // 12 elements in chunks of 4: sums 1+2+3+4, 5+6+7+8, 9+10+11+12.
        let data: Vec<f64> = (1..=12).map(|x| x as f64).collect();
        let result = chunk_process(&data, 4, |chunk| vec![chunk.iter().sum::<f64>()]);
        assert_eq!(result, vec![10.0, 26.0, 42.0]);
    }
    #[test]
    fn test_chunk_process_empty() {
        let data: Vec<f64> = vec![];
        let result = chunk_process(&data, 4, |c| vec![c.iter().sum::<f64>()]);
        assert!(result.is_empty());
    }
    #[test]
    fn test_chunk_process_single_element_chunks() {
        let data = vec![1.0, 2.0, 3.0];
        let result = chunk_process(&data, 1, |c| vec![c[0] * 2.0]);
        assert_eq!(result, vec![2.0, 4.0, 6.0]);
    }
    #[test]
    fn test_chunk_zip_map_add() {
        let a = [1.0, 2.0, 3.0, 4.0];
        let b = [10.0, 20.0, 30.0, 40.0];
        let result = chunk_zip_map(&a, &b, 2, |x, y| x + y);
        assert_eq!(result, vec![11.0, 22.0, 33.0, 44.0]);
    }
    #[test]
    fn test_chunk_zip_map_multiply() {
        // Chunk size larger than the input: single-chunk path.
        let a = [2.0, 3.0, 4.0];
        let b = [5.0, 6.0, 7.0];
        let result = chunk_zip_map(&a, &b, 8, |x, y| x * y);
        assert_eq!(result, vec![10.0, 18.0, 28.0]);
    }
    #[test]
    fn test_chunk_dot_product_basic() {
        let a = [1.0, 2.0, 3.0];
        let b = [4.0, 5.0, 6.0];
        let result = chunk_dot_product(&a, &b, 2);
        assert!((result - 32.0).abs() < 1e-10);
    }
    #[test]
    fn test_chunk_dot_product_chunk_size_1() {
        let a = [1.0, 2.0, 3.0, 4.0];
        let b = [1.0, 1.0, 1.0, 1.0];
        let result = chunk_dot_product(&a, &b, 1);
        assert!((result - 10.0).abs() < 1e-10);
    }
    #[test]
    fn test_chunk_dot_product_empty() {
        assert!((chunk_dot_product(&[], &[], 4)).abs() < 1e-10);
    }

    // ---- SerialWorkQueue ----

    #[test]
    fn test_serial_work_queue_push_pop() {
        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
        q.push(1);
        q.push(2);
        q.push(3);
        // pop() takes from the front (FIFO).
        assert_eq!(q.pop(), Some(1));
        assert_eq!(q.pop(), Some(2));
        assert_eq!(q.len(), 1);
    }
    #[test]
    fn test_serial_work_queue_steal() {
        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
        q.push(1);
        q.push(2);
        q.push(3);
        // steal() takes from the back (most recently pushed).
        assert_eq!(q.steal(), Some(3));
        assert_eq!(q.len(), 2);
    }
    #[test]
    fn test_serial_work_queue_empty() {
        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
        assert!(q.is_empty());
        assert_eq!(q.pop(), None);
        assert_eq!(q.steal(), None);
    }
    #[test]
    fn test_serial_work_queue_drain_and_run() {
        let mut q: SerialWorkQueue<i32> = SerialWorkQueue::new();
        for i in 0..5 {
            q.push(i);
        }
        let mut collected = Vec::new();
        q.drain_and_run(|x| collected.push(x));
        // drain_and_run visits items in push order and empties the queue.
        assert_eq!(collected, vec![0, 1, 2, 3, 4]);
        assert!(q.is_empty());
    }

    // ---- parallel_sort / sorted_copy ----

    #[test]
    fn test_parallel_sort_basic() {
        let mut data = vec![5.0, 3.0, 1.0, 4.0, 2.0];
        parallel_sort(&mut data);
        assert_eq!(data, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }
    #[test]
    fn test_parallel_sort_large() {
        let mut data: Vec<f64> = (0..500).rev().map(|x| x as f64).collect();
        parallel_sort(&mut data);
        let expected: Vec<f64> = (0..500).map(|x| x as f64).collect();
        assert_eq!(data, expected);
    }
    #[test]
    fn test_parallel_sort_empty() {
        let mut data: Vec<f64> = vec![];
        parallel_sort(&mut data);
        assert!(data.is_empty());
    }
    #[test]
    fn test_parallel_sort_already_sorted() {
        let mut data: Vec<f64> = (0..100).map(|x| x as f64).collect();
        parallel_sort(&mut data);
        assert_eq!(data, (0..100).map(|x| x as f64).collect::<Vec<_>>());
    }
    #[test]
    fn test_parallel_sort_duplicates() {
        let mut data = vec![3.0, 1.0, 3.0, 2.0, 1.0];
        parallel_sort(&mut data);
        assert_eq!(data, vec![1.0, 1.0, 2.0, 3.0, 3.0]);
    }
    #[test]
    fn test_sorted_copy_does_not_mutate_input() {
        let original = vec![5.0, 3.0, 1.0, 4.0, 2.0];
        let sorted = sorted_copy(&original);
        assert_eq!(
            original,
            vec![5.0, 3.0, 1.0, 4.0, 2.0],
            "original must be unchanged"
        );
        assert_eq!(sorted, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }

    // ---- histogram ----

    #[test]
    fn test_parallel_histogram_uniform() {
        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
        let counts = parallel_histogram(&data, 10);
        assert_eq!(counts.len(), 10);
        // Every sample must land in exactly one bin.
        let total: usize = counts.iter().sum();
        assert_eq!(total, 100);
    }
    #[test]
    fn test_parallel_histogram_empty() {
        assert!(parallel_histogram(&[], 5).is_empty());
    }
    #[test]
    fn test_parallel_histogram_zero_bins() {
        let data = vec![1.0, 2.0, 3.0];
        assert!(parallel_histogram(&data, 0).is_empty());
    }
    #[test]
    fn test_parallel_histogram_single_value() {
        // Degenerate range (min == max): all samples still counted.
        let data = vec![5.0; 10];
        let counts = parallel_histogram(&data, 4);
        let total: usize = counts.iter().sum();
        assert_eq!(total, 10);
    }
}
/// Dot product of two equal-length `f64` slices, manually unrolled into
/// four independent accumulator lanes so the additions can be pipelined
/// (and auto-vectorized) by the compiler.
///
/// Uses `chunks_exact(4)` rather than indexed access so every chunk is
/// provably 4 elements long and the per-element bounds checks of the
/// original `a[base + k]` form disappear. The accumulation order is
/// identical to the indexed version: lane k sums elements k, k+4, k+8,
/// …, then the four lanes are combined and the `n % 4` tail elements
/// are added last.
///
/// # Panics
/// Panics if `a` and `b` differ in length.
#[allow(dead_code)]
pub fn vectorized_dot_product(a: &[f64], b: &[f64]) -> f64 {
    assert_eq!(a.len(), b.len(), "vectorized_dot_product: length mismatch");
    let mut acc0 = 0.0_f64;
    let mut acc1 = 0.0_f64;
    let mut acc2 = 0.0_f64;
    let mut acc3 = 0.0_f64;
    let a4 = a.chunks_exact(4);
    let b4 = b.chunks_exact(4);
    // `remainder()` borrows from the underlying slices, so grabbing the
    // tails here does not conflict with consuming the iterators below.
    let tail_a = a4.remainder();
    let tail_b = b4.remainder();
    for (ca, cb) in a4.zip(b4) {
        acc0 += ca[0] * cb[0];
        acc1 += ca[1] * cb[1];
        acc2 += ca[2] * cb[2];
        acc3 += ca[3] * cb[3];
    }
    let mut total = acc0 + acc1 + acc2 + acc3;
    // Scalar tail for the final n % 4 elements.
    for (&x, &y) in tail_a.iter().zip(tail_b) {
        total += x * y;
    }
    total
}
/// Inclusive prefix sum computed in the classic two-phase chunked style
/// (the serial model of a parallel scan).
///
/// Phase 1 scans each chunk independently; phase 2 sweeps the chunk
/// totals left-to-right and adds the running offset of all preceding
/// chunks to every element. `n_chunks` is clamped to `[1, data.len()]`.
///
/// Returns an empty `Vec` for empty input.
#[allow(dead_code)]
pub fn parallel_prefix_scan(data: &[f64], n_chunks: usize) -> Vec<f64> {
    if data.is_empty() {
        return vec![];
    }
    let len = data.len();
    let parts = n_chunks.clamp(1, len);
    let step = len.div_ceil(parts);

    // Phase 1: independent inclusive scan inside each chunk.
    let mut scanned: Vec<Vec<f64>> = Vec::with_capacity(parts);
    for piece in data.chunks(step) {
        let mut partial = Vec::with_capacity(piece.len());
        let mut sum = 0.0_f64;
        for &value in piece {
            sum += value;
            partial.push(sum);
        }
        scanned.push(partial);
    }

    // Phase 2: propagate the carry (sum of all earlier chunks) into
    // each chunk's local results, accumulating chunk totals as we go.
    let mut carry = 0.0_f64;
    let mut out = Vec::with_capacity(len);
    for partial in scanned {
        let chunk_total = partial.last().copied().unwrap_or(0.0);
        for value in partial {
            out.push(value + carry);
        }
        carry += chunk_total;
    }
    out
}
#[cfg(test)]
mod tests_new_parallel {
    //! Tests for the newer additions: structure-of-arrays vectors, the
    //! unrolled dot product, the chunked prefix scan, extended pool
    //! statistics, and the work-stealing deque.

    use crate::parallel::ExtendedPoolStats;

    use crate::parallel::SoaVec3;

    use crate::parallel::WorkStealingDeque;

    use crate::parallel::parallel_dot_product;
    use crate::parallel::parallel_prefix_scan;
    use crate::parallel::prefix_sum;
    use crate::parallel::vectorized_dot_product;
    use std::sync::{Arc, Mutex};

    // ---- SoaVec3 (structure-of-arrays 3-vectors) ----

    #[test]
    fn test_soa_vec3_push_get() {
        let mut soa = SoaVec3::new();
        soa.push(1.0, 2.0, 3.0);
        soa.push(4.0, 5.0, 6.0);
        assert_eq!(soa.len(), 2);
        assert_eq!(soa.get(0), (1.0, 2.0, 3.0));
        assert_eq!(soa.get(1), (4.0, 5.0, 6.0));
    }
    #[test]
    fn test_soa_vec3_dot_with() {
        // Basis vectors: dotting with (1,2,3) extracts each component.
        let mut soa = SoaVec3::new();
        soa.push(1.0, 0.0, 0.0);
        soa.push(0.0, 1.0, 0.0);
        soa.push(0.0, 0.0, 1.0);
        let dots = soa.dot_with(1.0, 2.0, 3.0);
        assert!((dots[0] - 1.0).abs() < 1e-10);
        assert!((dots[1] - 2.0).abs() < 1e-10);
        assert!((dots[2] - 3.0).abs() < 1e-10);
    }
    #[test]
    fn test_soa_vec3_norms_sq() {
        let mut soa = SoaVec3::new();
        soa.push(3.0, 4.0, 0.0);
        let norms = soa.norms_sq();
        assert!(
            (norms[0] - 25.0).abs() < 1e-10,
            "3-4-0 vector norm²=25, got {}",
            norms[0]
        );
    }
    #[test]
    fn test_soa_vec3_empty() {
        let soa = SoaVec3::new();
        assert!(soa.is_empty());
        assert_eq!(soa.len(), 0);
    }
    #[test]
    fn test_soa_vec3_with_capacity() {
        // Preallocating capacity must not affect the logical length.
        let soa = SoaVec3::with_capacity(100);
        assert!(soa.is_empty());
    }

    // ---- vectorized_dot_product ----

    #[test]
    fn test_vectorized_dot_product_basic() {
        // Length 5 exercises both the unrolled body and the scalar tail.
        let a = [1.0, 2.0, 3.0, 4.0, 5.0];
        let b = [1.0, 1.0, 1.0, 1.0, 1.0];
        let result = vectorized_dot_product(&a, &b);
        assert!((result - 15.0).abs() < 1e-10);
    }
    #[test]
    fn test_vectorized_dot_product_orthogonal() {
        let a = [1.0, 0.0, 0.0, 0.0];
        let b = [0.0, 1.0, 0.0, 0.0];
        let result = vectorized_dot_product(&a, &b);
        assert!(
            result.abs() < 1e-10,
            "orthogonal vectors: dot=0, got {result}"
        );
    }
    #[test]
    fn test_vectorized_dot_product_matches_parallel_dot() {
        // Cross-check the two dot-product implementations against each
        // other on the same input.
        let a: Vec<f64> = (1..=100).map(|x| x as f64).collect();
        let b: Vec<f64> = (1..=100).map(|x| x as f64).collect();
        let v = vectorized_dot_product(&a, &b);
        let p = parallel_dot_product(&a, &b);
        assert!(
            (v - p).abs() < 1e-6,
            "vectorized and parallel dot should match"
        );
    }
    #[test]
    fn test_vectorized_dot_product_non_multiple_of_4() {
        // 7 elements: one full unrolled chunk plus a 3-element tail.
        let a = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0];
        let b = [1.0; 7];
        let result = vectorized_dot_product(&a, &b);
        assert!((result - 28.0).abs() < 1e-10);
    }

    // ---- parallel_prefix_scan ----

    #[test]
    fn test_parallel_prefix_scan_basic() {
        let data = [1.0, 2.0, 3.0, 4.0];
        let result = parallel_prefix_scan(&data, 2);
        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0]);
    }
    #[test]
    fn test_parallel_prefix_scan_single_chunk() {
        // One chunk degenerates to a plain serial inclusive scan.
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let result = parallel_prefix_scan(&data, 1);
        assert_eq!(result, vec![1.0, 3.0, 6.0, 10.0, 15.0]);
    }
    #[test]
    fn test_parallel_prefix_scan_matches_prefix_sum() {
        // Chunked scan must agree with the reference serial prefix_sum.
        let data: Vec<f64> = (1..=10).map(|x| x as f64).collect();
        let scan = parallel_prefix_scan(&data, 4);
        let psum = prefix_sum(&data);
        for (a, b) in scan.iter().zip(psum.iter()) {
            assert!((a - b).abs() < 1e-10, "parallel scan {a} vs prefix_sum {b}");
        }
    }
    #[test]
    fn test_parallel_prefix_scan_empty() {
        let result = parallel_prefix_scan(&[], 4);
        assert!(result.is_empty());
    }

    // ---- ExtendedPoolStats ----

    #[test]
    fn test_extended_pool_stats_throughput() {
        let mut stats = ExtendedPoolStats::new(4);
        stats.base.tasks_completed = 1000;
        stats.total_ns = 1_000_000;
        let tp = stats.throughput_tasks_per_us();
        assert!(
            (tp - 1.0).abs() < 1e-10,
            "1000 tasks / 1000µs = 1 task/µs, got {tp}"
        );
    }
    #[test]
    fn test_extended_pool_stats_zero_ns() {
        // Zero elapsed time must not divide by zero.
        let stats = ExtendedPoolStats::new(2);
        assert_eq!(stats.throughput_tasks_per_us(), 0.0);
    }
    #[test]
    fn test_extended_pool_stats_memory_efficiency() {
        let mut stats = ExtendedPoolStats::new(4);
        stats.base.tasks_completed = 100;
        stats.peak_memory_bytes = 1024;
        let eff = stats.memory_efficiency();
        assert!(
            (eff - 100.0).abs() < 1e-10,
            "100 tasks / 1 KB = 100 tasks/KB, got {eff}"
        );
    }

    // ---- WorkStealingDeque ----

    #[test]
    fn test_work_stealing_deque_push_pop_bottom() {
        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
        deque.push_bottom(1);
        deque.push_bottom(2);
        deque.push_bottom(3);
        // Owner pops LIFO from the bottom.
        assert_eq!(deque.pop_bottom(), Some(3));
        assert_eq!(deque.pop_bottom(), Some(2));
        assert_eq!(deque.len(), 1);
    }
    #[test]
    fn test_work_stealing_deque_steal_top() {
        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
        for i in 0..5 {
            deque.push_bottom(i);
        }
        // Thieves steal FIFO from the top.
        assert_eq!(deque.steal_top(), Some(0));
        assert_eq!(deque.steal_top(), Some(1));
        assert_eq!(deque.steal_count(), 2);
    }
    #[test]
    fn test_work_stealing_deque_empty_returns_none() {
        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
        assert_eq!(deque.pop_bottom(), None);
        assert_eq!(deque.steal_top(), None);
    }
    #[test]
    fn test_work_stealing_deque_is_empty() {
        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
        assert!(deque.is_empty());
        deque.push_bottom(42);
        assert!(!deque.is_empty());
    }
    #[test]
    fn test_work_stealing_deque_pop_count() {
        let mut deque: WorkStealingDeque<i32> = WorkStealingDeque::new();
        for i in 0..5 {
            deque.push_bottom(i);
        }
        let _ = deque.pop_bottom();
        let _ = deque.pop_bottom();
        assert_eq!(deque.pop_count(), 2);
    }

    // ---- additional coverage ----

    #[test]
    fn test_parallel_prefix_scan_all_ones() {
        // Scan of all-ones equals 1..=8.
        let data = vec![1.0; 8];
        let result = parallel_prefix_scan(&data, 4);
        let expected: Vec<f64> = (1..=8).map(|i| i as f64).collect();
        for (a, b) in result.iter().zip(expected.iter()) {
            assert!((a - b).abs() < 1e-10, "scan[i]={a} expected {b}");
        }
    }
    #[test]
    fn test_vectorized_dot_single_element() {
        let a = [3.0];
        let b = [4.0];
        let result = vectorized_dot_product(&a, &b);
        assert!((result - 12.0).abs() < 1e-10);
    }
    #[test]
    fn test_soa_vec3_large_push() {
        let mut soa = SoaVec3::with_capacity(1000);
        for i in 0..1000 {
            soa.push(i as f64, i as f64 * 2.0, i as f64 * 3.0);
        }
        assert_eq!(soa.len(), 1000);
        let norms = soa.norms_sq();
        // |(i, 2i, 3i)|² = i² + 4i² + 9i² = 14i²; relative tolerance
        // scales with magnitude since i² grows to 1e6.
        for (idx, &n) in norms.iter().enumerate() {
            let expected = 14.0 * (idx as f64).powi(2);
            assert!(
                (n - expected).abs() < 1e-6 * (expected.abs() + 1.0),
                "norm²[{idx}]={n}, expected {expected}"
            );
        }
    }
    #[test]
    fn test_extended_pool_stats_zero_memory() {
        // Zero peak memory must not divide by zero.
        let stats = ExtendedPoolStats::new(4);
        assert_eq!(stats.memory_efficiency(), 0.0);
    }
    #[test]
    fn test_work_stealing_simulation() {
        // Single-threaded simulation of a shared deque: three steals
        // must come from the top in FIFO order and shrink the deque.
        // `unwrap_or_else(|e| e.into_inner())` recovers from poisoning.
        let shared = Arc::new(Mutex::new(WorkStealingDeque::<i32>::new()));
        for i in 0..10 {
            shared
                .lock()
                .unwrap_or_else(|e| e.into_inner())
                .push_bottom(i);
        }
        let mut stolen = Vec::new();
        for _ in 0..3 {
            if let Some(v) = shared.lock().unwrap_or_else(|e| e.into_inner()).steal_top() {
                stolen.push(v);
            }
        }
        assert_eq!(stolen.len(), 3);
        assert_eq!(stolen, vec![0, 1, 2], "steals should come from top (FIFO)");
        assert_eq!(
            shared.lock().unwrap_or_else(|e| e.into_inner()).len(),
            7,
            "7 items should remain"
        );
    }
}