Skip to main content

oxiphysics_gpu/
gpu_reduction.rs

1// Copyright 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! GPU reduction and scan operations (CPU mock implementation).
5//!
6//! Provides parallel-style reductions (sum, max, min, dot product),
7//! exclusive prefix scan, radix sort (counting sort mock), and histogram.
8
9/// Compute the sum of all elements in a slice (parallel reduction mock).
10///
11/// Returns `0.0` for an empty slice.
12pub fn gpu_sum(data: &[f64]) -> f64 {
13    data.iter().copied().sum()
14}
15
16/// Compute the maximum element in a slice (parallel reduction mock).
17///
18/// Returns `f64::NEG_INFINITY` for an empty slice.
19pub fn gpu_max(data: &[f64]) -> f64 {
20    data.iter().copied().fold(f64::NEG_INFINITY, f64::max)
21}
22
23/// Compute the minimum element in a slice (parallel reduction mock).
24///
25/// Returns `f64::INFINITY` for an empty slice.
26pub fn gpu_min(data: &[f64]) -> f64 {
27    data.iter().copied().fold(f64::INFINITY, f64::min)
28}
29
30/// Compute the dot product of two equal-length slices (parallel mock).
31///
32/// Panics in debug mode if the slices differ in length.
33pub fn gpu_dot(a: &[f64], b: &[f64]) -> f64 {
34    debug_assert_eq!(a.len(), b.len(), "gpu_dot: lengths must match");
35    a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum()
36}
37
38/// Compute the exclusive prefix scan (prefix sum) of a slice.
39///
40/// `result[i] = sum(data[0..i])`. The output length equals the input length.
41/// Returns an empty `Vec` for an empty input.
42pub fn gpu_prefix_sum(data: &[f64]) -> Vec<f64> {
43    let mut result = Vec::with_capacity(data.len());
44    let mut acc = 0.0_f64;
45    for &v in data {
46        result.push(acc);
47        acc += v;
48    }
49    result
50}
51
52/// Sort a slice of `u64` values using a radix sort (counting sort) mock.
53///
54/// This is a stable sort over the full 64-bit key space, implemented here
55/// as a mock using counting-sort passes over 8 bits at a time (8 passes).
56/// Returns a new sorted `Vec`.
57pub fn gpu_sort_radix(data: &[u64]) -> Vec<u64> {
58    if data.is_empty() {
59        return Vec::new();
60    }
61
62    let mut src = data.to_vec();
63    let mut dst = vec![0u64; data.len()];
64
65    // 8 passes × 8 bits = 64 bits
66    for pass in 0..8u64 {
67        let shift = pass * 8;
68        let mut counts = [0usize; 256];
69
70        for &v in src.iter() {
71            let bucket = ((v >> shift) & 0xFF) as usize;
72            counts[bucket] += 1;
73        }
74
75        // Prefix sum to get output positions
76        let mut offsets = [0usize; 256];
77        let mut total = 0usize;
78        for (i, &c) in counts.iter().enumerate() {
79            offsets[i] = total;
80            total += c;
81        }
82
83        // Scatter
84        for &v in src.iter() {
85            let bucket = ((v >> shift) & 0xFF) as usize;
86            dst[offsets[bucket]] = v;
87            offsets[bucket] += 1;
88        }
89
90        std::mem::swap(&mut src, &mut dst);
91    }
92
93    src
94}
95
96/// Compute a histogram of the input data over `num_bins` equal-width bins
97/// spanning `[min_val, max_val)`.
98///
99/// Returns a `Vec`usize` of length `num_bins`. Values outside the range are
100/// clamped to the first/last bin.
101pub fn gpu_histogram(data: &[f64], min_val: f64, max_val: f64, num_bins: usize) -> Vec<usize> {
102    let mut bins = vec![0usize; num_bins];
103    if num_bins == 0 {
104        return bins;
105    }
106    let range = max_val - min_val;
107    for &v in data {
108        let t = if range <= 0.0 {
109            0.0
110        } else {
111            (v - min_val) / range
112        };
113        let idx = (t * num_bins as f64) as isize;
114        let idx = idx.clamp(0, num_bins as isize - 1) as usize;
115        bins[idx] += 1;
116    }
117    bins
118}
119
120// ── Tests ─────────────────────────────────────────────────────────────────────
121
122#[cfg(test)]
123mod tests {
124    use super::*;
125
126    // gpu_sum
127    #[test]
128    fn test_gpu_sum_basic() {
129        assert!((gpu_sum(&[1.0, 2.0, 3.0]) - 6.0).abs() < 1e-12);
130    }
131
132    #[test]
133    fn test_gpu_sum_empty() {
134        assert!((gpu_sum(&[])).abs() < 1e-12);
135    }
136
137    #[test]
138    fn test_gpu_sum_single() {
139        assert!((gpu_sum(&[42.0]) - 42.0).abs() < 1e-12);
140    }
141
142    #[test]
143    fn test_gpu_sum_negatives() {
144        assert!((gpu_sum(&[-1.0, -2.0, 3.0]) - 0.0).abs() < 1e-12);
145    }
146
147    #[test]
148    fn test_gpu_sum_large() {
149        let data: Vec<f64> = (0..1000).map(|i| i as f64).collect();
150        let expected = 999.0 * 1000.0 / 2.0;
151        assert!((gpu_sum(&data) - expected).abs() < 1e-6);
152    }
153
154    // gpu_max
155    #[test]
156    fn test_gpu_max_basic() {
157        assert!((gpu_max(&[1.0, 5.0, 3.0]) - 5.0).abs() < 1e-12);
158    }
159
160    #[test]
161    fn test_gpu_max_empty() {
162        assert!(gpu_max(&[]).is_infinite() && gpu_max(&[]) < 0.0);
163    }
164
165    #[test]
166    fn test_gpu_max_all_negative() {
167        assert!((gpu_max(&[-5.0, -3.0, -1.0]) - (-1.0)).abs() < 1e-12);
168    }
169
170    #[test]
171    fn test_gpu_max_single() {
172        assert!((gpu_max(&[7.0]) - 7.0).abs() < 1e-12);
173    }
174
175    // gpu_min
176    #[test]
177    fn test_gpu_min_basic() {
178        assert!((gpu_min(&[1.0, 5.0, 3.0]) - 1.0).abs() < 1e-12);
179    }
180
181    #[test]
182    fn test_gpu_min_empty() {
183        assert!(gpu_min(&[]).is_infinite() && gpu_min(&[]) > 0.0);
184    }
185
186    #[test]
187    fn test_gpu_min_all_positive() {
188        assert!((gpu_min(&[2.0, 4.0, 6.0]) - 2.0).abs() < 1e-12);
189    }
190
191    // gpu_dot
192    #[test]
193    fn test_gpu_dot_basic() {
194        let a = [1.0, 2.0, 3.0];
195        let b = [4.0, 5.0, 6.0];
196        assert!((gpu_dot(&a, &b) - 32.0).abs() < 1e-12);
197    }
198
199    #[test]
200    fn test_gpu_dot_orthogonal() {
201        let a = [1.0, 0.0, 0.0];
202        let b = [0.0, 1.0, 0.0];
203        assert!(gpu_dot(&a, &b).abs() < 1e-12);
204    }
205
206    #[test]
207    fn test_gpu_dot_empty() {
208        assert!(gpu_dot(&[], &[]).abs() < 1e-12);
209    }
210
211    #[test]
212    fn test_gpu_dot_self() {
213        let a = [3.0, 4.0];
214        assert!((gpu_dot(&a, &a) - 25.0).abs() < 1e-12);
215    }
216
217    // gpu_prefix_sum
218    #[test]
219    fn test_gpu_prefix_sum_basic() {
220        let result = gpu_prefix_sum(&[1.0, 2.0, 3.0, 4.0]);
221        assert_eq!(result, vec![0.0, 1.0, 3.0, 6.0]);
222    }
223
224    #[test]
225    fn test_gpu_prefix_sum_empty() {
226        assert!(gpu_prefix_sum(&[]).is_empty());
227    }
228
229    #[test]
230    fn test_gpu_prefix_sum_single() {
231        let result = gpu_prefix_sum(&[5.0]);
232        assert_eq!(result, vec![0.0]);
233    }
234
235    #[test]
236    fn test_gpu_prefix_sum_all_ones() {
237        let data = vec![1.0; 5];
238        let result = gpu_prefix_sum(&data);
239        assert_eq!(result, vec![0.0, 1.0, 2.0, 3.0, 4.0]);
240    }
241
242    #[test]
243    fn test_gpu_prefix_sum_length_preserved() {
244        let data = vec![2.0; 10];
245        let result = gpu_prefix_sum(&data);
246        assert_eq!(result.len(), 10);
247    }
248
249    // gpu_sort_radix
250    #[test]
251    fn test_gpu_sort_radix_basic() {
252        let data = vec![5u64, 3, 8, 1, 9, 2];
253        let sorted = gpu_sort_radix(&data);
254        assert_eq!(sorted, vec![1, 2, 3, 5, 8, 9]);
255    }
256
257    #[test]
258    fn test_gpu_sort_radix_empty() {
259        assert!(gpu_sort_radix(&[]).is_empty());
260    }
261
262    #[test]
263    fn test_gpu_sort_radix_single() {
264        assert_eq!(gpu_sort_radix(&[42]), vec![42]);
265    }
266
267    #[test]
268    fn test_gpu_sort_radix_already_sorted() {
269        let data = vec![1u64, 2, 3, 4, 5];
270        assert_eq!(gpu_sort_radix(&data), data);
271    }
272
273    #[test]
274    fn test_gpu_sort_radix_reverse_sorted() {
275        let data = vec![5u64, 4, 3, 2, 1];
276        let sorted = gpu_sort_radix(&data);
277        assert_eq!(sorted, vec![1, 2, 3, 4, 5]);
278    }
279
280    #[test]
281    fn test_gpu_sort_radix_duplicates() {
282        let data = vec![3u64, 1, 3, 2, 1];
283        let sorted = gpu_sort_radix(&data);
284        assert_eq!(sorted, vec![1, 1, 2, 3, 3]);
285    }
286
287    #[test]
288    fn test_gpu_sort_radix_large_values() {
289        let data = vec![u64::MAX, 0, u64::MAX / 2, 1];
290        let sorted = gpu_sort_radix(&data);
291        assert_eq!(sorted[0], 0);
292        assert_eq!(sorted[3], u64::MAX);
293    }
294
295    // gpu_histogram
296    #[test]
297    fn test_gpu_histogram_basic() {
298        let data = vec![0.5, 1.5, 2.5, 3.5];
299        let hist = gpu_histogram(&data, 0.0, 4.0, 4);
300        assert_eq!(hist, vec![1, 1, 1, 1]);
301    }
302
303    #[test]
304    fn test_gpu_histogram_empty_data() {
305        let hist = gpu_histogram(&[], 0.0, 10.0, 5);
306        assert_eq!(hist, vec![0, 0, 0, 0, 0]);
307    }
308
309    #[test]
310    fn test_gpu_histogram_zero_bins() {
311        let hist = gpu_histogram(&[1.0, 2.0], 0.0, 3.0, 0);
312        assert!(hist.is_empty());
313    }
314
315    #[test]
316    fn test_gpu_histogram_all_in_one_bin() {
317        let data = vec![1.0, 1.1, 1.2];
318        let hist = gpu_histogram(&data, 0.0, 10.0, 10);
319        assert_eq!(hist[1], 3);
320    }
321
322    #[test]
323    fn test_gpu_histogram_out_of_range_clamped() {
324        let data = vec![-100.0, 200.0];
325        let hist = gpu_histogram(&data, 0.0, 10.0, 5);
326        // Both values are clamped: -100 → bin 0, 200 → bin 4
327        assert_eq!(hist[0], 1);
328        assert_eq!(hist[4], 1);
329    }
330
331    #[test]
332    fn test_gpu_histogram_bin_count() {
333        let data: Vec<f64> = (0..100).map(|i| i as f64).collect();
334        let hist = gpu_histogram(&data, 0.0, 100.0, 10);
335        assert_eq!(hist.len(), 10);
336        // Each bin should have roughly 10 elements
337        for &count in &hist {
338            assert!((9..=11).contains(&count));
339        }
340    }
341
342    // Combined / integration tests
343    #[test]
344    fn test_prefix_sum_then_last_is_total() {
345        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
346        let ps = gpu_prefix_sum(&data);
347        // The total sum should equal last exclusive prefix + last element
348        let total = ps.last().unwrap() + data.last().unwrap();
349        assert!((total - gpu_sum(&data)).abs() < 1e-12);
350    }
351
352    #[test]
353    fn test_sort_then_min_max() {
354        let data = vec![9u64, 3, 7, 1, 5];
355        let sorted = gpu_sort_radix(&data);
356        assert_eq!(sorted[0], 1);
357        assert_eq!(*sorted.last().unwrap(), 9);
358    }
359}