para-graph 0.1.2

"Para-Graph" delivers swift, parallel graph algorithms in Rust, optimized for CPU and GPU. Ideal for HPC workflows.
Documentation
use crate::bridge::ffi;
use num::Num;
use rayon::prelude::*;

pub fn pref_sum_serial<T>(arr: &mut [T])
where
    T: Num + Clone,
{
    for i in 1..arr.len() {
        arr[i] = arr[i].clone() + arr[i - 1].clone();
    }
}

pub fn pref_sum_par_cpu<T>(arr: &mut [T])
where
    T: Num + Clone + Send + Sync,
{
    let threads = rayon::current_num_threads();
    let chunks = threads;
    let chunk_size = arr.len().div_ceil(chunks);
    arr.par_chunks_mut(chunk_size).for_each(|chunk| {
        pref_sum_serial(chunk);
    });
    let mut offsets: Vec<T> = arr
        .par_chunks(chunk_size)
        .take(chunks - 1)
        .map(|chunk| chunk.last().unwrap())
        .cloned()
        .collect();
    pref_sum_serial(&mut offsets);
    arr.par_chunks_mut(chunk_size)
        .skip(1)
        .zip(offsets.par_iter())
        .for_each(|(chunk, offset)| {
            for i in 0..chunk.len() {
                chunk[i] = chunk[i].clone() + offset.clone();
            }
        });
}

pub fn pref_sum_par_gpu(arr: &mut [f64]) {
    unsafe {
        ffi::prefix_sum(arr.len(), arr);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use itertools::Itertools;

    const ARR: [u64; 60] = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    ];

    fn expected() -> Vec<u64> {
        ARR.iter()
            .scan(0, |s, &e| {
                *s += e;
                Some(*s)
            })
            .collect()
    }

    #[test]
    fn test_pref_sum_serial() {
        let mut arr = ARR.into_iter().collect_vec();
        pref_sum_serial(&mut arr);
        assert_eq!(arr, expected());
    }

    #[test]
    fn test_pref_sum_par_cpu() {
        let mut arr = ARR.into_iter().collect_vec();
        pref_sum_par_cpu(&mut arr);
        assert_eq!(arr, expected());
    }

    #[test]
    fn test_pref_sum_par_gpu() {
        let mut arr = ARR.into_iter().map(|x| x as f64).collect_vec();
        pref_sum_par_gpu(&mut arr);
        assert_eq!(arr, expected().into_iter().map(|x| x as f64).collect_vec());
    }

    #[test]
    fn test_pref_sum_par_gpu_pot2() {
        let mut arr = [1., 2.];
        pref_sum_par_gpu(&mut arr);
        assert_eq!(arr, [1., 3.]);
    }
}