#![allow(dead_code)]
use pulp::{Arch, Simd, WithSimd};
/// Sums all elements of `data`, accelerated with SIMD via `pulp`.
///
/// The slice is split into full SIMD-width chunks plus a scalar remainder;
/// the vector accumulator is reduced to a scalar and the remainder is
/// summed separately, then the two are added.
pub fn sum_f64(data: &[f64]) -> f64 {
    struct Kernel<'a>(&'a [f64]);
    impl WithSimd for Kernel<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (chunks, rest) = S::as_simd_f64s(self.0);
            // Lane-wise accumulation over every full-width chunk.
            let vec_total = chunks
                .iter()
                .fold(simd.splat_f64s(0.0), |acc, &c| simd.add_f64s(acc, c));
            // Collapse lanes, sum the leftover scalars, combine.
            let head_sum = simd.reduce_sum_f64s(vec_total);
            let tail_sum: f64 = rest.iter().sum();
            head_sum + tail_sum
        }
    }
    Arch::new().dispatch(Kernel(data))
}
/// Computes the weighted sum `Σ data[i] * weights[i]` using SIMD fused
/// multiply-add.
///
/// `data` and `weights` must have equal length. A mismatch is a caller bug:
/// the independent head/tail SIMD splits would otherwise silently pair
/// elements at *different indices* (not merely truncate to the shorter
/// slice), yielding an incorrect result. Checked in debug builds.
pub fn weighted_sum_f64(data: &[f64], weights: &[f64]) -> f64 {
    // Mismatched lengths make the head/tail zips below pair wrong indices
    // whenever the two slices split at different chunk boundaries.
    debug_assert_eq!(
        data.len(),
        weights.len(),
        "data and weights must have the same length"
    );
    struct WeightedSum<'a>(&'a [f64], &'a [f64]);
    impl WithSimd for WeightedSum<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (data_head, data_tail) = S::as_simd_f64s(self.0);
            let (weights_head, weights_tail) = S::as_simd_f64s(self.1);
            let mut acc = simd.splat_f64s(0.0);
            for (&d, &w) in data_head.iter().zip(weights_head.iter()) {
                // Lane-wise fused multiply-add: acc += d * w.
                acc = simd.mul_add_f64s(d, w, acc);
            }
            let mut scalar_sum = simd.reduce_sum_f64s(acc);
            // Scalar remainder that did not fill a full SIMD register.
            for (&d, &w) in data_tail.iter().zip(weights_tail.iter()) {
                scalar_sum += d * w;
            }
            scalar_sum
        }
    }
    Arch::new().dispatch(WeightedSum(data, weights))
}
/// Computes `Σ weights[i] * (predictions[i] - outcomes[i])²` with SIMD.
///
/// All three slices must have equal length. A mismatch is a caller bug:
/// the independent head/tail SIMD splits would otherwise silently pair
/// elements at different indices rather than truncating. Checked in
/// debug builds.
pub fn weighted_squared_diff_sum(predictions: &[f64], outcomes: &[f64], weights: &[f64]) -> f64 {
    // Unequal lengths make the head/tail zips below misalign indices.
    debug_assert_eq!(
        predictions.len(),
        outcomes.len(),
        "predictions and outcomes must have the same length"
    );
    debug_assert_eq!(
        predictions.len(),
        weights.len(),
        "predictions and weights must have the same length"
    );
    struct WeightedSquaredDiff<'a>(&'a [f64], &'a [f64], &'a [f64]);
    impl WithSimd for WeightedSquaredDiff<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (pred_head, pred_tail) = S::as_simd_f64s(self.0);
            let (out_head, out_tail) = S::as_simd_f64s(self.1);
            let (wt_head, wt_tail) = S::as_simd_f64s(self.2);
            let mut acc = simd.splat_f64s(0.0);
            for ((&p, &o), &w) in pred_head.iter().zip(out_head.iter()).zip(wt_head.iter()) {
                let diff = simd.sub_f64s(p, o);
                let sq = simd.mul_f64s(diff, diff);
                // acc += w * diff², lane-wise, via fused multiply-add.
                acc = simd.mul_add_f64s(sq, w, acc);
            }
            let mut scalar_sum = simd.reduce_sum_f64s(acc);
            // Scalar remainder that did not fill a full SIMD register.
            for ((&p, &o), &w) in pred_tail.iter().zip(out_tail.iter()).zip(wt_tail.iter()) {
                let diff = p - o;
                scalar_sum += w * diff * diff;
            }
            scalar_sum
        }
    }
    Arch::new().dispatch(WeightedSquaredDiff(predictions, outcomes, weights))
}
/// Computes `Σ (predictions[i] - outcomes[i])²` with SIMD.
///
/// Both slices must have equal length. A mismatch is a caller bug: the
/// independent head/tail SIMD splits would otherwise silently pair
/// elements at different indices rather than truncating. Checked in
/// debug builds.
pub fn squared_diff_sum(predictions: &[f64], outcomes: &[f64]) -> f64 {
    // Unequal lengths make the head/tail zips below misalign indices.
    debug_assert_eq!(
        predictions.len(),
        outcomes.len(),
        "predictions and outcomes must have the same length"
    );
    struct SquaredDiff<'a>(&'a [f64], &'a [f64]);
    impl WithSimd for SquaredDiff<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (pred_head, pred_tail) = S::as_simd_f64s(self.0);
            let (out_head, out_tail) = S::as_simd_f64s(self.1);
            let mut acc = simd.splat_f64s(0.0);
            for (&p, &o) in pred_head.iter().zip(out_head.iter()) {
                let diff = simd.sub_f64s(p, o);
                // acc += diff², lane-wise, via fused multiply-add.
                acc = simd.mul_add_f64s(diff, diff, acc);
            }
            let mut scalar_sum = simd.reduce_sum_f64s(acc);
            // Scalar remainder that did not fill a full SIMD register.
            for (&p, &o) in pred_tail.iter().zip(out_tail.iter()) {
                let diff = p - o;
                scalar_sum += diff * diff;
            }
            scalar_sum
        }
    }
    Arch::new().dispatch(SquaredDiff(predictions, outcomes))
}
/// Computes the dot product `Σ a[i] * b[i]` using SIMD fused multiply-add.
///
/// `a` and `b` must have equal length. A mismatch is a caller bug: the
/// independent head/tail SIMD splits would otherwise silently pair
/// elements at different indices rather than truncating. Checked in
/// debug builds.
pub fn dot_product(a: &[f64], b: &[f64]) -> f64 {
    // Unequal lengths make the head/tail zips below misalign indices.
    debug_assert_eq!(a.len(), b.len(), "a and b must have the same length");
    struct DotProduct<'a>(&'a [f64], &'a [f64]);
    impl WithSimd for DotProduct<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (a_head, a_tail) = S::as_simd_f64s(self.0);
            let (b_head, b_tail) = S::as_simd_f64s(self.1);
            let mut acc = simd.splat_f64s(0.0);
            for (&a_chunk, &b_chunk) in a_head.iter().zip(b_head.iter()) {
                // Lane-wise fused multiply-add: acc += a * b.
                acc = simd.mul_add_f64s(a_chunk, b_chunk, acc);
            }
            let mut scalar_sum = simd.reduce_sum_f64s(acc);
            // Scalar remainder that did not fill a full SIMD register.
            for (&a_val, &b_val) in a_tail.iter().zip(b_tail.iter()) {
                scalar_sum += a_val * b_val;
            }
            scalar_sum
        }
    }
    Arch::new().dispatch(DotProduct(a, b))
}
/// Computes `Σ a[i] * b[i] * c[i]` with SIMD.
///
/// All three slices must have equal length. A mismatch is a caller bug:
/// the independent head/tail SIMD splits would otherwise silently pair
/// elements at different indices rather than truncating. Checked in
/// debug builds.
pub fn elementwise_mul_accumulate(a: &[f64], b: &[f64], c: &[f64]) -> f64 {
    // Unequal lengths make the head/tail zips below misalign indices.
    debug_assert_eq!(a.len(), b.len(), "a and b must have the same length");
    debug_assert_eq!(a.len(), c.len(), "a and c must have the same length");
    struct MulAccumulate<'a>(&'a [f64], &'a [f64], &'a [f64]);
    impl WithSimd for MulAccumulate<'_> {
        type Output = f64;
        #[inline(always)]
        fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
            let (a_head, a_tail) = S::as_simd_f64s(self.0);
            let (b_head, b_tail) = S::as_simd_f64s(self.1);
            let (c_head, c_tail) = S::as_simd_f64s(self.2);
            let mut acc = simd.splat_f64s(0.0);
            for ((&a_chunk, &b_chunk), &c_chunk) in
                a_head.iter().zip(b_head.iter()).zip(c_head.iter())
            {
                // acc += (a * b) * c, lane-wise, via fused multiply-add.
                let prod = simd.mul_f64s(a_chunk, b_chunk);
                acc = simd.mul_add_f64s(prod, c_chunk, acc);
            }
            let mut scalar_sum = simd.reduce_sum_f64s(acc);
            // Scalar remainder that did not fill a full SIMD register.
            for ((&a_val, &b_val), &c_val) in a_tail.iter().zip(b_tail.iter()).zip(c_tail.iter()) {
                scalar_sum += a_val * b_val * c_val;
            }
            scalar_sum
        }
    }
    Arch::new().dispatch(MulAccumulate(a, b, c))
}
/// Sums the elements of `data` whose corresponding entry in `mask` is
/// `true`. Pairs past the shorter of the two slices are ignored (zip
/// truncates), matching the original loop's behavior.
pub fn sum_with_predicate(data: &[f64], mask: &[bool]) -> f64 {
    data.iter()
        .zip(mask.iter())
        .filter(|&(_, &keep)| keep)
        .map(|(&value, _)| value)
        .sum()
}
/// Returns `(min, max)` over `data`, accelerated with SIMD.
///
/// For an empty slice this returns `(f64::INFINITY, f64::NEG_INFINITY)` —
/// the identity elements of min/max — so callers folding multiple results
/// together still get correct answers.
///
/// NOTE(review): NaN handling depends on the backend semantics of
/// `min_f64s`/`max_f64s` and may differ from scalar `f64::min`/`f64::max`
/// in the tail loop — confirm against the pulp docs if NaN inputs matter.
pub fn min_max_f64(data: &[f64]) -> (f64, f64) {
struct MinMax<'a>(&'a [f64]);
impl WithSimd for MinMax<'_> {
type Output = (f64, f64);
#[inline(always)]
fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
// Empty input: return the min/max identity elements.
if self.0.is_empty() {
return (f64::INFINITY, f64::NEG_INFINITY);
}
let (head, tail) = S::as_simd_f64s(self.0);
// Seed accumulators with the identities so an empty head is harmless.
let mut min_acc = simd.splat_f64s(f64::INFINITY);
let mut max_acc = simd.splat_f64s(f64::NEG_INFINITY);
for &chunk in head {
min_acc = simd.min_f64s(min_acc, chunk);
max_acc = simd.max_f64s(max_acc, chunk);
}
// Collapse the lanes, then fold in the scalar remainder.
let mut min_val = simd.reduce_min_f64s(min_acc);
let mut max_val = simd.reduce_max_f64s(max_acc);
for &val in tail {
min_val = min_val.min(val);
max_val = max_val.max(val);
}
(min_val, max_val)
}
}
Arch::new().dispatch(MinMax(data))
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sum() {
        // Long enough to exercise both the SIMD head and the scalar tail.
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        assert!((sum_f64(&data) - 55.0).abs() < 1e-10);
    }

    #[test]
    fn test_sum_empty() {
        assert_eq!(sum_f64(&[]), 0.0);
    }

    #[test]
    fn test_weighted_sum() {
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let weights = vec![0.5, 1.0, 1.5, 2.0];
        // 0.5 + 2.0 + 4.5 + 8.0 = 15.0
        assert!((weighted_sum_f64(&data, &weights) - 15.0).abs() < 1e-10);
    }

    #[test]
    fn test_dot_product() {
        let a = vec![1.0, 2.0, 3.0, 4.0];
        let b = vec![2.0, 3.0, 4.0, 5.0];
        assert!((dot_product(&a, &b) - 40.0).abs() < 1e-10);
    }

    #[test]
    fn test_squared_diff() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![2.0, 4.0, 6.0];
        assert!((squared_diff_sum(&a, &b) - 14.0).abs() < 1e-10);
    }

    #[test]
    fn test_weighted_squared_diff() {
        let predictions = vec![1.0, 2.0, 3.0];
        let outcomes = vec![2.0, 4.0, 6.0];
        let weights = vec![1.0, 0.5, 2.0];
        // 1*1 + 0.5*4 + 2*9 = 21
        assert!((weighted_squared_diff_sum(&predictions, &outcomes, &weights) - 21.0).abs() < 1e-10);
    }

    #[test]
    fn test_mul_accumulate() {
        let a = vec![1.0, 2.0];
        let b = vec![3.0, 4.0];
        let c = vec![5.0, 6.0];
        // 1*3*5 + 2*4*6 = 63
        assert!((elementwise_mul_accumulate(&a, &b, &c) - 63.0).abs() < 1e-10);
    }

    #[test]
    fn test_sum_with_predicate() {
        let data = vec![1.0, 2.0, 3.0, 4.0];
        let mask = vec![true, false, true, true];
        assert!((sum_with_predicate(&data, &mask) - 8.0).abs() < 1e-10);
    }

    #[test]
    fn test_min_max() {
        let data = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
        let (min, max) = min_max_f64(&data);
        assert!((min - 1.0).abs() < 1e-10);
        assert!((max - 9.0).abs() < 1e-10);
    }

    #[test]
    fn test_min_max_empty() {
        // Empty input yields the min/max identity elements.
        let (min, max) = min_max_f64(&[]);
        assert_eq!(min, f64::INFINITY);
        assert_eq!(max, f64::NEG_INFINITY);
    }
}