scirs2-stats 0.4.1

//! Further enhanced SIMD optimizations for statistical operations
//!
//! This module extends the existing SIMD capabilities with additional optimizations
//! for complex statistical operations that can benefit from vectorization.

use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{ArrayBase, ArrayView1, Data, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;

/// SIMD-optimized distance matrix computation
///
/// Computes pairwise distances between all points using SIMD acceleration.
/// This is commonly used in clustering and nearest neighbor algorithms.
#[allow(dead_code)]
pub fn distance_matrix_simd<F, D>(
    data: &ArrayBase<D, Ix2>,
    metric: &str,
) -> StatsResult<scirs2_core::ndarray::Array2<F>>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
    D: Data<Elem = F>,
{
    let (n_samples_, n_features) = data.dim();

    if n_samples_ == 0 || n_features == 0 {
        return Err(StatsError::InvalidArgument(
            "Data matrix cannot be empty".to_string(),
        ));
    }

    let mut distances = scirs2_core::ndarray::Array2::zeros((n_samples_, n_samples_));

    // Only compute upper triangle (distance matrix is symmetric)
    for i in 0..n_samples_ {
        for j in (i + 1)..n_samples_ {
            let row_i = data.row(i);
            let row_j = data.row(j);

            let distance = match metric {
                "euclidean" => euclidean_distance_simd(&row_i, &row_j)?,
                "manhattan" => manhattan_distance_simd(&row_i, &row_j)?,
                "cosine" => cosine_distance_simd(&row_i, &row_j)?,
                _ => {
                    return Err(StatsError::InvalidArgument(format!(
                        "Unknown metric: {}",
                        metric
                    )))
                }
            };

            distances[(i, j)] = distance;
            distances[(j, i)] = distance; // Symmetric
        }
    }

    Ok(distances)
}

/// SIMD-optimized Euclidean distance computation
#[allow(dead_code)]
pub fn euclidean_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
    if x.len() != y.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "Vectors must have the same length: {} vs {}",
            x.len(),
            y.len()
        )));
    }

    let n = x.len();
    let sum_sq_diff = if n > 16 {
        // SIMD path
        let diff = F::simd_sub(&x.view(), &y.view());
        let sq_diff = F::simd_mul(&diff.view(), &diff.view());
        F::simd_sum(&sq_diff.view())
    } else {
        // Scalar fallback
        x.iter()
            .zip(y.iter())
            .map(|(&xi, &yi)| {
                let diff = xi - yi;
                diff * diff
            })
            .fold(F::zero(), |acc, x| acc + x)
    };

    Ok(sum_sq_diff.sqrt())
}

/// SIMD-optimized Manhattan distance computation
#[allow(dead_code)]
pub fn manhattan_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
    if x.len() != y.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "Vectors must have the same length: {} vs {}",
            x.len(),
            y.len()
        )));
    }

    let n = x.len();
    let sum_abs_diff = if n > 16 {
        // SIMD path
        let diff = F::simd_sub(&x.view(), &y.view());
        let abs_diff = F::simd_abs(&diff.view());
        F::simd_sum(&abs_diff.view())
    } else {
        // Scalar fallback
        x.iter()
            .zip(y.iter())
            .map(|(&xi, &yi)| (xi - yi).abs())
            .fold(F::zero(), |acc, x| acc + x)
    };

    Ok(sum_abs_diff)
}

/// SIMD-optimized cosine distance computation
#[allow(dead_code)]
pub fn cosine_distance_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<F>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
    if x.len() != y.len() {
        return Err(StatsError::DimensionMismatch(format!(
            "Vectors must have the same length: {} vs {}",
            x.len(),
            y.len()
        )));
    }

    let n = x.len();
    let (dot_product, norm_x_sq, norm_y_sq) = if n > 16 {
        // SIMD path
        let dot = F::simd_mul(&x.view(), &y.view());
        let dot_product = F::simd_sum(&dot.view());

        let x_sq = F::simd_mul(&x.view(), &x.view());
        let norm_x_sq = F::simd_sum(&x_sq.view());

        let y_sq = F::simd_mul(&y.view(), &y.view());
        let norm_y_sq = F::simd_sum(&y_sq.view());

        (dot_product, norm_x_sq, norm_y_sq)
    } else {
        // Scalar fallback
        let mut dot_product = F::zero();
        let mut norm_x_sq = F::zero();
        let mut norm_y_sq = F::zero();

        for (&xi, &yi) in x.iter().zip(y.iter()) {
            dot_product = dot_product + xi * yi;
            norm_x_sq = norm_x_sq + xi * xi;
            norm_y_sq = norm_y_sq + yi * yi;
        }

        (dot_product, norm_x_sq, norm_y_sq)
    };

    let norm_product = (norm_x_sq * norm_y_sq).sqrt();
    if norm_product <= F::epsilon() {
        return Err(StatsError::InvalidArgument(
            "Cannot compute cosine distance for zero vectors".to_string(),
        ));
    }

    let cosine_similarity = dot_product / norm_product;
    Ok(F::one() - cosine_similarity)
}

/// SIMD-optimized moving window statistics
///
/// Computes rolling statistics over a sliding window using SIMD acceleration.
pub struct MovingWindowSIMD<F> {
    windowsize: usize,
    _phantom: std::marker::PhantomData<F>,
}

impl<F> MovingWindowSIMD<F>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display,
{
    pub fn new(_windowsize: usize) -> Self {
        Self {
            windowsize: _windowsize,
            _phantom: std::marker::PhantomData,
        }
    }

    /// Compute moving mean using SIMD operations
    pub fn moving_mean<D>(
        &self,
        data: &ArrayBase<D, Ix1>,
    ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
    where
        D: Data<Elem = F>,
    {
        if data.len() < self.windowsize {
            return Err(StatsError::InvalidArgument(
                "Data length must be >= window size".to_string(),
            ));
        }

        let n_windows = data.len() - self.windowsize + 1;
        let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);

        if self.windowsize > 16 {
            // SIMD path: compute each window mean
            for i in 0..n_windows {
                let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);
                let sum = F::simd_sum(&window);
                result[i] = sum / F::from(self.windowsize).expect("Failed to convert to float");
            }
        } else {
            // Optimized scalar path using sliding window sum
            let mut window_sum = data
                .slice(scirs2_core::ndarray::s![0..self.windowsize])
                .iter()
                .fold(F::zero(), |acc, &x| acc + x);

            result[0] = window_sum / F::from(self.windowsize).expect("Failed to convert to float");

            for i in 1..n_windows {
                window_sum = window_sum - data[i - 1] + data[i + self.windowsize - 1];
                result[i] =
                    window_sum / F::from(self.windowsize).expect("Failed to convert to float");
            }
        }

        Ok(result)
    }

    /// Compute moving variance using SIMD operations
    pub fn moving_variance<D>(
        &self,
        data: &ArrayBase<D, Ix1>,
        ddof: usize,
    ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
    where
        D: Data<Elem = F>,
    {
        if data.len() < self.windowsize {
            return Err(StatsError::InvalidArgument(
                "Data length must be >= window size".to_string(),
            ));
        }

        if self.windowsize <= ddof {
            return Err(StatsError::InvalidArgument(
                "Window size must be > ddof".to_string(),
            ));
        }

        let n_windows = data.len() - self.windowsize + 1;
        let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);

        for i in 0..n_windows {
            let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);

            if self.windowsize > 16 {
                // SIMD path: compute mean, then variance
                let mean = F::simd_sum(&window.view())
                    / F::from(self.windowsize).expect("Failed to convert to float");
                let mean_array = scirs2_core::ndarray::Array1::from_elem(self.windowsize, mean);
                let diff = F::simd_sub(&window, &mean_array.view());
                let sq_diff = F::simd_mul(&diff.view(), &diff.view());
                let sum_sq_diff = F::simd_sum(&sq_diff.view());

                result[i] = sum_sq_diff
                    / F::from(self.windowsize - ddof).expect("Failed to convert to float");
            } else {
                // Scalar path: Welford's algorithm
                let mut mean = F::zero();
                let mut m2 = F::zero();
                let mut count = 0;

                for &val in window.iter() {
                    count += 1;
                    let delta = val - mean;
                    mean = mean + delta / F::from(count).expect("Failed to convert to float");
                    let delta2 = val - mean;
                    m2 = m2 + delta * delta2;
                }

                result[i] = m2 / F::from(count - ddof).expect("Failed to convert to float");
            }
        }

        Ok(result)
    }

    /// Compute moving minimum using SIMD operations where applicable
    pub fn moving_min<D>(
        &self,
        data: &ArrayBase<D, Ix1>,
    ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
    where
        D: Data<Elem = F>,
    {
        if data.len() < self.windowsize {
            return Err(StatsError::InvalidArgument(
                "Data length must be >= window size".to_string(),
            ));
        }

        let n_windows = data.len() - self.windowsize + 1;
        let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);

        for i in 0..n_windows {
            let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);

            // Use scalar path for now (SIMD min/max need different implementation)
            result[i] = window.iter().fold(
                window[0],
                |min_val, &x| if x < min_val { x } else { min_val },
            );
        }

        Ok(result)
    }

    /// Compute moving maximum using SIMD operations where applicable
    pub fn moving_max<D>(
        &self,
        data: &ArrayBase<D, Ix1>,
    ) -> StatsResult<scirs2_core::ndarray::Array1<F>>
    where
        D: Data<Elem = F>,
    {
        if data.len() < self.windowsize {
            return Err(StatsError::InvalidArgument(
                "Data length must be >= window size".to_string(),
            ));
        }

        let n_windows = data.len() - self.windowsize + 1;
        let mut result = scirs2_core::ndarray::Array1::zeros(n_windows);

        for i in 0..n_windows {
            let window = data.slice(scirs2_core::ndarray::s![i..i + self.windowsize]);

            // Use scalar path for now (SIMD min/max need different implementation)
            result[i] = window.iter().fold(
                window[0],
                |max_val, &x| if x > max_val { x } else { max_val },
            );
        }

        Ok(result)
    }
}

/// SIMD-optimized histogram computation
///
/// Efficiently computes histograms using SIMD operations for binning.
#[allow(dead_code)]
pub fn histogram_simd<F, D>(
    data: &ArrayBase<D, Ix1>,
    bins: usize,
    range: Option<(F, F)>,
) -> StatsResult<(
    scirs2_core::ndarray::Array1<usize>,
    scirs2_core::ndarray::Array1<F>,
)>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
    D: Data<Elem = F>,
{
    if data.is_empty() {
        return Err(StatsError::InvalidArgument(
            "Data cannot be empty".to_string(),
        ));
    }

    if bins == 0 {
        return Err(StatsError::InvalidArgument(
            "Number of bins must be positive".to_string(),
        ));
    }

    // Simple threshold instead of optimizer

    // Determine range
    let (min_val, max_val) = if let Some((min, max)) = range {
        (min, max)
    } else {
        // Find min/max using scalar path (SIMD min/max need different implementation)
        let min_val = data
            .iter()
            .fold(data[0], |min, &x| if x < min { x } else { min });
        let max_val = data
            .iter()
            .fold(data[0], |max, &x| if x > max { x } else { max });
        (min_val, max_val)
    };

    if max_val <= min_val {
        return Err(StatsError::InvalidArgument(
            "Invalid range: max must be > min".to_string(),
        ));
    }

    // Create bin edges
    let mut bin_edges = scirs2_core::ndarray::Array1::zeros(bins + 1);
    let range_width = max_val - min_val;
    let bin_width = range_width / F::from(bins).expect("Failed to convert to float");

    for i in 0..=bins {
        bin_edges[i] = min_val + F::from(i).expect("Failed to convert to float") * bin_width;
    }

    // Initialize histogram counts
    let mut counts = scirs2_core::ndarray::Array1::zeros(bins);

    // Bin the data
    for &value in data.iter() {
        if value >= min_val && value <= max_val {
            let bin_idx = if value == max_val {
                bins - 1 // Handle edge case where value equals max
            } else {
                let normalized = (value - min_val) / bin_width;
                normalized
                    .floor()
                    .to_usize()
                    .expect("Operation failed")
                    .min(bins - 1)
            };
            counts[bin_idx] += 1;
        }
    }

    Ok((counts, bin_edges))
}

/// SIMD-optimized outlier detection using z-score method
///
/// Identifies outliers based on z-scores using SIMD-accelerated statistics.
#[allow(dead_code)]
pub fn detect_outliers_zscore_simd<F, D>(
    data: &ArrayBase<D, Ix1>,
    threshold: F,
) -> StatsResult<scirs2_core::ndarray::Array1<bool>>
where
    F: Float + NumCast + SimdUnifiedOps + std::fmt::Display + std::iter::Sum + Send + Sync,
    D: Data<Elem = F>,
{
    if data.is_empty() {
        return Err(StatsError::InvalidArgument(
            "Data cannot be empty".to_string(),
        ));
    }

    // Simple threshold instead of optimizer
    let n = data.len();

    // Compute mean and standard deviation using SIMD
    let mean = data.iter().fold(F::zero(), |acc, &x| acc + x)
        / F::from(n).expect("Failed to convert to float");

    // Compute standard deviation
    let variance = {
        let sum_sq_diff = data
            .iter()
            .map(|&x| {
                let diff = x - mean;
                diff * diff
            })
            .fold(F::zero(), |acc, x| acc + x);
        sum_sq_diff / F::from(n - 1).expect("Failed to convert to float")
    };

    let std_dev = variance.sqrt();

    if std_dev <= F::epsilon() {
        // All values are the same
        return Ok(scirs2_core::ndarray::Array1::from_elem(n, false));
    }

    // Compute z-scores and identify outliers
    let mut outliers = scirs2_core::ndarray::Array1::from_elem(n, false);

    // Scalar path
    for (i, &value) in data.iter().enumerate() {
        let z_score = (value - mean) / std_dev;
        outliers[i] = z_score.abs() > threshold;
    }

    Ok(outliers)
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_euclidean_distance_simd() {
        let x = array![1.0, 2.0, 3.0];
        let y = array![4.0, 5.0, 6.0];
        // Simple threshold instead of optimizer

        let distance = euclidean_distance_simd(&x.view(), &y.view()).expect("Operation failed");
        let expected = ((4.0 - 1.0).powi(2) + (5.0 - 2.0).powi(2) + (6.0 - 3.0).powi(2)).sqrt();
        assert_relative_eq!(distance, expected, epsilon = 1e-10);
    }

    #[test]
    fn test_moving_window_simd() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let window = MovingWindowSIMD::new(3);

        let moving_mean = window.moving_mean(&data.view()).expect("Operation failed");
        assert_relative_eq!(moving_mean[0], 2.0, epsilon = 1e-10); // (1+2+3)/3
        assert_relative_eq!(moving_mean[1], 3.0, epsilon = 1e-10); // (2+3+4)/3
        assert_relative_eq!(moving_mean[7], 9.0, epsilon = 1e-10); // (8+9+10)/3
    }

    #[test]
    fn test_histogram_simd() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let (counts, edges) = histogram_simd(&data.view(), 5, None).expect("Operation failed");

        assert_eq!(counts.len(), 5);
        assert_eq!(edges.len(), 6);
        assert_eq!(counts.sum(), 5); // Total count should equal data length
    }

    #[test]
    fn test_outlier_detection_simd() {
        let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]; // 100.0 is an outlier
        let outliers = detect_outliers_zscore_simd(&data.view(), 2.0).expect("Operation failed");

        assert!(!outliers[0]); // 1.0 is not an outlier
        assert!(!outliers[4]); // 5.0 is not an outlier
        assert!(outliers[5]); // 100.0 is an outlier
    }
}