// oxits 0.1.0
//
// Time series classification and transformation library for Rust
// Documentation
use crate::core::traits::Transformer;
use crate::utils::segmentation::segmentation;

/// Settings consumed by [`Paa::transform`].
#[derive(Clone, Copy, Debug)]
pub struct PaaConfig {
    /// Number of values each sample is reduced to.
    pub output_size: usize,
    /// When `true`, `Paa::transform` takes its segment boundaries from the
    /// `segmentation` helper instead of the inline equal-split path.
    pub overlapping: bool,
}

impl PaaConfig {
    /// Creates a configuration producing `output_size` values per sample,
    /// with `overlapping` switched off.
    pub fn new(output_size: usize) -> Self {
        let overlapping = false;
        PaaConfig {
            output_size,
            overlapping,
        }
    }
}

/// Stateless PAA transformer: the `Transformer` implementation for this type
/// replaces each sample with the means of its segments.
pub struct Paa;

impl Transformer for Paa {
    type Config = PaaConfig;

    /// Reduces every sample in `x` to a vector of segment means.
    ///
    /// * With `config.overlapping == false`, each sample is split into
    ///   `config.output_size` contiguous segments. The first
    ///   `n_timestamps % output_size` segments hold one extra element, so
    ///   every element belongs to exactly one segment.
    /// * With `config.overlapping == true`, segment boundaries come from
    ///   `segmentation(n_timestamps, config.output_size, true)` and each
    ///   `(start, end)` pair is averaged.
    ///
    /// When the `parallel` feature is enabled, samples are processed with
    /// rayon's parallel iterator; otherwise they are processed sequentially.
    ///
    /// # Panics
    ///
    /// * if `x` is empty;
    /// * if the samples do not all have the same length;
    /// * if `config.output_size` exceeds the sample length;
    /// * if `config.output_size` is zero in non-overlapping mode.
    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");
        let n_timestamps = x[0].len();
        assert!(
            x.iter().all(|s| s.len() == n_timestamps),
            "All samples must have same length"
        );
        assert!(
            config.output_size <= n_timestamps,
            "output_size must not exceed n_timestamps"
        );

        if config.overlapping {
            let segments = segmentation(n_timestamps, config.output_size, true);
            // Shared by both cfg arms below so the per-sample work is written once.
            let per_sample = |sample: &Vec<f64>| paa_single_segments(sample, &segments);

            #[cfg(feature = "parallel")]
            {
                use rayon::prelude::*;
                return x.par_iter().map(per_sample).collect();
            }
            #[cfg(not(feature = "parallel"))]
            x.iter().map(per_sample).collect()
        } else {
            let output_size = config.output_size;
            // Without this guard a zero `output_size` reaches the integer
            // division below and aborts with an opaque divide-by-zero panic.
            assert!(output_size > 0, "output_size must be at least 1");

            let base_size = n_timestamps / output_size;
            let remainder = n_timestamps % output_size;
            // Only two possible segment lengths: base_size+1 and base_size.
            // `base_size >= 1` here because 1 <= output_size <= n_timestamps.
            let recip_large = 1.0 / (base_size + 1) as f64;
            let recip_small = 1.0 / base_size as f64;

            // Shared by both cfg arms below so the per-sample work is written once.
            let per_sample = |sample: &Vec<f64>| {
                paa_single_inline(
                    sample,
                    output_size,
                    base_size,
                    remainder,
                    recip_large,
                    recip_small,
                )
            };

            #[cfg(feature = "parallel")]
            {
                use rayon::prelude::*;
                return x.par_iter().map(per_sample).collect();
            }
            #[cfg(not(feature = "parallel"))]
            x.iter().map(per_sample).collect()
        }
    }
}

/// Non-overlapping fast path: averages `output_size` back-to-back segments of
/// `sample` without materialising the segment boundaries.
///
/// The first `remainder` segments span `base_size + 1` elements and are scaled
/// by `recip_large`; the remaining ones span `base_size` elements and are
/// scaled by `recip_small`. Each mean is computed as `sum * reciprocal`.
///
/// Panics if a segment would extend past the end of `sample`.
#[inline]
fn paa_single_inline(
    sample: &[f64],
    output_size: usize,
    base_size: usize,
    remainder: usize,
    recip_large: f64,
    recip_small: f64,
) -> Vec<f64> {
    // Index of the first element of the segment currently being averaged.
    let mut cursor = 0usize;
    (0..output_size)
        .map(|segment| {
            let (len, scale) = if segment >= remainder {
                (base_size, recip_small)
            } else {
                (base_size + 1, recip_large)
            };
            let end = cursor + len;
            let total = sample[cursor..end].iter().sum::<f64>();
            cursor = end;
            total * scale
        })
        .collect()
}

/// Overlapping path: averages `sample` over each precomputed half-open
/// `(start, end)` index pair in `segments`, in order.
///
/// Panics if a pair does not describe a valid range within `sample`.
#[inline]
fn paa_single_segments(sample: &[f64], segments: &[(usize, usize)]) -> Vec<f64> {
    let mut means = Vec::with_capacity(segments.len());
    for &(lo, hi) in segments {
        let window = &sample[lo..hi];
        let total: f64 = window.iter().sum();
        means.push(total / window.len() as f64);
    }
    means
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing computed means.
    const EPS: f64 = 1e-10;

    /// Asserts that `actual` and `expected` agree element-wise within `EPS`.
    fn assert_close(actual: &[f64], expected: &[f64]) {
        assert_eq!(actual.len(), expected.len());
        for (a, e) in actual.iter().zip(expected) {
            assert!((a - e).abs() < EPS, "expected {e}, got {a}");
        }
    }

    #[test]
    fn test_paa_basic() {
        let config = PaaConfig::new(4);
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]];
        let result = Paa::transform(&config, &x);
        assert_close(&result[0], &[1.5, 3.5, 5.5, 7.5]);
    }

    #[test]
    fn test_paa_identity() {
        let config = PaaConfig::new(4);
        let x = vec![vec![1.0, 2.0, 3.0, 4.0]];
        let result = Paa::transform(&config, &x);
        assert_eq!(result[0], vec![1.0, 2.0, 3.0, 4.0]);
    }

    #[test]
    fn test_paa_single_output() {
        let config = PaaConfig::new(1);
        let x = vec![vec![2.0, 4.0, 6.0]];
        let result = Paa::transform(&config, &x);
        assert_close(&result[0], &[4.0]);
    }

    #[test]
    fn test_paa_multiple_samples() {
        let config = PaaConfig::new(2);
        let x = vec![vec![1.0, 2.0, 3.0, 4.0], vec![10.0, 20.0, 30.0, 40.0]];
        let result = Paa::transform(&config, &x);
        assert_eq!(result.len(), 2);
        assert_close(&result[0], &[1.5, 3.5]);
        assert_close(&result[1], &[15.0, 35.0]);
    }

    #[test]
    fn test_paa_overlapping() {
        // Segment boundaries are decided by `segmentation`, so only the
        // output shape is checked here.
        let config = PaaConfig {
            output_size: 3,
            overlapping: true,
        };
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]];
        let result = Paa::transform(&config, &x);
        assert_eq!(result[0].len(), 3);
    }

    #[test]
    fn test_paa_uneven_split() {
        // 7 timestamps into 3 segments: sizes 3,2,2 (remainder=1)
        let config = PaaConfig::new(3);
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]];
        let result = Paa::transform(&config, &x);
        // mean(1,2,3) = 2, mean(4,5) = 4.5, mean(6,7) = 6.5
        assert_close(&result[0], &[2.0, 4.5, 6.5]);
    }

    #[test]
    #[should_panic(expected = "Input must have at least one sample")]
    fn test_paa_empty_input_panics() {
        let config = PaaConfig::new(1);
        let x: Vec<Vec<f64>> = Vec::new();
        let _ = Paa::transform(&config, &x);
    }

    #[test]
    #[should_panic(expected = "All samples must have same length")]
    fn test_paa_ragged_input_panics() {
        let config = PaaConfig::new(1);
        let x = vec![vec![1.0, 2.0], vec![1.0]];
        let _ = Paa::transform(&config, &x);
    }

    #[test]
    #[should_panic(expected = "output_size must not exceed n_timestamps")]
    fn test_paa_output_size_too_large_panics() {
        let config = PaaConfig::new(5);
        let x = vec![vec![1.0, 2.0, 3.0]];
        let _ = Paa::transform(&config, &x);
    }

    #[test]
    #[should_panic]
    fn test_paa_zero_output_size_panics() {
        // A zero output size cannot be split into segments; the
        // non-overlapping path must not return a result for it.
        let config = PaaConfig::new(0);
        let x = vec![vec![1.0, 2.0, 3.0]];
        let _ = Paa::transform(&config, &x);
    }
}