//! Part of oxits 0.1.0 — a time series classification and transformation
//! library for Rust. See the crate-level documentation for an overview.
use std::collections::HashMap;

use crate::approximation::dft::{Dft, DftConfig};
use crate::approximation::sfa::{
    fit_from_coefs, sfa_transform_symbolic, transform_from_coefs, Sfa, SfaConfig, SfaFitted,
};
use crate::core::config::{BinStrategy, NumerosityReduction};
use crate::core::traits::{FittableTransformer, Transformer};

/// Configuration for the BOSS (Bag of SFA Symbols) transformation.
///
/// Pipeline per sample:
/// 1. Extract sliding windows
/// 2. Apply SFA to each window → symbolic words
/// 3. Apply numerosity reduction
/// 4. Build histogram (word → count)

#[derive(Debug, Clone)]
pub struct BossConfig {
    // Length of each sliding window extracted from a series.
    pub window_size: usize,
    // Symbols per word: forwarded to SFA as the number of coefficients kept.
    pub word_size: usize,
    // Number of discretization bins per coefficient (forwarded to SFA).
    pub n_bins: usize,
    // Strategy used by SFA to compute bin edges.
    pub strategy: BinStrategy,
    // How repeated words are collapsed before counting (see build_histograms).
    pub numerosity_reduction: NumerosityReduction,
    // Step between consecutive window start positions.
    pub window_step: usize,
    // Forwarded to the DFT stage; presumably mean-centers each window — confirm in Dft.
    pub norm_mean: bool,
    // Forwarded to the DFT stage; presumably scales by std dev — confirm in Dft.
    pub norm_std: bool,
    // Forwarded to DFT/SFA; presumably drops the first (sum/DC) coefficient.
    pub drop_sum: bool,
    // Forwarded to SFA; presumably selects coefficients via ANOVA using labels.
    pub anova: bool,
}

impl BossConfig {
    /// Create a configuration with the given window and word sizes and
    /// defaults for everything else: 4 quantile bins, identical-consecutive
    /// numerosity reduction, window step 1, mean/std normalization on,
    /// sum coefficient kept, ANOVA selection off.
    pub fn new(window_size: usize, word_size: usize) -> Self {
        Self {
            window_size,
            word_size,
            n_bins: 4,
            strategy: BinStrategy::Quantile,
            numerosity_reduction: NumerosityReduction::IdenticalConsecutive,
            window_step: 1,
            norm_mean: true,
            norm_std: true,
            drop_sum: false,
            anova: false,
        }
    }
}

/// A fitted BOSS model: the fitted SFA state plus the configuration it was
/// fitted with (the configuration is reused by `Boss::transform`).
#[derive(Debug, Clone)]
pub struct BossFitted {
    // SFA state learned from the training windows (see `SfaFitted`).
    pub sfa_fitted: SfaFitted,
    // Configuration used during fitting; consulted again at transform time.
    pub config: BossConfig,
}

pub struct Boss;

impl Boss {
    /// Fit the BOSS model by learning SFA bin edges from windowed training data.
    pub fn fit(config: &BossConfig, x: &[Vec<f64>], y: Option<&[String]>) -> BossFitted {
        assert!(!x.is_empty(), "Input must have at least one sample");
        let n_timestamps = x[0].len();
        assert!(
            config.window_size <= n_timestamps,
            "window_size must not exceed n_timestamps"
        );

        // Extract all sliding windows from all samples
        let windows = extract_all_windows(x, config.window_size, config.window_step);

        // Expand labels if provided (each window inherits its sample's label)
        let expanded_y: Option<Vec<String>> = y.map(|labels| {
            let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
            labels
                .iter()
                .flat_map(|l| std::iter::repeat_n(l.clone(), n_windows_per_sample))
                .collect()
        });

        // Fit SFA on all windows
        let sfa_config = SfaConfig {
            n_coefs: Some(config.word_size),
            n_bins: config.n_bins,
            strategy: config.strategy,
            drop_sum: config.drop_sum,
            anova: config.anova,
            norm_mean: config.norm_mean,
            norm_std: config.norm_std,
        };

        let sfa_fitted = Sfa::fit(&sfa_config, &windows, expanded_y.as_deref());

        BossFitted {
            sfa_fitted,
            config: config.clone(),
        }
    }

    /// Transform time series into BOSS histograms.
    ///
    /// Returns a vector of histograms (one per sample), where each histogram
    /// maps SFA words to their counts.
    pub fn transform(fitted: &BossFitted, x: &[Vec<f64>]) -> Vec<HashMap<String, usize>> {
        assert!(!x.is_empty(), "Input must have at least one sample");

        let config = &fitted.config;
        let n_timestamps = x[0].len();
        let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;

        // Extract all windows
        let windows = extract_all_windows(x, config.window_size, config.window_step);

        // Apply SFA
        let symbolic = sfa_transform_symbolic(&fitted.sfa_fitted, &windows);

        build_histograms(&symbolic, x.len(), n_windows_per_sample, config)
    }

    /// Fused fit and transform in one step.
    /// Extracts windows once, computes DFT once, then fits SFA and transforms with the same coefficients.
    pub fn fit_transform(
        config: &BossConfig,
        x: &[Vec<f64>],
        y: Option<&[String]>,
    ) -> Vec<HashMap<String, usize>> {
        let (fitted, histograms) = Self::fit_with_histograms(config, x, y);
        let _ = fitted;
        histograms
    }

    /// Fit and return both the fitted model and the training histograms.
    /// Used by BOSSVS to avoid a separate transform call.
    pub fn fit_with_histograms(
        config: &BossConfig,
        x: &[Vec<f64>],
        y: Option<&[String]>,
    ) -> (BossFitted, Vec<HashMap<String, usize>>) {
        assert!(!x.is_empty(), "Input must have at least one sample");
        let n_timestamps = x[0].len();
        assert!(
            config.window_size <= n_timestamps,
            "window_size must not exceed n_timestamps"
        );

        // Step 1: Extract windows ONCE
        let windows = extract_all_windows(x, config.window_size, config.window_step);

        // Expand labels if provided
        let expanded_y: Option<Vec<String>> = y.map(|labels| {
            let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
            labels
                .iter()
                .flat_map(|l| std::iter::repeat_n(l.clone(), n_windows_per_sample))
                .collect()
        });

        // Step 2: Compute DFT ONCE on all windows
        let dft_config = DftConfig {
            n_coefs: None,
            norm_mean: config.norm_mean,
            norm_std: config.norm_std,
            drop_sum: config.drop_sum,
        };
        let all_coefs = Dft::transform(&dft_config, &windows);

        // Step 3: Fit SFA from pre-computed coefficients
        let sfa_config = SfaConfig {
            n_coefs: Some(config.word_size),
            n_bins: config.n_bins,
            strategy: config.strategy,
            drop_sum: config.drop_sum,
            anova: config.anova,
            norm_mean: config.norm_mean,
            norm_std: config.norm_std,
        };
        let sfa_fitted =
            fit_from_coefs(&sfa_config, &all_coefs, expanded_y.as_deref(), &dft_config);

        // Step 4: Transform using same pre-computed coefficients
        let symbolic = transform_from_coefs(&sfa_fitted, &all_coefs);

        let fitted = BossFitted {
            sfa_fitted,
            config: config.clone(),
        };

        // Step 5: Build histograms
        let n_windows_per_sample = (n_timestamps - config.window_size) / config.window_step + 1;
        let histograms = build_histograms(&symbolic, x.len(), n_windows_per_sample, config);

        (fitted, histograms)
    }
}

/// Build histograms from symbolic SFA output.
///
/// `symbolic` holds one symbol vector per window, with the windows of each
/// sample stored contiguously (`n_windows_per_sample` per sample). Each
/// window becomes a word by mapping bin index `b` to the letter `'a' + b`,
/// then words are optionally numerosity-reduced and counted.
fn build_histograms(
    symbolic: &[Vec<u8>],
    n_samples: usize,
    n_windows_per_sample: usize,
    config: &BossConfig,
) -> Vec<HashMap<String, usize>> {
    let mut histograms = Vec::with_capacity(n_samples);

    for sample_idx in 0..n_samples {
        let start = sample_idx * n_windows_per_sample;
        let sample_words = &symbolic[start..start + n_windows_per_sample];

        // Map each window's bin indices to a letter word ('a' + bin index).
        let mut words: Vec<String> = sample_words
            .iter()
            .map(|bins| bins.iter().map(|&b| (b'a' + b) as char).collect())
            .collect();

        // Apply numerosity reduction. `Vec::dedup` collapses runs of
        // identical consecutive words; unlike the previous manual loop
        // seeded with an empty `prev`, it also keeps a leading empty word.
        let reduced = match config.numerosity_reduction {
            NumerosityReduction::IdenticalConsecutive => {
                words.dedup();
                words
            }
            NumerosityReduction::None => words,
        };

        // Count word occurrences.
        let mut hist = HashMap::new();
        for word in reduced {
            *hist.entry(word).or_insert(0) += 1;
        }
        histograms.push(hist);
    }

    histograms
}

/// Extract sliding windows of length `window_size` (stride `window_step`)
/// from all samples, concatenated into a single Vec.
///
/// A sample shorter than `window_size` contributes no windows. (The previous
/// `0..=n - window_size` formulation underflowed `usize` in that case.)
///
/// # Panics
/// Panics if `window_size` is 0 (from `slice::windows`) or `window_step`
/// is 0 (from `Iterator::step_by`).
fn extract_all_windows(x: &[Vec<f64>], window_size: usize, window_step: usize) -> Vec<Vec<f64>> {
    x.iter()
        .flat_map(|sample| {
            sample
                .windows(window_size)
                .step_by(window_step)
                .map(<[f64]>::to_vec)
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end fit_transform: one histogram per sample, every word
    /// has exactly `word_size` characters.
    #[test]
    fn test_boss_basic() {
        let cfg = BossConfig::new(4, 2);
        let series = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            vec![7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0],
            vec![0.0, 2.0, 4.0, 6.0, 4.0, 2.0, 0.0, -2.0],
            vec![1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0],
        ];

        let hists = Boss::fit_transform(&cfg, &series, None);

        assert_eq!(hists.len(), 4);
        for hist in &hists {
            assert!(!hist.is_empty());
            assert!(hist.keys().all(|word| word.len() == 2));
        }
    }

    /// Separate fit + transform round-trip produces one histogram per sample.
    #[test]
    fn test_boss_fit_then_transform() {
        let cfg = BossConfig::new(3, 2);
        let series = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0],
            vec![4.0, 3.0, 2.0, 1.0, 0.0],
            vec![1.0, 3.0, 2.0, 4.0, 0.0],
        ];

        let model = Boss::fit(&cfg, &series, None);
        assert_eq!(Boss::transform(&model, &series).len(), 3);
    }

    /// With numerosity reduction disabled, every window contributes exactly
    /// one count, so the histogram total equals the window count.
    #[test]
    fn test_boss_histogram_counts() {
        let cfg = BossConfig {
            numerosity_reduction: NumerosityReduction::None,
            ..BossConfig::new(3, 2)
        };
        let series = vec![vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0]];

        let hists = Boss::fit_transform(&cfg, &series, None);

        // 6 - 3 + 1 = 4 windows.
        assert_eq!(hists[0].values().sum::<usize>(), 4);
    }
}