// oxits 0.1.0 - Docs.rs

use crate::approximation::dft::{anova_selection, Dft, DftConfig};
use crate::approximation::mcb::{Mcb, McbConfig, McbFitted};
use crate::core::config::BinStrategy;
use crate::core::traits::{FittableTransformer, Transformer};

/// Configuration for Symbolic Fourier Approximation (SFA).
///
/// Pipeline: DFT (with optional ANOVA feature selection) → MCB.
/// SFA is a stateful transformer: fit() learns the DFT coefficient selection
/// and MCB bin edges.
#[derive(Debug, Clone)]
pub struct SfaConfig {
    /// Number of DFT coefficients to keep. `None` keeps every coefficient the
    /// DFT produces; larger requests are clamped to what is available.
    pub n_coefs: Option<usize>,
    /// Number of bins passed to MCB. `Sfa::fit` panics unless this is in `[2, 26]`.
    pub n_bins: usize,
    /// Binning strategy forwarded to the MCB configuration.
    pub strategy: BinStrategy,
    /// Forwarded unchanged to `DftConfig::drop_sum`.
    pub drop_sum: bool,
    /// When `true`, `Sfa::fit` chooses coefficients via `anova_selection`,
    /// which requires class labels; otherwise the leading coefficients are kept.
    pub anova: bool,
    /// Forwarded unchanged to `DftConfig::norm_mean`.
    pub norm_mean: bool,
    /// Forwarded unchanged to `DftConfig::norm_std`.
    pub norm_std: bool,
}

impl SfaConfig {
    /// Builds the baseline configuration: all coefficients kept, four
    /// quantile bins, and every optional DFT/ANOVA switch turned off.
    pub fn new() -> Self {
        // Binning setup.
        let n_bins = 4;
        let strategy = BinStrategy::Quantile;

        // Coefficient selection: no cap, no supervised selection.
        let n_coefs = None;
        let anova = false;

        Self {
            n_coefs,
            n_bins,
            strategy,
            // DFT pre-processing switches all default to disabled.
            drop_sum: false,
            anova,
            norm_mean: false,
            norm_std: false,
        }
    }
}

impl Default for SfaConfig {
    fn default() -> Self {
        Self::new()
    }
}

/// State learned by fitting SFA; consumed by the transform functions.
#[derive(Debug, Clone)]
pub struct SfaFitted {
    /// DFT settings that `Sfa::transform` reuses when recomputing coefficients.
    pub dft_config: DftConfig,
    /// Indices into each DFT coefficient row that are kept, in output order.
    pub support: Vec<usize>,
    /// Fitted MCB state used to discretize the selected coefficients.
    pub mcb_fitted: McbFitted,
}

/// Marker type carrying the SFA `FittableTransformer` implementation.
/// It holds no data itself; fitting returns an [`SfaFitted`].
pub struct Sfa;

impl FittableTransformer for Sfa {
    type Config = SfaConfig;
    type Fitted = SfaFitted;

    /// Learns which DFT coefficients to keep and fits MCB on them.
    ///
    /// `y` is consulted only when `config.anova` is set.
    ///
    /// # Panics
    ///
    /// Panics if `x` is empty, if `config.n_bins` is outside `[2, 26]`, or if
    /// ANOVA selection is requested while `y` is `None`.
    fn fit(config: &Self::Config, x: &[Vec<f64>], y: Option<&[String]>) -> Self::Fitted {
        assert!(!x.is_empty(), "Input must have at least one sample");
        assert!(
            (2..=26).contains(&config.n_bins),
            "n_bins must be in [2, 26]"
        );

        // Request every coefficient from the DFT; the subset is chosen afterwards.
        let dft_config = DftConfig {
            n_coefs: None,
            norm_mean: config.norm_mean,
            norm_std: config.norm_std,
            drop_sum: config.drop_sum,
        };
        let spectra = Dft::transform(&dft_config, x);

        // Cap the requested count at what the first sample actually provides.
        let available = spectra[0].len();
        let keep = config.n_coefs.map_or(available, |wanted| wanted.min(available));

        // Without ANOVA the leading `keep` coefficients are used as-is.
        let support: Vec<usize> = if !config.anova {
            (0..keep).collect()
        } else {
            let labels = y.expect("ANOVA selection requires labels (y)");
            anova_selection(x, labels, keep, &dft_config)
        };

        // Project every spectrum onto the chosen coefficient indices.
        let mut selected: Vec<Vec<f64>> = Vec::with_capacity(spectra.len());
        for row in &spectra {
            let picked: Vec<f64> = support.iter().map(|&idx| row[idx]).collect();
            selected.push(picked);
        }

        // Learn the bin edges on the reduced coefficient matrix.
        let mcb_fitted = Mcb::fit(
            &McbConfig {
                n_bins: config.n_bins,
                strategy: config.strategy,
            },
            &selected,
            None,
        );

        SfaFitted {
            dft_config,
            support,
            mcb_fitted,
        }
    }

    /// Applies a fitted SFA: DFT with the stored settings, selection of the
    /// stored coefficient indices, then discretization with the fitted MCB.
    fn transform(fitted: &Self::Fitted, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        let spectra = Dft::transform(&fitted.dft_config, x);

        // Keep only the coefficients chosen at fit time, in the same order.
        let mut selected: Vec<Vec<f64>> = Vec::with_capacity(spectra.len());
        for row in &spectra {
            let picked: Vec<f64> = fitted.support.iter().map(|&idx| row[idx]).collect();
            selected.push(picked);
        }

        Mcb::transform(&fitted.mcb_fitted, &selected)
    }
}

/// Fit SFA from pre-computed DFT coefficients (skips redundant DFT).
/// Used by BOSS fused fit_transform to avoid computing DFT twice.
///
/// `all_coefs` holds one row of DFT coefficients per sample and `dft_config`
/// is stored (cloned) in the result so later transforms can reuse it.
///
/// ANOVA selection is NOT performed here: it needs the original series, which
/// this function does not receive. When `config.anova` is set the labels are
/// still required, but the support falls back to the first `n_coefs` indices,
/// exactly as in the non-ANOVA case.
///
/// # Panics
///
/// Panics if `all_coefs` is empty, or if `config.anova` is set and `y` is `None`.
pub(crate) fn fit_from_coefs(
    config: &SfaConfig,
    all_coefs: &[Vec<f64>],
    y: Option<&[String]>,
    dft_config: &DftConfig,
) -> SfaFitted {
    // Mirror the precondition of `Sfa::fit` instead of failing with an opaque
    // index-out-of-bounds panic on `all_coefs[0]`.
    assert!(
        !all_coefs.is_empty(),
        "Input must have at least one sample"
    );

    let n_available = all_coefs[0].len();
    let n_coefs = config.n_coefs.unwrap_or(n_available).min(n_available);

    // Keep the label requirement so the ANOVA contract matches `Sfa::fit`,
    // even though the labels cannot be used without the original series.
    if config.anova {
        let _labels = y.expect("ANOVA selection requires labels (y)");
    }

    // Both the ANOVA fallback and the plain path keep the leading coefficients.
    let support: Vec<usize> = (0..n_coefs).collect();

    // Select coefficients
    let selected: Vec<Vec<f64>> = all_coefs
        .iter()
        .map(|coefs| support.iter().map(|&i| coefs[i]).collect())
        .collect();

    // Fit MCB on selected coefficients
    let mcb_config = McbConfig {
        n_bins: config.n_bins,
        strategy: config.strategy,
    };
    let mcb_fitted = Mcb::fit(&mcb_config, &selected, None);

    SfaFitted {
        dft_config: dft_config.clone(),
        support,
        mcb_fitted,
    }
}

/// Turns already-computed DFT coefficients into symbolic `u8` rows without
/// running the DFT again. Used by BOSS fused fit_transform.
///
/// Each input row is reduced to the indices in `fitted.support`, discretized
/// with the fitted MCB, and the resulting values are cast to `u8`.
pub(crate) fn transform_from_coefs(fitted: &SfaFitted, all_coefs: &[Vec<f64>]) -> Vec<Vec<u8>> {
    // Keep only the coefficients chosen at fit time.
    let mut selected: Vec<Vec<f64>> = Vec::with_capacity(all_coefs.len());
    for row in all_coefs {
        let picked: Vec<f64> = fitted.support.iter().map(|&idx| row[idx]).collect();
        selected.push(picked);
    }

    // MCB yields floating-point values; narrow each one to a byte symbol.
    let binned = Mcb::transform(&fitted.mcb_fitted, &selected);
    let mut symbols: Vec<Vec<u8>> = Vec::with_capacity(binned.len());
    for row in binned {
        let word: Vec<u8> = row.into_iter().map(|level| level as u8).collect();
        symbols.push(word);
    }
    symbols
}

/// Runs the full SFA transform on `x` and casts every resulting value to `u8`.
pub fn sfa_transform_symbolic(fitted: &SfaFitted, x: &[Vec<f64>]) -> Vec<Vec<u8>> {
    let binned = Sfa::transform(fitted, x);

    let mut symbols: Vec<Vec<u8>> = Vec::with_capacity(binned.len());
    for row in binned {
        let word: Vec<u8> = row.into_iter().map(|level| level as u8).collect();
        symbols.push(word);
    }
    symbols
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Output has one row per series and one column per requested coefficient.
    #[test]
    fn test_sfa_basic() {
        let series = vec![
            vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            vec![7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0],
            vec![0.0, 2.0, 4.0, 6.0, 4.0, 2.0, 0.0, -2.0],
            vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ];
        let cfg = SfaConfig {
            n_coefs: Some(4),
            n_bins: 4,
            ..SfaConfig::new()
        };

        let model = Sfa::fit(&cfg, &series, None);
        let out = Sfa::transform(&model, &series);

        assert_eq!(out.len(), 4);
        assert_eq!(out[0].len(), 4);
    }

    /// `fit_transform` must agree with `fit` followed by `transform`.
    #[test]
    fn test_sfa_fit_transform_consistency() {
        let series = vec![
            vec![1.0, 2.0, 3.0, 4.0],
            vec![4.0, 3.0, 2.0, 1.0],
            vec![2.0, 4.0, 2.0, 4.0],
        ];
        let cfg = SfaConfig {
            n_coefs: Some(3),
            n_bins: 3,
            ..SfaConfig::new()
        };

        let fused = Sfa::fit_transform(&cfg, &series, None);
        let model = Sfa::fit(&cfg, &series, None);
        let two_step = Sfa::transform(&model, &series);

        assert_eq!(fused, two_step);
    }

    /// With ANOVA enabled the support and the output width match `n_coefs`.
    #[test]
    fn test_sfa_anova() {
        let series = vec![
            vec![1.0, 2.0, 3.0, 4.0],
            vec![4.0, 3.0, 2.0, 1.0],
            vec![1.0, 3.0, 3.0, 4.0],
            vec![4.0, 2.0, 2.0, 1.0],
        ];
        let labels: Vec<String> = ["A", "B", "A", "B"]
            .iter()
            .map(|name| name.to_string())
            .collect();
        let cfg = SfaConfig {
            n_coefs: Some(2),
            n_bins: 3,
            anova: true,
            ..SfaConfig::new()
        };

        let model = Sfa::fit(&cfg, &series, Some(labels.as_slice()));
        assert_eq!(model.support.len(), 2);

        let out = Sfa::transform(&model, &series);
        assert_eq!(out[0].len(), 2);
    }

    /// Symbolic output has `n_coefs` symbols per row, each below `n_bins`.
    #[test]
    fn test_sfa_symbolic() {
        let series = vec![
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            vec![6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
        ];
        let cfg = SfaConfig {
            n_coefs: Some(3),
            n_bins: 4,
            ..SfaConfig::new()
        };

        let model = Sfa::fit(&cfg, &series, None);
        let words = sfa_transform_symbolic(&model, &series);

        assert_eq!(words.len(), 2);
        for word in words.iter() {
            assert_eq!(word.len(), 3);
            for symbol in word.iter().copied() {
                assert!(symbol < 4);
            }
        }
    }
}