rustyml 0.11.0

A high-performance machine learning & deep learning library in pure Rust, offering ML algorithms and neural network support
Documentation
#![cfg(feature = "utility")]

use approx::assert_abs_diff_eq;
use ndarray::prelude::*;
use rustyml::error::ModelError;
use rustyml::utility::kernel_pca::*;
use std::error::Error;

/// Returns the fixed 8-sample, 3-feature matrix used as input by the KernelPCA tests.
fn make_kernel_pca_dataset() -> Array2<f64> {
    // One inner array per sample row.
    let samples = [
        [0.0, 1.0, 2.0],
        [1.0, 2.0, 0.5],
        [2.0, 0.5, 1.0],
        [3.0, 1.5, 2.5],
        [4.0, 3.0, 0.0],
        [5.0, 2.5, 1.5],
        [6.0, 4.0, 3.0],
        [7.0, 5.0, 2.0],
    ];
    arr2(&samples)
}

/// Builds two concentric rings of 2-D points centred on the origin.
///
/// Each ring holds `n_angles` points at evenly spaced angles in `[0, 2π)`.
/// The inner ring (radius `inner_radius`) comes first and is labelled `0`;
/// the outer ring (radius `outer_radius`) follows and is labelled `1`.
///
/// Returns the `(2 * n_angles, 2)` coordinate matrix together with the
/// per-row labels.
fn make_concentric_circle_dataset(
    inner_radius: f64,
    outer_radius: f64,
    n_angles: usize,
) -> (Array2<f64>, Vec<usize>) {
    let total_points = n_angles * 2;
    let mut coords = Vec::with_capacity(total_points * 2);
    let mut labels = Vec::with_capacity(total_points);

    // The position of a radius in this array doubles as its class label.
    for (class_id, &radius) in [inner_radius, outer_radius].iter().enumerate() {
        for step in 0..n_angles {
            let theta = 2.0 * std::f64::consts::PI * (step as f64) / (n_angles as f64);
            coords.push(radius * theta.cos());
            coords.push(radius * theta.sin());
            labels.push(class_id);
        }
    }

    let dataset =
        Array2::from_shape_vec((total_points, 2), coords).expect("Failed to build dataset");
    (dataset, labels)
}

/// Euclidean distance between the centroids of two groups of rows.
///
/// Row `i` of `transformed` belongs to the first group when `labels[i] == 0`
/// and to the second group for any other label value. Returns `0.0` when
/// either group is empty.
fn centroid_distance(transformed: &Array2<f64>, labels: &[usize]) -> f64 {
    let width = transformed.ncols();
    // Slot 0 accumulates label-0 rows, slot 1 accumulates every other row.
    let mut totals = [vec![0.0_f64; width], vec![0.0_f64; width]];
    let mut counts = [0usize; 2];

    for (row, &label) in labels.iter().enumerate() {
        let group = if label == 0 { 0 } else { 1 };
        counts[group] += 1;
        for (col, total) in totals[group].iter_mut().enumerate() {
            *total += transformed[[row, col]];
        }
    }

    let [count0, count1] = counts;
    if count0 == 0 || count1 == 0 {
        return 0.0;
    }

    let (n0, n1) = (count0 as f64, count1 as f64);
    totals[0]
        .iter()
        .zip(totals[1].iter())
        .fold(0.0_f64, |acc, (sum0, sum1)| {
            let gap = sum0 / n0 - sum1 / n1;
            acc + gap * gap
        })
        .sqrt()
}

/// Checks the configuration reported by `KernelPCA::default()` and by models
/// built through `KernelPCA::new`.
#[test]
fn test_kernel_pca_default_and_new() {
    // Default model: 2 components, dense solver, RBF kernel with gamma 0.1.
    let default_model = KernelPCA::default();
    assert_eq!(default_model.get_n_components(), 2);
    assert_eq!(default_model.get_eigen_solver(), EigenSolver::Dense);
    let KernelType::RBF { gamma } = default_model.get_kernel() else {
        panic!("Expected RBF kernel");
    };
    assert_abs_diff_eq!(gamma, 0.1, epsilon = 1e-12);

    // Nothing has been fitted yet, so every fitted-state getter reports `None`.
    assert!(default_model.get_eigenvalues().is_none());
    assert!(default_model.get_eigenvectors().is_none());
    assert!(default_model.get_kernel_row_means().is_none());
    assert!(default_model.get_kernel_all_mean().is_none());
    assert!(default_model.get_n_samples().is_none());
    assert!(default_model.get_n_features().is_none());

    // A polynomial kernel with the ARPACK solver is stored as given.
    let poly_kernel = KernelType::Poly {
        degree: 2,
        gamma: 1.0,
        coef0: 1.0,
    };
    let poly_model = KernelPCA::new(poly_kernel, 3, EigenSolver::ARPACK).unwrap();
    assert_eq!(poly_model.get_n_components(), 3);
    assert_eq!(poly_model.get_eigen_solver(), EigenSolver::ARPACK);
    let KernelType::Poly {
        degree,
        gamma,
        coef0,
    } = poly_model.get_kernel()
    else {
        panic!("Expected Poly kernel");
    };
    assert_eq!(degree, 2);
    assert_abs_diff_eq!(gamma, 1.0, epsilon = 1e-12);
    assert_abs_diff_eq!(coef0, 1.0, epsilon = 1e-12);

    // The parameterless cosine kernel is accepted and reported back.
    let cosine_model = KernelPCA::new(KernelType::Cosine, 2, EigenSolver::Dense).unwrap();
    if !matches!(cosine_model.get_kernel(), KernelType::Cosine) {
        panic!("Expected Cosine kernel");
    }
}

/// Checks that `KernelPCA::new` rejects invalid hyper-parameters with
/// `ModelError::InputValidationError`.
#[test]
fn test_kernel_pca_new_validation() {
    // True when construction with the dense solver fails input validation.
    let is_rejected = |kernel: KernelType, n_components: usize| -> bool {
        matches!(
            KernelPCA::new(kernel, n_components, EigenSolver::Dense),
            Err(ModelError::InputValidationError(_))
        )
    };

    // Zero components.
    assert!(is_rejected(KernelType::Linear, 0));

    // RBF with a zero gamma.
    assert!(is_rejected(KernelType::RBF { gamma: 0.0 }, 2));

    // Polynomial with a zero degree.
    assert!(is_rejected(
        KernelType::Poly {
            degree: 0,
            gamma: 1.0,
            coef0: 0.0
        },
        2
    ));

    // Polynomial with a negative gamma.
    assert!(is_rejected(
        KernelType::Poly {
            degree: 2,
            gamma: -1.0,
            coef0: 0.0
        },
        2
    ));

    // Sigmoid with a NaN gamma.
    assert!(is_rejected(
        KernelType::Sigmoid {
            gamma: f64::NAN,
            coef0: 0.0
        },
        2
    ));

    // Sigmoid with an infinite coef0.
    assert!(is_rejected(
        KernelType::Sigmoid {
            gamma: 1.0,
            coef0: f64::INFINITY
        },
        2
    ));
}

/// Fits an RBF KernelPCA on the sample dataset, checks the fitted state, and
/// projects both the training data and two unseen rows.
#[test]
fn test_kernel_pca_fit_and_transform() -> Result<(), Box<dyn Error>> {
    let data = make_kernel_pca_dataset();
    let (n_rows, n_cols) = (data.nrows(), data.ncols());
    let mut kpca = KernelPCA::new(KernelType::RBF { gamma: 0.5 }, 2, EigenSolver::Dense)?;

    kpca.fit(&data.view())?;

    // The fitted model records the dimensions of the training data.
    assert_eq!(kpca.get_n_samples(), Some(n_rows));
    assert_eq!(kpca.get_n_features(), Some(n_cols));

    // Inspect the fitted state; the borrows end before `transform` is called.
    {
        let eigenvalues = kpca.get_eigenvalues();
        let eigenvectors = kpca.get_eigenvectors();
        assert!(eigenvalues.is_some());
        assert!(eigenvectors.is_some());
        let eigenvalues = eigenvalues.unwrap();
        let eigenvectors = eigenvectors.unwrap();
        assert_eq!(eigenvalues.len(), 2);
        assert_eq!(eigenvectors.shape(), &[n_rows, 2]);

        let row_means = kpca.get_kernel_row_means();
        assert!(row_means.is_some());
        assert_eq!(row_means.unwrap().len(), n_rows);
        assert!(kpca.get_kernel_all_mean().is_some());

        // Every retained eigenvalue must be finite and strictly positive.
        assert!(eigenvalues.iter().all(|&v| v.is_finite() && v > 0.0));
    }

    // Projecting the training data yields one finite 2-D point per sample.
    let projected_train = kpca.transform(&data.view())?;
    assert_eq!(projected_train.shape(), &[n_rows, 2]);
    assert!(projected_train.iter().all(|v| v.is_finite()));

    // Rows not seen during fitting can be projected as well.
    let unseen = arr2(&[[1.0, 1.5, 2.5], [2.0, 2.5, 3.5]]);
    let projected_unseen = kpca.transform(&unseen.view())?;
    assert_eq!(projected_unseen.shape(), &[2, 2]);
    assert!(projected_unseen.iter().all(|v| v.is_finite()));

    Ok(())
}

/// Checks that `fit_transform` with the cosine kernel yields a finite
/// two-column projection of the sample dataset.
#[test]
fn test_kernel_pca_cosine_kernel() -> Result<(), Box<dyn Error>> {
    let samples = make_kernel_pca_dataset();
    let expected_shape = [samples.nrows(), 2];

    let mut model = KernelPCA::new(KernelType::Cosine, 2, EigenSolver::Dense)?;
    let projected = model.fit_transform(&samples.view())?;

    assert_eq!(projected.shape(), &expected_shape);
    assert!(projected.iter().all(|value| value.is_finite()));

    Ok(())
}

/// Checks that `fit_transform` and a subsequent `transform` of the same data
/// agree element-wise to within 1e-10.
#[test]
fn test_kernel_pca_fit_transform_consistency() -> Result<(), Box<dyn Error>> {
    let samples = make_kernel_pca_dataset();
    let mut model = KernelPCA::new(KernelType::RBF { gamma: 0.8 }, 2, EigenSolver::Dense)?;

    let from_fit_transform = model.fit_transform(&samples.view())?;
    let from_transform = model.transform(&samples.view())?;

    assert_eq!(from_fit_transform.shape(), from_transform.shape());
    from_fit_transform
        .iter()
        .zip(from_transform.iter())
        .for_each(|(lhs, rhs)| {
            assert_abs_diff_eq!(lhs, rhs, epsilon = 1e-10);
        });

    Ok(())
}

/// Runs fit + transform with each eigen-solver variant and checks the output
/// shape and that eigenvectors are available afterwards.
#[test]
fn test_kernel_pca_solver_variants() -> Result<(), Box<dyn Error>> {
    let samples = make_kernel_pca_dataset();
    let expected_shape = [samples.nrows(), 2];

    for solver in [EigenSolver::Dense, EigenSolver::ARPACK] {
        let mut model = KernelPCA::new(KernelType::RBF { gamma: 0.6 }, 2, solver)?;
        model.fit(&samples.view())?;

        let projected = model.transform(&samples.view())?;
        assert_eq!(projected.shape(), &expected_shape);
        assert!(model.get_eigenvectors().is_some());
    }

    Ok(())
}

/// Exercises the error paths of `fit` and `transform`: unfitted use, empty or
/// single-row input, too many components, NaN input, and a feature-count
/// mismatch at transform time.
#[test]
fn test_kernel_pca_validation_and_errors() {
    // True when `outcome` is an `InputValidationError`, whatever the success type.
    fn is_input_error<T>(outcome: &Result<T, ModelError>) -> bool {
        matches!(outcome, Err(ModelError::InputValidationError(_)))
    }

    let samples = make_kernel_pca_dataset();
    let mut unfitted =
        KernelPCA::new(KernelType::RBF { gamma: 0.5 }, 2, EigenSolver::Dense).unwrap();

    // Transforming before fitting reports `NotFitted`.
    assert!(matches!(
        unfitted.transform(&samples.view()),
        Err(ModelError::NotFitted)
    ));

    // A matrix with no rows is rejected.
    let no_rows = Array2::<f64>::zeros((0, 3));
    assert!(is_input_error(&unfitted.fit(&no_rows.view())));

    // A single sample is rejected.
    let one_row = Array2::from_shape_vec((1, 3), vec![1.0, 2.0, 3.0]).unwrap();
    assert!(is_input_error(&unfitted.fit(&one_row.view())));

    // Requesting more components than there are samples is rejected at fit time.
    let mut too_many_components = KernelPCA::new(
        KernelType::RBF { gamma: 0.5 },
        samples.nrows() + 1,
        EigenSolver::Dense,
    )
    .unwrap();
    assert!(is_input_error(&too_many_components.fit(&samples.view())));

    // A NaN entry in the input is rejected.
    let mut samples_with_nan = samples.clone();
    samples_with_nan[[0, 1]] = f64::NAN;
    assert!(is_input_error(&unfitted.fit(&samples_with_nan.view())));

    // After a successful fit, input with a different feature count is rejected.
    let mut fitted =
        KernelPCA::new(KernelType::RBF { gamma: 0.5 }, 2, EigenSolver::Dense).unwrap();
    fitted.fit(&samples.view()).unwrap();

    let four_features = Array2::<f64>::zeros((2, 4));
    assert!(is_input_error(&fitted.transform(&four_features.view())));
}

/// Compares RBF and linear KernelPCA on two concentric circles.
///
/// The distance between the two class centroids in the projected space must be
/// larger for the RBF kernel than for the linear kernel (by more than 1e-2),
/// and the RBF distance itself must exceed 0.1.
#[test]
fn test_kernel_pca_effectiveness_on_nonlinear_data() -> Result<(), Box<dyn Error>> {
    let (points, labels) = make_concentric_circle_dataset(1.0, 3.0, 16);

    // Fits a fresh 2-component dense model with `kernel` and measures how far
    // apart the two class centroids end up after projection.
    let centroid_gap = |kernel: KernelType| -> Result<f64, Box<dyn Error>> {
        let mut model = KernelPCA::new(kernel, 2, EigenSolver::Dense)?;
        let projected = model.fit_transform(&points.view())?;
        Ok(centroid_distance(&projected, &labels))
    };

    let rbf_distance = centroid_gap(KernelType::RBF { gamma: 0.5 })?;
    let linear_distance = centroid_gap(KernelType::Linear)?;

    assert!(rbf_distance > linear_distance + 1e-2);
    assert!(rbf_distance > 0.1);

    Ok(())
}