use crate::core::error::{Error, Result};
use crate::dataframe::DataFrame;
use crate::ml::models::UnsupervisedModel;
use crate::optimized::OptimizedDataFrame;
use std::collections::HashMap;
/// Principal component analysis model state.
///
/// A freshly constructed value holds only its configuration; every `Option`
/// field starts as `None` and is populated by fitting the model.
#[derive(Debug, Clone)]
pub struct PCA {
    /// Number of components requested by the caller.
    pub n_components: usize,
    /// Whether per-feature standard deviations are tracked when fitting.
    pub standardize: bool,
    /// Component vectors (one row per component, one entry per feature);
    /// `None` until the model has been fitted.
    pub components: Option<Vec<Vec<f64>>>,
    /// Per-component explained-variance ratios; `None` until fitted.
    pub explained_variance_ratio: Option<Vec<f64>>,
    /// Per-feature means recorded during fitting.
    mean_values: Option<Vec<f64>>,
    /// Per-feature standard deviations recorded during fitting.
    std_values: Option<Vec<f64>>,
}

impl PCA {
    /// Creates an unfitted model with the given configuration.
    ///
    /// No validation is performed on `n_components`.
    pub fn new(n_components: usize, standardize: bool) -> Self {
        Self {
            n_components,
            standardize,
            components: None,
            explained_variance_ratio: None,
            mean_values: None,
            std_values: None,
        }
    }

    /// Returns the sum of all explained-variance ratios.
    ///
    /// Yields `None` while `explained_variance_ratio` is unset.
    pub fn total_explained_variance(&self) -> Option<f64> {
        let ratios = self.explained_variance_ratio.as_ref()?;
        Some(ratios.iter().sum())
    }
}
impl UnsupervisedModel for PCA {
    /// Initializes the model state from the column count of `data`.
    ///
    /// No decomposition is computed here: means are set to `0.0`, standard
    /// deviations (when `standardize` is on) to `1.0`, every component entry
    /// to `0.0`, and each explained-variance ratio to `1 / n_components`.
    ///
    /// Always returns `Ok(())`.
    fn fit(&mut self, data: &DataFrame) -> Result<()> {
        let n_features = data.ncols();
        let n_components = self.n_components;

        self.mean_values = Some(vec![0.0; n_features]);
        // Assign in both branches: `standardize` is a public field, so a
        // refit after it was switched off must not keep stale deviations.
        self.std_values = if self.standardize {
            Some(vec![1.0; n_features])
        } else {
            None
        };
        self.components = Some(vec![vec![0.0; n_features]; n_components]);
        self.explained_variance_ratio = Some(vec![1.0 / n_components as f64; n_components]);
        Ok(())
    }

    /// Builds a frame holding the leading columns of `data`, renamed
    /// `PC_1`, `PC_2`, ...
    ///
    /// The number of output columns is the smaller of `n_components` and the
    /// column count of `data`. Columns are fetched as string series and
    /// cloned unchanged; the fitted state is not consulted.
    ///
    /// # Errors
    ///
    /// Propagates any error from looking up a source column or from adding
    /// the renamed column to the result.
    fn transform(&self, data: &DataFrame) -> Result<DataFrame> {
        let n_output = self.n_components.min(data.ncols());
        let mut result = DataFrame::new();
        for i in 0..n_output {
            // Fallback name used only when the frame reports no name at `i`.
            let col_name_str = format!("Column_{}", i);
            let col_name = data.column_name(i).unwrap_or(&col_name_str);
            let col: &crate::series::Series<String> = data.get_column(col_name)?;
            result.add_column(format!("PC_{}", i + 1), col.clone())?;
        }
        Ok(result)
    }
}
impl crate::ml::models::ModelEvaluator for PCA {
    /// Reports metrics for the model.
    ///
    /// Neither argument is read. `reconstruction_error` is always recorded
    /// as `0.0`; `explained_variance_ratio` is recorded as the total
    /// explained variance when that value is available.
    fn evaluate(
        &self,
        _test_data: &DataFrame,
        _test_target: &str,
    ) -> Result<crate::ml::models::ModelMetrics> {
        let mut metrics = crate::ml::models::ModelMetrics::new();
        metrics.add_metric("reconstruction_error", 0.0);
        if let Some(ratio) = self.total_explained_variance() {
            metrics.add_metric("explained_variance_ratio", ratio);
        }
        Ok(metrics)
    }

    /// Cross-validation is rejected for this model.
    ///
    /// # Errors
    ///
    /// Always returns `Error::InvalidOperation`.
    fn cross_validate(
        &self,
        _data: &DataFrame,
        _target: &str,
        _folds: usize,
    ) -> Result<Vec<crate::ml::models::ModelMetrics>> {
        Err(Error::InvalidOperation(
            "Cross-validation is not applicable for PCA".into(),
        ))
    }
}
/// Strategy selector for the initial t-SNE embedding.
///
/// NOTE(review): the value is only stored on [`TSNE`]; nothing in this part
/// of the file reads it, so the variant descriptions below are presumed from
/// their names. Confirm against the real implementation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TSNEInit {
    /// Presumably a random starting embedding.
    Random,
    /// Presumably a PCA-derived starting embedding.
    PCA,
}

/// t-SNE model configuration plus the embedding produced by fitting.
#[derive(Debug, Clone)]
pub struct TSNE {
    /// Number of output dimensions of the embedding.
    pub n_components: usize,
    /// Perplexity setting (stored configuration).
    pub perplexity: f64,
    /// Iteration count setting (stored configuration).
    pub n_iter: usize,
    /// Learning-rate setting (stored configuration).
    pub learning_rate: f64,
    /// Initialization strategy (stored configuration).
    pub init: TSNEInit,
    /// Optional seed; both constructors leave it as `None`.
    pub random_seed: Option<u64>,
    /// Embedding rows (one per sample); `None` until the model is fitted.
    pub embedding: Option<Vec<Vec<f64>>>,
}

impl TSNE {
    /// Creates a model with the default configuration: 2 components,
    /// perplexity `30.0`, 1000 iterations, learning rate `200.0`, and
    /// [`TSNEInit::PCA`] initialization.
    pub fn new() -> Self {
        // Delegate so the field list is maintained in a single place.
        Self::with_params(2, 30.0, 1000, 200.0, TSNEInit::PCA)
    }

    /// Creates a model with an explicit configuration.
    ///
    /// The arguments are stored as given, without validation. `random_seed`
    /// and `embedding` start as `None`.
    pub fn with_params(
        n_components: usize,
        perplexity: f64,
        n_iter: usize,
        learning_rate: f64,
        init: TSNEInit,
    ) -> Self {
        TSNE {
            n_components,
            perplexity,
            n_iter,
            learning_rate,
            init,
            random_seed: None,
            embedding: None,
        }
    }
}

impl Default for TSNE {
    /// Equivalent to [`TSNE::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl UnsupervisedModel for TSNE {
fn fit(&mut self, data: &DataFrame) -> Result<()> {
let n_samples = data.nrows();
self.embedding = Some(vec![vec![0.0; self.n_components]; n_samples]);
Ok(())
}
fn transform(&self, data: &DataFrame) -> Result<DataFrame> {
Err(Error::InvalidOperation(
"t-SNE does not support transform on new data".into(),
))
}
fn fit_transform(&mut self, data: &DataFrame) -> Result<DataFrame> {
self.fit(data)?;
let n_samples = data.nrows();
let mut result = DataFrame::new();
if let Some(embedding) = &self.embedding {
for c in 0..self.n_components {
let column_data: Vec<f64> = (0..n_samples).map(|i| embedding[i][c]).collect();
result.add_column(
format!("Component_{}", c + 1),
crate::series::Series::new(column_data, Some(format!("Component_{}", c + 1)))?,
)?;
}
Ok(result)
} else {
Err(Error::InvalidValue("t-SNE embedding not computed".into()))
}
}
}
impl crate::ml::models::ModelEvaluator for TSNE {
fn evaluate(
&self,
_test_data: &DataFrame,
_test_target: &str,
) -> Result<crate::ml::models::ModelMetrics> {
let mut metrics = crate::ml::models::ModelMetrics::new();
metrics.add_metric("kl_divergence", 0.0);
Ok(metrics)
}
fn cross_validate(
&self,
_data: &DataFrame,
_target: &str,
_folds: usize,
) -> Result<Vec<crate::ml::models::ModelMetrics>> {
Err(Error::InvalidOperation(
"Cross-validation is not applicable for t-SNE".into(),
))
}
}