#![allow(unused_variables)]
#![allow(unused_imports)]
use backend::AutodiffBackend;
use burn::module::AutodiffModule;
use crossbeam_channel::Receiver;
use model::{UMAPModel, UMAPModelConfigBuilder};
use num::Float;
use train::*;
use utils::*;
use burn::tensor::{Device, Tensor};
pub mod backend;
pub mod chart;
pub mod distances;
pub mod kernels;
pub mod macros;
pub mod model;
pub mod normalizer;
pub mod prelude;
pub mod train;
pub mod utils;
pub use train::{
EpochProgress, GraphParams, LossReduction, ManifoldParams, Metric, OptimizationParams,
TrainingConfig, TrainingConfigBuilder, UmapConfig,
};
/// Configurable UMAP estimator.
///
/// Holds the user-supplied [`UmapConfig`] and the backend device on which
/// training will run; call one of the `fit*` methods to train the model and
/// obtain a [`FittedUmap`].
pub struct Umap<B: AutodiffBackend> {
// User-supplied hyper-parameters (graph, manifold, optimization settings).
config: UmapConfig,
// Backend device used for model construction and training.
device: Device<B>,
}
impl<B: AutodiffBackend> Umap<B> {
pub fn new(config: UmapConfig) -> Self {
Self {
config,
device: Default::default(),
}
}
pub fn with_device(config: UmapConfig, device: Device<B>) -> Self {
Self { config, device }
}
pub fn config(&self) -> &UmapConfig {
&self.config
}
pub fn fit<F: Float>(self, data: Vec<Vec<F>>, labels: Option<Vec<String>>) -> FittedUmap<B>
where
F: num::FromPrimitive + burn::tensor::Element,
{
let (exit_tx, exit_rx) = crossbeam_channel::unbounded();
ctrlc::set_handler(move || {
let _ = exit_tx.send(());
})
.ok();
self.fit_with_signal(data, labels, exit_rx)
}
pub fn fit_with_signal<F: Float>(
self,
data: Vec<Vec<F>>,
labels: Option<Vec<String>>,
exit_rx: Receiver<()>,
) -> FittedUmap<B>
where
F: num::FromPrimitive + burn::tensor::Element,
{
let default_name = "model";
let num_samples = data.len();
let num_features = data[0].len();
let batch_size = num_samples;
let seed = 9999;
B::seed(&self.device, seed);
let train_data: Vec<F> = data.into_iter().flatten().collect();
let model_config = UMAPModelConfigBuilder::default()
.input_size(num_features)
.hidden_sizes(self.config.hidden_sizes.clone())
.output_size(self.config.n_components)
.build()
.unwrap();
let model: UMAPModel<B> = UMAPModel::new(&model_config, &self.device);
let (kernel_a, kernel_b) = train::fit_ab(
self.config.manifold.min_dist,
self.config.manifold.spread,
);
let training_config = TrainingConfig {
metric: self.config.graph.metric.clone(),
epochs: self.config.optimization.n_epochs,
batch_size,
learning_rate: self.config.optimization.learning_rate,
beta1: self.config.optimization.beta1,
beta2: self.config.optimization.beta2,
penalty: self.config.optimization.penalty,
verbose: self.config.optimization.verbose,
patience: self.config.optimization.patience,
loss_reduction: self.config.optimization.loss_reduction.clone(),
k_neighbors: self.config.graph.n_neighbors,
min_desired_loss: self.config.optimization.min_desired_loss,
timeout: self.config.optimization.timeout,
normalized: self.config.graph.normalized,
minkowski_p: self.config.graph.minkowski_p,
repulsion_strength: self.config.optimization.repulsion_strength,
kernel_a,
kernel_b,
neg_sample_rate: self.config.optimization.neg_sample_rate,
figures_dir: self.config.optimization.figures_dir.clone(),
};
let (model, _losses, _best_loss): (UMAPModel<B>, Vec<F>, F) = train::train_sparse(
default_name,
model,
num_samples,
num_features,
train_data.clone(),
&training_config,
self.device.clone(),
exit_rx,
labels,
None,
);
Self::finalize(model, train_data, num_samples, num_features, self.device, self.config)
}
pub fn fit_with_progress<F: Float>(
self,
data: Vec<Vec<F>>,
labels: Option<Vec<String>>,
exit_rx: Receiver<()>,
on_progress: Box<dyn Fn(EpochProgress) + Send>,
) -> FittedUmap<B>
where
F: num::FromPrimitive + burn::tensor::Element,
{
let default_name = "model";
let num_samples = data.len();
let num_features = data[0].len();
let batch_size = num_samples;
let seed = 9999;
B::seed(&self.device, seed);
let train_data: Vec<F> = data.into_iter().flatten().collect();
let model_config = UMAPModelConfigBuilder::default()
.input_size(num_features)
.hidden_sizes(self.config.hidden_sizes.clone())
.output_size(self.config.n_components)
.build()
.unwrap();
let model: UMAPModel<B> = UMAPModel::new(&model_config, &self.device);
let (kernel_a, kernel_b) = train::fit_ab(
self.config.manifold.min_dist,
self.config.manifold.spread,
);
let training_config = TrainingConfig {
metric: self.config.graph.metric.clone(),
epochs: self.config.optimization.n_epochs,
batch_size,
learning_rate: self.config.optimization.learning_rate,
beta1: self.config.optimization.beta1,
beta2: self.config.optimization.beta2,
penalty: self.config.optimization.penalty,
verbose: self.config.optimization.verbose,
patience: self.config.optimization.patience,
loss_reduction: self.config.optimization.loss_reduction.clone(),
k_neighbors: self.config.graph.n_neighbors,
min_desired_loss: self.config.optimization.min_desired_loss,
timeout: self.config.optimization.timeout,
normalized: self.config.graph.normalized,
minkowski_p: self.config.graph.minkowski_p,
repulsion_strength: self.config.optimization.repulsion_strength,
kernel_a,
kernel_b,
neg_sample_rate: self.config.optimization.neg_sample_rate,
figures_dir: self.config.optimization.figures_dir.clone(),
};
let (model, _losses, _best_loss): (UMAPModel<B>, Vec<F>, F) = train::train_sparse(
default_name,
model,
num_samples,
num_features,
train_data.clone(),
&training_config,
self.device.clone(),
exit_rx,
labels,
Some(on_progress),
);
Self::finalize(model, train_data, num_samples, num_features, self.device, self.config)
}
fn finalize<F: Float>(
model: UMAPModel<B>,
train_data: Vec<F>,
num_samples: usize,
num_features: usize,
device: Device<B>,
config: UmapConfig,
) -> FittedUmap<B>
where
F: num::FromPrimitive + burn::tensor::Element,
{
let model: UMAPModel<B::InnerBackend> = model.valid();
let mut normalized_data = train_data;
normalize_data(&mut normalized_data, num_samples, num_features);
let global = convert_vector_to_tensor(
normalized_data,
num_samples,
num_features,
&device,
);
let embedding_tensor = model.forward(global);
let embedding: Vec<Vec<f64>> = convert_tensor_to_vector(embedding_tensor);
FittedUmap {
model,
device,
config,
embedding,
num_features,
}
}
}
/// A trained UMAP model together with the embedding of its training data.
///
/// Produced by the `Umap::fit*` methods; use [`FittedUmap::transform`] to
/// project new samples into the learned space.
pub struct FittedUmap<B: AutodiffBackend> {
// Inference-mode model (autodiff graph stripped via `valid()`).
model: UMAPModel<B::InnerBackend>,
// Device used for tensor construction during `transform`.
device: Device<B>,
// Configuration the model was fitted with.
config: UmapConfig,
// Embedding of the training data, one row per sample.
embedding: Vec<Vec<f64>>,
// Input dimensionality seen at fit time; not read anywhere in this file.
#[allow(dead_code)]
num_features: usize,
}
impl<B: AutodiffBackend> FittedUmap<B> {
    /// Borrows the embedding of the training data (one row per sample).
    pub fn embedding(&self) -> &Vec<Vec<f64>> {
        &self.embedding
    }

    /// Consumes the fitted model, returning the training-data embedding.
    pub fn into_embedding(self) -> Vec<Vec<f64>> {
        self.embedding
    }

    /// Returns the configuration used to fit this model.
    pub fn config(&self) -> &UmapConfig {
        &self.config
    }

    /// Projects new samples into the learned low-dimensional space.
    ///
    /// NOTE(review): the training-data embedding is computed from normalized
    /// data, but `data` is not normalized here — confirm this asymmetry is
    /// intended.
    ///
    /// # Panics
    /// Panics if `data` is empty.
    pub fn transform(&self, data: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        let local = self.transform_to_tensor(data);
        convert_tensor_to_vector(local)
    }

    /// Like [`FittedUmap::transform`], but returns the raw backend tensor.
    ///
    /// # Panics
    /// Panics if `data` is empty.
    pub fn transform_to_tensor(&self, data: Vec<Vec<f64>>) -> Tensor<B::InnerBackend, 2> {
        // Fail fast with a clear message instead of an index panic on data[0].
        assert!(!data.is_empty(), "transform requires at least one sample");
        let num_samples = data.len();
        let num_features = data[0].len();
        // Flatten to row-major storage for tensor conversion.
        let train_data: Vec<f64> = data.into_iter().flatten().collect();
        let global = convert_vector_to_tensor(train_data, num_samples, num_features, &self.device);
        self.model.forward(global)
    }
}
/// Fitted UMAP model trained with fixed default hyper-parameters.
///
/// NOTE(review): this looks like an older API superseded by [`Umap`] /
/// [`FittedUmap`] — confirm before deprecating or removing.
pub struct UMAP<B: AutodiffBackend> {
// Inference-mode model (autodiff graph stripped via `valid()`).
model: UMAPModel<B::InnerBackend>,
// Device used for tensor construction during `transform`.
device: Device<B>,
}
impl<B: AutodiffBackend> UMAP<B> {
    /// Trains a UMAP model on `data` with fixed default hyper-parameters
    /// (one hidden layer of 100 units, learning rate 1e-3, 100 epochs, full
    /// batch) projecting into `output_size` dimensions. Training stops early
    /// when a message arrives on `exit_rx`.
    ///
    /// # Panics
    /// Panics if `data` is empty or the training configuration fails to build.
    pub fn fit<F: Float>(
        data: Vec<Vec<F>>,
        device: Device<B>,
        output_size: usize,
        exit_rx: Receiver<()>,
    ) -> Self
    where
        F: num::FromPrimitive + burn::tensor::Element,
    {
        // Fail fast with a clear message instead of an index panic on data[0].
        assert!(!data.is_empty(), "UMAP::fit requires a non-empty dataset");
        let default_name = "model";
        let num_samples = data.len();
        let num_features = data[0].len();
        // Full-batch training: every sample participates in each step.
        let batch_size = num_samples;
        let hidden_sizes = vec![100];
        let learning_rate = 0.001;
        let beta1 = 0.9;
        let beta2 = 0.999;
        let epochs = 100;
        // Fixed seed so repeated fits over the same data are reproducible.
        let seed = 9999;
        B::seed(&device, seed);
        // Flatten to row-major storage for tensor conversion.
        let train_data: Vec<F> = data.into_iter().flatten().collect();
        let model_config = UMAPModelConfigBuilder::default()
            .input_size(num_features)
            .hidden_sizes(hidden_sizes)
            .output_size(output_size)
            .build()
            .unwrap();
        let model: UMAPModel<B> = UMAPModel::new(&model_config, &device);
        let config = TrainingConfig::builder()
            .with_epochs(epochs)
            .with_batch_size(batch_size)
            .with_learning_rate(learning_rate)
            .with_beta1(beta1)
            .with_beta2(beta2)
            .build()
            .expect("Failed to build TrainingConfig");
        // `train_data` is not needed afterwards, so it is moved (the previous
        // `.clone()` here copied the whole dataset for nothing).
        let (model, _losses, _best_loss): (UMAPModel<B>, Vec<F>, F) = train(
            default_name,
            model,
            num_samples,
            num_features,
            train_data,
            &config,
            device.clone(),
            exit_rx,
            None,
        );
        // Drop the autodiff graph: inference only from here on.
        let model: UMAPModel<B::InnerBackend> = model.valid();
        UMAP { model, device }
    }

    /// Projects new samples and returns the raw backend tensor.
    ///
    /// # Panics
    /// Panics if `data` is empty.
    pub fn transform_to_tensor(&self, data: Vec<Vec<f64>>) -> Tensor<B::InnerBackend, 2> {
        // Fail fast with a clear message instead of an index panic on data[0].
        assert!(!data.is_empty(), "transform requires at least one sample");
        let num_samples = data.len();
        let num_features = data[0].len();
        // Items are already f64, so a plain flatten suffices (the previous
        // `.map(|f| f64::from(f))` was an identity conversion).
        let train_data: Vec<f64> = data.into_iter().flatten().collect();
        let global = convert_vector_to_tensor(train_data, num_samples, num_features, &self.device);
        self.model.forward(global)
    }

    /// Projects new samples into the learned low-dimensional space.
    ///
    /// # Panics
    /// Panics if `data` is empty.
    pub fn transform(&self, data: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        let local = self.transform_to_tensor(data);
        convert_tensor_to_vector(local)
    }
}