use crate::{
distances::*,
train::*,
utils::*,
};
use burn::tensor::{Device, Tensor};
use ndarray::Array2;
use num::Float;
use std::time::Instant;
/// CPU-only UMAP front end.
///
/// Holds the shared [`UmapConfig`] and exposes `fit` for producing a
/// [`CpuFittedUmap`]. Unlike the GPU backend, this path is non-parametric
/// and cannot transform unseen data after fitting.
pub struct CpuUmap {
    // Full UMAP configuration (cloned into the fitted model on `fit`).
    config: UmapConfig,
}
impl CpuUmap {
    /// Create a new CPU-backed UMAP runner from a configuration.
    pub fn new(config: UmapConfig) -> Self {
        Self { config }
    }

    /// Fit an embedding on the CPU.
    ///
    /// `data` is a row-major list of samples (each inner `Vec` is one sample;
    /// all rows are assumed to have the same length — TODO confirm callers
    /// guarantee this, otherwise the flat indexing below is wrong).
    /// `_labels` is accepted for signature parity with the GPU backend but is
    /// not used by the CPU path.
    ///
    /// Returns a [`CpuFittedUmap`] holding the 2-D embedding and a clone of
    /// the configuration. Empty input yields an empty embedding rather than
    /// panicking.
    pub fn fit<F: Float>(&self, data: Vec<Vec<F>>, _labels: Option<Vec<String>>) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        let start_time = Instant::now();
        let n_samples = data.len();
        // `data[0]` would panic on empty input; treat "no samples" as
        // "no features" so the pipeline degrades to an empty embedding.
        let n_features = data.first().map_or(0, |row| row.len());

        // Flatten row-major into one f64 buffer for the helpers below.
        // `Into<f64>` already yields f64, so no `as` cast is needed.
        let mut flat_data: Vec<f64> = Vec::with_capacity(n_samples * n_features);
        for sample in &data {
            for &val in sample {
                flat_data.push(val.into());
            }
        }

        normalize_data(&mut flat_data, n_samples, n_features);
        let embedding = create_simple_embedding(n_samples, n_features, &flat_data);

        if self.config.optimization.verbose {
            println!(
                "[fast-umap CPU] Processing complete in {:.2}s",
                start_time.elapsed().as_secs_f64()
            );
        }

        CpuFittedUmap {
            embedding,
            config: self.config.clone(),
        }
    }
}
/// Build a trivial 2-D embedding by scaling the first (up to) two features of
/// each sample by 10. This is a placeholder projection, not a real UMAP
/// optimization pass.
///
/// `data` is the row-major flattened sample matrix (`n_samples * n_features`
/// values). Previously, inputs with a single feature collapsed every point to
/// the origin because both axes were gated on `n_features >= 2`; now the lone
/// feature is embedded on the x-axis (y stays 0.0). Zero-feature input still
/// yields all-origin points, and behavior for `n_features >= 2` is unchanged.
fn create_simple_embedding(n_samples: usize, n_features: usize, data: &[f64]) -> Vec<Vec<f64>> {
    let mut embedding = vec![vec![0.0, 0.0]; n_samples];
    for (i, point) in embedding.iter_mut().enumerate() {
        let base_idx = i * n_features;
        if n_features >= 1 {
            point[0] = data[base_idx] * 10.0;
        }
        if n_features >= 2 {
            point[1] = data[base_idx + 1] * 10.0;
        }
    }
    embedding
}
/// Result of a CPU fit: the 2-D embedding plus the configuration that
/// produced it.
pub struct CpuFittedUmap {
    // One `[x, y]` point per input sample, in input order.
    embedding: Vec<Vec<f64>>,
    // Snapshot of the configuration cloned at fit time.
    config: UmapConfig,
}
impl CpuFittedUmap {
    /// Borrow the fitted 2-D embedding (one point per input sample).
    pub fn embedding(&self) -> &Vec<Vec<f64>> {
        &self.embedding
    }

    /// Consume the fitted model and hand back the embedding by value.
    pub fn into_embedding(self) -> Vec<Vec<f64>> {
        self.embedding
    }

    /// Borrow the configuration this model was fitted with.
    pub fn config(&self) -> &UmapConfig {
        &self.config
    }

    /// Embed previously-unseen data.
    ///
    /// # Panics
    /// Always — the CPU backend is non-parametric and cannot transform new
    /// samples; use the GPU backend for parametric UMAP with transform
    /// support.
    pub fn transform(&self, _data: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        panic!("CPU backend does not support transforming new data. Use GPU backend for parametric UMAP with transform support.");
    }
}
/// Free-function convenience API over [`CpuUmap`].
pub mod api {
    use super::*;

    /// One-shot helper: build a [`CpuUmap`] from `config` and fit it on
    /// `data`, forwarding `labels` unchanged.
    pub fn fit_cpu<F: Float>(
        config: UmapConfig,
        data: Vec<Vec<F>>,
        labels: Option<Vec<String>>,
    ) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        CpuUmap::new(config).fit(data, labels)
    }
}