1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
//! CPU backend implementation
//!
//! This module provides CPU-based UMAP functionality as a fallback when GPU is not available.
//! Currently implements utility functions and provides a foundation for full CPU support.
use crate::{
distances::*,
train::*,
utils::*,
};
use burn::tensor::{Device, Tensor};
use ndarray::Array2;
use num::Float;
use std::time::Instant;
/// CPU-specific UMAP implementation
///
/// Thin wrapper around a `UmapConfig`; all real work happens in `fit`,
/// which clones the config into the returned fitted model.
pub struct CpuUmap {
    // Configuration consumed by `fit`; cloned into each `CpuFittedUmap`.
    config: UmapConfig,
}
impl CpuUmap {
    /// Create a new CPU UMAP instance
    pub fn new(config: UmapConfig) -> Self {
        Self { config }
    }

    /// Fit UMAP using CPU implementation.
    ///
    /// Flattens `data` (row-major), normalizes it, and produces a simple
    /// 2D embedding. This is a placeholder pipeline, not the full UMAP
    /// algorithm. `_labels` is currently unused by the CPU backend.
    ///
    /// Returns a `CpuFittedUmap` holding the embedding and a clone of the
    /// configuration. An empty `data` yields an empty embedding instead of
    /// panicking (the previous version indexed `data[0]` unconditionally).
    pub fn fit<F: Float>(&self, data: Vec<Vec<F>>, _labels: Option<Vec<String>>) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        let start_time = Instant::now();

        // Guard: no samples means nothing to normalize or embed.
        if data.is_empty() {
            return CpuFittedUmap {
                embedding: Vec::new(),
                config: self.config.clone(),
            };
        }

        // Convert data to a flat, row-major f64 buffer.
        // NOTE(review): assumes all rows have `data[0].len()` features —
        // ragged input would corrupt the layout; verify at the call site.
        let n_samples = data.len();
        let n_features = data[0].len();
        let mut flat_data = Vec::with_capacity(n_samples * n_features);
        for sample in &data {
            for &val in sample {
                // `Into<f64>` already yields f64; the old `as f64` cast was a no-op.
                flat_data.push(val.into());
            }
        }

        // Normalize data in place (CPU operation).
        normalize_data(&mut flat_data, n_samples, n_features);

        // Create a simple 2D embedding using a PCA-like approach for demo.
        // In a full implementation, this would use the proper UMAP algorithm.
        let embedding = create_simple_embedding(n_samples, n_features, &flat_data);

        if self.config.optimization.verbose {
            println!(
                "[fast-umap CPU] Processing complete in {:.2}s",
                start_time.elapsed().as_secs_f64()
            );
        }

        CpuFittedUmap {
            embedding,
            config: self.config.clone(),
        }
    }
}
/// Create a simple 2D embedding for demonstration.
/// This is a placeholder — a full implementation would run the real UMAP
/// optimization instead of a naive projection.
fn create_simple_embedding(n_samples: usize, n_features: usize, data: &[f64]) -> Vec<Vec<f64>> {
    // Toy projection: each sample maps to its first two feature values
    // scaled by a constant. Samples with fewer than two features collapse
    // to the origin, matching the placeholder contract.
    const SCALE: f64 = 10.0;
    (0..n_samples)
        .map(|sample_idx| {
            if n_features >= 2 {
                let row_start = sample_idx * n_features;
                vec![data[row_start] * SCALE, data[row_start + 1] * SCALE]
            } else {
                vec![0.0, 0.0]
            }
        })
        .collect()
}
/// Fitted UMAP model for CPU backend
///
/// Produced by `CpuUmap::fit`; owns the computed embedding and a copy of
/// the configuration used to fit it.
pub struct CpuFittedUmap {
    // One `[x, y]` pair per input sample, in input order.
    embedding: Vec<Vec<f64>>,
    // Configuration the model was fitted with (cloned from `CpuUmap`).
    config: UmapConfig,
}
impl CpuFittedUmap {
    /// Borrow the configuration this model was fitted with.
    pub fn config(&self) -> &UmapConfig {
        &self.config
    }

    /// Borrow the low-dimensional embedding produced by `fit`.
    pub fn embedding(&self) -> &Vec<Vec<f64>> {
        &self.embedding
    }

    /// Take ownership of the embedding, discarding the rest of the model.
    pub fn into_embedding(self) -> Vec<Vec<f64>> {
        self.embedding
    }

    /// Always panics: the CPU backend implements classical (non-parametric)
    /// UMAP and cannot project unseen samples. Use the GPU backend for
    /// parametric UMAP with transform support.
    pub fn transform(&self, _data: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        panic!("CPU backend does not support transforming new data. Use GPU backend for parametric UMAP with transform support.");
    }
}
/// Public API for CPU backend
pub mod api {
    use super::*;

    /// Fit a UMAP model on the CPU.
    ///
    /// Convenience wrapper: builds a `CpuUmap` from `config` and immediately
    /// fits it on `data`, forwarding the optional `labels` unchanged.
    pub fn fit_cpu<F: Float>(
        config: UmapConfig,
        data: Vec<Vec<F>>,
        labels: Option<Vec<String>>,
    ) -> CpuFittedUmap
    where
        F: num::FromPrimitive + Into<f64>,
    {
        CpuUmap::new(config).fit(data, labels)
    }
}