1use ndarray::{Array2, ArrayView1, ArrayView2};
8use thiserror::Error;
9
10mod distance;
11mod nearest_neighbor;
12mod simplicial_set;
13mod spectral;
14mod optimization;
15
16#[derive(Error, Debug)]
18pub enum UmapError {
19 #[error("Invalid parameter: {0}")]
20 InvalidParameter(String),
21 #[error("Computation error: {0}")]
22 ComputationError(String),
23 #[error("Dimension mismatch: expected {expected}, got {actual}")]
24 DimensionMismatch { expected: usize, actual: usize },
25 #[error("Insufficient data: need at least {min} samples, got {actual}")]
26 InsufficientData { min: usize, actual: usize },
27}
28
29pub type Result<T> = std::result::Result<T, UmapError>;
30
31trait DistanceMetric: Send + Sync {
33 fn distance(&self, x: ArrayView1<f64>, y: ArrayView1<f64>) -> f64;
35
36}
37
38#[derive(Debug, Clone)]
40struct UmapParams {
41 pub n_neighbors: usize,
43 pub n_components: usize,
45 pub min_dist: f64,
47 pub n_epochs: usize,
49 pub negative_sample_rate: usize,
51 pub random_seed: Option<u64>,
53 pub spread: f64,
55 pub set_op_mix_ratio: f64,
57 pub local_connectivity: f64,
59 pub repulsion_strength: f64,
61 pub initial_alpha: f64,
63}
64
65impl Default for UmapParams {
66 fn default() -> Self {
67 Self {
68 n_neighbors: 15,
69 n_components: 2,
70 min_dist: 0.1,
71 n_epochs: 200,
72 negative_sample_rate: 5,
73 random_seed: None,
74 spread: 1.0,
75 set_op_mix_ratio: 1.0,
76 local_connectivity: 1.0,
77 repulsion_strength: 1.0,
78 initial_alpha: 1.0,
79 }
80 }
81}
82
83impl UmapParams {
84 pub fn new() -> Self {
86 Self::default()
87 }
88
89 pub fn n_neighbors(mut self, n_neighbors: usize) -> Self {
91 self.n_neighbors = n_neighbors;
92 self
93 }
94
95 pub fn n_components(mut self, n_components: usize) -> Self {
97 self.n_components = n_components;
98 self
99 }
100
101
102 pub fn validate(&self) -> Result<()> {
104 if self.n_neighbors < 2 {
105 return Err(UmapError::InvalidParameter(
106 "n_neighbors must be at least 2".to_string()
107 ));
108 }
109
110 if self.n_components < 1 {
111 return Err(UmapError::InvalidParameter(
112 "n_components must be at least 1".to_string()
113 ));
114 }
115
116 if self.min_dist < 0.0 {
117 return Err(UmapError::InvalidParameter(
118 "min_dist must be non-negative".to_string()
119 ));
120 }
121
122 if self.n_epochs == 0 {
123 return Err(UmapError::InvalidParameter(
124 "n_epochs must be positive".to_string()
125 ));
126 }
127
128 Ok(())
129 }
130}
131
132#[derive(Debug, Clone)]
134struct FuzzySimplicialSet {
135 pub edges: Vec<(usize, usize, f64)>,
137 pub n_vertices: usize,
139}
140
141impl FuzzySimplicialSet {
142 pub fn new(n_vertices: usize) -> Self {
144 Self {
145 edges: Vec::new(),
146 n_vertices,
147 }
148 }
149
150 pub fn add_edge(&mut self, i: usize, j: usize, weight: f64) {
152 if weight > 0.0 {
153 self.edges.push((i, j, weight));
154 }
155 }
156
157}
158
159struct Umap {
161 pub params: UmapParams,
163 distance_metric: Box<dyn DistanceMetric>,
165 pub embedding: Option<Array2<f64>>,
167 pub fuzzy_set: Option<FuzzySimplicialSet>,
169}
170
171impl Umap {
172 pub fn new(params: UmapParams, distance_metric: Box<dyn DistanceMetric>) -> Result<Self> {
174 params.validate()?;
175
176 Ok(Self {
177 params,
178 distance_metric,
179 embedding: None,
180 fuzzy_set: None,
181 })
182 }
183
184 pub fn new_with_euclidean_distance() -> Result<Self> {
186 let params = UmapParams::default();
187 let distance_metric = Box::new(distance::EuclideanDistance);
188 Self::new(params, distance_metric)
189 }
190
191 pub fn fit_transform(&mut self, data: ArrayView2<f64>) -> Result<Array2<f64>> {
193 let n_samples = data.nrows();
194 let _n_features = data.ncols();
195
196 if n_samples < self.params.n_neighbors {
198 return Err(UmapError::InsufficientData {
199 min: self.params.n_neighbors,
200 actual: n_samples,
201 });
202 }
203
204 let fuzzy_set = self.construct_fuzzy_simplicial_set(data)?;
206 self.fuzzy_set = Some(fuzzy_set.clone());
207
208 let mut embedding = self.spectral_layout(&fuzzy_set)?;
210
211 self.optimize_layout(&mut embedding, &fuzzy_set, data)?;
213
214 self.embedding = Some(embedding.clone());
215 Ok(embedding)
216 }
217
218
219 fn construct_fuzzy_simplicial_set(&self, data: ArrayView2<f64>) -> Result<FuzzySimplicialSet> {
221 simplicial_set::fuzzy_simplicial_set(
222 data,
223 self.params.n_neighbors,
224 self.distance_metric.as_ref(),
225 self.params.local_connectivity,
226 self.params.set_op_mix_ratio,
227 self.params.random_seed,
228 )
229 }
230
231 fn spectral_layout(&self, fuzzy_set: &FuzzySimplicialSet) -> Result<Array2<f64>> {
233 spectral::spectral_layout(fuzzy_set, self.params.n_components)
234 }
235
236 fn optimize_layout(
238 &self,
239 embedding: &mut Array2<f64>,
240 fuzzy_set: &FuzzySimplicialSet,
241 _data: ArrayView2<f64>,
242 ) -> Result<()> {
243 let opt_params = optimization::OptimizationParams {
244 n_epochs: self.params.n_epochs,
245 initial_alpha: self.params.initial_alpha,
246 min_dist: self.params.min_dist,
247 spread: self.params.spread,
248 negative_sample_rate: self.params.negative_sample_rate,
249 repulsion_strength: self.params.repulsion_strength,
250 random_seed: self.params.random_seed,
251 };
252
253 optimization::optimize_embedding(embedding, fuzzy_set, &opt_params)
254 }
255}
256
257pub fn convert_to_2d<T: Into<f64> + Copy>(embeddings: Vec<Vec<T>>) -> Result<Vec<Vec<f64>>> {
265 let n_samples = embeddings.len();
266 let n_features = embeddings.first().map(|row| row.len()).unwrap_or(0);
267
268 let mut data = Array2::zeros((n_samples, n_features));
269 for (i, row) in embeddings.iter().enumerate() {
270 for (j, &val) in row.iter().enumerate() {
271 data[[i, j]] = val.into();
272 }
273 }
274
275 let params = UmapParams::new()
276 .n_components(2)
277 .n_neighbors(15.min(n_samples - 1));
278
279 let mut umap = Umap::new_with_euclidean_distance()?;
280 umap.params = params;
281 let result = umap.fit_transform(data.view())?;
282
283 let mut output = Vec::new();
284 for i in 0..result.nrows() {
285 output.push(vec![result[[i, 0]], result[[i, 1]]]);
286 }
287 Ok(output)
288}
289
290pub fn convert_to_3d<T: Into<f64> + Copy>(embeddings: Vec<Vec<T>>) -> Result<Vec<Vec<f64>>> {
298 let n_samples = embeddings.len();
299 let n_features = embeddings.first().map(|row| row.len()).unwrap_or(0);
300
301 let mut data = Array2::zeros((n_samples, n_features));
302 for (i, row) in embeddings.iter().enumerate() {
303 for (j, &val) in row.iter().enumerate() {
304 data[[i, j]] = val.into();
305 }
306 }
307
308 let params = UmapParams::new()
309 .n_components(3)
310 .n_neighbors(15.min(n_samples - 1));
311
312 let mut umap = Umap::new_with_euclidean_distance()?;
313 umap.params = params;
314 let result = umap.fit_transform(data.view())?;
315
316 let mut output = Vec::new();
317 for i in 0..result.nrows() {
318 output.push(vec![result[[i, 0]], result[[i, 1]], result[[i, 2]]]);
319 }
320 Ok(output)
321}
322
323#[cfg(test)]
324mod tests {
325 use super::*;
326 use ndarray::Array2;
327
328 #[test]
329 fn test_umap_params_validation() {
330 let valid_params = UmapParams::new()
331 .n_neighbors(10)
332 .n_components(2)
333 .min_dist(0.1);
334
335 assert!(valid_params.validate().is_ok());
336
337 let invalid_params = UmapParams::new().n_neighbors(1);
338 assert!(invalid_params.validate().is_err());
339 }
340
341 #[test]
342 fn test_fuzzy_simplicial_set() {
343 let mut fs_set = FuzzySimplicialSet::new(3);
344 fs_set.add_edge(0, 1, 0.5);
345 fs_set.add_edge(1, 2, 0.8);
346
347 assert_eq!(fs_set.edges.len(), 2);
348 assert_eq!(fs_set.vertex_degree(0), 0.5);
349 assert_eq!(fs_set.vertex_degree(1), 0.8);
350 }
351}