scirs2_transform/reduction/
spectral_embedding.rs1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
8use scirs2_core::numeric::{Float, NumCast};
9use scirs2_core::validation::{check_positive, checkshape};
10use scirs2_linalg::eigh;
11
12use crate::error::{Result, TransformError};
13
14#[derive(Debug, Clone, PartialEq)]
16pub enum AffinityMethod {
17 Gaussian,
19 KNN,
21 Epsilon,
23}
24
25#[derive(Debug, Clone)]
30pub struct SpectralEmbedding {
31 n_components: usize,
33 affinity_method: AffinityMethod,
35 n_neighbors: usize,
37 gamma: Option<f64>,
39 epsilon: f64,
41 normalized: bool,
43 random_state: Option<u64>,
45 embedding: Option<Array2<f64>>,
47 training_data: Option<Array2<f64>>,
49 affinity_matrix: Option<Array2<f64>>,
51 eigenvectors: Option<Array2<f64>>,
53 eigenvalues: Option<Array1<f64>>,
55}
56
57impl SpectralEmbedding {
58 pub fn new(n_components: usize, affinitymethod: AffinityMethod) -> Self {
64 SpectralEmbedding {
65 n_components,
66 affinity_method: affinitymethod,
67 n_neighbors: 10,
68 gamma: None,
69 epsilon: 1.0,
70 normalized: true,
71 random_state: None,
72 embedding: None,
73 training_data: None,
74 affinity_matrix: None,
75 eigenvectors: None,
76 eigenvalues: None,
77 }
78 }
79
80 pub fn with_n_neighbors(mut self, nneighbors: usize) -> Self {
82 self.n_neighbors = nneighbors;
83 self
84 }
85
86 pub fn with_gamma(mut self, gamma: f64) -> Self {
88 self.gamma = Some(gamma);
89 self
90 }
91
92 pub fn with_epsilon(mut self, epsilon: f64) -> Self {
94 self.epsilon = epsilon;
95 self
96 }
97
98 pub fn with_normalized(mut self, normalized: bool) -> Self {
100 self.normalized = normalized;
101 self
102 }
103
104 pub fn with_random_state(mut self, seed: u64) -> Self {
106 self.random_state = Some(seed);
107 self
108 }
109
110 fn compute_distances<S>(&self, x: &ArrayBase<S, Ix2>) -> Array2<f64>
112 where
113 S: Data,
114 S::Elem: Float + NumCast,
115 {
116 let n_samples = x.nrows();
117 let mut distances = Array2::zeros((n_samples, n_samples));
118
119 for i in 0..n_samples {
120 for j in i + 1..n_samples {
121 let mut dist_sq = 0.0;
122 for k in 0..x.ncols() {
123 let diff = NumCast::from(x[[i, k]]).unwrap_or(0.0)
124 - NumCast::from(x[[j, k]]).unwrap_or(0.0);
125 dist_sq += diff * diff;
126 }
127 let dist = dist_sq.sqrt();
128 distances[[i, j]] = dist;
129 distances[[j, i]] = dist;
130 }
131 }
132
133 distances
134 }
135
136 fn construct_affinity_matrix(&self, distances: &Array2<f64>) -> Result<Array2<f64>> {
138 let n_samples = distances.nrows();
139 let mut affinity = Array2::zeros((n_samples, n_samples));
140
141 match &self.affinity_method {
142 AffinityMethod::Gaussian => {
143 let gamma = if let Some(g) = self.gamma {
145 g
146 } else {
147 let mut all_distances: Vec<f64> = Vec::new();
149 for i in 0..n_samples {
150 for j in i + 1..n_samples {
151 all_distances.push(distances[[i, j]]);
152 }
153 }
154 all_distances.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
155 let median_dist = all_distances[all_distances.len() / 2];
156 1.0 / (2.0 * median_dist * median_dist)
157 };
158
159 for i in 0..n_samples {
161 for j in 0..n_samples {
162 if i != j {
163 let dist_sq = distances[[i, j]] * distances[[i, j]];
164 affinity[[i, j]] = (-gamma * dist_sq).exp();
165 }
166 }
167 }
168 }
169 AffinityMethod::KNN => {
170 for i in 0..n_samples {
172 let mut neighbors_with_dist: Vec<(f64, usize)> = Vec::new();
174 for j in 0..n_samples {
175 if i != j {
176 neighbors_with_dist.push((distances[[i, j]], j));
177 }
178 }
179 neighbors_with_dist
180 .sort_by(|a, b| a.0.partial_cmp(&b.0).expect("Operation failed"));
181
182 #[allow(clippy::needless_range_loop)]
184 for k in 0..self.n_neighbors.min(neighbors_with_dist.len()) {
185 let neighbor = neighbors_with_dist[k].1;
186 let weight = 1.0; affinity[[i, neighbor]] = weight;
188 affinity[[neighbor, i]] = weight; }
190 }
191 }
192 AffinityMethod::Epsilon => {
193 for i in 0..n_samples {
195 for j in i + 1..n_samples {
196 if distances[[i, j]] <= self.epsilon {
197 let weight = 1.0; affinity[[i, j]] = weight;
199 affinity[[j, i]] = weight;
200 }
201 }
202 }
203 }
204 }
205
206 Ok(affinity)
207 }
208
209 fn compute_laplacian(&self, affinity: &Array2<f64>) -> Result<Array2<f64>> {
211 let n_samples = affinity.nrows();
212
213 let mut degree = Array1::zeros(n_samples);
215 for i in 0..n_samples {
216 degree[i] = affinity.row(i).sum();
217 }
218
219 for i in 0..n_samples {
221 if degree[i] < 1e-10 {
222 return Err(TransformError::ComputationError(
223 "Graph has isolated nodes. Try increasing epsilon or n_neighbors.".to_string(),
224 ));
225 }
226 }
227
228 let mut laplacian = Array2::zeros((n_samples, n_samples));
229
230 if self.normalized {
231 for i in 0..n_samples {
233 for j in 0..n_samples {
234 if i == j {
235 laplacian[[i, j]] = 1.0;
236 } else {
237 let normalized_affinity = affinity[[i, j]] / (degree[i] * degree[j]).sqrt();
238 laplacian[[i, j]] = -normalized_affinity;
239 }
240 }
241 }
242 } else {
243 for i in 0..n_samples {
245 for j in 0..n_samples {
246 if i == j {
247 laplacian[[i, j]] = degree[i];
248 } else {
249 laplacian[[i, j]] = -affinity[[i, j]];
250 }
251 }
252 }
253 }
254
255 Ok(laplacian)
256 }
257
258 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
266 where
267 S: Data,
268 S::Elem: Float + NumCast + Send + Sync,
269 {
270 let (n_samples, n_features) = x.dim();
271
272 check_positive(self.n_components, "n_components")?;
274 checkshape(x, &[n_samples, n_features], "x")?;
275
276 if self.n_components >= n_samples {
277 return Err(TransformError::InvalidInput(format!(
278 "n_components={} must be < n_samples={}",
279 self.n_components, n_samples
280 )));
281 }
282
283 if matches!(self.affinity_method, AffinityMethod::KNN) && n_samples <= self.n_neighbors {
284 return Err(TransformError::InvalidInput(format!(
285 "n_neighbors={} must be < n_samples={}",
286 self.n_neighbors, n_samples
287 )));
288 }
289
290 let x_f64 = x.mapv(|v| NumCast::from(v).unwrap_or(0.0));
292
293 let distances = self.compute_distances(&x_f64.view());
295
296 let affinity = self.construct_affinity_matrix(&distances)?;
298
299 let laplacian = self.compute_laplacian(&affinity)?;
301
302 let (eigenvalues, eigenvectors) = match eigh(&laplacian.view(), None) {
304 Ok(result) => result,
305 Err(e) => return Err(TransformError::LinalgError(e)),
306 };
307
308 let mut indices: Vec<usize> = (0..n_samples).collect();
310 indices.sort_by(|&i, &j| {
311 eigenvalues[i]
312 .partial_cmp(&eigenvalues[j])
313 .expect("Operation failed")
314 });
315
316 let start_idx = if self.normalized { 1 } else { 0 }; let mut embedding = Array2::zeros((n_samples, self.n_components));
319
320 for j in 0..self.n_components {
321 let idx = indices[start_idx + j];
322 for i in 0..n_samples {
323 embedding[[i, j]] = eigenvectors[[i, idx]];
324 }
325 }
326
327 self.embedding = Some(embedding);
329 self.training_data = Some(x_f64);
330 self.affinity_matrix = Some(affinity);
331 self.eigenvectors = Some(eigenvectors);
332 self.eigenvalues = Some(eigenvalues);
333
334 Ok(())
335 }
336
337 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
345 where
346 S: Data,
347 S::Elem: Float + NumCast,
348 {
349 if self.embedding.is_none() {
350 return Err(TransformError::NotFitted(
351 "SpectralEmbedding model has not been fitted".to_string(),
352 ));
353 }
354
355 let training_data = self
356 .training_data
357 .as_ref()
358 .ok_or_else(|| TransformError::NotFitted("Training data not available".to_string()))?;
359
360 let x_f64 = x.mapv(|v| NumCast::from(v).unwrap_or(0.0));
361
362 if self.is_same_data(&x_f64, training_data) {
364 return Ok(self.embedding.as_ref().expect("Operation failed").clone());
365 }
366
367 self.nystrom_extension(&x_f64)
369 }
370
371 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
379 where
380 S: Data,
381 S::Elem: Float + NumCast + Send + Sync,
382 {
383 self.fit(x)?;
384 self.transform(x)
385 }
386
387 pub fn embedding(&self) -> Option<&Array2<f64>> {
389 self.embedding.as_ref()
390 }
391
392 pub fn affinity_matrix(&self) -> Option<&Array2<f64>> {
394 self.affinity_matrix.as_ref()
395 }
396
397 pub fn eigenvalues(&self) -> Option<&Array1<f64>> {
399 self.eigenvalues.as_ref()
400 }
401
402 fn is_same_data(&self, x: &Array2<f64>, trainingdata: &Array2<f64>) -> bool {
404 if x.dim() != trainingdata.dim() {
405 return false;
406 }
407
408 let (n_samples, n_features) = x.dim();
409 for i in 0..n_samples {
410 for j in 0..n_features {
411 if (x[[i, j]] - trainingdata[[i, j]]).abs() > 1e-10 {
412 return false;
413 }
414 }
415 }
416 true
417 }
418
419 fn nystrom_extension(&self, xnew: &Array2<f64>) -> Result<Array2<f64>> {
421 let training_data = self.training_data.as_ref().expect("Operation failed");
422 let training_embedding = self.embedding.as_ref().expect("Operation failed");
423 let eigenvalues = self.eigenvalues.as_ref().expect("Operation failed");
424
425 let (n_new, n_features) = xnew.dim();
426 let (n_training_, _) = training_data.dim();
427
428 if n_features != training_data.ncols() {
429 return Err(TransformError::InvalidInput(format!(
430 "Input features {} must match training features {}",
431 n_features,
432 training_data.ncols()
433 )));
434 }
435
436 let mut new_to_training_affinity = Array2::zeros((n_new, n_training_));
438
439 for i in 0..n_new {
440 for j in 0..n_training_ {
441 let mut dist_sq = 0.0;
442 for k in 0..n_features {
443 let diff = xnew[[i, k]] - training_data[[j, k]];
444 dist_sq += diff * diff;
445 }
446
447 let affinity_value = match &self.affinity_method {
448 AffinityMethod::Gaussian => {
449 let gamma = self.gamma.unwrap_or(1.0); (-gamma * dist_sq).exp()
451 }
452 AffinityMethod::KNN | AffinityMethod::Epsilon => {
453 let dist = dist_sq.sqrt();
455 if dist > 0.0 {
456 1.0 / (1.0 + dist)
457 } else {
458 1.0
459 }
460 }
461 };
462
463 new_to_training_affinity[[i, j]] = affinity_value;
464 }
465 }
466
467 let mut new_embedding = Array2::zeros((n_new, self.n_components));
470
471 let start_idx = if self.normalized { 1 } else { 0 };
472
473 for i in 0..n_new {
474 for j in 0..self.n_components {
475 let eigenvalue = eigenvalues[start_idx + j];
476 if eigenvalue.abs() > 1e-10 {
477 let mut coord = 0.0;
478 for k in 0..n_training_ {
479 coord += new_to_training_affinity[[i, k]] * training_embedding[[k, j]];
480 }
481 new_embedding[[i, j]] = coord / eigenvalue.sqrt();
482 }
483 }
484 }
485
486 Ok(new_embedding)
487 }
488}
489
490#[cfg(test)]
491mod tests {
492 use super::*;
493 use scirs2_core::ndarray::Array;
494
495 #[test]
496 fn test_spectral_embedding_gaussian() {
497 let data = vec![1.0, 1.0, 1.1, 1.1, 1.2, 0.9, 5.0, 5.0, 5.1, 5.1, 4.9, 5.2];
499 let x = Array::from_shape_vec((6, 2), data).expect("Operation failed");
500
501 let mut spectral = SpectralEmbedding::new(2, AffinityMethod::Gaussian);
502 let embedding = spectral.fit_transform(&x).expect("Operation failed");
503
504 assert_eq!(embedding.shape(), &[6, 2]);
505
506 for val in embedding.iter() {
508 assert!(val.is_finite());
509 }
510 }
511
512 #[test]
513 fn test_spectral_embedding_knn() {
514 let x: Array2<f64> = Array::eye(8);
515
516 let mut spectral = SpectralEmbedding::new(3, AffinityMethod::KNN).with_n_neighbors(3);
517 let embedding = spectral.fit_transform(&x).expect("Operation failed");
518
519 assert_eq!(embedding.shape(), &[8, 3]);
520
521 for val in embedding.iter() {
523 assert!(val.is_finite());
524 }
525 }
526
527 #[test]
528 fn test_spectral_embedding_out_of_sample() {
529 let x_train: Array2<f64> = Array::eye(5);
530 let x_test = Array::from_shape_vec(
531 (2, 5),
532 vec![0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.0],
533 )
534 .expect("Operation failed");
535
536 let mut spectral = SpectralEmbedding::new(2, AffinityMethod::Gaussian);
537 spectral.fit(&x_train).expect("Operation failed");
538 let test_embedding = spectral.transform(&x_test).expect("Operation failed");
539
540 assert_eq!(test_embedding.shape(), &[2, 2]);
541
542 for val in test_embedding.iter() {
544 assert!(val.is_finite());
545 }
546 }
547}