scirs2_transform/reduction/
spectral_embedding.rs1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
8use scirs2_core::numeric::{Float, NumCast};
9use scirs2_core::validation::{check_positive, checkshape};
10use scirs2_linalg::eigh;
11
12use crate::error::{Result, TransformError};
13
14#[derive(Debug, Clone, PartialEq)]
16pub enum AffinityMethod {
17 Gaussian,
19 KNN,
21 Epsilon,
23}
24
25#[derive(Debug, Clone)]
30pub struct SpectralEmbedding {
31 n_components: usize,
33 affinity_method: AffinityMethod,
35 n_neighbors: usize,
37 gamma: Option<f64>,
39 epsilon: f64,
41 normalized: bool,
43 random_state: Option<u64>,
45 embedding: Option<Array2<f64>>,
47 training_data: Option<Array2<f64>>,
49 affinity_matrix: Option<Array2<f64>>,
51 eigenvectors: Option<Array2<f64>>,
53 eigenvalues: Option<Array1<f64>>,
55}
56
57impl SpectralEmbedding {
58 pub fn new(n_components: usize, affinitymethod: AffinityMethod) -> Self {
64 SpectralEmbedding {
65 n_components,
66 affinity_method: affinitymethod,
67 n_neighbors: 10,
68 gamma: None,
69 epsilon: 1.0,
70 normalized: true,
71 random_state: None,
72 embedding: None,
73 training_data: None,
74 affinity_matrix: None,
75 eigenvectors: None,
76 eigenvalues: None,
77 }
78 }
79
80 pub fn with_n_neighbors(mut self, nneighbors: usize) -> Self {
82 self.n_neighbors = nneighbors;
83 self
84 }
85
86 pub fn with_gamma(mut self, gamma: f64) -> Self {
88 self.gamma = Some(gamma);
89 self
90 }
91
92 pub fn with_epsilon(mut self, epsilon: f64) -> Self {
94 self.epsilon = epsilon;
95 self
96 }
97
98 pub fn with_normalized(mut self, normalized: bool) -> Self {
100 self.normalized = normalized;
101 self
102 }
103
104 pub fn with_random_state(mut self, seed: u64) -> Self {
106 self.random_state = Some(seed);
107 self
108 }
109
110 fn compute_distances<S>(&self, x: &ArrayBase<S, Ix2>) -> Array2<f64>
112 where
113 S: Data,
114 S::Elem: Float + NumCast,
115 {
116 let n_samples = x.nrows();
117 let mut distances = Array2::zeros((n_samples, n_samples));
118
119 for i in 0..n_samples {
120 for j in i + 1..n_samples {
121 let mut dist_sq = 0.0;
122 for k in 0..x.ncols() {
123 let diff = NumCast::from(x[[i, k]]).unwrap_or(0.0)
124 - NumCast::from(x[[j, k]]).unwrap_or(0.0);
125 dist_sq += diff * diff;
126 }
127 let dist = dist_sq.sqrt();
128 distances[[i, j]] = dist;
129 distances[[j, i]] = dist;
130 }
131 }
132
133 distances
134 }
135
136 fn construct_affinity_matrix(&self, distances: &Array2<f64>) -> Result<Array2<f64>> {
138 let n_samples = distances.nrows();
139 let mut affinity = Array2::zeros((n_samples, n_samples));
140
141 match &self.affinity_method {
142 AffinityMethod::Gaussian => {
143 let gamma = if let Some(g) = self.gamma {
145 g
146 } else {
147 let mut all_distances: Vec<f64> = Vec::new();
149 for i in 0..n_samples {
150 for j in i + 1..n_samples {
151 all_distances.push(distances[[i, j]]);
152 }
153 }
154 all_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
155 let median_dist = all_distances[all_distances.len() / 2];
156 1.0 / (2.0 * median_dist * median_dist)
157 };
158
159 for i in 0..n_samples {
161 for j in 0..n_samples {
162 if i != j {
163 let dist_sq = distances[[i, j]] * distances[[i, j]];
164 affinity[[i, j]] = (-gamma * dist_sq).exp();
165 }
166 }
167 }
168 }
169 AffinityMethod::KNN => {
170 for i in 0..n_samples {
172 let mut neighbors_with_dist: Vec<(f64, usize)> = Vec::new();
174 for j in 0..n_samples {
175 if i != j {
176 neighbors_with_dist.push((distances[[i, j]], j));
177 }
178 }
179 neighbors_with_dist.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
180
181 #[allow(clippy::needless_range_loop)]
183 for k in 0..self.n_neighbors.min(neighbors_with_dist.len()) {
184 let neighbor = neighbors_with_dist[k].1;
185 let weight = 1.0; affinity[[i, neighbor]] = weight;
187 affinity[[neighbor, i]] = weight; }
189 }
190 }
191 AffinityMethod::Epsilon => {
192 for i in 0..n_samples {
194 for j in i + 1..n_samples {
195 if distances[[i, j]] <= self.epsilon {
196 let weight = 1.0; affinity[[i, j]] = weight;
198 affinity[[j, i]] = weight;
199 }
200 }
201 }
202 }
203 }
204
205 Ok(affinity)
206 }
207
208 fn compute_laplacian(&self, affinity: &Array2<f64>) -> Result<Array2<f64>> {
210 let n_samples = affinity.nrows();
211
212 let mut degree = Array1::zeros(n_samples);
214 for i in 0..n_samples {
215 degree[i] = affinity.row(i).sum();
216 }
217
218 for i in 0..n_samples {
220 if degree[i] < 1e-10 {
221 return Err(TransformError::ComputationError(
222 "Graph has isolated nodes. Try increasing epsilon or n_neighbors.".to_string(),
223 ));
224 }
225 }
226
227 let mut laplacian = Array2::zeros((n_samples, n_samples));
228
229 if self.normalized {
230 for i in 0..n_samples {
232 for j in 0..n_samples {
233 if i == j {
234 laplacian[[i, j]] = 1.0;
235 } else {
236 let normalized_affinity = affinity[[i, j]] / (degree[i] * degree[j]).sqrt();
237 laplacian[[i, j]] = -normalized_affinity;
238 }
239 }
240 }
241 } else {
242 for i in 0..n_samples {
244 for j in 0..n_samples {
245 if i == j {
246 laplacian[[i, j]] = degree[i];
247 } else {
248 laplacian[[i, j]] = -affinity[[i, j]];
249 }
250 }
251 }
252 }
253
254 Ok(laplacian)
255 }
256
257 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
265 where
266 S: Data,
267 S::Elem: Float + NumCast + Send + Sync,
268 {
269 let (n_samples, n_features) = x.dim();
270
271 check_positive(self.n_components, "n_components")?;
273 checkshape(x, &[n_samples, n_features], "x")?;
274
275 if self.n_components >= n_samples {
276 return Err(TransformError::InvalidInput(format!(
277 "n_components={} must be < n_samples={}",
278 self.n_components, n_samples
279 )));
280 }
281
282 if matches!(self.affinity_method, AffinityMethod::KNN) && n_samples <= self.n_neighbors {
283 return Err(TransformError::InvalidInput(format!(
284 "n_neighbors={} must be < n_samples={}",
285 self.n_neighbors, n_samples
286 )));
287 }
288
289 let x_f64 = x.mapv(|v| NumCast::from(v).unwrap_or(0.0));
291
292 let distances = self.compute_distances(&x_f64.view());
294
295 let affinity = self.construct_affinity_matrix(&distances)?;
297
298 let laplacian = self.compute_laplacian(&affinity)?;
300
301 let (eigenvalues, eigenvectors) = match eigh(&laplacian.view(), None) {
303 Ok(result) => result,
304 Err(e) => return Err(TransformError::LinalgError(e)),
305 };
306
307 let mut indices: Vec<usize> = (0..n_samples).collect();
309 indices.sort_by(|&i, &j| eigenvalues[i].partial_cmp(&eigenvalues[j]).unwrap());
310
311 let start_idx = if self.normalized { 1 } else { 0 }; let mut embedding = Array2::zeros((n_samples, self.n_components));
314
315 for j in 0..self.n_components {
316 let idx = indices[start_idx + j];
317 for i in 0..n_samples {
318 embedding[[i, j]] = eigenvectors[[i, idx]];
319 }
320 }
321
322 self.embedding = Some(embedding);
324 self.training_data = Some(x_f64);
325 self.affinity_matrix = Some(affinity);
326 self.eigenvectors = Some(eigenvectors);
327 self.eigenvalues = Some(eigenvalues);
328
329 Ok(())
330 }
331
332 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
340 where
341 S: Data,
342 S::Elem: Float + NumCast,
343 {
344 if self.embedding.is_none() {
345 return Err(TransformError::NotFitted(
346 "SpectralEmbedding model has not been fitted".to_string(),
347 ));
348 }
349
350 let training_data = self
351 .training_data
352 .as_ref()
353 .ok_or_else(|| TransformError::NotFitted("Training data not available".to_string()))?;
354
355 let x_f64 = x.mapv(|v| NumCast::from(v).unwrap_or(0.0));
356
357 if self.is_same_data(&x_f64, training_data) {
359 return Ok(self.embedding.as_ref().unwrap().clone());
360 }
361
362 self.nystrom_extension(&x_f64)
364 }
365
366 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
374 where
375 S: Data,
376 S::Elem: Float + NumCast + Send + Sync,
377 {
378 self.fit(x)?;
379 self.transform(x)
380 }
381
382 pub fn embedding(&self) -> Option<&Array2<f64>> {
384 self.embedding.as_ref()
385 }
386
387 pub fn affinity_matrix(&self) -> Option<&Array2<f64>> {
389 self.affinity_matrix.as_ref()
390 }
391
392 pub fn eigenvalues(&self) -> Option<&Array1<f64>> {
394 self.eigenvalues.as_ref()
395 }
396
397 fn is_same_data(&self, x: &Array2<f64>, trainingdata: &Array2<f64>) -> bool {
399 if x.dim() != trainingdata.dim() {
400 return false;
401 }
402
403 let (n_samples, n_features) = x.dim();
404 for i in 0..n_samples {
405 for j in 0..n_features {
406 if (x[[i, j]] - trainingdata[[i, j]]).abs() > 1e-10 {
407 return false;
408 }
409 }
410 }
411 true
412 }
413
414 fn nystrom_extension(&self, xnew: &Array2<f64>) -> Result<Array2<f64>> {
416 let training_data = self.training_data.as_ref().unwrap();
417 let training_embedding = self.embedding.as_ref().unwrap();
418 let eigenvalues = self.eigenvalues.as_ref().unwrap();
419
420 let (n_new, n_features) = xnew.dim();
421 let (n_training_, _) = training_data.dim();
422
423 if n_features != training_data.ncols() {
424 return Err(TransformError::InvalidInput(format!(
425 "Input features {} must match training features {}",
426 n_features,
427 training_data.ncols()
428 )));
429 }
430
431 let mut new_to_training_affinity = Array2::zeros((n_new, n_training_));
433
434 for i in 0..n_new {
435 for j in 0..n_training_ {
436 let mut dist_sq = 0.0;
437 for k in 0..n_features {
438 let diff = xnew[[i, k]] - training_data[[j, k]];
439 dist_sq += diff * diff;
440 }
441
442 let affinity_value = match &self.affinity_method {
443 AffinityMethod::Gaussian => {
444 let gamma = self.gamma.unwrap_or(1.0); (-gamma * dist_sq).exp()
446 }
447 AffinityMethod::KNN | AffinityMethod::Epsilon => {
448 let dist = dist_sq.sqrt();
450 if dist > 0.0 {
451 1.0 / (1.0 + dist)
452 } else {
453 1.0
454 }
455 }
456 };
457
458 new_to_training_affinity[[i, j]] = affinity_value;
459 }
460 }
461
462 let mut new_embedding = Array2::zeros((n_new, self.n_components));
465
466 let start_idx = if self.normalized { 1 } else { 0 };
467
468 for i in 0..n_new {
469 for j in 0..self.n_components {
470 let eigenvalue = eigenvalues[start_idx + j];
471 if eigenvalue.abs() > 1e-10 {
472 let mut coord = 0.0;
473 for k in 0..n_training_ {
474 coord += new_to_training_affinity[[i, k]] * training_embedding[[k, j]];
475 }
476 new_embedding[[i, j]] = coord / eigenvalue.sqrt();
477 }
478 }
479 }
480
481 Ok(new_embedding)
482 }
483}
484
485#[cfg(test)]
486mod tests {
487 use super::*;
488 use scirs2_core::ndarray::Array;
489
490 #[test]
491 fn test_spectral_embedding_gaussian() {
492 let data = vec![1.0, 1.0, 1.1, 1.1, 1.2, 0.9, 5.0, 5.0, 5.1, 5.1, 4.9, 5.2];
494 let x = Array::from_shape_vec((6, 2), data).unwrap();
495
496 let mut spectral = SpectralEmbedding::new(2, AffinityMethod::Gaussian);
497 let embedding = spectral.fit_transform(&x).unwrap();
498
499 assert_eq!(embedding.shape(), &[6, 2]);
500
501 for val in embedding.iter() {
503 assert!(val.is_finite());
504 }
505 }
506
507 #[test]
508 fn test_spectral_embedding_knn() {
509 let x: Array2<f64> = Array::eye(8);
510
511 let mut spectral = SpectralEmbedding::new(3, AffinityMethod::KNN).with_n_neighbors(3);
512 let embedding = spectral.fit_transform(&x).unwrap();
513
514 assert_eq!(embedding.shape(), &[8, 3]);
515
516 for val in embedding.iter() {
518 assert!(val.is_finite());
519 }
520 }
521
522 #[test]
523 fn test_spectral_embedding_out_of_sample() {
524 let x_train: Array2<f64> = Array::eye(5);
525 let x_test = Array::from_shape_vec(
526 (2, 5),
527 vec![0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.0],
528 )
529 .unwrap();
530
531 let mut spectral = SpectralEmbedding::new(2, AffinityMethod::Gaussian);
532 spectral.fit(&x_train).unwrap();
533 let test_embedding = spectral.transform(&x_test).unwrap();
534
535 assert_eq!(test_embedding.shape(), &[2, 2]);
536
537 for val in test_embedding.iter() {
539 assert!(val.is_finite());
540 }
541 }
542}