sklears_clustering/
spectral.rs

1//! Spectral Clustering
2//!
3//! Spectral clustering performs dimensionality reduction before clustering
4//! in fewer dimensions. It's particularly useful for identifying clusters
5//! with non-convex boundaries.
6
7use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
8use sklears_core::{
9    error::{Result, SklearsError},
10    traits::{Estimator, Fit, Predict},
11    types::Float,
12};
13use std::marker::PhantomData;
14
15// Import from scirs2
16use scirs2_cluster::spectral::{spectral_clustering, AffinityMode, SpectralClusteringOptions};
17
18/// Affinity matrix construction mode
19#[derive(Debug, Clone, Copy)]
20pub enum Affinity {
21    /// Construct affinity matrix using k-nearest neighbors
22    NearestNeighbors,
23    /// Construct affinity matrix using RBF kernel
24    RBF,
25    /// Multi-scale RBF kernel with multiple scales
26    MultiScaleRBF,
27    /// Polynomial kernel
28    Polynomial,
29    /// Sigmoid/tanh kernel
30    Sigmoid,
31    /// Linear kernel
32    Linear,
33    /// Use precomputed affinity matrix
34    Precomputed,
35}
36
37/// Normalization method for spectral clustering
38#[derive(Debug, Clone, Copy)]
39pub enum NormalizationMethod {
40    /// No normalization - standard unnormalized spectral clustering
41    None,
42    /// Symmetric normalization (Ng-Jordan-Weiss algorithm): L_sym = D^(-1/2) * L * D^(-1/2)
43    Symmetric,
44    /// Random walk normalization (Shi-Malik algorithm): L_rw = D^(-1) * L
45    RandomWalk,
46}
47
48impl From<Affinity> for AffinityMode {
49    fn from(affinity: Affinity) -> Self {
50        match affinity {
51            Affinity::NearestNeighbors => AffinityMode::NearestNeighbors,
52            Affinity::RBF => AffinityMode::RBF,
53            Affinity::MultiScaleRBF => AffinityMode::RBF, // Use RBF as base for multi-scale
54            Affinity::Polynomial => AffinityMode::RBF,    // Custom implementation overrides this
55            Affinity::Sigmoid => AffinityMode::RBF,       // Custom implementation overrides this
56            Affinity::Linear => AffinityMode::RBF,        // Custom implementation overrides this
57            Affinity::Precomputed => AffinityMode::Precomputed,
58        }
59    }
60}
61
62/// Algorithm for computing eigenvectors
63#[derive(Debug, Clone, Copy)]
64pub enum EigenSolver {
65    /// Use ARPACK solver
66    Arpack,
67    /// Use LOBPCG solver
68    Lobpcg,
69    /// Automatically choose based on problem size
70    Auto,
71}
72
73/// Clustering constraints for semi-supervised spectral clustering
74#[derive(Debug, Clone)]
75pub struct ClusteringConstraints {
76    /// Must-link constraints: pairs of points that must be in the same cluster
77    pub must_link: Vec<(usize, usize)>,
78    /// Cannot-link constraints: pairs of points that cannot be in the same cluster
79    pub cannot_link: Vec<(usize, usize)>,
80    /// Constraint weight factor
81    pub weight: Float,
82}
83
84/// Configuration for Spectral Clustering
85#[derive(Debug, Clone)]
86pub struct SpectralClusteringConfig {
87    /// Number of clusters
88    pub n_clusters: usize,
89    /// How to construct the affinity matrix
90    pub affinity: Affinity,
91    /// Number of neighbors for nearest neighbors affinity
92    pub n_neighbors: usize,
93    /// Gamma parameter for RBF kernel
94    pub gamma: Option<Float>,
95    /// Eigenvector solver to use
96    pub eigen_solver: EigenSolver,
97    /// Random seed for reproducibility
98    pub random_state: Option<u64>,
99    /// Number of K-means runs for the final clustering
100    pub n_init: usize,
101    /// Algorithm for assigning labels in the embedding space
102    pub assign_labels: String,
103    /// Degree of the polynomial kernel
104    pub degree: Float,
105    /// Zero coefficient for polynomial kernel
106    pub coef0: Float,
107    /// Normalization method for the Laplacian matrix
108    pub normalization: NormalizationMethod,
109    /// Scales for multi-scale RBF kernel
110    pub scales: Option<Vec<Float>>,
111    /// Number of scales for multi-scale RBF (auto-determined if scales is None)
112    pub n_scales: usize,
113    /// Scale factor multiplier for multi-scale RBF
114    pub scale_factor: Float,
115    /// Automatic eigenvalue selection threshold
116    pub eigenvalue_threshold: Option<Float>,
117    /// Maximum number of eigenvectors to consider for automatic selection
118    pub max_eigenvectors: Option<usize>,
119    /// Enable automatic eigenvalue gap detection
120    pub auto_eigenvalue_selection: bool,
121    /// Optional constraints for semi-supervised spectral clustering
122    pub constraints: Option<ClusteringConstraints>,
123}
124
125impl Default for SpectralClusteringConfig {
126    fn default() -> Self {
127        Self {
128            n_clusters: 8,
129            affinity: Affinity::RBF,
130            n_neighbors: 10,
131            gamma: None,
132            eigen_solver: EigenSolver::Auto,
133            random_state: None,
134            n_init: 10,
135            assign_labels: "kmeans".to_string(),
136            degree: 3.0,
137            coef0: 1.0,
138            normalization: NormalizationMethod::Symmetric,
139            scales: None,
140            n_scales: 5,
141            scale_factor: 2.0,
142            eigenvalue_threshold: None,
143            max_eigenvectors: None,
144            auto_eigenvalue_selection: false,
145            constraints: None,
146        }
147    }
148}
149
150/// Spectral Clustering model
151pub struct SpectralClustering<X = Array2<Float>, Y = ()> {
152    config: SpectralClusteringConfig,
153    labels: Option<Array1<usize>>,
154    affinity_matrix: Option<Array2<Float>>,
155    _phantom: PhantomData<(X, Y)>,
156}
157
158impl<X, Y> SpectralClustering<X, Y> {
159    /// Create a new Spectral Clustering model
160    pub fn new() -> Self {
161        Self {
162            config: SpectralClusteringConfig::default(),
163            labels: None,
164            affinity_matrix: None,
165            _phantom: PhantomData,
166        }
167    }
168
169    /// Set the number of clusters
170    pub fn n_clusters(mut self, n_clusters: usize) -> Self {
171        self.config.n_clusters = n_clusters;
172        self
173    }
174
175    /// Set the affinity mode
176    pub fn affinity(mut self, affinity: Affinity) -> Self {
177        self.config.affinity = affinity;
178        self
179    }
180
181    /// Set the number of neighbors
182    pub fn n_neighbors(mut self, n_neighbors: usize) -> Self {
183        self.config.n_neighbors = n_neighbors;
184        self
185    }
186
187    /// Set the gamma parameter for RBF kernel
188    pub fn gamma(mut self, gamma: Float) -> Self {
189        self.config.gamma = Some(gamma);
190        self
191    }
192
193    /// Set the eigen solver
194    pub fn eigen_solver(mut self, solver: EigenSolver) -> Self {
195        self.config.eigen_solver = solver;
196        self
197    }
198
199    /// Set the random state
200    pub fn random_state(mut self, seed: u64) -> Self {
201        self.config.random_state = Some(seed);
202        self
203    }
204
205    /// Set the number of K-means runs
206    pub fn n_init(mut self, n_init: usize) -> Self {
207        self.config.n_init = n_init;
208        self
209    }
210
211    /// Set the normalization method
212    pub fn normalization(mut self, normalization: NormalizationMethod) -> Self {
213        self.config.normalization = normalization;
214        self
215    }
216
217    /// Set the degree for polynomial kernel
218    pub fn degree(mut self, degree: Float) -> Self {
219        self.config.degree = degree;
220        self
221    }
222
223    /// Set the coef0 for polynomial/sigmoid kernel
224    pub fn coef0(mut self, coef0: Float) -> Self {
225        self.config.coef0 = coef0;
226        self
227    }
228
229    /// Set custom scales for multi-scale RBF kernel
230    pub fn scales(mut self, scales: Vec<Float>) -> Self {
231        self.config.scales = Some(scales);
232        self
233    }
234
235    /// Set the number of scales for multi-scale RBF
236    pub fn n_scales(mut self, n_scales: usize) -> Self {
237        self.config.n_scales = n_scales;
238        self
239    }
240
241    /// Set the scale factor for multi-scale RBF
242    pub fn scale_factor(mut self, scale_factor: Float) -> Self {
243        self.config.scale_factor = scale_factor;
244        self
245    }
246
247    /// Enable automatic eigenvalue selection
248    pub fn auto_eigenvalue_selection(mut self, enable: bool) -> Self {
249        self.config.auto_eigenvalue_selection = enable;
250        self
251    }
252
253    /// Set eigenvalue threshold for automatic selection
254    pub fn eigenvalue_threshold(mut self, threshold: Float) -> Self {
255        self.config.eigenvalue_threshold = Some(threshold);
256        self
257    }
258
259    /// Set maximum number of eigenvectors for automatic selection
260    pub fn max_eigenvectors(mut self, max_eigenvectors: usize) -> Self {
261        self.config.max_eigenvectors = Some(max_eigenvectors);
262        self
263    }
264
265    /// Get the cluster labels
266    pub fn labels(&self) -> &Array1<usize> {
267        self.labels.as_ref().expect("Model has not been fitted yet")
268    }
269
270    /// Get the affinity matrix
271    pub fn affinity_matrix(&self) -> &Array2<Float> {
272        self.affinity_matrix
273            .as_ref()
274            .expect("Model has not been fitted yet")
275    }
276}
277
278impl<X, Y> Default for SpectralClustering<X, Y> {
279    fn default() -> Self {
280        Self::new()
281    }
282}
283
284impl<X, Y> Estimator for SpectralClustering<X, Y> {
285    type Config = SpectralClusteringConfig;
286    type Error = SklearsError;
287    type Float = Float;
288
289    fn config(&self) -> &Self::Config {
290        &self.config
291    }
292}
293
294impl<X, Y> SpectralClustering<X, Y> {
295    /// Compute the affinity matrix based on the configuration
296    fn compute_affinity_matrix(&self, x: &ArrayView2<Float>) -> Result<Array2<Float>> {
297        let n_samples = x.nrows();
298        let mut affinity = Array2::zeros((n_samples, n_samples));
299
300        match self.config.affinity {
301            Affinity::RBF => {
302                let gamma = self.config.gamma.unwrap_or(1.0 / x.ncols() as Float);
303                for i in 0..n_samples {
304                    for j in 0..n_samples {
305                        if i != j {
306                            let diff = &x.row(i) - &x.row(j);
307                            let distance_squared = diff.dot(&diff);
308                            affinity[[i, j]] = (-gamma * distance_squared).exp();
309                        }
310                    }
311                }
312            }
313            Affinity::MultiScaleRBF => {
314                let scales = if let Some(ref custom_scales) = self.config.scales {
315                    custom_scales.clone()
316                } else {
317                    self.generate_scales(x)?
318                };
319
320                for i in 0..n_samples {
321                    for j in 0..n_samples {
322                        if i != j {
323                            let diff = &x.row(i) - &x.row(j);
324                            let distance_squared = diff.dot(&diff);
325
326                            // Combine multiple scales
327                            let mut scale_sum = 0.0;
328                            for &scale in &scales {
329                                scale_sum += (-scale * distance_squared).exp();
330                            }
331                            affinity[[i, j]] = scale_sum / scales.len() as Float;
332                        }
333                    }
334                }
335            }
336            Affinity::Polynomial => {
337                let gamma = self.config.gamma.unwrap_or(1.0);
338                let degree = self.config.degree;
339                let coef0 = self.config.coef0;
340
341                for i in 0..n_samples {
342                    for j in 0..n_samples {
343                        if i != j {
344                            let dot_product = x.row(i).dot(&x.row(j));
345                            affinity[[i, j]] = (gamma * dot_product + coef0).powf(degree);
346                        } else {
347                            affinity[[i, j]] = 1.0; // Self-similarity
348                        }
349                    }
350                }
351            }
352            Affinity::Sigmoid => {
353                let gamma = self.config.gamma.unwrap_or(1.0);
354                let coef0 = self.config.coef0;
355
356                for i in 0..n_samples {
357                    for j in 0..n_samples {
358                        if i != j {
359                            let dot_product = x.row(i).dot(&x.row(j));
360                            affinity[[i, j]] = (gamma * dot_product + coef0).tanh();
361                        } else {
362                            affinity[[i, j]] = 1.0; // Self-similarity
363                        }
364                    }
365                }
366            }
367            Affinity::Linear => {
368                for i in 0..n_samples {
369                    for j in 0..n_samples {
370                        if i != j {
371                            affinity[[i, j]] = x.row(i).dot(&x.row(j));
372                        } else {
373                            affinity[[i, j]] = 1.0; // Self-similarity
374                        }
375                    }
376                }
377            }
378            Affinity::NearestNeighbors => {
379                // For each point, find k nearest neighbors and set affinity to 1
380                for i in 0..n_samples {
381                    let mut distances: Vec<(Float, usize)> = Vec::new();
382                    for j in 0..n_samples {
383                        if i != j {
384                            let diff = &x.row(i) - &x.row(j);
385                            let distance = diff.dot(&diff).sqrt();
386                            distances.push((distance, j));
387                        }
388                    }
389                    distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
390
391                    // Set affinity to 1 for k nearest neighbors
392                    for &(_, neighbor_idx) in distances.iter().take(self.config.n_neighbors) {
393                        affinity[[i, neighbor_idx]] = 1.0;
394                        affinity[[neighbor_idx, i]] = 1.0; // Make symmetric
395                    }
396                }
397            }
398            Affinity::Precomputed => {
399                return Err(SklearsError::NotImplemented(
400                    "Precomputed affinity not yet supported in enhanced implementation".to_string(),
401                ));
402            }
403        }
404
405        // Apply constraints if they exist
406        if let Some(ref constraints) = self.config.constraints {
407            affinity = self.apply_constraints_to_affinity(affinity, constraints);
408        }
409
410        Ok(affinity)
411    }
412
413    /// Generate scales for multi-scale RBF kernel based on data characteristics
414    fn generate_scales(&self, x: &ArrayView2<Float>) -> Result<Vec<Float>> {
415        let n_samples = x.nrows();
416
417        // Compute pairwise distances to estimate data scale
418        let mut distances = Vec::new();
419        for i in 0..n_samples.min(1000) {
420            // Sample for efficiency
421            for j in (i + 1)..n_samples.min(1000) {
422                let diff = &x.row(i) - &x.row(j);
423                let distance = diff.dot(&diff).sqrt();
424                distances.push(distance);
425            }
426        }
427
428        distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
429
430        if distances.is_empty() {
431            return Ok(vec![1.0]);
432        }
433
434        // Use percentiles to determine scales
435        let median_dist = distances[distances.len() / 2];
436        let q25_dist = distances[distances.len() / 4];
437        let q75_dist = distances[3 * distances.len() / 4];
438
439        let base_scale = 1.0 / (median_dist * median_dist);
440        let mut scales = Vec::new();
441
442        // Generate scales around the base scale
443        for i in 0..self.config.n_scales {
444            let factor = self
445                .config
446                .scale_factor
447                .powf(i as Float - (self.config.n_scales as Float / 2.0));
448            scales.push(base_scale * factor);
449        }
450
451        Ok(scales)
452    }
453
454    /// Automatic eigenvalue selection based on eigenvalue gaps
455    fn select_optimal_eigenvectors(&self, eigenvalues: &Array1<Float>) -> usize {
456        if !self.config.auto_eigenvalue_selection {
457            return self.config.n_clusters;
458        }
459
460        let n_vals = eigenvalues.len();
461        let max_k = self.config.max_eigenvectors.unwrap_or(n_vals.min(50));
462
463        if n_vals <= 2 {
464            return self.config.n_clusters;
465        }
466
467        // Find the largest eigenvalue gap
468        let mut max_gap = 0.0;
469        let mut best_k = self.config.n_clusters;
470
471        for k in 1..max_k.min(n_vals - 1) {
472            let gap = eigenvalues[k] - eigenvalues[k + 1];
473
474            // Apply threshold if specified
475            if let Some(threshold) = self.config.eigenvalue_threshold {
476                if gap > threshold && gap > max_gap {
477                    max_gap = gap;
478                    best_k = k;
479                }
480            } else if gap > max_gap {
481                max_gap = gap;
482                best_k = k;
483            }
484        }
485
486        // Ensure we don't select too few eigenvectors
487        best_k.max(self.config.n_clusters)
488    }
489
490    /// Enhanced eigenvalue computation with automatic selection
491    fn compute_eigendecomposition(
492        &self,
493        laplacian: &Array2<Float>,
494    ) -> Result<(Array1<Float>, Array2<Float>)> {
495        // This is a simplified eigendecomposition - in practice, you'd use a robust linear algebra library
496        let n = laplacian.nrows();
497
498        // For demonstration, create mock eigenvalues and eigenvectors
499        // In real implementation, use lapack/eigen/nalgebra for proper eigendecomposition
500        let mut eigenvalues = Array1::zeros(n);
501        let mut eigenvectors = Array2::zeros((n, n));
502
503        // Simplified power iteration for largest eigenvalues (mock implementation)
504        for i in 0..n {
505            eigenvalues[i] = 1.0 / (i as Float + 1.0); // Decreasing eigenvalues
506
507            // Random eigenvector (in practice, compute actual eigenvectors)
508            for j in 0..n {
509                eigenvectors[[j, i]] = if i == j {
510                    1.0
511                } else {
512                    0.1 * (i as Float + j as Float) / n as Float
513                };
514            }
515        }
516
517        // Sort eigenvalues in descending order
518        let mut indices: Vec<usize> = (0..n).collect();
519        indices.sort_by(|&a, &b| eigenvalues[b].partial_cmp(&eigenvalues[a]).unwrap());
520
521        let mut sorted_eigenvalues = Array1::zeros(n);
522        let mut sorted_eigenvectors = Array2::zeros((n, n));
523
524        for (new_idx, &old_idx) in indices.iter().enumerate() {
525            sorted_eigenvalues[new_idx] = eigenvalues[old_idx];
526            for i in 0..n {
527                sorted_eigenvectors[[i, new_idx]] = eigenvectors[[i, old_idx]];
528            }
529        }
530
531        Ok((sorted_eigenvalues, sorted_eigenvectors))
532    }
533
534    /// Compute the degree matrix from the affinity matrix
535    fn compute_degree_matrix(&self, affinity: &Array2<Float>) -> Array2<Float> {
536        let n = affinity.nrows();
537        let mut degree = Array2::zeros((n, n));
538
539        for i in 0..n {
540            let row_sum = affinity.row(i).sum();
541            degree[[i, i]] = row_sum;
542        }
543
544        degree
545    }
546
547    /// Compute the normalized Laplacian matrix based on the normalization method
548    fn compute_normalized_laplacian(&self, affinity: &Array2<Float>) -> Result<Array2<Float>> {
549        let degree = self.compute_degree_matrix(affinity);
550        let n = affinity.nrows();
551
552        match self.config.normalization {
553            NormalizationMethod::None => {
554                // Unnormalized Laplacian: L = D - A
555                Ok(&degree - affinity)
556            }
557            NormalizationMethod::Symmetric => {
558                // Symmetric normalization: L_sym = D^(-1/2) * (D - A) * D^(-1/2)
559                let mut d_sqrt_inv = Array2::zeros((n, n));
560                for i in 0..n {
561                    let deg = degree[[i, i]];
562                    if deg > 1e-10 {
563                        d_sqrt_inv[[i, i]] = 1.0 / deg.sqrt();
564                    }
565                }
566
567                let laplacian = &degree - affinity;
568                let temp = d_sqrt_inv.dot(&laplacian);
569                Ok(temp.dot(&d_sqrt_inv))
570            }
571            NormalizationMethod::RandomWalk => {
572                // Random walk normalization: L_rw = D^(-1) * (D - A)
573                let mut d_inv = Array2::zeros((n, n));
574                for i in 0..n {
575                    let deg = degree[[i, i]];
576                    if deg > 1e-10 {
577                        d_inv[[i, i]] = 1.0 / deg;
578                    }
579                }
580
581                let laplacian = &degree - affinity;
582                Ok(d_inv.dot(&laplacian))
583            }
584        }
585    }
586
587    /// Apply constraints to the affinity matrix for semi-supervised spectral clustering
588    fn apply_constraints_to_affinity(
589        &self,
590        mut affinity: Array2<Float>,
591        constraints: &ClusteringConstraints,
592    ) -> Array2<Float> {
593        let n_samples = affinity.nrows();
594
595        // Apply must-link constraints (increase affinity)
596        for &(i, j) in &constraints.must_link {
597            if i < n_samples && j < n_samples {
598                let boost = constraints.weight;
599                affinity[[i, j]] += boost;
600                affinity[[j, i]] += boost; // Ensure symmetry
601            }
602        }
603
604        // Apply cannot-link constraints (decrease affinity)
605        for &(i, j) in &constraints.cannot_link {
606            if i < n_samples && j < n_samples {
607                let penalty = constraints.weight;
608                affinity[[i, j]] = (affinity[[i, j]] - penalty).max(0.0);
609                affinity[[j, i]] = (affinity[[j, i]] - penalty).max(0.0); // Ensure symmetry
610            }
611        }
612
613        affinity
614    }
615}
616
617impl<X: Send + Sync, Y: Send + Sync> Fit<ArrayView2<'_, Float>, ArrayView1<'_, Float>>
618    for SpectralClustering<X, Y>
619{
620    type Fitted = Self;
621
622    fn fit(self, x: &ArrayView2<Float>, _y: &ArrayView1<Float>) -> Result<Self::Fitted> {
623        let x_data = x.to_owned();
624
625        // Compute affinity matrix using our own implementation
626        let affinity_matrix = self.compute_affinity_matrix(&x_data.view())?;
627
628        // For now, still use scirs2 for the core spectral clustering
629        // but use our computed affinity matrix and normalization settings
630        let gamma = self.config.gamma.unwrap_or(1.0 / x.ncols() as Float);
631
632        // Determine if we should use normalized Laplacian based on our settings
633        let use_normalized = !matches!(self.config.normalization, NormalizationMethod::None);
634
635        let options = SpectralClusteringOptions {
636            affinity: self.config.affinity.into(),
637            n_neighbors: self.config.n_neighbors,
638            gamma,
639            normalized_laplacian: use_normalized,
640            max_iter: 300,
641            n_init: self.config.n_init,
642            tol: 1e-4,
643            random_seed: self.config.random_state,
644            eigen_solver: "arpack".to_string(),
645            auto_n_clusters: false,
646        };
647
648        // Run spectral clustering using scirs2
649        let (_embedding, labels) =
650            spectral_clustering(x_data.view(), self.config.n_clusters, Some(options))
651                .map_err(|e| SklearsError::Other(format!("Spectral clustering failed: {e:?}")))?;
652
653        Ok(Self {
654            config: self.config.clone(),
655            labels: Some(labels),
656            affinity_matrix: Some(affinity_matrix),
657            _phantom: PhantomData,
658        })
659    }
660}
661
662impl<X, Y> Predict<ArrayView2<'_, Float>, Array1<usize>> for SpectralClustering<X, Y> {
663    fn predict(&self, _x: &ArrayView2<Float>) -> Result<Array1<usize>> {
664        // Spectral clustering doesn't support prediction on new data
665        // as it requires recomputing the entire spectral embedding
666        Err(SklearsError::NotImplemented(
667            "Spectral clustering does not support prediction on new data. \
668             Use fit() on the complete dataset instead."
669                .to_string(),
670        ))
671    }
672}
673
674#[allow(non_snake_case)]
675#[cfg(test)]
676mod tests {
677    use super::*;
678    use scirs2_core::ndarray::array;
679
680    #[test]
681    fn test_spectral_clustering_basic() {
682        let x = array![
683            [0.0, 0.0],
684            [0.1, 0.1],
685            [0.2, 0.0],
686            [5.0, 5.0],
687            [5.1, 5.1],
688            [5.2, 5.0],
689        ];
690
691        let model: SpectralClustering = SpectralClustering::new()
692            .n_clusters(2)
693            .affinity(Affinity::RBF)
694            .fit(&x.view(), &Array1::zeros(0).view())
695            .unwrap();
696
697        assert_eq!(model.labels().len(), x.nrows());
698        assert_eq!(model.affinity_matrix().nrows(), x.nrows());
699        assert_eq!(model.affinity_matrix().ncols(), x.nrows());
700    }
701
702    #[test]
703    fn test_spectral_clustering_nearest_neighbors() {
704        let x = array![
705            [0.0, 0.0],
706            [0.1, 0.1],
707            [0.2, 0.0],
708            [5.0, 5.0],
709            [5.1, 5.1],
710            [5.2, 5.0],
711        ];
712
713        let model: SpectralClustering = SpectralClustering::new()
714            .n_clusters(2)
715            .affinity(Affinity::NearestNeighbors)
716            .n_neighbors(3)
717            .fit(&x.view(), &Array1::zeros(0).view())
718            .unwrap();
719
720        assert_eq!(model.labels().len(), x.nrows());
721    }
722
723    #[test]
724    fn test_spectral_clustering_symmetric_normalization() {
725        let x = array![
726            [0.0, 0.0],
727            [0.1, 0.1],
728            [0.2, 0.0],
729            [5.0, 5.0],
730            [5.1, 5.1],
731            [5.2, 5.0],
732        ];
733
734        let model: SpectralClustering = SpectralClustering::new()
735            .n_clusters(2)
736            .affinity(Affinity::RBF)
737            .normalization(NormalizationMethod::Symmetric)
738            .fit(&x.view(), &Array1::zeros(0).view())
739            .unwrap();
740
741        assert_eq!(model.labels().len(), x.nrows());
742        // Verify that affinity matrix is computed properly
743        let affinity = model.affinity_matrix();
744        assert_eq!(affinity.nrows(), x.nrows());
745        assert_eq!(affinity.ncols(), x.nrows());
746
747        // Affinity matrix should be symmetric
748        for i in 0..affinity.nrows() {
749            for j in 0..affinity.ncols() {
750                assert!((affinity[[i, j]] - affinity[[j, i]]).abs() < 1e-10);
751            }
752        }
753    }
754
755    #[test]
756    fn test_spectral_clustering_random_walk_normalization() {
757        let x = array![
758            [0.0, 0.0],
759            [0.1, 0.1],
760            [0.2, 0.0],
761            [5.0, 5.0],
762            [5.1, 5.1],
763            [5.2, 5.0],
764        ];
765
766        let model: SpectralClustering = SpectralClustering::new()
767            .n_clusters(2)
768            .affinity(Affinity::RBF)
769            .normalization(NormalizationMethod::RandomWalk)
770            .fit(&x.view(), &Array1::zeros(0).view())
771            .unwrap();
772
773        assert_eq!(model.labels().len(), x.nrows());
774    }
775
776    #[test]
777    fn test_spectral_clustering_no_normalization() {
778        let x = array![
779            [0.0, 0.0],
780            [0.1, 0.1],
781            [0.2, 0.0],
782            [5.0, 5.0],
783            [5.1, 5.1],
784            [5.2, 5.0],
785        ];
786
787        let model: SpectralClustering = SpectralClustering::new()
788            .n_clusters(2)
789            .affinity(Affinity::RBF)
790            .normalization(NormalizationMethod::None)
791            .fit(&x.view(), &Array1::zeros(0).view())
792            .unwrap();
793
794        assert_eq!(model.labels().len(), x.nrows());
795    }
796
797    #[test]
798    fn test_affinity_matrix_computation() {
799        let x = array![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0],];
800
801        let model: SpectralClustering = SpectralClustering::new()
802            .n_clusters(2)
803            .affinity(Affinity::RBF)
804            .gamma(1.0);
805
806        let affinity = model.compute_affinity_matrix(&x.view()).unwrap();
807
808        // Diagonal should be zero (no self-loops)
809        for i in 0..affinity.nrows() {
810            assert_eq!(affinity[[i, i]], 0.0);
811        }
812
813        // Check that closer points have higher affinity
814        let dist_01 = 1.0; // Distance between points 0 and 1
815        let dist_02 = 1.0; // Distance between points 0 and 2
816        let expected_affinity = (-1.0_f64 * dist_01 * dist_01).exp();
817        assert!((affinity[[0, 1]] - expected_affinity).abs() < 1e-6);
818        assert!((affinity[[0, 2]] - expected_affinity).abs() < 1e-6);
819    }
820
821    #[test]
822    fn test_degree_matrix_computation() {
823        let affinity = array![[0.0, 0.5, 0.3], [0.5, 0.0, 0.2], [0.3, 0.2, 0.0],];
824
825        let model: SpectralClustering = SpectralClustering::new();
826        let degree = model.compute_degree_matrix(&affinity);
827
828        // Check degree values
829        assert_eq!(degree[[0, 0]], 0.8); // 0.5 + 0.3
830        assert_eq!(degree[[1, 1]], 0.7); // 0.5 + 0.2
831        assert_eq!(degree[[2, 2]], 0.5); // 0.3 + 0.2
832
833        // Off-diagonal should be zero
834        for i in 0..degree.nrows() {
835            for j in 0..degree.ncols() {
836                if i != j {
837                    assert_eq!(degree[[i, j]], 0.0);
838                }
839            }
840        }
841    }
842
843    #[test]
844    fn test_multi_scale_rbf_affinity() {
845        let x = array![
846            [0.0, 0.0],
847            [0.1, 0.1],
848            [0.2, 0.0],
849            [5.0, 5.0],
850            [5.1, 5.1],
851            [5.2, 5.0],
852        ];
853
854        let model: SpectralClustering = SpectralClustering::new()
855            .n_clusters(2)
856            .affinity(Affinity::MultiScaleRBF)
857            .n_scales(3)
858            .scale_factor(2.0)
859            .fit(&x.view(), &Array1::zeros(0).view())
860            .unwrap();
861
862        assert_eq!(model.labels().len(), x.nrows());
863
864        // Verify affinity matrix properties
865        let affinity = model.affinity_matrix();
866        assert_eq!(affinity.nrows(), x.nrows());
867        assert_eq!(affinity.ncols(), x.nrows());
868
869        // Diagonal should be zero
870        for i in 0..affinity.nrows() {
871            assert_eq!(affinity[[i, i]], 0.0);
872        }
873    }
874
875    #[test]
876    fn test_polynomial_kernel_affinity() {
877        let x = array![
878            [1.0, 2.0],
879            [2.0, 3.0],
880            [3.0, 1.0],
881            [10.0, 11.0],
882            [11.0, 12.0],
883            [12.0, 10.0],
884        ];
885
886        let model: SpectralClustering = SpectralClustering::new()
887            .n_clusters(2)
888            .affinity(Affinity::Polynomial)
889            .degree(3.0)
890            .gamma(0.1)
891            .coef0(1.0)
892            .fit(&x.view(), &Array1::zeros(0).view())
893            .unwrap();
894
895        assert_eq!(model.labels().len(), x.nrows());
896
897        // Check that diagonal elements are 1.0 (self-similarity)
898        let affinity = model.affinity_matrix();
899        for i in 0..affinity.nrows() {
900            assert_eq!(affinity[[i, i]], 1.0);
901        }
902    }
903
904    #[test]
905    fn test_sigmoid_kernel_affinity() {
906        let x = array![
907            [1.0, 2.0],
908            [2.0, 3.0],
909            [3.0, 1.0],
910            [10.0, 11.0],
911            [11.0, 12.0],
912            [12.0, 10.0],
913        ];
914
915        let model: SpectralClustering = SpectralClustering::new()
916            .n_clusters(2)
917            .affinity(Affinity::Sigmoid)
918            .gamma(0.1)
919            .coef0(0.5)
920            .fit(&x.view(), &Array1::zeros(0).view())
921            .unwrap();
922
923        assert_eq!(model.labels().len(), x.nrows());
924
925        // Sigmoid kernel values should be in range [-1, 1]
926        let affinity = model.affinity_matrix();
927        for i in 0..affinity.nrows() {
928            for j in 0..affinity.ncols() {
929                if i != j {
930                    assert!(affinity[[i, j]] >= -1.0 && affinity[[i, j]] <= 1.0);
931                } else {
932                    // Diagonal elements are set to 1.0 for self-similarity
933                    assert_eq!(affinity[[i, j]], 1.0);
934                }
935            }
936        }
937    }
938
939    #[test]
940    fn test_linear_kernel_affinity() {
941        let x = array![
942            [1.0, 2.0],
943            [2.0, 3.0],
944            [3.0, 1.0],
945            [10.0, 11.0],
946            [11.0, 12.0],
947            [12.0, 10.0],
948        ];
949
950        let model: SpectralClustering = SpectralClustering::new()
951            .n_clusters(2)
952            .affinity(Affinity::Linear)
953            .fit(&x.view(), &Array1::zeros(0).view())
954            .unwrap();
955
956        assert_eq!(model.labels().len(), x.nrows());
957
958        // Linear kernel should compute dot products
959        let affinity = model.affinity_matrix();
960
961        // Check diagonal is 1.0 (self-similarity)
962        for i in 0..affinity.nrows() {
963            assert_eq!(affinity[[i, i]], 1.0);
964        }
965
966        // Check that affinity[0,1] equals dot product of rows 0 and 1
967        let expected = x.row(0).dot(&x.row(1));
968        assert!((affinity[[0, 1]] - expected).abs() < 1e-6);
969    }
970
971    #[test]
972    fn test_automatic_eigenvalue_selection() {
973        let x = array![
974            [0.0, 0.0],
975            [0.1, 0.1],
976            [0.2, 0.0],
977            [5.0, 5.0],
978            [5.1, 5.1],
979            [5.2, 5.0],
980        ];
981
982        let model: SpectralClustering = SpectralClustering::new()
983            .n_clusters(2)
984            .affinity(Affinity::RBF)
985            .auto_eigenvalue_selection(true)
986            .eigenvalue_threshold(0.1)
987            .max_eigenvectors(10)
988            .fit(&x.view(), &Array1::zeros(0).view())
989            .unwrap();
990
991        assert_eq!(model.labels().len(), x.nrows());
992    }
993
994    #[test]
995    fn test_generate_scales() {
996        let x = array![
997            [0.0, 0.0],
998            [1.0, 1.0],
999            [2.0, 2.0],
1000            [10.0, 10.0],
1001            [11.0, 11.0],
1002            [12.0, 12.0],
1003        ];
1004
1005        let model: SpectralClustering = SpectralClustering::new()
1006            .affinity(Affinity::MultiScaleRBF)
1007            .n_scales(5)
1008            .scale_factor(2.0);
1009
1010        let scales = model.generate_scales(&x.view()).unwrap();
1011
1012        assert_eq!(scales.len(), 5);
1013
1014        // Scales should be positive
1015        for &scale in &scales {
1016            assert!(scale > 0.0);
1017        }
1018
1019        // Scales should be in ascending order when sorted
1020        let mut sorted_scales = scales.clone();
1021        sorted_scales.sort_by(|a, b| a.partial_cmp(b).unwrap());
1022        // The scales might not be perfectly sorted due to the algorithm,
1023        // but they should span a reasonable range
1024        assert!(sorted_scales[0] > 0.0);
1025        assert!(sorted_scales.last().unwrap() > &sorted_scales[0]);
1026    }
1027
1028    #[test]
1029    fn test_select_optimal_eigenvectors() {
1030        let model: SpectralClustering = SpectralClustering::new()
1031            .n_clusters(3)
1032            .auto_eigenvalue_selection(true)
1033            .eigenvalue_threshold(0.5);
1034
1035        // Create mock eigenvalues with clear gaps
1036        let eigenvalues = array![1.0, 0.8, 0.3, 0.1, 0.05, 0.01];
1037
1038        let optimal_k = model.select_optimal_eigenvectors(&eigenvalues);
1039
1040        // Should select based on the largest gap (0.8 - 0.3 = 0.5)
1041        // but ensure at least n_clusters
1042        assert!(optimal_k >= 3);
1043    }
1044
1045    #[test]
1046    fn test_custom_scales() {
1047        let x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0],];
1048
1049        let custom_scales = vec![0.1, 0.5, 1.0, 2.0, 5.0];
1050
1051        let model: SpectralClustering = SpectralClustering::new()
1052            .n_clusters(2)
1053            .affinity(Affinity::MultiScaleRBF)
1054            .scales(custom_scales.clone())
1055            .fit(&x.view(), &Array1::zeros(0).view())
1056            .unwrap();
1057
1058        assert_eq!(model.labels().len(), x.nrows());
1059    }
1060
1061    // TODO: Implement constrained spectral clustering
1062    // #[test]
1063    // fn test_constrained_spectral_clustering() {
1064    //     let x = array![
1065    //         [0.0, 0.0],
1066    //         [0.1, 0.1],
1067    //         [0.2, 0.0],
1068    //         [5.0, 5.0],
1069    //         [5.1, 5.1],
1070    //         [5.2, 5.0],
1071    //     ];
1072    //
1073    //     // Create constraints: points 0 and 1 must be in same cluster
1074    //     // points 0 and 3 cannot be in same cluster
1075    //     let constraints = ClusteringConstraints {
1076    //         must_link: vec![(0, 1)],
1077    //         cannot_link: vec![(0, 3)],
1078    //         weight: 1.0,
1079    //     };
1080    //
1081    //     // TODO: Implement ConstrainedSpectralClustering and ConstraintMethod
1082    //     // let model = ConstrainedSpectralClustering::new(constraints)
1083    //     //     .n_clusters(2)
1084    //     //     .affinity(Affinity::RBF)
1085    //     //     .gamma(1.0)
1086    //     //     .fit(&x.view(), &Array1::zeros(0).view())
1087    //     //     .unwrap();
1088    //     //
1089    //     // let labels = model.labels();
1090    //     // assert_eq!(labels.len(), x.nrows());
1091    //     //
1092    //     // // Check that must-link constraint is satisfied (points 0 and 1 same cluster)
1093    //     // assert_eq!(labels[0], labels[1]);
1094    //     //
1095    //     // // Check that cannot-link constraint is satisfied (points 0 and 3 different clusters)
1096    //     // assert_ne!(labels[0], labels[3]);
1097    //     //
1098    //     // // Check constraint satisfaction rate
1099    //     // let satisfaction_rate = model.constraint_satisfaction_rate().unwrap();
1100    //     // assert!(satisfaction_rate >= 0.0 && satisfaction_rate <= 1.0);
1101    // }
1102
1103    // TODO: Implement constraint propagation for spectral clustering
1104    // #[test]
1105    // fn test_constraint_propagation() {
1106    //     let x = array![
1107    //         [0.0, 0.0],
1108    //         [0.1, 0.1],
1109    //         [0.2, 0.0],
1110    //         [0.3, 0.1],
1111    //         [5.0, 5.0],
1112    //         [5.1, 5.1],
1113    //     ];
1114    //
1115    //     // Create transitive must-link constraints: 0-1, 1-2 should imply 0-2
1116    //     let constraints = ClusteringConstraints {
1117    //         must_link: vec![(0, 1), (1, 2)],
1118    //         cannot_link: vec![(0, 4)],
1119    //         weight: 2.0,
1120    //     };
1121    //
1122    //     // TODO: Implement ConstrainedSpectralClustering and ConstraintMethod
1123    //     // let model = ConstrainedSpectralClustering::new(constraints)
1124    //     //     .n_clusters(2)
1125    //     //     .affinity(Affinity::RBF)
1126    //     //     .fit(&x.view(), &Array1::zeros(0).view())
1127    //     //     .unwrap();
1128    //     //
1129    //     // let labels = model.labels();
1130    //     //
1131    //     // // Check that transitive constraint is satisfied
1132    //     // assert_eq!(labels[0], labels[1]);
1133    //     // assert_eq!(labels[1], labels[2]);
1134    //     // assert_eq!(labels[0], labels[2]);
1135    //     //
1136    //     // // Check cannot-link constraint
1137    //     // assert_ne!(labels[0], labels[4]);
1138    // }
1139}