// sklears_manifold/lib.rs

1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::all)]
6#![allow(clippy::pedantic)]
7#![allow(clippy::nursery)]
8#![allow(unused_imports)]
9#![allow(unused_variables)]
10#![allow(unused_assignments)]
11#![allow(unused_mut)]
12#![allow(unused_doc_comments)]
13#![allow(unused_comparisons)]
14#![allow(unused_must_use)]
15#![allow(mismatched_lifetime_syntaxes)]
16#![allow(ambiguous_glob_reexports)]
17#![allow(unexpected_cfgs)]
18//! Manifold learning algorithms (t-SNE, Isomap, etc.)
19//!
20//! This module is part of sklears, providing scikit-learn compatible
21//! machine learning algorithms in Rust.
22
23// #![warn(missing_docs)]
24
25use scirs2_core::ndarray::{Array1, Array2, ArrayView2, Axis};
26use scirs2_core::random::rngs::StdRng;
27use scirs2_core::random::thread_rng;
28use scirs2_core::random::Rng;
29use scirs2_core::random::SeedableRng;
30use scirs2_core::Distribution;
31use sklears_core::{
32    error::{Result as SklResult, SklearsError},
33    traits::{Estimator, Fit, Transform, Untrained},
34    types::Float,
35};
36
37// TSNE moved to tsne.rs module
38
39// Isomap moved to isomap.rs module
40
41// LocallyLinearEmbedding moved to lle.rs module
42
43// LaplacianEigenmaps moved to laplacian_eigenmaps.rs module
44
45// LaplacianEigenmaps implementations moved to laplacian_eigenmaps.rs module
46
47// LaplacianEigenmaps implementations and LaplacianTrained moved to laplacian_eigenmaps.rs module
48
49// UMAP moved to umap.rs module
50
51// UMAP implementations and UmapTrained moved to umap.rs module
52// DiffusionMaps implementation and DiffusionMapsTrained moved to diffusion_maps.rs module
53// HLLE implementation and HessianLleTrained moved to hessian_lle.rs module
54
55#[allow(non_snake_case)]
56#[cfg(test)]
57mod tests;
58
59pub mod quality_metrics;
60
61/// Re-export quality metrics for convenience
62pub use quality_metrics::*;
63
64// =====================================================================================
65// STRESS TESTING AND SCALABILITY MODULE
66// =====================================================================================
67
68pub mod stress_testing;
69pub use stress_testing::*;
70
71// =====================================================================================
72// GEODESIC DISTANCE COMPUTATION MODULE
73// =====================================================================================
74
75pub mod geodesic_distance;
76pub use geodesic_distance::*;
77
78// =====================================================================================
79// DIFFUSION DISTANCE MODULE
80// =====================================================================================
81
82pub mod diffusion_distance;
83pub use diffusion_distance::*;
84
85// =====================================================================================
86// RIEMANNIAN GEOMETRY MODULE
87// =====================================================================================
88
89pub mod riemannian;
90pub use riemannian::*;
91
92// =====================================================================================
93// TOPOLOGICAL DATA ANALYSIS MODULE
94// =====================================================================================
95
96pub mod topological;
97pub use topological::*;
98
99// =====================================================================================
100// RANDOM WALK EMBEDDINGS
101// =====================================================================================
102
103// =====================================================================================
104// NODE2VEC AND DEEPWALK ALGORITHMS
105// =====================================================================================
106
107// =====================================================================================
108
/// Sparse Coding for manifold learning
///
/// Sparse coding learns a dictionary of basis vectors such that each data point
/// can be represented as a sparse linear combination of these basis vectors.
/// This is particularly useful for manifold learning when the data lies on
/// a low-dimensional manifold that can be sparsely represented.
///
/// # Parameters
///
/// * `n_components` - Number of dictionary atoms
/// * `alpha` - Sparsity regularization parameter
/// * `max_iter` - Maximum number of iterations
/// * `tol` - Tolerance for convergence
/// * `random_state` - Random seed for reproducibility
///
/// # Examples
///
/// ```
/// use sklears_manifold::SparseCoding;
/// use sklears_core::traits::{Transform, Fit};
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
///
/// let sc = SparseCoding::new()
///     .n_components(2)
///     .alpha(0.1);
///
/// let fitted = sc.fit(&x.view(), &()).unwrap();
/// let embedded = fitted.transform(&x.view()).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct SparseCoding<S = Untrained> {
    // Type-state marker: `Untrained` before `fit`, `SCTrained` after.
    state: S,
    // Number of dictionary atoms to learn (columns of the dictionary).
    n_components: usize,
    // L1 shrinkage strength used by the soft-thresholding operator.
    alpha: f64,
    // Cap on dictionary-learning sweeps during `fit`.
    max_iter: usize,
    // Early-stop threshold on the largest per-atom change per sweep.
    tol: f64,
    // Optional RNG seed for reproducible dictionary initialization.
    random_state: Option<u64>,
}
149
150impl Default for SparseCoding<Untrained> {
151    fn default() -> Self {
152        Self::new()
153    }
154}
155
156impl SparseCoding<Untrained> {
157    /// Create a new SparseCoding instance
158    pub fn new() -> Self {
159        Self {
160            state: Untrained,
161            n_components: 100,
162            alpha: 1.0,
163            max_iter: 1000,
164            tol: 1e-8,
165            random_state: None,
166        }
167    }
168
169    /// Set the number of dictionary atoms
170    pub fn n_components(mut self, n_components: usize) -> Self {
171        self.n_components = n_components;
172        self
173    }
174
175    /// Set the sparsity regularization parameter
176    pub fn alpha(mut self, alpha: f64) -> Self {
177        self.alpha = alpha;
178        self
179    }
180
181    /// Set the maximum number of iterations
182    pub fn max_iter(mut self, max_iter: usize) -> Self {
183        self.max_iter = max_iter;
184        self
185    }
186
187    /// Set the tolerance for convergence
188    pub fn tol(mut self, tol: f64) -> Self {
189        self.tol = tol;
190        self
191    }
192
193    /// Set the random state
194    pub fn random_state(mut self, random_state: u64) -> Self {
195        self.random_state = Some(random_state);
196        self
197    }
198
199    /// Soft thresholding function for sparse coding
200    fn soft_threshold(x: f64, lambda: f64) -> f64 {
201        if x > lambda {
202            x - lambda
203        } else if x < -lambda {
204            x + lambda
205        } else {
206            0.0
207        }
208    }
209}
210
/// Fitted state for [`SparseCoding`]: the learned dictionary plus the
/// feature means used to center inputs at transform time.
#[derive(Debug, Clone)]
pub struct SCTrained {
    // Learned atoms, one per column: shape (n_features, n_components).
    dictionary: Array2<f64>,
    // Per-feature mean of the training data, subtracted before encoding.
    mean: Array1<f64>,
}
216
/// Estimator plumbing for the untrained `SparseCoding`.
///
/// `SparseCoding` carries its hyperparameters as struct fields, so the
/// associated `Config` type is the unit type.
impl Estimator for SparseCoding<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        // `&()` borrows a zero-sized unit value; the reference is promoted
        // to `'static`, so returning it is sound.
        &()
    }
}
226
impl Fit<ArrayView2<'_, Float>, ()> for SparseCoding<Untrained> {
    type Fitted = SparseCoding<SCTrained>;

    /// Learn a sparse-coding dictionary from `x` (shape: samples x features).
    ///
    /// The data is mean-centered, a random unit-norm dictionary is drawn,
    /// and atoms are then refined one at a time: for each atom `k`, the
    /// contributions of all other atoms are subtracted from the data and the
    /// atom is re-estimated from the soft-thresholded codes of the residual.
    /// Iteration stops after `max_iter` sweeps or when the largest per-atom
    /// change falls below `tol`.
    ///
    /// # Errors
    ///
    /// Returns `SklearsError::InvalidInput` when `n_components > n_features`.
    fn fit(self, x: &ArrayView2<'_, Float>, _y: &()) -> SklResult<Self::Fitted> {
        let (n_samples, n_features) = x.dim();

        if self.n_components > n_features {
            return Err(SklearsError::InvalidInput(
                "n_components cannot be larger than n_features".to_string(),
            ));
        }

        // Convert to f64 and center the data
        // (identity `mapv` produces an owned copy; `Float` is presumably f64
        //  here — the closure compiles only if no conversion is needed).
        let x_f64 = x.mapv(|v| v);
        let mean = x_f64.mean_axis(Axis(0)).unwrap();
        // Broadcast the (n_features,) mean across all rows.
        let x_centered = &x_f64 - &mean.view().broadcast(x_f64.dim()).unwrap();

        // Seeded RNG for reproducibility; otherwise derive a one-shot seed
        // from the thread-local RNG.
        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(thread_rng().random::<u64>())
        };

        // Initialize dictionary with random normalized vectors
        let mut dictionary = Array2::<f64>::zeros((n_features, self.n_components));
        for mut col in dictionary.columns_mut() {
            for elem in col.iter_mut() {
                *elem = rng.sample(scirs2_core::StandardNormal);
            }
            // Normalize the column
            let norm = col.dot(&col).sqrt();
            if norm > 0.0 {
                col /= norm;
            }
        }

        // Iterative dictionary learning using coordinate descent
        // (`iter` is intentionally unused; the loop is purely a sweep cap).
        for iter in 0..self.max_iter {
            let mut max_change = 0.0f64;

            // Update dictionary atoms one at a time
            for k in 0..self.n_components {
                // Compute residual without atom k
                // NOTE(review): `codes_j` for each atom j is computed against
                // the *partially updated* residual (earlier atoms already
                // subtracted), so this is a sequential, matching-pursuit-like
                // update rather than a joint sparse solve — confirm intended.
                let mut residual = x_centered.clone();
                for j in 0..self.n_components {
                    if j != k {
                        let atom_j = dictionary.column(j);
                        // Compute sparse codes for atom j
                        let mut codes_j = Array1::zeros(n_samples);
                        for i in 0..n_samples {
                            let dot_product = residual.row(i).dot(&atom_j);
                            codes_j[i] = Self::soft_threshold(dot_product, self.alpha);
                        }

                        // Subtract contribution of atom j
                        for i in 0..n_samples {
                            let mut row = residual.row_mut(i);
                            row.scaled_add(-codes_j[i], &atom_j);
                        }
                    }
                }

                // Update atom k as a code-weighted average of the residual
                // rows with non-negligible activation.
                let mut new_atom = Array1::zeros(n_features);
                let mut total_code = 0.0;

                for i in 0..n_samples {
                    let code_k = Self::soft_threshold(
                        residual.row(i).dot(&dictionary.column(k)),
                        self.alpha,
                    );
                    if code_k.abs() > 1e-12 {
                        new_atom.scaled_add(code_k, &residual.row(i));
                        total_code += code_k * code_k;
                    }
                }

                // Skip the update entirely when no sample activates atom k;
                // the atom then keeps its previous (initial) value.
                if total_code > 1e-12 {
                    new_atom /= total_code;
                    // Normalize
                    let norm = new_atom.dot(&new_atom).sqrt();
                    if norm > 1e-12 {
                        new_atom /= norm;
                    }

                    // Check convergence (L1 distance between old and new atom)
                    let change = (&new_atom - &dictionary.column(k)).mapv(|x| x.abs()).sum();
                    max_change = max_change.max(change);

                    // Update dictionary
                    dictionary.column_mut(k).assign(&new_atom);
                }
            }

            // Check convergence
            if max_change < self.tol {
                break;
            }
        }

        // Hyperparameters are carried over so the trained model can report
        // the settings it was fitted with.
        Ok(SparseCoding {
            state: SCTrained { dictionary, mean },
            n_components: self.n_components,
            alpha: self.alpha,
            max_iter: self.max_iter,
            tol: self.tol,
            random_state: self.random_state,
        })
    }
}
337
338impl Transform<ArrayView2<'_, Float>, Array2<f64>> for SparseCoding<SCTrained> {
339    fn transform(&self, x: &ArrayView2<'_, Float>) -> SklResult<Array2<f64>> {
340        let (n_samples, _) = x.dim();
341        let x_f64 = x.mapv(|v| v);
342        let x_centered = &x_f64 - &self.state.mean.view().broadcast(x_f64.dim()).unwrap();
343
344        // Compute sparse codes using coordinate descent
345        let mut codes = Array2::zeros((n_samples, self.n_components));
346
347        for i in 0..n_samples {
348            let sample = x_centered.row(i);
349            let mut code = Array1::<f64>::zeros(self.n_components);
350
351            // Coordinate descent for sparse coding
352            for _ in 0..100 {
353                // Limited iterations for transform
354                let mut max_change = 0.0f64;
355
356                for k in 0..self.n_components {
357                    // Compute residual without component k
358                    let mut residual = sample.to_owned();
359                    for j in 0..self.n_components {
360                        if j != k {
361                            let atom_j = self.state.dictionary.column(j);
362                            residual.scaled_add(-code[j], &atom_j);
363                        }
364                    }
365
366                    // Update component k
367                    let atom_k = self.state.dictionary.column(k);
368                    let new_code_k =
369                        SparseCoding::soft_threshold(residual.dot(&atom_k), self.alpha);
370                    let change = (new_code_k - code[k]).abs();
371                    max_change = max_change.max(change);
372                    code[k] = new_code_k;
373                }
374
375                if max_change < 1e-6 {
376                    break;
377                }
378            }
379
380            codes.row_mut(i).assign(&code);
381        }
382
383        Ok(codes)
384    }
385}
386
387// MINI-BATCH EMBEDDING METHODS FOR SCALABILITY
388// =====================================================================================
389
390/// t-SNE (t-distributed Stochastic Neighbor Embedding) module
391pub mod tsne;
392
393/// Isomap (Isometric Mapping) module
394pub mod isomap;
395
396/// LLE (Locally Linear Embedding) module
397pub mod lle;
398
399/// MDS (Multidimensional Scaling) module
400pub mod mds;
401
402/// Laplacian Eigenmaps module
403pub mod laplacian_eigenmaps;
404
405/// UMAP module
406pub mod umap;
407
408/// Diffusion Maps module
409pub mod diffusion_maps;
410
411/// HLLE (Hessian LLE) module
412pub mod hessian_lle;
413
414/// LTSA (Local Tangent Space Alignment) module
415pub mod ltsa;
416
417/// MVU (Maximum Variance Unfolding) module
418pub mod mvu;
419
420/// SNE (Stochastic Neighbor Embedding) module
421pub mod sne;
422
423/// SymmetricSNE (Symmetric Stochastic Neighbor Embedding) module
424pub mod symmetric_sne;
425
426/// ParametricTSNE (Parametric t-SNE) module
427pub mod parametric_tsne;
428
429/// HeavyTailedSymmetricSNE (Heavy-Tailed Symmetric SNE) module
430pub mod heavy_tailed_symmetric_sne;
431
432/// Spectral Embedding module
433pub mod spectral_embedding;
434
435/// Random Walk Embedding module
436pub mod random_walk_embedding;
437
438/// Node2Vec algorithm module
439pub mod node2vec;
440
441/// DeepWalk algorithm module
442pub mod deepwalk;
443
444/// Dictionary Learning module
445pub mod dictionary_learning;
446
447/// Mini-batch t-SNE module
448pub mod minibatch_tsne;
449
450/// Mini-batch UMAP module
451pub mod minibatch_umap;
452
453/// Distance methods and kernel functions module
454pub mod distance_kernels;
455
456/// Graph Neural Networks module
457pub mod graph_neural_networks;
458
459/// Random projection methods module
460pub mod random_projections;
461
462/// Similarity learning module
463pub mod similarity;
464
465/// Hierarchical manifold learning module
466pub mod hierarchical;
467
468/// Temporal manifold learning module
469pub mod temporal;
470
471/// Robust manifold learning module
472pub mod robust;
473
474/// Re-export t-SNE utilities for convenience
475pub use tsne::{TsneTrained, TSNE};
476
477/// Re-export Isomap utilities for convenience
478pub use isomap::{Isomap, IsomapTrained};
479
480/// Re-export LLE utilities for convenience
481pub use lle::{LleTrained, LocallyLinearEmbedding};
482
483/// Re-export MDS utilities for convenience
484pub use mds::{MdsTrained, MDS};
485
486/// Re-export Laplacian Eigenmaps utilities for convenience
487pub use laplacian_eigenmaps::{LaplacianEigenmaps, LaplacianTrained};
488
489/// Re-export UMAP utilities for convenience
490pub use umap::{UmapTrained, UMAP};
491
492/// Re-export Diffusion Maps utilities for convenience
493pub use diffusion_maps::{DiffusionMaps, DiffusionMapsTrained};
494
495/// Re-export DeepWalk types for convenience
496pub use deepwalk::{DeepWalk, DeepWalkTrained};
497/// Re-export DictionaryLearning types for convenience
498pub use dictionary_learning::{DLTrained, DictionaryLearning};
499/// Re-export HeavyTailedSymmetricSNE types for convenience
500pub use heavy_tailed_symmetric_sne::{HeavyTailedSymmetricSNE, HeavyTailedSymmetricSneTrained};
501/// Re-export HLLE utilities for convenience
502pub use hessian_lle::{HessianLLE, HessianLleTrained};
503/// Re-export LTSA types for convenience
504pub use ltsa::{LtsaTrained, LTSA};
505/// Re-export MiniBatchTSNE types for convenience
506pub use minibatch_tsne::{MBTSNETrained, MiniBatchTSNE};
507/// Re-export MiniBatchUMAP types for convenience
508pub use minibatch_umap::{MBUMAPTrained, MiniBatchUMAP};
509/// Re-export MVU types for convenience
510pub use mvu::{MvuTrained, MVU};
511/// Re-export Node2Vec types for convenience
512pub use node2vec::{Node2Vec, Node2VecTrained};
513/// Re-export ParametricTSNE types for convenience
514pub use parametric_tsne::{ParametricTSNE, ParametricTsneTrained};
515/// Re-export RandomWalkEmbedding types for convenience
516pub use random_walk_embedding::{RandomWalkEmbedding, RandomWalkEmbeddingTrained};
517/// Re-export SNE types for convenience
518pub use sne::{SneTrained, SNE};
519/// Re-export SpectralEmbedding types for convenience
520pub use spectral_embedding::{SpectralEmbedding, SpectralEmbeddingTrained};
521/// Re-export SymmetricSNE types for convenience
522pub use symmetric_sne::{SymmetricSNE, SymmetricSneTrained};
523
524/// Re-export distance methods and kernel functions for convenience
525pub use distance_kernels::*;
526
527/// Re-export Graph Neural Networks for convenience
528pub use graph_neural_networks::*;
529
530/// Re-export random projection methods for convenience
531pub use random_projections::*;
532
533/// Re-export similarity learning utilities for convenience
534pub use similarity::*;
535
536/// Re-export hierarchical manifold learning utilities for convenience
537pub use hierarchical::*;
538
539/// Re-export temporal manifold learning utilities for convenience
540pub use temporal::*;
541
542/// Re-export robust manifold learning utilities for convenience
543pub use robust::*;
544
545/// Multi-view learning module
546pub mod multi_view;
547
548/// Nyström approximation module
549pub mod nystrom;
550
551/// Compressed sensing module
552pub mod compressed_sensing;
553
554/// Parallel k-nearest neighbors module
555pub mod parallel_knn;
556
557/// Stochastic manifold learning module
558pub mod stochastic;
559
560/// Re-export multi-view learning utilities for convenience
561pub use multi_view::*;
562
563/// Re-export Nyström approximation utilities for convenience
564pub use nystrom::*;
565
566/// Re-export compressed sensing utilities for convenience
567pub use compressed_sensing::*;
568
569/// Re-export parallel KNN utilities for convenience
570pub use parallel_knn::*;
571
572/// Re-export stochastic manifold learning utilities for convenience
573pub use stochastic::*;
574
575/// Benchmark datasets module
576pub mod benchmark_datasets;
577
578/// Timing utilities module
579pub mod timing_utilities;
580
581/// Memory profiler module
582pub mod memory_profiler;
583
584/// SIMD-optimized distance computations
585pub mod simd_distance;
586
587/// Validation framework for hyperparameter tuning
588pub mod validation;
589
590/// Visualization integration utilities
591pub mod visualization;
592
593/// GPU-accelerated methods for manifold learning
594#[cfg(feature = "gpu")]
595pub mod gpu_acceleration;
596
597/// Type-safe manifold abstractions with phantom types
598pub mod type_safe_manifolds;
599
600/// Zero-cost abstractions for manifold learning
601pub mod zero_cost_abstractions;
602
603/// Comparison tests against reference implementations
604pub mod reference_tests;
605
606/// Numerically stable eigenvalue algorithms
607pub mod stable_eigenvalue;
608
609/// Robust optimization methods for manifold learning
610pub mod robust_optimization;
611
612/// Condition number monitoring for numerical stability
613pub mod condition_monitoring;
614
615/// Trait-based manifold learning framework
616pub mod manifold_traits;
617
618/// Fluent API for manifold learning configuration
619pub mod fluent_api;
620
621/// Extensible distance metrics registry
622pub mod extensible_metrics;
623
624/// Type-safe geometric operations with compile-time dimension checking
625pub mod type_safe_geometry;
626
627/// Re-export manifold traits and utilities for convenience
628pub use manifold_traits::*;
629
630/// Re-export fluent API for convenience
631pub use fluent_api::*;
632
633/// Re-export extensible metrics for convenience
634pub use extensible_metrics::*;
635
636/// Re-export type-safe geometry for convenience
637pub use type_safe_geometry::*;
638
639/// Re-export visualization utilities for convenience
640pub use visualization::*;
641
642/// Serialization support for manifold learning models
643#[cfg(feature = "serialization")]
644pub mod serialization;
645
646/// Serialization implementations for specific algorithms
647#[cfg(feature = "serialization")]
648pub mod serialization_impl;
649
650/// Re-export serialization utilities for convenience
651#[cfg(feature = "serialization")]
652pub use serialization::*;
653
654/// Plugin architecture for custom manifold learning methods
655pub mod plugin_architecture;
656
657/// Re-export plugin architecture utilities for convenience
658pub use plugin_architecture::*;
659
660/// Information-theoretic manifold learning methods
661pub mod information_theory;
662
663/// Re-export information theory utilities for convenience
664pub use information_theory::*;
665
666/// Optimal transport methods for manifold learning
667pub mod optimal_transport;
668
669/// Re-export optimal transport utilities for convenience
670pub use optimal_transport::*;
671
672/// Iterative refinement methods for improved numerical stability
673pub mod iterative_refinement;
674
675/// Re-export iterative refinement utilities for convenience
676pub use iterative_refinement::*;
677
678/// Pipeline middleware system for composable manifold learning
679pub mod pipeline_middleware;
680
681/// Re-export pipeline middleware utilities for convenience
682pub use pipeline_middleware::*;
683
684/// Embedding callbacks for monitoring and customizing manifold learning training
685pub mod embedding_callbacks;
686
687/// Re-export embedding callback utilities for convenience
688pub use embedding_callbacks::*;
689
690/// Category theory-based manifold representations and functorial embeddings
691pub mod category_theory;
692
693/// Re-export category theory utilities for convenience
694pub use category_theory::*;
695
696/// Advanced performance optimizations including cache-friendly data layouts and unsafe optimizations
697pub mod performance_optimization;
698
699/// Re-export performance optimization utilities for convenience
700pub use performance_optimization::*;
701
702/// Deep learning integration for manifold learning including autoencoders and variational autoencoders
703pub mod deep_learning;
704
705/// Re-export deep learning utilities for convenience
706pub use deep_learning::*;
707
708/// Computer vision applications for manifold learning including image patch embedding and face analysis
709pub mod computer_vision;
710
711/// Re-export computer vision utilities for convenience
712pub use computer_vision::*;
713
714/// Adversarial manifold learning module
715pub mod adversarial;
716
717/// Re-export adversarial manifold learning utilities for convenience
718pub use adversarial::*;
719
720/// Continuous normalizing flows module
721pub mod continuous_normalizing_flows;
722
723/// Re-export continuous normalizing flows utilities for convenience
724pub use continuous_normalizing_flows::*;
725
726/// Natural Language Processing manifold learning module
727pub mod nlp;
728
729/// Re-export NLP manifold learning utilities for convenience
730pub use nlp::*;
731
732/// Quantum methods for manifold learning module
733pub mod quantum;
734
735/// Re-export quantum methods for convenience
736pub use quantum::*;
737
738/// Causal inference on manifolds module
739pub mod causal;
740
741/// Re-export causal inference methods for convenience
742pub use causal::*;
743
744/// Bioinformatics applications for manifold learning including genomic analysis,
745/// protein structures, phylogenetics, single-cell trajectories, and metabolic pathways
746pub mod bioinformatics;
747
748/// Re-export bioinformatics utilities for convenience
749pub use bioinformatics::*;