// sklears_manifold/lib.rs
1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::all)]
6#![allow(clippy::pedantic)]
7#![allow(clippy::nursery)]
8#![allow(unused_imports)]
9#![allow(unused_variables)]
10#![allow(unused_assignments)]
11#![allow(unused_mut)]
12#![allow(unused_doc_comments)]
13#![allow(unused_comparisons)]
14#![allow(unused_must_use)]
15#![allow(mismatched_lifetime_syntaxes)]
16#![allow(ambiguous_glob_reexports)]
17#![allow(unexpected_cfgs)]
18//! Manifold learning algorithms (t-SNE, Isomap, etc.)
19//!
20//! This module is part of sklears, providing scikit-learn compatible
21//! machine learning algorithms in Rust.
22
23// #![warn(missing_docs)]
24
25use scirs2_core::ndarray::{Array1, Array2, ArrayView2, Axis};
26use scirs2_core::random::rngs::StdRng;
27use scirs2_core::random::thread_rng;
28use scirs2_core::random::Rng;
29use scirs2_core::random::SeedableRng;
30use scirs2_core::Distribution;
31use sklears_core::{
32 error::{Result as SklResult, SklearsError},
33 traits::{Estimator, Fit, Transform, Untrained},
34 types::Float,
35};
36
37// TSNE moved to tsne.rs module
38
39// Isomap moved to isomap.rs module
40
41// LocallyLinearEmbedding moved to lle.rs module
42
43// LaplacianEigenmaps moved to laplacian_eigenmaps.rs module
44
45// LaplacianEigenmaps implementations moved to laplacian_eigenmaps.rs module
46
47// LaplacianEigenmaps implementations and LaplacianTrained moved to laplacian_eigenmaps.rs module
48
49// UMAP moved to umap.rs module
50
51// UMAP implementations and UmapTrained moved to umap.rs module
52// DiffusionMaps implementation and DiffusionMapsTrained moved to diffusion_maps.rs module
53// HLLE implementation and HessianLleTrained moved to hessian_lle.rs module
54
55#[allow(non_snake_case)]
56#[cfg(test)]
57mod tests;
58
59pub mod quality_metrics;
60
61/// Re-export quality metrics for convenience
62pub use quality_metrics::*;
63
64// =====================================================================================
65// STRESS TESTING AND SCALABILITY MODULE
66// =====================================================================================
67
68pub mod stress_testing;
69pub use stress_testing::*;
70
71// =====================================================================================
72// GEODESIC DISTANCE COMPUTATION MODULE
73// =====================================================================================
74
75pub mod geodesic_distance;
76pub use geodesic_distance::*;
77
78// =====================================================================================
79// DIFFUSION DISTANCE MODULE
80// =====================================================================================
81
82pub mod diffusion_distance;
83pub use diffusion_distance::*;
84
85// =====================================================================================
86// RIEMANNIAN GEOMETRY MODULE
87// =====================================================================================
88
89pub mod riemannian;
90pub use riemannian::*;
91
92// =====================================================================================
93// TOPOLOGICAL DATA ANALYSIS MODULE
94// =====================================================================================
95
96pub mod topological;
97pub use topological::*;
98
99// =====================================================================================
100// RANDOM WALK EMBEDDINGS
101// =====================================================================================
102
103// =====================================================================================
104// NODE2VEC AND DEEPWALK ALGORITHMS
105// =====================================================================================
106
107// =====================================================================================
108
/// Sparse Coding for manifold learning
///
/// Sparse coding learns a dictionary of basis vectors such that each data point
/// can be represented as a sparse linear combination of these basis vectors.
/// This is particularly useful for manifold learning when the data lies on
/// a low-dimensional manifold that can be sparsely represented.
///
/// # Parameters
///
/// * `n_components` - Number of dictionary atoms
/// * `alpha` - Sparsity regularization parameter
/// * `max_iter` - Maximum number of iterations
/// * `tol` - Tolerance for convergence
/// * `random_state` - Random seed for reproducibility
///
/// # Examples
///
/// ```
/// use sklears_manifold::SparseCoding;
/// use sklears_core::traits::{Transform, Fit};
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]];
///
/// let sc = SparseCoding::new()
///     .n_components(2)
///     .alpha(0.1);
///
/// let fitted = sc.fit(&x.view(), &()).unwrap();
/// let embedded = fitted.transform(&x.view()).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct SparseCoding<S = Untrained> {
    // Typestate marker: `Untrained` before `fit`, `SCTrained` afterwards.
    state: S,
    // Number of dictionary atoms to learn (default 100).
    n_components: usize,
    // L1 sparsity penalty used by the soft-thresholding step (default 1.0).
    alpha: f64,
    // Maximum number of dictionary-learning sweeps in `fit` (default 1000).
    max_iter: usize,
    // Convergence threshold on the largest per-atom change (default 1e-8).
    tol: f64,
    // Optional seed; `None` draws a seed from the thread-local RNG.
    random_state: Option<u64>,
}
149
impl Default for SparseCoding<Untrained> {
    /// Equivalent to [`SparseCoding::new`].
    fn default() -> Self {
        Self::new()
    }
}
155
156impl SparseCoding<Untrained> {
157 /// Create a new SparseCoding instance
158 pub fn new() -> Self {
159 Self {
160 state: Untrained,
161 n_components: 100,
162 alpha: 1.0,
163 max_iter: 1000,
164 tol: 1e-8,
165 random_state: None,
166 }
167 }
168
169 /// Set the number of dictionary atoms
170 pub fn n_components(mut self, n_components: usize) -> Self {
171 self.n_components = n_components;
172 self
173 }
174
175 /// Set the sparsity regularization parameter
176 pub fn alpha(mut self, alpha: f64) -> Self {
177 self.alpha = alpha;
178 self
179 }
180
181 /// Set the maximum number of iterations
182 pub fn max_iter(mut self, max_iter: usize) -> Self {
183 self.max_iter = max_iter;
184 self
185 }
186
187 /// Set the tolerance for convergence
188 pub fn tol(mut self, tol: f64) -> Self {
189 self.tol = tol;
190 self
191 }
192
193 /// Set the random state
194 pub fn random_state(mut self, random_state: u64) -> Self {
195 self.random_state = Some(random_state);
196 self
197 }
198
199 /// Soft thresholding function for sparse coding
200 fn soft_threshold(x: f64, lambda: f64) -> f64 {
201 if x > lambda {
202 x - lambda
203 } else if x < -lambda {
204 x + lambda
205 } else {
206 0.0
207 }
208 }
209}
210
/// Fitted state for [`SparseCoding`]: the learned dictionary and the
/// per-feature mean used to center inputs before encoding.
#[derive(Debug, Clone)]
pub struct SCTrained {
    // Learned dictionary of shape (n_features, n_components); each column is an
    // atom, normalized to unit L2 norm where numerically possible.
    dictionary: Array2<f64>,
    // Per-feature mean of the training data, subtracted in `transform`.
    mean: Array1<f64>,
}
216
impl Estimator for SparseCoding<Untrained> {
    type Config = ();
    type Error = SklearsError;
    type Float = Float;

    // Hyper-parameters live directly on the struct, so the associated config
    // type is unit; `&()` is valid here via 'static promotion.
    fn config(&self) -> &Self::Config {
        &()
    }
}
226
impl Fit<ArrayView2<'_, Float>, ()> for SparseCoding<Untrained> {
    type Fitted = SparseCoding<SCTrained>;

    /// Learn a sparse-coding dictionary from `x` by coordinate descent over
    /// dictionary atoms, using soft-thresholded correlations as sparse codes.
    ///
    /// Returns a trained estimator holding the dictionary and the per-feature
    /// mean of `x`, or `SklearsError::InvalidInput` when
    /// `n_components > n_features`.
    fn fit(self, x: &ArrayView2<'_, Float>, _y: &()) -> SklResult<Self::Fitted> {
        let (n_samples, n_features) = x.dim();

        // NOTE(review): this rejects overcomplete dictionaries
        // (n_components > n_features), which classical sparse coding permits
        // and the default n_components = 100 will often trip — confirm the
        // restriction is intentional.
        if self.n_components > n_features {
            return Err(SklearsError::InvalidInput(
                "n_components cannot be larger than n_features".to_string(),
            ));
        }

        // Convert to f64 and center the data (mapv is an identity copy when
        // `Float` is already f64).
        let x_f64 = x.mapv(|v| v);
        let mean = x_f64.mean_axis(Axis(0)).unwrap();
        let x_centered = &x_f64 - &mean.view().broadcast(x_f64.dim()).unwrap();

        // Seeded RNG for reproducibility; otherwise seed from the thread RNG.
        let mut rng = if let Some(seed) = self.random_state {
            StdRng::seed_from_u64(seed)
        } else {
            StdRng::seed_from_u64(thread_rng().random::<u64>())
        };

        // Initialize dictionary with random normalized vectors
        let mut dictionary = Array2::<f64>::zeros((n_features, self.n_components));
        for mut col in dictionary.columns_mut() {
            for elem in col.iter_mut() {
                *elem = rng.sample(scirs2_core::StandardNormal);
            }
            // Normalize the column to unit L2 norm (skipped for a zero column).
            let norm = col.dot(&col).sqrt();
            if norm > 0.0 {
                col /= norm;
            }
        }

        // Iterative dictionary learning using coordinate descent
        for iter in 0..self.max_iter {
            let mut max_change = 0.0f64;

            // Update dictionary atoms one at a time
            for k in 0..self.n_components {
                // Compute residual without atom k.
                // NOTE(review): codes for atom j are computed against the
                // *partially updated* residual (atoms subtracted so far), so
                // the result depends on atom order — sequential Gauss–Seidel
                // style rather than a fixed-code residual. Preserved as-is.
                let mut residual = x_centered.clone();
                for j in 0..self.n_components {
                    if j != k {
                        let atom_j = dictionary.column(j);
                        // Compute sparse codes for atom j
                        let mut codes_j = Array1::zeros(n_samples);
                        for i in 0..n_samples {
                            let dot_product = residual.row(i).dot(&atom_j);
                            codes_j[i] = Self::soft_threshold(dot_product, self.alpha);
                        }

                        // Subtract contribution of atom j
                        for i in 0..n_samples {
                            let mut row = residual.row_mut(i);
                            row.scaled_add(-codes_j[i], &atom_j);
                        }
                    }
                }

                // Update atom k: accumulate code-weighted residual rows and the
                // sum of squared codes (the normalizer).
                let mut new_atom = Array1::zeros(n_features);
                let mut total_code = 0.0;

                for i in 0..n_samples {
                    let code_k = Self::soft_threshold(
                        residual.row(i).dot(&dictionary.column(k)),
                        self.alpha,
                    );
                    // Skip numerically-zero codes so they neither perturb the
                    // atom nor the normalizer.
                    if code_k.abs() > 1e-12 {
                        new_atom.scaled_add(code_k, &residual.row(i));
                        total_code += code_k * code_k;
                    }
                }

                // Only replace the atom when some sample actually used it;
                // otherwise the old atom is kept unchanged.
                if total_code > 1e-12 {
                    new_atom /= total_code;
                    // Normalize back to unit L2 norm.
                    let norm = new_atom.dot(&new_atom).sqrt();
                    if norm > 1e-12 {
                        new_atom /= norm;
                    }

                    // Track the largest L1 change across atoms for convergence.
                    let change = (&new_atom - &dictionary.column(k)).mapv(|x| x.abs()).sum();
                    max_change = max_change.max(change);

                    // Update dictionary
                    dictionary.column_mut(k).assign(&new_atom);
                }
            }

            // Converged: no atom moved more than `tol` this sweep.
            if max_change < self.tol {
                break;
            }
        }

        Ok(SparseCoding {
            state: SCTrained { dictionary, mean },
            n_components: self.n_components,
            alpha: self.alpha,
            max_iter: self.max_iter,
            tol: self.tol,
            random_state: self.random_state,
        })
    }
}
337
impl Transform<ArrayView2<'_, Float>, Array2<f64>> for SparseCoding<SCTrained> {
    /// Encode each row of `x` as sparse coefficients over the learned
    /// dictionary, returning an `(n_samples, n_components)` code matrix.
    ///
    /// Rows are centered with the training mean, then solved by coordinate
    /// descent with soft thresholding (lasso-style), capped at 100 sweeps.
    fn transform(&self, x: &ArrayView2<'_, Float>) -> SklResult<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let x_f64 = x.mapv(|v| v);
        // Center with the mean learned during fit, broadcast across rows.
        let x_centered = &x_f64 - &self.state.mean.view().broadcast(x_f64.dim()).unwrap();

        // Compute sparse codes using coordinate descent
        let mut codes = Array2::zeros((n_samples, self.n_components));

        for i in 0..n_samples {
            let sample = x_centered.row(i);
            let mut code = Array1::<f64>::zeros(self.n_components);

            // Coordinate descent for sparse coding.
            // NOTE(review): sweep count is hard-coded to 100 (independent of
            // `max_iter`), and the residual below is rebuilt from scratch for
            // every coordinate, making each sweep O(n_components^2 * n_features).
            // An incremental residual would be O(n_components * n_features) but
            // changes float-summation order, so it is left as-is.
            for _ in 0..100 {
                // Limited iterations for transform
                let mut max_change = 0.0f64;

                for k in 0..self.n_components {
                    // Compute residual without component k
                    let mut residual = sample.to_owned();
                    for j in 0..self.n_components {
                        if j != k {
                            let atom_j = self.state.dictionary.column(j);
                            residual.scaled_add(-code[j], &atom_j);
                        }
                    }

                    // Update component k via soft-thresholded correlation.
                    // Assumes atom k has unit norm (enforced during fit) so no
                    // division by the atom's squared norm is needed.
                    let atom_k = self.state.dictionary.column(k);
                    let new_code_k =
                        SparseCoding::soft_threshold(residual.dot(&atom_k), self.alpha);
                    let change = (new_code_k - code[k]).abs();
                    max_change = max_change.max(change);
                    code[k] = new_code_k;
                }

                // Fixed inner tolerance for the per-sample solve (not `self.tol`).
                if max_change < 1e-6 {
                    break;
                }
            }

            codes.row_mut(i).assign(&code);
        }

        Ok(codes)
    }
}
386
387// MINI-BATCH EMBEDDING METHODS FOR SCALABILITY
388// =====================================================================================
389
390/// t-SNE (t-distributed Stochastic Neighbor Embedding) module
391pub mod tsne;
392
393/// Isomap (Isometric Mapping) module
394pub mod isomap;
395
396/// LLE (Locally Linear Embedding) module
397pub mod lle;
398
399/// MDS (Multidimensional Scaling) module
400pub mod mds;
401
402/// Laplacian Eigenmaps module
403pub mod laplacian_eigenmaps;
404
405/// UMAP module
406pub mod umap;
407
408/// Diffusion Maps module
409pub mod diffusion_maps;
410
411/// HLLE (Hessian LLE) module
412pub mod hessian_lle;
413
414/// LTSA (Local Tangent Space Alignment) module
415pub mod ltsa;
416
417/// MVU (Maximum Variance Unfolding) module
418pub mod mvu;
419
420/// SNE (Stochastic Neighbor Embedding) module
421pub mod sne;
422
423/// SymmetricSNE (Symmetric Stochastic Neighbor Embedding) module
424pub mod symmetric_sne;
425
426/// ParametricTSNE (Parametric t-SNE) module
427pub mod parametric_tsne;
428
429/// HeavyTailedSymmetricSNE (Heavy-Tailed Symmetric SNE) module
430pub mod heavy_tailed_symmetric_sne;
431
432/// Spectral Embedding module
433pub mod spectral_embedding;
434
435/// Random Walk Embedding module
436pub mod random_walk_embedding;
437
438/// Node2Vec algorithm module
439pub mod node2vec;
440
441/// DeepWalk algorithm module
442pub mod deepwalk;
443
444/// Dictionary Learning module
445pub mod dictionary_learning;
446
447/// Mini-batch t-SNE module
448pub mod minibatch_tsne;
449
450/// Mini-batch UMAP module
451pub mod minibatch_umap;
452
453/// Distance methods and kernel functions module
454pub mod distance_kernels;
455
456/// Graph Neural Networks module
457pub mod graph_neural_networks;
458
459/// Random projection methods module
460pub mod random_projections;
461
462/// Similarity learning module
463pub mod similarity;
464
465/// Hierarchical manifold learning module
466pub mod hierarchical;
467
468/// Temporal manifold learning module
469pub mod temporal;
470
471/// Robust manifold learning module
472pub mod robust;
473
474/// Re-export t-SNE utilities for convenience
475pub use tsne::{TsneTrained, TSNE};
476
477/// Re-export Isomap utilities for convenience
478pub use isomap::{Isomap, IsomapTrained};
479
480/// Re-export LLE utilities for convenience
481pub use lle::{LleTrained, LocallyLinearEmbedding};
482
483/// Re-export MDS utilities for convenience
484pub use mds::{MdsTrained, MDS};
485
486/// Re-export Laplacian Eigenmaps utilities for convenience
487pub use laplacian_eigenmaps::{LaplacianEigenmaps, LaplacianTrained};
488
489/// Re-export UMAP utilities for convenience
490pub use umap::{UmapTrained, UMAP};
491
492/// Re-export Diffusion Maps utilities for convenience
493pub use diffusion_maps::{DiffusionMaps, DiffusionMapsTrained};
494
495/// Re-export DeepWalk types for convenience
496pub use deepwalk::{DeepWalk, DeepWalkTrained};
497/// Re-export DictionaryLearning types for convenience
498pub use dictionary_learning::{DLTrained, DictionaryLearning};
499/// Re-export HeavyTailedSymmetricSNE types for convenience
500pub use heavy_tailed_symmetric_sne::{HeavyTailedSymmetricSNE, HeavyTailedSymmetricSneTrained};
501/// Re-export HLLE utilities for convenience
502pub use hessian_lle::{HessianLLE, HessianLleTrained};
503/// Re-export LTSA types for convenience
504pub use ltsa::{LtsaTrained, LTSA};
505/// Re-export MiniBatchTSNE types for convenience
506pub use minibatch_tsne::{MBTSNETrained, MiniBatchTSNE};
507/// Re-export MiniBatchUMAP types for convenience
508pub use minibatch_umap::{MBUMAPTrained, MiniBatchUMAP};
509/// Re-export MVU types for convenience
510pub use mvu::{MvuTrained, MVU};
511/// Re-export Node2Vec types for convenience
512pub use node2vec::{Node2Vec, Node2VecTrained};
513/// Re-export ParametricTSNE types for convenience
514pub use parametric_tsne::{ParametricTSNE, ParametricTsneTrained};
515/// Re-export RandomWalkEmbedding types for convenience
516pub use random_walk_embedding::{RandomWalkEmbedding, RandomWalkEmbeddingTrained};
517/// Re-export SNE types for convenience
518pub use sne::{SneTrained, SNE};
519/// Re-export SpectralEmbedding types for convenience
520pub use spectral_embedding::{SpectralEmbedding, SpectralEmbeddingTrained};
521/// Re-export SymmetricSNE types for convenience
522pub use symmetric_sne::{SymmetricSNE, SymmetricSneTrained};
523
524/// Re-export distance methods and kernel functions for convenience
525pub use distance_kernels::*;
526
527/// Re-export Graph Neural Networks for convenience
528pub use graph_neural_networks::*;
529
530/// Re-export random projection methods for convenience
531pub use random_projections::*;
532
533/// Re-export similarity learning utilities for convenience
534pub use similarity::*;
535
536/// Re-export hierarchical manifold learning utilities for convenience
537pub use hierarchical::*;
538
539/// Re-export temporal manifold learning utilities for convenience
540pub use temporal::*;
541
542/// Re-export robust manifold learning utilities for convenience
543pub use robust::*;
544
545/// Multi-view learning module
546pub mod multi_view;
547
548/// Nyström approximation module
549pub mod nystrom;
550
551/// Compressed sensing module
552pub mod compressed_sensing;
553
554/// Parallel k-nearest neighbors module
555pub mod parallel_knn;
556
557/// Stochastic manifold learning module
558pub mod stochastic;
559
560/// Re-export multi-view learning utilities for convenience
561pub use multi_view::*;
562
563/// Re-export Nyström approximation utilities for convenience
564pub use nystrom::*;
565
566/// Re-export compressed sensing utilities for convenience
567pub use compressed_sensing::*;
568
569/// Re-export parallel KNN utilities for convenience
570pub use parallel_knn::*;
571
572/// Re-export stochastic manifold learning utilities for convenience
573pub use stochastic::*;
574
575/// Benchmark datasets module
576pub mod benchmark_datasets;
577
578/// Timing utilities module
579pub mod timing_utilities;
580
581/// Memory profiler module
582pub mod memory_profiler;
583
584/// SIMD-optimized distance computations
585pub mod simd_distance;
586
587/// Validation framework for hyperparameter tuning
588pub mod validation;
589
590/// Visualization integration utilities
591pub mod visualization;
592
593/// GPU-accelerated methods for manifold learning
594#[cfg(feature = "gpu")]
595pub mod gpu_acceleration;
596
597/// Type-safe manifold abstractions with phantom types
598pub mod type_safe_manifolds;
599
600/// Zero-cost abstractions for manifold learning
601pub mod zero_cost_abstractions;
602
603/// Comparison tests against reference implementations
604pub mod reference_tests;
605
606/// Numerically stable eigenvalue algorithms
607pub mod stable_eigenvalue;
608
609/// Robust optimization methods for manifold learning
610pub mod robust_optimization;
611
612/// Condition number monitoring for numerical stability
613pub mod condition_monitoring;
614
615/// Trait-based manifold learning framework
616pub mod manifold_traits;
617
618/// Fluent API for manifold learning configuration
619pub mod fluent_api;
620
621/// Extensible distance metrics registry
622pub mod extensible_metrics;
623
624/// Type-safe geometric operations with compile-time dimension checking
625pub mod type_safe_geometry;
626
627/// Re-export manifold traits and utilities for convenience
628pub use manifold_traits::*;
629
630/// Re-export fluent API for convenience
631pub use fluent_api::*;
632
633/// Re-export extensible metrics for convenience
634pub use extensible_metrics::*;
635
636/// Re-export type-safe geometry for convenience
637pub use type_safe_geometry::*;
638
639/// Re-export visualization utilities for convenience
640pub use visualization::*;
641
642/// Serialization support for manifold learning models
643#[cfg(feature = "serialization")]
644pub mod serialization;
645
646/// Serialization implementations for specific algorithms
647#[cfg(feature = "serialization")]
648pub mod serialization_impl;
649
650/// Re-export serialization utilities for convenience
651#[cfg(feature = "serialization")]
652pub use serialization::*;
653
654/// Plugin architecture for custom manifold learning methods
655pub mod plugin_architecture;
656
657/// Re-export plugin architecture utilities for convenience
658pub use plugin_architecture::*;
659
660/// Information-theoretic manifold learning methods
661pub mod information_theory;
662
663/// Re-export information theory utilities for convenience
664pub use information_theory::*;
665
666/// Optimal transport methods for manifold learning
667pub mod optimal_transport;
668
669/// Re-export optimal transport utilities for convenience
670pub use optimal_transport::*;
671
672/// Iterative refinement methods for improved numerical stability
673pub mod iterative_refinement;
674
675/// Re-export iterative refinement utilities for convenience
676pub use iterative_refinement::*;
677
678/// Pipeline middleware system for composable manifold learning
679pub mod pipeline_middleware;
680
681/// Re-export pipeline middleware utilities for convenience
682pub use pipeline_middleware::*;
683
684/// Embedding callbacks for monitoring and customizing manifold learning training
685pub mod embedding_callbacks;
686
687/// Re-export embedding callback utilities for convenience
688pub use embedding_callbacks::*;
689
690/// Category theory-based manifold representations and functorial embeddings
691pub mod category_theory;
692
693/// Re-export category theory utilities for convenience
694pub use category_theory::*;
695
696/// Advanced performance optimizations including cache-friendly data layouts and unsafe optimizations
697pub mod performance_optimization;
698
699/// Re-export performance optimization utilities for convenience
700pub use performance_optimization::*;
701
702/// Deep learning integration for manifold learning including autoencoders and variational autoencoders
703pub mod deep_learning;
704
705/// Re-export deep learning utilities for convenience
706pub use deep_learning::*;
707
708/// Computer vision applications for manifold learning including image patch embedding and face analysis
709pub mod computer_vision;
710
711/// Re-export computer vision utilities for convenience
712pub use computer_vision::*;
713
714/// Adversarial manifold learning module
715pub mod adversarial;
716
717/// Re-export adversarial manifold learning utilities for convenience
718pub use adversarial::*;
719
720/// Continuous normalizing flows module
721pub mod continuous_normalizing_flows;
722
723/// Re-export continuous normalizing flows utilities for convenience
724pub use continuous_normalizing_flows::*;
725
726/// Natural Language Processing manifold learning module
727pub mod nlp;
728
729/// Re-export NLP manifold learning utilities for convenience
730pub use nlp::*;
731
732/// Quantum methods for manifold learning module
733pub mod quantum;
734
735/// Re-export quantum methods for convenience
736pub use quantum::*;
737
738/// Causal inference on manifolds module
739pub mod causal;
740
741/// Re-export causal inference methods for convenience
742pub use causal::*;
743
744/// Bioinformatics applications for manifold learning including genomic analysis,
745/// protein structures, phylogenetics, single-cell trajectories, and metabolic pathways
746pub mod bioinformatics;
747
748/// Re-export bioinformatics utilities for convenience
749pub use bioinformatics::*;