Skip to main content

manifolds_rs/
lib.rs

1//! Dimensionality reduction algorithms including UMAP, t-SNE, PHATE, Diffusion
2//! Maps and PacMAP.
3//!
4//! Provides both standard and approximate nearest-neighbour-based graph
5//! construction, multiple optimisers, and (optionally) parametric UMAP via a
6//! neural network encoder.
7//!
8//! Additionally, optional GPU-accelerated versions (in terms of kNN search)
9//! can be used when the right feature flags are active.
10
11#![allow(clippy::needless_range_loop)] // I like loops ... !
12#![warn(missing_docs)]
13
14pub mod data;
15pub mod errors;
16pub mod prelude;
17pub mod training;
18pub mod utils;
19
20#[cfg(feature = "parametric")]
21pub mod parametric;
22
23use ann_search_rs::cpu::hnsw::{HnswIndex, HnswState};
24use ann_search_rs::cpu::nndescent::{ApplySortedUpdates, NNDescent, NNDescentQuery};
25use ann_search_rs::utils::dist::parse_ann_dist;
26use faer::MatRef;
27use rand_distr::{Distribution, StandardNormal};
28use std::{default::Default, time::Instant};
29use thousands::*;
30
31#[cfg(feature = "parametric")]
32use burn::tensor::{backend::AutodiffBackend, Element};
33#[cfg(feature = "parametric")]
34use num_traits::ToPrimitive;
35
36#[cfg(feature = "gpu")]
37use ann_search_rs::gpu::nndescent_gpu::NNDescentGpu;
38#[cfg(feature = "gpu")]
39use ann_search_rs::gpu::traits_gpu::AnnSearchGpuFloat;
40#[cfg(feature = "gpu")]
41use cubecl::prelude::*;
42
43use crate::data::graph::*;
44use crate::data::init::*;
45use crate::data::nearest_neighbours::*;
46use crate::data::pacmap_pairs::*;
47use crate::data::structures::*;
48use crate::prelude::*;
49use crate::training::mds_optimiser::*;
50use crate::training::pacmap_optimiser::{
51    optimise_pacmap, optimise_pacmap_parallel, PacMapOptimiser,
52};
53use crate::training::tsne_optimiser::*;
54use crate::training::umap_optimisers::*;
55use crate::utils::diffusions::*;
56use crate::utils::math::compute_largest_eigenpairs_lanczos;
57use crate::utils::potentials::compute_potential_distances;
58use crate::utils::sparse_ops::matrix_power;
59
60#[cfg(feature = "parametric")]
61use crate::parametric::model::*;
62#[cfg(feature = "parametric")]
63use crate::parametric::parametric_train::*;
64#[cfg(feature = "fft_tsne")]
65use crate::utils::fft::FftwFloat;
66
67///////////
68// Types //
69///////////
70
71/// Type for the pre-computed kNN
72///
73/// ### Fields
74///
75/// * `0` - Should be the indices of the nearest neighbours excluding self
76/// * `1` - Should be the distances to the nearest neighbours excluding self
77pub type PreComputedKnn<T> = Option<(Vec<Vec<usize>>, Vec<Vec<T>>)>;
78
79/// Result of the tSNE graph
80///
81/// ### Fields
82///
83/// * `0` - The coordinate list
84/// * `1` - The nearest neighbours
85/// * `2` - The distances to the nearest neighbours
86///
87/// ### Errors
88///
89/// Potential errors from the generation of the graph
90pub type TsneGraph<T> = Result<(CoordinateList<T>, Vec<Vec<usize>>, Vec<Vec<T>>), ManifoldsError>;
91
92//////////
93// Umap //
94//////////
95
96/// Main Config structure with all of the possible sub configurations
97#[derive(Debug, Clone)]
98pub struct UmapParams<T> {
99    /// How many dimensions to return
100    pub n_dim: usize,
101    /// Number of neighbours
102    pub k: usize,
103    /// Which optimiser to use. Defaults to `"adam_parallel"`.
104    pub optimiser: String,
105    /// (Approximate) Nearest neighbour method. One of `"exhaustive"`, `"ivf"`,
106    /// `"hnsw"`, `"nndescent"`, `"annoy"`, `"kmknn"` or `"balltree"`.
107    pub ann_type: String,
108    /// Which embedding initialisation to use. Defaults to spectral clustering.
109    pub initialisation: String,
110    /// Optional initialisation range to use
111    pub init_range: Option<T>,
112    /// Nearest neighbour parameters.
113    pub nn_params: NearestNeighbourParams<T>,
114    /// Parameters for the UMAP graph generation.
115    pub umap_graph_params: UmapGraphParams<T>,
116    /// Parameters to use for the UMAP optimiser.
117    pub optim_params: UmapOptimParams<T>,
118    /// Shall randomised SVC be used for PCA-based embedding
119    pub randomised: bool,
120}
121
122impl<T> Default for UmapParams<T>
123where
124    T: ManifoldsFloat,
125{
126    fn default() -> Self {
127        Self {
128            n_dim: 2,
129            k: 15,
130            optimiser: "adam_parallel".to_string(),
131            ann_type: "kmknn".to_string(),
132            initialisation: "spectral".to_string(),
133            init_range: None,
134            nn_params: NearestNeighbourParams::default(),
135            optim_params: UmapOptimParams::default(),
136            umap_graph_params: UmapGraphParams::default(),
137            randomised: false,
138        }
139    }
140}
141
142impl<T> UmapParams<T>
143where
144    T: ManifoldsFloat,
145{
146    /// Generate new UMAP parameters with full control over every field.
147    ///
148    /// This constructor exposes every field directly. For sensible defaults,
149    /// use [`UmapParams::default`] or [`UmapParams::new_default_2d`] instead.
150    ///
151    /// ### Params
152    ///
153    /// * `n_dim` - How many dimensions to return.
154    /// * `k` - How many neighbours to consider.
155    /// * `optimiser` - Which optimiser to use, e.g. `"adam_parallel"`.
156    /// * `ann_type` - (Approximate) nearest neighbour method. One of
157    ///   `"exhaustive"`, `"ivf"`, `"hnsw"`, `"nndescent"`, `"annoy"`,
158    ///   `"kmknn"` or `"balltree"`.
159    /// * `initialisation` - Which embedding initialisation to use, e.g.
160    ///   `"spectral"`.
161    /// * `init_range` - Optional initialisation range.
162    /// * `nn_params` - Nearest neighbour parameters.
163    /// * `optim_params` - Parameters for the UMAP optimiser.
164    /// * `umap_graph_params` - Parameters for the UMAP graph generation.
165    /// * `randomised` - Whether randomised SVD is used for PCA-based embedding.
166    ///
167    /// ### Returns
168    ///
169    /// A fully specified set of UMAP parameters.
170    #[allow(clippy::too_many_arguments)]
171    pub fn new(
172        n_dim: usize,
173        k: usize,
174        optimiser: String,
175        ann_type: String,
176        initialisation: String,
177        init_range: Option<T>,
178        nn_params: NearestNeighbourParams<T>,
179        optim_params: UmapOptimParams<T>,
180        umap_graph_params: UmapGraphParams<T>,
181        randomised: bool,
182    ) -> Self {
183        Self {
184            n_dim,
185            k,
186            optimiser,
187            ann_type,
188            initialisation,
189            init_range,
190            nn_params,
191            optim_params,
192            umap_graph_params,
193            randomised,
194        }
195    }
196
197    /// Default parameters for standard 2D visualisation.
198    ///
199    /// Returns the default parameters but lets you tune `min_dist` and
200    /// `spread`, which feed into the optimiser parameters and control how
201    /// tightly points are packed in the embedding.
202    ///
203    /// ### Params
204    ///
205    /// * `min_dist` - Minimum distance between data points. Defaults to `0.5`.
206    /// * `spread` - Spread parameter. Defaults to `1.0`.
207    ///
208    /// ### Returns
209    ///
210    /// Hopefully sensible standard parameters for 2D visualisation.
211    pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
212        let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
213        let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
214
215        Self {
216            optim_params: UmapOptimParams::from_min_dist_spread(
217                min_dist, spread, None, None, None, None, None, None, None,
218            ),
219            ..Self::default()
220        }
221    }
222}
223
224/// Helper function to generate the UMAP graph
225///
226/// ### Params
227///
228/// * `data` - Input data matrix (samples × features)
229/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
230///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
231///   distances excluding self.
232/// * `k` - Number of nearest neighbours (typically 15-50).
233/// * `ann_type` - (Approximate) Nearest neighbour method: `"exhaustive"`,
234///   `"kmknn"`, `"balltree"`, `"annoy"`, `"hnsw"`, or `"nndescent"`. If you
235///   provide a weird string, the function will default to `"kmknn"`
236/// * `umap_params` - UMAP-specific parameters (bandwidth, local_connectivity,
237///   mix_weight)
238/// * `nn_params` - Nearest neighbour parameters for nearest neighbour search.
239/// * `seed` - Random seed
240/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
241///   verbosity.
242///
243/// ### Returns
244///
245/// Tuple of (graph, knn_indices, knn_dist) for use in optimisation
246#[allow(clippy::too_many_arguments)]
247pub fn construct_umap_graph<T>(
248    data: MatRef<T>,
249    precomputed_knn: PreComputedKnn<T>,
250    k: usize,
251    ann_type: String,
252    umap_params: &UmapGraphParams<T>,
253    nn_params: &NearestNeighbourParams<T>,
254    n_epochs: usize,
255    seed: usize,
256    verbose: usize,
257) -> UmapGraphResults<T>
258where
259    T: ManifoldsFloat,
260    HnswIndex<T>: HnswState<T>,
261    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
262{
263    let verbosity = parse_verbosity_level(verbose);
264
265    let (knn_indices, knn_dist) = match precomputed_knn {
266        Some((indices, distances)) => {
267            if verbosity.normal_verbosity() {
268                println!("Using precomputed kNN graph...");
269            }
270            (indices, distances)
271        }
272        None => {
273            if verbosity.normal_verbosity() {
274                println!(
275                    "Running (approximate) nearest neighbour search using {}...",
276                    ann_type
277                );
278            }
279            let start_knn = Instant::now();
280            let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
281            if verbosity.normal_verbosity() {
282                println!("kNN search done in: {:.2?}.", start_knn.elapsed());
283            }
284            result
285        }
286    };
287
288    if verbosity.normal_verbosity() {
289        println!("Constructing fuzzy simplicial set...");
290    }
291
292    let start_graph = Instant::now();
293
294    let (sigma, rho) = smooth_knn_dist(
295        &knn_dist,
296        knn_dist[0].len(),
297        umap_params.local_connectivity,
298        umap_params.bandwidth,
299        64,
300    );
301
302    let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
303    let graph = symmetrise_graph(graph, umap_params.mix_weight);
304    let graph = filter_weak_edges(graph, n_epochs, verbose);
305
306    if verbosity.normal_verbosity() {
307        println!(
308            "Finalised graph generation in {:.2?}.",
309            start_graph.elapsed()
310        );
311    }
312
313    Ok((graph, knn_indices, knn_dist))
314}
315
316/// Run UMAP dimensionality reduction
317///
318/// Uniform Manifold Approximation and Projection (UMAP) is a manifold learning
319/// technique for dimensionality reduction. This implementation follows the
320/// standard UMAP algorithm:
321///
322/// 1. Find k-nearest neighbours using approximate nearest neighbour search
323/// 2. Construct fuzzy simplicial set via smooth kNN distances
324/// 3. Symmetrise the graph using fuzzy set union
325/// 4. Initialise embedding via spectral decomposition
326/// 5. Optimise embedding using stochastic gradient descent
327///
328/// ### Params
329///
330/// * `data` - Input data matrix (samples × features)
331/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
332///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
333///   distances excluding self.
334/// * `umap_params` - The UMAP parameters.
335/// * `seed` - Seed for reproducibility.
336/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
337///   verbosity.
338///
339/// ### Returns
340///
341/// Embedding coordinates as `Vec<Vec<T>>` where outer vector has length
342/// `n_dim` and inner vectors have length `n_samples`. Each outer element
343/// represents one embedding dimension.
344pub fn umap<T>(
345    data: MatRef<T>,
346    precomputed_knn: PreComputedKnn<T>,
347    umap_params: &UmapParams<T>,
348    seed: usize,
349    verbose: usize,
350) -> Result<Vec<Vec<T>>, ManifoldsError>
351where
352    T: ManifoldsFloat,
353    HnswIndex<T>: HnswState<T>,
354    StandardNormal: Distribution<T>,
355    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
356{
357    let verbosity = parse_verbosity_level(verbose);
358
359    // parse various parameters
360    let init_type = parse_initilisation(
361        &umap_params.initialisation,
362        umap_params.randomised,
363        umap_params.init_range,
364    )
365    .unwrap_or_else(|| {
366        println!(
367            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
368            umap_params.initialisation,
369        );
370        EmbdInit::PcaInit {
371            range: None,
372            randomised: true,
373        }
374    });
375    let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
376
377    if verbosity.normal_verbosity() {
378        println!(
379            "Running umap with alpha: {:.2?} and beta: {:.2?}",
380            umap_params.optim_params.a, umap_params.optim_params.b
381        );
382    }
383
384    let (graph, _, _) = construct_umap_graph(
385        data,
386        precomputed_knn,
387        umap_params.k,
388        umap_params.ann_type.clone(),
389        &umap_params.umap_graph_params,
390        &umap_params.nn_params,
391        umap_params.optim_params.n_epochs,
392        seed,
393        verbose,
394    )?;
395
396    if verbosity.normal_verbosity() {
397        println!(
398            "Initialising embedding via {} layout...",
399            umap_params.initialisation
400        );
401    }
402
403    let start_layout = Instant::now();
404
405    let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
406
407    let graph_adj = coo_to_adjacency_list(&graph);
408
409    if verbosity.normal_verbosity() {
410        println!(
411            "Optimising embedding via {} ({} epochs) on {} edges...",
412            match optimiser {
413                UmapOptimiser::Adam => "Adam",
414                UmapOptimiser::Sgd => "SGD",
415                UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
416            },
417            umap_params.optim_params.n_epochs,
418            graph.col_indices.len().separate_with_underscores()
419        );
420    }
421
422    match optimiser {
423        UmapOptimiser::Adam => optimise_embedding_adam(
424            &mut embd,
425            &graph_adj,
426            &umap_params.optim_params,
427            seed as u64,
428            verbose,
429        )?,
430        UmapOptimiser::Sgd => {
431            optimise_embedding_sgd(
432                &mut embd,
433                &graph_adj,
434                &umap_params.optim_params,
435                seed as u64,
436                verbose,
437            )?;
438        }
439        UmapOptimiser::AdamParallel => {
440            optimise_embedding_adam_parallel(
441                &mut embd,
442                &graph_adj,
443                &umap_params.optim_params,
444                seed as u64,
445                verbose,
446            )?;
447        }
448    }
449
450    let end_layout = start_layout.elapsed();
451
452    if verbosity.normal_verbosity() {
453        println!(
454            "Initialised and optimised embedding in: {:.2?}.",
455            end_layout
456        );
457        println!("UMAP complete!");
458    }
459
460    // transpose: from [n_samples][n_dim] to [n_dim][n_samples]
461    let n_samples = embd.len();
462    let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
463
464    for sample_idx in 0..n_samples {
465        for dim_idx in 0..umap_params.n_dim {
466            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
467        }
468    }
469
470    Ok(transposed)
471}
472
473//////////
474// tSNE //
475//////////
476
477/// Main configuration for t-SNE dimensionality reduction
478#[derive(Debug, Clone)]
479pub struct TsneParams<T> {
480    ///  Number of output dimensions (typically 2)
481    pub n_dim: usize,
482    /// Perplexity parameter controlling neighbourhood size (typical: 5-50)
483    pub perplexity: T,
484    /// (Approximate) Nearest neighbour method. One of `"exhaustive"`, `"ivf"`,
485    /// `"hnsw"`, `"nndescent"`, `"annoy"`, `"kmknn"` or `"balltree"`.
486    pub ann_type: String,
487    /// Embedding initialisation method: `"pca"`, `"random"`, or `"spectral"`
488    pub initialisation: String,
489    /// Optional initialisation range
490    pub init_range: Option<T>,
491    /// Nearest neighbour parameters
492    pub nn_params: NearestNeighbourParams<T>,
493    /// tSNE optimisation parameters
494    pub optim_params: TsneOptimParams<T>,
495    /// Use randomised SVD for PCA initialisation
496    pub randomised_init: bool,
497}
498
499impl<T> Default for TsneParams<T>
500where
501    T: ManifoldsFloat,
502{
503    fn default() -> Self {
504        Self {
505            n_dim: 2,
506            perplexity: T::from_f64(30.0).unwrap(),
507            ann_type: "kmknn".to_string(),
508            initialisation: "pca".to_string(),
509            init_range: None,
510            nn_params: NearestNeighbourParams::default(),
511            optim_params: TsneOptimParams {
512                n_epochs: 1000,
513                lr: None,
514                early_exag_iter: 250,
515                early_exag_factor: T::from_f64(12.0).unwrap(),
516                late_exag_factor: None,
517                theta: T::from_f64(0.5).unwrap(),
518                n_interp_points: 3,
519            },
520            randomised_init: true,
521        }
522    }
523}
524
525impl<T> TsneParams<T>
526where
527    T: ManifoldsFloat,
528{
529    /// Create new t-SNE parameters with full control over every field.
530    ///
531    /// This constructor exposes every field directly, including the
532    /// optimiser parameters. For sensible defaults, use
533    /// [`TsneParams::default`] or [`TsneParams::new_default_2d`] instead.
534    ///
535    /// ### Params
536    ///
537    /// * `n_dim` - Number of output dimensions.
538    /// * `perplexity` - Perplexity parameter controlling neighbourhood size
539    ///   (typical: 5-50).
540    /// * `ann_type` - (Approximate) nearest neighbour method. One of
541    ///   `"exhaustive"`, `"ivf"`, `"hnsw"`, `"nndescent"`, `"annoy"`,
542    ///   `"kmknn"` or `"balltree"`.
543    /// * `initialisation` - Embedding initialisation method: `"pca"`,
544    ///   `"random"`, or `"spectral"`.
545    /// * `init_range` - Optional initialisation range to fix the initial
546    ///   embedding between certain values.
547    /// * `nn_params` - Nearest neighbour parameters.
548    /// * `optim_params` - t-SNE optimisation parameters. These cover the
549    ///   learning rate, number of epochs, early/late exaggeration, the
550    ///   Barnes-Hut `theta`, and the number of FFT interpolation points.
551    /// * `randomised_init` - Whether randomised SVD is used for PCA
552    ///   initialisation.
553    ///
554    /// ### Returns
555    ///
556    /// A fully specified set of t-SNE parameters.
557    #[allow(clippy::too_many_arguments)]
558    pub fn new(
559        n_dim: usize,
560        perplexity: T,
561        ann_type: String,
562        initialisation: String,
563        init_range: Option<T>,
564        nn_params: NearestNeighbourParams<T>,
565        optim_params: TsneOptimParams<T>,
566        randomised_init: bool,
567    ) -> Self {
568        Self {
569            n_dim,
570            perplexity,
571            ann_type,
572            initialisation,
573            init_range,
574            nn_params,
575            optim_params,
576            randomised_init,
577        }
578    }
579
580    /// Default parameters for standard 2D visualisation.
581    ///
582    /// Returns the default parameters but lets you tune `perplexity`, the
583    /// characteristic t-SNE knob controlling neighbourhood size.
584    ///
585    /// ### Params
586    ///
587    /// * `perplexity` - Perplexity parameter (typical: 5-50). Defaults to
588    ///   `30.0`.
589    ///
590    /// ### Returns
591    ///
592    /// Hopefully sensible standard parameters for 2D visualisation.
593    pub fn new_default_2d(perplexity: Option<T>) -> Self {
594        let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
595
596        Self {
597            perplexity,
598            ..Self::default()
599        }
600    }
601}
602
603/// Construct affinity graph for t-SNE from high-dimensional data
604///
605/// Performs the following steps:
606///
607/// 1. Runs k-nearest neighbour search where k = 3 × perplexity
608/// 2. Computes Gaussian affinities P(j|i) via binary search for target entropy
609/// 3. Symmetrises to joint probabilities: P_ij = (P(j|i) + P(i|j)) / 2N
610///
611/// ### Params
612///
613/// * `data` - Input data matrix (samples × features)
614/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
615///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
616///   distances excluding self.
617/// * `perplexity` - Target perplexity (effective number of neighbours,
618///   typical: 5-50)
619/// * `ann_type` - (Approximate) Nearest neighbour method: `"exhaustive"`,
620///   `"kmknn"`, `"balltree"`, `"annoy"`, `"hnsw"`, or `"nndescent"`. If you
621///   provide a weird string, the function will default to `"kmknn"`
622/// * `nn_params` - Nearest neighbour search parameters
623/// * `seed` - Random seed for reproducibility
624/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
625///   verbosity.
626///
627/// ### Returns
628///
629/// Tuple of:
630///
631/// - `CoordinateList<T>` containing symmetric joint probabilities P_ij
632/// - `Vec<Vec<usize>>` k-nearest neighbour indices for each point
633/// - `Vec<Vec<T>>` k-nearest neighbour distances for each point
634///
635/// # Notes
636///
637/// The k value is automatically set to `3 × perplexity`, clamped between 5 and
638/// n-1. This is standard practice in t-SNE implementations.
639pub fn construct_tsne_graph<T>(
640    data: MatRef<T>,
641    precomputed_knn: PreComputedKnn<T>,
642    perplexity: T,
643    ann_type: String,
644    nn_params: &NearestNeighbourParams<T>,
645    seed: usize,
646    verbose: usize,
647) -> TsneGraph<T>
648where
649    T: ManifoldsFloat,
650    HnswIndex<T>: HnswState<T>,
651    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
652{
653    let verbosity = parse_verbosity_level(verbose);
654
655    let (knn_indices, knn_dist) = match precomputed_knn {
656        Some((indices, distances)) => {
657            if verbosity.normal_verbosity() {
658                println!("Using precomputed kNN graph...");
659            }
660            (indices, distances)
661        }
662        None => {
663            let k_float = perplexity * T::from_f64(3.0).unwrap();
664            let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
665
666            if verbosity.normal_verbosity() {
667                println!("Running kNN search (k={}) using {}...", k, ann_type);
668            }
669
670            let start_knn = Instant::now();
671            let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
672
673            if verbosity.normal_verbosity() {
674                println!("kNN search done in: {:.2?}.", start_knn.elapsed());
675            }
676
677            result
678        }
679    };
680
681    if verbosity.normal_verbosity() {
682        println!("Computing Gaussian affinities and symmetrising...");
683    }
684
685    let start_graph = Instant::now();
686
687    let directed_graph = gaussian_knn_affinities(
688        &knn_indices,
689        &knn_dist,
690        perplexity,
691        T::from_f64(1e-5).unwrap(),
692        200,
693        nn_params.dist_metric == "euclidean",
694    )?;
695
696    let graph = symmetrise_affinities_tsne(directed_graph);
697
698    if verbosity.normal_verbosity() {
699        println!(
700            "Finalised graph generation in {:.2?}.",
701            start_graph.elapsed()
702        );
703    }
704
705    Ok((graph, knn_indices, knn_dist))
706}
707
708/// Run t-SNE dimensionality reduction
709///
710/// t-Distributed Stochastic Neighbour Embedding (t-SNE) is a technique for
711/// visualising high-dimensional data by reducing it to 2 dimensions. This
712/// version supports both Barnes-Hut approximation and FFT-accelerated
713/// interpolation for repulsive forces.
714///
715/// ### Algorithm
716///
717/// 1. Construct high-dimensional affinity graph via Gaussian kernels
718///    - k-NN search with k = 3 × perplexity
719///    - Binary search for precision to match target perplexity
720///    - Symmetrise to joint probabilities P_ij
721/// 2. Initialise low-dimensional embedding (typically via PCA)
722/// 3. Optimise embedding via gradient descent
723///    - Attractive forces: exact computation from graph
724///    - Repulsive forces: Barnes-Hut approximation or FFT-accelerated
725///      interpolation.
726///    - Early exaggeration (first 250 iterations)
727///    - Momentum switching at iteration 250
728///
729/// ### Params
730///
731/// * `data` - Input data matrix (samples × features)
732/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
733///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
734///   distances excluding self.
735/// * `params` - t-SNE parameters controlling algorithm behaviour
736/// * `approx_type` - Type of approximation to use for repulsive forces.
737///   Options: `"barnes_hut" | "bh"`, `"fft"`
738/// * `seed` - Random seed for reproducibility
739/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
740///   verbosity.
741///
742/// ### Returns
743///
744/// Embedding coordinates as `Vec<Vec<T>>` where outer vector has length
745/// `n_dim` and inner vectors have length `n_samples`. Each outer element
746/// represents one embedding dimension.
747///
748/// ### References
749///
750/// - van der Maaten & Hinton (2008): "Visualizing Data using t-SNE"
751/// - van der Maaten (2014): "Accelerating t-SNE using Tree-Based Algorithms"
752/// - Linderman et al. (2019): " Fast interpolation-based t-SNE for improved
753///   visualization of single-cell RNA-seq data"
754#[cfg(feature = "fft_tsne")]
755pub fn tsne<T>(
756    data: MatRef<T>,
757    precomputed_knn: PreComputedKnn<T>,
758    params: &TsneParams<T>,
759    approx_type: &str,
760    seed: usize,
761    verbose: usize,
762) -> Result<Vec<Vec<T>>, ManifoldsError>
763where
764    T: ManifoldsFloat + FftwFloat,
765    HnswIndex<T>: HnswState<T>,
766    StandardNormal: Distribution<T>,
767    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
768{
769    if params.n_dim != 2 {
770        return Err(ManifoldsError::IncorrectDim {
771            n_dim: params.n_dim,
772        });
773    }
774
775    let verbosity = parse_verbosity_level(verbose);
776
777    // 1. graph construction
778    let (graph, _, _) = construct_tsne_graph(
779        data,
780        precomputed_knn,
781        params.perplexity,
782        params.ann_type.clone(),
783        &params.nn_params,
784        seed,
785        verbose,
786    )?;
787
788    if verbosity.normal_verbosity() {
789        println!("Initialising embedding via {}...", &params.initialisation);
790    }
791
792    // 2. initialise embedding
793    let init_type = parse_initilisation(
794        &params.initialisation,
795        params.randomised_init,
796        params.init_range,
797    )
798    .unwrap_or_else(|| {
799        println!(
800            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
801            params.initialisation,
802        );
803        EmbdInit::PcaInit {
804            range: Some(T::from_f64(1e-2).unwrap()),
805            randomised: true,
806        }
807    });
808
809    let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
810
811    // parse the optimisation type
812    let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
813
814    // 3. optimise
815    let start_optim = Instant::now();
816    match tsne_approx {
817        TsneOpt::BarnesHut => {
818            if verbosity.normal_verbosity() {
819                println!(
820                    "Optimising via Barnes-Hut t-SNE ({} epochs)...",
821                    params.optim_params.n_epochs
822                );
823            }
824            optimise_bh_tsne(&mut embd, &params.optim_params, &graph, verbose);
825        }
826        #[cfg(feature = "fft_tsne")]
827        TsneOpt::Fft => {
828            if verbosity.normal_verbosity() {
829                println!(
830                    "Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
831                    params.optim_params.n_epochs
832                );
833            }
834            let _ = optimise_fft_tsne(&mut embd, &params.optim_params, &graph, verbose);
835        }
836    }
837
838    if verbosity.normal_verbosity() {
839        println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
840    }
841
842    // 4. transpose output: [n_samples][n_dim] → [n_dim][n_samples]
843    let n_samples = embd.len();
844    let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
845
846    for sample_idx in 0..n_samples {
847        for dim_idx in 0..params.n_dim {
848            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
849        }
850    }
851
852    Ok(transposed)
853}
854
855/// Run t-SNE dimensionality reduction
856///
857/// t-Distributed Stochastic Neighbour Embedding (t-SNE) is a technique for
858/// visualising high-dimensional data by reducing it to 2 dimensions. This
859/// version supports both Barnes-Hut approximation and FFT-accelerated
860/// interpolation for repulsive forces.
861///
862/// ### Algorithm
863///
864/// 1. Construct high-dimensional affinity graph via Gaussian kernels
865///    - k-NN search with k = 3 × perplexity
866///    - Binary search for precision to match target perplexity
867///    - Symmetrise to joint probabilities P_ij
868/// 2. Initialise low-dimensional embedding (typically via PCA)
869/// 3. Optimise embedding via gradient descent
870///    - Attractive forces: exact computation from graph
871///    - Repulsive forces: Barnes-Hut approximation or FFT-accelerated
872///      interpolation.
873///    - Early exaggeration (first 250 iterations)
874///    - Momentum switching at iteration 250
875///
876/// ### Params
877///
878/// * `data` - Input data matrix (samples × features)
879/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
880///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
881///   distances excluding self.
882/// * `params` - t-SNE parameters controlling algorithm behaviour
883/// * `approx_type` - Type of approximation to use for repulsive forces.
884///   Options: `"barnes_hut" | "bh"`, `"fft"`
885/// * `seed` - Random seed for reproducibility
886/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
887///   verbosity.
888///
889/// ### Returns
890///
891/// Embedding coordinates as `Vec<Vec<T>>` where outer vector has length
892/// `n_dim` and inner vectors have length `n_samples`. Each outer element
893/// represents one embedding dimension.
894///
895/// ### References
896///
897/// - van der Maaten & Hinton (2008): "Visualizing Data using t-SNE"
898/// - van der Maaten (2014): "Accelerating t-SNE using Tree-Based Algorithms"
899/// - Linderman et al. (2019): " Fast interpolation-based t-SNE for improved
900///   visualization of single-cell RNA-seq data"
901#[cfg(not(feature = "fft_tsne"))]
902pub fn tsne<T>(
903    data: MatRef<T>,
904    precomputed_knn: PreComputedKnn<T>,
905    params: &TsneParams<T>,
906    approx_type: &str,
907    seed: usize,
908    verbose: usize,
909) -> Result<Vec<Vec<T>>, ManifoldsError>
910where
911    T: ManifoldsFloat,
912    HnswIndex<T>: HnswState<T>,
913    StandardNormal: Distribution<T>,
914    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
915{
916    if params.n_dim != 2 {
917        return Err(ManifoldsError::IncorrectDim {
918            n_dim: params.n_dim,
919        });
920    }
921    let verbosity = parse_verbosity_level(verbose);
922
923    // 1. graph construction
924    let (graph, _, _) = construct_tsne_graph(
925        data,
926        precomputed_knn,
927        params.perplexity,
928        params.ann_type.clone(),
929        &params.nn_params,
930        seed,
931        verbose,
932    )?;
933
934    // 2. initialise embedding
935    if verbosity.normal_verbosity() {
936        println!("Initialising embedding via {}...", &params.initialisation);
937    }
938
939    let init_type = parse_initilisation(
940        &params.initialisation,
941        params.randomised_init,
942        params.init_range,
943    )
944    .unwrap_or_else(|| {
945        println!(
946            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
947            params.initialisation,
948        );
949        EmbdInit::PcaInit {
950            range: Some(T::from_f64(1e-2).unwrap()),
951            randomised: true,
952        }
953    });
954
955    let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
956
957    // parse the optimisation type
958    let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
959
960    // 3. optimise
961    let start_optim = Instant::now();
962    match tsne_approx {
963        TsneOpt::BarnesHut => {
964            if verbosity.normal_verbosity() {
965                println!(
966                    "Optimising via Barnes-Hut t-SNE ({} epochs)...",
967                    params.optim_params.n_epochs
968                );
969            }
970            optimise_bh_tsne(&mut embd, &params.optim_params, &graph, verbose);
971        }
972        #[cfg(not(feature = "fft_tsne"))]
973        TsneOpt::Fft => {
974            panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
975        }
976    }
977
978    if verbosity.normal_verbosity() {
979        println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
980    }
981
982    // 4. transpose output: [n_samples][n_dim] → [n_dim][n_samples]
983    let n_samples = embd.len();
984    let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
985
986    for sample_idx in 0..n_samples {
987        for dim_idx in 0..params.n_dim {
988            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
989        }
990    }
991
992    Ok(transposed)
993}
994
995///////////
996// PHATE //
997///////////
998
999/// PHATE parameters
1000#[derive(Debug, Clone)]
1001pub struct PhateParams<T> {
1002    /// Number of output dimensions (default: 2)
1003    pub n_dim: usize,
1004    /// Number of neighbours to use
1005    pub k: usize,
1006    /// (Approximate) Nearest neighbour method. One of `"exhaustive"`, `"ivf"`,
1007    /// `"hnsw"`, `"nndescent"`, `"annoy"`, `"kmknn"`, or `"balltree"`.
1008    pub ann_type: String,
1009    /// Nearest neighbour search parameters to use
1010    pub ann_params: NearestNeighbourParams<T>,
1011    /// Diffusion parameters to use
1012    pub diffusion_params: PhateDiffusionParams<T>,
1013    /// Which MDS implementation to use
1014    pub mds_method: String,
1015    /// Optional number of iterations for MDS fitting
1016    pub mds_iter: Option<usize>,
1017    /// Shall randomised SVD be used for PCA-based initialisation
1018    pub randomised: bool,
1019}
1020
1021impl<T> Default for PhateParams<T>
1022where
1023    T: ManifoldsFloat,
1024{
1025    fn default() -> Self {
1026        let diffusion_params = PhateDiffusionParams::new(
1027            Some(T::from_f64(40.0).unwrap()),
1028            T::from_f64(1.0).unwrap(),
1029            T::from_f64(1e-4).unwrap(),
1030            "average".to_string(),
1031            None,
1032            "spectral".to_string(),
1033            None,
1034            None,
1035            None,
1036            T::from_f64(1.0).unwrap(),
1037        );
1038
1039        Self {
1040            n_dim: 2,
1041            k: 5,
1042            ann_type: "kmknn".to_string(),
1043            ann_params: NearestNeighbourParams::default(),
1044            diffusion_params,
1045            mds_method: "sgd_dense".to_string(),
1046            mds_iter: None,
1047            randomised: true,
1048        }
1049    }
1050}
1051
1052impl<T> PhateParams<T>
1053where
1054    T: ManifoldsFloat,
1055{
1056    /// Create new PHATE parameters with full control over every field.
1057    ///
1058    /// This constructor exposes every field directly, including the
1059    /// diffusion parameters. For sensible defaults, use
1060    /// [`PhateParams::default`] or [`PhateParams::new_default_2d`] instead.
1061    ///
1062    /// ### Params
1063    ///
1064    /// * `n_dim` - Number of output dimensions.
1065    /// * `k` - Number of nearest neighbours.
1066    /// * `ann_type` - (Approximate) nearest neighbour method. One of
1067    ///   `"exhaustive"`, `"ivf"`, `"hnsw"`, `"nndescent"`, `"annoy"`,
1068    ///   `"kmknn"` or `"balltree"`.
1069    /// * `ann_params` - Nearest neighbour search parameters.
1070    /// * `diffusion_params` - Diffusion parameters. These cover the alpha
1071    ///   decay, kernel bandwidth scaling, graph symmetry, landmarks, SVD
1072    ///   components, diffusion time selection, and the informational distance
1073    ///   constant `gamma`.
1074    /// * `mds_method` - Which MDS implementation to use, e.g. `"sgd_dense"`.
1075    /// * `mds_iter` - Optional number of iterations for MDS fitting; `None`
1076    ///   uses the MDS default.
1077    /// * `randomised` - Whether randomised SVD is used for PCA-based
1078    ///   initialisation.
1079    ///
1080    /// ### Returns
1081    ///
1082    /// A fully specified set of PHATE parameters.
1083    #[allow(clippy::too_many_arguments)]
1084    pub fn new(
1085        n_dim: usize,
1086        k: usize,
1087        ann_type: String,
1088        ann_params: NearestNeighbourParams<T>,
1089        diffusion_params: PhateDiffusionParams<T>,
1090        mds_method: String,
1091        mds_iter: Option<usize>,
1092        randomised: bool,
1093    ) -> Self {
1094        Self {
1095            n_dim,
1096            k,
1097            ann_type,
1098            ann_params,
1099            diffusion_params,
1100            mds_method,
1101            mds_iter,
1102            randomised,
1103        }
1104    }
1105
1106    /// Default parameters for standard 2D visualisation.
1107    ///
1108    /// Returns the default parameters but lets you tune `k`, the number of
1109    /// nearest neighbours used to build the affinity graph.
1110    ///
1111    /// ### Params
1112    ///
1113    /// * `k` - Number of nearest neighbours. Defaults to `5`.
1114    ///
1115    /// ### Returns
1116    ///
1117    /// Hopefully sensible standard parameters for 2D visualisation.
1118    pub fn new_default_2d(k: Option<usize>) -> Self {
1119        let k = k.unwrap_or(5);
1120
1121        Self {
1122            k,
1123            ..Self::default()
1124        }
1125    }
1126}
1127
1128/////////////////////
1129// PHATE diffusion //
1130/////////////////////
1131
1132/// Build the PHATE diffusion operator from high-dimensional data
1133///
1134/// Runs kNN search (or uses precomputed results), computes alpha decay
1135/// affinities, and constructs either a full or landmark diffusion operator
1136/// depending on `phate_params.n_landmarks`.
1137///
1138/// ### Params
1139///
1140/// * `data` - Input data matrix (samples × features)
1141/// * `k` - Number of nearest neighbours for graph construction
1142/// * `precomputed_knn` - Precomputed kNN indices and distances as
1143///   `Some((Vec<Vec<usize>>, Vec<Vec<T>>))`, or `None` to run search
1144///   internally. Indices and distances must exclude self.
1145/// * `ann_type` - (Approximate) Nearest neighbour method: `"exhaustive"`,
1146///   `"kmknn"`, `"balltree"`, `"annoy"`, `"hnsw"`, or `"nndescent"`. If you
1147///   provide a weird string, the function will default to `"kmknn"`
1148/// * `nn_params` - Nearest neighbour search parameters
1149/// * `phate_params` - Full PHATE parameter struct (uses `k`, `decay`,
1150///   `bandwidth_scale`, `thresh`, `symmetrise`, `n_landmarks`,
1151///   `landmark_mode`)
1152/// * `seed` - Random seed for reproducibility
1153/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
1154///   verbosity.
1155///
1156/// ### Returns
1157///
1158/// `PhateDiffusion::Full { operator }` when `n_landmarks` is `None` or
1159/// \>= N, otherwise `PhateDiffusion::Landmark { landmarks }` containing
1160/// the compressed landmark operator and interpolation matrices.
1161#[allow(clippy::too_many_arguments)]
1162pub fn construct_phate_diffusion<T>(
1163    data: MatRef<T>,
1164    k: usize,
1165    precomputed_knn: PreComputedKnn<T>,
1166    ann_type: &str,
1167    nn_params: &NearestNeighbourParams<T>,
1168    phate_params: &PhateParams<T>,
1169    seed: usize,
1170    verbose: usize,
1171) -> Result<PhateDiffusion<T>, ManifoldsError>
1172where
1173    T: ManifoldsFloat,
1174    HnswIndex<T>: HnswState<T>,
1175    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1176{
1177    let verbosity = parse_verbosity_level(verbose);
1178
1179    let (knn_indices, knn_dist) = match precomputed_knn {
1180        Some((indices, distances)) => {
1181            if verbosity.normal_verbosity() {
1182                println!("Using precomputed kNN graph...");
1183            }
1184            (indices, distances)
1185        }
1186        None => {
1187            if verbosity.normal_verbosity() {
1188                println!(
1189                    "Running (approximate) nearest neighbour search using {}...",
1190                    ann_type
1191                );
1192            }
1193            let start_knn = Instant::now();
1194            let result = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
1195            if verbosity.normal_verbosity() {
1196                println!("kNN search done in: {:.2?}.", start_knn.elapsed());
1197            }
1198            result
1199        }
1200    };
1201
1202    if verbosity.normal_verbosity() {
1203        println!("Calculating alpha decay affinities");
1204    }
1205    let start_alpha_affinities = Instant::now();
1206
1207    let graph = phate_alpha_decay_affinities(
1208        &knn_indices,
1209        &knn_dist,
1210        phate_params.k,
1211        phate_params.diffusion_params.decay,
1212        phate_params.diffusion_params.bandwidth_scale,
1213        phate_params.diffusion_params.thresh,
1214        &phate_params.diffusion_params.graph_symmetry,
1215        nn_params.dist_metric == "euclidean",
1216    );
1217
1218    if verbosity.normal_verbosity() {
1219        println!(
1220            "Alpha decay affinity calculations done in: {:.2?}.",
1221            start_alpha_affinities.elapsed()
1222        );
1223    }
1224
1225    let affinity = coo_to_csr(&graph);
1226    let diffusion_op = build_diffusion_operator(&affinity);
1227
1228    match phate_params.diffusion_params.n_landmarks {
1229        None => Ok(PhateDiffusion::Full {
1230            operator: diffusion_op,
1231        }),
1232        Some(n_landmarks) if n_landmarks >= data.nrows() => Ok(PhateDiffusion::Full {
1233            operator: diffusion_op,
1234        }),
1235        Some(n_landmarks) => {
1236            if verbosity.normal_verbosity() {
1237                println!(" Building {} landmarks...", n_landmarks);
1238            }
1239            let start_landmarks = Instant::now();
1240            let landmarks = PhateLandmarks::build(
1241                data,
1242                &affinity,
1243                &diffusion_op,
1244                n_landmarks,
1245                &phate_params.diffusion_params.landmark_method,
1246                &nn_params.dist_metric,
1247                seed,
1248                Some(100),
1249                verbose,
1250            )?;
1251            if verbosity.normal_verbosity() {
1252                println!(
1253                    " Landmarks generated in: {:.2?}.",
1254                    start_landmarks.elapsed()
1255                );
1256            }
1257            Ok(PhateDiffusion::Landmark { landmarks })
1258        }
1259    }
1260}
1261
1262/// Run PHATE dimensionality reduction
1263///
1264/// Potential of Heat-diffusion for Affinity-based Transition Embedding
1265/// (PHATE) learns a low-dimensional embedding that preserves the
1266/// diffusion geometry of the data.
1267///
1268/// ### Algorithm
1269///
1270/// 1. Build affinity graph via alpha decay kernel on kNN distances
1271/// 2. Construct row-stochastic diffusion operator P = D^{-1} K
1272/// 3. Determine diffusion time t (knee of Von Neumann entropy, or fixed)
1273/// 4. Compute diffusion potential: log transformation of P^t by default
1274/// 5. Embed via MDS on pairwise potential distances
1275///
1276/// Steps 2–4 optionally operate on a compressed landmark representation
1277/// (N × L instead of N × N) when `phate_params.n_landmarks` is set.
1278/// Pairwise distances and MDS (step 5) always run in the full N × N space;
1279/// however, to avoid holding the full N x N matrix in memory, you can
1280/// use a streaming version of MDS that computes the distances on the fly.
1281///
1282/// ### Params
1283///
1284/// * `data` - Input data matrix (samples × features)
1285/// * `precomputed_knn` - Precomputed kNN indices and distances as
1286///   `Some((Vec<Vec<usize>>, Vec<Vec<T>>))`, or `None` to run search
1287///   internally. Indices and distances must exclude self.
1288/// * `phate_params` - PHATE parameters. See `PhateParams` for full
1289///   documentation of each field.
1290/// * `seed` - Random seed for reproducibility
1291/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
1292///   verbosity.
1293///
1294/// ### Returns
1295///
1296/// Embedding coordinates as `Vec<Vec<T>>` where the outer vector has
1297/// length `n_dim` and each inner vector has length `n_samples`. Each
1298/// outer element represents one embedding dimension.
1299///
1300/// ### References
1301///
1302/// - Moon et al. (2019): "Visualizing Structure and Transitions in
1303///   High-Dimensional Biological Data" (Nature Biotechnology)
1304pub fn phate<T>(
1305    data: MatRef<T>,
1306    precomputed_knn: PreComputedKnn<T>,
1307    phate_params: PhateParams<T>,
1308    seed: usize,
1309    verbose: usize,
1310) -> Result<Vec<Vec<T>>, ManifoldsError>
1311where
1312    T: ManifoldsFloat,
1313    HnswIndex<T>: HnswState<T>,
1314    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1315    StandardNormal: Distribution<T>,
1316{
1317    let start_phate = Instant::now();
1318    let verbosity = parse_verbosity_level(verbose);
1319
1320    let phate_diffusion = construct_phate_diffusion(
1321        data,
1322        phate_params.k,
1323        precomputed_knn,
1324        &phate_params.ann_type,
1325        &phate_params.ann_params,
1326        &phate_params,
1327        seed,
1328        verbose,
1329    )?;
1330
1331    let start_t = Instant::now();
1332    let t = match phate_params.diffusion_params.t {
1333        PhateTime::Auto { t_max } => {
1334            if verbosity.normal_verbosity() {
1335                println!("Finding optimal t (t_max={})...", t_max);
1336            }
1337            match &phate_diffusion {
1338                PhateDiffusion::Landmark { landmarks } => landmarks.find_optimal_t(t_max),
1339                PhateDiffusion::Full { operator } => {
1340                    let entropy = landmark_von_neumann_entropy(operator, t_max)?;
1341                    Ok(find_knee_point(&entropy))
1342                }
1343            }
1344        }
1345        PhateTime::Fixed(t) => Ok(t),
1346    }?;
1347    if verbosity.normal_verbosity() {
1348        println!("Identified t = {} in {:.2?}.", t, start_t.elapsed());
1349    }
1350
1351    let mds_method = parse_mds_method(&phate_params.mds_method).unwrap_or_default();
1352    let dist = parse_ann_dist(&phate_params.ann_params.dist_metric).unwrap_or_default();
1353    let mds_params = MdsOptimParams::new(
1354        data.nrows(),
1355        phate_params.randomised,
1356        phate_params.mds_iter,
1357        None,
1358    );
1359
1360    let start_embed = Instant::now();
1361
1362    let embedding = match phate_diffusion {
1363        PhateDiffusion::Full { operator } => {
1364            if verbosity.normal_verbosity() {
1365                println!("Powering diffusion operator...");
1366            }
1367            let powered = matrix_power(&operator, t)?;
1368            let potential = calculate_potential(&powered, 1, phate_params.diffusion_params.gamma)?;
1369
1370            if verbosity.normal_verbosity() {
1371                println!(
1372                    "Potential shape: {} × {} - calculated in {:.2?}.",
1373                    potential.shape().0,
1374                    potential.shape().1,
1375                    start_embed.elapsed()
1376                );
1377            }
1378
1379            let res = match mds_method {
1380                MdsMethod::ClassicMds => {
1381                    if verbosity.normal_verbosity() {
1382                        println!("Computing pairwise distances, running classic MDS...");
1383                    }
1384                    let distances = compute_potential_distances(&potential, &dist);
1385                    classic_mds(&distances, phate_params.n_dim, mds_params.randomised, seed)
1386                }
1387                MdsMethod::SgdDense => {
1388                    if verbosity.normal_verbosity() {
1389                        println!("Computing pairwise distances, running SGD-MDS...");
1390                    }
1391                    let distances = compute_potential_distances(&potential, &dist);
1392                    sgd_mds(
1393                        &distances,
1394                        phate_params.n_dim,
1395                        &mds_params,
1396                        None,
1397                        seed,
1398                        verbose,
1399                    )
1400                }
1401            }?;
1402
1403            res
1404        }
1405        PhateDiffusion::Landmark { landmarks } => {
1406            if verbosity.normal_verbosity() {
1407                println!(
1408                    "Powering landmark operator ({} landmarks)...",
1409                    landmarks.get_n_landmarks()
1410                );
1411            }
1412            let landmark_powered = landmarks.power(t)?;
1413            let landmark_potential =
1414                calculate_potential(&landmark_powered, 1, phate_params.diffusion_params.gamma)?;
1415
1416            if verbosity.normal_verbosity() {
1417                println!(
1418                    "Landmark potential shape: {} × {} - calculated in {:.2?}.",
1419                    landmark_potential.shape().0,
1420                    landmark_potential.shape().1,
1421                    start_embed.elapsed()
1422                );
1423                println!("Computing landmark pairwise distances...");
1424            }
1425
1426            let landmark_distances = compute_potential_distances(&landmark_potential, &dist);
1427
1428            let landmark_mds_params = MdsOptimParams::new(
1429                landmarks.get_n_landmarks(),
1430                phate_params.randomised,
1431                None,
1432                None,
1433            );
1434
1435            if verbosity.normal_verbosity() {
1436                println!("Running MDS on landmarks...");
1437            }
1438
1439            let landmark_embedding = match mds_method {
1440                MdsMethod::ClassicMds => classic_mds(
1441                    &landmark_distances,
1442                    phate_params.n_dim,
1443                    landmark_mds_params.randomised,
1444                    seed,
1445                ),
1446                _ => sgd_mds(
1447                    &landmark_distances,
1448                    phate_params.n_dim,
1449                    &landmark_mds_params,
1450                    None,
1451                    seed,
1452                    verbose,
1453                ),
1454            }?;
1455
1456            if verbosity.normal_verbosity() {
1457                println!("Interpolating to full N points via Nyström...");
1458            }
1459            landmarks.interpolate_embedding(&landmark_embedding)
1460        }
1461    };
1462
1463    if verbosity.normal_verbosity() {
1464        println!("Ran MDS in {:.2?}.", start_embed.elapsed());
1465        println!("Finished running PHATE in {:.2?}.", start_phate.elapsed());
1466    }
1467
1468    let n_samples = embedding.len();
1469    let mut transposed = vec![vec![T::zero(); n_samples]; phate_params.n_dim];
1470    for i in 0..n_samples {
1471        for d in 0..phate_params.n_dim {
1472            transposed[d][i] = embedding[i][d];
1473        }
1474    }
1475
1476    Ok(transposed)
1477}
1478
1479////////////
1480// PaCMAP //
1481////////////
1482
1483////////////
1484// Params //
1485////////////
1486
1487/// Parameters for PaCMAP dimensionality reduction.
1488#[derive(Debug, Clone)]
1489pub struct PacmapParams<T> {
1490    /// Output dimensionality. Default 2.
1491    pub n_dim: usize,
1492    /// Number of near neighbours. Default 10 (paper default; lower than UMAP's
1493    /// 15 since PaCMAP is less sensitive to k).
1494    pub k: usize,
1495    /// (Approximate) Nearest neighbour method. One of `"exhaustive"`, `"ivf"`,
1496    /// `"hnsw"`, `"nndescent"`, `"annoy"`, `"kmknn"` or `"balltree"`.
1497    pub ann_type: String,
1498    /// Which optimiser to use. Options are `"adam"` and `"adam_parallel"`
1499    pub optimiser_type: String,
1500    /// Mid-near pairs per point. Default 2.
1501    pub n_mid_near: usize,
1502    /// Further (random) pairs per point. Default 2.
1503    pub n_further: usize,
1504    /// Start index into kNN list for mid-near candidate window. Default 4
1505    /// (skip the 4 nearest).
1506    pub mn_candidate_start: usize,
1507    /// End index into kNN list for mid-near candidate window. Default 50.
1508    /// Requires k >= this value.
1509    pub mn_candidate_end: usize,
1510    /// Embedding initialisation. Default `"pca"`. PCA is strongly recommended
1511    /// for PaCMAP as random init degrades global structure.
1512    pub initialisation: String,
1513    /// Nearest neighbour search parameters.
1514    pub nn_params: NearestNeighbourParams<T>,
1515    /// Optimiser parameters.
1516    pub optim_params: PacmapOptimParams<T>,
1517}
1518
1519impl<T> Default for PacmapParams<T>
1520where
1521    T: ManifoldsFloat,
1522{
1523    fn default() -> Self {
1524        Self {
1525            n_dim: 2,
1526            k: 50,
1527            ann_type: "kmknn".to_string(),
1528            optimiser_type: "adam_parallel".to_string(),
1529            n_mid_near: 2,
1530            n_further: 2,
1531            mn_candidate_start: 4,
1532            mn_candidate_end: 50,
1533            initialisation: "pca".to_string(),
1534            nn_params: NearestNeighbourParams::default(),
1535            optim_params: PacmapOptimParams::default(),
1536        }
1537    }
1538}
1539
1540impl<T> PacmapParams<T>
1541where
1542    T: ManifoldsFloat,
1543{
1544    /// Generate a new instance of the PaCMAP parameters with full control over
1545    /// every field.
1546    ///
1547    /// For sensible defaults, use [`PacmapParams::default`] or
1548    /// [`PacmapParams::new_default_2d`] instead.
1549    ///
1550    /// Note: `k` is clamped to at least `mn_candidate_end`, since the mid-near
1551    /// candidate window indexes into the kNN list and must not run past its
1552    /// end.
1553    ///
1554    /// ### Params
1555    ///
1556    /// * `n_dim` - Number of dimensions for the embedding.
1557    /// * `k` - Number of near neighbours. Clamped to at least
1558    ///   `mn_candidate_end`.
1559    /// * `ann_type` - (Approximate) nearest neighbour method. One of
1560    ///   `"exhaustive"`, `"ivf"`, `"hnsw"`, `"nndescent"`, `"annoy"`,
1561    ///   `"kmknn"` or `"balltree"`.
1562    /// * `optimiser_type` - Which optimiser to use. Options are `"adam"` and
1563    ///   `"adam_parallel"`.
1564    /// * `n_mid_near` - Mid-near pairs per point.
1565    /// * `n_further` - Further (random) pairs per point.
1566    /// * `mn_candidate_start` - Start index into the kNN list for the mid-near
1567    ///   candidate window.
1568    /// * `mn_candidate_end` - End index into the kNN list for the mid-near
1569    ///   candidate window. Requires `k >= this value`.
1570    /// * `initialisation` - Embedding initialisation. PCA is strongly
1571    ///   recommended for PaCMAP as random init degrades global structure.
1572    /// * `nn_params` - Nearest neighbour search parameters.
1573    /// * `optim_params` - Optimiser parameters.
1574    ///
1575    /// ### Returns
1576    ///
1577    /// A fully specified set of PaCMAP parameters.
1578    #[allow(clippy::too_many_arguments)]
1579    pub fn new(
1580        n_dim: usize,
1581        k: usize,
1582        ann_type: String,
1583        optimiser_type: String,
1584        n_mid_near: usize,
1585        n_further: usize,
1586        mn_candidate_start: usize,
1587        mn_candidate_end: usize,
1588        initialisation: String,
1589        nn_params: NearestNeighbourParams<T>,
1590        optim_params: PacmapOptimParams<T>,
1591    ) -> Self {
1592        let k = k.max(mn_candidate_end);
1593
1594        Self {
1595            n_dim,
1596            k,
1597            ann_type,
1598            optimiser_type,
1599            n_mid_near,
1600            n_further,
1601            mn_candidate_start,
1602            mn_candidate_end,
1603            initialisation,
1604            nn_params,
1605            optim_params,
1606        }
1607    }
1608
1609    /// Default parameters for standard 2D visualisation.
1610    ///
1611    /// Returns the default parameters but lets you tune `k`, the number of
1612    /// near neighbours. Note that `k` is clamped to at least
1613    /// `mn_candidate_end` (default `50`).
1614    ///
1615    /// ### Params
1616    ///
1617    /// * `k` - Number of near neighbours. Defaults to `50`.
1618    ///
1619    /// ### Returns
1620    ///
1621    /// Hopefully sensible standard parameters for 2D visualisation.
1622    pub fn new_default_2d(k: Option<usize>) -> Self {
1623        let default = Self::default();
1624        let k = k.unwrap_or(default.k).max(default.mn_candidate_end);
1625
1626        Self { k, ..default }
1627    }
1628}
1629
1630//////////
1631// Main //
1632//////////
1633
1634/// Run PaCMAP dimensionality reduction.
1635///
1636/// Pairwise Controlled Manifold Approximation (PaCMAP) is a dimensionality
1637/// reduction method that explicitly balances local and global structure
1638/// preservation via three pair types and a phased optimisation schedule.
1639///
1640/// 1. Find k-nearest neighbours
1641/// 2. Construct near, mid-near, and further pairs
1642/// 3. Initialise embedding (PCA strongly recommended)
1643/// 4. Optimise via three-phase Adam with pair-type weight schedule
1644///
1645/// ### Params
1646///
1647/// * `data` - Input data matrix (samples × features).
1648/// * `precomputed_knn` - Optional precomputed kNN. Must have been computed
1649///   with k >= `params.mn_candidate_end` to support mid-near sampling.
1650/// * `params` - PaCMAP parameters.
1651/// * `seed` - Seed for reproducibility.
1652/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
1653///   verbosity.
1654///
1655/// ### Returns
1656///
1657/// Embedding as `Vec<Vec<T>>` with shape `[n_dim][n_samples]`.
1658pub fn pacmap<T>(
1659    data: MatRef<T>,
1660    precomputed_knn: PreComputedKnn<T>,
1661    params_pacmap: &PacmapParams<T>,
1662    seed: usize,
1663    verbose: usize,
1664) -> Result<Vec<Vec<T>>, ManifoldsError>
1665where
1666    T: ManifoldsFloat,
1667    HnswIndex<T>: HnswState<T>,
1668    StandardNormal: Distribution<T>,
1669    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1670{
1671    let n_samples = data.nrows();
1672
1673    let verbosity = parse_verbosity_level(verbose);
1674
1675    let (knn_indices, _) = match precomputed_knn {
1676        Some((indices, distances)) => {
1677            if verbosity.normal_verbosity() {
1678                println!("Using precomputed kNN graph...");
1679            }
1680            (indices, distances)
1681        }
1682        None => {
1683            if verbosity.normal_verbosity() {
1684                println!(
1685                    "Running (approximate) nearest neighbour search using {} (k={})...",
1686                    params_pacmap.ann_type, params_pacmap.k
1687                );
1688            }
1689            let start_knn = Instant::now();
1690            let result = run_ann_search(
1691                data,
1692                params_pacmap.k,
1693                params_pacmap.ann_type.clone(),
1694                &params_pacmap.nn_params,
1695                seed,
1696                verbose,
1697            )?;
1698            if verbosity.normal_verbosity() {
1699                println!("kNN search done in: {:.2?}.", start_knn.elapsed());
1700            }
1701            result
1702        }
1703    };
1704
1705    if verbosity.normal_verbosity() {
1706        println!("Constructing PaCMAP pairs...");
1707    }
1708
1709    let start_pairs = Instant::now();
1710
1711    let pairs: PacmapPairs = construct_pacmap_pairs(
1712        &knn_indices,
1713        params_pacmap.n_mid_near,
1714        params_pacmap.n_further,
1715        params_pacmap.mn_candidate_start,
1716        params_pacmap.mn_candidate_end,
1717        seed as u64,
1718    );
1719
1720    let end_pairs = start_pairs.elapsed();
1721
1722    if verbosity.normal_verbosity() {
1723        println!(
1724            "Pairs generated in {:.2?}: {} near, {} mid-near, {} further.",
1725            end_pairs,
1726            pairs.near.len().separate_with_underscores(),
1727            pairs.mid_near.len().separate_with_underscores(),
1728            pairs.further.len().separate_with_underscores()
1729        );
1730    }
1731
1732    let init_type =
1733        parse_initilisation(&params_pacmap.initialisation, true, None).unwrap_or_else(|| {
1734            println!(
1735                "Unknown initialisation provided: {:?}. Defaulting to PCA.",
1736                params_pacmap.initialisation,
1737            );
1738            EmbdInit::PcaInit {
1739                range: None,
1740                randomised: true,
1741            }
1742        });
1743
1744    if verbosity.normal_verbosity() {
1745        println!(
1746            "Initialising embedding via {} layout...",
1747            params_pacmap.initialisation
1748        );
1749    }
1750
1751    let dummy_graph = knn_to_coo_unweighted(&knn_indices);
1752    let mut embd = initialise_embedding(
1753        &init_type,
1754        params_pacmap.n_dim,
1755        seed as u64,
1756        &dummy_graph,
1757        data,
1758    )?;
1759
1760    let optimiser = parse_pacmap_optimiser(&params_pacmap.optimiser_type).unwrap_or_default();
1761
1762    let start_optim = Instant::now();
1763
1764    match optimiser {
1765        PacMapOptimiser::AdamParallel => {
1766            let _ =
1767                optimise_pacmap_parallel(&mut embd, &pairs, &params_pacmap.optim_params, verbose);
1768        }
1769        PacMapOptimiser::Adam => {
1770            let _ = optimise_pacmap(&mut embd, &pairs, &params_pacmap.optim_params, verbose);
1771        }
1772    }
1773
1774    let end_optim = start_optim.elapsed();
1775
1776    if verbosity.normal_verbosity() {
1777        println!("Optimisation done in {:.2?}.", end_optim);
1778        println!("PaCMAP complete!");
1779    }
1780
1781    // transpose from [n_samples][n_dim] to [n_dim][n_samples]
1782    let mut transposed = vec![vec![T::zero(); n_samples]; params_pacmap.n_dim];
1783    for sample_idx in 0..n_samples {
1784        for dim_idx in 0..params_pacmap.n_dim {
1785            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
1786        }
1787    }
1788
1789    Ok(transposed)
1790}
1791
1792////////////////////
1793// Diffusion maps //
1794////////////////////
1795
1796/// Parameters for classical diffusion maps (Coifman & Lafon, 2006).
1797#[derive(Debug, Clone)]
1798pub struct DiffusionMapsParams<T> {
1799    /// Output embedding dimensionality (default: 2)
1800    pub n_dim: usize,
1801    /// Number of nearest neighbours used to build the graph
1802    pub k: usize,
1803    /// ANN algorithm name (see `NearestNeighbourParams`)
1804    pub ann_type: String,
1805    /// Nearest neighbour search parameters
1806    pub ann_params: NearestNeighbourParams<T>,
1807    /// Multiplicative factor applied to the adaptive kernel bandwidth
1808    pub bandwidth_scale: T,
1809    /// Sparsity threshold applied to kernel entries
1810    pub thresh: T,
1811    /// Graph symmetrisation: `"add"`, `"multiply"`, `"mnn"` or `"none"`
1812    pub graph_symmetry: String,
1813    /// Anisotropic density-correction exponent in [0, 1]. 0 gives the
1814    /// normalised graph Laplacian, 0.5 the Fokker-Planck operator, 1 the
1815    /// Laplace-Beltrami operator.
1816    pub alpha_norm: T,
1817    /// Diffusion time: `Auto` picks the VNE knee, `Fixed(t)` uses `t` directly.
1818    pub t: PhateTime,
1819    /// Landmark count. `None` or `>= n` runs full DM.
1820    pub n_landmarks: Option<usize>,
1821    /// `"random"`, `"spectral"` or `"density"`
1822    pub landmark_method: String,
1823    /// Components for spectral landmark selection (ignored otherwise)
1824    pub n_svd: Option<usize>,
1825}
1826
1827impl<T> Default for DiffusionMapsParams<T>
1828where
1829    T: ManifoldsFloat,
1830{
1831    fn default() -> Self {
1832        Self {
1833            n_dim: 2,
1834            k: 5,
1835            ann_type: "kmknn".to_string(),
1836            ann_params: NearestNeighbourParams::default(),
1837            bandwidth_scale: T::from_f64(1.0).unwrap(),
1838            thresh: T::from_f64(1e-4).unwrap(),
1839            graph_symmetry: "add".to_string(),
1840            alpha_norm: T::from_f64(1.0).unwrap(),
1841            t: parse_phate_time(None, PHATE_MAX_T),
1842            n_landmarks: None,
1843            landmark_method: "spectral".to_string(),
1844            n_svd: None,
1845        }
1846    }
1847}
1848
1849impl<T> DiffusionMapsParams<T>
1850where
1851    T: ManifoldsFloat,
1852{
1853    /// Construct a new `DiffusionMapsParams` with full control over every
1854    /// field.
1855    ///
1856    /// For sensible defaults, use [`DiffusionMapsParams::default`] or
1857    /// [`DiffusionMapsParams::new_default_2d`] instead.
1858    ///
1859    /// ### Params
1860    ///
1861    /// * `n_dim` - Output embedding dimensionality.
1862    /// * `k` - Number of nearest neighbours for graph construction.
1863    /// * `ann_type` - ANN algorithm: `"exhaustive"`, `"kmknn"`, `"balltree"`,
1864    ///   `"annoy"`, `"hnsw"`, or `"nndescent"`.
1865    /// * `ann_params` - Nearest neighbour search parameters.
1866    /// * `bandwidth_scale` - Multiplicative factor on the adaptive kernel
1867    ///   bandwidth.
1868    /// * `thresh` - Kernel entries below this value are set to zero.
1869    /// * `graph_symmetry` - Symmetrisation method: `"add"`, `"multiply"`,
1870    ///   `"mnn"` or `"none"`.
1871    /// * `alpha_norm` - Anisotropic normalisation exponent in [0, 1]. 0 gives
1872    ///   the normalised graph Laplacian, 0.5 the Fokker-Planck operator, 1 the
1873    ///   Laplace-Beltrami operator.
1874    /// * `t` - Diffusion time: `Auto` picks the VNE knee, `Fixed(t)` uses `t`
1875    ///   directly.
1876    /// * `n_landmarks` - Number of landmarks. `None` or `>= n` runs full DM.
1877    /// * `landmark_method` - `"random"`, `"spectral"`, or `"density"`.
1878    /// * `n_svd` - SVD components for spectral landmark selection. Ignored
1879    ///   otherwise.
1880    ///
1881    /// ### Returns
1882    ///
1883    /// A fully specified set of diffusion maps parameters.
1884    #[allow(clippy::too_many_arguments)]
1885    pub fn new(
1886        n_dim: usize,
1887        k: usize,
1888        ann_type: String,
1889        ann_params: NearestNeighbourParams<T>,
1890        bandwidth_scale: T,
1891        thresh: T,
1892        graph_symmetry: String,
1893        alpha_norm: T,
1894        t: PhateTime,
1895        n_landmarks: Option<usize>,
1896        landmark_method: String,
1897        n_svd: Option<usize>,
1898    ) -> Self {
1899        Self {
1900            n_dim,
1901            k,
1902            ann_type,
1903            ann_params,
1904            bandwidth_scale,
1905            thresh,
1906            graph_symmetry,
1907            alpha_norm,
1908            t,
1909            n_landmarks,
1910            landmark_method,
1911            n_svd,
1912        }
1913    }
1914
1915    /// Default parameters for standard 2D visualisation.
1916    ///
1917    /// Returns the default parameters but lets you tune `k`, the number of
1918    /// nearest neighbours used to build the graph.
1919    ///
1920    /// ### Params
1921    ///
1922    /// * `k` - Number of nearest neighbours. Defaults to `5`.
1923    ///
1924    /// ### Returns
1925    ///
1926    /// Hopefully sensible standard parameters for 2D visualisation.
1927    pub fn new_default_2d(k: Option<usize>) -> Self {
1928        let k = k.unwrap_or(5);
1929
1930        Self {
1931            k,
1932            ..Self::default()
1933        }
1934    }
1935}
1936
1937/// Full or landmark diffusion maps operator.
1938///
1939/// `Full` is used when no landmarks are requested or when the landmark count
1940/// meets or exceeds the data size. `Landmark` delegates eigendecomposition and
1941/// Nystroem extension to `DiffusionMapsLandmarks`.
1942pub enum DiffusionMapsOperator<T>
1943where
1944    T: ManifoldsFloat,
1945{
1946    /// Full N×N symmetric diffusion operator.
1947    Full {
1948        /// Symmetric diffusion operator P_sym = D^{-1/2} K D^{-1/2}
1949        p_sym: CompressedSparseData<T>,
1950        /// Square-root of node degrees; used to recover right eigenvectors
1951        /// of the row-stochastic operator from the symmetric ones
1952        sqrt_degrees: Vec<T>,
1953    },
1954    /// Landmark-based operator for large data sets.
1955    Landmark {
1956        /// Landmark operator ready for eigendecomposition and Nystroem extension
1957        landmarks: DiffusionMapsLandmarks<T>,
1958    },
1959}
1960
1961/// Build the diffusion maps operator from raw data.
1962///
1963/// Runs kNN search, builds a Gaussian kernel via alpha-decay, applies
1964/// anisotropic normalisation, then returns either a full or landmark operator
1965/// depending on `dm_params.n_landmarks`. Falls back to full if
1966/// `n_landmarks >= n`.
1967///
1968/// ### Params
1969///
1970/// * `data` - Input data matrix (N × features)
1971/// * `k` - Number of nearest neighbours
1972/// * `precomputed_knn` - Optional precomputed kNN; skips search if provided
1973/// * `ann_type` - ANN algorithm name
1974/// * `nn_params` - Nearest neighbour search parameters
1975/// * `dm_params` - Diffusion maps parameters
1976/// * `seed` - RNG seed
1977/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
1978///   verbosity.
1979///
1980/// ### Returns
1981///
1982/// `DiffusionMapsOperator` (full or landmark)
1983#[allow(clippy::too_many_arguments)]
1984pub fn construct_diffusion_maps_operator<T>(
1985    data: MatRef<T>,
1986    k: usize,
1987    precomputed_knn: PreComputedKnn<T>,
1988    ann_type: &str,
1989    nn_params: &NearestNeighbourParams<T>,
1990    dm_params: &DiffusionMapsParams<T>,
1991    seed: usize,
1992    verbose: usize,
1993) -> Result<DiffusionMapsOperator<T>, ManifoldsError>
1994where
1995    T: ManifoldsFloat,
1996    HnswIndex<T>: HnswState<T>,
1997    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1998{
1999    let use_full = match dm_params.n_landmarks {
2000        None => true,
2001        Some(n) => n >= data.nrows(),
2002    };
2003
2004    let verbosity = parse_verbosity_level(verbose);
2005
2006    let needs_affinity =
2007        use_full || matches!(dm_params.landmark_method.as_str(), "spectral" | "density");
2008
2009    let affinity = if needs_affinity {
2010        let (knn_indices, knn_dist) = match precomputed_knn {
2011            Some((indices, distances)) => {
2012                if verbosity.normal_verbosity() {
2013                    println!("Using precomputed kNN graph...");
2014                }
2015                (indices, distances)
2016            }
2017            None => {
2018                if verbosity.normal_verbosity() {
2019                    println!(
2020                        "Running (approximate) nearest neighbour search using {}...",
2021                        ann_type
2022                    );
2023                }
2024                let start_knn = Instant::now();
2025                let res = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
2026                if verbosity.normal_verbosity() {
2027                    println!("kNN search done in {:.2?}.", start_knn.elapsed());
2028                }
2029                res
2030            }
2031        };
2032
2033        if verbosity.normal_verbosity() {
2034            println!("Building Gaussian kernel affinities");
2035        }
2036        let graph = phate_alpha_decay_affinities(
2037            &knn_indices,
2038            &knn_dist,
2039            dm_params.k,
2040            Some(T::from_f64(2.0).unwrap()),
2041            dm_params.bandwidth_scale,
2042            dm_params.thresh,
2043            &dm_params.graph_symmetry,
2044            nn_params.dist_metric == "euclidean",
2045        );
2046        Some(coo_to_csr(&graph))
2047    } else {
2048        if verbosity.normal_verbosity() {
2049            println!("Skipping full affinity (random landmarks).");
2050        }
2051        None
2052    };
2053
2054    if use_full {
2055        let kernel = affinity.unwrap();
2056        let kernel_norm = apply_anisotropic_normalisation(&kernel, dm_params.alpha_norm)?;
2057        let (p_sym, sqrt_degrees) = build_symmetric_diffusion_operator(&kernel_norm)?;
2058        return Ok(DiffusionMapsOperator::Full {
2059            p_sym,
2060            sqrt_degrees,
2061        });
2062    }
2063
2064    let n_landmarks = dm_params.n_landmarks.unwrap();
2065    if verbosity.normal_verbosity() {
2066        println!(" Building {} landmarks...", n_landmarks);
2067    }
2068    let start_l = Instant::now();
2069    let landmarks = DiffusionMapsLandmarks::build(
2070        data,
2071        affinity.as_ref(),
2072        n_landmarks,
2073        &dm_params.landmark_method,
2074        &nn_params.dist_metric,
2075        dm_params.alpha_norm,
2076        dm_params.k,
2077        dm_params.bandwidth_scale,
2078        dm_params.thresh,
2079        &dm_params.graph_symmetry,
2080        seed,
2081        dm_params.n_svd,
2082        verbose,
2083    )?;
2084    if verbosity.normal_verbosity() {
2085        println!(" Landmarks built in {:.2?}.", start_l.elapsed());
2086    }
2087    Ok(DiffusionMapsOperator::Landmark { landmarks })
2088}
2089
2090/// Run diffusion maps end-to-end.
2091///
2092/// 1. kNN graph on the raw data
2093/// 2. Gaussian kernel via alpha-decay with decay = 2 (adaptive bandwidth)
2094/// 3. Anisotropic (alpha) normalisation for density correction
2095/// 4. Symmetric diffusion operator P_sym = D^{-1/2} K D^{-1/2}
2096/// 5. Top (n_dim + 1) eigenpairs of P_sym via Lanczos
2097/// 6. Drop the trivial eigenvalue (= 1) and scale each non-trivial eigenvector
2098///    phi_k by lambda_k^t
2099///
2100/// With landmarks, steps 4–6 run on the L×L operator and are Nystroem-extended
2101/// back to N points.
2102///
2103/// ### Params
2104///
2105/// * `data` - Input data matrix (N × features)
2106/// * `precomputed_knn` - Optional precomputed kNN; skips search if provided.
2107///   Must have been computed with k >= `dm_params.k`.
2108/// * `dm_params` - Diffusion maps parameters
2109/// * `seed` - RNG seed
2110/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
2111///   verbosity.
2112///
2113/// ### Returns
2114///
2115/// Embedding as `Vec<Vec<T>>` with shape `[n_dim][n_samples]`
2116pub fn diffusion_maps<T>(
2117    data: MatRef<T>,
2118    precomputed_knn: PreComputedKnn<T>,
2119    dm_params: DiffusionMapsParams<T>,
2120    seed: usize,
2121    verbose: usize,
2122) -> Result<Vec<Vec<T>>, ManifoldsError>
2123where
2124    T: ManifoldsFloat,
2125    HnswIndex<T>: HnswState<T>,
2126    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2127{
2128    let start_dm = Instant::now();
2129
2130    let verbosity = parse_verbosity_level(verbose);
2131
2132    let op = construct_diffusion_maps_operator(
2133        data,
2134        dm_params.k,
2135        precomputed_knn,
2136        &dm_params.ann_type,
2137        &dm_params.ann_params,
2138        &dm_params,
2139        seed,
2140        verbose,
2141    )?;
2142
2143    let embedding: Vec<Vec<T>> = match op {
2144        DiffusionMapsOperator::Full {
2145            p_sym,
2146            sqrt_degrees,
2147        } => {
2148            let t = match dm_params.t {
2149                PhateTime::Auto { t_max } => {
2150                    if verbosity.normal_verbosity() {
2151                        println!("Finding optimal t (t_max={})...", t_max);
2152                    }
2153                    let entropy = landmark_von_neumann_entropy(&p_sym, t_max)?;
2154                    find_knee_point(&entropy)
2155                }
2156                PhateTime::Fixed(t) => t,
2157            };
2158            if verbosity.normal_verbosity() {
2159                println!("Using t = {}.", t);
2160                println!(
2161                    "Computing top {} eigenpairs of P_sym...",
2162                    dm_params.n_dim + 1
2163                );
2164            }
2165            let start_eig = Instant::now();
2166            let n_ask = (dm_params.n_dim + 5).min(sqrt_degrees.len() - 1);
2167            let (evals, evecs) = compute_largest_eigenpairs_lanczos(&p_sym, n_ask, seed as u64)?;
2168
2169            if verbosity.normal_verbosity() {
2170                println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
2171            }
2172
2173            let n = sqrt_degrees.len();
2174            let mut embedding = vec![vec![T::zero(); dm_params.n_dim]; n];
2175            for comp_idx in 1..=dm_params.n_dim {
2176                let lambda = T::from_f32(evals[comp_idx]).unwrap();
2177                let lambda_t = lambda.powi(t as i32);
2178
2179                let mut max_abs = 0.0f32;
2180                let mut sign = 1.0f32;
2181                for i in 0..n {
2182                    let v = evecs[i][comp_idx];
2183                    if v.abs() > max_abs {
2184                        max_abs = v.abs();
2185                        sign = if v >= 0.0 { 1.0 } else { -1.0 };
2186                    }
2187                }
2188                for i in 0..n {
2189                    let u = T::from_f32(evecs[i][comp_idx] * sign).unwrap();
2190                    embedding[i][comp_idx - 1] = lambda_t * u / sqrt_degrees[i];
2191                }
2192            }
2193            embedding
2194        }
2195        DiffusionMapsOperator::Landmark { landmarks } => {
2196            let t = match dm_params.t {
2197                PhateTime::Auto { t_max } => {
2198                    if verbosity.normal_verbosity() {
2199                        println!("Finding optimal t on landmarks (t_max={})...", t_max);
2200                    }
2201                    landmarks.find_optimal_t(t_max)?
2202                }
2203                PhateTime::Fixed(t) => t,
2204            };
2205            if verbosity.detailed_verbosity() {
2206                println!(
2207                    "Using t = {}. Eigendecomposing {}x{} landmark operator...",
2208                    t,
2209                    landmarks.get_n_landmarks(),
2210                    landmarks.get_n_landmarks()
2211                );
2212            }
2213            let start_eig = Instant::now();
2214            let (evals, evecs) = landmarks.eigendecompose(dm_params.n_dim, seed as u64)?;
2215
2216            if verbosity.detailed_verbosity() {
2217                println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
2218            }
2219            if verbosity.normal_verbosity() {
2220                println!("Nystroem-extending to full data...");
2221            }
2222            let (landmark_embedding, lambdas) =
2223                landmarks.compute_landmark_embedding(&evals, &evecs, dm_params.n_dim, t);
2224            landmarks.nystrom_extend(&landmark_embedding, &lambdas)
2225        }
2226    };
2227
2228    if verbosity.normal_verbosity() {
2229        println!("Diffusion maps finished in {:.2?}.", start_dm.elapsed());
2230    }
2231
2232    let n = embedding.len();
2233    let mut transposed = vec![vec![T::zero(); n]; dm_params.n_dim];
2234    for i in 0..n {
2235        for d in 0..dm_params.n_dim {
2236            transposed[d][i] = embedding[i][d];
2237        }
2238    }
2239
2240    Ok(transposed)
2241}
2242
2243/////////////////////
2244// Parametric UMAP //
2245/////////////////////
2246
2247#[cfg(feature = "parametric")]
2248/// Stores the parameters for parametric UMAP via neural nets
2249#[derive(Debug, Clone)]
2250pub struct ParametricUmapParams<T> {
2251    /// How many dimensions to return
2252    pub n_dim: usize,
2253    /// Number of neighbours
2254    pub k: usize,
2255    /// Which of the possible approximate nearest neighbour searches to use.
2256    /// Defaults to `"hnsw"`.
2257    pub ann_type: String,
2258    /// Vector of usizes for the hidden layers in the MLP.
2259    pub hidden_layers: Vec<usize>,
2260    /// Nearest neighbour parameters.
2261    pub nn_params: NearestNeighbourParams<T>,
2262    /// The graph parameters for the generation of the graph structure.
2263    pub umap_graph_params: UmapGraphParams<T>,
2264    /// Train parameters for the neural network.
2265    pub train_param: TrainParametricParams<T>,
2266}
2267
2268#[cfg(feature = "parametric")]
2269impl<T> Default for ParametricUmapParams<T>
2270where
2271    T: ManifoldsFloat + Element,
2272{
2273    fn default() -> Self {
2274        Self {
2275            n_dim: 2,
2276            k: 15,
2277            ann_type: "hnsw".to_string(),
2278            hidden_layers: vec![128, 128, 128],
2279            nn_params: NearestNeighbourParams::default(),
2280            umap_graph_params: UmapGraphParams::default(),
2281            train_param: TrainParametricParams::default(),
2282        }
2283    }
2284}
2285
2286#[cfg(feature = "parametric")]
2287impl<T> ParametricUmapParams<T>
2288where
2289    T: ManifoldsFloat + Element,
2290{
2291    /// Generate new parametric UMAP parameters with full control over every
2292    /// field.
2293    ///
2294    /// For sensible defaults, use [`ParametricUmapParams::default`] or
2295    /// [`ParametricUmapParams::new_default_2d`] instead.
2296    ///
2297    /// ### Params
2298    ///
2299    /// * `n_dim` - Number of embedding dimensions.
2300    /// * `k` - Number of nearest neighbours.
2301    /// * `ann_type` - (Approximate) nearest neighbour method. One of
2302    ///   `"exhaustive"`, `"ivf"`, `"hnsw"`, `"nndescent"`, `"annoy"`,
2303    ///   `"kmknn"` or `"balltree"`.
2304    /// * `hidden_layers` - Hidden layer sizes for the MLP encoder.
2305    /// * `nn_params` - Nearest neighbour parameters.
2306    /// * `umap_graph_params` - UMAP graph generation parameters.
2307    /// * `train_param` - Training parameters for the neural network.
2308    ///
2309    /// ### Returns
2310    ///
2311    /// A fully specified set of parametric UMAP parameters.
2312    pub fn new(
2313        n_dim: usize,
2314        k: usize,
2315        ann_type: String,
2316        hidden_layers: Vec<usize>,
2317        nn_params: NearestNeighbourParams<T>,
2318        umap_graph_params: UmapGraphParams<T>,
2319        train_param: TrainParametricParams<T>,
2320    ) -> Self {
2321        Self {
2322            n_dim,
2323            k,
2324            ann_type,
2325            hidden_layers,
2326            nn_params,
2327            umap_graph_params,
2328            train_param,
2329        }
2330    }
2331
2332    /// Default parameters for standard 2D parametric UMAP.
2333    ///
2334    /// Returns the default parameters but lets you tune `min_dist`, `spread`
2335    /// and `corr_weight`, which feed into the training parameters.
2336    ///
2337    /// ### Params
2338    ///
2339    /// * `min_dist` - Minimum distance between embedded points. Defaults to
2340    ///   `0.1`.
2341    /// * `spread` - Effective scale of embedded points. Defaults to `1.0`.
2342    /// * `corr_weight` - Correlation loss weight. Defaults to `0.0`.
2343    ///
2344    /// ### Returns
2345    ///
2346    /// Hopefully sensible standard parameters for 2D visualisation.
2347    pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>, corr_weight: Option<T>) -> Self {
2348        let min_dist = min_dist.unwrap_or(T::from_f64(0.1).unwrap());
2349        let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
2350        let corr_weight = corr_weight.unwrap_or(T::from_f64(0.0).unwrap());
2351
2352        Self {
2353            train_param: TrainParametricParams::from_min_dist_spread(
2354                min_dist,
2355                spread,
2356                corr_weight,
2357                None,
2358                None,
2359                None,
2360                None,
2361            ),
2362            ..Self::default()
2363        }
2364    }
2365}
2366
2367#[cfg(feature = "parametric")]
2368/// Run parametric UMAP dimensionality reduction
2369///
2370/// Parametric UMAP learns a neural network encoder that maps high-dimensional
2371/// data to a low-dimensional embedding space. Unlike standard UMAP, this
2372/// approach provides an explicit parametric mapping that can be applied to
2373/// new data points without retraining.
2374///
2375/// The algorithm follows these steps:
2376///
2377/// 1. Find k-nearest neighbours using approximate nearest neighbour search
2378/// 2. Construct fuzzy simplicial set via smooth kNN distances
2379/// 3. Symmetrise the graph using fuzzy set union
2380/// 4. Train an MLP encoder to preserve the graph structure using UMAP loss
2381/// 5. Return embeddings by passing all data through the trained encoder
2382///
2383/// ### Params
2384///
2385/// * `data` - Input data matrix (samples × features)
2386/// * `precomputed_knn` - Precomputed k-nearest neighbours and distances. Needs
2387///   to be a tuple of `(Vec<Vec<usize>>, Vec<Vec<T>>)` with indices and
2388///   distances excluding self.
2389/// * `umap_params` - Configuration parameters for parametric UMAP
2390/// * `device` - Burn backend device for neural network training
2391/// * `seed` - Random seed for reproducibility
2392/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
2393///   verbosity.
2394///
2395/// ### Returns
2396///
2397/// Embedding coordinates as `Vec<Vec<T>>` where outer vector has length
2398/// `n_dim` and inner vectors have length `n_samples`. Each outer element
2399/// represents one embedding dimension.
2400pub fn parametric_umap<T, B>(
2401    data: MatRef<T>,
2402    precomputed_knn: PreComputedKnn<T>,
2403    umap_params: &ParametricUmapParams<T>,
2404    device: &B::Device,
2405    seed: usize,
2406    verbose: usize,
2407) -> Result<Vec<Vec<T>>, ManifoldsError>
2408where
2409    T: ManifoldsFloat + Element,
2410    B: AutodiffBackend,
2411    HnswIndex<T>: HnswState<T>,
2412    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2413{
2414    // parse various parameters
2415    let nn_params = umap_params.nn_params.clone();
2416
2417    let verbosity = parse_verbosity_level(verbose);
2418
2419    if verbosity.normal_verbosity() {
2420        println!(
2421            "Running parametric umap with alpha: {:.2?} and beta: {:.2?}",
2422            ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
2423            ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
2424        );
2425    }
2426
2427    let (graph, _, _) = construct_umap_graph(
2428        data,
2429        precomputed_knn,
2430        umap_params.k,
2431        umap_params.ann_type.clone(),
2432        &umap_params.umap_graph_params,
2433        &nn_params,
2434        umap_params.train_param.n_epochs,
2435        seed,
2436        verbose,
2437    )?;
2438
2439    let model_params = UmapMlpConfig::from_params(
2440        data.ncols(),
2441        umap_params.hidden_layers.clone(),
2442        umap_params.n_dim,
2443    );
2444
2445    let (embd, _) = train_parametric_umap::<B, T>(
2446        data,
2447        graph,
2448        &model_params,
2449        &umap_params.train_param,
2450        device,
2451        seed,
2452        verbosity.normal_verbosity(),
2453    );
2454
2455    Ok(embd)
2456}
2457
2458#[cfg(feature = "parametric")]
2459/// Train the parametric UMAP model and return it
2460///
2461/// ### Params
2462///
2463/// * `data` - Input data matrix (samples × features)
2464/// * `umap_params` - Configuration parameters for parametric UMAP
2465/// * `device` - Burn backend device for neural network training
2466/// * `seed` - Random seed for reproducibility
2467/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
2468///   verbosity.
2469///
2470/// Parametric UMAP learns a neural network encoder that maps high-dimensional
2471/// data to a low-dimensional embedding space. Unlike standard UMAP, this
2472/// approach provides an explicit parametric mapping that can be applied to
2473/// new data points without retraining.
2474///
2475/// The algorithm follows these steps:
2476///
2477/// 1. Find k-nearest neighbours using approximate nearest neighbour search
2478/// 2. Construct fuzzy simplicial set via smooth kNN distances
2479/// 3. Symmetrise the graph using fuzzy set union
2480/// 4. Train an MLP encoder to preserve the graph structure using UMAP loss
2481/// 5. Returns the trained MLP encoder that can be used now also on new data
2482///
2483/// ### Returns
2484///
2485/// Returns a tuple of embedding and the `TrainedUmapModel` for further usage.
2486pub fn train_parametric_umap_model<T, B>(
2487    data: MatRef<T>,
2488    umap_params: &ParametricUmapParams<T>,
2489    device: &B::Device,
2490    seed: usize,
2491    verbose: usize,
2492) -> ParametricUmapResults<B, T>
2493where
2494    T: ManifoldsFloat + Element,
2495    B: AutodiffBackend,
2496    HnswIndex<T>: HnswState<T>,
2497    NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2498{
2499    // parse various parameters
2500    let nn_params = umap_params.nn_params.clone();
2501    let verbosity = parse_verbosity_level(verbose);
2502
2503    if verbosity.normal_verbosity() {
2504        println!(
2505            "Training parametric umap model with alpha: {:.2?} and beta: {:.2?}",
2506            ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
2507            ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
2508        );
2509    }
2510
2511    let (graph, _, _) = construct_umap_graph(
2512        data,
2513        None,
2514        umap_params.k,
2515        umap_params.ann_type.clone(),
2516        &umap_params.umap_graph_params,
2517        &nn_params,
2518        umap_params.train_param.n_epochs,
2519        seed,
2520        verbose,
2521    )?;
2522
2523    let model_params = UmapMlpConfig::from_params(
2524        data.ncols(),
2525        umap_params.hidden_layers.clone(),
2526        umap_params.n_dim,
2527    );
2528
2529    let (embd, trained_model) = train_parametric_umap::<B, T>(
2530        data,
2531        graph,
2532        &model_params,
2533        &umap_params.train_param,
2534        device,
2535        seed,
2536        verbosity.normal_verbosity(),
2537    );
2538
2539    Ok((embd, trained_model))
2540}
2541
2542/////////
2543// GPU //
2544/////////
2545
2546//////////////
2547// UMAP GPU //
2548//////////////
2549
2550/// UMAP parameters for GPU-accelerated nearest neighbour search
2551#[cfg(feature = "gpu")]
2552#[derive(Debug, Clone)]
2553pub struct UmapParamsGpu<T> {
2554    /// How many dimensions to return
2555    pub n_dim: usize,
2556    /// Number of neighbours
2557    pub k: usize,
2558    /// Which optimiser to use. Defaults to `"adam_parallel"`.
2559    pub optimiser: String,
2560    /// Which GPU nearest neighbour search to use. One of `"exhaustive_gpu"`,
2561    /// `"ivf_gpu"` or `"nndescent_gpu"`. Defaults to `"ivf_gpu"`.
2562    pub ann_type: String,
2563    /// Which embedding initialisation to use. Defaults to spectral clustering.
2564    pub initialisation: String,
2565    /// Optional initialisation range
2566    pub init_range: Option<T>,
2567    /// GPU nearest neighbour parameters
2568    pub nn_params: NearestNeighbourParamsGpu<T>,
2569    /// Parameters for UMAP graph generation
2570    pub umap_graph_params: UmapGraphParams<T>,
2571    /// Parameters for the UMAP optimiser
2572    pub optim_params: UmapOptimParams<T>,
2573    /// Use randomised SVD for PCA-based initialisation
2574    pub randomised: bool,
2575}
2576
2577#[cfg(feature = "gpu")]
2578impl<T> Default for UmapParamsGpu<T>
2579where
2580    T: ManifoldsFloat,
2581{
2582    fn default() -> Self {
2583        Self {
2584            n_dim: 2,
2585            k: 15,
2586            optimiser: "adam_parallel".to_string(),
2587            ann_type: "ivf_gpu".to_string(),
2588            initialisation: "spectral".to_string(),
2589            init_range: None,
2590            nn_params: NearestNeighbourParamsGpu::default(),
2591            umap_graph_params: UmapGraphParams::default(),
2592            optim_params: UmapOptimParams::default(),
2593            randomised: false,
2594        }
2595    }
2596}
2597
2598#[cfg(feature = "gpu")]
2599impl<T> UmapParamsGpu<T>
2600where
2601    T: ManifoldsFloat,
2602{
2603    /// Generate new GPU UMAP parameters with full control over every field.
2604    ///
2605    /// This constructor exposes every field directly. For sensible defaults,
2606    /// use [`UmapParamsGpu::default`] or [`UmapParamsGpu::new_default_2d`]
2607    /// instead.
2608    ///
2609    /// ### Params
2610    ///
2611    /// * `n_dim` - How many dimensions to return.
2612    /// * `k` - How many neighbours to consider.
2613    /// * `optimiser` - Which optimiser to use, e.g. `"adam_parallel"`.
2614    /// * `ann_type` - Which GPU ANN search to use. One of `"exhaustive_gpu"`,
2615    ///   `"ivf_gpu"` or `"nndescent_gpu"`.
2616    /// * `initialisation` - Which embedding initialisation to use, e.g.
2617    ///   `"spectral"`.
2618    /// * `init_range` - Optional initialisation range.
2619    /// * `nn_params` - GPU nearest neighbour parameters.
2620    /// * `umap_graph_params` - Parameters for the UMAP graph generation.
2621    /// * `optim_params` - Parameters for the UMAP optimiser.
2622    /// * `randomised` - Whether randomised SVD is used for PCA-based
2623    ///   initialisation.
2624    ///
2625    /// ### Returns
2626    ///
2627    /// A fully specified set of GPU UMAP parameters.
2628    #[allow(clippy::too_many_arguments)]
2629    pub fn new(
2630        n_dim: usize,
2631        k: usize,
2632        optimiser: String,
2633        ann_type: String,
2634        initialisation: String,
2635        init_range: Option<T>,
2636        nn_params: NearestNeighbourParamsGpu<T>,
2637        umap_graph_params: UmapGraphParams<T>,
2638        optim_params: UmapOptimParams<T>,
2639        randomised: bool,
2640    ) -> Self {
2641        Self {
2642            n_dim,
2643            k,
2644            optimiser,
2645            ann_type,
2646            initialisation,
2647            init_range,
2648            nn_params,
2649            umap_graph_params,
2650            optim_params,
2651            randomised,
2652        }
2653    }
2654
2655    /// Default parameters for standard 2D visualisation.
2656    ///
2657    /// Returns the default parameters but lets you tune `min_dist` and
2658    /// `spread`, which feed into the optimiser parameters and control how
2659    /// tightly points are packed in the embedding.
2660    ///
2661    /// ### Params
2662    ///
2663    /// * `min_dist` - Minimum distance between data points. Defaults to `0.5`.
2664    /// * `spread` - Spread parameter. Defaults to `1.0`.
2665    ///
2666    /// ### Returns
2667    ///
2668    /// Hopefully sensible standard parameters for 2D visualisation with GPU
2669    /// kNN search.
2670    pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
2671        let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
2672        let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
2673
2674        Self {
2675            optim_params: UmapOptimParams::from_min_dist_spread(
2676                min_dist, spread, None, None, None, None, None, None, None,
2677            ),
2678            ..Self::default()
2679        }
2680    }
2681}
2682
2683/// Construct the UMAP graph using GPU-accelerated nearest neighbour search
2684///
2685/// Identical to `construct_umap_graph` except the kNN search runs on a GPU
2686/// device via `run_ann_search_gpu`. All downstream graph construction
2687/// (smooth kNN distances, symmetrisation, edge filtering) remains on CPU.
2688///
2689/// ### Params
2690///
2691/// * `data` - Input data matrix (samples x features)
2692/// * `precomputed_knn` - Optional precomputed kNN (indices, distances)
2693///   excluding self.
2694/// * `k` - Number of nearest neighbours.
2695/// * `ann_type` - GPU ANN method: `"exhaustive_gpu"`, `"ivf_gpu"` or
2696///   `"nndescent_gpu"`.
2697/// * `umap_params` - UMAP graph parameters (bandwidth, local_connectivity,
2698///   mix_weight).
2699/// * `nn_params` - GPU nearest neighbour search parameters.
2700/// * `n_epochs` - Number of optimisation epochs (used for edge filtering).
2701/// * `device` - The GPU device to use.
2702/// * `seed` - Random seed.
2703/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
2704///   verbosity.
2705///
2706/// ### Returns
2707///
2708/// Tuple of (graph, knn_indices, knn_dist).
2709#[cfg(feature = "gpu")]
2710#[allow(clippy::too_many_arguments)]
2711pub fn construct_umap_graph_gpu<T, R>(
2712    data: MatRef<T>,
2713    precomputed_knn: PreComputedKnn<T>,
2714    k: usize,
2715    ann_type: String,
2716    umap_params: &UmapGraphParams<T>,
2717    nn_params: &NearestNeighbourParamsGpu<T>,
2718    n_epochs: usize,
2719    device: R::Device,
2720    seed: usize,
2721    verbose: usize,
2722) -> UmapGraphResults<T>
2723where
2724    T: ManifoldsFloat + AnnSearchGpuFloat,
2725    R: Runtime,
2726    NNDescentGpu<T, R>: NNDescentQuery<T>,
2727{
2728    let verbosity = parse_verbosity_level(verbose);
2729
2730    let (knn_indices, knn_dist) = match precomputed_knn {
2731        Some((indices, distances)) => {
2732            if verbosity.normal_verbosity() {
2733                println!("Using precomputed kNN graph...");
2734            }
2735            (indices, distances)
2736        }
2737        None => {
2738            if verbosity.normal_verbosity() {
2739                println!("Running GPU nearest neighbour search using {}...", ann_type);
2740            }
2741            let start_knn = Instant::now();
2742            let result =
2743                run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
2744            if verbosity.normal_verbosity() {
2745                println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
2746            }
2747            result
2748        }
2749    };
2750
2751    if verbosity.normal_verbosity() {
2752        println!("Constructing fuzzy simplicial set...");
2753    }
2754
2755    let start_graph = Instant::now();
2756
2757    let (sigma, rho) = smooth_knn_dist(
2758        &knn_dist,
2759        knn_dist[0].len(),
2760        umap_params.local_connectivity,
2761        umap_params.bandwidth,
2762        64,
2763    );
2764
2765    let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
2766    let graph = symmetrise_graph(graph, umap_params.mix_weight);
2767    let graph = filter_weak_edges(graph, n_epochs, verbose);
2768
2769    if verbosity.normal_verbosity() {
2770        println!(
2771            "Finalised graph generation in {:.2?}.",
2772            start_graph.elapsed()
2773        );
2774    }
2775
2776    Ok((graph, knn_indices, knn_dist))
2777}
2778
2779/// Run UMAP with GPU-accelerated nearest neighbour search
2780///
2781/// Identical to `umap` except the kNN graph is constructed on the GPU.
2782/// Embedding initialisation and optimisation remain on the CPU (the parallel
2783/// Adam optimiser is already extremely fast for the 2D embedding updates).
2784///
2785/// ### Params
2786///
2787/// * `data` - Input data matrix (samples x features)
2788/// * `precomputed_knn` - Optional precomputed kNN (indices, distances)
2789///   excluding self.
2790/// * `umap_params` - GPU UMAP parameters.
2791/// * `device` - The GPU device to use.
2792/// * `seed` - Random seed.
2793/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
2794///   verbosity.
2795///
2796/// ### Returns
2797///
2798/// Embedding coordinates as `Vec<Vec<T>>` where outer vector has length
2799/// `n_dim` and inner vectors have length `n_samples`.
2800#[cfg(feature = "gpu")]
2801pub fn umap_gpu<T, R>(
2802    data: MatRef<T>,
2803    precomputed_knn: PreComputedKnn<T>,
2804    umap_params: &UmapParamsGpu<T>,
2805    device: R::Device,
2806    seed: usize,
2807    verbose: usize,
2808) -> Result<Vec<Vec<T>>, ManifoldsError>
2809where
2810    T: ManifoldsFloat + AnnSearchGpuFloat,
2811    R: Runtime,
2812    StandardNormal: Distribution<T>,
2813    NNDescentGpu<T, R>: NNDescentQuery<T>,
2814{
2815    let verbosity = parse_verbosity_level(verbose);
2816
2817    let init_type = parse_initilisation(
2818        &umap_params.initialisation,
2819        umap_params.randomised,
2820        umap_params.init_range,
2821    )
2822    .unwrap_or_else(|| {
2823        println!(
2824            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
2825            umap_params.initialisation,
2826        );
2827        EmbdInit::PcaInit {
2828            range: None,
2829            randomised: true,
2830        }
2831    });
2832    let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
2833
2834    if verbosity.normal_verbosity() {
2835        println!(
2836            "Running umap (GPU kNN) with alpha: {:.2?} and beta: {:.2?}",
2837            umap_params.optim_params.a, umap_params.optim_params.b
2838        );
2839    }
2840
2841    let (graph, _, _) = construct_umap_graph_gpu::<T, R>(
2842        data,
2843        precomputed_knn,
2844        umap_params.k,
2845        umap_params.ann_type.clone(),
2846        &umap_params.umap_graph_params,
2847        &umap_params.nn_params,
2848        umap_params.optim_params.n_epochs,
2849        device,
2850        seed,
2851        verbose,
2852    )?;
2853
2854    if verbosity.normal_verbosity() {
2855        println!(
2856            "Initialising embedding via {} layout...",
2857            umap_params.initialisation
2858        );
2859    }
2860
2861    let start_layout = Instant::now();
2862
2863    let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
2864
2865    let graph_adj = coo_to_adjacency_list(&graph);
2866
2867    if verbosity.normal_verbosity() {
2868        println!(
2869            "Optimising embedding via {} ({} epochs) on {} edges...",
2870            match optimiser {
2871                UmapOptimiser::Adam => "Adam",
2872                UmapOptimiser::Sgd => "SGD",
2873                UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
2874            },
2875            umap_params.optim_params.n_epochs,
2876            graph.col_indices.len().separate_with_underscores()
2877        );
2878    }
2879
2880    match optimiser {
2881        UmapOptimiser::Adam => optimise_embedding_adam(
2882            &mut embd,
2883            &graph_adj,
2884            &umap_params.optim_params,
2885            seed as u64,
2886            verbose,
2887        )?,
2888        UmapOptimiser::Sgd => {
2889            optimise_embedding_sgd(
2890                &mut embd,
2891                &graph_adj,
2892                &umap_params.optim_params,
2893                seed as u64,
2894                verbose,
2895            )?;
2896        }
2897        UmapOptimiser::AdamParallel => {
2898            optimise_embedding_adam_parallel(
2899                &mut embd,
2900                &graph_adj,
2901                &umap_params.optim_params,
2902                seed as u64,
2903                verbose,
2904            )?;
2905        }
2906    }
2907
2908    if verbosity.normal_verbosity() {
2909        println!(
2910            "Initialised and optimised embedding in: {:.2?}.",
2911            start_layout.elapsed()
2912        );
2913        println!("UMAP (GPU) complete!");
2914    }
2915
2916    let n_samples = embd.len();
2917    let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
2918
2919    for sample_idx in 0..n_samples {
2920        for dim_idx in 0..umap_params.n_dim {
2921            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
2922        }
2923    }
2924
2925    Ok(transposed)
2926}
2927
2928//////////////
2929// tSNE GPU //
2930//////////////
2931
2932/// t-SNE parameters for GPU-accelerated nearest neighbour search
2933#[cfg(feature = "gpu")]
2934#[derive(Debug, Clone)]
2935pub struct TsneParamsGpu<T> {
2936    /// Number of output dimensions (typically 2)
2937    pub n_dim: usize,
2938    /// Perplexity parameter controlling neighbourhood size (typical: 5-50)
2939    pub perplexity: T,
2940    /// Which GPU ANN search to use. One of `"exhaustive_gpu"`, `"ivf_gpu"` or
2941    /// `"nndescent_gpu"`. Defaults to `"ivf_gpu"`.
2942    pub ann_type: String,
2943    /// Embedding initialisation method: `"pca"`, `"random"`, or `"spectral"`
2944    pub initialisation: String,
2945    /// Optional initialisation range
2946    pub init_range: Option<T>,
2947    /// GPU nearest neighbour parameters
2948    pub nn_params: NearestNeighbourParamsGpu<T>,
2949    /// tSNE optimisation parameters
2950    pub optim_params: TsneOptimParams<T>,
2951    /// Use randomised SVD for PCA initialisation
2952    pub randomised_init: bool,
2953}
2954
2955#[cfg(feature = "gpu")]
2956impl<T> Default for TsneParamsGpu<T>
2957where
2958    T: ManifoldsFloat,
2959{
2960    fn default() -> Self {
2961        Self {
2962            n_dim: 2,
2963            perplexity: T::from_f64(30.0).unwrap(),
2964            ann_type: "ivf_gpu".to_string(),
2965            initialisation: "pca".to_string(),
2966            init_range: None,
2967            nn_params: NearestNeighbourParamsGpu::default(),
2968            optim_params: TsneOptimParams {
2969                n_epochs: 1000,
2970                lr: None,
2971                early_exag_iter: 250,
2972                early_exag_factor: T::from_f64(12.0).unwrap(),
2973                late_exag_factor: None,
2974                theta: T::from_f64(0.5).unwrap(),
2975                n_interp_points: 3,
2976            },
2977            randomised_init: true,
2978        }
2979    }
2980}
2981
2982#[cfg(feature = "gpu")]
2983impl<T> TsneParamsGpu<T>
2984where
2985    T: ManifoldsFloat,
2986{
2987    /// Create new GPU t-SNE parameters with full control over every field.
2988    ///
2989    /// This constructor exposes every field directly, including the
2990    /// optimiser parameters. For sensible defaults, use
2991    /// [`TsneParamsGpu::default`] or [`TsneParamsGpu::new_default_2d`] instead.
2992    ///
2993    /// ### Params
2994    ///
2995    /// * `n_dim` - Number of output dimensions.
2996    /// * `perplexity` - Perplexity parameter controlling neighbourhood size
2997    ///   (typical: 5-50).
2998    /// * `ann_type` - GPU ANN algorithm. One of `"exhaustive_gpu"`,
2999    ///   `"ivf_gpu"` or `"nndescent_gpu"`.
3000    /// * `initialisation` - Embedding initialisation method: `"pca"`,
3001    ///   `"random"`, or `"spectral"`.
3002    /// * `init_range` - Optional initialisation range.
3003    /// * `nn_params` - GPU nearest neighbour parameters.
3004    /// * `optim_params` - t-SNE optimisation parameters. These cover the
3005    ///   learning rate, number of epochs, early/late exaggeration, the
3006    ///   Barnes-Hut `theta`, and the number of FFT interpolation points.
3007    /// * `randomised_init` - Whether randomised SVD is used for PCA
3008    ///   initialisation.
3009    ///
3010    /// ### Returns
3011    ///
3012    /// A fully specified set of GPU t-SNE parameters.
3013    #[allow(clippy::too_many_arguments)]
3014    pub fn new(
3015        n_dim: usize,
3016        perplexity: T,
3017        ann_type: String,
3018        initialisation: String,
3019        init_range: Option<T>,
3020        nn_params: NearestNeighbourParamsGpu<T>,
3021        optim_params: TsneOptimParams<T>,
3022        randomised_init: bool,
3023    ) -> Self {
3024        Self {
3025            n_dim,
3026            perplexity,
3027            ann_type,
3028            initialisation,
3029            init_range,
3030            nn_params,
3031            optim_params,
3032            randomised_init,
3033        }
3034    }
3035
3036    /// Default parameters for standard 2D visualisation.
3037    ///
3038    /// Returns the default parameters but lets you tune `perplexity`, the
3039    /// characteristic t-SNE knob controlling neighbourhood size.
3040    ///
3041    /// ### Params
3042    ///
3043    /// * `perplexity` - Perplexity parameter (typical: 5-50). Defaults to
3044    ///   `30.0`.
3045    ///
3046    /// ### Returns
3047    ///
3048    /// Hopefully sensible standard parameters for 2D visualisation with GPU
3049    /// kNN search.
3050    pub fn new_default_2d(perplexity: Option<T>) -> Self {
3051        let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
3052
3053        Self {
3054            perplexity,
3055            ..Self::default()
3056        }
3057    }
3058}
3059
3060/// Construct affinity graph for t-SNE using GPU-accelerated kNN search
3061///
3062/// Identical to `construct_tsne_graph` except the kNN runs on the GPU.
3063/// Gaussian affinity computation and symmetrisation remain on CPU.
3064///
3065/// ### Params
3066///
3067/// * `data` - Input data matrix (samples × features)
3068/// * `precomputed_knn` - Precomputed kNN (indices, distances) excluding self
3069/// * `perplexity` - Target perplexity
3070/// * `ann_type` - GPU ANN method: `"exhaustive_gpu"`, `"ivf_gpu"` or
3071///   `"nndescent_gpu"`
3072/// * `nn_params` - GPU nearest neighbour search parameters
3073/// * `device` - GPU device to use
3074/// * `seed` - Random seed
3075/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
3076///   verbosity.
3077///
3078/// ### Returns
3079///
3080/// Tuple of (symmetric affinity graph, knn_indices, knn_dist)
3081#[cfg(feature = "gpu")]
3082#[allow(clippy::too_many_arguments)]
3083pub fn construct_tsne_graph_gpu<T, R>(
3084    data: MatRef<T>,
3085    precomputed_knn: PreComputedKnn<T>,
3086    perplexity: T,
3087    ann_type: String,
3088    nn_params: &NearestNeighbourParamsGpu<T>,
3089    device: R::Device,
3090    seed: usize,
3091    verbose: usize,
3092) -> TsneGraph<T>
3093where
3094    T: ManifoldsFloat + AnnSearchGpuFloat,
3095    R: Runtime,
3096    NNDescentGpu<T, R>: NNDescentQuery<T>,
3097{
3098    let verbosity = parse_verbosity_level(verbose);
3099
3100    let (knn_indices, knn_dist) = match precomputed_knn {
3101        Some((indices, distances)) => {
3102            if verbosity.normal_verbosity() {
3103                println!("Using precomputed kNN graph...");
3104            }
3105            (indices, distances)
3106        }
3107        None => {
3108            let k_float = perplexity * T::from_f64(3.0).unwrap();
3109            let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
3110
3111            if verbosity.normal_verbosity() {
3112                println!("Running GPU kNN search (k={}) using {}...", k, ann_type);
3113            }
3114
3115            let start_knn = Instant::now();
3116            let result =
3117                run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
3118
3119            if verbosity.normal_verbosity() {
3120                println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
3121            }
3122
3123            result
3124        }
3125    };
3126
3127    if verbosity.normal_verbosity() {
3128        println!("Computing Gaussian affinities and symmetrising...");
3129    }
3130
3131    let start_graph = Instant::now();
3132
3133    let directed_graph = gaussian_knn_affinities(
3134        &knn_indices,
3135        &knn_dist,
3136        perplexity,
3137        T::from_f64(1e-5).unwrap(),
3138        200,
3139        nn_params.dist_metric == "euclidean",
3140    )?;
3141
3142    let graph = symmetrise_affinities_tsne(directed_graph);
3143
3144    if verbosity.normal_verbosity() {
3145        println!(
3146            "Finalised graph generation in {:.2?}.",
3147            start_graph.elapsed()
3148        );
3149    }
3150
3151    Ok((graph, knn_indices, knn_dist))
3152}
3153
3154/// Run t-SNE with GPU-accelerated nearest neighbour search (FFT build)
3155///
3156/// Identical to `tsne` except the kNN graph is constructed on the GPU.
3157/// Optimisation (Barnes-Hut or FFT) stays on CPU.
3158///
3159/// ### Params
3160///
3161/// * `data` - Input data matrix (samples × features)
3162/// * `precomputed_knn` - Optional precomputed kNN, indices and distances
3163///   excluding self
3164/// * `params` - GPU t-SNE parameters
3165/// * `approx_type` - Repulsive-force approximation: `"barnes_hut" | "bh"` or
3166///   `"fft"`
3167/// * `device` - GPU device to use
3168/// * `seed` - Random seed
3169/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
3170///   verbosity.
3171///
3172/// ### Returns
3173///
3174/// Embedding as `Vec<Vec<T>>` with shape `[n_dim][n_samples]`.
3175#[cfg(all(feature = "gpu", feature = "fft_tsne"))]
3176pub fn tsne_gpu<T, R>(
3177    data: MatRef<T>,
3178    precomputed_knn: PreComputedKnn<T>,
3179    params: &TsneParamsGpu<T>,
3180    approx_type: &str,
3181    device: R::Device,
3182    seed: usize,
3183    verbose: usize,
3184) -> Result<Vec<Vec<T>>, ManifoldsError>
3185where
3186    T: ManifoldsFloat + AnnSearchGpuFloat + FftwFloat,
3187    R: Runtime,
3188    StandardNormal: Distribution<T>,
3189    NNDescentGpu<T, R>: NNDescentQuery<T>,
3190{
3191    if params.n_dim != 2 {
3192        return Err(ManifoldsError::IncorrectDim {
3193            n_dim: params.n_dim,
3194        });
3195    }
3196
3197    let verbosity = parse_verbosity_level(verbose);
3198
3199    let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
3200        data,
3201        precomputed_knn,
3202        params.perplexity,
3203        params.ann_type.clone(),
3204        &params.nn_params,
3205        device,
3206        seed,
3207        verbose,
3208    )?;
3209
3210    if verbosity.normal_verbosity() {
3211        println!("Initialising embedding via {}...", &params.initialisation);
3212    }
3213
3214    let init_type = parse_initilisation(
3215        &params.initialisation,
3216        params.randomised_init,
3217        params.init_range,
3218    )
3219    .unwrap_or_else(|| {
3220        println!(
3221            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
3222            params.initialisation,
3223        );
3224        EmbdInit::PcaInit {
3225            range: Some(T::from_f64(1e-2).unwrap()),
3226            randomised: true,
3227        }
3228    });
3229
3230    let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
3231
3232    let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
3233
3234    let start_optim = Instant::now();
3235    match tsne_approx {
3236        TsneOpt::BarnesHut => {
3237            if verbosity.normal_verbosity() {
3238                println!(
3239                    "Optimising via Barnes-Hut t-SNE ({} epochs)...",
3240                    params.optim_params.n_epochs
3241                );
3242            }
3243            optimise_bh_tsne(&mut embd, &params.optim_params, &graph, verbose);
3244        }
3245        TsneOpt::Fft => {
3246            if verbosity.normal_verbosity() {
3247                println!(
3248                    "Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
3249                    params.optim_params.n_epochs
3250                );
3251            }
3252            optimise_fft_tsne(&mut embd, &params.optim_params, &graph, verbose)?;
3253        }
3254    }
3255
3256    if verbosity.normal_verbosity() {
3257        println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
3258    }
3259
3260    let n_samples = embd.len();
3261    let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
3262
3263    for sample_idx in 0..n_samples {
3264        for dim_idx in 0..params.n_dim {
3265            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
3266        }
3267    }
3268
3269    Ok(transposed)
3270}
3271
3272/// Run t-SNE with GPU-accelerated nearest neighbour search (non-FFT build)
3273///
3274/// Identical to `tsne` except the kNN graph is constructed on the GPU.
3275/// Barnes-Hut optimisation stays on CPU. Calling with `approx_type = "fft"`
3276/// panics; recompile with the `fft_tsne` feature for FFT support.
3277///
3278/// ### Params
3279///
3280/// * `data` - Input data matrix (samples × features)
3281/// * `precomputed_knn` - Optional precomputed kNN, indices and distances
3282///   excluding self
3283/// * `params` - GPU t-SNE parameters
3284/// * `approx_type` - Repulsive-force approximation: `"barnes_hut" | "bh"`
3285/// * `device` - GPU device to use
3286/// * `seed` - Random seed
3287/// * `verbose` - If `0` -> silent or `1` for normal verbosity, `2` for detailed
3288///   verbosity.
3289///
3290/// ### Returns
3291///
3292/// Embedding as `Vec<Vec<T>>` with shape `[n_dim][n_samples]`.
3293#[cfg(all(feature = "gpu", not(feature = "fft_tsne")))]
3294pub fn tsne_gpu<T, R>(
3295    data: MatRef<T>,
3296    precomputed_knn: PreComputedKnn<T>,
3297    params: &TsneParamsGpu<T>,
3298    approx_type: &str,
3299    device: R::Device,
3300    seed: usize,
3301    verbose: usize,
3302) -> Result<Vec<Vec<T>>, ManifoldsError>
3303where
3304    T: ManifoldsFloat + AnnSearchGpuFloat,
3305    R: Runtime,
3306    StandardNormal: Distribution<T>,
3307    NNDescentGpu<T, R>: NNDescentQuery<T>,
3308{
3309    if params.n_dim != 2 {
3310        return Err(ManifoldsError::IncorrectDim {
3311            n_dim: params.n_dim,
3312        });
3313    }
3314
3315    let verbosity = parse_verbosity_level(verbose);
3316
3317    let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
3318        data,
3319        precomputed_knn,
3320        params.perplexity,
3321        params.ann_type.clone(),
3322        &params.nn_params,
3323        device,
3324        seed,
3325        verbose,
3326    )?;
3327
3328    if verbosity.normal_verbosity() {
3329        println!("Initialising embedding via {}...", &params.initialisation);
3330    }
3331
3332    let init_type = parse_initilisation(
3333        &params.initialisation,
3334        params.randomised_init,
3335        params.init_range,
3336    )
3337    .unwrap_or_else(|| {
3338        println!(
3339            "Unknown initialisation provided: {:?}. Defaulting to PCA.",
3340            params.initialisation,
3341        );
3342        EmbdInit::PcaInit {
3343            range: Some(T::from_f64(1e-2).unwrap()),
3344            randomised: true,
3345        }
3346    });
3347
3348    let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
3349
3350    let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
3351
3352    let start_optim = Instant::now();
3353    match tsne_approx {
3354        TsneOpt::BarnesHut => {
3355            if verbosity.normal_verbosity() {
3356                println!(
3357                    "Optimising via Barnes-Hut t-SNE ({} epochs)...",
3358                    params.optim_params.n_epochs
3359                );
3360            }
3361            optimise_bh_tsne(&mut embd, &params.optim_params, &graph, verbose);
3362        }
3363        TsneOpt::Fft => {
3364            panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
3365        }
3366    }
3367
3368    if verbosity.normal_verbosity() {
3369        println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
3370    }
3371
3372    let n_samples = embd.len();
3373    let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
3374
3375    for sample_idx in 0..n_samples {
3376        for dim_idx in 0..params.n_dim {
3377            transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
3378        }
3379    }
3380
3381    Ok(transposed)
3382}