1#![allow(clippy::needless_range_loop)] #![warn(missing_docs)]
13
14pub mod data;
15pub mod errors;
16pub mod prelude;
17pub mod training;
18pub mod utils;
19
20#[cfg(feature = "parametric")]
21pub mod parametric;
22
23use ann_search_rs::cpu::hnsw::{HnswIndex, HnswState};
24use ann_search_rs::cpu::nndescent::{ApplySortedUpdates, NNDescent, NNDescentQuery};
25use ann_search_rs::utils::dist::parse_ann_dist;
26use faer::MatRef;
27use rand_distr::{Distribution, StandardNormal};
28use std::{default::Default, time::Instant};
29use thousands::*;
30
31#[cfg(feature = "parametric")]
32use burn::tensor::{backend::AutodiffBackend, Element};
33#[cfg(feature = "parametric")]
34use num_traits::ToPrimitive;
35
36#[cfg(feature = "gpu")]
37use ann_search_rs::gpu::nndescent_gpu::NNDescentGpu;
38#[cfg(feature = "gpu")]
39use ann_search_rs::gpu::traits_gpu::AnnSearchGpuFloat;
40#[cfg(feature = "gpu")]
41use cubecl::prelude::*;
42
43use crate::data::graph::*;
44use crate::data::init::*;
45use crate::data::nearest_neighbours::*;
46use crate::data::pacmap_pairs::*;
47use crate::data::structures::*;
48use crate::prelude::*;
49use crate::training::mds_optimiser::*;
50use crate::training::pacmap_optimiser::{
51 optimise_pacmap, optimise_pacmap_parallel, PacMapOptimiser,
52};
53use crate::training::tsne_optimiser::*;
54use crate::training::umap_optimisers::*;
55use crate::utils::diffusions::*;
56use crate::utils::math::compute_largest_eigenpairs_lanczos;
57use crate::utils::potentials::compute_potential_distances;
58use crate::utils::sparse_ops::matrix_power;
59
60#[cfg(feature = "parametric")]
61use crate::parametric::model::*;
62#[cfg(feature = "parametric")]
63use crate::parametric::parametric_train::*;
64#[cfg(feature = "fft_tsne")]
65use crate::utils::fft::FftwFloat;
66
67pub type PreComputedKnn<T> = Option<(Vec<Vec<usize>>, Vec<Vec<T>>)>;
78
79pub type TsneGraph<T> = Result<(CoordinateList<T>, Vec<Vec<usize>>, Vec<Vec<T>>), ManifoldsError>;
91
92#[derive(Debug, Clone)]
98pub struct UmapParams<T> {
99 pub n_dim: usize,
101 pub k: usize,
103 pub optimiser: String,
105 pub ann_type: String,
108 pub initialisation: String,
110 pub init_range: Option<T>,
112 pub nn_params: NearestNeighbourParams<T>,
114 pub umap_graph_params: UmapGraphParams<T>,
116 pub optim_params: UmapOptimParams<T>,
118 pub randomised: bool,
120}
121
122impl<T> Default for UmapParams<T>
123where
124 T: ManifoldsFloat,
125{
126 fn default() -> Self {
127 Self {
128 n_dim: 2,
129 k: 15,
130 optimiser: "adam_parallel".to_string(),
131 ann_type: "kmknn".to_string(),
132 initialisation: "spectral".to_string(),
133 init_range: None,
134 nn_params: NearestNeighbourParams::default(),
135 optim_params: UmapOptimParams::default(),
136 umap_graph_params: UmapGraphParams::default(),
137 randomised: false,
138 }
139 }
140}
141
142impl<T> UmapParams<T>
143where
144 T: ManifoldsFloat,
145{
146 #[allow(clippy::too_many_arguments)]
171 pub fn new(
172 n_dim: usize,
173 k: usize,
174 optimiser: String,
175 ann_type: String,
176 initialisation: String,
177 init_range: Option<T>,
178 nn_params: NearestNeighbourParams<T>,
179 optim_params: UmapOptimParams<T>,
180 umap_graph_params: UmapGraphParams<T>,
181 randomised: bool,
182 ) -> Self {
183 Self {
184 n_dim,
185 k,
186 optimiser,
187 ann_type,
188 initialisation,
189 init_range,
190 nn_params,
191 optim_params,
192 umap_graph_params,
193 randomised,
194 }
195 }
196
197 pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
212 let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
213 let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
214
215 Self {
216 optim_params: UmapOptimParams::from_min_dist_spread(
217 min_dist, spread, None, None, None, None, None, None, None,
218 ),
219 ..Self::default()
220 }
221 }
222}
223
224#[allow(clippy::too_many_arguments)]
247pub fn construct_umap_graph<T>(
248 data: MatRef<T>,
249 precomputed_knn: PreComputedKnn<T>,
250 k: usize,
251 ann_type: String,
252 umap_params: &UmapGraphParams<T>,
253 nn_params: &NearestNeighbourParams<T>,
254 n_epochs: usize,
255 seed: usize,
256 verbose: usize,
257) -> UmapGraphResults<T>
258where
259 T: ManifoldsFloat,
260 HnswIndex<T>: HnswState<T>,
261 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
262{
263 let verbosity = parse_verbosity_level(verbose);
264
265 let (knn_indices, knn_dist) = match precomputed_knn {
266 Some((indices, distances)) => {
267 if verbosity.normal_verbosity() {
268 println!("Using precomputed kNN graph...");
269 }
270 (indices, distances)
271 }
272 None => {
273 if verbosity.normal_verbosity() {
274 println!(
275 "Running (approximate) nearest neighbour search using {}...",
276 ann_type
277 );
278 }
279 let start_knn = Instant::now();
280 let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
281 if verbosity.normal_verbosity() {
282 println!("kNN search done in: {:.2?}.", start_knn.elapsed());
283 }
284 result
285 }
286 };
287
288 if verbosity.normal_verbosity() {
289 println!("Constructing fuzzy simplicial set...");
290 }
291
292 let start_graph = Instant::now();
293
294 let (sigma, rho) = smooth_knn_dist(
295 &knn_dist,
296 knn_dist[0].len(),
297 umap_params.local_connectivity,
298 umap_params.bandwidth,
299 64,
300 );
301
302 let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
303 let graph = symmetrise_graph(graph, umap_params.mix_weight);
304 let graph = filter_weak_edges(graph, n_epochs, verbose);
305
306 if verbosity.normal_verbosity() {
307 println!(
308 "Finalised graph generation in {:.2?}.",
309 start_graph.elapsed()
310 );
311 }
312
313 Ok((graph, knn_indices, knn_dist))
314}
315
316pub fn umap<T>(
345 data: MatRef<T>,
346 precomputed_knn: PreComputedKnn<T>,
347 umap_params: &UmapParams<T>,
348 seed: usize,
349 verbose: usize,
350) -> Result<Vec<Vec<T>>, ManifoldsError>
351where
352 T: ManifoldsFloat,
353 HnswIndex<T>: HnswState<T>,
354 StandardNormal: Distribution<T>,
355 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
356{
357 let verbosity = parse_verbosity_level(verbose);
358
359 let init_type = parse_initilisation(
361 &umap_params.initialisation,
362 umap_params.randomised,
363 umap_params.init_range,
364 )
365 .unwrap_or_else(|| {
366 println!(
367 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
368 umap_params.initialisation,
369 );
370 EmbdInit::PcaInit {
371 range: None,
372 randomised: true,
373 }
374 });
375 let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
376
377 if verbosity.normal_verbosity() {
378 println!(
379 "Running umap with alpha: {:.2?} and beta: {:.2?}",
380 umap_params.optim_params.a, umap_params.optim_params.b
381 );
382 }
383
384 let (graph, _, _) = construct_umap_graph(
385 data,
386 precomputed_knn,
387 umap_params.k,
388 umap_params.ann_type.clone(),
389 &umap_params.umap_graph_params,
390 &umap_params.nn_params,
391 umap_params.optim_params.n_epochs,
392 seed,
393 verbose,
394 )?;
395
396 if verbosity.normal_verbosity() {
397 println!(
398 "Initialising embedding via {} layout...",
399 umap_params.initialisation
400 );
401 }
402
403 let start_layout = Instant::now();
404
405 let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
406
407 let graph_adj = coo_to_adjacency_list(&graph);
408
409 if verbosity.normal_verbosity() {
410 println!(
411 "Optimising embedding via {} ({} epochs) on {} edges...",
412 match optimiser {
413 UmapOptimiser::Adam => "Adam",
414 UmapOptimiser::Sgd => "SGD",
415 UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
416 },
417 umap_params.optim_params.n_epochs,
418 graph.col_indices.len().separate_with_underscores()
419 );
420 }
421
422 match optimiser {
423 UmapOptimiser::Adam => optimise_embedding_adam(
424 &mut embd,
425 &graph_adj,
426 &umap_params.optim_params,
427 seed as u64,
428 verbose,
429 )?,
430 UmapOptimiser::Sgd => {
431 optimise_embedding_sgd(
432 &mut embd,
433 &graph_adj,
434 &umap_params.optim_params,
435 seed as u64,
436 verbose,
437 )?;
438 }
439 UmapOptimiser::AdamParallel => {
440 optimise_embedding_adam_parallel(
441 &mut embd,
442 &graph_adj,
443 &umap_params.optim_params,
444 seed as u64,
445 verbose,
446 )?;
447 }
448 }
449
450 let end_layout = start_layout.elapsed();
451
452 if verbosity.normal_verbosity() {
453 println!(
454 "Initialised and optimised embedding in: {:.2?}.",
455 end_layout
456 );
457 println!("UMAP complete!");
458 }
459
460 let n_samples = embd.len();
462 let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
463
464 for sample_idx in 0..n_samples {
465 for dim_idx in 0..umap_params.n_dim {
466 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
467 }
468 }
469
470 Ok(transposed)
471}
472
473#[derive(Debug, Clone)]
479pub struct TsneParams<T> {
480 pub n_dim: usize,
482 pub perplexity: T,
484 pub ann_type: String,
487 pub initialisation: String,
489 pub init_range: Option<T>,
491 pub nn_params: NearestNeighbourParams<T>,
493 pub optim_params: TsneOptimParams<T>,
495 pub randomised_init: bool,
497}
498
499impl<T> Default for TsneParams<T>
500where
501 T: ManifoldsFloat,
502{
503 fn default() -> Self {
504 Self {
505 n_dim: 2,
506 perplexity: T::from_f64(30.0).unwrap(),
507 ann_type: "kmknn".to_string(),
508 initialisation: "pca".to_string(),
509 init_range: None,
510 nn_params: NearestNeighbourParams::default(),
511 optim_params: TsneOptimParams {
512 n_epochs: 1000,
513 lr: None,
514 early_exag_iter: 250,
515 early_exag_factor: T::from_f64(12.0).unwrap(),
516 late_exag_factor: None,
517 theta: T::from_f64(0.5).unwrap(),
518 n_interp_points: 3,
519 },
520 randomised_init: true,
521 }
522 }
523}
524
525impl<T> TsneParams<T>
526where
527 T: ManifoldsFloat,
528{
529 #[allow(clippy::too_many_arguments)]
558 pub fn new(
559 n_dim: usize,
560 perplexity: T,
561 ann_type: String,
562 initialisation: String,
563 init_range: Option<T>,
564 nn_params: NearestNeighbourParams<T>,
565 optim_params: TsneOptimParams<T>,
566 randomised_init: bool,
567 ) -> Self {
568 Self {
569 n_dim,
570 perplexity,
571 ann_type,
572 initialisation,
573 init_range,
574 nn_params,
575 optim_params,
576 randomised_init,
577 }
578 }
579
580 pub fn new_default_2d(perplexity: Option<T>) -> Self {
594 let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
595
596 Self {
597 perplexity,
598 ..Self::default()
599 }
600 }
601}
602
603pub fn construct_tsne_graph<T>(
640 data: MatRef<T>,
641 precomputed_knn: PreComputedKnn<T>,
642 perplexity: T,
643 ann_type: String,
644 nn_params: &NearestNeighbourParams<T>,
645 seed: usize,
646 verbose: usize,
647) -> TsneGraph<T>
648where
649 T: ManifoldsFloat,
650 HnswIndex<T>: HnswState<T>,
651 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
652{
653 let verbosity = parse_verbosity_level(verbose);
654
655 let (knn_indices, knn_dist) = match precomputed_knn {
656 Some((indices, distances)) => {
657 if verbosity.normal_verbosity() {
658 println!("Using precomputed kNN graph...");
659 }
660 (indices, distances)
661 }
662 None => {
663 let k_float = perplexity * T::from_f64(3.0).unwrap();
664 let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
665
666 if verbosity.normal_verbosity() {
667 println!("Running kNN search (k={}) using {}...", k, ann_type);
668 }
669
670 let start_knn = Instant::now();
671 let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
672
673 if verbosity.normal_verbosity() {
674 println!("kNN search done in: {:.2?}.", start_knn.elapsed());
675 }
676
677 result
678 }
679 };
680
681 if verbosity.normal_verbosity() {
682 println!("Computing Gaussian affinities and symmetrising...");
683 }
684
685 let start_graph = Instant::now();
686
687 let directed_graph = gaussian_knn_affinities(
688 &knn_indices,
689 &knn_dist,
690 perplexity,
691 T::from_f64(1e-5).unwrap(),
692 200,
693 nn_params.dist_metric == "euclidean",
694 )?;
695
696 let graph = symmetrise_affinities_tsne(directed_graph);
697
698 if verbosity.normal_verbosity() {
699 println!(
700 "Finalised graph generation in {:.2?}.",
701 start_graph.elapsed()
702 );
703 }
704
705 Ok((graph, knn_indices, knn_dist))
706}
707
708#[cfg(feature = "fft_tsne")]
755pub fn tsne<T>(
756 data: MatRef<T>,
757 precomputed_knn: PreComputedKnn<T>,
758 params: &TsneParams<T>,
759 approx_type: &str,
760 seed: usize,
761 verbose: usize,
762) -> Result<Vec<Vec<T>>, ManifoldsError>
763where
764 T: ManifoldsFloat + FftwFloat,
765 HnswIndex<T>: HnswState<T>,
766 StandardNormal: Distribution<T>,
767 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
768{
769 if params.n_dim != 2 {
770 return Err(ManifoldsError::IncorrectDim {
771 n_dim: params.n_dim,
772 });
773 }
774
775 let verbosity = parse_verbosity_level(verbose);
776
777 let (graph, _, _) = construct_tsne_graph(
779 data,
780 precomputed_knn,
781 params.perplexity,
782 params.ann_type.clone(),
783 ¶ms.nn_params,
784 seed,
785 verbose,
786 )?;
787
788 if verbosity.normal_verbosity() {
789 println!("Initialising embedding via {}...", ¶ms.initialisation);
790 }
791
792 let init_type = parse_initilisation(
794 ¶ms.initialisation,
795 params.randomised_init,
796 params.init_range,
797 )
798 .unwrap_or_else(|| {
799 println!(
800 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
801 params.initialisation,
802 );
803 EmbdInit::PcaInit {
804 range: Some(T::from_f64(1e-2).unwrap()),
805 randomised: true,
806 }
807 });
808
809 let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
810
811 let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
813
814 let start_optim = Instant::now();
816 match tsne_approx {
817 TsneOpt::BarnesHut => {
818 if verbosity.normal_verbosity() {
819 println!(
820 "Optimising via Barnes-Hut t-SNE ({} epochs)...",
821 params.optim_params.n_epochs
822 );
823 }
824 optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
825 }
826 #[cfg(feature = "fft_tsne")]
827 TsneOpt::Fft => {
828 if verbosity.normal_verbosity() {
829 println!(
830 "Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
831 params.optim_params.n_epochs
832 );
833 }
834 let _ = optimise_fft_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
835 }
836 }
837
838 if verbosity.normal_verbosity() {
839 println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
840 }
841
842 let n_samples = embd.len();
844 let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
845
846 for sample_idx in 0..n_samples {
847 for dim_idx in 0..params.n_dim {
848 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
849 }
850 }
851
852 Ok(transposed)
853}
854
855#[cfg(not(feature = "fft_tsne"))]
902pub fn tsne<T>(
903 data: MatRef<T>,
904 precomputed_knn: PreComputedKnn<T>,
905 params: &TsneParams<T>,
906 approx_type: &str,
907 seed: usize,
908 verbose: usize,
909) -> Result<Vec<Vec<T>>, ManifoldsError>
910where
911 T: ManifoldsFloat,
912 HnswIndex<T>: HnswState<T>,
913 StandardNormal: Distribution<T>,
914 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
915{
916 if params.n_dim != 2 {
917 return Err(ManifoldsError::IncorrectDim {
918 n_dim: params.n_dim,
919 });
920 }
921 let verbosity = parse_verbosity_level(verbose);
922
923 let (graph, _, _) = construct_tsne_graph(
925 data,
926 precomputed_knn,
927 params.perplexity,
928 params.ann_type.clone(),
929 ¶ms.nn_params,
930 seed,
931 verbose,
932 )?;
933
934 if verbosity.normal_verbosity() {
936 println!("Initialising embedding via {}...", ¶ms.initialisation);
937 }
938
939 let init_type = parse_initilisation(
940 ¶ms.initialisation,
941 params.randomised_init,
942 params.init_range,
943 )
944 .unwrap_or_else(|| {
945 println!(
946 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
947 params.initialisation,
948 );
949 EmbdInit::PcaInit {
950 range: Some(T::from_f64(1e-2).unwrap()),
951 randomised: true,
952 }
953 });
954
955 let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
956
957 let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
959
960 let start_optim = Instant::now();
962 match tsne_approx {
963 TsneOpt::BarnesHut => {
964 if verbosity.normal_verbosity() {
965 println!(
966 "Optimising via Barnes-Hut t-SNE ({} epochs)...",
967 params.optim_params.n_epochs
968 );
969 }
970 optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
971 }
972 #[cfg(not(feature = "fft_tsne"))]
973 TsneOpt::Fft => {
974 panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
975 }
976 }
977
978 if verbosity.normal_verbosity() {
979 println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
980 }
981
982 let n_samples = embd.len();
984 let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
985
986 for sample_idx in 0..n_samples {
987 for dim_idx in 0..params.n_dim {
988 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
989 }
990 }
991
992 Ok(transposed)
993}
994
995#[derive(Debug, Clone)]
1001pub struct PhateParams<T> {
1002 pub n_dim: usize,
1004 pub k: usize,
1006 pub ann_type: String,
1009 pub ann_params: NearestNeighbourParams<T>,
1011 pub diffusion_params: PhateDiffusionParams<T>,
1013 pub mds_method: String,
1015 pub mds_iter: Option<usize>,
1017 pub randomised: bool,
1019}
1020
1021impl<T> Default for PhateParams<T>
1022where
1023 T: ManifoldsFloat,
1024{
1025 fn default() -> Self {
1026 let diffusion_params = PhateDiffusionParams::new(
1027 Some(T::from_f64(40.0).unwrap()),
1028 T::from_f64(1.0).unwrap(),
1029 T::from_f64(1e-4).unwrap(),
1030 "average".to_string(),
1031 None,
1032 "spectral".to_string(),
1033 None,
1034 None,
1035 None,
1036 T::from_f64(1.0).unwrap(),
1037 );
1038
1039 Self {
1040 n_dim: 2,
1041 k: 5,
1042 ann_type: "kmknn".to_string(),
1043 ann_params: NearestNeighbourParams::default(),
1044 diffusion_params,
1045 mds_method: "sgd_dense".to_string(),
1046 mds_iter: None,
1047 randomised: true,
1048 }
1049 }
1050}
1051
1052impl<T> PhateParams<T>
1053where
1054 T: ManifoldsFloat,
1055{
1056 #[allow(clippy::too_many_arguments)]
1084 pub fn new(
1085 n_dim: usize,
1086 k: usize,
1087 ann_type: String,
1088 ann_params: NearestNeighbourParams<T>,
1089 diffusion_params: PhateDiffusionParams<T>,
1090 mds_method: String,
1091 mds_iter: Option<usize>,
1092 randomised: bool,
1093 ) -> Self {
1094 Self {
1095 n_dim,
1096 k,
1097 ann_type,
1098 ann_params,
1099 diffusion_params,
1100 mds_method,
1101 mds_iter,
1102 randomised,
1103 }
1104 }
1105
1106 pub fn new_default_2d(k: Option<usize>) -> Self {
1119 let k = k.unwrap_or(5);
1120
1121 Self {
1122 k,
1123 ..Self::default()
1124 }
1125 }
1126}
1127
1128#[allow(clippy::too_many_arguments)]
1162pub fn construct_phate_diffusion<T>(
1163 data: MatRef<T>,
1164 k: usize,
1165 precomputed_knn: PreComputedKnn<T>,
1166 ann_type: &str,
1167 nn_params: &NearestNeighbourParams<T>,
1168 phate_params: &PhateParams<T>,
1169 seed: usize,
1170 verbose: usize,
1171) -> Result<PhateDiffusion<T>, ManifoldsError>
1172where
1173 T: ManifoldsFloat,
1174 HnswIndex<T>: HnswState<T>,
1175 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1176{
1177 let verbosity = parse_verbosity_level(verbose);
1178
1179 let (knn_indices, knn_dist) = match precomputed_knn {
1180 Some((indices, distances)) => {
1181 if verbosity.normal_verbosity() {
1182 println!("Using precomputed kNN graph...");
1183 }
1184 (indices, distances)
1185 }
1186 None => {
1187 if verbosity.normal_verbosity() {
1188 println!(
1189 "Running (approximate) nearest neighbour search using {}...",
1190 ann_type
1191 );
1192 }
1193 let start_knn = Instant::now();
1194 let result = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
1195 if verbosity.normal_verbosity() {
1196 println!("kNN search done in: {:.2?}.", start_knn.elapsed());
1197 }
1198 result
1199 }
1200 };
1201
1202 if verbosity.normal_verbosity() {
1203 println!("Calculating alpha decay affinities");
1204 }
1205 let start_alpha_affinities = Instant::now();
1206
1207 let graph = phate_alpha_decay_affinities(
1208 &knn_indices,
1209 &knn_dist,
1210 phate_params.k,
1211 phate_params.diffusion_params.decay,
1212 phate_params.diffusion_params.bandwidth_scale,
1213 phate_params.diffusion_params.thresh,
1214 &phate_params.diffusion_params.graph_symmetry,
1215 nn_params.dist_metric == "euclidean",
1216 );
1217
1218 if verbosity.normal_verbosity() {
1219 println!(
1220 "Alpha decay affinity calculations done in: {:.2?}.",
1221 start_alpha_affinities.elapsed()
1222 );
1223 }
1224
1225 let affinity = coo_to_csr(&graph);
1226 let diffusion_op = build_diffusion_operator(&affinity);
1227
1228 match phate_params.diffusion_params.n_landmarks {
1229 None => Ok(PhateDiffusion::Full {
1230 operator: diffusion_op,
1231 }),
1232 Some(n_landmarks) if n_landmarks >= data.nrows() => Ok(PhateDiffusion::Full {
1233 operator: diffusion_op,
1234 }),
1235 Some(n_landmarks) => {
1236 if verbosity.normal_verbosity() {
1237 println!(" Building {} landmarks...", n_landmarks);
1238 }
1239 let start_landmarks = Instant::now();
1240 let landmarks = PhateLandmarks::build(
1241 data,
1242 &affinity,
1243 &diffusion_op,
1244 n_landmarks,
1245 &phate_params.diffusion_params.landmark_method,
1246 &nn_params.dist_metric,
1247 seed,
1248 Some(100),
1249 verbose,
1250 )?;
1251 if verbosity.normal_verbosity() {
1252 println!(
1253 " Landmarks generated in: {:.2?}.",
1254 start_landmarks.elapsed()
1255 );
1256 }
1257 Ok(PhateDiffusion::Landmark { landmarks })
1258 }
1259 }
1260}
1261
1262pub fn phate<T>(
1305 data: MatRef<T>,
1306 precomputed_knn: PreComputedKnn<T>,
1307 phate_params: PhateParams<T>,
1308 seed: usize,
1309 verbose: usize,
1310) -> Result<Vec<Vec<T>>, ManifoldsError>
1311where
1312 T: ManifoldsFloat,
1313 HnswIndex<T>: HnswState<T>,
1314 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1315 StandardNormal: Distribution<T>,
1316{
1317 let start_phate = Instant::now();
1318 let verbosity = parse_verbosity_level(verbose);
1319
1320 let phate_diffusion = construct_phate_diffusion(
1321 data,
1322 phate_params.k,
1323 precomputed_knn,
1324 &phate_params.ann_type,
1325 &phate_params.ann_params,
1326 &phate_params,
1327 seed,
1328 verbose,
1329 )?;
1330
1331 let start_t = Instant::now();
1332 let t = match phate_params.diffusion_params.t {
1333 PhateTime::Auto { t_max } => {
1334 if verbosity.normal_verbosity() {
1335 println!("Finding optimal t (t_max={})...", t_max);
1336 }
1337 match &phate_diffusion {
1338 PhateDiffusion::Landmark { landmarks } => landmarks.find_optimal_t(t_max),
1339 PhateDiffusion::Full { operator } => {
1340 let entropy = landmark_von_neumann_entropy(operator, t_max)?;
1341 Ok(find_knee_point(&entropy))
1342 }
1343 }
1344 }
1345 PhateTime::Fixed(t) => Ok(t),
1346 }?;
1347 if verbosity.normal_verbosity() {
1348 println!("Identified t = {} in {:.2?}.", t, start_t.elapsed());
1349 }
1350
1351 let mds_method = parse_mds_method(&phate_params.mds_method).unwrap_or_default();
1352 let dist = parse_ann_dist(&phate_params.ann_params.dist_metric).unwrap_or_default();
1353 let mds_params = MdsOptimParams::new(
1354 data.nrows(),
1355 phate_params.randomised,
1356 phate_params.mds_iter,
1357 None,
1358 );
1359
1360 let start_embed = Instant::now();
1361
1362 let embedding = match phate_diffusion {
1363 PhateDiffusion::Full { operator } => {
1364 if verbosity.normal_verbosity() {
1365 println!("Powering diffusion operator...");
1366 }
1367 let powered = matrix_power(&operator, t)?;
1368 let potential = calculate_potential(&powered, 1, phate_params.diffusion_params.gamma)?;
1369
1370 if verbosity.normal_verbosity() {
1371 println!(
1372 "Potential shape: {} × {} - calculated in {:.2?}.",
1373 potential.shape().0,
1374 potential.shape().1,
1375 start_embed.elapsed()
1376 );
1377 }
1378
1379 let res = match mds_method {
1380 MdsMethod::ClassicMds => {
1381 if verbosity.normal_verbosity() {
1382 println!("Computing pairwise distances, running classic MDS...");
1383 }
1384 let distances = compute_potential_distances(&potential, &dist);
1385 classic_mds(&distances, phate_params.n_dim, mds_params.randomised, seed)
1386 }
1387 MdsMethod::SgdDense => {
1388 if verbosity.normal_verbosity() {
1389 println!("Computing pairwise distances, running SGD-MDS...");
1390 }
1391 let distances = compute_potential_distances(&potential, &dist);
1392 sgd_mds(
1393 &distances,
1394 phate_params.n_dim,
1395 &mds_params,
1396 None,
1397 seed,
1398 verbose,
1399 )
1400 }
1401 }?;
1402
1403 res
1404 }
1405 PhateDiffusion::Landmark { landmarks } => {
1406 if verbosity.normal_verbosity() {
1407 println!(
1408 "Powering landmark operator ({} landmarks)...",
1409 landmarks.get_n_landmarks()
1410 );
1411 }
1412 let landmark_powered = landmarks.power(t)?;
1413 let landmark_potential =
1414 calculate_potential(&landmark_powered, 1, phate_params.diffusion_params.gamma)?;
1415
1416 if verbosity.normal_verbosity() {
1417 println!(
1418 "Landmark potential shape: {} × {} - calculated in {:.2?}.",
1419 landmark_potential.shape().0,
1420 landmark_potential.shape().1,
1421 start_embed.elapsed()
1422 );
1423 println!("Computing landmark pairwise distances...");
1424 }
1425
1426 let landmark_distances = compute_potential_distances(&landmark_potential, &dist);
1427
1428 let landmark_mds_params = MdsOptimParams::new(
1429 landmarks.get_n_landmarks(),
1430 phate_params.randomised,
1431 None,
1432 None,
1433 );
1434
1435 if verbosity.normal_verbosity() {
1436 println!("Running MDS on landmarks...");
1437 }
1438
1439 let landmark_embedding = match mds_method {
1440 MdsMethod::ClassicMds => classic_mds(
1441 &landmark_distances,
1442 phate_params.n_dim,
1443 landmark_mds_params.randomised,
1444 seed,
1445 ),
1446 _ => sgd_mds(
1447 &landmark_distances,
1448 phate_params.n_dim,
1449 &landmark_mds_params,
1450 None,
1451 seed,
1452 verbose,
1453 ),
1454 }?;
1455
1456 if verbosity.normal_verbosity() {
1457 println!("Interpolating to full N points via Nyström...");
1458 }
1459 landmarks.interpolate_embedding(&landmark_embedding)
1460 }
1461 };
1462
1463 if verbosity.normal_verbosity() {
1464 println!("Ran MDS in {:.2?}.", start_embed.elapsed());
1465 println!("Finished running PHATE in {:.2?}.", start_phate.elapsed());
1466 }
1467
1468 let n_samples = embedding.len();
1469 let mut transposed = vec![vec![T::zero(); n_samples]; phate_params.n_dim];
1470 for i in 0..n_samples {
1471 for d in 0..phate_params.n_dim {
1472 transposed[d][i] = embedding[i][d];
1473 }
1474 }
1475
1476 Ok(transposed)
1477}
1478
1479#[derive(Debug, Clone)]
1489pub struct PacmapParams<T> {
1490 pub n_dim: usize,
1492 pub k: usize,
1495 pub ann_type: String,
1498 pub optimiser_type: String,
1500 pub n_mid_near: usize,
1502 pub n_further: usize,
1504 pub mn_candidate_start: usize,
1507 pub mn_candidate_end: usize,
1510 pub initialisation: String,
1513 pub nn_params: NearestNeighbourParams<T>,
1515 pub optim_params: PacmapOptimParams<T>,
1517}
1518
1519impl<T> Default for PacmapParams<T>
1520where
1521 T: ManifoldsFloat,
1522{
1523 fn default() -> Self {
1524 Self {
1525 n_dim: 2,
1526 k: 50,
1527 ann_type: "kmknn".to_string(),
1528 optimiser_type: "adam_parallel".to_string(),
1529 n_mid_near: 2,
1530 n_further: 2,
1531 mn_candidate_start: 4,
1532 mn_candidate_end: 50,
1533 initialisation: "pca".to_string(),
1534 nn_params: NearestNeighbourParams::default(),
1535 optim_params: PacmapOptimParams::default(),
1536 }
1537 }
1538}
1539
1540impl<T> PacmapParams<T>
1541where
1542 T: ManifoldsFloat,
1543{
1544 #[allow(clippy::too_many_arguments)]
1579 pub fn new(
1580 n_dim: usize,
1581 k: usize,
1582 ann_type: String,
1583 optimiser_type: String,
1584 n_mid_near: usize,
1585 n_further: usize,
1586 mn_candidate_start: usize,
1587 mn_candidate_end: usize,
1588 initialisation: String,
1589 nn_params: NearestNeighbourParams<T>,
1590 optim_params: PacmapOptimParams<T>,
1591 ) -> Self {
1592 let k = k.max(mn_candidate_end);
1593
1594 Self {
1595 n_dim,
1596 k,
1597 ann_type,
1598 optimiser_type,
1599 n_mid_near,
1600 n_further,
1601 mn_candidate_start,
1602 mn_candidate_end,
1603 initialisation,
1604 nn_params,
1605 optim_params,
1606 }
1607 }
1608
1609 pub fn new_default_2d(k: Option<usize>) -> Self {
1623 let default = Self::default();
1624 let k = k.unwrap_or(default.k).max(default.mn_candidate_end);
1625
1626 Self { k, ..default }
1627 }
1628}
1629
1630pub fn pacmap<T>(
1659 data: MatRef<T>,
1660 precomputed_knn: PreComputedKnn<T>,
1661 params_pacmap: &PacmapParams<T>,
1662 seed: usize,
1663 verbose: usize,
1664) -> Result<Vec<Vec<T>>, ManifoldsError>
1665where
1666 T: ManifoldsFloat,
1667 HnswIndex<T>: HnswState<T>,
1668 StandardNormal: Distribution<T>,
1669 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1670{
1671 let n_samples = data.nrows();
1672
1673 let verbosity = parse_verbosity_level(verbose);
1674
1675 let (knn_indices, _) = match precomputed_knn {
1676 Some((indices, distances)) => {
1677 if verbosity.normal_verbosity() {
1678 println!("Using precomputed kNN graph...");
1679 }
1680 (indices, distances)
1681 }
1682 None => {
1683 if verbosity.normal_verbosity() {
1684 println!(
1685 "Running (approximate) nearest neighbour search using {} (k={})...",
1686 params_pacmap.ann_type, params_pacmap.k
1687 );
1688 }
1689 let start_knn = Instant::now();
1690 let result = run_ann_search(
1691 data,
1692 params_pacmap.k,
1693 params_pacmap.ann_type.clone(),
1694 ¶ms_pacmap.nn_params,
1695 seed,
1696 verbose,
1697 )?;
1698 if verbosity.normal_verbosity() {
1699 println!("kNN search done in: {:.2?}.", start_knn.elapsed());
1700 }
1701 result
1702 }
1703 };
1704
1705 if verbosity.normal_verbosity() {
1706 println!("Constructing PaCMAP pairs...");
1707 }
1708
1709 let start_pairs = Instant::now();
1710
1711 let pairs: PacmapPairs = construct_pacmap_pairs(
1712 &knn_indices,
1713 params_pacmap.n_mid_near,
1714 params_pacmap.n_further,
1715 params_pacmap.mn_candidate_start,
1716 params_pacmap.mn_candidate_end,
1717 seed as u64,
1718 );
1719
1720 let end_pairs = start_pairs.elapsed();
1721
1722 if verbosity.normal_verbosity() {
1723 println!(
1724 "Pairs generated in {:.2?}: {} near, {} mid-near, {} further.",
1725 end_pairs,
1726 pairs.near.len().separate_with_underscores(),
1727 pairs.mid_near.len().separate_with_underscores(),
1728 pairs.further.len().separate_with_underscores()
1729 );
1730 }
1731
1732 let init_type =
1733 parse_initilisation(¶ms_pacmap.initialisation, true, None).unwrap_or_else(|| {
1734 println!(
1735 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
1736 params_pacmap.initialisation,
1737 );
1738 EmbdInit::PcaInit {
1739 range: None,
1740 randomised: true,
1741 }
1742 });
1743
1744 if verbosity.normal_verbosity() {
1745 println!(
1746 "Initialising embedding via {} layout...",
1747 params_pacmap.initialisation
1748 );
1749 }
1750
1751 let dummy_graph = knn_to_coo_unweighted(&knn_indices);
1752 let mut embd = initialise_embedding(
1753 &init_type,
1754 params_pacmap.n_dim,
1755 seed as u64,
1756 &dummy_graph,
1757 data,
1758 )?;
1759
1760 let optimiser = parse_pacmap_optimiser(¶ms_pacmap.optimiser_type).unwrap_or_default();
1761
1762 let start_optim = Instant::now();
1763
1764 match optimiser {
1765 PacMapOptimiser::AdamParallel => {
1766 let _ =
1767 optimise_pacmap_parallel(&mut embd, &pairs, ¶ms_pacmap.optim_params, verbose);
1768 }
1769 PacMapOptimiser::Adam => {
1770 let _ = optimise_pacmap(&mut embd, &pairs, ¶ms_pacmap.optim_params, verbose);
1771 }
1772 }
1773
1774 let end_optim = start_optim.elapsed();
1775
1776 if verbosity.normal_verbosity() {
1777 println!("Optimisation done in {:.2?}.", end_optim);
1778 println!("PaCMAP complete!");
1779 }
1780
1781 let mut transposed = vec![vec![T::zero(); n_samples]; params_pacmap.n_dim];
1783 for sample_idx in 0..n_samples {
1784 for dim_idx in 0..params_pacmap.n_dim {
1785 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
1786 }
1787 }
1788
1789 Ok(transposed)
1790}
1791
1792#[derive(Debug, Clone)]
1798pub struct DiffusionMapsParams<T> {
1799 pub n_dim: usize,
1801 pub k: usize,
1803 pub ann_type: String,
1805 pub ann_params: NearestNeighbourParams<T>,
1807 pub bandwidth_scale: T,
1809 pub thresh: T,
1811 pub graph_symmetry: String,
1813 pub alpha_norm: T,
1817 pub t: PhateTime,
1819 pub n_landmarks: Option<usize>,
1821 pub landmark_method: String,
1823 pub n_svd: Option<usize>,
1825}
1826
1827impl<T> Default for DiffusionMapsParams<T>
1828where
1829 T: ManifoldsFloat,
1830{
1831 fn default() -> Self {
1832 Self {
1833 n_dim: 2,
1834 k: 5,
1835 ann_type: "kmknn".to_string(),
1836 ann_params: NearestNeighbourParams::default(),
1837 bandwidth_scale: T::from_f64(1.0).unwrap(),
1838 thresh: T::from_f64(1e-4).unwrap(),
1839 graph_symmetry: "add".to_string(),
1840 alpha_norm: T::from_f64(1.0).unwrap(),
1841 t: parse_phate_time(None, PHATE_MAX_T),
1842 n_landmarks: None,
1843 landmark_method: "spectral".to_string(),
1844 n_svd: None,
1845 }
1846 }
1847}
1848
1849impl<T> DiffusionMapsParams<T>
1850where
1851 T: ManifoldsFloat,
1852{
1853 #[allow(clippy::too_many_arguments)]
1885 pub fn new(
1886 n_dim: usize,
1887 k: usize,
1888 ann_type: String,
1889 ann_params: NearestNeighbourParams<T>,
1890 bandwidth_scale: T,
1891 thresh: T,
1892 graph_symmetry: String,
1893 alpha_norm: T,
1894 t: PhateTime,
1895 n_landmarks: Option<usize>,
1896 landmark_method: String,
1897 n_svd: Option<usize>,
1898 ) -> Self {
1899 Self {
1900 n_dim,
1901 k,
1902 ann_type,
1903 ann_params,
1904 bandwidth_scale,
1905 thresh,
1906 graph_symmetry,
1907 alpha_norm,
1908 t,
1909 n_landmarks,
1910 landmark_method,
1911 n_svd,
1912 }
1913 }
1914
1915 pub fn new_default_2d(k: Option<usize>) -> Self {
1928 let k = k.unwrap_or(5);
1929
1930 Self {
1931 k,
1932 ..Self::default()
1933 }
1934 }
1935}
1936
1937pub enum DiffusionMapsOperator<T>
1943where
1944 T: ManifoldsFloat,
1945{
1946 Full {
1948 p_sym: CompressedSparseData<T>,
1950 sqrt_degrees: Vec<T>,
1953 },
1954 Landmark {
1956 landmarks: DiffusionMapsLandmarks<T>,
1958 },
1959}
1960
1961#[allow(clippy::too_many_arguments)]
1984pub fn construct_diffusion_maps_operator<T>(
1985 data: MatRef<T>,
1986 k: usize,
1987 precomputed_knn: PreComputedKnn<T>,
1988 ann_type: &str,
1989 nn_params: &NearestNeighbourParams<T>,
1990 dm_params: &DiffusionMapsParams<T>,
1991 seed: usize,
1992 verbose: usize,
1993) -> Result<DiffusionMapsOperator<T>, ManifoldsError>
1994where
1995 T: ManifoldsFloat,
1996 HnswIndex<T>: HnswState<T>,
1997 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
1998{
1999 let use_full = match dm_params.n_landmarks {
2000 None => true,
2001 Some(n) => n >= data.nrows(),
2002 };
2003
2004 let verbosity = parse_verbosity_level(verbose);
2005
2006 let needs_affinity =
2007 use_full || matches!(dm_params.landmark_method.as_str(), "spectral" | "density");
2008
2009 let affinity = if needs_affinity {
2010 let (knn_indices, knn_dist) = match precomputed_knn {
2011 Some((indices, distances)) => {
2012 if verbosity.normal_verbosity() {
2013 println!("Using precomputed kNN graph...");
2014 }
2015 (indices, distances)
2016 }
2017 None => {
2018 if verbosity.normal_verbosity() {
2019 println!(
2020 "Running (approximate) nearest neighbour search using {}...",
2021 ann_type
2022 );
2023 }
2024 let start_knn = Instant::now();
2025 let res = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
2026 if verbosity.normal_verbosity() {
2027 println!("kNN search done in {:.2?}.", start_knn.elapsed());
2028 }
2029 res
2030 }
2031 };
2032
2033 if verbosity.normal_verbosity() {
2034 println!("Building Gaussian kernel affinities");
2035 }
2036 let graph = phate_alpha_decay_affinities(
2037 &knn_indices,
2038 &knn_dist,
2039 dm_params.k,
2040 Some(T::from_f64(2.0).unwrap()),
2041 dm_params.bandwidth_scale,
2042 dm_params.thresh,
2043 &dm_params.graph_symmetry,
2044 nn_params.dist_metric == "euclidean",
2045 );
2046 Some(coo_to_csr(&graph))
2047 } else {
2048 if verbosity.normal_verbosity() {
2049 println!("Skipping full affinity (random landmarks).");
2050 }
2051 None
2052 };
2053
2054 if use_full {
2055 let kernel = affinity.unwrap();
2056 let kernel_norm = apply_anisotropic_normalisation(&kernel, dm_params.alpha_norm)?;
2057 let (p_sym, sqrt_degrees) = build_symmetric_diffusion_operator(&kernel_norm)?;
2058 return Ok(DiffusionMapsOperator::Full {
2059 p_sym,
2060 sqrt_degrees,
2061 });
2062 }
2063
2064 let n_landmarks = dm_params.n_landmarks.unwrap();
2065 if verbosity.normal_verbosity() {
2066 println!(" Building {} landmarks...", n_landmarks);
2067 }
2068 let start_l = Instant::now();
2069 let landmarks = DiffusionMapsLandmarks::build(
2070 data,
2071 affinity.as_ref(),
2072 n_landmarks,
2073 &dm_params.landmark_method,
2074 &nn_params.dist_metric,
2075 dm_params.alpha_norm,
2076 dm_params.k,
2077 dm_params.bandwidth_scale,
2078 dm_params.thresh,
2079 &dm_params.graph_symmetry,
2080 seed,
2081 dm_params.n_svd,
2082 verbose,
2083 )?;
2084 if verbosity.normal_verbosity() {
2085 println!(" Landmarks built in {:.2?}.", start_l.elapsed());
2086 }
2087 Ok(DiffusionMapsOperator::Landmark { landmarks })
2088}
2089
2090pub fn diffusion_maps<T>(
2117 data: MatRef<T>,
2118 precomputed_knn: PreComputedKnn<T>,
2119 dm_params: DiffusionMapsParams<T>,
2120 seed: usize,
2121 verbose: usize,
2122) -> Result<Vec<Vec<T>>, ManifoldsError>
2123where
2124 T: ManifoldsFloat,
2125 HnswIndex<T>: HnswState<T>,
2126 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2127{
2128 let start_dm = Instant::now();
2129
2130 let verbosity = parse_verbosity_level(verbose);
2131
2132 let op = construct_diffusion_maps_operator(
2133 data,
2134 dm_params.k,
2135 precomputed_knn,
2136 &dm_params.ann_type,
2137 &dm_params.ann_params,
2138 &dm_params,
2139 seed,
2140 verbose,
2141 )?;
2142
2143 let embedding: Vec<Vec<T>> = match op {
2144 DiffusionMapsOperator::Full {
2145 p_sym,
2146 sqrt_degrees,
2147 } => {
2148 let t = match dm_params.t {
2149 PhateTime::Auto { t_max } => {
2150 if verbosity.normal_verbosity() {
2151 println!("Finding optimal t (t_max={})...", t_max);
2152 }
2153 let entropy = landmark_von_neumann_entropy(&p_sym, t_max)?;
2154 find_knee_point(&entropy)
2155 }
2156 PhateTime::Fixed(t) => t,
2157 };
2158 if verbosity.normal_verbosity() {
2159 println!("Using t = {}.", t);
2160 println!(
2161 "Computing top {} eigenpairs of P_sym...",
2162 dm_params.n_dim + 1
2163 );
2164 }
2165 let start_eig = Instant::now();
2166 let n_ask = (dm_params.n_dim + 5).min(sqrt_degrees.len() - 1);
2167 let (evals, evecs) = compute_largest_eigenpairs_lanczos(&p_sym, n_ask, seed as u64)?;
2168
2169 if verbosity.normal_verbosity() {
2170 println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
2171 }
2172
2173 let n = sqrt_degrees.len();
2174 let mut embedding = vec![vec![T::zero(); dm_params.n_dim]; n];
2175 for comp_idx in 1..=dm_params.n_dim {
2176 let lambda = T::from_f32(evals[comp_idx]).unwrap();
2177 let lambda_t = lambda.powi(t as i32);
2178
2179 let mut max_abs = 0.0f32;
2180 let mut sign = 1.0f32;
2181 for i in 0..n {
2182 let v = evecs[i][comp_idx];
2183 if v.abs() > max_abs {
2184 max_abs = v.abs();
2185 sign = if v >= 0.0 { 1.0 } else { -1.0 };
2186 }
2187 }
2188 for i in 0..n {
2189 let u = T::from_f32(evecs[i][comp_idx] * sign).unwrap();
2190 embedding[i][comp_idx - 1] = lambda_t * u / sqrt_degrees[i];
2191 }
2192 }
2193 embedding
2194 }
2195 DiffusionMapsOperator::Landmark { landmarks } => {
2196 let t = match dm_params.t {
2197 PhateTime::Auto { t_max } => {
2198 if verbosity.normal_verbosity() {
2199 println!("Finding optimal t on landmarks (t_max={})...", t_max);
2200 }
2201 landmarks.find_optimal_t(t_max)?
2202 }
2203 PhateTime::Fixed(t) => t,
2204 };
2205 if verbosity.detailed_verbosity() {
2206 println!(
2207 "Using t = {}. Eigendecomposing {}x{} landmark operator...",
2208 t,
2209 landmarks.get_n_landmarks(),
2210 landmarks.get_n_landmarks()
2211 );
2212 }
2213 let start_eig = Instant::now();
2214 let (evals, evecs) = landmarks.eigendecompose(dm_params.n_dim, seed as u64)?;
2215
2216 if verbosity.detailed_verbosity() {
2217 println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
2218 }
2219 if verbosity.normal_verbosity() {
2220 println!("Nystroem-extending to full data...");
2221 }
2222 let (landmark_embedding, lambdas) =
2223 landmarks.compute_landmark_embedding(&evals, &evecs, dm_params.n_dim, t);
2224 landmarks.nystrom_extend(&landmark_embedding, &lambdas)
2225 }
2226 };
2227
2228 if verbosity.normal_verbosity() {
2229 println!("Diffusion maps finished in {:.2?}.", start_dm.elapsed());
2230 }
2231
2232 let n = embedding.len();
2233 let mut transposed = vec![vec![T::zero(); n]; dm_params.n_dim];
2234 for i in 0..n {
2235 for d in 0..dm_params.n_dim {
2236 transposed[d][i] = embedding[i][d];
2237 }
2238 }
2239
2240 Ok(transposed)
2241}
2242
2243#[cfg(feature = "parametric")]
2248#[derive(Debug, Clone)]
2250pub struct ParametricUmapParams<T> {
2251 pub n_dim: usize,
2253 pub k: usize,
2255 pub ann_type: String,
2258 pub hidden_layers: Vec<usize>,
2260 pub nn_params: NearestNeighbourParams<T>,
2262 pub umap_graph_params: UmapGraphParams<T>,
2264 pub train_param: TrainParametricParams<T>,
2266}
2267
2268#[cfg(feature = "parametric")]
2269impl<T> Default for ParametricUmapParams<T>
2270where
2271 T: ManifoldsFloat + Element,
2272{
2273 fn default() -> Self {
2274 Self {
2275 n_dim: 2,
2276 k: 15,
2277 ann_type: "hnsw".to_string(),
2278 hidden_layers: vec![128, 128, 128],
2279 nn_params: NearestNeighbourParams::default(),
2280 umap_graph_params: UmapGraphParams::default(),
2281 train_param: TrainParametricParams::default(),
2282 }
2283 }
2284}
2285
2286#[cfg(feature = "parametric")]
2287impl<T> ParametricUmapParams<T>
2288where
2289 T: ManifoldsFloat + Element,
2290{
2291 pub fn new(
2313 n_dim: usize,
2314 k: usize,
2315 ann_type: String,
2316 hidden_layers: Vec<usize>,
2317 nn_params: NearestNeighbourParams<T>,
2318 umap_graph_params: UmapGraphParams<T>,
2319 train_param: TrainParametricParams<T>,
2320 ) -> Self {
2321 Self {
2322 n_dim,
2323 k,
2324 ann_type,
2325 hidden_layers,
2326 nn_params,
2327 umap_graph_params,
2328 train_param,
2329 }
2330 }
2331
2332 pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>, corr_weight: Option<T>) -> Self {
2348 let min_dist = min_dist.unwrap_or(T::from_f64(0.1).unwrap());
2349 let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
2350 let corr_weight = corr_weight.unwrap_or(T::from_f64(0.0).unwrap());
2351
2352 Self {
2353 train_param: TrainParametricParams::from_min_dist_spread(
2354 min_dist,
2355 spread,
2356 corr_weight,
2357 None,
2358 None,
2359 None,
2360 None,
2361 ),
2362 ..Self::default()
2363 }
2364 }
2365}
2366
2367#[cfg(feature = "parametric")]
2368pub fn parametric_umap<T, B>(
2401 data: MatRef<T>,
2402 precomputed_knn: PreComputedKnn<T>,
2403 umap_params: &ParametricUmapParams<T>,
2404 device: &B::Device,
2405 seed: usize,
2406 verbose: usize,
2407) -> Result<Vec<Vec<T>>, ManifoldsError>
2408where
2409 T: ManifoldsFloat + Element,
2410 B: AutodiffBackend,
2411 HnswIndex<T>: HnswState<T>,
2412 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2413{
2414 let nn_params = umap_params.nn_params.clone();
2416
2417 let verbosity = parse_verbosity_level(verbose);
2418
2419 if verbosity.normal_verbosity() {
2420 println!(
2421 "Running parametric umap with alpha: {:.2?} and beta: {:.2?}",
2422 ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
2423 ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
2424 );
2425 }
2426
2427 let (graph, _, _) = construct_umap_graph(
2428 data,
2429 precomputed_knn,
2430 umap_params.k,
2431 umap_params.ann_type.clone(),
2432 &umap_params.umap_graph_params,
2433 &nn_params,
2434 umap_params.train_param.n_epochs,
2435 seed,
2436 verbose,
2437 )?;
2438
2439 let model_params = UmapMlpConfig::from_params(
2440 data.ncols(),
2441 umap_params.hidden_layers.clone(),
2442 umap_params.n_dim,
2443 );
2444
2445 let (embd, _) = train_parametric_umap::<B, T>(
2446 data,
2447 graph,
2448 &model_params,
2449 &umap_params.train_param,
2450 device,
2451 seed,
2452 verbosity.normal_verbosity(),
2453 );
2454
2455 Ok(embd)
2456}
2457
2458#[cfg(feature = "parametric")]
2459pub fn train_parametric_umap_model<T, B>(
2487 data: MatRef<T>,
2488 umap_params: &ParametricUmapParams<T>,
2489 device: &B::Device,
2490 seed: usize,
2491 verbose: usize,
2492) -> ParametricUmapResults<B, T>
2493where
2494 T: ManifoldsFloat + Element,
2495 B: AutodiffBackend,
2496 HnswIndex<T>: HnswState<T>,
2497 NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
2498{
2499 let nn_params = umap_params.nn_params.clone();
2501 let verbosity = parse_verbosity_level(verbose);
2502
2503 if verbosity.normal_verbosity() {
2504 println!(
2505 "Training parametric umap model with alpha: {:.2?} and beta: {:.2?}",
2506 ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
2507 ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
2508 );
2509 }
2510
2511 let (graph, _, _) = construct_umap_graph(
2512 data,
2513 None,
2514 umap_params.k,
2515 umap_params.ann_type.clone(),
2516 &umap_params.umap_graph_params,
2517 &nn_params,
2518 umap_params.train_param.n_epochs,
2519 seed,
2520 verbose,
2521 )?;
2522
2523 let model_params = UmapMlpConfig::from_params(
2524 data.ncols(),
2525 umap_params.hidden_layers.clone(),
2526 umap_params.n_dim,
2527 );
2528
2529 let (embd, trained_model) = train_parametric_umap::<B, T>(
2530 data,
2531 graph,
2532 &model_params,
2533 &umap_params.train_param,
2534 device,
2535 seed,
2536 verbosity.normal_verbosity(),
2537 );
2538
2539 Ok((embd, trained_model))
2540}
2541
2542#[cfg(feature = "gpu")]
2552#[derive(Debug, Clone)]
2553pub struct UmapParamsGpu<T> {
2554 pub n_dim: usize,
2556 pub k: usize,
2558 pub optimiser: String,
2560 pub ann_type: String,
2563 pub initialisation: String,
2565 pub init_range: Option<T>,
2567 pub nn_params: NearestNeighbourParamsGpu<T>,
2569 pub umap_graph_params: UmapGraphParams<T>,
2571 pub optim_params: UmapOptimParams<T>,
2573 pub randomised: bool,
2575}
2576
2577#[cfg(feature = "gpu")]
2578impl<T> Default for UmapParamsGpu<T>
2579where
2580 T: ManifoldsFloat,
2581{
2582 fn default() -> Self {
2583 Self {
2584 n_dim: 2,
2585 k: 15,
2586 optimiser: "adam_parallel".to_string(),
2587 ann_type: "ivf_gpu".to_string(),
2588 initialisation: "spectral".to_string(),
2589 init_range: None,
2590 nn_params: NearestNeighbourParamsGpu::default(),
2591 umap_graph_params: UmapGraphParams::default(),
2592 optim_params: UmapOptimParams::default(),
2593 randomised: false,
2594 }
2595 }
2596}
2597
2598#[cfg(feature = "gpu")]
2599impl<T> UmapParamsGpu<T>
2600where
2601 T: ManifoldsFloat,
2602{
2603 #[allow(clippy::too_many_arguments)]
2629 pub fn new(
2630 n_dim: usize,
2631 k: usize,
2632 optimiser: String,
2633 ann_type: String,
2634 initialisation: String,
2635 init_range: Option<T>,
2636 nn_params: NearestNeighbourParamsGpu<T>,
2637 umap_graph_params: UmapGraphParams<T>,
2638 optim_params: UmapOptimParams<T>,
2639 randomised: bool,
2640 ) -> Self {
2641 Self {
2642 n_dim,
2643 k,
2644 optimiser,
2645 ann_type,
2646 initialisation,
2647 init_range,
2648 nn_params,
2649 umap_graph_params,
2650 optim_params,
2651 randomised,
2652 }
2653 }
2654
2655 pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
2671 let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
2672 let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
2673
2674 Self {
2675 optim_params: UmapOptimParams::from_min_dist_spread(
2676 min_dist, spread, None, None, None, None, None, None, None,
2677 ),
2678 ..Self::default()
2679 }
2680 }
2681}
2682
2683#[cfg(feature = "gpu")]
2710#[allow(clippy::too_many_arguments)]
2711pub fn construct_umap_graph_gpu<T, R>(
2712 data: MatRef<T>,
2713 precomputed_knn: PreComputedKnn<T>,
2714 k: usize,
2715 ann_type: String,
2716 umap_params: &UmapGraphParams<T>,
2717 nn_params: &NearestNeighbourParamsGpu<T>,
2718 n_epochs: usize,
2719 device: R::Device,
2720 seed: usize,
2721 verbose: usize,
2722) -> UmapGraphResults<T>
2723where
2724 T: ManifoldsFloat + AnnSearchGpuFloat,
2725 R: Runtime,
2726 NNDescentGpu<T, R>: NNDescentQuery<T>,
2727{
2728 let verbosity = parse_verbosity_level(verbose);
2729
2730 let (knn_indices, knn_dist) = match precomputed_knn {
2731 Some((indices, distances)) => {
2732 if verbosity.normal_verbosity() {
2733 println!("Using precomputed kNN graph...");
2734 }
2735 (indices, distances)
2736 }
2737 None => {
2738 if verbosity.normal_verbosity() {
2739 println!("Running GPU nearest neighbour search using {}...", ann_type);
2740 }
2741 let start_knn = Instant::now();
2742 let result =
2743 run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
2744 if verbosity.normal_verbosity() {
2745 println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
2746 }
2747 result
2748 }
2749 };
2750
2751 if verbosity.normal_verbosity() {
2752 println!("Constructing fuzzy simplicial set...");
2753 }
2754
2755 let start_graph = Instant::now();
2756
2757 let (sigma, rho) = smooth_knn_dist(
2758 &knn_dist,
2759 knn_dist[0].len(),
2760 umap_params.local_connectivity,
2761 umap_params.bandwidth,
2762 64,
2763 );
2764
2765 let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
2766 let graph = symmetrise_graph(graph, umap_params.mix_weight);
2767 let graph = filter_weak_edges(graph, n_epochs, verbose);
2768
2769 if verbosity.normal_verbosity() {
2770 println!(
2771 "Finalised graph generation in {:.2?}.",
2772 start_graph.elapsed()
2773 );
2774 }
2775
2776 Ok((graph, knn_indices, knn_dist))
2777}
2778
2779#[cfg(feature = "gpu")]
2801pub fn umap_gpu<T, R>(
2802 data: MatRef<T>,
2803 precomputed_knn: PreComputedKnn<T>,
2804 umap_params: &UmapParamsGpu<T>,
2805 device: R::Device,
2806 seed: usize,
2807 verbose: usize,
2808) -> Result<Vec<Vec<T>>, ManifoldsError>
2809where
2810 T: ManifoldsFloat + AnnSearchGpuFloat,
2811 R: Runtime,
2812 StandardNormal: Distribution<T>,
2813 NNDescentGpu<T, R>: NNDescentQuery<T>,
2814{
2815 let verbosity = parse_verbosity_level(verbose);
2816
2817 let init_type = parse_initilisation(
2818 &umap_params.initialisation,
2819 umap_params.randomised,
2820 umap_params.init_range,
2821 )
2822 .unwrap_or_else(|| {
2823 println!(
2824 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
2825 umap_params.initialisation,
2826 );
2827 EmbdInit::PcaInit {
2828 range: None,
2829 randomised: true,
2830 }
2831 });
2832 let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
2833
2834 if verbosity.normal_verbosity() {
2835 println!(
2836 "Running umap (GPU kNN) with alpha: {:.2?} and beta: {:.2?}",
2837 umap_params.optim_params.a, umap_params.optim_params.b
2838 );
2839 }
2840
2841 let (graph, _, _) = construct_umap_graph_gpu::<T, R>(
2842 data,
2843 precomputed_knn,
2844 umap_params.k,
2845 umap_params.ann_type.clone(),
2846 &umap_params.umap_graph_params,
2847 &umap_params.nn_params,
2848 umap_params.optim_params.n_epochs,
2849 device,
2850 seed,
2851 verbose,
2852 )?;
2853
2854 if verbosity.normal_verbosity() {
2855 println!(
2856 "Initialising embedding via {} layout...",
2857 umap_params.initialisation
2858 );
2859 }
2860
2861 let start_layout = Instant::now();
2862
2863 let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
2864
2865 let graph_adj = coo_to_adjacency_list(&graph);
2866
2867 if verbosity.normal_verbosity() {
2868 println!(
2869 "Optimising embedding via {} ({} epochs) on {} edges...",
2870 match optimiser {
2871 UmapOptimiser::Adam => "Adam",
2872 UmapOptimiser::Sgd => "SGD",
2873 UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
2874 },
2875 umap_params.optim_params.n_epochs,
2876 graph.col_indices.len().separate_with_underscores()
2877 );
2878 }
2879
2880 match optimiser {
2881 UmapOptimiser::Adam => optimise_embedding_adam(
2882 &mut embd,
2883 &graph_adj,
2884 &umap_params.optim_params,
2885 seed as u64,
2886 verbose,
2887 )?,
2888 UmapOptimiser::Sgd => {
2889 optimise_embedding_sgd(
2890 &mut embd,
2891 &graph_adj,
2892 &umap_params.optim_params,
2893 seed as u64,
2894 verbose,
2895 )?;
2896 }
2897 UmapOptimiser::AdamParallel => {
2898 optimise_embedding_adam_parallel(
2899 &mut embd,
2900 &graph_adj,
2901 &umap_params.optim_params,
2902 seed as u64,
2903 verbose,
2904 )?;
2905 }
2906 }
2907
2908 if verbosity.normal_verbosity() {
2909 println!(
2910 "Initialised and optimised embedding in: {:.2?}.",
2911 start_layout.elapsed()
2912 );
2913 println!("UMAP (GPU) complete!");
2914 }
2915
2916 let n_samples = embd.len();
2917 let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
2918
2919 for sample_idx in 0..n_samples {
2920 for dim_idx in 0..umap_params.n_dim {
2921 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
2922 }
2923 }
2924
2925 Ok(transposed)
2926}
2927
2928#[cfg(feature = "gpu")]
2934#[derive(Debug, Clone)]
2935pub struct TsneParamsGpu<T> {
2936 pub n_dim: usize,
2938 pub perplexity: T,
2940 pub ann_type: String,
2943 pub initialisation: String,
2945 pub init_range: Option<T>,
2947 pub nn_params: NearestNeighbourParamsGpu<T>,
2949 pub optim_params: TsneOptimParams<T>,
2951 pub randomised_init: bool,
2953}
2954
2955#[cfg(feature = "gpu")]
2956impl<T> Default for TsneParamsGpu<T>
2957where
2958 T: ManifoldsFloat,
2959{
2960 fn default() -> Self {
2961 Self {
2962 n_dim: 2,
2963 perplexity: T::from_f64(30.0).unwrap(),
2964 ann_type: "ivf_gpu".to_string(),
2965 initialisation: "pca".to_string(),
2966 init_range: None,
2967 nn_params: NearestNeighbourParamsGpu::default(),
2968 optim_params: TsneOptimParams {
2969 n_epochs: 1000,
2970 lr: None,
2971 early_exag_iter: 250,
2972 early_exag_factor: T::from_f64(12.0).unwrap(),
2973 late_exag_factor: None,
2974 theta: T::from_f64(0.5).unwrap(),
2975 n_interp_points: 3,
2976 },
2977 randomised_init: true,
2978 }
2979 }
2980}
2981
2982#[cfg(feature = "gpu")]
2983impl<T> TsneParamsGpu<T>
2984where
2985 T: ManifoldsFloat,
2986{
2987 #[allow(clippy::too_many_arguments)]
3014 pub fn new(
3015 n_dim: usize,
3016 perplexity: T,
3017 ann_type: String,
3018 initialisation: String,
3019 init_range: Option<T>,
3020 nn_params: NearestNeighbourParamsGpu<T>,
3021 optim_params: TsneOptimParams<T>,
3022 randomised_init: bool,
3023 ) -> Self {
3024 Self {
3025 n_dim,
3026 perplexity,
3027 ann_type,
3028 initialisation,
3029 init_range,
3030 nn_params,
3031 optim_params,
3032 randomised_init,
3033 }
3034 }
3035
3036 pub fn new_default_2d(perplexity: Option<T>) -> Self {
3051 let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
3052
3053 Self {
3054 perplexity,
3055 ..Self::default()
3056 }
3057 }
3058}
3059
3060#[cfg(feature = "gpu")]
3082#[allow(clippy::too_many_arguments)]
3083pub fn construct_tsne_graph_gpu<T, R>(
3084 data: MatRef<T>,
3085 precomputed_knn: PreComputedKnn<T>,
3086 perplexity: T,
3087 ann_type: String,
3088 nn_params: &NearestNeighbourParamsGpu<T>,
3089 device: R::Device,
3090 seed: usize,
3091 verbose: usize,
3092) -> TsneGraph<T>
3093where
3094 T: ManifoldsFloat + AnnSearchGpuFloat,
3095 R: Runtime,
3096 NNDescentGpu<T, R>: NNDescentQuery<T>,
3097{
3098 let verbosity = parse_verbosity_level(verbose);
3099
3100 let (knn_indices, knn_dist) = match precomputed_knn {
3101 Some((indices, distances)) => {
3102 if verbosity.normal_verbosity() {
3103 println!("Using precomputed kNN graph...");
3104 }
3105 (indices, distances)
3106 }
3107 None => {
3108 let k_float = perplexity * T::from_f64(3.0).unwrap();
3109 let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
3110
3111 if verbosity.normal_verbosity() {
3112 println!("Running GPU kNN search (k={}) using {}...", k, ann_type);
3113 }
3114
3115 let start_knn = Instant::now();
3116 let result =
3117 run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
3118
3119 if verbosity.normal_verbosity() {
3120 println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
3121 }
3122
3123 result
3124 }
3125 };
3126
3127 if verbosity.normal_verbosity() {
3128 println!("Computing Gaussian affinities and symmetrising...");
3129 }
3130
3131 let start_graph = Instant::now();
3132
3133 let directed_graph = gaussian_knn_affinities(
3134 &knn_indices,
3135 &knn_dist,
3136 perplexity,
3137 T::from_f64(1e-5).unwrap(),
3138 200,
3139 nn_params.dist_metric == "euclidean",
3140 )?;
3141
3142 let graph = symmetrise_affinities_tsne(directed_graph);
3143
3144 if verbosity.normal_verbosity() {
3145 println!(
3146 "Finalised graph generation in {:.2?}.",
3147 start_graph.elapsed()
3148 );
3149 }
3150
3151 Ok((graph, knn_indices, knn_dist))
3152}
3153
3154#[cfg(all(feature = "gpu", feature = "fft_tsne"))]
3176pub fn tsne_gpu<T, R>(
3177 data: MatRef<T>,
3178 precomputed_knn: PreComputedKnn<T>,
3179 params: &TsneParamsGpu<T>,
3180 approx_type: &str,
3181 device: R::Device,
3182 seed: usize,
3183 verbose: usize,
3184) -> Result<Vec<Vec<T>>, ManifoldsError>
3185where
3186 T: ManifoldsFloat + AnnSearchGpuFloat + FftwFloat,
3187 R: Runtime,
3188 StandardNormal: Distribution<T>,
3189 NNDescentGpu<T, R>: NNDescentQuery<T>,
3190{
3191 if params.n_dim != 2 {
3192 return Err(ManifoldsError::IncorrectDim {
3193 n_dim: params.n_dim,
3194 });
3195 }
3196
3197 let verbosity = parse_verbosity_level(verbose);
3198
3199 let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
3200 data,
3201 precomputed_knn,
3202 params.perplexity,
3203 params.ann_type.clone(),
3204 ¶ms.nn_params,
3205 device,
3206 seed,
3207 verbose,
3208 )?;
3209
3210 if verbosity.normal_verbosity() {
3211 println!("Initialising embedding via {}...", ¶ms.initialisation);
3212 }
3213
3214 let init_type = parse_initilisation(
3215 ¶ms.initialisation,
3216 params.randomised_init,
3217 params.init_range,
3218 )
3219 .unwrap_or_else(|| {
3220 println!(
3221 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
3222 params.initialisation,
3223 );
3224 EmbdInit::PcaInit {
3225 range: Some(T::from_f64(1e-2).unwrap()),
3226 randomised: true,
3227 }
3228 });
3229
3230 let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
3231
3232 let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
3233
3234 let start_optim = Instant::now();
3235 match tsne_approx {
3236 TsneOpt::BarnesHut => {
3237 if verbosity.normal_verbosity() {
3238 println!(
3239 "Optimising via Barnes-Hut t-SNE ({} epochs)...",
3240 params.optim_params.n_epochs
3241 );
3242 }
3243 optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
3244 }
3245 TsneOpt::Fft => {
3246 if verbosity.normal_verbosity() {
3247 println!(
3248 "Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
3249 params.optim_params.n_epochs
3250 );
3251 }
3252 optimise_fft_tsne(&mut embd, ¶ms.optim_params, &graph, verbose)?;
3253 }
3254 }
3255
3256 if verbosity.normal_verbosity() {
3257 println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
3258 }
3259
3260 let n_samples = embd.len();
3261 let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
3262
3263 for sample_idx in 0..n_samples {
3264 for dim_idx in 0..params.n_dim {
3265 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
3266 }
3267 }
3268
3269 Ok(transposed)
3270}
3271
3272#[cfg(all(feature = "gpu", not(feature = "fft_tsne")))]
3294pub fn tsne_gpu<T, R>(
3295 data: MatRef<T>,
3296 precomputed_knn: PreComputedKnn<T>,
3297 params: &TsneParamsGpu<T>,
3298 approx_type: &str,
3299 device: R::Device,
3300 seed: usize,
3301 verbose: usize,
3302) -> Result<Vec<Vec<T>>, ManifoldsError>
3303where
3304 T: ManifoldsFloat + AnnSearchGpuFloat,
3305 R: Runtime,
3306 StandardNormal: Distribution<T>,
3307 NNDescentGpu<T, R>: NNDescentQuery<T>,
3308{
3309 if params.n_dim != 2 {
3310 return Err(ManifoldsError::IncorrectDim {
3311 n_dim: params.n_dim,
3312 });
3313 }
3314
3315 let verbosity = parse_verbosity_level(verbose);
3316
3317 let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
3318 data,
3319 precomputed_knn,
3320 params.perplexity,
3321 params.ann_type.clone(),
3322 ¶ms.nn_params,
3323 device,
3324 seed,
3325 verbose,
3326 )?;
3327
3328 if verbosity.normal_verbosity() {
3329 println!("Initialising embedding via {}...", ¶ms.initialisation);
3330 }
3331
3332 let init_type = parse_initilisation(
3333 ¶ms.initialisation,
3334 params.randomised_init,
3335 params.init_range,
3336 )
3337 .unwrap_or_else(|| {
3338 println!(
3339 "Unknown initialisation provided: {:?}. Defaulting to PCA.",
3340 params.initialisation,
3341 );
3342 EmbdInit::PcaInit {
3343 range: Some(T::from_f64(1e-2).unwrap()),
3344 randomised: true,
3345 }
3346 });
3347
3348 let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
3349
3350 let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
3351
3352 let start_optim = Instant::now();
3353 match tsne_approx {
3354 TsneOpt::BarnesHut => {
3355 if verbosity.normal_verbosity() {
3356 println!(
3357 "Optimising via Barnes-Hut t-SNE ({} epochs)...",
3358 params.optim_params.n_epochs
3359 );
3360 }
3361 optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
3362 }
3363 TsneOpt::Fft => {
3364 panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
3365 }
3366 }
3367
3368 if verbosity.normal_verbosity() {
3369 println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
3370 }
3371
3372 let n_samples = embd.len();
3373 let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
3374
3375 for sample_idx in 0..n_samples {
3376 for dim_idx in 0..params.n_dim {
3377 transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
3378 }
3379 }
3380
3381 Ok(transposed)
3382}