#![allow(clippy::needless_range_loop)] #![warn(missing_docs)]
pub mod data;
pub mod errors;
pub mod prelude;
pub mod training;
pub mod utils;
#[cfg(feature = "parametric")]
pub mod parametric;
use ann_search_rs::cpu::hnsw::{HnswIndex, HnswState};
use ann_search_rs::cpu::nndescent::{ApplySortedUpdates, NNDescent, NNDescentQuery};
use ann_search_rs::utils::dist::parse_ann_dist;
use faer::MatRef;
use rand_distr::{Distribution, StandardNormal};
use std::{default::Default, time::Instant};
use thousands::*;
#[cfg(feature = "parametric")]
use burn::tensor::{backend::AutodiffBackend, Element};
#[cfg(feature = "parametric")]
use num_traits::ToPrimitive;
#[cfg(feature = "gpu")]
use ann_search_rs::gpu::nndescent_gpu::NNDescentGpu;
#[cfg(feature = "gpu")]
use ann_search_rs::gpu::traits_gpu::AnnSearchGpuFloat;
#[cfg(feature = "gpu")]
use cubecl::prelude::*;
use crate::data::graph::*;
use crate::data::init::*;
use crate::data::nearest_neighbours::*;
use crate::data::pacmap_pairs::*;
use crate::data::structures::*;
use crate::prelude::*;
use crate::training::mds_optimiser::*;
use crate::training::pacmap_optimiser::{
optimise_pacmap, optimise_pacmap_parallel, PacMapOptimiser,
};
use crate::training::tsne_optimiser::*;
use crate::training::umap_optimisers::*;
use crate::utils::diffusions::*;
use crate::utils::math::compute_largest_eigenpairs_lanczos;
use crate::utils::potentials::compute_potential_distances;
use crate::utils::sparse_ops::matrix_power;
#[cfg(feature = "parametric")]
use crate::parametric::model::*;
#[cfg(feature = "parametric")]
use crate::parametric::parametric_train::*;
#[cfg(feature = "fft_tsne")]
use crate::utils::fft::FftwFloat;
pub type PreComputedKnn<T> = Option<(Vec<Vec<usize>>, Vec<Vec<T>>)>;
pub type TsneGraph<T> = Result<(CoordinateList<T>, Vec<Vec<usize>>, Vec<Vec<T>>), ManifoldsError>;
#[derive(Debug, Clone)]
pub struct UmapParams<T> {
pub n_dim: usize,
pub k: usize,
pub optimiser: String,
pub ann_type: String,
pub initialisation: String,
pub init_range: Option<T>,
pub nn_params: NearestNeighbourParams<T>,
pub umap_graph_params: UmapGraphParams<T>,
pub optim_params: UmapOptimParams<T>,
pub randomised: bool,
}
impl<T> Default for UmapParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
k: 15,
optimiser: "adam_parallel".to_string(),
ann_type: "kmknn".to_string(),
initialisation: "spectral".to_string(),
init_range: None,
nn_params: NearestNeighbourParams::default(),
optim_params: UmapOptimParams::default(),
umap_graph_params: UmapGraphParams::default(),
randomised: false,
}
}
}
impl<T> UmapParams<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
k: usize,
optimiser: String,
ann_type: String,
initialisation: String,
init_range: Option<T>,
nn_params: NearestNeighbourParams<T>,
optim_params: UmapOptimParams<T>,
umap_graph_params: UmapGraphParams<T>,
randomised: bool,
) -> Self {
Self {
n_dim,
k,
optimiser,
ann_type,
initialisation,
init_range,
nn_params,
optim_params,
umap_graph_params,
randomised,
}
}
pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
Self {
optim_params: UmapOptimParams::from_min_dist_spread(
min_dist, spread, None, None, None, None, None, None, None,
),
..Self::default()
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn construct_umap_graph<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
k: usize,
ann_type: String,
umap_params: &UmapGraphParams<T>,
nn_params: &NearestNeighbourParams<T>,
n_epochs: usize,
seed: usize,
verbose: usize,
) -> UmapGraphResults<T>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
if verbosity.normal_verbosity() {
println!(
"Running (approximate) nearest neighbour search using {}...",
ann_type
);
}
let start_knn = Instant::now();
let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Constructing fuzzy simplicial set...");
}
let start_graph = Instant::now();
let (sigma, rho) = smooth_knn_dist(
&knn_dist,
knn_dist[0].len(),
umap_params.local_connectivity,
umap_params.bandwidth,
64,
);
let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
let graph = symmetrise_graph(graph, umap_params.mix_weight);
let graph = filter_weak_edges(graph, n_epochs, verbose);
if verbosity.normal_verbosity() {
println!(
"Finalised graph generation in {:.2?}.",
start_graph.elapsed()
);
}
Ok((graph, knn_indices, knn_dist))
}
pub fn umap<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
umap_params: &UmapParams<T>,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
StandardNormal: Distribution<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let init_type = parse_initilisation(
&umap_params.initialisation,
umap_params.randomised,
umap_params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
umap_params.initialisation,
);
EmbdInit::PcaInit {
range: None,
randomised: true,
}
});
let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
if verbosity.normal_verbosity() {
println!(
"Running umap with alpha: {:.2?} and beta: {:.2?}",
umap_params.optim_params.a, umap_params.optim_params.b
);
}
let (graph, _, _) = construct_umap_graph(
data,
precomputed_knn,
umap_params.k,
umap_params.ann_type.clone(),
&umap_params.umap_graph_params,
&umap_params.nn_params,
umap_params.optim_params.n_epochs,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!(
"Initialising embedding via {} layout...",
umap_params.initialisation
);
}
let start_layout = Instant::now();
let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
let graph_adj = coo_to_adjacency_list(&graph);
if verbosity.normal_verbosity() {
println!(
"Optimising embedding via {} ({} epochs) on {} edges...",
match optimiser {
UmapOptimiser::Adam => "Adam",
UmapOptimiser::Sgd => "SGD",
UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
},
umap_params.optim_params.n_epochs,
graph.col_indices.len().separate_with_underscores()
);
}
match optimiser {
UmapOptimiser::Adam => optimise_embedding_adam(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?,
UmapOptimiser::Sgd => {
optimise_embedding_sgd(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?;
}
UmapOptimiser::AdamParallel => {
optimise_embedding_adam_parallel(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?;
}
}
let end_layout = start_layout.elapsed();
if verbosity.normal_verbosity() {
println!(
"Initialised and optimised embedding in: {:.2?}.",
end_layout
);
println!("UMAP complete!");
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..umap_params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[derive(Debug, Clone)]
pub struct TsneParams<T> {
pub n_dim: usize,
pub perplexity: T,
pub ann_type: String,
pub initialisation: String,
pub init_range: Option<T>,
pub nn_params: NearestNeighbourParams<T>,
pub optim_params: TsneOptimParams<T>,
pub randomised_init: bool,
}
impl<T> Default for TsneParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
perplexity: T::from_f64(30.0).unwrap(),
ann_type: "kmknn".to_string(),
initialisation: "pca".to_string(),
init_range: None,
nn_params: NearestNeighbourParams::default(),
optim_params: TsneOptimParams {
n_epochs: 1000,
lr: None,
early_exag_iter: 250,
early_exag_factor: T::from_f64(12.0).unwrap(),
late_exag_factor: None,
theta: T::from_f64(0.5).unwrap(),
n_interp_points: 3,
},
randomised_init: true,
}
}
}
impl<T> TsneParams<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
perplexity: T,
ann_type: String,
initialisation: String,
init_range: Option<T>,
nn_params: NearestNeighbourParams<T>,
optim_params: TsneOptimParams<T>,
randomised_init: bool,
) -> Self {
Self {
n_dim,
perplexity,
ann_type,
initialisation,
init_range,
nn_params,
optim_params,
randomised_init,
}
}
pub fn new_default_2d(perplexity: Option<T>) -> Self {
let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
Self {
perplexity,
..Self::default()
}
}
}
pub fn construct_tsne_graph<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
perplexity: T,
ann_type: String,
nn_params: &NearestNeighbourParams<T>,
seed: usize,
verbose: usize,
) -> TsneGraph<T>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
let k_float = perplexity * T::from_f64(3.0).unwrap();
let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
if verbosity.normal_verbosity() {
println!("Running kNN search (k={}) using {}...", k, ann_type);
}
let start_knn = Instant::now();
let result = run_ann_search(data, k, ann_type, nn_params, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Computing Gaussian affinities and symmetrising...");
}
let start_graph = Instant::now();
let directed_graph = gaussian_knn_affinities(
&knn_indices,
&knn_dist,
perplexity,
T::from_f64(1e-5).unwrap(),
200,
nn_params.dist_metric == "euclidean",
)?;
let graph = symmetrise_affinities_tsne(directed_graph);
if verbosity.normal_verbosity() {
println!(
"Finalised graph generation in {:.2?}.",
start_graph.elapsed()
);
}
Ok((graph, knn_indices, knn_dist))
}
#[cfg(feature = "fft_tsne")]
pub fn tsne<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
params: &TsneParams<T>,
approx_type: &str,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat + FftwFloat,
HnswIndex<T>: HnswState<T>,
StandardNormal: Distribution<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
if params.n_dim != 2 {
return Err(ManifoldsError::IncorrectDim {
n_dim: params.n_dim,
});
}
let verbosity = parse_verbosity_level(verbose);
let (graph, _, _) = construct_tsne_graph(
data,
precomputed_knn,
params.perplexity,
params.ann_type.clone(),
¶ms.nn_params,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!("Initialising embedding via {}...", ¶ms.initialisation);
}
let init_type = parse_initilisation(
¶ms.initialisation,
params.randomised_init,
params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
params.initialisation,
);
EmbdInit::PcaInit {
range: Some(T::from_f64(1e-2).unwrap()),
randomised: true,
}
});
let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
let start_optim = Instant::now();
match tsne_approx {
TsneOpt::BarnesHut => {
if verbosity.normal_verbosity() {
println!(
"Optimising via Barnes-Hut t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
}
#[cfg(feature = "fft_tsne")]
TsneOpt::Fft => {
if verbosity.normal_verbosity() {
println!(
"Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
let _ = optimise_fft_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
}
}
if verbosity.normal_verbosity() {
println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[cfg(not(feature = "fft_tsne"))]
pub fn tsne<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
params: &TsneParams<T>,
approx_type: &str,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
StandardNormal: Distribution<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
if params.n_dim != 2 {
return Err(ManifoldsError::IncorrectDim {
n_dim: params.n_dim,
});
}
let verbosity = parse_verbosity_level(verbose);
let (graph, _, _) = construct_tsne_graph(
data,
precomputed_knn,
params.perplexity,
params.ann_type.clone(),
¶ms.nn_params,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!("Initialising embedding via {}...", ¶ms.initialisation);
}
let init_type = parse_initilisation(
¶ms.initialisation,
params.randomised_init,
params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
params.initialisation,
);
EmbdInit::PcaInit {
range: Some(T::from_f64(1e-2).unwrap()),
randomised: true,
}
});
let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
let start_optim = Instant::now();
match tsne_approx {
TsneOpt::BarnesHut => {
if verbosity.normal_verbosity() {
println!(
"Optimising via Barnes-Hut t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
}
#[cfg(not(feature = "fft_tsne"))]
TsneOpt::Fft => {
panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
}
}
if verbosity.normal_verbosity() {
println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[derive(Debug, Clone)]
pub struct PhateParams<T> {
pub n_dim: usize,
pub k: usize,
pub ann_type: String,
pub ann_params: NearestNeighbourParams<T>,
pub diffusion_params: PhateDiffusionParams<T>,
pub mds_method: String,
pub mds_iter: Option<usize>,
pub randomised: bool,
}
impl<T> Default for PhateParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
let diffusion_params = PhateDiffusionParams::new(
Some(T::from_f64(40.0).unwrap()),
T::from_f64(1.0).unwrap(),
T::from_f64(1e-4).unwrap(),
"average".to_string(),
None,
"spectral".to_string(),
None,
None,
None,
T::from_f64(1.0).unwrap(),
);
Self {
n_dim: 2,
k: 5,
ann_type: "kmknn".to_string(),
ann_params: NearestNeighbourParams::default(),
diffusion_params,
mds_method: "sgd_dense".to_string(),
mds_iter: None,
randomised: true,
}
}
}
impl<T> PhateParams<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
k: usize,
ann_type: String,
ann_params: NearestNeighbourParams<T>,
diffusion_params: PhateDiffusionParams<T>,
mds_method: String,
mds_iter: Option<usize>,
randomised: bool,
) -> Self {
Self {
n_dim,
k,
ann_type,
ann_params,
diffusion_params,
mds_method,
mds_iter,
randomised,
}
}
pub fn new_default_2d(k: Option<usize>) -> Self {
let k = k.unwrap_or(5);
Self {
k,
..Self::default()
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn construct_phate_diffusion<T>(
data: MatRef<T>,
k: usize,
precomputed_knn: PreComputedKnn<T>,
ann_type: &str,
nn_params: &NearestNeighbourParams<T>,
phate_params: &PhateParams<T>,
seed: usize,
verbose: usize,
) -> Result<PhateDiffusion<T>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
if verbosity.normal_verbosity() {
println!(
"Running (approximate) nearest neighbour search using {}...",
ann_type
);
}
let start_knn = Instant::now();
let result = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Calculating alpha decay affinities");
}
let start_alpha_affinities = Instant::now();
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dist,
phate_params.k,
phate_params.diffusion_params.decay,
phate_params.diffusion_params.bandwidth_scale,
phate_params.diffusion_params.thresh,
&phate_params.diffusion_params.graph_symmetry,
nn_params.dist_metric == "euclidean",
);
if verbosity.normal_verbosity() {
println!(
"Alpha decay affinity calculations done in: {:.2?}.",
start_alpha_affinities.elapsed()
);
}
let affinity = coo_to_csr(&graph);
let diffusion_op = build_diffusion_operator(&affinity);
match phate_params.diffusion_params.n_landmarks {
None => Ok(PhateDiffusion::Full {
operator: diffusion_op,
}),
Some(n_landmarks) if n_landmarks >= data.nrows() => Ok(PhateDiffusion::Full {
operator: diffusion_op,
}),
Some(n_landmarks) => {
if verbosity.normal_verbosity() {
println!(" Building {} landmarks...", n_landmarks);
}
let start_landmarks = Instant::now();
let landmarks = PhateLandmarks::build(
data,
&affinity,
&diffusion_op,
n_landmarks,
&phate_params.diffusion_params.landmark_method,
&nn_params.dist_metric,
seed,
Some(100),
verbose,
)?;
if verbosity.normal_verbosity() {
println!(
" Landmarks generated in: {:.2?}.",
start_landmarks.elapsed()
);
}
Ok(PhateDiffusion::Landmark { landmarks })
}
}
}
pub fn phate<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
phate_params: PhateParams<T>,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
StandardNormal: Distribution<T>,
{
let start_phate = Instant::now();
let verbosity = parse_verbosity_level(verbose);
let phate_diffusion = construct_phate_diffusion(
data,
phate_params.k,
precomputed_knn,
&phate_params.ann_type,
&phate_params.ann_params,
&phate_params,
seed,
verbose,
)?;
let start_t = Instant::now();
let t = match phate_params.diffusion_params.t {
PhateTime::Auto { t_max } => {
if verbosity.normal_verbosity() {
println!("Finding optimal t (t_max={})...", t_max);
}
match &phate_diffusion {
PhateDiffusion::Landmark { landmarks } => landmarks.find_optimal_t(t_max),
PhateDiffusion::Full { operator } => {
let entropy = landmark_von_neumann_entropy(operator, t_max)?;
Ok(find_knee_point(&entropy))
}
}
}
PhateTime::Fixed(t) => Ok(t),
}?;
if verbosity.normal_verbosity() {
println!("Identified t = {} in {:.2?}.", t, start_t.elapsed());
}
let mds_method = parse_mds_method(&phate_params.mds_method).unwrap_or_default();
let dist = parse_ann_dist(&phate_params.ann_params.dist_metric).unwrap_or_default();
let mds_params = MdsOptimParams::new(
data.nrows(),
phate_params.randomised,
phate_params.mds_iter,
None,
);
let start_embed = Instant::now();
let embedding = match phate_diffusion {
PhateDiffusion::Full { operator } => {
if verbosity.normal_verbosity() {
println!("Powering diffusion operator...");
}
let powered = matrix_power(&operator, t)?;
let potential = calculate_potential(&powered, 1, phate_params.diffusion_params.gamma)?;
if verbosity.normal_verbosity() {
println!(
"Potential shape: {} × {} - calculated in {:.2?}.",
potential.shape().0,
potential.shape().1,
start_embed.elapsed()
);
}
let res = match mds_method {
MdsMethod::ClassicMds => {
if verbosity.normal_verbosity() {
println!("Computing pairwise distances, running classic MDS...");
}
let distances = compute_potential_distances(&potential, &dist);
classic_mds(&distances, phate_params.n_dim, mds_params.randomised, seed)
}
MdsMethod::SgdDense => {
if verbosity.normal_verbosity() {
println!("Computing pairwise distances, running SGD-MDS...");
}
let distances = compute_potential_distances(&potential, &dist);
sgd_mds(
&distances,
phate_params.n_dim,
&mds_params,
None,
seed,
verbose,
)
}
}?;
res
}
PhateDiffusion::Landmark { landmarks } => {
if verbosity.normal_verbosity() {
println!(
"Powering landmark operator ({} landmarks)...",
landmarks.get_n_landmarks()
);
}
let landmark_powered = landmarks.power(t)?;
let landmark_potential =
calculate_potential(&landmark_powered, 1, phate_params.diffusion_params.gamma)?;
if verbosity.normal_verbosity() {
println!(
"Landmark potential shape: {} × {} - calculated in {:.2?}.",
landmark_potential.shape().0,
landmark_potential.shape().1,
start_embed.elapsed()
);
println!("Computing landmark pairwise distances...");
}
let landmark_distances = compute_potential_distances(&landmark_potential, &dist);
let landmark_mds_params = MdsOptimParams::new(
landmarks.get_n_landmarks(),
phate_params.randomised,
None,
None,
);
if verbosity.normal_verbosity() {
println!("Running MDS on landmarks...");
}
let landmark_embedding = match mds_method {
MdsMethod::ClassicMds => classic_mds(
&landmark_distances,
phate_params.n_dim,
landmark_mds_params.randomised,
seed,
),
_ => sgd_mds(
&landmark_distances,
phate_params.n_dim,
&landmark_mds_params,
None,
seed,
verbose,
),
}?;
if verbosity.normal_verbosity() {
println!("Interpolating to full N points via Nyström...");
}
landmarks.interpolate_embedding(&landmark_embedding)
}
};
if verbosity.normal_verbosity() {
println!("Ran MDS in {:.2?}.", start_embed.elapsed());
println!("Finished running PHATE in {:.2?}.", start_phate.elapsed());
}
let n_samples = embedding.len();
let mut transposed = vec![vec![T::zero(); n_samples]; phate_params.n_dim];
for i in 0..n_samples {
for d in 0..phate_params.n_dim {
transposed[d][i] = embedding[i][d];
}
}
Ok(transposed)
}
#[derive(Debug, Clone)]
pub struct PacmapParams<T> {
pub n_dim: usize,
pub k: usize,
pub ann_type: String,
pub optimiser_type: String,
pub n_mid_near: usize,
pub n_further: usize,
pub mn_candidate_start: usize,
pub mn_candidate_end: usize,
pub initialisation: String,
pub nn_params: NearestNeighbourParams<T>,
pub optim_params: PacmapOptimParams<T>,
}
impl<T> Default for PacmapParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
k: 50,
ann_type: "kmknn".to_string(),
optimiser_type: "adam_parallel".to_string(),
n_mid_near: 2,
n_further: 2,
mn_candidate_start: 4,
mn_candidate_end: 50,
initialisation: "pca".to_string(),
nn_params: NearestNeighbourParams::default(),
optim_params: PacmapOptimParams::default(),
}
}
}
impl<T> PacmapParams<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
k: usize,
ann_type: String,
optimiser_type: String,
n_mid_near: usize,
n_further: usize,
mn_candidate_start: usize,
mn_candidate_end: usize,
initialisation: String,
nn_params: NearestNeighbourParams<T>,
optim_params: PacmapOptimParams<T>,
) -> Self {
let k = k.max(mn_candidate_end);
Self {
n_dim,
k,
ann_type,
optimiser_type,
n_mid_near,
n_further,
mn_candidate_start,
mn_candidate_end,
initialisation,
nn_params,
optim_params,
}
}
pub fn new_default_2d(k: Option<usize>) -> Self {
let default = Self::default();
let k = k.unwrap_or(default.k).max(default.mn_candidate_end);
Self { k, ..default }
}
}
pub fn pacmap<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
params_pacmap: &PacmapParams<T>,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
StandardNormal: Distribution<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let n_samples = data.nrows();
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, _) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
if verbosity.normal_verbosity() {
println!(
"Running (approximate) nearest neighbour search using {} (k={})...",
params_pacmap.ann_type, params_pacmap.k
);
}
let start_knn = Instant::now();
let result = run_ann_search(
data,
params_pacmap.k,
params_pacmap.ann_type.clone(),
¶ms_pacmap.nn_params,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!("kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Constructing PaCMAP pairs...");
}
let start_pairs = Instant::now();
let pairs: PacmapPairs = construct_pacmap_pairs(
&knn_indices,
params_pacmap.n_mid_near,
params_pacmap.n_further,
params_pacmap.mn_candidate_start,
params_pacmap.mn_candidate_end,
seed as u64,
);
let end_pairs = start_pairs.elapsed();
if verbosity.normal_verbosity() {
println!(
"Pairs generated in {:.2?}: {} near, {} mid-near, {} further.",
end_pairs,
pairs.near.len().separate_with_underscores(),
pairs.mid_near.len().separate_with_underscores(),
pairs.further.len().separate_with_underscores()
);
}
let init_type =
parse_initilisation(¶ms_pacmap.initialisation, true, None).unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
params_pacmap.initialisation,
);
EmbdInit::PcaInit {
range: None,
randomised: true,
}
});
if verbosity.normal_verbosity() {
println!(
"Initialising embedding via {} layout...",
params_pacmap.initialisation
);
}
let dummy_graph = knn_to_coo_unweighted(&knn_indices);
let mut embd = initialise_embedding(
&init_type,
params_pacmap.n_dim,
seed as u64,
&dummy_graph,
data,
)?;
let optimiser = parse_pacmap_optimiser(¶ms_pacmap.optimiser_type).unwrap_or_default();
let start_optim = Instant::now();
match optimiser {
PacMapOptimiser::AdamParallel => {
let _ =
optimise_pacmap_parallel(&mut embd, &pairs, ¶ms_pacmap.optim_params, verbose);
}
PacMapOptimiser::Adam => {
let _ = optimise_pacmap(&mut embd, &pairs, ¶ms_pacmap.optim_params, verbose);
}
}
let end_optim = start_optim.elapsed();
if verbosity.normal_verbosity() {
println!("Optimisation done in {:.2?}.", end_optim);
println!("PaCMAP complete!");
}
let mut transposed = vec![vec![T::zero(); n_samples]; params_pacmap.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..params_pacmap.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[derive(Debug, Clone)]
pub struct DiffusionMapsParams<T> {
pub n_dim: usize,
pub k: usize,
pub ann_type: String,
pub ann_params: NearestNeighbourParams<T>,
pub bandwidth_scale: T,
pub thresh: T,
pub graph_symmetry: String,
pub alpha_norm: T,
pub t: PhateTime,
pub n_landmarks: Option<usize>,
pub landmark_method: String,
pub n_svd: Option<usize>,
}
impl<T> Default for DiffusionMapsParams<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
k: 5,
ann_type: "kmknn".to_string(),
ann_params: NearestNeighbourParams::default(),
bandwidth_scale: T::from_f64(1.0).unwrap(),
thresh: T::from_f64(1e-4).unwrap(),
graph_symmetry: "add".to_string(),
alpha_norm: T::from_f64(1.0).unwrap(),
t: parse_phate_time(None, PHATE_MAX_T),
n_landmarks: None,
landmark_method: "spectral".to_string(),
n_svd: None,
}
}
}
impl<T> DiffusionMapsParams<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
k: usize,
ann_type: String,
ann_params: NearestNeighbourParams<T>,
bandwidth_scale: T,
thresh: T,
graph_symmetry: String,
alpha_norm: T,
t: PhateTime,
n_landmarks: Option<usize>,
landmark_method: String,
n_svd: Option<usize>,
) -> Self {
Self {
n_dim,
k,
ann_type,
ann_params,
bandwidth_scale,
thresh,
graph_symmetry,
alpha_norm,
t,
n_landmarks,
landmark_method,
n_svd,
}
}
pub fn new_default_2d(k: Option<usize>) -> Self {
let k = k.unwrap_or(5);
Self {
k,
..Self::default()
}
}
}
pub enum DiffusionMapsOperator<T>
where
T: ManifoldsFloat,
{
Full {
p_sym: CompressedSparseData<T>,
sqrt_degrees: Vec<T>,
},
Landmark {
landmarks: DiffusionMapsLandmarks<T>,
},
}
#[allow(clippy::too_many_arguments)]
pub fn construct_diffusion_maps_operator<T>(
data: MatRef<T>,
k: usize,
precomputed_knn: PreComputedKnn<T>,
ann_type: &str,
nn_params: &NearestNeighbourParams<T>,
dm_params: &DiffusionMapsParams<T>,
seed: usize,
verbose: usize,
) -> Result<DiffusionMapsOperator<T>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let use_full = match dm_params.n_landmarks {
None => true,
Some(n) => n >= data.nrows(),
};
let verbosity = parse_verbosity_level(verbose);
let needs_affinity =
use_full || matches!(dm_params.landmark_method.as_str(), "spectral" | "density");
let affinity = if needs_affinity {
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
if verbosity.normal_verbosity() {
println!(
"Running (approximate) nearest neighbour search using {}...",
ann_type
);
}
let start_knn = Instant::now();
let res = run_ann_search(data, k, ann_type.to_string(), nn_params, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("kNN search done in {:.2?}.", start_knn.elapsed());
}
res
}
};
if verbosity.normal_verbosity() {
println!("Building Gaussian kernel affinities");
}
let graph = phate_alpha_decay_affinities(
&knn_indices,
&knn_dist,
dm_params.k,
Some(T::from_f64(2.0).unwrap()),
dm_params.bandwidth_scale,
dm_params.thresh,
&dm_params.graph_symmetry,
nn_params.dist_metric == "euclidean",
);
Some(coo_to_csr(&graph))
} else {
if verbosity.normal_verbosity() {
println!("Skipping full affinity (random landmarks).");
}
None
};
if use_full {
let kernel = affinity.unwrap();
let kernel_norm = apply_anisotropic_normalisation(&kernel, dm_params.alpha_norm)?;
let (p_sym, sqrt_degrees) = build_symmetric_diffusion_operator(&kernel_norm)?;
return Ok(DiffusionMapsOperator::Full {
p_sym,
sqrt_degrees,
});
}
let n_landmarks = dm_params.n_landmarks.unwrap();
if verbosity.normal_verbosity() {
println!(" Building {} landmarks...", n_landmarks);
}
let start_l = Instant::now();
let landmarks = DiffusionMapsLandmarks::build(
data,
affinity.as_ref(),
n_landmarks,
&dm_params.landmark_method,
&nn_params.dist_metric,
dm_params.alpha_norm,
dm_params.k,
dm_params.bandwidth_scale,
dm_params.thresh,
&dm_params.graph_symmetry,
seed,
dm_params.n_svd,
verbose,
)?;
if verbosity.normal_verbosity() {
println!(" Landmarks built in {:.2?}.", start_l.elapsed());
}
Ok(DiffusionMapsOperator::Landmark { landmarks })
}
pub fn diffusion_maps<T>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
dm_params: DiffusionMapsParams<T>,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let start_dm = Instant::now();
let verbosity = parse_verbosity_level(verbose);
let op = construct_diffusion_maps_operator(
data,
dm_params.k,
precomputed_knn,
&dm_params.ann_type,
&dm_params.ann_params,
&dm_params,
seed,
verbose,
)?;
let embedding: Vec<Vec<T>> = match op {
DiffusionMapsOperator::Full {
p_sym,
sqrt_degrees,
} => {
let t = match dm_params.t {
PhateTime::Auto { t_max } => {
if verbosity.normal_verbosity() {
println!("Finding optimal t (t_max={})...", t_max);
}
let entropy = landmark_von_neumann_entropy(&p_sym, t_max)?;
find_knee_point(&entropy)
}
PhateTime::Fixed(t) => t,
};
if verbosity.normal_verbosity() {
println!("Using t = {}.", t);
println!(
"Computing top {} eigenpairs of P_sym...",
dm_params.n_dim + 1
);
}
let start_eig = Instant::now();
let n_ask = (dm_params.n_dim + 5).min(sqrt_degrees.len() - 1);
let (evals, evecs) = compute_largest_eigenpairs_lanczos(&p_sym, n_ask, seed as u64)?;
if verbosity.normal_verbosity() {
println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
}
let n = sqrt_degrees.len();
let mut embedding = vec![vec![T::zero(); dm_params.n_dim]; n];
for comp_idx in 1..=dm_params.n_dim {
let lambda = T::from_f32(evals[comp_idx]).unwrap();
let lambda_t = lambda.powi(t as i32);
let mut max_abs = 0.0f32;
let mut sign = 1.0f32;
for i in 0..n {
let v = evecs[i][comp_idx];
if v.abs() > max_abs {
max_abs = v.abs();
sign = if v >= 0.0 { 1.0 } else { -1.0 };
}
}
for i in 0..n {
let u = T::from_f32(evecs[i][comp_idx] * sign).unwrap();
embedding[i][comp_idx - 1] = lambda_t * u / sqrt_degrees[i];
}
}
embedding
}
DiffusionMapsOperator::Landmark { landmarks } => {
let t = match dm_params.t {
PhateTime::Auto { t_max } => {
if verbosity.normal_verbosity() {
println!("Finding optimal t on landmarks (t_max={})...", t_max);
}
landmarks.find_optimal_t(t_max)?
}
PhateTime::Fixed(t) => t,
};
if verbosity.detailed_verbosity() {
println!(
"Using t = {}. Eigendecomposing {}x{} landmark operator...",
t,
landmarks.get_n_landmarks(),
landmarks.get_n_landmarks()
);
}
let start_eig = Instant::now();
let (evals, evecs) = landmarks.eigendecompose(dm_params.n_dim, seed as u64)?;
if verbosity.detailed_verbosity() {
println!("Eigendecomposition done in {:.2?}.", start_eig.elapsed());
}
if verbosity.normal_verbosity() {
println!("Nystroem-extending to full data...");
}
let (landmark_embedding, lambdas) =
landmarks.compute_landmark_embedding(&evals, &evecs, dm_params.n_dim, t);
landmarks.nystrom_extend(&landmark_embedding, &lambdas)
}
};
if verbosity.normal_verbosity() {
println!("Diffusion maps finished in {:.2?}.", start_dm.elapsed());
}
let n = embedding.len();
let mut transposed = vec![vec![T::zero(); n]; dm_params.n_dim];
for i in 0..n {
for d in 0..dm_params.n_dim {
transposed[d][i] = embedding[i][d];
}
}
Ok(transposed)
}
#[cfg(feature = "parametric")]
#[derive(Debug, Clone)]
pub struct ParametricUmapParams<T> {
pub n_dim: usize,
pub k: usize,
pub ann_type: String,
pub hidden_layers: Vec<usize>,
pub nn_params: NearestNeighbourParams<T>,
pub umap_graph_params: UmapGraphParams<T>,
pub train_param: TrainParametricParams<T>,
}
#[cfg(feature = "parametric")]
impl<T> Default for ParametricUmapParams<T>
where
T: ManifoldsFloat + Element,
{
fn default() -> Self {
Self {
n_dim: 2,
k: 15,
ann_type: "hnsw".to_string(),
hidden_layers: vec![128, 128, 128],
nn_params: NearestNeighbourParams::default(),
umap_graph_params: UmapGraphParams::default(),
train_param: TrainParametricParams::default(),
}
}
}
#[cfg(feature = "parametric")]
impl<T> ParametricUmapParams<T>
where
T: ManifoldsFloat + Element,
{
pub fn new(
n_dim: usize,
k: usize,
ann_type: String,
hidden_layers: Vec<usize>,
nn_params: NearestNeighbourParams<T>,
umap_graph_params: UmapGraphParams<T>,
train_param: TrainParametricParams<T>,
) -> Self {
Self {
n_dim,
k,
ann_type,
hidden_layers,
nn_params,
umap_graph_params,
train_param,
}
}
pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>, corr_weight: Option<T>) -> Self {
let min_dist = min_dist.unwrap_or(T::from_f64(0.1).unwrap());
let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
let corr_weight = corr_weight.unwrap_or(T::from_f64(0.0).unwrap());
Self {
train_param: TrainParametricParams::from_min_dist_spread(
min_dist,
spread,
corr_weight,
None,
None,
None,
None,
),
..Self::default()
}
}
}
#[cfg(feature = "parametric")]
pub fn parametric_umap<T, B>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
umap_params: &ParametricUmapParams<T>,
device: &B::Device,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat + Element,
B: AutodiffBackend,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let nn_params = umap_params.nn_params.clone();
let verbosity = parse_verbosity_level(verbose);
if verbosity.normal_verbosity() {
println!(
"Running parametric umap with alpha: {:.2?} and beta: {:.2?}",
ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
);
}
let (graph, _, _) = construct_umap_graph(
data,
precomputed_knn,
umap_params.k,
umap_params.ann_type.clone(),
&umap_params.umap_graph_params,
&nn_params,
umap_params.train_param.n_epochs,
seed,
verbose,
)?;
let model_params = UmapMlpConfig::from_params(
data.ncols(),
umap_params.hidden_layers.clone(),
umap_params.n_dim,
);
let (embd, _) = train_parametric_umap::<B, T>(
data,
graph,
&model_params,
&umap_params.train_param,
device,
seed,
verbosity.normal_verbosity(),
);
Ok(embd)
}
#[cfg(feature = "parametric")]
pub fn train_parametric_umap_model<T, B>(
data: MatRef<T>,
umap_params: &ParametricUmapParams<T>,
device: &B::Device,
seed: usize,
verbose: usize,
) -> ParametricUmapResults<B, T>
where
T: ManifoldsFloat + Element,
B: AutodiffBackend,
HnswIndex<T>: HnswState<T>,
NNDescent<T>: ApplySortedUpdates<T> + NNDescentQuery<T>,
{
let nn_params = umap_params.nn_params.clone();
let verbosity = parse_verbosity_level(verbose);
if verbosity.normal_verbosity() {
println!(
"Training parametric umap model with alpha: {:.2?} and beta: {:.2?}",
ToPrimitive::to_f32(&umap_params.train_param.a).unwrap(),
ToPrimitive::to_f32(&umap_params.train_param.b).unwrap()
);
}
let (graph, _, _) = construct_umap_graph(
data,
None,
umap_params.k,
umap_params.ann_type.clone(),
&umap_params.umap_graph_params,
&nn_params,
umap_params.train_param.n_epochs,
seed,
verbose,
)?;
let model_params = UmapMlpConfig::from_params(
data.ncols(),
umap_params.hidden_layers.clone(),
umap_params.n_dim,
);
let (embd, trained_model) = train_parametric_umap::<B, T>(
data,
graph,
&model_params,
&umap_params.train_param,
device,
seed,
verbosity.normal_verbosity(),
);
Ok((embd, trained_model))
}
#[cfg(feature = "gpu")]
#[derive(Debug, Clone)]
pub struct UmapParamsGpu<T> {
pub n_dim: usize,
pub k: usize,
pub optimiser: String,
pub ann_type: String,
pub initialisation: String,
pub init_range: Option<T>,
pub nn_params: NearestNeighbourParamsGpu<T>,
pub umap_graph_params: UmapGraphParams<T>,
pub optim_params: UmapOptimParams<T>,
pub randomised: bool,
}
#[cfg(feature = "gpu")]
impl<T> Default for UmapParamsGpu<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
k: 15,
optimiser: "adam_parallel".to_string(),
ann_type: "ivf_gpu".to_string(),
initialisation: "spectral".to_string(),
init_range: None,
nn_params: NearestNeighbourParamsGpu::default(),
umap_graph_params: UmapGraphParams::default(),
optim_params: UmapOptimParams::default(),
randomised: false,
}
}
}
#[cfg(feature = "gpu")]
impl<T> UmapParamsGpu<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
k: usize,
optimiser: String,
ann_type: String,
initialisation: String,
init_range: Option<T>,
nn_params: NearestNeighbourParamsGpu<T>,
umap_graph_params: UmapGraphParams<T>,
optim_params: UmapOptimParams<T>,
randomised: bool,
) -> Self {
Self {
n_dim,
k,
optimiser,
ann_type,
initialisation,
init_range,
nn_params,
umap_graph_params,
optim_params,
randomised,
}
}
pub fn new_default_2d(min_dist: Option<T>, spread: Option<T>) -> Self {
let min_dist = min_dist.unwrap_or(T::from_f64(0.5).unwrap());
let spread = spread.unwrap_or(T::from_f64(1.0).unwrap());
Self {
optim_params: UmapOptimParams::from_min_dist_spread(
min_dist, spread, None, None, None, None, None, None, None,
),
..Self::default()
}
}
}
#[cfg(feature = "gpu")]
#[allow(clippy::too_many_arguments)]
pub fn construct_umap_graph_gpu<T, R>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
k: usize,
ann_type: String,
umap_params: &UmapGraphParams<T>,
nn_params: &NearestNeighbourParamsGpu<T>,
n_epochs: usize,
device: R::Device,
seed: usize,
verbose: usize,
) -> UmapGraphResults<T>
where
T: ManifoldsFloat + AnnSearchGpuFloat,
R: Runtime,
NNDescentGpu<T, R>: NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
if verbosity.normal_verbosity() {
println!("Running GPU nearest neighbour search using {}...", ann_type);
}
let start_knn = Instant::now();
let result =
run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Constructing fuzzy simplicial set...");
}
let start_graph = Instant::now();
let (sigma, rho) = smooth_knn_dist(
&knn_dist,
knn_dist[0].len(),
umap_params.local_connectivity,
umap_params.bandwidth,
64,
);
let graph = knn_to_coo(&knn_indices, &knn_dist, &sigma, &rho);
let graph = symmetrise_graph(graph, umap_params.mix_weight);
let graph = filter_weak_edges(graph, n_epochs, verbose);
if verbosity.normal_verbosity() {
println!(
"Finalised graph generation in {:.2?}.",
start_graph.elapsed()
);
}
Ok((graph, knn_indices, knn_dist))
}
#[cfg(feature = "gpu")]
pub fn umap_gpu<T, R>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
umap_params: &UmapParamsGpu<T>,
device: R::Device,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat + AnnSearchGpuFloat,
R: Runtime,
StandardNormal: Distribution<T>,
NNDescentGpu<T, R>: NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let init_type = parse_initilisation(
&umap_params.initialisation,
umap_params.randomised,
umap_params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
umap_params.initialisation,
);
EmbdInit::PcaInit {
range: None,
randomised: true,
}
});
let optimiser = parse_umap_optimiser(&umap_params.optimiser).unwrap_or_default();
if verbosity.normal_verbosity() {
println!(
"Running umap (GPU kNN) with alpha: {:.2?} and beta: {:.2?}",
umap_params.optim_params.a, umap_params.optim_params.b
);
}
let (graph, _, _) = construct_umap_graph_gpu::<T, R>(
data,
precomputed_knn,
umap_params.k,
umap_params.ann_type.clone(),
&umap_params.umap_graph_params,
&umap_params.nn_params,
umap_params.optim_params.n_epochs,
device,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!(
"Initialising embedding via {} layout...",
umap_params.initialisation
);
}
let start_layout = Instant::now();
let mut embd = initialise_embedding(&init_type, umap_params.n_dim, seed as u64, &graph, data)?;
let graph_adj = coo_to_adjacency_list(&graph);
if verbosity.normal_verbosity() {
println!(
"Optimising embedding via {} ({} epochs) on {} edges...",
match optimiser {
UmapOptimiser::Adam => "Adam",
UmapOptimiser::Sgd => "SGD",
UmapOptimiser::AdamParallel => "Adam (multi-threaded)",
},
umap_params.optim_params.n_epochs,
graph.col_indices.len().separate_with_underscores()
);
}
match optimiser {
UmapOptimiser::Adam => optimise_embedding_adam(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?,
UmapOptimiser::Sgd => {
optimise_embedding_sgd(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?;
}
UmapOptimiser::AdamParallel => {
optimise_embedding_adam_parallel(
&mut embd,
&graph_adj,
&umap_params.optim_params,
seed as u64,
verbose,
)?;
}
}
if verbosity.normal_verbosity() {
println!(
"Initialised and optimised embedding in: {:.2?}.",
start_layout.elapsed()
);
println!("UMAP (GPU) complete!");
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; umap_params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..umap_params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[cfg(feature = "gpu")]
#[derive(Debug, Clone)]
pub struct TsneParamsGpu<T> {
pub n_dim: usize,
pub perplexity: T,
pub ann_type: String,
pub initialisation: String,
pub init_range: Option<T>,
pub nn_params: NearestNeighbourParamsGpu<T>,
pub optim_params: TsneOptimParams<T>,
pub randomised_init: bool,
}
#[cfg(feature = "gpu")]
impl<T> Default for TsneParamsGpu<T>
where
T: ManifoldsFloat,
{
fn default() -> Self {
Self {
n_dim: 2,
perplexity: T::from_f64(30.0).unwrap(),
ann_type: "ivf_gpu".to_string(),
initialisation: "pca".to_string(),
init_range: None,
nn_params: NearestNeighbourParamsGpu::default(),
optim_params: TsneOptimParams {
n_epochs: 1000,
lr: None,
early_exag_iter: 250,
early_exag_factor: T::from_f64(12.0).unwrap(),
late_exag_factor: None,
theta: T::from_f64(0.5).unwrap(),
n_interp_points: 3,
},
randomised_init: true,
}
}
}
#[cfg(feature = "gpu")]
impl<T> TsneParamsGpu<T>
where
T: ManifoldsFloat,
{
#[allow(clippy::too_many_arguments)]
pub fn new(
n_dim: usize,
perplexity: T,
ann_type: String,
initialisation: String,
init_range: Option<T>,
nn_params: NearestNeighbourParamsGpu<T>,
optim_params: TsneOptimParams<T>,
randomised_init: bool,
) -> Self {
Self {
n_dim,
perplexity,
ann_type,
initialisation,
init_range,
nn_params,
optim_params,
randomised_init,
}
}
pub fn new_default_2d(perplexity: Option<T>) -> Self {
let perplexity = perplexity.unwrap_or_else(|| T::from_f64(30.0).unwrap());
Self {
perplexity,
..Self::default()
}
}
}
#[cfg(feature = "gpu")]
#[allow(clippy::too_many_arguments)]
pub fn construct_tsne_graph_gpu<T, R>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
perplexity: T,
ann_type: String,
nn_params: &NearestNeighbourParamsGpu<T>,
device: R::Device,
seed: usize,
verbose: usize,
) -> TsneGraph<T>
where
T: ManifoldsFloat + AnnSearchGpuFloat,
R: Runtime,
NNDescentGpu<T, R>: NNDescentQuery<T>,
{
let verbosity = parse_verbosity_level(verbose);
let (knn_indices, knn_dist) = match precomputed_knn {
Some((indices, distances)) => {
if verbosity.normal_verbosity() {
println!("Using precomputed kNN graph...");
}
(indices, distances)
}
None => {
let k_float = perplexity * T::from_f64(3.0).unwrap();
let k = k_float.to_usize().unwrap().max(5).min(data.nrows() - 1);
if verbosity.normal_verbosity() {
println!("Running GPU kNN search (k={}) using {}...", k, ann_type);
}
let start_knn = Instant::now();
let result =
run_ann_search_gpu::<T, R>(data, k, ann_type, nn_params, device, seed, verbose)?;
if verbosity.normal_verbosity() {
println!("GPU kNN search done in: {:.2?}.", start_knn.elapsed());
}
result
}
};
if verbosity.normal_verbosity() {
println!("Computing Gaussian affinities and symmetrising...");
}
let start_graph = Instant::now();
let directed_graph = gaussian_knn_affinities(
&knn_indices,
&knn_dist,
perplexity,
T::from_f64(1e-5).unwrap(),
200,
nn_params.dist_metric == "euclidean",
)?;
let graph = symmetrise_affinities_tsne(directed_graph);
if verbosity.normal_verbosity() {
println!(
"Finalised graph generation in {:.2?}.",
start_graph.elapsed()
);
}
Ok((graph, knn_indices, knn_dist))
}
#[cfg(all(feature = "gpu", feature = "fft_tsne"))]
pub fn tsne_gpu<T, R>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
params: &TsneParamsGpu<T>,
approx_type: &str,
device: R::Device,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat + AnnSearchGpuFloat + FftwFloat,
R: Runtime,
StandardNormal: Distribution<T>,
NNDescentGpu<T, R>: NNDescentQuery<T>,
{
if params.n_dim != 2 {
return Err(ManifoldsError::IncorrectDim {
n_dim: params.n_dim,
});
}
let verbosity = parse_verbosity_level(verbose);
let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
data,
precomputed_knn,
params.perplexity,
params.ann_type.clone(),
¶ms.nn_params,
device,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!("Initialising embedding via {}...", ¶ms.initialisation);
}
let init_type = parse_initilisation(
¶ms.initialisation,
params.randomised_init,
params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
params.initialisation,
);
EmbdInit::PcaInit {
range: Some(T::from_f64(1e-2).unwrap()),
randomised: true,
}
});
let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
let start_optim = Instant::now();
match tsne_approx {
TsneOpt::BarnesHut => {
if verbosity.normal_verbosity() {
println!(
"Optimising via Barnes-Hut t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
}
TsneOpt::Fft => {
if verbosity.normal_verbosity() {
println!(
"Optimising via FFT Interpolation-based t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
optimise_fft_tsne(&mut embd, ¶ms.optim_params, &graph, verbose)?;
}
}
if verbosity.normal_verbosity() {
println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}
#[cfg(all(feature = "gpu", not(feature = "fft_tsne")))]
pub fn tsne_gpu<T, R>(
data: MatRef<T>,
precomputed_knn: PreComputedKnn<T>,
params: &TsneParamsGpu<T>,
approx_type: &str,
device: R::Device,
seed: usize,
verbose: usize,
) -> Result<Vec<Vec<T>>, ManifoldsError>
where
T: ManifoldsFloat + AnnSearchGpuFloat,
R: Runtime,
StandardNormal: Distribution<T>,
NNDescentGpu<T, R>: NNDescentQuery<T>,
{
if params.n_dim != 2 {
return Err(ManifoldsError::IncorrectDim {
n_dim: params.n_dim,
});
}
let verbosity = parse_verbosity_level(verbose);
let (graph, _, _) = construct_tsne_graph_gpu::<T, R>(
data,
precomputed_knn,
params.perplexity,
params.ann_type.clone(),
¶ms.nn_params,
device,
seed,
verbose,
)?;
if verbosity.normal_verbosity() {
println!("Initialising embedding via {}...", ¶ms.initialisation);
}
let init_type = parse_initilisation(
¶ms.initialisation,
params.randomised_init,
params.init_range,
)
.unwrap_or_else(|| {
println!(
"Unknown initialisation provided: {:?}. Defaulting to PCA.",
params.initialisation,
);
EmbdInit::PcaInit {
range: Some(T::from_f64(1e-2).unwrap()),
randomised: true,
}
});
let mut embd = initialise_embedding(&init_type, params.n_dim, seed as u64, &graph, data)?;
let tsne_approx = parse_tsne_optimiser(approx_type).unwrap_or_default();
let start_optim = Instant::now();
match tsne_approx {
TsneOpt::BarnesHut => {
if verbosity.normal_verbosity() {
println!(
"Optimising via Barnes-Hut t-SNE ({} epochs)...",
params.optim_params.n_epochs
);
}
optimise_bh_tsne(&mut embd, ¶ms.optim_params, &graph, verbose);
}
TsneOpt::Fft => {
panic!("FFT-accelerated t-SNE not available. Recompile with 'fft_tsne' feature or use 'barnes_hut' approximation.");
}
}
if verbosity.normal_verbosity() {
println!("Optimisation complete in {:.2?}.", start_optim.elapsed());
}
let n_samples = embd.len();
let mut transposed = vec![vec![T::zero(); n_samples]; params.n_dim];
for sample_idx in 0..n_samples {
for dim_idx in 0..params.n_dim {
transposed[dim_idx][sample_idx] = embd[sample_idx][dim_idx];
}
}
Ok(transposed)
}