use num_traits::Float;
use num_traits::cast::FromPrimitive;
use ndarray::Array2;
use ndarray_linalg::Scalar;
use hnsw_rs::prelude::*;
use crate::tools::nodeparam::*;
use crate::fromhnsw::*;
use crate::graphlaplace::*;
use crate::embedder::*;
/// Parameters of a diffusion-maps embedding.
#[derive(Clone, Copy)]
pub struct DiffusionParams {
    /// Dimension requested for the embedding.
    asked_dim: usize,
    /// Optional diffusion time. When `None`, `get_dmap_embedding` derives a
    /// time from the computed spectrum.
    t: Option<f32>,
}

impl DiffusionParams {
    /// Builds a parameter set from the requested dimension and an optional
    /// diffusion time.
    pub fn new(asked_dim: usize, t_opt: Option<f32>) -> Self {
        Self { asked_dim, t: t_opt }
    }

    /// Returns the diffusion time given at construction, if any.
    pub fn get_t(&self) -> Option<f32> {
        self.t
    }

    /// Returns the dimension requested for the embedding.
    pub fn get_embedding_dimension(&self) -> usize {
        self.asked_dim
    }
}
/// Diffusion-maps embedder configured by a [`DiffusionParams`].
pub struct DiffusionMaps {
    /// Embedding configuration (requested dimension and optional diffusion time).
    params: DiffusionParams,
    /// Node parameters slot; initialised to `None` by `new` and not read by
    /// the methods visible here.
    _node_params: Option<NodeParams>,
}

impl DiffusionMaps {
    /// Creates an embedder holding `params`, with no node parameters stored.
    pub fn new(params: DiffusionParams) -> Self {
        Self {
            params,
            _node_params: None,
        }
    }

    /// Embeds the points stored in `hnsw` and returns the embedding as an array.
    ///
    /// A k-graph is extracted from `hnsw` with `kgraph_from_hnsw_all`, using the
    /// structure's maximum number of connections as neighbourhood size. It is
    /// converted by `to_proba_edges` and the result handed to
    /// `get_dmap_embedding` together with the configured dimension and time.
    ///
    /// # Panics
    ///
    /// Panics if `kgraph_from_hnsw_all` returns an error, and in the cases where
    /// `get_dmap_embedding` panics.
    pub fn embed_hnsw<T, D, F>(&mut self, hnsw: &Hnsw<T, D>) -> Array2<F>
    where
        T: Clone + Send + Sync,
        F: Float + FromPrimitive + std::marker::Sync + Send + std::fmt::UpperExp + std::iter::Sum,
        D: Distance<T> + Send + Sync,
    {
        let max_nb_conn = hnsw.get_max_nb_connection() as usize;
        let kgraph = kgraph_from_hnsw_all::<T, D, F>(hnsw, max_nb_conn).unwrap();
        // NOTE(review): the meaning of the literals 1. and 2. is defined by
        // `to_proba_edges`, which is not visible in this file.
        let node_params = to_proba_edges::<F>(&kgraph, 1., 2.);
        get_dmap_embedding::<F>(
            &node_params,
            self.params.get_embedding_dimension(),
            self.params.get_t(),
        )
    }
}
/// Computes a diffusion-maps embedding of `initial_space` with `asked_dim` columns.
///
/// The laplacian returned by `get_laplacian` is decomposed with
/// `do_svd(asked_dim + 25)`. Singular values are normalised by the first one.
/// Entry `(i, j)` of the result is
/// `normalized_lambda[j + 1]^time * u[i, j + 1] / sqrt(degree[i] / sum(degrees))`,
/// so column 0 of `u` and the first singular value are skipped.
///
/// `time` is `t_opt` when provided. Otherwise it is
/// `min(5, ln(0.9) / ln(lambda[2] / lambda[1]))` on the normalised values.
///
/// # Panics
///
/// Panics if:
/// - `asked_dim < 2`;
/// - `do_svd` fails, or the result holds no singular values or no `u` matrix;
/// - fewer than `asked_dim + 1` singular values or `u` columns are available;
/// - the second singular value is greater than the first;
/// - a computed coordinate cannot be converted to `F`.
pub(crate) fn get_dmap_embedding<F>(
    initial_space: &NodeParams,
    asked_dim: usize,
    t_opt: Option<f32>,
) -> Array2<F>
where
    F: Float + FromPrimitive,
{
    assert!(asked_dim >= 2);
    let mut laplacian = get_laplacian(initial_space);
    log::debug!("got laplacian, going to svd ... asked_dim : {}", asked_dim);
    let svd_res = laplacian.do_svd(asked_dim + 25).unwrap();
    let lambdas = svd_res.get_sigma().as_ref().unwrap();
    let u = svd_res.get_u().as_ref().unwrap();
    // Everything below indexes singular values and columns 1..=asked_dim
    // (and lambdas[2] for logging / default time); check once, with a clear
    // message, instead of failing later on an out-of-bounds index.
    assert!(
        lambdas.len() > asked_dim && u.ncols() > asked_dim,
        "svd returned {} singular values and {} columns in u, at least {} of each are needed",
        lambdas.len(),
        u.ncols(),
        asked_dim + 1
    );
    if lambdas[1] > lambdas[0] {
        panic!("svd spectrum not decreasing");
    }
    log::info!(
        " first 3 eigen values {:.2e} {:.2e} {:.2e}",
        lambdas[0],
        lambdas[1],
        lambdas[2]
    );
    log::info!(
        " last eigenvalue computed rank {} value {:.2e}",
        lambdas.len() - 1,
        lambdas[lambdas.len() - 1]
    );
    log::debug!("keeping columns from 1 to : {}", asked_dim);
    log::debug!("u shape : nrows: {} , ncols : {} ", u.nrows(), u.ncols());
    let mut embedded = Array2::<F>::zeros((u.nrows(), asked_dim));
    let normalized_lambdas = lambdas / (*lambdas)[0];
    // NOTE(review): when the second and third values are equal the logarithm
    // of their ratio is 0 and the default time is not finite; confirm whether
    // callers can reach that case.
    let time = match t_opt {
        Some(t) => t,
        None => 5.0f32.min(0.9f32.ln() / (normalized_lambdas[2] / normalized_lambdas[1]).ln()),
    };
    log::info!("get_dmap_initial_embedding applying dmap time {:.2e}", time);
    let sum_diag = laplacian.degrees.iter().sum::<f32>();
    // The per-column factor does not depend on the row: compute it once.
    let scales: Vec<f32> = (1..=asked_dim)
        .map(|k| normalized_lambdas[k].pow(time))
        .collect();
    for i in 0..u.nrows() {
        let row_i = u.row(i);
        let weight_i = (laplacian.degrees[i] / sum_diag).sqrt();
        for (j, scale) in scales.iter().enumerate() {
            // Column 0 of u is skipped, hence the j + 1 offset.
            embedded[[i, j]] = F::from_f32(*scale * row_i[j + 1] / weight_i).unwrap();
        }
    }
    log::trace!("ended get_dmap_initial_embedding");
    embedded
}
pub fn array2_insert_hnsw<T,D>(data : &Array2<T>, hnsw : &mut Hnsw<T,D>) -> Result<usize, usize>
where T : Clone + Send + Sync,
D : Distance<T> + Send + Sync {
if hnsw.get_nb_point() > 0 {
log::error!("array2_insert_hnsw , insertion on non empty hnsw structure, nb point : {}", hnsw.get_nb_point());
return Err(1);
}
let blocksize = 10000;
let (nb_row, _) = data.dim();
let nb_block = nb_row / blocksize;
for i in 0..nb_block {
let start = i*blocksize;
let end = i*blocksize + blocksize-1;
let to_insert = (start..=end).into_iter().map(|n| (data.row(n).to_slice().unwrap(), n)).collect();
hnsw.parallel_insert_slice(&to_insert);
}
let start = nb_block*blocksize;
let to_insert = (start..nb_row).into_iter().map(|n| (data.row(n).to_slice().unwrap(), n)).collect();
hnsw.parallel_insert_slice(&to_insert);
Ok(hnsw.get_nb_point())
}
// Unit tests for this file; currently an empty placeholder.
// Gated on `cfg(test)` so the module is only compiled for test builds.
#[cfg(test)]
mod tests {}