use anyhow::anyhow;
use std::fs::OpenOptions;
use std::io::BufWriter;
use std::path::Path;
use ndarray::Array2;
use hnsw_rs::prelude::*;
use super::kgproj::*;
pub struct ToRipserer<'a, 'b, T, D>
where
T: Clone + Send + Sync + 'b,
D: Distance<T> + Send + Sync,
{
hnsw: &'a Hnsw<'b, T, D>,
}
impl<'a, 'b, T, D> ToRipserer<'a, 'b, T, D>
where
T: Clone + Send + Sync,
D: Distance<T> + Send + Sync,
{
pub fn new(hnsw: &'a Hnsw<'b, T, D>) -> Self {
ToRipserer { hnsw }
}
pub fn extract_neighbourhood(
&self,
center: &[T],
knbn: usize,
ef_c: usize,
outbson: &String,
) -> Result<(), anyhow::Error> {
let point_indexation = self.hnsw.get_point_indexation();
log::debug!("extract_neighbourhood asking for {} points", knbn);
let neighbour_0 = self.hnsw.search(center, knbn, ef_c);
let nb_points = neighbour_0.len();
log::debug!("got nb neighbours : {}", nb_points);
let mut dist_mat = Array2::zeros((nb_points, nb_points));
let distance = self.hnsw.get_distance();
for i in 0..neighbour_0.len() {
let data_i = point_indexation
.get_point_data(&neighbour_0[i].p_id)
.unwrap();
for j in 0..i {
let point_j = point_indexation
.get_point_data(&neighbour_0[j].p_id)
.unwrap();
dist_mat[[i, j]] = distance.eval(&data_i, &point_j);
}
} let vlen = nb_points * (nb_points + 1) / 2;
let mut v = Vec::<f32>::with_capacity(vlen);
for i in 0..neighbour_0.len() {
for j in 0..i {
v.push(dist_mat[[i, j]]);
}
v.push(0.);
}
log::debug!(
"serializing , first values : {} {} {} , last : {}",
v[0],
v[1],
v[2],
v[v.len() - 2]
);
let serializer = bson::Serializer::new();
let res = serde::ser::Serialize::serialize::<bson::Serializer>(&v, serializer);
if res.is_err() {
log::error!("serialization of distance matrix failed");
return Err(anyhow!("serialization of distance matrix failed"));
}
let bson_v = res.unwrap();
let path = Path::new(outbson);
let fileres = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(path);
let file = if let Ok(file) = fileres {
file
} else {
log::error!("could not open file : {}", path.display());
return Err(anyhow!("could not open file : {}", path.display()));
};
let mut doc = bson::Document::new();
doc.insert("limat", bson_v);
let bufwriter = BufWriter::new(file);
let res = doc.to_writer(bufwriter);
if res.is_err() {
log::error!("dump of bson failed: {}", res.clone().err().unwrap());
return Err(anyhow!("dump of bson failed: {}", res.err().unwrap()));
}
Ok(())
}
pub fn extract_projection_to_ripserer(
&self,
knbn: usize,
layer: usize,
fname: &String,
) -> Result<(), anyhow::Error> {
let graph_projection = KGraphProjection::<f32>::new(self.hnsw, knbn, layer);
let quant = graph_projection.get_projection_distance_quant();
if quant.count() > 0 {
log::debug!("\n\n projection distance from lower layers to upper layers");
log::debug!(
"\n quantile at 0.05 : {:.2e} , 0.5 : {:.2e}, 0.95 : {:.2e}, 0.99 : {:.2e}",
quant.query(0.05).unwrap().1,
quant.query(0.5).unwrap().1,
quant.query(0.95).unwrap().1,
quant.query(0.99).unwrap().1
);
}
log::debug!("output fashion projection for ripser : {}", fname);
let res = graph_projection.dump_sparse_mat_for_ripser(fname);
if res.is_err() {
log::error!("graph_projection dump_sparse_mat_for_ripser failed");
return Err(anyhow!(
"graph_projection dump_sparse_mat_for_ripser failed"
));
}
Ok(())
} }