#![allow(unused)]
use num_traits::cast::FromPrimitive;
use num_traits::int::PrimInt;
use num_traits::float::*;
use std::cmp::{Ordering, PartialEq, PartialOrd};
use std::collections::BinaryHeap;
use hnsw_rs::prelude::*;
use super::kruskal::*;
use crate::fromhnsw::kgraph::KGraph;
use crate::fromhnsw::kgraph_from_hnsw_all;
pub struct UnionStep<NodeIdx: PrimInt, F: Float> {
nodea: NodeIdx,
nodeb: NodeIdx,
weight: F,
step: usize,
clusta: NodeIdx,
clustb: NodeIdx,
}
pub struct ClusterStat {
mean_density: f32,
size: u32,
}
pub struct Dendrogram<NodeIdx: PrimInt, F: Float> {
steps: Vec<UnionStep<NodeIdx, F>>,
}
impl<NodeIdx: PrimInt, F: Float> Dendrogram<NodeIdx, F> {
pub fn new(nbstep: usize) -> Self {
Dendrogram {
steps: Vec::<UnionStep<NodeIdx, F>>::with_capacity(nbstep),
}
}
}
struct Edge<F: Float + PartialOrd> {
nodea: u32,
nodeb: u32,
weight: F,
}
fn compare_edge<F: Float + PartialOrd>(edgea: &Edge<F>, edgeb: &Edge<F>) -> Ordering {
match (edgea.weight, edgeb.weight) {
(x, y) if x.is_nan() && y.is_nan() => Ordering::Equal,
(x, _) if x.is_nan() => Ordering::Greater,
(_, y) if y.is_nan() => Ordering::Less,
(_, _) => edgea.weight.partial_cmp(&edgeb.weight).unwrap(),
}
}
impl<F: Float + PartialOrd> PartialEq for Edge<F> {
fn eq(&self, other: &Self) -> bool {
self.nodea == other.nodea && self.nodeb == other.nodeb
}
}
impl<F: Float + PartialOrd> Eq for Edge<F> {}
impl<F: Float + PartialOrd> PartialOrd for Edge<F> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<F: Float + PartialOrd> Ord for Edge<F> {
fn cmp(&self, other: &Self) -> Ordering {
compare_edge(self, other)
}
}
pub struct SLclustering<NodeIdx: PrimInt, F: Float> {
kgraph: KGraph<F>,
dendrogram: Dendrogram<NodeIdx, F>,
nbcluster: usize,
}
impl<NodeIdx: PrimInt, F> SLclustering<NodeIdx, F>
where
F: PartialOrd
+ FromPrimitive
+ Float
+ Send
+ Sync
+ Clone
+ std::fmt::UpperExp
+ std::iter::Sum,
{
pub fn new<D>(hnsw: &Hnsw<F, D>, nbcluster: usize) -> Self
where
D: Distance<F> + Send + Sync,
{
let nbng = hnsw.get_max_nb_connection() as usize;
let kgraph = kgraph_from_hnsw_all(hnsw, nbng).unwrap();
let nbstep = kgraph.get_nb_nodes() - nbcluster;
SLclustering {
kgraph,
dendrogram: Dendrogram::<NodeIdx, F>::new(nbstep),
nbcluster,
}
}
pub fn cluster(&mut self) {
let _kgraph_stats = self.kgraph.get_kraph_stats();
let neighboourhood_info = self.kgraph.get_neighbours();
let nbnodes = neighboourhood_info.len();
let max_nbng = self.kgraph.get_max_nbng();
let mut edge_list = Vec::<(u32, u32, F)>::with_capacity(max_nbng * nbnodes);
for (n, edge_vec) in neighboourhood_info.iter().enumerate() {
for edge in edge_vec.iter() {
edge_list.push((n as u32, edge.node as u32, edge.weight));
}
}
let mst_edge_iter = kruskal(&edge_list);
let mut edge_heap = BinaryHeap::<Edge<F>>::with_capacity(edge_list.len());
for edge in mst_edge_iter {
edge_heap.push(Edge {
nodea: edge.0,
nodeb: edge.1,
weight: edge.2,
});
}
} }