use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::Fit;
use ndarray::{Array1, Array2};
use num_traits::Float;
/// Unfitted HDBSCAN configuration.
///
/// The fields are plain hyper-parameters; nothing is validated until `fit`
/// is called, which returns a [`FittedHdbscan`] on success.
#[derive(Debug, Clone)]
pub struct Hdbscan<F> {
/// Smallest group of points that may form a cluster. `fit` rejects values below 2.
pub min_cluster_size: usize,
/// Neighbour rank used when computing core distances. When `None`, `fit`
/// falls back to `min_cluster_size`; an explicit 0 is rejected by `fit`.
pub min_samples: Option<usize>,
/// Distance threshold: a split whose merge distance is not strictly greater
/// than this value is not treated as a cluster split. `fit` rejects negative values.
pub cluster_selection_epsilon: F,
}
impl<F: Float> Hdbscan<F> {
    /// Builds a configuration with the default hyper-parameters:
    /// `min_cluster_size = 5`, `min_samples = None` and
    /// `cluster_selection_epsilon = 0`.
    #[must_use]
    pub fn new() -> Self {
        let min_cluster_size = 5;
        let min_samples = None;
        let cluster_selection_epsilon = F::zero();
        Self {
            min_cluster_size,
            min_samples,
            cluster_selection_epsilon,
        }
    }

    /// Returns the configuration with `min_cluster_size` replaced.
    #[must_use]
    pub fn with_min_cluster_size(self, min_cluster_size: usize) -> Self {
        Self {
            min_cluster_size,
            ..self
        }
    }

    /// Returns the configuration with an explicit `min_samples`.
    #[must_use]
    pub fn with_min_samples(self, min_samples: usize) -> Self {
        Self {
            min_samples: Some(min_samples),
            ..self
        }
    }

    /// Returns the configuration with `cluster_selection_epsilon` set to `eps`.
    #[must_use]
    pub fn with_cluster_selection_epsilon(self, eps: F) -> Self {
        Self {
            cluster_selection_epsilon: eps,
            ..self
        }
    }
}
impl<F: Float> Default for Hdbscan<F> {
fn default() -> Self {
Self::new()
}
}
/// Result of fitting [`Hdbscan`]: one label and one membership strength per sample.
#[derive(Debug, Clone)]
pub struct FittedHdbscan<F> {
/// Cluster label per sample; `-1` marks noise, clusters are numbered from 0.
labels_: Array1<isize>,
/// Membership strength per sample; noise samples hold 0.
probabilities_: Array1<F>,
}
impl<F: Float> FittedHdbscan<F> {
    /// Per-sample cluster labels (`-1` for noise).
    #[must_use]
    pub fn labels(&self) -> &Array1<isize> {
        &self.labels_
    }

    /// Per-sample membership strengths.
    #[must_use]
    pub fn probabilities(&self) -> &Array1<F> {
        &self.probabilities_
    }

    /// Number of clusters, derived as `largest label + 1`.
    ///
    /// Returns 0 when there are no samples or every sample is noise.
    #[must_use]
    pub fn n_clusters(&self) -> usize {
        match self.labels_.iter().copied().max() {
            Some(top) if top >= 0 => (top + 1) as usize,
            _ => 0,
        }
    }
}
/// Squared Euclidean distance between two coordinate slices.
///
/// Coordinates are paired positionally; if the slices differ in length the
/// extra trailing coordinates of the longer one are ignored.
#[inline]
fn sq_euclidean<F: Float>(a: &[F], b: &[F]) -> F {
    let mut total = F::zero();
    for (&lhs, &rhs) in a.iter().zip(b) {
        let delta = lhs - rhs;
        total = total + delta * delta;
    }
    total
}
/// Computes the core distance of every row of `x`.
///
/// For each point the Euclidean distances to all points (including itself, at
/// distance 0) are sorted in ascending order and the entry at index
/// `min(min_samples, n - 1)` is taken.
///
/// Rows are read through contiguous slices when the array layout allows it.
/// For layouts where a row is not contiguous (for example a column-major
/// array) the distance is accumulated element by element instead, so such
/// inputs no longer collapse to an all-zero distance.
///
/// Returns one core distance per row, in row order.
fn compute_core_distances<F: Float>(x: &Array2<F>, min_samples: usize) -> Vec<F> {
    let n = x.nrows();
    let mut core_dists = vec![F::zero(); n];
    for (i, cd) in core_dists.iter_mut().enumerate() {
        let row_i = x.row(i);
        let mut dists: Vec<F> = (0..n)
            .map(|j| {
                if i == j {
                    return F::zero();
                }
                let row_j = x.row(j);
                let squared = match (row_i.as_slice(), row_j.as_slice()) {
                    // Fast path: both rows are contiguous in memory.
                    (Some(si), Some(sj)) => sq_euclidean(si, sj),
                    // Strided rows: walk the views directly.
                    _ => row_i
                        .iter()
                        .zip(row_j.iter())
                        .fold(F::zero(), |acc, (&a, &b)| {
                            let diff = a - b;
                            acc + diff * diff
                        }),
                };
                squared.sqrt()
            })
            .collect();
        // Incomparable values (NaN) are treated as equal so the sort never fails.
        dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        // Index 0 is the point itself; clamp so tiny inputs stay in bounds.
        let k = min_samples.min(n - 1);
        *cd = dists[k];
    }
    core_dists
}
/// One edge of the minimum spanning tree built over mutual-reachability distances.
#[derive(Debug, Clone, Copy)]
struct MstEdge<F> {
/// Index of the endpoint that was already in the tree when the edge was added.
u: usize,
/// Index of the endpoint that the edge added to the tree.
v: usize,
/// Mutual-reachability distance between `u` and `v`.
weight: F,
}
/// Builds a minimum spanning tree over the mutual-reachability graph with
/// Prim's algorithm, starting from point 0.
///
/// `core_dists` must hold one core distance per row of `x`.
///
/// Returns `n - 1` edges for `n >= 2` points (none for 0 or 1 points), in the
/// order the points were attached to the tree.
///
/// When no remaining candidate has a weight strictly below `F::max_value()`
/// (infinite, NaN or saturated distances), the first point not yet in the
/// tree is attached with its recorded weight. This keeps the result a
/// spanning tree instead of emitting self-loops on point 0.
fn build_mst<F: Float>(x: &Array2<F>, core_dists: &[F]) -> Vec<MstEdge<F>> {
    let n = x.nrows();
    if n <= 1 {
        return Vec::new();
    }
    let mut in_tree = vec![false; n];
    // Cheapest known connection of every outside point to the tree ...
    let mut min_weight = vec![F::max_value(); n];
    // ... and the tree point that connection starts from.
    let mut min_source = vec![0usize; n];
    let mut edges = Vec::with_capacity(n - 1);

    in_tree[0] = true;
    for j in 1..n {
        min_weight[j] = mutual_reachability(x, core_dists, 0, j);
    }

    for _ in 0..(n - 1) {
        let mut best: Option<usize> = None;
        let mut fallback: Option<usize> = None;
        for (j, &weight) in min_weight.iter().enumerate() {
            if in_tree[j] {
                continue;
            }
            if fallback.is_none() {
                fallback = Some(j);
            }
            // Strict comparison: ties keep the lowest index.
            let bar = match best {
                Some(b) => min_weight[b],
                None => F::max_value(),
            };
            if weight < bar {
                best = Some(j);
            }
        }
        let best_node = match best.or(fallback) {
            Some(j) => j,
            // Unreachable while fewer than n points are in the tree.
            None => break,
        };
        let best_weight = min_weight[best_node];

        in_tree[best_node] = true;
        edges.push(MstEdge {
            u: min_source[best_node],
            v: best_node,
            weight: best_weight,
        });

        // Relax the connections of the points still outside the tree.
        for j in 0..n {
            if in_tree[j] {
                continue;
            }
            let d = mutual_reachability(x, core_dists, best_node, j);
            if d < min_weight[j] {
                min_weight[j] = d;
                min_source[j] = best_node;
            }
        }
    }
    edges
}
/// Mutual-reachability distance between rows `i` and `j` of `x`:
/// the largest of the Euclidean distance between the rows, `core_dists[i]`
/// and `core_dists[j]`.
///
/// Contiguous rows go through `sq_euclidean`; rows that cannot be viewed as a
/// slice (non-standard memory layout) are walked element by element, so the
/// distance is correct for every array layout.
#[inline]
fn mutual_reachability<F: Float>(x: &Array2<F>, core_dists: &[F], i: usize, j: usize) -> F {
    let row_i = x.row(i);
    let row_j = x.row(j);
    let squared = match (row_i.as_slice(), row_j.as_slice()) {
        (Some(a), Some(b)) => sq_euclidean(a, b),
        _ => row_i
            .iter()
            .zip(row_j.iter())
            .fold(F::zero(), |acc, (&a, &b)| {
                let diff = a - b;
                acc + diff * diff
            }),
    };
    let mut result = squared.sqrt();
    if core_dists[i] > result {
        result = core_dists[i];
    }
    if core_dists[j] > result {
        result = core_dists[j];
    }
    result
}
/// One cluster of the condensed tree built by `extract_clusters`.
#[derive(Debug, Clone)]
struct CondensedNode {
/// Members of this cluster. Values below `n_samples` are sample indices;
/// values at or above `n_samples` are `condensed cluster index + n_samples`.
children: Vec<usize>,
/// Lambda (1 / distance) at which the child at the same position left this cluster.
child_lambdas: Vec<f64>,
/// Lambda at which this cluster was created (0 for the root).
birth_lambda: f64,
/// Lambda at which this cluster split or dissolved; 0 until that is recorded.
death_lambda: f64,
/// Stability score filled in after the tree is built.
stability: f64,
/// Number of samples under the dendrogram node this cluster was created from.
size: usize,
}
/// Turns a mutual-reachability spanning tree into flat cluster labels and
/// per-sample membership strengths.
///
/// Steps:
/// 1. Sort `mst_edges` by weight (in place) and replay them through a
///    union-find, producing a binary merge tree whose leaves are the samples.
/// 2. Walk that tree from the root and condense it. A node is a real split
///    only when both sides hold at least `min_cluster_size` samples and the
///    merge distance is strictly above `cluster_selection_epsilon`. If both
///    sides are large but the distance is not above epsilon, both stay in the
///    current cluster. Otherwise the samples of the too-small side(s) leave
///    the current cluster at `lambda = 1 / distance`.
/// 3. Score every condensed cluster: the sum of `lambda_leave - lambda_birth`
///    over its members, where a child cluster counts once per sample it
///    contains.
/// 4. Bottom-up, keep a cluster if its own stability is at least the summed
///    stability of its child clusters, otherwise keep the children. The root
///    never competes and is never selected.
/// 5. Label every sample with the nearest selected ancestor cluster (`-1` if
///    none) and derive a membership strength in `[0, 1]`.
///
/// # Arguments
/// * `n_samples` - number of samples; edge endpoints must be below this value.
/// * `mst_edges` - spanning-tree edges; reordered by this function.
/// * `min_cluster_size` - minimum samples per cluster.
/// * `cluster_selection_epsilon` - distance at or below which splits are ignored.
///
/// # Returns
/// `(labels, probabilities)`, both of length `n_samples`. Noise samples carry
/// label `-1` and probability 0.
fn extract_clusters<F: Float>(
    n_samples: usize,
    mst_edges: &mut [MstEdge<F>],
    min_cluster_size: usize,
    cluster_selection_epsilon: F,
) -> (Array1<isize>, Array1<F>) {
    // Union-find root lookup with path compression.
    fn find(parent: &mut [usize], i: usize) -> usize {
        let mut root = i;
        while parent[root] != root {
            root = parent[root];
        }
        let mut current = i;
        while parent[current] != root {
            let next = parent[current];
            parent[current] = root;
            current = next;
        }
        root
    }

    // Records every sample below `subtree` as leaving condensed cluster `cond` at `lambda`.
    fn absorb_points(
        subtree: usize,
        cond: usize,
        lambda: f64,
        n_samples: usize,
        node_children: &[(usize, usize)],
        condensed_clusters: &mut [CondensedNode],
        point_cluster: &mut [usize],
    ) {
        collect_leaves(subtree, n_samples, node_children, &mut |leaf| {
            condensed_clusters[cond].children.push(leaf);
            condensed_clusters[cond].child_lambdas.push(lambda);
            point_cluster[leaf] = cond;
        });
    }

    if n_samples == 0 {
        return (Array1::zeros(0), Array1::zeros(0));
    }

    // ---- 1. Merge tree (dendrogram) -------------------------------------
    mst_edges.sort_by(|a, b| {
        a.weight
            .partial_cmp(&b.weight)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let mut parent: Vec<usize> = (0..n_samples).collect();
    let mut size = vec![1usize; n_samples];
    // Dendrogram node currently representing each union-find root.
    let mut uf_to_cluster: Vec<usize> = (0..n_samples).collect();
    // Node ids 0..n_samples are the samples; every merge appends one node.
    let mut node_children: Vec<(usize, usize)> = vec![(0, 0); n_samples];
    let mut node_distance: Vec<f64> = vec![0.0; n_samples];
    let mut node_size: Vec<usize> = vec![1; n_samples];

    for edge in mst_edges.iter() {
        let ru = find(&mut parent, edge.u);
        let rv = find(&mut parent, edge.v);
        if ru == rv {
            continue;
        }
        let merged_size = size[ru] + size[rv];
        let node_id = node_children.len();
        node_children.push((uf_to_cluster[ru], uf_to_cluster[rv]));
        node_distance.push(edge.weight.to_f64().unwrap_or(0.0));
        node_size.push(merged_size);

        // Union by size; on ties the first endpoint's root survives.
        let (winner, loser) = if size[ru] < size[rv] {
            (rv, ru)
        } else {
            (ru, rv)
        };
        parent[loser] = winner;
        size[winner] = merged_size;
        uf_to_cluster[winner] = node_id;
    }

    let root_node = if node_children.len() == n_samples {
        0
    } else {
        node_children.len() - 1
    };

    // ---- 2. Condensed tree ----------------------------------------------
    let eps_f64 = cluster_selection_epsilon.to_f64().unwrap_or(0.0);
    let mut point_cluster = vec![0usize; n_samples];
    // Index 0 is the root cluster holding every sample.
    let mut condensed_clusters: Vec<CondensedNode> = vec![CondensedNode {
        children: Vec::new(),
        child_lambdas: Vec::new(),
        birth_lambda: 0.0,
        death_lambda: 0.0,
        stability: 0.0,
        size: n_samples,
    }];
    let mut stack: Vec<(usize, usize)> = vec![(root_node, 0)];

    while let Some((node_id, parent_cond)) = stack.pop() {
        if node_id < n_samples {
            // A bare sample reached directly: it leaves with its cluster.
            let owner = &mut condensed_clusters[parent_cond];
            let lambda = if owner.death_lambda > 0.0 {
                owner.death_lambda
            } else {
                owner.birth_lambda
            };
            owner.children.push(node_id);
            owner.child_lambdas.push(lambda);
            point_cluster[node_id] = parent_cond;
            continue;
        }

        let (left, right) = node_children[node_id];
        let left_size = node_size.get(left).copied().unwrap_or(1);
        let right_size = node_size.get(right).copied().unwrap_or(1);
        let split_dist = node_distance[node_id];
        // Zero distance would give an infinite lambda; saturate instead.
        let lambda = if split_dist > 0.0 {
            1.0 / split_dist
        } else {
            f64::MAX
        };
        let left_large = left_size >= min_cluster_size;
        let right_large = right_size >= min_cluster_size;
        let above_epsilon = split_dist > eps_f64;

        if left_large && right_large {
            if above_epsilon {
                // Genuine split: the current cluster dies, two children are born.
                condensed_clusters[parent_cond].death_lambda = lambda;
                for (child_node, child_size) in [(left, left_size), (right, right_size)] {
                    let child_cond = condensed_clusters.len();
                    condensed_clusters.push(CondensedNode {
                        children: Vec::new(),
                        child_lambdas: Vec::new(),
                        birth_lambda: lambda,
                        death_lambda: 0.0,
                        stability: 0.0,
                        size: child_size,
                    });
                    let owner = &mut condensed_clusters[parent_cond];
                    owner.children.push(child_cond + n_samples);
                    owner.child_lambdas.push(lambda);
                    stack.push((child_node, child_cond));
                }
            } else {
                // Split below epsilon: both halves stay in the current cluster.
                stack.push((left, parent_cond));
                stack.push((right, parent_cond));
            }
        } else if !left_large && !right_large {
            // Neither side can stand alone: the cluster dissolves here.
            absorb_points(
                left,
                parent_cond,
                lambda,
                n_samples,
                &node_children,
                &mut condensed_clusters,
                &mut point_cluster,
            );
            absorb_points(
                right,
                parent_cond,
                lambda,
                n_samples,
                &node_children,
                &mut condensed_clusters,
                &mut point_cluster,
            );
            condensed_clusters[parent_cond].death_lambda = lambda;
        } else {
            // Exactly one side is too small: its samples fall out and the
            // cluster continues through the large side. This is decided on
            // sizes alone so the outcome does not depend on child order.
            let (large, small) = if left_large {
                (left, right)
            } else {
                (right, left)
            };
            absorb_points(
                small,
                parent_cond,
                lambda,
                n_samples,
                &node_children,
                &mut condensed_clusters,
                &mut point_cluster,
            );
            stack.push((large, parent_cond));
        }
    }

    // ---- 3. Stability ---------------------------------------------------
    let n_cond = condensed_clusters.len();
    let cluster_sizes: Vec<usize> = condensed_clusters.iter().map(|c| c.size).collect();
    for cluster in condensed_clusters.iter_mut() {
        let birth = cluster.birth_lambda;
        let stab = cluster
            .children
            .iter()
            .zip(cluster.child_lambdas.iter())
            .map(|(&child, &child_lambda)| {
                // A child cluster stands for all of its samples.
                let weight = if child >= n_samples {
                    cluster_sizes.get(child - n_samples).copied().unwrap_or(1)
                } else {
                    1
                };
                (child_lambda - birth) * weight as f64
            })
            .sum::<f64>();
        cluster.stability = if stab > 0.0 { stab } else { 0.0 };
    }

    // ---- 4. Cluster selection -------------------------------------------
    let mut selected = vec![true; n_cond];
    let mut total_stability: Vec<f64> = condensed_clusters.iter().map(|c| c.stability).collect();
    // Children always have a larger index than their parent, so a reverse
    // scan sees them first. The root (index 0) is skipped: it can never be
    // selected, so it must not be allowed to deselect its descendants.
    for i in (1..n_cond).rev() {
        let mut child_sum = 0.0;
        let mut has_child_clusters = false;
        for &child_id in &condensed_clusters[i].children {
            if child_id >= n_samples {
                let cond_child = child_id - n_samples;
                if cond_child < n_cond {
                    child_sum += total_stability[cond_child];
                    has_child_clusters = true;
                }
            }
        }
        if !has_child_clusters {
            continue;
        }
        if condensed_clusters[i].stability >= child_sum {
            deselect_descendants(i, n_samples, &condensed_clusters, &mut selected);
        } else {
            selected[i] = false;
            total_stability[i] = child_sum;
        }
    }
    selected[0] = false;

    // ---- 5. Labels and membership strengths -----------------------------
    let mut cluster_label_map: Vec<isize> = vec![-1; n_cond];
    let mut label_counter: isize = 0;
    for (i, &sel) in selected.iter().enumerate() {
        if sel && condensed_clusters[i].size >= min_cluster_size {
            cluster_label_map[i] = label_counter;
            label_counter += 1;
        }
    }

    // Resolve each condensed cluster once rather than once per sample.
    let resolved_labels: Vec<isize> = (0..n_cond)
        .map(|cond_id| {
            find_selected_cluster(cond_id, &cluster_label_map, &condensed_clusters, n_samples)
        })
        .collect();

    let mut labels = Array1::from_elem(n_samples, -1isize);
    let mut probabilities = Array1::from_elem(n_samples, F::zero());
    for (pt, &cond_id) in point_cluster.iter().enumerate() {
        labels[pt] = resolved_labels[cond_id];
    }

    for cond_id in 0..n_cond {
        let label = cluster_label_map[cond_id];
        if label < 0 {
            continue;
        }
        let cluster = &condensed_clusters[cond_id];
        let birth = cluster.birth_lambda;
        let death = cluster.death_lambda;
        // Always strictly positive: either a positive difference or 1.
        let range = if death > birth { death - birth } else { 1.0 };
        for (&child, &child_lambda) in cluster.children.iter().zip(cluster.child_lambdas.iter()) {
            if child >= n_samples || labels[child] != label {
                continue;
            }
            let prob = ((child_lambda - birth) / range).clamp(0.0, 1.0);
            probabilities[child] = F::from(prob).unwrap_or(F::zero());
        }
    }

    // Samples that got their label through a deselected descendant cluster were
    // not visited above and still hold 0; any labelled sample holding 0 is
    // reported as a full member.
    for i in 0..n_samples {
        if labels[i] >= 0 && probabilities[i] == F::zero() {
            probabilities[i] = F::one();
        }
    }

    (labels, probabilities)
}
/// Invokes `callback` for every leaf (sample) below `node_id`, left to right.
///
/// Node ids below `n_samples` are leaves; any other id is looked up in
/// `node_children` to obtain its `(left, right)` children.
///
/// The traversal uses an explicit stack instead of recursion: merge trees
/// can degenerate into chains as deep as the number of samples, which would
/// exhaust the call stack.
///
/// # Panics
/// Panics if an internal node id is out of bounds for `node_children`.
fn collect_leaves(
    node_id: usize,
    n_samples: usize,
    node_children: &[(usize, usize)],
    callback: &mut dyn FnMut(usize),
) {
    let mut pending = vec![node_id];
    while let Some(current) = pending.pop() {
        if current < n_samples {
            callback(current);
            continue;
        }
        let (left, right) = node_children[current];
        // Push right first so the left subtree is emitted first.
        pending.push(right);
        pending.push(left);
    }
}
/// Clears the `selected` flag of every condensed cluster below `cond_id`.
///
/// `cond_id` itself is left untouched. Children below `n_samples` are samples
/// and are ignored; children that decode to an index outside
/// `condensed_clusters` are ignored as well.
fn deselect_descendants(
    cond_id: usize,
    n_samples: usize,
    condensed_clusters: &[CondensedNode],
    selected: &mut [bool],
) {
    let n_cond = condensed_clusters.len();
    let mut worklist = vec![cond_id];
    while let Some(current) = worklist.pop() {
        let nested = condensed_clusters[current]
            .children
            .iter()
            .filter(|&&child| child >= n_samples)
            .map(|&child| child - n_samples)
            .filter(|&idx| idx < n_cond);
        for idx in nested {
            selected[idx] = false;
            worklist.push(idx);
        }
    }
}
/// Returns the label of the nearest labelled cluster on the path from
/// `cond_id` up to the root of the condensed tree, or `-1` if there is none.
///
/// A cluster is labelled when its entry in `cluster_label_map` is
/// non-negative. The parent of a cluster is the first cluster whose
/// `children` contain `cluster index + n_samples`.
fn find_selected_cluster(
    cond_id: usize,
    cluster_label_map: &[isize],
    condensed_clusters: &[CondensedNode],
    n_samples: usize,
) -> isize {
    let mut current = cond_id;
    loop {
        let label = cluster_label_map[current];
        if label >= 0 {
            return label;
        }
        let parent = condensed_clusters.iter().position(|cluster| {
            cluster
                .children
                .iter()
                .any(|&child| child >= n_samples && child - n_samples == current)
        });
        match parent {
            Some(idx) => current = idx,
            None => return -1,
        }
    }
}
impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for Hdbscan<F> {
    type Fitted = FittedHdbscan<F>;
    type Error = FerroError;

    /// Clusters the rows of `x`.
    ///
    /// The pipeline is: core distances, minimum spanning tree over
    /// mutual-reachability distances, then cluster extraction. `min_samples`
    /// defaults to `min_cluster_size` when unset.
    ///
    /// An empty input yields empty outputs. An input with fewer rows than
    /// `min_cluster_size` yields all-noise labels (`-1`) with probability 0.
    ///
    /// # Errors
    /// Returns `FerroError::InvalidParameter` when
    /// * `min_cluster_size` is below 2,
    /// * `min_samples` resolves to 0, or
    /// * `cluster_selection_epsilon` is negative or NaN.
    fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedHdbscan<F>, FerroError> {
        if self.min_cluster_size < 2 {
            return Err(FerroError::InvalidParameter {
                name: "min_cluster_size".into(),
                reason: "must be at least 2".into(),
            });
        }
        let min_samples = self.min_samples.unwrap_or(self.min_cluster_size);
        if min_samples == 0 {
            return Err(FerroError::InvalidParameter {
                name: "min_samples".into(),
                reason: "must be at least 1".into(),
            });
        }
        // NaN compares false against everything, so it needs its own check;
        // otherwise it would slip through and suppress every split.
        let eps = self.cluster_selection_epsilon;
        if eps.is_nan() || eps < F::zero() {
            return Err(FerroError::InvalidParameter {
                name: "cluster_selection_epsilon".into(),
                reason: "must be non-negative".into(),
            });
        }

        let n_samples = x.nrows();
        if n_samples == 0 {
            return Ok(FittedHdbscan {
                labels_: Array1::zeros(0),
                probabilities_: Array1::zeros(0),
            });
        }
        // Too few samples to form even one cluster: everything is noise.
        if n_samples < self.min_cluster_size {
            return Ok(FittedHdbscan {
                labels_: Array1::from_elem(n_samples, -1isize),
                probabilities_: Array1::from_elem(n_samples, F::zero()),
            });
        }

        let core_dists = compute_core_distances(x, min_samples);
        let mut mst_edges = build_mst(x, &core_dists);
        let (labels, probabilities) = extract_clusters(
            n_samples,
            &mut mst_edges,
            self.min_cluster_size,
            eps,
        );
        Ok(FittedHdbscan {
            labels_: labels,
            probabilities_: probabilities,
        })
    }
}
// Unit tests for the HDBSCAN estimator: cluster recovery on small synthetic
// data sets, noise handling, parameter validation and degenerate inputs.
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
// Twelve 2-D points: six close to the origin (rows 0..6) and six close to
// (10, 10) (rows 6..12).
fn make_two_blobs() -> Array2<f64> {
Array2::from_shape_vec(
(12, 2),
vec![
0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, 0.05, 0.05, -0.05, 0.05,
10.0, 10.0, 10.1, 10.0, 10.0, 10.1, 10.1, 10.1, 10.05, 10.05, 9.95, 10.05,
],
)
.unwrap()
}
// Twenty 2-D points: ten tightly packed around the origin (rows 0..10) and
// ten tightly packed around (5, 5) (rows 10..20).
fn make_dense_clusters() -> Array2<f64> {
Array2::from_shape_vec(
(20, 2),
vec![
0.0, 0.0, 0.05, 0.0, 0.0, 0.05, 0.05, 0.05, -0.05, 0.0, 0.0, -0.05, -0.05, -0.05,
0.03, 0.02, -0.02, 0.03, 0.04, -0.01,
5.0, 5.0, 5.05, 5.0, 5.0, 5.05, 5.05, 5.05, 4.95, 5.0, 5.0, 4.95, 4.95, 4.95, 5.03,
5.02, 4.98, 5.03, 5.04, 4.99,
],
)
.unwrap()
}
// Both blobs must come out as two distinct, fully populated clusters.
#[test]
fn test_two_clusters() {
let x = make_two_blobs();
let model = Hdbscan::<f64>::new().with_min_cluster_size(3);
let fitted = model.fit(&x, &()).unwrap();
let labels = fitted.labels();
assert_eq!(labels.len(), 12);
let first_label = labels[0];
assert!(
first_label >= 0,
"expected cluster, got noise for first blob"
);
for i in 0..6 {
assert_eq!(
labels[i], first_label,
"point {i} should be in the same cluster as point 0"
);
}
let second_label = labels[6];
assert!(
second_label >= 0,
"expected cluster, got noise for second blob"
);
for i in 6..12 {
assert_eq!(
labels[i], second_label,
"point {i} should be in same cluster as point 6"
);
}
assert_ne!(first_label, second_label);
assert_eq!(fitted.n_clusters(), 2);
}
// Two blobs of five points (rows 0..5 and 5..10) plus four far-away
// outliers (rows 10..14) that must be labelled as noise.
#[test]
fn test_noise_detection() {
let x = Array2::from_shape_vec(
(14, 2),
vec![
0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, 0.05, 0.05, 10.0, 10.0, 10.1, 10.0, 10.0, 10.1, 10.1, 10.1, 10.05, 10.05, 50.0, 50.0, -50.0, -50.0, 100.0, 0.0, 0.0, 100.0,
],
)
.unwrap();
let model = Hdbscan::<f64>::new().with_min_cluster_size(3);
let fitted = model.fit(&x, &()).unwrap();
let labels = fitted.labels();
assert_eq!(labels[10], -1, "outlier at (50,50) should be noise");
assert_eq!(labels[11], -1, "outlier at (-50,-50) should be noise");
assert_eq!(labels[12], -1, "outlier at (100,0) should be noise");
assert_eq!(labels[13], -1, "outlier at (0,100) should be noise");
assert!(labels[0] >= 0, "cluster A point should not be noise");
assert!(labels[5] >= 0, "cluster B point should not be noise");
}
// A small min_cluster_size still finds clusters; one larger than the data
// set turns every point into noise.
#[test]
fn test_min_cluster_size_effect() {
let x = make_two_blobs();
let model_small = Hdbscan::<f64>::new().with_min_cluster_size(2);
let fitted_small = model_small.fit(&x, &()).unwrap();
assert!(
fitted_small.n_clusters() >= 1,
"should find at least 1 cluster"
);
let model_large = Hdbscan::<f64>::new().with_min_cluster_size(100);
let fitted_large = model_large.fit(&x, &()).unwrap();
for &label in fitted_large.labels().iter() {
assert_eq!(
label, -1,
"all points should be noise with large min_cluster_size"
);
}
}
// Membership strengths stay within [0, 1] and are exactly 0 for noise.
#[test]
fn test_probabilities_range() {
let x = make_two_blobs();
let model = Hdbscan::<f64>::new().with_min_cluster_size(3);
let fitted = model.fit(&x, &()).unwrap();
for (i, &prob) in fitted.probabilities().iter().enumerate() {
assert!(
prob >= 0.0 && prob <= 1.0,
"probability at index {i} is {prob}, expected [0, 1]"
);
}
for i in 0..x.nrows() {
if fitted.labels()[i] == -1 {
assert_relative_eq!(fitted.probabilities()[i], 0.0);
}
}
}
// Two dense, well-separated groups with an explicit min_samples must be
// recovered as exactly two clusters.
#[test]
fn test_dense_clusters() {
let x = make_dense_clusters();
let model = Hdbscan::<f64>::new()
.with_min_cluster_size(3)
.with_min_samples(3);
let fitted = model.fit(&x, &()).unwrap();
assert_eq!(fitted.labels().len(), 20);
assert_eq!(
fitted.n_clusters(),
2,
"should find 2 clusters in well-separated dense data"
);
let first_label = fitted.labels()[0];
assert!(first_label >= 0, "first cluster points should not be noise");
for i in 0..10 {
assert_eq!(
fitted.labels()[i],
first_label,
"point {i} should be in cluster A"
);
}
let second_label = fitted.labels()[10];
assert!(
second_label >= 0,
"second cluster points should not be noise"
);
for i in 10..20 {
assert_eq!(
fitted.labels()[i],
second_label,
"point {i} should be in cluster B"
);
}
assert_ne!(
first_label, second_label,
"two clusters should have different labels"
);
}
// Fitting zero rows succeeds and returns empty outputs.
#[test]
fn test_empty_data() {
let x = Array2::<f64>::zeros((0, 2));
let model = Hdbscan::<f64>::new();
let fitted = model.fit(&x, &()).unwrap();
assert_eq!(fitted.labels().len(), 0);
assert_eq!(fitted.probabilities().len(), 0);
assert_eq!(fitted.n_clusters(), 0);
}
// A single point is fewer than min_cluster_size, so it is noise.
#[test]
fn test_single_point() {
let x = Array2::from_shape_vec((1, 2), vec![1.0, 1.0]).unwrap();
let model = Hdbscan::<f64>::new().with_min_cluster_size(2);
let fitted = model.fit(&x, &()).unwrap();
assert_eq!(fitted.labels()[0], -1);
assert_eq!(fitted.n_clusters(), 0);
}
// Fewer samples than min_cluster_size: every point is noise.
#[test]
fn test_too_few_for_cluster() {
let x = Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 0.1, 0.0, 0.0, 0.1]).unwrap();
let model = Hdbscan::<f64>::new().with_min_cluster_size(5);
let fitted = model.fit(&x, &()).unwrap();
for &label in fitted.labels().iter() {
assert_eq!(label, -1);
}
}
// min_cluster_size below 2 is rejected by fit.
#[test]
fn test_invalid_min_cluster_size() {
let x = Array2::from_shape_vec((3, 2), vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0]).unwrap();
let model = Hdbscan::<f64>::new().with_min_cluster_size(1);
let result = model.fit(&x, &());
assert!(result.is_err());
}
// An epsilon larger than every distance in the data prevents the two blobs
// from being split apart.
#[test]
fn test_cluster_selection_epsilon() {
let x = make_two_blobs();
let model = Hdbscan::<f64>::new()
.with_min_cluster_size(3)
.with_cluster_selection_epsilon(1000.0);
let fitted = model.fit(&x, &()).unwrap();
let n = fitted.n_clusters();
assert!(
n <= 1,
"with large epsilon, should have at most 1 cluster, got {n}"
);
}
// Smoke test: the estimator also works with f32 data.
#[test]
fn test_f32_support() {
let x = Array2::from_shape_vec(
(10, 2),
vec![
0.0f32, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, 0.05, 0.05, 10.0, 10.0, 10.1, 10.0,
10.0, 10.1, 10.1, 10.1, 10.05, 10.05,
],
)
.unwrap();
let model = Hdbscan::<f32>::new().with_min_cluster_size(3);
let fitted = model.fit(&x, &()).unwrap();
assert_eq!(fitted.labels().len(), 10);
}
// Six identical points (all distances zero) must all receive the same label.
#[test]
fn test_identical_points() {
let x = Array2::from_shape_vec(
(6, 2),
vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
)
.unwrap();
let model = Hdbscan::<f64>::new().with_min_cluster_size(2);
let fitted = model.fit(&x, &()).unwrap();
let labels = fitted.labels();
let first = labels[0];
for &l in labels.iter() {
assert_eq!(l, first, "identical points should have the same label");
}
}
// n_clusters reports a positive count when clusters exist.
#[test]
fn test_n_clusters_accessor() {
let x = make_two_blobs();
let model = Hdbscan::<f64>::new().with_min_cluster_size(3);
let fitted = model.fit(&x, &()).unwrap();
let n = fitted.n_clusters();
assert!(n > 0, "should find clusters");
}
// Default::default matches the documented defaults of Hdbscan::new.
#[test]
fn test_default_constructor() {
let model = Hdbscan::<f64>::default();
assert_eq!(model.min_cluster_size, 5);
assert!(model.min_samples.is_none());
}
}