use crate::clump_compat::{map_clump_error, soa_to_aos};
use crate::RetrieveError;
pub use clump::{ClusterHierarchy, ClusterLayer, ClusterNode};
/// Tunable settings for the [`EVoC`] clustering wrapper.
///
/// `intermediate_dim`, `min_cluster_size` and `noise_level` are copied into
/// `clump::EVoCParams` by `to_clump`; `min_number_clusters` is consumed by
/// `EVoC::fit_predict` after fitting instead.
#[derive(Debug, Clone)]
pub struct EVoCParams {
    /// Forwarded unchanged to `clump::EVoCParams::intermediate_dim`
    /// (default: 15). Presumably the dimensionality of the intermediate
    /// embedding -- confirm against the `clump` documentation.
    pub intermediate_dim: usize,
    /// Forwarded unchanged to `clump::EVoCParams::min_cluster_size`
    /// (default: 10).
    pub min_cluster_size: usize,
    /// Forwarded unchanged to `clump::EVoCParams::noise_level`
    /// (default: 0.0).
    pub noise_level: f32,
    /// When `Some(target)`, `EVoC::fit_predict` asks the inner clusterer for
    /// the layer matching `target` and returns that layer's assignments if
    /// the lookup succeeds (default: `None`).
    pub min_number_clusters: Option<usize>,
}
impl Default for EVoCParams {
fn default() -> Self {
Self {
intermediate_dim: 15,
min_cluster_size: 10,
noise_level: 0.0,
min_number_clusters: None,
}
}
}
impl EVoCParams {
    /// Builds the `clump::EVoCParams` equivalent of these settings.
    ///
    /// Only `intermediate_dim`, `min_cluster_size` and `noise_level` are
    /// carried over; every other `clump` field keeps the value produced by
    /// `clump::EVoCParams::default()`. `min_number_clusters` is intentionally
    /// not part of the conversion.
    fn to_clump(&self) -> clump::EVoCParams {
        // All three forwarded fields are `Copy`, so destructuring through the
        // reference copies them out without touching `self`.
        let Self {
            intermediate_dim,
            min_cluster_size,
            noise_level,
            ..
        } = *self;

        clump::EVoCParams {
            intermediate_dim,
            min_cluster_size,
            noise_level,
            ..clump::EVoCParams::default()
        }
    }
}
/// Clustering front end that wraps a `clump::EVoC` instance.
///
/// Constructed through `EVoC::new`, which rejects a zero `original_dim`.
pub struct EVoC {
/// Parameters supplied at construction; `min_number_clusters` is read back
/// in `fit_predict`.
params: EVoCParams,
/// Number of `f32` components per input vector; used in `fit_predict` to
/// compute the expected length of the flat input buffer.
original_dim: usize,
/// Underlying `clump` clusterer that all fitting and accessor calls are
/// delegated to.
inner: clump::EVoC,
}
impl EVoC {
    /// Creates a clusterer for vectors that have `original_dim` components.
    ///
    /// The supplied `params` are converted with `to_clump` to configure the
    /// wrapped `clump::EVoC`, and are also retained so that
    /// `min_number_clusters` can be honoured in [`EVoC::fit_predict`].
    ///
    /// # Errors
    ///
    /// Returns [`RetrieveError::InvalidParameter`] when `original_dim` is 0.
    pub fn new(original_dim: usize, params: EVoCParams) -> Result<Self, RetrieveError> {
        if original_dim == 0 {
            return Err(RetrieveError::InvalidParameter(
                "Original dimension must be greater than 0".to_string(),
            ));
        }
        let inner = clump::EVoC::new(params.to_clump());
        Ok(Self {
            params,
            original_dim,
            inner,
        })
    }

    /// Fits the wrapped clusterer on `vectors` and returns one label per
    /// input vector.
    ///
    /// `vectors` is a flat buffer that must hold at least
    /// `num_vectors * original_dim` values; it is reshaped by `soa_to_aos`
    /// before being handed to `clump`. A longer buffer passes the length
    /// check (how `soa_to_aos` treats the surplus is not visible here).
    ///
    /// When `min_number_clusters` is `Some(target)` and the wrapped clusterer
    /// can provide a layer for `target`, that layer's assignments are
    /// returned instead of the default labels. If the layer lookup fails the
    /// default labels are returned; this fallback is deliberate best effort.
    ///
    /// # Errors
    ///
    /// * [`RetrieveError::InvalidParameter`] if `num_vectors * original_dim`
    ///   does not fit in `usize`, or if `vectors` is shorter than that
    ///   product.
    /// * Any error from the wrapped `fit_predict`, translated by
    ///   `map_clump_error`.
    pub fn fit_predict(
        &mut self,
        vectors: &[f32],
        num_vectors: usize,
    ) -> Result<Vec<Option<usize>>, RetrieveError> {
        // A plain `*` wraps silently in release builds, which would let an
        // absurd `num_vectors` slip past the length check below (and panics
        // in debug builds). Reject the overflow as a parameter error instead.
        let expected_len = num_vectors
            .checked_mul(self.original_dim)
            .ok_or_else(|| {
                RetrieveError::InvalidParameter(
                    "num_vectors * original_dim overflows usize".to_string(),
                )
            })?;
        if vectors.len() < expected_len {
            return Err(RetrieveError::InvalidParameter(
                "Insufficient vectors".to_string(),
            ));
        }

        let data = soa_to_aos(vectors, num_vectors, self.original_dim);
        let labels = self.inner.fit_predict(&data).map_err(map_clump_error)?;

        // Prefer the layer matching the requested cluster count, if any.
        if let Some(target) = self.params.min_number_clusters {
            if let Ok(layer) = self.inner.layer_for_n_clusters(target) {
                return Ok(layer.assignments);
            }
        }
        Ok(labels)
    }

    /// Returns the cluster layers held by the wrapped clusterer.
    ///
    /// Forwards directly to `clump::EVoC::cluster_layers`.
    pub fn cluster_layers(&self) -> &[ClusterLayer] {
        self.inner.cluster_layers()
    }

    /// Returns the cluster hierarchy held by the wrapped clusterer, if it has
    /// one.
    ///
    /// Forwards directly to `clump::EVoC::cluster_tree`.
    pub fn cluster_tree(&self) -> Option<&ClusterHierarchy> {
        self.inner.cluster_tree()
    }

    /// Returns the duplicate groups reported by the wrapped clusterer.
    ///
    /// Forwards directly to `clump::EVoC::duplicates`; each inner `Vec`
    /// presumably lists indices of mutually duplicate inputs -- confirm
    /// against the `clump` documentation.
    pub fn duplicates(&self) -> &[Vec<usize>] {
        self.inner.duplicates()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A non-zero dimension with default parameters must construct.
    #[test]
    fn test_create_evoc() {
        assert!(EVoC::new(4, EVoCParams::default()).is_ok());
    }

    /// A zero dimension must be rejected at construction time.
    #[test]
    fn test_zero_dimension_error() {
        assert!(EVoC::new(0, EVoCParams::default()).is_err());
    }

    /// Fitting 20 four-dimensional vectors yields exactly 20 assignments.
    #[test]
    fn test_fit_predict() {
        let params = EVoCParams {
            intermediate_dim: 2,
            min_cluster_size: 2,
            noise_level: 0.0,
            min_number_clusters: None,
        };
        #[allow(clippy::unwrap_used)]
        let mut evoc = EVoC::new(4, params).unwrap();

        // Two groups of ten vectors each: the first varies only along the
        // first axis, the second only along the last axis.
        let ramp = |i: i32| 1.0 + i as f32 * 0.01;
        let vectors: Vec<f32> = (0..10)
            .flat_map(|i| [ramp(i), 0.0, 0.0, 0.0])
            .chain((0..10).flat_map(|i| [0.0, 0.0, 0.0, ramp(i)]))
            .collect();

        #[allow(clippy::unwrap_used)]
        let assignments = evoc.fit_predict(&vectors, 20).unwrap();
        assert_eq!(assignments.len(), 20);
    }
}