use Classification::{Core, Edge, Noise};
#[inline]
pub fn euclidean_distance<T>(a: &[T], b: &[T]) -> f64
where
f64: From<T>,
T: Copy,
{
a.iter()
.zip(b.iter())
.fold(0f64, |acc, (&x, &y)| {
acc + (f64::from(x) - f64::from(y)).powi(2)
})
.sqrt()
}
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd)]
pub enum Classification {
Core(usize),
Edge(usize),
Noise,
}
pub fn cluster<T>(eps: f64, min_points: usize, input: &Vec<Vec<T>>) -> Vec<Classification>
where
T: Copy,
f64: From<T>,
{
Model::new(eps, min_points).run(input)
}
pub struct Model<T>
where
T: Copy,
f64: From<T>,
{
pub eps: f64,
pub mpt: usize,
distance: fn(&[T], &[T]) -> f64,
c: Vec<Classification>,
v: Vec<bool>,
}
impl<T> Model<T>
where
T: Copy,
f64: From<T>,
{
pub fn new(eps: f64, min_points: usize) -> Model<T> {
Model {
eps,
mpt: min_points,
c: Vec::new(),
v: Vec::new(),
distance: euclidean_distance,
}
}
pub fn set_distance_fn<F>(mut self, func: fn(&[T], &[T]) -> f64) -> Model<T> {
self.distance = func;
self
}
fn expand_cluster(
&mut self,
population: &[Vec<T>],
queue: &mut Vec<usize>,
cluster: usize,
) -> bool {
let mut new_cluster = false;
while let Some(ind) = queue.pop() {
let neighbors = self.range_query(&population[ind], population);
if neighbors.len() < self.mpt {
continue;
}
new_cluster = true;
self.c[ind] = Core(cluster);
for n_idx in neighbors {
if self.c[n_idx] == Noise {
self.c[n_idx] = Edge(cluster);
}
if self.v[n_idx] {
continue;
}
self.v[n_idx] = true;
queue.push(n_idx);
}
}
new_cluster
}
#[inline]
fn range_query(&self, sample: &[T], population: &[Vec<T>]) -> Vec<usize> {
population
.iter()
.enumerate()
.filter(|(_, pt)| (self.distance)(sample, pt) < self.eps)
.map(|(idx, _)| idx)
.collect()
}
pub fn run(mut self, population: &Vec<Vec<T>>) -> Vec<Classification> {
self.c = vec![Noise; population.len()];
self.v = vec![false; population.len()];
let mut cluster = 0;
let mut queue = Vec::new();
for idx in 0..population.len() {
if self.v[idx] {
continue;
}
self.v[idx] = true;
queue.push(idx);
if self.expand_cluster(population, &mut queue, cluster) {
cluster += 1;
}
}
self.c
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cluster() {
let model = Model::new(1.0, 3);
let inputs = vec![
vec![1.5, 2.2],
vec![1.0, 1.1],
vec![1.2, 1.4],
vec![0.8, 1.0],
vec![3.7, 4.0],
vec![3.9, 3.9],
vec![3.6, 4.1],
vec![10.0, 10.0],
];
let output = model.run(&inputs);
assert_eq!(
output,
vec![
Edge(0),
Core(0),
Core(0),
Core(0),
Core(1),
Core(1),
Core(1),
Noise
]
);
}
#[test]
fn cluster_edge() {
let model = Model::new(0.253110, 3);
let inputs = vec![
vec![
0.3311755015020835,
0.20474852214361858,
0.21050489388506638,
0.23040992344219402,
0.023161159027037505,
],
vec![
0.5112445458548497,
0.1898442816540571,
0.11674072294944157,
0.14853288499259437,
0.03363756454905728,
],
vec![
0.581134172697341,
0.15084733646825743,
0.09997992993087741,
0.13580335513916678,
0.03223520576435743,
],
vec![
0.17210416043100868,
0.3403172702783598,
0.18218098373740396,
0.2616980943829193,
0.04369949117030829,
],
];
let output = model.run(&inputs);
assert_eq!(output, vec![Core(0), Core(0), Edge(0), Edge(0)]);
}
#[test]
fn range_query() {
let model = Model::new(1.0, 3);
let inputs = vec![vec![1.0, 1.0], vec![1.1, 1.9], vec![3.0, 3.0]];
let neighbours = model.range_query(&[1.0, 1.0], &inputs);
assert!(neighbours.len() == 2);
}
#[test]
fn range_query_small_eps() {
let model = Model::new(0.01, 3);
let inputs = vec![vec![1.0, 1.0], vec![1.1, 1.9], vec![3.0, 3.0]];
let neighbours = model.range_query(&[1.0, 1.0], &inputs);
assert!(neighbours.len() == 1);
}
fn taxicab(a: &[f64], b: &[f64]) -> f64 {
a.iter().zip(b.iter()).fold(0f64, |acc, (&x, &y)| {
acc + (f64::from(x) - f64::from(y)).abs()
})
}
#[test]
fn range_query_custom_distance() {
let model = Model::new(1.0, 3).set_distance_fn::<fn(&[f64], &[f64]) -> f64>(taxicab);
let inputs = vec![vec![1.0, 1.0], vec![1.1, 1.9], vec![3.0, 3.0]];
let neighbours = model.range_query(&[1.0, 1.0], &inputs);
assert_eq!(neighbours.len(), 1)
}
}