extern crate ordered_float;
use ordered_float::OrderedFloat;
use std::collections::HashMap;
pub trait IntoPoint: Sized {
fn get_distance(&self, neighbor: &Self) -> f64;
}
#[derive(Debug, PartialEq)]
pub enum ClusterId {
Outline,
Unclassified,
Classified(usize),
}
pub struct PointWrapper<F: IntoPoint> {
point: F,
cluster_id: ClusterId,
index: usize,
}
impl<F: IntoPoint> PointWrapper<F> {
fn new(index: usize, point: F) -> PointWrapper<F> {
PointWrapper {
point,
index,
cluster_id: ClusterId::Unclassified,
}
}
fn set_cluster_id(&mut self, cluster_id: ClusterId) {
self.cluster_id = cluster_id;
}
pub fn get_cluster_id(&self) -> &ClusterId {
&self.cluster_id
}
fn get_distance(&self, wrapper: &PointWrapper<F>) -> f64 {
self.point.get_distance(&wrapper.point)
}
pub fn get_id(&self) -> usize {
self.index
}
pub fn into_inner(&self) -> &F {
&self.point
}
}
struct Kddbscan<F: IntoPoint> {
points: Vec<PointWrapper<F>>,
k: u32,
n: u32,
deviation_factor: u32,
}
impl<F: IntoPoint> Kddbscan<F> {
pub fn new<T: IntoPoint>(points: Vec<T>, k: u32, n: u32, deviation_factor: u32) -> Kddbscan<T> {
let mut i = 0;
let mut wrappers: Vec<PointWrapper<T>> = vec![];
for point in points {
wrappers.push(PointWrapper::new(i, point));
i += 1;
}
Kddbscan {
points: wrappers,
k,
n,
deviation_factor,
}
}
pub fn cluster(&mut self) {
let mut points_iter = self.points.iter();
let mut c = 0;
let mut cluster_assigns: HashMap<usize, ClusterId> = HashMap::new();
while let Some(point) = points_iter.next() {
let cluster_id = cluster_assigns
.get(&point.get_id())
.unwrap_or(&ClusterId::Unclassified);
match cluster_id {
ClusterId::Unclassified => {
let density = self.deviation_density(point).unwrap();
if density <= self.deviation_factor as f64 {
let tmp_cluster_assigns = self.expand_cluster(&cluster_assigns, point, c);
for (k, v) in tmp_cluster_assigns {
cluster_assigns.insert(k, v);
}
c += 1;
} else {
cluster_assigns.insert(point.get_id(), ClusterId::Outline);
}
}
_ => {}
}
}
for (k, v) in cluster_assigns {
self.points.get_mut(k).unwrap().set_cluster_id(v);
}
}
fn deviation_density(&self, point: &PointWrapper<F>) -> Result<f64, &'static str> {
let neighbors = self.get_mutual_neighbors(point);
let distances: Vec<OrderedFloat<f64>> = neighbors
.iter()
.map(|v| OrderedFloat::from(point.get_distance(v)))
.collect();
let max = distances.iter().max();
match max {
Some(max) => {
let max = max.into_inner();
let all_distances: Vec<OrderedFloat<f64>> = self
.points
.iter()
.map(|p| OrderedFloat::from(p.get_distance(point)))
.collect();
let all_max = all_distances.iter().max().unwrap().into_inner();
let without_max: Vec<f64> = all_distances
.iter()
.map(|d| d.into_inner())
.filter(|d| d != &all_max)
.collect();
let all_avg = without_max.iter().sum::<f64>() / without_max.len() as f64;
Ok(max / all_avg)
}
None => Err("Can not find a neighbor"),
}
}
fn get_mutual_neighbors<'a>(&'a self, point: &'a PointWrapper<F>) -> Vec<&'a PointWrapper<F>> {
let mut neighbors = vec![];
fn fill_mutual_neighbor<'a, 'b: 'a, T: IntoPoint>(
inner_neighbors: &mut Vec<&'a PointWrapper<T>>,
kddbscan: &'a Kddbscan<T>,
main_point: &'a PointWrapper<T>,
point: &'b PointWrapper<T>,
n: u32,
) {
if n >= kddbscan.n {
if n != 0 {
inner_neighbors.push(point);
}
}
let mut points: Vec<&PointWrapper<T>> =
kddbscan.points.iter().map(|point| point).collect();
points.sort_by(|a, b| {
OrderedFloat::from(a.get_distance(point))
.cmp(&OrderedFloat::from(b.get_distance(point)))
});
let mut i = 1;
while i <= kddbscan.k {
let in_point_result = points.get(i as usize);
if let Some(in_point) = in_point_result {
if in_point.get_id() != point.get_id() {
if main_point.get_id() != in_point.get_id() {
if !inner_neighbors
.iter()
.any(|in_neighbor| in_neighbor.get_id() == in_point.get_id())
{
fill_mutual_neighbor(
inner_neighbors,
kddbscan,
main_point,
in_point,
n + 1,
);
}
}
}
}
i += 1;
}
}
fill_mutual_neighbor(&mut neighbors, self, point, point, 0);
neighbors
}
fn expand_cluster(
&self,
core_cluster_assigns: &HashMap<usize, ClusterId>,
point: &PointWrapper<F>,
cluster_id: usize,
) -> HashMap<usize, ClusterId> {
let mut cluster_assigns: HashMap<usize, ClusterId> = HashMap::new();
cluster_assigns.insert(point.get_id(), ClusterId::Classified(cluster_id));
let mut core_points: Vec<&PointWrapper<F>> = vec![];
core_points.push(point);
let mut i = 0;
while i < core_points.len() {
let point_i = core_points[i];
let neighbors = self.get_mutual_neighbors(point_i);
cluster_assigns.insert(point_i.get_id(), ClusterId::Classified(cluster_id));
for point_j in neighbors {
let point_j_cluster_id = cluster_assigns.get(&point_j.get_id()).unwrap_or(
core_cluster_assigns
.get(&point_j.get_id())
.unwrap_or(&ClusterId::Unclassified),
);
match point_j_cluster_id {
ClusterId::Classified(_) => {}
_ => {
cluster_assigns.insert(point_j.get_id(), ClusterId::Classified(cluster_id));
}
}
let dev_density = {
self.deviation_density(point_j)
.expect("Can not calculate deviation factor.")
};
let density_reachable = self.density_reachable(point_j, point_i);
if dev_density <= self.deviation_factor as f64 && density_reachable {
if !core_points.iter().any(|p| p.get_id() == point_j.get_id()) {
core_points.push(point_j);
}
} else {
cluster_assigns.insert(point_j.get_id(), ClusterId::Outline);
}
}
i += 1;
}
cluster_assigns
}
fn density_reachable(&self, p: &PointWrapper<F>, q: &PointWrapper<F>) -> bool {
let p_mutual_neighbors = self.get_mutual_neighbors(p);
let q_mutual_neighbors = self.get_mutual_neighbors(q);
let p_distances: Vec<OrderedFloat<f64>> = p_mutual_neighbors
.iter()
.map(|point| OrderedFloat::from(point.get_distance(p)))
.collect();
let q_distances: Vec<OrderedFloat<f64>> = q_mutual_neighbors
.iter()
.map(|point| OrderedFloat::from(point.get_distance(q)))
.collect();
let p_max = p_distances.iter().max().unwrap().into_inner();
let q_max = q_distances.iter().max().unwrap().into_inner();
let p_len = p_distances.len();
let q_len = q_distances.len();
let p_sum = p_distances
.iter()
.map(|ord_float| ord_float.into_inner())
.sum::<f64>();
let q_sum = q_distances
.iter()
.map(|ord_float| ord_float.into_inner())
.sum::<f64>();
let p_avg = p_sum / (p_len as f64);
let q_avg = q_sum / (q_len as f64);
let p_to_q = p.get_distance(q);
let first = (p_max / q_max).max(q_max / p_max) <= self.deviation_factor as f64;
let second = (p_avg / q_avg).max(q_avg / p_avg) <= self.deviation_factor as f64;
let third = (p_max / q_avg).max(q_max / p_avg) <= self.deviation_factor as f64;
let fourth = (p_to_q / p_max).max(p_max / p_to_q) <= self.deviation_factor as f64;
let fifth = (p_to_q / q_max).max(q_max / p_to_q) <= self.deviation_factor as f64;
first && second && third && fourth && fifth
}
pub fn get_clustered(self) -> Vec<PointWrapper<F>> {
self.points
}
}
pub fn cluster<T: IntoPoint>(
points: Vec<T>,
k: u32,
n: Option<u32>,
deviation_factor: Option<u32>,
) -> Vec<PointWrapper<T>> {
let mut kddbscan = Kddbscan::<T>::new::<T>(
points,
k,
n.unwrap_or(1),
deviation_factor.unwrap_or(999999),
);
kddbscan.cluster();
let clusters = kddbscan.get_clustered();
clusters
}
#[cfg(test)]
mod tests {
use super::*;
pub struct Coordinate {
pub x: f64,
pub y: f64,
}
impl IntoPoint for Coordinate {
fn get_distance(&self, neighbor: &Coordinate) -> f64 {
((self.x - neighbor.x).powi(2) + (self.y - neighbor.y).powi(2)).powf(0.5)
}
}
fn create_kddbscan(k: u32) -> Kddbscan<Coordinate> {
let mut coordinates: Vec<Coordinate> = vec![];
coordinates.push(Coordinate { x: 11.0, y: 12.0 });
coordinates.push(Coordinate { x: 0.0, y: 0.0 });
coordinates.push(Coordinate { x: 12.0, y: 11.0 });
coordinates.push(Coordinate { x: 11.0, y: 11.0 });
coordinates.push(Coordinate { x: 1.0, y: 2.0 });
coordinates.push(Coordinate { x: 3.0, y: 1.0 });
Kddbscan::<Coordinate>::new::<Coordinate>(coordinates, k, 1, 999999)
}
#[test]
fn test_mutual_neighbor() {
let kddbscan_2 = create_kddbscan(2);
let point_wrapper = kddbscan_2.points.get(0).unwrap();
let mutual_neighbors = kddbscan_2.get_mutual_neighbors(point_wrapper);
assert_eq!(mutual_neighbors.len(), 2);
assert_eq!(mutual_neighbors.get(0).unwrap().get_id(), 3);
assert_eq!(mutual_neighbors.get(1).unwrap().get_id(), 2);
let kddbscan_3 = create_kddbscan(3);
let point_wrapper = kddbscan_3.points.get(0).unwrap();
let mutual_neighbors = kddbscan_3.get_mutual_neighbors(point_wrapper);
assert_eq!(mutual_neighbors.len(), 5);
}
#[test]
fn test_deviation_density() {
let kddbscan = create_kddbscan(2);
let point_wrapper = kddbscan.points.get(0).unwrap();
let deviation_density = kddbscan.deviation_density(point_wrapper).unwrap();
assert!(deviation_density<0.24 && deviation_density>0.22);
let point_wrapper = kddbscan.points.get(1).unwrap();
let deviation_density = kddbscan.deviation_density(point_wrapper).unwrap();
assert!(deviation_density<0.61 && deviation_density>0.60);
}
#[test]
fn test_density_reachable(){
let kddbscan = create_kddbscan(2);
let density_reachable = kddbscan.density_reachable(
kddbscan.points.get(0).unwrap()
,
kddbscan.points.get(1).unwrap()
);
assert!(density_reachable);
}
}