extern crate wasm_bindgen;
#[macro_use]
extern crate serde_derive;
use wasm_bindgen::prelude::*;
use std::collections::HashSet;
use std::f64;
fn take_arbitrary(set: &mut HashSet<usize>) -> Option<usize> {
let value_copy = if let Some(value) = set.iter().next() {
Some(*value)
} else {
None
};
if let Some(value) = value_copy {
set.take(&value)
} else {
None
}
}
pub trait MetricSpace: Sized {
fn distance(&self, other: &Self) -> f64;
}
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
#[derive(Deserialize)]
pub struct JsPoint {
x: f64,
y: f64,
}
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
impl MetricSpace for JsPoint {
fn distance(&self, other: &Self) -> f64 {
((other.x - self.x).powi(2) + (other.y - self.y).powi(2)).sqrt() as f64
}
}
#[derive(Debug, PartialEq, Serialize)]
pub enum Category {
Core,
Border,
Noise,
}
#[derive(Debug, Serialize)]
pub struct Assignment {
pub index: usize,
pub label: f64,
pub category: Category,
}
pub type Cluster = Vec<Assignment>;
#[wasm_bindgen]
pub struct FuzzyDBSCAN {
pub eps_min: f64,
pub eps_max: f64,
pub pts_min: f64,
pub pts_max: f64,
}
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
#[wasm_bindgen]
impl FuzzyDBSCAN {
#[wasm_bindgen(constructor)]
pub fn new() -> Self {
FuzzyDBSCAN {
eps_min: f64::NAN,
eps_max: f64::NAN,
pts_min: f64::NAN,
pts_max: f64::NAN,
}
}
pub fn cluster(&self, js_points: JsValue) -> JsValue {
let points: Vec<JsPoint> = js_points.into_serde().unwrap();
let clusters = self.fuzzy_dbscan(&points);
JsValue::from_serde(&clusters).unwrap()
}
}
#[cfg(not(target_arch = "wasm32"))]
impl FuzzyDBSCAN {
pub fn cluster<P: MetricSpace>(&self, points: &[P]) -> Vec<Cluster> {
self.fuzzy_dbscan(points)
}
}
impl FuzzyDBSCAN {
fn fuzzy_dbscan<P: MetricSpace>(&self, points: &[P]) -> Vec<Cluster> {
let mut clusters = Vec::new();
let mut noise_cluster = Vec::new();
let mut visited = vec![false; points.len()];
for point_index in 0..points.len() {
if visited[point_index] {
continue;
}
visited[point_index] = true;
let neighbor_indices = self.region_query(points, point_index);
let point_label = self.mu_min_p(self.density(point_index, &neighbor_indices, points));
if point_label == 0.0 {
noise_cluster.push(Assignment {
index: point_index,
category: Category::Noise,
label: 1.0,
});
} else {
clusters.push(self.expand_cluster_fuzzy(
point_label,
point_index,
neighbor_indices,
points,
&mut visited,
));
}
}
if !noise_cluster.is_empty() {
clusters.push(noise_cluster);
}
clusters
}
fn expand_cluster_fuzzy<P: MetricSpace>(
&self,
point_label: f64,
point_index: usize,
mut neighbor_indices: HashSet<usize>,
points: &[P],
visited: &mut [bool],
) -> Vec<Assignment> {
let mut cluster = vec![Assignment {
index: point_index,
category: Category::Core,
label: point_label,
}];
let mut border_points = Vec::new();
let mut neighbor_visited = vec![false; points.len()];
while let Some(neighbor_index) = take_arbitrary(&mut neighbor_indices) {
neighbor_visited[neighbor_index] = true;
visited[neighbor_index] = true;
let neighbor_neighbor_indices = self.region_query(points, neighbor_index);
let neighbor_label =
self.mu_min_p(self.density(neighbor_index, &neighbor_neighbor_indices, points));
if neighbor_label > 0.0 {
for neighbor_neighbor_index in neighbor_neighbor_indices {
if !neighbor_visited[neighbor_neighbor_index] {
neighbor_indices.insert(neighbor_neighbor_index);
}
}
cluster.push(Assignment {
index: neighbor_index,
category: Category::Core,
label: neighbor_label,
});
} else {
border_points.push(Assignment {
index: neighbor_index,
category: Category::Border,
label: f64::MAX,
});
}
}
for border_point in &mut border_points {
for cluster_point in &cluster {
let mu_distance =
self.mu_distance(&points[border_point.index], &points[cluster_point.index]);
if mu_distance > 0.0 {
border_point.label =
cluster_point.label.min(mu_distance).min(border_point.label);
}
}
}
cluster.append(&mut border_points);
cluster
}
fn region_query<P: MetricSpace>(&self, points: &[P], point_index: usize) -> HashSet<usize> {
points
.iter()
.enumerate()
.filter(|(neighbor_index, neighbor_point)| {
*neighbor_index != point_index
&& neighbor_point.distance(&points[point_index]) <= self.eps_max
}).map(|(neighbor_index, _)| neighbor_index)
.collect() }
fn density<P: MetricSpace>(
&self,
point_index: usize,
neighbor_indices: &HashSet<usize>,
points: &[P],
) -> f64 {
1.0 + neighbor_indices.iter().fold(0.0, |sum, &neighbor_index| {
sum + self.mu_distance(&points[point_index], &points[neighbor_index])
})
}
fn mu_min_p(&self, n: f64) -> f64 {
if n >= self.pts_max {
1.0
} else if n < self.pts_min {
0.0
} else {
(n - self.pts_min) / (self.pts_max - self.pts_min)
}
}
fn mu_distance<P: MetricSpace>(&self, a: &P, b: &P) -> f64 {
let distance = a.distance(b);
if distance <= self.eps_min {
1.0
} else if distance > self.eps_max {
0.0
} else {
(self.eps_max - distance) / (self.eps_max - self.eps_min)
}
}
}