1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
//! This module contains an implementation of Density-Based Spatial Clustering of Applications with
//! Noise (DBSCAN)

#[cfg(test)]
#[path = "../../../tests/unit/algorithms/dbscan/dbscan_test.rs"]
mod dbscan_test;

use hashbrown::{HashMap, HashSet};
use std::hash::Hash;

/// Represents a cluster of items.
pub type Cluster<'a, T> = Vec<&'a T>;

/// A function which returns neighbors of given item with given epsilon.
pub type NeighborhoodFn<'a, T> = Box<dyn Fn(&'a T, f64) -> Box<dyn Iterator<Item = &'a T> + 'a> + 'a>;

/// Creates clusters of items using DBSCAN (Density-Based Spatial Clustering of Applications with Noise) algorithm.
/// NOTE: `neighborhood_fn` shall return point itself.
pub fn create_clusters<'a, T>(
    items: &'a [T],
    eps: f64,
    min_items: usize,
    neighborhood_fn: &NeighborhoodFn<'a, T>,
) -> Vec<Cluster<'a, T>>
where
    T: Hash + Eq,
{
    let mut item_types = HashMap::<&T, ItemType>::new();
    let mut clusters = Vec::new();

    for item in items {
        if item_types.get(item).is_some() {
            continue;
        }

        let mut neighbors = neighborhood_fn(item, eps).collect::<Vec<_>>();
        if neighbors.len() < min_items {
            item_types.insert(item, ItemType::Noise);
        } else {
            let mut cluster = Vec::new();

            cluster.push(item);
            item_types.insert(item, ItemType::Clustered);

            let mut index = 0;
            while index < neighbors.len() {
                let item = neighbors[index];

                let item_type = item_types.get(item);

                if item_type.is_none() {
                    let other_neighbours = neighborhood_fn(item, eps).collect::<Vec<_>>();
                    if other_neighbours.len() >= min_items {
                        let set = neighbors.iter().cloned().collect::<HashSet<_>>();
                        neighbors.extend(other_neighbours.into_iter().filter(move |item| !set.contains(item)));
                    }
                }

                match item_type {
                    Some(item_type) if *item_type == ItemType::Clustered => {}
                    _ => {
                        item_types.insert(item, ItemType::Clustered);
                        cluster.push(item);
                    }
                }

                index += 1;
            }

            clusters.push(cluster);
        }
    }

    clusters
}

#[derive(Eq, PartialEq)]
enum ItemType {
    Noise,
    Clustered,
}