appr_dbscan/utils/
mod.rs

1use rstar::{Point as RPoint};
2
3#[derive(Clone,Copy,PartialEq,Debug)]
4/// Mock struct to use RTrees with const generics
5pub struct CellIndexPoint<const D: usize>{
6    pub index: CellIndex<D>
7}
8
9impl <const D:usize> RPoint for CellIndexPoint<D>{
10    
11    type Scalar = i64;
12    const DIMENSIONS: usize = D;
13
14    fn generate(generator: impl Fn(usize) -> Self::Scalar) -> Self
15    {
16        let mut r : CellIndexPoint<D> = CellIndexPoint{index: [0;D]};
17        for i in 0..D {
18            r.index[i] = generator(i);
19        }
20        r
21    }
22
23    fn nth(&self, index: usize) -> Self::Scalar{
24        if index < D {
25            self.index[index]
26        } else {
27            unreachable!()
28        }
29    }
30
31    fn nth_mut(&mut self, index: usize) -> &mut Self::Scalar
32    {
33        if index < D {
34            &mut self.index[index]
35        } else {
36            unreachable!()
37        }
38    }
39}
40
41/// The parameters needed to run the approximate DBSCAN algorithm
42pub struct DBSCANParams{
43    /// The number of points to cluster
44    pub cardinality: usize,
45    /// The dimensionality of the space containing the points
46    pub dimensionality: u32,
47    /// The clustering radius
48    pub epsilon: f64,
49    /// The approximation factor
50    pub rho: f64,
51    /// The minimum number of points for density
52    pub min_pts: usize
53}
54
55#[derive(PartialEq, Debug)]
56/// See documentation for the function `utils::determine_intersection`
57pub enum IntersectionType{
58    FullyCovered,
59    Disjoint,
60    Intersecting
61}
62
63// Computes the euclidean distance between two points in a `D` dimensional space
64pub fn euclidean_distance<const D: usize>(p: &Point<D>, q: &Point<D>) -> f64 {
65    let mut sum : f64 = 0.0;
66    for i in 0..D{
67        sum += (p[i]-q[i]).powf(2_f64);
68    }
69    sum.sqrt()
70}
71
72/// Determines the type of intersection between a cell and an approximated ball.
73/// The cell is determined by its center and the side of its size.
74/// Returns: 
75///  * IntersectionType::FullyCovered if the cell is completely contained in a ball with center `q` and radius `epsilon(1 + rho)`;
76///  * IntersectionType::Disjoint if the cell is completely outside of a ball with center `q` and radius `epsilon`;
77///  * IntersectionType::Intersecting otherwise;
78pub fn determine_intersection<const D: usize>(q: &Point<D>, params: &DBSCANParams, index_c: &CellIndex<D>, side_size:f64) -> IntersectionType{
79    let n_corners = (2_usize.pow(D as u32)) as usize;
80    let mut cell_center : CellCenter<D> = [0.0;D];
81    for i in 0..D {
82        cell_center[i] = index_c[i] as f64 * side_size;
83    }
84    let corners = get_corners(&cell_center, side_size);
85    let appr_dist = (1.0 + params.rho) * params.epsilon;
86    let mut appr_in_count : usize = 0;
87    let mut out_count : usize = 0;
88    for corner in corners {
89        let dist = euclidean_distance(q, &corner);
90        if dist <= appr_dist {
91            appr_in_count += 1;
92        }
93        if dist >= params.epsilon {
94            out_count += 1;
95        }
96    }
97    if appr_in_count == n_corners{
98        return IntersectionType::FullyCovered
99    } else if out_count == n_corners{
100        return IntersectionType::Disjoint
101    }
102    IntersectionType::Intersecting
103}
104
105/// Gets the coordinates of all the corners (2^D) of a cell given its center points and its side size.
106fn get_corners<const D: usize>(cell_center: &CellCenter<D>, side_size: f64) -> Vec<Point<D>>{
107    let dist = side_size/2.0;
108    //Ho 2^d combinazioni. Posso pensare ogni combinazione come un numero binario di d cifre.
109    //Immagino di sostituire lo 0 con -dist e l'1 con +dist. Allora posso partire da cell_center
110    //e fare la sua somma con ogni numero binario per trovare tutti i vertici
111    let top = 2_usize.pow(D as u32);
112    let mut corners = Vec::with_capacity(top);
113    for bin_rep in 0..top {
114        let mut new_corner = cell_center.clone();
115        for bit_i in 0..D {
116            let mask = 1 << bit_i;
117            if bin_rep & mask == 0 {
118                new_corner[bit_i] -= dist;
119            } else {
120                new_corner[bit_i] += dist;
121            }
122        }
123        corners.push(new_corner);
124    }
125    corners
126}
127
128/// Gets the indexes of the intervals of the axes in the `D` dimensional space where lies a Cell with side 
129/// size equal to `side_size` that contains point `p`
130pub fn get_cell_index<const D: usize>(p: &Point<D>, side_size: f64) -> CellIndex<D>{
131    let mut new_index = [0;D];
132    let half_size = side_size/2.0;
133    for i in 0..p.len() {
134        if p[i] >= (-1.0 * half_size) && p[i] < half_size {
135            new_index[i] =  0;
136        } else if p[i] > 0.0 {
137            new_index[i] = ((p[i] - half_size) / side_size).ceil() as i64;
138        } else {
139            new_index[i] = -1 + ((p[i] + half_size) / side_size).ceil() as i64;
140        }
141    }
142    new_index
143}
144
145/// Gets the indexes of the intervals of the axes in the `D` dimensional space where lies a Cell with side 
146/// size equal to `epsilon/sqrt(D)` that contains point `p`
147pub fn get_base_cell_index<const D: usize>(p: &Point<D>, params: &DBSCANParams) ->CellIndex<D>{
148    get_cell_index(p, params.epsilon/(params.dimensionality as f64).sqrt())
149}
150
151/// Gets the euclidean distance to the power of 2 between two arrays representing cell indexes
152pub fn index_distance_sq<const D: usize>(i_1 : &CellIndex<D>, i_2: &CellIndex<D>) -> usize {
153    let mut dist : usize = 0;
154    for j in 0..i_1.len() {
155        dist += (i_1[j] - i_2[j]).pow(2) as usize;
156    }
157    dist
158}
159
160/// Array that stores the indexes of the intervals of the axes in the `D` dimensional space 
161/// that are occupied by a certain cell
162pub type CellIndex<const D: usize> = [i64;D];
163/// Type that represent the point in the `D` dimensional space that lays at the center of a cell
164pub type CellCenter<const D: usize> = Point<D>;
165/// Type that represents a point with dimensionality D
166pub type Point<const D: usize> = [f64;D];
167/// Collection of points in the same cluster
168pub type Cluster <const D: usize> = Vec<Point<D>>;
169/// Collection of all the cluster found by the DBSCAN algorithm. Its first element
170/// will be the collection of noise points.
171pub type DBSCANResult <const D: usize> = Vec<Cluster<D>>;
172
173/// Point defined as a vector instead of as an array like in `utils::Point`.
174/// Used for when dimensionality is not previously known.
175/// If dimensionality D is known then using `utils::Point<D>` is preferred 
176pub type VectorPoint = Vec<f64>;
177/// Cluster redefined to accomodate vector points.
178/// If dimensionality D is known then using `utils::Cluster<D>` is preferred 
179pub type VectorCluster = Vec<VectorPoint>;
180/// Result redefined to accomodate vector clusters
181/// If dimensionality D is known then using `utils::DBSCANResult<D>` is preferred 
182pub type VectorDBSCANResult = Vec<VectorCluster>;
183
184/// Translates a vector of points represented as vectors in a vector of points represented ad fixed length arrays.
185/// Panics if the points do not all have the same length.
186pub fn vector_input_to_array_input<const D: usize>(v_in: Vec<VectorPoint>) -> Vec<Point<D>> {
187    if v_in.len() == 0 {
188        panic!("Received an unexpected 0 length vector. This should not have happened");
189    }
190    let mut arr_in = Vec::with_capacity(v_in.len());
191    for i in 1..v_in.len() {
192        if v_in[i].len() != D {
193            panic!("DBSCAN: expected all points to have {} components, but point {} has {} components instead",D, i, v_in[i].len());
194        }
195        let mut arr_point = [0.0;D];
196        for j in 0..D {
197            arr_point[j] = v_in[i][j];
198        }
199        arr_in.push(arr_point);
200    }
201    arr_in
202}
203
204/// Transforms a vector of clusters containing points represented as arrays into a vector of clusters
205/// where each point is represented as a vector.
206pub fn array_res_to_vector_res<const D: usize>(a_res: DBSCANResult<D>) -> VectorDBSCANResult {
207    let mut v_res : VectorDBSCANResult = Vec::with_capacity(a_res.len());
208    for i in 0..a_res.len() {
209        let mut v_cluster = Vec::with_capacity(a_res[i].len());
210        for a_point in &a_res[i] {
211            v_cluster.push(a_point.to_vec());
212        }
213        v_res.push(v_cluster);
214    }
215    v_res
216}
217
218#[cfg(test)]
219mod tests;