appr_dbscan/
lib.rs

1#![feature(min_const_generics)]
2pub mod utils;
3mod tree_structure;
4mod cell;
5mod core_cell;
6mod cluster;
7pub mod dbscan;
8pub mod data_io;
9
10extern crate partitions;
11extern crate rstar;
12
13use utils::*;
14use data_io::{params_from_file, read_points_from_file};
15use dbscan::approximate_dbscan;
16use std::path::{Path};
17
18/// Function that returns the result of the approximate DBSCAN algorithm 
19/// executed on the set of points contained in `filename` with the given values of epsilon and rho.
20///  
21/// # Arguments
22/// 
23/// * `filename`: the path to the file containing the data points. The file should be formatted with one point per line and the values for each coordinate should be 
24///     separated by a white space. Only numerical coordinates values are accepted. 
25/// * `epsilon`: the radius for the DBSCAN algorithm. 
26/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
27/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
28/// 
29/// # Constant argument
30/// 
31/// * `D`: The dimensionality of each point in the data file.
32/// 
33/// # Return value
34/// 
35/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as an array of f64 (`[f64;D]`).
36/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters. 
37/// 
38/// # Example
39/// ``` rust
40/// extern crate appr_dbscan;
41/// use appr_dbscan::do_appr_dbscan_file;
42/// use appr_dbscan::utils::DBSCANResult;
43/// 
44/// let res : DBSCANResult<2> = do_appr_dbscan_file("./datasets/out_test_1.txt", 0.3, 0.1, 10);
45/// let clusters_count = res.len() - 1;
46/// let noise_points_count = res[0].len();
47/// ```
48/// 
49pub fn do_appr_dbscan_file<P, const D: usize>(filename: P, epsilon: f64, rho: f64, min_pts: usize) -> DBSCANResult<D> 
50where P: AsRef<Path>{
51    let mut params = params_from_file(&filename);
52    if params.dimensionality != D as u32 {
53        panic!("Error: declared point dimensionality is {} but the data file contains points with {} dimensions", D, params.dimensionality);   
54    }
55    params.epsilon = epsilon;
56    params.rho = rho;
57    params.min_pts = min_pts;
58    let points : Vec<Point<D>> = read_points_from_file(&filename, &params);
59    let res = approximate_dbscan(points, &params);
60    res
61}
62
63
64/// Function that returns the result of the approximate DBSCAN algorithm 
65/// executed on the set of points contained in `points` with the given values of epsilon and rho.
66///  
67/// # Arguments
68/// 
69/// * `points`: the vector of points to execute the algorithm on. All points must be arrays of lenght `D` 
70/// * `epsilon`: the radius for the DBSCAN algorithm. 
71/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
72/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
73/// 
74/// # Constant argument
75/// 
76/// * `D`: The dimensionality of each point in the data. 
77/// 
78/// # Return value
79/// 
80/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as an array of f64 (``[f64;D]``).
81/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters. 
82/// 
83/// # Example
84/// ``` rust
85/// extern crate appr_dbscan;
86/// use appr_dbscan::do_appr_dbscan_points;
87/// use appr_dbscan::utils::DBSCANResult;
88/// 
89/// let points = vec![[0.0,0.0],[1.0,1.0],[0.0,1.0],[1.0,0.0],[2.0,1.0],[0.0,2.0],[2.0,1.0],[1.0,1.0]];
90/// let res : DBSCANResult<2> = do_appr_dbscan_points(points, 0.3, 0.1, 10);
91/// let clusters_count = res.len() - 1;
92/// let noise_points_count = res[0].len();
93/// ```
94/// 
95pub fn do_appr_dbscan_points<const D: usize>(points: Vec<Point<D>>, epsilon: f64, rho: f64, min_pts: usize) -> DBSCANResult<D> {
96    let params = DBSCANParams{
97        dimensionality: D as u32,
98        cardinality: points.len(),
99        epsilon: epsilon,
100        rho: rho,
101        min_pts: min_pts
102    };
103    let res = approximate_dbscan(points, &params);
104    res
105}
106
107/// Function that returns the result of the approximate DBSCAN algorithm without prior knowledge of the points dimensionality
108///, executed on the set of points contained in `filename` with the given values of epsilon and rho.
109///  
110/// # Arguments
111/// 
112/// * `filename`: the path to the file containing the data points. The file should be formatted with one point per line and the values for each coordinate should be 
113///     separated by a white space. Only numerical coordinates values are accepted. 
114/// * `epsilon`: the radius for the DBSCAN algorithm. 
115/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
116/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
117/// 
118/// # Return value
119/// 
120/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as a vector of `f64`, 
121/// contrary to the other functions, along with the detected dimensionality of the points inside.
122/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters. 
123/// 
124/// # Example
125/// ``` rust
126/// extern crate appr_dbscan;
127/// use appr_dbscan::do_appr_dbscan_auto_dimensionality_file;
128/// 
129/// let (res,dimensionality) = do_appr_dbscan_auto_dimensionality_file("./datasets/out_test_1.txt", 0.3, 0.1, 10);
130/// let clusters_count = res.len() - 1;
131/// let noise_points_count = res[0].len();
132/// ```
133/// 
134pub fn do_appr_dbscan_auto_dimensionality_file<P>(filename: P, epsilon: f64, rho: f64, min_pts: usize) -> (VectorDBSCANResult, usize)
135where P: AsRef<Path>{
136    let params = params_from_file(&filename);
137    match params.dimensionality {
138        0 => {panic!("There has been an error while reading the data: 0 dimensionality point found");},
139        1 => (array_res_to_vector_res::<1>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
140        2 => (array_res_to_vector_res::<2>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
141        3 => (array_res_to_vector_res::<3>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
142        4 => (array_res_to_vector_res::<4>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
143        5 => (array_res_to_vector_res::<5>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
144        6 => (array_res_to_vector_res::<6>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
145        7 => (array_res_to_vector_res::<7>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
146        _ => {panic!("Dimensionalities over 7 are not supported")}
147    }
148}
149
150/// Function that returns the result of the approximate DBSCAN algorithm without prior knowledge of the points dimensionality
151///, executed on the set of points contained in vector `points` with the given values of `epsilon`, `rho` and `min_pts`.
152///  
153/// # Arguments
154/// 
155/// * `points`: the vector of points to execute the algorithm on. All points must be vectors of the same length in order to be points from the same space. 
156/// * `epsilon`: the radius for the DBSCAN algorithm. 
157/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
158/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
159/// 
160/// # Return value
161/// 
162/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as a vector of `f64`, 
163/// contrary to the other functions, along with the detected dimensionality.
164/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters. 
165/// 
166/// # Example
167/// ``` rust
168/// extern crate appr_dbscan;
169/// use appr_dbscan::do_appr_dbscan_auto_dimensionality_points;
170/// 
171/// let points = vec![vec![0.0,0.0],vec![1.0,1.0],vec![0.0,1.0],vec![1.0,0.0],vec![2.0,1.0],vec![0.0,2.0],vec![2.0,1.0],vec![1.0,1.0]];
172/// let (res, dimensionality) = do_appr_dbscan_auto_dimensionality_points(points, 0.3, 0.1, 10);
173/// let clusters_count = res.len() - 1;
174/// let noise_points_count = res[0].len();
175/// ```
176/// 
177pub fn do_appr_dbscan_auto_dimensionality_points(points: Vec<VectorPoint>, epsilon: f64, rho: f64, min_pts: usize) -> (VectorDBSCANResult, usize) {
178    if points.len() == 0 {
179        return (Vec::new(),0);
180    }
181    let dimensionality = points[0].len();
182    match dimensionality {
183        0 => {panic!("There has been an error while reading the data: 0 dimensionality point found");},
184        1 => {
185            let arr_points = vector_input_to_array_input(points);
186            (array_res_to_vector_res::<1>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
187        },
188        2 => {
189            let arr_points = vector_input_to_array_input(points);
190            (array_res_to_vector_res::<2>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
191        },
192        3 => {
193            let arr_points = vector_input_to_array_input(points);
194            (array_res_to_vector_res::<3>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
195        },
196        4 => {
197            let arr_points = vector_input_to_array_input(points);
198            (array_res_to_vector_res::<4>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
199        },
200        5 => {
201            let arr_points = vector_input_to_array_input(points);
202            (array_res_to_vector_res::<5>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
203        },
204        6 => {
205            let arr_points = vector_input_to_array_input(points);
206            (array_res_to_vector_res::<6>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
207        },
208        7 => {
209            let arr_points = vector_input_to_array_input(points);
210            (array_res_to_vector_res::<7>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
211        },
212        _ => {panic!("Dimensionalities over 7 are not supported")}
213    }
214}
215