appr_dbscan/lib.rs
1#![feature(min_const_generics)]
2pub mod utils;
3mod tree_structure;
4mod cell;
5mod core_cell;
6mod cluster;
7pub mod dbscan;
8pub mod data_io;
9
10extern crate partitions;
11extern crate rstar;
12
13use utils::*;
14use data_io::{params_from_file, read_points_from_file};
15use dbscan::approximate_dbscan;
16use std::path::{Path};
17
18/// Function that returns the result of the approximate DBSCAN algorithm
19/// executed on the set of points contained in `filename` with the given values of epsilon and rho.
20///
21/// # Arguments
22///
23/// * `filename`: the path to the file containing the data points. The file should be formatted with one point per line and the values for each coordinate should be
24/// separated by a white space. Only numerical coordinates values are accepted.
25/// * `epsilon`: the radius for the DBSCAN algorithm.
26/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
27/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
28///
29/// # Constant argument
30///
31/// * `D`: The dimensionality of each point in the data file.
32///
33/// # Return value
34///
35/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as an array of f64 (`[f64;D]`).
36/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters.
37///
38/// # Example
39/// ``` rust
40/// extern crate appr_dbscan;
41/// use appr_dbscan::do_appr_dbscan_file;
42/// use appr_dbscan::utils::DBSCANResult;
43///
44/// let res : DBSCANResult<2> = do_appr_dbscan_file("./datasets/out_test_1.txt", 0.3, 0.1, 10);
45/// let clusters_count = res.len() - 1;
46/// let noise_points_count = res[0].len();
47/// ```
48///
49pub fn do_appr_dbscan_file<P, const D: usize>(filename: P, epsilon: f64, rho: f64, min_pts: usize) -> DBSCANResult<D>
50where P: AsRef<Path>{
51 let mut params = params_from_file(&filename);
52 if params.dimensionality != D as u32 {
53 panic!("Error: declared point dimensionality is {} but the data file contains points with {} dimensions", D, params.dimensionality);
54 }
55 params.epsilon = epsilon;
56 params.rho = rho;
57 params.min_pts = min_pts;
58 let points : Vec<Point<D>> = read_points_from_file(&filename, ¶ms);
59 let res = approximate_dbscan(points, ¶ms);
60 res
61}
62
63
64/// Function that returns the result of the approximate DBSCAN algorithm
65/// executed on the set of points contained in `points` with the given values of epsilon and rho.
66///
67/// # Arguments
68///
69/// * `points`: the vector of points to execute the algorithm on. All points must be arrays of lenght `D`
70/// * `epsilon`: the radius for the DBSCAN algorithm.
71/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
72/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
73///
74/// # Constant argument
75///
76/// * `D`: The dimensionality of each point in the data.
77///
78/// # Return value
79///
80/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as an array of f64 (``[f64;D]``).
81/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters.
82///
83/// # Example
84/// ``` rust
85/// extern crate appr_dbscan;
86/// use appr_dbscan::do_appr_dbscan_points;
87/// use appr_dbscan::utils::DBSCANResult;
88///
89/// let points = vec![[0.0,0.0],[1.0,1.0],[0.0,1.0],[1.0,0.0],[2.0,1.0],[0.0,2.0],[2.0,1.0],[1.0,1.0]];
90/// let res : DBSCANResult<2> = do_appr_dbscan_points(points, 0.3, 0.1, 10);
91/// let clusters_count = res.len() - 1;
92/// let noise_points_count = res[0].len();
93/// ```
94///
95pub fn do_appr_dbscan_points<const D: usize>(points: Vec<Point<D>>, epsilon: f64, rho: f64, min_pts: usize) -> DBSCANResult<D> {
96 let params = DBSCANParams{
97 dimensionality: D as u32,
98 cardinality: points.len(),
99 epsilon: epsilon,
100 rho: rho,
101 min_pts: min_pts
102 };
103 let res = approximate_dbscan(points, ¶ms);
104 res
105}
106
107/// Function that returns the result of the approximate DBSCAN algorithm without prior knowledge of the points dimensionality
108///, executed on the set of points contained in `filename` with the given values of epsilon and rho.
109///
110/// # Arguments
111///
112/// * `filename`: the path to the file containing the data points. The file should be formatted with one point per line and the values for each coordinate should be
113/// separated by a white space. Only numerical coordinates values are accepted.
114/// * `epsilon`: the radius for the DBSCAN algorithm.
115/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
116/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
117///
118/// # Return value
119///
120/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as a vector of `f64`,
121/// contrary to the other functions, along with the detected dimensionality of the points inside.
122/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters.
123///
124/// # Example
125/// ``` rust
126/// extern crate appr_dbscan;
127/// use appr_dbscan::do_appr_dbscan_auto_dimensionality_file;
128///
129/// let (res,dimensionality) = do_appr_dbscan_auto_dimensionality_file("./datasets/out_test_1.txt", 0.3, 0.1, 10);
130/// let clusters_count = res.len() - 1;
131/// let noise_points_count = res[0].len();
132/// ```
133///
134pub fn do_appr_dbscan_auto_dimensionality_file<P>(filename: P, epsilon: f64, rho: f64, min_pts: usize) -> (VectorDBSCANResult, usize)
135where P: AsRef<Path>{
136 let params = params_from_file(&filename);
137 match params.dimensionality {
138 0 => {panic!("There has been an error while reading the data: 0 dimensionality point found");},
139 1 => (array_res_to_vector_res::<1>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
140 2 => (array_res_to_vector_res::<2>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
141 3 => (array_res_to_vector_res::<3>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
142 4 => (array_res_to_vector_res::<4>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
143 5 => (array_res_to_vector_res::<5>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
144 6 => (array_res_to_vector_res::<6>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
145 7 => (array_res_to_vector_res::<7>(do_appr_dbscan_file(filename, epsilon, rho, min_pts)),params.dimensionality as usize),
146 _ => {panic!("Dimensionalities over 7 are not supported")}
147 }
148}
149
150/// Function that returns the result of the approximate DBSCAN algorithm without prior knowledge of the points dimensionality
151///, executed on the set of points contained in vector `points` with the given values of `epsilon`, `rho` and `min_pts`.
152///
153/// # Arguments
154///
155/// * `points`: the vector of points to execute the algorithm on. All points must be vectors of the same length in order to be points from the same space.
156/// * `epsilon`: the radius for the DBSCAN algorithm.
157/// * `rho`: the approximation factor. The smaller it is the more precise the result. Usual values are 0.1 and 0.01.
158/// * `min_pts`: the minimum number of nearby points required by the DBSCAN algorithm to declare an area as 'dense'.
159///
160/// # Return value
161///
162/// This function returns a vector of clusters, where each cluster is a vector of the points contained in it. Each point is stored as a vector of `f64`,
163/// contrary to the other functions, along with the detected dimensionality.
164/// The element at index `0` is the collection of all noise points, while all the other elements are the actual clusters.
165///
166/// # Example
167/// ``` rust
168/// extern crate appr_dbscan;
169/// use appr_dbscan::do_appr_dbscan_auto_dimensionality_points;
170///
171/// let points = vec![vec![0.0,0.0],vec![1.0,1.0],vec![0.0,1.0],vec![1.0,0.0],vec![2.0,1.0],vec![0.0,2.0],vec![2.0,1.0],vec![1.0,1.0]];
172/// let (res, dimensionality) = do_appr_dbscan_auto_dimensionality_points(points, 0.3, 0.1, 10);
173/// let clusters_count = res.len() - 1;
174/// let noise_points_count = res[0].len();
175/// ```
176///
177pub fn do_appr_dbscan_auto_dimensionality_points(points: Vec<VectorPoint>, epsilon: f64, rho: f64, min_pts: usize) -> (VectorDBSCANResult, usize) {
178 if points.len() == 0 {
179 return (Vec::new(),0);
180 }
181 let dimensionality = points[0].len();
182 match dimensionality {
183 0 => {panic!("There has been an error while reading the data: 0 dimensionality point found");},
184 1 => {
185 let arr_points = vector_input_to_array_input(points);
186 (array_res_to_vector_res::<1>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
187 },
188 2 => {
189 let arr_points = vector_input_to_array_input(points);
190 (array_res_to_vector_res::<2>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
191 },
192 3 => {
193 let arr_points = vector_input_to_array_input(points);
194 (array_res_to_vector_res::<3>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
195 },
196 4 => {
197 let arr_points = vector_input_to_array_input(points);
198 (array_res_to_vector_res::<4>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
199 },
200 5 => {
201 let arr_points = vector_input_to_array_input(points);
202 (array_res_to_vector_res::<5>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
203 },
204 6 => {
205 let arr_points = vector_input_to_array_input(points);
206 (array_res_to_vector_res::<6>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
207 },
208 7 => {
209 let arr_points = vector_input_to_array_input(points);
210 (array_res_to_vector_res::<7>(do_appr_dbscan_points(arr_points, epsilon, rho, min_pts)), dimensionality)
211 },
212 _ => {panic!("Dimensionalities over 7 are not supported")}
213 }
214}
215