flow_clustering/clustering/dbscan.rs
1//! DBSCAN clustering implementation
2
3use crate::clustering::{ClusteringError, ClusteringResult};
4use ndarray::Array2;
5
/// Tunable parameters for a DBSCAN run.
///
/// Both fields are public, so a value can be built with a struct literal or
/// obtained from [`Default`] and adjusted afterwards.
#[derive(Clone, Debug)]
pub struct DbscanConfig {
    /// Neighbourhood radius: the largest distance at which two samples are
    /// still considered neighbours of each other.
    pub eps: f64,
    /// Neighbourhood size a sample needs in order to qualify as a core point.
    pub min_samples: usize,
}
14
15impl Default for DbscanConfig {
16 fn default() -> Self {
17 Self {
18 eps: 0.5,
19 min_samples: 5,
20 }
21 }
22}
23
/// Outcome of a DBSCAN run.
#[derive(Clone, Debug)]
pub struct DbscanResult {
    /// One label per input point, in input order; `-1` marks a point that was
    /// classified as noise (an outlier).
    pub assignments: Vec<i32>,
    /// How many clusters were discovered.
    pub n_clusters: usize,
    /// How many points were classified as noise.
    pub n_noise: usize,
}
34
/// Stateless entry point for DBSCAN clustering.
///
/// The type carries no data; clustering is invoked through its associated
/// function `fit`.
pub struct Dbscan;
37
38impl Dbscan {
39 /// Perform DBSCAN clustering
40 ///
41 /// # Arguments
42 /// * `data` - Input data matrix (n_samples × n_features)
43 /// * `config` - Configuration for DBSCAN
44 ///
45 /// # Returns
46 /// DbscanResult with cluster assignments
47 pub fn fit(data: &Array2<f64>, _config: &DbscanConfig) -> ClusteringResult<DbscanResult> {
48 if data.nrows() == 0 {
49 return Err(ClusteringError::EmptyData);
50 }
51
52 // Use linfa-clustering for DBSCAN
53 // NOTE: DBSCAN in linfa-clustering 0.8 has trait bound issues with ParamGuard
54 // This is a known limitation - DBSCAN clustering is temporarily disabled
55 // TODO: Fix DBSCAN once linfa-clustering API is updated or use alternative implementation
56 Err(ClusteringError::ClusteringFailed(
57 "DBSCAN clustering is temporarily disabled due to linfa-clustering API limitations. \
58 Please use K-means or GMM clustering instead."
59 .to_string(),
60 ))
61
62 // Original implementation (commented out until API issue is resolved):
63 /*
64 let dataset = DatasetBase::new(data.clone(), ());
65 let model = LinfaDbscan::params(config.min_samples)
66 .tolerance(config.eps)
67 .check()
68 .map_err(|e| ClusteringError::ValidationFailed(format!("DBSCAN params validation failed: {:?}", e)))?
69 .fit(&dataset)
70 .map_err(|e| ClusteringError::ClusteringFailed(format!("{}", e)))?;
71
72 let assignments: Vec<i32> = model
73 .labels()
74 .iter()
75 .map(|&label| label as i32)
76 .collect();
77
78 let n_clusters = assignments
79 .iter()
80 .filter(|&&a| a >= 0)
81 .map(|&a| a as usize)
82 .max()
83 .map(|m| m + 1)
84 .unwrap_or(0);
85 let n_noise = assignments.iter().filter(|&&a| a == -1).count();
86
87 Ok(DbscanResult {
88 assignments,
89 n_clusters,
90 n_noise,
91 })
92 */
93 }
94}