flow_utils/clustering/dbscan.rs
1//! DBSCAN clustering implementation
2
3use crate::clustering::{ClusteringError, ClusteringResult};
4use ndarray::Array2;
5
/// Tunable parameters for a DBSCAN run.
#[derive(Clone, Debug)]
pub struct DbscanConfig {
    /// Neighborhood radius: the largest distance at which two samples still
    /// count as being in each other's neighborhood.
    pub eps: f64,
    /// Density threshold: how many samples a neighborhood must contain for
    /// its center to qualify as a core point.
    pub min_samples: usize,
}
14
15impl Default for DbscanConfig {
16 fn default() -> Self {
17 Self {
18 eps: 0.5,
19 min_samples: 5,
20 }
21 }
22}
23
/// Outcome of a DBSCAN run.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so results can
/// be duplicated and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DbscanResult {
    /// Cluster label for each input point, in input order.
    /// A label of `-1` marks the point as noise (an outlier).
    pub assignments: Vec<i32>,
    /// Number of clusters found
    pub n_clusters: usize,
    /// Number of points labelled as noise
    pub n_noise: usize,
}
34
/// DBSCAN clustering.
///
/// A stateless unit type that groups the DBSCAN entry points; clustering is
/// invoked through the associated function [`Dbscan::fit`].
#[derive(Debug, Clone, Copy, Default)]
pub struct Dbscan;
37
38impl Dbscan {
39 /// Perform DBSCAN clustering
40 ///
41 /// # Arguments
42 /// * `data` - Input data matrix (n_samples × n_features)
43 /// * `config` - Configuration for DBSCAN
44 ///
45 /// # Returns
46 /// DbscanResult with cluster assignments
47 pub fn fit(data: &Array2<f64>, _config: &DbscanConfig) -> ClusteringResult<DbscanResult> {
48 if data.nrows() == 0 {
49 return Err(ClusteringError::EmptyData);
50 }
51
52 // Use linfa-clustering for DBSCAN
53 // NOTE: DBSCAN in linfa-clustering 0.8 has trait bound issues with ParamGuard
54 // This is a known limitation - DBSCAN clustering is temporarily disabled
55 // TODO: Fix DBSCAN once linfa-clustering API is updated or use alternative implementation
56 Err(ClusteringError::ClusteringFailed(
57 "DBSCAN clustering is temporarily disabled due to linfa-clustering API limitations. \
58 Please use K-means or GMM clustering instead.".to_string()
59 ))
60
61 // Original implementation (commented out until API issue is resolved):
62 /*
63 let dataset = DatasetBase::new(data.clone(), ());
64 let model = LinfaDbscan::params(config.min_samples)
65 .tolerance(config.eps)
66 .check()
67 .map_err(|e| ClusteringError::ValidationFailed(format!("DBSCAN params validation failed: {:?}", e)))?
68 .fit(&dataset)
69 .map_err(|e| ClusteringError::ClusteringFailed(format!("{}", e)))?;
70
71 let assignments: Vec<i32> = model
72 .labels()
73 .iter()
74 .map(|&label| label as i32)
75 .collect();
76
77 let n_clusters = assignments
78 .iter()
79 .filter(|&&a| a >= 0)
80 .map(|&a| a as usize)
81 .max()
82 .map(|m| m + 1)
83 .unwrap_or(0);
84 let n_noise = assignments.iter().filter(|&&a| a == -1).count();
85
86 Ok(DbscanResult {
87 assignments,
88 n_clusters,
89 n_noise,
90 })
91 */
92 }
93}