// flow_utils/clustering/dbscan.rs

//! DBSCAN clustering implementation
2
3use crate::clustering::{ClusteringError, ClusteringResult};
4use ndarray::Array2;
5
/// Configuration for DBSCAN clustering.
///
/// The [`Default`] implementation yields `eps = 0.5` and `min_samples = 5`.
#[derive(Debug, Clone, PartialEq)]
pub struct DbscanConfig {
    /// Maximum distance between samples for one to be considered in the neighborhood of the other
    pub eps: f64,
    /// Minimum number of samples in a neighborhood for a point to be a core point
    pub min_samples: usize,
}
14
15impl Default for DbscanConfig {
16    fn default() -> Self {
17        Self {
18            eps: 0.5,
19            min_samples: 5,
20        }
21    }
22}
23
/// DBSCAN clustering result.
///
/// Derives `Clone`, `PartialEq` and `Eq` so results can be copied and compared
/// directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DbscanResult {
    /// Cluster assignments for each point (-1 indicates noise/outlier)
    pub assignments: Vec<i32>,
    /// Number of clusters found
    pub n_clusters: usize,
    /// Number of noise points
    pub n_noise: usize,
}
34
/// DBSCAN clustering.
///
/// Field-less namespace type: clustering is invoked through the associated
/// function `Dbscan::fit`, which takes no `self`.
#[derive(Debug, Clone, Copy)]
pub struct Dbscan;
37
38impl Dbscan {
39    /// Perform DBSCAN clustering
40    ///
41    /// # Arguments
42    /// * `data` - Input data matrix (n_samples × n_features)
43    /// * `config` - Configuration for DBSCAN
44    ///
45    /// # Returns
46    /// DbscanResult with cluster assignments
47    pub fn fit(data: &Array2<f64>, _config: &DbscanConfig) -> ClusteringResult<DbscanResult> {
48        if data.nrows() == 0 {
49            return Err(ClusteringError::EmptyData);
50        }
51
52        // Use linfa-clustering for DBSCAN
53        // NOTE: DBSCAN in linfa-clustering 0.8 has trait bound issues with ParamGuard
54        // This is a known limitation - DBSCAN clustering is temporarily disabled
55        // TODO: Fix DBSCAN once linfa-clustering API is updated or use alternative implementation
56        Err(ClusteringError::ClusteringFailed(
57            "DBSCAN clustering is temporarily disabled due to linfa-clustering API limitations. \
58             Please use K-means or GMM clustering instead.".to_string()
59        ))
60        
61        // Original implementation (commented out until API issue is resolved):
62        /*
63        let dataset = DatasetBase::new(data.clone(), ());
64        let model = LinfaDbscan::params(config.min_samples)
65            .tolerance(config.eps)
66            .check()
67            .map_err(|e| ClusteringError::ValidationFailed(format!("DBSCAN params validation failed: {:?}", e)))?
68            .fit(&dataset)
69            .map_err(|e| ClusteringError::ClusteringFailed(format!("{}", e)))?;
70        
71        let assignments: Vec<i32> = model
72            .labels()
73            .iter()
74            .map(|&label| label as i32)
75            .collect();
76        
77        let n_clusters = assignments
78            .iter()
79            .filter(|&&a| a >= 0)
80            .map(|&a| a as usize)
81            .max()
82            .map(|m| m + 1)
83            .unwrap_or(0);
84        let n_noise = assignments.iter().filter(|&&a| a == -1).count();
85        
86        Ok(DbscanResult {
87            assignments,
88            n_clusters,
89            n_noise,
90        })
91        */
92    }
93}