Skip to main content

flow_clustering/clustering/
dbscan.rs

1//! DBSCAN clustering implementation
2
3use crate::clustering::{ClusteringError, ClusteringResult};
4use ndarray::Array2;
5
/// Tunable parameters for a DBSCAN run.
///
/// See the [`Default`] implementation for the baseline values.
#[derive(Clone, Debug)]
pub struct DbscanConfig {
    /// Neighborhood radius: the largest distance at which two samples are
    /// still treated as neighbors of one another.
    pub eps: f64,
    /// Density threshold: how many samples a neighborhood must contain
    /// before its center counts as a core point.
    pub min_samples: usize,
}
14
15impl Default for DbscanConfig {
16    fn default() -> Self {
17        Self {
18            eps: 0.5,
19            min_samples: 5,
20        }
21    }
22}
23
/// Outcome of a DBSCAN run.
#[derive(Clone, Debug)]
pub struct DbscanResult {
    /// One label per input point, in input order. A value of `-1` marks the
    /// point as noise (an outlier); other values identify the point's cluster.
    pub assignments: Vec<i32>,
    /// How many clusters were discovered.
    pub n_clusters: usize,
    /// How many points were labelled as noise.
    pub n_noise: usize,
}
34
/// Stateless entry point for DBSCAN clustering.
///
/// The type holds no data; clustering is exposed through its associated
/// function `fit`.
pub struct Dbscan;
37
38impl Dbscan {
39    /// Perform DBSCAN clustering
40    ///
41    /// # Arguments
42    /// * `data` - Input data matrix (n_samples × n_features)
43    /// * `config` - Configuration for DBSCAN
44    ///
45    /// # Returns
46    /// DbscanResult with cluster assignments
47    pub fn fit(data: &Array2<f64>, _config: &DbscanConfig) -> ClusteringResult<DbscanResult> {
48        if data.nrows() == 0 {
49            return Err(ClusteringError::EmptyData);
50        }
51
52        // Use linfa-clustering for DBSCAN
53        // NOTE: DBSCAN in linfa-clustering 0.8 has trait bound issues with ParamGuard
54        // This is a known limitation - DBSCAN clustering is temporarily disabled
55        // TODO: Fix DBSCAN once linfa-clustering API is updated or use alternative implementation
56        Err(ClusteringError::ClusteringFailed(
57            "DBSCAN clustering is temporarily disabled due to linfa-clustering API limitations. \
58             Please use K-means or GMM clustering instead."
59                .to_string(),
60        ))
61
62        // Original implementation (commented out until API issue is resolved):
63        /*
64        let dataset = DatasetBase::new(data.clone(), ());
65        let model = LinfaDbscan::params(config.min_samples)
66            .tolerance(config.eps)
67            .check()
68            .map_err(|e| ClusteringError::ValidationFailed(format!("DBSCAN params validation failed: {:?}", e)))?
69            .fit(&dataset)
70            .map_err(|e| ClusteringError::ClusteringFailed(format!("{}", e)))?;
71
72        let assignments: Vec<i32> = model
73            .labels()
74            .iter()
75            .map(|&label| label as i32)
76            .collect();
77
78        let n_clusters = assignments
79            .iter()
80            .filter(|&&a| a >= 0)
81            .map(|&a| a as usize)
82            .max()
83            .map(|m| m + 1)
84            .unwrap_or(0);
85        let n_noise = assignments.iter().filter(|&&a| a == -1).count();
86
87        Ok(DbscanResult {
88            assignments,
89            n_clusters,
90            n_noise,
91        })
92        */
93    }
94}