use crate::error::{ClusterError, ClusterResult};
use crate::traits::{
AlgorithmComplexity, ClusteringAlgorithm, ClusteringConfig, ClusteringResult,
DensityBasedClustering, Fit, FitPredict, MemoryPattern,
};
use crate::utils::validation::validate_cluster_input;
use scirs2_core::ndarray::{Array2, ArrayView1};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use torsh_tensor::Tensor;
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OPTICSConfig {
pub max_eps: f64,
pub min_samples: usize,
pub metric: String,
pub xi: Option<f64>,
pub predecessor_correction: bool,
}
impl Default for OPTICSConfig {
fn default() -> Self {
Self {
max_eps: f64::INFINITY,
min_samples: 5,
metric: "euclidean".to_string(),
xi: Some(0.05),
predecessor_correction: true,
}
}
}
impl ClusteringConfig for OPTICSConfig {
fn validate(&self) -> ClusterResult<()> {
if self.max_eps <= 0.0 {
return Err(ClusterError::ConfigError(
"max_eps must be positive".to_string(),
));
}
if self.min_samples == 0 {
return Err(ClusterError::ConfigError(
"min_samples must be positive".to_string(),
));
}
if let Some(xi) = self.xi {
if xi <= 0.0 || xi >= 1.0 {
return Err(ClusterError::ConfigError(
"xi must be between 0 and 1".to_string(),
));
}
}
Ok(())
}
fn default() -> Self {
<OPTICSConfig as std::default::Default>::default()
}
fn merge(&mut self, other: &Self) {
let default_config = <OPTICSConfig as std::default::Default>::default();
if other.max_eps != default_config.max_eps {
self.max_eps = other.max_eps;
}
if other.min_samples != default_config.min_samples {
self.min_samples = other.min_samples;
}
if other.xi != default_config.xi {
self.xi = other.xi;
}
}
}
#[derive(Debug, Clone)]
pub struct OPTICSResult {
pub labels: Tensor,
pub reachability: Tensor,
pub core_distances: Tensor,
pub ordering: Vec<usize>,
pub predecessor: Vec<Option<usize>>,
pub n_clusters: usize,
}
impl ClusteringResult for OPTICSResult {
fn labels(&self) -> &Tensor {
&self.labels
}
fn n_clusters(&self) -> usize {
self.n_clusters
}
fn n_iter(&self) -> Option<usize> {
Some(self.ordering.len())
}
fn metadata(&self) -> Option<&HashMap<String, String>> {
None
}
}
#[derive(Debug, Clone)]
struct SeedPoint {
index: usize,
reachability_dist: f64,
}
impl PartialEq for SeedPoint {
fn eq(&self, other: &Self) -> bool {
self.reachability_dist == other.reachability_dist
}
}
impl Eq for SeedPoint {}
impl PartialOrd for SeedPoint {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for SeedPoint {
fn cmp(&self, other: &Self) -> Ordering {
other
.reachability_dist
.partial_cmp(&self.reachability_dist)
.unwrap_or(Ordering::Equal)
}
}
#[derive(Debug, Clone)]
pub struct OPTICS {
config: OPTICSConfig,
fitted: bool,
}
impl OPTICS {
pub fn new(max_eps: f64, min_samples: usize) -> Self {
Self {
config: OPTICSConfig {
max_eps,
min_samples,
..Default::default()
},
fitted: false,
}
}
pub fn with_config(config: OPTICSConfig) -> Self {
Self {
config,
fitted: false,
}
}
pub fn metric(mut self, metric: impl Into<String>) -> Self {
self.config.metric = metric.into();
self
}
pub fn xi(mut self, xi: f64) -> Self {
self.config.xi = Some(xi);
self
}
pub fn predecessor_correction(mut self, enabled: bool) -> Self {
self.config.predecessor_correction = enabled;
self
}
fn euclidean_distance(&self, p1: &ArrayView1<f64>, p2: &ArrayView1<f64>) -> f64 {
let diff = p1 - p2;
diff.iter().map(|x| x * x).sum::<f64>().sqrt()
}
fn manhattan_distance(&self, p1: &ArrayView1<f64>, p2: &ArrayView1<f64>) -> f64 {
let diff = p1 - p2;
diff.iter().map(|x| x.abs()).sum()
}
fn compute_distance(&self, p1: &ArrayView1<f64>, p2: &ArrayView1<f64>) -> f64 {
match self.config.metric.as_str() {
"euclidean" => self.euclidean_distance(p1, p2),
"manhattan" => self.manhattan_distance(p1, p2),
_ => self.euclidean_distance(p1, p2), }
}
fn get_neighbors(&self, data: &Array2<f64>, point_idx: usize) -> Vec<usize> {
let point = data.row(point_idx);
let mut neighbors = Vec::new();
for i in 0..data.nrows() {
if i != point_idx {
let dist = self.compute_distance(&point, &data.row(i));
if dist <= self.config.max_eps {
neighbors.push(i);
}
}
}
neighbors
}
fn compute_core_distance(&self, data: &Array2<f64>, point_idx: usize) -> f64 {
let point = data.row(point_idx);
let mut distances: Vec<f64> = Vec::new();
for i in 0..data.nrows() {
if i != point_idx {
let dist = self.compute_distance(&point, &data.row(i));
if dist <= self.config.max_eps {
distances.push(dist);
}
}
}
distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
if distances.len() < self.config.min_samples {
f64::INFINITY
} else {
distances[self.config.min_samples - 1]
}
}
fn compute_reachability_distance(&self, core_dist: f64, actual_dist: f64) -> f64 {
core_dist.max(actual_dist)
}
fn update_seeds(
&self,
seeds: &mut BinaryHeap<SeedPoint>,
neighbors: &[usize],
center_idx: usize,
core_dist: f64,
data: &Array2<f64>,
processed: &[bool],
reachability: &mut [f64],
) {
let center_point = data.row(center_idx);
for &neighbor_idx in neighbors {
if processed[neighbor_idx] {
continue;
}
let neighbor_point = data.row(neighbor_idx);
let dist = self.compute_distance(¢er_point, &neighbor_point);
let new_reach_dist = self.compute_reachability_distance(core_dist, dist);
if reachability[neighbor_idx] == f64::INFINITY {
reachability[neighbor_idx] = new_reach_dist;
seeds.push(SeedPoint {
index: neighbor_idx,
reachability_dist: new_reach_dist,
});
} else if new_reach_dist < reachability[neighbor_idx] {
reachability[neighbor_idx] = new_reach_dist;
seeds.push(SeedPoint {
index: neighbor_idx,
reachability_dist: new_reach_dist,
});
}
}
}
fn extract_clusters_xi(
&self,
ordering: &[usize],
reachability: &[f64],
n_samples: usize,
) -> Vec<i32> {
let mut labels = vec![-1i32; n_samples];
if let Some(xi) = self.config.xi {
let mut cluster_id = 0i32;
let mut in_cluster = vec![false; ordering.len()];
for i in 1..ordering.len() - 1 {
let prev_reach = reachability[i - 1];
let curr_reach = reachability[i];
let next_reach = reachability[i + 1];
if prev_reach != f64::INFINITY
&& curr_reach != f64::INFINITY
&& curr_reach < prev_reach * (1.0 - xi)
{
cluster_id += 1;
in_cluster[i] = true;
labels[ordering[i]] = cluster_id;
}
else if in_cluster[i - 1]
&& curr_reach != f64::INFINITY
&& curr_reach < prev_reach * (1.0 + xi)
{
in_cluster[i] = true;
labels[ordering[i]] = cluster_id;
}
else if in_cluster[i - 1]
&& next_reach != f64::INFINITY
&& next_reach > curr_reach * (1.0 + xi)
{
in_cluster[i] = false;
}
}
}
labels
}
fn run_optics(&self, data: &Array2<f64>) -> ClusterResult<OPTICSResult> {
let n_samples = data.nrows();
let mut ordering = Vec::with_capacity(n_samples);
let mut reachability = vec![f64::INFINITY; n_samples];
let mut core_distances = vec![f64::INFINITY; n_samples];
let mut processed = vec![false; n_samples];
let predecessor = vec![None; n_samples];
for i in 0..n_samples {
core_distances[i] = self.compute_core_distance(data, i);
}
for start_idx in 0..n_samples {
if processed[start_idx] {
continue;
}
ordering.push(start_idx);
processed[start_idx] = true;
reachability[start_idx] = f64::INFINITY;
let mut seeds = BinaryHeap::new();
let neighbors = self.get_neighbors(data, start_idx);
if core_distances[start_idx] != f64::INFINITY {
self.update_seeds(
&mut seeds,
&neighbors,
start_idx,
core_distances[start_idx],
data,
&processed,
&mut reachability,
);
}
while let Some(seed) = seeds.pop() {
let current_idx = seed.index;
if processed[current_idx] {
continue;
}
ordering.push(current_idx);
processed[current_idx] = true;
let neighbors = self.get_neighbors(data, current_idx);
if core_distances[current_idx] != f64::INFINITY {
self.update_seeds(
&mut seeds,
&neighbors,
current_idx,
core_distances[current_idx],
data,
&processed,
&mut reachability,
);
}
}
}
let labels = self.extract_clusters_xi(&ordering, &reachability, n_samples);
let n_clusters = labels.iter().filter(|&&l| l >= 0).max().unwrap_or(&-1) + 1;
let labels_tensor = array1_i32_to_tensor(&labels)?;
let reachability_ordered: Vec<f64> = ordering.iter().map(|&i| reachability[i]).collect();
let reachability_tensor = array1_to_tensor(&reachability_ordered)?;
let core_distances_tensor = array1_to_tensor(&core_distances)?;
Ok(OPTICSResult {
labels: labels_tensor,
reachability: reachability_tensor,
core_distances: core_distances_tensor,
ordering,
predecessor,
n_clusters: n_clusters as usize,
})
}
}
impl ClusteringAlgorithm for OPTICS {
fn name(&self) -> &str {
"OPTICS"
}
fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("max_eps".to_string(), self.config.max_eps.to_string());
params.insert(
"min_samples".to_string(),
self.config.min_samples.to_string(),
);
params.insert("metric".to_string(), self.config.metric.clone());
if let Some(xi) = self.config.xi {
params.insert("xi".to_string(), xi.to_string());
}
params.insert(
"predecessor_correction".to_string(),
self.config.predecessor_correction.to_string(),
);
params
}
fn set_params(&mut self, params: HashMap<String, String>) -> ClusterResult<()> {
for (key, value) in params {
match key.as_str() {
"max_eps" => {
self.config.max_eps = value.parse().map_err(|_| {
ClusterError::ConfigError(format!("Invalid max_eps: {}", value))
})?;
}
"min_samples" => {
self.config.min_samples = value.parse().map_err(|_| {
ClusterError::ConfigError(format!("Invalid min_samples: {}", value))
})?;
}
"metric" => {
self.config.metric = value;
}
"xi" => {
let xi: f64 = value
.parse()
.map_err(|_| ClusterError::ConfigError(format!("Invalid xi: {}", value)))?;
self.config.xi = Some(xi);
}
_ => {
return Err(ClusterError::ConfigError(format!(
"Unknown parameter: {}",
key
)));
}
}
}
self.config.validate()?;
Ok(())
}
fn is_fitted(&self) -> bool {
self.fitted
}
fn complexity_info(&self) -> AlgorithmComplexity {
AlgorithmComplexity {
time_complexity: "O(n²)".to_string(),
space_complexity: "O(n)".to_string(),
deterministic: true,
online_capable: false,
memory_pattern: MemoryPattern::Linear,
}
}
fn supported_distance_metrics(&self) -> Vec<&str> {
vec!["euclidean", "manhattan"]
}
}
impl Fit for OPTICS {
type Result = OPTICSResult;
fn fit(&self, data: &Tensor) -> ClusterResult<Self::Result> {
self.validate_input(data)?;
validate_cluster_input(data)?;
self.config.validate()?;
let data_array = tensor_to_array2(data)?;
self.run_optics(&data_array)
}
}
impl FitPredict for OPTICS {
type Result = OPTICSResult;
fn fit_predict(&self, data: &Tensor) -> ClusterResult<Self::Result> {
self.fit(data)
}
}
impl DensityBasedClustering for OPTICS {
fn core_points(&self) -> Option<&Tensor> {
None }
fn noise_points(&self) -> Option<&Tensor> {
None }
fn density_estimates(&self, data: &Tensor) -> ClusterResult<Tensor> {
let data_array = tensor_to_array2(data)?;
let n_samples = data_array.nrows();
let mut densities = vec![0.0; n_samples];
for i in 0..n_samples {
let core_dist = self.compute_core_distance(&data_array, i);
if core_dist == f64::INFINITY {
densities[i] = 0.0; } else {
densities[i] = 1.0 / (core_dist + 1e-10); }
}
array1_to_tensor(&densities)
}
}
fn tensor_to_array2(tensor: &Tensor) -> ClusterResult<Array2<f64>> {
let tensor_shape = tensor.shape();
let shape = tensor_shape.dims();
if shape.len() != 2 {
return Err(ClusterError::InvalidInput("Expected 2D tensor".to_string()));
}
let data_f32: Vec<f32> = tensor.to_vec().map_err(ClusterError::TensorError)?;
let data: Vec<f64> = data_f32.into_iter().map(|x| x as f64).collect();
Array2::from_shape_vec((shape[0], shape[1]), data)
.map_err(|_| ClusterError::InvalidInput("Failed to convert tensor to array".to_string()))
}
fn array1_to_tensor(array: &[f64]) -> ClusterResult<Tensor> {
let len = array.len();
let data: Vec<f32> = array.iter().map(|&x| x as f32).collect();
Tensor::from_vec(data, &[len]).map_err(ClusterError::TensorError)
}
fn array1_i32_to_tensor(array: &[i32]) -> ClusterResult<Tensor> {
let len = array.len();
let data: Vec<f32> = array.iter().map(|&x| x as f32).collect();
Tensor::from_vec(data, &[len]).map_err(ClusterError::TensorError)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_optics_basic() {
let data = vec![
0.0, 0.0, 0.1, 0.1, 0.0, 0.2, 0.2, 0.0, 0.1, 0.2, 5.0, 5.0, 5.1, 5.1, 5.0, 5.2, 5.2, 5.0, 5.1, 5.2,
];
let tensor = Tensor::from_vec(data, &[10, 2]).expect("test tensor creation should succeed");
let optics = OPTICS::new(0.5, 2);
let result = optics.fit(&tensor).expect("OPTICS fit should succeed");
assert!(result.n_clusters >= 1);
assert_eq!(result.ordering.len(), 10);
}
#[test]
fn test_optics_config_validation() {
let mut config = <OPTICSConfig as std::default::Default>::default();
assert!(config.validate().is_ok());
config.max_eps = -1.0;
assert!(config.validate().is_err());
config.max_eps = 1.0;
config.min_samples = 0;
assert!(config.validate().is_err());
config.min_samples = 5;
config.xi = Some(1.5);
assert!(config.validate().is_err());
config.xi = Some(0.05);
assert!(config.validate().is_ok());
}
#[test]
fn test_optics_algorithm_properties() {
let optics = OPTICS::new(0.5, 5);
assert_eq!(optics.name(), "OPTICS");
assert!(!optics.is_fitted());
let params = optics.get_params();
assert!(params.contains_key("max_eps"));
assert!(params.contains_key("min_samples"));
let complexity = optics.complexity_info();
assert_eq!(complexity.time_complexity, "O(n²)");
assert!(complexity.deterministic);
}
#[test]
fn test_optics_reachability_ordering() {
let data = vec![0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 5.0, 5.0, 5.1, 5.0, 5.0, 5.1];
let tensor = Tensor::from_vec(data, &[6, 2]).expect("test tensor creation should succeed");
let optics = OPTICS::new(1.0, 2);
let result = optics.fit(&tensor).expect("OPTICS fit should succeed");
assert_eq!(result.ordering.len(), 6);
assert_eq!(result.reachability.shape().dims()[0], 6);
}
}