use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::fmt::Debug;
use crate::error::{ClusteringError, Result};
use super::distance;
use super::DistanceMetric;
/// Per-point working state used while computing the OPTICS ordering.
#[derive(Debug, Clone)]
struct OPTICSPoint {
    // Distance to the (min_samples - 1)-th nearest neighbor, if the point
    // has enough neighbors to be a core point.
    core_distance: Option<f64>,
    // Best (smallest) reachability distance found so far; None until the
    // point is first reached from a core point.
    reachability_distance: Option<f64>,
    // True once the point has been emitted into the cluster ordering.
    processed: bool,
}
/// Entry in the seed heap: a candidate point paired with the reachability
/// distance it was enqueued with. Stale (superseded) entries may remain in
/// the heap; consumers skip already-processed points on pop.
#[derive(Debug, Clone, PartialEq)]
struct PriorityQueueElement {
    // Index of the candidate point in the original data.
    point_index: usize,
    // Reachability distance at enqueue time.
    reachability_distance: f64,
}
// Eq is required so the element can live in a BinaryHeap (whose Ord bound
// implies Eq); the Ord impl treats incomparable f64 values (NaN) as equal.
impl Eq for PriorityQueueElement {}
impl PartialOrd for PriorityQueueElement {
    // Delegates to `cmp` so the partial and total orderings always agree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for PriorityQueueElement {
    // Reversed comparison (other vs self) so that std's max-heap BinaryHeap
    // behaves as a min-heap: the SMALLEST reachability distance pops first,
    // which is what the OPTICS expansion loop needs. Incomparable values
    // (NaN) are treated as equal.
    fn cmp(&self, other: &Self) -> Ordering {
        other
            .reachability_distance
            .partial_cmp(&self.reachability_distance)
            .unwrap_or(Ordering::Equal)
    }
}
/// Configuration bundle for OPTICS clustering.
#[derive(Debug, Clone)]
pub struct OPTICSOptions {
    /// Number of points (the point itself included) required in a
    /// neighborhood for core-point status. Must be at least 2.
    pub min_samples: usize,
    /// Maximum neighborhood radius; `None` means unbounded.
    pub max_eps: Option<f64>,
    /// Distance metric used for all pairwise distances.
    pub metric: DistanceMetric,
    /// Minimum cluster size for xi-based extraction.
    /// NOTE(review): not read by `optics_with_options`; presumably consumed
    /// by callers invoking `extract_xi_clusters` — confirm.
    pub min_cluster_size: Option<usize>,
    /// Steepness threshold for xi-based extraction, exclusive (0, 1).
    /// NOTE(review): not read by `optics_with_options` — confirm usage.
    pub xi: Option<f64>,
    /// Whether to apply predecessor correction during extraction.
    /// NOTE(review): not consumed anywhere in this file — confirm.
    pub predecessor_correction: bool,
}
impl Default for OPTICSOptions {
fn default() -> Self {
Self {
min_samples: 5,
max_eps: None,
metric: DistanceMetric::Euclidean,
min_cluster_size: None,
xi: None,
predecessor_correction: true,
}
}
}
/// Output of the OPTICS algorithm.
#[derive(Debug, Clone)]
pub struct OPTICSResult {
    /// Point indices in the order they were processed (the cluster ordering).
    pub ordering: Vec<usize>,
    /// Reachability distance per ORDERING POSITION (parallel to `ordering`);
    /// `None` for the first point of each connected component.
    pub reachability: Vec<Option<f64>>,
    /// Core distance per ORIGINAL POINT INDEX; `None` for non-core points.
    pub core_distances: Vec<Option<f64>>,
    /// Predecessor per original point index: the point this one was reached
    /// from, if any.
    pub predecessor: Vec<Option<usize>>,
}
/// A maximal run of xi-steep points in the reachability plot
/// (internal to xi-cluster extraction).
#[derive(Debug, Clone)]
struct SteepArea {
    // First ordering position of the area (inclusive).
    start: usize,
    // Last ordering position of the area (inclusive).
    end: usize,
    // True for steep-down areas, false for steep-up areas.
    is_down: bool,
    // Largest reachability observed within the area.
    max_reachability: f64,
}
/// A cluster found by xi extraction, expressed as an inclusive interval of
/// ordering positions (not original point indices).
#[derive(Debug, Clone)]
pub struct XiCluster {
    /// First ordering position (inclusive).
    pub start: usize,
    /// Last ordering position (inclusive).
    pub end: usize,
}
/// Extract a DBSCAN-style flat clustering from an OPTICS result at a single
/// `eps` threshold.
///
/// Walks the cluster ordering: a point whose reachability exceeds `eps`
/// either starts a new cluster (if it is itself a core point at `eps`) or
/// remains noise (-1); otherwise it inherits the cluster of the previous
/// point in the ordering, falling back to its predecessor's cluster.
pub fn extract_dbscan_clustering(result: &OPTICSResult, eps: f64) -> Array1<i32> {
    let n_samples = result.ordering.len();
    // Labels are indexed by ORIGINAL point index; -1 marks noise.
    let mut labels = vec![-1i32; n_samples];
    let mut cluster_label: i32 = 0;
    for i in 0..n_samples {
        let point_idx = result.ordering[i];
        // Reachability is stored per ordering position, not per point.
        let reachability = result.reachability[i];
        // A missing reachability (first point of a component) counts as
        // exceeding eps.
        let reach_exceeds = match reachability {
            Some(r) => r > eps,
            None => true,
        };
        if reach_exceeds {
            // Not density-reachable from preceding points: open a new
            // cluster only if this point is a core point at eps.
            if let Some(core_dist) = result.core_distances[point_idx] {
                if core_dist <= eps {
                    labels[point_idx] = cluster_label;
                    cluster_label += 1;
                }
            }
        } else {
            if i > 0 {
                // Density-reachable: inherit the previous ordering point's
                // label, or fall back to the predecessor's label.
                let prev_point_idx = result.ordering[i - 1];
                if labels[prev_point_idx] >= 0 {
                    labels[point_idx] = labels[prev_point_idx];
                } else if let Some(pred) = result.predecessor[point_idx] {
                    if labels[pred] >= 0 {
                        labels[point_idx] = labels[pred];
                    } else {
                        // Reachable but no labeled ancestor: start a new
                        // cluster here.
                        labels[point_idx] = cluster_label;
                        cluster_label += 1;
                    }
                }
            }
        }
    }
    Array1::from(labels)
}
/// Run the OPTICS (Ordering Points To Identify the Clustering Structure)
/// algorithm over `data`.
///
/// Produces the cluster ordering together with per-position reachability
/// distances, per-point core distances, and per-point predecessors. Flat
/// clusterings are extracted afterwards with [`extract_dbscan_clustering`]
/// or [`extract_xi_clusters`].
///
/// # Arguments
/// * `data` - sample matrix, one row per point.
/// * `min_samples` - neighborhood size for core-point status (must be >= 2).
/// * `max_eps` - optional maximum neighborhood radius (`None` = unbounded).
/// * `metric` - distance metric; defaults to Euclidean.
///
/// # Errors
/// `InvalidInput` for empty data, `min_samples < 2`, or a non-positive
/// `max_eps`; `ComputationError` when a value cannot be converted to `f64`.
pub fn optics<F: Float + FromPrimitive + Debug + PartialOrd>(
    data: ArrayView2<F>,
    min_samples: usize,
    max_eps: Option<F>,
    metric: Option<DistanceMetric>,
) -> Result<OPTICSResult> {
    let n_samples = data.shape()[0];
    if n_samples == 0 {
        return Err(ClusteringError::InvalidInput("Empty input data".into()));
    }
    if min_samples < 2 {
        return Err(ClusteringError::InvalidInput(
            "min_samples must be at least 2".into(),
        ));
    }
    let max_eps_f64 = match max_eps {
        Some(eps) => {
            if eps <= F::zero() {
                return Err(ClusteringError::InvalidInput(
                    "max_eps must be positive".into(),
                ));
            }
            eps.to_f64().ok_or_else(|| {
                ClusteringError::ComputationError("Failed to convert max_eps to f64".into())
            })?
        }
        None => f64::INFINITY,
    };
    let metric = metric.unwrap_or(DistanceMetric::Euclidean);
    // Per-point working state: core distance, best reachability, visited flag.
    let mut optics_points: Vec<OPTICSPoint> = (0..n_samples)
        .map(|_| OPTICSPoint {
            core_distance: None,
            reachability_distance: None,
            processed: false,
        })
        .collect();
    let mut ordering = Vec::with_capacity(n_samples);
    let mut reachability = Vec::with_capacity(n_samples);
    let mut predecessor = vec![None; n_samples];
    // O(n^2) precomputation; every neighborhood query below is a row scan.
    let distance_matrix = compute_distance_matrix(&data, metric)?;
    for point_idx in 0..n_samples {
        if optics_points[point_idx].processed {
            continue;
        }
        // Start a new connected component. Its first point is never reached
        // from anywhere, so its reachability stays None.
        let neighbors = get_neighbors(point_idx, &distance_matrix, max_eps_f64);
        optics_points[point_idx].processed = true;
        let core_distance =
            compute_core_distance(point_idx, &neighbors, &distance_matrix, min_samples);
        optics_points[point_idx].core_distance = core_distance;
        ordering.push(point_idx);
        reachability.push(optics_points[point_idx].reachability_distance);
        if let Some(core_dist) = core_distance {
            // Expand the component in order of increasing reachability
            // (the Ord impl makes BinaryHeap a min-heap here).
            let mut seeds = BinaryHeap::new();
            update_seeds(
                point_idx,
                &neighbors,
                &mut seeds,
                &mut optics_points,
                &distance_matrix,
                core_dist,
                &mut predecessor,
            );
            while let Some(element) = seeds.pop() {
                let current_idx = element.point_index;
                // Skip stale heap entries for already-emitted points.
                if optics_points[current_idx].processed {
                    continue;
                }
                let current_neighbors = get_neighbors(current_idx, &distance_matrix, max_eps_f64);
                optics_points[current_idx].processed = true;
                ordering.push(current_idx);
                reachability.push(Some(element.reachability_distance));
                let current_core_dist = compute_core_distance(
                    current_idx,
                    &current_neighbors,
                    &distance_matrix,
                    min_samples,
                );
                optics_points[current_idx].core_distance = current_core_dist;
                if let Some(core_dist) = current_core_dist {
                    update_seeds(
                        current_idx,
                        &current_neighbors,
                        &mut seeds,
                        &mut optics_points,
                        &distance_matrix,
                        core_dist,
                        &mut predecessor,
                    );
                }
            }
        }
    }
    let core_distances = optics_points.iter().map(|p| p.core_distance).collect();
    Ok(OPTICSResult {
        ordering,
        reachability,
        core_distances,
        predecessor,
    })
}
/// Run OPTICS using an [`OPTICSOptions`] bundle.
///
/// Only `min_samples`, `max_eps`, and `metric` are consumed here; the
/// xi-related fields (`xi`, `min_cluster_size`, `predecessor_correction`)
/// are presumably meant for the separate extraction functions — confirm.
pub fn optics_with_options<F: Float + FromPrimitive + Debug + PartialOrd>(
    data: ArrayView2<F>,
    options: &OPTICSOptions,
) -> Result<OPTICSResult> {
    // Convert the f64 radius into F; if even f64::INFINITY is not
    // representable, clamp to F::max_value() so "no limit" still behaves as
    // a very large radius.
    let max_eps = options
        .max_eps
        .map(|e| F::from(e).unwrap_or_else(|| F::from(f64::INFINITY).unwrap_or(F::max_value())));
    optics(data, options.min_samples, max_eps, Some(options.metric))
}
/// Build the symmetric pairwise distance matrix for `data` under `metric`.
///
/// Returns an `n_samples x n_samples` matrix of `f64` distances with a zero
/// diagonal.
///
/// # Errors
/// `ComputationError` when a distance (or the Minkowski exponent) cannot be
/// converted between `F` and `f64`.
fn compute_distance_matrix<F: Float + FromPrimitive + Debug>(
    data: &ArrayView2<F>,
    metric: DistanceMetric,
) -> Result<Array2<f64>> {
    let n_samples = data.shape()[0];
    let mut distance_matrix = Array2::<f64>::zeros((n_samples, n_samples));
    // Convert the Minkowski exponent once instead of once per pair.
    // NOTE(review): p = 3 is hard-coded — confirm this matches the intended
    // DistanceMetric::Minkowski semantics.
    let minkowski_p = F::from(3.0);
    for i in 0..n_samples {
        // Materialize row i once; it is reused against every j > i.
        let point1: Vec<F> = data.row(i).to_vec();
        for j in (i + 1)..n_samples {
            let point2: Vec<F> = data.row(j).to_vec();
            let dist_f = match metric {
                DistanceMetric::Euclidean => distance::euclidean(&point1, &point2),
                DistanceMetric::Manhattan => distance::manhattan(&point1, &point2),
                DistanceMetric::Chebyshev => distance::chebyshev(&point1, &point2),
                DistanceMetric::Minkowski => {
                    let p = minkowski_p.ok_or_else(|| {
                        ClusteringError::ComputationError(
                            "Failed to convert Minkowski exponent".into(),
                        )
                    })?;
                    distance::minkowski(&point1, &point2, p)
                }
            };
            let dist = dist_f.to_f64().ok_or_else(|| {
                ClusteringError::ComputationError("Failed to convert distance to f64".into())
            })?;
            // Fill both triangles so lookups never need index ordering.
            distance_matrix[[i, j]] = dist;
            distance_matrix[[j, i]] = dist;
        }
    }
    Ok(distance_matrix)
}
/// Core distance of a point: the distance to its (`min_samples` - 1)-th
/// nearest neighbor, counting the point itself toward `min_samples`.
/// Returns `None` when the neighborhood is too small for core-point status.
fn compute_core_distance(
    point_idx: usize,
    neighbors: &[usize],
    distance_matrix: &Array2<f64>,
    min_samples: usize,
) -> Option<f64> {
    // The point itself counts toward min_samples, so it needs at least
    // (min_samples - 1) neighbors.
    if neighbors.len() + 1 < min_samples {
        return None;
    }
    let mut distances: Vec<f64> = neighbors
        .iter()
        .map(|&n| distance_matrix[[point_idx, n]])
        .collect();
    // NaN-safe sort: incomparable values are treated as equal.
    distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
    if min_samples >= 2 && distances.len() >= min_samples - 1 {
        // Zero-based index of the (min_samples - 1)-th nearest neighbor.
        Some(distances[min_samples - 2])
    } else if !distances.is_empty() {
        // Defensive fallback; public callers validate min_samples >= 2, so
        // this branch is unreachable through the public API.
        Some(distances[distances.len() - 1])
    } else {
        None
    }
}
/// Indices of all points within `max_eps` of `point_idx` (excluding the
/// point itself), read from the precomputed pairwise distance matrix.
fn get_neighbors(point_idx: usize, distance_matrix: &Array2<f64>, max_eps: f64) -> Vec<usize> {
    let n_samples = distance_matrix.shape()[0];
    (0..n_samples)
        .filter(|&j| j != point_idx && distance_matrix[[point_idx, j]] <= max_eps)
        .collect()
}
/// Relax the reachability of every unprocessed neighbor of `point_idx` and
/// push improved entries onto the seed heap.
///
/// Superseded heap entries are left in place (lazy deletion); the expansion
/// loop skips already-processed points when popping.
fn update_seeds(
    point_idx: usize,
    neighbors: &[usize],
    seeds: &mut BinaryHeap<PriorityQueueElement>,
    optics_points: &mut [OPTICSPoint],
    distance_matrix: &Array2<f64>,
    core_distance: f64,
    predecessor: &mut [Option<usize>],
) {
    for &neighbor_idx in neighbors {
        if optics_points[neighbor_idx].processed {
            continue;
        }
        // Reachability from point_idx: at least the core distance, at least
        // the actual pairwise distance.
        let candidate = core_distance.max(distance_matrix[[point_idx, neighbor_idx]]);
        let improves = optics_points[neighbor_idx]
            .reachability_distance
            .map_or(true, |current| candidate < current);
        if improves {
            optics_points[neighbor_idx].reachability_distance = Some(candidate);
            predecessor[neighbor_idx] = Some(point_idx);
            seeds.push(PriorityQueueElement {
                point_index: neighbor_idx,
                reachability_distance: candidate,
            });
        }
    }
}
/// Extract flat clusters from an OPTICS result using the xi method.
///
/// Locates steep-down and steep-up regions of the reachability plot, pairs
/// them into candidate cluster intervals, and labels points by cluster id
/// (-1 for noise).
///
/// # Errors
/// `InvalidInput` unless `0 < xi < 1` and `min_cluster_size >= 2`.
pub fn extract_xi_clusters(
    result: &OPTICSResult,
    xi: f64,
    min_cluster_size: usize,
) -> Result<Array1<i32>> {
    if xi <= 0.0 || xi >= 1.0 {
        return Err(ClusteringError::InvalidInput(
            "xi must be between 0 and 1 (exclusive)".into(),
        ));
    }
    if min_cluster_size < 2 {
        return Err(ClusteringError::InvalidInput(
            "min_cluster_size must be at least 2".into(),
        ));
    }
    let n_points = result.ordering.len();
    if n_points == 0 {
        return Ok(Array1::from(vec![]));
    }
    // Reachability per ordering position; missing values become +inf first.
    let reach: Vec<f64> = result
        .reachability
        .iter()
        .map(|&r| r.unwrap_or(f64::INFINITY))
        .collect();
    let max_reach = reach
        .iter()
        .filter(|r| r.is_finite())
        .copied()
        .fold(f64::NEG_INFINITY, f64::max);
    // Replace infinities with a value slightly above the finite maximum so
    // the steepness tests below stay well-defined.
    let fill_value = if max_reach.is_finite() {
        max_reach * 1.1 + 1e-10
    } else {
        1.0
    };
    let reach_filled: Vec<f64> = reach
        .iter()
        .map(|&r| if r.is_finite() { r } else { fill_value })
        .collect();
    let steep_down_areas = find_steep_down_areas(&reach_filled, xi, min_cluster_size);
    let steep_up_areas = find_steep_up_areas(&reach_filled, xi, min_cluster_size);
    let clusters = extract_cluster_intervals(
        &reach_filled,
        &steep_down_areas,
        &steep_up_areas,
        xi,
        min_cluster_size,
        n_points,
    );
    let mut labels = vec![-1i32; n_points];
    let mut cluster_id: i32 = 0;
    // Assign larger clusters first so points claimed by a big cluster are
    // not relabeled by smaller, overlapping intervals.
    let mut sorted_clusters = clusters;
    sorted_clusters.sort_by(|a, b| {
        let size_a = a.end - a.start + 1;
        let size_b = b.end - b.start + 1;
        size_b.cmp(&size_a)
    });
    for cluster in &sorted_clusters {
        let mut assigned = false;
        for idx in cluster.start..=cluster.end.min(n_points - 1) {
            // Intervals are over ordering positions; map back to point ids.
            let point_idx = result.ordering[idx];
            if labels[point_idx] < 0 {
                labels[point_idx] = cluster_id;
                assigned = true;
            }
        }
        // Only consume a cluster id if at least one point was labeled.
        if assigned {
            cluster_id += 1;
        }
    }
    Ok(Array1::from(labels))
}
/// Scan the reachability plot for maximal steep-down areas.
///
/// A run starts at a xi-steep downward step and is extended while further
/// steep steps occur, tolerating up to `min(min_cluster_size, 3)`
/// consecutive non-steep points; each area records its maximum reachability.
fn find_steep_down_areas(reach: &[f64], xi: f64, min_cluster_size: usize) -> Vec<SteepArea> {
    let n = reach.len();
    let mut areas = Vec::new();
    if n < 2 {
        return areas;
    }
    let mut i = 0;
    while i < n - 1 {
        if is_steep_down(reach, i, xi) {
            let start = i;
            let mut end = i;
            let mut max_r = reach[i];
            // Consecutive non-steep points tolerated before the run ends.
            let mut gap_count = 0;
            let max_gap = min_cluster_size.min(3);
            while end + 1 < n {
                if is_steep_down(reach, end, xi) {
                    gap_count = 0;
                    end += 1;
                    if reach[end] > max_r {
                        max_r = reach[end];
                    }
                } else if gap_count < max_gap && end + 1 < n {
                    // Bridge a small gap of non-steep points.
                    gap_count += 1;
                    end += 1;
                    if reach[end] > max_r {
                        max_r = reach[end];
                    }
                } else {
                    break;
                }
            }
            areas.push(SteepArea {
                start,
                end,
                is_down: true,
                max_reachability: max_r,
            });
            // Resume scanning after this area.
            i = end + 1;
        } else {
            i += 1;
        }
    }
    areas
}
/// Scan the reachability plot for maximal steep-up areas; mirror of
/// `find_steep_down_areas` with the steepness test reversed.
fn find_steep_up_areas(reach: &[f64], xi: f64, min_cluster_size: usize) -> Vec<SteepArea> {
    let n = reach.len();
    let mut areas = Vec::new();
    if n < 2 {
        return areas;
    }
    let mut i = 0;
    while i < n - 1 {
        if is_steep_up(reach, i, xi) {
            let start = i;
            let mut end = i;
            // NOTE(review): seeds the running max from reach[i + 1] while the
            // down variant uses reach[i] — confirm the asymmetry is intended.
            let mut max_r = reach[i + 1];
            // Consecutive non-steep points tolerated before the run ends.
            let mut gap_count = 0;
            let max_gap = min_cluster_size.min(3);
            while end + 1 < n {
                if is_steep_up(reach, end, xi) {
                    gap_count = 0;
                    end += 1;
                    if reach[end] > max_r {
                        max_r = reach[end];
                    }
                } else if gap_count < max_gap && end + 1 < n {
                    // Bridge a small gap of non-steep points.
                    gap_count += 1;
                    end += 1;
                    if reach[end] > max_r {
                        max_r = reach[end];
                    }
                } else {
                    break;
                }
            }
            areas.push(SteepArea {
                start,
                end,
                is_down: false,
                max_reachability: max_r,
            });
            // Resume scanning after this area.
            i = end + 1;
        } else {
            i += 1;
        }
    }
    areas
}
/// True when position `i` is a xi-steep downward step: the reachability
/// drops from `reach[i]` to `reach[i + 1]` by at least a factor of
/// `(1 - xi)`. Out-of-range positions, non-finite values, and a
/// non-positive current reachability never qualify.
fn is_steep_down(reach: &[f64], i: usize, xi: f64) -> bool {
    let (curr, next) = match (reach.get(i), reach.get(i + 1)) {
        (Some(&c), Some(&n)) => (c, n),
        _ => return false,
    };
    curr.is_finite() && next.is_finite() && curr > 0.0 && curr * (1.0 - xi) >= next
}
/// True when position `i` is a xi-steep upward step: the reachability rises
/// from `reach[i]` to `reach[i + 1]` by at least a factor of `1 / (1 - xi)`.
/// Out-of-range positions, non-finite values, and a non-positive next
/// reachability never qualify.
fn is_steep_up(reach: &[f64], i: usize, xi: f64) -> bool {
    let (curr, next) = match (reach.get(i), reach.get(i + 1)) {
        (Some(&c), Some(&n)) => (c, n),
        _ => return false,
    };
    curr.is_finite() && next.is_finite() && next > 0.0 && curr <= next * (1.0 - xi)
}
/// Pair steep-down areas with later steep-up areas into cluster intervals.
///
/// A (down, up) pair qualifies when the up-area starts after the down-area
/// ends, the span covers at least `min_cluster_size` points, the boundary
/// reachabilities are within a `(1 - xi)^2` ratio, and — when a non-empty
/// interior exists — some interior reachability dips below the boundary
/// maximum. Nested intervals are removed before returning.
fn extract_cluster_intervals(
    reach: &[f64],
    steep_down: &[SteepArea],
    steep_up: &[SteepArea],
    xi: f64,
    min_cluster_size: usize,
    n_points: usize,
) -> Vec<XiCluster> {
    let mut clusters = Vec::new();
    for sd in steep_down {
        for su in steep_up {
            // Only consider up-areas strictly after this down-area.
            if su.start <= sd.end {
                continue;
            }
            let cluster_size = su.end - sd.start + 1;
            if cluster_size < min_cluster_size {
                continue;
            }
            let r_sd_start = reach[sd.start];
            let r_su_end = if su.end < n_points {
                reach[su.end]
            } else {
                continue;
            };
            let r_max = r_sd_start.max(r_su_end);
            let r_min = r_sd_start.min(r_su_end);
            if r_max > 0.0 && r_min > 0.0 {
                // Boundary reachabilities must be of comparable magnitude.
                let ratio = r_min / r_max;
                if ratio >= (1.0 - xi) * (1.0 - xi) {
                    let interior_start = sd.end + 1;
                    let interior_end = su.start;
                    if interior_start < interior_end {
                        // Require a genuine valley between the two slopes.
                        let interior_min = reach[interior_start..interior_end]
                            .iter()
                            .copied()
                            .filter(|r| r.is_finite())
                            .fold(f64::INFINITY, f64::min);
                        if interior_min < r_max {
                            clusters.push(XiCluster {
                                start: sd.start,
                                end: su.end.min(n_points - 1),
                            });
                            // At most one cluster per down-area: first match.
                            break;
                        }
                    } else {
                        // Empty interior: accept the pair as-is.
                        clusters.push(XiCluster {
                            start: sd.start,
                            end: su.end.min(n_points - 1),
                        });
                        break;
                    }
                }
            }
        }
    }
    remove_nested_clusters(&mut clusters);
    clusters
}
/// Drop every cluster interval fully contained in another, keeping only the
/// outermost intervals.
///
/// Intervals are sorted by start ascending, then larger span first, so a
/// containing interval is always visited before anything nested inside it.
fn remove_nested_clusters(clusters: &mut Vec<XiCluster>) {
    if clusters.len() <= 1 {
        return;
    }
    clusters.sort_by(|a, b| {
        a.start
            .cmp(&b.start)
            .then_with(|| (b.end - b.start).cmp(&(a.end - a.start)))
    });
    let mut keep = vec![true; clusters.len()];
    for i in 0..clusters.len() {
        if !keep[i] {
            continue;
        }
        let (outer_start, outer_end) = (clusters[i].start, clusters[i].end);
        for j in (i + 1)..clusters.len() {
            if keep[j] && clusters[j].start >= outer_start && clusters[j].end <= outer_end {
                keep[j] = false;
            }
        }
    }
    // `retain` visits elements in order, so pair each with its keep flag.
    let mut flags = keep.into_iter();
    clusters.retain(|_| flags.next().unwrap_or(false));
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// Two well-separated 6-point clusters centered near (1, 2) and (6, 7).
    fn make_two_cluster_data() -> Array2<f64> {
        Array2::from_shape_vec(
            (12, 2),
            vec![
                1.0, 2.0, 1.1, 1.9, 0.9, 2.1, 1.2, 1.8, 0.8, 2.0, 1.0, 2.2,
                6.0, 7.0, 6.1, 6.9, 5.9, 7.1, 6.2, 6.8, 5.8, 7.0, 6.0, 7.2,
            ],
        )
        .expect("Failed to create test data")
    }

    #[test]
    fn test_optics_basic_ordering() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 2, None, Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed");
        assert_eq!(result.ordering.len(), 12);
        assert_eq!(result.reachability.len(), 12);
        // The ordering must be a permutation of all point indices.
        let mut seen = vec![false; 12];
        for &idx in &result.ordering {
            assert!(
                !seen[idx],
                "Point {} appears more than once in ordering",
                idx
            );
            seen[idx] = true;
        }
        for (i, &s) in seen.iter().enumerate() {
            assert!(s, "Point {} missing from ordering", i);
        }
    }

    #[test]
    fn test_optics_core_distances() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 3, None, Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed");
        // Each tight 6-point cluster should contain several core points.
        let cluster1_core = result.core_distances[0..6]
            .iter()
            .filter(|d| d.is_some())
            .count();
        let cluster2_core = result.core_distances[6..12]
            .iter()
            .filter(|d| d.is_some())
            .count();
        assert!(
            cluster1_core >= 3,
            "At least 3 core points expected in cluster 1, got {}",
            cluster1_core
        );
        assert!(
            cluster2_core >= 3,
            "At least 3 core points expected in cluster 2, got {}",
            cluster2_core
        );
    }

    #[test]
    fn test_optics_dbscan_extraction() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 2, None, Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed");
        let labels = extract_dbscan_clustering(&result, 1.0);
        assert_eq!(labels.len(), 12);
        let mut unique_labels: Vec<i32> = labels.iter().copied().filter(|&l| l >= 0).collect();
        unique_labels.sort();
        unique_labels.dedup();
        assert!(
            !unique_labels.is_empty(),
            "Should find at least one cluster"
        );
    }

    #[test]
    fn test_optics_max_eps() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 2, Some(0.5), Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed with max_eps");
        // A bounded radius still yields a complete ordering.
        assert_eq!(result.ordering.len(), 12);
    }

    #[test]
    fn test_optics_invalid_inputs() {
        // Empty data is rejected.
        let data = Array2::<f64>::zeros((0, 2));
        let result = optics(data.view(), 2, None, None);
        assert!(result.is_err());
        // min_samples < 2 is rejected.
        let data = make_two_cluster_data();
        let result = optics(data.view(), 1, None, None);
        assert!(result.is_err());
        // Non-positive max_eps is rejected.
        let result = optics(data.view(), 2, Some(-1.0), None);
        assert!(result.is_err());
    }

    #[test]
    fn test_xi_extraction_basic() {
        let data = Array2::from_shape_vec(
            (20, 2),
            vec![
                1.0, 1.0, 1.1, 0.9, 0.9, 1.1, 1.2, 1.0, 0.8, 1.0, 1.0, 1.2, 1.1, 1.1, 0.9, 0.9, 1.0,
                0.8, 1.2, 1.2, 10.0, 10.0, 10.1, 9.9, 9.9, 10.1, 10.2, 10.0, 9.8, 10.0, 10.0, 10.2, 10.1, 10.1,
                9.9, 9.9, 10.0, 9.8, 10.2, 10.2,
            ],
        )
        .expect("Failed to create test data");
        let result = optics(data.view(), 3, None, Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed");
        let labels = extract_xi_clusters(&result, 0.1, 3).expect("Xi extraction should succeed");
        assert_eq!(labels.len(), 20);
        let assigned = labels.iter().filter(|&&l| l >= 0).count();
        // Fix: previously computed but unused; sanity-check the count.
        assert!(
            assigned <= 20,
            "Assigned points cannot exceed the number of samples"
        );
        assert!(
            labels.iter().all(|&l| l >= -1),
            "All labels should be >= -1"
        );
        let dbscan_labels = extract_dbscan_clustering(&result, 1.0);
        let dbscan_assigned = dbscan_labels.iter().filter(|&&l| l >= 0).count();
        assert!(
            dbscan_assigned >= 4,
            "DBSCAN extraction should assign >= 4 points, got {}",
            dbscan_assigned
        );
    }

    #[test]
    fn test_xi_extraction_invalid_params() {
        let result = OPTICSResult {
            ordering: vec![0, 1, 2],
            reachability: vec![None, Some(0.5), Some(0.3)],
            core_distances: vec![Some(0.2), Some(0.3), None],
            predecessor: vec![None, Some(0), Some(1)],
        };
        // xi must lie strictly inside (0, 1); min_cluster_size >= 2.
        assert!(extract_xi_clusters(&result, 0.0, 2).is_err());
        assert!(extract_xi_clusters(&result, 1.0, 2).is_err());
        assert!(extract_xi_clusters(&result, -0.1, 2).is_err());
        assert!(extract_xi_clusters(&result, 0.5, 1).is_err());
    }

    #[test]
    fn test_optics_with_options() {
        let data = make_two_cluster_data();
        let opts = OPTICSOptions {
            min_samples: 3,
            max_eps: Some(10.0),
            metric: DistanceMetric::Euclidean,
            xi: Some(0.05),
            min_cluster_size: Some(3),
            predecessor_correction: true,
        };
        let result =
            optics_with_options(data.view(), &opts).expect("OPTICS with options should succeed");
        assert_eq!(result.ordering.len(), 12);
    }

    #[test]
    fn test_optics_manhattan_metric() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 2, None, Some(DistanceMetric::Manhattan))
            .expect("OPTICS with Manhattan should succeed");
        assert_eq!(result.ordering.len(), 12);
        let core_count = result.core_distances.iter().filter(|d| d.is_some()).count();
        assert!(
            core_count > 0,
            "Should have core points with Manhattan metric"
        );
    }

    #[test]
    fn test_optics_single_point() {
        // A lone point is ordered but can never be a core point.
        let data = Array2::from_shape_vec((1, 2), vec![1.0, 2.0]).expect("Failed to create data");
        let result = optics(data.view(), 2, None, None).expect("OPTICS should handle single point");
        assert_eq!(result.ordering.len(), 1);
        assert_eq!(result.ordering[0], 0);
        assert!(result.core_distances[0].is_none());
    }

    #[test]
    fn test_optics_reachability_ordering() {
        let data = make_two_cluster_data();
        let result = optics(data.view(), 2, None, Some(DistanceMetric::Euclidean))
            .expect("OPTICS should succeed");
        let mut cluster1_reaches = Vec::new();
        let mut cluster2_reaches = Vec::new();
        for (i, &pt) in result.ordering.iter().enumerate() {
            if let Some(r) = result.reachability[i] {
                if pt < 6 {
                    cluster1_reaches.push(r);
                } else {
                    cluster2_reaches.push(r);
                }
            }
        }
        // Intra-cluster reachabilities should be much smaller than the
        // inter-cluster distance (~7).
        if !cluster1_reaches.is_empty() {
            let avg1: f64 = cluster1_reaches.iter().sum::<f64>() / cluster1_reaches.len() as f64;
            assert!(
                avg1 < 5.0,
                "Avg cluster 1 reachability {} is too large",
                avg1
            );
        }
    }
}