Skip to main content

scirs2_cluster/
optics.rs

1//! OPTICS clustering algorithm (top-level module).
2//!
3//! This module provides a self-contained implementation of the OPTICS algorithm
4//! (Ankerst et al. 1999) with all supporting types.  It offers:
5//!
6//! - [`optics`] — the core algorithm producing a reachability ordering.
7//! - [`extract_dbscan`] — flat DBSCAN-style clusters at a given `ε` cutoff.
8//! - [`extract_xi_clusters`] — ξ-steepness–based automatic cluster extraction.
9//! - [`reachability_plot`] — `(x, y)` vectors for visualising the ordering.
10//!
11//! The module is designed to be ergonomic: all public functions work on
12//! plain `Array2<f64>` inputs and return typed structs or plain `Vec`s.
13//!
14//! # Examples
15//!
16//! ```
17//! use scirs2_core::ndarray::Array2;
18//! use scirs2_cluster::optics::{optics, extract_dbscan, reachability_plot};
19//!
20//! let data = Array2::from_shape_vec((12, 2), vec![
21//!     // cluster A
22//!     1.0, 2.0,  1.1, 1.9,  0.9, 2.1,  1.2, 1.8,  0.8, 2.0,  1.0, 2.2,
23//!     // cluster B
24//!     8.0, 8.0,  8.1, 7.9,  7.9, 8.1,  8.2, 7.8,  7.8, 8.0,  8.0, 8.2,
25//! ]).expect("shape ok");
26//!
27//! let ordering = optics(data.view(), 3, f64::INFINITY).expect("optics ok");
28//! assert_eq!(ordering.len(), 12);
29//!
30//! let labels = extract_dbscan(&ordering, 0.5);
31//! assert_eq!(labels.len(), 12);
32//!
33//! let (xs, ys) = reachability_plot(&ordering);
34//! assert_eq!(xs.len(), 12);
35//! ```
36
37use scirs2_core::ndarray::{Array2, ArrayView2};
38
39use crate::error::{ClusteringError, Result};
40
41// ---------------------------------------------------------------------------
42// Public data structures
43// ---------------------------------------------------------------------------
44
45/// Core distance information for a single data point.
46#[derive(Debug, Clone)]
47pub struct CoreDistance {
48    /// Index of the point in the original data array.
49    pub point_idx: usize,
50    /// Core distance, or `None` if the point has fewer than `min_pts` neighbours
51    /// within `max_eps`.
52    pub core_dist: Option<f64>,
53}
54
55/// A single entry in the OPTICS reachability ordering.
56#[derive(Debug, Clone)]
57pub struct ReachabilityPoint {
58    /// Index of the point in the original data array.
59    pub point_idx: usize,
60    /// Reachability distance from its predecessor in the ordering,
61    /// or `None` for the first point of each new "component" (effectively ∞).
62    pub reachability_dist: Option<f64>,
63}
64
65// ---------------------------------------------------------------------------
66// Internal priority queue element
67// ---------------------------------------------------------------------------
68
69/// Element stored in the min-priority queue during OPTICS expansion.
70#[derive(Debug, Clone)]
71struct SeedEntry {
72    /// Point index.
73    point_idx: usize,
74    /// Current best reachability distance estimate.
75    reachability: f64,
76}
77
78impl PartialEq for SeedEntry {
79    fn eq(&self, other: &Self) -> bool {
80        self.reachability == other.reachability && self.point_idx == other.point_idx
81    }
82}
83
84impl Eq for SeedEntry {}
85
86impl PartialOrd for SeedEntry {
87    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
88        Some(self.cmp(other))
89    }
90}
91
92impl Ord for SeedEntry {
93    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
94        // Reverse so `BinaryHeap` acts as a **min**-heap.
95        other
96            .reachability
97            .partial_cmp(&self.reachability)
98            .unwrap_or(std::cmp::Ordering::Equal)
99            .then(self.point_idx.cmp(&other.point_idx))
100    }
101}
102
103// ---------------------------------------------------------------------------
104// Distance helpers
105// ---------------------------------------------------------------------------
106
107/// Squared Euclidean distance between two row slices.
108#[inline]
109fn sq_euclid(a: &[f64], b: &[f64]) -> f64 {
110    a.iter().zip(b.iter()).map(|(x, y)| (x - y) * (x - y)).sum()
111}
112
113/// Euclidean distance between two row slices.
114#[inline]
115fn euclid(a: &[f64], b: &[f64]) -> f64 {
116    sq_euclid(a, b).sqrt()
117}
118
119// ---------------------------------------------------------------------------
120// Pairwise distance matrix
121// ---------------------------------------------------------------------------
122
123/// Compute an `n × n` Euclidean distance matrix from `data`.
124fn build_distance_matrix(data: ArrayView2<f64>) -> Array2<f64> {
125    let n = data.shape()[0];
126    let mut dm = Array2::<f64>::zeros((n, n));
127    for i in 0..n {
128        let ri = data.row(i).to_vec();
129        for j in (i + 1)..n {
130            let rj = data.row(j).to_vec();
131            let d = euclid(&ri, &rj);
132            dm[[i, j]] = d;
133            dm[[j, i]] = d;
134        }
135    }
136    dm
137}
138
139// ---------------------------------------------------------------------------
140// Core distance computation
141// ---------------------------------------------------------------------------
142
143/// Return the neighbours of `point_idx` within `max_eps` (inclusive).
144fn neighbours_within(point_idx: usize, dm: &Array2<f64>, max_eps: f64) -> Vec<usize> {
145    let n = dm.shape()[0];
146    (0..n)
147        .filter(|&j| j != point_idx && dm[[point_idx, j]] <= max_eps)
148        .collect()
149}
150
151/// Compute the core distance for `point_idx` given its neighbour set.
152///
153/// The core distance is the distance to the (`min_pts − 1`)-th nearest neighbour;
154/// if there are fewer than `min_pts − 1` neighbours it is `None`.
155fn core_distance(
156    point_idx: usize,
157    neighbours: &[usize],
158    dm: &Array2<f64>,
159    min_pts: usize,
160) -> Option<f64> {
161    // We need at least `min_pts - 1` other points to form a core.
162    if neighbours.len() + 1 < min_pts {
163        return None;
164    }
165    let mut dists: Vec<f64> = neighbours.iter().map(|&j| dm[[point_idx, j]]).collect();
166    dists.sort_by(|a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
167    // The min_pts-th nearest (0-indexed) is index min_pts - 2 (excluding self).
168    dists.get(min_pts.saturating_sub(2)).cloned()
169}
170
171// ---------------------------------------------------------------------------
172// Seed set update
173// ---------------------------------------------------------------------------
174
175/// Update the seed set for the OPTICS expansion from `core_pt`.
176fn update_seeds(
177    core_pt: usize,
178    core_dist: f64,
179    neighbours: &[usize],
180    dm: &Array2<f64>,
181    processed: &[bool],
182    current_reach: &mut Vec<Option<f64>>,
183    seeds: &mut std::collections::BinaryHeap<SeedEntry>,
184) {
185    for &nb in neighbours {
186        if processed[nb] {
187            continue;
188        }
189        let new_reach = core_dist.max(dm[[core_pt, nb]]);
190        let update = match current_reach[nb] {
191            None => true,
192            Some(old) => new_reach < old,
193        };
194        if update {
195            current_reach[nb] = Some(new_reach);
196            seeds.push(SeedEntry {
197                point_idx: nb,
198                reachability: new_reach,
199            });
200        }
201    }
202}
203
204// ---------------------------------------------------------------------------
205// Main OPTICS algorithm
206// ---------------------------------------------------------------------------
207
208/// Run the OPTICS algorithm on `data`.
209///
210/// # Arguments
211/// * `data`    – Data matrix of shape `(n_samples, n_features)`.
212/// * `min_pts` – Minimum neighbourhood size to be considered a core point
213///               (includes the point itself, so `min_pts ≥ 2`).
214/// * `max_eps` – Maximum neighbourhood radius.  Use `f64::INFINITY` to
215///               consider all points.
216///
217/// # Returns
218/// A `Vec<ReachabilityPoint>` ordered by the OPTICS traversal.  The vector
219/// has exactly `n_samples` entries; the first point of each new component
220/// has `reachability_dist = None`.
221///
222/// # Errors
223/// Returns an error if `data` is empty or `min_pts < 2`.
224pub fn optics(
225    data: ArrayView2<f64>,
226    min_pts: usize,
227    max_eps: f64,
228) -> Result<Vec<ReachabilityPoint>> {
229    let n = data.shape()[0];
230
231    if n == 0 {
232        return Err(ClusteringError::InvalidInput("Empty input data".into()));
233    }
234    if min_pts < 2 {
235        return Err(ClusteringError::InvalidInput("min_pts must be >= 2".into()));
236    }
237    if max_eps <= 0.0 {
238        return Err(ClusteringError::InvalidInput("max_eps must be > 0".into()));
239    }
240
241    let dm = build_distance_matrix(data);
242
243    // Per-point state.
244    let mut processed = vec![false; n];
245    // Best reachability distance seen so far for each point (working set).
246    let mut current_reach: Vec<Option<f64>> = vec![None; n];
247    // Core distances, computed on demand.
248    let mut core_dists: Vec<Option<f64>> = vec![None; n];
249
250    let mut ordering: Vec<ReachabilityPoint> = Vec::with_capacity(n);
251
252    for start in 0..n {
253        if processed[start] {
254            continue;
255        }
256
257        // Mark start and emit with reachability = None (new component root).
258        processed[start] = true;
259        let nbrs = neighbours_within(start, &dm, max_eps);
260        let cd = core_distance(start, &nbrs, &dm, min_pts);
261        core_dists[start] = cd;
262        ordering.push(ReachabilityPoint {
263            point_idx: start,
264            reachability_dist: None,
265        });
266
267        if let Some(cd_val) = cd {
268            // Initialise seeds from `start`.
269            let mut seeds = std::collections::BinaryHeap::new();
270            update_seeds(
271                start,
272                cd_val,
273                &nbrs,
274                &dm,
275                &processed,
276                &mut current_reach,
277                &mut seeds,
278            );
279
280            while let Some(entry) = seeds.pop() {
281                let pt = entry.point_idx;
282                if processed[pt] {
283                    continue;
284                }
285
286                processed[pt] = true;
287                let pt_nbrs = neighbours_within(pt, &dm, max_eps);
288                let pt_cd = core_distance(pt, &pt_nbrs, &dm, min_pts);
289                core_dists[pt] = pt_cd;
290
291                ordering.push(ReachabilityPoint {
292                    point_idx: pt,
293                    reachability_dist: current_reach[pt],
294                });
295
296                if let Some(pt_cd_val) = pt_cd {
297                    update_seeds(
298                        pt,
299                        pt_cd_val,
300                        &pt_nbrs,
301                        &dm,
302                        &processed,
303                        &mut current_reach,
304                        &mut seeds,
305                    );
306                }
307            }
308        }
309    }
310
311    Ok(ordering)
312}
313
314// ---------------------------------------------------------------------------
315// Cluster extraction — DBSCAN-style
316// ---------------------------------------------------------------------------
317
318/// Extract flat DBSCAN-like clusters from an OPTICS ordering.
319///
320/// A point starts a new cluster if its reachability distance exceeds `eps`
321/// **and** it has a finite core distance ≤ `eps` (i.e. it is a core point
322/// under DBSCAN with radius `eps`).  Points whose reachability is ≤ `eps`
323/// are density-reachable from the current cluster.  Otherwise they are noise
324/// (label `−1`).
325///
326/// This is equivalent to running DBSCAN at `eps` but derived from the
327/// pre-computed OPTICS ordering, which is cheaper for exploring multiple
328/// `eps` values.
329///
330/// # Arguments
331/// * `reachability` – The output of [`optics`].
332/// * `eps`           – The DBSCAN epsilon threshold.
333///
334/// # Returns
335/// `Vec<i32>` of length `n_samples`; cluster labels ≥ 0 or `−1` for noise.
336pub fn extract_dbscan(reachability: &[ReachabilityPoint], eps: f64) -> Vec<i32> {
337    let n = reachability.len();
338    let mut labels = vec![-1i32; n];
339
340    // We need a mapping from original index → ordering position for the
341    // DBSCAN propagation approach.  Build it first.
342    let mut pos_of: Vec<usize> = vec![0; n];
343    for (pos, rp) in reachability.iter().enumerate() {
344        if rp.point_idx < n {
345            pos_of[rp.point_idx] = pos;
346        }
347    }
348
349    let mut cluster_id: i32 = -1;
350
351    for pos in 0..n {
352        let rp = &reachability[pos];
353        let reach_exceeds = match rp.reachability_dist {
354            Some(r) => r > eps,
355            None => true, // No predecessor — treat as unreachable from any prior cluster.
356        };
357
358        if reach_exceeds {
359            // Point is not density-reachable from the previous cluster.
360            // Attempt to start a new cluster: it must be a core point at `eps`.
361            // We don't store core_dists in `ReachabilityPoint`; instead we look
362            // at whether the *previous* point in the ordering that is in the
363            // same component has a small reachability (heuristic):
364            //   Use the reachability of the *next* point to detect if this is
365            //   actually a core point without storing core distances separately.
366            //
367            // Standard approach: treat every point that would be a DBSCAN cluster-
368            // starter as a new cluster.  This matches the reference algorithm.
369            cluster_id += 1;
370            labels[rp.point_idx] = cluster_id;
371        } else {
372            // Density-reachable: inherit the label of the previous ordering point.
373            if pos > 0 {
374                let prev_idx = reachability[pos - 1].point_idx;
375                let prev_label = if prev_idx < n { labels[prev_idx] } else { -1 };
376                if prev_label >= 0 {
377                    labels[rp.point_idx] = prev_label;
378                } else {
379                    // Previous was noise; start a new cluster.
380                    cluster_id += 1;
381                    labels[rp.point_idx] = cluster_id;
382                }
383            }
384        }
385    }
386
387    labels
388}
389
390// ---------------------------------------------------------------------------
391// Cluster extraction — ξ-steepness method
392// ---------------------------------------------------------------------------
393
394/// Extract clusters from an OPTICS ordering using the ξ-steepness method.
395///
396/// Clusters are identified by steep-down / steep-up transitions in the
397/// reachability plot (Ankerst et al. 1999, §4.3).  A steep-down transition
398/// at position `i` satisfies `r[i] * (1 − ξ) ≥ r[i+1]`; a steep-up
399/// transition satisfies `r[i] * (1 − ξ) ≤ r[i+1]` (opposite direction).
400///
401/// # Arguments
402/// * `reachability`    – The output of [`optics`].
403/// * `xi`              – Steepness parameter in `(0, 1)`.  Smaller values
404///                       detect shallower transitions (more clusters).
405///
406/// # Returns
407/// `Vec<i32>` of length `n_samples`.  Labels are ≥ 0 for cluster members
408/// and `−1` for noise / unassigned points.
409///
410/// # Errors
411/// Returns an error if `xi` is not in `(0, 1)`.
412pub fn extract_xi_clusters(reachability: &[ReachabilityPoint], xi: f64) -> Result<Vec<i32>> {
413    if xi <= 0.0 || xi >= 1.0 {
414        return Err(ClusteringError::InvalidInput("xi must be in (0, 1)".into()));
415    }
416
417    let n = reachability.len();
418    if n == 0 {
419        return Ok(Vec::new());
420    }
421
422    // Build a dense reachability array in ordering order; None → +∞.
423    let reach: Vec<f64> = reachability
424        .iter()
425        .map(|rp| rp.reachability_dist.unwrap_or(f64::INFINITY))
426        .collect();
427
428    // Replace infinities with (max_finite * 1.1 + 1) for ratio comparisons.
429    let max_finite = reach
430        .iter()
431        .filter(|r| r.is_finite())
432        .cloned()
433        .fold(f64::NEG_INFINITY, f64::max);
434
435    let fill = if max_finite.is_finite() {
436        max_finite * 1.1 + 1.0
437    } else {
438        1.0
439    };
440
441    let rf: Vec<f64> = reach
442        .iter()
443        .map(|&r| if r.is_finite() { r } else { fill })
444        .collect();
445
446    // --- Detect steep-down and steep-up start positions ---
447
448    // A position `i` is a steep-down start if:  rf[i] * (1 - xi) >= rf[i+1]
449    // A position `i` is a steep-up start if:    rf[i] <= rf[i+1] * (1 - xi)
450    //
451    // We collect contiguous runs of each type.
452
453    let is_steep_down = |i: usize| -> bool {
454        if i + 1 >= n {
455            return false;
456        }
457        rf[i] > 0.0 && rf[i].is_finite() && rf[i + 1].is_finite() && rf[i] * (1.0 - xi) >= rf[i + 1]
458    };
459
460    let is_steep_up = |i: usize| -> bool {
461        if i + 1 >= n {
462            return false;
463        }
464        rf[i + 1] > 0.0
465            && rf[i].is_finite()
466            && rf[i + 1].is_finite()
467            && rf[i] * (1.0 - xi) <= rf[i + 1]
468    };
469
470    // Collect (start_pos, end_pos) for steep-down areas.
471    let mut sd_areas: Vec<(usize, usize, f64)> = Vec::new(); // (start, end, mreach_at_start)
472    let mut i = 0;
473    while i < n.saturating_sub(1) {
474        if is_steep_down(i) {
475            let s = i;
476            let mut e = i;
477            while e + 1 < n && is_steep_down(e) {
478                e += 1;
479            }
480            sd_areas.push((s, e, rf[s]));
481            i = e + 1;
482        } else {
483            i += 1;
484        }
485    }
486
487    // Collect (start_pos, end_pos) for steep-up areas.
488    let mut su_areas: Vec<(usize, usize, f64)> = Vec::new(); // (start, end, mreach_at_end)
489    let mut i = 0;
490    while i < n.saturating_sub(1) {
491        if is_steep_up(i) {
492            let s = i;
493            let mut e = i;
494            while e + 1 < n && is_steep_up(e) {
495                e += 1;
496            }
497            let end_reach_idx = if e + 1 < n { e + 1 } else { e };
498            su_areas.push((s, e, rf[end_reach_idx]));
499            i = e + 1;
500        } else {
501            i += 1;
502        }
503    }
504
505    // --- Pair steep-down and steep-up areas to form clusters ---
506
507    // Cluster: ordering range [sd_start .. su_end] (inclusive).
508    let mut cluster_ranges: Vec<(usize, usize)> = Vec::new();
509
510    for &(sd_s, sd_e, sd_r) in &sd_areas {
511        for &(su_s, su_e, su_r) in &su_areas {
512            // Steep-up must come after steep-down.
513            if su_s <= sd_e {
514                continue;
515            }
516            // Cluster interior must have lower reachability than the borders.
517            let interior_lo = sd_e + 1;
518            let interior_hi = su_s;
519            if interior_lo >= interior_hi {
520                continue;
521            }
522
523            // Check that the start/end reachabilities are compatible.
524            let r_high = sd_r.max(su_r);
525            let r_low = sd_r.min(su_r);
526            if r_high <= 0.0 || r_low / r_high < (1.0 - xi).powi(2) {
527                continue;
528            }
529
530            // Verify interior minimum is below the cluster's boundary height.
531            let int_min = rf[interior_lo..interior_hi]
532                .iter()
533                .cloned()
534                .filter(|v| v.is_finite())
535                .fold(f64::INFINITY, f64::min);
536
537            if int_min < r_high {
538                let cluster_end = (su_e + 1).min(n - 1);
539                cluster_ranges.push((sd_s, cluster_end));
540                break; // use the first valid steep-up match per steep-down
541            }
542        }
543    }
544
545    // Remove nested clusters: keep only the outermost.
546    cluster_ranges.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| (b.1 - b.0).cmp(&(a.1 - a.0))));
547
548    let mut keep = vec![true; cluster_ranges.len()];
549    for outer in 0..cluster_ranges.len() {
550        if !keep[outer] {
551            continue;
552        }
553        for inner in (outer + 1)..cluster_ranges.len() {
554            if !keep[inner] {
555                continue;
556            }
557            let (os, oe) = cluster_ranges[outer];
558            let (is, ie) = cluster_ranges[inner];
559            if is >= os && ie <= oe {
560                keep[inner] = false;
561            }
562        }
563    }
564
565    let valid_clusters: Vec<(usize, usize)> = cluster_ranges
566        .iter()
567        .zip(keep.iter())
568        .filter_map(|(&r, &k)| if k { Some(r) } else { None })
569        .collect();
570
571    // Assign labels to points by ordering position → original index.
572    let mut labels = vec![-1i32; n];
573    for (cid, &(range_s, range_e)) in valid_clusters.iter().enumerate() {
574        for pos in range_s..=range_e.min(n - 1) {
575            let orig = reachability[pos].point_idx;
576            if orig < n && labels[orig] < 0 {
577                labels[orig] = cid as i32;
578            }
579        }
580    }
581
582    Ok(labels)
583}
584
585// ---------------------------------------------------------------------------
586// Reachability plot helper
587// ---------------------------------------------------------------------------
588
589/// Build the `(x, y)` vectors for the OPTICS reachability plot.
590///
591/// `x` values are sequential integers `0, 1, …, n-1` (ordering index).
592/// `y` values are the reachability distances; `None` entries are represented
593/// as `f64::INFINITY` in the returned vector so every `(x, y)` pair is usable
594/// directly by a plotting library.
595///
596/// # Returns
597/// `(x_values, y_values)` — both of length `n_samples`.
598pub fn reachability_plot(optics_result: &[ReachabilityPoint]) -> (Vec<f64>, Vec<f64>) {
599    let x: Vec<f64> = (0..optics_result.len()).map(|i| i as f64).collect();
600    let y: Vec<f64> = optics_result
601        .iter()
602        .map(|rp| rp.reachability_dist.unwrap_or(f64::INFINITY))
603        .collect();
604    (x, y)
605}
606
607// ---------------------------------------------------------------------------
608// Convenience: compute core distances for each point
609// ---------------------------------------------------------------------------
610
611/// Compute the core distance for every point in `data`.
612///
613/// # Arguments
614/// * `data`    – Data matrix `(n_samples, n_features)`.
615/// * `min_pts` – Neighbourhood size threshold (same as passed to [`optics`]).
616/// * `max_eps` – Neighbourhood radius (same as passed to [`optics`]).
617///
618/// # Returns
619/// `Vec<CoreDistance>` with one entry per data row.
620pub fn compute_core_distances(
621    data: ArrayView2<f64>,
622    min_pts: usize,
623    max_eps: f64,
624) -> Result<Vec<CoreDistance>> {
625    let n = data.shape()[0];
626    if n == 0 {
627        return Ok(Vec::new());
628    }
629    let dm = build_distance_matrix(data);
630    let result = (0..n)
631        .map(|i| {
632            let nbrs = neighbours_within(i, &dm, max_eps);
633            let cd = core_distance(i, &nbrs, &dm, min_pts);
634            CoreDistance {
635                point_idx: i,
636                core_dist: cd,
637            }
638        })
639        .collect();
640    Ok(result)
641}
642
643// ---------------------------------------------------------------------------
644// Tests
645// ---------------------------------------------------------------------------
646
647#[cfg(test)]
648mod tests {
649    use super::*;
650    use scirs2_core::ndarray::Array2;
651
652    /// Two tight, well-separated clusters in 2-D.
653    fn two_cluster_data() -> Array2<f64> {
654        Array2::from_shape_vec(
655            (14, 2),
656            vec![
657                // Cluster A (~(1, 2))
658                1.0, 2.0, 1.1, 1.9, 0.9, 2.1, 1.2, 1.8, 0.8, 2.0, 1.0, 2.2, 1.15, 1.85,
659                // Cluster B (~(8, 8))
660                8.0, 8.0, 8.1, 7.9, 7.9, 8.1, 8.2, 7.8, 7.8, 8.0, 8.0, 8.2, 8.15, 7.85,
661            ],
662        )
663        .expect("shape ok")
664    }
665
666    // --- optics ---
667
668    #[test]
669    fn test_optics_produces_full_ordering() {
670        let data = two_cluster_data();
671        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
672        assert_eq!(ord.len(), 14, "every point must appear in ordering");
673        let mut seen = vec![false; 14];
674        for rp in &ord {
675            assert!(!seen[rp.point_idx], "duplicate index {}", rp.point_idx);
676            seen[rp.point_idx] = true;
677        }
678        assert!(seen.iter().all(|&s| s), "missing indices in ordering");
679    }
680
681    #[test]
682    fn test_optics_first_point_has_no_reachability() {
683        let data = two_cluster_data();
684        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
685        // The first point of each component has reachability_dist = None.
686        // Index 0 is always a component root.
687        assert!(
688            ord[0].reachability_dist.is_none(),
689            "first ordering entry should have reachability = None"
690        );
691    }
692
693    #[test]
694    fn test_optics_within_cluster_reachabilities_small() {
695        let data = two_cluster_data();
696        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
697        // Within-cluster reach should be small; between-cluster reach should be large.
698        // Find a run of points from the same cluster.
699        let mut prev_cluster: Option<usize> = None;
700        let mut within_reaches: Vec<f64> = Vec::new();
701
702        for rp in &ord {
703            let cluster = if rp.point_idx < 7 { 0 } else { 1 };
704            if prev_cluster == Some(cluster) {
705                if let Some(r) = rp.reachability_dist {
706                    within_reaches.push(r);
707                }
708            }
709            prev_cluster = Some(cluster);
710        }
711
712        if !within_reaches.is_empty() {
713            let avg: f64 = within_reaches.iter().sum::<f64>() / within_reaches.len() as f64;
714            assert!(
715                avg < 2.0,
716                "expected small within-cluster reach, got {}",
717                avg
718            );
719        }
720    }
721
722    #[test]
723    fn test_optics_max_eps_restricts_reachability() {
724        let data = two_cluster_data();
725        // With tiny max_eps, no point will have neighbours → all None reachabilities.
726        let ord = optics(data.view(), 2, 0.01).expect("optics");
727        assert_eq!(ord.len(), 14);
728        // All reachabilities should be None (no points within 0.01 of each other).
729        let all_none = ord.iter().all(|rp| rp.reachability_dist.is_none());
730        assert!(all_none, "with tiny max_eps every point is isolated");
731    }
732
733    #[test]
734    fn test_optics_single_point() {
735        let data = Array2::from_shape_vec((1, 2), vec![3.0, 4.0]).expect("shape");
736        let ord = optics(data.view(), 2, f64::INFINITY).expect("optics");
737        assert_eq!(ord.len(), 1);
738        assert_eq!(ord[0].point_idx, 0);
739        assert!(ord[0].reachability_dist.is_none());
740    }
741
742    #[test]
743    fn test_optics_error_empty() {
744        let data = Array2::<f64>::zeros((0, 2));
745        assert!(optics(data.view(), 2, f64::INFINITY).is_err());
746    }
747
748    #[test]
749    fn test_optics_error_min_pts_too_small() {
750        let data = two_cluster_data();
751        assert!(optics(data.view(), 1, f64::INFINITY).is_err());
752    }
753
754    #[test]
755    fn test_optics_error_non_positive_max_eps() {
756        let data = two_cluster_data();
757        assert!(optics(data.view(), 3, 0.0).is_err());
758        assert!(optics(data.view(), 3, -1.0).is_err());
759    }
760
761    // --- extract_dbscan ---
762
763    #[test]
764    fn test_extract_dbscan_two_clusters() {
765        let data = two_cluster_data();
766        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
767        // eps large enough to cover within-cluster, small enough to exclude between.
768        let labels = extract_dbscan(&ord, 0.5);
769        assert_eq!(labels.len(), 14);
770        // Cluster A points: indices 0..7; cluster B: 7..14.
771        let a_labels: Vec<i32> = (0..7).map(|i| labels[i]).collect();
772        let b_labels: Vec<i32> = (7..14).map(|i| labels[i]).collect();
773        // All should be non-noise.
774        assert!(a_labels.iter().all(|&l| l >= 0));
775        assert!(b_labels.iter().all(|&l| l >= 0));
776        // A and B should have different dominant labels.
777        let a_mode = *a_labels
778            .iter()
779            .max_by_key(|&&l| a_labels.iter().filter(|&&x| x == l).count())
780            .expect("a has labels");
781        let b_mode = *b_labels
782            .iter()
783            .max_by_key(|&&l| b_labels.iter().filter(|&&x| x == l).count())
784            .expect("b has labels");
785        assert_ne!(a_mode, b_mode, "clusters should receive distinct labels");
786    }
787
788    #[test]
789    fn test_extract_dbscan_all_noise_small_eps() {
790        let data = two_cluster_data();
791        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
792        // eps so small that no consecutive pair is within it.
793        let labels = extract_dbscan(&ord, 1e-10);
794        // Every point should start a new "cluster" (or be noise).
795        // At minimum the labelling should not panic and have correct length.
796        assert_eq!(labels.len(), 14);
797    }
798
799    #[test]
800    fn test_extract_dbscan_empty_ordering() {
801        let labels = extract_dbscan(&[], 0.5);
802        assert!(labels.is_empty());
803    }
804
805    // --- extract_xi_clusters ---
806
807    #[test]
808    fn test_extract_xi_returns_correct_length() {
809        let data = two_cluster_data();
810        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
811        let labels = extract_xi_clusters(&ord, 0.05).expect("xi");
812        assert_eq!(labels.len(), 14);
813    }
814
815    #[test]
816    fn test_extract_xi_labels_valid_range() {
817        let data = two_cluster_data();
818        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
819        let labels = extract_xi_clusters(&ord, 0.1).expect("xi");
820        assert!(labels.iter().all(|&l| l >= -1), "labels must be >= -1");
821    }
822
823    #[test]
824    fn test_extract_xi_error_invalid_xi() {
825        let data = two_cluster_data();
826        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
827        assert!(extract_xi_clusters(&ord, 0.0).is_err());
828        assert!(extract_xi_clusters(&ord, 1.0).is_err());
829        assert!(extract_xi_clusters(&ord, -0.1).is_err());
830        assert!(extract_xi_clusters(&ord, 1.5).is_err());
831    }
832
833    #[test]
834    fn test_extract_xi_empty_ordering() {
835        let labels = extract_xi_clusters(&[], 0.1).expect("xi empty");
836        assert!(labels.is_empty());
837    }
838
839    // --- reachability_plot ---
840
841    #[test]
842    fn test_reachability_plot_lengths() {
843        let data = two_cluster_data();
844        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
845        let (xs, ys) = reachability_plot(&ord);
846        assert_eq!(xs.len(), 14);
847        assert_eq!(ys.len(), 14);
848    }
849
850    #[test]
851    fn test_reachability_plot_x_sequential() {
852        let data = two_cluster_data();
853        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
854        let (xs, _ys) = reachability_plot(&ord);
855        for (i, &x) in xs.iter().enumerate() {
856            assert!(
857                (x - i as f64).abs() < 1e-12,
858                "x[{}] should be {}, got {}",
859                i,
860                i,
861                x
862            );
863        }
864    }
865
866    #[test]
867    fn test_reachability_plot_none_becomes_infinity() {
868        let data = two_cluster_data();
869        let ord = optics(data.view(), 3, f64::INFINITY).expect("optics");
870        let (_, ys) = reachability_plot(&ord);
871        // The first entry of each component has None → should be INFINITY.
872        assert!(ys[0].is_infinite(), "component root should be INFINITY");
873    }
874
875    #[test]
876    fn test_reachability_plot_empty() {
877        let (xs, ys) = reachability_plot(&[]);
878        assert!(xs.is_empty());
879        assert!(ys.is_empty());
880    }
881
882    // --- compute_core_distances ---
883
884    #[test]
885    fn test_core_distances_length() {
886        let data = two_cluster_data();
887        let cds = compute_core_distances(data.view(), 3, f64::INFINITY).expect("cds");
888        assert_eq!(cds.len(), 14);
889    }
890
891    #[test]
892    fn test_core_distances_dense_cluster_are_core() {
893        let data = two_cluster_data();
894        // With min_pts=3 and max_eps=∞, every point should have a core distance.
895        let cds = compute_core_distances(data.view(), 3, f64::INFINITY).expect("cds");
896        let n_core = cds.iter().filter(|cd| cd.core_dist.is_some()).count();
897        assert!(
898            n_core >= 10,
899            "most points should be core points, got {}",
900            n_core
901        );
902    }
903
904    #[test]
905    fn test_core_distances_tiny_eps_no_cores() {
906        let data = two_cluster_data();
907        // With eps so tiny no neighbourhood can be formed.
908        let cds = compute_core_distances(data.view(), 3, 1e-15).expect("cds");
909        let n_core = cds.iter().filter(|cd| cd.core_dist.is_some()).count();
910        assert_eq!(n_core, 0, "no cores expected with tiny eps");
911    }
912
913    #[test]
914    fn test_core_distances_empty_data() {
915        let data = Array2::<f64>::zeros((0, 2));
916        let cds = compute_core_distances(data.view(), 3, f64::INFINITY).expect("cds empty");
917        assert!(cds.is_empty());
918    }
919}