Skip to main content

sphereql_embed/
quality_metric.rs

1//! Pluggable quality metrics for auto-tuning a SphereQL pipeline.
2//!
3//! Auto-tuning needs a scalar objective to optimize. EVR alone is a bad
4//! target — a high-EVR projection can still produce a geometry where every
5//! bridge is an `OverlapArtifact`, so the "quality" the user experiences
6//! is low. This module defines a [`QualityMetric`] trait and five concrete
7//! metrics that each measure a distinct slice of pipeline quality.
8//!
9//! Compose them via [`CompositeMetric`] for a weighted objective.
10//!
11//! All metrics produce a scalar in `[0, 1]` where higher is better, so they
12//! can be mixed linearly without per-metric rescaling.
13
14use std::collections::HashSet;
15
16use sphereql_core::{SphericalPoint, angular_distance, spherical_to_cartesian};
17
18use crate::ann::{AnnConfig, AnnIndex};
19use crate::category::BridgeClassification;
20use crate::pipeline::SphereQLPipeline;
21
22/// A scalar pipeline-quality score.
23///
24/// Implementers evaluate some slice of pipeline health and return a value
25/// in `[0, 1]`. Higher = better. Metrics must be deterministic given a
26/// pipeline (no internal RNG), so auto-tuners can compare scores across
27/// configurations without noise.
28///
29/// `Send + Sync` are supertrait bounds so `Box<dyn QualityMetric>` can
30/// cross a thread boundary — the Python binding's `py.detach()` closure
31/// needs this, and every concrete metric in this crate is already
32/// thread-safe.
33pub trait QualityMetric: Send + Sync {
34    /// Short identifier for logs and tuner reports.
35    fn name(&self) -> &str;
36    /// Evaluate the pipeline. Must return a value in `[0, 1]`.
37    fn score(&self, pipeline: &SphereQLPipeline) -> f64;
38
39    /// Evaluate the pipeline and report any per-component breakdown in
40    /// the same pass. Returns `(score, components)` where each
41    /// component is `(name, weight, component_score)`.
42    ///
43    /// The default implementation returns the scalar score with no
44    /// components — correct for leaf metrics. [`CompositeMetric`]
45    /// overrides it so the tuner can record *which* sub-metric moved
46    /// on each trial without paying for a second evaluation. A
47    /// component whose score barely varies across trials carries no
48    /// signal for the knobs being swept — the fastest diagnosis for a
49    /// flat tuner landscape.
50    fn score_with_components(&self, pipeline: &SphereQLPipeline) -> (f64, Vec<(String, f64, f64)>) {
51        (self.score(pipeline), Vec::new())
52    }
53}
54
55// ── Territorial health ─────────────────────────────────────────────
56
57/// Mean pairwise `territorial_factor` across every category pair.
58///
59/// High = categories own distinct regions of S² (sharp boundaries).
60/// Low = cap overlap dominates, and most cross-category signal degenerates
61/// into shared-territory noise.
62pub struct TerritorialHealth;
63
64impl QualityMetric for TerritorialHealth {
65    fn name(&self) -> &str {
66        "territorial_health"
67    }
68
69    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
70        let layer = pipeline.category_layer();
71        let n = layer.num_categories();
72        if n < 2 {
73            return 1.0;
74        }
75        let sq = &layer.spatial_quality;
76        let mut sum = 0.0;
77        let mut count = 0usize;
78        for i in 0..n {
79            for j in (i + 1)..n {
80                sum += sq.territorial_factor(i, j);
81                count += 1;
82            }
83        }
84        if count == 0 {
85            1.0
86        } else {
87            (sum / count as f64).clamp(0.0, 1.0)
88        }
89    }
90}
91
92// ── Bridge coherence ───────────────────────────────────────────────
93
94/// Neutral score returned by [`BridgeCoherence`] when bridges exist but
95/// none are classified `Genuine`. A 0.0 in that case dominates the
96/// composite (weight 0.40) and flattens the tuner landscape on lossy
97/// projections; the midpoint preserves rank ordering for downstream
98/// metrics.
99pub const BRIDGE_COHERENCE_NEUTRAL: f64 = 0.5;
100
101/// Fraction of bridges classified as [`BridgeClassification::Genuine`].
102///
103/// High = the projection surfaces meaningful cross-domain connectors.
104/// Low = most "bridges" are `OverlapArtifact` (shared cap territory) or
105/// `Weak` (below median strength in a clean-territory pair). Returns `1.0`
106/// if there are no bridges at all (nothing to be incoherent about).
107pub struct BridgeCoherence;
108
109impl QualityMetric for BridgeCoherence {
110    fn name(&self) -> &str {
111        "bridge_coherence"
112    }
113
114    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
115        let layer = pipeline.category_layer();
116        let mut genuine = 0usize;
117        let mut total = 0usize;
118        for bridges in layer.graph.bridges.values() {
119            for b in bridges {
120                total += 1;
121                if b.classification == BridgeClassification::Genuine {
122                    genuine += 1;
123                }
124            }
125        }
126        if total == 0 {
127            // No bridges at all — nothing to be incoherent about.
128            1.0
129        } else if genuine == 0 {
130            // Bridges exist but none are `Genuine` — see
131            // [`BRIDGE_COHERENCE_NEUTRAL`] for why this isn't 0.0.
132            BRIDGE_COHERENCE_NEUTRAL
133        } else {
134            genuine as f64 / total as f64
135        }
136    }
137}
138
139// ── Bridge diversity ───────────────────────────────────────────────
140
141/// Fraction of distinct category pairs connected by at least one
142/// [`BridgeClassification::Genuine`] bridge.
143///
144/// High = the projection creates genuine cross-domain connectors across
145/// many category boundaries. Low = genuine bridges are concentrated in
146/// a few pairs or absent entirely. Returns `1.0` if there are fewer
147/// than 2 categories (no pairs to connect).
148///
149/// This replaces [`BridgeCoherence`] in the default composite because
150/// `genuine / total` converges to ~0.50 under the quantile-based
151/// classification floor regardless of projection quality.
152/// `BridgeDiversity` has real variance across projection configurations
153/// and directly measures the breadth of cross-domain connectivity that
154/// globetrot's marble dynamics and reasoning traces depend on.
155pub struct BridgeDiversity;
156
157impl QualityMetric for BridgeDiversity {
158    fn name(&self) -> &str {
159        "bridge_diversity"
160    }
161
162    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
163        let layer = pipeline.category_layer();
164        let n_cats = layer.num_categories();
165        if n_cats < 2 {
166            return 1.0;
167        }
168
169        let total_pairs = n_cats * (n_cats - 1) / 2;
170
171        let mut pairs_with_genuine: HashSet<(usize, usize)> = HashSet::new();
172        for (&(src, tgt), bridges) in &layer.graph.bridges {
173            if bridges
174                .iter()
175                .any(|b| b.classification == BridgeClassification::Genuine)
176            {
177                let pair = if src < tgt { (src, tgt) } else { (tgt, src) };
178                pairs_with_genuine.insert(pair);
179            }
180        }
181
182        (pairs_with_genuine.len() as f64 / total_pairs as f64).clamp(0.0, 1.0)
183    }
184}
185
186// ── Cluster silhouette ─────────────────────────────────────────────
187
188/// Simplified silhouette score of the category assignment on S²,
189/// remapped to `[0, 1]`.
190///
191/// Uses the centroid-based approximation (Hruschka et al., 2004):
192/// `a_i` = angular distance from item `i` to its own category centroid;
193/// `b_i` = minimum angular distance to any other category's centroid.
194/// This reduces complexity from O(N²) to O(N·C) where C is the number
195/// of categories, making it feasible for corpora with 500k+ items.
196///
197/// The centroid approximation correlates > 0.95 with the full silhouette
198/// on well-separated clusters and preserves the relative ranking across
199/// pipeline configurations that the auto-tuner needs.
200///
201/// High = categories form tight, well-separated clusters on S².
202/// Low = categories blur into each other on the projected surface.
203pub struct ClusterSilhouette;
204
205impl QualityMetric for ClusterSilhouette {
206    fn name(&self) -> &str {
207        "cluster_silhouette"
208    }
209
210    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
211        let layer = pipeline.category_layer();
212        let n_cats = layer.num_categories();
213        if n_cats < 2 {
214            return 1.0;
215        }
216
217        let points = pipeline.metric_points();
218        if points.positions.len() < 2 {
219            return 1.0;
220        }
221
222        // Collect category centroids from the already-computed summaries.
223        // These are projected positions on S² — no extra projection pass needed.
224        let centroids: Vec<SphericalPoint> = layer
225            .summaries
226            .iter()
227            .map(|s| s.centroid_position)
228            .collect();
229
230        let mut silhouette_sum = 0.0;
231        let mut scored_points = 0usize;
232
233        for (sp, cat) in points.positions.iter().zip(&points.category_indices) {
234            let Some(ci) = *cat else {
235                continue;
236            };
237
238            // a(i): distance to own category centroid (simplified silhouette).
239            let a = angular_distance(sp, &centroids[ci]);
240            // Items coinciding with their centroid would contribute a
241            // guaranteed s = 1.0 regardless of separation — skip them.
242            if a < 1e-12 {
243                continue;
244            }
245
246            // b(i): minimum distance to any other category's centroid.
247            let mut b = f64::INFINITY;
248            for (j, centroid) in centroids.iter().enumerate() {
249                if j == ci {
250                    continue;
251                }
252                let d = angular_distance(sp, centroid);
253                if d < b {
254                    b = d;
255                }
256            }
257            if b.is_infinite() {
258                continue;
259            }
260
261            let s = (b - a) / a.max(b).max(f64::EPSILON);
262            silhouette_sum += s;
263            scored_points += 1;
264        }
265
266        if scored_points == 0 {
267            return 1.0;
268        }
269        let mean_s = silhouette_sum / scored_points as f64;
270        ((mean_s + 1.0) / 2.0).clamp(0.0, 1.0)
271    }
272}
273
274// ── Graph modularity ───────────────────────────────────────────────
275
276/// Corpus size at which [`GraphModularity`] switches its k-NN edge
277/// construction from the exact O(N²) scan to the RP-forest ANN index.
278/// Mirrors the brute-force/ANN crossover used by UMAP's graph builder.
279const MODULARITY_ANN_THRESHOLD: usize = 2000;
280
281/// Build the symmetric k-NN edge set over projected positions: edge
282/// `{i, j}` exists if `j ∈ top-k(i)` OR `i ∈ top-k(j)`, keyed as
283/// `(min(i, j), max(i, j))` in a `HashSet` to dedupe the union.
284///
285/// `exact` selects the all-pairs angular-distance scan; otherwise the
286/// RP-forest [`AnnIndex`] is used. The two rankings are interchangeable:
287/// `angular_distance` depends only on each point's direction (its
288/// `unit_cartesian`), and cosine similarity over those directions is
289/// strictly monotone with the angle, so descending ANN similarity
290/// reproduces the ascending-angular ordering up to ANN recall. The ANN
291/// index is seed-deterministic, preserving the [`QualityMetric`]
292/// determinism contract.
293fn knn_edges(positions: &[SphericalPoint], k: usize, exact: bool) -> HashSet<(usize, usize)> {
294    let n = positions.len();
295    let mut edges: HashSet<(usize, usize)> = HashSet::new();
296    if exact {
297        for i in 0..n {
298            let mut dists: Vec<(usize, f64)> = (0..n)
299                .filter(|&j| j != i)
300                .map(|j| (j, angular_distance(&positions[i], &positions[j])))
301                .collect();
302            dists.sort_by(|a, b| a.1.total_cmp(&b.1));
303            for &(j, _) in dists.iter().take(k) {
304                let e = if i < j { (i, j) } else { (j, i) };
305                edges.insert(e);
306            }
307        }
308    } else {
309        let coords: Vec<Vec<f64>> = positions
310            .iter()
311            .map(|p| {
312                let c = spherical_to_cartesian(p);
313                vec![c.x, c.y, c.z]
314            })
315            .collect();
316        let index = AnnIndex::build(&coords, &AnnConfig::default());
317        for i in 0..n {
318            for (j, _) in index.query_by_index(i, k) {
319                let e = if i < j { (i, j) } else { (j, i) };
320                edges.insert(e);
321            }
322        }
323    }
324    edges
325}
326
327/// Modularity of the category assignment on the k-NN graph of projected
328/// positions on S².
329///
330/// Modularity measures how well a partition (here: categories) aligns
331/// with the community structure of a graph. Formally for a graph with
332/// `m` edges, each node `i` with degree `kᵢ`, and community assignment
333/// `cᵢ`:
334///
335/// ```text
336/// Q = Σ_c [ (L_c / m) − (D_c / 2m)² ]
337/// ```
338///
339/// where `L_c` is the number of edges inside community `c` and `D_c` is
340/// the total degree of its members. Raw `Q` lies in `[-0.5, 1.0]`. We
341/// clamp to `[0, 1]` since negative modularity (anti-structured
342/// partition) is strictly worse than random assignment and shouldn't be
343/// rewarded differentially by the tuner.
344///
345/// This metric is **connectivity-native** — it evaluates "are same-category
346/// points close *in the neighbor graph*" rather than "are same-category
347/// points close in raw angular distance". That makes it an honest
348/// objective for connectivity-preserving projections like
349/// [`LaplacianEigenmapProjection`](crate::laplacian::LaplacianEigenmapProjection),
350/// which otherwise get discounted by variance-centric metrics
351/// ([`ClusterSilhouette`]) that reward PCA's spread.
352///
353/// Edge construction is exact (all-pairs) below
354/// [`MODULARITY_ANN_THRESHOLD`] items and ANN-backed above it, keeping
355/// the metric usable inside tuner loops on 100k–500k corpora.
356pub struct GraphModularity {
357    /// Number of nearest neighbors per node in the k-NN graph.
358    ///
359    /// Larger `k` smooths local structure and blurs category boundaries;
360    /// smaller `k` is more sensitive to noise but resolves tighter
361    /// communities. Default 15.
362    pub k: usize,
363}
364
365impl Default for GraphModularity {
366    fn default() -> Self {
367        Self { k: 15 }
368    }
369}
370
371impl GraphModularity {
372    pub fn new(k: usize) -> Self {
373        Self { k }
374    }
375}
376
377impl QualityMetric for GraphModularity {
378    fn name(&self) -> &str {
379        "graph_modularity"
380    }
381
382    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
383        let layer = pipeline.category_layer();
384        let n_cats = layer.num_categories();
385        if n_cats < 2 {
386            return 1.0;
387        }
388
389        let points = pipeline.metric_points();
390        let n = points.positions.len();
391        if n < 2 {
392            return 1.0;
393        }
394        let item_cats = &points.category_indices;
395
396        let k = self.k.min(n - 1).max(1);
397        let edges = knn_edges(&points.positions, k, n < MODULARITY_ANN_THRESHOLD);
398
399        let m = edges.len() as f64;
400        if m < 1.0 {
401            return 0.0;
402        }
403
404        // Degrees.
405        let mut degree = vec![0usize; n];
406        for &(a, b) in &edges {
407            degree[a] += 1;
408            degree[b] += 1;
409        }
410
411        // Per-category totals.
412        let mut intra_edges = vec![0.0f64; n_cats];
413        let mut degree_sum = vec![0.0f64; n_cats];
414        for (i, &d) in degree.iter().enumerate() {
415            if let Some(c) = item_cats[i] {
416                degree_sum[c] += d as f64;
417            }
418        }
419        for &(a, b) in &edges {
420            if let (Some(ca), Some(cb)) = (item_cats[a], item_cats[b])
421                && ca == cb
422            {
423                intra_edges[ca] += 1.0;
424            }
425        }
426
427        let two_m = 2.0 * m;
428        let q: f64 = (0..n_cats)
429            .map(|c| (intra_edges[c] / m) - (degree_sum[c] / two_m).powi(2))
430            .sum();
431
432        // Clamp raw [-0.5, 1.0] range to [0, 1] — negative modularity means
433        // the partition anti-correlates with the graph, which is strictly
434        // worse than a random partition (Q ≈ 0) for tuner purposes.
435        q.clamp(0.0, 1.0)
436    }
437}
438
439// ── Composite ──────────────────────────────────────────────────────
440
441/// Weighted linear combination of multiple [`QualityMetric`]s.
442///
443/// Weights are renormalized to sum to 1.0 at construction, so the resulting
444/// score stays in `[0, 1]` when every sub-metric also does.
445pub struct CompositeMetric {
446    label: String,
447    components: Vec<(Box<dyn QualityMetric>, f64)>,
448}
449
450impl CompositeMetric {
451    /// Build a composite from (metric, weight) pairs. Weights with
452    /// non-positive values are dropped; remaining weights are renormalized
453    /// to sum to 1.
454    pub fn new(label: impl Into<String>, components: Vec<(Box<dyn QualityMetric>, f64)>) -> Self {
455        let filtered: Vec<(Box<dyn QualityMetric>, f64)> =
456            components.into_iter().filter(|(_, w)| *w > 0.0).collect();
457        let sum: f64 = filtered.iter().map(|(_, w)| *w).sum();
458        let components: Vec<(Box<dyn QualityMetric>, f64)> = if sum > 0.0 {
459            filtered.into_iter().map(|(m, w)| (m, w / sum)).collect()
460        } else {
461            Vec::new()
462        };
463        Self {
464            label: label.into(),
465            components,
466        }
467    }
468
469    /// Default composite: 30% bridge diversity + 25% territorial health +
470    /// 25% cluster silhouette + 20% graph modularity.
471    ///
472    /// Bridge diversity replaces bridge coherence (which converges to ~0.50
473    /// under quantile-based classification). Graph modularity is included
474    /// to reward connectivity-preserving projections (UMAP, Laplacian)
475    /// alongside variance-centric ones (PCA).
476    pub fn default_composite() -> Self {
477        Self::new(
478            "default_composite",
479            vec![
480                (Box::new(BridgeDiversity) as Box<dyn QualityMetric>, 0.30),
481                (Box::new(TerritorialHealth) as Box<dyn QualityMetric>, 0.25),
482                (Box::new(ClusterSilhouette) as Box<dyn QualityMetric>, 0.25),
483                (
484                    Box::new(GraphModularity::default()) as Box<dyn QualityMetric>,
485                    0.20,
486                ),
487            ],
488        )
489    }
490
491    /// Connectivity-native composite: 40% graph modularity + 35% bridge
492    /// diversity + 25% territorial health. Designed as a counter-hypothesis
493    /// to [`Self::default_composite`] — which weights the variance-centric
494    /// [`ClusterSilhouette`] and systematically rewards PCA-style spread.
495    /// This composite instead rewards projections that preserve
496    /// same-category adjacency in the k-NN graph regardless of how much
497    /// raw angular variance the projection produces.
498    pub fn connectivity_composite() -> Self {
499        Self::new(
500            "connectivity_composite",
501            vec![
502                (
503                    Box::new(GraphModularity::default()) as Box<dyn QualityMetric>,
504                    0.40,
505                ),
506                (Box::new(BridgeDiversity) as Box<dyn QualityMetric>, 0.35),
507                (Box::new(TerritorialHealth) as Box<dyn QualityMetric>, 0.25),
508            ],
509        )
510    }
511
512    /// Score each component separately. Useful for diagnostic reports where
513    /// the user wants to see which sub-metric dominates the composite.
514    pub fn score_components(&self, pipeline: &SphereQLPipeline) -> Vec<(String, f64, f64)> {
515        self.components
516            .iter()
517            .map(|(m, w)| (m.name().to_string(), *w, m.score(pipeline)))
518            .collect()
519    }
520}
521
522impl QualityMetric for CompositeMetric {
523    fn name(&self) -> &str {
524        &self.label
525    }
526
527    fn score(&self, pipeline: &SphereQLPipeline) -> f64 {
528        if self.components.is_empty() {
529            return 0.0;
530        }
531        let total: f64 = self
532            .components
533            .iter()
534            .map(|(m, w)| w * m.score(pipeline))
535            .sum();
536        total.clamp(0.0, 1.0)
537    }
538
539    fn score_with_components(&self, pipeline: &SphereQLPipeline) -> (f64, Vec<(String, f64, f64)>) {
540        if self.components.is_empty() {
541            return (0.0, Vec::new());
542        }
543        // Each component is evaluated exactly once; the composite total
544        // is recomposed from the breakdown so the two always agree.
545        let breakdown = self.score_components(pipeline);
546        let total: f64 = breakdown.iter().map(|(_, w, s)| w * s).sum();
547        (total.clamp(0.0, 1.0), breakdown)
548    }
549}
550
551// ── Tests ──────────────────────────────────────────────────────────
552
553#[cfg(test)]
554mod tests {
555    use super::*;
556    use crate::pipeline::{PipelineInput, SphereQLPipeline};
557
558    fn make_pipeline() -> SphereQLPipeline {
559        let n = 30;
560        let dim = 10;
561        let mut embeddings = Vec::new();
562        let mut categories = Vec::new();
563        for i in 0..n {
564            let mut v = vec![0.0; dim];
565            if i < n / 2 {
566                v[0] = 1.0 + (i as f64 * 0.01);
567                v[1] = 0.1;
568                categories.push("alpha".to_string());
569            } else {
570                v[0] = 0.1;
571                v[1] = 1.0 + (i as f64 * 0.01);
572                categories.push("beta".to_string());
573            }
574            v[2] = 0.05 * i as f64;
575            embeddings.push(v);
576        }
577        SphereQLPipeline::new(PipelineInput {
578            categories,
579            embeddings,
580        })
581        .unwrap()
582    }
583
584    #[test]
585    fn territorial_health_in_range() {
586        let p = make_pipeline();
587        let s = TerritorialHealth.score(&p);
588        assert!((0.0..=1.0).contains(&s), "got {s}");
589    }
590
591    #[test]
592    fn bridge_coherence_in_range() {
593        let p = make_pipeline();
594        let s = BridgeCoherence.score(&p);
595        assert!((0.0..=1.0).contains(&s), "got {s}");
596    }
597
598    #[test]
599    fn bridge_coherence_returns_neutral_when_no_genuine() {
600        // BridgeCoherence returns 0.5 (not 0.0) when bridges exist but
601        // none are Genuine. Force that state by setting
602        // `min_evr_for_classification` above any achievable EVR — every
603        // bridge falls back to Weak and we exercise the neutral arm.
604        use crate::config::PipelineConfig;
605        let n = 30;
606        let dim = 10;
607        let mut embeddings = Vec::new();
608        let mut categories = Vec::new();
609        for i in 0..n {
610            let mut v = vec![0.0; dim];
611            if i < n / 2 {
612                v[0] = 1.0 + (i as f64 * 0.01);
613                v[1] = 0.1;
614                categories.push("alpha".to_string());
615            } else {
616                v[0] = 0.1;
617                v[1] = 1.0 + (i as f64 * 0.01);
618                categories.push("beta".to_string());
619            }
620            v[2] = 0.05 * i as f64;
621            embeddings.push(v);
622        }
623        let mut config = PipelineConfig::default();
624        config.bridges.min_evr_for_classification = 1.5;
625        let p = SphereQLPipeline::new_with_config(
626            PipelineInput {
627                categories,
628                embeddings,
629            },
630            config,
631        )
632        .expect("pipeline build failed");
633
634        let has_bridges = p
635            .category_layer()
636            .graph
637            .bridges
638            .values()
639            .any(|v| !v.is_empty());
640
641        let score = BridgeCoherence.score(&p);
642        if has_bridges {
643            assert!(
644                (score - BRIDGE_COHERENCE_NEUTRAL).abs() < 1e-12,
645                "expected neutral {BRIDGE_COHERENCE_NEUTRAL} when bridges exist but none Genuine, got {score}"
646            );
647        } else {
648            // No bridges at all → the original "nothing to be
649            // incoherent about" path returns 1.0.
650            assert!((score - 1.0).abs() < 1e-12);
651        }
652    }
653
654    #[test]
655    fn cluster_silhouette_in_range() {
656        let p = make_pipeline();
657        let s = ClusterSilhouette.score(&p);
658        assert!((0.0..=1.0).contains(&s), "got {s}");
659    }
660
661    #[test]
662    fn composite_in_range() {
663        let p = make_pipeline();
664        let m = CompositeMetric::default_composite();
665        let s = m.score(&p);
666        assert!((0.0..=1.0).contains(&s));
667    }
668
669    #[test]
670    fn composite_with_zero_weight_drops_component() {
671        let m = CompositeMetric::new(
672            "test",
673            vec![
674                (Box::new(TerritorialHealth) as Box<dyn QualityMetric>, 1.0),
675                (Box::new(BridgeCoherence) as Box<dyn QualityMetric>, 0.0),
676            ],
677        );
678        let p = make_pipeline();
679        let combined = m.score(&p);
680        let solo = TerritorialHealth.score(&p);
681        // With BridgeCoherence dropped, composite should equal solo.
682        assert!((combined - solo).abs() < 1e-12);
683    }
684
685    #[test]
686    fn composite_renormalizes_weights() {
687        // Absolute weights 2.0 and 3.0 should normalize to 0.4 and 0.6.
688        let m = CompositeMetric::new(
689            "test",
690            vec![
691                (Box::new(TerritorialHealth) as Box<dyn QualityMetric>, 2.0),
692                (Box::new(BridgeCoherence) as Box<dyn QualityMetric>, 3.0),
693            ],
694        );
695        let p = make_pipeline();
696        let combined = m.score(&p);
697        let expected = 0.4 * TerritorialHealth.score(&p) + 0.6 * BridgeCoherence.score(&p);
698        assert!((combined - expected).abs() < 1e-12);
699    }
700
701    #[test]
702    fn composite_component_breakdown_sums_to_score() {
703        let p = make_pipeline();
704        let m = CompositeMetric::default_composite();
705        let breakdown = m.score_components(&p);
706        let weighted: f64 = breakdown.iter().map(|(_, w, s)| w * s).sum();
707        let total = m.score(&p);
708        assert!((weighted - total).abs() < 1e-12);
709    }
710
711    #[test]
712    fn score_with_components_matches_score() {
713        let p = make_pipeline();
714        let m = CompositeMetric::default_composite();
715        let (total, components) = m.score_with_components(&p);
716        assert!((total - m.score(&p)).abs() < 1e-12);
717        assert_eq!(components.len(), 4);
718        let recomposed: f64 = components.iter().map(|(_, w, s)| w * s).sum();
719        assert!((total - recomposed).abs() < 1e-12);
720    }
721
722    #[test]
723    fn score_with_components_default_is_empty_for_leaf_metrics() {
724        let p = make_pipeline();
725        let (total, components) = TerritorialHealth.score_with_components(&p);
726        assert!((total - TerritorialHealth.score(&p)).abs() < 1e-12);
727        assert!(components.is_empty());
728    }
729
730    #[test]
731    fn metric_names_stable() {
732        assert_eq!(TerritorialHealth.name(), "territorial_health");
733        assert_eq!(BridgeCoherence.name(), "bridge_coherence");
734        assert_eq!(BridgeDiversity.name(), "bridge_diversity");
735        assert_eq!(ClusterSilhouette.name(), "cluster_silhouette");
736        assert_eq!(GraphModularity::default().name(), "graph_modularity");
737        assert_eq!(
738            CompositeMetric::default_composite().name(),
739            "default_composite"
740        );
741        assert_eq!(
742            CompositeMetric::connectivity_composite().name(),
743            "connectivity_composite"
744        );
745    }
746
747    #[test]
748    fn bridge_diversity_in_range() {
749        let p = make_pipeline();
750        let s = BridgeDiversity.score(&p);
751        assert!((0.0..=1.0).contains(&s), "got {s}");
752    }
753
754    #[test]
755    fn bridge_diversity_is_deterministic() {
756        let p = make_pipeline();
757        let a = BridgeDiversity.score(&p);
758        let b = BridgeDiversity.score(&p);
759        assert_eq!(a, b);
760    }
761
762    #[test]
763    fn default_composite_includes_bridge_diversity_not_coherence() {
764        let p = make_pipeline();
765        let m = CompositeMetric::default_composite();
766        let breakdown = m.score_components(&p);
767        let names: Vec<&str> = breakdown.iter().map(|(n, _, _)| n.as_str()).collect();
768        assert!(
769            names.contains(&"bridge_diversity"),
770            "default composite should include bridge_diversity, got {names:?}"
771        );
772        assert!(
773            !names.contains(&"bridge_coherence"),
774            "default composite should NOT include bridge_coherence, got {names:?}"
775        );
776        assert!(
777            names.contains(&"graph_modularity"),
778            "default composite should include graph_modularity, got {names:?}"
779        );
780    }
781
782    #[test]
783    fn default_composite_has_four_components() {
784        let p = make_pipeline();
785        let m = CompositeMetric::default_composite();
786        let breakdown = m.score_components(&p);
787        assert_eq!(
788            breakdown.len(),
789            4,
790            "default composite should have 4 components"
791        );
792    }
793
794    #[test]
795    fn silhouette_is_deterministic() {
796        let p = make_pipeline();
797        let a = ClusterSilhouette.score(&p);
798        let b = ClusterSilhouette.score(&p);
799        assert_eq!(a, b);
800    }
801
802    #[test]
803    fn cluster_silhouette_separates_well_structured_from_random() {
804        // make_pipeline() builds two disjoint clusters assigned to
805        // distinct categories. The simplified silhouette should produce
806        // a score well above the random-partition baseline of 0.5,
807        // validating that the centroid approximation still captures
808        // cluster quality.
809        let p = make_pipeline();
810        let s = ClusterSilhouette.score(&p);
811        assert!(
812            s > 0.55,
813            "well-separated clusters should score above random baseline, got {s}"
814        );
815    }
816
817    #[test]
818    fn graph_modularity_in_range() {
819        let p = make_pipeline();
820        let s = GraphModularity::default().score(&p);
821        assert!((0.0..=1.0).contains(&s), "got {s}");
822    }
823
824    #[test]
825    fn graph_modularity_is_deterministic() {
826        let p = make_pipeline();
827        let m = GraphModularity::default();
828        let a = m.score(&p);
829        let b = m.score(&p);
830        assert_eq!(a, b);
831    }
832
833    #[test]
834    fn graph_modularity_detects_real_structure() {
835        // With two well-separated clusters assigned to distinct categories,
836        // modularity should be materially positive. This is the property
837        // that makes the metric useful: a partition aligned with graph
838        // adjacency scores higher than random.
839        let p = make_pipeline();
840        let s = GraphModularity::default().score(&p);
841        // The test corpus is deliberately well-structured (two disjoint
842        // clusters, distinct categories). Modularity should comfortably
843        // exceed the "random partition" baseline near 0.
844        assert!(
845            s > 0.1,
846            "well-separated category assignment should have Q > 0.1, got {s}"
847        );
848    }
849
850    #[test]
851    fn graph_modularity_k_parameter_respected() {
852        let p = make_pipeline();
853        let s_small = GraphModularity::new(3).score(&p);
854        let s_large = GraphModularity::new(15).score(&p);
855        // Both should be valid scores; the exact relationship between k
856        // and the score is corpus-dependent, so we only check validity.
857        assert!((0.0..=1.0).contains(&s_small));
858        assert!((0.0..=1.0).contains(&s_large));
859    }
860
861    #[test]
862    fn knn_edges_ann_path_respects_cluster_structure() {
863        // Two tight, well-separated caps on S². Both the exact and the
864        // ANN edge builders must keep every edge inside its own cap —
865        // the ANN path is otherwise only reachable at >= 2000 items, so
866        // this drives it directly through the helper.
867        let mut positions = Vec::new();
868        for i in 0..60 {
869            let t = i as f64;
870            positions.push(SphericalPoint::new_unchecked(
871                1.0,
872                0.3 + t * 0.001,
873                1.2 + t * 0.0005,
874            ));
875        }
876        for i in 0..60 {
877            let t = i as f64;
878            positions.push(SphericalPoint::new_unchecked(
879                1.0,
880                3.5 + t * 0.001,
881                1.8 + t * 0.0005,
882            ));
883        }
884
885        for exact in [true, false] {
886            let edges = knn_edges(&positions, 5, exact);
887            assert!(!edges.is_empty(), "edge set empty (exact={exact})");
888            for &(a, b) in &edges {
889                assert_eq!(
890                    a < 60,
891                    b < 60,
892                    "edge ({a}, {b}) crosses caps (exact={exact})"
893                );
894            }
895        }
896    }
897
898    #[test]
899    fn connectivity_composite_weights_modularity_most() {
900        // The connectivity composite should score higher than the default
901        // composite when graph modularity is high — sanity check that the
902        // weighted combination behaves as advertised.
903        let p = make_pipeline();
904        let cc = CompositeMetric::connectivity_composite();
905        let breakdown = cc.score_components(&p);
906        // graph_modularity should be the heaviest-weighted component.
907        let heaviest = breakdown
908            .iter()
909            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
910            .unwrap();
911        assert_eq!(heaviest.0, "graph_modularity");
912        assert!((heaviest.1 - 0.40).abs() < 1e-12);
913    }
914}