gam_solve/topology_selector.rs
1//! Auto-selection helpers for latent-coordinate topology candidates.
2//!
3//! This module deliberately returns a single selected topology rather than a
4//! stacked predictive-distribution mixture. The selector is an evidence
5//! comparator: its inputs are one scalar REML/LAML evidence summary per fitted
6//! topology plus null-space normalizers. It does not receive a per-observation
7//! held-out log predictive density table, nor does the saved-model prediction
8//! path retain the alternative candidate fits required to evaluate a mixture at
9//! new rows. A pseudo-BMA softmax over the scalar TK scores would therefore be
10//! model-probability averaging of incomparable topology objects, not stacking of
11//! predictive distributions. Until a real per-point LOO/ALO density consumer is
12//! introduced alongside retained candidate predictors, the principled live path
13//! is winner-take-all with deterministic score ordering.
14//!
15//! ## Selection-time stacking is now wired (WP-C / Object 3a)
16//!
17//! The winner-take-all blocker above is specifically about the SAVED-MODEL
18//! PREDICTION path: that path does not retain alternative candidate predictors,
19//! so it cannot evaluate a mixture (or any losing candidate) at new rows. That
20//! blocker still stands for out-of-sample prediction.
21//!
22//! It does NOT, however, block stacking *at selection time*. During the race
23//! the candidate fits all exist, so we can build a per-observation held-out
24//! predictive log-density table by cross-validation folds within the race and
25//! feed it to [`crate::evidence::solve_stacking_weights`]. This module
26//! does exactly that when a race mixes model classes (smooth manifold vs the
27//! discrete-mixture rung): the HEADLINE ranking statistic switches to held-out
28//! predictive log-density / stacking weights, with the rank-aware Laplace
29//! evidence retained as corroboration. Same-class races keep today's
30//! winner-take-all evidence behavior. The class mix is auto-detected from the
31//! candidate kinds — there is no flag.
32//!
33//! What is still future work: persisting a mixture predictor for OOS prediction
34//! on new rows. The selection-time stacking table is computed from fits that
35//! exist only during the race; it is not retained for the saved-model
36//! prediction path. That OOS-retention package is out of scope here.
37
38use crate::row_sampling_measure::CoresetCertificate;
39use crate::evidence::{
40 GaussianMixtureConfig, StackingConfig, StackingWeights, TopologyScoreScale,
41 UNION_STRUCTURE_LADDER, UnionStructure, UnionStructureFit, fit_gaussian_mixture,
42 fit_union_ladder, fit_union_structure, solve_stacking_weights, union_per_point_log_density,
43};
44use crate::priority_selection::{PriorityCandidate, rank_priority_candidates};
45use ndarray::{Array2, ArrayView2};
46use serde_json::Value as JsonValue;
47use std::sync::Mutex;
48use std::time::{Duration, Instant};
49
50const TK_LOG_2PI: f64 = 1.8378770664093453_f64;
51
52/// Fixed component ladder swept for the discrete-mixture rung. Deterministic;
53/// each `k` is priced by its own free-parameter count via the rank-aware
54/// Laplace evidence and ranked against the others in-class before the winning
55/// mixture order competes cross-class.
56pub const MIXTURE_K_LADDER: &[usize] = &[1, 2, 3, 5, 7, 9];
57
58/// Number of cross-validation folds used to build the selection-time held-out
59/// predictive log-density table for cross-class stacking. Fixed (no flag).
60pub const STACKING_CV_FOLDS: usize = 5;
61
62/// Default seed mixed into the deterministic CV fold assignment for cross-class
63/// stacking. Matches the pyo3 `seed = 11` default of `adjudicate_atom_shape`, so
64/// the FFI default and the in-tree default produce the identical (deterministic)
65/// folding. Different seeds yield different — but still deterministic — foldings;
66/// the same seed always reproduces the same folding (#1386).
67pub const STACKING_CV_SEED: u64 = 11;
68
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
70pub enum AutoTopologyKind {
71 Euclidean,
72 Circle,
73 Sphere,
74 Torus,
75 Cylinder,
76 /// Constant-curvature space form `M_κ` with the sectional curvature κ
77 /// ESTIMATED (#944 stage 4). This single candidate replaces the family of
78 /// fixed simply-connected constant-curvature geometries — `Euclidean`
79 /// (κ = 0), `Sphere` (κ > 0), and the hyperbolic disk (κ < 0) — which are
80 /// all the same `S^d ← ℝ^d → H^d` manifold at different κ. Rather than
81 /// racing those as separate discrete candidates, the curvature is fitted as
82 /// a continuous estimand (`curv(...)`), so the topology stack only has to
83 /// adjudicate genuinely non-homotopic candidates (`Circle`/`Torus`/
84 /// `Cylinder`/discrete rungs). "Within a candidate, curvature is estimated."
85 ConstantCurvature,
86 /// Discrete `k`-component Gaussian-mixture rung (Object 3a / WP-C). Not a
87 /// smooth manifold: a finite-parameter clustered density. Its presence in a
88 /// race triggers cross-class stacking adjudication.
89 Mixture {
90 k: usize,
91 },
92 /// Structured-union composite (#907): a small FIXED set of component
93 /// structures joined by a hard responsibility split (e.g. circle+circle,
94 /// circle+cluster, line+cluster). Like the mixture rung it is a discrete,
95 /// non-smooth density class — its evidence is the SUMMED rank-aware Laplace
96 /// evidence of its components, priced by total parameter count. Its presence
97 /// in a race triggers cross-class stacking adjudication.
98 Union {
99 structure: UnionStructure,
100 },
101}
102
103impl AutoTopologyKind {
104 /// Stable display name. The mixture variant carries its order `k`, so its
105 /// label is rendered with [`AutoTopologyKind::display_name`]; the borrowed
106 /// `as_str` returns the class tag for the smooth variants and the bare
107 /// `"mixture"` tag for the discrete rung.
108 pub const fn as_str(self) -> &'static str {
109 match self {
110 AutoTopologyKind::Euclidean => "euclidean",
111 AutoTopologyKind::Circle => "circle",
112 AutoTopologyKind::Sphere => "sphere",
113 AutoTopologyKind::Torus => "torus",
114 AutoTopologyKind::Cylinder => "cylinder",
115 AutoTopologyKind::ConstantCurvature => "constant_curvature",
116 AutoTopologyKind::Mixture { .. } => "mixture",
117 AutoTopologyKind::Union { structure } => structure.as_str(),
118 }
119 }
120
121 /// Owned display name including the mixture order, e.g. `"mixture_k7"`, or
122 /// the union composite tag, e.g. `"union_circle+circle"`.
123 pub fn display_name(self) -> String {
124 match self {
125 AutoTopologyKind::Mixture { k } => format!("mixture_k{k}"),
126 other => other.as_str().to_string(),
127 }
128 }
129
130 /// `true` iff this candidate is the discrete-mixture model class (as
131 /// opposed to a smooth manifold / Euclidean latent topology).
132 pub const fn is_discrete_mixture(self) -> bool {
133 matches!(self, AutoTopologyKind::Mixture { .. })
134 }
135
136 /// `true` iff this candidate is the structured-union composite class (#907).
137 pub const fn is_structured_union(self) -> bool {
138 matches!(self, AutoTopologyKind::Union { .. })
139 }
140
141 /// `true` iff this candidate is a discrete (non-smooth) density class — the
142 /// mixture rung or a structured union. Cross-class stacking adjudication is
143 /// triggered when a race mixes a smooth/Euclidean candidate with any
144 /// discrete one.
145 pub const fn is_discrete_class(self) -> bool {
146 self.is_discrete_mixture() || self.is_structured_union()
147 }
148
149 pub fn parse(value: &str) -> Result<Self, String> {
150 let normalized = value.trim().to_ascii_lowercase().replace('-', "_");
151 if let Some(structure) = parse_union_name(&normalized) {
152 return Ok(AutoTopologyKind::Union { structure });
153 }
154 if let Some(rest) = normalized.strip_prefix("mixture") {
155 // Accept "mixture", "mixture_k7", "mixture7", "mixture_7".
156 let digits: String = rest.chars().filter(|c| c.is_ascii_digit()).collect();
157 if digits.is_empty() {
158 // Bare "mixture" expands to the full ladder; callers that want a
159 // single order pass "mixture_k{n}". Here we default to the
160 // richest ladder order so a singleton request is still valid.
161 return Ok(AutoTopologyKind::Mixture {
162 k: *MIXTURE_K_LADDER.last().unwrap_or(&7),
163 });
164 }
165 let k: usize = digits
166 .parse()
167 .map_err(|_| format!("mixture order must be a positive integer; got {value:?}"))?;
168 if k == 0 {
169 return Err("mixture order k must be >= 1".to_string());
170 }
171 return Ok(AutoTopologyKind::Mixture { k });
172 }
173 match normalized.as_str() {
174 "euclidean" | "flat" | "euclidean_patch" | "euclideanpatch" => {
175 Ok(AutoTopologyKind::Euclidean)
176 }
177 "circle" | "periodic" | "s1" => Ok(AutoTopologyKind::Circle),
178 "sphere" | "s2" => Ok(AutoTopologyKind::Sphere),
179 "torus" => Ok(AutoTopologyKind::Torus),
180 "cylinder" => Ok(AutoTopologyKind::Cylinder),
181 "constant_curvature" | "curv" | "curvature" | "mkappa" | "m_kappa" => {
182 Ok(AutoTopologyKind::ConstantCurvature)
183 }
184 other => Err(format!(
185 "topology candidate must be euclidean, circle, sphere, torus, cylinder, constant_curvature, mixture[_k{{n}}], or a union (union_circle+circle, union_circle+cluster, union_line+cluster); got {other:?}"
186 )),
187 }
188 }
189
190 pub fn all() -> Vec<Self> {
191 vec![
192 AutoTopologyKind::Euclidean,
193 AutoTopologyKind::Circle,
194 AutoTopologyKind::Sphere,
195 AutoTopologyKind::Torus,
196 AutoTopologyKind::Cylinder,
197 ]
198 }
199
200 /// `true` iff this candidate is a FIXED member of the simply-connected
201 /// constant-curvature space-form family `M_κ` — the geometries that differ
202 /// only by their (fixed) sectional curvature κ and are therefore the *same*
203 /// continuous `S^d ← ℝ^d → H^d` manifold at different κ:
204 /// * [`Euclidean`](AutoTopologyKind::Euclidean) — κ = 0,
205 /// * [`Sphere`](AutoTopologyKind::Sphere) — κ > 0.
206 /// (`Circle`/`Torus`/`Cylinder` are NOT in this family: they are not
207 /// simply connected — distinct topology, not distinct curvature — so they
208 /// must keep racing as separate candidates.) The fitted-κ
209 /// [`ConstantCurvature`](AutoTopologyKind::ConstantCurvature) candidate is
210 /// itself the fusion target, not a member to be fused.
211 pub const fn is_fixed_constant_curvature_form(self) -> bool {
212 matches!(self, AutoTopologyKind::Euclidean | AutoTopologyKind::Sphere)
213 }
214
215 /// #944 stage 4 — "within a candidate, curvature is estimated." Collapse the
216 /// fixed constant-curvature space forms in a candidate set into ONE
217 /// [`ConstantCurvature`](AutoTopologyKind::ConstantCurvature) candidate whose
218 /// κ is fitted, so the discrete topology stack only adjudicates genuinely
219 /// non-homotopic candidates. Magic by default: the fusion fires exactly when
220 /// the set contains **≥ 2** fixed constant-curvature forms (e.g. both
221 /// `Euclidean` and `Sphere`) — a single such form is left untouched (there
222 /// is nothing to estimate κ *across*), and a set already carrying an
223 /// explicit `ConstantCurvature` candidate simply has its redundant fixed
224 /// forms removed.
225 ///
226 /// Order is preserved and otherwise stable: the fused `ConstantCurvature`
227 /// candidate takes the position of the first fixed form it replaces; all
228 /// non-family candidates (`Circle`/`Torus`/`Cylinder`/`Mixture`/`Union`) and
229 /// any duplicates keep their relative order. Idempotent.
230 pub fn fuse_constant_curvature_family(candidates: &[Self]) -> Vec<Self> {
231 let already_has_cc = candidates
232 .iter()
233 .any(|c| matches!(c, AutoTopologyKind::ConstantCurvature));
234 let fixed_form_count = candidates
235 .iter()
236 .filter(|c| c.is_fixed_constant_curvature_form())
237 .count();
238 // Fuse when ≥2 fixed forms are present, OR when an explicit
239 // ConstantCurvature candidate already subsumes ≥1 redundant fixed form.
240 let should_fuse = fixed_form_count >= 2 || (already_has_cc && fixed_form_count >= 1);
241 if !should_fuse {
242 return candidates.to_vec();
243 }
244 let mut out = Vec::with_capacity(candidates.len());
245 let mut emitted_cc = false;
246 for &c in candidates {
247 if c.is_fixed_constant_curvature_form() {
248 // Replace the first fixed form by the fitted-κ candidate (unless
249 // an explicit one already exists); drop the rest.
250 if !already_has_cc && !emitted_cc {
251 out.push(AutoTopologyKind::ConstantCurvature);
252 emitted_cc = true;
253 }
254 continue;
255 }
256 if matches!(c, AutoTopologyKind::ConstantCurvature) {
257 if emitted_cc {
258 continue; // collapse duplicate CC candidates
259 }
260 emitted_cc = true;
261 }
262 out.push(c);
263 }
264 out
265 }
266
267 /// The full discrete-mixture rung: one candidate per `k` in
268 /// [`MIXTURE_K_LADDER`].
269 pub fn mixture_ladder() -> Vec<Self> {
270 MIXTURE_K_LADDER
271 .iter()
272 .map(|&k| AutoTopologyKind::Mixture { k })
273 .collect()
274 }
275
276 /// The full structured-union rung: one candidate per composite in
277 /// [`UNION_STRUCTURE_LADDER`] (#907). Fixed and closed — no open-ended
278 /// structure search (that stays owned by #976's move set).
279 pub fn union_ladder() -> Vec<Self> {
280 UNION_STRUCTURE_LADDER
281 .iter()
282 .map(|&structure| AutoTopologyKind::Union { structure })
283 .collect()
284 }
285}
286
287/// Parse a structured-union composite name (#907). Accepts the canonical display
288/// tags (`union_circle+circle`, `union_circle+cluster`, `union_line+cluster`)
289/// and tolerant `_`/`-` separators in place of `+`. Returns `None` for any name
290/// that is not a union so [`AutoTopologyKind::parse`] can fall through to the
291/// smooth/mixture variants. Input is assumed already lowercased with `-`→`_`.
292pub fn parse_union_name(normalized: &str) -> Option<UnionStructure> {
293 let Some(rest) = normalized.strip_prefix("union") else {
294 return None;
295 };
296 // Canonicalize component separators: both `+` and the tolerant `_`/`-`
297 // forms collapse to `+`, and any leading separator after the `union`
298 // prefix is dropped (so `union_circle+circle` and `union__circle_circle`
299 // both normalize to `circle+circle`).
300 let body: String = rest
301 .chars()
302 .map(|c| if c == '_' || c == '-' { '+' } else { c })
303 .collect();
304 let body = body.trim_matches('+');
305 match body {
306 "circle+circle" => Some(UnionStructure::CircleCircle),
307 "circle+cluster" | "circle+point+cluster" | "circle+pointcluster" => {
308 Some(UnionStructure::CirclePointCluster)
309 }
310 "line+cluster" | "line+point+cluster" | "line+pointcluster" => {
311 Some(UnionStructure::LineCluster)
312 }
313 _ => None,
314 }
315}
316
317#[derive(Debug, Clone)]
318pub struct TopologyAutoSelector {
319 pub candidates: Vec<AutoTopologyKind>,
320 pub score_scale: TopologyScoreScale,
321 pub latent: Option<String>,
322}
323
324impl TopologyAutoSelector {
325 pub fn new(candidates: Option<Vec<AutoTopologyKind>>) -> Self {
326 Self {
327 candidates: candidates.unwrap_or_else(AutoTopologyKind::all),
328 score_scale: TopologyScoreScale::PerEffectiveDim,
329 latent: None,
330 }
331 }
332
333 pub fn from_json(value: &JsonValue) -> Result<Self, String> {
334 let obj = value
335 .as_object()
336 .ok_or_else(|| "topology_auto_selector must be an object".to_string())?;
337 let candidates = match obj.get("candidates").filter(|value| !value.is_null()) {
338 None => AutoTopologyKind::all(),
339 Some(raw) => {
340 let items = raw.as_array().ok_or_else(|| {
341 "topology_auto_selector.candidates must be a list".to_string()
342 })?;
343 if items.is_empty() {
344 return Err(
345 "topology_auto_selector.candidates must have at least one entry"
346 .to_string(),
347 );
348 }
349 let mut out = Vec::with_capacity(items.len());
350 for (idx, item) in items.iter().enumerate() {
351 let name = item.as_str().ok_or_else(|| {
352 format!("topology_auto_selector.candidates[{idx}] must be a string")
353 })?;
354 let kind = AutoTopologyKind::parse(name)?;
355 if out.contains(&kind) {
356 return Err(format!(
357 "topology_auto_selector duplicate candidate {:?}",
358 kind.as_str()
359 ));
360 }
361 out.push(kind);
362 }
363 out
364 }
365 };
366 let score_scale = match obj
367 .get("score_scale")
368 .and_then(JsonValue::as_str)
369 .unwrap_or("per_effective_dim")
370 .trim()
371 .to_ascii_lowercase()
372 .replace('-', "_")
373 .as_str()
374 {
375 "per_observation" => TopologyScoreScale::PerObservation,
376 "per_effective_dim" => TopologyScoreScale::PerEffectiveDim,
377 other => {
378 return Err(format!(
379 "topology_auto_selector.score_scale must be per_effective_dim or per_observation; got {other:?}"
380 ));
381 }
382 };
383 let latent = obj
384 .get("latent")
385 .filter(|value| !value.is_null())
386 .map(|value| {
387 value
388 .as_str()
389 .map(str::to_string)
390 .ok_or_else(|| "topology_auto_selector.latent must be a string".to_string())
391 })
392 .transpose()?;
393 Ok(Self {
394 candidates,
395 score_scale,
396 latent,
397 })
398 }
399}
400
401#[derive(Debug, Clone)]
402pub struct TopologyAutoFitEvidence<FitHandle> {
403 pub topology_name: String,
404 pub raw_reml: f64,
405 pub null_dim: f64,
406 pub null_space_logdet: Option<f64>,
407 pub effective_dim: f64,
408 pub n_obs: usize,
409 pub fit_handle: FitHandle,
410}
411
412#[derive(Debug, Clone)]
413pub struct TopologyAutoRankedFit<FitHandle> {
414 pub topology_name: String,
415 pub tk_score: f64,
416 pub raw_reml: f64,
417 pub effective_dim: f64,
418 pub n_obs: usize,
419 pub fit_handle: FitHandle,
420}
421
422#[derive(Debug, Clone)]
423pub struct TopologyAutoSelectorResult<FitHandle> {
424 pub ranked: Vec<TopologyAutoRankedFit<FitHandle>>,
425 pub winner_index: usize,
426}
427
428impl<FitHandle> TopologyAutoSelectorResult<FitHandle> {
429 pub fn winner(&self) -> Option<&TopologyAutoRankedFit<FitHandle>> {
430 self.ranked.get(self.winner_index)
431 }
432}
433
434/// Result for one candidate executed by [`run_topology_race_parallel`].
435#[derive(Debug, Clone)]
436pub struct TopologyRaceParallelCandidate<FitResult> {
437 /// Original position in the input candidate vector.
438 pub candidate_index: usize,
439 /// The number of Rayon workers made available to this candidate's fit body.
440 pub per_fit_threads: usize,
441 /// Wall-clock time spent inside the candidate's local Rayon pool.
442 pub wall_time: Duration,
443 /// The fit closure's output. Use `FitResult = Result<T, E>` when individual
444 /// candidate failures should be collected rather than short-circuiting.
445 pub result: FitResult,
446}
447
448#[derive(Debug, Clone, Copy, PartialEq, Eq)]
449struct TopologyRaceThreadPlan {
450 coordinator_threads: usize,
451 per_fit_threads: usize,
452 concurrent_fits: usize,
453}
454
455impl TopologyRaceThreadPlan {
456 fn for_budget(candidate_count: usize, max_total_threads: usize) -> Self {
457 let max_total_threads = max_total_threads.max(1);
458 if candidate_count <= 1 {
459 return Self {
460 coordinator_threads: 0,
461 per_fit_threads: max_total_threads,
462 concurrent_fits: candidate_count,
463 };
464 }
465
466 let concurrent_fits = if max_total_threads >= 4 {
467 candidate_count.min(max_total_threads / 2).max(1)
468 } else {
469 1
470 };
471 let coordinator_threads = concurrent_fits;
472 let remaining = max_total_threads.saturating_sub(coordinator_threads);
473 let per_fit_threads = if remaining == 0 {
474 1
475 } else {
476 (remaining / concurrent_fits).max(1)
477 };
478 Self {
479 coordinator_threads,
480 per_fit_threads,
481 concurrent_fits,
482 }
483 }
484}
485
486/// Run independent topology-race candidates concurrently with bounded nested
487/// Rayon use.
488///
489/// Each candidate is executed inside its own local Rayon pool, so fit internals
490/// that call `par_iter`, `rayon::join`, or faer-through-Rayon consume the
491/// candidate's `per_fit_threads` budget rather than the global pool. For
492/// multi-candidate races the runner batches candidates through a Rayon scope and
493/// chooses `concurrent_fits`/`per_fit_threads` so the coordinator workers plus
494/// per-fit workers do not exceed `std::thread::available_parallelism()` on hosts
495/// with at least two cores. Single-core hosts run candidates sequentially.
496///
497/// The return vector is in input order and keeps each closure output intact; use
498/// `FitResult = Result<T, E>` to collect per-candidate failures with wall times.
499pub fn run_topology_race_parallel<Candidate, FitResult, FitOne>(
500 candidates: Vec<Candidate>,
501 fit_one: FitOne,
502) -> Result<Vec<TopologyRaceParallelCandidate<FitResult>>, String>
503where
504 Candidate: Send,
505 FitResult: Send,
506 FitOne: Fn(Candidate) -> FitResult + Sync,
507{
508 let max_total_threads = std::thread::available_parallelism()
509 .map(std::num::NonZeroUsize::get)
510 .unwrap_or(1);
511 run_topology_race_parallel_with_budget(candidates, fit_one, max_total_threads)
512}
513
514fn run_topology_race_parallel_with_budget<Candidate, FitResult, FitOne>(
515 candidates: Vec<Candidate>,
516 fit_one: FitOne,
517 max_total_threads: usize,
518) -> Result<Vec<TopologyRaceParallelCandidate<FitResult>>, String>
519where
520 Candidate: Send,
521 FitResult: Send,
522 FitOne: Fn(Candidate) -> FitResult + Sync,
523{
524 let candidate_count = candidates.len();
525 if candidate_count == 0 {
526 return Ok(Vec::new());
527 }
528
529 let plan = TopologyRaceThreadPlan::for_budget(candidate_count, max_total_threads);
530 let mut candidates: Vec<Option<Candidate>> = candidates.into_iter().map(Some).collect();
531 let slots: Vec<Mutex<Option<TopologyRaceParallelCandidate<FitResult>>>> =
532 (0..candidate_count).map(|_| Mutex::new(None)).collect();
533 let pool_error: Mutex<Option<String>> = Mutex::new(None);
534
535 if plan.concurrent_fits <= 1 {
536 for idx in 0..candidate_count {
537 let candidate = candidates[idx]
538 .take()
539 .expect("topology race candidate must be present");
540 run_one_topology_race_candidate(
541 idx,
542 candidate,
543 &fit_one,
544 plan.per_fit_threads,
545 &slots[idx],
546 &pool_error,
547 );
548 if let Some(err) = pool_error.lock().expect("pool_error mutex poisoned").take() {
549 return Err(err);
550 }
551 }
552 } else {
553 let coordinator_pool = rayon::ThreadPoolBuilder::new()
554 .num_threads(plan.coordinator_threads)
555 .thread_name(|idx| format!("topology-race-coordinator-{idx}"))
556 .build()
557 .map_err(|err| format!("topology race coordinator Rayon pool: {err}"))?;
558 let mut batch_start = 0usize;
559 while batch_start < candidate_count {
560 let batch_end = (batch_start + plan.concurrent_fits).min(candidate_count);
561 coordinator_pool.scope(|scope| {
562 for idx in batch_start..batch_end {
563 let candidate = candidates[idx]
564 .take()
565 .expect("topology race candidate must be present");
566 let slot = &slots[idx];
567 let pool_error = &pool_error;
568 let fit_one = &fit_one;
569 scope.spawn(move |_| {
570 run_one_topology_race_candidate(
571 idx,
572 candidate,
573 fit_one,
574 plan.per_fit_threads,
575 slot,
576 pool_error,
577 );
578 });
579 }
580 });
581 if let Some(err) = pool_error.lock().expect("pool_error mutex poisoned").take() {
582 return Err(err);
583 }
584 batch_start = batch_end;
585 }
586 }
587
588 let mut out = Vec::with_capacity(candidate_count);
589 for (idx, slot) in slots.into_iter().enumerate() {
590 let row = slot
591 .into_inner()
592 .expect("topology race result mutex poisoned")
593 .ok_or_else(|| format!("topology race candidate {idx} did not produce a result"))?;
594 out.push(row);
595 }
596 Ok(out)
597}
598
599fn run_one_topology_race_candidate<Candidate, FitResult, FitOne>(
600 candidate_index: usize,
601 candidate: Candidate,
602 fit_one: &FitOne,
603 per_fit_threads: usize,
604 slot: &Mutex<Option<TopologyRaceParallelCandidate<FitResult>>>,
605 pool_error: &Mutex<Option<String>>,
606) where
607 Candidate: Send,
608 FitResult: Send,
609 FitOne: Fn(Candidate) -> FitResult + Sync,
610{
611 let pool = match rayon::ThreadPoolBuilder::new()
612 .num_threads(per_fit_threads)
613 .thread_name(move |idx| format!("topology-race-fit-{candidate_index}-{idx}"))
614 .build()
615 {
616 Ok(pool) => pool,
617 Err(err) => {
618 *pool_error.lock().expect("pool_error mutex poisoned") =
619 Some(format!("topology race candidate Rayon pool: {err}"));
620 return;
621 }
622 };
623
624 let started = Instant::now();
625 let result = pool.install(|| fit_one(candidate));
626 let wall_time = started.elapsed();
627 *slot.lock().expect("topology race result mutex poisoned") =
628 Some(TopologyRaceParallelCandidate {
629 candidate_index,
630 per_fit_threads,
631 wall_time,
632 result,
633 });
634}
635
636pub fn select_topology_with_fit<FitHandle, FitErr>(
637 selector: &TopologyAutoSelector,
638 mut fit_one: impl FnMut(AutoTopologyKind) -> Result<TopologyAutoFitEvidence<FitHandle>, FitErr>,
639) -> Result<TopologyAutoSelectorResult<FitHandle>, String>
640where
641 FitErr: ToString,
642{
643 // #944 stage 4: collapse fixed simply-connected constant-curvature forms
644 // (Euclidean/Sphere) into ONE estimated-κ ConstantCurvature candidate so the
645 // discrete stack only adjudicates genuinely non-homotopic topologies.
646 let fused = AutoTopologyKind::fuse_constant_curvature_family(&selector.candidates);
647 let mut ranked = Vec::with_capacity(fused.len());
648 let mut errors = Vec::new();
649 for candidate in &fused {
650 match fit_one(*candidate) {
651 Ok(evidence) => {
652 let tk_score = tk_normalized_score(
653 evidence.raw_reml,
654 evidence.null_dim,
655 evidence.null_space_logdet,
656 evidence.effective_dim,
657 evidence.n_obs,
658 selector.score_scale,
659 )?;
660 ranked.push(TopologyAutoRankedFit {
661 topology_name: evidence.topology_name,
662 tk_score,
663 raw_reml: evidence.raw_reml,
664 effective_dim: evidence.effective_dim,
665 n_obs: evidence.n_obs,
666 fit_handle: evidence.fit_handle,
667 });
668 }
669 Err(err) => errors.push(format!("{}: {}", candidate.as_str(), err.to_string())),
670 }
671 }
672 if ranked.is_empty() {
673 return Err(format!(
674 "TopologyAutoSelector found no fittable topology candidates{}",
675 if errors.is_empty() {
676 String::new()
677 } else {
678 format!(" ({})", errors.join("; "))
679 }
680 ));
681 }
682 // Sign convention (issue #396, see `solver::evidence`): `tk_score` is a
683 // minimised TK / REML cost, so LOWER is better. Route through the shared
684 // priority selector so topology ranking, seed screening, and model
685 // comparison share one deterministic ordering contract (#782).
686 ranked = rank_priority_candidates(
687 ranked
688 .into_iter()
689 .enumerate()
690 .map(|(idx, row)| {
691 let score = row.tk_score;
692 PriorityCandidate::new(row, idx, score, 0)
693 })
694 .collect(),
695 )
696 .into_iter()
697 .map(|row| row.item)
698 .collect();
699 Ok(TopologyAutoSelectorResult {
700 ranked,
701 winner_index: 0,
702 })
703}
704
705/// Driver-level parallel sibling of [`select_topology_with_fit`] (#1017 Phase 0).
706///
707/// Topology candidates are INDEPENDENT fits — the sequential
708/// [`select_topology_with_fit`] loop walks them one at a time, leaving 5–20× on
709/// the table on a multi-core host. This variant fans the candidate fits across
710/// the bounded-nested-Rayon [`run_topology_race_parallel`] driver (the same one
711/// the closure-profile grid and the SAE-resident race already use), then ranks
712/// the survivors through the IDENTICAL deterministic priority selector — results
713/// come back in input order, so the winner is bit-identical to the sequential
714/// path. The only contract difference is `fit_one: Fn + Sync` (each candidate
715/// fit must be callable concurrently) instead of `FnMut`; callers whose fit
716/// closure captures shared mutable state keep the sequential entry.
717pub fn select_topology_with_fit_parallel<FitHandle, FitErr>(
718 selector: &TopologyAutoSelector,
719 fit_one: impl Fn(AutoTopologyKind) -> Result<TopologyAutoFitEvidence<FitHandle>, FitErr> + Sync,
720) -> Result<TopologyAutoSelectorResult<FitHandle>, String>
721where
722 FitHandle: Send,
723 FitErr: ToString + Send,
724{
725 // #944 stage 4: collapse fixed simply-connected constant-curvature forms
726 // into ONE estimated-κ ConstantCurvature candidate before the parallel race.
727 let candidates: Vec<AutoTopologyKind> =
728 AutoTopologyKind::fuse_constant_curvature_family(&selector.candidates);
729 let race = run_topology_race_parallel(candidates, |candidate| {
730 // Carry the candidate kind alongside the fit so per-candidate failures
731 // are reported with their topology name, exactly as the sequential path.
732 (candidate, fit_one(candidate))
733 })?;
734
735 let mut ranked = Vec::with_capacity(race.len());
736 let mut errors = Vec::new();
737 for entry in race {
738 let (candidate, fit_result) = entry.result;
739 match fit_result {
740 Ok(evidence) => {
741 let tk_score = tk_normalized_score(
742 evidence.raw_reml,
743 evidence.null_dim,
744 evidence.null_space_logdet,
745 evidence.effective_dim,
746 evidence.n_obs,
747 selector.score_scale,
748 )?;
749 ranked.push(TopologyAutoRankedFit {
750 topology_name: evidence.topology_name,
751 tk_score,
752 raw_reml: evidence.raw_reml,
753 effective_dim: evidence.effective_dim,
754 n_obs: evidence.n_obs,
755 fit_handle: evidence.fit_handle,
756 });
757 }
758 Err(err) => errors.push(format!("{}: {}", candidate.as_str(), err.to_string())),
759 }
760 }
761 if ranked.is_empty() {
762 return Err(format!(
763 "TopologyAutoSelector found no fittable topology candidates{}",
764 if errors.is_empty() {
765 String::new()
766 } else {
767 format!(" ({})", errors.join("; "))
768 }
769 ));
770 }
771 // Same deterministic priority ranking as the sequential path (#782): lower
772 // tk_score is better; route through the shared selector so ordering is
773 // identical regardless of which entry produced the candidates.
774 ranked = rank_priority_candidates(
775 ranked
776 .into_iter()
777 .enumerate()
778 .map(|(idx, row)| {
779 let score = row.tk_score;
780 PriorityCandidate::new(row, idx, score, 0)
781 })
782 .collect(),
783 )
784 .into_iter()
785 .map(|row| row.item)
786 .collect();
787 Ok(TopologyAutoSelectorResult {
788 ranked,
789 winner_index: 0,
790 })
791}
792
793pub fn tk_normalized_score(
794 raw_reml: f64,
795 null_dim: f64,
796 null_space_logdet: Option<f64>,
797 effective_dim: f64,
798 n_obs: usize,
799 score_scale: TopologyScoreScale,
800) -> Result<f64, String> {
801 if !(raw_reml.is_finite() && null_dim.is_finite()) || null_dim < -1.0e-9 {
802 return Err("TopologyAutoSelector received non-finite TK evidence inputs".to_string());
803 }
804 let normalizer = if null_dim.max(0.0) == 0.0 {
805 0.0
806 } else {
807 let logdet = null_space_logdet.ok_or_else(|| {
808 "TopologyAutoSelector TK normalizer requires null-space Hessian logdet".to_string()
809 })?;
810 if !logdet.is_finite() {
811 return Err("TopologyAutoSelector null-space Hessian logdet is not finite".to_string());
812 }
813 -0.5 * null_dim.max(0.0) * TK_LOG_2PI + 0.5 * logdet
814 };
815 let tk = raw_reml + normalizer;
816 match score_scale {
817 TopologyScoreScale::PerObservation => {
818 if n_obs == 0 {
819 Err("TopologyAutoSelector requires n_obs > 0".to_string())
820 } else {
821 Ok(tk / n_obs as f64)
822 }
823 }
824 TopologyScoreScale::PerEffectiveDim => {
825 if !(effective_dim.is_finite() && effective_dim > 0.0) {
826 Err("TopologyAutoSelector requires finite positive effective_dim".to_string())
827 } else {
828 Ok(tk / effective_dim)
829 }
830 }
831 }
832}
833
834// ===========================================================================
835// Discrete-mixture rung + cross-class adjudication (Object 3a / WP-C)
836// ===========================================================================
837
838/// One fitted entry of the discrete-mixture rung: the mixture order `k`, the
839/// fitted Gaussian mixture, and its rank-aware Laplace **negative** log evidence
840/// computed through the SAME [`crate::evidence::laplace_evidence`]
841/// entry point used by the smooth rungs. Lower negative-log-evidence is better.
842#[derive(Debug, Clone)]
843pub struct MixtureRungFit {
844 pub k: usize,
845 pub fit: crate::evidence::GaussianMixtureFit,
846 /// Free-parameter count `P` — the quantity that enters the rank-aware
847 /// normalizer as `dim(H) − rank(S) = P − 0`.
848 pub num_parameters: usize,
849 /// Rank-aware Laplace negative log evidence on the smooth-rung scale.
850 pub negative_log_evidence: f64,
851}
852
853/// Result of fitting the whole mixture ladder: every fitted order plus the index
854/// of the in-class winner (lowest rank-aware Laplace negative-log-evidence).
855#[derive(Debug, Clone)]
856pub struct MixtureRungResult {
857 pub fits: Vec<MixtureRungFit>,
858 pub winner_index: usize,
859}
860
861impl MixtureRungResult {
862 pub fn winner(&self) -> &MixtureRungFit {
863 &self.fits[self.winner_index]
864 }
865}
866
867/// Hard cap on the number of EXTRA orders the local refinement around the
868/// coarse-ladder winner may probe. Refinement walks one neighbour at a time
869/// and stops as soon as the running winner is bracketed (both immediate
870/// neighbours fitted and worse), so this cap only binds on a pathological
871/// evidence profile that keeps improving monotonically past the ladder — a
872/// regime the rank-aware parameter pricing rules out for any real cluster
873/// structure. It exists so the sweep stays a bounded pure function of the
874/// data, never a runaway loop.
875pub const MIXTURE_REFINEMENT_MAX_PROBES: usize = 16;
876
877/// Fit the discrete-mixture rung over a fixed `k`-ladder, then **refine
878/// locally around the winner**, and rank in-class by rank-aware Laplace
879/// evidence. Each order is priced by its own free-parameter count entering the
880/// `−½ (dim(H) − rank(S)) log(2π)` normalizer. Deterministic: the seeding is
881/// the basis k-means farthest-point init, EM is a pure map, and the refinement
882/// order is a pure function of the fitted scores.
883///
884/// The coarse ladder ([`MIXTURE_K_LADDER`]) keeps the sweep cheap but cannot
885/// *name* every order (it skips 4, 6, 8, …). Refinement closes that hole: after
886/// the sweep, the immediate missing neighbours `k*−1`, `k*+1` of the running
887/// winner are fitted, repeating until the winner is **bracketed** — both
888/// neighbours present and scoring worse — so a planted `k = 4` truth is
889/// recovered as exactly 4, not as the nearest ladder rung. On an in-ladder
890/// winner the bracketing typically costs two extra EM fits and terminates at
891/// the same order the coarse sweep found.
892pub fn fit_mixture_rung(
893 data: ArrayView2<'_, f64>,
894 ladder: &[usize],
895 config: GaussianMixtureConfig,
896) -> Result<MixtureRungResult, String> {
897 let n = data.nrows();
898 let mut fits: Vec<MixtureRungFit> = Vec::new();
899 let mut errors: Vec<String> = Vec::new();
900 // Every order ever attempted (fitted OR failed): refinement must not
901 // re-propose a failed order forever.
902 let mut attempted: std::collections::BTreeSet<usize> = std::collections::BTreeSet::new();
903
904 let try_order = |k: usize,
905 fits: &mut Vec<MixtureRungFit>,
906 errors: &mut Vec<String>,
907 attempted: &mut std::collections::BTreeSet<usize>| {
908 if k == 0 || k > n || !attempted.insert(k) {
909 return;
910 }
911 match fit_gaussian_mixture(data, k, config) {
912 Ok(fit) => match fit.laplace_negative_log_evidence(data) {
913 Ok(nle) => {
914 let num_parameters = fit.num_free_parameters();
915 fits.push(MixtureRungFit {
916 k,
917 fit,
918 num_parameters,
919 negative_log_evidence: nle,
920 });
921 }
922 Err(e) => errors.push(format!("mixture k={k} evidence: {e}")),
923 },
924 Err(e) => errors.push(format!("mixture k={k} fit: {e}")),
925 }
926 };
927
928 for &k in ladder {
929 try_order(k, &mut fits, &mut errors, &mut attempted);
930 }
931 if fits.is_empty() {
932 return Err(format!(
933 "mixture rung produced no fittable orders{}",
934 if errors.is_empty() {
935 String::new()
936 } else {
937 format!(" ({})", errors.join("; "))
938 }
939 ));
940 }
941
942 // Local refinement: bracket the running winner. The running winner uses
943 // the same rule as the final ranking (lower negative-log-evidence, ties to
944 // the smaller k), so refinement and ranking can never disagree about who
945 // the winner is.
946 let mut probes = 0usize;
947 while probes < MIXTURE_REFINEMENT_MAX_PROBES {
948 let best_k = fits
949 .iter()
950 .min_by(|a, b| {
951 a.negative_log_evidence
952 .partial_cmp(&b.negative_log_evidence)
953 .unwrap_or(std::cmp::Ordering::Equal)
954 .then(a.k.cmp(&b.k))
955 })
956 .map(|f| f.k)
957 .unwrap_or(1);
958 let next = [best_k.saturating_sub(1), best_k + 1]
959 .into_iter()
960 .find(|&k| k >= 1 && k <= n && !attempted.contains(&k));
961 let Some(k) = next else {
962 break; // bracketed: both neighbours attempted (or out of range).
963 };
964 try_order(k, &mut fits, &mut errors, &mut attempted);
965 probes += 1;
966 }
967 // In-class winner-take-all on the rank-aware evidence scale (lower wins).
968 let ranked = rank_priority_candidates(
969 fits.into_iter()
970 .enumerate()
971 .map(|(idx, row)| {
972 let score = row.negative_log_evidence;
973 let tie = row.k; // simpler (smaller k) wins ties
974 PriorityCandidate::new(row, idx, score, tie)
975 })
976 .collect(),
977 )
978 .into_iter()
979 .map(|row| row.item)
980 .collect::<Vec<_>>();
981 Ok(MixtureRungResult {
982 fits: ranked,
983 winner_index: 0,
984 })
985}
986
987// ===========================================================================
988// Structured-union rung (#907)
989// ===========================================================================
990
991/// One fitted entry of the structured-union rung: the composite structure, its
992/// summed rank-aware Laplace **negative** log evidence (the SUM `Σ_c V_c` of its
993/// components, each scored through the identical [`crate::evidence::laplace_evidence`]
994/// entry point used by the smooth rungs and the mixture rung), and the TOTAL
995/// free-parameter count across components (the complexity price). Lower
996/// negative-log-evidence wins.
997#[derive(Debug, Clone)]
998pub struct UnionRungFit {
999 pub structure: UnionStructure,
1000 pub fit: UnionStructureFit,
1001 /// `Σ_c P_c` — total free-parameter count across all components. This is the
1002 /// complexity quantity that the summed `+ ½ Σ_c P_c log(2π)` normalizer
1003 /// charges, so a union is strictly more expensive than either pure rung.
1004 pub total_parameters: usize,
1005 /// `Σ_c V_c` — summed rank-aware Laplace negative log evidence.
1006 pub negative_log_evidence: f64,
1007}
1008
1009/// Result of fitting the whole fixed union ladder: every fitted composite plus
1010/// the index of the in-class winner (lowest summed rank-aware Laplace
1011/// negative-log-evidence).
1012#[derive(Debug, Clone)]
1013pub struct UnionRungResult {
1014 pub fits: Vec<UnionRungFit>,
1015 pub winner_index: usize,
1016}
1017
1018impl UnionRungResult {
1019 pub fn winner(&self) -> &UnionRungFit {
1020 &self.fits[self.winner_index]
1021 }
1022}
1023
1024/// Fit the structured-union rung over the FIXED ladder
1025/// [`crate::evidence::UNION_STRUCTURE_LADDER`] and rank in-class by
1026/// summed rank-aware Laplace evidence. Each composite is hard-split into one
1027/// responsibility group per component (reusing the mixture rung's deterministic
1028/// seeding + EM), each component is REML/Laplace-fit on its group, and the
1029/// per-component evidences are SUMMED. Composites whose groups are too small to
1030/// identify their structure are skipped (they never enter the race rather than
1031/// scoring spuriously well). Deterministic: the split and the component fits are
1032/// pure functions of the data.
1033pub fn fit_union_rung(
1034 data: ArrayView2<'_, f64>,
1035 config: GaussianMixtureConfig,
1036) -> Result<UnionRungResult, String> {
1037 // `fit_union_ladder` already fits the fixed ladder and ranks best-first by
1038 // summed rank-aware evidence (cheaper composite wins ties). Re-wrap each
1039 // fit with its complexity price for the rung view.
1040 let ladder = fit_union_ladder(data, config)?;
1041 let fits: Vec<UnionRungFit> = ladder
1042 .into_iter()
1043 .map(|fit| UnionRungFit {
1044 structure: fit.structure,
1045 total_parameters: fit.total_parameters,
1046 negative_log_evidence: fit.negative_log_evidence,
1047 fit,
1048 })
1049 .collect();
1050 if fits.is_empty() {
1051 return Err("union rung produced no fittable composites".to_string());
1052 }
1053 Ok(UnionRungResult {
1054 fits,
1055 winner_index: 0,
1056 })
1057}
1058
1059/// Fit a SINGLE structured-union composite and return it as a rung fit. Thin
1060/// convenience over [`crate::evidence::fit_union_structure`] for callers
1061/// (and the race) that already chose a specific composite.
1062pub fn fit_union_candidate(
1063 data: ArrayView2<'_, f64>,
1064 structure: UnionStructure,
1065 config: GaussianMixtureConfig,
1066) -> Result<UnionRungFit, String> {
1067 let fit = fit_union_structure(data, structure, config)?;
1068 Ok(UnionRungFit {
1069 structure: fit.structure,
1070 total_parameters: fit.total_parameters,
1071 negative_log_evidence: fit.negative_log_evidence,
1072 fit,
1073 })
1074}
1075
1076/// A selection-time predictive-density provider: given the row indices to TRAIN
1077/// on and the row indices to EVALUATE on, it returns the per-eval-row held-out
1078/// log predictive density `log p(y_eval | train)`. This is the decoupled seam
1079/// that lets the cross-class race build a stacking table without persisting any
1080/// predictor: the closure refits on each fold's training rows.
1081///
1082/// The mixture provider is constructed here ([`mixture_density_provider`]); a
1083/// smooth-manifold provider is supplied by the caller (it owns the smooth
1084/// fitting machinery). Both refit per fold so the table is genuinely held-out.
1085pub type HeldOutDensityProvider<'a> =
1086 Box<dyn Fn(&[usize], &[usize]) -> Result<Vec<f64>, String> + 'a>;
1087
1088/// Build a mixture held-out-density provider for a fixed order `k`. It refits a
1089/// `k`-component mixture on the training rows and scores the eval rows.
1090pub fn mixture_density_provider<'a>(
1091 data: ArrayView2<'a, f64>,
1092 k: usize,
1093 config: GaussianMixtureConfig,
1094) -> HeldOutDensityProvider<'a> {
1095 let owned = data.to_owned();
1096 Box::new(
1097 move |train: &[usize], eval: &[usize]| -> Result<Vec<f64>, String> {
1098 let train_mat = gather_rows(owned.view(), train);
1099 let fit = fit_gaussian_mixture(train_mat.view(), k.min(train.len().max(1)), config)?;
1100 let eval_mat = gather_rows(owned.view(), eval);
1101 let dens = fit.per_point_log_density(eval_mat.view())?;
1102 Ok(dens.to_vec())
1103 },
1104 )
1105}
1106
1107/// Build a structured-union held-out-density provider for a fixed composite. It
1108/// refits the union's component densities on the training rows and scores the
1109/// eval rows under the soft mixture `log Σ_c π_c p_c(y)` (the union analogue of
1110/// [`mixture_density_provider`]). Refits per fold, so the stacking table is
1111/// genuinely held out.
1112pub fn union_density_provider<'a>(
1113 data: ArrayView2<'a, f64>,
1114 structure: UnionStructure,
1115 config: GaussianMixtureConfig,
1116) -> HeldOutDensityProvider<'a> {
1117 let owned = data.to_owned();
1118 Box::new(
1119 move |train: &[usize], eval: &[usize]| -> Result<Vec<f64>, String> {
1120 let train_mat = gather_rows(owned.view(), train);
1121 let eval_mat = gather_rows(owned.view(), eval);
1122 let dens =
1123 union_per_point_log_density(train_mat.view(), eval_mat.view(), structure, config)?;
1124 Ok(dens.to_vec())
1125 },
1126 )
1127}
1128
1129fn gather_rows(data: ArrayView2<'_, f64>, idx: &[usize]) -> Array2<f64> {
1130 let d = data.ncols();
1131 let mut out = Array2::<f64>::zeros((idx.len(), d));
1132 for (r, &i) in idx.iter().enumerate() {
1133 for c in 0..d {
1134 out[[r, c]] = data[[i, c]];
1135 }
1136 }
1137 out
1138}
1139
1140/// Deterministic contiguous `folds`-way CV partition of `0..n` (no clock
1141/// randomness). Returns, for each fold, `(train_indices, eval_indices)`.
1142///
1143/// Uses the default stacking seed ([`STACKING_CV_SEED`]); see
1144/// [`deterministic_cv_folds_seeded`] for the seed-reproducible variant (#1386).
1145pub fn deterministic_cv_folds(n: usize, folds: usize) -> Vec<(Vec<usize>, Vec<usize>)> {
1146 deterministic_cv_folds_seeded(n, folds, STACKING_CV_SEED)
1147}
1148
1149/// SplitMix64 finalizer — a full-avalanche integer hash. Pure and deterministic:
1150/// it never touches the clock or any RNG state, so the folding it drives is
1151/// reproducible for a given `(seed, index)` and decorrelated across seeds.
1152#[inline]
1153fn splitmix64(mut x: u64) -> u64 {
1154 x = x.wrapping_add(0x9E37_79B9_7F4A_7C15);
1155 let mut z = x;
1156 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
1157 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
1158 z ^ (z >> 31)
1159}
1160
1161/// Deterministic, seed-reproducible `folds`-way CV partition of `0..n` (no clock
1162/// randomness). The eval fold of sample `i` is `hash(seed, i) % folds`, so:
1163/// - the same `seed` always reproduces the identical folding (deterministic),
1164/// - different seeds give different (still-deterministic) foldings.
1165///
1166/// This is the seam that makes the `adjudicate_atom_shape` `seed=` kwarg
1167/// functional (#1386): it is mixed into the held-out CV partition rather than
1168/// being a silent no-op. Returns, for each fold, `(train_indices, eval_indices)`,
1169/// dropping any fold whose train or eval set is empty.
1170pub fn deterministic_cv_folds_seeded(
1171 n: usize,
1172 folds: usize,
1173 seed: u64,
1174) -> Vec<(Vec<usize>, Vec<usize>)> {
1175 let folds = folds.clamp(2, n.max(2));
1176 // Precompute the seed-mixed fold of every sample once.
1177 let assign: Vec<usize> = (0..n)
1178 .map(|i| {
1179 // Mix the seed with the index through a full-avalanche hash so the
1180 // partition genuinely depends on both. The modulo keeps fold sizes
1181 // balanced in expectation while remaining deterministic.
1182 (splitmix64(seed ^ splitmix64(i as u64)) % folds as u64) as usize
1183 })
1184 .collect();
1185 let mut out = Vec::with_capacity(folds);
1186 for f in 0..folds {
1187 let mut train = Vec::new();
1188 let mut eval = Vec::new();
1189 for (i, &fold) in assign.iter().enumerate() {
1190 if fold == f {
1191 eval.push(i);
1192 } else {
1193 train.push(i);
1194 }
1195 }
1196 if !eval.is_empty() && !train.is_empty() {
1197 out.push((train, eval));
1198 }
1199 }
1200 out
1201}
1202
1203/// Build the selection-time held-out predictive log-density table
1204/// `log_density[i, c] = log p_c(y_i | train_fold(i))`, with one column per
1205/// candidate provider. Each row `i` is scored by the candidate refit on the CV
1206/// fold whose eval set contains `i`, so every entry is genuinely held out. This
1207/// is exactly the table that feeds
1208/// [`crate::evidence::solve_stacking_weights`].
1209pub fn build_cv_log_density_table(
1210 n: usize,
1211 folds: usize,
1212 seed: u64,
1213 providers: &[HeldOutDensityProvider<'_>],
1214) -> Result<Array2<f64>, String> {
1215 if providers.is_empty() {
1216 return Err("stacking table requires at least one candidate provider".to_string());
1217 }
1218 let partition = deterministic_cv_folds_seeded(n, folds, seed);
1219 if partition.is_empty() {
1220 return Err("stacking CV partition is empty (n too small for folds)".to_string());
1221 }
1222 let mut table = Array2::<f64>::from_elem((n, providers.len()), f64::NEG_INFINITY);
1223 for (train, eval) in &partition {
1224 for (col, provider) in providers.iter().enumerate() {
1225 let dens = provider(train, eval)?;
1226 if dens.len() != eval.len() {
1227 return Err(format!(
1228 "provider {col} returned {} densities for {} eval rows",
1229 dens.len(),
1230 eval.len()
1231 ));
1232 }
1233 for (slot, &row) in eval.iter().enumerate() {
1234 table[[row, col]] = dens[slot];
1235 }
1236 }
1237 }
1238 Ok(table)
1239}
1240
1241/// Adjudicated outcome of a cross-class race. When the race mixes a smooth
1242/// manifold candidate with the discrete-mixture rung, `headline` is the stacking
1243/// verdict (held-out predictive log-density), and the rank-aware Laplace
1244/// evidence is retained per-candidate as corroboration. Same-class races report
1245/// `Headline::Evidence` (winner-take-all on rank-aware evidence).
1246#[derive(Debug, Clone)]
1247pub struct CrossClassRaceVerdict {
1248 /// Candidate display names, column-aligned with the stacking table / weights.
1249 pub candidate_names: Vec<String>,
1250 /// Whether the race actually mixed model classes (smooth vs discrete).
1251 pub is_cross_class: bool,
1252 /// Rank-aware Laplace negative-log-evidence per candidate (corroboration;
1253 /// lower is better).
1254 pub negative_log_evidence: Vec<f64>,
1255 /// Stacking weights over the candidates (present iff `is_cross_class`).
1256 pub stacking: Option<StackingWeights>,
1257 /// Index of the headline winner. For cross-class races this is the max
1258 /// stacking-weight candidate; for same-class it is the min-evidence one.
1259 pub winner_index: usize,
1260 /// Which statistic drove the headline.
1261 pub headline: Headline,
1262 /// `Some(_)` when the same-class evidence winner's lead over the runner-up
1263 /// did NOT clear the decision margin required by approximate (enclosure /
1264 /// coreset) evidence — the verdict is provisional and the caller must
1265 /// refine or escalate to the exact path. `None` when the margin held (or no
1266 /// approximate evidence was involved, or the race adjudicated by stacking).
1267 pub insufficient_margin: Option<InsufficientRaceMargin>,
1268}
1269
1270/// Which statistic adjudicated the headline ranking.
1271#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1272pub enum Headline {
1273 /// Rank-aware Laplace evidence (same-class race, winner-take-all).
1274 Evidence,
1275 /// Held-out predictive log-density / stacking weights (cross-class race).
1276 Stacking,
1277}
1278
1279/// How a candidate's `negative_log_evidence` was certified — the source of the
1280/// decision-margin the same-class race must respect before it can transfer a
1281/// verdict from approximate evidence to the full-corpus verdict.
1282///
1283/// * [`Exact`] — the evidence is a genuine point value (dense logdet, full
1284/// corpus); no margin floor.
1285/// * [`Enclosure`] — the log-determinant half came from a
1286/// [`block_preconditioned_logdet_enclosure`]; the race lead Δ must exceed the
1287/// enclosure gap (#1011 contract) before the winner is trustworthy.
1288/// * [`Coreset`] — the evidence was raced on a certified row coreset; the lead
1289/// must exceed the certificate's [`CoresetCertificate::race_transfer_margin`]
1290/// (#1012 contract — the SAME margin seam as the enclosure).
1291///
1292/// [`Exact`]: EvidenceCertification::Exact
1293/// [`Enclosure`]: EvidenceCertification::Enclosure
1294/// [`Coreset`]: EvidenceCertification::Coreset
1295/// [`block_preconditioned_logdet_enclosure`]: crate::logdet_bounds::block_preconditioned_logdet_enclosure
1296#[derive(Clone, Copy, Debug, PartialEq)]
1297pub enum EvidenceCertification {
1298 Exact,
1299 Enclosure { gap: f64 },
1300 Coreset { certificate: CoresetCertificate },
1301}
1302
1303impl EvidenceCertification {
1304 /// The smallest race lead Δ for which this candidate's evidence is
1305 /// trustworthy. Exact evidence transfers at any positive lead; an enclosure
1306 /// needs its gap; a coreset needs its certified transfer margin.
1307 pub fn required_margin(&self) -> f64 {
1308 match self {
1309 EvidenceCertification::Exact => 0.0,
1310 EvidenceCertification::Enclosure { gap } => *gap,
1311 EvidenceCertification::Coreset { certificate } => certificate.race_transfer_margin(),
1312 }
1313 }
1314
1315 /// The unified `Verdict` (`gam_inference::certificates::Verdict`) for a race
1316 /// won by lead `race_lead` over the runner-up, on the shared certificate
1317 /// ladder (task #16). The race transfers its approximate-evidence verdict to
1318 /// the full corpus only when the lead strictly clears [`Self::required_margin`]
1319 /// — otherwise the consumer must refine or fall back to the exact dense path,
1320 /// so the verdict is `Insufficient`, never a silent pass. Exact evidence with
1321 /// any positive lead is `Certified`. A non-finite or non-positive lead leaves
1322 /// the race undecided (`Insufficient`); the per-family margin verdicts
1323 /// (`gam_inference::certificate_impls::coreset_race_verdict`,
1324 /// `gam_inference::certificate_impls::enclosure_margin_verdict`) supply
1325 /// the underlying mapping so there is one source of truth.
1326 pub fn race_verdict(&self, race_lead: f64) -> gam_problem::topology_certificates::Verdict {
1327 use gam_problem::topology_certificates::Verdict;
1328 if !(race_lead.is_finite() && race_lead > 0.0) {
1329 return Verdict::Insufficient;
1330 }
1331 match self {
1332 EvidenceCertification::Exact => Verdict::Certified,
1333 EvidenceCertification::Enclosure { gap } => {
1334 let enclosure = crate::logdet_bounds::LogdetEnclosure {
1335 block_diag_logdet: 0.0,
1336 lower: 0.0,
1337 upper: *gap,
1338 rho: 0.0,
1339 p2: 0.0,
1340 p3: None,
1341 };
1342 crate::inference::certificate_impls::enclosure_margin_verdict(&enclosure, race_lead)
1343 }
1344 EvidenceCertification::Coreset { certificate } => {
1345 crate::inference::certificate_impls::coreset_race_verdict(
1346 certificate.certify_margin(race_lead),
1347 )
1348 }
1349 }
1350 }
1351}
1352
1353/// One candidate entering the cross-class adjudicator: its kind, its rank-aware
1354/// Laplace negative-log-evidence (already computed on the common scale), how
1355/// that evidence was certified (for the margin contract), and a selection-time
1356/// held-out-density provider that refits per CV fold.
1357pub struct CrossClassCandidate<'a> {
1358 pub kind: AutoTopologyKind,
1359 pub negative_log_evidence: f64,
1360 /// Certification of `negative_log_evidence`. Defaults conceptually to
1361 /// [`EvidenceCertification::Exact`]; construct with [`Self::exact`] for the
1362 /// classic point-value path.
1363 pub certification: EvidenceCertification,
1364 pub density_provider: HeldOutDensityProvider<'a>,
1365}
1366
1367impl<'a> CrossClassCandidate<'a> {
1368 /// Construct a candidate whose evidence is an exact point value (the
1369 /// classic full-corpus dense-logdet path — no margin floor).
1370 pub fn exact(
1371 kind: AutoTopologyKind,
1372 negative_log_evidence: f64,
1373 density_provider: HeldOutDensityProvider<'a>,
1374 ) -> Self {
1375 Self {
1376 kind,
1377 negative_log_evidence,
1378 certification: EvidenceCertification::Exact,
1379 density_provider,
1380 }
1381 }
1382}
1383
1384/// Why a same-class race could not transfer its approximate-evidence verdict to
1385/// the full corpus: the winner's lead Δ over the runner-up did not clear the
1386/// required decision margin (the enclosure gap or the coreset transfer margin).
1387/// The consumer must refine (more moments / pair absorption / a larger coreset)
1388/// or re-run the top contenders on the exact dense path.
1389#[derive(Clone, Copy, Debug, PartialEq)]
1390pub struct InsufficientRaceMargin {
1391 /// Index of the provisional (below-margin) winner.
1392 pub provisional_winner: usize,
1393 /// Index of the runner-up whose evidence is within margin of the winner.
1394 pub contender: usize,
1395 /// The realized lead Δ = nle[contender] − nle[winner] (≥ 0).
1396 pub lead: f64,
1397 /// The margin the lead had to exceed (max of the two candidates'
1398 /// required margins).
1399 pub required_margin: f64,
1400}
1401
1402/// Adjudicate a race that may mix smooth-manifold and discrete candidates
1403/// (the discrete-mixture rung and/or a structured union, #907). Cross-class
1404/// mixing is auto-detected from the candidate kinds (a race is cross-class iff
1405/// it contains BOTH at least one smooth/Euclidean candidate AND at least one
1406/// discrete candidate — [`AutoTopologyKind::Mixture`] or
1407/// [`AutoTopologyKind::Union`]). When cross-class,
1408/// the headline switches to stacking over a selection-time CV held-out
1409/// log-density table; otherwise the headline is the rank-aware evidence winner.
1410/// `seed` is mixed into the deterministic CV fold assignment (#1386): the same
1411/// seed reproduces the identical held-out folding, different seeds give
1412/// different — but still deterministic — foldings. It only affects the
1413/// cross-class (stacking) path; same-class races are winner-take-all on evidence
1414/// and ignore it. Pass [`STACKING_CV_SEED`] for the default folding.
1415pub fn adjudicate_cross_class_race(
1416 n: usize,
1417 candidates: Vec<CrossClassCandidate<'_>>,
1418 folds: usize,
1419 seed: u64,
1420 stacking_config: StackingConfig,
1421) -> Result<CrossClassRaceVerdict, String> {
1422 if candidates.is_empty() {
1423 return Err("cross-class race requires at least one candidate".to_string());
1424 }
1425 let names: Vec<String> = candidates.iter().map(|c| c.kind.display_name()).collect();
1426 let evidence: Vec<f64> = candidates.iter().map(|c| c.negative_log_evidence).collect();
1427
1428 // Cross-class iff the race mixes at least one discrete (non-smooth) density
1429 // class — the mixture rung OR a structured union (#907) — with at least one
1430 // smooth/Euclidean manifold candidate. A union competing against a smooth
1431 // ring must therefore adjudicate by held-out predictive stacking, exactly
1432 // like the mixture rung does.
1433 let has_discrete = candidates.iter().any(|c| c.kind.is_discrete_class());
1434 let has_smooth = candidates.iter().any(|c| !c.kind.is_discrete_class());
1435 let is_cross_class = has_discrete && has_smooth;
1436
1437 if !is_cross_class {
1438 // Same-class: winner-take-all on rank-aware evidence (lower wins).
1439 let certifications: Vec<EvidenceCertification> =
1440 candidates.iter().map(|c| c.certification).collect();
1441 let mut winner_index = 0usize;
1442 let mut best = f64::INFINITY;
1443 for (idx, &nle) in evidence.iter().enumerate() {
1444 if nle.is_finite() && nle < best {
1445 best = nle;
1446 winner_index = idx;
1447 }
1448 }
1449 // Decision-margin contract (#1011 enclosure / #1012 coreset, one seam):
1450 // the winner's lead over the closest contender must clear the larger of
1451 // the two candidates' required margins (an exact candidate floors at 0,
1452 // an enclosure at its gap, a coreset at its transfer margin). When the
1453 // lead is inside that margin the verdict is provisional — the
1454 // approximate evidence does not actually separate them — so we surface
1455 // an explicit escalation rather than silently anointing a winner the
1456 // bounds cannot distinguish.
1457 let mut insufficient_margin: Option<InsufficientRaceMargin> = None;
1458 for (idx, &nle) in evidence.iter().enumerate() {
1459 if idx == winner_index || !nle.is_finite() {
1460 continue;
1461 }
1462 let lead = nle - best;
1463 let required = certifications[winner_index]
1464 .required_margin()
1465 .max(certifications[idx].required_margin());
1466 if required > 0.0 && lead <= required {
1467 let tighter = insufficient_margin.map(|m| lead < m.lead).unwrap_or(true);
1468 if tighter {
1469 insufficient_margin = Some(InsufficientRaceMargin {
1470 provisional_winner: winner_index,
1471 contender: idx,
1472 lead,
1473 required_margin: required,
1474 });
1475 }
1476 }
1477 }
1478 return Ok(CrossClassRaceVerdict {
1479 candidate_names: names,
1480 is_cross_class: false,
1481 negative_log_evidence: evidence,
1482 stacking: None,
1483 winner_index,
1484 headline: Headline::Evidence,
1485 insufficient_margin,
1486 });
1487 }
1488
1489 // Cross-class: build the selection-time held-out density table and stack.
1490 let providers: Vec<HeldOutDensityProvider<'_>> =
1491 candidates.into_iter().map(|c| c.density_provider).collect();
1492 let table = build_cv_log_density_table(n, folds, seed, &providers)?;
1493 let stacking = solve_stacking_weights(table.view(), stacking_config)?;
1494 // Headline winner = max stacking weight (most predictive mass).
1495 let mut winner_index = 0usize;
1496 let mut best_w = f64::NEG_INFINITY;
1497 for (idx, &w) in stacking.weights.iter().enumerate() {
1498 if w > best_w {
1499 best_w = w;
1500 winner_index = idx;
1501 }
1502 }
1503 Ok(CrossClassRaceVerdict {
1504 candidate_names: names,
1505 is_cross_class: true,
1506 negative_log_evidence: evidence,
1507 stacking: Some(stacking),
1508 winner_index,
1509 headline: Headline::Stacking,
1510 // Cross-class headlines adjudicate by held-out predictive stacking on
1511 // the full corpus, not by the approximate-evidence scalar, so the
1512 // enclosure/coreset margin contract does not gate the verdict here.
1513 insufficient_margin: None,
1514 })
1515}
1516
1517// ===========================================================================
1518// Closure-parameter smooth class (#1015): circle ⇄ interval as one estimand
1519// ===========================================================================
1520
1521/// The deterministic closure-parameter grid the smooth-class profiler sweeps.
1522///
1523/// `γ = 1` is the closed circle, `γ = 0` the interval limit; the grid is dense
1524/// near both boundaries (where the Wilks crossing for the one-sided CI lives)
1525/// and is a fixed function of nothing but this constant, so the profile — and
1526/// thus the reported CI — is reproducible with no data-dependent node placement.
1527pub const CLOSURE_GAMMA_GRID: &[f64] = &[
1528 0.0, 0.02, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 0.98, 1.0,
1529];
1530
1531/// One profiled point of the closure family: the closure value `γ`, the
1532/// profiled (θ and λ_smooth optimised) negative-log evidence at that γ, and the
1533/// fit handle the caller wants carried for the winner.
1534#[derive(Debug, Clone)]
1535pub struct ClosureProfilePoint<FitHandle> {
1536 pub gamma: f64,
1537 pub tk_score: f64,
1538 pub fit_handle: FitHandle,
1539}
1540
1541/// The result of profiling the closure parameter inside the smooth class.
1542///
1543/// The headline is `gamma_hat` with its profile-likelihood CI (a regular Wilks
1544/// interval when the optimum is interior; one-sided when it touches a boundary).
1545/// `representative` is the γ̂ fit, scored on the SAME TK evidence surface the
1546/// discrete race uses, so this single smooth-class entry competes directly with
1547/// the non-homotopic candidates (mixture/union) the discrete race retains: the
1548/// closure family absorbs the within-smooth-class circle/line grid, it does not
1549/// replace the cross-class race.
1550#[derive(Debug, Clone)]
1551pub struct ClosureSelection<FitHandle> {
1552 pub ci: gam_geometry::ClosureProfileCi,
1553 pub representative: ClosureProfilePoint<FitHandle>,
1554 /// True when the profile pinned γ at the singular cluster boundary — the
1555 /// "not a regular smooth 1-D topology" signal that must be routed to the
1556 /// #907 mixture/union rung rather than reported as a regular closure.
1557 pub route_to_mixture_rung: bool,
1558}
1559
1560/// Profile the closure parameter `γ` over [`CLOSURE_GAMMA_GRID`], returning the
1561/// minimiser, its profile-likelihood CI, and the representative fit.
1562///
1563/// `fit_at_gamma` performs the inner fit at a fixed closure value and returns
1564/// `(tk_score, fit_handle)`, where `tk_score` is the profiled
1565/// negative-log evidence on the same scale [`tk_normalized_score`] produces (so
1566/// γ and λ_smooth must both be optimised inside the closure, per the issue's
1567/// confounding contract). Lower score is better. The grid is swept in parallel
1568/// via [`run_topology_race_parallel`].
1569pub fn profile_closure_within_smooth_class<FitHandle, FitAtGamma>(
1570 fit_at_gamma: FitAtGamma,
1571 level: f64,
1572) -> Result<ClosureSelection<FitHandle>, String>
1573where
1574 FitHandle: Send,
1575 FitAtGamma: Fn(f64) -> Result<(f64, FitHandle), String> + Sync,
1576{
1577 let gammas: Vec<f64> = CLOSURE_GAMMA_GRID.to_vec();
1578 let results = run_topology_race_parallel(gammas.clone(), |gamma| {
1579 fit_at_gamma(gamma).map(|(score, handle)| (gamma, score, handle))
1580 })?;
1581
1582 let mut points: Vec<ClosureProfilePoint<FitHandle>> = Vec::with_capacity(results.len());
1583 for entry in results {
1584 let (gamma, tk_score, fit_handle) = entry.result?;
1585 if !tk_score.is_finite() {
1586 return Err(format!(
1587 "closure profile produced non-finite score at γ={gamma}"
1588 ));
1589 }
1590 points.push(ClosureProfilePoint {
1591 gamma,
1592 tk_score,
1593 fit_handle,
1594 });
1595 }
1596 if points.len() < 2 {
1597 return Err("closure profile needs at least two evaluable γ grid points".into());
1598 }
1599 points.sort_by(|a, b| a.gamma.partial_cmp(&b.gamma).expect("finite γ"));
1600
1601 let grid: Vec<(f64, f64)> = points.iter().map(|p| (p.gamma, p.tk_score)).collect();
1602 let ci = gam_geometry::profile_ci_from_grid(&grid, level)?;
1603
1604 // Pull the representative γ̂ fit out of the points (the minimiser).
1605 let hat_index = points
1606 .iter()
1607 .enumerate()
1608 .min_by(|(_, a), (_, b)| a.tk_score.partial_cmp(&b.tk_score).expect("finite score"))
1609 .map(|(i, _)| i)
1610 .expect("non-empty points");
1611 let representative = points.swap_remove(hat_index);
1612
1613 Ok(ClosureSelection {
1614 ci,
1615 representative,
1616 route_to_mixture_rung: ci.singular_boundary,
1617 })
1618}
1619
1620#[cfg(test)]
1621mod tests {
1622 use super::*;
1623 use rayon::iter::{IntoParallelIterator, ParallelIterator};
1624
1625 #[derive(Clone)]
1626 struct SyntheticRaceCandidate {
1627 seed: u64,
1628 len: usize,
1629 }
1630
1631 fn synthetic_fit(candidate: SyntheticRaceCandidate) -> Vec<u64> {
1632 (0..candidate.len)
1633 .into_par_iter()
1634 .map(|i| {
1635 let x = candidate.seed ^ (i as u64 + 1).wrapping_mul(0x9e37_79b9_7f4a_7c15);
1636 x.rotate_left((i % 31) as u32)
1637 .wrapping_mul(0xbf58_476d_1ce4_e5b9)
1638 })
1639 .collect()
1640 }
1641
1642 #[test]
1643 fn topology_race_parallel_matches_sequential_synthetic_candidates() {
1644 let candidates = vec![
1645 SyntheticRaceCandidate { seed: 11, len: 64 },
1646 SyntheticRaceCandidate { seed: 29, len: 64 },
1647 SyntheticRaceCandidate { seed: 47, len: 64 },
1648 ];
1649 let sequential = candidates
1650 .iter()
1651 .cloned()
1652 .map(synthetic_fit)
1653 .collect::<Vec<_>>();
1654
1655 let parallel =
1656 run_topology_race_parallel_with_budget(candidates, synthetic_fit, 8).unwrap();
1657 assert_eq!(parallel.len(), 3);
1658 assert_eq!(
1659 parallel
1660 .iter()
1661 .map(|row| row.candidate_index)
1662 .collect::<Vec<_>>(),
1663 vec![0, 1, 2]
1664 );
1665 assert!(parallel.iter().all(|row| row.per_fit_threads == 1));
1666 let wall_times = parallel.iter().map(|row| row.wall_time).collect::<Vec<_>>();
1667 assert_eq!(wall_times.len(), 3);
1668 assert_eq!(
1669 parallel
1670 .into_iter()
1671 .map(|row| row.result)
1672 .collect::<Vec<_>>(),
1673 sequential
1674 );
1675 }
1676
1677 fn trivial_provider<'a>() -> HeldOutDensityProvider<'a> {
1678 Box::new(|_train: &[usize], eval: &[usize]| Ok(vec![0.0; eval.len()]))
1679 }
1680
1681 /// #1011/#1012 decision-margin contract on the same-class evidence race:
1682 /// when the winner's lead over the runner-up is inside the enclosure gap,
1683 /// the verdict is provisional (`insufficient_margin` set) so the caller must
1684 /// refine or escalate; a lead that clears the gap transfers cleanly.
1685 #[test]
1686 fn same_class_race_respects_enclosure_decision_margin() {
1687 // Two smooth candidates (same class) whose evidence came from a logdet
1688 // enclosure with gap 1.0. Lead of 0.5 < gap ⇒ provisional.
1689 let near = vec![
1690 CrossClassCandidate {
1691 kind: AutoTopologyKind::Circle,
1692 negative_log_evidence: 100.0,
1693 certification: EvidenceCertification::Enclosure { gap: 1.0 },
1694 density_provider: trivial_provider(),
1695 },
1696 CrossClassCandidate {
1697 kind: AutoTopologyKind::Euclidean,
1698 negative_log_evidence: 100.5,
1699 certification: EvidenceCertification::Enclosure { gap: 1.0 },
1700 density_provider: trivial_provider(),
1701 },
1702 ];
1703 let verdict = adjudicate_cross_class_race(
1704 8,
1705 near,
1706 STACKING_CV_FOLDS,
1707 STACKING_CV_SEED,
1708 StackingConfig::default(),
1709 )
1710 .expect("same-class race");
1711 assert!(!verdict.is_cross_class);
1712 assert_eq!(verdict.winner_index, 0);
1713 let escalation = verdict
1714 .insufficient_margin
1715 .expect("lead inside the enclosure gap must be flagged provisional");
1716 assert_eq!(escalation.provisional_winner, 0);
1717 assert_eq!(escalation.contender, 1);
1718 assert!((escalation.lead - 0.5).abs() < 1e-12);
1719 assert!((escalation.required_margin - 1.0).abs() < 1e-12);
1720
1721 // A lead that clears the gap transfers the verdict cleanly.
1722 let far = vec![
1723 CrossClassCandidate {
1724 kind: AutoTopologyKind::Circle,
1725 negative_log_evidence: 100.0,
1726 certification: EvidenceCertification::Enclosure { gap: 1.0 },
1727 density_provider: trivial_provider(),
1728 },
1729 CrossClassCandidate {
1730 kind: AutoTopologyKind::Euclidean,
1731 negative_log_evidence: 105.0,
1732 certification: EvidenceCertification::Enclosure { gap: 1.0 },
1733 density_provider: trivial_provider(),
1734 },
1735 ];
1736 let verdict_far = adjudicate_cross_class_race(
1737 8,
1738 far,
1739 STACKING_CV_FOLDS,
1740 STACKING_CV_SEED,
1741 StackingConfig::default(),
1742 )
1743 .expect("same-class race");
1744 assert_eq!(verdict_far.winner_index, 0);
1745 assert!(
1746 verdict_far.insufficient_margin.is_none(),
1747 "a lead clearing the enclosure gap must transfer the verdict"
1748 );
1749 }
1750
1751 /// The coreset transfer margin (#1012) flows through the SAME race seam: a
1752 /// lead inside `CoresetCertificate::race_transfer_margin` is provisional.
1753 #[test]
1754 fn same_class_race_respects_coreset_transfer_margin() {
1755 let cert = CoresetCertificate::new(0.05, 0.1, 32, 1000).expect("certificate");
1756 let required = cert.race_transfer_margin();
1757 // Lead strictly inside the certified transfer margin.
1758 let lead = 0.5 * required;
1759 let candidates = vec![
1760 CrossClassCandidate {
1761 kind: AutoTopologyKind::Circle,
1762 negative_log_evidence: 10.0,
1763 certification: EvidenceCertification::Coreset { certificate: cert },
1764 density_provider: trivial_provider(),
1765 },
1766 CrossClassCandidate {
1767 kind: AutoTopologyKind::Euclidean,
1768 negative_log_evidence: 10.0 + lead,
1769 certification: EvidenceCertification::Coreset { certificate: cert },
1770 density_provider: trivial_provider(),
1771 },
1772 ];
1773 let verdict = adjudicate_cross_class_race(
1774 8,
1775 candidates,
1776 STACKING_CV_FOLDS,
1777 STACKING_CV_SEED,
1778 StackingConfig::default(),
1779 )
1780 .expect("same-class race");
1781 let escalation = verdict
1782 .insufficient_margin
1783 .expect("lead inside the coreset transfer margin must be flagged");
1784 assert!((escalation.required_margin - required).abs() < 1e-9);
1785 }
1786
1787 /// #1386: the `seed` mixed into the cross-class CV folding is functional, not
1788 /// a silent no-op. The same seed must reproduce the identical fold
1789 /// assignment, and two different seeds must produce a different assignment
1790 /// for at least one sample (while both remain deterministic). This test FAILS
1791 /// when the seed is ignored (a pure `i % folds` rule is seed-independent, so
1792 /// the two-seed inequality below can never hold) and PASSES once the seed is
1793 /// genuinely threaded into the fold rule.
1794 #[test]
1795 fn cv_folds_are_seed_reproducible_and_seed_varying() {
1796 const N: usize = 40;
1797 const FOLDS: usize = 5;
1798
1799 // Flatten a partition into a per-sample fold-of-sample vector so two
1800 // foldings can be compared sample-by-sample regardless of fold order.
1801 fn fold_of_sample(n: usize, partition: &[(Vec<usize>, Vec<usize>)]) -> Vec<Option<usize>> {
1802 let mut assign = vec![None; n];
1803 for (fold, (_train, eval)) in partition.iter().enumerate() {
1804 for &i in eval {
1805 assign[i] = Some(fold);
1806 }
1807 }
1808 assign
1809 }
1810
1811 // (a) Reproducible: the same seed gives the identical fold assignment.
1812 let a1 = deterministic_cv_folds_seeded(N, FOLDS, 11);
1813 let a2 = deterministic_cv_folds_seeded(N, FOLDS, 11);
1814 assert_eq!(
1815 fold_of_sample(N, &a1),
1816 fold_of_sample(N, &a2),
1817 "same seed must reproduce the identical CV folding"
1818 );
1819
1820 // (b) Seed-varying: two different seeds must differ for at least one
1821 // sample. If the seed were ignored this assertion could never pass.
1822 let b = deterministic_cv_folds_seeded(N, FOLDS, 12);
1823 assert_ne!(
1824 fold_of_sample(N, &a1),
1825 fold_of_sample(N, &b),
1826 "different seeds must produce different fold assignments (seed must \
1827 not be a no-op)"
1828 );
1829
1830 // The default-seed convenience wrapper agrees with the explicit default
1831 // seed, so existing seed-less call sites keep their deterministic folding.
1832 assert_eq!(
1833 fold_of_sample(N, &deterministic_cv_folds(N, FOLDS)),
1834 fold_of_sample(
1835 N,
1836 &deterministic_cv_folds_seeded(N, FOLDS, STACKING_CV_SEED)
1837 ),
1838 "deterministic_cv_folds must equal the default-seeded folding"
1839 );
1840 }
1841
1842 /// The unified certificate ladder (#16): `EvidenceCertification::race_verdict`
1843 /// maps the same margin contract onto `Verdict`. Exact transfers at any
1844 /// positive lead; an enclosure / coreset certifies only when the lead clears
1845 /// the required margin, else `Insufficient` (never a silent pass).
1846 #[test]
1847 fn race_verdict_maps_onto_unified_ladder() {
1848 use gam_problem::topology_certificates::Verdict;
1849 assert_eq!(
1850 EvidenceCertification::Exact.race_verdict(1e-6),
1851 Verdict::Certified
1852 );
1853 // Non-positive lead is undecided regardless of certification.
1854 assert_eq!(
1855 EvidenceCertification::Exact.race_verdict(0.0),
1856 Verdict::Insufficient
1857 );
1858 let enc = EvidenceCertification::Enclosure { gap: 0.2 };
1859 assert_eq!(enc.race_verdict(0.5), Verdict::Certified);
1860 assert_eq!(enc.race_verdict(0.1), Verdict::Insufficient);
1861 let cert = CoresetCertificate::new(0.05, 0.1, 32, 1000).expect("certificate");
1862 let required = cert.race_transfer_margin();
1863 let coreset = EvidenceCertification::Coreset { certificate: cert };
1864 assert_eq!(coreset.race_verdict(0.5 * required), Verdict::Insufficient);
1865 assert_eq!(
1866 coreset.race_verdict(2.0 * required + 1.0),
1867 Verdict::Certified
1868 );
1869 }
1870
1871 #[test]
1872 fn closure_profiler_recovers_interior_minimum_and_ci() {
1873 // A planted parabolic profile in γ with minimum at 0.7: the closure
1874 // smooth class must recover γ̂ ≈ 0.7 with a CI that excludes both the
1875 // circle (γ=1) and the interval (γ=0) boundaries, and must NOT route to
1876 // the mixture rung (this is a regular interior optimum).
1877 let selection = profile_closure_within_smooth_class(
1878 |gamma| Ok::<_, String>((100.0 + 80.0 * (gamma - 0.7).powi(2), gamma)),
1879 0.95,
1880 )
1881 .expect("closure profile");
1882 assert!(
1883 (selection.ci.gamma_hat - 0.7).abs() < 0.06,
1884 "γ̂ {}",
1885 selection.ci.gamma_hat
1886 );
1887 assert!(!selection.ci.ci_includes_circle);
1888 assert!(!selection.ci.ci_includes_interval);
1889 assert!(!selection.route_to_mixture_rung);
1890 // The representative fit handle is the γ̂ point.
1891 assert!((selection.representative.gamma - selection.ci.gamma_hat).abs() < 1e-12);
1892 }
1893
1894 #[test]
1895 fn closure_profiler_routes_collapse_to_mixture_rung() {
1896 // A profile that keeps improving toward γ=0 (support collapse) pins the
1897 // minimiser at the floor and must hand off to the mixture/union rung.
1898 let selection = profile_closure_within_smooth_class(
1899 |gamma| Ok::<_, String>((10.0 + 25.0 * gamma, gamma)),
1900 0.95,
1901 )
1902 .expect("closure profile");
1903 assert!(selection.ci.gamma_hat.abs() < 1e-9);
1904 assert!(selection.route_to_mixture_rung);
1905 assert!(selection.ci.ci_includes_interval);
1906 }
1907
1908 #[test]
1909 fn topology_race_thread_plan_bounds_nested_rayon_threads() {
1910 let plan = TopologyRaceThreadPlan::for_budget(3, 8);
1911 assert_eq!(plan.concurrent_fits, 3);
1912 assert!(
1913 plan.coordinator_threads + plan.concurrent_fits * plan.per_fit_threads <= 8,
1914 "plan must bound coordinator plus per-fit Rayon workers"
1915 );
1916
1917 let small = TopologyRaceThreadPlan::for_budget(3, 2);
1918 assert_eq!(small.concurrent_fits, 1);
1919 assert!(small.coordinator_threads + small.per_fit_threads <= 2);
1920 }
1921
1922 // --- #944 stage-4 topology collapse tests --------------------------------
1923
1924 /// Two fixed constant-curvature forms (Euclidean + Sphere) must be fused
1925 /// into a single estimated-κ ConstantCurvature candidate, at the position of
1926 /// the first fixed form. Non-CC candidates (Circle, Torus) keep their order.
1927 #[test]
1928 fn fuse_cc_family_collapses_euclidean_and_sphere() {
1929 let input = vec![
1930 AutoTopologyKind::Circle,
1931 AutoTopologyKind::Euclidean,
1932 AutoTopologyKind::Torus,
1933 AutoTopologyKind::Sphere,
1934 ];
1935 let fused = AutoTopologyKind::fuse_constant_curvature_family(&input);
1936 assert_eq!(
1937 fused,
1938 vec![
1939 AutoTopologyKind::Circle,
1940 AutoTopologyKind::ConstantCurvature, // replaced first fixed form
1941 AutoTopologyKind::Torus,
1942 // Sphere dropped — absorbed into ConstantCurvature
1943 ],
1944 "fused candidates: {fused:?}"
1945 );
1946 }
1947
1948 /// A single fixed form alone must NOT be fused (nothing to estimate κ across).
1949 #[test]
1950 fn fuse_cc_family_leaves_single_form_intact() {
1951 let euclidean_only = vec![AutoTopologyKind::Euclidean, AutoTopologyKind::Circle];
1952 let fused = AutoTopologyKind::fuse_constant_curvature_family(&euclidean_only);
1953 assert_eq!(fused, euclidean_only, "single fixed form must not be fused");
1954
1955 let sphere_only = vec![AutoTopologyKind::Sphere];
1956 let fused2 = AutoTopologyKind::fuse_constant_curvature_family(&sphere_only);
1957 assert_eq!(fused2, sphere_only);
1958 }
1959
1960 /// An explicit ConstantCurvature candidate alongside any fixed form fuses
1961 /// by dropping the fixed forms (the explicit CC already subsumes them).
1962 #[test]
1963 fn fuse_cc_family_explicit_cc_absorbs_fixed_forms() {
1964 let input = vec![
1965 AutoTopologyKind::ConstantCurvature,
1966 AutoTopologyKind::Euclidean,
1967 AutoTopologyKind::Circle,
1968 ];
1969 let fused = AutoTopologyKind::fuse_constant_curvature_family(&input);
1970 assert_eq!(
1971 fused,
1972 vec![
1973 AutoTopologyKind::ConstantCurvature,
1974 AutoTopologyKind::Circle
1975 ],
1976 "explicit CC must absorb the fixed Euclidean form"
1977 );
1978 }
1979
1980 /// Idempotence: fusing an already-fused list leaves it unchanged.
1981 #[test]
1982 fn fuse_cc_family_is_idempotent() {
1983 let input = vec![
1984 AutoTopologyKind::Circle,
1985 AutoTopologyKind::ConstantCurvature,
1986 AutoTopologyKind::Torus,
1987 ];
1988 let once = AutoTopologyKind::fuse_constant_curvature_family(&input);
1989 let twice = AutoTopologyKind::fuse_constant_curvature_family(&once);
1990 assert_eq!(once, twice, "fuse must be idempotent");
1991 assert_eq!(once, input, "already-fused list must be unchanged");
1992 }
1993
1994 /// Non-CC lists (no Euclidean/Sphere) are returned as-is.
1995 #[test]
1996 fn fuse_cc_family_noop_for_non_cc_list() {
1997 let input = vec![
1998 AutoTopologyKind::Circle,
1999 AutoTopologyKind::Torus,
2000 AutoTopologyKind::Cylinder,
2001 ];
2002 let fused = AutoTopologyKind::fuse_constant_curvature_family(&input);
2003 assert_eq!(fused, input);
2004 }
2005}