Skip to main content

gam_sae/
structure_harvest.rs

1//! #997 — the wiring seam between a fitted [`SaeManifoldTerm`] and the
2//! evidence-guarded move engine of [`gam_solve::structure_search`].
3//!
4//! #976 closed with the move engine (`search`) and its triggers
5//! (`gam_sae::atom_codes::SparseAtomCodes::coactivation`, ARD precisions,
6//! terminal [`CollapseEvent`]s) on main but deliberately unwired: nothing
7//! harvested move proposals from a fitted dictionary or drove `search` around
8//! the production fit. This module is that seam. It owns three things:
9//!
10//! 1. [`harvest_move_proposals`] — reads a fitted term + its ρ + the per-row
11//!    reconstruction residuals and emits the canonical-order-ready
12//!    [`MoveProposal`] stream (deaths, fusions, fission audits, births).
13//! 2. [`apply_structure_move`] — the warm-inheritance restructuring of a
14//!    [`SaeManifoldTerm`] under one [`StructureMove`]: a death demotes an atom's
15//!    routing, a fission splits an atom into two children that inherit its
16//!    decoder block, a fusion folds the weaker of a pair into the stronger, a
17//!    birth appends a residual-factor atom whose TOPOLOGY is chosen by EVIDENCE
18//!    (#977): [`race_birth_topology`] races the candidate bases matched to the
19//!    atom's intrinsic dim (`d = 1`: circle vs line; `d = 2`: torus vs
20//!    sphere/constant-curvature vs euclidean vs cylinder) by TK-normalized REML and seeds the
21//!    born atom from the winner, so the discovered dictionary is genuinely
22//!    heterogeneous rather than all-circle. Every child state is built FROM the
23//!    parent (never cold) so the engine's warm-state contract holds by
24//!    construction.
25//! 3. [`run_structure_search_rounds`] — the round driver: fit → harvest →
26//!    [`search`] (over held-out row-block shards, with warm child refits) →
27//!    re-fit → repeat until a round applies no moves. The accumulated
28//!    [`SearchLedger`] (with the joint fit's [`CollapseEvent`]s) is the honesty
29//!    surface returned to the caller and serialized onto the fit payload.
30//!
31//! # Determinism
32//!
33//! Pure: no RNG, no clock. Proposal triggers are deterministic functions of the
34//! fitted state; the engine canonicalizes and gates them; the ledger serializes
35//! byte-identically for identical inputs. The structural hashes that dedup the
36//! proposal stream are computed with the same [`gam_runtime::warm_start::Fingerprinter`]
37//! the [`gam_terms::smooth::TermCollectionSpec`] machinery (#869) uses, fed
38//! the POST-move dictionary shape (atom count, per-atom basis kind + latent dim
39//! + the move that produced it), so two proposals that reach the same dictionary
40//! shape collide exactly as the engine requires.
41
42use std::sync::Arc;
43
44use ndarray::{Array1, Array2, ArrayView1, ArrayView2};
45
46use gam_solve::gaussian_reml::gaussian_reml_multi_closed_form;
47use gam_solve::inference::residual_factor::{ResidualFactorInput, StructuredResidualModel};
48use gam_terms::inference::structure_evidence::{ClaimKind, StructureLedger};
49use gam_solve::structure_search::{
50    CollapseAction, MoveBudget, MoveProposal, SearchLedger, SearchOutcome, StructureMove, search,
51};
52use gam_solve::{
53    AutoTopologyKind, TopologyAutoFitEvidence, TopologyAutoSelector, TopologyScoreScale,
54    select_topology_with_fit,
55};
56use gam_terms::latent::{LatentIdMode, LatentManifold};
57use crate::atom_codes::SparseAtomCodes;
58use crate::basis::{
59    CylinderHarmonicEvaluator, EuclideanPatchEvaluator, PeriodicHarmonicEvaluator,
60    SaeBasisSecondJet, SphereChartEvaluator, TorusHarmonicEvaluator,
61};
62use crate::manifold::{
63    AssignmentMode, SaeAtomBasisKind, SaeManifoldAtom, SaeManifoldRho, SaeManifoldTerm,
64};
65use gam_terms::structure::anova_atom::{
66    CarveReport, FissionDecision, carve, carve_input_from_fitted_atom, fission_decision,
67};
68use gam_runtime::warm_start::Fingerprinter;
69
70/// Per-row soft-assignment mass below which an atom is treated as INACTIVE on
71/// that row when deriving the discrete co-activation support. A soft softmax /
72/// gate assignment never reaches exactly zero, so the discrete masks the
73/// coactivation triggers consume are obtained by thresholding the per-row mass.
74/// Chosen as a fixed structural constant (magic-by-default): small enough that a
75/// genuinely-routed atom counts as active on its rows, large enough that the
76/// near-uniform softmax floor (`≈ 1/K`) on rows an atom does not own does not
77/// leak into its support. The threshold is relative to a uniform-assignment
78/// reference so it scales with `K`.
79const ACTIVE_SUPPORT_REL_FLOOR: f64 = 0.5;
80
81/// ARD log-precision above which an atom's coordinate prior is treated as
82/// DIVERGED — the coordinate has been shrunk to its prior mean, so the atom
83/// carries no on-manifold structure and its existence was never certified by
84/// the data. This is the death-proposal trigger (#976): diverged ARD ⇒ demote
85/// the atom unless its `AtomExists` claim certified in an earlier round (the
86/// veto). A large positive `log_alpha` is a precision blow-up; the floor is set
87/// well above the strengths a live coordinate settles at and well below the
88/// `stable_exp_strength` clamp.
89const ARD_DIVERGENCE_LOG_PRECISION: f64 = 12.0;
90
91/// Minimum symmetric code dependence for a pair to be proposed for FUSION. Below
92/// this the two atoms' supports are essentially independent (the shattering
93/// signature needs both conditionals high); above it the pair is a fusion
94/// candidate. The e-gate, not this threshold, decides acceptance — this only
95/// keeps the proposal stream from carrying every independent pair.
96const FUSION_DEPENDENCE_FLOOR: f64 = 0.6;
97
98/// Minimum conditional asymmetry for a pair to be proposed for a FISSION audit
99/// (the A⇒B absorption signature: one conditional near 1 without the converse).
100const ABSORPTION_ASYMMETRY_FLOOR: f64 = 0.5;
101
102/// Anti-symmetric decoder perturbation applied when a fission DUPLICATES an atom,
103/// to break the symmetric saddle so the two children can separate in the joint
104/// refit (see `duplicate_atom`). Small enough to preserve the warm-start (and the
105/// mass-split combined decoder is exactly unchanged), but ≫ floating-point noise.
106const FISSION_SYMMETRY_BREAK_EPS: f64 = 0.05;
107
108/// Level at which the within-atom representational carve (#993) calls binding
109/// PROVEN (blocking a fission). The harvest carve is a PROPOSAL filter, not the
110/// final certificate — the downstream held-out e-gate owns acceptance — so this
111/// is the conventional 0.05 screening level, deliberately not the stricter
112/// certificate level; a carve that fails to reject here still rides as a
113/// fission proposal for the e-gate to adjudicate on held-out shards.
114const WITHIN_ATOM_CARVE_ALPHA: f64 = 0.05;
115
116/// Knobs for one harvest pass. All magic-by-default — derived from the fit, not
117/// surfaced as user flags.
118#[derive(Clone, Copy, Debug)]
119pub struct HarvestParams {
120    /// Maximum fusion pairs proposed per round (the top-dependence pairs).
121    pub max_fusions: usize,
122    /// Maximum fission audits proposed per round (the top-asymmetry pairs).
123    pub max_fissions: usize,
124    /// Maximum residual-factor birth candidates proposed per round (the top
125    /// factor directions by explained residual mass).
126    pub max_births: usize,
127}
128
129impl Default for HarvestParams {
130    fn default() -> Self {
131        // A small fixed budget per round; the round driver iterates until a
132        // round applies nothing, so per-round breadth need not be exhaustive.
133        Self {
134            max_fusions: 4,
135            max_fissions: 4,
136            max_births: 4,
137        }
138    }
139}
140
141/// Derive the discrete active-support codes the co-activation triggers consume
142/// from a fitted term's SOFT assignments. An atom counts as active on a row when
143/// its assignment mass exceeds `ACTIVE_SUPPORT_REL_FLOOR / K` (relative to the
144/// uniform-assignment reference), so the discrete support reflects genuine
145/// routing rather than the near-uniform softmax floor.
146pub fn sparse_codes_from_term(term: &SaeManifoldTerm) -> SparseAtomCodes {
147    let assignments = term.assignment.assignments();
148    let n = assignments.nrows();
149    let k = assignments.ncols();
150    let floor = if k == 0 {
151        0.0
152    } else {
153        ACTIVE_SUPPORT_REL_FLOOR / k as f64
154    };
155    let mut codes = SparseAtomCodes::empty(n, k);
156    for row in 0..n {
157        for atom in 0..k {
158            let mass = assignments[[row, atom]];
159            if mass > floor {
160                codes.row_mut(row).assign(atom, mass);
161            }
162        }
163    }
164    codes
165}
166
167/// Per-atom maximum active mass over rows — the collapse statistic (a
168/// legitimately sparse atom has small MEAN mass but high MAX on its rows; only
169/// an atom with no material support anywhere has a small MAX). Used as the
170/// birth-residual activity coordinate and as a secondary death signal.
171fn per_atom_max_mass(term: &SaeManifoldTerm) -> Array1<f64> {
172    let assignments = term.assignment.assignments();
173    let k = assignments.ncols();
174    let mut out = Array1::<f64>::zeros(k);
175    for atom in 0..k {
176        let mut max = 0.0_f64;
177        for &m in assignments.column(atom).iter() {
178            if m > max {
179                max = m;
180            }
181        }
182        out[atom] = max;
183    }
184    out
185}
186
187/// The largest per-atom ARD log-precision (over the atom's axes), or `-inf` for
188/// an atom with native ARD disabled (empty block). A diverged precision on ANY
189/// axis collapses that coordinate, so the per-atom death trigger is the max.
190fn per_atom_ard_divergence(rho: &SaeManifoldRho, atom: usize) -> f64 {
191    rho.log_ard
192        .get(atom)
193        .and_then(|axes| axes.iter().copied().reduce(f64::max))
194        .unwrap_or(f64::NEG_INFINITY)
195}
196
197/// Structural hash of the POST-move dictionary shape, computed with the same
198/// [`Fingerprinter`] the [`TermCollectionSpec`](gam_terms::smooth::TermCollectionSpec)
199/// hash machinery (#869) uses. The hash covers the move kind, the atoms it
200/// touches, and the resulting atom count + per-atom basis-kind/latent-dim
201/// shape — structural identity only, never decoder coefficients or coordinates,
202/// so two distinct proposals that reach the same dictionary shape collide.
203fn post_move_structure_hash(term: &SaeManifoldTerm, mv: &StructureMove) -> u64 {
204    let mut fp = Fingerprinter::new();
205    fp.write_str("sae_structure_move");
206    match mv {
207        StructureMove::Birth { candidate } => {
208            fp.write_str("birth");
209            fp.write_usize(*candidate);
210        }
211        StructureMove::Death { atom } => {
212            fp.write_str("death");
213            fp.write_usize(*atom);
214        }
215        StructureMove::Fission { atom } => {
216            fp.write_str("fission");
217            fp.write_usize(*atom);
218        }
219        StructureMove::Fusion { a, b } => {
220            fp.write_str("fusion");
221            // Order-independent: a fusion of (a,b) is the same structure as
222            // (b,a).
223            fp.write_usize((*a).min(*b));
224            fp.write_usize((*a).max(*b));
225        }
226    }
227    // Post-move atom-shape skeleton: the current per-atom (basis-kind tag,
228    // latent dim) plus the count delta the move applies. Births/fissions add an
229    // atom; deaths/fusions do not change the count (death demotes, fusion folds)
230    // — the routing change, not a structural resize, so the shape skeleton is
231    // the parent's plus the move tag above.
232    fp.write_usize(term.atoms.len());
233    for atom in &term.atoms {
234        fp.write_str(basis_kind_tag(&atom.basis_kind));
235        fp.write_usize(atom.latent_dim);
236    }
237    let digest = fp.finalize();
238    let bytes = digest.as_bytes();
239    u64::from_le_bytes([
240        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
241    ])
242}
243
244/// Structural tag for an atom basis kind — the discrete shape identity the
245/// structural hash needs (never coordinates or coefficients).
246fn basis_kind_tag(kind: &SaeAtomBasisKind) -> &str {
247    match kind {
248        SaeAtomBasisKind::Duchon => "duchon",
249        SaeAtomBasisKind::Periodic => "periodic",
250        SaeAtomBasisKind::Sphere => "sphere",
251        SaeAtomBasisKind::Torus => "torus",
252        SaeAtomBasisKind::Linear => "linear",
253        SaeAtomBasisKind::EuclideanPatch => "euclidean_patch",
254        SaeAtomBasisKind::Poincare => "poincare",
255        SaeAtomBasisKind::Cylinder => "cylinder",
256        SaeAtomBasisKind::Precomputed(_) => "precomputed",
257    }
258}
259
260/// Build a [`MoveProposal`] from a move + trigger by stamping its post-move
261/// structural hash and the structural claim it asserts.
262fn proposal(term: &SaeManifoldTerm, mv: StructureMove, trigger: f64) -> MoveProposal {
263    let structure_hash = post_move_structure_hash(term, &mv);
264    let claim = match &mv {
265        StructureMove::Birth { candidate } => ClaimKind::AtomExists {
266            // Births claim the existence of the NEXT atom index (appended).
267            atom: term.k_atoms() + *candidate,
268        },
269        StructureMove::Death { atom } => ClaimKind::AtomExists { atom: *atom },
270        StructureMove::Fusion { a, b } => ClaimKind::BindingEdge { a: *a, b: *b },
271        StructureMove::Fission { atom } => ClaimKind::Custom {
272            label: format!("fission:{atom}"),
273        },
274    };
275    MoveProposal {
276        mv,
277        trigger,
278        structure_hash,
279        claim,
280    }
281}
282
283/// Harvest the canonical move-proposal stream from a fitted term, its ρ, and the
284/// per-row reconstruction residuals `R = target − fitted` (used for the birth
285/// channel under the `WhitenedStructured` (`gam_inference::row_metric::MetricProvenance::WhitenedStructured`)
286/// residual-factor metric — never raw-Euclidean Λ, per the #974 rescope).
287///
288/// The four channels (#976/#997):
289///
290/// * **Deaths** from diverged ARD precisions ∪ terminal [`CollapseEvent`]s. The
291///   trigger is the ARD precision (descending); a terminally-collapsed atom is
292///   proposed even with finite ARD (its routing is gone regardless of its
293///   coordinate prior).
294/// * **Fusions** from the top co-activation pairs by symmetric code dependence.
295/// * **Fission audits** from absorption-suspect pairs (high conditional
296///   asymmetry). For each candidate that is a `d = 2` product atom the
297///   within-atom functional-ANOVA carve (#975 / #993) RUNS on the atom's own
298///   fitted decoder via [`run_within_atom_carve`]: a carve that proves binding
299///   blocks the fission (the atom is irreducible), an additive carve rides as a
300///   fission proposal ranked by its interaction fraction, and every outcome is
301///   recorded on [`HarvestReport::fission_carve_results`]. A non-product
302///   candidate (no factor split) rides on the co-activation audit and is
303///   counted in [`HarvestReport::fission_carve_unavailable_count`] — never a
304///   silent drop. The held-out e-gate still owns final acceptance.
305/// * **Births** from the whitened residual-factor subspace: the residuals are
306///   fed to [`StructuredResidualModel::fit`], whose factor directions
307///   ([`StructuredResidualModel::factor`]) are the birth candidates, ranked by
308///   explained residual mass.
309pub fn harvest_move_proposals(
310    term: &SaeManifoldTerm,
311    rho: &SaeManifoldRho,
312    residuals: ArrayView2<'_, f64>,
313    params: &HarvestParams,
314) -> Result<HarvestReport, String> {
315    let k = term.k_atoms();
316    let mut proposals: Vec<MoveProposal> = Vec::new();
317
318    // --- Deaths: diverged ARD ∪ terminal collapses -------------------------
319    let max_mass = per_atom_max_mass(term);
320    let terminal: std::collections::HashSet<usize> = term
321        .collapse_events()
322        .iter()
323        .filter(|e| matches!(e.action, CollapseAction::Terminal))
324        .map(|e| e.atom)
325        .collect();
326    for atom in 0..k {
327        let ard = per_atom_ard_divergence(rho, atom);
328        let diverged = ard >= ARD_DIVERGENCE_LOG_PRECISION;
329        let collapsed = terminal.contains(&atom);
330        if diverged || collapsed {
331            // Trigger (descending): a terminal collapse is maximally urgent
332            // (the routing is already gone), ranked above ARD divergence; ARD
333            // deaths rank by precision. `max_mass` breaks ties toward emptier
334            // atoms.
335            let trigger = if collapsed { f64::MAX / 2.0 } else { ard };
336            // Lower max-mass (emptier) sorts first among equal triggers; encode
337            // by subtracting a small mass-proportional term that cannot reorder
338            // across the collapsed/ARD bands.
339            let trigger = trigger - max_mass[atom].min(1.0) * 1e-9;
340            proposals.push(proposal(term, StructureMove::Death { atom }, trigger));
341        }
342    }
343
344    // --- Fusions: top co-activation dependence -----------------------------
345    let codes = sparse_codes_from_term(term);
346    let mut fusion_pairs: Vec<(usize, usize, f64)> = Vec::new();
347    for a in 0..k {
348        for b in (a + 1)..k {
349            let stats = codes.coactivation(a, b);
350            let dep = stats.dependence();
351            if dep >= FUSION_DEPENDENCE_FLOOR {
352                fusion_pairs.push((a, b, dep));
353            }
354        }
355    }
356    fusion_pairs.sort_by(|x, y| y.2.total_cmp(&x.2).then(x.0.cmp(&y.0)).then(x.1.cmp(&y.1)));
357    for &(a, b, dep) in fusion_pairs.iter().take(params.max_fusions) {
358        proposals.push(proposal(term, StructureMove::Fusion { a, b }, dep));
359    }
360
361    // --- Fission audits: absorption-suspect asymmetry ----------------------
362    let mut fission_atoms: Vec<(usize, f64)> = Vec::new();
363    for a in 0..k {
364        for b in (a + 1)..k {
365            let stats = codes.coactivation(a, b);
366            let asym = stats.absorption_asymmetry();
367            if asym >= ABSORPTION_ASYMMETRY_FLOOR {
368                // The parent (the conditioned-on atom whose support nests the
369                // child) is the one whose `P(parent|child) ≈ 1`. Audit the
370                // parent for the absorbed substructure.
371                let parent = if stats.p_a_given_b >= stats.p_b_given_a {
372                    a
373                } else {
374                    b
375                };
376                // Fission trigger is audit significance ASCENDING; map a high
377                // asymmetry to a low significance proxy `1 − asym` so the most
378                // asymmetric (most suspect) pair sorts first.
379                let significance = (1.0 - asym).max(0.0);
380                fission_atoms.push((parent, significance));
381            }
382        }
383    }
384    // Keep the most-suspect (lowest significance) audit per parent atom.
385    fission_atoms.sort_by(|x, y| x.1.total_cmp(&y.1).then(x.0.cmp(&y.0)));
386    fission_atoms.dedup_by_key(|(atom, _)| *atom);
387
388    // #993: run the within-atom functional-ANOVA carve on each fission
389    // candidate that is a genuine `d = 2` product atom. The carve adjudicates
390    // the representational binding question (is the surface ONE bound product
391    // atom or TWO superposed factors?) on the atom's OWN fitted decoder, on the
392    // same empirical code measure. A carve that PROVES binding (the interaction
393    // is significant, or energetically non-negligible) blocks the fission — the
394    // atom stays whole and contested; the e-gate never sees a fission proposal
395    // for a bound atom. A carve that does NOT prove binding rides as a fission
396    // proposal whose trigger is the carve's interaction fraction (ascending —
397    // the most-separable atom sorts first), and whose binding evidence is the
398    // carve's `edge_p_value`, recorded for the ledger.
399    //
400    // A candidate that is NOT a recoverable product atom (single-axis, sphere
401    // chart, monomial patch — `factor_basis_sizes() == None`), or whose carve
402    // could not run (degenerate sample, non-separable basis), is recorded
403    // loudly via `fission_carve_unavailable` rather than silently dropped: its
404    // fission audit still rides on the co-activation significance, exactly the
405    // pre-#993 behavior, but the absence of the carve is now an explicit,
406    // counted signal instead of a blanket skip.
407    let mut carve_results: Vec<FissionCarveResult> = Vec::new();
408    let mut fission_carve_ran_count = 0usize;
409    let mut fission_carve_unavailable_count = 0usize;
410    let mut fission_carve_blocked_count = 0usize;
411    let mut gated_fissions: Vec<(usize, f64)> = Vec::new();
412    for &(atom, significance) in fission_atoms.iter().take(params.max_fissions) {
413        match run_within_atom_carve(term, atom) {
414            Some(Ok(report)) => {
415                fission_carve_ran_count += 1;
416                let decision = fission_decision(&report, None);
417                let edge_p = report.edge_p_value;
418                let interaction = report.interaction_fraction;
419                carve_results.push(FissionCarveResult {
420                    atom,
421                    edge_p_value: edge_p,
422                    interaction_fraction: interaction,
423                    decision,
424                });
425                match decision {
426                    FissionDecision::Keep => {
427                        // Binding proven (or interaction non-negligible): the
428                        // atom is irreducible. Do NOT propose a fission.
429                        fission_carve_blocked_count += 1;
430                        log::debug!(
431                            "[structure-harvest] #993 carve KEEPS atom {atom}: binding proven \
432                             (edge_p={edge_p:?}, interaction_fraction={interaction:.3e}); no fission proposed",
433                        );
434                    }
435                    FissionDecision::SplitReconstructionOnly
436                    | FissionDecision::SplitCertifiedJoint => {
437                        // Separable: propose the fission, ranked by interaction
438                        // fraction (ascending — most-additive first).
439                        gated_fissions.push((atom, interaction));
440                    }
441                }
442            }
443            Some(Err(err)) => {
444                fission_carve_unavailable_count += 1;
445                log::debug!(
446                    "[structure-harvest] #993 carve could not run on atom {atom}: {err}; \
447                     fission audit rides on co-activation significance, e-gate owns acceptance",
448                );
449                gated_fissions.push((atom, significance));
450            }
451            None => {
452                // Not a recoverable product atom — the within-atom carve is not
453                // defined here. Ride the co-activation audit, count it loudly.
454                fission_carve_unavailable_count += 1;
455                gated_fissions.push((atom, significance));
456            }
457        }
458    }
459
460    for &(atom, trigger) in &gated_fissions {
461        proposals.push(proposal(term, StructureMove::Fission { atom }, trigger));
462    }
463
464    // --- Births: whitened residual-factor subspace -------------------------
465    // The activity coordinate the residual-factor scale law is smooth in is the
466    // per-row total assignment mass (an activation-strength summary): rows where
467    // the dictionary routes strongly should have smaller unexplained residual
468    // factor energy than rows it does not cover.
469    let n = residuals.nrows();
470    let assignments = term.assignment.assignments();
471    let activity: Array1<f64> = (0..n).map(|r| assignments.row(r).sum()).collect();
472    let mut births_proposed = 0usize;
473    let mut birth_skipped_reason: Option<String> = None;
474    if params.max_births > 0 && n > 0 && residuals.ncols() > 0 {
475        let p = residuals.ncols();
476        let max_rank = params.max_births.min(p.saturating_sub(1));
477        match StructuredResidualModel::fit(ResidualFactorInput {
478            residuals,
479            activity: activity.view(),
480            max_factor_rank: max_rank,
481        }) {
482            Ok(model) => {
483                let factor = model.factor();
484                let r = model.factor_rank();
485                // Rank each factor direction by its explained residual mass
486                // (column norm of Λ scaled by the mean activity); births are
487                // proposed in descending mass order, capped at `max_births`.
488                let mut dirs: Vec<(usize, f64)> = (0..r)
489                    .map(|j| {
490                        let mass = factor.column(j).iter().map(|v| v * v).sum::<f64>().sqrt();
491                        (j, mass)
492                    })
493                    .collect();
494                dirs.sort_by(|x, y| y.1.total_cmp(&x.1).then(x.0.cmp(&y.0)));
495                for &(candidate, mass) in dirs.iter().take(params.max_births) {
496                    proposals.push(proposal(term, StructureMove::Birth { candidate }, mass));
497                    births_proposed += 1;
498                }
499            }
500            Err(e) => {
501                birth_skipped_reason = Some(e);
502            }
503        }
504    } else if params.max_births > 0 {
505        birth_skipped_reason =
506            Some("residuals empty or single-channel; no factor subspace to mine".to_string());
507    }
508
509    Ok(HarvestReport {
510        proposals,
511        fission_carve_results: carve_results,
512        fission_carve_ran_count,
513        fission_carve_unavailable_count,
514        fission_carve_blocked_count,
515        births_proposed,
516        birth_skipped_reason,
517    })
518}
519
520/// One within-atom carve outcome on a fission candidate (#993). Recorded on
521/// the [`HarvestReport`] so the binding decision and its evidence are visible
522/// — including the `edge_p_value` the dictionary certificate's `BindingEdge`
523/// claim reads — never silent.
524#[derive(Clone, Debug)]
525pub struct FissionCarveResult {
526    /// The audited product atom.
527    pub atom: usize,
528    /// Edge-level representational binding p-value (the carve's joint Wald over
529    /// the gauge-projected interaction block). `None` when the test degenerated.
530    pub edge_p_value: Option<f64>,
531    /// Fraction of centered surface energy carried by the interaction
532    /// (0 = perfectly additive / separable, 1 = pure interaction).
533    pub interaction_fraction: f64,
534    /// The carve's representational fission verdict.
535    pub decision: FissionDecision,
536}
537
538/// Run the within-atom representational carve on one fitted atom (#993).
539///
540/// Returns:
541/// * `None` — the atom is not a recoverable `d = 2` product atom (no factor
542///   split); the within-atom carve is undefined here.
543/// * `Some(Err(_))` — the atom is a product atom but the carve could not run
544///   (degenerate sample, non-separable basis, REML fit failure).
545/// * `Some(Ok(report))` — the carve ran; the report carries the binding
546///   verdict and evidence.
547///
548/// The factor sizes come from the atom's basis evaluator
549/// ([`SaeBasisEvaluator::factor_basis_sizes`]); the carve inputs are built from
550/// the atom's FUSED basis and decoder by
551/// [`gam_terms::structure::anova_atom::carve_input_from_fitted_atom`], which
552/// verifies the Kronecker separability before fitting.
553fn run_within_atom_carve(
554    term: &SaeManifoldTerm,
555    atom: usize,
556) -> Option<Result<CarveReport, String>> {
557    let a = &term.atoms[atom];
558    if a.latent_dim != 2 {
559        return None;
560    }
561    let evaluator = a.basis_evaluator.as_ref()?;
562    let (m_a, m_b) = evaluator.factor_basis_sizes()?;
563    let build = carve_input_from_fitted_atom(
564        a.basis_values.view(),
565        a.decoder_coefficients.view(),
566        m_a,
567        m_b,
568    );
569    let bundle = match build {
570        Ok(b) => b,
571        Err(e) => return Some(Err(e)),
572    };
573    let input = bundle.representational_carve_input();
574    Some(carve(&input, WITHIN_ATOM_CARVE_ALPHA))
575}
576
577/// The output of one [`harvest_move_proposals`] pass: the proposal stream plus
578/// the loud records of any degrade-to-skip path taken (no silent drops).
579#[derive(Clone, Debug)]
580pub struct HarvestReport {
581    /// Trigger-stamped, claim-stamped, structurally-hashed proposals, ready for
582    /// [`search`] (which canonicalizes and gates them).
583    pub proposals: Vec<MoveProposal>,
584    /// The within-atom carve outcomes (#993): one entry per fission candidate
585    /// (within the `max_fissions` cap) that is a recoverable `d = 2` product
586    /// atom whose carve RAN. Carries the representational binding verdict and
587    /// `edge_p_value` — the dictionary certificate's `BindingEdge` evidence —
588    /// so a fission's binding decision is visible, never silent. A carve that
589    /// KEEPS the atom (binding proven) blocked its fission proposal; the
590    /// remaining entries' atoms each have a corresponding `Fission` proposal.
591    pub fission_carve_results: Vec<FissionCarveResult>,
592    /// How many fission candidates had the #993 within-atom carve actually run.
593    pub fission_carve_ran_count: usize,
594    /// How many fission candidates could NOT be carved (not a product atom, or
595    /// the carve failed) and rode on the co-activation audit instead — the
596    /// precise, never-silent record of the residual degrade path.
597    pub fission_carve_unavailable_count: usize,
598    /// How many fission candidates the carve BLOCKED (binding proven → atom kept
599    /// whole, no fission proposed). These never reach the e-gate.
600    pub fission_carve_blocked_count: usize,
601    /// Number of residual-factor birth candidates proposed.
602    pub births_proposed: usize,
603    /// If the birth channel could not run (empty residuals, evidence-ladder
604    /// failure), why — so the absence of births is explained, not silent.
605    pub birth_skipped_reason: Option<String>,
606}
607
608/// Apply one [`StructureMove`] to a fitted term + ρ, returning the warm child
609/// state. Warm inheritance by construction: the child is cloned from the parent
610/// and only the touched atoms are restructured.
611///
612/// * **Death** demotes atom `atom`: its assignment logits are driven to a
613///   strongly-negative value (routing → ~0) on every row, and its ARD block is
614///   left in place. The atom is NOT removed (stable indices for the round); it
615///   simply stops carrying mass. Demote-never-reject (#976).
616/// * **Fission** appends a child cloned from atom `atom` (same basis, decoder,
617///   coordinates), splitting the parent's per-row routing between parent and
618///   child so the joint refit can pull them apart along the absorbed
619///   substructure. The child inherits the parent's main-effect block.
620/// * **Fusion** folds atom `b` into atom `a`: `a`'s routing absorbs `b`'s mass
621///   (logit-sum on the active rows) and `b` is demoted. The retained atom's
622///   product coordinates are initialized from the pair.
623/// * **Birth** appends a fresh atom whose decoder is seeded from the
624///   residual-factor direction `candidate` (passed in `birth_decoders`), routed
625///   at a small neutral mass so the refit can grow it if it is real.
626pub fn apply_structure_move(
627    term: &SaeManifoldTerm,
628    rho: &SaeManifoldRho,
629    mv: &StructureMove,
630    birth_decoders: &[Array2<f64>],
631) -> Result<(SaeManifoldTerm, SaeManifoldRho), String> {
632    match mv {
633        StructureMove::Death { atom } => {
634            let mut child = term.clone();
635            demote_atom(&mut child, *atom)?;
636            Ok((child, rho.clone()))
637        }
638        StructureMove::Fusion { a, b } => {
639            let mut child = term.clone();
640            fold_atom_into(&mut child, *a, *b)?;
641            Ok((child, rho.clone()))
642        }
643        StructureMove::Fission { atom } => {
644            let (child, child_rho) = duplicate_atom(term, rho, *atom)?;
645            Ok((child, child_rho))
646        }
647        StructureMove::Birth { candidate } => {
648            let decoder = birth_decoders.get(*candidate).ok_or_else(|| {
649                format!(
650                    "apply_structure_move: birth candidate {candidate} out of range \
651                     ({} residual-factor decoders)",
652                    birth_decoders.len()
653                )
654            })?;
655            born_atom(term, rho, decoder.view())
656        }
657    }
658}
659
660/// A strongly-negative logit that drives a softmax / gate routing channel to ~0
661/// mass without producing a non-finite value the assignment validator rejects.
662const DEMOTE_LOGIT: f64 = -40.0;
663
664/// Drive an atom's per-row routing to ~0 by setting its logit column to a
665/// strongly-negative constant. Demotion, not removal: the atom keeps its index.
666fn demote_atom(term: &mut SaeManifoldTerm, atom: usize) -> Result<(), String> {
667    let k = term.k_atoms();
668    if atom >= k {
669        return Err(format!("demote_atom: atom {atom} out of range (K={k})"));
670    }
671    for row in 0..term.assignment.logits.nrows() {
672        term.assignment.logits[[row, atom]] = DEMOTE_LOGIT;
673    }
674    Ok(())
675}
676
677/// Fold atom `b` into atom `a`: `a` absorbs `b`'s routing mass on every row
678/// (logit max, the dominance the fused atom should express), then `b` is
679/// demoted. The retained atom keeps its decoder; the joint refit reconciles the
680/// merged structure.
681fn fold_atom_into(term: &mut SaeManifoldTerm, a: usize, b: usize) -> Result<(), String> {
682    let k = term.k_atoms();
683    if a >= k || b >= k {
684        return Err(format!(
685            "fold_atom_into: atoms ({a},{b}) out of range (K={k})"
686        ));
687    }
688    if a == b {
689        return Err("fold_atom_into: cannot fuse an atom with itself".to_string());
690    }
691    // For SOFTMAX the fused atom must carry the COMBINED routing mass of its two
692    // constituents. `softmax` mass is `e^logit/Z`, so the mass-preserving combine
693    // is `logsumexp(la, lb)` (`softmax(logsumexp(la,lb)) = softmax(la)+softmax(lb)`).
694    // Plain `max` UNDER-masses by up to `ln 2` on exactly the co-active rows that
695    // triggered the fusion (where `la ≈ lb`): it gives the fused atom half the
696    // combined mass, leaving the warm-start short and risking a FALSE rejection by
697    // the e-gate under a capped refit. For IBP/JumpReLU the per-atom gate is the
698    // UN-normalized `σ(logit)`, so the union gate is `max(σ(la),σ(lb)) = σ(max(la,lb))`
699    // → `max` is the correct combine there (a sum/logsumexp would over-gate).
700    let softmax_routing = matches!(term.assignment.mode, AssignmentMode::Softmax { .. });
701    for row in 0..term.assignment.logits.nrows() {
702        let la = term.assignment.logits[[row, a]];
703        let lb = term.assignment.logits[[row, b]];
704        term.assignment.logits[[row, a]] = if softmax_routing {
705            // Numerically stable logsumexp. When BOTH logits are -∞ (two rows of
706            // zero softmax mass — a hard-masked/dead pair), `m = -∞` makes
707            // `la - m = -∞ - (-∞) = NaN`, and the NaN poisons the whole logits
708            // row (every subsequent softmax over it is NaN). The combined mass of
709            // two zero-mass atoms is exactly zero, i.e. logit -∞ — return that
710            // directly instead of computing NaN.
711            let m = la.max(lb);
712            if m == f64::NEG_INFINITY {
713                f64::NEG_INFINITY
714            } else {
715                m + ((la - m).exp() + (lb - m).exp()).ln()
716            }
717        } else {
718            la.max(lb)
719        };
720    }
721    demote_atom(term, b)?;
722    Ok(())
723}
724
725/// Append a child cloned from atom `parent`: identical basis, decoder, and
726/// coordinates, with the parent's routing split evenly between parent and child
727/// (the parent's logit dropped by `ln 2` on every row, the child seeded equal).
728/// The joint refit then pulls the two apart along the absorbed substructure. The
729/// child's ARD block is inherited from the parent.
730fn duplicate_atom(
731    term: &SaeManifoldTerm,
732    rho: &SaeManifoldRho,
733    parent: usize,
734) -> Result<(SaeManifoldTerm, SaeManifoldRho), String> {
735    let k = term.k_atoms();
736    if parent >= k {
737        return Err(format!(
738            "duplicate_atom: parent {parent} out of range (K={k})"
739        ));
740    }
741    let mut atoms = term.atoms.clone();
742    let mut child_atom = term.atoms[parent].clone();
743    // Symmetry-breaking perturbation. A fission that duplicates the parent atom
744    // IDENTICALLY (same decoder, same coords, mass split 50/50) sits at a
745    // SYMMETRIC SADDLE of the joint refit: the two children have identical
746    // gradients, so a deterministic Newton/descent refit moves them in lockstep
747    // and they NEVER separate — the fission stays a no-op (two identical
748    // half-atoms ≡ the original atom) and the e-gate, seeing no reconstruction
749    // gain, rejects it. So fission could only ever land by floating-point noise.
750    // Apply a small ANTI-SYMMETRIC perturbation — parent decoder ×(1−ε·s_ij),
751    // child decoder ×(1+ε·s_ij) for a deterministic varying pattern s_ij — which
752    // breaks the symmetry (the refit can roll off the saddle toward the
753    // two-factor configuration the carve identified) while leaving the mass-split
754    // combined decoder `½·parent + ½·child = original` EXACTLY unchanged (the
755    // ±ε·s_ij cancel), so the warm-start is preserved. `ε ≫ fp noise`.
756    {
757        let (m, p) = atoms[parent].decoder_coefficients.dim();
758        let s = |i: usize, j: usize| -> f64 {
759            // Deterministic, varying, NON-ZERO pattern in [-1,-0.2]∪[0.2,1]. It
760            // must vary across (i,j) (so `parent − child = −2·f_ij·D_ij` points
761            // off the symmetric `D` direction and can separate factors) and never
762            // vanish (else a sparse decoder element gets no perturbation).
763            let raw = ((i * 7 + j * 13) % 11) as f64 / 5.0 - 1.0;
764            if raw.abs() < 0.2 { 0.3 } else { raw }
765        };
766        for i in 0..m {
767            for j in 0..p {
768                let f = FISSION_SYMMETRY_BREAK_EPS * s(i, j);
769                atoms[parent].decoder_coefficients[[i, j]] *= 1.0 - f;
770                child_atom.decoder_coefficients[[i, j]] *= 1.0 + f;
771            }
772        }
773        // The Grassmann decoder frame is derived from the coefficients; drop both
774        // so the warm refit recomputes them consistent with the perturbed decoders.
775        atoms[parent].decoder_frame = None;
776        child_atom.decoder_frame = None;
777    }
778    atoms.push(child_atom);
779
780    let n = term.assignment.logits.nrows();
781    let mut logits = Array2::<f64>::zeros((n, k + 1));
782    let split = std::f64::consts::LN_2;
783    for row in 0..n {
784        for col in 0..k {
785            let mut v = term.assignment.logits[[row, col]];
786            if col == parent {
787                // Halve the parent's routing mass (logit − ln 2) and give the
788                // other half to the child.
789                v -= split;
790            }
791            logits[[row, col]] = v;
792        }
793        logits[[row, k]] = term.assignment.logits[[row, parent]] - split;
794    }
795    let mut coords = term.assignment.coords.clone();
796    coords.push(term.assignment.coords[parent].clone());
797    let assignment = crate::manifold::SaeAssignment::with_mode(
798        logits,
799        coords,
800        term.assignment.mode,
801    )?;
802    let child = SaeManifoldTerm::new(atoms, assignment)?;
803
804    let mut child_rho = rho.clone();
805    if parent < child_rho.log_ard.len() {
806        let inherited = child_rho.log_ard[parent].clone();
807        child_rho.log_ard.push(inherited);
808    } else {
809        child_rho.log_ard.push(Array1::<f64>::zeros(0));
810    }
811    // The fissioned child inherits the PARENT atom's per-atom smoothness strength
812    // (#1556). As with `log_ard`, failing to grow `log_lambda_smooth` in step with
813    // `k_atoms()` makes the next `assemble_arrow_schur` panic on the per-atom
814    // `lambda_smooth[atom_idx]` index (out of bounds).
815    let inherited_smooth = child_rho
816        .log_lambda_smooth
817        .get(parent)
818        .or_else(|| child_rho.log_lambda_smooth.first())
819        .copied()
820        .unwrap_or(0.0);
821    child_rho.log_lambda_smooth.push(inherited_smooth);
822    Ok((child, child_rho))
823}
824
825// ===========================================================================
826// #977 — per-atom topology RACE at birth.
827//
828// A born atom must not inherit atom-0's circle template by fiat. Its TOPOLOGY is
829// chosen by EVIDENCE: each candidate basis whose required intrinsic dimension
830// matches the born atom's ARD-selected `d_k` is fit to the residual-factor image
831// the atom would reconstruct, and the winner is the lowest TK-normalized REML —
832// the SAME gauge-invariant comparison [`select_topology_with_fit`] applies to the
833// smooth-term topology race, so cross-topology scores are commensurable. The
834// dictionary the learner discovers is therefore genuinely heterogeneous: a born
835// atom on a circular residual gets a circle, one on a straight residual a line.
836// ===========================================================================
837
838/// One realized candidate of the birth topology race: the fitted evaluator, its
839/// penalized-least-squares decoder against the birth target, the chart manifold
840/// the winning atom will carry, and the basis kind tag. Carried as the
841/// `select_topology_with_fit` fit handle so the winner's basis seeds the born
842/// atom directly (no re-fit, no cold restart).
843#[derive(Clone)]
844struct TopologyRaceFit {
845    evaluator: Arc<dyn SaeBasisSecondJet>,
846    basis_kind: SaeAtomBasisKind,
847    manifold: LatentManifold,
848    latent_dim: usize,
849    /// The `(n × d)` coordinates the winning basis was evaluated at — the born
850    /// atom's coordinate block, dimension-matched to the winning evaluator.
851    coords: Array2<f64>,
852    /// Fitted basis design `Φ(coords)` (`n × m`).
853    phi: Array2<f64>,
854    /// Fitted basis Jacobian `∂Φ` (`n × m × d`).
855    jet: ndarray::Array3<f64>,
856    /// Penalized-least-squares decoder `B` (`m × p`).
857    decoder: Array2<f64>,
858    /// Intrinsic roughness Gram `S` (`m × m`) the atom is seeded with.
859    penalty: Array2<f64>,
860}
861
862/// A candidate topology paired with the evaluator + coordinates + manifold it
863/// realizes for a `d`-dimensional birth. The evaluator is built fresh (cold) for
864/// each candidate; the race then fits it to the birth target.
865struct TopologyCandidateSpec {
866    kind: AutoTopologyKind,
867    basis_kind: SaeAtomBasisKind,
868    manifold: LatentManifold,
869    latent_dim: usize,
870    evaluator: Arc<dyn SaeBasisSecondJet>,
871    /// The `(n, d)` coordinates this candidate evaluates its basis at. A `d = 1`
872    /// candidate reads the template coordinate column; a `d = 2` candidate reads
873    /// the first two columns (or pads with the single column the seed carries).
874    coords: Array2<f64>,
875}
876
877/// Build the topology candidate set whose required intrinsic dimension matches
878/// the born atom's `d_k`, each realized over `coords` (`n × d_seed`, the
879/// template's coordinate block). The candidate set is the realizable subset of
880/// the smooth-term topology race — every member is a CORE basis evaluator
881/// (`src/terms/sae/basis.rs`), so no FFI round-trip and no cold curved family
882/// that the joint refit cannot warm-start:
883///
884/// * **`d = 1`** — `Circle` ([`PeriodicHarmonicEvaluator`]) vs `Euclidean` line
885///   ([`EuclideanPatchEvaluator`] degree 3). These are the line-vs-circle race
886///   the #1026 curved-vs-linear rung adjudicates post-fit, lifted to BIRTH.
887/// * **`d = 2`** — `Torus` ([`TorusHarmonicEvaluator`]), `Sphere`
888///   ([`SphereChartEvaluator`]), a flat `Euclidean` patch
889///   ([`EuclideanPatchEvaluator`] degree 2), and `Cylinder` `S¹ × ℝ`
890///   ([`CylinderHarmonicEvaluator`]: a periodic circle axis tensored with a flat
891///   line axis). The cylinder is now a first-class d=2 candidate (the basis
892///   landed in `src/terms/sae/basis.rs`), so a residual that is periodic along
893///   one axis and unbounded-linear along the other earns a cylinder rather than
894///   being forced into a torus stand-in (which would wrap the linear axis
895///   spuriously) or a flat patch (which would lose the periodicity).
896///
897/// The fixed harmonic / degree budgets mirror the seed-dictionary builder
898/// (`sae_build_atom_plans`): periodic gets `2·d_k + 1` columns, torus two
899/// harmonics per axis, the patch degree 3 (`d = 1`) / 2 (`d = 2`).
900fn topology_candidates_for_dim(
901    coords: ArrayView2<'_, f64>,
902    d_k: usize,
903) -> Result<Vec<TopologyCandidateSpec>, String> {
904    let n = coords.nrows();
905    let d_seed = coords.ncols();
906    if d_k == 0 {
907        // d_k = 0 is the cluster-null rung — it is NOT a manifold topology and is
908        // adjudicated below the race (the bottom rung), never inside it.
909        return Ok(Vec::new());
910    }
911    // Project the seed coordinates onto the candidate's intrinsic dimension. A
912    // d=1 candidate uses the first column; a d=2 candidate uses the first two
913    // (padding the second from the first when the seed carries only one column,
914    // so a 1-D seed can still present a 2-D candidate to the race).
915    let coords_d = |d: usize| -> Array2<f64> {
916        let mut out = Array2::<f64>::zeros((n, d));
917        for row in 0..n {
918            for col in 0..d {
919                let src = col.min(d_seed.saturating_sub(1));
920                out[[row, col]] = coords[[row, src]];
921            }
922        }
923        out
924    };
925
926    let mut specs: Vec<TopologyCandidateSpec> = Vec::new();
927    match d_k {
928        1 => {
929            let n_harmonics = (2 * d_k + 1).max(3) | 1; // odd, ≥ 3
930            specs.push(TopologyCandidateSpec {
931                kind: AutoTopologyKind::Circle,
932                basis_kind: SaeAtomBasisKind::Periodic,
933                manifold: LatentManifold::Circle { period: 1.0 },
934                latent_dim: 1,
935                evaluator: Arc::new(PeriodicHarmonicEvaluator::new(n_harmonics)?),
936                coords: coords_d(1),
937            });
938            specs.push(TopologyCandidateSpec {
939                kind: AutoTopologyKind::Euclidean,
940                basis_kind: SaeAtomBasisKind::EuclideanPatch,
941                manifold: LatentManifold::Euclidean,
942                latent_dim: 1,
943                evaluator: Arc::new(EuclideanPatchEvaluator::new(1, 3)?),
944                coords: coords_d(1),
945            });
946        }
947        2 => {
948            specs.push(TopologyCandidateSpec {
949                kind: AutoTopologyKind::Torus,
950                basis_kind: SaeAtomBasisKind::Torus,
951                // T² = S¹ × S¹: each axis is a unit-period circle (the
952                // fraction-of-period convention `TorusHarmonicEvaluator` shares
953                // with the periodic 1-D atom). This MUST match the production
954                // seeding (`AtomTopology::Torus` → Product[Circle, Circle] in
955                // `sae::manifold::atom`); a flat `Euclidean` manifold would leave
956                // the born atom's angles un-wrapped and the joint refit would
957                // retract on the wrong geometry.
958                manifold: LatentManifold::Product(vec![
959                    LatentManifold::Circle { period: 1.0 },
960                    LatentManifold::Circle { period: 1.0 },
961                ]),
962                latent_dim: 2,
963                evaluator: Arc::new(TorusHarmonicEvaluator::new(2, 2)?),
964                coords: coords_d(2),
965            });
966            specs.push(TopologyCandidateSpec {
967                kind: AutoTopologyKind::Sphere,
968                basis_kind: SaeAtomBasisKind::Sphere,
969                // The `SphereChartEvaluator` is a (lat, lon) intrinsic chart, so
970                // the latent manifold is the 2-D product of a bounded latitude
971                // interval and a wrapped longitude circle — NOT
972                // `LatentManifold::Sphere { dim: 2 }`, which would demand ambient
973                // unit 3-vectors the chart never produces. This matches the
974                // production seeding (`AtomTopology::Sphere` →
975                // Product[Interval(-π/2, π/2), Circle(τ)] in `sae::manifold::atom`).
976                manifold: LatentManifold::Product(vec![
977                    LatentManifold::Interval {
978                        lo: -std::f64::consts::FRAC_PI_2,
979                        hi: std::f64::consts::FRAC_PI_2,
980                    },
981                    LatentManifold::Circle {
982                        period: std::f64::consts::TAU,
983                    },
984                ]),
985                latent_dim: 2,
986                evaluator: Arc::new(SphereChartEvaluator),
987                coords: coords_d(2),
988            });
989            specs.push(TopologyCandidateSpec {
990                kind: AutoTopologyKind::Euclidean,
991                basis_kind: SaeAtomBasisKind::EuclideanPatch,
992                manifold: LatentManifold::Euclidean,
993                latent_dim: 2,
994                evaluator: Arc::new(EuclideanPatchEvaluator::new(2, 2)?),
995                coords: coords_d(2),
996            });
997            specs.push(TopologyCandidateSpec {
998                kind: AutoTopologyKind::Cylinder,
999                basis_kind: SaeAtomBasisKind::Cylinder,
1000                // Cylinder S¹ × ℝ: axis 0 is a unit-period circle (the
1001                // fraction-of-period convention `CylinderHarmonicEvaluator` shares
1002                // with the periodic / torus atoms), axis 1 is the unbounded flat
1003                // line (`Euclidean`). This MUST match the production seeding
1004                // (`SaeAtomBasisKind::Cylinder` → Product[Circle(1.0), Euclidean]
1005                // in `sae::manifold::atom`); a flat `Euclidean` manifold would leave
1006                // the born atom's phase axis un-wrapped, and a torus stand-in would
1007                // wrap the linear axis spuriously. The harmonic / degree budget
1008                // mirrors the torus (2 circle harmonics) and the patch (degree 2)
1009                // so the cross-topology design widths stay commensurable.
1010                manifold: LatentManifold::Product(vec![
1011                    LatentManifold::Circle { period: 1.0 },
1012                    LatentManifold::Euclidean,
1013                ]),
1014                latent_dim: 2,
1015                evaluator: Arc::new(CylinderHarmonicEvaluator::new(2, 2)?),
1016                coords: coords_d(2),
1017            });
1018        }
1019        _ => {
1020            // d_k ≥ 3: a flat Euclidean patch is the only realizable core basis
1021            // (the curved families top out at d = 2). The race degenerates to a
1022            // single candidate — still honest (the winner is reported), just not
1023            // a contest.
1024            specs.push(TopologyCandidateSpec {
1025                kind: AutoTopologyKind::Euclidean,
1026                basis_kind: SaeAtomBasisKind::EuclideanPatch,
1027                manifold: LatentManifold::Euclidean,
1028                latent_dim: d_k,
1029                evaluator: Arc::new(EuclideanPatchEvaluator::new(d_k, 2)?),
1030                coords: coords_d(d_k),
1031            });
1032        }
1033    }
1034    Ok(specs)
1035}
1036
1037/// Fit one topology candidate to the birth target `Y` (`n × p`) over `weights`
1038/// (`n`, the candidate's per-row reconstruction mass) by PROPER closed-form
1039/// Gaussian REML, and return its TK evidence inputs + the realized fit handle.
1040///
1041/// The per-atom Gaussian-reconstruction evidence is the marginal likelihood the
1042/// solver's [`gaussian_reml_multi_closed_form`] returns at the REML-optimal
1043/// smoothing strength λ̂ — the SAME REML/LAML quantity every smooth term is
1044/// scored by, so it is commensurable under the shared TK normalizer:
1045///
1046/// * `raw_reml` — the rank-aware closed-form REML score
1047///   `½d·(log|Φᵀ W Φ + λ̂S| − log|λ̂S|₊) + dispersion` at the estimated λ̂. Unlike
1048///   a fixed-λ, unit-dispersion Laplace term, this (a) estimates λ per candidate
1049///   on its own basis (SPEC: REML/LAML always, never a hand-set λ) and (b) prices
1050///   complexity with the penalty pseudo-determinant on a cross-basis-comparable
1051///   scale, so a perfect periodic fit to a circle beats a poor cubic-patch fit.
1052/// * `null_dim = 0` / `null_space_logdet = None` — the closed-form REML score is
1053///   ALREADY null-space-restricted (rank-aware), so the TK normalizer must not
1054///   re-subtract a gauge term; we report no null space to avoid double-counting.
1055/// * `effective_dim = tr[(Φᵀ W Φ + λ̂S)⁻¹ Φᵀ W Φ]` — the penalized effective
1056///   degrees of freedom the solver returns (`edf`), the per-effective-dim scale's
1057///   denominator.
1058fn fit_topology_candidate(
1059    spec: &TopologyCandidateSpec,
1060    target: ArrayView2<'_, f64>,
1061    weights: ArrayView1<'_, f64>,
1062) -> Result<TopologyAutoFitEvidence<TopologyRaceFit>, String> {
1063    let n = target.nrows();
1064    let (phi, jet) = spec.evaluator.evaluate(spec.coords.view())?;
1065    let m = phi.ncols();
1066    if phi.nrows() != n {
1067        return Err(format!(
1068            "fit_topology_candidate: basis rows {} != target rows {n}",
1069            phi.nrows()
1070        ));
1071    }
1072    if weights.len() != n {
1073        return Err(format!(
1074            "fit_topology_candidate: weights length {} != target rows {n}",
1075            weights.len()
1076        ));
1077    }
1078
1079    // Validate the per-row reconstruction mass and reject a degenerate
1080    // (zero-total-mass) birth target — the closed-form REML below needs a
1081    // positive weighted sample to estimate a dispersion.
1082    let mut w_sum = 0.0_f64;
1083    for row in 0..n {
1084        let w = weights[row];
1085        if !(w.is_finite() && w >= 0.0) {
1086            return Err("fit_topology_candidate: weights must be finite and non-negative".into());
1087        }
1088        w_sum += w;
1089    }
1090    if !(w_sum > 0.0 && w_sum.is_finite()) {
1091        return Err("fit_topology_candidate: degenerate (zero-mass) birth target".into());
1092    }
1093
1094    // The candidate basis's raw roughness Gram, the smoothness operator the
1095    // topology evidence prices. The gauge-invariant, basis-AGNOSTIC roughness
1096    // every candidate here can present analytically is the total second-derivative
1097    // (curvature) energy `S = Σ_n Σ_{a,c} Φ''_{·,a,c}(t_n)ᵀ Φ''_{·,a,c}(t_n)` — the
1098    // thin-plate / Reinsch penalty — read off each evaluator's analytic second jet
1099    // `Φ''[n, μ, a, c]`. A flat (line / patch) basis has a small curvature Gram
1100    // (its low-degree monomials are barely curved), a periodic / sphere basis a
1101    // large one for its high harmonics: exactly the smoothness price the race must
1102    // weigh against data fit. Computed identically for every candidate so the
1103    // cross-topology comparison stays commensurable.
1104    let second_jet = spec.evaluator.second_jet(spec.coords.view())?; // (n, m, d, d)
1105    let d = spec.latent_dim;
1106    let mut s_raw = Array2::<f64>::zeros((m, m));
1107    for row in 0..n {
1108        for a in 0..d {
1109            for c in 0..d {
1110                // Outer product of the (a,c) second-derivative column over basis
1111                // functions, accumulated into the roughness Gram.
1112                for mu in 0..m {
1113                    let hmu = second_jet[[row, mu, a, c]];
1114                    if hmu == 0.0 {
1115                        continue;
1116                    }
1117                    for nu in mu..m {
1118                        s_raw[[mu, nu]] += hmu * second_jet[[row, nu, a, c]];
1119                    }
1120                }
1121            }
1122        }
1123    }
1124    for mu in 0..m {
1125        for nu in (mu + 1)..m {
1126            s_raw[[nu, mu]] = s_raw[[mu, nu]];
1127        }
1128    }
1129
1130    // Score each candidate by its PROPER closed-form Gaussian REML evidence
1131    // (#977/#1026), NOT a hand-rolled fixed-λ Laplace term. The previous score
1132    // (`½·SSE + ½·log|H|` at a stamped λ = 1, unit dispersion) was wrong on two
1133    // counts, and they conspired to make a perfect circle lose to a line:
1134    //
1135    //   1. λ = 1 is NOT commensurable across bases. A periodic basis's curvature
1136    //      energy for a `cos(2πt)` harmonic scales like `(2π)⁴ ≈ 1.6e3` the data
1137    //      Gram, so a unit-λ penalty CRUSHES the very harmonics that reconstruct a
1138    //      circle exactly, while the barely-curved cubic patch is left essentially
1139    //      unpenalized. SPEC also forbids a hand-set smoothing strength — λ must be
1140    //      REML/LAML-estimated, ALWAYS.
1141    //   2. Unit dispersion (`½·SSE`) does not reward a near-perfect fit. The
1142    //      profiled-dispersion REML deviance `½ν·log(σ̂²)` rewards a basis that
1143    //      drives σ̂² → 0 (the circle's exact harmonic fit) far more strongly,
1144    //      which is what makes the contest honest, and `½·log|H| − ½·log|λS|₊`
1145    //      prices complexity on a scale that is comparable across bases (the
1146    //      penalty pseudo-determinant the old score dropped is what restores
1147    //      commensurability).
1148    //
1149    // `gaussian_reml_multi_closed_form` returns exactly this: the marginal-
1150    // likelihood-optimal λ̂ for each candidate on its OWN basis, the penalized
1151    // decoder at λ̂, the rank-aware REML score (`½d·(log|H| − log|λS|₊) +
1152    // dispersion`, already null-space-restricted), and the effective degrees of
1153    // freedom `tr[(ΦᵀWΦ + λS)⁻¹ ΦᵀWΦ]`. We feed `reml_score` as `raw_reml` and
1154    // `edf` as `effective_dim`, and report `null_dim = 0` so the TK normalizer does
1155    // NOT re-subtract a null-space term the REML score already integrated out.
1156    let reml_fit = gaussian_reml_multi_closed_form(
1157        phi.view(),
1158        target,
1159        s_raw.view(),
1160        Some(weights),
1161        None,
1162    )
1163    .map_err(|e| format!("fit_topology_candidate: REML evidence: {e:?}"))?;
1164    let lambda = reml_fit.lambda;
1165    if !(lambda.is_finite() && lambda >= 0.0) {
1166        return Err(format!(
1167            "fit_topology_candidate: REML returned a non-finite/negative λ ({lambda})"
1168        ));
1169    }
1170    let raw_reml = reml_fit.reml_score;
1171    if !raw_reml.is_finite() {
1172        return Err("fit_topology_candidate: non-finite REML score".into());
1173    }
1174    let decoder = reml_fit.coefficients.clone(); // penalized fit at λ̂, m × p
1175    let mut effective_dim = reml_fit.edf;
1176    if !(effective_dim.is_finite() && effective_dim > 0.0) {
1177        // A fully-penalized fit (no effective parameters) cannot be scored on the
1178        // per-effective-dim scale; floor at a single effective parameter so the
1179        // race still ranks it (the REML deviance dominates the verdict anyway).
1180        effective_dim = 1.0;
1181    }
1182
1183    // The born atom is seeded with the RAW roughness Gram; `SaeManifoldAtom::new`
1184    // installs it as `smooth_penalty_raw` and `refresh_intrinsic_smooth_penalty`
1185    // recomputes the pullback-metric `smooth_penalty` from it + the fitted decoder
1186    // (the production seeding path).
1187    let penalty = s_raw.clone();
1188    Ok(TopologyAutoFitEvidence {
1189        topology_name: spec.kind.as_str().to_string(),
1190        raw_reml,
1191        // The closed-form REML score is ALREADY restricted to the penalty's range
1192        // complement (rank-aware: `log|λS|₊` over the non-null directions, the null
1193        // space integrated out), so the TK null-space normalizer must NOT fire
1194        // again — pass `null_dim = 0` (its `null_space_logdet` branch is then
1195        // skipped) so we don't double-count the gauge directions.
1196        null_dim: 0.0,
1197        null_space_logdet: None,
1198        effective_dim,
1199        n_obs: n,
1200        fit_handle: TopologyRaceFit {
1201            evaluator: spec.evaluator.clone(),
1202            basis_kind: spec.basis_kind.clone(),
1203            manifold: spec.manifold.clone(),
1204            latent_dim: spec.latent_dim,
1205            coords: spec.coords.clone(),
1206            phi,
1207            jet,
1208            decoder,
1209            penalty,
1210        },
1211    })
1212}
1213
1214/// Race the candidate topologies whose required intrinsic dimension matches the
1215/// born atom's `d_k` against the birth target `Y` (`n × p`, the residual-factor
1216/// image the atom would reconstruct) over the template coordinates `coords`, and
1217/// return the EVIDENCE-WINNING fit. The winner is the lowest TK-normalized REML
1218/// via [`select_topology_with_fit`] — the gauge-invariant comparison the
1219/// smooth-term topology race already applies — so a circular residual gets a
1220/// circle, a straight residual a line, a spherical residual a sphere, etc.
1221///
1222/// Returns `None` when the race has no realizable candidate (`d_k = 0`, the
1223/// cluster-null rung, handled below the race) or the birth target is degenerate;
1224/// the caller then falls back to the template basis (warm inheritance) and the
1225/// post-fit curved-vs-linear rung adjudicates as before.
1226fn race_birth_topology(
1227    coords: ArrayView2<'_, f64>,
1228    target: ArrayView2<'_, f64>,
1229    weights: ArrayView1<'_, f64>,
1230    d_k: usize,
1231) -> Result<Option<TopologyRaceFit>, String> {
1232    let specs = topology_candidates_for_dim(coords, d_k)?;
1233    if specs.is_empty() {
1234        return Ok(None);
1235    }
1236    let selector = TopologyAutoSelector {
1237        // The race is over EXACTLY the candidate set we built; do not let the
1238        // selector's constant-curvature fuse drop one — pass them through as-is.
1239        candidates: specs.iter().map(|s| s.kind).collect(),
1240        // PER-OBSERVATION normalization (a common `n` divisor across candidates).
1241        // The candidate scores are now PROPER closed-form REML marginal
1242        // likelihoods (see `fit_topology_candidate`), which ALREADY price model
1243        // complexity through `log|H| − log|λS|₊` + the profiled dispersion. The
1244        // older `PerEffectiveDim` scale was calibrated for the previous hand-rolled
1245        // POSITIVE cost (`½·SSE + ½·log|H|`, which grew with model size and needed
1246        // per-parameter normalization); applied to a proper (negative) evidence it
1247        // DOUBLE-COUNTS complexity and inverts the ranking for higher-parameter
1248        // bases — e.g. a cylinder that fits a cylindrical residual best (most
1249        // negative evidence) would lose to a sphere purely because it spends more
1250        // effective dimensions. A common-`n` divisor preserves the raw
1251        // marginal-likelihood ranking the Bayesian evidence is designed to support,
1252        // so the genuinely-best-fitting topology wins.
1253        score_scale: TopologyScoreScale::PerObservation,
1254        latent: None,
1255    };
1256    // Index the realized specs by kind so the fit closure can find the right
1257    // evaluator/coords for the kind the selector hands it.
1258    //
1259    // #944 stage 4: `select_topology_with_fit` FUSES the fixed simply-connected
1260    // constant-curvature forms (Euclidean κ = 0 ∪ Sphere κ > 0) into ONE
1261    // estimated-κ `ConstantCurvature` candidate when both are present (the d = 2
1262    // case: euclidean-patch + sphere). That fusion is correct — euclidean-vs-sphere
1263    // IS a curvature estimation, not two discrete topologies — so the fused
1264    // `ConstantCurvature` candidate is realized by the CURVED (sphere) basis, the
1265    // simply-connected form that can express both flat and positively-curved
1266    // images under its fitted decoder. The race then adjudicates that one
1267    // constant-curvature form against the genuinely non-homotopic `Torus`. For
1268    // d = 1 no fusion fires (Circle is not simply connected), so circle-vs-line
1269    // races as two discrete candidates.
1270    let mut by_kind: std::collections::HashMap<AutoTopologyKind, &TopologyCandidateSpec> =
1271        std::collections::HashMap::with_capacity(specs.len() + 1);
1272    for spec in &specs {
1273        by_kind.insert(spec.kind, spec);
1274    }
1275    if !by_kind.contains_key(&AutoTopologyKind::ConstantCurvature) {
1276        // Resolve the fused candidate to the curved simply-connected realization
1277        // (sphere) when present, else the flat patch (euclidean) — whichever the
1278        // realizable set carries.
1279        if let Some(sphere) = specs.iter().find(|s| s.kind == AutoTopologyKind::Sphere) {
1280            by_kind.insert(AutoTopologyKind::ConstantCurvature, sphere);
1281        } else if let Some(euclid) = specs.iter().find(|s| s.kind == AutoTopologyKind::Euclidean) {
1282            by_kind.insert(AutoTopologyKind::ConstantCurvature, euclid);
1283        }
1284    }
1285    let ranked = select_topology_with_fit(&selector, |kind| {
1286        let spec = by_kind.get(&kind).ok_or_else(|| {
1287            format!(
1288                "race_birth_topology: no realized candidate for fused topology {:?}",
1289                kind.as_str()
1290            )
1291        })?;
1292        fit_topology_candidate(spec, target, weights)
1293    })?;
1294    let winner = ranked
1295        .winner()
1296        .ok_or_else(|| "race_birth_topology: empty ranking".to_string())?;
1297    Ok(Some(winner.fit_handle.clone()))
1298}
1299
1300/// A small neutral routing logit a born atom is seeded at: large enough that the
1301/// refit can grow it if the residual-factor direction is real, small relative to
1302/// the established atoms so it does not perturb the current routing.
1303const BIRTH_SEED_LOGIT: f64 = -4.0;
1304
1305/// Append a fresh atom whose decoder is seeded from a residual-factor direction.
1306/// The new atom reuses the structural basis of atom 0 (same basis kind, latent
1307/// dim, basis values + jacobian + smooth penalty) as its BIRTH TEMPLATE — warm
1308/// inheritance by construction, so the engine's warm-state contract holds and
1309/// the joint refit starts from a live basis rather than a cold curved family.
1310/// Only its decoder coefficients carry the residual-factor direction. Routed at
1311/// a small neutral mass on every row so the refit grows it if it is real and the
1312/// death channel demotes it next round if it is not.
1313///
1314/// # Topology adjudication (#977)
1315///
1316/// The template basis is the atom's INITIAL parameterization, not its final
1317/// topology. A born atom's topology is adjudicated by EVIDENCE downstream, on
1318/// the discovered dictionary, at two rungs:
1319///
1320/// * **Existence** — the #984 held-out e-value birth gate (run inside
1321///   [`gam_solve::structure_search::search`]) decides whether the atom is
1322///   born at all. Only a residual factor whose held-out reconstruction
1323///   likelihood-ratio crosses the Ville threshold earns an atom; the rest stay
1324///   contested in the [`SearchLedger`].
1325/// * **Curved (`d ≥ 1`) vs straight / cluster (`d = 0`)** — the #1026
1326///   hybrid-split pass ([`SaeManifoldTerm::compute_hybrid_split_report`], run
1327///   post-search over the FULL discovered dictionary) adjudicates every eligible
1328///   `d = 1` atom's fitted curved image against its straight (linear
1329///   special-case) secant on the common rank-aware Laplace evidence scale, and
1330///   records the verdict. A born atom whose curvature does not pay collapses to
1331///   the linear / cluster lane; one that earns it keeps its curved image. The
1332///   dictionary is therefore genuinely heterogeneous (curved + linear atoms),
1333///   not all-circle, with the per-atom verdict surfaced on the fit payload.
1334///
1335/// # The race (#977)
1336///
1337/// The born atom's topology is now chosen by EVIDENCE at birth, not inherited.
1338/// The residual-factor direction `factor_dir` is expressed as a per-row image
1339/// `Y = Φ_template(coords) · factor_dir` (the structure the atom would
1340/// reconstruct), and [`race_birth_topology`] fits each candidate basis whose
1341/// intrinsic dimension matches the template's `d_k` (`d = 1`: circle vs line;
1342/// `d = 2`: torus vs sphere vs euclidean-patch) to `Y` by penalized least
1343/// squares, ranking them by TK-normalized REML — the gauge-invariant comparison
1344/// the smooth-term topology race applies. The WINNING topology's evaluator,
1345/// decoder, manifold, and roughness penalty seed the born atom, so the discovered
1346/// dictionary is genuinely heterogeneous: different atoms get different topologies
1347/// by evidence. The post-fit curved-vs-linear hybrid-split rung remains the
1348/// second line of defense (an atom whose curvature does not pay over the FULL
1349/// dictionary still collapses linear), and the held-out e-value birth gate
1350/// decides whether the atom is born at all. When the race finds no realizable
1351/// candidate (`d_k = 0` cluster-null, or a degenerate image) the born atom falls
1352/// back to the template basis (warm inheritance), exactly the prior behavior.
1353fn born_atom(
1354    term: &SaeManifoldTerm,
1355    rho: &SaeManifoldRho,
1356    factor_dir: ArrayView2<'_, f64>,
1357) -> Result<(SaeManifoldTerm, SaeManifoldRho), String> {
1358    let k = term.k_atoms();
1359    if term.atoms.is_empty() {
1360        return Err(
1361            "born_atom: cannot birth from an empty dictionary (no template atom to seed the \
1362             coordinate block / basis from)"
1363                .to_string(),
1364        );
1365    }
1366    let template = &term.atoms[0];
1367    let m = template.basis_size();
1368    let p = term.output_dim();
1369    if factor_dir.dim() != (m, p) {
1370        return Err(format!(
1371            "born_atom: residual-factor decoder must be ({m}, {p}); got {:?}",
1372            factor_dir.dim()
1373        ));
1374    }
1375    let mut atoms = term.atoms.clone();
1376
1377    // The per-row birth target the topology race adjudicates: the residual-factor
1378    // direction expressed as a reconstruction image over the template
1379    // coordinates. A born atom seeded with `factor_dir` in the template basis
1380    // would emit exactly `Y = Φ_template · factor_dir`; racing topologies asks
1381    // which geometry parameterizes that image most parsimoniously.
1382    let template_coords = term.assignment.coords[0].as_matrix();
1383    let birth_target = template.basis_values.dot(&factor_dir); // (n, p)
1384    // Uniform per-row mass: at birth the routing is neutral (the atom does not yet
1385    // own any rows), so every row contributes equally to the topology evidence.
1386    let weights = Array1::<f64>::ones(birth_target.nrows());
1387
1388    // Race the candidate topologies matched to the template's intrinsic dim. On a
1389    // win, seed the born atom from the winning evaluator + penalized decoder; on
1390    // no realizable candidate (cluster-null d_k, degenerate image), fall back to
1391    // the template basis (warm inheritance), and let the post-fit curved-vs-linear
1392    // rung adjudicate as before.
1393    let raced = race_birth_topology(
1394        template_coords.view(),
1395        birth_target.view(),
1396        weights.view(),
1397        template.latent_dim,
1398    )?;
1399    // The born atom + its coordinate block. The race-won path carries the winning
1400    // topology's coordinate block (dimension-matched to its evaluator, manifold
1401    // set to the winning chart); the fallback path reuses the template block.
1402    let (born, born_coord_block) = match raced {
1403        Some(fit) => {
1404            // Build the born atom directly from the winning topology's realized
1405            // basis: its evaluator, penalized decoder, and roughness penalty. The
1406            // intrinsic (pullback-metric) penalty is then refreshed from the
1407            // seeded decoder so the atom carries exactly the production seeding.
1408            let mut atom = SaeManifoldAtom::new(
1409                format!("atom_born_{k}"),
1410                fit.basis_kind.clone(),
1411                fit.latent_dim,
1412                fit.phi.clone(),
1413                fit.jet.clone(),
1414                fit.decoder.clone(),
1415                fit.penalty.clone(),
1416            )?
1417            .with_basis_second_jet(fit.evaluator.clone());
1418            atom.refresh_intrinsic_smooth_penalty();
1419            // Coordinate block matched to the winning evaluator's intrinsic dim,
1420            // carrying the winning chart manifold so the joint refit retracts on
1421            // the right geometry.
1422            let coord_block = gam_terms::latent::LatentCoordValues::from_matrix_with_manifold(
1423                fit.coords.view(),
1424                LatentIdMode::None,
1425                fit.manifold.clone(),
1426            );
1427            (atom, coord_block)
1428        }
1429        None => {
1430            // The born atom reuses the template's structural basis (kind, latent
1431            // dim, basis values + jacobian + raw penalty); only its decoder carries
1432            // the residual-factor direction.
1433            let mut atom = template.clone();
1434            atom.decoder_coefficients = factor_dir.to_owned();
1435            atom.refresh_intrinsic_smooth_penalty();
1436            (atom, term.assignment.coords[0].clone())
1437        }
1438    };
1439    atoms.push(born);
1440
1441    let n = term.assignment.logits.nrows();
1442    let mut logits = Array2::<f64>::zeros((n, k + 1));
1443    for row in 0..n {
1444        for col in 0..k {
1445            logits[[row, col]] = term.assignment.logits[[row, col]];
1446        }
1447        logits[[row, k]] = BIRTH_SEED_LOGIT;
1448    }
1449    let mut coords = term.assignment.coords.clone();
1450    coords.push(born_coord_block);
1451    let assignment = crate::manifold::SaeAssignment::with_mode(
1452        logits,
1453        coords,
1454        term.assignment.mode,
1455    )?;
1456    let child = SaeManifoldTerm::new(atoms, assignment)?;
1457
1458    let mut child_rho = rho.clone();
1459    // The born atom inherits the template atom's ARD block shape (disabled if
1460    // the template's was disabled).
1461    let inherited = child_rho
1462        .log_ard
1463        .first()
1464        .cloned()
1465        .unwrap_or_else(|| Array1::<f64>::zeros(0));
1466    child_rho.log_ard.push(inherited);
1467    // ρ carries a PER-ATOM smoothness strength `log_lambda_smooth[k]` (#1556),
1468    // and `assemble_arrow_schur` indexes it by atom (`lambda_smooth[atom_idx]`).
1469    // Growing the dictionary without growing this vector leaves `k_atoms()`
1470    // ahead of `log_lambda_smooth.len()` and the next assemble panics with an
1471    // out-of-bounds index. The born atom inherits the template atom's smoothness
1472    // strength (atom 0), matching the `log_ard` inheritance just above.
1473    let inherited_smooth = child_rho.log_lambda_smooth.first().copied().unwrap_or(0.0);
1474    child_rho.log_lambda_smooth.push(inherited_smooth);
1475    Ok((child, child_rho))
1476}
1477
1478/// A held-out row-block shard for the universal-inference estimation/evaluation
1479/// split the gates run over: a contiguous block of row indices into the FULL
1480/// target the triggers were not tuned on.
1481///
1482/// The split is realized through the term's per-row reconstruction weights
1483/// ([`SaeManifoldTerm::set_row_loss_weights`]): a candidate is refit with the
1484/// currently-held-out shards' rows at weight `0` (no fitting pressure) and the
1485/// estimation rows at weight `1`, then EVALUATED on the held-out rows. The
1486/// predictable-plugin e-process streams the shards: shard `k` is evaluated under
1487/// a candidate that has not yet seen its rows, then folded into the estimation
1488/// set (un-masked) for shard `k+1` — exactly the contract
1489/// [`run_atom_birth_gate`](gam_terms::inference::structure_evidence::run_atom_birth_gate)
1490/// guarantees the call order of.
1491#[derive(Clone, Debug)]
1492pub struct RowBlockShard {
1493    /// The full target, shared across shards (`(N, p)`).
1494    pub target: std::sync::Arc<Array2<f64>>,
1495    /// Row indices into the full target that this shard holds out for
1496    /// evaluation.
1497    pub rows: Vec<usize>,
1498}
1499
1500/// The estimation/evaluation row split the e-process gates run over. The
1501/// estimation rows are the candidate's fitting set (weight `1`); the evaluation
1502/// rows are held out (weight `0` during the fit) and partitioned into the shard
1503/// stream the gate accumulates evidence over.
1504#[derive(Clone, Debug)]
1505pub struct EstimationEvalSplit {
1506    /// Estimation row indices (the candidate is refit on these; held-out rows
1507    /// carry weight `0`).
1508    pub estimation_rows: Vec<usize>,
1509    /// The evaluation shards, in stream order.
1510    pub shards: Vec<RowBlockShard>,
1511}
1512
1513/// Fraction of rows reserved for estimation (the candidate's fitting set); the
1514/// remainder is split into evaluation shards. A fixed structural constant
1515/// (magic-by-default): a majority estimation split keeps the candidate fit
1516/// faithful while leaving a held-out block for honest evidence.
1517const ESTIMATION_FRACTION: f64 = 0.6;
1518
1519/// Build the estimation/evaluation split: the first `ESTIMATION_FRACTION` of the
1520/// rows (contiguous) are the estimation set, the remainder is partitioned into
1521/// `n_shards` contiguous held-out evaluation blocks. Deterministic — contiguous
1522/// blocks, no shuffle. Each shard shares the full target by reference.
1523pub fn estimation_eval_split(target: ArrayView2<'_, f64>, n_shards: usize) -> EstimationEvalSplit {
1524    let n = target.nrows();
1525    if n == 0 {
1526        return EstimationEvalSplit {
1527            estimation_rows: Vec::new(),
1528            shards: Vec::new(),
1529        };
1530    }
1531    let shared = std::sync::Arc::new(target.to_owned());
1532    // At least one estimation row and at least one evaluation row when n ≥ 2.
1533    let n_est =
1534        ((n as f64 * ESTIMATION_FRACTION).round() as usize).clamp(1, n.saturating_sub(1).max(1));
1535    let estimation_rows: Vec<usize> = (0..n_est).collect();
1536    let eval_rows: Vec<usize> = (n_est..n).collect();
1537    let n_eval = eval_rows.len();
1538    let n_shards = n_shards.min(n_eval).max(usize::from(n_eval > 0));
1539    let mut shards = Vec::new();
1540    if n_eval > 0 && n_shards > 0 {
1541        let base = n_eval / n_shards;
1542        let rem = n_eval % n_shards;
1543        let mut cursor = 0usize;
1544        for s in 0..n_shards {
1545            let len = base + usize::from(s < rem);
1546            let rows: Vec<usize> = eval_rows[cursor..cursor + len].to_vec();
1547            shards.push(RowBlockShard {
1548                target: shared.clone(),
1549                rows,
1550            });
1551            cursor += len;
1552        }
1553    }
1554    EstimationEvalSplit {
1555        estimation_rows,
1556        shards,
1557    }
1558}
1559
1560/// Outcome of the full round driver: the (possibly restructured) fitted term +
1561/// ρ and the per-round ledgers, each carrying the joint fit's collapse events.
1562pub struct StructureSearchResult {
1563    pub term: SaeManifoldTerm,
1564    pub rho: SaeManifoldRho,
1565    /// One ledger per round actually run (a round that applies no move is the
1566    /// last; its ledger is included so the certificate covers the fixpoint).
1567    pub rounds: Vec<SearchLedger>,
1568}
1569
1570impl StructureSearchResult {
1571    /// `true` iff at least one structure-changing move LANDED across the rounds —
1572    /// an `Accepted` move (certified birth / fission / fusion that restructured
1573    /// the dictionary and triggered a warm refit) or a `Demoted` death (an atom
1574    /// folded to ~0 routing). Both mutate the returned `term`/`rho` away from the
1575    /// pre-search joint fit, so any shape uncertainty assembled from the
1576    /// PRE-search joint Hessian is stale and must be recomputed from the final
1577    /// post-search per-atom inner fits (#1230). When this is `false`, every round
1578    /// was contested / vetoed / deduplicated / deferred / stale, the term/rho are
1579    /// byte-for-byte the pre-search fit, and the exact joint-Hessian bands remain
1580    /// valid.
1581    #[must_use]
1582    pub fn structure_changed(&self) -> bool {
1583        use gam_solve::structure_search::MoveVerdict;
1584        self.rounds.iter().any(|round| {
1585            round.moves.iter().any(|record| {
1586                matches!(
1587                    record.verdict,
1588                    MoveVerdict::Accepted { .. } | MoveVerdict::Demoted { .. }
1589                )
1590            })
1591        })
1592    }
1593}
1594
1595/// The round driver's configuration: how the data is split into shards, the
1596/// e-gate's budget/level, the round cap, and the per-round harvest breadth.
1597/// Bundled so the driver entry points stay below the argument-count threshold
1598/// and so a caller configures one object rather than a positional argument
1599/// cascade.
1600#[derive(Clone, Copy, Debug)]
1601pub struct RoundDriverConfig {
1602    /// Number of held-out evaluation shards the gate streams over.
1603    pub n_shards: usize,
1604    /// Move budget + α the e-gates certify at (fixed for the run).
1605    pub budget: MoveBudget,
1606    /// Maximum harvest → search rounds before stopping at the fixpoint.
1607    pub max_rounds: usize,
1608    /// Per-round harvest breadth (max fusions / fissions / births).
1609    pub harvest_params: HarvestParams,
1610}
1611
1612/// Drive evidence-guarded structure search around a fitted SAE term until a
1613/// round applies no moves (#997 round driver).
1614///
1615/// Each round: harvest proposals from the current fitted term, run [`search`]
1616/// over the held-out evaluation shards (gating births/fissions/fusions, demoting
1617/// never-certified deaths), and adopt the restructured state. The loop stops
1618/// when a round's ledger contains no applied move (every record is
1619/// contested / vetoed / deduplicated / deferred / stale) or `max_rounds` is hit.
1620///
1621/// `candidate_fit` is the warm refit: given a RESTRUCTURED candidate term + ρ,
1622/// it refits the candidate on the ESTIMATION rows only (held-out evaluation rows
1623/// carry weight `0`), so the candidate is the predictable plug-in the e-process
1624/// evaluates on the held-out shard stream. It is INFALLIBLE at this boundary —
1625/// it absorbs its own inner-solve errors by returning the unchanged candidate (a
1626/// conservative no-improvement signal to the gate, never a panic). The shard
1627/// fold is a no-op: the candidate is fixed across the stream (a predictable
1628/// plug-in), and each shard contributes its held-out reconstruction
1629/// likelihood-ratio against the honestly-refit null sup.
1630pub fn run_structure_search_rounds(
1631    mut term: SaeManifoldTerm,
1632    mut rho: SaeManifoldRho,
1633    target: ArrayView2<'_, f64>,
1634    config: RoundDriverConfig,
1635    ledger: &mut StructureLedger,
1636    mut candidate_fit: impl FnMut(
1637        SaeManifoldTerm,
1638        SaeManifoldRho,
1639        &[usize],
1640    ) -> (SaeManifoldTerm, SaeManifoldRho),
1641    mut finalize_round: impl FnMut(
1642        SaeManifoldTerm,
1643        SaeManifoldRho,
1644        &[usize],
1645    ) -> (SaeManifoldTerm, SaeManifoldRho),
1646) -> Result<StructureSearchResult, String> {
1647    let RoundDriverConfig {
1648        n_shards,
1649        budget,
1650        max_rounds,
1651        harvest_params,
1652    } = config;
1653    let split = estimation_eval_split(target, n_shards);
1654    let mut rounds: Vec<SearchLedger> = Vec::new();
1655
1656    for _ in 0..max_rounds {
1657        // Harvest from the current fitted state. Residuals R = target − fitted.
1658        let fitted = term.try_fitted()?;
1659        let residuals = &target.to_owned() - &fitted;
1660        let report = harvest_move_proposals(&term, &rho, residuals.view(), &harvest_params)?;
1661
1662        // #993 item 3: BANK the within-atom carve binding evidence in the
1663        // ledger. The carve ran on each `d = 2` product-atom fission candidate
1664        // (`harvest_move_proposals` → `run_within_atom_carve`) and reported a
1665        // representational binding p-value; absorb it as a `BindingEdge` claim
1666        // on the atom's OWN two factors (a self-edge `{atom, atom}`: the carve
1667        // asks whether THIS atom's two latent factors are bound). A small
1668        // `edge_p_value` (interaction proven) calibrates to strong positive
1669        // evidence FOR the binding claim via `log_e_from_p_calibrator`; a
1670        // p ≈ 1 (additive) absorbs evidence AGAINST it. This makes the binding
1671        // verdict not merely observable on the `HarvestReport` but BANKED in
1672        // the persisted ledger, so the dictionary certificate covers it and the
1673        // evidence resumes across corpus shards. A `None` p-value (the Wald
1674        // test degenerated) is skipped — no fabricated evidence.
1675
1676        // Pre-build the birth-decoder list ONCE per round from the residual
1677        // factor (the birth candidates index into it), so the apply-move
1678        // closure inside the gate is a pure function of the candidate index.
1679        let birth_decoders = build_birth_decoders(&term, residuals.view(), &harvest_params)?;
1680
1681        if report.proposals.is_empty() || split.shards.is_empty() {
1682            // Nothing to do this round — record an empty ledger (with the live
1683            // collapse events) as the fixpoint and stop.
1684            rounds.push(SearchLedger {
1685                alpha: budget.alpha,
1686                moves: Vec::new(),
1687                collapse_events: term.collapse_events().to_vec(),
1688            });
1689            break;
1690        }
1691
1692        // The search state threads (term, rho) together. apply_move restructures
1693        // both AND refits the candidate on the estimation rows so it is the
1694        // predictable plug-in the held-out shards are evaluated against.
1695        type State = (SaeManifoldTerm, SaeManifoldRho);
1696        let collapse_events = term.collapse_events().to_vec();
1697        let decoders = birth_decoders;
1698        let estimation_rows = split.estimation_rows.clone();
1699        let outcome: SearchOutcome<State> = search(
1700            (term, rho),
1701            report.proposals,
1702            &split.shards,
1703            &budget,
1704            ledger,
1705            |state: &State, mv: &StructureMove| {
1706                let (cand_term, cand_rho) =
1707                    apply_structure_move(&state.0, &state.1, mv, &decoders)?;
1708                // Refit the restructured candidate on the estimation rows only.
1709                Ok(candidate_fit(cand_term, cand_rho, &estimation_rows))
1710            },
1711            |state: &State, shard: &RowBlockShard| eval_log_lik(&state.0, shard),
1712            |state: &State, shard: &RowBlockShard| eval_log_lik(&state.0, shard),
1713            // No-op fold: the candidate is the fixed predictable plug-in across
1714            // the held-out stream.
1715            |state: State, _: &RowBlockShard| state,
1716        )?;
1717
1718        let (next_term, next_rho) = outcome.state;
1719        let mut round_ledger = outcome.ledger;
1720        round_ledger.collapse_events = collapse_events;
1721        let applied = round_ledger.moves.iter().any(|m| {
1722            matches!(
1723                m.verdict,
1724                gam_solve::structure_search::MoveVerdict::Accepted { .. }
1725                    | gam_solve::structure_search::MoveVerdict::Demoted { .. }
1726            )
1727        });
1728        rounds.push(round_ledger);
1729
1730        if applied {
1731            // The adopted winner reached its restructured form through the cheap
1732            // capped-iteration SCORING refit; re-refit it at the full inner
1733            // budget (same estimation-row weighting) before it becomes the next
1734            // round's parent and the returned dictionary, so the cap is a
1735            // scoring-only economy and the adopted state matches a direct
1736            // full-iter refit (the inner solve is convergent; the capped score
1737            // was only a worse starting iterate). When no move landed, the state
1738            // is byte-identical to the unrefit pre-search parent and needs no
1739            // polish.
1740            let (polished_term, polished_rho) =
1741                finalize_round(next_term, next_rho, &split.estimation_rows);
1742            term = polished_term;
1743            rho = polished_rho;
1744        } else {
1745            term = next_term;
1746            rho = next_rho;
1747            break;
1748        }
1749    }
1750
1751    Ok(StructureSearchResult { term, rho, rounds })
1752}
1753
1754/// Build the per-round residual-factor decoder list the birth apply-move indexes
1755/// into: each factor direction lifted to a `(m, p)` decoder in atom 0's basis.
1756fn build_birth_decoders(
1757    term: &SaeManifoldTerm,
1758    residuals: ArrayView2<'_, f64>,
1759    params: &HarvestParams,
1760) -> Result<Vec<Array2<f64>>, String> {
1761    let n = residuals.nrows();
1762    let p = residuals.ncols();
1763    if params.max_births == 0 || n == 0 || p == 0 {
1764        return Ok(Vec::new());
1765    }
1766    let assignments = term.assignment.assignments();
1767    let activity: Array1<f64> = (0..n).map(|r| assignments.row(r).sum()).collect();
1768    let max_rank = params.max_births.min(p.saturating_sub(1));
1769    let model = match StructuredResidualModel::fit(ResidualFactorInput {
1770        residuals,
1771        activity: activity.view(),
1772        max_factor_rank: max_rank,
1773    }) {
1774        Ok(m) => m,
1775        Err(_) => return Ok(Vec::new()),
1776    };
1777    let factor = model.factor();
1778    let r = factor.ncols();
1779    let m = term.atoms[0].basis_size();
1780    // Lift each p-vector factor direction to a (m, p) decoder: place the
1781    // direction on the constant (first) basis row so the born atom emits the
1782    // residual-factor direction as a flat decoder the refit can then shape. This
1783    // is the WhitenedStructured residual subspace, not raw-Euclidean Λ.
1784    let mut decoders = Vec::with_capacity(r);
1785    for j in 0..r {
1786        let mut decoder = Array2::<f64>::zeros((m, p));
1787        for out in 0..p {
1788            decoder[[0, out]] = factor[[out, j]];
1789        }
1790        decoders.push(decoder);
1791    }
1792    Ok(decoders)
1793}
1794
1795/// Per-row Gaussian reconstruction log-likelihood of a shard under the current
1796/// (restructured, possibly shard-refit) state. The gate's evaluation statistic;
1797/// the engine guarantees a shard is evaluated strictly before it is folded in.
1798fn eval_log_lik(term: &SaeManifoldTerm, shard: &RowBlockShard) -> f64 {
1799    // The fitted reconstruction at the shard's held-out rows, scored against the
1800    // full target. The term's per-row routing/basis covers all N rows, so the
1801    // reconstruction at a held-out row is the model's prediction for it.
1802    let fitted = match term.try_fitted() {
1803        Ok(f) => f,
1804        Err(_) => return f64::NEG_INFINITY,
1805    };
1806    let n_full = fitted.nrows();
1807    let p = fitted.ncols();
1808    if p != shard.target.ncols() || n_full != shard.target.nrows() {
1809        return f64::NEG_INFINITY;
1810    }
1811    let mut sse = 0.0_f64;
1812    let mut count = 0usize;
1813    for &row in &shard.rows {
1814        if row >= n_full {
1815            continue;
1816        }
1817        for out in 0..p {
1818            let d = fitted[[row, out]] - shard.target[[row, out]];
1819            sse_accumulate(&mut sse, d);
1820        }
1821        count += p;
1822    }
1823    if count == 0 {
1824        return f64::NEG_INFINITY;
1825    }
1826    // Gaussian log-lik up to the additive constant that cancels in every
1827    // e-value ratio: −½·SSE (unit dispersion). The gate forms differences of
1828    // this against the null sup, so the constant and the dispersion scale drop
1829    // out of the certified evidence.
1830    let reconstruction = -0.5 * sse;
1831
1832    // Occam-priced gate-block evidence (#1016/#1218). The split-LR difference
1833    // this gate forms is between the K+1 candidate (alternative) and the K null;
1834    // the gate/assignment-logit block is the weakest-Gaussian piece of the SAE
1835    // evidence and is mispriced by a plain Laplace quadratic near a birth. The
1836    // deterministic Pólya–Gamma gate-block marginal supplies the correct
1837    // normalizer, whose `−½·d_g·log(2π)` term scales with the gate dimension
1838    // `d_g` (one coordinate per atom). Because the candidate carries one more
1839    // gate coordinate than the null, that `d_g`-dependent normalizer does NOT
1840    // cancel in the K-vs-(K+1) difference — it is exactly the per-coordinate
1841    // `log(2π)` Occam term #1218 corrects the sign of. Folding it into the
1842    // evaluation likelihood is what makes the corrected sign reach the live
1843    // gate decision (the unit test alone never touched this path).
1844    let gate_evidence = gate_block_log_evidence(term, shard);
1845
1846    reconstruction + gate_evidence
1847}
1848
1849/// The deterministic Pólya–Gamma gate-block marginal log-evidence of the
1850/// candidate's per-atom logistic gates on a shard's held-out rows (#1016/#1218).
1851///
1852/// Each atom carries one free per-atom gate logit, so the gate block is a stack
1853/// of `K` one-dimensional logistic gates: design `X_g = 1` (the per-atom gate
1854/// coordinate), tilt `ψ̂ =` the atom's per-row logit, binomial response `y =`
1855/// the binarized activation (`b = 1`), under a unit ridge gate prior. The
1856/// returned value is the log-evidence `−neg_log_evidence` from
1857/// `gam_inference::pg_gate_evidence::pg_gate_evidence`, summed over atoms,
1858/// so the K-dependent `−½·d_g·log(2π)` normalizer enters the gate's split-LR.
1859///
1860/// A degenerate/non-PD gate block contributes `0` (no gate evidence) rather than
1861/// poisoning the reconstruction likelihood — a conservative, valid degradation.
1862fn gate_block_log_evidence(term: &SaeManifoldTerm, shard: &RowBlockShard) -> f64 {
1863    use gam_solve::inference::pg_gate_evidence::{GateBlock, pg_gate_evidence};
1864
1865    let logits = &term.assignment.logits;
1866    let n_full = logits.nrows();
1867    let k = logits.ncols();
1868    if k == 0 {
1869        return 0.0;
1870    }
1871    // Restrict to the shard's held-out rows; an empty / out-of-range shard
1872    // carries no gate evidence.
1873    let rows: Vec<usize> = shard.rows.iter().copied().filter(|&r| r < n_full).collect();
1874    let m = rows.len();
1875    if m == 0 {
1876        return 0.0;
1877    }
1878
1879    // Unit gate design (one gate coordinate per atom) and a unit ridge gate
1880    // prior; the PG block is solved per atom and summed, so `d_g = K` overall.
1881    let design = Array2::<f64>::ones((m, 1));
1882    let b = Array1::<f64>::ones(m);
1883    let penalty = Array2::<f64>::eye(1);
1884
1885    let mut total = 0.0_f64;
1886    for atom in 0..k {
1887        let mut psi = Array1::<f64>::zeros(m);
1888        let mut y = Array1::<f64>::zeros(m);
1889        for (i, &row) in rows.iter().enumerate() {
1890            let logit = logits[[row, atom]];
1891            if !logit.is_finite() {
1892                return 0.0;
1893            }
1894            psi[i] = logit;
1895            // Binarized activation: the gate is ON when its logit is positive.
1896            y[i] = if logit > 0.0 { 1.0 } else { 0.0 };
1897        }
1898        let block = GateBlock {
1899            design: design.view(),
1900            y: y.view(),
1901            b: b.view(),
1902            offset: None,
1903            psi_hat: Some(psi.view()),
1904            penalty: Some(penalty.view()),
1905            hess_rest: None,
1906            h_rest: None,
1907        };
1908        match pg_gate_evidence(&block) {
1909            // `neg_log_evidence` is `−log p(gate block)`; the log-likelihood the
1910            // split-LR consumes is its negation.
1911            Ok(ev) => total -= ev.neg_log_evidence,
1912            // A non-PD / degenerate gate block contributes no evidence.
1913            Err(_) => return 0.0,
1914        }
1915    }
1916    total
1917}
1918
1919#[inline]
1920fn sse_accumulate(sse: &mut f64, d: f64) {
1921    *sse += d * d;
1922}
1923
1924/// Inner-fit knobs for the production structure-search refit (the same numbers
1925/// the outer SAE fit drove its inner Arrow-Schur joint fit with).
1926#[derive(Clone, Copy, Debug)]
1927pub struct ProductionRefitParams {
1928    /// Inner Newton iterations for the FULL refit that produces the adopted
1929    /// state (the round winner that becomes the next round's parent and the
1930    /// returned dictionary). Quality-bearing — kept at the outer fit's budget.
1931    pub inner_max_iter: usize,
1932    /// Inner Newton iterations for the per-candidate SCORING refit only (the
1933    /// many rejected / contested candidates the e-gate ranks each round). A
1934    /// structural move yields a WARM child — the parent's converged dictionary
1935    /// with one atom restructured (see [`apply_structure_move`]) — so only the
1936    /// touched atom must re-equilibrate before the held-out evidence gate can
1937    /// rank it; a small budget suffices. This caps the dominant cost of the
1938    /// search (≈ K·rounds full-dictionary refits) WITHOUT changing the adopted
1939    /// state: every round's winner is re-refit at the full `inner_max_iter`
1940    /// budget before it is adopted, so cap-then-polish reaches the same inner
1941    /// optimum as a direct full-iter refit (the inner solve is convergent; the
1942    /// capped score is only a worse starting iterate for the polish). The e-gate
1943    /// DECISION reads the capped score, so this is the one quantity that can in
1944    /// principle differ from a full-iter search; it is held to a tight
1945    /// match-or-equivalent bar (#1026).
1946    pub scoring_inner_max_iter: usize,
1947    /// Inner Newton step size.
1948    pub learning_rate: f64,
1949    /// Ext-coordinate ridge.
1950    pub ridge_ext_coord: f64,
1951    /// β ridge.
1952    pub ridge_beta: f64,
1953}
1954
1955/// Run the production structure-search pass around a fitted SAE term: harvest →
1956/// e-gated [`search`] over held-out row blocks → adopt certified/demoted moves →
1957/// repeat, returning the (possibly restructured) term + ρ and the per-round
1958/// ledgers (#997).
1959///
1960/// The shard refit folds a held-out block into a candidate via the SAME inner
1961/// joint-fit driver the outer fit used ([`SaeManifoldTerm::run_joint_fit_arrow_schur`]),
1962/// PENALTY-FREE: the gate's evidence is a held-out reconstruction
1963/// likelihood-ratio, and the isometry/ARD penalties are gauge/regularization
1964/// terms that do not belong in the evaluation likelihood. The refit absorbs its
1965/// own inner-solve errors by returning the unchanged candidate (a conservative
1966/// no-improvement signal, never a panic). `ledger` carries banked evidence
1967/// across rounds so the death veto sees earlier certifications.
1968pub fn run_production_structure_search(
1969    term: SaeManifoldTerm,
1970    rho: SaeManifoldRho,
1971    target: ArrayView2<'_, f64>,
1972    config: RoundDriverConfig,
1973    refit_params: ProductionRefitParams,
1974    ledger: &mut StructureLedger,
1975) -> Result<StructureSearchResult, String> {
1976    let n = target.nrows();
1977    // Refit a restructured candidate on the ESTIMATION rows only at a chosen
1978    // inner-iteration budget: the held-out evaluation rows carry a near-zero
1979    // weight (vanishing fitting pressure) via the per-row reconstruction-weight
1980    // seam, so the candidate is the predictable plug-in the held-out shards are
1981    // scored against. The seam requires strictly-positive weights, so a tiny
1982    // epsilon stands in for the structural zero; after mean-1 normalization the
1983    // estimation rows carry weight ≈ n/n_est and the held-out rows ≈ 0. A
1984    // non-converging inner solve returns the unchanged candidate (infallible at
1985    // the boundary). `inner_max_iter` is the only knob that varies between the
1986    // cheap per-candidate scoring pass and the full-iter polish of the adopted
1987    // winner; `full_target` is borrowed by reference so the helper holds no owned
1988    // capture and can be called from both closures below.
1989    let refit_at = |full_target: ArrayView2<'_, f64>,
1990                    mut cand_term: SaeManifoldTerm,
1991                    mut cand_rho: SaeManifoldRho,
1992                    estimation_rows: &[usize],
1993                    inner_max_iter: usize|
1994     -> (SaeManifoldTerm, SaeManifoldRho) {
1995        const HELD_OUT_WEIGHT: f64 = 1e-12;
1996        let mut weights = vec![HELD_OUT_WEIGHT; n];
1997        for &r in estimation_rows {
1998            if r < n {
1999                weights[r] = 1.0;
2000            }
2001        }
2002        if cand_term.set_row_loss_weights(weights).is_err() {
2003            return (cand_term, cand_rho);
2004        }
2005        if cand_term
2006            .run_joint_fit_arrow_schur(
2007                full_target,
2008                &mut cand_rho,
2009                None,
2010                inner_max_iter,
2011                refit_params.learning_rate,
2012                refit_params.ridge_ext_coord,
2013                refit_params.ridge_beta,
2014            )
2015            .is_err()
2016        {
2017            return (cand_term, cand_rho);
2018        }
2019        (cand_term, cand_rho)
2020    };
2021    let scoring_iters = refit_params.scoring_inner_max_iter;
2022    let full_iters = refit_params.inner_max_iter;
2023    let full_target_score = target.to_owned();
2024    let full_target_polish = target.to_owned();
2025    run_structure_search_rounds(
2026        term,
2027        rho,
2028        target,
2029        config,
2030        ledger,
2031        // Per-candidate SCORING refit: capped (warm child, few iters).
2032        move |cand_term, cand_rho, estimation_rows| {
2033            refit_at(
2034                full_target_score.view(),
2035                cand_term,
2036                cand_rho,
2037                estimation_rows,
2038                scoring_iters,
2039            )
2040        },
2041        // Full-iter POLISH of each round's adopted winner before it becomes the
2042        // next round's parent / the returned dictionary, so the cap is a
2043        // scoring-only economy and the adopted state matches a full-iter refit.
2044        move |adopted_term, adopted_rho, estimation_rows| {
2045            refit_at(
2046                full_target_polish.view(),
2047                adopted_term,
2048                adopted_rho,
2049                estimation_rows,
2050                full_iters,
2051            )
2052        },
2053    )
2054}
2055
2056/// Serialize the per-round ledgers to a JSON string for the fit payload — the
2057/// honesty surface the python boundary attaches under an additive
2058/// `structure_search` key. Byte-deterministic for identical inputs.
2059pub fn rounds_to_json(rounds: &[SearchLedger]) -> Result<String, String> {
2060    serde_json::to_string(rounds)
2061        .map_err(|e| format!("rounds_to_json: serialize search ledger: {e}"))
2062}
2063
2064#[cfg(test)]
2065mod tests {
2066    use super::*;
2067    use gam_solve::structure_search::{CollapseAction, CollapseEvent};
2068    use gam_terms::latent::LatentManifold;
2069    use crate::manifold::{
2070        AssignmentMode, PeriodicHarmonicEvaluator, SaeAssignment, SaeAtomBasisKind,
2071        SaeBasisEvaluator, SaeManifoldAtom,
2072    };
2073    use ndarray::Array2;
2074    use std::sync::Arc;
2075
2076    /// A high active logit (atom routes strongly on the row) and a low one
2077    /// (atom is dormant). With the `ACTIVE_SUPPORT_REL_FLOOR / K` threshold a
2078    /// softmax of these separates the discrete support cleanly.
2079    const ON: f64 = 6.0;
2080    const OFF: f64 = -6.0;
2081
2082    /// Build a `K`-atom periodic SAE term whose per-row routing is dictated by a
2083    /// caller-supplied boolean activity matrix `active[(row, atom)]` (ON/OFF
2084    /// logits). Every atom shares the same circle basis; only the routing (and,
2085    /// for the birth template, the decoder) differs. Returns the term and a
2086    /// matching ρ with native ARD enabled (one axis per atom).
2087    fn planted_term(active: &[Vec<bool>]) -> (SaeManifoldTerm, SaeManifoldRho) {
2088        let n = active.len();
2089        let k = active[0].len();
2090        let p = 4usize;
2091        let evaluator = Arc::new(PeriodicHarmonicEvaluator::new(3).unwrap());
2092        let coords = Array2::<f64>::from_shape_fn((n, 1), |(row, _)| row as f64 / n as f64);
2093        let (phi, jet) = evaluator.evaluate(coords.view()).unwrap();
2094        let mut atoms = Vec::with_capacity(k);
2095        let mut coord_blocks = Vec::with_capacity(k);
2096        for atom_idx in 0..k {
2097            let mut decoder = Array2::<f64>::zeros((3, p));
2098            // Give each atom a distinct decoder direction so reconstruction is
2099            // non-degenerate.
2100            decoder[[1, atom_idx % p]] = 1.0;
2101            decoder[[2, (atom_idx + 1) % p]] = 1.0;
2102            let atom = SaeManifoldAtom::new(
2103                format!("atom_{atom_idx}"),
2104                SaeAtomBasisKind::Periodic,
2105                1,
2106                phi.clone(),
2107                jet.clone(),
2108                decoder,
2109                Array2::<f64>::eye(3),
2110            )
2111            .unwrap()
2112            .with_basis_second_jet(evaluator.clone());
2113            atoms.push(atom);
2114            coord_blocks.push(coords.clone());
2115        }
2116        let mut logits = Array2::<f64>::zeros((n, k));
2117        for (row, atom_active) in active.iter().enumerate() {
2118            for (atom, &on) in atom_active.iter().enumerate() {
2119                logits[[row, atom]] = if on { ON } else { OFF };
2120            }
2121        }
2122        let assignment = SaeAssignment::from_blocks_with_mode_and_manifolds(
2123            logits,
2124            coord_blocks,
2125            vec![LatentManifold::Circle { period: 1.0 }; k],
2126            AssignmentMode::softmax(1.0),
2127        )
2128        .unwrap();
2129        let term = SaeManifoldTerm::new(atoms, assignment).unwrap();
2130        let rho = SaeManifoldRho::new(0.0, 0.0, vec![Array1::<f64>::zeros(1); k]);
2131        (term, rho)
2132    }
2133
2134    fn residuals_of(term: &SaeManifoldTerm) -> Array2<f64> {
2135        // A term scored against zero target gives R = −fitted; non-degenerate
2136        // residuals for the birth channel.
2137        let fitted = term.try_fitted().unwrap();
2138        -&fitted
2139    }
2140
2141    /// #977 discovery oracle: with the production birth budget enabled, a fit
2142    /// #1230 — `StructureSearchResult::structure_changed()` is the trigger the
2143    /// FFI uses to decide whether the pre-search joint-Hessian shape bands are
2144    /// stale and must be recomputed from the final post-search model.
2145    ///
2146    /// It must report `true` iff at least one move LANDED and mutated the
2147    /// returned `term`/`rho`: an `Accepted` move (certified birth / fission /
2148    /// fusion + warm refit) or a `Demoted` death. It must report `false` when
2149    /// every round was contested / vetoed (the term/rho are byte-for-byte the
2150    /// pre-search fit, so the exact joint-Hessian bands stay valid), and when no
2151    /// round ran at all. A false negative leaves seed atoms with stale bands
2152    /// (the #1230 bug); a false positive needlessly discards exact bands.
2153    #[test]
2154    fn structure_changed_is_true_only_when_a_move_lands() {
2155        use gam_solve::structure_search::{MoveRecord, MoveVerdict};
2156
2157        fn ledger_with(verdicts: Vec<MoveVerdict>) -> SearchLedger {
2158            SearchLedger {
2159                alpha: 0.05,
2160                moves: verdicts
2161                    .into_iter()
2162                    .enumerate()
2163                    .map(|(i, verdict)| MoveRecord {
2164                        mv: StructureMove::Death { atom: i },
2165                        trigger: 0.0,
2166                        structure_hash: i as u64,
2167                        claim: ClaimKind::AtomExists { atom: i },
2168                        verdict,
2169                    })
2170                    .collect(),
2171                collapse_events: Vec::new(),
2172            }
2173        }
2174
2175        // No rounds ran at all: nothing changed.
2176        let (term0, rho0) = planted_term(&[vec![true], vec![true]]);
2177        let empty = StructureSearchResult {
2178            term: term0.clone(),
2179            rho: rho0.clone(),
2180            rounds: Vec::new(),
2181        };
2182        assert!(
2183            !empty.structure_changed(),
2184            "no rounds ⇒ the term/rho are the pre-search fit ⇒ structure_changed() must be false"
2185        );
2186
2187        // Every move contested or vetoed: the dictionary is byte-for-byte the
2188        // pre-search fit, so the exact joint-Hessian bands remain valid.
2189        let no_landed = StructureSearchResult {
2190            term: term0.clone(),
2191            rho: rho0.clone(),
2192            rounds: vec![ledger_with(vec![
2193                MoveVerdict::Contested { log_e: -1.0 },
2194                MoveVerdict::Vetoed { log_e: -2.0 },
2195            ])],
2196        };
2197        assert!(
2198            !no_landed.structure_changed(),
2199            "all-contested/vetoed rounds leave the model unchanged ⇒ structure_changed() must be false"
2200        );
2201
2202        // An Accepted move landed (certified restructuring + warm refit): the
2203        // returned model differs from the pre-search fit ⇒ bands are stale.
2204        let accepted = StructureSearchResult {
2205            term: term0.clone(),
2206            rho: rho0.clone(),
2207            rounds: vec![ledger_with(vec![
2208                MoveVerdict::Contested { log_e: -1.0 },
2209                MoveVerdict::Accepted { log_e: 3.0 },
2210            ])],
2211        };
2212        assert!(
2213            accepted.structure_changed(),
2214            "a landed Accepted move mutates term/rho ⇒ structure_changed() must be true (recompute bands)"
2215        );
2216
2217        // A Demoted death is also a landed structure change.
2218        let demoted = StructureSearchResult {
2219            term: term0.clone(),
2220            rho: rho0.clone(),
2221            rounds: vec![ledger_with(vec![MoveVerdict::Demoted { log_e: -1.0 }])],
2222        };
2223        assert!(
2224            demoted.structure_changed(),
2225            "a landed Demoted death folds an atom to ~0 routing ⇒ structure_changed() must be true"
2226        );
2227    }
2228
2229    /// whose residuals carry an unexplained factor direction (a structure the
2230    /// current dictionary does not express) HARVESTS a birth proposal — the
2231    /// candidate atom whose held-out e-value the gate then adjudicates. This is
2232    /// the proposal channel the production site re-enabled (`max_births > 0`);
2233    /// without it K could never grow.
2234    #[test]
2235    fn residual_bearing_fit_harvests_birth_proposal() {
2236        // A single circle atom routed on every row; its fitted reconstruction
2237        // leaves a structured residual (R = −fitted has rank > 1 across the p=4
2238        // output channels), so the whitened residual-factor subspace is
2239        // non-empty and the birth channel mines a candidate direction.
2240        let n = 40usize;
2241        let active: Vec<Vec<bool>> = (0..n).map(|_| vec![true]).collect();
2242        let (term, rho) = planted_term(&active);
2243        // Inject a clear shared-direction (rank-1) factor into the residuals that
2244        // varies smoothly with the per-row activity coordinate, so the whitened
2245        // residual-factor evidence ladder selects rank ≥ 1: every row gets a
2246        // multiple of the same unit output direction `u`, scaled by a per-row
2247        // amplitude. This is the unexplained shared structure a born atom would
2248        // absorb.
2249        let p = term.output_dim();
2250        let mut residuals = Array2::<f64>::zeros((n, p));
2251        let u = [0.6_f64, -0.4, 0.5, -0.3];
2252        for row in 0..n {
2253            // A non-constant per-row amplitude so the factor is genuine shared
2254            // structure (not absorbed by the diagonal noise floor).
2255            let amp = 1.0 + (row as f64) / (n as f64);
2256            for c in 0..p {
2257                residuals[[row, c]] = amp * u[c % u.len()];
2258            }
2259        }
2260        let params = HarvestParams {
2261            max_fusions: 0,
2262            max_fissions: 0,
2263            // The production-enabled budget (births > 0) — the whole point of
2264            // #977: K can grow.
2265            max_births: 2,
2266        };
2267        let report = harvest_move_proposals(&term, &rho, residuals.view(), &params).unwrap();
2268        let births: usize = report
2269            .proposals
2270            .iter()
2271            .filter(|p| matches!(p.mv, StructureMove::Birth { .. }))
2272            .count();
2273        assert!(
2274            births >= 1,
2275            "a residual-bearing fit with births enabled must harvest at least \
2276             one birth proposal (so K can be discovered); got {:?}",
2277            report.proposals.iter().map(|p| &p.mv).collect::<Vec<_>>()
2278        );
2279        assert!(
2280            report.births_proposed >= 1,
2281            "births_proposed must count the harvested births; got {}",
2282            report.births_proposed
2283        );
2284        assert!(
2285            report.birth_skipped_reason.is_none(),
2286            "the birth channel must run (no skip) on a non-degenerate residual; got {:?}",
2287            report.birth_skipped_reason
2288        );
2289    }
2290
2291    /// #977 NULL oracle: a target the dictionary reconstructs exactly leaves
2292    /// ZERO residual, so the birth channel finds no factor subspace and proposes
2293    /// no birth — nothing is born under the null. (The round driver's e-gate is
2294    /// the second line of defense; this asserts the harvest itself does not
2295    /// manufacture growth where there is no unexplained structure.)
2296    #[test]
2297    fn fully_reconstructed_null_harvests_no_birth() {
2298        let n = 40usize;
2299        let active: Vec<Vec<bool>> = (0..n).map(|_| vec![true]).collect();
2300        let (term, rho) = planted_term(&active);
2301        // Residual ≡ 0: the dictionary reconstructs the target exactly, so there
2302        // is no unexplained factor to mine.
2303        let p = term.output_dim();
2304        let zero_residual = Array2::<f64>::zeros((n, p));
2305        let params = HarvestParams {
2306            max_fusions: 0,
2307            max_fissions: 0,
2308            max_births: 2,
2309        };
2310        let report = harvest_move_proposals(&term, &rho, zero_residual.view(), &params).unwrap();
2311        let births: usize = report
2312            .proposals
2313            .iter()
2314            .filter(|p| matches!(p.mv, StructureMove::Birth { .. }))
2315            .count();
2316        assert_eq!(
2317            births, 0,
2318            "a fully-reconstructed (zero-residual) null must harvest no birth \
2319             proposal; got {births} births"
2320        );
2321    }
2322
2323    /// Oracle (#997 trigger): a planted SHATTER — two atoms with identical
2324    /// supports (one curved family re-encoded as near-duplicate flat atoms) —
2325    /// produces a FUSION proposal on that pair (symmetric code dependence ≈ 1),
2326    /// and NO fission audit (asymmetry ≈ 0).
2327    #[test]
2328    fn planted_shatter_harvests_fusion_not_fission() {
2329        // Atoms 0 and 1 share support exactly (every third row); atom 2 is
2330        // independent. n = 30.
2331        let n = 30usize;
2332        let active: Vec<Vec<bool>> = (0..n)
2333            .map(|row| {
2334                let dup = row % 3 == 0;
2335                vec![dup, dup, row % 2 == 0]
2336            })
2337            .collect();
2338        let (term, rho) = planted_term(&active);
2339        let residuals = residuals_of(&term);
2340        let params = HarvestParams {
2341            max_fusions: 4,
2342            max_fissions: 4,
2343            max_births: 0,
2344        };
2345        let report = harvest_move_proposals(&term, &rho, residuals.view(), &params).unwrap();
2346        let has_fusion_01 = report.proposals.iter().any(|p| {
2347            matches!(p.mv, StructureMove::Fusion { a, b } if (a, b) == (0, 1) || (a, b) == (1, 0))
2348        });
2349        assert!(
2350            has_fusion_01,
2351            "shattered duplicate pair (0,1) must yield a fusion proposal; got {:?}",
2352            report.proposals.iter().map(|p| &p.mv).collect::<Vec<_>>()
2353        );
2354        // The duplicate pair is symmetric ⇒ no absorption fission audit on it.
2355        let has_fission = report
2356            .proposals
2357            .iter()
2358            .any(|p| matches!(p.mv, StructureMove::Fission { .. }));
2359        assert!(
2360            !has_fission,
2361            "symmetric duplicate supports must not trigger an absorption fission audit"
2362        );
2363    }
2364
2365    /// Oracle (#997 trigger): a planted ABSORPTION (A⊇B: B's support nests
2366    /// inside A's) produces a FISSION audit on the parent A (high conditional
2367    /// asymmetry, parent conditional ≈ 1). The planted atoms are 1-D `Periodic`
2368    /// (NOT a `d = 2` product), so the #993 within-atom carve is undefined on
2369    /// them and the candidate rides on the co-activation audit — recorded
2370    /// loudly via `fission_carve_unavailable_count`, never silent.
2371    #[test]
2372    fn planted_absorption_harvests_fission_audit_with_loud_carve_skip() {
2373        // Atom 0 (parent) active on rows ≡ 0 mod 2 PLUS rows ≡ 1 mod 4; atom 1
2374        // (child) active only on rows ≡ 0 mod 4 — strictly nested in 0's
2375        // support ⇒ P(0|1) = 1, P(1|0) < 1. n = 40.
2376        let n = 40usize;
2377        let active: Vec<Vec<bool>> = (0..n)
2378            .map(|row| {
2379                let child = row % 4 == 0;
2380                let parent = row % 2 == 0 || row % 4 == 1;
2381                vec![parent, child, row % 5 == 0]
2382            })
2383            .collect();
2384        let (term, rho) = planted_term(&active);
2385        let residuals = residuals_of(&term);
2386        let params = HarvestParams {
2387            max_fusions: 4,
2388            max_fissions: 4,
2389            max_births: 0,
2390        };
2391        let report = harvest_move_proposals(&term, &rho, residuals.view(), &params).unwrap();
2392        let fissioned_parent = report
2393            .proposals
2394            .iter()
2395            .any(|p| matches!(p.mv, StructureMove::Fission { atom: 0 }));
2396        assert!(
2397            fissioned_parent,
2398            "nested-support parent (atom 0) must be flagged for a fission audit; got {:?}",
2399            report.proposals.iter().map(|p| &p.mv).collect::<Vec<_>>()
2400        );
2401        assert_eq!(
2402            report.fission_carve_ran_count, 0,
2403            "1-D periodic atoms are not a product manifold; the within-atom carve cannot run"
2404        );
2405        assert!(
2406            report.fission_carve_unavailable_count >= 1,
2407            "the non-product fission candidate must be recorded as carve-unavailable, not silent"
2408        );
2409        assert!(
2410            report.fission_carve_results.is_empty(),
2411            "no carve ran, so there are no carve results to report"
2412        );
2413    }
2414
2415    /// Oracle (#997 type-I): three INDEPENDENT planted atoms (marginal supports
2416    /// at coprime strides) yield NO fusion proposal — the trigger does not
2417    /// manufacture binding edges where the codes are independent, so the e-gate
2418    /// is never even asked to reject a true null.
2419    #[test]
2420    fn independent_atoms_harvest_no_fusion() {
2421        let n = 60usize;
2422        let active: Vec<Vec<bool>> = (0..n)
2423            .map(|row| vec![row % 2 == 0, row % 3 == 0, row % 5 == 0])
2424            .collect();
2425        let (term, rho) = planted_term(&active);
2426        let residuals = residuals_of(&term);
2427        let params = HarvestParams {
2428            max_fusions: 4,
2429            max_fissions: 4,
2430            max_births: 0,
2431        };
2432        let report = harvest_move_proposals(&term, &rho, residuals.view(), &params).unwrap();
2433        let has_fusion = report
2434            .proposals
2435            .iter()
2436            .any(|p| matches!(p.mv, StructureMove::Fusion { .. }));
2437        assert!(
2438            !has_fusion,
2439            "independent atom supports must not produce fusion proposals; got {:?}",
2440            report.proposals.iter().map(|p| &p.mv).collect::<Vec<_>>()
2441        );
2442    }
2443
2444    /// Oracle (#997 death trigger): a diverged ARD precision yields a DEATH
2445    /// proposal; a terminal collapse event yields a death even with finite ARD.
2446    #[test]
2447    fn diverged_ard_and_terminal_collapse_harvest_deaths() {
2448        let n = 20usize;
2449        let active: Vec<Vec<bool>> = (0..n).map(|row| vec![true, row % 2 == 0, false]).collect();
2450        let (mut term, mut rho) = planted_term(&active);
2451        // Diverge atom 2's ARD precision well past the divergence floor.
2452        rho.log_ard[2] = Array1::from_elem(1, ARD_DIVERGENCE_LOG_PRECISION + 5.0);
2453        // Inject a terminal collapse for atom 1 (finite ARD, but routing gone).
2454        term.record_collapse_event(CollapseEvent {
2455            iteration: 3,
2456            atom: 1,
2457            max_active_mass: 1e-6,
2458            floor: 1e-3,
2459            action: CollapseAction::Terminal,
2460        });
2461        let residuals = residuals_of(&term);
2462        let params = HarvestParams {
2463            max_fusions: 0,
2464            max_fissions: 0,
2465            max_births: 0,
2466        };
2467        let report = harvest_move_proposals(&term, &rho, residuals.view(), &params).unwrap();
2468        let death_atoms: Vec<usize> = report
2469            .proposals
2470            .iter()
2471            .filter_map(|p| match p.mv {
2472                StructureMove::Death { atom } => Some(atom),
2473                _ => None,
2474            })
2475            .collect();
2476        assert!(
2477            death_atoms.contains(&2),
2478            "diverged ARD on atom 2 must yield a death proposal; got {death_atoms:?}"
2479        );
2480        assert!(
2481            death_atoms.contains(&1),
2482            "terminal collapse on atom 1 must yield a death proposal; got {death_atoms:?}"
2483        );
2484    }
2485
2486    /// Apply-move restructuring oracle: fission GROWS the dictionary by one atom
2487    /// (child inherits parent's basis + ARD block), fusion and death keep K
2488    /// (fold / demote), birth appends a residual-factor atom.
2489    #[test]
2490    fn apply_move_restructures_warm() {
2491        let n = 12usize;
2492        let active: Vec<Vec<bool>> = (0..n).map(|row| vec![true, row % 2 == 0]).collect();
2493        let (term, rho) = planted_term(&active);
2494        let k0 = term.k_atoms();
2495
2496        // Fission: K grows, child ARD block inherited.
2497        let (fissioned, fissioned_rho) =
2498            apply_structure_move(&term, &rho, &StructureMove::Fission { atom: 0 }, &[]).unwrap();
2499        assert_eq!(fissioned.k_atoms(), k0 + 1);
2500        assert_eq!(fissioned_rho.log_ard.len(), k0 + 1);
2501        // Every length-K ρ vector the penalty assembler indexes by atom must
2502        // grow with K, not just `log_ard`. `log_lambda_smooth` is read as
2503        // `lambda_smooth[atom_idx]` in construction.rs; a stale length-K vector
2504        // panics out of bounds on the K-th (new) atom (#357).
2505        assert_eq!(
2506            fissioned_rho.log_lambda_smooth.len(),
2507            fissioned.k_atoms(),
2508            "fission must grow per-atom log_lambda_smooth in lockstep with K"
2509        );
2510
2511        // Fusion: K unchanged, atom b demoted to ~0 routing.
2512        let (fused, _) =
2513            apply_structure_move(&term, &rho, &StructureMove::Fusion { a: 0, b: 1 }, &[]).unwrap();
2514        assert_eq!(fused.k_atoms(), k0);
2515        let fused_assign = fused.assignment.assignments();
2516        assert!(
2517            fused_assign.column(1).iter().all(|&m| m < 1e-6),
2518            "fused-away atom 1 must route to ~0 mass"
2519        );
2520
2521        // Death: K unchanged, atom demoted.
2522        let (dead, _) =
2523            apply_structure_move(&term, &rho, &StructureMove::Death { atom: 1 }, &[]).unwrap();
2524        assert_eq!(dead.k_atoms(), k0);
2525        let dead_assign = dead.assignment.assignments();
2526        assert!(dead_assign.column(1).iter().all(|&m| m < 1e-6));
2527
2528        // Birth: K grows, and the new atom RECONSTRUCTS the residual-factor image.
2529        //
2530        // Since the #977 topology RACE, a born atom no longer carries the raw
2531        // `factor_dir` coefficients verbatim: its topology is chosen by evidence
2532        // and its decoder is the winning basis's penalized least-squares fit to the
2533        // birth target `Y = Φ_template · factor_dir` (so the raw coefficient
2534        // `[[0,0]]` is shrunk by the fit ridge — `0.6999…`, not exactly `0.7`).
2535        // The structural invariant the move must preserve is therefore
2536        // RECONSTRUCTION PARITY, not coefficient identity: the born atom, evaluated
2537        // on its own coordinates with its own (raced) basis, must reproduce the
2538        // birth-target image to within the small fit ridge.
2539        let p = term.output_dim();
2540        let m = term.atoms[0].basis_size();
2541        let mut decoder = Array2::<f64>::zeros((m, p));
2542        decoder[[0, 0]] = 0.7;
2543        let birth_target = term.atoms[0].basis_values.dot(&decoder); // Φ_template · factor_dir
2544        let (born, born_rho) = apply_structure_move(
2545            &term,
2546            &rho,
2547            &StructureMove::Birth { candidate: 0 },
2548            &[decoder],
2549        )
2550        .unwrap();
2551        assert_eq!(born.k_atoms(), k0 + 1);
2552        assert_eq!(born_rho.log_ard.len(), k0 + 1);
2553        // ρ's per-atom smoothness vector must grow in step with K (the #1556
2554        // contract `assemble_arrow_schur` validates); a stale-length vector would
2555        // panic the next assemble on the per-atom `lambda_smooth[atom_idx]` index.
2556        assert_eq!(born_rho.log_lambda_smooth.len(), k0 + 1);
2557        let born_atom = &born.atoms[k0];
2558        let born_image = born_atom.basis_values.dot(&born_atom.decoder_coefficients);
2559        assert_eq!(born_image.dim(), birth_target.dim());
2560        let mut max_recon_err = 0.0_f64;
2561        for (a, b) in born_image.iter().zip(birth_target.iter()) {
2562            max_recon_err = max_recon_err.max((a - b).abs());
2563        }
2564        assert!(
2565            max_recon_err < 1e-3,
2566            "born atom must reconstruct the residual-factor image (penalized fit); \
2567             max |Φ_born·B_born − Φ_template·factor_dir| = {max_recon_err:.3e} (> 1e-3)"
2568        );
2569    }
2570
2571    /// #357 regression: after a structure move that GROWS the atom count
2572    /// (fission/birth), the returned ρ's per-atom `log_lambda_smooth` must be
2573    /// length-K so the penalty assembler's `lambda_smooth[atom_idx]` read does
2574    /// not panic out of bounds. Before the fix `duplicate_atom`/`born_atom`
2575    /// pushed only `log_ard`, leaving `log_lambda_smooth` one short — the next
2576    /// `assemble_arrow_schur_inner` panicked with `index out of bounds: the len
2577    /// is K but the index is K` (construction.rs `scaled_s[[i,j]] =
2578    /// lambda_smooth[atom_idx] * s_ij`). This drives the REAL assembly so it
2579    /// fails on the buggy path, not just on a length assertion.
2580    #[test]
2581    fn grown_atom_count_assembles_without_lambda_smooth_oob_357() {
2582        let n = 16usize;
2583        let active: Vec<Vec<bool>> = (0..n).map(|row| vec![true, row % 2 == 0]).collect();
2584        let (term, rho) = planted_term(&active);
2585        let target = Array2::<f64>::from_shape_fn((n, term.output_dim()), |(row, col)| {
2586            0.1 * (row as f64) - 0.05 * (col as f64)
2587        });
2588
2589        // Fission grows K by one.
2590        let (fissioned, fissioned_rho) =
2591            apply_structure_move(&term, &rho, &StructureMove::Fission { atom: 0 }, &[]).unwrap();
2592        assert_eq!(fissioned_rho.log_lambda_smooth.len(), fissioned.k_atoms());
2593        // The assembly indexes lambda_smooth[atom_idx] for every atom; on the
2594        // pre-fix ρ this panicked out of bounds for the new K-th atom.
2595        let mut fissioned = fissioned;
2596        fissioned
2597            .assemble_arrow_schur_scaled(target.view(), &fissioned_rho, None, 1.0)
2598            .expect("post-fission assembly must not panic or error on the grown atom set");
2599
2600        // Birth grows K by one and must assemble too.
2601        let p = term.output_dim();
2602        let m = term.atoms[0].basis_size();
2603        let mut decoder = Array2::<f64>::zeros((m, p));
2604        decoder[[0, 0]] = 0.5;
2605        let (born, born_rho) = apply_structure_move(
2606            &term,
2607            &rho,
2608            &StructureMove::Birth { candidate: 0 },
2609            &[decoder],
2610        )
2611        .unwrap();
2612        assert_eq!(born_rho.log_lambda_smooth.len(), born.k_atoms());
2613        let mut born = born;
2614        born.assemble_arrow_schur_scaled(target.view(), &born_rho, None, 1.0)
2615            .expect("post-birth assembly must not panic or error on the grown atom set");
2616    }
2617
2618    /// Ledger byte-determinism oracle (#997): two runs of the round driver over
2619    /// the same planted shatter, with a deterministic scripted fit, serialize
2620    /// the per-round ledgers byte-identically.
2621    #[test]
2622    fn round_driver_ledger_is_byte_deterministic() {
2623        let n = 24usize;
2624        let active: Vec<Vec<bool>> = (0..n)
2625            .map(|row| {
2626                let dup = row % 3 == 0;
2627                vec![dup, dup, row % 2 == 0]
2628            })
2629            .collect();
2630
2631        let run = || {
2632            let (term, rho) = planted_term(&active);
2633            let target = Array2::<f64>::zeros((n, term.output_dim()));
2634            let mut ledger = gam_terms::inference::structure_evidence::StructureLedger::new();
2635            let budget = MoveBudget {
2636                max_moves: 4,
2637                alpha: 0.05,
2638            };
2639            let params = HarvestParams {
2640                max_fusions: 4,
2641                max_fissions: 0,
2642                max_births: 0,
2643            };
2644            let config = RoundDriverConfig {
2645                n_shards: 3,
2646                budget,
2647                max_rounds: 2,
2648                harvest_params: params,
2649            };
2650            // Deterministic no-op fit: the scripted gate sees the unrefit
2651            // candidate (the engine's determinism is what this asserts, not the
2652            // SAE inner solve).
2653            run_structure_search_rounds(
2654                term,
2655                rho,
2656                target.view(),
2657                config,
2658                &mut ledger,
2659                |t, r, _| (t, r),
2660                // No-op polish: this determinism oracle scripts the gate and
2661                // never runs the SAE inner solve.
2662                |t, r, _| (t, r),
2663            )
2664            .unwrap()
2665        };
2666
2667        let a = run();
2668        let b = run();
2669        let sa = serde_json::to_string(&a.rounds).unwrap();
2670        let sb = serde_json::to_string(&b.rounds).unwrap();
2671        assert_eq!(
2672            sa, sb,
2673            "identical inputs must produce a byte-identical ledger"
2674        );
2675        assert_eq!(a.term.k_atoms(), b.term.k_atoms());
2676    }
2677
2678    /// #1026 move-equivalence oracle for the candidate-SCORING iteration cap.
2679    ///
2680    /// The production structure search caps the per-candidate scoring refit's
2681    /// inner iterations (`scoring_inner_max_iter`) well below the outer fit's
2682    /// `inner_max_iter`, then re-refits each round's adopted winner at the full
2683    /// budget. The economy is sound only if it does NOT change WHICH moves the
2684    /// e-gate accepts (the gate ranks the capped-score candidate) NOR the adopted
2685    /// dictionary (the winner is polished to the full-iter optimum). This runs
2686    /// the real `run_production_structure_search` driver — same residual-bearing
2687    /// target, seed, splits, budgets — twice: a full-iter REFERENCE
2688    /// (`scoring_inner_max_iter == inner_max_iter`) and the CAPPED production
2689    /// path, and asserts the accepted-move ledger is byte-identical and the
2690    /// final fitted reconstruction matches to a tight tolerance. On a tractable
2691    /// K/n where the full-iter reference completes, equivalence here certifies
2692    /// the cap is a pure scoring-cost economy (the #1026 perf fix is quality- and
2693    /// decision-preserving).
2694    #[test]
2695    fn scoring_iter_cap_preserves_moves_and_adopted_fit() {
2696        // A residual-bearing single-atom fit so the birth channel mines a real
2697        // shared-structure candidate the gate can certify (mirrors
2698        // `residual_bearing_fit_harvests_birth_proposal`'s planted factor).
2699        let n = 40usize;
2700        let active: Vec<Vec<bool>> = (0..n).map(|_| vec![true]).collect();
2701        let p = 4usize;
2702        let u = [0.6_f64, -0.4, 0.5, -0.3];
2703        let mut target = Array2::<f64>::zeros((n, p));
2704        for row in 0..n {
2705            let amp = 1.0 + (row as f64) / (n as f64);
2706            for c in 0..p {
2707                target[[row, c]] = amp * u[c % u.len()];
2708            }
2709        }
2710        let config = RoundDriverConfig {
2711            n_shards: 4,
2712            budget: MoveBudget {
2713                max_moves: 4,
2714                alpha: 0.05,
2715            },
2716            max_rounds: 2,
2717            harvest_params: HarvestParams {
2718                max_fusions: 2,
2719                max_fissions: 2,
2720                max_births: 2,
2721            },
2722        };
2723        let full_iters = 24usize;
2724        let run = |scoring_inner_max_iter: usize| {
2725            let (term, rho) = planted_term(&active);
2726            let mut ledger = StructureLedger::new();
2727            let refit_params = ProductionRefitParams {
2728                inner_max_iter: full_iters,
2729                scoring_inner_max_iter,
2730                learning_rate: 1.0,
2731                ridge_ext_coord: 1e-6,
2732                ridge_beta: 1e-6,
2733            };
2734            let result = run_production_structure_search(
2735                term,
2736                rho,
2737                target.view(),
2738                config,
2739                refit_params,
2740                &mut ledger,
2741            )
2742            .unwrap();
2743            let fitted = result.term.try_fitted().unwrap();
2744            (result, fitted)
2745        };
2746
2747        // Full-iter reference: scoring budget == full budget (no economy).
2748        let (reference, ref_fitted) = run(full_iters);
2749        // Production cap: a warm child re-equilibrates in very few iters.
2750        let (capped, cap_fitted) = run(4);
2751
2752        // The accepted-move trajectory — the per-round `moves` (each carrying the
2753        // proposed `mv`, its `trigger`, `structure_hash`, `claim`, and the e-gate
2754        // `verdict`) — must be identical: the cap must not flip a single e-gate
2755        // decision. We compare ONLY `moves`, NOT the per-round `collapse_events`.
2756        // The collapse-event log is the #976 active-mass guard's per-INNER-iteration
2757        // diagnostic trail: a full-iter refit runs more inner Newton steps than a
2758        // capped one, so it legitimately records more reseed/terminal guard fires
2759        // for the SAME structural outcome. Those events are a refit-trajectory
2760        // diagnostic, not an e-gate decision — the move verdicts already encode
2761        // their structural effect — so comparing them would assert a property the
2762        // cap is not meant to preserve (and the adopted-fit check below is the
2763        // real guarantee that the converged state matches).
2764        let round_moves = |rounds: &[SearchLedger]| -> String {
2765            serde_json::to_string(&rounds.iter().map(|r| &r.moves).collect::<Vec<_>>()).unwrap()
2766        };
2767        assert_eq!(
2768            round_moves(&reference.rounds),
2769            round_moves(&capped.rounds),
2770            "scoring-iteration cap changed the accepted-move trajectory — the e-gate \
2771             decisions are NOT cap-invariant (the #1026 economy is unsound)"
2772        );
2773        assert_eq!(
2774            reference.term.k_atoms(),
2775            capped.term.k_atoms(),
2776            "scoring cap changed the discovered dictionary size"
2777        );
2778
2779        // The adopted dictionary's reconstruction must match: the full-iter
2780        // polish lands the capped path on the same inner optimum.
2781        assert_eq!(ref_fitted.dim(), cap_fitted.dim());
2782        let mut max_abs = 0.0_f64;
2783        for (a, b) in ref_fitted.iter().zip(cap_fitted.iter()) {
2784            max_abs = max_abs.max((a - b).abs());
2785        }
2786        assert!(
2787            max_abs < 1e-6,
2788            "capped-scoring adopted fit diverged from the full-iter reference by \
2789             {max_abs:.3e} (> 1e-6); the polish did not reach the same optimum"
2790        );
2791    }
2792
2793    /// Estimation/eval split oracle: the split reserves estimation rows and
2794    /// partitions the remainder into held-out shards that do NOT overlap the
2795    /// estimation set (the universal-inference contract the gates rely on).
2796    #[test]
2797    fn estimation_eval_split_is_disjoint() {
2798        let target = Array2::<f64>::zeros((20, 3));
2799        let split = estimation_eval_split(target.view(), 4);
2800        assert!(!split.estimation_rows.is_empty());
2801        assert!(!split.shards.is_empty());
2802        let est: std::collections::HashSet<usize> = split.estimation_rows.iter().copied().collect();
2803        for shard in &split.shards {
2804            for &row in &shard.rows {
2805                assert!(
2806                    !est.contains(&row),
2807                    "eval shard row {row} must not be in the estimation set"
2808                );
2809            }
2810        }
2811    }
2812
2813    /// #977 per-atom topology RACE oracle: two birth targets — one tracing a
2814    /// CIRCLE in output space as the coordinate sweeps, the other a straight
2815    /// LINE — must be assigned DIFFERENT topologies by evidence. A genuine
2816    /// dictionary learner does not stamp every born atom with atom-0's circle
2817    /// template: the circular residual earns a Periodic (circle) basis, the
2818    /// straight residual a EuclideanPatch (line). This is the heterogeneous,
2819    /// evidence-chosen dictionary the issue demands.
2820    #[test]
2821    fn birth_topology_race_assigns_circle_vs_line_by_evidence() {
2822        use std::f64::consts::TAU;
2823
2824        let n = 80usize;
2825        // A monotone 1-D latent coordinate the residual image is parameterized by.
2826        let coords = Array2::<f64>::from_shape_fn((n, 1), |(row, _)| row as f64 / n as f64);
2827
2828        // CIRCLE target: γ(t) = (cos 2πt, sin 2πt) — full revolution, strong
2829        // turning a straight line cannot express. Two output channels carry the
2830        // circle; the rest are zero.
2831        let p = 4usize;
2832        let mut circle_target = Array2::<f64>::zeros((n, p));
2833        for row in 0..n {
2834            let t = coords[[row, 0]];
2835            circle_target[[row, 0]] = (TAU * t).cos();
2836            circle_target[[row, 1]] = (TAU * t).sin();
2837        }
2838
2839        // LINE target: γ(t) = t·u — a straight ray, zero turning. The circle basis
2840        // has no parsimony advantage; the cheaper line wins on evidence.
2841        let mut line_target = Array2::<f64>::zeros((n, p));
2842        let u = [0.7_f64, -0.4, 0.5, -0.2];
2843        for row in 0..n {
2844            let t = coords[[row, 0]];
2845            for c in 0..p {
2846                line_target[[row, c]] = t * u[c];
2847            }
2848        }
2849
2850        let weights = Array1::<f64>::ones(n);
2851
2852        let circle_fit =
2853            race_birth_topology(coords.view(), circle_target.view(), weights.view(), 1)
2854                .expect("circle race runs")
2855                .expect("circle race has a realizable candidate");
2856        let line_fit = race_birth_topology(coords.view(), line_target.view(), weights.view(), 1)
2857            .expect("line race runs")
2858            .expect("line race has a realizable candidate");
2859
2860        assert_eq!(
2861            circle_fit.basis_kind,
2862            SaeAtomBasisKind::Periodic,
2863            "a circular birth residual must win the circle (Periodic) topology"
2864        );
2865        assert_eq!(
2866            line_fit.basis_kind,
2867            SaeAtomBasisKind::EuclideanPatch,
2868            "a straight birth residual must win the line (EuclideanPatch) topology"
2869        );
2870        // The crux: the two atoms get DIFFERENT topologies by evidence — the
2871        // dictionary is heterogeneous, not all-circle.
2872        assert_ne!(
2873            circle_fit.basis_kind, line_fit.basis_kind,
2874            "the discovery must assign DIFFERENT topologies to the circle and line \
2875             atoms (evidence-chosen, not inherited)"
2876        );
2877    }
2878
2879    /// #977 d=2 topology-race COMPLETENESS: the candidate set includes the
2880    /// Cylinder kind, and a birth target that is genuinely cylindrical — periodic
2881    /// along one latent axis and unbounded-linear along the other — is adjudicated
2882    /// to the Cylinder topology, not forced into a torus (which would wrap the
2883    /// linear axis spuriously) or a flat patch (which would lose the periodicity).
2884    /// This is the realizable d=2 race the issue demands: torus / sphere /
2885    /// euclidean / cylinder, evidence-chosen.
2886    #[test]
2887    fn birth_topology_race_d2_includes_and_selects_cylinder() {
2888        use std::f64::consts::TAU;
2889
2890        // The d=2 candidate set must literally CONTAIN the cylinder candidate.
2891        let n = 120usize;
2892        let coords = Array2::<f64>::from_shape_fn((n, 2), |(row, axis)| {
2893            // axis 0: a phase that completes ~2 revolutions over the rows;
2894            // axis 1: a monotone unbounded coordinate.
2895            if axis == 0 {
2896                (row as f64 / n as f64) * 2.0
2897            } else {
2898                (row as f64 / n as f64) * 3.0 - 1.5
2899            }
2900        });
2901        let specs = topology_candidates_for_dim(coords.view(), 2).expect("d=2 candidates build");
2902        let has_cylinder = specs
2903            .iter()
2904            .any(|s| s.basis_kind == SaeAtomBasisKind::Cylinder);
2905        assert!(
2906            has_cylinder,
2907            "the d=2 topology-race candidate set MUST include the Cylinder kind; got {:?}",
2908            specs.iter().map(|s| &s.basis_kind).collect::<Vec<_>>()
2909        );
2910        let has_torus = specs
2911            .iter()
2912            .any(|s| s.basis_kind == SaeAtomBasisKind::Torus);
2913        let has_sphere = specs
2914            .iter()
2915            .any(|s| s.basis_kind == SaeAtomBasisKind::Sphere);
2916        let has_patch = specs
2917            .iter()
2918            .any(|s| s.basis_kind == SaeAtomBasisKind::EuclideanPatch);
2919        assert!(
2920            has_torus && has_sphere && has_patch,
2921            "the d=2 race must be COMPLETE (torus + sphere + euclidean + cylinder)"
2922        );
2923
2924        // CYLINDER target: periodic along axis 0 (cos/sin of the phase) AND
2925        // linearly growing along axis 1 (a magnitude ramp). A torus would have to
2926        // wrap the magnitude axis (no periodicity there); a flat patch cannot
2927        // express the full revolution; the cylinder expresses both exactly.
2928        let p = 4usize;
2929        let mut cyl_target = Array2::<f64>::zeros((n, p));
2930        for row in 0..n {
2931            let phase = coords[[row, 0]];
2932            let mag = coords[[row, 1]];
2933            cyl_target[[row, 0]] = (TAU * phase).cos();
2934            cyl_target[[row, 1]] = (TAU * phase).sin();
2935            // The linear-axis structure: a magnitude ramp on a third channel.
2936            cyl_target[[row, 2]] = mag;
2937        }
2938        let weights = Array1::<f64>::ones(n);
2939        let cyl_fit = race_birth_topology(coords.view(), cyl_target.view(), weights.view(), 2)
2940            .expect("cylinder race runs")
2941            .expect("cylinder race has a realizable candidate");
2942        assert_eq!(
2943            cyl_fit.basis_kind,
2944            SaeAtomBasisKind::Cylinder,
2945            "a cylindrical birth residual (periodic along one axis, linear along the \
2946             other) must win the Cylinder topology by evidence; got {:?}",
2947            cyl_fit.basis_kind
2948        );
2949    }
2950
2951    /// #977 BORN-ATOM UNCERTAINTY: a structure-search-born atom (grown past the
2952    /// seed K the joint Schur factor was assembled at) must report a FINITE
2953    /// shape-uncertainty band, computed from its OWN fitted penalized inner
2954    /// Hessian — never a silently-missing band. This is the completed deferred
2955    /// gap: `complete_born_atom_shape_bands` fills the born atom's band so no
2956    /// post-search atom is reported without honest uncertainty.
2957    #[test]
2958    fn born_atom_reports_finite_uncertainty_band() {
2959        let n = 48usize;
2960        // A K=1 seed dictionary, routed on every row.
2961        let active: Vec<Vec<bool>> = (0..n).map(|_| vec![true]).collect();
2962        let (term, rho) = planted_term(&active);
2963        let k_seed = term.k_atoms();
2964
2965        // Grow the dictionary by one BORN atom (index k_seed) seeded from a
2966        // residual-factor decoder. This is the atom the pre-search Schur factor
2967        // never covered.
2968        let p = term.output_dim();
2969        let m = term.atoms[0].basis_size();
2970        let mut decoder = Array2::<f64>::zeros((m, p));
2971        decoder[[1, 0]] = 0.9;
2972        decoder[[2, 1]] = -0.6;
2973        let (mut born, born_rho) = apply_structure_move(
2974            &term,
2975            &rho,
2976            &StructureMove::Birth { candidate: 0 },
2977            &[decoder],
2978        )
2979        .expect("birth applies");
2980        assert_eq!(born.k_atoms(), k_seed + 1, "the birth grows K by one");
2981
2982        // Build a reconstruction target the born dictionary fits, then harvest the
2983        // per-atom inner fits at the settled state (this populates the per-atom
2984        // penalized inner Hessian for EVERY atom, born included).
2985        let target = born.try_fitted().expect("born term reconstructs");
2986        let dispersion = 1.0e-2_f64;
2987        born.set_atom_inner_fits(target.view(), &born_rho, dispersion)
2988            .expect("inner fits build");
2989
2990        // The pre-search Schur factor only covered the SEED atoms: emulate the
2991        // production path by starting from a band list that is missing the born
2992        // atom (the no-decoder-covariance fallback over the seed atoms), then
2993        // completing it.
2994        let mut unc = born.shape_uncertainty_without_decoder_covariance(dispersion);
2995        // Truncate to the seed atoms to emulate a Schur factor assembled at the
2996        // seed K (the born atom has NO entry yet).
2997        unc.atoms.truncate(k_seed);
2998        assert_eq!(
2999            unc.atoms.len(),
3000            k_seed,
3001            "seed-K Schur band omits the born atom"
3002        );
3003
3004        born.complete_born_atom_shape_bands(&mut unc)
3005            .expect("born-atom band completes");
3006
3007        // Every post-search atom now has a band slot.
3008        assert_eq!(
3009            unc.atoms.len(),
3010            born.k_atoms(),
3011            "completion must grow the band list to the post-search atom count"
3012        );
3013        let born_band = &unc.atoms[k_seed];
3014        assert!(
3015            born_band.band_sd.nrows() > 0 && born_band.band_sd.ncols() == p,
3016            "the born atom's band must be shaped (G>0, p)"
3017        );
3018        // The crux: the born atom's band is FINITE and non-negative everywhere —
3019        // an honest uncertainty, not a silently-missing (or NaN) band.
3020        let mut any_positive = false;
3021        for &sd in born_band.band_sd.iter() {
3022            assert!(
3023                sd.is_finite() && sd >= 0.0,
3024                "born-atom band sd must be finite and non-negative; got {sd}"
3025            );
3026            if sd > 0.0 {
3027                any_positive = true;
3028            }
3029        }
3030        assert!(
3031            any_positive,
3032            "a born atom with a non-degenerate inner Hessian must report a strictly \
3033             positive uncertainty somewhere (a finite band, never all-zero / missing)"
3034        );
3035    }
3036
3037    /// #1218 PRODUCTION-GATE wiring proof: the corrected PG gate-block
3038    /// normalizer is consumed by the live per-shard likelihood the K-vs-(K+1)
3039    /// birth gate forms its split-LR from — not just by the isolated unit test.
3040    ///
3041    /// `eval_log_lik` is the exact `alternative_log_lik` / `null_sup_log_lik`
3042    /// closure `run_atom_birth_gate` accumulates (see [`run_structure_search_rounds`]),
3043    /// so it is the production gate's evaluation statistic. We score the SAME
3044    /// shard under a K-atom null and a (K+1)-atom candidate and isolate the
3045    /// gate-block contribution: growing the dictionary by one atom adds exactly
3046    /// one gate coordinate, so the `−½·d_g·log(2π)` normalizer (the term #1218
3047    /// fixed the sign of) does NOT cancel in the gate difference. With the
3048    /// corrected (subtracted) sign it is an Occam PENALTY that resists the
3049    /// extra atom; the buggy (added) sign would flip it into a spurious REWARD.
3050    #[test]
3051    fn production_gate_consumes_corrected_pg_normalizer() {
3052        let n = 32usize;
3053        // K=2 null and a K=3 candidate, every atom routed on every row so the
3054        // gate logits are well-defined and finite.
3055        let null_active: Vec<Vec<bool>> = (0..n).map(|_| vec![true, true]).collect();
3056        let cand_active: Vec<Vec<bool>> = (0..n).map(|_| vec![true, true, true]).collect();
3057        let (null_term, _) = planted_term(&null_active);
3058        let (cand_term, _) = planted_term(&cand_active);
3059        assert_eq!(null_term.k_atoms(), 2);
3060        assert_eq!(cand_term.k_atoms(), 3, "candidate grows K by one atom");
3061
3062        // One held-out shard: the row block the gate accumulates evidence over.
3063        let p = null_term.output_dim();
3064        let target = Arc::new(Array2::<f64>::zeros((n, p)));
3065        let shard = RowBlockShard {
3066            target: target.clone(),
3067            rows: (0..n).collect(),
3068        };
3069
3070        // The gate-block contribution alone (private helper the live
3071        // `eval_log_lik` adds in): the corrected normalizer is reachable here.
3072        let null_gate = gate_block_log_evidence(&null_term, &shard);
3073        let cand_gate = gate_block_log_evidence(&cand_term, &shard);
3074        assert!(
3075            null_gate.is_finite() && cand_gate.is_finite(),
3076            "gate-block evidence must be finite on a well-posed gate block"
3077        );
3078
3079        // The Occam normalizer per added gate coordinate. The candidate carries
3080        // K+1 gate coordinates, the null K, so the gate-difference includes one
3081        // extra `−½·log(2π)` normalizer that must NOT cancel.
3082        let log_2pi = (2.0 * std::f64::consts::PI).ln();
3083        let gate_delta = cand_gate - null_gate;
3084
3085        // Corrected sign ⇒ the per-coordinate normalizer SUBTRACTS, so the
3086        // extra atom's gate-block log-evidence is pushed DOWN by ≈ ½·log(2π)
3087        // relative to a no-normalizer baseline. The decisive, sign-sensitive
3088        // assertion: the extra-coordinate normalizer is the *negative*
3089        // ½·log(2π) Occam term, never the positive (buggy) one. Compare against
3090        // the per-atom evidence WITHOUT the normalizer to isolate it.
3091        let per_atom_no_norm = |term: &SaeManifoldTerm| -> f64 {
3092            // Re-derive the gate evidence with the normalizer ADDED back (the
3093            // pre-fix sign) to recover the unnormalized quadratic/logdet part.
3094            // `gate_block_log_evidence` already SUBTRACTS ½·d_g·log(2π); adding
3095            // it back yields the normalizer-free score, and the difference
3096            // between candidate and null of THAT isolates everything except the
3097            // one extra normalizer.
3098            let dg = term.k_atoms() as f64; // one gate coordinate per atom
3099            gate_block_log_evidence(term, &shard) + 0.5 * dg * log_2pi
3100        };
3101        let no_norm_delta = per_atom_no_norm(&cand_term) - per_atom_no_norm(&null_term);
3102        let normalizer_in_delta = gate_delta - no_norm_delta;
3103
3104        // The normalizer contribution to the K→K+1 gate difference must be
3105        // exactly `−½·log(2π)` (one extra gate coordinate, corrected sign).
3106        assert!(
3107            (normalizer_in_delta + 0.5 * log_2pi).abs() < 1e-9,
3108            "the gate-block normalizer in the K→K+1 difference must be the \
3109             corrected −½·log(2π) Occam penalty, got {normalizer_in_delta} \
3110             (buggy +½·log(2π) = {})",
3111            0.5 * log_2pi
3112        );
3113
3114        // And the full production statistic carries it: the gate-block evidence
3115        // is a real, finite addend on top of the reconstruction likelihood.
3116        let full = eval_log_lik(&cand_term, &shard);
3117        let recon_only = {
3118            // Reconstruction-only baseline (what the path returned BEFORE the
3119            // wiring): −½·SSE over the shard rows.
3120            let fitted = cand_term.try_fitted().unwrap();
3121            let mut sse = 0.0;
3122            for &row in &shard.rows {
3123                for out in 0..p {
3124                    let d = fitted[[row, out]] - shard.target[[row, out]];
3125                    sse += d * d;
3126                }
3127            }
3128            -0.5 * sse
3129        };
3130        assert!(
3131            (full - (recon_only + cand_gate)).abs() < 1e-9,
3132            "the live per-shard likelihood must equal reconstruction + the \
3133             PG gate-block evidence (so the corrected normalizer reaches the gate)"
3134        );
3135    }
3136
3137    /// Fission must BREAK the parent/child symmetry. Duplicating an atom
3138    /// identically (same decoder, mass split 50/50) sits at a symmetric saddle of
3139    /// the joint refit — the children's gradients are identical, so a
3140    /// deterministic refit never separates them and the fission is a no-op the
3141    /// e-gate rejects. The anti-symmetric perturbation makes the two children's
3142    /// decoders genuinely differ (so the refit can separate factors) while the
3143    /// equal-mass combined decoder `½(parent+child)` stays EXACTLY the original
3144    /// (warm-start preserved).
3145    #[test]
3146    fn fission_breaks_symmetry_so_children_can_separate() {
3147        let (term, rho) = planted_term(&vec![vec![true]; 8]);
3148        assert_eq!(term.k_atoms(), 1);
3149        let orig = term.atoms[0].decoder_coefficients.clone();
3150
3151        let (child, _child_rho) =
3152            apply_structure_move(&term, &rho, &StructureMove::Fission { atom: 0 }, &[]).unwrap();
3153        assert_eq!(child.k_atoms(), 2, "fission must add one atom");
3154
3155        let d0 = &child.atoms[0].decoder_coefficients;
3156        let d1 = &child.atoms[1].decoder_coefficients;
3157        // (1) Symmetry BROKEN: the children's decoders are not identical (without
3158        // this the refit is stuck at the symmetric saddle and fission is a no-op).
3159        let sep = (d0 - d1).iter().map(|x| x * x).sum::<f64>().sqrt();
3160        let scale = orig.iter().map(|x| x * x).sum::<f64>().sqrt().max(1e-12);
3161        assert!(
3162            sep / scale > 1.0e-3,
3163            "fission children must NOT be identical (symmetric saddle); rel sep = {}",
3164            sep / scale
3165        );
3166        // (2) Warm-start preserved EXACTLY: the equal-mass combined decoder is the
3167        // original (the anti-symmetric ±ε perturbation cancels).
3168        let combined = (d0 + d1).mapv(|x| 0.5 * x);
3169        let warm_err = (&combined - &orig).iter().map(|x| x * x).sum::<f64>().sqrt();
3170        assert!(
3171            warm_err < 1.0e-12,
3172            "mass-split combined decoder must equal the original; err = {warm_err}"
3173        );
3174        // (3) Mass split is EVEN: the parent and child carry equal routing logits
3175        // on every row (each gets half the parent's softmax mass).
3176        for row in 0..child.assignment.logits.nrows() {
3177            assert!(
3178                (child.assignment.logits[[row, 0]] - child.assignment.logits[[row, 1]]).abs()
3179                    < 1e-12,
3180                "fission must split routing mass 50/50 (equal child logits)"
3181            );
3182        }
3183    }
3184
3185    /// Softmax fusion must PRESERVE the combined routing mass. Merging the two
3186    /// constituent logits with `logsumexp` keeps `mass(fused) = mass(a)+mass(b)`;
3187    /// the old `max` under-massed the fused atom (½ vs ⅔ on this 3-atom fixture
3188    /// where atoms 0,1 are co-active and atom 2 competes), leaving the warm-start
3189    /// short and risking a FALSE e-gate rejection of a good fusion under a capped
3190    /// refit. (For IBP routing `max` stays correct — the gate is un-normalized.)
3191    #[test]
3192    fn fusion_preserves_combined_softmax_mass() {
3193        let (term, rho) = planted_term(&vec![vec![true, true, true]; 6]);
3194        let combined: Vec<f64> = (0..6)
3195            .map(|r| {
3196                let a = term.assignment.try_assignments_row(r).unwrap();
3197                a[0] + a[1]
3198            })
3199            .collect();
3200        let (fused, _) =
3201            apply_structure_move(&term, &rho, &StructureMove::Fusion { a: 0, b: 1 }, &[]).unwrap();
3202        for r in 0..6 {
3203            let a = fused.assignment.try_assignments_row(r).unwrap();
3204            assert!(
3205                (a[0] - combined[r]).abs() < 1e-6,
3206                "fused atom must carry the COMBINED softmax mass (logsumexp, not \
3207                 max): got {}, want {} (row {r})",
3208                a[0],
3209                combined[r]
3210            );
3211            // Sanity: plain max would have given ½ here, materially short of ⅔.
3212            assert!(
3213                combined[r] > 0.6,
3214                "fixture must exercise a co-active pair (combined mass {} should be ~⅔)",
3215                combined[r]
3216            );
3217        }
3218    }
3219
3220    #[test]
3221    fn fusion_of_zero_mass_pair_yields_neg_inf_not_nan() {
3222        // Folding two atoms whose softmax logits are BOTH -∞ (zero routing mass on
3223        // a row) must give the mass-preserving combined logit -∞ (combined mass 0),
3224        // NOT NaN. Pre-fix, `logsumexp(-∞,-∞)` evaluated `(-∞)-(-∞)=NaN` and poisoned
3225        // the entire logits row.
3226        let (mut term, rho) = planted_term(&vec![vec![true, true, true]; 6]);
3227        assert!(
3228            matches!(term.assignment.mode, AssignmentMode::Softmax { .. }),
3229            "fixture must be softmax-routed to exercise the logsumexp combine"
3230        );
3231        // Zero out atoms 0 and 1 on row 0 (both -∞), leave the rest finite.
3232        term.assignment.logits[[0, 0]] = f64::NEG_INFINITY;
3233        term.assignment.logits[[0, 1]] = f64::NEG_INFINITY;
3234        let (fused, _) =
3235            apply_structure_move(&term, &rho, &StructureMove::Fusion { a: 0, b: 1 }, &[]).unwrap();
3236        let folded = fused.assignment.logits[[0, 0]];
3237        assert!(
3238            !folded.is_nan(),
3239            "fused zero-mass logit must not be NaN (got {folded})"
3240        );
3241        assert_eq!(
3242            folded,
3243            f64::NEG_INFINITY,
3244            "combined mass of two zero-mass atoms is zero → logit -∞"
3245        );
3246        // The whole row must stay NaN-free so softmax over it is well defined.
3247        for c in 0..fused.assignment.logits.ncols() {
3248            assert!(
3249                !fused.assignment.logits[[0, c]].is_nan(),
3250                "row 0 col {c} must not be NaN after the fold"
3251            );
3252        }
3253    }
3254}