// gam 0.3.117 - Docs.rs

//! Per-point sparse atom codes for multi-manifold reconstruction.
//!
//! This module owns the storage of per-observation soft assignments over a
//! library of `K` candidate manifold-atoms (see [`crate::terms::atom_selection`]
//! for the surrounding selection layer). The two key types are:
//!
//! * [`BitVec`] — a minimal dependency-free bitset used to record the *active
//!   support* `S_n ⊆ {0, …, K−1}` of each observation. We avoid pulling in
//!   the external `bitvec` crate to keep this module aligned with the rest of
//!   `gam`'s "no extra deps for new primitives" policy.
//! * [`SparseAtomCode`] — the per-point pair `(active_mask, weights)` whose
//!   semantics are documented on the type. Reconstruction at point `n` is
//!
//!   ```text
//!   Ẑ_n  =  Σ_{k ∈ S_n}  w_{n,k}  ·  decoder_k(t_{n,k})
//!   ```
//!
//!   so `weights[k]` is meaningful only when `active_mask.get(k) == true`.
//!   We store `weights` densely (`Vec<f64>` of length `K`) rather than
//!   sparsely; for the typical SAE workload `K` is small (tens to low
//!   hundreds), and the dense layout lets us reuse [`ndarray`] views and
//!   simple BLAS-shaped loops downstream. The mask carries the discrete
//!   active-set information; the weights carry the soft amplitudes.
//!
//! ## Per-point block locality (arrow structure)
//!
//! Each [`SparseAtomCode`] is the per-row ext-coordinate block for observation `n`
//! restricted to the `K` atoms. Combined with the per-atom on-manifold
//! coordinate `t_{n,k} ∈ ℝ^{d_k}` (held in
//! [`crate::terms::atom_selection::AtomLibrary`]'s per-atom
//! `LatentCoordValues`), the row-local ext-coordinate vector is
//!
//! ```text
//!   ext_n  =  ( a_{n,1..K}  ;  t_{n,1,·}  ;  …  ;  t_{n,K,·} )
//! ```
//!
//! whose interaction graph with the shared decoder coefficients `B_1..B_K`
//! is exactly the arrow / bordered-Hessian pattern from `latent_coord.md`
//! §2.2. The Schur complement that Piece 1 uses to eliminate β before the
//! per-row solve generalises here with one change: the row-`n` block now
//! couples to *only the active subset* `S_n` of decoder borders, not to all
//! `K` of them. That is the structural fact this module records.

use ndarray::Array1;

/// Minimal fixed-length bit-vector. Backing storage is `Vec<u64>` words.
///
/// Bit `i` lives in word `i / 64` at bit position `i % 64`. We expose only
/// the operations the atom-selection layer needs: construction, `get`, `set`,
/// `count_ones`, iteration of set indices, and `clear`. This is deliberately
/// tiny — adding the external `bitvec` crate would be overkill for a few
/// hundred bits per observation.
///
/// # Invariant
///
/// Bits at positions `>= len` in the last backing word are always zero.
/// [`BitVec::count_ones`], [`BitVec::iter_ones`] and the derived `PartialEq`
/// all operate on whole words and rely on this; every constructor and mutator
/// below preserves it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BitVec {
    words: Vec<u64>,
    len: usize,
}

impl BitVec {
    /// Number of bits held by one backing word.
    const WORD_BITS: usize = u64::BITS as usize;

    /// All-zero bitset of length `len`.
    pub fn zeros(len: usize) -> Self {
        let words = vec![0u64; len.div_ceil(Self::WORD_BITS)];
        Self { words, len }
    }

    /// All-ones bitset of length `len`.
    ///
    /// Built a word at a time; the unused high bits of the last word are
    /// masked off so the zero-tail invariant holds.
    pub fn ones(len: usize) -> Self {
        let mut words = vec![u64::MAX; len.div_ceil(Self::WORD_BITS)];
        let tail_bits = len % Self::WORD_BITS;
        if tail_bits != 0 {
            // A non-zero remainder implies `len > 0`, so a last word exists.
            if let Some(last) = words.last_mut() {
                *last = (1u64 << tail_bits) - 1;
            }
        }
        Self { words, len }
    }

    /// Number of addressable bits (not the number of set bits).
    pub fn len(&self) -> usize {
        self.len
    }

    /// `true` when the bitset has length zero.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Value of bit `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i >= self.len()`.
    #[inline]
    pub fn get(&self, i: usize) -> bool {
        assert!(
            i < self.len,
            "BitVec::get index {i} out of bounds {}",
            self.len
        );
        let (w, b) = (i / Self::WORD_BITS, i % Self::WORD_BITS);
        (self.words[w] >> b) & 1 == 1
    }

    /// Set bit `i` to `v`.
    ///
    /// # Panics
    ///
    /// Panics if `i >= self.len()`.
    #[inline]
    pub fn set(&mut self, i: usize, v: bool) {
        assert!(
            i < self.len,
            "BitVec::set index {i} out of bounds {}",
            self.len
        );
        let (w, b) = (i / Self::WORD_BITS, i % Self::WORD_BITS);
        if v {
            self.words[w] |= 1u64 << b;
        } else {
            self.words[w] &= !(1u64 << b);
        }
    }

    /// Number of set bits.
    pub fn count_ones(&self) -> usize {
        self.words.iter().map(|w| w.count_ones() as usize).sum()
    }

    /// Iterator over set indices in ascending order.
    ///
    /// Scans word by word and peels set bits off with `trailing_zeros`, so the
    /// cost scales with the number of words plus the number of set bits rather
    /// than with one bounds-checked probe per index.
    pub fn iter_ones(&self) -> impl Iterator<Item = usize> + '_ {
        self.words.iter().enumerate().flat_map(|(word_idx, &word)| {
            let base = word_idx * Self::WORD_BITS;
            // Yield the word with its lowest set bit progressively cleared;
            // every yielded value is non-zero, so `w - 1` cannot underflow.
            std::iter::successors((word != 0).then_some(word), |&w| {
                let rest = w & (w - 1);
                (rest != 0).then_some(rest)
            })
            .map(move |w| base + w.trailing_zeros() as usize)
        })
    }

    /// Zero all bits in place. The length is unchanged.
    pub fn clear(&mut self) {
        self.words.fill(0);
    }
}

/// Sparse soft-assignment of a single observation over `K` candidate atoms.
///
/// Invariants expected of any code that builds or mutates a value:
///
/// * `active_mask.len() == weights.len() == K`.
/// * For any `k` with `active_mask.get(k) == false`, the value `weights[k]`
///   is a nuisance — it must not influence reconstruction. Selection
///   strategies that lower a weight to zero (e.g. [`crate::terms::atom_selection::AtomSelectionStrategy`]'s
///   `L1Relaxed` after thresholding) are responsible for clearing the
///   corresponding mask bit *and* zeroing `weights[k]`.
///
/// NOTE(review): the methods on this type only assert the index bound in
/// [`SparseAtomCode::assign`] / [`SparseAtomCode::deactivate`]; no check of
/// the length invariant is visible here — confirm where (if anywhere) it is
/// enforced.
///
/// Weights are not required to be non-negative; some strategies (entropic
/// softmax, TopK projection) keep the simplex, while others (L¹-relaxed) only
/// enforce non-negativity at the active-set step. The owning
/// [`crate::terms::atom_selection::AtomSelectionStrategy`] documents which
/// invariant it maintains.
#[derive(Debug, Clone)]
pub struct SparseAtomCode {
    /// Bitmask of length `K`; bit `k` is set when atom `k` is active for this
    /// point.
    pub active_mask: BitVec,
    /// Dense weight vector of length `K`. Entries at inactive indices carry
    /// no meaning.
    pub weights: Vec<f64>,
}

impl SparseAtomCode {
    /// Cold-start code for `k_atoms` atoms: empty support, every weight zero.
    pub fn empty(k_atoms: usize) -> Self {
        let active_mask = BitVec::zeros(k_atoms);
        let weights = vec![0.0; k_atoms];
        Self {
            active_mask,
            weights,
        }
    }

    /// Number of candidate atoms `K`, read from the length of `weights`.
    pub fn k_atoms(&self) -> usize {
        self.weights.len()
    }

    /// Size of the active support `|S_n|` (number of set mask bits).
    pub fn n_active(&self) -> usize {
        self.active_mask.count_ones()
    }

    /// Sum of the weights at active indices. For simplex-projected codes this
    /// should be ≈ 1.
    pub fn active_weight_sum(&self) -> f64 {
        let weights = &self.weights;
        self.active_mask.iter_ones().map(|k| weights[k]).sum()
    }

    /// Mark atom `k` active and store `w` as its weight.
    ///
    /// # Panics
    ///
    /// Panics if `k >= self.k_atoms()`.
    pub fn assign(&mut self, k: usize, w: f64) {
        assert!(k < self.k_atoms());
        let Self {
            active_mask,
            weights,
        } = self;
        active_mask.set(k, true);
        weights[k] = w;
    }

    /// Clear atom `k`'s mask bit and reset its stored weight to zero.
    ///
    /// # Panics
    ///
    /// Panics if `k >= self.k_atoms()`.
    pub fn deactivate(&mut self, k: usize) {
        assert!(k < self.k_atoms());
        let Self {
            active_mask,
            weights,
        } = self;
        active_mask.set(k, false);
        weights[k] = 0.0;
    }

    /// Build the *effective* weight vector as an owned `Array1`: the stored
    /// weight at every active index, zero everywhere else. Useful for
    /// matmul-shaped downstream code.
    pub fn effective_weights(&self) -> Array1<f64> {
        let mut dense = Array1::<f64>::zeros(self.k_atoms());
        self.active_mask
            .iter_ones()
            .for_each(|k| dense[k] = self.weights[k]);
        dense
    }
}

/// Per-row codes for all `N` observations.
///
/// The layout is one [`SparseAtomCode`] per row (array-of-structs) rather
/// than one column per field: each row's `(active_mask, weights)` pair is
/// kept together, the stated rationale being that the atom-selection
/// strategies all touch a single row at a time. Cross-row vectorization
/// happens through ndarray views built on demand.
#[derive(Debug, Clone)]
pub struct SparseAtomCodes {
    codes: Vec<SparseAtomCode>,
    k_atoms: usize,
}

impl SparseAtomCodes {
    /// Create `n_obs` cold-start codes, each sized for `k_atoms` atoms.
    pub fn empty(n_obs: usize, k_atoms: usize) -> Self {
        let codes: Vec<SparseAtomCode> =
            std::iter::repeat_with(|| SparseAtomCode::empty(k_atoms))
                .take(n_obs)
                .collect();
        Self { codes, k_atoms }
    }

    /// Number of observations `N` (rows) stored.
    pub fn n_obs(&self) -> usize {
        self.codes.len()
    }

    /// Number of candidate atoms `K` recorded at construction.
    pub fn k_atoms(&self) -> usize {
        self.k_atoms
    }

    /// Shared reference to the code of row `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n >= self.n_obs()`.
    pub fn row(&self, n: usize) -> &SparseAtomCode {
        &self.codes[n]
    }

    /// Mutable reference to the code of row `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n >= self.n_obs()`.
    pub fn row_mut(&mut self, n: usize) -> &mut SparseAtomCode {
        &mut self.codes[n]
    }

    /// Iterate over the row codes in row order.
    pub fn iter(&self) -> impl Iterator<Item = &SparseAtomCode> {
        self.codes.iter()
    }

    /// Iterate mutably over the row codes in row order.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut SparseAtomCode> {
        self.codes.iter_mut()
    }

    /// Dense `(N, K)` matrix of weights: the stored weight wherever a row's
    /// mask bit is set, zero elsewhere. Allocates; intended for diagnostic /
    /// post-fit use.
    pub fn weights_matrix(&self) -> ndarray::Array2<f64> {
        let shape = (self.n_obs(), self.k_atoms());
        let mut dense = ndarray::Array2::<f64>::zeros(shape);
        for (row_idx, code) in self.codes.iter().enumerate() {
            for atom in code.active_mask.iter_ones() {
                dense[[row_idx, atom]] = code.weights[atom];
            }
        }
        dense
    }

    /// Co-activation statistics for one atom pair `(a, b)` — the #976
    /// code-dependence trigger. Everything is a count ratio over the active
    /// masks: `P(a|b) = #{rows: a∧b} / #{rows: b}` and symmetrically, with
    /// `lift = #{a∧b}·N / (#{a}·#{b})`. A ratio whose denominator is zero is
    /// reported as `0` rather than NaN.
    ///
    /// Two derived readings drive the structure search:
    ///
    /// * [`CoactivationStats::dependence`] (symmetric, the FUSION trigger) —
    ///   independent atoms with marginal activation rates `π_a, π_b` co-activate
    ///   at rate `π_a·π_b`, so both conditionals stay near the marginals; a
    ///   shattered curved family re-encoded as several near-duplicate atoms
    ///   pushes *both* conditionals toward 1.
    /// * [`CoactivationStats::absorption_asymmetry`] (the ABSORPTION-audit
    ///   trigger) — an A⇒B hierarchy where sparsity folded B's content into A
    ///   shows `P(parent|child) ≈ 1` without the converse, so a large asymmetry
    ///   with one conditional near 1 flags the pair for the within-atom
    ///   substructure audit (#907 race on the atom's own code distribution).
    ///
    /// These are *triggers*, not decisions: they rank move proposals
    /// deterministically; acceptance is owned by the e-process gates in
    /// [`crate::solver::structure_search`].
    ///
    /// # Panics
    ///
    /// Panics if `a` or `b` is not below `self.k_atoms()`.
    pub fn coactivation(&self, a: usize, b: usize) -> CoactivationStats {
        /// Count ratio that is total: an empty denominator yields `0.0`.
        fn ratio(joint: usize, marginal: usize) -> f64 {
            match marginal {
                0 => 0.0,
                m => joint as f64 / m as f64,
            }
        }

        assert!(
            a < self.k_atoms && b < self.k_atoms,
            "SparseAtomCodes::coactivation: atoms ({a}, {b}) out of range K={}",
            self.k_atoms
        );

        // Single pass over the rows, tallying both marginals and the joint.
        let (n_a, n_b, n_joint) = self.codes.iter().fold(
            (0usize, 0usize, 0usize),
            |(count_a, count_b, count_joint), code| {
                let on_a = code.active_mask.get(a);
                let on_b = code.active_mask.get(b);
                (
                    count_a + usize::from(on_a),
                    count_b + usize::from(on_b),
                    count_joint + usize::from(on_a && on_b),
                )
            },
        );

        let n_obs = self.n_obs();
        let lift = if n_a > 0 && n_b > 0 && n_obs > 0 {
            (n_joint as f64 * n_obs as f64) / (n_a as f64 * n_b as f64)
        } else {
            0.0
        };

        CoactivationStats {
            n_obs,
            n_a,
            n_b,
            n_joint,
            p_a_given_b: ratio(n_joint, n_b),
            p_b_given_a: ratio(n_joint, n_a),
            lift,
        }
    }
}

/// Pairwise co-activation summary for two atoms `a` and `b` (see
/// [`SparseAtomCodes::coactivation`]). Every probability is an empirical
/// count ratio over the active-support masks.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CoactivationStats {
    /// Number of observations the codes cover.
    pub n_obs: usize,
    /// Number of rows in which atom `a` is active.
    pub n_a: usize,
    /// Number of rows in which atom `b` is active.
    pub n_b: usize,
    /// Number of rows in which both atoms are active.
    pub n_joint: usize,
    /// `P(a active | b active)`; `0` when `b` is never active.
    pub p_a_given_b: f64,
    /// `P(b active | a active)`; `0` when `a` is never active.
    pub p_b_given_a: f64,
    /// `P(a∧b) / (P(a)·P(b))`; `1` for independent atoms, `0` when either
    /// marginal is empty.
    pub lift: f64,
}

impl CoactivationStats {
    /// Symmetric code dependence `min(P(a|b), P(b|a))` — the canonical-order
    /// trigger for FUSION proposals (descending). It is `0` for disjoint
    /// supports, sits near the smaller marginal activation rate for
    /// independent atoms, and approaches 1 only when the two supports
    /// essentially coincide, which is the shattering signature.
    pub fn dependence(&self) -> f64 {
        let Self {
            p_a_given_b,
            p_b_given_a,
            ..
        } = *self;
        p_a_given_b.min(p_b_given_a)
    }

    /// Conditional asymmetry `|P(a|b) − P(b|a)|` — large when one atom's
    /// support nests inside the other's (the A⇒B absorption signature, where
    /// `P(parent|child) ≈ 1` but not conversely). It flags the pair for a
    /// targeted within-atom substructure audit and is never itself an
    /// acceptance criterion.
    pub fn absorption_asymmetry(&self) -> f64 {
        let gap = self.p_a_given_b - self.p_b_given_a;
        gap.abs()
    }
}

// Unit tests for the bitset, the per-point code, the row container, and the
// co-activation triggers.
#[cfg(test)]
mod tests {
    use super::*;

    /// `BitVec` get/set/count/iterate, using a length (70) that spills into a
    /// second backing word so bit 64 exercises the word boundary.
    #[test]
    fn bitvec_basic() {
        let mut bv = BitVec::zeros(70);
        assert_eq!(bv.len(), 70);
        assert!(!bv.get(5));
        bv.set(5, true);
        bv.set(64, true);
        assert!(bv.get(5));
        assert!(bv.get(64));
        assert_eq!(bv.count_ones(), 2);
        let ones: Vec<usize> = bv.iter_ones().collect();
        assert_eq!(ones, vec![5, 64]);
        bv.set(5, false);
        assert_eq!(bv.count_ones(), 1);
    }

    /// `assign` activates an atom and stores its weight; `deactivate` clears
    /// the mask bit and zeroes the stored weight.
    #[test]
    fn sparse_code_assign() {
        let mut c = SparseAtomCode::empty(8);
        c.assign(2, 0.7);
        c.assign(5, 0.3);
        assert_eq!(c.n_active(), 2);
        assert!((c.active_weight_sum() - 1.0).abs() < 1e-12);
        c.deactivate(2);
        assert_eq!(c.n_active(), 1);
        assert_eq!(c.weights[2], 0.0);
    }

    /// `weights_matrix` places each assigned weight at `[row, atom]` and
    /// leaves untouched cells at zero.
    #[test]
    fn codes_matrix_roundtrip() {
        let mut codes = SparseAtomCodes::empty(3, 4);
        codes.row_mut(0).assign(1, 0.5);
        codes.row_mut(2).assign(3, 0.9);
        let m = codes.weights_matrix();
        assert_eq!(m[[0, 1]], 0.5);
        assert_eq!(m[[2, 3]], 0.9);
        assert_eq!(m[[1, 0]], 0.0);
    }

    /// Co-activation triggers separate the three planted regimes: independent
    /// atoms (low dependence), a shattered duplicate pair (dependence ≈ 1,
    /// symmetric), and an absorption hierarchy (high asymmetry, parent
    /// conditional ≈ 1).
    #[test]
    fn coactivation_separates_independent_shattered_and_absorbed() {
        let n = 100usize;
        let mut codes = SparseAtomCodes::empty(n, 4);
        for row in 0..n {
            // Atom 0: active on even rows (50 rows); atom 1: active on rows
            // ≡ 0 (mod 5) (20 rows) — independent-ish supports whose joint is
            // the 10 rows ≡ 0 (mod 10).
            if row % 2 == 0 {
                codes.row_mut(row).assign(0, 1.0);
            }
            if row % 5 == 0 {
                codes.row_mut(row).assign(1, 1.0);
            }
            // Atoms 2 and 3: a nested pair — 3 (child) active on the 25 rows
            // ≡ 0 (mod 4); 2 (parent) active whenever 3 is, plus every odd
            // row (50 of the remaining 75), for 75 rows in total.
            if row % 4 == 0 || row % 2 == 1 {
                codes.row_mut(row).assign(2, 1.0);
            }
            if row % 4 == 0 {
                codes.row_mut(row).assign(3, 1.0);
            }
        }

        // Independent pair: P(0|1) = 10/20 = 0.5 (even rows among multiples
        // of 5), P(1|0) = 10/50 = 0.2 → low symmetric dependence, and
        // lift = 10·100 / (50·20) = 1.
        let indep = codes.coactivation(0, 1);
        assert_eq!(indep.n_joint, 10);
        assert!((indep.p_a_given_b - 0.5).abs() < 1e-12);
        assert!((indep.p_b_given_a - 0.2).abs() < 1e-12);
        assert!((indep.lift - 1.0).abs() < 1e-12);
        assert!(indep.dependence() < 0.25);

        // Nested (absorption-suspect) pair: P(parent|child) = 25/25 = 1,
        // converse = 25/75 ≈ 0.33 → asymmetry ≈ 0.67.
        let nested = codes.coactivation(2, 3);
        assert!((nested.p_a_given_b - 1.0).abs() < 1e-12);
        assert!(nested.p_b_given_a < 0.5);
        assert!(nested.absorption_asymmetry() > 0.6);

        // Shattered pair: identical supports → dependence = 1, asymmetry = 0.
        let mut dup = SparseAtomCodes::empty(n, 2);
        for row in (0..n).step_by(3) {
            dup.row_mut(row).assign(0, 1.0);
            dup.row_mut(row).assign(1, 1.0);
        }
        let shat = dup.coactivation(0, 1);
        assert!((shat.dependence() - 1.0).abs() < 1e-12);
        assert!(shat.absorption_asymmetry() < 1e-12);

        // Empty marginals are total, not NaN: with no atom ever active, every
        // ratio is reported as 0.
        let empty = SparseAtomCodes::empty(4, 2).coactivation(0, 1);
        assert_eq!(empty.dependence(), 0.0);
        assert_eq!(empty.lift, 0.0);
    }
}