feral 0.6.0 - Docs.rs

//! Knight-Ruiz ∞-norm iterative equilibration for sparse symmetric
//! matrices.
//!
//! Given a symmetric CSC matrix `A`, compute a diagonal `d` such that
//! each row of `D·A·D` has infinity-norm ≈ 1, where `D = diag(d)`.
//! This is the same algorithm used by the dense path in
//! `src/dense/equilibrate.rs`, adapted to iterate over lower-triangular
//! CSC storage.
//!
//! Phase 2.2.3 follow-up: the dense BK factorization succeeds on
//! HYDCAR20 / METHANL8 / SWOPF / HATFLDG because it equilibrates the
//! matrix before BK. The sparse multifrontal path was missing this
//! step — MC64 matching happens to classify these matrices as already
//! balanced even when their row norms span 4+ orders of magnitude.
//! Porting the dense path's equilibration recovers these matrices for
//! the sparse path.
//!
//! Algorithm (Jacobi-style, converges in the same number of iterations
//! as the Gauss-Seidel variant used by `dense::equilibrate` while being
//! simpler to implement over CSC lower-triangle storage):
//!
//! 1. Initialize `d = 1`.
//! 2. Repeat up to `max_iter` times:
//!    a. For each row `i`, compute `max_i = max_j |d[i]·a[i,j]·d[j]|`.
//!    b. Update `d[i] /= sqrt(max_i)` for every row whose `max_i > 0`.
//!    c. Stop when `max_i |1 − max_i|` falls below `tol`.

use crate::dense::matrix::SymmetricMatrix;
use crate::scaling::ScalingInfo;
use crate::sparse::csc::CscMatrix;

/// Compute the Knight-Ruiz ∞-norm symmetric scaling vector for a
/// lower-triangular symmetric CSC matrix. Returns the diagonal `d`
/// such that `D·A·D` has unit-∞ rows, paired with `ScalingInfo::Applied`.
pub fn compute_infnorm(matrix: &CscMatrix) -> (Vec<f64>, ScalingInfo) {
    let n = matrix.n;
    if n == 0 {
        return (Vec::new(), ScalingInfo::Applied);
    }
    let mut d = vec![1.0f64; n];

    // 10 iterations is the same cap the dense path uses. Most matrices
    // converge in 2–4 iterations; a few pathological ones need all 10.
    let max_iter = 10;
    let tol = 1e-8;

    // Work buffer for the row ∞-norms.
    let mut row_max = vec![0.0f64; n];

    for _ in 0..max_iter {
        // Reset the row-max buffer.
        for r in row_max.iter_mut() {
            *r = 0.0;
        }

        // Accumulate row maxes by scanning the lower triangle once.
        // Each (i, j) entry with i >= j contributes to both row i and
        // (by symmetry) row j, unless i == j.
        for j in 0..n {
            for k in matrix.col_ptr[j]..matrix.col_ptr[j + 1] {
                let i = matrix.row_idx[k];
                let v = (d[i] * matrix.values[k] * d[j]).abs();
                if v > row_max[i] {
                    row_max[i] = v;
                }
                if i != j && v > row_max[j] {
                    row_max[j] = v;
                }
            }
        }

        // Update diagonal and check convergence.
        let mut max_dev = 0.0f64;
        for i in 0..n {
            let m = row_max[i];
            if m > 0.0 {
                d[i] /= m.sqrt();
                let dev = (m - 1.0).abs();
                if dev > max_dev {
                    max_dev = dev;
                }
            }
            // Rows with all-zero entries keep d[i] at the current value
            // (initially 1.0) — they are structurally zero and the
            // downstream numeric phase will reject them as singular
            // pivots.
        }

        if max_dev < tol {
            break;
        }
    }

    (d, ScalingInfo::Applied)
}

/// Knight-Ruiz ∞-norm scaling computed on a `SymmetricMatrix`
/// (column-major dense storage with the lower triangle authoritative).
///
/// Produces a bit-exact scaling vector with [`compute_infnorm`] for
/// any matrix whose sparse CSC and dense column-major lower-triangle
/// store the same set of nonzeros (i.e. zeros at "missing" positions
/// in the sparse storage). All such fast-path-gate matrices satisfy
/// this, since `CscMatrix::to_dense_into` writes only the stored
/// entries and leaves the rest as zero — and the inner KR step
/// `max(v, row_max[i])` is a no-op for `v == 0.0`.
///
/// The win over [`compute_infnorm`] on small-dense matrices comes
/// from removing the `row_idx[k]` indirection — the dense loop walks
/// contiguous columns with the compiler able to keep `d[j]` in a
/// register and stride `data[col + i]` linearly. On TRO3X3_0013
/// (n=69, density 0.73) this halves the scaling phase from ~34 µs
/// to ~17 µs — see
/// `dev/results/lever-d3/stage1-stage2-2026-04-19.md` §1 for the
/// pre-change breakdown.
///
/// Intended for the D.3/D.4 dense fast-path; see
/// [`crate::scaling::compute_scaling_dense_fast`].
pub fn compute_infnorm_dense(sym: &SymmetricMatrix) -> (Vec<f64>, ScalingInfo) {
    let n = sym.n;
    if n == 0 {
        return (Vec::new(), ScalingInfo::Applied);
    }
    let mut d = vec![1.0f64; n];

    // Same cap and tolerance as the sparse path.
    let max_iter = 10;
    let tol = 1e-8;

    let mut row_max = vec![0.0f64; n];

    for _ in 0..max_iter {
        for r in row_max.iter_mut() {
            *r = 0.0;
        }

        // Walk the lower triangle column-by-column — same traversal
        // order the sparse CSC loop uses, so the floating-point max
        // reduction sees entries in the same sequence. Entries above
        // the lower-triangle gate are zero (set by `to_dense_into`)
        // and contribute nothing to `row_max`.
        for j in 0..n {
            let col = j * n;
            let dj = d[j];
            for i in j..n {
                let v = (d[i] * sym.data[col + i] * dj).abs();
                if v > row_max[i] {
                    row_max[i] = v;
                }
                if i != j && v > row_max[j] {
                    row_max[j] = v;
                }
            }
        }

        let mut max_dev = 0.0f64;
        for i in 0..n {
            let m = row_max[i];
            if m > 0.0 {
                d[i] /= m.sqrt();
                let dev = (m - 1.0).abs();
                if dev > max_dev {
                    max_dev = dev;
                }
            }
        }

        if max_dev < tol {
            break;
        }
    }

    (d, ScalingInfo::Applied)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::sparse::csc::CscMatrix;

    /// Diagonal matrix diag(2, 3, 5). The oracle scaling is
    /// d = [1/sqrt(2), 1/sqrt(3), 1/sqrt(5)], so that
    /// D·A·D = diag(1, 1, 1).
    #[test]
    fn diag_3x3() {
        let m = CscMatrix::from_triplets(3, &[0, 1, 2], &[0, 1, 2], &[2.0, 3.0, 5.0]).unwrap();
        let (d, _info) = compute_infnorm(&m);
        let expected = [1.0 / 2f64.sqrt(), 1.0 / 3f64.sqrt(), 1.0 / 5f64.sqrt()];
        for i in 0..3 {
            assert!(
                (d[i] - expected[i]).abs() < 1e-12,
                "d[{}] = {} != {}",
                i,
                d[i],
                expected[i]
            );
        }
    }

    /// 2x2 matrix [[4, 2], [2, 9]]. Row max [i=0]: max(|4|, |2|) = 4;
    /// row max [i=1]: max(|2|, |9|) = 9. After one KR sweep:
    /// d = [1/2, 1/3]. Check D·A·D row norms converge to 1.
    #[test]
    fn sym_2x2() {
        let m = CscMatrix::from_triplets(2, &[0, 1, 1], &[0, 0, 1], &[4.0, 2.0, 9.0]).unwrap();
        let (d, _) = compute_infnorm(&m);
        // D·A·D:
        //   [d0*d0*4, d0*d1*2]
        //   [d0*d1*2, d1*d1*9]
        let a00 = d[0] * d[0] * 4.0;
        let a01 = d[0] * d[1] * 2.0;
        let a11 = d[1] * d[1] * 9.0;
        let row0 = a00.abs().max(a01.abs());
        let row1 = a01.abs().max(a11.abs());
        assert!((row0 - 1.0).abs() < 1e-6, "row0 max = {}", row0);
        assert!((row1 - 1.0).abs() < 1e-6, "row1 max = {}", row1);
    }

    /// Dense KR must produce a bit-equal scaling vector to sparse KR
    /// on any matrix whose dense column-major lower triangle contains
    /// exactly the entries that the sparse CSC stores. The `arrow_6x6`
    /// pattern hits both off-diag and degree-1 rows.
    #[test]
    fn dense_matches_sparse_on_arrow_6x6() {
        let mut rows = Vec::new();
        let mut cols = Vec::new();
        let mut vals = Vec::new();
        for j in 0..6 {
            rows.push(j);
            cols.push(j);
            vals.push((j + 2) as f64);
        }
        for j in 0..5 {
            rows.push(5);
            cols.push(j);
            vals.push(1.0);
        }
        let m = CscMatrix::from_triplets(6, &rows, &cols, &vals).unwrap();
        let sym = m.to_dense();
        let (d_sparse, _) = compute_infnorm(&m);
        let (d_dense, _) = compute_infnorm_dense(&sym);
        assert_eq!(d_sparse.len(), d_dense.len());
        for i in 0..d_sparse.len() {
            assert_eq!(
                d_sparse[i].to_bits(),
                d_dense[i].to_bits(),
                "dense-vs-sparse KR parity broke at i={}: sparse={} dense={}",
                i,
                d_sparse[i],
                d_dense[i],
            );
        }
    }

    /// Bit-exact parity on a fully-dense small matrix — the dense
    /// fast-path's target regime.
    #[test]
    fn dense_matches_sparse_on_dense_5x5() {
        // Lower-triangular dense block of a 5×5 symmetric matrix.
        let mut rows = Vec::new();
        let mut cols = Vec::new();
        let mut vals = Vec::new();
        for j in 0..5 {
            for i in j..5 {
                rows.push(i);
                cols.push(j);
                // Diagonally-dominant non-trivial pattern.
                vals.push(if i == j {
                    10.0 * (i as f64 + 1.0)
                } else {
                    1.0 + 0.1 * (i - j) as f64
                });
            }
        }
        let m = CscMatrix::from_triplets(5, &rows, &cols, &vals).unwrap();
        let sym = m.to_dense();
        let (d_sparse, _) = compute_infnorm(&m);
        let (d_dense, _) = compute_infnorm_dense(&sym);
        for i in 0..5 {
            assert_eq!(
                d_sparse[i].to_bits(),
                d_dense[i].to_bits(),
                "dense KR diverged at i={}: sparse={} dense={}",
                i,
                d_sparse[i],
                d_dense[i],
            );
        }
    }

    /// Arrow matrix: diagonal [2, 3, 4, 5, 6, 7] with (5, 0..=4) = 1.
    /// Row 5 has 5 off-diagonal entries plus the diagonal 7; its
    /// initial ∞-norm is max(1, 1, 1, 1, 1, 7) = 7. The first KR
    /// sweep should shrink d[5] by sqrt(7).
    #[test]
    fn arrow_6x6() {
        let mut rows = Vec::new();
        let mut cols = Vec::new();
        let mut vals = Vec::new();
        for j in 0..6 {
            rows.push(j);
            cols.push(j);
            vals.push((j + 2) as f64);
        }
        for j in 0..5 {
            rows.push(5);
            cols.push(j);
            vals.push(1.0);
        }
        let m = CscMatrix::from_triplets(6, &rows, &cols, &vals).unwrap();
        let (d, _) = compute_infnorm(&m);
        // After KR convergence, every row's max-magnitude entry in
        // D·A·D should be ≈ 1.
        for i in 0..6 {
            let mut row_max = 0.0f64;
            for j in 0..6 {
                // Look up a[i, j] from lower triangle
                let (ii, jj) = if i >= j { (i, j) } else { (j, i) };
                let mut v = 0.0;
                for k in m.col_ptr[jj]..m.col_ptr[jj + 1] {
                    if m.row_idx[k] == ii {
                        v = m.values[k];
                        break;
                    }
                }
                let scaled = (d[i] * v * d[j]).abs();
                if scaled > row_max {
                    row_max = scaled;
                }
            }
            assert!(
                (row_max - 1.0).abs() < 1e-6,
                "row {} max = {}, expected 1",
                i,
                row_max
            );
        }
    }
}