feral 0.11.2

Sparse symmetric indefinite direct solver in pure Rust, with certified inertia counts.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
//! Sparse rank-1 column-replacement update — Forrest–Tomlin / Bartels–Golub–Reid.
//!
//! Replacing basis slot `leaving_slot` (column position `r`) by a new column
//! folds the spike `ρ = G⁻¹ L⁻¹ P aₙₑw` into `U`'s column `r`, then
//! re-triangularizes the resulting *bump* by sparse Gaussian elimination with
//! partial pivoting (partial pivoting is what makes this correct on a sparse
//! `U`: a zero diagonal pivot is replaced by a nonzero sub-diagonal spike entry
//! via a row interchange; the swap goes into the eta so the base `L` is never
//! permuted).
//!
//! The work is **bump-local**: the spike is computed by a Gilbert–Peierls
//! depth-first reach (only the reachable `L`-columns are touched, not all `n`),
//! column `r` is located via the `u_above` index (no full-row scan), and the
//! "unchanged on failure" guarantee is provided by saving/restoring only the
//! changed rows (no `O(nnz)` clone of `U`). Apart from one `O(n)` read of the
//! dense entering column, the cost is `O(bump)`.

use super::sparse_factor::{FtEta, FtOp, SparseLu};
use super::sparse_symbolic::SparseLuSymbolic;
use crate::error::FeralError;
use crate::lu::sparse_matrix::SparseColMatrix;

impl SparseLu {
    /// Number of rank-1 updates applied since the last factor/refactor.
    #[inline]
    pub fn updates_since_refactor(&self) -> usize {
        self.etas.len()
    }

    /// Replace basis slot `leaving_slot` with the dense entering column `aₙₑw`.
    ///
    /// Convenience wrapper over [`SparseLu::update_sparse`]: it scans `aₙₑw`
    /// once (`O(n)`) for its nonzeros, then delegates. Callers that already hold
    /// the sparse entering column should call `update_sparse` directly to avoid
    /// the scan.
    ///
    /// Returns [`FeralError::NeedsRefactor`] (leaving `self` unchanged) when the
    /// update or growth budget is exceeded, or when the bump has no acceptable
    /// pivot (a singular replacement basis). In every failure mode the update
    /// signals NeedsRefactor rather than SingularBasis — matching the dense
    /// update path ([`super::DenseLu::update`]); the authoritative singularity
    /// verdict comes from a fresh factorization, not the incremental update.
    pub fn update(&mut self, leaving_slot: usize, entering_col: &[f64]) -> Result<(), FeralError> {
        if entering_col.len() != self.m {
            return Err(FeralError::DimensionMismatch {
                expected: self.m,
                got: entering_col.len(),
            });
        }
        let sparse: Vec<(usize, f64)> = entering_col
            .iter()
            .enumerate()
            .filter(|&(_, &v)| v != 0.0)
            .map(|(i, &v)| (i, v))
            .collect();
        self.update_sparse(leaving_slot, &sparse)
    }

    /// Replace basis slot `leaving_slot` with the entering column given by its
    /// nonzeros `(row, value)` (rows need not be sorted; duplicates are summed).
    /// Fully bump-local: cost is `O(bump + nnz(aₙₑw))`, with no `O(n)` term.
    ///
    /// Returns [`FeralError::NeedsRefactor`] on any failure, as for
    /// [`SparseLu::update`].
    pub fn update_sparse(
        &mut self,
        leaving_slot: usize,
        entering: &[(usize, f64)],
    ) -> Result<(), FeralError> {
        let m = self.m;
        if leaving_slot >= m {
            return Err(FeralError::InvalidInput(format!(
                "leaving_slot {} out of range for basis dimension {}",
                leaving_slot, m
            )));
        }
        for &(row, _) in entering.iter() {
            if row >= m {
                return Err(FeralError::InvalidInput(format!(
                    "entering-column row {} out of range for dimension {}",
                    row, m
                )));
            }
        }
        if self.updates_since_refactor() + 1 > self.params.max_updates {
            return Err(FeralError::NeedsRefactor);
        }

        // --- Sparse spike ρ = G⁻¹ L⁻¹ P (scaled aₙₑw) via Gilbert–Peierls reach ---
        let mut w = std::mem::take(&mut self.ft_work); // dedicated buffer, zero on entry
        let mut touched: Vec<usize> = Vec::new(); // positions made nonzero in w
        self.compute_spike(entering, leaving_slot, &mut w, &mut touched);

        let r = self.qcol_inv[leaving_slot];
        let mut supp: Vec<usize> = touched.iter().copied().filter(|&k| w[k] != 0.0).collect();
        supp.sort_unstable();
        supp.dedup();
        let h = match supp.last().copied() {
            Some(h) if h >= r => h,
            _ => {
                clear(&mut w, &touched);
                self.ft_work = w;
                // Spike support deficient: the replacement basis is singular as
                // far as the incremental update can tell. Signal NeedsRefactor
                // so the driver refactors from scratch — matching the dense
                // update path (see DenseLu::update), which also returns
                // NeedsRefactor for a vanishing/missing pivot rather than
                // SingularBasis. The authoritative singularity verdict comes
                // from the fresh factorization, not the update.
                return Err(FeralError::NeedsRefactor);
            }
        };

        // --- Rows whose U content will change (all <= h): bump + spike support
        // + old column-r entries (above-diagonal rows from u_above, plus r). ---
        let mut changed: Vec<usize> = (r..=h).collect();
        changed.extend(supp.iter().copied());
        changed.extend(self.u_above[r].iter().copied());
        changed.push(r);
        changed.sort_unstable();
        changed.dedup();

        // Save the changed rows so a mid-elimination failure can roll back (and
        // so the commit can refresh `u_above` from the old content). Snapshot
        // buffers come from the pooled free-list — was a fresh clone per row,
        // the dominant per-update allocation after the bump-loop pools.
        let mut saved = std::mem::take(&mut self.saved_scratch);
        saved.clear();
        for &i in changed.iter() {
            let mut buf = self.saved_pool.pop().unwrap_or_default();
            buf.clear();
            buf.extend_from_slice(&self.u_rows[i]);
            saved.push((i, buf));
        }

        // Overwrite column r of U with the spike ρ (= w).
        self.set_column_r(r, &w, &supp);

        // Re-triangularize the bump [r, h] with partial pivoting.
        let result = self.eliminate_bump(r, h);

        clear(&mut w, &touched);
        self.ft_work = w;

        match result {
            Ok(ops) => {
                // L5 (dev/research/repo-review-2026-06-09.md): monitor element
                // growth, not the largest single multiplier. Every U entry that
                // changed this update lives in a `changed` row, so the high-water
                // `max|U|/u_max0` is maintained by scanning only those rows —
                // O(changed), keeping the update cheap — and it compounds across
                // a chain of updates (a per-multiplier max did not).
                let mut changed_max = 0.0_f64;
                for &i in changed.iter() {
                    for &(_, v) in self.u_rows[i].iter() {
                        changed_max = changed_max.max(v.abs());
                    }
                }
                let growth = self.growth.max(changed_max / self.u_max0);
                if growth > self.params.max_growth {
                    // Over budget: roll back exactly like the elimination-error
                    // path (u_above was not yet modified) and force a refactor.
                    // The saved buffers move back into u_rows; the caller's
                    // NeedsRefactor rebuilds the whole SparseLu, so the shrunk
                    // pool is irrelevant.
                    for (i, row) in saved.drain(..) {
                        self.u_rows[i] = row;
                    }
                    self.saved_scratch = saved;
                    return Err(FeralError::NeedsRefactor);
                }
                // Commit: refresh the u_above index for every changed row.
                for (i, old_row) in saved.iter() {
                    self.unindex_above(*i, old_row);
                    let new_row = std::mem::take(&mut self.u_rows[*i]);
                    self.index_above(*i, &new_row);
                    self.u_rows[*i] = new_row;
                }
                // Recycle the snapshot buffers and the outer vec for the next
                // update (steady state: zero per-update snapshot allocation).
                for (_, buf) in saved.drain(..) {
                    self.saved_pool.push(buf);
                }
                self.saved_scratch = saved;
                self.etas.push(FtEta { ops });
                self.growth = growth;
                Ok(())
            }
            Err(e) => {
                // Roll back the changed rows (u_above was not yet modified).
                for (i, row) in saved.drain(..) {
                    self.u_rows[i] = row;
                }
                self.saved_scratch = saved;
                Err(e)
            }
        }
    }

    /// Discard all pending updates and re-factor from scratch on `a`, reusing
    /// the column ordering in `symbolic`.
    pub fn refactor(
        &mut self,
        a: &SparseColMatrix,
        symbolic: &SparseLuSymbolic,
    ) -> Result<(), FeralError> {
        *self = SparseLu::factor(a, symbolic, self.params.clone())?;
        Ok(())
    }

    /// Compute the spike `ρ = G⁻¹ L⁻¹ P (D_row Π aₙₑw D_col[slot])` into the dense
    /// work vector `w`, recording every touched position in `touched`. Seeds
    /// directly from the sparse entering column (no `O(n)` scan), then uses a
    /// Gilbert–Peierls depth-first reach so only the reachable `L`-columns are
    /// visited, and finally replays the FT etas forward.
    fn compute_spike(
        &mut self,
        entering: &[(usize, f64)],
        leaving_slot: usize,
        w: &mut [f64],
        touched: &mut Vec<usize>,
    ) {
        let dcol = self.scale.d_col[leaving_slot];
        let mut mark = std::mem::take(&mut self.scratch_mark);
        let mut stack: Vec<usize> = Vec::new();

        // Scatter the scaled entering column into w (pivot-position space) and
        // seed the reach. An original-row entry `o` scales to scaled row
        // `i = rperm_inv[o]` (factor `d_row[i]·dcol`) and lands at pivot position
        // `perm_inv[i]`. Duplicates accumulate (`+=`).
        for &(o, val) in entering.iter() {
            let i = self.scale_rperm_inv[o];
            let v = self.scale.d_row[i] * val * dcol;
            if v == 0.0 {
                continue;
            }
            let k = self.perm_inv[i];
            w[k] += v;
            if !mark[k] {
                mark[k] = true;
                touched.push(k);
                stack.push(k);
            }
        }

        // Depth-first reach over the graph of L (column k -> its rows).
        let mut reach: Vec<usize> = touched.clone();
        while let Some(k) = stack.pop() {
            let (lo, hi) = (self.l_col_ptr[k], self.l_col_ptr[k + 1]);
            for idx in lo..hi {
                let i = self.l_row_idx[idx];
                if !mark[i] {
                    mark[i] = true;
                    touched.push(i);
                    reach.push(i);
                    stack.push(i);
                }
            }
        }
        reach.sort_unstable(); // ascending = valid topological order for L

        // Sparse forward solve L y = w over the reach.
        for &k in reach.iter() {
            let yk = w[k];
            if yk == 0.0 {
                continue;
            }
            let (lo, hi) = (self.l_col_ptr[k], self.l_col_ptr[k + 1]);
            for idx in lo..hi {
                w[self.l_row_idx[idx]] -= self.l_val[idx] * yk;
            }
        }

        // Replay the FT etas forward (G⁻¹), tracking newly touched positions.
        for eta in self.etas.iter() {
            for op in eta.ops.iter() {
                match *op {
                    FtOp::Swap(a, b) => {
                        w.swap(a, b);
                        for x in [a, b] {
                            if w[x] != 0.0 && !mark[x] {
                                mark[x] = true;
                                touched.push(x);
                            }
                        }
                    }
                    FtOp::Axpy { target, src, mult } => {
                        w[target] -= mult * w[src];
                        if w[target] != 0.0 && !mark[target] {
                            mark[target] = true;
                            touched.push(target);
                        }
                    }
                }
            }
        }

        // Clear the marker for the touched set and hand the buffer back.
        for &k in touched.iter() {
            mark[k] = false;
        }
        self.scratch_mark = mark;
    }

    /// Overwrite column `r` of `U` with the spike (`w` at positions `supp`),
    /// removing any old column-`r` entries. Old above-diagonal rows are located
    /// via `u_above[r]` (read only — `u_above` is refreshed wholesale in the
    /// commit phase via unindex/reindex of the changed rows).
    fn set_column_r(&mut self, r: usize, w: &[f64], supp: &[usize]) {
        let old_above = self.u_above[r].clone();
        for &i in old_above.iter() {
            remove_col(&mut self.u_rows[i], r);
        }
        remove_col(&mut self.u_rows[r], r); // old diagonal
        for &i in supp.iter() {
            insert_or_set(&mut self.u_rows[i], r, w[i]);
        }
    }

    /// Eliminate the bump `[r, h]` of `U` with partial pivoting, returning the
    /// recorded eta ops and the updated growth monitor. Operates in place on
    /// `self.u_rows`; the caller is responsible for rollback on `Err`.
    fn eliminate_bump(&mut self, r: usize, h: usize) -> Result<Vec<FtOp>, FeralError> {
        // L6 (dev/research/repo-review-2026-06-09.md): the bump-pivot zero test
        // is relative to the basis magnitude, not an absolute 1e-13. `u_max0`
        // (max|U| at the last factor, floored away from zero for L5) is the scale
        // reference, consistent with the dense update and the factor paths.
        let ztol = self.params.zero_pivot_tol * self.u_max0;
        let mut ops: Vec<FtOp> = Vec::new();

        // Churn-buffer pools, taken into locals for the duration of the bump
        // (restored on every return path). See
        // `dev/research/lu-update-alloc-pooling-2026-06-19.md`. Reusing these
        // removes the per-pivot / per-axpy allocator traffic; contents are
        // cleared and rebuilt here, never read across updates.
        let mut col_rows = std::mem::take(&mut self.col_rows_pool);
        let mut pivot_scratch = std::mem::take(&mut self.pivot_scratch);
        let mut targets = std::mem::take(&mut self.targets_scratch);
        let mut row_pool = std::mem::take(&mut self.row_pool);

        // Sub-diagonal pivot index (Step 1, dev/plans/bump-elim-step1-subdiagonal-index.md):
        // `col_rows[c - r]` is the sorted list of bump rows `i ∈ [r, h]` that
        // currently hold a column-`c` entry. It mirrors the actual nonzero
        // pattern of columns `[r, h]` over the bump rows, so the pivot search
        // and the elimination iterate only real entries instead of probing
        // every row in `k..=h` (the old O(bump²) scan). Rows are pushed in
        // increasing `i`, so each list stays sorted ascending — preserving the
        // lowest-index tie-break of the original scan exactly.
        let width = h - r + 1;
        while col_rows.len() < width {
            col_rows.push(Vec::new());
        }
        for c in col_rows.iter_mut().take(width) {
            c.clear();
        }
        for i in r..=h {
            for &(c, _) in self.u_rows[i].iter() {
                if c < r {
                    continue;
                }
                if c > h {
                    break;
                }
                col_rows[c - r].push(i);
            }
        }

        for k in r..=h {
            let kc = k - r;
            // Strict partial pivoting among rows [k, h] with a column-k entry.
            // Unlike the initial factorization (which honors `pivot_threshold`
            // to trade stability for fill, L2), the bump's structure is already
            // fixed here, so a relaxed threshold buys no fill reduction and only
            // trades away stability — we always take the max-magnitude row.
            // Candidates come from the index; `i < k` entries are upper-triangle
            // (not pivot candidates) and are skipped, matching `i in k..=h`.
            let mut pivot_row = k;
            let mut pivot_abs = 0.0_f64;
            for &i in col_rows[kc].iter() {
                if i < k {
                    continue;
                }
                if let Some(v) = get_col(&self.u_rows[i], k) {
                    if v.abs() > pivot_abs {
                        pivot_abs = v.abs();
                        pivot_row = i;
                    }
                }
            }
            if pivot_abs <= ztol {
                // Bump pivot vanished: the incremental update cannot continue.
                // Signal NeedsRefactor (not SingularBasis) so the driver gets
                // the same contract as the dense update path — update failures
                // always mean "refactor from scratch."
                self.restore_bump_pools(col_rows, pivot_scratch, targets, row_pool);
                return Err(FeralError::NeedsRefactor);
            }
            if pivot_row != k {
                self.u_rows.swap(k, pivot_row);
                ops.push(FtOp::Swap(k, pivot_row));
                // Relabel the index: the two rows' contents swapped, so for every
                // column either now touches, fix membership of `k` and `pivot_row`.
                swap_membership(&mut col_rows, r, h, k, pivot_row, &self.u_rows);
            }
            // Copy the pivot row into the pooled scratch (was a fresh clone): it
            // is read as `dst − mult·pivot` below while other rows are written,
            // so it must be a buffer separate from `u_rows`. `extend_from_slice`
            // reuses the scratch's capacity — same bytes, no allocation.
            pivot_scratch.clear();
            pivot_scratch.extend_from_slice(&self.u_rows[k]);
            let pivot = get_col(&pivot_scratch, k).unwrap_or(0.0);

            // Targets: rows below the pivot (i > k) with a column-k entry, in
            // ascending order — identical to the old `i in k+1..=h` order. A
            // pooled snapshot: the inner loop mutates `col_rows[kc]` via
            // `reindex_after_rowsub`, so it cannot iterate the live list.
            targets.clear();
            targets.extend(col_rows[kc].iter().copied().filter(|&i| i > k));
            for &i in &targets {
                if let Some(vik) = get_col(&self.u_rows[i], k) {
                    let mult = vik / pivot;
                    let old_row = std::mem::take(&mut self.u_rows[i]);
                    // Build the new row into a recycled buffer; hand the old
                    // row's buffer back to the pool for the next axpy.
                    let mut new_row = row_pool.pop().unwrap_or_default();
                    row_sub_into(&old_row, &pivot_scratch, mult, k, &mut new_row);
                    reindex_after_rowsub(&mut col_rows, r, h, i, &old_row, &new_row);
                    self.u_rows[i] = new_row;
                    row_pool.push(old_row);
                    ops.push(FtOp::Axpy {
                        target: i,
                        src: k,
                        mult,
                    });
                }
            }
        }
        self.restore_bump_pools(col_rows, pivot_scratch, targets, row_pool);
        Ok(ops)
    }

    /// Return the bump-loop churn buffers to their `SparseLu` pools. Called on
    /// every `eliminate_bump` exit so the next update finds them warm.
    fn restore_bump_pools(
        &mut self,
        col_rows: Vec<Vec<usize>>,
        pivot_scratch: Vec<(usize, f64)>,
        targets: Vec<usize>,
        row_pool: Vec<Vec<(usize, f64)>>,
    ) {
        self.col_rows_pool = col_rows;
        self.pivot_scratch = pivot_scratch;
        self.targets_scratch = targets;
        self.row_pool = row_pool;
    }

    /// Remove row `i`'s strict-upper entries from the `u_above` column index
    /// (using its pre-update content `old_row`).
    fn unindex_above(&mut self, i: usize, old_row: &[(usize, f64)]) {
        for &(c, _) in old_row.iter() {
            if c > i {
                if let Ok(pos) = self.u_above[c].binary_search(&i) {
                    self.u_above[c].remove(pos);
                }
            }
        }
    }

    /// Add row `i`'s strict-upper entries to the `u_above` column index.
    fn index_above(&mut self, i: usize, new_row: &[(usize, f64)]) {
        for &(c, _) in new_row.iter() {
            if c > i {
                if let Err(pos) = self.u_above[c].binary_search(&i) {
                    self.u_above[c].insert(pos, i);
                }
            }
        }
    }
}

fn clear(w: &mut [f64], touched: &[usize]) {
    for &k in touched.iter() {
        w[k] = 0.0;
    }
}

/// Sorted insert/remove of row `i` in a column's bump-row list (`eliminate_bump`
/// sub-diagonal index). Keeps the list sorted ascending and duplicate-free.
fn set_member(list: &mut Vec<usize>, i: usize, present: bool) {
    match list.binary_search(&i) {
        Ok(pos) => {
            if !present {
                list.remove(pos);
            }
        }
        Err(pos) => {
            if present {
                list.insert(pos, i);
            }
        }
    }
}

/// After swapping the contents of rows `k` and `p`, fix the sub-diagonal index:
/// for every column in `[r, h]` that either row now touches, set membership of
/// `k` and `p` from their (post-swap) content. The union of the two rows'
/// columns is unchanged by the swap, so every column whose membership could
/// change is visited; processing a shared column twice is idempotent.
fn swap_membership(
    col_rows: &mut [Vec<usize>],
    r: usize,
    h: usize,
    k: usize,
    p: usize,
    u_rows: &[Vec<(usize, f64)>],
) {
    for &src in &[k, p] {
        for &(c, _) in u_rows[src].iter() {
            if c < r {
                continue;
            }
            if c > h {
                break;
            }
            let cc = c - r;
            set_member(&mut col_rows[cc], k, get_col(&u_rows[k], c).is_some());
            set_member(&mut col_rows[cc], p, get_col(&u_rows[p], c).is_some());
        }
    }
}

/// After `row i` is rewritten by `row_sub`, update the sub-diagonal index over
/// `[r, h]`: drop `i` from columns it lost (including the eliminated pivot
/// column) and add `i` to columns it gained (fill).
fn reindex_after_rowsub(
    col_rows: &mut [Vec<usize>],
    r: usize,
    h: usize,
    i: usize,
    old_row: &[(usize, f64)],
    new_row: &[(usize, f64)],
) {
    for &(c, _) in old_row {
        if c < r {
            continue;
        }
        if c > h {
            break;
        }
        if get_col(new_row, c).is_none() {
            set_member(&mut col_rows[c - r], i, false);
        }
    }
    for &(c, _) in new_row {
        if c < r {
            continue;
        }
        if c > h {
            break;
        }
        set_member(&mut col_rows[c - r], i, true);
    }
}

/// Look up the value at column `c` in a column-sorted sparse row.
fn get_col(row: &[(usize, f64)], c: usize) -> Option<f64> {
    row.binary_search_by_key(&c, |&(col, _)| col)
        .ok()
        .map(|pos| row[pos].1)
}

/// Remove column `c` from a column-sorted sparse row, if present.
fn remove_col(row: &mut Vec<(usize, f64)>, c: usize) {
    if let Ok(pos) = row.binary_search_by_key(&c, |&(col, _)| col) {
        row.remove(pos);
    }
}

/// Set column `c` of a column-sorted sparse row to `v` (insert/replace; remove
/// if `v == 0`).
fn insert_or_set(row: &mut Vec<(usize, f64)>, c: usize, v: f64) {
    match row.binary_search_by_key(&c, |&(col, _)| col) {
        Ok(pos) => {
            if v != 0.0 {
                row[pos].1 = v;
            } else {
                row.remove(pos);
            }
        }
        Err(pos) => {
            if v != 0.0 {
                row.insert(pos, (c, v));
            }
        }
    }
}

/// `dst − mult·src` over two column-sorted sparse rows, dropping the eliminated
/// column `drop_col` and any exact zeros, written into `out` (cleared first).
/// Result stays column-sorted. `out` is a recycled buffer from `row_pool`, so
/// the merge reuses its capacity — identical contents to the old allocating
/// `row_sub`, no per-axpy allocation once the pool is warm.
fn row_sub_into(
    dst: &[(usize, f64)],
    src: &[(usize, f64)],
    mult: f64,
    drop_col: usize,
    out: &mut Vec<(usize, f64)>,
) {
    out.clear();
    let (mut i, mut j) = (0usize, 0usize);
    while i < dst.len() && j < src.len() {
        let (dc, dv) = dst[i];
        let (sc, sv) = src[j];
        if dc < sc {
            if dc != drop_col {
                out.push((dc, dv));
            }
            i += 1;
        } else if dc > sc {
            let v = -mult * sv;
            if sc != drop_col && v != 0.0 {
                out.push((sc, v));
            }
            j += 1;
        } else {
            let v = dv - mult * sv;
            if dc != drop_col && v != 0.0 {
                out.push((dc, v));
            }
            i += 1;
            j += 1;
        }
    }
    while i < dst.len() {
        let (dc, dv) = dst[i];
        if dc != drop_col {
            out.push((dc, dv));
        }
        i += 1;
    }
    while j < src.len() {
        let (sc, sv) = src[j];
        let v = -mult * sv;
        if sc != drop_col && v != 0.0 {
            out.push((sc, v));
        }
        j += 1;
    }
}

#[cfg(test)]
mod tests {
    use crate::lu::sparse_factor::SparseLu;
    use crate::lu::LuParams;

    /// L5 (dev/research/repo-review-2026-06-09.md): the sparse growth monitor
    /// recorded only the largest single elimination multiplier, so compounded
    /// element growth across a chain of updates went unmonitored. After the fix
    /// `growth` is the ‖U‖∞ high-water ratio (max|U| over update history ÷
    /// max|U| at factor), which compounds. Oracle is the independent
    /// recomputation of that ratio from the committed `U` (via `u_dense`), not
    /// the monitor's own bookkeeping; pre-fix `growth` is the max single
    /// multiplier and does not match.
    #[test]
    fn growth_monitor_tracks_compounded_element_growth() {
        let cols = vec![
            vec![4.0, 1.0, 0.0, 0.0],
            vec![1.0, 3.0, 1.0, 0.0],
            vec![0.0, 1.0, 2.0, 1.0],
            vec![0.0, 0.0, 1.0, 5.0],
        ];
        let m = 4;
        let params = LuParams {
            max_updates: 20,
            max_growth: 1e12,
            ..LuParams::default()
        };
        let mut lu = SparseLu::factor_dense_columns(m, &cols, params).expect("factor");

        let umax = |lu: &SparseLu| {
            let mut mx = 0.0_f64;
            for i in 0..m {
                for j in 0..m {
                    mx = mx.max(lu.u_dense(i, j).abs());
                }
            }
            mx
        };
        let u_max0 = umax(&lu);
        let mut hw = 1.0_f64;

        // Replace the last slot each time: no bump forms (the spike lands in the
        // last column), every update commits, and max|U| in that column grows.
        let updates = [
            (3usize, vec![0.0, 0.0, 1.0, 20.0]),
            (3usize, vec![0.0, 0.0, 1.0, 60.0]),
            (3usize, vec![0.0, 0.0, 1.0, 180.0]),
        ];
        for (i, (slot, col)) in updates.iter().enumerate() {
            lu.update(*slot, col)
                .unwrap_or_else(|e| panic!("update {i} should commit: {e:?}"));
            hw = hw.max(umax(&lu) / u_max0);
            assert!(
                (lu.growth - hw).abs() <= 1e-9 * hw,
                "growth monitor {} must equal element-growth high-water {}",
                lu.growth,
                hw
            );
        }
        assert!(hw > 1.0, "test must exercise genuine element growth");
    }
}