Skip to main content

numra_ode/
bdf.rs

1//! BDF / NDF: variable-order, variable-step backward differentiation formulas.
2//!
3//! This is a Rust port of SciPy's `scipy.integrate.BDF` (the canonical
4//! implementation of Shampine–Reichelt's NDF method from MATLAB's `ode15s`).
5//! The previous revision of this file was algorithmically incorrect — see
6//! "Why a rewrite" below.
7//!
8//! ## Mathematical formulation
9//!
10//! The k-th order BDF method approximates
11//!   M · ∑ⱼ αⱼ · y_{n+1-j}  =  h · β · f(t_{n+1}, y_{n+1})
12//! where M is an optional (possibly singular) mass matrix. For ODEs M = I.
13//!
14//! Rather than storing past `y` values and applying fixed-coefficient BDF
15//! formulas (which is *not* order-k accurate when the step size varies — this
16//! was the previous file's fundamental error), we work with **modified
17//! divided differences** D, defined by:
18//!   D⁰yₙ = yₙ
19//!   Dʲyₙ = Dʲ⁻¹yₙ − Dʲ⁻¹y_{n-1}    (j ≥ 1)
20//!
21//! The differences are stored in an array `D[0..k+2]` whose rows are scaled
22//! so that the BDF formula holds *as if* the step size were constant. When h
23//! changes, the array is rescaled in-place via a small transformation matrix
24//! (`change_d`); this preserves order accuracy. This is the
25//! "fixed-leading-coefficient" / "quasi-constant step size" trick from
26//! Shampine & Reichelt (1997).
27//!
28//! ## NDF coefficients (Klopfenstein–Shampine)
29//!
30//! For each order k we store a `KAPPA[k]` "boost" parameter; setting κ = 0
31//! recovers pure BDF, while small negative κ gives the NDF method, which
32//! reduces the error constant by ~25–30% at the cost of slight stability
33//! tightening. Defaults follow Shampine–Reichelt.
34//!
35//! ## DAE support (index-1)
36//!
37//! For singular M, the iteration matrix becomes M − c·J (instead of I − c·J),
38//! and the Newton residual carries M·ψ and M·d terms. Algebraic constraint
39//! rows (zero rows of M) reduce to f_alg(t, y) = 0, enforced at every step.
40//!
41//! ## Why a rewrite
42//!
43//! The previous bdf.rs had three independent algorithmic bugs that
44//! manifested catastrophically (silent wrong answers, runaway step counts):
45//!
46//! 1. **Newton convergence test on the residual norm** — when h·f was small
47//!    in absolute terms, Newton declared convergence at iteration 0 with
48//!    `y_new = predictor = y_old`, leaving the solution unchanged while time
49//!    advanced. This is the source of the reported "y_final = y_initial at
50//!    success=true" bug.
51//! 2. **Error estimator identically ≈ 0** — the formula referenced
52//!    `history[0]`, which was always equal to the current `y` (just pushed),
53//!    so the estimate's leading term cancelled.
54//! 3. **Fixed BDF coefficients applied to variable-h history** — without the
55//!    modified-divided-differences rescaling, the method silently loses
56//!    order whenever h changes. The textbook coefficients [1, -4/3, 1/3,
57//!    2/3·h] only define BDF2 when y_n, y_{n-1} are spaced equally.
58//!
59//! ## References
60//! - L. F. Shampine and M. W. Reichelt, "The MATLAB ODE Suite", SIAM J. Sci.
61//!   Comput. 18(1), 1–22 (1997). [NDF coefficients, fixed-leading-coefficient
62//!   BDF, error estimator.]
63//! - G. D. Byrne and A. C. Hindmarsh, "A Polyalgorithm for the Numerical
64//!   Solution of ODEs", ACM TOMS 1(1), 71–96 (1975).
65//! - SciPy `scipy/integrate/_ivp/bdf.py` (BSD-3 / Apache-2.0). Reference
66//!   implementation that this file ports.
67//!
68//! Author: Moussa Leblouba
69//! Date: 5 March 2026
70//! Modified: 6 May 2026 (full rewrite based on SciPy's NDF/MDD algorithm)
71
72use faer::{ComplexField, Conjugate, SimpleEntity};
73use numra_core::Scalar;
74use numra_linalg::{DenseMatrix, LUFactorization, Matrix};
75
76use crate::error::SolverError;
77use crate::problem::OdeSystem;
78use crate::solver::{Solver, SolverOptions, SolverResult, SolverStats};
79use crate::t_eval::{validate_grid, TEvalEmitter};
80
81/// BDF / NDF solver for stiff ODEs and index-1 DAEs (orders 1-5).
82#[derive(Clone, Debug, Default)]
83pub struct Bdf {
84    max_order: usize,
85    min_order: usize,
86}
87
88impl Bdf {
89    pub fn new() -> Self {
90        Self {
91            max_order: MAX_ORDER,
92            min_order: 1,
93        }
94    }
95
96    /// Cap the maximum order. Useful to keep the method L-stable
97    /// (max_order ≤ 2) for problems that need strict L-stability.
98    pub fn with_max_order(max_order: usize) -> Self {
99        Self {
100            max_order: max_order.clamp(1, MAX_ORDER),
101            min_order: 1,
102        }
103    }
104
105    /// Pin the order. Note: BDF *always* starts at order 1 (single-step info
106    /// available at startup); `fixed_order` only acts as a soft floor and a
107    /// hard ceiling for adaptive selection.
108    pub fn fixed_order(order: usize) -> Self {
109        let order = order.clamp(1, MAX_ORDER);
110        Self {
111            max_order: order,
112            min_order: order,
113        }
114    }
115}
116
117// ---- Algorithm constants ---------------------------------------------------
118
119const MAX_ORDER: usize = 5;
120const NEWTON_MAXITER: usize = 4;
121const MIN_FACTOR: f64 = 0.2;
122const MAX_FACTOR: f64 = 10.0;
123
124/// Klopfenstein–Shampine NDF "boost" parameter for orders 0..=5.
125/// Setting all to zero recovers pure BDF.
126const KAPPA: [f64; MAX_ORDER + 1] = [0.0, -0.1850, -1.0 / 9.0, -0.0823, -0.0415, 0.0];
127
128/// γ_k = ∑_{i=1}^{k} 1/i, with γ_0 = 0.
129const GAMMA: [f64; MAX_ORDER + 1] = [
130    0.0,
131    1.0,
132    1.0 + 1.0 / 2.0,
133    1.0 + 1.0 / 2.0 + 1.0 / 3.0,
134    1.0 + 1.0 / 2.0 + 1.0 / 3.0 + 1.0 / 4.0,
135    1.0 + 1.0 / 2.0 + 1.0 / 3.0 + 1.0 / 4.0 + 1.0 / 5.0,
136];
137
138/// α_k = (1 − κ_k) γ_k. The leading coefficient in the BDF/NDF
139/// fixed-leading-coefficient form.
140const ALPHA: [f64; MAX_ORDER + 1] = [
141    (1.0 - KAPPA[0]) * GAMMA[0],
142    (1.0 - KAPPA[1]) * GAMMA[1],
143    (1.0 - KAPPA[2]) * GAMMA[2],
144    (1.0 - KAPPA[3]) * GAMMA[3],
145    (1.0 - KAPPA[4]) * GAMMA[4],
146    (1.0 - KAPPA[5]) * GAMMA[5],
147];
148
149/// Error constant C_{k+1} for order k: C = κ_k γ_k + 1/(k+1). Scales the
150/// next-order divided difference into a local truncation error estimate.
151const ERROR_CONST: [f64; MAX_ORDER + 1] = [
152    KAPPA[0] * GAMMA[0] + 1.0,
153    KAPPA[1] * GAMMA[1] + 1.0 / 2.0,
154    KAPPA[2] * GAMMA[2] + 1.0 / 3.0,
155    KAPPA[3] * GAMMA[3] + 1.0 / 4.0,
156    KAPPA[4] * GAMMA[4] + 1.0 / 5.0,
157    KAPPA[5] * GAMMA[5] + 1.0 / 6.0,
158];
159
160const RU_SIZE: usize = (MAX_ORDER + 1) * (MAX_ORDER + 1); // = 36
161
162// ---- Solver trait impl -----------------------------------------------------
163
164impl<S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField> Solver<S> for Bdf {
165    fn solve<Sys: OdeSystem<S>>(
166        problem: &Sys,
167        t0: S,
168        tf: S,
169        y0: &[S],
170        options: &SolverOptions<S>,
171    ) -> Result<SolverResult<S>, SolverError> {
172        let solver = Bdf::new();
173        solver.solve_internal(problem, t0, tf, y0, options)
174    }
175}
176
177// ---- compute_R / change_D --------------------------------------------------
178//
179// `compute_r(order, factor)` builds the (order+1)×(order+1) transformation
180// matrix R such that, when D is the modified-divided-differences array for
181// step h, the rescaled differences for step (factor·h) are obtained by
182//   D' = (R · U)^T · D
183// where U = compute_r(order, 1). For numerical efficiency we use stack
184// arrays of size (MAX_ORDER+1)² = 36 and only access the leading
185// (order+1)×(order+1) submatrix.
186//
187// Reference: SciPy's `_ivp/bdf.py::compute_R`, derived from Shampine &
188// Reichelt §3.
189
190/// Compute R(order, factor) into the leading (order+1)×(order+1) block of `out`.
191/// Layout: out[r * (MAX_ORDER+1) + c] = R[r, c].
192fn compute_r<S: Scalar>(order: usize, factor: S, out: &mut [S; RU_SIZE]) {
193    for slot in out.iter_mut() {
194        *slot = S::ZERO;
195    }
196    let stride = MAX_ORDER + 1;
197
198    // M[0, :] = 1
199    for c in 0..=order {
200        out[c] = S::ONE;
201    }
202    // M[r, c] = (r - 1 - factor * c) / r  for r, c in 1..=order
203    for r in 1..=order {
204        for c in 1..=order {
205            let num = S::from_usize(r - 1) - factor * S::from_usize(c);
206            out[r * stride + c] = num / S::from_usize(r);
207        }
208    }
209    // Cumulative product down each column.
210    for r in 1..=order {
211        for c in 0..=order {
212            let prev = out[(r - 1) * stride + c];
213            out[r * stride + c] = out[r * stride + c] * prev;
214        }
215    }
216}
217
218/// In-place: replace D[0..=order] with (R·U)^T · D[0..=order],
219/// where R = compute_r(order, factor) and U = compute_r(order, 1).
220fn change_d<S: Scalar>(d: &mut [Vec<S>], order: usize, factor: S) {
221    // factor = 1 is a no-op; bail early.
222    if (factor - S::ONE).abs() < S::from_f64(1e-15) {
223        return;
224    }
225
226    let mut r_mat = [S::ZERO; RU_SIZE];
227    let mut u_mat = [S::ZERO; RU_SIZE];
228    compute_r(order, factor, &mut r_mat);
229    compute_r(order, S::ONE, &mut u_mat);
230
231    let stride = MAX_ORDER + 1;
232
233    // RU = R · U  (only the (order+1)×(order+1) leading block matters).
234    let mut ru = [S::ZERO; RU_SIZE];
235    for i in 0..=order {
236        for j in 0..=order {
237            let mut s = S::ZERO;
238            for k in 0..=order {
239                s = s + r_mat[i * stride + k] * u_mat[k * stride + j];
240            }
241            ru[i * stride + j] = s;
242        }
243    }
244
245    // Apply (RU)^T to the rows of D:  D'_i = ∑_k RU[k, i] · D_k.
246    let dim = d[0].len();
247    let mut new_rows: Vec<Vec<S>> = (0..=order).map(|_| vec![S::ZERO; dim]).collect();
248    for i in 0..=order {
249        for k in 0..=order {
250            let coef = ru[k * stride + i];
251            if coef == S::ZERO {
252                continue;
253            }
254            for col in 0..dim {
255                new_rows[i][col] = new_rows[i][col] + coef * d[k][col];
256            }
257        }
258    }
259    for (i, row) in new_rows.into_iter().enumerate() {
260        d[i] = row;
261    }
262}
263
264// ---- Newton solver ---------------------------------------------------------
265
266struct NewtonOutcome<S: Scalar> {
267    converged: bool,
268    n_iter: usize,
269    y_new: Vec<S>,
270    /// d = y_new − y_predict. Equals the (order+1)-th modified divided
271    /// difference at the new step, and is the raw input to the local
272    /// truncation error estimate.
273    d: Vec<S>,
274}
275
276/// Bookkeeping for an accepted step, captured inside the inner retry loop
277/// and consumed by the commit phase.
278struct AcceptedStep<S: Scalar> {
279    y_new: Vec<S>,
280    d_corr: Vec<S>,
281    err_norm: S,
282    h_abs_used: S,
283    safety: S,
284}
285
286/// Solve the BDF/NDF algebraic system via simplified Newton iteration.
287///
288/// At convergence:  M·d = c·f(t_new, y_predict + d) − M·ψ.
289/// Iteration uses the pre-factored linear operator (M − c·J).
290///
291/// Convergence test (Hairer–Wanner ODE II §IV.8, also SciPy):
292///   converged ⟺ rate / (1 − rate) · ‖dy‖ < tol
293/// with `rate` = ratio of successive correction norms. This is a *correction*
294/// norm test, not a residual norm test — using residual norms (as the
295/// previous file did) leads to false convergence at iteration 0 when the
296/// predictor is near the new point in absolute terms but Newton hasn't
297/// actually moved.
298#[allow(clippy::too_many_arguments)]
299fn solve_bdf_system<S, Sys>(
300    problem: &Sys,
301    t_new: S,
302    y_predict: &[S],
303    c: S,
304    psi: &[S],
305    lu: &LUFactorization<S>,
306    scale: &[S],
307    tol: S,
308    mass: Option<&[S]>,
309    stats: &mut SolverStats,
310    dim: usize,
311) -> Result<NewtonOutcome<S>, SolverError>
312where
313    S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
314    Sys: OdeSystem<S>,
315{
316    let mut d = vec![S::ZERO; dim];
317    let mut y = y_predict.to_vec();
318    let mut f_buf = vec![S::ZERO; dim];
319    let mut rhs_buf = vec![S::ZERO; dim];
320
321    // Pre-compute M·ψ once (constant within the Newton loop).
322    let mut m_psi = vec![S::ZERO; dim];
323    if let Some(m) = mass {
324        for i in 0..dim {
325            let mut s = S::ZERO;
326            for j in 0..dim {
327                s = s + m[i * dim + j] * psi[j];
328            }
329            m_psi[i] = s;
330        }
331    } else {
332        m_psi.copy_from_slice(psi);
333    }
334
335    let mut dy_norm_old: Option<S> = None;
336    let mut converged = false;
337    let mut n_iter_done = 0usize;
338
339    for iter in 0..NEWTON_MAXITER {
340        n_iter_done = iter + 1;
341
342        problem.rhs(t_new, &y, &mut f_buf);
343        stats.n_eval += 1;
344
345        // Bail on NaN/Inf in f.
346        if f_buf.iter().any(|v| !v.to_f64().is_finite()) {
347            break;
348        }
349
350        // RHS of Newton update:  c·f − M·(ψ + d).
351        // For ODEs (M = I), this simplifies to  c·f − ψ − d.
352        if let Some(m) = mass {
353            for i in 0..dim {
354                let mut md = S::ZERO;
355                for j in 0..dim {
356                    md = md + m[i * dim + j] * d[j];
357                }
358                rhs_buf[i] = c * f_buf[i] - m_psi[i] - md;
359            }
360        } else {
361            for i in 0..dim {
362                rhs_buf[i] = c * f_buf[i] - psi[i] - d[i];
363            }
364        }
365
366        let dy = lu.solve(&rhs_buf)?;
367
368        // Weighted-RMS norm of the correction.
369        let mut dy_norm_sq = S::ZERO;
370        for i in 0..dim {
371            let r = dy[i] / scale[i];
372            dy_norm_sq = dy_norm_sq + r * r;
373        }
374        let dy_norm = (dy_norm_sq / S::from_usize(dim)).sqrt();
375
376        let rate = dy_norm_old.and_then(|old| {
377            if old > S::ZERO {
378                Some(dy_norm / old)
379            } else {
380                None
381            }
382        });
383
384        // Early divergence: rate ≥ 1, or projected error after the remaining
385        // iterations would still exceed tol.
386        if let Some(r) = rate {
387            if r >= S::ONE {
388                break;
389            }
390            let remaining = NEWTON_MAXITER - iter;
391            let r_pow = r.powf(S::from_usize(remaining));
392            if r_pow / (S::ONE - r) * dy_norm > tol {
393                break;
394            }
395        }
396
397        // Apply correction.
398        for i in 0..dim {
399            y[i] = y[i] + dy[i];
400            d[i] = d[i] + dy[i];
401        }
402
403        // Convergence test on the *correction*.
404        if dy_norm == S::ZERO {
405            converged = true;
406            break;
407        }
408        if let Some(r) = rate {
409            if r / (S::ONE - r) * dy_norm < tol {
410                converged = true;
411                break;
412            }
413        }
414
415        dy_norm_old = Some(dy_norm);
416    }
417
418    Ok(NewtonOutcome {
419        converged,
420        n_iter: n_iter_done,
421        y_new: y,
422        d,
423    })
424}
425
426// ---- Main solve loop -------------------------------------------------------
427
428impl Bdf {
429    fn solve_internal<S, Sys>(
430        &self,
431        problem: &Sys,
432        t0: S,
433        tf: S,
434        y0: &[S],
435        options: &SolverOptions<S>,
436    ) -> Result<SolverResult<S>, SolverError>
437    where
438        S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
439        Sys: OdeSystem<S>,
440    {
441        let dim = problem.dim();
442        if y0.len() != dim {
443            return Err(SolverError::DimensionMismatch {
444                expected: dim,
445                actual: y0.len(),
446            });
447        }
448
449        let mut stats = SolverStats::default();
450
451        let mut t = t0;
452        let direction = if tf > t0 { S::ONE } else { -S::ONE };
453
454        if let Some(grid) = options.t_eval.as_deref() {
455            validate_grid(grid, t0, tf)?;
456        }
457        let mut grid_emitter = options
458            .t_eval
459            .as_deref()
460            .map(|g| TEvalEmitter::new(g, direction));
461        let (mut t_out, mut y_out) = if grid_emitter.is_some() {
462            (Vec::new(), Vec::new())
463        } else {
464            (vec![t0], y0.to_vec())
465        };
466        // Slopes at the previous accepted t (start of step) and the new t,
467        // needed by the Hermite interpolant in t_eval mode. BDF doesn't keep
468        // f(t, y) hot between steps, so we do explicit RHS evaluations only
469        // when a grid is supplied.
470        let mut dy_old_buf = vec![S::ZERO; dim];
471        let mut dy_new_buf = vec![S::ZERO; dim];
472        if grid_emitter.is_some() {
473            problem.rhs(t0, y0, &mut dy_old_buf);
474            stats.n_eval += 1;
475        }
476
477        // ---- Initial RHS, step size, and divided differences ----
478        let mut f_eval = vec![S::ZERO; dim];
479        problem.rhs(t, y0, &mut f_eval);
480        stats.n_eval += 1;
481
482        let mut h_abs = self.initial_step_size(y0, &f_eval, options, dim);
483
484        // D[0] = y_0;  D[1] = h_0 · f_0 · direction;  D[2..] = 0.
485        let mut d_arr: Vec<Vec<S>> = (0..MAX_ORDER + 3).map(|_| vec![S::ZERO; dim]).collect();
486        d_arr[0].copy_from_slice(y0);
487        for i in 0..dim {
488            d_arr[1][i] = h_abs * f_eval[i] * direction;
489        }
490
491        // Mass matrix.
492        let mass_data = if problem.has_mass_matrix() {
493            let mut m = vec![S::ZERO; dim * dim];
494            problem.mass_matrix(&mut m);
495            Some(m)
496        } else {
497            None
498        };
499        let mass_ref = mass_data.as_deref();
500
501        // Jacobian + LU. Delegates to OdeSystem::jacobian, which lets a
502        // system override with an analytical Jacobian (e.g. MOLSystem*) and
503        // otherwise falls through to the canonical FD default in
504        // problem.rs. Costs one extra rhs evaluation per Jacobian rebuild
505        // vs the previous inlined path because the trait default
506        // recomputes f0 internally; that overhead is dominated by the LU
507        // on every problem we care about.
508        let mut jac = vec![S::ZERO; dim * dim];
509        problem.jacobian(t, y0, &mut jac);
510        stats.n_jac += 1;
511        let mut current_jac = true;
512        let mut lu: Option<LUFactorization<S>> = None;
513        let mut last_c: Option<S> = None;
514
515        // Order management.
516        let mut order = 1usize;
517        let mut n_equal_steps: usize = 0;
518
519        // Newton tolerance (Hairer formula).
520        let uround = S::from_f64(2.220446049250313e-16);
521        let newton_tol = (S::from_f64(10.0) * uround / options.rtol)
522            .max(S::from_f64(0.03).min(options.rtol.sqrt()));
523
524        let h_min = options.h_min;
525        let h_max = options.h_max.min((tf - t0).abs());
526
527        let mut step_count = 0usize;
528
529        while (tf - t) * direction > S::from_f64(1e-12) * (tf - t0).abs() {
530            if step_count >= options.max_steps {
531                return Err(SolverError::MaxIterationsExceeded { t: t.to_f64() });
532            }
533
534            // --- Step-size cap pre-checks (SciPy: top of _step_impl) ---
535            let ulp_min = S::from_f64(10.0) * uround * (t.abs().max(tf.abs())).max(S::ONE);
536            let min_step = h_min.max(ulp_min);
537
538            if h_abs > h_max {
539                let factor = h_max / h_abs;
540                change_d(&mut d_arr, order, factor);
541                n_equal_steps = 0;
542                h_abs = h_max;
543                lu = None;
544                last_c = None;
545            } else if h_abs < min_step {
546                let factor = min_step / h_abs;
547                change_d(&mut d_arr, order, factor);
548                n_equal_steps = 0;
549                h_abs = min_step;
550                lu = None;
551                last_c = None;
552            }
553
554            // --- Inner loop: try, retry, possibly reduce h ---
555            let mut step_accepted = false;
556            // (y_new, d_correction, error_norm, h_abs_used, safety_factor)
557            let mut accepted: Option<AcceptedStep<S>> = None;
558
559            while !step_accepted {
560                if h_abs < min_step {
561                    return Err(SolverError::StepSizeTooSmall {
562                        t: t.to_f64(),
563                        h: h_abs.to_f64(),
564                        h_min: h_min.to_f64(),
565                    });
566                }
567
568                let h = h_abs * direction;
569                let mut t_new = t + h;
570                let mut h_used = h;
571                let mut h_abs_used = h_abs;
572
573                // Clip to t_bound (rescale D to match the shorter step).
574                if (t_new - tf) * direction > S::ZERO {
575                    t_new = tf;
576                    h_used = t_new - t;
577                    h_abs_used = h_used.abs();
578                    let factor = h_abs_used / h_abs;
579                    change_d(&mut d_arr, order, factor);
580                    n_equal_steps = 0;
581                    h_abs = h_abs_used;
582                    lu = None;
583                    last_c = None;
584                }
585
586                // Predictor:  y_predict = ∑_{k=0..order} D[k]
587                let mut y_predict = vec![S::ZERO; dim];
588                for k in 0..=order {
589                    for i in 0..dim {
590                        y_predict[i] = y_predict[i] + d_arr[k][i];
591                    }
592                }
593
594                // ψ = (∑_{j=1..order} γ_j · D[j]) / α_order
595                let alpha_o = S::from_f64(ALPHA[order]);
596                let mut psi = vec![S::ZERO; dim];
597                for j in 1..=order {
598                    let g = S::from_f64(GAMMA[j]);
599                    for i in 0..dim {
600                        psi[i] = psi[i] + g * d_arr[j][i];
601                    }
602                }
603                for i in 0..dim {
604                    psi[i] = psi[i] / alpha_o;
605                }
606
607                let c = h_used / alpha_o;
608
609                // Pre-Newton scale based on |y_predict|.
610                let mut scale = vec![S::ZERO; dim];
611                for i in 0..dim {
612                    scale[i] =
613                        (options.atol + options.rtol * y_predict[i].abs()).max(S::from_f64(1e-300));
614                }
615
616                // --- Newton with retry-on-stale-Jacobian ---
617                let outcome;
618                loop {
619                    if lu.is_none() || last_c != Some(c) {
620                        let it = self.form_iteration_matrix(&jac, c, dim, mass_ref);
621                        lu = Some(LUFactorization::new(&it)?);
622                        stats.n_lu += 1;
623                        last_c = Some(c);
624                    }
625                    let attempt = solve_bdf_system(
626                        problem,
627                        t_new,
628                        &y_predict,
629                        c,
630                        &psi,
631                        lu.as_ref().unwrap(),
632                        &scale,
633                        newton_tol,
634                        mass_ref,
635                        &mut stats,
636                        dim,
637                    )?;
638
639                    if attempt.converged || current_jac {
640                        outcome = attempt;
641                        break;
642                    }
643
644                    // Stale Jacobian: refresh f baseline + J, retry
645                    // *without* reducing h. Critical: previous BDF skipped
646                    // this and reduced h on every Newton failure, which
647                    // wasted enormous numbers of steps.
648                    problem.rhs(t_new, &y_predict, &mut f_eval);
649                    stats.n_eval += 1;
650                    problem.jacobian(t_new, &y_predict, &mut jac);
651                    stats.n_jac += 1;
652                    current_jac = true;
653                    lu = None;
654                    last_c = None;
655                }
656
657                if !outcome.converged {
658                    // Real Newton failure with fresh Jacobian. Halve h.
659                    let factor = S::from_f64(0.5);
660                    change_d(&mut d_arr, order, factor);
661                    n_equal_steps = 0;
662                    h_abs = h_abs * factor;
663                    stats.n_reject += 1;
664                    lu = None;
665                    last_c = None;
666                    continue;
667                }
668
669                // --- Newton converged; check error norm ---
670                let safety = S::from_f64(0.9 * (2.0 * NEWTON_MAXITER as f64 + 1.0))
671                    / S::from_f64(2.0 * NEWTON_MAXITER as f64 + outcome.n_iter as f64);
672
673                let mut scale_new = vec![S::ZERO; dim];
674                for i in 0..dim {
675                    scale_new[i] = (options.atol + options.rtol * outcome.y_new[i].abs())
676                        .max(S::from_f64(1e-300));
677                }
678
679                let err_const = S::from_f64(ERROR_CONST[order]);
680                let mut err_sq = S::ZERO;
681                for i in 0..dim {
682                    let e = err_const * outcome.d[i] / scale_new[i];
683                    err_sq = err_sq + e * e;
684                }
685                let err_norm = (err_sq / S::from_usize(dim)).sqrt();
686
687                if err_norm > S::ONE {
688                    // Reject by error: shrink h. Do NOT reset LU per SciPy
689                    // (Newton was fine, only the error was too big), but DO
690                    // reset last_c since c changes with h.
691                    let order_p1 = S::from_usize(order + 1);
692                    let factor =
693                        S::from_f64(MIN_FACTOR).max(safety * err_norm.powf(-S::ONE / order_p1));
694                    change_d(&mut d_arr, order, factor);
695                    n_equal_steps = 0;
696                    h_abs = h_abs * factor;
697                    stats.n_reject += 1;
698                    last_c = None;
699                    continue;
700                }
701
702                accepted = Some(AcceptedStep {
703                    y_new: outcome.y_new,
704                    d_corr: outcome.d,
705                    err_norm,
706                    h_abs_used,
707                    safety,
708                });
709                step_accepted = true;
710            }
711
712            let AcceptedStep {
713                y_new,
714                d_corr,
715                err_norm,
716                h_abs_used,
717                safety,
718            } = accepted.unwrap();
719
720            // --- Commit accepted step ---
721            stats.n_accept += 1;
722            n_equal_steps += 1;
723            let t_old = t;
724            // Snapshot the old state before the divided-differences update
725            // overwrites D[0]; needed by the Hermite interpolant.
726            let y_old_snapshot = if grid_emitter.is_some() {
727                Some(d_arr[0].clone())
728            } else {
729                None
730            };
731            t = t + h_abs_used * direction;
732            h_abs = h_abs_used;
733
734            // Update modified divided differences in-place. The principal
735            // relation here is D^{j+1} y_n = D^j y_n − D^j y_{n-1}; combined
736            // with d_corr = D^{order+1} y_n, this elegant cascade updates
737            // the entire table:
738            //   D[order+2] ← d − D[order+1]
739            //   D[order+1] ← d
740            //   D[i]      ← D[i] + D[i+1]    for i = order, …, 0
741            for i in 0..dim {
742                d_arr[order + 2][i] = d_corr[i] - d_arr[order + 1][i];
743                d_arr[order + 1][i] = d_corr[i];
744            }
745            for k in (0..=order).rev() {
746                for i in 0..dim {
747                    d_arr[k][i] = d_arr[k][i] + d_arr[k + 1][i];
748                }
749            }
750
751            // After update, D[0] should equal y_new (modulo Newton tolerance).
752            debug_assert!({
753                let mut ok = true;
754                for i in 0..dim {
755                    let diff = (d_arr[0][i] - y_new[i]).abs();
756                    let scl = options.atol + options.rtol * y_new[i].abs().max(S::ONE);
757                    if diff.to_f64() > scl.to_f64() * 1e3 {
758                        ok = false;
759                        break;
760                    }
761                }
762                ok
763            });
764
765            if let Some(ref mut emitter) = grid_emitter {
766                problem.rhs(t, &d_arr[0], &mut dy_new_buf);
767                stats.n_eval += 1;
768                let y_old = y_old_snapshot.as_deref().unwrap();
769                emitter.emit_step(
770                    t_old,
771                    y_old,
772                    &dy_old_buf,
773                    t,
774                    &d_arr[0],
775                    &dy_new_buf,
776                    &mut t_out,
777                    &mut y_out,
778                );
779                dy_old_buf.copy_from_slice(&dy_new_buf);
780            } else {
781                t_out.push(t);
782                y_out.extend_from_slice(&d_arr[0]);
783            }
784
785            // Mark Jacobian as stale.
786            current_jac = false;
787
788            // --- Order / step adaptation (gated on quasi-constant steps) ---
789            //
790            // Until we have `order + 1` consecutive equal-size accepted
791            // steps, the modified-divided-differences are not yet a faithful
792            // proxy for "constant-step" differences, so the order-selection
793            // formulas are not trustworthy. SciPy enforces the same gate.
794            if n_equal_steps < order + 1 {
795                step_count += 1;
796                continue;
797            }
798
799            // Compute error norms for orders order−1 and order+1.
800            let mut err_m_norm = S::from_f64(f64::INFINITY);
801            if order > 1 {
802                let ec = S::from_f64(ERROR_CONST[order - 1]);
803                let mut s = S::ZERO;
804                for i in 0..dim {
805                    let scl =
806                        (options.atol + options.rtol * y_new[i].abs()).max(S::from_f64(1e-300));
807                    let e = ec * d_arr[order][i] / scl;
808                    s = s + e * e;
809                }
810                err_m_norm = (s / S::from_usize(dim)).sqrt();
811            }
812
813            let mut err_p_norm = S::from_f64(f64::INFINITY);
814            if order < self.max_order {
815                let ec = S::from_f64(ERROR_CONST[order + 1]);
816                let mut s = S::ZERO;
817                for i in 0..dim {
818                    let scl =
819                        (options.atol + options.rtol * y_new[i].abs()).max(S::from_f64(1e-300));
820                    let e = ec * d_arr[order + 2][i] / scl;
821                    s = s + e * e;
822                }
823                err_p_norm = (s / S::from_usize(dim)).sqrt();
824            }
825
826            let safe_pow = |e: S, p: usize| -> S {
827                let f = e.max(S::from_f64(1e-30));
828                f.powf(-S::ONE / S::from_usize(p))
829            };
830            let factor_m = if order > self.min_order && err_m_norm.to_f64().is_finite() {
831                safe_pow(err_m_norm, order)
832            } else {
833                S::ZERO
834            };
835            let factor_o = safe_pow(err_norm, order + 1);
836            let factor_p = if order < self.max_order && err_p_norm.to_f64().is_finite() {
837                safe_pow(err_p_norm, order + 2)
838            } else {
839                S::ZERO
840            };
841
842            let mut best = factor_o;
843            let mut delta_order: i32 = 0;
844            if factor_m > best {
845                best = factor_m;
846                delta_order = -1;
847            }
848            if factor_p > best {
849                best = factor_p;
850                delta_order = 1;
851            }
852            if !best.to_f64().is_finite() {
853                best = S::ONE;
854            }
855
856            order = ((order as i32) + delta_order) as usize;
857
858            let factor = (safety * best).min(S::from_f64(MAX_FACTOR));
859            let factor = factor.max(S::from_f64(MIN_FACTOR));
860
861            change_d(&mut d_arr, order, factor);
862            n_equal_steps = 0;
863            h_abs = h_abs * factor;
864            if h_abs > h_max {
865                let cap = h_max / h_abs;
866                change_d(&mut d_arr, order, cap);
867                h_abs = h_max;
868            }
869            lu = None;
870            last_c = None;
871
872            step_count += 1;
873        }
874
875        Ok(SolverResult::new(t_out, y_out, dim, stats))
876    }
877
878    fn initial_step_size<S: Scalar>(
879        &self,
880        y0: &[S],
881        f0: &[S],
882        options: &SolverOptions<S>,
883        dim: usize,
884    ) -> S {
885        if let Some(h0) = options.h0 {
886            return h0;
887        }
888        let mut d0 = S::ZERO;
889        let mut d1 = S::ZERO;
890        for i in 0..dim {
891            let sc = (options.atol + options.rtol * y0[i].abs()).max(S::from_f64(1e-15));
892            d0 = d0 + (y0[i] / sc) * (y0[i] / sc);
893            d1 = d1 + (f0[i] / sc) * (f0[i] / sc);
894        }
895        d0 = (d0 / S::from_usize(dim)).sqrt();
896        d1 = (d1 / S::from_usize(dim)).sqrt();
897        let h0 = if d0 < S::from_f64(1e-5) || d1 < S::from_f64(1e-5) {
898            S::from_f64(1e-6)
899        } else {
900            S::from_f64(0.01) * d0 / d1
901        };
902        h0.min(options.h_max).max(options.h_min)
903    }
904
905    /// Build  M − c·J  (or  I − c·J  for ODEs).
906    fn form_iteration_matrix<S>(
907        &self,
908        jac: &[S],
909        c: S,
910        dim: usize,
911        mass: Option<&[S]>,
912    ) -> DenseMatrix<S>
913    where
914        S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
915    {
916        let mut m = DenseMatrix::zeros(dim, dim);
917        for i in 0..dim {
918            for j in 0..dim {
919                let jij = jac[i * dim + j];
920                let mij = match mass {
921                    Some(mass_data) => mass_data[i * dim + j],
922                    None => {
923                        if i == j {
924                            S::ONE
925                        } else {
926                            S::ZERO
927                        }
928                    }
929                };
930                m.set(i, j, mij - c * jij);
931            }
932        }
933        m
934    }
935}
936
937#[cfg(test)]
938mod tests {
939    use super::*;
940    use crate::problem::{DaeProblem, OdeProblem};
941
942    #[test]
943    fn test_bdf_exponential_decay() {
944        let problem = OdeProblem::new(
945            |_t, y: &[f64], dydt: &mut [f64]| {
946                dydt[0] = -y[0];
947            },
948            0.0,
949            5.0,
950            vec![1.0],
951        );
952        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
953        let result = Bdf::solve(&problem, 0.0, 5.0, &[1.0], &options).unwrap();
954        assert!(result.success);
955        let y_final = result.y_final().unwrap();
956        let expected = (-5.0_f64).exp();
957        assert!(
958            (y_final[0] - expected).abs() < 1e-2,
959            "BDF exponential: got {}, expected {}",
960            y_final[0],
961            expected
962        );
963    }
964
965    #[test]
966    fn test_bdf_stiff_decay() {
967        let problem = OdeProblem::new(
968            |_t, y: &[f64], dydt: &mut [f64]| {
969                dydt[0] = -100.0 * y[0];
970            },
971            0.0,
972            0.1,
973            vec![1.0],
974        );
975        let options = SolverOptions::default().rtol(1e-2).atol(1e-4);
976        let result = Bdf::solve(&problem, 0.0, 0.1, &[1.0], &options).unwrap();
977        assert!(result.success);
978        let y_final = result.y_final().unwrap();
979        let expected = (-10.0_f64).exp();
980        assert!(
981            (y_final[0] - expected).abs() < 0.05,
982            "BDF stiff: got {}, expected {}",
983            y_final[0],
984            expected
985        );
986    }
987
988    #[test]
989    fn test_bdf_linear_2d() {
990        let problem = OdeProblem::new(
991            |_t, y: &[f64], dydt: &mut [f64]| {
992                dydt[0] = -y[0] + y[1];
993                dydt[1] = y[0] - y[1];
994            },
995            0.0,
996            5.0,
997            vec![1.0, 0.0],
998        );
999        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
1000        let result = Bdf::solve(&problem, 0.0, 5.0, &[1.0, 0.0], &options).unwrap();
1001        assert!(result.success);
1002        let y_final = result.y_final().unwrap();
1003        assert!((y_final[0] + y_final[1] - 1.0).abs() < 1e-2);
1004    }
1005
1006    #[test]
1007    fn test_bdf_van_der_pol() {
1008        let mu = 10.0;
1009        let problem = OdeProblem::new(
1010            move |_t, y: &[f64], dydt: &mut [f64]| {
1011                dydt[0] = y[1];
1012                dydt[1] = mu * (1.0 - y[0] * y[0]) * y[1] - y[0];
1013            },
1014            0.0,
1015            20.0,
1016            vec![2.0, 0.0],
1017        );
1018        let options = SolverOptions::default().rtol(1e-2).atol(1e-4);
1019        let result = Bdf::solve(&problem, 0.0, 20.0, &[2.0, 0.0], &options);
1020        assert!(result.is_ok(), "BDF Van der Pol failed: {:?}", result.err());
1021    }
1022
1023    #[test]
1024    fn test_bdf_fixed_order() {
1025        let solver = Bdf::fixed_order(2);
1026        let problem = OdeProblem::new(
1027            |_t, y: &[f64], dydt: &mut [f64]| {
1028                dydt[0] = -y[0];
1029            },
1030            0.0,
1031            2.0,
1032            vec![1.0],
1033        );
1034        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
1035        let result = solver
1036            .solve_internal(&problem, 0.0, 2.0, &[1.0], &options)
1037            .unwrap();
1038        assert!(result.success);
1039    }
1040
1041    /// Regression: previously returned y_final = y_initial at success=true.
1042    /// The 2-state augmented system was the user's reported failure case.
1043    /// Analytical solution: y(t) = exp(-t/2), S(t) = -t·exp(-t/2).
1044    #[test]
1045    fn test_bdf_regression_silent_wrong_answer() {
1046        let problem = OdeProblem::new(
1047            |_t, y: &[f64], dydt: &mut [f64]| {
1048                dydt[0] = -0.5 * y[0];
1049                dydt[1] = -0.5 * y[1] - y[0];
1050            },
1051            0.0,
1052            2.0,
1053            vec![1.0, 0.0],
1054        );
1055        let options = SolverOptions::default().rtol(1e-4).atol(1e-6);
1056        let result = Bdf::solve(&problem, 0.0, 2.0, &[1.0, 0.0], &options).unwrap();
1057        assert!(result.success);
1058        let y_final = result.y_final().unwrap();
1059        // y(2) = exp(-1) ≈ 0.3679;  S(2) = -2·exp(-1) ≈ -0.7358
1060        let y_expected = (-1.0_f64).exp();
1061        let s_expected = -2.0 * (-1.0_f64).exp();
1062        assert!(
1063            (y_final[0] - y_expected).abs() < 1e-3,
1064            "y deviated: got {}, expected {}",
1065            y_final[0],
1066            y_expected
1067        );
1068        assert!(
1069            (y_final[1] - s_expected).abs() < 1e-3,
1070            "S deviated: got {}, expected {}",
1071            y_final[1],
1072            s_expected
1073        );
1074    }
1075
1076    /// Regression: tight rtol on simple decay used to take 308k steps or
1077    /// fail outright. SciPy takes ~44 at rtol=1e-8.
1078    #[test]
1079    fn test_bdf_regression_step_efficiency() {
1080        let problem = OdeProblem::new(
1081            |_t, y: &[f64], dydt: &mut [f64]| {
1082                dydt[0] = -y[0];
1083            },
1084            0.0,
1085            1.0,
1086            vec![1.0],
1087        );
1088        let options = SolverOptions::default().rtol(1e-6).atol(1e-9);
1089        let result = Bdf::solve(&problem, 0.0, 1.0, &[1.0], &options).unwrap();
1090        assert!(result.success);
1091        let exact = (-1.0_f64).exp();
1092        assert!((result.y_final().unwrap()[0] - exact).abs() < 1e-5);
1093        // Loose upper bound. SciPy: ~27.
1094        assert!(
1095            result.stats.n_accept < 200,
1096            "Too many accepted steps: {} (expected ~30, regression bound 200)",
1097            result.stats.n_accept
1098        );
1099    }
1100
1101    #[test]
1102    fn test_bdf_simple_dae() {
1103        let dae = DaeProblem::new(
1104            |_t, y: &[f64], dydt: &mut [f64]| {
1105                dydt[0] = -y[0] + y[1];
1106                dydt[1] = y[0] - y[1];
1107            },
1108            |mass: &mut [f64]| {
1109                for i in 0..4 {
1110                    mass[i] = 0.0;
1111                }
1112                mass[0] = 1.0;
1113            },
1114            0.0,
1115            1.0,
1116            vec![1.0, 1.0],
1117            vec![1],
1118        );
1119        let options = SolverOptions::default()
1120            .rtol(1e-4)
1121            .atol(1e-6)
1122            .max_steps(500_000);
1123        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0, 1.0], &options);
1124        assert!(result.is_ok(), "DAE solve failed: {:?}", result.err());
1125        let sol = result.unwrap();
1126        let yf = sol.y_final().unwrap();
1127        assert!((yf[0] - 1.0).abs() < 1e-4);
1128        assert!((yf[1] - 1.0).abs() < 1e-4);
1129        assert!((yf[0] - yf[1]).abs() < 1e-4);
1130    }
1131
1132    #[test]
1133    fn test_bdf_dae_with_mass_identity() {
1134        let dae = DaeProblem::new(
1135            |_t, y: &[f64], dydt: &mut [f64]| {
1136                dydt[0] = -y[0];
1137            },
1138            |mass: &mut [f64]| {
1139                mass[0] = 1.0;
1140            },
1141            0.0,
1142            1.0,
1143            vec![1.0],
1144            vec![],
1145        );
1146        let options = SolverOptions::default().rtol(1e-4).atol(1e-6);
1147        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0], &options);
1148        assert!(result.is_ok());
1149        let yf = result.unwrap().y_final().unwrap();
1150        let exact = (-1.0_f64).exp();
1151        assert!((yf[0] - exact).abs() < 1e-3);
1152    }
1153
1154    #[test]
1155    fn test_bdf_dae_scaled_mass() {
1156        let dae = DaeProblem::new(
1157            |_t, y: &[f64], dydt: &mut [f64]| {
1158                dydt[0] = -y[0];
1159            },
1160            |mass: &mut [f64]| {
1161                mass[0] = 2.0;
1162            },
1163            0.0,
1164            1.0,
1165            vec![1.0],
1166            vec![],
1167        );
1168        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
1169        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0], &options);
1170        assert!(result.is_ok());
1171        let yf = result.unwrap().y_final().unwrap();
1172        let exact = (-0.5_f64).exp();
1173        assert!((yf[0] - exact).abs() < 1e-2);
1174    }
1175
1176    /// Cross-check accepted-step counts against SciPy's BDF reference on the
1177    /// six problems specified in the BDF rewrite handoff. Bounds are loose
1178    /// (3-5× the SciPy count) to avoid flake while still catching real
1179    /// regressions.
1180    #[test]
1181    fn test_bdf_step_counts_vs_scipy() {
1182        struct Probe {
1183            name: &'static str,
1184            n_accept: usize,
1185            scipy_ref: usize,
1186            bound: usize,
1187            ok: bool,
1188        }
1189        let mut probes: Vec<Probe> = Vec::new();
1190
1191        // 1) y' = -y on [0, 1] at three tolerance levels.
1192        for &(rtol, atol, scipy_ref, bound) in &[
1193            (1e-4, 1e-6, 16usize, 60usize),
1194            (1e-6, 1e-9, 27, 100),
1195            (1e-8, 1e-11, 44, 200),
1196        ] {
1197            let p = OdeProblem::new(
1198                |_t, y: &[f64], dy: &mut [f64]| dy[0] = -y[0],
1199                0.0,
1200                1.0,
1201                vec![1.0],
1202            );
1203            let opts = SolverOptions::default().rtol(rtol).atol(atol);
1204            let r = Bdf::solve(&p, 0.0, 1.0, &[1.0], &opts).unwrap();
1205            let exact = (-1.0_f64).exp();
1206            let acc = (r.y_final().unwrap()[0] - exact).abs() < 10.0 * rtol;
1207            probes.push(Probe {
1208                name: Box::leak(format!("decay rtol={rtol:e}").into_boxed_str()),
1209                n_accept: r.stats.n_accept,
1210                scipy_ref,
1211                bound,
1212                ok: acc && r.success && r.stats.n_accept < bound,
1213            });
1214        }
1215
1216        // 2) y' = -100 y on [0, 0.1] at rtol=1e-2 (stiff decay).
1217        {
1218            let p = OdeProblem::new(
1219                |_t, y: &[f64], dy: &mut [f64]| dy[0] = -100.0 * y[0],
1220                0.0,
1221                0.1,
1222                vec![1.0],
1223            );
1224            let opts = SolverOptions::default().rtol(1e-2).atol(1e-4);
1225            let r = Bdf::solve(&p, 0.0, 0.1, &[1.0], &opts).unwrap();
1226            probes.push(Probe {
1227                name: "stiff y'=-100y",
1228                n_accept: r.stats.n_accept,
1229                scipy_ref: 12,
1230                bound: 50,
1231                ok: r.success && r.stats.n_accept < 50,
1232            });
1233        }
1234
1235        // 3) 2-state augmented system (the canary regression).
1236        {
1237            let p = OdeProblem::new(
1238                |_t, y: &[f64], dy: &mut [f64]| {
1239                    dy[0] = -0.5 * y[0];
1240                    dy[1] = -0.5 * y[1] - y[0];
1241                },
1242                0.0,
1243                2.0,
1244                vec![1.0, 0.0],
1245            );
1246            let opts = SolverOptions::default().rtol(1e-4).atol(1e-6);
1247            let r = Bdf::solve(&p, 0.0, 2.0, &[1.0, 0.0], &opts).unwrap();
1248            probes.push(Probe {
1249                name: "2-state augmented",
1250                n_accept: r.stats.n_accept,
1251                scipy_ref: 21,
1252                bound: 100,
1253                ok: r.success && r.stats.n_accept < 100,
1254            });
1255        }
1256
1257        // 4) Van der Pol μ=10, t in [0, 20].
1258        {
1259            let mu = 10.0_f64;
1260            let p = OdeProblem::new(
1261                move |_t, y: &[f64], dy: &mut [f64]| {
1262                    dy[0] = y[1];
1263                    dy[1] = mu * (1.0 - y[0] * y[0]) * y[1] - y[0];
1264                },
1265                0.0,
1266                20.0,
1267                vec![2.0, 0.0],
1268            );
1269            let opts = SolverOptions::default().rtol(1e-2).atol(1e-4);
1270            let r = Bdf::solve(&p, 0.0, 20.0, &[2.0, 0.0], &opts).unwrap();
1271            probes.push(Probe {
1272                name: "VdP μ=10",
1273                n_accept: r.stats.n_accept,
1274                scipy_ref: 80,
1275                bound: 400,
1276                ok: r.success && r.stats.n_accept < 400,
1277            });
1278        }
1279
1280        // Pretty-print the table and fail on any out-of-bound row.
1281        eprintln!("\n  problem                | n_accept |  scipy  |  bound  |  status");
1282        eprintln!("  -----------------------+----------+---------+---------+--------");
1283        for p in &probes {
1284            eprintln!(
1285                "  {:<22} | {:>8} | {:>7} | {:>7} | {}",
1286                p.name,
1287                p.n_accept,
1288                p.scipy_ref,
1289                p.bound,
1290                if p.ok { "ok" } else { "OVER" },
1291            );
1292        }
1293        assert!(
1294            probes.iter().all(|p| p.ok),
1295            "one or more BDF probes exceeded bound or failed accuracy"
1296        );
1297    }
1298}