Skip to main content

numra_ode/
bdf.rs

1//! BDF / NDF: variable-order, variable-step backward differentiation formulas.
2//!
3//! This is a Rust port of SciPy's `scipy.integrate.BDF` (the canonical
4//! implementation of Shampine–Reichelt's NDF method from MATLAB's `ode15s`).
5//! The previous revision of this file was algorithmically incorrect — see
6//! "Why a rewrite" below.
7//!
8//! ## Mathematical formulation
9//!
10//! The k-th order BDF method approximates
11//!   M · ∑ⱼ αⱼ · y_{n+1-j}  =  h · β · f(t_{n+1}, y_{n+1})
12//! where M is an optional (possibly singular) mass matrix. For ODEs M = I.
13//!
14//! Rather than storing past `y` values and applying fixed-coefficient BDF
15//! formulas (which is *not* order-k accurate when the step size varies — this
16//! was the previous file's fundamental error), we work with **modified
17//! divided differences** D, defined by:
18//!   D⁰yₙ = yₙ
19//!   Dʲyₙ = Dʲ⁻¹yₙ − Dʲ⁻¹y_{n-1}    (j ≥ 1)
20//!
21//! The differences are stored in an array `D[0..k+2]` whose rows are scaled
22//! so that the BDF formula holds *as if* the step size were constant. When h
23//! changes, the array is rescaled in-place via a small transformation matrix
24//! (`change_d`); this preserves order accuracy. This is the
25//! "fixed-leading-coefficient" / "quasi-constant step size" trick from
26//! Shampine & Reichelt (1997).
27//!
28//! ## NDF coefficients (Klopfenstein–Shampine)
29//!
30//! For each order k we store a `KAPPA[k]` "boost" parameter; setting κ = 0
31//! recovers pure BDF, while small negative κ gives the NDF method, which
32//! reduces the error constant by ~25–30% at the cost of slight stability
33//! tightening. Defaults follow Shampine–Reichelt.
34//!
35//! ## DAE support (index-1)
36//!
37//! For singular M, the iteration matrix becomes M − c·J (instead of I − c·J),
38//! and the Newton residual carries M·ψ and M·d terms. Algebraic constraint
39//! rows (zero rows of M) reduce to f_alg(t, y) = 0, enforced at every step.
40//!
41//! ## Why a rewrite
42//!
43//! The previous bdf.rs had three independent algorithmic bugs that
44//! manifested catastrophically (silent wrong answers, runaway step counts):
45//!
46//! 1. **Newton convergence test on the residual norm** — when h·f was small
47//!    in absolute terms, Newton declared convergence at iteration 0 with
48//!    `y_new = predictor = y_old`, leaving the solution unchanged while time
49//!    advanced. This is the source of the reported "y_final = y_initial at
50//!    success=true" bug.
51//! 2. **Error estimator identically ≈ 0** — the formula referenced
52//!    `history[0]`, which was always equal to the current `y` (just pushed),
53//!    so the estimate's leading term cancelled.
54//! 3. **Fixed BDF coefficients applied to variable-h history** — without the
55//!    modified-divided-differences rescaling, the method silently loses
56//!    order whenever h changes. The textbook coefficients [1, -4/3, 1/3,
57//!    2/3·h] only define BDF2 when y_n, y_{n-1} are spaced equally.
58//!
59//! ## References
60//! - L. F. Shampine and M. W. Reichelt, "The MATLAB ODE Suite", SIAM J. Sci.
61//!   Comput. 18(1), 1–22 (1997). [NDF coefficients, fixed-leading-coefficient
62//!   BDF, error estimator.]
63//! - G. D. Byrne and A. C. Hindmarsh, "A Polyalgorithm for the Numerical
64//!   Solution of ODEs", ACM TOMS 1(1), 71–96 (1975).
65//! - SciPy `scipy/integrate/_ivp/bdf.py` (BSD-3 / Apache-2.0). Reference
66//!   implementation that this file ports.
67//!
68//! Author: Moussa Leblouba
69//! Date: 5 March 2026
70//! Modified: 6 May 2026 (full rewrite based on SciPy's NDF/MDD algorithm)
71
72use faer::{ComplexField, Conjugate, SimpleEntity};
73use numra_core::Scalar;
74use numra_linalg::{DenseMatrix, LUFactorization, Matrix};
75
76use crate::error::SolverError;
77use crate::problem::OdeSystem;
78use crate::solver::{Solver, SolverOptions, SolverResult, SolverStats};
79use crate::t_eval::{validate_grid, TEvalEmitter};
80
81/// BDF / NDF solver for stiff ODEs and index-1 DAEs (orders 1-5).
82///
83/// Order control (cap, floor, pinning) is configured through
84/// [`SolverOptions::max_order`] and [`SolverOptions::min_order`].
85#[derive(Clone, Copy, Debug, Default)]
86pub struct Bdf;
87
88// ---- Algorithm constants ---------------------------------------------------
89
90const MAX_ORDER: usize = 5;
91const NEWTON_MAXITER: usize = 4;
92const MIN_FACTOR: f64 = 0.2;
93const MAX_FACTOR: f64 = 10.0;
94
95/// Klopfenstein–Shampine NDF "boost" parameter for orders 0..=5.
96/// Setting all to zero recovers pure BDF.
97const KAPPA: [f64; MAX_ORDER + 1] = [0.0, -0.1850, -1.0 / 9.0, -0.0823, -0.0415, 0.0];
98
99/// γ_k = ∑_{i=1}^{k} 1/i, with γ_0 = 0.
100const GAMMA: [f64; MAX_ORDER + 1] = [
101    0.0,
102    1.0,
103    1.0 + 1.0 / 2.0,
104    1.0 + 1.0 / 2.0 + 1.0 / 3.0,
105    1.0 + 1.0 / 2.0 + 1.0 / 3.0 + 1.0 / 4.0,
106    1.0 + 1.0 / 2.0 + 1.0 / 3.0 + 1.0 / 4.0 + 1.0 / 5.0,
107];
108
109/// α_k = (1 − κ_k) γ_k. The leading coefficient in the BDF/NDF
110/// fixed-leading-coefficient form.
111const ALPHA: [f64; MAX_ORDER + 1] = [
112    (1.0 - KAPPA[0]) * GAMMA[0],
113    (1.0 - KAPPA[1]) * GAMMA[1],
114    (1.0 - KAPPA[2]) * GAMMA[2],
115    (1.0 - KAPPA[3]) * GAMMA[3],
116    (1.0 - KAPPA[4]) * GAMMA[4],
117    (1.0 - KAPPA[5]) * GAMMA[5],
118];
119
120/// Error constant C_{k+1} for order k: C = κ_k γ_k + 1/(k+1). Scales the
121/// next-order divided difference into a local truncation error estimate.
122const ERROR_CONST: [f64; MAX_ORDER + 1] = [
123    KAPPA[0] * GAMMA[0] + 1.0,
124    KAPPA[1] * GAMMA[1] + 1.0 / 2.0,
125    KAPPA[2] * GAMMA[2] + 1.0 / 3.0,
126    KAPPA[3] * GAMMA[3] + 1.0 / 4.0,
127    KAPPA[4] * GAMMA[4] + 1.0 / 5.0,
128    KAPPA[5] * GAMMA[5] + 1.0 / 6.0,
129];
130
131const RU_SIZE: usize = (MAX_ORDER + 1) * (MAX_ORDER + 1); // = 36
132
133// ---- Solver trait impl -----------------------------------------------------
134
135impl<S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField> Solver<S> for Bdf {
136    fn solve<Sys: OdeSystem<S>>(
137        problem: &Sys,
138        t0: S,
139        tf: S,
140        y0: &[S],
141        options: &SolverOptions<S>,
142    ) -> Result<SolverResult<S>, SolverError> {
143        solve_internal(problem, t0, tf, y0, options)
144    }
145}
146
147/// Resolve the BDF order window from `SolverOptions`, clamping to the
148/// algorithmic `[1, MAX_ORDER]` range and ensuring `min_order ≤ max_order`.
149fn resolve_order_bounds<S: Scalar>(options: &SolverOptions<S>) -> (usize, usize) {
150    let max = options.max_order.unwrap_or(MAX_ORDER).clamp(1, MAX_ORDER);
151    let min = options.min_order.unwrap_or(1).clamp(1, max);
152    (min, max)
153}
154
155// ---- compute_R / change_D --------------------------------------------------
156//
157// `compute_r(order, factor)` builds the (order+1)×(order+1) transformation
158// matrix R such that, when D is the modified-divided-differences array for
159// step h, the rescaled differences for step (factor·h) are obtained by
160//   D' = (R · U)^T · D
161// where U = compute_r(order, 1). For numerical efficiency we use stack
162// arrays of size (MAX_ORDER+1)² = 36 and only access the leading
163// (order+1)×(order+1) submatrix.
164//
165// Reference: SciPy's `_ivp/bdf.py::compute_R`, derived from Shampine &
166// Reichelt §3.
167
168/// Compute R(order, factor) into the leading (order+1)×(order+1) block of `out`.
169/// Layout: out[r * (MAX_ORDER+1) + c] = R[r, c].
170fn compute_r<S: Scalar>(order: usize, factor: S, out: &mut [S; RU_SIZE]) {
171    for slot in out.iter_mut() {
172        *slot = S::ZERO;
173    }
174    let stride = MAX_ORDER + 1;
175
176    // M[0, :] = 1
177    for c in 0..=order {
178        out[c] = S::ONE;
179    }
180    // M[r, c] = (r - 1 - factor * c) / r  for r, c in 1..=order
181    for r in 1..=order {
182        for c in 1..=order {
183            let num = S::from_usize(r - 1) - factor * S::from_usize(c);
184            out[r * stride + c] = num / S::from_usize(r);
185        }
186    }
187    // Cumulative product down each column.
188    for r in 1..=order {
189        for c in 0..=order {
190            let prev = out[(r - 1) * stride + c];
191            out[r * stride + c] = out[r * stride + c] * prev;
192        }
193    }
194}
195
196/// In-place: replace D[0..=order] with (R·U)^T · D[0..=order],
197/// where R = compute_r(order, factor) and U = compute_r(order, 1).
198fn change_d<S: Scalar>(d: &mut [Vec<S>], order: usize, factor: S) {
199    // factor = 1 is a no-op; bail early.
200    if (factor - S::ONE).abs() < S::from_f64(1e-15) {
201        return;
202    }
203
204    let mut r_mat = [S::ZERO; RU_SIZE];
205    let mut u_mat = [S::ZERO; RU_SIZE];
206    compute_r(order, factor, &mut r_mat);
207    compute_r(order, S::ONE, &mut u_mat);
208
209    let stride = MAX_ORDER + 1;
210
211    // RU = R · U  (only the (order+1)×(order+1) leading block matters).
212    let mut ru = [S::ZERO; RU_SIZE];
213    for i in 0..=order {
214        for j in 0..=order {
215            let mut s = S::ZERO;
216            for k in 0..=order {
217                s = s + r_mat[i * stride + k] * u_mat[k * stride + j];
218            }
219            ru[i * stride + j] = s;
220        }
221    }
222
223    // Apply (RU)^T to the rows of D:  D'_i = ∑_k RU[k, i] · D_k.
224    let dim = d[0].len();
225    let mut new_rows: Vec<Vec<S>> = (0..=order).map(|_| vec![S::ZERO; dim]).collect();
226    for i in 0..=order {
227        for k in 0..=order {
228            let coef = ru[k * stride + i];
229            if coef == S::ZERO {
230                continue;
231            }
232            for col in 0..dim {
233                new_rows[i][col] = new_rows[i][col] + coef * d[k][col];
234            }
235        }
236    }
237    for (i, row) in new_rows.into_iter().enumerate() {
238        d[i] = row;
239    }
240}
241
242// ---- Newton solver ---------------------------------------------------------
243
244struct NewtonOutcome<S: Scalar> {
245    converged: bool,
246    n_iter: usize,
247    y_new: Vec<S>,
248    /// d = y_new − y_predict. Equals the (order+1)-th modified divided
249    /// difference at the new step, and is the raw input to the local
250    /// truncation error estimate.
251    d: Vec<S>,
252}
253
254/// Bookkeeping for an accepted step, captured inside the inner retry loop
255/// and consumed by the commit phase.
256struct AcceptedStep<S: Scalar> {
257    y_new: Vec<S>,
258    d_corr: Vec<S>,
259    err_norm: S,
260    h_abs_used: S,
261    safety: S,
262}
263
264/// Solve the BDF/NDF algebraic system via simplified Newton iteration.
265///
266/// At convergence:  M·d = c·f(t_new, y_predict + d) − M·ψ.
267/// Iteration uses the pre-factored linear operator (M − c·J).
268///
269/// Convergence test (Hairer–Wanner ODE II §IV.8, also SciPy):
270///   converged ⟺ rate / (1 − rate) · ‖dy‖ < tol
271/// with `rate` = ratio of successive correction norms. This is a *correction*
272/// norm test, not a residual norm test — using residual norms (as the
273/// previous file did) leads to false convergence at iteration 0 when the
274/// predictor is near the new point in absolute terms but Newton hasn't
275/// actually moved.
276#[allow(clippy::too_many_arguments)]
277fn solve_bdf_system<S, Sys>(
278    problem: &Sys,
279    t_new: S,
280    y_predict: &[S],
281    c: S,
282    psi: &[S],
283    lu: &LUFactorization<S>,
284    scale: &[S],
285    tol: S,
286    mass: Option<&[S]>,
287    stats: &mut SolverStats,
288    dim: usize,
289) -> Result<NewtonOutcome<S>, SolverError>
290where
291    S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
292    Sys: OdeSystem<S>,
293{
294    let mut d = vec![S::ZERO; dim];
295    let mut y = y_predict.to_vec();
296    let mut f_buf = vec![S::ZERO; dim];
297    let mut rhs_buf = vec![S::ZERO; dim];
298
299    // Pre-compute M·ψ once (constant within the Newton loop).
300    let mut m_psi = vec![S::ZERO; dim];
301    if let Some(m) = mass {
302        for i in 0..dim {
303            let mut s = S::ZERO;
304            for j in 0..dim {
305                s = s + m[i * dim + j] * psi[j];
306            }
307            m_psi[i] = s;
308        }
309    } else {
310        m_psi.copy_from_slice(psi);
311    }
312
313    let mut dy_norm_old: Option<S> = None;
314    let mut converged = false;
315    let mut n_iter_done = 0usize;
316
317    for iter in 0..NEWTON_MAXITER {
318        n_iter_done = iter + 1;
319
320        problem.rhs(t_new, &y, &mut f_buf);
321        stats.n_eval += 1;
322
323        // Bail on NaN/Inf in f.
324        if f_buf.iter().any(|v| !v.to_f64().is_finite()) {
325            break;
326        }
327
328        // RHS of Newton update:  c·f − M·(ψ + d).
329        // For ODEs (M = I), this simplifies to  c·f − ψ − d.
330        if let Some(m) = mass {
331            for i in 0..dim {
332                let mut md = S::ZERO;
333                for j in 0..dim {
334                    md = md + m[i * dim + j] * d[j];
335                }
336                rhs_buf[i] = c * f_buf[i] - m_psi[i] - md;
337            }
338        } else {
339            for i in 0..dim {
340                rhs_buf[i] = c * f_buf[i] - psi[i] - d[i];
341            }
342        }
343
344        let dy = lu.solve(&rhs_buf)?;
345
346        // Weighted-RMS norm of the correction.
347        let mut dy_norm_sq = S::ZERO;
348        for i in 0..dim {
349            let r = dy[i] / scale[i];
350            dy_norm_sq = dy_norm_sq + r * r;
351        }
352        let dy_norm = (dy_norm_sq / S::from_usize(dim)).sqrt();
353
354        let rate = dy_norm_old.and_then(|old| {
355            if old > S::ZERO {
356                Some(dy_norm / old)
357            } else {
358                None
359            }
360        });
361
362        // Early divergence: rate ≥ 1, or projected error after the remaining
363        // iterations would still exceed tol.
364        if let Some(r) = rate {
365            if r >= S::ONE {
366                break;
367            }
368            let remaining = NEWTON_MAXITER - iter;
369            let r_pow = r.powf(S::from_usize(remaining));
370            if r_pow / (S::ONE - r) * dy_norm > tol {
371                break;
372            }
373        }
374
375        // Apply correction.
376        for i in 0..dim {
377            y[i] = y[i] + dy[i];
378            d[i] = d[i] + dy[i];
379        }
380
381        // Convergence test on the *correction*.
382        if dy_norm == S::ZERO {
383            converged = true;
384            break;
385        }
386        if let Some(r) = rate {
387            if r / (S::ONE - r) * dy_norm < tol {
388                converged = true;
389                break;
390            }
391        }
392
393        dy_norm_old = Some(dy_norm);
394    }
395
396    Ok(NewtonOutcome {
397        converged,
398        n_iter: n_iter_done,
399        y_new: y,
400        d,
401    })
402}
403
404// ---- Main solve loop -------------------------------------------------------
405
406fn solve_internal<S, Sys>(
407    problem: &Sys,
408    t0: S,
409    tf: S,
410    y0: &[S],
411    options: &SolverOptions<S>,
412) -> Result<SolverResult<S>, SolverError>
413where
414    S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
415    Sys: OdeSystem<S>,
416{
417    let (min_order, max_order) = resolve_order_bounds(options);
418
419    let dim = problem.dim();
420    if y0.len() != dim {
421        return Err(SolverError::DimensionMismatch {
422            expected: dim,
423            actual: y0.len(),
424        });
425    }
426
427    let mut stats = SolverStats::default();
428
429    let mut t = t0;
430    let direction = if tf > t0 { S::ONE } else { -S::ONE };
431
432    if let Some(grid) = options.t_eval.as_deref() {
433        validate_grid(grid, t0, tf)?;
434    }
435    let mut grid_emitter = options
436        .t_eval
437        .as_deref()
438        .map(|g| TEvalEmitter::new(g, direction));
439    let (mut t_out, mut y_out) = if grid_emitter.is_some() {
440        (Vec::new(), Vec::new())
441    } else {
442        (vec![t0], y0.to_vec())
443    };
444    // Slopes at the previous accepted t (start of step) and the new t,
445    // needed by the Hermite interpolant in t_eval mode. BDF doesn't keep
446    // f(t, y) hot between steps, so we do explicit RHS evaluations only
447    // when a grid is supplied.
448    let mut dy_old_buf = vec![S::ZERO; dim];
449    let mut dy_new_buf = vec![S::ZERO; dim];
450    if grid_emitter.is_some() {
451        problem.rhs(t0, y0, &mut dy_old_buf);
452        stats.n_eval += 1;
453    }
454
455    // ---- Initial RHS, step size, and divided differences ----
456    let mut f_eval = vec![S::ZERO; dim];
457    problem.rhs(t, y0, &mut f_eval);
458    stats.n_eval += 1;
459
460    let mut h_abs = initial_step_size(y0, &f_eval, options, dim);
461
462    // D[0] = y_0;  D[1] = h_0 · f_0 · direction;  D[2..] = 0.
463    let mut d_arr: Vec<Vec<S>> = (0..MAX_ORDER + 3).map(|_| vec![S::ZERO; dim]).collect();
464    d_arr[0].copy_from_slice(y0);
465    for i in 0..dim {
466        d_arr[1][i] = h_abs * f_eval[i] * direction;
467    }
468
469    // Mass matrix.
470    let mass_data = if problem.has_mass_matrix() {
471        let mut m = vec![S::ZERO; dim * dim];
472        problem.mass_matrix(&mut m);
473        Some(m)
474    } else {
475        None
476    };
477    let mass_ref = mass_data.as_deref();
478
479    // Jacobian + LU. Delegates to OdeSystem::jacobian, which lets a
480    // system override with an analytical Jacobian (e.g. MOLSystem*) and
481    // otherwise falls through to the canonical FD default in
482    // problem.rs. Costs one extra rhs evaluation per Jacobian rebuild
483    // vs the previous inlined path because the trait default
484    // recomputes f0 internally; that overhead is dominated by the LU
485    // on every problem we care about.
486    let mut jac = vec![S::ZERO; dim * dim];
487    problem.jacobian(t, y0, &mut jac);
488    stats.n_jac += 1;
489    let mut current_jac = true;
490    let mut lu: Option<LUFactorization<S>> = None;
491    let mut last_c: Option<S> = None;
492
493    // Order management.
494    let mut order = 1usize;
495    let mut n_equal_steps: usize = 0;
496
497    // Newton tolerance (Hairer formula).
498    let uround = S::from_f64(2.220446049250313e-16);
499    let newton_tol =
500        (S::from_f64(10.0) * uround / options.rtol).max(S::from_f64(0.03).min(options.rtol.sqrt()));
501
502    let h_min = options.h_min;
503    let h_max = options.h_max.min((tf - t0).abs());
504
505    let mut step_count = 0usize;
506
507    while (tf - t) * direction > S::from_f64(1e-12) * (tf - t0).abs() {
508        if step_count >= options.max_steps {
509            return Err(SolverError::MaxIterationsExceeded { t: t.to_f64() });
510        }
511
512        // --- Step-size cap pre-checks (SciPy: top of _step_impl) ---
513        let ulp_min = S::from_f64(10.0) * uround * (t.abs().max(tf.abs())).max(S::ONE);
514        let min_step = h_min.max(ulp_min);
515
516        if h_abs > h_max {
517            let factor = h_max / h_abs;
518            change_d(&mut d_arr, order, factor);
519            n_equal_steps = 0;
520            h_abs = h_max;
521            lu = None;
522            last_c = None;
523        } else if h_abs < min_step {
524            let factor = min_step / h_abs;
525            change_d(&mut d_arr, order, factor);
526            n_equal_steps = 0;
527            h_abs = min_step;
528            lu = None;
529            last_c = None;
530        }
531
532        // --- Inner loop: try, retry, possibly reduce h ---
533        let mut step_accepted = false;
534        // (y_new, d_correction, error_norm, h_abs_used, safety_factor)
535        let mut accepted: Option<AcceptedStep<S>> = None;
536
537        while !step_accepted {
538            if h_abs < min_step {
539                return Err(SolverError::StepSizeTooSmall {
540                    t: t.to_f64(),
541                    h: h_abs.to_f64(),
542                    h_min: h_min.to_f64(),
543                });
544            }
545
546            let h = h_abs * direction;
547            let mut t_new = t + h;
548            let mut h_used = h;
549            let mut h_abs_used = h_abs;
550
551            // Clip to t_bound (rescale D to match the shorter step).
552            if (t_new - tf) * direction > S::ZERO {
553                t_new = tf;
554                h_used = t_new - t;
555                h_abs_used = h_used.abs();
556                let factor = h_abs_used / h_abs;
557                change_d(&mut d_arr, order, factor);
558                n_equal_steps = 0;
559                h_abs = h_abs_used;
560                lu = None;
561                last_c = None;
562            }
563
564            // Predictor:  y_predict = ∑_{k=0..order} D[k]
565            let mut y_predict = vec![S::ZERO; dim];
566            for k in 0..=order {
567                for i in 0..dim {
568                    y_predict[i] = y_predict[i] + d_arr[k][i];
569                }
570            }
571
572            // ψ = (∑_{j=1..order} γ_j · D[j]) / α_order
573            let alpha_o = S::from_f64(ALPHA[order]);
574            let mut psi = vec![S::ZERO; dim];
575            for j in 1..=order {
576                let g = S::from_f64(GAMMA[j]);
577                for i in 0..dim {
578                    psi[i] = psi[i] + g * d_arr[j][i];
579                }
580            }
581            for i in 0..dim {
582                psi[i] = psi[i] / alpha_o;
583            }
584
585            let c = h_used / alpha_o;
586
587            // Pre-Newton scale based on |y_predict|.
588            let mut scale = vec![S::ZERO; dim];
589            for i in 0..dim {
590                scale[i] =
591                    (options.atol + options.rtol * y_predict[i].abs()).max(S::from_f64(1e-300));
592            }
593
594            // --- Newton with retry-on-stale-Jacobian ---
595            let outcome;
596            loop {
597                if lu.is_none() || last_c != Some(c) {
598                    let it = form_iteration_matrix(&jac, c, dim, mass_ref);
599                    lu = Some(LUFactorization::new(&it)?);
600                    stats.n_lu += 1;
601                    last_c = Some(c);
602                }
603                let attempt = solve_bdf_system(
604                    problem,
605                    t_new,
606                    &y_predict,
607                    c,
608                    &psi,
609                    lu.as_ref().unwrap(),
610                    &scale,
611                    newton_tol,
612                    mass_ref,
613                    &mut stats,
614                    dim,
615                )?;
616
617                if attempt.converged || current_jac {
618                    outcome = attempt;
619                    break;
620                }
621
622                // Stale Jacobian: refresh f baseline + J, retry
623                // *without* reducing h. Critical: previous BDF skipped
624                // this and reduced h on every Newton failure, which
625                // wasted enormous numbers of steps.
626                problem.rhs(t_new, &y_predict, &mut f_eval);
627                stats.n_eval += 1;
628                problem.jacobian(t_new, &y_predict, &mut jac);
629                stats.n_jac += 1;
630                current_jac = true;
631                lu = None;
632                last_c = None;
633            }
634
635            if !outcome.converged {
636                // Real Newton failure with fresh Jacobian. Halve h.
637                let factor = S::from_f64(0.5);
638                change_d(&mut d_arr, order, factor);
639                n_equal_steps = 0;
640                h_abs = h_abs * factor;
641                stats.n_reject += 1;
642                lu = None;
643                last_c = None;
644                continue;
645            }
646
647            // --- Newton converged; check error norm ---
648            let safety = S::from_f64(0.9 * (2.0 * NEWTON_MAXITER as f64 + 1.0))
649                / S::from_f64(2.0 * NEWTON_MAXITER as f64 + outcome.n_iter as f64);
650
651            let mut scale_new = vec![S::ZERO; dim];
652            for i in 0..dim {
653                scale_new[i] =
654                    (options.atol + options.rtol * outcome.y_new[i].abs()).max(S::from_f64(1e-300));
655            }
656
657            let err_const = S::from_f64(ERROR_CONST[order]);
658            let mut err_sq = S::ZERO;
659            for i in 0..dim {
660                let e = err_const * outcome.d[i] / scale_new[i];
661                err_sq = err_sq + e * e;
662            }
663            let err_norm = (err_sq / S::from_usize(dim)).sqrt();
664
665            if err_norm > S::ONE {
666                // Reject by error: shrink h. Do NOT reset LU per SciPy
667                // (Newton was fine, only the error was too big), but DO
668                // reset last_c since c changes with h.
669                let order_p1 = S::from_usize(order + 1);
670                let factor =
671                    S::from_f64(MIN_FACTOR).max(safety * err_norm.powf(-S::ONE / order_p1));
672                change_d(&mut d_arr, order, factor);
673                n_equal_steps = 0;
674                h_abs = h_abs * factor;
675                stats.n_reject += 1;
676                last_c = None;
677                continue;
678            }
679
680            accepted = Some(AcceptedStep {
681                y_new: outcome.y_new,
682                d_corr: outcome.d,
683                err_norm,
684                h_abs_used,
685                safety,
686            });
687            step_accepted = true;
688        }
689
690        let AcceptedStep {
691            y_new,
692            d_corr,
693            err_norm,
694            h_abs_used,
695            safety,
696        } = accepted.unwrap();
697
698        // --- Commit accepted step ---
699        stats.n_accept += 1;
700        n_equal_steps += 1;
701        let t_old = t;
702        // Snapshot the old state before the divided-differences update
703        // overwrites D[0]; needed by the Hermite interpolant.
704        let y_old_snapshot = if grid_emitter.is_some() {
705            Some(d_arr[0].clone())
706        } else {
707            None
708        };
709        t = t + h_abs_used * direction;
710        h_abs = h_abs_used;
711
712        // Update modified divided differences in-place. The principal
713        // relation here is D^{j+1} y_n = D^j y_n − D^j y_{n-1}; combined
714        // with d_corr = D^{order+1} y_n, this elegant cascade updates
715        // the entire table:
716        //   D[order+2] ← d − D[order+1]
717        //   D[order+1] ← d
718        //   D[i]      ← D[i] + D[i+1]    for i = order, …, 0
719        for i in 0..dim {
720            d_arr[order + 2][i] = d_corr[i] - d_arr[order + 1][i];
721            d_arr[order + 1][i] = d_corr[i];
722        }
723        for k in (0..=order).rev() {
724            for i in 0..dim {
725                d_arr[k][i] = d_arr[k][i] + d_arr[k + 1][i];
726            }
727        }
728
729        // After update, D[0] should equal y_new (modulo Newton tolerance).
730        debug_assert!({
731            let mut ok = true;
732            for i in 0..dim {
733                let diff = (d_arr[0][i] - y_new[i]).abs();
734                let scl = options.atol + options.rtol * y_new[i].abs().max(S::ONE);
735                if diff.to_f64() > scl.to_f64() * 1e3 {
736                    ok = false;
737                    break;
738                }
739            }
740            ok
741        });
742
743        if let Some(ref mut emitter) = grid_emitter {
744            problem.rhs(t, &d_arr[0], &mut dy_new_buf);
745            stats.n_eval += 1;
746            let y_old = y_old_snapshot.as_deref().unwrap();
747            emitter.emit_step(
748                t_old,
749                y_old,
750                &dy_old_buf,
751                t,
752                &d_arr[0],
753                &dy_new_buf,
754                &mut t_out,
755                &mut y_out,
756            );
757            dy_old_buf.copy_from_slice(&dy_new_buf);
758        } else {
759            t_out.push(t);
760            y_out.extend_from_slice(&d_arr[0]);
761        }
762
763        // Mark Jacobian as stale.
764        current_jac = false;
765
766        // --- Order / step adaptation (gated on quasi-constant steps) ---
767        //
768        // Until we have `order + 1` consecutive equal-size accepted
769        // steps, the modified-divided-differences are not yet a faithful
770        // proxy for "constant-step" differences, so the order-selection
771        // formulas are not trustworthy. SciPy enforces the same gate.
772        if n_equal_steps < order + 1 {
773            step_count += 1;
774            continue;
775        }
776
777        // Compute error norms for orders order−1 and order+1.
778        let mut err_m_norm = S::from_f64(f64::INFINITY);
779        if order > 1 {
780            let ec = S::from_f64(ERROR_CONST[order - 1]);
781            let mut s = S::ZERO;
782            for i in 0..dim {
783                let scl = (options.atol + options.rtol * y_new[i].abs()).max(S::from_f64(1e-300));
784                let e = ec * d_arr[order][i] / scl;
785                s = s + e * e;
786            }
787            err_m_norm = (s / S::from_usize(dim)).sqrt();
788        }
789
790        let mut err_p_norm = S::from_f64(f64::INFINITY);
791        if order < max_order {
792            let ec = S::from_f64(ERROR_CONST[order + 1]);
793            let mut s = S::ZERO;
794            for i in 0..dim {
795                let scl = (options.atol + options.rtol * y_new[i].abs()).max(S::from_f64(1e-300));
796                let e = ec * d_arr[order + 2][i] / scl;
797                s = s + e * e;
798            }
799            err_p_norm = (s / S::from_usize(dim)).sqrt();
800        }
801
802        let safe_pow = |e: S, p: usize| -> S {
803            let f = e.max(S::from_f64(1e-30));
804            f.powf(-S::ONE / S::from_usize(p))
805        };
806        let factor_m = if order > min_order && err_m_norm.to_f64().is_finite() {
807            safe_pow(err_m_norm, order)
808        } else {
809            S::ZERO
810        };
811        let factor_o = safe_pow(err_norm, order + 1);
812        let factor_p = if order < max_order && err_p_norm.to_f64().is_finite() {
813            safe_pow(err_p_norm, order + 2)
814        } else {
815            S::ZERO
816        };
817
818        let mut best = factor_o;
819        let mut delta_order: i32 = 0;
820        if factor_m > best {
821            best = factor_m;
822            delta_order = -1;
823        }
824        if factor_p > best {
825            best = factor_p;
826            delta_order = 1;
827        }
828        if !best.to_f64().is_finite() {
829            best = S::ONE;
830        }
831
832        order = ((order as i32) + delta_order) as usize;
833
834        let factor = (safety * best).min(S::from_f64(MAX_FACTOR));
835        let factor = factor.max(S::from_f64(MIN_FACTOR));
836
837        change_d(&mut d_arr, order, factor);
838        n_equal_steps = 0;
839        h_abs = h_abs * factor;
840        if h_abs > h_max {
841            let cap = h_max / h_abs;
842            change_d(&mut d_arr, order, cap);
843            h_abs = h_max;
844        }
845        lu = None;
846        last_c = None;
847
848        step_count += 1;
849    }
850
851    Ok(SolverResult::new(t_out, y_out, dim, stats))
852}
853
854fn initial_step_size<S: Scalar>(y0: &[S], f0: &[S], options: &SolverOptions<S>, dim: usize) -> S {
855    if let Some(h0) = options.h0 {
856        return h0;
857    }
858    let mut d0 = S::ZERO;
859    let mut d1 = S::ZERO;
860    for i in 0..dim {
861        let sc = (options.atol + options.rtol * y0[i].abs()).max(S::from_f64(1e-15));
862        d0 = d0 + (y0[i] / sc) * (y0[i] / sc);
863        d1 = d1 + (f0[i] / sc) * (f0[i] / sc);
864    }
865    d0 = (d0 / S::from_usize(dim)).sqrt();
866    d1 = (d1 / S::from_usize(dim)).sqrt();
867    let h0 = if d0 < S::from_f64(1e-5) || d1 < S::from_f64(1e-5) {
868        S::from_f64(1e-6)
869    } else {
870        S::from_f64(0.01) * d0 / d1
871    };
872    h0.min(options.h_max).max(options.h_min)
873}
874
875/// Build  M − c·J  (or  I − c·J  for ODEs).
876fn form_iteration_matrix<S>(jac: &[S], c: S, dim: usize, mass: Option<&[S]>) -> DenseMatrix<S>
877where
878    S: Scalar + SimpleEntity + Conjugate<Canonical = S> + ComplexField,
879{
880    let mut m = DenseMatrix::zeros(dim, dim);
881    for i in 0..dim {
882        for j in 0..dim {
883            let jij = jac[i * dim + j];
884            let mij = match mass {
885                Some(mass_data) => mass_data[i * dim + j],
886                None => {
887                    if i == j {
888                        S::ONE
889                    } else {
890                        S::ZERO
891                    }
892                }
893            };
894            m.set(i, j, mij - c * jij);
895        }
896    }
897    m
898}
899
900#[cfg(test)]
901mod tests {
902    use super::*;
903    use crate::problem::{DaeProblem, OdeProblem};
904
905    #[test]
906    fn test_bdf_exponential_decay() {
907        let problem = OdeProblem::new(
908            |_t, y: &[f64], dydt: &mut [f64]| {
909                dydt[0] = -y[0];
910            },
911            0.0,
912            5.0,
913            vec![1.0],
914        );
915        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
916        let result = Bdf::solve(&problem, 0.0, 5.0, &[1.0], &options).unwrap();
917        assert!(result.success);
918        let y_final = result.y_final().unwrap();
919        let expected = (-5.0_f64).exp();
920        assert!(
921            (y_final[0] - expected).abs() < 1e-2,
922            "BDF exponential: got {}, expected {}",
923            y_final[0],
924            expected
925        );
926    }
927
928    #[test]
929    fn test_bdf_stiff_decay() {
930        let problem = OdeProblem::new(
931            |_t, y: &[f64], dydt: &mut [f64]| {
932                dydt[0] = -100.0 * y[0];
933            },
934            0.0,
935            0.1,
936            vec![1.0],
937        );
938        let options = SolverOptions::default().rtol(1e-2).atol(1e-4);
939        let result = Bdf::solve(&problem, 0.0, 0.1, &[1.0], &options).unwrap();
940        assert!(result.success);
941        let y_final = result.y_final().unwrap();
942        let expected = (-10.0_f64).exp();
943        assert!(
944            (y_final[0] - expected).abs() < 0.05,
945            "BDF stiff: got {}, expected {}",
946            y_final[0],
947            expected
948        );
949    }
950
951    #[test]
952    fn test_bdf_linear_2d() {
953        let problem = OdeProblem::new(
954            |_t, y: &[f64], dydt: &mut [f64]| {
955                dydt[0] = -y[0] + y[1];
956                dydt[1] = y[0] - y[1];
957            },
958            0.0,
959            5.0,
960            vec![1.0, 0.0],
961        );
962        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
963        let result = Bdf::solve(&problem, 0.0, 5.0, &[1.0, 0.0], &options).unwrap();
964        assert!(result.success);
965        let y_final = result.y_final().unwrap();
966        assert!((y_final[0] + y_final[1] - 1.0).abs() < 1e-2);
967    }
968
969    #[test]
970    fn test_bdf_van_der_pol() {
971        let mu = 10.0;
972        let problem = OdeProblem::new(
973            move |_t, y: &[f64], dydt: &mut [f64]| {
974                dydt[0] = y[1];
975                dydt[1] = mu * (1.0 - y[0] * y[0]) * y[1] - y[0];
976            },
977            0.0,
978            20.0,
979            vec![2.0, 0.0],
980        );
981        let options = SolverOptions::default().rtol(1e-2).atol(1e-4);
982        let result = Bdf::solve(&problem, 0.0, 20.0, &[2.0, 0.0], &options);
983        assert!(result.is_ok(), "BDF Van der Pol failed: {:?}", result.err());
984    }
985
986    /// Regression: previously returned y_final = y_initial at success=true.
987    /// The 2-state augmented system was the user's reported failure case.
988    /// Analytical solution: y(t) = exp(-t/2), S(t) = -t·exp(-t/2).
989    #[test]
990    fn test_bdf_regression_silent_wrong_answer() {
991        let problem = OdeProblem::new(
992            |_t, y: &[f64], dydt: &mut [f64]| {
993                dydt[0] = -0.5 * y[0];
994                dydt[1] = -0.5 * y[1] - y[0];
995            },
996            0.0,
997            2.0,
998            vec![1.0, 0.0],
999        );
1000        let options = SolverOptions::default().rtol(1e-4).atol(1e-6);
1001        let result = Bdf::solve(&problem, 0.0, 2.0, &[1.0, 0.0], &options).unwrap();
1002        assert!(result.success);
1003        let y_final = result.y_final().unwrap();
1004        // y(2) = exp(-1) ≈ 0.3679;  S(2) = -2·exp(-1) ≈ -0.7358
1005        let y_expected = (-1.0_f64).exp();
1006        let s_expected = -2.0 * (-1.0_f64).exp();
1007        assert!(
1008            (y_final[0] - y_expected).abs() < 1e-3,
1009            "y deviated: got {}, expected {}",
1010            y_final[0],
1011            y_expected
1012        );
1013        assert!(
1014            (y_final[1] - s_expected).abs() < 1e-3,
1015            "S deviated: got {}, expected {}",
1016            y_final[1],
1017            s_expected
1018        );
1019    }
1020
1021    /// Regression: tight rtol on simple decay used to take 308k steps or
1022    /// fail outright. SciPy takes ~44 at rtol=1e-8.
1023    #[test]
1024    fn test_bdf_regression_step_efficiency() {
1025        let problem = OdeProblem::new(
1026            |_t, y: &[f64], dydt: &mut [f64]| {
1027                dydt[0] = -y[0];
1028            },
1029            0.0,
1030            1.0,
1031            vec![1.0],
1032        );
1033        let options = SolverOptions::default().rtol(1e-6).atol(1e-9);
1034        let result = Bdf::solve(&problem, 0.0, 1.0, &[1.0], &options).unwrap();
1035        assert!(result.success);
1036        let exact = (-1.0_f64).exp();
1037        assert!((result.y_final().unwrap()[0] - exact).abs() < 1e-5);
1038        // Loose upper bound. SciPy: ~27.
1039        assert!(
1040            result.stats.n_accept < 200,
1041            "Too many accepted steps: {} (expected ~30, regression bound 200)",
1042            result.stats.n_accept
1043        );
1044    }
1045
1046    #[test]
1047    fn test_bdf_simple_dae() {
1048        let dae = DaeProblem::new(
1049            |_t, y: &[f64], dydt: &mut [f64]| {
1050                dydt[0] = -y[0] + y[1];
1051                dydt[1] = y[0] - y[1];
1052            },
1053            |mass: &mut [f64]| {
1054                for i in 0..4 {
1055                    mass[i] = 0.0;
1056                }
1057                mass[0] = 1.0;
1058            },
1059            0.0,
1060            1.0,
1061            vec![1.0, 1.0],
1062            vec![1],
1063        );
1064        let options = SolverOptions::default()
1065            .rtol(1e-4)
1066            .atol(1e-6)
1067            .max_steps(500_000);
1068        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0, 1.0], &options);
1069        assert!(result.is_ok(), "DAE solve failed: {:?}", result.err());
1070        let sol = result.unwrap();
1071        let yf = sol.y_final().unwrap();
1072        assert!((yf[0] - 1.0).abs() < 1e-4);
1073        assert!((yf[1] - 1.0).abs() < 1e-4);
1074        assert!((yf[0] - yf[1]).abs() < 1e-4);
1075    }
1076
1077    #[test]
1078    fn test_bdf_dae_with_mass_identity() {
1079        let dae = DaeProblem::new(
1080            |_t, y: &[f64], dydt: &mut [f64]| {
1081                dydt[0] = -y[0];
1082            },
1083            |mass: &mut [f64]| {
1084                mass[0] = 1.0;
1085            },
1086            0.0,
1087            1.0,
1088            vec![1.0],
1089            vec![],
1090        );
1091        let options = SolverOptions::default().rtol(1e-4).atol(1e-6);
1092        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0], &options);
1093        assert!(result.is_ok());
1094        let yf = result.unwrap().y_final().unwrap();
1095        let exact = (-1.0_f64).exp();
1096        assert!((yf[0] - exact).abs() < 1e-3);
1097    }
1098
1099    #[test]
1100    fn test_bdf_dae_scaled_mass() {
1101        let dae = DaeProblem::new(
1102            |_t, y: &[f64], dydt: &mut [f64]| {
1103                dydt[0] = -y[0];
1104            },
1105            |mass: &mut [f64]| {
1106                mass[0] = 2.0;
1107            },
1108            0.0,
1109            1.0,
1110            vec![1.0],
1111            vec![],
1112        );
1113        let options = SolverOptions::default().rtol(1e-3).atol(1e-5);
1114        let result = Bdf::solve(&dae, 0.0, 1.0, &[1.0], &options);
1115        assert!(result.is_ok());
1116        let yf = result.unwrap().y_final().unwrap();
1117        let exact = (-0.5_f64).exp();
1118        assert!((yf[0] - exact).abs() < 1e-2);
1119    }
1120
1121    /// Cross-check accepted-step counts against SciPy's BDF reference on the
1122    /// six problems specified in the BDF rewrite handoff. Bounds are loose
1123    /// (3-5× the SciPy count) to avoid flake while still catching real
1124    /// regressions.
1125    #[test]
1126    fn test_bdf_step_counts_vs_scipy() {
1127        struct Probe {
1128            name: &'static str,
1129            n_accept: usize,
1130            scipy_ref: usize,
1131            bound: usize,
1132            ok: bool,
1133        }
1134        let mut probes: Vec<Probe> = Vec::new();
1135
1136        // 1) y' = -y on [0, 1] at three tolerance levels.
1137        for &(rtol, atol, scipy_ref, bound) in &[
1138            (1e-4, 1e-6, 16usize, 60usize),
1139            (1e-6, 1e-9, 27, 100),
1140            (1e-8, 1e-11, 44, 200),
1141        ] {
1142            let p = OdeProblem::new(
1143                |_t, y: &[f64], dy: &mut [f64]| dy[0] = -y[0],
1144                0.0,
1145                1.0,
1146                vec![1.0],
1147            );
1148            let opts = SolverOptions::default().rtol(rtol).atol(atol);
1149            let r = Bdf::solve(&p, 0.0, 1.0, &[1.0], &opts).unwrap();
1150            let exact = (-1.0_f64).exp();
1151            let acc = (r.y_final().unwrap()[0] - exact).abs() < 10.0 * rtol;
1152            probes.push(Probe {
1153                name: Box::leak(format!("decay rtol={rtol:e}").into_boxed_str()),
1154                n_accept: r.stats.n_accept,
1155                scipy_ref,
1156                bound,
1157                ok: acc && r.success && r.stats.n_accept < bound,
1158            });
1159        }
1160
1161        // 2) y' = -100 y on [0, 0.1] at rtol=1e-2 (stiff decay).
1162        {
1163            let p = OdeProblem::new(
1164                |_t, y: &[f64], dy: &mut [f64]| dy[0] = -100.0 * y[0],
1165                0.0,
1166                0.1,
1167                vec![1.0],
1168            );
1169            let opts = SolverOptions::default().rtol(1e-2).atol(1e-4);
1170            let r = Bdf::solve(&p, 0.0, 0.1, &[1.0], &opts).unwrap();
1171            probes.push(Probe {
1172                name: "stiff y'=-100y",
1173                n_accept: r.stats.n_accept,
1174                scipy_ref: 12,
1175                bound: 50,
1176                ok: r.success && r.stats.n_accept < 50,
1177            });
1178        }
1179
1180        // 3) 2-state augmented system (the canary regression).
1181        {
1182            let p = OdeProblem::new(
1183                |_t, y: &[f64], dy: &mut [f64]| {
1184                    dy[0] = -0.5 * y[0];
1185                    dy[1] = -0.5 * y[1] - y[0];
1186                },
1187                0.0,
1188                2.0,
1189                vec![1.0, 0.0],
1190            );
1191            let opts = SolverOptions::default().rtol(1e-4).atol(1e-6);
1192            let r = Bdf::solve(&p, 0.0, 2.0, &[1.0, 0.0], &opts).unwrap();
1193            probes.push(Probe {
1194                name: "2-state augmented",
1195                n_accept: r.stats.n_accept,
1196                scipy_ref: 21,
1197                bound: 100,
1198                ok: r.success && r.stats.n_accept < 100,
1199            });
1200        }
1201
1202        // 4) Van der Pol μ=10, t in [0, 20].
1203        {
1204            let mu = 10.0_f64;
1205            let p = OdeProblem::new(
1206                move |_t, y: &[f64], dy: &mut [f64]| {
1207                    dy[0] = y[1];
1208                    dy[1] = mu * (1.0 - y[0] * y[0]) * y[1] - y[0];
1209                },
1210                0.0,
1211                20.0,
1212                vec![2.0, 0.0],
1213            );
1214            let opts = SolverOptions::default().rtol(1e-2).atol(1e-4);
1215            let r = Bdf::solve(&p, 0.0, 20.0, &[2.0, 0.0], &opts).unwrap();
1216            probes.push(Probe {
1217                name: "VdP μ=10",
1218                n_accept: r.stats.n_accept,
1219                scipy_ref: 80,
1220                bound: 400,
1221                ok: r.success && r.stats.n_accept < 400,
1222            });
1223        }
1224
1225        // Pretty-print the table and fail on any out-of-bound row.
1226        eprintln!("\n  problem                | n_accept |  scipy  |  bound  |  status");
1227        eprintln!("  -----------------------+----------+---------+---------+--------");
1228        for p in &probes {
1229            eprintln!(
1230                "  {:<22} | {:>8} | {:>7} | {:>7} | {}",
1231                p.name,
1232                p.n_accept,
1233                p.scipy_ref,
1234                p.bound,
1235                if p.ok { "ok" } else { "OVER" },
1236            );
1237        }
1238        assert!(
1239            probes.iter().all(|p| p.ok),
1240            "one or more BDF probes exceeded bound or failed accuracy"
1241        );
1242    }
1243}