Skip to main content

sidereon_core/astro/math/
least_squares.rs

1//! Generic weighted least-squares substrate.
2//!
3//! Domain-free numerical building blocks for nonlinear least-squares fitting:
4//! a forward-difference Jacobian and a trust-region (trf-style) Gauss-Newton
5//! solver. Nothing here knows about GNSS, orbits, or any physical units; the
6//! caller supplies a residual closure `r: R^n -> R^m` and optional diagonal
7//! weights.
8//!
9//! Two distinct numerical regimes live in this module:
10//!
11//! - The residual evaluation and the finite-difference Jacobian are pure
12//!   `f64` arithmetic plus libm `exp`/etc. inside the caller's closure. Their
13//!   operation order is fixed and reproducible, so they reproduce a reference
14//!   implementation (e.g. scipy `approx_derivative`) bit-for-bit when the same
15//!   recipe and the same libm are used.
16//! - The solver's trust-region step solves a linear subproblem via a matrix
17//!   factorization. That factorization goes through dense linear algebra whose
18//!   last bits depend on the BLAS/LAPACK backend, so the converged solution is
19//!   reproducible only to a tight tolerance, not bit-for-bit. The owned
20//!   [`TrustRegionSolve::OwnedGaussianFirstTie`] variant replaces only the
21//!   factorization with a fixed-order scalar kernel; the normal-matrix /
22//!   gradient / norm reductions that build the subproblem stay on nalgebra, so
23//!   its cross-platform bit guarantee is scoped to the factorization (see that
24//!   variant's docs).
25//!
26//! Keeping the finite-difference primitive separate from the linear-algebra
27//! step lets callers assert the former to the bit while treating the latter as
28//! a tolerance-bound agreement.
29//!
30//! # Relationship to the `trust-region-least-squares` crate
31//!
32//! The workspace also ships [`trust-region-least-squares`], a standalone,
33//! publishable solver that reproduces SciPy's trust-region-reflective
34//! `least_squares` (its dense unbounded `n = 3`, linear-loss, 2-point-Jacobian
35//! path) bit-for-bit, via an SVD-based iteration with an injectable SVD/BLAS
36//! seam. This module is a *different* algorithm: a Levenberg-damped Gauss-Newton
37//! trust region with no SVD and no reflection, tuned for the GNSS estimation
38//! stack and unconstrained in dimension. The two are deliberately not unified.
39//!
40//! The reason is bit-exactness, not convenience. The only callers of
41//! [`solve_trf`]/[`solve_trf_with`] are the SPP solve and the reduced-orbit fit;
42//! their converged outputs are pinned to bit-exact goldens (SPP's
43//! geometry/clock reference is Skyfield; the reduced-orbit fit is pinned to an
44//! independent Astropy/SciPy oracle arc). RTK and PPP do not use this solver.
45//! Those goldens were produced by *this* iteration's exact floating-point
46//! trajectory; repointing the callers at the SVD/TRF crate's iteration would
47//! change the converged values and break the goldens, because the two solvers
48//! take different steps even when both converge. So there is no
49//! behavior-preserving merge: the public scipy-compatible solver is the crate,
50//! the GNSS-tuned solver is this module, and unifying them is a
51//! golden-rebaselining decision reserved for the repo owner rather than
52//! something to force here.
53//!
54//! [`trust-region-least-squares`]: https://docs.rs/trust-region-least-squares
55
56use nalgebra::{DMatrix, DVector};
57
58/// Relative finite-difference step for a 2-point (forward) scheme: `sqrt(eps)`
59/// for `f64`, i.e. `2^-26`. This matches scipy's `_eps_for_method` choice for
60/// the `"2-point"` method.
61pub const FD_REL_STEP_2POINT: f64 = 1.4901161193847656e-8; // 0x1.0p-26 == sqrt(2^-52)
62
63/// Default first-order optimality tolerance (scipy `least_squares` `gtol`).
64const TRF_DEFAULT_GTOL: f64 = 1e-10;
65/// Default relative-cost-reduction tolerance (scipy `least_squares` `ftol`).
66const TRF_DEFAULT_FTOL: f64 = 1e-8;
67/// Default relative-step tolerance (scipy `least_squares` `xtol`).
68const TRF_DEFAULT_XTOL: f64 = 1e-8;
69/// Default maximum residual evaluations.
70const TRF_DEFAULT_MAX_NFEV: usize = 300;
71/// Initial Levenberg damping as a fraction of the largest Gauss-Newton normal
72/// diagonal: `mu0 = TRF_INITIAL_DAMPING_SCALE * max_i (J^T J)_ii`.
73const TRF_INITIAL_DAMPING_SCALE: f64 = 1e-3;
74
75/// Per-parameter step pieces for a single forward-difference column, recorded
76/// in evaluation order so they can be inspected or compared against a
77/// reference trace.
78#[derive(Debug, Clone, PartialEq)]
79pub struct FdStep {
80    /// Index of the perturbed parameter.
81    pub param_index: usize,
82    /// `+1.0` if `x0[i] >= 0`, else `-1.0` (the `(x0>=0)*2 - 1` convention;
83    /// note `x0[i] == 0` yields `+1.0`).
84    pub sign_x0: f64,
85    /// Nominal step `rel_step * sign_x0 * max(1, |x0[i]|)`.
86    pub h: f64,
87    /// Effective step after rounding: `(x0[i] + h) - x0[i]`. This is the
88    /// denominator actually used for the column, recomputed rather than reused
89    /// from `h`.
90    pub dx: f64,
91    /// The perturbed parameter vector (only component `i` bumped by `h`).
92    pub x_perturbed: DVector<f64>,
93}
94
95/// Compute the per-parameter forward-difference step pieces for `x0`.
96///
97/// `sign_x0[i] = +1 if x0[i] >= 0 else -1`, `h[i] = rel_step * sign_x0[i] *
98/// max(1, |x0[i]|)`, and the effective step `dx[i] = (x0[i] + h[i]) - x0[i]`.
99/// The post-rounding `dx` is the value used as the column denominator.
100pub fn fd_steps(x0: &DVector<f64>, rel_step: f64) -> Result<Vec<FdStep>, SolveError> {
101    let rel_step = crate::validate::positive_step(rel_step, "rel_step").map_err(map_field_error)?;
102    fd_steps_checked(x0, rel_step)
103}
104
105fn fd_steps_checked(x0: &DVector<f64>, rel_step: f64) -> Result<Vec<FdStep>, SolveError> {
106    validate_nonempty_vector(x0, "parameters")?;
107    validate_vector(x0, "parameters")?;
108    let steps = fd_steps_unchecked(x0, rel_step);
109    for step in &steps {
110        validate_value(step.h, "fd_step")?;
111        validate_value(step.dx, "fd_step")?;
112        if step.dx == 0.0 {
113            return Err(invalid_input("fd_step", "zero"));
114        }
115        validate_vector(&step.x_perturbed, "perturbed parameters")?;
116    }
117    Ok(steps)
118}
119
120fn fd_steps_unchecked(x0: &DVector<f64>, rel_step: f64) -> Vec<FdStep> {
121    (0..x0.len())
122        .map(|i| {
123            let xi = x0[i];
124            let sign_x0 = if xi >= 0.0 { 1.0 } else { -1.0 };
125            let h = rel_step * sign_x0 * xi.abs().max(1.0);
126            let mut x_perturbed = x0.clone();
127            x_perturbed[i] = xi + h;
128            let dx = x_perturbed[i] - xi;
129            FdStep {
130                param_index: i,
131                sign_x0,
132                h,
133                dx,
134                x_perturbed,
135            }
136        })
137        .collect()
138}
139
140/// Forward (2-point) finite-difference Jacobian of `residual` at `x0`, given a
141/// precomputed `f0 = residual(x0)`.
142///
143/// Column `i` is `(residual(x0 + h_i e_i) - f0) / dx_i`, where `h_i` and `dx_i`
144/// come from [`fd_steps`]. The arithmetic is plain `f64` (no fused multiply-add):
145/// each entry is one subtraction followed by one division, matching scipy's
146/// `approx_derivative` operation order.
147///
148/// `f0` is passed in (rather than recomputed) so the caller controls the base
149/// evaluation and so the same `f0` used elsewhere is reused exactly.
150pub fn jacobian_2point<F>(
151    residual: F,
152    x0: &DVector<f64>,
153    f0: &DVector<f64>,
154) -> Result<DMatrix<f64>, SolveError>
155where
156    F: Fn(&DVector<f64>) -> DVector<f64>,
157{
158    jacobian_2point_checked(|x| Ok(residual(x)), x0, f0)
159}
160
161fn jacobian_2point_checked<F>(
162    residual: F,
163    x0: &DVector<f64>,
164    f0: &DVector<f64>,
165) -> Result<DMatrix<f64>, SolveError>
166where
167    F: Fn(&DVector<f64>) -> Result<DVector<f64>, SolveError>,
168{
169    validate_nonempty_vector(x0, "parameters")?;
170    validate_vector(x0, "parameters")?;
171    validate_nonempty_vector(f0, "residual")?;
172    validate_vector(f0, "residual")?;
173    let m = f0.len();
174    let n = x0.len();
175    let steps = fd_steps_checked(x0, FD_REL_STEP_2POINT)?;
176    let mut jac = DMatrix::zeros(m, n);
177    for step in &steps {
178        let f1 = residual(&step.x_perturbed)?;
179        validate_nonempty_vector(&f1, "residual")?;
180        validate_vector(&f1, "residual")?;
181        if f1.len() != m {
182            return Err(invalid_input("residual", "length mismatch"));
183        }
184        let i = step.param_index;
185        for row in 0..m {
186            jac[(row, i)] = (f1[row] - f0[row]) / step.dx;
187        }
188    }
189    validate_matrix(&jac, "jacobian")?;
190    Ok(jac)
191}
192
193/// Termination state of a [`solve_trf`] run, mirroring the scipy
194/// `least_squares` status codes for the conditions this solver detects.
195#[derive(Debug, Clone, Copy, PartialEq, Eq)]
196pub enum Status {
197    /// `||J^T r||_inf` fell below `gtol` (first-order optimality).
198    GradientTolerance,
199    /// The relative cost reduction fell below `ftol`.
200    CostTolerance,
201    /// The relative step size fell below `xtol`.
202    StepTolerance,
203    /// The maximum number of residual evaluations was reached.
204    MaxEvaluations,
205}
206
207/// Stopping tolerances and evaluation budget for [`solve_trf`].
208#[derive(Debug, Clone, Copy)]
209pub struct SolveOptions {
210    /// First-order optimality tolerance on `||J^T r||_inf`.
211    pub gtol: f64,
212    /// Relative-cost-reduction tolerance.
213    pub ftol: f64,
214    /// Relative-step tolerance.
215    pub xtol: f64,
216    /// Maximum number of residual evaluations.
217    pub max_nfev: usize,
218}
219
220impl Default for SolveOptions {
221    fn default() -> Self {
222        // scipy's defaults for these tolerances.
223        Self {
224            gtol: TRF_DEFAULT_GTOL,
225            ftol: TRF_DEFAULT_FTOL,
226            xtol: TRF_DEFAULT_XTOL,
227            max_nfev: TRF_DEFAULT_MAX_NFEV,
228        }
229    }
230}
231
232/// Result of a [`solve_trf`] run.
233#[derive(Debug, Clone)]
234pub struct LeastSquaresReport {
235    /// Converged parameter vector.
236    pub x: DVector<f64>,
237    /// Residual at `x`.
238    pub residual: DVector<f64>,
239    /// Cost `0.5 * dot(r, r)` at `x`.
240    pub cost: f64,
241    /// Finite-difference Jacobian at `x`.
242    pub jacobian: DMatrix<f64>,
243    /// First-order optimality `||J^T r||_inf` at `x`.
244    pub optimality_inf: f64,
245    /// Number of accepted iterations.
246    pub iterations: usize,
247    /// Why the solve stopped.
248    pub status: Status,
249}
250
251/// How the trust-region subproblem `(J^T J + mu I) dx = -J^T r` is solved at
252/// each iterate. Both are dense small-system solves; they differ only in the
253/// factorization, which is the one place the converged bits move *between the
254/// two variants*. The surrounding Gauss-Newton reductions that build the
255/// subproblem (the normal matrix `J^T J`, the gradient `J^T r`, the cost dot
256/// product, the step/state norms, the optimality `amax`) are shared, identical
257/// for both variants, and computed with nalgebra's dense algebra.
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
259pub enum TrustRegionSolve {
260    /// nalgebra LU factorization. This is the legacy SPP path: its last bit is
261    /// BLAS/LAPACK-backend dependent, so its converged solution is reproducible
262    /// only to a tight tolerance, not bit-for-bit across machines.
263    #[default]
264    NalgebraLu,
265    /// Owned deterministic Gaussian elimination with partial pivoting and a
266    /// fixed reduction order ([`crate::astro::math::linear::solve_linear_first_tie`])
267    /// for the dense trust-region subproblem `(J^T J + mu I) dx = -J^T r`: no
268    /// nalgebra LU and no black-box BLAS in that factorization, so the
269    /// factorization step is reproducible to the bit on any platform.
270    ///
271    /// Determinism scope: this variant owns ONLY the subproblem factorization.
272    /// The reductions that FORM the subproblem each iterate -- the normal
273    /// matrix `J^T J` and gradient `J^T r` (nalgebra GEMM/GEMV), the cost dot
274    /// product, and the step/state norms -- still go through nalgebra's dense
275    /// `DMatrix`/`DVector` algebra, which (nalgebra 0.33 without the BLAS
276    /// feature) dispatches to `matrixmultiply`'s CPU-tuned FMA kernels. Those
277    /// reductions are therefore NOT bit-portable across CPU targets. The
278    /// converged solution is bit-reproducible run-to-run for a fixed build, so
279    /// its frozen-bits golden pins THAT build's output; the cross-platform bit
280    /// guarantee is scoped to the factorization, not the whole solve. Owning the
281    /// assembly reductions too (fixed-order scalar `J^T J`/`J^T r`/dot/norm)
282    /// would make the entire solve portable, but is a separate,
283    /// behavior-changing step that would re-pin the owned bits.
284    OwnedGaussianFirstTie,
285}
286
287/// Error from [`solve_trf`].
288#[derive(Debug, Clone, thiserror::Error)]
289pub enum SolveError {
290    /// The Jacobian is rank-deficient / the trust-region subproblem has no
291    /// usable descent direction (degenerate geometry).
292    #[error("singular or rank-deficient Jacobian: no usable descent direction")]
293    SingularJacobian,
294    /// A boundary input or derived least-squares quantity was malformed.
295    #[error("invalid least-squares {field}: {reason}")]
296    InvalidInput {
297        field: &'static str,
298        reason: &'static str,
299    },
300}
301
302/// Cost `0.5 * dot(r, r)`, a plain fold of `f64` operations.
303pub fn cost(residual: &DVector<f64>) -> Result<f64, SolveError> {
304    validate_nonempty_vector(residual, "residual")?;
305    validate_vector(residual, "residual")?;
306    validate_value(0.5 * residual.dot(residual), "cost")
307}
308
309/// A nonlinear least-squares problem: a residual closure, optional diagonal
310/// weights, and a starting point. The weighted form scales both the residual
311/// and the Jacobian rows by `sqrt(weight)`; with all weights `1` it reduces to
312/// the ordinary (unweighted) least-squares problem.
313pub struct LeastSquaresProblem<F> {
314    residual: F,
315    /// `sqrt` of the diagonal weights, or `None` for the identity weighting.
316    sqrt_weights: Option<DVector<f64>>,
317    x0: DVector<f64>,
318}
319
320impl<F> LeastSquaresProblem<F>
321where
322    F: Fn(&DVector<f64>) -> DVector<f64>,
323{
324    /// An unweighted problem (identity weighting).
325    pub fn new(residual: F, x0: DVector<f64>) -> Self {
326        Self {
327            residual,
328            sqrt_weights: None,
329            x0,
330        }
331    }
332
333    /// A problem with diagonal weights `W`; residual and Jacobian rows are
334    /// scaled by `sqrt(W)`.
335    pub fn with_weights(residual: F, x0: DVector<f64>, weights: DVector<f64>) -> Self {
336        let sqrt_weights = weights.map(f64::sqrt);
337        Self {
338            residual,
339            sqrt_weights: Some(sqrt_weights),
340            x0,
341        }
342    }
343
344    /// Weighted residual at `x`.
345    fn weighted_residual(&self, x: &DVector<f64>) -> Result<DVector<f64>, SolveError> {
346        validate_nonempty_vector(x, "parameters")?;
347        validate_vector(x, "parameters")?;
348        let r = (self.residual)(x);
349        validate_nonempty_vector(&r, "residual")?;
350        validate_vector(&r, "residual")?;
351        match &self.sqrt_weights {
352            Some(sw) => {
353                validate_nonempty_vector(sw, "weights")?;
354                validate_vector(sw, "weights")?;
355                if sw.len() != r.len() {
356                    return Err(invalid_input("weights", "length mismatch"));
357                }
358                let weighted = r.component_mul(sw);
359                validate_vector(&weighted, "weighted residual")?;
360                Ok(weighted)
361            }
362            None => Ok(r),
363        }
364    }
365}
366
367/// Trust-region (trf-style) Gauss-Newton solve.
368///
369/// At each iterate the weighted residual and its forward-difference Jacobian
370/// are formed, then a Levenberg-damped Gauss-Newton step `(J^T J + mu I) dx =
371/// -J^T r` is taken inside a trust region; the damping `mu` is grown on a
372/// rejected step and shrunk on an accepted one. The linear solve uses a dense
373/// factorization, so the converged solution is reproducible to a tight
374/// tolerance rather than to the bit.
375///
376/// Returns [`SolveError::SingularJacobian`] if the normal-equation system
377/// cannot be solved (degenerate geometry).
378///
379/// Uses the legacy [`TrustRegionSolve::NalgebraLu`] subproblem solver; for the
380/// owned deterministic factorization call [`solve_trf_with`].
381pub fn solve_trf<F>(
382    problem: &LeastSquaresProblem<F>,
383    opts: &SolveOptions,
384) -> Result<LeastSquaresReport, SolveError>
385where
386    F: Fn(&DVector<f64>) -> DVector<f64>,
387{
388    solve_trf_with(problem, opts, TrustRegionSolve::NalgebraLu)
389}
390
391/// Solve the trust-region subproblem `(J^T J + mu I) dx = rhs` with the
392/// selected factorization. The two arms produce the same algebra; they differ
393/// only in the dense solve's operation order (see [`TrustRegionSolve`]).
394fn solve_subproblem(
395    lhs: &DMatrix<f64>,
396    rhs: &DVector<f64>,
397    linear_solve: TrustRegionSolve,
398) -> Option<DVector<f64>> {
399    match linear_solve {
400        TrustRegionSolve::NalgebraLu => lhs.clone().lu().solve(rhs),
401        TrustRegionSolve::OwnedGaussianFirstTie => {
402            let n = rhs.len();
403            let a: Vec<Vec<f64>> = (0..n)
404                .map(|i| (0..n).map(|j| lhs[(i, j)]).collect())
405                .collect();
406            let b: Vec<f64> = rhs.iter().copied().collect();
407            crate::astro::math::linear::solve_linear_first_tie(&a, &b).map(DVector::from_vec)
408        }
409    }
410}
411
412/// [`solve_trf`] with an explicit choice of the trust-region subproblem solver.
413/// `NalgebraLu` reproduces the legacy SPP path; `OwnedGaussianFirstTie` is the
414/// owned deterministic kernel pinned to its own frozen-bits goldens.
415pub fn solve_trf_with<F>(
416    problem: &LeastSquaresProblem<F>,
417    opts: &SolveOptions,
418    linear_solve: TrustRegionSolve,
419) -> Result<LeastSquaresReport, SolveError>
420where
421    F: Fn(&DVector<f64>) -> DVector<f64>,
422{
423    validate_options(opts)?;
424    let n = problem.x0.len();
425
426    let mut x = problem.x0.clone();
427    validate_nonempty_vector(&x, "initial parameters")?;
428    validate_vector(&x, "initial parameters")?;
429    let mut r = problem.weighted_residual(&x)?;
430    let mut f0 = r.clone();
431    let mut jac = jacobian_2point_checked(|p| problem.weighted_residual(p), &x, &f0)?;
432    let mut nfev = 1usize; // the f0 above
433    let mut cur_cost = cost(&r)?;
434
435    // Initial Levenberg damping scaled to the Gauss-Newton normal matrix.
436    let jtj0 = jac.transpose() * &jac;
437    validate_matrix(&jtj0, "normal matrix")?;
438    let mut mu = TRF_INITIAL_DAMPING_SCALE
439        * (0..n)
440            .map(|i| jtj0[(i, i)])
441            .fold(0.0_f64, f64::max)
442            .max(1.0);
443
444    let mut iterations = 0usize;
445
446    loop {
447        let jt = jac.transpose();
448        let grad = &jt * &r;
449        validate_vector(&grad, "gradient")?;
450        let optimality_inf = validate_value(grad.amax(), "optimality")?;
451
452        if optimality_inf < opts.gtol {
453            return finish(x, r, cur_cost, jac, iterations, Status::GradientTolerance);
454        }
455        if nfev >= opts.max_nfev {
456            return finish(x, r, cur_cost, jac, iterations, Status::MaxEvaluations);
457        }
458
459        let jtj = &jt * &jac;
460        validate_matrix(&jtj, "normal matrix")?;
461
462        // Levenberg-damped Gauss-Newton subproblem.
463        let mut accepted = false;
464        for _ in 0..30 {
465            let mut lhs = jtj.clone();
466            for i in 0..n {
467                lhs[(i, i)] += mu;
468            }
469            let rhs = -&grad;
470            validate_matrix(&lhs, "subproblem matrix")?;
471            validate_vector(&rhs, "subproblem rhs")?;
472            let step = match solve_subproblem(&lhs, &rhs, linear_solve) {
473                Some(s) => s,
474                None => return Err(SolveError::SingularJacobian),
475            };
476            validate_vector(&step, "step")?;
477
478            let x_trial = &x + &step;
479            let r_trial = problem.weighted_residual(&x_trial)?;
480            nfev += 1;
481            let cost_trial = cost(&r_trial)?;
482
483            if cost_trial < cur_cost {
484                // Accept; relative-cost and relative-step stopping checks.
485                let cost_reduction = (cur_cost - cost_trial) / cur_cost.max(f64::MIN_POSITIVE);
486                let step_norm = step.norm();
487                let x_norm = x.norm();
488                let rel_step = step_norm / x_norm.max(f64::MIN_POSITIVE);
489
490                x = x_trial;
491                r = r_trial;
492                cur_cost = cost_trial;
493                f0 = r.clone();
494                jac = jacobian_2point_checked(|p| problem.weighted_residual(p), &x, &f0)?;
495                nfev += n; // FD probes for the new Jacobian
496                iterations += 1;
497                mu *= 0.5;
498                accepted = true;
499
500                if cost_reduction < opts.ftol {
501                    return finish(x, r, cur_cost, jac, iterations, Status::CostTolerance);
502                }
503                if rel_step < opts.xtol {
504                    return finish(x, r, cur_cost, jac, iterations, Status::StepTolerance);
505                }
506                break;
507            } else {
508                // Reject: grow damping and retry the subproblem.
509                mu *= 2.0;
510            }
511        }
512
513        if !accepted {
514            // Could not find an improving step within the damping sweep.
515            return finish(x, r, cur_cost, jac, iterations, Status::StepTolerance);
516        }
517    }
518}
519
520fn finish(
521    x: DVector<f64>,
522    residual: DVector<f64>,
523    cost_value: f64,
524    jacobian: DMatrix<f64>,
525    iterations: usize,
526    status: Status,
527) -> Result<LeastSquaresReport, SolveError> {
528    validate_nonempty_vector(&x, "solution")?;
529    validate_vector(&x, "solution")?;
530    validate_nonempty_vector(&residual, "residual")?;
531    validate_vector(&residual, "residual")?;
532    validate_value(cost_value, "cost")?;
533    validate_matrix(&jacobian, "jacobian")?;
534    let optimality_inf = validate_value((jacobian.transpose() * &residual).amax(), "optimality")?;
535    Ok(LeastSquaresReport {
536        x,
537        residual,
538        cost: cost_value,
539        jacobian,
540        optimality_inf,
541        iterations,
542        status,
543    })
544}
545
546fn validate_value(value: f64, field: &'static str) -> Result<f64, SolveError> {
547    crate::validate::finite(value, field).map_err(map_field_error)
548}
549
550fn validate_options(opts: &SolveOptions) -> Result<(), SolveError> {
551    crate::validate::positive_step(opts.gtol, "gtol").map_err(map_field_error)?;
552    crate::validate::positive_step(opts.ftol, "ftol").map_err(map_field_error)?;
553    crate::validate::positive_step(opts.xtol, "xtol").map_err(map_field_error)?;
554    if opts.max_nfev == 0 {
555        return Err(invalid_input("max_nfev", "not positive"));
556    }
557    Ok(())
558}
559
560fn validate_nonempty_vector(vector: &DVector<f64>, field: &'static str) -> Result<(), SolveError> {
561    if vector.is_empty() {
562        Err(invalid_input(field, "empty"))
563    } else {
564        Ok(())
565    }
566}
567
568fn validate_vector(vector: &DVector<f64>, field: &'static str) -> Result<(), SolveError> {
569    crate::validate::finite_slice(vector.as_slice(), field).map_err(map_field_error)
570}
571
572fn validate_matrix(matrix: &DMatrix<f64>, field: &'static str) -> Result<(), SolveError> {
573    crate::validate::finite_slice(matrix.as_slice(), field).map_err(map_field_error)
574}
575
576fn map_field_error(error: crate::validate::FieldError) -> SolveError {
577    invalid_input(error.field(), error.reason())
578}
579
580fn invalid_input(field: &'static str, reason: &'static str) -> SolveError {
581    SolveError::InvalidInput { field, reason }
582}
583
584#[cfg(test)]
585mod tests {
586    use super::*;
587
588    #[test]
589    fn fd_rel_step_is_sqrt_eps() {
590        assert_eq!(FD_REL_STEP_2POINT, (2.0_f64.powi(-52)).sqrt());
591        assert_eq!(FD_REL_STEP_2POINT, 2.0_f64.powi(-26));
592    }
593
594    #[test]
595    fn fd_step_sign_convention() {
596        let x0 = DVector::from_vec(vec![5.0, -2.0, 0.0]);
597        let steps = fd_steps(&x0, FD_REL_STEP_2POINT).unwrap();
598        assert_eq!(steps[0].sign_x0, 1.0);
599        assert_eq!(steps[1].sign_x0, -1.0);
600        assert_eq!(steps[2].sign_x0, 1.0); // x == 0 -> +1
601    }
602
603    #[test]
604    fn fd_steps_rejects_zero_relative_step() {
605        let x0 = DVector::from_vec(vec![1.0]);
606        assert_invalid_field(fd_steps(&x0, 0.0).unwrap_err(), "rel_step");
607    }
608
609    #[test]
610    fn fd_steps_rejects_nonfinite_parameters() {
611        let x0 = DVector::from_vec(vec![1.0, f64::NAN]);
612        assert_invalid_field(fd_steps(&x0, FD_REL_STEP_2POINT).unwrap_err(), "parameters");
613    }
614
615    #[test]
616    fn jacobian_rejects_residual_length_mismatch() {
617        let x0 = DVector::from_vec(vec![1.0, 2.0]);
618        let f0 = DVector::from_vec(vec![1.0, 2.0]);
619        let residual = |_: &DVector<f64>| DVector::from_vec(vec![1.0]);
620        assert_invalid_field(jacobian_2point(residual, &x0, &f0).unwrap_err(), "residual");
621    }
622
623    #[test]
624    fn cost_rejects_nonfinite_residual() {
625        assert_invalid_field(
626            cost(&DVector::from_vec(vec![1.0, f64::INFINITY])).unwrap_err(),
627            "residual",
628        );
629    }
630
631    #[test]
632    fn exp_fit_converges() {
633        // a*exp(b*t) + c with a known minimum near the generated data.
634        let t = vec![0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0];
635        let y = vec![
636            3.0123, 2.2083, 1.6889, 1.3713, 1.0903, 0.9302, 0.8104, 0.6303,
637        ];
638        let tt = t.clone();
639        let yy = y.clone();
640        let residual = move |p: &DVector<f64>| {
641            let (a, b, c) = (p[0], p[1], p[2]);
642            DVector::from_iterator(
643                tt.len(),
644                tt.iter()
645                    .zip(&yy)
646                    .map(|(&tk, &yk)| a * (b * tk).exp() + c - yk),
647            )
648        };
649        let problem = LeastSquaresProblem::new(residual, DVector::from_vec(vec![5.0, -2.0, 2.0]));
650        let report = solve_trf(&problem, &SolveOptions::default()).unwrap();
651        assert!(report.cost < 1.0, "cost did not reduce: {}", report.cost);
652    }
653
654    #[test]
655    fn solve_trf_rejects_nonfinite_initial_residual() {
656        fn residual(_: &DVector<f64>) -> DVector<f64> {
657            DVector::from_element(1, f64::NAN)
658        }
659        let problem = LeastSquaresProblem::new(residual, DVector::from_element(1, 0.0));
660        assert_invalid_field(
661            solve_trf(&problem, &SolveOptions::default()).unwrap_err(),
662            "residual",
663        );
664    }
665
666    #[test]
667    fn solve_trf_rejects_nonfinite_initial_cost() {
668        fn residual(_: &DVector<f64>) -> DVector<f64> {
669            DVector::from_element(1, f64::MAX)
670        }
671        let problem = LeastSquaresProblem::new(residual, DVector::from_element(1, 0.0));
672        assert_invalid_field(
673            solve_trf(&problem, &SolveOptions::default()).unwrap_err(),
674            "cost",
675        );
676    }
677
678    #[test]
679    fn solve_trf_rejects_nonfinite_trial_residual_instead_of_converging() {
680        use std::cell::Cell;
681
682        let calls = Cell::new(0usize);
683        let residual = move |p: &DVector<f64>| {
684            let call = calls.get();
685            calls.set(call + 1);
686            if call >= 2 {
687                DVector::from_element(1, f64::NAN)
688            } else {
689                DVector::from_element(1, p[0])
690            }
691        };
692        let problem = LeastSquaresProblem::new(residual, DVector::from_element(1, 1.0));
693        assert_invalid_field(
694            solve_trf(&problem, &SolveOptions::default()).unwrap_err(),
695            "residual",
696        );
697    }
698
699    #[test]
700    fn solve_trf_rejects_invalid_options() {
701        fn residual(p: &DVector<f64>) -> DVector<f64> {
702            DVector::from_element(1, p[0])
703        }
704        let problem = LeastSquaresProblem::new(residual, DVector::from_element(1, 1.0));
705        let opts = SolveOptions {
706            gtol: f64::NAN,
707            ..SolveOptions::default()
708        };
709        assert_invalid_field(solve_trf(&problem, &opts).unwrap_err(), "gtol");
710
711        let opts = SolveOptions {
712            max_nfev: 0,
713            ..SolveOptions::default()
714        };
715        assert_invalid_field(solve_trf(&problem, &opts).unwrap_err(), "max_nfev");
716    }
717
718    #[test]
719    fn solve_trf_rejects_weight_residual_dimension_mismatch() {
720        fn residual(_: &DVector<f64>) -> DVector<f64> {
721            DVector::from_vec(vec![1.0, 2.0])
722        }
723        let problem = LeastSquaresProblem::with_weights(
724            residual,
725            DVector::from_element(1, 0.0),
726            DVector::from_vec(vec![1.0]),
727        );
728        assert_invalid_field(
729            solve_trf(&problem, &SolveOptions::default()).unwrap_err(),
730            "weights",
731        );
732    }
733
734    fn assert_invalid_field(error: SolveError, expected: &'static str) {
735        match error {
736            SolveError::InvalidInput { field, .. } => assert_eq!(field, expected),
737            other => panic!("expected invalid input for {expected}, got {other:?}"),
738        }
739    }
740
741    /// The exp-fit residual used by the owned-solver tests.
742    fn exp_fit_problem() -> LeastSquaresProblem<impl Fn(&DVector<f64>) -> DVector<f64>> {
743        let t = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0];
744        let y = [
745            3.0123, 2.2083, 1.6889, 1.3713, 1.0903, 0.9302, 0.8104, 0.6303,
746        ];
747        let residual = move |p: &DVector<f64>| {
748            let (a, b, c) = (p[0], p[1], p[2]);
749            DVector::from_iterator(
750                t.len(),
751                t.iter()
752                    .zip(&y)
753                    .map(|(&tk, &yk)| a * (b * tk).exp() + c - yk),
754            )
755        };
756        LeastSquaresProblem::new(residual, DVector::from_vec(vec![5.0, -2.0, 2.0]))
757    }
758
759    /// The owned deterministic subproblem solver converges on the exp-fit
760    /// problem and reproduces its solution bit-for-bit run to run. The pinned
761    /// bits are the owned kernel's own frozen-bits golden (a different
762    /// factorization than the legacy nalgebra path, so its own value). The
763    /// owned kernel owns only the subproblem factorization; the surrounding
764    /// `J^T J`/`J^T r`/norm reductions are shared nalgebra dense algebra, so
765    /// these bits are this build's reproducible output (the run-to-run check
766    /// below), not a cross-platform constant.
767    #[test]
768    fn owned_trf_converges_to_frozen_bits() {
769        let problem = exp_fit_problem();
770        let report = solve_trf_with(
771            &problem,
772            &SolveOptions::default(),
773            TrustRegionSolve::OwnedGaussianFirstTie,
774        )
775        .unwrap();
776        assert!(
777            report.cost < 1.0,
778            "owned cost did not reduce: {}",
779            report.cost
780        );
781        assert_eq!(report.x[0].to_bits(), 0x4003c3674cdfadef);
782        assert_eq!(report.x[1].to_bits(), 0xbfe799e0d1929220);
783        assert_eq!(report.x[2].to_bits(), 0x3fe0d5c96d9d3b35);
784
785        // Determinism: a second run is bit-identical.
786        let again = solve_trf_with(
787            &problem,
788            &SolveOptions::default(),
789            TrustRegionSolve::OwnedGaussianFirstTie,
790        )
791        .unwrap();
792        for i in 0..3 {
793            assert_eq!(report.x[i].to_bits(), again.x[i].to_bits());
794        }
795    }
796}