nereids-fitting 0.2.1

//! Levenberg-Marquardt least-squares optimizer.
//!
//! Minimizes χ² = Σᵢ [(y_obs - y_model)² / σᵢ²] by iteratively solving:
//!
//!   (JᵀWJ + λ·diag(JᵀWJ)) · δ = JᵀW·r
//!
//! where J is the Jacobian, W = diag(1/σ²), r = y_obs - y_model,
//! and λ is the damping parameter.
//!
//! ## SAMMY Reference
//! - `fit/` module, manual Sec. IV (Bayes equations / generalized least-squares)

use nereids_core::constants::{LM_DIAGONAL_FLOOR, PIVOT_FLOOR};

use crate::error::FittingError;
use crate::parameters::ParameterSet;

/// Row-major flat matrix for cache-friendly storage.
///
/// Replaces `Vec<Vec<f64>>` to collapse ~N separate heap allocations into 1
/// and improve cache locality for JtWJ assembly.  Access: `data[i * ncols + j]`.
#[derive(Debug, Clone)]
pub struct FlatMatrix {
    /// Flat row-major storage: `data[i * ncols + j]` = element at row i, col j.
    pub data: Vec<f64>,
    /// Number of rows.
    pub nrows: usize,
    /// Number of columns.
    pub ncols: usize,
}

impl FlatMatrix {
    /// Create a new zero-filled matrix with the given dimensions.
    pub fn zeros(nrows: usize, ncols: usize) -> Self {
        let len = nrows
            .checked_mul(ncols)
            .expect("FlatMatrix dimensions overflow usize");
        Self {
            data: vec![0.0; len],
            nrows,
            ncols,
        }
    }

    /// Access element at (row, col) immutably.
    #[inline(always)]
    pub fn get(&self, row: usize, col: usize) -> f64 {
        debug_assert!(row < self.nrows && col < self.ncols);
        self.data[row * self.ncols + col]
    }

    /// Access element at (row, col) mutably.
    #[inline(always)]
    pub fn get_mut(&mut self, row: usize, col: usize) -> &mut f64 {
        debug_assert!(row < self.nrows && col < self.ncols);
        &mut self.data[row * self.ncols + col]
    }
}

/// #125.4: Maximum damping parameter before the optimizer gives up.
///
/// When λ exceeds this threshold, the optimizer is stuck in a region where no
/// step improves chi-squared.  Breaking out avoids wasting iterations.
const LAMBDA_BREAKOUT: f64 = 1e16;

/// Configuration for the LM optimizer.
#[derive(Debug, Clone)]
pub struct LmConfig {
    /// Maximum number of iterations.
    pub max_iter: usize,
    /// Initial damping parameter λ.
    pub lambda_init: f64,
    /// Factor to increase λ on rejected step.
    pub lambda_up: f64,
    /// Factor to decrease λ on accepted step.
    pub lambda_down: f64,
    /// Convergence tolerance on relative χ² change.
    pub tol_chi2: f64,
    /// Convergence tolerance on relative parameter change.
    pub tol_param: f64,
    /// Step size for finite-difference Jacobian.
    pub fd_step: f64,
    /// Whether to compute the covariance matrix (and uncertainties) after
    /// convergence.  This requires an extra Jacobian evaluation + matrix
    /// inversion at the final parameters.  Set to `false` for per-pixel
    /// spatial mapping where only densities are needed.
    ///
    /// Default: `true`.
    pub compute_covariance: bool,
}

impl Default for LmConfig {
    fn default() -> Self {
        Self {
            max_iter: 200,
            lambda_init: 1e-3,
            lambda_up: 10.0,
            lambda_down: 0.1,
            tol_chi2: 1e-8,
            tol_param: 1e-8,
            fd_step: 1e-6,
            compute_covariance: true,
        }
    }
}

/// Result of a Levenberg-Marquardt fit.
#[derive(Debug, Clone)]
pub struct LmResult {
    /// Final chi-squared value.
    pub chi_squared: f64,
    /// Reduced chi-squared (χ²/ν where ν = n_data - n_params).
    pub reduced_chi_squared: f64,
    /// Number of iterations taken.
    pub iterations: usize,
    /// Whether the fit converged.
    pub converged: bool,
    /// Final parameter values (all parameters, including fixed).
    pub params: Vec<f64>,
    /// Covariance matrix of free parameters (n_free × n_free), if available.
    pub covariance: Option<FlatMatrix>,
    /// Standard errors of free parameters (diagonal of covariance).
    pub uncertainties: Option<Vec<f64>>,
}

/// A model function that can be fitted.
///
/// Given parameter values (all params including fixed), computes
/// the model prediction at each data point.
pub trait FitModel {
    /// Evaluate the model for the given parameters.
    ///
    /// On success, returns a vector of model predictions with the same
    /// length as the data being fitted. On failure, returns a
    /// [`FittingError`] indicating that the model could not be evaluated
    /// (e.g. a broadening or physics error).
    ///
    /// **Optimizer semantics:** during Levenberg-Marquardt trial steps,
    /// an `Err` is treated as a failed step (increase λ / backtrack).
    /// At the initial point or post-convergence, `Err` is propagated.
    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError>;

    /// Optionally provide an analytical Jacobian.
    ///
    /// `free_param_indices`: indices (into `params`) of the free parameters,
    /// in the same order as the Jacobian columns.
    ///
    /// `y_current`: current model output, i.e. `self.evaluate(params)`.
    /// Provided so implementations can compute J analytically from T without
    /// an extra `evaluate` call.
    ///
    /// Returns `Some(J)` where `J.get(i, j) = ∂model[i]/∂params[free_param_indices[j]]`.
    /// The matrix has `y_current.len()` rows and `free_param_indices.len()` columns.
    /// Return `None` to fall back to finite-difference Jacobian (the default).
    fn analytical_jacobian(
        &self,
        _params: &[f64],
        _free_param_indices: &[usize],
        _y_current: &[f64],
    ) -> Option<FlatMatrix> {
        None
    }
}

/// Blanket implementation: shared references to any `FitModel` also implement
/// `FitModel`, forwarding all calls to the underlying implementation.
///
/// This enables `NormalizedTransmissionModel<&dyn FitModel>` to work when the
/// inner model is borrowed (e.g. in `fit_spectrum` where the inner model is a
/// local variable wrapped conditionally).
impl<M: FitModel + ?Sized> FitModel for &M {
    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
        (**self).evaluate(params)
    }

    fn analytical_jacobian(
        &self,
        params: &[f64],
        free_param_indices: &[usize],
        y_current: &[f64],
    ) -> Option<FlatMatrix> {
        (**self).analytical_jacobian(params, free_param_indices, y_current)
    }
}

/// Compute weighted chi-squared: Σ [(y_obs - y_model)² / σ²].
///
/// When `active_mask` is `Some(m)`, bins where `m[i]` is `false` are
/// **skipped entirely** rather than zero-weighted.  Explicit row-skip
/// matters when masked bins may contain non-finite residuals (e.g. NaN
/// outside the user's fit-energy range): `0.0 * NaN = NaN`, so a
/// zero-weight strategy would propagate NaN through the χ² accumulator
/// even though the bin contributes no information.  See SAMMY EMIN/EMAX
/// semantics (#514) and lm.rs Fix 4.
fn chi_squared(residuals: &[f64], weights: &[f64], active_mask: Option<&[bool]>) -> f64 {
    let mut sum = 0.0;
    for (i, (&r, &w)) in residuals.iter().zip(weights.iter()).enumerate() {
        if active_mask.is_some_and(|m| !m[i]) {
            continue;
        }
        sum += r * r * w;
    }
    sum
}

/// Infinity norm of the gradient, scaled by local curvature and residual size.
///
/// This is a dimensionless first-order optimality measure for least squares:
/// small values indicate that the current point is stationary even when χ² is
/// nonzero because the data are noisy or the model is imperfect.
fn scaled_gradient_inf_norm(jtw_j: &FlatMatrix, jtw_r: &[f64], chi2: f64) -> f64 {
    let residual_scale = chi2.sqrt().max(1.0);
    let mut max_scaled: f64 = 0.0;
    for (j, &grad_j) in jtw_r.iter().enumerate() {
        let curvature = jtw_j.get(j, j).abs().sqrt();
        let scale = curvature * residual_scale + PIVOT_FLOOR;
        max_scaled = max_scaled.max(grad_j.abs() / scale);
    }
    max_scaled
}

/// Whether the local model has meaningful curvature in at least one free direction.
///
/// Purely flat models (J = 0 everywhere) should still report failure rather than
/// "converged", even though their gradient is numerically zero.
fn has_informative_curvature(jtw_j: &FlatMatrix) -> bool {
    (0..jtw_j.nrows).any(|j| jtw_j.get(j, j) > LM_DIAGONAL_FLOOR)
}

/// Compute the Jacobian, preferring an analytical formula over finite differences.
///
/// `y_current` must equal `model.evaluate(&params.all_values())` at the
/// current parameter values — it is passed in to avoid a redundant evaluate
/// call (the LM loop already has this vector from the previous accepted step).
///
/// `all_vals_buf` is a scratch buffer reused across the per-parameter FD loop
/// to avoid allocating a fresh `Vec<f64>` on every `model.evaluate()` call.
///
/// `free_idx_buf` is a scratch buffer for `params.free_indices_into()`, reused
/// across iterations to avoid per-Jacobian allocation.
///
/// J.get(i, j) = ∂model[i] / ∂free_param[j]
fn compute_jacobian(
    model: &dyn FitModel,
    params: &mut ParameterSet,
    y_current: &[f64],
    fd_step: f64,
    all_vals_buf: &mut Vec<f64>,
    free_idx_buf: &mut Vec<usize>,
) -> Result<FlatMatrix, FittingError> {
    params.free_indices_into(free_idx_buf);
    let n_free = free_idx_buf.len();
    let n_data = y_current.len();

    // Try analytical Jacobian first (no extra evaluate calls).
    params.all_values_into(all_vals_buf);
    if let Some(j) = model.analytical_jacobian(all_vals_buf, free_idx_buf, y_current) {
        debug_assert!(
            j.nrows == n_data && j.ncols == n_free && j.data.len() == n_data * n_free,
            "analytical_jacobian shape mismatch: got ({}x{}, len={}), expected ({}x{}, len={})",
            j.nrows,
            j.ncols,
            j.data.len(),
            n_data,
            n_free,
            n_data * n_free,
        );
        return Ok(j);
    }

    // Fallback: forward finite differences, reusing y_current as the base.
    let mut jacobian = FlatMatrix::zeros(n_data, n_free);

    for (j, &idx) in free_idx_buf.iter().enumerate() {
        let original = params.params[idx].value;
        let step = fd_step * (1.0 + original.abs());

        params.params[idx].value = original + step;
        params.params[idx].clamp();
        let mut actual_step = params.params[idx].value - original;

        // #112: If the forward step is blocked by an upper bound, try the
        // backward step so the Jacobian column is not frozen at zero.
        if actual_step.abs() < PIVOT_FLOOR {
            params.params[idx].value = original - step;
            params.params[idx].clamp();
            actual_step = params.params[idx].value - original;
            if actual_step.abs() < PIVOT_FLOOR {
                // Truly stuck at a point constraint — skip this parameter.
                params.params[idx].value = original;
                continue;
            }
        }

        params.all_values_into(all_vals_buf);
        let perturbed = match model.evaluate(all_vals_buf) {
            Ok(v) => v,
            Err(_) => {
                // Restore original before skipping — leaving the column as
                // zero makes this parameter unresponsive for one LM step,
                // which is safe (matches Poisson compute_gradient pattern).
                params.params[idx].value = original;
                continue;
            }
        };
        params.params[idx].value = original;

        // The main LM loop checks the trial step via
        // `trial_has_active_nonfinite`, but `compute_jacobian` is also
        // called from the post-convergence covariance path, where no
        // such guard runs.  A NaN row in the perturbed output divided
        // by `actual_step` would yield NaN in the Jacobian, which then
        // poisons JᵀWJ and the inverse covariance.
        //
        // Per-cell skip (zero the entry) rather than whole-column skip:
        // a NaN at a masked / inactive row is benign (the JᵀWJ assembly
        // skips masked rows entirely via `active_mask.is_some_and(|m| !m[i])`),
        // so a finite Jacobian row produced from a bad-but-masked probe
        // must still be allowed to land in the column — see
        // `test_lm_active_mask_tolerates_model_nan_outside_range`.  The
        // active-row NaNs that would otherwise propagate into JᵀWJ are
        // zeroed here, which is the same numerical outcome as if the
        // model had reported `Err` at that probe (skipped column, but
        // per-row).
        for i in 0..n_data {
            let p = perturbed[i];
            let y = y_current[i];
            if p.is_finite() && y.is_finite() {
                *jacobian.get_mut(i, j) = (p - y) / actual_step;
            }
            // else: leave at the zero-default; for active rows this is
            // safe (matches the whole-column-skip outcome on Err), for
            // masked rows it never gets read.
        }
    }

    Ok(jacobian)
}

/// Solve (A + λ·diag(A)) · x = b using Gaussian elimination.
///
/// A is a flat n×n symmetric positive definite matrix (approximately).
/// Returns the solution vector x.
pub(crate) fn solve_damped_system(a: &FlatMatrix, b: &[f64], lambda: f64) -> Option<Vec<f64>> {
    let n = b.len();
    if n == 0 {
        return Some(vec![]);
    }

    // Build the augmented matrix [A + λ·diag(A) | b] as flat (n × (n+1)).
    let ncols = n + 1;
    let mut aug = FlatMatrix::zeros(n, ncols);
    for (i, &bi) in b.iter().enumerate() {
        for j in 0..n {
            *aug.get_mut(i, j) = a.get(i, j);
        }
        *aug.get_mut(i, i) += lambda * a.get(i, i).max(LM_DIAGONAL_FLOOR); // Ensure non-zero diagonal
        *aug.get_mut(i, n) = bi;
    }

    // Gaussian elimination with partial pivoting
    for col in 0..n {
        // Find pivot
        let mut max_val = aug.get(col, col).abs();
        let mut max_row = col;
        for row in (col + 1)..n {
            if aug.get(row, col).abs() > max_val {
                max_val = aug.get(row, col).abs();
                max_row = row;
            }
        }

        if max_val < PIVOT_FLOOR {
            return None; // Singular
        }

        // Swap rows col and max_row in the flat buffer.
        if col != max_row {
            let (row_a, row_b) = (col * ncols, max_row * ncols);
            let (first, second) = aug.data.split_at_mut(row_b);
            first[row_a..row_a + ncols].swap_with_slice(&mut second[..ncols]);
        }

        let pivot = aug.get(col, col);
        for row in (col + 1)..n {
            let factor = aug.get(row, col) / pivot;
            for j in col..=n {
                let val = aug.get(col, j);
                *aug.get_mut(row, j) -= factor * val;
            }
        }
    }

    // Back substitution
    let mut x = vec![0.0; n];
    for i in (0..n).rev() {
        let mut sum = aug.get(i, n);
        for (j, &xj) in x.iter().enumerate().skip(i + 1) {
            sum -= aug.get(i, j) * xj;
        }
        x[i] = sum / aug.get(i, i);
    }

    Some(x)
}

/// Invert a symmetric positive definite matrix (for covariance).
///
/// Input: flat n×n matrix. Output: flat n×n inverse, or None if singular.
pub(crate) fn invert_matrix(a: &FlatMatrix) -> Option<FlatMatrix> {
    let n = a.nrows;
    if n == 0 {
        return Some(FlatMatrix::zeros(0, 0));
    }

    // Build [A | I] as flat (n × 2n).
    let ncols = 2 * n;
    let mut aug = FlatMatrix::zeros(n, ncols);
    for i in 0..n {
        for j in 0..n {
            *aug.get_mut(i, j) = a.get(i, j);
        }
        *aug.get_mut(i, n + i) = 1.0;
    }

    // Forward elimination with partial pivoting
    for col in 0..n {
        let mut max_val = aug.get(col, col).abs();
        let mut max_row = col;
        for row in (col + 1)..n {
            if aug.get(row, col).abs() > max_val {
                max_val = aug.get(row, col).abs();
                max_row = row;
            }
        }

        if max_val < PIVOT_FLOOR {
            return None;
        }

        // Swap rows col and max_row.
        if col != max_row {
            let (row_a, row_b) = (col * ncols, max_row * ncols);
            let (first, second) = aug.data.split_at_mut(row_b);
            first[row_a..row_a + ncols].swap_with_slice(&mut second[..ncols]);
        }

        let pivot = aug.get(col, col);
        for j in 0..ncols {
            *aug.get_mut(col, j) /= pivot;
        }

        for row in 0..n {
            if row != col {
                let factor = aug.get(row, col);
                for j in 0..ncols {
                    let val = aug.get(col, j);
                    *aug.get_mut(row, j) -= factor * val;
                }
            }
        }
    }

    // Extract the right half [I|A⁻¹] → A⁻¹
    let mut inv = FlatMatrix::zeros(n, n);
    for i in 0..n {
        for j in 0..n {
            *inv.get_mut(i, j) = aug.get(i, n + j);
        }
    }

    Some(inv)
}

/// Run the Levenberg-Marquardt optimizer.
///
/// # Arguments
/// * `model` — Forward model implementing `FitModel`.
/// * `y_obs` — Observed data values.
/// * `sigma` — Uncertainties on observed data (standard deviations).
/// * `params` — Initial parameter set (modified in place on convergence).
/// * `config` — LM configuration.
///
/// # Returns
/// Fit result including final parameters, chi-squared, and uncertainties.
pub fn levenberg_marquardt(
    model: &dyn FitModel,
    y_obs: &[f64],
    sigma: &[f64],
    params: &mut ParameterSet,
    config: &LmConfig,
) -> Result<LmResult, FittingError> {
    levenberg_marquardt_with_mask(model, y_obs, sigma, params, config, None)
}

/// Run the Levenberg-Marquardt optimizer with an optional per-bin
/// active mask (SAMMY EMIN/EMAX-equivalent fit-energy-range restriction).
///
/// When `active_mask` is `Some(m)`:
/// - bins where `m[i]` is `false` are excluded from χ² and the normal
///   equations (weight zeroed before assembly), so they contribute
///   nothing to the gradient or Hessian;
/// - the model is still evaluated on the full grid so resolution
///   broadening at the boundaries of the active region is correct;
/// - `dof = n_active − n_free` where `n_active` is the count of
///   active bins.
///
/// When `active_mask` is `None`, behaviour is identical to
/// [`levenberg_marquardt`] (all bins active).
pub fn levenberg_marquardt_with_mask(
    model: &dyn FitModel,
    y_obs: &[f64],
    sigma: &[f64],
    params: &mut ParameterSet,
    config: &LmConfig,
    active_mask: Option<&[bool]>,
) -> Result<LmResult, FittingError> {
    let n_data = y_obs.len();
    if n_data == 0 {
        return Err(FittingError::EmptyData);
    }
    if sigma.len() != n_data {
        return Err(FittingError::LengthMismatch {
            expected: n_data,
            actual: sigma.len(),
            field: "sigma",
        });
    }
    if let Some(m) = active_mask
        && m.len() != n_data
    {
        return Err(FittingError::LengthMismatch {
            expected: n_data,
            actual: m.len(),
            field: "active_mask",
        });
    }
    let n_active = crate::active_mask::active_count(active_mask, n_data);

    // SAMMY EMIN/EMAX-equivalent fit-energy-range (#514): a mask with
    // zero active bins means the user's `[E_min, E_max]` does not
    // overlap the energy grid.  No data contributes to the cost
    // function — return non-converged with NaN χ² rather than
    // falling through to the all-fixed fast-return path (which would
    // report `converged: true, chi_squared: 0` from a zero-row sum)
    // or to the main optimisation loop (where `n_active < n_free`
    // would catch it but only after wasted setup work).
    if n_active == 0 {
        return Ok(LmResult {
            chi_squared: f64::NAN,
            reduced_chi_squared: f64::NAN,
            iterations: 0,
            converged: false,
            params: params.all_values(),
            covariance: None,
            uncertainties: None,
        });
    }

    let n_free = params.n_free();

    // Early return when all parameters are fixed: evaluate once and report the
    // model's chi-squared.  There is nothing to optimize, so iterating would
    // waste cycles.
    if n_free == 0 {
        let weights: Vec<f64> = sigma
            .iter()
            .enumerate()
            .map(|(i, &s)| {
                // Active-bin masking (SAMMY EMIN/EMAX): bins outside the
                // user range contribute zero to χ².
                if active_mask.is_some_and(|m| !m[i]) {
                    return 0.0;
                }
                if !s.is_finite() || s <= 0.0 {
                    1.0 / 1e30
                } else {
                    1.0 / (s * s)
                }
            })
            .collect();
        let y_model = model.evaluate(&params.all_values())?;

        // #P1: If the model produces NaN/Inf with all-fixed parameters,
        // return converged=false rather than silently propagating NaN chi².
        // Covariance/uncertainties are None because the fit did not converge —
        // an unconverged result has no meaningful covariance to report.
        //
        // SAMMY EMIN/EMAX-equivalent fit-energy-range (#514): masked rows are
        // skipped throughout the cost / normal-equation accumulators, so a
        // non-finite model output at a masked bin should not abort the fit
        // — only check finiteness on active rows.
        let model_has_active_nonfinite = y_model
            .iter()
            .enumerate()
            .any(|(i, v)| active_mask.is_none_or(|m| m[i]) && !v.is_finite());
        if model_has_active_nonfinite {
            return Ok(LmResult {
                chi_squared: f64::NAN,
                reduced_chi_squared: f64::NAN,
                iterations: 0,
                converged: false,
                params: params.all_values(),
                covariance: None,
                uncertainties: None,
            });
        }

        let residuals: Vec<f64> = y_obs
            .iter()
            .zip(y_model.iter())
            .map(|(&obs, &mdl)| obs - mdl)
            .collect();
        let chi2 = chi_squared(&residuals, &weights, active_mask);
        // #125.5: Compute dof via `n_active - n_free` (with `n_active = n_data`
        // when no mask is set) to mirror the main path and keep a single
        // visible formula.  When a fit-energy-range mask is in effect,
        // `n_active` reflects the count of bins that actually contributed
        // to χ² (SAMMY EMIN/EMAX semantics).
        let dof = n_active.saturating_sub(n_free);
        let reduced = if dof > 0 { chi2 / dof as f64 } else { f64::NAN };
        return Ok(LmResult {
            chi_squared: chi2,
            reduced_chi_squared: reduced,
            iterations: 0,
            converged: true,
            params: params.all_values(),
            covariance: Some(FlatMatrix::zeros(0, 0)),
            uncertainties: Some(vec![]),
        });
    }

    // #108.3: Underdetermined systems — when n_active < n_free, the problem is
    // underdetermined and the Jacobian cannot be full rank.  Return early
    // with converged=false so callers can detect the problem.
    // n_active == n_free is exactly determined (dof=0) and still solvable;
    // we allow it and report reduced_chi_squared = NaN (0/0).
    //
    // When a fit-energy-range mask is in effect, the underdetermined
    // check uses the active-bin count (SAMMY EMIN/EMAX semantics) — masked
    // bins do not contribute information to the fit.
    if n_active < n_free {
        return Ok(LmResult {
            chi_squared: f64::NAN,
            reduced_chi_squared: f64::NAN,
            iterations: 0,
            converged: false,
            params: params.all_values(),
            covariance: None,
            uncertainties: None,
        });
    }
    let dof = n_active - n_free;

    // #104: Validate sigma — division by zero or non-finite sigma would produce
    // NaN/Inf weights and silently corrupt the entire fit.  Clamp to a small
    // floor instead of rejecting outright, so callers with a few zero-sigma
    // bins still get a usable fit.
    //
    // Active-bin masking (SAMMY EMIN/EMAX): bins outside the user range
    // get weight 0 here, which propagates through χ² and the JᵀWJ /
    // JᵀWr normal-equation assembly to contribute exactly zero —
    // covering both residual and gradient masking with no further
    // changes downstream.
    let weights: Vec<f64> = sigma
        .iter()
        .enumerate()
        .map(|(i, &s)| {
            if active_mask.is_some_and(|m| !m[i]) {
                return 0.0;
            }
            if !s.is_finite() || s <= 0.0 {
                // Treat as negligible weight (huge sigma) rather than panicking.
                1.0 / 1e30
            } else {
                1.0 / (s * s)
            }
        })
        .collect();

    // Scratch buffers reused across the optimization loop for
    // params.all_values_into() calls in compute_jacobian (1 + N_free calls
    // per Jacobian computation) and the trial-step evaluation,
    // params.free_values_into() calls for snapshotting free parameters
    // before trial steps, and params.free_indices_into() calls inside
    // compute_jacobian.
    let mut all_vals_buf = Vec::with_capacity(params.params.len());
    let mut free_vals_buf = Vec::with_capacity(n_free);
    let mut free_idx_buf = Vec::with_capacity(n_free);

    // Initial model output, residuals, and chi².
    // y_current is kept up-to-date after accepted steps so that the next
    // Jacobian call can reuse it without an extra evaluate() call.
    params.all_values_into(&mut all_vals_buf);
    let mut y_current = model.evaluate(&all_vals_buf)?;
    let mut residuals: Vec<f64> = y_obs
        .iter()
        .zip(y_current.iter())
        .map(|(&obs, &mdl)| obs - mdl)
        .collect();
    let mut chi2 = chi_squared(&residuals, &weights, active_mask);

    let mut lambda = config.lambda_init;
    let mut converged = false;
    let mut iter = 0;

    for _ in 0..config.max_iter {
        iter += 1;

        // Compute Jacobian — uses y_current to avoid a redundant evaluate().
        // Analytical Jacobian (if provided by the model) costs 0 extra evaluates;
        // finite-difference fallback costs N_free extra evaluates.
        let jacobian = compute_jacobian(
            model,
            params,
            &y_current,
            config.fd_step,
            &mut all_vals_buf,
            &mut free_idx_buf,
        )?;

        // Build normal equations: JᵀWJ and JᵀWr.
        //
        // Active-bin masking (SAMMY EMIN/EMAX, #514): explicitly skip
        // masked rows rather than relying on weights[i] == 0 — the
        // model or the residual at a masked margin bin may be NaN
        // (e.g. when y_obs is NaN outside the user's fit-energy range),
        // and `0.0 * NaN = NaN` would poison both accumulators.
        let mut jtw_j = FlatMatrix::zeros(n_free, n_free);
        let mut jtw_r = vec![0.0; n_free];

        for (i, (&wi, &ri)) in weights.iter().zip(residuals.iter()).enumerate() {
            if active_mask.is_some_and(|m| !m[i]) {
                continue;
            }
            for (j, jtw_r_j) in jtw_r.iter_mut().enumerate() {
                let jij = jacobian.get(i, j);
                *jtw_r_j += jij * wi * ri;
                for k in 0..n_free {
                    *jtw_j.get_mut(j, k) += jij * wi * jacobian.get(i, k);
                }
            }
        }
        let scaled_grad_inf = scaled_gradient_inf_norm(&jtw_j, &jtw_r, chi2);
        let informative_curvature = has_informative_curvature(&jtw_j);

        // Solve (JᵀWJ + λ·diag(JᵀWJ)) · δ = JᵀWr
        let delta = match solve_damped_system(&jtw_j, &jtw_r, lambda) {
            Some(d) => d,
            None => break, // Singular system
        };

        // Trial step — snapshot free values into a reusable buffer to avoid
        // per-iteration allocation.
        params.free_values_into(&mut free_vals_buf);
        let trial_free: Vec<f64> = free_vals_buf
            .iter()
            .zip(delta.iter())
            .map(|(&v, &d)| v + d)
            .collect();
        let param_change: f64 = delta
            .iter()
            .zip(free_vals_buf.iter())
            .map(|(&d, &v)| (d / (v.abs() + PIVOT_FLOOR)).powi(2))
            .sum::<f64>()
            .sqrt();
        params.set_free_values(&trial_free);

        params.all_values_into(&mut all_vals_buf);
        let y_trial = match model.evaluate(&all_vals_buf) {
            Ok(y) => y,
            Err(_) => {
                // Treat evaluation error as a bad step (same as NaN).
                params.set_free_values(&free_vals_buf);
                lambda *= config.lambda_up;
                if lambda > LAMBDA_BREAKOUT {
                    converged = false;
                    break;
                }
                continue;
            }
        };

        // #113: If the model produced NaN/Inf at any *active* bin, treat as
        // a bad step (same as chi2 increase) — increase lambda and try again.
        // SAMMY EMIN/EMAX-equivalent fit-energy-range (#514): masked rows are
        // skipped from χ² / JᵀWJ / JᵀWr / covariance, so a non-finite model
        // output at a masked margin bin must not penalise the trial step.
        let trial_has_active_nonfinite = y_trial
            .iter()
            .enumerate()
            .any(|(i, v)| active_mask.is_none_or(|m| m[i]) && !v.is_finite());
        if trial_has_active_nonfinite {
            params.set_free_values(&free_vals_buf);
            lambda *= config.lambda_up;
            if lambda > LAMBDA_BREAKOUT {
                converged = false;
                break;
            }
            continue;
        }

        let trial_residuals: Vec<f64> = y_obs
            .iter()
            .zip(y_trial.iter())
            .map(|(&obs, &mdl)| obs - mdl)
            .collect();
        let trial_chi2 = chi_squared(&trial_residuals, &weights, active_mask);
        let chi2_delta = (trial_chi2 - chi2).abs();
        let chi2_scale = chi2.abs().max(trial_chi2.abs()).max(1.0);
        let chi2_stagnated = chi2_delta <= config.tol_chi2 * chi2_scale;

        if trial_chi2 < chi2 {
            // Accept step — cache y_trial so the next iteration can skip
            // the base evaluate() inside compute_jacobian.
            let rel_change = (chi2 - trial_chi2) / (chi2 + PIVOT_FLOOR);
            chi2 = trial_chi2;
            residuals = trial_residuals;
            y_current = y_trial;
            lambda *= config.lambda_down;

            // Check convergence: relative chi2 change is tiny or parameters
            // stopped moving.  The old third condition
            // `chi2 < tol_chi2 * n_data` was scale-dependent and could cause
            // premature convergence on data with small residuals.  (#108.2)
            if rel_change < config.tol_chi2 || param_change < config.tol_param {
                converged = true;
                break;
            }
        } else {
            // Numerical stagnation: the strict LM acceptance test keeps
            // `trial_chi2 == chi2` in the reject path, but when both the
            // objective change and parameter step are tiny, the optimizer may
            // already be at a nonzero-χ² stationary point (noisy data,
            // correlated parameters, imperfect model). Report convergence if
            // the gradient is also tiny and the local model has real
            // curvature, rather than inflating lambda until breakout.
            let grad_tol = config.tol_chi2.sqrt().max(config.tol_param.sqrt());
            if chi2_stagnated
                && param_change < config.tol_param
                && scaled_grad_inf < grad_tol
                && informative_curvature
            {
                params.set_free_values(&free_vals_buf);
                converged = true;
                break;
            }

            // Reject step, restore parameters.
            // y_current stays valid (parameters reverted to free_vals_buf snapshot).
            params.set_free_values(&free_vals_buf);
            lambda *= config.lambda_up;

            // #108.4: If lambda is astronomically large, the optimizer is stuck
            // in a region where no step improves chi2.  Break out rather than
            // wasting iterations.
            if lambda > LAMBDA_BREAKOUT {
                converged = false;
                break;
            }
        }
    }

    let reduced_chi2 = if dof > 0 { chi2 / dof as f64 } else { f64::NAN };

    // Compute covariance matrix: (JᵀWJ)⁻¹ at the final parameters.
    //
    // This block requires an extra Jacobian evaluation + O(n_free³) matrix
    // inversion.  When `compute_covariance` is false (e.g. per-pixel spatial
    // mapping), we skip it entirely — the caller only needs densities and
    // chi-squared, not uncertainties.
    let (covariance, uncertainties) = if converged && config.compute_covariance {
        let jacobian = compute_jacobian(
            model,
            params,
            &y_current,
            config.fd_step,
            &mut all_vals_buf,
            &mut free_idx_buf,
        )?;
        // Same active-mask row-skip semantics as the main assembly
        // loop above — keeps the covariance free of NaN contributions
        // from masked margin bins (#514).
        let mut jtw_j = FlatMatrix::zeros(n_free, n_free);
        for (i, &wi) in weights.iter().enumerate() {
            if active_mask.is_some_and(|m| !m[i]) {
                continue;
            }
            for j in 0..n_free {
                let jij = jacobian.get(i, j);
                for k in 0..n_free {
                    *jtw_j.get_mut(j, k) += jij * wi * jacobian.get(i, k);
                }
            }
        }

        // #108.1: Scale covariance by reduced chi-squared.
        //
        // The raw (JᵀWJ)⁻¹ gives the covariance only when the model is a perfect
        // description and the weights are exact.  Multiplying by χ²/ν accounts for
        // misfit (model inadequacy or underestimated errors).  This is the standard
        // statistical prescription (see e.g. Numerical Recipes §15.6).
        //
        // When dof == 0 (exactly determined system), reduced chi-squared is
        // undefined (0/0).  We report NaN and skip covariance scaling entirely,
        // returning None for covariance and uncertainties.
        if dof > 0 {
            if let Some(mut cov) = invert_matrix(&jtw_j) {
                for elem in cov.data.iter_mut() {
                    *elem *= reduced_chi2;
                }
                let unc: Vec<f64> = (0..n_free)
                    .map(|i| {
                        let diag = cov.get(i, i);
                        if diag.is_finite() && diag > 0.0 {
                            diag.sqrt()
                        } else {
                            f64::NAN
                        }
                    })
                    .collect();
                (Some(cov), Some(unc))
            } else {
                (None, None)
            }
        } else {
            // dof == 0: covariance scaling is undefined; report None.
            (None, None)
        }
    } else {
        // Covariance computation skipped (compute_covariance == false).
        (None, None)
    };

    Ok(LmResult {
        chi_squared: chi2,
        reduced_chi_squared: reduced_chi2,
        iterations: iter,
        converged,
        params: params.all_values(),
        covariance,
        uncertainties,
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parameters::{FitParameter, ParameterSet};

    /// Simple linear model: y = a*x + b
    struct LinearModel {
        x: Vec<f64>,
    }

    impl FitModel for LinearModel {
        fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
            let a = params[0];
            let b = params[1];
            Ok(self.x.iter().map(|&x| a * x + b).collect())
        }
    }

    #[test]
    fn test_fit_linear_exact() {
        // Fit y = 2x + 3 with exact data (no noise)
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 10];

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0), // initial guess
            FitParameter::unbounded("b", 1.0),
        ]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(result.converged, "Fit did not converge");
        assert!(
            (result.params[0] - 2.0).abs() < 1e-4,
            "a = {}, expected 2.0",
            result.params[0]
        );
        assert!(
            (result.params[1] - 3.0).abs() < 1e-4,
            "b = {}, expected 3.0",
            result.params[1]
        );
        assert!(result.chi_squared < 1e-6);
    }

    #[test]
    fn test_converges_on_exact_flat_bottom_without_lambda_breakout() {
        // Exact data with zero initial damping reaches the optimum in one
        // Newton step. The next iteration sits on a flat χ² floor where the
        // strict `trial_chi2 < chi2` check must not force a false non-
        // convergence.
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 10];

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 0.0),
            FitParameter::unbounded("b", 0.0),
        ]);
        let config = LmConfig {
            lambda_init: 0.0,
            ..LmConfig::default()
        };

        let result = levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &config).unwrap();

        assert!(
            result.converged,
            "Fit should converge on an exact flat bottom"
        );
        assert!(result.chi_squared < 1e-20, "chi2 = {}", result.chi_squared);
        assert!(
            result.iterations < config.max_iter,
            "LM should stop by convergence, not by iteration exhaustion"
        );
    }

    #[test]
    fn test_converges_on_nonzero_chi2_stationary_point() {
        // Noisy overdetermined data have a nonzero-chi2 optimum. With very
        // strict tolerances, the accepted step to the optimum does not trip
        // the accept-branch convergence checks, so the next iteration reaches
        // a reject-path stationary point with trial_chi2 == chi2.
        struct AffineModel {
            x: Vec<f64>,
        }
        impl FitModel for AffineModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let a = params[0];
                let b = params[1];
                Ok(self.x.iter().map(|&x| a * x + b).collect())
            }
        }

        let model = AffineModel {
            x: vec![0.0, 1.0, 2.0, 3.0, 4.0],
        };
        let y_obs = vec![0.1, 0.9, 2.2, 2.8, 4.1];
        let sigma = vec![1.0; y_obs.len()];
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 0.0),
            FitParameter::unbounded("b", 0.0),
        ]);
        let config = LmConfig {
            max_iter: 200,
            tol_chi2: 1e-16,
            tol_param: 1e-16,
            ..LmConfig::default()
        };

        let result = levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &config).unwrap();

        assert!(
            result.converged,
            "stationary nonzero-chi2 optimum should converge instead of lambda breakout"
        );
        assert!(
            result.reduced_chi_squared.is_finite() && result.reduced_chi_squared > 0.0,
            "expected nonzero reduced chi2 at noisy optimum, got {}",
            result.reduced_chi_squared
        );
    }

    #[test]
    fn test_fit_linear_with_fixed_param() {
        // Fit y = a*x + 3 with b fixed at 3.0
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 10];

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::fixed("b", 3.0),
        ]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(result.converged);
        assert!(
            (result.params[0] - 2.0).abs() < 1e-6,
            "a = {}",
            result.params[0]
        );
        assert_eq!(result.params[1], 3.0); // fixed
    }

    #[test]
    fn test_fit_quadratic() {
        // Fit y = a*x² + b*x + c to quadratic data
        struct QuadModel {
            x: Vec<f64>,
        }
        impl FitModel for QuadModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let (a, b, c) = (params[0], params[1], params[2]);
                Ok(self.x.iter().map(|&x| a * x * x + b * x + c).collect())
            }
        }

        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 0.5 * xi * xi - 2.0 * xi + 1.0).collect();
        let sigma = vec![1.0; 20];

        let model = QuadModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 0.0),
            FitParameter::unbounded("c", 0.0),
        ]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(result.converged);
        assert!(
            (result.params[0] - 0.5).abs() < 1e-5,
            "a = {}",
            result.params[0]
        );
        assert!(
            (result.params[1] - (-2.0)).abs() < 1e-5,
            "b = {}",
            result.params[1]
        );
        assert!(
            (result.params[2] - 1.0).abs() < 1e-5,
            "c = {}",
            result.params[2]
        );
    }

    #[test]
    fn test_non_negative_constraint() {
        // Fit y = a*x with data that has negative slope,
        // but parameter a is constrained to be non-negative.
        // Should converge to a ≈ 0.
        struct SlopeModel {
            x: Vec<f64>,
        }
        impl FitModel for SlopeModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let a = params[0];
                Ok(self.x.iter().map(|&x| a * x).collect())
            }
        }

        let x: Vec<f64> = (1..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| -2.0 * xi).collect();
        let sigma = vec![1.0; 9];

        let model = SlopeModel { x };
        let mut params = ParameterSet::new(vec![FitParameter::non_negative("a", 1.0)]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        // Should be clamped at 0
        assert!(
            result.params[0] >= 0.0 && result.params[0] < 0.1,
            "a = {}, expected ~0",
            result.params[0]
        );
    }

    #[test]
    fn test_uncertainty_estimation() {
        // Fit linear model; uncertainties should be reasonable
        let x: Vec<f64> = (0..100).map(|i| i as f64 * 0.1).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![0.1; 100]; // Small uncertainty

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(result.converged);
        assert!(result.uncertainties.is_some());
        let unc = result.uncertainties.unwrap();
        // Uncertainties should be positive and small
        assert!(unc[0] > 0.0 && unc[0] < 0.01, "σ_a = {}", unc[0]);
        assert!(unc[1] > 0.0 && unc[1] < 0.1, "σ_b = {}", unc[1]);
    }

    #[test]
    fn test_solve_damped_system_identity() {
        // (I + λ·I)x = b → x = b/(1+λ)
        let a = FlatMatrix {
            data: vec![1.0, 0.0, 0.0, 1.0],
            nrows: 2,
            ncols: 2,
        };
        let b = vec![2.0, 4.0];
        let lambda = 1.0;
        let x = solve_damped_system(&a, &b, lambda).unwrap();
        assert!((x[0] - 1.0).abs() < 1e-10);
        assert!((x[1] - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_invert_matrix_2x2() {
        let a = FlatMatrix {
            data: vec![4.0, 7.0, 2.0, 6.0],
            nrows: 2,
            ncols: 2,
        };
        let inv = invert_matrix(&a).unwrap();
        // A⁻¹ = 1/10 × [6 -7; -2 4]
        assert!((inv.get(0, 0) - 0.6).abs() < 1e-10);
        assert!((inv.get(0, 1) - (-0.7)).abs() < 1e-10);
        assert!((inv.get(1, 0) - (-0.2)).abs() < 1e-10);
        assert!((inv.get(1, 1) - 0.4).abs() < 1e-10);
    }

    // ---- Edge-case tests for issue #125 ----

    #[test]
    fn test_all_fixed_params_nan_model() {
        // #125.1: When all parameters are fixed and the model produces NaN,
        // the result must report converged=false (not converged=true with NaN chi2).
        struct NanModel;
        impl FitModel for NanModel {
            fn evaluate(&self, _params: &[f64]) -> Result<Vec<f64>, FittingError> {
                Ok(vec![f64::NAN; 5])
            }
        }

        let y_obs = vec![1.0; 5];
        let sigma = vec![1.0; 5];
        let mut params = ParameterSet::new(vec![FitParameter::fixed("a", 1.0)]);

        let result =
            levenberg_marquardt(&NanModel, &y_obs, &sigma, &mut params, &LmConfig::default())
                .unwrap();

        assert!(!result.converged, "All-fixed NaN model should not converge");
        assert!(result.chi_squared.is_nan(), "chi2 should be NaN");
        assert_eq!(result.iterations, 0);
    }

    #[test]
    fn test_underdetermined_system() {
        // #125.6: More free parameters than data points → underdetermined.
        // Should return converged=false immediately.
        let y_obs = vec![1.0, 2.0]; // 2 data points
        let sigma = vec![1.0, 1.0];

        // 2 free params for 2 data points is exactly determined (ok),
        // but 3 free params for 2 data points is underdetermined.
        struct ThreeParamModel;
        impl FitModel for ThreeParamModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                Ok(vec![params[0] + params[1] + params[2]; 2])
            }
        }

        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
            FitParameter::unbounded("c", 1.0),
        ]);

        let result = levenberg_marquardt(
            &ThreeParamModel,
            &y_obs,
            &sigma,
            &mut params,
            &LmConfig::default(),
        )
        .unwrap();

        assert!(
            !result.converged,
            "Underdetermined system should not converge"
        );
        assert!(result.chi_squared.is_nan());
        assert_eq!(result.iterations, 0);
    }

    #[test]
    fn test_exactly_determined_dof_zero() {
        // #125.6: n_data == n_free → dof=0, exactly determined.
        // Should still converge but reduced_chi_squared is NaN (0/0).
        let y_obs = vec![5.0, 11.0]; // y = 2x + 3 at x=1,4
        let sigma = vec![1.0, 1.0];

        let model = LinearModel { x: vec![1.0, 4.0] };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(
            result.converged,
            "Exactly-determined system should converge"
        );
        assert!(
            result.chi_squared < 1e-6,
            "chi2 should be ~0, got {}",
            result.chi_squared
        );
        // dof=0 → reduced chi2 is NaN
        assert!(
            result.reduced_chi_squared.is_nan(),
            "reduced_chi2 should be NaN for dof=0, got {}",
            result.reduced_chi_squared
        );
        // No covariance when dof=0
        assert!(result.covariance.is_none());
        assert!(result.uncertainties.is_none());
    }

    #[test]
    fn test_lambda_breakout() {
        // #125.6: A model that never improves should trigger lambda breakout.
        struct ConstantModel;
        impl FitModel for ConstantModel {
            fn evaluate(&self, _params: &[f64]) -> Result<Vec<f64>, FittingError> {
                // Returns constant output regardless of parameters,
                // so the Jacobian is zero and no step can improve chi2.
                Ok(vec![42.0; 5])
            }
        }

        let y_obs = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let sigma = vec![1.0; 5];
        let mut params = ParameterSet::new(vec![FitParameter::unbounded("a", 1.0)]);

        let config = LmConfig {
            max_iter: 1000,
            ..LmConfig::default()
        };

        let result =
            levenberg_marquardt(&ConstantModel, &y_obs, &sigma, &mut params, &config).unwrap();

        assert!(
            !result.converged,
            "Flat model should not converge (lambda breakout)"
        );
        assert!(
            result.covariance.is_none(),
            "unconverged fit should not report covariance"
        );
        assert!(
            result.uncertainties.is_none(),
            "unconverged fit should not report uncertainties"
        );
    }

    #[test]
    fn test_nan_model_during_iteration() {
        // #125.6: Model that produces NaN for certain parameter values.
        // The optimizer should treat NaN steps as bad and try smaller steps.
        struct NanAtLargeModel {
            x: Vec<f64>,
        }
        impl FitModel for NanAtLargeModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let a = params[0];
                Ok(self
                    .x
                    .iter()
                    .map(|&x| if a > 5.0 { f64::NAN } else { a * x + 1.0 })
                    .collect())
            }
        }

        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let sigma = vec![1.0; 10];

        let model = NanAtLargeModel { x };
        let mut params = ParameterSet::new(vec![FitParameter::unbounded("a", 3.0)]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        // Should converge to a≈2 while avoiding the NaN region a>5.
        assert!(result.converged, "Should converge avoiding NaN region");
        assert!(
            (result.params[0] - 2.0).abs() < 0.1,
            "a = {}, expected ~2.0",
            result.params[0]
        );
    }

    #[test]
    fn test_err_model_during_trial_step() {
        // Model that returns Err for large parameter values.
        // The optimizer should treat Err trial steps as bad steps (increase λ)
        // and converge without panicking.
        struct ErrAtLargeModel {
            x: Vec<f64>,
        }
        impl FitModel for ErrAtLargeModel {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let a = params[0];
                if a > 5.0 {
                    return Err(FittingError::EvaluationFailed(
                        "parameter out of valid range".into(),
                    ));
                }
                Ok(self.x.iter().map(|&x| a * x + 1.0).collect())
            }
        }

        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 1.0).collect();
        let sigma = vec![1.0; 10];

        let model = ErrAtLargeModel { x };
        let mut params = ParameterSet::new(vec![FitParameter::unbounded("a", 3.0)]);

        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        // Should converge to a≈2 while avoiding the Err region a>5.
        assert!(result.converged, "Should converge avoiding Err region");
        assert!(
            (result.params[0] - 2.0).abs() < 0.1,
            "a = {}, expected ~2.0",
            result.params[0]
        );
    }

    #[test]
    fn test_fit_linear_no_covariance() {
        // When compute_covariance is false, the fit should still converge and
        // produce correct parameters, but covariance and uncertainties are None.
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_obs: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 10];

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);

        let config = LmConfig {
            compute_covariance: false,
            ..LmConfig::default()
        };

        let result = levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &config).unwrap();

        assert!(result.converged, "Fit did not converge");
        assert!(
            (result.params[0] - 2.0).abs() < 1e-4,
            "a = {}, expected 2.0",
            result.params[0]
        );
        assert!(
            (result.params[1] - 3.0).abs() < 1e-4,
            "b = {}, expected 3.0",
            result.params[1]
        );
        assert!(result.chi_squared < 1e-6);
        assert!(
            result.covariance.is_none(),
            "covariance should be None when compute_covariance=false"
        );
        assert!(
            result.uncertainties.is_none(),
            "uncertainties should be None when compute_covariance=false"
        );
    }

    #[test]
    fn test_zero_negative_sigma_clamping() {
        // #125.6: Zero and negative sigma should be clamped to huge sigma (tiny weight),
        // not cause NaN/panic.
        let y_obs = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let sigma = vec![0.0, -1.0, f64::NAN, f64::INFINITY, 1.0];

        let model = LinearModel {
            x: vec![0.0, 1.0, 2.0, 3.0, 4.0],
        };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 0.0),
        ]);

        // Should not panic and should produce a finite result.
        let result =
            levenberg_marquardt(&model, &y_obs, &sigma, &mut params, &LmConfig::default()).unwrap();

        assert!(
            result.chi_squared.is_finite(),
            "chi2 should be finite despite bad sigma, got {}",
            result.chi_squared
        );
        assert!(
            result.converged,
            "Fit should converge despite bad sigma values"
        );
        // The only valid data point with sigma=1.0 is (x=4, y=5).
        // The fitted line y = a*x + b should pass near that point.
        let y_at_4 = result.params[0] * 4.0 + result.params[1];
        assert!(
            (y_at_4 - 5.0).abs() < 1.0,
            "Fitted line should pass near (4, 5): a={}, b={}, y(4)={}",
            result.params[0],
            result.params[1],
            y_at_4,
        );
    }

    // ------------------------------------------------------------------
    // Active-bin mask (SAMMY EMIN/EMAX-equivalent fit-energy-range, #514).
    // ------------------------------------------------------------------

    /// LM with an `active_mask` that restricts to a subset of bins must
    /// produce a fit equivalent to running on that subset directly —
    /// validates that masking propagates through both the residual /
    /// χ² accumulation AND the dof / reduced-χ² book-keeping.
    #[test]
    fn test_lm_active_mask_subset_equivalence() {
        // Linear fit y = 2x + 3 on x = 0..10.  Mask = bins 0..5 only.
        let x_full: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y_full: Vec<f64> = x_full.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma_full = vec![1.0; 10];
        let mask = vec![
            true, true, true, true, true, false, false, false, false, false,
        ];

        let model = LinearModel { x: x_full.clone() };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let result_masked = levenberg_marquardt_with_mask(
            &model,
            &y_full,
            &sigma_full,
            &mut params,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();
        assert!(result_masked.converged);
        assert!((result_masked.params[0] - 2.0).abs() < 1e-6);
        assert!((result_masked.params[1] - 3.0).abs() < 1e-6);

        // Reference: same fit on the subset directly (no mask).
        let x_sub = x_full[..5].to_vec();
        let y_sub = y_full[..5].to_vec();
        let sigma_sub = vec![1.0; 5];
        let model_sub = LinearModel { x: x_sub };
        let mut params_sub = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let result_sub = levenberg_marquardt(
            &model_sub,
            &y_sub,
            &sigma_sub,
            &mut params_sub,
            &LmConfig::default(),
        )
        .unwrap();

        // Recovered parameters and reduced-χ² (both fits noiseless,
        // so reduced-χ² ≈ 0 in either case) must match.
        for j in 0..2 {
            assert!(
                (result_masked.params[j] - result_sub.params[j]).abs() < 1e-6,
                "param {j}: masked={} subset={}",
                result_masked.params[j],
                result_sub.params[j]
            );
        }
        assert!(
            (result_masked.reduced_chi_squared - result_sub.reduced_chi_squared).abs() < 1e-6,
            "reduced-χ²: masked={} subset={}",
            result_masked.reduced_chi_squared,
            result_sub.reduced_chi_squared
        );
    }

    /// Without the mask, residuals from the out-of-range half of the
    /// grid would dominate χ² and pull the fit way off — the masked
    /// fit must NOT be biased by them.
    #[test]
    fn test_lm_active_mask_excludes_out_of_range_residuals() {
        // y = 2x + 3 inside the masked range; corrupt outliers outside.
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let mut y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        for yi in y.iter_mut().skip(5) {
            *yi += 1000.0; // huge corruption outside the active mask
        }
        let sigma = vec![1.0; 10];
        let mask = vec![true; 5]
            .into_iter()
            .chain(std::iter::repeat_n(false, 5))
            .collect::<Vec<bool>>();

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let result = levenberg_marquardt_with_mask(
            &model,
            &y,
            &sigma,
            &mut params,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();
        assert!(result.converged);
        assert!(
            (result.params[0] - 2.0).abs() < 1e-6,
            "slope should be 2.0 (corrupt outliers must be masked out), got {}",
            result.params[0]
        );
        assert!(
            (result.params[1] - 3.0).abs() < 1e-6,
            "intercept should be 3.0 (corrupt outliers must be masked out), got {}",
            result.params[1]
        );
    }

    /// Out-of-mask `y_obs` containing `NaN` must not poison χ² /
    /// JᵀWJ / JᵀWr.  Without explicit row-skip in the accumulator
    /// loops, `0.0 (weight) * NaN (residual) = NaN` would propagate
    /// through the fit despite the masked weight being zero.
    #[test]
    fn test_lm_active_mask_tolerates_nan_outside_range() {
        // y = 2x + 3 inside the active mask; NaN outside.  A naive
        // zero-weight implementation would return NaN χ² and fail to
        // converge; the row-skip path should fit cleanly.
        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let mut y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        for yi in y.iter_mut().skip(5) {
            *yi = f64::NAN;
        }
        let sigma = vec![1.0; 10];
        let mask = vec![true; 5]
            .into_iter()
            .chain(std::iter::repeat_n(false, 5))
            .collect::<Vec<bool>>();

        let model = LinearModel { x };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let result = levenberg_marquardt_with_mask(
            &model,
            &y,
            &sigma,
            &mut params,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();

        assert!(
            result.converged,
            "fit should converge despite NaN outside the active mask"
        );
        assert!(
            result.chi_squared.is_finite(),
            "χ² should be finite (NaN poisoning prevented), got {}",
            result.chi_squared
        );
        assert!(
            (result.params[0] - 2.0).abs() < 1e-6,
            "slope should be 2.0, got {}",
            result.params[0]
        );
        assert!(
            (result.params[1] - 3.0).abs() < 1e-6,
            "intercept should be 3.0, got {}",
            result.params[1]
        );
    }

    /// The global finite checks on `y_model` / `y_trial` must skip
    /// masked bins.  A model that returns NaN only at masked margin
    /// bins should not abort the fit or get its trial step rejected.
    #[test]
    fn test_lm_active_mask_tolerates_model_nan_outside_range() {
        // Linear model that injects NaN into masked bins.  The fit
        // should still converge cleanly — the contract is that masked
        // rows are completely irrelevant to the fit.
        struct LinearModelWithMaskedNaN {
            x: Vec<f64>,
            mask: Vec<bool>,
        }
        impl FitModel for LinearModelWithMaskedNaN {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                let a = params[0];
                let b = params[1];
                Ok(self
                    .x
                    .iter()
                    .enumerate()
                    .map(|(i, &x)| if self.mask[i] { a * x + b } else { f64::NAN })
                    .collect())
            }
        }

        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 10];
        let mask: Vec<bool> = vec![true; 5]
            .into_iter()
            .chain(std::iter::repeat_n(false, 5))
            .collect();

        let model = LinearModelWithMaskedNaN {
            x: x.clone(),
            mask: mask.clone(),
        };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let result = levenberg_marquardt_with_mask(
            &model,
            &y,
            &sigma,
            &mut params,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();

        assert!(
            result.converged,
            "fit should converge despite model NaN at masked bins"
        );
        assert!(result.chi_squared.is_finite());
        assert!((result.params[0] - 2.0).abs() < 1e-6);
        assert!((result.params[1] - 3.0).abs() < 1e-6);
    }

    /// A zero-active mask (the user's range misses the grid entirely)
    /// must return non-converged regardless of whether parameters are
    /// free or fixed.  Pre-fix the all-fixed fast-return path would
    /// report `converged: true, chi_squared: 0` from a sum over zero
    /// rows — a deceptive "success" with no data behind it.
    #[test]
    fn test_lm_active_mask_all_false_returns_non_converged() {
        let x: Vec<f64> = (0..5).map(|i| i as f64).collect();
        let y: Vec<f64> = x.iter().map(|&xi| 2.0 * xi + 3.0).collect();
        let sigma = vec![1.0; 5];
        let mask = vec![false; 5];
        let model = LinearModel { x: x.clone() };

        // (a) All parameters free.
        let mut params_free = ParameterSet::new(vec![
            FitParameter::unbounded("a", 1.0),
            FitParameter::unbounded("b", 1.0),
        ]);
        let r_free = levenberg_marquardt_with_mask(
            &model,
            &y,
            &sigma,
            &mut params_free,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();
        assert!(
            !r_free.converged,
            "n_free > 0 + zero-active mask must NOT report converged"
        );
        assert!(r_free.chi_squared.is_nan());
        assert!(r_free.reduced_chi_squared.is_nan());

        // (b) All parameters fixed → n_free == 0 (the path that
        //     failed in #517 before the n_active==0 early-return).
        let mut params_fixed = ParameterSet::new(vec![
            FitParameter::fixed("a", 1.0),
            FitParameter::fixed("b", 0.0),
        ]);
        let r_fixed = levenberg_marquardt_with_mask(
            &model,
            &y,
            &sigma,
            &mut params_fixed,
            &LmConfig::default(),
            Some(&mask),
        )
        .unwrap();
        assert!(
            !r_fixed.converged,
            "n_free == 0 + zero-active mask must NOT report converged \
             (sum over zero rows would be 0, masquerading as a perfect fit)"
        );
        assert!(r_fixed.chi_squared.is_nan());
        assert!(r_fixed.reduced_chi_squared.is_nan());
    }

    // ==================================================================
    // NaN-in-Jacobian during FD probes.
    //
    // The main LM loop guards the trial step at the `model.evaluate`
    // site via `trial_has_active_nonfinite`.  The silent surface is the
    // post-convergence covariance Jacobian: it calls `compute_jacobian`
    // directly, which has no finiteness check on the per-column FD
    // probe.  A NaN at any active row of the perturbed model output
    // gets divided by `actual_step` and turned into NaN entries in the
    // Jacobian — these poison `JᵀWJ` and the inverse covariance, which
    // is reported as the "uncertainty" of the fit.
    //
    // `compute_jacobian`'s FD path zeros per-cell entries whose probe
    // returned a non-finite value, mirroring the existing `Err` branch.
    // ==================================================================

    /// `compute_jacobian`'s FD path zeroes per-cell entries whose
    /// perturbed model output is non-finite, rather than baking NaN
    /// into the Jacobian (which then poisons the post-convergence
    /// covariance computation).  Per-cell (not whole-column) so that a
    /// NaN at a masked / inactive row leaves the rest of the column
    /// usable — see `test_lm_active_mask_tolerates_model_nan_outside_range`
    /// for the masked-NaN contract.
    #[test]
    fn test_compute_jacobian_skips_nan_perturbed_column() {
        use crate::parameters::FitParameter;
        // Two-parameter model.  Param 0 is well-behaved (returns
        // `θ_0` constant).  Param 1 produces a NaN row at the +FD
        // probe — exactly the "NaN-in-Jacobian during FD probes"
        // signature from issue #552.
        struct NanInColumn1 {
            // Base value of param 1 — used to detect the perturbation.
            p1_base: f64,
        }
        impl FitModel for NanInColumn1 {
            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
                // The +FD probe perturbs params[1] by ε; everything else
                // keeps params[1] == self.p1_base.
                let nan_row = (params[1] - self.p1_base).abs() > 1e-12;
                let v = if nan_row { f64::NAN } else { params[0] };
                Ok(vec![v; 4])
            }
            // No analytical_jacobian -> FD fallback drives compute_jacobian.
        }
        let model = NanInColumn1 { p1_base: 0.3 };
        let mut params = ParameterSet::new(vec![
            FitParameter::unbounded("p0", 0.5),
            FitParameter::unbounded("p1", 0.3),
        ]);
        let y_current = vec![0.5; 4];
        let mut all_vals_buf: Vec<f64> = Vec::new();
        let mut free_idx_buf: Vec<usize> = Vec::new();
        let jac = compute_jacobian(
            &model,
            &mut params,
            &y_current,
            1e-6,
            &mut all_vals_buf,
            &mut free_idx_buf,
        )
        .unwrap();
        // Every entry must be finite — column 1 must have been skipped.
        for (i, v) in jac.data.iter().enumerate() {
            assert!(
                v.is_finite(),
                "compute_jacobian produced non-finite entry at index {i}: {v}"
            );
        }
        // Column 1 was skipped -> all zeros.  Column 0 has the normal
        // ∂T/∂θ_0 = 1 column from the well-behaved param.
        for i in 0..jac.nrows {
            assert_eq!(
                jac.get(i, 1),
                0.0,
                "column 1 (NaN probe) should be zeroed, row {i}"
            );
        }
    }
}