Skip to main content

nereids_fitting/
poisson.rs

1//! Poisson-likelihood optimizer for low-count neutron data (transmission
2//! path).
3//!
4//! Minimizes the single-arm Poisson negative log-likelihood
5//!
6//! ```text
7//! L(θ) = Σᵢ [y_model(θ)ᵢ − y_obs,ᵢ · ln(y_model(θ)ᵢ)]
8//! ```
9//!
10//! using a projected damped Gauss-Newton / Fisher optimizer with
11//! backtracking line search and finite-difference fallback.
12//!
13//! **Scope note.**  In the current pipeline this solver is only reached
14//! for the **transmission + PoissonKL** path (via
15//! `crate::transmission_model::TransmissionKLBackgroundModel`).  The
16//! **counts** path uses the joint-Poisson conditional-binomial-deviance
17//! solver in [`crate::joint_poisson`], which replaced the older
18//! fixed-flux counts NLL that lived here.  The helpers [`CountsModel`]
19//! and [`CountsBackgroundScaleModel`] exposed from this module are
20//! retained for the [`crate::lm`]-side `evaluate_jacobian_and_fisher`
21//! Fisher-information helper and for spatial-regularization research
22//! drivers; they are not part of the production fit path.
23//!
24//! ## TRINIDI Reference
25//! - `trinidi/reconstruct.py` — Poisson NLL and APGM optimizer
26
27use nereids_core::constants::{PIVOT_FLOOR, POISSON_EPSILON};
28
29use crate::error::FittingError;
30use crate::lm::{FitModel, FlatMatrix};
31use crate::parameters::{FitParameter, ParameterSet};
32
33/// Configuration for the Poisson optimizer.
34#[derive(Debug, Clone)]
35pub struct PoissonConfig {
36    /// Maximum number of iterations.
37    pub max_iter: usize,
38    /// Step size for finite-difference gradient.
39    pub fd_step: f64,
40    /// Initial step size for line search.
41    pub step_size: f64,
42    /// Convergence tolerance used for both parameter displacement (L2 norm of step)
43    /// and gradient-norm convergence checks in `poisson_fit`.
44    pub tol_param: f64,
45    /// Armijo line search parameter (sufficient decrease).
46    pub armijo_c: f64,
47    /// Line search backtracking factor.
48    pub backtrack: f64,
49    /// Relative diagonal damping for analytical Gauss-Newton / Fisher steps.
50    pub gauss_newton_lambda: f64,
51    /// History size for the finite-difference L-BFGS fallback.
52    pub lbfgs_history: usize,
53    /// Whether to compute the Fisher covariance matrix (and uncertainties)
54    /// after convergence.  Set to `false` for per-pixel spatial mapping
55    /// when only densities are needed, avoiding extra model evaluations.
56    pub compute_covariance: bool,
57}
58
59impl Default for PoissonConfig {
60    fn default() -> Self {
61        Self {
62            max_iter: 200,
63            fd_step: 1e-7,
64            step_size: 1.0,
65            tol_param: 1e-8,
66            armijo_c: 1e-4,
67            backtrack: 0.5,
68            gauss_newton_lambda: 1e-3,
69            lbfgs_history: 8,
70            compute_covariance: true,
71        }
72    }
73}
74
75/// Result of Poisson-likelihood optimization.
76#[derive(Debug, Clone)]
77pub struct PoissonResult {
78    /// Final negative log-likelihood.
79    pub nll: f64,
80    /// Number of iterations taken.
81    pub iterations: usize,
82    /// Whether the optimizer converged.
83    pub converged: bool,
84    /// Final parameter values (all parameters, including fixed).
85    pub params: Vec<f64>,
86    /// Local covariance estimate from the inverse Fisher information matrix
87    /// at the converged parameters: `F⁻¹ = (J^T H J)⁻¹` where
88    /// `H = diag(obs/model²)` is the Poisson Hessian.
89    ///
90    /// This is a local curvature estimate, NOT a Bayesian posterior.
91    /// When an analytical Jacobian is available, it is used directly.
92    /// Otherwise a finite-difference Jacobian is computed as fallback.
93    /// `None` when the fit did not converge, the Fisher matrix is
94    /// singular, or covariance computation is disabled via config.
95    pub covariance: Option<FlatMatrix>,
96    /// Standard errors of free parameters: `√diag(F⁻¹)`.
97    /// `None` when covariance is not available.
98    pub uncertainties: Option<Vec<f64>>,
99}
100
101/// Compute Poisson negative log-likelihood.
102///
103/// NLL = Σᵢ [y_model - y_obs · ln(y_model)]
104///
105/// #109.2: For y_model ≤ epsilon, use a smooth C¹ quadratic extrapolation
106/// instead of a hard 1e30 penalty.  This keeps the NLL and its gradient
107/// continuous, so gradient-based optimizers (projected gradient, L-BFGS)
108/// can smoothly steer back into the feasible region rather than hitting
109/// a discontinuous cliff that stalls the line search.
110fn poisson_nll(y_obs: &[f64], y_model: &[f64]) -> f64 {
111    y_obs
112        .iter()
113        .zip(y_model.iter())
114        .map(|(&obs, &mdl)| poisson_nll_term(obs, mdl))
115        .sum()
116}
117
118/// Single-bin Poisson NLL with smooth extrapolation for mdl <= epsilon.
119///
120/// For mdl > 0: NLL = mdl - obs * ln(mdl)
121/// For mdl <= epsilon: quadratic Taylor expansion about epsilon,
122///   NLL(ε) + NLL'(ε)·(mdl−ε) + ½·NLL''(ε)·(mdl−ε)²
123/// where NLL'(x) = 1 − obs/x and NLL''(x) = obs/x².
124///
125/// Since delta = ε − mdl ≥ 0, this becomes:
126///   NLL(ε) − NLL'(ε)·delta + ½·NLL''(ε)·delta²
127///
128/// When obs == 0, the exact Hessian obs/ε² vanishes, leaving only a linear
129/// term that decreases without bound as mdl → −∞.  This can cause the
130/// optimizer to diverge.  We impose a minimum curvature of 1/ε so the
131/// quadratic penalty still curves upward for negative predictions.
132#[inline]
133fn poisson_nll_term(obs: f64, mdl: f64) -> f64 {
134    // #125.3: Negative observed counts would produce wrong-signed NLL terms.
135    // Release builds skip this check; callers must ensure non-negative counts. See #125 item 3.
136    debug_assert!(
137        obs.is_finite() && obs >= 0.0,
138        "poisson_nll_term: obs must be finite and >= 0, got {obs}"
139    );
140    if mdl > POISSON_EPSILON {
141        mdl - obs * mdl.ln()
142    } else {
143        let eps = POISSON_EPSILON;
144        let nll_eps = eps - obs * eps.ln();
145        let grad_eps = 1.0 - obs / eps;
146        // Minimum curvature 1/eps ensures the penalty grows quadratically
147        // even when obs == 0 (where the exact Hessian obs/eps^2 vanishes).
148        let hess_eps = if obs > 0.0 {
149            obs / (eps * eps)
150        } else {
151            1.0 / eps
152        };
153        let delta = eps - mdl;
154        // Taylor expansion: f(eps) + f'(eps)*(mdl - eps) + 0.5*f''(eps)*(mdl - eps)^2
155        // Since (mdl - eps) = -delta, the linear term flips sign.
156        nll_eps - grad_eps * delta + 0.5 * hess_eps * delta * delta
157    }
158}
159
160/// Per-bin Poisson NLL weight: ∂f(obs, mdl)/∂mdl.
161///
162/// For mdl > ε: w = 1 - obs/mdl
163/// For mdl ≤ ε: derivative of the smooth quadratic extrapolation,
164///   w = grad_eps - hess_eps · (ε - mdl), continuous at boundary.
165#[inline]
166fn poisson_nll_weight(obs: f64, mdl: f64) -> f64 {
167    if mdl > POISSON_EPSILON {
168        1.0 - obs / mdl
169    } else {
170        let eps = POISSON_EPSILON;
171        let grad_eps = 1.0 - obs / eps;
172        let hess_eps = if obs > 0.0 {
173            obs / (eps * eps)
174        } else {
175            1.0 / eps
176        };
177        grad_eps - hess_eps * (eps - mdl)
178    }
179}
180
181/// Per-bin Poisson NLL curvature: ∂²f(obs, mdl)/∂mdl².
182///
183/// For mdl > ε: h = obs / mdl²
184/// For mdl ≤ ε: curvature of the smooth quadratic extrapolation.
185#[inline]
186fn poisson_nll_curvature(obs: f64, mdl: f64) -> f64 {
187    if mdl > POISSON_EPSILON {
188        obs / (mdl * mdl)
189    } else {
190        let eps = POISSON_EPSILON;
191        if obs > 0.0 {
192            obs / (eps * eps)
193        } else {
194            1.0 / eps
195        }
196    }
197}
198
199/// Analytical first/second-order information for the Poisson objective.
200#[derive(Debug)]
201struct AnalyticalStepData {
202    /// Gradient of the Poisson NLL: grad = J^T · w.
203    grad: Vec<f64>,
204    /// Full Gauss-Newton / Fisher curvature approximation: J^T H J.
205    fisher: FlatMatrix,
206}
207
208/// Compute gradient and Gauss-Newton / Fisher curvature of the Poisson NLL
209/// using the analytical Jacobian.
210///
211/// `grad_j = Σᵢ wᵢ · J_{i,j}` where `wᵢ = ∂NLL/∂y_model_i`
212/// and `J_{i,j} = ∂y_model_i/∂θⱼ` from `model.analytical_jacobian()`.
213///
214/// The curvature uses the Poisson Hessian with respect to the model output:
215/// `fisher_{j,k} = Σᵢ hᵢ · J_{i,j} · J_{i,k}` where
216/// `hᵢ = ∂²NLL/∂y_model_i²`.
217///
218/// Returns `Some(step_data)` if the model provides an analytical Jacobian,
219/// `None` otherwise (caller should fall back to finite differences).
220fn compute_analytical_step_data(
221    model: &dyn FitModel,
222    params: &ParameterSet,
223    y_obs: &[f64],
224    y_model: &[f64],
225    all_vals_buf: &mut Vec<f64>,
226    free_idx_buf: &mut Vec<usize>,
227) -> Option<AnalyticalStepData> {
228    params.all_values_into(all_vals_buf);
229    params.free_indices_into(free_idx_buf);
230    let jac = model.analytical_jacobian(all_vals_buf, free_idx_buf, y_model)?;
231    let n_e = y_obs.len();
232    let n_free = free_idx_buf.len();
233    let mut grad = vec![0.0f64; n_free];
234    let mut fisher = FlatMatrix::zeros(n_free, n_free);
235    for i in 0..n_e {
236        let w = poisson_nll_weight(y_obs[i], y_model[i]);
237        let h = poisson_nll_curvature(y_obs[i], y_model[i]);
238        for (g, j) in grad.iter_mut().zip(0..n_free) {
239            let jij = jac.get(i, j);
240            *g += w * jij;
241            for k in 0..n_free {
242                *fisher.get_mut(j, k) += h * jij * jac.get(i, k);
243            }
244        }
245    }
246    Some(AnalyticalStepData { grad, fisher })
247}
248
249/// Compute gradient of Poisson NLL by finite differences.
250///
251/// `all_vals_buf` is a reusable scratch buffer for `params.all_values_into()`,
252/// avoiding a fresh allocation on every `model.evaluate()` call inside the
253/// per-parameter FD loop (N_free+1 allocations saved per gradient call).
254///
255/// `free_idx_buf` is a scratch buffer for `params.free_indices_into()`, reused
256/// across iterations to avoid per-gradient allocation.
257fn compute_gradient(
258    model: &dyn FitModel,
259    params: &mut ParameterSet,
260    y_obs: &[f64],
261    fd_step: f64,
262    all_vals_buf: &mut Vec<f64>,
263    free_idx_buf: &mut Vec<usize>,
264) -> Result<Vec<f64>, FittingError> {
265    params.all_values_into(all_vals_buf);
266    let base_model = model.evaluate(all_vals_buf)?;
267    let base_nll = poisson_nll(y_obs, &base_model);
268
269    params.free_indices_into(free_idx_buf);
270    let mut grad = vec![0.0; free_idx_buf.len()];
271
272    for (j, &idx) in free_idx_buf.iter().enumerate() {
273        let original = params.params[idx].value;
274        let step = fd_step * (1.0 + original.abs());
275
276        params.params[idx].value = original + step;
277        params.params[idx].clamp();
278        let mut actual_step = params.params[idx].value - original;
279
280        // #112: If the forward step is blocked by an upper bound, try the
281        // backward step so the gradient component is not frozen at zero.
282        if actual_step.abs() < PIVOT_FLOOR {
283            params.params[idx].value = original - step;
284            params.params[idx].clamp();
285            actual_step = params.params[idx].value - original;
286            if actual_step.abs() < PIVOT_FLOOR {
287                // Truly stuck at a point constraint — skip this parameter.
288                params.params[idx].value = original;
289                continue;
290            }
291        }
292
293        params.all_values_into(all_vals_buf);
294        let perturbed_model = match model.evaluate(all_vals_buf) {
295            Ok(v) => v,
296            Err(_) => {
297                params.params[idx].value = original;
298                continue;
299            }
300        };
301        let perturbed_nll = poisson_nll(y_obs, &perturbed_model);
302        params.params[idx].value = original;
303
304        grad[j] = (perturbed_nll - base_nll) / actual_step;
305    }
306
307    Ok(grad)
308}
309
310fn normalized_step_norm(
311    old_free: &[f64],
312    new_free: &[f64],
313    params: &ParameterSet,
314    free_param_indices: &[usize],
315) -> f64 {
316    old_free
317        .iter()
318        .zip(new_free.iter())
319        .zip(free_param_indices.iter())
320        .map(|((&old, &new), &idx)| {
321            let range = params.params[idx].upper - params.params[idx].lower;
322            let scale = if range.is_finite() && range > 1e-10 {
323                range
324            } else {
325                old.abs().max(1e-3)
326            };
327            ((old - new) / scale).powi(2)
328        })
329        .sum::<f64>()
330        .sqrt()
331}
332
333fn is_bound_active(param: &FitParameter, grad: f64) -> bool {
334    let at_lower = param.lower.is_finite() && (param.value - param.lower).abs() <= PIVOT_FLOOR;
335    let at_upper = param.upper.is_finite() && (param.value - param.upper).abs() <= PIVOT_FLOOR;
336    (at_lower && grad > 0.0) || (at_upper && grad < 0.0)
337}
338
339fn inactive_free_positions(
340    params: &ParameterSet,
341    free_param_indices: &[usize],
342    grad: &[f64],
343) -> Vec<usize> {
344    free_param_indices
345        .iter()
346        .zip(grad.iter())
347        .enumerate()
348        .filter_map(|(pos, (&idx, &g))| (!is_bound_active(&params.params[idx], g)).then_some(pos))
349        .collect()
350}
351
352fn inactive_free_mask(
353    params: &ParameterSet,
354    free_param_indices: &[usize],
355    grad: &[f64],
356) -> Vec<bool> {
357    free_param_indices
358        .iter()
359        .zip(grad.iter())
360        .map(|(&idx, &g)| !is_bound_active(&params.params[idx], g))
361        .collect()
362}
363
364fn projected_gradient_norm(
365    params: &ParameterSet,
366    free_param_indices: &[usize],
367    grad: &[f64],
368) -> f64 {
369    free_param_indices
370        .iter()
371        .zip(grad.iter())
372        .map(|(&idx, &g)| {
373            if is_bound_active(&params.params[idx], g) {
374                0.0
375            } else {
376                g * g
377            }
378        })
379        .sum::<f64>()
380        .sqrt()
381}
382
383fn extract_submatrix(matrix: &FlatMatrix, positions: &[usize]) -> FlatMatrix {
384    let n = positions.len();
385    let mut sub = FlatMatrix::zeros(n, n);
386    for (row_out, &row_in) in positions.iter().enumerate() {
387        for (col_out, &col_in) in positions.iter().enumerate() {
388            *sub.get_mut(row_out, col_out) = matrix.get(row_in, col_in);
389        }
390    }
391    sub
392}
393
394#[derive(Debug, Clone)]
395struct LbfgsHistory {
396    s_list: Vec<Vec<f64>>,
397    y_list: Vec<Vec<f64>>,
398    max_pairs: usize,
399}
400
401impl LbfgsHistory {
402    fn new(max_pairs: usize) -> Self {
403        Self {
404            s_list: Vec::with_capacity(max_pairs),
405            y_list: Vec::with_capacity(max_pairs),
406            max_pairs,
407        }
408    }
409
410    fn clear(&mut self) {
411        self.s_list.clear();
412        self.y_list.clear();
413    }
414
415    fn update(&mut self, old_free: &[f64], new_free: &[f64], old_grad: &[f64], new_grad: &[f64]) {
416        if self.max_pairs == 0 {
417            return;
418        }
419        let s: Vec<f64> = new_free
420            .iter()
421            .zip(old_free.iter())
422            .map(|(&new, &old)| new - old)
423            .collect();
424        let y: Vec<f64> = new_grad
425            .iter()
426            .zip(old_grad.iter())
427            .map(|(&new, &old)| new - old)
428            .collect();
429        let sy = dot(&s, &y);
430        let s_norm = dot(&s, &s).sqrt();
431        let y_norm = dot(&y, &y).sqrt();
432        if sy <= 1e-12 * s_norm * y_norm.max(1.0) {
433            return;
434        }
435        if self.s_list.len() == self.max_pairs {
436            self.s_list.remove(0);
437            self.y_list.remove(0);
438        }
439        self.s_list.push(s);
440        self.y_list.push(y);
441    }
442
443    fn apply_on_positions(&self, grad: &[f64], positions: &[usize]) -> Option<Vec<f64>> {
444        if self.s_list.is_empty() || positions.is_empty() {
445            return None;
446        }
447
448        let mut q: Vec<f64> = positions.iter().map(|&pos| grad[pos]).collect();
449        let mut alpha = vec![0.0; self.s_list.len()];
450        let mut rho = vec![0.0; self.s_list.len()];
451        let mut used = vec![false; self.s_list.len()];
452
453        for i in (0..self.s_list.len()).rev() {
454            let s_sub = extract_positions(&self.s_list[i], positions);
455            let y_sub = extract_positions(&self.y_list[i], positions);
456            let sy = dot(&s_sub, &y_sub);
457            if sy <= 1e-12 {
458                continue;
459            }
460            rho[i] = 1.0 / sy;
461            used[i] = true;
462            alpha[i] = rho[i] * dot(&s_sub, &q);
463            axpy(&mut q, -alpha[i], &y_sub);
464        }
465
466        let gamma = used
467            .iter()
468            .enumerate()
469            .rev()
470            .find_map(|(i, &is_used)| {
471                if !is_used {
472                    return None;
473                }
474                let last_s = extract_positions(&self.s_list[i], positions);
475                let last_y = extract_positions(&self.y_list[i], positions);
476                let yy = dot(&last_y, &last_y);
477                (yy > 0.0).then_some(dot(&last_s, &last_y) / yy)
478            })
479            .unwrap_or(1.0);
480
481        let mut r: Vec<f64> = q.into_iter().map(|v| gamma * v).collect();
482        for i in 0..self.s_list.len() {
483            if !used[i] {
484                continue;
485            }
486            let s_sub = extract_positions(&self.s_list[i], positions);
487            let y_sub = extract_positions(&self.y_list[i], positions);
488            let beta = rho[i] * dot(&y_sub, &r);
489            axpy(&mut r, alpha[i] - beta, &s_sub);
490        }
491
492        let mut full = vec![0.0; grad.len()];
493        for (&pos, &value) in positions.iter().zip(r.iter()) {
494            full[pos] = value;
495        }
496        Some(full)
497    }
498}
499
500fn dot(a: &[f64], b: &[f64]) -> f64 {
501    a.iter().zip(b.iter()).map(|(&x, &y)| x * y).sum()
502}
503
504fn axpy(dst: &mut [f64], alpha: f64, src: &[f64]) {
505    for (d, &s) in dst.iter_mut().zip(src.iter()) {
506        *d += alpha * s;
507    }
508}
509
510fn extract_positions(values: &[f64], positions: &[usize]) -> Vec<f64> {
511    positions.iter().map(|&pos| values[pos]).collect()
512}
513
514fn parameter_scaled_gradient_direction(
515    params: &ParameterSet,
516    free_param_indices: &[usize],
517    grad: &[f64],
518) -> Vec<f64> {
519    grad.iter()
520        .zip(free_param_indices.iter())
521        .map(|(&g, &idx)| {
522            if is_bound_active(&params.params[idx], g) {
523                return 0.0;
524            }
525            let p = &params.params[idx];
526            let range = p.upper - p.lower;
527            if range.is_finite() && range > 1e-10 {
528                g * range * range
529            } else {
530                let scale = p.value.abs().max(1e-3);
531                g * scale * scale
532            }
533        })
534        .collect()
535}
536
537fn max_feasible_step(
538    params: &ParameterSet,
539    free_param_indices: &[usize],
540    old_free: &[f64],
541    search_dir: &[f64],
542) -> f64 {
543    let mut alpha_max = f64::INFINITY;
544    for ((&idx, &x), &d) in free_param_indices
545        .iter()
546        .zip(old_free.iter())
547        .zip(search_dir.iter())
548    {
549        if d.abs() <= PIVOT_FLOOR {
550            continue;
551        }
552        let p = &params.params[idx];
553        let candidate = if d > 0.0 && p.lower.is_finite() {
554            (x - p.lower) / d
555        } else if d < 0.0 && p.upper.is_finite() {
556            (p.upper - x) / (-d)
557        } else {
558            f64::INFINITY
559        };
560        alpha_max = alpha_max.min(candidate);
561    }
562    alpha_max.max(0.0)
563}
564
565enum LineSearchResult {
566    Accepted {
567        nll: f64,
568        y_model: Vec<f64>,
569        hit_boundary: bool,
570    },
571    Stagnated,
572    Failed,
573}
574
575const MAX_FACE_STEPS_PER_ITER: usize = 4;
576
577/// Backtracking line search with Armijo condition.
578///
579/// Backtracking line search with Armijo sufficient-decrease condition:
580/// try a step, reject NaN/Inf model outputs, check the Armijo sufficient-decrease
581/// condition, and backtrack if needed.
582///
583/// # Arguments
584/// * `model`        — Forward model (maps parameters -> predicted counts).
585/// * `params`       — Parameter set (modified in place on success).
586/// * `y_obs`        — Observed counts.
587/// * `old_free`     — Free parameter values before the step.
588/// * `search_dir`   — Search direction (gradient or preconditioned gradient).
589/// * `initial_alpha`— Initial step size.
590/// * `config`       — Optimizer configuration (backtrack factor, Armijo c).
591/// * `grad`         — Raw gradient (used for Armijo descent computation).
592/// * `nll`          — Current negative log-likelihood.
593/// * `all_vals_buf` — Scratch buffer for `params.all_values_into()`, reused
594///   across the up-to-50 backtracking iterations to avoid per-trial allocation.
595/// * `free_vals_buf`— Scratch buffer for `params.free_values_into()`.
596/// * `trial_free_buf` — Scratch buffer for the trial free-parameter vector,
597///   reused across backtracking iterations to avoid up to 50 allocations.
598///
599/// # Returns
600/// `Some((new_nll, y_model))` if a step was accepted, `None` if the line search
601/// exhausted all backtracking attempts. Returns the model output alongside NLL
602/// so the caller can cache it for the next analytical gradient computation.
603///
604/// # Failure contract
605///
606/// On `None` return (line search exhausted), `params` is restored to
607/// `old_free` before returning. Callers need not restore manually.
608// All 12 arguments are genuinely needed: 9 original + 3 scratch buffers that
609// avoid per-backtracking-iteration allocations inside the 50-trial loop.
610#[allow(clippy::too_many_arguments)]
611fn backtracking_line_search(
612    model: &dyn FitModel,
613    params: &mut ParameterSet,
614    y_obs: &[f64],
615    old_free: &[f64],
616    free_param_indices: &[usize],
617    search_dir: &[f64],
618    initial_alpha: f64,
619    config: &PoissonConfig,
620    grad: &[f64],
621    nll: f64,
622    all_vals_buf: &mut Vec<f64>,
623    free_vals_buf: &mut Vec<f64>,
624    trial_free_buf: &mut Vec<f64>,
625) -> LineSearchResult {
626    let alpha_max = max_feasible_step(params, free_param_indices, old_free, search_dir);
627    if alpha_max <= PIVOT_FLOOR {
628        params.set_free_values(old_free);
629        return LineSearchResult::Stagnated;
630    }
631    let mut alpha = initial_alpha.min(alpha_max);
632    for _ in 0..50 {
633        // Trial step along the feasible path: x_new = x - alpha * d, with
634        // alpha capped so inactive-subspace directions hit bounds exactly
635        // instead of relying on projection to distort the step.
636        trial_free_buf.clear();
637        for ((&idx, &v), &d) in free_param_indices
638            .iter()
639            .zip(old_free.iter())
640            .zip(search_dir.iter())
641        {
642            let p = &params.params[idx];
643            trial_free_buf.push((v - alpha * d).clamp(p.lower, p.upper));
644        }
645        params.set_free_values(trial_free_buf);
646
647        params.all_values_into(all_vals_buf);
648        let trial_model = match model.evaluate(all_vals_buf) {
649            Ok(v) => v,
650            Err(_) => {
651                alpha *= config.backtrack;
652                continue;
653            }
654        };
655
656        // #113: If the model produced NaN/Inf, reduce step size rather
657        // than accepting a garbage NLL.
658        if trial_model.iter().any(|v| !v.is_finite()) {
659            alpha *= config.backtrack;
660            continue;
661        }
662
663        let trial_nll = poisson_nll(y_obs, &trial_model);
664
665        // Armijo condition: f(x_new) <= f(x) - c * descent
666        params.free_values_into(free_vals_buf);
667        let step_norm = normalized_step_norm(old_free, free_vals_buf, params, free_param_indices);
668        let descent = grad
669            .iter()
670            .zip(old_free.iter())
671            .zip(free_vals_buf.iter())
672            .map(|((&g, &old), &new)| g * (old - new))
673            .sum::<f64>();
674
675        if trial_nll.is_finite() && trial_nll <= nll - config.armijo_c * descent {
676            return LineSearchResult::Accepted {
677                nll: trial_nll,
678                y_model: trial_model,
679                hit_boundary: alpha_max.is_finite()
680                    && (alpha_max - alpha).abs() <= 1e-12 * alpha_max.max(1.0),
681            };
682        }
683
684        let nll_delta = (trial_nll - nll).abs();
685        let nll_scale = trial_nll.abs().max(nll.abs()).max(1.0);
686        if trial_nll.is_finite()
687            && step_norm < config.tol_param
688            && nll_delta <= config.tol_param * nll_scale
689        {
690            params.set_free_values(old_free);
691            return LineSearchResult::Stagnated;
692        }
693
694        // Backtrack
695        alpha *= config.backtrack;
696        if alpha <= PIVOT_FLOOR {
697            break;
698        }
699    }
700    params.set_free_values(old_free);
701    LineSearchResult::Failed
702}
703
704/// Early return for all-fixed parameters: evaluate once and report.
705///
706/// Returns `Ok(Some(PoissonResult))` if all parameters are fixed (either a
707/// valid result with converged=true, or a non-finite NLL with converged=false).
708/// Returns `Ok(None)` if there are free parameters and optimization should
709/// proceed. Returns `Err(FittingError)` if model evaluation fails.
710fn try_early_return_fixed(
711    model: &dyn FitModel,
712    y_obs: &[f64],
713    params: &ParameterSet,
714) -> Result<Option<PoissonResult>, FittingError> {
715    if params.n_free() != 0 {
716        return Ok(None);
717    }
718    let y_model = model.evaluate(&params.all_values())?;
719    let nll = poisson_nll(y_obs, &y_model);
720    if !nll.is_finite() {
721        return Ok(Some(PoissonResult {
722            nll,
723            iterations: 0,
724            converged: false,
725            params: params.all_values(),
726            covariance: None,
727            uncertainties: None,
728        }));
729    }
730    Ok(Some(PoissonResult {
731        nll,
732        iterations: 0,
733        converged: true,
734        params: params.all_values(),
735        covariance: None,
736        uncertainties: None,
737    }))
738}
739
740/// Build the Poisson Fisher information matrix via finite-difference Jacobian.
741///
742/// Used as fallback when the model does not provide an analytical Jacobian
743/// (e.g., `TransmissionFitModel` without base_xs).  Returns `None` if any
744/// model evaluation fails during the FD perturbation.
745///
746/// Respects parameter bounds via `clamp()` and uses the actual clamped step
747/// size.  When a bound blocks the central-difference step in one direction,
748/// falls back to one-sided difference.
749fn compute_fd_fisher(
750    model: &dyn FitModel,
751    params: &mut ParameterSet,
752    y_obs: &[f64],
753    y_model: &[f64],
754    fd_step: f64,
755    all_vals_buf: &mut Vec<f64>,
756    free_idx_buf: &mut Vec<usize>,
757) -> Option<FlatMatrix> {
758    params.free_indices_into(free_idx_buf);
759    let n_free = free_idx_buf.len();
760    let n_e = y_obs.len();
761
762    let mut jac = FlatMatrix::zeros(n_e, n_free);
763    for (col, &fi) in free_idx_buf.iter().enumerate() {
764        let orig = params.params[fi].value;
765        let h = fd_step * (1.0 + orig.abs());
766
767        // Forward perturbation (clamped to bounds).
768        params.params[fi].value = orig + h;
769        params.params[fi].clamp();
770        let step_plus = params.params[fi].value - orig;
771
772        let y_plus = if step_plus.abs() > PIVOT_FLOOR {
773            params.all_values_into(all_vals_buf);
774            let y = match model.evaluate(all_vals_buf) {
775                Ok(v) => v,
776                Err(_) => {
777                    params.params[fi].value = orig;
778                    return None;
779                }
780            };
781            Some(y)
782        } else {
783            None
784        };
785
786        // Backward perturbation (clamped to bounds).
787        params.params[fi].value = orig - h;
788        params.params[fi].clamp();
789        let step_minus = params.params[fi].value - orig;
790
791        let y_minus = if step_minus.abs() > PIVOT_FLOOR {
792            params.all_values_into(all_vals_buf);
793            let y = match model.evaluate(all_vals_buf) {
794                Ok(v) => v,
795                Err(_) => {
796                    params.params[fi].value = orig;
797                    return None;
798                }
799            };
800            Some(y)
801        } else {
802            None
803        };
804
805        params.params[fi].value = orig;
806
807        // Central difference, or one-sided fallback at bounds.
808        match (&y_plus, &y_minus) {
809            (Some(yp), Some(ym)) => {
810                let denom = step_plus - step_minus;
811                for (i, (&vp, &vm)) in yp.iter().zip(ym.iter()).enumerate() {
812                    *jac.get_mut(i, col) = (vp - vm) / denom;
813                }
814            }
815            (Some(yp), None) => {
816                for (i, (&vp, &v0)) in yp.iter().zip(y_model.iter()).enumerate() {
817                    *jac.get_mut(i, col) = (vp - v0) / step_plus;
818                }
819            }
820            (None, Some(ym)) => {
821                for (i, (&v0, &vm)) in y_model.iter().zip(ym.iter()).enumerate() {
822                    *jac.get_mut(i, col) = (v0 - vm) / (-step_minus);
823                }
824            }
825            (None, None) => {
826                // Both directions blocked — Jacobian column stays zero.
827            }
828        }
829    }
830
831    let mut fisher = FlatMatrix::zeros(n_free, n_free);
832    for i in 0..n_e {
833        let h_i = poisson_nll_curvature(y_obs[i], y_model[i]);
834        for j in 0..n_free {
835            let jij = jac.get(i, j);
836            for k in 0..n_free {
837                *fisher.get_mut(j, k) += h_i * jij * jac.get(i, k);
838            }
839        }
840    }
841    Some(fisher)
842}
843
844/// Run Poisson-likelihood optimization using a projected KL optimizer.
845///
846/// Uses damped Gauss-Newton / Fisher steps when an analytical Jacobian is
847/// available, falling back to projected gradient descent otherwise. Both paths
848/// use backtracking line search with Armijo condition and projection onto
849/// parameter bounds after each step.
850///
851/// # Arguments
852/// * `model` — Forward model (maps parameters → predicted counts).
853/// * `y_obs` — Observed counts at each data point.
854/// * `params` — Parameter set (modified in place).
855/// * `config` — Optimizer configuration.
856///
857/// # Returns
858/// `Ok(PoissonResult)` with final NLL, parameters, and convergence status.
859/// `Err(FittingError)` if model evaluation fails at the initial point.
860/// Evaluation errors during line-search trials are treated as bad steps
861/// (backtrack), not fatal errors.
862pub fn poisson_fit(
863    model: &dyn FitModel,
864    y_obs: &[f64],
865    params: &mut ParameterSet,
866    config: &PoissonConfig,
867) -> Result<PoissonResult, FittingError> {
868    if let Some(result) = try_early_return_fixed(model, y_obs, params)? {
869        return Ok(result);
870    }
871
872    // Scratch buffers reused across the entire optimization loop to avoid
873    // per-iteration allocations inside compute_gradient (N_free+1 calls)
874    // and backtracking_line_search (up to 50 calls).
875    let mut all_vals_buf = Vec::with_capacity(params.params.len());
876    let mut free_vals_buf = Vec::with_capacity(params.n_free());
877    let mut old_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
878    let mut trial_free_buf: Vec<f64> = Vec::with_capacity(params.n_free());
879    let mut free_idx_buf: Vec<usize> = Vec::with_capacity(params.n_free());
880    let mut fd_history = LbfgsHistory::new(config.lbfgs_history);
881    let mut pending_fd_state: Option<(Vec<f64>, Vec<f64>, Vec<bool>)> = None;
882
883    params.all_values_into(&mut all_vals_buf);
884    let mut y_model = model.evaluate(&all_vals_buf)?;
885    let mut nll = poisson_nll(y_obs, &y_model);
886
887    // Guard: if the initial NLL is non-finite, bail out immediately rather
888    // than entering the optimization loop with garbage values.
889    if !nll.is_finite() {
890        return Ok(PoissonResult {
891            nll,
892            iterations: 0,
893            converged: false,
894            params: params.all_values(),
895            covariance: None,
896            uncertainties: None,
897        });
898    }
899
900    let mut converged = false;
901    let mut iter = 0;
902
903    'outer: for _ in 0..config.max_iter {
904        iter += 1;
905        let mut face_steps = 0usize;
906
907        loop {
908            // Compute gradient: try analytical (grad = J^T · w) first,
909            // fall back to finite differences if the model doesn't provide
910            // an analytical Jacobian.
911            let analytical_step = compute_analytical_step_data(
912                model,
913                params,
914                y_obs,
915                &y_model,
916                &mut all_vals_buf,
917                &mut free_idx_buf,
918            );
919            let grad = if let Some(ref analytical) = analytical_step {
920                analytical.grad.clone()
921            } else {
922                compute_gradient(
923                    model,
924                    params,
925                    y_obs,
926                    config.fd_step,
927                    &mut all_vals_buf,
928                    &mut free_idx_buf,
929                )?
930            };
931
932            params.free_indices_into(&mut free_idx_buf);
933
934            let using_fd = analytical_step.is_none();
935            if using_fd {
936                params.free_values_into(&mut free_vals_buf);
937                let current_mask = inactive_free_mask(params, &free_idx_buf, &grad);
938                if let Some((prev_free, prev_grad, prev_mask)) = pending_fd_state.take() {
939                    if prev_mask == current_mask {
940                        fd_history.update(&prev_free, &free_vals_buf, &prev_grad, &grad);
941                    } else {
942                        fd_history.clear();
943                    }
944                }
945                pending_fd_state = Some((free_vals_buf.clone(), grad.clone(), current_mask));
946            } else {
947                pending_fd_state.take();
948                fd_history.clear();
949            }
950
951            // Use projected-gradient optimality for bound-constrained problems.
952            let projected_grad_norm = projected_gradient_norm(params, &free_idx_buf, &grad);
953            if projected_grad_norm < config.tol_param {
954                converged = true;
955                break 'outer;
956            }
957
958            let (search_dir, initial_alpha): (Vec<f64>, f64) =
959                if let Some(ref analytical) = analytical_step {
960                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
961                    if inactive_positions.is_empty() {
962                        converged = true;
963                        break 'outer;
964                    }
965                    let reduced_fisher = extract_submatrix(&analytical.fisher, &inactive_positions);
966                    let reduced_grad: Vec<f64> =
967                        inactive_positions.iter().map(|&pos| grad[pos]).collect();
968                    let reduced_dir = crate::lm::solve_damped_system(
969                        &reduced_fisher,
970                        &reduced_grad,
971                        config.gauss_newton_lambda,
972                    )
973                    .unwrap_or_else(|| {
974                        reduced_grad
975                            .iter()
976                            .enumerate()
977                            .map(|(j, &g)| g / reduced_fisher.get(j, j).max(1e-12))
978                            .collect()
979                    });
980                    let mut dir = vec![0.0; grad.len()];
981                    for (&pos, &value) in inactive_positions.iter().zip(reduced_dir.iter()) {
982                        dir[pos] = value;
983                    }
984                    (dir, config.step_size)
985                } else {
986                    let inactive_positions = inactive_free_positions(params, &free_idx_buf, &grad);
987                    if inactive_positions.is_empty() {
988                        converged = true;
989                        break 'outer;
990                    }
991                    let used_history = !fd_history.s_list.is_empty();
992                    let mut dir = fd_history
993                        .apply_on_positions(&grad, &inactive_positions)
994                        .unwrap_or_else(|| {
995                            parameter_scaled_gradient_direction(params, &free_idx_buf, &grad)
996                        });
997                    let descent = dot(&grad, &dir);
998                    if !descent.is_finite() || descent <= 0.0 {
999                        dir = parameter_scaled_gradient_direction(params, &free_idx_buf, &grad);
1000                    }
1001                    if used_history && descent.is_finite() && descent > 0.0 {
1002                        (dir, config.step_size)
1003                    } else {
1004                        let search_norm: f64 = dir.iter().map(|d| d * d).sum::<f64>().sqrt();
1005                        (dir, config.step_size / search_norm.max(1.0))
1006                    }
1007                };
1008
1009            params.free_values_into(&mut free_vals_buf);
1010            old_free_buf.clear();
1011            old_free_buf.extend_from_slice(&free_vals_buf);
1012
1013            match backtracking_line_search(
1014                model,
1015                params,
1016                y_obs,
1017                &old_free_buf,
1018                &free_idx_buf,
1019                &search_dir,
1020                initial_alpha,
1021                config,
1022                &grad,
1023                nll,
1024                &mut all_vals_buf,
1025                &mut free_vals_buf,
1026                &mut trial_free_buf,
1027            ) {
1028                LineSearchResult::Accepted {
1029                    nll: new_nll,
1030                    y_model: new_y_model,
1031                    hit_boundary,
1032                } => {
1033                    if !using_fd {
1034                        pending_fd_state = None;
1035                    }
1036                    nll = new_nll;
1037                    y_model = new_y_model;
1038
1039                    if hit_boundary && face_steps < MAX_FACE_STEPS_PER_ITER {
1040                        face_steps += 1;
1041                        continue;
1042                    }
1043                }
1044                LineSearchResult::Stagnated => {
1045                    converged = true;
1046                    break 'outer;
1047                }
1048                LineSearchResult::Failed => {
1049                    break 'outer;
1050                }
1051            }
1052
1053            params.free_values_into(&mut free_vals_buf);
1054            let step_norm =
1055                normalized_step_norm(&old_free_buf, &free_vals_buf, params, &free_idx_buf);
1056            if step_norm < config.tol_param {
1057                converged = true;
1058                break 'outer;
1059            }
1060
1061            break;
1062        }
1063    }
1064
1065    // Compute local covariance from the inverse Fisher information matrix
1066    // at the converged parameters: cov = (J^T H J)^{-1}.
1067    // This is a local curvature estimate, not a Bayesian posterior.
1068    // Gated by config.compute_covariance to avoid extra evaluations when
1069    // the caller only needs densities (e.g., per-pixel spatial mapping).
1070    let (covariance, uncertainties) = if converged && config.compute_covariance {
1071        // Use the final y_model from the last accepted step rather than
1072        // re-evaluating (avoids extra model call and cannot turn a
1073        // successful fit into an error during post-processing).
1074
1075        // Build the Fisher information matrix J^T H J from the Jacobian at
1076        // the converged parameters.  Try the analytical Jacobian first; fall
1077        // back to finite differences if not available.
1078        let fisher_opt = if let Some(step_data) = compute_analytical_step_data(
1079            model,
1080            params,
1081            y_obs,
1082            &y_model,
1083            &mut all_vals_buf,
1084            &mut free_idx_buf,
1085        ) {
1086            Some(step_data.fisher)
1087        } else {
1088            // Build Fisher via FD Jacobian.
1089            compute_fd_fisher(
1090                model,
1091                params,
1092                y_obs,
1093                &y_model,
1094                config.fd_step,
1095                &mut all_vals_buf,
1096                &mut free_idx_buf,
1097            )
1098        };
1099
1100        if let Some(fisher) = fisher_opt {
1101            if let Some(cov) = crate::lm::invert_matrix(&fisher) {
1102                let n_free = cov.nrows;
1103                let unc: Vec<f64> = (0..n_free)
1104                    .map(|i| {
1105                        let d = cov.get(i, i);
1106                        if d.is_finite() && d > 0.0 {
1107                            d.sqrt()
1108                        } else {
1109                            f64::NAN
1110                        }
1111                    })
1112                    .collect();
1113                (Some(cov), Some(unc))
1114            } else {
1115                (None, None)
1116            }
1117        } else {
1118            (None, None)
1119        }
1120    } else {
1121        (None, None)
1122    };
1123
1124    Ok(PoissonResult {
1125        nll,
1126        iterations: iter,
1127        converged,
1128        params: params.all_values(),
1129        covariance,
1130        uncertainties,
1131    })
1132}
1133
1134/// Fixed-flux counts-domain forward model: `Y_model = flux × T_model(θ) + background`.
1135///
1136/// **Retained for the research Fisher helper, not for production fitting.**
1137/// The production counts-KL dispatch (`SolverConfig::PoissonKL` on
1138/// `InputData::Counts` / `InputData::CountsWithNuisance`) goes through
1139/// the joint-Poisson conditional-binomial-deviance path in
1140/// [`crate::joint_poisson`].  `CountsModel` and
1141/// [`CountsBackgroundScaleModel`] below are consumed only by
1142/// `nereids_pipeline::pipeline::evaluate_jacobian_and_fisher` (the
1143/// Fisher-info research helper used by the spatial-regularization
1144/// epic #394) and by this module's `#[cfg(test)]` tests.  They assume
1145/// the caller has pre-computed `flux = c · O` (i.e. `c` is baked into
1146/// `flux` — a convention that proved error-prone for
1147/// end users, which is precisely why the production path no longer
1148/// uses this struct).
1149///
1150/// The `flux` and `background` slices must have the same length as the
1151/// transmission vector returned by the inner model.  In debug builds,
1152/// `evaluate()` asserts this invariant.
1153pub struct CountsModel<'a> {
1154    /// Underlying transmission model.
1155    pub transmission_model: &'a dyn FitModel,
1156    /// Incident flux (counts per bin in open beam, after normalization).
1157    pub flux: &'a [f64],
1158    /// Background counts per bin.
1159    pub background: &'a [f64],
1160    /// Total parameter count in the wrapped model.
1161    pub n_params: usize,
1162}
1163
1164impl<'a> FitModel for CountsModel<'a> {
1165    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1166        let transmission = self.transmission_model.evaluate(params)?;
1167        debug_assert_eq!(
1168            transmission.len(),
1169            self.flux.len(),
1170            "CountsModel: transmission length ({}) != flux length ({})",
1171            transmission.len(),
1172            self.flux.len(),
1173        );
1174        debug_assert_eq!(
1175            self.flux.len(),
1176            self.background.len(),
1177            "CountsModel: flux length ({}) != background length ({})",
1178            self.flux.len(),
1179            self.background.len(),
1180        );
1181        Ok(transmission
1182            .iter()
1183            .zip(self.flux.iter())
1184            .zip(self.background.iter())
1185            .map(|((&t, &f), &b)| f * t + b)
1186            .collect())
1187    }
1188
1189    /// Analytical Jacobian: ∂Y/∂θ = flux · ∂T_inner/∂θ.
1190    ///
1191    /// Background is constant w.r.t. θ and drops out.
1192    fn analytical_jacobian(
1193        &self,
1194        params: &[f64],
1195        free_param_indices: &[usize],
1196        y_current: &[f64],
1197    ) -> Option<FlatMatrix> {
1198        let n_e = y_current.len();
1199        // Recover inner transmission: T = (Y - background) / flux
1200        let t_inner: Vec<f64> = y_current
1201            .iter()
1202            .zip(self.flux.iter())
1203            .zip(self.background.iter())
1204            .map(|((&y, &f), &b)| if f.abs() > 1e-30 { (y - b) / f } else { 0.0 })
1205            .collect();
1206        let inner_jac =
1207            self.transmission_model
1208                .analytical_jacobian(params, free_param_indices, &t_inner)?;
1209        let n_free = free_param_indices.len();
1210        let mut jac = FlatMatrix::zeros(n_e, n_free);
1211        for i in 0..n_e {
1212            for j in 0..n_free {
1213                *jac.get_mut(i, j) = self.flux[i] * inner_jac.get(i, j);
1214            }
1215        }
1216        Some(jac)
1217    }
1218}
1219
1220// ── ForwardModel implementation for CountsModel (Phase 1) ────────────────
1221
1222impl<'a> crate::forward_model::ForwardModel for CountsModel<'a> {
1223    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1224        self.evaluate(params)
1225    }
1226
1227    // No analytical jacobian — uses finite differences (same as FitModel).
1228
1229    fn n_data(&self) -> usize {
1230        self.flux.len()
1231    }
1232
1233    fn n_params(&self) -> usize {
1234        self.n_params
1235    }
1236}
1237
1238/// Fixed-flux counts model with optional α₁ / α₂ nuisance scaling of
1239/// signal and detector background.
1240///
1241/// **Retained for the research Fisher helper, not for production fitting.**
1242/// See [`CountsModel`] for the scope note — the production counts-KL
1243/// dispatch does not use this struct; it is reached only from
1244/// `evaluate_jacobian_and_fisher` (Epic #394 spatial-regularization
1245/// prototype) and from this module's `#[cfg(test)]` tests.
1246///
1247/// Given a transmission model `T(θ)`, predicts:
1248///
1249///   Y(E) = α₁ · [Φ(E) · T(θ)] + α₂ · B(E)
1250///
1251/// where `α₁` and `α₂` are parameter-vector entries.
1252pub struct CountsBackgroundScaleModel<'a> {
1253    /// Underlying transmission model.
1254    pub transmission_model: &'a dyn FitModel,
1255    /// Incident flux spectrum.
1256    pub flux: &'a [f64],
1257    /// Detector background spectrum.
1258    pub background: &'a [f64],
1259    /// Index of α₁ in the parameter vector.
1260    pub alpha1_index: usize,
1261    /// Index of α₂ in the parameter vector.
1262    pub alpha2_index: usize,
1263    /// Total parameter count in the wrapped model.
1264    pub n_params: usize,
1265}
1266
1267impl<'a> FitModel for CountsBackgroundScaleModel<'a> {
1268    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1269        let transmission = self.transmission_model.evaluate(params)?;
1270        let alpha1 = params[self.alpha1_index];
1271        let alpha2 = params[self.alpha2_index];
1272        debug_assert_eq!(transmission.len(), self.flux.len());
1273        debug_assert_eq!(self.flux.len(), self.background.len());
1274        Ok(transmission
1275            .iter()
1276            .zip(self.flux.iter())
1277            .zip(self.background.iter())
1278            .map(|((&t, &f), &b)| alpha1 * f * t + alpha2 * b)
1279            .collect())
1280    }
1281
1282    fn analytical_jacobian(
1283        &self,
1284        params: &[f64],
1285        free_param_indices: &[usize],
1286        y_current: &[f64],
1287    ) -> Option<FlatMatrix> {
1288        let n_e = y_current.len();
1289        let n_free = free_param_indices.len();
1290        let alpha1 = params[self.alpha1_index];
1291        let alpha1_col = free_param_indices
1292            .iter()
1293            .position(|&i| i == self.alpha1_index);
1294        let alpha2_col = free_param_indices
1295            .iter()
1296            .position(|&i| i == self.alpha2_index);
1297        let inner_free: Vec<usize> = free_param_indices
1298            .iter()
1299            .copied()
1300            .filter(|&i| i != self.alpha1_index && i != self.alpha2_index)
1301            .collect();
1302
1303        // Evaluate the inner transmission model directly instead of
1304        // reconstructing from y_current — reconstruction via
1305        // (y - alpha2*b)/(alpha1*f) is undefined when alpha1 ≈ 0.
1306        let t_inner = match self.transmission_model.evaluate(params) {
1307            Ok(t) => t,
1308            Err(_) => return None,
1309        };
1310
1311        let inner_jac = if !inner_free.is_empty() {
1312            self.transmission_model
1313                .analytical_jacobian(params, &inner_free, &t_inner)
1314        } else {
1315            None
1316        };
1317
1318        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1319        if let Some(ref ij) = inner_jac {
1320            let mut inner_col = 0;
1321            for (col, &fp) in free_param_indices.iter().enumerate() {
1322                if fp == self.alpha1_index || fp == self.alpha2_index {
1323                    continue;
1324                }
1325                for row in 0..n_e {
1326                    *jacobian.get_mut(row, col) = alpha1 * self.flux[row] * ij.get(row, inner_col);
1327                }
1328                inner_col += 1;
1329            }
1330        } else if !inner_free.is_empty() {
1331            return None;
1332        }
1333
1334        if let Some(col) = alpha1_col {
1335            for (row, (&f, &t)) in self.flux.iter().zip(t_inner.iter()).enumerate() {
1336                *jacobian.get_mut(row, col) = f * t;
1337            }
1338        }
1339        if let Some(col) = alpha2_col {
1340            for (row, &bg) in self.background.iter().enumerate() {
1341                *jacobian.get_mut(row, col) = bg;
1342            }
1343        }
1344
1345        Some(jacobian)
1346    }
1347}
1348
1349impl<'a> crate::forward_model::ForwardModel for CountsBackgroundScaleModel<'a> {
1350    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1351        self.evaluate(params)
1352    }
1353
1354    fn n_data(&self) -> usize {
1355        self.flux.len()
1356    }
1357
1358    fn n_params(&self) -> usize {
1359        self.n_params
1360    }
1361}
1362
1363/// KL-compatible background model for transmission data.
1364///
1365/// Given a transmission model T_inner(θ), predicts:
1366///
1367///   T_out(E) = T_inner(E) + b₀ + b₁/√E
1368///
1369/// where b₀ and b₁ are the additive background parameters at indices
1370/// `b0_index` and `b1_index` in the parameter vector.
1371///
1372/// Unlike `NormalizedTransmissionModel` (which uses `Anorm * T + BackA +
1373/// BackB/√E + BackC√E` with 4 free parameters), this model:
1374/// - Has only 2 background parameters (b₀, b₁), reducing overfitting risk
1375/// - Constrains b₀, b₁ ≥ 0 via parameter bounds (physical: background
1376///   adds counts, never subtracts), ensuring T_out > 0 for valid Poisson NLL
1377/// - Does NOT multiply T_inner by a normalization factor — normalization
1378///   is handled separately (nuisance estimation for counts, or pre-processing
1379///   for transmission data)
1380///
1381/// ## Gradient
1382///
1383/// - ∂T_out/∂nₖ = ∂T_inner/∂nₖ = -σₖ(E)·T_inner(E)  (same as bare model)
1384/// - ∂T_out/∂b₀ = 1
1385/// - ∂T_out/∂b₁ = 1/√E
1386pub struct TransmissionKLBackgroundModel<'a> {
1387    /// Underlying transmission model (density parameters only).
1388    pub inner: &'a dyn FitModel,
1389    /// Precomputed 1/√E for each energy bin.
1390    pub inv_sqrt_energies: Vec<f64>,
1391    /// Index of b₀ (constant background) in the parameter vector.
1392    pub b0_index: usize,
1393    /// Index of b₁ (1/√E background) in the parameter vector.
1394    pub b1_index: usize,
1395    /// Total parameter count in the wrapped model.
1396    pub n_params: usize,
1397}
1398
1399impl<'a> FitModel for TransmissionKLBackgroundModel<'a> {
1400    fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1401        let t_inner = self.inner.evaluate(params)?;
1402        let b0 = params[self.b0_index];
1403        let b1 = params[self.b1_index];
1404        Ok(t_inner
1405            .iter()
1406            .zip(self.inv_sqrt_energies.iter())
1407            .map(|(&t, &inv_sqrt_e)| t + b0 + b1 * inv_sqrt_e)
1408            .collect())
1409    }
1410
1411    fn analytical_jacobian(
1412        &self,
1413        params: &[f64],
1414        free_param_indices: &[usize],
1415        y_current: &[f64],
1416    ) -> Option<FlatMatrix> {
1417        let n_e = y_current.len();
1418        let n_free = free_param_indices.len();
1419
1420        // Identify which free params are background vs inner model.
1421        let b0_col = free_param_indices.iter().position(|&i| i == self.b0_index);
1422        let b1_col = free_param_indices.iter().position(|&i| i == self.b1_index);
1423
1424        // Inner model free params (those not b0 or b1).
1425        let inner_free: Vec<usize> = free_param_indices
1426            .iter()
1427            .copied()
1428            .filter(|&i| i != self.b0_index && i != self.b1_index)
1429            .collect();
1430
1431        // Get inner model Jacobian for density columns.
1432        let inner_jac = if !inner_free.is_empty() {
1433            // Evaluate inner model at current params to get T_inner for y_current.
1434            let t_inner = self.inner.evaluate(params).ok()?;
1435            self.inner
1436                .analytical_jacobian(params, &inner_free, &t_inner)
1437        } else {
1438            None
1439        };
1440
1441        let mut jacobian = FlatMatrix::zeros(n_e, n_free);
1442
1443        // Fill inner model columns (density, temperature).
1444        // Inner Jacobian is the same as bare model — background doesn't
1445        // affect ∂T_inner/∂nₖ.
1446        if let Some(ref ij) = inner_jac {
1447            let mut inner_col = 0;
1448            for (col, &fp) in free_param_indices.iter().enumerate() {
1449                if fp == self.b0_index || fp == self.b1_index {
1450                    continue;
1451                }
1452                for row in 0..n_e {
1453                    *jacobian.get_mut(row, col) = ij.get(row, inner_col);
1454                }
1455                inner_col += 1;
1456            }
1457        } else {
1458            // No analytical inner Jacobian — fall back to FD for entire model.
1459            return None;
1460        }
1461
1462        // Background columns.
1463        if let Some(col) = b0_col {
1464            for row in 0..n_e {
1465                *jacobian.get_mut(row, col) = 1.0; // ∂T_out/∂b₀ = 1
1466            }
1467        }
1468        if let Some(col) = b1_col {
1469            for row in 0..n_e {
1470                *jacobian.get_mut(row, col) = self.inv_sqrt_energies[row]; // ∂T_out/∂b₁ = 1/√E
1471            }
1472        }
1473
1474        Some(jacobian)
1475    }
1476}
1477
1478impl<'a> crate::forward_model::ForwardModel for TransmissionKLBackgroundModel<'a> {
1479    fn predict(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1480        self.evaluate(params)
1481    }
1482
1483    fn n_data(&self) -> usize {
1484        self.inv_sqrt_energies.len()
1485    }
1486
1487    fn n_params(&self) -> usize {
1488        self.n_params
1489    }
1490}
1491
1492#[cfg(test)]
1493mod tests {
1494    use super::*;
1495    use crate::lm::FitModel;
1496    use crate::parameters::FitParameter;
1497
1498    /// Simple model: y = a * exp(-b * x)
1499    /// This mimics transmission: counts = flux * exp(-density * sigma)
1500    struct ExponentialModel {
1501        x: Vec<f64>,
1502        flux: Vec<f64>,
1503    }
1504
1505    impl FitModel for ExponentialModel {
1506        fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1507            let b = params[0]; // "density"
1508            Ok(self
1509                .x
1510                .iter()
1511                .zip(self.flux.iter())
1512                .map(|(&xi, &fi)| fi * (-b * xi).exp())
1513                .collect())
1514        }
1515    }
1516
1517    #[test]
1518    fn test_poisson_nll_perfect_match() {
1519        let y_obs = vec![10.0, 20.0, 30.0];
1520        let y_model = vec![10.0, 20.0, 30.0];
1521        let nll = poisson_nll(&y_obs, &y_model);
1522        // NLL = Σ(y_model - y_obs*ln(y_model))
1523        let expected: f64 = y_obs
1524            .iter()
1525            .zip(y_model.iter())
1526            .map(|(&o, &m)| m - o * m.ln())
1527            .sum();
1528        assert!((nll - expected).abs() < 1e-10);
1529    }
1530
1531    #[test]
1532    fn test_poisson_fit_exponential() {
1533        // Generate synthetic Poisson data from y = 1000 * exp(-0.5 * x)
1534        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1535        let true_b = 0.5;
1536        let flux: Vec<f64> = vec![1000.0; x.len()];
1537
1538        let model = ExponentialModel {
1539            x: x.clone(),
1540            flux: flux.clone(),
1541        };
1542
1543        // Use exact expected counts (no noise) for reproducibility
1544        let y_obs = model.evaluate(&[true_b]).unwrap();
1545
1546        let mut params = ParameterSet::new(vec![
1547            FitParameter::non_negative("b", 1.0), // Initial guess 2× off
1548        ]);
1549
1550        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1551
1552        assert!(
1553            result.converged,
1554            "Poisson fit did not converge after {} iterations",
1555            result.iterations,
1556        );
1557        assert!(
1558            (result.params[0] - true_b).abs() / true_b < 0.05,
1559            "Fitted b = {}, true = {}, error = {:.1}%",
1560            result.params[0],
1561            true_b,
1562            (result.params[0] - true_b).abs() / true_b * 100.0,
1563        );
1564    }
1565
1566    #[test]
1567    fn test_poisson_fit_low_counts() {
1568        // Low-count regime: flux = 10 counts per bin
1569        let x: Vec<f64> = (0..30).map(|i| i as f64 * 0.2).collect();
1570        let true_b = 0.3;
1571        let flux: Vec<f64> = vec![10.0; x.len()];
1572
1573        let model = ExponentialModel {
1574            x: x.clone(),
1575            flux: flux.clone(),
1576        };
1577
1578        let y_obs = model.evaluate(&[true_b]).unwrap();
1579
1580        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 0.1)]);
1581
1582        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1583
1584        assert!(result.converged);
1585        assert!(
1586            (result.params[0] - true_b).abs() / true_b < 0.1,
1587            "Low-count: fitted b = {}, true = {}",
1588            result.params[0],
1589            true_b,
1590        );
1591    }
1592
1593    #[test]
1594    fn test_poisson_non_negativity() {
1595        // Data that would drive parameter negative without constraint
1596        let x: Vec<f64> = (0..10).map(|i| i as f64).collect();
1597        let flux: Vec<f64> = vec![100.0; x.len()];
1598
1599        let model = ExponentialModel {
1600            x: x.clone(),
1601            flux: flux.clone(),
1602        };
1603
1604        // Generate data with b=0 (constant), but start with b=1
1605        let y_obs: Vec<f64> = vec![100.0; x.len()];
1606
1607        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", 1.0)]);
1608
1609        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
1610
1611        assert!(
1612            result.params[0] >= 0.0,
1613            "b should be non-negative, got {}",
1614            result.params[0],
1615        );
1616        assert!(
1617            result.params[0] < 0.1,
1618            "b should be ~0, got {}",
1619            result.params[0],
1620        );
1621    }
1622
1623    #[test]
1624    fn test_counts_model() {
1625        struct ConstTransmission;
1626        impl FitModel for ConstTransmission {
1627            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1628                Ok(vec![params[0]; 3])
1629            }
1630        }
1631
1632        let t_model = ConstTransmission;
1633        let flux = [100.0, 200.0, 300.0];
1634        let background = [5.0, 10.0, 15.0];
1635        let counts_model = CountsModel {
1636            transmission_model: &t_model,
1637            flux: &flux,
1638            background: &background,
1639            n_params: 1,
1640        };
1641
1642        // T = 0.5 → counts = flux*0.5 + background
1643        let result = counts_model.evaluate(&[0.5]).unwrap();
1644        assert!((result[0] - 55.0).abs() < 1e-10);
1645        assert!((result[1] - 110.0).abs() < 1e-10);
1646        assert!((result[2] - 165.0).abs() < 1e-10);
1647        assert_eq!(
1648            crate::forward_model::ForwardModel::n_params(&counts_model),
1649            1
1650        );
1651    }
1652
1653    #[test]
1654    fn test_counts_background_scale_model() {
1655        struct ConstTransmission;
1656        impl FitModel for ConstTransmission {
1657            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1658                Ok(vec![params[0]; 3])
1659            }
1660
1661            fn analytical_jacobian(
1662                &self,
1663                _params: &[f64],
1664                free_param_indices: &[usize],
1665                _y_current: &[f64],
1666            ) -> Option<FlatMatrix> {
1667                let mut jac = FlatMatrix::zeros(3, free_param_indices.len());
1668                for (col, &fp) in free_param_indices.iter().enumerate() {
1669                    if fp == 0 {
1670                        for row in 0..3 {
1671                            *jac.get_mut(row, col) = 1.0;
1672                        }
1673                    }
1674                }
1675                Some(jac)
1676            }
1677        }
1678
1679        let t_model = ConstTransmission;
1680        let flux = [100.0, 200.0, 300.0];
1681        let background = [5.0, 10.0, 15.0];
1682        let counts_model = CountsBackgroundScaleModel {
1683            transmission_model: &t_model,
1684            flux: &flux,
1685            background: &background,
1686            alpha1_index: 1,
1687            alpha2_index: 2,
1688            n_params: 3,
1689        };
1690
1691        let params = [0.5, 0.8, 1.5];
1692        let result = counts_model.evaluate(&params).unwrap();
1693        assert!((result[0] - 47.5).abs() < 1e-10);
1694        assert!((result[1] - 95.0).abs() < 1e-10);
1695        assert!((result[2] - 142.5).abs() < 1e-10);
1696        assert_eq!(
1697            crate::forward_model::ForwardModel::n_params(&counts_model),
1698            3
1699        );
1700    }
1701
1702    #[test]
1703    fn test_poisson_fit_multi_density_temperature_converges() {
1704        struct MultiDensityCountsModel {
1705            energies: Vec<f64>,
1706            flux: Vec<f64>,
1707            density_count: usize,
1708            temp_index: usize,
1709        }
1710
1711        impl MultiDensityCountsModel {
1712            fn sigma(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
1713                let center = 6.0 + iso as f64 * 4.5;
1714                let amp = 150.0 + 25.0 * iso as f64;
1715                let base_width = 0.8 + 0.12 * iso as f64;
1716                let width_coeff = 0.05 + 0.01 * iso as f64;
1717                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
1718                let delta = energy - center;
1719                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
1720                amp * gauss
1721            }
1722
1723            fn dsigma_dt(&self, iso: usize, energy: f64, temp_k: f64) -> f64 {
1724                let center = 6.0 + iso as f64 * 4.5;
1725                let amp = 150.0 + 25.0 * iso as f64;
1726                let base_width = 0.8 + 0.12 * iso as f64;
1727                let width_coeff = 0.05 + 0.01 * iso as f64;
1728                let width = (base_width * (1.0 + width_coeff * (temp_k - 300.0) / 300.0)).max(0.1);
1729                let delta = energy - center;
1730                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
1731                let dwidth_dt = base_width * width_coeff / 300.0;
1732                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
1733            }
1734        }
1735
1736        impl FitModel for MultiDensityCountsModel {
1737            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
1738                let temp_k = params[self.temp_index];
1739                let mut out = Vec::with_capacity(self.energies.len());
1740                for (i, &energy) in self.energies.iter().enumerate() {
1741                    let optical_depth = (0..self.density_count)
1742                        .map(|iso| params[iso] * self.sigma(iso, energy, temp_k))
1743                        .sum::<f64>();
1744                    out.push(self.flux[i] * (-optical_depth).exp());
1745                }
1746                Ok(out)
1747            }
1748
1749            fn analytical_jacobian(
1750                &self,
1751                params: &[f64],
1752                free_param_indices: &[usize],
1753                y_current: &[f64],
1754            ) -> Option<crate::lm::FlatMatrix> {
1755                let temp_k = params[self.temp_index];
1756                let mut jac =
1757                    crate::lm::FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
1758                for (row, &energy) in self.energies.iter().enumerate() {
1759                    let y = y_current[row];
1760                    let mut sum_n_dsigma_dt = 0.0;
1761                    for (iso, &density) in params[..self.density_count].iter().enumerate() {
1762                        sum_n_dsigma_dt += density * self.dsigma_dt(iso, energy, temp_k);
1763                    }
1764                    for (col, &fp) in free_param_indices.iter().enumerate() {
1765                        let val = if fp == self.temp_index {
1766                            -y * sum_n_dsigma_dt
1767                        } else {
1768                            -y * self.sigma(fp, energy, temp_k)
1769                        };
1770                        *jac.get_mut(row, col) = val;
1771                    }
1772                }
1773                Some(jac)
1774            }
1775        }
1776
1777        let energies: Vec<f64> = (0..220).map(|i| 1.0 + 0.18 * i as f64).collect();
1778        let flux: Vec<f64> = energies
1779            .iter()
1780            .map(|&e| 1500.0 * (1.0 + 0.15 * (e / 8.0).sin()).max(0.2))
1781            .collect();
1782        let density_count = 6usize;
1783        let temp_index = density_count;
1784        let model = MultiDensityCountsModel {
1785            energies,
1786            flux,
1787            density_count,
1788            temp_index,
1789        };
1790
1791        let true_params = vec![3.2e-4, 2.4e-4, 1.7e-4, 1.1e-4, 7.5e-5, 4.2e-5, 360.0];
1792        let y_obs = model.evaluate(&true_params).unwrap();
1793
1794        let mut params = ParameterSet::new(vec![
1795            FitParameter::non_negative("n0", 6.0e-4),
1796            FitParameter::non_negative("n1", 4.0e-4),
1797            FitParameter::non_negative("n2", 2.5e-4),
1798            FitParameter::non_negative("n3", 1.8e-4),
1799            FitParameter::non_negative("n4", 1.0e-4),
1800            FitParameter::non_negative("n5", 8.0e-5),
1801            FitParameter {
1802                name: "temperature_k".into(),
1803                value: 300.0,
1804                lower: 1.0,
1805                upper: 5000.0,
1806                fixed: false,
1807            },
1808        ]);
1809
1810        let config = PoissonConfig {
1811            max_iter: 200,
1812            gauss_newton_lambda: 1e-4,
1813            ..PoissonConfig::default()
1814        };
1815        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
1816
1817        assert!(
1818            result.converged,
1819            "multi-density+temperature Poisson fit did not converge after {} iterations",
1820            result.iterations,
1821        );
1822        assert!(
1823            result.iterations < config.max_iter,
1824            "fit hit max_iter={}, params={:?}",
1825            config.max_iter,
1826            result.params,
1827        );
1828
1829        for (i, (&fit, &truth)) in result.params[..density_count]
1830            .iter()
1831            .zip(true_params[..density_count].iter())
1832            .enumerate()
1833        {
1834            let rel_err = (fit - truth).abs() / truth;
1835            assert!(
1836                rel_err < 0.10,
1837                "density[{i}] fit={fit} truth={truth} rel_err={rel_err:.3}",
1838            );
1839        }
1840
1841        let fitted_temp = result.params[temp_index];
1842        assert!(
1843            (fitted_temp - true_params[temp_index]).abs() < 10.0,
1844            "temperature fit={fitted_temp} truth={}",
1845            true_params[temp_index],
1846        );
1847        assert!(
1848            result.iterations <= 80,
1849            "expected analytical KL path to converge well before max_iter; got {}",
1850            result.iterations,
1851        );
1852    }
1853
1854    #[test]
1855    fn test_poisson_fit_exact_optimum_without_analytical_jacobian_converges() {
1856        let x: Vec<f64> = (0..20).map(|i| i as f64 * 0.5).collect();
1857        let true_b = 0.5;
1858        let flux: Vec<f64> = vec![1000.0; x.len()];
1859
1860        let model = ExponentialModel { x, flux };
1861        let y_obs = model.evaluate(&[true_b]).unwrap();
1862        let mut params = ParameterSet::new(vec![FitParameter::non_negative("b", true_b)]);
1863        let config = PoissonConfig {
1864            fd_step: 1e-4,
1865            tol_param: 1e-12,
1866            max_iter: 50,
1867            ..PoissonConfig::default()
1868        };
1869
1870        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
1871
1872        assert!(
1873            result.converged,
1874            "exact-optimum FD fit should converge instead of exhausting line search"
1875        );
1876        assert!(
1877            result.iterations < config.max_iter,
1878            "fit should stop by convergence, not hit max_iter"
1879        );
1880        assert!(
1881            (result.params[0] - true_b).abs() < 1e-6,
1882            "parameter drifted away from optimum: {}",
1883            result.params[0]
1884        );
1885    }
1886
1887    #[test]
1888    fn test_projected_gradient_ignores_lower_bound_blocked_direction() {
1889        let params = ParameterSet::new(vec![
1890            FitParameter::non_negative("density", 0.0),
1891            FitParameter::unbounded("temp", 300.0),
1892        ]);
1893        let free_idx = vec![0, 1];
1894        let grad = vec![0.25, -0.5];
1895
1896        let inactive = inactive_free_positions(&params, &free_idx, &grad);
1897        assert_eq!(
1898            inactive,
1899            vec![1],
1900            "lower-bound blocked density should be active"
1901        );
1902
1903        let pg_norm = projected_gradient_norm(&params, &free_idx, &grad);
1904        assert!(
1905            (pg_norm - 0.5).abs() < 1e-12,
1906            "projected gradient should ignore blocked lower-bound component"
1907        );
1908    }
1909
1910    #[test]
1911    fn test_max_feasible_step_hits_lower_bound() {
1912        let params = ParameterSet::new(vec![
1913            FitParameter::non_negative("density", 0.2),
1914            FitParameter::unbounded("temp", 300.0),
1915        ]);
1916        let alpha = max_feasible_step(&params, &[0, 1], &[0.2, 300.0], &[0.5, 0.0]);
1917        assert!(
1918            (alpha - 0.4).abs() < 1e-12,
1919            "feasible step should stop exactly at lower bound"
1920        );
1921    }
1922
1923    #[test]
1924    fn test_max_feasible_step_hits_upper_bound() {
1925        let params = ParameterSet::new(vec![FitParameter {
1926            name: "temp".into(),
1927            value: 300.0,
1928            lower: 1.0,
1929            upper: 500.0,
1930            fixed: false,
1931        }]);
1932        let alpha = max_feasible_step(&params, &[0], &[300.0], &[-50.0]);
1933        assert!(
1934            (alpha - 4.0).abs() < 1e-12,
1935            "feasible step should stop exactly at upper bound"
1936        );
1937    }
1938
1939    #[test]
1940    fn test_inactive_mask_changes_when_bound_activity_changes() {
1941        let free_idx = vec![0, 1];
1942        let params_at_bound = ParameterSet::new(vec![
1943            FitParameter::non_negative("density", 0.0),
1944            FitParameter::non_negative("temp", 1.0),
1945        ]);
1946        let params_free = ParameterSet::new(vec![
1947            FitParameter::non_negative("density", 0.2),
1948            FitParameter::non_negative("temp", 1.0),
1949        ]);
1950
1951        let mask_at_bound = inactive_free_mask(&params_at_bound, &free_idx, &[0.3, -0.2]);
1952        let mask_free = inactive_free_mask(&params_free, &free_idx, &[0.3, -0.2]);
1953
1954        assert_eq!(mask_at_bound, vec![false, true]);
1955        assert_eq!(mask_free, vec![true, true]);
1956        assert_ne!(
1957            mask_at_bound, mask_free,
1958            "active-set changes should invalidate FD quasi-Newton history"
1959        );
1960    }
1961
1962    #[test]
1963    fn test_lbfgs_history_two_loop_matches_secant_direction() {
1964        let mut history = LbfgsHistory::new(4);
1965        history.update(&[0.0], &[1.0], &[0.0], &[2.0]);
1966        let dir = history
1967            .apply_on_positions(&[4.0], &[0])
1968            .expect("history should produce a direction");
1969        assert!(
1970            (dir[0] - 2.0).abs() < 1e-12,
1971            "1D secant pair should scale gradient by inverse curvature"
1972        );
1973    }
1974
1975    #[test]
1976    fn test_lbfgs_subspace_ignores_active_components() {
1977        let mut history = LbfgsHistory::new(4);
1978        history.update(&[0.0, 0.0], &[1.0, 100.0], &[0.0, 0.0], &[2.0, 100.0]);
1979        let dir = history
1980            .apply_on_positions(&[4.0, 50.0], &[0])
1981            .expect("subspace history should produce a direction");
1982        assert!(
1983            (dir[0] - 2.0).abs() < 1e-12,
1984            "inactive-subspace L-BFGS should match 1D secant scaling on the free variable"
1985        );
1986        assert!(
1987            dir[1].abs() < 1e-12,
1988            "inactive-subspace L-BFGS should not leak blocked-variable history into the direction"
1989        );
1990    }
1991
1992    #[test]
1993    fn test_poisson_fit_converges_at_bound_active_optimum() {
1994        struct OffsetModel {
1995            base: Vec<f64>,
1996        }
1997
1998        impl FitModel for OffsetModel {
1999            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2000                Ok(self.base.iter().map(|&b| b + params[0]).collect())
2001            }
2002
2003            fn analytical_jacobian(
2004                &self,
2005                _params: &[f64],
2006                free_param_indices: &[usize],
2007                y_current: &[f64],
2008            ) -> Option<FlatMatrix> {
2009                let mut jac = FlatMatrix::zeros(y_current.len(), free_param_indices.len());
2010                for (col, &fp) in free_param_indices.iter().enumerate() {
2011                    assert_eq!(fp, 0);
2012                    for row in 0..y_current.len() {
2013                        *jac.get_mut(row, col) = 1.0;
2014                    }
2015                }
2016                Some(jac)
2017            }
2018        }
2019
2020        let model = OffsetModel {
2021            base: vec![10.0; 12],
2022        };
2023        let y_obs = vec![8.0; 12];
2024        let mut params = ParameterSet::new(vec![FitParameter::non_negative("offset", 0.0)]);
2025
2026        let result = poisson_fit(&model, &y_obs, &mut params, &PoissonConfig::default()).unwrap();
2027
2028        assert!(
2029            result.converged,
2030            "bound-active optimum should satisfy projected optimality"
2031        );
2032        assert_eq!(
2033            result.iterations, 1,
2034            "should stop on projected-gradient check"
2035        );
2036        assert!(
2037            result.params[0].abs() < 1e-12,
2038            "offset should stay pinned at lower bound, got {}",
2039            result.params[0]
2040        );
2041    }
2042
2043    #[test]
2044    fn test_poisson_fit_fd_lbfgs_handles_coupled_two_parameter_model() {
2045        struct CoupledExponentialModel {
2046            x: Vec<f64>,
2047        }
2048
2049        impl FitModel for CoupledExponentialModel {
2050            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2051                let amp = params[0];
2052                let decay = params[1];
2053                Ok(self
2054                    .x
2055                    .iter()
2056                    .map(|&x| amp * (-decay * x).exp() + 1.0)
2057                    .collect())
2058            }
2059        }
2060
2061        let model = CoupledExponentialModel {
2062            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2063        };
2064        let true_params = [120.0, 0.45];
2065        let y_obs = model.evaluate(&true_params).unwrap();
2066        let mut params = ParameterSet::new(vec![
2067            FitParameter::non_negative("amp", 30.0),
2068            FitParameter::non_negative("decay", 1.2),
2069        ]);
2070        let config = PoissonConfig {
2071            max_iter: 120,
2072            lbfgs_history: 8,
2073            ..PoissonConfig::default()
2074        };
2075        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2076        let mut baseline_params = ParameterSet::new(vec![
2077            FitParameter::non_negative("amp", 30.0),
2078            FitParameter::non_negative("decay", 1.2),
2079        ]);
2080        let baseline = poisson_fit(
2081            &model,
2082            &y_obs,
2083            &mut baseline_params,
2084            &PoissonConfig {
2085                lbfgs_history: 0,
2086                ..config.clone()
2087            },
2088        )
2089        .unwrap();
2090
2091        assert!(
2092            result.converged,
2093            "FD L-BFGS fit did not converge: {result:?}"
2094        );
2095        assert!(
2096            baseline.converged,
2097            "baseline FD fit should still converge: {baseline:?}"
2098        );
2099        assert!(
2100            result.iterations <= 60,
2101            "expected FD quasi-Newton path to converge well before max_iter; got {}",
2102            result.iterations,
2103        );
2104        assert!(
2105            result.iterations < baseline.iterations,
2106            "L-BFGS fallback should beat no-history gradient scaling: lbfgs={} baseline={}",
2107            result.iterations,
2108            baseline.iterations,
2109        );
2110        assert!(
2111            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.02,
2112            "amplitude fit={}, true={}",
2113            result.params[0],
2114            true_params[0],
2115        );
2116        assert!(
2117            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2118            "decay fit={}, true={}",
2119            result.params[1],
2120            true_params[1],
2121        );
2122    }
2123
2124    #[test]
2125    fn test_poisson_fit_fd_lbfgs_with_bound_active_offset_uses_subspace() {
2126        struct OffsetDecayModel {
2127            x: Vec<f64>,
2128        }
2129
2130        impl FitModel for OffsetDecayModel {
2131            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2132                let offset = params[0];
2133                let decay = params[1];
2134                Ok(self
2135                    .x
2136                    .iter()
2137                    .map(|&x| offset + (-decay * x).exp())
2138                    .collect())
2139            }
2140        }
2141
2142        let model = OffsetDecayModel {
2143            x: (0..60).map(|i| i as f64 * 0.08).collect(),
2144        };
2145        let true_params = [0.0, 0.35];
2146        let y_obs = model.evaluate(&true_params).unwrap();
2147
2148        let config = PoissonConfig {
2149            max_iter: 120,
2150            lbfgs_history: 8,
2151            ..PoissonConfig::default()
2152        };
2153        let mut params = ParameterSet::new(vec![
2154            FitParameter::non_negative("offset", 0.0),
2155            FitParameter::non_negative("decay", 1.1),
2156        ]);
2157        let result = poisson_fit(&model, &y_obs, &mut params, &config).unwrap();
2158        assert!(
2159            result.converged,
2160            "subspace FD L-BFGS fit did not converge: {result:?}"
2161        );
2162        assert!(
2163            result.iterations <= 20,
2164            "bound-active subspace FD fit should converge comfortably before max_iter; got {}",
2165            result.iterations,
2166        );
2167        assert!(
2168            result.params[0].abs() < 1e-8,
2169            "offset should remain at the lower bound, got {}",
2170            result.params[0]
2171        );
2172        assert!(
2173            (result.params[1] - true_params[1]).abs() / true_params[1] < 0.02,
2174            "decay fit={}, true={}",
2175            result.params[1],
2176            true_params[1],
2177        );
2178    }
2179
2180    #[test]
2181    fn test_poisson_fit_temperature_and_background_converges() {
2182        struct TempTransmissionModel {
2183            energies: Vec<f64>,
2184        }
2185
2186        impl TempTransmissionModel {
2187            fn sigma(&self, energy: f64, temp_k: f64) -> f64 {
2188                let center = 6.0;
2189                let amp = 110.0;
2190                let base_width = 0.55;
2191                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
2192                let delta = energy - center;
2193                amp * (-(delta * delta) / (2.0 * width * width)).exp()
2194            }
2195
2196            fn dsigma_dt(&self, energy: f64, temp_k: f64) -> f64 {
2197                let center = 6.0;
2198                let amp = 110.0;
2199                let base_width = 0.55;
2200                let width = (base_width * (temp_k / 300.0).sqrt()).max(0.08);
2201                let dwidth_dt = if temp_k > 0.0 {
2202                    base_width / (2.0 * (300.0 * temp_k).sqrt())
2203                } else {
2204                    0.0
2205                };
2206                let delta = energy - center;
2207                let gauss = (-(delta * delta) / (2.0 * width * width)).exp();
2208                amp * gauss * (delta * delta) * dwidth_dt / width.powi(3)
2209            }
2210        }
2211
2212        impl FitModel for TempTransmissionModel {
2213            fn evaluate(&self, params: &[f64]) -> Result<Vec<f64>, FittingError> {
2214                let density = params[0];
2215                let temp_k = params[1];
2216                Ok(self
2217                    .energies
2218                    .iter()
2219                    .map(|&energy| (-density * self.sigma(energy, temp_k)).exp())
2220                    .collect())
2221            }
2222
2223            fn analytical_jacobian(
2224                &self,
2225                params: &[f64],
2226                free_param_indices: &[usize],
2227                y_current: &[f64],
2228            ) -> Option<FlatMatrix> {
2229                let density = params[0];
2230                let temp_k = params[1];
2231                let mut jac = FlatMatrix::zeros(self.energies.len(), free_param_indices.len());
2232                for (row, &energy) in self.energies.iter().enumerate() {
2233                    let y = y_current[row];
2234                    let sigma = self.sigma(energy, temp_k);
2235                    let dsigma_dt = self.dsigma_dt(energy, temp_k);
2236                    for (col, &fp) in free_param_indices.iter().enumerate() {
2237                        let deriv = match fp {
2238                            0 => -sigma * y,
2239                            1 => -density * dsigma_dt * y,
2240                            _ => unreachable!("unexpected parameter index {fp}"),
2241                        };
2242                        *jac.get_mut(row, col) = deriv;
2243                    }
2244                }
2245                Some(jac)
2246            }
2247        }
2248
2249        let energies: Vec<f64> = (0..180).map(|i| 1.0 + 0.06 * i as f64).collect();
2250        let inner = TempTransmissionModel {
2251            energies: energies.clone(),
2252        };
2253        let inv_sqrt_energies: Vec<f64> = energies.iter().map(|&e| 1.0 / e.sqrt()).collect();
2254        let wrapped = TransmissionKLBackgroundModel {
2255            inner: &inner,
2256            inv_sqrt_energies,
2257            b0_index: 2,
2258            b1_index: 3,
2259            n_params: 4,
2260        };
2261
2262        let true_params = vec![4.5e-4, 345.0, 0.012, 0.008];
2263        let y_obs = wrapped.evaluate(&true_params).unwrap();
2264
2265        let mut params = ParameterSet::new(vec![
2266            FitParameter::non_negative("density", 8.0e-4),
2267            FitParameter {
2268                name: "temperature_k".into(),
2269                value: 290.0,
2270                lower: 1.0,
2271                upper: 5000.0,
2272                fixed: false,
2273            },
2274            FitParameter {
2275                name: "kl_b0".into(),
2276                value: 0.0,
2277                lower: 0.0,
2278                upper: 0.5,
2279                fixed: false,
2280            },
2281            FitParameter {
2282                name: "kl_b1".into(),
2283                value: 0.0,
2284                lower: 0.0,
2285                upper: 0.5,
2286                fixed: false,
2287            },
2288        ]);
2289
2290        let config = PoissonConfig {
2291            max_iter: 120,
2292            gauss_newton_lambda: 1e-4,
2293            ..PoissonConfig::default()
2294        };
2295        let result = poisson_fit(&wrapped, &y_obs, &mut params, &config).unwrap();
2296
2297        assert!(result.converged, "fit did not converge: {result:?}");
2298        assert!(
2299            result.iterations <= 80,
2300            "expected convergence well before max_iter; got {}",
2301            result.iterations,
2302        );
2303        assert!(
2304            (result.params[0] - true_params[0]).abs() / true_params[0] < 0.05,
2305            "density fit={}, true={}",
2306            result.params[0],
2307            true_params[0],
2308        );
2309        assert!(
2310            (result.params[1] - true_params[1]).abs() < 8.0,
2311            "temperature fit={}, true={}",
2312            result.params[1],
2313            true_params[1],
2314        );
2315        assert!(
2316            (result.params[2] - true_params[2]).abs() < 5e-3,
2317            "b0 fit={}, true={}",
2318            result.params[2],
2319            true_params[2],
2320        );
2321        assert!(
2322            (result.params[3] - true_params[3]).abs() < 5e-3,
2323            "b1 fit={}, true={}",
2324            result.params[3],
2325            true_params[3],
2326        );
2327    }
2328}