use crate::linalg::{inprod, matprod12_into, matprod21_into};
use crate::mat::Mat;
#[cfg(feature = "count-kernels")]
pub mod counters {
use core::sync::atomic::{AtomicUsize, Ordering};
static CALVLAG_NOADD_CALLS: AtomicUsize = AtomicUsize::new(0);
pub fn reset() {
CALVLAG_NOADD_CALLS.store(0, Ordering::Relaxed);
}
pub fn calvlag_noadd_calls() -> usize {
CALVLAG_NOADD_CALLS.load(Ordering::Relaxed)
}
pub(crate) fn bump() {
CALVLAG_NOADD_CALLS.fetch_add(1, Ordering::Relaxed);
}
}
pub(crate) fn setij_into(n: usize, npt: usize, ij: &mut Vec<(usize, usize)>) {
ij.clear();
if npt > 2 * n + 1 {
for k in n..=(npt - n - 2) {
let ell = k / n;
let i1 = k - n * ell + 1; let i2 = (i1 + ell - 1) % n + 1; ij.push((i1 - 1, i2 - 1)); }
}
}
#[cfg(test)]
pub(crate) fn setij(n: usize, npt: usize) -> Vec<(usize, usize)> {
let mut ij = Vec::new();
setij_into(n, npt, &mut ij);
ij
}
#[expect(clippy::needless_range_loop)] pub(crate) fn hess_mul_into(
x: &[f64],
xpt: &Mat,
pq: &[f64],
hq: Option<&Mat>,
dxpt: &mut [f64],
y: &mut [f64],
) {
let n = xpt.nrows();
let npt = xpt.ncols();
matprod12_into(x, xpt, dxpt);
for k in 0..npt {
dxpt[k] *= pq[k];
}
matprod21_into(xpt, dxpt, y);
if let Some(hq) = hq {
let y = &mut y[..n];
for j in 0..n {
let hqj = &hq.col(j)[..n];
let xj = x[j];
for i in 0..n {
y[i] += hqj[i] * xj;
}
}
}
}
#[cfg(test)]
pub(crate) fn hess_mul(x: &[f64], xpt: &Mat, pq: &[f64], hq: Option<&Mat>) -> Vec<f64> {
let mut dxpt = vec![0.0; xpt.ncols()];
let mut y = vec![0.0; xpt.nrows()];
hess_mul_into(x, xpt, pq, hq, &mut dxpt, &mut y);
y
}
#[expect(clippy::too_many_arguments)] pub(crate) fn quadinc(
d: &[f64],
xpt: &Mat,
gq: &[f64],
pq: &[f64],
hq: &Mat,
dxpt: &mut [f64],
pqdxpt: &mut [f64],
hqd: &mut [f64],
) -> f64 {
let n = xpt.nrows();
let npt = xpt.ncols();
matprod12_into(d, xpt, dxpt);
matprod21_into(hq, d, hqd);
for i in 0..n {
hqd[i] = gq[i] + 0.5 * hqd[i];
}
for k in 0..npt {
pqdxpt[k] = pq[k] * dxpt[k];
}
inprod(d, hqd) + 0.5 * inprod(dxpt, pqdxpt)
}
fn omega_mul_into(zmat: &Mat, x: &[f64], xz: &mut [f64], y: &mut [f64]) {
matprod12_into(x, zmat, xz);
matprod21_into(zmat, xz, y);
}
#[derive(Debug, Clone)]
pub(crate) struct CalWs {
wcheck: Vec<f64>, xrefxpt: Vec<f64>, xz: Vec<f64>, omega: Vec<f64>, wmv: Vec<f64>, vlag_beta: Vec<f64>, vlag_den: Vec<f64>, hdiag: Vec<f64>, }
impl CalWs {
pub(crate) fn new(n: usize, npt: usize) -> Self {
Self {
wcheck: vec![0.0; npt],
xrefxpt: vec![0.0; npt],
xz: vec![0.0; npt - n - 1],
omega: vec![0.0; npt],
wmv: vec![0.0; npt + n],
vlag_beta: vec![0.0; npt + n],
vlag_den: vec![0.0; npt + n],
hdiag: vec![0.0; npt],
}
}
}
fn beta_from_noadd_vlag(
d: &[f64],
xref: &[f64],
wcheck: &[f64],
vlag: &[f64],
n: usize,
npt: usize,
) -> f64 {
let dxref = inprod(d, xref);
let dsq = inprod(d, d);
let xrefsq = inprod(xref, xref);
let dvlag = inprod(d, &vlag[npt..npt + n]);
let wvlag = inprod(wcheck, &vlag[..npt]);
dxref * dxref + dsq * (xrefsq + dxref + dxref + 0.5 * dsq) - dvlag - wvlag
}
#[expect(clippy::too_many_arguments)] fn calvlag_noadd(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
wcheck: &mut [f64],
xrefxpt: &mut [f64],
xz: &mut [f64],
omega: &mut [f64],
wmv: &mut [f64],
vlag: &mut [f64],
) {
#[cfg(feature = "count-kernels")]
counters::bump();
let n = xpt.nrows();
let npt = xpt.ncols();
let xref = xpt.col(kref); matprod12_into(d, xpt, wcheck);
matprod12_into(xref, xpt, xrefxpt);
for k in 0..npt {
wcheck[k] *= 0.5 * wcheck[k] + xrefxpt[k];
}
omega_mul_into(zmat, wcheck, xz, omega);
for j in 0..npt {
vlag[j] = omega[j] + inprod(d, bmat.col(j));
}
wmv[..npt].copy_from_slice(wcheck);
wmv[npt..npt + n].copy_from_slice(d);
matprod21_into(bmat, wmv, &mut vlag[npt..npt + n]);
}
pub(crate) fn calvlag_into(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
cw: &mut CalWs,
vlag: &mut [f64],
) {
let CalWs {
wcheck,
xrefxpt,
xz,
omega,
wmv,
..
} = cw;
calvlag_noadd(
kref, bmat, d, xpt, zmat, wcheck, xrefxpt, xz, omega, wmv, vlag,
);
vlag[kref] += 1.0;
}
#[expect(clippy::too_many_arguments)] fn calbeta_core(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
wcheck: &mut [f64],
xrefxpt: &mut [f64],
xz: &mut [f64],
omega: &mut [f64],
wmv: &mut [f64],
vlag: &mut [f64],
) -> f64 {
let n = xpt.nrows();
let npt = xpt.ncols();
let xref = xpt.col(kref); calvlag_noadd(
kref, bmat, d, xpt, zmat, wcheck, xrefxpt, xz, omega, wmv, vlag,
);
beta_from_noadd_vlag(d, xref, wcheck, vlag, n, npt)
}
pub(crate) fn calbeta(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
cw: &mut CalWs,
) -> f64 {
let CalWs {
wcheck,
xrefxpt,
xz,
omega,
wmv,
vlag_beta,
..
} = cw;
calbeta_core(
kref, bmat, d, xpt, zmat, wcheck, xrefxpt, xz, omega, wmv, vlag_beta,
)
}
pub(crate) fn calden_into(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
cw: &mut CalWs,
den: &mut [f64],
) {
let n = xpt.nrows();
let npt = xpt.ncols();
let xref = xpt.col(kref); let CalWs {
wcheck,
xrefxpt,
xz,
omega,
wmv,
vlag_den,
hdiag,
..
} = cw;
hdiag[..npt].fill(0.0);
for j in 0..zmat.ncols() {
let zj = &zmat.col(j)[..npt];
for k in 0..npt {
hdiag[k] += zj[k] * zj[k];
}
}
calvlag_noadd(
kref, bmat, d, xpt, zmat, wcheck, xrefxpt, xz, omega, wmv, vlag_den,
);
let beta = beta_from_noadd_vlag(d, xref, wcheck, vlag_den, n, npt);
vlag_den[kref] += 1.0; for k in 0..npt {
den[k] = hdiag[k] * beta + vlag_den[k] * vlag_den[k];
}
}
#[expect(clippy::too_many_arguments)] pub(crate) fn calvlag_and_den_into(
kref: usize,
bmat: &Mat,
d: &[f64],
xpt: &Mat,
zmat: &Mat,
cw: &mut CalWs,
vlag: &mut [f64],
den: &mut [f64],
) -> f64 {
let n = xpt.nrows();
let npt = xpt.ncols();
let xref = xpt.col(kref);
let CalWs {
wcheck,
xrefxpt,
xz,
omega,
wmv,
hdiag,
..
} = cw;
hdiag[..npt].fill(0.0);
for j in 0..zmat.ncols() {
let zj = &zmat.col(j)[..npt];
for k in 0..npt {
hdiag[k] += zj[k] * zj[k];
}
}
calvlag_noadd(
kref, bmat, d, xpt, zmat, wcheck, xrefxpt, xz, omega, wmv, vlag,
);
let beta = beta_from_noadd_vlag(d, xref, wcheck, vlag, n, npt);
vlag[kref] += 1.0; for k in 0..npt {
den[k] = hdiag[k] * beta + vlag[k] * vlag[k]; }
beta
}
#[cfg(test)]
mod tests {
use super::*;
use crate::mat::Mat;
#[test]
fn setij_yields_valid_distinct_pairs_of_the_right_count() {
for (n, npt) in [(2, 6), (3, 10), (10, 30)] {
let ij = setij(n, npt);
assert_eq!(ij.len(), npt - 2 * n - 1);
for &(i, j) in &ij {
assert!(i < n && j < n && i != j, "bad pair ({i}, {j}) for n={n}");
}
}
}
#[test]
fn setij_is_empty_at_powells_default_npt() {
assert!(setij(2, 5).is_empty());
}
#[test]
fn hess_mul_combines_explicit_and_implicit_parts() {
let xpt = Mat::from_col_major(1, 3, vec![1.0, 2.0, 0.0]);
let hq = Mat::from_col_major(1, 1, vec![2.0]);
assert_eq!(
hess_mul(&[3.0], &xpt, &[1.0, 2.0, 0.0], Some(&hq)),
vec![33.0]
);
assert_eq!(hess_mul(&[3.0], &xpt, &[1.0, 2.0, 0.0], None), vec![27.0]);
}
#[test]
fn quadinc_evaluates_the_model_increment() {
let xpt = Mat::from_col_major(1, 3, vec![1.0, 2.0, 0.0]);
let hq = Mat::zeros(1, 1);
let (mut dxpt, mut pqdxpt, mut hqd) = (vec![0.0; 3], vec![0.0; 3], vec![0.0; 1]);
assert_eq!(
quadinc(
&[2.0],
&xpt,
&[3.0],
&[0.0, 0.0, 0.0],
&hq,
&mut dxpt,
&mut pqdxpt,
&mut hqd
),
6.0
);
let hq2 = Mat::from_col_major(1, 1, vec![2.0]);
assert_eq!(
quadinc(
&[2.0],
&xpt,
&[3.0],
&[1.0, 2.0, 0.0],
&hq2,
&mut dxpt,
&mut pqdxpt,
&mut hqd
),
28.0
);
}
#[test]
fn calden_into_matches_calvlag_and_den_into() {
let (n, npt, kref) = (1, 3, 0);
let bmat = Mat::zeros(n, npt + n);
let xpt = Mat::from_col_major(n, npt, vec![1.0, 2.0, 0.0]);
let zmat = Mat::from_col_major(npt, npt - n - 1, vec![1.0, 0.0, -1.0]);
let d = [1.0];
let mut cw = CalWs::new(n, npt);
let mut den_calden = vec![0.0; npt];
calden_into(kref, &bmat, &d, &xpt, &zmat, &mut cw, &mut den_calden);
let mut den_fused = vec![0.0; npt];
let mut vlag = vec![0.0; npt + n];
calvlag_and_den_into(
kref,
&bmat,
&d,
&xpt,
&zmat,
&mut cw,
&mut vlag,
&mut den_fused,
);
assert_eq!(den_calden, den_fused);
assert_eq!(vlag[kref], 2.5);
}
}