use crate::basis::{BasisOptions, PenaltyInfo, PenaltySource};
use crate::custom_family::{
AdditiveBlockJacobian, BatchedOuterGradientTerms, BlockEffectiveJacobian, BlockWorkingSet,
BlockwiseFitOptions, CustomFamily, CustomFamilyBlockPsiDerivative,
CustomFamilyJointDesignChannel, CustomFamilyJointDesignPairContribution,
CustomFamilyJointPsiOperator, CustomFamilyPsiDesignAction, CustomFamilyPsiLinearMapRef,
CustomFamilyPsiSecondDesignAction, CustomFamilyWarmStart, ExactNewtonJointGradientEvaluation,
ExactNewtonJointHessianWorkspace, ExactNewtonJointPsiDirectCache,
ExactNewtonJointPsiSecondOrderTerms, ExactNewtonJointPsiWorkspace, FamilyChannelHessian,
FamilyEvaluation, ParameterBlockSpec, ParameterBlockState, PenaltyMatrix, PsiDesignMap,
evaluate_custom_family_joint_hyper, evaluate_custom_family_joint_hyper_efs, fit_custom_family,
fit_custom_family_fixed_log_lambdas, resolve_custom_family_x_psi_map,
resolve_custom_family_x_psi_psi_map, second_psi_linear_map, shared_dense_arc,
weighted_crossprod_psi_maps,
};
use crate::estimate::UnifiedFitResult;
use crate::faer_ndarray::{fast_ab, fast_atv, fast_av, fast_joint_hessian_2x2};
use crate::families::location_scale_engine::build_location_scale_exact_joint_setup;
use crate::families::parameter_block::ParameterBlockInput;
use crate::families::scale_design::{
build_scale_deviation_operator, build_scale_deviation_transform_design,
};
use crate::families::sigma_link::{
LOGB_SIGMA_FLOOR, SigmaJet1, exp_sigma_derivs_up_to_fourth_scalar,
exp_sigma_derivs_up_to_third, exp_sigma_from_eta_scalar, exp_sigma_jet1_scalar,
logb_sigma_from_eta_scalar, logb_sigma_jet1_scalar, safe_exp,
};
use crate::families::spatial_psi_bridge::build_block_spatial_psi_derivatives;
use crate::families::wiggle::{
SelectedWiggleBasis, WiggleBlockConfig, buildwiggle_block_input_from_knots,
initializewiggle_knots_from_seed, monotone_wiggle_basis_with_derivative_order,
monotone_wiggle_nonnegative_constraints, select_wiggle_basis_from_seed,
validate_monotone_wiggle_beta_nonnegative,
};
use crate::generative::{CustomFamilyGenerative, GenerativeSpec, NoiseModel};
use crate::matrix::SymmetricMatrix;
use crate::matrix::{DenseDesignMatrix, DenseDesignOperator, DesignMatrix};
use crate::mixture_link::{inverse_link_jet_for_inverse_link, inverse_link_mu_d1_for_inverse_link};
use crate::pirls::LinearInequalityConstraints;
use crate::probability::{normal_logcdf, normal_logsf, standard_normal_quantile};
use crate::smooth::{
BlockwisePenalty, ExactJointHyperSetup, PenaltyBlockInfo,
SpatialLengthScaleOptimizationOptions, SpatialLogKappaCoords, TermCollectionDesign,
TermCollectionSpec, build_term_collection_design, freeze_term_collection_from_design,
optimize_spatial_length_scale_exact_joint, spatial_dims_per_term,
spatial_length_scale_term_indices,
};
use crate::solver::estimate::validate_all_finite_estimation;
use crate::types::{InverseLink, RidgePolicy, StandardLink};
use ndarray::{Array1, Array2, ArrayView1, ArrayView2, Axis, s};
use rayon::prelude::*;
use std::borrow::Cow;
use std::collections::{HashMap, hash_map::DefaultHasher};
use std::hash::{Hash, Hasher};
use std::sync::atomic::AtomicUsize;
use std::sync::{Arc, Mutex};
mod binomial_q_derivs;
use binomial_q_derivs::{
binomial_neglog_q_derivatives_dispatch, binomial_neglog_q_fourth_derivative_dispatch,
};
mod validation;
use validation::{
minimum_monotone_wiggle_knot_count, validate_binomial_location_scale_termspec,
validate_binomial_location_scalewiggle_termspec, validate_binomial_response,
validate_blockrows, validate_gaussian_location_scale_termspec,
validate_gaussian_location_scalewiggle_termspec, validate_len_match, validate_term_weights,
validateweights,
};
mod weighted_design_products;
use weighted_design_products::{
mirror_upper_to_lower, scaled_outer_add, signedwith_floor, xt_diag_x_dense, xt_diag_x_design,
xt_diag_y_dense, xt_diag_y_design,
};
/// Failure categories reported by the GAMLSS helpers in this file.
///
/// Every variant carries only a free-form `reason`; the variant itself is the category tag.
#[derive(Debug)]
pub enum GamlssError {
    /// Lengths, row counts, or block counts that must agree did not.
    DimensionMismatch { reason: String },
    /// Input rejected for a reason described in `reason`.
    InvalidInput { reason: String },
    /// A value that must be finite was not.
    NonFinite { reason: String },
    /// The requested combination of options or variants is not handled.
    UnsupportedConfiguration { reason: String },
    /// A constraint described in `reason` was violated.
    ConstraintViolation { reason: String },
    /// A numerical step produced an unusable value.
    NumericalFailure { reason: String },
}
/// Prints the bare `reason` text; the variant name is not included.
impl std::fmt::Display for GamlssError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // All variants share one shape, so a single irrefutable or-pattern extracts the
        // message. Adding a variant without a `reason` field makes this stop compiling.
        let (GamlssError::DimensionMismatch { reason }
        | GamlssError::InvalidInput { reason }
        | GamlssError::NonFinite { reason }
        | GamlssError::UnsupportedConfiguration { reason }
        | GamlssError::ConstraintViolation { reason }
        | GamlssError::NumericalFailure { reason }) = self;
        f.write_str(reason)
    }
}
// Empty impl: all `std::error::Error` methods keep their defaults, so no underlying
// source error is exposed.
impl std::error::Error for GamlssError {}
/// Converts a [`GamlssError`] into the `String` error type returned by the functions in this
/// file, so `.into()` can be used at `return Err(...)` sites.
impl From<GamlssError> for String {
    fn from(err: GamlssError) -> Self {
        // Goes through `Display`, i.e. the resulting text is the error's `reason`.
        ToString::to_string(&err)
    }
}
// Small positive threshold; by its name a lower bound for probabilities.
// NOTE(review): not referenced in the visible part of this file — confirm usage.
const MIN_PROB: f64 = 1e-10;
// Small positive threshold; by its name a lower bound for derivative magnitudes.
// NOTE(review): not referenced in the visible part of this file — confirm usage.
const MIN_DERIV: f64 = 1e-8;
// Weight floor shared with the PIRLS solver module.
use crate::solver::pirls::MIN_WEIGHT;
// Symmetric bound applied to a linear predictor before exponentiation in `saturated_exp_eta`.
const ETA_HARD_CLAMP: f64 = 30.0;
/// Exponentiates `eta` after clamping it to `[-ETA_HARD_CLAMP, ETA_HARD_CLAMP]`, then floors
/// the result at `MIN_WEIGHT`.
///
/// A NaN `eta` stays NaN through `clamp` and `exp`; `f64::max` then returns `MIN_WEIGHT`.
#[inline]
fn saturated_exp_eta(eta: f64) -> f64 {
    let bounded_eta = eta.clamp(-ETA_HARD_CLAMP, ETA_HARD_CLAMP);
    let exponentiated = bounded_eta.exp();
    exponentiated.max(MIN_WEIGHT)
}
// NOTE(review): named as a floor related to warm-start log-lambdas; not referenced in the
// visible part of this file — confirm whether the value is on the lambda or log-lambda scale.
const WARMSTART_LOG_LAMBDA_FLOOR: f64 = 1e-12;
// Per-block cap (512 MiB) checked by `dense_blocks_planned_budget` before planning a dense copy.
const EXACT_DENSE_BLOCK_BUDGET_BYTES: usize = 512 * 1024 * 1024;
// Cumulative cap (2 GiB) across all blocks planned by `dense_blocks_planned_budget`.
const EXACT_DENSE_TOTAL_BUDGET_BYTES: usize = 2 * 1024 * 1024 * 1024;
// Row count at or above which the `gamlss_rowwise_map*` helpers switch to rayon.
const GAMLSS_ROWWISE_PAR_MIN_N: usize = 4096;
/// Byte target (32 MiB) that one projected-trace row chunk is sized against.
const GAMLSS_PROJECTED_TRACE_TARGET_BYTES: usize = 32 * 1024 * 1024;
/// Smallest chunk height returned by [`gamlss_projected_trace_chunk_rows`].
const GAMLSS_PROJECTED_TRACE_MIN_CHUNK_ROWS: usize = 64;
/// Largest chunk height returned by [`gamlss_projected_trace_chunk_rows`].
const GAMLSS_PROJECTED_TRACE_MAX_CHUNK_ROWS: usize = 8192;

/// Chooses how many rows to process per chunk so one chunk stays near the byte target.
///
/// The per-row `f64` count is `rank * max(projected_channel_count, 1)
/// + max(gram_column_count, 1)`, with saturating arithmetic throughout. The resulting row
/// count is clamped to the min/max chunk-row constants.
fn gamlss_projected_trace_chunk_rows(
    rank: usize,
    projected_channel_count: usize,
    gram_column_count: usize,
) -> usize {
    let channels = projected_channel_count.max(1);
    let gram_columns = gram_column_count.max(1);
    let values_per_row = rank
        .saturating_mul(channels)
        .saturating_add(gram_columns)
        .max(1);
    let bytes_per_row = values_per_row
        .saturating_mul(std::mem::size_of::<f64>())
        .max(1);
    let unclamped_rows = GAMLSS_PROJECTED_TRACE_TARGET_BYTES / bytes_per_row;
    unclamped_rows.clamp(
        GAMLSS_PROJECTED_TRACE_MIN_CHUNK_ROWS,
        GAMLSS_PROJECTED_TRACE_MAX_CHUNK_ROWS,
    )
}
/// Evaluates `f(i)` for every `i` in `0..n` and returns the results in index order.
///
/// Uses rayon when `n >= GAMLSS_ROWWISE_PAR_MIN_N`, otherwise a plain sequential pass.
fn gamlss_rowwise_map<F>(n: usize, f: F) -> Array1<f64>
where
    F: Fn(usize) -> f64 + Sync,
{
    let run_parallel = n >= GAMLSS_ROWWISE_PAR_MIN_N;
    let values: Vec<f64> = if run_parallel {
        (0..n).into_par_iter().map(&f).collect()
    } else {
        (0..n).map(&f).collect()
    };
    Array1::from(values)
}
/// Fallible counterpart of `gamlss_rowwise_map`: evaluates `f(i)` for `i` in `0..n` and
/// returns either all values in index order or an error produced by `f`.
///
/// The sequential path stops at the first failing index. The rayon path is taken when
/// `n >= GAMLSS_ROWWISE_PAR_MIN_N`.
fn gamlss_rowwise_map_result<F>(n: usize, f: F) -> Result<Array1<f64>, String>
where
    F: Fn(usize) -> Result<f64, String> + Sync,
{
    let collected: Result<Vec<f64>, String> = if n >= GAMLSS_ROWWISE_PAR_MIN_N {
        (0..n).into_par_iter().map(&f).collect()
    } else {
        (0..n).map(&f).collect()
    };
    collected.map(Array1::from)
}
/// A design that is either available as a dense matrix or only through a `DesignMatrix`
/// operator that is read in row chunks.
enum DenseOrOperator<'a> {
    /// Dense matrix borrowed from its owner.
    Borrowed(&'a Array2<f64>),
    /// Dense matrix owned by this value.
    Owned(Array2<f64>),
    /// Design kept in operator form.
    Operator(DesignMatrix),
}
impl DenseOrOperator<'_> {
    /// Number of rows of the underlying design.
    fn nrows(&self) -> usize {
        match self {
            Self::Borrowed(matrix) => matrix.nrows(),
            Self::Owned(matrix) => matrix.nrows(),
            Self::Operator(operator) => operator.nrows(),
        }
    }

    /// Number of columns of the underlying design.
    fn ncols(&self) -> usize {
        match self {
            Self::Borrowed(matrix) => matrix.ncols(),
            Self::Owned(matrix) => matrix.ncols(),
            Self::Operator(operator) => operator.ncols(),
        }
    }

    /// Returns an owned copy of the requested rows (all columns).
    ///
    /// Dense variants slice and copy; the operator variant delegates to `try_row_chunk` and
    /// stringifies its error.
    fn row_chunk(&self, rows: std::ops::Range<usize>) -> Result<Array2<f64>, String> {
        let matrix: &Array2<f64> = match self {
            Self::Operator(operator) => {
                return operator.try_row_chunk(rows).map_err(|e| e.to_string());
            }
            Self::Borrowed(matrix) => *matrix,
            Self::Owned(matrix) => matrix,
        };
        Ok(matrix.slice(s![rows, ..]).to_owned())
    }

    /// Computes the matrix-vector product of the design with `beta`.
    ///
    /// # Panics
    /// Panics if `beta.len()` differs from the column count, or if the operator variant fails
    /// to materialize a row chunk.
    fn dot(&self, beta: ArrayView1<'_, f64>) -> Array1<f64> {
        let (n, p) = (self.nrows(), self.ncols());
        assert_eq!(beta.len(), p);
        let operator = match self {
            Self::Borrowed(matrix) => return fast_av(*matrix, &beta),
            Self::Owned(matrix) => return fast_av(matrix, &beta),
            Self::Operator(operator) => operator,
        };
        // Operator path: stream the design in row chunks and fill the output slice by slice.
        let mut product = Array1::<f64>::zeros(n);
        for rows in exact_design_row_chunks(n, p) {
            // NOTE(review): the panic text names `DesignSlot::dot`, not this type — looks
            // like a stale name; left unchanged here.
            let block = operator
                .try_row_chunk(rows.clone())
                .expect("gamlss DesignSlot::dot: design row chunk materialization failed");
            let partial = fast_av(&block, &beta);
            product.slice_mut(s![rows]).assign(&partial);
        }
        product
    }
}
/// Returns the block's design as a dense matrix, borrowing when it is already dense.
///
/// Otherwise the design is materialized under `material_policy` and returned as an owned
/// copy. Materialization errors are prefixed with `materialization_label`.
fn dense_block_from_spec<'a>(
    spec: &'a ParameterBlockSpec,
    material_policy: &crate::resource::MaterializationPolicy,
    materialization_label: &str,
) -> Result<Cow<'a, Array2<f64>>, String> {
    if let Some(dense) = spec.design.as_dense_ref() {
        return Ok(Cow::Borrowed(dense));
    }
    let materialized = spec
        .design
        .try_to_dense_with_policy(material_policy, "gamlss dense_block_from_spec")
        .map_err(|e| format!("{materialization_label}: {e}"))?;
    Ok(Cow::Owned(materialized.as_ref().clone()))
}
/// Returns dense designs for the primary and log-sigma blocks of a location-scale family.
///
/// # Errors
/// - `specs.len() != expected_count`.
/// - `primary_block_idx` or `log_sigma_block_idx` does not address an entry of `specs`
///   (previously this panicked on the slice index).
/// - Dense materialization of either design fails under `material_policy`.
fn dense_locscale_block_designs_fromspecs<'a>(
    specs: &'a [ParameterBlockSpec],
    expected_count: usize,
    family_name: &str,
    short_family_name: &str,
    primary_block_idx: usize,
    log_sigma_block_idx: usize,
    primary_label: &str,
    material_policy: &crate::resource::MaterializationPolicy,
) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
    if specs.len() != expected_count {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "{family_name} expects {expected_count} specs, got {}",
                specs.len()
            ),
        }
        .into());
    }
    // Checked lookup: the indices are independent of `expected_count`, so validate them here
    // instead of panicking on an out-of-range slice index.
    let spec_at = |idx: usize, role: &str| -> Result<&'a ParameterBlockSpec, String> {
        specs.get(idx).ok_or_else(|| {
            String::from(GamlssError::DimensionMismatch {
                reason: format!(
                    "{family_name} {role} block index {idx} is out of range for {} specs",
                    specs.len()
                ),
            })
        })
    };
    let primary = dense_block_from_spec(
        spec_at(primary_block_idx, primary_label)?,
        material_policy,
        &format!("{short_family_name} dense_block_designs_fromspecs {primary_label}"),
    )?;
    let log_sigma = dense_block_from_spec(
        spec_at(log_sigma_block_idx, "log_sigma")?,
        material_policy,
        &format!("{short_family_name} dense_block_designs_fromspecs log_sigma"),
    )?;
    Ok((primary, log_sigma))
}
/// Returns dense primary and log-sigma designs from optional cached `DesignMatrix` handles.
///
/// Both handles must be present; a missing one is reported before any materialization.
/// Already-dense designs are borrowed, others are materialized under `material_policy` and
/// copied.
fn dense_locscale_block_designs_cached<'a>(
    primary_design: Option<&'a DesignMatrix>,
    log_sigma_design: Option<&'a DesignMatrix>,
    family_name: &str,
    short_family_name: &str,
    primary_label: &str,
    material_policy: &crate::resource::MaterializationPolicy,
) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
    let primary_design = primary_design
        .ok_or_else(|| format!("{family_name} exact path is missing {primary_label} design"))?;
    let log_sigma_design = log_sigma_design
        .ok_or_else(|| format!("{family_name} exact path is missing log-sigma design"))?;

    // Shared borrow-or-materialize step; `role` only affects the error text.
    let densify = |design: &'a DesignMatrix,
                   role: &str|
     -> Result<Cow<'a, Array2<f64>>, String> {
        if let Some(dense) = design.as_dense_ref() {
            return Ok(Cow::Borrowed(dense));
        }
        let materialized = design
            .try_to_dense_with_policy(material_policy, "gamlss dense_locscale_block_designs")
            .map_err(|e| format!("{short_family_name} dense_block_designs {role}: {e}"))?;
        Ok(Cow::Owned(materialized.as_ref().clone()))
    };

    let primary = densify(primary_design, primary_label)?;
    let log_sigma = densify(log_sigma_design, "log_sigma")?;
    Ok((primary, log_sigma))
}
/// Result of resolving one flat psi index in `locscale_joint_psi_direction_parts`.
struct LocScalePsiDirectionParts {
    /// Index of the block that owns the psi coordinate.
    block_idx: usize,
    /// Position of the psi coordinate inside that block's derivative list.
    local_idx: usize,
    /// Psi design map for the primary block (a `Zero` map when psi is in the log-sigma block).
    primary_psi: PsiDesignMap,
    /// Psi design map for the log-sigma block (a `Zero` map when psi is in the primary block).
    log_sigma_psi: PsiDesignMap,
    /// `primary_psi` applied to the primary coefficients (zeros when the map is `Zero`).
    primary_z: Array1<f64>,
    /// `log_sigma_psi` applied to the log-sigma coefficients (zeros when the map is `Zero`).
    log_sigma_z: Array1<f64>,
}
/// Resolves the flat psi coordinate `psi_index` into its owning block and builds the
/// first-order psi design maps and their products with the current coefficients.
///
/// Psi coordinates are numbered by walking `derivative_blocks` in order, block by block.
///
/// Returns `Ok(None)` when `psi_index` is past the last coordinate, or when the owning block
/// is neither the primary nor the log-sigma block.
///
/// # Errors
/// Fails when `block_states` or `derivative_blocks` does not have `expected_blocks` entries,
/// or when resolving or applying the psi map fails.
#[allow(clippy::too_many_arguments)]
fn locscale_joint_psi_direction_parts(
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    n: usize,
    p_primary: usize,
    p_log_sigma: usize,
    primary_block_idx: usize,
    log_sigma_block_idx: usize,
    expected_blocks: usize,
    family_name: &str,
    primary_label: &str,
    policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocScalePsiDirectionParts>, String> {
    let counts_match =
        block_states.len() == expected_blocks && derivative_blocks.len() == expected_blocks;
    if !counts_match {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "{family_name} joint psi direction expects {expected_blocks} blocks and {expected_blocks} derivative block lists, got {} and {}",
                block_states.len(),
                derivative_blocks.len()
            ),
        }
        .into());
    }
    let beta_primary = &block_states[primary_block_idx].beta;
    let beta_log_sigma = &block_states[log_sigma_block_idx].beta;

    // Flatten (block, local) pairs in declaration order and pick the requested coordinate.
    let located = derivative_blocks
        .iter()
        .enumerate()
        .flat_map(|(block_idx, block_derivs)| {
            block_derivs
                .iter()
                .enumerate()
                .map(move |(local_idx, deriv)| (block_idx, local_idx, deriv))
        })
        .nth(psi_index);
    let Some((block_idx, local_idx, deriv)) = located else {
        return Ok(None);
    };

    let (primary_psi, primary_z, log_sigma_psi, log_sigma_z) = if block_idx == primary_block_idx
    {
        let psi = resolve_custom_family_x_psi_map(
            deriv,
            n,
            p_primary,
            0..n,
            &format!("{family_name} {primary_label}"),
            policy,
        )?;
        let z = psi
            .forward_mul(beta_primary.view())
            .map_err(|e| format!("{family_name} {primary_label} forward_mul: {e}"))?;
        // The other block does not depend on this psi: zero map and zero product.
        let zero_map = PsiDesignMap::Zero {
            nrows: n,
            ncols: p_log_sigma,
        };
        (psi, z, zero_map, Array1::<f64>::zeros(n))
    } else if block_idx == log_sigma_block_idx {
        let psi = resolve_custom_family_x_psi_map(
            deriv,
            n,
            p_log_sigma,
            0..n,
            &format!("{family_name} log-sigma"),
            policy,
        )?;
        let z = psi
            .forward_mul(beta_log_sigma.view())
            .map_err(|e| format!("{family_name} log-sigma forward_mul: {e}"))?;
        let zero_map = PsiDesignMap::Zero {
            nrows: n,
            ncols: p_primary,
        };
        (zero_map, Array1::<f64>::zeros(n), psi, z)
    } else {
        return Ok(None);
    };

    Ok(Some(LocScalePsiDirectionParts {
        block_idx,
        local_idx,
        primary_psi,
        log_sigma_psi,
        primary_z,
        log_sigma_z,
    }))
}
/// Shape and labeling parameters consumed by `locscale_joint_psisecond_design_drifts`.
struct LocScalePsiDriftConfig<'a> {
    /// Row count passed to the second-order psi map resolver.
    n: usize,
    /// Column count used for the primary block's second-order map.
    p_primary: usize,
    /// Column count used for the log-sigma block's second-order map.
    p_log_sigma: usize,
    /// Index of the primary block in the block-state and derivative lists.
    primary_block_idx: usize,
    /// Index of the log-sigma block in the block-state and derivative lists.
    log_sigma_block_idx: usize,
    /// Family name used as a prefix in resolver labels.
    family_name: &'a str,
    /// Label of the primary block used in resolver labels.
    primary_label: &'a str,
    /// Resource policy forwarded to the resolver.
    policy: &'a crate::resource::ResourcePolicy,
}
/// Builds the second-order (psi_a, psi_b) design drifts for the primary and log-sigma blocks.
///
/// A second-order design term is only resolved when both directions belong to the same
/// block; for any other pairing every slot stays `None`. The `z_*_ab` outputs are the
/// resulting linear maps applied to the current block coefficients.
fn locscale_joint_psisecond_design_drifts(
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_a: &LocationScaleJointPsiDirection,
    psi_b: &LocationScaleJointPsiDirection,
    cfg: LocScalePsiDriftConfig<'_>,
) -> Result<LocationScaleJointPsiSecondDrifts, String> {
    let beta_primary = &block_states[cfg.primary_block_idx].beta;
    let beta_log_sigma = &block_states[cfg.log_sigma_block_idx].beta;

    // Each pair is (operator action, dense matrix) for one block.
    let (primary_slots, log_sigma_slots) = if psi_a.block_idx == psi_b.block_idx {
        let deriv_a = &derivative_blocks[psi_a.block_idx][psi_a.local_idx];
        let deriv_b = &derivative_blocks[psi_b.block_idx][psi_b.local_idx];
        if psi_a.block_idx == cfg.primary_block_idx {
            let slots = psi_psi_map_to_drift_slots(
                deriv_a,
                deriv_b,
                psi_b.local_idx,
                cfg.n,
                cfg.p_primary,
                &format!("{} {}", cfg.family_name, cfg.primary_label),
                cfg.policy,
            )?;
            (slots, (None, None))
        } else if psi_a.block_idx == cfg.log_sigma_block_idx {
            let slots = psi_psi_map_to_drift_slots(
                deriv_a,
                deriv_b,
                psi_b.local_idx,
                cfg.n,
                cfg.p_log_sigma,
                &format!("{} log-sigma", cfg.family_name),
                cfg.policy,
            )?;
            ((None, None), slots)
        } else {
            ((None, None), (None, None))
        }
    } else {
        ((None, None), (None, None))
    };
    let (primary_ab_action, primary_ab) = primary_slots;
    let (log_sigma_ab_action, log_sigma_ab) = log_sigma_slots;

    let z_primary_ab = second_psi_linear_map(
        primary_ab_action.as_ref(),
        primary_ab.as_ref(),
        cfg.n,
        cfg.p_primary,
    )
    .forward_mul(beta_primary.view());
    let z_ls_ab = second_psi_linear_map(
        log_sigma_ab_action.as_ref(),
        log_sigma_ab.as_ref(),
        cfg.n,
        cfg.p_log_sigma,
    )
    .forward_mul(beta_log_sigma.view());

    Ok(LocationScaleJointPsiSecondDrifts {
        x_primary_ab_action: primary_ab_action,
        x_ls_ab_action: log_sigma_ab_action,
        x_primary_ab: primary_ab,
        x_ls_ab: log_sigma_ab,
        z_primary_ab,
        z_ls_ab,
    })
}
/// Resolves the second-order psi design map for one block and splits it into the
/// `(operator action, dense matrix)` pair used by the drift structs.
///
/// `Second` fills the action slot, `Dense` fills the matrix slot with a copy of the matrix,
/// and `Zero` leaves both empty. A `First` map is rejected as `UnsupportedConfiguration`.
fn psi_psi_map_to_drift_slots(
    deriv: &crate::custom_family::CustomFamilyBlockPsiDerivative,
    deriv_b: &crate::custom_family::CustomFamilyBlockPsiDerivative,
    local_idx_b: usize,
    n: usize,
    p: usize,
    label: &str,
    policy: &crate::resource::ResourcePolicy,
) -> Result<
    (
        Option<crate::custom_family::CustomFamilyPsiSecondDesignAction>,
        Option<Array2<f64>>,
    ),
    String,
> {
    let resolved =
        resolve_custom_family_x_psi_psi_map(deriv, deriv_b, local_idx_b, n, p, 0..n, label, policy)?;
    let slots = match resolved {
        crate::custom_family::PsiDesignMap::Second { action } => (Some(action), None),
        crate::custom_family::PsiDesignMap::Dense { matrix } => (None, Some((*matrix).clone())),
        crate::custom_family::PsiDesignMap::Zero { .. } => (None, None),
        crate::custom_family::PsiDesignMap::First { .. } => {
            return Err(GamlssError::UnsupportedConfiguration {
                reason: format!("{label}: unexpected First variant from _psi_psi_map"),
            }
            .into());
        }
    };
    Ok(slots)
}
/// Picks a dense or operator representation for `design`.
///
/// An already-dense design is borrowed. Otherwise, if `8 * n * p` bytes fit in
/// `budget_bytes` and materialization succeeds, an owned dense copy is returned. A design
/// over budget, or one whose materialization fails, stays in operator form (best-effort
/// fallback, the error is intentionally dropped).
fn dense_block_or_operator<'a>(
    design: &'a DesignMatrix,
    n: usize,
    p: usize,
    budget_bytes: usize,
    policy: &crate::resource::ResourcePolicy,
) -> DenseOrOperator<'a> {
    if let Some(dense) = design.as_dense_ref() {
        return DenseOrOperator::Borrowed(dense);
    }
    let dense_bytes = 8usize.saturating_mul(n).saturating_mul(p);
    if dense_bytes > budget_bytes {
        return DenseOrOperator::Operator(design.clone());
    }
    match design
        .try_to_dense_with_policy(&policy.material_policy(), "gamlss dense_block_or_operator")
    {
        Ok(arc) => DenseOrOperator::Owned(arc.as_ref().clone()),
        Err(_) => DenseOrOperator::Operator(design.clone()),
    }
}
/// Plans, per block, how many bytes may be spent on a dense copy.
///
/// Blocks are visited in order. An already-dense block gets 0. Any other block gets its
/// `8 * nrows * ncols` size when that fits both the per-block budget and the remaining total
/// budget, and 0 otherwise.
fn dense_blocks_planned_budget(blocks: &[&DesignMatrix]) -> Vec<usize> {
    let mut committed = 0usize;
    blocks
        .iter()
        .map(|design| {
            if design.as_dense_ref().is_some() {
                return 0;
            }
            let bytes = 8usize
                .saturating_mul(design.nrows())
                .saturating_mul(design.ncols());
            let fits_block = bytes <= EXACT_DENSE_BLOCK_BUDGET_BYTES;
            let fits_total = committed.saturating_add(bytes) <= EXACT_DENSE_TOTAL_BUDGET_BYTES;
            if fits_block && fits_total {
                committed += bytes;
                bytes
            } else {
                0
            }
        })
        .collect()
}
/// Yields consecutive, non-overlapping row ranges that together cover `0..n`.
///
/// The chunk height targets 8 MiB of `f64` data for `p` columns, clamped to
/// `[512, 131_072]` rows and never more than `n`. No ranges are produced when `n == 0`.
pub(super) fn exact_design_row_chunks(
    n: usize,
    p: usize,
) -> impl Iterator<Item = std::ops::Range<usize>> {
    const TARGET_BYTES: usize = 8 * 1024 * 1024;
    const MIN_ROWS: usize = 512;
    const MAX_ROWS: usize = 131_072;
    let row_bytes = p.max(1) * 8;
    let chunk_len = (TARGET_BYTES / row_bytes)
        .clamp(MIN_ROWS, MAX_ROWS)
        // `n.max(1)` keeps the step non-zero for an empty design.
        .min(n.max(1));
    (0..n).step_by(chunk_len).map(move |begin| {
        let end = (begin + chunk_len).min(n);
        begin..end
    })
}
/// Computes, for every column `j`, the weighted sum of squares `sum_i w_i * x_ij^2`.
///
/// The design is read in row chunks; rows whose weight is exactly zero are skipped.
///
/// # Errors
/// Fails when `weights.len()` differs from the row count or a row chunk cannot be
/// materialized.
fn design_weighted_column_squares(
    design: &DesignMatrix,
    weights: &Array1<f64>,
) -> Result<Array1<f64>, String> {
    let (n, p) = (design.nrows(), design.ncols());
    if weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "design weighted column squares dimension mismatch: weights={}, rows={}",
                weights.len(),
                n
            ),
        }
        .into());
    }
    let mut column_squares = Array1::<f64>::zeros(p);
    for rows in exact_design_row_chunks(n, p) {
        let block = design.try_row_chunk(rows.clone()).map_err(|e| {
            format!("design weighted column squares row chunk materialization failed: {e}")
        })?;
        for (offset, row) in block.outer_iter().enumerate() {
            let w = weights[rows.start + offset];
            if w != 0.0 {
                for (j, acc) in column_squares.iter_mut().enumerate() {
                    let x = row[j];
                    *acc += w * x * x;
                }
            }
        }
    }
    Ok(column_squares)
}
/// Floors a strictly positive, finite weight at `minweight`.
///
/// Non-finite, zero, and negative weights map to exactly `0.0`.
#[inline]
fn floor_positiveweight(rawweight: f64, minweight: f64) -> f64 {
    let usable = rawweight.is_finite() && rawweight > 0.0;
    if usable {
        rawweight.max(minweight)
    } else {
        0.0
    }
}
/// Returns `d_sigma_deta / sigma` clamped to `[0, 1]`.
///
/// An infinite `d_sigma_deta` (either sign) yields `1.0`. A non-finite ratio (for example
/// from `sigma == 0` or a NaN input) yields `0.0`.
#[inline]
fn logb_dlog_sigma_deta(sigma: f64, d_sigma_deta: f64) -> f64 {
    if d_sigma_deta.is_infinite() {
        return 1.0;
    }
    let ratio = d_sigma_deta / sigma;
    match ratio.is_finite() {
        true => ratio.clamp(0.0, 1.0),
        false => 0.0,
    }
}
/// Directional derivative, along `d_eta`, of the log-sigma information `2 * weight * g^2`,
/// where `g = logb_dlog_sigma_deta(sigma, d_sigma_deta)` and `dg/deta` is taken as
/// `g * (1 - g)`.
///
/// Returns `0.0` when the derivative is identically zero or not usable:
/// zero `weight` or `d_eta`, non-positive or non-finite `sigma`, `g` outside `[0, 1)`,
/// information that is non-finite or at most `MIN_WEIGHT`, or a non-finite result.
#[inline]
fn gaussian_log_sigma_irlsinfo_directional_derivative(
    weight: f64,
    sigma: f64,
    d_sigma_deta: f64,
    d_eta: f64,
) -> f64 {
    let sigma_usable = sigma.is_finite() && sigma > 0.0;
    if weight == 0.0 || d_eta == 0.0 || !sigma_usable {
        return 0.0;
    }
    let g = logb_dlog_sigma_deta(sigma, d_sigma_deta);
    let g_in_range = g.is_finite() && (0.0..1.0).contains(&g);
    if !g_in_range {
        return 0.0;
    }
    let rawinfo = 2.0 * weight * g * g;
    let info_above_floor = rawinfo.is_finite() && rawinfo > MIN_WEIGHT;
    if !info_above_floor {
        return 0.0;
    }
    let dg_deta = g * (1.0 - g);
    let dw = 4.0 * weight * g * dg_deta * d_eta;
    match dw.is_finite() {
        true => dw,
        false => 0.0,
    }
}
/// Per-observation quantities produced by `gaussian_diagonal_row_kernel`.
#[derive(Clone, Copy)]
struct GaussianDiagonalRowKernel {
    /// Weighted Gaussian log-likelihood contribution of the row.
    log_likelihood: f64,
    /// Working weight for the location predictor.
    location_working_weight: f64,
    /// Residual `y - location_eta` (0 for a zero-weight row).
    location_working_shift: f64,
    /// Working weight for the log-sigma predictor.
    log_sigma_working_weight: f64,
    /// Working response for the log-sigma predictor.
    log_sigma_working_response: f64,
}
/// Evaluates the Gaussian location-scale row quantities for one observation.
///
/// `sigma` and its first eta-derivative come from `logb_sigma_jet1_scalar(eta_log_sigma)`.
/// With `r = y - location_eta` and `g = logb_dlog_sigma_deta(sigma, d1)`:
/// - log-likelihood: `w * (-0.5 * (r^2 / sigma^2 + ln2pi + 2 ln sigma))`
/// - location weight: `w / sigma^2`, floored through `floor_positiveweight`
/// - log-sigma weight: `2 w g^2`, floored through `floor_positiveweight`
/// - log-sigma response: `eta + score / weight`, or `eta` when the weight is zero
///
/// A row with `obs_weight == 0` contributes nothing and keeps `eta_log_sigma` as its
/// log-sigma working response. `ln2pi` is supplied by the caller (presumably `ln(2*pi)`).
#[inline]
fn gaussian_diagonal_row_kernel(
    y: f64,
    location_eta: f64,
    eta_log_sigma: f64,
    obs_weight: f64,
    ln2pi: f64,
) -> GaussianDiagonalRowKernel {
    if obs_weight == 0.0 {
        return GaussianDiagonalRowKernel {
            log_likelihood: 0.0,
            location_working_weight: 0.0,
            location_working_shift: 0.0,
            log_sigma_working_weight: 0.0,
            log_sigma_working_response: eta_log_sigma,
        };
    }

    let jet = logb_sigma_jet1_scalar(eta_log_sigma);
    let (sigma, d1) = (jet.sigma, jet.d1);
    let inv_s2 = (sigma * sigma).recip();
    let residual = y - location_eta;
    // Squared standardized residual; shared by the score and the log-likelihood.
    let scaled_sq = residual * residual * inv_s2;

    let dlog_sigma_deta = logb_dlog_sigma_deta(sigma, d1);
    let raw_log_sigma_weight = 2.0 * obs_weight * dlog_sigma_deta * dlog_sigma_deta;
    let log_sigma_working_weight = floor_positiveweight(raw_log_sigma_weight, MIN_WEIGHT);
    let log_sigma_score = obs_weight * (scaled_sq - 1.0) * dlog_sigma_deta;
    let newton_step = if log_sigma_working_weight == 0.0 {
        None
    } else {
        Some(log_sigma_score / log_sigma_working_weight)
    };

    GaussianDiagonalRowKernel {
        log_likelihood: obs_weight * (-0.5 * (scaled_sq + ln2pi + 2.0 * sigma.ln())),
        location_working_weight: floor_positiveweight(obs_weight * inv_s2, MIN_WEIGHT),
        location_working_shift: residual,
        log_sigma_working_weight,
        log_sigma_working_response: match newton_step {
            Some(step) => eta_log_sigma + step,
            None => eta_log_sigma,
        },
    }
}
/// Segment sizes of a flat vector laid out as mean, then noise, then wiggle entries.
#[derive(Clone, Copy)]
struct GamlssLambdaLayout {
    /// Number of leading entries belonging to the mean segment.
    k_mean: usize,
    /// Number of entries in the noise segment, which follows the mean segment.
    k_noise: usize,
    /// Number of entries in the trailing wiggle segment (0 for a two-block layout).
    kwiggle: usize,
}
impl GamlssLambdaLayout {
fn two_block(k_mean: usize, k_noise: usize) -> Self {
Self {
k_mean,
k_noise,
kwiggle: 0,
}
}
fn withwiggle(k_mean: usize, k_noise: usize, kwiggle: usize) -> Self {
Self {
k_mean,
k_noise,
kwiggle,
}
}
fn total(self) -> usize {
self.k_mean + self.k_noise + self.kwiggle
}
fn mean_end(self) -> usize {
self.k_mean
}
fn noise_start(self) -> usize {
self.k_mean
}
fn noise_end(self) -> usize {
self.k_mean + self.k_noise
}
fn wiggle_start(self) -> usize {
self.k_mean + self.k_noise
}
fn wiggle_end(self) -> usize {
self.k_mean + self.k_noise + self.kwiggle
}
fn validate_theta_len(self, theta_len: usize, context: &str) -> Result<(), String> {
let needed = self.total();
if theta_len < needed {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"{context} theta too short: got {}, need at least {}",
theta_len, needed
),
}
.into());
}
Ok(())
}
fn mean_from(self, theta: &Array1<f64>) -> Array1<f64> {
theta.slice(s![0..self.mean_end()]).to_owned()
}
fn noise_from(self, theta: &Array1<f64>) -> Array1<f64> {
theta
.slice(s![self.noise_start()..self.noise_end()])
.to_owned()
}
fn wiggle_from(self, theta: &Array1<f64>) -> Array1<f64> {
theta
.slice(s![self.wiggle_start()..self.wiggle_end()])
.to_owned()
}
}
/// Segment sizes of a flat coefficient vector split into three consecutive parts.
#[derive(Clone, Copy)]
struct GamlssBetaLayout {
    /// Length of the first segment.
    pt: usize,
    /// Length of the second segment.
    pls: usize,
    /// Length of the third segment.
    pw: usize,
}
impl GamlssBetaLayout {
fn withwiggle(pt: usize, pls: usize, pw: usize) -> Self {
Self { pt, pls, pw }
}
fn total(self) -> usize {
self.pt + self.pls + self.pw
}
fn split_three(
self,
flat: &Array1<f64>,
context: &str,
) -> Result<(Array1<f64>, Array1<f64>, Array1<f64>), String> {
if flat.len() != self.total() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"{context} length mismatch: got {}, expected {}",
flat.len(),
self.total()
),
}
.into());
}
Ok((
flat.slice(s![0..self.pt]).to_owned(),
flat.slice(s![self.pt..self.pt + self.pls]).to_owned(),
flat.slice(s![self.pt + self.pls..self.total()]).to_owned(),
))
}
}
/// Static descriptive metadata for a family.
#[derive(Clone, Debug)]
pub struct FamilyMetadata {
    /// Family name.
    pub name: &'static str,
    /// Names of the family's parameters.
    pub parameternames: &'static [&'static str],
    /// Link descriptors for the parameters.
    /// NOTE(review): presumably parallel to `parameternames` — confirm.
    pub parameter_links: &'static [ParameterLink],
}
// Gauge priority constants. The block builders visible in this file set `gauge_priority`
// with the literal 100 rather than through these names.
// NOTE(review): confirm where `LINK_WIGGLE_GAUGE_PRIORITY` is applied.
const DEFAULT_GAUGE_PRIORITY: u8 = 100;
const LINK_WIGGLE_GAUGE_PRIORITY: u8 = 80;
/// Returns the block's initial log-lambdas, or a zero vector with one entry per penalty when
/// none were supplied.
///
/// # Errors
/// Fails when supplied log-lambdas do not have exactly one entry per penalty.
fn initial_log_lambdas_orzeros(block: &ParameterBlockInput) -> Result<Array1<f64>, String> {
    let k = block.penalties.len();
    let lambdas = match &block.initial_log_lambdas {
        Some(given) => given.clone(),
        None => Array1::<f64>::zeros(k),
    };
    if lambdas.len() == k {
        return Ok(lambdas);
    }
    Err(GamlssError::DimensionMismatch {
        reason: format!(
            "initial_log_lambdas length mismatch: got {}, expected {}",
            lambdas.len(),
            k
        ),
    }
    .into())
}
/// Assembles the initial rho vector for a mean/noise two-block model and delegates to
/// `build_location_scale_exact_joint_setup`.
///
/// The rho vector has `mean_penalties + noise_penalties + extra_rho0.len()` entries. If
/// `rho0_override` is present and has exactly that length it is used verbatim. Otherwise
/// (including an override of the wrong length) the mean and noise entries stay zero and the
/// trailing entries are filled from `extra_rho0`.
fn build_two_block_exact_joint_setup(
    data: ArrayView2<'_, f64>,
    meanspec: &TermCollectionSpec,
    noisespec: &TermCollectionSpec,
    mean_penalties: usize,
    noise_penalties: usize,
    extra_rho0: &[f64],
    rho0_override: Option<&Array1<f64>>,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> ExactJointHyperSetup {
    let extra_start = mean_penalties + noise_penalties;
    let rho_dim = extra_start + extra_rho0.len();
    let mut rho0vec = Array1::<f64>::zeros(rho_dim);
    match rho0_override {
        Some(given) if given.len() == rho_dim => rho0vec.assign(given),
        _ => {
            let tail = rho0vec.iter_mut().skip(extra_start);
            for (slot, &rho_init) in tail.zip(extra_rho0) {
                *slot = rho_init;
            }
        }
    }
    build_location_scale_exact_joint_setup(data, &[meanspec, noisespec], rho0vec, kappa_options)
}
/// Solves a penalized weighted projection of `target_eta - offset` onto `design`.
///
/// The penalty system is `sum_k exp(log_lambdas[k]) * penalties[k]` (absent when there are no
/// penalties). The solve is delegated to `design.solve_systemwith_policy` with ridge
/// `max(ridge_floor, 1e-12)`.
///
/// # Errors
/// - `offset`, `target_eta`, or `weights` length differs from the design row count.
/// - `penalties` and `log_lambdas` differ in length.
/// - A lambda is non-finite, or a penalty is not `p x p`.
/// - The cross-product or solve step fails, or the solution has a non-finite entry.
pub(crate) fn solve_penalizedweighted_projection(
    design: &DesignMatrix,
    offset: &Array1<f64>,
    target_eta: &Array1<f64>,
    weights: &Array1<f64>,
    penalties: &[PenaltyMatrix],
    log_lambdas: &Array1<f64>,
    ridge_floor: f64,
) -> Result<Array1<f64>, String> {
    let n = design.nrows();
    let p = design.ncols();
    let row_lengths_ok = [offset.len(), target_eta.len(), weights.len()]
        .iter()
        .all(|&len| len == n);
    if !row_lengths_ok {
        return Err(GamlssError::DimensionMismatch {
            reason: "solve_penalizedweighted_projection dimension mismatch".to_string(),
        }
        .into());
    }
    if penalties.len() != log_lambdas.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "solve_penalizedweighted_projection lambda mismatch: penalties={}, log_lambdas={}",
                penalties.len(),
                log_lambdas.len()
            ),
        }
        .into());
    }

    let y_star = target_eta - offset;
    let xtwy = design.compute_xtwy(weights, &y_star)?;

    // Only allocate the p x p accumulator when there is something to accumulate.
    let mut penalty_system = (!penalties.is_empty()).then(|| Array2::<f64>::zeros((p, p)));
    for (k, (penalty, &log_lambda)) in penalties.iter().zip(log_lambdas.iter()).enumerate() {
        let lambda = log_lambda.exp();
        if !lambda.is_finite() || lambda < 0.0 {
            return Err(GamlssError::NumericalFailure {
                reason: format!(
                    "solve_penalizedweighted_projection encountered invalid lambda at index {k}: {}",
                    log_lambda
                ),
            }
            .into());
        }
        if penalty.nrows() != p || penalty.ncols() != p {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "solve_penalizedweighted_projection penalty shape mismatch at index {k}: \
                     penalty is {}x{} but design has {} columns",
                    penalty.nrows(),
                    penalty.ncols(),
                    p
                ),
            }
            .into());
        }
        if let Some(system) = penalty_system.as_mut() {
            penalty.add_scaled_to(lambda, system);
        }
    }

    let beta = design.solve_systemwith_policy(
        weights,
        &xtwy,
        penalty_system.as_ref(),
        ridge_floor.max(1e-12),
        RidgePolicy::explicit_stabilization_pospart(),
    )?;
    if beta.iter().all(|v| v.is_finite()) {
        Ok(beta)
    } else {
        Err("solve_penalizedweighted_projection produced non-finite coefficients".to_string())
    }
}
/// Produces starting coefficients for the Gaussian mean and log-sigma blocks, plus the
/// residual scale estimate they are based on.
///
/// Steps:
/// 1. Mean coefficients: the hint if given, otherwise a penalized weighted projection of `y`.
/// 2. `sigma_hat`: square root of the weighted mean squared residual (negative weights count
///    as zero), floored at `1.5 * LOGB_SIGMA_FLOOR`.
/// 3. Log-sigma coefficients: the hint if given, otherwise a projection of the constant
///    target `ln(sigma_hat - LOGB_SIGMA_FLOOR)`.
///
/// # Errors
/// Fails when a projection fails, or when the weighted residual sum or the weight sum is
/// non-finite or the weight sum is not positive.
fn gaussian_location_scalewarm_start(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    mu_block: &ParameterBlockSpec,
    log_sigma_block: &ParameterBlockSpec,
    ridge_floor: f64,
    mean_beta_hint: Option<&Array1<f64>>,
    noise_beta_hint: Option<&Array1<f64>>,
) -> Result<(Array1<f64>, Array1<f64>, f64), String> {
    let betamu = match mean_beta_hint {
        Some(hint) => hint.clone(),
        None => solve_penalizedweighted_projection(
            &mu_block.design,
            &mu_block.offset,
            y,
            weights,
            &mu_block.penalties,
            &mu_block.initial_log_lambdas,
            ridge_floor,
        )?,
    };

    let mu_hat = {
        let mut fitted = mu_block.solver_design().matrixvectormultiply(&betamu);
        fitted += mu_block.solver_offset();
        fitted
    };

    // Sequential accumulation in row order.
    let (weighted_ss, weight_sum) =
        (0..y.len()).fold((0.0_f64, 0.0_f64), |(ss, total_weight), i| {
            let wi = weights[i].max(0.0);
            let resid = y[i] - mu_hat[i];
            (ss + wi * resid * resid, total_weight + wi)
        });
    let scale_estimable = weighted_ss.is_finite() && weight_sum.is_finite() && weight_sum > 0.0;
    if !scale_estimable {
        return Err(
            "gaussian location-scale warm start could not estimate residual scale".to_string(),
        );
    }
    let sigma_hat = (weighted_ss / weight_sum)
        .sqrt()
        .max(LOGB_SIGMA_FLOOR * 1.5);

    let beta_log_sigma = match noise_beta_hint {
        Some(hint) => hint.clone(),
        None => {
            let eta_sigma = (sigma_hat - LOGB_SIGMA_FLOOR).ln();
            let sigma_target = Array1::from_elem(y.len(), eta_sigma);
            solve_penalizedweighted_projection(
                &log_sigma_block.design,
                &log_sigma_block.offset,
                &sigma_target,
                weights,
                &log_sigma_block.penalties,
                &log_sigma_block.initial_log_lambdas,
                ridge_floor,
            )?
        }
    };
    Ok((betamu, beta_log_sigma, sigma_hat))
}
// Number of family outputs passed to the block Jacobians built in this file; blocks are
// created with `own_output` 0 and 1.
const LOCATION_SCALE_N_OUTPUTS: usize = 2;
/// Builds a `ParameterBlockSpec` whose Jacobian callback is an `AdditiveBlockJacobian` over
/// the block's effective design, attached to family output `own_output`.
///
/// # Errors
/// Fails when `own_output >= n_family_outputs`, or when `effective_design` fails; `caller` is
/// used for error context.
#[allow(clippy::too_many_arguments)]
fn build_location_scale_block(
    name: impl Into<String>,
    design: DesignMatrix,
    offset: Array1<f64>,
    penalties: Vec<PenaltyMatrix>,
    nullspace_dims: Vec<usize>,
    initial_log_lambdas: Array1<f64>,
    initial_beta: Option<Array1<f64>>,
    own_output: usize,
    n_family_outputs: usize,
    caller: &str,
) -> Result<ParameterBlockSpec, String> {
    let output_in_range = own_output < n_family_outputs;
    if !output_in_range {
        return Err(format!(
            "{caller}: own_output={own_output} >= n_family_outputs={n_family_outputs}"
        ));
    }
    // The callback needs the spec's own effective design, so the spec is built without it
    // first and patched afterwards.
    // NOTE(review): the literal 100 equals `DEFAULT_GAUGE_PRIORITY` in this file.
    let mut block = ParameterBlockSpec {
        name: name.into(),
        design,
        offset,
        penalties,
        nullspace_dims,
        initial_log_lambdas,
        initial_beta,
        gauge_priority: 100,
        jacobian_callback: None,
        stacked_design: None,
        stacked_offset: None,
    };
    let effective = block.effective_design(caller)?;
    let jacobian = AdditiveBlockJacobian {
        design: effective,
        own_output,
        n_family_outputs,
    };
    block.jacobian_callback = Some(std::sync::Arc::new(jacobian));
    Ok(block)
}
/// Builds the wiggle `ParameterBlockSpec`.
///
/// Its Jacobian callback is an `AdditiveBlockJacobian` over an all-zero `n_rows x p_w`
/// matrix on output 0, where `p_w` is the column count of `design`. This function has no
/// failing path; the `Result` matches the sibling builder's signature.
#[allow(clippy::too_many_arguments)]
fn build_location_scale_wiggle_block(
    name: impl Into<String>,
    design: DesignMatrix,
    offset: Array1<f64>,
    penalties: Vec<PenaltyMatrix>,
    nullspace_dims: Vec<usize>,
    initial_log_lambdas: Array1<f64>,
    initial_beta: Option<Array1<f64>>,
    n_rows: usize,
) -> Result<ParameterBlockSpec, String> {
    // Read the width before `design` is moved into the spec.
    let p_w = design.ncols();
    let zero_jacobian = AdditiveBlockJacobian {
        design: ndarray::Array2::<f64>::zeros((n_rows, p_w)),
        own_output: 0,
        n_family_outputs: LOCATION_SCALE_N_OUTPUTS,
    };
    // NOTE(review): the literal 100 equals `DEFAULT_GAUGE_PRIORITY`; a separate
    // `LINK_WIGGLE_GAUGE_PRIORITY` (80) exists in this file but is not used here — confirm
    // this is intended.
    Ok(ParameterBlockSpec {
        name: name.into(),
        design,
        offset,
        penalties,
        nullspace_dims,
        initial_log_lambdas,
        initial_beta,
        gauge_priority: 100,
        jacobian_callback: Some(std::sync::Arc::new(zero_jacobian)),
        stacked_design: None,
        stacked_offset: None,
    })
}
/// Returns a clone of `log_sigma_design` after checking that it has the same number of rows
/// as `mu_design`.
///
/// # Errors
/// Fails with a dimension mismatch when the row counts differ.
fn prepared_gaussian_log_sigma_design(
    mu_design: &DesignMatrix,
    log_sigma_design: &DesignMatrix,
) -> Result<DesignMatrix, String> {
    if mu_design.nrows() == log_sigma_design.nrows() {
        return Ok(log_sigma_design.clone());
    }
    Err(GamlssError::DimensionMismatch {
        reason: format!(
            "gaussian log-sigma design row mismatch: mean rows={}, log_sigma rows={}",
            mu_design.nrows(),
            log_sigma_design.nrows()
        ),
    }
    .into())
}
/// Builds the log-sigma design operator for the binomial location-scale model from the
/// threshold and log-sigma term designs.
///
/// A scale-deviation transform is built from both designs, the weights, and the index at
/// which the log-sigma design's non-intercept columns start (the end of its intercept range,
/// capped at the column count). The transform is then wrapped into an operator together with
/// clones of both designs.
fn identified_binomial_log_sigma_design(
    threshold_design: &TermCollectionDesign,
    log_sigma_design: &TermCollectionDesign,
    weights: &Array1<f64>,
) -> Result<DesignMatrix, String> {
    let threshold_matrix = &threshold_design.design;
    let log_sigma_matrix = &log_sigma_design.design;
    let non_intercept_start = usize::min(
        log_sigma_design.intercept_range.end,
        log_sigma_matrix.ncols(),
    );
    let transform = build_scale_deviation_transform_design(
        threshold_matrix,
        log_sigma_matrix,
        weights,
        non_intercept_start,
    )?;
    build_scale_deviation_operator(
        threshold_matrix.clone(),
        log_sigma_matrix.clone(),
        &transform,
    )
}
/// Returns the `dim x dim` identity matrix.
fn identity_penalty(dim: usize) -> Array2<f64> {
    Array2::<f64>::eye(dim)
}
/// Appends a full-width identity penalty, with its bookkeeping entries, to `design`.
///
/// Three parallel lists are extended: the penalty itself (identity over columns `0..p`), a
/// nullspace dimension of 0, and a `PenaltyBlockInfo` named `log_sigma_shrinkage` whose
/// `global_index` is the previous length of `penaltyinfo`.
fn append_binomial_log_sigma_shrinkage_penalty_design(design: &mut TermCollectionDesign) {
    let p = design.design.ncols();
    let global_index = design.penaltyinfo.len();

    let shrinkage = BlockwisePenalty::new(0..p, identity_penalty(p));
    design.penalties.push(shrinkage);
    design.nullspace_dims.push(0);

    let penalty = PenaltyInfo {
        source: PenaltySource::Other("shrinkage".to_string()),
        original_index: 0,
        active: true,
        effective_rank: p,
        dropped_reason: None,
        nullspace_dim_hint: 0,
        normalization_scale: 1.0,
        kronecker_factors: None,
    };
    design.penaltyinfo.push(PenaltyBlockInfo {
        global_index,
        termname: Some("log_sigma_shrinkage".to_string()),
        penalty,
    });
}
/// Builds the `mu` and `log_sigma` parameter blocks for the Gaussian location-scale model.
///
/// The log-sigma block receives the noise design's penalties plus one extra identity penalty
/// (with nullspace dimension 0), so `noise_log_lambdas` is passed on to a block with one more
/// penalty than `noise_design` — NOTE(review): confirm callers size it accordingly.
///
/// If either block has no `initial_beta` hint, the Gaussian warm start is run (ridge floor
/// `1e-10`) and fills in only the missing one(s).
#[allow(clippy::too_many_arguments)]
fn build_gaussian_mean_and_scale_blocks(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    mean_design: &TermCollectionDesign,
    noise_design: &TermCollectionDesign,
    mean_offset: &Array1<f64>,
    noise_offset: &Array1<f64>,
    mean_log_lambdas: Array1<f64>,
    noise_log_lambdas: Array1<f64>,
    mean_beta_hint: Option<Array1<f64>>,
    noise_beta_hint: Option<Array1<f64>>,
    context: &str,
) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
    let mut meanspec = build_location_scale_block(
        "mu",
        mean_design.design.clone(),
        mean_offset.clone(),
        mean_design.penalties_as_penalty_matrix(),
        mean_design.nullspace_dims.clone(),
        mean_log_lambdas,
        mean_beta_hint,
        0,
        LOCATION_SCALE_N_OUTPUTS,
        &format!("{context}: mu"),
    )?;

    let prepared_noise_design =
        prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)?;
    let p_noise = prepared_noise_design.ncols();
    // Smoothing penalties of the noise design followed by one full-rank identity penalty.
    let log_sigma_penalty_matrices = {
        let mut matrices = noise_design.penalties_as_penalty_matrix();
        matrices.push(PenaltyMatrix::Dense(identity_penalty(p_noise)));
        matrices
    };
    let log_sigma_nullspace_dims = {
        let mut dims = noise_design.nullspace_dims.clone();
        dims.push(0);
        dims
    };
    let mut noisespec = build_location_scale_block(
        "log_sigma",
        prepared_noise_design,
        noise_offset.clone(),
        log_sigma_penalty_matrices,
        log_sigma_nullspace_dims,
        noise_log_lambdas,
        noise_beta_hint,
        1,
        LOCATION_SCALE_N_OUTPUTS,
        &format!("{context}: log_sigma"),
    )?;

    let needs_warm_start = meanspec.initial_beta.is_none() || noisespec.initial_beta.is_none();
    if needs_warm_start {
        let (betamu0, beta_ls0, _) = gaussian_location_scalewarm_start(
            y,
            weights,
            &meanspec,
            &noisespec,
            1e-10,
            meanspec.initial_beta.as_ref(),
            noisespec.initial_beta.as_ref(),
        )?;
        // Existing hints win; warm-start values only fill the gaps.
        meanspec.initial_beta = meanspec.initial_beta.take().or(Some(betamu0));
        noisespec.initial_beta = noisespec.initial_beta.take().or(Some(beta_ls0));
    }
    Ok((meanspec, noisespec))
}
/// Builds the `threshold` and `log_sigma` parameter blocks of the binomial location-scale
/// model.
///
/// The `log_sigma` block uses the design returned by `identified_binomial_log_sigma_design`
/// and carries every penalty of `noise_design` followed by one dense identity penalty over
/// all of its columns. Both blocks are created with empty nullspace-dimension lists.
///
/// If either block comes back without an initial coefficient vector, seeds are computed by
/// `binomial_location_scalewarm_start` and only the missing ones are filled in.
///
/// # Errors
/// Propagates failures from design identification, block construction and the warm start.
#[allow(clippy::too_many_arguments)]
fn build_binomial_threshold_and_scale_blocks(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    link_kind: &InverseLink,
    mean_design: &TermCollectionDesign,
    noise_design: &TermCollectionDesign,
    mean_offset: &Array1<f64>,
    noise_offset: &Array1<f64>,
    mean_log_lambdas: Array1<f64>,
    noise_log_lambdas: Array1<f64>,
    mean_beta_hint: Option<Array1<f64>>,
    noise_beta_hint: Option<Array1<f64>>,
    context: &str,
) -> Result<(ParameterBlockSpec, ParameterBlockSpec), String> {
    // The scale design is identified first so its failure is reported before any block is built.
    let scale_design = identified_binomial_log_sigma_design(mean_design, noise_design, weights)?;
    let scale_width = scale_design.ncols();

    let mut scale_penalties: Vec<PenaltyMatrix> = noise_design.penalties_as_penalty_matrix();
    scale_penalties.push(PenaltyMatrix::Dense(identity_penalty(scale_width)));

    let threshold_context = format!("{context}: threshold");
    let mut thresholdspec = build_location_scale_block(
        "threshold",
        mean_design.design.clone(),
        mean_offset.clone(),
        mean_design.penalties_as_penalty_matrix(),
        vec![],
        mean_log_lambdas,
        mean_beta_hint,
        0,
        LOCATION_SCALE_N_OUTPUTS,
        &threshold_context,
    )?;

    let log_sigma_context = format!("{context}: log_sigma");
    let mut log_sigmaspec = build_location_scale_block(
        "log_sigma",
        scale_design,
        noise_offset.clone(),
        scale_penalties,
        vec![],
        noise_log_lambdas,
        noise_beta_hint,
        1,
        LOCATION_SCALE_N_OUTPUTS,
        &log_sigma_context,
    )?;

    let needs_warm_start =
        thresholdspec.initial_beta.is_none() || log_sigmaspec.initial_beta.is_none();
    if needs_warm_start {
        let (beta_t0, beta_ls0) = binomial_location_scalewarm_start(
            y,
            weights,
            link_kind,
            &thresholdspec,
            &log_sigmaspec,
            thresholdspec.initial_beta.as_ref(),
            log_sigmaspec.initial_beta.as_ref(),
        )?;
        // Existing seeds win; the warm start only fills the gaps.
        thresholdspec.initial_beta.get_or_insert(beta_t0);
        log_sigmaspec.initial_beta.get_or_insert(beta_ls0);
    }

    Ok((thresholdspec, log_sigmaspec))
}
/// Converts the penalty specs of a wiggle block into `PenaltyMatrix` values.
///
/// Block specs become `PenaltyMatrix::Blockwise` with `total_dim` set to the number of
/// columns of the wiggle design; dense specs (with or without a mean) become
/// `PenaltyMatrix::Dense` holding a copy of the matrix.
fn wiggle_block_penalty_matrices(wiggle_block: &ParameterBlockInput) -> Vec<PenaltyMatrix> {
    use crate::solver::estimate::PenaltySpec;

    let total_dim = wiggle_block.design.ncols();
    let mut matrices = Vec::with_capacity(wiggle_block.penalties.len());
    for spec in wiggle_block.penalties.iter() {
        let converted = match spec {
            PenaltySpec::Block {
                local, col_range, ..
            } => PenaltyMatrix::Blockwise {
                local: local.clone(),
                col_range: col_range.clone(),
                total_dim,
            },
            PenaltySpec::Dense(dense) | PenaltySpec::DenseWithMean { matrix: dense, .. } => {
                PenaltyMatrix::Dense(dense.clone())
            }
        };
        matrices.push(converted);
    }
    matrices
}
/// Maps a probability to the linear predictor of a standard binomial link.
///
/// The probability is first clamped to `[1e-6, 1 - 1e-6]`. Supported links are logit,
/// probit and cloglog.
///
/// # Errors
/// Returns an error when the probit quantile cannot be computed or when `link_kind` is not
/// one of the three supported links.
fn binomial_location_scale_link_eta_from_probability(
    link_kind: &InverseLink,
    probability: f64,
) -> Result<f64, String> {
    let target = probability.clamp(1e-6, 1.0 - 1e-6);
    let eta = match link_kind {
        InverseLink::Standard(StandardLink::Logit) => (target / (1.0 - target)).ln(),
        InverseLink::Standard(StandardLink::Probit) => standard_normal_quantile(target)
            .map_err(|err| format!("failed to invert probit warm-start probability: {err}"))?,
        InverseLink::Standard(StandardLink::CLogLog) => (-((1.0 - target).ln())).ln(),
        other => {
            return Err(GamlssError::UnsupportedConfiguration { reason: format!(
                "binomial location-scale warm start requires logit, probit, or cloglog link, got {other:?}"
            ) }.into());
        }
    };
    Ok(eta)
}
/// Computes the weighted mean of `y`, using weights passed through `floor_positiveweight`.
///
/// Observations whose processed weight is not strictly positive are ignored.
///
/// # Errors
/// Fails when `y` and `weights` differ in length, when a response is non-finite, or when the
/// accumulated weight is non-finite or not strictly positive.
fn weighted_binomial_prevalence(y: &Array1<f64>, weights: &Array1<f64>) -> Result<f64, String> {
    let (n_y, n_weights) = (y.len(), weights.len());
    if n_y != n_weights {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "binomial location-scale warm start dimension mismatch: y has length {}, weights have length {}",
            n_y,
            n_weights
        ) }.into());
    }

    let mut total_weight = 0.0;
    let mut weighted_successes = 0.0;
    for (yi, wi) in y.iter().copied().zip(weights.iter().copied()) {
        if !yi.is_finite() {
            return Err(GamlssError::NonFinite {
                reason: format!(
                    "binomial location-scale warm start encountered non-finite response {yi}"
                ),
            }
            .into());
        }
        let weight = floor_positiveweight(wi, MIN_WEIGHT);
        // Written as a negated `>` so that a NaN weight is skipped, exactly like a zero weight.
        if !(weight > 0.0) {
            continue;
        }
        total_weight += weight;
        weighted_successes += weight * yi;
    }

    let usable_total = total_weight.is_finite() && total_weight > 0.0;
    if !usable_total {
        return Err(
            "binomial location-scale warm start requires positive total weight".to_string(),
        );
    }
    Ok(weighted_successes / total_weight)
}
/// Solves for block coefficients targeting a constant linear predictor.
///
/// Builds a target vector of length `block.design.nrows()` filled with `eta` and passes it,
/// together with the block's design, offset, penalties and initial log-lambdas, to
/// `solve_penalizedweighted_projection`.
fn project_constant_eta_into_block(
    block: &ParameterBlockSpec,
    weights: &Array1<f64>,
    eta: f64,
) -> Result<Array1<f64>, String> {
    let n_rows = block.design.nrows();
    let constant_target = Array1::from_elem(n_rows, eta);
    // NOTE(review): the trailing 1e-10 is presumably a ridge/tolerance — confirm against
    // `solve_penalizedweighted_projection`.
    solve_penalizedweighted_projection(
        &block.design,
        &block.offset,
        &constant_target,
        weights,
        &block.penalties,
        &block.initial_log_lambdas,
        1e-10,
    )
}
/// Produces starting coefficients for the binomial threshold and log-sigma blocks.
///
/// A supplied hint is returned as a copy. A missing threshold seed is obtained by projecting
/// the link-transformed weighted prevalence of `y` into the threshold block; a missing
/// log-sigma seed is obtained by projecting a constant linear predictor of `0.0` into the
/// log-sigma block.
///
/// # Errors
/// Propagates failures from the prevalence computation, the link inversion and the
/// projections. No computation (and hence no failure) happens for a block whose hint is given.
fn binomial_location_scalewarm_start(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    link_kind: &InverseLink,
    threshold_block: &ParameterBlockSpec,
    log_sigma_block: &ParameterBlockSpec,
    mean_beta_hint: Option<&Array1<f64>>,
    noise_beta_hint: Option<&Array1<f64>>,
) -> Result<(Array1<f64>, Array1<f64>), String> {
    let beta_threshold = if let Some(hint) = mean_beta_hint {
        hint.clone()
    } else {
        let prevalence = weighted_binomial_prevalence(y, weights)?;
        let eta = binomial_location_scale_link_eta_from_probability(link_kind, prevalence)?;
        project_constant_eta_into_block(threshold_block, weights, eta)?
    };

    let beta_log_sigma = if let Some(hint) = noise_beta_hint {
        hint.clone()
    } else {
        project_constant_eta_into_block(log_sigma_block, weights, 0.0)?
    };

    Ok((beta_threshold, beta_log_sigma))
}
/// Input bundle consumed by `fit_binomial_mean_wiggle`.
#[derive(Clone)]
struct BinomialMeanWiggleSpec {
/// Response vector; checked by `validate_binomial_response` before fitting.
pub y: Array1<f64>,
/// Observation weights; must have the same length as `y`.
pub weights: Array1<f64>,
/// Inverse link; the identity link is rejected by `fit_binomial_mean_wiggle`.
pub link_kind: InverseLink,
/// Wiggle knots; the fit requires at least
/// `minimum_monotone_wiggle_knot_count(wiggle_degree)` of them.
pub wiggle_knots: Array1<f64>,
/// Wiggle degree; the fit requires a value of at least 2.
pub wiggle_degree: usize,
/// Block converted into the "eta" parameter block spec.
pub eta_block: ParameterBlockInput,
/// Block converted into the "wiggle" parameter block spec.
pub wiggle_block: ParameterBlockInput,
}
/// Term-level inputs for a Gaussian location-scale model.
///
/// NOTE(review): the consumer of this struct is not visible in this part of the file;
/// field meanings below are presumed from the matching builder fields — confirm.
#[derive(Clone)]
pub struct GaussianLocationScaleTermSpec {
/// Response vector.
pub y: Array1<f64>,
/// Observation weights.
pub weights: Array1<f64>,
/// Term collection for the mean.
pub meanspec: TermCollectionSpec,
/// Term collection for log-sigma.
pub log_sigmaspec: TermCollectionSpec,
/// Offset for the mean predictor.
pub mean_offset: Array1<f64>,
/// Offset for the log-sigma predictor.
pub log_sigma_offset: Array1<f64>,
}
/// Term-level inputs for a Gaussian location-scale model with an additional wiggle block.
///
/// NOTE(review): the consumer of this struct is not visible in this part of the file;
/// field meanings below are presumed from the matching builder fields — confirm.
#[derive(Clone)]
pub struct GaussianLocationScaleWiggleTermSpec {
/// Response vector.
pub y: Array1<f64>,
/// Observation weights.
pub weights: Array1<f64>,
/// Term collection for the mean.
pub meanspec: TermCollectionSpec,
/// Term collection for log-sigma.
pub log_sigmaspec: TermCollectionSpec,
/// Offset for the mean predictor.
pub mean_offset: Array1<f64>,
/// Offset for the log-sigma predictor.
pub log_sigma_offset: Array1<f64>,
/// Knots of the wiggle basis.
pub wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
pub wiggle_degree: usize,
/// Parameter block input for the wiggle coefficients.
pub wiggle_block: ParameterBlockInput,
}
/// Term-level inputs for a binomial location-scale (threshold / log-sigma) model.
///
/// NOTE(review): the consumer of this struct is not visible in this part of the file;
/// field meanings below are presumed from their names — confirm.
#[derive(Clone)]
pub struct BinomialLocationScaleTermSpec {
/// Response vector.
pub y: Array1<f64>,
/// Observation weights.
pub weights: Array1<f64>,
/// Inverse link of the binomial model.
pub link_kind: InverseLink,
/// Term collection for the threshold.
pub thresholdspec: TermCollectionSpec,
/// Term collection for log-sigma.
pub log_sigmaspec: TermCollectionSpec,
/// Offset for the threshold predictor.
pub threshold_offset: Array1<f64>,
/// Offset for the log-sigma predictor.
pub log_sigma_offset: Array1<f64>,
}
/// Term-level inputs for a binomial location-scale model with an additional wiggle block.
///
/// NOTE(review): the consumer of this struct is not visible in this part of the file;
/// field meanings below are presumed from their names — confirm.
#[derive(Clone)]
pub struct BinomialLocationScaleWiggleTermSpec {
/// Response vector.
pub y: Array1<f64>,
/// Observation weights.
pub weights: Array1<f64>,
/// Inverse link of the binomial model.
pub link_kind: InverseLink,
/// Term collection for the threshold.
pub thresholdspec: TermCollectionSpec,
/// Term collection for log-sigma.
pub log_sigmaspec: TermCollectionSpec,
/// Offset for the threshold predictor.
pub threshold_offset: Array1<f64>,
/// Offset for the log-sigma predictor.
pub log_sigma_offset: Array1<f64>,
/// Knots of the wiggle basis.
pub wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
pub wiggle_degree: usize,
/// Parameter block input for the wiggle coefficients.
pub wiggle_block: ParameterBlockInput,
}
/// Result of a two-block (mean / noise) term fit.
///
/// `try_from_parts` checks that `fit` has at least two block states and that block state 0
/// matches `mean_design` and block state 1 matches `noise_design` in coefficient count
/// (columns) and predictor length (rows).
#[derive(Clone, Debug)]
pub struct BlockwiseTermFitResult {
/// The underlying fit, including its per-block states.
pub fit: UnifiedFitResult,
/// Resolved term collection spec of the mean block.
pub meanspec_resolved: TermCollectionSpec,
/// Resolved term collection spec of the noise block.
pub noisespec_resolved: TermCollectionSpec,
/// Design of the mean block.
pub mean_design: TermCollectionDesign,
/// Design of the noise block.
pub noise_design: TermCollectionDesign,
}
/// Unvalidated components of a `BlockwiseTermFitResult`; consumed by
/// `BlockwiseTermFitResult::try_from_parts`, which validates them before assembling the result.
pub(crate) struct BlockwiseTermFitResultParts {
/// The underlying fit.
pub fit: UnifiedFitResult,
/// Resolved term collection spec of the mean block.
pub meanspec_resolved: TermCollectionSpec,
/// Resolved term collection spec of the noise block.
pub noisespec_resolved: TermCollectionSpec,
/// Design of the mean block.
pub mean_design: TermCollectionDesign,
/// Design of the noise block.
pub noise_design: TermCollectionDesign,
}
/// A blockwise term fit together with the wiggle basis description.
///
/// `try_from_parts` requires at least three block states in the inner fit and a non-empty,
/// finite `wiggle_knots` vector.
pub struct BlockwiseTermWiggleFitResult {
/// The two-block term fit result (its inner fit carries the additional block state).
pub fit: BlockwiseTermFitResult,
/// Knots of the wiggle basis.
pub wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
pub wiggle_degree: usize,
}
/// Fit result for a binomial mean model with a wiggle block, expressed at term level.
///
/// NOTE(review): the producer of this struct is not visible in this part of the file;
/// field meanings below are presumed from their names — confirm.
pub struct BinomialMeanWiggleTermFitResult {
/// The underlying fit.
pub fit: UnifiedFitResult,
/// Resolved term collection spec.
pub resolvedspec: TermCollectionSpec,
/// Design built from the resolved spec.
pub design: TermCollectionDesign,
/// Knots of the wiggle basis.
pub wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
pub wiggle_degree: usize,
}
/// Unvalidated components of a `BlockwiseTermWiggleFitResult`; consumed by
/// `BlockwiseTermWiggleFitResult::try_from_parts`.
struct BlockwiseTermWiggleFitResultParts {
/// The two-block term fit result.
pub fit: BlockwiseTermFitResult,
/// Knots of the wiggle basis.
pub wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
pub wiggle_degree: usize,
}
/// Checks that a term-collection design is finite and internally consistent.
///
/// `label` prefixes every error message. The following are verified, in order:
///
/// 1. every entry of the design matrix is finite (materialized chunk by chunk);
/// 2. `nullspace_dims` and `penaltyinfo` have one entry per penalty;
/// 3. every penalty has finite entries and a well-formed `col_range`
///    (`start <= end <= ncols`);
/// 4. optional coefficient lower bounds have one entry per column, each finite or `-inf`;
/// 5. optional linear constraints are finite, with `a` having one column per design column
///    and one row per entry of `b`;
/// 6. `intercept_range` is well-formed and within the design width.
///
/// # Errors
/// Returns the first violation found as a `String`.
fn validate_term_collection_design(
    label: &str,
    design: &TermCollectionDesign,
) -> Result<(), String> {
    let p = design.design.ncols();
    let n = design.design.nrows();

    // Materialize the design in row chunks rather than all at once.
    for rows in exact_design_row_chunks(n, p) {
        let chunk = design
            .design
            .try_row_chunk(rows)
            .map_err(|e| format!("{label}.design row chunk materialization failed: {e}"))?;
        validate_all_finite_estimation(&format!("{label}.design"), chunk.iter().copied())
            .map_err(|e| e.to_string())?;
    }

    if design.nullspace_dims.len() != design.penalties.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "{label}.nullspace_dims length mismatch: got {}, expected {}",
                design.nullspace_dims.len(),
                design.penalties.len()
            ),
        }
        .into());
    }
    if design.penaltyinfo.len() != design.penalties.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "{label}.penaltyinfo length mismatch: got {}, expected {}",
                design.penaltyinfo.len(),
                design.penalties.len()
            ),
        }
        .into());
    }

    for (idx, bp) in design.penalties.iter().enumerate() {
        validate_all_finite_estimation(
            &format!("{label}.penalties[{idx}]"),
            bp.local.iter().copied(),
        )
        .map_err(|e| e.to_string())?;
        // An inverted range would pass the width check below (its `end` can be small), so it
        // is rejected explicitly, mirroring the `intercept_range` check at the end.
        if bp.col_range.start > bp.col_range.end {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "{label}.penalties[{idx}] col_range {}..{} is inverted (start exceeds end)",
                    bp.col_range.start, bp.col_range.end
                ),
            }
            .into());
        }
        if bp.col_range.end > p {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "{label}.penalties[{idx}] col_range {}..{} exceeds design width {}",
                    bp.col_range.start, bp.col_range.end, p
                ),
            }
            .into());
        }
    }

    if let Some(bounds) = design.coefficient_lower_bounds.as_ref() {
        if bounds.len() != p {
            return Err(GamlssError::ConstraintViolation {
                reason: format!(
                    "{label}.coefficient_lower_bounds length mismatch: got {}, expected {p}",
                    bounds.len()
                ),
            }
            .into());
        }
        for (idx, &bound) in bounds.iter().enumerate() {
            // `-inf` means "unbounded below"; NaN and `+inf` are rejected.
            if !(bound.is_finite() || bound == f64::NEG_INFINITY) {
                return Err(GamlssError::NonFinite { reason: format!(
                    "{label}.coefficient_lower_bounds[{idx}] must be finite or -inf, got {bound}",
                ) }.into());
            }
        }
    }

    if let Some(constraints) = design.linear_constraints.as_ref() {
        validate_all_finite_estimation(
            &format!("{label}.linear_constraints.a"),
            constraints.a.iter().copied(),
        )
        .map_err(|e| e.to_string())?;
        validate_all_finite_estimation(
            &format!("{label}.linear_constraints.b"),
            constraints.b.iter().copied(),
        )
        .map_err(|e| e.to_string())?;
        if constraints.a.ncols() != p {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "{label}.linear_constraints.a column mismatch: got {}, expected {p}",
                    constraints.a.ncols()
                ),
            }
            .into());
        }
        if constraints.a.nrows() != constraints.b.len() {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "{label}.linear_constraints row mismatch: a has {}, b has {}",
                    constraints.a.nrows(),
                    constraints.b.len()
                ),
            }
            .into());
        }
    }

    if design.intercept_range.start > design.intercept_range.end || design.intercept_range.end > p {
        return Err(GamlssError::ConstraintViolation {
            reason: format!(
                "{label}.intercept_range out of bounds: {:?} for {} columns",
                design.intercept_range, p
            ),
        }
        .into());
    }

    Ok(())
}
impl BlockwiseTermFitResult {
pub(crate) fn try_from_parts(parts: BlockwiseTermFitResultParts) -> Result<Self, String> {
let BlockwiseTermFitResultParts {
fit,
meanspec_resolved,
noisespec_resolved,
mean_design,
noise_design,
} = parts;
fit.validate_numeric_finiteness()
.map_err(|e| format!("{e}"))?;
if fit.block_states.len() < 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult requires at least 2 block states, got {}",
fit.block_states.len()
),
}
.into());
}
validate_term_collection_design("blockwise_term.mean_design", &mean_design)?;
validate_term_collection_design("blockwise_term.noise_design", &noise_design)?;
if mean_design.design.nrows() != noise_design.design.nrows() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult row mismatch: mean_design={}, noise_design={}",
mean_design.design.nrows(),
noise_design.design.nrows()
),
}
.into());
}
if fit.block_states[0].beta.len() != mean_design.design.ncols() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult mean beta length mismatch: got {}, expected {}",
fit.block_states[0].beta.len(),
mean_design.design.ncols()
),
}
.into());
}
if fit.block_states[1].beta.len() != noise_design.design.ncols() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult noise beta length mismatch: got {}, expected {}",
fit.block_states[1].beta.len(),
noise_design.design.ncols()
),
}
.into());
}
if fit.block_states[0].eta.len() != mean_design.design.nrows() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult mean eta length mismatch: got {}, expected {}",
fit.block_states[0].eta.len(),
mean_design.design.nrows()
),
}
.into());
}
if fit.block_states[1].eta.len() != noise_design.design.nrows() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BlockwiseTermFitResult noise eta length mismatch: got {}, expected {}",
fit.block_states[1].eta.len(),
noise_design.design.nrows()
),
}
.into());
}
Ok(Self {
fit,
meanspec_resolved,
noisespec_resolved,
mean_design,
noise_design,
})
}
fn validate_numeric_finiteness(&self) -> Result<(), String> {
Self::try_from_parts(BlockwiseTermFitResultParts {
fit: self.fit.clone(),
meanspec_resolved: self.meanspec_resolved.clone(),
noisespec_resolved: self.noisespec_resolved.clone(),
mean_design: self.mean_design.clone(),
noise_design: self.noise_design.clone(),
})
.map(|_| ())
}
}
impl BlockwiseTermWiggleFitResult {
    /// Assembles a wiggle fit result from its parts after validating them.
    ///
    /// # Errors
    /// Fails when the inner blockwise result does not validate, when the inner fit has fewer
    /// than three block states, or when `wiggle_knots` is empty or contains non-finite values.
    fn try_from_parts(parts: BlockwiseTermWiggleFitResultParts) -> Result<Self, String> {
        let candidate = Self {
            fit: parts.fit,
            wiggle_knots: parts.wiggle_knots,
            wiggle_degree: parts.wiggle_degree,
        };

        candidate
            .fit
            .validate_numeric_finiteness()
            .map_err(|e| e.to_string())?;

        let n_block_states = candidate.fit.fit.block_states.len();
        if n_block_states < 3 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "BlockwiseTermWiggleFitResult requires at least 3 block states, got {}",
                    n_block_states
                ),
            }
            .into());
        }

        if candidate.wiggle_knots.is_empty() {
            return Err(GamlssError::UnsupportedConfiguration {
                reason: "BlockwiseTermWiggleFitResult requires non-empty wiggle_knots".to_string(),
            }
            .into());
        }

        validate_all_finite_estimation(
            "blockwise_term_wiggle.wiggle_knots",
            candidate.wiggle_knots.iter().copied(),
        )
        .map_err(|e| e.to_string())?;

        Ok(candidate)
    }
}
/// Fit result of a binomial location-scale model, with optional wiggle information.
///
/// NOTE(review): the producer of this struct is not visible in this part of the file;
/// the three optional fields are presumably populated only for wiggle fits — confirm.
pub struct BinomialLocationScaleFitResult {
/// The two-block term fit result.
pub fit: BlockwiseTermFitResult,
/// Knots of the wiggle basis, if any.
pub wiggle_knots: Option<Array1<f64>>,
/// Degree of the wiggle basis, if any.
pub wiggle_degree: Option<usize>,
/// Link-wiggle coefficients, if any.
pub beta_link_wiggle: Option<Vec<f64>>,
}
/// Fit result of a Gaussian location-scale model, with optional wiggle information.
///
/// NOTE(review): the producer of this struct is not visible in this part of the file;
/// the three optional fields are presumably populated only for wiggle fits — confirm.
pub struct GaussianLocationScaleFitResult {
/// The two-block term fit result.
pub fit: BlockwiseTermFitResult,
/// Knots of the wiggle basis, if any.
pub wiggle_knots: Option<Array1<f64>>,
/// Degree of the wiggle basis, if any.
pub wiggle_degree: Option<usize>,
/// Link-wiggle coefficients, if any.
pub beta_link_wiggle: Option<Vec<f64>>,
/// NOTE(review): presumably the factor by which the response was rescaled — confirm.
pub response_scale: f64,
}
fn fit_binomial_mean_wiggle(
spec: BinomialMeanWiggleSpec,
options: &BlockwiseFitOptions,
) -> Result<UnifiedFitResult, String> {
let n = spec.y.len();
validate_len_match("weights vs y", n, spec.weights.len())?;
validateweights(&spec.weights, "fit_binomial_mean_wiggle")?;
validate_binomial_response(&spec.y, "fit_binomial_mean_wiggle")?;
validate_blockrows("eta", n, &spec.eta_block)?;
validate_blockrows("wiggle", n, &spec.wiggle_block)?;
if matches!(
spec.link_kind,
InverseLink::Standard(StandardLink::Identity)
) {
return Err(GamlssError::UnsupportedConfiguration {
reason: "fit_binomial_mean_wiggle does not support identity link".to_string(),
}
.into());
}
crate::inference::formula_dsl::require_binomial_inverse_link_supports_joint_wiggle(
&spec.link_kind,
"fit_binomial_mean_wiggle",
)?;
if spec.wiggle_degree < 2 {
return Err(GamlssError::ConstraintViolation {
reason: format!(
"fit_binomial_mean_wiggle: wiggle_degree must be >= 2, got {}",
spec.wiggle_degree
),
}
.into());
}
let minimum_knots = minimum_monotone_wiggle_knot_count(spec.wiggle_degree)?;
if spec.wiggle_knots.len() < minimum_knots {
return Err(GamlssError::DimensionMismatch { reason: format!(
"fit_binomial_mean_wiggle: wiggle_knots length {} is too short for degree {} (need at least {})",
spec.wiggle_knots.len(),
spec.wiggle_degree,
minimum_knots
) }.into());
}
let family = BinomialMeanWiggleFamily {
y: spec.y,
weights: spec.weights,
link_kind: spec.link_kind,
wiggle_knots: spec.wiggle_knots,
wiggle_degree: spec.wiggle_degree,
policy: crate::resource::ResourcePolicy::default_library(),
};
let blocks = vec![
spec.eta_block
.intospec_with_gauge_priority("eta", LINK_WIGGLE_GAUGE_PRIORITY)?,
spec.wiggle_block.intospec("wiggle")?,
];
fit_custom_family(&family, &blocks, options).map_err(|e| e.to_string())
}
/// Family-specific hooks used by `fit_location_scale_terms` to drive a two-block
/// (mean / noise) location-scale fit.
trait LocationScaleFamilyBuilder {
/// Family type produced by `build_family`.
type Family: CustomFamily + Clone + Send + Sync + 'static;
/// Term collection spec of the mean block.
fn meanspec(&self) -> &TermCollectionSpec;
/// Term collection spec of the noise block.
fn noisespec(&self) -> &TermCollectionSpec;
/// Builds the parameter block specs for the given designs.
///
/// The driver passes the leading `rho_dim` entries of its outer parameter vector as `theta`,
/// and treats the first and second returned blocks as the mean and noise blocks when it
/// harvests `initial_beta` for later hints.
fn build_blocks(
&self,
theta: &Array1<f64>,
mean_design: &TermCollectionDesign,
noise_design: &TermCollectionDesign,
mean_beta_hint: Option<Array1<f64>>,
noise_beta_hint: Option<Array1<f64>>,
) -> Result<Vec<ParameterBlockSpec>, String>;
/// Builds the family object for the given designs.
fn build_family(
&self,
mean_design: &TermCollectionDesign,
noise_design: &TermCollectionDesign,
) -> Self::Family;
/// Extracts the (mean, noise) coefficient vectors from a fit; the driver stores them as
/// hints for subsequent block builds.
fn extract_primary_betas(
&self,
fit: &UnifiedFitResult,
) -> Result<(Array1<f64>, Array1<f64>), String>;
/// Number of penalties of the mean block; defaults to the design's own penalty count.
fn mean_penalty_count(&self, mean_design: &TermCollectionDesign) -> usize {
mean_design.penalties.len()
}
/// Number of penalties of the noise block; defaults to the design's own penalty count.
fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
noise_design.penalties.len()
}
/// Whether the driver should attempt `build_psiderivative_blocks` at all. When this is
/// `false` (the default) the driver treats analytic psi derivatives as unavailable.
fn exact_spatial_joint_supported(&self) -> bool {
false
}
/// When `true`, the driver fails if analytic psi derivatives are unavailable.
/// Defaults to `false`.
fn require_exact_spatial_joint(&self) -> bool {
false
}
/// Seed risk profile forwarded to the exact joint spatial optimizer.
fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
crate::seeding::SeedRiskProfile::GeneralizedLinear
}
/// Extra initial rho entries forwarded to the joint setup; empty by default.
fn extra_rho0(&self) -> Result<Array1<f64>, String> {
Ok(Array1::zeros(0))
}
/// Builds the per-block psi derivative descriptions.
///
/// The driver passes the data, then the mean and noise specs, then the mean and noise
/// designs, in that order.
fn build_psiderivative_blocks(
&self,
arr: ndarray::ArrayView2<'_, f64>,
term_spec: &TermCollectionSpec,
term_spec2: &TermCollectionSpec,
term_design: &TermCollectionDesign,
term_design2: &TermCollectionDesign,
) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String>;
}
/// Drives a two-block (mean / noise) location-scale fit through the exact joint spatial
/// optimizer, using the family-specific hooks of `builder`.
///
/// Outline:
/// 1. build bootstrap designs and frozen specs from `data`;
/// 2. probe whether analytic psi derivatives can be built (and fail early if the builder
///    requires them);
/// 3. disable the kappa/psi optimization when every spatial term in both blocks has a fixed
///    kappa;
/// 4. run `optimize_spatial_length_scale_exact_joint` with closures for fitting, objective
///    evaluation and EFS evaluation;
/// 5. if the builder expects exactly one more noise penalty than the solved noise design
///    carries, append the log-sigma shrinkage penalty to that design;
/// 6. assemble and validate the `BlockwiseTermFitResult`.
///
/// # Errors
/// Returns a `String` describing the first failure in any of the steps above.
fn fit_location_scale_terms<B: LocationScaleFamilyBuilder>(
data: ndarray::ArrayView2<'_, f64>,
builder: B,
options: &BlockwiseFitOptions,
kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermFitResult, String> {
// Coefficient hints; they start empty and are refreshed after every successful fit.
let mut mean_beta_hint: Option<Array1<f64>> = None;
let mut noise_beta_hint: Option<Array1<f64>> = None;
let extra_rho0 = builder.extra_rho0()?;
// Bootstrap designs and frozen specs, built once from the raw data.
let mean_boot_design =
build_term_collection_design(data, builder.meanspec()).map_err(|e| e.to_string())?;
let noise_boot_design =
build_term_collection_design(data, builder.noisespec()).map_err(|e| e.to_string())?;
let mean_bootspec = freeze_term_collection_from_design(builder.meanspec(), &mean_boot_design)
.map_err(|e| e.to_string())?;
let noise_bootspec =
freeze_term_collection_from_design(builder.noisespec(), &noise_boot_design)
.map_err(|e| e.to_string())?;
let require_exact_spatial_joint = builder.require_exact_spatial_joint();
// Probe: can psi derivative blocks be built on the bootstrap designs? Only the success or
// failure is kept; the derivative blocks themselves are discarded.
let analytic_joint_derivatives_check = if builder.exact_spatial_joint_supported() {
builder
.build_psiderivative_blocks(
data,
&mean_bootspec,
&noise_bootspec,
&mean_boot_design,
&noise_boot_design,
)
.map(|_| ())
} else {
Err(
"analytic spatial psi derivatives are unavailable for this location-scale family"
.to_string(),
)
};
let analytic_joint_derivatives_available = analytic_joint_derivatives_check.is_ok();
if require_exact_spatial_joint {
analytic_joint_derivatives_check.map_err(|err| {
format!("exact two-block spatial path requires analytic psi derivatives: {err}")
})?;
}
let mean_penalty_count = builder.mean_penalty_count(&mean_boot_design);
let noise_penalty_count = builder.noise_penalty_count(&noise_boot_design);
// Work on a private copy of the options so the optimization can be switched off locally.
let mut effective_kappa_options = kappa_options.clone();
if effective_kappa_options.enabled
&& crate::smooth::all_spatial_terms_kappa_fixed(&mean_bootspec)
&& crate::smooth::all_spatial_terms_kappa_fixed(&noise_bootspec)
{
log::info!(
"[GAMLSS spatial] disabling κ/ψ optimization: every spatial term in \
both blocks has an explicit length_scale and no anisotropy; \
user-supplied kernel scale is fixed"
);
effective_kappa_options.enabled = false;
}
// Shadow the parameter so everything below sees the effective options.
let kappa_options: &SpatialLengthScaleOptimizationOptions = &effective_kappa_options;
macro_rules! run_exact_joint_spatial {
() => {{
let joint_setup = build_two_block_exact_joint_setup(
data,
builder.meanspec(),
builder.noisespec(),
mean_penalty_count,
noise_penalty_count,
extra_rho0.as_slice().unwrap_or(&[]),
None,
kappa_options,
);
let mean_terms = spatial_length_scale_term_indices(builder.meanspec());
let noise_terms = spatial_length_scale_term_indices(builder.noisespec());
// Interior-mutable state shared by the closures passed to the optimizer below.
let mean_beta_hint_cell = std::cell::RefCell::new(mean_beta_hint.clone());
let noise_beta_hint_cell = std::cell::RefCell::new(noise_beta_hint.clone());
let hyper_warm_start_cell =
std::cell::RefCell::new(None::<CustomFamilyWarmStart>);
let gamlss_disable_fixed_point = true;
// Outer derivative policy: asked from the family when blocks can be realized at the
// seed rho, otherwise a fallback policy is constructed by hand.
let outer_policy = {
let theta_seed = joint_setup.theta0();
let rho_dim = joint_setup.rho_dim();
let psi_dim = theta_seed.len() - rho_dim;
let rho_seed = theta_seed.slice(s![..rho_dim]).to_owned();
let policy_blocks_res = builder.build_blocks(
&rho_seed,
&mean_boot_design,
&noise_boot_design,
mean_beta_hint_cell.borrow().clone(),
noise_beta_hint_cell.borrow().clone(),
);
let mut policy = match policy_blocks_res {
Ok(policy_blocks) => {
let policy_family =
builder.build_family(&mean_boot_design, &noise_boot_design);
crate::families::custom_family::CustomFamily::outer_derivative_policy(
&policy_family,
&policy_blocks,
psi_dim,
options,
)
}
Err(err) => {
log::warn!(
"[GAMLSS spatial] failed to realize policy blocks at seed rho ({err}); \
routing outer optimizer through gradient-only BFGS"
);
let capability = if analytic_joint_derivatives_available {
crate::families::custom_family::ExactOuterDerivativeOrder::Second
} else {
crate::families::custom_family::ExactOuterDerivativeOrder::First
};
crate::families::custom_family::OuterDerivativePolicy {
capability,
predicted_gradient_work: u128::MAX,
predicted_hessian_work: u128::MAX,
subsample_capable: false,
}
}
};
// Without analytic psi derivatives the capability is capped at first order,
// whichever branch produced the policy.
if !analytic_joint_derivatives_available {
policy.capability =
crate::families::custom_family::ExactOuterDerivativeOrder::First;
}
policy
};
optimize_spatial_length_scale_exact_joint(
data,
&[builder.meanspec().clone(), builder.noisespec().clone()],
&[mean_terms, noise_terms],
kappa_options,
&joint_setup,
builder.exact_spatial_seed_risk_profile(),
analytic_joint_derivatives_available,
analytic_joint_derivatives_available,
gamlss_disable_fixed_point,
None,
outer_policy,
// Closure 1: fit the model at `theta` and refresh the coefficient hints.
|theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
assert_eq!(
specs.len(),
2,
"joint spatial closure expects exactly two block specs (mean, noise); got {}",
specs.len(),
);
assert_eq!(
designs.len(),
2,
"joint spatial closure expects exactly two block designs (mean, noise); got {}",
designs.len(),
);
let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
let fit = {
let blocks = builder.build_blocks(
&rho,
&designs[0],
&designs[1],
mean_beta_hint_cell.borrow().clone(),
noise_beta_hint_cell.borrow().clone(),
)?;
// Remember the seeds produced by block construction when no hint exists yet.
if mean_beta_hint_cell.borrow().is_none()
&& let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
{
*mean_beta_hint_cell.borrow_mut() = Some(beta);
}
if noise_beta_hint_cell.borrow().is_none()
&& let Some(beta) =
blocks.get(1).and_then(|block| block.initial_beta.clone())
{
*noise_beta_hint_cell.borrow_mut() = Some(beta);
}
let family = builder.build_family(&designs[0], &designs[1]);
// With an active log-kappa dimension the fit runs at fixed log-lambdas and
// reuses the latest hyper warm start; otherwise a regular fit is performed.
if joint_setup.log_kappa_dim() > 0 && kappa_options.enabled {
let warm_start = hyper_warm_start_cell.borrow().clone();
fit_custom_family_fixed_log_lambdas(
&family,
&blocks,
options,
warm_start.as_ref(),
0,
None,
true,
)?
} else {
fit_custom_family(&family, &blocks, options)?
}
};
let (mean_beta, noise_beta) = builder.extract_primary_betas(&fit)?;
mean_beta_hint = Some(mean_beta);
noise_beta_hint = Some(noise_beta);
*mean_beta_hint_cell.borrow_mut() = mean_beta_hint.clone();
*noise_beta_hint_cell.borrow_mut() = noise_beta_hint.clone();
Ok(fit)
},
// Closure 2: evaluate objective, gradient and outer Hessian at `theta`.
|theta,
specs: &[TermCollectionSpec],
designs: &[TermCollectionDesign],
eval_mode,
row_set: &crate::families::row_kernel::RowSet| {
use crate::solver::estimate::reml::unified::EvalMode;
if !analytic_joint_derivatives_available {
return Err(
"analytic spatial psi derivatives are unavailable for this exact two-block path"
.to_string(),
);
}
let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
let blocks = builder.build_blocks(
&rho,
&designs[0],
&designs[1],
mean_beta_hint_cell.borrow().clone(),
noise_beta_hint_cell.borrow().clone(),
)?;
if mean_beta_hint_cell.borrow().is_none()
&& let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
{
*mean_beta_hint_cell.borrow_mut() = Some(beta);
}
if noise_beta_hint_cell.borrow().is_none()
&& let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
{
*noise_beta_hint_cell.borrow_mut() = Some(beta);
}
let family = builder.build_family(&designs[0], &designs[1]);
let psiderivative_blocks = builder.build_psiderivative_blocks(
data,
&specs[0],
&specs[1],
&designs[0],
&designs[1],
)?;
let warm_start = hyper_warm_start_cell.borrow().clone();
// Full-row evaluations borrow the caller's options; subsampled evaluations use a
// clone with `outer_score_subsample` set.
let eval_options = match row_set {
crate::families::row_kernel::RowSet::All => {
std::borrow::Cow::Borrowed(options)
}
crate::families::row_kernel::RowSet::Subsample {
rows,
n_full,
} => {
let subsample = crate::families::marginal_slope_shared::
OuterScoreSubsample::from_weighted_rows(
(**rows).clone(),
*n_full,
*n_full as u64,
);
let mut cloned = options.clone();
cloned.outer_score_subsample =
Some(std::sync::Arc::new(subsample));
std::borrow::Cow::Owned(cloned)
}
};
let eval = evaluate_custom_family_joint_hyper(
&family,
&blocks,
eval_options.as_ref(),
&rho,
&psiderivative_blocks,
warm_start.as_ref(),
eval_mode,
)?;
// The warm start is stored before the convergence check, so it is kept even when
// this evaluation is rejected.
*hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
if !eval.inner_converged {
return Err(
"exact two-block spatial inner solve did not converge".to_string(),
);
}
if matches!(eval_mode, EvalMode::ValueGradientHessian)
&& !eval.outer_hessian.is_analytic()
{
return Err(
"exact two-block spatial objective requires a full joint [rho, psi] hessian"
.to_string(),
);
}
Ok((eval.objective, eval.gradient, eval.outer_hessian))
},
// Closure 3: EFS evaluation at `theta`; always uses the caller's options.
|theta, specs: &[TermCollectionSpec], designs: &[TermCollectionDesign]| {
if !analytic_joint_derivatives_available {
return Err(
"analytic spatial psi derivatives are unavailable for this exact two-block path"
.to_string(),
);
}
let rho = theta.slice(s![..joint_setup.rho_dim()]).to_owned();
let blocks = builder.build_blocks(
&rho,
&designs[0],
&designs[1],
mean_beta_hint_cell.borrow().clone(),
noise_beta_hint_cell.borrow().clone(),
)?;
if mean_beta_hint_cell.borrow().is_none()
&& let Some(beta) = blocks.first().and_then(|block| block.initial_beta.clone())
{
*mean_beta_hint_cell.borrow_mut() = Some(beta);
}
if noise_beta_hint_cell.borrow().is_none()
&& let Some(beta) = blocks.get(1).and_then(|block| block.initial_beta.clone())
{
*noise_beta_hint_cell.borrow_mut() = Some(beta);
}
let family = builder.build_family(&designs[0], &designs[1]);
let psiderivative_blocks = builder.build_psiderivative_blocks(
data,
&specs[0],
&specs[1],
&designs[0],
&designs[1],
)?;
let warm_start = hyper_warm_start_cell.borrow().clone();
let eval = evaluate_custom_family_joint_hyper_efs(
&family,
&blocks,
options,
&rho,
&psiderivative_blocks,
warm_start.as_ref(),
)?;
*hyper_warm_start_cell.borrow_mut() = Some(eval.warm_start.clone());
if !eval.inner_converged {
return Err(
"exact two-block spatial EFS inner solve did not converge".to_string(),
);
}
Ok(eval.efs_eval)
},
// Closure 4: accepts a coefficient vector and does nothing.
|_beta: &Array1<f64>| Ok(()),
)
}};
}
let mut solved = run_exact_joint_spatial!()
.map_err(|err| format!("exact two-block spatial optimization failed: {err}"))?;
// The builder may count one more noise penalty than the solved design carries; in that
// case the shrinkage penalty is appended. Any larger gap is an error.
let expected_noise_penalty_count = builder.noise_penalty_count(&solved.designs[1]);
let actual_noise_penalty_count = solved.designs[1].penalties.len();
if expected_noise_penalty_count > actual_noise_penalty_count {
if expected_noise_penalty_count != actual_noise_penalty_count + 1 {
return Err(GamlssError::UnsupportedConfiguration {
reason: format!(
"location-scale result noise design expected {} penalties after augmentation, got {} before augmentation",
expected_noise_penalty_count, actual_noise_penalty_count
),
}
.into());
}
append_binomial_log_sigma_shrinkage_penalty_design(&mut solved.designs[1]);
}
// `remove(0)` is called twice on each vector: the first call yields the mean entry, the
// second yields the noise entry that has shifted to the front.
BlockwiseTermFitResult::try_from_parts(BlockwiseTermFitResultParts {
fit: solved.fit,
meanspec_resolved: solved.resolved_specs.remove(0),
noisespec_resolved: solved.resolved_specs.remove(0),
mean_design: solved.designs.remove(0),
noise_design: solved.designs.remove(0),
})
}
/// `LocationScaleFamilyBuilder` implementation state for the two-block Gaussian
/// location-scale model.
struct GaussianLocationScaleTermBuilder {
/// Response vector; cloned into the family and passed to block construction.
y: Array1<f64>,
/// Observation weights; cloned into the family and passed to block construction.
weights: Array1<f64>,
/// Term collection spec returned by `meanspec()`.
meanspec: TermCollectionSpec,
/// Term collection spec returned by `noisespec()`.
noisespec: TermCollectionSpec,
/// Offset passed to the `mu` block.
mean_offset: Array1<f64>,
/// Offset passed to the `log_sigma` block.
noise_offset: Array1<f64>,
}
impl LocationScaleFamilyBuilder for GaussianLocationScaleTermBuilder {
    type Family = GaussianLocationScaleFamily;

    fn meanspec(&self) -> &TermCollectionSpec {
        &self.meanspec
    }

    fn noisespec(&self) -> &TermCollectionSpec {
        &self.noisespec
    }

    /// The design's own penalties plus one extra penalty.
    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
        1 + noise_design.penalties.len()
    }

    fn exact_spatial_joint_supported(&self) -> bool {
        true
    }

    fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
        crate::seeding::SeedRiskProfile::Gaussian
    }

    /// Splits `theta` into mean and noise log-lambdas via a two-block layout and builds the
    /// `mu` and `log_sigma` blocks, in that order.
    fn build_blocks(
        &self,
        theta: &Array1<f64>,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
        mean_beta_hint: Option<Array1<f64>>,
        noise_beta_hint: Option<Array1<f64>>,
    ) -> Result<Vec<ParameterBlockSpec>, String> {
        let n_mean_penalties = mean_design.penalties.len();
        let n_noise_penalties = self.noise_penalty_count(noise_design);
        let layout = GamlssLambdaLayout::two_block(n_mean_penalties, n_noise_penalties);
        layout.validate_theta_len(theta.len(), "gaussian location-scale")?;

        let mean_log_lambdas = layout.mean_from(theta);
        let noise_log_lambdas = layout.noise_from(theta);
        build_gaussian_mean_and_scale_blocks(
            &self.y,
            &self.weights,
            mean_design,
            noise_design,
            &self.mean_offset,
            &self.noise_offset,
            mean_log_lambdas,
            noise_log_lambdas,
            mean_beta_hint,
            noise_beta_hint,
            "GaussianLocationScale::build_blocks",
        )
        .map(|(mu_block, log_sigma_block)| vec![mu_block, log_sigma_block])
    }

    /// Builds the Gaussian family with the mean design and the prepared log-sigma design.
    ///
    /// # Panics
    /// Panics if `prepared_gaussian_log_sigma_design` fails for the given designs.
    fn build_family(
        &self,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Self::Family {
        let scale_design =
            prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design)
                .expect("prepared Gaussian log-sigma design should match block construction");
        let mu_design = mean_design.design.clone();
        GaussianLocationScaleFamily {
            y: self.y.clone(),
            weights: self.weights.clone(),
            mu_design: Some(mu_design),
            log_sigma_design: Some(scale_design),
            policy: crate::resource::ResourcePolicy::default_library(),
            cached_row_scalars: std::sync::RwLock::new(None),
        }
    }

    /// Copies the coefficient vectors of the `mu` and `log_sigma` block states out of `fit`.
    fn extract_primary_betas(
        &self,
        fit: &UnifiedFitResult,
    ) -> Result<(Array1<f64>, Array1<f64>), String> {
        let mean_beta = match fit.block_states.get(GaussianLocationScaleFamily::BLOCK_MU) {
            Some(state) => state.beta.clone(),
            None => return Err("missing Gaussian mu block state".to_string()),
        };
        let noise_beta = match fit
            .block_states
            .get(GaussianLocationScaleFamily::BLOCK_LOG_SIGMA)
        {
            Some(state) => state.beta.clone(),
            None => return Err("missing Gaussian log_sigma block state".to_string()),
        };
        Ok((mean_beta, noise_beta))
    }

    /// Builds the spatial psi derivatives of the mean block and then of the noise block.
    ///
    /// # Errors
    /// Fails when either derivative build fails or yields no derivatives.
    fn build_psiderivative_blocks(
        &self,
        data: ndarray::ArrayView2<'_, f64>,
        meanspec_resolved: &TermCollectionSpec,
        noisespec_resolved: &TermCollectionSpec,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
        let mean_derivs =
            match build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)? {
                Some(derivs) => derivs,
                None => return Err("missing Gaussian mean spatial psi derivatives".to_string()),
            };
        let noise_derivs =
            match build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)? {
                Some(derivs) => derivs,
                None => {
                    return Err("missing Gaussian log-sigma spatial psi derivatives".to_string());
                }
            };
        Ok(vec![mean_derivs, noise_derivs])
    }
}
/// `LocationScaleFamilyBuilder` implementation state for the Gaussian location-scale model
/// with an additional "wiggle" block.
struct GaussianLocationScaleWiggleTermBuilder {
/// Response vector passed to block construction.
y: Array1<f64>,
/// Observation weights passed to block construction.
weights: Array1<f64>,
/// Term collection spec returned by `meanspec()`.
meanspec: TermCollectionSpec,
/// Term collection spec returned by `noisespec()`.
noisespec: TermCollectionSpec,
/// Offset passed to the `mu` block.
mean_offset: Array1<f64>,
/// Offset passed to the `log_sigma` block.
noise_offset: Array1<f64>,
/// Knots of the wiggle basis.
wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis.
wiggle_degree: usize,
/// Source of the wiggle block's design, offset, penalties, nullspace dims, initial
/// coefficients and initial log-lambdas.
wiggle_block: ParameterBlockInput,
}
impl LocationScaleFamilyBuilder for GaussianLocationScaleWiggleTermBuilder {
type Family = GaussianLocationScaleWiggleFamily;
fn meanspec(&self) -> &TermCollectionSpec {
&self.meanspec
}
fn noisespec(&self) -> &TermCollectionSpec {
&self.noisespec
}
fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
noise_design.penalties.len() + 1
}
fn exact_spatial_joint_supported(&self) -> bool {
true
}
fn exact_spatial_seed_risk_profile(&self) -> crate::seeding::SeedRiskProfile {
crate::seeding::SeedRiskProfile::Gaussian
}
fn require_exact_spatial_joint(&self) -> bool {
true
}
fn extra_rho0(&self) -> Result<Array1<f64>, String> {
initial_log_lambdas_orzeros(&self.wiggle_block)
}
fn build_blocks(
&self,
theta: &Array1<f64>,
mean_design: &TermCollectionDesign,
noise_design: &TermCollectionDesign,
mean_beta_hint: Option<Array1<f64>>,
noise_beta_hint: Option<Array1<f64>>,
) -> Result<Vec<ParameterBlockSpec>, String> {
let layout = GamlssLambdaLayout::withwiggle(
mean_design.penalties.len(),
self.noise_penalty_count(noise_design),
self.wiggle_block.penalties.len(),
);
layout.validate_theta_len(theta.len(), "gaussian location-scale wiggle")?;
let (meanspec, noisespec) = build_gaussian_mean_and_scale_blocks(
&self.y,
&self.weights,
mean_design,
noise_design,
&self.mean_offset,
&self.noise_offset,
layout.mean_from(theta),
layout.noise_from(theta),
mean_beta_hint,
noise_beta_hint,
"GaussianLocationScaleWiggle::build_blocks",
)?;
let n_rows = meanspec.design.nrows();
let wigglespec = build_location_scale_wiggle_block(
"wiggle",
self.wiggle_block.design.clone(),
self.wiggle_block.offset.clone(),
wiggle_block_penalty_matrices(&self.wiggle_block),
self.wiggle_block.nullspace_dims.clone(),
layout.wiggle_from(theta),
self.wiggle_block.initial_beta.clone(),
n_rows,
)?;
Ok(vec![meanspec, noisespec, wigglespec])
}
fn build_family(
&self,
mean_design: &TermCollectionDesign,
noise_design: &TermCollectionDesign,
) -> Self::Family {
let preparednoise_design =
prepared_gaussian_log_sigma_design(&mean_design.design, &noise_design.design).expect(
"prepared Gaussian log-sigma design should match wiggle block construction",
);
GaussianLocationScaleWiggleFamily {
y: self.y.clone(),
weights: self.weights.clone(),
mu_design: Some(mean_design.design.clone()),
log_sigma_design: Some(preparednoise_design),
wiggle_knots: self.wiggle_knots.clone(),
wiggle_degree: self.wiggle_degree,
policy: crate::resource::ResourcePolicy::default_library(),
cached_row_scalars: std::sync::RwLock::new(None),
}
}
fn extract_primary_betas(
&self,
fit: &UnifiedFitResult,
) -> Result<(Array1<f64>, Array1<f64>), String> {
let mean_beta = fit
.block_states
.get(GaussianLocationScaleWiggleFamily::BLOCK_MU)
.ok_or_else(|| "missing Gaussian wiggle mu block state".to_string())?
.beta
.clone();
let noise_beta = fit
.block_states
.get(GaussianLocationScaleWiggleFamily::BLOCK_LOG_SIGMA)
.ok_or_else(|| "missing Gaussian wiggle log_sigma block state".to_string())?
.beta
.clone();
Ok((mean_beta, noise_beta))
}
fn build_psiderivative_blocks(
&self,
data: ndarray::ArrayView2<'_, f64>,
meanspec_resolved: &TermCollectionSpec,
noisespec_resolved: &TermCollectionSpec,
mean_design: &TermCollectionDesign,
noise_design: &TermCollectionDesign,
) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
let mean_derivs =
build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)?.ok_or_else(
|| "missing Gaussian wiggle mean spatial psi derivatives".to_string(),
)?;
let noise_derivs =
build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)?
.ok_or_else(|| {
"missing Gaussian wiggle log-sigma spatial psi derivatives".to_string()
})?;
Ok(vec![mean_derivs, noise_derivs, Vec::new()])
}
}
/// Owned inputs for the binomial location-scale model.
///
/// The `LocationScaleFamilyBuilder` impl for this type turns these fields into
/// a threshold block, a log-sigma block and a `BinomialLocationScaleFamily`.
struct BinomialLocationScaleTermBuilder {
/// Response vector; cloned into the family by `build_family`.
y: Array1<f64>,
/// Observation weights; cloned into the family and used when identifying the log-sigma design.
weights: Array1<f64>,
/// Inverse link passed to the block builder and cloned into the family.
link_kind: InverseLink,
/// Term specification handed out by `meanspec()` (the threshold block).
meanspec: TermCollectionSpec,
/// Term specification handed out by `noisespec()` (the log-sigma block).
noisespec: TermCollectionSpec,
/// Offset forwarded to the threshold block builder.
mean_offset: Array1<f64>,
/// Offset forwarded to the log-sigma block builder.
noise_offset: Array1<f64>,
}
/// Builder glue for the two-block binomial location-scale model: produces the
/// threshold and log-sigma parameter blocks and the matching
/// `BinomialLocationScaleFamily`.
impl LocationScaleFamilyBuilder for BinomialLocationScaleTermBuilder {
    type Family = BinomialLocationScaleFamily;

    /// Term specification of the threshold block.
    fn meanspec(&self) -> &TermCollectionSpec {
        &self.meanspec
    }

    /// Term specification of the log-sigma block.
    fn noisespec(&self) -> &TermCollectionSpec {
        &self.noisespec
    }

    /// Always reports the exact spatial joint path as supported.
    fn exact_spatial_joint_supported(&self) -> bool {
        true
    }

    /// Always requires the exact spatial joint path.
    fn require_exact_spatial_joint(&self) -> bool {
        true
    }

    /// The log-sigma block carries one more smoothing parameter than the
    /// noise design has penalties.
    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
        1 + noise_design.penalties.len()
    }

    /// Assembles the `[threshold, log_sigma]` block specifications for the
    /// smoothing parameters in `theta`.
    ///
    /// # Errors
    /// Fails when `theta` does not have the length implied by the penalty
    /// layout, or when the block constructor fails.
    fn build_blocks(
        &self,
        theta: &Array1<f64>,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
        mean_beta_hint: Option<Array1<f64>>,
        noise_beta_hint: Option<Array1<f64>>,
    ) -> Result<Vec<ParameterBlockSpec>, String> {
        let n_mean_penalties = mean_design.penalties.len();
        let n_noise_penalties = self.noise_penalty_count(noise_design);
        let layout = GamlssLambdaLayout::two_block(n_mean_penalties, n_noise_penalties);
        layout.validate_theta_len(theta.len(), "binomial location-scale")?;

        let (threshold_block, log_sigma_block) = build_binomial_threshold_and_scale_blocks(
            &self.y,
            &self.weights,
            &self.link_kind,
            mean_design,
            noise_design,
            &self.mean_offset,
            &self.noise_offset,
            layout.mean_from(theta),
            layout.noise_from(theta),
            mean_beta_hint,
            noise_beta_hint,
            "BinomialLocationScale::build_blocks",
        )?;
        Ok(vec![threshold_block, log_sigma_block])
    }

    /// Creates the family object for the given realized designs.
    ///
    /// # Panics
    /// Panics when `identified_binomial_log_sigma_design` returns an error.
    fn build_family(
        &self,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Self::Family {
        let log_sigma_design =
            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
                .expect("identified binomial log-sigma design");
        let threshold_design = mean_design.design.clone();
        BinomialLocationScaleFamily {
            y: self.y.clone(),
            weights: self.weights.clone(),
            link_kind: self.link_kind.clone(),
            threshold_design: Some(threshold_design),
            log_sigma_design: Some(log_sigma_design),
            policy: crate::resource::ResourcePolicy::default_library(),
        }
    }

    /// Clones the threshold and log-sigma coefficient vectors out of `fit`.
    ///
    /// # Errors
    /// Fails when either block state is missing; the threshold block is
    /// checked first.
    fn extract_primary_betas(
        &self,
        fit: &UnifiedFitResult,
    ) -> Result<(Array1<f64>, Array1<f64>), String> {
        let beta_at = |index: usize, missing: &str| -> Result<Array1<f64>, String> {
            match fit.block_states.get(index) {
                Some(state) => Ok(state.beta.clone()),
                None => Err(missing.to_string()),
            }
        };
        let threshold_beta = beta_at(
            BinomialLocationScaleFamily::BLOCK_T,
            "missing Binomial threshold block state",
        )?;
        let log_sigma_beta = beta_at(
            BinomialLocationScaleFamily::BLOCK_LOG_SIGMA,
            "missing Binomial log_sigma block state",
        )?;
        Ok((threshold_beta, log_sigma_beta))
    }

    /// Builds the spatial psi-derivative lists as `[threshold, log_sigma]`.
    ///
    /// # Errors
    /// Propagates helper failures and reports an error when the helper yields
    /// `None` for either block.
    fn build_psiderivative_blocks(
        &self,
        data: ndarray::ArrayView2<'_, f64>,
        meanspec_resolved: &TermCollectionSpec,
        noisespec_resolved: &TermCollectionSpec,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
        let threshold_derivs =
            match build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)? {
                Some(derivs) => derivs,
                None => return Err("missing threshold spatial psi derivatives".to_string()),
            };
        let log_sigma_derivs =
            match build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)? {
                Some(derivs) => derivs,
                None => return Err("missing log_sigma spatial psi derivatives".to_string()),
            };
        Ok(vec![threshold_derivs, log_sigma_derivs])
    }
}
/// Owned inputs for the binomial location-scale model with a wiggle block.
///
/// The `LocationScaleFamilyBuilder` impl for this type turns these fields into
/// three parameter blocks (threshold, log-sigma, wiggle) and into a
/// `BinomialLocationScaleWiggleFamily`.
struct BinomialLocationScaleWiggleTermBuilder {
/// Response vector; cloned into the family by `build_family`.
y: Array1<f64>,
/// Observation weights; cloned into the family and used when identifying the log-sigma design.
weights: Array1<f64>,
/// Inverse link passed to the block builder and cloned into the family.
link_kind: InverseLink,
/// Term specification handed out by `meanspec()` (the threshold block).
meanspec: TermCollectionSpec,
/// Term specification handed out by `noisespec()` (the log-sigma block).
noisespec: TermCollectionSpec,
/// Offset forwarded to the threshold block builder.
mean_offset: Array1<f64>,
/// Offset forwarded to the log-sigma block builder.
noise_offset: Array1<f64>,
/// Knot vector of the wiggle basis; cloned into the family.
wiggle_knots: Array1<f64>,
/// Degree of the wiggle basis; copied into the family.
wiggle_degree: usize,
/// Design, offset, penalties and initial beta of the wiggle block.
wiggle_block: ParameterBlockInput,
}
/// Builder glue for the binomial location-scale model with a wiggle block:
/// produces the threshold, log-sigma and wiggle parameter blocks and the
/// matching `BinomialLocationScaleWiggleFamily`.
impl LocationScaleFamilyBuilder for BinomialLocationScaleWiggleTermBuilder {
    type Family = BinomialLocationScaleWiggleFamily;

    /// Term specification of the threshold block.
    fn meanspec(&self) -> &TermCollectionSpec {
        &self.meanspec
    }

    /// Term specification of the log-sigma block.
    fn noisespec(&self) -> &TermCollectionSpec {
        &self.noisespec
    }

    /// Always reports the exact spatial joint path as supported.
    fn exact_spatial_joint_supported(&self) -> bool {
        true
    }

    /// Always requires the exact spatial joint path.
    fn require_exact_spatial_joint(&self) -> bool {
        true
    }

    /// Initial log-lambdas for the wiggle block, as produced by
    /// `initial_log_lambdas_orzeros`.
    fn extra_rho0(&self) -> Result<Array1<f64>, String> {
        initial_log_lambdas_orzeros(&self.wiggle_block)
    }

    /// The log-sigma block carries one more smoothing parameter than the
    /// noise design has penalties.
    fn noise_penalty_count(&self, noise_design: &TermCollectionDesign) -> usize {
        1 + noise_design.penalties.len()
    }

    /// Assembles the `[threshold, log_sigma, wiggle]` block specifications
    /// for the smoothing parameters in `theta`.
    ///
    /// # Errors
    /// Fails when `theta` does not have the length implied by the penalty
    /// layout, or when any of the block constructors fails.
    fn build_blocks(
        &self,
        theta: &Array1<f64>,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
        mean_beta_hint: Option<Array1<f64>>,
        noise_beta_hint: Option<Array1<f64>>,
    ) -> Result<Vec<ParameterBlockSpec>, String> {
        let wiggle = &self.wiggle_block;
        let layout = GamlssLambdaLayout::withwiggle(
            mean_design.penalties.len(),
            self.noise_penalty_count(noise_design),
            wiggle.penalties.len(),
        );
        layout.validate_theta_len(theta.len(), "wiggle location-scale")?;

        let (threshold_block, log_sigma_block) = build_binomial_threshold_and_scale_blocks(
            &self.y,
            &self.weights,
            &self.link_kind,
            mean_design,
            noise_design,
            &self.mean_offset,
            &self.noise_offset,
            layout.mean_from(theta),
            layout.noise_from(theta),
            mean_beta_hint,
            noise_beta_hint,
            "BinomialLocationScaleWiggle::build_blocks",
        )?;

        // The wiggle block is sized against the row count of the threshold design.
        let row_count = threshold_block.design.nrows();
        // NOTE(review): the null-space dims are passed as an empty list here,
        // not taken from `wiggle_block.nullspace_dims` — confirm this is intended.
        let wiggle_spec = build_location_scale_wiggle_block(
            "wiggle",
            wiggle.design.clone(),
            wiggle.offset.clone(),
            wiggle_block_penalty_matrices(wiggle),
            Vec::new(),
            layout.wiggle_from(theta),
            wiggle.initial_beta.clone(),
            row_count,
        )?;

        Ok(vec![threshold_block, log_sigma_block, wiggle_spec])
    }

    /// Creates the family object for the given realized designs.
    ///
    /// # Panics
    /// Panics when `identified_binomial_log_sigma_design` returns an error.
    fn build_family(
        &self,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Self::Family {
        let log_sigma_design =
            identified_binomial_log_sigma_design(mean_design, noise_design, &self.weights)
                .expect("identified binomial log-sigma design should match block construction");
        let threshold_design = mean_design.design.clone();
        BinomialLocationScaleWiggleFamily {
            y: self.y.clone(),
            weights: self.weights.clone(),
            link_kind: self.link_kind.clone(),
            threshold_design: Some(threshold_design),
            log_sigma_design: Some(log_sigma_design),
            wiggle_knots: self.wiggle_knots.clone(),
            wiggle_degree: self.wiggle_degree,
            policy: crate::resource::ResourcePolicy::default_library(),
        }
    }

    /// Clones the threshold and log-sigma coefficient vectors out of `fit`.
    ///
    /// # Errors
    /// Fails when either block state is missing; the threshold block is
    /// checked first.
    fn extract_primary_betas(
        &self,
        fit: &UnifiedFitResult,
    ) -> Result<(Array1<f64>, Array1<f64>), String> {
        let beta_at = |index: usize, missing: &str| -> Result<Array1<f64>, String> {
            match fit.block_states.get(index) {
                Some(state) => Ok(state.beta.clone()),
                None => Err(missing.to_string()),
            }
        };
        let threshold_beta = beta_at(
            BinomialLocationScaleWiggleFamily::BLOCK_T,
            "missing Binomial wiggle threshold block state",
        )?;
        let log_sigma_beta = beta_at(
            BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA,
            "missing Binomial wiggle log_sigma block state",
        )?;
        Ok((threshold_beta, log_sigma_beta))
    }

    /// Builds the spatial psi-derivative lists as
    /// `[threshold, log_sigma, wiggle]`; the wiggle entry is always empty.
    ///
    /// # Errors
    /// Propagates helper failures and reports an error when the helper yields
    /// `None` for the threshold or log-sigma block.
    fn build_psiderivative_blocks(
        &self,
        data: ndarray::ArrayView2<'_, f64>,
        meanspec_resolved: &TermCollectionSpec,
        noisespec_resolved: &TermCollectionSpec,
        mean_design: &TermCollectionDesign,
        noise_design: &TermCollectionDesign,
    ) -> Result<Vec<Vec<CustomFamilyBlockPsiDerivative>>, String> {
        let threshold_derivs =
            match build_block_spatial_psi_derivatives(data, meanspec_resolved, mean_design)? {
                Some(derivs) => derivs,
                None => return Err("missing threshold spatial psi derivatives".to_string()),
            };
        let log_sigma_derivs =
            match build_block_spatial_psi_derivatives(data, noisespec_resolved, noise_design)? {
                Some(derivs) => derivs,
                None => return Err("missing log_sigma spatial psi derivatives".to_string()),
            };
        Ok(vec![threshold_derivs, log_sigma_derivs, Vec::new()])
    }
}
/// Validates `spec` against `data` and fits the Gaussian location-scale model
/// through the shared `fit_location_scale_terms` driver.
///
/// # Errors
/// Returns the validation error, or whatever error the driver reports.
pub(crate) fn fit_gaussian_location_scale_terms(
    data: ndarray::ArrayView2<'_, f64>,
    spec: GaussianLocationScaleTermSpec,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermFitResult, String> {
    validate_gaussian_location_scale_termspec(data, &spec, "fit_gaussian_location_scale_terms")?;

    // Map the public spec field names onto the builder's mean/noise naming.
    let builder = GaussianLocationScaleTermBuilder {
        y: spec.y,
        weights: spec.weights,
        meanspec: spec.meanspec,
        noisespec: spec.log_sigmaspec,
        mean_offset: spec.mean_offset,
        noise_offset: spec.log_sigma_offset,
    };
    fit_location_scale_terms(data, builder, options, kappa_options)
}
/// Validates `spec` against `data` and fits the Gaussian location-scale model
/// with a wiggle block through the shared `fit_location_scale_terms` driver.
///
/// # Errors
/// Returns the validation error, or whatever error the driver reports.
pub(crate) fn fit_gaussian_location_scalewiggle_terms(
    data: ndarray::ArrayView2<'_, f64>,
    spec: GaussianLocationScaleWiggleTermSpec,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermFitResult, String> {
    let context = "fit_gaussian_location_scalewiggle_terms";
    validate_gaussian_location_scalewiggle_termspec(data, &spec, context)?;

    // Map the public spec field names onto the builder's mean/noise naming.
    let builder = GaussianLocationScaleWiggleTermBuilder {
        y: spec.y,
        weights: spec.weights,
        meanspec: spec.meanspec,
        noisespec: spec.log_sigmaspec,
        mean_offset: spec.mean_offset,
        noise_offset: spec.log_sigma_offset,
        wiggle_knots: spec.wiggle_knots,
        wiggle_degree: spec.wiggle_degree,
        wiggle_block: spec.wiggle_block,
    };
    fit_location_scale_terms(data, builder, options, kappa_options)
}
/// Selects a wiggle basis using the linear predictor of the pilot fit's first
/// block as the seed.
///
/// # Errors
/// Fails when the pilot fit has no block states, or when
/// `select_wiggle_basis_from_seed` fails.
pub(crate) fn select_gaussian_location_scale_link_wiggle_basis_from_pilot(
    pilot: &BlockwiseTermFitResult,
    wiggle_cfg: &WiggleBlockConfig,
    wiggle_penalty_orders: &[usize],
) -> Result<SelectedWiggleBasis, String> {
    let Some(mean_state) = pilot.fit.block_states.first() else {
        return Err("pilot Gaussian wiggle fit is missing mean block".to_string());
    };
    select_wiggle_basis_from_seed(mean_state.eta.view(), wiggle_cfg, wiggle_penalty_orders)
}
/// Fits the Gaussian location-scale wiggle model using an already selected
/// wiggle basis, and packages the fit together with the basis knots and degree.
///
/// # Errors
/// Returns any error from the wiggle fit or from
/// `BlockwiseTermWiggleFitResult::try_from_parts`.
pub(crate) fn fit_gaussian_location_scale_terms_with_selected_wiggle(
    data: ndarray::ArrayView2<'_, f64>,
    spec: GaussianLocationScaleTermSpec,
    selected_wiggle_basis: SelectedWiggleBasis,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermWiggleFitResult, String> {
    let knots = selected_wiggle_basis.knots;
    let degree = selected_wiggle_basis.degree;

    // The knots are needed again for the result, so the spec gets a clone.
    let wiggle_spec = GaussianLocationScaleWiggleTermSpec {
        y: spec.y,
        weights: spec.weights,
        meanspec: spec.meanspec,
        log_sigmaspec: spec.log_sigmaspec,
        mean_offset: spec.mean_offset,
        log_sigma_offset: spec.log_sigma_offset,
        wiggle_knots: knots.clone(),
        wiggle_degree: degree,
        wiggle_block: selected_wiggle_basis.block,
    };
    let solved =
        fit_gaussian_location_scalewiggle_terms(data, wiggle_spec, options, kappa_options)?;

    let parts = BlockwiseTermWiggleFitResultParts {
        fit: solved,
        wiggle_knots: knots,
        wiggle_degree: degree,
    };
    BlockwiseTermWiggleFitResult::try_from_parts(parts)
}
/// Validates `spec` against `data` and fits the binomial location-scale model
/// through the shared `fit_location_scale_terms` driver.
///
/// # Errors
/// Returns the validation error, or whatever error the driver reports.
pub(crate) fn fit_binomial_location_scale_terms(
    data: ndarray::ArrayView2<'_, f64>,
    spec: BinomialLocationScaleTermSpec,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermFitResult, String> {
    validate_binomial_location_scale_termspec(data, &spec, "fit_binomial_location_scale_terms")?;

    // The threshold block plays the "mean" role in the shared builder trait.
    let builder = BinomialLocationScaleTermBuilder {
        y: spec.y,
        weights: spec.weights,
        link_kind: spec.link_kind,
        meanspec: spec.thresholdspec,
        noisespec: spec.log_sigmaspec,
        mean_offset: spec.threshold_offset,
        noise_offset: spec.log_sigma_offset,
    };
    fit_location_scale_terms(data, builder, options, kappa_options)
}
/// Validates `spec` against `data` and fits the binomial location-scale model
/// with a wiggle block through the shared `fit_location_scale_terms` driver.
///
/// # Errors
/// Returns the validation error, or whatever error the driver reports.
pub(crate) fn fit_binomial_location_scalewiggle_terms(
    data: ndarray::ArrayView2<'_, f64>,
    spec: BinomialLocationScaleWiggleTermSpec,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermFitResult, String> {
    let context = "fit_binomial_location_scalewiggle_terms";
    validate_binomial_location_scalewiggle_termspec(data, &spec, context)?;

    // The threshold block plays the "mean" role in the shared builder trait.
    let builder = BinomialLocationScaleWiggleTermBuilder {
        y: spec.y,
        weights: spec.weights,
        link_kind: spec.link_kind,
        meanspec: spec.thresholdspec,
        noisespec: spec.log_sigmaspec,
        mean_offset: spec.threshold_offset,
        noise_offset: spec.log_sigma_offset,
        wiggle_knots: spec.wiggle_knots,
        wiggle_degree: spec.wiggle_degree,
        wiggle_block: spec.wiggle_block,
    };
    fit_location_scale_terms(data, builder, options, kappa_options)
}
pub(crate) fn select_binomial_location_scale_link_wiggle_basis_from_pilot(
pilot: &BlockwiseTermFitResult,
wiggle_cfg: &WiggleBlockConfig,
wiggle_penalty_orders: &[usize],
) -> Result<SelectedWiggleBasis, String> {
let eta_t = pilot
.fit
.block_states
.first()
.ok_or_else(|| "pilot fit is missing threshold block".to_string())?
.eta
.view();
let eta_ls = pilot
.fit
.block_states
.get(1)
.ok_or_else(|| "pilot fit is missing log_sigma block".to_string())?
.eta
.view();
let sigma = eta_ls.mapv(safe_exp);
let q_seed = Array1::from_iter(eta_t.iter().zip(sigma.iter()).map(|(&t, &s)| -t / s));
select_wiggle_basis_from_seed(q_seed.view(), wiggle_cfg, wiggle_penalty_orders)
}
/// Fits the binomial location-scale wiggle model using an already selected
/// wiggle basis, and packages the fit together with the basis knots and degree.
///
/// # Errors
/// Returns any error from the wiggle fit or from
/// `BlockwiseTermWiggleFitResult::try_from_parts`.
pub(crate) fn fit_binomial_location_scale_terms_with_selected_wiggle(
    data: ndarray::ArrayView2<'_, f64>,
    spec: BinomialLocationScaleTermSpec,
    selected_wiggle_basis: SelectedWiggleBasis,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BlockwiseTermWiggleFitResult, String> {
    let knots = selected_wiggle_basis.knots;
    let degree = selected_wiggle_basis.degree;

    // The knots are needed again for the result, so the spec gets a clone.
    let wiggle_spec = BinomialLocationScaleWiggleTermSpec {
        y: spec.y,
        weights: spec.weights,
        link_kind: spec.link_kind,
        thresholdspec: spec.thresholdspec,
        log_sigmaspec: spec.log_sigmaspec,
        threshold_offset: spec.threshold_offset,
        log_sigma_offset: spec.log_sigma_offset,
        wiggle_knots: knots.clone(),
        wiggle_degree: degree,
        wiggle_block: selected_wiggle_basis.block,
    };
    let solved =
        fit_binomial_location_scalewiggle_terms(data, wiggle_spec, options, kappa_options)?;

    let parts = BlockwiseTermWiggleFitResultParts {
        fit: solved,
        wiggle_knots: knots,
        wiggle_degree: degree,
    };
    BlockwiseTermWiggleFitResult::try_from_parts(parts)
}
/// Selects a wiggle basis for the binomial mean-wiggle model, seeding it with
/// the product of the pilot design and the pilot coefficients.
///
/// # Errors
/// Returns whatever error `select_wiggle_basis_from_seed` reports.
pub(crate) fn select_binomial_mean_link_wiggle_basis_from_pilot(
    pilot_design: &TermCollectionDesign,
    pilot_fit: &UnifiedFitResult,
    wiggle_cfg: &WiggleBlockConfig,
    wiggle_penalty_orders: &[usize],
) -> Result<SelectedWiggleBasis, String> {
    let pilot_coefficients = &pilot_fit.beta;
    let linear_predictor = pilot_design.design.dot(pilot_coefficients);
    select_wiggle_basis_from_seed(linear_predictor.view(), wiggle_cfg, wiggle_penalty_orders)
}
/// Fits the binomial mean-wiggle model with a previously selected wiggle
/// basis, warm-started from a pilot fit.
///
/// Two paths exist:
///
/// * When `pilot_spec` has no spatial length-scale terms, a single
///   `fit_binomial_mean_wiggle` call is made on the pilot design and the
///   pilot spec/design are returned unchanged.
/// * Otherwise the smoothing parameters and the spatial log-kappa
///   coordinates are optimized jointly. The outer parameter vector is
///   `theta = [log-lambdas (eta block, then wiggle block), log-kappa]`. A
///   baseline fit at the seeded length scales supplies the starting
///   log-lambdas and coefficients, the outer optimizer is run, and a final
///   fit is performed on the design rebuilt at the optimum.
///
/// # Errors
/// Returns an error string when input validation fails, when a design or
/// fit cannot be built, when the baseline fit returns an unexpected number
/// of smoothing parameters or lacks a block, or when the outer optimizer
/// does not converge.
pub(crate) fn fit_binomial_mean_wiggle_terms_with_selected_basis(
    data: ndarray::ArrayView2<'_, f64>,
    pilot_spec: &TermCollectionSpec,
    pilot_design: &TermCollectionDesign,
    pilot_fit: &UnifiedFitResult,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    link_kind: InverseLink,
    selected_wiggle_basis: SelectedWiggleBasis,
    options: &BlockwiseFitOptions,
    kappa_options: &SpatialLengthScaleOptimizationOptions,
) -> Result<BinomialMeanWiggleTermFitResult, String> {
    // Symmetric box bound on the log-lambda part of theta; also handed to the
    // outer problem as its rho bound.
    const RHO_BOUND: f64 = 12.0;
    validate_term_weights(
        data,
        y.len(),
        weights,
        "fit_binomial_mean_wiggle_terms_with_selected_basis",
    )?;
    validate_binomial_response(y, "fit_binomial_mean_wiggle_terms_with_selected_basis")?;
    let SelectedWiggleBasis {
        knots: wiggle_knots,
        degree: wiggle_degree,
        block: wiggle_block,
        ..
    } = selected_wiggle_basis;
    let spatial_terms = spatial_length_scale_term_indices(pilot_spec);

    // Fast path: nothing spatial to tune, so fit once on the pilot design.
    if spatial_terms.is_empty() {
        let fit = fit_binomial_mean_wiggle(
            BinomialMeanWiggleSpec {
                y: y.clone(),
                weights: weights.clone(),
                link_kind,
                wiggle_knots: wiggle_knots.clone(),
                wiggle_degree,
                eta_block: ParameterBlockInput {
                    design: pilot_design.design.clone(),
                    offset: Array1::zeros(y.len()),
                    penalties: pilot_design
                        .penalties
                        .iter()
                        .map(crate::solver::estimate::PenaltySpec::from_blockwise_ref)
                        .collect(),
                    nullspace_dims: vec![],
                    initial_log_lambdas: Some(
                        pilot_fit
                            .lambdas
                            .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
                    ),
                    initial_beta: Some(pilot_fit.beta.clone()),
                },
                wiggle_block,
            },
            options,
        )?;
        return Ok(BinomialMeanWiggleTermFitResult {
            fit,
            resolvedspec: pilot_spec.clone(),
            design: pilot_design.clone(),
            wiggle_knots,
            wiggle_degree,
        });
    }

    // Seed the log-kappa coordinates and clamp them into their bounds.
    let dims_per_term = spatial_dims_per_term(pilot_spec, &spatial_terms);
    let log_kappa0 =
        SpatialLogKappaCoords::from_length_scales_aniso(pilot_spec, &spatial_terms, kappa_options)
            .reseed_from_data(data, pilot_spec, &spatial_terms, kappa_options);
    let log_kappa_lower = SpatialLogKappaCoords::lower_bounds_aniso_from_data(
        data,
        pilot_spec,
        &spatial_terms,
        &dims_per_term,
        kappa_options,
    );
    let log_kappa_upper = SpatialLogKappaCoords::upper_bounds_aniso_from_data(
        data,
        pilot_spec,
        &spatial_terms,
        &dims_per_term,
        kappa_options,
    );
    let log_kappa0 = log_kappa0.clamp_to_bounds(&log_kappa_lower, &log_kappa_upper);
    let eta_penalty_count = pilot_design.penalties.len();
    let wiggle_penalty_count = initial_log_lambdas_orzeros(&wiggle_block)?.len();
    let rho_dim = eta_penalty_count + wiggle_penalty_count;

    // Baseline fit at the seeded length scales; it provides the starting
    // log-lambdas and coefficients for the outer optimization.
    let baseline_resolvedspec = log_kappa0
        .apply_tospec(pilot_spec, &spatial_terms)
        .map_err(|e| e.to_string())?;
    let baseline_design =
        build_term_collection_design(data, &baseline_resolvedspec).map_err(|e| e.to_string())?;
    let baseline_fit = fit_binomial_mean_wiggle(
        BinomialMeanWiggleSpec {
            y: y.clone(),
            weights: weights.clone(),
            link_kind: link_kind.clone(),
            wiggle_knots: wiggle_knots.clone(),
            wiggle_degree,
            eta_block: ParameterBlockInput {
                design: baseline_design.design.clone(),
                offset: Array1::zeros(y.len()),
                penalties: baseline_design
                    .penalties
                    .iter()
                    .map(crate::solver::estimate::PenaltySpec::from_blockwise_ref)
                    .collect(),
                nullspace_dims: vec![],
                initial_log_lambdas: Some(
                    pilot_fit
                        .lambdas
                        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln()),
                ),
                initial_beta: Some(pilot_fit.beta.clone()),
            },
            wiggle_block: wiggle_block.clone(),
        },
        options,
    )?;
    let baseline_log_lambdas = baseline_fit
        .lambdas
        .mapv(|v| v.max(WARMSTART_LOG_LAMBDA_FLOOR).ln());
    if baseline_log_lambdas.len() != rho_dim {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "baseline binomial mean-wiggle fit returned {} log-lambdas, expected {rho_dim}",
                baseline_log_lambdas.len()
            ),
        }
        .into());
    }
    let baseline_eta_beta = baseline_fit
        .block_states
        .get(BinomialMeanWiggleFamily::BLOCK_ETA)
        .ok_or_else(|| "baseline binomial mean-wiggle fit missing eta block".to_string())?
        .beta
        .clone();
    let baseline_wiggle_beta = Some(
        baseline_fit
            .block_states
            .get(BinomialMeanWiggleFamily::BLOCK_WIGGLE)
            .ok_or_else(|| "baseline binomial mean-wiggle fit missing wiggle block".to_string())?
            .beta
            .clone(),
    );

    // Assemble theta0 = [baseline log-lambdas, seeded log-kappa] and the box
    // bounds: +/- RHO_BOUND for the rho part, data-derived for the kappa part.
    let theta_dim = rho_dim + log_kappa0.len();
    let mut theta0 = Array1::<f64>::zeros(theta_dim);
    theta0
        .slice_mut(s![0..rho_dim])
        .assign(&baseline_log_lambdas);
    theta0
        .slice_mut(s![rho_dim..theta_dim])
        .assign(log_kappa0.as_array());
    let mut lower = Array1::<f64>::from_elem(theta_dim, -RHO_BOUND);
    let mut upper = Array1::<f64>::from_elem(theta_dim, RHO_BOUND);
    lower
        .slice_mut(s![rho_dim..theta_dim])
        .assign(log_kappa_lower.as_array());
    upper
        .slice_mut(s![rho_dim..theta_dim])
        .assign(log_kappa_upper.as_array());

    // Owned copies captured by the closures below and reused in the final fit.
    let pilot_spec_cloned = pilot_spec.clone();
    let pilot_beta = baseline_eta_beta;
    let wiggle_design = wiggle_block.design.clone();
    let wiggle_offset = wiggle_block.offset.clone();
    let wiggle_penalties = wiggle_block.penalties.clone();
    let wiggle_initial_beta = baseline_wiggle_beta;
    let wiggle_knots_cloned = wiggle_knots.clone();
    let y_cloned = y.clone();
    let weights_cloned = weights.clone();
    let link_kind_cloned = link_kind.clone();
    let outer_family = BinomialMeanWiggleFamily {
        y: y_cloned.clone(),
        weights: weights_cloned.clone(),
        link_kind: link_kind_cloned.clone(),
        wiggle_knots: wiggle_knots_cloned.clone(),
        wiggle_degree,
        policy: crate::resource::ResourcePolicy::default_library(),
    };
    // The same atomic is shared between the fit options and the outer problem.
    let screening_cap = Arc::new(AtomicUsize::new(0));
    let mut outer_options = options.clone();
    outer_options.screening_max_inner_iterations = Some(Arc::clone(&screening_cap));

    /// Mutable state threaded through the outer objective callbacks.
    struct MeanWiggleOuterState {
        /// Warm start from the most recent inner solve, if any.
        warm_cache: Option<crate::custom_family::CustomFamilyWarmStart>,
        /// Last successful evaluation: (theta, cost, gradient, Hessian, warm start).
        last_eval: Option<(
            Array1<f64>,
            f64,
            Array1<f64>,
            crate::solver::outer_strategy::HessianResult,
            crate::custom_family::CustomFamilyWarmStart,
        )>,
    }

    // Realizes spec, design, block specs and eta psi-derivatives at `theta`.
    let build_realized_blocks = |theta: &Array1<f64>| -> Result<
        (
            TermCollectionSpec,
            TermCollectionDesign,
            Vec<ParameterBlockSpec>,
            Vec<CustomFamilyBlockPsiDerivative>,
        ),
        String,
    > {
        let log_kappa =
            SpatialLogKappaCoords::from_theta_tail_with_dims(theta, rho_dim, dims_per_term.clone());
        let resolvedspec = log_kappa
            .apply_tospec(&pilot_spec_cloned, &spatial_terms)
            .map_err(|e| e.to_string())?;
        let design =
            build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
        let eta_derivs = build_block_spatial_psi_derivatives(data, &resolvedspec, &design)?
            .ok_or_else(|| {
                "missing eta spatial psi derivatives for binomial mean wiggle".to_string()
            })?;
        let blocks = vec![
            ParameterBlockSpec {
                name: "eta".to_string(),
                design: design.design.clone(),
                offset: Array1::zeros(y_cloned.len()),
                penalties: design.penalties_as_penalty_matrix(),
                nullspace_dims: vec![],
                initial_log_lambdas: theta.slice(s![0..eta_penalty_count]).to_owned(),
                initial_beta: Some(pilot_beta.clone()),
                gauge_priority: LINK_WIGGLE_GAUGE_PRIORITY,
                jacobian_callback: None,
                stacked_design: None,
                stacked_offset: None,
            },
            ParameterBlockSpec {
                name: "wiggle".to_string(),
                design: wiggle_design.clone(),
                offset: wiggle_offset.clone(),
                penalties: {
                    let p_wiggle = wiggle_design.ncols();
                    wiggle_penalties
                        .iter()
                        .map(|spec| match spec {
                            crate::solver::estimate::PenaltySpec::Block {
                                local,
                                col_range,
                                ..
                            } => PenaltyMatrix::Blockwise {
                                local: local.clone(),
                                col_range: col_range.clone(),
                                total_dim: p_wiggle,
                            },
                            crate::solver::estimate::PenaltySpec::Dense(m)
                            | crate::solver::estimate::PenaltySpec::DenseWithMean {
                                matrix: m,
                                ..
                            } => PenaltyMatrix::Dense(m.clone()),
                        })
                        .collect()
                },
                nullspace_dims: vec![],
                initial_log_lambdas: theta.slice(s![eta_penalty_count..rho_dim]).to_owned(),
                initial_beta: wiggle_initial_beta.clone(),
                gauge_priority: DEFAULT_GAUGE_PRIORITY,
                jacobian_callback: None,
                stacked_design: None,
                stacked_offset: None,
            },
        ];
        Ok((resolvedspec, design, blocks, eta_derivs))
    };

    // Joint hyper evaluation at `theta`, with or without the outer Hessian.
    let build_eval = |theta: &Array1<f64>,
                      warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>,
                      need_hessian: bool|
     -> Result<
        (
            crate::custom_family::CustomFamilyJointHyperResult,
            TermCollectionSpec,
            TermCollectionDesign,
        ),
        String,
    > {
        let (resolvedspec, design, blocks, eta_derivs) = build_realized_blocks(theta)?;
        let eval = evaluate_custom_family_joint_hyper(
            &outer_family,
            &blocks,
            &outer_options,
            &theta.slice(s![0..rho_dim]).to_owned(),
            &[eta_derivs, Vec::new()],
            warm_cache,
            if need_hessian {
                crate::solver::estimate::reml::unified::EvalMode::ValueGradientHessian
            } else {
                crate::solver::estimate::reml::unified::EvalMode::ValueAndGradient
            },
        )?;
        Ok((eval, resolvedspec, design))
    };

    // EFS evaluation at `theta`.
    let build_efs = |theta: &Array1<f64>,
                     warm_cache: Option<&crate::custom_family::CustomFamilyWarmStart>|
     -> Result<crate::custom_family::CustomFamilyJointHyperEfsResult, String> {
        let (_, _, blocks, eta_derivs) = build_realized_blocks(theta)?;
        evaluate_custom_family_joint_hyper_efs(
            &outer_family,
            &blocks,
            &outer_options,
            &theta.slice(s![0..rho_dim]).to_owned(),
            &[eta_derivs, Vec::new()],
            warm_cache,
        )
        .map_err(|e| e.to_string())
    };
    use crate::estimate::EstimationError;
    use crate::solver::outer_strategy::{
        DeclaredHessianForm, Derivative, OuterEval, OuterEvalOrder,
    };
    let analytic_outer_hessian_available = true;
    // Heuristic seed: the rho part is exponentiated, the kappa tail is left as is.
    let mut seed_heuristic = theta0.to_vec();
    for value in &mut seed_heuristic[..rho_dim] {
        *value = value.exp();
    }
    let problem = crate::solver::outer_strategy::OuterProblem::new(theta_dim)
        .with_gradient(Derivative::Analytic)
        .with_hessian(if analytic_outer_hessian_available {
            DeclaredHessianForm::Either
        } else {
            DeclaredHessianForm::Unavailable
        })
        .with_psi_dim(theta_dim - rho_dim)
        .with_tolerance(options.outer_tol)
        .with_max_iter(options.outer_max_iter)
        .with_bounds(lower.clone(), upper.clone())
        .with_initial_rho(theta0.clone())
        .with_seed_config(crate::seeding::SeedConfig {
            max_seeds: 4,
            seed_budget: 2,
            risk_profile: crate::seeding::SeedRiskProfile::GeneralizedLinear,
            num_auxiliary_trailing: theta_dim - rho_dim,
            ..Default::default()
        })
        .with_screening_cap(Arc::clone(&screening_cap))
        // Same constant as the box bounds above, so the two cannot drift apart.
        .with_rho_bound(RHO_BOUND)
        .with_heuristic_lambdas(seed_heuristic);

    // Value/gradient(/Hessian) callback with a one-entry cache keyed on theta.
    let eval_outer = |state: &mut MeanWiggleOuterState,
                      theta: &Array1<f64>,
                      order: OuterEvalOrder|
     -> Result<OuterEval, EstimationError> {
        // Reuse the cached evaluation unless a Hessian is requested and the
        // cached one is neither analytic nor operator-form.
        if let Some((cached_theta, cached_cost, cached_grad, cached_hess, cached_warm)) =
            &state.last_eval
            && cached_theta == theta
            && (!matches!(order, OuterEvalOrder::ValueGradientHessian)
                || matches!(
                    cached_hess,
                    crate::solver::outer_strategy::HessianResult::Analytic(_)
                        | crate::solver::outer_strategy::HessianResult::Operator(_)
                ))
        {
            state.warm_cache = Some(cached_warm.clone());
            return Ok(OuterEval {
                cost: *cached_cost,
                gradient: cached_grad.clone(),
                hessian: cached_hess.clone(),
                inner_beta_hint: None,
            });
        }
        let need_hessian = matches!(order, OuterEvalOrder::ValueGradientHessian)
            && analytic_outer_hessian_available;
        let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), need_hessian)
            .map_err(EstimationError::InvalidInput)?;
        if !eval.inner_converged {
            // Keep the warm start even though this evaluation is rejected.
            state.warm_cache = Some(eval.warm_start);
            crate::bail_invalid_estim!(
                "binomial mean-wiggle exact spatial inner solve did not converge"
            );
        }
        let hessian_result = eval.outer_hessian.clone();
        state.last_eval = Some((
            theta.clone(),
            eval.objective,
            eval.gradient.clone(),
            eval.outer_hessian.clone(),
            eval.warm_start.clone(),
        ));
        state.warm_cache = Some(eval.warm_start);
        Ok(OuterEval {
            cost: eval.objective,
            gradient: eval.gradient,
            hessian: hessian_result,
            inner_beta_hint: None,
        })
    };
    let mut obj = problem.build_objective_with_eval_order(
        MeanWiggleOuterState {
            warm_cache: None,
            last_eval: None,
        },
        // Cost-only callback.
        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
            if let Some((cached_theta, cached_cost, _, _, cached_warm)) = &state.last_eval
                && cached_theta == theta
            {
                state.warm_cache = Some(cached_warm.clone());
                return Ok(*cached_cost);
            }
            let (eval, _, _) = build_eval(theta, state.warm_cache.as_ref(), false)
                .map_err(EstimationError::InvalidInput)?;
            if !eval.inner_converged {
                state.warm_cache = Some(eval.warm_start);
                crate::bail_invalid_estim!(
                    "binomial mean-wiggle exact spatial cost inner solve did not converge"
                        .to_string(),
                );
            }
            state.warm_cache = Some(eval.warm_start);
            Ok(eval.objective)
        },
        // Full evaluation at the highest available order.
        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
            eval_outer(
                state,
                theta,
                if analytic_outer_hessian_available {
                    OuterEvalOrder::ValueGradientHessian
                } else {
                    OuterEvalOrder::ValueAndGradient
                },
            )
        },
        // Evaluation at a caller-chosen order.
        |state: &mut MeanWiggleOuterState, theta: &Array1<f64>, order: OuterEvalOrder| {
            eval_outer(state, theta, order)
        },
        // Reset callback: drops the warm start and the cached evaluation.
        Some(|state: &mut MeanWiggleOuterState| {
            state.warm_cache = None;
            state.last_eval = None;
        }),
        // EFS callback.
        Some(|state: &mut MeanWiggleOuterState, theta: &Array1<f64>| {
            let eval = build_efs(theta, state.warm_cache.as_ref())
                .map_err(EstimationError::InvalidInput)?;
            if !eval.inner_converged {
                state.warm_cache = Some(eval.warm_start);
                crate::bail_invalid_estim!(
                    "binomial mean-wiggle exact spatial EFS inner solve did not converge"
                        .to_string(),
                );
            }
            state.warm_cache = Some(eval.warm_start);
            Ok(eval.efs_eval)
        }),
    );
    let outer = problem
        .run(&mut obj, "binomial mean wiggle exact spatial hyper")
        .map_err(|e| e.to_string())?;
    if !outer.converged {
        return Err(GamlssError::NumericalFailure {
            reason: format!(
                "binomial mean wiggle exact spatial hyper did not converge after {} iterations (final_objective={:.6e}, final_grad_norm={})",
                outer.iterations,
                outer.final_value,
                outer.final_grad_norm_report(),
            ),
        }
        .into());
    }

    // Rebuild the design at the optimum, freeze the spec against it, and run
    // the final fit started from the optimized log-lambdas.
    let theta_star = outer.rho;
    let log_kappa =
        SpatialLogKappaCoords::from_theta_tail_with_dims(&theta_star, rho_dim, dims_per_term);
    let resolvedspec = log_kappa
        .apply_tospec(&pilot_spec_cloned, &spatial_terms)
        .map_err(|e| e.to_string())?;
    let design = build_term_collection_design(data, &resolvedspec).map_err(|e| e.to_string())?;
    let resolvedspec =
        freeze_term_collection_from_design(&resolvedspec, &design).map_err(|e| e.to_string())?;
    let fit = fit_binomial_mean_wiggle(
        BinomialMeanWiggleSpec {
            y: y_cloned,
            weights: weights_cloned,
            link_kind: link_kind_cloned,
            wiggle_knots: wiggle_knots.clone(),
            wiggle_degree,
            eta_block: ParameterBlockInput {
                design: design.design.clone(),
                offset: Array1::zeros(y.len()),
                penalties: design
                    .penalties
                    .iter()
                    .map(crate::solver::estimate::PenaltySpec::from_blockwise_ref)
                    .collect(),
                nullspace_dims: vec![],
                initial_log_lambdas: Some(theta_star.slice(s![0..eta_penalty_count]).to_owned()),
                initial_beta: Some(pilot_beta),
            },
            wiggle_block: ParameterBlockInput {
                design: wiggle_design,
                offset: wiggle_offset,
                penalties: wiggle_penalties,
                nullspace_dims: vec![],
                initial_log_lambdas: Some(
                    theta_star.slice(s![eta_penalty_count..rho_dim]).to_owned(),
                ),
                initial_beta: wiggle_initial_beta,
            },
        },
        options,
    )?;
    Ok(BinomialMeanWiggleTermFitResult {
        fit,
        resolvedspec,
        design,
        wiggle_knots,
        wiggle_degree,
    })
}
/// Tag naming the kind of link attached to a parameter.
///
/// NOTE(review): how each variant is interpreted is decided by code outside
/// this section; the variant names are the only information available here.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParameterLink {
Identity,
Log,
Logit,
Probit,
InverseLink,
Wiggle,
}
/// Bundle of array-valued quantities plus a scalar log-likelihood for the
/// binomial location-scale model.
///
/// NOTE(review): the field meanings below are inferred from their names only
/// (no producer or consumer is visible here) — confirm against the code that
/// fills this struct.
struct BinomialLocationScaleCore {
// presumably the scale sigma
sigma: Array1<f64>,
// presumably d sigma / d eta for the log-sigma predictor
dsigma_deta: Array1<f64>,
// presumably the base argument q before any wiggle adjustment
q0: Array1<f64>,
// presumably the mean mu(q)
mu: Array1<f64>,
// presumably the first derivative of mu with respect to q
dmu_dq: Array1<f64>,
// presumably the second derivative of mu with respect to q
d2mu_dq2: Array1<f64>,
// presumably the third derivative of mu with respect to q
d3mu_dq3: Array1<f64>,
// presumably the total log-likelihood
log_likelihood: f64,
}
/// Scalar coefficients produced by `nonwiggle_q_derivs(eta_t, sigma)`.
///
/// With `inv = 1 / sigma`, the fields hold the values noted on each one.
/// NOTE(review): the naming suggests partial derivatives of q with respect to
/// the threshold (`t`) and log-sigma (`ls`/`l`) predictors — confirm.
#[derive(Clone, Copy)]
struct NonWiggleQDerivs {
// -inv
q_t: f64,
// eta_t * inv
q_ls: f64,
// inv
q_tl: f64,
// -eta_t * inv
q_ll: f64,
// -inv
q_tl_ls: f64,
// eta_t * inv
q_ll_ls: f64,
}
/// Linear combinations of `NonWiggleQDerivs` fields along a direction
/// `(d_eta_t, d_eta_ls)`, as produced by `nonwiggle_q_directional`.
#[derive(Clone, Copy)]
struct NonWiggleQDirectional {
// q_t * d_eta_t + q_ls * d_eta_ls
delta_q: f64,
// q_tl * d_eta_ls
delta_q_t: f64,
// q_tl * d_eta_t + q_ll * d_eta_ls
delta_q_ls: f64,
// q_tl_ls * d_eta_ls
delta_q_tl: f64,
// q_tl_ls * d_eta_t + q_ll_ls * d_eta_ls
delta_q_ll: f64,
}
/// Scalar quantities for one row of the binomial location-scale model.
///
/// NOTE(review): the field meanings below are inferred from their names only
/// — confirm against the code that fills this struct.
#[derive(Clone, Copy)]
struct BinomialLocationScaleRow {
// presumably the scale sigma for this row
sigma: f64,
// presumably d sigma / d eta for the log-sigma predictor
dsigma_deta: f64,
// presumably the base argument q before any wiggle adjustment
q0: f64,
// inverse-link jet for this row
inverse_link: crate::mixture_link::InverseLinkJet,
// presumably this row's log-likelihood contribution
ll: f64,
}
/// Chain-rule Hessian coefficient of an objective that depends on its parameters only
/// through a scalar `q`.
///
/// `m1` and `m2` are the first and second derivatives of the objective in `q`; `q_a`, `q_b`
/// are first derivatives of `q` in the two parameters and `q_ab` is the mixed second
/// derivative. Returns `m2 * q_a * q_b + m1 * q_ab`.
#[inline]
fn hessian_coeff_fromobjective_q_terms(m1: f64, m2: f64, q_a: f64, q_b: f64, q_ab: f64) -> f64 {
    let curvature_part = m2 * q_a * q_b;
    let slope_part = m1 * q_ab;
    curvature_part + slope_part
}
/// Directional derivative of the coefficient `m2 * q_a * q_b + m1 * q_ab`.
///
/// `m1..m3` are derivatives of the objective in `q`, `dq` is the change of `q` along the
/// direction, and `dq_a`, `dq_b`, `dq_ab` are the changes of `q_a`, `q_b`, `q_ab` along it.
#[inline]
fn directionalhessian_coeff_fromobjective_q_terms(
    m1: f64,
    m2: f64,
    m3: f64,
    dq: f64,
    q_a: f64,
    q_b: f64,
    q_ab: f64,
    dq_a: f64,
    dq_b: f64,
    dq_ab: f64,
) -> f64 {
    // Terms are grouped by the order of the objective derivative that multiplies them.
    let third_order = m3 * dq * q_a * q_b;
    let second_order_factor = dq_a * q_b + q_a * dq_b + dq * q_ab;
    let first_order = m1 * dq_ab;
    third_order + m2 * second_order_factor + first_order
}
/// Second directional derivative (along directions `u` and `v`) of the coefficient
/// `m2 * q_a * q_b + m1 * q_ab`.
///
/// `m1..m4` are derivatives of the objective in `q`. Arguments suffixed `_u` / `v` are first
/// directional changes of the named quantity, and those suffixed `_uv` are second (mixed)
/// directional changes.
#[inline]
fn second_directionalhessian_coeff_fromobjective_q_terms(
    m1: f64,
    m2: f64,
    m3: f64,
    m4: f64,
    dq_u: f64,
    dqv: f64,
    d2q_uv: f64,
    q_a: f64,
    q_b: f64,
    q_ab: f64,
    dq_a_u: f64,
    dq_av: f64,
    dq_b_u: f64,
    dq_bv: f64,
    d2q_a_uv: f64,
    d2q_b_uv: f64,
    dq_ab_u: f64,
    dq_abv: f64,
    d2q_ab_uv: f64,
) -> f64 {
    // Product-rule derivatives of q_a * q_b along u, along v, and along both.
    let prod_u = dq_a_u * q_b + q_a * dq_b_u;
    let prod_v = dq_av * q_b + q_a * dq_bv;
    let prod_uv = d2q_a_uv * q_b + dq_a_u * dq_bv + dq_av * dq_b_u + q_a * d2q_b_uv;

    // One factor per order of the objective derivative.
    let fourth_order = m4 * dq_u * dqv * q_a * q_b;
    let third_order_factor =
        d2q_uv * q_a * q_b + dq_u * prod_v + dqv * prod_u + dq_u * dqv * q_ab;
    let second_order_factor = prod_uv + d2q_uv * q_ab + dq_u * dq_abv + dqv * dq_ab_u;
    let first_order = m1 * d2q_ab_uv;

    fourth_order + m3 * third_order_factor + m2 * second_order_factor + first_order
}
/// Builds the `NonWiggleQDerivs` bundle for one row from the location predictor `eta_t`
/// and the scale `sigma`.
///
/// Every entry is either `±1 / sigma` or `±eta_t / sigma`; the signs alternate with each
/// additional log-scale index in the field name.
fn nonwiggle_q_derivs(eta_t: f64, sigma: f64) -> NonWiggleQDerivs {
    let inv_sigma = sigma.recip();
    let scaled_eta = eta_t * inv_sigma;
    NonWiggleQDerivs {
        q_t: -inv_sigma,
        q_ls: scaled_eta,
        q_tl: inv_sigma,
        q_ll: -scaled_eta,
        q_tl_ls: -inv_sigma,
        q_ll_ls: scaled_eta,
    }
}
fn nonwiggle_q_directional(
q: NonWiggleQDerivs,
d_eta_t: f64,
d_eta_ls: f64,
) -> NonWiggleQDirectional {
NonWiggleQDirectional {
delta_q: q.q_t * d_eta_t + q.q_ls * d_eta_ls,
delta_q_t: q.q_tl * d_eta_ls,
delta_q_ls: q.q_tl * d_eta_t + q.q_ll * d_eta_ls,
delta_q_tl: q.q_tl_ls * d_eta_ls,
delta_q_ll: q.q_tl_ls * d_eta_t + q.q_ll_ls * d_eta_ls,
}
}
/// Computes `ln(1 - exp(-z))` for `z >= 0` without catastrophic loss of precision.
///
/// * `z == 0` returns negative infinity.
/// * `0 < z <= ln 2` uses `ln(-expm1(-z))`, which is accurate when `exp(-z)` is close to 1.
/// * `z > ln 2` uses `ln_1p(-exp(-z))`, which is accurate when `exp(-z)` is close to 0. The
///   naive `(1 - exp(-z)).ln()` rounds to exactly `0.0` once `exp(-z)` falls below half an
///   ulp of 1, discarding the tail of the result.
///
/// # Panics
/// Panics if `z` is negative or NaN.
#[inline]
fn log1mexp_neg_positive(z: f64) -> f64 {
    assert!(z >= 0.0);
    if z == 0.0 {
        f64::NEG_INFINITY
    } else if z <= std::f64::consts::LN_2 {
        (-(-z).exp_m1()).ln()
    } else {
        (-(-z).exp()).ln_1p()
    }
}
/// Weighted Bernoulli log-likelihood `weight * (y ln(mu) + (1 - y) ln(1 - mu))` computed
/// directly from a probability `mu`.
///
/// A zero `weight` short-circuits to `Ok(0.0)` before any validation. At the boundaries
/// `mu == 0` / `mu == 1` the matching log term is taken as `0.0` when `y` is exactly `0` /
/// `1`, and as negative infinity otherwise.
///
/// # Errors
/// * `GamlssError::NumericalFailure` when `mu` is non-finite or outside `[0, 1]`.
/// * `GamlssError::NonFinite` when the resulting log-likelihood is not finite.
#[inline]
fn bernoulli_log_likelihood_from_probability(y: f64, weight: f64, mu: f64) -> Result<f64, String> {
    if weight == 0.0 {
        return Ok(0.0);
    }
    let is_valid_probability = mu.is_finite() && (0.0..=1.0).contains(&mu);
    if !is_valid_probability {
        return Err(GamlssError::NumericalFailure {
            reason: format!(
                "binomial location-scale inverse link returned invalid probability {mu}"
            ),
        }
        .into());
    }
    let log_success = if mu != 0.0 {
        mu.ln()
    } else if y == 0.0 {
        0.0
    } else {
        f64::NEG_INFINITY
    };
    let log_failure = if mu != 1.0 {
        (1.0 - mu).ln()
    } else if y == 1.0 {
        0.0
    } else {
        f64::NEG_INFINITY
    };
    let ll = weight * (y * log_success + (1.0 - y) * log_failure);
    if !ll.is_finite() {
        return Err(GamlssError::NonFinite {
            reason: format!(
                "binomial location-scale log likelihood is non-finite at y={y}, mu={mu}"
            ),
        }
        .into());
    }
    Ok(ll)
}
/// Base index of the binomial location-scale model: the negated location predictor divided
/// by the scale, `-eta_t / sigma`.
#[inline]
fn binomial_location_scale_q0(eta_t: f64, sigma: f64) -> f64 {
    let negated_location = -eta_t;
    negated_location / sigma
}
/// Weighted log-likelihood of one binary observation at index `q`.
///
/// Probit, logit and cloglog links are evaluated from `q` directly (log-CDF / softplus /
/// `log1mexp` forms) and ignore `mu`. Every other link falls back to
/// `bernoulli_log_likelihood_from_probability` with the supplied `mu`. A zero `weight`
/// returns `Ok(0.0)` without evaluating anything.
///
/// # Errors
/// The cloglog branch returns `GamlssError::NonFinite` when the result is not finite; the
/// fallback branch propagates the errors of `bernoulli_log_likelihood_from_probability`.
#[inline]
fn binomial_location_scale_log_likelihood(
    y: f64,
    weight: f64,
    q: f64,
    link_kind: &InverseLink,
    mu: f64,
) -> Result<f64, String> {
    if weight == 0.0 {
        return Ok(0.0);
    }
    match link_kind {
        InverseLink::Standard(StandardLink::Probit) => {
            let log_success = normal_logcdf(q);
            let log_failure = normal_logsf(q);
            Ok(weight * (y * log_success + (1.0_f64 - y) * log_failure))
        }
        InverseLink::Standard(StandardLink::Logit) => {
            // ln(mu) = -softplus(-q) and ln(1 - mu) = -softplus(q).
            let softplus_neg = crate::linalg::utils::stable_softplus(-q);
            let softplus_pos = crate::linalg::utils::stable_softplus(q);
            Ok(weight * (-y * softplus_neg - (1.0_f64 - y) * softplus_pos))
        }
        InverseLink::Standard(StandardLink::CLogLog) => {
            let z = q.exp();
            // ln(1 - exp(-z)): underflowed z falls back to q, overflowed z saturates at 0.
            let log_p = if z == 0.0 {
                q
            } else if z.is_infinite() {
                0.0
            } else {
                log1mexp_neg_positive(z)
            };
            let log_survival = -z;
            let ll = weight * (y * log_p + (1.0_f64 - y) * log_survival);
            if !ll.is_finite() {
                return Err(GamlssError::NonFinite { reason: format!(
                    "binomial cloglog location-scale log likelihood is non-finite at y={y}, q={q}"
                ) }.into());
            }
            Ok(ll)
        }
        _ => bernoulli_log_likelihood_from_probability(y, weight, mu),
    }
}
/// Evaluates one observation row: scale jet, base index, inverse-link jet at
/// `q0 + etawiggle`, and the weighted log-likelihood.
///
/// # Errors
/// Returns an error when the inverse-link jet cannot be evaluated (message prefixed with
/// `location-scale inverse-link evaluation failed`) or when the log-likelihood evaluation
/// fails.
fn binomial_location_scalerow(
    y: f64,
    weight: f64,
    eta_t: f64,
    eta_ls: f64,
    etawiggle: f64,
    link_kind: &InverseLink,
) -> Result<BinomialLocationScaleRow, String> {
    let sigma_jet = exp_sigma_jet1_scalar(eta_ls);
    let sigma = sigma_jet.sigma;
    let dsigma_deta = sigma_jet.d1;
    let q0 = binomial_location_scale_q0(eta_t, sigma);
    let q = q0 + etawiggle;
    let inverse_link = inverse_link_jet_for_inverse_link(link_kind, q)
        .map_err(|e| format!("location-scale inverse-link evaluation failed: {e}"))?;
    let ll = binomial_location_scale_log_likelihood(y, weight, q, link_kind, inverse_link.mu)?;
    Ok(BinomialLocationScaleRow {
        sigma,
        dsigma_deta,
        q0,
        inverse_link,
        ll,
    })
}
fn binomial_location_scale_ll_only(
y: &Array1<f64>,
weights: &Array1<f64>,
eta_t: &Array1<f64>,
eta_ls: &Array1<f64>,
etawiggle: Option<&Array1<f64>>,
link_kind: &InverseLink,
) -> Result<f64, String> {
let n = y.len();
let y_slice = y.as_slice().expect("y must be contiguous");
let w_slice = weights.as_slice().expect("weights must be contiguous");
let et_slice = eta_t.as_slice().expect("eta_t must be contiguous");
let el_slice = eta_ls.as_slice().expect("eta_ls must be contiguous");
let ew_slice = etawiggle.map(|w| w.as_slice().expect("etawiggle must be contiguous"));
(0..n)
.into_par_iter()
.try_fold(
|| 0.0_f64,
|acc, i| -> Result<f64, String> {
let SigmaJet1 { sigma, .. } = exp_sigma_jet1_scalar(el_slice[i]);
let q0 = binomial_location_scale_q0(et_slice[i], sigma);
let q = q0 + ew_slice.map_or(0.0, |w| w[i]);
if matches!(link_kind, InverseLink::Standard(StandardLink::Probit)) {
return Ok(acc
+ binomial_location_scale_log_likelihood(
y_slice[i], w_slice[i], q, link_kind, 0.5,
)?);
}
let jet = inverse_link_jet_for_inverse_link(link_kind, q)
.map_err(|e| format!("location-scale inverse-link evaluation failed: {e}"))?;
Ok(acc
+ binomial_location_scale_log_likelihood(
y_slice[i], w_slice[i], q, link_kind, jet.mu,
)?)
},
)
.try_reduce(|| 0.0_f64, |a, b| Ok(a + b))
}
/// Evaluates every row of the binomial location-scale model in parallel and gathers the
/// per-row results into a `BinomialLocationScaleCore`.
///
/// A missing `etawiggle` contributes `0.0` to each row's index.
///
/// # Errors
/// * `GamlssError::DimensionMismatch` when `weights`, `eta_t`, `eta_ls`, or a supplied
///   `etawiggle` differ in length from `y`.
/// * Any error returned by `binomial_location_scalerow` for some row.
///
/// # Panics
/// Panics if any input array is not contiguous in memory (the `as_slice().expect(..)` calls).
fn binomial_location_scale_core(
y: &Array1<f64>,
weights: &Array1<f64>,
eta_t: &Array1<f64>,
eta_ls: &Array1<f64>,
etawiggle: Option<&Array1<f64>>,
link_kind: &InverseLink,
) -> Result<BinomialLocationScaleCore, String> {
let n = y.len();
if weights.len() != n || eta_t.len() != n || eta_ls.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "binomial location-scale core size mismatch".to_string(),
}
.into());
}
if let Some(w) = etawiggle
&& w.len() != n
{
return Err(GamlssError::DimensionMismatch {
reason: "binomial location-scale core wiggle size mismatch".to_string(),
}
.into());
}
let y_slice = y.as_slice().expect("y must be contiguous");
let w_slice = weights.as_slice().expect("weights must be contiguous");
let et_slice = eta_t.as_slice().expect("eta_t must be contiguous");
let el_slice = eta_ls.as_slice().expect("eta_ls must be contiguous");
let ew_slice = etawiggle.map(|w| w.as_slice().expect("etawiggle must be contiguous"));
// Output buffers, one slot per row; each is filled through a raw pointer below.
let mut sigma = vec![0.0_f64; n];
let mut dsigma_deta = vec![0.0_f64; n];
let mut q0 = vec![0.0_f64; n];
let mut mu = vec![0.0_f64; n];
let mut dmu_dq = vec![0.0_f64; n];
let mut d2mu_dq2 = vec![0.0_f64; n];
let mut d3mu_dq3 = vec![0.0_f64; n];
// Copyable raw-pointer wrapper so the parallel closure can write its row's results directly
// into the buffers above.
#[derive(Clone, Copy)]
struct SendPtr(*mut f64);
// SAFETY: the wrapper is only used inside this function, where every parallel task writes to
// a distinct index of a buffer that stays alive until the parallel iterator has finished.
unsafe impl Send for SendPtr {}
unsafe impl Sync for SendPtr {}
impl SendPtr {
// Caller contract: `i` must be in bounds of the buffer the pointer was taken from, and no
// other thread may access element `i` at the same time.
#[inline(always)]
unsafe fn write(self, i: usize, v: f64) {
unsafe { *self.0.add(i) = v };
}
}
let sigma_p = SendPtr(sigma.as_mut_ptr());
let dsigma_p = SendPtr(dsigma_deta.as_mut_ptr());
let q0_p = SendPtr(q0.as_mut_ptr());
let mu_p = SendPtr(mu.as_mut_ptr());
let dmu_p = SendPtr(dmu_dq.as_mut_ptr());
let d2mu_p = SendPtr(d2mu_dq2.as_mut_ptr());
let d3mu_p = SendPtr(d3mu_dq3.as_mut_ptr());
let ll = (0..n)
.into_par_iter()
.map(move |i| {
let row = binomial_location_scalerow(
y_slice[i],
w_slice[i],
et_slice[i],
el_slice[i],
ew_slice.map_or(0.0, |w| w[i]),
link_kind,
)?;
// SAFETY: `i` comes from `0..n` and every buffer was allocated with length `n`, so the
// write is in bounds; each index is produced once by the range iterator, so no two tasks
// touch the same element. The vectors are not read or moved until `try_reduce` returns.
unsafe {
sigma_p.write(i, row.sigma);
dsigma_p.write(i, row.dsigma_deta);
q0_p.write(i, row.q0);
mu_p.write(i, row.inverse_link.mu);
dmu_p.write(i, row.inverse_link.d1);
d2mu_p.write(i, row.inverse_link.d2);
d3mu_p.write(i, row.inverse_link.d3);
}
Ok::<f64, String>(row.ll)
})
.try_reduce(|| 0.0_f64, |a, b| Ok(a + b))?;
// On an error the `?` above returns early and the partially filled buffers are dropped.
Ok(BinomialLocationScaleCore {
sigma: Array1::from_vec(sigma),
dsigma_deta: Array1::from_vec(dsigma_deta),
q0: Array1::from_vec(q0),
mu: Array1::from_vec(mu),
dmu_dq: Array1::from_vec(dmu_dq),
d2mu_dq2: Array1::from_vec(d2mu_dq2),
d3mu_dq3: Array1::from_vec(d3mu_dq3),
log_likelihood: ll,
})
}
/// Per-row first directional derivatives of the three Hessian weight coefficients
/// (location/location, location/scale, scale/scale) along `(d_eta_t, d_eta_ls)`.
///
/// The index used for each row is `core.q0`. The objective derivatives `m1..m3` in the index
/// come from `binomial_neglog_q_derivatives_dispatch`.
///
/// # Panics
/// Panics if any input array is not contiguous, or if an array is shorter than `y`.
fn binomial_location_scale_first_directional_coefficients(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    core: &BinomialLocationScaleCore,
    d_eta_t: &Array1<f64>,
    d_eta_ls: &Array1<f64>,
    link_kind: &InverseLink,
) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
    let n = y.len();
    let mut coeff_tt = vec![0.0_f64; n];
    let mut coeff_tl = vec![0.0_f64; n];
    let mut coeff_ll = vec![0.0_f64; n];

    let y_slice = y.as_slice().expect("y must be contiguous");
    let w_slice = weights.as_slice().expect("weights must be contiguous");
    let q0_slice = core.q0.as_slice().expect("q0 must be contiguous");
    let sigma_slice = core.sigma.as_slice().expect("sigma must be contiguous");
    let dsigma_slice = core
        .dsigma_deta
        .as_slice()
        .expect("dsigma_deta must be contiguous");
    let mu_slice = core.mu.as_slice().expect("mu must be contiguous");
    let dmu_slice = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
    let d2mu_slice = core
        .d2mu_dq2
        .as_slice()
        .expect("d2mu_dq2 must be contiguous");
    let d3mu_slice = core
        .d3mu_dq3
        .as_slice()
        .expect("d3mu_dq3 must be contiguous");
    let det_slice = d_eta_t.as_slice().expect("d_eta_t must be contiguous");
    let del_slice = d_eta_ls.as_slice().expect("d_eta_ls must be contiguous");

    coeff_tt
        .par_iter_mut()
        .zip(coeff_tl.par_iter_mut())
        .zip(coeff_ll.par_iter_mut())
        .enumerate()
        .for_each(|(row, ((out_tt, out_tl), out_ll))| {
            let q = q0_slice[row];
            let inv_sigma = 1.0 / sigma_slice[row];
            // Relative slope of the scale: dsigma/deta divided by sigma.
            let rel_slope = dsigma_slice[row] / sigma_slice[row];
            let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
                y_slice[row],
                w_slice[row],
                q,
                mu_slice[row],
                dmu_slice[row],
                d2mu_slice[row],
                d3mu_slice[row],
                link_kind,
            );
            let step_t = det_slice[row];
            let step_ls = del_slice[row];
            let scale_step = rel_slope * step_ls;
            // Change of the index q along the direction.
            let dq = -inv_sigma * step_t - q * scale_step;
            *out_tt = inv_sigma * inv_sigma * (m3 * dq - 2.0 * m2 * scale_step);
            *out_tl = rel_slope
                * inv_sigma
                * (q * m3 * dq + m2 * (2.0 * dq - q * scale_step) - m1 * scale_step);
            *out_ll = rel_slope * rel_slope * (m1 + 3.0 * q * m2 + q * q * m3) * dq;
        });

    (
        Array1::from_vec(coeff_tt),
        Array1::from_vec(coeff_tl),
        Array1::from_vec(coeff_ll),
    )
}
/// Per-row second directional derivatives of the three Hessian weight coefficients
/// (location/location, location/scale, scale/scale) along directions `u` and `v`.
///
/// The index used for each row is `core.q0`. `m1..m3` come from
/// `binomial_neglog_q_derivatives_dispatch` and `m4` from
/// `binomial_neglog_q_fourth_derivative_dispatch`.
///
/// # Errors
/// Propagates an error from the fourth-derivative dispatch for any row.
fn binomial_location_scalesecond_directional_coefficients(
    y: &Array1<f64>,
    weights: &Array1<f64>,
    core: &BinomialLocationScaleCore,
    d_eta_t_u: &Array1<f64>,
    d_eta_ls_u: &Array1<f64>,
    d_eta_t_v: &Array1<f64>,
    d_eta_ls_v: &Array1<f64>,
    link_kind: &InverseLink,
) -> Result<(Array1<f64>, Array1<f64>, Array1<f64>), String> {
    use rayon::iter::{IntoParallelIterator, ParallelIterator};
    let n = y.len();
    let per_row = (0..n)
        .into_par_iter()
        .map(|row| -> Result<(f64, f64, f64), String> {
            let q = core.q0[row];
            let inv_sigma = 1.0 / core.sigma[row];
            let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
                y[row],
                weights[row],
                q,
                core.mu[row],
                core.dmu_dq[row],
                core.d2mu_dq2[row],
                core.d3mu_dq3[row],
                link_kind,
            );
            let m4 = binomial_neglog_q_fourth_derivative_dispatch(
                y[row],
                weights[row],
                q,
                core.mu[row],
                core.dmu_dq[row],
                core.d2mu_dq2[row],
                core.d3mu_dq3[row],
                link_kind,
            )?;
            // Relative slope of the scale: dsigma/deta divided by sigma.
            let rel_slope = core.dsigma_deta[row] / core.sigma[row];
            let t_u = d_eta_t_u[row];
            let s_u = rel_slope * d_eta_ls_u[row];
            let t_v = d_eta_t_v[row];
            let s_v = rel_slope * d_eta_ls_v[row];
            // First and second directional changes of the index q.
            let dq_u = -inv_sigma * t_u - q * s_u;
            let dq_v = -inv_sigma * t_v - q * s_v;
            let d2q = inv_sigma * (t_u * s_v + s_u * t_v) + q * s_u * s_v;
            let tt = inv_sigma
                * inv_sigma
                * (m4 * dq_u * dq_v
                    + m3 * (d2q - 2.0 * s_v * dq_u - 2.0 * s_u * dq_v)
                    + 4.0 * m2 * s_u * s_v);
            let tl = rel_slope
                * inv_sigma
                * (q * m4 * dq_u * dq_v
                    + m3 * (q * d2q + 3.0 * dq_u * dq_v - q * (s_v * dq_u + s_u * dq_v))
                    + m2 * (q * s_u * s_v + 2.0 * d2q - 2.0 * (s_v * dq_u + s_u * dq_v))
                    + m1 * s_u * s_v);
            let ll = rel_slope
                * rel_slope
                * (q * q * m4 * dq_u * dq_v
                    + m3 * (q * q * d2q + 5.0 * q * dq_u * dq_v)
                    + m2 * (3.0 * q * d2q + 4.0 * dq_u * dq_v)
                    + m1 * d2q);
            Ok((tt, tl, ll))
        })
        .collect::<Result<Vec<(f64, f64, f64)>, String>>()?;

    // Split the row triples into the three output columns.
    let mut column_tt = Vec::with_capacity(n);
    let mut column_tl = Vec::with_capacity(n);
    let mut column_ll = Vec::with_capacity(n);
    for (tt, tl, ll) in per_row {
        column_tt.push(tt);
        column_tl.push(tl);
        column_ll.push(ll);
    }
    Ok((
        Array1::from_vec(column_tt),
        Array1::from_vec(column_tl),
        Array1::from_vec(column_ll),
    ))
}
/// Gaussian location-scale family: response, observation weights, optional block designs,
/// a resource policy, and a lock-protected cache of joint row scalars.
pub struct GaussianLocationScaleFamily {
/// Response vector.
pub y: Array1<f64>,
/// Per-observation weights.
pub weights: Array1<f64>,
/// Optional design matrix for the mean block.
pub mu_design: Option<DesignMatrix>,
/// Optional design matrix for the log-sigma block.
pub log_sigma_design: Option<DesignMatrix>,
/// Resource policy returned by `ws_policy` in the joint-psi workspace plumbing.
pub policy: crate::resource::ResourcePolicy,
/// Cached `GaussianJointRowScalars` together with six `f64` values.
/// NOTE(review): the six floats look like a cache key for the state the scalars were
/// computed at — confirm against the code that fills this cache.
pub cached_row_scalars:
std::sync::RwLock<Option<(f64, f64, f64, f64, f64, f64, Arc<GaussianJointRowScalars>)>>,
}
impl Clone for GaussianLocationScaleFamily {
    /// Deep-copies the data fields and wraps a snapshot of the current cache contents in a
    /// fresh `RwLock`, so the clone does not share its lock with the original.
    ///
    /// # Panics
    /// Panics with `lock poisoned` if the cache lock has been poisoned.
    fn clone(&self) -> Self {
        let cache_snapshot = self
            .cached_row_scalars
            .read()
            .expect("lock poisoned")
            .clone();
        Self {
            y: self.y.clone(),
            weights: self.weights.clone(),
            mu_design: self.mu_design.clone(),
            log_sigma_design: self.log_sigma_design.clone(),
            policy: self.policy.clone(),
            cached_row_scalars: std::sync::RwLock::new(cache_snapshot),
        }
    }
}
/// Data describing one psi direction for the joint location-scale workspace.
///
/// NOTE(review): the field meanings below are inferred from names and types only; the code
/// that builds this struct is outside this section — confirm there.
struct LocationScaleJointPsiDirection {
/// Index of the parameter block (presumably the block that owns this psi coordinate).
block_idx: usize,
/// Index within that block (presumably the position in the block's derivative list).
local_idx: usize,
/// Psi-derivative map of the primary (location) design.
x_primary_psi: PsiDesignMap,
/// Psi-derivative map of the log-scale design.
x_ls_psi: PsiDesignMap,
/// Per-row vector associated with the primary block for this direction.
z_primary_psi: Array1<f64>,
/// Per-row vector associated with the log-scale block for this direction.
z_ls_psi: Array1<f64>,
}
/// Second-order (pair `a`, `b`) psi quantities for the joint location-scale workspace.
///
/// Each design second derivative can be carried either as an operator-style action or as a
/// dense matrix; both are optional.
/// NOTE(review): which representation takes precedence is decided by code outside this
/// section — confirm there.
struct LocationScaleJointPsiSecondDrifts {
/// Operator form of the primary design's second psi derivative, if available.
x_primary_ab_action: Option<CustomFamilyPsiSecondDesignAction>,
/// Operator form of the log-scale design's second psi derivative, if available.
x_ls_ab_action: Option<CustomFamilyPsiSecondDesignAction>,
/// Dense form of the primary design's second psi derivative, if available.
x_primary_ab: Option<Array2<f64>>,
/// Dense form of the log-scale design's second psi derivative, if available.
x_ls_ab: Option<Array2<f64>>,
/// Per-row second-order vector for the primary block.
z_primary_ab: Array1<f64>,
/// Per-row second-order vector for the log-scale block.
z_ls_ab: Array1<f64>,
}
/// Adapter trait that lets `LocationScaleJointPsiWorkspace` drive any location-scale family
/// through one set of `ws_*` hooks.
///
/// Implementations in this file forward each hook to the family's inherent method.
trait LocationScaleJointPsiFamily: Clone + Send + Sync + 'static {
/// Per-direction payload cached by the workspace.
type Direction: Send + Sync + 'static;
/// Family name used in the workspace's error messages.
const LABEL: &'static str;
/// Resource policy handed to `ws_psi_direction` by the workspace.
fn ws_policy(&self) -> &crate::resource::ResourcePolicy;
/// Returns the dense (location, scale) block designs, or `None` when they are unavailable.
/// The workspace constructor treats `None` as an unsupported configuration.
fn ws_exact_joint_dense_block_designs<'a>(
&'a self,
specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String>;
/// Builds the direction payload for `psi_index`, or `None` when no direction applies.
fn ws_psi_direction(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
policy: &crate::resource::ResourcePolicy,
) -> Result<Option<Self::Direction>, String>;
/// Computes the second-order psi terms for a pair of directions. `subsample` is the
/// optional row subset supplied by the workspace.
fn ws_psi_second_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_a: &Self::Direction,
psi_b: &Self::Direction,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<ExactNewtonJointPsiSecondOrderTerms, String>;
/// Computes the dense directional derivative of the psi Hessian term along `d_beta_flat`.
/// `subsample` is the optional row subset supplied by the workspace.
fn ws_psi_hessian_directional_from_parts(
&self,
block_states: &[ParameterBlockState],
psi_dir: &Self::Direction,
d_beta_flat: &Array1<f64>,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<Array2<f64>, String>;
}
/// Workspace hooks for the plain Gaussian location-scale family. Every hook forwards its
/// arguments unchanged to the family's inherent method of the matching purpose.
impl LocationScaleJointPsiFamily for GaussianLocationScaleFamily {
type Direction = LocationScaleJointPsiDirection;
const LABEL: &'static str = "GaussianLocationScaleFamily";
/// Returns the family's stored resource policy.
fn ws_policy(&self) -> &crate::resource::ResourcePolicy {
&self.policy
}
/// Forwards to `exact_joint_dense_block_designs`.
fn ws_exact_joint_dense_block_designs<'a>(
&'a self,
specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
self.exact_joint_dense_block_designs(specs)
}
/// Forwards to `exact_newton_joint_psi_direction`.
fn ws_psi_direction(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
self.exact_newton_joint_psi_direction(
block_states,
derivative_blocks,
psi_index,
design_loc,
design_scale,
policy,
)
}
/// Forwards to `exact_newton_joint_psisecond_order_terms_from_parts`, including the
/// optional `subsample` rows.
fn ws_psi_second_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_a: &LocationScaleJointPsiDirection,
psi_b: &LocationScaleJointPsiDirection,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<ExactNewtonJointPsiSecondOrderTerms, String> {
self.exact_newton_joint_psisecond_order_terms_from_parts(
block_states,
derivative_blocks,
psi_a,
psi_b,
design_loc,
design_scale,
subsample,
)
}
/// Forwards to `exact_newton_joint_psihessian_directional_derivative_from_parts`,
/// including the optional `subsample` rows.
fn ws_psi_hessian_directional_from_parts(
&self,
block_states: &[ParameterBlockState],
psi_dir: &LocationScaleJointPsiDirection,
d_beta_flat: &Array1<f64>,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<Array2<f64>, String> {
self.exact_newton_joint_psihessian_directional_derivative_from_parts(
block_states,
psi_dir,
d_beta_flat,
design_loc,
design_scale,
subsample,
)
}
}
/// Workspace hooks for the Gaussian location-scale family with a wiggle block. Each hook
/// forwards to the family's inherent method.
///
/// NOTE(review): unlike the plain Gaussian implementation, the two `*_from_parts` hooks do
/// not forward `outer_rows` to the inherent methods, so a supplied subsample has no effect
/// on the result here — confirm this is intended.
impl LocationScaleJointPsiFamily for GaussianLocationScaleWiggleFamily {
type Direction = LocationScaleJointPsiDirection;
const LABEL: &'static str = "GaussianLocationScaleWiggleFamily";
/// Returns the family's stored resource policy.
fn ws_policy(&self) -> &crate::resource::ResourcePolicy {
&self.policy
}
/// Forwards to `exact_joint_dense_block_designs`.
fn ws_exact_joint_dense_block_designs<'a>(
&'a self,
specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
self.exact_joint_dense_block_designs(specs)
}
/// Forwards to `exact_newton_joint_psi_direction`.
fn ws_psi_direction(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
self.exact_newton_joint_psi_direction(
block_states,
derivative_blocks,
psi_index,
design_loc,
design_scale,
policy,
)
}
/// Forwards to `exact_newton_joint_psisecond_order_terms_from_parts` without the
/// subsample rows.
fn ws_psi_second_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_a: &LocationScaleJointPsiDirection,
psi_b: &LocationScaleJointPsiDirection,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
outer_rows: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<ExactNewtonJointPsiSecondOrderTerms, String> {
// The only use of `outer_rows`: a length check against `isize::MAX`.
assert!(outer_rows.map_or(true, |r| r.len() <= isize::MAX as usize));
self.exact_newton_joint_psisecond_order_terms_from_parts(
block_states,
derivative_blocks,
psi_a,
psi_b,
design_loc,
design_scale,
)
}
/// Forwards to `exact_newton_joint_psihessian_directional_derivative_from_parts` without
/// the subsample rows.
fn ws_psi_hessian_directional_from_parts(
&self,
block_states: &[ParameterBlockState],
psi_dir: &LocationScaleJointPsiDirection,
d_beta_flat: &Array1<f64>,
design_loc: &Array2<f64>,
design_scale: &Array2<f64>,
outer_rows: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<Array2<f64>, String> {
// The only use of `outer_rows`: a length check against `isize::MAX`.
assert!(outer_rows.map_or(true, |r| r.len() <= isize::MAX as usize));
self.exact_newton_joint_psihessian_directional_derivative_from_parts(
block_states,
psi_dir,
d_beta_flat,
design_loc,
design_scale,
)
}
}
/// Workspace that serves exact joint psi second-order terms and Hessian directional
/// derivatives for a location-scale family `F`.
struct LocationScaleJointPsiWorkspace<F: LocationScaleJointPsiFamily> {
/// Family instance whose `ws_*` hooks do the actual computation.
family: F,
/// Block states passed to every hook.
block_states: Vec<ParameterBlockState>,
/// Psi derivative descriptors per block; their total count sizes the direction cache.
derivative_blocks: Vec<Vec<CustomFamilyBlockPsiDerivative>>,
/// Shared dense design of the location block.
design_loc: Arc<Array2<f64>>,
/// Shared dense design of the scale block.
design_scale: Arc<Array2<f64>>,
/// Cache of per-index psi directions, filled on first request.
psi_directions: ExactNewtonJointPsiDirectCache<F::Direction>,
/// Optional row subsample whose rows are handed to the `*_from_parts` hooks.
outer_score_subsample: Option<Arc<crate::families::marginal_slope_shared::OuterScoreSubsample>>,
}
impl<F: LocationScaleJointPsiFamily> LocationScaleJointPsiWorkspace<F> {
    /// Builds a workspace that uses every row (no outer-score subsample).
    fn new(
        family: F,
        block_states: Vec<ParameterBlockState>,
        specs: &[ParameterBlockSpec],
        derivative_blocks: Vec<Vec<CustomFamilyBlockPsiDerivative>>,
    ) -> Result<Self, String> {
        let no_subsample = None;
        Self::new_with_subsample(family, block_states, specs, derivative_blocks, no_subsample)
    }

    /// Builds a workspace, optionally restricted to an outer-score subsample.
    ///
    /// # Errors
    /// Returns `GamlssError::UnsupportedConfiguration` when the family cannot supply dense
    /// block designs, and propagates any error from the design lookup itself.
    fn new_with_subsample(
        family: F,
        block_states: Vec<ParameterBlockState>,
        specs: &[ParameterBlockSpec],
        derivative_blocks: Vec<Vec<CustomFamilyBlockPsiDerivative>>,
        outer_score_subsample: Option<
            Arc<crate::families::marginal_slope_shared::OuterScoreSubsample>,
        >,
    ) -> Result<Self, String> {
        let dense_designs = family.ws_exact_joint_dense_block_designs(Some(specs))?;
        let (design_loc, design_scale) = match dense_designs {
            Some((loc, scale)) => (
                shared_dense_arc(loc.as_ref()),
                shared_dense_arc(scale.as_ref()),
            ),
            None => {
                return Err(GamlssError::UnsupportedConfiguration {
                    reason: format!(
                        "{} exact joint psi workspace requires dense block designs",
                        F::LABEL,
                    ),
                }
                .into());
            }
        };
        // One cache slot per psi coordinate across all blocks.
        let psi_dim = derivative_blocks.iter().map(|block| block.len()).sum();
        let psi_directions = ExactNewtonJointPsiDirectCache::new(psi_dim);
        Ok(Self {
            family,
            block_states,
            derivative_blocks,
            design_loc,
            design_scale,
            psi_directions,
            outer_score_subsample,
        })
    }

    /// Returns the cached direction for `psi_index`, computing it through the family hook on
    /// first use.
    fn psi_direction(&self, psi_index: usize) -> Result<Option<Arc<F::Direction>>, String> {
        let build_direction = || {
            self.family.ws_psi_direction(
                &self.block_states,
                &self.derivative_blocks,
                psi_index,
                self.design_loc.as_ref(),
                self.design_scale.as_ref(),
                self.family.ws_policy(),
            )
        };
        self.psi_directions.get_or_try_init(psi_index, build_direction)
    }

    /// Rows of the outer-score subsample, or `None` when the workspace uses every row.
    fn subsample_rows(
        &self,
    ) -> Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]> {
        let subsample = self.outer_score_subsample.as_ref()?;
        Some(subsample.rows.as_ref().as_slice())
    }
}
impl<F> ExactNewtonJointPsiWorkspace for LocationScaleJointPsiWorkspace<F>
where
    F: LocationScaleJointPsiFamily,
{
    /// Second-order psi terms for the pair `(psi_i, psi_j)`.
    ///
    /// Returns `Ok(None)` as soon as one of the two directions is unavailable; the second
    /// direction is only requested when the first one exists.
    fn second_order_terms(
        &self,
        psi_i: usize,
        psi_j: usize,
    ) -> Result<Option<ExactNewtonJointPsiSecondOrderTerms>, String> {
        let dir_i = match self.psi_direction(psi_i)? {
            Some(direction) => direction,
            None => return Ok(None),
        };
        let dir_j = match self.psi_direction(psi_j)? {
            Some(direction) => direction,
            None => return Ok(None),
        };
        let terms = self.family.ws_psi_second_order_terms_from_parts(
            &self.block_states,
            &self.derivative_blocks,
            dir_i.as_ref(),
            dir_j.as_ref(),
            self.design_loc.as_ref(),
            self.design_scale.as_ref(),
            self.subsample_rows(),
        )?;
        Ok(Some(terms))
    }

    /// Dense directional derivative of the psi Hessian term along `d_beta_flat`, or
    /// `Ok(None)` when no direction exists for `psi_index`.
    fn hessian_directional_derivative(
        &self,
        psi_index: usize,
        d_beta_flat: &Array1<f64>,
    ) -> Result<Option<crate::solver::estimate::reml::unified::DriftDerivResult>, String> {
        let direction = match self.psi_direction(psi_index)? {
            Some(direction) => direction,
            None => return Ok(None),
        };
        let dense = self.family.ws_psi_hessian_directional_from_parts(
            &self.block_states,
            direction.as_ref(),
            d_beta_flat,
            self.design_loc.as_ref(),
            self.design_scale.as_ref(),
            self.subsample_rows(),
        )?;
        Ok(Some(
            crate::solver::estimate::reml::unified::DriftDerivResult::Dense(dense),
        ))
    }
}
/// Joint psi workspace specialised to the plain Gaussian location-scale family.
type GaussianLocationScaleExactNewtonJointPsiWorkspace =
LocationScaleJointPsiWorkspace<GaussianLocationScaleFamily>;
/// Joint psi workspace specialised to the Gaussian location-scale family with a wiggle block.
type GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace =
LocationScaleJointPsiWorkspace<GaussianLocationScaleWiggleFamily>;
/// Per-row scalars shared by the Gaussian joint derivative routines, as computed by
/// `gaussian_jointrow_scalars`. With residual `r = y - etamu` and scale `sigma`:
#[derive(Clone)]
pub struct GaussianJointRowScalars {
/// Raw observation weight.
obs_weight: Array1<f64>,
/// `weight / sigma^2`.
w: Array1<f64>,
/// `r * w`.
m: Array1<f64>,
/// `r^2 * w`.
n: Array1<f64>,
/// Value returned by `logb_dlog_sigma_deta(sigma, dsigma)` (by its name, the derivative of
/// `log sigma` with respect to the scale predictor).
kappa: Array1<f64>,
/// `kappa * (1 - kappa)`.
kappa_prime: Array1<f64>,
/// `kappa_prime * (1 - 2 * kappa)`.
kappa_dprime: Array1<f64>,
}
/// Per-row first-order psi weights produced by `gaussian_joint_psi_firstweights` for a psi
/// direction with predictor drifts `mu_a` (mean) and `eta_a` (log-scale).
struct GaussianJointPsiFirstWeights {
/// `scoremu * mu_a + score_ls * eta_a`.
objective_psirow: Array1<f64>,
/// Score weight for the mean predictor (`-m`).
scoremu: Array1<f64>,
/// Score weight for the log-scale predictor (`kappa * (obs_weight - n)`).
score_ls: Array1<f64>,
/// Change of `scoremu` along the psi direction.
dscoremu: Array1<f64>,
/// Change of `score_ls` along the psi direction.
dscore_ls: Array1<f64>,
/// Mean/mean Hessian weight (`w`).
hmumu: Array1<f64>,
/// Mean/log-scale Hessian weight (written as zero by the producer).
hmu_ls: Array1<f64>,
/// Log-scale/log-scale Hessian weight (`2 * kappa^2 * obs_weight`).
h_ls_ls: Array1<f64>,
/// Change of `hmumu` along the psi direction.
dhmumu: Array1<f64>,
/// Change of `hmu_ls` along the psi direction (written as zero by the producer).
dhmu_ls: Array1<f64>,
/// Change of `h_ls_ls` along the psi direction.
dh_ls_ls: Array1<f64>,
}
/// Per-row second-order psi weights produced by `gaussian_joint_psisecondweights` for a
/// pair of psi directions.
struct GaussianJointPsiSecondWeights {
/// Second-order objective contribution per row.
objective_psi_psirow: Array1<f64>,
/// Second-order change of the mean score weight.
d2scoremu: Array1<f64>,
/// Second-order change of the log-scale score weight.
d2score_ls: Array1<f64>,
/// Second-order change of the mean/mean Hessian weight.
d2hmumu: Array1<f64>,
/// Second-order change of the mean/log-scale Hessian weight (written as zero by the producer).
d2hmu_ls: Array1<f64>,
/// Second-order change of the log-scale/log-scale Hessian weight.
d2h_ls_ls: Array1<f64>,
}
/// Per-row Hessian weight changes returned by `gaussian_joint_psi_mixed_driftweights`.
///
/// NOTE(review): the producer's body is not fully visible in this section; the field
/// descriptions below are inferred from the names — confirm against the producer.
struct GaussianJointPsiMixedDriftWeights {
/// First-order change of the mean/mean weight (suffix `_u` presumably marks the
/// coefficient direction).
dhmumu_u: Array1<f64>,
/// First-order change of the mean/log-scale weight.
dhmu_ls_u: Array1<f64>,
/// First-order change of the log-scale/log-scale weight.
dh_ls_ls_u: Array1<f64>,
/// Second-order (mixed) change of the mean/mean weight.
d2hmumu: Array1<f64>,
/// Second-order (mixed) change of the mean/log-scale weight.
d2hmu_ls: Array1<f64>,
/// Second-order (mixed) change of the log-scale/log-scale weight.
d2h_ls_ls: Array1<f64>,
}
fn apply_ht_mask_first(
weights: &mut GaussianJointPsiFirstWeights,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = weights.objective_psirow.len();
let mut obj = Array1::<f64>::zeros(n);
let mut smu = Array1::<f64>::zeros(n);
let mut sls = Array1::<f64>::zeros(n);
let mut dsmu = Array1::<f64>::zeros(n);
let mut dsls = Array1::<f64>::zeros(n);
let mut hmm = Array1::<f64>::zeros(n);
let mut hml = Array1::<f64>::zeros(n);
let mut hll = Array1::<f64>::zeros(n);
let mut dhmm = Array1::<f64>::zeros(n);
let mut dhml = Array1::<f64>::zeros(n);
let mut dhll = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
let w = r.weight;
obj[i] = weights.objective_psirow[i] * w;
smu[i] = weights.scoremu[i] * w;
sls[i] = weights.score_ls[i] * w;
dsmu[i] = weights.dscoremu[i] * w;
dsls[i] = weights.dscore_ls[i] * w;
hmm[i] = weights.hmumu[i] * w;
hml[i] = weights.hmu_ls[i] * w;
hll[i] = weights.h_ls_ls[i] * w;
dhmm[i] = weights.dhmumu[i] * w;
dhml[i] = weights.dhmu_ls[i] * w;
dhll[i] = weights.dh_ls_ls[i] * w;
}
weights.objective_psirow = obj;
weights.scoremu = smu;
weights.score_ls = sls;
weights.dscoremu = dsmu;
weights.dscore_ls = dsls;
weights.hmumu = hmm;
weights.hmu_ls = hml;
weights.h_ls_ls = hll;
weights.dhmumu = dhmm;
weights.dhmu_ls = dhml;
weights.dh_ls_ls = dhll;
}
fn apply_ht_mask_second(
weights: &mut GaussianJointPsiSecondWeights,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = weights.objective_psi_psirow.len();
let mut obj = Array1::<f64>::zeros(n);
let mut d2smu = Array1::<f64>::zeros(n);
let mut d2sls = Array1::<f64>::zeros(n);
let mut d2hmm = Array1::<f64>::zeros(n);
let mut d2hml = Array1::<f64>::zeros(n);
let mut d2hll = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
let w = r.weight;
obj[i] = weights.objective_psi_psirow[i] * w;
d2smu[i] = weights.d2scoremu[i] * w;
d2sls[i] = weights.d2score_ls[i] * w;
d2hmm[i] = weights.d2hmumu[i] * w;
d2hml[i] = weights.d2hmu_ls[i] * w;
d2hll[i] = weights.d2h_ls_ls[i] * w;
}
weights.objective_psi_psirow = obj;
weights.d2scoremu = d2smu;
weights.d2score_ls = d2sls;
weights.d2hmumu = d2hmm;
weights.d2hmu_ls = d2hml;
weights.d2h_ls_ls = d2hll;
}
fn apply_ht_mask_mixed(
weights: &mut GaussianJointPsiMixedDriftWeights,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = weights.dhmumu_u.len();
let mut dhmm_u = Array1::<f64>::zeros(n);
let mut dhml_u = Array1::<f64>::zeros(n);
let mut dhll_u = Array1::<f64>::zeros(n);
let mut d2hmm = Array1::<f64>::zeros(n);
let mut d2hml = Array1::<f64>::zeros(n);
let mut d2hll = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
let w = r.weight;
dhmm_u[i] = weights.dhmumu_u[i] * w;
dhml_u[i] = weights.dhmu_ls_u[i] * w;
dhll_u[i] = weights.dh_ls_ls_u[i] * w;
d2hmm[i] = weights.d2hmumu[i] * w;
d2hml[i] = weights.d2hmu_ls[i] * w;
d2hll[i] = weights.d2h_ls_ls[i] * w;
}
weights.dhmumu_u = dhmm_u;
weights.dhmu_ls_u = dhml_u;
weights.dh_ls_ls_u = dhll_u;
weights.d2hmumu = d2hmm;
weights.d2hmu_ls = d2hml;
weights.d2h_ls_ls = d2hll;
}
/// Computes the per-row scalars shared by the Gaussian joint derivative routines.
///
/// For each row, with `sigma` from the log-b sigma link at `eta_ls[i]` and residual
/// `r = y[i] - etamu[i]`, stores the raw weight, `w = weight / sigma^2`, `m = r * w`,
/// `n = r^2 * w`, `kappa = logb_dlog_sigma_deta(sigma, dsigma)`,
/// `kappa' = kappa (1 - kappa)` and `kappa'' = kappa' (1 - 2 kappa)`.
///
/// # Errors
/// Returns `GamlssError::DimensionMismatch` when the inputs differ in length from `y`.
fn gaussian_jointrow_scalars(
    y: &Array1<f64>,
    etamu: &Array1<f64>,
    eta_ls: &Array1<f64>,
    weights: &Array1<f64>,
) -> Result<GaussianJointRowScalars, String> {
    let nobs = y.len();
    let lengths_agree = etamu.len() == nobs && eta_ls.len() == nobs && weights.len() == nobs;
    if !lengths_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "Gaussian joint row scalar input size mismatch".to_string(),
        }
        .into());
    }
    let mut obs_weight = Vec::with_capacity(nobs);
    let mut w = Vec::with_capacity(nobs);
    let mut m = Vec::with_capacity(nobs);
    let mut n = Vec::with_capacity(nobs);
    let mut kappa = Vec::with_capacity(nobs);
    let mut kappa_prime = Vec::with_capacity(nobs);
    let mut kappa_dprime = Vec::with_capacity(nobs);
    for row in 0..nobs {
        let jet = crate::families::sigma_link::logb_sigma_jet1_scalar(eta_ls[row]);
        let sigma = jet.sigma;
        let k = logb_dlog_sigma_deta(sigma, jet.d1);
        let k_prime = k * (1.0 - k);
        let k_dprime = k_prime * (1.0 - 2.0 * k);
        let precision_weight = weights[row] / (sigma * sigma);
        let residual = y[row] - etamu[row];
        obs_weight.push(weights[row]);
        w.push(precision_weight);
        m.push(residual * precision_weight);
        n.push(residual * residual * precision_weight);
        kappa.push(k);
        kappa_prime.push(k_prime);
        kappa_dprime.push(k_dprime);
    }
    Ok(GaussianJointRowScalars {
        obs_weight: Array1::from_vec(obs_weight),
        w: Array1::from_vec(w),
        m: Array1::from_vec(m),
        n: Array1::from_vec(n),
        kappa: Array1::from_vec(kappa),
        kappa_prime: Array1::from_vec(kappa_prime),
        kappa_dprime: Array1::from_vec(kappa_dprime),
    })
}
/// First directional derivatives of three per-row Gaussian joint weights along the
/// predictor perturbation `(dotmu, dot_eta)`.
///
/// Returns `(w_u, c_u, d_u)` where, per row:
/// * `w_u = -2 w kappa dot_eta`,
/// * `c_u = kappa (-2 w dotmu - 4 m kappa dot_eta) + 2 m kappa' dot_eta`,
/// * `d_u = 4 kappa kappa' obs_weight dot_eta`.
fn gaussian_joint_first_directionalweights(
    scalars: &GaussianJointRowScalars,
    dotmu: &Array1<f64>,
    dot_eta: &Array1<f64>,
) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
    let nobs = scalars.w.len();
    let mut w_u = Vec::with_capacity(nobs);
    let mut c_u = Vec::with_capacity(nobs);
    let mut d_u = Vec::with_capacity(nobs);
    for row in 0..nobs {
        let w = scalars.w[row];
        let m = scalars.m[row];
        let k = scalars.kappa[row];
        let k_prime = scalars.kappa_prime[row];
        let obs_w = scalars.obs_weight[row];
        let step_mu = dotmu[row];
        let step_eta = dot_eta[row];
        let scaled_step = k * step_eta;
        w_u.push(-2.0 * w * scaled_step);
        c_u.push(k * (-2.0 * w * step_mu - 4.0 * m * scaled_step) + 2.0 * m * k_prime * step_eta);
        d_u.push(4.0 * k * k_prime * obs_w * step_eta);
    }
    (
        Array1::from_vec(w_u),
        Array1::from_vec(c_u),
        Array1::from_vec(d_u),
    )
}
/// Second directional derivatives of the three per-row Gaussian joint weights along the
/// perturbations `u = (dotmu_u, dot_eta_u)` and `v = (dotmuv, dot_etav)`.
///
/// Returns `(w_uv, c_uv, d_uv)`, the `v`-derivatives of the `(w_u, c_u, d_u)` triple
/// produced by `gaussian_joint_first_directionalweights`.
fn gaussian_jointsecond_directionalweights(
    scalars: &GaussianJointRowScalars,
    dotmu_u: &Array1<f64>,
    dot_eta_u: &Array1<f64>,
    dotmuv: &Array1<f64>,
    dot_etav: &Array1<f64>,
) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
    let nobs = scalars.w.len();
    let mut w_uv = Vec::with_capacity(nobs);
    let mut c_uv = Vec::with_capacity(nobs);
    let mut d_uv = Vec::with_capacity(nobs);
    for row in 0..nobs {
        let w = scalars.w[row];
        let m = scalars.m[row];
        let k = scalars.kappa[row];
        let k_prime = scalars.kappa_prime[row];
        let k_dprime = scalars.kappa_dprime[row];
        let obs_w = scalars.obs_weight[row];
        let mu_u = dotmu_u[row];
        let mu_v = dotmuv[row];
        let eta_u = dot_eta_u[row];
        let eta_v = dot_etav[row];
        let scaled_u = k * eta_u;
        let scaled_v = k * eta_v;
        // Symmetric mean/scale cross product and the pure scale product.
        let cross_mu_eta = mu_u * eta_v + mu_v * eta_u;
        let eta_product = eta_u * eta_v;
        w_uv.push(4.0 * w * scaled_u * scaled_v - 2.0 * w * k_prime * eta_product);
        c_uv.push(
            k * (4.0 * w * (mu_u * scaled_v + mu_v * scaled_u) + 8.0 * m * scaled_u * scaled_v)
                - 2.0 * w * k_prime * cross_mu_eta
                + 2.0 * m * (k_dprime - 6.0 * k * k_prime) * eta_product,
        );
        d_uv.push(4.0 * obs_w * (k_prime * k_prime + k * k_dprime) * eta_product);
    }
    (
        Array1::from_vec(w_uv),
        Array1::from_vec(c_uv),
        Array1::from_vec(d_uv),
    )
}
/// Per-row first-order psi weights for a psi direction with predictor drifts `mu_a` (mean)
/// and `eta_a` (log-scale).
///
/// Scores are `-m` (mean) and `kappa (obs_weight - n)` (log-scale). Hessian weights are `w`
/// (mean/mean), zero (mean/log-scale) and `2 kappa^2 obs_weight` (log-scale/log-scale). The
/// `d*` entries are the changes of those quantities along the drift, and the objective row
/// is `scoremu * mu_a + score_ls * eta_a`.
fn gaussian_joint_psi_firstweights(
    scalars: &GaussianJointRowScalars,
    mu_a: &Array1<f64>,
    eta_a: &Array1<f64>,
) -> GaussianJointPsiFirstWeights {
    let nobs = scalars.w.len();
    let mut objective_psirow = Vec::with_capacity(nobs);
    let mut scoremu = Vec::with_capacity(nobs);
    let mut score_ls = Vec::with_capacity(nobs);
    let mut dscoremu = Vec::with_capacity(nobs);
    let mut dscore_ls = Vec::with_capacity(nobs);
    let mut hmumu = Vec::with_capacity(nobs);
    let mut h_ls_ls = Vec::with_capacity(nobs);
    let mut dhmumu = Vec::with_capacity(nobs);
    let mut dh_ls_ls = Vec::with_capacity(nobs);
    for row in 0..nobs {
        let m = scalars.m[row];
        let n = scalars.n[row];
        let k = scalars.kappa[row];
        let k_prime = scalars.kappa_prime[row];
        let obs_w = scalars.obs_weight[row];
        let drift_mu = mu_a[row];
        let drift_eta = eta_a[row];
        let scaled_drift = k * drift_eta;
        let score_mu_row = -m;
        let score_ls_row = k * (obs_w - n);
        let w = scalars.w[row];
        scoremu.push(score_mu_row);
        score_ls.push(score_ls_row);
        dscoremu.push(w * drift_mu + 2.0 * m * scaled_drift);
        dscore_ls.push(
            k * (2.0 * m * drift_mu + 2.0 * n * scaled_drift) + k_prime * (obs_w - n) * drift_eta,
        );
        hmumu.push(w);
        h_ls_ls.push(2.0 * k * k * obs_w);
        dhmumu.push(-2.0 * w * scaled_drift);
        dh_ls_ls.push(4.0 * k * k_prime * obs_w * drift_eta);
        objective_psirow.push(score_mu_row * drift_mu + score_ls_row * drift_eta);
    }
    GaussianJointPsiFirstWeights {
        objective_psirow: Array1::from_vec(objective_psirow),
        scoremu: Array1::from_vec(scoremu),
        score_ls: Array1::from_vec(score_ls),
        dscoremu: Array1::from_vec(dscoremu),
        dscore_ls: Array1::from_vec(dscore_ls),
        hmumu: Array1::from_vec(hmumu),
        // The mean/log-scale cross weight and its drift are identically zero here.
        hmu_ls: Array1::zeros(nobs),
        h_ls_ls: Array1::from_vec(h_ls_ls),
        dhmumu: Array1::from_vec(dhmumu),
        dhmu_ls: Array1::zeros(nobs),
        dh_ls_ls: Array1::from_vec(dh_ls_ls),
    }
}
/// Per-row second-order psi weights for a pair of psi directions `a` and `b`.
///
/// `mu_a`/`eta_a` and `mu_b`/`eta_b` are the first-order predictor drifts of the two
/// directions, and `mu_ab`/`eta_ab` are the second-order drifts. The outputs are the
/// `b`-derivatives of the first-order quantities of direction `a`; the mean/log-scale
/// cross Hessian weight is identically zero.
fn gaussian_joint_psisecondweights(
    scalars: &GaussianJointRowScalars,
    mu_a: &Array1<f64>,
    eta_a: &Array1<f64>,
    mu_b: &Array1<f64>,
    eta_b: &Array1<f64>,
    mu_ab: &Array1<f64>,
    eta_ab: &Array1<f64>,
) -> GaussianJointPsiSecondWeights {
    let nobs = scalars.w.len();
    let mut objective_psi_psirow = Vec::with_capacity(nobs);
    let mut d2scoremu = Vec::with_capacity(nobs);
    let mut d2score_ls = Vec::with_capacity(nobs);
    let mut d2hmumu = Vec::with_capacity(nobs);
    let mut d2h_ls_ls = Vec::with_capacity(nobs);
    for row in 0..nobs {
        let w = scalars.w[row];
        let m = scalars.m[row];
        let n = scalars.n[row];
        let k = scalars.kappa[row];
        let k_prime = scalars.kappa_prime[row];
        let k_dprime = scalars.kappa_dprime[row];
        let obs_w = scalars.obs_weight[row];
        let weight_gap = obs_w - n;
        let ma = mu_a[row];
        let mb = mu_b[row];
        let mab = mu_ab[row];
        let ea = eta_a[row];
        let eb = eta_b[row];
        let eab = eta_ab[row];
        // Log-scale drifts pre-multiplied by kappa.
        let sea = k * ea;
        let seb = k * eb;
        let seab = k * eab;
        // Products reused across the outputs.
        let cross = ma * seb + mb * sea;
        let cross_eta = ma * eb + mb * ea;
        let sea_seb = sea * seb;
        let ea_eb = ea * eb;
        let ma_mb = ma * mb;
        objective_psi_psirow.push(
            w * ma_mb + 2.0 * m * cross + 2.0 * n * sea_seb - m * mab
                + k * weight_gap * eab
                + k_prime * weight_gap * ea_eb,
        );
        d2scoremu.push(
            w * mab - 2.0 * w * cross - 4.0 * m * sea_seb
                + 2.0 * m * seab
                + 2.0 * m * k_prime * ea_eb,
        );
        d2score_ls.push(
            k * (-2.0 * w * ma_mb - 4.0 * m * cross - 4.0 * n * sea_seb
                + 2.0 * m * mab
                + 2.0 * n * seab)
                + 2.0 * m * k_prime * cross_eta
                + (k_dprime * weight_gap + 6.0 * k * k_prime * n) * ea_eb
                + k_prime * weight_gap * eab,
        );
        d2hmumu.push(4.0 * w * sea_seb - 2.0 * w * seab - 2.0 * w * k_prime * ea_eb);
        d2h_ls_ls.push(
            4.0 * obs_w * (k_prime * k_prime + k * k_dprime) * ea_eb
                + 4.0 * obs_w * k * k_prime * eab,
        );
    }
    GaussianJointPsiSecondWeights {
        objective_psi_psirow: Array1::from_vec(objective_psi_psirow),
        d2scoremu: Array1::from_vec(d2scoremu),
        d2score_ls: Array1::from_vec(d2score_ls),
        d2hmumu: Array1::from_vec(d2hmumu),
        d2hmu_ls: Array1::zeros(nobs),
        d2h_ls_ls: Array1::from_vec(d2h_ls_ls),
    }
}
/// Computes the per-row weights for the mixed (beta-direction x psi) drift of the
/// Gaussian joint Hessian.
///
/// Inputs are indexed row by row against `scalars`: `dot_eta` and `dot_eta_a` are the
/// two drift vectors and `eta_a` is the psi direction on the log-sigma predictor (the
/// caller in this file passes `X_ls * u_ls`, the psi-map applied to `u_ls`, and
/// `z_ls_psi`). Both cross-block outputs (`dhmu_ls_u`, `d2hmu_ls`) are always zero.
///
/// # Panics
/// Indexing panics if any input array is shorter than `scalars.w`.
fn gaussian_joint_psi_mixed_driftweights(
    scalars: &GaussianJointRowScalars,
    dot_eta: &Array1<f64>,
    eta_a: &Array1<f64>,
    dot_eta_a: &Array1<f64>,
) -> GaussianJointPsiMixedDriftWeights {
    let row_count = scalars.w.len();
    let mut first_mumu = Array1::<f64>::uninit(row_count);
    let mut first_cross = Array1::<f64>::uninit(row_count);
    let mut first_lsls = Array1::<f64>::uninit(row_count);
    let mut second_mumu = Array1::<f64>::uninit(row_count);
    let mut second_cross = Array1::<f64>::uninit(row_count);
    let mut second_lsls = Array1::<f64>::uninit(row_count);
    for row in 0..row_count {
        let weight = scalars.w[row];
        let kappa = scalars.kappa[row];
        let kappa_p = scalars.kappa_prime[row];
        let kappa_pp = scalars.kappa_dprime[row];
        let obs = scalars.obs_weight[row];
        let drift = dot_eta[row];
        let psi_eta = eta_a[row];
        let psi_drift = dot_eta_a[row];
        // kappa-scaled versions of the three log-sigma quantities.
        let scaled_drift = kappa * drift;
        let scaled_psi = kappa * psi_eta;
        let scaled_psi_drift = kappa * psi_drift;
        let drift_times_psi = drift * psi_eta;

        first_mumu[row].write(-2.0 * weight * scaled_drift);
        first_cross[row].write(0.0);
        first_lsls[row].write(4.0 * kappa * kappa_p * obs * drift);
        second_mumu[row].write(
            4.0 * weight * scaled_drift * scaled_psi
                - 2.0 * weight * scaled_psi_drift
                - 2.0 * weight * kappa_p * drift_times_psi,
        );
        second_cross[row].write(0.0);
        second_lsls[row].write(
            4.0 * obs * (kappa_p * kappa_p + kappa * kappa_pp) * drift_times_psi
                + 4.0 * obs * kappa * kappa_p * psi_drift,
        );
    }
    // SAFETY: all six arrays have length `row_count` and the loop writes index `row`
    // of each one for every `row` in `0..row_count`.
    unsafe {
        GaussianJointPsiMixedDriftWeights {
            dhmumu_u: first_mumu.assume_init(),
            dhmu_ls_u: first_cross.assume_init(),
            dh_ls_ls_u: first_lsls.assume_init(),
            d2hmumu: second_mumu.assume_init(),
            d2hmu_ls: second_cross.assume_init(),
            d2h_ls_ls: second_lsls.assume_init(),
        }
    }
}
/// Concatenates the mu score and the log-sigma score into one flat joint vector,
/// mu entries first.
fn gaussian_pack_joint_score(scoremu: &Array1<f64>, score_ls: &Array1<f64>) -> Array1<f64> {
    Array1::<f64>::from_iter(scoremu.iter().chain(score_ls.iter()).copied())
}
/// Assembles the joint symmetric Hessian from its mu/mu, mu/log-sigma and
/// log-sigma/log-sigma blocks.
///
/// Only the upper block triangle is written explicitly; `mirror_upper_to_lower` then
/// fills the lower part. Block sizes are taken from the row counts of `hmumu` and
/// `h_ls_ls`; `assign` panics if a block does not fit its slot.
fn gaussian_pack_joint_symmetrichessian(
    hmumu: &Array2<f64>,
    hmu_ls: &Array2<f64>,
    h_ls_ls: &Array2<f64>,
) -> Array2<f64> {
    let mu_dim = hmumu.nrows();
    let joint_dim = mu_dim + h_ls_ls.nrows();
    let mut packed = Array2::<f64>::zeros((joint_dim, joint_dim));
    packed.slice_mut(s![..mu_dim, ..mu_dim]).assign(hmumu);
    packed.slice_mut(s![..mu_dim, mu_dim..]).assign(hmu_ls);
    packed.slice_mut(s![mu_dim.., mu_dim..]).assign(h_ls_ls);
    mirror_upper_to_lower(&mut packed);
    packed
}
/// Returns the per-row Hessian coefficients `(mu/mu, mu/log-sigma, log-sigma/log-sigma)`.
///
/// The mu/mu coefficient is a copy of `rows.w`, the cross coefficient is identically
/// zero, and the log-sigma coefficient is `2 * kappa * kappa * obs_weight` elementwise.
fn gaussian_locscale_fisher_joint_row_coeffs(
    rows: &GaussianJointRowScalars,
) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
    let mean_coeff = rows.w.clone();
    let cross_coeff = Array1::<f64>::zeros(rows.kappa.len());
    let doubled_kappa = 2.0 * &rows.kappa;
    let scale_coeff = doubled_kappa * &rows.kappa * &rows.obs_weight;
    (mean_coeff, cross_coeff, scale_coeff)
}
/// Accumulates the joint `(p_mu + p_ls)`-square Hessian from the two block designs and
/// three per-row coefficient vectors, processing the rows in chunks.
///
/// # Errors
/// Returns a dimension-mismatch message when the designs and coefficient vectors do
/// not all share the same row count, and forwards any error from `row_chunk`.
fn gaussian_joint_hessian_from_designs(
    xmu: &DenseOrOperator<'_>,
    x_ls: &DenseOrOperator<'_>,
    hmumu_coeff: &Array1<f64>,
    hmu_ls_coeff: &Array1<f64>,
    h_ls_ls_coeff: &Array1<f64>,
) -> Result<Array2<f64>, String> {
    let row_count = xmu.nrows();
    let coeff_lengths = [hmumu_coeff.len(), hmu_ls_coeff.len(), h_ls_ls_coeff.len()];
    let rows_agree =
        coeff_lengths.iter().all(|&len| len == row_count) && x_ls.nrows() == row_count;
    if !rows_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "gaussian_joint_hessian_from_designs dimension mismatch: xmu {}x{}, x_ls {}x{}, coeffs {}/{}/{}",
                xmu.nrows(),
                xmu.ncols(),
                x_ls.nrows(),
                x_ls.ncols(),
                hmumu_coeff.len(),
                hmu_ls_coeff.len(),
                h_ls_ls_coeff.len()
            ),
        }
        .into());
    }
    let mu_cols = xmu.ncols();
    let ls_cols = x_ls.ncols();
    let joint_dim = mu_cols + ls_cols;
    let mut accumulated = Array2::<f64>::zeros((joint_dim, joint_dim));
    // Chunking is sized by the wider of the two designs.
    for chunk in exact_design_row_chunks(row_count, mu_cols.max(ls_cols)) {
        let mu_rows = xmu.row_chunk(chunk.clone())?;
        let ls_rows = x_ls.row_chunk(chunk.clone())?;
        let w_mumu = hmumu_coeff.slice(s![chunk.clone()]);
        let w_cross = hmu_ls_coeff.slice(s![chunk.clone()]);
        let w_lsls = h_ls_ls_coeff.slice(s![chunk.clone()]);
        accumulated += &fast_joint_hessian_2x2(&mu_rows, &ls_rows, &w_mumu, &w_cross, &w_lsls);
    }
    Ok(accumulated)
}
/// Forms the first-order psi drift of the joint Hessian from precomputed row weights.
///
/// Each block is a product-rule sum: the psi-design map crossed with the base design
/// under the base weights (symmetrised on the diagonal blocks), plus the base designs
/// crossed under the drifted weights.
///
/// # Errors
/// Forwards the first error raised by any cross-product helper.
fn gaussian_joint_psihessian_fromweights(
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    xmu_psi: CustomFamilyPsiLinearMapRef<'_>,
    x_ls_psi: CustomFamilyPsiLinearMapRef<'_>,
    weights: &GaussianJointPsiFirstWeights,
) -> Result<Array2<f64>, String> {
    let dense_mu = CustomFamilyPsiLinearMapRef::Dense(xmu);
    let dense_ls = CustomFamilyPsiLinearMapRef::Dense(x_ls);

    // mu/mu block.
    let mu_cross = weighted_crossprod_psi_maps(xmu_psi, weights.hmumu.view(), dense_mu)?;
    let mu_symmetric = &mu_cross + &mu_cross.t();
    let block_mumu = mu_symmetric + &xt_diag_x_dense(xmu, &weights.dhmumu)?;

    // mu/log-sigma block.
    let cross_left = weighted_crossprod_psi_maps(xmu_psi, weights.hmu_ls.view(), dense_ls)?;
    let cross_both =
        cross_left + &weighted_crossprod_psi_maps(dense_mu, weights.hmu_ls.view(), x_ls_psi)?;
    let block_cross = cross_both + &xt_diag_y_dense(xmu, &weights.dhmu_ls, x_ls)?;

    // log-sigma/log-sigma block.
    let ls_cross = weighted_crossprod_psi_maps(x_ls_psi, weights.h_ls_ls.view(), dense_ls)?;
    let ls_symmetric = &ls_cross + &ls_cross.t();
    let block_lsls = ls_symmetric + &xt_diag_x_dense(x_ls, &weights.dh_ls_ls)?;

    Ok(gaussian_pack_joint_symmetrichessian(
        &block_mumu,
        &block_cross,
        &block_lsls,
    ))
}
/// Wraps two design channels and their four pairwise weight contributions into a
/// `CustomFamilyJointPsiOperator`.
///
/// Returns `Ok(None)` when neither block supplies a psi design action. Otherwise the
/// operator spans `left_design.ncols() + right_design.ncols()` coefficients, and the
/// cross weights are registered for both the (0, 1) and (1, 0) channel pairs.
fn build_two_block_custom_family_joint_psi_operator_from_actions(
    left_action: Option<CustomFamilyPsiDesignAction>,
    right_action: Option<CustomFamilyPsiDesignAction>,
    left_range: std::ops::Range<usize>,
    right_range: std::ops::Range<usize>,
    left_design: &Array2<f64>,
    right_design: &Array2<f64>,
    left_weights: &Array1<f64>,
    cross_weights: &Array1<f64>,
    right_weights: &Array1<f64>,
    left_drift_weights: &Array1<f64>,
    cross_drift_weights: &Array1<f64>,
    right_drift_weights: &Array1<f64>,
) -> Result<Option<std::sync::Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    if let (None, None) = (&left_action, &right_action) {
        return Ok(None);
    }
    let joint_dim = left_design.ncols() + right_design.ncols();
    let left_channel = CustomFamilyJointDesignChannel::new(
        left_range,
        shared_dense_arc(left_design),
        left_action,
    );
    let right_channel = CustomFamilyJointDesignChannel::new(
        right_range,
        shared_dense_arc(right_design),
        right_action,
    );
    // One contribution per ordered channel pair; each owns a copy of its weights.
    let contribution = |row, col, base: &Array1<f64>, drift: &Array1<f64>| {
        CustomFamilyJointDesignPairContribution::new(row, col, base.clone(), drift.clone())
    };
    let pair_contributions = vec![
        contribution(0, 0, left_weights, left_drift_weights),
        contribution(0, 1, cross_weights, cross_drift_weights),
        contribution(1, 0, cross_weights, cross_drift_weights),
        contribution(1, 1, right_weights, right_drift_weights),
    ];
    Ok(Some(std::sync::Arc::new(
        CustomFamilyJointPsiOperator::new(
            joint_dim,
            vec![left_channel, right_channel],
            pair_contributions,
        ),
    )))
}
/// Assembles the second-order psi drift of the joint Hessian from precomputed weights.
///
/// Each of the three blocks is a product-rule expansion over the design maps
/// (`*_i`, `*_j` for the two first-order psi maps, `*_ab` for the second-order map)
/// and the weight vectors: base weights are read from `weights_i`, first-order
/// weight drifts from `weights_i` / `weights_j`, and second-order drifts from
/// `secondweights`. The blocks are then packed with
/// `gaussian_pack_joint_symmetrichessian`.
///
/// # Errors
/// Forwards the first error raised by any cross-product helper.
fn gaussian_joint_psisecondhessian_fromweights(
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
xmu_i: CustomFamilyPsiLinearMapRef<'_>,
x_ls_i: CustomFamilyPsiLinearMapRef<'_>,
xmu_j: CustomFamilyPsiLinearMapRef<'_>,
x_ls_j: CustomFamilyPsiLinearMapRef<'_>,
xmu_ab: CustomFamilyPsiLinearMapRef<'_>,
x_ls_ab: CustomFamilyPsiLinearMapRef<'_>,
weights_i: &GaussianJointPsiFirstWeights,
weights_j: &GaussianJointPsiFirstWeights,
secondweights: &GaussianJointPsiSecondWeights,
) -> Result<Array2<f64>, String> {
// mu/mu block: four cross products, each added together with its transpose,
// plus the base design under the second-order weight drift.
let a_ab_mu = weighted_crossprod_psi_maps(
xmu_ab,
weights_i.hmumu.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let a_ij_mu = weighted_crossprod_psi_maps(xmu_i, weights_i.hmumu.view(), xmu_j)?;
let a_iwj_mu = weighted_crossprod_psi_maps(
xmu_i,
weights_j.dhmumu.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let a_jwi_mu = weighted_crossprod_psi_maps(
xmu_j,
weights_i.dhmumu.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let hmumu = &a_ab_mu
+ &a_ab_mu.t()
+ &a_ij_mu
+ a_ij_mu.t()
+ &a_iwj_mu
+ a_iwj_mu.t()
+ &a_jwi_mu
+ a_jwi_mu.t()
+ &xt_diag_x_dense(xmu, &secondweights.d2hmumu)?;
// mu/log-sigma block: not symmetric, so every one of the nine product-rule terms
// is written out explicitly instead of being paired with a transpose.
let hmu_ls = weighted_crossprod_psi_maps(
xmu_ab,
weights_i.hmu_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(xmu_i, weights_i.hmu_ls.view(), x_ls_j)?
+ &weighted_crossprod_psi_maps(xmu_j, weights_i.hmu_ls.view(), x_ls_i)?
+ &weighted_crossprod_psi_maps(
xmu_i,
weights_j.dhmu_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
xmu_j,
weights_i.dhmu_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
weights_i.dhmu_ls.view(),
x_ls_j,
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
weights_j.dhmu_ls.view(),
x_ls_i,
)?
+ &xt_diag_y_dense(xmu, &secondweights.d2hmu_ls, x_ls)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
weights_i.hmu_ls.view(),
x_ls_ab,
)?;
// log-sigma/log-sigma block: same structure as the mu/mu block.
let a_ab_ls = weighted_crossprod_psi_maps(
x_ls_ab,
weights_i.h_ls_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let a_ij_ls = weighted_crossprod_psi_maps(x_ls_i, weights_i.h_ls_ls.view(), x_ls_j)?;
let a_iwj_ls = weighted_crossprod_psi_maps(
x_ls_i,
weights_j.dh_ls_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let a_jwi_ls = weighted_crossprod_psi_maps(
x_ls_j,
weights_i.dh_ls_ls.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let h_ls_ls = &a_ab_ls
+ &a_ab_ls.t()
+ &a_ij_ls
+ a_ij_ls.t()
+ &a_iwj_ls
+ a_iwj_ls.t()
+ &a_jwi_ls
+ a_jwi_ls.t()
+ &xt_diag_x_dense(x_ls, &secondweights.d2h_ls_ls)?;
Ok(gaussian_pack_joint_symmetrichessian(
&hmumu, &hmu_ls, &h_ls_ls,
))
}
/// Forms the mixed (beta-direction x psi) drift of the joint Hessian from
/// precomputed mixed-drift weights.
///
/// The psi-design maps are crossed with the base designs under the `*_u` weights
/// (symmetrised on the diagonal blocks); the base designs are crossed with themselves
/// under the `d2*` weights.
///
/// # Errors
/// Forwards the first error raised by any cross-product helper.
fn gaussian_joint_psi_mixedhessian_drift_fromweights(
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    xmu_psi: CustomFamilyPsiLinearMapRef<'_>,
    x_ls_psi: CustomFamilyPsiLinearMapRef<'_>,
    mixedweights: &GaussianJointPsiMixedDriftWeights,
) -> Result<Array2<f64>, String> {
    let dense_mu = CustomFamilyPsiLinearMapRef::Dense(xmu);
    let dense_ls = CustomFamilyPsiLinearMapRef::Dense(x_ls);

    // mu/mu block.
    let mu_cross = weighted_crossprod_psi_maps(xmu_psi, mixedweights.dhmumu_u.view(), dense_mu)?;
    let mu_symmetric = &mu_cross + &mu_cross.t();
    let block_mumu = mu_symmetric + &xt_diag_x_dense(xmu, &mixedweights.d2hmumu)?;

    // mu/log-sigma block.
    let cross_left =
        weighted_crossprod_psi_maps(xmu_psi, mixedweights.dhmu_ls_u.view(), dense_ls)?;
    let cross_both = cross_left
        + &weighted_crossprod_psi_maps(dense_mu, mixedweights.dhmu_ls_u.view(), x_ls_psi)?;
    let block_cross = cross_both + &xt_diag_y_dense(xmu, &mixedweights.d2hmu_ls, x_ls)?;

    // log-sigma/log-sigma block.
    let ls_cross =
        weighted_crossprod_psi_maps(x_ls_psi, mixedweights.dh_ls_ls_u.view(), dense_ls)?;
    let ls_symmetric = &ls_cross + &ls_cross.t();
    let block_lsls = ls_symmetric + &xt_diag_x_dense(x_ls, &mixedweights.d2h_ls_ls)?;

    Ok(gaussian_pack_joint_symmetrichessian(
        &block_mumu,
        &block_cross,
        &block_lsls,
    ))
}
/// Applies `exp_sigma_derivs_up_to_fourth_scalar` to every entry of `eta` in parallel
/// and returns the five results as separate arrays, in the scalar function's tuple
/// order.
#[inline]
fn exp_sigma_derivs_up_to_fourth_array(
    eta: ArrayView1<'_, f64>,
) -> (
    Array1<f64>,
    Array1<f64>,
    Array1<f64>,
    Array1<f64>,
    Array1<f64>,
) {
    use rayon::iter::{IntoParallelIterator, ParallelIterator};
    let len = eta.len();
    let per_row: Vec<(f64, f64, f64, f64, f64)> = (0..len)
        .into_par_iter()
        .map(|idx| exp_sigma_derivs_up_to_fourth_scalar(eta[idx]))
        .collect();
    // Split the row-major tuples into five column vectors.
    let mut sigma = Vec::with_capacity(len);
    let mut first = Vec::with_capacity(len);
    let mut second = Vec::with_capacity(len);
    let mut third = Vec::with_capacity(len);
    let mut fourth = Vec::with_capacity(len);
    for (value, deriv1, deriv2, deriv3, deriv4) in per_row {
        sigma.push(value);
        first.push(deriv1);
        second.push(deriv2);
        third.push(deriv3);
        fourth.push(deriv4);
    }
    (
        Array1::from_vec(sigma),
        Array1::from_vec(first),
        Array1::from_vec(second),
        Array1::from_vec(third),
        Array1::from_vec(fourth),
    )
}
impl GaussianLocationScaleFamily {
/// Index of the `mu` block within the two-element `block_states` / `specs` slices.
pub const BLOCK_MU: usize = 0;
/// Index of the log-sigma block within the two-element `block_states` / `specs` slices.
pub const BLOCK_LOG_SIGMA: usize = 1;
/// Evaluates the per-row Gaussian joint scalars for the given predictors and returns
/// them behind an `Arc`.
///
/// Despite the name, nothing is looked up here: every call recomputes the scalars
/// from `self.y`, the two predictors and `self.weights`.
fn get_or_compute_row_scalars(
    &self,
    etamu: &Array1<f64>,
    eta_ls: &Array1<f64>,
) -> Result<Arc<GaussianJointRowScalars>, String> {
    let scalars = gaussian_jointrow_scalars(&self.y, etamu, eta_ls, &self.weights)?;
    Ok(Arc::new(scalars))
}
/// Names of the two distribution parameters, in block order.
pub fn parameternames() -> &'static [&'static str] {
    const NAMES: [&str; 2] = ["mu", "log_sigma"];
    &NAMES
}
/// Link functions of the two parameters, in block order: identity, then log.
pub fn parameter_links() -> &'static [ParameterLink] {
    const LINKS: [ParameterLink; 2] = [ParameterLink::Identity, ParameterLink::Log];
    &LINKS
}
/// Static family descriptor: the family name plus its parameter names and links.
pub fn metadata() -> FamilyMetadata {
    let parameternames = Self::parameternames();
    let parameter_links = Self::parameter_links();
    FamilyMetadata {
        name: "gaussian_location_scale",
        parameternames,
        parameter_links,
    }
}
/// True when the family itself holds both the mu and the log-sigma design.
fn exact_joint_supported(&self) -> bool {
    matches!(
        (&self.mu_design, &self.log_sigma_design),
        (Some(_), Some(_))
    )
}
/// Resolves the family's own mu and log-sigma designs into `DenseOrOperator` handles,
/// using a budget planned jointly over both designs.
///
/// # Errors
/// Fails when either stored design is absent; the mu design is checked first.
fn exact_block_designs(&self) -> Result<(DenseOrOperator<'_>, DenseOrOperator<'_>), String> {
    let Some(mean_design) = self.mu_design.as_ref() else {
        return Err("GaussianLocationScaleFamily exact path is missing mu design".to_string());
    };
    let Some(scale_design) = self.log_sigma_design.as_ref() else {
        return Err(
            "GaussianLocationScaleFamily exact path is missing log-sigma design".to_string(),
        );
    };
    let budgets = dense_blocks_planned_budget(&[mean_design, scale_design]);
    let mean_block = dense_block_or_operator(
        mean_design,
        mean_design.nrows(),
        mean_design.ncols(),
        budgets[0],
        &self.policy,
    );
    let scale_block = dense_block_or_operator(
        scale_design,
        scale_design.nrows(),
        scale_design.ncols(),
        budgets[1],
        &self.policy,
    );
    Ok((mean_block, scale_block))
}
/// Same as `exact_block_designs`, but takes the two designs from the supplied block
/// specs instead of from the family.
///
/// # Errors
/// Fails with a dimension-mismatch message unless exactly two specs are given.
fn exact_block_designs_fromspecs<'a>(
    &self,
    specs: &'a [ParameterBlockSpec],
) -> Result<(DenseOrOperator<'a>, DenseOrOperator<'a>), String> {
    let spec_count = specs.len();
    if spec_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily spec-aware exact path expects 2 specs, got {}",
                spec_count
            ),
        }
        .into());
    }
    let mean_design = &specs[Self::BLOCK_MU].design;
    let scale_design = &specs[Self::BLOCK_LOG_SIGMA].design;
    let budgets = dense_blocks_planned_budget(&[mean_design, scale_design]);
    let mean_block = dense_block_or_operator(
        mean_design,
        mean_design.nrows(),
        mean_design.ncols(),
        budgets[0],
        &self.policy,
    );
    let scale_block = dense_block_or_operator(
        scale_design,
        scale_design.nrows(),
        scale_design.ncols(),
        budgets[1],
        &self.policy,
    );
    Ok((mean_block, scale_block))
}
/// Picks the design source for the exact joint path.
///
/// The family's own designs win when both are present; otherwise the designs come
/// from `specs` if given. `Ok(None)` means neither source is available.
fn exact_joint_block_designs<'a>(
    &'a self,
    specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(DenseOrOperator<'a>, DenseOrOperator<'a>)>, String> {
    if self.exact_joint_supported() {
        self.exact_block_designs().map(Some)
    } else {
        match specs {
            Some(specs) => self.exact_block_designs_fromspecs(specs).map(Some),
            None => Ok(None),
        }
    }
}
fn exact_joint_dense_block_designs<'a>(
&'a self,
specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
let Some((xmu, x_ls)) = self.exact_joint_block_designs(specs)? else {
return Ok(None);
};
let xmu = match xmu {
DenseOrOperator::Borrowed(dense) => Cow::Borrowed(dense),
DenseOrOperator::Owned(dense) => Cow::Owned(dense),
DenseOrOperator::Operator(_) => {
return Err(
"GaussianLocationScaleFamily exact psi path requires chunked operator support for oversized designs"
.to_string(),
);
}
};
let x_ls = match x_ls {
DenseOrOperator::Borrowed(dense) => Cow::Borrowed(dense),
DenseOrOperator::Owned(dense) => Cow::Owned(dense),
DenseOrOperator::Operator(_) => {
return Err(
"GaussianLocationScaleFamily exact psi path requires chunked operator support for oversized designs"
.to_string(),
);
}
};
Ok(Some((xmu, x_ls)))
}
/// Joint Hessian for the current block states, using whichever designs
/// `exact_joint_block_designs` resolves. Returns `Ok(None)` when no designs are
/// available.
fn exact_newton_joint_hessian_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_block_designs(specs)? {
        Some((mean_block, scale_block)) => {
            self.exact_newton_joint_hessian_from_designs(block_states, &mean_block, &scale_block)
        }
        None => Ok(None),
    }
}
/// Directional derivative of the joint Hessian along `d_beta_flat`, using whichever
/// designs `exact_joint_block_designs` resolves. Returns `Ok(None)` when no designs
/// are available.
fn exact_newton_joint_hessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_block_designs(specs)? {
        Some((mean_block, scale_block)) => self
            .exact_newton_joint_hessian_directional_derivative_from_designs(
                block_states,
                &mean_block,
                &scale_block,
                d_beta_flat,
            ),
        None => Ok(None),
    }
}
/// Second directional derivative of the joint Hessian along `d_beta_u_flat` and
/// `d_betav_flat`, using whichever designs `exact_joint_block_designs` resolves.
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_second_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_u_flat: &Array1<f64>,
    d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_block_designs(specs)? {
        Some((mean_block, scale_block)) => self
            .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
                block_states,
                &mean_block,
                &scale_block,
                d_beta_u_flat,
                d_betav_flat,
            ),
        None => Ok(None),
    }
}
/// First-order psi terms for hyperparameter `psi_index`, computed on dense designs.
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_psi_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((mean_dense, scale_dense)) => self.exact_newton_joint_psi_terms_from_designs(
            block_states,
            specs,
            derivative_blocks,
            psi_index,
            &mean_dense,
            &scale_dense,
        ),
        None => Ok(None),
    }
}
/// Second-order psi terms for the hyperparameter pair (`psi_i`, `psi_j`), computed on
/// dense designs. Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_psisecond_order_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((mean_dense, scale_dense)) => self
            .exact_newton_joint_psisecond_order_terms_from_designs(
                block_states,
                derivative_blocks,
                psi_i,
                psi_j,
                &mean_dense,
                &scale_dense,
            ),
        None => Ok(None),
    }
}
/// Directional derivative (along `d_beta_flat`) of the psi-Hessian for hyperparameter
/// `psi_index`, computed on dense designs. Returns `Ok(None)` when no designs are
/// available.
fn exact_newton_joint_psihessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((mean_dense, scale_dense)) => self
            .exact_newton_joint_psihessian_directional_derivative_from_designs(
                block_states,
                derivative_blocks,
                psi_index,
                d_beta_flat,
                &mean_dense,
                &scale_dense,
            ),
        None => Ok(None),
    }
}
/// Joint Hessian on explicit designs, built from the row coefficients returned by
/// `gaussian_locscale_fisher_joint_row_coeffs`.
///
/// # Errors
/// Fails unless exactly two block states are given and both predictors and
/// `self.weights` match the length of `self.y`.
fn exact_newton_joint_hessian_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    xmu: &DenseOrOperator<'_>,
    x_ls: &DenseOrOperator<'_>,
) -> Result<Option<Array2<f64>>, String> {
    let state_count = block_states.len();
    if state_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                state_count
            ),
        }
        .into());
    }
    let obs_count = self.y.len();
    let mean_eta = &block_states[Self::BLOCK_MU].eta;
    let scale_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let lengths = [mean_eta.len(), scale_eta.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != obs_count) {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let row_scalars = self.get_or_compute_row_scalars(mean_eta, scale_eta)?;
    let (w_mumu, w_cross, w_lsls) = gaussian_locscale_fisher_joint_row_coeffs(&row_scalars);
    gaussian_joint_hessian_from_designs(xmu, x_ls, &w_mumu, &w_cross, &w_lsls).map(Some)
}
/// Directional derivative of the joint Hessian along `d_beta_flat` on explicit designs.
///
/// The flat direction is split into its mu and log-sigma parts, pushed through the
/// designs, and turned into row weights; the cross-block weight is fixed at zero.
///
/// # Errors
/// Fails on a wrong number of block states, mismatched predictor/weight lengths, or a
/// direction whose length differs from `xmu.ncols() + x_ls.ncols()`.
fn exact_newton_joint_hessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    xmu: &DenseOrOperator<'_>,
    x_ls: &DenseOrOperator<'_>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let state_count = block_states.len();
    if state_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                state_count
            ),
        }
        .into());
    }
    let obs_count = self.y.len();
    let mean_eta = &block_states[Self::BLOCK_MU].eta;
    let scale_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let lengths = [mean_eta.len(), scale_eta.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != obs_count) {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let mu_cols = xmu.ncols();
    let joint_dim = mu_cols + x_ls.ncols();
    if d_beta_flat.len() != joint_dim {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily joint d_beta length mismatch: got {}, expected {}",
                d_beta_flat.len(),
                joint_dim
            ),
        }
        .into());
    }
    let mean_shift = xmu.dot(d_beta_flat.slice(s![..mu_cols]));
    let scale_shift = x_ls.dot(d_beta_flat.slice(s![mu_cols..joint_dim]));
    let row_scalars = self.get_or_compute_row_scalars(mean_eta, scale_eta)?;
    let directional =
        gaussian_joint_first_directionalweights(&row_scalars, &mean_shift, &scale_shift);
    let w_mumu = directional.0;
    let w_lsls = directional.2;
    // The cross block carries no directional weight.
    let w_cross = Array1::<f64>::zeros(w_mumu.len());
    gaussian_joint_hessian_from_designs(xmu, x_ls, &w_mumu, &w_cross, &w_lsls).map(Some)
}
/// Second directional derivative of the joint Hessian along `d_beta_u_flat` and
/// `d_betav_flat` on explicit designs.
///
/// Both flat directions are split into mu and log-sigma parts and pushed through the
/// designs; the cross-block weight is fixed at zero.
///
/// # Errors
/// Fails on a wrong number of block states, mismatched predictor/weight lengths, or
/// when either direction's length differs from `xmu.ncols() + x_ls.ncols()`.
fn exact_newton_joint_hessiansecond_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    xmu: &DenseOrOperator<'_>,
    x_ls: &DenseOrOperator<'_>,
    d_beta_u_flat: &Array1<f64>,
    d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let state_count = block_states.len();
    if state_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                state_count
            ),
        }
        .into());
    }
    let obs_count = self.y.len();
    let mean_eta = &block_states[Self::BLOCK_MU].eta;
    let scale_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let lengths = [mean_eta.len(), scale_eta.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != obs_count) {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let mu_cols = xmu.ncols();
    let joint_dim = mu_cols + x_ls.ncols();
    if d_beta_u_flat.len() != joint_dim || d_betav_flat.len() != joint_dim {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily joint second directional derivative length mismatch: got {} and {}, expected {}",
                d_beta_u_flat.len(),
                d_betav_flat.len(),
                joint_dim
            ),
        }
        .into());
    }
    let mean_shift_u = xmu.dot(d_beta_u_flat.slice(s![..mu_cols]));
    let scale_shift_u = x_ls.dot(d_beta_u_flat.slice(s![mu_cols..joint_dim]));
    let mean_shift_v = xmu.dot(d_betav_flat.slice(s![..mu_cols]));
    let scale_shift_v = x_ls.dot(d_betav_flat.slice(s![mu_cols..joint_dim]));
    let row_scalars = self.get_or_compute_row_scalars(mean_eta, scale_eta)?;
    let second = gaussian_jointsecond_directionalweights(
        &row_scalars,
        &mean_shift_u,
        &scale_shift_u,
        &mean_shift_v,
        &scale_shift_v,
    );
    let w_mumu = second.0;
    let w_lsls = second.2;
    // The cross block carries no second-order weight.
    let w_cross = Array1::<f64>::zeros(w_mumu.len());
    gaussian_joint_hessian_from_designs(xmu, x_ls, &w_mumu, &w_cross, &w_lsls).map(Some)
}
/// Resolves hyperparameter `psi_index` into a `LocationScaleJointPsiDirection` by
/// delegating to `locscale_joint_psi_direction_parts` with this family's block layout.
///
/// Returns `Ok(None)` when the helper yields no parts for that index.
fn exact_newton_joint_psi_direction(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
    let resolved = locscale_joint_psi_direction_parts(
        block_states,
        derivative_blocks,
        psi_index,
        self.y.len(),
        xmu.ncols(),
        x_ls.ncols(),
        Self::BLOCK_MU,
        Self::BLOCK_LOG_SIGMA,
        2,
        "GaussianLocationScaleFamily",
        "mu",
        policy,
    )?;
    Ok(resolved.map(|found| LocationScaleJointPsiDirection {
        block_idx: found.block_idx,
        local_idx: found.local_idx,
        z_primary_psi: found.primary_z,
        z_ls_psi: found.log_sigma_z,
        x_primary_psi: found.primary_psi,
        x_ls_psi: found.log_sigma_psi,
    }))
}
/// Computes the second-order design drifts for a pair of psi directions by delegating
/// to `locscale_joint_psisecond_design_drifts` with this family's layout and policy.
fn exact_newton_joint_psisecond_design_drifts(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_a: &LocationScaleJointPsiDirection,
    psi_b: &LocationScaleJointPsiDirection,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<LocationScaleJointPsiSecondDrifts, String> {
    let layout = LocScalePsiDriftConfig {
        n: self.y.len(),
        p_primary: xmu.ncols(),
        p_log_sigma: x_ls.ncols(),
        primary_block_idx: Self::BLOCK_MU,
        log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
        family_name: "GaussianLocationScaleFamily",
        primary_label: "mu",
        policy: &self.policy,
    };
    locscale_joint_psisecond_design_drifts(block_states, derivative_blocks, psi_a, psi_b, layout)
}
/// First-order psi terms (objective, score, Hessian) for hyperparameter `psi_index`
/// on dense designs.
///
/// The Hessian drift is returned as an operator whenever the operator builder yields
/// one; in that case the dense `hessian_psi` field is an empty `0 x 0` placeholder.
/// Otherwise the dense drift is computed and the operator is `None`.
///
/// Returns `Ok(None)` when the psi direction cannot be resolved.
///
/// # Errors
/// Fails unless there are exactly two block states, two specs and two derivative
/// blocks, and forwards errors from the helpers.
fn exact_newton_joint_psi_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    let state_count = block_states.len();
    if state_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                state_count
            ),
        }
        .into());
    }
    let (spec_count, derivative_count) = (specs.len(), derivative_blocks.len());
    if spec_count != 2 || derivative_count != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily joint psi terms expect 2 specs and 2 derivative blocks, got {} and {}",
                spec_count,
                derivative_count
            ),
        }
        .into());
    }
    let direction = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        xmu,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    let row_scalars = self.get_or_compute_row_scalars(
        &block_states[Self::BLOCK_MU].eta,
        &block_states[Self::BLOCK_LOG_SIGMA].eta,
    )?;
    let first = gaussian_joint_psi_firstweights(
        &row_scalars,
        &direction.z_primary_psi,
        &direction.z_ls_psi,
    );
    let objective_psi = first.objective_psirow.sum();
    let mu_psi_map = direction.x_primary_psi.as_linear_map_ref();
    let ls_psi_map = direction.x_ls_psi.as_linear_map_ref();
    // Score drift per block: psi-design transpose applied to the base score, plus the
    // base design transpose applied to the drifted score.
    let mu_score =
        mu_psi_map.transpose_mul(first.scoremu.view()) + fast_atv(xmu, &first.dscoremu);
    let ls_score =
        ls_psi_map.transpose_mul(first.score_ls.view()) + fast_atv(x_ls, &first.dscore_ls);
    let score_psi = gaussian_pack_joint_score(&mu_score, &ls_score);
    let mu_cols = xmu.ncols();
    let joint_dim = mu_cols + x_ls.ncols();
    let hessian_psi_operator = build_two_block_custom_family_joint_psi_operator_from_actions(
        direction.x_primary_psi.cloned_first_action(),
        direction.x_ls_psi.cloned_first_action(),
        0..mu_cols,
        mu_cols..joint_dim,
        xmu,
        x_ls,
        &first.hmumu,
        &first.hmu_ls,
        &first.h_ls_ls,
        &first.dhmumu,
        &first.dhmu_ls,
        &first.dh_ls_ls,
    )?;
    let hessian_psi = if hessian_psi_operator.is_none() {
        gaussian_joint_psihessian_fromweights(xmu, x_ls, mu_psi_map, ls_psi_map, &first)?
    } else {
        Array2::zeros((0, 0))
    };
    Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
        objective_psi,
        score_psi,
        hessian_psi,
        hessian_psi_operator,
    }))
}
/// Second-order psi terms for the pair (`psi_i`, `psi_j`) on dense designs, without
/// row subsampling.
///
/// Returns `Ok(None)` as soon as either psi direction cannot be resolved; `psi_j` is
/// only looked up after `psi_i` resolved successfully.
fn exact_newton_joint_psisecond_order_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    let resolve = |psi_index: usize| {
        self.exact_newton_joint_psi_direction(
            block_states,
            derivative_blocks,
            psi_index,
            xmu,
            x_ls,
            &self.policy,
        )
    };
    let first_direction = match resolve(psi_i)? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    let second_direction = match resolve(psi_j)? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    self.exact_newton_joint_psisecond_order_terms_from_parts(
        block_states,
        derivative_blocks,
        &first_direction,
        &second_direction,
        xmu,
        x_ls,
        None,
    )
    .map(Some)
}
/// Second-order psi terms (objective, score, dense Hessian) for two already-resolved
/// psi directions.
///
/// When `subsample` is given, the first- and second-order weights are masked through
/// `apply_ht_mask_first` / `apply_ht_mask_second` before any sums are formed. The
/// result always carries a dense `hessian_psi_psi` and no operator.
fn exact_newton_joint_psisecond_order_terms_from_parts(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    dir_i: &LocationScaleJointPsiDirection,
    dir_j: &LocationScaleJointPsiDirection,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
    let drifts = self.exact_newton_joint_psisecond_design_drifts(
        block_states,
        derivative_blocks,
        dir_i,
        dir_j,
        xmu,
        x_ls,
    )?;
    let obs_count = self.y.len();

    // Linear-map views of the first-order (i, j) and second-order (ab) design drifts.
    let mu_map_i = dir_i.x_primary_psi.as_linear_map_ref();
    let ls_map_i = dir_i.x_ls_psi.as_linear_map_ref();
    let mu_map_j = dir_j.x_primary_psi.as_linear_map_ref();
    let ls_map_j = dir_j.x_ls_psi.as_linear_map_ref();
    let mu_map_ab = second_psi_linear_map(
        drifts.x_primary_ab_action.as_ref(),
        drifts.x_primary_ab.as_ref(),
        obs_count,
        xmu.ncols(),
    );
    let ls_map_ab = second_psi_linear_map(
        drifts.x_ls_ab_action.as_ref(),
        drifts.x_ls_ab.as_ref(),
        obs_count,
        x_ls.ncols(),
    );

    let row_scalars = self.get_or_compute_row_scalars(
        &block_states[Self::BLOCK_MU].eta,
        &block_states[Self::BLOCK_LOG_SIGMA].eta,
    )?;
    let mut first_i =
        gaussian_joint_psi_firstweights(&row_scalars, &dir_i.z_primary_psi, &dir_i.z_ls_psi);
    let mut first_j =
        gaussian_joint_psi_firstweights(&row_scalars, &dir_j.z_primary_psi, &dir_j.z_ls_psi);
    let mut second = gaussian_joint_psisecondweights(
        &row_scalars,
        &dir_i.z_primary_psi,
        &dir_i.z_ls_psi,
        &dir_j.z_primary_psi,
        &dir_j.z_ls_psi,
        &drifts.z_primary_ab,
        &drifts.z_ls_ab,
    );
    if let Some(kept_rows) = subsample {
        apply_ht_mask_first(&mut first_i, kept_rows);
        apply_ht_mask_first(&mut first_j, kept_rows);
        apply_ht_mask_second(&mut second, kept_rows);
    }

    let objective_psi_psi = second.objective_psi_psirow.sum();
    let mu_score = mu_map_ab.transpose_mul(first_i.scoremu.view())
        + mu_map_i.transpose_mul(first_j.dscoremu.view())
        + mu_map_j.transpose_mul(first_i.dscoremu.view())
        + fast_atv(xmu, &second.d2scoremu);
    let ls_score = ls_map_ab.transpose_mul(first_i.score_ls.view())
        + ls_map_i.transpose_mul(first_j.dscore_ls.view())
        + ls_map_j.transpose_mul(first_i.dscore_ls.view())
        + fast_atv(x_ls, &second.d2score_ls);
    let score_psi_psi = gaussian_pack_joint_score(&mu_score, &ls_score);
    let hessian_psi_psi = gaussian_joint_psisecondhessian_fromweights(
        xmu,
        x_ls,
        mu_map_i,
        ls_map_i,
        mu_map_j,
        ls_map_j,
        mu_map_ab,
        ls_map_ab,
        &first_i,
        &first_j,
        &second,
    )?;
    Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
        objective_psi_psi,
        score_psi_psi,
        hessian_psi_psi,
        hessian_psi_psi_operator: None,
    })
}
/// Directional derivative (along `d_beta_flat`) of the psi-Hessian for hyperparameter
/// `psi_index` on dense designs, without row subsampling.
///
/// Returns `Ok(None)` when the psi direction cannot be resolved.
fn exact_newton_joint_psihessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let direction = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        xmu,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    self.exact_newton_joint_psihessian_directional_derivative_from_parts(
        block_states,
        &direction,
        d_beta_flat,
        xmu,
        x_ls,
        None,
    )
    .map(Some)
}
/// Directional derivative (along `d_beta_flat`) of the psi-Hessian for an
/// already-resolved psi direction.
///
/// Only the log-sigma slice of `d_beta_flat` enters the weights: it is pushed through
/// both the base log-sigma design and the psi-drifted log-sigma design. When
/// `subsample` is given, the mixed weights are masked via `apply_ht_mask_mixed`.
///
/// # Errors
/// Fails when `d_beta_flat` does not have length `xmu.ncols() + x_ls.ncols()`.
fn exact_newton_joint_psihessian_directional_derivative_from_parts(
    &self,
    block_states: &[ParameterBlockState],
    dir_a: &LocationScaleJointPsiDirection,
    d_beta_flat: &Array1<f64>,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
) -> Result<Array2<f64>, String> {
    let mean_eta = &block_states[Self::BLOCK_MU].eta;
    let scale_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let mu_cols = xmu.ncols();
    let mu_psi_map = dir_a.x_primary_psi.as_linear_map_ref();
    let ls_psi_map = dir_a.x_ls_psi.as_linear_map_ref();
    let joint_dim = mu_cols + x_ls.ncols();
    if d_beta_flat.len() != joint_dim {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily joint psi hessian directional derivative length mismatch: got {}, expected {}",
                d_beta_flat.len(),
                joint_dim
            ),
        }
        .into());
    }
    let scale_direction = d_beta_flat.slice(s![mu_cols..joint_dim]);
    let scale_shift = fast_av(x_ls, &scale_direction);
    let psi_scale_shift = ls_psi_map.forward_mul(scale_direction);
    let row_scalars = self.get_or_compute_row_scalars(mean_eta, scale_eta)?;
    let mut mixed = gaussian_joint_psi_mixed_driftweights(
        &row_scalars,
        &scale_shift,
        &dir_a.z_ls_psi,
        &psi_scale_shift,
    );
    if let Some(kept_rows) = subsample {
        apply_ht_mask_mixed(&mut mixed, kept_rows);
    }
    gaussian_joint_psi_mixedhessian_drift_fromweights(xmu, x_ls, mu_psi_map, ls_psi_map, &mixed)
}
/// Builds the effective Jacobian for block `block_idx` from a two-output additive
/// layout (mu and log-sigma) with no wiggle block.
pub fn block_effective_jacobian(
    specs: &[ParameterBlockSpec],
    block_idx: usize,
) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
    let layout = crate::util::block_jacobian::AdditiveWiggleBlockLayout {
        family: "GaussianLocationScaleFamily",
        n_outputs: 2,
        additive_blocks: &[Self::BLOCK_MU, Self::BLOCK_LOG_SIGMA],
        wiggle_block: None,
    };
    layout.block_effective_jacobian(specs, block_idx)
}
}
/// Per-observation 2x2 Hessian blocks over the (mu, log-sigma) channels of the
/// Gaussian location-scale model.
pub struct GaussianLocationScaleChannelHessian {
// Stacked blocks; the constructors in this file allocate it with shape `(n, 2, 2)`,
// where index 0 of the inner axes is mu and index 1 is log-sigma.
h: ndarray::Array3<f64>,
}
impl GaussianLocationScaleChannelHessian {
    /// Builds the raw (unclamped) per-subject 2x2 blocks from pilot predictors.
    ///
    /// With `p = exp(-2 * eta_log_sigma)` and `r = y - eta_mu`, each block is
    /// `w * [[p, 2 r p], [2 r p, 2 r^2 p]]`. No positive-semidefinite projection is applied.
    ///
    /// # Errors
    /// Returns an error message when the four input vectors differ in length.
    pub fn from_pilot_observed_unclamped(
        y: &ndarray::Array1<f64>,
        w: &ndarray::Array1<f64>,
        eta_mu: &ndarray::Array1<f64>,
        eta_log_sigma: &ndarray::Array1<f64>,
    ) -> Result<Self, String> {
        let n = y.len();
        let lengths_agree = w.len() == n && eta_mu.len() == n && eta_log_sigma.len() == n;
        if !lengths_agree {
            return Err(format!(
                "GaussianLocationScaleChannelHessian::from_pilot_observed_unclamped: \
                 length mismatch y={n} w={} eta_mu={} eta_log_sigma={}",
                w.len(),
                eta_mu.len(),
                eta_log_sigma.len(),
            ));
        }
        let mut h = ndarray::Array3::<f64>::zeros((n, 2, 2));
        for (row, mut block) in h.outer_iter_mut().enumerate() {
            let weight = w[row];
            let precision = (-2.0 * eta_log_sigma[row]).exp();
            let residual = y[row] - eta_mu[row];
            let cross = weight * 2.0 * residual * precision;
            block[[0, 0]] = weight * precision;
            block[[1, 1]] = weight * 2.0 * residual * residual * precision;
            block[[0, 1]] = cross;
            block[[1, 0]] = cross;
        }
        Ok(Self { h })
    }

    /// Builds per-subject 2x2 blocks from pilot predictors and projects each one through
    /// `psd_clamp_2x2`, rebuilding the block from the clamped eigenvalues and the returned
    /// eigenvectors.
    ///
    /// # Errors
    /// Returns an error message when the four input vectors differ in length.
    pub fn from_pilot(
        y: &ndarray::Array1<f64>,
        w: &ndarray::Array1<f64>,
        eta_mu: &ndarray::Array1<f64>,
        eta_log_sigma: &ndarray::Array1<f64>,
    ) -> Result<Self, String> {
        let n = y.len();
        let lengths_agree = w.len() == n && eta_mu.len() == n && eta_log_sigma.len() == n;
        if !lengths_agree {
            return Err(format!(
                "GaussianLocationScaleChannelHessian::from_pilot: \
                 length mismatch y={n} w={} eta_mu={} eta_log_sigma={}",
                w.len(),
                eta_mu.len(),
                eta_log_sigma.len(),
            ));
        }
        let mut h = ndarray::Array3::<f64>::zeros((n, 2, 2));
        for (row, mut block) in h.outer_iter_mut().enumerate() {
            let weight = w[row];
            let precision = (-2.0 * eta_log_sigma[row]).exp();
            let residual = y[row] - eta_mu[row];
            let raw_mm = weight * precision;
            let raw_ss = weight * 2.0 * residual * residual * precision;
            let raw_ms = weight * 2.0 * residual * precision;
            let (e0, e1, u1_0, u1_1, u2_0, u2_1) = psd_clamp_2x2(raw_mm, raw_ms, raw_ss);
            // Spectral reconstruction: e0 * u1 u1^T + e1 * u2 u2^T.
            let cross = e0 * u1_0 * u1_1 + e1 * u2_0 * u2_1;
            block[[0, 0]] = e0 * u1_0 * u1_0 + e1 * u2_0 * u2_0;
            block[[0, 1]] = cross;
            block[[1, 0]] = cross;
            block[[1, 1]] = e0 * u1_1 * u1_1 + e1 * u2_1 * u2_1;
        }
        Ok(Self { h })
    }
}
/// Eigen-decomposes the symmetric matrix `[[a, b], [b, d]]` and clamps both eigenvalues
/// at zero.
///
/// Returns `(lam1, lam2, u1_0, u1_1, u2_0, u2_1)` where `lam1 >= lam2 >= 0` are the clamped
/// eigenvalues, `u1 = (u1_0, u1_1)` is a unit eigenvector for the larger eigenvalue and
/// `u2 = (u2_0, u2_1)` is orthogonal to it. The positive-semidefinite projection is
/// `lam1 * u1 u1^T + lam2 * u2 u2^T`.
///
/// When `b` is negligible relative to the diagonal the matrix is treated as diagonal and
/// axis-aligned eigenvectors are returned. NaN eigenvalues clamp to zero because
/// `f64::max` ignores a NaN operand.
#[inline]
fn psd_clamp_2x2(a: f64, b: f64, d: f64) -> (f64, f64, f64, f64, f64, f64) {
    let mean = (a + d) * 0.5;
    let half_diff = (a - d) * 0.5;
    // sqrt(((a - d) / 2)^2 + b^2) equals sqrt(trace^2 / 4 - det), but `hypot` avoids the
    // cancellation that drops a small `b` when the diagonal entries are close.
    let disc = half_diff.hypot(b);
    let lam1 = (mean + disc).max(0.0);
    let lam2 = (mean - disc).max(0.0);
    let (u1_0, u1_1, u2_0, u2_1) = if b.abs() > 1e-15 * (a.abs() + d.abs()).max(1.0) {
        // Two algebraically equivalent eigenvectors exist for the larger eigenvalue:
        // (lam - d, b) and (b, lam - a). Use the one whose diagonal-dependent component
        // is a sum of non-negative terms, so no cancellation occurs. The second form is
        // rescaled by sign(b) so the first component stays non-negative in both cases.
        let (ex, ey) = if a >= d {
            (half_diff + disc, b)
        } else {
            (b.abs(), b.signum() * (disc - half_diff))
        };
        // norm >= |b| > 0 in this branch, so the division is well defined.
        let norm = ex.hypot(ey);
        let (e0x, e0y) = (ex / norm, ey / norm);
        (e0x, e0y, -e0y, e0x)
    } else if a >= d {
        (1.0, 0.0, 0.0, 1.0)
    } else {
        (0.0, 1.0, 1.0, 0.0)
    };
    (lam1, lam2, u1_0, u1_1, u2_0, u2_1)
}
impl FamilyChannelHessian for GaussianLocationScaleChannelHessian {
    /// Two output channels: location and log-sigma.
    fn n_outputs(&self) -> usize {
        2
    }

    /// Number of stored per-subject blocks (leading axis of the `(n, 2, 2)` array).
    fn n_subjects(&self) -> usize {
        self.h.dim().0
    }

    /// Copies subject `i`'s 2x2 block into `out` in row-major order.
    ///
    /// # Panics
    /// Panics when `out` does not have exactly four slots or `i` is out of range.
    fn fill_subject(&self, i: usize, out: &mut [f64]) {
        assert_eq!(out.len(), 4);
        let row_major = [(0, 0), (0, 1), (1, 0), (1, 1)];
        for (slot, (r, c)) in out.iter_mut().zip(row_major) {
            *slot = self.h[[i, r, c]];
        }
    }

    /// Returns an owned copy of every stored block.
    fn evaluate_full(&self) -> ndarray::Array3<f64> {
        self.h.to_owned()
    }
}
impl CustomFamily for GaussianLocationScaleFamily {
/// Reports the exact-Newton joint Hessian of this family as coefficient-dependent;
/// unconditionally `true`.
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
/// Maps every block in `specs` to an output channel: the log-sigma block position gets
/// channel 1, every other position gets channel 0.
fn output_channel_assignment(&self, specs: &[ParameterBlockSpec]) -> Option<Vec<usize>> {
    let mut channels = vec![0usize; specs.len()];
    if let Some(slot) = channels.get_mut(Self::BLOCK_LOG_SIGMA) {
        *slot = 1;
    }
    Some(channels)
}
/// Cost estimate for the coefficient Hessian; forwards the observation count
/// (`self.y.len()`) and `specs` to the shared location-scale engine helper.
fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
crate::families::location_scale_engine::location_scale_coefficient_hessian_cost(
self.y.len() as u64,
specs,
)
}
/// Evaluates the log-likelihood and the diagonal working sets of both blocks.
///
/// Every row is processed by `gaussian_diagonal_row_kernel`; the location working response
/// is the current location predictor plus the kernel's working shift, and the log-sigma
/// working response and both working weights are taken from the kernel directly. Rows are
/// processed in parallel chunks of `CHUNK` elements and the per-chunk log-likelihoods are
/// summed.
///
/// The fast path requires all four inputs to be in standard layout (`as_slice`), which
/// guarantees that slice position `i` is logical row `i`. A memory-order slice would be
/// reversed for a 1-D array with negative stride and could pair rows of different inputs
/// incorrectly, so any other layout takes the logically indexed path.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two blocks are supplied and the
/// predictors and weights all have `self.y.len()` entries; also propagates failures from
/// `BlockWorkingSet::diagonal_checked`.
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
    if block_states.len() != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let n = self.y.len();
    let etamu = &block_states[Self::BLOCK_MU].eta;
    let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    if etamu.len() != n || eta_log_sigma.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let mut zmu = Array1::<f64>::zeros(n);
    let mut wmu = Array1::<f64>::zeros(n);
    let mut z_ls = Array1::<f64>::zeros(n);
    let mut w_ls = Array1::<f64>::zeros(n);
    let ln2pi = (2.0 * std::f64::consts::PI).ln();
    let mut ll = 0.0;
    const CHUNK: usize = 1024;
    {
        // Freshly allocated outputs are always in standard layout.
        let zmu_s = zmu.as_slice_mut().expect("zeros is contiguous");
        let wmu_s = wmu.as_slice_mut().expect("zeros is contiguous");
        let zls_s = z_ls.as_slice_mut().expect("zeros is contiguous");
        let wls_s = w_ls.as_slice_mut().expect("zeros is contiguous");
        let output_chunks = zmu_s
            .par_chunks_mut(CHUNK)
            .zip(wmu_s.par_chunks_mut(CHUNK))
            .zip(zls_s.par_chunks_mut(CHUNK))
            .zip(wls_s.par_chunks_mut(CHUNK))
            .enumerate();
        if let (Some(y_s), Some(w_s), Some(mu_s), Some(ls_s)) = (
            self.y.as_slice(),
            self.weights.as_slice(),
            etamu.as_slice(),
            eta_log_sigma.as_slice(),
        ) {
            // Fast path: plain slices in logical order.
            ll += output_chunks
                .map(|(chunk_idx, (((zmu_c, wmu_c), zls_c), wls_c))| {
                    let start = chunk_idx * CHUNK;
                    let mut local_ll = 0.0;
                    for local in 0..zmu_c.len() {
                        let i = start + local;
                        let row =
                            gaussian_diagonal_row_kernel(y_s[i], mu_s[i], ls_s[i], w_s[i], ln2pi);
                        zmu_c[local] = mu_s[i] + row.location_working_shift;
                        wmu_c[local] = row.location_working_weight;
                        zls_c[local] = row.log_sigma_working_response;
                        wls_c[local] = row.log_sigma_working_weight;
                        local_ll += row.log_likelihood;
                    }
                    local_ll
                })
                .sum::<f64>();
        } else {
            // General path: logical indexing through views, valid for any layout.
            let y_view = self.y.view();
            let w_view = self.weights.view();
            let mu_view = etamu.view();
            let ls_view = eta_log_sigma.view();
            ll += output_chunks
                .map(|(chunk_idx, (((zmu_c, wmu_c), zls_c), wls_c))| {
                    let start = chunk_idx * CHUNK;
                    let mut local_ll = 0.0;
                    for local in 0..zmu_c.len() {
                        let i = start + local;
                        let row = gaussian_diagonal_row_kernel(
                            y_view[i], mu_view[i], ls_view[i], w_view[i], ln2pi,
                        );
                        zmu_c[local] = mu_view[i] + row.location_working_shift;
                        wmu_c[local] = row.location_working_weight;
                        zls_c[local] = row.log_sigma_working_response;
                        wls_c[local] = row.log_sigma_working_weight;
                        local_ll += row.log_likelihood;
                    }
                    local_ll
                })
                .sum::<f64>();
        }
    }
    Ok(FamilyEvaluation {
        log_likelihood: ll,
        blockworking_sets: vec![
            BlockWorkingSet::diagonal_checked(zmu, wmu)?,
            BlockWorkingSet::diagonal_checked(z_ls, w_ls)?,
        ],
    })
}
/// Computes only the weighted Gaussian log-likelihood, without working sets.
///
/// Row `i` contributes `w_i * (-0.5 * (r_i^2 / sigma_i^2 + ln(2 pi) + 2 ln sigma_i))` with
/// `r_i = y_i - mu_i` and `sigma_i = logb_sigma_from_eta_scalar(eta_log_sigma_i)`; rows with
/// an exactly zero weight contribute `0.0` without evaluating the link.
///
/// The slice fast path requires standard layout (`as_slice`) so that slice position `i` is
/// logical row `i` for every input; a memory-order slice of a negatively strided 1-D array
/// would be reversed and could pair rows of different inputs incorrectly.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two blocks are supplied and the
/// predictors and weights all have `self.y.len()` entries.
fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
    use rayon::iter::{IntoParallelIterator, ParallelIterator};
    if block_states.len() != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let n = self.y.len();
    let etamu = &block_states[Self::BLOCK_MU].eta;
    let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    if etamu.len() != n || eta_log_sigma.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let ln2pi = (2.0 * std::f64::consts::PI).ln();
    // Single definition of the per-row contribution shared by both access paths.
    let row_ll = |wi: f64, yi: f64, mu_i: f64, eta_ls_i: f64| -> f64 {
        if wi == 0.0 {
            return 0.0;
        }
        let sigma_i = logb_sigma_from_eta_scalar(eta_ls_i);
        let inv_s2 = (sigma_i * sigma_i).recip();
        let r = yi - mu_i;
        wi * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma_i.ln()))
    };
    let mut ll = 0.0;
    if let (Some(y_s), Some(w_s), Some(mu_s), Some(ls_s)) = (
        self.y.as_slice(),
        self.weights.as_slice(),
        etamu.as_slice(),
        eta_log_sigma.as_slice(),
    ) {
        ll += (0..n)
            .into_par_iter()
            .map(|i| row_ll(w_s[i], y_s[i], mu_s[i], ls_s[i]))
            .sum::<f64>();
    } else {
        ll += (0..n)
            .into_par_iter()
            .map(|i| row_ll(self.weights[i], self.y[i], etamu[i], eta_log_sigma[i]))
            .sum::<f64>();
    }
    Ok(ll)
}
/// Log-likelihood with optional outer-score subsampling.
///
/// Without a subsample in `options` this is exactly `log_likelihood_only`. Otherwise only
/// the listed rows are visited, and each row's contribution is multiplied by that row's
/// subsample weight in addition to the observation weight.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two blocks are supplied and the
/// predictors and weights all have `self.y.len()` entries.
fn log_likelihood_only_with_options(
    &self,
    block_states: &[ParameterBlockState],
    options: &BlockwiseFitOptions,
) -> Result<f64, String> {
    let subsample = match options.outer_score_subsample.as_ref() {
        Some(subsample) => subsample,
        None => return self.log_likelihood_only(block_states),
    };
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleFamily expects 2 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n_obs = self.y.len();
    let eta_location = &block_states[Self::BLOCK_MU].eta;
    let eta_scale = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let sizes_agree =
        eta_location.len() == n_obs && eta_scale.len() == n_obs && self.weights.len() == n_obs;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let ln2pi = (2.0 * std::f64::consts::PI).ln();
    use rayon::iter::ParallelIterator;
    let total: f64 = subsample
        .rows
        .par_iter()
        .map(|sampled| {
            let idx = sampled.index;
            let obs_weight = self.weights[idx];
            if obs_weight == 0.0 {
                return 0.0;
            }
            let sigma = logb_sigma_from_eta_scalar(eta_scale[idx]);
            let inv_var = (sigma * sigma).recip();
            let resid = self.y[idx] - eta_location[idx];
            sampled.weight
                * obs_weight
                * (-0.5 * (resid * resid * inv_var + ln2pi + 2.0 * sigma.ln()))
        })
        .sum();
    Ok(total)
}
/// Spec-free entry point: forwards to `exact_newton_joint_hessian_for_specs` with `None`
/// in the specs slot.
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_for_specs(block_states, None)
}
/// Unconditionally `true`: this impl overrides the joint-Hessian hooks.
fn has_explicit_joint_hessian(&self) -> bool {
true
}
/// Unconditionally `false`: this family does not request a joint Jeffreys term.
fn joint_jeffreys_term_required(&self) -> bool {
false
}
/// Spec-free entry point: forwards to
/// `exact_newton_joint_hessian_directional_derivative_for_specs` with `None` in the specs
/// slot and `d_beta_flat` as the direction.
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_directional_derivative_for_specs(
block_states,
None,
d_beta_flat,
)
}
/// Spec-free entry point: forwards both directions to
/// `exact_newton_joint_hessian_second_directional_derivative_for_specs` with `None` in
/// the specs slot.
fn exact_newton_joint_hessiansecond_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
block_states,
None,
d_beta_u_flat,
d_betav_flat,
)
}
fn diagonalworking_weights_directional_derivative(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
d_eta: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
if eta_t.len() != n || eta_ls.len() != n || self.weights.len() != n || d_eta.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GaussianLocationScaleFamily input size mismatch".to_string(),
}
.into());
}
let sigma = eta_ls.mapv(logb_sigma_from_eta_scalar);
let mut dw = Array1::<f64>::zeros(n);
match block_idx {
Self::BLOCK_MU => {
Ok(Some(dw))
}
Self::BLOCK_LOG_SIGMA => {
use rayon::iter::{IntoParallelIterator, ParallelIterator};
let dw_vec: Vec<f64> = (0..n)
.into_par_iter()
.map(|i| {
let d1 = crate::families::sigma_link::logb_sigma_jet1_scalar(eta_ls[i]).d1;
gaussian_log_sigma_irlsinfo_directional_derivative(
self.weights[i],
sigma[i],
d1,
d_eta[i],
)
})
.collect();
for (i, v) in dw_vec.into_iter().enumerate() {
dw[i] = v;
}
Ok(Some(dw))
}
_ => Ok(None),
}
}
/// Spec-aware entry point: forwards to `exact_newton_joint_hessian_for_specs` with
/// `Some(specs)`.
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_for_specs(block_states, Some(specs))
}
/// Spec-aware entry point: forwards to
/// `exact_newton_joint_hessian_directional_derivative_for_specs` with `Some(specs)`.
fn exact_newton_joint_hessian_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_directional_derivative_for_specs(
block_states,
Some(specs),
d_beta_flat,
)
}
/// Spec-aware entry point: forwards both directions to
/// `exact_newton_joint_hessian_second_directional_derivative_for_specs` with `Some(specs)`.
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
block_states,
Some(specs),
d_beta_u_flat,
d_betav_flat,
)
}
/// Trait entry point for the joint psi terms of hyperparameter `psi_index`; forwards all
/// arguments unchanged to `exact_newton_joint_psi_terms_for_specs`.
fn exact_newton_joint_psi_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
self.exact_newton_joint_psi_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
)
}
/// Trait entry point for the second-order joint psi terms of the pair (`psi_i`, `psi_j`);
/// forwards all arguments unchanged to `exact_newton_joint_psisecond_order_terms_for_specs`.
fn exact_newton_joint_psisecond_order_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_i: usize,
psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
self.exact_newton_joint_psisecond_order_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_i,
psi_j,
)
}
/// Trait entry point for the directional derivative of the psi-Hessian along
/// `d_beta_flat`; forwards all arguments unchanged to
/// `exact_newton_joint_psihessian_directional_derivative_for_specs`.
fn exact_newton_joint_psihessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_psihessian_directional_derivative_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
d_beta_flat,
)
}
/// Creates the shared joint-psi workspace for the current block states.
///
/// The workspace receives a clone of this family plus owned copies of the block states
/// and derivative block lists.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless there are exactly two states, two specs and
/// two derivative block lists; propagates construction failures from the workspace.
fn exact_newton_joint_psi_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    let counts = (block_states.len(), specs.len(), derivative_blocks.len());
    if counts != (2, 2, 2) {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "GaussianLocationScaleFamily joint psi workspace expects 2 states, 2 specs, and 2 derivative block lists, got {} / {} / {}",
            counts.0,
            counts.1,
            counts.2
        ) }.into());
    }
    let workspace = GaussianLocationScaleExactNewtonJointPsiWorkspace::new(
        self.clone(),
        block_states.to_vec(),
        specs,
        derivative_blocks.to_vec(),
    )?;
    let shared: Arc<dyn ExactNewtonJointPsiWorkspace> = Arc::new(workspace);
    Ok(Some(shared))
}
/// Creates the shared joint-psi workspace, handing it a clone of the outer-score
/// subsample configured in `options` (which may be absent).
///
/// # Errors
/// Returns a `DimensionMismatch` error unless there are exactly two states, two specs and
/// two derivative block lists; propagates construction failures from the workspace.
fn exact_newton_joint_psi_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    let counts = (block_states.len(), specs.len(), derivative_blocks.len());
    if counts != (2, 2, 2) {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "GaussianLocationScaleFamily joint psi workspace expects 2 states, 2 specs, and 2 derivative block lists, got {} / {} / {}",
            counts.0,
            counts.1,
            counts.2
        ) }.into());
    }
    let workspace = GaussianLocationScaleExactNewtonJointPsiWorkspace::new_with_subsample(
        self.clone(),
        block_states.to_vec(),
        specs,
        derivative_blocks.to_vec(),
        options.outer_score_subsample.clone(),
    )?;
    let shared: Arc<dyn ExactNewtonJointPsiWorkspace> = Arc::new(workspace);
    Ok(Some(shared))
}
/// Creates the joint-Hessian workspace over dense copies of both block designs.
///
/// Returns `Ok(None)` when `exact_joint_dense_block_designs` yields no dense designs for
/// `specs`.
///
/// # Errors
/// Propagates failures from the design lookup and from workspace construction.
fn exact_newton_joint_hessian_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    let (design_mu, design_ls) = match self.exact_joint_dense_block_designs(Some(specs))? {
        Some(designs) => designs,
        None => return Ok(None),
    };
    let built = GaussianLocationScaleHessianWorkspace::new(
        self.clone(),
        block_states.to_vec(),
        design_mu.into_owned(),
        design_ls.into_owned(),
    )?;
    let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(built);
    Ok(Some(shared))
}
/// Creates the joint-Hessian workspace and, when `options` carries an outer-score
/// subsample, rescales the workspace's row coefficients with it before sharing.
///
/// Returns `Ok(None)` when `exact_joint_dense_block_designs` yields no dense designs for
/// `specs`.
///
/// # Errors
/// Propagates failures from the design lookup and from workspace construction.
fn exact_newton_joint_hessian_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    let (design_mu, design_ls) = match self.exact_joint_dense_block_designs(Some(specs))? {
        Some(designs) => designs,
        None => return Ok(None),
    };
    let mut built = GaussianLocationScaleHessianWorkspace::new(
        self.clone(),
        block_states.to_vec(),
        design_mu.into_owned(),
        design_ls.into_owned(),
    )?;
    match options.outer_score_subsample.as_ref() {
        Some(subsample) => built.apply_outer_subsample(subsample.rows.as_ref()),
        None => {}
    }
    let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(built);
    Ok(Some(shared))
}
/// Hessian-vector products are available when the exact joint path is supported and dense
/// designs can be resolved for `specs`; the design lookup is skipped when the first
/// condition fails.
fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
    if !self.exact_joint_supported() {
        return false;
    }
    let designs = self.exact_joint_dense_block_designs(Some(specs));
    matches!(designs, Ok(Some(_)))
}
/// Unconditionally `true`: the `*_with_options` hooks of this impl consume
/// `outer_score_subsample`.
fn outer_derivative_subsample_capable(&self) -> bool {
true
}
}
impl CustomFamilyGenerative for GaussianLocationScaleFamily {
    /// Describes the fitted model as a generative spec: the mean is a copy of the location
    /// predictor and the Gaussian noise scale is the sigma link applied row-wise to the
    /// log-sigma predictor.
    ///
    /// # Errors
    /// Returns a `DimensionMismatch` error unless exactly two blocks are supplied.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let n_blocks = block_states.len();
        if n_blocks != 2 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "GaussianLocationScaleFamily expects 2 blocks, got {}",
                    n_blocks
                ),
            }
            .into());
        }
        let mean = block_states[Self::BLOCK_MU].eta.clone();
        let eta_scale = &block_states[Self::BLOCK_LOG_SIGMA].eta;
        let sigma = gamlss_rowwise_map(eta_scale.len(), |row| {
            logb_sigma_from_eta_scalar(eta_scale[row])
        });
        let noise = NoiseModel::Gaussian { sigma };
        Ok(GenerativeSpec { mean, noise })
    }
}
/// One design channel of a `RowCoeffOperator`: a dense design tied to a coefficient block.
struct RowCoeffChannel {
// Index into the operator's `block_offsets` / `block_widths` tables.
block: usize,
// Dense design; `mul_vec` asserts that its column count equals the block width.
design: Arc<Array2<f64>>,
}
/// Per-row coefficients coupling two channels of a `RowCoeffOperator`.
struct RowCoeffPair {
// Channel indices (positions in `RowCoeffOperator::channels`); `a == b` is a diagonal
// term, `a != b` is applied symmetrically in both directions.
a: usize,
b: usize,
// One coefficient per row; indexed up to the operator's `nrows`.
coeff: Array1<f64>,
}
/// Reusable per-call buffers for `RowCoeffOperator::mul_vec`, one row-length vector per
/// channel in each list.
struct RowCoeffScratch {
// Per-channel forward products (design times the channel's slice of the input vector).
u: Vec<Array1<f64>>,
// Per-channel accumulated row weights that are projected back through the design.
r: Vec<Array1<f64>>,
}
/// Implicit symmetric operator assembled from design channels and per-row pair
/// coefficients.
///
/// `mul_vec` applies, for every pair `(a, b, c)`, the term `X_a^T diag(c) X_b` (plus its
/// transpose when `a != b`), scattered into the coefficient blocks of the two channels.
struct RowCoeffOperator {
channels: Vec<RowCoeffChannel>,
// Start offset of every coefficient block inside the flat coefficient vector.
block_offsets: Vec<usize>,
block_widths: Vec<usize>,
// Total coefficient dimension (sum of `block_widths`).
dim: usize,
pair_coeffs: Vec<RowCoeffPair>,
nrows: usize,
// Pool of scratch buffers so repeated `mul_vec` calls can reuse allocations.
scratch_pool: std::sync::Mutex<Vec<RowCoeffScratch>>,
}
impl RowCoeffOperator {
/// Builds the operator from block widths, `(block, design)` channels and
/// `(a, b, coeff)` pair coefficients.
///
/// Block offsets are the running sums of `block_widths`, `dim` is their total, and the
/// scratch pool starts with one zeroed buffer set sized for `nrows`.
fn from_directions(
    block_widths: Vec<usize>,
    channels: Vec<(usize, Arc<Array2<f64>>)>,
    pairs: Vec<(usize, usize, Array1<f64>)>,
    nrows: usize,
) -> Self {
    let channels: Vec<RowCoeffChannel> = channels
        .into_iter()
        .map(|(block, design)| RowCoeffChannel { block, design })
        .collect();
    let pair_coeffs: Vec<RowCoeffPair> = pairs
        .into_iter()
        .map(|(a, b, coeff)| RowCoeffPair { a, b, coeff })
        .collect();
    let mut dim = 0usize;
    let block_offsets: Vec<usize> = block_widths
        .iter()
        .map(|&width| {
            let offset = dim;
            dim += width;
            offset
        })
        .collect();
    let channel_count = channels.len();
    let zeroed_buffers = || vec![Array1::<f64>::zeros(nrows); channel_count];
    let seed_scratch = RowCoeffScratch {
        u: zeroed_buffers(),
        r: zeroed_buffers(),
    };
    Self {
        channels,
        block_offsets,
        block_widths,
        dim,
        pair_coeffs,
        nrows,
        scratch_pool: std::sync::Mutex::new(vec![seed_scratch]),
    }
}
fn acquire_scratch(&self) -> RowCoeffScratch {
self.scratch_pool
.lock()
.expect("RowCoeffOperator scratch pool poisoned")
.pop()
.unwrap_or_else(|| {
let n_ch = self.channels.len();
RowCoeffScratch {
u: (0..n_ch)
.map(|_| Array1::<f64>::zeros(self.nrows))
.collect(),
r: (0..n_ch)
.map(|_| Array1::<f64>::zeros(self.nrows))
.collect(),
}
})
}
/// Returns a scratch buffer set to the pool for later reuse.
///
/// # Panics
/// Panics when the pool mutex is poisoned.
fn release_scratch(&self, scratch: RowCoeffScratch) {
    let mut pool = self
        .scratch_pool
        .lock()
        .expect("RowCoeffOperator scratch pool poisoned");
    pool.push(scratch);
}
/// Uncached projected trace: builds the per-row pair Gram table for `factor` and reduces
/// it with the pair coefficients.
fn projected_trace(&self, factor: &Array2<f64>) -> f64 {
let grams = self.projected_pair_gram_table(factor);
self.trace_from_pair_gram_table(grams.view())
}
/// Hash identifying this operator's Gram-table layout for the projected-factor cache.
///
/// The hash covers the dimensions, block layout, each channel's design pointer, block and
/// shape, and each pair's channel indices. Pair coefficient values are not hashed; the
/// Gram table built by `projected_pair_gram_table` does not read them.
fn projected_pair_gram_cache_id(&self) -> usize {
let mut hasher = DefaultHasher::new();
// Tag so ids from this table kind do not collide with other users of the cache.
"RowCoeffOperator::projected_pair_gram_table".hash(&mut hasher);
self.nrows.hash(&mut hasher);
self.dim.hash(&mut hasher);
self.block_widths.hash(&mut hasher);
self.block_offsets.hash(&mut hasher);
self.channels.len().hash(&mut hasher);
self.pair_coeffs.len().hash(&mut hasher);
for (idx, ch) in self.channels.iter().enumerate() {
idx.hash(&mut hasher);
// Designs are identified by the address of their shared allocation, not by content.
(Arc::as_ptr(&ch.design) as usize).hash(&mut hasher);
ch.block.hash(&mut hasher);
ch.design.nrows().hash(&mut hasher);
ch.design.ncols().hash(&mut hasher);
self.block_widths[ch.block].hash(&mut hasher);
}
for (idx, pair) in self.pair_coeffs.iter().enumerate() {
idx.hash(&mut hasher);
pair.a.hash(&mut hasher);
pair.b.hash(&mut hasher);
}
hasher.finish() as usize
}
/// Builds the `(nrows, pair_count)` table whose entry `(i, p)` is the dot product of row
/// `i` of `X_a F_a` with row `i` of `X_b F_b`, where `(a, b)` are the channels of pair
/// `p` and `F_k` is the slice of `factor` belonging to channel `k`'s coefficient block.
///
/// Rows are processed in chunks; chunks run in parallel only when the caller is not
/// already on a rayon worker thread and more than one chunk exists.
///
/// # Panics
/// Panics when `factor` does not have `self.dim` rows.
fn projected_pair_gram_table(&self, factor: &Array2<f64>) -> Array2<f64> {
assert_eq!(
factor.nrows(),
self.dim,
"row-coefficient cached projected trace factor row mismatch: factor rows={} but dim={}",
factor.nrows(),
self.dim
);
let rank = factor.ncols();
let pair_count = self.pair_coeffs.len();
// Degenerate shapes: nothing to project, return the all-zero table.
if self.nrows == 0 || rank == 0 || pair_count == 0 {
return Array2::<f64>::zeros((self.nrows, pair_count));
}
let rows_per_chunk =
gamlss_projected_trace_chunk_rows(rank, self.channels.len(), pair_count)
.min(self.nrows.max(1));
let mut grams = Array2::<f64>::zeros((self.nrows, pair_count));
// Fills the output rows `start..start + rows_per_chunk` (clipped to `nrows`).
let fill_chunk = |start: usize, mut out_chunk: ndarray::ArrayViewMut2<'_, f64>| {
let end = (start + rows_per_chunk).min(self.nrows);
let rows = start..end;
// Project the chunk of every channel's design onto its block of the factor.
let mut projected: Vec<Array2<f64>> = Vec::with_capacity(self.channels.len());
for ch in &self.channels {
let block_start = self.block_offsets[ch.block];
let width = self.block_widths[ch.block];
let design_chunk = ch.design.slice(s![rows.clone(), ..]);
let factor_block = factor.slice(s![block_start..block_start + width, ..]);
projected.push(fast_ab(&design_chunk, &factor_block));
}
for (pair_idx, pair) in self.pair_coeffs.iter().enumerate() {
let u_a = &projected[pair.a];
let u_b = &projected[pair.b];
for local_i in 0..u_a.nrows() {
let mut value = 0.0;
for col in 0..rank {
value += u_a[[local_i, col]] * u_b[[local_i, col]];
}
out_chunk[[local_i, pair_idx]] = value;
}
}
};
// Parallelise only from outside the rayon pool and when there is more than one chunk.
if rayon::current_thread_index().is_none() && self.nrows > rows_per_chunk {
grams
.axis_chunks_iter_mut(Axis(0), rows_per_chunk)
.into_par_iter()
.enumerate()
.for_each(|(chunk_idx, out_chunk)| {
fill_chunk(chunk_idx * rows_per_chunk, out_chunk)
});
} else {
for start in (0..self.nrows).step_by(rows_per_chunk) {
let end = (start + rows_per_chunk).min(self.nrows);
let out_chunk = grams.slice_mut(s![start..end, ..]);
fill_chunk(start, out_chunk);
}
}
grams
}
/// Reduces a per-row pair Gram table to the projected trace.
///
/// Each entry is weighted by its pair's per-row coefficient; pairs joining two different
/// channels count twice because the operator applies them in both directions.
///
/// # Panics
/// Panics when the table shape is not `(nrows, pair_count)`.
fn trace_from_pair_gram_table(&self, grams: ArrayView2<'_, f64>) -> f64 {
    assert_eq!(grams.nrows(), self.nrows);
    assert_eq!(grams.ncols(), self.pair_coeffs.len());
    let multipliers: Vec<f64> = self
        .pair_coeffs
        .iter()
        .map(|pair| if pair.a == pair.b { 1.0 } else { 2.0 })
        .collect();
    let mut total = 0.0;
    for (row, gram_row) in grams.outer_iter().enumerate() {
        let weighted_pairs = self.pair_coeffs.iter().zip(&multipliers).zip(gram_row.iter());
        for ((pair, &multiplier), &gram) in weighted_pairs {
            total += multiplier * pair.coeff[row] * gram;
        }
    }
    total
}
}
impl crate::solver::estimate::reml::unified::HyperOperator for RowCoeffOperator {
/// Total coefficient dimension of the operator.
fn dim(&self) -> usize {
self.dim
}
/// Applies the operator to `v`.
///
/// Steps: (1) project `v`'s block slice through every channel design into `u`;
/// (2) accumulate per-row weights `r` from the pair coefficients, symmetrically for
/// off-diagonal pairs; (3) project every `r` back through the transposed design and add
/// it into the channel's block of the output.
///
/// # Panics
/// Panics when `v.len() != self.dim`, when a design's column count differs from its block
/// width, or when a buffer is not contiguous.
fn mul_vec(&self, v: &Array1<f64>) -> Array1<f64> {
assert_eq!(v.len(), self.dim);
let mut scratch = self.acquire_scratch();
let RowCoeffScratch { u, r } = &mut scratch;
// Step 1: forward products u[k] = X_k * v[block_k].
for (k, ch) in self.channels.iter().enumerate() {
let start = self.block_offsets[ch.block];
let width = self.block_widths[ch.block];
assert_eq!(ch.design.ncols(), width);
let v_slice = v.slice(s![start..start + width]);
crate::faer_ndarray::fast_av_into(ch.design.as_ref(), &v_slice, &mut u[k]);
}
// Recycled scratch may hold values from an earlier call.
for slot in r.iter_mut() {
slot.fill(0.0);
}
// Step 2: accumulate per-row weights from every pair.
for pair in &self.pair_coeffs {
let a = pair.a;
let b = pair.b;
let coeff = pair
.coeff
.as_slice()
.expect("RowCoeffOperator pair coeff must be contiguous");
if a == b {
// Diagonal pair: r_a += c * u_a.
let u_a = u[a]
.as_slice()
.expect("RowCoeffOperator u must be contiguous");
let r_a = r[a]
.as_slice_mut()
.expect("RowCoeffOperator r must be contiguous");
use rayon::prelude::*;
r_a.par_iter_mut()
.zip(coeff.par_iter())
.zip(u_a.par_iter())
.for_each(|((r, c), u)| *r += c * u);
} else {
// Off-diagonal pair: split `r` so r_a and r_b can be borrowed mutably at once.
let (r_a_slice, r_b_slice) = if a < b {
let (left, right) = r.split_at_mut(b);
(
left[a].as_slice_mut().expect("contiguous"),
right[0].as_slice_mut().expect("contiguous"),
)
} else {
let (left, right) = r.split_at_mut(a);
(
right[0].as_slice_mut().expect("contiguous"),
left[b].as_slice_mut().expect("contiguous"),
)
};
let u_a = u[a].as_slice().expect("contiguous");
let u_b = u[b].as_slice().expect("contiguous");
use rayon::prelude::*;
// Symmetric application: r_a += c * u_b and r_b += c * u_a.
r_a_slice
.par_iter_mut()
.zip(r_b_slice.par_iter_mut())
.zip(coeff.par_iter())
.zip(u_a.par_iter())
.zip(u_b.par_iter())
.for_each(|((((ra, rb), c), ua), ub)| {
*ra += c * ub;
*rb += c * ua;
});
}
}
// Step 3: back-project; channels sharing a block add into the same output slice.
let mut out = Array1::<f64>::zeros(self.dim);
for (k, ch) in self.channels.iter().enumerate() {
let start = self.block_offsets[ch.block];
let width = self.block_widths[ch.block];
let mut block = out.slice_mut(s![start..start + width]);
let contrib = fast_atv(ch.design.as_ref(), &r[k]);
block += &contrib;
}
self.release_scratch(scratch);
out
}
fn mul_basis_columns_into(&self, start: usize, mut out: ndarray::ArrayViewMut2<'_, f64>) {
let cols = out.ncols();
assert!(start + cols <= self.dim);
let mut basis = Array1::<f64>::zeros(self.dim);
for local_col in 0..cols {
let global_col = start + local_col;
basis[global_col] = 1.0;
let col = self.mul_vec(&basis);
out.column_mut(local_col).assign(&col);
basis[global_col] = 0.0;
}
}
/// Materialises the full `dim x dim` matrix by filling every basis column.
fn to_dense(&self) -> Array2<f64> {
let mut out = Array2::<f64>::zeros((self.dim, self.dim));
self.mul_basis_columns_into(0, out.view_mut());
out
}
/// Uncached projected trace for `factor`; delegates to `projected_trace`.
fn trace_projected_factor(&self, factor: &Array2<f64>) -> f64 {
self.projected_trace(factor)
}
/// Projected trace that reuses the pair Gram table stored in `cache`.
///
/// The cache key combines this operator's layout id with the factor view; the Gram table
/// is only rebuilt when the cache has no entry for that key.
fn trace_projected_factor_cached(
&self,
factor: &Array2<f64>,
cache: &crate::solver::estimate::reml::unified::ProjectedFactorCache,
) -> f64 {
let key = crate::solver::estimate::reml::unified::ProjectedFactorKey::from_factor_view(
self.projected_pair_gram_cache_id(),
factor.view(),
);
let grams = cache.get_or_insert_with(key, || self.projected_pair_gram_table(factor));
self.trace_from_pair_gram_table(grams.view())
}
/// Unconditionally `true`: the operator is applied through its designs and is only
/// materialised on an explicit `to_dense` call.
fn is_implicit(&self) -> bool {
true
}
}
/// Implicit symmetric two-block operator built from two design matrices and three per-row
/// coefficient vectors.
///
/// `mul_vec` applies the block matrix with blocks `X_a^T diag(c_aa) X_a`,
/// `X_a^T diag(c_ab) X_b` (and its transpose) and `X_b^T diag(c_bb) X_b`.
struct DesignTwoBlockRowCoeffOperator {
x_a: DesignMatrix,
x_b: DesignMatrix,
c_aa: Arc<Array1<f64>>,
c_ab: Arc<Array1<f64>>,
c_bb: Arc<Array1<f64>>,
// Total coefficient dimension; block `b` occupies `pa..dim`.
dim: usize,
nrows: usize,
// Width of block `a`; it occupies `0..pa` of the coefficient vector.
pa: usize,
}
impl crate::solver::estimate::reml::unified::HyperOperator for DesignTwoBlockRowCoeffOperator {
/// Total coefficient dimension across both blocks.
fn dim(&self) -> usize {
self.dim
}
/// Applies the two-block operator to `v`.
///
/// The vector is split at `pa`, both halves are pushed through their designs, mixed
/// row-wise with the three coefficient vectors, and pulled back through the transposed
/// designs into the matching halves of the result.
///
/// # Panics
/// Panics when `v.len() != self.dim` or a design product does not have `nrows` entries.
fn mul_vec(&self, v: &Array1<f64>) -> Array1<f64> {
    assert_eq!(v.len(), self.dim);
    let split = self.pa;
    let coeffs_a = v.slice(s![..split]).to_owned();
    let coeffs_b = v.slice(s![split..]).to_owned();
    let u_a = self.x_a.matrixvectormultiply(&coeffs_a);
    let u_b = self.x_b.matrixvectormultiply(&coeffs_b);
    assert_eq!(u_a.len(), self.nrows);
    assert_eq!(u_b.len(), self.nrows);
    let row_weights_a = self.c_aa.as_ref() * &u_a + self.c_ab.as_ref() * &u_b;
    let row_weights_b = self.c_ab.as_ref() * &u_a + self.c_bb.as_ref() * &u_b;
    let image_a = self.x_a.transpose_vector_multiply(&row_weights_a);
    let image_b = self.x_b.transpose_vector_multiply(&row_weights_b);
    let mut image = Array1::<f64>::zeros(self.dim);
    image.slice_mut(s![..split]).assign(&image_a);
    image.slice_mut(s![split..]).assign(&image_b);
    image
}
fn mul_basis_columns_into(&self, start: usize, mut out: ndarray::ArrayViewMut2<'_, f64>) {
let cols = out.ncols();
assert!(start + cols <= self.dim);
let mut basis = Array1::<f64>::zeros(self.dim);
for local_col in 0..cols {
let global_col = start + local_col;
basis[global_col] = 1.0;
let col = self.mul_vec(&basis);
out.column_mut(local_col).assign(&col);
basis[global_col] = 0.0;
}
}
/// Materialises the full `dim x dim` matrix by filling every basis column.
fn to_dense(&self) -> Array2<f64> {
let mut out = Array2::<f64>::zeros((self.dim, self.dim));
self.mul_basis_columns_into(0, out.view_mut());
out
}
/// Uncached projected trace: builds the per-row Gram triples for `factor` and reduces
/// them with the three coefficient vectors.
fn trace_projected_factor(&self, factor: &Array2<f64>) -> f64 {
let grams = self.projected_row_gram_triples(factor);
self.trace_from_row_gram_triples(grams.view())
}
/// Projected trace that reuses the per-row Gram triples stored in `cache`.
///
/// The factor shape is checked before the cache lookup. The cache key combines this
/// operator's design-identity id with the factor view; the triples are only rebuilt when
/// the cache has no entry for that key.
///
/// # Panics
/// Panics when `factor` does not have `self.dim` rows.
fn trace_projected_factor_cached(
&self,
factor: &Array2<f64>,
cache: &crate::solver::estimate::reml::unified::ProjectedFactorCache,
) -> f64 {
assert_eq!(
factor.nrows(),
self.dim,
"two-block cached projected trace factor row mismatch: factor rows={} \
but joint p={} (pa={}, pb={})",
factor.nrows(),
self.dim,
self.pa,
self.dim - self.pa,
);
let key = crate::solver::estimate::reml::unified::ProjectedFactorKey::from_factor_view(
self.projected_row_gram_cache_id(),
factor.view(),
);
let grams = cache.get_or_insert_with(key, || self.projected_row_gram_triples(factor));
self.trace_from_row_gram_triples(grams.view())
}
/// Unconditionally `true`: the operator is applied through its designs and is only
/// materialised on an explicit `to_dense` call.
fn is_implicit(&self) -> bool {
true
}
}
impl DesignTwoBlockRowCoeffOperator {
/// Address-based identity token for a design matrix, used when hashing cache ids.
///
/// Dense designs use the address of their shared `Arc` allocation.
///
/// NOTE(review): the sparse arm uses the address of the `sparse` payload as stored inside
/// the `DesignMatrix` value that was passed in, not of any shared allocation. Presumably
/// two operators holding equal sparse designs therefore get different tokens, and the
/// token changes if the operator is moved — confirm that this is intended.
fn design_cache_token(design: &DesignMatrix) -> usize {
match design {
DesignMatrix::Dense(DenseDesignMatrix::Materialized(matrix)) => {
Arc::as_ptr(matrix) as usize
}
DesignMatrix::Dense(DenseDesignMatrix::Lazy(op)) => {
// Cast away the pointer metadata so only the data address remains.
Arc::as_ptr(op) as *const () as usize
}
DesignMatrix::Sparse(sparse) => sparse as *const _ as usize,
}
}
/// Hash identifying this operator's Gram-triple layout for the projected-factor cache.
///
/// Covers a fixed tag, the identity tokens of both designs, the row count and the block
/// split. The coefficient vectors are not hashed; `projected_row_gram_triples` does not
/// read them.
fn projected_row_gram_cache_id(&self) -> usize {
let mut hasher = DefaultHasher::new();
"DesignTwoBlockRowCoeffOperator::projected_row_gram_triples".hash(&mut hasher);
Self::design_cache_token(&self.x_a).hash(&mut hasher);
Self::design_cache_token(&self.x_b).hash(&mut hasher);
self.nrows.hash(&mut hasher);
self.pa.hash(&mut hasher);
self.dim.hash(&mut hasher);
hasher.finish() as usize
}
/// Builds the `(nrows, 3)` table of per-row Gram entries for `factor`.
///
/// With `U_a = X_a F_a` and `U_b = X_b F_b` (the factor split at `pa`), row `i` holds
/// `[u_a . u_a, u_a . u_b, u_b . u_b]` for the `i`-th rows of `U_a` and `U_b`.
///
/// Rows are processed in chunks; chunks run in parallel only when the caller is not
/// already on a rayon worker thread and more than one chunk exists.
///
/// # Panics
/// Panics when `factor` does not have `self.dim` rows or when a design row chunk cannot
/// be materialised.
fn projected_row_gram_triples(&self, factor: &Array2<f64>) -> Array2<f64> {
assert_eq!(
factor.nrows(),
self.dim,
"two-block cached projected trace factor row mismatch: factor rows={} \
but joint p={} (pa={}, pb={})",
factor.nrows(),
self.dim,
self.pa,
self.dim - self.pa,
);
let rank = factor.ncols();
let mut grams = Array2::<f64>::zeros((self.nrows, 3));
// Degenerate shapes: nothing to project, return the all-zero table.
if self.nrows == 0 || rank == 0 {
return grams;
}
let rows_per_chunk = gamlss_projected_trace_chunk_rows(rank, 2, 3).min(self.nrows.max(1));
let f_a = factor.slice(s![0..self.pa, ..]);
let f_b = factor.slice(s![self.pa..self.dim, ..]);
// Fills the output rows `start..start + rows_per_chunk` (clipped to `nrows`).
let fill_chunk = |start: usize, mut out_chunk: ndarray::ArrayViewMut2<'_, f64>| {
let end = (start + rows_per_chunk).min(self.nrows);
let rows = start..end;
let x_a_chunk = self
.x_a
.try_row_chunk(rows.clone())
.expect("two-block projected trace x_a row chunk materialization failed");
let x_b_chunk = self
.x_b
.try_row_chunk(rows.clone())
.expect("two-block projected trace x_b row chunk materialization failed");
let u_a = fast_ab(&x_a_chunk, &f_a);
let u_b = fast_ab(&x_b_chunk, &f_b);
for local_i in 0..u_a.nrows() {
let mut aa = 0.0;
let mut ab = 0.0;
let mut bb = 0.0;
for col in 0..rank {
let a = u_a[[local_i, col]];
let b = u_b[[local_i, col]];
aa += a * a;
ab += a * b;
bb += b * b;
}
out_chunk[[local_i, 0]] = aa;
out_chunk[[local_i, 1]] = ab;
out_chunk[[local_i, 2]] = bb;
}
};
// Parallelise only from outside the rayon pool and when there is more than one chunk.
if rayon::current_thread_index().is_none() && self.nrows > rows_per_chunk {
grams
.axis_chunks_iter_mut(Axis(0), rows_per_chunk)
.into_par_iter()
.enumerate()
.for_each(|(chunk_idx, out_chunk)| {
fill_chunk(chunk_idx * rows_per_chunk, out_chunk)
});
} else {
for start in (0..self.nrows).step_by(rows_per_chunk) {
let end = (start + rows_per_chunk).min(self.nrows);
let out_chunk = grams.slice_mut(s![start..end, ..]);
fill_chunk(start, out_chunk);
}
}
grams
}
/// Reduces per-row Gram triples to the projected trace: every row contributes
/// `c_aa * g_aa + 2 * c_ab * g_ab + c_bb * g_bb`.
///
/// # Panics
/// Panics when the table shape is not `(nrows, 3)` or a coefficient vector is not
/// contiguous.
fn trace_from_row_gram_triples(&self, grams: ArrayView2<'_, f64>) -> f64 {
    assert_eq!(grams.nrows(), self.nrows);
    assert_eq!(grams.ncols(), 3);
    let c_aa = self
        .c_aa
        .as_slice()
        .expect("c_aa is constructed contiguous");
    let c_ab = self
        .c_ab
        .as_slice()
        .expect("c_ab is constructed contiguous");
    let c_bb = self
        .c_bb
        .as_slice()
        .expect("c_bb is constructed contiguous");
    let mut total = 0.0;
    for (row, triple) in grams.outer_iter().enumerate() {
        total += c_aa[row] * triple[0] + 2.0 * c_ab[row] * triple[1] + c_bb[row] * triple[2];
    }
    total
}
}
/// Joint-Hessian workspace for the Gaussian location-scale family.
///
/// Holds dense designs for both blocks and three per-row coefficient vectors
/// (location/location, location/log-sigma, log-sigma/log-sigma) from which the joint
/// Hessian `X^T diag(c) X` blocks are assembled.
struct GaussianLocationScaleHessianWorkspace {
family: GaussianLocationScaleFamily,
block_states: Vec<ParameterBlockState>,
xmu: Arc<Array2<f64>>,
x_ls: Arc<Array2<f64>>,
// Row coefficients of the location/location block.
coeff_mm: Array1<f64>,
// Row coefficients of the location/log-sigma cross block.
coeff_ml: Array1<f64>,
// Row coefficients of the log-sigma/log-sigma block.
coeff_ll: Array1<f64>,
}
impl GaussianLocationScaleHessianWorkspace {
/// Builds the workspace: looks up the family's row scalars for the current predictors,
/// derives the three joint row-coefficient vectors from them, and wraps both designs in
/// `Arc`s.
///
/// # Errors
/// Propagates failures from the row-scalar lookup.
fn new(
    family: GaussianLocationScaleFamily,
    block_states: Vec<ParameterBlockState>,
    xmu: Array2<f64>,
    x_ls: Array2<f64>,
) -> Result<Self, String> {
    let eta_location = &block_states[GaussianLocationScaleFamily::BLOCK_MU].eta;
    let eta_scale = &block_states[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA].eta;
    let row_scalars = family.get_or_compute_row_scalars(eta_location, eta_scale)?;
    let (coeff_mm, coeff_ml, coeff_ll) =
        gaussian_locscale_fisher_joint_row_coeffs(&row_scalars);
    let xmu = Arc::new(xmu);
    let x_ls = Arc::new(x_ls);
    Ok(Self {
        family,
        block_states,
        xmu,
        x_ls,
        coeff_mm,
        coeff_ml,
        coeff_ll,
    })
}
fn apply_outer_subsample(
&mut self,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = self.coeff_mm.len();
let mut mask_mm = Array1::<f64>::zeros(n);
let mut mask_ml = Array1::<f64>::zeros(n);
let mut mask_ll = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
mask_mm[i] = self.coeff_mm[i] * r.weight;
mask_ml[i] = self.coeff_ml[i] * r.weight;
mask_ll[i] = self.coeff_ll[i] * r.weight;
}
self.coeff_mm = mask_mm;
self.coeff_ml = mask_ml;
self.coeff_ll = mask_ll;
}
}
impl ExactNewtonJointHessianWorkspace for GaussianLocationScaleHessianWorkspace {
/// Assembles the dense joint Hessian over the mu and log-sigma coefficients.
///
/// The three upper-triangular blocks are built from the stored row
/// coefficients and then reflected into the lower triangle by
/// `mirror_upper_to_lower`.
///
/// # Errors
/// Propagates errors from the weighted cross-product helpers.
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
    let x_mu: &Array2<f64> = &self.xmu;
    let x_sigma: &Array2<f64> = &self.x_ls;
    let split = x_mu.ncols();
    let dim = split + x_sigma.ncols();
    let block_mm = xt_diag_x_dense(x_mu, &self.coeff_mm)?;
    let block_ml = xt_diag_y_dense(x_mu, &self.coeff_ml, x_sigma)?;
    let block_ll = xt_diag_x_dense(x_sigma, &self.coeff_ll)?;
    let mut joint = Array2::<f64>::zeros((dim, dim));
    joint.slice_mut(s![..split, ..split]).assign(&block_mm);
    joint.slice_mut(s![..split, split..dim]).assign(&block_ml);
    joint.slice_mut(s![split..dim, split..dim]).assign(&block_ll);
    mirror_upper_to_lower(&mut joint);
    Ok(Some(joint))
}
/// Reports that this workspace provides `hessian_matvec`; always `true`.
fn hessian_matvec_available(&self) -> bool {
true
}
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
let pmu = self.xmu.ncols();
let p_ls = self.x_ls.ncols();
let total = pmu + p_ls;
if v.len() != total {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScale matvec dimension mismatch: got {}, expected {}",
v.len(),
total
),
}
.into());
}
let u_mu = fast_av(self.xmu.as_ref(), &v.slice(s![0..pmu]));
let u_ls = fast_av(self.x_ls.as_ref(), &v.slice(s![pmu..total]));
let r_mu = &self.coeff_mm * &u_mu + &self.coeff_ml * &u_ls;
let r_ls = &self.coeff_ml * &u_mu + &self.coeff_ll * &u_ls;
let out_mu = fast_atv(self.xmu.as_ref(), &r_mu);
let out_ls = fast_atv(self.x_ls.as_ref(), &r_ls);
let mut out = Array1::<f64>::zeros(total);
out.slice_mut(s![0..pmu]).assign(&out_mu);
out.slice_mut(s![pmu..total]).assign(&out_ls);
Ok(Some(out))
}
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
use rayon::iter::{IntoParallelIterator, ParallelIterator};
let pmu = self.xmu.ncols();
let p_ls = self.x_ls.ncols();
let total = pmu + p_ls;
let diag_mu: Vec<f64> = (0..pmu)
.into_par_iter()
.map(|j| {
let col = self.xmu.column(j);
col.iter()
.zip(self.coeff_mm.iter())
.map(|(&v, &c)| c * v * v)
.sum()
})
.collect();
let diag_ls: Vec<f64> = (0..p_ls)
.into_par_iter()
.map(|j| {
let col = self.x_ls.column(j);
col.iter()
.zip(self.coeff_ll.iter())
.map(|(&v, &c)| c * v * v)
.sum()
})
.collect();
let mut diag = Array1::<f64>::zeros(total);
for (j, v) in diag_mu.into_iter().enumerate() {
diag[j] = v;
}
for (j, v) in diag_ls.into_iter().enumerate() {
diag[pmu + j] = v;
}
Ok(Some(diag))
}
/// Dense directional derivative of the joint Hessian along `d_beta_flat`.
///
/// Delegates to the family, handing it the stored block states and borrowed
/// views of the two designs.
fn directional_derivative(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let mu_design = DenseOrOperator::Borrowed(self.xmu.as_ref());
    let sigma_design = DenseOrOperator::Borrowed(self.x_ls.as_ref());
    self.family
        .exact_newton_joint_hessian_directional_derivative_from_designs(
            &self.block_states,
            &mu_design,
            &sigma_design,
            d_beta_flat,
        )
}
fn directional_derivative_operator(
&self,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
let n = self.xmu.nrows();
let pmu = self.xmu.ncols();
let pls = self.x_ls.ncols();
let total = pmu + pls;
if d_beta_flat.len() != total {
return Err(GamlssError::InvalidInput {
reason: format!(
"GaussianLocationScale dH operator: d_beta length {} != {}",
d_beta_flat.len(),
total
),
}
.into());
}
let etamu = &self.block_states[GaussianLocationScaleFamily::BLOCK_MU].eta;
let eta_ls = &self.block_states[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA].eta;
let rows = self.family.get_or_compute_row_scalars(etamu, eta_ls)?;
let ximu = fast_av(self.xmu.as_ref(), &d_beta_flat.slice(s![0..pmu]));
let xi_ls = fast_av(self.x_ls.as_ref(), &d_beta_flat.slice(s![pmu..total]));
let directional = gaussian_joint_first_directionalweights(&rows, &ximu, &xi_ls);
let c_mm = directional.0;
let c_ll = directional.2;
let c_ml = Array1::<f64>::zeros(c_mm.len());
Ok(Some(Arc::new(make_two_block_row_coeff_operator(
self.xmu.clone(),
self.x_ls.clone(),
c_mm,
c_ml,
c_ll,
n,
))))
}
/// Dense second directional derivative of the joint Hessian along the pair
/// (`d_beta_u_flat`, `d_beta_v_flat`).
///
/// Delegates to the family, handing it the stored block states and borrowed
/// views of the two designs.
fn second_directional_derivative(
    &self,
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let mu_design = DenseOrOperator::Borrowed(self.xmu.as_ref());
    let sigma_design = DenseOrOperator::Borrowed(self.x_ls.as_ref());
    self.family
        .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
            &self.block_states,
            &mu_design,
            &sigma_design,
            d_beta_u_flat,
            d_beta_v_flat,
        )
}
fn second_directional_derivative_operator(
&self,
d_beta_u: &Array1<f64>,
d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
let n = self.xmu.nrows();
let pmu = self.xmu.ncols();
let pls = self.x_ls.ncols();
let total = pmu + pls;
if d_beta_u.len() != total || d_beta_v.len() != total {
return Err(GamlssError::InvalidInput {
reason: format!(
"GaussianLocationScale d2H operator: d_beta_{{u,v}} length {}/{} != {}",
d_beta_u.len(),
d_beta_v.len(),
total
),
}
.into());
}
let etamu = &self.block_states[GaussianLocationScaleFamily::BLOCK_MU].eta;
let eta_ls = &self.block_states[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA].eta;
let rows = self.family.get_or_compute_row_scalars(etamu, eta_ls)?;
let ximu_u = fast_av(self.xmu.as_ref(), &d_beta_u.slice(s![0..pmu]));
let xi_ls_u = fast_av(self.x_ls.as_ref(), &d_beta_u.slice(s![pmu..total]));
let ximu_v = fast_av(self.xmu.as_ref(), &d_beta_v.slice(s![0..pmu]));
let xi_ls_v = fast_av(self.x_ls.as_ref(), &d_beta_v.slice(s![pmu..total]));
let directional =
gaussian_jointsecond_directionalweights(&rows, &ximu_u, &xi_ls_u, &ximu_v, &xi_ls_v);
let c_mm = directional.0;
let c_ll = directional.2;
let c_ml = Array1::<f64>::zeros(c_mm.len());
Ok(Some(Arc::new(make_two_block_row_coeff_operator(
self.xmu.clone(),
self.x_ls.clone(),
c_mm,
c_ml,
c_ll,
n,
))))
}
}
/// Builds a `RowCoeffOperator` over two coefficient blocks.
///
/// Block 0 uses design `x_a`, block 1 uses design `x_b`. The three row
/// coefficient vectors are attached to the direction pairs (0,0), (0,1) and
/// (1,1); `nrows` is passed through unchanged.
fn make_two_block_row_coeff_operator(
    x_a: Arc<Array2<f64>>,
    x_b: Arc<Array2<f64>>,
    c_aa: Array1<f64>,
    c_ab: Array1<f64>,
    c_bb: Array1<f64>,
    nrows: usize,
) -> RowCoeffOperator {
    let block_widths = vec![x_a.ncols(), x_b.ncols()];
    let directions = vec![(0, x_a), (1, x_b)];
    let pair_coeffs = vec![(0, 0, c_aa), (0, 1, c_ab), (1, 1, c_bb)];
    RowCoeffOperator::from_directions(block_widths, directions, pair_coeffs, nrows)
}
/// Builds a `DesignTwoBlockRowCoeffOperator` after checking row counts.
///
/// Both designs and all three coefficient vectors must have the same number
/// of rows as `x_a`. The operator dimension is the combined column count and
/// `pa` records the width of the first block.
///
/// # Errors
/// Returns a dimension-mismatch error when any row count disagrees.
fn make_two_block_design_row_coeff_operator(
    x_a: DesignMatrix,
    x_b: DesignMatrix,
    c_aa: Arc<Array1<f64>>,
    c_ab: Arc<Array1<f64>>,
    c_bb: Arc<Array1<f64>>,
) -> Result<DesignTwoBlockRowCoeffOperator, String> {
    let nrows = x_a.nrows();
    let rows_b = x_b.nrows();
    let coeff_lens = [c_aa.len(), c_ab.len(), c_bb.len()];
    let consistent = rows_b == nrows && coeff_lens.iter().all(|&len| len == nrows);
    if !consistent {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "two-block row coefficient operator dimension mismatch: rows a={}, b={}, coeffs={}/{}/{}",
            nrows,
            rows_b,
            coeff_lens[0],
            coeff_lens[1],
            coeff_lens[2]
        ) }.into());
    }
    let pa = x_a.ncols();
    let dim = pa + x_b.ncols();
    Ok(DesignTwoBlockRowCoeffOperator {
        x_a,
        x_b,
        c_aa,
        c_ab,
        c_bb,
        dim,
        nrows,
        pa,
    })
}
/// Wiggle basis matrices and their projections onto the wiggle coefficients,
/// all evaluated at the base predictor `q0` (see `wiggle_geometry`).
struct GaussianLocationScaleWiggleGeometry {
/// Wiggle basis values.
basis: Array2<f64>,
/// First-derivative wiggle basis.
basis_d1: Array2<f64>,
/// Second-derivative wiggle basis.
basis_d2: Array2<f64>,
/// Third-derivative wiggle basis.
basis_d3: Array2<f64>,
/// First-derivative basis times the wiggle coefficients, plus one.
dq_dq0: Array1<f64>,
/// Second-derivative basis times the wiggle coefficients.
d2q_dq02: Array1<f64>,
/// Third-derivative basis times the wiggle coefficients.
d3q_dq03: Array1<f64>,
/// Fourth-derivative basis times the wiggle coefficients.
d4q_dq04: Array1<f64>,
}
/// Per-row coefficients and basis matrices from which the three-block
/// (mu, log-sigma, wiggle) joint Hessian is assembled in `assemble_dense`.
struct GaussianLocationScaleWiggleHessianRowPieces {
/// Row coefficients of the mu/mu block.
coeff_mm: Array1<f64>,
/// Row coefficients of the mu/log-sigma block.
coeff_ml: Array1<f64>,
/// Row coefficients of the log-sigma/log-sigma block.
coeff_ll: Array1<f64>,
/// Row coefficients pairing the mu design with `basis` in the mu/wiggle block.
coeff_mw_b: Array1<f64>,
/// Row coefficients pairing the mu design with `basis_d1` in the mu/wiggle block.
coeff_mw_d: Array1<f64>,
/// Row coefficients pairing the log-sigma design with `basis` in the log-sigma/wiggle block.
coeff_lw_b: Array1<f64>,
/// Row coefficients of the wiggle/wiggle block.
coeff_ww: Array1<f64>,
/// Wiggle basis values.
basis: Array2<f64>,
/// First-derivative wiggle basis.
basis_d1: Array2<f64>,
}
impl GaussianLocationScaleWiggleHessianRowPieces {
    /// Assembles the dense symmetric joint Hessian for the given mu and
    /// log-sigma designs.
    ///
    /// The mu/wiggle block is the sum of two weighted cross-products, one
    /// against `basis` and one against `basis_d1`.
    ///
    /// # Errors
    /// Propagates errors from the weighted cross-product helpers.
    fn assemble_dense(&self, xmu: &Array2<f64>, x_ls: &Array2<f64>) -> Result<Array2<f64>, String> {
        let block_mm = xt_diag_x_dense(xmu, &self.coeff_mm)?;
        let block_ml = xt_diag_y_dense(xmu, &self.coeff_ml, x_ls)?;
        let block_ll = xt_diag_x_dense(x_ls, &self.coeff_ll)?;
        let mw_value_part = xt_diag_y_dense(xmu, &self.coeff_mw_b, &self.basis)?;
        let mw_slope_part = xt_diag_y_dense(xmu, &self.coeff_mw_d, &self.basis_d1)?;
        let block_mw = mw_value_part + &mw_slope_part;
        let block_lw = xt_diag_y_dense(x_ls, &self.coeff_lw_b, &self.basis)?;
        let block_ww = xt_diag_x_dense(&self.basis, &self.coeff_ww)?;
        let packed = gaussian_pack_wiggle_joint_symmetrichessian(
            &block_mm, &block_ml, &block_mw, &block_ll, &block_lw, &block_ww,
        );
        Ok(packed)
    }
}
fn scale_matrix_rows(mat: &Array2<f64>, coeffs: &Array1<f64>) -> Result<Array2<f64>, String> {
if mat.nrows() != coeffs.len() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"row scaling dimension mismatch: matrix has {} rows but coeffs have {} entries",
mat.nrows(),
coeffs.len()
),
}
.into());
}
Ok(Array2::from_shape_fn(mat.dim(), |(i, j)| {
mat[[i, j]] * coeffs[i]
}))
}
/// Concatenates the mu, log-sigma and wiggle score vectors, in that order,
/// into one flat vector.
fn gaussian_pack_wiggle_joint_score(
    score_mu: &Array1<f64>,
    score_ls: &Array1<f64>,
    score_w: &Array1<f64>,
) -> Array1<f64> {
    let mut flat = Vec::with_capacity(score_mu.len() + score_ls.len() + score_w.len());
    flat.extend(score_mu.iter().copied());
    flat.extend(score_ls.iter().copied());
    flat.extend(score_w.iter().copied());
    Array1::from_vec(flat)
}
/// Packs six upper-triangular blocks into one symmetric three-block matrix.
///
/// Block order is mu, log-sigma, wiggle. Only the diagonal and
/// upper-triangular blocks are written; the lower triangle is filled by
/// `mirror_upper_to_lower`. Block sizes are taken from the row counts of the
/// three diagonal blocks.
fn gaussian_pack_wiggle_joint_symmetrichessian(
    h_mm: &Array2<f64>,
    h_ml: &Array2<f64>,
    h_mw: &Array2<f64>,
    h_ll: &Array2<f64>,
    h_lw: &Array2<f64>,
    h_ww: &Array2<f64>,
) -> Array2<f64> {
    // Exclusive end offsets of the mu and log-sigma blocks, and the full size.
    let mu_end = h_mm.nrows();
    let ls_end = mu_end + h_ll.nrows();
    let dim = ls_end + h_ww.nrows();
    let mut packed = Array2::<f64>::zeros((dim, dim));
    // First block row: mu against mu, log-sigma, wiggle.
    packed.slice_mut(s![..mu_end, ..mu_end]).assign(h_mm);
    packed.slice_mut(s![..mu_end, mu_end..ls_end]).assign(h_ml);
    packed.slice_mut(s![..mu_end, ls_end..dim]).assign(h_mw);
    // Second block row: log-sigma against log-sigma, wiggle.
    packed
        .slice_mut(s![mu_end..ls_end, mu_end..ls_end])
        .assign(h_ll);
    packed
        .slice_mut(s![mu_end..ls_end, ls_end..dim])
        .assign(h_lw);
    // Third block row: wiggle against wiggle.
    packed
        .slice_mut(s![ls_end..dim, ls_end..dim])
        .assign(h_ww);
    mirror_upper_to_lower(&mut packed);
    packed
}
/// Gaussian location-scale family with an additional wiggle block.
///
/// The three parameter blocks are mu, log-sigma and wiggle (see the
/// `BLOCK_*` constants on the impl).
pub struct GaussianLocationScaleWiggleFamily {
/// Observed responses; its length is used as the number of rows.
pub y: Array1<f64>,
/// Per-row weights passed to the row-scalar evaluation.
pub weights: Array1<f64>,
/// Optional design of the mu block.
pub mu_design: Option<DesignMatrix>,
/// Optional design of the log-sigma block.
pub log_sigma_design: Option<DesignMatrix>,
/// Knots handed to the monotone wiggle basis builder.
pub wiggle_knots: Array1<f64>,
/// Degree handed to the monotone wiggle basis builder.
pub wiggle_degree: usize,
/// Resource policy; its material policy is used when densifying designs.
pub policy: crate::resource::ResourcePolicy,
// NOTE(review): presumably a keyed cache of row scalars (six f64 key parts
// plus the shared value) — confirm against the code that writes it.
cached_row_scalars:
std::sync::RwLock<Option<(f64, f64, f64, f64, f64, f64, Arc<GaussianJointRowScalars>)>>,
}
impl Clone for GaussianLocationScaleWiggleFamily {
    /// Clones every field; the lock-protected cache entry is copied into a
    /// fresh `RwLock` so the clone does not share a lock with the original.
    ///
    /// # Panics
    /// Panics if the cache lock is poisoned.
    fn clone(&self) -> Self {
        let cache_snapshot = self
            .cached_row_scalars
            .read()
            .expect("lock poisoned")
            .clone();
        Self {
            y: self.y.clone(),
            weights: self.weights.clone(),
            mu_design: self.mu_design.clone(),
            log_sigma_design: self.log_sigma_design.clone(),
            wiggle_knots: self.wiggle_knots.clone(),
            wiggle_degree: self.wiggle_degree,
            policy: self.policy.clone(),
            cached_row_scalars: std::sync::RwLock::new(cache_snapshot),
        }
    }
}
impl GaussianLocationScaleWiggleFamily {
/// Index of the mu block in the block list.
pub const BLOCK_MU: usize = 0;
/// Index of the log-sigma block in the block list.
pub const BLOCK_LOG_SIGMA: usize = 1;
/// Index of the wiggle block in the block list.
pub const BLOCK_WIGGLE: usize = 2;
/// Names of the three parameter blocks, in block order.
pub fn parameternames() -> &'static [&'static str] {
&["mu", "log_sigma", "wiggle"]
}
/// Link of each parameter block, in block order: identity, log, wiggle.
pub fn parameter_links() -> &'static [ParameterLink] {
&[
ParameterLink::Identity,
ParameterLink::Log,
ParameterLink::Wiggle,
]
}
/// Family metadata: the family name plus the parameter names and links
/// returned by `parameternames` and `parameter_links`.
pub fn metadata() -> FamilyMetadata {
FamilyMetadata {
name: "gaussian_location_scalewiggle",
parameternames: Self::parameternames(),
parameter_links: Self::parameter_links(),
}
}
/// True when both the mu and the log-sigma design are stored on the family.
fn exact_joint_supported(&self) -> bool {
    matches!(
        (&self.mu_design, &self.log_sigma_design),
        (Some(_), Some(_))
    )
}
/// Evaluates the monotone wiggle basis at `q0` for the derivative order
/// requested in `options`, using the family's knots and degree.
fn wiggle_basiswith_options(
    &self,
    q0: ArrayView1<'_, f64>,
    options: BasisOptions,
) -> Result<Array2<f64>, String> {
    let order = options.derivative_order;
    let (knots, degree) = (&self.wiggle_knots, self.wiggle_degree);
    monotone_wiggle_basis_with_derivative_order(q0, knots, degree, order)
}
/// Evaluates the wiggle basis at `q0` with the `BasisOptions::value()` options.
fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
self.wiggle_basiswith_options(q0, BasisOptions::value())
}
/// Returns the first-derivative basis at `q0` times `beta_link_wiggle`,
/// plus one.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from the number of coefficients; propagates basis evaluation errors.
fn wiggle_dq_dq0(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let slope_basis = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
    let (n_basis, n_coef) = (slope_basis.ncols(), beta_link_wiggle.len());
    if n_basis == n_coef {
        Ok(slope_basis.dot(&beta_link_wiggle) + 1.0)
    } else {
        Err(GamlssError::DimensionMismatch { reason: format!(
            "wiggle derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
            n_basis,
            n_coef
        ) }.into())
    }
}
/// Returns the second-derivative basis at `q0` times `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from the number of coefficients; propagates basis evaluation errors.
fn wiggle_d2q_dq02(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let curvature_basis = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
    let (n_basis, n_coef) = (curvature_basis.ncols(), beta_link_wiggle.len());
    if n_basis == n_coef {
        Ok(curvature_basis.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch { reason: format!(
            "wiggle second-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
            n_basis,
            n_coef
        ) }.into())
    }
}
/// Evaluates the monotone wiggle basis at `q0` with derivative order 3,
/// using the family's knots and degree.
fn wiggle_d3basis_constrained(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, 3)
}
/// Returns the third-derivative basis at `q0` times `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from the number of coefficients; propagates basis evaluation errors.
fn wiggle_d3q_dq03(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let third_basis = self.wiggle_d3basis_constrained(q0)?;
    let (n_basis, n_coef) = (third_basis.ncols(), beta_link_wiggle.len());
    if n_basis == n_coef {
        Ok(third_basis.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch { reason: format!(
            "wiggle third-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
            n_basis,
            n_coef
        ) }.into())
    }
}
/// Returns the fourth-derivative basis at `q0` times `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from the number of coefficients; propagates basis evaluation errors.
fn wiggle_d4q_dq04(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let (knots, degree) = (&self.wiggle_knots, self.wiggle_degree);
    let fourth_basis = monotone_wiggle_basis_with_derivative_order(q0, knots, degree, 4)?;
    let (n_basis, n_coef) = (fourth_basis.ncols(), beta_link_wiggle.len());
    if n_basis == n_coef {
        Ok(fourth_basis.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch { reason: format!(
            "wiggle fourth-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
            n_basis,
            n_coef
        ) }.into())
    }
}
fn wiggle_geometry(
&self,
q0: ArrayView1<'_, f64>,
beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<GaussianLocationScaleWiggleGeometry, String> {
let basis = self.wiggle_design(q0)?;
let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
let dq_dq0 = self.wiggle_dq_dq0(q0, beta_link_wiggle)?;
let d2q_dq02 = self.wiggle_d2q_dq02(q0, beta_link_wiggle)?;
let d3q_dq03 = self.wiggle_d3q_dq03(q0, beta_link_wiggle)?;
let d4q_dq04 = self.wiggle_d4q_dq04(q0, beta_link_wiggle)?;
Ok(GaussianLocationScaleWiggleGeometry {
basis,
basis_d1,
basis_d2,
basis_d3,
dq_dq0,
d2q_dq02,
d3q_dq03,
d4q_dq04,
})
}
/// Evaluates the joint row scalars at predictors `q` and `eta_ls` and returns
/// them behind an `Arc`.
///
/// Despite the name, this implementation evaluates on every call and does not
/// consult `cached_row_scalars`.
fn get_or_compute_row_scalars(
    &self,
    q: &Array1<f64>,
    eta_ls: &Array1<f64>,
) -> Result<Arc<GaussianJointRowScalars>, String> {
    let scalars = gaussian_jointrow_scalars(&self.y, q, eta_ls, &self.weights)?;
    Ok(Arc::new(scalars))
}
/// Returns dense mu and log-sigma designs built from the designs stored on
/// the family, via `dense_locscale_block_designs_cached`.
fn dense_block_designs(&self) -> Result<(Cow<'_, Array2<f64>>, Cow<'_, Array2<f64>>), String> {
    let material = self.policy.material_policy();
    let mu = self.mu_design.as_ref();
    let log_sigma = self.log_sigma_design.as_ref();
    dense_locscale_block_designs_cached(
        mu,
        log_sigma,
        "GaussianLocationScaleWiggleFamily",
        "GaussianLocationScaleWiggle",
        "mu",
        &material,
    )
}
/// Returns dense mu and log-sigma designs taken from the block `specs`, via
/// `dense_locscale_block_designs_fromspecs` (three blocks expected).
fn dense_block_designs_fromspecs<'a>(
    &self,
    specs: &'a [ParameterBlockSpec],
) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
    let material = self.policy.material_policy();
    let expected_blocks = 3;
    dense_locscale_block_designs_fromspecs(
        specs,
        expected_blocks,
        "GaussianLocationScaleWiggleFamily",
        "GaussianLocationScaleWiggle",
        Self::BLOCK_MU,
        Self::BLOCK_LOG_SIGMA,
        "mu",
        &material,
    )
}
/// Picks the source of the dense block designs for the exact joint path.
///
/// Designs stored on the family win; otherwise they are taken from `specs`
/// when given; otherwise `None` is returned.
fn exact_joint_dense_block_designs<'a>(
    &'a self,
    specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
    let designs = if self.exact_joint_supported() {
        Some(self.dense_block_designs()?)
    } else {
        match specs {
            Some(block_specs) => Some(self.dense_block_designs_fromspecs(block_specs)?),
            None => None,
        }
    };
    Ok(designs)
}
/// Builds the effective Jacobian of block `block_idx` from an
/// `AdditiveWiggleBlockLayout` with two outputs, the mu and log-sigma blocks
/// as additive blocks, and the wiggle block as the wiggle block.
pub fn block_effective_jacobian(
    specs: &[ParameterBlockSpec],
    block_idx: usize,
) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
    let layout = crate::util::block_jacobian::AdditiveWiggleBlockLayout {
        family: "GaussianLocationScaleWiggleFamily",
        n_outputs: 2,
        additive_blocks: &[Self::BLOCK_MU, Self::BLOCK_LOG_SIGMA],
        wiggle_block: Some(Self::BLOCK_WIGGLE),
    };
    layout.block_effective_jacobian(specs, block_idx)
}
}
/// Row-coefficient vectors produced by `gls_wiggle_second_directional_coeffs`
/// for a pair of directions `u`, `v`. Suffix `_u` / `_v` marks a quantity
/// built from one direction, `_uv` one built from both.
struct GlsWiggleSecondDirCoeffs {
/// Second-directional row coefficients of the mu/mu block.
coeff_mm_uv: Array1<f64>,
/// Second-directional row coefficients of the mu/log-sigma block (all zeros).
coeff_ml_uv: Array1<f64>,
/// Second-directional row coefficients of the log-sigma/log-sigma block.
coeff_ll_uv: Array1<f64>,
/// `dw_u * dq_dq0 + w * s1_u`.
a_u: Array1<f64>,
/// `dw_v * dq_dq0 + w * s1_v`.
a_v: Array1<f64>,
/// `dw_uv * dq_dq0 + dw_u * s1_v + dw_v * s1_u + w * s1_uv`.
a_uv: Array1<f64>,
/// Negated `dm_u`.
c_u: Array1<f64>,
/// Negated `dm_v`.
c_v: Array1<f64>,
/// Negated `dm_uv`.
c_uv: Array1<f64>,
/// All zeros.
l_u: Array1<f64>,
/// All zeros.
l_v: Array1<f64>,
/// All zeros.
l_uv: Array1<f64>,
/// `-2 * w * kappa * zeta_u`.
dw_u: Array1<f64>,
/// `-2 * w * kappa * zeta_v`.
dw_v: Array1<f64>,
/// `4 * w * (kappa*zeta_u) * (kappa*zeta_v) - 2 * w * kappa_prime * zeta_u * zeta_v`.
dw_uv: Array1<f64>,
}
/// Borrowed per-row direction quantities consumed by
/// `gls_wiggle_second_directional_coeffs`. Suffix `_u` / `_v` marks a quantity
/// built from one direction, `_uv` one built from both.
struct GlsWiggleDirPieces<'a> {
/// Log-sigma design applied to the log-sigma part of direction `u`.
zeta_u: &'a Array1<f64>,
/// Log-sigma design applied to the log-sigma part of direction `v`.
zeta_v: &'a Array1<f64>,
/// `dq_dq0 * xi_u` plus the wiggle basis applied to the wiggle part of `u`.
q_u: &'a Array1<f64>,
/// Same as `q_u`, for direction `v`.
q_v: &'a Array1<f64>,
/// Mixed `u`/`v` counterpart of `q_u`.
q_uv: &'a Array1<f64>,
/// `d2q_dq02 * xi_u` plus the first-derivative basis applied to the wiggle part of `u`.
s1_u: &'a Array1<f64>,
/// Same as `s1_u`, for direction `v`.
s1_v: &'a Array1<f64>,
/// Mixed `u`/`v` counterpart of `s1_u`.
s1_uv: &'a Array1<f64>,
/// `d3q_dq03 * xi_u` plus the second-derivative basis applied to the wiggle part of `u`.
g2_u: &'a Array1<f64>,
/// Same as `g2_u`, for direction `v`.
g2_v: &'a Array1<f64>,
/// Mixed `u`/`v` counterpart of `g2_u`.
g2_uv: &'a Array1<f64>,
}
/// Computes the per-row coefficient vectors for the second directional
/// derivative of the wiggle joint Hessian.
///
/// `rows` supplies the row scalars (`w`, `m`, `kappa`, `kappa_prime`,
/// `kappa_dprime`, `obs_weight`), `geom` the wiggle derivative projections,
/// and `dir` the direction-dependent row quantities. All arithmetic is
/// elementwise over rows. The cross (mu/log-sigma) coefficient and the three
/// `l_*` vectors are returned as zeros.
fn gls_wiggle_second_directional_coeffs(
rows: &GaussianJointRowScalars,
geom: &GaussianLocationScaleWiggleGeometry,
dir: &GlsWiggleDirPieces<'_>,
) -> GlsWiggleSecondDirCoeffs {
// Every field is a shared reference, so destructuring through `*dir` copies them.
let GlsWiggleDirPieces {
zeta_u,
zeta_v,
q_u,
q_v,
q_uv,
s1_u,
s1_v,
s1_uv,
g2_u,
g2_v,
g2_uv,
} = *dir;
// kappa-scaled log-sigma shifts and their plain product.
let szeta_u = &rows.kappa * zeta_u;
let szeta_v = &rows.kappa * zeta_v;
let zeta_u_zeta_v = zeta_u * zeta_v;
// First and mixed second directional changes of `w`.
let dw_u = -2.0 * &rows.w * &szeta_u;
let dw_v = -2.0 * &rows.w * &szeta_v;
let dw_uv =
4.0 * &rows.w * &(&szeta_u * &szeta_v) - 2.0 * &rows.w * &rows.kappa_prime * &zeta_u_zeta_v;
// First and mixed second directional changes of `m`.
let dm_u = -(&rows.w * q_u) - &(2.0 * &rows.m * &szeta_u);
let dm_v = -(&rows.w * q_v) - &(2.0 * &rows.m * &szeta_v);
let dm_uv = &(2.0 * &rows.w * &(q_u * &szeta_v + q_v * &szeta_u)) - &(&rows.w * q_uv)
+ &(4.0 * &rows.m * &(&szeta_u * &szeta_v))
- 2.0 * &rows.m * &rows.kappa_prime * &zeta_u_zeta_v;
// Mixed second directional change of `w * dq_dq0^2 - m * d2q_dq02`,
// expanded term by term with the product rule.
let coeff_mm_uv = &(&dw_uv * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &dw_u * &geom.dq_dq0 * s1_v)
+ &(2.0 * &dw_v * &geom.dq_dq0 * s1_u)
+ &(2.0 * &rows.w * s1_u * s1_v)
+ &(2.0 * &rows.w * &geom.dq_dq0 * s1_uv)
- &(&dm_uv * &geom.d2q_dq02)
- &(&dm_u * g2_v)
- &(&dm_v * g2_u)
- &(&rows.m * g2_uv);
let n = rows.m.len();
let coeff_ml_uv = Array1::<f64>::zeros(n);
let coeff_ll_uv = 4.0
* &rows.obs_weight
* &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
* &zeta_u_zeta_v;
// Directional changes of `w * dq_dq0`.
let a_u = &dw_u * &geom.dq_dq0 + &rows.w * s1_u;
let a_v = &dw_v * &geom.dq_dq0 + &rows.w * s1_v;
let a_uv = &dw_uv * &geom.dq_dq0 + &dw_u * s1_v + &dw_v * s1_u + &rows.w * s1_uv;
// Directional changes of `-m`.
let c_u = -&dm_u;
let c_v = -&dm_v;
let c_uv = -&dm_uv;
let l_u = Array1::<f64>::zeros(n);
let l_v = Array1::<f64>::zeros(n);
let l_uv = Array1::<f64>::zeros(n);
GlsWiggleSecondDirCoeffs {
coeff_mm_uv,
coeff_ml_uv,
coeff_ll_uv,
a_u,
a_v,
a_uv,
c_u,
c_v,
c_uv,
l_u,
l_v,
l_uv,
dw_u,
dw_v,
dw_uv,
}
}
impl GaussianLocationScaleWiggleFamily {
/// Dense joint Hessian, resolving the block designs from the family or from
/// `specs`. Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(specs)? {
        Some((mu_design, sigma_design)) => {
            self.exact_newton_joint_hessian_from_designs(block_states, &mu_design, &sigma_design)
        }
        None => Ok(None),
    }
}
/// Dense directional derivative of the joint Hessian along `d_beta_flat`,
/// resolving the block designs from the family or from `specs`. Returns
/// `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(specs)? {
        Some((mu_design, sigma_design)) => self
            .exact_newton_joint_hessian_directional_derivative_from_designs(
                block_states,
                &mu_design,
                &sigma_design,
                d_beta_flat,
            ),
        None => Ok(None),
    }
}
/// Dense second directional derivative of the joint Hessian along the pair
/// (`d_beta_u_flat`, `d_beta_v_flat`), resolving the block designs from the
/// family or from `specs`. Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_second_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(specs)? {
        Some((mu_design, sigma_design)) => self
            .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
                block_states,
                &mu_design,
                &sigma_design,
                d_beta_u_flat,
                d_beta_v_flat,
            ),
        None => Ok(None),
    }
}
/// Resolves the psi direction `psi_index` into a
/// `LocationScaleJointPsiDirection` for this family.
///
/// The shared helper `locscale_joint_psi_direction_parts` does the work; its
/// parts are renamed into the location-scale direction struct. Returns
/// `Ok(None)` when the helper yields no parts.
fn exact_newton_joint_psi_direction(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
    policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
    let n_obs = self.y.len();
    let resolved = locscale_joint_psi_direction_parts(
        block_states,
        derivative_blocks,
        psi_index,
        n_obs,
        xmu.ncols(),
        x_ls.ncols(),
        Self::BLOCK_MU,
        Self::BLOCK_LOG_SIGMA,
        3,
        "GaussianLocationScaleWiggleFamily",
        "mu",
        policy,
    )?;
    match resolved {
        None => Ok(None),
        Some(parts) => Ok(Some(LocationScaleJointPsiDirection {
            block_idx: parts.block_idx,
            local_idx: parts.local_idx,
            z_primary_psi: parts.primary_z,
            z_ls_psi: parts.log_sigma_z,
            x_primary_psi: parts.primary_psi,
            x_ls_psi: parts.log_sigma_psi,
        })),
    }
}
/// Computes the second-order psi design drifts for the direction pair
/// (`psi_a`, `psi_b`) by delegating to `locscale_joint_psisecond_design_drifts`
/// with this family's sizes, block indices, labels and resource policy.
fn exact_newton_joint_psisecond_design_drifts(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_a: &LocationScaleJointPsiDirection,
    psi_b: &LocationScaleJointPsiDirection,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<LocationScaleJointPsiSecondDrifts, String> {
    let config = LocScalePsiDriftConfig {
        n: self.y.len(),
        p_primary: xmu.ncols(),
        p_log_sigma: x_ls.ncols(),
        primary_block_idx: Self::BLOCK_MU,
        log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
        family_name: "GaussianLocationScaleWiggleFamily",
        primary_label: "mu",
        policy: &self.policy,
    };
    locscale_joint_psisecond_design_drifts(block_states, derivative_blocks, psi_a, psi_b, config)
}
fn wiggle_hessian_row_pieces(
&self,
block_states: &[ParameterBlockState],
) -> Result<GaussianLocationScaleWiggleHessianRowPieces, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
if geom.basis.ncols() != betaw.len() {
return Err(GamlssError::DimensionMismatch { reason: format!(
"GaussianLocationScaleWiggleFamily wiggle basis/beta mismatch: basis has {} columns but beta has {} entries",
geom.basis.ncols(),
betaw.len()
) }.into());
}
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
let coeff_ml = Array1::<f64>::zeros(n);
let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
let coeff_mw_b = &rows.w * &geom.dq_dq0;
let coeff_mw_d = -&rows.m;
let coeff_lw_b = Array1::<f64>::zeros(n);
let coeff_ww = rows.w.clone();
Ok(GaussianLocationScaleWiggleHessianRowPieces {
coeff_mm,
coeff_ml,
coeff_ll,
coeff_mw_b,
coeff_mw_d,
coeff_lw_b,
coeff_ww,
basis: geom.basis,
basis_d1: geom.basis_d1,
})
}
/// Dense joint Hessian for the given designs: builds the row pieces at the
/// current block states and assembles them.
fn exact_newton_joint_hessian_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    self.wiggle_hessian_row_pieces(block_states)?
        .assemble_dense(xmu, x_ls)
        .map(Some)
}
fn exact_newton_joint_hessian_directional_derivative_from_designs(
&self,
block_states: &[ParameterBlockState],
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let pmu = xmu.ncols();
let p_ls = x_ls.ncols();
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
let (umu, u_ls, uw) = layout.split_three(
d_beta_flat,
"GaussianLocationScaleWiggleFamily exact joint directional Hessian",
)?;
if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
let xi = fast_av(xmu, &umu);
let zeta = fast_av(x_ls, &u_ls);
let szeta = &rows.kappa * ζ
let phi = fast_av(&geom.basis, &uw);
let mut q_u = &geom.dq_dq0 * ξ
q_u += φ
let mut s1_u = &geom.d2q_dq02 * ξ
s1_u += &fast_av(&geom.basis_d1, &uw);
let mut g2_u = &geom.d3q_dq03 * ξ
g2_u += &fast_av(&geom.basis_d2, &uw);
let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
let dw_u = -2.0 * &rows.w * &szeta;
let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
- &(&dm_u * &geom.d2q_dq02)
- &(&rows.m * &g2_u);
let coeff_ml_u = Array1::<f64>::zeros(n);
let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
let c_u = -&dm_u;
let l_u = Array1::<f64>::zeros(n);
let zeros_ls_b1 = Array1::<f64>::zeros(n);
let h_mm = xt_diag_x_dense(xmu, &coeff_mm_u)?;
let h_ml = xt_diag_y_dense(xmu, &coeff_ml_u, x_ls)?;
let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_u)?;
let h_mw = xt_diag_y_dense(xmu, &a_u, &geom.basis)?
+ &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_u)?
+ &xt_diag_y_dense(xmu, &c_u, &geom.basis_d1)?
+ &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_u)?;
let h_lw = xt_diag_y_dense(x_ls, &l_u, &geom.basis)?
+ &xt_diag_y_dense(x_ls, &zeros_ls_b1, &basis_u)?;
let a_ww = xt_diag_y_dense(&basis_u, &rows.w, &geom.basis)?;
let h_ww = &a_ww + &a_ww.t() + &xt_diag_x_dense(&geom.basis, &dw_u)?;
Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
&h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
)))
}
/// Matrix-free first directional derivative of the three-block joint Hessian
/// along `d_beta_flat`, returned as a `RowCoeffOperator`.
///
/// The operator has three coefficient blocks (mu, log-sigma, wiggle) and
/// five directions: the mu design, the log-sigma design, and the wiggle
/// basis with its first and second derivative bases (all three attached to
/// the wiggle block). Row coefficients are attached to direction pairs.
///
/// # Errors
/// Returns a dimension-mismatch error when there are not exactly three
/// blocks or when a predictor or the weights do not match the number of
/// observations. Propagates errors from the layout split, the geometry and
/// the row scalars.
fn gls_wiggle_directional_operator(
    &self,
    block_states: &[ParameterBlockState],
    xmu_arc: Arc<Array2<f64>>,
    x_ls_arc: Arc<Array2<f64>>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    if block_states.len() != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let pmu = xmu_arc.ncols();
    let p_ls = x_ls_arc.ncols();
    let q0_eta = &block_states[Self::BLOCK_MU].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
    let n = self.y.len();
    let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
    let (umu, u_ls, uw) =
        layout.split_three(d_beta_flat, "GLS Wiggle joint dH operator d_beta")?;
    if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let q = q0_eta + etaw;
    let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
    let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
    // Per-row shifts induced by the direction in each block.
    let xi = fast_av(xmu_arc.as_ref(), &umu);
    let zeta = fast_av(x_ls_arc.as_ref(), &u_ls);
    let szeta = &rows.kappa * &zeta;
    let phi = fast_av(&geom.basis, &uw);
    // Directional changes of q and of its first and second q0-derivatives.
    let mut q_u = &geom.dq_dq0 * &xi;
    q_u += &phi;
    let mut s1_u = &geom.d2q_dq02 * &xi;
    s1_u += &fast_av(&geom.basis_d1, &uw);
    let mut g2_u = &geom.d3q_dq03 * &xi;
    g2_u += &fast_av(&geom.basis_d2, &uw);
    let dw_u = -2.0 * &rows.w * &szeta;
    let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &szeta);
    // Directional change of `w * dq_dq0^2 - m * d2q_dq02`.
    let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
        + &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
        - &(&dm_u * &geom.d2q_dq02)
        - &(&rows.m * &g2_u);
    let coeff_ml_u = Array1::<f64>::zeros(n);
    let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &(&zeta * &rows.obs_weight);
    let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
    let c_u = -&dm_u;
    let l_u = Array1::<f64>::zeros(n);
    // Row scaling by xi is folded into the coefficients paired with the
    // derivative bases, so no row-scaled basis copies are needed.
    let coeff_m_b1 = &(&rows.w * &geom.dq_dq0 * &xi) + &c_u;
    let coeff_m_b2 = -(&rows.m * &xi);
    let coeff_ls_b1 = Array1::<f64>::zeros(n);
    let coeff_b_b1 = &rows.w * &xi;
    // `geom` is owned and no longer read below, so the basis matrices are
    // moved into the shared handles instead of being deep-copied.
    let basis: Arc<Array2<f64>> = Arc::new(geom.basis);
    let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1);
    let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2);
    let pw = basis.ncols();
    Ok(Some(Arc::new(RowCoeffOperator::from_directions(
        vec![pmu, p_ls, pw],
        vec![
            (0, xmu_arc),
            (1, x_ls_arc),
            (2, basis),
            (2, basis_d1),
            (2, basis_d2),
        ],
        vec![
            (0, 0, coeff_mm_u),
            (0, 1, coeff_ml_u),
            (1, 1, coeff_ll_u),
            (0, 2, a_u),
            (0, 3, coeff_m_b1),
            (0, 4, coeff_m_b2),
            (1, 2, l_u),
            (1, 3, coeff_ls_b1),
            (2, 2, dw_u),
            (2, 3, coeff_b_b1),
        ],
        n,
    ))))
}
/// Matrix-free second directional derivative of the three-block joint
/// Hessian along the pair (`d_beta_u`, `d_beta_v`), returned as a
/// `RowCoeffOperator`.
///
/// The operator has three coefficient blocks (mu, log-sigma, wiggle) and six
/// directions: the mu design, the log-sigma design, and the wiggle basis with
/// its first, second and third derivative bases (all four attached to the
/// wiggle block). The direction-dependent row coefficients come from
/// `gls_wiggle_second_directional_coeffs`.
///
/// # Errors
/// Returns a dimension-mismatch error when there are not exactly three
/// blocks or when a predictor or the weights do not match the number of
/// observations. Propagates errors from the layout splits, the geometry and
/// the row scalars.
fn gls_wiggle_second_directional_operator(
    &self,
    block_states: &[ParameterBlockState],
    xmu_arc: Arc<Array2<f64>>,
    x_ls_arc: Arc<Array2<f64>>,
    d_beta_u: &Array1<f64>,
    d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    if block_states.len() != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let pmu = xmu_arc.ncols();
    let p_ls = x_ls_arc.ncols();
    let q0_eta = &block_states[Self::BLOCK_MU].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
    let n = self.y.len();
    let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
    let (umu, u_ls, uw) = layout.split_three(d_beta_u, "GLS Wiggle d2H operator (u)")?;
    let (vmu, v_ls, vw) = layout.split_three(d_beta_v, "GLS Wiggle d2H operator (v)")?;
    if q0_eta.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let q = q0_eta + etaw;
    let geom = self.wiggle_geometry(q0_eta.view(), betaw.view())?;
    let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
    // Per-row shifts induced by each direction in the mu and log-sigma blocks.
    let xi_u = fast_av(xmu_arc.as_ref(), &umu);
    let xi_v = fast_av(xmu_arc.as_ref(), &vmu);
    let zeta_u = fast_av(x_ls_arc.as_ref(), &u_ls);
    let zeta_v = fast_av(x_ls_arc.as_ref(), &v_ls);
    // Wiggle parts of each direction pushed through every basis order.
    let phi_u = fast_av(&geom.basis, &uw);
    let phi_v = fast_av(&geom.basis, &vw);
    let b1u = fast_av(&geom.basis_d1, &uw);
    let b1v = fast_av(&geom.basis_d1, &vw);
    let b2u = fast_av(&geom.basis_d2, &uw);
    let b2v = fast_av(&geom.basis_d2, &vw);
    let b3u = fast_av(&geom.basis_d3, &uw);
    let b3v = fast_av(&geom.basis_d3, &vw);
    // Single-direction changes of q and of its first two q0-derivatives.
    let mut q_u = &geom.dq_dq0 * &xi_u;
    q_u += &phi_u;
    let mut q_v = &geom.dq_dq0 * &xi_v;
    q_v += &phi_v;
    let mut s1_u = &geom.d2q_dq02 * &xi_u;
    s1_u += &b1u;
    let mut s1_v = &geom.d2q_dq02 * &xi_v;
    s1_v += &b1v;
    let mut g2_u = &geom.d3q_dq03 * &xi_u;
    g2_u += &b2u;
    let mut g2_v = &geom.d3q_dq03 * &xi_v;
    g2_v += &b2v;
    // Mixed (u, v) changes of the same three quantities.
    let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
    let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
    let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
    let GlsWiggleSecondDirCoeffs {
        coeff_mm_uv,
        coeff_ml_uv,
        coeff_ll_uv,
        a_u,
        a_v,
        a_uv,
        c_u,
        c_v,
        c_uv,
        l_u,
        l_v,
        l_uv,
        dw_u,
        dw_v,
        dw_uv,
    } = gls_wiggle_second_directional_coeffs(
        &rows,
        &geom,
        &GlsWiggleDirPieces {
            zeta_u: &zeta_u,
            zeta_v: &zeta_v,
            q_u: &q_u,
            q_v: &q_v,
            q_uv: &q_uv,
            s1_u: &s1_u,
            s1_v: &s1_v,
            s1_uv: &s1_uv,
            g2_u: &g2_u,
            g2_v: &g2_v,
            g2_uv: &g2_uv,
        },
    );
    // Row scaling by xi_u / xi_v is folded into the coefficients paired with
    // the derivative bases.
    let xi_u_xi_v = &xi_u * &xi_v;
    let coeff_m_b1 = &(&a_u * &xi_v) + &(&a_v * &xi_u) + &c_uv;
    let coeff_m_b2 = &(&rows.w * &geom.dq_dq0 * &xi_u_xi_v) + &(&c_u * &xi_v) + &(&c_v * &xi_u);
    let coeff_m_b3 = -(&rows.m * &xi_u_xi_v);
    let coeff_ls_b1 = &(&l_u * &xi_v) + &(&l_v * &xi_u);
    let coeff_ls_b2 = Array1::<f64>::zeros(n);
    let coeff_b_b1 = &(&dw_u * &xi_v) + &(&dw_v * &xi_u);
    let coeff_b_b2 = &rows.w * &xi_u_xi_v;
    let coeff_b1_b1 = 2.0 * &(&rows.w * &xi_u_xi_v);
    // `geom` is owned and no longer read below, so the basis matrices are
    // moved into the shared handles instead of being deep-copied.
    let basis: Arc<Array2<f64>> = Arc::new(geom.basis);
    let basis_d1: Arc<Array2<f64>> = Arc::new(geom.basis_d1);
    let basis_d2: Arc<Array2<f64>> = Arc::new(geom.basis_d2);
    let basis_d3: Arc<Array2<f64>> = Arc::new(geom.basis_d3);
    let pw = basis.ncols();
    Ok(Some(Arc::new(RowCoeffOperator::from_directions(
        vec![pmu, p_ls, pw],
        vec![
            (0, xmu_arc),
            (1, x_ls_arc),
            (2, basis),
            (2, basis_d1),
            (2, basis_d2),
            (2, basis_d3),
        ],
        vec![
            (0, 0, coeff_mm_uv),
            (0, 1, coeff_ml_uv),
            (1, 1, coeff_ll_uv),
            (0, 2, a_uv),
            (0, 3, coeff_m_b1),
            (0, 4, coeff_m_b2),
            (0, 5, coeff_m_b3),
            (1, 2, l_uv),
            (1, 3, coeff_ls_b1),
            (1, 4, coeff_ls_b2),
            (2, 2, dw_uv),
            (2, 3, coeff_b_b1),
            (2, 4, coeff_b_b2),
            (3, 3, coeff_b1_b1),
        ],
        n,
    ))))
}
/// Dense assembly of the joint Hessian's mixed second directional derivative
/// along two flat coefficient directions `d_beta_u_flat` and `d_beta_v_flat`.
///
/// The flat directions are split into (mean, log-sigma, wiggle) pieces using a
/// layout built from `xmu.ncols()`, `x_ls.ncols()` and the wiggle coefficient
/// count. Six dense blocks (`h_mm`, `h_ml`, `h_mw`, `h_ll`, `h_lw`, `h_ww`) are
/// assembled and packed by `gaussian_pack_wiggle_joint_symmetrichessian`.
///
/// # Errors
/// Returns a `DimensionMismatch`-derived error string when `block_states` does
/// not hold exactly three blocks or when any linear predictor / weight vector
/// does not have `self.y.len()` entries. Errors from the layout split, the
/// wiggle geometry, the row-scalar cache and the dense products are propagated.
///
/// # Returns
/// Always `Ok(Some(matrix))` on success; this function never yields `Ok(None)`.
fn exact_newton_joint_hessiansecond_directional_derivative_from_designs(
&self,
block_states: &[ParameterBlockState],
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
d_beta_u_flat: &Array1<f64>,
d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let pmu = xmu.ncols();
let p_ls = x_ls.ncols();
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
// Split both flat directions into per-block pieces (mean, log-sigma, wiggle).
let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
let (umu, u_ls, uw) = layout.split_three(
d_beta_u_flat,
"GaussianLocationScaleWiggleFamily exact joint second directional Hessian (u)",
)?;
let (vmu, v_ls, vw) = layout.split_three(
d_beta_v_flat,
"GaussianLocationScaleWiggleFamily exact joint second directional Hessian (v)",
)?;
if q0.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
// Total location predictor: base mean predictor plus the wiggle predictor.
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
// Row-wise images of the direction pieces under each design / basis matrix.
let xi_u = fast_av(xmu, &umu);
let xi_v = fast_av(xmu, &vmu);
let zeta_u = fast_av(x_ls, &u_ls);
let zeta_v = fast_av(x_ls, &v_ls);
let phi_u = fast_av(&geom.basis, &uw);
let phi_v = fast_av(&geom.basis, &vw);
let b1u = fast_av(&geom.basis_d1, &uw);
let b1v = fast_av(&geom.basis_d1, &vw);
let b2u = fast_av(&geom.basis_d2, &uw);
let b2v = fast_av(&geom.basis_d2, &vw);
let b3u = fast_av(&geom.basis_d3, &uw);
let b3v = fast_av(&geom.basis_d3, &vw);
// First-order directional pieces: each combines a `geom` derivative array
// scaled by the mean-direction image with the matching basis-derivative image.
let mut q_u = &geom.dq_dq0 * &xi_u;
q_u += &phi_u;
let mut q_v = &geom.dq_dq0 * &xi_v;
q_v += &phi_v;
let mut s1_u = &geom.d2q_dq02 * &xi_u;
s1_u += &b1u;
let mut s1_v = &geom.d2q_dq02 * &xi_v;
s1_v += &b1v;
let mut g2_u = &geom.d3q_dq03 * &xi_u;
g2_u += &b2u;
let mut g2_v = &geom.d3q_dq03 * &xi_v;
g2_v += &b2v;
// Mixed (u, v) pieces: next-order `geom` array times xi_u * xi_v plus the two
// cross terms pairing one direction's basis-derivative image with the other's xi.
let q_uv = &(&geom.d2q_dq02 * &(&xi_u * &xi_v)) + &(&b1u * &xi_v) + &(&b1v * &xi_u);
let s1_uv = &(&geom.d3q_dq03 * &(&xi_u * &xi_v)) + &(&b2u * &xi_v) + &(&b2v * &xi_u);
let g2_uv = &(&geom.d4q_dq04 * &(&xi_u * &xi_v)) + &(&b3u * &xi_v) + &(&b3v * &xi_u);
// Row-scaled copies of the basis derivative matrices used in the dense products.
let basis_u = scale_matrix_rows(&geom.basis_d1, &xi_u)?;
let basis_v = scale_matrix_rows(&geom.basis_d1, &xi_v)?;
let basis_uv = scale_matrix_rows(&geom.basis_d2, &(&xi_u * &xi_v))?;
let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi_u)?;
let basis1_v = scale_matrix_rows(&geom.basis_d2, &xi_v)?;
let basis1_uv = scale_matrix_rows(&geom.basis_d3, &(&xi_u * &xi_v))?;
// Per-row coefficient vectors come from the shared helper, which is given the
// same directional pieces computed above.
let GlsWiggleSecondDirCoeffs {
coeff_mm_uv,
coeff_ml_uv,
coeff_ll_uv,
a_u,
a_v,
a_uv,
c_u,
c_v,
c_uv,
l_u,
l_v,
l_uv,
dw_u,
dw_v,
dw_uv,
} = gls_wiggle_second_directional_coeffs(
&rows,
&geom,
&GlsWiggleDirPieces {
zeta_u: &zeta_u,
zeta_v: &zeta_v,
q_u: &q_u,
q_v: &q_v,
q_uv: &q_uv,
s1_u: &s1_u,
s1_v: &s1_v,
s1_uv: &s1_uv,
g2_u: &g2_u,
g2_v: &g2_v,
g2_uv: &g2_uv,
},
);
// Blocks that only involve the fixed designs `xmu` and `x_ls`.
let h_mm = xt_diag_x_dense(xmu, &coeff_mm_uv)?;
let h_ml = xt_diag_y_dense(xmu, &coeff_ml_uv, x_ls)?;
let h_ll = xt_diag_x_dense(x_ls, &coeff_ll_uv)?;
// Mean x wiggle block: terms against the plain basis and its row-scaled
// derivative variants.
let h_mw = xt_diag_y_dense(xmu, &a_uv, &geom.basis)?
+ &xt_diag_y_dense(xmu, &a_u, &basis_v)?
+ &xt_diag_y_dense(xmu, &a_v, &basis_u)?
+ &xt_diag_y_dense(xmu, &(&rows.w * &geom.dq_dq0), &basis_uv)?
+ &xt_diag_y_dense(xmu, &c_uv, &geom.basis_d1)?
+ &xt_diag_y_dense(xmu, &c_u, &basis1_v)?
+ &xt_diag_y_dense(xmu, &c_v, &basis1_u)?
+ &xt_diag_y_dense(xmu, &(-&rows.m), &basis1_uv)?;
// The last log-sigma x wiggle term is weighted by an all-zero vector, so it
// contributes a zero matrix of the right shape.
let zeros_ls_b2 = Array1::<f64>::zeros(n);
let h_lw = xt_diag_y_dense(x_ls, &l_uv, &geom.basis)?
+ &xt_diag_y_dense(x_ls, &l_u, &basis_v)?
+ &xt_diag_y_dense(x_ls, &l_v, &basis_u)?
+ &xt_diag_y_dense(x_ls, &zeros_ls_b2, &basis_uv)?;
// Wiggle x wiggle block: each one-sided product is added together with its
// transpose, then the `dw_uv`-weighted basis crossproduct is added once.
let a_ab = xt_diag_y_dense(&basis_uv, &rows.w, &geom.basis)?;
let a_ij = xt_diag_y_dense(&basis_u, &rows.w, &basis_v)?;
let a_iwj = xt_diag_y_dense(&basis_u, &dw_v, &geom.basis)?;
let a_jwi = xt_diag_y_dense(&basis_v, &dw_u, &geom.basis)?;
let h_ww = &a_ab
+ &a_ab.t()
+ &a_ij
+ a_ij.t()
+ &a_iwj
+ a_iwj.t()
+ &a_jwi
+ a_jwi.t()
+ &xt_diag_x_dense(&geom.basis, &dw_uv)?;
Ok(Some(gaussian_pack_wiggle_joint_symmetrichessian(
&h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
)))
}
/// Builds the first-order psi terms (objective, score and dense Hessian
/// derivative) for the hyperparameter selected by `psi_index`, using the dense
/// designs `xmu` and `x_ls`.
///
/// # Returns
/// * `Ok(None)` when `exact_newton_joint_psi_direction` yields no direction for
///   `psi_index`.
/// * `Ok(Some(terms))` otherwise, with `hessian_psi_operator` left as `None`
///   (only the dense `hessian_psi` matrix is produced here).
///
/// # Errors
/// Propagates errors from the psi-direction lookup, the wiggle geometry, the
/// row-scalar cache and the dense / psi-map products.
///
/// NOTE(review): `block_states` is indexed without a length check in this
/// function; presumably the psi-direction call validates it first - confirm.
fn exact_newton_joint_psi_terms_from_designs(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
let Some(dir_a) = self.exact_newton_joint_psi_direction(
block_states,
derivative_blocks,
psi_index,
xmu,
x_ls,
&self.policy,
)?
else {
return Ok(None);
};
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
// Total location predictor: base mean predictor plus the wiggle predictor.
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
// Linear-map views of the psi-derivative designs for the two design blocks.
let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
// Row-wise psi variations obtained by scaling the `geom` arrays (and the basis
// derivative matrices) by the direction's mean-predictor drift.
let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
let e_a = &dir_a.z_ls_psi;
let amn = &rows.obs_weight - &rows.n;
// First-order variations of the row scalars `w`, `m`, `n` in the psi direction.
let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
// Per-row score weights for each block (`s_*`) and their psi variations (`s_*_a`).
let s_mu = -&rows.m * &geom.dq_dq0;
let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
let s_ls = &rows.kappa * &amn;
let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
let s_w = -&rows.m;
let s_w_a = -&dm_a;
let objective_psi = (-&rows.m * &q_a + &s_ls * e_a).sum();
// Each score block sums the psi-design term with the fixed-design term.
let score_psi = gaussian_pack_wiggle_joint_score(
&(xmu_map.transpose_mul(s_mu.view()) + fast_atv(xmu, &s_mu_a)),
&(x_ls_map.transpose_mul(s_ls.view()) + fast_atv(x_ls, &s_ls_a)),
&(fast_atv(&basis_a, &s_w) + fast_atv(&geom.basis, &s_w_a)),
);
let n = rows.m.len();
// Per-row Hessian coefficients and their psi variations. The `ml` and `l`
// families are identically zero here but are kept so every block is assembled
// with the same product-rule layout.
let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
- &(&dm_a * &geom.d2q_dq02)
- &(&rows.m * &g2_a);
let coeff_ml = Array1::<f64>::zeros(n);
let coeff_ml_a = Array1::<f64>::zeros(n);
let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
let a = &rows.w * &geom.dq_dq0;
let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
let c = -&rows.m;
let c_a = -&dm_a;
let l = Array1::<f64>::zeros(n);
let l_a = Array1::<f64>::zeros(n);
// Mean x mean: psi-design crossproduct plus its transpose, plus the
// coefficient-variation term on the fixed design.
let h_mm_a1 = weighted_crossprod_psi_maps(
xmu_map,
coeff_mm.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let h_mm = &h_mm_a1 + &h_mm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a)?;
// Mean x log-sigma: all weights are zero vectors, so this evaluates to zeros.
let h_ml = weighted_crossprod_psi_maps(
xmu_map,
coeff_ml.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
coeff_ml.view(),
x_ls_map,
)? + &xt_diag_y_dense(xmu, &coeff_ml_a, x_ls)?;
let h_ll_a1 = weighted_crossprod_psi_maps(
x_ls_map,
coeff_ll.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let h_ll = &h_ll_a1 + &h_ll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a)?;
// Mean x wiggle: three terms against `basis` (weights `a`) followed by three
// terms against `basis_d1` (weights `c`).
let h_mw = weighted_crossprod_psi_maps(
xmu_map,
a.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &xt_diag_y_dense(xmu, &a_a, &geom.basis)?
+ &xt_diag_y_dense(xmu, &a, &basis_a)?
+ &weighted_crossprod_psi_maps(
xmu_map,
c.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
)?
+ &xt_diag_y_dense(xmu, &c_a, &geom.basis_d1)?
+ &xt_diag_y_dense(xmu, &c, &basis1_a)?;
// Log-sigma x wiggle: weights are zero vectors, so this evaluates to zeros.
let h_lw = weighted_crossprod_psi_maps(
x_ls_map,
l.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &xt_diag_y_dense(x_ls, &l_a, &geom.basis)?
+ &xt_diag_y_dense(x_ls, &l, &basis_a)?;
// Wiggle x wiggle: basis-variation product plus transpose, plus the
// weight-variation crossproduct.
let h_ww_a1 = xt_diag_y_dense(&basis_a, &rows.w, &geom.basis)?;
let h_ww = &h_ww_a1 + &h_ww_a1.t() + &xt_diag_x_dense(&geom.basis, &dw_a)?;
Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
objective_psi,
score_psi,
hessian_psi: gaussian_pack_wiggle_joint_symmetrichessian(
&h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
),
hessian_psi_operator: None,
}))
}
/// Resolves the psi directions for `psi_i` and `psi_j` and delegates to
/// `exact_newton_joint_psisecond_order_terms_from_parts`.
///
/// Yields `Ok(None)` as soon as either direction is unavailable; the direction
/// for `psi_j` is only requested once the one for `psi_i` has been found.
/// Errors from the direction lookups and from the assembly are propagated.
fn exact_newton_joint_psisecond_order_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    let first_direction = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_i,
        xmu,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    let second_direction = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_j,
        xmu,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    let terms = self.exact_newton_joint_psisecond_order_terms_from_parts(
        block_states,
        derivative_blocks,
        &first_direction,
        &second_direction,
        xmu,
        x_ls,
    )?;
    Ok(Some(terms))
}
/// Assembles the second-order psi terms (objective, score and dense Hessian
/// derivative) for a pair of already-resolved psi directions `dir_a`, `dir_b`.
///
/// Second-order design drifts are obtained from
/// `exact_newton_joint_psisecond_design_drifts`; every quantity below is then a
/// product-rule expansion over "a", "b" and mixed "ab" pieces.
///
/// # Returns
/// The populated `ExactNewtonJointPsiSecondOrderTerms`, with
/// `hessian_psi_psi_operator` left as `None` (dense matrix only).
///
/// # Errors
/// Propagates errors from the second-drift computation, the wiggle geometry,
/// the row-scalar cache and the dense / psi-map products.
fn exact_newton_joint_psisecond_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
dir_a: &LocationScaleJointPsiDirection,
dir_b: &LocationScaleJointPsiDirection,
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
let second_drifts = self.exact_newton_joint_psisecond_design_drifts(
block_states,
derivative_blocks,
dir_a,
dir_b,
xmu,
x_ls,
)?;
let n = self.y.len();
// Linear-map views of the first-order psi designs for both directions.
let xmu_a_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_a_map = dir_a.x_ls_psi.as_linear_map_ref();
let xmu_b_map = dir_b.x_primary_psi.as_linear_map_ref();
let x_ls_b_map = dir_b.x_ls_psi.as_linear_map_ref();
// Second-order psi designs, built from whichever of the action / dense forms
// the drift computation supplied.
let xmu_ab_map = second_psi_linear_map(
second_drifts.x_primary_ab_action.as_ref(),
second_drifts.x_primary_ab.as_ref(),
n,
xmu.ncols(),
);
let x_ls_ab_map = second_psi_linear_map(
second_drifts.x_ls_ab_action.as_ref(),
second_drifts.x_ls_ab.as_ref(),
n,
x_ls.ncols(),
);
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
// Total location predictor: base mean predictor plus the wiggle predictor.
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
// Row-wise first-order (`_a`, `_b`) and mixed (`_ab`) variations of the `geom`
// arrays. Each mixed piece pairs the next-order array times the product of the
// two first-order drifts with the same-order array times the second-order drift.
let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
let q_b = &geom.dq_dq0 * &dir_b.z_primary_psi;
let q_ab = &(&geom.dq_dq0 * &second_drifts.z_primary_ab)
+ &(&geom.d2q_dq02 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi));
let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
let s1_b = &geom.d2q_dq02 * &dir_b.z_primary_psi;
let s1_ab = &(&geom.d3q_dq03 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
+ &(&geom.d2q_dq02 * &second_drifts.z_primary_ab);
let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
let g2_b = &geom.d3q_dq03 * &dir_b.z_primary_psi;
let g2_ab = &(&geom.d4q_dq04 * &(&dir_a.z_primary_psi * &dir_b.z_primary_psi))
+ &(&geom.d3q_dq03 * &second_drifts.z_primary_ab);
// Matching row-scaled variations of the wiggle basis (`basis_*`) and of its
// first derivative matrix (`basis1_*`).
let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
let basis_b = scale_matrix_rows(&geom.basis_d1, &dir_b.z_primary_psi)?;
let basis_ab = scale_matrix_rows(&geom.basis_d1, &second_drifts.z_primary_ab)?
+ &scale_matrix_rows(
&geom.basis_d2,
&(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
)?;
let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
let basis1_b = scale_matrix_rows(&geom.basis_d2, &dir_b.z_primary_psi)?;
let basis1_ab = scale_matrix_rows(&geom.basis_d2, &second_drifts.z_primary_ab)?
+ &scale_matrix_rows(
&geom.basis_d3,
&(&dir_a.z_primary_psi * &dir_b.z_primary_psi),
)?;
// Log-sigma predictor drifts for each direction and for the mixed pair.
let e_a = &dir_a.z_ls_psi;
let e_b = &dir_b.z_ls_psi;
let e_ab = &second_drifts.z_ls_ab;
let amn = &rows.obs_weight - &rows.n;
// Shared factor 4*kappa^2 - 2*kappa' used by every mixed row-scalar variation.
let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
// First-order and mixed variations of the row scalars `w`, `m`, `n`.
let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
let dw_b = -2.0 * &rows.w * &rows.kappa * e_b;
let dw_ab =
&four_k2_minus_2kpi * &rows.w * &(e_a * e_b) - &(2.0 * &rows.w * &rows.kappa * e_ab);
let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
let dm_b = -(&rows.w * &q_b) - &(2.0 * &rows.m * &rows.kappa * e_b);
let dm_ab = &(2.0 * &rows.w * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
- &(&rows.w * &q_ab)
+ &(&four_k2_minus_2kpi * &rows.m * &(e_a * e_b))
- &(2.0 * &rows.m * &rows.kappa * e_ab);
let dn_a = -(2.0 * &rows.m * &q_a) - &(2.0 * &rows.n * &rows.kappa * e_a);
let dn_b = -(2.0 * &rows.m * &q_b) - &(2.0 * &rows.n * &rows.kappa * e_b);
let dn_ab = &(2.0 * &rows.w * &(&q_a * &q_b))
+ &(4.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
- &(2.0 * &rows.m * &q_ab)
+ &(&four_k2_minus_2kpi * &rows.n * &(e_a * e_b))
- &(2.0 * &rows.n * &rows.kappa * e_ab);
// Per-row score weights for each block and their first-order / mixed variations.
let s_mu = -&rows.m * &geom.dq_dq0;
let s_mu_a = -(&dm_a * &geom.dq_dq0) - &(&rows.m * &s1_a);
let s_mu_b = -(&dm_b * &geom.dq_dq0) - &(&rows.m * &s1_b);
let s_mu_ab =
-(&dm_ab * &geom.dq_dq0) - &(&dm_a * &s1_b) - &(&dm_b * &s1_a) - &(&rows.m * &s1_ab);
let s_ls = &rows.kappa * &amn;
let s_ls_a = &rows.kappa_prime * &(e_a * &amn) - &rows.kappa * &dn_a;
let s_ls_b = &rows.kappa_prime * &(e_b * &amn) - &rows.kappa * &dn_b;
let s_ls_ab = &rows.kappa_dprime * &(e_a * e_b) * &amn + &rows.kappa_prime * e_ab * &amn
- &rows.kappa_prime * &(e_a * &dn_b + e_b * &dn_a)
- &rows.kappa * &dn_ab;
let s_w = -&rows.m;
let s_w_a = -&dm_a;
let s_w_b = -&dm_b;
let s_w_ab = -&dm_ab;
// Scalar second-order objective term: row-wise contributions summed over rows.
let objective_psi_psi = (&rows.w * &(&q_a * &q_b)
+ &(2.0 * &rows.m * &rows.kappa * &(&q_a * e_b + &q_b * e_a))
+ &((2.0 * &rows.kappa * &rows.kappa * &rows.n + &rows.kappa_prime * &amn)
* &(e_a * e_b))
- &(&rows.m * &q_ab)
+ &(&rows.kappa * &amn * e_ab))
.sum();
// Each score block has four terms: second-order design with the base weight,
// the two first-order designs with the opposite direction's weight variation,
// and the fixed design with the mixed weight variation.
let score_psi_psi = gaussian_pack_wiggle_joint_score(
&(xmu_ab_map.transpose_mul(s_mu.view())
+ xmu_a_map.transpose_mul(s_mu_b.view())
+ xmu_b_map.transpose_mul(s_mu_a.view())
+ fast_atv(xmu, &s_mu_ab)),
&(x_ls_ab_map.transpose_mul(s_ls.view())
+ x_ls_a_map.transpose_mul(s_ls_b.view())
+ x_ls_b_map.transpose_mul(s_ls_a.view())
+ fast_atv(x_ls, &s_ls_ab)),
&(fast_atv(&basis_ab, &s_w)
+ fast_atv(&basis_a, &s_w_b)
+ fast_atv(&basis_b, &s_w_a)
+ fast_atv(&geom.basis, &s_w_ab)),
);
// Rebinds `n` to the row-scalar length for sizing the zero coefficient vectors.
let n = rows.m.len();
// Per-row Hessian coefficients and their variations. The `ml` and `l` families
// are identically zero but are kept so every block follows the same layout.
let coeff_mm = &rows.w * &geom.dq_dq0.mapv(|v| v * v) - &rows.m * &geom.d2q_dq02;
let coeff_ml = Array1::<f64>::zeros(n);
let coeff_ll = 2.0 * &rows.kappa * &rows.kappa * &rows.obs_weight;
let coeff_mm_a = &(&dw_a * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a)
- &(&dm_a * &geom.d2q_dq02)
- &(&rows.m * &g2_a);
let coeff_mm_b = &(&dw_b * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_b)
- &(&dm_b * &geom.d2q_dq02)
- &(&rows.m * &g2_b);
let coeff_mm_ab = &(&dw_ab * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &dw_a * &geom.dq_dq0 * &s1_b)
+ &(2.0 * &dw_b * &geom.dq_dq0 * &s1_a)
+ &(2.0 * &rows.w * &s1_a * &s1_b)
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_ab)
- &(&dm_ab * &geom.d2q_dq02)
- &(&dm_a * &g2_b)
- &(&dm_b * &g2_a)
- &(&rows.m * &g2_ab);
let coeff_ml_a = Array1::<f64>::zeros(n);
let coeff_ml_b = Array1::<f64>::zeros(n);
let coeff_ml_ab = Array1::<f64>::zeros(n);
let coeff_ll_a = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_a;
let coeff_ll_b = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_b;
let coeff_ll_ab = 4.0
* &rows.obs_weight
* &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
* &(e_a * e_b)
+ 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * e_ab;
let a = &rows.w * &geom.dq_dq0;
let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
let a_b = &dw_b * &geom.dq_dq0 + &rows.w * &s1_b;
let a_ab = &dw_ab * &geom.dq_dq0 + &dw_a * &s1_b + &dw_b * &s1_a + &rows.w * &s1_ab;
let c = -&rows.m;
let c_a = -&dm_a;
let c_b = -&dm_b;
let c_ab = -&dm_ab;
let l = Array1::<f64>::zeros(n);
let l_a = Array1::<f64>::zeros(n);
let l_b = Array1::<f64>::zeros(n);
let l_ab = Array1::<f64>::zeros(n);
// Mean x mean: four one-sided products, each added with its transpose, plus the
// mixed-coefficient crossproduct on the fixed design.
let hmm_ab = weighted_crossprod_psi_maps(
xmu_ab_map,
coeff_mm.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let hmm_ij = weighted_crossprod_psi_maps(xmu_a_map, coeff_mm.view(), xmu_b_map)?;
let hmm_iwj = weighted_crossprod_psi_maps(
xmu_a_map,
coeff_mm_b.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let hmm_jwi = weighted_crossprod_psi_maps(
xmu_b_map,
coeff_mm_a.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let h_mm = &hmm_ab
+ &hmm_ab.t()
+ &hmm_ij
+ hmm_ij.t()
+ &hmm_iwj
+ hmm_iwj.t()
+ &hmm_jwi
+ hmm_jwi.t()
+ &xt_diag_x_dense(xmu, &coeff_mm_ab)?;
// Mean x log-sigma: nine product-rule terms, all weighted by zero vectors here.
let h_ml = weighted_crossprod_psi_maps(
xmu_ab_map,
coeff_ml.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(xmu_a_map, coeff_ml.view(), x_ls_b_map)?
+ &weighted_crossprod_psi_maps(xmu_b_map, coeff_ml.view(), x_ls_a_map)?
+ &weighted_crossprod_psi_maps(
xmu_a_map,
coeff_ml_b.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
xmu_b_map,
coeff_ml_a.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
coeff_ml_a.view(),
x_ls_b_map,
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
coeff_ml_b.view(),
x_ls_a_map,
)?
+ &xt_diag_y_dense(xmu, &coeff_ml_ab, x_ls)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
coeff_ml.view(),
x_ls_ab_map,
)?;
// Log-sigma x log-sigma: same symmetric layout as the mean x mean block.
let hll_ab = weighted_crossprod_psi_maps(
x_ls_ab_map,
coeff_ll.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let hll_ij = weighted_crossprod_psi_maps(x_ls_a_map, coeff_ll.view(), x_ls_b_map)?;
let hll_iwj = weighted_crossprod_psi_maps(
x_ls_a_map,
coeff_ll_b.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let hll_jwi = weighted_crossprod_psi_maps(
x_ls_b_map,
coeff_ll_a.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let h_ll = &hll_ab
+ &hll_ab.t()
+ &hll_ij
+ hll_ij.t()
+ &hll_iwj
+ hll_iwj.t()
+ &hll_jwi
+ hll_jwi.t()
+ &xt_diag_x_dense(x_ls, &coeff_ll_ab)?;
// Mean x wiggle: nine terms with the `a` weight family against `basis` and its
// variations, then nine terms with the `c` family against `basis_d1` and its
// variations.
let h_mw = weighted_crossprod_psi_maps(
xmu_ab_map,
a.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
xmu_a_map,
a_b.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
xmu_a_map,
a.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_b),
)? + &weighted_crossprod_psi_maps(
xmu_b_map,
a_a.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &xt_diag_y_dense(xmu, &a_ab, &geom.basis)?
+ &xt_diag_y_dense(xmu, &a_a, &basis_b)?
+ &weighted_crossprod_psi_maps(
xmu_b_map,
a.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_a),
)?
+ &xt_diag_y_dense(xmu, &a_b, &basis_a)?
+ &xt_diag_y_dense(xmu, &a, &basis_ab)?
+ &weighted_crossprod_psi_maps(
xmu_ab_map,
c.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
)?
+ &weighted_crossprod_psi_maps(
xmu_a_map,
c_b.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
)?
+ &weighted_crossprod_psi_maps(
xmu_a_map,
c.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis1_b),
)?
+ &weighted_crossprod_psi_maps(
xmu_b_map,
c_a.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
)?
+ &xt_diag_y_dense(xmu, &c_ab, &geom.basis_d1)?
+ &xt_diag_y_dense(xmu, &c_a, &basis1_b)?
+ &weighted_crossprod_psi_maps(
xmu_b_map,
c.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis1_a),
)?
+ &xt_diag_y_dense(xmu, &c_b, &basis1_a)?
+ &xt_diag_y_dense(xmu, &c, &basis1_ab)?;
// Log-sigma x wiggle: nine terms, all weighted by zero vectors here.
let h_lw = weighted_crossprod_psi_maps(
x_ls_ab_map,
l.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
x_ls_a_map,
l_b.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
x_ls_a_map,
l.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_b),
)? + &weighted_crossprod_psi_maps(
x_ls_b_map,
l_a.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &xt_diag_y_dense(x_ls, &l_ab, &geom.basis)?
+ &xt_diag_y_dense(x_ls, &l_a, &basis_b)?
+ &weighted_crossprod_psi_maps(
x_ls_b_map,
l.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_a),
)?
+ &xt_diag_y_dense(x_ls, &l_b, &basis_a)?
+ &xt_diag_y_dense(x_ls, &l, &basis_ab)?;
// Wiggle x wiggle: four one-sided products, each added with its transpose, plus
// the mixed weight-variation crossproduct on the plain basis.
let hww_ab = xt_diag_y_dense(&basis_ab, &rows.w, &geom.basis)?;
let hww_ij = xt_diag_y_dense(&basis_a, &rows.w, &basis_b)?;
let hww_iwj = xt_diag_y_dense(&basis_a, &dw_b, &geom.basis)?;
let hww_jwi = xt_diag_y_dense(&basis_b, &dw_a, &geom.basis)?;
let h_ww = &hww_ab
+ &hww_ab.t()
+ &hww_ij
+ hww_ij.t()
+ &hww_iwj
+ hww_iwj.t()
+ &hww_jwi
+ hww_jwi.t()
+ &xt_diag_x_dense(&geom.basis, &dw_ab)?;
Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
objective_psi_psi,
score_psi_psi,
hessian_psi_psi: gaussian_pack_wiggle_joint_symmetrichessian(
&h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
),
hessian_psi_psi_operator: None,
})
}
/// Looks up the psi direction for `psi_index` and, when one exists, delegates to
/// `exact_newton_joint_psihessian_directional_derivative_from_parts`.
///
/// Yields `Ok(None)` when no direction is available for `psi_index`; errors from
/// the lookup and from the assembly are propagated unchanged.
fn exact_newton_joint_psihessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let maybe_direction = self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        xmu,
        x_ls,
        &self.policy,
    )?;
    // `Option<Result<_>>` -> `Result<Option<_>>`: an absent direction stays `None`.
    maybe_direction
        .map(|direction| {
            self.exact_newton_joint_psihessian_directional_derivative_from_parts(
                block_states,
                &direction,
                d_beta_flat,
                xmu,
                x_ls,
            )
        })
        .transpose()
}
fn exact_newton_joint_psihessian_directional_derivative_from_parts(
&self,
block_states: &[ParameterBlockState],
dir_a: &LocationScaleJointPsiDirection,
d_beta_flat: &Array1<f64>,
xmu: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<Array2<f64>, String> {
let pmu = xmu.ncols();
let p_ls = x_ls.ncols();
let xmu_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
let q0 = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let layout = GamlssBetaLayout::withwiggle(pmu, p_ls, betaw.len());
let (umu, u_ls, uw) = layout.split_three(
d_beta_flat,
"GaussianLocationScaleWiggleFamily joint psi hessian directional derivative",
)?;
let q = q0 + etaw;
let geom = self.wiggle_geometry(q0.view(), betaw.view())?;
let rows = self.get_or_compute_row_scalars(&q, eta_ls)?;
let xi = fast_av(xmu, &umu);
let zeta = fast_av(x_ls, &u_ls);
let zmu_a_u = xmu_map.forward_mul(umu.view());
let zls_a_u = x_ls_map.forward_mul(u_ls.view());
let b1u = fast_av(&geom.basis_d1, &uw);
let b2u = fast_av(&geom.basis_d2, &uw);
let b3u = fast_av(&geom.basis_d3, &uw);
let q_u = &(&geom.dq_dq0 * &xi) + &fast_av(&geom.basis, &uw);
let s1_u = &(&geom.d2q_dq02 * &xi) + &b1u;
let g2_u = &(&geom.d3q_dq03 * &xi) + &b2u;
let g3_u = &(&geom.d4q_dq04 * &xi) + &b3u;
let q_a = &geom.dq_dq0 * &dir_a.z_primary_psi;
let s1_a = &geom.d2q_dq02 * &dir_a.z_primary_psi;
let g2_a = &geom.d3q_dq03 * &dir_a.z_primary_psi;
let q_a_u = &(&s1_u * &dir_a.z_primary_psi) + &(&geom.dq_dq0 * &zmu_a_u);
let s1_a_u = &(&g2_u * &dir_a.z_primary_psi) + &(&geom.d2q_dq02 * &zmu_a_u);
let g2_a_u = &(&g3_u * &dir_a.z_primary_psi) + &(&geom.d3q_dq03 * &zmu_a_u);
let basis_u = scale_matrix_rows(&geom.basis_d1, &xi)?;
let basis1_u = scale_matrix_rows(&geom.basis_d2, &xi)?;
let basis_a = scale_matrix_rows(&geom.basis_d1, &dir_a.z_primary_psi)?;
let basis1_a = scale_matrix_rows(&geom.basis_d2, &dir_a.z_primary_psi)?;
let basis_a_u = scale_matrix_rows(&geom.basis_d2, &(&xi * &dir_a.z_primary_psi))?
+ &scale_matrix_rows(&geom.basis_d1, &zmu_a_u)?;
let basis1_a_u = scale_matrix_rows(&geom.basis_d3, &(&xi * &dir_a.z_primary_psi))?
+ &scale_matrix_rows(&geom.basis_d2, &zmu_a_u)?;
let e_a = &dir_a.z_ls_psi;
let four_k2_minus_2kpi = 4.0 * &rows.kappa * &rows.kappa - 2.0 * &rows.kappa_prime;
let dw_u = -2.0 * &rows.w * &rows.kappa * ζ
let dm_u = -(&rows.w * &q_u) - &(2.0 * &rows.m * &rows.kappa * &zeta);
let dw_a = -2.0 * &rows.w * &rows.kappa * e_a;
let dm_a = -(&rows.w * &q_a) - &(2.0 * &rows.m * &rows.kappa * e_a);
let dw_a_u = &four_k2_minus_2kpi * &rows.w * &(e_a * &zeta)
- &(2.0 * &rows.w * &rows.kappa * &zls_a_u);
let dm_a_u = &(2.0 * &rows.w * &rows.kappa * &(&q_a * &zeta + &q_u * e_a))
- &(&rows.w * &q_a_u)
+ &(&four_k2_minus_2kpi * &rows.m * &(e_a * &zeta))
- &(2.0 * &rows.m * &rows.kappa * &zls_a_u);
let coeff_mm_u = &(&dw_u * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_u)
- &(&dm_u * &geom.d2q_dq02)
- &(&rows.m * &g2_u);
let n = rows.m.len();
let coeff_ml_u = Array1::<f64>::zeros(n);
let coeff_ll_u = 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * ζ
let coeff_mm_a_u = &(&dw_a_u * &geom.dq_dq0.mapv(|v| v * v))
+ &(2.0 * &dw_a * &geom.dq_dq0 * &s1_u)
+ &(2.0 * &dw_u * &geom.dq_dq0 * &s1_a)
+ &(2.0 * &rows.w * &s1_u * &s1_a)
+ &(2.0 * &rows.w * &geom.dq_dq0 * &s1_a_u)
- &(&dm_a_u * &geom.d2q_dq02)
- &(&dm_a * &g2_u)
- &(&dm_u * &g2_a)
- &(&rows.m * &g2_a_u);
let coeff_ml_a_u = Array1::<f64>::zeros(n);
let coeff_ll_a_u = 4.0
* &rows.obs_weight
* &(&rows.kappa_prime * &rows.kappa_prime + &rows.kappa * &rows.kappa_dprime)
* &(e_a * &zeta)
+ 4.0 * &rows.kappa * &rows.kappa_prime * &rows.obs_weight * &zls_a_u;
let a = &rows.w * &geom.dq_dq0;
let a_u = &dw_u * &geom.dq_dq0 + &rows.w * &s1_u;
let a_a = &dw_a * &geom.dq_dq0 + &rows.w * &s1_a;
let a_a_u = &dw_a_u * &geom.dq_dq0 + &dw_a * &s1_u + &dw_u * &s1_a + &rows.w * &s1_a_u;
let c = -&rows.m;
let c_u = -&dm_u;
let c_a = -&dm_a;
let c_a_u = -&dm_a_u;
let l = Array1::<f64>::zeros(n);
let l_u = Array1::<f64>::zeros(n);
let l_a = Array1::<f64>::zeros(n);
let l_a_u = Array1::<f64>::zeros(n);
let hmm_a1 = weighted_crossprod_psi_maps(
xmu_map,
coeff_mm_u.view(),
CustomFamilyPsiLinearMapRef::Dense(xmu),
)?;
let h_mm = &hmm_a1 + &hmm_a1.t() + &xt_diag_x_dense(xmu, &coeff_mm_a_u)?;
let h_ml = weighted_crossprod_psi_maps(
xmu_map,
coeff_ml_u.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(xmu),
coeff_ml_u.view(),
x_ls_map,
)? + &xt_diag_y_dense(xmu, &coeff_ml_a_u, x_ls)?;
let hll_a1 = weighted_crossprod_psi_maps(
x_ls_map,
coeff_ll_u.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?;
let h_ll = &hll_a1 + &hll_a1.t() + &xt_diag_x_dense(x_ls, &coeff_ll_a_u)?;
let h_mw = weighted_crossprod_psi_maps(
xmu_map,
a_u.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
xmu_map,
a.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_u),
)? + &xt_diag_y_dense(xmu, &a_a_u, &geom.basis)?
+ &xt_diag_y_dense(xmu, &a_a, &basis_u)?
+ &xt_diag_y_dense(xmu, &a_u, &basis_a)?
+ &xt_diag_y_dense(xmu, &a, &basis_a_u)?
+ &weighted_crossprod_psi_maps(
xmu_map,
c_u.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis_d1),
)?
+ &weighted_crossprod_psi_maps(
xmu_map,
c.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis1_u),
)?
+ &xt_diag_y_dense(xmu, &c_a_u, &geom.basis_d1)?
+ &xt_diag_y_dense(xmu, &c_a, &basis1_u)?
+ &xt_diag_y_dense(xmu, &c_u, &basis1_a)?
+ &xt_diag_y_dense(xmu, &c, &basis1_a_u)?;
let h_lw = weighted_crossprod_psi_maps(
x_ls_map,
l_u.view(),
CustomFamilyPsiLinearMapRef::Dense(&geom.basis),
)? + &weighted_crossprod_psi_maps(
x_ls_map,
l.view(),
CustomFamilyPsiLinearMapRef::Dense(&basis_u),
)? + &xt_diag_y_dense(x_ls, &l_a_u, &geom.basis)?
+ &xt_diag_y_dense(x_ls, &l_a, &basis_u)?
+ &xt_diag_y_dense(x_ls, &l_u, &basis_a)?
+ &xt_diag_y_dense(x_ls, &l, &basis_a_u)?;
let hww_a_u = xt_diag_y_dense(&basis_a_u, &rows.w, &geom.basis)?;
let hww_aw = xt_diag_y_dense(&basis_a, &dw_u, &geom.basis)?;
let hww_au = xt_diag_y_dense(&basis_a, &rows.w, &basis_u)?;
let h_ww = &hww_a_u
+ &hww_a_u.t()
+ &hww_aw
+ hww_aw.t()
+ &hww_au
+ hww_au.t()
+ &xt_diag_x_dense(&geom.basis, &dw_a_u)?;
Ok(gaussian_pack_wiggle_joint_symmetrichessian(
&h_mm, &h_ml, &h_mw, &h_ll, &h_lw, &h_ww,
))
}
/// Resolves the dense mean / log-sigma designs from `specs` and forwards to
/// `exact_newton_joint_psi_terms_from_designs`.
///
/// Yields `Ok(None)` when `exact_joint_dense_block_designs` provides no dense
/// designs; errors from either call are propagated.
fn exact_newton_joint_psi_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((mean_design, log_sigma_design)) => self.exact_newton_joint_psi_terms_from_designs(
            block_states,
            derivative_blocks,
            psi_index,
            &mean_design,
            &log_sigma_design,
        ),
    }
}
/// Resolves the dense mean / log-sigma designs from `specs` and forwards to
/// `exact_newton_joint_psisecond_order_terms_from_designs` for the pair
/// (`psi_i`, `psi_j`).
///
/// Yields `Ok(None)` when `exact_joint_dense_block_designs` provides no dense
/// designs; errors from either call are propagated.
fn exact_newton_joint_psisecond_order_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((mean_design, log_sigma_design)) => self
            .exact_newton_joint_psisecond_order_terms_from_designs(
                block_states,
                derivative_blocks,
                psi_i,
                psi_j,
                &mean_design,
                &log_sigma_design,
            ),
    }
}
/// Resolves the dense mean / log-sigma designs from `specs` and forwards to
/// `exact_newton_joint_psihessian_directional_derivative_from_designs`.
///
/// Yields `Ok(None)` when `exact_joint_dense_block_designs` provides no dense
/// designs; errors from either call are propagated.
fn exact_newton_joint_psihessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((mean_design, log_sigma_design)) => self
            .exact_newton_joint_psihessian_directional_derivative_from_designs(
                block_states,
                derivative_blocks,
                psi_index,
                d_beta_flat,
                &mean_design,
                &log_sigma_design,
            ),
    }
}
}
impl CustomFamily for GaussianLocationScaleWiggleFamily {
/// Always returns `true` for this family.
///
/// NOTE(review): presumably tells the fitting engine that the exact-Newton
/// joint Hessian changes with the coefficients and must not be reused across
/// iterations - confirm against the `CustomFamily` trait documentation.
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
/// Cost estimate for the coefficient Hessian, delegated to the shared
/// location-scale engine helper with the observation count `self.y.len()`.
fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
    let observation_count = self.y.len() as u64;
    crate::families::location_scale_engine::location_scale_coefficient_hessian_cost(
        observation_count,
        specs,
    )
}
/// Linear inequality constraints for the block at `block_idx`.
///
/// Only the wiggle block is constrained: it receives the result of
/// `monotone_wiggle_nonnegative_constraints` sized by the number of columns of
/// `spec.design`. Every other block yields `Ok(None)`.
///
/// # Panics
/// Panics if `block_states.len()` exceeds `isize::MAX`.
fn block_linear_constraints(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    if block_idx == Self::BLOCK_WIGGLE {
        let wiggle_columns = spec.design.ncols();
        Ok(monotone_wiggle_nonnegative_constraints(wiggle_columns))
    } else {
        Ok(None)
    }
}
/// Post-update hook for a block's coefficient vector.
///
/// For the wiggle block, `beta` is checked with
/// `validate_monotone_wiggle_beta_nonnegative` and any validation error is
/// returned. In every case the coefficients are handed back unchanged.
///
/// # Panics
/// Panics if `block_states.len()` exceeds `isize::MAX` or if `block_spec.name`
/// is empty.
fn post_update_block_beta(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    block_spec: &ParameterBlockSpec,
    beta: Array1<f64>,
) -> Result<Array1<f64>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    assert!(!block_spec.name.is_empty());
    if block_idx == Self::BLOCK_WIGGLE {
        validate_monotone_wiggle_beta_nonnegative(
            &beta,
            "GaussianLocationScaleWiggleFamily post-update",
        )?;
    }
    Ok(beta)
}
/// Evaluates the log-likelihood and the per-block diagonal working sets.
///
/// Expects exactly three block states. Each row is passed through
/// `gaussian_diagonal_row_kernel` with the combined location predictor
/// `eta_mu + eta_wiggle`, the log-sigma predictor, the observation weight and
/// `ln(2*pi)`. Rows are processed in parallel chunks of `CHUNK` elements and
/// the per-chunk log-likelihoods are summed.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 3 or when
/// any predictor / weight vector length differs from `self.y.len()`, and
/// propagates any error from `BlockWorkingSet::diagonal_checked`.
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_mu = &block_states[Self::BLOCK_MU].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
if eta_mu.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let ln2pi = (2.0 * std::f64::consts::PI).ln();
// Working responses (z*) and working weights (w*) for the three blocks.
let mut zmu = Array1::<f64>::zeros(n);
let mut wmu = Array1::<f64>::zeros(n);
let mut zls = Array1::<f64>::zeros(n);
let mut wls = Array1::<f64>::zeros(n);
let mut zw = Array1::<f64>::zeros(n);
let mut ww = Array1::<f64>::zeros(n);
const CHUNK: usize = 1024;
// Mutable slices over the freshly allocated buffers so they can be split
// into disjoint parallel chunks.
let zmu_s = zmu
.as_slice_memory_order_mut()
.expect("zeros is contiguous");
let wmu_s = wmu
.as_slice_memory_order_mut()
.expect("zeros is contiguous");
let zls_s = zls
.as_slice_memory_order_mut()
.expect("zeros is contiguous");
let wls_s = wls
.as_slice_memory_order_mut()
.expect("zeros is contiguous");
let zw_s = zw.as_slice_memory_order_mut().expect("zeros is contiguous");
let ww_s = ww.as_slice_memory_order_mut().expect("zeros is contiguous");
let y_view = self.y.view();
let w_view = self.weights.view();
let eta_mu_view = eta_mu.view();
let eta_ls_view = eta_ls.view();
let etaw_view = etaw.view();
// All six output buffers are chunked identically, so chunk `chunk_idx`
// covers global rows starting at `chunk_idx * CHUNK` in every buffer.
let ll: f64 = zmu_s
.par_chunks_mut(CHUNK)
.zip(wmu_s.par_chunks_mut(CHUNK))
.zip(zls_s.par_chunks_mut(CHUNK))
.zip(wls_s.par_chunks_mut(CHUNK))
.zip(zw_s.par_chunks_mut(CHUNK))
.zip(ww_s.par_chunks_mut(CHUNK))
.enumerate()
.map(
|(chunk_idx, (((((zmu_c, wmu_c), zls_c), wls_c), zw_c), ww_c))| {
let start = chunk_idx * CHUNK;
let mut local_ll = 0.0;
for local in 0..zmu_c.len() {
let i = start + local;
// Combined location predictor seen by the row kernel.
let q_i = eta_mu_view[i] + etaw_view[i];
let row = gaussian_diagonal_row_kernel(
y_view[i],
q_i,
eta_ls_view[i],
w_view[i],
ln2pi,
);
// The mean and wiggle blocks share the same working weight and the
// same shift; only the predictor the shift is added to differs.
let w_i = row.location_working_weight;
let shift = row.location_working_shift;
zmu_c[local] = eta_mu_view[i] + shift;
wmu_c[local] = w_i;
zw_c[local] = etaw_view[i] + shift;
ww_c[local] = w_i;
zls_c[local] = row.log_sigma_working_response;
wls_c[local] = row.log_sigma_working_weight;
local_ll += row.log_likelihood;
}
local_ll
},
)
.sum();
// Working sets are emitted in the order (zmu, zls, zw).
Ok(FamilyEvaluation {
log_likelihood: ll,
blockworking_sets: vec![
BlockWorkingSet::diagonal_checked(zmu, wmu)?,
BlockWorkingSet::diagonal_checked(zls, wls)?,
BlockWorkingSet::diagonal_checked(zw, ww)?,
],
})
}
/// Weighted Gaussian log-likelihood over all rows, without working sets.
///
/// Row `i` contributes
/// `w_i * -0.5 * (r_i^2 / sigma_i^2 + ln(2*pi) + 2 * ln(sigma_i))` with
/// `r_i = y_i - (eta_mu_i + eta_wiggle_i)` and
/// `sigma_i = logb_sigma_from_eta_scalar(eta_ls_i)`.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 3 or any
/// predictor / weight length differs from `self.y.len()`.
fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_mu = &block_states[Self::BLOCK_MU].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let lengths_agree =
        eta_mu.len() == n && eta_ls.len() == n && etaw.len() == n && self.weights.len() == n;
    if !lengths_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let ln2pi = (2.0 * std::f64::consts::PI).ln();
    // Sequential left-to-right accumulation starting from 0.0.
    let total = (0..n).fold(0.0, |acc, i| {
        let sigma = logb_sigma_from_eta_scalar(eta_ls[i]);
        let inv_var = (sigma * sigma).recip();
        // The location predictor is summed first, then subtracted from y.
        let resid = self.y[i] - (eta_mu[i] + etaw[i]);
        acc + self.weights[i] * (-0.5 * (resid * resid * inv_var + ln2pi + 2.0 * sigma.ln()))
    });
    Ok(total)
}
/// Log-likelihood restricted to an optional outer-score subsample.
///
/// Without a subsample in `options` this is exactly `log_likelihood_only`.
/// With one, only the sampled rows contribute, each scaled by its sampling
/// weight times the observation weight. Rows whose observation weight is
/// exactly zero contribute `0.0`. The sampled rows are reduced in parallel.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 3 or any
/// predictor / weight length differs from `self.y.len()`.
fn log_likelihood_only_with_options(
    &self,
    block_states: &[ParameterBlockState],
    options: &BlockwiseFitOptions,
) -> Result<f64, String> {
    let subsample = match options.outer_score_subsample.as_ref() {
        Some(sub) => sub,
        None => return self.log_likelihood_only(block_states),
    };
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_mu = &block_states[Self::BLOCK_MU].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let lengths_agree =
        eta_mu.len() == n && eta_ls.len() == n && etaw.len() == n && self.weights.len() == n;
    if !lengths_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "GaussianLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let ln2pi = (2.0 * std::f64::consts::PI).ln();
    use rayon::iter::ParallelIterator;
    let ll: f64 = subsample
        .rows
        .par_iter()
        .map(|sampled| {
            let idx = sampled.index;
            let obs_weight = self.weights[idx];
            if obs_weight == 0.0 {
                0.0
            } else {
                let sigma = logb_sigma_from_eta_scalar(eta_ls[idx]);
                let inv_var = (sigma * sigma).recip();
                let resid = self.y[idx] - eta_mu[idx] - etaw[idx];
                sampled.weight
                    * obs_weight
                    * (-0.5 * (resid * resid * inv_var + ln2pi + 2.0 * sigma.ln()))
            }
        })
        .sum();
    Ok(ll)
}
/// Always returns `true` for this family.
// NOTE(review): presumably forces the joint outer hyper-parameter path in the
// fitter — confirm against the `CustomFamily` trait docs.
fn requires_joint_outer_hyper_path(&self) -> bool {
true
}
/// Directional derivative of one block's diagonal Hessian block.
///
/// The block-local direction `d_beta` is embedded into a zero-padded joint
/// direction laid out as `[mu | log-sigma | wiggle]`. The joint directional
/// Hessian is then evaluated on the dense designs and the
/// `[start..end, start..end]` sub-block for `block_idx` is returned. An
/// unknown `block_idx` yields `Ok(None)`.
///
/// # Errors
/// Fails when the block count is not 3, when the mu or log-sigma design is
/// missing, when `d_beta` has the wrong length, or when the joint
/// directional Hessian is unavailable.
fn exact_newton_hessian_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    d_beta: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let mu_design = self.mu_design.as_ref().ok_or_else(|| {
        "GaussianLocationScaleWiggleFamily exact path is missing mu design".to_string()
    })?;
    let pmu = mu_design.ncols();
    let log_sigma_design = self.log_sigma_design.as_ref().ok_or_else(|| {
        "GaussianLocationScaleWiggleFamily exact path is missing log-sigma design".to_string()
    })?;
    let p_ls = log_sigma_design.ncols();
    let pw = block_states[Self::BLOCK_WIGGLE].beta.len();
    let total = pmu + p_ls + pw;
    // Column range of the requested block inside the joint coefficient vector.
    let (start, end) = match block_idx {
        Self::BLOCK_MU => (0usize, pmu),
        Self::BLOCK_LOG_SIGMA => (pmu, pmu + p_ls),
        Self::BLOCK_WIGGLE => (pmu + p_ls, total),
        _ => return Ok(None),
    };
    let width = end - start;
    if d_beta.len() != width {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "GaussianLocationScaleWiggleFamily block {block_idx} d_beta length mismatch: got {}, expected {}",
            d_beta.len(),
            width
        ) }.into());
    }
    let mut d_beta_flat = Array1::<f64>::zeros(total);
    d_beta_flat.slice_mut(s![start..end]).assign(d_beta);
    let (xmu, x_ls) = self.dense_block_designs()?;
    let joint = self.exact_newton_joint_hessian_directional_derivative_from_designs(
        block_states,
        &xmu,
        &x_ls,
        &d_beta_flat,
    )?;
    let d_joint = joint
        .ok_or_else(|| "missing Gaussian wiggle exact joint directional Hessian".to_string())?;
    let block = d_joint.slice(s![start..end, start..end]).to_owned();
    Ok(Some(block))
}
/// Joint Hessian without caller-supplied specs; forwards to the `_for_specs`
/// implementation with `None`.
fn exact_newton_joint_hessian(
    &self,
    block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
    let joint_hessian = self.exact_newton_joint_hessian_for_specs(block_states, None)?;
    Ok(joint_hessian)
}
/// Always returns `true` for this family.
// NOTE(review): presumably advertises that `exact_newton_joint_hessian` is
// implemented — confirm against the `CustomFamily` trait docs.
fn has_explicit_joint_hessian(&self) -> bool {
true
}
/// Joint-Hessian directional derivative without caller-supplied specs;
/// forwards to the `_for_specs` implementation with `None`.
fn exact_newton_joint_hessian_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let derivative = self.exact_newton_joint_hessian_directional_derivative_for_specs(
        block_states,
        None,
        d_beta_flat,
    )?;
    Ok(derivative)
}
/// Second directional derivative of the joint Hessian without
/// caller-supplied specs; forwards to the `_for_specs` implementation with
/// `None`.
fn exact_newton_joint_hessiansecond_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let second = self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
        block_states,
        None,
        d_beta_u_flat,
        d_beta_v_flat,
    )?;
    Ok(second)
}
/// Joint Hessian using the caller-supplied `specs`; forwards to the
/// `_for_specs` implementation with `Some(specs)`.
fn exact_newton_joint_hessian_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
    let joint_hessian = self.exact_newton_joint_hessian_for_specs(block_states, Some(specs))?;
    Ok(joint_hessian)
}
/// Joint-Hessian directional derivative using the caller-supplied `specs`;
/// forwards to the `_for_specs` implementation with `Some(specs)`.
fn exact_newton_joint_hessian_directional_derivative_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let derivative = self.exact_newton_joint_hessian_directional_derivative_for_specs(
        block_states,
        Some(specs),
        d_beta_flat,
    )?;
    Ok(derivative)
}
/// Second directional derivative of the joint Hessian using the
/// caller-supplied `specs`; forwards to the `_for_specs` implementation with
/// `Some(specs)`.
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let second = self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
        block_states,
        Some(specs),
        d_beta_u_flat,
        d_beta_v_flat,
    )?;
    Ok(second)
}
/// First-order joint psi terms for hyper-parameter `psi_index`; thin trait
/// adapter over the inherent `_for_specs` implementation.
fn exact_newton_joint_psi_terms(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    let terms = self.exact_newton_joint_psi_terms_for_specs(
        block_states,
        specs,
        derivative_blocks,
        psi_index,
    )?;
    Ok(terms)
}
/// Second-order joint psi terms for the pair `(psi_i, psi_j)`; thin trait
/// adapter over the inherent `_for_specs` implementation.
fn exact_newton_joint_psisecond_order_terms(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    let terms = self.exact_newton_joint_psisecond_order_terms_for_specs(
        block_states,
        specs,
        derivative_blocks,
        psi_i,
        psi_j,
    )?;
    Ok(terms)
}
/// Directional derivative of the joint psi-Hessian along `d_beta_flat`; thin
/// trait adapter over the inherent `_for_specs` implementation.
fn exact_newton_joint_psihessian_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let derivative = self.exact_newton_joint_psihessian_directional_derivative_for_specs(
        block_states,
        specs,
        derivative_blocks,
        psi_index,
        d_beta_flat,
    )?;
    Ok(derivative)
}
/// Builds the shared joint-psi workspace for this family.
///
/// Returns `Ok(None)` when `exact_joint_supported()` is false. Otherwise the
/// workspace owns clones of the family, the block states and the derivative
/// blocks.
fn exact_newton_joint_psi_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    if self.exact_joint_supported() {
        let built = GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new(
            self.clone(),
            block_states.to_vec(),
            specs,
            derivative_blocks.to_vec(),
        )?;
        let shared: Arc<dyn ExactNewtonJointPsiWorkspace> = Arc::new(built);
        Ok(Some(shared))
    } else {
        Ok(None)
    }
}
/// Builds the shared joint-psi workspace, passing along the optional
/// outer-score subsample from `options`.
///
/// Returns `Ok(None)` when `exact_joint_supported()` is false.
fn exact_newton_joint_psi_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    if self.exact_joint_supported() {
        let built = GaussianLocationScaleWiggleExactNewtonJointPsiWorkspace::new_with_subsample(
            self.clone(),
            block_states.to_vec(),
            specs,
            derivative_blocks.to_vec(),
            options.outer_score_subsample.clone(),
        )?;
        let shared: Arc<dyn ExactNewtonJointPsiWorkspace> = Arc::new(built);
        Ok(Some(shared))
    } else {
        Ok(None)
    }
}
/// Design matrix and offset for a block at the current state.
///
/// Blocks not named `"wiggle"` return clones of their static design and
/// offset. The wiggle block's design is rebuilt from the current mean
/// predictor via `wiggle_design` and paired with a zero offset.
///
/// # Errors
/// Fails when no block state is available, when the mean predictor length
/// differs from `self.y.len()`, or when the rebuilt wiggle design's column
/// count differs from the spec's.
fn block_geometry(
    &self,
    block_states: &[ParameterBlockState],
    spec: &ParameterBlockSpec,
) -> Result<(DesignMatrix, Array1<f64>), String> {
    let is_wiggle = !(spec.name != "wiggle");
    if !is_wiggle {
        return Ok((spec.design.clone(), spec.offset.clone()));
    }
    if block_states.is_empty() {
        return Err(GamlssError::UnsupportedConfiguration {
            reason: "Gaussian wiggle geometry requires mean block".to_string(),
        }
        .into());
    }
    let eta_mu = &block_states[Self::BLOCK_MU].eta;
    if eta_mu.len() != self.y.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: "Gaussian wiggle geometry input size mismatch".to_string(),
        }
        .into());
    }
    let wiggle_x = self.wiggle_design(eta_mu.view())?;
    let (got_cols, want_cols) = (wiggle_x.ncols(), spec.design.ncols());
    if got_cols != want_cols {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "Gaussian dynamic wiggle design col mismatch: got {}, expected {}",
                got_cols, want_cols
            ),
        }
        .into());
    }
    let zero_offset = Array1::zeros(wiggle_x.nrows());
    let design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(wiggle_x));
    Ok((design, zero_offset))
}
/// Always returns `true`: `block_geometry` rebuilds the wiggle design from the
/// current mean predictor rather than returning a fixed design.
fn block_geometry_is_dynamic(&self) -> bool {
true
}
/// Builds the joint-Hessian workspace on the dense designs resolved from
/// `specs`.
///
/// Returns `Ok(None)` when `exact_joint_dense_block_designs` reports no
/// dense designs.
fn exact_newton_joint_hessian_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((mean_design, log_sigma_design)) => {
            let built = GaussianLocationScaleWiggleHessianWorkspace::new(
                self.clone(),
                block_states.to_vec(),
                mean_design.into_owned(),
                log_sigma_design.into_owned(),
            )?;
            let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(built);
            Ok(Some(shared))
        }
    }
}
/// Builds the joint-Hessian workspace and, when `options` carries an
/// outer-score subsample, applies its row weights to the workspace before
/// sharing it.
///
/// Returns `Ok(None)` when `exact_joint_dense_block_designs` reports no
/// dense designs.
fn exact_newton_joint_hessian_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((mean_design, log_sigma_design)) => {
            let mut built = GaussianLocationScaleWiggleHessianWorkspace::new(
                self.clone(),
                block_states.to_vec(),
                mean_design.into_owned(),
                log_sigma_design.into_owned(),
            )?;
            match options.outer_score_subsample.as_ref() {
                Some(subsample) => built.apply_outer_subsample(subsample.rows.as_ref()),
                None => {}
            }
            let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(built);
            Ok(Some(shared))
        }
    }
}
/// Always returns `true` for this family.
// NOTE(review): presumably advertises that the `*_with_options` entry points
// honor `outer_score_subsample` — confirm against the `CustomFamily` trait docs.
fn outer_derivative_subsample_capable(&self) -> bool {
true
}
/// Whether the coefficient Hessian-vector product is available.
///
/// True only when the exact joint path is supported and dense block designs
/// resolve successfully for `specs`. The design lookup is skipped when the
/// exact joint path is unsupported.
fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
    if !self.exact_joint_supported() {
        return false;
    }
    match self.exact_joint_dense_block_designs(Some(specs)) {
        Ok(Some(_)) => true,
        _ => false,
    }
}
}
/// Joint-Hessian workspace for `GaussianLocationScaleWiggleFamily`.
///
/// Holds an owned copy of the family and block states, the dense designs
/// behind `Arc`s, and the row pieces computed once at construction.
struct GaussianLocationScaleWiggleHessianWorkspace {
/// Owned copy of the family the workspace was built for.
family: GaussianLocationScaleWiggleFamily,
/// Owned copy of the block states the pieces were computed at.
block_states: Vec<ParameterBlockState>,
/// Dense design of the mean block.
xmu: Arc<Array2<f64>>,
/// Dense design of the log-sigma block.
x_ls: Arc<Array2<f64>>,
/// Per-row Hessian coefficients and wiggle basis matrices.
pieces: GaussianLocationScaleWiggleHessianRowPieces,
}
impl GaussianLocationScaleWiggleHessianWorkspace {
fn new(
family: GaussianLocationScaleWiggleFamily,
block_states: Vec<ParameterBlockState>,
xmu: Array2<f64>,
x_ls: Array2<f64>,
) -> Result<Self, String> {
let pieces = family.wiggle_hessian_row_pieces(&block_states)?;
Ok(Self {
family,
block_states,
xmu: Arc::new(xmu),
x_ls: Arc::new(x_ls),
pieces,
})
}
fn apply_outer_subsample(
&mut self,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = self.pieces.coeff_mm.len();
let mut mask_mm = Array1::<f64>::zeros(n);
let mut mask_ml = Array1::<f64>::zeros(n);
let mut mask_ll = Array1::<f64>::zeros(n);
let mut mask_mw_b = Array1::<f64>::zeros(n);
let mut mask_mw_d = Array1::<f64>::zeros(n);
let mut mask_lw_b = Array1::<f64>::zeros(n);
let mut maskww = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
let w = r.weight;
mask_mm[i] = self.pieces.coeff_mm[i] * w;
mask_ml[i] = self.pieces.coeff_ml[i] * w;
mask_ll[i] = self.pieces.coeff_ll[i] * w;
mask_mw_b[i] = self.pieces.coeff_mw_b[i] * w;
mask_mw_d[i] = self.pieces.coeff_mw_d[i] * w;
mask_lw_b[i] = self.pieces.coeff_lw_b[i] * w;
maskww[i] = self.pieces.coeff_ww[i] * w;
}
self.pieces.coeff_mm = mask_mm;
self.pieces.coeff_ml = mask_ml;
self.pieces.coeff_ll = mask_ll;
self.pieces.coeff_mw_b = mask_mw_b;
self.pieces.coeff_mw_d = mask_mw_d;
self.pieces.coeff_lw_b = mask_lw_b;
self.pieces.coeff_ww = maskww;
}
}
impl ExactNewtonJointHessianWorkspace for GaussianLocationScaleWiggleHessianWorkspace {
/// Assembles the dense joint Hessian from the cached row pieces and the two
/// dense designs.
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
    let mean_design: &Array2<f64> = &self.xmu;
    let log_sigma_design: &Array2<f64> = &self.x_ls;
    let assembled = self.pieces.assemble_dense(mean_design, log_sigma_design)?;
    Ok(Some(assembled))
}
/// Always returns `true`: this workspace implements `hessian_matvec`.
fn hessian_matvec_available(&self) -> bool {
true
}
/// Matrix-free product of the joint Hessian with `v`.
///
/// `v` is laid out as `[mu | log-sigma | wiggle]`. Each segment is pushed
/// through its design (the wiggle segment through both `basis` and
/// `basis_d1`), combined row-wise with the cached coefficient vectors, and
/// pulled back through the transposed designs.
///
/// # Errors
/// Returns a `DimensionMismatch` error when `v.len()` differs from the total
/// number of joint coefficients.
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
let pmu = self.xmu.ncols();
let p_ls = self.x_ls.ncols();
let pw = self.pieces.basis.ncols();
let total = pmu + p_ls + pw;
if v.len() != total {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"GaussianLocationScaleWiggle matvec dimension mismatch: got {}, expected {}",
v.len(),
total
),
}
.into());
}
let v_mu = v.slice(s![0..pmu]);
let v_ls = v.slice(s![pmu..pmu + p_ls]);
let v_w = v.slice(s![pmu + p_ls..total]);
// Forward pass: per-row projections of each coefficient segment.
let u_mu = fast_av(self.xmu.as_ref(), &v_mu);
let u_ls = fast_av(self.x_ls.as_ref(), &v_ls);
let u_b = fast_av(&self.pieces.basis, &v_w);
let u_d = fast_av(&self.pieces.basis_d1, &v_w);
// Row-space combination. The `basis_d1` direction couples only with the mean
// direction (through `coeff_mw_d`), symmetrically in `r_mu` and `r_d`.
let r_mu = &self.pieces.coeff_mm * &u_mu
+ &self.pieces.coeff_ml * &u_ls
+ &self.pieces.coeff_mw_b * &u_b
+ &self.pieces.coeff_mw_d * &u_d;
let r_ls = &self.pieces.coeff_ml * &u_mu
+ &self.pieces.coeff_ll * &u_ls
+ &self.pieces.coeff_lw_b * &u_b;
let r_b = &self.pieces.coeff_mw_b * &u_mu
+ &self.pieces.coeff_lw_b * &u_ls
+ &self.pieces.coeff_ww * &u_b;
let r_d = &self.pieces.coeff_mw_d * &u_mu;
// Backward pass: pull the row vectors back through the transposed designs.
let out_mu = fast_atv(self.xmu.as_ref(), &r_mu);
let out_ls = fast_atv(self.x_ls.as_ref(), &r_ls);
let out_w = fast_atv(&self.pieces.basis, &r_b) + &fast_atv(&self.pieces.basis_d1, &r_d);
let mut out = Array1::<f64>::zeros(total);
out.slice_mut(s![0..pmu]).assign(&out_mu);
out.slice_mut(s![pmu..pmu + p_ls]).assign(&out_ls);
out.slice_mut(s![pmu + p_ls..total]).assign(&out_w);
Ok(Some(out))
}
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
let pmu = self.xmu.ncols();
let p_ls = self.x_ls.ncols();
let pw = self.pieces.basis.ncols();
let total = pmu + p_ls + pw;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
let diag_mu: Vec<f64> = (0..pmu)
.into_par_iter()
.map(|j| {
let col = self.xmu.column(j);
col.iter()
.zip(self.pieces.coeff_mm.iter())
.map(|(&v, &c)| c * v * v)
.sum()
})
.collect();
let diag_ls: Vec<f64> = (0..p_ls)
.into_par_iter()
.map(|j| {
let col = self.x_ls.column(j);
col.iter()
.zip(self.pieces.coeff_ll.iter())
.map(|(&v, &c)| c * v * v)
.sum()
})
.collect();
let diag_w: Vec<f64> = (0..pw)
.into_par_iter()
.map(|j| {
let col = self.pieces.basis.column(j);
col.iter()
.zip(self.pieces.coeff_ww.iter())
.map(|(&v, &c)| c * v * v)
.sum()
})
.collect();
let mut diag = Array1::<f64>::zeros(total);
for (j, v) in diag_mu.into_iter().enumerate() {
diag[j] = v;
}
for (j, v) in diag_ls.into_iter().enumerate() {
diag[pmu + j] = v;
}
for (j, v) in diag_w.into_iter().enumerate() {
diag[pmu + p_ls + j] = v;
}
Ok(Some(diag))
}
/// Dense directional derivative of the joint Hessian along `d_beta_flat`,
/// evaluated by the family on the stored block states and designs.
fn directional_derivative(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let mean_design: &Array2<f64> = &self.xmu;
    let log_sigma_design: &Array2<f64> = &self.x_ls;
    self.family
        .exact_newton_joint_hessian_directional_derivative_from_designs(
            &self.block_states,
            mean_design,
            log_sigma_design,
            d_beta_flat,
        )
}
/// Operator form of the joint-Hessian directional derivative along
/// `d_beta_flat`. The designs are shared with the operator via `Arc` clones.
fn directional_derivative_operator(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let mean_design = Arc::clone(&self.xmu);
    let log_sigma_design = Arc::clone(&self.x_ls);
    self.family.gls_wiggle_directional_operator(
        &self.block_states,
        mean_design,
        log_sigma_design,
        d_beta_flat,
    )
}
/// Dense second directional derivative of the joint Hessian along the pair
/// `(d_beta_u_flat, d_beta_v_flat)`, evaluated by the family on the stored
/// block states and designs.
fn second_directional_derivative(
    &self,
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let mean_design: &Array2<f64> = &self.xmu;
    let log_sigma_design: &Array2<f64> = &self.x_ls;
    self.family
        .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
            &self.block_states,
            mean_design,
            log_sigma_design,
            d_beta_u_flat,
            d_beta_v_flat,
        )
}
/// Operator form of the second directional derivative of the joint Hessian
/// along `(d_beta_u, d_beta_v)`. The designs are shared with the operator
/// via `Arc` clones.
fn second_directional_derivative_operator(
    &self,
    d_beta_u: &Array1<f64>,
    d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let mean_design = Arc::clone(&self.xmu);
    let log_sigma_design = Arc::clone(&self.x_ls);
    self.family.gls_wiggle_second_directional_operator(
        &self.block_states,
        mean_design,
        log_sigma_design,
        d_beta_u,
        d_beta_v,
    )
}
}
impl CustomFamilyGenerative for GaussianLocationScaleWiggleFamily {
    /// Builds the generative specification at the given block states.
    ///
    /// The mean is `eta_mu + eta_wiggle` row by row, and the Gaussian noise
    /// scale is `logb_sigma_from_eta_scalar` applied to the log-sigma
    /// predictor. The row count is taken from the mean predictor.
    ///
    /// # Errors
    /// Returns a `DimensionMismatch` error when the block count is not 3, or
    /// when the wiggle / log-sigma predictors do not have the same length as
    /// the mean predictor. Without that check the row-wise indexing would
    /// panic.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        if block_states.len() != 3 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "GaussianLocationScaleWiggleFamily expects 3 blocks, got {}",
                    block_states.len()
                ),
            }
            .into());
        }
        let eta_mu = &block_states[Self::BLOCK_MU].eta;
        let eta_wiggle = &block_states[Self::BLOCK_WIGGLE].eta;
        let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
        let n = eta_mu.len();
        // Every row index in 0..n is used on all three predictors below.
        if eta_wiggle.len() != n || eta_log_sigma.len() != n {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "GaussianLocationScaleWiggleFamily generative predictor length mismatch: mu {}, wiggle {}, log-sigma {}",
                    n,
                    eta_wiggle.len(),
                    eta_log_sigma.len()
                ),
            }
            .into());
        }
        let mean = gamlss_rowwise_map(n, |i| eta_mu[i] + eta_wiggle[i]);
        let sigma = gamlss_rowwise_map(n, |i| logb_sigma_from_eta_scalar(eta_log_sigma[i]));
        Ok(GenerativeSpec {
            mean,
            noise: NoiseModel::Gaussian { sigma },
        })
    }
}
/// Returns the only block state, or a `DimensionMismatch` error naming
/// `family_name` when the slice does not hold exactly one block.
fn expect_single_block<'a>(
    block_states: &'a [ParameterBlockState],
    family_name: &str,
) -> Result<&'a ParameterBlockState, String> {
    match block_states {
        [only] => Ok(only),
        other => Err(GamlssError::DimensionMismatch {
            reason: format!("{family_name} expects 1 block, got {}", other.len()),
        }
        .into()),
    }
}
/// Binomial mean family with a monotone wiggle added to the linear predictor.
///
/// The inherent methods evaluate the likelihood at `q = eta + eta_wiggle`,
/// using a wiggle basis built from `wiggle_knots` and `wiggle_degree`.
#[derive(Clone)]
pub struct BinomialMeanWiggleFamily {
/// Response values, one per row.
pub y: Array1<f64>,
/// Observation weights; checked to have the same length as `y`.
pub weights: Array1<f64>,
/// Inverse link used when evaluating the inverse-link jet at `q`.
pub link_kind: InverseLink,
/// Knot vector passed to the monotone wiggle basis builder.
pub wiggle_knots: Array1<f64>,
/// Spline degree passed to the monotone wiggle basis builder.
pub wiggle_degree: usize,
/// Resource policy used when a design has to be materialized as dense.
pub policy: crate::resource::ResourcePolicy,
}
/// Wiggle basis matrices and derivatives of `q` with respect to the base
/// predictor `q0`, as assembled by `wiggle_geometry`.
struct BinomialMeanWiggleGeometry {
/// Wiggle basis evaluated at `q0`.
basis: Array2<f64>,
/// First derivative of the wiggle basis with respect to `q0`.
basis_d1: Array2<f64>,
/// Second derivative of the wiggle basis with respect to `q0`.
basis_d2: Array2<f64>,
/// Third derivative of the wiggle basis with respect to `q0`.
basis_d3: Array2<f64>,
/// `dq/dq0`: first-derivative basis times the wiggle coefficients, plus 1.
dq_dq0: Array1<f64>,
/// Second-derivative basis times the wiggle coefficients.
d2q_dq02: Array1<f64>,
/// Third-derivative basis times the wiggle coefficients.
d3q_dq03: Array1<f64>,
/// Fourth-derivative basis times the wiggle coefficients.
d4q_dq04: Array1<f64>,
}
/// Psi-derivative direction for the eta block, as produced by
/// `exact_newton_joint_psi_direction`.
struct BinomialMeanWiggleJointPsiDirection {
/// Dense psi-derivative of the eta design, when materialized.
x_eta_psi: Option<Array2<f64>>,
/// Psi-derivative of the eta design applied to the eta coefficients.
z_eta_psi: Array1<f64>,
}
/// Concatenates the eta-block score and the wiggle-block score into one
/// joint score vector, eta entries first.
fn binomial_pack_mean_wiggle_joint_score(
    score_eta: &Array1<f64>,
    score_w: &Array1<f64>,
) -> Array1<f64> {
    let split = score_eta.len();
    let joint_len = split + score_w.len();
    let mut packed = Array1::<f64>::zeros(joint_len);
    packed.slice_mut(s![0..split]).assign(score_eta);
    packed.slice_mut(s![split..joint_len]).assign(score_w);
    packed
}
/// Packs the eta/eta, eta/wiggle and wiggle/wiggle blocks into one joint
/// Hessian.
///
/// Only the upper block triangle is written explicitly. The lower part is
/// filled by `mirror_upper_to_lower`.
fn binomial_pack_mean_wiggle_joint_symmetrichessian(
    h_eta_eta: &Array2<f64>,
    h_eta_w: &Array2<f64>,
    h_ww: &Array2<f64>,
) -> Array2<f64> {
    let split = h_eta_eta.nrows();
    let dim = split + h_ww.nrows();
    let mut joint = Array2::<f64>::zeros((dim, dim));
    joint.slice_mut(s![0..split, 0..split]).assign(h_eta_eta);
    joint.slice_mut(s![0..split, split..dim]).assign(h_eta_w);
    joint.slice_mut(s![split..dim, split..dim]).assign(h_ww);
    mirror_upper_to_lower(&mut joint);
    joint
}
impl BinomialMeanWiggleFamily {
/// Index of the base linear-predictor (eta) block in block-state and spec slices.
pub const BLOCK_ETA: usize = 0;
/// Index of the wiggle block in block-state and spec slices.
pub const BLOCK_WIGGLE: usize = 1;
/// Evaluates the monotone wiggle basis at `q0` for the derivative order
/// requested in `options`, using this family's knots and degree.
fn wiggle_basiswith_options(
    &self,
    q0: ArrayView1<'_, f64>,
    options: BasisOptions,
) -> Result<Array2<f64>, String> {
    let order = options.derivative_order;
    monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, order)
}
/// Wiggle basis at `q0` using `BasisOptions::value()`.
fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
    let value_options = BasisOptions::value();
    self.wiggle_basiswith_options(q0, value_options)
}
/// `dq/dq0`: first-derivative wiggle basis times the wiggle coefficients,
/// plus 1.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the basis column count differs
/// from the coefficient count.
fn wiggle_dq_dq0(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let first_deriv_basis = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
    let (n_cols, n_coef) = (first_deriv_basis.ncols(), beta_link_wiggle.len());
    if n_cols == n_coef {
        return Ok(first_deriv_basis.dot(&beta_link_wiggle) + 1.0);
    }
    Err(GamlssError::DimensionMismatch { reason: format!(
        "wiggle derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
        n_cols,
        n_coef
    ) }.into())
}
/// `d2q/dq0^2`: second-derivative wiggle basis times the wiggle coefficients.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the basis column count differs
/// from the coefficient count.
fn wiggle_d2q_dq02(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let second_deriv_basis =
        self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
    let (n_cols, n_coef) = (second_deriv_basis.ncols(), beta_link_wiggle.len());
    if n_cols == n_coef {
        return Ok(second_deriv_basis.dot(&beta_link_wiggle));
    }
    Err(GamlssError::DimensionMismatch { reason: format!(
        "wiggle second-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
        n_cols,
        n_coef
    ) }.into())
}
/// Third-derivative monotone wiggle basis at `q0`.
fn wiggle_d3basis_constrained(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
    const THIRD_ORDER: usize = 3;
    monotone_wiggle_basis_with_derivative_order(
        q0,
        &self.wiggle_knots,
        self.wiggle_degree,
        THIRD_ORDER,
    )
}
/// `d3q/dq0^3`: third-derivative wiggle basis times the wiggle coefficients.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the basis column count differs
/// from the coefficient count.
fn wiggle_d3q_dq03(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let third_deriv_basis = self.wiggle_d3basis_constrained(q0)?;
    let (n_cols, n_coef) = (third_deriv_basis.ncols(), beta_link_wiggle.len());
    if n_cols == n_coef {
        return Ok(third_deriv_basis.dot(&beta_link_wiggle));
    }
    Err(GamlssError::DimensionMismatch { reason: format!(
        "wiggle third-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
        n_cols,
        n_coef
    ) }.into())
}
/// `d4q/dq0^4`: fourth-derivative wiggle basis times the wiggle coefficients.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the basis column count differs
/// from the coefficient count.
fn wiggle_d4q_dq04(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    const FOURTH_ORDER: usize = 4;
    let fourth_deriv_basis = monotone_wiggle_basis_with_derivative_order(
        q0,
        &self.wiggle_knots,
        self.wiggle_degree,
        FOURTH_ORDER,
    )?;
    let (n_cols, n_coef) = (fourth_deriv_basis.ncols(), beta_link_wiggle.len());
    if n_cols == n_coef {
        return Ok(fourth_deriv_basis.dot(&beta_link_wiggle));
    }
    Err(GamlssError::DimensionMismatch { reason: format!(
        "wiggle fourth-derivative/beta mismatch: basis has {} columns but beta_link_wiggle has {} coefficients",
        n_cols,
        n_coef
    ) }.into())
}
fn wiggle_geometry(
&self,
q0: ArrayView1<'_, f64>,
beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<BinomialMeanWiggleGeometry, String> {
let basis = self.wiggle_design(q0)?;
let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
let dq_dq0 = self.wiggle_dq_dq0(q0, beta_link_wiggle)?;
let d2q_dq02 = self.wiggle_d2q_dq02(q0, beta_link_wiggle)?;
let d3q_dq03 = self.wiggle_d3q_dq03(q0, beta_link_wiggle)?;
let d4q_dq04 = self.wiggle_d4q_dq04(q0, beta_link_wiggle)?;
Ok(BinomialMeanWiggleGeometry {
basis,
basis_d1,
basis_d2,
basis_d3,
dq_dq0,
d2q_dq02,
d3q_dq03,
d4q_dq04,
})
}
/// Three derivative values of the negative log-likelihood with respect to
/// `q` for one observation, computed from the inverse-link jet at `q`.
///
/// # Errors
/// Fails when the inverse-link jet cannot be evaluated.
fn neglog_q_derivatives(&self, y: f64, weight: f64, q: f64) -> Result<(f64, f64, f64), String> {
    let jet = match inverse_link_jet_for_inverse_link(&self.link_kind, q) {
        Ok(jet) => jet,
        Err(e) => {
            return Err(format!(
                "fixed-link wiggle inverse-link evaluation failed: {e}"
            ));
        }
    };
    let derivatives = binomial_neglog_q_derivatives_dispatch(
        y,
        weight,
        q,
        jet.mu,
        jet.d1,
        jet.d2,
        jet.d3,
        &self.link_kind,
    );
    Ok(derivatives)
}
/// Fourth derivative of the negative log-likelihood with respect to `q` for
/// one observation, computed from the inverse-link jet at `q`.
///
/// # Errors
/// Fails when the inverse-link jet cannot be evaluated, or when the dispatch
/// routine reports an error.
fn neglog_q_fourth_derivative(&self, y: f64, weight: f64, q: f64) -> Result<f64, String> {
    let jet = match inverse_link_jet_for_inverse_link(&self.link_kind, q) {
        Ok(jet) => jet,
        Err(e) => {
            return Err(format!(
                "fixed-link wiggle inverse-link evaluation failed: {e}"
            ));
        }
    };
    binomial_neglog_q_fourth_derivative_dispatch(
        y,
        weight,
        q,
        jet.mu,
        jet.d1,
        jet.d2,
        jet.d3,
        &self.link_kind,
    )
}
/// Dense view of the eta block's design.
///
/// Borrows the design when it is already dense. Otherwise it is materialized
/// under this family's material policy and an owned copy is returned.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two specs are given,
/// and propagates densification failures as strings.
fn dense_eta_design_fromspecs<'a>(
    &self,
    specs: &'a [ParameterBlockSpec],
) -> Result<Cow<'a, Array2<f64>>, String> {
    if specs.len() != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialMeanWiggleFamily expects 2 specs, got {}",
                specs.len()
            ),
        }
        .into());
    }
    let eta_design = &specs[Self::BLOCK_ETA].design;
    if let Some(dense) = eta_design.as_dense_ref() {
        return Ok(Cow::Borrowed(dense));
    }
    let material_policy = self.policy.material_policy();
    let materialized = eta_design
        .try_to_dense_with_policy(
            &material_policy,
            "BinomialMeanWiggle dense_eta_design_fromspecs eta",
        )
        .map_err(|e| e.to_string())?;
    Ok(Cow::Owned(materialized.as_ref().clone()))
}
/// Dense psi-derivative direction for the hyper-parameter at global index
/// `psi_index`.
///
/// Derivatives are numbered globally in block order, then in order within
/// each block. Returns `Ok(None)` when `psi_index` is out of range or points
/// into a block other than the eta block. Otherwise the psi-derivative of
/// the eta design is materialized for all rows and applied to the eta
/// coefficients.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two block states and
/// two derivative block lists are given, and propagates map resolution /
/// row-chunk failures.
fn exact_newton_joint_psi_direction(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    x_eta: &Array2<f64>,
) -> Result<Option<BinomialMeanWiggleJointPsiDirection>, String> {
    if block_states.len() != 2 || derivative_blocks.len() != 2 {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "BinomialMeanWiggleFamily joint psi direction expects 2 blocks and 2 derivative block lists, got {} and {}",
            block_states.len(),
            derivative_blocks.len()
        ) }.into());
    }
    // Flatten (block, derivative) pairs in global order and pick the target.
    let located = derivative_blocks
        .iter()
        .enumerate()
        .flat_map(|(block_idx, derivs)| derivs.iter().map(move |deriv| (block_idx, deriv)))
        .nth(psi_index);
    let (owner_block, deriv) = match located {
        Some(hit) => hit,
        None => return Ok(None),
    };
    if owner_block != Self::BLOCK_ETA {
        return Ok(None);
    }
    let n = self.y.len();
    let p_eta = x_eta.ncols();
    let beta_eta = &block_states[Self::BLOCK_ETA].beta;
    let x_eta_psi_map = resolve_custom_family_x_psi_map(
        deriv,
        n,
        p_eta,
        0..n,
        "BinomialMeanWiggleFamily eta",
        &self.policy,
    )?;
    let x_eta_psi = x_eta_psi_map.row_chunk(0..n)?;
    let z_eta_psi = x_eta_psi.dot(beta_eta);
    Ok(Some(BinomialMeanWiggleJointPsiDirection {
        x_eta_psi: Some(x_eta_psi),
        z_eta_psi,
    }))
}
/// Operator-form psi-derivative action for the hyper-parameter at global
/// index `psi_index`, together with its product with the eta coefficients.
///
/// Derivatives are numbered globally in block order, then in order within
/// each block. Returns `Ok(None)` when `psi_index` is out of range, when it
/// points into a block other than the eta block, or when the design action
/// cannot be built from the derivative.
///
/// # Errors
/// Returns a `DimensionMismatch` error unless exactly two block states and
/// two derivative block lists are given.
fn exact_newton_joint_psi_action(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    p_eta: usize,
) -> Result<Option<(CustomFamilyPsiDesignAction, Array1<f64>)>, String> {
    if block_states.len() != 2 || derivative_blocks.len() != 2 {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "BinomialMeanWiggleFamily joint psi action expects 2 blocks and 2 derivative block lists, got {} and {}",
            block_states.len(),
            derivative_blocks.len()
        ) }.into());
    }
    // Flatten (block, derivative) pairs in global order and pick the target.
    let located = derivative_blocks
        .iter()
        .enumerate()
        .flat_map(|(block_idx, derivs)| derivs.iter().map(move |deriv| (block_idx, deriv)))
        .nth(psi_index);
    let (owner_block, deriv) = match located {
        Some(hit) => hit,
        None => return Ok(None),
    };
    if owner_block != Self::BLOCK_ETA {
        return Ok(None);
    }
    let n = self.y.len();
    let beta_eta = &block_states[Self::BLOCK_ETA].beta;
    // A construction failure is reported as "no action available", not as an error.
    let Ok(action) = CustomFamilyPsiDesignAction::from_first_derivative(
        deriv,
        n,
        p_eta,
        0..n,
        "BinomialMeanWiggleFamily eta",
    ) else {
        return Ok(None);
    };
    let z_eta_psi = action.forward_mul(beta_eta.view());
    Ok(Some((action, z_eta_psi)))
}
/// Builds the row-coefficient operator for the joint (eta, wiggle) Hessian at
/// the current block states.
///
/// Per row, the derivatives `m1`, `m2` of the negative log-likelihood at
/// `q = eta + eta_wiggle` are combined with the wiggle geometry
/// (`a = dq/dq0`, `b = d2q/dq0^2`) into four coefficient vectors, which are
/// handed to `RowCoeffOperator::from_directions` with three direction
/// matrices: the eta design, the wiggle basis, and its first derivative.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 2 or the
/// predictor / weight lengths differ from `self.y.len()`, and propagates
/// geometry and inverse-link errors.
fn bmw_static_hessian_operator(
&self,
block_states: &[ParameterBlockState],
x_eta_arc: Arc<Array2<f64>>,
) -> Result<Arc<RowCoeffOperator>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
// The wiggle geometry is evaluated at the base predictor `eta`.
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta_arc.ncols();
let pw = geom.basis.ncols();
let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_ww = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
let (m1, m2, _) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
coeff_eta[row] = hessian_coeff_fromobjective_q_terms(m1, m2, a, a, b);
coeff_etaw_b[row] = m2 * a;
coeff_etaw_d1[row] = m1;
coeff_ww[row] = m2;
}
// NOTE(review): the first element of each direction tuple appears to be the
// owning block (0 = eta, 1 = wiggle) and the coefficient triples appear to
// index pairs of directions — confirm against `RowCoeffOperator::from_directions`.
Ok(Arc::new(RowCoeffOperator::from_directions(
vec![p_eta, pw],
vec![
(0, x_eta_arc),
(1, Arc::new(geom.basis)),
(1, Arc::new(geom.basis_d1)),
],
vec![
(0, 0, coeff_eta),
(0, 1, coeff_etaw_b),
(0, 2, coeff_etaw_d1),
(1, 1, coeff_ww),
],
n,
)))
}
/// Builds the row-coefficient operator for the directional derivative of the
/// joint (eta, wiggle) Hessian along `d_beta_flat`.
///
/// `d_beta_flat` is laid out as `[eta | wiggle]`. Per row, the derivatives
/// `m1..m3` of the negative log-likelihood at `q = eta + eta_wiggle` are
/// combined with the wiggle geometry and with the direction's row
/// projections into six coefficient vectors, which are handed to
/// `RowCoeffOperator::from_directions` with four direction matrices: the eta
/// design, the wiggle basis, and its first and second derivatives.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 2, the
/// predictor / weight lengths differ from `self.y.len()`, or `d_beta_flat`
/// has the wrong length, and propagates geometry and inverse-link errors.
fn bmw_directional_operator(
&self,
block_states: &[ParameterBlockState],
x_eta_arc: Arc<Array2<f64>>,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta_arc.ncols();
let pw = geom.basis.ncols();
let total = p_eta + pw;
if d_beta_flat.len() != total {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily joint d_beta length mismatch: got {}, expected {}",
d_beta_flat.len(),
total
),
}
.into());
}
// Split the joint direction into its eta and wiggle parts.
let u_eta = d_beta_flat.slice(s![0..p_eta]).to_owned();
let uw = d_beta_flat.slice(s![p_eta..total]).to_owned();
// Row projections of the direction: eta design times u_eta, and the wiggle
// basis (value, first and second derivative) times uw.
let xi = fast_av(x_eta_arc.as_ref(), &u_eta);
let phi = fast_av(&geom.basis, &uw);
let basis1_u = fast_av(&geom.basis_d1, &uw);
let basis2_u = fast_av(&geom.basis_d2, &uw);
let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
let mut coeff_ww_bb = Array1::<f64>::zeros(n);
let mut coeff_ww_db = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
let c = geom.d3q_dq03[row];
// Directional changes of q, a = dq/dq0 and b = d2q/dq0^2 along the direction.
let q_u = a * xi[row] + phi[row];
let a_u = b * xi[row] + basis1_u[row];
let b_u = c * xi[row] + basis2_u[row];
coeff_eta[row] = directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, q_u, a, a, b, a_u, a_u, b_u,
);
coeff_etaw_b[row] = m3 * q_u * a + m2 * a_u;
coeff_etaw_d1[row] = m2 * (a * xi[row] + q_u);
coeff_etaw_d2[row] = m1 * xi[row];
coeff_ww_bb[row] = m3 * q_u;
coeff_ww_db[row] = m2 * xi[row];
}
// NOTE(review): the first element of each direction tuple appears to be the
// owning block (0 = eta, 1 = wiggle) and the coefficient triples appear to
// index pairs of directions — confirm against `RowCoeffOperator::from_directions`.
Ok(Some(Arc::new(RowCoeffOperator::from_directions(
vec![p_eta, pw],
vec![
(0, x_eta_arc),
(1, Arc::new(geom.basis)),
(1, Arc::new(geom.basis_d1)),
(1, Arc::new(geom.basis_d2)),
],
vec![
(0, 0, coeff_eta),
(0, 1, coeff_etaw_b),
(0, 2, coeff_etaw_d1),
(0, 3, coeff_etaw_d2),
(1, 1, coeff_ww_bb),
(1, 2, coeff_ww_db),
],
n,
))))
}
/// Builds a matrix-free operator for the second directional derivative of the
/// joint (eta, wiggle) Hessian along the pair of directions `d_beta_u_flat` and
/// `d_beta_v_flat`.
///
/// Both directions use the same layout: `[0..p_eta]` for the eta block followed
/// by `[p_eta..p_eta + pw]` for the wiggle block. The eta/eta and eta/wiggle
/// coefficients match those computed by the dense
/// `exact_newton_joint_hessian_second_directional_derivative_with_specs`; the
/// wiggle/wiggle part is expressed here through per-row coefficients on basis
/// pairs rather than through row-scaled basis matrices.
///
/// # Errors
/// Returns a `DimensionMismatch` error when there are not exactly two block
/// states, when either predictor or `self.weights` does not have `self.y.len()`
/// entries, or when either direction does not have `p_eta + pw` entries. Errors
/// from `wiggle_geometry`, `neglog_q_derivatives` and
/// `neglog_q_fourth_derivative` are propagated.
fn bmw_second_directional_operator(
&self,
block_states: &[ParameterBlockState],
x_eta_arc: Arc<Array2<f64>>,
d_beta_u_flat: &Array1<f64>,
d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta_arc.ncols();
let pw = geom.basis.ncols();
let total = p_eta + pw;
if d_beta_u_flat.len() != total || d_beta_v_flat.len() != total {
return Err(GamlssError::DimensionMismatch { reason: format!(
"BinomialMeanWiggleFamily joint second d_beta length mismatch: got {} and {}, expected {}",
d_beta_u_flat.len(),
d_beta_v_flat.len(),
total
) }.into());
}
// Split both directions into eta and wiggle parts, then apply the eta design
// and the wiggle basis (plus its first three derivative bases) to them.
let u_eta = d_beta_u_flat.slice(s![0..p_eta]).to_owned();
let v_eta = d_beta_v_flat.slice(s![0..p_eta]).to_owned();
let uw = d_beta_u_flat.slice(s![p_eta..total]).to_owned();
let vw = d_beta_v_flat.slice(s![p_eta..total]).to_owned();
let xi_u = fast_av(x_eta_arc.as_ref(), &u_eta);
let xi_v = fast_av(x_eta_arc.as_ref(), &v_eta);
let phi_u = fast_av(&geom.basis, &uw);
let phi_v = fast_av(&geom.basis, &vw);
let b1u = fast_av(&geom.basis_d1, &uw);
let b1v = fast_av(&geom.basis_d1, &vw);
let b2u = fast_av(&geom.basis_d2, &uw);
let b2v = fast_av(&geom.basis_d2, &vw);
let b3u = fast_av(&geom.basis_d3, &uw);
let b3v = fast_av(&geom.basis_d3, &vw);
let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d3 = Array1::<f64>::zeros(n);
let mut coeff_ww_bb = Array1::<f64>::zeros(n);
let mut coeff_ww_db = Array1::<f64>::zeros(n);
let mut coeff_ww_ddb = Array1::<f64>::zeros(n);
let mut coeff_ww_dd = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
// NOTE(review): m1..m4 are presumably the first four q-derivatives of the
// negative log-likelihood — confirm against the two helper methods.
let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let m4 = self.neglog_q_fourth_derivative(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
let c = geom.d3q_dq03[row];
let d = geom.d4q_dq04[row];
// First-order changes along u and along v.
let q_u = a * xi_u[row] + phi_u[row];
let a_u = b * xi_u[row] + b1u[row];
let b_u = c * xi_u[row] + b2u[row];
let q_v = a * xi_v[row] + phi_v[row];
let a_v = b * xi_v[row] + b1v[row];
let b_v = c * xi_v[row] + b2v[row];
// Mixed (u, v) changes: a pure eta/eta term plus the two eta/wiggle cross terms.
let q_uv = b * xi_u[row] * xi_v[row] + b1u[row] * xi_v[row] + b1v[row] * xi_u[row];
let a_uv = c * xi_u[row] * xi_v[row] + b2u[row] * xi_v[row] + b2v[row] * xi_u[row];
let b_uv = d * xi_u[row] * xi_v[row] + b3u[row] * xi_v[row] + b3v[row] * xi_u[row];
coeff_eta[row] = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, q_u, q_v, q_uv, a, a, b, a_u, a_v, a_u, a_v, a_uv, a_uv, b_u, b_v,
b_uv,
);
// Eta/wiggle block: derivatives of the static coefficients `m2 * a` (on the
// basis) and `m1` (on the first-derivative basis), regrouped by which basis
// derivative they end up multiplying.
let d2_c_b = m4 * q_u * q_v * a + m3 * (q_uv * a + q_u * a_v + q_v * a_u) + m2 * a_uv;
let dc_b_u = m3 * q_u * a + m2 * a_u;
let dc_b_v = m3 * q_v * a + m2 * a_v;
let c_b_static = m2 * a;
let d2_c_b1 = m3 * q_u * q_v + m2 * q_uv;
let dc_b1_u = m2 * q_u;
let dc_b1_v = m2 * q_v;
coeff_etaw_b[row] = d2_c_b;
coeff_etaw_d1[row] = dc_b_u * xi_v[row] + dc_b_v * xi_u[row] + d2_c_b1;
coeff_etaw_d2[row] =
c_b_static * xi_u[row] * xi_v[row] + dc_b1_u * xi_v[row] + dc_b1_v * xi_u[row];
coeff_etaw_d3[row] = m1 * xi_u[row] * xi_v[row];
// Wiggle/wiggle block: the weight `m2` and its first and mixed changes.
let dw = m2;
let dw_u = m3 * q_u;
let dw_v = m3 * q_v;
let dw_uv = m4 * q_u * q_v + m3 * q_uv;
let xixj = xi_u[row] * xi_v[row];
coeff_ww_bb[row] = dw_uv;
coeff_ww_db[row] = dw_v * xi_u[row] + dw_u * xi_v[row];
coeff_ww_ddb[row] = dw * xixj;
// The factor 2 matches the dense variant, which adds the
// (first-derivative basis) x (first-derivative basis) product and its transpose.
coeff_ww_dd[row] = 2.0 * dw * xixj;
}
// Second argument: (block index, matrix) channels. Third argument: triples whose
// first two entries appear to index into that channel list (0 = eta design,
// 1..=4 = wiggle basis and its first three derivative bases) — confirm against
// `RowCoeffOperator::from_directions`.
Ok(Some(Arc::new(RowCoeffOperator::from_directions(
vec![p_eta, pw],
vec![
(0, x_eta_arc),
(1, Arc::new(geom.basis)),
(1, Arc::new(geom.basis_d1)),
(1, Arc::new(geom.basis_d2)),
(1, Arc::new(geom.basis_d3)),
],
vec![
(0, 0, coeff_eta),
(0, 1, coeff_etaw_b),
(0, 2, coeff_etaw_d1),
(0, 3, coeff_etaw_d2),
(0, 4, coeff_etaw_d3),
(1, 1, coeff_ww_bb),
(1, 2, coeff_ww_db),
(1, 3, coeff_ww_ddb),
(2, 2, coeff_ww_dd),
],
n,
))))
}
/// Returns the effective Jacobian for block `block_idx` of this family.
///
/// The family is described as a single-output layout with one additive block
/// (`BLOCK_ETA`) and one wiggle block (`BLOCK_WIGGLE`); the actual construction
/// is delegated to `AdditiveWiggleBlockLayout::block_effective_jacobian`, whose
/// errors are returned unchanged.
pub fn block_effective_jacobian(
    specs: &[ParameterBlockSpec],
    block_idx: usize,
) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
    let layout = crate::util::block_jacobian::AdditiveWiggleBlockLayout {
        family: "BinomialMeanWiggleFamily",
        n_outputs: 1,
        additive_blocks: &[Self::BLOCK_ETA],
        wiggle_block: Some(Self::BLOCK_WIGGLE),
    };
    layout.block_effective_jacobian(specs, block_idx)
}
}
impl CustomFamily for BinomialMeanWiggleFamily {
/// Always returns `true`: the joint Hessian built by this impl reads the wiggle
/// block's `beta` (through `wiggle_geometry`), so it changes with the coefficients.
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
/// Always returns `false`.
///
/// NOTE(review): presumably tells the fitting driver not to add a joint Jeffreys
/// term for this family — confirm against the `CustomFamily` trait documentation.
fn joint_jeffreys_term_required(&self) -> bool {
false
}
/// Cost estimate for forming the coefficient Hessian: the number of observations
/// times the total number of design columns across `specs` (at least one).
///
/// All arithmetic saturates at `u64::MAX` instead of overflowing.
fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
    let mut total_cols: u64 = 0;
    for spec in specs {
        total_cols = total_cols.saturating_add(spec.design.ncols() as u64);
    }
    let n_rows = self.y.len() as u64;
    // Treat an empty design as one column so the estimate never collapses to zero
    // for a non-empty response.
    n_rows.saturating_mul(total_cols.max(1))
}
/// Returns the linear inequality constraints for block `block_idx`.
///
/// Only the wiggle block is constrained: it receives whatever
/// `monotone_wiggle_nonnegative_constraints` produces for its column count.
/// Every other block yields `Ok(None)`.
fn block_linear_constraints(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    if block_idx == Self::BLOCK_WIGGLE {
        let n_coefficients = spec.design.ncols();
        Ok(monotone_wiggle_nonnegative_constraints(n_coefficients))
    } else {
        Ok(None)
    }
}
/// Post-processes a freshly updated coefficient vector for block `block_idx`.
///
/// The vector is always returned unchanged. For the wiggle block it is first
/// checked with `validate_monotone_wiggle_beta_nonnegative`, and that check's
/// error is propagated.
///
/// # Panics
/// Panics if `block_spec.name` is empty.
fn post_update_block_beta(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    block_spec: &ParameterBlockSpec,
    beta: Array1<f64>,
) -> Result<Array1<f64>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    assert!(!block_spec.name.is_empty());
    if block_idx == Self::BLOCK_WIGGLE {
        validate_monotone_wiggle_beta_nonnegative(&beta, "BinomialMeanWiggleFamily post-update")?;
    }
    Ok(beta)
}
/// Evaluates the log-likelihood and the diagonal working response/weights for
/// both blocks at the current block states.
///
/// Per row, the predictor is `q = eta + etaw`. The eta block's mean slope is the
/// inverse-link derivative times `dq/dq0`; the wiggle block's slope is the
/// inverse-link derivative itself. Rows with zero prior weight, or whose slope
/// is not finite, keep a zero working weight and a working response equal to the
/// current predictor.
///
/// # Errors
/// Returns a `DimensionMismatch` error for a wrong block count or inconsistent
/// lengths, and propagates errors from the inverse link, the log-likelihood
/// helper, `wiggle_dq_dq0` and `BlockWorkingSet::diagonal_checked`.
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!("BinomialMeanWiggleFamily expects 2 blocks, got {}", n_blocks),
        }
        .into());
    }
    let eta_state = &block_states[Self::BLOCK_ETA];
    let wiggle_state = &block_states[Self::BLOCK_WIGGLE];
    let (eta, etaw, betaw) = (&eta_state.eta, &wiggle_state.eta, &wiggle_state.beta);
    let n = self.y.len();
    let sizes_agree = eta.len() == n && etaw.len() == n && self.weights.len() == n;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let dq_dq0 = self.wiggle_dq_dq0(eta.view(), betaw.view())?;
    let n_slopes = dq_dq0.len();
    if n_slopes != n {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialMeanWiggleFamily dq/dq0 length mismatch: got {}, expected {}",
                n_slopes, n
            ),
        }
        .into());
    }

    let mut log_lik = 0.0;
    let mut z_eta = Array1::<f64>::zeros(n);
    let mut w_eta = Array1::<f64>::zeros(n);
    let mut z_wiggle = Array1::<f64>::zeros(n);
    let mut w_wiggle = Array1::<f64>::zeros(n);
    for row in 0..n {
        let (eta_i, etaw_i) = (eta[row], etaw[row]);
        let q = eta_i + etaw_i;
        let (mu_q, d1_q) = inverse_link_mu_d1_for_inverse_link(&self.link_kind, q)
            .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
        let (yi, wi) = (self.y[row], self.weights[row]);
        // The likelihood uses the unclamped mean; the working quantities below use
        // the clamped one.
        log_lik += binomial_location_scale_log_likelihood(yi, wi, q, &self.link_kind, mu_q)?;
        let mu = mu_q.clamp(1e-12, 1.0 - 1e-12);
        let var = (mu * (1.0 - mu)).max(MIN_PROB);
        let slope_eta = d1_q * dq_dq0[row];
        let slope_wiggle = d1_q;
        let row_active = wi != 0.0 && var.is_finite();

        if row_active && slope_eta.is_finite() {
            w_eta[row] = floor_positiveweight(wi * (slope_eta * slope_eta / var), MIN_WEIGHT);
            z_eta[row] = eta_i + (yi - mu) / signedwith_floor(slope_eta, MIN_DERIV);
        } else {
            z_eta[row] = eta_i;
        }

        if row_active && slope_wiggle.is_finite() {
            w_wiggle[row] =
                floor_positiveweight(wi * (slope_wiggle * slope_wiggle / var), MIN_WEIGHT);
            z_wiggle[row] = etaw_i + (yi - mu) / signedwith_floor(slope_wiggle, MIN_DERIV);
        } else {
            z_wiggle[row] = etaw_i;
        }
    }

    let eta_working_set = BlockWorkingSet::diagonal_checked(z_eta, w_eta)?;
    let wiggle_working_set = BlockWorkingSet::diagonal_checked(z_wiggle, w_wiggle)?;
    Ok(FamilyEvaluation {
        log_likelihood: log_lik,
        blockworking_sets: vec![eta_working_set, wiggle_working_set],
    })
}
/// Returns the design matrix and offset to use for `spec` at the current state.
///
/// Blocks not named `"wiggle"` pass their stored design and offset through
/// unchanged. The wiggle block's design is rebuilt from the current eta
/// predictor via `wiggle_design` and paired with an all-zero offset.
///
/// # Errors
/// Fails when no block state is available, when eta does not match the response
/// length, when the rebuilt design has a different column count than
/// `spec.design`, or when `wiggle_design` itself fails.
fn block_geometry(
    &self,
    block_states: &[ParameterBlockState],
    spec: &ParameterBlockSpec,
) -> Result<(DesignMatrix, Array1<f64>), String> {
    let is_wiggle = spec.name == "wiggle";
    if !is_wiggle {
        return Ok((spec.design.clone(), spec.offset.clone()));
    }
    if block_states.is_empty() {
        return Err(GamlssError::UnsupportedConfiguration {
            reason: "wiggle geometry requires eta block".to_string(),
        }
        .into());
    }
    let eta = &block_states[Self::BLOCK_ETA].eta;
    if eta.len() != self.y.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialMeanWiggleFamily eta size mismatch".to_string(),
        }
        .into());
    }
    let dynamic_design = self.wiggle_design(eta.view())?;
    let (got_cols, expected_cols) = (dynamic_design.ncols(), spec.design.ncols());
    if got_cols != expected_cols {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "dynamic wiggle design col mismatch: got {}, expected {}",
                got_cols, expected_cols
            ),
        }
        .into());
    }
    let zero_offset = Array1::zeros(dynamic_design.nrows());
    let design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(dynamic_design));
    Ok((design, zero_offset))
}
/// Always returns `true`: `block_geometry` rebuilds the wiggle design from the
/// current eta predictor instead of returning a fixed matrix.
fn block_geometry_is_dynamic(&self) -> bool {
true
}
/// Creates a matrix-free joint-Hessian workspace for the given block states.
///
/// The workspace owns a clone of the family, a copy of `block_states`, and an
/// owned dense eta design obtained from `specs`. Errors from building the dense
/// design or the workspace are propagated.
fn exact_newton_joint_hessian_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    let dense_eta_design = self.dense_eta_design_fromspecs(specs)?.into_owned();
    let states = block_states.to_vec();
    let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(
        BinomialMeanWiggleHessianWorkspace::new(self.clone(), states, dense_eta_design)?,
    );
    Ok(Some(shared))
}
/// Reports whether a dense eta design can be obtained from `specs`; this is the
/// same call `exact_newton_joint_hessian_workspace` relies on, so `true` means
/// that workspace's first step will succeed.
fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
self.dense_eta_design_fromspecs(specs).is_ok()
}
/// Assembles the dense joint Hessian over the eta and wiggle coefficients.
///
/// The three blocks are formed as weighted cross-products of the dense eta
/// design and the wiggle basis (and its first-derivative basis), then packed by
/// `binomial_pack_mean_wiggle_joint_symmetrichessian`.
///
/// # Errors
/// Returns a `DimensionMismatch` error for a wrong block count or inconsistent
/// lengths; errors from the dense-design lookup, `wiggle_geometry`,
/// `neglog_q_derivatives` and the cross-product helpers are propagated.
///
/// # Panics
/// Panics if the eta/eta or wiggle/wiggle block does not have the expected
/// number of rows.
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let x_eta = self.dense_eta_design_fromspecs(specs)?;
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta.ncols();
let pw = geom.basis.ncols();
let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_ww = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
// NOTE(review): m1 and m2 are presumably the first and second q-derivatives of
// the negative log-likelihood; the third returned value is unused here.
let (m1, m2, _) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
coeff_eta[row] = hessian_coeff_fromobjective_q_terms(m1, m2, a, a, b);
coeff_etaw_b[row] = m2 * a;
coeff_etaw_d1[row] = m1;
coeff_ww[row] = m2;
}
// The eta/wiggle block has two pieces: one against the basis and one against
// its first-derivative basis.
let h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
let h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?;
let h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww)?;
assert_eq!(h_eta_eta.nrows(), p_eta);
assert_eq!(h_ww.nrows(), pw);
Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
&h_eta_eta, &h_eta_w, &h_ww,
)))
}
/// Assembles the dense directional derivative of the joint Hessian along
/// `d_beta_flat`.
///
/// `d_beta_flat` holds the eta direction in `[0..p_eta]` followed by the wiggle
/// direction in `[p_eta..p_eta + pw]`. The per-row coefficients are the same
/// expressions as in `bmw_directional_operator`; here they are turned into dense
/// blocks and packed by `binomial_pack_mean_wiggle_joint_symmetrichessian`.
///
/// # Errors
/// Returns a `DimensionMismatch` error for a wrong block count, inconsistent
/// lengths, or a direction of the wrong length; errors from the dense-design
/// lookup, `wiggle_geometry`, `neglog_q_derivatives` and the cross-product
/// helpers are propagated.
fn exact_newton_joint_hessian_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let x_eta = self.dense_eta_design_fromspecs(specs)?;
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta.ncols();
let pw = geom.basis.ncols();
if d_beta_flat.len() != p_eta + pw {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily joint d_beta length mismatch: got {}, expected {}",
d_beta_flat.len(),
p_eta + pw
),
}
.into());
}
// Split the direction and apply the eta design / wiggle bases to each part.
let u_eta = d_beta_flat.slice(s![0..p_eta]).to_owned();
let uw = d_beta_flat.slice(s![p_eta..p_eta + pw]).to_owned();
let xi = x_eta.dot(&u_eta);
let phi = geom.basis.dot(&uw);
let basis1_u = geom.basis_d1.dot(&uw);
let basis2_u = geom.basis_d2.dot(&uw);
let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
let mut coeff_ww_bb = Array1::<f64>::zeros(n);
let mut coeff_ww_db = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
// NOTE(review): m1..m3 are presumably the first three q-derivatives of the
// negative log-likelihood — confirm against `neglog_q_derivatives`.
let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
let c = geom.d3q_dq03[row];
// Each `_u` quantity adds an eta-direction term (scaled by `xi`) to a
// wiggle-direction term.
let q_u = a * xi[row] + phi[row];
let a_u = b * xi[row] + basis1_u[row];
let b_u = c * xi[row] + basis2_u[row];
coeff_eta[row] = directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, q_u, a, a, b, a_u, a_u, b_u,
);
coeff_etaw_b[row] = m3 * q_u * a + m2 * a_u;
coeff_etaw_d1[row] = m2 * (a * xi[row] + q_u);
coeff_etaw_d2[row] = m1 * xi[row];
coeff_ww_bb[row] = m3 * q_u;
coeff_ww_db[row] = m2 * xi[row];
}
let d_h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
let d_h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d2, &geom.basis_d2)?;
// The (derivative basis) x (basis) cross term is added together with its
// transpose so the wiggle/wiggle block stays symmetric.
let a_ww = xt_diag_y_dense(&geom.basis_d1, &coeff_ww_db, &geom.basis)?;
let d_h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww_bb)? + &a_ww + a_ww.t();
Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
&d_h_eta_eta,
&d_h_eta_w,
&d_h_ww,
)))
}
/// Assembles the dense second directional derivative of the joint Hessian along
/// the pair of directions `d_beta_u_flat` and `d_beta_v_flat`.
///
/// Both directions hold the eta part in `[0..p_eta]` followed by the wiggle part
/// in `[p_eta..p_eta + pw]`. The eta/eta and eta/wiggle coefficients are the same
/// expressions as in `bmw_second_directional_operator`; the wiggle/wiggle block
/// is built from row-scaled derivative bases and symmetrised explicitly.
///
/// # Errors
/// Returns a `DimensionMismatch` error for a wrong block count, inconsistent
/// lengths, or a direction of the wrong length; errors from the dense-design
/// lookup, `wiggle_geometry`, the likelihood-derivative helpers,
/// `scale_matrix_rows` and the cross-product helpers are propagated.
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialMeanWiggleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let x_eta = self.dense_eta_design_fromspecs(specs)?;
let eta = &block_states[Self::BLOCK_ETA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let n = self.y.len();
if eta.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
}
.into());
}
let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
let p_eta = x_eta.ncols();
let pw = geom.basis.ncols();
let total = p_eta + pw;
if d_beta_u_flat.len() != total || d_beta_v_flat.len() != total {
return Err(GamlssError::DimensionMismatch { reason: format!(
"BinomialMeanWiggleFamily joint second d_beta length mismatch: got {} and {}, expected {}",
d_beta_u_flat.len(),
d_beta_v_flat.len(),
total
) }.into());
}
let u_eta = d_beta_u_flat.slice(s![0..p_eta]).to_owned();
let v_eta = d_beta_v_flat.slice(s![0..p_eta]).to_owned();
let uw = d_beta_u_flat.slice(s![p_eta..total]).to_owned();
let vw = d_beta_v_flat.slice(s![p_eta..total]).to_owned();
// Apply the eta design and the wiggle basis (plus its first three derivative
// bases) to both directions.
let xi_u = x_eta.dot(&u_eta); let xi_v = x_eta.dot(&v_eta); let phi_u = geom.basis.dot(&uw); let phi_v = geom.basis.dot(&vw); let b1u = geom.basis_d1.dot(&uw); let b1v = geom.basis_d1.dot(&vw);
let b2u = geom.basis_d2.dot(&uw); let b2v = geom.basis_d2.dot(&vw);
let b3u = geom.basis_d3.dot(&uw); let b3v = geom.basis_d3.dot(&vw);
// Row-scaled derivative bases: first-derivative basis scaled by xi_u and by xi_v,
// and second-derivative basis scaled by the product xi_u * xi_v.
let basis_u = scale_matrix_rows(&geom.basis_d1, &xi_u)?; let basis_v = scale_matrix_rows(&geom.basis_d1, &xi_v)?; let basis_uv = scale_matrix_rows(&geom.basis_d2, &(&xi_u * &xi_v))?; let mut coeff_eta = Array1::<f64>::zeros(n);
let mut coeff_etaw_b = Array1::<f64>::zeros(n);
let mut coeff_etaw_d1 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d2 = Array1::<f64>::zeros(n);
let mut coeff_etaw_d3 = Array1::<f64>::zeros(n);
let mut dw = Array1::<f64>::zeros(n);
let mut dw_u = Array1::<f64>::zeros(n);
let mut dw_v = Array1::<f64>::zeros(n);
let mut dw_uv = Array1::<f64>::zeros(n);
for row in 0..n {
let q = eta[row] + etaw[row];
// NOTE(review): m1..m4 are presumably the first four q-derivatives of the
// negative log-likelihood — confirm against the two helper methods.
let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
let m4 = self.neglog_q_fourth_derivative(self.y[row], self.weights[row], q)?;
let a = geom.dq_dq0[row];
let b = geom.d2q_dq02[row];
let c = geom.d3q_dq03[row];
let d = geom.d4q_dq04[row];
// First-order changes along u and along v.
let q_u = a * xi_u[row] + phi_u[row];
let a_u = b * xi_u[row] + b1u[row];
let b_u = c * xi_u[row] + b2u[row];
let q_v = a * xi_v[row] + phi_v[row];
let a_v = b * xi_v[row] + b1v[row];
let b_v = c * xi_v[row] + b2v[row];
// Mixed (u, v) changes.
let q_uv = b * xi_u[row] * xi_v[row] + b1u[row] * xi_v[row] + b1v[row] * xi_u[row];
let a_uv = c * xi_u[row] * xi_v[row] + b2u[row] * xi_v[row] + b2v[row] * xi_u[row];
let b_uv = d * xi_u[row] * xi_v[row] + b3u[row] * xi_v[row] + b3v[row] * xi_u[row];
coeff_eta[row] = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, q_u, q_v, q_uv, a, a, b, a_u, a_v, a_u, a_v, a_uv, a_uv, b_u, b_v, b_uv, );
// Eta/wiggle block: derivatives of the static coefficients `m2 * a` (on the
// basis) and `m1` (on the first-derivative basis), regrouped by basis order.
let d2_c_b = m4 * q_u * q_v * a + m3 * (q_uv * a + q_u * a_v + q_v * a_u) + m2 * a_uv;
let dc_b_u = m3 * q_u * a + m2 * a_u;
let dc_b_v = m3 * q_v * a + m2 * a_v;
let c_b_static = m2 * a;
let d2_c_b1 = m3 * q_u * q_v + m2 * q_uv;
let dc_b1_u = m2 * q_u;
let dc_b1_v = m2 * q_v;
coeff_etaw_b[row] = d2_c_b;
coeff_etaw_d1[row] = dc_b_u * xi_v[row] + dc_b_v * xi_u[row] + d2_c_b1;
coeff_etaw_d2[row] =
c_b_static * xi_u[row] * xi_v[row] + dc_b1_u * xi_v[row] + dc_b1_v * xi_u[row];
coeff_etaw_d3[row] = m1 * xi_u[row] * xi_v[row];
// Wiggle/wiggle weight `m2` and its first and mixed changes.
dw[row] = m2;
dw_u[row] = m3 * q_u;
dw_v[row] = m3 * q_v;
dw_uv[row] = m4 * q_u * q_v + m3 * q_uv;
}
let d2_h_eta_eta = xt_diag_x_dense(&x_eta, &coeff_eta)?;
let d2_h_eta_w = xt_diag_y_dense(&x_eta, &coeff_etaw_b, &geom.basis)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d1, &geom.basis_d1)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d2, &geom.basis_d2)?
+ &xt_diag_y_dense(&x_eta, &coeff_etaw_d3, &geom.basis_d3)?;
// Every cross term below is added together with its transpose so the
// wiggle/wiggle block is symmetric by construction.
let a_ab = xt_diag_y_dense(&basis_uv, &dw, &geom.basis)?;
let a_ij = xt_diag_y_dense(&basis_u, &dw, &basis_v)?;
let a_iwj = xt_diag_y_dense(&basis_u, &dw_v, &geom.basis)?;
let a_jwi = xt_diag_y_dense(&basis_v, &dw_u, &geom.basis)?;
let d2_h_ww = &a_ab
+ &a_ab.t()
+ &a_ij
+ a_ij.t()
+ &a_iwj
+ a_iwj.t()
+ &a_jwi
+ a_jwi.t()
+ &xt_diag_x_dense(&geom.basis, &dw_uv)?;
Ok(Some(binomial_pack_mean_wiggle_joint_symmetrichessian(
&d2_h_eta_eta,
&d2_h_eta_w,
&d2_h_ww,
)))
}
/// Computes the psi-derivative terms of the joint objective for hyperparameter
/// `psi_index`: the objective derivative, the joint score derivative and the
/// joint Hessian derivative.
///
/// Two representations of the psi direction are tried in order:
///
/// 1. an implicit design action from `exact_newton_joint_psi_action`, in which
///    case the Hessian derivative is returned as a matrix-free
///    `CustomFamilyJointPsiOperator` and `hessian_psi` is an empty `0 x 0` array;
/// 2. a dense direction from `exact_newton_joint_psi_direction`, in which case
///    the Hessian derivative is assembled densely and no operator is returned.
///
/// Returns `Ok(None)` when neither representation is available.
///
/// # Errors
/// Returns a `DimensionMismatch` error when there are not exactly two block
/// states and two derivative-block lists, or when predictor/weight lengths
/// disagree with the response. Errors from the helpers used here are propagated.
///
/// # Panics
/// Panics if the dense direction is selected but carries no dense eta psi design.
fn exact_newton_joint_psi_terms(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    if block_states.len() != 2 || derivative_blocks.len() != 2 {
        return Err(GamlssError::DimensionMismatch { reason: format!(
            "BinomialMeanWiggleFamily joint psi terms expect 2 blocks and 2 derivative block lists, got {} and {}",
            block_states.len(),
            derivative_blocks.len()
        ) }.into());
    }
    let x_eta = self.dense_eta_design_fromspecs(specs)?;
    let eta = &block_states[Self::BLOCK_ETA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
    let n = self.y.len();
    if eta.len() != n || etaw.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialMeanWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let geom = self.wiggle_geometry(eta.view(), betaw.view())?;
    let p_eta = x_eta.ncols();
    let pw = geom.basis.ncols();

    // Prefer the implicit action; only fall back to the dense direction when the
    // implicit one is unavailable.
    let implicit_dir =
        self.exact_newton_joint_psi_action(block_states, derivative_blocks, psi_index, p_eta)?;
    let dense_dir = if implicit_dir.is_none() {
        self.exact_newton_joint_psi_direction(
            block_states,
            derivative_blocks,
            psi_index,
            &x_eta,
        )?
    } else {
        None
    };
    // NOTE(review): `z_eta_psi` is presumably the per-row psi-derivative of the eta
    // predictor — confirm against the two direction helpers.
    let z_eta_psi = if let Some((_, ref z_eta_psi)) = implicit_dir {
        z_eta_psi
    } else if let Some(ref dir_a) = dense_dir {
        &dir_a.z_eta_psi
    } else {
        return Ok(None);
    };

    let mut objective_psi = 0.0;
    let mut score_eta_xa = Array1::<f64>::zeros(n);
    let mut score_eta_x = Array1::<f64>::zeros(n);
    let mut score_w_b = Array1::<f64>::zeros(n);
    let mut score_w_d1 = Array1::<f64>::zeros(n);
    let mut coeff_eta_eta_xx = Array1::<f64>::zeros(n);
    let mut coeff_eta_eta_xa_x = Array1::<f64>::zeros(n);
    let mut coeff_eta_w_xa_b = Array1::<f64>::zeros(n);
    let mut coeff_eta_w_x_b = Array1::<f64>::zeros(n);
    let mut coeff_eta_w_x_d1 = Array1::<f64>::zeros(n);
    let mut coeff_eta_w_xa_d1 = Array1::<f64>::zeros(n);
    let mut coeff_eta_w_x_d2 = Array1::<f64>::zeros(n);
    let mut coeff_ww_bb = Array1::<f64>::zeros(n);
    let mut coeff_ww_db = Array1::<f64>::zeros(n);
    for row in 0..n {
        let q = eta[row] + etaw[row];
        let (m1, m2, m3) = self.neglog_q_derivatives(self.y[row], self.weights[row], q)?;
        let z_a = z_eta_psi[row];
        let a = geom.dq_dq0[row];
        let b = geom.d2q_dq02[row];
        let c = geom.d3q_dq03[row];
        // Change of q induced by the psi direction.
        let q_a = a * z_a;
        objective_psi += m1 * q_a;
        // Coefficients suffixed `_xa` multiply the psi-derivative of the eta design;
        // those suffixed `_x` multiply the eta design itself.
        score_eta_xa[row] = m1 * a;
        score_eta_x[row] = m2 * q_a * a + m1 * b * z_a;
        score_w_b[row] = m2 * q_a;
        score_w_d1[row] = m1 * z_a;
        coeff_eta_eta_xx[row] =
            m3 * q_a * a * a + m2 * (2.0 * a * b * z_a + q_a * b) + m1 * c * z_a;
        coeff_eta_eta_xa_x[row] = m2 * a * a + m1 * b;
        coeff_eta_w_xa_b[row] = m2 * a;
        coeff_eta_w_x_b[row] = m3 * q_a * a + m2 * b * z_a;
        coeff_eta_w_x_d1[row] = m2 * (a * z_a + q_a);
        coeff_eta_w_xa_d1[row] = m1;
        coeff_eta_w_x_d2[row] = m1 * z_a;
        coeff_ww_bb[row] = m3 * q_a;
        coeff_ww_db[row] = m2 * z_a;
    }
    let score_w = crate::faer_ndarray::fast_atv(&geom.basis, &score_w_b)
        + crate::faer_ndarray::fast_atv(&geom.basis_d1, &score_w_d1);

    if let Some((action, _)) = implicit_dir {
        let score_eta = action.transpose_mul(score_eta_xa.view())
            + crate::faer_ndarray::fast_atv(x_eta.as_ref(), &score_eta_x);
        let score_psi = binomial_pack_mean_wiggle_joint_score(&score_eta, &score_w);
        let x_eta_arc = shared_dense_arc(x_eta.as_ref());
        // This branch returns, so the basis matrices and coefficient vectors are not
        // needed afterwards: move them into the operator instead of deep-copying.
        let basis_arc = Arc::new(geom.basis);
        let basis_d1_arc = Arc::new(geom.basis_d1);
        let basis_d2_arc = Arc::new(geom.basis_d2);
        let zeros = Array1::<f64>::zeros(n);
        let operator = CustomFamilyJointPsiOperator::new(
            p_eta + pw,
            vec![
                CustomFamilyJointDesignChannel::new(0..p_eta, x_eta_arc, Some(action)),
                CustomFamilyJointDesignChannel::new(p_eta..p_eta + pw, basis_arc, None),
                CustomFamilyJointDesignChannel::new(p_eta..p_eta + pw, basis_d1_arc, None),
                CustomFamilyJointDesignChannel::new(p_eta..p_eta + pw, basis_d2_arc, None),
            ],
            vec![
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    0,
                    coeff_eta_eta_xa_x,
                    coeff_eta_eta_xx,
                ),
                // Off-diagonal channel pairs are listed in both orders; the first copy
                // is cloned and the second takes ownership.
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    1,
                    coeff_eta_w_xa_b.clone(),
                    coeff_eta_w_x_b.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    1,
                    0,
                    coeff_eta_w_xa_b,
                    coeff_eta_w_x_b,
                ),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    2,
                    coeff_eta_w_xa_d1.clone(),
                    coeff_eta_w_x_d1.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    2,
                    0,
                    coeff_eta_w_xa_d1,
                    coeff_eta_w_x_d1,
                ),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    3,
                    zeros.clone(),
                    coeff_eta_w_x_d2.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    3,
                    0,
                    zeros.clone(),
                    coeff_eta_w_x_d2,
                ),
                CustomFamilyJointDesignPairContribution::new(1, 1, zeros.clone(), coeff_ww_bb),
                CustomFamilyJointDesignPairContribution::new(
                    2,
                    1,
                    zeros.clone(),
                    coeff_ww_db.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(1, 2, zeros, coeff_ww_db),
            ],
        );
        return Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
            objective_psi,
            score_psi,
            hessian_psi: Array2::zeros((0, 0)),
            hessian_psi_operator: Some(std::sync::Arc::new(operator)),
        }));
    }

    // Dense fallback: assemble every block explicitly.
    let dir_a =
        dense_dir.expect("dense psi direction should exist when implicit direction is absent");
    let x_eta_psi = dir_a
        .x_eta_psi
        .as_ref()
        .expect("dense eta psi design should exist when implicit direction is absent");
    let score_psi = binomial_pack_mean_wiggle_joint_score(
        &(crate::faer_ndarray::fast_atv(x_eta_psi, &score_eta_xa)
            + crate::faer_ndarray::fast_atv(x_eta.as_ref(), &score_eta_x)),
        &score_w,
    );
    let a_eta_eta = xt_diag_y_dense(x_eta_psi, &coeff_eta_eta_xa_x, &x_eta)?;
    let h_eta_eta = &a_eta_eta + &a_eta_eta.t() + &xt_diag_x_dense(&x_eta, &coeff_eta_eta_xx)?;
    let h_eta_w = xt_diag_y_dense(x_eta_psi, &coeff_eta_w_xa_b, &geom.basis)?
        + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_b, &geom.basis)?
        + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_d1, &geom.basis_d1)?
        + &xt_diag_y_dense(x_eta_psi, &coeff_eta_w_xa_d1, &geom.basis_d1)?
        + &xt_diag_y_dense(&x_eta, &coeff_eta_w_x_d2, &geom.basis_d2)?;
    let a_ww = xt_diag_y_dense(&geom.basis_d1, &coeff_ww_db, &geom.basis)?;
    let h_ww = xt_diag_x_dense(&geom.basis, &coeff_ww_bb)? + &a_ww + a_ww.t();
    Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
        objective_psi,
        score_psi,
        hessian_psi: binomial_pack_mean_wiggle_joint_symmetrichessian(
            &h_eta_eta, &h_eta_w, &h_ww,
        ),
        hessian_psi_operator: None,
    }))
}
}
/// Joint-Hessian workspace for `BinomialMeanWiggleFamily`: it keeps everything
/// needed to apply the Hessian and build its directional derivatives without
/// re-deriving the inputs on each call.
struct BinomialMeanWiggleHessianWorkspace {
// Family whose `bmw_*` operator builders are called by the workspace.
family: BinomialMeanWiggleFamily,
// Block states captured when the workspace was created.
block_states: Vec<ParameterBlockState>,
// Dense eta design, shared with the operators built from it.
x_eta: Arc<Array2<f64>>,
// Operator for the Hessian itself, built once in `new`.
hessian_operator: Arc<RowCoeffOperator>,
}
impl BinomialMeanWiggleHessianWorkspace {
    /// Wraps the dense eta design in an `Arc`, builds the static Hessian operator
    /// from it, and stores both together with the family and block states.
    ///
    /// Any error from `bmw_static_hessian_operator` is returned unchanged.
    fn new(
        family: BinomialMeanWiggleFamily,
        block_states: Vec<ParameterBlockState>,
        x_eta: Array2<f64>,
    ) -> Result<Self, String> {
        let shared_design = Arc::new(x_eta);
        let hessian_operator =
            family.bmw_static_hessian_operator(&block_states, Arc::clone(&shared_design))?;
        let workspace = Self {
            family,
            block_states,
            x_eta: shared_design,
            hessian_operator,
        };
        Ok(workspace)
    }
}
/// Workspace interface backed by the stored Hessian operator and by the
/// family's `bmw_directional_operator` / `bmw_second_directional_operator`.
impl ExactNewtonJointHessianWorkspace for BinomialMeanWiggleHessianWorkspace {
    /// Hessian-vector products are always available.
    fn hessian_matvec_available(&self) -> bool {
        true
    }

    /// Applies the stored Hessian operator to `v`.
    fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
        let product = crate::solver::estimate::reml::unified::HyperOperator::mul_vec(
            self.hessian_operator.as_ref(),
            v,
        );
        Ok(Some(product))
    }

    /// No diagonal is provided by this workspace.
    fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
        Ok(None)
    }

    /// Dense form of `directional_derivative_operator`.
    fn directional_derivative(
        &self,
        d_beta_flat: &Array1<f64>,
    ) -> Result<Option<Array2<f64>>, String> {
        let operator = self.directional_derivative_operator(d_beta_flat)?;
        Ok(operator.map(|op| op.to_dense()))
    }

    /// Operator for the Hessian's directional derivative along `d_beta_flat`.
    fn directional_derivative_operator(
        &self,
        d_beta_flat: &Array1<f64>,
    ) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
    {
        let design = self.x_eta.clone();
        self.family
            .bmw_directional_operator(&self.block_states, design, d_beta_flat)
    }

    /// Dense form of `second_directional_derivative_operator`.
    fn second_directional_derivative(
        &self,
        d_beta_u_flat: &Array1<f64>,
        d_beta_v_flat: &Array1<f64>,
    ) -> Result<Option<Array2<f64>>, String> {
        let operator =
            self.second_directional_derivative_operator(d_beta_u_flat, d_beta_v_flat)?;
        Ok(operator.map(|op| op.to_dense()))
    }

    /// Operator for the Hessian's second directional derivative along the pair
    /// `(d_beta_u, d_beta_v)`.
    fn second_directional_derivative_operator(
        &self,
        d_beta_u: &Array1<f64>,
        d_beta_v: &Array1<f64>,
    ) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
    {
        let design = self.x_eta.clone();
        self.family
            .bmw_second_directional_operator(&self.block_states, design, d_beta_u, d_beta_v)
    }
}
impl CustomFamilyGenerative for BinomialMeanWiggleFamily {
    /// Builds the generative description at the current block states: a Bernoulli
    /// noise model whose per-row mean is the inverse link applied to
    /// `eta + etaw`.
    ///
    /// Fails with a `DimensionMismatch` error for a wrong block count or predictor
    /// lengths that differ from the response, and propagates inverse-link errors.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let n_blocks = block_states.len();
        if n_blocks != 2 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!("BinomialMeanWiggleFamily expects 2 blocks, got {}", n_blocks),
            }
            .into());
        }
        let eta = &block_states[Self::BLOCK_ETA].eta;
        let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
        let n = self.y.len();
        let lengths_match = eta.len() == n && etaw.len() == n;
        if !lengths_match {
            return Err(GamlssError::DimensionMismatch {
                reason: "BinomialMeanWiggleFamily generative size mismatch".to_string(),
            }
            .into());
        }
        let mean = gamlss_rowwise_map_result(n, |row| {
            let q = eta[row] + etaw[row];
            let jet = inverse_link_jet_for_inverse_link(&self.link_kind, q)
                .map_err(|e| format!("fixed-link wiggle inverse-link evaluation failed: {e}"))?;
            Ok(jet.mu)
        })?;
        let noise = NoiseModel::Bernoulli;
        Ok(GenerativeSpec { mean, noise })
    }
}
/// Single-block Poisson family with a log link (see `parameter_links`).
#[derive(Clone)]
pub struct PoissonLogFamily {
/// Response values; evaluation rejects entries that are negative or not finite.
pub y: Array1<f64>,
/// Prior weights, one per response entry.
pub weights: Array1<f64>,
}
impl PoissonLogFamily {
    /// Index of the family's only parameter block.
    pub const BLOCK_ETA: usize = 0;

    /// Names of the family's parameters, in block order.
    pub fn parameternames() -> &'static [&'static str] {
        const NAMES: &[&str] = &["eta"];
        NAMES
    }

    /// Link attached to each parameter, in block order.
    pub fn parameter_links() -> &'static [ParameterLink] {
        const LINKS: &[ParameterLink] = &[ParameterLink::Log];
        LINKS
    }

    /// Static description of the family: its name plus the two lists above.
    pub fn metadata() -> FamilyMetadata {
        let parameternames = Self::parameternames();
        let parameter_links = Self::parameter_links();
        FamilyMetadata {
            name: "poisson_log",
            parameternames,
            parameter_links,
        }
    }
}
/// Per-row result of a family's `row_kernel`, consumed by
/// `evaluate_log_link_diagonal_irls`.
struct DiagonalIrlsRow {
// Added to the running log-likelihood for every row.
log_lik_increment: f64,
// Working weight before flooring; ignored for zero-weight or clamped rows.
observed_weight: f64,
// Added to the clamped predictor to form the working response.
working_step: f64,
}
/// Hooks a single-block family must provide to be evaluated by
/// `evaluate_log_link_diagonal_irls`.
trait LogLinkDiagonalIrlsFamily {
/// Name used in error messages.
fn family_label(&self) -> &'static str;
/// Response vector.
fn y(&self) -> &Array1<f64>;
/// Prior weights; must have the same length as `y`.
fn prior_weights(&self) -> &Array1<f64>;
/// Family-level validation run once per evaluation; accepts everything by default.
fn validate_self(&self) -> Result<(), String> {
Ok(())
}
/// Validates a single response value; `idx` is its row index.
fn validate_yi(&self, yi: f64, idx: usize) -> Result<(), String>;
/// Computes the per-row quantities. The driver passes the clamped predictor as
/// `e_clamped` and `saturated_exp_eta` of the unclamped predictor as `m`.
fn row_kernel(&self, yi: f64, e_clamped: f64, m: f64, prior_w: f64) -> DiagonalIrlsRow;
}
/// Shared evaluation driver for single-block families implementing
/// `LogLinkDiagonalIrlsFamily`.
///
/// For every row it validates the response, clamps the predictor to
/// `[-ETA_HARD_CLAMP, ETA_HARD_CLAMP]`, asks the family's `row_kernel` for the
/// likelihood increment, weight and working step, and accumulates the
/// log-likelihood. Rows with zero prior weight, or whose predictor was changed by
/// the clamp, get zero working weight and keep the raw predictor as working
/// response.
///
/// # Errors
/// Fails when the block count is not one (via `expect_single_block`), when
/// lengths disagree, when family or response validation fails, or when
/// `BlockWorkingSet::diagonal_checked` rejects the result.
fn evaluate_log_link_diagonal_irls<F: LogLinkDiagonalIrlsFamily + ?Sized>(
    family: &F,
    block_states: &[ParameterBlockState],
) -> Result<FamilyEvaluation, String> {
    let label = family.family_label();
    let eta = &expect_single_block(block_states, label)?.eta;
    let y = family.y();
    let prior_weights = family.prior_weights();
    let n = y.len();
    let lengths_match = eta.len() == n && prior_weights.len() == n;
    if !lengths_match {
        return Err(GamlssError::DimensionMismatch {
            reason: format!("{label} input size mismatch"),
        }
        .into());
    }
    family.validate_self()?;

    let mut log_lik = 0.0;
    let mut z = Array1::<f64>::zeros(n);
    let mut w = Array1::<f64>::zeros(n);
    for row in 0..n {
        let yi = y[row];
        family.validate_yi(yi, row)?;
        let eta_raw = eta[row];
        let eta_clamped = eta_raw.clamp(-ETA_HARD_CLAMP, ETA_HARD_CLAMP);
        let mean = saturated_exp_eta(eta_raw);
        let prior_w = prior_weights[row];
        let kernel = family.row_kernel(yi, eta_clamped, mean, prior_w);
        log_lik += kernel.log_lik_increment;

        let clamp_active = eta_clamped != eta_raw;
        let (weight, response) = if prior_w == 0.0 || clamp_active {
            (0.0, eta_raw)
        } else {
            (
                floor_positiveweight(kernel.observed_weight, MIN_WEIGHT),
                eta_clamped + kernel.working_step,
            )
        };
        w[row] = weight;
        z[row] = response;
    }

    let working_set = BlockWorkingSet::diagonal_checked(z, w)?;
    Ok(FamilyEvaluation {
        log_likelihood: log_lik,
        blockworking_sets: vec![working_set],
    })
}
impl LogLinkDiagonalIrlsFamily for PoissonLogFamily {
    fn family_label(&self) -> &'static str {
        "PoissonLogFamily"
    }

    fn y(&self) -> &Array1<f64> {
        &self.y
    }

    fn prior_weights(&self) -> &Array1<f64> {
        &self.weights
    }

    /// Accepts only finite, non-negative responses.
    fn validate_yi(&self, yi: f64, idx: usize) -> Result<(), String> {
        if yi.is_finite() && yi >= 0.0 {
            return Ok(());
        }
        Err(GamlssError::InvalidInput {
            reason: format!(
                "PoissonLogFamily requires non-negative finite y; found y[{idx}]={yi}"
            ),
        }
        .into())
    }

    /// Per-row kernel: the likelihood increment is `prior_w * (yi * e_clamped - m)`;
    /// the mean derivative and the variance are both taken as `m`, floored by
    /// `MIN_DERIV` and `MIN_PROB` respectively.
    #[inline]
    fn row_kernel(&self, yi: f64, e_clamped: f64, m: f64, prior_w: f64) -> DiagonalIrlsRow {
        let mean_deriv = m.max(MIN_DERIV);
        let variance = m.max(MIN_PROB);
        let observed_weight = prior_w * (mean_deriv * mean_deriv / variance);
        let working_step = (yi - m) / signedwith_floor(mean_deriv, MIN_DERIV);
        DiagonalIrlsRow {
            log_lik_increment: prior_w * (yi * e_clamped - m),
            observed_weight,
            working_step,
        }
    }
}
/// Family evaluation is delegated to the shared log-link diagonal IRLS driver.
impl CustomFamily for PoissonLogFamily {
    fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
        evaluate_log_link_diagonal_irls::<Self>(self, block_states)
    }
}
impl CustomFamilyGenerative for PoissonLogFamily {
    /// Describes the generative model at the current state: the row-wise mean
    /// `saturated_exp_eta(eta)` paired with Poisson noise.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let state = expect_single_block(block_states, "PoissonLogFamily")?;
        let linear_predictor = &state.eta;
        let mean = gamlss_rowwise_map(linear_predictor.len(), |row| {
            saturated_exp_eta(linear_predictor[row])
        });
        let noise = NoiseModel::Poisson;
        Ok(GenerativeSpec { mean, noise })
    }
}
/// Gamma response family with a log link and a fixed shape parameter.
#[derive(Clone)]
pub struct GammaLogFamily {
/// Observed responses; row validation requires each to be finite and > 0.
pub y: Array1<f64>,
/// Per-row prior weights; a zero weight removes the row from the working set.
pub weights: Array1<f64>,
/// Gamma shape; family validation requires it to be finite and > 0.
pub shape: f64,
}
impl GammaLogFamily {
    /// Index of the single linear-predictor block.
    pub const BLOCK_ETA: usize = 0;

    /// Names of the modelled parameters, in block order.
    pub fn parameternames() -> &'static [&'static str] {
        const NAMES: &[&str] = &["eta"];
        NAMES
    }

    /// Link attached to each modelled parameter, in block order.
    pub fn parameter_links() -> &'static [ParameterLink] {
        const LINKS: &[ParameterLink] = &[ParameterLink::Log];
        LINKS
    }

    /// Static description of the family (name, parameter names and links).
    pub fn metadata() -> FamilyMetadata {
        let parameternames = Self::parameternames();
        let parameter_links = Self::parameter_links();
        FamilyMetadata {
            name: "gamma_log",
            parameternames,
            parameter_links,
        }
    }
}
/// Gamma response with a log link, expressed as per-row diagonal IRLS pieces.
impl LogLinkDiagonalIrlsFamily for GammaLogFamily {
    fn family_label(&self) -> &'static str {
        "GammaLogFamily"
    }

    fn y(&self) -> &Array1<f64> {
        &self.y
    }

    fn prior_weights(&self) -> &Array1<f64> {
        &self.weights
    }

    /// Requires a finite, strictly positive shape.
    fn validate_self(&self) -> Result<(), String> {
        let shape_ok = self.shape.is_finite() && self.shape > 0.0;
        if shape_ok {
            return Ok(());
        }
        Err(GamlssError::NonFinite {
            reason: "GammaLogFamily shape must be finite and > 0".to_string(),
        }
        .into())
    }

    /// Accepts only finite, strictly positive responses.
    fn validate_yi(&self, yi: f64, idx: usize) -> Result<(), String> {
        if yi.is_finite() && yi > 0.0 {
            return Ok(());
        }
        let reason = format!("GammaLogFamily requires positive finite y; found y[{idx}]={yi}");
        Err(GamlssError::InvalidInput { reason }.into())
    }

    /// Per-row log-likelihood increment, observed weight and working step.
    ///
    /// # Panics
    /// Panics when `e_clamped` is not finite, or when `m` is not within a
    /// relative `1e-8` of `exp(e_clamped)`.
    #[inline]
    fn row_kernel(&self, yi: f64, e_clamped: f64, m: f64, prior_w: f64) -> DiagonalIrlsRow {
        assert!(e_clamped.is_finite());
        assert!((e_clamped.exp() - m).abs() <= 1.0e-8 * m.abs().max(1.0));
        let shape = self.shape;
        let ratio = yi / m;
        let weight = prior_w * shape * yi / m;
        let score = prior_w * shape * (ratio - 1.0);
        // The step divides by the floored weight so it stays finite as the weight vanishes.
        let step = score / weight.max(MIN_WEIGHT);
        DiagonalIrlsRow {
            log_lik_increment: prior_w * (-shape * (ratio + m.ln())),
            observed_weight: weight,
            working_step: step,
        }
    }
}
impl CustomFamily for GammaLogFamily {
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
evaluate_log_link_diagonal_irls(self, block_states)
}
fn diagonalworking_weights_directional_derivative(
&self,
block_states: &[ParameterBlockState],
block_idx: usize,
d_eta: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
if block_idx != Self::BLOCK_ETA {
return Ok(None);
}
let eta = &expect_single_block(block_states, "GammaLogFamily")?.eta;
let n = self.y.len();
if eta.len() != n || self.weights.len() != n || d_eta.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "GammaLogFamily input size mismatch".to_string(),
}
.into());
}
if !self.shape.is_finite() || self.shape <= 0.0 {
return Err(GamlssError::NonFinite {
reason: "GammaLogFamily shape must be finite and > 0".to_string(),
}
.into());
}
let mut dw = Array1::<f64>::zeros(n);
for i in 0..n {
let yi = self.y[i];
if !yi.is_finite() || yi <= 0.0 {
return Err(GamlssError::InvalidInput {
reason: format!("GammaLogFamily requires positive finite y; found y[{i}]={yi}"),
}
.into());
}
let e_raw = eta[i];
let e = e_raw.clamp(-ETA_HARD_CLAMP, ETA_HARD_CLAMP);
if self.weights[i] == 0.0 || e != e_raw {
dw[i] = 0.0;
continue;
}
let m = safe_exp(e).max(MIN_WEIGHT);
let observed_weight = self.weights[i] * self.shape * yi / m;
if observed_weight <= MIN_WEIGHT {
dw[i] = 0.0;
} else {
dw[i] = -observed_weight * d_eta[i];
}
}
Ok(Some(dw))
}
}
impl CustomFamilyGenerative for GammaLogFamily {
    /// Describes the generative model at the current state: the row-wise mean
    /// `saturated_exp_eta(eta)` paired with Gamma noise at this family's shape.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let state = expect_single_block(block_states, "GammaLogFamily")?;
        let linear_predictor = &state.eta;
        let mean = gamlss_rowwise_map(linear_predictor.len(), |row| {
            saturated_exp_eta(linear_predictor[row])
        });
        let noise = NoiseModel::Gamma { shape: self.shape };
        Ok(GenerativeSpec { mean, noise })
    }
}
/// Binomial location-scale family with a threshold block and a log-sigma block.
#[derive(Clone)]
pub struct BinomialLocationScaleFamily {
/// Observed responses, one per row.
pub y: Array1<f64>,
/// Per-row weights; must have the same length as `y`.
pub weights: Array1<f64>,
/// Inverse link handed to the shared binomial location-scale core.
pub link_kind: InverseLink,
/// Optional design for the threshold block; the exact joint path uses the
/// stored designs only when both designs are present.
pub threshold_design: Option<DesignMatrix>,
/// Optional design for the log-sigma block.
pub log_sigma_design: Option<DesignMatrix>,
/// Resource policy; its material policy is passed to the dense-design helpers.
pub policy: crate::resource::ResourcePolicy,
}
// Implements `LocationScaleJointPsiFamily` for a binomial location-scale family
// by forwarding every workspace hook to the family's inherent
// `exact_newton_joint_*` methods.
//
// `$family` is the implementing type and `$label` its human-readable name.
// The inherent second-order and Hessian-directional routines take no row
// subsample, so a supplied subsample is reported as an error rather than
// aborting the process with a panic.
macro_rules! impl_binomial_location_scale_joint_psi_family {
    ($family:ty, $label:literal) => {
        impl LocationScaleJointPsiFamily for $family {
            type Direction = LocationScaleJointPsiDirection;
            const LABEL: &'static str = $label;

            // Resource policy used by the shared workspace.
            fn ws_policy(&self) -> &crate::resource::ResourcePolicy {
                &self.policy
            }

            // Dense (threshold, log-sigma) designs, or `None` when unavailable.
            fn ws_exact_joint_dense_block_designs<'a>(
                &'a self,
                specs: Option<&'a [ParameterBlockSpec]>,
            ) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
                self.exact_joint_dense_block_designs(specs)
            }

            // Resolves the psi direction for hyperparameter `psi_index`.
            fn ws_psi_direction(
                &self,
                block_states: &[ParameterBlockState],
                derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
                psi_index: usize,
                design_loc: &Array2<f64>,
                design_scale: &Array2<f64>,
                policy: &crate::resource::ResourcePolicy,
            ) -> Result<Option<LocationScaleJointPsiDirection>, String> {
                self.exact_newton_joint_psi_direction(
                    block_states,
                    derivative_blocks,
                    psi_index,
                    design_loc,
                    design_scale,
                    policy,
                )
            }

            // Second-order psi terms for a pair of resolved directions.
            // Errors when a row subsample is supplied.
            fn ws_psi_second_order_terms_from_parts(
                &self,
                block_states: &[ParameterBlockState],
                derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
                psi_a: &LocationScaleJointPsiDirection,
                psi_b: &LocationScaleJointPsiDirection,
                design_loc: &Array2<f64>,
                design_scale: &Array2<f64>,
                subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
            ) -> Result<ExactNewtonJointPsiSecondOrderTerms, String> {
                if subsample.is_some() {
                    return Err(GamlssError::InvalidInput {
                        reason: concat!(
                            $label,
                            " does not support a row subsample for psi second-order terms"
                        )
                        .to_string(),
                    }
                    .into());
                }
                self.exact_newton_joint_psisecond_order_terms_from_parts(
                    block_states,
                    derivative_blocks,
                    psi_a,
                    psi_b,
                    design_loc,
                    design_scale,
                )
            }

            // Directional derivative of the psi Hessian along `d_beta_flat`.
            // Errors when a row subsample is supplied.
            fn ws_psi_hessian_directional_from_parts(
                &self,
                block_states: &[ParameterBlockState],
                psi_dir: &LocationScaleJointPsiDirection,
                d_beta_flat: &Array1<f64>,
                design_loc: &Array2<f64>,
                design_scale: &Array2<f64>,
                subsample: Option<&[crate::families::marginal_slope_shared::WeightedOuterRow]>,
            ) -> Result<Array2<f64>, String> {
                if subsample.is_some() {
                    return Err(GamlssError::InvalidInput {
                        reason: concat!(
                            $label,
                            " does not support a row subsample for the psi Hessian directional derivative"
                        )
                        .to_string(),
                    }
                    .into());
                }
                self.exact_newton_joint_psihessian_directional_derivative_from_parts(
                    block_states,
                    psi_dir,
                    d_beta_flat,
                    design_loc,
                    design_scale,
                )
            }
        }
    };
}
// Generate the `LocationScaleJointPsiFamily` impl for the plain family.
impl_binomial_location_scale_joint_psi_family!(
BinomialLocationScaleFamily,
"BinomialLocationScaleFamily"
);
// Generate the same impl for the wiggle variant.
impl_binomial_location_scale_joint_psi_family!(
BinomialLocationScaleWiggleFamily,
"BinomialLocationScaleWiggleFamily"
);
// Shared joint-psi workspace specialised to the plain family.
type BinomialLocationScaleExactNewtonJointPsiWorkspace =
LocationScaleJointPsiWorkspace<BinomialLocationScaleFamily>;
// Shared joint-psi workspace specialised to the wiggle variant.
type BinomialLocationScaleWiggleExactNewtonJointPsiWorkspace =
LocationScaleJointPsiWorkspace<BinomialLocationScaleWiggleFamily>;
impl BinomialLocationScaleFamily {
/// Index of the threshold block in block-state and spec slices.
pub const BLOCK_T: usize = 0;
/// Index of the log-sigma block in block-state and spec slices.
pub const BLOCK_LOG_SIGMA: usize = 1;
/// Names of the modelled parameters, in block order.
pub fn parameternames() -> &'static [&'static str] {
    const NAMES: &[&str] = &["threshold", "log_sigma"];
    NAMES
}
/// Link attached to each modelled parameter, in block order.
pub fn parameter_links() -> &'static [ParameterLink] {
    const LINKS: &[ParameterLink] = &[ParameterLink::InverseLink, ParameterLink::Log];
    LINKS
}
/// Static description of the family (name, parameter names and links).
pub fn metadata() -> FamilyMetadata {
    let parameternames = Self::parameternames();
    let parameter_links = Self::parameter_links();
    FamilyMetadata {
        name: "binomial_location_scale",
        parameternames,
        parameter_links,
    }
}
/// True when both the threshold and the log-sigma design are stored on the family.
fn exact_joint_supported(&self) -> bool {
    matches!(
        (&self.threshold_design, &self.log_sigma_design),
        (Some(_), Some(_))
    )
}
/// Dense (threshold, log-sigma) designs obtained from the designs stored on
/// the family, via the shared cached location-scale helper.
fn dense_block_designs(&self) -> Result<(Cow<'_, Array2<f64>>, Cow<'_, Array2<f64>>), String> {
    let material_policy = self.policy.material_policy();
    let threshold = self.threshold_design.as_ref();
    let log_sigma = self.log_sigma_design.as_ref();
    dense_locscale_block_designs_cached(
        threshold,
        log_sigma,
        "BinomialLocationScaleFamily",
        "BinomialLocationScale",
        "threshold",
        &material_policy,
    )
}
/// Dense (threshold, log-sigma) designs obtained from the block specs, via
/// the shared location-scale helper (two blocks expected).
fn dense_block_designs_fromspecs<'a>(
    &self,
    specs: &'a [ParameterBlockSpec],
) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
    let material_policy = self.policy.material_policy();
    let expected_blocks = 2;
    dense_locscale_block_designs_fromspecs(
        specs,
        expected_blocks,
        "BinomialLocationScaleFamily",
        "BinomialLocationScale",
        Self::BLOCK_T,
        Self::BLOCK_LOG_SIGMA,
        "threshold",
        &material_policy,
    )
}
/// Dense designs for the exact joint path.
///
/// Prefers the designs stored on the family; otherwise falls back to the
/// supplied specs. Returns `Ok(None)` when neither source is available.
fn exact_joint_dense_block_designs<'a>(
    &'a self,
    specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
    if self.exact_joint_supported() {
        return self.dense_block_designs().map(Some);
    }
    match specs {
        Some(block_specs) => self.dense_block_designs_fromspecs(block_specs).map(Some),
        None => Ok(None),
    }
}
/// Owned clones of the (threshold, log-sigma) design matrices.
///
/// Prefers the designs stored on the family; otherwise clones them from the
/// supplied specs. Returns `Ok(None)` when neither source is available.
///
/// # Errors
/// Fails when the specs path does not receive exactly two specs, or when a
/// design's row count differs from the number of observations.
fn exact_joint_block_designs_owned(
    &self,
    specs: Option<&[ParameterBlockSpec]>,
) -> Result<Option<(DesignMatrix, DesignMatrix)>, String> {
    let stored = (
        self.threshold_design.as_ref(),
        self.log_sigma_design.as_ref(),
    );
    let (threshold, log_sigma) = match (stored, specs) {
        ((Some(x_t), Some(x_ls)), _) => (x_t.clone(), x_ls.clone()),
        (_, Some(block_specs)) => {
            if block_specs.len() != 2 {
                let reason = format!(
                    "BinomialLocationScaleFamily spec-aware operator path expects 2 specs, got {}",
                    block_specs.len()
                );
                return Err(GamlssError::DimensionMismatch { reason }.into());
            }
            (
                block_specs[Self::BLOCK_T].design.clone(),
                block_specs[Self::BLOCK_LOG_SIGMA].design.clone(),
            )
        }
        (_, None) => return Ok(None),
    };
    let n_obs = self.y.len();
    let rows_agree = threshold.nrows() == n_obs && log_sigma.nrows() == n_obs;
    if !rows_agree {
        let reason = format!(
            "BinomialLocationScaleFamily operator designs have row mismatch: y={}, threshold={}, log_sigma={}",
            n_obs,
            threshold.nrows(),
            log_sigma.nrows()
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }
    Ok(Some((threshold, log_sigma)))
}
/// Joint log-likelihood and its gradient with respect to the stacked
/// coefficients `[threshold, log_sigma]`.
///
/// Row-wise derivatives with respect to the two linear predictors are formed
/// in parallel and pulled back through `x_t` and `x_ls` with a transposed
/// product; the threshold part fills the first `x_t` coefficient slots of
/// the result and the log-sigma part the remainder.
///
/// # Errors
/// Returns a `DimensionMismatch` error when the block count is not 2 or when
/// any predictor, weight or design row count differs from `y.len()`, and
/// forwards any error from `binomial_location_scale_core`.
///
/// # Panics
/// Panics if `y`, `weights`, `eta_t` or one of the core arrays is not
/// contiguous in memory.
fn exact_newton_joint_gradient_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &DesignMatrix,
    x_ls: &DesignMatrix,
) -> Result<ExactNewtonJointGradientEvaluation, String> {
    if block_states.len() != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleFamily expects 2 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    if eta_t.len() != n
        || eta_ls.len() != n
        || self.weights.len() != n
        || x_t.nrows() != n
        || x_ls.nrows() != n
    {
        // Reported through the same typed error as every other size check in this impl.
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily joint gradient input size mismatch".to_string(),
        }
        .into());
    }
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        None,
        &self.link_kind,
    )?;
    let mut grad_eta_t_v = vec![0.0_f64; n];
    let mut grad_eta_ls_v = vec![0.0_f64; n];
    let y_slice = self.y.as_slice().expect("y must be contiguous");
    let w_slice = self.weights.as_slice().expect("weights must be contiguous");
    let q0_slice = core.q0.as_slice().expect("q0 must be contiguous");
    let sigma_slice = core.sigma.as_slice().expect("sigma must be contiguous");
    let mu_slice = core.mu.as_slice().expect("mu must be contiguous");
    let dmu_slice = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
    let d2mu_slice = core
        .d2mu_dq2
        .as_slice()
        .expect("d2mu_dq2 must be contiguous");
    let d3mu_slice = core
        .d3mu_dq3
        .as_slice()
        .expect("d3mu_dq3 must be contiguous");
    let eta_t_slice = eta_t.as_slice().expect("eta_t must be contiguous");
    let link_kind = &self.link_kind;
    grad_eta_t_v
        .par_iter_mut()
        .zip(grad_eta_ls_v.par_iter_mut())
        .enumerate()
        .for_each(|(i, (g_t, g_ls))| {
            // Only the first q-derivative is needed for the gradient.
            let (m1, _, _) = binomial_neglog_q_derivatives_dispatch(
                y_slice[i],
                w_slice[i],
                q0_slice[i],
                mu_slice[i],
                dmu_slice[i],
                d2mu_slice[i],
                d3mu_slice[i],
                link_kind,
            );
            let q0d = nonwiggle_q_derivs(eta_t_slice[i], sigma_slice[i]);
            // Chain rule through q; the minus sign negates the dispatch's
            // first derivative before it is stored as the gradient row.
            *g_t = -m1 * q0d.q_t;
            *g_ls = -m1 * q0d.q_ls;
        });
    let grad_eta_t = Array1::from_vec(grad_eta_t_v);
    let grad_eta_ls = Array1::from_vec(grad_eta_ls_v);
    let grad_t = x_t.transpose_vector_multiply(&grad_eta_t);
    let grad_ls = x_ls.transpose_vector_multiply(&grad_eta_ls);
    let total = grad_t.len() + grad_ls.len();
    let mut gradient = Array1::<f64>::zeros(total);
    gradient.slice_mut(s![0..grad_t.len()]).assign(&grad_t);
    gradient.slice_mut(s![grad_t.len()..total]).assign(&grad_ls);
    Ok(ExactNewtonJointGradientEvaluation {
        log_likelihood: core.log_likelihood,
        gradient,
    })
}
/// Joint Hessian built from owned design matrices (stored designs or specs).
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_block_designs_owned(specs)? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_hessian_from_design_matrices(
                block_states,
                &threshold_design,
                &log_sigma_design,
            ),
        None => Ok(None),
    }
}
/// Directional derivative of the joint Hessian along `d_beta_flat`, using
/// dense designs. Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(specs)? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_hessian_directional_derivative_from_designs(
                block_states,
                &threshold_design,
                &log_sigma_design,
                d_beta_flat,
            ),
        None => Ok(None),
    }
}
/// Second directional derivative of the joint Hessian along the pair
/// (`d_beta_u_flat`, `d_betav_flat`), using dense designs.
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_hessian_second_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: Option<&[ParameterBlockSpec]>,
    d_beta_u_flat: &Array1<f64>,
    d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(specs)? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_hessiansecond_directional_derivative_from_designs(
                block_states,
                &threshold_design,
                &log_sigma_design,
                d_beta_u_flat,
                d_betav_flat,
            ),
        None => Ok(None),
    }
}
/// First-order psi terms for hyperparameter `psi_index`, using dense designs.
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_psi_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_psi_terms_from_designs(
                block_states,
                specs,
                derivative_blocks,
                psi_index,
                &threshold_design,
                &log_sigma_design,
            ),
        None => Ok(None),
    }
}
/// Second-order psi terms for the hyperparameter pair (`psi_i`, `psi_j`),
/// using dense designs. Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_psisecond_order_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_psisecond_order_terms_from_designs(
                block_states,
                derivative_blocks,
                psi_i,
                psi_j,
                &threshold_design,
                &log_sigma_design,
            ),
        None => Ok(None),
    }
}
/// Directional derivative (along `d_beta_flat`) of the psi Hessian for
/// hyperparameter `psi_index`, using dense designs.
/// Returns `Ok(None)` when no designs are available.
fn exact_newton_joint_psihessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((threshold_design, log_sigma_design)) => self
            .exact_newton_joint_psihessian_directional_derivative_from_designs(
                block_states,
                derivative_blocks,
                psi_index,
                d_beta_flat,
                &threshold_design,
                &log_sigma_design,
            ),
        None => Ok(None),
    }
}
/// Per-row coefficients of the joint Hessian, returned as
/// `(coeff_tt, coeff_tl, coeff_ll)` for the threshold/threshold,
/// threshold/log-sigma and log-sigma/log-sigma blocks.
///
/// Callers in this impl sandwich these vectors between design matrices to
/// assemble the Hessian blocks.
///
/// # Errors
/// Fails when the block count is not 2, when predictor or weight lengths
/// differ from `y.len()`, or when `binomial_location_scale_core` fails.
///
/// # Panics
/// Panics if `y`, `weights` or one of the core arrays is not contiguous.
fn exact_newton_joint_hessian_row_coefficients(
&self,
block_states: &[ParameterBlockState],
) -> Result<(Array1<f64>, Array1<f64>, Array1<f64>), String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
if eta_t.len() != n || eta_ls.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
}
.into());
}
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
let mut coeff_tt = vec![0.0_f64; n];
let mut coeff_tl = vec![0.0_f64; n];
let mut coeff_ll = vec![0.0_f64; n];
// Borrow everything as plain slices so the parallel closure indexes cheaply.
let y_slice = self.y.as_slice().expect("y must be contiguous");
let w_slice = self.weights.as_slice().expect("weights must be contiguous");
let q0_slice = core.q0.as_slice().expect("q0 must be contiguous");
let sigma_slice = core.sigma.as_slice().expect("sigma must be contiguous");
let dsigma_slice = core
.dsigma_deta
.as_slice()
.expect("dsigma_deta must be contiguous");
let mu_slice = core.mu.as_slice().expect("mu must be contiguous");
let dmu_slice = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
let d2mu_slice = core
.d2mu_dq2
.as_slice()
.expect("d2mu_dq2 must be contiguous");
let d3mu_slice = core
.d3mu_dq3
.as_slice()
.expect("d3mu_dq3 must be contiguous");
let link_kind = &self.link_kind;
coeff_tt
.par_iter_mut()
.zip(coeff_tl.par_iter_mut())
.zip(coeff_ll.par_iter_mut())
.enumerate()
.for_each(|(i, ((c_tt, c_tl), c_ll))| {
let q = q0_slice[i];
// r = 1/sigma and kappa = (dsigma/deta)/sigma for this row.
let r = 1.0 / sigma_slice[i];
let kappa = dsigma_slice[i] / sigma_slice[i];
// Presumably the first and second q-derivatives of the row's negative
// log-likelihood (the third is unused here); confirm against the dispatch helper.
let (m1, m2, _) = binomial_neglog_q_derivatives_dispatch(
y_slice[i],
w_slice[i],
q,
mu_slice[i],
dmu_slice[i],
d2mu_slice[i],
d3mu_slice[i],
link_kind,
);
*c_tt = m2 * r * r;
*c_tl = kappa * r * (m1 + q * m2);
*c_ll = kappa * kappa * q * (m1 + q * m2);
});
Ok((
Array1::from_vec(coeff_tt),
Array1::from_vec(coeff_tl),
Array1::from_vec(coeff_ll),
))
}
/// Diagonal blocks of the joint Hessian only: the threshold/threshold and
/// log-sigma/log-sigma blocks. The cross-block coefficients are discarded.
fn exact_newton_block_diagonal_hessians_from_design_matrices(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &DesignMatrix,
    x_ls: &DesignMatrix,
) -> Result<(Array2<f64>, Array2<f64>), String> {
    let (w_threshold, _, w_log_sigma) =
        self.exact_newton_joint_hessian_row_coefficients(block_states)?;
    let threshold_block = xt_diag_x_design(x_t, &w_threshold)?;
    let log_sigma_block = xt_diag_x_design(x_ls, &w_log_sigma)?;
    Ok((threshold_block, log_sigma_block))
}
/// Assembles the full symmetric joint Hessian from dense designs.
///
/// The three upper-triangular blocks are computed from the row coefficients
/// and the lower triangle is filled by mirroring. Always returns `Some`.
fn exact_newton_joint_hessian_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let (w_tt, w_tl, w_ll) = self.exact_newton_joint_hessian_row_coefficients(block_states)?;
    let p_threshold = x_t.ncols();
    let p_joint = p_threshold + x_ls.ncols();
    let block_tt = xt_diag_x_dense(x_t, &w_tt)?;
    let block_tl = xt_diag_y_dense(x_t, &w_tl, x_ls)?;
    let block_ll = xt_diag_x_dense(x_ls, &w_ll)?;

    let mut joint = Array2::<f64>::zeros((p_joint, p_joint));
    joint
        .slice_mut(s![..p_threshold, ..p_threshold])
        .assign(&block_tt);
    joint
        .slice_mut(s![..p_threshold, p_threshold..])
        .assign(&block_tl);
    joint
        .slice_mut(s![p_threshold.., p_threshold..])
        .assign(&block_ll);
    mirror_upper_to_lower(&mut joint);
    Ok(Some(joint))
}
/// Assembles the full symmetric joint Hessian from `DesignMatrix` operands.
///
/// The three upper-triangular blocks are computed from the row coefficients
/// and the lower triangle is filled by mirroring. Always returns `Some`.
fn exact_newton_joint_hessian_from_design_matrices(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &DesignMatrix,
    x_ls: &DesignMatrix,
) -> Result<Option<Array2<f64>>, String> {
    let (w_tt, w_tl, w_ll) = self.exact_newton_joint_hessian_row_coefficients(block_states)?;
    let p_threshold = x_t.ncols();
    let p_joint = p_threshold + x_ls.ncols();
    let block_tt = xt_diag_x_design(x_t, &w_tt)?;
    let block_tl = xt_diag_y_design(x_t, &w_tl, x_ls)?;
    let block_ll = xt_diag_x_design(x_ls, &w_ll)?;

    let mut joint = Array2::<f64>::zeros((p_joint, p_joint));
    joint
        .slice_mut(s![..p_threshold, ..p_threshold])
        .assign(&block_tt);
    joint
        .slice_mut(s![..p_threshold, p_threshold..])
        .assign(&block_tl);
    joint
        .slice_mut(s![p_threshold.., p_threshold..])
        .assign(&block_ll);
    mirror_upper_to_lower(&mut joint);
    Ok(Some(joint))
}
/// Directional derivative of the joint Hessian along the stacked coefficient
/// direction `d_beta_flat` (threshold part first, then log-sigma).
///
/// The direction is pushed through each design to get predictor
/// perturbations, which are turned into row coefficients and sandwiched
/// between the designs. Always returns `Some` on success.
///
/// # Errors
/// Fails when the block count is not 2, when predictor or weight lengths
/// differ from `y.len()`, when `d_beta_flat` does not match the total column
/// count, or when a helper reports an error.
fn exact_newton_joint_hessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        let reason = format!(
            "BinomialLocationScaleFamily expects 2 blocks, got {}",
            n_blocks
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }
    let n_obs = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let sizes_agree =
        eta_t.len() == n_obs && eta_ls.len() == n_obs && self.weights.len() == n_obs;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let p_threshold = x_t.ncols();
    let p_joint = p_threshold + x_ls.ncols();
    if d_beta_flat.len() != p_joint {
        let reason = format!(
            "BinomialLocationScaleFamily joint d_beta length mismatch: got {}, expected {}",
            d_beta_flat.len(),
            p_joint
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }

    // Predictor perturbations induced by the coefficient direction.
    let d_eta_t = fast_av(x_t, &d_beta_flat.slice(s![..p_threshold]));
    let d_eta_ls = fast_av(x_ls, &d_beta_flat.slice(s![p_threshold..p_joint]));
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        None,
        &self.link_kind,
    )?;
    let (w_tt, w_tl, w_ll) = binomial_location_scale_first_directional_coefficients(
        &self.y,
        &self.weights,
        &core,
        &d_eta_t,
        &d_eta_ls,
        &self.link_kind,
    );
    let block_tt = xt_diag_x_dense(x_t, &w_tt)?;
    let block_tl = xt_diag_y_dense(x_t, &w_tl, x_ls)?;
    let block_ll = xt_diag_x_dense(x_ls, &w_ll)?;

    let mut derivative = Array2::<f64>::zeros((p_joint, p_joint));
    derivative
        .slice_mut(s![..p_threshold, ..p_threshold])
        .assign(&block_tt);
    derivative
        .slice_mut(s![..p_threshold, p_threshold..])
        .assign(&block_tl);
    derivative
        .slice_mut(s![p_threshold.., p_threshold..])
        .assign(&block_ll);
    mirror_upper_to_lower(&mut derivative);
    Ok(Some(derivative))
}
/// Second directional derivative of the joint Hessian along the stacked
/// coefficient directions `d_beta_u_flat` and `d_betav_flat`.
///
/// Both directions are pushed through the designs, converted into row
/// coefficients and sandwiched between the designs. Always returns `Some`
/// on success.
///
/// # Errors
/// Fails when the block count is not 2, when predictor or weight lengths
/// differ from `y.len()`, when either direction does not match the total
/// column count, or when a helper reports an error.
fn exact_newton_joint_hessiansecond_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
    d_beta_u_flat: &Array1<f64>,
    d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        let reason = format!(
            "BinomialLocationScaleFamily expects 2 blocks, got {}",
            n_blocks
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }
    let n_obs = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let sizes_agree =
        eta_t.len() == n_obs && eta_ls.len() == n_obs && self.weights.len() == n_obs;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let p_threshold = x_t.ncols();
    let p_joint = p_threshold + x_ls.ncols();
    if d_beta_u_flat.len() != p_joint {
        let reason = format!(
            "BinomialLocationScaleFamily joint d_beta_u length mismatch: got {}, expected {}",
            d_beta_u_flat.len(),
            p_joint
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }
    if d_betav_flat.len() != p_joint {
        let reason = format!(
            "BinomialLocationScaleFamily joint d_betav length mismatch: got {}, expected {}",
            d_betav_flat.len(),
            p_joint
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }

    // Predictor perturbations for each of the two coefficient directions.
    let d_eta_t_u = fast_av(x_t, &d_beta_u_flat.slice(s![..p_threshold]));
    let d_eta_ls_u = fast_av(x_ls, &d_beta_u_flat.slice(s![p_threshold..p_joint]));
    let d_eta_t_v = fast_av(x_t, &d_betav_flat.slice(s![..p_threshold]));
    let d_eta_ls_v = fast_av(x_ls, &d_betav_flat.slice(s![p_threshold..p_joint]));
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        None,
        &self.link_kind,
    )?;
    let (w_tt, w_tl, w_ll) = binomial_location_scalesecond_directional_coefficients(
        &self.y,
        &self.weights,
        &core,
        &d_eta_t_u,
        &d_eta_ls_u,
        &d_eta_t_v,
        &d_eta_ls_v,
        &self.link_kind,
    )?;
    let block_tt = xt_diag_x_dense(x_t, &w_tt)?;
    let block_tl = xt_diag_y_dense(x_t, &w_tl, x_ls)?;
    let block_ll = xt_diag_x_dense(x_ls, &w_ll)?;

    let mut second_derivative = Array2::<f64>::zeros((p_joint, p_joint));
    second_derivative
        .slice_mut(s![..p_threshold, ..p_threshold])
        .assign(&block_tt);
    second_derivative
        .slice_mut(s![..p_threshold, p_threshold..])
        .assign(&block_tl);
    second_derivative
        .slice_mut(s![p_threshold.., p_threshold..])
        .assign(&block_ll);
    mirror_upper_to_lower(&mut second_derivative);
    Ok(Some(second_derivative))
}
/// Resolves the psi direction for hyperparameter `psi_index`.
///
/// Delegates to the shared location-scale helper and repackages its parts
/// as a `LocationScaleJointPsiDirection`. Returns `Ok(None)` when the helper
/// reports no direction.
fn exact_newton_joint_psi_direction(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
    policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
    let n_obs = self.y.len();
    let expected_blocks = 2;
    let resolved = locscale_joint_psi_direction_parts(
        block_states,
        derivative_blocks,
        psi_index,
        n_obs,
        x_t.ncols(),
        x_ls.ncols(),
        Self::BLOCK_T,
        Self::BLOCK_LOG_SIGMA,
        expected_blocks,
        "BinomialLocationScaleFamily",
        "threshold",
        policy,
    )?;
    Ok(resolved.map(|parts| LocationScaleJointPsiDirection {
        block_idx: parts.block_idx,
        local_idx: parts.local_idx,
        x_primary_psi: parts.primary_psi,
        x_ls_psi: parts.log_sigma_psi,
        z_primary_psi: parts.primary_z,
        z_ls_psi: parts.log_sigma_z,
    }))
}
/// Second-order design drifts for a pair of psi directions, computed by the
/// shared location-scale helper with this family's block layout and policy.
fn exact_newton_joint_psisecond_design_drifts(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_a: &LocationScaleJointPsiDirection,
    psi_b: &LocationScaleJointPsiDirection,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<LocationScaleJointPsiSecondDrifts, String> {
    let config = LocScalePsiDriftConfig {
        n: self.y.len(),
        p_primary: x_t.ncols(),
        p_log_sigma: x_ls.ncols(),
        primary_block_idx: Self::BLOCK_T,
        log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
        family_name: "BinomialLocationScaleFamily",
        primary_label: "threshold",
        policy: &self.policy,
    };
    locscale_joint_psisecond_design_drifts(block_states, derivative_blocks, psi_a, psi_b, config)
}
/// First-order psi terms for hyperparameter `psi_index`: the objective
/// derivative, the score derivative, and the Hessian derivative (either as
/// an operator or as a dense matrix).
///
/// Returns `Ok(None)` when no psi direction can be resolved for `psi_index`.
///
/// # Errors
/// Fails when the block, spec or derivative-block counts are not 2, when
/// predictor or weight lengths differ from `y.len()`, or when a helper
/// reports an error.
///
/// # Panics
/// Panics if `y`, `weights`, the core arrays or the direction's `z` vectors
/// are not contiguous.
fn exact_newton_joint_psi_terms_from_designs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
x_t: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
if block_states.len() != 2 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleFamily expects 2 blocks, got {}",
block_states.len()
),
}
.into());
}
if specs.len() != 2 || derivative_blocks.len() != 2 {
return Err(GamlssError::DimensionMismatch { reason: format!(
"BinomialLocationScaleFamily joint psi terms expect 2 specs and 2 derivative blocks, got {} and {}",
specs.len(),
derivative_blocks.len()
) }.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
if eta_t.len() != n || eta_ls.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
}
.into());
}
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
let pt = x_t.ncols();
let pls = x_ls.ncols();
let total = pt + pls;
let Some(dir_a) = self.exact_newton_joint_psi_direction(
block_states,
derivative_blocks,
psi_index,
x_t,
x_ls,
&self.policy,
)?
else {
return Ok(None);
};
// Per-row drifts of the two linear predictors carried by the psi direction.
let (z_t, z_ls) = (&dir_a.z_primary_psi, &dir_a.z_ls_psi);
// One row of the parallel computation: residual-like terms (r_*), Hessian
// weights (h_*), their psi derivatives (dr_*, dh_*) and the row's
// contribution to the objective derivative.
struct PsiTermsRow {
r_t: f64,
r_ls: f64,
dr_t: f64,
dr_ls: f64,
h_tt: f64,
h_tl: f64,
h_ll: f64,
dh_tt: f64,
dh_tl: f64,
dh_ll: f64,
obj: f64,
}
let y_p = self.y.as_slice().expect("y must be contiguous");
let w_p = self.weights.as_slice().expect("weights must be contiguous");
let q0_p = core.q0.as_slice().expect("q0 must be contiguous");
let sigma_p = core.sigma.as_slice().expect("sigma must be contiguous");
let dsigma_p = core
.dsigma_deta
.as_slice()
.expect("dsigma_deta must be contiguous");
let mu_p = core.mu.as_slice().expect("mu must be contiguous");
let dmu_p = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
let d2mu_p = core
.d2mu_dq2
.as_slice()
.expect("d2mu_dq2 must be contiguous");
let d3mu_p = core
.d3mu_dq3
.as_slice()
.expect("d3mu_dq3 must be contiguous");
let z_t_p = z_t.as_slice().expect("z_t must be contiguous");
let z_ls_p = z_ls.as_slice().expect("z_ls must be contiguous");
let link_kind_p = &self.link_kind;
let rows: Vec<PsiTermsRow> = (0..n)
.into_par_iter()
.map(|i| {
let q = q0_p[i];
// r = 1/sigma and s = (dsigma/deta)/sigma for this row.
let r = 1.0 / sigma_p[i];
let s = dsigma_p[i] / sigma_p[i];
let sz = s * z_ls_p[i];
// Drift of q induced by the psi direction.
let q_psi = -r * z_t_p[i] - q * sz;
// Presumably the first three q-derivatives of the row's negative
// log-likelihood; confirm against the dispatch helper.
let (a, b, c) = binomial_neglog_q_derivatives_dispatch(
y_p[i],
w_p[i],
q,
mu_p[i],
dmu_p[i],
d2mu_p[i],
d3mu_p[i],
link_kind_p,
);
let r_t = -a * r;
let r_ls = -a * q * s;
// NOTE(review): h_tl, h_ll, dr_ls and the dh_* terms below carry no `s`
// factor, whereas exact_newton_joint_hessian_row_coefficients scales the
// matching entries by dsigma/sigma. The two agree only when s == 1 —
// confirm that holds for the sigma link in use.
PsiTermsRow {
r_t,
r_ls,
dr_t: -b * q_psi * r + a * r * sz,
dr_ls: -(a + q * b) * q_psi,
h_tt: b * r * r,
h_tl: r * (a + q * b),
h_ll: q * (a + q * b),
dh_tt: r * r * (c * q_psi - 2.0 * b * sz),
dh_tl: r * ((2.0 * b + c * q) * q_psi - (a + q * b) * sz),
dh_ll: (a + 3.0 * q * b + q * q * c) * q_psi,
obj: r_t * z_t_p[i] + r_ls * z_ls_p[i],
}
})
.collect();
// Scatter the row structs into column vectors; the objective is summed
// sequentially in row order.
let mut r_t = Array1::<f64>::zeros(n);
let mut r_ls = Array1::<f64>::zeros(n);
let mut dr_t = Array1::<f64>::zeros(n);
let mut dr_ls = Array1::<f64>::zeros(n);
let mut h_tt = Array1::<f64>::zeros(n);
let mut h_tl = Array1::<f64>::zeros(n);
let mut h_ll = Array1::<f64>::zeros(n);
let mut dh_tt = Array1::<f64>::zeros(n);
let mut dh_tl = Array1::<f64>::zeros(n);
let mut dh_ll = Array1::<f64>::zeros(n);
let mut objective_psi = 0.0_f64;
for (i, row) in rows.into_iter().enumerate() {
r_t[i] = row.r_t;
r_ls[i] = row.r_ls;
dr_t[i] = row.dr_t;
dr_ls[i] = row.dr_ls;
h_tt[i] = row.h_tt;
h_tl[i] = row.h_tl;
h_ll[i] = row.h_ll;
dh_tt[i] = row.dh_tt;
dh_tl[i] = row.dh_tl;
dh_ll[i] = row.dh_ll;
objective_psi += row.obj;
}
// Try to build the operator form of the psi Hessian; when it is `Some`,
// the dense matrix below is skipped.
let hessian_psi_operator = build_two_block_custom_family_joint_psi_operator_from_actions(
dir_a.x_primary_psi.cloned_first_action(),
dir_a.x_ls_psi.cloned_first_action(),
0..pt,
pt..pt + pls,
x_t,
x_ls,
&h_tt,
&h_tl,
&h_ll,
&dh_tt,
&dh_tl,
&dh_ll,
)?;
let x_t_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
// Score derivative: design-drift term plus the base design applied to the
// residual drift, for each block.
let score_t = x_t_map.transpose_mul(r_t.view()) + fast_atv(x_t, &dr_t);
let score_ls = x_ls_map.transpose_mul(r_ls.view()) + fast_atv(x_ls, &dr_ls);
let mut score_psi = Array1::<f64>::zeros(total);
score_psi.slice_mut(s![0..pt]).assign(&score_t);
score_psi.slice_mut(s![pt..pt + pls]).assign(&score_ls);
let hessian_psi = if hessian_psi_operator.is_some() {
// Empty 0x0 placeholder: the operator carries the Hessian in this case.
Array2::zeros((0, 0))
} else {
// Dense fallback: each block is the sum of the two design-drift cross
// products and the weight-drift sandwich.
let h_tt_block = weighted_crossprod_psi_maps(
x_t_map,
h_tt.view(),
CustomFamilyPsiLinearMapRef::Dense(x_t),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tt.view(),
x_t_map,
)? + &xt_diag_x_dense(x_t, &dh_tt)?;
let h_tl_block = weighted_crossprod_psi_maps(
x_t_map,
h_tl.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tl.view(),
x_ls_map,
)? + &xt_diag_y_dense(x_t, &dh_tl, x_ls)?;
let h_ll_block = weighted_crossprod_psi_maps(
x_ls_map,
h_ll.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_ls),
h_ll.view(),
x_ls_map,
)? + &xt_diag_x_dense(x_ls, &dh_ll)?;
let mut hessian_psi = Array2::<f64>::zeros((total, total));
hessian_psi.slice_mut(s![0..pt, 0..pt]).assign(&h_tt_block);
hessian_psi
.slice_mut(s![0..pt, pt..pt + pls])
.assign(&h_tl_block);
hessian_psi
.slice_mut(s![pt..pt + pls, pt..pt + pls])
.assign(&h_ll_block);
// Only the upper triangle was written; copy it into the lower triangle.
mirror_upper_to_lower(&mut hessian_psi);
hessian_psi
};
Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
objective_psi,
score_psi,
hessian_psi,
hessian_psi_operator,
}))
}
/// Second-order psi terms for the hyperparameter pair (`psi_i`, `psi_j`).
///
/// Resolves both psi directions (first `psi_i`, then `psi_j`) and hands them
/// to the parts-based routine. Returns `Ok(None)` as soon as either
/// direction cannot be resolved.
fn exact_newton_joint_psisecond_order_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    let resolve_direction = |psi_index: usize| {
        self.exact_newton_joint_psi_direction(
            block_states,
            derivative_blocks,
            psi_index,
            x_t,
            x_ls,
            &self.policy,
        )
    };
    let first = match resolve_direction(psi_i)? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    let second = match resolve_direction(psi_j)? {
        Some(direction) => direction,
        None => return Ok(None),
    };
    self.exact_newton_joint_psisecond_order_terms_from_parts(
        block_states,
        derivative_blocks,
        &first,
        &second,
        x_t,
        x_ls,
    )
    .map(Some)
}
/// Assembles the second-order psi terms for a pair of already-resolved psi
/// directions (`dir_i`, `dir_j`).
///
/// The result carries the scalar `objective_psi_psi`, the stacked vector
/// `score_psi_psi` (entries for the `x_t` columns first, then the `x_ls`
/// columns), and the dense matrix `hessian_psi_psi` of side
/// `x_t.ncols() + x_ls.ncols()`. `hessian_psi_psi_operator` is always `None`.
///
/// # Errors
/// Propagates failures from the second-design-drift helper, from
/// `binomial_location_scale_core`, from the fourth-derivative dispatch, and
/// from the weighted cross-product helpers.
///
/// # Panics
/// Panics if any per-row array read through `as_slice()` is not contiguous.
///
/// NOTE(review): the sibling
/// `exact_newton_joint_psihessian_directional_derivative_from_parts` scales
/// log-sigma drifts by `dsigma_deta / sigma`; the row formulas here do not.
/// Presumably that ratio is 1 for the sigma link in use — confirm.
fn exact_newton_joint_psisecond_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
dir_i: &LocationScaleJointPsiDirection,
dir_j: &LocationScaleJointPsiDirection,
x_t: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
// Second-order design drifts for the (i, j) pair: linear-predictor drifts
// `z_*_ab` plus optional design-level actions/matrices `x_*_ab*`.
let second_drifts = self.exact_newton_joint_psisecond_design_drifts(
block_states,
derivative_blocks,
dir_i,
dir_j,
x_t,
x_ls,
)?;
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
// Per-row likelihood quantities (q0, sigma, mu and its q-derivatives) at the
// current linear predictors.
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
let pt = x_t.ncols();
let pls = x_ls.ncols();
let total = pt + pls;
// First-order design derivatives for each direction, viewed as linear maps.
let x_t_i_map = dir_i.x_primary_psi.as_linear_map_ref();
let x_t_j_map = dir_j.x_primary_psi.as_linear_map_ref();
let x_ls_i_map = dir_i.x_ls_psi.as_linear_map_ref();
let x_ls_j_map = dir_j.x_ls_psi.as_linear_map_ref();
// Second-order design derivatives, built from either the action or the
// explicit matrix held in `second_drifts`.
let x_t_ab_map = second_psi_linear_map(
second_drifts.x_primary_ab_action.as_ref(),
second_drifts.x_primary_ab.as_ref(),
n,
pt,
);
let x_ls_ab_map = second_psi_linear_map(
second_drifts.x_ls_ab_action.as_ref(),
second_drifts.x_ls_ab.as_ref(),
n,
pls,
);
// Per-row working arrays: `r_*` score residuals, `h_*` Hessian weights, and
// their first (`d…_i`, `d…_j`) and mixed second (`d2…`) psi derivatives.
let mut r_t = Array1::<f64>::zeros(n);
let mut r_ls = Array1::<f64>::zeros(n);
let mut dr_t_i = Array1::<f64>::zeros(n);
let mut dr_t_j = Array1::<f64>::zeros(n);
let mut dr_ls_i = Array1::<f64>::zeros(n);
let mut dr_ls_j = Array1::<f64>::zeros(n);
let mut d2r_t = Array1::<f64>::zeros(n);
let mut d2r_ls = Array1::<f64>::zeros(n);
let mut h_tt = Array1::<f64>::zeros(n);
let mut h_tl = Array1::<f64>::zeros(n);
let mut h_ll = Array1::<f64>::zeros(n);
let mut dh_tt_i = Array1::<f64>::zeros(n);
let mut dh_tt_j = Array1::<f64>::zeros(n);
let mut dh_tl_i = Array1::<f64>::zeros(n);
let mut dh_tl_j = Array1::<f64>::zeros(n);
let mut dh_ll_i = Array1::<f64>::zeros(n);
let mut dh_ll_j = Array1::<f64>::zeros(n);
let mut d2h_tt = Array1::<f64>::zeros(n);
let mut d2h_tl = Array1::<f64>::zeros(n);
let mut d2h_ll = Array1::<f64>::zeros(n);
let mut objective_psi_psi = 0.0;
// Plain value bundle so each row can be computed in parallel and written back
// afterwards.
struct PsiSecondRow {
r_t: f64,
r_ls: f64,
dr_t_i: f64,
dr_t_j: f64,
dr_ls_i: f64,
dr_ls_j: f64,
d2r_t: f64,
d2r_ls: f64,
h_tt: f64,
h_tl: f64,
h_ll: f64,
dh_tt_i: f64,
dh_tt_j: f64,
dh_tl_i: f64,
dh_tl_j: f64,
dh_ll_i: f64,
dh_ll_j: f64,
d2h_tt: f64,
d2h_tl: f64,
d2h_ll: f64,
objective: f64,
}
// Borrow everything as plain slices for the parallel closure below.
let y_p = self.y.as_slice().expect("y must be contiguous");
let w_p = self.weights.as_slice().expect("weights must be contiguous");
let q_p = core.q0.as_slice().expect("q0 must be contiguous");
let sigma_p = core.sigma.as_slice().expect("sigma must be contiguous");
let mu_p = core.mu.as_slice().expect("mu must be contiguous");
let dmu_p = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
let d2mu_p = core
.d2mu_dq2
.as_slice()
.expect("d2mu_dq2 must be contiguous");
let d3mu_p = core
.d3mu_dq3
.as_slice()
.expect("d3mu_dq3 must be contiguous");
let z_t_i = dir_i
.z_primary_psi
.as_slice()
.expect("z_t_psi_i must be contiguous");
let z_t_j = dir_j
.z_primary_psi
.as_slice()
.expect("z_t_psi_j must be contiguous");
let z_ls_i = dir_i
.z_ls_psi
.as_slice()
.expect("z_ls_psi_i must be contiguous");
let z_ls_j = dir_j
.z_ls_psi
.as_slice()
.expect("z_ls_psi_j must be contiguous");
let z_t_ab = second_drifts
.z_primary_ab
.as_slice()
.expect("z_t_ab must be contiguous");
let z_ls_ab = second_drifts
.z_ls_ab
.as_slice()
.expect("z_ls_ab must be contiguous");
let link_kind_p = &self.link_kind;
let rows: Result<Vec<PsiSecondRow>, String> = (0..n)
.into_par_iter()
.map(|row| {
let q = q_p[row];
// Reciprocal of sigma for this row.
let r = 1.0 / sigma_p[row];
// First-order change of q along each psi direction.
let q_i = -r * z_t_i[row] - q * z_ls_i[row];
let q_j = -r * z_t_j[row] - q * z_ls_j[row];
// Mixed second-order change of q, including the second design drifts.
let q_ij = -r * z_t_ab[row]
+ r * (z_t_i[row] * z_ls_j[row] + z_t_j[row] * z_ls_i[row])
+ q * (z_ls_i[row] * z_ls_j[row] - z_ls_ab[row]);
// Presumably (a, b, c) are the first three q-derivatives of the per-row
// negative log-likelihood and `d` the fourth — confirm against the
// dispatch helpers.
let (a, b, c) = binomial_neglog_q_derivatives_dispatch(
y_p[row],
w_p[row],
q,
mu_p[row],
dmu_p[row],
d2mu_p[row],
d3mu_p[row],
link_kind_p,
);
let d = binomial_neglog_q_fourth_derivative_dispatch(
y_p[row],
w_p[row],
q,
mu_p[row],
dmu_p[row],
d2mu_p[row],
d3mu_p[row],
link_kind_p,
)?;
// `u` recurs in the log-sigma terms; `u_i`/`u_j` are its changes along
// each direction.
let u = a + q * b;
let u_i = (2.0 * b + q * c) * q_i;
let u_j = (2.0 * b + q * c) * q_j;
Ok(PsiSecondRow {
r_t: -a * r,
r_ls: -a * q,
dr_t_i: -b * q_i * r + a * r * z_ls_i[row],
dr_t_j: -b * q_j * r + a * r * z_ls_j[row],
dr_ls_i: -u * q_i,
dr_ls_j: -u * q_j,
d2r_t: r
* (-c * q_i * q_j - b * q_ij + b * (q_i * z_ls_j[row] + q_j * z_ls_i[row])
- a * z_ls_i[row] * z_ls_j[row]
+ a * z_ls_ab[row]),
d2r_ls: -((2.0 * b + q * c) * q_i * q_j + u * q_ij),
h_tt: b * r * r,
h_tl: r * u,
h_ll: q * u,
dh_tt_i: r * r * (c * q_i - 2.0 * b * z_ls_i[row]),
dh_tt_j: r * r * (c * q_j - 2.0 * b * z_ls_j[row]),
dh_tl_i: r * (u_i - u * z_ls_i[row]),
dh_tl_j: r * (u_j - u * z_ls_j[row]),
dh_ll_i: (a + 3.0 * q * b + q * q * c) * q_i,
dh_ll_j: (a + 3.0 * q * b + q * q * c) * q_j,
d2h_tt: r
* r
* (d * q_i * q_j + c * q_ij
- 2.0 * c * (q_j * z_ls_i[row] + q_i * z_ls_j[row])
+ 4.0 * b * z_ls_i[row] * z_ls_j[row]
- 2.0 * b * z_ls_ab[row]),
d2h_tl: r
* (((3.0 * c + q * d) * q_j) * q_i + (2.0 * b + q * c) * q_ij
- (2.0 * b + q * c) * (q_j * z_ls_i[row] + q_i * z_ls_j[row])
+ u * (z_ls_i[row] * z_ls_j[row] - z_ls_ab[row])),
d2h_ll: (4.0 * b + 5.0 * q * c + q * q * d) * q_i * q_j
+ (a + 3.0 * q * b + q * q * c) * q_ij,
objective: a * q_ij + b * q_i * q_j,
})
})
.collect();
// Write the per-row results back serially; the objective is summed in row
// order here rather than inside the parallel section.
for (row, vals) in rows?.into_iter().enumerate() {
r_t[row] = vals.r_t;
r_ls[row] = vals.r_ls;
dr_t_i[row] = vals.dr_t_i;
dr_t_j[row] = vals.dr_t_j;
dr_ls_i[row] = vals.dr_ls_i;
dr_ls_j[row] = vals.dr_ls_j;
d2r_t[row] = vals.d2r_t;
d2r_ls[row] = vals.d2r_ls;
h_tt[row] = vals.h_tt;
h_tl[row] = vals.h_tl;
h_ll[row] = vals.h_ll;
dh_tt_i[row] = vals.dh_tt_i;
dh_tt_j[row] = vals.dh_tt_j;
dh_tl_i[row] = vals.dh_tl_i;
dh_tl_j[row] = vals.dh_tl_j;
dh_ll_i[row] = vals.dh_ll_i;
dh_ll_j[row] = vals.dh_ll_j;
d2h_tt[row] = vals.d2h_tt;
d2h_tl[row] = vals.d2h_tl;
d2h_ll[row] = vals.d2h_ll;
objective_psi_psi += vals.objective;
}
// Score: product rule over (design, residual) with two psi derivatives —
// second design derivative × residual, the two cross terms, and the base
// design × second residual derivative.
let mut score_psi_psi = Array1::<f64>::zeros(total);
score_psi_psi.slice_mut(s![0..pt]).assign(
&(x_t_ab_map.transpose_mul(r_t.view())
+ x_t_i_map.transpose_mul(dr_t_j.view())
+ x_t_j_map.transpose_mul(dr_t_i.view())
+ fast_atv(x_t, &d2r_t)),
);
score_psi_psi.slice_mut(s![pt..pt + pls]).assign(
&(x_ls_ab_map.transpose_mul(r_ls.view())
+ x_ls_i_map.transpose_mul(dr_ls_j.view())
+ x_ls_j_map.transpose_mul(dr_ls_i.view())
+ fast_atv(x_ls, &d2r_ls)),
);
// Each Hessian block below is the nine-term expansion of the mixed second psi
// derivative of (left design)^T diag(weight) (right design).
let h_tt_block = weighted_crossprod_psi_maps(
x_t_ab_map,
h_tt.view(),
CustomFamilyPsiLinearMapRef::Dense(x_t),
)? + &weighted_crossprod_psi_maps(x_t_i_map, h_tt.view(), x_t_j_map)?
+ &weighted_crossprod_psi_maps(x_t_j_map, h_tt.view(), x_t_i_map)?
+ &weighted_crossprod_psi_maps(
x_t_i_map,
dh_tt_j.view(),
CustomFamilyPsiLinearMapRef::Dense(x_t),
)?
+ &weighted_crossprod_psi_maps(
x_t_j_map,
dh_tt_i.view(),
CustomFamilyPsiLinearMapRef::Dense(x_t),
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
dh_tt_i.view(),
x_t_j_map,
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
dh_tt_j.view(),
x_t_i_map,
)?
+ &xt_diag_x_dense(x_t, &d2h_tt)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tt.view(),
x_t_ab_map,
)?;
let h_tl_block = weighted_crossprod_psi_maps(
x_t_ab_map,
h_tl.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(x_t_i_map, h_tl.view(), x_ls_j_map)?
+ &weighted_crossprod_psi_maps(x_t_j_map, h_tl.view(), x_ls_i_map)?
+ &weighted_crossprod_psi_maps(
x_t_i_map,
dh_tl_j.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
x_t_j_map,
dh_tl_i.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
dh_tl_i.view(),
x_ls_j_map,
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
dh_tl_j.view(),
x_ls_i_map,
)?
+ &xt_diag_y_dense(x_t, &d2h_tl, x_ls)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tl.view(),
x_ls_ab_map,
)?;
let h_ll_block = weighted_crossprod_psi_maps(
x_ls_ab_map,
h_ll.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(x_ls_i_map, h_ll.view(), x_ls_j_map)?
+ &weighted_crossprod_psi_maps(x_ls_j_map, h_ll.view(), x_ls_i_map)?
+ &weighted_crossprod_psi_maps(
x_ls_i_map,
dh_ll_j.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
x_ls_j_map,
dh_ll_i.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_ls),
dh_ll_i.view(),
x_ls_j_map,
)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_ls),
dh_ll_j.view(),
x_ls_i_map,
)?
+ &xt_diag_x_dense(x_ls, &d2h_ll)?
+ &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_ls),
h_ll.view(),
x_ls_ab_map,
)?;
// Only the tt, tl and ll blocks are written explicitly; the remaining
// lower-left block is left to `mirror_upper_to_lower`.
let mut hessian_psi_psi = Array2::<f64>::zeros((total, total));
hessian_psi_psi
.slice_mut(s![0..pt, 0..pt])
.assign(&h_tt_block);
hessian_psi_psi
.slice_mut(s![0..pt, pt..pt + pls])
.assign(&h_tl_block);
hessian_psi_psi
.slice_mut(s![pt..pt + pls, pt..pt + pls])
.assign(&h_ll_block);
mirror_upper_to_lower(&mut hessian_psi_psi);
Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
objective_psi_psi,
score_psi_psi,
hessian_psi_psi,
hessian_psi_psi_operator: None,
})
}
/// Directional derivative of the psi-Hessian for coordinate `psi_index` along
/// the coefficient direction `d_beta_flat`, using the supplied dense designs.
///
/// The psi coordinate is resolved through `exact_newton_joint_psi_direction`;
/// an unresolved coordinate produces `Ok(None)`. Otherwise the matrix is
/// computed by
/// `exact_newton_joint_psihessian_directional_derivative_from_parts`, whose
/// errors are propagated unchanged.
fn exact_newton_joint_psihessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let resolved = self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        x_t,
        x_ls,
        &self.policy,
    )?;
    match resolved {
        None => Ok(None),
        Some(direction) => self
            .exact_newton_joint_psihessian_directional_derivative_from_parts(
                block_states,
                &direction,
                d_beta_flat,
                x_t,
                x_ls,
            )
            .map(Some),
    }
}
/// Computes the directional derivative, along the flat coefficient direction
/// `d_beta_flat`, of the psi-derivative of the joint Hessian for the resolved
/// psi direction `dir_a`.
///
/// `d_beta_flat` must have length `x_t.ncols() + x_ls.ncols()`; its leading
/// `x_t.ncols()` entries are applied to `x_t` and the remainder to `x_ls`.
/// The returned matrix is square with that same side length.
///
/// # Errors
/// Returns a `DimensionMismatch`-derived error when `d_beta_flat` has the wrong
/// length, and propagates failures from `binomial_location_scale_core`, the
/// fourth-derivative dispatch, and the cross-product helpers.
fn exact_newton_joint_psihessian_directional_derivative_from_parts(
&self,
block_states: &[ParameterBlockState],
dir_a: &LocationScaleJointPsiDirection,
d_beta_flat: &Array1<f64>,
x_t: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<Array2<f64>, String> {
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
let pt = x_t.ncols();
let pls = x_ls.ncols();
let total = pt + pls;
if d_beta_flat.len() != total {
return Err(GamlssError::DimensionMismatch { reason: format!(
"BinomialLocationScaleFamily joint psi hessian directional derivative length mismatch: got {}, expected {}",
d_beta_flat.len(),
total
) }.into());
}
// Linear-predictor perturbations induced by the coefficient direction.
let xi_t = fast_av(x_t, &d_beta_flat.slice(s![0..pt]));
let xi_ls = fast_av(x_ls, &d_beta_flat.slice(s![pt..pt + pls]));
let x_t_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
// `h_*_u`: change of each Hessian weight along the coefficient direction.
// `dh_*_u`: the same change differentiated once more along the psi direction.
let mut dh_tt_u = Array1::<f64>::zeros(n);
let mut dh_tl_u = Array1::<f64>::zeros(n);
let mut dh_ll_u = Array1::<f64>::zeros(n);
let mut h_tt_u = Array1::<f64>::zeros(n);
let mut h_tl_u = Array1::<f64>::zeros(n);
let mut h_ll_u = Array1::<f64>::zeros(n);
for row in 0..n {
let q = core.q0[row];
let r = 1.0 / core.sigma[row];
// Relative slope of sigma in its linear predictor; every log-sigma
// perturbation below is scaled by it.
let s = core.dsigma_deta[row] / core.sigma[row];
let xi_ls_s = s * xi_ls[row];
let z_ls_psi_s = s * dir_a.z_ls_psi[row];
// Change of q along the coefficient direction (`du`) and along the psi
// direction (`q_a`), then the mixed change (`q_au`).
let du = -r * xi_t[row] - q * xi_ls_s;
let q_a = -r * dir_a.z_primary_psi[row] - q * z_ls_psi_s;
let q_au = r * dir_a.z_primary_psi[row] * xi_ls_s - du * z_ls_psi_s;
// Presumably (a, b, c) are the first three q-derivatives of the per-row
// negative log-likelihood and `d` the fourth — confirm against the
// dispatch helpers.
let (a, b, c) = binomial_neglog_q_derivatives_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
);
let d = binomial_neglog_q_fourth_derivative_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
)?;
let u = a + q * b;
h_tt_u[row] = r * r * (c * du - 2.0 * b * xi_ls_s);
h_tl_u[row] = r * ((2.0 * b + q * c) * du - u * xi_ls_s);
h_ll_u[row] = (a + 3.0 * q * b + q * q * c) * du;
dh_tt_u[row] = r
* r
* (d * du * q_a + c * q_au - 2.0 * c * (q_a * xi_ls_s + du * z_ls_psi_s)
+ 4.0 * b * xi_ls_s * z_ls_psi_s);
dh_tl_u[row] = r
* (((3.0 * c + q * d) * q_a) * du + (2.0 * b + q * c) * q_au
- (2.0 * b + q * c) * (q_a * xi_ls_s + du * z_ls_psi_s)
+ u * xi_ls_s * z_ls_psi_s);
dh_ll_u[row] = (4.0 * b + 5.0 * q * c + q * q * d) * du * q_a
+ (a + 3.0 * q * b + q * q * c) * q_au;
}
// Each block is a three-term product rule: psi-design on the left, psi-design
// on the right, and the base designs with the twice-differentiated weight.
let tt_block = weighted_crossprod_psi_maps(
x_t_map,
h_tt_u.view(),
CustomFamilyPsiLinearMapRef::Dense(x_t),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tt_u.view(),
x_t_map,
)? + &xt_diag_x_dense(x_t, &dh_tt_u)?;
let tl_block = weighted_crossprod_psi_maps(
x_t_map,
h_tl_u.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_t),
h_tl_u.view(),
x_ls_map,
)? + &xt_diag_y_dense(x_t, &dh_tl_u, x_ls)?;
let ll_block = weighted_crossprod_psi_maps(
x_ls_map,
h_ll_u.view(),
CustomFamilyPsiLinearMapRef::Dense(x_ls),
)? + &weighted_crossprod_psi_maps(
CustomFamilyPsiLinearMapRef::Dense(x_ls),
h_ll_u.view(),
x_ls_map,
)? + &xt_diag_x_dense(x_ls, &dh_ll_u)?;
// Only the tt, tl and ll blocks are written explicitly; the remaining
// lower-left block is left to `mirror_upper_to_lower`.
let mut out = Array2::<f64>::zeros((total, total));
out.slice_mut(s![0..pt, 0..pt]).assign(&tt_block);
out.slice_mut(s![0..pt, pt..pt + pls]).assign(&tl_block);
out.slice_mut(s![pt..pt + pls, pt..pt + pls])
.assign(&ll_block);
mirror_upper_to_lower(&mut out);
Ok(out)
}
/// Returns the effective Jacobian for block `block_idx` of this family.
///
/// The family is described to the shared block-Jacobian utility as a
/// two-output layout whose additive blocks are `BLOCK_T` and
/// `BLOCK_LOG_SIGMA`, with no wiggle block; the actual construction and any
/// error come from that utility.
pub fn block_effective_jacobian(
    specs: &[ParameterBlockSpec],
    block_idx: usize,
) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
    let layout = crate::util::block_jacobian::AdditiveWiggleBlockLayout {
        family: "BinomialLocationScaleFamily",
        n_outputs: 2,
        additive_blocks: &[Self::BLOCK_T, Self::BLOCK_LOG_SIGMA],
        wiggle_block: None,
    };
    layout.block_effective_jacobian(specs, block_idx)
}
}
impl CustomFamily for BinomialLocationScaleFamily {
/// Reports that this family's exact-Newton joint Hessian depends on the
/// coefficients; always `true`.
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
/// Cost estimate for the coefficient Hessian, obtained from the shared
/// location-scale cost helper using the number of observations (`self.y.len()`)
/// and the block specs.
fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
    let n_observations = self.y.len() as u64;
    crate::families::location_scale_engine::location_scale_coefficient_hessian_cost(
        n_observations,
        specs,
    )
}
/// Evaluates the family at the given block states, producing the
/// log-likelihood and one exact-Newton working set (gradient plus dense
/// Hessian) for each of the threshold and log-sigma blocks.
///
/// # Errors
/// * a `DimensionMismatch`-derived error when `block_states` does not hold
///   exactly two blocks, or when the linear predictors / weights do not match
///   the number of observations;
/// * any error from `binomial_location_scale_core`;
/// * a plain error string when the exact joint path is unsupported or when
///   either design matrix is missing;
/// * any error from the block-diagonal Hessian helper.
///
/// # Panics
/// Panics if one of the per-row arrays is not contiguous in memory.
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleFamily expects 2 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }

    let n_obs = self.y.len();
    let eta_threshold = &block_states[Self::BLOCK_T].eta;
    let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let sizes_agree = eta_threshold.len() == n_obs
        && eta_log_sigma.len() == n_obs
        && self.weights.len() == n_obs;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }

    // The likelihood core is evaluated before the capability checks, so a core
    // failure takes precedence over the "unsupported" errors below.
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_threshold,
        eta_log_sigma,
        None,
        &self.link_kind,
    )?;
    if !self.exact_joint_supported() {
        return Err(
            "BinomialLocationScaleFamily requires exact curvature designs; diagonal fallback has been removed"
                .to_string(),
        );
    }
    let Some(threshold_design) = self.threshold_design.as_ref() else {
        return Err(
            "BinomialLocationScaleFamily exact path is missing threshold design".to_string(),
        );
    };
    let Some(log_sigma_design) = self.log_sigma_design.as_ref() else {
        return Err(
            "BinomialLocationScaleFamily exact path is missing log-sigma design".to_string(),
        );
    };

    // Plain slices for the parallel per-row gradient pass.
    let responses = self.y.as_slice().expect("y must be contiguous");
    let prior_weights = self.weights.as_slice().expect("weights must be contiguous");
    let q0_rows = core.q0.as_slice().expect("q0 must be contiguous");
    let sigma_rows = core.sigma.as_slice().expect("sigma must be contiguous");
    let mu_rows = core.mu.as_slice().expect("mu must be contiguous");
    let dmu_rows = core.dmu_dq.as_slice().expect("dmu_dq must be contiguous");
    let d2mu_rows = core
        .d2mu_dq2
        .as_slice()
        .expect("d2mu_dq2 must be contiguous");
    let d3mu_rows = core
        .d3mu_dq3
        .as_slice()
        .expect("d3mu_dq3 must be contiguous");
    let eta_threshold_rows = eta_threshold.as_slice().expect("eta_t must be contiguous");
    let link = &self.link_kind;

    // Row-wise gradient with respect to each linear predictor: minus the first
    // q-derivative from the dispatch, times the matching partial from
    // `nonwiggle_q_derivs`.
    let mut eta_grad_threshold = vec![0.0_f64; n_obs];
    let mut eta_grad_log_sigma = vec![0.0_f64; n_obs];
    eta_grad_threshold
        .par_iter_mut()
        .zip(eta_grad_log_sigma.par_iter_mut())
        .enumerate()
        .for_each(|(row, (slot_threshold, slot_log_sigma))| {
            let (first_derivative, _, _) = binomial_neglog_q_derivatives_dispatch(
                responses[row],
                prior_weights[row],
                q0_rows[row],
                mu_rows[row],
                dmu_rows[row],
                d2mu_rows[row],
                d3mu_rows[row],
                link,
            );
            let q_partials = nonwiggle_q_derivs(eta_threshold_rows[row], sigma_rows[row]);
            *slot_threshold = -first_derivative * q_partials.q_t;
            *slot_log_sigma = -first_derivative * q_partials.q_ls;
        });

    // Map the row-wise gradients back to coefficient space.
    let gradient_threshold =
        threshold_design.transpose_vector_multiply(&Array1::from_vec(eta_grad_threshold));
    let gradient_log_sigma =
        log_sigma_design.transpose_vector_multiply(&Array1::from_vec(eta_grad_log_sigma));

    let (hessian_threshold, hessian_log_sigma) = self
        .exact_newton_block_diagonal_hessians_from_design_matrices(
            block_states,
            threshold_design,
            log_sigma_design,
        )?;

    let threshold_set = BlockWorkingSet::ExactNewton {
        gradient: gradient_threshold,
        hessian: SymmetricMatrix::Dense(hessian_threshold),
    };
    let log_sigma_set = BlockWorkingSet::ExactNewton {
        gradient: gradient_log_sigma,
        hessian: SymmetricMatrix::Dense(hessian_log_sigma),
    };
    Ok(FamilyEvaluation {
        log_likelihood: core.log_likelihood,
        blockworking_sets: vec![threshold_set, log_sigma_set],
    })
}
/// Computes only the log-likelihood at the given block states, via
/// `binomial_location_scale_ll_only`.
///
/// # Errors
/// Returns a `DimensionMismatch`-derived error when `block_states` does not
/// hold exactly two blocks or when the linear predictors / weights do not match
/// the number of observations; otherwise propagates the helper's result.
fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
    let n_blocks = block_states.len();
    if n_blocks != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleFamily expects 2 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }

    let n_obs = self.y.len();
    let eta_threshold = &block_states[Self::BLOCK_T].eta;
    let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let per_row_lengths = [eta_threshold.len(), eta_log_sigma.len(), self.weights.len()];
    if per_row_lengths.iter().any(|&len| len != n_obs) {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }

    binomial_location_scale_ll_only(
        &self.y,
        &self.weights,
        eta_threshold,
        eta_log_sigma,
        None,
        &self.link_kind,
    )
}
/// Log-likelihood that honours an optional outer-score subsample.
///
/// Without `options.outer_score_subsample` this is exactly
/// `log_likelihood_only`. With a subsample, only the listed rows are visited:
/// rows whose prior weight is exactly zero contribute nothing, and every other
/// row contributes its log-likelihood term multiplied by the subsample row
/// weight. The partial sums are combined with a parallel fold/reduce.
///
/// # Errors
/// Returns a `DimensionMismatch`-derived error for a wrong block count or
/// mismatched per-row lengths, and propagates failures from the inverse-link
/// evaluation or the per-row log-likelihood helper.
fn log_likelihood_only_with_options(
    &self,
    block_states: &[ParameterBlockState],
    options: &BlockwiseFitOptions,
) -> Result<f64, String> {
    let subsample = match options.outer_score_subsample.as_ref() {
        Some(subsample) => subsample,
        None => return self.log_likelihood_only(block_states),
    };

    let n_blocks = block_states.len();
    if n_blocks != 2 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleFamily expects 2 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }

    let n_obs = self.y.len();
    let eta_threshold = &block_states[Self::BLOCK_T].eta;
    let eta_log_sigma = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let sizes_agree = eta_threshold.len() == n_obs
        && eta_log_sigma.len() == n_obs
        && self.weights.len() == n_obs;
    if !sizes_agree {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleFamily input size mismatch".to_string(),
        }
        .into());
    }

    use rayon::iter::ParallelIterator;
    let link = &self.link_kind;
    subsample
        .rows
        .par_iter()
        .try_fold(
            || 0.0_f64,
            |partial_sum, sampled_row| -> Result<f64, String> {
                let obs = sampled_row.index;
                let prior_weight = self.weights[obs];
                if prior_weight == 0.0 {
                    return Ok(partial_sum);
                }
                let sigma = exp_sigma_jet1_scalar(eta_log_sigma[obs]).sigma;
                let q = binomial_location_scale_q0(eta_threshold[obs], sigma);
                // For the standard probit link a fixed 0.5 is passed as `mu`
                // and the inverse-link jet is not evaluated.
                let mu = match link {
                    InverseLink::Standard(StandardLink::Probit) => 0.5,
                    _ => {
                        inverse_link_jet_for_inverse_link(link, q)
                            .map_err(|e| {
                                format!("location-scale inverse-link evaluation failed: {e}")
                            })?
                            .mu
                    }
                };
                let row_term = binomial_location_scale_log_likelihood(
                    self.y[obs],
                    prior_weight,
                    q,
                    link,
                    mu,
                )?;
                Ok(partial_sum + sampled_row.weight * row_term)
            },
        )
        .try_reduce(|| 0.0_f64, |left, right| Ok(left + right))
}
/// Reports that this family requires the joint outer hyper-parameter path;
/// always `true`.
fn requires_joint_outer_hyper_path(&self) -> bool {
true
}
/// Always fails: this family no longer exposes diagonal working weights, so
/// their directional derivative is not available.
///
/// The arguments are accepted only to satisfy the trait signature and are
/// never inspected. In particular a NaN in `arr`, or `idx == usize::MAX`, now
/// yields the same error as any other input instead of aborting the process
/// through an assertion.
///
/// # Errors
/// Unconditionally returns the "no longer supports diagonal working weights"
/// message.
fn diagonalworking_weights_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    idx: usize,
    arr: &Array1<f64>,
) -> Result<Option<Array1<f64>>, String> {
    // Mark the parameters as intentionally unused without evaluating anything
    // that could panic or cost a pass over `arr`.
    let _ = (block_states, idx, arr);
    Err(
        "BinomialLocationScaleFamily no longer supports diagonal working weights; exact curvature is required"
            .to_string(),
    )
}
/// Trait entry point for the first-order joint psi terms of coordinate
/// `psi_index`.
///
/// Forwards every argument unchanged to
/// `exact_newton_joint_psi_terms_for_specs` and returns its result as is.
fn exact_newton_joint_psi_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
self.exact_newton_joint_psi_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
)
}
/// Trait entry point for the second-order joint psi terms of the coordinate
/// pair (`psi_i`, `psi_j`).
///
/// Forwards every argument unchanged to
/// `exact_newton_joint_psisecond_order_terms_for_specs` and returns its result
/// as is.
fn exact_newton_joint_psisecond_order_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_i: usize,
psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
self.exact_newton_joint_psisecond_order_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_i,
psi_j,
)
}
/// Trait entry point for the directional derivative of the psi-Hessian of
/// coordinate `psi_index` along `d_beta_flat`.
///
/// Forwards every argument unchanged to
/// `exact_newton_joint_psihessian_directional_derivative_for_specs` and returns
/// its result as is.
fn exact_newton_joint_psihessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_psihessian_directional_derivative_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
d_beta_flat,
)
}
/// Creates the shared psi workspace for this family.
///
/// Yields `Ok(None)` when the exact joint path is unsupported. Otherwise a
/// `BinomialLocationScaleExactNewtonJointPsiWorkspace` is built from a clone of
/// the family plus owned copies of `block_states` and `derivative_blocks`, and
/// returned behind an `Arc`. Construction errors are propagated.
fn exact_newton_joint_psi_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    if self.exact_joint_supported() {
        let workspace = BinomialLocationScaleExactNewtonJointPsiWorkspace::new(
            self.clone(),
            block_states.to_vec(),
            specs,
            derivative_blocks.to_vec(),
        )?;
        Ok(Some(Arc::new(workspace)))
    } else {
        Ok(None)
    }
}
/// Directional derivative of a single block's Hessian along `d_beta`.
///
/// `d_beta` is embedded into a zero-padded joint direction (threshold
/// coefficients first, log-sigma coefficients second), the joint directional
/// derivative is evaluated, and the diagonal sub-block belonging to
/// `block_idx` is copied out.
///
/// Returns `Ok(None)` when the exact joint path is unsupported or when
/// `block_idx` is neither `BLOCK_T` nor `BLOCK_LOG_SIGMA`.
///
/// # Errors
/// * a plain error string when either design matrix is missing;
/// * a `DimensionMismatch`-derived error when `d_beta` does not match the
///   column count of the selected block;
/// * an error when the joint derivative evaluates to `None`, plus anything the
///   joint evaluation itself reports.
fn exact_newton_hessian_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    d_beta: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    if !self.exact_joint_supported() {
        return Ok(None);
    }
    let Some(threshold_design) = self.threshold_design.as_ref() else {
        return Err(
            "BinomialLocationScaleFamily exact path is missing threshold design".to_string(),
        );
    };
    let pt = threshold_design.ncols();
    let Some(log_sigma_design) = self.log_sigma_design.as_ref() else {
        return Err(
            "BinomialLocationScaleFamily exact path is missing log-sigma design".to_string(),
        );
    };
    let pls = log_sigma_design.ncols();
    let total = pt + pls;

    // Validate the direction length and pick the coefficient range it occupies
    // inside the joint vector.
    let (start, end) = match block_idx {
        Self::BLOCK_T => {
            if d_beta.len() != pt {
                return Err(GamlssError::DimensionMismatch { reason: format!(
                    "BinomialLocationScaleFamily threshold d_beta length mismatch: got {}, expected {}",
                    d_beta.len(),
                    pt
                ) }.into());
            }
            (0usize, pt)
        }
        Self::BLOCK_LOG_SIGMA => {
            if d_beta.len() != pls {
                return Err(GamlssError::DimensionMismatch { reason: format!(
                    "BinomialLocationScaleFamily log-sigma d_beta length mismatch: got {}, expected {}",
                    d_beta.len(),
                    pls
                ) }.into());
            }
            (pt, total)
        }
        _ => return Ok(None),
    };

    // Zero everywhere except the selected block's coefficients.
    let mut joint_direction = Array1::<f64>::zeros(total);
    joint_direction.slice_mut(s![start..end]).assign(d_beta);

    let joint_derivative = match self
        .exact_newton_joint_hessian_directional_derivative(block_states, &joint_direction)?
    {
        Some(matrix) => matrix,
        None => {
            return Err(format!(
                "missing joint exact-newton directional Hessian for block {block_idx}"
            ));
        }
    };
    Ok(Some(
        joint_derivative
            .slice(s![start..end, start..end])
            .to_owned(),
    ))
}
/// Joint exact-Newton Hessian when no block specs are supplied.
///
/// Calls `exact_newton_joint_hessian_for_specs` with `None` in place of the
/// specs and returns its result as is.
fn exact_newton_joint_hessian(
&self,
block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_for_specs(block_states, None)
}
/// Reports that this family provides an explicit joint Hessian; always `true`.
fn has_explicit_joint_hessian(&self) -> bool {
true
}
/// Directional derivative of the joint Hessian along `d_beta_flat` when no
/// block specs are supplied.
///
/// Calls `exact_newton_joint_hessian_directional_derivative_for_specs` with
/// `None` in place of the specs and returns its result as is.
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_directional_derivative_for_specs(
block_states,
None,
d_beta_flat,
)
}
/// Second directional derivative of the joint Hessian along the two
/// coefficient directions `d_beta_u_flat` and `d_betav_flat`, when no block
/// specs are supplied.
///
/// Calls `exact_newton_joint_hessian_second_directional_derivative_for_specs`
/// with `None` in place of the specs and returns its result as is.
fn exact_newton_joint_hessiansecond_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
block_states,
None,
d_beta_u_flat,
d_betav_flat,
)
}
/// Joint exact-Newton Hessian using the caller's block specs.
///
/// Calls `exact_newton_joint_hessian_for_specs` with `Some(specs)` and returns
/// its result as is.
fn exact_newton_joint_hessian_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_for_specs(block_states, Some(specs))
}
/// Directional derivative of the joint Hessian along `d_beta_flat`, using the
/// caller's block specs.
///
/// Calls `exact_newton_joint_hessian_directional_derivative_for_specs` with
/// `Some(specs)` and returns its result as is.
fn exact_newton_joint_hessian_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_directional_derivative_for_specs(
block_states,
Some(specs),
d_beta_flat,
)
}
/// Second directional derivative of the joint Hessian along `d_beta_u_flat`
/// and `d_betav_flat`, using the caller's block specs.
///
/// Calls `exact_newton_joint_hessian_second_directional_derivative_for_specs`
/// with `Some(specs)` and returns its result as is.
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_hessian_second_directional_derivative_for_specs(
block_states,
Some(specs),
d_beta_u_flat,
d_betav_flat,
)
}
/// Evaluates the joint exact-Newton gradient for the given block states.
///
/// The owned block designs are obtained from
/// `exact_joint_block_designs_owned`; when that yields `None` so does this
/// method. Otherwise the gradient comes from
/// `exact_newton_joint_gradient_from_designs`. Errors from either step are
/// propagated.
fn exact_newton_joint_gradient_evaluation(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<ExactNewtonJointGradientEvaluation>, String> {
    match self.exact_joint_block_designs_owned(Some(specs))? {
        None => Ok(None),
        Some((x_t, x_ls)) => {
            let evaluation =
                self.exact_newton_joint_gradient_from_designs(block_states, &x_t, &x_ls)?;
            Ok(Some(evaluation))
        }
    }
}
/// Builds the joint Hessian workspace for the given block states.
///
/// Yields `Ok(None)` when `exact_joint_block_designs_owned` has no designs to
/// offer. Otherwise a `BinomialLocationScaleHessianWorkspace` is constructed
/// from a clone of the family, an owned copy of `block_states`, and the two
/// designs, and returned behind an `Arc`. Errors from either step are
/// propagated.
fn exact_newton_joint_hessian_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    match self.exact_joint_block_designs_owned(Some(specs))? {
        None => Ok(None),
        Some((x_t, x_ls)) => {
            let built = BinomialLocationScaleHessianWorkspace::new(
                self.clone(),
                block_states.to_vec(),
                x_t,
                x_ls,
            )?;
            Ok(Some(Arc::new(built)))
        }
    }
}
/// Builds the joint Hessian workspace and, when the fit options carry an outer
/// score subsample, applies that subsample's rows to the workspace before it is
/// shared.
///
/// Yields `Ok(None)` when `exact_joint_block_designs_owned` has no designs to
/// offer; errors from the design lookup or workspace construction are
/// propagated.
fn exact_newton_joint_hessian_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    match self.exact_joint_block_designs_owned(Some(specs))? {
        None => Ok(None),
        Some((x_t, x_ls)) => {
            let mut built = BinomialLocationScaleHessianWorkspace::new(
                self.clone(),
                block_states.to_vec(),
                x_t,
                x_ls,
            )?;
            // The subsample must be applied while the workspace is still
            // uniquely owned, i.e. before it is wrapped in the `Arc`.
            if let Some(subsample) = options.outer_score_subsample.as_ref() {
                built.apply_outer_subsample(subsample.rows.as_ref());
            }
            Ok(Some(Arc::new(built)))
        }
    }
}
/// Reports that this family can work with an outer-derivative subsample;
/// always `true`.
fn outer_derivative_subsample_capable(&self) -> bool {
true
}
/// Whether the inner coefficient Hessian-vector product is available for
/// `specs`.
///
/// True only when exactly two block specs are supplied and the designs of both
/// the `BLOCK_T` and `BLOCK_LOG_SIGMA` specs have as many rows as there are
/// observations.
fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
    let n_obs = self.y.len();
    // `&&` short-circuits, so the specs are indexed only once the count is known
    // to be two.
    specs.len() == 2
        && [Self::BLOCK_T, Self::BLOCK_LOG_SIGMA]
            .iter()
            .all(|&block| specs[block].design.nrows() == n_obs)
}
fn batched_outer_gradient_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
rho: &ndarray::Array1<f64>,
options: &BlockwiseFitOptions,
workspace: Option<Arc<dyn ExactNewtonJointHessianWorkspace>>,
) -> Result<Option<BatchedOuterGradientTerms>, String> {
use crate::faer_ndarray::FaerCholesky;
use faer::Side;
if options.outer_score_subsample.is_some() {
return Ok(None);
}
let psi_dim: usize = derivative_blocks.iter().map(Vec::len).sum();
if psi_dim != 0 {
return Ok(None);
}
if !self.exact_joint_supported() {
return Ok(None);
}
if block_states.len() != 2 || specs.len() != 2 {
return Ok(None);
}
let Some((x_t_cow, x_ls_cow)) = self.exact_joint_dense_block_designs(Some(specs))? else {
return Ok(None);
};
let x_t = x_t_cow.into_owned();
let x_ls = x_ls_cow.into_owned();
let pt = x_t.ncols();
let pls = x_ls.ncols();
let total = pt + pls;
let n = self.y.len();
if crate::custom_family::use_joint_matrix_free_path(total, n) {
return Ok(None);
}
let h_l = if let Some(workspace) = workspace.as_ref() {
if let Some(hessian) = workspace.hessian_dense()? {
hessian
} else {
self.exact_newton_joint_hessian_from_designs(block_states, &x_t, &x_ls)?
.ok_or_else(|| {
"BinomialLocationScaleFamily: unable to assemble joint Hessian for batched gradient"
.to_string()
})?
}
} else {
self.exact_newton_joint_hessian_from_designs(block_states, &x_t, &x_ls)?
.ok_or_else(|| {
"BinomialLocationScaleFamily: unable to assemble joint Hessian for batched gradient"
.to_string()
})?
};
let mut h = h_l.clone();
let total_pen: usize = specs.iter().map(|s| s.penalties.len()).sum();
if rho.len() != total_pen {
return Ok(None);
}
let mut per_block_rho: Vec<Vec<f64>> = Vec::with_capacity(specs.len());
let mut cursor = 0;
for spec in specs {
let cnt = spec.penalties.len();
let mut row = Vec::with_capacity(cnt);
for k in 0..cnt {
row.push(rho[cursor + k]);
}
per_block_rho.push(row);
cursor += cnt;
}
let ranges: Vec<(usize, usize)> = {
let mut out = Vec::with_capacity(specs.len());
let mut s_pos = 0usize;
for spec in specs {
let p = spec.design.ncols();
out.push((s_pos, s_pos + p));
s_pos += p;
}
out
};
for (b, spec) in specs.iter().enumerate() {
let (start, end) = ranges[b];
let p = end - start;
let mut s_b = ndarray::Array2::<f64>::zeros((p, p));
for (k, pen) in spec.penalties.iter().enumerate() {
let lambda = per_block_rho[b][k].exp();
pen.add_scaled_to(lambda, &mut s_b);
}
let mut h_block = h.slice_mut(s![start..end, start..end]);
h_block += &s_b;
}
let factor = h
.cholesky(Side::Lower)
.map_err(|e| format!("BinomialLocationScale batched gradient: Cholesky failed: {e}"))?;
let beta_flat = {
let mut out = ndarray::Array1::<f64>::zeros(total);
for b in 0..specs.len() {
let (start, end) = ranges[b];
out.slice_mut(s![start..end]).assign(&block_states[b].beta);
}
out
};
const LEVERAGE_CHUNK_ROWS: usize = 1024;
const MIN_PARALLEL_LEVERAGE_ROWS: usize = 2 * LEVERAGE_CHUNK_ROWS;
let leverage_chunk_rows = if n >= MIN_PARALLEL_LEVERAGE_ROWS {
LEVERAGE_CHUNK_ROWS
} else {
n.max(1)
};
let leverage_chunks = n.div_ceil(leverage_chunk_rows);
struct LeverageScratch {
rhs_t: ndarray::Array2<f64>,
rhs_l: ndarray::Array2<f64>,
}
impl LeverageScratch {
fn new(total: usize, chunk_rows: usize) -> Self {
Self {
rhs_t: ndarray::Array2::<f64>::zeros((total, chunk_rows)),
rhs_l: ndarray::Array2::<f64>::zeros((total, chunk_rows)),
}
}
}
let leverage_parts: Vec<(
usize,
ndarray::Array1<f64>,
ndarray::Array1<f64>,
ndarray::Array1<f64>,
)> = (0..leverage_chunks)
.into_par_iter()
.map_init(
|| LeverageScratch::new(total, leverage_chunk_rows),
|scratch, chunk_idx| {
let row_start = chunk_idx * leverage_chunk_rows;
let row_end = (row_start + leverage_chunk_rows).min(n);
let m = row_end - row_start;
let mut rhs_t = scratch.rhs_t.slice_mut(s![.., 0..m]);
let mut rhs_l = scratch.rhs_l.slice_mut(s![.., 0..m]);
rhs_t.fill(0.0);
rhs_l.fill(0.0);
for j in 0..m {
let i = row_start + j;
for c in 0..pt {
rhs_t[[c, j]] = x_t[[i, c]];
}
for c in 0..pls {
rhs_l[[pt + c, j]] = x_ls[[i, c]];
}
}
let q_t = factor.solve_mat(&rhs_t.to_owned());
let q_l = factor.solve_mat(&rhs_l.to_owned());
let mut chunk_00 = ndarray::Array1::<f64>::zeros(m);
let mut chunk_01 = ndarray::Array1::<f64>::zeros(m);
let mut chunk_11 = ndarray::Array1::<f64>::zeros(m);
for j in 0..m {
let i = row_start + j;
let mut l00 = 0.0;
let mut l11 = 0.0;
let mut l01 = 0.0;
for c in 0..pt {
l00 += x_t[[i, c]] * q_t[[c, j]];
l01 += x_t[[i, c]] * q_l[[c, j]];
}
for c in 0..pls {
l11 += x_ls[[i, c]] * q_l[[pt + c, j]];
}
chunk_00[j] = l00;
chunk_01[j] = l01;
chunk_11[j] = l11;
}
(row_start, chunk_00, chunk_01, chunk_11)
},
)
.collect();
let mut leverage_00 = ndarray::Array1::<f64>::zeros(n);
let mut leverage_01 = ndarray::Array1::<f64>::zeros(n);
let mut leverage_11 = ndarray::Array1::<f64>::zeros(n);
for (row_start, chunk_00, chunk_01, chunk_11) in leverage_parts {
let row_end = row_start + chunk_00.len();
leverage_00
.slice_mut(s![row_start..row_end])
.assign(&chunk_00);
leverage_01
.slice_mut(s![row_start..row_end])
.assign(&chunk_01);
leverage_11
.slice_mut(s![row_start..row_end])
.assign(&chunk_11);
}
let h_inv_block_diag: Vec<ndarray::Array2<f64>> = (0..specs.len())
.into_par_iter()
.map(|b| {
let (start, end) = ranges[b];
let p_b = end - start;
let mut rhs = ndarray::Array2::<f64>::zeros((total, p_b));
for c in 0..p_b {
rhs[[start + c, c]] = 1.0;
}
let m_full = factor.solve_mat(&rhs);
let mut block = ndarray::Array2::<f64>::zeros((p_b, p_b));
for r in 0..p_b {
for c in 0..p_b {
block[[r, c]] = m_full[[start + r, c]];
}
}
block
})
.collect();
let mut s_pseudologdet_blocks: Vec<
crate::solver::estimate::reml::penalty_logdet::PenaltyPseudologdet,
> = Vec::with_capacity(specs.len());
for b in 0..specs.len() {
let (start, end) = ranges[b];
let p_b = end - start;
let mut s_b = ndarray::Array2::<f64>::zeros((p_b, p_b));
for (k, pen) in specs[b].penalties.iter().enumerate() {
let lambda = per_block_rho[b][k].exp();
pen.add_scaled_to(lambda, &mut s_b);
}
s_pseudologdet_blocks.push(
crate::solver::estimate::reml::penalty_logdet::PenaltyPseudologdet::from_assembled(
s_b, None,
)?,
);
}
let core = binomial_location_scale_core(
&self.y,
&self.weights,
&block_states[Self::BLOCK_T].eta,
&block_states[Self::BLOCK_LOG_SIGMA].eta,
None,
&self.link_kind,
)?;
let mut row_m1 = ndarray::Array1::<f64>::zeros(n);
let mut row_m2 = ndarray::Array1::<f64>::zeros(n);
let mut row_m3 = ndarray::Array1::<f64>::zeros(n);
let mut row_r = ndarray::Array1::<f64>::zeros(n);
let mut row_s = ndarray::Array1::<f64>::zeros(n);
let mut row_q = ndarray::Array1::<f64>::zeros(n);
let row_scalars: Vec<(f64, f64, f64, f64, f64, f64)> = (0..n)
.into_par_iter()
.map(|i| {
let q = core.q0[i];
let r = 1.0 / core.sigma[i];
let s_factor = core.dsigma_deta[i] / core.sigma[i];
let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
self.y[i],
self.weights[i],
q,
core.mu[i],
core.dmu_dq[i],
core.d2mu_dq2[i],
core.d3mu_dq3[i],
&self.link_kind,
);
(m1, m2, m3, r, s_factor, q)
})
.collect();
for (i, (m1, m2, m3, r, s_factor, q)) in row_scalars.into_iter().enumerate() {
row_m1[i] = m1;
row_m2[i] = m2;
row_m3[i] = m3;
row_r[i] = r;
row_s[i] = s_factor;
row_q[i] = q;
}
let mut objective_theta = ndarray::Array1::<f64>::zeros(total_pen);
let mut trace_h_inv_hdot = ndarray::Array1::<f64>::zeros(total_pen);
let mut trace_s_pinv_sdot = ndarray::Array1::<f64>::zeros(total_pen);
const MIN_PARALLEL_PENALTY_COORDS: usize = 2;
let mut penalty_coords = Vec::with_capacity(total_pen);
let mut flat_idx = 0usize;
for b in 0..specs.len() {
for k_local in 0..specs[b].penalties.len() {
penalty_coords.push((flat_idx, b, k_local));
flat_idx += 1;
}
}
let penalty_coord_chunk_size = if penalty_coords.len() >= MIN_PARALLEL_PENALTY_COORDS {
1
} else {
penalty_coords.len().max(1)
};
struct PenaltyGradientPart {
flat_idx: usize,
objective_theta: f64,
trace_h_inv_hdot: f64,
trace_s_pinv_sdot: f64,
}
let penalty_parts: Vec<Result<Vec<PenaltyGradientPart>, String>> = penalty_coords
.par_chunks(penalty_coord_chunk_size)
.map(|chunk| {
let mut chunk_parts = Vec::with_capacity(chunk.len());
for &(flat_idx, b, k_local) in chunk {
let (start, end) = ranges[b];
let p_b = end - start;
let beta_b = beta_flat.slice(s![start..end]).to_owned();
let pen = &specs[b].penalties[k_local];
let lambda_k = per_block_rho[b][k_local].exp();
let mut s_k_local = ndarray::Array2::<f64>::zeros((p_b, p_b));
pen.add_scaled_to(lambda_k, &mut s_k_local);
let s_k_beta_local = s_k_local.dot(&beta_b);
let objective_theta = 0.5 * beta_b.dot(&s_k_beta_local);
let mut a_k_beta_full = ndarray::Array1::<f64>::zeros(total);
a_k_beta_full
.slice_mut(s![start..end])
.assign(&s_k_beta_local);
let mut u_k = factor.solvevec(&a_k_beta_full);
u_k.mapv_inplace(|v| -v);
let m_block = &h_inv_block_diag[b];
let mut tr_pen = 0.0;
for r in 0..p_b {
for c in 0..p_b {
tr_pen += m_block[[r, c]] * s_k_local[[c, r]];
}
}
let u_k_t = u_k.slice(s![0..pt]).to_owned();
let u_k_ls = u_k.slice(s![pt..total]).to_owned();
let d_eta_t = fast_av(&x_t, &u_k_t);
let d_eta_ls = fast_av(&x_ls, &u_k_ls);
let mut drift_trace = 0.0;
for i in 0..n {
let q = row_q[i];
let r_val = row_r[i];
let s_factor = row_s[i];
let m1 = row_m1[i];
let m2 = row_m2[i];
let m3 = row_m3[i];
let a_eta = d_eta_t[i];
let b_eta = d_eta_ls[i];
let sb = s_factor * b_eta;
let du = -r_val * a_eta - q * sb;
let c_tt = r_val * r_val * (m3 * du - 2.0 * m2 * sb);
let c_tl =
s_factor * r_val * (q * m3 * du + m2 * (2.0 * du - q * sb) - m1 * sb);
let c_ll = s_factor * s_factor * (m1 + 3.0 * q * m2 + q * q * m3) * du;
drift_trace += c_tt * leverage_00[i]
+ 2.0 * c_tl * leverage_01[i]
+ c_ll * leverage_11[i];
}
let trace_s_pinv_sdot =
s_pseudologdet_blocks[b].tau_gradient_component(&s_k_local);
chunk_parts.push(PenaltyGradientPart {
flat_idx,
objective_theta,
trace_h_inv_hdot: tr_pen + drift_trace,
trace_s_pinv_sdot,
});
}
Ok(chunk_parts)
})
.collect();
for chunk in penalty_parts {
for part in chunk? {
objective_theta[part.flat_idx] = part.objective_theta;
trace_h_inv_hdot[part.flat_idx] = part.trace_h_inv_hdot;
trace_s_pinv_sdot[part.flat_idx] = part.trace_s_pinv_sdot;
}
}
Ok(Some(BatchedOuterGradientTerms {
objective_theta,
trace_h_inv_hdot,
trace_s_pinv_sdot,
}))
}
}
impl CustomFamilyGenerative for BinomialLocationScaleFamily {
    /// Builds the generative description of the model at the given block states.
    ///
    /// For every observation the log-sigma predictor is mapped to `sigma` with
    /// `exp_sigma_from_eta_scalar`, combined with the threshold predictor via
    /// `binomial_location_scale_q0`, and pushed through the family's inverse
    /// link; the resulting `mu` values form the mean of a Bernoulli noise model.
    ///
    /// # Errors
    /// Fails when `block_states` does not contain exactly two blocks, when
    /// either predictor's length differs from `self.y.len()`, or when the
    /// inverse-link evaluation fails for a row.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let n_obs = self.y.len();
        let block_count = block_states.len();
        if block_count != 2 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "BinomialLocationScaleFamily expects 2 blocks, got {}",
                    block_count
                ),
            }
            .into());
        }
        let threshold_eta = &block_states[Self::BLOCK_T].eta;
        let log_sigma_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
        let lengths_agree = threshold_eta.len() == n_obs && log_sigma_eta.len() == n_obs;
        if !lengths_agree {
            return Err(GamlssError::DimensionMismatch {
                reason: "BinomialLocationScaleFamily generative size mismatch".to_string(),
            }
            .into());
        }
        let mean = gamlss_rowwise_map_result(n_obs, |row| {
            let row_sigma = exp_sigma_from_eta_scalar(log_sigma_eta[row]);
            let row_q = binomial_location_scale_q0(threshold_eta[row], row_sigma);
            let link_jet = inverse_link_jet_for_inverse_link(&self.link_kind, row_q)
                .map_err(|e| format!("location-scale inverse-link evaluation failed: {e}"))?;
            Ok(link_jet.mu)
        })?;
        let noise = NoiseModel::Bernoulli;
        Ok(GenerativeSpec { mean, noise })
    }
}
/// Workspace behind the exact-Newton joint Hessian operations of
/// `BinomialLocationScaleFamily` for one set of block states.
///
/// It owns both block designs, the per-row core quantities and the three
/// per-row Hessian coefficient vectors computed in `new`, plus two caches keyed
/// by the bit pattern of a direction vector that are filled lazily by
/// `direction_eta` and `first_coefficients`.
struct BinomialLocationScaleHessianWorkspace {
/// Family whose `y`, `weights` and `link_kind` are passed to the coefficient helpers.
family: BinomialLocationScaleFamily,
/// Design of the first block; its columns occupy `0..pt` of flat vectors.
x_t: DesignMatrix,
/// Design of the second block; its columns occupy `pt..total` of flat vectors.
x_ls: DesignMatrix,
/// Result of `binomial_location_scale_core` evaluated without a wiggle term.
core: BinomialLocationScaleCore,
/// Per-row weights of the `x_t' diag(.) x_t` Hessian block.
coeff_tt: Array1<f64>,
/// Per-row weights of the `x_t' diag(.) x_ls` cross block.
coeff_tl: Array1<f64>,
/// Per-row weights of the `x_ls' diag(.) x_ls` Hessian block.
coeff_ll: Array1<f64>,
/// Cache of design-times-direction products, keyed by direction bits.
direction_eta_cache: Mutex<HashMap<BinomialDirectionKey, Arc<BinomialDirectionEta>>>,
/// Cache of first directional coefficient triples, keyed by direction bits.
first_coeff_cache: Mutex<HashMap<BinomialDirectionKey, Arc<BinomialRowCoeffTriple>>>,
}
/// Hashable cache key for a direction vector.
///
/// Two keys compare equal only when every component has the same IEEE-754
/// bit pattern, because the key stores `f64::to_bits` of each entry.
#[derive(Clone, Eq, Hash, PartialEq)]
struct BinomialDirectionKey {
    bits: Vec<u64>,
}
impl BinomialDirectionKey {
    /// Builds the key by recording the raw bits of every component of `v`,
    /// in iteration order.
    fn from_array(v: &Array1<f64>) -> Self {
        let mut bits = Vec::with_capacity(v.len());
        for component in v.iter() {
            bits.push(component.to_bits());
        }
        Self { bits }
    }
}
/// Linear-predictor increments induced by one coefficient direction, as
/// produced by `BinomialLocationScaleHessianWorkspace::direction_eta`.
struct BinomialDirectionEta {
/// `x_t` applied to the first `pt` entries of the direction.
t: Array1<f64>,
/// `x_ls` applied to the remaining `pt..total` entries of the direction.
ls: Array1<f64>,
}
/// Three shared per-row coefficient vectors (tt, tl, ll) that are handed to
/// `make_two_block_design_row_coeff_operator` when building derivative operators.
struct BinomialRowCoeffTriple {
/// Coefficients for the first-block / first-block pairing.
tt: Arc<Array1<f64>>,
/// Coefficients for the first-block / second-block pairing.
tl: Arc<Array1<f64>>,
/// Coefficients for the second-block / second-block pairing.
ll: Arc<Array1<f64>>,
}
impl BinomialLocationScaleHessianWorkspace {
/// Builds the workspace for the given family, block states and designs.
///
/// Evaluates `binomial_location_scale_core` (without a wiggle term) on the
/// two block predictors and obtains the per-row Hessian coefficients from
/// `exact_newton_joint_hessian_row_coefficients`. Both caches start empty.
///
/// # Errors
/// Returns an error when `block_states` is too short to contain the
/// `BLOCK_T` or `BLOCK_LOG_SIGMA` entry (previously this was an
/// out-of-bounds panic), or when either of the two helper evaluations fails.
fn new(
    family: BinomialLocationScaleFamily,
    block_states: Vec<ParameterBlockState>,
    x_t: DesignMatrix,
    x_ls: DesignMatrix,
) -> Result<Self, String> {
    let n_blocks = block_states.len();
    // Shared message for a missing block; the constructor already returns
    // `Result`, so a short input is reported instead of panicking.
    let missing_block = |idx: usize| {
        format!(
            "BinomialLocationScaleHessianWorkspace: block_states has {n_blocks} entries but block index {idx} is required"
        )
    };
    let eta_t = &block_states
        .get(BinomialLocationScaleFamily::BLOCK_T)
        .ok_or_else(|| missing_block(BinomialLocationScaleFamily::BLOCK_T))?
        .eta;
    let eta_ls = &block_states
        .get(BinomialLocationScaleFamily::BLOCK_LOG_SIGMA)
        .ok_or_else(|| missing_block(BinomialLocationScaleFamily::BLOCK_LOG_SIGMA))?
        .eta;
    let core = binomial_location_scale_core(
        &family.y,
        &family.weights,
        eta_t,
        eta_ls,
        None,
        &family.link_kind,
    )?;
    let (coeff_tt, coeff_tl, coeff_ll) =
        family.exact_newton_joint_hessian_row_coefficients(&block_states)?;
    Ok(Self {
        family,
        x_t,
        x_ls,
        core,
        coeff_tt,
        coeff_tl,
        coeff_ll,
        direction_eta_cache: Mutex::new(HashMap::new()),
        first_coeff_cache: Mutex::new(HashMap::new()),
    })
}
/// Returns the predictor increments for direction `d_beta`, computing and
/// caching them under `key` on first use.
///
/// The first `pt` entries of `d_beta` are multiplied by `x_t` and entries
/// `pt..total` by `x_ls`. The products are computed without holding the
/// cache lock; if another entry for `key` was inserted in the meantime, the
/// already-stored value is returned.
///
/// # Panics
/// Panics if the cache mutex is poisoned.
fn direction_eta(
    &self,
    key: &BinomialDirectionKey,
    d_beta: &Array1<f64>,
    pt: usize,
    total: usize,
) -> Arc<BinomialDirectionEta> {
    let cached = {
        let guard = self
            .direction_eta_cache
            .lock()
            .expect("binomial direction eta cache lock poisoned");
        guard.get(key).cloned()
    };
    if let Some(hit) = cached {
        return hit;
    }
    let increment_t = self
        .x_t
        .matrixvectormultiply(&d_beta.slice(s![0..pt]).to_owned());
    let increment_ls = self
        .x_ls
        .matrixvectormultiply(&d_beta.slice(s![pt..total]).to_owned());
    let fresh = Arc::new(BinomialDirectionEta {
        t: increment_t,
        ls: increment_ls,
    });
    let mut guard = self
        .direction_eta_cache
        .lock()
        .expect("binomial direction eta cache lock poisoned");
    // Keep whichever value reached the map first.
    guard.entry(key.clone()).or_insert(fresh).clone()
}
/// Returns the first directional coefficient triple for the direction
/// identified by `key`, computing and caching it on first use.
///
/// The triple comes from
/// `binomial_location_scale_first_directional_coefficients` evaluated on the
/// stored core and the supplied predictor increments. The computation runs
/// without holding the cache lock; an entry inserted concurrently wins.
///
/// # Panics
/// Panics if the cache mutex is poisoned.
fn first_coefficients(
    &self,
    key: &BinomialDirectionKey,
    eta: &BinomialDirectionEta,
) -> Arc<BinomialRowCoeffTriple> {
    let cached = {
        let guard = self
            .first_coeff_cache
            .lock()
            .expect("binomial first coefficient cache lock poisoned");
        guard.get(key).cloned()
    };
    if let Some(hit) = cached {
        return hit;
    }
    let family = &self.family;
    let (row_tt, row_tl, row_ll) = binomial_location_scale_first_directional_coefficients(
        &family.y,
        &family.weights,
        &self.core,
        &eta.t,
        &eta.ls,
        &family.link_kind,
    );
    let fresh = Arc::new(BinomialRowCoeffTriple {
        tt: Arc::new(row_tt),
        tl: Arc::new(row_tl),
        ll: Arc::new(row_ll),
    });
    let mut guard = self
        .first_coeff_cache
        .lock()
        .expect("binomial first coefficient cache lock poisoned");
    // Keep whichever value reached the map first.
    guard.entry(key.clone()).or_insert(fresh).clone()
}
/// Computes the second directional coefficient triple for the pair of
/// predictor increments `eta_u` / `eta_v`.
///
/// Unlike `first_coefficients`, the result is not cached.
///
/// # Errors
/// Propagates any failure of
/// `binomial_location_scalesecond_directional_coefficients`.
fn second_coefficients(
    &self,
    eta_u: &BinomialDirectionEta,
    eta_v: &BinomialDirectionEta,
) -> Result<Arc<BinomialRowCoeffTriple>, String> {
    let family = &self.family;
    let (row_tt, row_tl, row_ll) = binomial_location_scalesecond_directional_coefficients(
        &family.y,
        &family.weights,
        &self.core,
        &eta_u.t,
        &eta_u.ls,
        &eta_v.t,
        &eta_v.ls,
        &family.link_kind,
    )?;
    let triple = BinomialRowCoeffTriple {
        tt: Arc::new(row_tt),
        tl: Arc::new(row_tl),
        ll: Arc::new(row_ll),
    };
    Ok(Arc::new(triple))
}
/// Restricts the stored Hessian row coefficients to a weighted subsample.
///
/// Every row listed in `rows` keeps its coefficient multiplied by that
/// row's weight; all other rows become zero. If an index occurs more than
/// once, the last occurrence determines the value.
///
/// # Panics
/// Panics if a row index is out of range for the coefficient vectors.
fn apply_outer_subsample(
    &mut self,
    rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
    let len = self.coeff_tt.len();
    let mut kept_tt = Array1::<f64>::zeros(len);
    let mut kept_tl = Array1::<f64>::zeros(len);
    let mut kept_ll = Array1::<f64>::zeros(len);
    rows.iter().for_each(|sample| {
        let idx = sample.index;
        kept_tt[idx] = self.coeff_tt[idx] * sample.weight;
        kept_tl[idx] = self.coeff_tl[idx] * sample.weight;
        kept_ll[idx] = self.coeff_ll[idx] * sample.weight;
    });
    self.coeff_tt = kept_tt;
    self.coeff_tl = kept_tl;
    self.coeff_ll = kept_ll;
}
}
impl ExactNewtonJointHessianWorkspace for BinomialLocationScaleHessianWorkspace {
/// Assembles the dense joint Hessian from the stored row coefficients.
///
/// The upper triangle is filled from the three weighted cross-products
/// (tt, tl, ll) and then mirrored into the lower triangle.
///
/// # Errors
/// Propagates failures of the weighted cross-product helpers.
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
    let (pt, pls) = (self.x_t.ncols(), self.x_ls.ncols());
    let total = pt + pls;
    let block_tt = xt_diag_x_design(&self.x_t, &self.coeff_tt)?;
    let block_tl = xt_diag_y_design(&self.x_t, &self.coeff_tl, &self.x_ls)?;
    let block_ll = xt_diag_x_design(&self.x_ls, &self.coeff_ll)?;
    let mut dense = Array2::<f64>::zeros((total, total));
    dense.slice_mut(s![0..pt, 0..pt]).assign(&block_tt);
    dense.slice_mut(s![0..pt, pt..total]).assign(&block_tl);
    dense.slice_mut(s![pt..total, pt..total]).assign(&block_ll);
    mirror_upper_to_lower(&mut dense);
    Ok(Some(dense))
}
/// Always `true`: this workspace implements `hessian_matvec`.
fn hessian_matvec_available(&self) -> bool {
true
}
/// Applies the joint Hessian to `v` without forming the dense matrix.
///
/// `v` is split into its first `pt` and remaining `pls` entries, mapped to
/// row space through the two designs, combined row-wise with the stored
/// tt / tl / ll coefficients, and mapped back with the transposed designs.
///
/// # Errors
/// Returns a dimension-mismatch error when `v.len()` differs from the total
/// number of design columns.
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
    let (pt, pls) = (self.x_t.ncols(), self.x_ls.ncols());
    let total = pt + pls;
    let supplied = v.len();
    if supplied != total {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScale matvec dimension mismatch: got {}, expected {}",
                supplied, total
            ),
        }
        .into());
    }
    let row_t = self
        .x_t
        .matrixvectormultiply(&v.slice(s![0..pt]).to_owned());
    let row_ls = self
        .x_ls
        .matrixvectormultiply(&v.slice(s![pt..total]).to_owned());
    let weighted_t = &self.coeff_tt * &row_t + &self.coeff_tl * &row_ls;
    let weighted_ls = &self.coeff_tl * &row_t + &self.coeff_ll * &row_ls;
    let product_t = self.x_t.transpose_vector_multiply(&weighted_t);
    let product_ls = self.x_ls.transpose_vector_multiply(&weighted_ls);
    let mut product = Array1::<f64>::zeros(total);
    product.slice_mut(s![0..pt]).assign(&product_t);
    product.slice_mut(s![pt..total]).assign(&product_ls);
    Ok(Some(product))
}
/// Returns the diagonal of the joint Hessian.
///
/// The first `pt` entries are the weighted column squares of `x_t` under
/// `coeff_tt`, the remaining entries those of `x_ls` under `coeff_ll`.
///
/// # Errors
/// Propagates failures of `design_weighted_column_squares`.
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
    let (pt, pls) = (self.x_t.ncols(), self.x_ls.ncols());
    let total = pt + pls;
    let mut diagonal = Array1::<f64>::zeros(total);
    let squares_t = design_weighted_column_squares(&self.x_t, &self.coeff_tt)?;
    let squares_ls = design_weighted_column_squares(&self.x_ls, &self.coeff_ll)?;
    diagonal.slice_mut(s![0..pt]).assign(&squares_t);
    diagonal.slice_mut(s![pt..total]).assign(&squares_ls);
    Ok(Some(diagonal))
}
/// Dense form of the Hessian's directional derivative along `d_beta_flat`.
///
/// Builds the operator with `directional_derivative_operator` and, when one
/// is returned, materialises it with `to_dense`.
///
/// # Errors
/// Propagates any error from `directional_derivative_operator`.
fn directional_derivative(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let maybe_operator = self.directional_derivative_operator(d_beta_flat)?;
    let dense = match maybe_operator {
        Some(operator) => Some(operator.to_dense()),
        None => None,
    };
    Ok(dense)
}
/// Builds the operator form of the Hessian's directional derivative along
/// `d_beta_flat`.
///
/// The direction's predictor increments and first directional coefficients
/// are looked up (or computed and cached) under a key derived from the bits
/// of `d_beta_flat`; the operator is then assembled from both designs and
/// the three coefficient vectors.
///
/// # Errors
/// Returns an invalid-input error when `d_beta_flat` does not have one
/// entry per design column, and propagates operator-construction failures.
fn directional_derivative_operator(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let (pt, pls) = (self.x_t.ncols(), self.x_ls.ncols());
    let total = pt + pls;
    let supplied = d_beta_flat.len();
    if supplied != total {
        return Err(GamlssError::InvalidInput {
            reason: format!(
                "BinomialLocationScale dH operator: d_beta length {} != {}",
                supplied, total
            ),
        }
        .into());
    }
    let direction_key = BinomialDirectionKey::from_array(d_beta_flat);
    let direction_eta = self.direction_eta(&direction_key, d_beta_flat, pt, total);
    let triple = self.first_coefficients(&direction_key, &direction_eta);
    let operator = make_two_block_design_row_coeff_operator(
        self.x_t.clone(),
        self.x_ls.clone(),
        triple.tt.clone(),
        triple.tl.clone(),
        triple.ll.clone(),
    )?;
    Ok(Some(Arc::new(operator)))
}
/// Dense form of the Hessian's second directional derivative along the
/// pair of directions `d_beta_u_flat` / `d_beta_v_flat`.
///
/// Builds the operator with `second_directional_derivative_operator` and,
/// when one is returned, materialises it with `to_dense`.
///
/// # Errors
/// Propagates any error from `second_directional_derivative_operator`.
fn second_directional_derivative(
    &self,
    d_beta_u_flat: &Array1<f64>,
    d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let maybe_operator =
        self.second_directional_derivative_operator(d_beta_u_flat, d_beta_v_flat)?;
    let dense = match maybe_operator {
        Some(operator) => Some(operator.to_dense()),
        None => None,
    };
    Ok(dense)
}
/// Builds the operator form of the Hessian's second directional derivative
/// along `d_beta_u` and `d_beta_v`.
///
/// Predictor increments for both directions go through the direction cache;
/// the second-order coefficients themselves are computed on every call.
///
/// # Errors
/// Returns an invalid-input error when either direction does not have one
/// entry per design column, and propagates coefficient or
/// operator-construction failures.
fn second_directional_derivative_operator(
    &self,
    d_beta_u: &Array1<f64>,
    d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let (pt, pls) = (self.x_t.ncols(), self.x_ls.ncols());
    let total = pt + pls;
    let (len_u, len_v) = (d_beta_u.len(), d_beta_v.len());
    if len_u != total || len_v != total {
        return Err(GamlssError::InvalidInput {
            reason: format!(
                "BinomialLocationScale d2H operator: d_beta_{{u,v}} length {}/{} != {}",
                len_u, len_v, total
            ),
        }
        .into());
    }
    let direction_key_u = BinomialDirectionKey::from_array(d_beta_u);
    let direction_key_v = BinomialDirectionKey::from_array(d_beta_v);
    let increments_u = self.direction_eta(&direction_key_u, d_beta_u, pt, total);
    let increments_v = self.direction_eta(&direction_key_v, d_beta_v, pt, total);
    let triple = self.second_coefficients(&increments_u, &increments_v)?;
    let operator = make_two_block_design_row_coeff_operator(
        self.x_t.clone(),
        self.x_ls.clone(),
        triple.tt.clone(),
        triple.tl.clone(),
        triple.ll.clone(),
    )?;
    Ok(Some(Arc::new(operator)))
}
}
/// Binomial location-scale family with an additional "wiggle" block.
///
/// The family has three parameter blocks, named "threshold", "log_sigma" and
/// "wiggle" by `parameternames`.
#[derive(Clone)]
pub struct BinomialLocationScaleWiggleFamily {
/// Observed responses; `y.len()` is used as the number of rows.
pub y: Array1<f64>,
/// Per-row weights passed to the likelihood helpers together with `y`.
pub weights: Array1<f64>,
/// Inverse link passed to the core and derivative helpers.
pub link_kind: InverseLink,
/// Optional threshold design; the exact joint path is reported as supported
/// only when both designs are present.
pub threshold_design: Option<DesignMatrix>,
/// Optional log-sigma design (see `threshold_design`).
pub log_sigma_design: Option<DesignMatrix>,
/// Knot vector handed to `monotone_wiggle_basis_with_derivative_order`.
pub wiggle_knots: Array1<f64>,
/// Spline degree handed to `monotone_wiggle_basis_with_derivative_order`.
pub wiggle_degree: usize,
/// Resource policy; its `material_policy()` is used when densifying designs.
pub policy: crate::resource::ResourcePolicy,
}
impl BinomialLocationScaleWiggleFamily {
/// Position of the threshold block in block-state and spec slices.
pub const BLOCK_T: usize = 0;
/// Position of the log-sigma block in block-state and spec slices.
pub const BLOCK_LOG_SIGMA: usize = 1;
/// Position of the wiggle block in block-state and spec slices.
pub const BLOCK_WIGGLE: usize = 2;
/// Names of the three parameter blocks, in block order.
pub fn parameternames() -> &'static [&'static str] {
    const NAMES: &[&str] = &["threshold", "log_sigma", "wiggle"];
    NAMES
}
/// Link descriptors of the three parameter blocks, in block order.
pub fn parameter_links() -> &'static [ParameterLink] {
    const LINKS: &[ParameterLink] = &[
        ParameterLink::InverseLink,
        ParameterLink::Log,
        ParameterLink::Wiggle,
    ];
    LINKS
}
/// Static metadata for this family: its name plus the block names and links
/// reported by `parameternames` and `parameter_links`.
pub fn metadata() -> FamilyMetadata {
    let parameternames = Self::parameternames();
    let parameter_links = Self::parameter_links();
    FamilyMetadata {
        name: "binomial_location_scalewiggle",
        parameternames,
        parameter_links,
    }
}
/// `true` exactly when both the threshold and the log-sigma design are set.
fn exact_joint_supported(&self) -> bool {
    matches!(
        (&self.threshold_design, &self.log_sigma_design),
        (Some(_), Some(_))
    )
}
/// Produces a wiggle knot vector from seed values of `q`.
///
/// Thin wrapper: all three arguments are forwarded unchanged to
/// `initializewiggle_knots_from_seed`, whose result (or error) is returned.
pub fn initializewiggle_knots_from_q(
q_seed: ArrayView1<'_, f64>,
degree: usize,
num_internal_knots: usize,
) -> Result<Array1<f64>, String> {
initializewiggle_knots_from_seed(q_seed, degree, num_internal_knots)
}
/// Evaluates the monotone wiggle basis at `q0` using this family's knots and
/// degree, at the derivative order carried by `basis_options`.
///
/// Only `basis_options.derivative_order` is read.
fn wiggle_basiswith_options(
    &self,
    q0: ArrayView1<'_, f64>,
    basis_options: BasisOptions,
) -> Result<Array2<f64>, String> {
    let order = basis_options.derivative_order;
    let knots = &self.wiggle_knots;
    let degree = self.wiggle_degree;
    monotone_wiggle_basis_with_derivative_order(q0, knots, degree, order)
}
/// Wiggle basis evaluated at `q0` with `BasisOptions::value()`.
fn wiggle_design(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
self.wiggle_basiswith_options(q0, BasisOptions::value())
}
/// Per-row value of the first-derivative wiggle basis applied to
/// `beta_link_wiggle`, plus one.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from `beta_link_wiggle.len()`, and propagates basis-evaluation failures.
fn wiggle_dq_dq0(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let basis_d1 = self.wiggle_basiswith_options(q0, BasisOptions::first_derivative())?;
    let (got, expected) = (basis_d1.ncols(), beta_link_wiggle.len());
    if got == expected {
        Ok(basis_d1.dot(&beta_link_wiggle) + 1.0)
    } else {
        Err(GamlssError::DimensionMismatch {
            reason: format!(
                "wiggle derivative col mismatch: got {}, expected {}",
                got, expected
            ),
        }
        .into())
    }
}
/// Per-row value of the second-derivative wiggle basis applied to
/// `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from `beta_link_wiggle.len()`, and propagates basis-evaluation failures.
fn wiggle_d2q_dq02(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let basis_d2 = self.wiggle_basiswith_options(q0, BasisOptions::second_derivative())?;
    let (got, expected) = (basis_d2.ncols(), beta_link_wiggle.len());
    if got == expected {
        Ok(basis_d2.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch {
            reason: format!(
                "wiggle second-derivative col mismatch: got {}, expected {}",
                got, expected
            ),
        }
        .into())
    }
}
/// Per-row value of the third-derivative wiggle basis (from
/// `wiggle_d3basis_constrained`) applied to `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from `beta_link_wiggle.len()`, and propagates basis-evaluation failures.
fn wiggle_d3q_dq03(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let basis_d3 = self.wiggle_d3basis_constrained(q0)?;
    let (got, expected) = (basis_d3.ncols(), beta_link_wiggle.len());
    if got == expected {
        Ok(basis_d3.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch {
            reason: format!(
                "wiggle third-derivative col mismatch: got {}, expected {}",
                got, expected
            ),
        }
        .into())
    }
}
/// Monotone wiggle basis at `q0` for this family's knots and degree,
/// evaluated at derivative order 3.
fn wiggle_d3basis_constrained(&self, q0: ArrayView1<'_, f64>) -> Result<Array2<f64>, String> {
monotone_wiggle_basis_with_derivative_order(q0, &self.wiggle_knots, self.wiggle_degree, 3)
}
/// Per-row value of the fourth-derivative wiggle basis applied to
/// `beta_link_wiggle`.
///
/// # Errors
/// Returns a dimension-mismatch error when the basis column count differs
/// from `beta_link_wiggle.len()`, and propagates basis-evaluation failures.
fn wiggle_d4q_dq04(
    &self,
    q0: ArrayView1<'_, f64>,
    beta_link_wiggle: ArrayView1<'_, f64>,
) -> Result<Array1<f64>, String> {
    let knots = &self.wiggle_knots;
    let basis_d4 =
        monotone_wiggle_basis_with_derivative_order(q0, knots, self.wiggle_degree, 4)?;
    let (got, expected) = (basis_d4.ncols(), beta_link_wiggle.len());
    if got == expected {
        Ok(basis_d4.dot(&beta_link_wiggle))
    } else {
        Err(GamlssError::DimensionMismatch {
            reason: format!(
                "wiggle fourth-derivative col mismatch: got {}, expected {}",
                got, expected
            ),
        }
        .into())
    }
}
/// Dense threshold / log-sigma designs taken from the family's own optional
/// design fields.
///
/// Thin wrapper around `dense_locscale_block_designs_cached`, supplying the
/// family and block labels and the material policy of `self.policy`.
fn dense_block_designs(&self) -> Result<(Cow<'_, Array2<f64>>, Cow<'_, Array2<f64>>), String> {
dense_locscale_block_designs_cached(
self.threshold_design.as_ref(),
self.log_sigma_design.as_ref(),
"BinomialLocationScaleWiggleFamily",
"BinomialLocationScaleWiggle",
"threshold",
&self.policy.material_policy(),
)
}
/// Dense threshold / log-sigma designs taken from parameter block specs.
///
/// Thin wrapper around `dense_locscale_block_designs_fromspecs`, passing an
/// expected block count of 3, the `BLOCK_T` / `BLOCK_LOG_SIGMA` positions,
/// the family and block labels, and the material policy of `self.policy`.
fn dense_block_designs_fromspecs<'a>(
&self,
specs: &'a [ParameterBlockSpec],
) -> Result<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>), String> {
dense_locscale_block_designs_fromspecs(
specs,
3,
"BinomialLocationScaleWiggleFamily",
"BinomialLocationScaleWiggle",
Self::BLOCK_T,
Self::BLOCK_LOG_SIGMA,
"threshold",
&self.policy.material_policy(),
)
}
/// Resolves the dense designs used by the exact joint path.
///
/// The family's own designs take precedence when both are present;
/// otherwise the designs are read from `specs` when given. With neither
/// source available the result is `Ok(None)`.
fn exact_joint_dense_block_designs<'a>(
    &'a self,
    specs: Option<&'a [ParameterBlockSpec]>,
) -> Result<Option<(Cow<'a, Array2<f64>>, Cow<'a, Array2<f64>>)>, String> {
    if self.exact_joint_supported() {
        return self.dense_block_designs().map(Some);
    }
    match specs {
        Some(block_specs) => self.dense_block_designs_fromspecs(block_specs).map(Some),
        None => Ok(None),
    }
}
/// Returns a copy of this family whose two design fields hold owned dense
/// versions of the exact-joint designs resolved for `specs`.
///
/// Yields `Ok(None)` when `exact_joint_dense_block_designs` finds no designs.
/// All other fields are cloned unchanged.
fn shadow_with_exact_joint_designs(
    &self,
    specs: &[ParameterBlockSpec],
) -> Result<Option<Self>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((threshold_dense, log_sigma_dense)) => {
            let threshold_design = Some(DesignMatrix::Dense(
                crate::matrix::DenseDesignMatrix::from(threshold_dense.into_owned()),
            ));
            let log_sigma_design = Some(DesignMatrix::Dense(
                crate::matrix::DenseDesignMatrix::from(log_sigma_dense.into_owned()),
            ));
            Ok(Some(Self {
                y: self.y.clone(),
                weights: self.weights.clone(),
                link_kind: self.link_kind.clone(),
                threshold_design,
                log_sigma_design,
                wiggle_knots: self.wiggle_knots.clone(),
                wiggle_degree: self.wiggle_degree,
                policy: self.policy.clone(),
            }))
        }
    }
}
/// First-order joint ψ terms, with the dense designs resolved from the
/// family or from `specs`.
///
/// Returns `Ok(None)` when no dense designs are available; otherwise
/// delegates to `exact_newton_joint_psi_terms_from_designs`.
fn exact_newton_joint_psi_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((dense_t, dense_ls)) => self.exact_newton_joint_psi_terms_from_designs(
            block_states,
            derivative_blocks,
            psi_index,
            &dense_t,
            &dense_ls,
        ),
        None => Ok(None),
    }
}
/// Second-order joint ψ terms for the pair (`psi_i`, `psi_j`), with the
/// dense designs resolved from the family or from `specs`.
///
/// Returns `Ok(None)` when no dense designs are available; otherwise
/// delegates to `exact_newton_joint_psisecond_order_terms_from_designs`.
fn exact_newton_joint_psisecond_order_terms_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((dense_t, dense_ls)) => self
            .exact_newton_joint_psisecond_order_terms_from_designs(
                block_states,
                derivative_blocks,
                psi_i,
                psi_j,
                &dense_t,
                &dense_ls,
            ),
        None => Ok(None),
    }
}
/// Directional derivative (along `d_beta_flat`) of the ψ-Hessian term for
/// `psi_index`, with the dense designs resolved from the family or from
/// `specs`.
///
/// Returns `Ok(None)` when no dense designs are available; otherwise
/// delegates to
/// `exact_newton_joint_psihessian_directional_derivative_from_designs`.
fn exact_newton_joint_psihessian_directional_derivative_for_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        Some((dense_t, dense_ls)) => self
            .exact_newton_joint_psihessian_directional_derivative_from_designs(
                block_states,
                derivative_blocks,
                psi_index,
                d_beta_flat,
                &dense_t,
                &dense_ls,
            ),
        None => Ok(None),
    }
}
/// Resolves the ψ direction for `psi_index` and repackages it as a
/// `LocationScaleJointPsiDirection`.
///
/// The lookup is delegated to `locscale_joint_psi_direction_parts` with this
/// family's row count, design widths, block positions and an expected block
/// count of 3. `Ok(None)` from the helper is passed through.
fn exact_newton_joint_psi_direction(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
    policy: &crate::resource::ResourcePolicy,
) -> Result<Option<LocationScaleJointPsiDirection>, String> {
    let resolved = locscale_joint_psi_direction_parts(
        block_states,
        derivative_blocks,
        psi_index,
        self.y.len(),
        x_t.ncols(),
        x_ls.ncols(),
        Self::BLOCK_T,
        Self::BLOCK_LOG_SIGMA,
        3,
        "BinomialLocationScaleWiggleFamily",
        "threshold",
        policy,
    )?;
    Ok(resolved.map(|parts| LocationScaleJointPsiDirection {
        block_idx: parts.block_idx,
        local_idx: parts.local_idx,
        z_primary_psi: parts.primary_z,
        z_ls_psi: parts.log_sigma_z,
        x_primary_psi: parts.primary_psi,
        x_ls_psi: parts.log_sigma_psi,
    }))
}
/// Second-order design drifts for the ψ pair (`psi_a`, `psi_b`).
///
/// Packs this family's row count, design widths, block positions, labels
/// and policy into a `LocScalePsiDriftConfig` and delegates to
/// `locscale_joint_psisecond_design_drifts`.
fn exact_newton_joint_psisecond_design_drifts(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_a: &LocationScaleJointPsiDirection,
    psi_b: &LocationScaleJointPsiDirection,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<LocationScaleJointPsiSecondDrifts, String> {
    let drift_config = LocScalePsiDriftConfig {
        n: self.y.len(),
        p_primary: x_t.ncols(),
        p_log_sigma: x_ls.ncols(),
        primary_block_idx: Self::BLOCK_T,
        log_sigma_block_idx: Self::BLOCK_LOG_SIGMA,
        family_name: "BinomialLocationScaleWiggleFamily",
        primary_label: "threshold",
        policy: &self.policy,
    };
    locscale_joint_psisecond_design_drifts(
        block_states,
        derivative_blocks,
        psi_a,
        psi_b,
        drift_config,
    )
}
/// Assembles the first-order joint ψ terms for one ψ coordinate from dense
/// threshold and log-sigma designs.
///
/// The result carries the ψ-derivative of the objective (`objective_psi`),
/// of the joint score (`score_psi`, ordered threshold | log-sigma | wiggle)
/// and of the joint Hessian. The Hessian derivative is returned either as a
/// lazy `CustomFamilyJointPsiOperator` (when at least one ψ-design exposes a
/// first-order action; `hessian_psi` is then an empty 0x0 matrix) or as a
/// dense symmetric matrix.
///
/// Returns `Ok(None)` when `exact_newton_joint_psi_direction` reports no
/// direction for `psi_index`.
///
/// # Errors
/// Propagates failures from the direction lookup, the core evaluations, the
/// wiggle basis evaluations and the weighted cross-product helpers.
fn exact_newton_joint_psi_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
    // Resolve the ψ direction exactly once. This doubles as the early exit
    // (no direction => nothing to compute) that precedes all heavier work.
    let Some(dir_a) = self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        x_t,
        x_ls,
        &self.policy,
    )?
    else {
        return Ok(None);
    };
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
    // `core` includes the wiggle predictor; `base_core` is evaluated without
    // it and supplies the base index `q0` at which the wiggle basis is taken.
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        Some(etaw),
        &self.link_kind,
    )?;
    let base_core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        None,
        &self.link_kind,
    )?;
    // Wiggle basis at q0 and its first / second derivative bases.
    let b0 = self.wiggle_design(base_core.q0.view())?;
    let d0 =
        self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::first_derivative())?;
    let dd0 =
        self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::second_derivative())?;
    // g3, m, g2: third-derivative basis · beta, (first-derivative basis · beta) + 1,
    // and second-derivative basis · beta. g3 is evaluated first so that its
    // column-count check runs before the unchecked `d0.dot(betaw)`.
    let g3 = self.wiggle_d3q_dq03(base_core.q0.view(), betaw.view())?;
    let m = d0.dot(betaw) + 1.0;
    let g2 = self.wiggle_d2q_dq02(base_core.q0.view(), betaw.view())?;
    let (sigma, ..) = exp_sigma_derivs_up_to_third(eta_ls.view());
    let pt = x_t.ncols();
    let pls = x_ls.ncols();
    let pw = b0.ncols();
    let total = pt + pls + pw;
    let (z_t_psi, z_ls_psi) = (&dir_a.z_primary_psi, &dir_a.z_ls_psi);
    let mut objective_psi = 0.0;
    // Row weights for the score: `*_xa` multiply the ψ-derivative designs,
    // `*_x` multiply the plain designs, `score_w_*` multiply b0 / d0.
    let mut score_t_xa = Array1::<f64>::zeros(n);
    let mut score_t_x = Array1::<f64>::zeros(n);
    let mut score_ls_xa = Array1::<f64>::zeros(n);
    let mut score_ls_x = Array1::<f64>::zeros(n);
    let mut score_w_b = Array1::<f64>::zeros(n);
    let mut score_w_d1 = Array1::<f64>::zeros(n);
    // Row weights for the Hessian derivative: `*_w` pair a ψ-derivative
    // design with a plain design, `*_d` pair two plain designs.
    let mut coeff_tt_w = Array1::<f64>::zeros(n);
    let mut coeff_tt_d = Array1::<f64>::zeros(n);
    let mut coeff_tl_w = Array1::<f64>::zeros(n);
    let mut coeff_tl_d = Array1::<f64>::zeros(n);
    let mut coeff_ll_w = Array1::<f64>::zeros(n);
    let mut coeff_ll_d = Array1::<f64>::zeros(n);
    let mut coeff_tw_b_w = Array1::<f64>::zeros(n);
    let mut coeff_tw_b_d = Array1::<f64>::zeros(n);
    let mut coeff_tw_d1_w = Array1::<f64>::zeros(n);
    let mut coeff_tw_d1_d = Array1::<f64>::zeros(n);
    let mut coeff_tw_d2_d = Array1::<f64>::zeros(n);
    let mut coeff_lw_b_w = Array1::<f64>::zeros(n);
    let mut coeff_lw_b_d = Array1::<f64>::zeros(n);
    let mut coeff_lw_d1_w = Array1::<f64>::zeros(n);
    let mut coeff_lw_d1_d = Array1::<f64>::zeros(n);
    let mut coeff_lw_d2_d = Array1::<f64>::zeros(n);
    let mut coeff_ww_bb = Array1::<f64>::zeros(n);
    let mut coeff_ww_db = Array1::<f64>::zeros(n);
    for row in 0..n {
        let q0 = base_core.q0[row];
        let q = q0 + etaw[row];
        let q0_geom = nonwiggle_q_derivs(eta_t[row], sigma[row]);
        let r_sigma = 1.0 / sigma[row];
        // ψ-drift of q0 and of its derivatives with respect to the two predictors.
        let q0_a = -r_sigma * z_t_psi[row] - q0 * z_ls_psi[row];
        let q0_t_a = q0_geom.q_tl * z_ls_psi[row];
        let q0_ls_a = q0_geom.q_tl * z_t_psi[row] + q0_geom.q_ll * z_ls_psi[row];
        let q0_tl_a = q0_geom.q_tl_ls * z_ls_psi[row];
        let q0_ll_a = q0_geom.q_tl_ls * z_t_psi[row] + q0_geom.q_ll_ls * z_ls_psi[row];
        // Derivatives of q built from m, g2 and the q0 derivatives.
        let q_t = m[row] * q0_geom.q_t;
        let q_ls = m[row] * q0_geom.q_ls;
        let q_tt = g2[row] * q0_geom.q_t * q0_geom.q_t;
        let q_tl = g2[row] * q0_geom.q_t * q0_geom.q_ls + m[row] * q0_geom.q_tl;
        let q_ll = g2[row] * q0_geom.q_ls * q0_geom.q_ls + m[row] * q0_geom.q_ll;
        // ψ-drifts of those derivatives.
        let q_t_a = g2[row] * q0_a * q0_geom.q_t + m[row] * q0_t_a;
        let q_ls_a = g2[row] * q0_a * q0_geom.q_ls + m[row] * q0_ls_a;
        let q_tt_a =
            g3[row] * q0_a * q0_geom.q_t * q0_geom.q_t + g2[row] * (2.0 * q0_geom.q_t * q0_t_a);
        let q_tl_a = g3[row] * q0_a * q0_geom.q_t * q0_geom.q_ls
            + g2[row] * (q0_t_a * q0_geom.q_ls + q0_geom.q_t * q0_ls_a + q0_a * q0_geom.q_tl)
            + m[row] * q0_tl_a;
        let q_ll_a = g3[row] * q0_a * q0_geom.q_ls * q0_geom.q_ls
            + g2[row] * (2.0 * q0_geom.q_ls * q0_ls_a + q0_a * q0_geom.q_ll)
            + m[row] * q0_ll_a;
        // The three values returned by the dispatch helper for this row
        // (presumably the 1st-3rd q-derivatives of the negative log-likelihood).
        let (loss_1, loss_2, loss_3) = binomial_neglog_q_derivatives_dispatch(
            self.y[row],
            self.weights[row],
            q,
            core.mu[row],
            core.dmu_dq[row],
            core.d2mu_dq2[row],
            core.d3mu_dq3[row],
            &self.link_kind,
        );
        let alpha = m[row] * q0_a;
        objective_psi += loss_1 * alpha;
        score_t_xa[row] = loss_1 * q_t;
        score_t_x[row] = loss_2 * alpha * q_t + loss_1 * q_t_a;
        score_ls_xa[row] = loss_1 * q_ls;
        score_ls_x[row] = loss_2 * alpha * q_ls + loss_1 * q_ls_a;
        score_w_b[row] = loss_2 * alpha;
        score_w_d1[row] = loss_1 * q0_a;
        coeff_tt_w[row] = loss_2 * q_t * q_t + loss_1 * q_tt;
        coeff_tt_d[row] = loss_3 * alpha * q_t * q_t
            + 2.0 * loss_2 * q_t * q_t_a
            + loss_2 * alpha * q_tt
            + loss_1 * q_tt_a;
        coeff_tl_w[row] = loss_2 * q_t * q_ls + loss_1 * q_tl;
        coeff_tl_d[row] = loss_3 * alpha * q_t * q_ls
            + loss_2 * (q_t_a * q_ls + q_t * q_ls_a)
            + loss_2 * alpha * q_tl
            + loss_1 * q_tl_a;
        coeff_ll_w[row] = loss_2 * q_ls * q_ls + loss_1 * q_ll;
        coeff_ll_d[row] = loss_3 * alpha * q_ls * q_ls
            + 2.0 * loss_2 * q_ls * q_ls_a
            + loss_2 * alpha * q_ll
            + loss_1 * q_ll_a;
        coeff_tw_b_w[row] = loss_2 * q_t;
        coeff_tw_b_d[row] = loss_3 * alpha * q_t + loss_2 * q_t_a;
        coeff_tw_d1_w[row] = loss_1 * q0_geom.q_t;
        coeff_tw_d1_d[row] = loss_2 * (q_t * q0_a + alpha * q0_geom.q_t) + loss_1 * q0_t_a;
        coeff_tw_d2_d[row] = loss_1 * q0_a * q0_geom.q_t;
        coeff_lw_b_w[row] = loss_2 * q_ls;
        coeff_lw_b_d[row] = loss_3 * alpha * q_ls + loss_2 * q_ls_a;
        coeff_lw_d1_w[row] = loss_1 * q0_geom.q_ls;
        coeff_lw_d1_d[row] = loss_2 * (q_ls * q0_a + alpha * q0_geom.q_ls) + loss_1 * q0_ls_a;
        coeff_lw_d2_d[row] = loss_1 * q0_a * q0_geom.q_ls;
        coeff_ww_bb[row] = loss_3 * alpha;
        coeff_ww_db[row] = loss_2 * q0_a;
    }
    let x_t_map = dir_a.x_primary_psi.as_linear_map_ref();
    let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
    let score_t = x_t_map.transpose_mul(score_t_xa.view()) + fast_atv(x_t, &score_t_x);
    let score_ls = x_ls_map.transpose_mul(score_ls_xa.view()) + fast_atv(x_ls, &score_ls_x);
    let score_w = fast_atv(&b0, &score_w_b) + fast_atv(&d0, &score_w_d1);
    let mut score_psi = Array1::<f64>::zeros(total);
    score_psi.slice_mut(s![0..pt]).assign(&score_t);
    score_psi.slice_mut(s![pt..pt + pls]).assign(&score_ls);
    score_psi.slice_mut(s![pt + pls..total]).assign(&score_w);
    let x_t_action_opt = dir_a.x_primary_psi.cloned_first_action();
    let x_ls_action_opt = dir_a.x_ls_psi.cloned_first_action();
    if x_t_action_opt.is_some() || x_ls_action_opt.is_some() {
        // Operator path. This branch returns, so the basis matrices and each
        // coefficient vector's final use are moved rather than deep-copied.
        let basis_arc = Arc::new(b0);
        let basis_d1_arc = Arc::new(d0);
        let basis_d2_arc = Arc::new(dd0);
        let zeros = Array1::<f64>::zeros(n);
        // Channels: 0 = threshold design, 1 = log-sigma design,
        // 2 = wiggle basis, 3 = its first derivative, 4 = its second derivative.
        let operator = CustomFamilyJointPsiOperator::new(
            total,
            vec![
                CustomFamilyJointDesignChannel::new(
                    0..pt,
                    shared_dense_arc(x_t),
                    x_t_action_opt,
                ),
                CustomFamilyJointDesignChannel::new(
                    pt..pt + pls,
                    shared_dense_arc(x_ls),
                    x_ls_action_opt,
                ),
                CustomFamilyJointDesignChannel::new(
                    pt + pls..total,
                    Arc::clone(&basis_arc),
                    None,
                ),
                CustomFamilyJointDesignChannel::new(
                    pt + pls..total,
                    Arc::clone(&basis_d1_arc),
                    None,
                ),
                CustomFamilyJointDesignChannel::new(
                    pt + pls..total,
                    Arc::clone(&basis_d2_arc),
                    None,
                ),
            ],
            vec![
                CustomFamilyJointDesignPairContribution::new(0, 0, coeff_tt_w, coeff_tt_d),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    1,
                    coeff_tl_w.clone(),
                    coeff_tl_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(1, 0, coeff_tl_w, coeff_tl_d),
                CustomFamilyJointDesignPairContribution::new(1, 1, coeff_ll_w, coeff_ll_d),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    2,
                    coeff_tw_b_w.clone(),
                    coeff_tw_b_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(2, 0, coeff_tw_b_w, coeff_tw_b_d),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    3,
                    coeff_tw_d1_w.clone(),
                    coeff_tw_d1_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    3,
                    0,
                    coeff_tw_d1_w,
                    coeff_tw_d1_d,
                ),
                CustomFamilyJointDesignPairContribution::new(
                    0,
                    4,
                    zeros.clone(),
                    coeff_tw_d2_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    4,
                    0,
                    zeros.clone(),
                    coeff_tw_d2_d,
                ),
                CustomFamilyJointDesignPairContribution::new(
                    1,
                    2,
                    coeff_lw_b_w.clone(),
                    coeff_lw_b_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(2, 1, coeff_lw_b_w, coeff_lw_b_d),
                CustomFamilyJointDesignPairContribution::new(
                    1,
                    3,
                    coeff_lw_d1_w.clone(),
                    coeff_lw_d1_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    3,
                    1,
                    coeff_lw_d1_w,
                    coeff_lw_d1_d,
                ),
                CustomFamilyJointDesignPairContribution::new(
                    1,
                    4,
                    zeros.clone(),
                    coeff_lw_d2_d.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(
                    4,
                    1,
                    zeros.clone(),
                    coeff_lw_d2_d,
                ),
                CustomFamilyJointDesignPairContribution::new(2, 2, zeros.clone(), coeff_ww_bb),
                CustomFamilyJointDesignPairContribution::new(
                    3,
                    2,
                    zeros.clone(),
                    coeff_ww_db.clone(),
                ),
                CustomFamilyJointDesignPairContribution::new(2, 3, zeros, coeff_ww_db),
            ],
        );
        return Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
            objective_psi,
            score_psi,
            hessian_psi: Array2::zeros((0, 0)),
            hessian_psi_operator: Some(std::sync::Arc::new(operator)),
        }));
    }
    // Dense path: build the upper-triangular blocks explicitly, then mirror.
    let h_tt_block = weighted_crossprod_psi_maps(
        x_t_map,
        coeff_tt_w.view(),
        CustomFamilyPsiLinearMapRef::Dense(x_t),
    )? + &weighted_crossprod_psi_maps(
        CustomFamilyPsiLinearMapRef::Dense(x_t),
        coeff_tt_w.view(),
        x_t_map,
    )? + &xt_diag_x_dense(x_t, &coeff_tt_d)?;
    let h_tl_block = weighted_crossprod_psi_maps(
        x_t_map,
        coeff_tl_w.view(),
        CustomFamilyPsiLinearMapRef::Dense(x_ls),
    )? + &weighted_crossprod_psi_maps(
        CustomFamilyPsiLinearMapRef::Dense(x_t),
        coeff_tl_w.view(),
        x_ls_map,
    )? + &xt_diag_y_dense(x_t, &coeff_tl_d, x_ls)?;
    let h_ll_block = weighted_crossprod_psi_maps(
        x_ls_map,
        coeff_ll_w.view(),
        CustomFamilyPsiLinearMapRef::Dense(x_ls),
    )? + &weighted_crossprod_psi_maps(
        CustomFamilyPsiLinearMapRef::Dense(x_ls),
        coeff_ll_w.view(),
        x_ls_map,
    )? + &xt_diag_x_dense(x_ls, &coeff_ll_d)?;
    let h_tw = weighted_crossprod_psi_maps(
        x_t_map,
        coeff_tw_b_w.view(),
        CustomFamilyPsiLinearMapRef::Dense(&b0),
    )? + &xt_diag_y_dense(x_t, &coeff_tw_b_d, &b0)?
        + &weighted_crossprod_psi_maps(
            x_t_map,
            coeff_tw_d1_w.view(),
            CustomFamilyPsiLinearMapRef::Dense(&d0),
        )?
        + &xt_diag_y_dense(x_t, &coeff_tw_d1_d, &d0)?
        + &xt_diag_y_dense(x_t, &coeff_tw_d2_d, &dd0)?;
    let h_lw = weighted_crossprod_psi_maps(
        x_ls_map,
        coeff_lw_b_w.view(),
        CustomFamilyPsiLinearMapRef::Dense(&b0),
    )? + &xt_diag_y_dense(x_ls, &coeff_lw_b_d, &b0)?
        + &weighted_crossprod_psi_maps(
            x_ls_map,
            coeff_lw_d1_w.view(),
            CustomFamilyPsiLinearMapRef::Dense(&d0),
        )?
        + &xt_diag_y_dense(x_ls, &coeff_lw_d1_d, &d0)?
        + &xt_diag_y_dense(x_ls, &coeff_lw_d2_d, &dd0)?;
    // Wiggle/wiggle block: the d0'·diag·b0 term enters together with its transpose.
    let a_ww = xt_diag_y_dense(&d0, &coeff_ww_db, &b0)?;
    let h_ww = xt_diag_x_dense(&b0, &coeff_ww_bb)? + &a_ww + a_ww.t();
    let mut hessian_psi = Array2::<f64>::zeros((total, total));
    hessian_psi.slice_mut(s![0..pt, 0..pt]).assign(&h_tt_block);
    hessian_psi
        .slice_mut(s![0..pt, pt..pt + pls])
        .assign(&h_tl_block);
    hessian_psi
        .slice_mut(s![pt..pt + pls, pt..pt + pls])
        .assign(&h_ll_block);
    hessian_psi
        .slice_mut(s![0..pt, pt + pls..total])
        .assign(&h_tw);
    hessian_psi
        .slice_mut(s![pt..pt + pls, pt + pls..total])
        .assign(&h_lw);
    hessian_psi
        .slice_mut(s![pt + pls..total, pt + pls..total])
        .assign(&h_ww);
    mirror_upper_to_lower(&mut hessian_psi);
    Ok(Some(crate::custom_family::ExactNewtonJointPsiTerms {
        objective_psi,
        score_psi,
        hessian_psi,
        hessian_psi_operator: None,
    }))
}
/// Resolves the psi directions for `psi_i` and `psi_j` and delegates to
/// `exact_newton_joint_psisecond_order_terms_from_parts`.
///
/// Returns `Ok(None)` as soon as either direction lookup yields `None`; the
/// second lookup is not attempted when the first one is absent.
///
/// # Errors
/// Returns a `DimensionMismatch` error (converted into `String`) unless exactly
/// three block states and three derivative block lists are supplied, and
/// propagates any error from the direction lookups or the delegated assembly.
fn exact_newton_joint_psisecond_order_terms_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_i: usize,
    psi_j: usize,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
    let state_count = block_states.len();
    let derivative_list_count = derivative_blocks.len();
    if !(state_count == 3 && derivative_list_count == 3) {
        let reason = format!(
            "BinomialLocationScaleWiggleFamily joint psi second-order terms expect 3 blocks and 3 derivative block lists, got {} and {}",
            state_count, derivative_list_count
        );
        return Err(GamlssError::DimensionMismatch { reason }.into());
    }

    // Direction for the first psi index; bail out before touching the second.
    let direction_i = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_i,
        x_t,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };

    // Direction for the second psi index.
    let direction_j = match self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_j,
        x_t,
        x_ls,
        &self.policy,
    )? {
        Some(direction) => direction,
        None => return Ok(None),
    };

    self.exact_newton_joint_psisecond_order_terms_from_parts(
        block_states,
        derivative_blocks,
        &direction_i,
        &direction_j,
        x_t,
        x_ls,
    )
    .map(Some)
}
/// Accumulates the mixed second-order psi terms for the direction pair
/// (`dir_a`, `dir_b`).
///
/// Row by row, this builds the second psi-derivative contributions to the
/// objective (`objective_psi_psi`), the joint score (`score_psi_psi`) and the
/// joint Hessian (`hessian_psi_psi`) over the stacked coefficient layout
/// `[primary (pt) | log-sigma (pls) | wiggle (pw)]`, where `pt = x_t.ncols()`,
/// `pls = x_ls.ncols()` and `pw` is the column count of the wiggle design.
///
/// `block_states` is indexed directly with `Self::BLOCK_T`,
/// `Self::BLOCK_LOG_SIGMA` and `Self::BLOCK_WIGGLE`; its length is not
/// validated here.
///
/// The returned value always carries a dense `hessian_psi_psi`;
/// `hessian_psi_psi_operator` is `None`.
///
/// # Errors
/// Propagates failures from the second-design-drift helper, the
/// location-scale core evaluations, the wiggle basis builders, the psi
/// row-vector lookups and the fourth-derivative loss dispatch. Returns a
/// `DimensionMismatch` error when any wiggle basis width differs from
/// `betaw.len()`.
fn exact_newton_joint_psisecond_order_terms_from_parts(
&self,
block_states: &[ParameterBlockState],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
dir_a: &LocationScaleJointPsiDirection,
dir_b: &LocationScaleJointPsiDirection,
x_t: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms, String> {
// Mixed (a, b) second-order design drifts; supplies z_primary_ab, z_ls_ab and
// the inputs for the second-order psi row maps built further down.
let second_drifts = self.exact_newton_joint_psisecond_design_drifts(
block_states,
derivative_blocks,
dir_a,
dir_b,
x_t,
x_ls,
)?;
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
// Core evaluated with the wiggle eta included; its mu and dmu/d2mu/d3mu
// fields feed the loss dispatch calls inside the row loop.
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
// Core evaluated without the wiggle (None); only its q0 field is read below.
let base_core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
// Wiggle design and its first/second derivative bases, all evaluated at the
// wiggle-free q0.
let b0 = self.wiggle_design(base_core.q0.view())?;
let d0 =
self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::second_derivative())?;
// Presumably the third-derivative basis and the beta-contracted third and
// fourth q0-derivatives of the wiggle, judging by the helper names — TODO confirm.
let d3_basis = self.wiggle_d3basis_constrained(base_core.q0.view())?;
let d3q = self.wiggle_d3q_dq03(base_core.q0.view(), betaw.view())?;
let d4q = self.wiggle_d4q_dq04(base_core.q0.view(), betaw.view())?;
// Every basis matrix must have one column per wiggle coefficient before the
// matrix-vector products below.
if b0.ncols() != betaw.len()
|| d0.ncols() != betaw.len()
|| dd0.ncols() != betaw.len()
|| d3_basis.ncols() != betaw.len()
{
return Err(GamlssError::DimensionMismatch { reason: format!(
"wiggle derivative/beta mismatch in joint psi psi terms: B={} B'={} B''={} B'''={} betaw={}",
b0.ncols(),
d0.ncols(),
dd0.ncols(),
d3_basis.ncols(),
betaw.len()
) }.into());
}
// m = 1 + B'(q0)·betaw and g2 = B''(q0)·betaw; g3/g4 reuse the helper outputs.
// NOTE(review): m reads as dq/dq0 if etaw equals B(q0)·betaw — confirm.
let m = d0.dot(betaw) + 1.0;
let g2 = dd0.dot(betaw);
let g3 = d3q;
let g4 = d4q;
// sigma and (per the helper name) its first three derivatives w.r.t. eta_ls.
let (sigma, ds, d2s, d3s) = exp_sigma_derivs_up_to_third(eta_ls.view());
let pt = x_t.ncols();
let pls = x_ls.ncols();
let pw = b0.ncols();
let total = pt + pls + pw;
// Row-addressable first-order psi design drifts for each direction.
let x_t_a_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_t_b_map = dir_b.x_primary_psi.as_linear_map_ref();
let x_ls_a_map = dir_a.x_ls_psi.as_linear_map_ref();
let x_ls_b_map = dir_b.x_ls_psi.as_linear_map_ref();
// Row-addressable mixed second-order psi design drifts.
let x_t_ab_map = second_psi_linear_map(
second_drifts.x_primary_ab_action.as_ref(),
second_drifts.x_primary_ab.as_ref(),
n,
pt,
);
let x_ls_ab_map = second_psi_linear_map(
second_drifts.x_ls_ab_action.as_ref(),
second_drifts.x_ls_ab.as_ref(),
n,
pls,
);
// Outputs accumulated over rows.
let mut objective_psi_psi = 0.0;
let mut score_psi_psi = Array1::<f64>::zeros(total);
let mut hessian_psi_psi = Array2::<f64>::zeros((total, total));
// Per-row scratch buffers, allocated once and reset inside the loop:
//   b              stacked row vector [q_t·x_t | q_ls·x_ls | B]
//   c_a, c_b, c_ab product-rule drifts of b along a, b and the mixed ab direction
//   q_mat          block matrix of second-order q coefficients
//   r_a, r_b, r_ab product-rule drifts of q_mat along a, b and ab
let mut b = Array1::<f64>::zeros(total);
let mut c_a = Array1::<f64>::zeros(total);
let mut c_b = Array1::<f64>::zeros(total);
let mut c_ab = Array1::<f64>::zeros(total);
let mut q_mat = Array2::<f64>::zeros((total, total));
let mut r_a = Array2::<f64>::zeros((total, total));
let mut r_b = Array2::<f64>::zeros((total, total));
let mut r_ab = Array2::<f64>::zeros((total, total));
// Wiggle-block scratch vectors (length pw), also reset per row.
let mut qw_a = Array1::<f64>::zeros(pw);
let mut qw_b = Array1::<f64>::zeros(pw);
let mut qw_ab = Array1::<f64>::zeros(pw);
let mut q_tw_a = Array1::<f64>::zeros(pw);
let mut q_tw_b = Array1::<f64>::zeros(pw);
let mut q_lw_a = Array1::<f64>::zeros(pw);
let mut q_lw_b = Array1::<f64>::zeros(pw);
let mut d0_ab = Array1::<f64>::zeros(pw);
let mut q_tw_ab = Array1::<f64>::zeros(pw);
let mut q_lw_ab = Array1::<f64>::zeros(pw);
for row in 0..n {
// q is the wiggle-free index q0 plus this row's wiggle eta.
let q0 = base_core.q0[row];
let q = q0 + etaw[row];
let q0_geom = nonwiggle_q_derivs(eta_t[row], sigma[row]);
let s_safe = sigma[row];
let s2 = s_safe * s_safe;
let s3 = s2 * s_safe;
let s4 = s3 * s_safe;
// Closed form matching the second eta_ls-derivative of ds/sigma^2.
// NOTE(review): that equals d^2(q_tl)/d(eta_ls)^2 if q_t = -1/sigma — confirm
// against nonwiggle_q_derivs.
let q0_tl_ls_ls =
d3s[row] / s2 - 6.0 * ds[row] * d2s[row] / s3 + 6.0 * ds[row].powi(3) / s4;
let r_sigma = 1.0 / s_safe;
// First-order psi drifts of q0 along each direction.
let q0_a = -r_sigma * dir_a.z_primary_psi[row] - q0 * dir_a.z_ls_psi[row];
let q0_b = -r_sigma * dir_b.z_primary_psi[row] - q0 * dir_b.z_ls_psi[row];
// Mixed second-order psi drift of q0.
let q0_ab = -r_sigma * second_drifts.z_primary_ab[row]
+ r_sigma
* (dir_a.z_primary_psi[row] * dir_b.z_ls_psi[row]
+ dir_b.z_primary_psi[row] * dir_a.z_ls_psi[row])
+ q0 * (dir_a.z_ls_psi[row] * dir_b.z_ls_psi[row] - second_drifts.z_ls_ab[row]);
// Psi drifts of the wiggle-free first derivatives q_t and q_ls.
let q0_t_a = q0_geom.q_tl * dir_a.z_ls_psi[row];
let q0_t_b = q0_geom.q_tl * dir_b.z_ls_psi[row];
let q0_t_ab = q0_geom.q_tl_ls * dir_a.z_ls_psi[row] * dir_b.z_ls_psi[row]
+ q0_geom.q_tl * second_drifts.z_ls_ab[row];
let q0_ls_a =
q0_geom.q_tl * dir_a.z_primary_psi[row] + q0_geom.q_ll * dir_a.z_ls_psi[row];
let q0_ls_b =
q0_geom.q_tl * dir_b.z_primary_psi[row] + q0_geom.q_ll * dir_b.z_ls_psi[row];
// NOTE(review): this shortcut (and q0_ll_ab = q0_ab below) holds when
// q_ls = -q0 and q_ll = q0, i.e. presumably for the exp sigma link — confirm.
let q0_ls_ab = -q0_ab;
// Psi drifts of the wiggle-free second derivatives q_tl and q_ll.
let q0_tl_a = q0_geom.q_tl_ls * dir_a.z_ls_psi[row];
let q0_tl_b = q0_geom.q_tl_ls * dir_b.z_ls_psi[row];
let q0_tl_ab = q0_tl_ls_ls * dir_a.z_ls_psi[row] * dir_b.z_ls_psi[row]
+ q0_geom.q_tl_ls * second_drifts.z_ls_ab[row];
let q0_ll_a =
q0_geom.q_tl_ls * dir_a.z_primary_psi[row] + q0_geom.q_ll_ls * dir_a.z_ls_psi[row];
let q0_ll_b =
q0_geom.q_tl_ls * dir_b.z_primary_psi[row] + q0_geom.q_ll_ls * dir_b.z_ls_psi[row];
let q0_ll_ab = q0_ab;
// Psi drifts of the wiggle slope m and curvature g2 via the chain rule in q0.
let m_a = g2[row] * q0_a;
let m_b = g2[row] * q0_b;
let m_ab = g3[row] * q0_a * q0_b + g2[row] * q0_ab;
let g2_a = g3[row] * q0_a;
let g2_b = g3[row] * q0_b;
let g2_ab = g4[row] * q0_a * q0_b + g3[row] * q0_ab;
// Psi drifts of the full index q.
let q_a = m[row] * q0_a;
let q_b = m[row] * q0_b;
let q_ab = m[row] * q0_ab + g2[row] * q0_a * q0_b;
// First- and second-order eta-coefficients of q with the wiggle applied.
let q_t = m[row] * q0_geom.q_t;
let q_ls = m[row] * q0_geom.q_ls;
let q_tt = g2[row] * q0_geom.q_t * q0_geom.q_t;
let q_tl = g2[row] * q0_geom.q_t * q0_geom.q_ls + m[row] * q0_geom.q_tl;
let q_ll = g2[row] * q0_geom.q_ls * q0_geom.q_ls + m[row] * q0_geom.q_ll;
// Product-rule drifts of the first-order coefficients.
let q_t_a = m_a * q0_geom.q_t + m[row] * q0_t_a;
let q_t_b = m_b * q0_geom.q_t + m[row] * q0_t_b;
let q_ls_a = m_a * q0_geom.q_ls + m[row] * q0_ls_a;
let q_ls_b = m_b * q0_geom.q_ls + m[row] * q0_ls_b;
let q_t_ab = m_ab * q0_geom.q_t + m_a * q0_t_b + m_b * q0_t_a + m[row] * q0_t_ab;
let q_ls_ab = m_ab * q0_geom.q_ls + m_a * q0_ls_b + m_b * q0_ls_a + m[row] * q0_ls_ab;
// Product-rule drifts of the second-order coefficients (tt, tl, ll).
let q_tt_a = g2_a * q0_geom.q_t * q0_geom.q_t + g2[row] * 2.0 * q0_geom.q_t * q0_t_a;
let q_tt_b = g2_b * q0_geom.q_t * q0_geom.q_t + g2[row] * 2.0 * q0_geom.q_t * q0_t_b;
let q_tt_ab = g2_ab * q0_geom.q_t * q0_geom.q_t
+ g2_a * 2.0 * q0_geom.q_t * q0_t_b
+ g2_b * 2.0 * q0_geom.q_t * q0_t_a
+ g2[row] * (2.0 * q0_t_a * q0_t_b + 2.0 * q0_geom.q_t * q0_t_ab);
let q_tl_a = g2_a * q0_geom.q_t * q0_geom.q_ls
+ g2[row] * (q0_t_a * q0_geom.q_ls + q0_geom.q_t * q0_ls_a)
+ m_a * q0_geom.q_tl
+ m[row] * q0_tl_a;
let q_tl_b = g2_b * q0_geom.q_t * q0_geom.q_ls
+ g2[row] * (q0_t_b * q0_geom.q_ls + q0_geom.q_t * q0_ls_b)
+ m_b * q0_geom.q_tl
+ m[row] * q0_tl_b;
let q_tl_ab = g2_ab * q0_geom.q_t * q0_geom.q_ls
+ g2_a * (q0_t_b * q0_geom.q_ls + q0_geom.q_t * q0_ls_b)
+ g2_b * (q0_t_a * q0_geom.q_ls + q0_geom.q_t * q0_ls_a)
+ g2[row]
* (q0_t_ab * q0_geom.q_ls
+ q0_t_a * q0_ls_b
+ q0_t_b * q0_ls_a
+ q0_geom.q_t * q0_ls_ab)
+ m_ab * q0_geom.q_tl
+ m_a * q0_tl_b
+ m_b * q0_tl_a
+ m[row] * q0_tl_ab;
let q_ll_a = g2_a * q0_geom.q_ls * q0_geom.q_ls
+ g2[row] * 2.0 * q0_geom.q_ls * q0_ls_a
+ m_a * q0_geom.q_ll
+ m[row] * q0_ll_a;
let q_ll_b = g2_b * q0_geom.q_ls * q0_geom.q_ls
+ g2[row] * 2.0 * q0_geom.q_ls * q0_ls_b
+ m_b * q0_geom.q_ll
+ m[row] * q0_ll_b;
let q_ll_ab = g2_ab * q0_geom.q_ls * q0_geom.q_ls
+ g2_a * 2.0 * q0_geom.q_ls * q0_ls_b
+ g2_b * 2.0 * q0_geom.q_ls * q0_ls_a
+ g2[row] * (2.0 * q0_ls_a * q0_ls_b + 2.0 * q0_geom.q_ls * q0_ls_ab)
+ m_ab * q0_geom.q_ll
+ m_a * q0_ll_b
+ m_b * q0_ll_a
+ m[row] * q0_ll_ab;
// This row of each wiggle basis matrix.
let brow = b0.row(row);
let drow = d0.row(row);
let ddrow = dd0.row(row);
let d3row = d3_basis.row(row);
// Wiggle-block vectors: each is a linear combination of the basis-derivative
// rows, rebuilt from zero for this observation.
qw_a.fill(0.0);
qw_a.scaled_add(q0_a, &drow);
qw_b.fill(0.0);
qw_b.scaled_add(q0_b, &drow);
qw_ab.fill(0.0);
qw_ab.scaled_add(q0_a * q0_b, &ddrow);
qw_ab.scaled_add(q0_ab, &drow);
q_tw_a.fill(0.0);
q_tw_a.scaled_add(q0_a * q0_geom.q_t, &ddrow);
q_tw_a.scaled_add(q0_t_a, &drow);
q_tw_b.fill(0.0);
q_tw_b.scaled_add(q0_b * q0_geom.q_t, &ddrow);
q_tw_b.scaled_add(q0_t_b, &drow);
q_lw_a.fill(0.0);
q_lw_a.scaled_add(q0_a * q0_geom.q_ls, &ddrow);
q_lw_a.scaled_add(q0_ls_a, &drow);
q_lw_b.fill(0.0);
q_lw_b.scaled_add(q0_b * q0_geom.q_ls, &ddrow);
q_lw_b.scaled_add(q0_ls_b, &drow);
// d0_ab: mixed ab drift of the first-derivative basis row, shared by the two
// cross-block vectors below.
d0_ab.fill(0.0);
d0_ab.scaled_add(q0_a * q0_b, &d3row);
d0_ab.scaled_add(q0_ab, &ddrow);
q_tw_ab.fill(0.0);
q_tw_ab.scaled_add(q0_geom.q_t, &d0_ab);
q_tw_ab.scaled_add(q0_b * q0_t_a, &ddrow);
q_tw_ab.scaled_add(q0_a * q0_t_b, &ddrow);
q_tw_ab.scaled_add(q0_t_ab, &drow);
q_lw_ab.fill(0.0);
q_lw_ab.scaled_add(q0_geom.q_ls, &d0_ab);
q_lw_ab.scaled_add(q0_b * q0_ls_a, &ddrow);
q_lw_ab.scaled_add(q0_a * q0_ls_b, &ddrow);
q_lw_ab.scaled_add(q0_ls_ab, &drow);
// loss_1..loss_4: outputs of the neg-log dispatch helpers, presumably the
// first four q-derivatives of the row's negative log-likelihood — TODO confirm.
let (loss_1, loss_2, loss_3) = binomial_neglog_q_derivatives_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
);
let loss_4 = binomial_neglog_q_fourth_derivative_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
)?;
// Objective: loss'' · q_a · q_b + loss' · q_ab.
objective_psi_psi += loss_2 * q_a * q_b + loss_1 * q_ab;
// Design rows and their psi-drift rows for this observation.
let xtr = x_t.row(row);
let xlsr = x_ls.row(row);
let xta = x_t_a_map.row_vector(row)?;
let xtb = x_t_b_map.row_vector(row)?;
let xlsa = x_ls_a_map.row_vector(row)?;
let xlsb = x_ls_b_map.row_vector(row)?;
let xtab = x_t_ab_map.row_vector(row)?;
let xlsab = x_ls_ab_map.row_vector(row)?;
// b = [q_t·x_t | q_ls·x_ls | B].
b.fill(0.0);
b.slice_mut(s![0..pt]).scaled_add(q_t, &xtr);
b.slice_mut(s![pt..pt + pls]).scaled_add(q_ls, &xlsr);
b.slice_mut(s![pt + pls..]).assign(&brow);
// c_a: drift of b along direction a (coefficient drift plus design drift).
c_a.fill(0.0);
c_a.slice_mut(s![0..pt]).scaled_add(q_t_a, &xtr);
c_a.slice_mut(s![0..pt]).scaled_add(q_t, &xta.view());
c_a.slice_mut(s![pt..pt + pls]).scaled_add(q_ls_a, &xlsr);
c_a.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls, &xlsa.view());
c_a.slice_mut(s![pt + pls..]).assign(&qw_a);
// c_b: same construction along direction b.
c_b.fill(0.0);
c_b.slice_mut(s![0..pt]).scaled_add(q_t_b, &xtr);
c_b.slice_mut(s![0..pt]).scaled_add(q_t, &xtb.view());
c_b.slice_mut(s![pt..pt + pls]).scaled_add(q_ls_b, &xlsr);
c_b.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls, &xlsb.view());
c_b.slice_mut(s![pt + pls..]).assign(&qw_b);
// c_ab: mixed drift of b, with all four product-rule terms per design block.
c_ab.fill(0.0);
c_ab.slice_mut(s![0..pt]).scaled_add(q_t_ab, &xtr);
c_ab.slice_mut(s![0..pt]).scaled_add(q_t_b, &xta.view());
c_ab.slice_mut(s![0..pt]).scaled_add(q_t_a, &xtb.view());
c_ab.slice_mut(s![0..pt]).scaled_add(q_t, &xtab.view());
c_ab.slice_mut(s![pt..pt + pls]).scaled_add(q_ls_ab, &xlsr);
c_ab.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls_b, &xlsa.view());
c_ab.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls_a, &xlsb.view());
c_ab.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls, &xlsab.view());
c_ab.slice_mut(s![pt + pls..]).assign(&qw_ab);
// Score: loss'·c_ab + loss''·(q_b·c_a + q_a·c_b) + (loss''·q_ab + loss'''·q_a·q_b)·b.
score_psi_psi.scaled_add(loss_1, &c_ab);
score_psi_psi.scaled_add(loss_2 * q_b, &c_a);
score_psi_psi.scaled_add(loss_2 * q_a, &c_b);
score_psi_psi.scaled_add(loss_2 * q_ab + loss_3 * q_a * q_b, &b);
// Reset the per-row matrices. For each, only the upper block triangle is
// filled before mirror_upper_to_lower is called on it.
q_mat.fill(0.0);
r_a.fill(0.0);
r_b.fill(0.0);
r_ab.fill(0.0);
// q_mat: second-order q coefficients as outer products of the design rows.
scaled_outer_add(q_mat.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xtr);
scaled_outer_add(q_mat.slice_mut(s![0..pt, pt..pt + pls]), q_tl, xtr, xlsr);
scaled_outer_add(
q_mat.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsr,
);
scaled_outer_add(
q_mat.slice_mut(s![0..pt, pt + pls..]),
q0_geom.q_t,
xtr,
drow,
);
scaled_outer_add(
q_mat.slice_mut(s![pt..pt + pls, pt + pls..]),
q0_geom.q_ls,
xlsr,
drow,
);
mirror_upper_to_lower(&mut q_mat);
// r_a: drift of q_mat along direction a.
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt_a, xtr, xtr);
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt, xta.view(), xtr);
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xta.view());
scaled_outer_add(r_a.slice_mut(s![0..pt, pt..pt + pls]), q_tl_a, xtr, xlsr);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xta.view(),
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtr,
xlsa.view(),
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_a,
xlsr,
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsa.view(),
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsa.view(),
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt + pls..]),
q0_geom.q_t,
xta.view(),
drow,
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
q_tw_a.view(),
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt + pls..]),
q0_geom.q_ls,
xlsa.view(),
drow,
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
q_lw_a.view(),
);
mirror_upper_to_lower(&mut r_a);
// r_b: drift of q_mat along direction b (same pattern as r_a).
scaled_outer_add(r_b.slice_mut(s![0..pt, 0..pt]), q_tt_b, xtr, xtr);
scaled_outer_add(r_b.slice_mut(s![0..pt, 0..pt]), q_tt, xtb.view(), xtr);
scaled_outer_add(r_b.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xtb.view());
scaled_outer_add(r_b.slice_mut(s![0..pt, pt..pt + pls]), q_tl_b, xtr, xlsr);
scaled_outer_add(
r_b.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtb.view(),
xlsr,
);
scaled_outer_add(
r_b.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtr,
xlsb.view(),
);
scaled_outer_add(
r_b.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_b,
xlsr,
xlsr,
);
scaled_outer_add(
r_b.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsb.view(),
xlsr,
);
scaled_outer_add(
r_b.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsb.view(),
);
scaled_outer_add(
r_b.slice_mut(s![0..pt, pt + pls..]),
q0_geom.q_t,
xtb.view(),
drow,
);
scaled_outer_add(
r_b.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
q_tw_b.view(),
);
scaled_outer_add(
r_b.slice_mut(s![pt..pt + pls, pt + pls..]),
q0_geom.q_ls,
xlsb.view(),
drow,
);
scaled_outer_add(
r_b.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
q_lw_b.view(),
);
mirror_upper_to_lower(&mut r_b);
// r_ab: mixed drift of q_mat. Primary/primary block first.
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt_ab, xtr, xtr);
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt_b, xta.view(), xtr);
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt_b, xtr, xta.view());
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt_a, xtb.view(), xtr);
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt_a, xtr, xtb.view());
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt, xtab.view(), xtr);
scaled_outer_add(r_ab.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xtab.view());
scaled_outer_add(
r_ab.slice_mut(s![0..pt, 0..pt]),
q_tt,
xta.view(),
xtb.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, 0..pt]),
q_tt,
xtb.view(),
xta.view(),
);
// r_ab: primary/log-sigma block.
scaled_outer_add(r_ab.slice_mut(s![0..pt, pt..pt + pls]), q_tl_ab, xtr, xlsr);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl_b,
xta.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl_b,
xtr,
xlsa.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl_a,
xtb.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl_a,
xtr,
xlsb.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtab.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtr,
xlsab.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xta.view(),
xlsb.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtb.view(),
xlsa.view(),
);
// r_ab: log-sigma/log-sigma block.
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_ab,
xlsr,
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_b,
xlsa.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_b,
xlsr,
xlsa.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_a,
xlsb.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_a,
xlsr,
xlsb.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsab.view(),
xlsr,
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsab.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsa.view(),
xlsb.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsb.view(),
xlsa.view(),
);
// r_ab: primary/wiggle block.
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt + pls..]),
q0_geom.q_t,
xtab.view(),
drow,
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt + pls..]),
1.0,
xta.view(),
q_tw_b.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtb.view(),
q_tw_a.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
q_tw_ab.view(),
);
// r_ab: log-sigma/wiggle block.
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt + pls..]),
q0_geom.q_ls,
xlsab.view(),
drow,
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsa.view(),
q_lw_b.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsb.view(),
q_lw_a.view(),
);
scaled_outer_add(
r_ab.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
q_lw_ab.view(),
);
mirror_upper_to_lower(&mut r_ab);
// Hessian accumulation: the second psi-derivative of
// loss'·q_mat + loss''·b bᵀ expanded by the product rule. Every outer-product
// term is added in both orientations so the contribution stays symmetric.
hessian_psi_psi.scaled_add(loss_1, &r_ab);
hessian_psi_psi.scaled_add(loss_2 * q_b, &r_a);
hessian_psi_psi.scaled_add(loss_2 * q_a, &r_b);
scaled_outer_add(hessian_psi_psi.view_mut(), loss_2, c_ab.view(), b.view());
scaled_outer_add(hessian_psi_psi.view_mut(), loss_2, b.view(), c_ab.view());
scaled_outer_add(hessian_psi_psi.view_mut(), loss_2, c_a.view(), c_b.view());
scaled_outer_add(hessian_psi_psi.view_mut(), loss_2, c_b.view(), c_a.view());
hessian_psi_psi.scaled_add(loss_2 * q_ab, &q_mat);
scaled_outer_add(
hessian_psi_psi.view_mut(),
loss_3 * q_b,
c_a.view(),
b.view(),
);
scaled_outer_add(
hessian_psi_psi.view_mut(),
loss_3 * q_b,
b.view(),
c_a.view(),
);
scaled_outer_add(
hessian_psi_psi.view_mut(),
loss_3 * q_a,
c_b.view(),
b.view(),
);
scaled_outer_add(
hessian_psi_psi.view_mut(),
loss_3 * q_a,
b.view(),
c_b.view(),
);
hessian_psi_psi.scaled_add(loss_3 * q_a * q_b, &q_mat);
scaled_outer_add(
hessian_psi_psi.view_mut(),
loss_4 * q_a * q_b + loss_3 * q_ab,
b.view(),
b.view(),
);
}
// Dense result only; no operator form is produced on this path.
Ok(crate::custom_family::ExactNewtonJointPsiSecondOrderTerms {
objective_psi_psi,
score_psi_psi,
hessian_psi_psi,
hessian_psi_psi_operator: None,
})
}
/// Resolves the psi direction for `psi_index` and delegates to
/// `exact_newton_joint_psihessian_directional_derivative_from_parts` with the
/// coefficient direction `d_beta_flat`.
///
/// Returns `Ok(None)` when the direction lookup yields `None`; otherwise wraps
/// the delegated matrix in `Some`.
///
/// # Errors
/// Propagates any error from the direction lookup or the delegated assembly.
fn exact_newton_joint_psihessian_directional_derivative_from_designs(
    &self,
    block_states: &[ParameterBlockState],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
    psi_index: usize,
    d_beta_flat: &Array1<f64>,
    x_t: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let maybe_direction = self.exact_newton_joint_psi_direction(
        block_states,
        derivative_blocks,
        psi_index,
        x_t,
        x_ls,
        &self.policy,
    )?;
    match maybe_direction {
        // No direction for this psi index: nothing to differentiate.
        None => Ok(None),
        Some(psi_direction) => self
            .exact_newton_joint_psihessian_directional_derivative_from_parts(
                block_states,
                &psi_direction,
                d_beta_flat,
                x_t,
                x_ls,
            )
            .map(Some),
    }
}
fn exact_newton_joint_psihessian_directional_derivative_from_parts(
&self,
block_states: &[ParameterBlockState],
dir_a: &LocationScaleJointPsiDirection,
d_beta_flat: &Array1<f64>,
x_t: &Array2<f64>,
x_ls: &Array2<f64>,
) -> Result<Array2<f64>, String> {
let pt = x_t.ncols();
let pls = x_ls.ncols();
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
let betaw = &block_states[Self::BLOCK_WIGGLE].beta;
let core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
let base_core = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
None,
&self.link_kind,
)?;
let b0 = self.wiggle_design(base_core.q0.view())?;
let d0 =
self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(base_core.q0.view(), BasisOptions::second_derivative())?;
let d3_basis = self.wiggle_d3basis_constrained(base_core.q0.view())?;
let d4q = self.wiggle_d4q_dq04(base_core.q0.view(), betaw.view())?;
let pw = b0.ncols();
let layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
let (u_t, u_ls, uw) = layout.split_three(
d_beta_flat,
"wiggle joint psi hessian directional derivative",
)?;
let total = pt + pls + pw;
if d0.ncols() != betaw.len()
|| dd0.ncols() != betaw.len()
|| d3_basis.ncols() != betaw.len()
{
return Err(GamlssError::DimensionMismatch { reason: format!(
"wiggle derivative/beta mismatch in joint psi mixed drift: B'={} B''={} B'''={} betaw={}",
d0.ncols(),
dd0.ncols(),
d3_basis.ncols(),
betaw.len()
) }.into());
}
let xi_t = x_t.dot(&u_t);
let xi_ls = x_ls.dot(&u_ls);
let x_t_map = dir_a.x_primary_psi.as_linear_map_ref();
let x_ls_map = dir_a.x_ls_psi.as_linear_map_ref();
let m = d0.dot(betaw) + 1.0;
let g2 = dd0.dot(betaw);
let g3 = self.wiggle_d3q_dq03(base_core.q0.view(), betaw.view())?;
let g4 = d4q;
let (sigma, ds, d2s, d3s, d4s) = exp_sigma_derivs_up_to_fourth_array(eta_ls.view());
let mut out = Array2::<f64>::zeros((total, total));
let mut b = Array1::<f64>::zeros(total);
let mut c_a = Array1::<f64>::zeros(total);
let mut gamma = Array1::<f64>::zeros(total);
let mut gamma_a = Array1::<f64>::zeros(total);
let mut q_mat = Array2::<f64>::zeros((total, total));
let mut r_a = Array2::<f64>::zeros((total, total));
let mut c_u = Array2::<f64>::zeros((total, total));
let mut delta_a = Array2::<f64>::zeros((total, total));
let mut q_tw = Array1::<f64>::zeros(pw);
let mut q_lw = Array1::<f64>::zeros(pw);
let mut qw_a = Array1::<f64>::zeros(pw);
let mut q_tw_a = Array1::<f64>::zeros(pw);
let mut q_lw_a = Array1::<f64>::zeros(pw);
let mut dq_tw_u = Array1::<f64>::zeros(pw);
let mut dq_lw_u = Array1::<f64>::zeros(pw);
let mut dq_tw_a_u = Array1::<f64>::zeros(pw);
let mut dq_lw_a_u = Array1::<f64>::zeros(pw);
for row in 0..n {
let q = core.q0[row] + etaw[row];
let (loss_1, loss_2, loss_3) = binomial_neglog_q_derivatives_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
);
let loss_4 = binomial_neglog_q_fourth_derivative_dispatch(
self.y[row],
self.weights[row],
q,
core.mu[row],
core.dmu_dq[row],
core.d2mu_dq2[row],
core.d3mu_dq3[row],
&self.link_kind,
)?;
let q0 = nonwiggle_q_derivs(eta_t[row], sigma[row]);
let s_safe = sigma[row];
let s2 = s_safe * s_safe;
let s3 = s2 * s_safe;
let s4 = s3 * s_safe;
let s5 = s4 * s_safe;
let q0_tl_ls_ls = d3s[row] / s2 - 6.0 * ds[row] * d2s[row] / s3
+ 6.0 * ds[row] * ds[row] * ds[row] / s4;
let q0_tl_ls_ls_ls =
d4s[row] / s2 - 8.0 * ds[row] * d3s[row] / s3 - 6.0 * d2s[row] * d2s[row] / s3
+ 36.0 * ds[row] * ds[row] * d2s[row] / s4
- 24.0 * ds[row] * ds[row] * ds[row] * ds[row] / s5;
let q0_ll_ls_ls = eta_t[row] * q0_tl_ls_ls_ls;
let xtr = x_t.row(row);
let xlsr = x_ls.row(row);
let xta = x_t_map.row_vector(row)?;
let xlsa = x_ls_map.row_vector(row)?;
let br = b0.row(row);
let dr = d0.row(row);
let ddr = dd0.row(row);
let d3r = d3_basis.row(row);
let xi_t_i = xi_t[row];
let xi_ls_i = xi_ls[row];
let xi_ta_i = xta.dot(&u_t);
let xi_lsa_i = xlsa.dot(&u_ls);
let d_dot_u = dr.dot(&uw);
let dd_dot_u = ddr.dot(&uw);
let d3_dot_u = d3r.dot(&uw);
let dq0_u = q0.q_t * xi_t_i + q0.q_ls * xi_ls_i;
let dq0_t_u = q0.q_tl * xi_ls_i;
let dq0_ls_u = q0.q_tl * xi_t_i + q0.q_ll * xi_ls_i;
let dq0_tl_u = q0.q_tl_ls * xi_ls_i;
let dq0_ll_u = q0.q_tl_ls * xi_t_i + q0.q_ll_ls * xi_ls_i;
let dq0_tl_ls_u = q0_tl_ls_ls * xi_ls_i;
let dq0_ll_ls_u = q0_tl_ls_ls * xi_t_i + q0_ll_ls_ls * xi_ls_i;
let q0_a = -q0.q_t * dir_a.z_primary_psi[row] - q0.q_ls * dir_a.z_ls_psi[row];
let q0_t_a = q0.q_tl_ls * dir_a.z_ls_psi[row];
let q0_ls_a = q0.q_tl_ls * dir_a.z_primary_psi[row] + q0.q_ll_ls * dir_a.z_ls_psi[row];
let q0_tl_a = q0.q_tl_ls * dir_a.z_ls_psi[row];
let q0_ll_a = q0.q_tl_ls * dir_a.z_primary_psi[row] + q0.q_ll_ls * dir_a.z_ls_psi[row];
let dq0_a_u = q0_t_a * xi_t_i + q0_ls_a * xi_ls_i;
let dq0_t_a_u = dq0_tl_ls_u * dir_a.z_ls_psi[row];
let dq0_ls_a_u =
dq0_tl_ls_u * dir_a.z_primary_psi[row] + dq0_ll_ls_u * dir_a.z_ls_psi[row];
let dq0_tl_a_u = dq0_tl_ls_u * dir_a.z_ls_psi[row];
let dq0_ll_a_u =
dq0_tl_ls_u * dir_a.z_primary_psi[row] + dq0_ll_ls_u * dir_a.z_ls_psi[row];
let q_t = m[row] * q0.q_t;
let q_ls = m[row] * q0.q_ls;
let q_tt = g2[row] * q0.q_t * q0.q_t;
let q_tl = g2[row] * q0.q_t * q0.q_ls + m[row] * q0.q_tl;
let q_ll = g2[row] * q0.q_ls * q0.q_ls + m[row] * q0.q_ll;
q_tw.fill(0.0);
q_tw.scaled_add(q0.q_t, &dr);
q_lw.fill(0.0);
q_lw.scaled_add(q0.q_ls, &dr);
let dm_u = g2[row] * dq0_u + d_dot_u;
let dg2_u = g3[row] * dq0_u + dd_dot_u;
let dg3_u = g4[row] * dq0_u + d3_dot_u;
let q_a = m[row] * q0_a;
let q_t_a = g2[row] * q0_a * q0.q_t + m[row] * q0_t_a;
let q_ls_a = g2[row] * q0_a * q0.q_ls + m[row] * q0_ls_a;
let q_tt_a = g3[row] * q0_a * q0.q_t * q0.q_t + g2[row] * (2.0 * q0.q_t * q0_t_a);
let q_tl_a = g3[row] * q0_a * q0.q_t * q0.q_ls
+ g2[row] * (q0_t_a * q0.q_ls + q0.q_t * q0_ls_a + q0_a * q0.q_tl)
+ m[row] * q0_tl_a;
let q_ll_a = g3[row] * q0_a * q0.q_ls * q0.q_ls
+ g2[row] * (2.0 * q0.q_ls * q0_ls_a + q0_a * q0.q_ll)
+ m[row] * q0_ll_a;
qw_a.fill(0.0);
qw_a.scaled_add(q0_a, &dr);
q_tw_a.fill(0.0);
q_tw_a.scaled_add(q0_a * q0.q_t, &ddr);
q_tw_a.scaled_add(q0_t_a, &dr);
q_lw_a.fill(0.0);
q_lw_a.scaled_add(q0_a * q0.q_ls, &ddr);
q_lw_a.scaled_add(q0_ls_a, &dr);
let dq_tt_u = dg2_u * q0.q_t * q0.q_t + g2[row] * (2.0 * q0.q_t * dq0_t_u);
let dq_tl_u = dg2_u * q0.q_t * q0.q_ls
+ g2[row] * (dq0_t_u * q0.q_ls + q0.q_t * dq0_ls_u)
+ dm_u * q0.q_tl
+ m[row] * dq0_tl_u;
let dq_ll_u = dg2_u * q0.q_ls * q0.q_ls
+ g2[row] * (2.0 * q0.q_ls * dq0_ls_u)
+ dm_u * q0.q_ll
+ m[row] * dq0_ll_u;
dq_tw_u.fill(0.0);
dq_tw_u.scaled_add(dq0_u * q0.q_t, &ddr);
dq_tw_u.scaled_add(dq0_t_u, &dr);
dq_lw_u.fill(0.0);
dq_lw_u.scaled_add(dq0_u * q0.q_ls, &ddr);
dq_lw_u.scaled_add(dq0_ls_u, &dr);
let dq_tt_a_u = dg3_u * q0_a * q0.q_t * q0.q_t
+ g3[row] * (dq0_a_u * q0.q_t * q0.q_t + 2.0 * q0_a * q0.q_t * dq0_t_u)
+ dg2_u * (2.0 * q0.q_t * q0_t_a)
+ g2[row] * (2.0 * dq0_t_u * q0_t_a + 2.0 * q0.q_t * dq0_t_a_u);
let dq_tl_a_u = dg3_u * q0_a * q0.q_t * q0.q_ls
+ g3[row]
* (dq0_a_u * q0.q_t * q0.q_ls
+ q0_a * dq0_t_u * q0.q_ls
+ q0_a * q0.q_t * dq0_ls_u)
+ dg2_u * (q0_t_a * q0.q_ls + q0.q_t * q0_ls_a + q0_a * q0.q_tl)
+ g2[row]
* (dq0_t_a_u * q0.q_ls
+ q0_t_a * dq0_ls_u
+ dq0_t_u * q0_ls_a
+ q0.q_t * dq0_ls_a_u
+ dq0_a_u * q0.q_tl
+ q0_a * dq0_tl_u)
+ dm_u * q0_tl_a
+ m[row] * dq0_tl_a_u;
let dq_ll_a_u = dg3_u * q0_a * q0.q_ls * q0.q_ls
+ g3[row] * (dq0_a_u * q0.q_ls * q0.q_ls + 2.0 * q0_a * q0.q_ls * dq0_ls_u)
+ dg2_u * (2.0 * q0.q_ls * q0_ls_a + q0_a * q0.q_ll)
+ g2[row]
* (2.0 * dq0_ls_u * q0_ls_a
+ 2.0 * q0.q_ls * dq0_ls_a_u
+ dq0_a_u * q0.q_ll
+ q0_a * dq0_ll_u)
+ dm_u * q0_ll_a
+ m[row] * dq0_ll_a_u;
dq_tw_a_u.fill(0.0);
dq_tw_a_u.scaled_add(dq0_u * q0_a * q0.q_t, &d3r);
dq_tw_a_u.scaled_add(dq0_a_u * q0.q_t + q0_a * dq0_t_u + dq0_u * q0_t_a, &ddr);
dq_tw_a_u.scaled_add(dq0_t_a_u, &dr);
dq_lw_a_u.fill(0.0);
dq_lw_a_u.scaled_add(dq0_u * q0_a * q0.q_ls, &d3r);
dq_lw_a_u.scaled_add(dq0_a_u * q0.q_ls + q0_a * dq0_ls_u + dq0_u * q0_ls_a, &ddr);
dq_lw_a_u.scaled_add(dq0_ls_a_u, &dr);
b.fill(0.0);
b.slice_mut(s![0..pt]).scaled_add(q_t, &xtr);
b.slice_mut(s![pt..pt + pls]).scaled_add(q_ls, &xlsr);
b.slice_mut(s![pt + pls..]).assign(&br);
c_a.fill(0.0);
c_a.slice_mut(s![0..pt]).scaled_add(q_t_a, &xtr);
c_a.slice_mut(s![0..pt]).scaled_add(q_t, &xta.view());
c_a.slice_mut(s![pt..pt + pls]).scaled_add(q_ls_a, &xlsr);
c_a.slice_mut(s![pt..pt + pls])
.scaled_add(q_ls, &xlsa.view());
c_a.slice_mut(s![pt + pls..]).assign(&qw_a);
gamma.fill(0.0);
gamma
.slice_mut(s![0..pt])
.scaled_add(q_tt * xi_t_i + q_tl * xi_ls_i + q0.q_t * d_dot_u, &xtr);
gamma
.slice_mut(s![pt..pt + pls])
.scaled_add(q_tl * xi_t_i + q_ll * xi_ls_i + q0.q_ls * d_dot_u, &xlsr);
gamma.slice_mut(s![pt + pls..]).scaled_add(dq0_u, &dr);
let q_tw_a_dot_u = q_tw_a.dot(&uw);
let q_lw_a_dot_u = q_lw_a.dot(&uw);
gamma_a.fill(0.0);
gamma_a.slice_mut(s![0..pt]).scaled_add(
q_tt_a * xi_t_i
+ q_tt * xi_ta_i
+ q_tl_a * xi_ls_i
+ q_tl * xi_lsa_i
+ q_tw_a_dot_u,
&xtr,
);
gamma_a.slice_mut(s![0..pt]).scaled_add(
q_tt * xi_t_i + q_tl * xi_ls_i + q0.q_t * d_dot_u,
&xta.view(),
);
gamma_a.slice_mut(s![pt..pt + pls]).scaled_add(
q_tl_a * xi_t_i
+ q_tl * xi_ta_i
+ q_ll_a * xi_ls_i
+ q_ll * xi_lsa_i
+ q_lw_a_dot_u,
&xlsr,
);
gamma_a.slice_mut(s![pt..pt + pls]).scaled_add(
q_tl * xi_t_i + q_ll * xi_ls_i + q0.q_ls * d_dot_u,
&xlsa.view(),
);
gamma_a
.slice_mut(s![pt + pls..])
.scaled_add(xi_t_i, &q_tw_a);
gamma_a.slice_mut(s![pt + pls..]).scaled_add(xi_ta_i, &q_tw);
gamma_a
.slice_mut(s![pt + pls..])
.scaled_add(xi_ls_i, &q_lw_a);
gamma_a
.slice_mut(s![pt + pls..])
.scaled_add(xi_lsa_i, &q_lw);
let alpha = b.dot(d_beta_flat);
let alpha_a = c_a.dot(d_beta_flat);
q_mat.fill(0.0);
scaled_outer_add(q_mat.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xtr);
scaled_outer_add(q_mat.slice_mut(s![0..pt, pt..pt + pls]), q_tl, xtr, xlsr);
scaled_outer_add(
q_mat.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsr,
);
scaled_outer_add(
q_mat.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
q_tw.view(),
);
scaled_outer_add(
q_mat.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
q_lw.view(),
);
mirror_upper_to_lower(&mut q_mat);
r_a.fill(0.0);
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt_a, xtr, xtr);
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt, xta.view(), xtr);
scaled_outer_add(r_a.slice_mut(s![0..pt, 0..pt]), q_tt, xtr, xta.view());
scaled_outer_add(r_a.slice_mut(s![0..pt, pt..pt + pls]), q_tl_a, xtr, xlsr);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xta.view(),
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt..pt + pls]),
q_tl,
xtr,
xlsa.view(),
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll_a,
xlsr,
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsa.view(),
xlsr,
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
q_ll,
xlsr,
xlsa.view(),
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt + pls..]),
1.0,
xta.view(),
q_tw.view(),
);
scaled_outer_add(
r_a.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
q_tw_a.view(),
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsa.view(),
q_lw.view(),
);
scaled_outer_add(
r_a.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
q_lw_a.view(),
);
mirror_upper_to_lower(&mut r_a);
c_u.fill(0.0);
scaled_outer_add(c_u.slice_mut(s![0..pt, 0..pt]), dq_tt_u, xtr, xtr);
scaled_outer_add(c_u.slice_mut(s![0..pt, pt..pt + pls]), dq_tl_u, xtr, xlsr);
scaled_outer_add(
c_u.slice_mut(s![pt..pt + pls, pt..pt + pls]),
dq_ll_u,
xlsr,
xlsr,
);
scaled_outer_add(
c_u.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
dq_tw_u.view(),
);
scaled_outer_add(
c_u.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
dq_lw_u.view(),
);
mirror_upper_to_lower(&mut c_u);
delta_a.fill(0.0);
scaled_outer_add(delta_a.slice_mut(s![0..pt, 0..pt]), dq_tt_a_u, xtr, xtr);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, 0..pt]),
dq_tt_u,
xta.view(),
xtr,
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, 0..pt]),
dq_tt_u,
xtr,
xta.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, pt..pt + pls]),
dq_tl_a_u,
xtr,
xlsr,
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, pt..pt + pls]),
dq_tl_u,
xta.view(),
xlsr,
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, pt..pt + pls]),
dq_tl_u,
xtr,
xlsa.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
dq_ll_a_u,
xlsr,
xlsr,
);
scaled_outer_add(
delta_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
dq_ll_u,
xlsa.view(),
xlsr,
);
scaled_outer_add(
delta_a.slice_mut(s![pt..pt + pls, pt..pt + pls]),
dq_ll_u,
xlsr,
xlsa.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, pt + pls..]),
1.0,
xta.view(),
dq_tw_u.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![0..pt, pt + pls..]),
1.0,
xtr,
dq_tw_a_u.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsa.view(),
dq_lw_u.view(),
);
scaled_outer_add(
delta_a.slice_mut(s![pt..pt + pls, pt + pls..]),
1.0,
xlsr,
dq_lw_a_u.view(),
);
mirror_upper_to_lower(&mut delta_a);
out.scaled_add(loss_1, &delta_a);
out.scaled_add(loss_2 * alpha, &r_a);
out.scaled_add(loss_2 * q_a, &c_u);
scaled_outer_add(out.view_mut(), loss_2, gamma_a.view(), b.view());
scaled_outer_add(out.view_mut(), loss_2, b.view(), gamma_a.view());
scaled_outer_add(out.view_mut(), loss_2, gamma.view(), c_a.view());
scaled_outer_add(out.view_mut(), loss_2, c_a.view(), gamma.view());
out.scaled_add(loss_2 * alpha_a, &q_mat);
scaled_outer_add(out.view_mut(), loss_3 * alpha * q_a, b.view(), b.view());
scaled_outer_add(out.view_mut(), loss_3 * q_a, gamma.view(), b.view());
scaled_outer_add(out.view_mut(), loss_3 * q_a, b.view(), gamma.view());
scaled_outer_add(out.view_mut(), loss_3 * alpha, c_a.view(), b.view());
scaled_outer_add(out.view_mut(), loss_3 * alpha, b.view(), c_a.view());
out.scaled_add(loss_3 * alpha * q_a, &q_mat);
scaled_outer_add(
out.view_mut(),
loss_4 * alpha * q_a + loss_3 * alpha_a,
b.view(),
b.view(),
);
}
mirror_upper_to_lower(&mut out);
Ok(out)
}
/// Builds the wiggle parameter-block input together with the knot vector it sits on.
///
/// Knots are first derived from `q_seed` via `Self::initializewiggle_knots_from_q`
/// (using `degree` and `num_internal_knots`); the block input is then produced by
/// `buildwiggle_block_input_from_knots` with the requested penalty settings.
///
/// # Errors
/// Propagates any failure from knot initialization or block construction.
pub fn buildwiggle_block_input(
    q_seed: ArrayView1<'_, f64>,
    degree: usize,
    num_internal_knots: usize,
    penalty_order: usize,
    double_penalty: bool,
) -> Result<(ParameterBlockInput, Array1<f64>), String> {
    let knot_vector = Self::initializewiggle_knots_from_q(q_seed, degree, num_internal_knots)?;
    let wiggle_block = buildwiggle_block_input_from_knots(
        q_seed,
        &knot_vector,
        degree,
        penalty_order,
        double_penalty,
    )?;
    // The caller gets the knots back alongside the block built on them.
    Ok((wiggle_block, knot_vector))
}
/// Computes the per-row coefficient vectors and wiggle bases from which the exact
/// joint Hessian (and its block diagonals) are assembled.
///
/// For every observation the routine evaluates the first and second derivatives of
/// the objective with respect to `q = q0 + etaw` and combines them with the
/// derivatives of `q` with respect to the threshold and log-sigma predictors.
/// The wiggle basis `B(q0)` and its first derivative `B'(q0)` are returned alongside
/// the coefficients so the caller can form the cross-products.
///
/// # Errors
/// Returns a dimension-mismatch error when `block_states` does not hold exactly
/// three blocks, when predictor/weight lengths disagree with `self.y`, or when a
/// wiggle basis has a column count different from the wiggle coefficient vector.
/// Errors from the core evaluation and basis construction are propagated.
fn wiggle_hessian_row_pieces(
    &self,
    block_states: &[ParameterBlockState],
) -> Result<BinomialLocationScaleWiggleHessianRowPieces, String> {
    if block_states.len() != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                block_states.len()
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    if eta_t.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    // The wiggle coefficients are only read here, so borrow them instead of
    // allocating a copy on every Hessian evaluation.
    let betaw0 = &block_states[Self::BLOCK_WIGGLE].beta;
    let core0 = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        Some(etaw),
        &self.link_kind,
    )?;
    // Wiggle basis and its first two derivatives, all evaluated at q0.
    let b0 = self.wiggle_design(core0.q0.view())?;
    let d0 =
        self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::first_derivative())?;
    let dd0 =
        self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::second_derivative())?;
    if b0.ncols() != betaw0.len() || d0.ncols() != betaw0.len() || dd0.ncols() != betaw0.len() {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "wiggle basis/beta mismatch in exact joint Hessian: B={} B'={} B''={} betaw={}",
                b0.ncols(),
                d0.ncols(),
                dd0.ncols(),
                betaw0.len()
            ),
        }
        .into());
    }
    // m = 1 + B'(q0)·betaw and g2 = B''(q0)·betaw
    // (presumably dq/dq0 and d²q/dq0² of the wiggled predictor).
    let m = d0.dot(betaw0) + 1.0;
    let g2 = dd0.dot(betaw0);
    let (sigma, ..) = exp_sigma_derivs_up_to_third(eta_ls.view());
    let mut coeff_tt = Array1::<f64>::zeros(n);
    let mut coeff_tl = Array1::<f64>::zeros(n);
    let mut coeff_ll = Array1::<f64>::zeros(n);
    let mut coeff_tw_b = Array1::<f64>::zeros(n);
    let mut coeff_tw_d = Array1::<f64>::zeros(n);
    let mut coeff_lw_b = Array1::<f64>::zeros(n);
    let mut coeff_lw_d = Array1::<f64>::zeros(n);
    let mut coeffww = Array1::<f64>::zeros(n);
    for i in 0..n {
        let q_i = core0.q0[i] + etaw[i];
        // Only the first two objective derivatives in q are needed for the Hessian.
        let (m1, m2, _) = binomial_neglog_q_derivatives_dispatch(
            self.y[i],
            self.weights[i],
            q_i,
            core0.mu[i],
            core0.dmu_dq[i],
            core0.d2mu_dq2[i],
            core0.d3mu_dq3[i],
            &self.link_kind,
        );
        let q0 = nonwiggle_q_derivs(eta_t[i], sigma[i]);
        // First and second derivatives of q with respect to the two predictors,
        // obtained by chaining through q0.
        let q_t = m[i] * q0.q_t;
        let q_ls = m[i] * q0.q_ls;
        let q_tt = g2[i] * q0.q_t * q0.q_t;
        let q_tl = g2[i] * q0.q_t * q0.q_ls + m[i] * q0.q_tl;
        let q_ll = g2[i] * q0.q_ls * q0.q_ls + m[i] * q0.q_ll;
        coeff_tt[i] = hessian_coeff_fromobjective_q_terms(m1, m2, q_t, q_t, q_tt);
        coeff_tl[i] = hessian_coeff_fromobjective_q_terms(m1, m2, q_t, q_ls, q_tl);
        coeff_ll[i] = hessian_coeff_fromobjective_q_terms(m1, m2, q_ls, q_ls, q_ll);
        // Cross blocks with the wiggle coefficients: the `_b` weights multiply the
        // basis B, the `_d` weights multiply its derivative B'.
        coeff_tw_b[i] = m2 * q_t;
        coeff_tw_d[i] = m1 * q0.q_t;
        coeff_lw_b[i] = m2 * q_ls;
        coeff_lw_d[i] = m1 * q0.q_ls;
        coeffww[i] = m2;
    }
    Ok(BinomialLocationScaleWiggleHessianRowPieces {
        coeff_tt,
        coeff_tl,
        coeff_ll,
        coeff_tw_b,
        coeff_tw_d,
        coeff_lw_b,
        coeff_lw_d,
        coeffww,
        b0,
        d0,
    })
}
}
/// Per-row ingredients of the exact joint Hessian for the binomial
/// location-scale wiggle family, as produced by `wiggle_hessian_row_pieces`.
///
/// Each `coeff_*` vector holds one diagonal weight per observation; the Hessian
/// blocks are weighted cross-products of the block designs with these weights.
struct BinomialLocationScaleWiggleHessianRowPieces {
/// Row weights of the threshold/threshold block (`x_t` with `x_t`).
coeff_tt: Array1<f64>,
/// Row weights of the threshold/log-sigma block (`x_t` with `x_ls`).
coeff_tl: Array1<f64>,
/// Row weights of the log-sigma/log-sigma block (`x_ls` with `x_ls`).
coeff_ll: Array1<f64>,
/// Row weights pairing `x_t` with the wiggle basis `b0`.
coeff_tw_b: Array1<f64>,
/// Row weights pairing `x_t` with the wiggle derivative basis `d0`.
coeff_tw_d: Array1<f64>,
/// Row weights pairing `x_ls` with the wiggle basis `b0`.
coeff_lw_b: Array1<f64>,
/// Row weights pairing `x_ls` with the wiggle derivative basis `d0`.
coeff_lw_d: Array1<f64>,
/// Row weights of the wiggle/wiggle block (`b0` with `b0`).
coeffww: Array1<f64>,
/// Wiggle basis evaluated at `q0`.
b0: Array2<f64>,
/// First-derivative wiggle basis evaluated at `q0`.
d0: Array2<f64>,
}
impl BinomialLocationScaleWiggleHessianRowPieces {
    /// Assembles the full symmetric joint Hessian over the threshold, log-sigma and
    /// wiggle coefficients (in that order) from the stored row weights.
    ///
    /// Only the upper-triangular blocks are filled explicitly; the lower triangle is
    /// produced by `mirror_upper_to_lower`.
    ///
    /// # Errors
    /// Propagates any error from the weighted cross-product helpers.
    fn assemble_dense(&self, x_t: &Array2<f64>, x_ls: &Array2<f64>) -> Result<Array2<f64>, String> {
        // Column offsets of the three coefficient blocks inside the joint matrix.
        let pt = x_t.ncols();
        let ls_end = pt + x_ls.ncols();
        let dim = ls_end + self.b0.ncols();

        let block_tt = xt_diag_x_dense(x_t, &self.coeff_tt)?;
        let block_tl = xt_diag_y_dense(x_t, &self.coeff_tl, x_ls)?;
        let block_ll = xt_diag_x_dense(x_ls, &self.coeff_ll)?;

        // The wiggle cross blocks have one contribution through B and one through B'.
        let mut block_tw = xt_diag_y_dense(x_t, &self.coeff_tw_b, &self.b0)?;
        block_tw += &xt_diag_y_dense(x_t, &self.coeff_tw_d, &self.d0)?;
        let mut block_lw = xt_diag_y_dense(x_ls, &self.coeff_lw_b, &self.b0)?;
        block_lw += &xt_diag_y_dense(x_ls, &self.coeff_lw_d, &self.d0)?;

        let block_ww = xt_diag_x_dense(&self.b0, &self.coeffww)?;

        let mut joint = Array2::<f64>::zeros((dim, dim));
        joint.slice_mut(s![..pt, ..pt]).assign(&block_tt);
        joint.slice_mut(s![..pt, pt..ls_end]).assign(&block_tl);
        joint.slice_mut(s![pt..ls_end, pt..ls_end]).assign(&block_ll);
        joint.slice_mut(s![..pt, ls_end..dim]).assign(&block_tw);
        joint.slice_mut(s![pt..ls_end, ls_end..dim]).assign(&block_lw);
        joint.slice_mut(s![ls_end..dim, ls_end..dim]).assign(&block_ww);
        mirror_upper_to_lower(&mut joint);
        Ok(joint)
    }

    /// Builds only the three diagonal blocks of the joint Hessian, returned as
    /// `(threshold, log-sigma, wiggle)`.
    ///
    /// # Errors
    /// Propagates any error from the weighted cross-product helper.
    fn assemble_block_diagonals(
        &self,
        x_t: &Array2<f64>,
        x_ls: &Array2<f64>,
    ) -> Result<(Array2<f64>, Array2<f64>, Array2<f64>), String> {
        let threshold_block = xt_diag_x_dense(x_t, &self.coeff_tt)?;
        let log_sigma_block = xt_diag_x_dense(x_ls, &self.coeff_ll)?;
        let wiggle_block = xt_diag_x_dense(&self.b0, &self.coeffww)?;
        Ok((threshold_block, log_sigma_block, wiggle_block))
    }
}
/// Per-row weights of the directional derivative of the joint Hessian, as returned
/// by `binomial_wiggle_dh_row_coeffs`.
///
/// Each vector holds one value per observation and is used as the diagonal weight
/// of a cross-product between two of the matrices `x_t`, `x_ls`, `b0`, `d0`, `dd0`.
struct BinomialWiggleDhRowCoeffs {
/// Weights for the `x_t` / `x_t` block.
coeff_tt: Array1<f64>,
/// Weights for the `x_t` / `x_ls` block.
coeff_tl: Array1<f64>,
/// Weights for the `x_ls` / `x_ls` block.
coeff_ll: Array1<f64>,
/// Weights pairing `x_t` with the wiggle basis `b0`.
coeff_tw_b: Array1<f64>,
/// Weights pairing `x_t` with the first-derivative basis `d0`.
coeff_tw_d: Array1<f64>,
/// Weights pairing `x_t` with the second-derivative basis `dd0`.
coeff_tw_dd: Array1<f64>,
/// Weights pairing `x_ls` with the wiggle basis `b0`.
coeff_lw_b: Array1<f64>,
/// Weights pairing `x_ls` with the first-derivative basis `d0`.
coeff_lw_d: Array1<f64>,
/// Weights pairing `x_ls` with the second-derivative basis `dd0`.
coeff_lw_dd: Array1<f64>,
/// Weights for the `b0` / `b0` part of the wiggle/wiggle block.
coeffww_bb: Array1<f64>,
/// Weights for the `d0` / `b0` part of the wiggle/wiggle block (applied in both orders).
coeffww_db: Array1<f64>,
}
/// Borrowed inputs for `binomial_wiggle_dh_row_coeffs`, bundled so the per-row
/// coefficient loop takes a single argument.
struct BinomialWiggleDhRowInputs<'a> {
/// Core evaluation; the loop reads its `q0`, `mu` and link-derivative vectors.
core0: &'a BinomialLocationScaleCore,
/// Threshold linear predictor.
eta_t: &'a Array1<f64>,
/// Wiggle linear predictor, added to `q0` to form `q`.
etaw: &'a Array1<f64>,
/// Per-row sigma values passed to `nonwiggle_q_derivs`.
sigma: &'a Array1<f64>,
/// The caller supplies `d0 · betaw + 1`.
m: &'a Array1<f64>,
/// The caller supplies `dd0 · betaw`.
g2: &'a Array1<f64>,
/// The caller supplies the result of `wiggle_d3q_dq03`.
g3: &'a Array1<f64>,
/// Wiggle basis evaluated at `q0`.
b0: &'a Array2<f64>,
/// First-derivative wiggle basis evaluated at `q0`.
d0: &'a Array2<f64>,
/// Second-derivative wiggle basis evaluated at `q0`.
dd0: &'a Array2<f64>,
/// Wiggle-coefficient part of the direction.
uw: &'a Array1<f64>,
/// Directional change of the threshold predictor (the caller passes `x_t · u_t`).
d_eta_t: &'a Array1<f64>,
/// Directional change of the log-sigma predictor (the caller passes `x_ls · u_ls`).
d_eta_ls: &'a Array1<f64>,
}
impl BinomialLocationScaleWiggleFamily {
/// Computes, for each of the `n` observations, the diagonal weights of the
/// directional derivative of the joint Hessian along the direction described by
/// `inputs.d_eta_t`, `inputs.d_eta_ls` and `inputs.uw`.
///
/// The result mirrors the block structure used when the derivative matrix is
/// assembled: threshold/log-sigma blocks, the cross blocks with the wiggle
/// basis and its first two derivatives, and the wiggle/wiggle block.
fn binomial_wiggle_dh_row_coeffs(
&self,
n: usize,
inputs: &BinomialWiggleDhRowInputs<'_>,
) -> BinomialWiggleDhRowCoeffs {
// Every field is a shared reference, so destructuring `*inputs` just copies the references.
let BinomialWiggleDhRowInputs {
core0,
eta_t,
etaw,
sigma,
m,
g2,
g3,
b0,
d0,
dd0,
uw,
d_eta_t,
d_eta_ls,
} = *inputs;
let mut coeff_tt = Array1::<f64>::zeros(n);
let mut coeff_tl = Array1::<f64>::zeros(n);
let mut coeff_ll = Array1::<f64>::zeros(n);
let mut coeff_tw_b = Array1::<f64>::zeros(n);
let mut coeff_tw_d = Array1::<f64>::zeros(n);
let mut coeff_tw_dd = Array1::<f64>::zeros(n);
let mut coeff_lw_b = Array1::<f64>::zeros(n);
let mut coeff_lw_d = Array1::<f64>::zeros(n);
let mut coeff_lw_dd = Array1::<f64>::zeros(n);
let mut coeffww_bb = Array1::<f64>::zeros(n);
let mut coeffww_db = Array1::<f64>::zeros(n);
for i in 0..n {
let q_i = core0.q0[i] + etaw[i];
// Objective derivatives in q up to third order; the third is needed because
// the Hessian itself is being differentiated.
let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
self.y[i],
self.weights[i],
q_i,
core0.mu[i],
core0.dmu_dq[i],
core0.d2mu_dq2[i],
core0.d3mu_dq3[i],
&self.link_kind,
);
let q0 = nonwiggle_q_derivs(eta_t[i], sigma[i]);
let dq0 = nonwiggle_q_directional(q0, d_eta_t[i], d_eta_ls[i]);
let br = b0.row(i);
let dr = d0.row(i);
let ddr = dd0.row(i);
let duw_i = dr.dot(uw);
let dduw_i = ddr.dot(uw);
// Directional changes of m and g2: one part through the move of q0, one
// part through the wiggle-coefficient direction `uw`.
let delta_m = g2[i] * dq0.delta_q + duw_i;
let delta_g2 = g3[i] * dq0.delta_q + dduw_i;
// First and second derivatives of q in the two predictors (undifferentiated).
let q_t = m[i] * q0.q_t;
let q_ls = m[i] * q0.q_ls;
let q_tt = g2[i] * q0.q_t * q0.q_t;
let q_tl = g2[i] * q0.q_t * q0.q_ls + m[i] * q0.q_tl;
let q_ll = g2[i] * q0.q_ls * q0.q_ls + m[i] * q0.q_ll;
// Product-rule directional derivatives of the quantities above.
let delta_q_t = delta_m * q0.q_t + m[i] * dq0.delta_q_t;
let delta_q_ls = delta_m * q0.q_ls + m[i] * dq0.delta_q_ls;
let delta_q_tt = delta_g2 * q0.q_t * q0.q_t + g2[i] * 2.0 * q0.q_t * dq0.delta_q_t;
let delta_q_tl = delta_g2 * q0.q_t * q0.q_ls
+ g2[i] * (dq0.delta_q_t * q0.q_ls + q0.q_t * dq0.delta_q_ls)
+ delta_m * q0.q_tl
+ m[i] * dq0.delta_q_tl;
let delta_q_ll = delta_g2 * q0.q_ls * q0.q_ls
+ g2[i] * 2.0 * q0.q_ls * dq0.delta_q_ls
+ delta_m * q0.q_ll
+ m[i] * dq0.delta_q_ll;
// Total directional change of q itself.
let delta_q = m[i] * dq0.delta_q + br.dot(uw);
coeff_tt[i] = directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, delta_q, q_t, q_t, q_tt, delta_q_t, delta_q_t, delta_q_tt,
);
coeff_tl[i] = directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, delta_q, q_t, q_ls, q_tl, delta_q_t, delta_q_ls, delta_q_tl,
);
coeff_ll[i] = directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, delta_q, q_ls, q_ls, q_ll, delta_q_ls, delta_q_ls, delta_q_ll,
);
// Cross blocks with the wiggle coefficients, grouped by which basis matrix
// (`b0`, `d0` or `dd0`) the weight multiplies when the block is assembled.
coeff_tw_b[i] = m3 * delta_q * q_t + m2 * delta_q_t;
coeff_tw_d[i] = m2 * (q_t * dq0.delta_q + delta_q * q0.q_t) + m1 * dq0.delta_q_t;
coeff_tw_dd[i] = m1 * dq0.delta_q * q0.q_t;
coeff_lw_b[i] = m3 * delta_q * q_ls + m2 * delta_q_ls;
coeff_lw_d[i] = m2 * (q_ls * dq0.delta_q + delta_q * q0.q_ls) + m1 * dq0.delta_q_ls;
coeff_lw_dd[i] = m1 * dq0.delta_q * q0.q_ls;
// Wiggle/wiggle block: a `b0`/`b0` term and a `d0`/`b0` term.
coeffww_bb[i] = m3 * delta_q;
coeffww_db[i] = m2 * dq0.delta_q;
}
BinomialWiggleDhRowCoeffs {
coeff_tt,
coeff_tl,
coeff_ll,
coeff_tw_b,
coeff_tw_d,
coeff_tw_dd,
coeff_lw_b,
coeff_lw_d,
coeff_lw_dd,
coeffww_bb,
coeffww_db,
}
}
/// Returns the effective Jacobian for block `block_idx` by delegating to
/// `AdditiveWiggleBlockLayout::block_effective_jacobian`.
///
/// The layout declares two outputs, the threshold and log-sigma blocks as
/// additive blocks, and the wiggle block as the wiggle block.
pub fn block_effective_jacobian(
specs: &[ParameterBlockSpec],
block_idx: usize,
) -> Result<Box<dyn BlockEffectiveJacobian>, String> {
crate::util::block_jacobian::AdditiveWiggleBlockLayout {
family: "BinomialLocationScaleWiggleFamily",
n_outputs: 2,
additive_blocks: &[Self::BLOCK_T, Self::BLOCK_LOG_SIGMA],
wiggle_block: Some(Self::BLOCK_WIGGLE),
}
.block_effective_jacobian(specs, block_idx)
}
}
impl CustomFamily for BinomialLocationScaleWiggleFamily {
/// Always returns `true` for this family.
// NOTE(review): presumably tells the fitter that the joint exact-Newton Hessian
// changes with the coefficients — confirm against the `CustomFamily` trait docs.
fn exact_newton_joint_hessian_beta_dependent(&self) -> bool {
true
}
/// Reports the coefficient-Hessian cost for this family by delegating to the
/// shared location-scale cost helper with the number of observations and `specs`.
fn coefficient_hessian_cost(&self, specs: &[ParameterBlockSpec]) -> u64 {
    use crate::families::location_scale_engine::location_scale_coefficient_hessian_cost;

    let n_rows = self.y.len() as u64;
    location_scale_coefficient_hessian_cost(n_rows, specs)
}
/// Selects `PseudoLogdetMode::HardPseudo` as the pseudo log-determinant mode for this family.
fn pseudo_logdet_mode(&self) -> crate::custom_family::PseudoLogdetMode {
crate::custom_family::PseudoLogdetMode::HardPseudo
}
/// Supplies linear inequality constraints for a block.
///
/// Only the wiggle block is constrained: it receives the monotone-wiggle
/// non-negativity constraints sized by the number of columns of `spec.design`.
/// Every other block yields `None`.
fn block_linear_constraints(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    spec: &ParameterBlockSpec,
) -> Result<Option<LinearInequalityConstraints>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    let constraints = if block_idx == Self::BLOCK_WIGGLE {
        monotone_wiggle_nonnegative_constraints(spec.design.ncols())
    } else {
        None
    };
    Ok(constraints)
}
/// Post-processes a freshly updated coefficient vector for one block.
///
/// The vector is returned unchanged for every block. For the wiggle block it is
/// first validated with `validate_monotone_wiggle_beta_nonnegative`.
///
/// # Errors
/// Propagates the validation error when the wiggle coefficients are rejected.
///
/// # Panics
/// Panics if `block_spec.name` is empty.
fn post_update_block_beta(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    block_spec: &ParameterBlockSpec,
    beta: Array1<f64>,
) -> Result<Array1<f64>, String> {
    assert!(block_states.len() <= isize::MAX as usize);
    assert!(!block_spec.name.is_empty());
    if block_idx == Self::BLOCK_WIGGLE {
        validate_monotone_wiggle_beta_nonnegative(
            &beta,
            "BinomialLocationScaleWiggleFamily post-update",
        )?;
    }
    Ok(beta)
}
/// Evaluates the family at the current block states.
///
/// Returns the log-likelihood from the core evaluation together with, for each of
/// the three blocks (threshold, log-sigma, wiggle), an exact-Newton working set
/// made of the block gradient and the dense block-diagonal Hessian.
///
/// # Errors
/// Fails on a wrong number of blocks, on predictor/weight length mismatches, when
/// the threshold or log-sigma design is missing, when the joint dense block designs
/// are unavailable, or when any delegated computation fails.
fn evaluate(&self, block_states: &[ParameterBlockState]) -> Result<FamilyEvaluation, String> {
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let lengths = [eta_t.len(), eta_ls.len(), etaw.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != n) {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let core = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        Some(etaw),
        &self.link_kind,
    )?;
    let wiggle_design = self.wiggle_design(core.q0.view())?;
    let dq_dq0 =
        self.wiggle_dq_dq0(core.q0.view(), block_states[Self::BLOCK_WIGGLE].beta.view())?;
    let Some(threshold_design) = self.threshold_design.as_ref() else {
        return Err(
            "BinomialLocationScaleWiggleFamily exact-newton path is missing threshold design"
                .to_string(),
        );
    };
    let Some(log_sigma_design) = self.log_sigma_design.as_ref() else {
        return Err(
            "BinomialLocationScaleWiggleFamily exact-newton path is missing log-sigma design"
                .to_string(),
        );
    };

    // Per-row scores with respect to each linear predictor.
    let mut grad_eta_t = Array1::<f64>::zeros(n);
    let mut grad_eta_ls = Array1::<f64>::zeros(n);
    let mut grad_q = Array1::<f64>::zeros(n);
    for i in 0..n {
        let (m1, _, _) = binomial_neglog_q_derivatives_dispatch(
            self.y[i],
            self.weights[i],
            core.q0[i] + etaw[i],
            core.mu[i],
            core.dmu_dq[i],
            core.d2mu_dq2[i],
            core.d3mu_dq3[i],
            &self.link_kind,
        );
        // `m1` comes from the "neglog" derivative helper, so the score is its negation.
        let score_q = -m1;
        // Chain through q0 for the two non-wiggle predictors.
        let score_q0 = score_q * dq_dq0[i];
        let q0d = nonwiggle_q_derivs(eta_t[i], core.sigma[i]);
        grad_eta_t[i] = score_q0 * q0d.q_t;
        grad_eta_ls[i] = score_q0 * q0d.q_ls;
        grad_q[i] = score_q;
    }

    // Pull the row scores back onto the coefficients of each block.
    let grad_t = threshold_design.transpose_vector_multiply(&grad_eta_t);
    let grad_ls = log_sigma_design.transpose_vector_multiply(&grad_eta_ls);
    let grad_w = fast_atv(&wiggle_design, &grad_q);

    let (x_t, x_ls) = self
        .exact_joint_dense_block_designs(None)?
        .ok_or("BinomialLocationScaleWiggleFamily: joint block designs unavailable")?;
    let pieces = self.wiggle_hessian_row_pieces(block_states)?;
    let (h_tt, h_ll, h_ww) = pieces.assemble_block_diagonals(&x_t, &x_ls)?;

    let blockworking_sets = vec![
        BlockWorkingSet::ExactNewton {
            gradient: grad_t,
            hessian: SymmetricMatrix::Dense(h_tt),
        },
        BlockWorkingSet::ExactNewton {
            gradient: grad_ls,
            hessian: SymmetricMatrix::Dense(h_ll),
        },
        BlockWorkingSet::ExactNewton {
            gradient: grad_w,
            hessian: SymmetricMatrix::Dense(h_ww),
        },
    ];
    Ok(FamilyEvaluation {
        log_likelihood: core.log_likelihood,
        blockworking_sets,
    })
}
/// Computes only the log-likelihood at the current block states.
///
/// After checking that exactly three blocks are present and that all predictor and
/// weight vectors match the response length, the work is delegated to
/// `binomial_location_scale_ll_only` with the wiggle predictor supplied.
///
/// # Errors
/// Returns a dimension-mismatch error for malformed inputs and propagates any
/// error from the likelihood helper.
fn log_likelihood_only(&self, block_states: &[ParameterBlockState]) -> Result<f64, String> {
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let lengths = [eta_t.len(), eta_ls.len(), etaw.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != n) {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    binomial_location_scale_ll_only(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        Some(etaw),
        &self.link_kind,
    )
}
/// Computes the log-likelihood, optionally restricted to a weighted row subsample.
///
/// Without `options.outer_score_subsample` this is exactly `log_likelihood_only`.
/// With a subsample, the per-row log-likelihood terms of the listed rows are
/// multiplied by the row's subsample weight and summed in parallel; rows whose
/// observation weight is zero are skipped.
///
/// # Errors
/// Returns a dimension-mismatch error for malformed inputs and propagates
/// inverse-link or per-row likelihood failures.
fn log_likelihood_only_with_options(
    &self,
    block_states: &[ParameterBlockState],
    options: &BlockwiseFitOptions,
) -> Result<f64, String> {
    use rayon::iter::ParallelIterator;

    let subsample = match options.outer_score_subsample.as_ref() {
        Some(subsample) => subsample,
        None => return self.log_likelihood_only(block_states),
    };
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let n = self.y.len();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
    let lengths = [eta_t.len(), eta_ls.len(), etaw.len(), self.weights.len()];
    if lengths.iter().any(|&len| len != n) {
        return Err(GamlssError::DimensionMismatch {
            reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
        }
        .into());
    }
    let link_kind = &self.link_kind;
    let is_probit = matches!(link_kind, InverseLink::Standard(StandardLink::Probit));
    subsample
        .rows
        .par_iter()
        .try_fold(
            || 0.0_f64,
            |partial, row| -> Result<f64, String> {
                let i = row.index;
                let wi = self.weights[i];
                if wi == 0.0 {
                    return Ok(partial);
                }
                let SigmaJet1 { sigma, .. } = exp_sigma_jet1_scalar(eta_ls[i]);
                let q = binomial_location_scale_q0(eta_t[i], sigma) + etaw[i];
                // NOTE(review): the probit branch passes a fixed 0.5 instead of an
                // evaluated mean — presumably the probit likelihood does not read
                // `mu`; confirm against `binomial_location_scale_log_likelihood`.
                let mu = if is_probit {
                    0.5
                } else {
                    inverse_link_jet_for_inverse_link(link_kind, q)
                        .map_err(|e| {
                            format!("location-scale inverse-link evaluation failed: {e}")
                        })?
                        .mu
                };
                let term =
                    binomial_location_scale_log_likelihood(self.y[i], wi, q, link_kind, mu)?;
                Ok(partial + row.weight * term)
            },
        )
        .try_reduce(|| 0.0_f64, |left, right| Ok(left + right))
}
/// Always returns `true` for this family.
// NOTE(review): presumably forces the joint outer hyper-parameter path —
// confirm against the `CustomFamily` trait docs.
fn requires_joint_outer_hyper_path(&self) -> bool {
true
}
/// Directional derivative of one block's diagonal Hessian block along `d_beta`.
///
/// The block direction is embedded into a zero-padded joint direction, the joint
/// Hessian directional derivative is evaluated, and the diagonal sub-block that
/// belongs to `block_idx` is returned. Unknown block indices yield `Ok(None)`.
///
/// # Errors
/// Fails when the number of blocks is not three, when `d_beta` has the wrong
/// length for the block, when the joint derivative is unavailable, or when any
/// delegated computation fails.
fn exact_newton_hessian_directional_derivative(
    &self,
    block_states: &[ParameterBlockState],
    block_idx: usize,
    d_beta: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    let n_blocks = block_states.len();
    if n_blocks != 3 {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                n_blocks
            ),
        }
        .into());
    }
    let (x_t, x_ls) = self.dense_block_designs()?;
    let pt = x_t.ncols();
    let pls = x_ls.ncols();
    let eta_t = &block_states[Self::BLOCK_T].eta;
    let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    // The core and the wiggle design are evaluated here only to learn the number
    // of wiggle columns.
    let core0 = binomial_location_scale_core(
        &self.y,
        &self.weights,
        eta_t,
        eta_ls,
        None,
        &self.link_kind,
    )?;
    let b0 = self.wiggle_design(core0.q0.view())?;
    let pw = b0.ncols();
    let total = pt + pls + pw;

    // Position of the requested block inside the flattened joint coefficient vector.
    let (lo, hi) = match block_idx {
        Self::BLOCK_T => (0usize, pt),
        Self::BLOCK_LOG_SIGMA => (pt, pt + pls),
        Self::BLOCK_WIGGLE => (pt + pls, total),
        _ => return Ok(None),
    };
    let expected_len = hi - lo;
    if d_beta.len() != expected_len {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "block {block_idx} d_beta length mismatch: got {}, expected {}",
                d_beta.len(),
                expected_len
            ),
        }
        .into());
    }

    // Zero everywhere except on the requested block.
    let mut d_beta_flat = Array1::<f64>::zeros(total);
    d_beta_flat.slice_mut(s![lo..hi]).assign(d_beta);

    let d_joint = self
        .exact_newton_joint_hessian_directional_derivative(block_states, &d_beta_flat)?
        .ok_or_else(|| "missing exact wiggle joint dH".to_string())?;
    let block_derivative = d_joint.slice(s![lo..hi, lo..hi]).to_owned();
    Ok(Some(block_derivative))
}
/// Dense exact joint Hessian over all three coefficient blocks.
///
/// Returns `Ok(None)` when the joint dense block designs are not available;
/// otherwise the row pieces are computed and assembled into one dense matrix.
///
/// # Errors
/// Propagates failures from design lookup, row-piece computation, or assembly.
fn exact_newton_joint_hessian(
    &self,
    block_states: &[ParameterBlockState],
) -> Result<Option<Array2<f64>>, String> {
    let (x_t, x_ls) = match self.exact_joint_dense_block_designs(None)? {
        Some(designs) => designs,
        None => return Ok(None),
    };
    let pieces = self.wiggle_hessian_row_pieces(block_states)?;
    let joint = pieces.assemble_dense(&x_t, &x_ls)?;
    Ok(Some(joint))
}
/// Always returns `true` for this family.
// NOTE(review): presumably advertises that `exact_newton_joint_hessian` is
// implemented explicitly — confirm against the `CustomFamily` trait docs.
fn has_explicit_joint_hessian(&self) -> bool {
true
}
fn exact_newton_joint_hessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
if eta_t.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let Some((x_t, x_ls)) = self.exact_joint_dense_block_designs(None)? else {
return Ok(None);
};
let pt = x_t.ncols();
let pls = x_ls.ncols();
let betaw0 = block_states[Self::BLOCK_WIGGLE].beta.clone();
let core0 = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
let b0 = self.wiggle_design(core0.q0.view())?;
let pw = b0.ncols();
let beta_layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
let total = beta_layout.total();
let (u_t, u_ls, uw) = beta_layout.split_three(d_beta_flat, "wiggle joint d_beta")?;
let d_eta_t = fast_av(&x_t, &u_t);
let d_eta_ls = fast_av(&x_ls, &u_ls);
let d0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::second_derivative())?;
let d3q = self.wiggle_d3q_dq03(core0.q0.view(), betaw0.view())?;
if d0.ncols() != betaw0.len() || dd0.ncols() != betaw0.len() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"wiggle derivative/beta mismatch in exact joint dH: B'={} B''={} betaw={}",
d0.ncols(),
dd0.ncols(),
betaw0.len()
),
}
.into());
}
let m = d0.dot(&betaw0) + 1.0;
let g2 = dd0.dot(&betaw0);
let g3 = d3q;
let (sigma, ..) = exp_sigma_derivs_up_to_third(eta_ls.view());
let BinomialWiggleDhRowCoeffs {
coeff_tt,
coeff_tl,
coeff_ll,
coeff_tw_b,
coeff_tw_d,
coeff_tw_dd,
coeff_lw_b,
coeff_lw_d,
coeff_lw_dd,
coeffww_bb,
coeffww_db,
} = self.binomial_wiggle_dh_row_coeffs(
n,
&BinomialWiggleDhRowInputs {
core0: &core0,
eta_t,
etaw,
sigma: &sigma,
m: &m,
g2: &g2,
g3: &g3,
b0: &b0,
d0: &d0,
dd0: &dd0,
uw: &uw,
d_eta_t: &d_eta_t,
d_eta_ls: &d_eta_ls,
},
);
let d_h_tt = xt_diag_x_dense(&x_t, &coeff_tt)?;
let d_h_tl = xt_diag_y_dense(&x_t, &coeff_tl, &x_ls)?;
let d_h_ll = xt_diag_x_dense(&x_ls, &coeff_ll)?;
let d_h_tw = xt_diag_y_dense(&x_t, &coeff_tw_b, &b0)?
+ &xt_diag_y_dense(&x_t, &coeff_tw_d, &d0)?
+ &xt_diag_y_dense(&x_t, &coeff_tw_dd, &dd0)?;
let d_h_lw = xt_diag_y_dense(&x_ls, &coeff_lw_b, &b0)?
+ &xt_diag_y_dense(&x_ls, &coeff_lw_d, &d0)?
+ &xt_diag_y_dense(&x_ls, &coeff_lw_dd, &dd0)?;
let mut d_hww = xt_diag_x_dense(&b0, &coeffww_bb)?;
d_hww += &xt_diag_y_dense(&d0, &coeffww_db, &b0)?;
d_hww += &xt_diag_y_dense(&b0, &coeffww_db, &d0)?;
let mut d_h = Array2::<f64>::zeros((total, total));
d_h.slice_mut(s![0..pt, 0..pt]).assign(&d_h_tt);
d_h.slice_mut(s![0..pt, pt..pt + pls]).assign(&d_h_tl);
d_h.slice_mut(s![pt..pt + pls, pt..pt + pls])
.assign(&d_h_ll);
d_h.slice_mut(s![0..pt, pt + pls..total]).assign(&d_h_tw);
d_h.slice_mut(s![pt..pt + pls, pt + pls..total])
.assign(&d_h_lw);
d_h.slice_mut(s![pt + pls..total, pt + pls..total])
.assign(&d_hww);
mirror_upper_to_lower(&mut d_h);
Ok(Some(d_h))
}
fn exact_newton_joint_hessiansecond_directional_derivative(
&self,
block_states: &[ParameterBlockState],
d_beta_u_flat: &Array1<f64>,
d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
if eta_t.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let Some((x_t, x_ls)) = self.exact_joint_dense_block_designs(None)? else {
return Ok(None);
};
let pt = x_t.ncols();
let pls = x_ls.ncols();
let betaw0 = block_states[Self::BLOCK_WIGGLE].beta.clone();
let core0 = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
let b0 = self.wiggle_design(core0.q0.view())?;
let d0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::second_derivative())?;
let d3_basis = self.wiggle_d3basis_constrained(core0.q0.view())?;
let d3q = self.wiggle_d3q_dq03(core0.q0.view(), betaw0.view())?;
let d4q = self.wiggle_d4q_dq04(core0.q0.view(), betaw0.view())?;
let pw = b0.ncols();
let beta_layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
let total = beta_layout.total();
if d0.ncols() != betaw0.len()
|| dd0.ncols() != betaw0.len()
|| d3_basis.ncols() != betaw0.len()
{
return Err(GamlssError::DimensionMismatch { reason: format!(
"wiggle derivative/beta mismatch in exact joint d2H: B'={} B''={} B'''={} betaw={}",
d0.ncols(),
dd0.ncols(),
d3_basis.ncols(),
betaw0.len()
) }.into());
}
let (u_t, u_ls, uw) = beta_layout.split_three(d_beta_u_flat, "wiggle joint d_beta_u")?;
let (v_t, v_ls, vw) = beta_layout.split_three(d_betav_flat, "wiggle joint d_betav")?;
let d_eta_t_u = fast_av(&x_t, &u_t);
let d_eta_ls_u = fast_av(&x_ls, &u_ls);
let d_eta_tv = fast_av(&x_t, &v_t);
let d_eta_lsv = fast_av(&x_ls, &v_ls);
let m = d0.dot(&betaw0) + 1.0;
let g2 = dd0.dot(&betaw0);
let g3 = d3q;
let g4 = d4q;
let (sigma, ds, d2s, d3s, d4s) = exp_sigma_derivs_up_to_fourth_array(eta_ls.view());
let mut d2_h: Array2<f64> = (0..n)
.into_par_iter()
.map(|i| -> Result<Array2<f64>, String> {
let mut row_h = Array2::<f64>::zeros((total, total));
let q_i = core0.q0[i] + etaw[i];
let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
self.y[i],
self.weights[i],
q_i,
core0.mu[i],
core0.dmu_dq[i],
core0.d2mu_dq2[i],
core0.d3mu_dq3[i],
&self.link_kind,
);
let m4 = binomial_neglog_q_fourth_derivative_dispatch(
self.y[i],
self.weights[i],
q_i,
core0.mu[i],
core0.dmu_dq[i],
core0.d2mu_dq2[i],
core0.d3mu_dq3[i],
&self.link_kind,
)?;
let q0 = nonwiggle_q_derivs(eta_t[i], sigma[i]);
let s_safe = sigma[i];
let s2 = s_safe * s_safe;
let s3 = s2 * s_safe;
let s4 = s3 * s_safe;
let s5 = s4 * s_safe;
let q0_tl_ls_ls =
d3s[i] / s2 - 6.0 * ds[i] * d2s[i] / s3 + 6.0 * ds[i] * ds[i] * ds[i] / s4;
let q0_tl_ls_ls_ls =
d4s[i] / s2 - 8.0 * ds[i] * d3s[i] / s3 - 6.0 * d2s[i] * d2s[i] / s3
+ 36.0 * ds[i] * ds[i] * d2s[i] / s4
- 24.0 * ds[i] * ds[i] * ds[i] * ds[i] / s5;
let q0_ll_ls_ls = eta_t[i] * q0_tl_ls_ls_ls;
let u_t_i = d_eta_t_u[i];
let u_ls_i = d_eta_ls_u[i];
let v_t_i = d_eta_tv[i];
let v_ls_i = d_eta_lsv[i];
let dq0_u = q0.q_t * u_t_i + q0.q_ls * u_ls_i;
let dq0v = q0.q_t * v_t_i + q0.q_ls * v_ls_i;
let d2q0_uv =
q0.q_tl * (u_t_i * v_ls_i + v_t_i * u_ls_i) + q0.q_ll * u_ls_i * v_ls_i;
let dq0_t_u = q0.q_tl * u_ls_i;
let dq0_tv = q0.q_tl * v_ls_i;
let dq0_ls_u = q0.q_tl * u_t_i + q0.q_ll * u_ls_i;
let dq0_lsv = q0.q_tl * v_t_i + q0.q_ll * v_ls_i;
let dq0_tl_u = q0.q_tl_ls * u_ls_i;
let dq0_tlv = q0.q_tl_ls * v_ls_i;
let dq0_ll_u = q0.q_tl_ls * u_t_i + q0.q_ll_ls * u_ls_i;
let dq0_llv = q0.q_tl_ls * v_t_i + q0.q_ll_ls * v_ls_i;
let d2q0_t_uv = q0.q_tl_ls * u_ls_i * v_ls_i;
let d2q0_ls_uv =
q0.q_tl_ls * (u_ls_i * v_t_i + v_ls_i * u_t_i) + q0.q_ll_ls * u_ls_i * v_ls_i;
let d2q0_tl_uv = q0_tl_ls_ls * u_ls_i * v_ls_i;
let d2q0_ll_uv =
q0_tl_ls_ls * (u_t_i * v_ls_i + v_t_i * u_ls_i) + q0_ll_ls_ls * u_ls_i * v_ls_i;
let br = b0.row(i);
let dr = d0.row(i);
let ddr = dd0.row(i);
let d3r = d3_basis.row(i);
let b_u = br.dot(&uw);
let bv = br.dot(&vw);
let b1_u = dr.dot(&uw);
let b1v = dr.dot(&vw);
let b2_u = ddr.dot(&uw);
let b2v = ddr.dot(&vw);
let b3_u = d3r.dot(&uw);
let b3v = d3r.dot(&vw);
let dm_u = b1_u + g2[i] * dq0_u;
let dmv = b1v + g2[i] * dq0v;
let d2m_uv = g3[i] * dq0_u * dq0v + g2[i] * d2q0_uv + b2v * dq0_u + b2_u * dq0v;
let dg2_u = b2_u + g3[i] * dq0_u;
let dg2v = b2v + g3[i] * dq0v;
let d2g2_uv = g4[i] * dq0_u * dq0v + g3[i] * d2q0_uv + b3v * dq0_u + b3_u * dq0v;
let dq_u = m[i] * dq0_u + b_u;
let dqv = m[i] * dq0v + bv;
let d2q_uv = m[i] * d2q0_uv + g2[i] * dq0_u * dq0v + b1_u * dq0v + b1v * dq0_u;
let q_t = m[i] * q0.q_t;
let q_ls = m[i] * q0.q_ls;
let q_tt = g2[i] * q0.q_t * q0.q_t;
let q_tl = g2[i] * q0.q_t * q0.q_ls + m[i] * q0.q_tl;
let q_ll = g2[i] * q0.q_ls * q0.q_ls + m[i] * q0.q_ll;
let dq_t_u = dm_u * q0.q_t + m[i] * dq0_t_u;
let dq_tv = dmv * q0.q_t + m[i] * dq0_tv;
let dq_ls_u = dm_u * q0.q_ls + m[i] * dq0_ls_u;
let dq_lsv = dmv * q0.q_ls + m[i] * dq0_lsv;
let d2q_t_uv = d2m_uv * q0.q_t + dm_u * dq0_tv + dmv * dq0_t_u + m[i] * d2q0_t_uv;
let d2q_ls_uv =
d2m_uv * q0.q_ls + dm_u * dq0_lsv + dmv * dq0_ls_u + m[i] * d2q0_ls_uv;
let dq_tt_u = dg2_u * q0.q_t * q0.q_t + g2[i] * (2.0 * q0.q_t * dq0_t_u);
let dq_ttv = dg2v * q0.q_t * q0.q_t + g2[i] * (2.0 * q0.q_t * dq0_tv);
let d2q_tt_uv = d2g2_uv * q0.q_t * q0.q_t
+ dg2_u * (2.0 * q0.q_t * dq0_tv)
+ dg2v * (2.0 * q0.q_t * dq0_t_u)
+ g2[i] * (2.0 * dq0_t_u * dq0_tv + 2.0 * q0.q_t * d2q0_t_uv);
let dq_tl_u = dg2_u * q0.q_t * q0.q_ls
+ g2[i] * (dq0_t_u * q0.q_ls + q0.q_t * dq0_ls_u)
+ dm_u * q0.q_tl
+ m[i] * dq0_tl_u;
let dq_tlv = dg2v * q0.q_t * q0.q_ls
+ g2[i] * (dq0_tv * q0.q_ls + q0.q_t * dq0_lsv)
+ dmv * q0.q_tl
+ m[i] * dq0_tlv;
let d2q_tl_uv = d2g2_uv * q0.q_t * q0.q_ls
+ dg2_u * (dq0_tv * q0.q_ls + q0.q_t * dq0_lsv)
+ dg2v * (dq0_t_u * q0.q_ls + q0.q_t * dq0_ls_u)
+ g2[i]
* (d2q0_t_uv * q0.q_ls
+ dq0_t_u * dq0_lsv
+ dq0_tv * dq0_ls_u
+ q0.q_t * d2q0_ls_uv)
+ d2m_uv * q0.q_tl
+ dm_u * dq0_tlv
+ dmv * dq0_tl_u
+ m[i] * d2q0_tl_uv;
let dq_ll_u = dg2_u * q0.q_ls * q0.q_ls
+ g2[i] * (2.0 * q0.q_ls * dq0_ls_u)
+ dm_u * q0.q_ll
+ m[i] * dq0_ll_u;
let dq_llv = dg2v * q0.q_ls * q0.q_ls
+ g2[i] * (2.0 * q0.q_ls * dq0_lsv)
+ dmv * q0.q_ll
+ m[i] * dq0_llv;
let d2q_ll_uv = d2g2_uv * q0.q_ls * q0.q_ls
+ dg2_u * (2.0 * q0.q_ls * dq0_lsv)
+ dg2v * (2.0 * q0.q_ls * dq0_ls_u)
+ g2[i] * (2.0 * dq0_ls_u * dq0_lsv + 2.0 * q0.q_ls * d2q0_ls_uv)
+ d2m_uv * q0.q_ll
+ dm_u * dq0_llv
+ dmv * dq0_ll_u
+ m[i] * d2q0_ll_uv;
let coeff_tt = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, q_t, q_tt, dq_t_u, dq_tv, dq_t_u,
dq_tv, d2q_t_uv, d2q_t_uv, dq_tt_u, dq_ttv, d2q_tt_uv,
);
let coeff_tl = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, q_ls, q_tl, dq_t_u, dq_tv, dq_ls_u,
dq_lsv, d2q_t_uv, d2q_ls_uv, dq_tl_u, dq_tlv, d2q_tl_uv,
);
let coeff_ll = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_ls, q_ls, q_ll, dq_ls_u, dq_lsv, dq_ls_u,
dq_lsv, d2q_ls_uv, d2q_ls_uv, dq_ll_u, dq_llv, d2q_ll_uv,
);
let xtr = x_t.row(i);
let xlsr = x_ls.row(i);
for a_idx in 0..pt {
for b_idx in a_idx..pt {
row_h[[a_idx, b_idx]] += coeff_tt * xtr[a_idx] * xtr[b_idx];
}
}
for a_idx in 0..pt {
for b_idx in 0..pls {
row_h[[a_idx, pt + b_idx]] += coeff_tl * xtr[a_idx] * xlsr[b_idx];
}
}
for a_idx in 0..pls {
for b_idx in a_idx..pls {
row_h[[pt + a_idx, pt + b_idx]] += coeff_ll * xlsr[a_idx] * xlsr[b_idx];
}
}
for j in 0..pw {
let qw = br[j];
let dqw_u = dr[j] * dq0_u;
let dqwv = dr[j] * dq0v;
let d2qw_uv = ddr[j] * dq0_u * dq0v + dr[j] * d2q0_uv;
let q_tw = dr[j] * q0.q_t;
let q_lw = dr[j] * q0.q_ls;
let dq_tw_u = ddr[j] * dq0_u * q0.q_t + dr[j] * dq0_t_u;
let dq_twv = ddr[j] * dq0v * q0.q_t + dr[j] * dq0_tv;
let d2q_tw_uv = d3r[j] * dq0_u * dq0v * q0.q_t
+ ddr[j] * (d2q0_uv * q0.q_t + dq0_u * dq0_tv + dq0v * dq0_t_u)
+ dr[j] * d2q0_t_uv;
let dq_lw_u = ddr[j] * dq0_u * q0.q_ls + dr[j] * dq0_ls_u;
let dq_lwv = ddr[j] * dq0v * q0.q_ls + dr[j] * dq0_lsv;
let d2q_lw_uv = d3r[j] * dq0_u * dq0v * q0.q_ls
+ ddr[j] * (d2q0_uv * q0.q_ls + dq0_u * dq0_lsv + dq0v * dq0_ls_u)
+ dr[j] * d2q0_ls_uv;
let coeff_tw = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, qw, q_tw, dq_t_u, dq_tv, dqw_u,
dqwv, d2q_t_uv, d2qw_uv, dq_tw_u, dq_twv, d2q_tw_uv,
);
let coeff_lw = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_ls, qw, q_lw, dq_ls_u, dq_lsv, dqw_u,
dqwv, d2q_ls_uv, d2qw_uv, dq_lw_u, dq_lwv, d2q_lw_uv,
);
for a_idx in 0..pt {
row_h[[a_idx, pt + pls + j]] += coeff_tw * xtr[a_idx];
}
for a_idx in 0..pls {
row_h[[pt + a_idx, pt + pls + j]] += coeff_lw * xlsr[a_idx];
}
}
for j in 0..pw {
let qwj = br[j];
let dqwj_u = dr[j] * dq0_u;
let dqwjv = dr[j] * dq0v;
let d2qwj_uv = ddr[j] * dq0_u * dq0v + dr[j] * d2q0_uv;
for k in j..pw {
let qwk = br[k];
let dqwk_u = dr[k] * dq0_u;
let dqwkv = dr[k] * dq0v;
let d2qwk_uv = ddr[k] * dq0_u * dq0v + dr[k] * d2q0_uv;
let coeffww = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, qwj, qwk, 0.0, dqwj_u, dqwjv,
dqwk_u, dqwkv, d2qwj_uv, d2qwk_uv, 0.0, 0.0, 0.0,
);
row_h[[pt + pls + j, pt + pls + k]] += coeffww;
}
}
Ok(row_h)
})
.try_reduce(
|| Array2::<f64>::zeros((total, total)),
|mut acc, row_h| {
acc += &row_h;
Ok(acc)
},
)?;
mirror_upper_to_lower(&mut d2_h);
Ok(Some(d2_h))
}
/// Evaluates the exact joint Hessian against the designs carried by `specs`.
///
/// A shadow family is requested through `shadow_with_exact_joint_designs`;
/// when one is produced the call is forwarded to its
/// `exact_newton_joint_hessian`, otherwise `Ok(None)` is returned.
fn exact_newton_joint_hessian_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Array2<f64>>, String> {
    match self.shadow_with_exact_joint_designs(specs)? {
        Some(shadow_family) => shadow_family.exact_newton_joint_hessian(block_states),
        None => Ok(None),
    }
}
/// Directional derivative of the exact joint Hessian along `d_beta_flat`,
/// evaluated against the designs carried by `specs`.
///
/// Returns `Ok(None)` when `shadow_with_exact_joint_designs` yields no shadow
/// family; otherwise forwards to the shadow family's
/// `exact_newton_joint_hessian_directional_derivative`.
fn exact_newton_joint_hessian_directional_derivative_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.shadow_with_exact_joint_designs(specs)? {
        Some(shadow_family) => shadow_family
            .exact_newton_joint_hessian_directional_derivative(block_states, d_beta_flat),
        None => Ok(None),
    }
}
/// Second directional derivative of the exact joint Hessian along the pair
/// (`d_beta_u_flat`, `d_betav_flat`), evaluated against the designs in `specs`.
///
/// Returns `Ok(None)` when `shadow_with_exact_joint_designs` yields no shadow
/// family; otherwise forwards to the shadow family's
/// `exact_newton_joint_hessiansecond_directional_derivative`.
fn exact_newton_joint_hessian_second_directional_derivative_with_specs(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    d_beta_u_flat: &Array1<f64>,
    d_betav_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
    match self.shadow_with_exact_joint_designs(specs)? {
        Some(shadow_family) => shadow_family
            .exact_newton_joint_hessiansecond_directional_derivative(
                block_states,
                d_beta_u_flat,
                d_betav_flat,
            ),
        None => Ok(None),
    }
}
/// First-order joint psi terms for the hyperparameter at `psi_index`.
///
/// Pure forwarder: all arguments are passed unchanged to
/// `exact_newton_joint_psi_terms_for_specs`, whose result is returned as-is.
fn exact_newton_joint_psi_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiTerms>, String> {
self.exact_newton_joint_psi_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
)
}
/// Second-order joint psi terms for the hyperparameter pair (`psi_i`, `psi_j`).
///
/// Pure forwarder: all arguments are passed unchanged to
/// `exact_newton_joint_psisecond_order_terms_for_specs`.
fn exact_newton_joint_psisecond_order_terms(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_i: usize,
psi_j: usize,
) -> Result<Option<crate::custom_family::ExactNewtonJointPsiSecondOrderTerms>, String> {
self.exact_newton_joint_psisecond_order_terms_for_specs(
block_states,
specs,
derivative_blocks,
psi_i,
psi_j,
)
}
/// Directional derivative (along `d_beta_flat`) of the psi-Hessian term for
/// the hyperparameter at `psi_index`.
///
/// Pure forwarder: all arguments are passed unchanged to
/// `exact_newton_joint_psihessian_directional_derivative_for_specs`.
fn exact_newton_joint_psihessian_directional_derivative(
&self,
block_states: &[ParameterBlockState],
specs: &[ParameterBlockSpec],
derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
psi_index: usize,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.exact_newton_joint_psihessian_directional_derivative_for_specs(
block_states,
specs,
derivative_blocks,
psi_index,
d_beta_flat,
)
}
/// Builds the shared psi workspace for this family.
///
/// Returns `Ok(None)` when `exact_joint_supported()` is false. Otherwise a
/// `BinomialLocationScaleWiggleExactNewtonJointPsiWorkspace` is constructed
/// from a clone of `self`, owned copies of `block_states` and
/// `derivative_blocks`, and the borrowed `specs`; construction errors are
/// propagated.
fn exact_newton_joint_psi_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<crate::custom_family::CustomFamilyBlockPsiDerivative>],
) -> Result<Option<Arc<dyn ExactNewtonJointPsiWorkspace>>, String> {
    if self.exact_joint_supported() {
        let workspace: Arc<dyn ExactNewtonJointPsiWorkspace> =
            Arc::new(BinomialLocationScaleWiggleExactNewtonJointPsiWorkspace::new(
                self.clone(),
                block_states.to_vec(),
                specs,
                derivative_blocks.to_vec(),
            )?);
        Ok(Some(workspace))
    } else {
        Ok(None)
    }
}
/// Returns the `(design, offset)` pair to use for `spec` at the current state.
///
/// Every block except the one named `"wiggle"` keeps the design and offset
/// stored on its spec. The wiggle block's design is rebuilt from the current
/// threshold and log-sigma linear predictors: a base index `q0` is computed
/// per row and passed to `wiggle_design`; the offset is all zeros.
///
/// # Errors
/// * fewer than two block states are supplied,
/// * either predictor's length differs from `self.y.len()`,
/// * the rebuilt design's column count differs from `spec.design.ncols()`,
/// * `wiggle_design` itself fails.
fn block_geometry(
    &self,
    block_states: &[ParameterBlockState],
    spec: &crate::custom_family::ParameterBlockSpec,
) -> Result<(DesignMatrix, Array1<f64>), String> {
    // Static blocks: hand back what the spec already carries.
    if spec.name != "wiggle" {
        return Ok((spec.design.clone(), spec.offset.clone()));
    }
    if block_states.len() < 2 {
        return Err(GamlssError::UnsupportedConfiguration {
            reason: "wiggle geometry requires threshold and log-sigma blocks".to_string(),
        }
        .into());
    }
    let n_obs = self.y.len();
    let threshold_eta = &block_states[Self::BLOCK_T].eta;
    let log_sigma_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
    if threshold_eta.len() != n_obs || log_sigma_eta.len() != n_obs {
        return Err(GamlssError::DimensionMismatch {
            reason: "wiggle geometry input size mismatch".to_string(),
        }
        .into());
    }
    // Row-wise base index the wiggle basis is evaluated at.
    let q0 = Array1::from_shape_fn(n_obs, |row| {
        let sigma = exp_sigma_from_eta_scalar(log_sigma_eta[row]);
        binomial_location_scale_q0(threshold_eta[row], sigma)
    });
    let wiggle_basis = self.wiggle_design(q0.view())?;
    let got_cols = wiggle_basis.ncols();
    let expected_cols = spec.design.ncols();
    if got_cols != expected_cols {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "dynamic wiggle design col mismatch: got {}, expected {}",
                got_cols, expected_cols
            ),
        }
        .into());
    }
    let zero_offset = Array1::zeros(wiggle_basis.nrows());
    let design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(wiggle_basis));
    Ok((design, zero_offset))
}
/// Always `true` for this family: `block_geometry` rebuilds the "wiggle"
/// block's design from the current block states rather than reusing the
/// design stored on the spec.
fn block_geometry_is_dynamic(&self) -> bool {
true
}
/// Builds a Hessian workspace over the dense threshold / log-sigma designs.
///
/// Returns `Ok(None)` when `exact_joint_dense_block_designs` has no dense
/// designs for `specs`. Otherwise the designs are taken by value
/// (`into_owned`) and bundled with a clone of the family and an owned copy of
/// `block_states`.
fn exact_newton_joint_hessian_workspace(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    match self.exact_joint_dense_block_designs(Some(specs))? {
        None => Ok(None),
        Some((threshold_design, log_sigma_design)) => {
            let workspace: Arc<dyn ExactNewtonJointHessianWorkspace> =
                Arc::new(BinomialLocationScaleWiggleHessianWorkspace::new(
                    self.clone(),
                    block_states.to_vec(),
                    threshold_design.into_owned(),
                    log_sigma_design.into_owned(),
                )?);
            Ok(Some(workspace))
        }
    }
}
/// Same as `exact_newton_joint_hessian_workspace`, but honours
/// `options.outer_score_subsample`.
///
/// When a subsample is present its rows are handed to
/// `apply_outer_subsample` before the workspace is shared. Returns `Ok(None)`
/// when no dense designs are available for `specs`.
fn exact_newton_joint_hessian_workspace_with_options(
    &self,
    block_states: &[ParameterBlockState],
    specs: &[ParameterBlockSpec],
    options: &BlockwiseFitOptions,
) -> Result<Option<Arc<dyn ExactNewtonJointHessianWorkspace>>, String> {
    let dense_designs = self.exact_joint_dense_block_designs(Some(specs))?;
    let Some((threshold_design, log_sigma_design)) = dense_designs else {
        return Ok(None);
    };
    let mut workspace = BinomialLocationScaleWiggleHessianWorkspace::new(
        self.clone(),
        block_states.to_vec(),
        threshold_design.into_owned(),
        log_sigma_design.into_owned(),
    )?;
    // Restrict / reweight the per-row coefficients before the workspace is frozen in an Arc.
    if let Some(subsample) = &options.outer_score_subsample {
        workspace.apply_outer_subsample(subsample.rows.as_ref());
    }
    let shared: Arc<dyn ExactNewtonJointHessianWorkspace> = Arc::new(workspace);
    Ok(Some(shared))
}
/// Always `true` for this family. The workspace built by
/// `exact_newton_joint_hessian_workspace_with_options` applies
/// `options.outer_score_subsample` when one is supplied.
fn outer_derivative_subsample_capable(&self) -> bool {
true
}
/// Reports whether a Hessian-vector product is available for `specs`.
///
/// True only when the exact joint path is supported and
/// `exact_joint_dense_block_designs` succeeds with `Some` designs. An error
/// from that lookup is treated as "not available".
fn inner_coefficient_hessian_hvp_available(&self, specs: &[ParameterBlockSpec]) -> bool {
    if !self.exact_joint_supported() {
        return false;
    }
    self.exact_joint_dense_block_designs(Some(specs))
        .is_ok_and(|designs| designs.is_some())
}
}
impl BinomialLocationScaleWiggleFamily {
/// Builds a matrix-free operator for the joint-Hessian directional derivative
/// ("dH" in the error messages) along `d_beta_flat`.
///
/// Steps visible here:
/// 1. validate the block count (exactly 3) and that every linear predictor and
///    `self.weights` has `self.y.len()` rows;
/// 2. evaluate the wiggle basis and its first/second derivative bases at
///    `core0.q0`, and check `d_beta_flat` against the `(pt, pls, pw)` layout;
/// 3. obtain per-row coefficient vectors from `binomial_wiggle_dh_row_coeffs`;
/// 4. hand designs, bases and coefficients to `RowCoeffOperator::from_directions`.
///
/// `x_t_arc` / `x_ls_arc` are the dense threshold and log-sigma designs. The
/// result is always `Ok(Some(..))` on success; errors are dimension or input
/// mismatches, or failures propagated from the basis / core evaluations.
fn bls_wiggle_directional_operator(
&self,
block_states: &[ParameterBlockState],
x_t_arc: Arc<Array2<f64>>,
x_ls_arc: Arc<Array2<f64>>,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
if eta_t.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let pt = x_t_arc.ncols();
let pls = x_ls_arc.ncols();
let betaw0 = block_states[Self::BLOCK_WIGGLE].beta.clone();
let core0 = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
// The wiggle basis is evaluated at the base index q0; its column count
// fixes the width of the wiggle coefficient block.
let b0 = self.wiggle_design(core0.q0.view())?;
let pw = b0.ncols();
let beta_layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
let total = beta_layout.total();
if d_beta_flat.len() != total {
return Err(GamlssError::InvalidInput {
reason: format!(
"BLS wiggle dH operator: d_beta length {} != {}",
d_beta_flat.len(),
total
),
}
.into());
}
// Split the direction into its threshold / log-sigma / wiggle parts and map
// the first two into row space through their designs.
let (u_t, u_ls, uw) =
beta_layout.split_three(d_beta_flat, "wiggle joint dH operator d_beta")?;
let d_eta_t = fast_av(x_t_arc.as_ref(), &u_t);
let d_eta_ls = fast_av(x_ls_arc.as_ref(), &u_ls);
let d0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::second_derivative())?;
let d3q = self.wiggle_d3q_dq03(core0.q0.view(), betaw0.view())?;
if d0.ncols() != betaw0.len() || dd0.ncols() != betaw0.len() {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"wiggle derivative/beta mismatch in dH operator: B'={} B''={} betaw={}",
d0.ncols(),
dd0.ncols(),
betaw0.len()
),
}
.into());
}
// m = 1 + B'(q0)·beta_w and g2 = B''(q0)·beta_w; g3 comes from
// `wiggle_d3q_dq03` (presumably the third-derivative analogue).
let m = d0.dot(&betaw0) + 1.0;
let g2 = dd0.dot(&betaw0);
let g3 = d3q;
// Only sigma itself is used from the derivative tuple here.
let (sigma, ..) = exp_sigma_derivs_up_to_third(eta_ls.view());
let BinomialWiggleDhRowCoeffs {
coeff_tt,
coeff_tl,
coeff_ll,
coeff_tw_b,
coeff_tw_d,
coeff_tw_dd,
coeff_lw_b,
coeff_lw_d,
coeff_lw_dd,
coeffww_bb,
coeffww_db,
} = self.binomial_wiggle_dh_row_coeffs(
n,
&BinomialWiggleDhRowInputs {
core0: &core0,
eta_t,
etaw,
sigma: &sigma,
m: &m,
g2: &g2,
g3: &g3,
b0: &b0,
d0: &d0,
dd0: &dd0,
uw: &uw,
d_eta_t: &d_eta_t,
d_eta_ls: &d_eta_ls,
},
);
let basis: Arc<Array2<f64>> = Arc::new(b0);
let basis_d1: Arc<Array2<f64>> = Arc::new(d0);
let basis_d2: Arc<Array2<f64>> = Arc::new(dd0);
// NOTE(review): argument meaning inferred from usage — first vec = block
// widths, second = (block index, row-space matrix) list, third =
// (matrix slot, matrix slot, per-row coefficient) pairs. Confirm against
// `RowCoeffOperator::from_directions`.
Ok(Some(Arc::new(RowCoeffOperator::from_directions(
vec![pt, pls, pw],
vec![
(0, x_t_arc),
(1, x_ls_arc),
(2, basis),
(2, basis_d1),
(2, basis_d2),
],
vec![
(0, 0, coeff_tt),
(0, 1, coeff_tl),
(1, 1, coeff_ll),
(0, 2, coeff_tw_b),
(0, 3, coeff_tw_d),
(0, 4, coeff_tw_dd),
(1, 2, coeff_lw_b),
(1, 3, coeff_lw_d),
(1, 4, coeff_lw_dd),
(2, 2, coeffww_bb),
(2, 3, coeffww_db),
],
n,
))))
}
/// Builds a matrix-free operator for the second directional derivative of the
/// joint Hessian ("d2H" in the error messages) along the pair
/// (`d_beta_u`, `d_beta_v`).
///
/// Layout of the body:
/// 1. validate block count and row counts;
/// 2. evaluate the wiggle basis and its first three derivative bases at
///    `core0.q0`, plus the `wiggle_d3q_dq03` / `wiggle_d4q_dq04` vectors;
/// 3. validate both directions against the `(pt, pls, pw)` layout;
/// 4. per row, expand the chain rule for the index `q` and its partials along
///    `u`, `v` and `(u, v)`, then fill fifteen per-row coefficient vectors;
/// 5. hand designs, bases and coefficients to `RowCoeffOperator::from_directions`.
///
/// Returns `Ok(Some(..))` on success. Errors are dimension or input mismatches,
/// or failures propagated from the basis, core or fourth-derivative evaluations.
fn bls_wiggle_second_directional_operator(
&self,
block_states: &[ParameterBlockState],
x_t_arc: Arc<Array2<f64>>,
x_ls_arc: Arc<Array2<f64>>,
d_beta_u: &Array1<f64>,
d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
if block_states.len() != 3 {
return Err(GamlssError::DimensionMismatch {
reason: format!(
"BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
block_states.len()
),
}
.into());
}
let n = self.y.len();
let eta_t = &block_states[Self::BLOCK_T].eta;
let eta_ls = &block_states[Self::BLOCK_LOG_SIGMA].eta;
let etaw = &block_states[Self::BLOCK_WIGGLE].eta;
if eta_t.len() != n || eta_ls.len() != n || etaw.len() != n || self.weights.len() != n {
return Err(GamlssError::DimensionMismatch {
reason: "BinomialLocationScaleWiggleFamily input size mismatch".to_string(),
}
.into());
}
let pt = x_t_arc.ncols();
let pls = x_ls_arc.ncols();
let betaw0 = block_states[Self::BLOCK_WIGGLE].beta.clone();
let core0 = binomial_location_scale_core(
&self.y,
&self.weights,
eta_t,
eta_ls,
Some(etaw),
&self.link_kind,
)?;
// Wiggle basis and derivative bases, all evaluated at the base index q0.
let b0 = self.wiggle_design(core0.q0.view())?;
let d0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::first_derivative())?;
let dd0 =
self.wiggle_basiswith_options(core0.q0.view(), BasisOptions::second_derivative())?;
let d3_basis = self.wiggle_d3basis_constrained(core0.q0.view())?;
let d3q = self.wiggle_d3q_dq03(core0.q0.view(), betaw0.view())?;
let d4q = self.wiggle_d4q_dq04(core0.q0.view(), betaw0.view())?;
let pw = b0.ncols();
let beta_layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
let total = beta_layout.total();
if d_beta_u.len() != total || d_beta_v.len() != total {
return Err(GamlssError::InvalidInput {
reason: format!(
"BLS wiggle d2H operator: d_beta_{{u,v}} length {}/{} != {}",
d_beta_u.len(),
d_beta_v.len(),
total
),
}
.into());
}
if d0.ncols() != betaw0.len()
|| dd0.ncols() != betaw0.len()
|| d3_basis.ncols() != betaw0.len()
{
return Err(GamlssError::DimensionMismatch { reason: format!(
"wiggle derivative/beta mismatch in d2H operator: B'={} B''={} B'''={} betaw={}",
d0.ncols(),
dd0.ncols(),
d3_basis.ncols(),
betaw0.len()
) }.into());
}
// Split both directions by block and push the threshold / log-sigma parts
// into row space through their designs.
let (u_t, u_ls, uw) = beta_layout.split_three(d_beta_u, "wiggle d2H op u")?;
let (v_t, v_ls, vw) = beta_layout.split_three(d_beta_v, "wiggle d2H op v")?;
let d_eta_t_u = fast_av(x_t_arc.as_ref(), &u_t);
let d_eta_ls_u = fast_av(x_ls_arc.as_ref(), &u_ls);
let d_eta_t_v = fast_av(x_t_arc.as_ref(), &v_t);
let d_eta_ls_v = fast_av(x_ls_arc.as_ref(), &v_ls);
// m = 1 + B'(q0)·beta_w, g2 = B''(q0)·beta_w; g3 / g4 come from the
// `wiggle_d3q_dq03` / `wiggle_d4q_dq04` helpers.
let m = d0.dot(&betaw0) + 1.0;
let g2 = dd0.dot(&betaw0);
let g3 = d3q;
let g4 = d4q;
let (sigma, ds, d2s, d3s, d4s) = exp_sigma_derivs_up_to_fourth_array(eta_ls.view());
// Per-row coefficient vectors, one per (matrix slot, matrix slot) pairing
// passed to the operator at the end of this function.
let mut coeff_tt = Array1::<f64>::zeros(n);
let mut coeff_tl = Array1::<f64>::zeros(n);
let mut coeff_ll = Array1::<f64>::zeros(n);
let mut alpha_tw_b = Array1::<f64>::zeros(n);
let mut alpha_tw_d = Array1::<f64>::zeros(n);
let mut alpha_tw_dd = Array1::<f64>::zeros(n);
let mut alpha_tw_d3 = Array1::<f64>::zeros(n);
let mut alpha_lw_b = Array1::<f64>::zeros(n);
let mut alpha_lw_d = Array1::<f64>::zeros(n);
let mut alpha_lw_dd = Array1::<f64>::zeros(n);
let mut alpha_lw_d3 = Array1::<f64>::zeros(n);
let mut c_ww_bb = Array1::<f64>::zeros(n);
let mut c_ww_bd = Array1::<f64>::zeros(n);
let mut c_ww_bdd = Array1::<f64>::zeros(n);
let mut c_ww_dd_pair = Array1::<f64>::zeros(n);
for i in 0..n {
// Full index for this row: base index plus the wiggle contribution.
let q_i = core0.q0[i] + etaw[i];
// m1..m4: derivatives of the per-row negative log-likelihood with respect
// to q, orders one through four (per the helper names).
let (m1, m2, m3) = binomial_neglog_q_derivatives_dispatch(
self.y[i],
self.weights[i],
q_i,
core0.mu[i],
core0.dmu_dq[i],
core0.d2mu_dq2[i],
core0.d3mu_dq3[i],
&self.link_kind,
);
let m4 = binomial_neglog_q_fourth_derivative_dispatch(
self.y[i],
self.weights[i],
q_i,
core0.mu[i],
core0.dmu_dq[i],
core0.d2mu_dq2[i],
core0.d3mu_dq3[i],
&self.link_kind,
)?;
let q0_d = nonwiggle_q_derivs(eta_t[i], sigma[i]);
let s_safe = sigma[i];
let s2 = s_safe * s_safe;
let s3 = s2 * s_safe;
let s4 = s3 * s_safe;
let s5 = s4 * s_safe;
// Hand-expanded quotient-rule expressions in sigma and its eta_ls
// derivatives; by their names these appear to be the next two eta_ls
// derivatives of `q0_d.q_tl_ls` — TODO confirm against `nonwiggle_q_derivs`.
let q0_tl_ls_ls =
d3s[i] / s2 - 6.0 * ds[i] * d2s[i] / s3 + 6.0 * ds[i] * ds[i] * ds[i] / s4;
let q0_tl_ls_ls_ls =
d4s[i] / s2 - 8.0 * ds[i] * d3s[i] / s3 - 6.0 * d2s[i] * d2s[i] / s3
+ 36.0 * ds[i] * ds[i] * d2s[i] / s4
- 24.0 * ds[i] * ds[i] * ds[i] * ds[i] / s5;
let q0_ll_ls_ls = eta_t[i] * q0_tl_ls_ls_ls;
let u_t_i = d_eta_t_u[i];
let u_ls_i = d_eta_ls_u[i];
let v_t_i = d_eta_t_v[i];
let v_ls_i = d_eta_ls_v[i];
// Directional derivatives of the base index q0 and of its partials.
// No pure threshold-threshold partial of q0 enters any expression below.
let dq0_u = q0_d.q_t * u_t_i + q0_d.q_ls * u_ls_i;
let dq0v = q0_d.q_t * v_t_i + q0_d.q_ls * v_ls_i;
let d2q0_uv =
q0_d.q_tl * (u_t_i * v_ls_i + v_t_i * u_ls_i) + q0_d.q_ll * u_ls_i * v_ls_i;
let dq0_t_u = q0_d.q_tl * u_ls_i;
let dq0_tv = q0_d.q_tl * v_ls_i;
let dq0_ls_u = q0_d.q_tl * u_t_i + q0_d.q_ll * u_ls_i;
let dq0_lsv = q0_d.q_tl * v_t_i + q0_d.q_ll * v_ls_i;
let dq0_tl_u = q0_d.q_tl_ls * u_ls_i;
let dq0_tlv = q0_d.q_tl_ls * v_ls_i;
let dq0_ll_u = q0_d.q_tl_ls * u_t_i + q0_d.q_ll_ls * u_ls_i;
let dq0_llv = q0_d.q_tl_ls * v_t_i + q0_d.q_ll_ls * v_ls_i;
let d2q0_t_uv = q0_d.q_tl_ls * u_ls_i * v_ls_i;
let d2q0_ls_uv =
q0_d.q_tl_ls * (u_ls_i * v_t_i + v_ls_i * u_t_i) + q0_d.q_ll_ls * u_ls_i * v_ls_i;
let d2q0_tl_uv = q0_tl_ls_ls * u_ls_i * v_ls_i;
let d2q0_ll_uv =
q0_tl_ls_ls * (u_t_i * v_ls_i + v_t_i * u_ls_i) + q0_ll_ls_ls * u_ls_i * v_ls_i;
// Row i of each basis matrix, contracted with the wiggle parts of u and v.
let br = b0.row(i);
let dr = d0.row(i);
let ddr = dd0.row(i);
let d3r = d3_basis.row(i);
let b_u = br.dot(&uw);
let bv = br.dot(&vw);
let b1_u = dr.dot(&uw);
let b1v = dr.dot(&vw);
let b2_u = ddr.dot(&uw);
let b2v = ddr.dot(&vw);
let b3_u = d3r.dot(&uw);
let b3v = d3r.dot(&vw);
// Directional derivatives of m and g2: both depend on q0 (through the
// basis derivatives) and on the wiggle coefficients (through b1/b2/b3).
let dm_u = b1_u + g2[i] * dq0_u;
let dmv = b1v + g2[i] * dq0v;
let d2m_uv = g3[i] * dq0_u * dq0v + g2[i] * d2q0_uv + b2v * dq0_u + b2_u * dq0v;
let dg2_u = b2_u + g3[i] * dq0_u;
let dg2v = b2v + g3[i] * dq0v;
let d2g2_uv = g4[i] * dq0_u * dq0v + g3[i] * d2q0_uv + b3v * dq0_u + b3_u * dq0v;
// Full index q and its first/second partials in (threshold, log-sigma).
let dq_u = m[i] * dq0_u + b_u;
let dqv = m[i] * dq0v + bv;
let d2q_uv = m[i] * d2q0_uv + g2[i] * dq0_u * dq0v + b1_u * dq0v + b1v * dq0_u;
let q_t = m[i] * q0_d.q_t;
let q_ls = m[i] * q0_d.q_ls;
let q_tt = g2[i] * q0_d.q_t * q0_d.q_t;
let q_tl = g2[i] * q0_d.q_t * q0_d.q_ls + m[i] * q0_d.q_tl;
let q_ll = g2[i] * q0_d.q_ls * q0_d.q_ls + m[i] * q0_d.q_ll;
// Product-rule expansions of the partials above along u, v and (u, v).
let dq_t_u = dm_u * q0_d.q_t + m[i] * dq0_t_u;
let dq_tv = dmv * q0_d.q_t + m[i] * dq0_tv;
let dq_ls_u = dm_u * q0_d.q_ls + m[i] * dq0_ls_u;
let dq_lsv = dmv * q0_d.q_ls + m[i] * dq0_lsv;
let d2q_t_uv = d2m_uv * q0_d.q_t + dm_u * dq0_tv + dmv * dq0_t_u + m[i] * d2q0_t_uv;
let d2q_ls_uv =
d2m_uv * q0_d.q_ls + dm_u * dq0_lsv + dmv * dq0_ls_u + m[i] * d2q0_ls_uv;
let dq_tt_u = dg2_u * q0_d.q_t * q0_d.q_t + g2[i] * (2.0 * q0_d.q_t * dq0_t_u);
let dq_ttv = dg2v * q0_d.q_t * q0_d.q_t + g2[i] * (2.0 * q0_d.q_t * dq0_tv);
let d2q_tt_uv = d2g2_uv * q0_d.q_t * q0_d.q_t
+ dg2_u * (2.0 * q0_d.q_t * dq0_tv)
+ dg2v * (2.0 * q0_d.q_t * dq0_t_u)
+ g2[i] * (2.0 * dq0_t_u * dq0_tv + 2.0 * q0_d.q_t * d2q0_t_uv);
let dq_tl_u = dg2_u * q0_d.q_t * q0_d.q_ls
+ g2[i] * (dq0_t_u * q0_d.q_ls + q0_d.q_t * dq0_ls_u)
+ dm_u * q0_d.q_tl
+ m[i] * dq0_tl_u;
let dq_tlv = dg2v * q0_d.q_t * q0_d.q_ls
+ g2[i] * (dq0_tv * q0_d.q_ls + q0_d.q_t * dq0_lsv)
+ dmv * q0_d.q_tl
+ m[i] * dq0_tlv;
let d2q_tl_uv = d2g2_uv * q0_d.q_t * q0_d.q_ls
+ dg2_u * (dq0_tv * q0_d.q_ls + q0_d.q_t * dq0_lsv)
+ dg2v * (dq0_t_u * q0_d.q_ls + q0_d.q_t * dq0_ls_u)
+ g2[i]
* (d2q0_t_uv * q0_d.q_ls
+ dq0_t_u * dq0_lsv
+ dq0_tv * dq0_ls_u
+ q0_d.q_t * d2q0_ls_uv)
+ d2m_uv * q0_d.q_tl
+ dm_u * dq0_tlv
+ dmv * dq0_tl_u
+ m[i] * d2q0_tl_uv;
let dq_ll_u = dg2_u * q0_d.q_ls * q0_d.q_ls
+ g2[i] * (2.0 * q0_d.q_ls * dq0_ls_u)
+ dm_u * q0_d.q_ll
+ m[i] * dq0_ll_u;
let dq_llv = dg2v * q0_d.q_ls * q0_d.q_ls
+ g2[i] * (2.0 * q0_d.q_ls * dq0_lsv)
+ dmv * q0_d.q_ll
+ m[i] * dq0_llv;
let d2q_ll_uv = d2g2_uv * q0_d.q_ls * q0_d.q_ls
+ dg2_u * (2.0 * q0_d.q_ls * dq0_lsv)
+ dg2v * (2.0 * q0_d.q_ls * dq0_ls_u)
+ g2[i] * (2.0 * dq0_ls_u * dq0_lsv + 2.0 * q0_d.q_ls * d2q0_ls_uv)
+ d2m_uv * q0_d.q_ll
+ dm_u * dq0_llv
+ dmv * dq0_ll_u
+ m[i] * d2q0_ll_uv;
// Threshold / log-sigma pairings go through the shared helper.
coeff_tt[i] = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, q_t, q_tt, dq_t_u, dq_tv, dq_t_u, dq_tv,
d2q_t_uv, d2q_t_uv, dq_tt_u, dq_ttv, d2q_tt_uv,
);
coeff_tl[i] = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_t, q_ls, q_tl, dq_t_u, dq_tv, dq_ls_u, dq_lsv,
d2q_t_uv, d2q_ls_uv, dq_tl_u, dq_tlv, d2q_tl_uv,
);
coeff_ll[i] = second_directionalhessian_coeff_fromobjective_q_terms(
m1, m2, m3, m4, dq_u, dqv, d2q_uv, q_ls, q_ls, q_ll, dq_ls_u, dq_lsv, dq_ls_u,
dq_lsv, d2q_ls_uv, d2q_ls_uv, dq_ll_u, dq_llv, d2q_ll_uv,
);
// Wiggle cross terms are stored already split by which basis matrix they
// multiply: `_b` → B, `_d` → B', `_dd` → B'', `_d3` → B''' (see the slot
// list passed to the operator below).
alpha_tw_b[i] = m4 * dq_u * dqv * q_t
+ m3 * (d2q_uv * q_t + dq_u * dq_tv + dqv * dq_t_u)
+ m2 * d2q_t_uv;
alpha_tw_d[i] = m3 * (dq_u * q_t * dq0v + dqv * q_t * dq0_u + dq_u * dqv * q0_d.q_t)
+ m2 * (dq_t_u * dq0v
+ dq_tv * dq0_u
+ q_t * d2q0_uv
+ d2q_uv * q0_d.q_t
+ dq_u * dq0_tv
+ dqv * dq0_t_u)
+ m1 * d2q0_t_uv;
alpha_tw_dd[i] = m2
* (q_t * dq0_u * dq0v + dq_u * dq0v * q0_d.q_t + dqv * dq0_u * q0_d.q_t)
+ m1 * (d2q0_uv * q0_d.q_t + dq0_u * dq0_tv + dq0v * dq0_t_u);
alpha_tw_d3[i] = m1 * dq0_u * dq0v * q0_d.q_t;
alpha_lw_b[i] = m4 * dq_u * dqv * q_ls
+ m3 * (d2q_uv * q_ls + dq_u * dq_lsv + dqv * dq_ls_u)
+ m2 * d2q_ls_uv;
alpha_lw_d[i] = m3 * (dq_u * q_ls * dq0v + dqv * q_ls * dq0_u + dq_u * dqv * q0_d.q_ls)
+ m2 * (dq_ls_u * dq0v
+ dq_lsv * dq0_u
+ q_ls * d2q0_uv
+ d2q_uv * q0_d.q_ls
+ dq_u * dq0_lsv
+ dqv * dq0_ls_u)
+ m1 * d2q0_ls_uv;
alpha_lw_dd[i] = m2
* (q_ls * dq0_u * dq0v + dq_u * dq0v * q0_d.q_ls + dqv * dq0_u * q0_d.q_ls)
+ m1 * (d2q0_uv * q0_d.q_ls + dq0_u * dq0_lsv + dq0v * dq0_ls_u);
alpha_lw_d3[i] = m1 * dq0_u * dq0v * q0_d.q_ls;
// Wiggle-wiggle terms, again split by basis pairing (B·B, B·B', B·B'', B'·B').
c_ww_bb[i] = m4 * dq_u * dqv + m3 * d2q_uv;
c_ww_bd[i] = m3 * (dq_u * dq0v + dqv * dq0_u) + m2 * d2q0_uv;
c_ww_bdd[i] = m2 * dq0_u * dq0v;
c_ww_dd_pair[i] = 2.0 * m2 * dq0_u * dq0v;
}
let basis: Arc<Array2<f64>> = Arc::new(b0);
let basis_d1: Arc<Array2<f64>> = Arc::new(d0);
let basis_d2: Arc<Array2<f64>> = Arc::new(dd0);
let basis_d3: Arc<Array2<f64>> = Arc::new(d3_basis);
// NOTE(review): argument meaning inferred from usage — first vec = block
// widths, second = (block index, row-space matrix) list, third =
// (matrix slot, matrix slot, per-row coefficient) pairs. Confirm against
// `RowCoeffOperator::from_directions`.
Ok(Some(Arc::new(RowCoeffOperator::from_directions(
vec![pt, pls, pw],
vec![
(0, x_t_arc),
(1, x_ls_arc),
(2, basis),
(2, basis_d1),
(2, basis_d2),
(2, basis_d3),
],
vec![
(0, 0, coeff_tt),
(0, 1, coeff_tl),
(1, 1, coeff_ll),
(0, 2, alpha_tw_b),
(0, 3, alpha_tw_d),
(0, 4, alpha_tw_dd),
(0, 5, alpha_tw_d3),
(1, 2, alpha_lw_b),
(1, 3, alpha_lw_d),
(1, 4, alpha_lw_dd),
(1, 5, alpha_lw_d3),
(2, 2, c_ww_bb),
(2, 3, c_ww_bd),
(2, 4, c_ww_bdd),
(3, 3, c_ww_dd_pair),
],
n,
))))
}
}
/// Cached state for evaluating the joint Hessian of
/// `BinomialLocationScaleWiggleFamily` at one fixed set of block states.
///
/// Built by `new`, which precomputes `pieces` once; the dense, matvec and
/// diagonal Hessian evaluations then read only `pieces` and the two designs.
struct BinomialLocationScaleWiggleHessianWorkspace {
/// Family instance the directional-derivative calls are delegated to.
family: BinomialLocationScaleWiggleFamily,
/// Block states the workspace was built at.
block_states: Vec<ParameterBlockState>,
/// Dense threshold-block design, shared with derivative operators.
x_t: Arc<Array2<f64>>,
/// Dense log-sigma-block design, shared with derivative operators.
x_ls: Arc<Array2<f64>>,
/// Per-row Hessian coefficients and wiggle bases from
/// `wiggle_hessian_row_pieces`.
pieces: BinomialLocationScaleWiggleHessianRowPieces,
}
impl BinomialLocationScaleWiggleHessianWorkspace {
/// Precomputes the per-row Hessian pieces at `block_states` and takes
/// ownership of the family, the states and both dense designs.
///
/// Fails when `wiggle_hessian_row_pieces` fails.
fn new(
    family: BinomialLocationScaleWiggleFamily,
    block_states: Vec<ParameterBlockState>,
    x_t: Array2<f64>,
    x_ls: Array2<f64>,
) -> Result<Self, String> {
    let pieces = family.wiggle_hessian_row_pieces(&block_states)?;
    let shared_x_t = Arc::new(x_t);
    let shared_x_ls = Arc::new(x_ls);
    Ok(Self {
        pieces,
        x_t: shared_x_t,
        x_ls: shared_x_ls,
        block_states,
        family,
    })
}
fn apply_outer_subsample(
&mut self,
rows: &[crate::families::marginal_slope_shared::WeightedOuterRow],
) {
let n = self.pieces.coeff_tt.len();
let mut mask_tt = Array1::<f64>::zeros(n);
let mut mask_tl = Array1::<f64>::zeros(n);
let mut mask_ll = Array1::<f64>::zeros(n);
let mut mask_tw_b = Array1::<f64>::zeros(n);
let mut mask_tw_d = Array1::<f64>::zeros(n);
let mut mask_lw_b = Array1::<f64>::zeros(n);
let mut mask_lw_d = Array1::<f64>::zeros(n);
let mut maskww = Array1::<f64>::zeros(n);
for r in rows {
let i = r.index;
let w = r.weight;
mask_tt[i] = self.pieces.coeff_tt[i] * w;
mask_tl[i] = self.pieces.coeff_tl[i] * w;
mask_ll[i] = self.pieces.coeff_ll[i] * w;
mask_tw_b[i] = self.pieces.coeff_tw_b[i] * w;
mask_tw_d[i] = self.pieces.coeff_tw_d[i] * w;
mask_lw_b[i] = self.pieces.coeff_lw_b[i] * w;
mask_lw_d[i] = self.pieces.coeff_lw_d[i] * w;
maskww[i] = self.pieces.coeffww[i] * w;
}
self.pieces.coeff_tt = mask_tt;
self.pieces.coeff_tl = mask_tl;
self.pieces.coeff_ll = mask_ll;
self.pieces.coeff_tw_b = mask_tw_b;
self.pieces.coeff_tw_d = mask_tw_d;
self.pieces.coeff_lw_b = mask_lw_b;
self.pieces.coeff_lw_d = mask_lw_d;
self.pieces.coeffww = maskww;
}
}
impl ExactNewtonJointHessianWorkspace for BinomialLocationScaleWiggleHessianWorkspace {
/// Assembles the dense joint Hessian from the cached row pieces and the two
/// dense designs. Always `Some` on success.
fn hessian_dense(&self) -> Result<Option<Array2<f64>>, String> {
    let threshold_design = self.x_t.as_ref();
    let log_sigma_design = self.x_ls.as_ref();
    Ok(Some(
        self.pieces
            .assemble_dense(threshold_design, log_sigma_design)?,
    ))
}
/// Always `true`: this workspace implements `hessian_matvec`.
fn hessian_matvec_available(&self) -> bool {
true
}
/// Applies the joint Hessian to `v` without forming the matrix.
///
/// `v` is laid out as `[threshold | log-sigma | wiggle]`. Each slice is mapped
/// to row space (the wiggle slice through both `b0` and `d0`), combined with
/// the cached per-row coefficients, and mapped back through the transposes.
///
/// # Errors
/// Returns a dimension-mismatch error when `v.len()` differs from the total
/// coefficient count.
fn hessian_matvec(&self, v: &Array1<f64>) -> Result<Option<Array1<f64>>, String> {
    let pieces = &self.pieces;
    let (pt, pls, pw) = (self.x_t.ncols(), self.x_ls.ncols(), pieces.b0.ncols());
    let total = pt + pls + pw;
    if v.len() != total {
        return Err(GamlssError::DimensionMismatch {
            reason: format!(
                "BinomialLocationScaleWiggle matvec dimension mismatch: got {}, expected {}",
                v.len(),
                total
            ),
        }
        .into());
    }
    let wiggle_start = pt + pls;

    // Forward pass: coefficient slices -> row space.
    let row_t = self.x_t.dot(&v.slice(s![0..pt]));
    let row_ls = self.x_ls.dot(&v.slice(s![pt..wiggle_start]));
    let wiggle_part = v.slice(s![wiggle_start..total]);
    let row_b = pieces.b0.dot(&wiggle_part);
    let row_d = pieces.d0.dot(&wiggle_part);

    // Row-wise mixing with the cached Hessian coefficients.
    let mixed_t = &pieces.coeff_tt * &row_t
        + &pieces.coeff_tl * &row_ls
        + &pieces.coeff_tw_b * &row_b
        + &pieces.coeff_tw_d * &row_d;
    let mixed_ls = &pieces.coeff_tl * &row_t
        + &pieces.coeff_ll * &row_ls
        + &pieces.coeff_lw_b * &row_b
        + &pieces.coeff_lw_d * &row_d;
    let mixed_b = &pieces.coeff_tw_b * &row_t
        + &pieces.coeff_lw_b * &row_ls
        + &pieces.coeffww * &row_b;
    let mixed_d = &pieces.coeff_tw_d * &row_t + &pieces.coeff_lw_d * &row_ls;

    // Backward pass: row space -> coefficient slices.
    let back_t = fast_atv(self.x_t.as_ref(), &mixed_t);
    let back_ls = fast_atv(self.x_ls.as_ref(), &mixed_ls);
    let back_w = fast_atv(&pieces.b0, &mixed_b) + &fast_atv(&pieces.d0, &mixed_d);

    let mut product = Array1::<f64>::zeros(total);
    product.slice_mut(s![0..pt]).assign(&back_t);
    product.slice_mut(s![pt..wiggle_start]).assign(&back_ls);
    product.slice_mut(s![wiggle_start..total]).assign(&back_w);
    Ok(Some(product))
}
fn hessian_diagonal(&self) -> Result<Option<Array1<f64>>, String> {
let pt = self.x_t.ncols();
let pls = self.x_ls.ncols();
let pw = self.pieces.b0.ncols();
let total = pt + pls + pw;
let mut diag = Array1::<f64>::zeros(total);
let n = self.pieces.coeff_tt.len();
for j in 0..pt {
let col = self.x_t.column(j);
let mut acc = 0.0;
for i in 0..n {
let v = col[i];
acc += self.pieces.coeff_tt[i] * v * v;
}
diag[j] = acc;
}
for j in 0..pls {
let col = self.x_ls.column(j);
let mut acc = 0.0;
for i in 0..n {
let v = col[i];
acc += self.pieces.coeff_ll[i] * v * v;
}
diag[pt + j] = acc;
}
for j in 0..pw {
let col = self.pieces.b0.column(j);
let mut acc = 0.0;
for i in 0..n {
let v = col[i];
acc += self.pieces.coeffww[i] * v * v;
}
diag[pt + pls + j] = acc;
}
Ok(Some(diag))
}
/// Dense directional derivative of the joint Hessian along `d_beta_flat`.
///
/// Delegates to the family's
/// `exact_newton_joint_hessian_directional_derivative` at the stored block
/// states. The cached `pieces` are not consulted here.
fn directional_derivative(
&self,
d_beta_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.family
.exact_newton_joint_hessian_directional_derivative(&self.block_states, d_beta_flat)
}
/// Matrix-free form of `directional_derivative`.
///
/// Shares the workspace's two dense designs (reference-count bump only) with
/// the family's `bls_wiggle_directional_operator`, evaluated at the stored
/// block states.
fn directional_derivative_operator(
    &self,
    d_beta_flat: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let threshold_design = Arc::clone(&self.x_t);
    let log_sigma_design = Arc::clone(&self.x_ls);
    self.family.bls_wiggle_directional_operator(
        &self.block_states,
        threshold_design,
        log_sigma_design,
        d_beta_flat,
    )
}
/// Dense second directional derivative of the joint Hessian along
/// (`d_beta_u_flat`, `d_beta_v_flat`).
///
/// Delegates to the family's
/// `exact_newton_joint_hessiansecond_directional_derivative` at the stored
/// block states. The cached `pieces` are not consulted here.
fn second_directional_derivative(
&self,
d_beta_u_flat: &Array1<f64>,
d_beta_v_flat: &Array1<f64>,
) -> Result<Option<Array2<f64>>, String> {
self.family
.exact_newton_joint_hessiansecond_directional_derivative(
&self.block_states,
d_beta_u_flat,
d_beta_v_flat,
)
}
/// Matrix-free form of `second_directional_derivative`.
///
/// Shares the workspace's two dense designs (reference-count bump only) with
/// the family's `bls_wiggle_second_directional_operator`, evaluated at the
/// stored block states.
fn second_directional_derivative_operator(
    &self,
    d_beta_u: &Array1<f64>,
    d_beta_v: &Array1<f64>,
) -> Result<Option<Arc<dyn crate::solver::estimate::reml::unified::HyperOperator>>, String>
{
    let threshold_design = Arc::clone(&self.x_t);
    let log_sigma_design = Arc::clone(&self.x_ls);
    self.family.bls_wiggle_second_directional_operator(
        &self.block_states,
        threshold_design,
        log_sigma_design,
        d_beta_u,
        d_beta_v,
    )
}
}
impl CustomFamilyGenerative for BinomialLocationScaleWiggleFamily {
    /// Generative description of the fitted model at `block_states`.
    ///
    /// The per-row mean is the inverse link evaluated at the base index `q0`
    /// (from the threshold and log-sigma predictors) plus the wiggle predictor.
    /// Noise is Bernoulli.
    ///
    /// # Errors
    /// * the number of block states is not exactly 3,
    /// * any predictor's length differs from `self.y.len()`,
    /// * the inverse-link evaluation fails for some row.
    fn generativespec(
        &self,
        block_states: &[ParameterBlockState],
    ) -> Result<GenerativeSpec, String> {
        let n_blocks = block_states.len();
        if n_blocks != 3 {
            return Err(GamlssError::DimensionMismatch {
                reason: format!(
                    "BinomialLocationScaleWiggleFamily expects 3 blocks, got {}",
                    n_blocks
                ),
            }
            .into());
        }
        let n_obs = self.y.len();
        let threshold_eta = &block_states[Self::BLOCK_T].eta;
        let log_sigma_eta = &block_states[Self::BLOCK_LOG_SIGMA].eta;
        let wiggle_eta = &block_states[Self::BLOCK_WIGGLE].eta;
        let lengths_agree = threshold_eta.len() == n_obs
            && log_sigma_eta.len() == n_obs
            && wiggle_eta.len() == n_obs;
        if !lengths_agree {
            return Err(GamlssError::DimensionMismatch {
                reason: "BinomialLocationScaleWiggleFamily generative size mismatch".to_string(),
            }
            .into());
        }
        let mean = gamlss_rowwise_map_result(n_obs, |row| {
            let sigma = exp_sigma_from_eta_scalar(log_sigma_eta[row]);
            let base_index = binomial_location_scale_q0(threshold_eta[row], sigma);
            let jet =
                inverse_link_jet_for_inverse_link(&self.link_kind, base_index + wiggle_eta[row])
                    .map_err(|e| format!("location-scale inverse-link evaluation failed: {e}"))?;
            Ok(jet.mu)
        })?;
        Ok(GenerativeSpec {
            mean,
            noise: NoiseModel::Bernoulli,
        })
    }
}
#[cfg(test)]
mod tests {
use super::*;
use super::binomial_q_derivs::{
binomial_neglog_q_derivatives_cloglog_closed_form,
binomial_neglog_q_derivatives_logit_closed_form,
binomial_neglog_q_derivatives_probit_closed_form,
binomial_neglog_q_fourth_derivative_cloglog_closed_form,
binomial_neglog_q_fourth_derivative_logit_closed_form,
binomial_neglog_q_fourth_derivative_probit_closed_form,
};
use crate::basis::{
CenterStrategy, Dense, KnotSource, MaternBasisSpec, MaternIdentifiability, MaternNu,
create_basis,
};
use crate::families::wiggle::{
initializewiggle_knots_from_seed, monotone_wiggle_internal_degree,
split_wiggle_penalty_orders,
};
use crate::smooth::{ShapeConstraint, SmoothBasisSpec, SmoothTermSpec};
use crate::test_support::{binomial_location_scale_base_fixture, no_densify_design};
use ndarray::{Array2, Axis, array};
use num_dual::{
DualNum, second_derivative, second_partial_derivative, third_partial_derivative_vec,
};
/// Test fixture: an unpenalized block whose design is a single column of ones
/// over `n` rows, with a zero offset and no starting values.
fn intercept_block(n: usize) -> ParameterBlockInput {
    let ones_column = Array2::<f64>::ones((n, 1));
    let design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(ones_column));
    ParameterBlockInput {
        design,
        offset: Array1::zeros(n),
        penalties: Vec::new(),
        nullspace_dims: Vec::new(),
        initial_log_lambdas: None,
        initial_beta: None,
    }
}
/// Test helper: assembles the flat log-lambda vector `[mean | noise | wiggle]`.
///
/// A hint is copied into its segment only when it is present and its length
/// matches the segment's penalty count; otherwise that segment stays zero.
/// The wiggle segment is filled from `extra_rho0` whenever it is non-empty.
fn compose_theta_from_hints_test(
    mean_penalty_count: usize,
    noise_penalty_count: usize,
    mean_log_lambda_hint: &Option<Array1<f64>>,
    noise_log_lambda_hint: &Option<Array1<f64>>,
    extra_rho0: &Array1<f64>,
) -> Array1<f64> {
    let layout = GamlssLambdaLayout::withwiggle(
        mean_penalty_count,
        noise_penalty_count,
        extra_rho0.len(),
    );
    let mut theta = Array1::<f64>::zeros(layout.total());

    let usable_mean_hint = mean_log_lambda_hint
        .as_ref()
        .filter(|hint| hint.len() == layout.k_mean);
    if let Some(hint) = usable_mean_hint {
        theta.slice_mut(s![0..layout.mean_end()]).assign(hint);
    }

    let usable_noise_hint = noise_log_lambda_hint
        .as_ref()
        .filter(|hint| hint.len() == layout.k_noise);
    if let Some(hint) = usable_noise_hint {
        let noise_range = layout.noise_start()..layout.noise_end();
        theta.slice_mut(s![noise_range]).assign(hint);
    }

    if layout.kwiggle > 0 {
        let wiggle_range = layout.wiggle_start()..layout.wiggle_end();
        theta.slice_mut(s![wiggle_range]).assign(extra_rho0);
    }
    theta
}
/// The validator must accept a nonnegative coefficient vector and must report
/// a negative entry as an error.
#[test]
fn monotone_wiggle_post_update_validator_rejects_hidden_projection() {
    let context = "monotone wiggle validator test";

    let feasible_beta = array![0.0, 1.0e-13, 2.0];
    validate_monotone_wiggle_beta_nonnegative(&feasible_beta, context)
        .expect("feasible nonnegative wiggle beta should validate");

    let infeasible_beta = array![0.0, -1.0e-3, 2.0];
    let err = validate_monotone_wiggle_beta_nonnegative(&infeasible_beta, context)
        .expect_err("negative wiggle beta must be rejected instead of projected");
    let mentions_constraint = err.contains("monotone wiggle coefficients must be non-negative");
    assert!(mentions_constraint, "unexpected error: {err}");
}
/// At a very negative `eta` the complement form `1 - floor / sigma` rounds to
/// exactly zero, yet `logb_dlog_sigma_deta` must still return a strictly
/// positive derivative; at infinite inputs it must return exactly one.
#[test]
fn logb_dlog_sigma_deta_preserves_negative_tail_precision() {
    let eta = -703.4873664863218;
    let jet = logb_sigma_jet1_scalar(eta);
    let (sigma, d1) = (jet.sigma, jet.d1);

    let complement_form = 1.0 - LOGB_SIGMA_FLOOR / sigma;
    assert_eq!(
        complement_form, 0.0,
        "the algebraically equivalent complement form must cancel at this eta"
    );

    let tail_derivative = logb_dlog_sigma_deta(sigma, d1);
    assert!(
        tail_derivative > 0.0,
        "d_sigma_deta / sigma must preserve the remaining tail derivative"
    );

    let saturated = logb_dlog_sigma_deta(f64::INFINITY, f64::INFINITY);
    assert_eq!(saturated, 1.0);
}
/// Logistic sigmoid `1 / (1 + exp(-x))` evaluated on a dual-number type.
fn logistic_numdual<D: DualNum<f64> + Copy>(x: D) -> D {
    let one = D::one();
    let denominator = one + (-x).exp();
    one / denominator
}
/// Evaluates `knots.len() - degree - 1` B-spline basis functions at `x` with a
/// Cox–de Boor style recursion carried out in dual-number arithmetic.
///
/// The degree-0 indicators are selected from the real part of `x`; when that real part
/// equals the last knot, only the final basis function is switched on. Spans with a
/// non-positive knot difference contribute nothing to the recursion.
fn bspline_basis_scalar_numdual<D: DualNum<f64> + Copy>(
    x: D,
    knots: &Array1<f64>,
    degree: usize,
) -> Vec<D> {
    let n_basis = knots.len() - degree - 1;
    let x_value = x.re();
    let last_knot = knots[knots.len() - 1];

    // Degree-0 layer: indicator of the half-open knot span containing `x`.
    let mut basis: Vec<D> = (0..n_basis)
        .map(|j| {
            let in_span = if x_value == last_knot {
                j + 1 == n_basis
            } else {
                knots[j] <= x_value && x_value < knots[j + 1]
            };
            if in_span { D::one() } else { D::zero() }
        })
        .collect();

    // Raise the degree one step at a time, always reading from the previous layer.
    for k in 1..=degree {
        let raised: Vec<D> = (0..n_basis)
            .map(|j| {
                let mut acc = D::zero();
                let left_span = knots[j + k] - knots[j];
                if left_span > 0.0 {
                    acc += ((x - D::from(knots[j])) / D::from(left_span)) * basis[j];
                }
                // The right neighbour only exists inside the retained `n_basis` window.
                if j + 1 < n_basis {
                    let right_span = knots[j + k + 1] - knots[j + 1];
                    if right_span > 0.0 {
                        acc += ((D::from(knots[j + k + 1]) - x) / D::from(right_span))
                            * basis[j + 1];
                    }
                }
                acc
            })
            .collect();
        basis = raised;
    }
    basis
}
/// Dual-number evaluation of the monotone wiggle basis at `x`.
///
/// Entry `j - 1` of the result is the sum of B-spline basis functions `j..` evaluated at
/// `x`, minus the same tail sum evaluated at the anchor knot `knots[bs_degree]`, where
/// `bs_degree` is `monotone_wiggle_internal_degree(degree) + 1`.
fn monotone_wiggle_basis_scalar_numdual<D: DualNum<f64> + Copy>(
    x: D,
    knots: &Array1<f64>,
    degree: usize,
) -> Array1<D> {
    let internal_degree =
        monotone_wiggle_internal_degree(degree).expect("monotone wiggle degree");
    let bs_degree = internal_degree + 1;
    let anchor = knots[bs_degree];

    let at_x = bspline_basis_scalar_numdual(x, knots, bs_degree);
    let at_anchor = bspline_basis_scalar_numdual(D::from(anchor), knots, bs_degree);

    let n_out = at_x.len().saturating_sub(1);
    let mut out = Array1::<D>::from_elem(n_out, D::zero());

    // Accumulate tail sums from the last basis function down to index 1.
    let mut tail_at_x = D::zero();
    let mut tail_at_anchor = D::zero();
    for j in (1..at_x.len()).rev() {
        tail_at_x += at_x[j];
        tail_at_anchor += at_anchor[j];
        out[j - 1] = tail_at_x - tail_at_anchor;
    }
    out
}
/// Weighted Bernoulli negative log-likelihood as a function of a scalar threshold
/// coefficient `beta_t`, for dual-number differentiation.
///
/// The base index is `q0 = -beta_t / exp(beta_ls)`; the monotone wiggle basis evaluated at
/// `q0` is combined with `betaw` and added to `q0`, and the result goes through the
/// logistic function. Every observation shares that single fitted probability.
fn wiggle_negloglik_threshold_numdual<D: DualNum<f64> + Copy>(
    beta_t: D,
    beta_ls: f64,
    betaw: &Array1<f64>,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    knots: &Array1<f64>,
    degree: usize,
) -> D {
    let sigma = D::from(beta_ls).exp();
    let q0 = -beta_t / sigma;

    let basis = monotone_wiggle_basis_scalar_numdual(q0, knots, degree);
    let mut etaw = D::zero();
    for (j, &coef) in betaw.iter().enumerate() {
        etaw += basis[j] * D::from(coef);
    }

    let q = q0 + etaw;
    let mu = logistic_numdual(q);
    let log_mu = mu.ln();
    let log_one_minus_mu = (D::one() - mu).ln();

    let mut neg_loglik = D::zero();
    for (i, &yi) in y.iter().enumerate() {
        let row_loglik = D::from(yi) * log_mu + D::from(1.0 - yi) * log_one_minus_mu;
        neg_loglik -= D::from(weights[i]) * row_loglik;
    }
    neg_loglik
}
/// Weighted Gaussian negative log-likelihood (additive constants dropped) with scalar
/// coefficients `beta_mu`, `beta_ls` and a scalar design hyper-parameter `psi`.
///
/// Per row, the log-sigma design entry is the quadratic expansion
/// `x_ls0 + psi * x_ls_psi + 0.5 * psi^2 * x_ls_psi_psi`, and the scale is
/// `sigma = LOGB_SIGMA_FLOOR + exp(x_ls * beta_ls)`.
fn gaussian_negloglik_log_sigma_psi_numdual<D: DualNum<f64> + Copy>(
    beta_mu: D,
    beta_ls: D,
    psi: D,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let half = D::from(0.5);
    // Second-order Taylor coefficient in psi, shared by every row.
    let half_psi_sq = half * psi * psi;

    let mut total = D::zero();
    for i in 0..y.len() {
        let eta_mu = D::from(x_mu0[i]) * beta_mu;
        let x_ls =
            D::from(x_ls0[i]) + psi * D::from(x_ls_psi[i]) + half_psi_sq * D::from(x_ls_psi_psi[i]);
        let eta_ls = x_ls * beta_ls;
        let sigma = D::from(LOGB_SIGMA_FLOOR) + eta_ls.exp();
        let resid = D::from(y[i]) - eta_mu;
        let row = half * (resid / sigma).powi(2) + sigma.ln();
        total += D::from(weights[i]) * row;
    }
    total
}
/// Variant of `gaussian_negloglik_log_sigma_psi_numdual` in which only `psi` is a dual
/// variable; `beta_mu` and `beta_ls` are lifted from plain `f64` constants.
fn gaussian_negloglik_log_sigma_psi_only_numdual<D: DualNum<f64> + Copy>(
    psi: D,
    beta_mu: f64,
    beta_ls: f64,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let beta_mu_const = D::from(beta_mu);
    let beta_ls_const = D::from(beta_ls);
    gaussian_negloglik_log_sigma_psi_numdual(
        beta_mu_const,
        beta_ls_const,
        psi,
        y,
        weights,
        x_mu0,
        x_ls0,
        x_ls_psi,
        x_ls_psi_psi,
    )
}
/// Variant of `gaussian_negloglik_log_sigma_psi_numdual` with `(beta_mu, psi)` as the dual
/// variables; `beta_ls` is lifted from a plain `f64` constant.
fn gaussian_negloglik_log_sigma_mu_psi_numdual<D: DualNum<f64> + Copy>(
    beta_mu: D,
    psi: D,
    beta_ls: f64,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let beta_ls_const = D::from(beta_ls);
    gaussian_negloglik_log_sigma_psi_numdual(
        beta_mu,
        beta_ls_const,
        psi,
        y,
        weights,
        x_mu0,
        x_ls0,
        x_ls_psi,
        x_ls_psi_psi,
    )
}
/// Variant of `gaussian_negloglik_log_sigma_psi_numdual` with `(beta_ls, psi)` as the dual
/// variables; `beta_mu` is lifted from a plain `f64` constant.
fn gaussian_negloglik_log_sigma_ls_psi_numdual<D: DualNum<f64> + Copy>(
    beta_ls: D,
    psi: D,
    beta_mu: f64,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let beta_mu_const = D::from(beta_mu);
    gaussian_negloglik_log_sigma_psi_numdual(
        beta_mu_const,
        beta_ls,
        psi,
        y,
        weights,
        x_mu0,
        x_ls0,
        x_ls_psi,
        x_ls_psi_psi,
    )
}
/// Vector-argument adapter for `gaussian_negloglik_log_sigma_psi_numdual`: the slice `v`
/// is read as `[beta_mu, beta_ls, psi]`. Indexing panics if `v` has fewer than three
/// entries.
fn gaussian_negloglik_log_sigma_beta_vec_numdual<D: DualNum<f64> + Copy>(
    v: &[D],
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let (beta_mu, beta_ls, psi) = (v[0], v[1], v[2]);
    gaussian_negloglik_log_sigma_psi_numdual(
        beta_mu,
        beta_ls,
        psi,
        y,
        weights,
        x_mu0,
        x_ls0,
        x_ls_psi,
        x_ls_psi_psi,
    )
}
/// Wraps a dense design matrix in an unpenalized `ParameterBlockSpec` named `name`.
///
/// The offset is a zero vector with one entry per design row; there are no penalties, no
/// initial coefficients, no Jacobian callback and no stacked design/offset.
fn gaussian_psi_test_spec(name: &str, design: Array2<f64>) -> ParameterBlockSpec {
    let row_count = design.nrows();
    let dense_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(design));
    ParameterBlockSpec {
        name: name.to_string(),
        design: dense_design,
        offset: Array1::zeros(row_count),
        penalties: Vec::new(),
        nullspace_dims: Vec::new(),
        initial_log_lambdas: Array1::zeros(0),
        initial_beta: None,
        gauge_priority: 100,
        jacobian_callback: None,
        stacked_design: None,
        stacked_offset: None,
    }
}
/// Single-row check that both the log-sigma score and the first-order psi objective row
/// from `gaussian_joint_psi_firstweights` equal `kappa * (weight - n_i)` with
/// `kappa = 1 - LOGB_SIGMA_FLOOR / sigma`.
#[test]
fn gaussian_joint_psi_firstweights_score_ls_carries_logb_chain_rule_factor() {
    let y = array![1.1];
    let etamu = array![0.3];
    let eta_ls = array![-0.2];
    let weights = array![2.5];
    let rows =
        gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");

    // Per the assertion message below: mu_psi = 0 and eta_psi = 1.
    let mu_psi = array![0.0];
    let eta_psi = array![1.0];
    let firstweights = gaussian_joint_psi_firstweights(&rows, &mu_psi, &eta_psi);

    let sigma = crate::families::sigma_link::logb_sigma_from_eta_scalar(eta_ls[0]);
    let kappa = 1.0 - crate::families::sigma_link::LOGB_SIGMA_FLOOR / sigma;
    let expected = kappa * (weights[0] - rows.n[0]);

    let score = firstweights.score_ls[0];
    assert!(
        (score - expected).abs() <= 1e-12,
        "Under the logb link σ = b + exp(η_ls), d/dη_ls of weight*(ln σ + 0.5(y-μ)^2/σ^2) carries the chain-rule factor κ = 1 - b/σ, so the row score must equal κ*(weight - n_i). The helper coded {} but the κ-corrected expectation is {}.",
        score,
        expected
    );

    let objective = firstweights.objective_psirow[0];
    assert!(
        (objective - expected).abs() <= 1e-12,
        "With mu_psi=0 and eta_psi=1, the exact psi objective derivative must equal κ*(weight - n_i) (κ = 1 - b/σ from the logb chain rule). The helper coded {} but the κ-corrected expectation is {}.",
        objective,
        expected
    );
}
/// For `y = 1`, `weight = 1` and a large positive `q`, the cloglog closed-form derivatives
/// (first through fourth) must all be exactly zero.
#[test]
fn cloglog_binomial_right_tail_derivatives_stay_finite() {
    let (m1, m2, m3) = binomial_neglog_q_derivatives_cloglog_closed_form(1.0, 1.0, 1000.0);
    let m4 = binomial_neglog_q_fourth_derivative_cloglog_closed_form(1.0, 1.0, 300.0);
    for derivative in [m1, m2, m3, m4] {
        assert_eq!(derivative, 0.0);
    }
}
/// With a fractional response at `q = 300`, every cloglog closed-form derivative must be
/// finite and equal to `weight * (1 - y) * exp(q)`.
#[test]
fn cloglog_binomial_fractional_right_tail_keeps_y0_branch() {
    let y = 0.25;
    let weight = 2.0;
    let q = 300.0;
    let expected = weight * (1.0 - y) * q.exp();

    let (m1, m2, m3) = binomial_neglog_q_derivatives_cloglog_closed_form(y, weight, q);
    let m4 = binomial_neglog_q_fourth_derivative_cloglog_closed_form(y, weight, q);
    let derivatives = [m1, m2, m3, m4];

    // All finiteness checks run before any equality check.
    for derivative in derivatives {
        assert!(derivative.is_finite());
    }
    for derivative in derivatives {
        assert_eq!(derivative, expected);
    }
}
/// At `q = 50` with `y = 1`, the logit closed-form derivatives must track the exact tail
/// quantity `s = t / (1 + t)^2` with `t = exp(-q)` instead of a floored value.
#[test]
fn logit_binomial_tail_derivatives_are_exact_not_clipped() {
    let q = 50.0;
    let t = (-q).exp();
    let one_plus_t = 1.0 + t;
    let s_exact = t / (one_plus_t * one_plus_t);

    let (m1, m2, m3) = binomial_neglog_q_derivatives_logit_closed_form(1.0, 1.0, q);
    let m4 = binomial_neglog_q_fourth_derivative_logit_closed_form(1.0, 1.0, q);

    assert!(
        s_exact < 1e-21,
        "sanity: exact tail variance should be ~1e-22, got {s_exact}"
    );
    assert!(m1.abs() <= 1e-15, "m1 should be ~0 at p≈1, got {m1}");

    let curvature_error = (m2 - s_exact).abs();
    assert!(
        curvature_error <= 1e-30,
        "logit curvature must equal exact s=p(1-p) in the tail, got {m2}, want {s_exact}"
    );
    assert!(
        m2 < 1e-15,
        "logit curvature must NOT be floored at MIN_PROB·(1−MIN_PROB)≈1e-10, got {m2}"
    );
    assert!(m3.is_finite());

    let fourth_exact = s_exact * (1.0 - 6.0 * s_exact);
    assert!(
        (m4 - fourth_exact).abs() <= 1e-30,
        "logit fourth derivative must equal exact ws(1-6s) in the tail, got {m4}"
    );
}
/// For `y = 0` at `q = 40`, the probit closed forms must give a score within (39, 41), a
/// curvature within (0.9, 1.1), and finite third and fourth derivatives.
#[test]
fn probit_binomial_incompatible_tail_keeps_mills_score() {
    let q = 40.0;
    let (m1, m2, m3) = binomial_neglog_q_derivatives_probit_closed_form(0.0, 1.0, q);
    let m4 = binomial_neglog_q_fourth_derivative_probit_closed_form(0.0, 1.0, q);

    let score_in_band = m1 > 39.0 && m1 < 41.0;
    assert!(
        score_in_band,
        "right-tail probit score should be Mills-ratio sized, got {m1}"
    );

    let curvature_in_band = m2 > 0.9 && m2 < 1.1;
    assert!(
        curvature_in_band,
        "right-tail probit curvature should stay near one, got {m2}"
    );

    assert!(
        m3.is_finite(),
        "third derivative must stay finite, got {m3}"
    );
    assert!(
        m4.is_finite(),
        "fourth derivative must stay finite, got {m4}"
    );
}
/// `log_likelihood_only` must stay accurate for extreme threshold predictors under the
/// logit and cloglog standard links (two rows, `y = [0, 1]`, unit weights, zero log-sigma
/// predictor).
#[test]
fn binomial_location_scale_loglik_uses_tail_stable_standard_links() {
    use crate::families::custom_family::{CustomFamily, ParameterBlockState};

    let n = 2usize;
    let design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_elem((n, 1), 1.0),
    ));
    // Both families share the same data and intercept designs; only the link differs.
    let family_with_link = |link: StandardLink| BinomialLocationScaleFamily {
        y: array![0.0, 1.0],
        weights: Array1::ones(n),
        link_kind: InverseLink::Standard(link),
        threshold_design: Some(design.clone()),
        log_sigma_design: Some(design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
    };
    let threshold_state = |eta: Array1<f64>| ParameterBlockState {
        beta: array![0.0],
        eta,
    };
    let log_sigma = ParameterBlockState {
        beta: array![0.0],
        eta: array![0.0, 0.0],
    };

    // Logit: the test expects a total of -2000 for threshold predictors of ∓1000.
    let logit_family = family_with_link(StandardLink::Logit);
    let logit_states = vec![threshold_state(array![-1000.0, 1000.0]), log_sigma.clone()];
    let logit_ll = logit_family
        .log_likelihood_only(&logit_states)
        .expect("logit tail likelihood");
    assert!(
        (logit_ll + 2000.0).abs() <= 1e-10,
        "logit tail likelihood must use softplus natural-parameter algebra, got {logit_ll}"
    );

    // Cloglog: expected value is -(e^20) - 1000.
    let cloglog_family = family_with_link(StandardLink::CLogLog);
    let cloglog_states = vec![threshold_state(array![-20.0, 1000.0]), log_sigma];
    let cloglog_ll = cloglog_family
        .log_likelihood_only(&cloglog_states)
        .expect("cloglog tail likelihood");
    // Method calls bind tighter than unary minus, so the negation applies to exp(20).
    let expected = -(20.0_f64.exp()) - 1000.0;
    let rel = (cloglog_ll - expected).abs() / expected.abs();
    assert!(
        rel <= 1e-14,
        "cloglog tail likelihood must use exp(q) survival algebra, got {cloglog_ll}, expected {expected}"
    );
}
/// Single-row check that, with only the last direction argument set to one, the
/// second-order psi objective row from `gaussian_joint_psisecondweights` equals
/// `kappa * (weight - n_i)` with `kappa = 1 - LOGB_SIGMA_FLOOR / sigma`.
#[test]
fn gaussian_joint_psisecondweights_eta_ab_term_carries_logb_chain_rule_factor() {
    let y = array![1.1];
    let etamu = array![0.3];
    let eta_ls = array![-0.2];
    let weights = array![2.5];
    let rows =
        gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");

    // Five inactive direction inputs followed by the single active one.
    let inactive = array![0.0];
    let active = array![1.0];
    let secondweights = gaussian_joint_psisecondweights(
        &rows, &inactive, &inactive, &inactive, &inactive, &inactive, &active,
    );

    let sigma = crate::families::sigma_link::logb_sigma_from_eta_scalar(eta_ls[0]);
    let kappa = 1.0 - crate::families::sigma_link::LOGB_SIGMA_FLOOR / sigma;
    let expected = kappa * (weights[0] - rows.n[0]);

    let objective = secondweights.objective_psi_psirow[0];
    assert!(
        (objective - expected).abs() <= 1e-12,
        "With only eta_psi_psi=1 active, the Gaussian second psi objective contribution from the linear η_ls term carries the logb chain-rule factor κ = 1 - b/σ, so it must equal κ*(weight - n_i). The helper coded {} but the κ-corrected expectation is {}.",
        objective,
        expected
    );
}
/// The family's `coefficient_hessian_cost` must equal the joint-coupled helper's value,
/// which for two blocks is `n * (p_mu + p_log_sigma)^2` and exceeds the default cost.
#[test]
fn gaussian_location_scale_coefficient_cost_delegates_to_joint_coupled_helper() {
    let n = 100usize;
    let p_mu = 7usize;
    let p_log_sigma = 4usize;

    let family = GaussianLocationScaleFamily {
        y: Array1::zeros(n),
        weights: Array1::from_elem(n, 1.0),
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };

    // Unpenalized block over an all-zero dense design of the requested width.
    let zero_design_spec = |name: &str, width: usize| ParameterBlockSpec {
        name: name.to_string(),
        design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
            n, width,
        )))),
        offset: Array1::zeros(n),
        penalties: Vec::new(),
        nullspace_dims: Vec::new(),
        initial_log_lambdas: Array1::zeros(0),
        initial_beta: None,
        gauge_priority: 100,
        jacobian_callback: None,
        stacked_design: None,
        stacked_offset: None,
    };
    let specs = vec![
        zero_design_spec("mu", p_mu),
        zero_design_spec("log_sigma", p_log_sigma),
    ];

    let p_total = (p_mu + p_log_sigma) as u64;
    let expected =
        crate::custom_family::joint_coupled_coefficient_hessian_cost(n as u64, &specs);
    assert_eq!(family.coefficient_hessian_cost(&specs), expected);
    assert_eq!(expected, (n as u64) * p_total * p_total);

    let block_diagonal_cost = crate::custom_family::default_coefficient_hessian_cost(&specs);
    assert!(
        expected > block_diagonal_cost,
        "joint-coupled cost must exceed block-diagonal default by the cross-block fill"
    );
}
/// For `n = 50_001` rows and two 20-column blocks, the Gaussian location-scale family must
/// declare analytic outer derivatives, the work model must prefer the Hessian-operator
/// representation, and the resulting plan must be ARC with an analytic Hessian source.
#[test]
fn large_n_gaussian_location_scale_keeps_exact_outer_hessian_plan() {
    use crate::solver::outer_strategy;

    let n = 50_001usize;
    let p_mu = 20usize;
    let p_log_sigma = 20usize;

    let family = GaussianLocationScaleFamily {
        y: Array1::zeros(n),
        weights: Array1::from_elem(n, 1.0),
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };

    // Unpenalized block over an all-zero dense design of the requested width.
    let zero_design_spec = |name: &str, width: usize| ParameterBlockSpec {
        name: name.to_string(),
        design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
            n, width,
        )))),
        offset: Array1::zeros(n),
        penalties: Vec::new(),
        nullspace_dims: Vec::new(),
        initial_log_lambdas: Array1::zeros(0),
        initial_beta: None,
        gauge_priority: 100,
        jacobian_callback: None,
        stacked_design: None,
        stacked_offset: None,
    };
    let specs = vec![
        zero_design_spec("mu", p_mu),
        zero_design_spec("log_sigma", p_log_sigma),
    ];

    let options = BlockwiseFitOptions::default();
    let (gradient, hessian) =
        crate::custom_family::custom_family_outer_derivatives(&family, &specs, &options);
    assert_eq!(gradient, outer_strategy::Derivative::Analytic);
    assert_eq!(
        hessian,
        outer_strategy::DeclaredHessianForm::Either,
        "large-n GAMLSS location-scale fits must advertise exact second-order curvature instead of triggering the historical BFGS downgrade"
    );

    let p_total = p_mu + p_log_sigma;
    let prefers_operator =
        crate::solver::estimate::reml::unified::prefer_outer_hessian_operator(n, p_total, 2);
    assert!(
        prefers_operator,
        "the large-n work model should select the scalable explicit Hessian-operator representation"
    );

    let capability = outer_strategy::OuterCapability {
        gradient,
        hessian,
        n_params: 2,
        psi_dim: 0,
        fixed_point_available: false,
        barrier_config: None,
        prefer_gradient_only: false,
        disable_fixed_point: true,
    };
    let plan = outer_strategy::plan(&capability);
    assert_eq!(plan.solver, outer_strategy::Solver::Arc);
    assert_eq!(plan.hessian_source, outer_strategy::HessianSource::Analytic);
}
fn gls_workspace_fixture() -> (
GaussianLocationScaleFamily,
Vec<ParameterBlockState>,
Vec<ParameterBlockSpec>,
) {
let n = 7usize;
let p_mu = 3usize;
let p_ls = 2usize;
let xmu = Array2::from_shape_fn((n, p_mu), |(i, j)| {
((i as f64) * 0.13 + (j as f64) * 0.31).sin()
});
let xls = Array2::from_shape_fn((n, p_ls), |(i, j)| {
((i as f64) * 0.21 + (j as f64) * 0.47).cos()
});
let beta_mu = array![0.10, -0.20, 0.30];
let beta_ls = array![0.40, -0.10];
let eta_mu = xmu.dot(&beta_mu);
let eta_ls = xls.dot(&beta_ls);
let y = Array1::from_shape_fn(n, |i| 0.5 + 0.1 * (i as f64).cos());
let weights = Array1::from_elem(n, 1.0);
let mu_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xmu.clone()));
let log_sigma_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xls.clone()));
let family = GaussianLocationScaleFamily {
y,
weights,
mu_design: Some(mu_design.clone()),
log_sigma_design: Some(log_sigma_design.clone()),
policy: crate::resource::ResourcePolicy::default_library(),
cached_row_scalars: std::sync::RwLock::new(None),
};
let states = vec![
ParameterBlockState {
beta: beta_mu,
eta: eta_mu,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
];
let specs = vec![
ParameterBlockSpec {
name: "mu".to_string(),
design: mu_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: log_sigma_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
(family, states, specs)
}
fn bls_workspace_fixture() -> (
BinomialLocationScaleFamily,
Vec<ParameterBlockState>,
Vec<ParameterBlockSpec>,
) {
let n = 8usize;
let pt = 3usize;
let pls = 2usize;
let xt = Array2::from_shape_fn((n, pt), |(i, j)| {
((i as f64) * 0.17 + (j as f64) * 0.29).sin()
});
let xls = Array2::from_shape_fn((n, pls), |(i, j)| {
((i as f64) * 0.23 + (j as f64) * 0.41).cos() * 0.5
});
let beta_t = array![0.20, -0.10, 0.05];
let beta_ls = array![0.30, -0.15];
let eta_t = xt.dot(&beta_t);
let eta_ls = xls.dot(&beta_ls);
let y = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 1.0 } else { 0.0 }));
let weights = Array1::from_elem(n, 1.0);
let threshold_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xt.clone()));
let log_sigma_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xls.clone()));
let family = BinomialLocationScaleFamily {
y,
weights,
link_kind: InverseLink::Standard(StandardLink::Probit),
threshold_design: Some(threshold_design.clone()),
log_sigma_design: Some(log_sigma_design.clone()),
policy: crate::resource::ResourcePolicy::default_library(),
};
let states = vec![
ParameterBlockState {
beta: beta_t,
eta: eta_t,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
];
let specs = vec![
ParameterBlockSpec {
name: "threshold".to_string(),
design: threshold_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: log_sigma_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
(family, states, specs)
}
/// The Gaussian location-scale Hessian workspace must reproduce the dense joint Hessian's
/// diagonal and its products with several probe directions.
#[test]
fn gaussian_location_scale_workspace_matvec_matches_dense() {
    let (family, states, specs) = gls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    // Mixed relative/absolute tolerance shared by the diagonal and matvec checks.
    let tol_for = |reference: f64| 1e-10 * reference.abs().max(1.0) + 1e-10;

    let dense = family
        .exact_newton_joint_hessian(&states)
        .expect("dense joint Hessian build")
        .expect("dense joint Hessian present");
    assert_eq!(dense.nrows(), p);
    assert_eq!(dense.ncols(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");

    let diag_op = workspace
        .hessian_diagonal()
        .expect("diagonal call")
        .expect("diagonal present");
    assert_eq!(diag_op.len(), p);
    for i in 0..p {
        let (want, got) = (dense[[i, i]], diag_op[i]);
        assert!(
            (want - got).abs() <= tol_for(want),
            "GLS diagonal mismatch at {i}: dense={want:.6e}, workspace={got:.6e}"
        );
    }

    // Two canonical basis vectors (one per block) plus two generic directions.
    let directions = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 0.0, 0.0, 1.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
        array![-0.42, 0.11, 0.93, 0.05, -0.31],
    ];
    for (k, v) in directions.iter().enumerate() {
        assert_eq!(v.len(), p);
        let want = dense.dot(v);
        let got = workspace
            .hessian_matvec(v)
            .expect("matvec call")
            .expect("matvec present");
        assert_eq!(got.len(), p);
        for i in 0..p {
            let (want_i, got_i) = (want[i], got[i]);
            assert!(
                (want_i - got_i).abs() <= tol_for(want_i),
                "GLS matvec[{k}, {i}] mismatch: dense={:.6e}, workspace={:.6e}",
                want_i,
                got_i
            );
        }
    }
}
/// Asserts that a workspace's dense Hessian agrees with the matrix assembled column by
/// column from Hessian-vector products against the canonical basis vectors.
///
/// * `workspace` – joint-Hessian workspace under test.
/// * `total` – expected side length of the Hessian.
/// * `label` – prefix used in failure messages.
///
/// The assembled matrix is symmetrized before the entrywise comparison, which uses a
/// relative difference scaled by `max(|d|, |a|, 1)`.
///
/// # Panics
///
/// Panics if either workspace call fails or returns `None`, if the dense Hessian is not
/// `total x total`, if any compared entry is non-finite, or if the maximum relative
/// difference is not below `1e-12`.
fn assert_dense_matches_canonical_basis_hvp(
    workspace: &dyn crate::custom_family::ExactNewtonJointHessianWorkspace,
    total: usize,
    label: &str,
) {
    let dense = workspace
        .hessian_dense()
        .expect("hessian_dense call")
        .expect("hessian_dense present");
    assert_eq!(dense.nrows(), total);
    assert_eq!(dense.ncols(), total);

    // Column j of the assembled matrix is H * e_j.
    let mut assembled = Array2::<f64>::zeros((total, total));
    for j in 0..total {
        let mut e = Array1::<f64>::zeros(total);
        e[j] = 1.0;
        let col = workspace
            .hessian_matvec(&e)
            .expect("matvec call")
            .expect("matvec present");
        assembled.column_mut(j).assign(&col);
    }
    let assembled_sym = 0.5 * (&assembled + &assembled.t());

    // `f64::max` returns the non-NaN operand, so a NaN relative difference (from a NaN
    // entry, or from inf/inf) would silently drop out of the fold below and let the
    // tolerance check pass vacuously. Reject non-finite entries up front.
    assert!(
        dense.iter().all(|d| d.is_finite()),
        "{label} hessian_dense contains non-finite entries"
    );
    assert!(
        assembled_sym.iter().all(|a| a.is_finite()),
        "{label} canonical HVP assembly contains non-finite entries"
    );

    let max_rel = dense
        .iter()
        .zip(assembled_sym.iter())
        .map(|(d, a)| ((d - a) / d.abs().max(a.abs()).max(1.0)).abs())
        .fold(0.0_f64, f64::max);
    assert!(
        max_rel < 1e-12,
        "{label} hessian_dense vs canonical HVP max relative diff: {max_rel:.3e}"
    );
}
/// Dense Hessian vs canonical-basis HVP assembly for the Gaussian location-scale fixture.
#[test]
fn gaussian_location_scale_hessian_dense_matches_canonical_basis_hvp_path() {
    // NOTE(review): the purpose of this `file!()` assertion is not evident from here.
    assert!(file!().ends_with(".rs"));
    let (family, states, specs) = gls_workspace_fixture();
    let total: usize = states[..2].iter().map(|state| state.beta.len()).sum();
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    assert_dense_matches_canonical_basis_hvp(workspace.as_ref(), total, "GLS");
}
/// Dense Hessian vs canonical-basis HVP assembly for the binomial location-scale fixture.
#[test]
fn binomial_location_scale_hessian_dense_matches_canonical_basis_hvp_path() {
    // NOTE(review): the purpose of this `file!()` assertion is not evident from here.
    assert!(file!().ends_with(".rs"));
    let (family, states, specs) = bls_workspace_fixture();
    let total: usize = states[..2].iter().map(|state| state.beta.len()).sum();
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    assert_dense_matches_canonical_basis_hvp(workspace.as_ref(), total, "BLS");
}
/// Dense Hessian vs canonical-basis HVP assembly for the three-block Gaussian
/// location-scale wiggle fixture.
#[test]
fn gaussian_location_scale_wiggle_hessian_dense_matches_canonical_basis_hvp_path() {
    // NOTE(review): the purpose of this `file!()` assertion is not evident from here.
    assert!(file!().ends_with(".rs"));
    let (family, states, specs, _xmu, _xls, _xw) = gls_wiggle_workspace_fixture();
    let total: usize = states[..3].iter().map(|state| state.beta.len()).sum();
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    assert_dense_matches_canonical_basis_hvp(workspace.as_ref(), total, "GLSW");
}
/// Dense Hessian vs canonical-basis HVP assembly for the three-block binomial
/// location-scale wiggle fixture.
#[test]
fn binomial_location_scale_wiggle_hessian_dense_matches_canonical_basis_hvp_path() {
    // NOTE(review): the purpose of this `file!()` assertion is not evident from here.
    assert!(file!().ends_with(".rs"));
    let (family, states, specs, _xt, _xls, _xw) = bls_wiggle_workspace_fixture();
    let total: usize = states[..3].iter().map(|state| state.beta.len()).sum();
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    assert_dense_matches_canonical_basis_hvp(workspace.as_ref(), total, "BLSW");
}
/// The workspace's directional-derivative operator for the Gaussian location-scale
/// fixture must match the dense directional derivative of the joint Hessian when applied
/// to several probe vectors.
#[test]
fn gaussian_location_scale_workspace_dh_operator_matches_dense() {
    let (family, states, specs) = gls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let tol_for = |reference: f64| 1e-9 * reference.abs().max(1.0) + 1e-9;

    let d_beta = array![0.07, -0.04, 0.21, 0.08, -0.13];
    assert_eq!(d_beta.len(), p);

    let dense_dh = family
        .exact_newton_joint_hessian_directional_derivative(&states, &d_beta)
        .expect("dense dH build")
        .expect("dense dH present");
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let dh_op = workspace
        .directional_derivative_operator(&d_beta)
        .expect("dH operator call")
        .expect("dH operator present");

    let probes = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 1.0, 0.0, 0.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
    ];
    for (k, w) in probes.iter().enumerate() {
        assert_eq!(w.len(), p);
        let want = dense_dh.dot(w);
        let got = dh_op.mul_vec(w);
        assert_eq!(got.len(), p);
        for i in 0..p {
            let (want_i, got_i) = (want[i], got[i]);
            assert!(
                (want_i - got_i).abs() <= tol_for(want_i),
                "GLS dH op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                want_i,
                got_i
            );
        }
    }
}
/// The binomial location-scale Hessian workspace must reproduce the dense joint Hessian's
/// diagonal and its products with several probe directions.
#[test]
fn binomial_location_scale_workspace_matvec_matches_dense() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    // Mixed relative/absolute tolerance shared by the diagonal and matvec checks.
    let tol_for = |reference: f64| 1e-10 * reference.abs().max(1.0) + 1e-10;

    let dense = family
        .exact_newton_joint_hessian(&states)
        .expect("dense joint Hessian build")
        .expect("dense joint Hessian present");
    assert_eq!(dense.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");

    let diag_op = workspace
        .hessian_diagonal()
        .expect("diagonal call")
        .expect("diagonal present");
    assert_eq!(diag_op.len(), p);
    for i in 0..p {
        let (want, got) = (dense[[i, i]], diag_op[i]);
        assert!(
            (want - got).abs() <= tol_for(want),
            "BLS diagonal mismatch at {i}: dense={want:.6e}, workspace={got:.6e}"
        );
    }

    // Two canonical basis vectors (one per block) plus two generic directions.
    let directions = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 0.0, 0.0, 1.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
        array![-0.42, 0.11, 0.93, 0.05, -0.31],
    ];
    for (k, v) in directions.iter().enumerate() {
        assert_eq!(v.len(), p);
        let want = dense.dot(v);
        let got = workspace
            .hessian_matvec(v)
            .expect("matvec call")
            .expect("matvec present");
        for i in 0..p {
            let (want_i, got_i) = (want[i], got[i]);
            assert!(
                (want_i - got_i).abs() <= tol_for(want_i),
                "BLS matvec[{k}, {i}] mismatch: dense={:.6e}, workspace={:.6e}",
                want_i,
                got_i
            );
        }
    }
}
// End-to-end check of the operator-backed binomial location-scale workspace.
//
// The family is built WITHOUT cached designs (`threshold_design` / `log_sigma_design` are
// `None`) and the block specs wrap their designs with `no_densify_design`
// (presumably a wrapper that fails if the design is materialized densely — confirm in
// `test_support`). Every workspace result is compared against a dense reference computed
// directly from the raw `xt` / `xls` arrays: Hessian, Hessian-vector product, diagonal,
// gradient + log-likelihood, and first/second directional derivatives of the Hessian.
#[test]
fn binomial_location_scale_operator_workspace_never_densifies_specs() {
// Same dimensions, designs, coefficients and responses as `bls_workspace_fixture`.
let n = 8usize;
let pt = 3usize;
let pls = 2usize;
let xt = Array2::from_shape_fn((n, pt), |(i, j)| {
((i as f64) * 0.17 + (j as f64) * 0.29).sin()
});
let xls = Array2::from_shape_fn((n, pls), |(i, j)| {
((i as f64) * 0.23 + (j as f64) * 0.41).cos() * 0.5
});
let beta_t = array![0.20, -0.10, 0.05];
let beta_ls = array![0.30, -0.15];
let eta_t = xt.dot(&beta_t);
let eta_ls = xls.dot(&beta_ls);
// No designs are stored on the family itself; they are only reachable through `specs`.
let family = BinomialLocationScaleFamily {
y: Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 1.0 } else { 0.0 })),
weights: Array1::from_elem(n, 1.0),
link_kind: InverseLink::Standard(StandardLink::Probit),
threshold_design: None,
log_sigma_design: None,
policy: crate::resource::ResourcePolicy::default_library(),
};
let states = vec![
ParameterBlockState {
beta: beta_t,
eta: eta_t,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
];
let specs = vec![
ParameterBlockSpec {
name: "threshold".to_string(),
design: no_densify_design(xt.clone()),
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: no_densify_design(xls.clone()),
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
// The family must advertise an HVP path for these specs.
assert!(family.inner_coefficient_hessian_hvp_available(&specs));
// Dense reference Hessian computed from the raw arrays.
let dense_h = family
.exact_newton_joint_hessian_from_designs(&states, &xt, &xls)
.expect("dense reference Hessian")
.expect("dense Hessian present");
let workspace = family
.exact_newton_joint_hessian_workspace(&states, &specs)
.expect("operator workspace build")
.expect("operator workspace present");
// (1) Full Hessian produced by the workspace vs the dense reference, entry by entry.
let got_h = workspace
.hessian_dense()
.expect("operator-backed dense Hessian")
.expect("operator-backed dense Hessian present");
assert_eq!(got_h.dim(), dense_h.dim());
for i in 0..got_h.nrows() {
for j in 0..got_h.ncols() {
let want = dense_h[[i, j]];
let got = got_h[[i, j]];
let tol = 1e-10 * want.abs().max(1.0) + 1e-10;
assert!(
(want - got).abs() <= tol,
"lazy BLS dense Hessian mismatch at ({i}, {j}): dense={want:.6e}, op={got:.6e}"
);
}
}
// (2) Hessian-vector product along a generic direction `v`.
let v = array![0.30, -0.70, 0.50, -0.20, 0.15];
let got_hv = workspace
.hessian_matvec(&v)
.expect("operator matvec")
.expect("operator matvec present");
let want_hv = dense_h.dot(&v);
for i in 0..v.len() {
let tol = 1e-10 * want_hv[i].abs().max(1.0) + 1e-10;
assert!(
(want_hv[i] - got_hv[i]).abs() <= tol,
"lazy BLS Hv mismatch at {i}: dense={:.6e}, op={:.6e}",
want_hv[i],
got_hv[i]
);
}
// (3) Hessian diagonal.
let got_diag = workspace
.hessian_diagonal()
.expect("operator diagonal")
.expect("operator diagonal present");
for i in 0..v.len() {
let want = dense_h[[i, i]];
let tol = 1e-10 * want.abs().max(1.0) + 1e-10;
assert!(
(want - got_diag[i]).abs() <= tol,
"lazy BLS diagonal mismatch at {i}: dense={:.6e}, op={:.6e}",
want,
got_diag[i]
);
}
// (4) Gradient and log-likelihood: the reference uses ordinary dense design matrices,
// the tested path uses the lazy `specs`.
let dense_xt = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xt.clone()));
let dense_xls = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xls.clone()));
let want_grad = family
.exact_newton_joint_gradient_from_designs(&states, &dense_xt, &dense_xls)
.expect("dense reference gradient");
let got_grad = family
.exact_newton_joint_gradient_evaluation(&states, &specs)
.expect("operator gradient")
.expect("operator gradient present");
assert!(
(want_grad.log_likelihood - got_grad.log_likelihood).abs() <= 1e-12,
"operator gradient log-likelihood mismatch"
);
for i in 0..v.len() {
let want = want_grad.gradient[i];
let got = got_grad.gradient[i];
let tol = 1e-10 * want.abs().max(1.0) + 1e-10;
assert!(
(want - got).abs() <= tol,
"lazy BLS gradient mismatch at {i}: dense={:.6e}, op={:.6e}",
want,
got
);
}
// (5) First directional derivative of the Hessian along `d_beta`, applied to `v`.
// Tolerances loosen to 1e-9 for the derivative operators.
let d_beta = array![0.07, -0.04, 0.21, 0.08, -0.13];
let dense_dh = family
.exact_newton_joint_hessian_directional_derivative_from_designs(
&states, &xt, &xls, &d_beta,
)
.expect("dense dH")
.expect("dense dH present");
let got_dh_v = workspace
.directional_derivative_operator(&d_beta)
.expect("operator dH")
.expect("operator dH present")
.mul_vec(&v);
let want_dh_v = dense_dh.dot(&v);
for i in 0..v.len() {
let tol = 1e-9 * want_dh_v[i].abs().max(1.0) + 1e-9;
assert!(
(want_dh_v[i] - got_dh_v[i]).abs() <= tol,
"lazy BLS dH*v mismatch at {i}: dense={:.6e}, op={:.6e}",
want_dh_v[i],
got_dh_v[i]
);
}
// (6) Second directional derivative along (`d_beta`, `d_beta_v`), applied to `v`.
let d_beta_v = array![-0.11, 0.13, -0.05, -0.22, 0.09];
let dense_d2h = family
.exact_newton_joint_hessiansecond_directional_derivative_from_designs(
&states, &xt, &xls, &d_beta, &d_beta_v,
)
.expect("dense d2H")
.expect("dense d2H present");
let got_d2h_v = workspace
.second_directional_derivative_operator(&d_beta, &d_beta_v)
.expect("operator d2H")
.expect("operator d2H present")
.mul_vec(&v);
let want_d2h_v = dense_d2h.dot(&v);
for i in 0..v.len() {
let tol = 1e-9 * want_d2h_v[i].abs().max(1.0) + 1e-9;
assert!(
(want_d2h_v[i] - got_d2h_v[i]).abs() <= tol,
"lazy BLS d2H*v mismatch at {i}: dense={:.6e}, op={:.6e}",
want_d2h_v[i],
got_d2h_v[i]
);
}
}
/// The workspace's directional-derivative operator for the binomial location-scale
/// fixture must match the dense directional derivative of the joint Hessian when applied
/// to several probe vectors.
#[test]
fn binomial_location_scale_workspace_dh_operator_matches_dense() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let tol_for = |reference: f64| 1e-9 * reference.abs().max(1.0) + 1e-9;

    let d_beta = array![0.07, -0.04, 0.21, 0.08, -0.13];
    assert_eq!(d_beta.len(), p);

    let dense_dh = family
        .exact_newton_joint_hessian_directional_derivative(&states, &d_beta)
        .expect("dense dH build")
        .expect("dense dH present");
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let dh_op = workspace
        .directional_derivative_operator(&d_beta)
        .expect("dH operator call")
        .expect("dH operator present");

    let probes = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 1.0, 0.0, 0.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
    ];
    for (k, w) in probes.iter().enumerate() {
        assert_eq!(w.len(), p);
        let want = dense_dh.dot(w);
        let got = dh_op.mul_vec(w);
        for i in 0..p {
            let (want_i, got_i) = (want[i], got[i]);
            assert!(
                (want_i - got_i).abs() <= tol_for(want_i),
                "BLS dH op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                want_i,
                got_i
            );
        }
    }
}
/// Verifies that the workspace-provided second directional derivative operator
/// `d2H[u, v]` of the binomial location-scale family agrees with its dense
/// counterpart on two unit probes and one mixed probe.
#[test]
fn binomial_location_scale_workspace_d2h_operator_matches_dense() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let first_direction = array![0.07, -0.04, 0.21, 0.08, -0.13];
    let second_direction = array![-0.11, 0.13, -0.05, -0.22, 0.09];
    assert_eq!(first_direction.len(), p);
    assert_eq!(second_direction.len(), p);

    // Reference: dense second directional derivative of the joint Hessian.
    let reference = family
        .exact_newton_joint_hessiansecond_directional_derivative(
            &states,
            &first_direction,
            &second_direction,
        )
        .expect("dense d2H build")
        .expect("dense d2H present");

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&first_direction, &second_direction)
        .expect("d2H operator call")
        .expect("d2H operator present");

    let probes = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 1.0, 0.0, 0.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "BLS d2H op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Checks the projected-trace entry points of the binomial location-scale
/// `dH` and `d2H` operators against a dense reference.
///
/// For a factor `F` the reference is the sum of the elementwise product of `F`
/// with `dense(op) * F`. The uncached path and two consecutive cached calls
/// must all reproduce it. Afterwards one factor entry is changed and the cached
/// path is called again with the same cache; the result must match the
/// reference for the modified factor.
#[test]
fn binomial_location_scale_projected_trace_cache_matches_dense() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let direction_u = array![0.07, -0.04, 0.21, 0.08, -0.13];
    let direction_v = array![-0.11, 0.13, -0.05, -0.22, 0.09];
    // Deterministic p x 3 factor with no special structure.
    let factor = Array2::from_shape_fn((p, 3), |(row, col)| {
        ((row as f64 + 1.0) * 0.19 + (col as f64 + 0.5) * 0.37).sin()
    });

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let dh_op = workspace
        .directional_derivative_operator(&direction_u)
        .expect("dH operator call")
        .expect("dH operator present");
    let d2h_op = workspace
        .second_directional_derivative_operator(&direction_u, &direction_v)
        .expect("d2H operator call")
        .expect("d2H operator present");
    let cache = crate::solver::estimate::reml::unified::ProjectedFactorCache::default();

    let operators = [("dH", dh_op.clone()), ("d2H", d2h_op.clone())];
    for (name, op) in operators {
        // Dense reference for this operator.
        let op_times_factor = op.to_dense().dot(&factor);
        let want: f64 = factor
            .iter()
            .zip(op_times_factor.iter())
            .map(|(&f, &bf)| f * bf)
            .sum();
        let tol = 1e-9 * want.abs().max(1.0) + 1e-9;

        // Evaluate in the order: uncached, first cached call, second cached call.
        let uncached = op.trace_projected_factor(&factor);
        let cached_first = op.trace_projected_factor_cached(&factor, &cache);
        let cached_second = op.trace_projected_factor_cached(&factor, &cache);
        let observations = [
            ("uncached", uncached),
            ("cached_first", cached_first),
            ("cached_second", cached_second),
        ];
        for (label, got) in observations {
            assert!(
                (want - got).abs() <= tol,
                "{name} projected trace {label} mismatch: dense={want:.6e}, got={got:.6e}"
            );
        }
    }

    // Call the cached path once, then change the factor contents and call it
    // again with the same cache: the second answer must reflect the new contents.
    let mut reused_factor = factor.clone();
    let cached_probe = dh_op.trace_projected_factor_cached(&reused_factor, &cache);
    assert!(cached_probe.is_finite());
    reused_factor[[0, 0]] += 0.25;

    let op_times_reused = dh_op.to_dense().dot(&reused_factor);
    let want: f64 = reused_factor
        .iter()
        .zip(op_times_reused.iter())
        .map(|(&f, &bf)| f * bf)
        .sum();
    let got = dh_op.trace_projected_factor_cached(&reused_factor, &cache);
    let tol = 1e-9 * want.abs().max(1.0) + 1e-9;
    assert!(
        (want - got).abs() <= tol,
        "cached projected trace reused stale factor contents: dense={want:.6e}, got={got:.6e}"
    );
}
/// Feeding the cached projected-trace path a factor with one row more than the
/// joint coefficient count must panic with the row-mismatch message.
#[test]
#[should_panic(expected = "two-block cached projected trace factor row mismatch")]
fn binomial_location_scale_projected_trace_rejects_wrong_factor_rows() {
    let (family, states, specs) = bls_workspace_fixture();
    let joint_dim = states[0].beta.len() + states[1].beta.len();
    let direction = array![0.07, -0.04, 0.21, 0.08, -0.13];

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&direction)
        .expect("dH operator call")
        .expect("dH operator present");

    // One row too many on purpose.
    let oversized_factor = Array2::<f64>::zeros((joint_dim + 1, 2));
    let cache = crate::solver::estimate::reml::unified::ProjectedFactorCache::default();
    operator.trace_projected_factor_cached(&oversized_factor, &cache);
}
/// Validates the binomial location-scale `dH[u]` operator against a central
/// finite difference of the dense joint Hessian.
///
/// The coefficients are shifted by `+/- eps * u`, both linear predictors are
/// rebuilt from the dense designs in `specs`, and
/// `(H(beta + eps u) v - H(beta - eps u) v) / (2 eps)` is compared with the
/// operator applied to `v`.
#[test]
fn binomial_location_scale_workspace_dh_operator_finite_difference() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let u = array![0.07, -0.04, 0.21, 0.08, -0.13];
    let v = array![0.30, -0.70, 0.50, -0.20, 0.15];
    let eps = 1e-6;

    let threshold_cols = states[0].beta.len();
    let shifted_states = |sign: f64| -> Vec<ParameterBlockState> {
        let step = sign * eps;
        let mut shifted = states.clone();
        // `u` is laid out as [threshold block | log-sigma block].
        for j in 0..threshold_cols {
            shifted[0].beta[j] += step * u[j];
        }
        for j in threshold_cols..p {
            shifted[1].beta[j - threshold_cols] += step * u[j];
        }
        // Keep the linear predictors in sync with the shifted coefficients.
        let threshold_design = specs[0].design.as_dense_ref().expect("dense xt");
        let log_sigma_design = specs[1].design.as_dense_ref().expect("dense xls");
        shifted[0].eta = threshold_design.dot(&shifted[0].beta);
        shifted[1].eta = log_sigma_design.dot(&shifted[1].beta);
        shifted
    };
    let forward = shifted_states(1.0);
    let backward = shifted_states(-1.0);

    let hessian_forward = family
        .exact_newton_joint_hessian(&forward)
        .expect("dense H+")
        .expect("dense H+ present");
    let hessian_backward = family
        .exact_newton_joint_hessian(&backward)
        .expect("dense H-")
        .expect("dense H- present");
    let fd = (hessian_forward.dot(&v) - hessian_backward.dot(&v)) / (2.0 * eps);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&u)
        .expect("dH op call")
        .expect("dH op present");
    let analytic = operator.mul_vec(&v);

    for i in 0..p {
        let (numeric, exact) = (fd[i], analytic[i]);
        let tol = 1e-5 * numeric.abs().max(1.0) + 1e-5;
        assert!(
            (numeric - exact).abs() <= tol,
            "BLS dH FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            numeric,
            exact
        );
    }
}
/// Validates the binomial location-scale `d2H[u_fd, u]` operator against a
/// central finite difference of the dense `dH[u]` taken along `u_fd`.
///
/// The coefficients are shifted by `+/- eps * u_fd`, both linear predictors
/// are rebuilt from the dense designs in `specs`, and the difference quotient
/// of `dH[u] * probe` is compared with the operator applied to `probe`.
#[test]
fn binomial_location_scale_workspace_d2h_operator_finite_difference() {
    let (family, states, specs) = bls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let u = array![0.07, -0.04, 0.21, 0.08, -0.13];
    let u_fd = array![0.30, -0.70, 0.50, -0.20, 0.15];
    let probe = array![-0.21, 0.11, 0.05, 0.32, -0.04];
    let eps = 1e-6;

    let threshold_cols = states[0].beta.len();
    let shifted_states = |sign: f64| -> Vec<ParameterBlockState> {
        let step = sign * eps;
        let mut shifted = states.clone();
        // `u_fd` is laid out as [threshold block | log-sigma block].
        for j in 0..threshold_cols {
            shifted[0].beta[j] += step * u_fd[j];
        }
        for j in threshold_cols..p {
            shifted[1].beta[j - threshold_cols] += step * u_fd[j];
        }
        // Keep the linear predictors in sync with the shifted coefficients.
        let threshold_design = specs[0].design.as_dense_ref().expect("dense xt");
        let log_sigma_design = specs[1].design.as_dense_ref().expect("dense xls");
        shifted[0].eta = threshold_design.dot(&shifted[0].beta);
        shifted[1].eta = log_sigma_design.dot(&shifted[1].beta);
        shifted
    };
    let forward = shifted_states(1.0);
    let backward = shifted_states(-1.0);

    let dh_forward = family
        .exact_newton_joint_hessian_directional_derivative(&forward, &u)
        .expect("dense dH+")
        .expect("dense dH+ present");
    let dh_backward = family
        .exact_newton_joint_hessian_directional_derivative(&backward, &u)
        .expect("dense dH-")
        .expect("dense dH- present");
    let fd = (dh_forward.dot(&probe) - dh_backward.dot(&probe)) / (2.0 * eps);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&u_fd, &u)
        .expect("d2H op call")
        .expect("d2H op present");
    let analytic = operator.mul_vec(&probe);

    for i in 0..p {
        let (numeric, exact) = (fd[i], analytic[i]);
        let tol = 5e-5 * numeric.abs().max(1.0) + 5e-5;
        assert!(
            (numeric - exact).abs() <= tol,
            "BLS d2H FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            numeric,
            exact
        );
    }
}
/// Verifies that the workspace-provided `d2H[u, v]` operator of the Gaussian
/// location-scale family agrees with the dense second directional derivative
/// on two unit probes and one mixed probe.
#[test]
fn gaussian_location_scale_workspace_d2h_operator_matches_dense() {
    let (family, states, specs) = gls_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len();
    let first_direction = array![0.07, -0.04, 0.21, 0.08, -0.13];
    let second_direction = array![-0.11, 0.13, -0.05, -0.22, 0.09];

    // Reference: dense second directional derivative of the joint Hessian.
    let reference = family
        .exact_newton_joint_hessiansecond_directional_derivative(
            &states,
            &first_direction,
            &second_direction,
        )
        .expect("dense d2H build")
        .expect("dense d2H present");

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&first_direction, &second_direction)
        .expect("d2H op call")
        .expect("d2H op present");

    let probes = [
        array![1.0, 0.0, 0.0, 0.0, 0.0],
        array![0.0, 1.0, 0.0, 0.0, 0.0],
        array![0.30, -0.70, 0.50, -0.20, 0.15],
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "GLS d2H op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Verifies that `hessian_matvec` of the binomial location-scale wiggle
/// workspace reproduces the dense joint Hessian times a vector.
///
/// Probes are the first unit vector of each of the three coefficient blocks
/// plus one dense cosine-patterned vector.
#[test]
fn binomial_location_scale_wiggle_workspace_matvec_matches_dense() {
    let (family, states, specs, _xt, _xls, wiggle_design_current) =
        bls_wiggle_workspace_fixture();
    // Block widths: threshold, log-sigma, then one entry per wiggle column.
    let (pt, pls): (usize, usize) = (3, 2);
    let p = pt + pls + wiggle_design_current.ncols();

    let reference = family
        .exact_newton_joint_hessian(&states)
        .expect("dense joint Hessian build")
        .expect("dense joint Hessian present");
    assert_eq!(reference.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");

    // Unit vector with a single 1.0 at position `hot`.
    let unit = |hot: usize| Array1::from_shape_fn(p, |idx| if idx == hot { 1.0 } else { 0.0 });
    let directions = [
        unit(0),
        unit(pt),
        unit(pt + pls),
        Array1::from_shape_fn(p, |idx| 0.1 * ((idx + 1) as f64).cos()),
    ];
    for (k, direction) in directions.iter().enumerate() {
        let dense_product = reference.dot(direction);
        let workspace_product = workspace
            .hessian_matvec(direction)
            .expect("matvec call")
            .expect("matvec present");
        for i in 0..p {
            let (expected, actual) = (dense_product[i], workspace_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "BLSW matvec[{k}, {i}] mismatch: dense={:.6e}, workspace={:.6e}",
                expected,
                actual
            );
        }
    }
}
fn bls_wiggle_workspace_fixture() -> (
BinomialLocationScaleWiggleFamily,
Vec<ParameterBlockState>,
Vec<ParameterBlockSpec>,
Array2<f64>,
Array2<f64>,
Array2<f64>,
) {
let n = 10usize;
let pt = 3usize;
let pls = 2usize;
let xt = Array2::from_shape_fn((n, pt), |(i, j)| {
((i as f64) * 0.17 + (j as f64) * 0.29).sin() * 0.4
});
let xls = Array2::from_shape_fn((n, pls), |(i, j)| {
((i as f64) * 0.23 + (j as f64) * 0.41).cos() * 0.3
});
let beta_t = array![0.20, -0.10, 0.05];
let beta_ls = array![0.30, -0.15];
let eta_t = xt.dot(&beta_t);
let eta_ls = xls.dot(&beta_ls);
let q_seed = Array1::linspace(-1.0, 1.0, n);
let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
q_seed.view(),
2,
3,
2,
false,
)
.expect("wiggle block");
let y = Array1::from_iter((0..n).map(|i| if i % 2 == 0 { 1.0 } else { 0.0 }));
let weights = Array1::from_elem(n, 1.0);
let threshold_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xt.clone()));
let log_sigma_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xls.clone()));
let family = BinomialLocationScaleWiggleFamily {
y,
weights,
link_kind: InverseLink::Standard(StandardLink::Probit),
threshold_design: Some(threshold_design.clone()),
log_sigma_design: Some(log_sigma_design.clone()),
wiggle_knots: knots,
wiggle_degree: 2,
policy: crate::resource::ResourcePolicy::default_library(),
};
let q0 = Array1::from_iter(
eta_t
.iter()
.zip(eta_ls.iter())
.map(|(&eta_t_i, &eta_ls_i)| {
binomial_location_scale_q0(eta_t_i, exp_sigma_from_eta_scalar(eta_ls_i))
}),
);
let wiggle_design_current = family
.wiggle_design(q0.view())
.expect("current wiggle basis");
let pw = wiggle_design_current.ncols();
let beta_w = Array1::from_shape_fn(pw, |j| 0.05 * ((j + 1) as f64).cos());
let eta_w = wiggle_design_current.dot(&beta_w);
let states = vec![
ParameterBlockState {
beta: beta_t,
eta: eta_t,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
ParameterBlockState {
beta: beta_w,
eta: eta_w,
},
];
let specs = vec![
ParameterBlockSpec {
name: "threshold".to_string(),
design: threshold_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: log_sigma_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "wiggle".to_string(),
design: wiggle_block.design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
(family, states, specs, xt, xls, wiggle_design_current)
}
/// Verifies that the `dH[d_beta]` operator of the binomial location-scale
/// wiggle workspace agrees with the dense directional derivative.
///
/// Probes are the first unit vector of each coefficient block plus one dense
/// sine-patterned vector.
#[test]
fn binomial_location_scale_wiggle_workspace_dh_operator_matches_dense() {
    let (family, states, specs, _xt, _xls, _xw) = bls_wiggle_workspace_fixture();
    let threshold_cols = states[0].beta.len();
    let log_sigma_cols = states[1].beta.len();
    let p = threshold_cols + log_sigma_cols + states[2].beta.len();
    let direction = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).cos());

    let reference = family
        .exact_newton_joint_hessian_directional_derivative(&states, &direction)
        .expect("dense dH build")
        .expect("dense dH present");
    assert_eq!(reference.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&direction)
        .expect("dH op call")
        .expect("dH op present");

    // Unit vector with a single 1.0 at position `hot`.
    let unit = |hot: usize| Array1::from_shape_fn(p, |idx| if idx == hot { 1.0 } else { 0.0 });
    let probes = [
        unit(0),
        unit(threshold_cols),
        unit(threshold_cols + log_sigma_cols),
        Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).sin()),
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "BLSW dH op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Validates the binomial location-scale wiggle `dH[u]` operator against a
/// central finite difference of the dense joint Hessian.
///
/// Each shifted state rebuilds the threshold and log-sigma predictors from the
/// fixture designs and re-evaluates the wiggle basis at the shifted `q0` before
/// forming the wiggle predictor.
#[test]
fn binomial_location_scale_wiggle_workspace_dh_operator_finite_difference() {
    let (family, states, specs, xt, xls, _xw) = bls_wiggle_workspace_fixture();
    let pt = states[0].beta.len();
    let pls = states[1].beta.len();
    let p = pt + pls + states[2].beta.len();
    let u = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).cos());
    let v = Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).sin());
    let eps = 1e-5;

    // Offset of each block inside the stacked direction vector.
    let block_starts = [0, pt, pt + pls];
    let shifted_states = |sign: f64| -> Vec<ParameterBlockState> {
        let step = sign * eps;
        let mut shifted = states.clone();
        for (block, &start) in block_starts.iter().enumerate() {
            for j in 0..states[block].beta.len() {
                shifted[block].beta[j] += step * u[start + j];
            }
        }
        shifted[0].eta = xt.dot(&shifted[0].beta);
        shifted[1].eta = xls.dot(&shifted[1].beta);
        // The wiggle basis depends on q0, so it is rebuilt at the shifted point.
        let q0_values = shifted[0]
            .eta
            .iter()
            .zip(shifted[1].eta.iter())
            .map(|(&eta_t, &eta_ls)| {
                binomial_location_scale_q0(eta_t, exp_sigma_from_eta_scalar(eta_ls))
            });
        let q0 = Array1::from_iter(q0_values);
        let shifted_basis = family
            .wiggle_design(q0.view())
            .expect("perturbed wiggle basis");
        shifted[2].eta = shifted_basis.dot(&shifted[2].beta);
        shifted
    };
    let forward = shifted_states(1.0);
    let backward = shifted_states(-1.0);

    let hessian_forward = family
        .exact_newton_joint_hessian(&forward)
        .expect("dense H+")
        .expect("dense H+ present");
    let hessian_backward = family
        .exact_newton_joint_hessian(&backward)
        .expect("dense H-")
        .expect("dense H- present");
    let fd = (hessian_forward.dot(&v) - hessian_backward.dot(&v)) / (2.0 * eps);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&u)
        .expect("dH op call")
        .expect("dH op present");
    let analytic = operator.mul_vec(&v);

    for i in 0..p {
        let (numeric, exact) = (fd[i], analytic[i]);
        let tol = 5e-5 * numeric.abs().max(1.0) + 5e-5;
        assert!(
            (numeric - exact).abs() <= tol,
            "BLSW dH FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            numeric,
            exact
        );
    }
}
/// Verifies that the `d2H[u, v]` operator of the binomial location-scale
/// wiggle workspace agrees with the dense second directional derivative.
///
/// Probes are the first unit vector of each coefficient block plus one dense
/// cosine-patterned vector.
#[test]
fn binomial_location_scale_wiggle_workspace_d2h_operator_matches_dense() {
    let (family, states, specs, _xt, _xls, _xw) = bls_wiggle_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len() + states[2].beta.len();
    let first_direction = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).cos());
    let second_direction = Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).sin());

    let reference = family
        .exact_newton_joint_hessiansecond_directional_derivative(
            &states,
            &first_direction,
            &second_direction,
        )
        .expect("dense d2H build")
        .expect("dense d2H present");
    assert_eq!(reference.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&first_direction, &second_direction)
        .expect("d2H op call")
        .expect("d2H op present");

    let pt = states[0].beta.len();
    let pls = states[1].beta.len();
    // Unit vector with a single 1.0 at position `hot`.
    let unit = |hot: usize| Array1::from_shape_fn(p, |idx| if idx == hot { 1.0 } else { 0.0 });
    let probes = [
        unit(0),
        unit(pt),
        unit(pt + pls),
        Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 3) as f64).cos()),
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "BLSW d2H op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Validates the binomial location-scale wiggle `d2H[u_fd, u]` operator
/// against a central finite difference of the dense `dH[u]` taken along `u_fd`.
///
/// Each shifted state rebuilds the threshold and log-sigma predictors from the
/// fixture designs and re-evaluates the wiggle basis at the shifted `q0`, so
/// that all three predictors are consistent with the shifted coefficients.
/// This mirrors the first-order finite-difference test for the same family.
#[test]
fn binomial_location_scale_wiggle_workspace_d2h_operator_finite_difference() {
    let (family, states, specs, xt, xls, _xw) = bls_wiggle_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len() + states[2].beta.len();
    let u_fd = Array1::from_shape_fn(p, |i| 0.05 * ((i + 1) as f64).cos());
    let u = Array1::from_shape_fn(p, |i| 0.07 * ((i + 2) as f64).sin());
    let probe = Array1::from_shape_fn(p, |i| 0.04 * ((i + 3) as f64).sin());
    let pt = states[0].beta.len();
    let pls = states[1].beta.len();
    let eps = 1e-5;
    let perturb = |sign: f64| -> Vec<ParameterBlockState> {
        let mut out = states.clone();
        // `u_fd` is laid out as [threshold | log-sigma | wiggle].
        for j in 0..pt {
            out[0].beta[j] += sign * eps * u_fd[j];
        }
        for j in 0..pls {
            out[1].beta[j] += sign * eps * u_fd[pt + j];
        }
        for j in 0..(p - pt - pls) {
            out[2].beta[j] += sign * eps * u_fd[pt + pls + j];
        }
        out[0].eta = xt.dot(&out[0].beta);
        out[1].eta = xls.dot(&out[1].beta);
        // The wiggle basis is a function of q0, which moves with the threshold
        // and log-sigma predictors; reusing the basis from the unshifted point
        // would leave the wiggle predictor inconsistent with the shifted state.
        let q0 = Array1::from_iter(out[0].eta.iter().zip(out[1].eta.iter()).map(
            |(&eta_t, &eta_ls)| {
                binomial_location_scale_q0(eta_t, exp_sigma_from_eta_scalar(eta_ls))
            },
        ));
        out[2].eta = family
            .wiggle_design(q0.view())
            .expect("perturbed wiggle basis")
            .dot(&out[2].beta);
        out
    };
    let states_plus = perturb(1.0);
    let states_minus = perturb(-1.0);
    let dh_plus = family
        .exact_newton_joint_hessian_directional_derivative(&states_plus, &u)
        .expect("dense dH+")
        .expect("dense dH+ present");
    let dh_minus = family
        .exact_newton_joint_hessian_directional_derivative(&states_minus, &u)
        .expect("dense dH-")
        .expect("dense dH- present");
    let fd = (dh_plus.dot(&probe) - dh_minus.dot(&probe)) / (2.0 * eps);
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let d2h_op = workspace
        .second_directional_derivative_operator(&u_fd, &u)
        .expect("d2H op call")
        .expect("d2H op present");
    let analytic = d2h_op.mul_vec(&probe);
    for i in 0..p {
        let tol = 5e-5 * fd[i].abs().max(1.0) + 5e-5;
        assert!(
            (fd[i] - analytic[i]).abs() <= tol,
            "BLSW d2H FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            fd[i],
            analytic[i]
        );
    }
}
/// Verifies that `hessian_matvec` of the Gaussian location-scale wiggle
/// workspace reproduces the dense joint Hessian times a vector.
///
/// The problem comes from `gls_wiggle_workspace_fixture`, so the wiggle
/// predictor is formed from the basis evaluated at the current mean predictor
/// rather than from the seed-grid design, and the setup is not duplicated
/// here. Probes are the first unit vector of each coefficient block plus one
/// dense sine-patterned vector.
#[test]
fn gaussian_location_scale_wiggle_workspace_matvec_matches_dense() {
    let (family, states, specs, _xmu, _xls, _xw) = gls_wiggle_workspace_fixture();
    let p_mu = states[0].beta.len();
    let p_ls = states[1].beta.len();
    let p = p_mu + p_ls + states[2].beta.len();
    let dense = family
        .exact_newton_joint_hessian(&states)
        .expect("dense joint Hessian build")
        .expect("dense joint Hessian present");
    assert_eq!(dense.nrows(), p);
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let directions = [
        Array1::from_shape_fn(p, |i| if i == 0 { 1.0 } else { 0.0 }),
        Array1::from_shape_fn(p, |i| if i == p_mu { 1.0 } else { 0.0 }),
        Array1::from_shape_fn(p, |i| if i == p_mu + p_ls { 1.0 } else { 0.0 }),
        Array1::from_shape_fn(p, |i| 0.1 * ((i + 1) as f64).sin()),
    ];
    for (k, v) in directions.iter().enumerate() {
        let want = dense.dot(v);
        let got = workspace
            .hessian_matvec(v)
            .expect("matvec call")
            .expect("matvec present");
        for i in 0..p {
            let tol = 1e-9 * want[i].abs().max(1.0) + 1e-9;
            assert!(
                (want[i] - got[i]).abs() <= tol,
                "GLSW matvec[{k}, {i}] mismatch: dense={:.6e}, workspace={:.6e}",
                want[i],
                got[i]
            );
        }
    }
}
fn gls_wiggle_workspace_fixture() -> (
GaussianLocationScaleWiggleFamily,
Vec<ParameterBlockState>,
Vec<ParameterBlockSpec>,
Array2<f64>,
Array2<f64>,
Array2<f64>,
) {
let n = 10usize;
let p_mu = 3usize;
let p_ls = 2usize;
let xmu = Array2::from_shape_fn((n, p_mu), |(i, j)| {
((i as f64) * 0.13 + (j as f64) * 0.31).sin() * 0.4
});
let xls = Array2::from_shape_fn((n, p_ls), |(i, j)| {
((i as f64) * 0.21 + (j as f64) * 0.47).cos() * 0.3
});
let beta_mu = array![0.10, -0.20, 0.30];
let beta_ls = array![0.40, -0.10];
let eta_mu = xmu.dot(&beta_mu);
let eta_ls = xls.dot(&beta_ls);
let q_seed = Array1::linspace(-1.0, 1.0, n);
let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
q_seed.view(),
2,
3,
2,
false,
)
.expect("wiggle block");
let pw = wiggle_block.design.ncols();
let beta_w = Array1::from_shape_fn(pw, |j| 0.05 * ((j + 1) as f64).sin());
let y = Array1::from_shape_fn(n, |i| 0.5 + 0.1 * (i as f64).cos());
let weights = Array1::from_elem(n, 1.0);
let mu_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xmu.clone()));
let log_sigma_design =
DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(xls.clone()));
let family = GaussianLocationScaleWiggleFamily {
y,
weights,
mu_design: Some(mu_design.clone()),
log_sigma_design: Some(log_sigma_design.clone()),
wiggle_knots: knots,
wiggle_degree: 2,
policy: crate::resource::ResourcePolicy::default_library(),
cached_row_scalars: std::sync::RwLock::new(None),
};
let xw_at_q0 = family
.wiggle_design(eta_mu.view())
.expect("wiggle basis at q0");
let eta_w = xw_at_q0.dot(&beta_w);
let states = vec![
ParameterBlockState {
beta: beta_mu,
eta: eta_mu,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
ParameterBlockState {
beta: beta_w,
eta: eta_w,
},
];
let specs = vec![
ParameterBlockSpec {
name: "mu".to_string(),
design: mu_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "log_sigma".to_string(),
design: log_sigma_design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "wiggle".to_string(),
design: wiggle_block.design,
offset: Array1::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
(family, states, specs, xmu, xls, xw_at_q0)
}
/// Verifies that the `dH[d_beta]` operator of the Gaussian location-scale
/// wiggle workspace agrees with the dense directional derivative.
///
/// Probes are the first unit vector of each coefficient block plus one dense
/// cosine-patterned vector.
#[test]
fn gaussian_location_scale_wiggle_workspace_dh_operator_matches_dense() {
    let (family, states, specs, _xmu, _xls, _xw) = gls_wiggle_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len() + states[2].beta.len();
    let direction = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).sin());

    let reference = family
        .exact_newton_joint_hessian_directional_derivative(&states, &direction)
        .expect("dense dH build")
        .expect("dense dH present");
    assert_eq!(reference.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&direction)
        .expect("dH op call")
        .expect("dH op present");

    let pmu = states[0].beta.len();
    let pls = states[1].beta.len();
    // Unit vector with a single 1.0 at position `hot`.
    let unit = |hot: usize| Array1::from_shape_fn(p, |idx| if idx == hot { 1.0 } else { 0.0 });
    let probes = [
        unit(0),
        unit(pmu),
        unit(pmu + pls),
        Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).cos()),
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "GLSW dH op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Validates the Gaussian location-scale wiggle `dH[u]` operator against a
/// central finite difference of the dense joint Hessian.
///
/// Each shifted state rebuilds the mean and log-sigma predictors from the
/// fixture designs and re-evaluates the wiggle basis at the shifted mean
/// predictor before forming the wiggle predictor.
#[test]
fn gaussian_location_scale_wiggle_workspace_dh_operator_finite_difference() {
    let (family, states, specs, xmu, xls, _xw) = gls_wiggle_workspace_fixture();
    let pmu = states[0].beta.len();
    let pls = states[1].beta.len();
    let p = pmu + pls + states[2].beta.len();
    let u = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).cos());
    let v = Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).sin());
    let eps = 1e-5;

    // Offset of each block inside the stacked direction vector.
    let block_starts = [0, pmu, pmu + pls];
    let shifted_states = |sign: f64| -> Vec<ParameterBlockState> {
        let step = sign * eps;
        let mut shifted = states.clone();
        for (block, &start) in block_starts.iter().enumerate() {
            for j in 0..states[block].beta.len() {
                shifted[block].beta[j] += step * u[start + j];
            }
        }
        shifted[0].eta = xmu.dot(&shifted[0].beta);
        shifted[1].eta = xls.dot(&shifted[1].beta);
        // The wiggle basis is evaluated at the mean predictor, so rebuild it.
        let shifted_basis = family
            .wiggle_design(shifted[0].eta.view())
            .expect("wiggle basis at perturbed q0");
        shifted[2].eta = shifted_basis.dot(&shifted[2].beta);
        shifted
    };
    let forward = shifted_states(1.0);
    let backward = shifted_states(-1.0);

    let hessian_forward = family
        .exact_newton_joint_hessian(&forward)
        .expect("dense H+")
        .expect("dense H+ present");
    let hessian_backward = family
        .exact_newton_joint_hessian(&backward)
        .expect("dense H-")
        .expect("dense H- present");
    let fd = (hessian_forward.dot(&v) - hessian_backward.dot(&v)) / (2.0 * eps);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .directional_derivative_operator(&u)
        .expect("dH op call")
        .expect("dH op present");
    let analytic = operator.mul_vec(&v);

    for i in 0..p {
        let (numeric, exact) = (fd[i], analytic[i]);
        let tol = 5e-5 * numeric.abs().max(1.0) + 5e-5;
        assert!(
            (numeric - exact).abs() <= tol,
            "GLSW dH FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            numeric,
            exact
        );
    }
}
/// Verifies that the `d2H[u, v]` operator of the Gaussian location-scale
/// wiggle workspace agrees with the dense second directional derivative.
///
/// Probes are the first unit vector of each coefficient block plus one dense
/// cosine-patterned vector.
#[test]
fn gaussian_location_scale_wiggle_workspace_d2h_operator_matches_dense() {
    let (family, states, specs, _xmu, _xls, _xw) = gls_wiggle_workspace_fixture();
    let p = states[0].beta.len() + states[1].beta.len() + states[2].beta.len();
    let first_direction = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).sin());
    let second_direction = Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).cos());

    let reference = family
        .exact_newton_joint_hessiansecond_directional_derivative(
            &states,
            &first_direction,
            &second_direction,
        )
        .expect("dense d2H build")
        .expect("dense d2H present");
    assert_eq!(reference.nrows(), p);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&first_direction, &second_direction)
        .expect("d2H op call")
        .expect("d2H op present");

    let pmu = states[0].beta.len();
    let pls = states[1].beta.len();
    // Unit vector with a single 1.0 at position `hot`.
    let unit = |hot: usize| Array1::from_shape_fn(p, |idx| if idx == hot { 1.0 } else { 0.0 });
    let probes = [
        unit(0),
        unit(pmu),
        unit(pmu + pls),
        Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 3) as f64).cos()),
    ];
    for (k, probe) in probes.iter().enumerate() {
        let dense_product = reference.dot(probe);
        let operator_product = operator.mul_vec(probe);
        for i in 0..p {
            let (expected, actual) = (dense_product[i], operator_product[i]);
            let tol = 1e-9 * expected.abs().max(1.0) + 1e-9;
            assert!(
                (expected - actual).abs() <= tol,
                "GLSW d2H op matvec[{k}, {i}] mismatch: dense={:.6e}, op={:.6e}",
                expected,
                actual
            );
        }
    }
}
/// Validates the Gaussian location-scale wiggle `d2H[u_fd, u]` operator
/// against a central finite difference of the dense `dH[u]` taken along `u_fd`.
///
/// Each shifted state rebuilds the mean and log-sigma predictors from the
/// fixture designs and re-evaluates the wiggle basis at the shifted mean
/// predictor before forming the wiggle predictor.
#[test]
fn gaussian_location_scale_wiggle_workspace_d2h_operator_finite_difference() {
    let (family, states, specs, xmu, xls, _xw) = gls_wiggle_workspace_fixture();
    let pmu = states[0].beta.len();
    let pls = states[1].beta.len();
    let p = pmu + pls + states[2].beta.len();
    let u_fd = Array1::from_shape_fn(p, |idx| 0.05 * ((idx + 1) as f64).cos());
    let u = Array1::from_shape_fn(p, |idx| 0.07 * ((idx + 2) as f64).sin());
    let probe = Array1::from_shape_fn(p, |idx| 0.04 * ((idx + 3) as f64).sin());
    let eps = 1e-5;

    // Offset of each block inside the stacked direction vector.
    let block_starts = [0, pmu, pmu + pls];
    let shifted_states = |sign: f64| -> Vec<ParameterBlockState> {
        let step = sign * eps;
        let mut shifted = states.clone();
        for (block, &start) in block_starts.iter().enumerate() {
            for j in 0..states[block].beta.len() {
                shifted[block].beta[j] += step * u_fd[start + j];
            }
        }
        shifted[0].eta = xmu.dot(&shifted[0].beta);
        shifted[1].eta = xls.dot(&shifted[1].beta);
        // The wiggle basis is evaluated at the mean predictor, so rebuild it.
        let shifted_basis = family
            .wiggle_design(shifted[0].eta.view())
            .expect("wiggle basis at perturbed q0");
        shifted[2].eta = shifted_basis.dot(&shifted[2].beta);
        shifted
    };
    let forward = shifted_states(1.0);
    let backward = shifted_states(-1.0);

    let dh_forward = family
        .exact_newton_joint_hessian_directional_derivative(&forward, &u)
        .expect("dense dH+")
        .expect("dense dH+ present");
    let dh_backward = family
        .exact_newton_joint_hessian_directional_derivative(&backward, &u)
        .expect("dense dH-")
        .expect("dense dH- present");
    let fd = (dh_forward.dot(&probe) - dh_backward.dot(&probe)) / (2.0 * eps);

    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace build")
        .expect("workspace present");
    let operator = workspace
        .second_directional_derivative_operator(&u_fd, &u)
        .expect("d2H op call")
        .expect("d2H op present");
    let analytic = operator.mul_vec(&probe);

    for i in 0..p {
        let (numeric, exact) = (fd[i], analytic[i]);
        let tol = 5e-5 * numeric.abs().max(1.0) + 5e-5;
        assert!(
            (numeric - exact).abs() <= tol,
            "GLSW d2H FD mismatch at {i}: fd={:.6e}, analytic={:.6e}",
            numeric,
            exact
        );
    }
}
/// Verifies that a zero prior weight deactivates a row in the Gaussian location-scale,
/// Poisson-log and Gamma-log diagonal working sets: row 0 (weight 0) must report a working
/// weight of exactly zero and a working response equal to its current linear predictor,
/// while row 1 (weight 1) must keep a strictly positive working weight.
#[test]
fn zeroweightrows_stay_inactive_in_builtin_diagonal_families() {
    let weights = array![0.0, 1.0];

    // Gaussian location-scale: both the mean block and the log-sigma block are inspected.
    let gaussian_states = [
        ParameterBlockState {
            beta: Array1::zeros(0),
            eta: array![0.5, -0.25],
        },
        ParameterBlockState {
            beta: Array1::zeros(0),
            eta: array![0.1, -0.2],
        },
    ];
    let gaussian = GaussianLocationScaleFamily {
        y: array![2.0, -1.0],
        weights: weights.clone(),
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };
    let gaussian_eval = gaussian
        .evaluate(&gaussian_states)
        .expect("gaussian evaluate");

    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &gaussian_eval.blockworking_sets[GaussianLocationScaleFamily::BLOCK_MU]
    else {
        panic!("expected diagonal Gaussian mu block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], 0.5);
    assert!(working_weights[1] > 0.0);

    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &gaussian_eval.blockworking_sets[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA]
    else {
        panic!("expected diagonal Gaussian log-sigma block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], 0.1);
    assert!(working_weights[1] > 0.0);

    // Poisson with log link.
    let poisson_states = [ParameterBlockState {
        beta: Array1::zeros(0),
        eta: array![0.7, -0.4],
    }];
    let poisson = PoissonLogFamily {
        y: array![3.0, 1.0],
        weights: weights.clone(),
    };
    let poisson_eval = poisson.evaluate(&poisson_states).expect("poisson evaluate");
    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &poisson_eval.blockworking_sets[PoissonLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Poisson block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], 0.7);
    assert!(working_weights[1] > 0.0);

    // Gamma with log link; this is the last user of `weights`, so it takes ownership.
    let gamma_states = [ParameterBlockState {
        beta: Array1::zeros(0),
        eta: array![0.2, -0.1],
    }];
    let gamma = GammaLogFamily {
        y: array![1.5, 0.8],
        weights,
        shape: 2.5,
    };
    let gamma_eval = gamma.evaluate(&gamma_states).expect("gamma evaluate");
    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &gamma_eval.blockworking_sets[GammaLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Gamma block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], 0.2);
    assert!(working_weights[1] > 0.0);
}
/// Evaluates the Poisson-log and Gamma-log families at three linear predictors each: one far
/// negative, one moderate, one far positive. The two extreme rows must come back with a
/// working weight of exactly zero and a working response equal to the supplied predictor;
/// the moderate row must keep a strictly positive working weight.
#[test]
fn hard_clamped_poisson_and_gammarows_stay_locally_flat() {
    // Poisson: extremes at -35 and +35.
    let poisson_eta = array![-35.0, 0.2, 35.0];
    let poisson = PoissonLogFamily {
        y: array![1.0, 2.0, 3.0],
        weights: array![1.0, 1.0, 1.0],
    };
    let poisson_states = [ParameterBlockState {
        beta: Array1::zeros(0),
        eta: poisson_eta.clone(),
    }];
    let poisson_eval = poisson.evaluate(&poisson_states).expect("poisson evaluate");
    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &poisson_eval.blockworking_sets[PoissonLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Poisson block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], poisson_eta[0]);
    assert!(working_weights[1] > 0.0);
    assert_eq!(working_weights[2], 0.0);
    assert_eq!(working_response[2], poisson_eta[2]);

    // Gamma: extremes at -40 and +40.
    let gamma_eta = array![-40.0, -0.3, 40.0];
    let gamma = GammaLogFamily {
        y: array![0.8, 1.2, 2.5],
        weights: array![1.0, 1.0, 1.0],
        shape: 3.0,
    };
    let gamma_states = [ParameterBlockState {
        beta: Array1::zeros(0),
        eta: gamma_eta.clone(),
    }];
    let gamma_eval = gamma.evaluate(&gamma_states).expect("gamma evaluate");
    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &gamma_eval.blockworking_sets[GammaLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Gamma block")
    };
    assert_eq!(working_weights[0], 0.0);
    assert_eq!(working_response[0], gamma_eta[0]);
    assert!(working_weights[1] > 0.0);
    assert_eq!(working_weights[2], 0.0);
    assert_eq!(working_response[2], gamma_eta[2]);
}
/// Asserts that each Poisson-log diagonal working weight equals
/// `prior_weight * exp(eta)` to within `1e-12`.
#[test]
fn poisson_log_canonical_diagonal_weight_is_fisher_and_observed() {
    let eta = array![-0.4_f64, 0.7_f64];
    let family = PoissonLogFamily {
        y: array![0.0, 3.0],
        weights: array![1.5, 0.5],
    };
    let states = [ParameterBlockState {
        beta: Array1::zeros(0),
        eta: eta.clone(),
    }];
    let eval = family.evaluate(&states).expect("poisson evaluate");

    let BlockWorkingSet::Diagonal {
        working_weights, ..
    } = &eval.blockworking_sets[PoissonLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Poisson block")
    };

    for (i, &eta_i) in eta.iter().enumerate() {
        let fisher_weight = family.weights[i] * eta_i.exp();
        let gap = (working_weights[i] - fisher_weight).abs();
        assert!(
            gap < 1e-12,
            "canonical Poisson-log observed and Fisher weights should coincide at row {i}: got {}, expected {}",
            working_weights[i],
            fisher_weight
        );
    }
}
/// Pins the Gamma-log diagonal working set to observed (not Fisher) weights.
///
/// For every row the test checks that
/// * the working weight equals `w * shape * y / mu` with `mu = exp(eta)`,
/// * that value differs from the Fisher weight `w * shape` by more than `1e-6`,
/// * the working response equals `eta + score / observed_weight`,
/// and finally that the directional derivative of the working weights along `d_eta`
/// equals `-observed_weight * d_eta`.
#[test]
fn gamma_log_noncanonical_diagonal_uses_observed_not_fisher_weight_and_dw() {
    let eta = array![0.0_f64, -0.5_f64];
    let family = GammaLogFamily {
        y: array![2.0, 0.25],
        weights: array![1.25, 0.75],
        shape: 3.0,
    };
    let states = vec![ParameterBlockState {
        beta: Array1::zeros(0),
        eta: eta.clone(),
    }];
    let eval = family.evaluate(&states).expect("gamma evaluate");

    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &eval.blockworking_sets[GammaLogFamily::BLOCK_ETA]
    else {
        panic!("expected diagonal Gamma block")
    };

    for (i, &eta_i) in eta.iter().enumerate() {
        let mu = eta_i.exp();
        let fisher_weight = family.weights[i] * family.shape;
        let observed_weight = fisher_weight * family.y[i] / mu;
        let score = fisher_weight * (family.y[i] / mu - 1.0);
        let expected_response = eta_i + score / observed_weight;

        assert!(
            (working_weights[i] - observed_weight).abs() < 1e-12,
            "Gamma-log row {i} should use observed weight: got {}, expected {}",
            working_weights[i],
            observed_weight
        );
        assert!(
            (working_weights[i] - fisher_weight).abs() > 1e-6,
            "fixture should distinguish observed from Fisher at row {i}: observed {}, fisher {}",
            working_weights[i],
            fisher_weight
        );
        assert!(
            (working_response[i] - expected_response).abs() < 1e-12,
            "Gamma-log row {i} working response should be consistent with observed Newton weight: got {}, expected {}",
            working_response[i],
            expected_response
        );
    }

    // Directional derivative of the working weights along an arbitrary eta direction.
    let d_eta = array![0.5_f64, -2.0_f64];
    let dw = family
        .diagonalworking_weights_directional_derivative(
            &states,
            GammaLogFamily::BLOCK_ETA,
            &d_eta,
        )
        .expect("gamma dW")
        .expect("gamma dW present");

    for (i, &direction_i) in d_eta.iter().enumerate() {
        let observed_weight = family.weights[i] * family.shape * family.y[i] / eta[i].exp();
        let expected_dw = -observed_weight * direction_i;
        assert!(
            (dw[i] - expected_dw).abs() < 1e-12,
            "Gamma-log row {i} dW should differentiate observed weights: got {}, expected {}",
            dw[i],
            expected_dw
        );
    }
}
/// With a single observation and a log-sigma predictor of 35, the directional derivative of
/// the Gaussian log-sigma working weight along a unit direction must be exactly zero.
#[test]
fn gaussian_log_sigmaweight_directional_derivative_iszero_on_active_floor_branch() {
    let mean_state = ParameterBlockState {
        beta: Array1::zeros(0),
        eta: array![0.0],
    };
    let log_sigma_state = ParameterBlockState {
        beta: Array1::zeros(0),
        eta: array![35.0],
    };
    let states = vec![mean_state, log_sigma_state];

    let family = GaussianLocationScaleFamily {
        y: array![0.3],
        weights: array![1.0],
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };

    let unit_direction = array![1.0];
    let dw = family
        .diagonalworking_weights_directional_derivative(
            &states,
            GaussianLocationScaleFamily::BLOCK_LOG_SIGMA,
            &unit_direction,
        )
        .expect("gaussian directional derivative")
        .expect("gaussian log-sigma derivative");

    assert_eq!(dw[0], 0.0);
}
/// Compares the analytic directional derivative of the Gaussian log-sigma working weight
/// with a central finite difference obtained by re-evaluating the family at
/// `eta_log_sigma ± eps`.
#[test]
fn gaussian_log_sigmaweight_directional_derivative_matches_finite_difference() {
    let log_sigma_block = GaussianLocationScaleFamily::BLOCK_LOG_SIGMA;
    let family = GaussianLocationScaleFamily {
        y: array![1.2],
        weights: array![1.0],
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };
    let states = vec![
        ParameterBlockState {
            beta: Array1::zeros(0),
            eta: array![0.1],
        },
        ParameterBlockState {
            beta: Array1::zeros(0),
            eta: array![0.4],
        },
    ];

    let d_eta = array![1.0];
    let dw = family
        .diagonalworking_weights_directional_derivative(&states, log_sigma_block, &d_eta)
        .expect("gaussian directional derivative")
        .expect("gaussian log-sigma derivative");

    // Working weight of the log-sigma block after moving its predictor by `shift`.
    let weight_after_shift = |shift: f64, context: &str| -> f64 {
        let mut shifted = states.clone();
        shifted[log_sigma_block].eta[0] += shift;
        let eval = family.evaluate(&shifted).expect(context);
        let BlockWorkingSet::Diagonal {
            working_weights, ..
        } = &eval.blockworking_sets[log_sigma_block]
        else {
            panic!("expected diagonal Gaussian log-sigma block")
        };
        working_weights[0]
    };

    let eps = 1e-6;
    let w_plus = weight_after_shift(eps, "gaussian eval plus");
    let w_minus = weight_after_shift(-eps, "gaussian eval minus");
    let fd = (w_plus - w_minus) / (2.0 * eps);

    assert!((dw[0] - fd).abs() < 1e-6, "dw={} fd={}", dw[0], fd);
}
/// At `eta = 701` the array sigma helper must agree with `safe_exp(eta)` (absolute gap below
/// `1e-30`), and each of its first four derivatives must equal sigma to a relative `1e-12`.
#[test]
fn gaussian_sigma_helper_matches_exact_exp_link() {
    let eta0 = 701.0_f64;
    let eta = array![eta0];
    let (sigma, d1, d2, d3, d4) = exp_sigma_derivs_up_to_fourth_array(eta.view());

    let sigma0 = sigma[0];
    let coded_sigma = safe_exp(eta0);
    assert!(
        (sigma0 - coded_sigma).abs() < 1e-30,
        "Gaussian sigma helper should evaluate the exact exp sigma link at eta={eta0}; got {} vs {}",
        sigma0,
        coded_sigma
    );

    // Relative distance between a derivative value and sigma itself.
    let relative_gap = |derivative: f64| (derivative - sigma0).abs() / sigma0;

    assert!(
        relative_gap(d1[0]) < 1e-12,
        "Gaussian sigma helper first derivative should equal exp(eta) at eta={eta0}; got {} vs {}",
        d1[0],
        sigma0
    );
    assert!(
        relative_gap(d2[0]) < 1e-12,
        "Gaussian sigma helper second derivative should equal exp(eta) at eta={eta0}; got {} vs {}",
        d2[0],
        sigma0
    );
    assert!(
        relative_gap(d3[0]) < 1e-12,
        "Gaussian sigma helper third derivative should equal exp(eta) at eta={eta0}; got {} vs {}",
        d3[0],
        sigma0
    );
    assert!(
        relative_gap(d4[0]) < 1e-12,
        "Gaussian sigma helper fourth derivative should equal exp(eta) at eta={eta0}; got {} vs {}",
        d4[0],
        sigma0
    );
}
/// Passing the same dense design as both arguments of `prepared_gaussian_log_sigma_design`
/// must succeed and return a design whose entries match the input to within `1e-12`.
#[test]
fn gaussian_log_sigma_design_keeps_shared_mean_basis() {
    let shared = array![[1.0, -1.5], [1.0, -0.25], [1.0, 0.75], [1.0, 2.0],];
    let shared_design =
        DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(shared.clone()));

    let prepared = prepared_gaussian_log_sigma_design(&shared_design, &shared_design)
        .expect("gaussian log-sigma design should accept shared columns");
    let prepared_dense = prepared.as_dense_cow();

    // Row-major walk over the reference matrix; indices address the prepared design.
    for ((i, j), &expected) in shared.indexed_iter() {
        let got = prepared_dense[[i, j]];
        assert!(
            (got - expected).abs() < 1e-12,
            "gaussian log-sigma design should preserve shared basis at ({i}, {j}): got {}, expected {}",
            got,
            expected
        );
    }
}
/// At a log-sigma predictor of 701 the diagonal Gaussian log-sigma block must report
/// * a working weight equal to `2 * w * dlog^2`, and
/// * a working response equal to `eta + score / weight`,
/// where `dlog` comes from `logb_dlog_sigma_deta`. The log-likelihood is then differenced
/// numerically: the centred first difference must be within `1e-6` of `-1`, and the
/// unscaled second difference must stay below `1e-5` in magnitude.
#[test]
fn gaussian_diagonal_log_sigma_block_uses_fisher_score_step_in_far_tail() {
    let family = GaussianLocationScaleFamily {
        y: array![0.0],
        weights: array![1.0],
        mu_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };
    let eta_mu = array![0.0];
    let eta_ls0 = 701.0_f64;

    // Two-block state with the mean predictor fixed and the log-sigma predictor variable.
    let states_at = |eta_ls: f64| {
        let mean_state = ParameterBlockState {
            beta: Array1::zeros(0),
            eta: eta_mu.clone(),
        };
        let log_sigma_state = ParameterBlockState {
            beta: Array1::zeros(0),
            eta: array![eta_ls],
        };
        vec![mean_state, log_sigma_state]
    };

    let eval = family.evaluate(&states_at(eta_ls0)).expect("evaluate");
    let BlockWorkingSet::Diagonal {
        working_response,
        working_weights,
    } = &eval.blockworking_sets[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA]
    else {
        panic!("expected diagonal Gaussian log-sigma block")
    };

    let sigma = logb_sigma_from_eta_scalar(eta_ls0);
    let inv_s2 = (sigma * sigma).recip();
    let dlog = logb_dlog_sigma_deta(sigma, logb_sigma_jet1_scalar(eta_ls0).d1);
    let residual = family.y[0] - eta_mu[0];
    let expected_score = family.weights[0] * (residual * residual * inv_s2 - 1.0) * dlog;
    let expected_info = 2.0 * family.weights[0] * dlog * dlog;
    let expected_response = eta_ls0 + expected_score / expected_info;

    assert!((working_weights[0] - expected_info).abs() < 1e-12);
    assert!(
        (working_response[0] - expected_response).abs() < 1e-12,
        "working response mismatch: got {}, expected {}",
        working_response[0],
        expected_response
    );

    // Numerical differences of the log-likelihood in the log-sigma predictor.
    let loglik = |eta_ls: f64| family.log_likelihood_only(&states_at(eta_ls)).expect("ll");
    let h = 1e-4;
    let ll_plus = loglik(eta_ls0 + h);
    let ll0 = loglik(eta_ls0);
    let ll_minus = loglik(eta_ls0 - h);

    let score_fd = (ll_plus - ll_minus) / (2.0 * h);
    assert!(score_fd.is_finite());
    assert!(
        (score_fd + 1.0).abs() < 1e-6,
        "far-tail score should be -1, got {score_fd}"
    );

    let second_difference = ll_plus - 2.0 * ll0 + ll_minus;
    assert!(
        second_difference.abs() < 1e-5,
        "far-tail Gaussian log-sigma block should have near-zero observed curvature"
    );
}
/// With 1x1 intercept designs and a log-sigma coefficient of 710, both the exact joint
/// Hessian and its directional derivative must contain only finite entries.
#[test]
fn gaussian_exact_joint_path_stays_finite_in_exp_link_far_tail() {
    let intercept = || DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(array![[1.0]]));
    let mu_design = intercept();
    let log_sigma_design = intercept();

    let beta_mu = array![0.0];
    let beta_ls = array![710.0];
    let states = vec![
        ParameterBlockState {
            beta: beta_mu.clone(),
            eta: mu_design.matrixvectormultiply(&beta_mu),
        },
        ParameterBlockState {
            beta: beta_ls.clone(),
            eta: log_sigma_design.matrixvectormultiply(&beta_ls),
        },
    ];

    let family = GaussianLocationScaleFamily {
        y: array![0.0],
        weights: array![1.0],
        mu_design: Some(mu_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };

    let hessian = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected Gaussian exact joint hessian");
    let hessian_is_finite = hessian.iter().all(|value| value.is_finite());
    assert!(
        hessian_is_finite,
        "far-tail Gaussian exact Hessian should stay finite; got {hessian:?}"
    );

    let direction = array![0.25, -0.5];
    let dh = family
        .exact_newton_joint_hessian_directional_derivative(&states, &direction)
        .expect("joint dH")
        .expect("expected Gaussian exact joint hessian directional derivative");
    let dh_is_finite = dh.iter().all(|value| value.is_finite());
    assert!(
        dh_is_finite,
        "far-tail Gaussian exact Hessian directional derivative should stay finite; got {dh:?}"
    );
}
// Runs two per-row Gaussian location-scale evaluation closures over the same synthetic
// inputs (n = 4096) and asserts that the log-likelihood and the four working vectors
// they return agree.
//
// NOTE(review): as written here, `legacy_eval` and `optimized_eval` perform the same
// computation; the only visible difference is the point at which `w` is read inside the
// loop. No timing is measured in this test despite the "is_faster_locally" suffix —
// confirm whether one closure was meant to call the production hot loop instead.
#[test]
fn gaussian_location_scale_hotloop_optimized_matches_legacy_and_is_faster_locally() {
let n = 4096usize;
// Smooth deterministic inputs; every 37th row gets a zero prior weight.
let y = Array1::from_shape_fn(n, |i| ((i as f64) * 0.003).sin() + 0.1);
let mu = Array1::from_shape_fn(n, |i| ((i as f64) * 0.001).cos() - 0.2);
let eta_ls = Array1::from_shape_fn(n, |i| ((i as f64) * 0.002).sin() * 0.8 - 0.1);
let weights = Array1::from_shape_fn(n, |i| if i % 37 == 0 { 0.0 } else { 1.0 });
let ln2pi = (2.0 * std::f64::consts::PI).ln();
// Reference closure: returns (log-likelihood, mean working response, mean working
// weight, log-sigma working response, log-sigma working weight).
let legacy_eval = || {
let mut ll = 0.0;
let mut zmu = Array1::<f64>::zeros(n);
let mut wmu = Array1::<f64>::zeros(n);
let mut zls = Array1::<f64>::zeros(n);
let mut wls = Array1::<f64>::zeros(n);
for i in 0..n {
let w = weights[i];
let eta = eta_ls[i];
let SigmaJet1 { sigma, d1 } = logb_sigma_jet1_scalar(eta);
let inv_s2 = (sigma * sigma).recip();
let r = y[i] - mu[i];
// Weighted Gaussian log-density contribution of row i.
ll += w * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma.ln()));
if w == 0.0 {
// Zero-weight row: no mean-block weight, response pinned at the current mean.
wmu[i] = 0.0;
zmu[i] = mu[i];
} else {
wmu[i] = floor_positiveweight(w * inv_s2, MIN_WEIGHT);
zmu[i] = mu[i] + r;
}
let dlogsigma_du = logb_dlog_sigma_deta(sigma, d1);
let info_u =
floor_positiveweight(2.0 * w * dlogsigma_du * dlogsigma_du, MIN_WEIGHT);
if info_u == 0.0 {
// No information on this row: response pinned at the current predictor.
wls[i] = 0.0;
zls[i] = eta;
} else {
wls[i] = info_u;
let score_ls = w * (r * r * inv_s2 - 1.0) * dlogsigma_du;
zls[i] = eta + score_ls / info_u;
}
}
(ll, zmu, wmu, zls, wls)
};
// Second closure with the same return layout; see the NOTE(review) above.
let optimized_eval = || {
let mut ll = 0.0;
let mut zmu = Array1::<f64>::zeros(n);
let mut wmu = Array1::<f64>::zeros(n);
let mut zls = Array1::<f64>::zeros(n);
let mut wls = Array1::<f64>::zeros(n);
for i in 0..n {
let eta = eta_ls[i];
let SigmaJet1 { sigma, d1 } = logb_sigma_jet1_scalar(eta);
let inv_s2 = (sigma * sigma).recip();
let w = weights[i];
let r = y[i] - mu[i];
ll += w * (-0.5 * (r * r * inv_s2 + ln2pi + 2.0 * sigma.ln()));
if w == 0.0 {
wmu[i] = 0.0;
zmu[i] = mu[i];
} else {
wmu[i] = floor_positiveweight(w * inv_s2, MIN_WEIGHT);
zmu[i] = mu[i] + r;
}
let dlogsigma_du = logb_dlog_sigma_deta(sigma, d1);
let info_u =
floor_positiveweight(2.0 * w * dlogsigma_du * dlogsigma_du, MIN_WEIGHT);
if info_u == 0.0 {
wls[i] = 0.0;
zls[i] = eta;
} else {
wls[i] = info_u;
let score_ls = w * (r * r * inv_s2 - 1.0) * dlogsigma_du;
zls[i] = eta + score_ls / info_u;
}
}
(ll, zmu, wmu, zls, wls)
};
let (ll_legacy, zmu_legacy, wmu_legacy, zls_legacy, wls_legacy) = legacy_eval();
let (ll_opt, zmu_opt, wmu_opt, zls_opt, wls_opt) = optimized_eval();
// Scalar log-likelihood to 1e-10, every working-vector entry to 1e-12.
assert!((ll_legacy - ll_opt).abs() < 1e-10);
assert!((&zmu_legacy - &zmu_opt).iter().all(|v| v.abs() < 1e-12));
assert!((&wmu_legacy - &wmu_opt).iter().all(|v| v.abs() < 1e-12));
assert!((&zls_legacy - &zls_opt).iter().all(|v| v.abs() < 1e-12));
assert!((&wls_legacy - &wls_opt).iter().all(|v| v.abs() < 1e-12));
}
/// Builds a term collection holding a single Matern smooth named `"spatial"`.
///
/// * `feature_cols` - data columns the smooth is built on (copied into the spec).
/// * `length_scale` - Matern length scale stored in the basis spec.
///
/// The smooth uses six equal-mass centers, `nu = 3/2`, no intercept, no double penalty,
/// sum-to-zero center identifiability and no shape constraint; the collection has no
/// linear or random-effect terms.
fn simple_matern_term_collection(
    feature_cols: &[usize],
    length_scale: f64,
) -> TermCollectionSpec {
    let matern = MaternBasisSpec {
        periodic: None,
        center_strategy: CenterStrategy::EqualMass { num_centers: 6 },
        length_scale,
        nu: MaternNu::ThreeHalves,
        include_intercept: false,
        double_penalty: false,
        identifiability: MaternIdentifiability::CenterSumToZero,
        aniso_log_scales: None,
        nullspace_shrinkage_survived: None,
    };
    let spatial_term = SmoothTermSpec {
        name: String::from("spatial"),
        basis: SmoothBasisSpec::Matern {
            feature_cols: feature_cols.to_vec(),
            spec: matern,
            input_scales: None,
        },
        shape: ShapeConstraint::None,
        joint_null_rotation: None,
    };
    TermCollectionSpec {
        linear_terms: Vec::new(),
        random_effect_terms: Vec::new(),
        smooth_terms: vec![spatial_term],
    }
}
/// Returns a term collection with no linear, random-effect or smooth terms.
fn empty_term_collection() -> TermCollectionSpec {
    TermCollectionSpec {
        smooth_terms: vec![],
        random_effect_terms: vec![],
        linear_terms: vec![],
    }
}
/// Spatial length-scale optimization settings shared by the tests in this file:
/// enabled, at most four outer iterations, relative tolerance `1e-4`, a log step of
/// `ln 2`, length scales bounded to `[0.1, 2.0]`, and a pilot subsample threshold of 10 000.
fn spatial_kappa_options() -> SpatialLengthScaleOptimizationOptions {
    SpatialLengthScaleOptimizationOptions {
        enabled: true,
        // Search bounds and step.
        min_length_scale: 0.1,
        max_length_scale: 2.0,
        log_step: std::f64::consts::LN_2,
        // Termination controls.
        max_outer_iter: 4,
        rel_tol: 1e-4,
        pilot_subsample_threshold: 10_000,
    }
}
/// Blockwise fit options for quick smoke fits: three outer iterations and 48 inner cycles,
/// both with tolerance `1e-4`; every other field keeps its `Default` value.
fn spatial_fit_smoke_options() -> BlockwiseFitOptions {
    BlockwiseFitOptions {
        outer_max_iter: 3,
        outer_tol: 1e-4,
        inner_max_cycles: 48,
        inner_tol: 1e-4,
        ..BlockwiseFitOptions::default()
    }
}
/// Evaluates the probit binomial location-scale family at a threshold coefficient of 250
/// (intercept-only designs, six alternating 0/1 responses) and requires the log-likelihood,
/// the exact joint Hessian, and its first and second directional derivatives to be finite.
#[test]
fn binomial_location_scale_exact_probit_tailobjects_stay_finite() {
    let n = 6usize;
    let intercept_column = || {
        DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::from_elem(
            (n, 1),
            1.0,
        )))
    };
    let threshold_design = intercept_column();
    let log_sigma_design = intercept_column();

    let family = BinomialLocationScaleFamily {
        y: array![0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    let beta_t = array![250.0];
    let beta_ls = array![0.0];
    let states = vec![
        ParameterBlockState {
            beta: beta_t.clone(),
            eta: threshold_design.matrixvectormultiply(&beta_t),
        },
        ParameterBlockState {
            beta: beta_ls.clone(),
            eta: log_sigma_design.matrixvectormultiply(&beta_ls),
        },
    ];

    let eval = family
        .evaluate(&states)
        .expect("evaluate tail-stable family");
    assert!(eval.log_likelihood.is_finite());

    let joint = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected exact joint hessian");
    assert!(joint.iter().all(|entry| entry.is_finite()));

    let direction = array![0.1, -0.2];
    let d_h = family
        .exact_newton_joint_hessian_directional_derivative(&states, &direction)
        .expect("joint dH")
        .expect("expected exact joint dH");
    assert!(d_h.iter().all(|entry| entry.is_finite()));

    let d2_h = family
        .exact_newton_joint_hessiansecond_directional_derivative(
            &states, &direction, &direction,
        )
        .expect("joint d2H")
        .expect("expected exact joint d2H");
    assert!(d2_h.iter().all(|entry| entry.is_finite()));
}
/// With 2 penalties on the threshold block and 11 on the log-sigma block, the probit
/// binomial location-scale family must still report a second-order exact outer derivative
/// and `custom_family_outer_derivatives` must declare the Hessian form `Either`.
#[test]
fn binomial_location_scale_many_smoothing_params_keeps_second_order_outer() {
    /// Block spec with an all-ones `n x p` design, zero offset, and `k` identity penalties.
    fn spec_with_penalties(name: &str, n: usize, p: usize, k: usize) -> ParameterBlockSpec {
        let all_ones = Array2::from_elem((n, p), 1.0);
        let penalties = std::iter::repeat_with(|| PenaltyMatrix::Dense(identity_penalty(p)))
            .take(k)
            .collect();
        ParameterBlockSpec {
            name: name.to_string(),
            design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(all_ones)),
            offset: Array1::zeros(n),
            penalties,
            nullspace_dims: vec![0; k],
            initial_log_lambdas: Array1::zeros(k),
            initial_beta: None,
            gauge_priority: 100,
            jacobian_callback: None,
            stacked_design: None,
            stacked_offset: None,
        }
    }

    let n = 8usize;
    let family = BinomialLocationScaleFamily {
        y: array![0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
    };
    let threshold_spec = spec_with_penalties("threshold", n, 3, 2);
    let log_sigma_spec = spec_with_penalties("log_sigma", n, 6, 11);
    let specs = vec![threshold_spec, log_sigma_spec];

    let derivative_order =
        family.exact_outer_derivative_order(&specs, &BlockwiseFitOptions::default());
    assert_eq!(
        derivative_order,
        crate::custom_family::ExactOuterDerivativeOrder::Second
    );

    let (_gradient, hessian) = crate::custom_family::custom_family_outer_derivatives(
        &family,
        &specs,
        &BlockwiseFitOptions::default(),
    );
    assert_eq!(
        hessian,
        crate::solver::outer_strategy::DeclaredHessianForm::Either
    );
}
/// A binomial location-scale term builder with Matern mean and noise terms must both
/// support and require the exact spatial joint path, and the family it builds from the
/// resulting designs must report exact joint support.
#[test]
fn binomial_location_scale_term_builder_requires_exact_spatial_joint_path() {
    let n = 8usize;
    // Column 0 is an even grid on [0, 1]; column 1 is one sine period over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.0 * std::f64::consts::PI * t).sin()
        }
    });

    let builder = BinomialLocationScaleTermBuilder {
        y: Array1::from_elem(n, 0.0),
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: simple_matern_term_collection(&[0, 1], 0.4),
        noisespec: simple_matern_term_collection(&[0, 1], 0.75),
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
    };
    assert!(builder.exact_spatial_joint_supported());
    assert!(builder.require_exact_spatial_joint());

    let mean_design =
        build_term_collection_design(data.view(), builder.meanspec()).expect("mean design");
    let noise_design =
        build_term_collection_design(data.view(), builder.noisespec()).expect("noise design");

    let family = builder.build_family(&mean_design, &noise_design);
    assert!(family.exact_joint_supported());
}
/// Wiggle variant of the term-builder check: the builder must support and require the exact
/// spatial joint path, and the built family must report exact joint support and demand the
/// joint outer hyper path.
#[test]
fn binomial_location_scalewiggle_term_builder_requires_exact_spatial_joint_path() {
    let n = 8usize;

    let q_seed = Array1::linspace(-1.25, 1.25, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        2,
        3,
        2,
        false,
    )
    .expect("wiggle block");

    let builder = BinomialLocationScaleWiggleTermBuilder {
        y: Array1::from_elem(n, 0.0),
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: simple_matern_term_collection(&[0, 1], 0.4),
        noisespec: simple_matern_term_collection(&[0, 1], 0.75),
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        wiggle_knots: knots,
        wiggle_degree: 2,
        wiggle_block,
    };
    assert!(builder.exact_spatial_joint_supported());
    assert!(builder.require_exact_spatial_joint());

    // Column 0 is an even grid on [0, 1]; column 1 is one sine period over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.0 * std::f64::consts::PI * t).sin()
        }
    });
    let mean_design =
        build_term_collection_design(data.view(), builder.meanspec()).expect("mean design");
    let noise_design =
        build_term_collection_design(data.view(), builder.noisespec()).expect("noise design");

    let family = builder.build_family(&mean_design, &noise_design);
    assert!(family.exact_joint_supported());
    assert!(family.requires_joint_outer_hyper_path());
}
/// `build_blocks` on the binomial location-scale term builder must return exactly two
/// blocks, each with `initial_beta` populated.
#[test]
fn binomial_location_scale_builder_populateswarm_start_betas() {
    let n = 12usize;
    // Column 0 is an even grid on [0, 1]; column 1 is one sine period over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.0 * std::f64::consts::PI * t).sin()
        }
    });
    // Response is 1 on rows whose index is a multiple of 3 or 5, else 0.
    let y = Array1::from_shape_fn(n, |i| if i % 3 == 0 || i % 5 == 0 { 1.0 } else { 0.0 });

    let builder = BinomialLocationScaleTermBuilder {
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: simple_matern_term_collection(&[0, 1], 0.45),
        noisespec: simple_matern_term_collection(&[0, 1], 0.8),
    };

    let mean_design =
        build_term_collection_design(data.view(), builder.meanspec()).expect("mean design");
    let noise_design =
        build_term_collection_design(data.view(), builder.noisespec()).expect("noise design");

    let mean_penalties = builder.mean_penalty_count(&mean_design);
    let noise_penalties = builder.noise_penalty_count(&noise_design);
    let rho = compose_theta_from_hints_test(
        mean_penalties,
        noise_penalties,
        &None,
        &None,
        &Array1::zeros(0),
    );

    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    assert_eq!(blocks.len(), 2);
    for block in &blocks[..2] {
        assert!(block.initial_beta.is_some());
    }
}
/// `build_blocks` on the wiggle term builder must return exactly three blocks, and the
/// first two must have `initial_beta` populated (the third block is not inspected).
#[test]
fn binomial_location_scalewiggle_builder_populateswarm_start_betas() {
    let n = 12usize;
    // Column 0 is an even grid on [0, 1]; column 1 is one cosine period over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.0 * std::f64::consts::PI * t).cos()
        }
    });
    // Response is 1 on rows whose index is a multiple of 4 or 5, else 0.
    let y = Array1::from_shape_fn(n, |i| if i % 4 == 0 || i % 5 == 0 { 1.0 } else { 0.0 });

    let q_seed = Array1::linspace(-1.25, 1.25, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        2,
        3,
        2,
        false,
    )
    .expect("wiggle block");

    let builder = BinomialLocationScaleWiggleTermBuilder {
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: simple_matern_term_collection(&[0, 1], 0.45),
        noisespec: simple_matern_term_collection(&[0, 1], 0.8),
        wiggle_knots: knots,
        wiggle_degree: 2,
        wiggle_block,
    };

    let mean_design =
        build_term_collection_design(data.view(), builder.meanspec()).expect("mean design");
    let noise_design =
        build_term_collection_design(data.view(), builder.noisespec()).expect("noise design");

    let mean_penalties = builder.mean_penalty_count(&mean_design);
    let noise_penalties = builder.noise_penalty_count(&noise_design);
    let rho = compose_theta_from_hints_test(
        mean_penalties,
        noise_penalties,
        &None,
        &None,
        &builder.extra_rho0().expect("extra rho0"),
    );

    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    assert_eq!(blocks.len(), 3);
    for block in &blocks[..2] {
        assert!(block.initial_beta.is_some());
    }
}
/// Runs one value/gradient/Hessian evaluation of the joint hyper objective for the binomial
/// location-scale family with Matern mean and noise terms, and checks that the objective and
/// gradient are finite and that the gradient and the materialized dense outer Hessian both
/// have dimension `rho.len() + (total number of psi derivative entries)`.
#[test]
fn binomial_location_scale_exact_newton_spatial_joint_hyper_returns_fullhessian() {
    let n = 12usize;
    // Column 0 is an even grid on [0, 1]; column 1 is one cosine period over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.0 * std::f64::consts::PI * t).cos()
        }
    });
    // Response is 1 on rows whose index is a multiple of 3 or 5, else 0.
    let y = Array1::from_shape_fn(n, |i| if i % 3 == 0 || i % 5 == 0 { 1.0 } else { 0.0 });

    let meanspec = simple_matern_term_collection(&[0, 1], 0.45);
    let noisespec = simple_matern_term_collection(&[0, 1], 0.8);
    let builder = BinomialLocationScaleTermBuilder {
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: meanspec.clone(),
        noisespec: noisespec.clone(),
    };

    // Designs, then the specs frozen against those designs.
    let mean_design =
        build_term_collection_design(data.view(), &meanspec).expect("build mean design");
    let noise_design =
        build_term_collection_design(data.view(), &noisespec).expect("build noise design");
    let meanspec_resolved =
        freeze_term_collection_from_design(&meanspec, &mean_design).expect("freeze mean spec");
    let noisespec_resolved = freeze_term_collection_from_design(&noisespec, &noise_design)
        .expect("freeze noise spec");

    let mean_penalties = builder.mean_penalty_count(&mean_design);
    let noise_penalties = builder.noise_penalty_count(&noise_design);
    let rho = compose_theta_from_hints_test(
        mean_penalties,
        noise_penalties,
        &None,
        &None,
        &Array1::zeros(0),
    );
    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    let family = builder.build_family(&mean_design, &noise_design);
    let derivative_blocks = builder
        .build_psiderivative_blocks(
            data.view(),
            &meanspec_resolved,
            &noisespec_resolved,
            &mean_design,
            &noise_design,
        )
        .expect("psi derivative blocks");

    let fit_options = BlockwiseFitOptions {
        use_remlobjective: true,
        outer_max_iter: 1,
        ..BlockwiseFitOptions::default()
    };
    let eval = evaluate_custom_family_joint_hyper(
        &family,
        &blocks,
        &fit_options,
        &rho,
        &derivative_blocks,
        None,
        crate::solver::estimate::reml::unified::EvalMode::ValueGradientHessian,
    )
    .expect("exact spatial joint hyper eval");

    assert!(eval.objective.is_finite());
    assert!(eval.gradient.iter().all(|entry| entry.is_finite()));
    let hess = eval
        .outer_hessian
        .materialize_dense()
        .expect("exact spatial joint hyper path should materialize a full [rho, psi] hessian")
        .expect("exact spatial joint hyper path should return a full [rho, psi] hessian");

    let psi_dim: usize = derivative_blocks.iter().map(|block| block.len()).sum();
    let theta_dim = rho.len() + psi_dim;
    assert_eq!(eval.gradient.len(), theta_dim);
    assert_eq!(hess.nrows(), theta_dim);
    assert_eq!(hess.ncols(), theta_dim);
}
/// Wiggle variant of the joint hyper evaluation check: one value/gradient/Hessian evaluation
/// must yield a finite objective and gradient, and both the gradient and the materialized
/// dense outer Hessian must have dimension
/// `rho.len() + (total number of psi derivative entries)`.
#[test]
fn binomial_location_scalewiggle_exact_newton_spatial_joint_hyper_returns_fullhessian() {
    let n = 14usize;
    // Column 0 is an even grid on [0, 1]; column 1 is sin(2.25 * pi * t) over that grid.
    let data = Array2::<f64>::from_shape_fn((n, 2), |(i, col)| {
        let t = i as f64 / (n as f64 - 1.0);
        if col == 0 {
            t
        } else {
            (2.25 * std::f64::consts::PI * t).sin()
        }
    });
    // Response is 1 on rows whose index is a multiple of 3 or 5, else 0.
    let y = Array1::from_shape_fn(n, |i| if i % 3 == 0 || i % 5 == 0 { 1.0 } else { 0.0 });

    let meanspec = simple_matern_term_collection(&[0, 1], 0.45);
    let noisespec = simple_matern_term_collection(&[0, 1], 0.8);

    let q_seed = Array1::linspace(-1.5, 1.5, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        2,
        4,
        2,
        false,
    )
    .expect("wiggle block");

    let builder = BinomialLocationScaleWiggleTermBuilder {
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: meanspec.clone(),
        noisespec: noisespec.clone(),
        wiggle_knots: knots,
        wiggle_degree: 2,
        wiggle_block,
    };

    // Designs, then the specs frozen against those designs.
    let mean_design =
        build_term_collection_design(data.view(), &meanspec).expect("build mean design");
    let noise_design =
        build_term_collection_design(data.view(), &noisespec).expect("build noise design");
    let meanspec_resolved =
        freeze_term_collection_from_design(&meanspec, &mean_design).expect("freeze mean spec");
    let noisespec_resolved = freeze_term_collection_from_design(&noisespec, &noise_design)
        .expect("freeze noise spec");

    let mean_penalties = builder.mean_penalty_count(&mean_design);
    let noise_penalties = builder.noise_penalty_count(&noise_design);
    let rho = compose_theta_from_hints_test(
        mean_penalties,
        noise_penalties,
        &None,
        &None,
        &builder.extra_rho0().expect("wiggle rho0"),
    );
    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    let family = builder.build_family(&mean_design, &noise_design);
    let derivative_blocks = builder
        .build_psiderivative_blocks(
            data.view(),
            &meanspec_resolved,
            &noisespec_resolved,
            &mean_design,
            &noise_design,
        )
        .expect("psi derivative blocks");

    let fit_options = BlockwiseFitOptions {
        use_remlobjective: true,
        outer_max_iter: 1,
        ..BlockwiseFitOptions::default()
    };
    let eval = evaluate_custom_family_joint_hyper(
        &family,
        &blocks,
        &fit_options,
        &rho,
        &derivative_blocks,
        None,
        crate::solver::estimate::reml::unified::EvalMode::ValueGradientHessian,
    )
    .expect("exact wiggle spatial joint hyper eval");

    assert!(eval.objective.is_finite());
    assert!(eval.gradient.iter().all(|entry| entry.is_finite()));
    let hess = eval
        .outer_hessian
        .materialize_dense()
        .expect("exact wiggle spatial joint hyper path should materialize a full [rho, psi] hessian")
        .expect("exact wiggle spatial joint hyper path should return a full [rho, psi] hessian");

    let psi_dim: usize = derivative_blocks.iter().map(|block| block.len()).sum();
    let theta_dim = rho.len() + psi_dim;
    assert_eq!(eval.gradient.len(), theta_dim);
    assert_eq!(hess.nrows(), theta_dim);
    assert_eq!(hess.ncols(), theta_dim);
}
/// Smoke test for the joint `[rho, psi]` hyper evaluation of the Gaussian
/// location-scale family.
///
/// A 12-row, two-column fixture with a response that is an exact affine
/// function of the covariates is pushed through
/// `GaussianLocationScaleTermBuilder`; `evaluate_custom_family_joint_hyper` is
/// called in `ValueGradientHessian` mode. Objective, gradient and every entry
/// of the dense Hessian must be finite, and the gradient length and Hessian
/// shape must equal `rho.len()` plus the total number of psi derivative entries.
#[test]
fn gaussian_location_scale_exact_newton_spatial_joint_hyper_returns_fullhessian() {
    let n = 12usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a sinusoid of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (2.0 * std::f64::consts::PI * t).sin();
    }
    // Noise-free affine response of the two covariates.
    let y = Array1::from_iter((0..n).map(|i| {
        let (x0, x1) = (data[[i, 0]], data[[i, 1]]);
        0.4 * x0 - 0.2 * x1 + 0.15
    }));
    let weights = Array1::from_elem(n, 1.0);
    let meanspec = simple_matern_term_collection(&[0, 1], 0.45);
    let noisespec = simple_matern_term_collection(&[0, 1], 0.8);

    // Designs and frozen specs depend only on the data and the term specs.
    let mean_design =
        build_term_collection_design(data.view(), &meanspec).expect("build mean design");
    let noise_design =
        build_term_collection_design(data.view(), &noisespec).expect("build noise design");
    let meanspec_resolved =
        freeze_term_collection_from_design(&meanspec, &mean_design).expect("freeze mean spec");
    let noisespec_resolved = freeze_term_collection_from_design(&noisespec, &noise_design)
        .expect("freeze noise spec");

    let builder = GaussianLocationScaleTermBuilder {
        y,
        weights,
        meanspec: meanspec.clone(),
        noisespec: noisespec.clone(),
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
    };

    // This builder contributes no extra smoothing parameters, hence the empty tail.
    let no_extra_rho = Array1::zeros(0);
    let rho = compose_theta_from_hints_test(
        builder.mean_penalty_count(&mean_design),
        builder.noise_penalty_count(&noise_design),
        &None,
        &None,
        &no_extra_rho,
    );
    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    let family = builder.build_family(&mean_design, &noise_design);
    let derivative_blocks = builder
        .build_psiderivative_blocks(
            data.view(),
            &meanspec_resolved,
            &noisespec_resolved,
            &mean_design,
            &noise_design,
        )
        .expect("psi derivative blocks");

    let options = BlockwiseFitOptions {
        use_remlobjective: true,
        outer_max_iter: 1,
        ..BlockwiseFitOptions::default()
    };
    let eval = evaluate_custom_family_joint_hyper(
        &family,
        &blocks,
        &options,
        &rho,
        &derivative_blocks,
        None,
        crate::solver::estimate::reml::unified::EvalMode::ValueGradientHessian,
    )
    .expect("exact spatial joint hyper eval");

    assert!(eval.objective.is_finite());
    assert!(eval.gradient.iter().all(|g| g.is_finite()));

    let hess = eval
        .outer_hessian
        .materialize_dense()
        .expect("exact spatial joint hyper path should materialize a full [rho, psi] hessian")
        .expect("exact spatial joint hyper path should return a full [rho, psi] hessian");
    let psi_dim: usize = derivative_blocks.iter().map(|block| block.len()).sum();
    let theta_dim = rho.len() + psi_dim;
    assert_eq!(eval.gradient.len(), theta_dim);
    assert_eq!(hess.nrows(), theta_dim);
    assert_eq!(hess.ncols(), theta_dim);
    assert!(hess.iter().all(|h| h.is_finite()));
}
/// Calls a family's joint psi hooks and asserts the shapes of what they return.
///
/// The hooks `exact_newton_joint_psi_terms`,
/// `exact_newton_joint_psisecond_order_terms` and
/// `exact_newton_joint_psihessian_directional_derivative` are invoked for psi
/// index 0 (and the `(0, 0)` pair for the second-order hook). The helper panics
/// if any hook errors or returns `None`.
///
/// * `family` - family whose hooks are exercised.
/// * `block_states` - per-block coefficients and predictors; the summed `beta`
///   lengths give the expected joint dimension.
/// * `blocks`, `derivative_blocks` - forwarded unchanged to every hook.
/// * `slope`, `intercept` - the direction passed to the mixed-derivative hook is
///   `slope * beta + intercept` elementwise, concatenated in block order.
/// * `label` - family name interpolated into the panic messages.
///
/// Score vectors must have the joint length. Each dense Hessian must be
/// `(total, total)`, or `(0, 0)` when the matching operator field is populated.
fn assert_joint_psi_hook_surface<F: CustomFamily>(
    family: &F,
    block_states: &[ParameterBlockState],
    blocks: &[ParameterBlockSpec],
    derivative_blocks: &[Vec<CustomFamilyBlockPsiDerivative>],
    slope: f64,
    intercept: f64,
    label: &str,
) {
    let psi_terms = family
        .exact_newton_joint_psi_terms(block_states, blocks, derivative_blocks, 0)
        .expect("joint psi terms call")
        .unwrap_or_else(|| panic!("{label} family should return joint psi terms"));
    let psi2_terms = family
        .exact_newton_joint_psisecond_order_terms(block_states, blocks, derivative_blocks, 0, 0)
        .expect("joint psi second-order call")
        .unwrap_or_else(|| panic!("{label} family should return joint psi second-order terms"));

    // Joint coefficient dimension across all blocks.
    let mut total = 0usize;
    for state in block_states {
        total += state.beta.len();
    }

    // First-order terms: an operator-backed Hessian leaves the dense slot empty.
    assert_eq!(psi_terms.score_psi.len(), total);
    let expected_first_order_dim = if psi_terms.hessian_psi_operator.is_some() {
        (0, 0)
    } else {
        (total, total)
    };
    assert_eq!(psi_terms.hessian_psi.dim(), expected_first_order_dim);

    // Second-order terms follow the same dense-or-operator convention.
    assert_eq!(psi2_terms.score_psi_psi.len(), total);
    let expected_second_order_dim = if psi2_terms.hessian_psi_psi_operator.is_some() {
        (0, 0)
    } else {
        (total, total)
    };
    assert_eq!(psi2_terms.hessian_psi_psi.dim(), expected_second_order_dim);

    // Flattened direction: an affine map of each block's coefficients, in block order.
    let mut direction = Vec::with_capacity(total);
    for state in block_states {
        direction.extend(state.beta.iter().map(|&v| slope * v + intercept));
    }
    let d_beta_flat = Array1::<f64>::from_vec(direction);

    let mixed = family
        .exact_newton_joint_psihessian_directional_derivative(
            block_states,
            blocks,
            derivative_blocks,
            0,
            &d_beta_flat,
        )
        .expect("joint psi mixed drift call")
        .unwrap_or_else(|| panic!("{label} family should return joint psi mixed drift"));
    assert_eq!(mixed.dim(), (total, total));
}
/// Checks that the binomial location-scale wiggle family exposes the joint psi
/// hooks with consistent shapes.
///
/// Block states are assembled by hand instead of being fitted: each block
/// starts from its `initial_beta` (zeros when absent), the wiggle block is
/// overwritten with `0.04`, and `eta` is obtained from `block_geometry` given
/// the states built so far. The state is confirmed to evaluate, then handed to
/// `assert_joint_psi_hook_surface`.
#[test]
fn binomial_location_scalewiggle_family_exposes_joint_psi_hook_surface() {
    let n = 12usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a cosine of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (1.75 * std::f64::consts::PI * t).cos();
    }
    // Deterministic 0/1 response: one wherever the index is a multiple of 4 or 5.
    let y = Array1::from_iter((0..n).map(|i| {
        let is_success = i % 4 == 0 || i % 5 == 0;
        if is_success { 1.0 } else { 0.0 }
    }));
    let weights = Array1::from_elem(n, 1.0);
    let meanspec = simple_matern_term_collection(&[0, 1], 0.4);
    let noisespec = simple_matern_term_collection(&[0, 1], 0.7);

    let mean_design =
        build_term_collection_design(data.view(), &meanspec).expect("build mean design");
    let noise_design =
        build_term_collection_design(data.view(), &noisespec).expect("build noise design");
    let meanspec_resolved =
        freeze_term_collection_from_design(&meanspec, &mean_design).expect("freeze mean spec");
    let noisespec_resolved = freeze_term_collection_from_design(&noisespec, &noise_design)
        .expect("freeze noise spec");

    let wiggle_seed = Array1::linspace(-1.25, 1.25, n);
    let (wiggle_block, wiggle_knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
            wiggle_seed.view(),
            2,
            3,
            2,
            false,
        )
        .expect("wiggle block");
    let builder = BinomialLocationScaleWiggleTermBuilder {
        y,
        weights,
        link_kind: InverseLink::Standard(StandardLink::Probit),
        meanspec: meanspec.clone(),
        noisespec: noisespec.clone(),
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
        wiggle_knots,
        wiggle_degree: 2,
        wiggle_block,
    };

    let extra_rho = builder.extra_rho0().expect("wiggle rho0");
    let rho = compose_theta_from_hints_test(
        builder.mean_penalty_count(&mean_design),
        builder.noise_penalty_count(&noise_design),
        &None,
        &None,
        &extra_rho,
    );
    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    let family = builder.build_family(&mean_design, &noise_design);

    // Blocks are processed in order because each block's geometry is requested
    // with the states of the blocks already built.
    let mut block_states = Vec::<ParameterBlockState>::with_capacity(blocks.len());
    for (block_idx, spec) in blocks.iter().enumerate() {
        let mut beta = match spec.initial_beta.clone() {
            Some(initial) => initial,
            None => Array1::zeros(spec.design.ncols()),
        };
        if block_idx == BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE {
            beta.fill(0.04);
        }
        let (design, offset) = family
            .block_geometry(&block_states, spec)
            .expect("hook fixture block geometry");
        let eta = design.matrixvectormultiply(&beta) + &offset;
        block_states.push(ParameterBlockState { beta, eta });
    }
    // The hand-built state must be a point the family can evaluate.
    family
        .evaluate(&block_states)
        .expect("hook fixture state should evaluate");

    let derivative_blocks = builder
        .build_psiderivative_blocks(
            data.view(),
            &meanspec_resolved,
            &noisespec_resolved,
            &mean_design,
            &noise_design,
        )
        .expect("psi derivative blocks");

    assert_joint_psi_hook_surface(
        &family,
        &block_states,
        &blocks,
        &derivative_blocks,
        0.25,
        0.1,
        "wiggle",
    );
}
/// Checks that the Gaussian location-scale family exposes the joint psi hooks
/// with consistent shapes.
///
/// The block states come from a `fit_custom_family` run limited to one outer
/// iteration; they are then passed, together with the psi derivative blocks,
/// to `assert_joint_psi_hook_surface`.
#[test]
fn gaussian_location_scale_family_exposes_joint_psi_hook_surface() {
    let n = 10usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a cosine of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (2.0 * std::f64::consts::PI * t).cos();
    }
    // Noise-free affine response of the two covariates.
    let y = Array1::from_iter((0..n).map(|i| {
        let (x0, x1) = (data[[i, 0]], data[[i, 1]]);
        0.3 * x0 - 0.15 * x1 + 0.2
    }));
    let weights = Array1::from_elem(n, 1.0);
    let meanspec = simple_matern_term_collection(&[0, 1], 0.4);
    let noisespec = simple_matern_term_collection(&[0, 1], 0.7);

    let mean_design =
        build_term_collection_design(data.view(), &meanspec).expect("build mean design");
    let noise_design =
        build_term_collection_design(data.view(), &noisespec).expect("build noise design");
    let meanspec_resolved =
        freeze_term_collection_from_design(&meanspec, &mean_design).expect("freeze mean spec");
    let noisespec_resolved = freeze_term_collection_from_design(&noisespec, &noise_design)
        .expect("freeze noise spec");

    let builder = GaussianLocationScaleTermBuilder {
        y,
        weights,
        meanspec: meanspec.clone(),
        noisespec: noisespec.clone(),
        mean_offset: Array1::zeros(n),
        noise_offset: Array1::zeros(n),
    };

    let no_extra_rho = Array1::zeros(0);
    let rho = compose_theta_from_hints_test(
        builder.mean_penalty_count(&mean_design),
        builder.noise_penalty_count(&noise_design),
        &None,
        &None,
        &no_extra_rho,
    );
    let blocks = builder
        .build_blocks(&rho, &mean_design, &noise_design, None, None)
        .expect("build blocks");
    let family = builder.build_family(&mean_design, &noise_design);

    let fit_options = BlockwiseFitOptions {
        use_remlobjective: true,
        outer_max_iter: 1,
        ..BlockwiseFitOptions::default()
    };
    let fit = fit_custom_family(&family, &blocks, &fit_options)
        .expect("fit gaussian family for joint psi hooks");

    let derivative_blocks = builder
        .build_psiderivative_blocks(
            data.view(),
            &meanspec_resolved,
            &noisespec_resolved,
            &mean_design,
            &noise_design,
        )
        .expect("psi derivative blocks");

    assert_joint_psi_hook_surface(
        &family,
        &fit.block_states,
        &blocks,
        &derivative_blocks,
        0.2,
        0.15,
        "gaussian",
    );
}
/// `fit_gaussian_location_scale_terms` must return an error mentioning
/// "weights must be finite and non-negative" when the weight vector contains a
/// negative entry (here `-0.5` at index 2).
#[test]
fn gaussian_location_scale_terms_reject_invalidweights_early() {
    let n = 8usize;
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let x = i as f64;
        row[0] = x;
        row[1] = x.sin();
    }

    let spec = GaussianLocationScaleTermSpec {
        y: Array1::zeros(n),
        // Every weight is valid except the third one.
        weights: Array1::from_vec(vec![1.0, 1.0, -0.5, 1.0, 1.0, 1.0, 1.0, 1.0]),
        meanspec: simple_matern_term_collection(&[0, 1], 0.35),
        log_sigmaspec: simple_matern_term_collection(&[0, 1], 0.6),
        mean_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let outcome = fit_gaussian_location_scale_terms(
        data.view(),
        spec,
        &BlockwiseFitOptions::default(),
        &spatial_kappa_options(),
    );
    let err = outcome
        .err()
        .unwrap_or_else(|| panic!("term API should reject negative weights"));
    assert!(err.contains("weights must be finite and non-negative"));
}
/// `fit_binomial_location_scale_terms` must return an error mentioning
/// "binomial response must be finite in [0,1]" when the response contains a
/// value outside the unit interval (here `2.0` at index 3).
#[test]
fn binomial_location_scale_terms_reject_invalid_response_early() {
    let n = 8usize;
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let x = i as f64;
        row[0] = x;
        row[1] = x.cos();
    }

    let spec = BinomialLocationScaleTermSpec {
        // The fourth response value is the invalid one.
        y: Array1::from_vec(vec![0.0, 1.0, 0.0, 2.0, 1.0, 0.0, 1.0, 0.0]),
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        thresholdspec: simple_matern_term_collection(&[0, 1], 0.4),
        log_sigmaspec: simple_matern_term_collection(&[0, 1], 0.75),
        threshold_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let outcome = fit_binomial_location_scale_terms(
        data.view(),
        spec,
        &BlockwiseFitOptions::default(),
        &spatial_kappa_options(),
    );
    let err = outcome
        .err()
        .unwrap_or_else(|| panic!("term API should reject invalid binomial responses"));
    assert!(err.contains("binomial response must be finite in [0,1]"));
}
/// `fit_binomial_location_scale_terms` must reject a spec whose `log_sigmaspec`
/// is a non-empty term collection. The returned error has to mention both that
/// only the composite `q = -threshold / sigma` is identified and that
/// `log_sigma` must be intercept-only/fixed.
#[test]
fn binomial_location_scale_terms_reject_free_log_sigma_terms_early() {
    let n = 8usize;
    let data = Array2::<f64>::zeros((n, 2));

    // Alternating 0/1 response starting at 0.
    let y = Array1::from_iter((0..n).map(|i| {
        let is_even = i % 2 == 0;
        if is_even { 0.0 } else { 1.0 }
    }));
    let spec = BinomialLocationScaleTermSpec {
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Logit),
        thresholdspec: simple_matern_term_collection(&[0, 1], 0.4),
        log_sigmaspec: simple_matern_term_collection(&[0, 1], 0.75),
        threshold_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let outcome = fit_binomial_location_scale_terms(
        data.view(),
        spec,
        &BlockwiseFitOptions::default(),
        &spatial_kappa_options(),
    );
    let err = outcome
        .err()
        .unwrap_or_else(|| panic!("Bernoulli free log_sigma terms must be rejected"));
    assert!(err.contains("identify only the composite q = -threshold / sigma"));
    assert!(err.contains("log_sigma must be intercept-only/fixed"));
}
/// `fit_binomial_location_scale_terms` must return an error mentioning
/// "data row count must match response length" when the data matrix has one
/// row fewer than the response vector.
#[test]
fn binomial_location_scale_terms_reject_datarow_mismatch_early() {
    let n = 8usize;
    // Deliberately one row short of the response.
    let short_rows = n - 1;
    let data = Array2::<f64>::zeros((short_rows, 2));

    let spec = BinomialLocationScaleTermSpec {
        y: Array1::from_elem(n, 0.0),
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        thresholdspec: simple_matern_term_collection(&[0, 1], 0.4),
        log_sigmaspec: simple_matern_term_collection(&[0, 1], 0.75),
        threshold_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let outcome = fit_binomial_location_scale_terms(
        data.view(),
        spec,
        &BlockwiseFitOptions::default(),
        &spatial_kappa_options(),
    );
    let err = outcome
        .err()
        .unwrap_or_else(|| panic!("term API should reject data/y row mismatches"));
    assert!(err.contains("data row count must match response length"));
}
/// End-to-end smoke fit of `fit_gaussian_location_scale_terms` on 32 rows with
/// term collections from `simple_matern_term_collection` for both mean and
/// log-sigma. The fit must succeed with a finite penalized objective and
/// exactly two block states.
#[test]
fn gaussian_location_scale_termswith_matern_spatial_blocks_fit_finitely() {
    let n = 32usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a sinusoid of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (2.0 * std::f64::consts::PI * t).sin();
    }
    // Noise-free affine response of the two covariates.
    let y = Array1::from_iter((0..n).map(|i| {
        let (x0, x1) = (data[[i, 0]], data[[i, 1]]);
        0.5 * x0 - 0.25 * x1 + 0.1
    }));

    let spec = GaussianLocationScaleTermSpec {
        y,
        weights: Array1::from_elem(n, 1.0),
        meanspec: simple_matern_term_collection(&[0, 1], 0.35),
        log_sigmaspec: simple_matern_term_collection(&[0, 1], 0.6),
        mean_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let result = fit_gaussian_location_scale_terms(
        data.view(),
        spec,
        &spatial_fit_smoke_options(),
        &spatial_kappa_options(),
    )
    .expect("gaussian location-scale spatial fit");

    assert!(result.fit.penalized_objective.is_finite());
    assert_eq!(result.fit.block_states.len(), 2);
}
/// Regression test (the assertion message cites issue #365): adding a smooth
/// log-sigma term to homoscedastic data must not degrade the mean fit.
///
/// 300 covariate values on `[-3, 3]` and the noise draws come from one
/// deterministic 64-bit LCG. The response is `1 + 0.7 x + sin(x)` plus
/// Gaussian noise with constant sigma `exp(-0.5)`. After fitting with smooth
/// terms on both mean and log-sigma, the RMSE between the fitted mean predictor
/// and the true mean must stay below `0.5`.
#[test]
fn gaussian_location_scale_smooth_noise_homoscedastic_recovers_mean() {
    let n = 300usize;

    // Deterministic uniform generator; outputs are clamped strictly inside
    // (0, 1) before being passed to the normal quantile function.
    let mut state: u64 = 0x2545_F491_4F6C_DD1D;
    let mut next_unit = || {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        let uniform = (state >> 11) as f64 / ((1u64 << 53) as f64);
        uniform.clamp(1.0e-6, 1.0 - 1.0e-6)
    };

    // Covariates are drawn first, so they use the first `n` generator outputs.
    let xs: Vec<f64> = (0..n).map(|_| -3.0 + 6.0 * next_unit()).collect();
    let mut data = Array2::<f64>::zeros((n, 1));
    for (i, &x) in xs.iter().enumerate() {
        data[[i, 0]] = x;
    }

    let true_mean: Vec<f64> = xs.iter().map(|&x| 1.0 + 0.7 * x + x.sin()).collect();
    let true_sigma = (-0.5_f64).exp();
    // Noise draws consume the next `n` generator outputs, one per observation.
    let y = Array1::from_iter(true_mean.iter().map(|&mu| {
        let z = standard_normal_quantile(next_unit()).expect("finite probit draw");
        mu + true_sigma * z
    }));

    let spec = GaussianLocationScaleTermSpec {
        y,
        weights: Array1::from_elem(n, 1.0),
        meanspec: simple_matern_term_collection(&[0], 0.6),
        log_sigmaspec: simple_matern_term_collection(&[0], 0.6),
        mean_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };
    let result = fit_gaussian_location_scale_terms(
        data.view(),
        spec,
        &spatial_fit_smoke_options(),
        &spatial_kappa_options(),
    )
    .expect("gaussian location-scale smooth-noise homoscedastic fit");

    let mean_eta = &result.fit.block_states[GaussianLocationScaleFamily::BLOCK_MU].eta;
    assert_eq!(mean_eta.len(), n);

    // Accumulate squared errors in index order, starting from zero.
    let sq_err = (0..n).fold(0.0, |acc, i| {
        let d = mean_eta[i] - true_mean[i];
        acc + d * d
    });
    let mean_rmse = (sq_err / n as f64).sqrt();
    assert!(
        mean_rmse < 0.5,
        "smooth noise_formula degraded the homoscedastic mean fit (issue #365): \
         mean RMSE = {mean_rmse:.4} (expected < 0.5; the regression produced ~1.5)"
    );
}
/// End-to-end smoke fit of `fit_binomial_location_scale_terms` on 36 rows with
/// a `simple_matern_term_collection` threshold spec and an empty log-sigma
/// term collection. The fit must succeed with a finite penalized objective and
/// exactly two block states.
#[test]
fn binomial_location_scale_termswith_matern_spatial_blocks_fit_finitely() {
    let n = 36usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a cosine of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (3.0 * std::f64::consts::PI * t).cos();
    }
    // Deterministic 0/1 response: one wherever the index is a multiple of 5 or 7.
    let y = Array1::from_iter((0..n).map(|i| {
        let is_success = i % 5 == 0 || i % 7 == 0;
        if is_success { 1.0 } else { 0.0 }
    }));

    let spec = BinomialLocationScaleTermSpec {
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        thresholdspec: simple_matern_term_collection(&[0, 1], 0.4),
        log_sigmaspec: empty_term_collection(),
        threshold_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
    };

    let result = fit_binomial_location_scale_terms(
        data.view(),
        spec,
        &spatial_fit_smoke_options(),
        &spatial_kappa_options(),
    )
    .expect("binomial location-scale spatial fit");

    assert!(result.fit.penalized_objective.is_finite());
    assert_eq!(result.fit.block_states.len(), 2);
}
/// End-to-end smoke fit of `fit_binomial_location_scalewiggle_terms` on 30 rows
/// with a `simple_matern_term_collection` threshold spec, an empty log-sigma
/// term collection and a degree-2 wiggle block. The fit must succeed with a
/// finite penalized objective and exactly three block states.
#[test]
fn binomial_location_scalewiggle_termswith_matern_spatial_blocks_fit_finitely() {
    let n = 30usize;

    // Column 0 is an evenly spaced ramp on [0, 1]; column 1 is a sinusoid of it.
    let mut data = Array2::<f64>::zeros((n, 2));
    for (i, mut row) in data.outer_iter_mut().enumerate() {
        let t = i as f64 / (n as f64 - 1.0);
        row[0] = t;
        row[1] = (2.5 * std::f64::consts::PI * t).sin();
    }
    // Deterministic 0/1 response: one wherever the index is a multiple of 4 or 9.
    let y = Array1::from_iter((0..n).map(|i| {
        let is_success = i % 4 == 0 || i % 9 == 0;
        if is_success { 1.0 } else { 0.0 }
    }));

    let wiggle_seed = Array1::linspace(-1.5, 1.5, n);
    let (wiggle_block, wiggle_knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
            wiggle_seed.view(),
            2,
            4,
            2,
            false,
        )
        .expect("wiggle block");

    let spec = BinomialLocationScaleWiggleTermSpec {
        y,
        weights: Array1::from_elem(n, 1.0),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        thresholdspec: simple_matern_term_collection(&[0, 1], 0.45),
        log_sigmaspec: empty_term_collection(),
        threshold_offset: Array1::zeros(n),
        log_sigma_offset: Array1::zeros(n),
        wiggle_knots,
        wiggle_degree: 2,
        wiggle_block,
    };

    let result = fit_binomial_location_scalewiggle_terms(
        data.view(),
        spec,
        &spatial_fit_smoke_options(),
        &spatial_kappa_options(),
    )
    .expect("binomial location-scale wiggle spatial fit");

    assert!(result.fit.penalized_objective.is_finite());
    assert_eq!(result.fit.block_states.len(), 3);
}
/// `BinomialLocationScaleWiggleFamily::evaluate` must return three working
/// sets, all of the `ExactNewton` variant.
///
/// The threshold and log-sigma blocks use intercept-only designs, so their
/// gradient has length 1 and their Hessian is `1 x 1`. The wiggle block's
/// gradient and Hessian must match the wiggle coefficient count. Every checked
/// entry must be finite.
#[test]
fn wiggle_family_evaluate_returns_exact_newton_blocks() {
    let n = 6usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0]);
    let weights = Array1::from_elem(n, 1.0);
    let threshold_block = intercept_block(n);
    let log_sigma_block = intercept_block(n);

    let wiggle_seed = Array1::linspace(-1.5, 1.5, n);
    let (wiggle_block, wiggle_knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
            wiggle_seed.view(),
            2,
            3,
            2,
            false,
        )
        .expect("wiggle block");

    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_block.design.clone()),
        log_sigma_design: Some(log_sigma_block.design.clone()),
        wiggle_knots,
        wiggle_degree: 2,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Constant predictors, consistent with intercept coefficients 0.4 and -0.2.
    let eta_t = Array1::from_elem(n, 0.4);
    let eta_ls = Array1::from_elem(n, -0.2);
    // The wiggle design is evaluated at the core's `q0`, so the core is
    // computed before the wiggle predictor.
    let core_for_q0 =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core q0");
    let betaw = Array1::from_elem(wiggle_block.design.ncols(), 0.05);
    let etaw = family
        .wiggle_design(core_for_q0.q0.view())
        .expect("wiggle design")
        .dot(&betaw);

    let states = [
        ParameterBlockState {
            beta: Array1::from_vec(vec![0.4]),
            eta: eta_t,
        },
        ParameterBlockState {
            beta: Array1::from_vec(vec![-0.2]),
            eta: eta_ls,
        },
        ParameterBlockState {
            beta: betaw.clone(),
            eta: etaw,
        },
    ];
    let eval = family.evaluate(&states).expect("evaluate");
    assert_eq!(eval.blockworking_sets.len(), 3);

    // Scalar blocks share the same checks; only the panic message differs.
    let scalar_blocks = [
        (0usize, "threshold block should be exact newton"),
        (1usize, "log-sigma block should be exact newton"),
    ];
    for &(idx, wrong_variant_msg) in scalar_blocks.iter() {
        match &eval.blockworking_sets[idx] {
            BlockWorkingSet::ExactNewton { gradient, hessian } => {
                let dense = hessian.to_dense();
                assert_eq!(gradient.len(), 1);
                assert_eq!(dense.dim(), (1, 1));
                assert!(gradient[0].is_finite());
                assert!(dense[[0, 0]].is_finite());
            }
            BlockWorkingSet::Diagonal { .. } => panic!("{wrong_variant_msg}"),
        }
    }

    match &eval.blockworking_sets[2] {
        BlockWorkingSet::ExactNewton { gradient, hessian } => {
            let dense = hessian.to_dense();
            let p = betaw.len();
            assert_eq!(gradient.len(), p);
            assert_eq!(dense.nrows(), p);
            assert_eq!(dense.ncols(), p);
            assert!(gradient.iter().all(|g| g.is_finite()));
            assert!(dense.iter().all(|h| h.is_finite()));
        }
        BlockWorkingSet::Diagonal { .. } => panic!("wiggle block should be exact newton"),
    }
}
/// Validates `exact_newton_hessian_directional_derivative` for each of the
/// three wiggle-family blocks against a forward finite difference of the block
/// Hessian returned by `evaluate`.
///
/// For every block the direction is the all-ones vector. The perturbed state
/// is rebuilt in full: threshold and log-sigma predictors are recomputed from
/// their designs, and the wiggle predictor is recomputed from the wiggle
/// design evaluated at the perturbed core's `q0`. The comparison is delegated
/// to `crate::test_support::assert_matrix_derivativefd` with tolerance `5e-4`.
#[test]
fn wiggle_family_exact_newton_directional_derivative_matches_finite_difference() {
    let block_t = BinomialLocationScaleWiggleFamily::BLOCK_T;
    let block_ls = BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA;
    let block_w = BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE;

    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_vec(vec![1.0; n]);
    let threshold_block = intercept_block(n);
    let log_sigma_block = intercept_block(n);
    let q_seed = Array1::linspace(-1.4, 1.4, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        3,
        4,
        2,
        false,
    )
    .expect("wiggle block");
    // The designs are reused below to recompute the perturbed predictors, so
    // the family receives clones.
    let threshold_design = threshold_block.design.clone();
    let log_sigma_design = log_sigma_block.design.clone();
    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots,
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    let beta_t = Array1::from_vec(vec![0.25]);
    let beta_ls = Array1::from_vec(vec![-0.15]);
    let eta_t = threshold_design.matrixvectormultiply(&beta_t);
    let eta_ls = log_sigma_design.matrixvectormultiply(&beta_ls);
    let core_for_q0 =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core q0");
    let betaw = Array1::from_vec(vec![0.04; wiggle_block.design.ncols()]);
    let etaw = family
        .wiggle_design(core_for_q0.q0.view())
        .expect("wiggle design")
        .dot(&betaw);

    // None of these arrays is needed again, so they are moved rather than cloned.
    let states = vec![
        ParameterBlockState {
            beta: beta_t,
            eta: eta_t,
        },
        ParameterBlockState {
            beta: beta_ls,
            eta: eta_ls,
        },
        ParameterBlockState {
            beta: betaw,
            eta: etaw,
        },
    ];

    // Borrows the evaluation so the base evaluation can be reused for every
    // block without cloning it.
    let block_hessian = |eval: &FamilyEvaluation, idx: usize| -> Array2<f64> {
        match &eval.blockworking_sets[idx] {
            BlockWorkingSet::ExactNewton { hessian, .. } => hessian.to_dense(),
            BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton"),
        }
    };

    let base_eval = family.evaluate(&states).expect("base eval");
    let eps = 1e-6;
    for block_idx in 0..3 {
        let d_beta = Array1::ones(states[block_idx].beta.len());
        let analytic = family
            .exact_newton_hessian_directional_derivative(&states, block_idx, &d_beta)
            .expect("analytic dH")
            .expect("expected derivative");

        // Step the selected block's coefficients, then refresh every predictor
        // so the perturbed state is internally consistent.
        let mut plus_states = states.clone();
        plus_states[block_idx].beta = &plus_states[block_idx].beta + &(eps * &d_beta);
        plus_states[block_t].eta =
            threshold_design.matrixvectormultiply(&plus_states[block_t].beta);
        plus_states[block_ls].eta =
            log_sigma_design.matrixvectormultiply(&plus_states[block_ls].beta);
        let plus_core_q0 = binomial_location_scale_core(
            &y,
            &weights,
            &plus_states[block_t].eta,
            &plus_states[block_ls].eta,
            None,
            &family.link_kind,
        )
        .expect("plus core q0");
        plus_states[block_w].eta = family
            .wiggle_design(plus_core_q0.q0.view())
            .expect("plus wiggle design")
            .dot(&plus_states[block_w].beta);

        let plus_eval = family.evaluate(&plus_states).expect("plus eval");
        let h_plus = block_hessian(&plus_eval, block_idx);
        let h_base = block_hessian(&base_eval, block_idx);
        let fd = (h_plus - h_base) / eps;
        crate::test_support::assert_matrix_derivativefd(
            &fd,
            &analytic,
            5e-4,
            &format!("block {} dH", block_idx),
        );
    }
}
/// The threshold-block Hessian reported by `evaluate` for the logit wiggle
/// family must agree, within `5e-6`, with the second derivative that
/// `second_derivative` obtains from `wiggle_negloglik_threshold_numdual` with
/// respect to the scalar threshold coefficient.
#[test]
fn wiggle_threshold_block_exacthessian_matches_autodiffobjective() {
    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_elem(n, 1.0);
    let threshold_design = intercept_block(n).design.clone();
    let log_sigma_design = intercept_block(n).design.clone();

    let wiggle_seed = Array1::linspace(-1.4, 1.4, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        wiggle_seed.view(),
        3,
        4,
        2,
        false,
    )
    .expect("wiggle block");

    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Logit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots.clone(),
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Scalar coefficients are kept as plain floats for the autodiff reference.
    let beta_t0 = 0.25;
    let beta_ls0 = -0.15;
    let beta_t = array![beta_t0];
    let beta_ls = array![beta_ls0];
    let eta_t = threshold_design.matrixvectormultiply(&beta_t);
    let eta_ls = log_sigma_design.matrixvectormultiply(&beta_ls);
    let core_for_q0 =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core q0");
    let betaw = Array1::from_elem(wiggle_block.design.ncols(), 0.04);
    let etaw = family
        .wiggle_design(core_for_q0.q0.view())
        .expect("wiggle design")
        .dot(&betaw);

    let states = vec![
        ParameterBlockState {
            beta: beta_t,
            eta: eta_t,
        },
        ParameterBlockState {
            beta: beta_ls,
            eta: eta_ls,
        },
        ParameterBlockState {
            beta: betaw.clone(),
            eta: etaw,
        },
    ];
    let eval = family.evaluate(&states).expect("evaluate wiggle family");

    let threshold_set = &eval.blockworking_sets[BinomialLocationScaleWiggleFamily::BLOCK_T];
    let blockhessian = match threshold_set {
        BlockWorkingSet::ExactNewton { hessian, .. } => hessian.to_dense(),
        BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton threshold block"),
    };

    // Reference curvature: only the third tuple element (second derivative) is used.
    let (_, _, hess_ad) = second_derivative(
        |bt| wiggle_negloglik_threshold_numdual(bt, beta_ls0, &betaw, &y, &weights, &knots, 3),
        beta_t0,
    );

    let gap = (blockhessian[[0, 0]] - hess_ad).abs();
    assert!(
        gap <= 5e-6,
        "wiggle threshold exact hessian mismatch: evaluate()={} autodiff={}",
        blockhessian[[0, 0]],
        hess_ad
    );
}
/// Compares the first-order psi terms returned by
/// `GaussianLocationScaleFamily::exact_newton_joint_psi_terms` against
/// automatic differentiation of scalar negative log-likelihood helpers.
///
/// The fixture has three observations, one mean coefficient and one log-sigma
/// coefficient. The only psi derivative is attached to the log-sigma block
/// (`x_psi`, `x_psi_psi`, zero penalty derivatives). Checked quantities:
///
/// * `objective_psi` against the first derivative in psi (tolerance `1e-10`);
/// * `score_psi[0]` / `score_psi[1]` against the mixed second partials in
///   (mean coefficient, psi) and (log-sigma coefficient, psi) (`1e-10`);
/// * `hessian_psi[[0, 0]]` against the third mixed partial over argument
///   indices `(0, 0, 2)` (`1e-9`);
/// * `hessian_psi[[0, 1]]` and `hessian_psi[[1, 1]]` against the third mixed
///   partials over `(0, 1, 2)` and `(1, 1, 2)` plus a per-row correction built
///   from `gaussian_jointrow_scalars`, which the assertion messages label a
///   "Fisher correction" (`1e-9`).
#[test]
fn gaussian_log_sigma_psi_terms_match_autodiff_scalar_objective() {
    // Lifts a fixture vector into an (n, 1) column matrix.
    let as_column = |v: &Array1<f64>| -> Array2<f64> { v.clone().insert_axis(Axis(1)) };
    // Wraps a dense matrix as a dense design.
    let dense_design = |m: &Array2<f64>| {
        DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(m.clone()))
    };

    let y = array![0.25, -0.4, 1.1];
    let weights = array![1.0, 0.7, 1.3];
    let x_mu0 = array![1.0, -0.35, 0.6];
    let x_ls0 = array![0.8, -0.25, 0.45];
    let x_ls_psi = array![0.2, -0.15, 0.1];
    let x_ls_psi_psi = array![0.05, -0.03, 0.04];
    let beta_mu0 = 0.35_f64;
    let beta_ls0 = -0.2_f64;
    let x_mu0_mat = as_column(&x_mu0);
    let x_ls0_mat = as_column(&x_ls0);

    let family = GaussianLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        mu_design: Some(dense_design(&x_mu0_mat)),
        log_sigma_design: Some(dense_design(&x_ls0_mat)),
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };
    let specs = vec![
        gaussian_psi_test_spec("mu", x_mu0_mat.clone()),
        gaussian_psi_test_spec("log_sigma", x_ls0_mat.clone()),
    ];
    let states = vec![
        ParameterBlockState {
            beta: array![beta_mu0],
            eta: x_mu0_mat.column(0).to_owned() * beta_mu0,
        },
        ParameterBlockState {
            beta: array![beta_ls0],
            eta: x_ls0_mat.column(0).to_owned() * beta_ls0,
        },
    ];

    // The mean block has no psi derivative; the log-sigma block has exactly one.
    let log_sigma_psi = CustomFamilyBlockPsiDerivative {
        penalty_index: None,
        x_psi: as_column(&x_ls_psi),
        s_psi: Array2::zeros((1, 1)),
        s_psi_components: None,
        s_psi_penalty_components: None,
        x_psi_psi: Some(vec![as_column(&x_ls_psi_psi)]),
        s_psi_psi: Some(vec![Array2::zeros((1, 1))]),
        s_psi_psi_components: None,
        s_psi_psi_penalty_components: None,
        implicit_operator: None,
        implicit_axis: 0,
        implicit_group_id: None,
    };
    let derivative_blocks = vec![Vec::new(), vec![log_sigma_psi]];

    let psi_terms = family
        .exact_newton_joint_psi_terms(&states, &specs, &derivative_blocks, 0)
        .expect("joint psi terms")
        .expect("expected gaussian psi terms");

    // Evaluation point for the vector-argument objective; slot 2 holds psi = 0.
    let vars = [beta_mu0, beta_ls0, 0.0_f64];

    // First derivative of the objective in psi.
    let (_, dpsi, _) = second_derivative(
        |psi| {
            gaussian_negloglik_log_sigma_psi_only_numdual(
                psi,
                beta_mu0,
                beta_ls0,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        0.0,
    );
    // Mixed second partial in (mean coefficient, psi).
    let (_, _, _, score_mu_psi) = second_partial_derivative(
        |(beta_mu, psi)| {
            gaussian_negloglik_log_sigma_mu_psi_numdual(
                beta_mu,
                psi,
                beta_ls0,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        (beta_mu0, 0.0),
    );
    // Mixed second partial in (log-sigma coefficient, psi).
    let (_, _, _, score_ls_psi) = second_partial_derivative(
        |(beta_ls, psi)| {
            gaussian_negloglik_log_sigma_ls_psi_numdual(
                beta_ls,
                psi,
                beta_mu0,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        (beta_ls0, 0.0),
    );
    // Third mixed partials over argument indices (0, 0, 2), (0, 1, 2), (1, 1, 2).
    let (_, _, _, _, _, _, _, h_mu_mu_psi) = third_partial_derivative_vec(
        |v| {
            gaussian_negloglik_log_sigma_beta_vec_numdual(
                v,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        &vars,
        0,
        0,
        2,
    );
    let (_, _, _, _, _, _, _, h_mu_ls_psi) = third_partial_derivative_vec(
        |v| {
            gaussian_negloglik_log_sigma_beta_vec_numdual(
                v,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        &vars,
        0,
        1,
        2,
    );
    let (_, _, _, _, _, _, _, h_ls_ls_psi) = third_partial_derivative_vec(
        |v| {
            gaussian_negloglik_log_sigma_beta_vec_numdual(
                v,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        &vars,
        1,
        1,
        2,
    );

    let objective_gap = (psi_terms.objective_psi - dpsi).abs();
    assert!(
        objective_gap <= 1e-10,
        "Gaussian log-sigma psi objective derivative mismatch: analytic={} autodiff={}",
        psi_terms.objective_psi,
        dpsi
    );
    let score_mu_gap = (psi_terms.score_psi[0] - score_mu_psi).abs();
    assert!(
        score_mu_gap <= 1e-10,
        "Gaussian log-sigma psi score_mu mismatch: analytic={} autodiff={}",
        psi_terms.score_psi[0],
        score_mu_psi
    );
    let score_ls_gap = (psi_terms.score_psi[1] - score_ls_psi).abs();
    assert!(
        score_ls_gap <= 1e-10,
        "Gaussian log-sigma psi score_ls mismatch: analytic={} autodiff={}",
        psi_terms.score_psi[1],
        score_ls_psi
    );
    let mu_mu_gap = (psi_terms.hessian_psi[[0, 0]] - h_mu_mu_psi).abs();
    assert!(
        mu_mu_gap <= 1e-9,
        "Gaussian log-sigma psi hessian(mu,mu) mismatch: analytic={} autodiff={}",
        psi_terms.hessian_psi[[0, 0]],
        h_mu_mu_psi
    );

    // Row-level scalars at the fixture point, used to build the corrections
    // that are added to the autodiff references for the (mu, ls) and (ls, ls)
    // entries.
    let rows_gap =
        gaussian_jointrow_scalars(&y, &(&x_mu0 * beta_mu0), &(&x_ls0 * beta_ls0), &weights)
            .expect("gaussian row scalars for psi corrections");

    let mu_ls_psi_correction: f64 = (0..y.len())
        .map(|row| {
            let m = rows_gap.m[row];
            let kappa = rows_gap.kappa[row];
            let kappa_prime = rows_gap.kappa_prime[row];
            let p = 2.0 * kappa * kappa - kappa_prime;
            let x_mu = x_mu0[row];
            let x_ls = x_ls0[row];
            let x_psi = x_ls_psi[row];
            let z_ls_psi = x_psi * beta_ls0;
            x_mu * (2.0 * m * p * z_ls_psi * x_ls - 2.0 * m * kappa * x_psi)
        })
        .sum();
    let mu_ls_gap =
        (psi_terms.hessian_psi[[0, 1]] - (h_mu_ls_psi + mu_ls_psi_correction)).abs();
    assert!(
        mu_ls_gap <= 1e-9,
        "Gaussian log-sigma psi hessian(mu,ls) mismatch: analytic={} reference={} (ad={} + Fisher correction={})",
        psi_terms.hessian_psi[[0, 1]],
        h_mu_ls_psi + mu_ls_psi_correction,
        h_mu_ls_psi,
        mu_ls_psi_correction
    );

    let ls_ls_psi_correction: f64 = (0..y.len())
        .map(|row| {
            let obs_weight = rows_gap.obs_weight[row];
            let n_row = rows_gap.n[row];
            let kappa = rows_gap.kappa[row];
            let kappa_prime = rows_gap.kappa_prime[row];
            let kappa_dprime = rows_gap.kappa_dprime[row];
            let p = 2.0 * kappa * kappa - kappa_prime;
            let p1 = 4.0 * kappa * kappa_prime - kappa_dprime;
            let delta = (obs_weight - n_row) * p;
            let ddelta_deta = 2.0 * kappa * n_row * p + (obs_weight - n_row) * p1;
            let x_ls = x_ls0[row];
            let x_psi = x_ls_psi[row];
            let z_ls_psi = x_psi * beta_ls0;
            ddelta_deta * z_ls_psi * x_ls * x_ls + delta * 2.0 * x_ls * x_psi
        })
        .sum();
    let ls_ls_gap =
        (psi_terms.hessian_psi[[1, 1]] - (h_ls_ls_psi + ls_ls_psi_correction)).abs();
    assert!(
        ls_ls_gap <= 1e-9,
        "Gaussian log-sigma psi hessian(ls,ls) mismatch: analytic={} reference={} (ad={} + Fisher correction={})",
        psi_terms.hessian_psi[[1, 1]],
        h_ls_ls_psi + ls_ls_psi_correction,
        h_ls_ls_psi,
        ls_ls_psi_correction
    );
}
/// Compares the analytic psi-psi objective and score terms returned by
/// `exact_newton_joint_psisecond_order_terms` with num-dual derivatives of the
/// scalar objective, for a fixture in which only the log-sigma block carries a
/// psi derivative.
#[test]
fn gaussian_log_sigma_psi_second_order_terms_match_autodiff_scalar_objective() {
    // Three-row fixture: response, weights, base designs and psi-derivatives
    // of the log-sigma design.
    let y = array![0.25, -0.4, 1.1];
    let weights = array![1.0, 0.7, 1.3];
    let x_mu0 = array![1.0, -0.35, 0.6];
    let x_ls0 = array![0.8, -0.25, 0.45];
    let x_ls_psi = array![0.2, -0.15, 0.1];
    let x_ls_psi_psi = array![0.05, -0.03, 0.04];
    let beta_mu0 = 0.35_f64;
    let beta_ls0 = -0.2_f64;

    // Single-column design matrices for the two parameter blocks.
    let mu_design = x_mu0.clone().insert_axis(Axis(1));
    let ls_design = x_ls0.clone().insert_axis(Axis(1));

    let family = GaussianLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        mu_design: Some(DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
            mu_design.clone(),
        ))),
        log_sigma_design: Some(DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
            ls_design.clone(),
        ))),
        policy: crate::resource::ResourcePolicy::default_library(),
        cached_row_scalars: std::sync::RwLock::new(None),
    };
    let specs = vec![
        gaussian_psi_test_spec("mu", mu_design),
        gaussian_psi_test_spec("log_sigma", ls_design),
    ];
    let states = vec![
        ParameterBlockState {
            beta: array![beta_mu0],
            eta: &x_mu0 * beta_mu0,
        },
        ParameterBlockState {
            beta: array![beta_ls0],
            eta: &x_ls0 * beta_ls0,
        },
    ];

    // Only the second (log-sigma) block has a psi derivative entry.
    let log_sigma_psi_derivative = CustomFamilyBlockPsiDerivative {
        penalty_index: None,
        x_psi: x_ls_psi.clone().insert_axis(Axis(1)),
        s_psi: Array2::zeros((1, 1)),
        s_psi_components: None,
        s_psi_penalty_components: None,
        x_psi_psi: Some(vec![x_ls_psi_psi.clone().insert_axis(Axis(1))]),
        s_psi_psi: Some(vec![Array2::zeros((1, 1))]),
        s_psi_psi_components: None,
        s_psi_psi_penalty_components: None,
        implicit_operator: None,
        implicit_axis: 0,
        implicit_group_id: None,
    };
    let derivative_blocks = vec![Vec::new(), vec![log_sigma_psi_derivative]];

    let psi2_terms = family
        .exact_newton_joint_psisecond_order_terms(&states, &specs, &derivative_blocks, 0, 0)
        .expect("joint psi psi terms")
        .expect("expected gaussian psi psi terms");

    // Autodiff references. Variable order for the vector objective is
    // (beta_mu, beta_ls, psi), evaluated at psi = 0.
    let beta_and_psi = [beta_mu0, beta_ls0, 0.0_f64];
    let nll_of_beta_psi = |v: &[num_dual::HyperHyperDual<f64, f64>]| {
        gaussian_negloglik_log_sigma_beta_vec_numdual(
            v,
            &y,
            &weights,
            &x_mu0,
            &x_ls0,
            &x_ls_psi,
            &x_ls_psi_psi,
        )
    };
    let d2psi = second_derivative(
        |psi| {
            gaussian_negloglik_log_sigma_psi_only_numdual(
                psi,
                beta_mu0,
                beta_ls0,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_ls_psi,
                &x_ls_psi_psi,
            )
        },
        0.0,
    )
    .2;
    // The last tuple element is the mixed third derivative at indices (i, 2, 2).
    let score_mu_psi_psi =
        third_partial_derivative_vec(nll_of_beta_psi, &beta_and_psi, 0, 2, 2).7;
    let score_ls_psi_psi =
        third_partial_derivative_vec(nll_of_beta_psi, &beta_and_psi, 1, 2, 2).7;

    assert!(
        (psi2_terms.objective_psi_psi - d2psi).abs() <= 1e-10,
        "Gaussian log-sigma psi second objective mismatch: analytic={} autodiff={}",
        psi2_terms.objective_psi_psi,
        d2psi
    );
    assert!(
        (psi2_terms.score_psi_psi[0] - score_mu_psi_psi).abs() <= 1e-9,
        "Gaussian log-sigma psi second score_mu mismatch: analytic={} autodiff={}",
        psi2_terms.score_psi_psi[0],
        score_mu_psi_psi
    );
    assert!(
        (psi2_terms.score_psi_psi[1] - score_ls_psi_psi).abs() <= 1e-9,
        "Gaussian log-sigma psi second score_ls mismatch: analytic={} autodiff={}",
        psi2_terms.score_psi_psi[1],
        score_ls_psi_psi
    );
}
/// Weighted Gaussian objective `sum_i w_i * (0.5 * ((y_i - eta_mu_i) / sigma_i)^2 + ln sigma_i)`
/// written over a dual-number type so it can be differentiated automatically.
///
/// Both single-column designs depend on `psi` through a second-order expansion
/// `x0 + psi * x_psi + 0.5 * psi^2 * x_psi_psi`; the linear predictors are the
/// expanded design entry times `beta_mu` / `beta_ls`, and
/// `sigma = LOGB_SIGMA_FLOOR + exp(eta_ls)`.
fn gaussian_negloglik_log_sigma_psi_full_numdual<D: DualNum<f64> + Copy>(
    beta_mu: D,
    beta_ls: D,
    psi: D,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    x_mu0: &Array1<f64>,
    x_ls0: &Array1<f64>,
    x_mu_psi: &Array1<f64>,
    x_ls_psi: &Array1<f64>,
    x_mu_psi_psi: &Array1<f64>,
    x_ls_psi_psi: &Array1<f64>,
) -> D {
    let half = D::from(0.5);
    // Design entry at the current psi from its base value and first/second
    // psi coefficients.
    let design_entry = |base: f64, first: f64, second: f64| -> D {
        D::from(base) + psi * D::from(first) + half * psi * psi * D::from(second)
    };
    let mut total = D::zero();
    for (row, &observed) in y.iter().enumerate() {
        let eta_mu = design_entry(x_mu0[row], x_mu_psi[row], x_mu_psi_psi[row]) * beta_mu;
        let eta_ls = design_entry(x_ls0[row], x_ls_psi[row], x_ls_psi_psi[row]) * beta_ls;
        let sigma = D::from(LOGB_SIGMA_FLOOR) + eta_ls.exp();
        let resid = D::from(observed) - eta_mu;
        total += D::from(weights[row]) * (half * (resid / sigma).powi(2) + sigma.ln());
    }
    total
}
/// Weighted Gaussian objective `sum_i w_i * (0.5 * ((y_i - eta_mu_i) / sigma_i)^2 + ln sigma_i)`
/// for dense multi-column designs, generic over a dual-number type.
///
/// `eta_mu` and `eta_ls` are the row-wise dot products of `xmu` / `x_ls` with
/// `beta_mu` / `beta_ls`, and `sigma = LOGB_SIGMA_FLOOR + exp(eta_ls)`.
fn gaussian_negloglik_logb_dense_numdual<D: DualNum<f64> + Copy>(
    beta_mu: &[D],
    beta_ls: &[D],
    y: &Array1<f64>,
    weights: &Array1<f64>,
    xmu: &Array2<f64>,
    x_ls: &Array2<f64>,
) -> D {
    let half = D::from(0.5);
    // Dot product of one design row with a coefficient slice, accumulated in
    // column order.
    let linear_predictor = |design: &Array2<f64>, beta: &[D], row: usize| -> D {
        let mut eta = D::zero();
        for (col, &coef) in beta.iter().enumerate() {
            eta += D::from(design[[row, col]]) * coef;
        }
        eta
    };
    let mut total = D::zero();
    for (row, &observed) in y.iter().enumerate() {
        let eta_mu = linear_predictor(xmu, beta_mu, row);
        let eta_ls = linear_predictor(x_ls, beta_ls, row);
        let sigma = D::from(LOGB_SIGMA_FLOOR) + eta_ls.exp();
        let resid = D::from(observed) - eta_mu;
        total += D::from(weights[row]) * (half * (resid / sigma).powi(2) + sigma.ln());
    }
    total
}
fn gaussian_logb_design_test_data() -> (
Array1<f64>,
Array1<f64>,
Array2<f64>,
Array2<f64>,
Array1<f64>,
Array1<f64>,
) {
let y = array![0.25, -0.4, 1.1, 0.05, -0.2];
let weights = array![1.0, 0.7, 1.3, 0.9, 1.1];
let xmu = ndarray::arr2(&[[1.0, -0.6], [1.0, -0.2], [1.0, 0.1], [1.0, 0.4], [1.0, 0.7]]);
let x_ls = ndarray::arr2(&[[1.0, 0.5], [1.0, -0.1], [1.0, 0.3], [1.0, -0.4], [1.0, 0.2]]);
let beta_mu = array![0.35, -0.25];
let beta_ls = array![-0.4, 0.05];
(y, weights, xmu, x_ls, beta_mu, beta_ls)
}
/// Compares the joint Hessian assembled by `gaussian_joint_hessian_from_designs`
/// (using the weights from `gaussian_joint_psi_firstweights` with zero psi
/// drifts) against an autodiff Hessian of the dense objective plus explicit
/// corrections on the (ls, ls) and (mu, ls) blocks, and checks symmetry.
#[test]
fn gaussian_joint_static_hessian_matches_autodiff() {
    let (y, weights, xmu, x_ls, beta_mu, beta_ls) = gaussian_logb_design_test_data();
    let n_obs = y.len();
    let pmu = beta_mu.len();
    let etamu = xmu.dot(&beta_mu);
    let eta_ls = x_ls.dot(&beta_ls);
    let rows =
        gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");

    // Analytic Hessian from the row weights with zero psi drift in both blocks.
    let zero_drift: Array1<f64> = Array1::zeros(n_obs);
    let weights0 = gaussian_joint_psi_firstweights(&rows, &zero_drift, &zero_drift);
    let xmu_dense = DenseOrOperator::Borrowed(&xmu);
    let xls_dense = DenseOrOperator::Borrowed(&x_ls);
    let analytic = gaussian_joint_hessian_from_designs(
        &xmu_dense,
        &xls_dense,
        &weights0.hmumu,
        &weights0.hmu_ls,
        &weights0.h_ls_ls,
    )
    .expect("gaussian joint static hessian from designs");

    // Stacked coefficient vector (mu block first, then log-sigma block).
    let beta_full: Vec<f64> = beta_mu.iter().chain(beta_ls.iter()).copied().collect();
    let total = beta_full.len();

    // Autodiff Hessian of the dense objective: second derivatives on the
    // diagonal, mixed second partials above it, mirrored below.
    let mut ad = Array2::<f64>::zeros((total, total));
    for i in 0..total {
        let diagonal = |x: num_dual::Dual2<f64, f64>| {
            let mut coefs: Vec<num_dual::Dual2<f64, f64>> = beta_full
                .iter()
                .map(|&b| num_dual::Dual2::from_re(b))
                .collect();
            coefs[i] = x;
            let (bm, bl) = coefs.split_at(pmu);
            gaussian_negloglik_logb_dense_numdual(bm, bl, &y, &weights, &xmu, &x_ls)
        };
        ad[[i, i]] = second_derivative(diagonal, beta_full[i]).2;

        for j in (i + 1)..total {
            let mixed =
                |(a, b): (num_dual::HyperDual<f64, f64>, num_dual::HyperDual<f64, f64>)| {
                    let mut coefs: Vec<num_dual::HyperDual<f64, f64>> = beta_full
                        .iter()
                        .map(|&c| num_dual::HyperDual::from_re(c))
                        .collect();
                    coefs[i] = a;
                    coefs[j] = b;
                    let (bm, bl) = coefs.split_at(pmu);
                    gaussian_negloglik_logb_dense_numdual(bm, bl, &y, &weights, &xmu, &x_ls)
                };
            let d2xy = second_partial_derivative(mixed, (beta_full[i], beta_full[j])).3;
            ad[[i, j]] = d2xy;
            ad[[j, i]] = d2xy;
        }
    }

    // Reference = autodiff Hessian + row-weighted corrections.
    let mut reference = ad.clone();

    // (ls, ls) block: per-row "fisher - observed" weight.
    let fisher_minus_observed_ls_ls: Array1<f64> = Array1::from_shape_fn(n_obs, |row| {
        let a = rows.obs_weight[row];
        let n = rows.n[row];
        let k = rows.kappa[row];
        let kp = rows.kappa_prime[row];
        2.0 * k * k * a - (2.0 * k * k * n + kp * (a - n))
    });
    let ls_correction = x_ls
        .t()
        .dot(&Array2::from_diag(&fisher_minus_observed_ls_ls).dot(&x_ls));
    {
        let mut ls_block = reference.slice_mut(s![pmu.., pmu..]);
        ls_block += &ls_correction;
    }

    // (mu, ls) block and its transpose: per-row weight -2 * m * kappa.
    let fisher_minus_observed_mu_ls: Array1<f64> =
        Array1::from_shape_fn(n_obs, |row| -2.0 * rows.m[row] * rows.kappa[row]);
    let mu_ls_correction = xmu
        .t()
        .dot(&Array2::from_diag(&fisher_minus_observed_mu_ls).dot(&x_ls));
    {
        let mut upper = reference.slice_mut(s![..pmu, pmu..]);
        upper += &mu_ls_correction;
    }
    {
        let mut lower = reference.slice_mut(s![pmu.., ..pmu]);
        lower += &mu_ls_correction.t();
    }

    // Entry-wise comparison in row-major order.
    for ((i, j), &expected) in reference.indexed_iter() {
        let diff = (analytic[[i, j]] - expected).abs();
        assert!(
            diff <= 1e-10,
            "Gaussian static joint H[{i},{j}] mismatch (κ < 1 case): analytic={} reference={} (ad={}) diff={}",
            analytic[[i, j]],
            expected,
            ad[[i, j]],
            diff
        );
    }

    // Largest absolute asymmetry of the analytic matrix.
    let skew = (&analytic - &analytic.t())
        .iter()
        .fold(0.0_f64, |worst, d| worst.max(d.abs()));
    assert!(
        skew <= 1e-12,
        "Gaussian static joint Hessian skew exceeds noise floor: {skew}"
    );
}
/// Compares the Hessian built from `gaussian_joint_first_directionalweights`
/// along a fixed direction `v` with autodiff third derivatives of the dense
/// objective, after adding an explicit correction to the (ls, ls) block of the
/// reference, and checks symmetry of the analytic matrix.
#[test]
fn gaussian_joint_first_directional_hessian_matches_autodiff() {
    let (y, weights, xmu, x_ls, beta_mu, beta_ls) = gaussian_logb_design_test_data();
    let n_obs = y.len();
    let pmu = beta_mu.len();
    let total = pmu + beta_ls.len();
    let etamu = xmu.dot(&beta_mu);
    let eta_ls = x_ls.dot(&beta_ls);

    // Direction in coefficient space and its image in each linear predictor.
    let v: Array1<f64> = Array1::from_shape_fn(total, |k| 0.13 + 0.07 * (k as f64));
    let ximu = xmu.dot(&v.slice(s![0..pmu]).to_owned());
    let xi_ls = x_ls.dot(&v.slice(s![pmu..total]).to_owned());

    let rows =
        gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");
    let (dhmumu, dhmu_ls, dh_ls_ls) =
        gaussian_joint_first_directionalweights(&rows, &ximu, &xi_ls);
    let xmu_dense = DenseOrOperator::Borrowed(&xmu);
    let xls_dense = DenseOrOperator::Borrowed(&x_ls);
    let analytic = gaussian_joint_hessian_from_designs(
        &xmu_dense, &xls_dense, &dhmumu, &dhmu_ls, &dh_ls_ls,
    )
    .expect("gaussian joint first-directional H from designs");

    // AD variables: the stacked coefficients followed by a scalar step along v
    // (evaluated at zero).
    let vars: Vec<f64> = beta_mu
        .iter()
        .chain(beta_ls.iter())
        .copied()
        .chain(std::iter::once(0.0_f64))
        .collect();
    let shifted_objective = |z: &[num_dual::HyperHyperDual<f64, f64>]| {
        let step = z[total];
        let coefs: Vec<num_dual::HyperHyperDual<f64, f64>> = (0..total)
            .map(|k| z[k] + step * num_dual::HyperHyperDual::from_re(v[k]))
            .collect();
        let (bm, bl) = coefs.split_at(pmu);
        gaussian_negloglik_logb_dense_numdual(bm, bl, &y, &weights, &xmu, &x_ls)
    };

    // d^3 / (d beta_i d beta_j d step), filled symmetrically.
    let mut ad = Array2::<f64>::zeros((total, total));
    for i in 0..total {
        for j in i..total {
            let d3 = third_partial_derivative_vec(shifted_objective, &vars, i, j, total).7;
            ad[[i, j]] = d3;
            ad[[j, i]] = d3;
        }
    }

    // Reference = autodiff + row-weighted correction on the (ls, ls) block.
    let mut reference = ad.clone();
    let d_fisher_minus_observed: Array1<f64> = Array1::from_shape_fn(n_obs, |row| {
        let a = rows.obs_weight[row];
        let n = rows.n[row];
        let m = rows.m[row];
        let k = rows.kappa[row];
        let kp = rows.kappa_prime[row];
        let kdp = rows.kappa_dprime[row];
        let p = 2.0 * k * k - kp;
        let p1 = 4.0 * k * kp - kdp;
        2.0 * m * p * ximu[row] + (2.0 * k * n * p + (a - n) * p1) * xi_ls[row]
    });
    let ls_correction = x_ls
        .t()
        .dot(&Array2::from_diag(&d_fisher_minus_observed).dot(&x_ls));
    {
        let mut ls_block = reference.slice_mut(s![pmu.., pmu..]);
        ls_block += &ls_correction;
    }

    for ((i, j), &expected) in reference.indexed_iter() {
        let diff = (analytic[[i, j]] - expected).abs();
        assert!(
            diff <= 1e-10,
            "Gaussian dH (first-directional) [{i},{j}] mismatch: analytic={} reference={} (ad={}) diff={}",
            analytic[[i, j]],
            expected,
            ad[[i, j]],
            diff
        );
    }

    let skew = (&analytic - &analytic.t())
        .iter()
        .fold(0.0_f64, |worst, d| worst.max(d.abs()));
    assert!(
        skew <= 1e-12,
        "Gaussian first-directional dH skew exceeds noise floor: {skew}"
    );
}
// Compares the Hessian built from `gaussian_jointsecond_directionalweights`
// (directions `u` and `v`) against a reference obtained by central-differencing
// autodiff third derivatives along `v`, plus an explicit correction on the
// (ls, ls) block, and checks symmetry of the analytic matrix.
#[test]
fn gaussian_joint_second_directional_hessian_matches_autodiff() {
let (y, weights, xmu, x_ls, beta_mu, beta_ls) = gaussian_logb_design_test_data();
let etamu = xmu.dot(&beta_mu);
let eta_ls = x_ls.dot(&beta_ls);
let pmu = beta_mu.len();
let p_ls = beta_ls.len();
let total = pmu + p_ls;
// Two fixed directions in the stacked (mu, log-sigma) coefficient space.
let u: Array1<f64> = Array1::from_shape_fn(total, |k| 0.18 - 0.05 * (k as f64));
let v: Array1<f64> = Array1::from_shape_fn(total, |k| -0.11 + 0.09 * (k as f64));
let u_mu = u.slice(s![0..pmu]).to_owned();
let u_ls = u.slice(s![pmu..total]).to_owned();
let v_mu = v.slice(s![0..pmu]).to_owned();
let v_ls = v.slice(s![pmu..total]).to_owned();
// Images of the directions in each linear predictor (design times direction).
let ximu_u = xmu.dot(&u_mu);
let xi_ls_u = x_ls.dot(&u_ls);
let ximuv = xmu.dot(&v_mu);
let xi_lsv = x_ls.dot(&v_ls);
let rows =
gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");
let (d2hmumu, d2hmu_ls, d2h_ls_ls) =
gaussian_jointsecond_directionalweights(&rows, &ximu_u, &xi_ls_u, &ximuv, &xi_lsv);
let xmu_dense = DenseOrOperator::Borrowed(&xmu);
let xls_dense = DenseOrOperator::Borrowed(&x_ls);
let analytic = gaussian_joint_hessian_from_designs(
&xmu_dense, &xls_dense, &d2hmumu, &d2hmu_ls, &d2h_ls_ls,
)
.expect("gaussian joint second-directional H from designs");
// AD variables: stacked coefficients followed by one extra slot (left at 0.0)
// that acts as the scalar step along `u`.
let mut vars_base = vec![0.0_f64; total + 1];
for k in 0..pmu {
vars_base[k] = beta_mu[k];
}
for k in 0..p_ls {
vars_base[pmu + k] = beta_ls[k];
}
// Finite-difference step used along `v`.
let h = 1e-4;
let mut ad = Array2::<f64>::zeros((total, total));
for i in 0..total {
for j in i..total {
// Objective with coefficients shifted by `eps_u * u` (differentiated by AD)
// and by the constant `+h * v`.
let g_plus = |z: &[num_dual::HyperHyperDual<f64, f64>]| {
let mut bm = vec![num_dual::HyperHyperDual::from_re(0.0); pmu];
let mut bl = vec![num_dual::HyperHyperDual::from_re(0.0); p_ls];
let eps_u = z[total];
for k in 0..pmu {
bm[k] = z[k]
+ eps_u * num_dual::HyperHyperDual::from_re(u[k])
+ num_dual::HyperHyperDual::from_re(h * v[k]);
}
for k in 0..p_ls {
bl[k] = z[pmu + k]
+ eps_u * num_dual::HyperHyperDual::from_re(u[pmu + k])
+ num_dual::HyperHyperDual::from_re(h * v[pmu + k]);
}
gaussian_negloglik_logb_dense_numdual(&bm, &bl, &y, &weights, &xmu, &x_ls)
};
// Same objective with the constant shift `-h * v`.
let g_minus = |z: &[num_dual::HyperHyperDual<f64, f64>]| {
let mut bm = vec![num_dual::HyperHyperDual::from_re(0.0); pmu];
let mut bl = vec![num_dual::HyperHyperDual::from_re(0.0); p_ls];
let eps_u = z[total];
for k in 0..pmu {
bm[k] = z[k] + eps_u * num_dual::HyperHyperDual::from_re(u[k])
- num_dual::HyperHyperDual::from_re(h * v[k]);
}
for k in 0..p_ls {
bl[k] = z[pmu + k] + eps_u * num_dual::HyperHyperDual::from_re(u[pmu + k])
- num_dual::HyperHyperDual::from_re(h * v[pmu + k]);
}
gaussian_negloglik_logb_dense_numdual(&bm, &bl, &y, &weights, &xmu, &x_ls)
};
// Third derivative with respect to (beta_i, beta_j, eps_u) at each shifted point.
let (_, _, _, _, _, _, _, d3_plus) =
third_partial_derivative_vec(g_plus, &vars_base, i, j, total);
let (_, _, _, _, _, _, _, d3_minus) =
third_partial_derivative_vec(g_minus, &vars_base, i, j, total);
// Central difference along `v`; filled symmetrically.
let val = (d3_plus - d3_minus) / (2.0 * h);
ad[[i, j]] = val;
if i != j {
ad[[j, i]] = val;
}
}
}
// Reference = differenced autodiff values + row-weighted correction applied to
// the (ls, ls) block only.
let mut reference = ad.clone();
let d2_fisher_minus_observed: Array1<f64> = Array1::from_shape_fn(y.len(), |i| {
let a = rows.obs_weight[i];
let n = rows.n[i];
let m = rows.m[i];
let w = rows.w[i];
let k = rows.kappa[i];
let kp = rows.kappa_prime[i];
let kdp = rows.kappa_dprime[i];
// NOTE(review): g, g1, g2 and p, p1, p2 look like a quantity and its first and
// second derivatives with respect to the log-sigma predictor, with p2 written
// in a closed form that avoids a third kappa derivative — confirm against the
// definitions in `gaussian_jointrow_scalars`.
let g = a - n;
let p = 2.0 * k * k - kp;
let p1 = 4.0 * k * kp - kdp;
let p2 = 6.0 * kp * kp + kdp * (6.0 * k - 1.0);
let g1 = 2.0 * k * n;
let g2 = 2.0 * n * (kp - 2.0 * k * k);
// Coefficients of the bilinear form in the (mu, ls) predictor directions.
let d_mumu = -2.0 * w * p;
let d_muls = 2.0 * m * (p1 - 2.0 * k * p);
let d_lsls = g2 * p + 2.0 * g1 * p1 + g * p2;
d_mumu * ximu_u[i] * ximuv[i]
+ d_muls * (ximu_u[i] * xi_lsv[i] + xi_ls_u[i] * ximuv[i])
+ d_lsls * xi_ls_u[i] * xi_lsv[i]
});
let ls_correction = x_ls
.t()
.dot(&Array2::from_diag(&d2_fisher_minus_observed).dot(&x_ls));
for a in 0..p_ls {
for b in 0..p_ls {
reference[[pmu + a, pmu + b]] += ls_correction[[a, b]];
}
}
// Looser tolerance than the pure-autodiff tests because the reference contains
// a finite-difference step.
let tol = 5e-6;
for i in 0..total {
for j in 0..total {
let diff = (analytic[[i, j]] - reference[[i, j]]).abs();
assert!(
diff <= tol,
"Gaussian d2H (second-directional) [{i},{j}] mismatch: analytic={} reference={} (ad={}) diff={}",
analytic[[i, j]],
reference[[i, j]],
ad[[i, j]],
diff
);
}
}
// Largest absolute asymmetry of the analytic matrix.
let skew = (&analytic - &analytic.t())
.mapv(f64::abs)
.fold(0.0_f64, |acc, &v| acc.max(v));
assert!(
skew <= 1e-10,
"Gaussian second-directional d2H skew exceeds noise floor: {skew}"
);
}
/// Compares the summed `objective_psi_psirow` from
/// `gaussian_joint_psisecondweights` with the autodiff second derivative in psi
/// of the scalar objective, for a fixture in which both the mean and the
/// log-sigma designs depend on psi.
#[test]
fn gaussian_joint_psi_second_order_terms_match_autodiff() {
    let y = array![0.25, -0.4, 1.1, 0.05, -0.2];
    let weights = array![1.0, 0.7, 1.3, 0.9, 1.1];
    let x_mu0 = array![1.0, -0.35, 0.6, 0.1, 0.45];
    let x_ls0 = array![0.8, -0.25, 0.45, -0.1, 0.3];
    let x_mu_psi = array![0.2, 0.15, -0.1, 0.05, 0.3];
    let x_ls_psi = array![0.18, -0.12, 0.25, -0.2, 0.07];
    let x_mu_psi_psi = array![0.04, -0.03, 0.05, 0.06, -0.02];
    let x_ls_psi_psi = array![0.05, -0.03, 0.04, 0.07, -0.04];
    let beta_mu0 = 0.35_f64;
    let beta_ls0 = -0.4_f64;

    // Linear predictors at psi = 0.
    let etamu = &x_mu0 * beta_mu0;
    let eta_ls = &x_ls0 * beta_ls0;
    let rows =
        gaussian_jointrow_scalars(&y, &etamu, &eta_ls, &weights).expect("gaussian row scalars");

    // First and second psi-drifts of each predictor (design derivative times
    // the block coefficient). The first-order drifts are passed twice.
    let zmu_psi = &x_mu_psi * beta_mu0;
    let z_ls_psi = &x_ls_psi * beta_ls0;
    let zmu_psi_psi = &x_mu_psi_psi * beta_mu0;
    let z_ls_psi_psi = &x_ls_psi_psi * beta_ls0;
    let analytic = gaussian_joint_psisecondweights(
        &rows,
        &zmu_psi,
        &z_ls_psi,
        &zmu_psi,
        &z_ls_psi,
        &zmu_psi_psi,
        &z_ls_psi_psi,
    )
    .objective_psi_psirow
    .sum();

    // Autodiff second derivative in psi at psi = 0 with fixed coefficients.
    let ad = second_derivative(
        |psi| {
            gaussian_negloglik_log_sigma_psi_full_numdual(
                num_dual::Dual2::from_re(beta_mu0),
                num_dual::Dual2::from_re(beta_ls0),
                psi,
                &y,
                &weights,
                &x_mu0,
                &x_ls0,
                &x_mu_psi,
                &x_ls_psi,
                &x_mu_psi_psi,
                &x_ls_psi_psi,
            )
        },
        0.0,
    )
    .2;

    let diff = (analytic - ad).abs();
    assert!(
        diff <= 1e-10,
        "Gaussian joint ψ-ψ objective mismatch (κ < 1, μ and σ both ψ-dependent): analytic={} ad={} diff={}",
        analytic,
        ad,
        diff
    );
}
/// Checks that each per-block exact-Newton Hessian reported by `evaluate`
/// matches the corresponding principal diagonal block of
/// `exact_newton_joint_hessian` (compared with `assert_matrix_derivativefd`,
/// tolerance 1e-10) on an intercept-only wiggle fixture.
#[test]
fn wiggle_family_block_hessians_match_jointhessian_principal_blocks() {
    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_elem(n, 1.0);
    let threshold_design = intercept_block(n).design.clone();
    let log_sigma_design = intercept_block(n).design.clone();

    let q_seed = Array1::linspace(-1.4, 1.4, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        3,
        4,
        2,
        false,
    )
    .expect("wiggle block");

    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots,
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Coefficients and linear predictors; the wiggle predictor is built from
    // the wiggle design evaluated at the core's q0.
    let beta_t = Array1::from_vec(vec![0.25]);
    let beta_ls = Array1::from_vec(vec![-0.15]);
    let betaw = Array1::from_elem(wiggle_block.design.ncols(), 0.04);
    let eta_t = threshold_design.matrixvectormultiply(&beta_t);
    let eta_ls = log_sigma_design.matrixvectormultiply(&beta_ls);
    let core_for_q0 =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core q0");
    let etaw = family
        .wiggle_design(core_for_q0.q0.view())
        .expect("wiggle design")
        .dot(&betaw);

    // Record block sizes before the coefficient vectors move into the states.
    let (n_t, n_ls, n_w) = (beta_t.len(), beta_ls.len(), betaw.len());
    let states = vec![
        ParameterBlockState {
            beta: beta_t,
            eta: eta_t,
        },
        ParameterBlockState {
            beta: beta_ls,
            eta: eta_ls,
        },
        ParameterBlockState {
            beta: betaw,
            eta: etaw,
        },
    ];

    let eval = family.evaluate(&states).expect("evaluate wiggle family");
    let joint = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected joint exact hessian");

    // Half-open index ranges of the three blocks inside the joint matrix.
    let beta_layout = GamlssBetaLayout::withwiggle(n_t, n_ls, n_w);
    let ls_start = beta_layout.pt;
    let wiggle_start = beta_layout.pt + beta_layout.pls;
    let wiggle_end = beta_layout.pt + beta_layout.pls + beta_layout.pw;
    let block_ranges = [
        (0usize, ls_start),
        (ls_start, wiggle_start),
        (wiggle_start, wiggle_end),
    ];

    for (block_idx, &(start, end)) in block_ranges.iter().enumerate() {
        let blockhessian = match &eval.blockworking_sets[block_idx] {
            BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton block"),
            BlockWorkingSet::ExactNewton { hessian, .. } => hessian.to_dense(),
        };
        let joint_block = joint.slice(s![start..end, start..end]).to_owned();
        crate::test_support::assert_matrix_derivativefd(
            &joint_block,
            &blockhessian,
            1e-10,
            &format!("wiggle block {block_idx} principal block"),
        );
    }
}
/// Builds a nine-row probit wiggle fixture with non-intercept designs.
///
/// Returns `(family, threshold_design, log_sigma_design, wiggle_block, y, weights)`.
/// The threshold design has columns `1`, `t - 0.5`, `sin(2*pi*t)` and the
/// log-sigma design has columns `1`, `cos(3*pi*t)` on an even grid `t` in
/// `[0, 1]`; all row weights are 1.
fn wiggle_nontrivial_fixture() -> (
    BinomialLocationScaleWiggleFamily,
    DesignMatrix,
    DesignMatrix,
    ParameterBlockInput,
    Array1<f64>,
    Array1<f64>,
) {
    let n = 9usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_elem(n, 1.0);
    let t_grid = Array1::linspace(0.0, 1.0, n);

    let threshold_x = Array2::from_shape_fn((n, 3), |(row, col)| {
        let t = t_grid[row];
        match col {
            0 => 1.0,
            1 => t - 0.5,
            2 => (2.0 * std::f64::consts::PI * t).sin(),
            _ => unreachable!(),
        }
    });
    let log_sigma_x = Array2::from_shape_fn((n, 2), |(row, col)| {
        let t = t_grid[row];
        match col {
            0 => 1.0,
            1 => (3.0 * std::f64::consts::PI * t).cos(),
            _ => unreachable!(),
        }
    });
    let threshold_design =
        DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(threshold_x));
    let log_sigma_design =
        DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(log_sigma_x));

    let q_seed = Array1::linspace(-1.3, 1.1, n);
    let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        3,
        4,
        2,
        false,
    )
    .expect("wiggle block");

    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots,
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    (
        family,
        threshold_design,
        log_sigma_design,
        wiggle_block,
        y,
        weights,
    )
}
/// Rebuilds the three block states (threshold, log-sigma, wiggle) for the given
/// coefficient vectors.
///
/// The threshold and log-sigma predictors are design-times-coefficient
/// products; the wiggle predictor is the family's wiggle design, evaluated at
/// the `q0` produced by `binomial_location_scale_core`, times `betaw`.
fn rebuild_wiggle_nontrivial_states(
    family: &BinomialLocationScaleWiggleFamily,
    threshold_design: &DesignMatrix,
    log_sigma_design: &DesignMatrix,
    y: &Array1<f64>,
    weights: &Array1<f64>,
    beta_t: &Array1<f64>,
    beta_ls: &Array1<f64>,
    betaw: &Array1<f64>,
) -> Vec<ParameterBlockState> {
    let threshold_eta = threshold_design.matrixvectormultiply(beta_t);
    let log_sigma_eta = log_sigma_design.matrixvectormultiply(beta_ls);
    let core =
        binomial_location_scale_core(y, weights, &threshold_eta, &log_sigma_eta, None, &family.link_kind)
            .expect("core q0");
    let wiggle_basis = family
        .wiggle_design(core.q0.view())
        .expect("wiggle design");
    let wiggle_eta = wiggle_basis.dot(betaw);

    [
        (beta_t, threshold_eta),
        (beta_ls, log_sigma_eta),
        (betaw, wiggle_eta),
    ]
    .into_iter()
    .map(|(beta, eta)| ParameterBlockState {
        beta: beta.clone(),
        eta,
    })
    .collect()
}
/// Returns a copy of the exact-Newton gradient stored for block `block_idx`.
///
/// # Panics
/// Panics if the block's working set is the `Diagonal` variant, or if
/// `block_idx` is out of range.
fn extract_wiggle_gradient(eval: &FamilyEvaluation, block_idx: usize) -> Array1<f64> {
    let working_set = &eval.blockworking_sets[block_idx];
    match working_set {
        BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton"),
        BlockWorkingSet::ExactNewton { gradient, .. } => gradient.clone(),
    }
}
/// Checks each block's exact-Newton gradient against a central finite
/// difference of the negative log-likelihood. The finite difference is
/// compared with the negated stored gradient via `assert_matrix_derivativefd`.
#[test]
fn wiggle_familygradients_match_finite_differencewith_nontrivial_designs() {
    let (family, threshold_design, log_sigma_design, wiggle_block, y, weights) =
        wiggle_nontrivial_fixture();
    let beta_t = Array1::from_vec(vec![0.15, -0.3, 0.2]);
    let beta_ls = Array1::from_vec(vec![-0.2, 0.1]);
    let betaw = Array1::from_elem(wiggle_block.design.ncols(), 0.04);

    // Negative log-likelihood at an arbitrary coefficient triple.
    let negloglik = |bt: &Array1<f64>, bls: &Array1<f64>, bw: &Array1<f64>| {
        let states = rebuild_wiggle_nontrivial_states(
            &family,
            &threshold_design,
            &log_sigma_design,
            &y,
            &weights,
            bt,
            bls,
            bw,
        );
        -family.evaluate(&states).expect("evaluate").log_likelihood
    };
    // Copies of the base coefficients with entry `j` of block `block_idx`
    // shifted by `step`.
    let shifted = |block_idx: usize, j: usize, step: f64| {
        let mut bt = beta_t.clone();
        let mut bls = beta_ls.clone();
        let mut bw = betaw.clone();
        match block_idx {
            BinomialLocationScaleWiggleFamily::BLOCK_T => bt[j] += step,
            BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA => bls[j] += step,
            BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE => bw[j] += step,
            _ => unreachable!(),
        }
        (bt, bls, bw)
    };

    let base_states = rebuild_wiggle_nontrivial_states(
        &family,
        &threshold_design,
        &log_sigma_design,
        &y,
        &weights,
        &beta_t,
        &beta_ls,
        &betaw,
    );
    let eval = family.evaluate(&base_states).expect("evaluate");

    let eps = 1e-6;
    for block_idx in 0..3 {
        let analytic = extract_wiggle_gradient(&eval, block_idx);
        let fd = Array1::<f64>::from_shape_fn(analytic.len(), |j| {
            let (tp, lp, wp) = shifted(block_idx, j, eps);
            let (tm, lm, wm) = shifted(block_idx, j, -eps);
            let f_plus = negloglik(&tp, &lp, &wp);
            let f_minus = negloglik(&tm, &lm, &wm);
            (f_plus - f_minus) / (2.0 * eps)
        });
        crate::test_support::assert_matrix_derivativefd(
            &fd.insert_axis(Axis(1)),
            &(-&analytic).insert_axis(Axis(1)),
            2e-4,
            &format!("wiggle block {block_idx} score"),
        );
    }
}
/// Checks `exact_newton_joint_hessian` against a matrix assembled column by
/// column from central finite differences of the per-block gradients (negated,
/// matching the sign convention used for the score test).
#[test]
fn wiggle_family_joint_hessian_matches_fd_gradients_with_nontrivial_designs() {
    let (family, threshold_design, log_sigma_design, wiggle_block, y, weights) =
        wiggle_nontrivial_fixture();
    let beta_t = Array1::from_vec(vec![0.15, -0.3, 0.2]);
    let beta_ls = Array1::from_vec(vec![-0.2, 0.1]);
    let betaw = Array1::from_elem(wiggle_block.design.ncols(), 0.04);

    // Family evaluation at an arbitrary coefficient triple.
    let evaluate_at = |bt: &Array1<f64>, bls: &Array1<f64>, bw: &Array1<f64>| {
        let states = rebuild_wiggle_nontrivial_states(
            &family,
            &threshold_design,
            &log_sigma_design,
            &y,
            &weights,
            bt,
            bls,
            bw,
        );
        family.evaluate(&states)
    };
    // Copies of the base coefficients with entry `j` of block `block_idx`
    // shifted by `step`.
    let shifted = |block_idx: usize, j: usize, step: f64| {
        let mut bt = beta_t.clone();
        let mut bls = beta_ls.clone();
        let mut bw = betaw.clone();
        match block_idx {
            BinomialLocationScaleWiggleFamily::BLOCK_T => bt[j] += step,
            BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA => bls[j] += step,
            BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE => bw[j] += step,
            _ => unreachable!(),
        }
        (bt, bls, bw)
    };

    let states = rebuild_wiggle_nontrivial_states(
        &family,
        &threshold_design,
        &log_sigma_design,
        &y,
        &weights,
        &beta_t,
        &beta_ls,
        &betaw,
    );
    let h_joint = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected joint exact hessian");

    let pt = beta_t.len();
    let pls = beta_ls.len();
    let total = pt + pls + betaw.len();
    let eps = 1e-6;
    // Column offset of each source block in the stacked coefficient vector.
    let source_offsets = [0usize, pt, pt + pls];

    let mut fd = Array2::<f64>::zeros((total, total));
    for source_block in 0..3 {
        for j in 0..states[source_block].beta.len() {
            let (tp, lp, wp) = shifted(source_block, j, eps);
            let (tm, lm, wm) = shifted(source_block, j, -eps);
            let eval_plus = evaluate_at(&tp, &lp, &wp).expect("eval plus");
            let eval_minus = evaluate_at(&tm, &lm, &wm).expect("eval minus");
            let col_idx = source_offsets[source_block] + j;

            // Stack the differenced gradients of all target blocks into one column.
            let mut row_offset = 0usize;
            for target_block in 0..3 {
                let grad_plus = extract_wiggle_gradient(&eval_plus, target_block);
                let grad_minus = extract_wiggle_gradient(&eval_minus, target_block);
                let block_len = grad_plus.len();
                let col = (&grad_plus - &grad_minus).mapv(|v| -v / (2.0 * eps));
                fd.column_mut(col_idx)
                    .slice_mut(s![row_offset..row_offset + block_len])
                    .assign(&col);
                row_offset += block_len;
            }
        }
    }

    crate::test_support::assert_matrix_derivativefd(
        &fd,
        &h_joint,
        4e-4,
        "wiggle joint hessian",
    );
}
// Checks `exact_newton_joint_hessian_directional_derivative` along the all-ones
// direction against a one-sided (forward) finite difference of
// `exact_newton_joint_hessian` on an intercept-only wiggle fixture.
#[test]
fn wiggle_family_joint_exacthessian_directional_derivative_matches_finite_difference() {
// NOTE(review): this assertion only inspects the source file name and does not
// exercise the family; its purpose is not evident from this test — confirm
// before removing.
assert!(file!().ends_with(".rs"));
let n = 7usize;
let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
let weights = Array1::from_vec(vec![1.0; n]);
let threshold_block = intercept_block(n);
let log_sigma_block = intercept_block(n);
let q_seed = Array1::linspace(-1.4, 1.4, n);
let (wiggle_block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
q_seed.view(),
3,
4,
2,
false,
)
.expect("wiggle block");
let threshold_design = threshold_block.design.clone();
let log_sigma_design = log_sigma_block.design.clone();
let family = BinomialLocationScaleWiggleFamily {
y: y.clone(),
weights: weights.clone(),
link_kind: InverseLink::Standard(StandardLink::Probit),
threshold_design: Some(threshold_design.clone()),
log_sigma_design: Some(log_sigma_design.clone()),
wiggle_knots: knots,
wiggle_degree: 3,
policy: crate::resource::ResourcePolicy::default_library(),
};
// Base coefficients and predictors; the wiggle predictor uses the wiggle design
// evaluated at the core's q0.
let beta_t = Array1::from_vec(vec![0.25]);
let beta_ls = Array1::from_vec(vec![-0.15]);
let eta_t = threshold_design.matrixvectormultiply(&beta_t);
let eta_ls = log_sigma_design.matrixvectormultiply(&beta_ls);
let core_for_q0 =
binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
.expect("core q0");
let betaw = Array1::from_vec(vec![0.04; wiggle_block.design.ncols()]);
let etaw = family
.wiggle_design(core_for_q0.q0.view())
.expect("wiggle design")
.dot(&betaw);
let states = vec![
ParameterBlockState {
beta: beta_t,
eta: eta_t,
},
ParameterBlockState {
beta: beta_ls,
eta: eta_ls,
},
ParameterBlockState {
beta: betaw.clone(),
eta: etaw,
},
];
let base_h = family
.exact_newton_joint_hessian(&states)
.expect("joint hessian")
.expect("expected joint exact hessian");
// Direction with every stacked coefficient component equal to one.
let direction = Array1::ones(base_h.nrows());
let analytic = family
.exact_newton_joint_hessian_directional_derivative(&states, &direction)
.expect("joint dH")
.expect("expected joint exact dH");
let eps = 1e-6;
// Build the perturbed states: shift every block's coefficients by
// `eps * direction`, then recompute each predictor from the shifted values.
let mut plus_states = states.clone();
let beta_layout = GamlssBetaLayout::withwiggle(
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T]
.beta
.len(),
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA]
.beta
.len(),
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE]
.beta
.len(),
);
let (dir_t, dir_ls, dirw) = beta_layout
.split_three(&direction, "wiggle test direction split")
.expect("split wiggle test direction");
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T].beta =
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T].beta + &(eps * dir_t);
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA].beta =
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA].beta + &(eps * dir_ls);
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE].beta =
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE].beta + &(eps * dirw);
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T].eta = threshold_design
.matrixvectormultiply(&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T].beta);
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA].eta = log_sigma_design
.matrixvectormultiply(
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA].beta,
);
// The wiggle predictor must be rebuilt from the q0 of the shifted threshold and
// log-sigma predictors, not from the base q0.
let plus_core_q0 = binomial_location_scale_core(
&y,
&weights,
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_T].eta,
&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA].eta,
None,
&family.link_kind,
)
.expect("plus core q0");
plus_states[BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE].eta = family
.wiggle_design(plus_core_q0.q0.view())
.expect("plus wiggle design")
.dot(&plus_states[BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE].beta);
let h_plus = family
.exact_newton_joint_hessian(&plus_states)
.expect("plus joint hessian")
.expect("expected plus joint hessian");
// Forward difference of the joint Hessian along the direction.
let fd = (h_plus - base_h) / eps;
crate::test_support::assert_matrix_derivativefd(&fd, &analytic, 2e-3, "joint dH");
}
#[test]
fn wiggle_family_joint_exacthessiansecond_directional_derivative_matches_finite_difference() {
    // Validates the analytic second directional derivative of the wiggle
    // family's joint Hessian along (u, v) against a central finite difference
    // of the first directional derivative along u, stepped along v.
    assert!(file!().ends_with(".rs"));

    // Small probit fixture with intercept-only threshold and log-sigma blocks.
    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_vec(vec![1.0; n]);
    let threshold_design = intercept_block(n).design.clone();
    let log_sigma_design = intercept_block(n).design.clone();
    let q_seed = Array1::linspace(-1.4, 1.4, n);
    let (wiggle_block, knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(q_seed.view(), 4, 4, 2, false)
            .expect("wiggle block");
    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots,
        wiggle_degree: 4,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Builds the three block states for a coefficient triple. The wiggle
    // predictor is re-evaluated on the q0 produced by the non-wiggle core, so
    // it tracks changes in the threshold / log-sigma coefficients.
    let states_at = |b_t: &Array1<f64>,
                     b_ls: &Array1<f64>,
                     b_w: &Array1<f64>|
     -> Vec<ParameterBlockState> {
        let eta_t = threshold_design.matrixvectormultiply(b_t);
        let eta_ls = log_sigma_design.matrixvectormultiply(b_ls);
        let core = binomial_location_scale_core(
            &y,
            &weights,
            &eta_t,
            &eta_ls,
            None,
            &family.link_kind,
        )
        .expect("core q0");
        let eta_w = family
            .wiggle_design(core.q0.view())
            .expect("wiggle design")
            .dot(b_w);
        vec![
            (b_t.clone(), eta_t),
            (b_ls.clone(), eta_ls),
            (b_w.clone(), eta_w),
        ]
        .into_iter()
        .map(|(beta, eta)| ParameterBlockState { beta, eta })
        .collect()
    };

    // Expansion point and the two probe directions over the stacked coefficients.
    let beta_t = Array1::from_vec(vec![0.25]);
    let beta_ls = Array1::from_vec(vec![-0.15]);
    let betaw = Array1::from_vec(vec![0.03; wiggle_block.design.ncols()]);
    let base_states = states_at(&beta_t, &beta_ls, &betaw);
    let (pt, pls, pw) = (beta_t.len(), beta_ls.len(), betaw.len());
    let total = pt + pls + pw;
    let dir_u = Array1::from_shape_fn(total, |k| 0.2 + 0.1 * (k as f64));
    let dir_v = Array1::from_shape_fn(total, |k| -0.15 + 0.07 * (k as f64));

    let analytic = family
        .exact_newton_joint_hessiansecond_directional_derivative(&base_states, &dir_u, &dir_v)
        .expect("joint d2H")
        .expect("expected joint exact d2H");

    // Central difference of dH[u] along v.
    let eps = 1e-6;
    let layout = GamlssBetaLayout::withwiggle(pt, pls, pw);
    let (v_t, v_ls, v_w) = layout
        .split_three(&dir_v, "wiggle d2H test directionv")
        .expect("split wiggle test direction");
    let forward_states = states_at(
        &(&beta_t + &(eps * &v_t)),
        &(&beta_ls + &(eps * &v_ls)),
        &(&betaw + &(eps * &v_w)),
    );
    let backward_states = states_at(
        &(&beta_t - &(eps * &v_t)),
        &(&beta_ls - &(eps * &v_ls)),
        &(&betaw - &(eps * &v_w)),
    );
    let dh_forward = family
        .exact_newton_joint_hessian_directional_derivative(&forward_states, &dir_u)
        .expect("joint dH plus")
        .expect("expected joint exact dH plus");
    let dh_backward = family
        .exact_newton_joint_hessian_directional_derivative(&backward_states, &dir_u)
        .expect("joint dH minus")
        .expect("expected joint exact dH minus");
    let numeric = (dh_forward - dh_backward) / (2.0 * eps);
    crate::test_support::assert_matrix_derivativefd(&numeric, &analytic, 4e-3, "joint d2H");
}
#[test]
fn wiggle_family_joint_hessian_cross_blocks_match_finite_difference_of_gradients() {
    // Compares the off-diagonal blocks of the analytic joint Hessian with
    // central finite differences of the per-block gradients returned by
    // `evaluate`.
    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0]);
    let weights = Array1::from_vec(vec![1.0; n]);
    let threshold_design = intercept_block(n).design.clone();
    let log_sigma_design = intercept_block(n).design.clone();
    let q_seed = Array1::linspace(-1.4, 1.4, n);
    let (wiggle_block, knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(q_seed.view(), 3, 4, 2, false)
            .expect("wiggle block");
    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        wiggle_knots: knots,
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Rebuilds all three block states, re-deriving the wiggle predictor from
    // the q0 of the non-wiggle core at the given coefficients.
    let states_at = |b_t: &Array1<f64>,
                     b_ls: &Array1<f64>,
                     b_w: &Array1<f64>|
     -> Vec<ParameterBlockState> {
        let eta_t = threshold_design.matrixvectormultiply(b_t);
        let eta_ls = log_sigma_design.matrixvectormultiply(b_ls);
        let core = binomial_location_scale_core(
            &y,
            &weights,
            &eta_t,
            &eta_ls,
            None,
            &family.link_kind,
        )
        .expect("core q0");
        let eta_w = family
            .wiggle_design(core.q0.view())
            .expect("wiggle design")
            .dot(b_w);
        vec![
            (b_t.clone(), eta_t),
            (b_ls.clone(), eta_ls),
            (b_w.clone(), eta_w),
        ]
        .into_iter()
        .map(|(beta, eta)| ParameterBlockState { beta, eta })
        .collect()
    };

    // Pulls the gradient out of an exact-Newton working set; any other
    // working-set kind is a test failure.
    let block_gradient = |eval: &FamilyEvaluation, block_idx: usize| -> Array1<f64> {
        match &eval.blockworking_sets[block_idx] {
            BlockWorkingSet::ExactNewton { gradient, .. } => gradient.clone(),
            BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton"),
        }
    };

    let beta_t = Array1::from_vec(vec![0.25]);
    let beta_ls = Array1::from_vec(vec![-0.15]);
    let betaw = Array1::from_vec(vec![0.04; wiggle_block.design.ncols()]);
    let states = states_at(&beta_t, &beta_ls, &betaw);
    let h_joint = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected joint exact hessian");
    let (pt, pls, pw) = (beta_t.len(), beta_ls.len(), betaw.len());
    let eps = 1e-6;

    // Finite-difference estimate of the (target, source) Hessian block:
    // column j is the negated central difference of the target-block gradient
    // with respect to source coordinate j.
    // NOTE(review): the negation presumably reflects that the block gradient
    // is a score while the joint Hessian is of the negative log-likelihood;
    // confirm against the family's sign convention.
    let fd_cross_block = |target_block: usize, source_block: usize| -> Array2<f64> {
        let n_rows = states[target_block].beta.len();
        let n_cols = states[source_block].beta.len();
        let mut fd = Array2::<f64>::zeros((n_rows, n_cols));
        for j in 0..n_cols {
            let mut plus = (beta_t.clone(), beta_ls.clone(), betaw.clone());
            let mut minus = (beta_t.clone(), beta_ls.clone(), betaw.clone());
            match source_block {
                BinomialLocationScaleWiggleFamily::BLOCK_T => {
                    plus.0[j] += eps;
                    minus.0[j] -= eps;
                }
                BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA => {
                    plus.1[j] += eps;
                    minus.1[j] -= eps;
                }
                BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE => {
                    plus.2[j] += eps;
                    minus.2[j] -= eps;
                }
                _ => panic!("unexpected block"),
            }
            let eval_plus = family
                .evaluate(&states_at(&plus.0, &plus.1, &plus.2))
                .expect("eval plus");
            let eval_minus = family
                .evaluate(&states_at(&minus.0, &minus.1, &minus.2))
                .expect("eval minus");
            let grad_plus = block_gradient(&eval_plus, target_block);
            let grad_minus = block_gradient(&eval_minus, target_block);
            let column = (&grad_plus - &grad_minus).mapv(|v| -v / (2.0 * eps));
            fd.slice_mut(ndarray::s![.., j]).assign(&column);
        }
        fd
    };

    let fd_t_ls = fd_cross_block(
        BinomialLocationScaleWiggleFamily::BLOCK_T,
        BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA,
    );
    let fd_tw = fd_cross_block(
        BinomialLocationScaleWiggleFamily::BLOCK_T,
        BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE,
    );
    let fd_lsw = fd_cross_block(
        BinomialLocationScaleWiggleFamily::BLOCK_LOG_SIGMA,
        BinomialLocationScaleWiggleFamily::BLOCK_WIGGLE,
    );

    // Offsets of the log-sigma and wiggle coefficients in the stacked layout.
    let ls_start = pt;
    let w_start = pt + pls;
    let w_end = pt + pls + pw;
    let h_t_ls = h_joint
        .slice(ndarray::s![0..pt, ls_start..w_start])
        .to_owned();
    let h_tw = h_joint.slice(ndarray::s![0..pt, w_start..w_end]).to_owned();
    let h_lsw = h_joint
        .slice(ndarray::s![ls_start..w_start, w_start..w_end])
        .to_owned();
    crate::test_support::assert_matrix_derivativefd(&fd_t_ls, &h_t_ls, 2e-4, "H_t_ls");
    crate::test_support::assert_matrix_derivativefd(&fd_tw, &h_tw, 4e-4, "H_tw");
    crate::test_support::assert_matrix_derivativefd(&fd_lsw, &h_lsw, 6e-4, "H_lsw");
}
#[test]
fn nonwiggle_family_evaluate_returns_exact_newton_blockswhen_designs_are_present() {
    // With both designs supplied, `evaluate` must return two exact-Newton
    // working sets whose Hessians equal the principal diagonal blocks of the
    // joint exact Hessian.
    let n = 6usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0]);
    let weights = Array1::from_vec(vec![1.0; n]);

    // Row index mapped onto the unit interval [0, 1].
    let unit_grid = |i: usize| i as f64 / (n as f64 - 1.0);

    // Threshold design: intercept plus a centred linear trend.
    let threshold_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                t - 0.5
            } else {
                unreachable!()
            }
        }),
    ));
    // Log-sigma design: intercept plus one cosine period.
    let log_sigma_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                (2.0 * std::f64::consts::PI * t).cos()
            } else {
                unreachable!()
            }
        }),
    ));
    let family = BinomialLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    let beta_t = array![0.2, -0.15];
    let beta_ls = array![-0.1, 0.05];
    let eta_t = threshold_design.matrixvectormultiply(&beta_t);
    let eta_ls = log_sigma_design.matrixvectormultiply(&beta_ls);
    let states = vec![
        ParameterBlockState {
            beta: beta_t.clone(),
            eta: eta_t,
        },
        ParameterBlockState {
            beta: beta_ls.clone(),
            eta: eta_ls,
        },
    ];

    let eval = family.evaluate(&states).expect("evaluate nonwiggle family");
    assert_eq!(eval.blockworking_sets.len(), 2);
    let joint = family
        .exact_newton_joint_hessian(&states)
        .expect("joint hessian")
        .expect("expected joint exact hessian");

    // Each block owns a contiguous index range in the stacked coefficient vector.
    let pt = beta_t.len();
    let pls = beta_ls.len();
    let block_ranges = vec![(0usize, pt), (pt, pt + pls)];
    for (block_idx, (start, end)) in block_ranges.into_iter().enumerate() {
        let blockhessian = match &eval.blockworking_sets[block_idx] {
            BlockWorkingSet::ExactNewton { hessian, .. } => hessian.to_dense(),
            BlockWorkingSet::Diagonal { .. } => panic!("expected exact newton block"),
        };
        let joint_block = joint.slice(s![start..end, start..end]).to_owned();
        crate::test_support::assert_matrix_derivativefd(
            &joint_block,
            &blockhessian,
            1e-10,
            &format!("nonwiggle block {block_idx} principal block"),
        );
    }
}
#[test]
fn nonwiggle_family_joint_exacthessian_directional_derivative_matches_finite_difference() {
    // Checks the analytic directional derivative of the joint Hessian against
    // a one-sided (forward) finite difference of the Hessian itself.
    let n = 8usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0]);
    let weights = Array1::from_vec(vec![1.0; n]);

    // Row index mapped onto the unit interval [0, 1].
    let unit_grid = |i: usize| i as f64 / (n as f64 - 1.0);

    // Threshold design: intercept plus one sine period.
    let threshold_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                (2.0 * std::f64::consts::PI * t).sin()
            } else {
                unreachable!()
            }
        }),
    ));
    // Log-sigma design: intercept plus a centred linear trend.
    let log_sigma_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                t - 0.5
            } else {
                unreachable!()
            }
        }),
    ));
    let family = BinomialLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Block states (coefficients plus linear predictors) for a coefficient pair.
    let states_at = |b_t: &Array1<f64>, b_ls: &Array1<f64>| {
        let eta_t = threshold_design.matrixvectormultiply(b_t);
        let eta_ls = log_sigma_design.matrixvectormultiply(b_ls);
        vec![
            ParameterBlockState {
                beta: b_t.clone(),
                eta: eta_t,
            },
            ParameterBlockState {
                beta: b_ls.clone(),
                eta: eta_ls,
            },
        ]
    };

    let beta_t = array![0.2, -0.1];
    let beta_ls = array![-0.15, 0.08];
    let base_states = states_at(&beta_t, &beta_ls);
    let h_base = family
        .exact_newton_joint_hessian(&base_states)
        .expect("joint hessian")
        .expect("expected joint exact hessian");
    let direction = array![0.2, 0.3, -0.15, 0.1];
    let analytic = family
        .exact_newton_joint_hessian_directional_derivative(&base_states, &direction)
        .expect("joint dH")
        .expect("expected joint exact dH");

    // Step the stacked coefficients along `direction`, split per block.
    let eps = 1e-6;
    let split_at = beta_t.len();
    let step_t = direction.slice(s![0..split_at]).to_owned();
    let step_ls = direction.slice(s![split_at..]).to_owned();
    let shifted_states = states_at(
        &(&beta_t + &(eps * &step_t)),
        &(&beta_ls + &(eps * &step_ls)),
    );
    let h_shifted = family
        .exact_newton_joint_hessian(&shifted_states)
        .expect("plus joint hessian")
        .expect("expected plus joint hessian");
    let numeric = (h_shifted - h_base) / eps;
    crate::test_support::assert_matrix_derivativefd(
        &numeric,
        &analytic,
        2e-3,
        "nonwiggle joint dH",
    );
}
#[test]
fn nonwiggle_family_joint_exacthessiansecond_directional_derivative_matches_finite_difference()
{
    // Checks the analytic second directional derivative of the joint Hessian
    // along (u, v) against a central finite difference of the first
    // directional derivative along u, stepped along v.
    let n = 8usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0]);
    let weights = Array1::from_vec(vec![1.0; n]);

    // Row index mapped onto the unit interval [0, 1].
    let unit_grid = |i: usize| i as f64 / (n as f64 - 1.0);

    // Threshold design: intercept plus one sine period.
    let threshold_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                (2.0 * std::f64::consts::PI * t).sin()
            } else {
                unreachable!()
            }
        }),
    ));
    // Log-sigma design: intercept plus a centred linear trend.
    let log_sigma_design = DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(
        Array2::from_shape_fn((n, 2), |(i, j)| {
            let t = unit_grid(i);
            if j == 0 {
                1.0
            } else if j == 1 {
                t - 0.5
            } else {
                unreachable!()
            }
        }),
    ));
    let family = BinomialLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(threshold_design.clone()),
        log_sigma_design: Some(log_sigma_design.clone()),
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Block states (coefficients plus linear predictors) for a coefficient pair.
    let states_at = |b_t: &Array1<f64>, b_ls: &Array1<f64>| {
        let eta_t = threshold_design.matrixvectormultiply(b_t);
        let eta_ls = log_sigma_design.matrixvectormultiply(b_ls);
        vec![
            ParameterBlockState {
                beta: b_t.clone(),
                eta: eta_t,
            },
            ParameterBlockState {
                beta: b_ls.clone(),
                eta: eta_ls,
            },
        ]
    };

    let beta_t = array![0.2, -0.1];
    let beta_ls = array![-0.15, 0.08];
    let base_states = states_at(&beta_t, &beta_ls);
    let dir_u = array![0.2, 0.3, -0.15, 0.1];
    let dir_v = array![-0.05, 0.12, 0.08, -0.09];
    let analytic = family
        .exact_newton_joint_hessiansecond_directional_derivative(&base_states, &dir_u, &dir_v)
        .expect("joint d2H")
        .expect("expected joint exact d2H");

    // Central difference of dH[u] along v, with v split per block.
    let eps = 1e-6;
    let split_at = beta_t.len();
    let v_t = dir_v.slice(s![0..split_at]).to_owned();
    let v_ls = dir_v.slice(s![split_at..]).to_owned();
    let forward_states = states_at(&(&beta_t + &(eps * &v_t)), &(&beta_ls + &(eps * &v_ls)));
    let backward_states = states_at(&(&beta_t - &(eps * &v_t)), &(&beta_ls - &(eps * &v_ls)));
    let dh_forward = family
        .exact_newton_joint_hessian_directional_derivative(&forward_states, &dir_u)
        .expect("joint dH plus")
        .expect("expected joint exact dH plus");
    let dh_backward = family
        .exact_newton_joint_hessian_directional_derivative(&backward_states, &dir_u)
        .expect("joint dH minus")
        .expect("expected joint exact dH minus");
    let numeric = (dh_forward - dh_backward) / (2.0 * eps);
    crate::test_support::assert_matrix_derivativefd(
        &numeric,
        &analytic,
        4e-3,
        "nonwiggle joint d2H",
    );
}
#[test]
fn wiggle_basis_is_structurally_monotone_for_nonnegative_coefficients() {
    // With all coefficients equal to a positive constant, the first
    // derivative of the wiggle basis expansion must be non-negative at every
    // seed point (up to a tiny numerical slack).
    let degree = 3usize;
    let num_internal_knots = 6usize;
    let penalty_order = 2usize;
    let q_seed = Array1::linspace(-2.0, 2.0, 17);
    let (block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        degree,
        num_internal_knots,
        penalty_order,
        false,
    )
    .expect("wiggle block");

    // The design is only needed for its column count; it must be dense.
    let design = match &block.design {
        DesignMatrix::Sparse(_) => panic!("expected dense wiggle design"),
        DesignMatrix::Dense(x) => x.to_dense_arc(),
    };
    let beta = Array1::from_elem(design.ncols(), 0.2);

    let derivative_basis =
        monotone_wiggle_basis_with_derivative_order(q_seed.view(), &knots, degree, 1)
            .expect("wiggle derivative basis");
    let derivative = derivative_basis.dot(&beta);

    // Smallest derivative value, used only for the failure message.
    let min_derivative = derivative.iter().fold(f64::INFINITY, |acc, &v| acc.min(v));
    let all_nonnegative = derivative.iter().all(|&value| value >= -1e-12);
    assert!(
        all_nonnegative,
        "I-spline wiggle derivative must stay non-negative for non-negative coefficients: min={}",
        min_derivative
    );
}
#[test]
fn degeneratewiggle_seed_uses_broad_fallback_domain() {
    // An all-zero seed has no spread. The knots built from it must still
    // cover a wide domain: the boundary knots are required to reach at least
    // -2.9 on the left and +2.9 on the right.
    let degree = 3usize;
    let q_seed = Array1::zeros(9);
    let knots = initializewiggle_knots_from_seed(q_seed.view(), degree, 5)
        .expect("initialize degenerate wiggle knots");

    // The domain boundaries are read `bs_degree` knots in from each end.
    let bs_degree = monotone_wiggle_internal_degree(degree).expect("cubic wiggle degree") + 1;
    let last_boundary_index = knots.len() - bs_degree - 1;
    let domain_min = knots[bs_degree];
    let domain_max = knots[last_boundary_index];

    assert!(
        domain_min <= -2.9,
        "unexpected left fallback boundary: {domain_min}"
    );
    assert!(
        domain_max >= 2.9,
        "unexpected right fallback boundary: {domain_max}"
    );
}
#[test]
fn wiggle_block_design_matches_ispline_basis() {
    // The design stored in the wiggle block must equal, entry by entry, the
    // I-spline basis built directly from the same seed and returned knots.
    let degree = 2usize;
    let num_internal_knots = 4usize;
    let penalty_order = 2usize;
    let q_seed = Array1::linspace(-1.0, 1.0, 11);
    let (block, knots) = BinomialLocationScaleWiggleFamily::buildwiggle_block_input(
        q_seed.view(),
        degree,
        num_internal_knots,
        penalty_order,
        false,
    )
    .expect("wiggle block");

    // Reference basis built independently of the block builder.
    let internal_degree = monotone_wiggle_internal_degree(degree).expect("wiggle degree");
    let (basis, _) = create_basis::<Dense>(
        q_seed.view(),
        KnotSource::Provided(knots.view()),
        internal_degree,
        BasisOptions::i_spline(),
    )
    .expect("I-spline basis");
    let expected = (*basis).clone();

    let got = match &block.design {
        DesignMatrix::Sparse(_) => panic!("expected dense wiggle design"),
        DesignMatrix::Dense(x) => x.to_dense_arc(),
    };
    assert_eq!(got.dim(), expected.dim());

    let (n_rows, n_cols) = (got.nrows(), got.ncols());
    for i in 0..n_rows {
        for j in 0..n_cols {
            let actual = got[[i, j]];
            let wanted = expected[[i, j]];
            assert!(
                (actual - wanted).abs() < 1e-10,
                "wiggle design mismatch at ({}, {}): got {}, expected {}",
                i,
                j,
                actual,
                wanted
            );
        }
    }
}
#[test]
fn split_wiggle_penalty_orders_uses_requested_order_one_as_primary() {
    // Called with 2 as the first argument and requested orders [1, 2, 3, 3],
    // the split must report order 1 as primary and the remaining orders,
    // without the duplicate 3, as extras.
    let (primary_order, extra_orders) = split_wiggle_penalty_orders(2, &[1, 2, 3, 3]);
    assert_eq!(primary_order, 1);
    assert_eq!(extra_orders, vec![2, 3]);
}
#[test]
fn append_selected_wiggle_penalty_orders_keeps_order_one() {
    // Selecting a wiggle basis with configured penalty order 1 and requested
    // orders [1, 3] must produce two penalties with nullspace dimensions
    // 1 and 3.
    let cfg = WiggleBlockConfig {
        degree: 3usize,
        num_internal_knots: 5usize,
        penalty_order: 1,
        double_penalty: false,
    };
    let q_seed = Array1::linspace(-1.0, 1.0, 11);
    let selected = select_wiggle_basis_from_seed(q_seed.view(), &cfg, &[1, 3])
        .expect("selected wiggle basis");

    let wiggle = &selected.block;
    assert_eq!(wiggle.penalties.len(), 2);
    assert_eq!(wiggle.nullspace_dims, vec![1, 3]);
}
#[test]
fn binomial_location_scale_generative_matches_coremu() {
    // The mean reported by `generativespec` must agree with the `mu` computed
    // by the shared binomial location-scale core for the same predictors.
    let n = 7usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0]);
    let weights = Array1::from_vec(vec![1.0; n]);
    let eta_t = Array1::from_vec(vec![0.8, -0.4, 0.2, -1.1, 0.0, 0.5, -0.7]);
    let eta_ls = Array1::from_vec(vec![-3.0, -1.2, -0.1, 0.3, 1.1, 2.0, 4.0]);

    // No designs are attached; the predictors are injected through the states.
    let family = BinomialLocationScaleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: None,
        log_sigma_design: None,
        policy: crate::resource::ResourcePolicy::default_library(),
    };
    let states: Vec<ParameterBlockState> = vec![eta_t.clone(), eta_ls.clone()]
        .into_iter()
        .map(|eta| ParameterBlockState {
            beta: Array1::zeros(1),
            eta,
        })
        .collect();

    let spec = family.generativespec(&states).expect("generative spec");
    let core =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core");

    for i in 0..n {
        let gap = (spec.mean[i] - core.mu[i]).abs();
        assert!(
            gap < 1e-7,
            "mean mismatch at {i}: got {}, expected {}",
            spec.mean[i],
            core.mu[i]
        );
    }
}
#[test]
fn wiggle_geometry_and_generative_use_same_sigma_link_as_core() {
    // Two consistency checks against the shared core:
    //   1. `block_geometry` for the wiggle block returns the wiggle design
    //      evaluated at the core's q0;
    //   2. `generativespec` reports the same mean as the core evaluated with
    //      the wiggle predictor included.
    let n = 8usize;
    let y = Array1::from_vec(vec![0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0]);
    let weights = Array1::from_vec(vec![1.0; n]);
    let eta_t = Array1::from_vec(vec![0.5, -0.6, 0.1, -0.3, 0.9, -0.2, 0.4, -0.8]);
    let eta_ls = Array1::from_vec(vec![-2.5, -1.5, -0.5, 0.0, 0.7, 1.4, 2.2, 3.0]);
    let q_seed = Array1::linspace(-1.5, 1.5, n);
    let (wiggle_block, knots) =
        BinomialLocationScaleWiggleFamily::buildwiggle_block_input(q_seed.view(), 2, 3, 2, false)
            .expect("wiggle block");
    let family = BinomialLocationScaleWiggleFamily {
        y: y.clone(),
        weights: weights.clone(),
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: None,
        log_sigma_design: None,
        wiggle_knots: knots,
        wiggle_degree: 2,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // q0 from the core without a wiggle; it drives the wiggle design.
    let base_core =
        binomial_location_scale_core(&y, &weights, &eta_t, &eta_ls, None, &family.link_kind)
            .expect("core q0");
    let betaw = Array1::from_vec(vec![0.15; wiggle_block.design.ncols()]);
    let etaw = family
        .wiggle_design(base_core.q0.view())
        .expect("wiggle design")
        .dot(&betaw);

    let states = vec![
        ParameterBlockState {
            beta: Array1::zeros(1),
            eta: eta_t.clone(),
        },
        ParameterBlockState {
            beta: Array1::zeros(1),
            eta: eta_ls.clone(),
        },
        ParameterBlockState {
            beta: betaw.clone(),
            eta: etaw.clone(),
        },
    ];

    // Check 1: geometry design equals the wiggle design at the core's q0.
    let wigglespec = wiggle_block
        .clone()
        .intospec("wiggle")
        .expect("wiggle spec");
    let (geometry_design, _) = family
        .block_geometry(&states, &wigglespec)
        .expect("block geometry");
    let geom = match geometry_design {
        DesignMatrix::Sparse(_) => panic!("expected dense wiggle geometry design"),
        DesignMatrix::Dense(x) => x.to_dense(),
    };
    let expected_geom = family
        .wiggle_design(base_core.q0.view())
        .expect("expected wiggle geometry");
    assert_eq!(geom.dim(), expected_geom.dim());
    let (n_rows, n_cols) = (geom.nrows(), geom.ncols());
    for i in 0..n_rows {
        for j in 0..n_cols {
            let gap = (geom[[i, j]] - expected_geom[[i, j]]).abs();
            assert!(
                gap < 1e-12,
                "geometry mismatch at ({i}, {j}): got {}, expected {}",
                geom[[i, j]],
                expected_geom[[i, j]]
            );
        }
    }

    // Check 2: generative mean equals the core's mu with the wiggle applied.
    let generated = family.generativespec(&states).expect("generative spec");
    let wiggled_core = binomial_location_scale_core(
        &y,
        &weights,
        &eta_t,
        &eta_ls,
        Some(&etaw),
        &family.link_kind,
    )
    .expect("core with wiggle");
    for i in 0..n {
        let gap = (generated.mean[i] - wiggled_core.mu[i]).abs();
        assert!(
            gap < 1e-7,
            "wiggle mean mismatch at {i}: got {}, expected {}",
            generated.mean[i],
            wiggled_core.mu[i]
        );
    }
}
#[test]
fn poisson_extreme_eta_stays_finite_with_safe_exp() {
    use crate::families::custom_family::{CustomFamily, ParameterBlockState};

    // One predictor entry (709.0) is very large. When `evaluate` succeeds, the
    // log-likelihood and the diagonal working response/weights must all be
    // finite. An `Err` result is tolerated and leaves the test passing.
    let poisson = PoissonLogFamily {
        y: Array1::from_vec(vec![1.0, 2.0, 3.0]),
        weights: Array1::from_vec(vec![1.0, 1.0, 1.0]),
    };
    let extreme_eta = Array1::from_vec(vec![0.5, 709.0, -0.3]);
    let state = ParameterBlockState {
        beta: Array1::zeros(0),
        eta: extreme_eta,
    };

    match poisson.evaluate(&[state]) {
        Err(_) => {}
        Ok(eval) => {
            if let crate::families::custom_family::BlockWorkingSet::Diagonal {
                working_response,
                working_weights,
            } = &eval.blockworking_sets[0]
            {
                let response_finite = working_response.iter().all(|v| v.is_finite());
                let weights_finite = working_weights.iter().all(|v| v.is_finite());
                let all_finite =
                    response_finite && weights_finite && eval.log_likelihood.is_finite();
                assert!(
                    all_finite,
                    "Poisson evaluate should produce finite outputs for all eta, \
                     but got non-finite values: ll={}, z={:?}, w={:?}",
                    eval.log_likelihood, working_response, working_weights
                );
            } else {
                panic!("expected Diagonal block");
            }
        }
    }
}
#[test]
fn binomial_location_scale_batched_gradient_matches_finite_difference() {
    use crate::families::custom_family::BlockwiseFitOptions;

    // Compares the outer gradient with respect to rho returned by the joint
    // hyper evaluation against a central finite difference of the outer
    // objective.
    let base = binomial_location_scale_base_fixture();
    let family = BinomialLocationScaleFamily {
        y: base.y,
        weights: base.weights,
        link_kind: InverseLink::Standard(StandardLink::Probit),
        threshold_design: Some(base.threshold_design),
        log_sigma_design: Some(base.log_sigma_design),
        policy: crate::resource::ResourcePolicy::default_library(),
    };
    let specs = vec![base.threshold_spec, base.log_sigma_spec];
    let options = BlockwiseFitOptions {
        use_remlobjective: true,
        ridge_floor: 1e-10,
        outer_max_iter: 1,
        ..BlockwiseFitOptions::default()
    };

    // Evaluates (objective, gradient) at a given rho, with no psi derivatives.
    let eval_outer = |rho_at: &Array1<f64>| {
        let derivative_blocks = vec![Vec::<CustomFamilyBlockPsiDerivative>::new(); specs.len()];
        let outcome = evaluate_custom_family_joint_hyper(
            &family,
            &specs,
            &options,
            rho_at,
            &derivative_blocks,
            None,
            crate::solver::estimate::reml::unified::EvalMode::ValueAndGradient,
        )
        .expect("objective+gradient at rho");
        (outcome.objective, outcome.gradient)
    };

    let rho = array![0.05, -0.15];
    let (f0, analytic_grad) = eval_outer(&rho);
    assert!(f0.is_finite(), "outer cost must be finite at rho");
    assert_eq!(analytic_grad.len(), rho.len());

    // Gradients below this floor cannot be told apart from the rounding noise
    // of the finite difference, so such coordinates are not compared.
    let h = 1e-5;
    let cost_magnitude = f0.abs().max(1.0);
    let noise_floor = (10.0 * f64::EPSILON * cost_magnitude / h).max(1e-9);

    for k in 0..rho.len() {
        let mut rho_forward = rho.clone();
        rho_forward[k] += h;
        let mut rho_backward = rho.clone();
        rho_backward[k] -= h;
        let (f_forward, _) = eval_outer(&rho_forward);
        let (f_backward, _) = eval_outer(&rho_backward);
        let numeric = (f_forward - f_backward) / (2.0 * h);
        let analytic = analytic_grad[k];

        if analytic.abs() < noise_floor && numeric.abs() < noise_floor {
            continue;
        }
        let abs_err = (analytic - numeric).abs();
        let rel_err = abs_err / numeric.abs().max(analytic.abs()).max(1e-12);
        assert!(
            rel_err < 1e-3 || abs_err < 1e-6,
            "batched gradient mismatch at coord {k}: \
             batched={:.6e}, fd={:.6e}, abs_err={:.3e}, rel_err={:.3e}",
            analytic,
            numeric,
            abs_err,
            rel_err,
        );
    }
}
fn binomial_mean_wiggle_operator_fixture() -> (
BinomialMeanWiggleFamily,
Vec<ParameterBlockState>,
Vec<ParameterBlockSpec>,
Array2<f64>,
) {
let x_eta = array![
[1.0, -0.9],
[1.0, -0.45],
[1.0, -0.1],
[1.0, 0.2],
[1.0, 0.55],
[1.0, 0.9],
];
let beta_eta = array![-0.15, 0.7];
let eta = x_eta.dot(&beta_eta);
let degree = 3usize;
let knots =
initializewiggle_knots_from_seed(eta.view(), degree, 4).expect("mean-wiggle knots");
let family = BinomialMeanWiggleFamily {
y: array![0.0, 1.0, 0.0, 1.0, 1.0, 0.0],
weights: array![1.0, 0.8, 1.2, 1.0, 0.7, 1.1],
link_kind: InverseLink::Standard(StandardLink::Logit),
wiggle_knots: knots,
wiggle_degree: degree,
policy: crate::resource::ResourcePolicy::default_library(),
};
let basis = family.wiggle_design(eta.view()).expect("wiggle basis");
let beta_w = Array1::from_iter((0..basis.ncols()).map(|j| 0.015 * (j as f64 + 1.0)));
let etaw = basis.dot(&beta_w);
let states = vec![
ParameterBlockState {
beta: beta_eta,
eta: eta.clone(),
},
ParameterBlockState {
beta: beta_w,
eta: etaw,
},
];
let specs = vec![
ParameterBlockSpec {
name: "eta".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(x_eta.clone())),
offset: Array1::zeros(eta.len()),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
ParameterBlockSpec {
name: "wiggle".to_string(),
design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(basis)),
offset: Array1::zeros(eta.len()),
penalties: vec![],
nullspace_dims: vec![],
initial_log_lambdas: Array1::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
},
];
(family, states, specs, x_eta)
}
/// Asserts that two matrices have the same shape and agree entrywise to
/// within `tol`.
///
/// # Panics
///
/// Panics, with `label` in the message, when the shapes differ, when any
/// entrywise difference is NaN, or when the largest absolute entrywise
/// difference is not strictly below `tol`.
fn assert_close_matrix(a: &Array2<f64>, b: &Array2<f64>, tol: f64, label: &str) {
    assert_eq!(a.dim(), b.dim(), "{label} shape mismatch");
    // `f64::max` returns the non-NaN operand, so a plain max-fold would drop
    // NaN differences and let a NaN-polluted matrix pass the tolerance check.
    // Count them separately and reject them explicitly.
    let (max_err, nan_count) = a.iter().zip(b.iter()).fold(
        (0.0_f64, 0usize),
        |(max_err, nan_count), (&x, &y)| {
            let err = (x - y).abs();
            if err.is_nan() {
                (max_err, nan_count + 1)
            } else {
                (max_err.max(err), nan_count)
            }
        },
    );
    assert!(
        nan_count == 0,
        "{label} has {nan_count} NaN entrywise difference(s)"
    );
    assert!(
        max_err < tol,
        "{label} max error {max_err:.3e} >= {tol:.3e}"
    );
}
#[test]
fn binomial_mean_wiggle_hessian_operators_match_dense_derivatives() {
    // Every operator form of the joint Hessian and of its first and second
    // directional derivatives must reproduce the corresponding dense matrix.
    let (family, states, specs, x_eta) = binomial_mean_wiggle_operator_fixture();
    let p_eta = x_eta.ncols();
    let pw = states[BinomialMeanWiggleFamily::BLOCK_WIGGLE].beta.len();
    let total = p_eta + pw;
    let dir_u = Array1::from_iter((0..total).map(|j| 0.03 * (j as f64 + 1.0).sin()));
    let dir_v = Array1::from_iter((0..total).map(|j| -0.02 * (j as f64 + 0.5).cos()));

    let dense_h = family
        .exact_newton_joint_hessian_with_specs(&states, &specs)
        .expect("dense H")
        .expect("dense H available");
    let workspace = family
        .exact_newton_joint_hessian_workspace(&states, &specs)
        .expect("workspace")
        .expect("workspace available");

    // Static operator: applying it to the identity materialises the Hessian.
    let identity =
        Array2::from_shape_fn((total, total), |(i, j)| if i == j { 1.0 } else { 0.0 });
    let static_op = family
        .bmw_static_hessian_operator(&states, Arc::new(x_eta.clone()))
        .expect("static op");
    let op_h = crate::solver::estimate::reml::unified::HyperOperator::mul_mat(
        static_op.as_ref(),
        &identity,
    );
    assert_close_matrix(&op_h, &dense_h, 1e-10, "static H operator");

    // Workspace Hessian-vector product, compared through the L1 norm of the gap.
    let hv = workspace
        .hessian_matvec(&dir_u)
        .expect("workspace HVP")
        .expect("workspace HVP available");
    let hv_dense = dense_h.dot(&dir_u);
    let hv_err = (&hv - &hv_dense).mapv(f64::abs).sum();
    assert!(hv_err < 1e-10, "workspace HVP mismatch {hv_err:.3e}");

    // First directional derivative along u.
    let dense_dh = family
        .exact_newton_joint_hessian_directional_derivative_with_specs(&states, &specs, &dir_u)
        .expect("dense dH")
        .expect("dense dH available");
    let dh_operator = workspace
        .directional_derivative_operator(&dir_u)
        .expect("dH operator")
        .expect("dH operator available");
    let op_dh = dh_operator.to_dense();
    assert_close_matrix(&op_dh, &dense_dh, 1e-10, "directional dH operator");

    // Second directional derivative along (u, v).
    let dense_d2h = family
        .exact_newton_joint_hessian_second_directional_derivative_with_specs(
            &states, &specs, &dir_u, &dir_v,
        )
        .expect("dense d2H")
        .expect("dense d2H available");
    let d2h_operator = workspace
        .second_directional_derivative_operator(&dir_u, &dir_v)
        .expect("d2H operator")
        .expect("d2H operator available");
    let op_d2h = d2h_operator.to_dense();
    assert_close_matrix(
        &op_d2h,
        &dense_d2h,
        1e-10,
        "second directional d2H operator",
    );
}
#[test]
fn binomial_mean_wiggle_planner_keeps_second_order_at_large_n() {
    // With 50_001 observations the family must still advertise a Hessian
    // HVP and a second-order exact outer derivative.
    // NOTE(review): 50_001 presumably sits just above a size threshold in the
    // planner; confirm against the planner's cutoff.
    let n = 50_001usize;
    let knots = initializewiggle_knots_from_seed(Array1::linspace(-1.0, 1.0, 9).view(), 3, 4)
        .expect("large-n knots");
    let family = BinomialMeanWiggleFamily {
        y: Array1::zeros(n),
        weights: Array1::ones(n),
        link_kind: InverseLink::Standard(StandardLink::Logit),
        wiggle_knots: knots,
        wiggle_degree: 3,
        policy: crate::resource::ResourcePolicy::default_library(),
    };

    // Unpenalised spec with an all-zero dense design of the given width. Only
    // the dimensions differ between the two blocks.
    let zero_design_spec = |name: &str, n_cols: usize| ParameterBlockSpec {
        name: name.to_string(),
        design: DesignMatrix::Dense(crate::matrix::DenseDesignMatrix::from(Array2::zeros((
            n, n_cols,
        )))),
        offset: Array1::zeros(n),
        penalties: vec![],
        nullspace_dims: vec![],
        initial_log_lambdas: Array1::zeros(0),
        initial_beta: None,
        gauge_priority: 100,
        jacobian_callback: None,
        stacked_design: None,
        stacked_offset: None,
    };
    let specs = vec![zero_design_spec("eta", 2), zero_design_spec("wiggle", 34)];

    assert!(family.inner_coefficient_hessian_hvp_available(&specs));
    let planned_order =
        family.exact_outer_derivative_order(&specs, &BlockwiseFitOptions::default());
    assert_eq!(
        planned_order,
        crate::custom_family::ExactOuterDerivativeOrder::Second
    );
}
#[test]
fn gaussian_location_scale_psi_joint_hessian_pins_fisher_cross_zero() {
use crate::solver::estimate::reml::unified::HyperOperator;
fn materialize(
dense: &Array2<f64>,
operator: Option<&dyn HyperOperator>,
total: usize,
) -> Array2<f64> {
match operator {
Some(op) => op.to_dense(),
None => {
assert_eq!(dense.dim(), (total, total));
dense.clone()
}
}
}
fn block_max_abs(h: &Array2<f64>, r0: usize, r1: usize, c0: usize, c1: usize) -> f64 {
let mut m = 0.0_f64;
for r in r0..r1 {
for c in c0..c1 {
m = m.max(h[[r, c]].abs());
}
}
m
}
const CROSS_TOL: f64 = 1e-12;
// Case 1: fixture from `gls_workspace_fixture()`, indexed through the
// `GaussianLocationScaleFamily` block constants (μ block and log σ block).
// Four matrices are checked in turn: the dense joint Hessian, the first-order
// ψ Hessian term, the second-order ψ Hessian term, and the mixed β·ψ
// directional derivative. For each one the largest absolute entry of the
// μ↔logσ cross block must not exceed CROSS_TOL.
// NOTE(review): `block_max_abs` is called as (h, 0, p_mu, p_mu, total); this is
// read as rows 0..p_mu and columns p_mu..total, but the helper's signature is
// above this view. Confirm the argument order there.
{
let (family, states, specs) = gls_workspace_fixture();
// Coefficient counts of the two blocks; `total` is the combined count used
// for block indexing and for the expected shape of `mixed` below.
let p_mu = states[GaussianLocationScaleFamily::BLOCK_MU].beta.len();
let p_ls = states[GaussianLocationScaleFamily::BLOCK_LOG_SIGMA]
.beta
.len();
let total = p_mu + p_ls;
// Deterministic synthetic first ψ-derivative of the μ design
// (family.y.len() rows, p_mu columns), filled from a sine pattern.
let x_mu_psi = Array2::from_shape_fn((family.y.len(), p_mu), |(i, j)| {
0.2 + 0.11 * ((i as f64) * 0.37 + (j as f64) * 0.53).sin()
});
// Deterministic synthetic second ψ-derivative of the μ design, same shape,
// filled from a cosine pattern.
let x_mu_psi_psi = Array2::from_shape_fn((family.y.len(), p_mu), |(i, j)| {
0.06_f64.max(0.07) * ((i as f64) * 0.19 + (j as f64) * 0.23).cos()
});
// Per-block ψ-derivative lists: the first block gets a single entry with
// the synthetic design derivatives and all-zero penalty derivatives; the
// second block's list is empty.
let derivative_blocks = vec![
vec![CustomFamilyBlockPsiDerivative {
penalty_index: None,
x_psi: x_mu_psi,
s_psi: Array2::zeros((p_mu, p_mu)),
s_psi_components: None,
s_psi_penalty_components: None,
x_psi_psi: Some(vec![x_mu_psi_psi]),
s_psi_psi: Some(vec![Array2::zeros((p_mu, p_mu))]),
s_psi_psi_components: None,
s_psi_psi_penalty_components: None,
implicit_operator: None,
implicit_axis: 0,
implicit_group_id: None,
}],
Vec::new(),
];
// (1) Dense joint Hessian. The call returns a nested result/option; both
// layers are unwrapped with `expect`, so a missing Hessian fails the test.
let dense_h = family
.exact_newton_joint_hessian(&states)
.expect("dense joint Hessian build")
.expect("dense joint Hessian present");
assert!(
block_max_abs(&dense_h, 0, p_mu, p_mu, total) <= CROSS_TOL,
"#684: dense Fisher joint Hessian μ↔logσ cross block must be 0, got max |.|={:.3e}",
block_max_abs(&dense_h, 0, p_mu, p_mu, total)
);
// (2) First-order ψ terms. The trailing `0` is presumably the ψ index
// (only one ψ derivative entry exists); verify against the method.
let psi = family
.exact_newton_joint_psi_terms(&states, &specs, &derivative_blocks, 0)
.expect("psi terms call")
.expect("gaussian psi terms present");
// `materialize` is defined above this view; it is handed both the dense
// field and the optional operator. Presumably it yields a dense
// total × total matrix from whichever is populated (TODO confirm).
let h_psi = materialize(&psi.hessian_psi, psi.hessian_psi_operator.as_deref(), total);
let cross = block_max_abs(&h_psi, 0, p_mu, p_mu, total);
assert!(
cross <= CROSS_TOL,
"#684: ψ joint Hessian μ↔logσ cross block must be Fisher-0 (observed 2κm \
drift), got max |.|={cross:.3e}"
);
// (3) Second-order ψ terms, requested with the index pair (0, 0).
let psi2 = family
.exact_newton_joint_psisecond_order_terms(&states, &specs, &derivative_blocks, 0, 0)
.expect("psi 2nd-order call")
.expect("gaussian psi 2nd-order present");
let h_psi2 = materialize(
&psi2.hessian_psi_psi,
psi2.hessian_psi_psi_operator.as_deref(),
total,
);
let cross2 = block_max_abs(&h_psi2, 0, p_mu, p_mu, total);
assert!(
cross2 <= CROSS_TOL,
"#684: 2nd-order ψ joint Hessian μ↔logσ cross block must be Fisher-0, \
got max |.|={cross2:.3e}"
);
// (4) Mixed β·ψ term: directional derivative of the ψ Hessian along a
// deterministic joint coefficient direction of length `total`.
let d_beta = Array1::from_shape_fn(total, |i| 0.05 + 0.13 * ((i + 1) as f64).sin());
let mixed = family
.exact_newton_joint_psihessian_directional_derivative(
&states,
&specs,
&derivative_blocks,
0,
&d_beta,
)
.expect("psi mixed-drift call")
.expect("gaussian psi mixed-drift present");
// Shape guard before the cross block is indexed.
assert_eq!(mixed.dim(), (total, total));
let crossm = block_max_abs(&mixed, 0, p_mu, p_mu, total);
assert!(
crossm <= CROSS_TOL,
"#684: mixed β·ψ ψ-Hessian μ↔logσ cross block must be Fisher-0, \
got max |.|={crossm:.3e}"
);
}
// Case 2: fixture from `gls_wiggle_workspace_fixture()`, indexed through the
// `GaussianLocationScaleWiggleFamily` block constants (μ, log σ and wiggle
// blocks). The same four matrices as in the two-block case are produced, and
// for each one both the μ↔logσ and the wiggle↔logσ cross blocks must have a
// largest absolute entry <= CROSS_TOL.
{
// Only the first three fixture outputs are used; the rest are ignored via `..`.
let (family, states, specs, ..) = gls_wiggle_workspace_fixture();
let p_mu = states[GaussianLocationScaleWiggleFamily::BLOCK_MU]
.beta
.len();
let p_ls = states[GaussianLocationScaleWiggleFamily::BLOCK_LOG_SIGMA]
.beta
.len();
let p_w = states[GaussianLocationScaleWiggleFamily::BLOCK_WIGGLE]
.beta
.len();
let total = p_mu + p_ls + p_w;
// Half-open index ranges used to address blocks of the joint matrices:
// μ = mu0..ls0, log σ = ls0..ls1, wiggle = w0..w1. This assumes the joint
// coefficient vector is laid out as [μ, log σ, wiggle] (TODO confirm
// against the family's block constants).
let mu0 = 0usize;
let ls0 = p_mu;
let ls1 = p_mu + p_ls;
let w0 = p_mu + p_ls;
let w1 = total;
// Deterministic synthetic first ψ-derivative of the μ design
// (family.y.len() rows, p_mu columns), filled from a sine pattern.
let x_mu_psi = Array2::from_shape_fn((family.y.len(), p_mu), |(i, j)| {
0.18 + 0.09 * ((i as f64) * 0.41 + (j as f64) * 0.29).sin()
});
// Deterministic synthetic second ψ-derivative of the μ design, same shape,
// filled from a cosine pattern.
let x_mu_psi_psi = Array2::from_shape_fn((family.y.len(), p_mu), |(i, j)| {
0.06 * ((i as f64) * 0.17 + (j as f64) * 0.31).cos()
});
// Per-block ψ-derivative lists: the first block gets a single entry with
// the synthetic design derivatives and all-zero penalty derivatives; the
// other two blocks' lists are empty.
let derivative_blocks = vec![
vec![CustomFamilyBlockPsiDerivative {
penalty_index: None,
x_psi: x_mu_psi,
s_psi: Array2::zeros((p_mu, p_mu)),
s_psi_components: None,
s_psi_penalty_components: None,
x_psi_psi: Some(vec![x_mu_psi_psi]),
s_psi_psi: Some(vec![Array2::zeros((p_mu, p_mu))]),
s_psi_psi_components: None,
s_psi_psi_penalty_components: None,
implicit_operator: None,
implicit_axis: 0,
implicit_group_id: None,
}],
Vec::new(),
Vec::new(),
];
// Shared check applied to every matrix below. `label` names the matrix in
// the failure message. `c_ml` is the max |entry| of the μ↔logσ block and
// `c_wl` that of the wiggle↔logσ block.
// NOTE(review): the `block_max_abs` argument order is read as
// (h, r0, r1, c0, c1); its signature is above this view, so confirm it there.
let assert_wiggle_crosses_zero = |h: &Array2<f64>, label: &str| {
let c_ml = block_max_abs(h, mu0, ls0, ls0, ls1);
let c_wl = block_max_abs(h, w0, w1, ls0, ls1);
assert!(
c_ml <= CROSS_TOL,
"#684 (wiggle {label}): μ↔logσ cross block must be Fisher-0 \
(observed 2κmD drift), got max |.|={c_ml:.3e}"
);
assert!(
c_wl <= CROSS_TOL,
"#684 (wiggle {label}): wiggle↔logσ cross block must be Fisher-0 \
(observed 2κm drift; the wiggle is mean-side), got max |.|={c_wl:.3e}"
);
};
// (1) Dense joint Hessian; both the result and the option layer are
// unwrapped, and the shape is verified before blocks are indexed.
let dense_h = family
.exact_newton_joint_hessian(&states)
.expect("wiggle dense joint Hessian build")
.expect("wiggle dense joint Hessian present");
assert_eq!(dense_h.dim(), (total, total));
assert_wiggle_crosses_zero(&dense_h, "dense Fisher");
// (2) First-order ψ terms. The trailing `0` is presumably the ψ index
// (only one ψ derivative entry exists); verify against the method.
let psi = family
.exact_newton_joint_psi_terms(&states, &specs, &derivative_blocks, 0)
.expect("wiggle psi terms call")
.expect("wiggle psi terms present");
// `materialize` is defined above this view; presumably it yields a dense
// total × total matrix from the dense field or the optional operator
// (TODO confirm).
let h_psi = materialize(&psi.hessian_psi, psi.hessian_psi_operator.as_deref(), total);
assert_wiggle_crosses_zero(&h_psi, "1st-order ψ");
// (3) Second-order ψ terms, requested with the index pair (0, 0).
let psi2 = family
.exact_newton_joint_psisecond_order_terms(&states, &specs, &derivative_blocks, 0, 0)
.expect("wiggle psi 2nd-order call")
.expect("wiggle psi 2nd-order present");
let h_psi2 = materialize(
&psi2.hessian_psi_psi,
psi2.hessian_psi_psi_operator.as_deref(),
total,
);
assert_wiggle_crosses_zero(&h_psi2, "2nd-order ψ");
// (4) Mixed β·ψ term: directional derivative of the ψ Hessian along a
// deterministic joint coefficient direction of length `total`.
let d_beta = Array1::from_shape_fn(total, |i| 0.04 + 0.1 * ((i + 1) as f64).cos());
let mixed = family
.exact_newton_joint_psihessian_directional_derivative(
&states,
&specs,
&derivative_blocks,
0,
&d_beta,
)
.expect("wiggle psi mixed-drift call")
.expect("wiggle psi mixed-drift present");
assert_eq!(mixed.dim(), (total, total));
assert_wiggle_crosses_zero(&mixed, "mixed β·ψ");
}
}
}