use std::sync::Arc;
use ndarray::{Array1, Array2, Array3};
use crate::families::custom_family::{
BlockEffectiveJacobian, CustomFamilyError, FamilyLinearizationState, ParameterBlockSpec,
PenaltyMatrix,
};
use crate::identifiability::audit::{
IdentifiabilityAudit, audit_identifiability, audit_identifiability_channel_aware,
block_structural_penalty_dense, priority_tiered_rank_from_gram, rank_of_gram,
};
use crate::identifiability::families::compiler::{
IdentityRowHessian, RowJacobianOperator, orthogonalize_design_blocks, symmetric_sqrt_into,
};
use crate::linalg::faer_ndarray::{
default_rrqr_rank_alpha, fast_ata, fast_atb, rrqr_with_permutation,
};
use crate::linalg::matrix::{CoefficientTransformOperator, DenseDesignMatrix, DesignMatrix};
use crate::solver::gauge::Gauge;
enum BlockJacobianSource {
Callback(Arc<dyn BlockEffectiveJacobian>),
FlatDesign(DesignMatrix),
}
struct BlockJacobianAsRowOp {
source: BlockJacobianSource,
n: usize,
p: usize,
k_block: usize,
k_target: usize,
block_name: String,
}
impl BlockJacobianAsRowOp {
fn from_callback(
cb: Arc<dyn BlockEffectiveJacobian>,
n_rows: usize,
p_block: usize,
k_target: usize,
block_name: &str,
) -> Result<Self, String> {
let k = cb.n_outputs();
if k == 0 {
return Err(format!(
"BlockJacobianAsRowOp block '{block_name}': n_outputs=0 is invalid"
));
}
if k > k_target {
return Err(format!(
"BlockJacobianAsRowOp block '{block_name}': n_outputs({k}) exceeds the \
audit channel count k_target({k_target})"
));
}
Ok(Self {
source: BlockJacobianSource::Callback(cb),
n: n_rows,
p: p_block,
k_block: k,
k_target,
block_name: block_name.to_string(),
})
}
fn from_flat_design(
design: DesignMatrix,
n_rows: usize,
k_target: usize,
block_name: &str,
) -> Self {
let p = design.ncols();
Self {
source: BlockJacobianSource::FlatDesign(design),
n: n_rows,
p,
k_block: 1,
k_target,
block_name: block_name.to_string(),
}
}
fn zero_state() -> FamilyLinearizationState<'static> {
FamilyLinearizationState {
beta: &[],
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
}
}
fn stacked_rows(&self, start: usize, end: usize) -> Result<Array2<f64>, String> {
match &self.source {
BlockJacobianSource::Callback(cb) => {
let state = Self::zero_state();
let stacked = cb
.effective_jacobian_rows(&state, start..end)
.map_err(|e| {
format!("BlockJacobianAsRowOp block '{}': {e}", self.block_name)
})?;
let chunk = end - start;
if stacked.nrows() != self.k_block * chunk || stacked.ncols() != self.p {
return Err(format!(
"BlockJacobianAsRowOp block '{}': effective_jacobian_rows returned \
shape {:?}, expected [{}, {}]",
self.block_name,
stacked.shape(),
self.k_block * chunk,
self.p,
));
}
Ok(stacked)
}
BlockJacobianSource::FlatDesign(design) => {
let chunk = end - start;
let mut out = Array2::<f64>::zeros((chunk, self.p));
design
.row_chunk_into(start..end, out.view_mut())
.map_err(|e| {
format!(
"BlockJacobianAsRowOp block '{}': flat design row chunk failed: {e}",
self.block_name
)
})?;
Ok(out)
}
}
}
}
impl RowJacobianOperator for BlockJacobianAsRowOp {
fn k(&self) -> usize {
self.k_target
}
fn ncols(&self) -> usize {
self.p
}
fn nrows(&self) -> usize {
self.n
}
fn apply_row(&self, row: usize, delta_beta: &[f64], out: &mut [f64]) {
let k = self.k();
assert_eq!(out.len(), k);
assert_eq!(delta_beta.len(), self.ncols());
for r in 0..k {
out[r] = 0.0;
}
let stacked = self
.stacked_rows(row, row + 1)
.expect("BlockJacobianAsRowOp::apply_row failed to read row");
for r in 0..self.k_block {
for (j, &b) in delta_beta.iter().enumerate() {
out[r] += stacked[[r, j]] * b;
}
}
}
fn evaluate_full(&self) -> Array3<f64> {
let entries = self.n.saturating_mul(self.p).saturating_mul(self.k_target);
const MAX_EVALUATE_FULL_ENTRIES: usize = 10_000_000;
assert!(
entries <= MAX_EVALUATE_FULL_ENTRIES,
"BlockJacobianAsRowOp::evaluate_full refused to materialize {entries} entries"
);
let mut out = Array3::<f64>::zeros((self.n, self.p, self.k_target));
for start in (0..self.n).step_by(4096) {
let end = (start + 4096).min(self.n);
let stacked = self
.stacked_rows(start, end)
.expect("BlockJacobianAsRowOp::evaluate_full failed to read row chunk");
let chunk = end - start;
for ch in 0..self.k_block {
for local_i in 0..chunk {
for col in 0..self.p {
out[[start + local_i, col, ch]] = stacked[[ch * chunk + local_i, col]];
}
}
}
}
out
}
fn scaled_design_by_sqrt_h(&self, h_full: &Array3<f64>) -> Array2<f64> {
let n = self.nrows();
let p = self.ncols();
let k = self.k();
assert_eq!(h_full.shape(), &[n, k, k]);
let mut out = Array2::<f64>::zeros((n * k, p));
let mut sqrt_h = Array2::<f64>::zeros((k, k));
let mut h_i = Array2::<f64>::zeros((k, k));
for start in (0..n).step_by(4096) {
let end = (start + 4096).min(n);
let chunk = end - start;
let mut rows = Array2::<f64>::zeros((chunk * k, p));
self.channel_flattened_rows(start..end, &mut rows);
for local_i in 0..chunk {
let row = start + local_i;
for a in 0..k {
for b in 0..k {
h_i[[a, b]] = h_full[[row, a, b]];
}
}
symmetric_sqrt_into(&h_i, &mut sqrt_h);
for ch in 0..k {
let dst = row * k + ch;
for col in 0..p {
let mut acc = 0.0;
for cp in 0..k {
acc += sqrt_h[[ch, cp]] * rows[[local_i * k + cp, col]];
}
out[[dst, col]] = acc;
}
}
}
}
out
}
fn channel_flattened_column(&self, col: usize, out: &mut [f64]) {
let n = self.nrows();
let k = self.k();
assert!(
col < self.ncols(),
"BlockJacobianAsRowOp::channel_flattened_column col {col} out of range {}",
self.ncols()
);
assert_eq!(out.len(), n * k);
let mut offset = 0usize;
for start in (0..n).step_by(4096) {
let end = (start + 4096).min(n);
let chunk = end - start;
let mut rows = Array2::<f64>::zeros((chunk * k, self.p));
self.channel_flattened_rows(start..end, &mut rows);
for local_i in 0..chunk {
for ch in 0..k {
out[offset + local_i * k + ch] = rows[[local_i * k + ch, col]];
}
}
offset += chunk * k;
}
}
fn channel_flattened_rows(&self, rows: std::ops::Range<usize>, out: &mut Array2<f64>) {
let start = rows.start.min(self.n);
let end = rows.end.min(self.n);
let chunk = end - start;
assert_eq!(out.shape(), &[chunk * self.k_target, self.p]);
out.fill(0.0);
let stacked = self
.stacked_rows(start, end)
.expect("BlockJacobianAsRowOp::channel_flattened_rows failed to read rows");
for ch in 0..self.k_block {
for local_i in 0..chunk {
for col in 0..self.p {
out[[local_i * self.k_target + ch, col]] = stacked[[ch * chunk + local_i, col]];
}
}
}
}
}
#[derive(Debug)]
pub struct CanonicalSpecs {
pub reduced_specs: Vec<ParameterBlockSpec>,
pub gauge: Gauge,
pub audit: IdentifiabilityAudit,
pub used_channel_aware_audit: bool,
}
pub fn canonicalize_for_identifiability(
specs: &[ParameterBlockSpec],
) -> Result<CanonicalSpecs, CustomFamilyError> {
canonicalize_for_identifiability_inner(specs, true)
}
fn audit_convention_rank(j: &Array2<f64>, nk_scale: usize, blocks: &[FlatRankBlock]) -> usize {
let p = j.ncols();
if p == 0 || j.nrows() == 0 {
return 0;
}
let mut gram = fast_ata(j);
let declared: usize = blocks.iter().map(|b| b.width).sum();
let mut n_penalty_rows = 0usize;
if declared == p {
let mut col_off = 0usize;
for b in blocks {
if let Some(s) = b.structural_penalty.as_ref()
&& s.nrows() == b.width
&& s.ncols() == b.width
&& b.width > 0
{
let sts = fast_atb(s, s);
let mut sub = gram.slice_mut(ndarray::s![
col_off..col_off + b.width,
col_off..col_off + b.width
]);
sub += &sts;
n_penalty_rows += b.width;
}
col_off += b.width;
}
}
rank_of_gram(&gram, nk_scale.saturating_add(n_penalty_rows)).unwrap_or(p)
}
struct FlatRankBlock {
width: usize,
structural_penalty: Option<Array2<f64>>,
priority: u8,
}
fn flat_audit_convention_rank(j: &Array2<f64>, blocks: &[FlatRankBlock]) -> usize {
let p = j.ncols();
if p == 0 || j.nrows() == 0 {
return 0;
}
let declared: usize = blocks.iter().map(|b| b.width).sum();
if declared != p {
return match rrqr_with_permutation(j, default_rrqr_rank_alpha()) {
Ok(rrqr) => rrqr.rank,
Err(_) => p,
};
}
let mut gram = fast_ata(j);
let mut col_off = 0usize;
let mut n_penalty_rows = 0usize;
let mut col_priority: Vec<u8> = Vec::with_capacity(p);
for b in blocks {
for _ in 0..b.width {
col_priority.push(b.priority);
}
if let Some(s) = b.structural_penalty.as_ref() {
if s.nrows() == b.width && s.ncols() == b.width && b.width > 0 {
let sts = fast_atb(s, s);
let mut sub = gram.slice_mut(ndarray::s![
col_off..col_off + b.width,
col_off..col_off + b.width
]);
sub += &sts;
n_penalty_rows += b.width;
}
}
col_off += b.width;
}
let m_rows = j.nrows() + n_penalty_rows;
let tiered =
priority_tiered_rank_from_gram(&gram, &col_priority, m_rows, default_rrqr_rank_alpha());
tiered.rank
}
fn canonicalize_for_identifiability_inner(
specs: &[ParameterBlockSpec],
orthogonalize: bool,
) -> Result<CanonicalSpecs, CustomFamilyError> {
if orthogonalize {
if let Some(canon) = try_orthogonalize_blocks(specs)? {
return Ok(canon);
}
}
if specs.is_empty() {
return Ok(CanonicalSpecs {
reduced_specs: Vec::new(),
gauge: Gauge::identity(&[]),
audit: audit_identifiability(specs).map_err(|r| {
CustomFamilyError::DimensionMismatch {
reason: format!("pre-fit identifiability audit failed: {r}"),
}
})?,
used_channel_aware_audit: false,
});
}
let n_rows = specs[0].design.nrows();
let max_n_outputs = specs
.iter()
.map(|s| {
s.jacobian_callback
.as_ref()
.map(|cb| cb.n_outputs())
.unwrap_or(1)
})
.max()
.unwrap_or(1);
let use_channel_aware = max_n_outputs > 1;
log::debug!(
"[CANON] canonicalize_for_identifiability: blocks={} n_rows={} \
max_n_outputs={} route={}",
specs.len(),
n_rows,
max_n_outputs,
if use_channel_aware {
"channel-aware"
} else {
"flat"
},
);
if log::log_enabled!(log::Level::Debug) {
const FROB_CHUNK: usize = 4096;
for spec in specs.iter() {
let k = spec
.jacobian_callback
.as_ref()
.map(|cb| cb.n_outputs())
.unwrap_or(1);
let jac_nrows = if use_channel_aware {
n_rows * k
} else {
n_rows
};
let p = spec.design.ncols();
let zeros = vec![0.0f64; p];
let state = FamilyLinearizationState {
beta: &zeros,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let mut frob_sq = 0.0_f64;
let mut probe_err: Option<String> = None;
for start in (0..n_rows).step_by(FROB_CHUNK) {
let end = (start + FROB_CHUNK).min(n_rows);
let chunk = match spec.jacobian_callback.as_ref() {
Some(cb) => cb.effective_jacobian_rows(&state, start..end),
None => {
let mut out = Array2::<f64>::zeros((end - start, p));
spec.design
.row_chunk_into(start..end, out.view_mut())
.map(|()| out)
.map_err(|e| e.to_string())
}
};
match chunk {
Ok(rows) => frob_sq += rows.iter().map(|v| v * v).sum::<f64>(),
Err(e) => {
probe_err = Some(e);
break;
}
}
}
match probe_err {
Some(e) => log::debug!(
"[CANON] block '{}': effective_jacobian probe failed: {e}",
spec.name,
),
None => log::debug!(
"[CANON] block '{}': p={} jac_nrows={} frob_norm={:.4e}",
spec.name,
p,
jac_nrows,
frob_sq.sqrt(),
),
}
}
}
let audit = if use_channel_aware {
let k = max_n_outputs;
let mut operators: Vec<Arc<dyn RowJacobianOperator>> = Vec::with_capacity(specs.len());
for spec in specs.iter() {
let op: Arc<dyn RowJacobianOperator> = match spec.jacobian_callback.as_ref() {
Some(cb) => {
let row_op = BlockJacobianAsRowOp::from_callback(
Arc::clone(cb),
n_rows,
spec.design.ncols(),
k,
&spec.name,
)
.map_err(|e| CustomFamilyError::DimensionMismatch {
reason: format!(
"canonicalize_for_identifiability: build \
BlockJacobianAsRowOp for block '{}': {e}",
spec.name,
),
})?;
Arc::new(row_op)
}
None => Arc::new(BlockJacobianAsRowOp::from_flat_design(
spec.design.clone(),
n_rows,
k,
&spec.name,
)),
};
operators.push(op);
}
let row_hess = IdentityRowHessian::new(n_rows, k);
let audit_result = audit_identifiability_channel_aware(specs, &operators, &row_hess)
.map_err(|reason| CustomFamilyError::DimensionMismatch {
reason: format!("pre-fit channel-aware identifiability audit failed: {reason}"),
})?;
log::info!(
"[CANON] channel-aware audit: {} blocks, joint_rank={}/{} (flat audit NOT used)",
specs.len(),
audit_result
.blocks
.iter()
.map(|b| b.effective_dim)
.sum::<usize>(),
specs.iter().map(|s| s.design.ncols()).sum::<usize>(),
);
audit_result
} else {
let audit_result = audit_identifiability(specs).map_err(|reason| {
CustomFamilyError::DimensionMismatch {
reason: format!("pre-fit identifiability audit failed: {reason}"),
}
})?;
log::debug!(
"[CANON] flat audit: {} blocks, joint_rank={}",
specs.len(),
audit_result
.blocks
.iter()
.map(|b| b.effective_dim)
.sum::<usize>(),
);
audit_result
};
if audit.fatal {
return Err(CustomFamilyError::IdentifiabilityFailure { audit });
}
let owns_stacked_geometry = |name: &str| -> bool {
specs
.iter()
.any(|spec| spec.name == name && spec.stacked_design.is_some())
};
let dropped_on_owned_block = audit
.dropped_columns
.iter()
.any(|drop| owns_stacked_geometry(&drop.block));
if dropped_on_owned_block {
let raw_widths: Vec<usize> = specs.iter().map(|spec| spec.design.ncols()).collect();
let dropped_summary = audit
.dropped_columns
.iter()
.map(|drop| format!("{}[{}]", drop.block, drop.column))
.collect::<Vec<_>>()
.join(", ");
log::info!(
"[CANON] width-preserving family-owned geometry path: audit attributed \
dropped columns [{dropped_summary}], at least one of which falls on a block \
that owns its effective geometry via jacobian_callback or a multi-channel \
stacked_design; keeping raw block widths and deferring curvature on the weak \
directions to the robust/Firth path"
);
return Ok(CanonicalSpecs {
reduced_specs: specs.to_vec(),
gauge: Gauge::identity(&raw_widths),
audit,
used_channel_aware_audit: use_channel_aware,
});
}
let mut per_block_transform: Vec<Array2<f64>> = Vec::with_capacity(specs.len());
let mut reduced_specs: Vec<ParameterBlockSpec> = Vec::with_capacity(specs.len());
for spec in specs.iter() {
let p_raw = spec.design.ncols();
let dropped_locals: Vec<usize> = audit
.dropped_columns
.iter()
.filter(|drop| drop.block == spec.name)
.map(|drop| drop.column)
.collect();
let mut dropped_sorted = dropped_locals.clone();
dropped_sorted.sort_unstable();
dropped_sorted.dedup();
for &col in &dropped_sorted {
if col >= p_raw {
crate::bail_dim_custom!(
"canonicalize_for_identifiability: audit reported dropped column \
{col} for block '{}' which has only {} columns",
spec.name,
p_raw,
);
}
}
let kept: Vec<usize> = (0..p_raw)
.filter(|c| dropped_sorted.binary_search(c).is_err())
.collect();
let r_block = kept.len();
let mut t_i = Array2::<f64>::zeros((p_raw, r_block));
for (col_out, &raw_col) in kept.iter().enumerate() {
t_i[[raw_col, col_out]] = 1.0;
}
let reduced_design = if dropped_sorted.is_empty() {
spec.design.clone()
} else {
build_reduced_design(&spec.design, &kept, &spec.name, &t_i)?
};
let reduced_stacked_design: Option<DesignMatrix> = match spec.stacked_design.as_ref() {
Some(stacked) if !dropped_sorted.is_empty() => {
Some(build_reduced_design(stacked, &kept, &spec.name, &t_i)?)
}
Some(stacked) => Some(stacked.clone()),
None => None,
};
let reduced_stacked_offset = spec.stacked_offset.clone();
let reduced_penalties: Vec<PenaltyMatrix> = spec
.penalties
.iter()
.map(|p| pull_back_penalty(p, &kept))
.collect();
let reduced_initial_beta = match &spec.initial_beta {
Some(beta_raw) => {
if beta_raw.len() != p_raw {
crate::bail_dim_custom!(
"canonicalize_for_identifiability: block '{}' initial_beta \
length {} != design ncols {}",
spec.name,
beta_raw.len(),
p_raw,
);
}
let mut theta = Array1::<f64>::zeros(r_block);
for (out_idx, &raw_col) in kept.iter().enumerate() {
theta[out_idx] = beta_raw[raw_col];
}
Some(theta)
}
None => None,
};
let reduced_jacobian_callback = match spec.jacobian_callback.as_ref() {
Some(cb) if !dropped_sorted.is_empty() => Some(Arc::new(
crate::families::custom_family::GaugeComposedJacobian::new(
Arc::clone(cb),
Arc::new(t_i.clone()),
),
)
as Arc<dyn BlockEffectiveJacobian>),
other => other.cloned(),
};
reduced_specs.push(ParameterBlockSpec {
name: spec.name.clone(),
design: reduced_design,
offset: spec.offset.clone(),
penalties: reduced_penalties,
nullspace_dims: Vec::new(),
initial_log_lambdas: spec.initial_log_lambdas.clone(),
initial_beta: reduced_initial_beta,
gauge_priority: spec.gauge_priority,
jacobian_callback: reduced_jacobian_callback,
stacked_design: reduced_stacked_design,
stacked_offset: reduced_stacked_offset,
});
per_block_transform.push(t_i);
}
{
let p_total_raw: usize = specs.iter().map(|s| s.design.ncols()).sum();
let p_total_red: usize = per_block_transform.iter().map(|t| t.ncols()).sum();
let k = if use_channel_aware { max_n_outputs } else { 1 };
let nk = n_rows * k;
let stacked_rows = specs
.iter()
.filter_map(|s| s.stacked_design.as_ref().map(|d| d.nrows()))
.max()
.unwrap_or(0);
let r_map = nk.max(stacked_rows);
let stacked_dense: Vec<Option<Array2<f64>>> = specs
.iter()
.map(|s| {
s.stacked_design.as_ref().and_then(|d| {
d.try_to_dense_arc("canonicalize_rank_check stacked")
.ok()
.map(|a| a.as_ref().clone())
})
})
.collect();
let k_bands = (r_map / n_rows).max(1);
let observation_bands: Vec<usize> = if k_bands <= 1 {
vec![0]
} else if let Some(stacked_ref) = stacked_dense.iter().flatten().max_by_key(|d| d.nrows()) {
let mut bands: Vec<usize> = (0..k_bands)
.filter(|&b| {
let lo = b * n_rows;
let hi = ((b + 1) * n_rows).min(stacked_ref.nrows());
if hi <= lo {
return false;
}
if stacked_ref.ncols() == 0 {
return false;
}
let first = stacked_ref[[lo, 0]];
first.abs() > 1e-12
&& (lo..hi).all(|r| {
(stacked_ref[[r, 0]] - first).abs() <= 1e-9 * first.abs().max(1.0)
})
})
.collect();
if bands.is_empty() {
bands.push(0);
}
bands
} else {
vec![0]
};
let mut j_pre = Array2::<f64>::zeros((r_map, p_total_raw));
let mut col_off = 0usize;
for (idx, spec) in specs.iter().enumerate() {
let p_b = spec.design.ncols();
if let Some(dense) = stacked_dense[idx].as_ref() {
let br = dense.nrows().min(r_map);
for i in 0..br {
for j in 0..p_b.min(dense.ncols()) {
j_pre[[i, col_off + j]] = dense[[i, j]];
}
}
col_off += p_b;
continue;
}
let zeros = vec![0.0f64; p_b];
let state = FamilyLinearizationState {
beta: &zeros,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
match spec.effective_jacobian_at("canonicalize_rank_check", &state) {
Ok(j_b) => {
let k_b = j_b.nrows() / n_rows;
if k_b <= 1 {
for &b in &observation_bands {
let base = b * n_rows;
for i in 0..n_rows {
let dst_row = base + i;
if dst_row >= r_map {
break;
}
for j in 0..p_b {
j_pre[[dst_row, col_off + j]] = j_b[[i, j]];
}
}
}
} else {
let r_max = k_b.min(k);
for r in 0..r_max {
let src_row_base = r * n_rows;
for i in 0..n_rows {
let dst_row = i * k + r;
let src_row = src_row_base + i;
for j in 0..p_b {
j_pre[[dst_row, col_off + j]] = j_b[[src_row, j]];
}
}
}
}
}
Err(_) => {
if let Ok(flat) = spec
.design
.try_to_dense_arc("canonicalize_rank_check")
.map(|a| a.as_ref().clone())
{
for &b in &observation_bands {
let base = b * n_rows;
for i in 0..n_rows.min(flat.nrows()) {
let dst_row = base + i;
if dst_row >= r_map {
break;
}
for j in 0..p_b.min(flat.ncols()) {
j_pre[[dst_row, col_off + j]] = flat[[i, j]];
}
}
}
}
}
}
col_off += p_b;
}
let t_is_identity = per_block_transform.iter().all(|t| t.nrows() == t.ncols());
let mut j_can_reduced: Option<Array2<f64>> = None;
if t_is_identity {
log::info!(
"[CANON] post-T invariant: T=identity (all blocks full-width) — \
J_can≡J_pre, rank preserved by construction; skipping J_can \
materialise + double RRQR (p_raw={p_total_raw} p_red={p_total_red} k={k})",
);
} else {
let mut j_can = Array2::<f64>::zeros((r_map, p_total_red));
let mut raw_col_off = 0usize;
let mut red_col_off = 0usize;
for t_i in per_block_transform.iter() {
let p_i = t_i.nrows();
let r_i = t_i.ncols();
if p_i > 0 && r_i > 0 {
for row in 0..r_map {
for out_col in 0..r_i {
let mut acc = 0.0_f64;
for in_col in 0..p_i {
acc += j_pre[[row, raw_col_off + in_col]] * t_i[[in_col, out_col]];
}
j_can[[row, red_col_off + out_col]] = acc;
}
}
}
raw_col_off += p_i;
red_col_off += r_i;
}
let nk_scale = r_map;
let flat_blocks_can: Vec<FlatRankBlock> = reduced_specs
.iter()
.map(|s| FlatRankBlock {
width: s.design.ncols(),
structural_penalty: block_structural_penalty_dense(s),
priority: s.gauge_priority,
})
.collect();
let rank_j_can = if use_channel_aware {
audit_convention_rank(&j_can, nk_scale, &flat_blocks_can)
} else {
flat_audit_convention_rank(&j_can, &flat_blocks_can)
};
let flat_blocks_pre: Vec<FlatRankBlock> = specs
.iter()
.map(|s| FlatRankBlock {
width: s.design.ncols(),
structural_penalty: block_structural_penalty_dense(s),
priority: s.gauge_priority,
})
.collect();
let rank_j_pre = if use_channel_aware {
audit_convention_rank(&j_pre, nk_scale, &flat_blocks_pre)
} else {
flat_audit_convention_rank(&j_pre, &flat_blocks_pre)
};
let rank_target = rank_j_pre.min(p_total_red);
let audit_kept_rank: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
log::info!(
"[CANON] post-T invariant ({} convention): \
rank(J_can)={rank_j_can} rank(J_pre)={rank_j_pre} \
rank_target={rank_target} p_red={p_total_red} \
audit_kept_rank={audit_kept_rank} \
(p_raw={p_total_raw} k={k})",
if use_channel_aware {
"σ²-Gram"
} else {
"σ-space RRQR (penalty-augmented, priority-tiered)"
},
);
if rank_j_can != rank_target {
let block_shapes: Vec<String> = per_block_transform
.iter()
.zip(specs.iter())
.map(|(t, s)| format!("{}:({},{})", s.name, t.nrows(), t.ncols()))
.collect();
return Err(CustomFamilyError::DimensionMismatch {
reason: format!(
"canonicalize_for_identifiability: post-T rank invariant violated — \
under the drop-deciding {} convention the reduced design J_can is \
rank-deficient: rank(J_can)={rank_j_can} but rank_target=\
min(rank(J_pre)={rank_j_pre}, p_red={p_total_red})={rank_target} \
(audit_kept_rank={audit_kept_rank}, p_raw={p_total_raw}, k={k}); the \
column-selection T dropped a direction the FULL design carried into \
the kept columns — a bug in T construction; per-block T shapes: [{}]",
if use_channel_aware {
"σ²-Gram"
} else {
"σ-space RRQR (penalty-augmented, priority-tiered)"
},
block_shapes.join(", "),
),
});
}
j_can_reduced = Some(j_can);
}
if p_total_red > 0 {
let mut s_joint = Array2::<f64>::zeros((p_total_red, p_total_red));
let mut red_off = 0usize;
for spec in reduced_specs.iter() {
let r_i = spec.design.ncols();
for pen in spec.penalties.iter() {
let s_dense = pen.as_dense_cow();
if s_dense.nrows() == r_i && s_dense.ncols() == r_i {
for ii in 0..r_i {
for jj in 0..r_i {
s_joint[[red_off + ii, red_off + jj]] += s_dense[[ii, jj]];
}
}
}
}
red_off += r_i;
}
let mut red_col_offsets: Vec<usize> = Vec::with_capacity(reduced_specs.len() + 1);
red_col_offsets.push(0);
for spec in reduced_specs.iter() {
let prev = *red_col_offsets.last().unwrap();
red_col_offsets.push(prev + spec.design.ncols());
}
let j_for_map = j_can_reduced.as_ref().unwrap_or(&j_pre);
crate::identifiability::audit::check_map_uniqueness(
j_for_map,
&[],
&s_joint,
&reduced_specs,
&red_col_offsets,
)
.map_err(|error| {
log::warn!("[CANON] MAP uniqueness check failed: {}", error.message,);
CustomFamilyError::MapUniquenessFailure { error }
})?;
log::debug!(
"[CANON] MAP uniqueness check passed \
(p_red={p_total_red} penalty_blocks={})",
reduced_specs
.iter()
.map(|s| s.penalties.len())
.sum::<usize>(),
);
}
}
Ok(CanonicalSpecs {
reduced_specs,
gauge: Gauge::from_block_transforms(&per_block_transform),
audit,
used_channel_aware_audit: use_channel_aware,
})
}
fn try_orthogonalize_blocks(
specs: &[ParameterBlockSpec],
) -> Result<Option<CanonicalSpecs>, CustomFamilyError> {
if specs.len() < 2 {
return Ok(None);
}
let family_owned_geometry = specs.iter().any(|s| s.jacobian_callback.is_some());
if family_owned_geometry {
return Ok(None);
}
let multi_channel_geometry = specs.iter().any(|s| s.stacked_design.is_some());
if multi_channel_geometry {
return Ok(None);
}
let n_rows = specs[0].design.nrows();
let mut block_designs: Vec<Array2<f64>> = Vec::with_capacity(specs.len());
for spec in specs.iter() {
if spec.design.nrows() != n_rows {
return Ok(None);
}
let dense = match spec
.design
.try_to_dense_arc("orthogonalize_design_blocks densify")
{
Ok(arc) => arc.as_ref().clone(),
Err(_) => return Ok(None),
};
block_designs.push(dense);
}
let weight = vec![1.0_f64; n_rows];
let priority: Vec<u32> = specs.iter().map(|s| s.gauge_priority as u32).collect();
let ortho = orthogonalize_design_blocks(&block_designs, &priority, &weight).map_err(|e| {
CustomFamilyError::DimensionMismatch {
reason: format!("orthogonalize_design_blocks failed: {e}"),
}
})?;
if ortho.dropped.is_empty() {
return Ok(None);
}
let mut visit_order: Vec<usize> = (0..specs.len()).collect();
visit_order.sort_by(|&a, &b| priority[b].cmp(&priority[a]));
let visit_rank: Vec<usize> = {
let mut rank = vec![0usize; specs.len()];
for (r, &b) in visit_order.iter().enumerate() {
rank[b] = r;
}
rank
};
for annotation in ortho
.direction_annotations
.iter()
.filter(|annotation| annotation.absorbed_width > 0)
{
let absorbed = annotation.block_idx;
let equal_priority_anchor_exists = (0..specs.len()).any(|other| {
other != absorbed
&& visit_rank[other] < visit_rank[absorbed]
&& priority[other] == priority[absorbed]
});
if equal_priority_anchor_exists {
log::info!(
"[CANON] orthogonalisation declined: block {} (priority {}) was absorbed into an \
equal-priority anchor — exact alias has no gauge ordering; deferring to the fatal \
audit gate instead of arbitrarily dropping the later block's column",
absorbed,
priority[absorbed],
);
return Ok(None);
}
}
for annotation in ortho
.direction_annotations
.iter()
.filter(|annotation| annotation.absorbed_width > 0)
{
log::info!(
"[IDENT] structural direction annotation: block={} raw_width={} kept_width={} absorbed_width={} kind={:?}",
annotation.block_idx,
annotation.raw_width,
annotation.kept_width,
annotation.absorbed_width,
annotation.kind,
);
}
let mut ortho_specs: Vec<ParameterBlockSpec> = Vec::with_capacity(specs.len());
for (spec, v_b) in specs.iter().zip(ortho.block_transforms.iter()) {
let p_b = spec.design.ncols();
if v_b.nrows() != p_b {
return Err(CustomFamilyError::DimensionMismatch {
reason: format!(
"orthogonalize: block '{}' transform has {} rows but design has {p_b} columns",
spec.name,
v_b.nrows(),
),
});
}
let inner_dense = match &spec.design {
DesignMatrix::Dense(d) => d.clone(),
DesignMatrix::Sparse(_) => {
let dense = spec
.design
.try_to_dense_arc("orthogonalize reduced-design densify")
.map_err(|reason| CustomFamilyError::DimensionMismatch {
reason: format!(
"orthogonalize: densify block '{}' failed: {reason}",
spec.name,
),
})?;
DenseDesignMatrix::from(dense)
}
};
let op = CoefficientTransformOperator::new(inner_dense, v_b.clone()).map_err(|reason| {
CustomFamilyError::DimensionMismatch {
reason: format!(
"orthogonalize: build CoefficientTransformOperator for block '{}': {reason}",
spec.name,
),
}
})?;
let reduced_design = DesignMatrix::Dense(DenseDesignMatrix::from(Arc::new(op)));
let reduced_penalties: Vec<PenaltyMatrix> = spec
.penalties
.iter()
.map(|p| pull_back_penalty_dense(p, v_b))
.collect();
let reduced_initial_beta = match &spec.initial_beta {
Some(beta_raw) => {
if beta_raw.len() != p_b {
return Err(CustomFamilyError::DimensionMismatch {
reason: format!(
"orthogonalize: block '{}' initial_beta length {} != design ncols {p_b}",
spec.name,
beta_raw.len(),
),
});
}
Some(v_b.t().dot(beta_raw))
}
None => None,
};
ortho_specs.push(ParameterBlockSpec {
name: spec.name.clone(),
design: reduced_design,
offset: spec.offset.clone(),
penalties: reduced_penalties,
nullspace_dims: Vec::new(),
initial_log_lambdas: spec.initial_log_lambdas.clone(),
initial_beta: reduced_initial_beta,
gauge_priority: spec.gauge_priority,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
});
}
let inner = canonicalize_for_identifiability_inner(&ortho_specs, false)?;
let mut composed_transform: Vec<Array2<f64>> = Vec::with_capacity(specs.len());
for (b, v_b) in ortho.block_transforms.iter().enumerate() {
let t_inner = inner.gauge.block_transform(b);
if v_b.ncols() != t_inner.nrows() {
return Err(CustomFamilyError::DimensionMismatch {
reason: format!(
"orthogonalize: transform composition shape mismatch — V_b is {:?}, \
T_inner is {:?}",
v_b.dim(),
t_inner.dim(),
),
});
}
composed_transform.push(v_b.dot(&t_inner));
}
log::info!(
"[CANON] orthogonalisation applied: {} block(s) shed overlap directions {:?}; \
p_raw={} → p_reduced={}",
ortho.dropped.len(),
ortho.dropped,
specs.iter().map(|s| s.design.ncols()).sum::<usize>(),
composed_transform.iter().map(|t| t.ncols()).sum::<usize>(),
);
Ok(Some(CanonicalSpecs {
reduced_specs: inner.reduced_specs,
gauge: Gauge::from_block_transforms(&composed_transform),
audit: inner.audit,
used_channel_aware_audit: inner.used_channel_aware_audit,
}))
}
fn pull_back_penalty_dense(penalty: &PenaltyMatrix, v: &Array2<f64>) -> PenaltyMatrix {
let label = penalty.precision_label().map(|s| s.to_string());
let fixed_log_lambda = penalty.fixed_log_lambda();
let dense = penalty.as_dense_cow();
let s_v = dense.dot(v);
let reduced = v.t().dot(&s_v);
let mut base = PenaltyMatrix::Dense(reduced);
if let Some(lbl) = label {
base = base.with_precision_label(lbl);
}
if let Some(value) = fixed_log_lambda {
base = base.with_fixed_log_lambda(value);
}
base
}
fn build_reduced_design(
raw: &DesignMatrix,
kept: &[usize],
block_name: &str,
t_i: &Array2<f64>,
) -> Result<DesignMatrix, CustomFamilyError> {
let inner_dense = match raw {
DesignMatrix::Dense(d) => d.clone(),
DesignMatrix::Sparse(_) => {
let dense = raw
.try_to_dense_by_chunks(&format!(
"canonicalize_for_identifiability sparse->dense block '{block_name}'"
))
.map_err(|reason| CustomFamilyError::DimensionMismatch {
reason: format!(
"canonicalize_for_identifiability: densify sparse block '{block_name}' \
failed: {reason}"
),
})?;
DenseDesignMatrix::from(dense)
}
};
if let Some(arr) = inner_dense.as_dense_ref() {
let reduced =
Array2::<f64>::from_shape_fn((arr.nrows(), kept.len()), |(i, j)| arr[[i, kept[j]]]);
return Ok(DesignMatrix::Dense(DenseDesignMatrix::from(reduced)));
}
let op = CoefficientTransformOperator::new(inner_dense, t_i.clone()).map_err(|reason| {
CustomFamilyError::DimensionMismatch {
reason: format!(
"canonicalize_for_identifiability: build CoefficientTransformOperator \
for block '{block_name}': {reason}"
),
}
})?;
Ok(DesignMatrix::Dense(DenseDesignMatrix::from(Arc::new(op))))
}
fn pull_back_penalty(penalty: &PenaltyMatrix, kept: &[usize]) -> PenaltyMatrix {
let label = penalty.precision_label().map(|s| s.to_string());
let fixed_log_lambda = penalty.fixed_log_lambda();
let dense = penalty.as_dense_cow();
let reduced =
Array2::<f64>::from_shape_fn((kept.len(), kept.len()), |(i, j)| dense[[kept[i], kept[j]]]);
let mut base = PenaltyMatrix::Dense(reduced);
if let Some(lbl) = label {
base = base.with_precision_label(lbl);
}
if let Some(value) = fixed_log_lambda {
base = base.with_fixed_log_lambda(value);
}
base
}
#[cfg(test)]
mod tests {
use super::*;
use crate::families::custom_family::AdditiveBlockJacobian;
use crate::linalg::matrix::DenseDesignMatrix;
use ndarray::Array2;
use crate::test_support::spec_from_dense;
fn linspace(n: usize) -> ndarray::Array1<f64> {
if n <= 1 {
return ndarray::Array1::<f64>::zeros(n.max(1));
}
ndarray::Array1::linspace(-1.0, 1.0, n)
}
#[test]
fn canonical_clean_specs_identity_transform() {
let n = 32;
let x = linspace(n);
let mut p = Array2::<f64>::zeros((n, 2));
let mut s = Array2::<f64>::zeros((n, 2));
for i in 0..n {
p[[i, 0]] = 1.0;
p[[i, 1]] = x[i];
s[[i, 0]] = x[i] * x[i];
s[[i, 1]] = x[i] * x[i] * x[i];
}
let specs = [spec_from_dense("p", p), spec_from_dense("s", s)];
let canon = canonicalize_for_identifiability(&specs).expect("clean canonical must succeed");
assert_eq!(canon.reduced_specs.len(), 2);
assert_eq!(canon.gauge.block_transform(0).dim(), (2, 2));
assert_eq!(canon.gauge.block_transform(1).dim(), (2, 2));
let theta = vec![Array1::from(vec![0.5, -0.25]), Array1::from(vec![1.0, 2.0])];
let raw = canon.gauge.lift_block_betas(&theta);
assert_eq!(raw[0].as_slice().unwrap(), &[0.5, -0.25]);
assert_eq!(raw[1].as_slice().unwrap(), &[1.0, 2.0]);
}
#[test]
fn canonical_refuses_aliased_smooth_constant_with_intercept() {
let n = 64;
let x = linspace(n);
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let mut smooth = Array2::<f64>::zeros((n, 3));
for i in 0..n {
smooth[[i, 0]] = 1.0;
smooth[[i, 1]] = x[i] * x[i];
smooth[[i, 2]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_with_const", smooth),
];
let err = canonicalize_for_identifiability(&specs)
.expect_err("aliased smooth-constant + intercept must refuse, not reduce");
match err {
CustomFamilyError::IdentifiabilityFailure { audit } => {
assert!(
audit.fatal,
"audit attached to IdentifiabilityFailure must be fatal; got {}",
audit.summary,
);
assert!(
audit.summary.contains("intercept")
&& audit.summary.contains("smooth_with_const"),
"refusal summary must name both offending blocks; got {:?}",
audit.summary,
);
}
other => panic!("expected IdentifiabilityFailure, got {other:?}"),
}
}
#[test]
fn canonical_five_block_gauge_ownership_succeeds_with_attribution() {
let n = 96;
let x = linspace(n);
let mut time = Array2::<f64>::zeros((n, 3));
let mut marginal = Array2::<f64>::zeros((n, 3));
let mut logslope = Array2::<f64>::zeros((n, 3));
let mut score_warp = Array2::<f64>::zeros((n, 2));
let mut link_dev = Array2::<f64>::zeros((n, 2));
for i in 0..n {
time[[i, 0]] = 1.0;
time[[i, 1]] = x[i];
time[[i, 2]] = x[i] * x[i] * x[i];
marginal[[i, 0]] = 1.0;
marginal[[i, 1]] = x[i] * x[i];
marginal[[i, 2]] = x[i].sin();
logslope[[i, 0]] = 1.0;
logslope[[i, 1]] = (3.0 * x[i]).sin();
logslope[[i, 2]] = (6.0 * x[i]).sin();
score_warp[[i, 0]] = 1.0;
score_warp[[i, 1]] = (5.0 * x[i]).cos();
link_dev[[i, 0]] = 1.0;
link_dev[[i, 1]] = (7.0 * x[i]).tanh();
}
let mut t_spec = spec_from_dense("time_surface", time);
t_spec.gauge_priority = 200;
let mut m_spec = spec_from_dense("marginal_surface", marginal);
m_spec.gauge_priority = 150;
let mut g_spec = spec_from_dense("logslope_surface", logslope);
g_spec.gauge_priority = 120;
let mut w_spec = spec_from_dense("score_warp_dev", score_warp);
w_spec.gauge_priority = 80;
let mut l_spec = spec_from_dense("link_dev", link_dev);
l_spec.gauge_priority = 60;
let specs = [t_spec, m_spec, g_spec, w_spec, l_spec];
let canon = canonicalize_for_identifiability(&specs).expect(
"five-block aliased joint with distinct gauge_priority must succeed (gauge-resolved)",
);
assert!(
!canon.audit.fatal,
"audit must be non-fatal when gauge_priority is non-trivial and \
all drops are attributed to lower-priority blocks; got {}",
canon.audit.summary,
);
let total_kept: usize = canon.audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept,
9,
"expected joint rank = 13 − 4 = 9 reported by audit; got {total_kept} \
(per-block effective_dim {:?})",
canon
.audit
.blocks
.iter()
.map(|b| (b.block_name.clone(), b.effective_dim))
.collect::<Vec<_>>(),
);
for drop in &canon.audit.dropped_columns {
assert_ne!(
drop.block, "time_surface",
"highest-priority block must never be the attributed drop \
origin under priority-aware RRQR; got drop on time_surface \
({drop:?})",
);
}
let reduced_total: usize = canon.reduced_specs.iter().map(|s| s.design.ncols()).sum();
assert_eq!(
reduced_total, 9,
"reduced specs must have joint rank = 9 total columns; got {reduced_total}",
);
assert_eq!(
canon.reduced_specs[0].design.ncols(),
3,
"time_surface must retain all 3 columns after gauge canonicalisation",
);
}
#[test]
fn canonical_multinomial_near_separable_post_t_rank_invariant_holds() {
let n = 120;
let x = linspace(n);
let eps_rel = 1e-7_f64;
let p = 4usize;
let mut xmat = Array2::<f64>::zeros((n, p));
for i in 0..n {
let xi = x[i];
xmat[[i, 0]] = 1.0; xmat[[i, 1]] = xi; xmat[[i, 2]] = (3.0 * xi).sin(); let w = (7.0 * xi).cos();
xmat[[i, 3]] = 1.0 + 0.5 * xi + eps_rel * w;
}
let m = 2usize; let specs: Vec<ParameterBlockSpec> = (0..m)
.map(|a| {
let mut spec = spec_from_dense_with_priority(
&format!("class_{a}"),
xmat.clone(),
100u8.saturating_add((m - a) as u8),
);
spec.jacobian_callback = Some(std::sync::Arc::new(AdditiveBlockJacobian {
design: xmat.clone(),
own_output: a,
n_family_outputs: m,
}));
spec
})
.collect();
match canonicalize_for_identifiability(&specs) {
Ok(canon) => {
let theta: Vec<Array1<f64>> = canon
.reduced_specs
.iter()
.map(|s| Array1::<f64>::zeros(s.design.ncols()))
.collect();
let raw = canon.gauge.lift_block_betas(&theta);
assert_eq!(raw.len(), m, "lift must return one beta per channel block");
for (a, b) in raw.iter().enumerate() {
assert_eq!(
b.len(),
p,
"lifted channel {a} beta must be raw width {p}, got {}",
b.len()
);
}
}
Err(CustomFamilyError::DimensionMismatch { reason })
if reason.contains("post-T rank invariant violated") =>
{
panic!(
"post-T rank invariant must hold under the shared audit rank \
convention on near-separable multinomial data (#1220); got: {reason}"
);
}
Err(other) => {
let msg = format!("{other:?}");
assert!(
!msg.contains("post-T rank invariant"),
"must not be a post-T rank-invariant failure; got {msg}"
);
}
}
}
#[test]
fn post_t_invariant_jcan_full_rank_under_channel_aware_convention_1391() {
let n = 96;
let x = linspace(n);
let p_total = 6usize;
let mut j_pre = Array2::<f64>::zeros((n, p_total));
for i in 0..n {
let xi = x[i];
j_pre[[i, 0]] = 1.0;
j_pre[[i, 1]] = xi;
j_pre[[i, 2]] = (2.0 * xi).sin();
j_pre[[i, 3]] = (3.0 * xi).cos();
j_pre[[i, 4]] = xi * xi;
j_pre[[i, 5]] = 1.0 + 1e-5 * (7.0 * xi).sin();
}
let nk_scale = n;
let rank_pre = audit_convention_rank(&j_pre, nk_scale, &[]);
assert_eq!(
rank_pre, p_total,
"fixture must make the joint eigendecomposition count the near-separable \
column in J_pre (rank {rank_pre} != {p_total})"
);
let kept: [usize; 5] = [0, 1, 2, 3, 4];
let p_red = kept.len();
let mut j_can = Array2::<f64>::zeros((n, p_red));
for i in 0..n {
for (out, &src) in kept.iter().enumerate() {
j_can[[i, out]] = j_pre[[i, src]];
}
}
let rank_can = audit_convention_rank(&j_can, nk_scale, &[]);
assert!(
rank_pre > rank_can,
"fixture must reproduce the over-count: rank(J_pre)={rank_pre} must \
exceed rank(J_can)={rank_can} under the single-eigendecomposition convention"
);
assert_eq!(
rank_can, p_red,
"post-T rank invariant must hold: reduced J_can must be full rank \
({rank_can} != p_red {p_red}) under the channel-aware σ²-Gram convention (#1391)"
);
}
#[test]
fn flat_audit_convention_rank_jcan_full_rank_penalty_augmented() {
let n = 64;
let x = linspace(n);
let z = Array1::from_iter((0..n).map(|i| (1.7 * x[i]).tanh() + 0.3 * x[i] * x[i]));
let mut mu_d = Array2::<f64>::zeros((n, 4));
let mut sigma_d = Array2::<f64>::zeros((n, 4));
for i in 0..n {
let xi = x[i];
let zi = z[i];
mu_d[[i, 0]] = 1.0;
mu_d[[i, 1]] = xi;
mu_d[[i, 2]] = (3.0 * xi).sin();
mu_d[[i, 3]] = (3.0 * xi).cos();
sigma_d[[i, 0]] = 1.0;
sigma_d[[i, 1]] = zi;
sigma_d[[i, 2]] = (5.0 * zi).sin();
sigma_d[[i, 3]] = (5.0 * zi).cos();
}
let p_red = 7usize;
let mut j_can = Array2::<f64>::zeros((n, p_red));
for i in 0..n {
for j in 0..4 {
j_can[[i, j]] = mu_d[[i, j]];
}
j_can[[i, 4]] = sigma_d[[i, 1]];
j_can[[i, 5]] = sigma_d[[i, 2]];
j_can[[i, 6]] = sigma_d[[i, 3]];
}
let mut s = Array2::<f64>::zeros((4, 4));
s[[2, 2]] = 1.0;
s[[3, 3]] = 1.0;
s[[2, 3]] = -1.0;
s[[3, 2]] = -1.0;
let mut s_red = Array2::<f64>::zeros((3, 3));
s_red[[1, 1]] = 1.0;
s_red[[2, 2]] = 1.0;
s_red[[1, 2]] = -1.0;
s_red[[2, 1]] = -1.0;
let blocks_can = [
FlatRankBlock {
width: 4,
structural_penalty: Some(s),
priority: 120,
},
FlatRankBlock {
width: 3,
structural_penalty: Some(s_red),
priority: 100,
},
];
let rank_can = flat_audit_convention_rank(&j_can, &blocks_can);
assert_eq!(
rank_can, p_red,
"reduced J_can must be full rank ({rank_can} != p_red {p_red}) under the \
penalty-augmented, priority-tiered flat convention (#1391/#1388)"
);
}
#[test]
fn post_t_invariant_underdetermined_jcan_meets_min_target_1388() {
let n = 4usize;
let p_total_raw = 6usize;
let mut j_pre = Array2::<f64>::zeros((n, p_total_raw));
for i in 0..n {
j_pre[[i, 0]] = 1.0;
}
for i in 0..n {
let xi = i as f64;
j_pre[[i, 1]] = 1.0; j_pre[[i, 2]] = xi;
j_pre[[i, 3]] = xi * xi;
j_pre[[i, 4]] = xi * xi * xi;
j_pre[[i, 5]] = xi * xi * xi;
}
let blocks_pre = [
FlatRankBlock {
width: 1,
structural_penalty: None,
priority: 200,
},
FlatRankBlock {
width: 5,
structural_penalty: None,
priority: 100,
},
];
let rank_j_pre = flat_audit_convention_rank(&j_pre, &blocks_pre);
assert!(
rank_j_pre < p_total_raw,
"fixture must be under-determined: rank(J_pre)={rank_j_pre} must be < \
p_raw={p_total_raw}",
);
let kept: [usize; 5] = [1, 2, 3, 4, 5];
let p_total_red = kept.len();
let mut j_can = Array2::<f64>::zeros((n, p_total_red));
for i in 0..n {
for (out, &src) in kept.iter().enumerate() {
j_can[[i, out]] = j_pre[[i, src]];
}
}
let blocks_can = [FlatRankBlock {
width: 5,
structural_penalty: None,
priority: 100,
}];
let rank_j_can = flat_audit_convention_rank(&j_can, &blocks_can);
assert!(
rank_j_can < p_total_red,
"fixture must reproduce the under-determined deficit: rank(J_can)=\
{rank_j_can} must be < p_red={p_total_red} (the OLD invariant would abort)",
);
let rank_target = rank_j_pre.min(p_total_red);
assert_eq!(
rank_j_can, rank_target,
"post-T rank invariant must hold under the corrected target: \
rank(J_can)={rank_j_can} != min(rank(J_pre)={rank_j_pre}, \
p_red={p_total_red})={rank_target} (#1388)",
);
}
#[test]
fn canonical_clean_specs_with_penalty_round_trip() {
let n = 32;
let x = linspace(n);
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let mut smooth = Array2::<f64>::zeros((n, 2));
for i in 0..n {
smooth[[i, 0]] = x[i] * x[i];
smooth[[i, 1]] = x[i] * x[i] * x[i];
}
let mut smooth_spec = spec_from_dense("smooth_only", smooth);
let mut s = Array2::<f64>::zeros((2, 2));
s[[0, 0]] = 4.0;
s[[1, 1]] = 9.0;
s[[0, 1]] = 1.5;
s[[1, 0]] = 1.5;
smooth_spec.penalties = vec![PenaltyMatrix::Dense(s.clone())];
smooth_spec.initial_log_lambdas = Array1::from(vec![0.0]);
let specs = [spec_from_dense("intercept", parametric), smooth_spec];
let canon = canonicalize_for_identifiability(&specs)
.expect("clean canonical must succeed with identity transforms");
let smooth_reduced = &canon.reduced_specs[1];
assert_eq!(smooth_reduced.penalties.len(), 1);
let dense_red = smooth_reduced.penalties[0].as_dense_cow().into_owned();
assert_eq!(dense_red.dim(), (2, 2));
let t_smooth = canon.gauge.block_transform(1);
assert_eq!(t_smooth.dim(), (2, 2));
for i in 0..2 {
for j in 0..2 {
let expected = if i == j { 1.0 } else { 0.0 };
assert_eq!(t_smooth[[i, j]], expected, "T_smooth must be identity");
}
}
}
use crate::test_support::spec_from_dense_with_priority;
#[test]
fn callback_owned_geometry_reduces_and_round_trips_via_gauge() {
use crate::families::custom_family::FamilyLinearizationState;
let n = 48;
let x = linspace(n);
let mut anchor = Array2::<f64>::zeros((n, 2));
let mut callback_owned = Array2::<f64>::zeros((n, 2));
for i in 0..n {
anchor[[i, 0]] = 1.0;
anchor[[i, 1]] = x[i];
callback_owned[[i, 0]] = x[i];
callback_owned[[i, 1]] = x[i] * x[i];
}
let anchor_spec = spec_from_dense_with_priority("marginal_surface", anchor, 150);
let mut callback_spec =
spec_from_dense_with_priority("logslope_surface", callback_owned.clone(), 120);
let raw_callback: Arc<dyn BlockEffectiveJacobian> = Arc::new(AdditiveBlockJacobian {
design: callback_owned.clone(),
own_output: 0,
n_family_outputs: 1,
});
callback_spec.jacobian_callback = Some(Arc::clone(&raw_callback));
let specs = [anchor_spec, callback_spec];
let canon = canonicalize_for_identifiability(&specs)
.expect("callback-only overlap must now reduce safely (#933)");
assert!(
!canon.audit.fatal,
"priority-owned overlap should be non-fatal; got {}",
canon.audit.summary,
);
assert!(
canon
.audit
.dropped_columns
.iter()
.any(|drop| drop.block == "logslope_surface"),
"test must exercise an attributed logslope drop; got {:?}",
canon.audit.dropped_columns,
);
assert_eq!(
canon.reduced_specs[0].design.ncols(),
2,
"anchor block keeps raw width"
);
assert_eq!(
canon.reduced_specs[1].design.ncols(),
1,
"callback-only block is now column-reduced via the composed gauge (#933)",
);
let t_b = canon.gauge.block_transform(1);
assert_eq!(t_b.dim(), (2, 1), "reduced callback block transform is 2×1");
let reduced_cb = canon.reduced_specs[1]
.jacobian_callback
.as_ref()
.expect("reduced callback-only block must still carry a callback");
let theta = Array1::from(vec![0.73_f64]);
let beta_raw = t_b.dot(&theta); let zeros_red = vec![0.0_f64; 1];
let state_red = FamilyLinearizationState {
beta: &zeros_red,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let j_reduced = reduced_cb
.effective_jacobian_at(&state_red)
.expect("reduced callback Jacobian");
assert_eq!(j_reduced.dim(), (n, 1), "reduced Jacobian is n×1");
let eta_reduced = j_reduced.dot(&theta);
let zeros_raw = vec![0.0_f64; 2];
let state_raw = FamilyLinearizationState {
beta: &zeros_raw,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let j_raw = raw_callback
.effective_jacobian_at(&state_raw)
.expect("raw callback Jacobian");
let eta_raw = j_raw.dot(&beta_raw);
for i in 0..n {
assert!(
(eta_reduced[i] - eta_raw[i]).abs() < 1e-12,
"η must be invariant under the composed-gauge reduction at row {i}: \
reduced {} vs raw {}",
eta_reduced[i],
eta_raw[i],
);
}
}
#[test]
fn callback_owned_reduced_block_assembles_row_hessian_without_shape_panic() {
use crate::families::custom_family::FamilyLinearizationState;
let n = 48;
let x = linspace(n);
let mut anchor = Array2::<f64>::zeros((n, 2));
let mut callback_owned = Array2::<f64>::zeros((n, 2));
for i in 0..n {
anchor[[i, 0]] = 1.0;
anchor[[i, 1]] = x[i];
callback_owned[[i, 0]] = x[i];
callback_owned[[i, 1]] = x[i] * x[i];
}
let anchor_spec = spec_from_dense_with_priority("marginal_surface", anchor, 150);
let mut callback_spec =
spec_from_dense_with_priority("logslope_surface", callback_owned.clone(), 120);
let raw_callback: Arc<dyn BlockEffectiveJacobian> = Arc::new(AdditiveBlockJacobian {
design: callback_owned.clone(),
own_output: 0,
n_family_outputs: 1,
});
callback_spec.jacobian_callback = Some(Arc::clone(&raw_callback));
let canon = canonicalize_for_identifiability(&[anchor_spec, callback_spec])
.expect("callback-only overlap must reduce safely (#933)");
let reduced_block = &canon.reduced_specs[1];
let reduced_design = &reduced_block.design;
assert!(
canon
.audit
.dropped_columns
.iter()
.any(|drop| drop.block == "logslope_surface"),
"test must exercise a real callback-owned gauge drop; got {:?}",
canon.audit.dropped_columns,
);
assert_eq!(
reduced_design.ncols(),
1,
"callback-owned block must be reduced to the non-aliased column",
);
let reduced_cb = reduced_block
.jacobian_callback
.as_ref()
.expect("reduced callback-only block must still carry a callback");
let zeros_red = vec![0.0_f64; reduced_design.ncols()];
let state_red = FamilyLinearizationState {
beta: &zeros_red,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let j_reduced = reduced_cb
.effective_jacobian_at(&state_red)
.expect("reduced callback Jacobian");
assert_eq!(
reduced_design.ncols(),
j_reduced.ncols(),
"reduced design width {} must match the wrapped callback's emitted width {} \
— the precondition that makes syr_row_into / row_outer_into shape-match",
reduced_design.ncols(),
j_reduced.ncols(),
);
let p = reduced_design.ncols();
let mut syr_target = Array2::<f64>::zeros((p, p));
let mut outer_target = Array2::<f64>::zeros((p, p));
for row in 0..n {
reduced_design
.syr_row_into(row, 1.0, &mut syr_target)
.unwrap_or_else(|e| {
panic!("row {row}: reduced design must syr_row_into without shape panic: {e}")
});
reduced_design
.row_outer_into(row, reduced_design, 1.0, &mut outer_target)
.unwrap_or_else(|e| {
panic!("row {row}: reduced design must row_outer_into without shape panic: {e}")
});
}
assert!(
syr_target[[0, 0]] > 0.0,
"syr assembly must accumulate a positive diagonal on the surviving column",
);
}
#[test]
fn multichannel_callback_owned_reduced_block_assembles_without_shape_panic() {
use crate::families::custom_family::FamilyLinearizationState;
let n = 40;
let x = linspace(n);
let mut anchor = Array2::<f64>::zeros((n, 2));
let mut callback_owned = Array2::<f64>::zeros((n, 2));
for i in 0..n {
anchor[[i, 0]] = 1.0;
anchor[[i, 1]] = x[i];
callback_owned[[i, 0]] = x[i];
callback_owned[[i, 1]] = x[i] * x[i];
}
let anchor_spec = spec_from_dense_with_priority("location_anchor", anchor, 150);
let mut callback_spec =
spec_from_dense_with_priority("scale_logslope", callback_owned.clone(), 120);
let raw_callback: Arc<dyn BlockEffectiveJacobian> = Arc::new(AdditiveBlockJacobian {
design: callback_owned.clone(),
own_output: 1,
n_family_outputs: 2,
});
callback_spec.jacobian_callback = Some(Arc::clone(&raw_callback));
let canon = canonicalize_for_identifiability(&[anchor_spec, callback_spec])
.expect("multi-channel callback overlap must reduce safely (#933)");
let reduced_block = &canon.reduced_specs[1];
let reduced_design = &reduced_block.design;
let reduced_cb = reduced_block
.jacobian_callback
.as_ref()
.expect("reduced multi-channel callback block must still carry a callback");
assert_eq!(reduced_cb.n_outputs(), 2, "channel count is preserved");
let zeros_red = vec![0.0_f64; reduced_design.ncols()];
let state_red = FamilyLinearizationState {
beta: &zeros_red,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let j_reduced = reduced_cb
.effective_jacobian_at(&state_red)
.expect("reduced multi-channel callback Jacobian");
assert_eq!(
j_reduced.nrows(),
2 * n,
"two-channel Jacobian stacks both output bands",
);
assert_eq!(
reduced_design.ncols(),
j_reduced.ncols(),
"reduced design width {} must match the wrapped multi-channel callback's \
emitted width {} — the shape-match precondition for row-Hessian assembly",
reduced_design.ncols(),
j_reduced.ncols(),
);
let p = reduced_design.ncols();
let mut syr_target = Array2::<f64>::zeros((p, p));
let mut outer_target = Array2::<f64>::zeros((p, p));
for row in 0..n {
reduced_design
.syr_row_into(row, 1.0, &mut syr_target)
.unwrap_or_else(|e| {
panic!("row {row}: reduced design must syr_row_into without shape panic: {e}")
});
reduced_design
.row_outer_into(row, reduced_design, 1.0, &mut outer_target)
.unwrap_or_else(|e| {
panic!("row {row}: reduced design must row_outer_into without shape panic: {e}")
});
}
assert!(
syr_target[[0, 0]] > 0.0,
"multi-channel reduced assembly must accumulate a positive diagonal",
);
}
#[test]
fn orthogonalize_removes_exact_cross_block_overlap_and_round_trips() {
let n = 48;
let x = linspace(n);
let mut a = Array2::<f64>::zeros((n, 2));
let mut b = Array2::<f64>::zeros((n, 2));
for i in 0..n {
a[[i, 0]] = 1.0;
a[[i, 1]] = x[i];
b[[i, 0]] = x[i];
b[[i, 1]] = x[i] * x[i];
}
let specs = [
spec_from_dense_with_priority("anchor", a.clone(), 150),
spec_from_dense_with_priority("overlap", b.clone(), 120),
];
let canon = canonicalize_for_identifiability(&specs)
.expect("orthogonalisation must resolve the overlap, not refuse");
let v_b = canon.gauge.block_transform(1);
assert_eq!(
v_b.ncols(),
1,
"overlap block must keep exactly one direction"
);
assert_eq!(
v_b.nrows(),
2,
"overlap block transform maps from raw width 2"
);
assert_eq!(canon.gauge.block_transform(0).ncols(), 2);
let theta = vec![Array1::from(vec![0.7, -0.3]), Array1::from(vec![1.4])];
let raw = canon.gauge.lift_block_betas(&theta);
assert_eq!(raw[0].len(), 2);
assert_eq!(raw[1].len(), 2);
let pred_a = a.dot(&raw[0]);
let pred_b = b.dot(&raw[1]);
let v_a = canon.gauge.block_transform(0);
let red_a = a.dot(&v_a).dot(&theta[0]);
let red_b = b.dot(&v_b).dot(&theta[1]);
for i in 0..n {
let raw_pred = pred_a[i] + pred_b[i];
let red_pred = red_a[i] + red_b[i];
assert!(
(raw_pred - red_pred).abs() < 1e-9,
"row {i}: raw prediction {raw_pred} != reduced prediction {red_pred}",
);
}
}
#[test]
fn orthogonalize_clean_design_yields_identity_transforms() {
let n = 32;
let x = linspace(n);
let mut p = Array2::<f64>::zeros((n, 2));
let mut s = Array2::<f64>::zeros((n, 2));
for i in 0..n {
p[[i, 0]] = 1.0;
p[[i, 1]] = x[i];
s[[i, 0]] = x[i] * x[i];
s[[i, 1]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense_with_priority("p", p, 150),
spec_from_dense_with_priority("s", s, 120),
];
let canon = canonicalize_for_identifiability(&specs).expect("clean design canonicalises");
assert_eq!(canon.gauge.block_transform(0).dim(), (2, 2));
assert_eq!(canon.gauge.block_transform(1).dim(), (2, 2));
}
#[test]
fn orthogonalize_design_blocks_drops_only_overlap() {
use crate::identifiability::families::compiler::orthogonalize_design_blocks;
let n = 40;
let x = linspace(n);
let mut anchor = Array2::<f64>::zeros((n, 2));
let mut overlap = Array2::<f64>::zeros((n, 2));
for i in 0..n {
anchor[[i, 0]] = 1.0;
anchor[[i, 1]] = x[i];
overlap[[i, 0]] = x[i];
overlap[[i, 1]] = x[i] * x[i] * x[i];
}
let weight = vec![1.0_f64; n];
let res =
orthogonalize_design_blocks(&[anchor.clone(), overlap.clone()], &[150, 120], &weight)
.expect("orthogonalisation must succeed");
assert_eq!(
res.block_transforms[0].ncols(),
2,
"anchor keeps full width"
);
assert_eq!(
res.block_transforms[1].ncols(),
1,
"overlap block sheds exactly the aliased direction",
);
assert_eq!(
res.dropped,
vec![(1, 1)],
"one direction dropped from block 1"
);
}
#[test]
fn canonical_preserves_stacked_design_for_multichannel_block() {
let n = 24;
let x = linspace(n);
let mut mean = Array2::<f64>::zeros((n, 2));
let mut time_exit = Array2::<f64>::zeros((n, 2));
for i in 0..n {
mean[[i, 0]] = 1.0;
mean[[i, 1]] = x[i];
time_exit[[i, 0]] = 1.0;
time_exit[[i, 1]] = x[i] * x[i];
}
let mut stacked = Array2::<f64>::zeros((3 * n, 2));
for i in 0..n {
stacked[[i, 0]] = 1.0;
stacked[[i, 1]] = x[i] * x[i] * 0.5;
stacked[[n + i, 0]] = 1.0;
stacked[[n + i, 1]] = x[i] * x[i];
stacked[[2 * n + i, 0]] = 0.0;
stacked[[2 * n + i, 1]] = 2.0 * x[i];
}
let mean_spec = spec_from_dense_with_priority("mean", mean, 150);
let mut time_spec = spec_from_dense_with_priority("time_transform", time_exit, 200);
time_spec.stacked_design = Some(DesignMatrix::Dense(DenseDesignMatrix::from(stacked)));
time_spec.stacked_offset = Some(Array1::<f64>::zeros(3 * n));
let specs = [mean_spec, time_spec];
let canon = canonicalize_for_identifiability(&specs)
.expect("multi-channel canonicalisation must succeed (defer to audit gate)");
let time_reduced = canon
.reduced_specs
.iter()
.find(|s| s.name == "time_transform")
.expect("time block survives canonicalisation");
let stacked_after = time_reduced
.stacked_design
.as_ref()
.expect("stacked_design must survive canonicalisation (gam#1068)");
assert_eq!(
stacked_after.nrows(),
3 * n,
"stacked eta operator must keep its 3·n rows; collapsing to n is the #1068 bug",
);
assert!(
time_reduced.stacked_offset.is_some(),
"stacked_offset must survive alongside stacked_design",
);
}
#[test]
fn canonical_survival_ls_aft_keeps_covariate_when_time_owns_geometry() {
let n = 96;
let x = linspace(n);
let age: Vec<f64> = (0..n)
.map(|i| (i as f64) - (n as f64 - 1.0) / 2.0)
.collect();
let mut time_exit = Array2::<f64>::zeros((n, 2));
let mut time_stacked = Array2::<f64>::zeros((3 * n, 2));
for i in 0..n {
time_exit[[i, 0]] = 1.0;
time_exit[[i, 1]] = x[i];
time_stacked[[i, 0]] = 1.0;
time_stacked[[i, 1]] = 0.5 * x[i];
time_stacked[[n + i, 0]] = 1.0;
time_stacked[[n + i, 1]] = x[i];
time_stacked[[2 * n + i, 0]] = 0.0;
time_stacked[[2 * n + i, 1]] = 1.0;
}
let mut threshold = Array2::<f64>::zeros((n, 2));
for i in 0..n {
threshold[[i, 0]] = 1.0;
threshold[[i, 1]] = age[i];
}
let mut t_spec = spec_from_dense("time_transform", time_exit);
t_spec.gauge_priority = 200;
t_spec.stacked_design = Some(DesignMatrix::Dense(DenseDesignMatrix::from(time_stacked)));
t_spec.stacked_offset = Some(Array1::<f64>::zeros(3 * n));
let mut th_spec = spec_from_dense("threshold", threshold);
th_spec.gauge_priority = 150;
let specs = [t_spec, th_spec];
let canon = canonicalize_for_identifiability(&specs).expect(
"survival-LS AFT aliased-constant joint with distinct gauge_priority must \
succeed (gauge-resolved, gam#1110)",
);
assert!(
!canon.audit.fatal,
"audit must be non-fatal: the surplus constant is gauge-resolvable; got {}",
canon.audit.summary,
);
for drop in &canon.audit.dropped_columns {
assert_ne!(
drop.block, "time_transform",
"geometry-owning time_transform block must never be the attributed \
drop origin (gam#1110/#1068); got {drop:?}",
);
}
let time_reduced = canon
.reduced_specs
.iter()
.find(|s| s.name == "time_transform")
.expect("time_transform survives canonicalisation");
assert_eq!(
time_reduced.design.ncols(),
2,
"time_transform must keep both raw columns (raw-width owned geometry)",
);
assert_eq!(
time_reduced
.stacked_design
.as_ref()
.expect("time_transform stacked_design must survive (gam#1068)")
.nrows(),
3 * n,
"time_transform stacked eta operator must keep its 3·n rows",
);
let threshold_reduced = canon
.reduced_specs
.iter()
.find(|s| s.name == "threshold")
.expect("threshold block survives canonicalisation");
let th_dense = threshold_reduced
.design
.try_to_dense_arc("threshold reduced densify")
.expect("dense threshold design");
let th_view = th_dense.as_ref();
assert!(
th_view.ncols() >= 1,
"threshold block must retain its `age` covariate column",
);
let has_nonconstant_column = (0..th_view.ncols()).any(|c| {
let col = th_view.column(c);
let first = col[0];
col.iter().any(|&v| (v - first).abs() > 1e-9)
});
assert!(
has_nonconstant_column,
"threshold block must KEEP the genuine non-constant `age` covariate \
direction after canonicalisation — the #1110 bug dropped/pinned it, \
leaving only constant columns (gam_a_age = 0.00000)",
);
let reduced_total: usize = canon.reduced_specs.iter().map(|s| s.design.ncols()).sum();
assert_eq!(
reduced_total,
3,
"joint rank = 4 raw columns − 1 surplus constant = 3; got {reduced_total} \
(per-block reduced ncols {:?})",
canon
.reduced_specs
.iter()
.map(|s| (s.name.clone(), s.design.ncols()))
.collect::<Vec<_>>(),
);
assert_eq!(
threshold_reduced.design.ncols(),
1,
"threshold keeps exactly its `age` column after the intercept drop",
);
}
}