use ndarray::{Array1, Array2};
use crate::families::custom_family::ParameterBlockSpec;
use crate::linalg::faer_ndarray::{FaerQr, default_rrqr_rank_alpha, rrqr_with_permutation};
#[derive(Debug, Clone)]
pub struct BlockIdentity {
pub block_name: String,
pub original_dim: usize,
pub effective_dim: usize,
pub design_range_rank: usize,
pub design_range_singular_values: Vec<f64>,
}
#[derive(Debug, Clone)]
pub struct AliasedPair {
pub block_a: String,
pub block_b: String,
pub direction_a: usize,
pub direction_b: usize,
pub overlap: f64,
}
#[derive(Debug, Clone)]
pub struct DroppedColumn {
pub block: String,
pub column: usize,
pub reason: String,
}
#[derive(Debug, Clone)]
pub struct IdentifiabilityAudit {
pub blocks: Vec<BlockIdentity>,
pub aliased_pairs: Vec<AliasedPair>,
pub dropped_columns: Vec<DroppedColumn>,
pub fatal: bool,
pub summary: String,
}
const ALIAS_OVERLAP_REPORTING_THRESHOLD: f64 = 0.95;
pub fn audit_identifiability(specs: &[ParameterBlockSpec]) -> Result<IdentifiabilityAudit, String> {
if specs.is_empty() {
return Ok(IdentifiabilityAudit {
blocks: Vec::new(),
aliased_pairs: Vec::new(),
dropped_columns: Vec::new(),
fatal: false,
summary: "identifiability audit: no blocks supplied".to_string(),
});
}
let n = specs[0].design.nrows();
for (idx, spec) in specs.iter().enumerate() {
if spec.design.nrows() != n {
return Err(format!(
"identifiability audit: block {} ({}) has {} rows, expected {}",
idx,
spec.name,
spec.design.nrows(),
n,
));
}
}
let mut dense_blocks: Vec<Array2<f64>> = Vec::with_capacity(specs.len());
let mut blocks: Vec<BlockIdentity> = Vec::with_capacity(specs.len());
let mut col_offsets: Vec<usize> = Vec::with_capacity(specs.len() + 1);
col_offsets.push(0);
for spec in specs {
let dense = spec
.design
.try_to_dense_arc(&format!(
"identifiability_audit::audit_identifiability block '{}'",
spec.name
))?
.as_ref()
.clone();
let p_block = dense.ncols();
let block_singular = block_pivoted_qr_diagonal(&dense)?;
let block_rank = count_rank(&block_singular, n, p_block);
blocks.push(BlockIdentity {
block_name: spec.name.clone(),
original_dim: p_block,
effective_dim: p_block,
design_range_rank: block_rank,
design_range_singular_values: block_singular,
});
let next_offset = col_offsets[col_offsets.len() - 1] + p_block;
col_offsets.push(next_offset);
dense_blocks.push(dense);
}
let p_total = *col_offsets.last().expect("col_offsets non-empty");
if p_total == 0 {
return Ok(IdentifiabilityAudit {
blocks,
aliased_pairs: Vec::new(),
dropped_columns: Vec::new(),
fatal: false,
summary: "identifiability audit: every block is empty".to_string(),
});
}
let mut x_joint = Array2::<f64>::zeros((n, p_total));
for (idx, block) in dense_blocks.iter().enumerate() {
let start = col_offsets[idx];
let end = col_offsets[idx + 1];
if end > start {
x_joint.slice_mut(ndarray::s![.., start..end]).assign(block);
}
}
let rrqr = rrqr_with_permutation(&x_joint, default_rrqr_rank_alpha())
.map_err(|e| format!("identifiability audit joint RRQR failed: {e:?}"))?;
let joint_rank = rrqr.rank;
let joint_rank_tol = rrqr.rank_tol;
let demoted_joint_cols: Vec<usize> = rrqr.column_permutation[rrqr.rank..].to_vec();
let mut col_norms = Array1::<f64>::zeros(p_total);
for j in 0..p_total {
let nrm = x_joint.column(j).iter().map(|v| v * v).sum::<f64>().sqrt();
col_norms[j] = nrm;
}
let mut aliased_pairs: Vec<AliasedPair> = Vec::new();
for a_block_idx in 0..specs.len() {
let a_start = col_offsets[a_block_idx];
let a_end = col_offsets[a_block_idx + 1];
for b_block_idx in (a_block_idx + 1)..specs.len() {
let b_start = col_offsets[b_block_idx];
let b_end = col_offsets[b_block_idx + 1];
for ja in a_start..a_end {
let na = col_norms[ja];
if na == 0.0 {
continue;
}
let ca = x_joint.column(ja);
for jb in b_start..b_end {
let nb = col_norms[jb];
if nb == 0.0 {
continue;
}
let cb = x_joint.column(jb);
let dot: f64 = ca.iter().zip(cb.iter()).map(|(a, b)| a * b).sum();
let overlap = (dot.abs() / (na * nb)).min(1.0);
if overlap >= ALIAS_OVERLAP_REPORTING_THRESHOLD {
aliased_pairs.push(AliasedPair {
block_a: specs[a_block_idx].name.clone(),
block_b: specs[b_block_idx].name.clone(),
direction_a: ja - a_start,
direction_b: jb - b_start,
overlap,
});
}
}
}
}
}
let mut dropped_columns: Vec<DroppedColumn> = Vec::new();
for &joint_col in &demoted_joint_cols {
let (block_idx, local_col) = locate_block_column(&col_offsets, joint_col)?;
let block_name = specs[block_idx].name.clone();
let reason = format!(
"joint-design column {joint_col} (block '{block_name}' local column \
{local_col}) demoted past joint RRQR rank tolerance {tol:.3e}; earlier \
blocks' column span absorbs this direction",
tol = joint_rank_tol,
);
dropped_columns.push(DroppedColumn {
block: block_name,
column: local_col,
reason,
});
}
for (block_idx, block) in blocks.iter_mut().enumerate() {
let lo = col_offsets[block_idx];
let hi = col_offsets[block_idx + 1];
let dropped_here = demoted_joint_cols
.iter()
.filter(|&&j| j >= lo && j < hi)
.count();
block.effective_dim = block.original_dim.saturating_sub(dropped_here);
}
let joint_rank_deficient = joint_rank < p_total;
let fatal = joint_rank_deficient && aliased_pairs.is_empty() && dropped_columns.is_empty();
let summary = format!(
"identifiability audit: {} block(s), {} joint columns, joint rank {}, \
{} alias pair(s) above overlap {:.3}, {} dropped column(s){}",
specs.len(),
p_total,
joint_rank,
aliased_pairs.len(),
ALIAS_OVERLAP_REPORTING_THRESHOLD,
dropped_columns.len(),
if fatal {
" — FATAL (rank deficiency without pair or column attribution)"
} else if joint_rank_deficient {
" — alias(es) flagged; family-specific reparameterisation required"
} else {
" — clean"
},
);
Ok(IdentifiabilityAudit {
blocks,
aliased_pairs,
dropped_columns,
fatal,
summary,
})
}
fn locate_block_column(col_offsets: &[usize], joint_col: usize) -> Result<(usize, usize), String> {
for i in 0..col_offsets.len() - 1 {
if joint_col >= col_offsets[i] && joint_col < col_offsets[i + 1] {
return Ok((i, joint_col - col_offsets[i]));
}
}
Err(format!(
"identifiability_audit::locate_block_column: joint_col {joint_col} \
outside col_offsets range (max = {})",
col_offsets.last().copied().unwrap_or(0),
))
}
fn block_pivoted_qr_diagonal(block: &Array2<f64>) -> Result<Vec<f64>, String> {
if block.ncols() == 0 {
return Ok(Vec::new());
}
let (_q, r) = block
.qr()
.map_err(|e| format!("identifiability audit per-block QR failed: {e:?}"))?;
let diag_len = r.nrows().min(r.ncols());
let mut out: Vec<f64> = (0..diag_len).map(|i| r[[i, i]].abs()).collect();
out.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
out.resize(block.ncols(), 0.0);
Ok(out)
}
fn count_rank(singular_values: &[f64], n: usize, p: usize) -> usize {
if singular_values.is_empty() {
return 0;
}
let leading = singular_values.first().copied().unwrap_or(0.0);
let rank_alpha = default_rrqr_rank_alpha();
let tol = rank_alpha * f64::EPSILON * (n.max(p).max(1) as f64) * leading.max(1.0);
singular_values.iter().filter(|&&v| v > tol).count()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::matrix::{DenseDesignMatrix, DesignMatrix};
use ndarray::Array2;
fn spec_from_dense(name: &str, design: Array2<f64>) -> ParameterBlockSpec {
let n = design.nrows();
ParameterBlockSpec {
name: name.to_string(),
design: DesignMatrix::Dense(DenseDesignMatrix::from(design)),
offset: Array1::<f64>::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::<f64>::zeros(0),
initial_beta: None,
}
}
fn linspace_minus_one_to_one(n: usize) -> Array1<f64> {
if n <= 1 {
return Array1::<f64>::zeros(n.max(1));
}
let step = 2.0 / (n as f64 - 1.0);
Array1::from_iter((0..n).map(|i| -1.0 + step * i as f64))
}
#[test]
fn audit_no_aliasing_returns_clean() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let mut parametric = Array2::<f64>::zeros((n, 2));
for i in 0..n {
parametric[[i, 0]] = 1.0;
parametric[[i, 1]] = x[i];
}
let mut smooth = Array2::<f64>::zeros((n, 2));
for i in 0..n {
smooth[[i, 0]] = x[i] * x[i];
smooth[[i, 1]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_x", smooth),
];
let audit = audit_identifiability(&specs).expect("audit must succeed on clean specs");
assert!(
!audit.fatal,
"no aliasing must not be fatal: {}",
audit.summary
);
assert!(
audit.aliased_pairs.is_empty(),
"expected no alias pairs; got {:?}",
audit.aliased_pairs,
);
assert!(
audit.dropped_columns.is_empty(),
"expected no dropped columns; got {:?}",
audit.dropped_columns,
);
assert_eq!(audit.blocks.len(), 2);
assert_eq!(audit.blocks[0].original_dim, 2);
assert_eq!(audit.blocks[0].effective_dim, 2);
assert_eq!(audit.blocks[1].original_dim, 2);
assert_eq!(audit.blocks[1].effective_dim, 2);
}
#[test]
fn audit_smooth_constant_aliased_with_intercept() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let mut smooth = Array2::<f64>::zeros((n, 3));
for i in 0..n {
smooth[[i, 0]] = 1.0;
smooth[[i, 1]] = x[i] * x[i];
smooth[[i, 2]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_with_const", smooth),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
!audit.fatal,
"alias with attribution must not be fatal: {}",
audit.summary,
);
assert!(
!audit.aliased_pairs.is_empty(),
"smooth-constant aliased with intercept must surface at least one alias pair",
);
assert!(
!audit.dropped_columns.is_empty(),
"smooth-constant aliased with intercept must populate dropped_columns",
);
for drop in &audit.dropped_columns {
assert!(
drop.block == "intercept" || drop.block == "smooth_with_const",
"unexpected drop block name {:?}",
drop.block,
);
}
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 3,
"expected 3 surviving directions; got {total_kept} (summary: {})",
audit.summary,
);
}
#[test]
fn audit_two_smooths_share_linear_direction() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let mut smooth_a = Array2::<f64>::zeros((n, 2));
let mut smooth_b = Array2::<f64>::zeros((n, 2));
for i in 0..n {
smooth_a[[i, 0]] = x[i];
smooth_a[[i, 1]] = x[i] * x[i];
smooth_b[[i, 0]] = x[i];
smooth_b[[i, 1]] = (x[i] - 0.3).powi(2);
}
let specs = [
spec_from_dense("smooth_a", smooth_a),
spec_from_dense("smooth_b", smooth_b),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
!audit.fatal,
"attributed alias must not be fatal: {}",
audit.summary
);
let cross_linear_pair = audit
.aliased_pairs
.iter()
.find(|p| p.block_a == "smooth_a" && p.block_b == "smooth_b" && p.overlap > 0.999);
assert!(
cross_linear_pair.is_some(),
"expected an x~x alias pair between the two smooths; got pairs {:?}",
audit.aliased_pairs,
);
assert!(
!audit.dropped_columns.is_empty(),
"RRQR must demote one of the duplicated linear columns",
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 3,
"expected 3 independent directions (1 shared linear + 2 quadratics); got {total_kept}",
);
}
#[test]
fn audit_three_way_alias_is_attributed_not_fatal() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let eps = 0.5;
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let smooth_a = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| 1.0 + eps * x[i]);
let smooth_b = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| 1.0 - eps * x[i]);
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_a", smooth_a),
spec_from_dense("smooth_b", smooth_b),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
!audit.dropped_columns.is_empty(),
"RRQR must attribute the three-way alias as a dropped column",
);
assert!(
!audit.fatal,
"alias with column attribution must not be fatal: {}",
audit.summary,
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 2,
"three-way alias collapses to rank 2; got {total_kept}"
);
}
#[test]
fn audit_biobank_shape_end_to_end() {
let n = 1024;
let x = linspace_minus_one_to_one(n);
let mut parametric = Array2::<f64>::zeros((n, 2));
for i in 0..n {
parametric[[i, 0]] = 1.0;
parametric[[i, 1]] = x[i];
}
let mut s_x = Array2::<f64>::zeros((n, 8));
for i in 0..n {
for k in 0..8 {
s_x[[i, k]] = (x[i] - (k as f64 - 4.0) * 0.2).powi(2);
}
}
let mut s_sin = Array2::<f64>::zeros((n, 6));
for i in 0..n {
for k in 0..6 {
s_sin[[i, k]] = ((k as f64 + 1.0) * x[i]).sin();
}
}
let mut alias_block = Array2::<f64>::zeros((n, 4));
for i in 0..n {
alias_block[[i, 0]] = x[i];
alias_block[[i, 1]] = x[i].cos();
alias_block[[i, 2]] = (2.0 * x[i]).cos();
alias_block[[i, 3]] = (3.0 * x[i]).cos();
}
let specs = [
spec_from_dense("parametric", parametric),
spec_from_dense("s_x", s_x),
spec_from_dense("s_sin", s_sin),
spec_from_dense("alias_block", alias_block),
];
let audit = audit_identifiability(&specs).expect("biobank-shape audit must succeed");
assert!(
!audit.fatal,
"seeded alias with attribution must not be fatal: {}",
audit.summary,
);
assert!(
!audit.dropped_columns.is_empty(),
"seeded x~x alias must produce at least one dropped column",
);
assert_eq!(
audit.dropped_columns.len(),
1,
"biobank-shape audit should attribute exactly the seeded alias; \
got {:?}",
audit.dropped_columns,
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 19,
"biobank-shape: expected 19 kept directions; got {total_kept} ({})",
audit.summary,
);
}
}