use faer::Side;
use ndarray::{Array1, Array2};
use crate::families::custom_family::{FamilyLinearizationState, ParameterBlockSpec};
use crate::linalg::faer_ndarray::{
FaerEigh, FaerQr, default_rrqr_rank_alpha, rrqr_with_permutation,
};
use crate::solver::estimate::EstimationError;
#[derive(Debug, Clone)]
pub struct BlockIdentity {
pub block_name: String,
pub original_dim: usize,
pub effective_dim: usize,
pub design_range_rank: usize,
pub design_range_singular_values: Vec<f64>,
}
#[derive(Debug, Clone)]
pub struct AliasedPair {
pub block_a: String,
pub block_b: String,
pub direction_a: usize,
pub direction_b: usize,
pub overlap: f64,
pub bias_shift: f64,
}
#[derive(Debug, Clone)]
pub struct DroppedColumn {
pub block: String,
pub column: usize,
pub reason: String,
}
#[derive(Debug, Clone)]
pub struct IdentifiabilityAudit {
pub blocks: Vec<BlockIdentity>,
pub aliased_pairs: Vec<AliasedPair>,
pub dropped_columns: Vec<DroppedColumn>,
pub fatal: bool,
pub summary: String,
}
fn compute_leverage_s2(col: &ndarray::ArrayView1<f64>) -> f64 {
let sq_norm: f64 = col.iter().map(|v| v * v).sum();
if sq_norm <= 0.0 {
return 1.0;
}
col.iter().map(|v| (v * v / sq_norm).powi(2)).sum()
}
pub fn compute_skewness_mu3(z: &[f64]) -> f64 {
let n = z.len();
if n < 3 {
return 0.0;
}
let mean = z.iter().sum::<f64>() / n as f64;
let mut m2 = 0.0_f64;
let mut m3 = 0.0_f64;
for &zi in z {
let d = zi - mean;
m2 += d * d;
m3 += d * d * d;
}
m2 /= n as f64;
m3 /= n as f64;
let sigma = m2.sqrt();
if sigma <= 0.0 {
return 0.0;
}
let raw_skew = m3 / (sigma * sigma * sigma);
let nf = n as f64;
let correction = nf / ((nf - 1.0) * (nf - 2.0));
correction * nf * raw_skew
}
pub fn bias_shift_for_pair(z_a: Option<&[f64]>, z_b: Option<&[f64]>, s2_a: f64, s2_b: f64) -> f64 {
match (z_a, z_b) {
(Some(za), Some(zb)) if za.len() == zb.len() => {
let same = za.iter().zip(zb.iter()).all(|(a, b)| a == b);
if same {
return 0.0;
}
}
(None, None) => return 0.0,
_ => {}
}
let s2_dominant = s2_a.max(s2_b);
let mu3 = if s2_a >= s2_b {
match z_a {
Some(z) => compute_skewness_mu3(z),
None => match z_b {
Some(z) => compute_skewness_mu3(z),
None => 0.0,
},
}
} else {
match z_b {
Some(z) => compute_skewness_mu3(z),
None => match z_a {
Some(z) => compute_skewness_mu3(z),
None => 0.0,
},
}
};
let shift = -(mu3 / 2.0) * s2_dominant;
shift.clamp(-0.5, 0.5)
}
fn pair_null_sigma(s2_a: f64, s2_b: f64, n: usize) -> f64 {
let inv_n = if n == 0 { 0.0 } else { 1.0 / n as f64 };
let s2 = s2_a.max(s2_b);
(s2 - inv_n).max(0.0).sqrt()
}
fn pair_report_threshold(s2_a: f64, s2_b: f64, n: usize, m_pairs: usize) -> f64 {
let sigma = pair_null_sigma(s2_a, s2_b, n);
if sigma <= 0.0 {
return 0.10_f64;
}
let k_report = if m_pairs <= 1 {
3.0_f64
} else {
(2.0 * (2.0 * m_pairs as f64 / 0.05_f64).ln())
.sqrt()
.max(3.0)
};
(k_report * sigma).clamp(0.10, 0.999)
}
fn pair_halt_threshold(s2_a: f64, s2_b: f64, n: usize) -> f64 {
let sigma = pair_null_sigma(s2_a, s2_b, n);
if sigma <= 0.0 {
return 0.999_f64;
}
(10.0_f64 * sigma).clamp(0.05, 0.999)
}
fn cosine_outside_null_band(cosine: f64, shift: f64, half_width: f64) -> bool {
(cosine - shift).abs() >= half_width
}
fn signed_cosine(dot: f64, norm_a: f64, norm_b: f64) -> f64 {
(dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
}
pub fn audit_identifiability(
specs: &[ParameterBlockSpec],
) -> Result<IdentifiabilityAudit, EstimationError> {
let p_total_hint: usize = specs.iter().map(|s| s.design.ncols()).sum();
let zeros_beta = vec![0.0f64; p_total_hint];
let init_state = FamilyLinearizationState {
beta: &zeros_beta,
family_scalars: None,
channel_hessian: None,
probit_frailty_scale: 1.0,
};
audit_identifiability_impl(specs, &init_state)
}
fn audit_identifiability_impl(
specs: &[ParameterBlockSpec],
state: &FamilyLinearizationState<'_>,
) -> Result<IdentifiabilityAudit, EstimationError> {
if specs.is_empty() {
return Ok(IdentifiabilityAudit {
blocks: Vec::new(),
aliased_pairs: Vec::new(),
dropped_columns: Vec::new(),
fatal: false,
summary: "identifiability audit: no blocks supplied".to_string(),
});
}
let mut dense_blocks: Vec<Array2<f64>> = Vec::with_capacity(specs.len());
for spec in specs.iter() {
let dense = spec
.effective_jacobian_at("identifiability_audit::audit_identifiability", state)
.map_err(|e| EstimationError::LayoutError(format!("identifiability audit: {e}")))?;
dense_blocks.push(dense);
}
let n = dense_blocks[0].nrows();
for (idx, dense) in dense_blocks.iter().enumerate() {
if dense.nrows() != n {
return Err(EstimationError::LayoutError(format!(
"identifiability audit: block {} ({}) has {} effective-Jacobian rows, expected {}",
idx,
specs[idx].name,
dense.nrows(),
n,
)));
}
}
let mut blocks: Vec<BlockIdentity> = Vec::with_capacity(specs.len());
let mut col_offsets: Vec<usize> = Vec::with_capacity(specs.len() + 1);
col_offsets.push(0);
let block_phase_started = std::time::Instant::now();
let block_heartbeat = (n.saturating_mul(specs.len()) >= 1_000_000)
.then(crate::util::heartbeat::Heartbeat::default_interval);
for (idx, spec) in specs.iter().enumerate() {
let dense = &dense_blocks[idx];
let p_block = dense.ncols();
let block_singular = block_pivoted_qr_diagonal(dense)?;
let block_rank = count_rank(&block_singular, n, p_block);
blocks.push(BlockIdentity {
block_name: spec.name.clone(),
original_dim: p_block,
effective_dim: p_block,
design_range_rank: block_rank,
design_range_singular_values: block_singular,
});
let next_offset = col_offsets[col_offsets.len() - 1] + p_block;
col_offsets.push(next_offset);
if let Some(hb) = block_heartbeat.as_ref() {
hb.tick(1, |progress, elapsed| {
log::info!(
"[STAGE] identifiability audit: per-block QR progress={}/{} elapsed={:.1}s (overall={:.1}s)",
progress.min(specs.len()),
specs.len(),
elapsed,
block_phase_started.elapsed().as_secs_f64(),
);
});
}
if idx + 1 == specs.len() {
log::info!(
"[STAGE] identifiability audit: per-block QR complete blocks={} elapsed={:.3}s",
specs.len(),
block_phase_started.elapsed().as_secs_f64(),
);
}
}
let p_total = *col_offsets.last().expect("col_offsets non-empty");
if p_total == 0 {
return Ok(IdentifiabilityAudit {
blocks,
aliased_pairs: Vec::new(),
dropped_columns: Vec::new(),
fatal: false,
summary: "identifiability audit: every block is empty".to_string(),
});
}
let mut x_joint = Array2::<f64>::zeros((n, p_total));
for (idx, block) in dense_blocks.iter().enumerate() {
let start = col_offsets[idx];
let end = col_offsets[idx + 1];
if end > start {
x_joint.slice_mut(ndarray::s![.., start..end]).assign(block);
}
}
let mut priority_perm: Vec<usize> = (0..p_total).collect();
let col_block_idx: Vec<usize> = (0..specs.len())
.flat_map(|i| std::iter::repeat(i).take(col_offsets[i + 1] - col_offsets[i]))
.collect();
priority_perm.sort_by(|&a, &b| {
let pa = specs[col_block_idx[a]].gauge_priority;
let pb = specs[col_block_idx[b]].gauge_priority;
pb.cmp(&pa).then_with(|| a.cmp(&b))
});
let priority_perm_is_identity = priority_perm
.iter()
.enumerate()
.all(|(new_j, &old_j)| new_j == old_j);
let rrqr_started = std::time::Instant::now();
if priority_perm_is_identity {
log::info!(
"[STAGE] identifiability audit: joint RRQR start n={} p_total={} priority_reorder=false",
n,
p_total,
);
} else {
let block_priority_summary: Vec<String> = specs
.iter()
.map(|s| format!("{}={}", s.name, s.gauge_priority))
.collect();
log::info!(
"[STAGE] identifiability audit: joint RRQR start n={} p_total={} \
priority_reorder=true blocks=[{}]",
n,
p_total,
block_priority_summary.join(", "),
);
}
let rrqr = if priority_perm_is_identity {
rrqr_with_permutation(&x_joint, default_rrqr_rank_alpha()).map_err(|e| {
EstimationError::LayoutError(format!("identifiability audit joint RRQR failed: {e:?}"))
})?
} else {
let mut x_priority = Array2::<f64>::zeros((n, p_total));
for (new_j, &old_j) in priority_perm.iter().enumerate() {
x_priority.column_mut(new_j).assign(&x_joint.column(old_j));
}
rrqr_with_permutation(&x_priority, default_rrqr_rank_alpha()).map_err(|e| {
EstimationError::LayoutError(format!(
"identifiability audit joint RRQR (priority-ordered) failed: {e:?}"
))
})?
};
log::info!(
"[STAGE] identifiability audit: joint RRQR end rank={}/{} elapsed={:.3}s",
rrqr.rank,
p_total,
rrqr_started.elapsed().as_secs_f64(),
);
let joint_rank = rrqr.rank;
let joint_rank_tol = rrqr.rank_tol;
let map_to_original = |reordered_idx: usize| -> usize {
if priority_perm_is_identity {
reordered_idx
} else {
priority_perm[reordered_idx]
}
};
let demoted_joint_cols: Vec<usize> = rrqr.column_permutation[rrqr.rank..]
.iter()
.map(|&j| map_to_original(j))
.collect();
let pairwise_started = std::time::Instant::now();
log::info!(
"[STAGE] identifiability audit: pairwise overlap scan start n={} p_total={} blocks={}",
n,
p_total,
specs.len(),
);
let mut col_norms = Array1::<f64>::zeros(p_total);
let mut col_s2 = Array1::<f64>::zeros(p_total);
for j in 0..p_total {
let col = x_joint.column(j);
let nrm = col.iter().map(|v| v * v).sum::<f64>().sqrt();
col_norms[j] = nrm;
col_s2[j] = compute_leverage_s2(&col);
}
let total_cross_pairs: usize = {
let mut cnt = 0usize;
for a_idx in 0..specs.len() {
let a_cols = col_offsets[a_idx + 1] - col_offsets[a_idx];
for b_idx in (a_idx + 1)..specs.len() {
let b_cols = col_offsets[b_idx + 1] - col_offsets[b_idx];
cnt = cnt.saturating_add(a_cols.saturating_mul(b_cols));
}
}
cnt.max(1)
};
let pairwise_block_heartbeat = (n.saturating_mul(p_total) >= 1_000_000)
.then(crate::util::heartbeat::Heartbeat::default_interval);
let mut aliased_pairs: Vec<AliasedPair> = Vec::new();
let n_block_pairs = specs.len().saturating_mul(specs.len().saturating_sub(1)) / 2;
for a_block_idx in 0..specs.len() {
let a_start = col_offsets[a_block_idx];
let a_end = col_offsets[a_block_idx + 1];
let z_a_arc = specs[a_block_idx]
.jacobian_callback
.as_ref()
.and_then(|cb| cb.eta_row_scaling_for_skewness());
let z_a: Option<&[f64]> = z_a_arc.as_deref();
for b_block_idx in (a_block_idx + 1)..specs.len() {
let b_start = col_offsets[b_block_idx];
let b_end = col_offsets[b_block_idx + 1];
let z_b_arc = specs[b_block_idx]
.jacobian_callback
.as_ref()
.and_then(|cb| cb.eta_row_scaling_for_skewness());
let z_b: Option<&[f64]> = z_b_arc.as_deref();
for ja in a_start..a_end {
let na = col_norms[ja];
if na == 0.0 {
continue;
}
let ca = x_joint.column(ja);
for jb in b_start..b_end {
let nb = col_norms[jb];
if nb == 0.0 {
continue;
}
let cb = x_joint.column(jb);
let dot: f64 = ca.iter().zip(cb.iter()).map(|(a, b)| a * b).sum();
let cosine = signed_cosine(dot, na, nb);
let s2_ja = col_s2[ja];
let s2_jb = col_s2[jb];
let shift = bias_shift_for_pair(z_a, z_b, s2_ja, s2_jb);
let report_half_width =
pair_report_threshold(s2_ja, s2_jb, n, total_cross_pairs);
let report_flag = cosine_outside_null_band(cosine, shift, report_half_width);
let overlap = cosine.abs();
if report_flag {
aliased_pairs.push(AliasedPair {
block_a: specs[a_block_idx].name.clone(),
block_b: specs[b_block_idx].name.clone(),
direction_a: ja - a_start,
direction_b: jb - b_start,
overlap,
bias_shift: shift,
});
}
}
}
if let Some(hb) = pairwise_block_heartbeat.as_ref() {
hb.tick(1, |done, secs| {
log::info!(
"[STAGE] identifiability audit: pairwise overlap progress {done}/{n_block_pairs} block pairs in {secs:.1}s",
);
});
}
}
}
log::info!(
"[STAGE] identifiability audit: pairwise overlap scan done in {:.3}s ({} aliased pairs)",
pairwise_started.elapsed().as_secs_f64(),
aliased_pairs.len(),
);
let mut dropped_columns: Vec<DroppedColumn> = Vec::new();
for &joint_col in &demoted_joint_cols {
let (block_idx, local_col) = locate_block_column(&col_offsets, joint_col)?;
let block_name = specs[block_idx].name.clone();
let reason = format!(
"joint-design column {joint_col} (block '{block_name}' local column \
{local_col}) demoted past joint RRQR rank tolerance {tol:.3e}; earlier \
blocks' column span absorbs this direction",
tol = joint_rank_tol,
);
dropped_columns.push(DroppedColumn {
block: block_name,
column: local_col,
reason,
});
}
for (block_idx, block) in blocks.iter_mut().enumerate() {
let lo = col_offsets[block_idx];
let hi = col_offsets[block_idx + 1];
let dropped_here = demoted_joint_cols
.iter()
.filter(|&&j| j >= lo && j < hi)
.count();
block.effective_dim = block.original_dim.saturating_sub(dropped_here);
}
let joint_rank_deficient = joint_rank < p_total;
let block_priority: std::collections::HashMap<&str, u8> = specs
.iter()
.map(|s| (s.name.as_str(), s.gauge_priority))
.collect();
let all_priorities_equal = specs
.iter()
.all(|s| s.gauge_priority == specs[0].gauge_priority);
let block_name_to_idx: std::collections::HashMap<&str, usize> = specs
.iter()
.enumerate()
.map(|(i, s)| (s.name.as_str(), i))
.collect();
let hard_alias_pair = aliased_pairs
.iter()
.filter(|p| {
let pa = block_priority
.get(p.block_a.as_str())
.copied()
.unwrap_or(100);
let pb = block_priority
.get(p.block_b.as_str())
.copied()
.unwrap_or(100);
if pa != pb {
return false;
}
let ja = block_name_to_idx
.get(p.block_a.as_str())
.map(|&bi| col_offsets[bi] + p.direction_a)
.unwrap_or(0);
let jb = block_name_to_idx
.get(p.block_b.as_str())
.map(|&bi| col_offsets[bi] + p.direction_b)
.unwrap_or(0);
let halt_half_width = pair_halt_threshold(
col_s2.get(ja).copied().unwrap_or(1.0),
col_s2.get(jb).copied().unwrap_or(1.0),
n,
);
let conservative_deviation = (p.overlap - p.bias_shift.abs()).abs();
conservative_deviation >= halt_half_width
})
.max_by(|a, b| {
a.overlap
.partial_cmp(&b.overlap)
.unwrap_or(std::cmp::Ordering::Equal)
})
.cloned();
let rank_deficiency_count = p_total.saturating_sub(joint_rank);
let attribution_complete =
rank_deficiency_count > 0 && dropped_columns.len() == rank_deficiency_count;
let gauge_resolves_rank_deficiency = !all_priorities_equal
&& attribution_complete
&& dropped_columns.iter().all(|drop| {
let drop_priority = block_priority
.get(drop.block.as_str())
.copied()
.unwrap_or(100);
aliased_pairs.iter().any(|pair| {
let other_block = if pair.block_a == drop.block {
pair.block_b.as_str()
} else if pair.block_b == drop.block {
pair.block_a.as_str()
} else {
return false;
};
let other_priority = block_priority.get(other_block).copied().unwrap_or(100);
other_priority > drop_priority
})
});
let fatal =
(joint_rank_deficient && !gauge_resolves_rank_deficiency) || hard_alias_pair.is_some();
let fatal_detail = if fatal {
let mut parts: Vec<String> = Vec::new();
if joint_rank_deficient {
let attribution = if let Some(first_drop) = dropped_columns.first() {
format!(
"first attributed drop: block '{}' local column {} \
(reparam: replace this column with a sum-to-zero or \
orthogonal-complement projection against earlier blocks, \
or remove the redundant term entirely)",
first_drop.block, first_drop.column,
)
} else {
"no per-column attribution (>2-way structural alias); \
audit the joint design by eye and absorb the shared null \
subspace into a single parametric block"
.to_string()
};
parts.push(format!(
"joint rank {} < joint columns {} ({} dropped column(s); {})",
joint_rank,
p_total,
dropped_columns.len(),
attribution,
));
}
if let Some(pair) = hard_alias_pair.as_ref() {
let ja = block_name_to_idx
.get(pair.block_a.as_str())
.map(|&bi| col_offsets[bi] + pair.direction_a)
.unwrap_or(0);
let jb = block_name_to_idx
.get(pair.block_b.as_str())
.map(|&bi| col_offsets[bi] + pair.direction_b)
.unwrap_or(0);
let halt_half_width = pair_halt_threshold(
col_s2.get(ja).copied().unwrap_or(1.0),
col_s2.get(jb).copied().unwrap_or(1.0),
n,
);
let shift_note = if pair.bias_shift.abs() > 1e-8 {
format!(" bias_shift={:.4}", pair.bias_shift)
} else {
String::new()
};
parts.push(format!(
"alias pair: '{}'[{}] ~ '{}'[{}] overlap={:.4} >= leverage-based halt \
half-width {:.4}{} (n_eff_a≈{:.0}, n_eff_b≈{:.0}; \
reparam: orthogonalise one block's column {} against the other \
via sum-to-zero, or absorb the shared direction into a single \
parametric block)",
pair.block_a,
pair.direction_a,
pair.block_b,
pair.direction_b,
pair.overlap,
halt_half_width,
shift_note,
1.0 / col_s2.get(ja).copied().unwrap_or(1.0).max(f64::EPSILON),
1.0 / col_s2.get(jb).copied().unwrap_or(1.0).max(f64::EPSILON),
pair.direction_b,
));
}
format!(" — FATAL: {}", parts.join("; "))
} else if gauge_resolves_rank_deficiency {
format!(
" — gauge-attributed drops: {} column(s) attributed to lower-priority blocks \
via gauge_priority ordering; canonical-gauge pipeline will proceed with \
reduced specs",
dropped_columns.len(),
)
} else if !aliased_pairs.is_empty() {
" — partial alias(es) below leverage-based halt threshold; penalty + line search will resolve"
.to_string()
} else {
" — clean".to_string()
};
let summary = format!(
"identifiability audit: {} block(s), {} joint columns, joint rank {}, \
{} alias pair(s) above leverage-based report threshold, {} dropped column(s){}",
specs.len(),
p_total,
joint_rank,
aliased_pairs.len(),
dropped_columns.len(),
fatal_detail,
);
Ok(IdentifiabilityAudit {
blocks,
aliased_pairs,
dropped_columns,
fatal,
summary,
})
}
pub fn audit_identifiability_channel_aware(
specs: &[ParameterBlockSpec],
operators: &[std::sync::Arc<
dyn crate::families::identifiability_compiler::RowJacobianOperator,
>],
row_hess: &dyn crate::families::identifiability_compiler::RowHessian,
) -> Result<IdentifiabilityAudit, EstimationError> {
use crate::families::identifiability_compiler::{IdentityRowHessian, compile_with_dual_metric};
if specs.is_empty() {
return Ok(IdentifiabilityAudit {
blocks: Vec::new(),
aliased_pairs: Vec::new(),
dropped_columns: Vec::new(),
fatal: false,
summary: "identifiability audit (channel-aware): no blocks supplied".to_string(),
});
}
if specs.len() != operators.len() {
return Err(EstimationError::LayoutError(format!(
"audit_identifiability_channel_aware: specs ({}) and operators ({}) length mismatch",
specs.len(),
operators.len()
)));
}
let k = row_hess.k();
let n = row_hess.nrows();
for (idx, op) in operators.iter().enumerate() {
if op.k() != k {
return Err(EstimationError::LayoutError(format!(
"audit_identifiability_channel_aware: operator {idx} has K={} but row_hess K={k}",
op.k(),
)));
}
if op.nrows() != n {
return Err(EstimationError::LayoutError(format!(
"audit_identifiability_channel_aware: operator {idx} has nrows={} but row_hess nrows={n}",
op.nrows(),
)));
}
if op.ncols() != specs[idx].design.ncols() {
return Err(EstimationError::LayoutError(format!(
"audit_identifiability_channel_aware: operator {idx} ({}) has ncols={} but spec '{}' design ncols={}",
idx,
op.ncols(),
specs[idx].name,
specs[idx].design.ncols(),
)));
}
}
let id_struct = IdentityRowHessian::new(n, k);
let ordering: Vec<crate::families::identifiability_compiler::BlockOrder> =
std::iter::repeat(crate::families::identifiability_compiler::BlockOrder::Marginal)
.take(operators.len())
.collect();
let compiled =
compile_with_dual_metric(operators, row_hess, &id_struct, &ordering).map_err(|e| {
EstimationError::LayoutError(format!(
"audit_identifiability_channel_aware compile failed: {e:?}"
))
})?;
let mut blocks: Vec<BlockIdentity> = Vec::with_capacity(specs.len());
let mut col_offsets: Vec<usize> = Vec::with_capacity(specs.len() + 1);
col_offsets.push(0);
for (idx, spec) in specs.iter().enumerate() {
let p_block = spec.design.ncols();
let kept = compiled
.blocks
.get(idx)
.map(|b| b.t_lw.ncols())
.unwrap_or(p_block);
blocks.push(BlockIdentity {
block_name: spec.name.clone(),
original_dim: p_block,
effective_dim: kept,
design_range_rank: kept,
design_range_singular_values: Vec::new(),
});
let next = col_offsets[col_offsets.len() - 1] + p_block;
col_offsets.push(next);
}
let p_total = *col_offsets.last().expect("col_offsets non-empty");
let mut dropped_columns: Vec<DroppedColumn> = Vec::new();
for (block_idx, local_col) in &compiled.dropped {
let block_name = specs[*block_idx].name.clone();
dropped_columns.push(DroppedColumn {
block: block_name.clone(),
column: *local_col,
reason: format!(
"channel-aware audit (K={k}) demoted block '{block_name}' \
local column {local_col}: column is in the row-Jacobian span \
of earlier blocks under the structural row metric",
),
});
}
let aliased_pairs = channel_aware_aliased_pairs(operators, &col_offsets, specs)?;
let joint_rank = compiled.joint_rank;
let joint_rank_deficient = joint_rank < p_total;
let block_priority_ca: std::collections::HashMap<&str, u8> = specs
.iter()
.map(|s| (s.name.as_str(), s.gauge_priority))
.collect();
let all_priorities_equal_ca = specs
.iter()
.all(|s| s.gauge_priority == specs[0].gauge_priority);
let block_name_to_idx_ca: std::collections::HashMap<&str, usize> = specs
.iter()
.enumerate()
.map(|(i, s)| (s.name.as_str(), i))
.collect();
let ca_col_s2: Vec<f64> = {
let p_total_ca = *col_offsets.last().unwrap_or(&0);
let mut s2_vals: Vec<f64> = Vec::with_capacity(p_total_ca);
for op in operators.iter() {
let j_full = op.evaluate_full();
let p_b = op.ncols();
for c in 0..p_b {
let mut w = Array1::<f64>::zeros(n * k);
for i in 0..n {
for ch in 0..k {
w[i * k + ch] = j_full[[i, c, ch]];
}
}
s2_vals.push(compute_leverage_s2(&w.view()));
}
}
s2_vals
};
let hard_alias_pair = aliased_pairs
.iter()
.filter(|p| {
let pa = block_priority_ca
.get(p.block_a.as_str())
.copied()
.unwrap_or(100);
let pb = block_priority_ca
.get(p.block_b.as_str())
.copied()
.unwrap_or(100);
if pa != pb {
return false;
}
let ja = block_name_to_idx_ca
.get(p.block_a.as_str())
.map(|&bi| col_offsets[bi] + p.direction_a)
.unwrap_or(0);
let jb = block_name_to_idx_ca
.get(p.block_b.as_str())
.map(|&bi| col_offsets[bi] + p.direction_b)
.unwrap_or(0);
let halt_half_width = pair_halt_threshold(
ca_col_s2.get(ja).copied().unwrap_or(1.0),
ca_col_s2.get(jb).copied().unwrap_or(1.0),
n * k,
);
let conservative_deviation = (p.overlap - p.bias_shift.abs()).abs();
conservative_deviation >= halt_half_width
})
.max_by(|a, b| {
a.overlap
.partial_cmp(&b.overlap)
.unwrap_or(std::cmp::Ordering::Equal)
})
.cloned();
let rank_deficiency_count_ca = p_total.saturating_sub(joint_rank);
let attribution_complete_ca =
rank_deficiency_count_ca > 0 && dropped_columns.len() == rank_deficiency_count_ca;
let gauge_resolves_rank_deficiency_ca = !all_priorities_equal_ca
&& attribution_complete_ca
&& dropped_columns.iter().all(|drop| {
let drop_priority = block_priority_ca
.get(drop.block.as_str())
.copied()
.unwrap_or(100);
aliased_pairs.iter().any(|pair| {
let other_block = if pair.block_a == drop.block {
pair.block_b.as_str()
} else if pair.block_b == drop.block {
pair.block_a.as_str()
} else {
return false;
};
let other_priority = block_priority_ca.get(other_block).copied().unwrap_or(100);
other_priority > drop_priority
})
});
let fatal =
(joint_rank_deficient && !gauge_resolves_rank_deficiency_ca) || hard_alias_pair.is_some();
let fatal_detail = if fatal {
let mut parts: Vec<String> = Vec::new();
if joint_rank_deficient && !gauge_resolves_rank_deficiency_ca {
let attribution = if let Some(first_drop) = dropped_columns.first() {
format!(
"first attributed drop: block '{}' local column {} \
(reparam: replace this column with a sum-to-zero or \
orthogonal-complement projection against earlier blocks, \
or remove the redundant term entirely)",
first_drop.block, first_drop.column,
)
} else {
"no per-column attribution (>2-way structural alias in the \
channel-aware row-Jacobian space)"
.to_string()
};
parts.push(format!(
"channel-aware joint rank {} < joint columns {} \
({} dropped column(s); {})",
joint_rank,
p_total,
dropped_columns.len(),
attribution,
));
}
if let Some(pair) = hard_alias_pair.as_ref() {
let ja = block_name_to_idx_ca
.get(pair.block_a.as_str())
.map(|&bi| col_offsets[bi] + pair.direction_a)
.unwrap_or(0);
let jb = block_name_to_idx_ca
.get(pair.block_b.as_str())
.map(|&bi| col_offsets[bi] + pair.direction_b)
.unwrap_or(0);
let halt_half_width_ca = pair_halt_threshold(
ca_col_s2.get(ja).copied().unwrap_or(1.0),
ca_col_s2.get(jb).copied().unwrap_or(1.0),
n * k,
);
parts.push(format!(
"alias pair: '{}'[{}] ~ '{}'[{}] overlap={:.4} >= leverage-based halt \
half-width {:.4} in channel-aware row-Jacobian view \
(reparam: orthogonalise one block's column {} against the other \
or absorb the shared direction)",
pair.block_a,
pair.direction_a,
pair.block_b,
pair.direction_b,
pair.overlap,
halt_half_width_ca,
pair.direction_b,
));
}
format!(" — FATAL: {}", parts.join("; "))
} else if gauge_resolves_rank_deficiency_ca {
format!(
" — gauge-attributed drops (channel-aware): {} column(s) attributed to \
lower-priority blocks via gauge_priority ordering; canonical-gauge pipeline \
will proceed with reduced specs",
dropped_columns.len(),
)
} else if !aliased_pairs.is_empty() {
" — partial alias(es) below leverage-based halt threshold (channel-aware); \
penalty + line search will resolve"
.to_string()
} else {
" — clean (channel-aware)".to_string()
};
let summary = format!(
"identifiability audit (channel-aware, K={k}): {} block(s), {} joint columns, \
joint rank {}, {} alias pair(s) above leverage-based report threshold, \
{} dropped column(s){}",
specs.len(),
p_total,
joint_rank,
aliased_pairs.len(),
dropped_columns.len(),
fatal_detail,
);
Ok(IdentifiabilityAudit {
blocks,
aliased_pairs,
dropped_columns,
fatal,
summary,
})
}
fn channel_aware_aliased_pairs(
operators: &[std::sync::Arc<
dyn crate::families::identifiability_compiler::RowJacobianOperator,
>],
col_offsets: &[usize],
specs: &[ParameterBlockSpec],
) -> Result<Vec<AliasedPair>, EstimationError> {
if operators.is_empty() {
return Ok(Vec::new());
}
let k = operators[0].k();
let n = operators[0].nrows();
let nk = n.checked_mul(k).ok_or_else(|| {
EstimationError::LayoutError(format!("channel-aware audit: n*k overflow (n={n}, k={k})"))
})?;
let p_total = *col_offsets.last().unwrap_or(&0);
if p_total == 0 || nk == 0 {
return Ok(Vec::new());
}
let mut cols: Vec<Array1<f64>> = Vec::with_capacity(p_total);
let mut col_norms: Vec<f64> = Vec::with_capacity(p_total);
let mut col_s2: Vec<f64> = Vec::with_capacity(p_total);
for op in operators.iter() {
let j_full = op.evaluate_full();
let p_b = op.ncols();
for c in 0..p_b {
let mut w = Array1::<f64>::zeros(nk);
for i in 0..n {
for ch in 0..k {
w[i * k + ch] = j_full[[i, c, ch]];
}
}
let norm = w.iter().map(|v| v * v).sum::<f64>().sqrt();
let s2 = compute_leverage_s2(&w.view());
cols.push(w);
col_norms.push(norm);
col_s2.push(s2);
}
}
let total_cross_pairs: usize = {
let mut cnt = 0usize;
for a_idx in 0..specs.len() {
let a_cols = col_offsets[a_idx + 1] - col_offsets[a_idx];
for b_idx in (a_idx + 1)..specs.len() {
let b_cols = col_offsets[b_idx + 1] - col_offsets[b_idx];
cnt = cnt.saturating_add(a_cols.saturating_mul(b_cols));
}
}
cnt.max(1)
};
let mut pairs: Vec<AliasedPair> = Vec::new();
for a in 0..p_total {
if col_norms[a] <= 0.0 {
continue;
}
for b in (a + 1)..p_total {
if col_norms[b] <= 0.0 {
continue;
}
let mut dot = 0.0_f64;
for i in 0..nk {
dot += cols[a][i] * cols[b][i];
}
let overlap = (dot.abs() / (col_norms[a] * col_norms[b])).min(1.0);
let report_thr = pair_report_threshold(col_s2[a], col_s2[b], nk, total_cross_pairs);
if overlap >= report_thr {
let (block_a_idx, dir_a) = locate_block_column(col_offsets, a)?;
let (block_b_idx, dir_b) = locate_block_column(col_offsets, b)?;
if block_a_idx == block_b_idx {
continue;
}
pairs.push(AliasedPair {
block_a: specs[block_a_idx].name.clone(),
block_b: specs[block_b_idx].name.clone(),
direction_a: dir_a,
direction_b: dir_b,
overlap,
bias_shift: 0.0,
});
}
}
}
Ok(pairs)
}
#[derive(Debug, Clone)]
pub struct AuditDriftSummary {
pub pilot_rank: usize,
pub current_rank: usize,
pub beta_relative_change: f64,
pub newly_dropped: Vec<DroppedColumn>,
pub recovered: Vec<String>,
}
pub fn maybe_log_audit_drift(
specs: &[ParameterBlockSpec],
pilot_audit: &IdentifiabilityAudit,
beta_pilot: &[f64],
beta_current: &[f64],
family_scalars: Option<&std::sync::Arc<dyn std::any::Any + Send + Sync>>,
outer_iter: usize,
every_n_iters: usize,
) -> Option<AuditDriftSummary> {
const BETA_RELATIVE_THRESHOLD: f64 = 0.5;
const DEFAULT_EVERY_N_ITERS: usize = 10;
let period = if every_n_iters == 0 {
DEFAULT_EVERY_N_ITERS
} else {
every_n_iters
};
let beta_pilot_norm: f64 = beta_pilot.iter().map(|b| b * b).sum::<f64>().sqrt();
let beta_current_len = beta_current.len();
let beta_pilot_len = beta_pilot.len();
let diff_norm: f64 = if beta_current_len == beta_pilot_len {
beta_current
.iter()
.zip(beta_pilot.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f64>()
.sqrt()
} else {
f64::INFINITY
};
let beta_relative_change = diff_norm / (beta_pilot_norm + f64::EPSILON);
let large_beta_movement = beta_relative_change > BETA_RELATIVE_THRESHOLD;
let periodic_check = (outer_iter % period) == 0;
if !large_beta_movement && !periodic_check {
return None;
}
let p_total: usize = specs.iter().map(|s| s.design.ncols()).sum();
let beta_for_state: Vec<f64> = if beta_current.len() == p_total {
beta_current.to_vec()
} else {
vec![0.0; p_total]
};
let state = crate::families::custom_family::FamilyLinearizationState {
beta: &beta_for_state,
family_scalars: family_scalars.cloned(),
channel_hessian: None,
probit_frailty_scale: 1.0,
};
let current_audit = match audit_identifiability_with_state(specs, &state) {
Ok(a) => a,
Err(_) => return None,
};
let pilot_rank: usize = pilot_audit.blocks.iter().map(|b| b.effective_dim).sum();
let current_rank: usize = current_audit.blocks.iter().map(|b| b.effective_dim).sum();
let pilot_dropped: std::collections::BTreeSet<(String, usize)> = pilot_audit
.dropped_columns
.iter()
.map(|d| (d.block.clone(), d.column))
.collect();
let current_dropped: std::collections::BTreeSet<(String, usize)> = current_audit
.dropped_columns
.iter()
.map(|d| (d.block.clone(), d.column))
.collect();
let newly_dropped: Vec<DroppedColumn> = current_audit
.dropped_columns
.iter()
.filter(|d| !pilot_dropped.contains(&(d.block.clone(), d.column)))
.cloned()
.collect();
let recovered: Vec<String> = pilot_audit
.dropped_columns
.iter()
.filter(|d| !current_dropped.contains(&(d.block.clone(), d.column)))
.map(|d| format!("{}[{}]", d.block, d.column))
.collect();
let verdict_changed =
pilot_rank != current_rank || !newly_dropped.is_empty() || !recovered.is_empty();
if verdict_changed {
let newly_str: Vec<String> = newly_dropped
.iter()
.map(|d| format!("{}[{}]", d.block, d.column))
.collect();
let recovered_str = if recovered.is_empty() {
"none".to_string()
} else {
recovered.join(", ")
};
log::info!(
"[AUDIT-DRIFT] pilot_rank={} current_rank={} \
beta_relative_change={:.4} outer_iter={} \
newly_dropped=[{}] recovered=[{}]",
pilot_rank,
current_rank,
beta_relative_change,
outer_iter,
if newly_str.is_empty() {
"none".to_string()
} else {
newly_str.join(", ")
},
recovered_str,
);
}
Some(AuditDriftSummary {
pilot_rank,
current_rank,
beta_relative_change,
newly_dropped,
recovered,
})
}
pub fn audit_identifiability_with_state(
specs: &[ParameterBlockSpec],
state: &crate::families::custom_family::FamilyLinearizationState<'_>,
) -> Result<IdentifiabilityAudit, EstimationError> {
audit_identifiability_impl(specs, state)
}
fn locate_block_column(
col_offsets: &[usize],
joint_col: usize,
) -> Result<(usize, usize), EstimationError> {
for i in 0..col_offsets.len() - 1 {
if joint_col >= col_offsets[i] && joint_col < col_offsets[i + 1] {
return Ok((i, joint_col - col_offsets[i]));
}
}
Err(EstimationError::LayoutError(format!(
"identifiability_audit::locate_block_column: joint_col {joint_col} \
outside col_offsets range (max = {})",
col_offsets.last().copied().unwrap_or(0),
)))
}
fn block_pivoted_qr_diagonal(block: &Array2<f64>) -> Result<Vec<f64>, EstimationError> {
if block.ncols() == 0 {
return Ok(Vec::new());
}
let (_q, r) = block
.qr()
.map_err(EstimationError::EigendecompositionFailed)?;
let diag_len = r.nrows().min(r.ncols());
let mut out: Vec<f64> = (0..diag_len).map(|i| r[[i, i]].abs()).collect();
out.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
out.resize(block.ncols(), 0.0);
Ok(out)
}
fn count_rank(singular_values: &[f64], n: usize, p: usize) -> usize {
if singular_values.is_empty() {
return 0;
}
let leading = singular_values.first().copied().unwrap_or(0.0);
let rank_alpha = default_rrqr_rank_alpha();
let tol = rank_alpha * f64::EPSILON * (n.max(p).max(1) as f64) * leading.max(1.0);
singular_values.iter().filter(|&&v| v > tol).count()
}
#[derive(Debug, Clone)]
pub struct MapUniquenessError {
pub message: String,
pub dominant_block: String,
pub null_direction_index: usize,
pub penalty_quadratic_form: f64,
}
impl std::fmt::Display for MapUniquenessError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.message)
}
}
pub fn check_map_uniqueness(
j_joint: &Array2<f64>,
w_diag: &[f64],
s_joint: &Array2<f64>,
specs: &[ParameterBlockSpec],
col_offsets: &[usize],
) -> Result<(), MapUniquenessError> {
let n = j_joint.nrows();
let p = j_joint.ncols();
if p == 0 {
return Ok(());
}
let mut g = Array2::<f64>::zeros((p, p));
if w_diag.is_empty() {
for k in 0..n {
for i in 0..p {
let ji = j_joint[[k, i]];
if ji == 0.0 {
continue;
}
for j in i..p {
let val = ji * j_joint[[k, j]];
g[[i, j]] += val;
if i != j {
g[[j, i]] += val;
}
}
}
}
} else {
assert_eq!(
w_diag.len(),
n,
"check_map_uniqueness: w_diag length {} != n {}",
w_diag.len(),
n,
);
for k in 0..n {
let wk = w_diag[k];
if wk == 0.0 {
continue;
}
for i in 0..p {
let wji = wk * j_joint[[k, i]];
if wji == 0.0 {
continue;
}
for j in i..p {
let val = wji * j_joint[[k, j]];
g[[i, j]] += val;
if i != j {
g[[j, i]] += val;
}
}
}
}
}
let (evals, evecs) = match g.eigh(Side::Lower) {
Ok(pair) => pair,
Err(e) => {
log::warn!(
"[MAP-UNIQUE] check_map_uniqueness: eigendecomposition of J^T W J failed \
({e:?}); skipping MAP uniqueness check",
);
return Ok(());
}
};
let lambda_max = evals.iter().copied().fold(0.0_f64, f64::max).max(1.0);
let rank_alpha = default_rrqr_rank_alpha();
let null_tol = rank_alpha * f64::EPSILON * (p as f64) * lambda_max;
let mut null_dirs: Vec<(f64, usize)> = evals
.iter()
.enumerate()
.filter(|&(_, &lam)| lam < null_tol)
.map(|(idx, &lam)| (lam, idx))
.collect();
null_dirs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
if null_dirs.is_empty() {
return Ok(());
}
let s_frob_sq: f64 = s_joint.iter().map(|v| v * v).sum();
let pen_tol = null_tol * s_frob_sq.sqrt().max(1.0);
for (dir_idx, (lam, evec_col)) in null_dirs.iter().enumerate() {
let n_vec = evecs.column(*evec_col);
let sn: Array1<f64> = s_joint.dot(&n_vec.to_owned());
let ntsn: f64 = n_vec.iter().zip(sn.iter()).map(|(ni, si)| ni * si).sum();
if ntsn < pen_tol {
let dominant_block =
dominant_block_for_direction(&n_vec.to_owned(), specs, col_offsets);
let message = format!(
"MAP estimate is non-unique: null direction {} of J^T W J (eigenvalue {lam:.3e}) \
has n^T S n = {ntsn:.3e} < tolerance {pen_tol:.3e}; \
the MAP is flat along this direction (no likelihood curvature, no penalty \
curvature); dominant block: '{}'. \
Fix: add a non-degenerate smoothness penalty to block '{}' that covers this \
direction, or remove the unpenalised null direction from the model.",
dir_idx, dominant_block, dominant_block,
);
return Err(MapUniquenessError {
message,
dominant_block,
null_direction_index: dir_idx,
penalty_quadratic_form: ntsn,
});
}
}
Ok(())
}
fn dominant_block_for_direction(
n_vec: &Array1<f64>,
specs: &[ParameterBlockSpec],
col_offsets: &[usize],
) -> String {
let mut best_block = specs.first().map(|s| s.name.as_str()).unwrap_or("unknown");
let mut best_sq = 0.0_f64;
for (i, spec) in specs.iter().enumerate() {
if i + 1 >= col_offsets.len() {
break;
}
let lo = col_offsets[i];
let hi = col_offsets[i + 1];
let sq: f64 = (lo..hi).map(|c| n_vec[c] * n_vec[c]).sum();
if sq > best_sq {
best_sq = sq;
best_block = spec.name.as_str();
}
}
best_block.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::linalg::matrix::{DenseDesignMatrix, DesignMatrix};
use ndarray::Array2;
fn spec_from_dense(name: &str, design: Array2<f64>) -> ParameterBlockSpec {
let n = design.nrows();
ParameterBlockSpec {
name: name.to_string(),
design: DesignMatrix::Dense(DenseDesignMatrix::from(design)),
offset: Array1::<f64>::zeros(n),
penalties: Vec::new(),
nullspace_dims: Vec::new(),
initial_log_lambdas: Array1::<f64>::zeros(0),
initial_beta: None,
gauge_priority: 100,
jacobian_callback: None,
stacked_design: None,
stacked_offset: None,
}
}
fn linspace_minus_one_to_one(n: usize) -> Array1<f64> {
if n <= 1 {
return Array1::<f64>::zeros(n.max(1));
}
let step = 2.0 / (n as f64 - 1.0);
Array1::from_iter((0..n).map(|i| -1.0 + step * i as f64))
}
#[test]
fn audit_no_aliasing_returns_clean() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let mut parametric = Array2::<f64>::zeros((n, 2));
for i in 0..n {
parametric[[i, 0]] = 1.0;
parametric[[i, 1]] = x[i];
}
let mut smooth = Array2::<f64>::zeros((n, 2));
for i in 0..n {
smooth[[i, 0]] = x[i] * x[i];
smooth[[i, 1]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_x", smooth),
];
let audit = audit_identifiability(&specs).expect("audit must succeed on clean specs");
assert!(
!audit.fatal,
"no aliasing must not be fatal: {}",
audit.summary
);
assert!(
audit.aliased_pairs.is_empty(),
"expected no alias pairs; got {:?}",
audit.aliased_pairs,
);
assert!(
audit.dropped_columns.is_empty(),
"expected no dropped columns; got {:?}",
audit.dropped_columns,
);
assert_eq!(audit.blocks.len(), 2);
assert_eq!(audit.blocks[0].original_dim, 2);
assert_eq!(audit.blocks[0].effective_dim, 2);
assert_eq!(audit.blocks[1].original_dim, 2);
assert_eq!(audit.blocks[1].effective_dim, 2);
}
#[test]
fn audit_smooth_constant_aliased_with_intercept() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let mut smooth = Array2::<f64>::zeros((n, 3));
for i in 0..n {
smooth[[i, 0]] = 1.0;
smooth[[i, 1]] = x[i] * x[i];
smooth[[i, 2]] = x[i] * x[i] * x[i];
}
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_with_const", smooth),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
audit.fatal,
"exact intercept~smooth-constant alias must be fatal under the halt gate: {}",
audit.summary,
);
assert!(
!audit.aliased_pairs.is_empty(),
"smooth-constant aliased with intercept must surface at least one alias pair",
);
assert!(
!audit.dropped_columns.is_empty(),
"smooth-constant aliased with intercept must populate dropped_columns",
);
for drop in &audit.dropped_columns {
assert!(
drop.block == "intercept" || drop.block == "smooth_with_const",
"unexpected drop block name {:?}",
drop.block,
);
}
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 3,
"expected 3 surviving directions; got {total_kept} (summary: {})",
audit.summary,
);
assert!(
audit.summary.contains("intercept") && audit.summary.contains("smooth_with_const"),
"fatal summary must name both blocks; got {:?}",
audit.summary,
);
assert!(
audit.summary.contains("reparam")
|| audit.summary.contains("sum-to-zero")
|| audit.summary.contains("orthogonal")
|| audit.summary.contains("absorb"),
"fatal summary must include a reparameterisation suggestion; got {:?}",
audit.summary,
);
}
#[test]
fn audit_two_smooths_share_linear_direction() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let mut smooth_a = Array2::<f64>::zeros((n, 2));
let mut smooth_b = Array2::<f64>::zeros((n, 2));
for i in 0..n {
smooth_a[[i, 0]] = x[i];
smooth_a[[i, 1]] = x[i] * x[i];
smooth_b[[i, 0]] = x[i];
smooth_b[[i, 1]] = (x[i] - 0.3).powi(2);
}
let specs = [
spec_from_dense("smooth_a", smooth_a),
spec_from_dense("smooth_b", smooth_b),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
audit.fatal,
"exact x~x alias across two smooth blocks must be fatal under the halt gate: {}",
audit.summary,
);
let cross_linear_pair = audit
.aliased_pairs
.iter()
.find(|p| p.block_a == "smooth_a" && p.block_b == "smooth_b" && p.overlap > 0.999);
assert!(
cross_linear_pair.is_some(),
"expected an x~x alias pair between the two smooths; got pairs {:?}",
audit.aliased_pairs,
);
assert!(
!audit.dropped_columns.is_empty(),
"RRQR must demote one of the duplicated linear columns",
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 3,
"expected 3 independent directions (1 shared linear + 2 quadratics); got {total_kept}",
);
}
#[test]
fn audit_three_way_alias_is_attributed_not_fatal() {
let n = 64;
let x = linspace_minus_one_to_one(n);
let eps = 0.5;
let parametric = Array2::<f64>::from_shape_fn((n, 1), |(_, _)| 1.0);
let smooth_a = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| 1.0 + eps * x[i]);
let smooth_b = Array2::<f64>::from_shape_fn((n, 1), |(i, _)| 1.0 - eps * x[i]);
let specs = [
spec_from_dense("intercept", parametric),
spec_from_dense("smooth_a", smooth_a),
spec_from_dense("smooth_b", smooth_b),
];
let audit = audit_identifiability(&specs).expect("audit must succeed");
assert!(
!audit.dropped_columns.is_empty(),
"RRQR must attribute the three-way alias as a dropped column",
);
assert!(
audit.fatal,
"three-way alias with joint rank < joint cols must be fatal under the halt gate: {}",
audit.summary,
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 2,
"three-way alias collapses to rank 2; got {total_kept}"
);
}
#[test]
fn audit_biobank_shape_end_to_end() {
let n = 1024;
let x = linspace_minus_one_to_one(n);
let mut parametric = Array2::<f64>::zeros((n, 2));
for i in 0..n {
parametric[[i, 0]] = 1.0;
parametric[[i, 1]] = x[i];
}
let mut s_x = Array2::<f64>::zeros((n, 8));
for i in 0..n {
for k in 0..8 {
s_x[[i, k]] = (x[i] - (k as f64 - 4.0) * 0.2).powi(2);
}
}
let mut s_sin = Array2::<f64>::zeros((n, 6));
for i in 0..n {
for k in 0..6 {
s_sin[[i, k]] = ((k as f64 + 1.0) * x[i]).sin();
}
}
let mut alias_block = Array2::<f64>::zeros((n, 4));
for i in 0..n {
alias_block[[i, 0]] = x[i];
alias_block[[i, 1]] = x[i].cos();
alias_block[[i, 2]] = (2.0 * x[i]).cos();
alias_block[[i, 3]] = (3.0 * x[i]).cos();
}
let specs = [
spec_from_dense("parametric", parametric),
spec_from_dense("s_x", s_x),
spec_from_dense("s_sin", s_sin),
spec_from_dense("alias_block", alias_block),
];
let audit = audit_identifiability(&specs).expect("biobank-shape audit must succeed");
assert!(
audit.fatal,
"seeded biobank-shape exact x~x alias must be fatal under the halt gate: {}",
audit.summary,
);
assert!(
!audit.dropped_columns.is_empty(),
"seeded x~x alias must produce at least one dropped column",
);
assert_eq!(
audit.dropped_columns.len(),
1,
"biobank-shape audit should attribute exactly the seeded alias; \
got {:?}",
audit.dropped_columns,
);
let total_kept: usize = audit.blocks.iter().map(|b| b.effective_dim).sum();
assert_eq!(
total_kept, 19,
"biobank-shape: expected 19 kept directions; got {total_kept} ({})",
audit.summary,
);
}
}