use gam::families::marginal_slope_shared::{
BIOBANK_OUTER_SUBSAMPLE_K_MAX, BIOBANK_OUTER_SUBSAMPLE_K_MIN,
BIOBANK_OUTER_SUBSAMPLE_THRESHOLD, auto_outer_subsample_k, build_outer_score_subsample,
inject_biobank_outer_subsample, inject_biobank_outer_subsample_from_arrays, outer_row_indices,
outer_score_scale,
};
/// Pins `auto_outer_subsample_k` to exact values at four anchor sizes.
///
/// Each `(n, expected_k)` pair is checked with strict equality, so any change
/// to the auto-K rule that moves one of these anchors fails this test.
#[test]
fn auto_k_anchors_match_documented_values() {
    let anchors: [(usize, usize); 4] = [
        (50_000, 4_000),
        (100_000, 6_250),
        (320_000, 20_000),
        (1_000_000, 40_000),
    ];
    for (n, expected_k) in anchors {
        assert_eq!(auto_outer_subsample_k(n), expected_k);
    }
}
/// Checks that `auto_outer_subsample_k` saturates at both published bounds.
///
/// `n = 0` and `n = 60_000` must yield `BIOBANK_OUTER_SUBSAMPLE_K_MIN`;
/// `n = 640_000` and `n = 50_000_000` must yield `BIOBANK_OUTER_SUBSAMPLE_K_MAX`.
#[test]
fn auto_k_clamps_at_floor_and_ceiling() {
    // Inputs expected to land on the floor.
    for n in [0, 60_000] {
        assert_eq!(auto_outer_subsample_k(n), BIOBANK_OUTER_SUBSAMPLE_K_MIN);
    }
    // Inputs expected to land on the ceiling.
    for n in [640_000, 50_000_000] {
        assert_eq!(auto_outer_subsample_k(n), BIOBANK_OUTER_SUBSAMPLE_K_MAX);
    }
}
/// Verifies that `inject_biobank_outer_subsample` is a no-op when the input
/// length equals `BIOBANK_OUTER_SUBSAMPLE_THRESHOLD`.
///
/// The call must return `false` and leave `outer_score_subsample` unset on
/// default-constructed options.
#[test]
fn inject_biobank_outer_subsample_skips_below_threshold() {
    use gam::families::custom_family::BlockwiseFitOptions;

    // Boundary case: exactly at the threshold, not one row above it.
    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD;
    let z: Vec<f64> = (0..n).map(|row| row as f64 / n as f64).collect();
    // Alternating 0/1 secondary labels.
    let secondary: Vec<u8> = (0..n).map(|row| (row % 2) as u8).collect();

    let mut opts = BlockwiseFitOptions::default();
    assert!(
        !inject_biobank_outer_subsample(&mut opts, &z, &secondary),
        "inject must skip at exactly the threshold"
    );
    assert!(opts.outer_score_subsample.is_none());
}
/// Verifies that `inject_biobank_outer_subsample` installs a subsample once
/// the input is 10 000 rows past `BIOBANK_OUTER_SUBSAMPLE_THRESHOLD`.
///
/// The installed subsample must:
/// * have a mask length in `[K, K + 200]`, where `K = auto_outer_subsample_k(n)`
///   (the reason for the 200-row slack is not visible in this file);
/// * record `n_full == n`;
/// * carry `weight_scale == n / mask.len()` to within `1e-12`.
#[test]
fn inject_biobank_outer_subsample_fires_above_threshold_with_auto_k() {
    use gam::families::custom_family::BlockwiseFitOptions;

    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD + 10_000;
    let z: Vec<f64> = (0..n).map(|row| row as f64 / n as f64).collect();
    let secondary: Vec<u8> = (0..n).map(|row| (row % 2) as u8).collect();

    let mut opts = BlockwiseFitOptions::default();
    assert!(
        inject_biobank_outer_subsample(&mut opts, &z, &secondary),
        "inject must fire above threshold"
    );

    let s = opts
        .outer_score_subsample
        .as_deref()
        .expect("subsample installed");

    // Mask size must sit in the closed window [K, K + 200].
    let expected_k = auto_outer_subsample_k(n);
    let picked = s.mask.len();
    assert!(
        (expected_k..=expected_k + 200).contains(&picked),
        "subsample size {} not within [K, K+200] of auto K = {}",
        picked,
        expected_k,
    );

    assert_eq!(s.n_full, n);

    // The weight scale is checked against full-n divided by the mask size.
    let expected_scale = n as f64 / picked as f64;
    let drift = (s.weight_scale - expected_scale).abs();
    assert!(drift < 1e-12);
}
/// Verifies that `inject_biobank_outer_subsample` leaves an already-present
/// subsample alone, even when the input is above the threshold.
///
/// A four-row subsample with seed `0xBEEF` is installed up front; after the
/// call it must still be the one stored on the options (same seed, same mask
/// length) and the call must report `false`.
#[test]
fn inject_preserves_caller_supplied_subsample() {
    use gam::families::custom_family::BlockwiseFitOptions;
    use gam::families::marginal_slope_shared::OuterScoreSubsample;
    use std::sync::Arc;

    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD + 50_000;
    let z: Vec<f64> = (0..n).map(|row| row as f64).collect();
    let secondary = vec![0u8; n];

    // Caller-supplied subsample, installed before inject runs.
    let mut opts = BlockwiseFitOptions::default();
    opts.outer_score_subsample = Some(Arc::new(OuterScoreSubsample::new(
        vec![0, 1, 2, 3],
        n,
        0xBEEF,
    )));

    let installed = inject_biobank_outer_subsample(&mut opts, &z, &secondary);
    assert!(
        !installed,
        "inject must not overwrite caller-supplied subsample"
    );

    let kept = opts.outer_score_subsample.as_ref().unwrap();
    assert_eq!(kept.seed, 0xBEEF);
    assert_eq!(kept.mask.len(), 4);
}
/// Verifies that `inject_biobank_outer_subsample_from_arrays` accepts an
/// `f64` secondary indicator and that the resulting mask contains event rows.
///
/// Every 20th row is marked `1.0`, the rest `0.0`. After injection, at least
/// one masked row index must point at a `1.0` entry.
#[test]
fn inject_from_arrays_handles_event_indicator_correctly() {
    use gam::families::custom_family::BlockwiseFitOptions;

    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD + 10_000;
    let z: Vec<f64> = (0..n).map(|row| (row % 1000) as f64).collect();
    let secondary_f64: Vec<f64> = (0..n)
        .map(|row| match row % 20 {
            0 => 1.0,
            _ => 0.0,
        })
        .collect();

    let mut opts = BlockwiseFitOptions::default();
    assert!(inject_biobank_outer_subsample_from_arrays(
        &mut opts,
        &z,
        &secondary_f64
    ));

    let s = opts.outer_score_subsample.as_deref().unwrap();
    // Count the masked rows whose indicator is exactly 1.0.
    let event_rows: usize = s
        .mask
        .iter()
        .map(|&row| secondary_f64[row])
        .filter(|&flag| flag == 1.0)
        .count();
    assert!(
        event_rows > 0,
        "subsample mask has no event rows out of {} masked",
        s.mask.len()
    );
}
/// Verifies that `inject_biobank_outer_subsample` declines to install a
/// subsample when `secondary` is one element shorter than `z`.
///
/// The call must return `false` and leave `outer_score_subsample` as `None`.
#[test]
fn inject_rejects_mismatched_lengths() {
    use gam::families::custom_family::BlockwiseFitOptions;

    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD + 10_000;
    let z: Vec<f64> = (0..n).map(|row| row as f64).collect();
    // Deliberately one row short of `z`.
    let secondary_short = vec![0u8; n - 1];

    let mut opts = BlockwiseFitOptions::default();
    assert!(
        !inject_biobank_outer_subsample(&mut opts, &z, &secondary_short),
        "inject must defensively reject length mismatch"
    );
    assert!(opts.outer_score_subsample.is_none());
}
/// Verifies seed behaviour of `build_outer_score_subsample` on 5000 rows
/// with a requested size of 1000.
///
/// Two builds with the same seed must yield equal masks; a build with a
/// different seed must yield a different mask.
#[test]
fn build_outer_score_subsample_is_deterministic_per_seed() {
    let n = 5000;
    let z: Vec<f64> = (0..n).map(|row| (row as f64).sin()).collect();
    // Every third row is flagged 1, the rest 0.
    let secondary: Vec<u8> = (0..n).map(|row| u8::from(row % 3 == 0)).collect();

    // Same inputs and size each time; only the seed varies.
    let build = |seed| build_outer_score_subsample(&z, &secondary, 1000, seed);

    let s1 = build(0x123456789ABCDEF);
    let s2 = build(0x123456789ABCDEF);
    assert_eq!(s1.mask, s2.mask, "same seed must produce identical mask");

    let s3 = build(0xDEADBEEF);
    assert_ne!(
        s1.mask, s3.mask,
        "different seeds must produce different masks"
    );
}
/// Verifies the no-subsample fallbacks on default options with `n = 100`.
///
/// `outer_score_scale` must return 1.0 (within `1e-12`), and
/// `outer_row_indices` must yield exactly `0, 1, ..., n - 1`.
#[test]
fn outer_row_indices_and_scale_round_trip_to_full_n_when_no_subsample() {
    use gam::families::custom_family::BlockwiseFitOptions;

    let n = 100;
    let opts = BlockwiseFitOptions::default();

    let scale_error = (outer_score_scale(&opts, n) - 1.0).abs();
    assert!(scale_error < 1e-12, "no-subsample scale must be 1.0");

    let indices = outer_row_indices(&opts, n).to_vec();
    assert_eq!(indices.len(), n);
    assert_eq!(indices, (0..n).collect::<Vec<usize>>());
}
/// Verifies that cloning `BlockwiseFitOptions` shares the installed subsample
/// rather than copying it.
///
/// After injection, the options are cloned ten times. Each clone's
/// `outer_score_subsample` must point at the same allocation as the
/// original, compared via `Arc::as_ptr`.
#[test]
fn subsample_mask_arc_is_pinned_after_inject() {
    use gam::families::custom_family::BlockwiseFitOptions;
    use std::sync::Arc;

    let n = BIOBANK_OUTER_SUBSAMPLE_THRESHOLD + 50_000;
    let z: Vec<f64> = (0..n).map(|row| row as f64 / n as f64).collect();
    // Every fourth row is flagged 1, the rest 0.
    let secondary: Vec<u8> = (0..n).map(|row| u8::from(row % 4 == 0)).collect();

    let mut opts = BlockwiseFitOptions::default();
    assert!(inject_biobank_outer_subsample(&mut opts, &z, &secondary));

    // Hold our own handle so the reference pointer stays valid for the loop.
    let original_arc = Arc::clone(
        opts.outer_score_subsample
            .as_ref()
            .expect("subsample installed"),
    );
    let original_ptr = Arc::as_ptr(&original_arc);

    for iter in 0..10 {
        let cloned = opts.clone();
        let cloned_arc = Arc::clone(
            cloned
                .outer_score_subsample
                .as_ref()
                .expect("subsample preserved across clone"),
        );
        assert_eq!(
            Arc::as_ptr(&cloned_arc),
            original_ptr,
            "iter {iter}: subsample Arc identity must be preserved across BlockwiseFitOptions::clone()",
        );
    }
}
/// Verifies that `auto_outer_subsample_k` never decreases as `n` grows.
///
/// Samples `n` from 0 up to (but excluding) 2 000 000 in steps of 7919 and
/// checks each result against the previous one, starting from a baseline of 0.
#[test]
fn auto_k_monotone_non_decreasing_in_n() {
    // The fold accumulator carries the previous K forward to the next sample.
    let _ = (0..2_000_000).step_by(7919).fold(0usize, |prev, n| {
        let k = auto_outer_subsample_k(n);
        assert!(
            k >= prev,
            "auto_outer_subsample_k regressed at n={n}: prev={prev} k={k}"
        );
        k
    });
}