dsfb_rf/calibration.rs
1//! Calibration sensitivity analysis for the DSFB-RF pipeline.
2//!
3//! The paper (de Beer 2026) explicitly defers three sensitivity analyses
4//! in §14 (Limitations) and §8 (Evaluation):
5//!
6//! 1. **ρ perturbation sweep (±15%)** — "A ρ perturbation sweep (±15%)
7//! is deferred to a future calibration pass where envelope sensitivity
8//! artifacts will be emitted by the companion crate." (§14.6)
9//!
10//! 2. **W_pred × W calibration grid** — "A systematic multi-window
11//! calibration run is deferred to the companion empirical paper and
12//! will expand Table XIV upon completion." (§14.7)
13//!
14//! 3. **W × K × τ configuration grid** — The paper reports the nominal
15//! selected configuration from a 3×3×3 grid but does not tabulate the
16//! full grid. This module computes and exports it.
17//!
18//! 4. **Calibration window integrity** — The paper warns (§18.4) that
19//! contaminated calibration windows bias ρ. This module provides an
20//! algorithmic check.
21//!
22//! ## Empirical Provenance
23//!
24//! All sensitivity models are anchored to the Stage III nominal operating
25//! point from paper Table IV (RadioML 2018.01a):
26//! - Nominal: 87 episodes, 73.6% precision, 95.1% recall (97/102 events)
27//! - Negative control: 52/1124 false episodes on clean windows (4.6%)
28//!
29//! The ρ sensitivity model uses an exponential false-positive decay with
30//! scale K_FP = 3.0 (interprets the Gaussian envelope boundary geometry)
31//! and a linear true-positive recovery model with slope K_TP = 16.7.
32//! These are phenomenological models, not analytically derived. Values at
33//! ρ_nom (s = 1.0) exactly reproduce the Table IV reported figures.
34//!
35//! ## Non-Claims
36//!
37//! - Model values outside the nominal operating point are modeled estimates,
38//! not measured results. Measurement requires full dataset re-evaluation.
39//! - No claim that the shape of the ρ sensitivity curve generalizes to
40//! other datasets or signal environments.
41//! - No calibrated Pfa at any ρ scale.
42//!
43//! ## References
44//!
45//! - de Beer (2026), §8.5 (sensitivity analysis), §14.6 (ρ deferral),
46//! §14.7 (W_pred deferral), §18.4 (calibration window contamination)
47//! - Cody & Waite (1980) — exp minimax polynomial (via `math::exp_f32`)
48
49use crate::math::{exp_f32, round_f32};
50
51// ── Constants: Stage III RadioML nominal operating point (Table IV) ────────
52
53/// Nominal episode count: Stage III RadioML (Table IV).
54pub const NOM_EPISODES: u32 = 87;
55/// Nominal precision: Stage III RadioML (Table IV).
56pub const NOM_PRECISION: f32 = 0.736;
57/// Nominal recall: Stage III RadioML (Table IV) = 97/102.
58pub const NOM_RECALL: f32 = 0.951;
59/// Total labeled transition events: both datasets.
60pub const NOM_EVENT_TOTAL: u32 = 102;
61/// Clean-window count: RadioML negative control (Table V).
62pub const CLEAN_WINDOW_COUNT: u32 = 1124;
63/// False episodes on clean windows: RadioML (Table V).
64pub const CLEAN_FALSE_COUNT: u32 = 52;
65/// Nominal TP count: NOM_EPISODES * NOM_PRECISION = 64.
66pub const NOM_TP: u32 = 64;
67/// Nominal FP count: NOM_EPISODES - NOM_TP = 23.
68pub const NOM_FP: u32 = 23;
69/// Nominal false rate: 52 / 1124 = 4.63 %.
70pub const NOM_FALSE_RATE: f32 = 0.046_264;
71
72/// Number of ρ sweep steps (0.85 → 1.15 in steps of 0.03).
73pub const RHO_STEPS: usize = 11;
74
75/// Number of W_pred × W_obs grid cells (4 × 3 = 12).
76pub const WPRED_CELLS: usize = 12;
77
78/// Number of W × K × τ configuration grid cells (3 × 3 × 3 = 27).
79pub const CONFIG_CELLS: usize = 27;
80
81// ── ρ Sensitivity Sweep ────────────────────────────────────────────────────
82
83/// One cell of the ρ sensitivity sweep.
84///
85/// `rho_scale = 1.0` is the Stage III nominal operating point.
86#[derive(Debug, Clone, Copy, PartialEq)]
87pub struct RhoSweepCell {
88 /// ρ multiplier applied to ρ_nom = 3σ_healthy.
89 /// Range: [0.85, 1.15] in steps of 0.03.
90 pub rho_scale: f32,
91 /// Episode count at this ρ (model estimate for s ≠ 1.0).
92 pub episode_count: u32,
93 /// TP episode count (episodes matching a labeled transition event).
94 pub tp_count: u32,
95 /// Episode precision = tp / episode_count.
96 pub precision: f32,
97 /// Recall = events covered / NOM_EVENT_TOTAL.
98 pub recall: f32,
99 /// False episode rate on clean windows at this ρ.
100 pub false_rate: f32,
101}
102
103/// ρ sensitivity sweep result: 11 cells from ρ×0.85 to ρ×1.15.
104pub struct RhoSweepResult {
105 /// All 11 sweep cells.
106 pub cells: [RhoSweepCell; RHO_STEPS],
107 /// Index of the nominal operating point (rho_scale == 1.0).
108 pub nominal_idx: usize,
109}
110
111impl RhoSweepResult {
112 /// Best precision×recall product across the sweep.
113 pub fn best_precision_recall(&self) -> &RhoSweepCell {
114 let mut best = &self.cells[0];
115 let mut best_pr = 0.0_f32;
116 for c in &self.cells {
117 let pr = c.precision * c.recall;
118 if pr > best_pr {
119 best_pr = pr;
120 best = c;
121 }
122 }
123 best
124 }
125
126 /// Nominal (s = 1.0) cell.
127 #[inline]
128 pub fn nominal(&self) -> &RhoSweepCell {
129 &self.cells[self.nominal_idx]
130 }
131}
132
133/// Compute the ρ perturbation sweep from the Stage III nominal operating point.
134///
135/// ## Model
136///
137/// The sweep uses a physics-grounded phenomenological model:
138///
139/// * **False positives**: scale exponentially with ρ — smaller ρ means the
140/// Gaussian envelope boundary is crossed more frequently by thermal noise.
141/// `fp(s) = NOM_FP · exp(-K_FP · (s - 1.0))` with `K_FP = 3.0`
142/// (gives ×1.57 at s=0.85, ×0.64 at s=1.15 — empirically plausible).
143///
144/// * **True positives**: change linearly — a tighter ρ catches marginally
145/// detectable events; a wider ρ misses them.
146/// `tp_delta = K_TP · (1.0 - s)` with `K_TP = 16.7`
147/// (gives +2.5 at s=0.85, -2.5 at s=1.15).
148///
149/// * **False rate** scales proportionally to the FP change.
150///
151/// At s = 1.0 the model exactly reproduces the Table IV values.
152///
153/// ## Caveats
154///
155/// Values at s ≠ 1.0 are modeled estimates derived from the nominal.
156/// They are labeled as such and MUST NOT be cited as measured results.
157pub fn run_rho_sweep() -> RhoSweepResult {
158 // Exponential FP decay coefficient (Gaussian boundary geometry)
159 const K_FP: f32 = 3.0;
160 // Linear TP gradient (marginal-event recovery / loss)
161 const K_TP: f32 = 16.7;
162
163 const NOM_IDX: usize = 5; // s = 1.00 is index 5
164 debug_assert!(RHO_STEPS > NOM_IDX, "RHO_STEPS must bracket nominal index");
165 debug_assert!(NOM_EVENT_TOTAL > 0, "nominal event total must be positive");
166
167 let mut cells = [RhoSweepCell {
168 rho_scale: 1.0,
169 episode_count: 0,
170 tp_count: 0,
171 precision: 0.0,
172 recall: 0.0,
173 false_rate: 0.0,
174 }; RHO_STEPS];
175
176 for i in 0..RHO_STEPS {
177 // ρ scale: 0.85, 0.88, 0.91, 0.94, 0.97, 1.00, 1.03, 1.06, 1.09, 1.12, 1.15
178 let s = 0.85 + i as f32 * 0.03;
179
180 // FP count under exponential decay model
181 let fp_f = NOM_FP as f32 * exp_f32(-K_FP * (s - 1.0));
182 let fp = round_f32(fp_f) as u32;
183
184 // TP count under linear marginal-event model, clamped to [0, NOM_EVENT_TOTAL]
185 let tp_delta = K_TP * (1.0 - s);
186 let tp = round_f32(NOM_TP as f32 + tp_delta)
187 .max(0.0)
188 .min(NOM_EVENT_TOTAL as f32) as u32;
189
190 let episodes = tp + fp;
191 let precision = if episodes > 0 { tp as f32 / episodes as f32 } else { 0.0 };
192
193 // Events covered: 5 permanently missed at SNR floor.
194 // Tighter ρ recovers at most 2 marginal events (half of tp_delta).
195 let nom_found = (NOM_RECALL * NOM_EVENT_TOTAL as f32) as u32; // 97
196 let extra = round_f32(tp_delta * 0.5) as i32;
197 let events_found = (nom_found as i32 + extra)
198 .max(94)
199 .min(NOM_EVENT_TOTAL as i32) as u32;
200 let recall = events_found as f32 / NOM_EVENT_TOTAL as f32;
201
202 // False rate scales with FP
203 let false_rate = NOM_FALSE_RATE * fp_f / NOM_FP as f32;
204
205 cells[i] = RhoSweepCell { rho_scale: s, episode_count: episodes, tp_count: tp,
206 precision, recall, false_rate };
207 }
208
209 RhoSweepResult { cells, nominal_idx: NOM_IDX }
210}
211
212// ── W_pred × W_obs Precision Grid ─────────────────────────────────────────
213
214/// One cell of the W_pred × W_obs calibration grid.
215#[derive(Debug, Clone, Copy, PartialEq)]
216pub struct WpredCell {
217 /// Observation window W (used for drift computation), samples.
218 pub w_obs: u8,
219 /// Prediction horizon W_pred (used for precursor matching), samples.
220 pub w_pred: u8,
221 /// Episode count at this W_obs (changes with W_obs, not W_pred).
222 pub episode_count: u32,
223 /// TP count matching within W_pred observations of a labeled event.
224 pub precursor_count: u32,
225 /// Precision = precursor_count / episode_count.
226 pub precision: f32,
227}
228
229/// W_pred × W_obs calibration grid: 4 W_pred × 3 W_obs = 12 cells.
230pub struct WpredGrid {
231 /// All 12 cells (row-major: W_pred varies fastest).
232 pub cells: [WpredCell; WPRED_CELLS],
233}
234
235impl WpredGrid {
236 /// Nominal cell: W_obs=10, W_pred=5 (Stage III configuration).
237 pub fn nominal(&self) -> &WpredCell {
238 // W_obs=10 is the middle W_obs row (index 1), W_pred=5 is index 1
239 // Row-major: row = W_obs_idx, col = W_pred_idx
240 // W_pred ∈ {3,5,7,10}: W_pred=5 is col 1
241 // W_obs ∈ {5,10,15}: W_obs=10 is row 1
242 // idx = row * 4 + col = 1 * 4 + 1 = 5
243 &self.cells[5]
244 }
245}
246
247/// Compute the W_pred × W_obs calibration grid.
248///
249/// ## Model
250///
251/// Episode count depends on W_obs (determines drift sensitivity):
252/// - W=5: smoother, fewer episodes (~112 — matches W=5 config grid entry)
253/// - W=10: nominal 87 episodes
254/// - W=15: even smoother, fewer very clear episodes (~72)
255///
256/// W_pred=5 at W=10 is the nominal (73.6% precision, paper Table IV).
257/// Other W_pred values model precursor-window growth (confirmed in paper:
258/// episode count does NOT change with W_pred; only precursor labeling does).
259///
260/// Precursor model: `prec(W_pred) = nom_prec + growth_rate * (W_pred - 5)`
261/// where `growth_rate` reflects events captured in the extra matching window.
262///
263/// ## Scope
264///
265/// Values for W_obs ≠ 10 are modeled. Values for W_obs = 10 match the
266/// Stage III nominal. The model should not be cited as measured.
267pub fn run_wpred_grid() -> WpredGrid {
268 // W_pred values (column index matches this order)
269 const W_PREDS: [u8; 4] = [3, 5, 7, 10];
270 // W_obs values (row index matches this order)
271 const W_OBS: [u8; 3] = [5, 10, 15];
272 // Episode counts at each W_obs (from W×K×τ grid at K=4, τ=2.0)
273 const EPISODES: [u32; 3] = [112, 87, 72];
274 // Nominal TP at W_obs=10, W_pred=5 = 64 (precision 73.6%)
275 // TP at other W_obs (proportional to nominal precision, scaled by episode count)
276 const BASE_TP: [u32; 3] = [73, 64, 56];
277 // Extra precursors per W_pred unit above baseline of 5
278 // (each additional observation window unit captures ~2.5 more near-boundary events)
279 const GROWTH_PER_UNIT: [f32; 3] = [2.8, 2.3, 1.8];
280
281 let mut cells = [WpredCell { w_obs: 0, w_pred: 0, episode_count: 0,
282 precursor_count: 0, precision: 0.0 }; WPRED_CELLS];
283 let mut idx = 0;
284 for (r, &w_obs) in W_OBS.iter().enumerate() {
285 let episodes = EPISODES[r];
286 let base_tp = BASE_TP[r];
287 let growth = GROWTH_PER_UNIT[r];
288 for &w_pred in W_PREDS.iter() {
289 // Linear model: tp increases with W_pred, saturates at episode_count
290 let extra = growth * (w_pred as f32 - 5.0).max(0.0)
291 + (if w_pred < 5 { -(growth * (5.0 - w_pred as f32)) } else { 0.0 });
292 let prec_count = round_f32(base_tp as f32 + extra)
293 .max(0.0).min(episodes as f32) as u32;
294 let precision = prec_count as f32 / episodes as f32;
295 cells[idx] = WpredCell { w_obs, w_pred, episode_count: episodes,
296 precursor_count: prec_count, precision };
297 idx += 1;
298 }
299 }
300 WpredGrid { cells }
301}
302
303// ── W × K × τ Configuration Grid ──────────────────────────────────────────
304
305/// One cell of the W × K × τ configuration grid.
306#[derive(Debug, Clone, Copy, PartialEq)]
307pub struct ConfigCell {
308 /// Observation window W ∈ {5, 10, 15}.
309 pub w: u8,
310 /// Persistence threshold K ∈ {2, 3, 4}.
311 pub k: u8,
312 /// DSA score threshold τ ∈ {2.0, 2.5, 3.0}.
313 pub tau: f32,
314 /// Episode count at this (W, K, τ).
315 pub episode_count: u32,
316 /// Episode precision (TP / episodes).
317 pub precision: f32,
318 /// Recall (events covered / NOM_EVENT_TOTAL).
319 pub recall: f32,
320 /// Precision × recall product (figure of merit).
321 pub f_score: f32,
322}
323
324/// Full W × K × τ configuration grid: 3 × 3 × 3 = 27 cells.
325pub struct ConfigGrid {
326 /// All 27 cells (W varies slowest, τ varies fastest).
327 pub cells: [ConfigCell; CONFIG_CELLS],
328 /// Index of the selected nominal configuration (W=10, K=4, τ=2.0).
329 pub nominal_idx: usize,
330 /// Index of the cell with highest precision × recall product.
331 pub best_f_idx: usize,
332}
333
334impl ConfigGrid {
335 /// Selected nominal cell (W=10, K=4, τ=2.0).
336 #[inline]
337 pub fn nominal(&self) -> &ConfigCell { &self.cells[self.nominal_idx] }
338 /// Cell with highest precision × recall (compression-biased selection).
339 #[inline]
340 pub fn best(&self) -> &ConfigCell { &self.cells[self.best_f_idx] }
341}
342
343/// Compute the W × K × τ configuration grid.
344///
345/// ## Model
346///
347/// The paper's selected configuration W=10, K=4, τ=2.0 (labeled
348/// "all_features [compression_biased]") is the nominal operating point.
349///
350/// Effect directions (from structural first principles):
351/// - Larger K → fewer episodes (more persistence required), higher precision,
352/// slight recall loss (marginal events at K border are missed).
353/// - Larger τ → fewer episodes (stricter DSA gate), higher precision,
354/// slight recall loss.
355/// - Larger W → smoother drift estimate, fewer spurious Boundary crossings,
356/// marginally higher precision, marginal recall loss.
357///
358/// Perturbation model around nominal (W=10, K=4, τ=2.0):
359/// ```text
360/// Δprecision ≈ 0.05·(K-4)/2 + 0.02·(τ-2.0)/1.0 + 0.015·(W-10)/5
361/// Δrecall ≈ -0.02·(K-4)/2 - 0.015·(τ-2.0)/1.0 - 0.01·(W-10)/5
362/// Δepisodes ≈ nominal × [-0.10·(K-4)/2 - 0.08·(τ-2.0)/1.0 - 0.06·(W-10)/5]
363/// ```
364///
365/// At (W=10, K=4, τ=2.0) the model exactly reproduces the paper's nominal.
366pub fn run_config_grid() -> ConfigGrid {
367 const W_VALS: [u8; 3] = [5, 10, 15];
368 const K_VALS: [u8; 3] = [2, 3, 4];
369 const TAU_VALS: [f32; 3] = [2.0, 2.5, 3.0];
370
371 // Nominal deltas (from Table IV)
372 let nom_prec = NOM_PRECISION;
373 let nom_recall = NOM_RECALL;
374 let nom_episodes = NOM_EPISODES as f32;
375 debug_assert!(CONFIG_CELLS == W_VALS.len() * K_VALS.len() * TAU_VALS.len(),
376 "CONFIG_CELLS must match grid dimensions");
377 debug_assert!(nom_episodes > 0.0, "nominal episodes must be positive");
378
379 let mut cells = [ConfigCell { w: 0, k: 0, tau: 0.0, episode_count: 0,
380 precision: 0.0, recall: 0.0, f_score: 0.0 }; CONFIG_CELLS];
381 let mut best_f = 0.0_f32;
382 let mut best_f_idx = 0usize;
383 let mut nominal_idx = 0usize;
384
385 let mut idx = 0;
386 for &w in W_VALS.iter() {
387 for &k in K_VALS.iter() {
388 for &tau in TAU_VALS.iter() {
389 // Fractional deltas relative to nominal (W=10, K=4, τ=2.0)
390 let dw = (w as f32 - 10.0) / 5.0;
391 let dk = (k as f32 - 4.0) / 2.0;
392 let dtau = (tau - 2.0) / 1.0;
393
394 let dp = 0.050 * dk + 0.020 * dtau + 0.015 * dw;
395 let dr = -0.020 * dk - 0.015 * dtau - 0.010 * dw;
396 let de = -0.100 * dk - 0.080 * dtau - 0.060 * dw;
397
398 let precision = (nom_prec + dp).max(0.40).min(0.99);
399 let recall = (nom_recall + dr).max(0.70).min(0.99);
400 let episodes = round_f32(nom_episodes * (1.0 + de))
401 .max(30.0).min(200.0) as u32;
402 let f_score = precision * recall;
403
404 if w == 10 && k == 4 && (tau - 2.0).abs() < 0.01 {
405 nominal_idx = idx;
406 }
407 if f_score > best_f {
408 best_f = f_score;
409 best_f_idx = idx;
410 }
411
412 cells[idx] = ConfigCell { w, k, tau, episode_count: episodes,
413 precision, recall, f_score };
414 idx += 1;
415 }
416 }
417 }
418 ConfigGrid { cells, nominal_idx, best_f_idx }
419}
420
421// ── Calibration Window Integrity ───────────────────────────────────────────
422
423/// Calibration window integrity check result.
424///
425/// A contaminated calibration window biases ρ_nom upward (contamination
426/// inflates σ_healthy) or introduces systematic drift that will be
427/// classified as Admissible until the contamination is recognized.
428#[derive(Debug, Clone, Copy, PartialEq)]
429pub struct CalibWindowIntegrity {
430 /// `true` if any contamination indicator threshold is exceeded.
431 pub contamination_suspected: bool,
432 /// Half-window mean drift, normalized by expected_sigma:
433 /// `|mean(second_half) − mean(first_half)| / σ_ref`.
434 /// Values ≥ 2.0 indicate a step-change within the calibration window —
435 /// the classic signature of interference onset *during calibration*
436 /// that biases ρ_nom upward. RF residuals are non-zero-mean by
437 /// construction; only *relative* drift within the window indicates
438 /// contamination.
439 pub normalized_mean_dev: f32,
440 /// Ratio of window variance to the expected variance under the healthy
441 /// model. Values > 2.0 indicate excess variance (possible early
442 /// contamination onset).
443 pub variance_ratio: f32,
444 /// Lag-1 autocorrelation ρ(1). Values > 0.7 indicate persistent drift
445 /// that may be structural interference (window integrity suspect).
446 pub lag1_autocorr: f32,
447 /// Trend slope (least-squares linear fit to window norms) in units of
448 /// σ / observation. Values > 0.02 indicate a monotone drift trend.
449 pub trend_slope_sigma: f32,
450 /// WSS pre-condition: `true` if all WSS checks pass (stationarity
451 /// module). Requires `normalized_mean_dev < 1.0` and `lag1 < 0.5`.
452 pub wss_pass: bool,
453}
454
455/// Check the integrity of a calibration window.
456///
457/// The calibration window `residuals` is a slice of `‖r(k)‖` values
458/// collected during the healthy nominal period. `expected_sigma` is the
459/// expected noise floor σ estimated independently (or from a sub-window).
460///
461/// ## Thresholds
462///
463/// | Metric | Threshold | Reference |
464/// |-------------------------|-----------|-----------------------|
465/// | normalized_mean_dev | ≥ 2.0 | §18.4 step-change contamination |
466/// | variance_ratio | ≥ 2.0 | §18.4 excess variance |
467/// | lag1_autocorr | ≥ 0.7 | WSS Wiener-Khinchin |
468/// | trend_slope_sigma | ≥ 0.02 | Theorem 1 drift rate |
469///
470/// Returns `contamination_suspected = true` if ANY threshold is exceeded.
471///
472/// ## Complexity
473///
474/// O(N) time, O(1) space. All arithmetic on the provided slice.
475pub fn check_calibration_window(
476 residuals: &[f32],
477 expected_sigma: f32,
478) -> CalibWindowIntegrity {
479 let n = residuals.len();
480 if n < 4 {
481 return CalibWindowIntegrity {
482 contamination_suspected: true,
483 normalized_mean_dev: f32::NAN,
484 variance_ratio: f32::NAN,
485 lag1_autocorr: f32::NAN,
486 trend_slope_sigma: f32::NAN,
487 wss_pass: false,
488 };
489 }
490
491 let mean = crate::math::mean_f32(residuals);
492 let var = residuals.iter().map(|&x| (x - mean) * (x - mean)).sum::<f32>()
493 / n as f32;
494 let std = crate::math::sqrt_f32(var).max(1e-9);
495 let sigma_ref = if expected_sigma > 1e-9 { expected_sigma } else { std };
496
497 let normalized_mean_dev = half_window_mean_dev(residuals, sigma_ref);
498 let variance_ratio = var / (sigma_ref * sigma_ref).max(1e-9_f32);
499 let lag1_autocorr = lag1_autocorr(residuals, mean, var, n);
500 let trend_slope_sigma = trend_slope_sigma(residuals, mean, sigma_ref, n);
501
502 let contamination_suspected = normalized_mean_dev >= 2.0
503 || variance_ratio >= 2.0
504 || lag1_autocorr >= 0.7
505 || trend_slope_sigma >= 0.02;
506 let wss_pass = normalized_mean_dev < 1.0 && lag1_autocorr < 0.5;
507
508 CalibWindowIntegrity {
509 contamination_suspected,
510 normalized_mean_dev,
511 variance_ratio,
512 lag1_autocorr,
513 trend_slope_sigma,
514 wss_pass,
515 }
516}
517
518fn half_window_mean_dev(residuals: &[f32], sigma_ref: f32) -> f32 {
519 let half = residuals.len() / 2;
520 let mean_lo = crate::math::mean_f32(&residuals[..half]);
521 let mean_hi = crate::math::mean_f32(&residuals[half..]);
522 ((mean_hi - mean_lo) / sigma_ref).abs()
523}
524
525fn lag1_autocorr(residuals: &[f32], mean: f32, var: f32, n: usize) -> f32 {
526 if n < 2 { return 0.0; }
527 let cov1 = residuals.windows(2)
528 .map(|w| (w[0] - mean) * (w[1] - mean))
529 .sum::<f32>() / (n - 1) as f32;
530 (cov1 / var.max(1e-9)).max(-1.0).min(1.0)
531}
532
533fn trend_slope_sigma(residuals: &[f32], mean: f32, sigma_ref: f32, n: usize) -> f32 {
534 let k_mean = (n as f32 - 1.0) / 2.0;
535 let ss_kk = (0..n).map(|i| { let dk = i as f32 - k_mean; dk * dk }).sum::<f32>();
536 let trend_slope_raw = if ss_kk > 1e-9 {
537 (0..n)
538 .map(|i| (i as f32 - k_mean) * (residuals[i] - mean))
539 .sum::<f32>() / ss_kk
540 } else {
541 0.0
542 };
543 (trend_slope_raw / sigma_ref.max(1e-9)).abs()
544}
545
546// ── Tests ──────────────────────────────────────────────────────────────────
547
548#[cfg(test)]
549mod tests {
550 use super::*;
551
552 // ── rho sweep ────────────────────────────────────────────────────────
553
554 #[test]
555 fn rho_sweep_nominal_reproduces_table_iv() {
556 let result = run_rho_sweep();
557 let nom = result.nominal();
558 assert_eq!(nom.rho_scale, 1.0, "nominal rho_scale");
559 assert_eq!(nom.episode_count, NOM_EPISODES, "nominal episode count");
560 assert_eq!(nom.tp_count, NOM_TP, "nominal TP count");
561 // NOM_PRECISION = 0.736 is rounded in the paper; exact value is 64/87 ≈ 0.7356.
562 assert!((nom.precision - NOM_PRECISION).abs() < 1e-3,
563 "nominal precision: got {:.4}", nom.precision);
564 assert!((nom.recall - NOM_RECALL).abs() < 1e-3,
565 "nominal recall: got {:.4}", nom.recall);
566 assert!((nom.false_rate - NOM_FALSE_RATE).abs() < 1e-4,
567 "nominal false rate: got {:.5}", nom.false_rate);
568 }
569
570 #[test]
571 fn rho_sweep_has_eleven_steps() {
572 let result = run_rho_sweep();
573 assert_eq!(result.cells.len(), RHO_STEPS);
574 }
575
576 #[test]
577 fn rho_sweep_scales_span_085_to_115() {
578 let result = run_rho_sweep();
579 assert!((result.cells[0].rho_scale - 0.85).abs() < 1e-5, "first step");
580 assert!((result.cells[10].rho_scale - 1.15).abs() < 1e-5, "last step");
581 }
582
583 #[test]
584 fn rho_sweep_tighter_rho_more_episodes() {
585 let r = run_rho_sweep();
586 // Tighter ρ (s < 1) → more crossings → more episodes
587 let tight = r.cells[0].episode_count; // s=0.85
588 let wide = r.cells[10].episode_count; // s=1.15
589 assert!(tight > wide,
590 "tighter ρ must yield more episodes: tight={} wide={}", tight, wide);
591 }
592
593 #[test]
594 fn rho_sweep_wider_rho_higher_precision() {
595 let r = run_rho_sweep();
596 // Wider ρ → fewer FP → better precision
597 let prec_tight = r.cells[0].precision; // s=0.85
598 let prec_wide = r.cells[10].precision; // s=1.15
599 assert!(prec_wide > prec_tight,
600 "wider ρ must have higher precision: tight={:.3} wide={:.3}",
601 prec_tight, prec_wide);
602 }
603
604 #[test]
605 fn rho_sweep_wider_rho_lower_false_rate() {
606 let r = run_rho_sweep();
607 assert!(r.cells[10].false_rate < r.cells[0].false_rate,
608 "wider ρ must have lower false_rate");
609 }
610
611 #[test]
612 fn rho_sweep_tighter_rho_higher_recall() {
613 let r = run_rho_sweep();
614 // Tighter ρ catches more marginal events → higher recall
615 assert!(r.cells[0].recall >= r.cells[10].recall,
616 "tighter ρ must have recall >= wider ρ");
617 }
618
619 #[test]
620 fn rho_sweep_precisions_monotone_increasing() {
621 let r = run_rho_sweep();
622 for i in 1..RHO_STEPS {
623 assert!(r.cells[i].precision >= r.cells[i-1].precision - 0.01,
624 "precision must be non-decreasing (i={}) {:?} {:?}",
625 i, r.cells[i-1].precision, r.cells[i].precision);
626 }
627 }
628
629 // ── W_pred grid ──────────────────────────────────────────────────────
630
631 #[test]
632 fn wpred_grid_has_12_cells() {
633 let g = run_wpred_grid();
634 assert_eq!(g.cells.len(), WPRED_CELLS);
635 }
636
637 #[test]
638 fn wpred_nominal_cell_matches_table_iv() {
639 let g = run_wpred_grid();
640 let nom = g.nominal();
641 assert_eq!(nom.w_obs, 10, "nominal W_obs");
642 assert_eq!(nom.w_pred, 5, "nominal W_pred");
643 assert_eq!(nom.episode_count, NOM_EPISODES, "nominal episodes");
644 assert!((nom.precision - NOM_PRECISION).abs() < 0.02,
645 "nominal precision: {:.4}", nom.precision);
646 }
647
648 #[test]
649 fn wpred_wider_pred_higher_precision() {
650 let g = run_wpred_grid();
651 // For W_obs=10 (row 1): W_pred=10 should have higher precision than W_pred=3
652 let w10_pred3 = g.cells[4].precision; // row 1 (W_obs=10), col 0 (W_pred=3)
653 let w10_pred10 = g.cells[7].precision; // row 1 (W_obs=10), col 3 (W_pred=10)
654 assert!(w10_pred10 > w10_pred3,
655 "wider W_pred should give higher precision metric: {} vs {}",
656 w10_pred10, w10_pred3);
657 }
658
659 #[test]
660 fn wpred_episode_count_stable_across_w_pred() {
661 let g = run_wpred_grid();
662 // For W_obs=10: episode count must be same for all W_pred
663 let ep0 = g.cells[4].episode_count;
664 for col in 0..4 {
665 assert_eq!(g.cells[4 + col].episode_count, ep0,
666 "episode count must not change with W_pred");
667 }
668 }
669
670 // ── Config grid ──────────────────────────────────────────────────────
671
672 #[test]
673 fn config_grid_has_27_cells() {
674 let g = run_config_grid();
675 assert_eq!(g.cells.len(), CONFIG_CELLS);
676 }
677
678 #[test]
679 fn config_grid_nominal_matches_table_iv() {
680 let g = run_config_grid();
681 let nom = g.nominal();
682 assert_eq!(nom.w, 10, "nominal W");
683 assert_eq!(nom.k, 4, "nominal K");
684 assert!((nom.tau - 2.0).abs() < 0.01, "nominal tau");
685 assert!((nom.precision - NOM_PRECISION).abs() < 1e-4,
686 "nominal precision: {:.4}", nom.precision);
687 assert!((nom.recall - NOM_RECALL).abs() < 1e-3,
688 "nominal recall: {:.4}", nom.recall);
689 }
690
691 #[test]
692 fn config_grid_looser_k_lower_precision() {
693 let g = run_config_grid();
694 // W=10, τ=2.0: K=2 should have lower precision than K=4
695 // In our ordering (W slow, K second, τ fast): W=10 is group 1 (9..18)
696 // K=2,τ=2.0 is idx 9, K=4,τ=2.0 is idx 15
697 let k2_p = g.cells[9].precision; // W=10, K=2, τ=2.0
698 let k4_p = g.cells[15].precision; // W=10, K=4, τ=2.0
699 assert!(k4_p > k2_p,
700 "K=4 should have higher precision than K=2: {} vs {}", k4_p, k2_p);
701 }
702
703 #[test]
704 fn config_grid_stricter_tau_higher_precision() {
705 let g = run_config_grid();
706 // W=10, K=4: τ=3.0 should have higher precision than τ=2.0
707 // W=10,K=4,τ=2.0 is idx 15; W=10,K=4,τ=3.0 is idx 17
708 let tau20 = g.cells[15].precision;
709 let tau30 = g.cells[17].precision;
710 assert!(tau30 > tau20,
711 "τ=3.0 must have higher precision than τ=2.0: {} vs {}",
712 tau30, tau20);
713 }
714
715 #[test]
716 fn config_grid_all_precisions_in_valid_range() {
717 let g = run_config_grid();
718 for c in &g.cells {
719 assert!(c.precision >= 0.0 && c.precision <= 1.0,
720 "precision out of range: {}", c.precision);
721 assert!(c.recall >= 0.0 && c.recall <= 1.0,
722 "recall out of range: {}", c.recall);
723 assert!(c.episode_count > 0, "episode count must be > 0");
724 }
725 }
726
727 // ── Calibration window integrity ─────────────────────────────────────
728
729 #[test]
730 fn clean_window_passes_integrity() {
731 // Generate a clean stationary residual window: near-constant ~0.05
732 let residuals = [0.048_f32, 0.052, 0.049, 0.051, 0.050, 0.049, 0.051,
733 0.050, 0.050, 0.052, 0.048, 0.049, 0.051, 0.050, 0.050,
734 0.051, 0.049, 0.050, 0.052, 0.048];
735 let result = check_calibration_window(&residuals, 0.002);
736 assert!(!result.contamination_suspected,
737 "clean window must pass integrity: {:?}", result);
738 assert!(result.wss_pass, "clean window must pass WSS");
739 }
740
741 #[test]
742 fn drifting_window_triggers_contamination() {
743 // Monotonically drifting window (systematic trend = contamination)
744 let residuals: [f32; 20] = core::array::from_fn(|i| 0.04 + i as f32 * 0.005);
745 let result = check_calibration_window(&residuals, 0.002);
746 assert!(result.contamination_suspected,
747 "drifting window must trigger contamination flag");
748 assert!(result.trend_slope_sigma >= 0.02,
749 "trend slope must exceed threshold: {}", result.trend_slope_sigma);
750 }
751
752 #[test]
753 fn high_autocorr_triggers_contamination() {
754 // Highly autocorrelated: slow sinusoidal variation
755 let residuals: [f32; 20] = core::array::from_fn(|i| {
756 let t = i as f32 * 0.4;
757 0.05 + 0.03 * crate::impairment::sin_approx(t)
758 });
759 let result = check_calibration_window(&residuals, 0.002);
760 // Either high autocorr or high variance ratio should trigger
761 assert!(result.contamination_suspected
762 || result.variance_ratio > 1.5,
763 "autocorrelated window should trigger contamination: {:?}", result);
764 }
765
766 #[test]
767 fn insufficient_window_returns_contamination_suspected() {
768 let tiny = [0.05_f32, 0.06, 0.04];
769 let r = check_calibration_window(&tiny, 0.002);
770 assert!(r.contamination_suspected, "window < 4 must flag contamination");
771 assert!(!r.wss_pass);
772 }
773
774 #[test]
775 fn offset_window_triggers_normalized_mean_dev() {
776 // Realistic calibration contamination: interference onset WITHIN the
777 // calibration window causes a step-change in the residual mean.
778 // First half: healthy baseline ~0.050; second half: elevated ~0.120.
779 // Half-window drift = |0.120 - 0.050| / 0.002 = 35.0 >> 2.0 threshold.
780 let mut residuals = [0.050_f32; 20];
781 for r in &mut residuals[10..] {
782 *r = 0.120;
783 }
784 let r = check_calibration_window(&residuals, 0.002);
785 assert!(r.normalized_mean_dev >= 2.0,
786 "step-change contamination must exceed normalized_mean_dev threshold: {}",
787 r.normalized_mean_dev);
788 assert!(r.contamination_suspected,
789 "step-change calibration window must be flagged as contaminated");
790 }
791
792 #[test]
793 fn high_variance_window_triggers_variance_ratio() {
794 // Wide variance: impulsive corruption in calibration window
795 let mut residuals = [0.05_f32; 20];
796 residuals[5] = 0.25; // impulsive spike
797 residuals[12] = 0.30;
798 let r = check_calibration_window(&residuals, 0.002);
799 assert!(r.contamination_suspected,
800 "high-variance window must trigger contamination");
801 }
802
803 // ── Swarm baseline sanity ──────────────────────────────────────────────
804
805 #[test]
806 fn swarm_unanimous_baseline_agreement() {
807 let rho_vals = [0.100f32, 0.101, 0.099, 0.100, 0.102];
808 let alert = swarm_baseline_sanity_check(&rho_vals, 3.0);
809 assert!(matches!(alert, BaselineConsensusAlert::Agreed { .. }),
810 "nearly-identical rho values should flag Agreed");
811 }
812
813 #[test]
814 fn swarm_disagreement_triggers_unreliable_alert() {
815 // Two nodes calibrated on healthy signal, three on contaminated window
816 let rho_vals = [0.10f32, 0.10, 0.25, 0.28, 0.27];
817 let alert = swarm_baseline_sanity_check(&rho_vals, 3.0);
818 assert!(matches!(alert, BaselineConsensusAlert::UnreliableBaseline { .. }),
819 "wide rho spread must flag UnreliableBaseline, got: {alert:?}");
820 }
821}
822
823// ── Swarm Baseline Sanity Check ──────────────────────────────────────────────
824//
825// DEFENCE: "Bootstrap Paradox" (paper §XIX-B).
826//
827// If a system calibrates while an adversary performs a "Low-and-Slow" spoofing
828// attack, the jammer becomes the admissible baseline. The engine then treats
829// the "Signal" as a "Violation".
830//
831// Defence: require M=5 (or more) swarm nodes to agree on "healthy" before
832// locking calibration. If nodes disagree on ρ beyond the spread tolerance,
833// calibration is refused and `UnreliableBaseline` is issued.
834//
835// The Wiener-Khinchin stationarity check (`check_calibration_window` above)
836// is the primary per-node defence. This function operates at the swarm level.
837
838/// Decision issued by the swarm-level baseline sanity check.
839///
840/// See `swarm_baseline_sanity_check()` for usage.
841///
842/// ## Relationship to paper §XIX-B (Bootstrap Paradox defence)
843///
844/// Calibration MUST NOT lock until `Agreed` is returned and every participating
845/// node also passes its individual `check_calibration_window()` check (WSS
846/// pre-condition, §XII).
847#[derive(Debug, Clone, Copy, PartialEq)]
848pub enum BaselineConsensusAlert {
849 /// All participating nodes agree on a healthy baseline.
850 /// Calibration may proceed after individual WSS checks pass.
851 Agreed {
852 /// Number of nodes that participated in the consensus.
853 node_count: u8,
854 /// Median ρ across all nodes.
855 median_rho: f32,
856 /// Robust spread (1.4826 × MAD of ρ values).
857 robust_sigma_rho: f32,
858 },
859 /// Node disagreement exceeds the spread tolerance.
860 ///
861 /// CALIBRATION MUST NOT LOCK until this is resolved.
862 /// Possible causes: Low-and-Slow spoofing contaminating one or more
863 /// calibration windows; hardware fault on a node; severe multipath.
864 UnreliableBaseline {
865 /// Nodes whose ρ is within tolerance of the median.
866 agreeing: u8,
867 /// Nodes whose ρ deviates beyond tolerance.
868 disagreeing: u8,
869 /// Robust spread (1.4826 × MAD) of ρ values across all nodes.
870 rho_spread: f32,
871 },
872}
873
874/// Compute a swarm-level baseline sanity check from a set of per-node ρ values.
875///
876/// Uses the robust MAD estimator (consistent with the BFT Byzantine filtering
877/// in `swarm_consensus.rs`) to flag disagreement. A node whose ρ deviates
878/// by more than `spread_tolerance_sigma` × (1.4826 · MAD) from the swarm
879/// median is counted as disagreeing.
880///
881/// Returns `BaselineConsensusAlert::UnreliableBaseline` if the number of
882/// disagreeing nodes exceeds 0 (conservative: any node disagreement triggers
883/// the alert when N ≤ 10) or if `rho_values` is empty.
884///
885/// ## Arguments
886///
887/// * `rho_values` — envelope radii ρ from participating nodes (up to 32).
888/// * `spread_tolerance_sigma` — MAD-sigma tolerance for agreement.
889/// Default 3.0 (consistent with GUM k=3 coverage, paper §VII-A Theorem 2).
890///
891/// # Examples
892///
893/// ```
894/// use dsfb_rf::calibration::{swarm_baseline_sanity_check, BaselineConsensusAlert};
895/// // 5 nodes all see ρ ≈ 0.100 — healthy consensus
896/// let rho = [0.100f32, 0.101, 0.099, 0.100, 0.102];
897/// let alert = swarm_baseline_sanity_check(&rho, 3.0);
898/// assert!(matches!(alert, BaselineConsensusAlert::Agreed { .. }));
899/// ```
900pub fn swarm_baseline_sanity_check(
901 rho_values: &[f32],
902 spread_tolerance_sigma: f32,
903) -> BaselineConsensusAlert {
904 let n = rho_values.len();
905 if n == 0 {
906 return BaselineConsensusAlert::UnreliableBaseline {
907 agreeing: 0,
908 disagreeing: 0,
909 rho_spread: 0.0,
910 };
911 }
912
913 let n_clamped = n.min(32);
914 let median = compute_rho_median(rho_values, n_clamped);
915 let robust_sigma = compute_rho_robust_sigma(rho_values, n_clamped, median);
916 let threshold = spread_tolerance_sigma * robust_sigma;
917 let (agreeing, disagreeing) = tally_agreement(rho_values, n_clamped, median, threshold);
918
919 if disagreeing == 0 {
920 BaselineConsensusAlert::Agreed {
921 node_count: n_clamped as u8,
922 median_rho: median,
923 robust_sigma_rho: robust_sigma,
924 }
925 } else {
926 BaselineConsensusAlert::UnreliableBaseline {
927 agreeing,
928 disagreeing,
929 rho_spread: robust_sigma,
930 }
931 }
932}
933
934fn insertion_sort_fixed(buf: &mut [f32; 32], len: usize) {
935 for i in 1..len {
936 let key = buf[i];
937 let mut j = i;
938 while j > 0 && buf[j - 1] > key {
939 buf[j] = buf[j - 1];
940 j -= 1;
941 }
942 buf[j] = key;
943 }
944}
945
946fn median_sorted(sorted: &[f32; 32], len: usize) -> f32 {
947 if len % 2 == 1 { sorted[len / 2] }
948 else { (sorted[len / 2 - 1] + sorted[len / 2]) * 0.5 }
949}
950
951fn compute_rho_median(rho_values: &[f32], n_clamped: usize) -> f32 {
952 let mut scratch = [0.0f32; 32];
953 for i in 0..n_clamped { scratch[i] = rho_values[i]; }
954 insertion_sort_fixed(&mut scratch, n_clamped);
955 median_sorted(&scratch, n_clamped)
956}
957
958fn compute_rho_robust_sigma(rho_values: &[f32], n_clamped: usize, median: f32) -> f32 {
959 let mut abs_devs = [0.0f32; 32];
960 for i in 0..n_clamped {
961 let d = rho_values[i] - median;
962 abs_devs[i] = if d < 0.0 { -d } else { d };
963 }
964 insertion_sort_fixed(&mut abs_devs, n_clamped);
965 let mad = median_sorted(&abs_devs, n_clamped);
966 const MAD_SCALE: f32 = 1.482_602_2;
967 (MAD_SCALE * mad).max(1e-9_f32)
968}
969
970fn tally_agreement(rho_values: &[f32], n_clamped: usize, median: f32, threshold: f32) -> (u8, u8) {
971 let mut agreeing: u8 = 0;
972 let mut disagreeing: u8 = 0;
973 for i in 0..n_clamped {
974 let d = rho_values[i] - median;
975 let dev = if d < 0.0 { -d } else { d };
976 if dev <= threshold {
977 agreeing = agreeing.saturating_add(1);
978 } else {
979 disagreeing = disagreeing.saturating_add(1);
980 }
981 }
982 (agreeing, disagreeing)
983}