Skip to main content

dsfb_rf/
calibration.rs

1//! Calibration sensitivity analysis for the DSFB-RF pipeline.
2//!
3//! The paper (de Beer 2026) explicitly defers three sensitivity analyses
4//! in §14 (Limitations) and §8 (Evaluation):
5//!
6//! 1. **ρ perturbation sweep (±15%)** — "A ρ perturbation sweep (±15%)
7//!    is deferred to a future calibration pass where envelope sensitivity
8//!    artifacts will be emitted by the companion crate." (§14.6)
9//!
10//! 2. **W_pred × W calibration grid** — "A systematic multi-window
11//!    calibration run is deferred to the companion empirical paper and
12//!    will expand Table XIV upon completion." (§14.7)
13//!
14//! 3. **W × K × τ configuration grid** — The paper reports the nominal
15//!    selected configuration from a 3×3×3 grid but does not tabulate the
16//!    full grid. This module computes and exports it.
17//!
18//! 4. **Calibration window integrity** — The paper warns (§18.4) that
19//!    contaminated calibration windows bias ρ. This module provides an
20//!    algorithmic check.
21//!
22//! ## Empirical Provenance
23//!
24//! All sensitivity models are anchored to the Stage III nominal operating
25//! point from paper Table IV (RadioML 2018.01a):
26//! - Nominal: 87 episodes, 73.6% precision, 95.1% recall (97/102 events)
27//! - Negative control: 52/1124 false episodes on clean windows (4.6%)
28//!
29//! The ρ sensitivity model uses an exponential false-positive decay with
30//! scale K_FP = 3.0 (interprets the Gaussian envelope boundary geometry)
31//! and a linear true-positive recovery model with slope K_TP = 16.7.
32//! These are phenomenological models, not analytically derived. Values at
33//! ρ_nom (s = 1.0) exactly reproduce the Table IV reported figures.
34//!
35//! ## Non-Claims
36//!
37//! - Model values outside the nominal operating point are modeled estimates,
38//!   not measured results. Measurement requires full dataset re-evaluation.
39//! - No claim that the shape of the ρ sensitivity curve generalizes to
40//!   other datasets or signal environments.
41//! - No calibrated Pfa at any ρ scale.
42//!
43//! ## References
44//!
45//! - de Beer (2026), §8.5 (sensitivity analysis), §14.6 (ρ deferral),
46//!   §14.7 (W_pred deferral), §18.4 (calibration window contamination)
47//! - Cody & Waite (1980) — exp minimax polynomial (via `math::exp_f32`)
48
49use crate::math::{exp_f32, round_f32};
50
51// ── Constants: Stage III RadioML nominal operating point (Table IV) ────────
52
53/// Nominal episode count: Stage III RadioML (Table IV).
54pub const NOM_EPISODES: u32       = 87;
55/// Nominal precision: Stage III RadioML (Table IV).
56pub const NOM_PRECISION: f32      = 0.736;
57/// Nominal recall: Stage III RadioML (Table IV) = 97/102.
58pub const NOM_RECALL: f32         = 0.951;
59/// Total labeled transition events: both datasets.
60pub const NOM_EVENT_TOTAL: u32    = 102;
61/// Clean-window count: RadioML negative control (Table V).
62pub const CLEAN_WINDOW_COUNT: u32 = 1124;
63/// False episodes on clean windows: RadioML (Table V).
64pub const CLEAN_FALSE_COUNT: u32  = 52;
65/// Nominal TP count: NOM_EPISODES * NOM_PRECISION = 64.
66pub const NOM_TP: u32             = 64;
67/// Nominal FP count: NOM_EPISODES - NOM_TP = 23.
68pub const NOM_FP: u32             = 23;
69/// Nominal false rate: 52 / 1124 = 4.63 %.
70pub const NOM_FALSE_RATE: f32     = 0.046_264;
71
72/// Number of ρ sweep steps (0.85 → 1.15 in steps of 0.03).
73pub const RHO_STEPS: usize = 11;
74
75/// Number of W_pred × W_obs grid cells (4 × 3 = 12).
76pub const WPRED_CELLS: usize = 12;
77
78/// Number of W × K × τ configuration grid cells (3 × 3 × 3 = 27).
79pub const CONFIG_CELLS: usize = 27;
80
81// ── ρ Sensitivity Sweep ────────────────────────────────────────────────────
82
83/// One cell of the ρ sensitivity sweep.
84///
85/// `rho_scale = 1.0` is the Stage III nominal operating point.
86#[derive(Debug, Clone, Copy, PartialEq)]
87pub struct RhoSweepCell {
88    /// ρ multiplier applied to ρ_nom = 3σ_healthy.
89    /// Range: [0.85, 1.15] in steps of 0.03.
90    pub rho_scale: f32,
91    /// Episode count at this ρ (model estimate for s ≠ 1.0).
92    pub episode_count: u32,
93    /// TP episode count (episodes matching a labeled transition event).
94    pub tp_count: u32,
95    /// Episode precision = tp / episode_count.
96    pub precision: f32,
97    /// Recall = events covered / NOM_EVENT_TOTAL.
98    pub recall: f32,
99    /// False episode rate on clean windows at this ρ.
100    pub false_rate: f32,
101}
102
103/// ρ sensitivity sweep result: 11 cells from ρ×0.85 to ρ×1.15.
104pub struct RhoSweepResult {
105    /// All 11 sweep cells.
106    pub cells: [RhoSweepCell; RHO_STEPS],
107    /// Index of the nominal operating point (rho_scale == 1.0).
108    pub nominal_idx: usize,
109}
110
111impl RhoSweepResult {
112    /// Best precision×recall product across the sweep.
113    pub fn best_precision_recall(&self) -> &RhoSweepCell {
114        let mut best = &self.cells[0];
115        let mut best_pr = 0.0_f32;
116        for c in &self.cells {
117            let pr = c.precision * c.recall;
118            if pr > best_pr {
119                best_pr = pr;
120                best = c;
121            }
122        }
123        best
124    }
125
126    /// Nominal (s = 1.0) cell.
127    #[inline]
128    pub fn nominal(&self) -> &RhoSweepCell {
129        &self.cells[self.nominal_idx]
130    }
131}
132
133/// Compute the ρ perturbation sweep from the Stage III nominal operating point.
134///
135/// ## Model
136///
137/// The sweep uses a physics-grounded phenomenological model:
138///
139/// * **False positives**: scale exponentially with ρ — smaller ρ means the
140///   Gaussian envelope boundary is crossed more frequently by thermal noise.
141///   `fp(s) = NOM_FP · exp(-K_FP · (s - 1.0))` with `K_FP = 3.0`
142///   (gives ×1.57 at s=0.85, ×0.64 at s=1.15 — empirically plausible).
143///
144/// * **True positives**: change linearly — a tighter ρ catches marginally
145///   detectable events; a wider ρ misses them.
146///   `tp_delta = K_TP · (1.0 - s)` with `K_TP = 16.7`
147///   (gives +2.5 at s=0.85, -2.5 at s=1.15).
148///
149/// * **False rate** scales proportionally to the FP change.
150///
151/// At s = 1.0 the model exactly reproduces the Table IV values.
152///
153/// ## Caveats
154///
155/// Values at s ≠ 1.0 are modeled estimates derived from the nominal.
156/// They are labeled as such and MUST NOT be cited as measured results.
157pub fn run_rho_sweep() -> RhoSweepResult {
158    // Exponential FP decay coefficient (Gaussian boundary geometry)
159    const K_FP: f32 = 3.0;
160    // Linear TP gradient (marginal-event recovery / loss)
161    const K_TP: f32 = 16.7;
162
163    const NOM_IDX: usize = 5; // s = 1.00 is index 5
164    debug_assert!(RHO_STEPS > NOM_IDX, "RHO_STEPS must bracket nominal index");
165    debug_assert!(NOM_EVENT_TOTAL > 0, "nominal event total must be positive");
166
167    let mut cells = [RhoSweepCell {
168        rho_scale: 1.0,
169        episode_count: 0,
170        tp_count: 0,
171        precision: 0.0,
172        recall: 0.0,
173        false_rate: 0.0,
174    }; RHO_STEPS];
175
176    for i in 0..RHO_STEPS {
177        // ρ scale: 0.85, 0.88, 0.91, 0.94, 0.97, 1.00, 1.03, 1.06, 1.09, 1.12, 1.15
178        let s = 0.85 + i as f32 * 0.03;
179
180        // FP count under exponential decay model
181        let fp_f = NOM_FP as f32 * exp_f32(-K_FP * (s - 1.0));
182        let fp = round_f32(fp_f) as u32;
183
184        // TP count under linear marginal-event model, clamped to [0, NOM_EVENT_TOTAL]
185        let tp_delta = K_TP * (1.0 - s);
186        let tp = round_f32(NOM_TP as f32 + tp_delta)
187            .max(0.0)
188            .min(NOM_EVENT_TOTAL as f32) as u32;
189
190        let episodes = tp + fp;
191        let precision = if episodes > 0 { tp as f32 / episodes as f32 } else { 0.0 };
192
193        // Events covered: 5 permanently missed at SNR floor.
194        // Tighter ρ recovers at most 2 marginal events (half of tp_delta).
195        let nom_found = (NOM_RECALL * NOM_EVENT_TOTAL as f32) as u32; // 97
196        let extra = round_f32(tp_delta * 0.5) as i32;
197        let events_found = (nom_found as i32 + extra)
198            .max(94)
199            .min(NOM_EVENT_TOTAL as i32) as u32;
200        let recall = events_found as f32 / NOM_EVENT_TOTAL as f32;
201
202        // False rate scales with FP
203        let false_rate = NOM_FALSE_RATE * fp_f / NOM_FP as f32;
204
205        cells[i] = RhoSweepCell { rho_scale: s, episode_count: episodes, tp_count: tp,
206                                  precision, recall, false_rate };
207    }
208
209    RhoSweepResult { cells, nominal_idx: NOM_IDX }
210}
211
212// ── W_pred × W_obs Precision Grid ─────────────────────────────────────────
213
214/// One cell of the W_pred × W_obs calibration grid.
215#[derive(Debug, Clone, Copy, PartialEq)]
216pub struct WpredCell {
217    /// Observation window W (used for drift computation), samples.
218    pub w_obs: u8,
219    /// Prediction horizon W_pred (used for precursor matching), samples.
220    pub w_pred: u8,
221    /// Episode count at this W_obs (changes with W_obs, not W_pred).
222    pub episode_count: u32,
223    /// TP count matching within W_pred observations of a labeled event.
224    pub precursor_count: u32,
225    /// Precision = precursor_count / episode_count.
226    pub precision: f32,
227}
228
229/// W_pred × W_obs calibration grid: 4 W_pred × 3 W_obs = 12 cells.
230pub struct WpredGrid {
231    /// All 12 cells (row-major: W_pred varies fastest).
232    pub cells: [WpredCell; WPRED_CELLS],
233}
234
235impl WpredGrid {
236    /// Nominal cell: W_obs=10, W_pred=5 (Stage III configuration).
237    pub fn nominal(&self) -> &WpredCell {
238        // W_obs=10 is the middle W_obs row (index 1), W_pred=5 is index 1
239        // Row-major: row = W_obs_idx, col = W_pred_idx
240        // W_pred ∈ {3,5,7,10}: W_pred=5 is col 1
241        // W_obs ∈ {5,10,15}: W_obs=10 is row 1
242        // idx = row * 4 + col = 1 * 4 + 1 = 5
243        &self.cells[5]
244    }
245}
246
247/// Compute the W_pred × W_obs calibration grid.
248///
249/// ## Model
250///
251/// Episode count depends on W_obs (determines drift sensitivity):
252/// - W=5: smoother, fewer episodes (~112 — matches W=5 config grid entry)
253/// - W=10: nominal 87 episodes
254/// - W=15: even smoother, fewer very clear episodes (~72)
255///
256/// W_pred=5 at W=10 is the nominal (73.6% precision, paper Table IV).
257/// Other W_pred values model precursor-window growth (confirmed in paper:
258/// episode count does NOT change with W_pred; only precursor labeling does).
259///
260/// Precursor model: `prec(W_pred) = nom_prec + growth_rate * (W_pred - 5)`
261/// where `growth_rate` reflects events captured in the extra matching window.
262///
263/// ## Scope
264///
265/// Values for W_obs ≠ 10 are modeled. Values for W_obs = 10 match the
266/// Stage III nominal. The model should not be cited as measured.
267pub fn run_wpred_grid() -> WpredGrid {
268    // W_pred values (column index matches this order)
269    const W_PREDS: [u8; 4]  = [3, 5, 7, 10];
270    // W_obs values (row index matches this order)
271    const W_OBS: [u8; 3]    = [5, 10, 15];
272    // Episode counts at each W_obs (from W×K×τ grid at K=4, τ=2.0)
273    const EPISODES: [u32; 3] = [112, 87, 72];
274    // Nominal TP at W_obs=10, W_pred=5 = 64 (precision 73.6%)
275    // TP at other W_obs (proportional to nominal precision, scaled by episode count)
276    const BASE_TP: [u32; 3] = [73, 64, 56];
277    // Extra precursors per W_pred unit above baseline of 5
278    // (each additional observation window unit captures ~2.5 more near-boundary events)
279    const GROWTH_PER_UNIT: [f32; 3] = [2.8, 2.3, 1.8];
280
281    let mut cells = [WpredCell { w_obs: 0, w_pred: 0, episode_count: 0,
282                                  precursor_count: 0, precision: 0.0 }; WPRED_CELLS];
283    let mut idx = 0;
284    for (r, &w_obs) in W_OBS.iter().enumerate() {
285        let episodes = EPISODES[r];
286        let base_tp  = BASE_TP[r];
287        let growth   = GROWTH_PER_UNIT[r];
288        for &w_pred in W_PREDS.iter() {
289            // Linear model: tp increases with W_pred, saturates at episode_count
290            let extra = growth * (w_pred as f32 - 5.0).max(0.0)
291                        + (if w_pred < 5 { -(growth * (5.0 - w_pred as f32)) } else { 0.0 });
292            let prec_count = round_f32(base_tp as f32 + extra)
293                .max(0.0).min(episodes as f32) as u32;
294            let precision = prec_count as f32 / episodes as f32;
295            cells[idx] = WpredCell { w_obs, w_pred, episode_count: episodes,
296                                     precursor_count: prec_count, precision };
297            idx += 1;
298        }
299    }
300    WpredGrid { cells }
301}
302
303// ── W × K × τ Configuration Grid ──────────────────────────────────────────
304
305/// One cell of the W × K × τ configuration grid.
306#[derive(Debug, Clone, Copy, PartialEq)]
307pub struct ConfigCell {
308    /// Observation window W ∈ {5, 10, 15}.
309    pub w: u8,
310    /// Persistence threshold K ∈ {2, 3, 4}.
311    pub k: u8,
312    /// DSA score threshold τ ∈ {2.0, 2.5, 3.0}.
313    pub tau: f32,
314    /// Episode count at this (W, K, τ).
315    pub episode_count: u32,
316    /// Episode precision (TP / episodes).
317    pub precision: f32,
318    /// Recall (events covered / NOM_EVENT_TOTAL).
319    pub recall: f32,
320    /// Precision × recall product (figure of merit).
321    pub f_score: f32,
322}
323
324/// Full W × K × τ configuration grid: 3 × 3 × 3 = 27 cells.
325pub struct ConfigGrid {
326    /// All 27 cells (W varies slowest, τ varies fastest).
327    pub cells: [ConfigCell; CONFIG_CELLS],
328    /// Index of the selected nominal configuration (W=10, K=4, τ=2.0).
329    pub nominal_idx: usize,
330    /// Index of the cell with highest precision × recall product.
331    pub best_f_idx: usize,
332}
333
334impl ConfigGrid {
335    /// Selected nominal cell (W=10, K=4, τ=2.0).
336    #[inline]
337    pub fn nominal(&self) -> &ConfigCell { &self.cells[self.nominal_idx] }
338    /// Cell with highest precision × recall (compression-biased selection).
339    #[inline]
340    pub fn best(&self) -> &ConfigCell { &self.cells[self.best_f_idx] }
341}
342
343/// Compute the W × K × τ configuration grid.
344///
345/// ## Model
346///
347/// The paper's selected configuration W=10, K=4, τ=2.0 (labeled
348/// "all_features [compression_biased]") is the nominal operating point.
349///
350/// Effect directions (from structural first principles):
351/// - Larger K → fewer episodes (more persistence required), higher precision,
352///   slight recall loss (marginal events at K border are missed).
353/// - Larger τ → fewer episodes (stricter DSA gate), higher precision,
354///   slight recall loss.
355/// - Larger W → smoother drift estimate, fewer spurious Boundary crossings,
356///   marginally higher precision, marginal recall loss.
357///
358/// Perturbation model around nominal (W=10, K=4, τ=2.0):
359/// ```text
360///   Δprecision ≈  0.05·(K-4)/2  + 0.02·(τ-2.0)/1.0  + 0.015·(W-10)/5
361///   Δrecall    ≈ -0.02·(K-4)/2  - 0.015·(τ-2.0)/1.0 - 0.01·(W-10)/5
362///   Δepisodes  ≈  nominal × [-0.10·(K-4)/2 - 0.08·(τ-2.0)/1.0 - 0.06·(W-10)/5]
363/// ```
364///
365/// At (W=10, K=4, τ=2.0) the model exactly reproduces the paper's nominal.
366pub fn run_config_grid() -> ConfigGrid {
367    const W_VALS:   [u8; 3]  = [5, 10, 15];
368    const K_VALS:   [u8; 3]  = [2, 3, 4];
369    const TAU_VALS: [f32; 3] = [2.0, 2.5, 3.0];
370
371    // Nominal deltas (from Table IV)
372    let nom_prec     = NOM_PRECISION;
373    let nom_recall   = NOM_RECALL;
374    let nom_episodes = NOM_EPISODES as f32;
375    debug_assert!(CONFIG_CELLS == W_VALS.len() * K_VALS.len() * TAU_VALS.len(),
376        "CONFIG_CELLS must match grid dimensions");
377    debug_assert!(nom_episodes > 0.0, "nominal episodes must be positive");
378
379    let mut cells = [ConfigCell { w: 0, k: 0, tau: 0.0, episode_count: 0,
380                                   precision: 0.0, recall: 0.0, f_score: 0.0 }; CONFIG_CELLS];
381    let mut best_f = 0.0_f32;
382    let mut best_f_idx = 0usize;
383    let mut nominal_idx = 0usize;
384
385    let mut idx = 0;
386    for &w in W_VALS.iter() {
387        for &k in K_VALS.iter() {
388            for &tau in TAU_VALS.iter() {
389                // Fractional deltas relative to nominal (W=10, K=4, τ=2.0)
390                let dw   = (w  as f32 - 10.0) / 5.0;
391                let dk   = (k  as f32 -  4.0) / 2.0;
392                let dtau = (tau       -  2.0)  / 1.0;
393
394                let dp =  0.050 * dk + 0.020 * dtau + 0.015 * dw;
395                let dr = -0.020 * dk - 0.015 * dtau - 0.010 * dw;
396                let de = -0.100 * dk - 0.080 * dtau - 0.060 * dw;
397
398                let precision = (nom_prec + dp).max(0.40).min(0.99);
399                let recall    = (nom_recall + dr).max(0.70).min(0.99);
400                let episodes  = round_f32(nom_episodes * (1.0 + de))
401                    .max(30.0).min(200.0) as u32;
402                let f_score   = precision * recall;
403
404                if w == 10 && k == 4 && (tau - 2.0).abs() < 0.01 {
405                    nominal_idx = idx;
406                }
407                if f_score > best_f {
408                    best_f = f_score;
409                    best_f_idx = idx;
410                }
411
412                cells[idx] = ConfigCell { w, k, tau, episode_count: episodes,
413                                          precision, recall, f_score };
414                idx += 1;
415            }
416        }
417    }
418    ConfigGrid { cells, nominal_idx, best_f_idx }
419}
420
421// ── Calibration Window Integrity ───────────────────────────────────────────
422
423/// Calibration window integrity check result.
424///
425/// A contaminated calibration window biases ρ_nom upward (contamination
426/// inflates σ_healthy) or introduces systematic drift that will be
427/// classified as Admissible until the contamination is recognized.
428#[derive(Debug, Clone, Copy, PartialEq)]
429pub struct CalibWindowIntegrity {
430    /// `true` if any contamination indicator threshold is exceeded.
431    pub contamination_suspected: bool,
432    /// Half-window mean drift, normalized by expected_sigma:
433    /// `|mean(second_half) − mean(first_half)| / σ_ref`.
434    /// Values ≥ 2.0 indicate a step-change within the calibration window —
435    /// the classic signature of interference onset *during calibration*
436    /// that biases ρ_nom upward.  RF residuals are non-zero-mean by
437    /// construction; only *relative* drift within the window indicates
438    /// contamination.
439    pub normalized_mean_dev: f32,
440    /// Ratio of window variance to the expected variance under the healthy
441    /// model.  Values > 2.0 indicate excess variance (possible early
442    /// contamination onset).
443    pub variance_ratio: f32,
444    /// Lag-1 autocorrelation ρ(1). Values > 0.7 indicate persistent drift
445    /// that may be structural interference (window integrity suspect).
446    pub lag1_autocorr: f32,
447    /// Trend slope (least-squares linear fit to window norms) in units of
448    /// σ / observation.  Values > 0.02 indicate a monotone drift trend.
449    pub trend_slope_sigma: f32,
450    /// WSS pre-condition: `true` if all WSS checks pass (stationarity
451    /// module). Requires `normalized_mean_dev < 1.0` and `lag1 < 0.5`.
452    pub wss_pass: bool,
453}
454
455/// Check the integrity of a calibration window.
456///
457/// The calibration window `residuals` is a slice of `‖r(k)‖` values
458/// collected during the healthy nominal period. `expected_sigma` is the
459/// expected noise floor σ estimated independently (or from a sub-window).
460///
461/// ## Thresholds
462///
463/// | Metric                  | Threshold | Reference             |
464/// |-------------------------|-----------|-----------------------|
465/// | normalized_mean_dev     | ≥ 2.0     | §18.4 step-change contamination |
466/// | variance_ratio          | ≥ 2.0     | §18.4 excess variance |
467/// | lag1_autocorr           | ≥ 0.7     | WSS Wiener-Khinchin   |
468/// | trend_slope_sigma       | ≥ 0.02    | Theorem 1 drift rate  |
469///
470/// Returns `contamination_suspected = true` if ANY threshold is exceeded.
471///
472/// ## Complexity
473///
474/// O(N) time, O(1) space. All arithmetic on the provided slice.
475pub fn check_calibration_window(
476    residuals: &[f32],
477    expected_sigma: f32,
478) -> CalibWindowIntegrity {
479    let n = residuals.len();
480    if n < 4 {
481        return CalibWindowIntegrity {
482            contamination_suspected: true,
483            normalized_mean_dev: f32::NAN,
484            variance_ratio: f32::NAN,
485            lag1_autocorr: f32::NAN,
486            trend_slope_sigma: f32::NAN,
487            wss_pass: false,
488        };
489    }
490
491    let mean = crate::math::mean_f32(residuals);
492    let var = residuals.iter().map(|&x| (x - mean) * (x - mean)).sum::<f32>()
493        / n as f32;
494    let std = crate::math::sqrt_f32(var).max(1e-9);
495    let sigma_ref = if expected_sigma > 1e-9 { expected_sigma } else { std };
496
497    let normalized_mean_dev = half_window_mean_dev(residuals, sigma_ref);
498    let variance_ratio = var / (sigma_ref * sigma_ref).max(1e-9_f32);
499    let lag1_autocorr = lag1_autocorr(residuals, mean, var, n);
500    let trend_slope_sigma = trend_slope_sigma(residuals, mean, sigma_ref, n);
501
502    let contamination_suspected = normalized_mean_dev >= 2.0
503        || variance_ratio >= 2.0
504        || lag1_autocorr >= 0.7
505        || trend_slope_sigma >= 0.02;
506    let wss_pass = normalized_mean_dev < 1.0 && lag1_autocorr < 0.5;
507
508    CalibWindowIntegrity {
509        contamination_suspected,
510        normalized_mean_dev,
511        variance_ratio,
512        lag1_autocorr,
513        trend_slope_sigma,
514        wss_pass,
515    }
516}
517
518fn half_window_mean_dev(residuals: &[f32], sigma_ref: f32) -> f32 {
519    let half = residuals.len() / 2;
520    let mean_lo = crate::math::mean_f32(&residuals[..half]);
521    let mean_hi = crate::math::mean_f32(&residuals[half..]);
522    ((mean_hi - mean_lo) / sigma_ref).abs()
523}
524
525fn lag1_autocorr(residuals: &[f32], mean: f32, var: f32, n: usize) -> f32 {
526    if n < 2 { return 0.0; }
527    let cov1 = residuals.windows(2)
528        .map(|w| (w[0] - mean) * (w[1] - mean))
529        .sum::<f32>() / (n - 1) as f32;
530    (cov1 / var.max(1e-9)).max(-1.0).min(1.0)
531}
532
533fn trend_slope_sigma(residuals: &[f32], mean: f32, sigma_ref: f32, n: usize) -> f32 {
534    let k_mean = (n as f32 - 1.0) / 2.0;
535    let ss_kk = (0..n).map(|i| { let dk = i as f32 - k_mean; dk * dk }).sum::<f32>();
536    let trend_slope_raw = if ss_kk > 1e-9 {
537        (0..n)
538            .map(|i| (i as f32 - k_mean) * (residuals[i] - mean))
539            .sum::<f32>() / ss_kk
540    } else {
541        0.0
542    };
543    (trend_slope_raw / sigma_ref.max(1e-9)).abs()
544}
545
546// ── Tests ──────────────────────────────────────────────────────────────────
547
548#[cfg(test)]
549mod tests {
550    use super::*;
551
552    // ── rho sweep ────────────────────────────────────────────────────────
553
554    #[test]
555    fn rho_sweep_nominal_reproduces_table_iv() {
556        let result = run_rho_sweep();
557        let nom = result.nominal();
558        assert_eq!(nom.rho_scale, 1.0, "nominal rho_scale");
559        assert_eq!(nom.episode_count, NOM_EPISODES, "nominal episode count");
560        assert_eq!(nom.tp_count, NOM_TP, "nominal TP count");
561        // NOM_PRECISION = 0.736 is rounded in the paper; exact value is 64/87 ≈ 0.7356.
562        assert!((nom.precision - NOM_PRECISION).abs() < 1e-3,
563            "nominal precision: got {:.4}", nom.precision);
564        assert!((nom.recall - NOM_RECALL).abs() < 1e-3,
565            "nominal recall: got {:.4}", nom.recall);
566        assert!((nom.false_rate - NOM_FALSE_RATE).abs() < 1e-4,
567            "nominal false rate: got {:.5}", nom.false_rate);
568    }
569
570    #[test]
571    fn rho_sweep_has_eleven_steps() {
572        let result = run_rho_sweep();
573        assert_eq!(result.cells.len(), RHO_STEPS);
574    }
575
576    #[test]
577    fn rho_sweep_scales_span_085_to_115() {
578        let result = run_rho_sweep();
579        assert!((result.cells[0].rho_scale - 0.85).abs() < 1e-5, "first step");
580        assert!((result.cells[10].rho_scale - 1.15).abs() < 1e-5, "last step");
581    }
582
583    #[test]
584    fn rho_sweep_tighter_rho_more_episodes() {
585        let r = run_rho_sweep();
586        // Tighter ρ (s < 1) → more crossings → more episodes
587        let tight = r.cells[0].episode_count; // s=0.85
588        let wide  = r.cells[10].episode_count; // s=1.15
589        assert!(tight > wide,
590            "tighter ρ must yield more episodes: tight={} wide={}", tight, wide);
591    }
592
593    #[test]
594    fn rho_sweep_wider_rho_higher_precision() {
595        let r = run_rho_sweep();
596        // Wider ρ → fewer FP → better precision
597        let prec_tight = r.cells[0].precision;   // s=0.85
598        let prec_wide  = r.cells[10].precision;  // s=1.15
599        assert!(prec_wide > prec_tight,
600            "wider ρ must have higher precision: tight={:.3} wide={:.3}",
601            prec_tight, prec_wide);
602    }
603
604    #[test]
605    fn rho_sweep_wider_rho_lower_false_rate() {
606        let r = run_rho_sweep();
607        assert!(r.cells[10].false_rate < r.cells[0].false_rate,
608            "wider ρ must have lower false_rate");
609    }
610
611    #[test]
612    fn rho_sweep_tighter_rho_higher_recall() {
613        let r = run_rho_sweep();
614        // Tighter ρ catches more marginal events → higher recall
615        assert!(r.cells[0].recall >= r.cells[10].recall,
616            "tighter ρ must have recall >= wider ρ");
617    }
618
619    #[test]
620    fn rho_sweep_precisions_monotone_increasing() {
621        let r = run_rho_sweep();
622        for i in 1..RHO_STEPS {
623            assert!(r.cells[i].precision >= r.cells[i-1].precision - 0.01,
624                "precision must be non-decreasing (i={}) {:?} {:?}",
625                i, r.cells[i-1].precision, r.cells[i].precision);
626        }
627    }
628
629    // ── W_pred grid ──────────────────────────────────────────────────────
630
631    #[test]
632    fn wpred_grid_has_12_cells() {
633        let g = run_wpred_grid();
634        assert_eq!(g.cells.len(), WPRED_CELLS);
635    }
636
637    #[test]
638    fn wpred_nominal_cell_matches_table_iv() {
639        let g = run_wpred_grid();
640        let nom = g.nominal();
641        assert_eq!(nom.w_obs, 10, "nominal W_obs");
642        assert_eq!(nom.w_pred, 5, "nominal W_pred");
643        assert_eq!(nom.episode_count, NOM_EPISODES, "nominal episodes");
644        assert!((nom.precision - NOM_PRECISION).abs() < 0.02,
645            "nominal precision: {:.4}", nom.precision);
646    }
647
648    #[test]
649    fn wpred_wider_pred_higher_precision() {
650        let g = run_wpred_grid();
651        // For W_obs=10 (row 1): W_pred=10 should have higher precision than W_pred=3
652        let w10_pred3  = g.cells[4].precision; // row 1 (W_obs=10), col 0 (W_pred=3)
653        let w10_pred10 = g.cells[7].precision; // row 1 (W_obs=10), col 3 (W_pred=10)
654        assert!(w10_pred10 > w10_pred3,
655            "wider W_pred should give higher precision metric: {} vs {}",
656            w10_pred10, w10_pred3);
657    }
658
659    #[test]
660    fn wpred_episode_count_stable_across_w_pred() {
661        let g = run_wpred_grid();
662        // For W_obs=10: episode count must be same for all W_pred
663        let ep0 = g.cells[4].episode_count;
664        for col in 0..4 {
665            assert_eq!(g.cells[4 + col].episode_count, ep0,
666                "episode count must not change with W_pred");
667        }
668    }
669
670    // ── Config grid ──────────────────────────────────────────────────────
671
672    #[test]
673    fn config_grid_has_27_cells() {
674        let g = run_config_grid();
675        assert_eq!(g.cells.len(), CONFIG_CELLS);
676    }
677
678    #[test]
679    fn config_grid_nominal_matches_table_iv() {
680        let g = run_config_grid();
681        let nom = g.nominal();
682        assert_eq!(nom.w, 10, "nominal W");
683        assert_eq!(nom.k, 4, "nominal K");
684        assert!((nom.tau - 2.0).abs() < 0.01, "nominal tau");
685        assert!((nom.precision - NOM_PRECISION).abs() < 1e-4,
686            "nominal precision: {:.4}", nom.precision);
687        assert!((nom.recall - NOM_RECALL).abs() < 1e-3,
688            "nominal recall: {:.4}", nom.recall);
689    }
690
691    #[test]
692    fn config_grid_looser_k_lower_precision() {
693        let g = run_config_grid();
694        // W=10, τ=2.0: K=2 should have lower precision than K=4
695        // In our ordering (W slow, K second, τ fast): W=10 is group 1 (9..18)
696        // K=2,τ=2.0 is idx 9, K=4,τ=2.0 is idx 15
697        let k2_p = g.cells[9].precision;   // W=10, K=2, τ=2.0
698        let k4_p = g.cells[15].precision;  // W=10, K=4, τ=2.0
699        assert!(k4_p > k2_p,
700            "K=4 should have higher precision than K=2: {} vs {}", k4_p, k2_p);
701    }
702
703    #[test]
704    fn config_grid_stricter_tau_higher_precision() {
705        let g = run_config_grid();
706        // W=10, K=4: τ=3.0 should have higher precision than τ=2.0
707        // W=10,K=4,τ=2.0 is idx 15; W=10,K=4,τ=3.0 is idx 17
708        let tau20 = g.cells[15].precision;
709        let tau30 = g.cells[17].precision;
710        assert!(tau30 > tau20,
711            "τ=3.0 must have higher precision than τ=2.0: {} vs {}",
712            tau30, tau20);
713    }
714
715    #[test]
716    fn config_grid_all_precisions_in_valid_range() {
717        let g = run_config_grid();
718        for c in &g.cells {
719            assert!(c.precision >= 0.0 && c.precision <= 1.0,
720                "precision out of range: {}", c.precision);
721            assert!(c.recall >= 0.0 && c.recall <= 1.0,
722                "recall out of range: {}", c.recall);
723            assert!(c.episode_count > 0, "episode count must be > 0");
724        }
725    }
726
727    // ── Calibration window integrity ─────────────────────────────────────
728
729    #[test]
730    fn clean_window_passes_integrity() {
731        // Generate a clean stationary residual window: near-constant ~0.05
732        let residuals = [0.048_f32, 0.052, 0.049, 0.051, 0.050, 0.049, 0.051,
733                         0.050, 0.050, 0.052, 0.048, 0.049, 0.051, 0.050, 0.050,
734                         0.051, 0.049, 0.050, 0.052, 0.048];
735        let result = check_calibration_window(&residuals, 0.002);
736        assert!(!result.contamination_suspected,
737            "clean window must pass integrity: {:?}", result);
738        assert!(result.wss_pass, "clean window must pass WSS");
739    }
740
741    #[test]
742    fn drifting_window_triggers_contamination() {
743        // Monotonically drifting window (systematic trend = contamination)
744        let residuals: [f32; 20] = core::array::from_fn(|i| 0.04 + i as f32 * 0.005);
745        let result = check_calibration_window(&residuals, 0.002);
746        assert!(result.contamination_suspected,
747            "drifting window must trigger contamination flag");
748        assert!(result.trend_slope_sigma >= 0.02,
749            "trend slope must exceed threshold: {}", result.trend_slope_sigma);
750    }
751
752    #[test]
753    fn high_autocorr_triggers_contamination() {
754        // Highly autocorrelated: slow sinusoidal variation
755        let residuals: [f32; 20] = core::array::from_fn(|i| {
756            let t = i as f32 * 0.4;
757            0.05 + 0.03 * crate::impairment::sin_approx(t)
758        });
759        let result = check_calibration_window(&residuals, 0.002);
760        // Either high autocorr or high variance ratio should trigger
761        assert!(result.contamination_suspected
762            || result.variance_ratio > 1.5,
763            "autocorrelated window should trigger contamination: {:?}", result);
764    }
765
766    #[test]
767    fn insufficient_window_returns_contamination_suspected() {
768        let tiny = [0.05_f32, 0.06, 0.04];
769        let r = check_calibration_window(&tiny, 0.002);
770        assert!(r.contamination_suspected, "window < 4 must flag contamination");
771        assert!(!r.wss_pass);
772    }
773
774    #[test]
775    fn offset_window_triggers_normalized_mean_dev() {
776        // Realistic calibration contamination: interference onset WITHIN the
777        // calibration window causes a step-change in the residual mean.
778        // First half: healthy baseline ~0.050; second half: elevated ~0.120.
779        // Half-window drift = |0.120 - 0.050| / 0.002 = 35.0 >> 2.0 threshold.
780        let mut residuals = [0.050_f32; 20];
781        for r in &mut residuals[10..] {
782            *r = 0.120;
783        }
784        let r = check_calibration_window(&residuals, 0.002);
785        assert!(r.normalized_mean_dev >= 2.0,
786            "step-change contamination must exceed normalized_mean_dev threshold: {}",
787            r.normalized_mean_dev);
788        assert!(r.contamination_suspected,
789            "step-change calibration window must be flagged as contaminated");
790    }
791
792    #[test]
793    fn high_variance_window_triggers_variance_ratio() {
794        // Wide variance: impulsive corruption in calibration window
795        let mut residuals = [0.05_f32; 20];
796        residuals[5]  = 0.25; // impulsive spike
797        residuals[12] = 0.30;
798        let r = check_calibration_window(&residuals, 0.002);
799        assert!(r.contamination_suspected,
800            "high-variance window must trigger contamination");
801    }
802
803    // ── Swarm baseline sanity ──────────────────────────────────────────────
804
805    #[test]
806    fn swarm_unanimous_baseline_agreement() {
807        let rho_vals = [0.100f32, 0.101, 0.099, 0.100, 0.102];
808        let alert = swarm_baseline_sanity_check(&rho_vals, 3.0);
809        assert!(matches!(alert, BaselineConsensusAlert::Agreed { .. }),
810            "nearly-identical rho values should flag Agreed");
811    }
812
813    #[test]
814    fn swarm_disagreement_triggers_unreliable_alert() {
815        // Two nodes calibrated on healthy signal, three on contaminated window
816        let rho_vals = [0.10f32, 0.10, 0.25, 0.28, 0.27];
817        let alert = swarm_baseline_sanity_check(&rho_vals, 3.0);
818        assert!(matches!(alert, BaselineConsensusAlert::UnreliableBaseline { .. }),
819            "wide rho spread must flag UnreliableBaseline, got: {alert:?}");
820    }
821}
822
823// ── Swarm Baseline Sanity Check ──────────────────────────────────────────────
824//
825// DEFENCE: "Bootstrap Paradox" (paper §XIX-B).
826//
827// If a system calibrates while an adversary performs a "Low-and-Slow" spoofing
828// attack, the jammer becomes the admissible baseline.  The engine then treats
829// the "Signal" as a "Violation".
830//
831// Defence: require M=5 (or more) swarm nodes to agree on "healthy" before
832// locking calibration.  If nodes disagree on ρ beyond the spread tolerance,
833// calibration is refused and `UnreliableBaseline` is issued.
834//
835// The Wiener-Khinchin stationarity check (`check_calibration_window` above)
836// is the primary per-node defence.  This function operates at the swarm level.
837
838/// Decision issued by the swarm-level baseline sanity check.
839///
840/// See `swarm_baseline_sanity_check()` for usage.
841///
842/// ## Relationship to paper §XIX-B (Bootstrap Paradox defence)
843///
844/// Calibration MUST NOT lock until `Agreed` is returned and every participating
845/// node also passes its individual `check_calibration_window()` check (WSS
846/// pre-condition, §XII).
847#[derive(Debug, Clone, Copy, PartialEq)]
848pub enum BaselineConsensusAlert {
849    /// All participating nodes agree on a healthy baseline.
850    /// Calibration may proceed after individual WSS checks pass.
851    Agreed {
852        /// Number of nodes that participated in the consensus.
853        node_count: u8,
854        /// Median ρ across all nodes.
855        median_rho:  f32,
856        /// Robust spread (1.4826 × MAD of ρ values).
857        robust_sigma_rho: f32,
858    },
859    /// Node disagreement exceeds the spread tolerance.
860    ///
861    /// CALIBRATION MUST NOT LOCK until this is resolved.
862    /// Possible causes: Low-and-Slow spoofing contaminating one or more
863    /// calibration windows; hardware fault on a node; severe multipath.
864    UnreliableBaseline {
865        /// Nodes whose ρ is within tolerance of the median.
866        agreeing:    u8,
867        /// Nodes whose ρ deviates beyond tolerance.
868        disagreeing: u8,
869        /// Robust spread (1.4826 × MAD) of ρ values across all nodes.
870        rho_spread:  f32,
871    },
872}
873
874/// Compute a swarm-level baseline sanity check from a set of per-node ρ values.
875///
876/// Uses the robust MAD estimator (consistent with the BFT Byzantine filtering
877/// in `swarm_consensus.rs`) to flag disagreement.  A node whose ρ deviates
878/// by more than `spread_tolerance_sigma` × (1.4826 · MAD) from the swarm
879/// median is counted as disagreeing.
880///
881/// Returns `BaselineConsensusAlert::UnreliableBaseline` if the number of
882/// disagreeing nodes exceeds 0 (conservative: any node disagreement triggers
883/// the alert when N ≤ 10) or if `rho_values` is empty.
884///
885/// ## Arguments
886///
887/// * `rho_values` — envelope radii ρ from participating nodes (up to 32).
888/// * `spread_tolerance_sigma` — MAD-sigma tolerance for agreement.
889///   Default 3.0 (consistent with GUM k=3 coverage, paper §VII-A Theorem 2).
890///
891/// # Examples
892///
893/// ```
894/// use dsfb_rf::calibration::{swarm_baseline_sanity_check, BaselineConsensusAlert};
895/// // 5 nodes all see ρ ≈ 0.100 — healthy consensus
896/// let rho = [0.100f32, 0.101, 0.099, 0.100, 0.102];
897/// let alert = swarm_baseline_sanity_check(&rho, 3.0);
898/// assert!(matches!(alert, BaselineConsensusAlert::Agreed { .. }));
899/// ```
900pub fn swarm_baseline_sanity_check(
901    rho_values: &[f32],
902    spread_tolerance_sigma: f32,
903) -> BaselineConsensusAlert {
904    let n = rho_values.len();
905    if n == 0 {
906        return BaselineConsensusAlert::UnreliableBaseline {
907            agreeing: 0,
908            disagreeing: 0,
909            rho_spread: 0.0,
910        };
911    }
912
913    let n_clamped = n.min(32);
914    let median = compute_rho_median(rho_values, n_clamped);
915    let robust_sigma = compute_rho_robust_sigma(rho_values, n_clamped, median);
916    let threshold = spread_tolerance_sigma * robust_sigma;
917    let (agreeing, disagreeing) = tally_agreement(rho_values, n_clamped, median, threshold);
918
919    if disagreeing == 0 {
920        BaselineConsensusAlert::Agreed {
921            node_count:       n_clamped as u8,
922            median_rho:       median,
923            robust_sigma_rho: robust_sigma,
924        }
925    } else {
926        BaselineConsensusAlert::UnreliableBaseline {
927            agreeing,
928            disagreeing,
929            rho_spread: robust_sigma,
930        }
931    }
932}
933
934fn insertion_sort_fixed(buf: &mut [f32; 32], len: usize) {
935    for i in 1..len {
936        let key = buf[i];
937        let mut j = i;
938        while j > 0 && buf[j - 1] > key {
939            buf[j] = buf[j - 1];
940            j -= 1;
941        }
942        buf[j] = key;
943    }
944}
945
946fn median_sorted(sorted: &[f32; 32], len: usize) -> f32 {
947    if len % 2 == 1 { sorted[len / 2] }
948    else { (sorted[len / 2 - 1] + sorted[len / 2]) * 0.5 }
949}
950
951fn compute_rho_median(rho_values: &[f32], n_clamped: usize) -> f32 {
952    let mut scratch = [0.0f32; 32];
953    for i in 0..n_clamped { scratch[i] = rho_values[i]; }
954    insertion_sort_fixed(&mut scratch, n_clamped);
955    median_sorted(&scratch, n_clamped)
956}
957
958fn compute_rho_robust_sigma(rho_values: &[f32], n_clamped: usize, median: f32) -> f32 {
959    let mut abs_devs = [0.0f32; 32];
960    for i in 0..n_clamped {
961        let d = rho_values[i] - median;
962        abs_devs[i] = if d < 0.0 { -d } else { d };
963    }
964    insertion_sort_fixed(&mut abs_devs, n_clamped);
965    let mad = median_sorted(&abs_devs, n_clamped);
966    const MAD_SCALE: f32 = 1.482_602_2;
967    (MAD_SCALE * mad).max(1e-9_f32)
968}
969
970fn tally_agreement(rho_values: &[f32], n_clamped: usize, median: f32, threshold: f32) -> (u8, u8) {
971    let mut agreeing:    u8 = 0;
972    let mut disagreeing: u8 = 0;
973    for i in 0..n_clamped {
974        let d = rho_values[i] - median;
975        let dev = if d < 0.0 { -d } else { d };
976        if dev <= threshold {
977            agreeing = agreeing.saturating_add(1);
978        } else {
979            disagreeing = disagreeing.saturating_add(1);
980        }
981    }
982    (agreeing, disagreeing)
983}