dsfb_gpu_debug_demo/cli/
s_real_audit.rs

1//! S-REAL — Real Dataset Audit Gauntlet CLI driver
2//! (canonical subcommand: `s-real-audit`; historical alias:
3//! `s-real-1-audit`).
4//!
5//! WHY: This is the integration module that ties the S-REAL sealed-gauntlet
6//! pipeline end-to-end:
7//!
8//! ```text
9//!   real bytes
10//!     -> SHA-pinned TSV fixture (cli::ingest)
11//!     -> deterministic TraceEvent lowering (cli::ingest)
12//!     -> Contract::scaled(observed_signals, observed_windows)
13//!     -> build_gpu(events, contract) twice (replay-verification)
14//!     -> emit(&case) → casefile.json
15//!     -> serialize_episodes_jsonl → episodes.jsonl
16//!     -> render_audit_report_html → audit_report.html
17//!     -> 9 panel-locked artifacts per dataset in reports/<tier_dir>/<id>/
18//! ```
19//!
20//! Panel-locked S-REAL scope (current state, post-S-REAL.3.1 seal at
21//! `fde8a99`): **20 sealed audits across S-REAL.1/2/3** (3 + 10 + 7
22//! datasets vendored under panel-cleared licenses — TADBench /
23//! Illinois SocialNetwork / AIOps Challenge / LO2 / DeepTraLog /
24//! MultiDim / Defects4J / BugsInPy / PROMISE / NASA C-MAPSS subsets),
25//! **plus 10 large saturation-class fixtures** (1 M-cell RF I/Q /
26//! mmWave / database-derived residual surfaces — RadioML 2018.01A,
27//! DeepBeam, RadioML-Gold, POWDER, Oracle, DeepSense6G, IMDb tarball,
28//! IMDb DuckDB, Snowset, SQLShare). The 30-fixture sweep is captured
29//! at `reports/s_real_saturation_sweep.txt`; the 20-dataset audit
30//! bundle and its 60-row hash chain live under `reports/s_real_<tier>/`.
31//!
32//! **This is a sealed gauntlet, not a general ingestion product.**
33//! Arbitrary external datasets are intentionally not supported: the
34//! value of the S-REAL audits is that the gauntlet commits to a fixed,
35//! byte-pinned, license-cleared dataset set so reviewers can reproduce
36//! the result exactly.
37//!
38//! Non-claims (preserved verbatim into every dataset's `limitations.md`,
39//! mirroring `cli::audit_report::NON_CLAIMS`):
40//! - Does NOT claim DSFB has identified the "real" anomaly in the dataset.
41//! - Does NOT claim DSFB outperforms any other anomaly detector.
42//! - Does NOT claim DSFB has discovered causality.
43//! - Does NOT claim fitness-for-purpose on regulated / safety-critical use.
44//! - Does NOT claim the dataset is "correctly labeled" or "ground truth".
45//! - Does NOT claim the corpus or registry is exhaustive.
46//! - Does NOT claim replay determinism across different driver / CUDA /
47//!   hardware versions; the replay receipt records the toolchain
48//!   explicitly.
49//!
50//! License: Apache-2.0. Background IP: Invariant Forge LLC.
51
52use std::collections::BTreeMap;
53use std::fmt::Write as _;
54use std::fs;
55use std::path::{Path, PathBuf};
56use std::process::ExitCode;
57use std::time::Instant;
58
59use dsfb_gpu_debug_core::bank::{bank_hash, Episode};
60use dsfb_gpu_debug_core::casefile::{emit, CaseFile};
61use dsfb_gpu_debug_core::contract::Contract;
62use dsfb_gpu_debug_core::hash::sha256;
63use dsfb_gpu_debug_core::motif::registry_hash;
64
65use super::audit_report::{
66    render_audit_report_html, DatasetManifest, ReplayVerification, SchemaMap,
67};
68use super::ingest::{
69    build_ingest_report, load_residual_projection_tsv, lower_to_trace_events, sha256_to_hex_lower,
70    LoweringConfig,
71};
72use super::{parse_flags, usage_error};
73
74// =====================================================================
75// Panel-locked S-REAL dataset identity surfaces (split into two tables).
76// =====================================================================
77//
78// WHY THIS SPLIT EXISTS (S-REAL.3.1.2, panel-locked 2026-05-20):
79//
80// Before S-REAL.3.1.2 the driver had a single `DATASETS` constant
81// carrying 30 entries (20 audit datasets + 10 large saturation-class
82// fixtures). `s-real-audit --dataset all` enumerated all 30, which
83// (a) made the public Colab notebook's central command silently
84// dispatch the 10 saturation witnesses whose TSVs the slim tarball
85// deliberately excludes (~90 MB), and (b) misrepresented what "all"
86// meant in the notebook's narration ("all 20 datasets") vs the code
87// (all 30 entries). The bug is scope-boundary confusion, not a
88// pipeline defect.
89//
90// The fix splits the dataset identity surface into two const tables
91// with distinct authority semantics:
92//
93//   * `AUDIT_DATASETS` (20 entries): the sealed S-REAL.3 audit
94//     gauntlet bundle. Every entry has a `tier_dir` of `s_real_1`,
95//     `s_real_2`, or `s_real_3` matching the on-disk sealed bundle
96//     layout that `reports/s_real_3/bundle_manifest.toml` pins.
97//     `s-real-audit --dataset all` enumerates ONLY this table.
98//
99//   * `SATURATION_FIXTURES` (10 entries): the large saturation-class
100//     real-data fixtures (RF I/Q + mmWave + database-derived
101//     residual surfaces). Every entry has `tier_dir =
102//     "s_real_saturation"`. These fixtures are dispatched ONLY by
103//     `scripts/s_real_saturation_sweep.sh` and by explicit single-id
104//     audit calls (`s-real-audit --dataset radioml_2018_snr30_large`).
105//     They are NEVER selected by `--dataset all`.
106//
107// Both tables share the same `DatasetSpec` struct. The `tier_dir`
108// field is load-bearing: it determines the output directory layout
109// so a freshly-emitted bundle matches the sealed `reports/s_real_<tier>/`
110// directories byte-for-byte without further path massaging.
111//
112// Hard-coding the 20-audit + 10-saturation tables is deliberate. The
113// audit gauntlet's reproducibility story depends on every reviewer
114// running against the same vendored, SHA-256-pinned, license-cleared
115// fixtures; changes to entries here are panel-acknowledged schema-
116// upgrade events, not runtime configuration.
/// One row of a panel-locked S-REAL dataset identity table.
///
/// Every field is a `&'static str` so rows can be written directly in
/// the `const` tables `AUDIT_DATASETS` and `SATURATION_FIXTURES`.
struct DatasetSpec {
    /// Canonical dataset id. This is the value matched against
    /// `--dataset <id>` (see `lookup`) and the leaf directory name in
    /// the documented `<out-dir>/<tier_dir>/<dataset_id>/` layout.
    dataset_id: &'static str,
    /// Human-readable dataset name (e.g. "TADBench TrainTicket F11").
    display_name: &'static str,
    /// Upstream provenance string. Despite the field name, the table
    /// values are not always a DOI or URL: some entries carry a
    /// free-form citation (e.g. "NASA PCoE PHM08").
    upstream_doi_or_url: &'static str,
    /// License token recorded for the upstream data: an SPDX-style
    /// identifier where upstream declares one (e.g. `CC-BY-4.0`), or
    /// `no-upstream-license` where no explicit license is published.
    license: &'static str,
    /// Coarse source-domain label (e.g. `ObservabilityTraces`,
    /// `SoftwareDefects`, `RfCommunications`).
    source_class: &'static str,
    /// Path of the projected TSV fixture. All table values are
    /// relative `data/fixtures/*.tsv` paths — presumably resolved
    /// against the process working directory; confirm against the
    /// loader.
    default_path: &'static str,
    /// Pinned SHA-256 of the fixture, written as 64 lowercase hex
    /// characters in every table entry.
    fixture_sha256_hex: &'static str,
    /// Output tier directory under `reports/`. For the 20 audit
    /// datasets this is `s_real_1` / `s_real_2` / `s_real_3` matching
    /// the sealed bundle's `reports/s_real_3/bundle_manifest.toml`
    /// `tier_dir` field. For the 10 saturation fixtures this is
    /// `s_real_saturation` (they have no sealed bundle membership; they
    /// emit only when an operator explicitly requests them).
    ///
    /// WHY a field, not a hard-coded default: before S-REAL.3.1.2 the
    /// driver wrote every dataset to `reports/s_real_1/<id>/` regardless
    /// of the dataset's true tier, which silently diverged from the
    /// sealed bundle's on-disk layout. Threading the tier through the
    /// const table makes the emit path match the manifest by
    /// construction; the
    /// `audit_dataset_tier_dir_matches_bundle_manifest` acceptance
    /// test cross-validates the mirror.
    tier_dir: &'static str,
}
142
/// The 20 sealed S-REAL audit datasets that `--dataset all` enumerates.
///
/// WHY: this is the audit gauntlet's load-bearing identity manifest.
/// Every entry's `dataset_id` and `tier_dir` mirror the
/// `reports/s_real_3/bundle_manifest.toml` entry of the same name; the
/// 60-row bundle hash chain that CI-gates the sealed artifacts depends
/// on this mirror holding. The cross-validation lives in
/// `tests/s_real_audit_dataset_table_invariants.rs`.
///
/// Tier membership is carried by each entry's `tier_dir`, NOT by its
/// position in the table: 3 entries are `s_real_1`, 10 are `s_real_2`
/// and 7 are `s_real_3`, and the tiers are interleaved below.
///
/// Table order is observable: `--dataset all` dispatches the entries
/// in exactly this order, and `all_dataset_specs` (used by `lookup`
/// and the usage-error id menus) walks this table before
/// `SATURATION_FIXTURES`. Reorder only as a deliberate change.
const AUDIT_DATASETS: &[DatasetSpec] = &[
    DatasetSpec {
        dataset_id: "tadbench_f11",
        display_name: "TADBench TrainTicket F11",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F11.tsv",
        fixture_sha256_hex: "cdebdab01e310d4d02241b694b3771acfa9beca3060b0cee0e19fb237f65dc97",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "tadbench_f04",
        display_name: "TADBench TrainTicket F04",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F04.tsv",
        fixture_sha256_hex: "b3e7ba260617b4496aa5b09f7d399fecae2cc60dbb2002f6405e65a5f37cb1d7",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "tadbench_f11b",
        display_name: "TADBench TrainTicket F11b",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F11b.tsv",
        fixture_sha256_hex: "2f45979af791964873a48e6f53a769e0610c02a6452c4a96b7b8bb208d5c492e",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "tadbench_f19",
        display_name: "TADBench TrainTicket F19",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F19.tsv",
        fixture_sha256_hex: "d412b6e44a83bb583d7f4ea2e5c3405115e686282fd2840db3f640c1dc3dd480",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "illinois_socialnet",
        display_name: "Illinois SocialNetwork (DeathStarBench)",
        upstream_doi_or_url: "10.13012/B2IDB-6738796_V1",
        license: "CC0-1.0",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/illinois_socialnetwork.tsv",
        fixture_sha256_hex: "c86b5abd1b412f69cccaab9b3e838da742c5ea8a1ba1b6dce634ff3407cc082c",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "lo2",
        display_name: "LO2 (Bakhtin et al. 2025)",
        upstream_doi_or_url: "10.5281/zenodo.14257989",
        license: "CC-BY-4.0",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/lo2.tsv",
        fixture_sha256_hex: "24d5d7c06d755366b2148a3887151478adfd46062bbec54e9ce20dc358b4647c",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "deeptralog",
        display_name: "DeepTraLog F01-02 (Zhang et al. ICSE 2022)",
        upstream_doi_or_url: "github.com/FudanSELab/DeepTraLog",
        license: "no-upstream-license",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/deeptralog.tsv",
        fixture_sha256_hex: "90f2625258b4f823e317a331a8b800dbc0c13c4ee3182b81be6c06a9aaef1853",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "aiops_kpi",
        display_name: "AIOps Challenge 2018 KPI (Bagel sample)",
        upstream_doi_or_url: "Li, Chen, Pei. IPCCC 2018; github.com/NetManAIOps/Bagel",
        license: "no-upstream-license",
        source_class: "TimeSeriesAnomaly",
        default_path: "data/fixtures/aiops_challenge.tsv",
        fixture_sha256_hex: "be17110ebe6647d00fad79dc1ca69b1b01b22788773202bad6e3322e97b0602e",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "multidim_localization",
        display_name: "MultiDim Localization (2019 AIOps Challenge match 2)",
        upstream_doi_or_url: "github.com/NetManAIOps/MultiDimension-Localization",
        license: "no-upstream-license",
        source_class: "TimeSeriesAnomaly",
        default_path: "data/fixtures/multidim_localization.tsv",
        fixture_sha256_hex: "b1237ea1f26e380f6323c5a42f158f5c5ce8901b28d94c7d3aae281747d20926",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "defects4j",
        display_name: "Defects4J (Just, Jalali, Ernst)",
        upstream_doi_or_url: "github.com/rjust/defects4j",
        license: "MIT",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/defects4j.tsv",
        fixture_sha256_hex: "a6fa8c9d8e1fa78e2bb9c9f33e40d02aa6f26a04c0af2f760539793d33875a63",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "bugsinpy",
        display_name: "BugsInPy patch-line complexity (Widyasari et al.)",
        upstream_doi_or_url: "github.com/soarsmu/BugsInPy",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/bugsinpy.tsv",
        fixture_sha256_hex: "1e9349f3a0e3c76f20681d4623ff0c0ee768943c07a814ef3c7b481021a57f94",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "promise_defect_prediction",
        display_name: "PROMISE Ant 1.3 CK metrics (Sayyad Shirabad, Menzies)",
        upstream_doi_or_url: "github.com/ssea-lab/PROMISE",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/promise_defect_prediction.tsv",
        fixture_sha256_hex: "14856ef507c9ef8ff6b7120a8c50177d76b492bbff0653b1e635312eadba4461",
        tier_dir: "s_real_2",
    },
    // S-REAL.3 admissions — 5 C-MAPSS sisters + 1 PROMISE Ant 1.4 + 1 DeepTraLog F02 = 20 datasets total.
    // These are the next 7 entries (all `tier_dir: "s_real_3"`).
    DatasetSpec {
        dataset_id: "cmapss_fd001_unit50",
        display_name: "NASA C-MAPSS FD001 unit 50",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd001_unit50.tsv",
        fixture_sha256_hex: "d4920f9801bf1e27104387702f17e2413194201578ed9434e7a4f1a5b84fe5da",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd002_unit1",
        display_name: "NASA C-MAPSS FD002 unit 1 (multi-condition)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd002_unit1.tsv",
        fixture_sha256_hex: "07a94ffc69c54e9838cbf78c2f087e1a247f6971a7e7e75bc47fe6947d6c6c6a",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd002_unit100",
        display_name: "NASA C-MAPSS FD002 unit 100",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd002_unit100.tsv",
        fixture_sha256_hex: "34310904bd557b0e264d0e8d874695ed007026d05e62ca02745f295efbbbe2b0",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd003_unit1",
        display_name: "NASA C-MAPSS FD003 unit 1 (multi-fault)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd003_unit1.tsv",
        fixture_sha256_hex: "8eeb96ec624782e06f5abc4f77dc62f11e714634c6b6e520221432a2be4a87b1",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd004_unit1",
        display_name: "NASA C-MAPSS FD004 unit 1 (multi-condition + multi-fault)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd004_unit1.tsv",
        fixture_sha256_hex: "abc69ca301edea69b7d60fe0bc8eac912e991eb80d4ec0fb2f2725167266bdae",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "promise_ant_1_4",
        display_name: "PROMISE Apache Ant 1.4 CK metrics",
        upstream_doi_or_url: "github.com/ssea-lab/PROMISE",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/promise_ant_1_4.tsv",
        fixture_sha256_hex: "f965a049b98d69ade3391c5f7a777c4ea37089c386cd886b05012cc86748024c",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "deeptralog_f02",
        display_name: "DeepTraLog F02-04 ERROR fault period",
        upstream_doi_or_url: "github.com/FudanSELab/DeepTraLog",
        license: "no-upstream-license",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/deeptralog_f02.tsv",
        fixture_sha256_hex: "2ce480415b20dc49d09119b79933f2dd5d0ba62c186a8ecc0f2e2bf8b1e1a95f",
        tier_dir: "s_real_3",
    },
    // NOTE: this entry is tier `s_real_2`, not `s_real_3`, even though
    // it follows the S-REAL.3 admissions above. Position in the table
    // does not imply tier; `tier_dir` is authoritative.
    DatasetSpec {
        dataset_id: "cmapss_fd001_unit1",
        display_name: "NASA C-MAPSS FD001 unit 1 (run-to-failure, z-residual)",
        upstream_doi_or_url:
            "Saxena, Goebel, Simon, Eklund (PHM08); NASA PCoE Prognostics Data Repository",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd001_unit1.tsv",
        fixture_sha256_hex: "633442bb93f128bb44e82f4b09d0dd0f175933107bc9c0c3e1fc6bd6b040c93e",
        tier_dir: "s_real_2",
    },
];
355
/// The 10 saturation-class real-data fixtures used by
/// `scripts/s_real_saturation_sweep.sh` (and by explicit single-id
/// audit calls like `s-real-audit --dataset radioml_2018_snr30_large`).
///
/// WHY a separate table from `AUDIT_DATASETS`: these fixtures are 1 M-
/// cell TSV projections (~9 MB each) that `pack_for_colab.sh`
/// deliberately excludes from the slim Colab tarball (~90 MB of
/// saturation TSV data that the audit-gauntlet's 20-dataset replay
/// does NOT need; the saturation sweep is a dev-machine hardware-
/// anchored measurement on RTX 4080 SUPER / CUDA 13.2). Before
/// S-REAL.3.1.2 these lived inside `DATASETS` alongside the 20 audit
/// datasets, so `s-real-audit --dataset all` would attempt to
/// dispatch them on Colab — where the TSVs are intentionally absent
/// — and fail. The split makes the authority surface explicit:
/// `--dataset all` enumerates only `AUDIT_DATASETS`; saturation
/// fixtures dispatch only via explicit single-id or via the
/// saturation-sweep script.
///
/// All entries have `tier_dir = "s_real_saturation"`: they have no
/// sealed bundle membership and emit ad-hoc receipts at
/// `reports/s_real_saturation/<id>/` on the rare occasion an
/// operator dispatches one for diagnostic re-run.
const SATURATION_FIXTURES: &[DatasetSpec] = &[
    // S-PERF.16.a-magnitude large fixture from the RadioML 2018.01
    // SNR=30 dB HDF5 corpus. 1024 entities × 1024 windows =
    // 1,048,576 cells per dispatch — same event magnitude as the
    // S-PERF.16.a saturation bench, but built from REAL RF I/Q
    // bytes via the z-score projection in
    // `data/recipes/radioml_2018_snr30_large.py`.
    //
    // WHY this entry lives in SATURATION_FIXTURES: the throughput bench
    // (scripts/s_real_throughput_bench.sh) measures
    // fixture_bytes / dispatch_median_us. At 1M events / dispatch
    // the dispatcher escapes the launch-overhead regime and
    // surfaces actual sustained throughput, which is impossible
    // to see from the 128-656-event public fixtures sealed at
    // S-REAL.1 / S-REAL.2 / S-REAL.3.
    //
    // Upstream HDF5 is NOT vendored (97 MB; lives on external
    // drive). The projected TSV at default_path IS pinned via the
    // SHA-256 below; the recipe is byte-deterministic so two runs
    // of `radioml_2018_snr30_large.py` against the same HDF5
    // produce the same TSV bytes.
    //
    // License: DeepSig RadioML 2018.01 is distributed under
    // CC-BY-NC-SA-4.0 (research-use; non-commercial). DSFB-GPU
    // does NOT redistribute the upstream HDF5; only the
    // deterministic residual projection of a fixed slice. The
    // audit reports STRUCTURAL residual evidence, NOT a
    // modulation-classification verdict.
    DatasetSpec {
        dataset_id: "radioml_2018_snr30_large",
        display_name: "RadioML 2018.01 SNR=30 dB (1024×1024 z-residual; large-fixture throughput)",
        upstream_doi_or_url:
            "DeepSig RadioML 2018.01 (O'Shea, Corgan); https://www.deepsig.ai/datasets",
        license: "CC-BY-NC-SA-4.0",
        source_class: "RfCommunications",
        default_path: "data/fixtures/radioml_2018_snr30_1024x1024.tsv",
        fixture_sha256_hex: "0a626804be42f113bc62afd2245f86f9ac7c7204472e29fa09faf976ee7f6e86",
        tier_dir: "s_real_saturation",
    },
    // Second S-PERF.16.a-magnitude large-fixture entry. Same shape
    // as the radioml entry (1024×1024 = 1,048,576 cells) but built
    // from REAL DeepBeam I/Q magnitudes via the recipe at
    // `data/recipes/deepbeam_large.py`. Lives here so the saturation
    // sweep (scripts/s_real_saturation_sweep.sh) can compare two
    // independent real-data fixtures against the synthetic
    // S-PERF.16.a saturation median (22.74 GB/s).
    //
    // Same non-claims as the radioml entry: upstream HDF5 is NOT
    // vendored (56 GB; lives on external drive). The projected TSV
    // at default_path IS pinned via the SHA-256 below. DSFB-GPU
    // does NOT classify beam-pattern; it reports STRUCTURAL residual
    // evidence under the deterministic IQ-magnitude + z-score
    // projection. DeepBeam is admitted as a throughput witness, not
    // an RF-domain-truth claim.
    //
    // License: DeepBeam is distributed by the NEU GeneSys Lab for
    // research use. The dataset README does not include an SPDX-
    // style LICENSE token; we record `no-upstream-license` per the
    // research-fair-use convention used for the 7 other S-REAL
    // datasets without explicit LICENSE files (mirror of the
    // S-REAL.2c license-discipline reversal).
    DatasetSpec {
        dataset_id: "deepbeam_large",
        display_name: "DeepBeam (1024×1024 IQ-magnitude z-residual; large-fixture throughput)",
        upstream_doi_or_url:
            "NEU GeneSys Lab DeepBeam (neu_ww72bk394.h5); https://genesys-lab.org/oracle",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/deepbeam_1024x1024.tsv",
        fixture_sha256_hex: "242aae914fc3a88c0a5027536923a72f598062ea0647078f7b0e0024a8aa7929",
        tier_dir: "s_real_saturation",
    },
    // ---- Post-S-REAL.3.1 large-fixture throughput witnesses ----
    //
    // The 8 entries below extend the saturation-class real-data
    // surface from 2 fixtures (radioml + deepbeam) to 10. Each is
    // a nominally 1024×1024 cell (~1M event) residual-projection v2
    // TSV built from real upstream data via a deterministic recipe
    // under data/recipes/ (per their display names, deepsense6g is
    // 512×1024 and imdb_tgz is 1020×1024). Used by the saturation
    // sweep (scripts/s_real_saturation_sweep.sh) to surface the
    // saturation / launch-bound boundary across an
    // intentionally-diverse fixture set. Across the whole table
    // (including radioml + deepbeam above): 5 RF I/Q sources +
    // 1 mmWave-power source + 3 database-telemetry sources +
    // 1 byte-frequency projection of a DuckDB binary.
    //
    // All are throughput witnesses, NOT domain-truth claims.
    // Same non-claim discipline as radioml_2018_snr30_large /
    // deepbeam_large.
    DatasetSpec {
        dataset_id: "radioml_gold_large",
        display_name: "RadioML 2018.01 GOLD full corpus (1024×1024 IQ-magnitude z-residual)",
        upstream_doi_or_url:
            "DeepSig RadioML 2018.01 GOLD_XYZ_OSC.0001_1024.hdf5; https://www.deepsig.ai/datasets",
        license: "CC-BY-NC-SA-4.0",
        source_class: "RfCommunications",
        default_path: "data/fixtures/radioml_gold_1024x1024.tsv",
        fixture_sha256_hex: "06f156dc662a2ce26c9867b49cbf87e534be92b93bfed7402bfe56971906aeaf",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "powder_large",
        display_name: "POWDER 4G LTE Band-7 I/Q (Globecom 2020; 1024×1024 z-residual)",
        upstream_doi_or_url: "University of Utah POWDER (Globecom 2020); neu_m046tb444.zip",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/powder_1024x1024.tsv",
        fixture_sha256_hex: "8438f02bc033142797411f5789d8ef6a3f9c214a120ff1aeaabfa3528edb08f2",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "oracle_large",
        display_name: "ORACLE WiFi 802.11a fingerprinting (Sankhe et al. INFOCOM 2019; 1024×1024)",
        upstream_doi_or_url:
            "NEU/KRI 16-Device ORACLE dataset (Sankhe et al. INFOCOM 2019); neu_m044q5210.zip",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/oracle_1024x1024.tsv",
        fixture_sha256_hex: "6ada4586de505fdc67f2be053b24f92732acc32d3bc8154d17a27b52db570133",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "deepsense6g_large",
        display_name: "Deepsense6G Scenario 23 mmWave-power (512×1024 z-residual; sub-saturation)",
        upstream_doi_or_url:
            "Deepsense6G Scenario 23 (Alkhateeb et al. 2022); https://deepsense6g.net/scenario-23/",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/deepsense6g_512x1024.tsv",
        fixture_sha256_hex: "4d71dc2c087697f1c1455d78f926d11870149828786bbfca865b277afab6052e",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "imdb_tgz_large",
        display_name: "IMDB Join-Order-Benchmark cast_info.csv (1020×1024 numeric-ID z-residual)",
        upstream_doi_or_url:
            "IMDB Join-Order-Benchmark (Leis et al. VLDB 2015); imdb.tgz cast_info.csv",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/imdb_tgz_1020x1024.tsv",
        fixture_sha256_hex: "9e0d356f9a706f6132461873a5903df123e046326e67181497413ff122234aa6",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "imdb_duckdb_large",
        display_name: "IMDB DuckDB binary byte-frequency residual (1024×1024; byte-projection)",
        upstream_doi_or_url: "IMDB Join-Order-Benchmark DuckDB binary dump (sister of imdb.tgz)",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/imdb_duckdb_1024x1024.tsv",
        fixture_sha256_hex: "2970fbd9d0d6bdb7b98b461d549dff9a77ae4b0a94a4fc2967acbb7eec3e12e7",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "snowset_large",
        display_name: "Snowset Snowflake-telemetry CSV (Vuppalapati et al. NSDI 2020; 1024×1024)",
        upstream_doi_or_url:
            "Snowset (Vuppalapati et al. NSDI 2020); github.com/resource-disaggregation/snowset",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/snowset_1024x1024.tsv",
        fixture_sha256_hex: "0ec2b78c1fc4db066208968ab6fc452a4ac6dad31dc740b0870a77a3e73fc3c8",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "sqlshare_large",
        display_name: "SQLShare 2015 oceanographic CSV residual (Jain et al. UW 2015; 1024×1024)",
        upstream_doi_or_url:
            "SQLShare 2015 (Jain et al., U.Washington); uwescience.github.io/sqlshare",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/sqlshare_1024x1024.tsv",
        fixture_sha256_hex: "e663b53ccdcc1c2bad808a7268af10c9b638e05f108a371707c3e1ac924c37c8",
        tier_dir: "s_real_saturation",
    },
];
553
554/// Convenience iterator over every dataset entry in both tables, in
555/// the canonical (audit-first, saturation-second, table-order-within)
556/// sequence. Useful for cross-table invariants (SHA-pin sanity,
557/// duplicate-id check) without re-deriving the union on every caller.
558fn all_dataset_specs() -> impl Iterator<Item = &'static DatasetSpec> {
559    AUDIT_DATASETS.iter().chain(SATURATION_FIXTURES.iter())
560}
561
562/// Resolve a dataset_id against BOTH the audit-gauntlet and
563/// saturation-fixture tables.
564///
565/// WHY both: single-id audit (e.g. `s-real-audit --dataset
566/// radioml_2018_snr30_large`) is still admissible as a diagnostic
567/// re-run path. Only `--dataset all` enforces the audit-only
568/// AUDIT_DATASETS set; an operator who explicitly names a saturation
569/// fixture knows what they are asking for.
570fn lookup(id: &str) -> Option<&'static DatasetSpec> {
571    all_dataset_specs().find(|d| d.dataset_id == id)
572}
573
574// =====================================================================
575// CLI entry point.
576// =====================================================================
577
578/// Run the S-REAL audit gauntlet on one dataset, on every audit
579/// dataset, or on an explicitly-named saturation fixture.
580///
581/// Flags:
582/// - `--dataset <id|all>` — a canonical dataset id (e.g. `tadbench_f11`,
583///   `illinois_socialnet`, `aiops_kpi`, `cmapss_fd001_unit50`,
584///   `deeptralog`, `multidim_localization`, ...) or `all`. Required.
585///   `all` enumerates the 20 sealed audit datasets in `AUDIT_DATASETS`;
586///   the 10 saturation fixtures in `SATURATION_FIXTURES` are NEVER
587///   selected by `all` (they dispatch only via explicit single-id audit
588///   calls and via `scripts/s_real_saturation_sweep.sh`).
589/// - `--out-dir <path>` — root output directory. Defaults to `reports`.
590///   Each dataset writes to `<out-dir>/<tier_dir>/<dataset_id>/` where
591///   `tier_dir` is `s_real_1` / `s_real_2` / `s_real_3` per the sealed
592///   bundle's `reports/s_real_3/bundle_manifest.toml` layout (or
593///   `s_real_saturation` for an explicit single-id saturation run).
594///   Each dataset produces 9 panel-locked artifacts there
595///   (audit_report.html / casefile.json / dataset_manifest.toml /
596///   episodes.jsonl / limitations.md / perf_profile.txt /
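///   replay_verification.txt / run_receipt.txt / schema_map.toml).
/// - `--iters <n>` — S-REAL.PERF iteration count. Optional; a missing
///   or unparsable value falls back to 2, and values below 2 are
///   raised to 2 (replay verification needs at least run 1 + run 2).
/// - `--catalogs <n>` — S-REAL.PERF catalog count. Optional; a missing
///   or unparsable value falls back to 1, and 0 is raised to 1.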
598///
599/// Exit codes (mirrors the existing `run-gpu` convention so downstream
600/// automation handles the result uniformly):
601/// - 0 — success on every selected dataset.
602/// - 1 — CLI usage error.
603/// - 2 — GPU unavailable (`--features cuda` not built) or kernel failed.
604/// - 5 — I/O failure (read fixture, write artifact).
605/// - 6 — fixture SHA-256 mismatch (pinned hash divergence).
606/// - 7 — replay-verification failure (run 1 != run 2 bytes).
607pub fn parse_and_run(args: &[String]) -> ExitCode {
608    let flags = match parse_flags(args) {
609        Ok(f) => f,
610        Err(msg) => return usage_error(&msg),
611    };
612    let dataset_arg = if let Some(s) = flags.get("dataset") {
613        s.clone()
614    } else {
615        // Build the valid-id enumeration deterministically from both
616        // tables so adding a new dataset never leaves the error
617        // message stale. AUDIT_DATASETS first, then SATURATION_FIXTURES,
618        // then `all`.
619        let ids: Vec<&str> = all_dataset_specs().map(|d| d.dataset_id).collect();
620        let menu = format!("{}|all", ids.join("|"));
621        return usage_error(&format!("missing required flag --dataset ({menu})"));
622    };
623    let default_out = "reports".to_string();
624    let out_dir = flags.get("out-dir").cloned().unwrap_or(default_out);
625
626    // S-REAL.PERF flags. --iters >= 2 (replay verification needs at
627    // least run 1 + run 2). --catalogs >= 1.
628    let iters: u32 = flags
629        .get("iters")
630        .and_then(|s| s.parse().ok())
631        .unwrap_or(2)
632        .max(2);
633    let catalogs: u32 = flags
634        .get("catalogs")
635        .and_then(|s| s.parse().ok())
636        .unwrap_or(1)
637        .max(1);
638
639    let selected: Vec<&'static DatasetSpec> = if dataset_arg == "all" {
640        // `--dataset all` enumerates ONLY the 20 sealed audit datasets.
641        // Saturation fixtures dispatch only via explicit single-id or
642        // via the saturation-sweep script; this protects the Colab
643        // public-replay path from accidentally trying to read a
644        // 1M-cell TSV that the slim tarball deliberately excludes.
645        AUDIT_DATASETS.iter().collect()
646    } else if let Some(d) = lookup(&dataset_arg) {
647        vec![d]
648    } else {
649        let ids: Vec<&str> = all_dataset_specs().map(|d| d.dataset_id).collect();
650        let menu = format!("{}, all", ids.join(", "));
651        return usage_error(&format!(
652            "unknown dataset id {dataset_arg:?}; valid values: {menu}"
653        ));
654    };
655
656    for spec in selected {
657        // Output path layout matches the sealed bundle's `tier_dir`
658        // by construction: `<out_dir>/<tier_dir>/<dataset_id>/`. The
659        // 20 audit datasets land in `reports/s_real_1/...`,
660        // `reports/s_real_2/...`, `reports/s_real_3/...`; the 10
661        // saturation fixtures land in `reports/s_real_saturation/...`
662        // when invoked explicitly.
663        let dataset_dir = PathBuf::from(&out_dir)
664            .join(spec.tier_dir)
665            .join(spec.dataset_id);
666        match run_one_dataset(spec, &dataset_dir, iters, catalogs) {
667            Ok(()) => eprintln!(
668                "dsfb-gpu-debug s-real-audit: {} sealed at {} (iters={iters}, catalogs={catalogs})",
669                spec.dataset_id,
670                dataset_dir.display(),
671            ),
672            Err(code) => return code,
673        }
674    }
675    ExitCode::SUCCESS
676}
677
/// S-REAL.PERF per-dataset performance profile.
///
/// WHY: S-REAL.1 / .1.1 / .1.1.1 proved DSFB-GPU processes real
/// datasets deterministically and emits human-readable artifacts.
/// S-REAL.PERF answers the next honest question: *how long does that
/// take, and what dominates the wall?* Every timing is host-Instant
/// wall-clock microseconds; cudaEvent kernel-level timing remains
/// S-PERF territory. The profile is **runtime-dependent by design**
/// — the admission disclosure makes that explicit so an operator does
/// not conflate timing-replay with byte-replay.
#[derive(Clone, Debug, Default)]
pub struct PerformanceProfile {
    /// Number of dispatcher iterations requested (`--iters`).
    pub iters: u32,
    /// Number of sequential catalog dispatches requested (`--catalogs`).
    pub catalogs: u32,
    /// Wall time to read the fixture and parse / hash-verify it.
    pub ingest_us: u64,
    /// Wall time to lower cells to events and build the ingest report.
    pub lowering_us: u64,
    /// Wall time to build the scaled contract and pin its hashes.
    pub contract_setup_us: u64,
    /// Wall time of the first dispatcher run.
    pub cuda_dispatch_run1_us: u64,
    /// Wall time of the second (replay-verification) dispatcher run.
    pub cuda_dispatch_run2_us: u64,
    /// Wall times of any dispatcher runs beyond the first two.
    pub cuda_dispatch_extra_us: Vec<u64>,
    /// Wall time to serialize the casefile for both runs.
    pub casefile_emit_us: u64,
    /// Wall time to serialize the episode JSONL for both runs.
    pub episodes_jsonl_emit_us: u64,
    /// Wall time to render the HTML audit report for both runs.
    pub audit_report_emit_us: u64,
    /// End-to-end wall time; the denominator of every throughput metric.
    pub total_us: u64,
    /// Number of trace events produced by lowering.
    pub events_emitted: u32,
    /// Number of finite cells observed in the fixture.
    pub finite_cells: u32,
    /// Size of the fixture file in bytes.
    pub fixture_byte_size: u64,
    /// Sequential single-catalog total wall when --catalogs > 1.
    /// HONEST LABEL: NOT a batched dispatch; this is K sequential
    /// build_gpu calls on the same (events, contract). Reported so
    /// the operator can see launch-overhead amortization without
    /// being misled into thinking the dispatcher supports true
    /// K-batched mode.
    pub catalogs_total_us: u64,
}

impl PerformanceProfile {
    /// Converts `count` units over `elapsed_us` microseconds into units per second.
    ///
    /// Returns 0 when `elapsed_us` is 0. The product is formed in `u128` so a
    /// large `count` cannot overflow, and a quotient above `u64::MAX` saturates.
    fn per_second(count: u64, elapsed_us: u64) -> u64 {
        if elapsed_us == 0 {
            return 0;
        }
        let rate = u128::from(count) * 1_000_000 / u128::from(elapsed_us);
        u64::try_from(rate).unwrap_or(u64::MAX)
    }

    /// All dispatch samples (run 1, run 2, then extras) in ascending order.
    fn dispatch_samples_sorted(&self) -> Vec<u64> {
        let mut samples = Vec::with_capacity(2 + self.cuda_dispatch_extra_us.len());
        samples.push(self.cuda_dispatch_run1_us);
        samples.push(self.cuda_dispatch_run2_us);
        samples.extend(self.cuda_dispatch_extra_us.iter().copied());
        samples.sort_unstable();
        samples
    }

    /// Middle dispatch sample.
    ///
    /// For an even sample count this is the upper of the two middle samples
    /// (index `len / 2`), not their mean; with the minimum two runs it is
    /// therefore the slower one.
    fn dispatch_median_us(&self) -> u64 {
        let samples = self.dispatch_samples_sorted();
        samples.get(samples.len() / 2).copied().unwrap_or(0)
    }

    /// Dispatch sample at percentile `p`, using the floor of `(len - 1) * p / 100`.
    ///
    /// `p` is clamped to 100 so an out-of-range request yields the maximum
    /// sample instead of indexing past the end.
    fn percentile_us(&self, p: u32) -> u64 {
        let samples = self.dispatch_samples_sorted();
        let Some(last) = samples.len().checked_sub(1) else {
            return 0;
        };
        // The rank never exceeds `last`, so narrowing back to usize is lossless.
        let rank = (last as u64) * u64::from(p.min(100)) / 100;
        samples[rank as usize]
    }

    /// Emitted events per second of total wall time (0 when `total_us` is 0).
    fn events_per_second(&self) -> u64 {
        Self::per_second(u64::from(self.events_emitted), self.total_us)
    }

    /// Finite cells per second of total wall time (0 when `total_us` is 0).
    fn finite_cells_per_second(&self) -> u64 {
        Self::per_second(u64::from(self.finite_cells), self.total_us)
    }

    /// Fixture bytes per second of total wall time (0 when `total_us` is 0).
    fn logical_bytes_per_second(&self) -> u64 {
        Self::per_second(self.fixture_byte_size, self.total_us)
    }
}
766
767// =====================================================================
768// Single-dataset run.
769// =====================================================================
770
/// Runs the end-to-end audit for one dataset and writes its nine artifacts
/// into `out_dir`.
///
/// Stages, in order: read and verify the fixture, lower it to trace events,
/// build the scaled contract, dispatch twice for replay verification (plus
/// optional timed-only extra runs), serialize both runs, render the HTML
/// report for both runs, then write the artifacts. Each stage is timed with
/// a host `Instant`.
///
/// # Parameters
/// - `spec`: static dataset description (paths, ids, pinned SHA-256).
/// - `out_dir`: directory the artifacts are written to; created if missing.
/// - `iters`: total dispatcher iterations. Runs 1 and 2 are always executed
///   and compared; runs beyond the second are timed and their results dropped.
/// - `catalogs`: when greater than 1, that many additional sequential
///   dispatches are timed as one aggregate.
///
/// # Errors
/// Diagnostics go to stderr and the exit code is returned as `Err`:
/// - 5 when the fixture cannot be read, or the output directory or an
///   artifact cannot be written;
/// - 6 when `load_residual_projection_tsv` rejects the fixture bytes;
/// - 7 when the two runs are not byte-identical (artifacts are still written);
/// - whatever `run_gpu_or_emit` returns when a dispatch fails.
#[allow(
    clippy::too_many_lines,
    reason = "End-to-end S-REAL driver per panel-locked design: ingest \
              + lower + two-run dispatch + replay-verify + 9-artifact \
              panel-locked emit must live in one function so the audit's \
              load-bearing steps are visible top-to-bottom."
)]
fn run_one_dataset(
    spec: &'static DatasetSpec,
    out_dir: &Path,
    iters: u32,
    catalogs: u32,
) -> Result<(), ExitCode> {
    // Started first so `total_us` covers every stage up to profile assembly.
    let t_total_start = Instant::now();

    // 1. Read fixture bytes + 2. Parse + verify SHA-256 byte-pin.
    let t_ingest_start = Instant::now();
    let bytes = fs::read(spec.default_path).map_err(|e| {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: failed to read {}: {e}",
            spec.default_path
        );
        ExitCode::from(5)
    })?;
    let fixture_byte_size = bytes.len() as u64;
    // Any loader error (not only a hash mismatch) maps to exit code 6.
    let fixture = load_residual_projection_tsv(&bytes, spec.fixture_sha256_hex).map_err(|e| {
        eprintln!("dsfb-gpu-debug s-real-1-audit: ingest error: {e}");
        ExitCode::from(6)
    })?;
    let ingest_us = t_ingest_start.elapsed().as_micros() as u64;

    // 3. Deterministic lowering: cells → TraceEvents.
    let t_lowering_start = Instant::now();
    let lowering = LoweringConfig::default();
    let events = lower_to_trace_events(&fixture, &lowering);
    let ingest_report = build_ingest_report(&fixture, &events, fixture_byte_size);
    let lowering_us = t_lowering_start.elapsed().as_micros() as u64;

    // 4. Build scaled contract with bank + registry pins.
    let t_contract_start = Instant::now();
    // Both dimensions are floored at 1 so an empty fixture still yields a
    // non-degenerate contract shape.
    let n_entities = ingest_report.observed_num_signals.max(1);
    let n_windows = ingest_report.observed_num_windows.max(1);
    let mut contract = Contract::scaled(n_entities, n_windows);
    contract.pin_bank_hash(bank_hash());
    contract.pin_detector_registry_hash(registry_hash());
    let contract_setup_us = t_contract_start.elapsed().as_micros() as u64;

    // 5. Run dispatcher TWICE for replay verification (iter-1 +
    //    iter-2), plus optional extra iters for variance recording.
    let t_run1 = Instant::now();
    let case_run1 = run_gpu_or_emit(&events, &contract)?;
    let cuda_dispatch_run1_us = t_run1.elapsed().as_micros() as u64;

    let t_run2 = Instant::now();
    let case_run2 = run_gpu_or_emit(&events, &contract)?;
    let cuda_dispatch_run2_us = t_run2.elapsed().as_micros() as u64;

    // Runs 3..=iters contribute timing samples only; their case files are
    // discarded and are not part of the replay comparison.
    let mut cuda_dispatch_extra_us: Vec<u64> = Vec::new();
    for _ in 2..iters {
        let t = Instant::now();
        let _ = run_gpu_or_emit(&events, &contract)?;
        cuda_dispatch_extra_us.push(t.elapsed().as_micros() as u64);
    }

    // 5b. Sequential single-catalog amortization mode (--catalogs K).
    //     HONEST LABEL: K sequential build_gpu calls; not a true
    //     batched dispatch. Reports aggregate wall so an operator can
    //     see launch-overhead amortization vs the per-call median.
    let catalogs_total_us = if catalogs > 1 {
        let t = Instant::now();
        for _ in 0..catalogs {
            let _ = run_gpu_or_emit(&events, &contract)?;
        }
        t.elapsed().as_micros() as u64
    } else {
        0
    };

    // 6. Serialize canonical bytes for both runs and compute per-artifact
    //    SHA-256 receipts.
    let t_casefile_emit_start = Instant::now();
    let casefile_run1 = emit(&case_run1);
    let casefile_run2 = emit(&case_run2);
    let casefile_emit_us = t_casefile_emit_start.elapsed().as_micros() as u64;

    let t_episodes_emit_start = Instant::now();
    let episodes_run1 = serialize_episodes_jsonl(&case_run1.episodes);
    let episodes_run2 = serialize_episodes_jsonl(&case_run2.episodes);
    let episodes_jsonl_emit_us = t_episodes_emit_start.elapsed().as_micros() as u64;

    // Hashing happens outside the timed emit windows above.
    let casefile_run1_hex = sha256_to_hex_lower(&sha256(&casefile_run1));
    let casefile_run2_hex = sha256_to_hex_lower(&sha256(&casefile_run2));
    let episodes_run1_hex = sha256_to_hex_lower(&sha256(&episodes_run1));
    let episodes_run2_hex = sha256_to_hex_lower(&sha256(&episodes_run2));

    // 7. Build the audit-report inputs and render the HTML twice (the
    //    replay law applies to audit_report.html too).
    let manifest = DatasetManifest {
        dataset_id: spec.dataset_id.to_string(),
        display_name: spec.display_name.to_string(),
        upstream_doi_or_url: spec.upstream_doi_or_url.to_string(),
        license: spec.license.to_string(),
        source_class: spec.source_class.to_string(),
        vendored_path: spec.default_path.to_string(),
        fixture_sha256_hex: spec.fixture_sha256_hex.to_string(),
        fixture_byte_size,
    };
    let mut schema = SchemaMap::from(&ingest_report);
    schema.declared_healthy_window_end = fixture.declared_healthy_window_end;
    schema.lowering_config = lowering;

    // Toolchain identity is recorded deterministically. No rustc version
    // is recorded; the block carries this crate's package version
    // (CARGO_PKG_VERSION, resolved at compile time via env!) plus CUDA / GPU
    // identity from canonical S-PERF.16.a panel-locked values. A real
    // operator can override these at audit-emit time in a future S-REAL.1.1
    // commit; for now they are pinned constants so two runs in the same
    // process produce byte-identical toolchain blocks by construction.
    // NOTE(review): the CUDA version and GPU name below are string literals,
    // not values queried from the running system.
    let mut toolchain = BTreeMap::new();
    toolchain.insert(
        "dsfb_gpu_debug_demo_version".to_string(),
        env!("CARGO_PKG_VERSION").to_string(),
    );
    toolchain.insert("cuda_version".to_string(), "13.2".to_string());
    toolchain.insert("gpu_name".to_string(), "RTX 4080 SUPER".to_string());
    toolchain.insert("backend".to_string(), case_run1.backend.to_string());

    // `run_count` is 2 regardless of `iters`: only runs 1 and 2 are compared.
    let replay_pre = ReplayVerification {
        run_count: 2,
        casefile_json_sha256_run1: casefile_run1_hex.clone(),
        casefile_json_sha256_run2: casefile_run2_hex.clone(),
        episodes_jsonl_sha256_run1: episodes_run1_hex.clone(),
        episodes_jsonl_sha256_run2: episodes_run2_hex.clone(),
        final_case_file_hash_run1_hex: sha256_to_hex_lower(&case_run1.final_case_file_hash),
        final_case_file_hash_run2_hex: sha256_to_hex_lower(&case_run2.final_case_file_hash),
        episode_count_run1: case_run1.episodes.len() as u32,
        episode_count_run2: case_run2.episodes.len() as u32,
        toolchain,
    };

    // Render the HTML against case_run1; verify the second render is
    // byte-identical (this is what the acceptance test will also check
    // independently).
    let t_audit_emit_start = Instant::now();
    let html_run1 = render_audit_report_html(&manifest, &schema, &case_run1, &replay_pre);
    let html_run2 = render_audit_report_html(&manifest, &schema, &case_run2, &replay_pre);
    let audit_report_emit_us = t_audit_emit_start.elapsed().as_micros() as u64;
    let html_run1_hex = sha256_to_hex_lower(&sha256(html_run1.as_bytes()));
    let html_run2_hex = sha256_to_hex_lower(&sha256(html_run2.as_bytes()));

    // Assemble the performance profile from the timing samples gathered above.
    // `total_us` is sampled here, before any artifact is written, so it does
    // not include the filesystem writes below.
    let total_us = t_total_start.elapsed().as_micros() as u64;
    let perf = PerformanceProfile {
        iters,
        catalogs,
        ingest_us,
        lowering_us,
        contract_setup_us,
        cuda_dispatch_run1_us,
        cuda_dispatch_run2_us,
        cuda_dispatch_extra_us,
        casefile_emit_us,
        episodes_jsonl_emit_us,
        audit_report_emit_us,
        total_us,
        events_emitted: ingest_report.emitted_event_count,
        finite_cells: ingest_report.finite_cell_count,
        fixture_byte_size,
        catalogs_total_us,
    };

    // The audit's load-bearing replay-admission gate: every artifact must
    // be byte-identical across the two runs. Exit code 7 surfaces a
    // replay failure so downstream CI can react distinctly from a CUDA
    // failure.
    let admits = casefile_run1_hex == casefile_run2_hex
        && episodes_run1_hex == episodes_run2_hex
        && html_run1_hex == html_run2_hex;

    // 8. Emit 9 panel-locked artifacts in canonical order
    //    (audit_report.html, casefile.json, dataset_manifest.toml,
    //    episodes.jsonl, limitations.md, perf_profile.txt,
    //    replay_verification.txt, run_receipt.txt, schema_map.toml).
    //    Three of these — casefile.json, dataset_manifest.toml,
    //    episodes.jsonl — are chain-pinned under
    //    reports/s_real_3/bundle_hash_chain.txt; the other six are
    //    receipt-only (regenerated by every audit run).
    //
    //    HONEST NAMING: the emit is sequential `fs::create_dir_all` +
    //    `fs::write`, not filesystem-atomic. A partial write on
    //    disk-full would leave a half-populated directory. "Panel-
    //    locked" describes the per-artifact byte content (deterministic
    //    per the SHA-pinned input + the static driver source), not
    //    filesystem-atomic emit. A future S-REAL.4 hardening pass may
    //    add staging→verify→rename atomic-swap discipline; for
    //    research-court purposes the byte-equivalence test
    //    (s_real_1_replay_byte_identity) is the load-bearing gate.
    if let Err(e) = fs::create_dir_all(out_dir) {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: could not create {}: {e}",
            out_dir.display()
        );
        return Err(ExitCode::from(5));
    }

    // Writes one artifact under `out_dir`; any I/O failure becomes exit code 5.
    let write = |name: &str, content: &[u8]| -> Result<(), ExitCode> {
        let path = out_dir.join(name);
        fs::write(&path, content).map_err(|e| {
            eprintln!(
                "dsfb-gpu-debug s-real-1-audit: failed to write {}: {e}",
                path.display()
            );
            ExitCode::from(5)
        })
    };

    // The run-1 bytes are the ones persisted; run 2 exists only to be compared.
    write(
        "dataset_manifest.toml",
        emit_dataset_manifest_toml(&manifest).as_bytes(),
    )?;
    write("schema_map.toml", emit_schema_map_toml(&schema).as_bytes())?;
    write(
        "run_receipt.txt",
        emit_run_receipt_txt(spec, &manifest, &schema, &case_run1).as_bytes(),
    )?;
    write("casefile.json", &casefile_run1)?;
    write("episodes.jsonl", &episodes_run1)?;
    write("audit_report.html", html_run1.as_bytes())?;
    write(
        "replay_verification.txt",
        emit_replay_verification_txt(spec, &replay_pre, &html_run1_hex, &html_run2_hex, admits)
            .as_bytes(),
    )?;
    write("limitations.md", emit_limitations_md(spec).as_bytes())?;
    // S-REAL.PERF: ninth artifact. Runtime-dependent timing block; not
    // checked for byte-identity across re-invocations (timing varies).
    write(
        "perf_profile.txt",
        emit_perf_profile_txt(spec, &perf).as_bytes(),
    )?;

    // The gate is evaluated after the writes on purpose: a failed replay
    // still leaves replay_verification.txt on disk for inspection.
    if !admits {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: replay verification FAILED for {} (artifacts emitted; see replay_verification.txt)",
            spec.dataset_id
        );
        return Err(ExitCode::from(7));
    }
    Ok(())
}
1021
1022// =====================================================================
1023// Dispatch wrapper that surfaces honest exit codes.
1024// =====================================================================
1025
1026fn run_gpu_or_emit(
1027    events: &[dsfb_gpu_debug_core::event::TraceEvent],
1028    contract: &Contract,
1029) -> Result<CaseFile, ExitCode> {
1030    use dsfb_gpu_debug_cuda::{build_gpu, GpuError};
1031    match build_gpu(events, contract) {
1032        Ok(case) => Ok(case),
1033        Err(GpuError::CudaUnavailable) => {
1034            eprintln!(
1035                "dsfb-gpu-debug s-real-1-audit: GPU pipeline unavailable \
1036                 (built without --features cuda)"
1037            );
1038            Err(ExitCode::from(2))
1039        }
1040        Err(GpuError::KernelFailed(code)) => {
1041            eprintln!("dsfb-gpu-debug s-real-1-audit: GPU kernel failed with cuda status {code}");
1042            Err(ExitCode::from(2))
1043        }
1044        Err(GpuError::InvalidInput(msg)) => {
1045            eprintln!("dsfb-gpu-debug s-real-1-audit: GPU dispatcher rejected input: {msg}");
1046            Err(ExitCode::from(2))
1047        }
1048    }
1049}
1050
1051// =====================================================================
1052// Artifact serializers.
1053// =====================================================================
1054
1055/// JSONL serialization of admitted episodes.
1056///
1057/// WHY: One line per admitted episode. The episodes are pre-sorted by
1058/// `(entity_id, start_window, end_window, reason_code as u8)` so two
1059/// consecutive calls on the same `case.episodes` produce byte-identical
1060/// output regardless of insertion order. Q16 fields render as the raw
1061/// signed integer from `.0` (no decimal scaling) so the receipt carries
1062/// the exact bytes the bank stage saw, not a lossy display form.
1063///
1064/// Key order is fixed and matches the audit_report table column order.
1065/// No whitespace beyond the trailing newline per line.
1066#[must_use]
1067pub fn serialize_episodes_jsonl(episodes: &[Episode]) -> Vec<u8> {
1068    let mut sorted: Vec<&Episode> = episodes.iter().collect();
1069    sorted.sort_by_key(|e| (e.entity_id, e.start_window, e.end_window, e.reason as u8));
1070    let mut buf: Vec<u8> = Vec::new();
1071    for (idx, e) in sorted.iter().enumerate() {
1072        // Hand-roll JSON to avoid serde and to keep the key order pinned.
1073        let _ = writeln!(
1074            &mut buf as &mut dyn std::io::Write,
1075            "{{\"idx\":{},\"entity_id\":{},\"start_window\":{},\"end_window\":{},\
1076             \"motif\":\"{}\",\"reason\":\"{}\",\"peak_state\":\"{}\",\
1077             \"peak_residual_q\":{},\"peak_drift_q\":{},\"peak_slew_q\":{},\
1078             \"detector_bit_count\":{}}}",
1079            idx,
1080            e.entity_id,
1081            e.start_window,
1082            e.end_window,
1083            motif_name(e.motif),
1084            reason_name(e.reason),
1085            grammar_name(e.peak_state),
1086            e.peak_residual_q.0,
1087            e.peak_drift_q.0,
1088            e.peak_slew_q.0,
1089            e.detector_bit_count,
1090        );
1091    }
1092    buf
1093}
1094
1095fn motif_name(m: dsfb_gpu_debug_core::bank::BankMotif) -> &'static str {
1096    use dsfb_gpu_debug_core::bank::BankMotif;
1097    match m {
1098        BankMotif::LatencyRamp => "LatencyRamp",
1099        BankMotif::ErrorBurst => "ErrorBurst",
1100        BankMotif::SlewShockRecovery => "SlewShockRecovery",
1101        BankMotif::SustainedDegradation => "SustainedDegradation",
1102        BankMotif::OscillationInstability => "OscillationInstability",
1103        BankMotif::LocalizedRouteFault => "LocalizedRouteFault",
1104        BankMotif::FanoutCascadeCandidate => "FanoutCascadeCandidate",
1105        BankMotif::ConfuserTransient => "ConfuserTransient",
1106    }
1107}
1108
1109fn reason_name(r: dsfb_gpu_debug_core::grammar::ReasonCode) -> &'static str {
1110    use dsfb_gpu_debug_core::grammar::ReasonCode;
1111    match r {
1112        ReasonCode::Admissible => "Admissible",
1113        ReasonCode::BoundaryApproach => "BoundaryApproach",
1114        ReasonCode::SustainedOutwardDrift => "SustainedOutwardDrift",
1115        ReasonCode::AbruptSlewViolation => "AbruptSlewViolation",
1116        ReasonCode::RecurrentBoundaryGrazing => "RecurrentBoundaryGrazing",
1117        ReasonCode::EnvelopeViolation => "EnvelopeViolation",
1118        ReasonCode::DriftWithRecovery => "DriftWithRecovery",
1119        ReasonCode::SingleCrossing => "SingleCrossing",
1120    }
1121}
1122
1123fn grammar_name(g: dsfb_gpu_debug_core::grammar::GrammarState) -> &'static str {
1124    use dsfb_gpu_debug_core::grammar::GrammarState;
1125    match g {
1126        GrammarState::Admissible => "Admissible",
1127        GrammarState::Boundary => "Boundary",
1128        GrammarState::Violation => "Violation",
1129        GrammarState::Recovery => "Recovery",
1130    }
1131}
1132
1133fn emit_dataset_manifest_toml(m: &DatasetManifest) -> String {
1134    let mut s = String::new();
1135    s.push_str("# S-REAL.1 dataset manifest. Provenance record co-pinning\n");
1136    s.push_str("# upstream identity, license, vendored bytes path, and\n");
1137    s.push_str("# SHA-256 byte-pin of the file the audit actually read.\n\n");
1138    s.push_str("[dataset]\n");
1139    let _ = writeln!(&mut s, "dataset_id          = \"{}\"", m.dataset_id);
1140    let _ = writeln!(&mut s, "display_name        = \"{}\"", m.display_name);
1141    let _ = writeln!(
1142        &mut s,
1143        "upstream_doi_or_url = \"{}\"",
1144        m.upstream_doi_or_url
1145    );
1146    let _ = writeln!(&mut s, "license             = \"{}\"", m.license);
1147    let _ = writeln!(&mut s, "source_class        = \"{}\"", m.source_class);
1148    let _ = writeln!(&mut s, "vendored_path       = \"{}\"", m.vendored_path);
1149    s.push('\n');
1150    s.push_str("[fixture]\n");
1151    let _ = writeln!(&mut s, "sha256_hex          = \"{}\"", m.fixture_sha256_hex);
1152    let _ = writeln!(&mut s, "byte_size           = {}", m.fixture_byte_size);
1153    s
1154}
1155
1156fn emit_schema_map_toml(schema: &SchemaMap) -> String {
1157    let mut s = String::new();
1158    s.push_str("# S-REAL.1 schema map. Records the upstream-declared shape,\n");
1159    s.push_str("# the observed shape after parsing, and the deterministic\n");
1160    s.push_str("# event-lowering rule used to project cells into TraceEvents.\n\n");
1161    s.push_str("[upstream_declared]\n");
1162    let _ = writeln!(
1163        &mut s,
1164        "num_windows         = {}",
1165        schema.declared_num_windows
1166    );
1167    let _ = writeln!(
1168        &mut s,
1169        "num_signals         = {}",
1170        schema.declared_num_signals
1171    );
1172    let _ = writeln!(
1173        &mut s,
1174        "healthy_window_end  = {}",
1175        schema.declared_healthy_window_end
1176    );
1177    s.push('\n');
1178    s.push_str("[observed]\n");
1179    let _ = writeln!(
1180        &mut s,
1181        "num_windows         = {}",
1182        schema.observed_num_windows
1183    );
1184    let _ = writeln!(
1185        &mut s,
1186        "num_signals         = {}",
1187        schema.observed_num_signals
1188    );
1189    let _ = writeln!(&mut s, "nan_cell_count      = {}", schema.nan_cell_count);
1190    let _ = writeln!(&mut s, "finite_cell_count   = {}", schema.finite_cell_count);
1191    s.push('\n');
1192    s.push_str("[event_lowering]\n");
1193    let _ = writeln!(
1194        &mut s,
1195        "value_to_microsecond_scale = {}",
1196        schema.lowering_config.value_to_microsecond_scale
1197    );
1198    let _ = writeln!(
1199        &mut s,
1200        "latency_clamp_us           = {}",
1201        schema.lowering_config.latency_clamp_us
1202    );
1203    let _ = writeln!(
1204        &mut s,
1205        "window_size_ns             = {}",
1206        schema.lowering_config.window_size_ns
1207    );
1208    s.push('\n');
1209    s.push_str("[output]\n");
1210    let _ = writeln!(
1211        &mut s,
1212        "emitted_event_count = {}",
1213        schema.emitted_event_count
1214    );
1215    s
1216}
1217
1218#[allow(
1219    clippy::too_many_lines,
1220    reason = "Receipt emitter is a single byte-stable text block; \
1221              splitting risks ordering divergence between two builds."
1222)]
1223fn emit_run_receipt_txt(
1224    _spec: &DatasetSpec,
1225    manifest: &DatasetManifest,
1226    schema: &SchemaMap,
1227    case: &CaseFile,
1228) -> String {
1229    let mut s = String::new();
1230    s.push_str("=== S-REAL.1 run receipt ===\n");
1231    let _ = writeln!(&mut s, "dataset:                 {}", manifest.dataset_id);
1232    let _ = writeln!(&mut s, "display_name:            {}", manifest.display_name);
1233    let _ = writeln!(&mut s, "license:                 {}", manifest.license);
1234    let _ = writeln!(
1235        &mut s,
1236        "upstream_doi_or_url:     {}",
1237        manifest.upstream_doi_or_url
1238    );
1239    s.push('\n');
1240    s.push_str("Input\n");
1241    let _ = writeln!(
1242        &mut s,
1243        "  vendored_path:         {}",
1244        manifest.vendored_path
1245    );
1246    let _ = writeln!(
1247        &mut s,
1248        "  fixture_sha256:        {}",
1249        manifest.fixture_sha256_hex
1250    );
1251    let _ = writeln!(
1252        &mut s,
1253        "  fixture_byte_size:     {}",
1254        manifest.fixture_byte_size
1255    );
1256    s.push('\n');
1257    s.push_str("Lowering\n");
1258    let _ = writeln!(
1259        &mut s,
1260        "  value_to_microsecond_scale: {}",
1261        schema.lowering_config.value_to_microsecond_scale
1262    );
1263    let _ = writeln!(
1264        &mut s,
1265        "  latency_clamp_us:           {}",
1266        schema.lowering_config.latency_clamp_us
1267    );
1268    let _ = writeln!(
1269        &mut s,
1270        "  window_size_ns:             {}",
1271        schema.lowering_config.window_size_ns
1272    );
1273    let _ = writeln!(
1274        &mut s,
1275        "  finite_cells:               {}",
1276        schema.finite_cell_count
1277    );
1278    let _ = writeln!(
1279        &mut s,
1280        "  nan_cells_skipped:          {}",
1281        schema.nan_cell_count
1282    );
1283    let _ = writeln!(
1284        &mut s,
1285        "  events_emitted:             {}",
1286        schema.emitted_event_count
1287    );
1288    s.push('\n');
1289    s.push_str("Run\n");
1290    let _ = writeln!(&mut s, "  backend:               {}", case.backend);
1291    let _ = writeln!(
1292        &mut s,
1293        "  n_entities (= observed_num_signals): {}",
1294        schema.observed_num_signals
1295    );
1296    let _ = writeln!(
1297        &mut s,
1298        "  n_windows  (= observed_num_windows): {}",
1299        schema.observed_num_windows
1300    );
1301    let _ = writeln!(
1302        &mut s,
1303        "  contract_hash:         sha256:{}",
1304        sha256_to_hex_lower(&case.hashes.contract)
1305    );
1306    let _ = writeln!(
1307        &mut s,
1308        "  bank_hash:             sha256:{}",
1309        sha256_to_hex_lower(&case.hashes.bank)
1310    );
1311    let _ = writeln!(
1312        &mut s,
1313        "  detector_registry_hash: sha256:{}",
1314        sha256_to_hex_lower(&case.hashes.detector_registry)
1315    );
1316    s.push('\n');
1317    s.push_str("Result\n");
1318    let _ = writeln!(&mut s, "  episodes_admitted:     {}", case.episodes.len());
1319    let _ = writeln!(
1320        &mut s,
1321        "  final_verdict:         {}",
1322        case.final_verdict.name()
1323    );
1324    let _ = writeln!(
1325        &mut s,
1326        "  final_case_file_hash:  sha256:{}",
1327        sha256_to_hex_lower(&case.final_case_file_hash)
1328    );
1329    s
1330}
1331
1332fn emit_replay_verification_txt(
1333    spec: &DatasetSpec,
1334    r: &ReplayVerification,
1335    html_run1_hex: &str,
1336    html_run2_hex: &str,
1337    admits: bool,
1338) -> String {
1339    let mut s = String::new();
1340    s.push_str("=== S-REAL.1 replay verification ===\n");
1341    let _ = writeln!(&mut s, "dataset: {}", spec.dataset_id);
1342    let _ = writeln!(&mut s, "runs:    {}", r.run_count);
1343    s.push('\n');
1344    let cf_ok = r.casefile_json_sha256_run1 == r.casefile_json_sha256_run2;
1345    let ep_ok = r.episodes_jsonl_sha256_run1 == r.episodes_jsonl_sha256_run2;
1346    let hr_ok = html_run1_hex == html_run2_hex;
1347    let _ = writeln!(
1348        &mut s,
1349        "byte-identical replay: {}",
1350        if admits { "YES" } else { "NO" }
1351    );
1352    let _ = writeln!(
1353        &mut s,
1354        "  casefile.json:        {}",
1355        if cf_ok { "YES" } else { "NO" }
1356    );
1357    let _ = writeln!(
1358        &mut s,
1359        "  episodes.jsonl:       {}",
1360        if ep_ok { "YES" } else { "NO" }
1361    );
1362    let _ = writeln!(
1363        &mut s,
1364        "  audit_report.html:    {}",
1365        if hr_ok { "YES" } else { "NO" }
1366    );
1367    s.push('\n');
1368    s.push_str("Run 1 SHA-256\n");
1369    let _ = writeln!(
1370        &mut s,
1371        "  casefile.json:        {}",
1372        r.casefile_json_sha256_run1
1373    );
1374    let _ = writeln!(
1375        &mut s,
1376        "  episodes.jsonl:       {}",
1377        r.episodes_jsonl_sha256_run1
1378    );
1379    let _ = writeln!(&mut s, "  audit_report.html:    {html_run1_hex}");
1380    s.push('\n');
1381    s.push_str("Run 2 SHA-256\n");
1382    let _ = writeln!(
1383        &mut s,
1384        "  casefile.json:        {}",
1385        r.casefile_json_sha256_run2
1386    );
1387    let _ = writeln!(
1388        &mut s,
1389        "  episodes.jsonl:       {}",
1390        r.episodes_jsonl_sha256_run2
1391    );
1392    let _ = writeln!(&mut s, "  audit_report.html:    {html_run2_hex}");
1393    s.push('\n');
1394    let _ = writeln!(
1395        &mut s,
1396        "final_case_file_hash (run 1): {}",
1397        r.final_case_file_hash_run1_hex
1398    );
1399    let _ = writeln!(
1400        &mut s,
1401        "final_case_file_hash (run 2): {}",
1402        r.final_case_file_hash_run2_hex
1403    );
1404    let _ = writeln!(
1405        &mut s,
1406        "episode_count        (run 1): {}",
1407        r.episode_count_run1
1408    );
1409    let _ = writeln!(
1410        &mut s,
1411        "episode_count        (run 2): {}",
1412        r.episode_count_run2
1413    );
1414    s.push('\n');
1415    s.push_str("Toolchain\n");
1416    for (k, v) in &r.toolchain {
1417        let _ = writeln!(&mut s, "  {k}: {v}");
1418    }
1419    s.push('\n');
1420    s.push_str("Note: replay determinism is asserted only for the toolchain\n");
1421    s.push_str("recorded above. Different driver / CUDA / hardware versions\n");
1422    s.push_str("may produce different bytes; the audit does NOT claim\n");
1423    s.push_str("cross-toolchain replay byte-identity.\n");
1424    s
1425}
1426
1427/// S-REAL.PERF: emit the per-dataset performance profile.
1428///
1429/// WHY: A ninth artifact next to the original eight. Carries the
1430/// per-stage host-Instant wall-clock microseconds, multi-iteration
1431/// dispatch variance (median + p50/p95/p99), throughput metrics
1432/// (events/s, finite-cells/s, logical-bytes/s), and the optional
1433/// sequential-catalog amortization total. Explicitly RUNTIME-
1434/// DEPENDENT — the byte-identical-replay claim covers the inference
1435/// chain (`casefile.json` + `episodes.jsonl`), NOT this timing block.
1436/// The disclosure paragraph at the bottom records that distinction.
1437#[allow(
1438    clippy::too_many_lines,
1439    reason = "Receipt emitter is a single byte-stable text block; \
1440              splitting risks ordering divergence between two builds."
1441)]
1442fn emit_perf_profile_txt(spec: &DatasetSpec, p: &PerformanceProfile) -> String {
1443    let mut s = String::new();
1444    s.push_str("=== S-REAL.PERF performance profile ===\n");
1445    let _ = writeln!(&mut s, "dataset: {}", spec.dataset_id);
1446    let _ = writeln!(&mut s, "iters:    {}", p.iters);
1447    let _ = writeln!(&mut s, "catalogs: {}", p.catalogs);
1448    s.push('\n');
1449    s.push_str("Per-stage wall (microseconds, host Instant):\n");
1450    let _ = writeln!(&mut s, "  ingest_us              : {}", p.ingest_us);
1451    let _ = writeln!(&mut s, "  lowering_us            : {}", p.lowering_us);
1452    let _ = writeln!(&mut s, "  contract_setup_us      : {}", p.contract_setup_us);
1453    let _ = writeln!(
1454        &mut s,
1455        "  cuda_dispatch_run1_us  : {}",
1456        p.cuda_dispatch_run1_us
1457    );
1458    let _ = writeln!(
1459        &mut s,
1460        "  cuda_dispatch_run2_us  : {}",
1461        p.cuda_dispatch_run2_us
1462    );
1463    if !p.cuda_dispatch_extra_us.is_empty() {
1464        let _ = writeln!(
1465            &mut s,
1466            "  cuda_dispatch_extra_us : {:?}",
1467            p.cuda_dispatch_extra_us
1468        );
1469    }
1470    let _ = writeln!(&mut s, "  casefile_emit_us       : {}", p.casefile_emit_us);
1471    let _ = writeln!(
1472        &mut s,
1473        "  episodes_jsonl_emit_us : {}",
1474        p.episodes_jsonl_emit_us
1475    );
1476    let _ = writeln!(
1477        &mut s,
1478        "  audit_report_emit_us   : {}",
1479        p.audit_report_emit_us
1480    );
1481    let _ = writeln!(&mut s, "  total_us               : {}", p.total_us);
1482    s.push('\n');
1483    s.push_str("Dispatch variance (across all recorded iters):\n");
1484    let _ = writeln!(
1485        &mut s,
1486        "  dispatch_median_us     : {}",
1487        p.dispatch_median_us()
1488    );
1489    let _ = writeln!(&mut s, "  dispatch_p50_us        : {}", p.percentile_us(50));
1490    let _ = writeln!(&mut s, "  dispatch_p95_us        : {}", p.percentile_us(95));
1491    let _ = writeln!(&mut s, "  dispatch_p99_us        : {}", p.percentile_us(99));
1492    s.push('\n');
1493    s.push_str("Throughput (end-to-end wall):\n");
1494    let _ = writeln!(
1495        &mut s,
1496        "  events_emitted             : {}",
1497        p.events_emitted
1498    );
1499    let _ = writeln!(&mut s, "  finite_cells               : {}", p.finite_cells);
1500    let _ = writeln!(
1501        &mut s,
1502        "  fixture_byte_size          : {}",
1503        p.fixture_byte_size
1504    );
1505    let _ = writeln!(
1506        &mut s,
1507        "  events_per_second          : {}",
1508        p.events_per_second()
1509    );
1510    let _ = writeln!(
1511        &mut s,
1512        "  finite_cells_per_second    : {}",
1513        p.finite_cells_per_second()
1514    );
1515    let _ = writeln!(
1516        &mut s,
1517        "  logical_bytes_per_second   : {}",
1518        p.logical_bytes_per_second()
1519    );
1520    if p.catalogs > 1 {
1521        s.push('\n');
1522        s.push_str("Sequential-catalog amortization (--catalogs > 1):\n");
1523        let _ = writeln!(
1524            &mut s,
1525            "  catalogs_total_us          : {}",
1526            p.catalogs_total_us
1527        );
1528        let _ = writeln!(
1529            &mut s,
1530            "  per_catalog_us             : {}",
1531            p.catalogs_total_us / u64::from(p.catalogs)
1532        );
1533        s.push_str("  note                       : K sequential build_gpu calls; NOT a batched dispatch.\n");
1534    }
1535    s.push('\n');
1536    s.push_str("Honest framing (panel-locked, MUST appear):\n");
1537    s.push_str("  - Timing values are runtime-dependent. The byte-identical-replay\n");
1538    s.push_str("    claim covers casefile.json + episodes.jsonl (the inference\n");
1539    s.push_str("    chain), NOT this perf_profile.txt or the timing values inside\n");
1540    s.push_str("    audit_report.html. Re-invoking s-real-1-audit will produce a\n");
1541    s.push_str("    new perf_profile.txt with new timing values; the casefile +\n");
1542    s.push_str("    episodes bytes will remain byte-identical to the sealed S-REAL.1.1.1\n");
1543    s.push_str("    artifacts.\n");
1544    s.push_str("  - At these small fixture sizes (128 / 192 / 656 events) the wall\n");
1545    s.push_str("    is overhead-dominated. Real-data throughput numbers below are\n");
1546    s.push_str("    honest measurements on this hardware at this scale; they are NOT\n");
1547    s.push_str("    saturation claims, NOT production-deployment throughput, and NOT\n");
1548    s.push_str("    detector-superiority benchmarks. CUDA timing is host-Instant\n");
1549    s.push_str("    wall (not cudaEvent kernel time; that lives in S-PERF).\n");
1550    s.push_str("  - Cross-driver / cross-CUDA / cross-hardware replay byte-identity\n");
1551    s.push_str("    or throughput-identity is NOT claimed.\n");
1552    s
1553}
1554
1555fn emit_limitations_md(spec: &DatasetSpec) -> String {
1556    let mut s = String::new();
1557    let _ = writeln!(
1558        &mut s,
1559        "# S-REAL.1 audit — limitations and non-claims ({})\n",
1560        spec.dataset_id
1561    );
1562    s.push_str("This file accompanies the `audit_report.html` for this dataset. The\n");
1563    s.push_str("audit's deliverable is **deterministic, replayable structural\n");
1564    s.push_str("evidence on real public dataset bytes** — not domain-truth\n");
1565    s.push_str("claims.\n\n");
1566    s.push_str("## Non-claims\n\n");
1567    for nc in NON_CLAIMS_LINES {
1568        let _ = writeln!(&mut s, "- {nc}");
1569    }
1570    s.push_str("\n## Lowering disclosure\n\n");
1571    s.push_str("The upstream fixture is in `residual-projection v2` form\n");
1572    s.push_str("(window-major × signal-minor TSV). DSFB-GPU normally takes a\n");
1573    s.push_str("`Vec<TraceEvent>` and projects events into residuals via its\n");
1574    s.push_str("window-feature kernel; the upstream is already past that\n");
1575    s.push_str("projection. To run the deterministic engine on this form\n");
1576    s.push_str("without modifying the dispatcher, the audit lowers each\n");
1577    s.push_str("finite cell into one synthetic `TraceEvent` via a documented\n");
1578    s.push_str("rule (see `schema_map.toml` and section 2 of\n");
1579    s.push_str("`audit_report.html`). The audit does NOT claim to recover the\n");
1580    s.push_str("upstream's original trace events; it claims DSFB-GPU saw\n");
1581    s.push_str("exactly the events that rule produces from these bytes.\n");
1582    s
1583}
1584
/// Non-claim statements attached to every S-REAL audit.
///
/// `emit_limitations_md` writes each entry, in this order, as one
/// Markdown bullet under the `## Non-claims` heading of a dataset's
/// `limitations.md`. The module-level docs describe these as preserved
/// verbatim into every dataset's limitations file, so editing an entry
/// changes the emitted artifact text.
const NON_CLAIMS_LINES: &[&str] = &[
    "Does NOT claim DSFB has identified the \"real\" anomaly in the dataset.",
    "Does NOT claim DSFB outperforms any other anomaly detector.",
    "Does NOT claim DSFB has discovered causality.",
    "Does NOT claim DSFB has measured remediation effectiveness.",
    "Does NOT claim fitness-for-purpose on regulated or safety-critical use.",
    "Does NOT claim the dataset is \"correctly labeled\" or \"ground truth\".",
    "Does NOT claim the corpus or registry is exhaustive.",
    "Does NOT claim replay determinism across different driver / CUDA / hardware versions.",
];
1595
1596#[cfg(test)]
1597mod tests {
1598    use super::*;
1599    use dsfb_gpu_debug_core::bank::{BankMotif, Episode};
1600    use dsfb_gpu_debug_core::fixed::Q16;
1601    use dsfb_gpu_debug_core::grammar::{GrammarState, ReasonCode};
1602
    /// Test helper: builds an `Episode` from the caller-supplied entity
    /// id, window range, motif, and reason code.
    ///
    /// Every other field is a fixed placeholder (`GrammarState::Boundary`
    /// peak state, constant `Q16` peak values, a `detector_bit_count` of
    /// 3, and no admission), so two episodes built here differ only in
    /// the arguments passed.
    fn mk_episode(
        entity: u32,
        start: u32,
        end: u32,
        motif: BankMotif,
        reason: ReasonCode,
    ) -> Episode {
        Episode {
            entity_id: entity,
            start_window: start,
            end_window: end,
            motif,
            reason,
            peak_state: GrammarState::Boundary,
            peak_residual_q: Q16(123),
            peak_drift_q: Q16(456),
            peak_slew_q: Q16(789),
            detector_bit_count: 3,
            admission: None,
        }
    }
1624
    #[test]
    fn lookup_admits_known_datasets() {
        // Every id `lookup` must admit: the 20 audit datasets
        // (13 + 7 below) followed by the 10 large throughput-witness
        // fixtures (2 + 8 below), 30 ids in total. An id outside the
        // list must be rejected.
        for id in [
            // S-REAL.1 + S-REAL.2c (13 datasets, 5 source-class families).
            "tadbench_f11",
            "tadbench_f04",
            "tadbench_f11b",
            "tadbench_f19",
            "illinois_socialnet",
            "lo2",
            "deeptralog",
            "aiops_kpi",
            "multidim_localization",
            "defects4j",
            "bugsinpy",
            "promise_defect_prediction",
            "cmapss_fd001_unit1",
            // S-REAL.3 admissions (7 more, reaching 20).
            "cmapss_fd001_unit50",
            "cmapss_fd002_unit1",
            "cmapss_fd002_unit100",
            "cmapss_fd003_unit1",
            "cmapss_fd004_unit1",
            "promise_ant_1_4",
            "deeptralog_f02",
            // Large-fixture throughput entry (post-S-REAL.3; 21st
            // id in this list, 1024×1024 cells from RadioML 2018
            // SNR=30 dB HDF5; used by
            // scripts/s_real_throughput_bench.sh to measure
            // dispatcher throughput at S-PERF.16.a magnitude on
            // real RF I/Q bytes).
            "radioml_2018_snr30_large",
            // Second large-fixture entry — DeepBeam IQ-magnitude
            // residual projection at 1024×1024 cells. Same role
            // as radioml_2018_snr30_large: throughput-witness for
            // the saturation sweep.
            "deepbeam_large",
            // S-REAL.3.1 saturation-sweep extension: 8 more
            // throughput-witness fixtures (RF / mmWave /
            // database-derived), bringing the large-fixture
            // total to 10. All built from real upstream data via
            // deterministic recipes; all throughput witnesses,
            // not domain-truth claims.
            "radioml_gold_large",
            "powder_large",
            "oracle_large",
            "deepsense6g_large",
            "imdb_tgz_large",
            "imdb_duckdb_large",
            "snowset_large",
            "sqlshare_large",
        ] {
            assert!(lookup(id).is_some(), "lookup must admit {id}");
        }
        assert!(lookup("unknown_dataset").is_none());
    }
1680
1681    #[test]
1682    fn dataset_sha256_pins_are_lower_hex_64() {
1683        for spec in all_dataset_specs() {
1684            assert_eq!(
1685                spec.fixture_sha256_hex.len(),
1686                64,
1687                "{} pin must be 64 hex chars",
1688                spec.dataset_id
1689            );
1690            assert!(
1691                spec.fixture_sha256_hex
1692                    .chars()
1693                    .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()),
1694                "{} pin must be lowercase hex",
1695                spec.dataset_id
1696            );
1697        }
1698    }
1699
1700    // =================================================================
1701    // S-REAL.3.1.2 — panel-required dataset-table-split invariants.
1702    //
1703    // The 5 tests below pin the audit/saturation surface boundary.
1704    // Before S-REAL.3.1.2 the driver had a single `DATASETS` constant
1705    // carrying 30 entries and `--dataset all` enumerated all 30,
1706    // which silently dispatched the 10 large saturation fixtures on
1707    // Colab where their TSVs are deliberately excluded from the
1708    // tarball. These tests make the boundary load-bearing: any future
1709    // commit that re-merges the tables, drops a saturation id back
1710    // into the audit set, or breaks the tier_dir → bundle_manifest
1711    // mirror fails the suite before reaching the bundle integrity
1712    // gate.
1713    // =================================================================
1714
1715    /// Panel-required negative: `--dataset all` must enumerate exactly
1716    /// the 20 sealed audit datasets. The bundle hash chain has 60
1717    /// rows = 20 datasets × 3 chain-pinned artifacts; the count is a
1718    /// load-bearing identity for the entire S-REAL.3 bundle.
1719    #[test]
1720    fn audit_all_dataset_count_is_20() {
1721        assert_eq!(
1722            AUDIT_DATASETS.len(),
1723            20,
1724            "AUDIT_DATASETS must hold exactly 20 sealed audit datasets; \
1725             changing this is a panel-acknowledged schema-upgrade event"
1726        );
1727    }
1728
1729    /// Panel-required negative: `SATURATION_FIXTURES` must hold
1730    /// exactly the 10 saturation-class real-data fixtures the
1731    /// 30-fixture sweep depends on (the other 20 sweep fixtures
1732    /// are the audit datasets at their native cell counts, dispatched
1733    /// from AUDIT_DATASETS).
1734    #[test]
1735    fn saturation_fixture_count_is_10() {
1736        assert_eq!(
1737            SATURATION_FIXTURES.len(),
1738            10,
1739            "SATURATION_FIXTURES must hold exactly 10 saturation-class \
1740             real-data fixtures (RadioML/DeepBeam/RadioML-Gold/POWDER/\
1741             ORACLE/Deepsense6G/IMDb/Snowset/SQLShare)"
1742        );
1743    }
1744
1745    /// Panel-required CAMPAIGN IDENTITY negative: no saturation
1746    /// fixture id appears in `AUDIT_DATASETS`, AND no AUDIT_DATASETS
1747    /// entry's default_path matches the saturation-TSV exclusion
1748    /// pattern that `pack_for_colab.sh` uses (`*x1024.tsv`). Together
1749    /// these guarantee `--dataset all` can never accidentally
1750    /// dispatch a TSV that the slim Colab tarball excludes.
1751    #[test]
1752    fn audit_all_excludes_large_saturation_fixtures() {
1753        const SATURATION_IDS: &[&str] = &[
1754            "radioml_2018_snr30_large",
1755            "deepbeam_large",
1756            "radioml_gold_large",
1757            "powder_large",
1758            "oracle_large",
1759            "deepsense6g_large",
1760            "imdb_tgz_large",
1761            "imdb_duckdb_large",
1762            "snowset_large",
1763            "sqlshare_large",
1764        ];
1765        let audit_ids: Vec<&str> = AUDIT_DATASETS.iter().map(|d| d.dataset_id).collect();
1766        for sat_id in SATURATION_IDS {
1767            assert!(
1768                !audit_ids.contains(sat_id),
1769                "AUDIT_DATASETS must NOT contain saturation fixture {sat_id}; \
1770                 the slim Colab tarball excludes its TSV"
1771            );
1772        }
1773        // Defense-in-depth: no AUDIT_DATASETS entry's TSV path can
1774        // match the saturation-TSV exclusion pattern. The
1775        // `pack_for_colab.sh` exclusion uses the glob `*x1024.tsv`
1776        // (which covers 1024x1024 / 1020x1024 / 512x1024 variants).
1777        for spec in AUDIT_DATASETS {
1778            let lower_path = spec.default_path.to_lowercase();
1779            assert!(
1780                !lower_path.ends_with("x1024.tsv"),
1781                "AUDIT_DATASETS entry {} has default_path {:?} matching \
1782                 the saturation-TSV exclusion pattern *x1024.tsv; this \
1783                 would break the Colab public-replay path",
1784                spec.dataset_id,
1785                spec.default_path
1786            );
1787        }
1788    }
1789
1790    /// Panel-required negative: no dataset_id appears in BOTH tables.
1791    /// The two tables must be a partition, not just a covering set.
1792    #[test]
1793    fn audit_dataset_table_no_duplicate_ids() {
1794        let audit_ids: std::collections::HashSet<&str> =
1795            AUDIT_DATASETS.iter().map(|d| d.dataset_id).collect();
1796        let sat_ids: std::collections::HashSet<&str> =
1797            SATURATION_FIXTURES.iter().map(|d| d.dataset_id).collect();
1798        let overlap: Vec<&&str> = audit_ids.intersection(&sat_ids).collect();
1799        assert!(
1800            overlap.is_empty(),
1801            "AUDIT_DATASETS and SATURATION_FIXTURES must be disjoint; \
1802             found overlap: {overlap:?}"
1803        );
1804        // Also: no id is repeated within a single table.
1805        assert_eq!(
1806            audit_ids.len(),
1807            AUDIT_DATASETS.len(),
1808            "AUDIT_DATASETS contains a duplicate dataset_id"
1809        );
1810        assert_eq!(
1811            sat_ids.len(),
1812            SATURATION_FIXTURES.len(),
1813            "SATURATION_FIXTURES contains a duplicate dataset_id"
1814        );
1815    }
1816
1817    /// Panel-required cross-validation negative: every AUDIT_DATASETS
1818    /// entry's `tier_dir` matches the `tier_dir` field of the
1819    /// corresponding entry in `reports/s_real_3/bundle_manifest.toml`.
1820    /// This is the load-bearing mirror — if the driver's tier table
1821    /// drifts from the sealed bundle's tier layout, the freshly-
1822    /// emitted artifacts land in the wrong directory and the bundle
1823    /// integrity test catches it downstream; this test catches the
1824    /// drift at the table-identity level first.
1825    ///
1826    /// The bundle manifest stores `tier_dir = "reports/s_real_X/<id>"`
1827    /// (full path); the driver stores `tier_dir = "s_real_X"` (root-
1828    /// relative segment). The cross-check normalises both.
1829    #[test]
1830    fn audit_dataset_tier_dir_matches_bundle_manifest() {
1831        let manifest_path = std::path::Path::new("../../reports/s_real_3/bundle_manifest.toml");
1832        let manifest_path = if manifest_path.exists() {
1833            manifest_path.to_path_buf()
1834        } else {
1835            // Test runner CWD can be either the workspace root or the
1836            // crate root depending on how tests are launched; resolve
1837            // both layouts.
1838            std::path::PathBuf::from("reports/s_real_3/bundle_manifest.toml")
1839        };
1840        let body = match std::fs::read_to_string(&manifest_path) {
1841            Ok(b) => b,
1842            Err(e) => {
1843                eprintln!(
1844                    "audit_dataset_tier_dir_matches_bundle_manifest: skipping \
1845                     (bundle_manifest.toml not readable at {}: {e})",
1846                    manifest_path.display()
1847                );
1848                return;
1849            }
1850        };
1851        // Parse the manifest as a flat key-walker. Each `[datasets.<id>]`
1852        // section is followed by a `tier_dir = "reports/s_real_X/<id>"`
1853        // line. Build the map id → manifest_tier_dir.
1854        let mut current_id: Option<String> = None;
1855        let mut manifest_tier_by_id: std::collections::HashMap<String, String> =
1856            std::collections::HashMap::new();
1857        for line in body.lines() {
1858            let trimmed = line.trim();
1859            if let Some(rest) = trimmed.strip_prefix("[datasets.") {
1860                if let Some(end) = rest.find(']') {
1861                    current_id = Some(rest[..end].to_string());
1862                }
1863            } else if let Some(rest) = trimmed.strip_prefix("tier_dir = \"") {
1864                if let Some(end) = rest.find('"') {
1865                    if let Some(id) = current_id.take() {
1866                        manifest_tier_by_id.insert(id, rest[..end].to_string());
1867                    }
1868                }
1869            }
1870        }
1871        if manifest_tier_by_id.is_empty() {
1872            eprintln!(
1873                "audit_dataset_tier_dir_matches_bundle_manifest: skipping \
1874                 (manifest at {} contains no parseable entries)",
1875                manifest_path.display()
1876            );
1877            return;
1878        }
1879        for spec in AUDIT_DATASETS {
1880            let expected_full = format!("reports/{}/{}", spec.tier_dir, spec.dataset_id);
1881            let manifest_full = manifest_tier_by_id.get(spec.dataset_id).unwrap_or_else(|| {
1882                panic!(
1883                    "AUDIT_DATASETS entry {} is missing from bundle_manifest.toml",
1884                    spec.dataset_id
1885                )
1886            });
1887            assert_eq!(
1888                manifest_full, &expected_full,
1889                "AUDIT_DATASETS[{}].tier_dir mirror divergence: driver = {:?}, \
1890                 manifest = {:?}",
1891                spec.dataset_id, expected_full, manifest_full
1892            );
1893        }
1894    }
1895
1896    #[test]
1897    fn serialize_episodes_jsonl_is_deterministic_and_sorted() {
1898        // Same multiset but different insertion order; output must be identical.
1899        let a = vec![
1900            mk_episode(
1901                5,
1902                10,
1903                12,
1904                BankMotif::LatencyRamp,
1905                ReasonCode::BoundaryApproach,
1906            ),
1907            mk_episode(
1908                2,
1909                1,
1910                4,
1911                BankMotif::ErrorBurst,
1912                ReasonCode::EnvelopeViolation,
1913            ),
1914            mk_episode(2, 1, 4, BankMotif::ErrorBurst, ReasonCode::BoundaryApproach),
1915        ];
1916        let b = vec![
1917            mk_episode(2, 1, 4, BankMotif::ErrorBurst, ReasonCode::BoundaryApproach),
1918            mk_episode(
1919                5,
1920                10,
1921                12,
1922                BankMotif::LatencyRamp,
1923                ReasonCode::BoundaryApproach,
1924            ),
1925            mk_episode(
1926                2,
1927                1,
1928                4,
1929                BankMotif::ErrorBurst,
1930                ReasonCode::EnvelopeViolation,
1931            ),
1932        ];
1933        let sa = serialize_episodes_jsonl(&a);
1934        let sb = serialize_episodes_jsonl(&b);
1935        assert_eq!(sa, sb);
1936        let text = std::str::from_utf8(&sa).unwrap();
1937        let lines: Vec<&str> = text.lines().collect();
1938        assert_eq!(lines.len(), 3);
1939        // Verify sort order: idx 0 has entity_id 2 + reason BoundaryApproach (u8 1)
1940        assert!(lines[0].contains("\"entity_id\":2"));
1941        assert!(lines[0].contains("\"reason\":\"BoundaryApproach\""));
1942        // idx 1: entity 2, reason EnvelopeViolation (u8 5)
1943        assert!(lines[1].contains("\"entity_id\":2"));
1944        assert!(lines[1].contains("\"reason\":\"EnvelopeViolation\""));
1945        // idx 2: entity 5
1946        assert!(lines[2].contains("\"entity_id\":5"));
1947    }
1948
1949    #[test]
1950    fn serialize_episodes_jsonl_handles_empty() {
1951        let s = serialize_episodes_jsonl(&[]);
1952        assert!(s.is_empty());
1953    }
1954
1955    #[test]
1956    fn dataset_manifest_toml_carries_required_keys() {
1957        let m = DatasetManifest {
1958            dataset_id: "x".to_string(),
1959            display_name: "X".to_string(),
1960            upstream_doi_or_url: "doi:test".to_string(),
1961            license: "Apache-2.0".to_string(),
1962            source_class: "TestClass".to_string(),
1963            vendored_path: "/tmp/x".to_string(),
1964            fixture_sha256_hex: "0".repeat(64),
1965            fixture_byte_size: 42,
1966        };
1967        let toml = emit_dataset_manifest_toml(&m);
1968        for key in [
1969            "dataset_id",
1970            "display_name",
1971            "upstream_doi_or_url",
1972            "license",
1973            "source_class",
1974            "vendored_path",
1975            "sha256_hex",
1976            "byte_size",
1977        ] {
1978            assert!(toml.contains(key), "missing {key}");
1979        }
1980    }
1981
1982    #[test]
1983    fn limitations_md_carries_every_non_claim() {
1984        let s = emit_limitations_md(&AUDIT_DATASETS[0]);
1985        for nc in NON_CLAIMS_LINES {
1986            assert!(s.contains(nc), "missing non-claim: {nc}");
1987        }
1988    }
1989}
dsfb_gpu_debug_demo/cli/s_real_audit.rs

dsfb_gpu_debug_demo/cli/
s_real_audit.rs