1use std::collections::BTreeMap;
53use std::fmt::Write as _;
54use std::fs;
55use std::path::{Path, PathBuf};
56use std::process::ExitCode;
57use std::time::Instant;
58
59use dsfb_gpu_debug_core::bank::{bank_hash, Episode};
60use dsfb_gpu_debug_core::casefile::{emit, CaseFile};
61use dsfb_gpu_debug_core::contract::Contract;
62use dsfb_gpu_debug_core::hash::sha256;
63use dsfb_gpu_debug_core::motif::registry_hash;
64
65use super::audit_report::{
66 render_audit_report_html, DatasetManifest, ReplayVerification, SchemaMap,
67};
68use super::ingest::{
69 build_ingest_report, load_residual_projection_tsv, lower_to_trace_events, sha256_to_hex_lower,
70 LoweringConfig,
71};
72use super::{parse_flags, usage_error};
73
/// Compile-time description of one vendored fixture the audit driver can
/// ingest. Every field is a `&'static str` so entries can live in `const`
/// tables.
struct DatasetSpec {
    /// Identifier matched against the `--dataset` flag; also used as the
    /// leaf directory name under which this dataset's artifacts are written.
    dataset_id: &'static str,
    /// Human-readable name copied into the dataset manifest.
    display_name: &'static str,
    /// Upstream DOI, URL, or citation string copied into the manifest.
    upstream_doi_or_url: &'static str,
    /// License label copied into the manifest.
    license: &'static str,
    /// Source-class label copied into the manifest.
    source_class: &'static str,
    /// Path the fixture bytes are read from; recorded in the manifest as
    /// `vendored_path`.
    default_path: &'static str,
    /// SHA-256 (hex) passed to the TSV loader together with the bytes read
    /// from `default_path`.
    fixture_sha256_hex: &'static str,
    /// Directory component placed between the output root and `dataset_id`.
    tier_dir: &'static str,
}
142
/// Datasets selected by `--dataset all` (see `parse_and_run`), processed in
/// the order listed here. Each entry can also be selected individually by
/// its `dataset_id`.
const AUDIT_DATASETS: &[DatasetSpec] = &[
    DatasetSpec {
        dataset_id: "tadbench_f11",
        display_name: "TADBench TrainTicket F11",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F11.tsv",
        fixture_sha256_hex: "cdebdab01e310d4d02241b694b3771acfa9beca3060b0cee0e19fb237f65dc97",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "tadbench_f04",
        display_name: "TADBench TrainTicket F04",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F04.tsv",
        fixture_sha256_hex: "b3e7ba260617b4496aa5b09f7d399fecae2cc60dbb2002f6405e65a5f37cb1d7",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "tadbench_f11b",
        display_name: "TADBench TrainTicket F11b",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F11b.tsv",
        fixture_sha256_hex: "2f45979af791964873a48e6f53a769e0610c02a6452c4a96b7b8bb208d5c492e",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "tadbench_f19",
        display_name: "TADBench TrainTicket F19",
        upstream_doi_or_url: "10.5281/zenodo.6979726",
        license: "CC-BY-4.0",
        source_class: "DebuggingSoftwareTelemetry",
        default_path: "data/fixtures/tadbench_trainticket_F19.tsv",
        fixture_sha256_hex: "d412b6e44a83bb583d7f4ea2e5c3405115e686282fd2840db3f640c1dc3dd480",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "illinois_socialnet",
        display_name: "Illinois SocialNetwork (DeathStarBench)",
        upstream_doi_or_url: "10.13012/B2IDB-6738796_V1",
        license: "CC0-1.0",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/illinois_socialnetwork.tsv",
        fixture_sha256_hex: "c86b5abd1b412f69cccaab9b3e838da742c5ea8a1ba1b6dce634ff3407cc082c",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "lo2",
        display_name: "LO2 (Bakhtin et al. 2025)",
        upstream_doi_or_url: "10.5281/zenodo.14257989",
        license: "CC-BY-4.0",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/lo2.tsv",
        fixture_sha256_hex: "24d5d7c06d755366b2148a3887151478adfd46062bbec54e9ce20dc358b4647c",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "deeptralog",
        display_name: "DeepTraLog F01-02 (Zhang et al. ICSE 2022)",
        upstream_doi_or_url: "github.com/FudanSELab/DeepTraLog",
        license: "no-upstream-license",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/deeptralog.tsv",
        fixture_sha256_hex: "90f2625258b4f823e317a331a8b800dbc0c13c4ee3182b81be6c06a9aaef1853",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "aiops_kpi",
        display_name: "AIOps Challenge 2018 KPI (Bagel sample)",
        upstream_doi_or_url: "Li, Chen, Pei. IPCCC 2018; github.com/NetManAIOps/Bagel",
        license: "no-upstream-license",
        source_class: "TimeSeriesAnomaly",
        default_path: "data/fixtures/aiops_challenge.tsv",
        fixture_sha256_hex: "be17110ebe6647d00fad79dc1ca69b1b01b22788773202bad6e3322e97b0602e",
        tier_dir: "s_real_1",
    },
    DatasetSpec {
        dataset_id: "multidim_localization",
        display_name: "MultiDim Localization (2019 AIOps Challenge match 2)",
        upstream_doi_or_url: "github.com/NetManAIOps/MultiDimension-Localization",
        license: "no-upstream-license",
        source_class: "TimeSeriesAnomaly",
        default_path: "data/fixtures/multidim_localization.tsv",
        fixture_sha256_hex: "b1237ea1f26e380f6323c5a42f158f5c5ce8901b28d94c7d3aae281747d20926",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "defects4j",
        display_name: "Defects4J (Just, Jalali, Ernst)",
        upstream_doi_or_url: "github.com/rjust/defects4j",
        license: "MIT",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/defects4j.tsv",
        fixture_sha256_hex: "a6fa8c9d8e1fa78e2bb9c9f33e40d02aa6f26a04c0af2f760539793d33875a63",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "bugsinpy",
        display_name: "BugsInPy patch-line complexity (Widyasari et al.)",
        upstream_doi_or_url: "github.com/soarsmu/BugsInPy",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/bugsinpy.tsv",
        fixture_sha256_hex: "1e9349f3a0e3c76f20681d4623ff0c0ee768943c07a814ef3c7b481021a57f94",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "promise_defect_prediction",
        display_name: "PROMISE Ant 1.3 CK metrics (Sayyad Shirabad, Menzies)",
        upstream_doi_or_url: "github.com/ssea-lab/PROMISE",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/promise_defect_prediction.tsv",
        fixture_sha256_hex: "14856ef507c9ef8ff6b7120a8c50177d76b492bbff0653b1e635312eadba4461",
        tier_dir: "s_real_2",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd001_unit50",
        display_name: "NASA C-MAPSS FD001 unit 50",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd001_unit50.tsv",
        fixture_sha256_hex: "d4920f9801bf1e27104387702f17e2413194201578ed9434e7a4f1a5b84fe5da",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd002_unit1",
        display_name: "NASA C-MAPSS FD002 unit 1 (multi-condition)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd002_unit1.tsv",
        fixture_sha256_hex: "07a94ffc69c54e9838cbf78c2f087e1a247f6971a7e7e75bc47fe6947d6c6c6a",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd002_unit100",
        display_name: "NASA C-MAPSS FD002 unit 100",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd002_unit100.tsv",
        fixture_sha256_hex: "34310904bd557b0e264d0e8d874695ed007026d05e62ca02745f295efbbbe2b0",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd003_unit1",
        display_name: "NASA C-MAPSS FD003 unit 1 (multi-fault)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd003_unit1.tsv",
        fixture_sha256_hex: "8eeb96ec624782e06f5abc4f77dc62f11e714634c6b6e520221432a2be4a87b1",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd004_unit1",
        display_name: "NASA C-MAPSS FD004 unit 1 (multi-condition + multi-fault)",
        upstream_doi_or_url: "NASA PCoE PHM08",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd004_unit1.tsv",
        fixture_sha256_hex: "abc69ca301edea69b7d60fe0bc8eac912e991eb80d4ec0fb2f2725167266bdae",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "promise_ant_1_4",
        display_name: "PROMISE Apache Ant 1.4 CK metrics",
        upstream_doi_or_url: "github.com/ssea-lab/PROMISE",
        license: "no-upstream-license",
        source_class: "SoftwareDefects",
        default_path: "data/fixtures/promise_ant_1_4.tsv",
        fixture_sha256_hex: "f965a049b98d69ade3391c5f7a777c4ea37089c386cd886b05012cc86748024c",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "deeptralog_f02",
        display_name: "DeepTraLog F02-04 ERROR fault period",
        upstream_doi_or_url: "github.com/FudanSELab/DeepTraLog",
        license: "no-upstream-license",
        source_class: "ObservabilityTraces",
        default_path: "data/fixtures/deeptralog_f02.tsv",
        fixture_sha256_hex: "2ce480415b20dc49d09119b79933f2dd5d0ba62c186a8ecc0f2e2bf8b1e1a95f",
        tier_dir: "s_real_3",
    },
    DatasetSpec {
        dataset_id: "cmapss_fd001_unit1",
        display_name: "NASA C-MAPSS FD001 unit 1 (run-to-failure, z-residual)",
        upstream_doi_or_url:
            "Saxena, Goebel, Simon, Eklund (PHM08); NASA PCoE Prognostics Data Repository",
        license: "Public-Domain",
        source_class: "ReliabilityIndustrial",
        default_path: "data/fixtures/cmapss_fd001_unit1.tsv",
        fixture_sha256_hex: "633442bb93f128bb44e82f4b09d0dd0f175933107bc9c0c3e1fc6bd6b040c93e",
        tier_dir: "s_real_2",
    },
];
355
/// Fixtures that are selectable only by naming their `dataset_id` explicitly:
/// `lookup` searches this table, but `--dataset all` in `parse_and_run`
/// iterates `AUDIT_DATASETS` only. Every entry here uses the
/// `s_real_saturation` tier directory.
const SATURATION_FIXTURES: &[DatasetSpec] = &[
    DatasetSpec {
        dataset_id: "radioml_2018_snr30_large",
        display_name: "RadioML 2018.01 SNR=30 dB (1024×1024 z-residual; large-fixture throughput)",
        upstream_doi_or_url:
            "DeepSig RadioML 2018.01 (O'Shea, Corgan); https://www.deepsig.ai/datasets",
        license: "CC-BY-NC-SA-4.0",
        source_class: "RfCommunications",
        default_path: "data/fixtures/radioml_2018_snr30_1024x1024.tsv",
        fixture_sha256_hex: "0a626804be42f113bc62afd2245f86f9ac7c7204472e29fa09faf976ee7f6e86",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "deepbeam_large",
        display_name: "DeepBeam (1024×1024 IQ-magnitude z-residual; large-fixture throughput)",
        upstream_doi_or_url:
            "NEU GeneSys Lab DeepBeam (neu_ww72bk394.h5); https://genesys-lab.org/oracle",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/deepbeam_1024x1024.tsv",
        fixture_sha256_hex: "242aae914fc3a88c0a5027536923a72f598062ea0647078f7b0e0024a8aa7929",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "radioml_gold_large",
        display_name: "RadioML 2018.01 GOLD full corpus (1024×1024 IQ-magnitude z-residual)",
        upstream_doi_or_url:
            "DeepSig RadioML 2018.01 GOLD_XYZ_OSC.0001_1024.hdf5; https://www.deepsig.ai/datasets",
        license: "CC-BY-NC-SA-4.0",
        source_class: "RfCommunications",
        default_path: "data/fixtures/radioml_gold_1024x1024.tsv",
        fixture_sha256_hex: "06f156dc662a2ce26c9867b49cbf87e534be92b93bfed7402bfe56971906aeaf",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "powder_large",
        display_name: "POWDER 4G LTE Band-7 I/Q (Globecom 2020; 1024×1024 z-residual)",
        upstream_doi_or_url: "University of Utah POWDER (Globecom 2020); neu_m046tb444.zip",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/powder_1024x1024.tsv",
        fixture_sha256_hex: "8438f02bc033142797411f5789d8ef6a3f9c214a120ff1aeaabfa3528edb08f2",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "oracle_large",
        display_name: "ORACLE WiFi 802.11a fingerprinting (Sankhe et al. INFOCOM 2019; 1024×1024)",
        upstream_doi_or_url:
            "NEU/KRI 16-Device ORACLE dataset (Sankhe et al. INFOCOM 2019); neu_m044q5210.zip",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/oracle_1024x1024.tsv",
        fixture_sha256_hex: "6ada4586de505fdc67f2be053b24f92732acc32d3bc8154d17a27b52db570133",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "deepsense6g_large",
        display_name: "Deepsense6G Scenario 23 mmWave-power (512×1024 z-residual; sub-saturation)",
        upstream_doi_or_url:
            "Deepsense6G Scenario 23 (Alkhateeb et al. 2022); https://deepsense6g.net/scenario-23/",
        license: "no-upstream-license",
        source_class: "RfCommunications",
        default_path: "data/fixtures/deepsense6g_512x1024.tsv",
        fixture_sha256_hex: "4d71dc2c087697f1c1455d78f926d11870149828786bbfca865b277afab6052e",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "imdb_tgz_large",
        display_name: "IMDB Join-Order-Benchmark cast_info.csv (1020×1024 numeric-ID z-residual)",
        upstream_doi_or_url:
            "IMDB Join-Order-Benchmark (Leis et al. VLDB 2015); imdb.tgz cast_info.csv",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/imdb_tgz_1020x1024.tsv",
        fixture_sha256_hex: "9e0d356f9a706f6132461873a5903df123e046326e67181497413ff122234aa6",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "imdb_duckdb_large",
        display_name: "IMDB DuckDB binary byte-frequency residual (1024×1024; byte-projection)",
        upstream_doi_or_url: "IMDB Join-Order-Benchmark DuckDB binary dump (sister of imdb.tgz)",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/imdb_duckdb_1024x1024.tsv",
        fixture_sha256_hex: "2970fbd9d0d6bdb7b98b461d549dff9a77ae4b0a94a4fc2967acbb7eec3e12e7",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "snowset_large",
        display_name: "Snowset Snowflake-telemetry CSV (Vuppalapati et al. NSDI 2020; 1024×1024)",
        upstream_doi_or_url:
            "Snowset (Vuppalapati et al. NSDI 2020); github.com/resource-disaggregation/snowset",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/snowset_1024x1024.tsv",
        fixture_sha256_hex: "0ec2b78c1fc4db066208968ab6fc452a4ac6dad31dc740b0870a77a3e73fc3c8",
        tier_dir: "s_real_saturation",
    },
    DatasetSpec {
        dataset_id: "sqlshare_large",
        display_name: "SQLShare 2015 oceanographic CSV residual (Jain et al. UW 2015; 1024×1024)",
        upstream_doi_or_url:
            "SQLShare 2015 (Jain et al., U.Washington); uwescience.github.io/sqlshare",
        license: "no-upstream-license",
        source_class: "DatabaseWorkload",
        default_path: "data/fixtures/sqlshare_1024x1024.tsv",
        fixture_sha256_hex: "e663b53ccdcc1c2bad808a7268af10c9b638e05f108a371707c3e1ac924c37c8",
        tier_dir: "s_real_saturation",
    },
];
553
554fn all_dataset_specs() -> impl Iterator<Item = &'static DatasetSpec> {
559 AUDIT_DATASETS.iter().chain(SATURATION_FIXTURES.iter())
560}
561
562fn lookup(id: &str) -> Option<&'static DatasetSpec> {
571 all_dataset_specs().find(|d| d.dataset_id == id)
572}
573
574pub fn parse_and_run(args: &[String]) -> ExitCode {
608 let flags = match parse_flags(args) {
609 Ok(f) => f,
610 Err(msg) => return usage_error(&msg),
611 };
612 let dataset_arg = if let Some(s) = flags.get("dataset") {
613 s.clone()
614 } else {
615 let ids: Vec<&str> = all_dataset_specs().map(|d| d.dataset_id).collect();
620 let menu = format!("{}|all", ids.join("|"));
621 return usage_error(&format!("missing required flag --dataset ({menu})"));
622 };
623 let default_out = "reports".to_string();
624 let out_dir = flags.get("out-dir").cloned().unwrap_or(default_out);
625
626 let iters: u32 = flags
629 .get("iters")
630 .and_then(|s| s.parse().ok())
631 .unwrap_or(2)
632 .max(2);
633 let catalogs: u32 = flags
634 .get("catalogs")
635 .and_then(|s| s.parse().ok())
636 .unwrap_or(1)
637 .max(1);
638
639 let selected: Vec<&'static DatasetSpec> = if dataset_arg == "all" {
640 AUDIT_DATASETS.iter().collect()
646 } else if let Some(d) = lookup(&dataset_arg) {
647 vec![d]
648 } else {
649 let ids: Vec<&str> = all_dataset_specs().map(|d| d.dataset_id).collect();
650 let menu = format!("{}, all", ids.join(", "));
651 return usage_error(&format!(
652 "unknown dataset id {dataset_arg:?}; valid values: {menu}"
653 ));
654 };
655
656 for spec in selected {
657 let dataset_dir = PathBuf::from(&out_dir)
664 .join(spec.tier_dir)
665 .join(spec.dataset_id);
666 match run_one_dataset(spec, &dataset_dir, iters, catalogs) {
667 Ok(()) => eprintln!(
668 "dsfb-gpu-debug s-real-audit: {} sealed at {} (iters={iters}, catalogs={catalogs})",
669 spec.dataset_id,
670 dataset_dir.display(),
671 ),
672 Err(code) => return code,
673 }
674 }
675 ExitCode::SUCCESS
676}
677
/// Wall-clock timings (microseconds) and size counters collected for one
/// dataset run, plus derived throughput helpers.
#[derive(Clone, Debug, Default)]
pub struct PerformanceProfile {
    /// Requested number of timed dispatch iterations.
    pub iters: u32,
    /// Requested number of catalog dispatches.
    pub catalogs: u32,
    /// Time spent reading and parsing the fixture.
    pub ingest_us: u64,
    /// Time spent lowering the fixture to trace events and building the
    /// ingest report.
    pub lowering_us: u64,
    /// Time spent constructing and pinning the contract.
    pub contract_setup_us: u64,
    /// Duration of the first dispatch.
    pub cuda_dispatch_run1_us: u64,
    /// Duration of the second dispatch.
    pub cuda_dispatch_run2_us: u64,
    /// Durations of any dispatches beyond the first two.
    pub cuda_dispatch_extra_us: Vec<u64>,
    /// Time spent serializing the case files.
    pub casefile_emit_us: u64,
    /// Time spent serializing the episodes JSONL.
    pub episodes_jsonl_emit_us: u64,
    /// Time spent rendering the audit report HTML.
    pub audit_report_emit_us: u64,
    /// Total elapsed time; the denominator for every throughput helper.
    pub total_us: u64,
    /// Number of events emitted by lowering.
    pub events_emitted: u32,
    /// Number of finite cells in the fixture.
    pub finite_cells: u32,
    /// Size of the fixture file in bytes.
    pub fixture_byte_size: u64,
    /// Total time of the catalog dispatch batch (0 when not run).
    pub catalogs_total_us: u64,
}

impl PerformanceProfile {
    /// All dispatch samples (run 1, run 2, then extras) in ascending order.
    /// Always contains at least two entries.
    fn dispatch_samples_sorted(&self) -> Vec<u64> {
        let mut v = Vec::with_capacity(2 + self.cuda_dispatch_extra_us.len());
        v.push(self.cuda_dispatch_run1_us);
        v.push(self.cuda_dispatch_run2_us);
        v.extend(self.cuda_dispatch_extra_us.iter().copied());
        v.sort_unstable();
        v
    }

    /// Median dispatch time: the element at index `len / 2` of the sorted
    /// samples (the upper of the two middle values for an even count).
    fn dispatch_median_us(&self) -> u64 {
        let s = self.dispatch_samples_sorted();
        s.get(s.len() / 2).copied().unwrap_or(0)
    }

    /// Dispatch time at percentile `p`, selected as the sorted sample at
    /// index `floor((len - 1) * p / 100)`.
    ///
    /// `p` is clamped to 100: without the clamp any larger value would
    /// produce an index past the end of the sample vector and panic.
    fn percentile_us(&self, p: u32) -> u64 {
        let s = self.dispatch_samples_sorted();
        if s.is_empty() {
            return 0;
        }
        let p = u64::from(p.min(100));
        let idx = ((s.len() as u64 - 1) * p / 100) as usize;
        s[idx]
    }

    /// `count` per second given an elapsed time in microseconds; 0 when no
    /// time elapsed.
    ///
    /// The multiplication is done in `u128` so a very large `count` cannot
    /// overflow `u64` (which would panic in debug builds and wrap in
    /// release); results too large for `u64` saturate at `u64::MAX`.
    fn rate_per_second(count: u64, elapsed_us: u64) -> u64 {
        if elapsed_us == 0 {
            return 0;
        }
        let rate = u128::from(count) * 1_000_000 / u128::from(elapsed_us);
        rate.min(u128::from(u64::MAX)) as u64
    }

    /// Emitted events per second over `total_us`.
    fn events_per_second(&self) -> u64 {
        Self::rate_per_second(u64::from(self.events_emitted), self.total_us)
    }

    /// Finite cells per second over `total_us`.
    fn finite_cells_per_second(&self) -> u64 {
        Self::rate_per_second(u64::from(self.finite_cells), self.total_us)
    }

    /// Fixture bytes per second over `total_us`.
    fn logical_bytes_per_second(&self) -> u64 {
        Self::rate_per_second(self.fixture_byte_size, self.total_us)
    }
}
766
/// Runs the complete audit pipeline for one dataset and writes its
/// artifacts into `out_dir`.
///
/// Steps, in order: read and parse the fixture, lower it to trace events,
/// build and pin the contract, dispatch the pipeline twice (plus
/// `iters - 2` additional timed dispatches and, when `catalogs > 1`, a
/// batch of `catalogs` dispatches), serialize both primary runs, compare
/// their hashes, then create `out_dir` and write nine artifact files.
///
/// # Errors
///
/// * exit code 5: the fixture could not be read, `out_dir` could not be
///   created, or an artifact could not be written;
/// * exit code 6: the TSV loader rejected the fixture;
/// * exit code 7: run 1 and run 2 hashes differ (artifacts are still
///   written before returning);
/// * any code returned by `run_gpu_or_emit`.
#[allow(
    clippy::too_many_lines,
    reason = "End-to-end S-REAL driver per panel-locked design: ingest \
              + lower + two-run dispatch + replay-verify + 9-artifact \
              panel-locked emit must live in one function so the audit's \
              load-bearing steps are visible top-to-bottom."
)]
fn run_one_dataset(
    spec: &'static DatasetSpec,
    out_dir: &Path,
    iters: u32,
    catalogs: u32,
) -> Result<(), ExitCode> {
    let t_total_start = Instant::now();

    // Stage 1: ingest (file read + TSV parse; the expected SHA-256 is handed
    // to the loader).
    let t_ingest_start = Instant::now();
    let bytes = fs::read(spec.default_path).map_err(|e| {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: failed to read {}: {e}",
            spec.default_path
        );
        ExitCode::from(5)
    })?;
    let fixture_byte_size = bytes.len() as u64;
    let fixture = load_residual_projection_tsv(&bytes, spec.fixture_sha256_hex).map_err(|e| {
        eprintln!("dsfb-gpu-debug s-real-1-audit: ingest error: {e}");
        ExitCode::from(6)
    })?;
    let ingest_us = t_ingest_start.elapsed().as_micros() as u64;

    // Stage 2: lowering with the default config, plus the ingest report.
    let t_lowering_start = Instant::now();
    let lowering = LoweringConfig::default();
    let events = lower_to_trace_events(&fixture, &lowering);
    let ingest_report = build_ingest_report(&fixture, &events, fixture_byte_size);
    let lowering_us = t_lowering_start.elapsed().as_micros() as u64;

    // Stage 3: contract sized from the observed shape (each dimension is at
    // least 1), then pinned to the current bank and registry hashes.
    let t_contract_start = Instant::now();
    let n_entities = ingest_report.observed_num_signals.max(1);
    let n_windows = ingest_report.observed_num_windows.max(1);
    let mut contract = Contract::scaled(n_entities, n_windows);
    contract.pin_bank_hash(bank_hash());
    contract.pin_detector_registry_hash(registry_hash());
    let contract_setup_us = t_contract_start.elapsed().as_micros() as u64;

    // Stage 4: two primary dispatches over identical inputs; their outputs
    // are compared further down.
    let t_run1 = Instant::now();
    let case_run1 = run_gpu_or_emit(&events, &contract)?;
    let cuda_dispatch_run1_us = t_run1.elapsed().as_micros() as u64;

    let t_run2 = Instant::now();
    let case_run2 = run_gpu_or_emit(&events, &contract)?;
    let cuda_dispatch_run2_us = t_run2.elapsed().as_micros() as u64;

    // Extra dispatches are timed only; their results are discarded. The
    // range is empty when `iters <= 2`.
    let mut cuda_dispatch_extra_us: Vec<u64> = Vec::new();
    for _ in 2..iters {
        let t = Instant::now();
        let _ = run_gpu_or_emit(&events, &contract)?;
        cuda_dispatch_extra_us.push(t.elapsed().as_micros() as u64);
    }

    // Optional batch: `catalogs` back-to-back dispatches timed as a whole;
    // recorded as 0 when `catalogs <= 1`.
    let catalogs_total_us = if catalogs > 1 {
        let t = Instant::now();
        for _ in 0..catalogs {
            let _ = run_gpu_or_emit(&events, &contract)?;
        }
        t.elapsed().as_micros() as u64
    } else {
        0
    };

    // Stage 5: serialize both runs (case file and episodes JSONL).
    let t_casefile_emit_start = Instant::now();
    let casefile_run1 = emit(&case_run1);
    let casefile_run2 = emit(&case_run2);
    let casefile_emit_us = t_casefile_emit_start.elapsed().as_micros() as u64;

    let t_episodes_emit_start = Instant::now();
    let episodes_run1 = serialize_episodes_jsonl(&case_run1.episodes);
    let episodes_run2 = serialize_episodes_jsonl(&case_run2.episodes);
    let episodes_jsonl_emit_us = t_episodes_emit_start.elapsed().as_micros() as u64;

    // Hashing sits outside the timed emit sections above.
    let casefile_run1_hex = sha256_to_hex_lower(&sha256(&casefile_run1));
    let casefile_run2_hex = sha256_to_hex_lower(&sha256(&casefile_run2));
    let episodes_run1_hex = sha256_to_hex_lower(&sha256(&episodes_run1));
    let episodes_run2_hex = sha256_to_hex_lower(&sha256(&episodes_run2));

    // Provenance record copied field-by-field from the static spec.
    let manifest = DatasetManifest {
        dataset_id: spec.dataset_id.to_string(),
        display_name: spec.display_name.to_string(),
        upstream_doi_or_url: spec.upstream_doi_or_url.to_string(),
        license: spec.license.to_string(),
        source_class: spec.source_class.to_string(),
        vendored_path: spec.default_path.to_string(),
        fixture_sha256_hex: spec.fixture_sha256_hex.to_string(),
        fixture_byte_size,
    };
    let mut schema = SchemaMap::from(&ingest_report);
    schema.declared_healthy_window_end = fixture.declared_healthy_window_end;
    schema.lowering_config = lowering;

    // NOTE(review): `cuda_version` and `gpu_name` are hard-coded literals,
    // not probed from the running system.
    let mut toolchain = BTreeMap::new();
    toolchain.insert(
        "dsfb_gpu_debug_demo_version".to_string(),
        env!("CARGO_PKG_VERSION").to_string(),
    );
    toolchain.insert("cuda_version".to_string(), "13.2".to_string());
    toolchain.insert("gpu_name".to_string(), "RTX 4080 SUPER".to_string());
    toolchain.insert("backend".to_string(), case_run1.backend.to_string());

    // `run_count` reflects the two primary runs only, regardless of `iters`.
    let replay_pre = ReplayVerification {
        run_count: 2,
        casefile_json_sha256_run1: casefile_run1_hex.clone(),
        casefile_json_sha256_run2: casefile_run2_hex.clone(),
        episodes_jsonl_sha256_run1: episodes_run1_hex.clone(),
        episodes_jsonl_sha256_run2: episodes_run2_hex.clone(),
        final_case_file_hash_run1_hex: sha256_to_hex_lower(&case_run1.final_case_file_hash),
        final_case_file_hash_run2_hex: sha256_to_hex_lower(&case_run2.final_case_file_hash),
        episode_count_run1: case_run1.episodes.len() as u32,
        episode_count_run2: case_run2.episodes.len() as u32,
        toolchain,
    };

    // Stage 6: render the HTML report once per run, sharing the manifest,
    // schema, and replay record.
    let t_audit_emit_start = Instant::now();
    let html_run1 = render_audit_report_html(&manifest, &schema, &case_run1, &replay_pre);
    let html_run2 = render_audit_report_html(&manifest, &schema, &case_run2, &replay_pre);
    let audit_report_emit_us = t_audit_emit_start.elapsed().as_micros() as u64;
    let html_run1_hex = sha256_to_hex_lower(&sha256(html_run1.as_bytes()));
    let html_run2_hex = sha256_to_hex_lower(&sha256(html_run2.as_bytes()));

    // Total is sampled before any artifact is written, so output file I/O
    // is not part of `total_us`.
    let total_us = t_total_start.elapsed().as_micros() as u64;
    let perf = PerformanceProfile {
        iters,
        catalogs,
        ingest_us,
        lowering_us,
        contract_setup_us,
        cuda_dispatch_run1_us,
        cuda_dispatch_run2_us,
        cuda_dispatch_extra_us,
        casefile_emit_us,
        episodes_jsonl_emit_us,
        audit_report_emit_us,
        total_us,
        events_emitted: ingest_report.emitted_event_count,
        finite_cells: ingest_report.finite_cell_count,
        fixture_byte_size,
        catalogs_total_us,
    };

    // Replay verdict: all three serialized artifacts must hash identically
    // across run 1 and run 2.
    let admits = casefile_run1_hex == casefile_run2_hex
        && episodes_run1_hex == episodes_run2_hex
        && html_run1_hex == html_run2_hex;

    if let Err(e) = fs::create_dir_all(out_dir) {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: could not create {}: {e}",
            out_dir.display()
        );
        return Err(ExitCode::from(5));
    }

    // Writes one artifact under `out_dir`; any I/O failure maps to exit
    // code 5.
    let write = |name: &str, content: &[u8]| -> Result<(), ExitCode> {
        let path = out_dir.join(name);
        fs::write(&path, content).map_err(|e| {
            eprintln!(
                "dsfb-gpu-debug s-real-1-audit: failed to write {}: {e}",
                path.display()
            );
            ExitCode::from(5)
        })
    };

    // Artifacts are taken from run 1; run 2 exists only for comparison.
    write(
        "dataset_manifest.toml",
        emit_dataset_manifest_toml(&manifest).as_bytes(),
    )?;
    write("schema_map.toml", emit_schema_map_toml(&schema).as_bytes())?;
    write(
        "run_receipt.txt",
        emit_run_receipt_txt(spec, &manifest, &schema, &case_run1).as_bytes(),
    )?;
    write("casefile.json", &casefile_run1)?;
    write("episodes.jsonl", &episodes_run1)?;
    write("audit_report.html", html_run1.as_bytes())?;
    write(
        "replay_verification.txt",
        emit_replay_verification_txt(spec, &replay_pre, &html_run1_hex, &html_run2_hex, admits)
            .as_bytes(),
    )?;
    write("limitations.md", emit_limitations_md(spec).as_bytes())?;
    write(
        "perf_profile.txt",
        emit_perf_profile_txt(spec, &perf).as_bytes(),
    )?;

    // The failure is reported only after everything has been written, so
    // the evidence of the mismatch stays on disk.
    if !admits {
        eprintln!(
            "dsfb-gpu-debug s-real-1-audit: replay verification FAILED for {} (artifacts emitted; see replay_verification.txt)",
            spec.dataset_id
        );
        return Err(ExitCode::from(7));
    }
    Ok(())
}
1021
1022fn run_gpu_or_emit(
1027 events: &[dsfb_gpu_debug_core::event::TraceEvent],
1028 contract: &Contract,
1029) -> Result<CaseFile, ExitCode> {
1030 use dsfb_gpu_debug_cuda::{build_gpu, GpuError};
1031 match build_gpu(events, contract) {
1032 Ok(case) => Ok(case),
1033 Err(GpuError::CudaUnavailable) => {
1034 eprintln!(
1035 "dsfb-gpu-debug s-real-1-audit: GPU pipeline unavailable \
1036 (built without --features cuda)"
1037 );
1038 Err(ExitCode::from(2))
1039 }
1040 Err(GpuError::KernelFailed(code)) => {
1041 eprintln!("dsfb-gpu-debug s-real-1-audit: GPU kernel failed with cuda status {code}");
1042 Err(ExitCode::from(2))
1043 }
1044 Err(GpuError::InvalidInput(msg)) => {
1045 eprintln!("dsfb-gpu-debug s-real-1-audit: GPU dispatcher rejected input: {msg}");
1046 Err(ExitCode::from(2))
1047 }
1048 }
1049}
1050
1051#[must_use]
1067pub fn serialize_episodes_jsonl(episodes: &[Episode]) -> Vec<u8> {
1068 let mut sorted: Vec<&Episode> = episodes.iter().collect();
1069 sorted.sort_by_key(|e| (e.entity_id, e.start_window, e.end_window, e.reason as u8));
1070 let mut buf: Vec<u8> = Vec::new();
1071 for (idx, e) in sorted.iter().enumerate() {
1072 let _ = writeln!(
1074 &mut buf as &mut dyn std::io::Write,
1075 "{{\"idx\":{},\"entity_id\":{},\"start_window\":{},\"end_window\":{},\
1076 \"motif\":\"{}\",\"reason\":\"{}\",\"peak_state\":\"{}\",\
1077 \"peak_residual_q\":{},\"peak_drift_q\":{},\"peak_slew_q\":{},\
1078 \"detector_bit_count\":{}}}",
1079 idx,
1080 e.entity_id,
1081 e.start_window,
1082 e.end_window,
1083 motif_name(e.motif),
1084 reason_name(e.reason),
1085 grammar_name(e.peak_state),
1086 e.peak_residual_q.0,
1087 e.peak_drift_q.0,
1088 e.peak_slew_q.0,
1089 e.detector_bit_count,
1090 );
1091 }
1092 buf
1093}
1094
1095fn motif_name(m: dsfb_gpu_debug_core::bank::BankMotif) -> &'static str {
1096 use dsfb_gpu_debug_core::bank::BankMotif;
1097 match m {
1098 BankMotif::LatencyRamp => "LatencyRamp",
1099 BankMotif::ErrorBurst => "ErrorBurst",
1100 BankMotif::SlewShockRecovery => "SlewShockRecovery",
1101 BankMotif::SustainedDegradation => "SustainedDegradation",
1102 BankMotif::OscillationInstability => "OscillationInstability",
1103 BankMotif::LocalizedRouteFault => "LocalizedRouteFault",
1104 BankMotif::FanoutCascadeCandidate => "FanoutCascadeCandidate",
1105 BankMotif::ConfuserTransient => "ConfuserTransient",
1106 }
1107}
1108
1109fn reason_name(r: dsfb_gpu_debug_core::grammar::ReasonCode) -> &'static str {
1110 use dsfb_gpu_debug_core::grammar::ReasonCode;
1111 match r {
1112 ReasonCode::Admissible => "Admissible",
1113 ReasonCode::BoundaryApproach => "BoundaryApproach",
1114 ReasonCode::SustainedOutwardDrift => "SustainedOutwardDrift",
1115 ReasonCode::AbruptSlewViolation => "AbruptSlewViolation",
1116 ReasonCode::RecurrentBoundaryGrazing => "RecurrentBoundaryGrazing",
1117 ReasonCode::EnvelopeViolation => "EnvelopeViolation",
1118 ReasonCode::DriftWithRecovery => "DriftWithRecovery",
1119 ReasonCode::SingleCrossing => "SingleCrossing",
1120 }
1121}
1122
1123fn grammar_name(g: dsfb_gpu_debug_core::grammar::GrammarState) -> &'static str {
1124 use dsfb_gpu_debug_core::grammar::GrammarState;
1125 match g {
1126 GrammarState::Admissible => "Admissible",
1127 GrammarState::Boundary => "Boundary",
1128 GrammarState::Violation => "Violation",
1129 GrammarState::Recovery => "Recovery",
1130 }
1131}
1132
1133fn emit_dataset_manifest_toml(m: &DatasetManifest) -> String {
1134 let mut s = String::new();
1135 s.push_str("# S-REAL.1 dataset manifest. Provenance record co-pinning\n");
1136 s.push_str("# upstream identity, license, vendored bytes path, and\n");
1137 s.push_str("# SHA-256 byte-pin of the file the audit actually read.\n\n");
1138 s.push_str("[dataset]\n");
1139 let _ = writeln!(&mut s, "dataset_id = \"{}\"", m.dataset_id);
1140 let _ = writeln!(&mut s, "display_name = \"{}\"", m.display_name);
1141 let _ = writeln!(
1142 &mut s,
1143 "upstream_doi_or_url = \"{}\"",
1144 m.upstream_doi_or_url
1145 );
1146 let _ = writeln!(&mut s, "license = \"{}\"", m.license);
1147 let _ = writeln!(&mut s, "source_class = \"{}\"", m.source_class);
1148 let _ = writeln!(&mut s, "vendored_path = \"{}\"", m.vendored_path);
1149 s.push('\n');
1150 s.push_str("[fixture]\n");
1151 let _ = writeln!(&mut s, "sha256_hex = \"{}\"", m.fixture_sha256_hex);
1152 let _ = writeln!(&mut s, "byte_size = {}", m.fixture_byte_size);
1153 s
1154}
1155
1156fn emit_schema_map_toml(schema: &SchemaMap) -> String {
1157 let mut s = String::new();
1158 s.push_str("# S-REAL.1 schema map. Records the upstream-declared shape,\n");
1159 s.push_str("# the observed shape after parsing, and the deterministic\n");
1160 s.push_str("# event-lowering rule used to project cells into TraceEvents.\n\n");
1161 s.push_str("[upstream_declared]\n");
1162 let _ = writeln!(
1163 &mut s,
1164 "num_windows = {}",
1165 schema.declared_num_windows
1166 );
1167 let _ = writeln!(
1168 &mut s,
1169 "num_signals = {}",
1170 schema.declared_num_signals
1171 );
1172 let _ = writeln!(
1173 &mut s,
1174 "healthy_window_end = {}",
1175 schema.declared_healthy_window_end
1176 );
1177 s.push('\n');
1178 s.push_str("[observed]\n");
1179 let _ = writeln!(
1180 &mut s,
1181 "num_windows = {}",
1182 schema.observed_num_windows
1183 );
1184 let _ = writeln!(
1185 &mut s,
1186 "num_signals = {}",
1187 schema.observed_num_signals
1188 );
1189 let _ = writeln!(&mut s, "nan_cell_count = {}", schema.nan_cell_count);
1190 let _ = writeln!(&mut s, "finite_cell_count = {}", schema.finite_cell_count);
1191 s.push('\n');
1192 s.push_str("[event_lowering]\n");
1193 let _ = writeln!(
1194 &mut s,
1195 "value_to_microsecond_scale = {}",
1196 schema.lowering_config.value_to_microsecond_scale
1197 );
1198 let _ = writeln!(
1199 &mut s,
1200 "latency_clamp_us = {}",
1201 schema.lowering_config.latency_clamp_us
1202 );
1203 let _ = writeln!(
1204 &mut s,
1205 "window_size_ns = {}",
1206 schema.lowering_config.window_size_ns
1207 );
1208 s.push('\n');
1209 s.push_str("[output]\n");
1210 let _ = writeln!(
1211 &mut s,
1212 "emitted_event_count = {}",
1213 schema.emitted_event_count
1214 );
1215 s
1216}
1217
1218#[allow(
1219 clippy::too_many_lines,
1220 reason = "Receipt emitter is a single byte-stable text block; \
1221 splitting risks ordering divergence between two builds."
1222)]
1223fn emit_run_receipt_txt(
1224 _spec: &DatasetSpec,
1225 manifest: &DatasetManifest,
1226 schema: &SchemaMap,
1227 case: &CaseFile,
1228) -> String {
1229 let mut s = String::new();
1230 s.push_str("=== S-REAL.1 run receipt ===\n");
1231 let _ = writeln!(&mut s, "dataset: {}", manifest.dataset_id);
1232 let _ = writeln!(&mut s, "display_name: {}", manifest.display_name);
1233 let _ = writeln!(&mut s, "license: {}", manifest.license);
1234 let _ = writeln!(
1235 &mut s,
1236 "upstream_doi_or_url: {}",
1237 manifest.upstream_doi_or_url
1238 );
1239 s.push('\n');
1240 s.push_str("Input\n");
1241 let _ = writeln!(
1242 &mut s,
1243 " vendored_path: {}",
1244 manifest.vendored_path
1245 );
1246 let _ = writeln!(
1247 &mut s,
1248 " fixture_sha256: {}",
1249 manifest.fixture_sha256_hex
1250 );
1251 let _ = writeln!(
1252 &mut s,
1253 " fixture_byte_size: {}",
1254 manifest.fixture_byte_size
1255 );
1256 s.push('\n');
1257 s.push_str("Lowering\n");
1258 let _ = writeln!(
1259 &mut s,
1260 " value_to_microsecond_scale: {}",
1261 schema.lowering_config.value_to_microsecond_scale
1262 );
1263 let _ = writeln!(
1264 &mut s,
1265 " latency_clamp_us: {}",
1266 schema.lowering_config.latency_clamp_us
1267 );
1268 let _ = writeln!(
1269 &mut s,
1270 " window_size_ns: {}",
1271 schema.lowering_config.window_size_ns
1272 );
1273 let _ = writeln!(
1274 &mut s,
1275 " finite_cells: {}",
1276 schema.finite_cell_count
1277 );
1278 let _ = writeln!(
1279 &mut s,
1280 " nan_cells_skipped: {}",
1281 schema.nan_cell_count
1282 );
1283 let _ = writeln!(
1284 &mut s,
1285 " events_emitted: {}",
1286 schema.emitted_event_count
1287 );
1288 s.push('\n');
1289 s.push_str("Run\n");
1290 let _ = writeln!(&mut s, " backend: {}", case.backend);
1291 let _ = writeln!(
1292 &mut s,
1293 " n_entities (= observed_num_signals): {}",
1294 schema.observed_num_signals
1295 );
1296 let _ = writeln!(
1297 &mut s,
1298 " n_windows (= observed_num_windows): {}",
1299 schema.observed_num_windows
1300 );
1301 let _ = writeln!(
1302 &mut s,
1303 " contract_hash: sha256:{}",
1304 sha256_to_hex_lower(&case.hashes.contract)
1305 );
1306 let _ = writeln!(
1307 &mut s,
1308 " bank_hash: sha256:{}",
1309 sha256_to_hex_lower(&case.hashes.bank)
1310 );
1311 let _ = writeln!(
1312 &mut s,
1313 " detector_registry_hash: sha256:{}",
1314 sha256_to_hex_lower(&case.hashes.detector_registry)
1315 );
1316 s.push('\n');
1317 s.push_str("Result\n");
1318 let _ = writeln!(&mut s, " episodes_admitted: {}", case.episodes.len());
1319 let _ = writeln!(
1320 &mut s,
1321 " final_verdict: {}",
1322 case.final_verdict.name()
1323 );
1324 let _ = writeln!(
1325 &mut s,
1326 " final_case_file_hash: sha256:{}",
1327 sha256_to_hex_lower(&case.final_case_file_hash)
1328 );
1329 s
1330}
1331
1332fn emit_replay_verification_txt(
1333 spec: &DatasetSpec,
1334 r: &ReplayVerification,
1335 html_run1_hex: &str,
1336 html_run2_hex: &str,
1337 admits: bool,
1338) -> String {
1339 let mut s = String::new();
1340 s.push_str("=== S-REAL.1 replay verification ===\n");
1341 let _ = writeln!(&mut s, "dataset: {}", spec.dataset_id);
1342 let _ = writeln!(&mut s, "runs: {}", r.run_count);
1343 s.push('\n');
1344 let cf_ok = r.casefile_json_sha256_run1 == r.casefile_json_sha256_run2;
1345 let ep_ok = r.episodes_jsonl_sha256_run1 == r.episodes_jsonl_sha256_run2;
1346 let hr_ok = html_run1_hex == html_run2_hex;
1347 let _ = writeln!(
1348 &mut s,
1349 "byte-identical replay: {}",
1350 if admits { "YES" } else { "NO" }
1351 );
1352 let _ = writeln!(
1353 &mut s,
1354 " casefile.json: {}",
1355 if cf_ok { "YES" } else { "NO" }
1356 );
1357 let _ = writeln!(
1358 &mut s,
1359 " episodes.jsonl: {}",
1360 if ep_ok { "YES" } else { "NO" }
1361 );
1362 let _ = writeln!(
1363 &mut s,
1364 " audit_report.html: {}",
1365 if hr_ok { "YES" } else { "NO" }
1366 );
1367 s.push('\n');
1368 s.push_str("Run 1 SHA-256\n");
1369 let _ = writeln!(
1370 &mut s,
1371 " casefile.json: {}",
1372 r.casefile_json_sha256_run1
1373 );
1374 let _ = writeln!(
1375 &mut s,
1376 " episodes.jsonl: {}",
1377 r.episodes_jsonl_sha256_run1
1378 );
1379 let _ = writeln!(&mut s, " audit_report.html: {html_run1_hex}");
1380 s.push('\n');
1381 s.push_str("Run 2 SHA-256\n");
1382 let _ = writeln!(
1383 &mut s,
1384 " casefile.json: {}",
1385 r.casefile_json_sha256_run2
1386 );
1387 let _ = writeln!(
1388 &mut s,
1389 " episodes.jsonl: {}",
1390 r.episodes_jsonl_sha256_run2
1391 );
1392 let _ = writeln!(&mut s, " audit_report.html: {html_run2_hex}");
1393 s.push('\n');
1394 let _ = writeln!(
1395 &mut s,
1396 "final_case_file_hash (run 1): {}",
1397 r.final_case_file_hash_run1_hex
1398 );
1399 let _ = writeln!(
1400 &mut s,
1401 "final_case_file_hash (run 2): {}",
1402 r.final_case_file_hash_run2_hex
1403 );
1404 let _ = writeln!(
1405 &mut s,
1406 "episode_count (run 1): {}",
1407 r.episode_count_run1
1408 );
1409 let _ = writeln!(
1410 &mut s,
1411 "episode_count (run 2): {}",
1412 r.episode_count_run2
1413 );
1414 s.push('\n');
1415 s.push_str("Toolchain\n");
1416 for (k, v) in &r.toolchain {
1417 let _ = writeln!(&mut s, " {k}: {v}");
1418 }
1419 s.push('\n');
1420 s.push_str("Note: replay determinism is asserted only for the toolchain\n");
1421 s.push_str("recorded above. Different driver / CUDA / hardware versions\n");
1422 s.push_str("may produce different bytes; the audit does NOT claim\n");
1423 s.push_str("cross-toolchain replay byte-identity.\n");
1424 s
1425}
1426
1427#[allow(
1438 clippy::too_many_lines,
1439 reason = "Receipt emitter is a single byte-stable text block; \
1440 splitting risks ordering divergence between two builds."
1441)]
1442fn emit_perf_profile_txt(spec: &DatasetSpec, p: &PerformanceProfile) -> String {
1443 let mut s = String::new();
1444 s.push_str("=== S-REAL.PERF performance profile ===\n");
1445 let _ = writeln!(&mut s, "dataset: {}", spec.dataset_id);
1446 let _ = writeln!(&mut s, "iters: {}", p.iters);
1447 let _ = writeln!(&mut s, "catalogs: {}", p.catalogs);
1448 s.push('\n');
1449 s.push_str("Per-stage wall (microseconds, host Instant):\n");
1450 let _ = writeln!(&mut s, " ingest_us : {}", p.ingest_us);
1451 let _ = writeln!(&mut s, " lowering_us : {}", p.lowering_us);
1452 let _ = writeln!(&mut s, " contract_setup_us : {}", p.contract_setup_us);
1453 let _ = writeln!(
1454 &mut s,
1455 " cuda_dispatch_run1_us : {}",
1456 p.cuda_dispatch_run1_us
1457 );
1458 let _ = writeln!(
1459 &mut s,
1460 " cuda_dispatch_run2_us : {}",
1461 p.cuda_dispatch_run2_us
1462 );
1463 if !p.cuda_dispatch_extra_us.is_empty() {
1464 let _ = writeln!(
1465 &mut s,
1466 " cuda_dispatch_extra_us : {:?}",
1467 p.cuda_dispatch_extra_us
1468 );
1469 }
1470 let _ = writeln!(&mut s, " casefile_emit_us : {}", p.casefile_emit_us);
1471 let _ = writeln!(
1472 &mut s,
1473 " episodes_jsonl_emit_us : {}",
1474 p.episodes_jsonl_emit_us
1475 );
1476 let _ = writeln!(
1477 &mut s,
1478 " audit_report_emit_us : {}",
1479 p.audit_report_emit_us
1480 );
1481 let _ = writeln!(&mut s, " total_us : {}", p.total_us);
1482 s.push('\n');
1483 s.push_str("Dispatch variance (across all recorded iters):\n");
1484 let _ = writeln!(
1485 &mut s,
1486 " dispatch_median_us : {}",
1487 p.dispatch_median_us()
1488 );
1489 let _ = writeln!(&mut s, " dispatch_p50_us : {}", p.percentile_us(50));
1490 let _ = writeln!(&mut s, " dispatch_p95_us : {}", p.percentile_us(95));
1491 let _ = writeln!(&mut s, " dispatch_p99_us : {}", p.percentile_us(99));
1492 s.push('\n');
1493 s.push_str("Throughput (end-to-end wall):\n");
1494 let _ = writeln!(
1495 &mut s,
1496 " events_emitted : {}",
1497 p.events_emitted
1498 );
1499 let _ = writeln!(&mut s, " finite_cells : {}", p.finite_cells);
1500 let _ = writeln!(
1501 &mut s,
1502 " fixture_byte_size : {}",
1503 p.fixture_byte_size
1504 );
1505 let _ = writeln!(
1506 &mut s,
1507 " events_per_second : {}",
1508 p.events_per_second()
1509 );
1510 let _ = writeln!(
1511 &mut s,
1512 " finite_cells_per_second : {}",
1513 p.finite_cells_per_second()
1514 );
1515 let _ = writeln!(
1516 &mut s,
1517 " logical_bytes_per_second : {}",
1518 p.logical_bytes_per_second()
1519 );
1520 if p.catalogs > 1 {
1521 s.push('\n');
1522 s.push_str("Sequential-catalog amortization (--catalogs > 1):\n");
1523 let _ = writeln!(
1524 &mut s,
1525 " catalogs_total_us : {}",
1526 p.catalogs_total_us
1527 );
1528 let _ = writeln!(
1529 &mut s,
1530 " per_catalog_us : {}",
1531 p.catalogs_total_us / u64::from(p.catalogs)
1532 );
1533 s.push_str(" note : K sequential build_gpu calls; NOT a batched dispatch.\n");
1534 }
1535 s.push('\n');
1536 s.push_str("Honest framing (panel-locked, MUST appear):\n");
1537 s.push_str(" - Timing values are runtime-dependent. The byte-identical-replay\n");
1538 s.push_str(" claim covers casefile.json + episodes.jsonl (the inference\n");
1539 s.push_str(" chain), NOT this perf_profile.txt or the timing values inside\n");
1540 s.push_str(" audit_report.html. Re-invoking s-real-1-audit will produce a\n");
1541 s.push_str(" new perf_profile.txt with new timing values; the casefile +\n");
1542 s.push_str(" episodes bytes will remain byte-identical to the sealed S-REAL.1.1.1\n");
1543 s.push_str(" artifacts.\n");
1544 s.push_str(" - At these small fixture sizes (128 / 192 / 656 events) the wall\n");
1545 s.push_str(" is overhead-dominated. Real-data throughput numbers below are\n");
1546 s.push_str(" honest measurements on this hardware at this scale; they are NOT\n");
1547 s.push_str(" saturation claims, NOT production-deployment throughput, and NOT\n");
1548 s.push_str(" detector-superiority benchmarks. CUDA timing is host-Instant\n");
1549 s.push_str(" wall (not cudaEvent kernel time; that lives in S-PERF).\n");
1550 s.push_str(" - Cross-driver / cross-CUDA / cross-hardware replay byte-identity\n");
1551 s.push_str(" or throughput-identity is NOT claimed.\n");
1552 s
1553}
1554
1555fn emit_limitations_md(spec: &DatasetSpec) -> String {
1556 let mut s = String::new();
1557 let _ = writeln!(
1558 &mut s,
1559 "# S-REAL.1 audit — limitations and non-claims ({})\n",
1560 spec.dataset_id
1561 );
1562 s.push_str("This file accompanies the `audit_report.html` for this dataset. The\n");
1563 s.push_str("audit's deliverable is **deterministic, replayable structural\n");
1564 s.push_str("evidence on real public dataset bytes** — not domain-truth\n");
1565 s.push_str("claims.\n\n");
1566 s.push_str("## Non-claims\n\n");
1567 for nc in NON_CLAIMS_LINES {
1568 let _ = writeln!(&mut s, "- {nc}");
1569 }
1570 s.push_str("\n## Lowering disclosure\n\n");
1571 s.push_str("The upstream fixture is in `residual-projection v2` form\n");
1572 s.push_str("(window-major × signal-minor TSV). DSFB-GPU normally takes a\n");
1573 s.push_str("`Vec<TraceEvent>` and projects events into residuals via its\n");
1574 s.push_str("window-feature kernel; the upstream is already past that\n");
1575 s.push_str("projection. To run the deterministic engine on this form\n");
1576 s.push_str("without modifying the dispatcher, the audit lowers each\n");
1577 s.push_str("finite cell into one synthetic `TraceEvent` via a documented\n");
1578 s.push_str("rule (see `schema_map.toml` and section 2 of\n");
1579 s.push_str("`audit_report.html`). The audit does NOT claim to recover the\n");
1580 s.push_str("upstream's original trace events; it claims DSFB-GPU saw\n");
1581 s.push_str("exactly the events that rule produces from these bytes.\n");
1582 s
1583}
1584
/// Non-claim statements listed, one bullet each and in this order, under the
/// `## Non-claims` heading of the limitations note.
const NON_CLAIMS_LINES: &[&str] = &[
    r#"Does NOT claim DSFB has identified the "real" anomaly in the dataset."#,
    r"Does NOT claim DSFB outperforms any other anomaly detector.",
    r"Does NOT claim DSFB has discovered causality.",
    r"Does NOT claim DSFB has measured remediation effectiveness.",
    r"Does NOT claim fitness-for-purpose on regulated or safety-critical use.",
    r#"Does NOT claim the dataset is "correctly labeled" or "ground truth"."#,
    r"Does NOT claim the corpus or registry is exhaustive.",
    r"Does NOT claim replay determinism across different driver / CUDA / hardware versions.",
];
1595
#[cfg(test)]
mod tests {
    use super::*;
    use dsfb_gpu_debug_core::bank::{BankMotif, Episode};
    use dsfb_gpu_debug_core::fixed::Q16;
    use dsfb_gpu_debug_core::grammar::{GrammarState, ReasonCode};

    /// Builds an `Episode` whose identifying fields come from the arguments;
    /// the peak values, detector bit count and admission are fixed
    /// placeholders shared by every fixture.
    fn mk_episode(
        entity_id: u32,
        start_window: u32,
        end_window: u32,
        motif: BankMotif,
        reason: ReasonCode,
    ) -> Episode {
        Episode {
            entity_id,
            start_window,
            end_window,
            motif,
            reason,
            peak_state: GrammarState::Boundary,
            peak_residual_q: Q16(123),
            peak_drift_q: Q16(456),
            peak_slew_q: Q16(789),
            detector_bit_count: 3,
            admission: None,
        }
    }

    /// Every listed dataset id resolves via `lookup`; an unknown id does not.
    #[test]
    fn lookup_admits_known_datasets() {
        const KNOWN_IDS: &[&str] = &[
            "tadbench_f11",
            "tadbench_f04",
            "tadbench_f11b",
            "tadbench_f19",
            "illinois_socialnet",
            "lo2",
            "deeptralog",
            "aiops_kpi",
            "multidim_localization",
            "defects4j",
            "bugsinpy",
            "promise_defect_prediction",
            "cmapss_fd001_unit1",
            "cmapss_fd001_unit50",
            "cmapss_fd002_unit1",
            "cmapss_fd002_unit100",
            "cmapss_fd003_unit1",
            "cmapss_fd004_unit1",
            "promise_ant_1_4",
            "deeptralog_f02",
            "radioml_2018_snr30_large",
            "deepbeam_large",
            "radioml_gold_large",
            "powder_large",
            "oracle_large",
            "deepsense6g_large",
            "imdb_tgz_large",
            "imdb_duckdb_large",
            "snowset_large",
            "sqlshare_large",
        ];
        for &id in KNOWN_IDS {
            assert!(lookup(id).is_some(), "lookup must admit {id}");
        }
        assert!(lookup("unknown_dataset").is_none());
    }

    /// Each pinned fixture digest is exactly 64 lowercase hexadecimal chars.
    #[test]
    fn dataset_sha256_pins_are_lower_hex_64() {
        for spec in all_dataset_specs() {
            let pin = spec.fixture_sha256_hex;
            assert_eq!(
                pin.len(),
                64,
                "{} pin must be 64 hex chars",
                spec.dataset_id
            );
            // Lowercase hex only: digits plus `a`..=`f`.
            let is_lower_hex = pin.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f'));
            assert!(is_lower_hex, "{} pin must be lowercase hex", spec.dataset_id);
        }
    }

    /// The sealed audit table holds exactly 20 entries.
    #[test]
    fn audit_all_dataset_count_is_20() {
        let audit_count = AUDIT_DATASETS.len();
        assert_eq!(
            audit_count, 20,
            "AUDIT_DATASETS must hold exactly 20 sealed audit datasets; \
             changing this is a panel-acknowledged schema-upgrade event"
        );
    }

    /// The saturation table holds exactly 10 entries.
    #[test]
    fn saturation_fixture_count_is_10() {
        let saturation_count = SATURATION_FIXTURES.len();
        assert_eq!(
            saturation_count, 10,
            "SATURATION_FIXTURES must hold exactly 10 saturation-class \
             real-data fixtures (RadioML/DeepBeam/RadioML-Gold/POWDER/\
             ORACLE/Deepsense6G/IMDb/Snowset/SQLShare)"
        );
    }

    /// No saturation fixture id, and no `*x1024.tsv` path, appears in the
    /// audit table.
    #[test]
    fn audit_all_excludes_large_saturation_fixtures() {
        const SATURATION_IDS: &[&str] = &[
            "radioml_2018_snr30_large",
            "deepbeam_large",
            "radioml_gold_large",
            "powder_large",
            "oracle_large",
            "deepsense6g_large",
            "imdb_tgz_large",
            "imdb_duckdb_large",
            "snowset_large",
            "sqlshare_large",
        ];
        for &sat_id in SATURATION_IDS {
            let absent = AUDIT_DATASETS.iter().all(|d| d.dataset_id != sat_id);
            assert!(
                absent,
                "AUDIT_DATASETS must NOT contain saturation fixture {sat_id}; \
                 the slim Colab tarball excludes its TSV"
            );
        }
        for spec in AUDIT_DATASETS {
            // Case-insensitive suffix check against the exclusion pattern.
            let matches_exclusion = spec.default_path.to_lowercase().ends_with("x1024.tsv");
            assert!(
                !matches_exclusion,
                "AUDIT_DATASETS entry {} has default_path {:?} matching \
                 the saturation-TSV exclusion pattern *x1024.tsv; this \
                 would break the Colab public-replay path",
                spec.dataset_id,
                spec.default_path
            );
        }
    }

    /// The two dataset tables share no id, and neither repeats an id.
    #[test]
    fn audit_dataset_table_no_duplicate_ids() {
        use std::collections::HashSet;

        let audit_ids: HashSet<&str> = AUDIT_DATASETS.iter().map(|d| d.dataset_id).collect();
        let sat_ids: HashSet<&str> = SATURATION_FIXTURES.iter().map(|d| d.dataset_id).collect();

        let overlap: Vec<&&str> = audit_ids.intersection(&sat_ids).collect();
        assert!(
            overlap.is_empty(),
            "AUDIT_DATASETS and SATURATION_FIXTURES must be disjoint; \
             found overlap: {overlap:?}"
        );

        // A set smaller than its source table means an id was repeated.
        assert_eq!(
            audit_ids.len(),
            AUDIT_DATASETS.len(),
            "AUDIT_DATASETS contains a duplicate dataset_id"
        );
        assert_eq!(
            sat_ids.len(),
            SATURATION_FIXTURES.len(),
            "SATURATION_FIXTURES contains a duplicate dataset_id"
        );
    }

    /// Each audit entry's `reports/<tier_dir>/<dataset_id>` path equals the
    /// `tier_dir` recorded for that dataset in `bundle_manifest.toml`.
    ///
    /// The test returns early (with a note on stderr) when the manifest cannot
    /// be read or yields no `[datasets.*]` / `tier_dir` pairs.
    #[test]
    fn audit_dataset_tier_dir_matches_bundle_manifest() {
        // Prefer the path two levels up; otherwise use the cwd-relative path
        // without checking that it exists.
        let primary = std::path::Path::new("../../reports/s_real_3/bundle_manifest.toml");
        let fallback = std::path::Path::new("reports/s_real_3/bundle_manifest.toml");
        let manifest_path = if primary.exists() { primary } else { fallback };

        let body = match std::fs::read_to_string(manifest_path) {
            Err(e) => {
                eprintln!(
                    "audit_dataset_tier_dir_matches_bundle_manifest: skipping \
                     (bundle_manifest.toml not readable at {}: {e})",
                    manifest_path.display()
                );
                return;
            }
            Ok(text) => text,
        };

        // Line scan: a `[datasets.<id>]` header arms `pending_id`; the next
        // `tier_dir = "<value>"` line consumes it and records the pair.
        let mut pending_id: Option<String> = None;
        let mut manifest_tier_by_id: std::collections::HashMap<String, String> =
            std::collections::HashMap::new();
        for entry in body.lines().map(str::trim) {
            if let Some(rest) = entry.strip_prefix("[datasets.") {
                if let Some((id, _)) = rest.split_once(']') {
                    pending_id = Some(id.to_string());
                }
            } else if let Some(rest) = entry.strip_prefix("tier_dir = \"") {
                if let Some((value, _)) = rest.split_once('"') {
                    if let Some(id) = pending_id.take() {
                        manifest_tier_by_id.insert(id, value.to_string());
                    }
                }
            }
        }

        if manifest_tier_by_id.is_empty() {
            eprintln!(
                "audit_dataset_tier_dir_matches_bundle_manifest: skipping \
                 (manifest at {} contains no parseable entries)",
                manifest_path.display()
            );
            return;
        }

        for spec in AUDIT_DATASETS {
            let expected_full = format!("reports/{}/{}", spec.tier_dir, spec.dataset_id);
            let Some(manifest_full) = manifest_tier_by_id.get(spec.dataset_id) else {
                panic!(
                    "AUDIT_DATASETS entry {} is missing from bundle_manifest.toml",
                    spec.dataset_id
                )
            };
            assert_eq!(
                manifest_full, &expected_full,
                "AUDIT_DATASETS[{}].tier_dir mirror divergence: driver = {:?}, \
                 manifest = {:?}",
                spec.dataset_id, expected_full, manifest_full
            );
        }
    }

    /// Two permutations of the same episodes serialize to identical bytes,
    /// with the three lines in the order the assertions below pin.
    #[test]
    fn serialize_episodes_jsonl_is_deterministic_and_sorted() {
        let ramp = || {
            mk_episode(
                5,
                10,
                12,
                BankMotif::LatencyRamp,
                ReasonCode::BoundaryApproach,
            )
        };
        let burst_envelope =
            || mk_episode(2, 1, 4, BankMotif::ErrorBurst, ReasonCode::EnvelopeViolation);
        let burst_boundary =
            || mk_episode(2, 1, 4, BankMotif::ErrorBurst, ReasonCode::BoundaryApproach);

        let a = vec![ramp(), burst_envelope(), burst_boundary()];
        let b = vec![burst_boundary(), ramp(), burst_envelope()];

        let sa = serialize_episodes_jsonl(&a);
        let sb = serialize_episodes_jsonl(&b);
        assert_eq!(sa, sb);

        let text = std::str::from_utf8(&sa).unwrap();
        let lines: Vec<&str> = text.lines().collect();
        assert_eq!(lines.len(), 3);
        assert!(lines[0].contains(r#""entity_id":2"#));
        assert!(lines[0].contains(r#""reason":"BoundaryApproach""#));
        assert!(lines[1].contains(r#""entity_id":2"#));
        assert!(lines[1].contains(r#""reason":"EnvelopeViolation""#));
        assert!(lines[2].contains(r#""entity_id":5"#));
    }

    /// An empty episode slice serializes to empty output.
    #[test]
    fn serialize_episodes_jsonl_handles_empty() {
        assert!(serialize_episodes_jsonl(&[]).is_empty());
    }

    /// The rendered dataset manifest mentions every required key name.
    #[test]
    fn dataset_manifest_toml_carries_required_keys() {
        const REQUIRED_KEYS: [&str; 8] = [
            "dataset_id",
            "display_name",
            "upstream_doi_or_url",
            "license",
            "source_class",
            "vendored_path",
            "sha256_hex",
            "byte_size",
        ];
        let manifest = DatasetManifest {
            dataset_id: String::from("x"),
            display_name: String::from("X"),
            upstream_doi_or_url: String::from("doi:test"),
            license: String::from("Apache-2.0"),
            source_class: String::from("TestClass"),
            vendored_path: String::from("/tmp/x"),
            fixture_sha256_hex: "0".repeat(64),
            fixture_byte_size: 42,
        };
        let rendered = emit_dataset_manifest_toml(&manifest);
        for key in REQUIRED_KEYS {
            assert!(rendered.contains(key), "missing {key}");
        }
    }

    /// The limitations note contains every `NON_CLAIMS_LINES` entry verbatim.
    #[test]
    fn limitations_md_carries_every_non_claim() {
        let rendered = emit_limitations_md(&AUDIT_DATASETS[0]);
        for &claim in NON_CLAIMS_LINES {
            assert!(rendered.contains(claim), "missing non-claim: {claim}");
        }
    }
}