Skip to main content

dsfb_gpu_debug_demo/cli/
audit_report.rs

1//! S-REAL.1 — Deterministic, no-JavaScript HTML audit report renderer.
2//!
3//! WHY: The S-REAL.1 audit gauntlet's central credibility deliverable is
4//! that a human reader can open `reports/s_real_1/<dataset>/audit_report.html`
5//! in any browser, see exactly what DSFB-GPU saw on the upstream fixture,
6//! cross-check every hash against the receipt files, and confirm that two
7//! consecutive dispatches produced byte-identical output. The renderer
8//! must therefore be:
9//!
10//! - **Byte-stable**: same inputs → same byte stream across two calls and
11//!   across two machines. We enforce this by sorting all map iteration
12//!   (BTreeMap), pre-sorting the episode list by `(entity_id,
13//!   start_window, end_window)`, and never emitting wall-clock timestamps,
14//!   process IDs, or any other non-pinned dynamic content.
15//! - **No JavaScript**: the report renders entirely server-side; the
16//!   HTML carries inline CSS for layout. A reader on an air-gapped audit
17//!   workstation can open the file without any network or runtime
18//!   dependency.
19//! - **No charts (v1)**: charts would require either an external library
20//!   (network dependency) or a hand-rolled SVG renderer whose visual
21//!   conventions would themselves need provenance. Tables of admitted
22//!   episodes + stage hashes are the honest v1 surface; charts/heatmaps
23//!   are S-REAL.1.1 scope.
24//!
25//! Section order (panel-locked):
26//!
27//! 1. Input provenance — fixture identity + SHA-256 byte pin.
28//! 2. Residual-projection lowering law — exact rule used to map cells
29//!    into TraceEvents.
30//! 3. Run configuration — contract grid, scale parameters, episode count.
31//! 4. Admitted episodes — table of bank-admitted episodes with motif +
32//!    reason + peak fields.
33//! 5. Stage digest / hash chain — every per-stage hash + final
34//!    case-file hash.
35//! 6. Replay verification — byte-identity between two consecutive runs.
36//! 7. Limitations and non-claims — verbatim panel-locked text.
37//!
38//! Non-claims (rendered verbatim in section 7 of every report):
39//! - Does NOT claim DSFB has identified the "real" anomaly in the dataset.
40//! - Does NOT claim DSFB outperforms any other anomaly detector.
41//! - Does NOT claim DSFB has discovered causality.
42//! - Does NOT claim fitness-for-purpose on regulated / safety-critical use.
43//! - Does NOT claim the dataset is "correctly labeled" or "ground truth".
44//! - Does NOT claim the corpus or registry is exhaustive.
45//! - Does NOT claim replay determinism across driver / CUDA versions; the
46//!   replay receipt records the toolchain explicitly.
47//!
48//! License: Apache-2.0. Background IP: Invariant Forge LLC.
49
50use std::collections::BTreeMap;
51use std::fmt::Write;
52
53use dsfb_gpu_debug_core::bank::Episode;
54use dsfb_gpu_debug_core::casefile::CaseFile;
55
56use super::ingest::{IngestReport, LoweringConfig};
57
/// Identity and provenance record for the dataset being audited.
///
/// WHY: Each receipt produced by the audit has to name the upstream
/// source (DOI/URL), the reuse license, the vendored path that was
/// actually read, and the SHA-256 of the consumed bytes. Keeping all of
/// them in a single struct makes it impossible to cite only part of the
/// provenance.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DatasetManifest {
    /// Short lowercase identifier without spaces; doubles as the report
    /// directory name (e.g. `"tadbench_f11"`).
    pub dataset_id: String,
    /// Display name shown to human readers (e.g.
    /// `"TADBench TrainTicket F11"`).
    pub display_name: String,
    /// DOI, URL, or repository reference of the upstream source. Honest
    /// non-claim: cited for provenance only; the audit does not endorse
    /// the upstream's labels or methodology.
    pub upstream_doi_or_url: String,
    /// License that permits the audit to reuse the fixture bytes
    /// (`"Apache-2.0"`, `"CC0-1.0"`, etc.).
    pub license: String,
    /// DSFB source-class tag of the dataset (e.g.
    /// `"DebuggingSoftwareTelemetry"`, `"ObservabilityTraces"`,
    /// `"TimeSeriesAnomaly"`).
    pub source_class: String,
    /// Vendored fixture path — the path the audit actually read from.
    pub vendored_path: String,
    /// SHA-256 of the fixture file bytes as 64 lowercase hex characters.
    pub fixture_sha256_hex: String,
    /// Fixture file size in bytes.
    pub fixture_byte_size: u64,
}
91
92/// Schema map describing how upstream cells project into DSFB-GPU events.
93///
94/// WHY: The audit's transparency story rests on the reader being able to
95/// reconstruct the entire ingest path from this struct. Every parameter
96/// that affects the emitted `Vec<TraceEvent>` is recorded here.
97#[derive(Clone, PartialEq, Debug)]
98pub struct SchemaMap {
99    /// Declared `num_windows` from the upstream header.
100    pub declared_num_windows: u32,
101    /// Declared `num_signals` from the upstream header.
102    pub declared_num_signals: u32,
103    /// Declared `healthy_window_end` from the upstream header. Carried for
104    /// reader transparency; the lowering does NOT pre-split by it.
105    pub declared_healthy_window_end: u32,
106    /// Observed windows in the actual data rows.
107    pub observed_num_windows: u32,
108    /// Observed columns per row.
109    pub observed_num_signals: u32,
110    /// NaN-cell count (skipped by the lowering rule).
111    pub nan_cell_count: u32,
112    /// Finite-cell count (each becomes one event).
113    pub finite_cell_count: u32,
114    /// Number of events emitted into the dispatcher.
115    pub emitted_event_count: u32,
116    /// Lowering-rule parameters used.
117    pub lowering_config: LoweringConfig,
118}
119
120impl From<&IngestReport> for SchemaMap {
121    fn from(r: &IngestReport) -> Self {
122        Self {
123            declared_num_windows: r.declared_num_windows,
124            declared_num_signals: r.declared_num_signals,
125            declared_healthy_window_end: 0,
126            observed_num_windows: r.observed_num_windows,
127            observed_num_signals: r.observed_num_signals,
128            nan_cell_count: r.nan_cell_count,
129            finite_cell_count: r.finite_cell_count,
130            emitted_event_count: r.emitted_event_count,
131            lowering_config: LoweringConfig::default(),
132        }
133    }
134}
135
/// Metadata backing the replay-verification claim.
///
/// WHY: The load-bearing replay statement of the audit is that two
/// consecutive dispatches over the same input bytes yield a
/// byte-identical CaseFile, episode list, and report. This struct holds
/// the per-run SHA-256 of every load-bearing artifact so that
/// `audit_report.html` can show the two runs side by side.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplayVerification {
    /// Number of runs that were performed.
    pub run_count: u32,
    /// Lowercase hex SHA-256 of the canonical casefile.json bytes of
    /// run 1 (or `"unmeasured"` if only one run was performed).
    pub casefile_json_sha256_run1: String,
    /// Same as above, for run 2.
    pub casefile_json_sha256_run2: String,
    /// Lowercase hex SHA-256 of the episodes.jsonl bytes of run 1.
    pub episodes_jsonl_sha256_run1: String,
    /// Lowercase hex SHA-256 of the episodes.jsonl bytes of run 2.
    pub episodes_jsonl_sha256_run2: String,
    /// Hex of `CaseFile::final_case_file_hash` from run 1.
    pub final_case_file_hash_run1_hex: String,
    /// Hex of `CaseFile::final_case_file_hash` from run 2.
    pub final_case_file_hash_run2_hex: String,
    /// Episode count of run 1 (has to match run 2 for the audit to
    /// admit).
    pub episode_count_run1: u32,
    /// Episode count of run 2.
    pub episode_count_run2: u32,
    /// Toolchain identity (compiler version, CUDA driver, hardware).
    /// A `BTreeMap` so that iteration — and therefore the rendered
    /// output — follows sorted key order.
    pub toolchain: BTreeMap<String, String>,
}

impl ReplayVerification {
    /// Returns `true` exactly when run 1 and run 2 agree on all four
    /// compared artifacts: CaseFile JSON hash, episodes JSONL hash,
    /// final case-file hash, and episode count.
    ///
    /// `run_count` and `toolchain` take no part in the comparison.
    #[must_use]
    pub fn admits(&self) -> bool {
        // Gather each run's artifacts into one tuple; tuple equality is
        // the conjunction of the element-wise equalities.
        let first_run = (
            &self.casefile_json_sha256_run1,
            &self.episodes_jsonl_sha256_run1,
            &self.final_case_file_hash_run1_hex,
            self.episode_count_run1,
        );
        let second_run = (
            &self.casefile_json_sha256_run2,
            &self.episodes_jsonl_sha256_run2,
            &self.final_case_file_hash_run2_hex,
            self.episode_count_run2,
        );
        first_run == second_run
    }
}
175
/// Escape a free-form string for embedding in HTML.
///
/// Exactly four characters are rewritten — `<`, `>`, `&` and `"` — to
/// `&lt;`, `&gt;`, `&amp;` and `&quot;`. Every other character
/// (including `'`) is copied through unchanged, so hash hex and dataset
/// metadata render without surprise.
fn escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
            escaped
        })
}
193
194/// Convert 32 raw bytes (e.g. a stage digest from `CaseFile::hashes`)
195/// to a lowercase hex string.
196fn hex(bytes: &[u8; 32]) -> String {
197    super::ingest::sha256_to_hex_lower(bytes)
198}
199
200/// Render the complete audit report as deterministic, no-JavaScript HTML.
201#[allow(
202    clippy::too_many_lines,
203    reason = "Renderer is intentionally one long byte-stable string \
204              builder; splitting into helpers risks accidental ordering \
205              divergence between two builds."
206)]
207///
208/// WHY: One function consumes every input the audit cares about and
209/// returns a single byte-stable string. Callers can write the result
210/// directly to `audit_report.html`. Two consecutive calls with identical
211/// inputs return identical strings (verified by the S-REAL.1 replay
212/// acceptance test).
213///
214/// # Determinism guarantees
215///
216/// - Episode iteration uses a pre-sorted view by `(entity_id,
217///   start_window, end_window)`; the input slice is not modified.
218/// - Toolchain metadata renders in BTreeMap key order.
219/// - No wall-clock timestamps, process IDs, or environment-derived
220///   dynamic content appears in the output.
221/// - Hash bytes always render as lowercase hex.
222#[must_use]
223pub fn render_audit_report_html(
224    manifest: &DatasetManifest,
225    schema: &SchemaMap,
226    case: &CaseFile,
227    replay: &ReplayVerification,
228) -> String {
229    let mut h = String::with_capacity(8192);
230    h.push_str("<!DOCTYPE html>\n");
231    h.push_str("<html lang=\"en\">\n");
232    h.push_str("<head>\n");
233    h.push_str("<meta charset=\"utf-8\">\n");
234    let _ = writeln!(
235        h,
236        "<title>S-REAL.1 audit — {}</title>",
237        escape(&manifest.display_name)
238    );
239    h.push_str("<style>\n");
240    h.push_str(STYLE);
241    h.push_str("</style>\n");
242    h.push_str("</head>\n");
243    h.push_str("<body>\n");
244
245    // Header
246    let _ = writeln!(
247        h,
248        "<h1>DSFB-GPU S-REAL.1 audit — {}</h1>",
249        escape(&manifest.display_name)
250    );
251    h.push_str(
252        "<p class=\"subhead\">Deterministic residual-densor audit on real public dataset bytes. ",
253    );
254    h.push_str("Apache-2.0 reference implementation. Background IP: Invariant Forge LLC.</p>\n");
255
256    // S-REAL.1.1: Top-of-report summary card. 30-second operator
257    // comprehension target: shape, finite cells, skipped NaNs,
258    // events, episodes, replay verdict, truncated final hash.
259    section_summary_card(&mut h, manifest, schema, case, replay);
260
261    // S-REAL.1.1.1: Replay proof card immediately below the summary
262    // card so an operator sees the byte-identical-replay verdict +
263    // per-artifact SHA-256s at a glance, without scrolling to
264    // section 6.
265    section_replay_proof_card(&mut h, case, replay);
266
267    section_input_provenance(&mut h, manifest);
268    section_lowering_law(&mut h, schema);
269    section_run_configuration(&mut h, schema, case);
270    section_admitted_episodes(&mut h, case);
271
272    // S-REAL.1.1 aggregations: motif histogram + reason-code
273    // histogram with prose + entity summary + episode timeline +
274    // top structural spans (3 sub-tables) + plain-English motif
275    // glossary. All rendered deterministically; all derived from
276    // case.episodes only (no extra dispatcher state).
277    section_motif_histogram(&mut h, case);
278    section_reason_code_histogram(&mut h, case);
279    section_entity_summary(&mut h, case);
280    section_episode_timeline(&mut h, case);
281    section_top_structural_spans(&mut h, case);
282    section_motif_glossary(&mut h);
283
284    section_stage_hash_chain(&mut h, case);
285    section_replay_verification(&mut h, replay);
286    section_limitations(&mut h);
287
288    h.push_str("</body>\n</html>\n");
289    h
290}
291
292fn section_input_provenance(h: &mut String, m: &DatasetManifest) {
293    h.push_str("<h2>1. Input provenance</h2>\n");
294    h.push_str("<table class=\"kv\">\n");
295    kv(h, "dataset_id", &m.dataset_id);
296    kv(h, "display_name", &m.display_name);
297    kv(h, "upstream_doi_or_url", &m.upstream_doi_or_url);
298    kv(h, "license", &m.license);
299    kv(h, "source_class", &m.source_class);
300    kv(h, "vendored_path", &m.vendored_path);
301    kv(h, "fixture_sha256", &m.fixture_sha256_hex);
302    kv(h, "fixture_byte_size", &m.fixture_byte_size.to_string());
303    h.push_str("</table>\n");
304    h.push_str("<p class=\"note\">The fixture bytes were SHA-256-verified before parsing. ");
305    h.push_str("Any divergence from the pinned hash would have aborted the audit before any event was emitted.</p>\n");
306}
307
308fn section_lowering_law(h: &mut String, s: &SchemaMap) {
309    h.push_str("<h2>2. Residual-projection lowering law</h2>\n");
310    h.push_str("<p>The upstream fixture is in <code>residual-projection v2</code> form ");
311    h.push_str("(window-major × signal-minor TSV with NaN cells). The audit deterministically ");
312    h.push_str(
313        "lowers each finite cell into one <code>TraceEvent</code> via the rule below:</p>\n",
314    );
315    h.push_str("<pre class=\"law\">");
316    h.push_str("For each (window_idx, signal_idx, value) in fixture.rows.iter().enumerate()\n");
317    h.push_str(
318        "                                            .flat_map(|(w, row)| row.iter().enumerate()\n",
319    );
320    h.push_str("                                                                  .map(move |(s, v)| (w, s, v))):\n");
321    h.push_str("  if value is None (nan): skip; no event emitted for this cell\n");
322    h.push_str("  else:\n");
323    h.push_str("    ts_ns         = window_idx * window_size_ns\n");
324    h.push_str("    entity_id     = signal_idx\n");
325    h.push_str("    route_id      = 0\n");
326    h.push_str("    span_id       = window_idx * 65536 + signal_idx\n");
327    h.push_str("    parent_span_id = 0\n");
328    h.push_str(
329        "    latency_us    = clamp(value * value_to_microsecond_scale, 0, latency_clamp_us)\n",
330    );
331    h.push_str("    status_code   = 200\n");
332    h.push_str("    error_code    = 0\n");
333    h.push_str("    event_kind    = 0\n");
334    h.push_str("    flags         = 0\n");
335    h.push_str("</pre>\n");
336    h.push_str("<table class=\"kv\">\n");
337    kv(
338        h,
339        "declared_num_windows",
340        &s.declared_num_windows.to_string(),
341    );
342    kv(
343        h,
344        "declared_num_signals",
345        &s.declared_num_signals.to_string(),
346    );
347    kv(
348        h,
349        "declared_healthy_window_end",
350        &s.declared_healthy_window_end.to_string(),
351    );
352    kv(
353        h,
354        "observed_num_windows",
355        &s.observed_num_windows.to_string(),
356    );
357    kv(
358        h,
359        "observed_num_signals",
360        &s.observed_num_signals.to_string(),
361    );
362    kv(h, "nan_cell_count", &s.nan_cell_count.to_string());
363    kv(h, "finite_cell_count", &s.finite_cell_count.to_string());
364    kv(h, "emitted_event_count", &s.emitted_event_count.to_string());
365    kv(
366        h,
367        "value_to_microsecond_scale",
368        &s.lowering_config.value_to_microsecond_scale.to_string(),
369    );
370    kv(
371        h,
372        "latency_clamp_us",
373        &s.lowering_config.latency_clamp_us.to_string(),
374    );
375    kv(
376        h,
377        "window_size_ns",
378        &s.lowering_config.window_size_ns.to_string(),
379    );
380    h.push_str("</table>\n");
381    h.push_str("<p class=\"note\">NaN cells produce no event. The audit ");
382    h.push_str("does not claim DSFB-GPU saw the upstream's original trace ");
383    h.push_str("events; it claims DSFB-GPU saw exactly the events the rule ");
384    h.push_str("above produces from these bytes.</p>\n");
385}
386
387fn section_run_configuration(h: &mut String, s: &SchemaMap, c: &CaseFile) {
388    h.push_str("<h2>3. Run configuration</h2>\n");
389    h.push_str("<table class=\"kv\">\n");
390    kv(h, "casefile_version", c.version);
391    kv(h, "backend", c.backend);
392    kv(
393        h,
394        "n_entities (= observed_num_signals)",
395        &s.observed_num_signals.to_string(),
396    );
397    kv(
398        h,
399        "n_windows (= observed_num_windows)",
400        &s.observed_num_windows.to_string(),
401    );
402    kv(h, "events_dispatched", &s.emitted_event_count.to_string());
403    kv(h, "episodes_admitted", &c.episodes.len().to_string());
404    kv(h, "final_verdict", c.final_verdict.name());
405    h.push_str("</table>\n");
406}
407
408fn section_admitted_episodes(h: &mut String, case: &CaseFile) {
409    h.push_str("<h2>4. Admitted episodes</h2>\n");
410    if case.episodes.is_empty() {
411        h.push_str("<p class=\"note\">No episodes were admitted on this fixture. ");
412        h.push_str("Per the Semantic Non-Bypass Axiom the bank stage admitted zero. ");
413        h.push_str("This is a valid honest outcome — DSFB-GPU saw the fixture and ");
414        h.push_str("found no admissible motif under the canonical bank + detector registry.</p>\n");
415        return;
416    }
417    h.push_str("<p class=\"note\">Episodes are listed in canonical order by ");
418    h.push_str("<code>(entity_id, start_window, end_window)</code>. Each row reports the ");
419    h.push_str("bank motif, reason code, and peak Q16.16 magnitudes the bank used to admit.</p>\n");
420
421    let mut sorted: Vec<&Episode> = case.episodes.iter().collect();
422    sorted.sort_by_key(|e| (e.entity_id, e.start_window, e.end_window));
423
424    h.push_str("<table class=\"episodes\">\n");
425    h.push_str("<thead><tr>");
426    h.push_str("<th>idx</th><th>entity_id</th><th>start_window</th><th>end_window</th>");
427    h.push_str("<th>motif</th><th>reason</th><th>peak_state</th>");
428    h.push_str("<th>peak_residual_q</th><th>peak_drift_q</th><th>peak_slew_q</th>");
429    h.push_str("<th>detector_bit_count</th>");
430    h.push_str("</tr></thead>\n");
431    h.push_str("<tbody>\n");
432    for (idx, e) in sorted.iter().enumerate() {
433        let _ = writeln!(
434            h,
435            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
436            idx,
437            e.entity_id,
438            e.start_window,
439            e.end_window,
440            escape(motif_name(e.motif)),
441            escape(reason_name(e.reason)),
442            escape(grammar_name(e.peak_state)),
443            e.peak_residual_q.0,
444            e.peak_drift_q.0,
445            e.peak_slew_q.0,
446            e.detector_bit_count,
447        );
448    }
449    h.push_str("</tbody>\n</table>\n");
450}
451
452fn section_stage_hash_chain(h: &mut String, c: &CaseFile) {
453    h.push_str("<h2>5. Stage digest / hash chain</h2>\n");
454    h.push_str("<p class=\"note\">Every per-stage hash links into the next ");
455    h.push_str("via the canonical DSFB-GPU chain. A reader can replay the ");
456    h.push_str("dispatch and confirm every hex value below byte-for-byte.</p>\n");
457    h.push_str("<table class=\"hashes\">\n");
458    h.push_str("<thead><tr><th>chain link</th><th>sha256 (hex)</th></tr></thead>\n");
459    h.push_str("<tbody>\n");
460    hash_row(h, "h_input_catalog", &c.hashes.input_catalog);
461    hash_row(h, "h_contract", &c.hashes.contract);
462    hash_row(h, "h_bank", &c.hashes.bank);
463    hash_row(h, "h_detector_registry", &c.hashes.detector_registry);
464    hash_row(h, "h_kernel_sequence", &c.hashes.kernel_sequence);
465    hash_row(h, "h_window_feature", &c.hashes.window_feature);
466    hash_row(h, "h_residual_field", &c.hashes.residual_field);
467    hash_row(h, "h_sign_field", &c.hashes.sign_field);
468    hash_row(h, "h_detector_cell", &c.hashes.detector_cell);
469    hash_row(h, "h_consensus_grid", &c.hashes.consensus_grid);
470    hash_row(h, "h_candidate_interval", &c.hashes.candidate_interval);
471    hash_row(h, "h_episode", &c.hashes.episode);
472    hash_row(h, "final_case_file_hash", &c.final_case_file_hash);
473    h.push_str("</tbody>\n</table>\n");
474}
475
476fn section_replay_verification(h: &mut String, r: &ReplayVerification) {
477    h.push_str("<h2>6. Replay verification</h2>\n");
478    h.push_str("<table class=\"kv\">\n");
479    kv(h, "run_count", &r.run_count.to_string());
480    kv(h, "casefile_json_sha256_run1", &r.casefile_json_sha256_run1);
481    kv(h, "casefile_json_sha256_run2", &r.casefile_json_sha256_run2);
482    kv(
483        h,
484        "episodes_jsonl_sha256_run1",
485        &r.episodes_jsonl_sha256_run1,
486    );
487    kv(
488        h,
489        "episodes_jsonl_sha256_run2",
490        &r.episodes_jsonl_sha256_run2,
491    );
492    kv(
493        h,
494        "final_case_file_hash_run1",
495        &r.final_case_file_hash_run1_hex,
496    );
497    kv(
498        h,
499        "final_case_file_hash_run2",
500        &r.final_case_file_hash_run2_hex,
501    );
502    kv(h, "episode_count_run1", &r.episode_count_run1.to_string());
503    kv(h, "episode_count_run2", &r.episode_count_run2.to_string());
504    kv(
505        h,
506        "byte_identical_replay",
507        if r.admits() { "true" } else { "false" },
508    );
509    h.push_str("</table>\n");
510    h.push_str("<h3>Toolchain identity</h3>\n");
511    h.push_str("<table class=\"kv\">\n");
512    for (k, v) in &r.toolchain {
513        kv(h, k, v);
514    }
515    h.push_str("</table>\n");
516    h.push_str("<p class=\"note\">Replay determinism is asserted only for ");
517    h.push_str("the recorded toolchain. The audit does not claim replay ");
518    h.push_str("byte-identity across different driver, CUDA, or hardware versions.</p>\n");
519}
520
521fn section_limitations(h: &mut String) {
522    h.push_str("<h2>7. Limitations and non-claims</h2>\n");
523    h.push_str("<ul class=\"nonclaims\">\n");
524    for nc in NON_CLAIMS {
525        let _ = writeln!(h, "<li>{}</li>", escape(nc));
526    }
527    h.push_str("</ul>\n");
528}
529
530/// Convenience: emit one `<tr><th>key</th><td>value</td></tr>` row.
531fn kv(h: &mut String, key: &str, value: &str) {
532    let _ = writeln!(
533        h,
534        "<tr><th>{}</th><td>{}</td></tr>",
535        escape(key),
536        escape(value)
537    );
538}
539
540fn hash_row(h: &mut String, label: &str, bytes: &[u8; 32]) {
541    let _ = writeln!(
542        h,
543        "<tr><td class=\"label\">{}</td><td class=\"hex\">{}</td></tr>",
544        escape(label),
545        hex(bytes)
546    );
547}
548
549// =====================================================================
550// S-REAL.1.1 renderer additions (operator-facing richness).
551// =====================================================================
552//
553// Panel-locked goal: a human reading the report should understand what
554// DSFB-GPU saw in 30 seconds. The seven additions below trade no
555// hash-chain semantics or replay byte-identity to get there — every
556// section is a deterministic projection over `case.episodes` already
557// pinned by the chain.
558
559/// 30-second operator summary card rendered at the top of the report.
560///
561/// WHY: An operator opening the audit report should see the
562/// load-bearing facts (shape, finite cells, NaN skip count, events,
563/// episodes admitted, replay verdict) WITHOUT scrolling. Every field
564/// is also re-rendered later in its own dedicated section, but this
565/// card pins the punch-line for the impatient reader.
566fn section_summary_card(
567    h: &mut String,
568    m: &DatasetManifest,
569    s: &SchemaMap,
570    case: &CaseFile,
571    replay: &ReplayVerification,
572) {
573    h.push_str("<div class=\"summary-card\">\n");
574    h.push_str("<h2 class=\"summary-title\">Dataset summary</h2>\n");
575    h.push_str("<table class=\"kv summary-kv\">\n");
576    kv(h, "dataset", &m.display_name);
577    kv(h, "source class", &m.source_class);
578    kv(
579        h,
580        "shape (entities × windows)",
581        &format!("{} × {}", s.observed_num_signals, s.observed_num_windows),
582    );
583    kv(
584        h,
585        "finite cells (events emitted)",
586        &s.finite_cell_count.to_string(),
587    );
588    kv(h, "NaN cells skipped", &s.nan_cell_count.to_string());
589    kv(h, "episodes admitted", &case.episodes.len().to_string());
590    kv(
591        h,
592        "byte-identical replay",
593        if replay.admits() { "YES" } else { "NO" },
594    );
595    // Truncated final_case_file_hash for at-a-glance chain-head pinning;
596    // the full hex appears in the Replay Proof card below + section 5.
597    kv(
598        h,
599        "final_case_file_hash (first 16 hex)",
600        &hex_truncated_16(&case.final_case_file_hash),
601    );
602    kv(h, "final_verdict", case.final_verdict.name());
603    h.push_str("</table>\n");
604    h.push_str("</div>\n");
605}
606
607/// Replay Proof card rendered immediately below the summary card.
608///
609/// WHY: An operator reading the audit should see the replay chain
610/// confidence at a glance — without scrolling to section 6. The card
611/// surfaces the byte-identical-replay verdict + per-artifact SHA-256s
612/// for casefile.json and episodes.jsonl + the final_case_file_hash.
613///
614/// `audit_report.html` cannot carry its own SHA-256 by construction
615/// (the rendered HTML would have to contain its own hash, which is
616/// computationally infeasible — equivalent to finding a SHA-256
617/// pre-image of the rendered string within the rendered string). The
618/// audit_report.html's authoritative SHA-256 is computed AFTER
619/// rendering and pinned in `replay_verification.txt`; the card
620/// explicitly cites that pin location instead of fabricating a
621/// self-referential value.
622fn section_replay_proof_card(h: &mut String, case: &CaseFile, replay: &ReplayVerification) {
623    h.push_str("<div class=\"replay-proof-card\">\n");
624    h.push_str("<h2 class=\"summary-title\">Replay proof</h2>\n");
625    h.push_str("<table class=\"kv summary-kv\">\n");
626    kv(
627        h,
628        "byte-identical replay",
629        if replay.admits() { "YES" } else { "NO" },
630    );
631    kv(
632        h,
633        "casefile.json SHA-256",
634        &replay.casefile_json_sha256_run1,
635    );
636    kv(
637        h,
638        "episodes.jsonl SHA-256",
639        &replay.episodes_jsonl_sha256_run1,
640    );
641    kv(
642        h,
643        "audit_report.html SHA-256",
644        "(externally pinned in replay_verification.txt — not embedded here because a self-referential hash is computationally infeasible)",
645    );
646    kv(
647        h,
648        "final_case_file_hash (full)",
649        &super::ingest::sha256_to_hex_lower(&case.final_case_file_hash),
650    );
651    kv(
652        h,
653        "episode count (run 1)",
654        &replay.episode_count_run1.to_string(),
655    );
656    kv(
657        h,
658        "episode count (run 2)",
659        &replay.episode_count_run2.to_string(),
660    );
661    h.push_str("</table>\n");
662    h.push_str("</div>\n");
663}
664
665/// Truncate a 32-byte hash to the first 16 hex characters (8 bytes).
666/// Used in the dataset summary card for at-a-glance chain-head
667/// pinning. The full hex appears in the Replay Proof card and
668/// section 5.
669fn hex_truncated_16(bytes: &[u8; 32]) -> String {
670    let full = super::ingest::sha256_to_hex_lower(bytes);
671    full.chars().take(16).collect()
672}
673
674/// Motif histogram sorted ascending by motif wire name.
675///
676/// WHY: After section 4 lists every admitted episode in canonical order,
677/// the histogram answers "which motifs dominated this dataset?" at a
678/// glance. Counts + percentages are exact integers (no f64) so the
679/// rendered output is byte-stable across two calls.
680fn section_motif_histogram(h: &mut String, case: &CaseFile) {
681    h.push_str("<h3>4a. Motif histogram</h3>\n");
682    if case.episodes.is_empty() {
683        h.push_str("<p class=\"note\">No motifs fired on this dataset.</p>\n");
684        return;
685    }
686    let mut counts: BTreeMap<&'static str, u32> = BTreeMap::new();
687    for e in &case.episodes {
688        *counts.entry(motif_name(e.motif)).or_insert(0) += 1;
689    }
690    let total = case.episodes.len() as u32;
691    h.push_str("<table class=\"histogram\">\n");
692    h.push_str("<thead><tr><th>motif</th><th>count</th><th>percent</th></tr></thead>\n<tbody>\n");
693    for (name, count) in &counts {
694        let pct_bp = (*count * 10_000) / total;
695        let _ = writeln!(
696            h,
697            "<tr><td>{}</td><td>{}</td><td>{}.{:02} %</td></tr>",
698            escape(name),
699            count,
700            pct_bp / 100,
701            pct_bp % 100
702        );
703    }
704    h.push_str("</tbody>\n</table>\n");
705}
706
707/// Reason-code histogram with plain-English meaning per row.
708///
709/// WHY: Reason codes are pinned wire-name enums; without the prose
710/// column, an operator must look up what each code means. Folding the
711/// prose into the histogram row makes the table self-documenting and
712/// removes the need for a separate glossary section for reason codes.
713/// Sorted descending by `ReasonCode::severity()` (canonical tie-break
714/// from `dsfb_gpu_debug_core::grammar`), then ascending by wire name
715/// for ties so the rendered output is deterministic.
716fn section_reason_code_histogram(h: &mut String, case: &CaseFile) {
717    use dsfb_gpu_debug_core::grammar::ReasonCode;
718    h.push_str("<h3>4b. Reason-code histogram</h3>\n");
719    if case.episodes.is_empty() {
720        h.push_str("<p class=\"note\">No reason codes fired on this dataset.</p>\n");
721        return;
722    }
723    let mut counts: BTreeMap<u8, (ReasonCode, u32)> = BTreeMap::new();
724    for e in &case.episodes {
725        counts.entry(e.reason as u8).or_insert((e.reason, 0)).1 += 1;
726    }
727    let total = case.episodes.len() as u32;
728
729    // Sort by severity desc, then wire name asc.
730    let mut rows: Vec<(ReasonCode, u32)> = counts.values().copied().collect();
731    rows.sort_by(|a, b| {
732        b.0.severity()
733            .cmp(&a.0.severity())
734            .then_with(|| reason_name(a.0).cmp(reason_name(b.0)))
735    });
736
737    h.push_str("<table class=\"histogram\">\n");
738    h.push_str("<thead><tr><th>reason_code</th><th>count</th><th>percent</th><th>plain-English meaning</th></tr></thead>\n<tbody>\n");
739    for (reason, count) in &rows {
740        let pct_bp = (*count * 10_000) / total;
741        let _ = writeln!(
742            h,
743            "<tr><td>{}</td><td>{}</td><td>{}.{:02} %</td><td>{}</td></tr>",
744            escape(reason_name(*reason)),
745            count,
746            pct_bp / 100,
747            pct_bp % 100,
748            escape(reason_prose(*reason)),
749        );
750    }
751    h.push_str("</tbody>\n</table>\n");
752}
753
754/// Per-entity summary table: one row per entity that admitted at least
755/// one episode.
756///
757/// WHY: Entity-level aggregation answers "which entities are most
758/// affected?" in one glance. Dominant motif (motif with the most
759/// episodes on this entity; tie-broken by motif wire name ascending)
760/// and first/last window give the operator a quick footprint.
761fn section_entity_summary(h: &mut String, case: &CaseFile) {
762    h.push_str("<h3>4c. Entity summary</h3>\n");
763    if case.episodes.is_empty() {
764        h.push_str("<p class=\"note\">No entities admitted episodes on this dataset.</p>\n");
765        return;
766    }
767    // Aggregate by entity. Seed first/last window from the first
768    // observation to avoid the u32::default() == 0 trap (min(0, x)
769    // would always pick 0 and corrupt first_window).
770    let mut per_entity: BTreeMap<u32, EntityAggregation> = BTreeMap::new();
771    for e in &case.episodes {
772        per_entity
773            .entry(e.entity_id)
774            .and_modify(|agg| {
775                agg.episode_count += 1;
776                agg.first_window = agg.first_window.min(e.start_window);
777                agg.last_window = agg.last_window.max(e.end_window);
778                agg.max_detector_bit_count = agg.max_detector_bit_count.max(e.detector_bit_count);
779                *agg.motif_counts.entry(motif_name(e.motif)).or_insert(0) += 1;
780            })
781            .or_insert_with(|| {
782                let mut m: BTreeMap<&'static str, u32> = BTreeMap::new();
783                m.insert(motif_name(e.motif), 1);
784                EntityAggregation {
785                    episode_count: 1,
786                    first_window: e.start_window,
787                    last_window: e.end_window,
788                    max_detector_bit_count: e.detector_bit_count,
789                    motif_counts: m,
790                }
791            });
792    }
793    h.push_str("<table class=\"summary\">\n");
794    h.push_str("<thead><tr><th>entity_id</th><th>episode_count</th><th>first_window</th><th>last_window</th><th>max_detector_bit_count</th><th>dominant_motif</th></tr></thead>\n<tbody>\n");
795    for (entity_id, agg) in &per_entity {
796        // Dominant motif: highest count; ties broken by name ascending
797        // (BTreeMap iterates ascending so first match in the max-fold wins).
798        let (dominant_motif, _) = agg
799            .motif_counts
800            .iter()
801            .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)))
802            .map_or((&"-", &0u32), |(k, v)| (k, v));
803        let _ = writeln!(
804            h,
805            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
806            entity_id,
807            agg.episode_count,
808            agg.first_window,
809            agg.last_window,
810            agg.max_detector_bit_count,
811            escape(dominant_motif),
812        );
813    }
814    h.push_str("</tbody>\n</table>\n");
815}
816
/// Running per-entity roll-up built by `section_entity_summary` while it
/// walks the episode list.
struct EntityAggregation {
    /// Number of episodes seen so far for this entity.
    episode_count: u32,
    /// Smallest `start_window` among the entity's episodes.
    first_window: u32,
    /// Largest `end_window` among the entity's episodes.
    last_window: u32,
    /// Largest `detector_bit_count` among the entity's episodes.
    max_detector_bit_count: u32,
    /// Episode count per motif wire name (as returned by `motif_name`);
    /// the name-ordered map keeps the dominant-motif tie-break deterministic.
    motif_counts: BTreeMap<&'static str, u32>,
}
824
825/// Episode timeline grouped by entity, sorted ascending by
826/// `(start_window, end_window, motif as u8)` within each entity.
827///
828/// WHY: Section 4 lists every admitted episode in canonical order;
829/// the timeline re-arranges them per-entity in time order so the
830/// operator can scan "what happened on each entity in order" without
831/// having to filter the master list mentally.
832fn section_episode_timeline(h: &mut String, case: &CaseFile) {
833    h.push_str("<h3>4d. Episode timeline (per-entity, time-ordered)</h3>\n");
834    if case.episodes.is_empty() {
835        h.push_str("<p class=\"note\">No episodes to plot.</p>\n");
836        return;
837    }
838    // Bucket episodes by entity in BTreeMap (deterministic).
839    let mut per_entity: BTreeMap<u32, Vec<&dsfb_gpu_debug_core::bank::Episode>> = BTreeMap::new();
840    for e in &case.episodes {
841        per_entity.entry(e.entity_id).or_default().push(e);
842    }
843    h.push_str("<table class=\"timeline\">\n");
844    h.push_str("<thead><tr><th>entity_id</th><th>start_window</th><th>end_window</th><th>motif</th><th>reason</th></tr></thead>\n<tbody>\n");
845    for (entity_id, episodes) in &mut per_entity {
846        episodes.sort_by_key(|e| (e.start_window, e.end_window, e.motif as u8));
847        for e in episodes {
848            let _ = writeln!(
849                h,
850                "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
851                entity_id,
852                e.start_window,
853                e.end_window,
854                escape(motif_name(e.motif)),
855                escape(reason_name(e.reason)),
856            );
857        }
858    }
859    h.push_str("</tbody>\n</table>\n");
860}
861
/// Per-entity aggregation for the highest-repeated-entities sub-table
/// of section 4e(b). `(episode_count, motif_counts, reason_counts)`.
/// Hoisted to a type alias because the inline tuple was clippy-noisy
/// without buying readability.
///
/// Every slot implements `Default`, which is what lets the 4e(b) loop
/// create entries with `entry(..).or_default()`.
type EntityRankAggregation = (
    // .0 — number of episodes admitted on the entity.
    u32,
    // .1 — episode count per motif wire name.
    BTreeMap<&'static str, u32>,
    // .2 — episode count per reason-code wire name.
    BTreeMap<&'static str, u32>,
);
871
872/// Top structural spans rendered as three deterministic sub-tables:
873/// longest spans / highest-repeated entities / most-recurrent reason
874/// codes.
875///
876/// WHY: An operator scanning the audit wants three different "top of
877/// the heap" views: which episodes lasted longest, which entities
878/// produced the most episodes, and which reason codes recurred most.
879/// Each sub-table is small (top-5 or top-10) and deterministic; ties
880/// are broken by stable secondary keys so two renders agree byte-for-
881/// byte.
882fn section_top_structural_spans(h: &mut String, case: &CaseFile) {
883    use dsfb_gpu_debug_core::grammar::ReasonCode;
884    h.push_str("<h3>4e. Top structural spans</h3>\n");
885    if case.episodes.is_empty() {
886        h.push_str("<p class=\"note\">No episodes to rank.</p>\n");
887        return;
888    }
889
890    // (a) Longest spans — top-10 by (end-start) desc, ties broken by
891    //     (detector_bit_count desc, entity asc, start asc).
892    h.push_str("<h4>4e(a). Longest spans</h4>\n");
893    let mut by_length: Vec<&dsfb_gpu_debug_core::bank::Episode> = case.episodes.iter().collect();
894    by_length.sort_by(|a, b| {
895        let la = a.end_window.saturating_sub(a.start_window);
896        let lb = b.end_window.saturating_sub(b.start_window);
897        lb.cmp(&la)
898            .then_with(|| b.detector_bit_count.cmp(&a.detector_bit_count))
899            .then_with(|| a.entity_id.cmp(&b.entity_id))
900            .then_with(|| a.start_window.cmp(&b.start_window))
901    });
902    h.push_str("<table class=\"summary\">\n");
903    h.push_str("<thead><tr><th>rank</th><th>length_windows</th><th>entity_id</th><th>start</th><th>end</th><th>motif</th><th>detector_bit_count</th></tr></thead>\n<tbody>\n");
904    for (rank, e) in by_length.iter().take(10).enumerate() {
905        let _ = writeln!(
906            h,
907            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
908            rank + 1,
909            e.end_window.saturating_sub(e.start_window),
910            e.entity_id,
911            e.start_window,
912            e.end_window,
913            escape(motif_name(e.motif)),
914            e.detector_bit_count,
915        );
916    }
917    h.push_str("</tbody>\n</table>\n");
918
919    // (b) Highest-repeated entities — top-5 entities by episode_count
920    //     desc, ties broken by entity_id asc.
921    h.push_str("<h4>4e(b). Highest-repeated entities</h4>\n");
922    let mut per_entity: BTreeMap<u32, EntityRankAggregation> = BTreeMap::new();
923    for e in &case.episodes {
924        let agg = per_entity.entry(e.entity_id).or_default();
925        agg.0 += 1;
926        *agg.1.entry(motif_name(e.motif)).or_insert(0) += 1;
927        *agg.2.entry(reason_name(e.reason)).or_insert(0) += 1;
928    }
929    let mut entity_ranks: Vec<(u32, u32, &'static str, &'static str)> = per_entity
930        .iter()
931        .map(|(entity_id, (count, motifs, reasons))| {
932            let dom_motif = motifs
933                .iter()
934                .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)))
935                .map_or("-", |(k, _)| *k);
936            let dom_reason = reasons
937                .iter()
938                .max_by(|a, b| a.1.cmp(b.1).then_with(|| b.0.cmp(a.0)))
939                .map_or("-", |(k, _)| *k);
940            (*entity_id, *count, dom_motif, dom_reason)
941        })
942        .collect();
943    entity_ranks.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
944    h.push_str("<table class=\"summary\">\n");
945    h.push_str("<thead><tr><th>rank</th><th>entity_id</th><th>episode_count</th><th>dominant_motif</th><th>dominant_reason_code</th></tr></thead>\n<tbody>\n");
946    for (rank, (entity_id, count, motif, reason)) in entity_ranks.iter().take(5).enumerate() {
947        let _ = writeln!(
948            h,
949            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
950            rank + 1,
951            entity_id,
952            count,
953            escape(motif),
954            escape(reason),
955        );
956    }
957    h.push_str("</tbody>\n</table>\n");
958
959    // (c) Most-recurrent reason codes — top-5 by count desc, ties broken
960    //     by severity desc then wire name asc.
961    h.push_str("<h4>4e(c). Most-recurrent reason codes</h4>\n");
962    let mut counts: BTreeMap<u8, (ReasonCode, u32)> = BTreeMap::new();
963    for e in &case.episodes {
964        counts.entry(e.reason as u8).or_insert((e.reason, 0)).1 += 1;
965    }
966    let mut rows: Vec<(ReasonCode, u32)> = counts.values().copied().collect();
967    rows.sort_by(|a, b| {
968        b.1.cmp(&a.1)
969            .then_with(|| b.0.severity().cmp(&a.0.severity()))
970            .then_with(|| reason_name(a.0).cmp(reason_name(b.0)))
971    });
972    h.push_str("<table class=\"summary\">\n");
973    h.push_str("<thead><tr><th>rank</th><th>reason_code</th><th>count</th><th>severity</th></tr></thead>\n<tbody>\n");
974    for (rank, (reason, count)) in rows.iter().take(5).enumerate() {
975        let _ = writeln!(
976            h,
977            "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
978            rank + 1,
979            escape(reason_name(*reason)),
980            count,
981            reason.severity(),
982        );
983    }
984    h.push_str("</tbody>\n</table>\n");
985}
986
987/// Plain-English motif glossary, rendered as a table.
988///
989/// WHY: An operator encountering DSFB motifs for the first time needs
990/// a structural-vocabulary key. The glossary lists ALL 8 motifs (even
991/// ones that did not fire in this dataset) so the report is
992/// self-documenting. Each prose line ends with the standardised tail
993/// *"DSFB interprets this structurally, not as a ground-truth causal
994/// diagnosis."* — the panel-locked non-overclaim boundary.
995fn section_motif_glossary(h: &mut String) {
996    h.push_str("<h3>4f. Plain-English motif glossary</h3>\n");
997    h.push_str("<p class=\"note\">Every DSFB motif describes a STRUCTURAL ");
998    h.push_str("residual shape. DSFB interprets each motif structurally, ");
999    h.push_str("not as a ground-truth causal diagnosis. The glossary covers ");
1000    h.push_str("all eight motifs even if some did not fire on this dataset.</p>\n");
1001    h.push_str("<table class=\"summary glossary\">\n");
1002    h.push_str(
1003        "<thead><tr><th>motif</th><th>structural interpretation</th></tr></thead>\n<tbody>\n",
1004    );
1005    for (motif, prose) in MOTIF_PROSE {
1006        let _ = writeln!(
1007            h,
1008            "<tr><td class=\"label\">{}</td><td>{}</td></tr>",
1009            escape(motif_name(*motif)),
1010            escape(prose)
1011        );
1012    }
1013    h.push_str("</tbody>\n</table>\n");
1014}
1015
1016/// Look up a reason code's plain-English meaning.
1017///
1018/// WHY: Reason codes are pinned wire-name enums; the prose makes the
1019/// reason-code histogram self-documenting. Each line ends with the
1020/// standardised structural-not-causal tail so the renderer cannot
1021/// accidentally claim ground-truth causality.
1022fn reason_prose(r: dsfb_gpu_debug_core::grammar::ReasonCode) -> &'static str {
1023    use dsfb_gpu_debug_core::grammar::ReasonCode;
1024    match r {
1025        ReasonCode::Admissible => {
1026            "Cell admitted as within the admissibility envelope; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1027        }
1028        ReasonCode::BoundaryApproach => {
1029            "Residual or drift entered the boundary band but did not cross the violation threshold; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1030        }
1031        ReasonCode::SustainedOutwardDrift => {
1032            "Drift remained above the violation threshold for multiple windows; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1033        }
1034        ReasonCode::AbruptSlewViolation => {
1035            "Single-window slew shock crossed the violation threshold; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1036        }
1037        ReasonCode::RecurrentBoundaryGrazing => {
1038            "Multiple boundary cells with no clear violation — repeated graze without commitment; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1039        }
1040        ReasonCode::EnvelopeViolation => {
1041            "Envelope-magnitude violation (norm itself crossed the high band); DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1042        }
1043        ReasonCode::DriftWithRecovery => {
1044            "Drift descended after a peak — recovery edge; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1045        }
1046        ReasonCode::SingleCrossing => {
1047            "One-shot boundary crossing that did not re-enter on the next cell; DSFB interprets this structurally, not as a ground-truth causal diagnosis."
1048        }
1049    }
1050}
1051
1052/// Pinned plain-English glossary for all 8 BankMotif variants.
1053///
1054/// WHY: Self-documenting glossary so an operator opening the audit
1055/// for the first time doesn't need an external DSFB reference. Each
1056/// line ends with the standardised non-overclaim tail so the
1057/// rendered report cannot accidentally claim causality.
1058const MOTIF_PROSE: &[(dsfb_gpu_debug_core::bank::BankMotif, &str)] = {
1059    use dsfb_gpu_debug_core::bank::BankMotif;
1060    &[
1061        (
1062            BankMotif::LatencyRamp,
1063            "Sustained directional increase in residual latency-projection cells across a contiguous window span; DSFB interprets this as recurrent directional latency structure, not as a ground-truth causal diagnosis.",
1064        ),
1065        (
1066            BankMotif::ErrorBurst,
1067            "Concentrated burst of error-projection cells in a short window range; DSFB interprets this as locally concentrated error structure, not as a ground-truth causal diagnosis.",
1068        ),
1069        (
1070            BankMotif::SlewShockRecovery,
1071            "Abrupt slew shock followed by a recovery edge in the same entity; DSFB interprets this as transient slew + recovery structure, not as a ground-truth causal diagnosis.",
1072        ),
1073        (
1074            BankMotif::SustainedDegradation,
1075            "Persistent elevation of residual magnitude over many windows without recovery; DSFB interprets this as sustained structural degradation, not as a ground-truth causal diagnosis.",
1076        ),
1077        (
1078            BankMotif::OscillationInstability,
1079            "Repeated alternation across the boundary band without sustained commitment; DSFB interprets this as oscillatory structural pattern, not as a ground-truth causal diagnosis.",
1080        ),
1081        (
1082            BankMotif::LocalizedRouteFault,
1083            "Episode bounded to a specific entity/route locality with neighbouring entities admissible; DSFB interprets this as locality-confined structure, not as a ground-truth causal diagnosis.",
1084        ),
1085        (
1086            BankMotif::FanoutCascadeCandidate,
1087            "Co-occurrence pattern across multiple entities consistent with fan-out cascade structure; DSFB interprets this as multi-entity structural co-firing, not as a ground-truth causal diagnosis.",
1088        ),
1089        (
1090            BankMotif::ConfuserTransient,
1091            "Confuser-like transient that fired but did not sustain into a full motif; DSFB interprets this structurally as transient near-violation, not as a ground-truth causal diagnosis.",
1092        ),
1093    ]
1094};
1095
1096fn motif_name(m: dsfb_gpu_debug_core::bank::BankMotif) -> &'static str {
1097    // Keep this lookup deterministic: avoid Debug derives in the
1098    // rendered output to prevent format drift between compiler versions.
1099    use dsfb_gpu_debug_core::bank::BankMotif;
1100    match m {
1101        BankMotif::LatencyRamp => "LatencyRamp",
1102        BankMotif::ErrorBurst => "ErrorBurst",
1103        BankMotif::SlewShockRecovery => "SlewShockRecovery",
1104        BankMotif::SustainedDegradation => "SustainedDegradation",
1105        BankMotif::OscillationInstability => "OscillationInstability",
1106        BankMotif::LocalizedRouteFault => "LocalizedRouteFault",
1107        BankMotif::FanoutCascadeCandidate => "FanoutCascadeCandidate",
1108        BankMotif::ConfuserTransient => "ConfuserTransient",
1109    }
1110}
1111
1112fn reason_name(r: dsfb_gpu_debug_core::grammar::ReasonCode) -> &'static str {
1113    use dsfb_gpu_debug_core::grammar::ReasonCode;
1114    match r {
1115        ReasonCode::Admissible => "Admissible",
1116        ReasonCode::BoundaryApproach => "BoundaryApproach",
1117        ReasonCode::SustainedOutwardDrift => "SustainedOutwardDrift",
1118        ReasonCode::AbruptSlewViolation => "AbruptSlewViolation",
1119        ReasonCode::RecurrentBoundaryGrazing => "RecurrentBoundaryGrazing",
1120        ReasonCode::EnvelopeViolation => "EnvelopeViolation",
1121        ReasonCode::DriftWithRecovery => "DriftWithRecovery",
1122        ReasonCode::SingleCrossing => "SingleCrossing",
1123    }
1124}
1125
1126fn grammar_name(g: dsfb_gpu_debug_core::grammar::GrammarState) -> &'static str {
1127    use dsfb_gpu_debug_core::grammar::GrammarState;
1128    match g {
1129        GrammarState::Admissible => "Admissible",
1130        GrammarState::Boundary => "Boundary",
1131        GrammarState::Violation => "Violation",
1132        GrammarState::Recovery => "Recovery",
1133    }
1134}
1135
/// Pinned list of things the audit report explicitly does NOT claim.
///
/// The unit test `render_carries_every_non_claim` requires every entry to
/// appear, HTML-escaped, in the rendered report, so an entry cannot be
/// dropped from the output without a test failure.
// NOTE(review): presumably rendered as the `ul.nonclaims` list of section
// "7. Limitations and non-claims" — the rendering code is outside this
// chunk; confirm.
const NON_CLAIMS: &[&str] = &[
    "Does NOT claim DSFB has identified the \"real\" anomaly in the dataset.",
    "Does NOT claim DSFB outperforms any other anomaly detector.",
    "Does NOT claim DSFB has discovered causality.",
    "Does NOT claim DSFB has measured remediation effectiveness.",
    "Does NOT claim fitness-for-purpose on regulated or safety-critical use.",
    "Does NOT claim the dataset is \"correctly labeled\" or \"ground truth\"; the audit report describes deterministic structure DSFB-GPU saw, not labels.",
    "Does NOT claim the corpus or registry is exhaustive.",
    "Does NOT claim replay determinism across different driver / CUDA / hardware versions; the replay receipt records the toolchain explicitly.",
];
1146
/// Inline CSS for the report, kept as a constant string so the styling
/// travels inside the HTML file itself (no external stylesheet).
///
/// The text is part of the rendered bytes: any edit here changes the
/// report's byte stream and therefore its hash.
// NOTE(review): the section renderers emit tables with classes
// `histogram`, `summary`, `glossary` and `timeline`, and the tests expect
// `summary-card` / `replay-proof-card` elements, but this sheet only has
// rules for `kv`, `episodes` and `hashes` tables. Confirm whether those
// classes are styled elsewhere or are intentionally left at browser
// defaults.
const STYLE: &str = r#"
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Arial, sans-serif;
       margin: 2rem; max-width: 1100px; color: #222; }
h1 { border-bottom: 2px solid #444; padding-bottom: .25rem; }
h2 { margin-top: 2rem; border-bottom: 1px solid #aaa; padding-bottom: .15rem; }
h3 { margin-top: 1.25rem; color: #444; }
p.subhead { color: #555; font-size: .95rem; margin-top: -.25rem; }
p.note { color: #555; font-size: .9rem; max-width: 75ch; }
table { border-collapse: collapse; margin: .5rem 0 1rem 0; }
table.kv th { text-align: left; padding: .25rem .75rem .25rem 0; vertical-align: top; font-weight: 600; color: #333; }
table.kv td { padding: .25rem 0; font-family: ui-monospace, SFMono-Regular, Consolas, monospace; font-size: .9rem; }
table.episodes, table.hashes { width: 100%; font-size: .85rem; }
table.episodes th, table.episodes td,
table.hashes th, table.hashes td { border-bottom: 1px solid #ddd; padding: .35rem .5rem; text-align: left; }
table.episodes th, table.hashes th { background: #f4f4f4; font-weight: 600; }
table.hashes td.hex { font-family: ui-monospace, SFMono-Regular, Consolas, monospace; font-size: .82rem; word-break: break-all; }
table.hashes td.label { font-weight: 600; white-space: nowrap; }
pre.law { background: #f8f8f8; border: 1px solid #ddd; padding: .75rem; font-size: .85rem;
          overflow-x: auto; max-width: 100%; }
ul.nonclaims li { margin: .25rem 0; color: #333; }
code { background: #eee; padding: 1px 4px; border-radius: 3px; font-size: .9em; }
"#;
1169
1170#[cfg(test)]
1171mod tests {
1172    use super::*;
1173
1174    fn manifest() -> DatasetManifest {
1175        DatasetManifest {
1176            dataset_id: "aiops_kpi".to_string(),
1177            display_name: "AIOps Challenge 2018 KPI".to_string(),
1178            upstream_doi_or_url: "Su et al., IPCCC 2018; github.com/NetManAIOps/Bagel".to_string(),
1179            license: "Apache-2.0".to_string(),
1180            source_class: "TimeSeriesAnomaly".to_string(),
1181            vendored_path: "/home/one/dsfb/crates/dsfb-debug/data/fixtures/aiops_challenge.tsv"
1182                .to_string(),
1183            fixture_sha256_hex: "29961b8b66d941c19c065cfa974a62f098ebd63ef8c9017d8219e9f228135642"
1184                .to_string(),
1185            fixture_byte_size: 2015,
1186        }
1187    }
1188
1189    fn schema() -> SchemaMap {
1190        SchemaMap {
1191            declared_num_windows: 32,
1192            declared_num_signals: 4,
1193            declared_healthy_window_end: 12,
1194            observed_num_windows: 38,
1195            observed_num_signals: 4,
1196            nan_cell_count: 0,
1197            finite_cell_count: 152,
1198            emitted_event_count: 152,
1199            lowering_config: LoweringConfig::default(),
1200        }
1201    }
1202
1203    fn empty_case() -> CaseFile {
1204        use dsfb_gpu_debug_core::casefile::{CaseFile, EmissionMode, IntermediateHashes};
1205        use dsfb_gpu_debug_core::verdict::FinalVerdict;
1206        let z = [0u8; 32];
1207        CaseFile {
1208            version: "dsfb-gpu-debug-case-0.1",
1209            backend: "cuda",
1210            mode: EmissionMode::Throughput,
1211            hashes: IntermediateHashes {
1212                input_catalog: z,
1213                contract: z,
1214                bank: z,
1215                detector_registry: z,
1216                kernel_sequence: z,
1217                window_feature: z,
1218                residual_field: z,
1219                sign_field: z,
1220                detector_cell: z,
1221                consensus_grid: z,
1222                candidate_interval: z,
1223                episode: z,
1224            },
1225            episodes: Vec::new(),
1226            final_case_file_hash: [0u8; 32],
1227            final_verdict: FinalVerdict::ReplayAdmissible,
1228        }
1229    }
1230
1231    fn replay() -> ReplayVerification {
1232        let mut tc = BTreeMap::new();
1233        tc.insert("rustc".to_string(), "1.84.0 stable".to_string());
1234        tc.insert("cuda".to_string(), "13.2".to_string());
1235        tc.insert("driver".to_string(), "test-stub".to_string());
1236        tc.insert("gpu".to_string(), "RTX 4080 SUPER".to_string());
1237        ReplayVerification {
1238            run_count: 2,
1239            casefile_json_sha256_run1: "0".repeat(64),
1240            casefile_json_sha256_run2: "0".repeat(64),
1241            episodes_jsonl_sha256_run1: "0".repeat(64),
1242            episodes_jsonl_sha256_run2: "0".repeat(64),
1243            final_case_file_hash_run1_hex: "0".repeat(64),
1244            final_case_file_hash_run2_hex: "0".repeat(64),
1245            episode_count_run1: 0,
1246            episode_count_run2: 0,
1247            toolchain: tc,
1248        }
1249    }
1250
1251    #[test]
1252    fn render_is_byte_stable_across_two_calls() {
1253        let mani = manifest();
1254        let sch = schema();
1255        let case = empty_case();
1256        let rep = replay();
1257        let render_a = render_audit_report_html(&mani, &sch, &case, &rep);
1258        let render_b = render_audit_report_html(&mani, &sch, &case, &rep);
1259        assert_eq!(render_a, render_b);
1260        assert!(render_a.starts_with("<!DOCTYPE html>"));
1261        assert!(render_a.contains("AIOps Challenge 2018 KPI"));
1262    }
1263
1264    #[test]
1265    fn render_contains_all_seven_sections() {
1266        let html = render_audit_report_html(&manifest(), &schema(), &empty_case(), &replay());
1267        for section in [
1268            "1. Input provenance",
1269            "2. Residual-projection lowering law",
1270            "3. Run configuration",
1271            "4. Admitted episodes",
1272            "5. Stage digest / hash chain",
1273            "6. Replay verification",
1274            "7. Limitations and non-claims",
1275        ] {
1276            assert!(html.contains(section), "missing section: {section}");
1277        }
1278    }
1279
1280    #[test]
1281    fn render_carries_every_non_claim() {
1282        let html = render_audit_report_html(&manifest(), &schema(), &empty_case(), &replay());
1283        for nc in NON_CLAIMS {
1284            assert!(html.contains(&escape(nc)), "missing non-claim: {nc}");
1285        }
1286    }
1287
1288    #[test]
1289    fn empty_episodes_render_honestly() {
1290        let html = render_audit_report_html(&manifest(), &schema(), &empty_case(), &replay());
1291        assert!(html.contains("No episodes were admitted"));
1292    }
1293
1294    #[test]
1295    fn replay_admits_when_hashes_agree() {
1296        let r = replay();
1297        assert!(r.admits());
1298    }
1299
1300    #[test]
1301    fn replay_rejects_when_casefile_hashes_diverge() {
1302        let mut r = replay();
1303        r.casefile_json_sha256_run2 = "1".repeat(64);
1304        assert!(!r.admits());
1305    }
1306
1307    // ----- S-REAL.1.1 — renderer richness acceptance tests -----
1308
1309    use dsfb_gpu_debug_core::bank::{BankMotif, Episode};
1310    use dsfb_gpu_debug_core::fixed::Q16;
1311    use dsfb_gpu_debug_core::grammar::{GrammarState, ReasonCode};
1312
1313    fn mk_ep(
1314        entity: u32,
1315        start: u32,
1316        end: u32,
1317        motif: BankMotif,
1318        reason: ReasonCode,
1319        bits: u32,
1320    ) -> Episode {
1321        Episode {
1322            entity_id: entity,
1323            start_window: start,
1324            end_window: end,
1325            motif,
1326            reason,
1327            peak_state: GrammarState::Boundary,
1328            peak_residual_q: Q16(100),
1329            peak_drift_q: Q16(200),
1330            peak_slew_q: Q16(50),
1331            detector_bit_count: bits,
1332            admission: None,
1333        }
1334    }
1335
1336    fn case_with_episodes(episodes: Vec<Episode>) -> CaseFile {
1337        let mut c = empty_case();
1338        c.episodes = episodes;
1339        c
1340    }
1341
1342    fn nonempty_case() -> CaseFile {
1343        case_with_episodes(vec![
1344            mk_ep(
1345                0,
1346                5,
1347                12,
1348                BankMotif::LatencyRamp,
1349                ReasonCode::SustainedOutwardDrift,
1350                8,
1351            ),
1352            mk_ep(
1353                0,
1354                20,
1355                25,
1356                BankMotif::ErrorBurst,
1357                ReasonCode::AbruptSlewViolation,
1358                6,
1359            ),
1360            mk_ep(
1361                3,
1362                1,
1363                4,
1364                BankMotif::OscillationInstability,
1365                ReasonCode::RecurrentBoundaryGrazing,
1366                2,
1367            ),
1368            mk_ep(
1369                3,
1370                10,
1371                30,
1372                BankMotif::SustainedDegradation,
1373                ReasonCode::EnvelopeViolation,
1374                10,
1375            ),
1376            mk_ep(
1377                7,
1378                8,
1379                9,
1380                BankMotif::ConfuserTransient,
1381                ReasonCode::SingleCrossing,
1382                1,
1383            ),
1384        ])
1385    }
1386
1387    #[test]
1388    fn render_carries_summary_card() {
1389        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1390        assert!(html.contains("class=\"summary-card\""));
1391        assert!(html.contains("Dataset summary"));
1392        assert!(html.contains("shape (entities × windows)"));
1393        assert!(html.contains("episodes admitted"));
1394        assert!(html.contains("byte-identical replay"));
1395    }
1396
1397    #[test]
1398    fn render_summary_card_includes_source_class_and_truncated_hash() {
1399        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1400        // S-REAL.1.1.1: source_class is in the summary card.
1401        assert!(html.contains("source class"));
1402        assert!(html.contains("TimeSeriesAnomaly"));
1403        // S-REAL.1.1.1: truncated final_case_file_hash (16 hex chars) in summary.
1404        assert!(html.contains("final_case_file_hash (first 16 hex)"));
1405    }
1406
1407    #[test]
1408    fn render_carries_replay_proof_card_with_artifact_hashes() {
1409        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1410        assert!(html.contains("class=\"replay-proof-card\""));
1411        assert!(html.contains("Replay proof"));
1412        assert!(html.contains("casefile.json SHA-256"));
1413        assert!(html.contains("episodes.jsonl SHA-256"));
1414        assert!(html.contains("audit_report.html SHA-256"));
1415        // Honest disclosure of the self-referential-hash impossibility:
1416        assert!(html.contains("externally pinned in replay_verification.txt"));
1417        assert!(html.contains("final_case_file_hash (full)"));
1418    }
1419
1420    #[test]
1421    fn render_carries_motif_histogram() {
1422        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1423        assert!(html.contains("4a. Motif histogram"));
1424        // All 5 distinct motifs appear by wire name (sorted ascending).
1425        for name in [
1426            "ConfuserTransient",
1427            "ErrorBurst",
1428            "LatencyRamp",
1429            "OscillationInstability",
1430            "SustainedDegradation",
1431        ] {
1432            assert!(html.contains(name), "motif missing: {name}");
1433        }
1434    }
1435
1436    #[test]
1437    fn render_carries_reason_code_histogram_with_prose() {
1438        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1439        assert!(html.contains("4b. Reason-code histogram"));
1440        // The standardised structural-not-causal tail appears in every prose entry.
1441        let count = html
1442            .matches("DSFB interprets this structurally, not as a ground-truth causal diagnosis.")
1443            .count();
1444        assert!(
1445            count >= 5,
1446            "expected at least 5 reason-prose tails, got {count}"
1447        );
1448    }
1449
1450    #[test]
1451    fn render_carries_entity_summary_with_dominant_motif() {
1452        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1453        assert!(html.contains("4c. Entity summary"));
1454        // Three distinct entities (0, 3, 7) so 3 rows in the table.
1455        assert!(html.contains("first_window"));
1456        assert!(html.contains("dominant_motif"));
1457    }
1458
1459    #[test]
1460    fn render_carries_episode_timeline() {
1461        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1462        assert!(html.contains("4d. Episode timeline"));
1463    }
1464
1465    #[test]
1466    fn render_carries_top_structural_spans_with_three_subtables() {
1467        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1468        assert!(html.contains("4e. Top structural spans"));
1469        assert!(html.contains("4e(a). Longest spans"));
1470        assert!(html.contains("4e(b). Highest-repeated entities"));
1471        assert!(html.contains("4e(c). Most-recurrent reason codes"));
1472    }
1473
1474    #[test]
1475    fn render_carries_motif_glossary_with_all_eight_motifs() {
1476        let html = render_audit_report_html(&manifest(), &schema(), &empty_case(), &replay());
1477        // Glossary renders ALL 8 motifs even when no episodes fired,
1478        // so the report is self-documenting for first-time readers.
1479        assert!(html.contains("4f. Plain-English motif glossary"));
1480        for name in [
1481            "LatencyRamp",
1482            "ErrorBurst",
1483            "SlewShockRecovery",
1484            "SustainedDegradation",
1485            "OscillationInstability",
1486            "LocalizedRouteFault",
1487            "FanoutCascadeCandidate",
1488            "ConfuserTransient",
1489        ] {
1490            assert!(html.contains(name), "motif glossary missing: {name}");
1491        }
1492    }
1493
1494    #[test]
1495    fn render_is_byte_stable_after_s_real_1_1_additions() {
1496        // Two renders of the SAME nonempty case must produce
1497        // byte-identical bytes — the S-REAL.1.1 additions must not
1498        // introduce any HashMap or wall-clock ordering.
1499        let case = nonempty_case();
1500        let a = render_audit_report_html(&manifest(), &schema(), &case, &replay());
1501        let b = render_audit_report_html(&manifest(), &schema(), &case, &replay());
1502        assert_eq!(a, b);
1503    }
1504
1505    /// Causal-diagnosis-language regression scanner.
1506    ///
1507    /// WHY: The S-REAL.1.1 plan-locked non-overclaim boundary forbids
1508    /// causal-correctness or ground-truth-anomaly claims in the new
1509    /// operator-facing surfaces (summary card, motif histogram,
1510    /// reason-code histogram, entity summary, episode timeline, top
1511    /// structural spans, motif glossary). The pre-existing section 7
1512    /// "Limitations and non-claims" is panel-locked verbatim and is
1513    /// allowed to NEGATE forbidden phrases (e.g. *"Does NOT claim
1514    /// DSFB outperforms ..."*); the scanner explicitly scopes itself
1515    /// to the new S-REAL.1.1 sub-sections to avoid false-positive
1516    /// matches on legitimate disclaimers.
1517    ///
1518    /// The standardised structural-not-causal tail (*"DSFB interprets
1519    /// this structurally, not as a ground-truth causal diagnosis"*)
1520    /// is allowed inside the scoped region because it is itself a
1521    /// disclaimer; the scanner subtracts those occurrences before
1522    /// asserting zero bare hits.
1523    #[test]
1524    fn rejects_causal_diagnosis_language() {
1525        const FORBIDDEN: &[&str] = &[
1526            "real root cause",
1527            "true anomaly",
1528            "ground-truth anomaly",
1529            "outperforms",
1530            "outperformed",
1531            "the real cause",
1532        ];
1533
1534        let html = render_audit_report_html(&manifest(), &schema(), &nonempty_case(), &replay());
1535        // Scope: from the start of the summary card to the start of
1536        // section 5 (Stage digest / hash chain). This covers every
1537        // new S-REAL.1.1 surface; the panel-locked non-claims block
1538        // (section 7) is outside the scope by construction.
1539        let start = html
1540            .find("class=\"summary-card\"")
1541            .expect("summary card present");
1542        let end = html.find("5. Stage digest").expect("section 5 present");
1543        let scoped = &html[start..end].to_ascii_lowercase();
1544        for needle in FORBIDDEN {
1545            assert!(
1546                !scoped.contains(needle),
1547                "S-REAL.1.1 operator-facing sub-sections contain forbidden \
1548                 causal-diagnosis phrase: {needle}"
1549            );
1550        }
1551
1552        // "causal diagnosis" is admissible ONLY inside the standardised
1553        // negation tail; bare occurrences (positive claims) fail.
1554        let bare_causal = scoped.matches("causal diagnosis").count()
1555            - scoped
1556                .matches("not as a ground-truth causal diagnosis")
1557                .count();
1558        assert_eq!(
1559            bare_causal, 0,
1560            "bare 'causal diagnosis' (outside the standardised disclaimer) \
1561             found in S-REAL.1.1 operator-facing sub-sections"
1562        );
1563    }
1564}