dsfb_gray/
evaluation.rs

1//! Reproducible public evaluation pipeline for DSFB gray-failure experiments.
2//!
3//! This module centralizes the deterministic evaluation used by the demo
4//! binary, the checked-in evaluation artifacts, and the generated public
5//! documentation snippets. The goal is simple: every published number should
6//! come from one executable pipeline.
7
8use crate::scan::AUDIT_SCORE_METHOD;
9use crate::{
10    generate_csv, run_scenario, AdmissibilityEnvelope, AsyncStarvationScenario,
11    ChannelBackpressureScenario, ClockDriftScenario, FaultScenario, ObserverConfig,
12    PartialPartitionScenario, ScenarioResult, WorkloadPhase,
13};
14use std::fs;
15use std::io;
16use std::path::{Path, PathBuf};
17
/// Disclaimer text stating that the DSFB audit performs structural checks
/// relevant to standards and certification but does not itself certify
/// compliance with any standard.
const AUDIT_NON_CERTIFICATION_STATEMENT: &str =
    "The DSFB audit includes standards- and certification-relevant structural checks, but it does not certify compliance with IEC, ISO, RTCA, MIL, NIST, or other standards.";
20
/// Structured result for one primary evaluation scenario.
///
/// One row is produced per scenario run under its recommended observer
/// configuration.
#[derive(Debug, Clone)]
pub struct PrimaryEvaluationRow {
    /// User-facing scenario label (for example `"Clock Drift"`).
    pub name: String,
    /// Full scenario result returned by `run_scenario`.
    pub result: ScenarioResult,
    /// CSV filename written for this scenario, placed next to the evaluation
    /// results file.
    pub csv_name: String,
}
31
/// One row of the 2D sensitivity sweep.
///
/// Each row records the outcome of running the default clock-drift scenario
/// with one `(sigma, persistence_window)` combination.
#[derive(Debug, Clone, Copy)]
pub struct SensitivitySweepRow {
    /// Envelope width multiplier; used directly as the residual bound of the
    /// symmetric envelope, with the drift and slew bounds scaled from it.
    pub sigma: f64,
    /// Persistence window.
    pub persistence_window: usize,
    /// Detection delay from injection start, or `None` when the result reports
    /// no delay.
    pub detection_delay: Option<u64>,
    /// Lead time before the scenario ends, or `None` when the result reports
    /// no lead time.
    pub lead_time: Option<u64>,
    /// Whether the scenario was detected at all.
    pub detected: bool,
    /// Whether the first anomaly occurred early in the evaluation window,
    /// i.e. at or after injection start and fewer than 100 steps after it.
    pub early_window_detection: bool,
    /// Whether a pre-injection false alarm occurred.
    pub has_false_alarm: bool,
    /// Boundary-state steps across the scenario.
    pub boundary_steps: u32,
    /// Violation-state steps across the scenario.
    pub violation_steps: u32,
}
54
/// One clean-window negative-control result.
///
/// Negative controls run a scenario whose injection start lies far beyond the
/// window length used for the primary scenarios, so any boundary or violation
/// step counts as a false alarm.
#[derive(Debug, Clone)]
pub struct NegativeControlRow {
    /// User-facing scenario label (for example `"Clock Drift (clean)"`).
    pub name: String,
    /// Full scenario result returned by `run_scenario`.
    pub result: ScenarioResult,
}
63
/// One row of the drift-rate elasticity sweep.
#[derive(Debug, Clone, Copy)]
pub struct DriftElasticityRow {
    /// Injected drift rate.
    pub drift_rate: f64,
    /// Whether the scenario was detected at this drift rate.
    pub result_detected: bool,
    /// Detection delay from injection, if any.
    pub detection_delay: Option<u64>,
    /// Lead time before the scenario ends, if any.
    pub lead_time: Option<u64>,
}
76
/// Fully structured public evaluation bundle.
///
/// Holds the output of every evaluation stage so that reports, CSV files, and
/// generated documentation can all be rendered from the same data.
#[derive(Debug, Clone)]
pub struct PublicEvaluationBundle {
    /// Recommended-configuration primary results, one row per scenario.
    pub primary: Vec<PrimaryEvaluationRow>,
    /// 42-point clock-drift sensitivity sweep (6 sigma values x 7 persistence
    /// windows).
    pub sensitivity_sweep: Vec<SensitivitySweepRow>,
    /// Negative-control results.
    pub negative_controls: Vec<NegativeControlRow>,
    /// First-boundary steps observed across reproducibility runs; the first
    /// entry is treated as the baseline.
    pub reproducibility_boundary_steps: Vec<u64>,
    /// Drift-rate elasticity sweep.
    pub drift_elasticity: Vec<DriftElasticityRow>,
}
91
/// Paths written by the public-artifact regeneration workflow.
///
/// All paths are derived from the root directory passed to
/// `write_public_artifacts`.
#[derive(Debug, Clone)]
pub struct PublicArtifactPaths {
    /// Full evaluation report (`data/evaluation_results.txt`).
    pub evaluation_results_path: PathBuf,
    /// Demo output report (`data/demo-output.txt`).
    pub demo_output_path: PathBuf,
    /// Sensitivity sweep CSV (`data/sensitivity_sweep.csv`).
    pub sensitivity_sweep_path: PathBuf,
    /// Generated docs directory (`docs/generated`).
    pub generated_docs_dir: PathBuf,
    /// Generated paper directory (`paper/generated`).
    pub generated_paper_dir: PathBuf,
}
106
107/// Build the full deterministic public evaluation bundle.
108pub fn build_public_evaluation() -> PublicEvaluationBundle {
109    let primary = build_primary_evaluation();
110    let sensitivity_sweep = build_sensitivity_sweep();
111    let negative_controls = build_negative_controls();
112    let reproducibility_boundary_steps = build_reproducibility_steps();
113    let drift_elasticity = build_drift_elasticity();
114
115    PublicEvaluationBundle {
116        primary,
117        sensitivity_sweep,
118        negative_controls,
119        reproducibility_boundary_steps,
120        drift_elasticity,
121    }
122}
123
/// Count how many elements of `items` satisfy `predicate`.
fn count_true<T>(items: &[T], predicate: impl Fn(&T) -> bool) -> usize {
    items.iter().filter(|item| predicate(*item)).count()
}
133
134fn find_negative_control<'a>(
135    bundle: &'a PublicEvaluationBundle,
136    name: &str,
137) -> Option<&'a NegativeControlRow> {
138    bundle.negative_controls.iter().find(|row| row.name == name)
139}
140
/// Render an optional step count as text, using `-` when the value is absent.
fn render_optional_number(value: Option<u64>) -> String {
    value.map_or_else(|| "-".to_string(), |number| number.to_string())
}
147
/// Render an optional step count as `"<n> steps"`, or `missing` when absent.
fn render_optional_steps(value: Option<u64>, missing: &'static str) -> String {
    if let Some(steps) = value {
        return format!("{steps} steps");
    }
    missing.to_string()
}
154
155/// Render the full human-readable evaluation report.
156pub fn render_public_evaluation_report(bundle: &PublicEvaluationBundle) -> String {
157    let mut out = String::new();
158    let stats = compute_evaluation_summary(bundle);
159
160    push_evaluation_header(&mut out);
161    push_primary_section(&mut out, bundle);
162    push_sensitivity_section(&mut out, bundle);
163    push_negative_control_section(&mut out, bundle);
164    push_reproducibility_section(&mut out, bundle, stats.reproducibility_baseline);
165    push_structural_discrimination_section(&mut out, bundle);
166    push_drift_elasticity_section(&mut out, bundle);
167    push_evaluation_summary(&mut out, bundle, &stats);
168
169    out
170}
171
/// Aggregate counts derived from a bundle for the report's closing summary.
struct EvaluationSummaryStats {
    /// Number of primary rows whose result reports a detection.
    primary_detected: usize,
    /// Number of sensitivity-sweep rows flagged as detected.
    detected_sweep_points: usize,
    /// Number of sensitivity-sweep rows with a pre-injection false alarm.
    sweep_false_alarm_points: usize,
    /// Number of negative controls with zero boundary and zero violation steps.
    clean_control_clear: usize,
    /// First recorded reproducibility boundary step, or 0 when no runs exist.
    reproducibility_baseline: u64,
    /// Number of reproducibility runs equal to the baseline (the baseline run
    /// itself is included in this count).
    reproducibility_matches: usize,
}
180
181fn compute_evaluation_summary(bundle: &PublicEvaluationBundle) -> EvaluationSummaryStats {
182    let reproducibility_baseline = bundle
183        .reproducibility_boundary_steps
184        .first()
185        .copied()
186        .unwrap_or(0);
187
188    EvaluationSummaryStats {
189        primary_detected: count_true(&bundle.primary, |row| row.result.detected()),
190        detected_sweep_points: count_true(&bundle.sensitivity_sweep, |row| row.detected),
191        sweep_false_alarm_points: count_true(&bundle.sensitivity_sweep, |row| row.has_false_alarm),
192        clean_control_clear: count_true(&bundle.negative_controls, |row| {
193            row.result.total_boundary_steps + row.result.total_violation_steps == 0
194        }),
195        reproducibility_baseline,
196        reproducibility_matches: count_true(&bundle.reproducibility_boundary_steps, |step| {
197            *step == reproducibility_baseline
198        }),
199    }
200}
201
/// Append the boxed report banner, followed by a blank line, to `out`.
fn push_evaluation_header(out: &mut String) {
    let banner = [
        "╔══════════════════════════════════════════════════════════════╗\n",
        "║  DSFB Gray Failure Detection — Full Empirical Evaluation    ║\n",
        "║  Invariant Forge LLC — Deterministic Structural Engine      ║\n",
        "╚══════════════════════════════════════════════════════════════╝\n\n",
    ];
    banner.iter().for_each(|line| out.push_str(line));
}
208
209fn push_primary_section(out: &mut String, bundle: &PublicEvaluationBundle) {
210    out.push_str("══ Section 1: Primary Evaluation (Recommended Configuration) ══\n\n");
211    out.push_str("Table 1: Primary Results (Recommended Configuration)\n");
212    out.push_str(
213        "┌─────────────────────┬───────┬──────────┬───────┬──────┬────────┬─────────┬──────────┐\n",
214    );
215    out.push_str(
216        "│ Scenario            │ Steps │ Inj.Start│ Det.  │Delay │ Lead   │ FalseAl │ Viol.Steps│\n",
217    );
218    out.push_str(
219        "├─────────────────────┼───────┼──────────┼───────┼──────┼────────┼─────────┼──────────┤\n",
220    );
221    for row in &bundle.primary {
222        let result = &row.result;
223        out.push_str(&format!(
224            "│ {:19} │  {:3}  │    {:3}   │  {}  │ {:>4} │  {:>4}  │    {}    │   {:>4}   │\n",
225            row.name,
226            result.total_steps,
227            result.injection_start,
228            if result.detected() { "YES" } else { "NO " },
229            render_optional_number(result.detection_delay_from_injection()),
230            render_optional_number(result.detection_lead_time()),
231            result.false_alarms_before_injection,
232            result.total_violation_steps
233        ));
234    }
235    out.push_str(
236        "└─────────────────────┴───────┴──────────┴───────┴──────┴────────┴─────────┴──────────┘\n\n",
237    );
238}
239
240fn push_sensitivity_section(out: &mut String, bundle: &PublicEvaluationBundle) {
241    out.push_str("══ Section 2: Sensitivity Sweep (Clock Drift, 42-point 2D) ══\n\n");
242    out.push_str("Table 2: Sensitivity Sweep — Clock Drift Scenario\n");
243    out.push_str("┌──────┬─────┬──────────┬───────┬──────┬────────┐\n");
244    out.push_str("│  σ   │  P  │ Med.Lead │ Det.% │ EW%  │ False% │\n");
245    out.push_str("├──────┼─────┼──────────┼───────┼──────┼────────┤\n");
246    for row in &bundle.sensitivity_sweep {
247        out.push_str(&format!(
248            "│ {:4.1} │ {:3} │   {:>4}   │  {:3}  │ {:3}  │  {:>3}   │\n",
249            row.sigma,
250            row.persistence_window,
251            row.lead_time.unwrap_or(0),
252            if row.detected { 100 } else { 0 },
253            if row.early_window_detection { 100 } else { 0 },
254            if row.has_false_alarm { 100 } else { 0 },
255        ));
256    }
257    out.push_str("└──────┴─────┴──────────┴───────┴──────┴────────┘\n");
258    out.push_str("  CSV: data/sensitivity_sweep.csv\n\n");
259}
260
261fn push_negative_control_section(out: &mut String, bundle: &PublicEvaluationBundle) {
262    out.push_str("══ Section 3: Negative Control — No-Fault Baseline ══\n\n");
263    out.push_str("Table 3: Negative Control — False Alarm Analysis on Healthy Windows\n");
264    out.push_str("┌─────────────────────┬──────────┬────────────┬────────────┬────────────┐\n");
265    out.push_str("│ Scenario            │ Samples  │ Boundary   │ Violation  │ False Rate │\n");
266    out.push_str("├─────────────────────┼──────────┼────────────┼────────────┼────────────┤\n");
267    for row in &bundle.negative_controls {
268        let result = &row.result;
269        out.push_str(&format!(
270            "│ {:19} │   {:4}   │     {:4}   │     {:4}   │   {:5.1}%   │\n",
271            row.name,
272            result.total_steps,
273            result.total_boundary_steps,
274            result.total_violation_steps,
275            negative_control_false_rate(result)
276        ));
277    }
278    out.push_str("└─────────────────────┴──────────┴────────────┴────────────┴────────────┘\n\n");
279}
280
281fn negative_control_false_rate(result: &ScenarioResult) -> f64 {
282    if result.total_steps > 0 {
283        (result.total_boundary_steps + result.total_violation_steps) as f64
284            / result.total_steps as f64
285            * 100.0
286    } else {
287        0.0
288    }
289}
290
291fn push_reproducibility_section(out: &mut String, bundle: &PublicEvaluationBundle, baseline: u64) {
292    out.push_str("══ Section 4: Deterministic Reproducibility Verification ══\n\n");
293    for (idx, step) in bundle.reproducibility_boundary_steps.iter().enumerate() {
294        out.push_str(&format!(
295            "  Run {:2}: first_boundary=step {} {}\n",
296            idx + 1,
297            step,
298            reproducibility_status_suffix(idx, *step, baseline)
299        ));
300    }
301    out.push_str(&format!(
302        "  Deterministic: {}\n\n",
303        if reproducibility_verified(bundle) {
304            "VERIFIED — 10/10 runs identical"
305        } else {
306            "FAILED"
307        }
308    ));
309}
310
/// Label for one reproducibility run: the first run is the baseline, every
/// later run either matches the baseline step or is flagged as a mismatch.
fn reproducibility_status_suffix(idx: usize, step: u64, baseline: u64) -> &'static str {
    match (idx, step == baseline) {
        (0, _) => "(baseline)",
        (_, true) => "✓ matches",
        (_, false) => "✗ MISMATCH",
    }
}
320
321fn push_structural_discrimination_section(out: &mut String, bundle: &PublicEvaluationBundle) {
322    out.push_str("══ Section 5: Multi-Scenario Structural Discrimination ══\n\n");
323    out.push_str("Table 4: Structural Signatures by Scenario at Detection Point\n");
324    out.push_str("┌─────────────────────┬──────────┬──────────┬──────────┬────────────┐\n");
325    out.push_str("│ Scenario            │ Residual │  Drift   │   Slew   │ Drift/Slew │\n");
326    out.push_str("├─────────────────────┼──────────┼──────────┼──────────┼────────────┤\n");
327    for row in &bundle.primary {
328        if let Some(signature_row) = render_structural_signature_row(row) {
329            out.push_str(&signature_row);
330        }
331    }
332    out.push_str("└─────────────────────┴──────────┴──────────┴──────────┴────────────┘\n\n");
333    out.push_str("Interpretation: Each scenario produces a structurally distinct signature\n");
334    out.push_str("at its detection point. Clock drift has high drift/slew ratio (pure drift).\n");
335    out.push_str("Backpressure has lower ratio (accelerating growth → positive slew).\n");
336    out.push_str(
337        "This discrimination is information that scalar threshold alerts do not provide.\n\n",
338    );
339}
340
341fn render_structural_signature_row(row: &PrimaryEvaluationRow) -> Option<String> {
342    let det_step = row.result.first_anomaly_step?;
343    let sample = row
344        .result
345        .samples
346        .iter()
347        .find(|sample| sample.step == det_step)?;
348    let ratio = if sample.slew.abs() > 1e-10 {
349        sample.drift / sample.slew
350    } else {
351        f64::INFINITY
352    };
353    let ratio_str = if ratio.is_infinite() {
354        "∞ (pure drift)".to_string()
355    } else {
356        format!("{ratio:.1}")
357    };
358
359    Some(format!(
360        "│ {:19} │ {:>8.4} │ {:>8.6} │ {:>8.6} │ {:>10} │\n",
361        row.name, sample.residual, sample.drift, sample.slew, ratio_str
362    ))
363}
364
365fn push_drift_elasticity_section(out: &mut String, bundle: &PublicEvaluationBundle) {
366    out.push_str("══ Section 6: Drift Rate Variation — Detection Elasticity ══\n\n");
367    out.push_str("Table 5: Clock Drift Detection vs. Drift Rate\n");
368    out.push_str("┌────────────┬───────┬──────┬────────┐\n");
369    out.push_str("│ Drift Rate │ Det.  │Delay │  Lead  │\n");
370    out.push_str("├────────────┼───────┼──────┼────────┤\n");
371    for row in &bundle.drift_elasticity {
372        out.push_str(&format!(
373            "│   {:6.3}   │  {}  │ {:>4} │  {:>4}  │\n",
374            row.drift_rate,
375            if row.result_detected { "YES" } else { "NO " },
376            render_optional_number(row.detection_delay),
377            render_optional_number(row.lead_time)
378        ));
379    }
380    out.push_str("└────────────┴───────┴──────┴────────┘\n\n");
381}
382
383fn push_evaluation_summary(
384    out: &mut String,
385    bundle: &PublicEvaluationBundle,
386    stats: &EvaluationSummaryStats,
387) {
388    out.push_str("══════════════════════════════════════════════════════════════\n");
389    out.push_str("  EVALUATION COMPLETE\n");
390    out.push_str(&format!(
391        "  • {}/4 primary scenarios: gray failures detected\n",
392        stats.primary_detected
393    ));
394    out.push_str(&format!(
395        "  • 42-point sensitivity sweep completed: {} detected, {} with pre-injection alarms\n",
396        stats.detected_sweep_points, stats.sweep_false_alarm_points
397    ));
398    out.push_str(&format!(
399        "  • Clean-window controls: {}/{} had zero anomaly steps\n",
400        stats.clean_control_clear,
401        bundle.negative_controls.len()
402    ));
403    out.push_str(&format!(
404        "  • Deterministic reproducibility: {}/{} runs identical\n",
405        stats.reproducibility_matches,
406        bundle.reproducibility_boundary_steps.len()
407    ));
408    out.push_str("  • Structural discrimination: distinct signatures per scenario\n");
409    out.push_str("  • Drift rate elasticity: 8-point sweep completed\n");
410    out.push_str("══════════════════════════════════════════════════════════════\n");
411}
412
413/// Write the canonical public evaluation artifacts and generated snippets.
414pub fn write_public_artifacts(
415    bundle: &PublicEvaluationBundle,
416    root: &Path,
417) -> io::Result<PublicArtifactPaths> {
418    let paths = public_artifact_paths(root);
419    fs::create_dir_all(&paths.generated_docs_dir)?;
420    fs::create_dir_all(&paths.generated_paper_dir)?;
421    if let Some(data_dir) = paths.evaluation_results_path.parent() {
422        fs::create_dir_all(data_dir)?;
423    }
424
425    let report = render_public_evaluation_report(bundle);
426    write_primary_public_outputs(bundle, &paths, &report)?;
427    let snippets = generated_public_snippets(bundle);
428    write_generated_public_docs(bundle, &paths, &snippets)?;
429    rewrite_public_marked_sections(root, bundle, &snippets)?;
430    Ok(paths)
431}
432
433fn public_artifact_paths(root: &Path) -> PublicArtifactPaths {
434    let data_dir = root.join("data");
435    PublicArtifactPaths {
436        evaluation_results_path: data_dir.join("evaluation_results.txt"),
437        demo_output_path: data_dir.join("demo-output.txt"),
438        sensitivity_sweep_path: data_dir.join("sensitivity_sweep.csv"),
439        generated_docs_dir: root.join("docs/generated"),
440        generated_paper_dir: root.join("paper/generated"),
441    }
442}
443
444fn write_primary_public_outputs(
445    bundle: &PublicEvaluationBundle,
446    paths: &PublicArtifactPaths,
447    report: &str,
448) -> io::Result<()> {
449    fs::write(&paths.evaluation_results_path, report)?;
450    fs::write(&paths.demo_output_path, report)?;
451    fs::write(
452        &paths.sensitivity_sweep_path,
453        render_sensitivity_sweep_csv(bundle),
454    )?;
455    for row in &bundle.primary {
456        let output_root = paths
457            .evaluation_results_path
458            .parent()
459            .unwrap_or_else(|| Path::new("."));
460        fs::write(output_root.join(&row.csv_name), generate_csv(&row.result))?;
461    }
462    Ok(())
463}
464
/// Rendered text snippets that are written to generated files and also passed
/// to the marked-section rewrite of existing documents.
struct GeneratedPublicSnippets {
    /// README results table; written as `README_RESULTS.md` and used for the
    /// `DSFB:README_RESULTS` section of `README.md`.
    readme_results: String,
    /// Evidence ledger; written as `EVIDENCE_LEDGER.md` and used for the
    /// `DSFB:EVIDENCE_LEDGER` section of `README.md`.
    evidence_ledger: String,
    /// Claim ledger; written to both the docs and paper generated directories
    /// and used for the `DSFB:PAPER_CLAIM_LEDGER` section of `paper/paper.md`.
    claim_ledger: String,
    /// Audit contract; written to both the docs and paper generated
    /// directories and used for the `DSFB:PAPER_AUDIT_CONTRACT` section of
    /// `paper/paper.md`.
    audit_contract: String,
    /// Paper results table in Markdown; written as `results_summary.md` and
    /// used for the `DSFB:PAPER_RESULTS` section of `paper/paper.md`.
    paper_results_md: String,
}
472
473fn generated_public_snippets(bundle: &PublicEvaluationBundle) -> GeneratedPublicSnippets {
474    GeneratedPublicSnippets {
475        readme_results: render_readme_results_section(bundle),
476        evidence_ledger: render_evidence_ledger_md(bundle),
477        claim_ledger: render_claim_ledger_md(bundle),
478        audit_contract: render_audit_contract_md(),
479        paper_results_md: render_paper_results_table_md(bundle),
480    }
481}
482
483fn write_generated_public_docs(
484    bundle: &PublicEvaluationBundle,
485    paths: &PublicArtifactPaths,
486    snippets: &GeneratedPublicSnippets,
487) -> io::Result<()> {
488    fs::write(
489        paths.generated_docs_dir.join("README_RESULTS.md"),
490        &snippets.readme_results,
491    )?;
492    fs::write(
493        paths.generated_docs_dir.join("EVIDENCE_LEDGER.md"),
494        &snippets.evidence_ledger,
495    )?;
496    fs::write(
497        paths.generated_docs_dir.join("CLAIM_LEDGER.md"),
498        &snippets.claim_ledger,
499    )?;
500    fs::write(
501        paths.generated_docs_dir.join("AUDIT_CONTRACT.md"),
502        &snippets.audit_contract,
503    )?;
504    fs::write(
505        paths.generated_paper_dir.join("results_summary.tex"),
506        render_paper_results_table_tex(bundle),
507    )?;
508    fs::write(
509        paths.generated_paper_dir.join("claim_ledger.tex"),
510        render_paper_claim_ledger_tex(bundle),
511    )?;
512    fs::write(
513        paths.generated_paper_dir.join("results_summary.md"),
514        &snippets.paper_results_md,
515    )?;
516    fs::write(
517        paths.generated_paper_dir.join("claim_ledger.md"),
518        &snippets.claim_ledger,
519    )?;
520    fs::write(
521        paths.generated_paper_dir.join("audit_contract.md"),
522        &snippets.audit_contract,
523    )?;
524    fs::write(
525        paths.generated_paper_dir.join("audit_contract.tex"),
526        render_paper_audit_contract_tex(),
527    )?;
528    Ok(())
529}
530
531fn rewrite_public_marked_sections(
532    root: &Path,
533    _bundle: &PublicEvaluationBundle,
534    snippets: &GeneratedPublicSnippets,
535) -> io::Result<()> {
536    rewrite_marked_section_if_present(
537        &root.join("README.md"),
538        "<!-- DSFB:README_RESULTS:BEGIN -->",
539        "<!-- DSFB:README_RESULTS:END -->",
540        &snippets.readme_results,
541    )?;
542    rewrite_marked_section_if_present(
543        &root.join("README.md"),
544        "<!-- DSFB:EVIDENCE_LEDGER:BEGIN -->",
545        "<!-- DSFB:EVIDENCE_LEDGER:END -->",
546        &snippets.evidence_ledger,
547    )?;
548    rewrite_marked_section_if_present(
549        &root.join("paper/paper.md"),
550        "<!-- DSFB:PAPER_RESULTS:BEGIN -->",
551        "<!-- DSFB:PAPER_RESULTS:END -->",
552        &snippets.paper_results_md,
553    )?;
554    rewrite_marked_section_if_present(
555        &root.join("paper/paper.md"),
556        "<!-- DSFB:PAPER_CLAIM_LEDGER:BEGIN -->",
557        "<!-- DSFB:PAPER_CLAIM_LEDGER:END -->",
558        &snippets.claim_ledger,
559    )?;
560    rewrite_marked_section_if_present(
561        &root.join("paper/paper.md"),
562        "<!-- DSFB:PAPER_AUDIT_CONTRACT:BEGIN -->",
563        "<!-- DSFB:PAPER_AUDIT_CONTRACT:END -->",
564        &snippets.audit_contract,
565    )?;
566    Ok(())
567}
568
569fn rewrite_marked_section_if_present(
570    path: &Path,
571    start_marker: &str,
572    end_marker: &str,
573    generated: &str,
574) -> io::Result<()> {
575    if !path.exists() {
576        return Ok(());
577    }
578    rewrite_marked_section(path, start_marker, end_marker, generated)
579}
580
581/// Whether every reproducibility run matched the first baseline run.
582pub fn reproducibility_verified(bundle: &PublicEvaluationBundle) -> bool {
583    let Some(first) = bundle.reproducibility_boundary_steps.first().copied() else {
584        return false;
585    };
586    bundle
587        .reproducibility_boundary_steps
588        .iter()
589        .all(|step| *step == first)
590}
591
592fn build_primary_evaluation() -> Vec<PrimaryEvaluationRow> {
593    vec![
594        primary_clock_drift_row(),
595        primary_partial_partition_row(),
596        primary_backpressure_row(),
597        primary_async_starvation_row(),
598    ]
599}
600
601fn primary_clock_drift_row() -> PrimaryEvaluationRow {
602    primary_row(
603        "Clock Drift",
604        "clock_drift.csv",
605        &mut ClockDriftScenario::default_scenario(),
606        symmetric_config(20, 2.0, 0.1, 0.05),
607    )
608}
609
610fn primary_partial_partition_row() -> PrimaryEvaluationRow {
611    primary_row(
612        "Partial Partition",
613        "partial_partition.csv",
614        &mut PartialPartitionScenario::default_scenario(),
615        symmetric_config(15, 3.0, 0.15, 0.08),
616    )
617}
618
619fn primary_backpressure_row() -> PrimaryEvaluationRow {
620    primary_row(
621        "Channel Backpressure",
622        "channel_backpressure.csv",
623        &mut ChannelBackpressureScenario::default_scenario(),
624        symmetric_config(15, 100.0, 10.0, 5.0),
625    )
626}
627
628fn primary_async_starvation_row() -> PrimaryEvaluationRow {
629    primary_row(
630        "Async Starvation",
631        "async_starvation.csv",
632        &mut AsyncStarvationScenario::default_scenario(),
633        symmetric_config(15, 30.0, 3.0, 1.5),
634    )
635}
636
637fn primary_row(
638    name: &str,
639    csv_name: &str,
640    scenario: &mut dyn FaultScenario,
641    config: ObserverConfig,
642) -> PrimaryEvaluationRow {
643    PrimaryEvaluationRow {
644        name: name.to_string(),
645        csv_name: csv_name.to_string(),
646        result: run_scenario(scenario, &config),
647    }
648}
649
650fn symmetric_config(
651    persistence_window: usize,
652    residual: f64,
653    drift: f64,
654    slew: f64,
655) -> ObserverConfig {
656    ObserverConfig {
657        persistence_window,
658        hysteresis_count: 3,
659        default_envelope: AdmissibilityEnvelope::symmetric(
660            residual,
661            drift,
662            slew,
663            WorkloadPhase::SteadyState,
664        ),
665        ..ObserverConfig::fast_response()
666    }
667}
668
669fn build_sensitivity_sweep() -> Vec<SensitivitySweepRow> {
670    let sigma_values = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0];
671    let p_values: [usize; 7] = [5, 10, 15, 20, 25, 30, 40];
672    let mut rows = Vec::new();
673
674    for &sigma in &sigma_values {
675        for &p in &p_values {
676            let config = ObserverConfig {
677                persistence_window: p,
678                hysteresis_count: 3,
679                default_envelope: AdmissibilityEnvelope::symmetric(
680                    sigma,
681                    sigma * 0.05,
682                    sigma * 0.025,
683                    WorkloadPhase::SteadyState,
684                ),
685                ..ObserverConfig::fast_response()
686            };
687            let result = run_scenario(&mut ClockDriftScenario::default_scenario(), &config);
688            rows.push(SensitivitySweepRow {
689                sigma,
690                persistence_window: p,
691                detection_delay: result.detection_delay_from_injection(),
692                lead_time: result.detection_lead_time(),
693                detected: result.detected(),
694                early_window_detection: result.first_anomaly_step.is_some_and(|step| {
695                    step >= result.injection_start && step < result.injection_start + 100
696                }),
697                has_false_alarm: result.false_alarms_before_injection > 0,
698                boundary_steps: result.total_boundary_steps,
699                violation_steps: result.total_violation_steps,
700            });
701        }
702    }
703    rows
704}
705
706fn build_negative_controls() -> Vec<NegativeControlRow> {
707    vec![
708        clean_clock_drift_row(),
709        clean_partition_row(),
710        clean_backpressure_row(),
711        clean_starvation_row(),
712    ]
713}
714
715fn clean_clock_drift_row() -> NegativeControlRow {
716    negative_control_row(
717        "Clock Drift (clean)",
718        &mut ClockDriftScenario::new(5.0, 0.05, 999, 200, 0.02),
719        symmetric_config(20, 2.0, 0.1, 0.05),
720    )
721}
722
723fn clean_partition_row() -> NegativeControlRow {
724    negative_control_row(
725        "Partition (clean)",
726        &mut PartialPartitionScenario {
727            baseline: 5.0,
728            start: 999,
729            duration: 200,
730            rate: 0.08,
731            burst: 3.0,
732            burst_dur: 10,
733            noise_state: 137,
734            seed: 137,
735        },
736        symmetric_config(15, 3.0, 0.15, 0.08),
737    )
738}
739
740fn clean_backpressure_row() -> NegativeControlRow {
741    negative_control_row(
742        "Backpressure (clean)",
743        &mut ChannelBackpressureScenario {
744            baseline: 100.0,
745            start: 999,
746            duration: 200,
747            rate: 5.0,
748            noise_state: 271,
749            seed: 271,
750        },
751        symmetric_config(15, 100.0, 10.0, 5.0),
752    )
753}
754
755fn clean_starvation_row() -> NegativeControlRow {
756    negative_control_row(
757        "Starvation (clean)",
758        &mut AsyncStarvationScenario {
759            baseline: 50.0,
760            start: 999,
761            duration: 200,
762            rate: 2.0,
763            noise_state: 313,
764            seed: 313,
765        },
766        symmetric_config(15, 30.0, 3.0, 1.5),
767    )
768}
769
770fn negative_control_row(
771    name: &str,
772    scenario: &mut dyn FaultScenario,
773    config: ObserverConfig,
774) -> NegativeControlRow {
775    NegativeControlRow {
776        name: name.to_string(),
777        result: run_scenario(scenario, &config),
778    }
779}
780
781fn build_reproducibility_steps() -> Vec<u64> {
782    let config = ObserverConfig {
783        persistence_window: 20,
784        hysteresis_count: 3,
785        default_envelope: AdmissibilityEnvelope::symmetric(
786            2.0,
787            0.1,
788            0.05,
789            WorkloadPhase::SteadyState,
790        ),
791        ..ObserverConfig::fast_response()
792    };
793
794    let mut steps = Vec::with_capacity(10);
795    for _ in 0..10 {
796        let result = run_scenario(&mut ClockDriftScenario::default_scenario(), &config);
797        steps.push(result.first_boundary_step.unwrap_or(999));
798    }
799    steps
800}
801
802fn build_drift_elasticity() -> Vec<DriftElasticityRow> {
803    let mut rows = Vec::new();
804    for &drift_rate in &[0.01, 0.02, 0.03, 0.05, 0.08, 0.10, 0.15, 0.20] {
805        let result = run_scenario(
806            &mut ClockDriftScenario::new(5.0, drift_rate, 50, 200, 0.02),
807            &ObserverConfig {
808                persistence_window: 20,
809                hysteresis_count: 3,
810                default_envelope: AdmissibilityEnvelope::symmetric(
811                    2.0,
812                    0.1,
813                    0.05,
814                    WorkloadPhase::SteadyState,
815                ),
816                ..ObserverConfig::fast_response()
817            },
818        );
819        rows.push(DriftElasticityRow {
820            drift_rate,
821            result_detected: result.detected(),
822            detection_delay: result.detection_delay_from_injection(),
823            lead_time: result.detection_lead_time(),
824        });
825    }
826    rows
827}
828
829fn render_sensitivity_sweep_csv(bundle: &PublicEvaluationBundle) -> String {
830    let mut csv = String::from(
831        "sigma,P,detection_delay,lead_time,detected,false_alarms,boundary_steps,violation_steps\n",
832    );
833    for row in &bundle.sensitivity_sweep {
834        csv.push_str(&format!(
835            "{},{},{},{},{},{},{},{}\n",
836            row.sigma,
837            row.persistence_window,
838            row.detection_delay.unwrap_or(0),
839            row.lead_time.unwrap_or(0),
840            u8::from(row.detected),
841            u8::from(row.has_false_alarm),
842            row.boundary_steps,
843            row.violation_steps
844        ));
845    }
846    csv
847}
848
849fn render_readme_results_section(bundle: &PublicEvaluationBundle) -> String {
850    let mut out = String::new();
851    let detected_count = count_true(&bundle.primary, |row| row.result.detected());
852    let pre_injection_primary = count_true(&bundle.primary, |row| {
853        row.result.false_alarms_before_injection > 0
854    });
855    out.push_str("| Gray Failure Scenario | Detection Delay | Lead Time | False Alarms |\n");
856    out.push_str("|----------------------|-----------------|-----------|--------------|\n");
857    for row in &bundle.primary {
858        let result = &row.result;
859        let delay = render_optional_steps(result.detection_delay_from_injection(), "pre-injection");
860        let lead = render_optional_steps(result.detection_lead_time(), "-");
861        out.push_str(&format!(
862            "| {} | {} | {} | **{}** |\n",
863            row.name, delay, lead, result.false_alarms_before_injection
864        ));
865    }
866    out.push('\n');
867    out.push_str(&format!(
868        "Current metrics are generated by `cargo run --bin dsfb-regenerate-public-artifacts`. The current recommended configuration detects {}/{} primary scenarios, and {} primary scenario(s) show a pre-injection anomaly.\n",
869        detected_count,
870        bundle.primary.len(),
871        pre_injection_primary
872    ));
873    out
874}
875
876fn render_evidence_ledger_md(bundle: &PublicEvaluationBundle) -> String {
877    let mut out = String::new();
878    out.push_str("## Evidence Ledger\n\n");
879    out.push_str("Every public-facing numeric claim in this repository should map to one command, one artifact, or one generated section.\n\n");
880    out.push_str("| Claim Surface | Generated From | Artifact |\n");
881    out.push_str("|---------------|----------------|----------|\n");
882    out.push_str("| README results table | `cargo run --bin dsfb-regenerate-public-artifacts` | `docs/generated/README_RESULTS.md` |\n");
883    out.push_str("| Full evaluation narrative | `cargo run --bin dsfb-regenerate-public-artifacts` | `data/evaluation_results.txt` |\n");
884    out.push_str("| Demo output | `cargo run --bin dsfb-regenerate-public-artifacts` | `data/demo-output.txt` |\n");
885    out.push_str("| Sensitivity sweep table | `cargo run --bin dsfb-regenerate-public-artifacts` | `data/sensitivity_sweep.csv` |\n");
886    for row in &bundle.primary {
887        out.push_str(&format!(
888            "| Scenario CSV: {} | `cargo run --bin dsfb-regenerate-public-artifacts` | `data/{}` |\n",
889            row.name, row.csv_name
890        ));
891    }
892    out.push_str("| Paper TeX results table | `cargo run --bin dsfb-regenerate-public-artifacts` | `paper/generated/results_summary.tex` |\n");
893    out.push_str("| Audit contract summary | `cargo run --bin dsfb-regenerate-public-artifacts` | `docs/generated/AUDIT_CONTRACT.md` |\n");
894    out.push_str("| Paper TeX audit contract | `cargo run --bin dsfb-regenerate-public-artifacts` | `paper/generated/audit_contract.tex` |\n");
895    out.push_str("| Claim ledger | `cargo run --bin dsfb-regenerate-public-artifacts` | `docs/generated/CLAIM_LEDGER.md` |\n");
896    out
897}
898
899fn render_audit_contract_md() -> String {
900    let mut out = String::new();
901    out.push_str("## Canonical Broad Audit Contract\n\n");
902    out.push_str(
903        "- DSFB emits one canonical broad audit rather than primary profile-specific reports.\n",
904    );
905    out.push_str("- The audit keeps one shared evidence set and one shared denominator, then renders domain and standards interpretations as conclusion lenses at the end of the report.\n");
906    out.push_str(&format!(
907        "- The locked score method is `{}` with one overall score plus visible advisory subscores.\n",
908        AUDIT_SCORE_METHOD
909    ));
910    out.push_str("- The score is a broad code-improvement and review-readiness target for Rust developers.\n");
911    out.push_str("- The score is not runtime correctness, not a certification result, and not a standards certificate.\n");
912    out.push_str(&format!("- {}\n", AUDIT_NON_CERTIFICATION_STATEMENT));
913    out.push_str("- The report contract includes remediation guidance, verification suggestions, evidence IDs, SARIF, in-toto, DSSE, and static-to-runtime prior derivation.\n");
914    out
915}
916
917fn render_claim_ledger_md(bundle: &PublicEvaluationBundle) -> String {
918    let detected_count = count_true(&bundle.primary, |row| row.result.detected());
919    let clean_control_false_rate = find_negative_control(bundle, "Starvation (clean)")
920        .map(|row| {
921            if row.result.total_steps > 0 {
922                (row.result.total_boundary_steps + row.result.total_violation_steps) as f64
923                    / row.result.total_steps as f64
924                    * 100.0
925            } else {
926                0.0
927            }
928        })
929        .unwrap_or(0.0);
930    let sweep_pre_injection = count_true(&bundle.sensitivity_sweep, |row| row.has_false_alarm);
931    let mut out = String::new();
932    out.push_str("## Claim Ledger\n\n");
933    out.push_str(&format!(
934        "- DSFB detects {}/{} primary deterministic scenarios under the recommended configuration.\n",
935        detected_count,
936        bundle.primary.len()
937    ));
938    out.push_str(&format!(
939        "  Evidence: `data/evaluation_results.txt`, Section 1; generated from {} primary scenarios.\n",
940        bundle.primary.len()
941    ));
942    out.push_str("- The recommended configuration is not zero-false-alarm in all clean windows.\n");
943    out.push_str(&format!(
944        "  Evidence: `data/evaluation_results.txt`, Section 3; async starvation clean control produces a {:.1}% false rate.\n",
945        clean_control_false_rate
946    ));
947    out.push_str(
948        "- Sensitivity behavior is configuration-dependent rather than universally robust.\n",
949    );
950    out.push_str(&format!(
951        "  Evidence: `data/evaluation_results.txt`, Section 2; {}/{} sweep points show pre-injection alarms.\n",
952        sweep_pre_injection,
953        bundle.sensitivity_sweep.len()
954    ));
955    out.push_str("- Reproducibility is deterministic for the current clock-drift harness.\n");
956    out.push_str("  Evidence: `data/evaluation_results.txt`, Section 4; 10/10 runs identical.\n");
957    out.push_str("- DSFB provides structurally distinct detection-point signatures across the primary scenarios.\n");
958    out.push_str("  Evidence: `data/evaluation_results.txt`, Section 5.\n");
959    out.push_str("- The companion crate now emits one canonical broad audit rather than primary profile-specific reports.\n");
960    out.push_str("  Evidence: `docs/generated/AUDIT_CONTRACT.md`; regenerated from `cargo run --bin dsfb-regenerate-public-artifacts`.\n");
961    out.push_str(&format!(
962        "- The audit score method is `{}` and is treated as a broad improvement/readiness guide rather than certification.\n",
963        AUDIT_SCORE_METHOD
964    ));
965    out.push_str(
966        "  Evidence: `docs/generated/AUDIT_CONTRACT.md` and `docs/AUDIT_SCORING_LOCKED.md`.\n",
967    );
968    out.push_str("- The audit report includes conclusion lenses over one shared evidence set rather than separate primary scan modes.\n");
969    out.push_str("  Evidence: `docs/generated/AUDIT_CONTRACT.md`; mirrored in the current scan report contract.\n");
970    out.push_str("- The scanner emits SARIF, in-toto, and DSSE artifacts as part of the established public contract.\n");
971    out.push_str("  Evidence: `docs/generated/AUDIT_CONTRACT.md` and the generated scanner outputs in `output-dsfb-gray/`.\n");
972    out
973}
974
975fn render_paper_results_table_tex(bundle: &PublicEvaluationBundle) -> String {
976    let mut out = String::new();
977    out.push_str("\\begin{table}[H]\n\\centering\n");
978    out.push_str(
979        "\\caption{Primary deterministic evaluation results (recommended configuration).}\n",
980    );
981    out.push_str("\\label{tab:summary}\n");
982    out.push_str("\\begin{tabular}{lcccc}\n\\toprule\n");
983    out.push_str("Scenario & Detection Delay & Lead Time & False Alarms & Notes \\\\\n\\midrule\n");
984    for row in &bundle.primary {
985        let result = &row.result;
986        let delay = result
987            .detection_delay_from_injection()
988            .map_or("pre-injection".to_string(), |value| value.to_string());
989        let lead = result
990            .detection_lead_time()
991            .map_or("-".to_string(), |value| value.to_string());
992        let notes = if result.false_alarms_before_injection > 0 {
993            "pre-injection anomaly observed"
994        } else {
995            "none in primary run"
996        };
997        out.push_str(&format!(
998            "{} & {} & {} & {} & {} \\\\\n",
999            row.name, delay, lead, result.false_alarms_before_injection, notes
1000        ));
1001    }
1002    out.push_str("\\bottomrule\n\\end{tabular}\n\\end{table}\n");
1003    out
1004}
1005
1006fn render_paper_claim_ledger_tex(bundle: &PublicEvaluationBundle) -> String {
1007    let detected_count = count_true(&bundle.primary, |row| row.result.detected());
1008    let clean_control_false_rate = find_negative_control(bundle, "Starvation (clean)")
1009        .map(|row| {
1010            if row.result.total_steps > 0 {
1011                (row.result.total_boundary_steps + row.result.total_violation_steps) as f64
1012                    / row.result.total_steps as f64
1013                    * 100.0
1014            } else {
1015                0.0
1016            }
1017        })
1018        .unwrap_or(0.0);
1019    let sweep_pre_injection = count_true(&bundle.sensitivity_sweep, |row| row.has_false_alarm);
1020    let mut out = String::new();
1021    out.push_str("\\subsection*{Claim Ledger}\n");
1022    out.push_str("\\begin{itemize}\n");
1023    out.push_str(&format!(
1024        "\\item Primary evaluation detects {} of {} deterministic scenarios under the recommended configuration. Evidence: Table~\\ref{{tab:summary}} and \\texttt{{data/evaluation\\_results.txt}}.\n",
1025        detected_count,
1026        bundle.primary.len()
1027    ));
1028    out.push_str(&format!(
1029        "\\item The recommended configuration is not universally zero-false-alarm: the clean async-starvation control produces a {:.1}\\% false rate. Evidence: negative-control section in \\texttt{{data/evaluation\\_results.txt}}.\n",
1030        clean_control_false_rate
1031    ));
1032    out.push_str(&format!(
1033        "\\item Sensitivity behavior is configuration-dependent: {} of {} sweep points show pre-injection alarms. Evidence: sensitivity sweep in \\texttt{{data/evaluation\\_results.txt}}.\n",
1034        sweep_pre_injection,
1035        bundle.sensitivity_sweep.len()
1036    ));
1037    out.push_str(&format!(
1038        "\\item Deterministic reproducibility holds for {} repeated clock-drift runs in the current harness. Evidence: reproducibility section in \\texttt{{data/evaluation\\_results.txt}}.\n",
1039        bundle.reproducibility_boundary_steps.len()
1040    ));
1041    out.push_str(&format!(
1042        "\\item The companion crate now emits one canonical broad audit with locked score method \\texttt{{{}}}. Evidence: \\texttt{{paper/generated/audit\\_contract.tex}} and \\texttt{{docs/AUDIT\\_SCORING\\_LOCKED.md}}.\n",
1043        AUDIT_SCORE_METHOD
1044    ));
1045    out.push_str("\\item The audit score is a broad improvement and review-readiness guide, not a compliance or certification result. Evidence: \\texttt{paper/generated/audit\\_contract.tex}.\n");
1046    out.push_str("\\item The report contract includes conclusion lenses over one shared evidence set rather than separate primary scan modes. Evidence: \\texttt{paper/generated/audit\\_contract.tex}.\n");
1047    out.push_str("\\end{itemize}\n");
1048    out
1049}
1050
1051fn render_paper_audit_contract_tex() -> String {
1052    let mut out = String::new();
1053    out.push_str("\\begin{itemize}[leftmargin=1.5em,itemsep=2pt]\n");
1054    out.push_str("\\item DSFB now emits one canonical broad static audit rather than primary profile-specific reports.\n");
1055    out.push_str("\\item The audit keeps one shared evidence set and one shared score denominator, then renders domain and standards interpretations as conclusion lenses at the end of the report.\n");
1056    out.push_str(&format!(
1057        "\\item The locked score method is \\texttt{{{}}}, reported as one overall score plus visible advisory subscores.\n",
1058        AUDIT_SCORE_METHOD
1059    ));
1060    out.push_str("\\item The score is intended as a broad code-improvement and review-readiness target for Rust developers.\n");
1061    out.push_str("\\item The score is not runtime correctness, not a compliance result, and not a certification outcome.\n");
1062    out.push_str(&format!(
1063        "\\item {}\n",
1064        escape_latex(AUDIT_NON_CERTIFICATION_STATEMENT)
1065    ));
1066    out.push_str("\\item The public audit contract includes remediation guidance, verification suggestions, evidence identifiers, SARIF, in-toto, DSSE, and static-to-runtime prior derivation.\n");
1067    out.push_str("\\end{itemize}\n");
1068    out
1069}
1070
1071fn render_paper_results_table_md(bundle: &PublicEvaluationBundle) -> String {
1072    let mut out = String::new();
1073    out.push_str("## Generated Primary Results\n\n");
1074    out.push_str("| Scenario | Detection Delay | Lead Time | False Alarms | Notes |\n");
1075    out.push_str("|----------|-----------------|-----------|--------------|-------|\n");
1076    for row in &bundle.primary {
1077        let result = &row.result;
1078        let delay = result
1079            .detection_delay_from_injection()
1080            .map_or("pre-injection".to_string(), |value| value.to_string());
1081        let lead = result
1082            .detection_lead_time()
1083            .map_or("-".to_string(), |value| value.to_string());
1084        let notes = if result.false_alarms_before_injection > 0 {
1085            "pre-injection anomaly observed"
1086        } else {
1087            "none in primary run"
1088        };
1089        out.push_str(&format!(
1090            "| {} | {} | {} | {} | {} |\n",
1091            row.name, delay, lead, result.false_alarms_before_injection, notes
1092        ));
1093    }
1094    out
1095}
1096
/// Replaces the text between two markers in the file at `path` with
/// `generated`, keeping both markers in place.
///
/// The rewritten region is: the start marker, a newline, `generated` with
/// trailing whitespace trimmed, a newline, then the end marker and everything
/// after it. Text before the start marker is preserved unchanged.
///
/// The end marker is searched for only *after* the first start marker, so an
/// earlier occurrence of the end-marker text (for example in prose that
/// mentions it) can no longer cause content to be duplicated or dropped.
///
/// # Errors
///
/// Returns any I/O error from reading or writing `path`, and
/// `io::ErrorKind::InvalidData` when the start marker is absent or no end
/// marker follows it. The file is left untouched in the error cases.
fn rewrite_marked_section(
    path: &Path,
    start_marker: &str,
    end_marker: &str,
    generated: &str,
) -> io::Result<()> {
    let contents = fs::read_to_string(path)?;
    let Some(start) = contents.find(start_marker) else {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "missing start marker `{start_marker}` in {}",
                path.display()
            ),
        ));
    };
    // Everything up to and including the start marker is kept verbatim.
    let section_start = start + start_marker.len();
    // Anchor the end-marker search at the end of the start marker; searching
    // from the top of the file could match an occurrence that precedes it.
    let Some(end_offset) = contents[section_start..].find(end_marker) else {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!(
                "missing end marker `{end_marker}` after start marker `{start_marker}` in {}",
                path.display()
            ),
        ));
    };
    let end = section_start + end_offset;

    let before = &contents[..section_start];
    let after = &contents[end..];
    let body = generated.trim_end();
    let mut rewritten = String::with_capacity(before.len() + body.len() + after.len() + 2);
    rewritten.push_str(before);
    rewritten.push('\n');
    rewritten.push_str(body);
    rewritten.push('\n');
    rewritten.push_str(after);
    fs::write(path, rewritten)
}
1129
/// Escapes LaTeX special characters in `input` so it is typeset as literal
/// text.
///
/// Handled characters: `\`, `&`, `%`, `$`, `#`, `_`, `{`, `}`, `~`, `^`. All
/// other characters are copied through unchanged.
///
/// The input is processed in a single pass, one character at a time. This
/// matters because some replacements themselves contain special characters
/// (`\textbackslash{}` contains a backslash and braces); chaining whole-string
/// replacements would re-escape those and produce `\textbackslash\{\}`.
fn escape_latex(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '\\' => escaped.push_str("\\textbackslash{}"),
            '&' => escaped.push_str("\\&"),
            '%' => escaped.push_str("\\%"),
            '$' => escaped.push_str("\\$"),
            '#' => escaped.push_str("\\#"),
            '_' => escaped.push_str("\\_"),
            '{' => escaped.push_str("\\{"),
            '}' => escaped.push_str("\\}"),
            '~' => escaped.push_str("\\textasciitilde{}"),
            '^' => escaped.push_str("\\textasciicircum{}"),
            other => escaped.push(other),
        }
    }
    escaped
}
dsfb_gray/evaluation.rs

dsfb_gray/
evaluation.rs