// dsfb_computer_graphics/datasets/mod.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::path::{Component, Path, PathBuf};
4use std::process::Command;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8
9use crate::error::{Error, Result};
10use crate::frame::{Color, ImageFrame};
11use crate::scene::{MotionVector, Normal3};
12
13mod davis;
14mod sintel;
15
// All paths below are relative; the functions in this module join them onto `crate_root()`.

/// JSON mapping summary for the DAVIS dataset.
pub const DAVIS_SUMMARY_JSON: &str = "generated/davis_mapping_summary.json";
/// JSON mapping summary for the MPI Sintel dataset.
pub const SINTEL_SUMMARY_JSON: &str = "generated/sintel_mapping_summary.json";
/// Markdown mapping report for the DAVIS dataset.
pub const DAVIS_REPORT_MD: &str = "generated/davis_mapping_report.md";
/// Markdown mapping report for the MPI Sintel dataset.
pub const SINTEL_REPORT_MD: &str = "generated/sintel_mapping_report.md";
/// Shared preparation report rewritten by `refresh_shared_reports`.
pub const PREPARATION_REPORT_MD: &str = "generated/dataset_preparation_report.md";
/// Shared dataset mapping document rewritten by `refresh_shared_reports`.
pub const DATASET_MAPPING_DOC_MD: &str = "docs/dataset_mapping.md";
/// External manifest whose path `prepare_davis_dataset` returns.
pub const DAVIS_MANIFEST_JSON: &str = "examples/davis_external_manifest.json";
/// External manifest whose path `prepare_sintel_dataset` returns.
pub const SINTEL_MANIFEST_JSON: &str = "examples/sintel_external_manifest.json";
24
25#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
26#[serde(rename_all = "snake_case")]
27pub enum FieldQuality {
28    Native,
29    DerivedHighConfidence,
30    DerivedLowConfidence,
31    Unavailable,
32}
33
34impl FieldQuality {
35    fn as_str(self) -> &'static str {
36        match self {
37            Self::Native => "native",
38            Self::DerivedHighConfidence => "derived-high-confidence",
39            Self::DerivedLowConfidence => "derived-low-confidence",
40            Self::Unavailable => "unavailable",
41        }
42    }
43}
44
45#[derive(Clone, Debug, Serialize, Deserialize)]
46pub struct BufferFieldSummary {
47    pub field_id: String,
48    pub quality: FieldQuality,
49    pub source: String,
50    pub disclosure: String,
51}
52
53#[derive(Clone, Debug, Serialize, Deserialize)]
54pub struct DatasetCaptureSummary {
55    pub label: String,
56    pub sequence_id: String,
57    pub frame_index: usize,
58    pub roi_kind: String,
59    pub case_tags: Vec<String>,
60}
61
62#[derive(Clone, Debug, Serialize, Deserialize)]
63pub struct DatasetMappingSummary {
64    pub dataset_id: String,
65    pub dataset_name: String,
66    pub why_chosen: String,
67    pub prepared_output_dir: String,
68    pub manifest_path: String,
69    pub dsfb_mode: String,
70    pub demo_a_metric_mode: String,
71    pub demo_b_mode: String,
72    pub reference_strategy: String,
73    pub official_urls: Vec<String>,
74    pub native_buffers: Vec<String>,
75    pub derived_buffers: Vec<String>,
76    pub unsupported_buffers: Vec<String>,
77    pub fields: Vec<BufferFieldSummary>,
78    pub captures: Vec<DatasetCaptureSummary>,
79    pub blockers: Vec<String>,
80    pub notes: Vec<String>,
81}
82
83#[derive(Clone, Debug, Serialize, Deserialize)]
84pub struct TaxonomyDatasetEntry {
85    pub dataset_id: String,
86    pub realism_stress_case: String,
87    pub larger_roi_case: String,
88    pub mixed_regime_case: String,
89    pub notes: Vec<String>,
90}
91
92#[derive(Clone, Debug, Serialize, Deserialize)]
93pub struct ExternalValidationTaxonomy {
94    pub datasets: Vec<TaxonomyDatasetEntry>,
95    pub aggregate_status: String,
96}
97
98pub fn prepare_davis_dataset(output_dir: &Path) -> Result<PathBuf> {
99    let summary = davis::prepare(output_dir)?;
100    write_summary_and_refresh(&summary)?;
101    Ok(crate_root().join(DAVIS_MANIFEST_JSON))
102}
103
104pub fn prepare_sintel_dataset(output_dir: &Path) -> Result<PathBuf> {
105    let summary = sintel::prepare(output_dir)?;
106    write_summary_and_refresh(&summary)?;
107    Ok(crate_root().join(SINTEL_MANIFEST_JSON))
108}
109
110pub fn validate_standard_external_package(output_root: &Path) -> Result<()> {
111    let root = crate_root();
112    let davis_summary = read_summary(&root.join(DAVIS_SUMMARY_JSON))?;
113    let sintel_summary = read_summary(&root.join(SINTEL_SUMMARY_JSON))?;
114
115    for path in [
116        root.join("docs/external_dataset_plan.md"),
117        root.join(DATASET_MAPPING_DOC_MD),
118        root.join(PREPARATION_REPORT_MD),
119        root.join(DAVIS_REPORT_MD),
120        root.join(SINTEL_REPORT_MD),
121        root.join(DAVIS_MANIFEST_JSON),
122        root.join(SINTEL_MANIFEST_JSON),
123    ] {
124        require_file(&path)?;
125    }
126
127    for summary in [&davis_summary, &sintel_summary] {
128        if !summary.blockers.is_empty() {
129            return Err(Error::Message(format!(
130                "{} preparation is still blocked: {}",
131                summary.dataset_name,
132                summary.blockers.join("; ")
133            )));
134        }
135        require_file(Path::new(&summary.prepared_output_dir))?;
136        require_file(Path::new(&summary.manifest_path))?;
137    }
138
139    validate_dataset_output(output_root, "external_davis", &davis_summary)?;
140    validate_dataset_output(output_root, "external_sintel", &sintel_summary)?;
141
142    let taxonomy = build_taxonomy(output_root)?;
143    let taxonomy_path = output_root.join("external_validation_taxonomy.json");
144    fs::create_dir_all(output_root)?;
145    fs::write(&taxonomy_path, serde_json::to_string_pretty(&taxonomy)?)?;
146
147    let report_path = output_root.join("external_validation_report.md");
148    write_final_external_validation_report(
149        &report_path,
150        output_root,
151        &davis_summary,
152        &sintel_summary,
153        &taxonomy,
154    )?;
155
156    let evaluator_handoff_path = output_root.join("evaluator_handoff.md");
157    write_final_evaluator_handoff(
158        &evaluator_handoff_path,
159        output_root,
160        &davis_summary,
161        &sintel_summary,
162    )?;
163
164    let readiness_path = output_root.join("check_signing_readiness.md");
165    write_check_signing_readiness(
166        &readiness_path,
167        output_root,
168        &davis_summary,
169        &sintel_summary,
170        &taxonomy,
171    )?;
172
173    for path in [
174        taxonomy_path,
175        report_path,
176        evaluator_handoff_path,
177        readiness_path,
178    ] {
179        require_file(&path)?;
180    }
181
182    Ok(())
183}
184
185pub(crate) fn crate_root() -> PathBuf {
186    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
187}
188
189pub(crate) fn summary_json_path(dataset_id: &str) -> PathBuf {
190    match dataset_id {
191        "davis" => crate_root().join(DAVIS_SUMMARY_JSON),
192        "sintel" => crate_root().join(SINTEL_SUMMARY_JSON),
193        other => crate_root().join(format!("generated/{other}_mapping_summary.json")),
194    }
195}
196
197pub(crate) fn report_md_path(dataset_id: &str) -> PathBuf {
198    match dataset_id {
199        "davis" => crate_root().join(DAVIS_REPORT_MD),
200        "sintel" => crate_root().join(SINTEL_REPORT_MD),
201        other => crate_root().join(format!("generated/{other}_mapping_report.md")),
202    }
203}
204
205pub(crate) fn manifest_path(dataset_id: &str) -> PathBuf {
206    match dataset_id {
207        "davis" => crate_root().join(DAVIS_MANIFEST_JSON),
208        "sintel" => crate_root().join(SINTEL_MANIFEST_JSON),
209        other => crate_root().join(format!("examples/{other}_external_manifest.json")),
210    }
211}
212
213pub(crate) fn write_summary_and_refresh(summary: &DatasetMappingSummary) -> Result<()> {
214    let summary_path = summary_json_path(&summary.dataset_id);
215    if let Some(parent) = summary_path.parent() {
216        fs::create_dir_all(parent)?;
217    }
218    fs::write(&summary_path, serde_json::to_string_pretty(summary)?)?;
219    write_dataset_mapping_report(&report_md_path(&summary.dataset_id), summary)?;
220    refresh_shared_reports()
221}
222
223pub(crate) fn refresh_shared_reports() -> Result<()> {
224    let root = crate_root();
225    let davis = read_summary_if_exists(&root.join(DAVIS_SUMMARY_JSON))?;
226    let sintel = read_summary_if_exists(&root.join(SINTEL_SUMMARY_JSON))?;
227    write_shared_dataset_mapping_doc(
228        &root.join(DATASET_MAPPING_DOC_MD),
229        davis.as_ref(),
230        sintel.as_ref(),
231    )?;
232    write_shared_preparation_report(
233        &root.join(PREPARATION_REPORT_MD),
234        davis.as_ref(),
235        sintel.as_ref(),
236    )?;
237    Ok(())
238}
239
240pub(crate) fn require_command(name: &str) -> Result<()> {
241    let status = Command::new("bash")
242        .arg("-lc")
243        .arg(format!("command -v {name} >/dev/null 2>&1"))
244        .status()?;
245    if status.success() {
246        Ok(())
247    } else {
248        Err(Error::Message(format!(
249            "required command `{name}` was not found in PATH"
250        )))
251    }
252}
253
254pub(crate) fn download_if_missing(url: &str, archive_path: &Path) -> Result<()> {
255    if archive_path.exists() && archive_path.metadata()?.len() > 0 {
256        return Ok(());
257    }
258    require_command("curl")?;
259    if let Some(parent) = archive_path.parent() {
260        fs::create_dir_all(parent)?;
261    }
262    let status = Command::new("curl")
263        .arg("-L")
264        .arg("--fail")
265        .arg("--retry")
266        .arg("3")
267        .arg("--output")
268        .arg(archive_path)
269        .arg(url)
270        .status()?;
271    if status.success() {
272        Ok(())
273    } else {
274        Err(Error::Message(format!(
275            "failed to download {url} into {}",
276            archive_path.display()
277        )))
278    }
279}
280
281pub(crate) fn unzip_if_needed(archive_path: &Path, destination: &Path) -> Result<()> {
282    let marker = destination.join(".extract_complete");
283    if marker.exists() {
284        return Ok(());
285    }
286    require_command("unzip")?;
287    fs::create_dir_all(destination)?;
288    let status = Command::new("unzip")
289        .arg("-o")
290        .arg(archive_path)
291        .arg("-d")
292        .arg(destination)
293        .status()?;
294    if !status.success() {
295        return Err(Error::Message(format!(
296            "failed to extract {} into {}",
297            archive_path.display(),
298            destination.display()
299        )));
300    }
301    fs::write(marker, archive_path.display().to_string())?;
302    Ok(())
303}
304
305pub(crate) fn read_summary(path: &Path) -> Result<DatasetMappingSummary> {
306    let text = fs::read_to_string(path)?;
307    Ok(serde_json::from_str(&text)?)
308}
309
310pub(crate) fn write_dataset_mapping_report(
311    path: &Path,
312    summary: &DatasetMappingSummary,
313) -> Result<()> {
314    if let Some(parent) = path.parent() {
315        fs::create_dir_all(parent)?;
316    }
317    let mut markdown = String::new();
318    let _ = writeln!(markdown, "# {} Mapping Report", summary.dataset_name);
319    let _ = writeln!(markdown);
320    let _ = writeln!(markdown, "## Why This Dataset");
321    let _ = writeln!(markdown);
322    let _ = writeln!(markdown, "{}", summary.why_chosen);
323    let _ = writeln!(markdown);
324    let _ = writeln!(markdown, "## DSFB Mode");
325    let _ = writeln!(markdown);
326    let _ = writeln!(markdown, "- DSFB mode: `{}`", summary.dsfb_mode);
327    let _ = writeln!(
328        markdown,
329        "- Demo A metric mode: `{}`",
330        summary.demo_a_metric_mode
331    );
332    let _ = writeln!(markdown, "- Demo B mode: `{}`", summary.demo_b_mode);
333    let _ = writeln!(
334        markdown,
335        "- reference strategy: `{}`",
336        summary.reference_strategy
337    );
338    let _ = writeln!(markdown);
339    let _ = writeln!(markdown, "## Buffer Mapping");
340    let _ = writeln!(markdown);
341    let _ = writeln!(markdown, "| Field | Quality | Source | Disclosure |");
342    let _ = writeln!(markdown, "| --- | --- | --- | --- |");
343    for field in &summary.fields {
344        let _ = writeln!(
345            markdown,
346            "| {} | {} | {} | {} |",
347            field.field_id,
348            field.quality.as_str(),
349            field.source,
350            field.disclosure
351        );
352    }
353    let _ = writeln!(markdown);
354    let _ = writeln!(markdown, "## Native Buffers");
355    let _ = writeln!(markdown);
356    for field in &summary.native_buffers {
357        let _ = writeln!(markdown, "- `{field}`");
358    }
359    let _ = writeln!(markdown);
360    let _ = writeln!(markdown, "## Derived Buffers");
361    let _ = writeln!(markdown);
362    for field in &summary.derived_buffers {
363        let _ = writeln!(markdown, "- `{field}`");
364    }
365    let _ = writeln!(markdown);
366    let _ = writeln!(markdown, "## Unsupported Buffers");
367    let _ = writeln!(markdown);
368    if summary.unsupported_buffers.is_empty() {
369        let _ = writeln!(markdown, "- none");
370    } else {
371        for field in &summary.unsupported_buffers {
372            let _ = writeln!(markdown, "- `{field}`");
373        }
374    }
375    let _ = writeln!(markdown);
376    let _ = writeln!(markdown, "## Prepared Captures");
377    let _ = writeln!(markdown);
378    let _ = writeln!(
379        markdown,
380        "| Label | Sequence | Frame | ROI kind | Case tags |"
381    );
382    let _ = writeln!(markdown, "| --- | --- | ---: | --- | --- |");
383    for capture in &summary.captures {
384        let _ = writeln!(
385            markdown,
386            "| {} | {} | {} | {} | {} |",
387            capture.label,
388            capture.sequence_id,
389            capture.frame_index,
390            capture.roi_kind,
391            capture.case_tags.join(", ")
392        );
393    }
394    let _ = writeln!(markdown);
395    if !summary.blockers.is_empty() {
396        let _ = writeln!(markdown, "## Blockers");
397        let _ = writeln!(markdown);
398        for blocker in &summary.blockers {
399            let _ = writeln!(markdown, "- {blocker}");
400        }
401    }
402    let _ = writeln!(markdown);
403    let _ = writeln!(markdown, "## Notes");
404    let _ = writeln!(markdown);
405    for note in &summary.notes {
406        let _ = writeln!(markdown, "- {note}");
407    }
408    fs::write(path, markdown)?;
409    Ok(())
410}
411
412pub(crate) fn write_json_file(path: &Path, value: &impl Serialize) -> Result<()> {
413    if let Some(parent) = path.parent() {
414        fs::create_dir_all(parent)?;
415    }
416    fs::write(path, serde_json::to_string_pretty(value)?)?;
417    Ok(())
418}
419
420pub(crate) fn write_image_frame(path: &Path, frame: &ImageFrame) -> Result<()> {
421    frame.save_png(path)
422}
423
424pub(crate) fn write_scalar_grid(
425    path: &Path,
426    values: &[f32],
427    width: usize,
428    height: usize,
429) -> Result<()> {
430    let payload = serde_json::json!({
431        "width": width,
432        "height": height,
433        "data": values,
434    });
435    write_json_file(path, &payload)
436}
437
438pub(crate) fn write_vec2_grid(
439    path: &Path,
440    values: &[MotionVector],
441    width: usize,
442    height: usize,
443) -> Result<()> {
444    let data = values
445        .iter()
446        .map(|value| [value.to_prev_x, value.to_prev_y])
447        .collect::<Vec<_>>();
448    let payload = serde_json::json!({
449        "width": width,
450        "height": height,
451        "data": data,
452    });
453    write_json_file(path, &payload)
454}
455
456pub(crate) fn write_vec3_grid(
457    path: &Path,
458    values: &[Normal3],
459    width: usize,
460    height: usize,
461) -> Result<()> {
462    let data = values
463        .iter()
464        .map(|value| [value.x, value.y, value.z])
465        .collect::<Vec<_>>();
466    let payload = serde_json::json!({
467        "width": width,
468        "height": height,
469        "data": data,
470    });
471    write_json_file(path, &payload)
472}
473
474pub(crate) fn write_mask_grid(
475    path: &Path,
476    values: &[bool],
477    width: usize,
478    height: usize,
479) -> Result<()> {
480    let payload = serde_json::json!({
481        "width": width,
482        "height": height,
483        "data": values,
484    });
485    write_json_file(path, &payload)
486}
487
488pub(crate) fn load_image_frame(path: &Path) -> Result<ImageFrame> {
489    let image = image::open(path)?.to_rgba8();
490    let width = image.width() as usize;
491    let height = image.height() as usize;
492    let pixels = image
493        .pixels()
494        .map(|pixel| {
495            Color::rgb(
496                pixel[0] as f32 / 255.0,
497                pixel[1] as f32 / 255.0,
498                pixel[2] as f32 / 255.0,
499            )
500        })
501        .collect();
502    Ok(ImageFrame::from_pixels(width, height, pixels))
503}
504
505pub(crate) fn relative_path(from_dir: &Path, to_path: &Path) -> PathBuf {
506    let from = normalize_absolute_path(from_dir);
507    let to = normalize_absolute_path(to_path);
508    let from_components: Vec<_> = from.components().collect();
509    let to_components: Vec<_> = to.components().collect();
510    let mut shared = 0usize;
511    while shared < from_components.len()
512        && shared < to_components.len()
513        && from_components[shared] == to_components[shared]
514    {
515        shared += 1;
516    }
517
518    let mut relative = PathBuf::new();
519    for component in &from_components[shared..] {
520        if matches!(component, Component::Normal(_)) {
521            relative.push("..");
522        }
523    }
524    for component in &to_components[shared..] {
525        relative.push(component.as_os_str());
526    }
527    if relative.as_os_str().is_empty() {
528        relative.push(".");
529    }
530    relative
531}
532
533fn normalize_absolute_path(path: &Path) -> PathBuf {
534    if path.is_absolute() {
535        path.to_path_buf()
536    } else {
537        crate_root().join(path)
538    }
539}
540
541fn read_summary_if_exists(path: &Path) -> Result<Option<DatasetMappingSummary>> {
542    if path.exists() {
543        Ok(Some(read_summary(path)?))
544    } else {
545        Ok(None)
546    }
547}
548
549fn write_shared_dataset_mapping_doc(
550    path: &Path,
551    davis: Option<&DatasetMappingSummary>,
552    sintel: Option<&DatasetMappingSummary>,
553) -> Result<()> {
554    if let Some(parent) = path.parent() {
555        fs::create_dir_all(parent)?;
556    }
557    let mut markdown = String::new();
558    let _ = writeln!(markdown, "# Dataset Mapping");
559    let _ = writeln!(markdown);
560    let _ = writeln!(
561        markdown,
562        "This document records the native-vs-derived mapping used to run the DSFB external replay path on DAVIS and MPI Sintel."
563    );
564    let _ = writeln!(markdown);
565    for summary in [davis, sintel].into_iter().flatten() {
566        let _ = writeln!(markdown, "## {}", summary.dataset_name);
567        let _ = writeln!(markdown);
568        let _ = writeln!(markdown, "- manifest: `{}`", summary.manifest_path);
569        let _ = writeln!(markdown, "- DSFB mode: `{}`", summary.dsfb_mode);
570        let _ = writeln!(
571            markdown,
572            "- derived-vs-native disclosure: all fields below are labeled as native, derived-high-confidence, derived-low-confidence, or unavailable"
573        );
574        let _ = writeln!(markdown);
575        let _ = writeln!(markdown, "| Field | Quality | Source |");
576        let _ = writeln!(markdown, "| --- | --- | --- |");
577        for field in &summary.fields {
578            let _ = writeln!(
579                markdown,
580                "| {} | {} | {} |",
581                field.field_id,
582                field.quality.as_str(),
583                field.source
584            );
585        }
586        let _ = writeln!(markdown);
587    }
588    fs::write(path, markdown)?;
589    Ok(())
590}
591
592fn write_shared_preparation_report(
593    path: &Path,
594    davis: Option<&DatasetMappingSummary>,
595    sintel: Option<&DatasetMappingSummary>,
596) -> Result<()> {
597    if let Some(parent) = path.parent() {
598        fs::create_dir_all(parent)?;
599    }
600    let mut markdown = String::new();
601    let _ = writeln!(markdown, "# Dataset Preparation Report");
602    let _ = writeln!(markdown);
603    let _ = writeln!(markdown, "| Dataset | Prepared | Manifest | Blockers |");
604    let _ = writeln!(markdown, "| --- | --- | --- | --- |");
605    for (label, summary) in [("DAVIS", davis), ("MPI Sintel", sintel)] {
606        match summary {
607            Some(summary) => {
608                let _ = writeln!(
609                    markdown,
610                    "| {} | {} | `{}` | {} |",
611                    label,
612                    if summary.blockers.is_empty() {
613                        "true"
614                    } else {
615                        "false"
616                    },
617                    summary.manifest_path,
618                    if summary.blockers.is_empty() {
619                        "none".to_string()
620                    } else {
621                        summary.blockers.join("; ")
622                    }
623                );
624            }
625            None => {
626                let _ = writeln!(
627                    markdown,
628                    "| {} | false | missing | not prepared yet |",
629                    label
630                );
631            }
632        }
633    }
634    let _ = writeln!(markdown);
635    let _ = writeln!(markdown, "## Exact Gates");
636    let _ = writeln!(markdown);
637    let _ = writeln!(markdown, "- `docs/dataset_mapping.md` must exist.");
638    let _ = writeln!(
639        markdown,
640        "- `generated/davis_mapping_report.md` must exist."
641    );
642    let _ = writeln!(
643        markdown,
644        "- `generated/sintel_mapping_report.md` must exist."
645    );
646    let _ = writeln!(
647        markdown,
648        "- derived-vs-native labeling must be explicit in every mapping table."
649    );
650    fs::write(path, markdown)?;
651    Ok(())
652}
653
654fn validate_dataset_output(
655    output_root: &Path,
656    dir_name: &str,
657    summary: &DatasetMappingSummary,
658) -> Result<()> {
659    let dataset_dir = output_root.join(dir_name);
660    for path in [
661        dataset_dir.join("external_replay_report.md"),
662        dataset_dir.join("external_handoff_report.md"),
663        dataset_dir.join("external_validation_report.md"),
664        dataset_dir.join("replay_metrics.json"),
665        dataset_dir.join("gpu_execution_report.md"),
666        dataset_dir.join("gpu_execution_metrics.json"),
667        dataset_dir.join("demo_a_external_report.md"),
668        dataset_dir.join("demo_a_external_metrics.json"),
669        dataset_dir.join("demo_b_external_report.md"),
670        dataset_dir.join("demo_b_external_metrics.json"),
671        dataset_dir.join("scaling_report.md"),
672        dataset_dir.join("scaling_metrics.json"),
673        dataset_dir.join("memory_bandwidth_report.md"),
674        dataset_dir.join("integration_scaling_report.md"),
675        dataset_dir.join("resolved_external_capture_manifest.json"),
676        dataset_dir.join("figures").join("trust_map.png"),
677        dataset_dir.join("figures").join("intervention_map.png"),
678        dataset_dir.join("figures").join("roi_overlay.png"),
679        dataset_dir.join("figures").join("current_color.png"),
680        dataset_dir.join("figures").join("demo_a_dsfb.png"),
681        dataset_dir.join("figures").join("demo_a_fixed_alpha.png"),
682    ] {
683        require_file(&path)?;
684    }
685
686    let mapping_report = fs::read_to_string(report_md_path(&summary.dataset_id))?;
687    for required_phrase in [
688        "derived-high-confidence",
689        "derived-low-confidence",
690        "native",
691        "DSFB mode",
692    ] {
693        if !mapping_report.contains(required_phrase) {
694            return Err(Error::Message(format!(
695                "{} mapping report is missing required phrase `{required_phrase}`",
696                summary.dataset_name
697            )));
698        }
699    }
700
701    let replay_report = fs::read_to_string(dataset_dir.join("external_replay_report.md"))?;
702    if !replay_report.contains("external-capable") {
703        return Err(Error::Message(format!(
704            "{} replay report must distinguish external-capable from externally validated",
705            summary.dataset_name
706        )));
707    }
708
709    let gpu_report = fs::read_to_string(dataset_dir.join("gpu_execution_report.md"))?;
710    for required_phrase in [
711        "measured_gpu:",
712        "backend",
713        "trust_delta_vs_cpu",
714        "alpha_delta_vs_cpu",
715        "intervention_delta_vs_cpu",
716    ] {
717        if !gpu_report.contains(required_phrase) {
718            return Err(Error::Message(format!(
719                "{} GPU execution report is missing `{required_phrase}`",
720                summary.dataset_name
721            )));
722        }
723    }
724
725    let demo_a_report = fs::read_to_string(dataset_dir.join("demo_a_external_report.md"))?;
726    for required_phrase in [
727        "ROI source",
728        "non-ROI",
729        "point_vs_region",
730        "realism_stress_note",
731        "proxy",
732    ] {
733        if !demo_a_report.contains(required_phrase) {
734            return Err(Error::Message(format!(
735                "{} Demo A report is missing `{required_phrase}`",
736                summary.dataset_name
737            )));
738        }
739    }
740
741    let demo_b_report = fs::read_to_string(dataset_dir.join("demo_b_external_report.md"))?;
742    for required_phrase in [
743        "Gradient magnitude",
744        "Variance proxy",
745        "Combined heuristic",
746        "DSFB imported trust",
747        "Hybrid trust + variance",
748        "fixed_budget_equal",
749        "aliasing_limited",
750        "variance_limited",
751        "mixed_regime",
752    ] {
753        if !demo_b_report.contains(required_phrase) {
754            return Err(Error::Message(format!(
755                "{} Demo B report is missing `{required_phrase}`",
756                summary.dataset_name
757            )));
758        }
759    }
760
761    let scaling_report = fs::read_to_string(dataset_dir.join("scaling_report.md"))?;
762    for required_phrase in [
763        "scaled_1080p",
764        "scaled_4k",
765        "Cost appears approximately linear with resolution",
766        "realism_stress_case",
767        "larger_roi_case",
768        "mixed_regime_case",
769    ] {
770        if !scaling_report.contains(required_phrase) {
771            return Err(Error::Message(format!(
772                "{} scaling report is missing `{required_phrase}`",
773                summary.dataset_name
774            )));
775        }
776    }
777
778    let memory_report = fs::read_to_string(dataset_dir.join("memory_bandwidth_report.md"))?;
779    for required_phrase in [
780        "Readback required in production: `false`",
781        "Memory Access / Coherence Analysis",
782        "estimated memory traffic",
783    ] {
784        if !memory_report.contains(required_phrase) {
785            return Err(Error::Message(format!(
786                "{} memory bandwidth report is missing `{required_phrase}`",
787                summary.dataset_name
788            )));
789        }
790    }
791
792    let integration_report = fs::read_to_string(dataset_dir.join("integration_scaling_report.md"))?;
793    for required_phrase in [
794        "Async-Compute Feasibility",
795        "Production readback is not required",
796        "Hazards / Barriers / Transitions",
797        "Pipeline Compatibility",
798    ] {
799        if !integration_report.contains(required_phrase) {
800            return Err(Error::Message(format!(
801                "{} integration report is missing `{required_phrase}`",
802                summary.dataset_name
803            )));
804        }
805    }
806
807    Ok(())
808}
809
810fn build_taxonomy(output_root: &Path) -> Result<ExternalValidationTaxonomy> {
811    let mut datasets = Vec::new();
812    for dir_name in ["external_davis", "external_sintel"] {
813        let metrics_text =
814            fs::read_to_string(output_root.join(dir_name).join("scaling_metrics.json"))?;
815        let metrics: Value = serde_json::from_str(&metrics_text)?;
816        let coverage = &metrics["coverage"];
817        let note = if coverage["coverage_status"].as_str() == Some("complete") {
818            "coverage complete".to_string()
819        } else {
820            format!(
821                "coverage partial; missing: {}",
822                coverage["missing"]
823                    .as_array()
824                    .map(|values| {
825                        values
826                            .iter()
827                            .filter_map(Value::as_str)
828                            .collect::<Vec<_>>()
829                            .join(", ")
830                    })
831                    .unwrap_or_else(|| "unspecified".to_string())
832            )
833        };
834        datasets.push(TaxonomyDatasetEntry {
835            dataset_id: dir_name.trim_start_matches("external_").to_string(),
836            realism_stress_case: coverage_status(coverage["realism_stress_case"].as_bool()),
837            larger_roi_case: coverage_status(coverage["larger_roi_case"].as_bool()),
838            mixed_regime_case: coverage_status(coverage["mixed_regime_case"].as_bool()),
839            notes: vec![note],
840        });
841    }
842
843    let aggregate_complete = datasets.iter().all(|entry| {
844        [
845            entry.realism_stress_case.as_str(),
846            entry.larger_roi_case.as_str(),
847            entry.mixed_regime_case.as_str(),
848        ]
849        .iter()
850        .all(|status| *status == "covered" || *status == "explicitly_missing")
851    });
852    Ok(ExternalValidationTaxonomy {
853        datasets,
854        aggregate_status: if aggregate_complete {
855            "complete_or_explicitly_missing".to_string()
856        } else {
857            "incomplete".to_string()
858        },
859    })
860}
861
/// Maps a coverage flag to its taxonomy status string.
///
/// `Some(true)` becomes `"covered"`; both `Some(false)` and `None` become
/// `"explicitly_missing"`.
// NOTE(review): an absent flag (`None`) is reported the same way as an explicit `false` —
// confirm that this is intended.
fn coverage_status(value: Option<bool>) -> String {
    let label = if value == Some(true) {
        "covered"
    } else {
        "explicitly_missing"
    };
    label.to_string()
}
869
870fn write_final_external_validation_report(
871    path: &Path,
872    output_root: &Path,
873    davis: &DatasetMappingSummary,
874    sintel: &DatasetMappingSummary,
875    taxonomy: &ExternalValidationTaxonomy,
876) -> Result<()> {
877    let davis_gpu: Value = serde_json::from_str(&fs::read_to_string(
878        output_root
879            .join("external_davis")
880            .join("gpu_execution_metrics.json"),
881    )?)?;
882    let sintel_gpu: Value = serde_json::from_str(&fs::read_to_string(
883        output_root
884            .join("external_sintel")
885            .join("gpu_execution_metrics.json"),
886    )?)?;
887    let davis_demo_b: Value = serde_json::from_str(&fs::read_to_string(
888        output_root
889            .join("external_davis")
890            .join("demo_b_external_metrics.json"),
891    )?)?;
892    let sintel_demo_b: Value = serde_json::from_str(&fs::read_to_string(
893        output_root
894            .join("external_sintel")
895            .join("demo_b_external_metrics.json"),
896    )?)?;
897    let davis_scaling: Value = serde_json::from_str(&fs::read_to_string(
898        output_root
899            .join("external_davis")
900            .join("scaling_metrics.json"),
901    )?)?;
902    let sintel_scaling: Value = serde_json::from_str(&fs::read_to_string(
903        output_root
904            .join("external_sintel")
905            .join("scaling_metrics.json"),
906    )?)?;
907    let mut markdown = String::new();
908    let _ = writeln!(markdown, "# External Validation Report");
909    let _ = writeln!(markdown);
910    let _ = writeln!(markdown, "## Why DAVIS And Sintel");
911    let _ = writeln!(markdown);
912    let _ = writeln!(markdown, "- DAVIS: {}", davis.why_chosen);
913    let _ = writeln!(markdown, "- MPI Sintel: {}", sintel.why_chosen);
914    let _ = writeln!(markdown);
915    let _ = writeln!(markdown, "## Dataset Contributions");
916    let _ = writeln!(markdown);
917    let _ = writeln!(
918        markdown,
919        "- DAVIS contributes real captured video plus native segmentation masks."
920    );
921    let _ = writeln!(markdown, "- MPI Sintel contributes renderer-origin motion-rich sequences, optical flow, and official depth when available.");
922    let _ = writeln!(markdown);
923    let _ = writeln!(markdown, "## Native Vs Derived Buffers");
924    let _ = writeln!(markdown);
925    append_summary_fields(&mut markdown, "DAVIS", davis);
926    append_summary_fields(&mut markdown, "MPI Sintel", sintel);
927    let _ = writeln!(markdown);
928    let _ = writeln!(markdown, "## DSFB Modes Run");
929    let _ = writeln!(markdown);
930    let _ = writeln!(markdown, "- DAVIS: `{}`", davis.dsfb_mode);
931    let _ = writeln!(markdown, "- MPI Sintel: `{}`", sintel.dsfb_mode);
932    let _ = writeln!(markdown);
933    let _ = writeln!(markdown, "## GPU Execution Summary");
934    let _ = writeln!(markdown);
935    append_gpu_summary(&mut markdown, "DAVIS", &davis_gpu);
936    append_gpu_summary(&mut markdown, "MPI Sintel", &sintel_gpu);
937    let _ = writeln!(markdown);
938    let _ = writeln!(markdown, "## Demo A External Results");
939    let _ = writeln!(markdown);
940    let _ = writeln!(
941        markdown,
942        "- DAVIS uses proxy-only Demo A metrics because no renderer-quality reference exists in the mapped path."
943    );
944    let _ = writeln!(
945        markdown,
946        "- MPI Sintel uses a clean-vs-final pass proxy when available and labels it explicitly as proxy rather than renderer ground truth."
947    );
948    let _ = writeln!(markdown);
949    let _ = writeln!(markdown, "## Demo B External Results");
950    let _ = writeln!(markdown);
951    append_demo_b_summary(&mut markdown, "DAVIS", &davis_demo_b);
952    append_demo_b_summary(&mut markdown, "MPI Sintel", &sintel_demo_b);
953    let _ = writeln!(markdown);
954    let _ = writeln!(markdown, "## Scaling And Memory");
955    let _ = writeln!(markdown);
956    append_scaling_summary(&mut markdown, "DAVIS", &davis_scaling);
957    append_scaling_summary(&mut markdown, "MPI Sintel", &sintel_scaling);
958    let _ = writeln!(markdown, "- 1080p scaling is attempted on both datasets.");
959    let _ = writeln!(
960        markdown,
961        "- 4K scaling is attempted when the GPU path can run on scaled buffers."
962    );
963    let _ = writeln!(markdown, "- Memory / bandwidth reports explicitly state that readback is used for validation, not required in production.");
964    let _ = writeln!(markdown);
965    let _ = writeln!(markdown, "## Pipeline Insertion / Async");
966    let _ = writeln!(markdown);
967    let _ = writeln!(
968        markdown,
969        "- Async feasibility is discussed per dataset in the integration reports."
970    );
971    let _ = writeln!(
972        markdown,
973        "- Production readback is explicitly classified as not required."
974    );
975    let _ = writeln!(
976        markdown,
977        "- Barrier / transition discussion remains implementation guidance rather than proof."
978    );
979    let _ = writeln!(markdown);
980    let _ = writeln!(markdown, "## Coverage Taxonomy");
981    let _ = writeln!(markdown);
982    for dataset in &taxonomy.datasets {
983        let _ = writeln!(
984            markdown,
985            "- {}: realism_stress_case=`{}`, larger_roi_case=`{}`, mixed_regime_case=`{}`",
986            dataset.dataset_id,
987            dataset.realism_stress_case,
988            dataset.larger_roi_case,
989            dataset.mixed_regime_case
990        );
991    }
992    let _ = writeln!(markdown);
993    let _ = writeln!(markdown, "## What Is Proven");
994    let _ = writeln!(markdown);
995    let _ = writeln!(
996        markdown,
997        "- DAVIS and MPI Sintel are both integrated into the same DSFB external replay path."
998    );
999    let _ = writeln!(markdown, "- GPU execution is attempted on both dataset-mapped paths, with measured-vs-unmeasured status made explicit.");
1000    let _ = writeln!(
1001        markdown,
1002        "- Native-vs-derived buffer provenance is disclosed instead of hidden."
1003    );
1004    let _ = writeln!(markdown);
1005    let _ = writeln!(markdown, "## What Is Not Proven");
1006    let _ = writeln!(markdown);
1007    let _ = writeln!(
1008        markdown,
1009        "- This package does not prove production-engine integration."
1010    );
1011    let _ = writeln!(
1012        markdown,
1013        "- Demo B remains an allocation proxy rather than a live renderer sampling benchmark."
1014    );
1015    let _ = writeln!(
1016        markdown,
1017        "- DAVIS depth and normal support remain derived proxies, not native geometry buffers."
1018    );
1019    let _ = writeln!(markdown);
1020    let _ = writeln!(markdown, "## Remaining Blockers");
1021    let _ = writeln!(markdown);
1022    for blocker in collect_blockers(&davis_gpu, &sintel_gpu, taxonomy) {
1023        let _ = writeln!(markdown, "- {blocker}");
1024    }
1025    let _ = writeln!(markdown);
1026    let _ = writeln!(markdown, "## Next Highest-Value Experiment");
1027    let _ = writeln!(markdown);
1028    let _ = writeln!(
1029        markdown,
1030        "- Export one engine-native temporal capture with true history, motion, depth, and normals, then run the same DAVIS/Sintel comparison stack on that capture to close the renderer-integration gap."
1031    );
1032    fs::write(path, markdown)?;
1033    Ok(())
1034}
1035
1036fn write_final_evaluator_handoff(
1037    path: &Path,
1038    output_root: &Path,
1039    _davis: &DatasetMappingSummary,
1040    _sintel: &DatasetMappingSummary,
1041) -> Result<()> {
1042    if let Some(parent) = path.parent() {
1043        fs::create_dir_all(parent)?;
1044    }
1045    let mut markdown = String::new();
1046    let _ = writeln!(markdown, "# Evaluator Handoff");
1047    let _ = writeln!(markdown);
1048    let _ = writeln!(markdown, "## Standard External Datasets: DAVIS + Sintel");
1049    let _ = writeln!(markdown);
1050    let _ = writeln!(
1051        markdown,
1052        "- prepare DAVIS: `cargo run --release -- prepare-davis --output data/external/davis`"
1053    );
1054    let _ = writeln!(
1055        markdown,
1056        "- prepare Sintel: `cargo run --release -- prepare-sintel --output data/external/sintel`"
1057    );
1058    let _ = writeln!(
1059        markdown,
1060        "- replay DAVIS: `cargo run --release -- run-external-replay --manifest examples/davis_external_manifest.json --output {}`",
1061        output_root.join("external_davis").display()
1062    );
1063    let _ = writeln!(
1064        markdown,
1065        "- replay Sintel: `cargo run --release -- run-external-replay --manifest examples/sintel_external_manifest.json --output {}`",
1066        output_root.join("external_sintel").display()
1067    );
1068    let _ = writeln!(
1069        markdown,
1070        "- validate everything: `cargo run --release -- validate-final --output {}`",
1071        output_root.display()
1072    );
1073    let _ = writeln!(markdown);
1074    let _ = writeln!(markdown, "Expected outputs:");
1075    let _ = writeln!(markdown, "- `external_davis/*` and `external_sintel/*` with replay, GPU, Demo A, Demo B, scaling, memory, and integration reports.");
1076    let _ = writeln!(markdown, "- `external_validation_taxonomy.json`.");
1077    let _ = writeln!(markdown, "- `external_validation_report.md`.");
1078    let _ = writeln!(markdown, "- `check_signing_readiness.md`.");
1079    let _ = writeln!(markdown);
1080    let _ = writeln!(markdown, "Success looks like:");
1081    let _ = writeln!(markdown, "- both manifests load");
1082    let _ = writeln!(
1083        markdown,
1084        "- both dataset paths produce replay + GPU reports"
1085    );
1086    let _ = writeln!(markdown, "- proxy-vs-native distinctions stay explicit");
1087    let _ = writeln!(
1088        markdown,
1089        "- fixed-budget Demo B remains equal across all policies"
1090    );
1091    let _ = writeln!(markdown);
1092    let _ = writeln!(markdown, "Failure looks like:");
1093    let _ = writeln!(markdown, "- dataset download blocked");
1094    let _ = writeln!(markdown, "- missing per-dataset report or manifest");
1095    let _ = writeln!(markdown, "- hidden derived buffers or missing disclosure");
1096    let _ = writeln!(markdown);
1097    let _ = writeln!(markdown, "Interpretation rule:");
1098    let _ = writeln!(markdown, "- DAVIS and clean-vs-final Sintel comparisons may use proxies; read those as decision support, not renderer ground truth.");
1099    fs::write(path, markdown)?;
1100    Ok(())
1101}
1102
1103fn write_check_signing_readiness(
1104    path: &Path,
1105    output_root: &Path,
1106    _davis: &DatasetMappingSummary,
1107    _sintel: &DatasetMappingSummary,
1108    taxonomy: &ExternalValidationTaxonomy,
1109) -> Result<()> {
1110    let davis_gpu: Value = serde_json::from_str(&fs::read_to_string(
1111        output_root
1112            .join("external_davis")
1113            .join("gpu_execution_metrics.json"),
1114    )?)?;
1115    let sintel_gpu: Value = serde_json::from_str(&fs::read_to_string(
1116        output_root
1117            .join("external_sintel")
1118            .join("gpu_execution_metrics.json"),
1119    )?)?;
1120    let mut markdown = String::new();
1121    let _ = writeln!(markdown, "# Check Signing Readiness");
1122    let _ = writeln!(markdown);
1123    let _ = writeln!(markdown, "| Area | Status | Classification | Notes |");
1124    let _ = writeln!(markdown, "| --- | --- | --- | --- |");
1125    let _ = writeln!(
1126        markdown,
1127        "| DAVIS prep | ready | external | official DAVIS data mapped into the schema |"
1128    );
1129    let _ = writeln!(
1130        markdown,
1131        "| Sintel prep | ready | external | official Sintel data mapped into the schema |"
1132    );
1133    let _ = writeln!(
1134        markdown,
1135        "| DAVIS GPU | {} | {} | measured_gpu=`{}` |",
1136        if davis_gpu["measured_gpu"].as_bool() == Some(true) {
1137            "ready"
1138        } else {
1139            "partial"
1140        },
1141        if davis_gpu["measured_gpu"].as_bool() == Some(true) {
1142            "external"
1143        } else {
1144            "external"
1145        },
1146        davis_gpu["measured_gpu"].as_bool().unwrap_or(false)
1147    );
1148    let _ = writeln!(
1149        markdown,
1150        "| Sintel GPU | {} | {} | measured_gpu=`{}` |",
1151        if sintel_gpu["measured_gpu"].as_bool() == Some(true) {
1152            "ready"
1153        } else {
1154            "partial"
1155        },
1156        if sintel_gpu["measured_gpu"].as_bool() == Some(true) {
1157            "external"
1158        } else {
1159            "external"
1160        },
1161        sintel_gpu["measured_gpu"].as_bool().unwrap_or(false)
1162    );
1163    let _ = writeln!(
1164        markdown,
1165        "| Taxonomy coverage | {} | external | aggregate_status=`{}` |",
1166        if taxonomy.aggregate_status == "complete_or_explicitly_missing" {
1167            "ready"
1168        } else {
1169            "partial"
1170        },
1171        taxonomy.aggregate_status
1172    );
1173    let _ = writeln!(markdown);
1174    let _ = writeln!(markdown, "## Remaining Blockers");
1175    let _ = writeln!(markdown);
1176    for blocker in collect_blockers(&davis_gpu, &sintel_gpu, taxonomy) {
1177        let class = if blocker.contains("GPU") || blocker.contains("engine") {
1178            "external"
1179        } else {
1180            "internal"
1181        };
1182        let _ = writeln!(markdown, "- [{}] {}", class, blocker);
1183    }
1184    fs::write(path, markdown)?;
1185    Ok(())
1186}
1187
1188fn collect_blockers(
1189    davis_gpu: &Value,
1190    sintel_gpu: &Value,
1191    taxonomy: &ExternalValidationTaxonomy,
1192) -> Vec<String> {
1193    let mut blockers = Vec::new();
1194    if davis_gpu["measured_gpu"].as_bool() != Some(true) {
1195        blockers.push("DAVIS GPU timing remains unmeasured on this machine".to_string());
1196    }
1197    if sintel_gpu["measured_gpu"].as_bool() != Some(true) {
1198        blockers.push("Sintel GPU timing remains unmeasured on this machine".to_string());
1199    }
1200    for dataset in &taxonomy.datasets {
1201        for (label, status) in [
1202            ("realism_stress_case", dataset.realism_stress_case.as_str()),
1203            ("larger_roi_case", dataset.larger_roi_case.as_str()),
1204            ("mixed_regime_case", dataset.mixed_regime_case.as_str()),
1205        ] {
1206            if status == "explicitly_missing" {
1207                blockers.push(format!(
1208                    "{} coverage is partial for {}",
1209                    label, dataset.dataset_id
1210                ));
1211            }
1212        }
1213    }
1214    blockers.push("renderer-integrated sampling validation is still pending".to_string());
1215    blockers
1216}
1217
1218fn append_summary_fields(markdown: &mut String, label: &str, summary: &DatasetMappingSummary) {
1219    let _ = writeln!(
1220        markdown,
1221        "- {} native buffers: {}",
1222        label,
1223        summary.native_buffers.join(", ")
1224    );
1225    let _ = writeln!(
1226        markdown,
1227        "- {} derived buffers: {}",
1228        label,
1229        summary.derived_buffers.join(", ")
1230    );
1231    if !summary.unsupported_buffers.is_empty() {
1232        let _ = writeln!(
1233            markdown,
1234            "- {} unsupported buffers: {}",
1235            label,
1236            summary.unsupported_buffers.join(", ")
1237        );
1238    }
1239}
1240
1241fn append_gpu_summary(markdown: &mut String, label: &str, gpu: &Value) {
1242    let _ = writeln!(
1243        markdown,
1244        "- {} measured_gpu=`{}`, actual_real_external_data=`{}`",
1245        label,
1246        gpu["measured_gpu"].as_bool().unwrap_or(false),
1247        gpu["actual_real_external_data"].as_bool().unwrap_or(false)
1248    );
1249}
1250
1251fn append_demo_b_summary(markdown: &mut String, label: &str, metrics: &Value) {
1252    let capture_count = metrics["captures"]
1253        .as_array()
1254        .map(|items| items.len())
1255        .unwrap_or(0);
1256    let _ = writeln!(
1257        markdown,
1258        "- {} captures evaluated: {}",
1259        label, capture_count
1260    );
1261    if let Some(captures) = metrics["captures"].as_array() {
1262        for capture in captures.iter().take(2) {
1263            let _ = writeln!(
1264                markdown,
1265                "  - {} regime=`{}` fixed_budget_equal=`{}`",
1266                capture["capture_label"].as_str().unwrap_or("unknown"),
1267                capture["regime"].as_str().unwrap_or("unknown"),
1268                capture["fixed_budget_equal"].as_bool().unwrap_or(false)
1269            );
1270        }
1271    }
1272}
1273
1274fn append_scaling_summary(markdown: &mut String, label: &str, metrics: &Value) {
1275    let _ = writeln!(
1276        markdown,
1277        "- {} attempted_1080p=`{}` attempted_4k=`{}`",
1278        label,
1279        metrics["attempted_1080p"].as_bool().unwrap_or(false),
1280        metrics["attempted_4k"].as_bool().unwrap_or(false)
1281    );
1282}
1283
1284fn require_file(path: &Path) -> Result<()> {
1285    if path.exists() {
1286        Ok(())
1287    } else {
1288        Err(Error::Message(format!(
1289            "required file missing: {}",
1290            path.display()
1291        )))
1292    }
1293}