Skip to main content

dsfb_computer_graphics/
metrics.rs

1use serde::Serialize;
2
3use crate::error::{Error, Result};
4use crate::frame::{mean_abs_error, mean_abs_error_over_mask, ImageFrame, ScalarField};
5use crate::scene::{ScenarioExpectation, ScenarioSupportCategory, SceneSequence};
6
/// Shared response cut-off used by the analysis in this module: it is passed to
/// `count_field_above` for the intervention-sparsity tally, to `first_frame_at_or_above`
/// for the onset response latency, and to `fraction_field_values_below_range` for the
/// neighborhood-clamp "silent" fraction.
const LOW_RESPONSE_THRESHOLD: f32 = 0.50;
8
/// Borrowed per-run inputs handed to the scenario analysis.
///
/// The per-frame slices are indexed by the frame index of the analyzed sequence.
#[derive(Clone, Debug)]
pub struct RunAnalysisInput<'a> {
    /// Run identifier; copied into `RunSummary::run_id` and matched against ids such as
    /// `"fixed_alpha"`, `"strong_heuristic"` and `"dsfb_host_realistic"`.
    pub id: &'a str,
    /// Label copied into `RunSummary::label`.
    pub label: &'a str,
    /// Category string copied into `RunSummary::category`.
    pub category: &'a str,
    /// Resolved output frames; compared against each frame's ground truth.
    pub resolved_frames: &'a [ImageFrame],
    /// Reprojected history frames. NOTE(review): not read by the code visible in this part
    /// of the file.
    pub reprojected_history_frames: &'a [ImageFrame],
    /// Per-frame alpha fields.
    pub alpha_frames: &'a [ScalarField],
    /// Per-frame response fields.
    pub response_frames: &'a [ScalarField],
    /// Optional per-frame trust fields; when `None`, all trust metrics are reported as
    /// `None`, empty, or zero.
    pub trust_frames: Option<&'a [ScalarField]>,
}
20
/// One bin of the trust calibration table stored in `RunSummary::trust_calibration_bins`.
///
/// NOTE(review): the bins are produced by `calibration_bins`, which is not visible here;
/// the field meanings below are read from the names only and should be confirmed.
#[derive(Clone, Debug, Serialize)]
pub struct CalibrationBin {
    // Presumably the bin's trust-value bounds — TODO confirm inclusivity.
    pub lower: f32,
    pub upper: f32,
    // Presumably the number of samples that fell into the bin.
    pub sample_count: usize,
    // Presumably the mean trust and mean error of those samples.
    pub mean_trust: f32,
    pub mean_error: f32,
}
29
/// One bin of the trust histogram stored in `RunSummary::trust_histogram`.
///
/// `analyze_run` treats a bin as occupied when `sample_count > 0`.
/// NOTE(review): the bins are produced by `histogram_bins`, which is not visible here;
/// `lower`/`upper` are presumably the bin bounds — confirm inclusivity there.
#[derive(Clone, Debug, Serialize)]
pub struct HistogramBin {
    pub lower: f32,
    pub upper: f32,
    pub sample_count: usize,
}
36
/// Classification of a run's trust distribution, stored in
/// `RunSummary::trust_operating_mode`.
///
/// NOTE(review): the value is chosen by `classify_trust_operating_mode` from the occupied
/// bin count, entropy and discreteness score; the exact thresholds are not visible here.
#[derive(Clone, Copy, Debug, Serialize)]
pub enum TrustOperatingMode {
    NearBinaryGate,
    WeaklyGraded,
    StronglyGraded,
}
43
/// Metrics for a single frame of a single run, as computed by `analyze_run`.
///
/// "ROI" values are restricted to the scenario's target mask and "non-ROI" values to its
/// inverse.
#[derive(Clone, Debug, Serialize)]
pub struct RunFrameMetrics {
    /// Index of the frame within the sequence.
    pub frame_index: usize,
    // Error of the resolved frame against the frame's ground truth (whole frame, ROI, non-ROI).
    pub overall_mae: f32,
    pub overall_rmse: f32,
    pub roi_mae: f32,
    pub roi_rmse: f32,
    pub non_roi_mae: f32,
    pub non_roi_rmse: f32,
    // Means of the run's alpha field (whole frame, ROI, non-ROI).
    pub alpha_mean: f32,
    pub alpha_roi_mean: f32,
    pub alpha_non_roi_mean: f32,
    // Means of the run's response field (whole frame, ROI, non-ROI).
    pub response_mean: f32,
    pub response_roi_mean: f32,
    pub response_non_roi_mean: f32,
    // Means of the run's trust field; `None` when the run supplies no trust frames.
    pub trust_mean: Option<f32>,
    pub trust_roi_mean: Option<f32>,
    pub trust_non_roi_mean: Option<f32>,
}
63
/// Whole-sequence summary of one run, as assembled by `analyze_run`.
///
/// "Onset" below means the scenario's onset frame clamped to the last frame of the sequence.
#[derive(Clone, Debug, Serialize)]
pub struct RunSummary {
    // Identity fields copied from `RunAnalysisInput`.
    pub run_id: String,
    pub label: String,
    pub category: String,
    /// Largest per-frame ROI MAE over the sequence.
    pub peak_roi_mae: f32,
    /// Frame index at which `peak_roi_mae` was first reached.
    pub peak_roi_mae_frame: usize,
    // Sums of the per-frame ROI / non-ROI MAE over all frames.
    pub cumulative_roi_mae: f32,
    pub cumulative_non_roi_mae: f32,
    // Per-frame metrics summed and divided by the sequence frame count.
    pub average_overall_mae: f32,
    pub average_overall_rmse: f32,
    pub average_roi_mae: f32,
    pub average_non_roi_mae: f32,
    pub average_non_roi_rmse: f32,
    /// Number of frames from onset onward whose ROI MAE exceeds the persistence threshold.
    pub ghost_persistence_frames: usize,
    /// Frames between onset and the first frame at or after onset whose ROI-mean response
    /// reaches `LOW_RESPONSE_THRESHOLD`; `None` if that never happens.
    pub onset_response_latency_frames: Option<usize>,
    /// Mean of the per-frame non-ROI response mean from onset onward.
    pub false_positive_response_rate: f32,
    /// Result of `count_field_above(response, LOW_RESPONSE_THRESHOLD)` summed over frames,
    /// divided by frames x width x height.
    pub intervention_sparsity: f32,
    /// Sum of the per-frame alpha means divided by the sequence frame count.
    pub mean_alpha: f32,
    // 90th percentile and maximum (floored at 0.0) of the alpha values at the onset frame.
    pub onset_alpha_p90: f32,
    pub onset_alpha_max: f32,
    // Results of `temporal_scalar_delta_mean` over the alpha / response frames.
    pub alpha_temporal_delta_mean: f32,
    pub response_temporal_delta_mean: f32,
    /// Result of `temporal_variance_non_roi` over the resolved frames.
    pub temporal_variance_non_roi: f32,
    // Trust diagnostics; `None` / empty / zero / `false` when the run has no trust frames.
    // The histogram, calibration bins, entropy and derived values are taken from the trust
    // field at the onset frame.
    pub trust_error_rank_correlation: Option<f32>,
    pub trust_rank_correlation_is_degenerate: bool,
    pub trust_calibration_bins: Vec<CalibrationBin>,
    pub trust_histogram: Vec<HistogramBin>,
    /// Number of histogram bins with at least one sample.
    pub trust_occupied_bin_count: usize,
    pub trust_entropy_bits: Option<f32>,
    pub trust_discreteness_score: Option<f32>,
    /// `2^entropy` rounded and clamped to `1..=histogram bin count`.
    pub trust_effective_level_count: Option<usize>,
    pub trust_operating_mode: Option<TrustOperatingMode>,
}
98
/// Analysis result for one run within one scenario: the whole-sequence summary plus the
/// per-frame metrics it was derived from.
#[derive(Clone, Debug, Serialize)]
pub struct ScenarioRunReport {
    pub summary: RunSummary,
    /// One entry per sequence frame, in frame order.
    pub frame_metrics: Vec<RunFrameMetrics>,
}
104
/// Report for one scenario, as assembled by `analyze_scenario`.
#[derive(Clone, Debug, Serialize)]
pub struct ScenarioReport {
    // Descriptive fields copied from the `SceneSequence`.
    pub scenario_id: String,
    pub scenario_title: String,
    pub scenario_description: String,
    pub expectation: ScenarioExpectation,
    pub support_category: ScenarioSupportCategory,
    pub roi_note: String,
    pub sampling_taxonomy: String,
    pub realism_stress: bool,
    pub competitive_baseline_case: bool,
    pub bounded_loss_disclosure: bool,
    pub demo_b_taxonomy: String,
    pub target_label: String,
    pub onset_frame: usize,
    /// Number of `true` entries in the sequence's target mask.
    pub target_pixels: usize,
    /// `target_pixels` divided by `width * height` (denominator floored at 1).
    pub target_area_fraction: f32,
    /// Value returned by `persistence_threshold` for the sequence.
    pub persistence_threshold: f32,
    /// One report per analyzed run, in input order.
    pub runs: Vec<ScenarioRunReport>,
    /// Human-readable summary sentence; wording depends on `expectation`.
    pub headline: String,
    /// Human-readable caveat sentence; wording depends on `expectation`.
    pub bounded_or_neutral_note: String,
    // Gains: baseline cumulative ROI MAE minus host-realistic cumulative ROI MAE
    // (positive means host-realistic has the lower error).
    pub host_realistic_vs_fixed_alpha_cumulative_roi_gain: f32,
    pub host_realistic_vs_strong_heuristic_cumulative_roi_gain: f32,
    // Penalties: host-realistic average non-ROI MAE minus the baseline's.
    pub host_realistic_non_roi_penalty_vs_fixed_alpha: f32,
    pub host_realistic_non_roi_penalty_vs_strong_heuristic: f32,
    /// `ratio_or_identity(host-realistic, strong-heuristic)` over average non-ROI MAE.
    pub host_realistic_non_roi_penalty_ratio_vs_strong_heuristic: f32,
    // Diagnostics from the `neighborhood_clamp` run's response frames over the target mask
    // (0.0 when that run is absent). NOTE(review): all three are derived from the response
    // frames; confirm that is intended for the "history inside hull" fraction.
    pub neighborhood_clamp_roi_trigger_mean: f32,
    pub neighborhood_clamp_roi_silent_fraction: f32,
    pub neighborhood_clamp_history_inside_hull_fraction: f32,
}
135
/// Summary of one ablation run id, built by `analyze_demo_a_suite`.
///
/// An entry exists only for ablation ids present in the canonical (first) scenario.
#[derive(Clone, Debug, Serialize)]
pub struct AblationEntry {
    pub run_id: String,
    /// Label of the run as reported in the canonical scenario.
    pub label: String,
    // Values taken from the canonical scenario's run summary.
    pub canonical_cumulative_roi_mae: f32,
    pub canonical_peak_roi_mae: f32,
    // Means over every scenario that contains a run with this id.
    pub suite_mean_cumulative_roi_mae: f32,
    pub suite_mean_false_positive_response_rate: f32,
}
145
/// One row of the suite-wide leaderboard stored in `DemoASuiteMetrics::aggregate_leaderboard`.
///
/// NOTE(review): rows are produced by `aggregate_leaderboard`, which is not visible here;
/// how `mean_rank` and `benefit_scenarios_won` are computed should be confirmed there.
#[derive(Clone, Debug, Serialize)]
pub struct AggregateRunScore {
    pub run_id: String,
    pub label: String,
    pub category: String,
    pub mean_rank: f32,
    pub mean_cumulative_roi_mae: f32,
    pub mean_non_roi_mae: f32,
    pub mean_false_positive_response_rate: f32,
    pub benefit_scenarios_won: usize,
}
157
/// Suite-level overview produced by `analyze_demo_a_suite`.
#[derive(Clone, Debug, Serialize)]
pub struct DemoASuiteSummary {
    /// Id of the first scenario in the input, which is treated as canonical.
    pub canonical_scenario_id: String,
    /// Ids of all analyzed scenarios, in input order.
    pub scenario_ids: Vec<String>,
    // Scenario ids grouped by `ScenarioSupportCategory`.
    pub point_roi_scenarios: Vec<String>,
    pub region_roi_scenarios: Vec<String>,
    pub negative_control_scenarios: Vec<String>,
    // Fixed lists of run ids, hard-coded in `analyze_demo_a_suite`.
    pub baseline_ids: Vec<String>,
    pub dsfb_ids: Vec<String>,
    pub ablation_ids: Vec<String>,
    // Sentences comparing host-realistic DSFB with fixed alpha / the strong heuristic on
    // the canonical scenario.
    pub primary_behavioral_result: String,
    pub secondary_behavioral_result: String,
    // Number of scenarios where host-realistic cumulative ROI MAE is lower than the
    // baseline's by more than 1e-6.
    pub host_realistic_beats_fixed_alpha_scenarios: usize,
    pub host_realistic_beats_strong_heuristic_scenarios: usize,
    /// Scenarios that are `NeutralExpected` or whose gain over the strong heuristic is <= 0.
    pub mixed_or_neutral_scenarios: Vec<String>,
    /// Fixed list of caveat sentences.
    pub remaining_blockers: Vec<String>,
}
175
/// Complete output of `analyze_demo_a_suite`: the suite summary, every scenario report,
/// the ablation table, and the aggregate leaderboard.
#[derive(Clone, Debug, Serialize)]
pub struct DemoASuiteMetrics {
    pub summary: DemoASuiteSummary,
    /// Scenario reports in input order; the first one is the canonical scenario.
    pub scenarios: Vec<ScenarioReport>,
    pub ablations: Vec<AblationEntry>,
    pub aggregate_leaderboard: Vec<AggregateRunScore>,
}
183
184pub fn analyze_demo_a_suite(
185    scenario_runs: &[(SceneSequence, Vec<RunAnalysisInput<'_>>)],
186) -> Result<DemoASuiteMetrics> {
187    if scenario_runs.is_empty() {
188        return Err(Error::Message(
189            "Demo A suite analysis requires at least one scenario".to_string(),
190        ));
191    }
192
193    let mut scenarios = Vec::with_capacity(scenario_runs.len());
194    for (sequence, runs) in scenario_runs {
195        scenarios.push(analyze_scenario(sequence, runs)?);
196    }
197
198    let canonical = &scenarios[0];
199    let fixed_alpha = find_run(canonical, "fixed_alpha")?;
200    let strong_heuristic = find_run(canonical, "strong_heuristic")?;
201    let host_realistic = find_run(canonical, "dsfb_host_realistic")?;
202
203    let primary_behavioral_result = format!(
204        "On the canonical scenario, host-realistic DSFB reduced cumulative ROI MAE from {:.5} for fixed alpha to {:.5}.",
205        fixed_alpha.cumulative_roi_mae, host_realistic.cumulative_roi_mae
206    );
207    let secondary_behavioral_result = format!(
208        "Against the strong heuristic baseline, host-realistic DSFB changed cumulative ROI MAE from {:.5} to {:.5}; mixed outcomes are surfaced per scenario below.",
209        strong_heuristic.cumulative_roi_mae, host_realistic.cumulative_roi_mae
210    );
211
212    let host_realistic_beats_fixed_alpha_scenarios = scenarios
213        .iter()
214        .filter(|scenario| {
215            let fixed = scenario
216                .runs
217                .iter()
218                .find(|run| run.summary.run_id == "fixed_alpha");
219            let host = scenario
220                .runs
221                .iter()
222                .find(|run| run.summary.run_id == "dsfb_host_realistic");
223            match (fixed, host) {
224                (Some(fixed), Some(host)) => {
225                    host.summary.cumulative_roi_mae + 1e-6 < fixed.summary.cumulative_roi_mae
226                }
227                _ => false,
228            }
229        })
230        .count();
231    let host_realistic_beats_strong_heuristic_scenarios = scenarios
232        .iter()
233        .filter(|scenario| {
234            let heuristic = scenario
235                .runs
236                .iter()
237                .find(|run| run.summary.run_id == "strong_heuristic");
238            let host = scenario
239                .runs
240                .iter()
241                .find(|run| run.summary.run_id == "dsfb_host_realistic");
242            match (heuristic, host) {
243                (Some(heuristic), Some(host)) => {
244                    host.summary.cumulative_roi_mae + 1e-6 < heuristic.summary.cumulative_roi_mae
245                }
246                _ => false,
247            }
248        })
249        .count();
250
251    let mixed_or_neutral_scenarios = scenarios
252        .iter()
253        .filter(|scenario| {
254            matches!(scenario.expectation, ScenarioExpectation::NeutralExpected)
255                || scenario.host_realistic_vs_strong_heuristic_cumulative_roi_gain <= 0.0
256        })
257        .map(|scenario| scenario.scenario_id.clone())
258        .collect::<Vec<_>>();
259
260    let baseline_ids = vec![
261        "fixed_alpha".to_string(),
262        "residual_threshold".to_string(),
263        "neighborhood_clamp".to_string(),
264        "depth_normal_reject".to_string(),
265        "reactive_mask".to_string(),
266        "strong_heuristic".to_string(),
267    ];
268    let dsfb_ids = vec![
269        "dsfb_synthetic_visibility".to_string(),
270        "dsfb_host_realistic".to_string(),
271    ];
272    let ablation_ids = vec![
273        "dsfb_synthetic_visibility".to_string(),
274        "dsfb_host_realistic".to_string(),
275        "dsfb_host_gated_reference".to_string(),
276        "dsfb_motion_augmented".to_string(),
277        "dsfb_no_visibility".to_string(),
278        "dsfb_no_thin".to_string(),
279        "dsfb_no_motion_edge".to_string(),
280        "dsfb_no_grammar".to_string(),
281        "dsfb_residual_only".to_string(),
282        "dsfb_trust_no_alpha".to_string(),
283    ];
284
285    let ablations = ablation_ids
286        .iter()
287        .filter_map(|run_id| {
288            let canonical_run = canonical
289                .runs
290                .iter()
291                .find(|run| run.summary.run_id == *run_id)?;
292            let suite_matches = scenarios
293                .iter()
294                .filter_map(|scenario| {
295                    scenario
296                        .runs
297                        .iter()
298                        .find(|run| run.summary.run_id == *run_id)
299                        .map(|run| &run.summary)
300                })
301                .collect::<Vec<_>>();
302            let suite_count = suite_matches.len().max(1) as f32;
303            Some(AblationEntry {
304                run_id: (*run_id).clone(),
305                label: canonical_run.summary.label.clone(),
306                canonical_cumulative_roi_mae: canonical_run.summary.cumulative_roi_mae,
307                canonical_peak_roi_mae: canonical_run.summary.peak_roi_mae,
308                suite_mean_cumulative_roi_mae: suite_matches
309                    .iter()
310                    .map(|summary| summary.cumulative_roi_mae)
311                    .sum::<f32>()
312                    / suite_count,
313                suite_mean_false_positive_response_rate: suite_matches
314                    .iter()
315                    .map(|summary| summary.false_positive_response_rate)
316                    .sum::<f32>()
317                    / suite_count,
318            })
319        })
320        .collect::<Vec<_>>();
321
322    let aggregate_leaderboard = aggregate_leaderboard(&scenarios);
323    let point_roi_scenarios = scenarios
324        .iter()
325        .filter(|scenario| {
326            matches!(
327                scenario.support_category,
328                ScenarioSupportCategory::PointLikeRoi
329            )
330        })
331        .map(|scenario| scenario.scenario_id.clone())
332        .collect::<Vec<_>>();
333    let region_roi_scenarios = scenarios
334        .iter()
335        .filter(|scenario| {
336            matches!(
337                scenario.support_category,
338                ScenarioSupportCategory::RegionRoi
339            )
340        })
341        .map(|scenario| scenario.scenario_id.clone())
342        .collect::<Vec<_>>();
343    let negative_control_scenarios = scenarios
344        .iter()
345        .filter(|scenario| {
346            matches!(
347                scenario.support_category,
348                ScenarioSupportCategory::NegativeControl
349            )
350        })
351        .map(|scenario| scenario.scenario_id.clone())
352        .collect::<Vec<_>>();
353    let remaining_blockers = vec![
354        "The scenario suite is still synthetic and does not prove production-scene generalization."
355            .to_string(),
356        "The strong heuristic baseline remains competitive on some cases, so the crate supports evaluation diligence rather than blanket win claims."
357            .to_string(),
358        "Cost accounting is architectural and CPU-side within the crate; it is not a measured GPU benchmark."
359            .to_string(),
360        "Point-like ROI scenarios remain mechanically useful but statistically weak, so aggregate claims must stay separated from region-ROI evidence."
361            .to_string(),
362    ];
363
364    Ok(DemoASuiteMetrics {
365        summary: DemoASuiteSummary {
366            canonical_scenario_id: canonical.scenario_id.clone(),
367            scenario_ids: scenarios
368                .iter()
369                .map(|scenario| scenario.scenario_id.clone())
370                .collect(),
371            point_roi_scenarios,
372            region_roi_scenarios,
373            negative_control_scenarios,
374            baseline_ids,
375            dsfb_ids,
376            ablation_ids,
377            primary_behavioral_result,
378            secondary_behavioral_result,
379            host_realistic_beats_fixed_alpha_scenarios,
380            host_realistic_beats_strong_heuristic_scenarios,
381            mixed_or_neutral_scenarios,
382            remaining_blockers,
383        },
384        scenarios,
385        ablations,
386        aggregate_leaderboard,
387    })
388}
389
390fn analyze_scenario(
391    sequence: &SceneSequence,
392    runs: &[RunAnalysisInput<'_>],
393) -> Result<ScenarioReport> {
394    if runs.is_empty() {
395        return Err(Error::Message(format!(
396            "scenario {} had no runs to analyze",
397            sequence.scenario_id.as_str()
398        )));
399    }
400
401    let non_roi_mask = invert_mask(&sequence.target_mask);
402    let threshold = persistence_threshold(sequence);
403    let mut reports = Vec::with_capacity(runs.len());
404    for run in runs {
405        reports.push(analyze_run(
406            sequence,
407            &sequence.target_mask,
408            &non_roi_mask,
409            threshold,
410            run,
411        ));
412    }
413
414    let fixed_alpha = reports
415        .iter()
416        .find(|run| run.summary.run_id == "fixed_alpha")
417        .ok_or_else(|| Error::Message("fixed_alpha run missing from scenario".to_string()))?;
418    let strong_heuristic = reports
419        .iter()
420        .find(|run| run.summary.run_id == "strong_heuristic")
421        .ok_or_else(|| Error::Message("strong_heuristic run missing from scenario".to_string()))?;
422    let host_realistic = reports
423        .iter()
424        .find(|run| run.summary.run_id == "dsfb_host_realistic")
425        .ok_or_else(|| {
426            Error::Message("dsfb_host_realistic run missing from scenario".to_string())
427        })?;
428
429    let headline = match sequence.expectation {
430        ScenarioExpectation::BenefitExpected => format!(
431            "{}: host-realistic DSFB changed cumulative ROI MAE from {:.5} (fixed alpha) and {:.5} (strong heuristic) to {:.5}.",
432            sequence.scenario_title,
433            fixed_alpha.summary.cumulative_roi_mae,
434            strong_heuristic.summary.cumulative_roi_mae,
435            host_realistic.summary.cumulative_roi_mae
436        ),
437        ScenarioExpectation::NeutralExpected => format!(
438            "{}: neutral holdout with host-realistic non-ROI MAE {:.5} versus {:.5} for fixed alpha.",
439            sequence.scenario_title,
440            host_realistic.summary.average_non_roi_mae,
441            fixed_alpha.summary.average_non_roi_mae
442        ),
443    };
444    let bounded_or_neutral_note = match sequence.expectation {
445        ScenarioExpectation::BenefitExpected => {
446            if host_realistic.summary.cumulative_roi_mae
447                > strong_heuristic.summary.cumulative_roi_mae
448            {
449                "Strong heuristic remains better on this scenario; the report surfaces that rather than hiding it."
450                    .to_string()
451            } else {
452                "Host-realistic DSFB remains competitive without privileged visibility hints on this scenario."
453                    .to_string()
454            }
455        }
456        ScenarioExpectation::NeutralExpected => {
457            "This is the honesty scenario: aggressive trust collapse is not expected to help, so false-positive response and non-ROI stability are the main evaluation criteria."
458                .to_string()
459        }
460    };
461
462    Ok(ScenarioReport {
463        scenario_id: sequence.scenario_id.as_str().to_string(),
464        scenario_title: sequence.scenario_title.clone(),
465        scenario_description: sequence.scenario_description.clone(),
466        expectation: sequence.expectation,
467        support_category: sequence.support_category,
468        roi_note: sequence.roi_note.clone(),
469        sampling_taxonomy: sequence.sampling_taxonomy.clone(),
470        realism_stress: sequence.realism_stress,
471        competitive_baseline_case: sequence.competitive_baseline_case,
472        bounded_loss_disclosure: sequence.bounded_loss_disclosure,
473        demo_b_taxonomy: sequence.demo_b_taxonomy.clone(),
474        target_label: sequence.target_label.clone(),
475        onset_frame: sequence.onset_frame,
476        target_pixels: sequence.target_mask.iter().filter(|value| **value).count(),
477        target_area_fraction: sequence.target_mask.iter().filter(|value| **value).count() as f32
478            / (sequence.config.width * sequence.config.height).max(1) as f32,
479        persistence_threshold: threshold,
480        headline,
481        bounded_or_neutral_note,
482        host_realistic_vs_fixed_alpha_cumulative_roi_gain: fixed_alpha.summary.cumulative_roi_mae
483            - host_realistic.summary.cumulative_roi_mae,
484        host_realistic_vs_strong_heuristic_cumulative_roi_gain: strong_heuristic
485            .summary
486            .cumulative_roi_mae
487            - host_realistic.summary.cumulative_roi_mae,
488        host_realistic_non_roi_penalty_vs_fixed_alpha: host_realistic.summary.average_non_roi_mae
489            - fixed_alpha.summary.average_non_roi_mae,
490        host_realistic_non_roi_penalty_vs_strong_heuristic: host_realistic
491            .summary
492            .average_non_roi_mae
493            - strong_heuristic.summary.average_non_roi_mae,
494        host_realistic_non_roi_penalty_ratio_vs_strong_heuristic: ratio_or_identity(
495            host_realistic.summary.average_non_roi_mae,
496            strong_heuristic.summary.average_non_roi_mae,
497        ),
498        neighborhood_clamp_roi_trigger_mean: runs
499            .iter()
500            .find(|run| run.id == "neighborhood_clamp")
501            .map(|run| {
502                mean_field_over_mask_range(
503                    run.response_frames,
504                    &sequence.target_mask,
505                    sequence.onset_frame,
506                )
507            })
508            .unwrap_or(0.0),
509        neighborhood_clamp_roi_silent_fraction: runs
510            .iter()
511            .find(|run| run.id == "neighborhood_clamp")
512            .map(|run| {
513                fraction_field_values_below_range(
514                    run.response_frames,
515                    &sequence.target_mask,
516                    sequence.onset_frame,
517                    LOW_RESPONSE_THRESHOLD,
518                )
519            })
520            .unwrap_or(0.0),
521        neighborhood_clamp_history_inside_hull_fraction: runs
522            .iter()
523            .find(|run| run.id == "neighborhood_clamp")
524            .map(|run| {
525                fraction_field_values_below_range(
526                    run.response_frames,
527                    &sequence.target_mask,
528                    sequence.onset_frame,
529                    1.0e-4,
530                )
531            })
532            .unwrap_or(0.0),
533        runs: reports,
534    })
535}
536
/// Computes the per-frame metrics and the whole-sequence summary for one run.
///
/// `target_mask` selects the ROI pixels and `non_roi_mask` the remaining pixels;
/// `threshold` is the ROI-MAE level above which a frame counts toward ghost persistence.
///
/// NOTE(review): the run's `resolved_frames`, `alpha_frames`, `response_frames` and (when
/// present) `trust_frames` are indexed by sequence frame index, so a run with fewer frames
/// than the sequence panics here. `run.alpha_frames[onset]` likewise panics when the run
/// has no alpha frames.
fn analyze_run(
    sequence: &SceneSequence,
    target_mask: &[bool],
    non_roi_mask: &[bool],
    threshold: f32,
    run: &RunAnalysisInput<'_>,
) -> ScenarioRunReport {
    // Clamp the onset to the last frame so later `[onset]` lookups stay within the sequence.
    let onset = sequence
        .onset_frame
        .min(sequence.frames.len().saturating_sub(1));
    let mut frame_metrics = Vec::with_capacity(sequence.frames.len());
    // The `average_*` accumulators hold running sums; they are divided by the frame count
    // only when the summary is built.
    let mut cumulative_roi_mae = 0.0;
    let mut cumulative_non_roi_mae = 0.0;
    let mut average_overall_mae = 0.0;
    let mut average_overall_rmse = 0.0;
    let mut average_roi_mae = 0.0;
    let mut average_non_roi_mae = 0.0;
    let mut average_non_roi_rmse = 0.0;
    // Starts at -inf so the first frame always becomes the initial peak.
    let mut peak_roi_mae = f32::NEG_INFINITY;
    let mut peak_roi_mae_frame = onset;
    let mut response_pixels = 0usize;
    let total_pixels = sequence.frames.len() * sequence.config.width * sequence.config.height;

    for frame_index in 0..sequence.frames.len() {
        let gt = &sequence.frames[frame_index].ground_truth;
        let resolved = &run.resolved_frames[frame_index];
        let alpha = &run.alpha_frames[frame_index];
        let response = &run.response_frames[frame_index];
        let trust = run.trust_frames.map(|fields| &fields[frame_index]);

        let overall_mae = mean_abs_error(resolved, gt);
        let overall_rmse = rmse(resolved, gt, None);
        let roi_mae = mean_abs_error_over_mask(resolved, gt, target_mask);
        let roi_rmse = rmse(resolved, gt, Some(target_mask));
        let non_roi_mae = mean_abs_error_over_mask(resolved, gt, non_roi_mask);
        let non_roi_rmse = rmse(resolved, gt, Some(non_roi_mask));
        let alpha_mean = alpha.mean();
        let alpha_roi_mean = alpha.mean_over_mask(target_mask);
        let alpha_non_roi_mean = alpha.mean_over_mask(non_roi_mask);
        let response_mean = response.mean();
        let response_roi_mean = response.mean_over_mask(target_mask);
        let response_non_roi_mean = response.mean_over_mask(non_roi_mask);
        let trust_mean = trust.map(ScalarField::mean);
        let trust_roi_mean = trust.map(|field| field.mean_over_mask(target_mask));
        let trust_non_roi_mean = trust.map(|field| field.mean_over_mask(non_roi_mask));

        average_overall_mae += overall_mae;
        average_overall_rmse += overall_rmse;
        average_roi_mae += roi_mae;
        average_non_roi_mae += non_roi_mae;
        average_non_roi_rmse += non_roi_rmse;
        cumulative_roi_mae += roi_mae;
        cumulative_non_roi_mae += non_roi_mae;
        response_pixels += count_field_above(response, LOW_RESPONSE_THRESHOLD);

        // Strict comparison: on ties the earliest frame keeps the peak.
        if roi_mae > peak_roi_mae {
            peak_roi_mae = roi_mae;
            peak_roi_mae_frame = frame_index;
        }

        frame_metrics.push(RunFrameMetrics {
            frame_index,
            overall_mae,
            overall_rmse,
            roi_mae,
            roi_rmse,
            non_roi_mae,
            non_roi_rmse,
            alpha_mean,
            alpha_roi_mean,
            alpha_non_roi_mean,
            response_mean,
            response_roi_mean,
            response_non_roi_mean,
            trust_mean,
            trust_roi_mean,
            trust_non_roi_mean,
        });
    }

    // Floored at 1 so the averages below never divide by zero.
    let frame_count = sequence.frames.len().max(1) as f32;
    let ghost_persistence_frames =
        compute_ghost_persistence(&frame_metrics, onset, threshold, |frame| frame.roi_mae);
    // Latency is reported relative to the onset frame.
    let onset_response_latency_frames =
        first_frame_at_or_above(&frame_metrics, onset, LOW_RESPONSE_THRESHOLD, |frame| {
            frame.response_roi_mean
        })
        .map(|frame| frame.saturating_sub(onset));
    // Mean non-ROI response over the frames from onset onward.
    let false_positive_response_rate = frame_metrics
        .iter()
        .skip(onset)
        .map(|frame| frame.response_non_roi_mean)
        .sum::<f32>()
        / (frame_metrics.len().saturating_sub(onset).max(1) as f32);
    let intervention_sparsity = response_pixels as f32 / total_pixels.max(1) as f32;
    let onset_alpha_values = run.alpha_frames[onset].values().to_vec();
    let onset_alpha_p90 = percentile(&onset_alpha_values, 0.90);
    // Folding from 0.0 means the reported maximum is never negative.
    let onset_alpha_max = onset_alpha_values.iter().copied().fold(0.0f32, f32::max);
    let temporal_variance_non_roi =
        temporal_variance_non_roi(sequence, run.resolved_frames, non_roi_mask);
    let alpha_temporal_delta_mean = temporal_scalar_delta_mean(run.alpha_frames);
    let response_temporal_delta_mean = temporal_scalar_delta_mean(run.response_frames);
    // Trust diagnostics: the histogram, entropy and calibration bins are taken from the
    // onset frame only; runs without trust frames get the empty / `None` defaults.
    let (
        trust_error_rank_correlation,
        trust_rank_correlation_is_degenerate,
        trust_calibration_bins,
        trust_histogram,
        trust_occupied_bin_count,
        trust_entropy_bits,
        trust_discreteness_score,
        trust_effective_level_count,
        trust_operating_mode,
    ) = if let Some(fields) = run.trust_frames {
        let histogram = histogram_bins(&fields[onset], 10);
        let occupied_bin_count = histogram.iter().filter(|bin| bin.sample_count > 0).count();
        let entropy_bits = histogram_entropy_bits(&histogram);
        let discreteness_score =
            entropy_bits.map(|entropy| trust_discreteness_score(histogram.len(), entropy));
        // 2^entropy, rounded and clamped to 1..=bin count.
        let effective_level_count = entropy_bits.map(|entropy| {
            2.0f32
                .powf(entropy)
                .round()
                .clamp(1.0, histogram.len() as f32) as usize
        });
        let correlation = frame_spearman_correlation(fields, &frame_metrics, onset);
        let correlation_degenerate =
            trust_rank_correlation_is_degenerate(fields, onset, occupied_bin_count, entropy_bits);
        (
            Some(correlation),
            correlation_degenerate,
            calibration_bins(
                &fields[onset],
                &run.resolved_frames[onset],
                &sequence.frames[onset].ground_truth,
            ),
            histogram.clone(),
            occupied_bin_count,
            entropy_bits,
            discreteness_score,
            effective_level_count,
            classify_trust_operating_mode(occupied_bin_count, entropy_bits, discreteness_score),
        )
    } else {
        (
            None,
            false,
            Vec::new(),
            Vec::new(),
            0,
            None,
            None,
            None,
            None,
        )
    };

    ScenarioRunReport {
        summary: RunSummary {
            run_id: run.id.to_string(),
            label: run.label.to_string(),
            category: run.category.to_string(),
            peak_roi_mae,
            peak_roi_mae_frame,
            cumulative_roi_mae,
            cumulative_non_roi_mae,
            average_overall_mae: average_overall_mae / frame_count,
            average_overall_rmse: average_overall_rmse / frame_count,
            average_roi_mae: average_roi_mae / frame_count,
            average_non_roi_mae: average_non_roi_mae / frame_count,
            average_non_roi_rmse: average_non_roi_rmse / frame_count,
            ghost_persistence_frames,
            onset_response_latency_frames,
            false_positive_response_rate,
            intervention_sparsity,
            // NOTE(review): sums over every alpha frame the run supplies but divides by the
            // sequence frame count; the two agree only when the lengths match.
            mean_alpha: run.alpha_frames.iter().map(ScalarField::mean).sum::<f32>() / frame_count,
            onset_alpha_p90,
            onset_alpha_max,
            alpha_temporal_delta_mean,
            response_temporal_delta_mean,
            temporal_variance_non_roi,
            trust_error_rank_correlation,
            trust_rank_correlation_is_degenerate,
            trust_calibration_bins,
            trust_histogram,
            trust_occupied_bin_count,
            trust_entropy_bits,
            trust_discreteness_score,
            trust_effective_level_count,
            trust_operating_mode,
        },
        frame_metrics,
    }
}
730
731fn persistence_threshold(sequence: &SceneSequence) -> f32 {
732    if sequence.onset_frame == 0 {
733        return 0.02;
734    }
735    let previous = &sequence.frames[sequence.onset_frame - 1].ground_truth;
736    let current = &sequence.frames[sequence.onset_frame].ground_truth;
737    (mean_abs_error_over_mask(previous, current, &sequence.target_mask) * 0.15).max(0.02)
738}
739
740fn rmse(frame_a: &ImageFrame, frame_b: &ImageFrame, mask: Option<&[bool]>) -> f32 {
741    let mut sum = 0.0;
742    let mut count = 0usize;
743    for y in 0..frame_a.height() {
744        for x in 0..frame_a.width() {
745            let index = y * frame_a.width() + x;
746            if mask.map(|values| values[index]).unwrap_or(true) {
747                let diff = frame_a.get(x, y).abs_diff(frame_b.get(x, y));
748                sum += diff * diff;
749                count += 1;
750            }
751        }
752    }
753    if count == 0 {
754        0.0
755    } else {
756        (sum / count as f32).sqrt()
757    }
758}
759
/// Returns a new mask of the same length with every entry negated.
fn invert_mask(mask: &[bool]) -> Vec<bool> {
    let mut inverted = Vec::with_capacity(mask.len());
    inverted.extend(mask.iter().map(|&flag| !flag));
    inverted
}
763
764fn compute_ghost_persistence(
765    frame_metrics: &[RunFrameMetrics],
766    onset: usize,
767    threshold: f32,
768    metric: impl Fn(&RunFrameMetrics) -> f32,
769) -> usize {
770    frame_metrics
771        .iter()
772        .skip(onset)
773        .filter(|frame| metric(frame) > threshold)
774        .count()
775}
776
777fn first_frame_at_or_above(
778    frame_metrics: &[RunFrameMetrics],
779    start: usize,
780    threshold: f32,
781    metric: impl Fn(&RunFrameMetrics) -> f32,
782) -> Option<usize> {
783    frame_metrics
784        .iter()
785        .enumerate()
786        .skip(start)
787        .find_map(|(index, frame)| (metric(frame) >= threshold).then_some(index))
788}
789
/// Nearest-rank percentile of `values`.
///
/// `quantile` is clamped to `[0, 1]`; the result is the element of the ascending
/// (`total_cmp`) ordering at index `round((len - 1) * quantile)`. Returns 0.0 for an
/// empty slice.
fn percentile(values: &[f32], quantile: f32) -> f32 {
    match values.len().checked_sub(1) {
        None => 0.0,
        Some(last_index) => {
            let mut ordered: Vec<f32> = values.iter().copied().collect();
            // Values equal under `total_cmp` are bit-identical, so stability is irrelevant.
            ordered.sort_unstable_by(f32::total_cmp);
            let fraction = quantile.clamp(0.0, 1.0);
            let rank = (last_index as f32 * fraction).round() as usize;
            ordered[rank]
        }
    }
}
799
800fn temporal_variance_non_roi(
801    sequence: &SceneSequence,
802    resolved_frames: &[ImageFrame],
803    non_roi_mask: &[bool],
804) -> f32 {
805    let width = sequence.config.width;
806    let height = sequence.config.height;
807    let frame_count = resolved_frames.len().max(1) as f32;
808    let mut total_variance = 0.0f32;
809    let mut pixel_count = 0usize;
810
811    for y in 0..height {
812        for x in 0..width {
813            let index = y * width + x;
814            if !non_roi_mask[index] {
815                continue;
816            }
817            let mean = resolved_frames
818                .iter()
819                .map(|frame| frame.get(x, y).luma())
820                .sum::<f32>()
821                / frame_count;
822            let variance = resolved_frames
823                .iter()
824                .map(|frame| {
825                    let diff = frame.get(x, y).luma() - mean;
826                    diff * diff
827                })
828                .sum::<f32>()
829                / frame_count;
830            total_variance += variance;
831            pixel_count += 1;
832        }
833    }
834
835    if pixel_count == 0 {
836        0.0
837    } else {
838        total_variance / pixel_count as f32
839    }
840}
841
842fn temporal_scalar_delta_mean(fields: &[ScalarField]) -> f32 {
843    if fields.len() < 2 {
844        return 0.0;
845    }
846    let mut total = 0.0;
847    for window in fields.windows(2) {
848        total += mean_abs_scalar_delta(&window[0], &window[1]);
849    }
850    total / (fields.len() - 1) as f32
851}
852
853fn mean_abs_scalar_delta(left: &ScalarField, right: &ScalarField) -> f32 {
854    if left.values().is_empty() || right.values().is_empty() {
855        return 0.0;
856    }
857    left.values()
858        .iter()
859        .zip(right.values().iter())
860        .map(|(left, right)| (left - right).abs())
861        .sum::<f32>()
862        / left.values().len().min(right.values().len()).max(1) as f32
863}
864
865fn frame_spearman_correlation(
866    trust_frames: &[ScalarField],
867    frame_metrics: &[RunFrameMetrics],
868    onset: usize,
869) -> f32 {
870    let trust_values = trust_frames
871        .iter()
872        .skip(onset)
873        .map(|field| field.mean())
874        .collect::<Vec<_>>();
875    let error_values = frame_metrics
876        .iter()
877        .skip(onset)
878        .map(|frame| frame.roi_mae)
879        .collect::<Vec<_>>();
880    spearman(&trust_values, &error_values)
881}
882
883fn histogram_bins(field: &ScalarField, bin_count: usize) -> Vec<HistogramBin> {
884    let safe_bin_count = bin_count.max(1);
885    let mut counts = vec![0usize; safe_bin_count];
886    for value in field.values().iter().copied() {
887        let index = ((value.clamp(0.0, 1.0) * safe_bin_count as f32).floor() as usize)
888            .min(safe_bin_count - 1);
889        counts[index] += 1;
890    }
891    counts
892        .into_iter()
893        .enumerate()
894        .map(|(index, sample_count)| HistogramBin {
895            lower: index as f32 / safe_bin_count as f32,
896            upper: (index + 1) as f32 / safe_bin_count as f32,
897            sample_count,
898        })
899        .collect()
900}
901
902fn histogram_entropy_bits(histogram: &[HistogramBin]) -> Option<f32> {
903    let total = histogram.iter().map(|bin| bin.sample_count).sum::<usize>();
904    if total == 0 {
905        return None;
906    }
907    let total_f = total as f32;
908    Some(
909        histogram
910            .iter()
911            .filter(|bin| bin.sample_count > 0)
912            .map(|bin| {
913                let p = bin.sample_count as f32 / total_f;
914                -p * p.log2()
915            })
916            .sum::<f32>(),
917    )
918}
919
/// Maps a histogram entropy to a score in `[0.0, 1.0]`, where `1.0` means zero
/// entropy and `0.0` means the entropy reaches `log2(bin_count)`.
///
/// `bin_count` is treated as at least two so the normalising entropy is never
/// zero, and the result is clamped to `[0.0, 1.0]`.
fn trust_discreteness_score(bin_count: usize, entropy_bits: f32) -> f32 {
    let effective_bins = bin_count.max(2) as f32;
    let entropy_ceiling = effective_bins.log2().max(f32::EPSILON);
    let normalized_entropy = entropy_bits / entropy_ceiling;
    (1.0 - normalized_entropy).clamp(0.0, 1.0)
}
924
925fn trust_rank_correlation_is_degenerate(
926    trust_frames: &[ScalarField],
927    onset: usize,
928    occupied_bin_count: usize,
929    entropy_bits: Option<f32>,
930) -> bool {
931    let Some(onset_field) = trust_frames.get(onset) else {
932        return true;
933    };
934    let (min_value, max_value) = onset_field.values().iter().copied().fold(
935        (f32::INFINITY, f32::NEG_INFINITY),
936        |(min_value, max_value), value| (min_value.min(value), max_value.max(value)),
937    );
938    let entropy_low = entropy_bits.unwrap_or(0.0) < 1.0;
939    let near_flat = (max_value - min_value).abs() < 0.15;
940    let too_few_post_onset_frames = trust_frames.len().saturating_sub(onset) < 4;
941    occupied_bin_count < 4 || entropy_low || near_flat || too_few_post_onset_frames
942}
943
944fn classify_trust_operating_mode(
945    occupied_bin_count: usize,
946    entropy_bits: Option<f32>,
947    discreteness_score: Option<f32>,
948) -> Option<TrustOperatingMode> {
949    let entropy_bits = entropy_bits?;
950    let discreteness_score = discreteness_score?;
951    Some(
952        if occupied_bin_count <= 3 || discreteness_score >= 0.72 || entropy_bits < 1.0 {
953            TrustOperatingMode::NearBinaryGate
954        } else if occupied_bin_count <= 5 || discreteness_score >= 0.42 || entropy_bits < 1.8 {
955            TrustOperatingMode::WeaklyGraded
956        } else {
957            TrustOperatingMode::StronglyGraded
958        },
959    )
960}
961
962fn calibration_bins(
963    trust: &ScalarField,
964    resolved: &ImageFrame,
965    ground_truth: &ImageFrame,
966) -> Vec<CalibrationBin> {
967    let mut bins = vec![
968        (0.0f32, 0.2f32, 0usize, 0.0f32, 0.0f32),
969        (0.2, 0.4, 0, 0.0, 0.0),
970        (0.4, 0.6, 0, 0.0, 0.0),
971        (0.6, 0.8, 0, 0.0, 0.0),
972        (0.8, 1.01, 0, 0.0, 0.0),
973    ];
974    for y in 0..trust.height() {
975        for x in 0..trust.width() {
976            let trust_value = trust.get(x, y);
977            let error_value = resolved.get(x, y).abs_diff(ground_truth.get(x, y));
978            for bin in &mut bins {
979                if trust_value >= bin.0 && trust_value < bin.1 {
980                    bin.2 += 1;
981                    bin.3 += trust_value;
982                    bin.4 += error_value;
983                    break;
984                }
985            }
986        }
987    }
988
989    bins.into_iter()
990        .map(
991            |(lower, upper, sample_count, trust_sum, error_sum)| CalibrationBin {
992                lower,
993                upper: upper.min(1.0),
994                sample_count,
995                mean_trust: if sample_count == 0 {
996                    0.0
997                } else {
998                    trust_sum / sample_count as f32
999                },
1000                mean_error: if sample_count == 0 {
1001                    0.0
1002                } else {
1003                    error_sum / sample_count as f32
1004                },
1005            },
1006        )
1007        .collect()
1008}
1009
1010fn spearman(left: &[f32], right: &[f32]) -> f32 {
1011    if left.len() != right.len() || left.is_empty() {
1012        return 0.0;
1013    }
1014    let left_ranks = ranks(left);
1015    let right_ranks = ranks(right);
1016    pearson(&left_ranks, &right_ranks)
1017}
1018
/// Returns the zero-based rank of every element of `values`, in input order.
///
/// Elements are ordered with `f32::total_cmp`. Values that compare equal with
/// `==` share the average of the ordinal positions they occupy (fractional
/// ranking). Giving tied values distinct ranks based on their input position
/// would inject an artificial ordering: a constant series would rank as
/// `0, 1, 2, ...` and correlate perfectly with any increasing series.
///
/// NaN never equals itself, so each NaN keeps its own ordinal position.
fn ranks(values: &[f32]) -> Vec<f32> {
    let mut order: Vec<usize> = (0..values.len()).collect();
    order.sort_by(|&a, &b| values[a].total_cmp(&values[b]));

    let mut result = vec![0.0; values.len()];
    let mut group_start = 0;
    while group_start < order.len() {
        // Extend the group over every following element equal to its first value.
        let group_value = values[order[group_start]];
        let mut group_end = group_start + 1;
        while group_end < order.len() && values[order[group_end]] == group_value {
            group_end += 1;
        }

        // Mean of the positions `group_start..group_end`.
        let shared_rank = (group_start + group_end - 1) as f32 / 2.0;
        for &index in &order[group_start..group_end] {
            result[index] = shared_rank;
        }
        group_start = group_end;
    }
    result
}
1028
/// Pearson correlation coefficient of two series.
///
/// Means are taken over each slice using `left`'s length (at least one) as the
/// divisor, and deviations are accumulated over the zipped pairs. The
/// denominator is floored at `f32::EPSILON`, so a series with no variance yields
/// `0.0` instead of a division by zero.
fn pearson(left: &[f32], right: &[f32]) -> f32 {
    let sample_count = left.len().max(1) as f32;
    let left_mean = left.iter().sum::<f32>() / sample_count;
    let right_mean = right.iter().sum::<f32>() / sample_count;

    let (covariance, left_spread, right_spread) = left.iter().zip(right).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(cross, left_sq, right_sq), (&l, &r)| {
            let left_dev = l - left_mean;
            let right_dev = r - right_mean;
            (
                cross + left_dev * right_dev,
                left_sq + left_dev * left_dev,
                right_sq + right_dev * right_dev,
            )
        },
    );

    let scale = (left_spread * right_spread).sqrt().max(f32::EPSILON);
    covariance / scale
}
1046
1047fn count_field_above(field: &ScalarField, threshold: f32) -> usize {
1048    field
1049        .values()
1050        .iter()
1051        .filter(|value| **value >= threshold)
1052        .count()
1053}
1054
1055fn mean_field_over_mask_range(fields: &[ScalarField], mask: &[bool], start: usize) -> f32 {
1056    if fields.is_empty() || start >= fields.len() {
1057        return 0.0;
1058    }
1059    fields
1060        .iter()
1061        .skip(start)
1062        .map(|field| field.mean_over_mask(mask))
1063        .sum::<f32>()
1064        / fields.len().saturating_sub(start).max(1) as f32
1065}
1066
1067fn fraction_field_values_below_range(
1068    fields: &[ScalarField],
1069    mask: &[bool],
1070    start: usize,
1071    threshold: f32,
1072) -> f32 {
1073    if fields.is_empty() || start >= fields.len() {
1074        return 0.0;
1075    }
1076    let mut total = 0usize;
1077    let mut hits = 0usize;
1078    for field in fields.iter().skip(start) {
1079        for (value, include) in field.values().iter().zip(mask.iter().copied()) {
1080            if !include {
1081                continue;
1082            }
1083            total += 1;
1084            if *value <= threshold {
1085                hits += 1;
1086            }
1087        }
1088    }
1089    if total == 0 {
1090        0.0
1091    } else {
1092        hits as f32 / total as f32
1093    }
1094}
1095
/// Divides `numerator` by `denominator`, with fallbacks for a near-zero
/// denominator.
///
/// A value counts as near zero when its magnitude is at most `f32::EPSILON`.
/// With a near-zero denominator the result is `1.0` if the numerator is also
/// near zero, and otherwise the numerator itself.
fn ratio_or_identity(numerator: f32, denominator: f32) -> f32 {
    let negligible = |value: f32| value.abs() <= f32::EPSILON;
    match (negligible(denominator), negligible(numerator)) {
        (false, _) => numerator / denominator,
        (true, true) => 1.0,
        (true, false) => numerator,
    }
}
1107
1108fn aggregate_leaderboard(scenarios: &[ScenarioReport]) -> Vec<AggregateRunScore> {
1109    let mut entries = std::collections::BTreeMap::<String, AggregateRunScore>::new();
1110    for scenario in scenarios {
1111        let mut ranked = scenario
1112            .runs
1113            .iter()
1114            .map(|run| {
1115                let score = match scenario.expectation {
1116                    ScenarioExpectation::BenefitExpected => run.summary.cumulative_roi_mae,
1117                    ScenarioExpectation::NeutralExpected => {
1118                        run.summary.average_non_roi_mae
1119                            + 0.5 * run.summary.false_positive_response_rate
1120                    }
1121                };
1122                (score, run)
1123            })
1124            .collect::<Vec<_>>();
1125        ranked.sort_by(|left, right| left.0.total_cmp(&right.0));
1126
1127        for (rank, (_, run)) in ranked.into_iter().enumerate() {
1128            let entry = entries
1129                .entry(run.summary.run_id.clone())
1130                .or_insert_with(|| AggregateRunScore {
1131                    run_id: run.summary.run_id.clone(),
1132                    label: run.summary.label.clone(),
1133                    category: run.summary.category.clone(),
1134                    mean_rank: 0.0,
1135                    mean_cumulative_roi_mae: 0.0,
1136                    mean_non_roi_mae: 0.0,
1137                    mean_false_positive_response_rate: 0.0,
1138                    benefit_scenarios_won: 0,
1139                });
1140            entry.mean_rank += rank as f32;
1141            entry.mean_cumulative_roi_mae += run.summary.cumulative_roi_mae;
1142            entry.mean_non_roi_mae += run.summary.average_non_roi_mae;
1143            entry.mean_false_positive_response_rate += run.summary.false_positive_response_rate;
1144            if rank == 0 && matches!(scenario.expectation, ScenarioExpectation::BenefitExpected) {
1145                entry.benefit_scenarios_won += 1;
1146            }
1147        }
1148    }
1149
1150    let scenario_count = scenarios.len().max(1) as f32;
1151    let mut values = entries
1152        .into_values()
1153        .map(|mut entry| {
1154            entry.mean_rank /= scenario_count;
1155            entry.mean_cumulative_roi_mae /= scenario_count;
1156            entry.mean_non_roi_mae /= scenario_count;
1157            entry.mean_false_positive_response_rate /= scenario_count;
1158            entry
1159        })
1160        .collect::<Vec<_>>();
1161    values.sort_by(|left, right| left.mean_rank.total_cmp(&right.mean_rank));
1162    values
1163}
1164
1165fn find_run<'a>(scenario: &'a ScenarioReport, run_id: &str) -> Result<&'a RunSummary> {
1166    scenario
1167        .runs
1168        .iter()
1169        .find(|run| run.summary.run_id == run_id)
1170        .map(|run| &run.summary)
1171        .ok_or_else(|| Error::Message(format!("run {run_id} missing from scenario report")))
1172}