1use serde::Serialize;
2
3use crate::error::{Error, Result};
4use crate::frame::{mean_abs_error, mean_abs_error_over_mask, ImageFrame, ScalarField};
5use crate::scene::{ScenarioExpectation, ScenarioSupportCategory, SceneSequence};
6
/// Response level used as the cut-off throughout this module: per-pixel responses at or
/// above it are counted as interventions, and the onset latency is the first frame whose
/// mean ROI response reaches it.
const LOW_RESPONSE_THRESHOLD: f32 = 0.5;
8
9#[derive(Clone, Debug)]
10pub struct RunAnalysisInput<'a> {
11 pub id: &'a str,
12 pub label: &'a str,
13 pub category: &'a str,
14 pub resolved_frames: &'a [ImageFrame],
15 pub reprojected_history_frames: &'a [ImageFrame],
16 pub alpha_frames: &'a [ScalarField],
17 pub response_frames: &'a [ScalarField],
18 pub trust_frames: Option<&'a [ScalarField]>,
19}
20
21#[derive(Clone, Debug, Serialize)]
22pub struct CalibrationBin {
23 pub lower: f32,
24 pub upper: f32,
25 pub sample_count: usize,
26 pub mean_trust: f32,
27 pub mean_error: f32,
28}
29
30#[derive(Clone, Debug, Serialize)]
31pub struct HistogramBin {
32 pub lower: f32,
33 pub upper: f32,
34 pub sample_count: usize,
35}
36
37#[derive(Clone, Copy, Debug, Serialize)]
38pub enum TrustOperatingMode {
39 NearBinaryGate,
40 WeaklyGraded,
41 StronglyGraded,
42}
43
44#[derive(Clone, Debug, Serialize)]
45pub struct RunFrameMetrics {
46 pub frame_index: usize,
47 pub overall_mae: f32,
48 pub overall_rmse: f32,
49 pub roi_mae: f32,
50 pub roi_rmse: f32,
51 pub non_roi_mae: f32,
52 pub non_roi_rmse: f32,
53 pub alpha_mean: f32,
54 pub alpha_roi_mean: f32,
55 pub alpha_non_roi_mean: f32,
56 pub response_mean: f32,
57 pub response_roi_mean: f32,
58 pub response_non_roi_mean: f32,
59 pub trust_mean: Option<f32>,
60 pub trust_roi_mean: Option<f32>,
61 pub trust_non_roi_mean: Option<f32>,
62}
63
64#[derive(Clone, Debug, Serialize)]
65pub struct RunSummary {
66 pub run_id: String,
67 pub label: String,
68 pub category: String,
69 pub peak_roi_mae: f32,
70 pub peak_roi_mae_frame: usize,
71 pub cumulative_roi_mae: f32,
72 pub cumulative_non_roi_mae: f32,
73 pub average_overall_mae: f32,
74 pub average_overall_rmse: f32,
75 pub average_roi_mae: f32,
76 pub average_non_roi_mae: f32,
77 pub average_non_roi_rmse: f32,
78 pub ghost_persistence_frames: usize,
79 pub onset_response_latency_frames: Option<usize>,
80 pub false_positive_response_rate: f32,
81 pub intervention_sparsity: f32,
82 pub mean_alpha: f32,
83 pub onset_alpha_p90: f32,
84 pub onset_alpha_max: f32,
85 pub alpha_temporal_delta_mean: f32,
86 pub response_temporal_delta_mean: f32,
87 pub temporal_variance_non_roi: f32,
88 pub trust_error_rank_correlation: Option<f32>,
89 pub trust_rank_correlation_is_degenerate: bool,
90 pub trust_calibration_bins: Vec<CalibrationBin>,
91 pub trust_histogram: Vec<HistogramBin>,
92 pub trust_occupied_bin_count: usize,
93 pub trust_entropy_bits: Option<f32>,
94 pub trust_discreteness_score: Option<f32>,
95 pub trust_effective_level_count: Option<usize>,
96 pub trust_operating_mode: Option<TrustOperatingMode>,
97}
98
99#[derive(Clone, Debug, Serialize)]
100pub struct ScenarioRunReport {
101 pub summary: RunSummary,
102 pub frame_metrics: Vec<RunFrameMetrics>,
103}
104
105#[derive(Clone, Debug, Serialize)]
106pub struct ScenarioReport {
107 pub scenario_id: String,
108 pub scenario_title: String,
109 pub scenario_description: String,
110 pub expectation: ScenarioExpectation,
111 pub support_category: ScenarioSupportCategory,
112 pub roi_note: String,
113 pub sampling_taxonomy: String,
114 pub realism_stress: bool,
115 pub competitive_baseline_case: bool,
116 pub bounded_loss_disclosure: bool,
117 pub demo_b_taxonomy: String,
118 pub target_label: String,
119 pub onset_frame: usize,
120 pub target_pixels: usize,
121 pub target_area_fraction: f32,
122 pub persistence_threshold: f32,
123 pub runs: Vec<ScenarioRunReport>,
124 pub headline: String,
125 pub bounded_or_neutral_note: String,
126 pub host_realistic_vs_fixed_alpha_cumulative_roi_gain: f32,
127 pub host_realistic_vs_strong_heuristic_cumulative_roi_gain: f32,
128 pub host_realistic_non_roi_penalty_vs_fixed_alpha: f32,
129 pub host_realistic_non_roi_penalty_vs_strong_heuristic: f32,
130 pub host_realistic_non_roi_penalty_ratio_vs_strong_heuristic: f32,
131 pub neighborhood_clamp_roi_trigger_mean: f32,
132 pub neighborhood_clamp_roi_silent_fraction: f32,
133 pub neighborhood_clamp_history_inside_hull_fraction: f32,
134}
135
136#[derive(Clone, Debug, Serialize)]
137pub struct AblationEntry {
138 pub run_id: String,
139 pub label: String,
140 pub canonical_cumulative_roi_mae: f32,
141 pub canonical_peak_roi_mae: f32,
142 pub suite_mean_cumulative_roi_mae: f32,
143 pub suite_mean_false_positive_response_rate: f32,
144}
145
146#[derive(Clone, Debug, Serialize)]
147pub struct AggregateRunScore {
148 pub run_id: String,
149 pub label: String,
150 pub category: String,
151 pub mean_rank: f32,
152 pub mean_cumulative_roi_mae: f32,
153 pub mean_non_roi_mae: f32,
154 pub mean_false_positive_response_rate: f32,
155 pub benefit_scenarios_won: usize,
156}
157
158#[derive(Clone, Debug, Serialize)]
159pub struct DemoASuiteSummary {
160 pub canonical_scenario_id: String,
161 pub scenario_ids: Vec<String>,
162 pub point_roi_scenarios: Vec<String>,
163 pub region_roi_scenarios: Vec<String>,
164 pub negative_control_scenarios: Vec<String>,
165 pub baseline_ids: Vec<String>,
166 pub dsfb_ids: Vec<String>,
167 pub ablation_ids: Vec<String>,
168 pub primary_behavioral_result: String,
169 pub secondary_behavioral_result: String,
170 pub host_realistic_beats_fixed_alpha_scenarios: usize,
171 pub host_realistic_beats_strong_heuristic_scenarios: usize,
172 pub mixed_or_neutral_scenarios: Vec<String>,
173 pub remaining_blockers: Vec<String>,
174}
175
176#[derive(Clone, Debug, Serialize)]
177pub struct DemoASuiteMetrics {
178 pub summary: DemoASuiteSummary,
179 pub scenarios: Vec<ScenarioReport>,
180 pub ablations: Vec<AblationEntry>,
181 pub aggregate_leaderboard: Vec<AggregateRunScore>,
182}
183
184pub fn analyze_demo_a_suite(
185 scenario_runs: &[(SceneSequence, Vec<RunAnalysisInput<'_>>)],
186) -> Result<DemoASuiteMetrics> {
187 if scenario_runs.is_empty() {
188 return Err(Error::Message(
189 "Demo A suite analysis requires at least one scenario".to_string(),
190 ));
191 }
192
193 let mut scenarios = Vec::with_capacity(scenario_runs.len());
194 for (sequence, runs) in scenario_runs {
195 scenarios.push(analyze_scenario(sequence, runs)?);
196 }
197
198 let canonical = &scenarios[0];
199 let fixed_alpha = find_run(canonical, "fixed_alpha")?;
200 let strong_heuristic = find_run(canonical, "strong_heuristic")?;
201 let host_realistic = find_run(canonical, "dsfb_host_realistic")?;
202
203 let primary_behavioral_result = format!(
204 "On the canonical scenario, host-realistic DSFB reduced cumulative ROI MAE from {:.5} for fixed alpha to {:.5}.",
205 fixed_alpha.cumulative_roi_mae, host_realistic.cumulative_roi_mae
206 );
207 let secondary_behavioral_result = format!(
208 "Against the strong heuristic baseline, host-realistic DSFB changed cumulative ROI MAE from {:.5} to {:.5}; mixed outcomes are surfaced per scenario below.",
209 strong_heuristic.cumulative_roi_mae, host_realistic.cumulative_roi_mae
210 );
211
212 let host_realistic_beats_fixed_alpha_scenarios = scenarios
213 .iter()
214 .filter(|scenario| {
215 let fixed = scenario
216 .runs
217 .iter()
218 .find(|run| run.summary.run_id == "fixed_alpha");
219 let host = scenario
220 .runs
221 .iter()
222 .find(|run| run.summary.run_id == "dsfb_host_realistic");
223 match (fixed, host) {
224 (Some(fixed), Some(host)) => {
225 host.summary.cumulative_roi_mae + 1e-6 < fixed.summary.cumulative_roi_mae
226 }
227 _ => false,
228 }
229 })
230 .count();
231 let host_realistic_beats_strong_heuristic_scenarios = scenarios
232 .iter()
233 .filter(|scenario| {
234 let heuristic = scenario
235 .runs
236 .iter()
237 .find(|run| run.summary.run_id == "strong_heuristic");
238 let host = scenario
239 .runs
240 .iter()
241 .find(|run| run.summary.run_id == "dsfb_host_realistic");
242 match (heuristic, host) {
243 (Some(heuristic), Some(host)) => {
244 host.summary.cumulative_roi_mae + 1e-6 < heuristic.summary.cumulative_roi_mae
245 }
246 _ => false,
247 }
248 })
249 .count();
250
251 let mixed_or_neutral_scenarios = scenarios
252 .iter()
253 .filter(|scenario| {
254 matches!(scenario.expectation, ScenarioExpectation::NeutralExpected)
255 || scenario.host_realistic_vs_strong_heuristic_cumulative_roi_gain <= 0.0
256 })
257 .map(|scenario| scenario.scenario_id.clone())
258 .collect::<Vec<_>>();
259
260 let baseline_ids = vec![
261 "fixed_alpha".to_string(),
262 "residual_threshold".to_string(),
263 "neighborhood_clamp".to_string(),
264 "depth_normal_reject".to_string(),
265 "reactive_mask".to_string(),
266 "strong_heuristic".to_string(),
267 ];
268 let dsfb_ids = vec![
269 "dsfb_synthetic_visibility".to_string(),
270 "dsfb_host_realistic".to_string(),
271 ];
272 let ablation_ids = vec![
273 "dsfb_synthetic_visibility".to_string(),
274 "dsfb_host_realistic".to_string(),
275 "dsfb_host_gated_reference".to_string(),
276 "dsfb_motion_augmented".to_string(),
277 "dsfb_no_visibility".to_string(),
278 "dsfb_no_thin".to_string(),
279 "dsfb_no_motion_edge".to_string(),
280 "dsfb_no_grammar".to_string(),
281 "dsfb_residual_only".to_string(),
282 "dsfb_trust_no_alpha".to_string(),
283 ];
284
285 let ablations = ablation_ids
286 .iter()
287 .filter_map(|run_id| {
288 let canonical_run = canonical
289 .runs
290 .iter()
291 .find(|run| run.summary.run_id == *run_id)?;
292 let suite_matches = scenarios
293 .iter()
294 .filter_map(|scenario| {
295 scenario
296 .runs
297 .iter()
298 .find(|run| run.summary.run_id == *run_id)
299 .map(|run| &run.summary)
300 })
301 .collect::<Vec<_>>();
302 let suite_count = suite_matches.len().max(1) as f32;
303 Some(AblationEntry {
304 run_id: (*run_id).clone(),
305 label: canonical_run.summary.label.clone(),
306 canonical_cumulative_roi_mae: canonical_run.summary.cumulative_roi_mae,
307 canonical_peak_roi_mae: canonical_run.summary.peak_roi_mae,
308 suite_mean_cumulative_roi_mae: suite_matches
309 .iter()
310 .map(|summary| summary.cumulative_roi_mae)
311 .sum::<f32>()
312 / suite_count,
313 suite_mean_false_positive_response_rate: suite_matches
314 .iter()
315 .map(|summary| summary.false_positive_response_rate)
316 .sum::<f32>()
317 / suite_count,
318 })
319 })
320 .collect::<Vec<_>>();
321
322 let aggregate_leaderboard = aggregate_leaderboard(&scenarios);
323 let point_roi_scenarios = scenarios
324 .iter()
325 .filter(|scenario| {
326 matches!(
327 scenario.support_category,
328 ScenarioSupportCategory::PointLikeRoi
329 )
330 })
331 .map(|scenario| scenario.scenario_id.clone())
332 .collect::<Vec<_>>();
333 let region_roi_scenarios = scenarios
334 .iter()
335 .filter(|scenario| {
336 matches!(
337 scenario.support_category,
338 ScenarioSupportCategory::RegionRoi
339 )
340 })
341 .map(|scenario| scenario.scenario_id.clone())
342 .collect::<Vec<_>>();
343 let negative_control_scenarios = scenarios
344 .iter()
345 .filter(|scenario| {
346 matches!(
347 scenario.support_category,
348 ScenarioSupportCategory::NegativeControl
349 )
350 })
351 .map(|scenario| scenario.scenario_id.clone())
352 .collect::<Vec<_>>();
353 let remaining_blockers = vec![
354 "The scenario suite is still synthetic and does not prove production-scene generalization."
355 .to_string(),
356 "The strong heuristic baseline remains competitive on some cases, so the crate supports evaluation diligence rather than blanket win claims."
357 .to_string(),
358 "Cost accounting is architectural and CPU-side within the crate; it is not a measured GPU benchmark."
359 .to_string(),
360 "Point-like ROI scenarios remain mechanically useful but statistically weak, so aggregate claims must stay separated from region-ROI evidence."
361 .to_string(),
362 ];
363
364 Ok(DemoASuiteMetrics {
365 summary: DemoASuiteSummary {
366 canonical_scenario_id: canonical.scenario_id.clone(),
367 scenario_ids: scenarios
368 .iter()
369 .map(|scenario| scenario.scenario_id.clone())
370 .collect(),
371 point_roi_scenarios,
372 region_roi_scenarios,
373 negative_control_scenarios,
374 baseline_ids,
375 dsfb_ids,
376 ablation_ids,
377 primary_behavioral_result,
378 secondary_behavioral_result,
379 host_realistic_beats_fixed_alpha_scenarios,
380 host_realistic_beats_strong_heuristic_scenarios,
381 mixed_or_neutral_scenarios,
382 remaining_blockers,
383 },
384 scenarios,
385 ablations,
386 aggregate_leaderboard,
387 })
388}
389
390fn analyze_scenario(
391 sequence: &SceneSequence,
392 runs: &[RunAnalysisInput<'_>],
393) -> Result<ScenarioReport> {
394 if runs.is_empty() {
395 return Err(Error::Message(format!(
396 "scenario {} had no runs to analyze",
397 sequence.scenario_id.as_str()
398 )));
399 }
400
401 let non_roi_mask = invert_mask(&sequence.target_mask);
402 let threshold = persistence_threshold(sequence);
403 let mut reports = Vec::with_capacity(runs.len());
404 for run in runs {
405 reports.push(analyze_run(
406 sequence,
407 &sequence.target_mask,
408 &non_roi_mask,
409 threshold,
410 run,
411 ));
412 }
413
414 let fixed_alpha = reports
415 .iter()
416 .find(|run| run.summary.run_id == "fixed_alpha")
417 .ok_or_else(|| Error::Message("fixed_alpha run missing from scenario".to_string()))?;
418 let strong_heuristic = reports
419 .iter()
420 .find(|run| run.summary.run_id == "strong_heuristic")
421 .ok_or_else(|| Error::Message("strong_heuristic run missing from scenario".to_string()))?;
422 let host_realistic = reports
423 .iter()
424 .find(|run| run.summary.run_id == "dsfb_host_realistic")
425 .ok_or_else(|| {
426 Error::Message("dsfb_host_realistic run missing from scenario".to_string())
427 })?;
428
429 let headline = match sequence.expectation {
430 ScenarioExpectation::BenefitExpected => format!(
431 "{}: host-realistic DSFB changed cumulative ROI MAE from {:.5} (fixed alpha) and {:.5} (strong heuristic) to {:.5}.",
432 sequence.scenario_title,
433 fixed_alpha.summary.cumulative_roi_mae,
434 strong_heuristic.summary.cumulative_roi_mae,
435 host_realistic.summary.cumulative_roi_mae
436 ),
437 ScenarioExpectation::NeutralExpected => format!(
438 "{}: neutral holdout with host-realistic non-ROI MAE {:.5} versus {:.5} for fixed alpha.",
439 sequence.scenario_title,
440 host_realistic.summary.average_non_roi_mae,
441 fixed_alpha.summary.average_non_roi_mae
442 ),
443 };
444 let bounded_or_neutral_note = match sequence.expectation {
445 ScenarioExpectation::BenefitExpected => {
446 if host_realistic.summary.cumulative_roi_mae
447 > strong_heuristic.summary.cumulative_roi_mae
448 {
449 "Strong heuristic remains better on this scenario; the report surfaces that rather than hiding it."
450 .to_string()
451 } else {
452 "Host-realistic DSFB remains competitive without privileged visibility hints on this scenario."
453 .to_string()
454 }
455 }
456 ScenarioExpectation::NeutralExpected => {
457 "This is the honesty scenario: aggressive trust collapse is not expected to help, so false-positive response and non-ROI stability are the main evaluation criteria."
458 .to_string()
459 }
460 };
461
462 Ok(ScenarioReport {
463 scenario_id: sequence.scenario_id.as_str().to_string(),
464 scenario_title: sequence.scenario_title.clone(),
465 scenario_description: sequence.scenario_description.clone(),
466 expectation: sequence.expectation,
467 support_category: sequence.support_category,
468 roi_note: sequence.roi_note.clone(),
469 sampling_taxonomy: sequence.sampling_taxonomy.clone(),
470 realism_stress: sequence.realism_stress,
471 competitive_baseline_case: sequence.competitive_baseline_case,
472 bounded_loss_disclosure: sequence.bounded_loss_disclosure,
473 demo_b_taxonomy: sequence.demo_b_taxonomy.clone(),
474 target_label: sequence.target_label.clone(),
475 onset_frame: sequence.onset_frame,
476 target_pixels: sequence.target_mask.iter().filter(|value| **value).count(),
477 target_area_fraction: sequence.target_mask.iter().filter(|value| **value).count() as f32
478 / (sequence.config.width * sequence.config.height).max(1) as f32,
479 persistence_threshold: threshold,
480 headline,
481 bounded_or_neutral_note,
482 host_realistic_vs_fixed_alpha_cumulative_roi_gain: fixed_alpha.summary.cumulative_roi_mae
483 - host_realistic.summary.cumulative_roi_mae,
484 host_realistic_vs_strong_heuristic_cumulative_roi_gain: strong_heuristic
485 .summary
486 .cumulative_roi_mae
487 - host_realistic.summary.cumulative_roi_mae,
488 host_realistic_non_roi_penalty_vs_fixed_alpha: host_realistic.summary.average_non_roi_mae
489 - fixed_alpha.summary.average_non_roi_mae,
490 host_realistic_non_roi_penalty_vs_strong_heuristic: host_realistic
491 .summary
492 .average_non_roi_mae
493 - strong_heuristic.summary.average_non_roi_mae,
494 host_realistic_non_roi_penalty_ratio_vs_strong_heuristic: ratio_or_identity(
495 host_realistic.summary.average_non_roi_mae,
496 strong_heuristic.summary.average_non_roi_mae,
497 ),
498 neighborhood_clamp_roi_trigger_mean: runs
499 .iter()
500 .find(|run| run.id == "neighborhood_clamp")
501 .map(|run| {
502 mean_field_over_mask_range(
503 run.response_frames,
504 &sequence.target_mask,
505 sequence.onset_frame,
506 )
507 })
508 .unwrap_or(0.0),
509 neighborhood_clamp_roi_silent_fraction: runs
510 .iter()
511 .find(|run| run.id == "neighborhood_clamp")
512 .map(|run| {
513 fraction_field_values_below_range(
514 run.response_frames,
515 &sequence.target_mask,
516 sequence.onset_frame,
517 LOW_RESPONSE_THRESHOLD,
518 )
519 })
520 .unwrap_or(0.0),
521 neighborhood_clamp_history_inside_hull_fraction: runs
522 .iter()
523 .find(|run| run.id == "neighborhood_clamp")
524 .map(|run| {
525 fraction_field_values_below_range(
526 run.response_frames,
527 &sequence.target_mask,
528 sequence.onset_frame,
529 1.0e-4,
530 )
531 })
532 .unwrap_or(0.0),
533 runs: reports,
534 })
535}
536
537fn analyze_run(
538 sequence: &SceneSequence,
539 target_mask: &[bool],
540 non_roi_mask: &[bool],
541 threshold: f32,
542 run: &RunAnalysisInput<'_>,
543) -> ScenarioRunReport {
544 let onset = sequence
545 .onset_frame
546 .min(sequence.frames.len().saturating_sub(1));
547 let mut frame_metrics = Vec::with_capacity(sequence.frames.len());
548 let mut cumulative_roi_mae = 0.0;
549 let mut cumulative_non_roi_mae = 0.0;
550 let mut average_overall_mae = 0.0;
551 let mut average_overall_rmse = 0.0;
552 let mut average_roi_mae = 0.0;
553 let mut average_non_roi_mae = 0.0;
554 let mut average_non_roi_rmse = 0.0;
555 let mut peak_roi_mae = f32::NEG_INFINITY;
556 let mut peak_roi_mae_frame = onset;
557 let mut response_pixels = 0usize;
558 let total_pixels = sequence.frames.len() * sequence.config.width * sequence.config.height;
559
560 for frame_index in 0..sequence.frames.len() {
561 let gt = &sequence.frames[frame_index].ground_truth;
562 let resolved = &run.resolved_frames[frame_index];
563 let alpha = &run.alpha_frames[frame_index];
564 let response = &run.response_frames[frame_index];
565 let trust = run.trust_frames.map(|fields| &fields[frame_index]);
566
567 let overall_mae = mean_abs_error(resolved, gt);
568 let overall_rmse = rmse(resolved, gt, None);
569 let roi_mae = mean_abs_error_over_mask(resolved, gt, target_mask);
570 let roi_rmse = rmse(resolved, gt, Some(target_mask));
571 let non_roi_mae = mean_abs_error_over_mask(resolved, gt, non_roi_mask);
572 let non_roi_rmse = rmse(resolved, gt, Some(non_roi_mask));
573 let alpha_mean = alpha.mean();
574 let alpha_roi_mean = alpha.mean_over_mask(target_mask);
575 let alpha_non_roi_mean = alpha.mean_over_mask(non_roi_mask);
576 let response_mean = response.mean();
577 let response_roi_mean = response.mean_over_mask(target_mask);
578 let response_non_roi_mean = response.mean_over_mask(non_roi_mask);
579 let trust_mean = trust.map(ScalarField::mean);
580 let trust_roi_mean = trust.map(|field| field.mean_over_mask(target_mask));
581 let trust_non_roi_mean = trust.map(|field| field.mean_over_mask(non_roi_mask));
582
583 average_overall_mae += overall_mae;
584 average_overall_rmse += overall_rmse;
585 average_roi_mae += roi_mae;
586 average_non_roi_mae += non_roi_mae;
587 average_non_roi_rmse += non_roi_rmse;
588 cumulative_roi_mae += roi_mae;
589 cumulative_non_roi_mae += non_roi_mae;
590 response_pixels += count_field_above(response, LOW_RESPONSE_THRESHOLD);
591
592 if roi_mae > peak_roi_mae {
593 peak_roi_mae = roi_mae;
594 peak_roi_mae_frame = frame_index;
595 }
596
597 frame_metrics.push(RunFrameMetrics {
598 frame_index,
599 overall_mae,
600 overall_rmse,
601 roi_mae,
602 roi_rmse,
603 non_roi_mae,
604 non_roi_rmse,
605 alpha_mean,
606 alpha_roi_mean,
607 alpha_non_roi_mean,
608 response_mean,
609 response_roi_mean,
610 response_non_roi_mean,
611 trust_mean,
612 trust_roi_mean,
613 trust_non_roi_mean,
614 });
615 }
616
617 let frame_count = sequence.frames.len().max(1) as f32;
618 let ghost_persistence_frames =
619 compute_ghost_persistence(&frame_metrics, onset, threshold, |frame| frame.roi_mae);
620 let onset_response_latency_frames =
621 first_frame_at_or_above(&frame_metrics, onset, LOW_RESPONSE_THRESHOLD, |frame| {
622 frame.response_roi_mean
623 })
624 .map(|frame| frame.saturating_sub(onset));
625 let false_positive_response_rate = frame_metrics
626 .iter()
627 .skip(onset)
628 .map(|frame| frame.response_non_roi_mean)
629 .sum::<f32>()
630 / (frame_metrics.len().saturating_sub(onset).max(1) as f32);
631 let intervention_sparsity = response_pixels as f32 / total_pixels.max(1) as f32;
632 let onset_alpha_values = run.alpha_frames[onset].values().to_vec();
633 let onset_alpha_p90 = percentile(&onset_alpha_values, 0.90);
634 let onset_alpha_max = onset_alpha_values.iter().copied().fold(0.0f32, f32::max);
635 let temporal_variance_non_roi =
636 temporal_variance_non_roi(sequence, run.resolved_frames, non_roi_mask);
637 let alpha_temporal_delta_mean = temporal_scalar_delta_mean(run.alpha_frames);
638 let response_temporal_delta_mean = temporal_scalar_delta_mean(run.response_frames);
639 let (
640 trust_error_rank_correlation,
641 trust_rank_correlation_is_degenerate,
642 trust_calibration_bins,
643 trust_histogram,
644 trust_occupied_bin_count,
645 trust_entropy_bits,
646 trust_discreteness_score,
647 trust_effective_level_count,
648 trust_operating_mode,
649 ) = if let Some(fields) = run.trust_frames {
650 let histogram = histogram_bins(&fields[onset], 10);
651 let occupied_bin_count = histogram.iter().filter(|bin| bin.sample_count > 0).count();
652 let entropy_bits = histogram_entropy_bits(&histogram);
653 let discreteness_score =
654 entropy_bits.map(|entropy| trust_discreteness_score(histogram.len(), entropy));
655 let effective_level_count = entropy_bits.map(|entropy| {
656 2.0f32
657 .powf(entropy)
658 .round()
659 .clamp(1.0, histogram.len() as f32) as usize
660 });
661 let correlation = frame_spearman_correlation(fields, &frame_metrics, onset);
662 let correlation_degenerate =
663 trust_rank_correlation_is_degenerate(fields, onset, occupied_bin_count, entropy_bits);
664 (
665 Some(correlation),
666 correlation_degenerate,
667 calibration_bins(
668 &fields[onset],
669 &run.resolved_frames[onset],
670 &sequence.frames[onset].ground_truth,
671 ),
672 histogram.clone(),
673 occupied_bin_count,
674 entropy_bits,
675 discreteness_score,
676 effective_level_count,
677 classify_trust_operating_mode(occupied_bin_count, entropy_bits, discreteness_score),
678 )
679 } else {
680 (
681 None,
682 false,
683 Vec::new(),
684 Vec::new(),
685 0,
686 None,
687 None,
688 None,
689 None,
690 )
691 };
692
693 ScenarioRunReport {
694 summary: RunSummary {
695 run_id: run.id.to_string(),
696 label: run.label.to_string(),
697 category: run.category.to_string(),
698 peak_roi_mae,
699 peak_roi_mae_frame,
700 cumulative_roi_mae,
701 cumulative_non_roi_mae,
702 average_overall_mae: average_overall_mae / frame_count,
703 average_overall_rmse: average_overall_rmse / frame_count,
704 average_roi_mae: average_roi_mae / frame_count,
705 average_non_roi_mae: average_non_roi_mae / frame_count,
706 average_non_roi_rmse: average_non_roi_rmse / frame_count,
707 ghost_persistence_frames,
708 onset_response_latency_frames,
709 false_positive_response_rate,
710 intervention_sparsity,
711 mean_alpha: run.alpha_frames.iter().map(ScalarField::mean).sum::<f32>() / frame_count,
712 onset_alpha_p90,
713 onset_alpha_max,
714 alpha_temporal_delta_mean,
715 response_temporal_delta_mean,
716 temporal_variance_non_roi,
717 trust_error_rank_correlation,
718 trust_rank_correlation_is_degenerate,
719 trust_calibration_bins,
720 trust_histogram,
721 trust_occupied_bin_count,
722 trust_entropy_bits,
723 trust_discreteness_score,
724 trust_effective_level_count,
725 trust_operating_mode,
726 },
727 frame_metrics,
728 }
729}
730
731fn persistence_threshold(sequence: &SceneSequence) -> f32 {
732 if sequence.onset_frame == 0 {
733 return 0.02;
734 }
735 let previous = &sequence.frames[sequence.onset_frame - 1].ground_truth;
736 let current = &sequence.frames[sequence.onset_frame].ground_truth;
737 (mean_abs_error_over_mask(previous, current, &sequence.target_mask) * 0.15).max(0.02)
738}
739
740fn rmse(frame_a: &ImageFrame, frame_b: &ImageFrame, mask: Option<&[bool]>) -> f32 {
741 let mut sum = 0.0;
742 let mut count = 0usize;
743 for y in 0..frame_a.height() {
744 for x in 0..frame_a.width() {
745 let index = y * frame_a.width() + x;
746 if mask.map(|values| values[index]).unwrap_or(true) {
747 let diff = frame_a.get(x, y).abs_diff(frame_b.get(x, y));
748 sum += diff * diff;
749 count += 1;
750 }
751 }
752 }
753 if count == 0 {
754 0.0
755 } else {
756 (sum / count as f32).sqrt()
757 }
758}
759
/// Returns a new mask of the same length with every entry negated.
fn invert_mask(mask: &[bool]) -> Vec<bool> {
    let mut inverted = Vec::with_capacity(mask.len());
    inverted.extend(mask.iter().map(|&flag| !flag));
    inverted
}
763
764fn compute_ghost_persistence(
765 frame_metrics: &[RunFrameMetrics],
766 onset: usize,
767 threshold: f32,
768 metric: impl Fn(&RunFrameMetrics) -> f32,
769) -> usize {
770 frame_metrics
771 .iter()
772 .skip(onset)
773 .filter(|frame| metric(frame) > threshold)
774 .count()
775}
776
777fn first_frame_at_or_above(
778 frame_metrics: &[RunFrameMetrics],
779 start: usize,
780 threshold: f32,
781 metric: impl Fn(&RunFrameMetrics) -> f32,
782) -> Option<usize> {
783 frame_metrics
784 .iter()
785 .enumerate()
786 .skip(start)
787 .find_map(|(index, frame)| (metric(frame) >= threshold).then_some(index))
788}
789
/// Nearest-rank percentile of `values`.
///
/// `quantile` is clamped to `[0, 1]`; the element at the rounded position
/// `(len - 1) * quantile` of the sorted values is returned. An empty slice yields 0.0.
fn percentile(values: &[f32], quantile: f32) -> f32 {
    let Some(last_index) = values.len().checked_sub(1) else {
        return 0.0;
    };
    let mut ordered: Vec<f32> = values.to_vec();
    // Elements that compare equal under `total_cmp` are bit-identical, so an unstable
    // sort produces the same sequence as a stable one.
    ordered.sort_unstable_by(f32::total_cmp);
    let position = (last_index as f32 * quantile.clamp(0.0, 1.0)).round() as usize;
    ordered[position]
}
799
800fn temporal_variance_non_roi(
801 sequence: &SceneSequence,
802 resolved_frames: &[ImageFrame],
803 non_roi_mask: &[bool],
804) -> f32 {
805 let width = sequence.config.width;
806 let height = sequence.config.height;
807 let frame_count = resolved_frames.len().max(1) as f32;
808 let mut total_variance = 0.0f32;
809 let mut pixel_count = 0usize;
810
811 for y in 0..height {
812 for x in 0..width {
813 let index = y * width + x;
814 if !non_roi_mask[index] {
815 continue;
816 }
817 let mean = resolved_frames
818 .iter()
819 .map(|frame| frame.get(x, y).luma())
820 .sum::<f32>()
821 / frame_count;
822 let variance = resolved_frames
823 .iter()
824 .map(|frame| {
825 let diff = frame.get(x, y).luma() - mean;
826 diff * diff
827 })
828 .sum::<f32>()
829 / frame_count;
830 total_variance += variance;
831 pixel_count += 1;
832 }
833 }
834
835 if pixel_count == 0 {
836 0.0
837 } else {
838 total_variance / pixel_count as f32
839 }
840}
841
842fn temporal_scalar_delta_mean(fields: &[ScalarField]) -> f32 {
843 if fields.len() < 2 {
844 return 0.0;
845 }
846 let mut total = 0.0;
847 for window in fields.windows(2) {
848 total += mean_abs_scalar_delta(&window[0], &window[1]);
849 }
850 total / (fields.len() - 1) as f32
851}
852
853fn mean_abs_scalar_delta(left: &ScalarField, right: &ScalarField) -> f32 {
854 if left.values().is_empty() || right.values().is_empty() {
855 return 0.0;
856 }
857 left.values()
858 .iter()
859 .zip(right.values().iter())
860 .map(|(left, right)| (left - right).abs())
861 .sum::<f32>()
862 / left.values().len().min(right.values().len()).max(1) as f32
863}
864
865fn frame_spearman_correlation(
866 trust_frames: &[ScalarField],
867 frame_metrics: &[RunFrameMetrics],
868 onset: usize,
869) -> f32 {
870 let trust_values = trust_frames
871 .iter()
872 .skip(onset)
873 .map(|field| field.mean())
874 .collect::<Vec<_>>();
875 let error_values = frame_metrics
876 .iter()
877 .skip(onset)
878 .map(|frame| frame.roi_mae)
879 .collect::<Vec<_>>();
880 spearman(&trust_values, &error_values)
881}
882
883fn histogram_bins(field: &ScalarField, bin_count: usize) -> Vec<HistogramBin> {
884 let safe_bin_count = bin_count.max(1);
885 let mut counts = vec![0usize; safe_bin_count];
886 for value in field.values().iter().copied() {
887 let index = ((value.clamp(0.0, 1.0) * safe_bin_count as f32).floor() as usize)
888 .min(safe_bin_count - 1);
889 counts[index] += 1;
890 }
891 counts
892 .into_iter()
893 .enumerate()
894 .map(|(index, sample_count)| HistogramBin {
895 lower: index as f32 / safe_bin_count as f32,
896 upper: (index + 1) as f32 / safe_bin_count as f32,
897 sample_count,
898 })
899 .collect()
900}
901
902fn histogram_entropy_bits(histogram: &[HistogramBin]) -> Option<f32> {
903 let total = histogram.iter().map(|bin| bin.sample_count).sum::<usize>();
904 if total == 0 {
905 return None;
906 }
907 let total_f = total as f32;
908 Some(
909 histogram
910 .iter()
911 .filter(|bin| bin.sample_count > 0)
912 .map(|bin| {
913 let p = bin.sample_count as f32 / total_f;
914 -p * p.log2()
915 })
916 .sum::<f32>(),
917 )
918}
919
/// Maps a histogram entropy to a score in `[0, 1]`: 1.0 for zero entropy and 0.0 once the
/// entropy reaches the maximum possible for `bin_count` bins.
///
/// Bin counts below 2 are treated as 2 so the maximum entropy never collapses to zero.
fn trust_discreteness_score(bin_count: usize, entropy_bits: f32) -> f32 {
    let effective_bins = bin_count.max(2) as f32;
    let entropy_ceiling = f32::max(effective_bins.log2(), f32::EPSILON);
    let normalized_entropy = entropy_bits / entropy_ceiling;
    let score = 1.0 - normalized_entropy;
    score.clamp(0.0, 1.0)
}
924
925fn trust_rank_correlation_is_degenerate(
926 trust_frames: &[ScalarField],
927 onset: usize,
928 occupied_bin_count: usize,
929 entropy_bits: Option<f32>,
930) -> bool {
931 let Some(onset_field) = trust_frames.get(onset) else {
932 return true;
933 };
934 let (min_value, max_value) = onset_field.values().iter().copied().fold(
935 (f32::INFINITY, f32::NEG_INFINITY),
936 |(min_value, max_value), value| (min_value.min(value), max_value.max(value)),
937 );
938 let entropy_low = entropy_bits.unwrap_or(0.0) < 1.0;
939 let near_flat = (max_value - min_value).abs() < 0.15;
940 let too_few_post_onset_frames = trust_frames.len().saturating_sub(onset) < 4;
941 occupied_bin_count < 4 || entropy_low || near_flat || too_few_post_onset_frames
942}
943
944fn classify_trust_operating_mode(
945 occupied_bin_count: usize,
946 entropy_bits: Option<f32>,
947 discreteness_score: Option<f32>,
948) -> Option<TrustOperatingMode> {
949 let entropy_bits = entropy_bits?;
950 let discreteness_score = discreteness_score?;
951 Some(
952 if occupied_bin_count <= 3 || discreteness_score >= 0.72 || entropy_bits < 1.0 {
953 TrustOperatingMode::NearBinaryGate
954 } else if occupied_bin_count <= 5 || discreteness_score >= 0.42 || entropy_bits < 1.8 {
955 TrustOperatingMode::WeaklyGraded
956 } else {
957 TrustOperatingMode::StronglyGraded
958 },
959 )
960}
961
962fn calibration_bins(
963 trust: &ScalarField,
964 resolved: &ImageFrame,
965 ground_truth: &ImageFrame,
966) -> Vec<CalibrationBin> {
967 let mut bins = vec![
968 (0.0f32, 0.2f32, 0usize, 0.0f32, 0.0f32),
969 (0.2, 0.4, 0, 0.0, 0.0),
970 (0.4, 0.6, 0, 0.0, 0.0),
971 (0.6, 0.8, 0, 0.0, 0.0),
972 (0.8, 1.01, 0, 0.0, 0.0),
973 ];
974 for y in 0..trust.height() {
975 for x in 0..trust.width() {
976 let trust_value = trust.get(x, y);
977 let error_value = resolved.get(x, y).abs_diff(ground_truth.get(x, y));
978 for bin in &mut bins {
979 if trust_value >= bin.0 && trust_value < bin.1 {
980 bin.2 += 1;
981 bin.3 += trust_value;
982 bin.4 += error_value;
983 break;
984 }
985 }
986 }
987 }
988
989 bins.into_iter()
990 .map(
991 |(lower, upper, sample_count, trust_sum, error_sum)| CalibrationBin {
992 lower,
993 upper: upper.min(1.0),
994 sample_count,
995 mean_trust: if sample_count == 0 {
996 0.0
997 } else {
998 trust_sum / sample_count as f32
999 },
1000 mean_error: if sample_count == 0 {
1001 0.0
1002 } else {
1003 error_sum / sample_count as f32
1004 },
1005 },
1006 )
1007 .collect()
1008}
1009
1010fn spearman(left: &[f32], right: &[f32]) -> f32 {
1011 if left.len() != right.len() || left.is_empty() {
1012 return 0.0;
1013 }
1014 let left_ranks = ranks(left);
1015 let right_ranks = ranks(right);
1016 pearson(&left_ranks, &right_ranks)
1017}
1018
/// Converts `values` into zero-based ascending ranks, averaging the rank across ties.
///
/// The smallest value gets rank `0.0`. Values that compare equal all receive the mean
/// of the positions they occupy in sorted order, so the result does not depend on the
/// order in which tied values appear in the input. Ordering uses `f32::total_cmp`;
/// NaN values never compare equal, so each NaN keeps its own individual rank.
///
/// Returns a vector of the same length as `values`, where element `i` is the rank of
/// `values[i]`.
fn ranks(values: &[f32]) -> Vec<f32> {
    let mut order: Vec<usize> = (0..values.len()).collect();
    order.sort_by(|&a, &b| values[a].total_cmp(&values[b]));

    let mut result = vec![0.0; values.len()];
    let mut start = 0;
    while start < order.len() {
        // Extend the run while the sorted neighbours are equal to the run's first value.
        let run_value = values[order[start]];
        let mut end = start + 1;
        while end < order.len() && values[order[end]] == run_value {
            end += 1;
        }

        // Sorted positions start..end are shared by the tie; use their mean.
        let shared_rank = (start + end - 1) as f32 / 2.0;
        for &index in &order[start..end] {
            result[index] = shared_rank;
        }
        start = end;
    }
    result
}
1028
/// Pearson correlation coefficient of two series.
///
/// Both means are divided by the length of `left` (treated as 1 when `left` is empty),
/// and the deviations are accumulated over the pairs produced by zipping the inputs.
/// The denominator is floored at `f32::EPSILON`, so a series with no variance produces
/// `0.0` rather than a division by zero.
fn pearson(left: &[f32], right: &[f32]) -> f32 {
    let n = left.len().max(1) as f32;
    let mean_left = left.iter().sum::<f32>() / n;
    let mean_right = right.iter().sum::<f32>() / n;

    let (covariance, spread_left, spread_right) = left.iter().zip(right.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(covariance, spread_left, spread_right), (&l, &r)| {
            let dl = l - mean_left;
            let dr = r - mean_right;
            (
                covariance + dl * dr,
                spread_left + dl * dl,
                spread_right + dr * dr,
            )
        },
    );

    covariance / (spread_left * spread_right).sqrt().max(f32::EPSILON)
}
1046
1047fn count_field_above(field: &ScalarField, threshold: f32) -> usize {
1048 field
1049 .values()
1050 .iter()
1051 .filter(|value| **value >= threshold)
1052 .count()
1053}
1054
1055fn mean_field_over_mask_range(fields: &[ScalarField], mask: &[bool], start: usize) -> f32 {
1056 if fields.is_empty() || start >= fields.len() {
1057 return 0.0;
1058 }
1059 fields
1060 .iter()
1061 .skip(start)
1062 .map(|field| field.mean_over_mask(mask))
1063 .sum::<f32>()
1064 / fields.len().saturating_sub(start).max(1) as f32
1065}
1066
1067fn fraction_field_values_below_range(
1068 fields: &[ScalarField],
1069 mask: &[bool],
1070 start: usize,
1071 threshold: f32,
1072) -> f32 {
1073 if fields.is_empty() || start >= fields.len() {
1074 return 0.0;
1075 }
1076 let mut total = 0usize;
1077 let mut hits = 0usize;
1078 for field in fields.iter().skip(start) {
1079 for (value, include) in field.values().iter().zip(mask.iter().copied()) {
1080 if !include {
1081 continue;
1082 }
1083 total += 1;
1084 if *value <= threshold {
1085 hits += 1;
1086 }
1087 }
1088 }
1089 if total == 0 {
1090 0.0
1091 } else {
1092 hits as f32 / total as f32
1093 }
1094}
1095
/// Divides `numerator` by `denominator`, with fallbacks for a negligible denominator.
///
/// A value counts as negligible when its magnitude is at most `f32::EPSILON`.
///
/// * Denominator not negligible: returns the plain quotient.
/// * Both negligible: returns `1.0`.
/// * Only the denominator negligible: returns `numerator` unchanged.
///
/// NaN never counts as negligible, so a NaN denominator takes the quotient path.
fn ratio_or_identity(numerator: f32, denominator: f32) -> f32 {
    let negligible = |value: f32| value.abs() <= f32::EPSILON;
    match (negligible(denominator), negligible(numerator)) {
        (false, _) => numerator / denominator,
        (true, true) => 1.0,
        (true, false) => numerator,
    }
}
1107
1108fn aggregate_leaderboard(scenarios: &[ScenarioReport]) -> Vec<AggregateRunScore> {
1109 let mut entries = std::collections::BTreeMap::<String, AggregateRunScore>::new();
1110 for scenario in scenarios {
1111 let mut ranked = scenario
1112 .runs
1113 .iter()
1114 .map(|run| {
1115 let score = match scenario.expectation {
1116 ScenarioExpectation::BenefitExpected => run.summary.cumulative_roi_mae,
1117 ScenarioExpectation::NeutralExpected => {
1118 run.summary.average_non_roi_mae
1119 + 0.5 * run.summary.false_positive_response_rate
1120 }
1121 };
1122 (score, run)
1123 })
1124 .collect::<Vec<_>>();
1125 ranked.sort_by(|left, right| left.0.total_cmp(&right.0));
1126
1127 for (rank, (_, run)) in ranked.into_iter().enumerate() {
1128 let entry = entries
1129 .entry(run.summary.run_id.clone())
1130 .or_insert_with(|| AggregateRunScore {
1131 run_id: run.summary.run_id.clone(),
1132 label: run.summary.label.clone(),
1133 category: run.summary.category.clone(),
1134 mean_rank: 0.0,
1135 mean_cumulative_roi_mae: 0.0,
1136 mean_non_roi_mae: 0.0,
1137 mean_false_positive_response_rate: 0.0,
1138 benefit_scenarios_won: 0,
1139 });
1140 entry.mean_rank += rank as f32;
1141 entry.mean_cumulative_roi_mae += run.summary.cumulative_roi_mae;
1142 entry.mean_non_roi_mae += run.summary.average_non_roi_mae;
1143 entry.mean_false_positive_response_rate += run.summary.false_positive_response_rate;
1144 if rank == 0 && matches!(scenario.expectation, ScenarioExpectation::BenefitExpected) {
1145 entry.benefit_scenarios_won += 1;
1146 }
1147 }
1148 }
1149
1150 let scenario_count = scenarios.len().max(1) as f32;
1151 let mut values = entries
1152 .into_values()
1153 .map(|mut entry| {
1154 entry.mean_rank /= scenario_count;
1155 entry.mean_cumulative_roi_mae /= scenario_count;
1156 entry.mean_non_roi_mae /= scenario_count;
1157 entry.mean_false_positive_response_rate /= scenario_count;
1158 entry
1159 })
1160 .collect::<Vec<_>>();
1161 values.sort_by(|left, right| left.mean_rank.total_cmp(&right.mean_rank));
1162 values
1163}
1164
1165fn find_run<'a>(scenario: &'a ScenarioReport, run_id: &str) -> Result<&'a RunSummary> {
1166 scenario
1167 .runs
1168 .iter()
1169 .find(|run| run.summary.run_id == run_id)
1170 .map(|run| &run.summary)
1171 .ok_or_else(|| Error::Message(format!("run {run_id} missing from scenario report")))
1172}