1use std::time::{Duration, Instant};
2
3use serde::Serialize;
4
5use crate::config::DemoConfig;
6use crate::cost::{build_cost_report, CostMode};
7use crate::error::{Error, Result};
8use crate::frame::{ImageFrame, ScalarField};
9use crate::host::{
10 default_host_realistic_profile, motion_augmented_profile, supervise_temporal_reuse,
11 synthetic_visibility_profile, HostSupervisionProfile, HostTemporalInputs,
12};
13use crate::scaling::scaled_scene_config;
14use crate::scene::{
15 generate_sequence_for_definition, scenario_by_id, MotionVector, Normal3, ScenarioId,
16 SceneFrame, SceneSequence, SurfaceTag,
17};
18
19#[derive(Clone, Debug, Serialize)]
20pub struct TimingStageMetrics {
21 pub stage: String,
22 pub total_ms: f64,
23 pub ms_per_frame: f64,
24 pub ns_per_pixel: f64,
25}
26
27#[derive(Clone, Debug, Serialize)]
28pub struct TimingEntry {
29 pub label: String,
30 pub measurement_kind: String,
31 pub actual_gpu_timing: bool,
32 pub mode: String,
33 pub scenario_id: String,
34 pub width: usize,
35 pub height: usize,
36 pub frame_count: usize,
37 pub iterations: usize,
38 pub build_profile: String,
39 pub stages: Vec<TimingStageMetrics>,
40 pub total_ms: f64,
41 pub ms_per_frame: f64,
42 pub estimated_ops_per_pixel: usize,
43 pub estimated_reads_per_pixel: usize,
44 pub estimated_writes_per_pixel: usize,
45 pub estimated_memory_traffic_megabytes: f64,
46 pub likely_optimization_levers: Vec<String>,
47}
48
49#[derive(Clone, Debug, Serialize)]
50pub struct TimingMetrics {
51 pub measurement_kind: String,
52 pub actual_gpu_timing: bool,
53 pub entries: Vec<TimingEntry>,
54 pub notes: Vec<String>,
55}
56
57pub fn run_timing_study(config: &DemoConfig) -> Result<TimingMetrics> {
58 let (high_width, high_height) = if cfg!(debug_assertions) {
59 (1280usize, 720usize)
60 } else {
61 (1920usize, 1080usize)
62 };
63 let entries = vec![
64 measure_entry(
65 "minimum_host_path_default_res",
66 CostMode::Minimal,
67 scenario_sequence(config, ScenarioId::ThinReveal, 160, 96)?,
68 &default_host_realistic_profile(
69 config.dsfb_alpha_range.min,
70 config.dsfb_alpha_range.max,
71 ),
72 6,
73 )?,
74 measure_entry(
75 "motion_augmented_region_mid_res",
76 CostMode::HostRealistic,
77 scenario_sequence(config, ScenarioId::MotionBiasBand, 640, 360)?,
78 &motion_augmented_profile(config.dsfb_alpha_range.min, config.dsfb_alpha_range.max),
79 4,
80 )?,
81 measure_entry(
82 "full_debug_region_mid_res",
83 CostMode::FullResearchDebug,
84 scenario_sequence(config, ScenarioId::RevealBand, 640, 360)?,
85 &synthetic_visibility_profile(config.dsfb_alpha_range.min, config.dsfb_alpha_range.max),
86 4,
87 )?,
88 measure_entry(
89 "minimum_host_path_high_res_proxy",
90 CostMode::HostRealistic,
91 timing_high_resolution_sequence(
92 config,
93 ScenarioId::RevealBand,
94 high_width,
95 high_height,
96 )?,
97 &default_host_realistic_profile(
98 config.dsfb_alpha_range.min,
99 config.dsfb_alpha_range.max,
100 ),
101 1,
102 )?,
103 ];
104
105 Ok(TimingMetrics {
106 measurement_kind: "cpu_only_proxy".to_string(),
107 actual_gpu_timing: false,
108 entries,
109 notes: vec![
110 "No actual GPU timing was measured in this environment. These timings are CPU-side proxy measurements of the same per-pixel supervisory structure and are paired with analytical op and memory estimates.".to_string(),
111 "The highest-resolution entry is a selected-scenario host-realistic proxy, not a full-suite production benchmark.".to_string(),
112 ],
113 })
114}
115
116fn timing_high_resolution_sequence(
117 config: &DemoConfig,
118 scenario_id: ScenarioId,
119 width: usize,
120 height: usize,
121) -> Result<SceneSequence> {
122 let mut sequence = scenario_sequence(config, scenario_id, width, height)?;
123 if cfg!(debug_assertions) && sequence.frames.len() > 8 {
124 sequence.frames.truncate(8);
125 sequence.onset_frame = sequence
126 .onset_frame
127 .min(sequence.frames.len().saturating_sub(2));
128 }
129 Ok(sequence)
130}
131
132fn scenario_sequence(
133 config: &DemoConfig,
134 scenario_id: ScenarioId,
135 width: usize,
136 height: usize,
137) -> Result<SceneSequence> {
138 let scaled_scene = scaled_scene_config(&config.scene, width, height);
139 let definition = scenario_by_id(&scaled_scene, scenario_id).ok_or_else(|| {
140 Error::Message(format!(
141 "timing scenario {} is unavailable",
142 scenario_id.as_str()
143 ))
144 })?;
145 Ok(generate_sequence_for_definition(&definition))
146}
147
148fn measure_entry(
149 label: &str,
150 cost_mode: CostMode,
151 sequence: SceneSequence,
152 profile: &HostSupervisionProfile,
153 iterations: usize,
154) -> Result<TimingEntry> {
155 let measured = measure_cpu_proxy(&sequence, profile, iterations.max(1));
156 let cost = build_cost_report(cost_mode);
157 let pixels_per_frame = (sequence.config.width * sequence.config.height) as f64;
158 let active_frames = sequence.frames.len().saturating_sub(1).max(1) as f64;
159 let total_pixels = pixels_per_frame * active_frames * iterations.max(1) as f64;
160 let estimated_memory_traffic_megabytes =
161 (cost.estimated_total_reads_per_pixel + cost.estimated_total_writes_per_pixel) as f64
162 * 4.0
163 * total_pixels
164 / (1024.0 * 1024.0);
165
166 Ok(TimingEntry {
167 label: label.to_string(),
168 measurement_kind: "cpu_only_proxy".to_string(),
169 actual_gpu_timing: false,
170 mode: cost_mode.as_str().to_string(),
171 scenario_id: sequence.scenario_id.as_str().to_string(),
172 width: sequence.config.width,
173 height: sequence.config.height,
174 frame_count: sequence.frames.len(),
175 iterations,
176 build_profile: if cfg!(debug_assertions) {
177 "debug".to_string()
178 } else {
179 "release".to_string()
180 },
181 stages: vec![
182 stage_metrics(
183 "reproject",
184 measured.reproject,
185 active_frames,
186 pixels_per_frame,
187 iterations,
188 ),
189 stage_metrics(
190 "supervise",
191 measured.supervise,
192 active_frames,
193 pixels_per_frame,
194 iterations,
195 ),
196 stage_metrics(
197 "resolve",
198 measured.resolve,
199 active_frames,
200 pixels_per_frame,
201 iterations,
202 ),
203 ],
204 total_ms: measured.total.as_secs_f64() * 1000.0,
205 ms_per_frame: measured.total.as_secs_f64() * 1000.0 / active_frames / iterations as f64,
206 estimated_ops_per_pixel: cost.estimated_total_ops_per_pixel,
207 estimated_reads_per_pixel: cost.estimated_total_reads_per_pixel,
208 estimated_writes_per_pixel: cost.estimated_total_writes_per_pixel,
209 estimated_memory_traffic_megabytes,
210 likely_optimization_levers: optimization_levers(cost_mode),
211 })
212}
213
/// Accumulated wall-clock time for each stage of the CPU proxy, summed over
/// every frame and iteration of a measurement.
#[derive(Default)]
struct StageDurations {
    /// Time spent reprojecting color, depth and normals.
    reproject: Duration,
    /// Time spent inside the temporal-reuse supervision call.
    supervise: Duration,
    /// Time spent blending history and current color.
    resolve: Duration,
    /// End-to-end time of the iterations, including work that is not
    /// attributed to any of the three stages.
    total: Duration,
}
221
222fn measure_cpu_proxy(
223 sequence: &SceneSequence,
224 profile: &HostSupervisionProfile,
225 iterations: usize,
226) -> StageDurations {
227 let mut totals = StageDurations::default();
228 for _ in 0..iterations {
229 let total_start = Instant::now();
230 let mut resolved_frames = Vec::with_capacity(sequence.frames.len());
231 for (frame_index, scene_frame) in sequence.frames.iter().enumerate() {
232 let width = scene_frame.ground_truth.width();
233 let height = scene_frame.ground_truth.height();
234 if frame_index == 0 {
235 resolved_frames.push(scene_frame.ground_truth.clone());
236 continue;
237 }
238
239 let previous_resolved = &resolved_frames[frame_index - 1];
240 let previous_scene = &sequence.frames[frame_index - 1];
241
242 let start = Instant::now();
243 let reprojected = reproject_frame(previous_resolved, scene_frame);
244 let reprojected_depth = reproject_depth(previous_scene, scene_frame);
245 let reprojected_normals = reproject_normals(previous_scene, scene_frame);
246 totals.reproject += start.elapsed();
247
248 let visibility_hint = profile
249 .use_visibility_hint
250 .then_some(scene_frame.disocclusion_mask.as_slice());
251 let thin_hint_field = profile
252 .use_visibility_hint
253 .then(|| compute_thin_hint(scene_frame));
254 let thin_hint = thin_hint_field.as_ref();
255 let inputs = HostTemporalInputs {
256 current_color: &scene_frame.ground_truth,
257 reprojected_history: &reprojected,
258 motion_vectors: &scene_frame.motion,
259 current_depth: &scene_frame.depth,
260 reprojected_depth: &reprojected_depth,
261 current_normals: &scene_frame.normals,
262 reprojected_normals: &reprojected_normals,
263 visibility_hint,
264 thin_hint,
265 };
266
267 let start = Instant::now();
268 let outputs = supervise_temporal_reuse(&inputs, profile);
269 totals.supervise += start.elapsed();
270
271 let start = Instant::now();
272 let resolved =
273 resolve_with_alpha(&reprojected, &scene_frame.ground_truth, &outputs.alpha);
274 totals.resolve += start.elapsed();
275 resolved_frames.push(resolved);
276
277 let _ = (width, height);
278 }
279 totals.total += total_start.elapsed();
280 }
281 totals
282}
283
284fn stage_metrics(
285 stage: &str,
286 duration: Duration,
287 active_frames: f64,
288 pixels_per_frame: f64,
289 iterations: usize,
290) -> TimingStageMetrics {
291 let total_ms = duration.as_secs_f64() * 1000.0;
292 let pixel_count = active_frames * pixels_per_frame * iterations as f64;
293 TimingStageMetrics {
294 stage: stage.to_string(),
295 total_ms,
296 ms_per_frame: total_ms / active_frames.max(1.0) / iterations as f64,
297 ns_per_pixel: duration.as_secs_f64() * 1.0e9 / pixel_count.max(1.0),
298 }
299}
300
301fn optimization_levers(mode: CostMode) -> Vec<String> {
302 match mode {
303 CostMode::Minimal => vec![
304 "Fuse alpha modulation into the temporal resolve.".to_string(),
305 "Compute trust/intervention at half resolution if only gating is needed.".to_string(),
306 ],
307 CostMode::HostRealistic => vec![
308 "Fuse reprojection fetches across color, depth, and normal buffers.".to_string(),
309 "Evaluate trust at half resolution or per tile, then upsample alpha.".to_string(),
310 "Keep motion disagreement optional; the minimum path no longer pays for it when scenario evidence is weak.".to_string(),
311 ],
312 CostMode::FullResearchDebug => vec![
313 "Drop synthetic visibility and debug exports outside analysis mode.".to_string(),
314 "Compress trust/alpha/intervention into narrower formats once calibration work stabilizes.".to_string(),
315 ],
316 }
317}
318
319fn resolve_with_alpha(
320 history: &ImageFrame,
321 current: &ImageFrame,
322 alpha: &ScalarField,
323) -> ImageFrame {
324 let mut resolved = ImageFrame::new(history.width(), history.height());
325 for y in 0..history.height() {
326 for x in 0..history.width() {
327 resolved.set(
328 x,
329 y,
330 history.get(x, y).lerp(current.get(x, y), alpha.get(x, y)),
331 );
332 }
333 }
334 resolved
335}
336
337fn reproject_frame(previous_resolved: &ImageFrame, scene_frame: &SceneFrame) -> ImageFrame {
338 let mut reprojected = ImageFrame::new(
339 scene_frame.ground_truth.width(),
340 scene_frame.ground_truth.height(),
341 );
342 for y in 0..scene_frame.ground_truth.height() {
343 for x in 0..scene_frame.ground_truth.width() {
344 let motion = scene_frame.motion[y * scene_frame.ground_truth.width() + x];
345 reprojected.set(
346 x,
347 y,
348 previous_resolved.sample_bilinear_clamped(
349 x as f32 + motion.to_prev_x,
350 y as f32 + motion.to_prev_y,
351 ),
352 );
353 }
354 }
355 reprojected
356}
357
358fn reproject_depth(previous_scene_frame: &SceneFrame, scene_frame: &SceneFrame) -> Vec<f32> {
359 reproject_scalar_buffer(
360 &previous_scene_frame.depth,
361 scene_frame.ground_truth.width(),
362 scene_frame.ground_truth.height(),
363 &scene_frame.motion,
364 )
365}
366
367fn reproject_normals(previous_scene_frame: &SceneFrame, scene_frame: &SceneFrame) -> Vec<Normal3> {
368 let width = scene_frame.ground_truth.width();
369 let height = scene_frame.ground_truth.height();
370 let mut reprojected = vec![Normal3::new(0.0, 0.0, 1.0); width * height];
371 for y in 0..height {
372 for x in 0..width {
373 let index = y * width + x;
374 let motion = scene_frame.motion[index];
375 reprojected[index] = sample_normal_bilinear_clamped(
376 &previous_scene_frame.normals,
377 width,
378 height,
379 x as f32 + motion.to_prev_x,
380 y as f32 + motion.to_prev_y,
381 );
382 }
383 }
384 reprojected
385}
386
387fn reproject_scalar_buffer(
388 previous_values: &[f32],
389 width: usize,
390 height: usize,
391 motion: &[MotionVector],
392) -> Vec<f32> {
393 let mut reprojected = vec![0.0; width * height];
394 for y in 0..height {
395 for x in 0..width {
396 let index = y * width + x;
397 let vector = motion[index];
398 reprojected[index] = sample_scalar_bilinear_clamped(
399 previous_values,
400 width,
401 height,
402 x as f32 + vector.to_prev_x,
403 y as f32 + vector.to_prev_y,
404 );
405 }
406 }
407 reprojected
408}
409
410fn compute_thin_hint(scene_frame: &SceneFrame) -> ScalarField {
411 let width = scene_frame.ground_truth.width();
412 let height = scene_frame.ground_truth.height();
413 let mut field = ScalarField::new(width, height);
414 for y in 0..height {
415 for x in 0..width {
416 let index = y * width + x;
417 let hint = matches!(scene_frame.layers[index], SurfaceTag::ThinStructure)
418 || neighbors(x, y, width, height).into_iter().any(|(nx, ny)| {
419 matches!(
420 scene_frame.layers[ny * width + nx],
421 SurfaceTag::ThinStructure
422 )
423 });
424 field.set(x, y, if hint { 1.0 } else { 0.0 });
425 }
426 }
427 field
428}
429
/// Lists the in-bounds 8-connected neighbours of `(x, y)` on a
/// `width` x `height` grid.
///
/// Neighbours are returned row by row (top to bottom), left to right within
/// each row; the centre cell itself is never included.
fn neighbors(x: usize, y: usize, width: usize, height: usize) -> Vec<(usize, usize)> {
    let (center_x, center_y) = (x as i32, y as i32);
    let (limit_x, limit_y) = (width as i32, height as i32);

    let mut found = Vec::with_capacity(8);
    for dy in [-1i32, 0, 1] {
        for dx in [-1i32, 0, 1] {
            let is_center = dx == 0 && dy == 0;
            if is_center {
                continue;
            }
            let (nx, ny) = (center_x + dx, center_y + dy);
            let inside = (0..limit_x).contains(&nx) && (0..limit_y).contains(&ny);
            if inside {
                found.push((nx as usize, ny as usize));
            }
        }
    }
    found
}
446
/// Samples a row-major scalar buffer at a fractional position with bilinear
/// filtering.
///
/// The four surrounding texel coordinates are clamped to the buffer bounds
/// before lookup, so positions outside the buffer read the nearest edge
/// value.
///
/// # Panics
///
/// Panics if `values` is shorter than the clamped index requires (for
/// example when it is empty).
fn sample_scalar_bilinear_clamped(
    values: &[f32],
    width: usize,
    height: usize,
    x: f32,
    y: f32,
) -> f32 {
    let max_x = width.saturating_sub(1) as f32;
    let max_y = height.saturating_sub(1) as f32;

    let left = x.floor();
    let top = y.floor();
    let weight_x = (x - left).clamp(0.0, 1.0);
    let weight_y = (y - top).clamp(0.0, 1.0);

    let clamp_col = |sample_x: f32| sample_x.clamp(0.0, max_x) as usize;
    let clamp_row = |sample_y: f32| sample_y.clamp(0.0, max_y) as usize;
    let texel = |sample_x: f32, sample_y: f32| {
        values[clamp_row(sample_y) * width + clamp_col(sample_x)]
    };

    let right = left + 1.0;
    let below = top + 1.0;
    let upper = texel(left, top) * (1.0 - weight_x) + texel(right, top) * weight_x;
    let lower = texel(left, below) * (1.0 - weight_x) + texel(right, below) * weight_x;
    upper * (1.0 - weight_y) + lower * weight_y
}
471
472fn sample_normal_bilinear_clamped(
473 values: &[Normal3],
474 width: usize,
475 height: usize,
476 x: f32,
477 y: f32,
478) -> Normal3 {
479 let x0 = x.floor();
480 let y0 = y.floor();
481 let x1 = x0 + 1.0;
482 let y1 = y0 + 1.0;
483 let tx = (x - x0).clamp(0.0, 1.0);
484 let ty = (y - y0).clamp(0.0, 1.0);
485
486 let sample = |sample_x: f32, sample_y: f32| {
487 let sx = sample_x.clamp(0.0, width.saturating_sub(1) as f32) as usize;
488 let sy = sample_y.clamp(0.0, height.saturating_sub(1) as f32) as usize;
489 values[sy * width + sx]
490 };
491
492 let mix = |a: Normal3, b: Normal3, t: f32| {
493 Normal3::new(
494 a.x + (b.x - a.x) * t,
495 a.y + (b.y - a.y) * t,
496 a.z + (b.z - a.z) * t,
497 )
498 };
499 mix(
500 mix(sample(x0, y0), sample(x1, y0), tx),
501 mix(sample(x0, y1), sample(x1, y1), tx),
502 ty,
503 )
504 .normalized()
505}