Skip to main content

dsfb_computer_graphics/
timing.rs

1use std::time::{Duration, Instant};
2
3use serde::Serialize;
4
5use crate::config::DemoConfig;
6use crate::cost::{build_cost_report, CostMode};
7use crate::error::{Error, Result};
8use crate::frame::{ImageFrame, ScalarField};
9use crate::host::{
10    default_host_realistic_profile, motion_augmented_profile, supervise_temporal_reuse,
11    synthetic_visibility_profile, HostSupervisionProfile, HostTemporalInputs,
12};
13use crate::scaling::scaled_scene_config;
14use crate::scene::{
15    generate_sequence_for_definition, scenario_by_id, MotionVector, Normal3, ScenarioId,
16    SceneFrame, SceneSequence, SurfaceTag,
17};
18
19#[derive(Clone, Debug, Serialize)]
20pub struct TimingStageMetrics {
21    pub stage: String,
22    pub total_ms: f64,
23    pub ms_per_frame: f64,
24    pub ns_per_pixel: f64,
25}
26
27#[derive(Clone, Debug, Serialize)]
28pub struct TimingEntry {
29    pub label: String,
30    pub measurement_kind: String,
31    pub actual_gpu_timing: bool,
32    pub mode: String,
33    pub scenario_id: String,
34    pub width: usize,
35    pub height: usize,
36    pub frame_count: usize,
37    pub iterations: usize,
38    pub build_profile: String,
39    pub stages: Vec<TimingStageMetrics>,
40    pub total_ms: f64,
41    pub ms_per_frame: f64,
42    pub estimated_ops_per_pixel: usize,
43    pub estimated_reads_per_pixel: usize,
44    pub estimated_writes_per_pixel: usize,
45    pub estimated_memory_traffic_megabytes: f64,
46    pub likely_optimization_levers: Vec<String>,
47}
48
49#[derive(Clone, Debug, Serialize)]
50pub struct TimingMetrics {
51    pub measurement_kind: String,
52    pub actual_gpu_timing: bool,
53    pub entries: Vec<TimingEntry>,
54    pub notes: Vec<String>,
55}
56
57pub fn run_timing_study(config: &DemoConfig) -> Result<TimingMetrics> {
58    let (high_width, high_height) = if cfg!(debug_assertions) {
59        (1280usize, 720usize)
60    } else {
61        (1920usize, 1080usize)
62    };
63    let entries = vec![
64        measure_entry(
65            "minimum_host_path_default_res",
66            CostMode::Minimal,
67            scenario_sequence(config, ScenarioId::ThinReveal, 160, 96)?,
68            &default_host_realistic_profile(
69                config.dsfb_alpha_range.min,
70                config.dsfb_alpha_range.max,
71            ),
72            6,
73        )?,
74        measure_entry(
75            "motion_augmented_region_mid_res",
76            CostMode::HostRealistic,
77            scenario_sequence(config, ScenarioId::MotionBiasBand, 640, 360)?,
78            &motion_augmented_profile(config.dsfb_alpha_range.min, config.dsfb_alpha_range.max),
79            4,
80        )?,
81        measure_entry(
82            "full_debug_region_mid_res",
83            CostMode::FullResearchDebug,
84            scenario_sequence(config, ScenarioId::RevealBand, 640, 360)?,
85            &synthetic_visibility_profile(config.dsfb_alpha_range.min, config.dsfb_alpha_range.max),
86            4,
87        )?,
88        measure_entry(
89            "minimum_host_path_high_res_proxy",
90            CostMode::HostRealistic,
91            timing_high_resolution_sequence(
92                config,
93                ScenarioId::RevealBand,
94                high_width,
95                high_height,
96            )?,
97            &default_host_realistic_profile(
98                config.dsfb_alpha_range.min,
99                config.dsfb_alpha_range.max,
100            ),
101            1,
102        )?,
103    ];
104
105    Ok(TimingMetrics {
106        measurement_kind: "cpu_only_proxy".to_string(),
107        actual_gpu_timing: false,
108        entries,
109        notes: vec![
110            "No actual GPU timing was measured in this environment. These timings are CPU-side proxy measurements of the same per-pixel supervisory structure and are paired with analytical op and memory estimates.".to_string(),
111            "The highest-resolution entry is a selected-scenario host-realistic proxy, not a full-suite production benchmark.".to_string(),
112        ],
113    })
114}
115
116fn timing_high_resolution_sequence(
117    config: &DemoConfig,
118    scenario_id: ScenarioId,
119    width: usize,
120    height: usize,
121) -> Result<SceneSequence> {
122    let mut sequence = scenario_sequence(config, scenario_id, width, height)?;
123    if cfg!(debug_assertions) && sequence.frames.len() > 8 {
124        sequence.frames.truncate(8);
125        sequence.onset_frame = sequence
126            .onset_frame
127            .min(sequence.frames.len().saturating_sub(2));
128    }
129    Ok(sequence)
130}
131
132fn scenario_sequence(
133    config: &DemoConfig,
134    scenario_id: ScenarioId,
135    width: usize,
136    height: usize,
137) -> Result<SceneSequence> {
138    let scaled_scene = scaled_scene_config(&config.scene, width, height);
139    let definition = scenario_by_id(&scaled_scene, scenario_id).ok_or_else(|| {
140        Error::Message(format!(
141            "timing scenario {} is unavailable",
142            scenario_id.as_str()
143        ))
144    })?;
145    Ok(generate_sequence_for_definition(&definition))
146}
147
148fn measure_entry(
149    label: &str,
150    cost_mode: CostMode,
151    sequence: SceneSequence,
152    profile: &HostSupervisionProfile,
153    iterations: usize,
154) -> Result<TimingEntry> {
155    let measured = measure_cpu_proxy(&sequence, profile, iterations.max(1));
156    let cost = build_cost_report(cost_mode);
157    let pixels_per_frame = (sequence.config.width * sequence.config.height) as f64;
158    let active_frames = sequence.frames.len().saturating_sub(1).max(1) as f64;
159    let total_pixels = pixels_per_frame * active_frames * iterations.max(1) as f64;
160    let estimated_memory_traffic_megabytes =
161        (cost.estimated_total_reads_per_pixel + cost.estimated_total_writes_per_pixel) as f64
162            * 4.0
163            * total_pixels
164            / (1024.0 * 1024.0);
165
166    Ok(TimingEntry {
167        label: label.to_string(),
168        measurement_kind: "cpu_only_proxy".to_string(),
169        actual_gpu_timing: false,
170        mode: cost_mode.as_str().to_string(),
171        scenario_id: sequence.scenario_id.as_str().to_string(),
172        width: sequence.config.width,
173        height: sequence.config.height,
174        frame_count: sequence.frames.len(),
175        iterations,
176        build_profile: if cfg!(debug_assertions) {
177            "debug".to_string()
178        } else {
179            "release".to_string()
180        },
181        stages: vec![
182            stage_metrics(
183                "reproject",
184                measured.reproject,
185                active_frames,
186                pixels_per_frame,
187                iterations,
188            ),
189            stage_metrics(
190                "supervise",
191                measured.supervise,
192                active_frames,
193                pixels_per_frame,
194                iterations,
195            ),
196            stage_metrics(
197                "resolve",
198                measured.resolve,
199                active_frames,
200                pixels_per_frame,
201                iterations,
202            ),
203        ],
204        total_ms: measured.total.as_secs_f64() * 1000.0,
205        ms_per_frame: measured.total.as_secs_f64() * 1000.0 / active_frames / iterations as f64,
206        estimated_ops_per_pixel: cost.estimated_total_ops_per_pixel,
207        estimated_reads_per_pixel: cost.estimated_total_reads_per_pixel,
208        estimated_writes_per_pixel: cost.estimated_total_writes_per_pixel,
209        estimated_memory_traffic_megabytes,
210        likely_optimization_levers: optimization_levers(cost_mode),
211    })
212}
213
/// Accumulated durations of the CPU proxy stages; every field defaults to zero.
#[derive(Default)]
struct StageDurations {
    /// Time spent reprojecting color, depth, and normals.
    reproject: Duration,
    /// Time spent in the temporal-reuse supervision call.
    supervise: Duration,
    /// Time spent blending history with the current frame.
    resolve: Duration,
    /// End-to-end time of each iteration, including work outside the stages.
    total: Duration,
}
221
222fn measure_cpu_proxy(
223    sequence: &SceneSequence,
224    profile: &HostSupervisionProfile,
225    iterations: usize,
226) -> StageDurations {
227    let mut totals = StageDurations::default();
228    for _ in 0..iterations {
229        let total_start = Instant::now();
230        let mut resolved_frames = Vec::with_capacity(sequence.frames.len());
231        for (frame_index, scene_frame) in sequence.frames.iter().enumerate() {
232            let width = scene_frame.ground_truth.width();
233            let height = scene_frame.ground_truth.height();
234            if frame_index == 0 {
235                resolved_frames.push(scene_frame.ground_truth.clone());
236                continue;
237            }
238
239            let previous_resolved = &resolved_frames[frame_index - 1];
240            let previous_scene = &sequence.frames[frame_index - 1];
241
242            let start = Instant::now();
243            let reprojected = reproject_frame(previous_resolved, scene_frame);
244            let reprojected_depth = reproject_depth(previous_scene, scene_frame);
245            let reprojected_normals = reproject_normals(previous_scene, scene_frame);
246            totals.reproject += start.elapsed();
247
248            let visibility_hint = profile
249                .use_visibility_hint
250                .then_some(scene_frame.disocclusion_mask.as_slice());
251            let thin_hint_field = profile
252                .use_visibility_hint
253                .then(|| compute_thin_hint(scene_frame));
254            let thin_hint = thin_hint_field.as_ref();
255            let inputs = HostTemporalInputs {
256                current_color: &scene_frame.ground_truth,
257                reprojected_history: &reprojected,
258                motion_vectors: &scene_frame.motion,
259                current_depth: &scene_frame.depth,
260                reprojected_depth: &reprojected_depth,
261                current_normals: &scene_frame.normals,
262                reprojected_normals: &reprojected_normals,
263                visibility_hint,
264                thin_hint,
265            };
266
267            let start = Instant::now();
268            let outputs = supervise_temporal_reuse(&inputs, profile);
269            totals.supervise += start.elapsed();
270
271            let start = Instant::now();
272            let resolved =
273                resolve_with_alpha(&reprojected, &scene_frame.ground_truth, &outputs.alpha);
274            totals.resolve += start.elapsed();
275            resolved_frames.push(resolved);
276
277            let _ = (width, height);
278        }
279        totals.total += total_start.elapsed();
280    }
281    totals
282}
283
284fn stage_metrics(
285    stage: &str,
286    duration: Duration,
287    active_frames: f64,
288    pixels_per_frame: f64,
289    iterations: usize,
290) -> TimingStageMetrics {
291    let total_ms = duration.as_secs_f64() * 1000.0;
292    let pixel_count = active_frames * pixels_per_frame * iterations as f64;
293    TimingStageMetrics {
294        stage: stage.to_string(),
295        total_ms,
296        ms_per_frame: total_ms / active_frames.max(1.0) / iterations as f64,
297        ns_per_pixel: duration.as_secs_f64() * 1.0e9 / pixel_count.max(1.0),
298    }
299}
300
301fn optimization_levers(mode: CostMode) -> Vec<String> {
302    match mode {
303        CostMode::Minimal => vec![
304            "Fuse alpha modulation into the temporal resolve.".to_string(),
305            "Compute trust/intervention at half resolution if only gating is needed.".to_string(),
306        ],
307        CostMode::HostRealistic => vec![
308            "Fuse reprojection fetches across color, depth, and normal buffers.".to_string(),
309            "Evaluate trust at half resolution or per tile, then upsample alpha.".to_string(),
310            "Keep motion disagreement optional; the minimum path no longer pays for it when scenario evidence is weak.".to_string(),
311        ],
312        CostMode::FullResearchDebug => vec![
313            "Drop synthetic visibility and debug exports outside analysis mode.".to_string(),
314            "Compress trust/alpha/intervention into narrower formats once calibration work stabilizes.".to_string(),
315        ],
316    }
317}
318
319fn resolve_with_alpha(
320    history: &ImageFrame,
321    current: &ImageFrame,
322    alpha: &ScalarField,
323) -> ImageFrame {
324    let mut resolved = ImageFrame::new(history.width(), history.height());
325    for y in 0..history.height() {
326        for x in 0..history.width() {
327            resolved.set(
328                x,
329                y,
330                history.get(x, y).lerp(current.get(x, y), alpha.get(x, y)),
331            );
332        }
333    }
334    resolved
335}
336
337fn reproject_frame(previous_resolved: &ImageFrame, scene_frame: &SceneFrame) -> ImageFrame {
338    let mut reprojected = ImageFrame::new(
339        scene_frame.ground_truth.width(),
340        scene_frame.ground_truth.height(),
341    );
342    for y in 0..scene_frame.ground_truth.height() {
343        for x in 0..scene_frame.ground_truth.width() {
344            let motion = scene_frame.motion[y * scene_frame.ground_truth.width() + x];
345            reprojected.set(
346                x,
347                y,
348                previous_resolved.sample_bilinear_clamped(
349                    x as f32 + motion.to_prev_x,
350                    y as f32 + motion.to_prev_y,
351                ),
352            );
353        }
354    }
355    reprojected
356}
357
358fn reproject_depth(previous_scene_frame: &SceneFrame, scene_frame: &SceneFrame) -> Vec<f32> {
359    reproject_scalar_buffer(
360        &previous_scene_frame.depth,
361        scene_frame.ground_truth.width(),
362        scene_frame.ground_truth.height(),
363        &scene_frame.motion,
364    )
365}
366
367fn reproject_normals(previous_scene_frame: &SceneFrame, scene_frame: &SceneFrame) -> Vec<Normal3> {
368    let width = scene_frame.ground_truth.width();
369    let height = scene_frame.ground_truth.height();
370    let mut reprojected = vec![Normal3::new(0.0, 0.0, 1.0); width * height];
371    for y in 0..height {
372        for x in 0..width {
373            let index = y * width + x;
374            let motion = scene_frame.motion[index];
375            reprojected[index] = sample_normal_bilinear_clamped(
376                &previous_scene_frame.normals,
377                width,
378                height,
379                x as f32 + motion.to_prev_x,
380                y as f32 + motion.to_prev_y,
381            );
382        }
383    }
384    reprojected
385}
386
387fn reproject_scalar_buffer(
388    previous_values: &[f32],
389    width: usize,
390    height: usize,
391    motion: &[MotionVector],
392) -> Vec<f32> {
393    let mut reprojected = vec![0.0; width * height];
394    for y in 0..height {
395        for x in 0..width {
396            let index = y * width + x;
397            let vector = motion[index];
398            reprojected[index] = sample_scalar_bilinear_clamped(
399                previous_values,
400                width,
401                height,
402                x as f32 + vector.to_prev_x,
403                y as f32 + vector.to_prev_y,
404            );
405        }
406    }
407    reprojected
408}
409
410fn compute_thin_hint(scene_frame: &SceneFrame) -> ScalarField {
411    let width = scene_frame.ground_truth.width();
412    let height = scene_frame.ground_truth.height();
413    let mut field = ScalarField::new(width, height);
414    for y in 0..height {
415        for x in 0..width {
416            let index = y * width + x;
417            let hint = matches!(scene_frame.layers[index], SurfaceTag::ThinStructure)
418                || neighbors(x, y, width, height).into_iter().any(|(nx, ny)| {
419                    matches!(
420                        scene_frame.layers[ny * width + nx],
421                        SurfaceTag::ThinStructure
422                    )
423                });
424            field.set(x, y, if hint { 1.0 } else { 0.0 });
425        }
426    }
427    field
428}
429
/// Lists the in-bounds neighbours of `(x, y)` in its 3x3 window.
///
/// The centre itself is excluded. Neighbours are returned row by row (top to
/// bottom), left to right within each row, and coordinates outside a
/// `width` x `height` grid are dropped.
fn neighbors(x: usize, y: usize, width: usize, height: usize) -> Vec<(usize, usize)> {
    let (centre_x, centre_y) = (x as i32, y as i32);
    let (limit_x, limit_y) = (width as i32, height as i32);

    let mut found = Vec::with_capacity(8);
    found.extend(
        (-1..=1)
            .flat_map(|dy| (-1..=1).map(move |dx| (dx, dy)))
            .filter(|&(dx, dy)| (dx, dy) != (0, 0))
            .map(|(dx, dy)| (centre_x + dx, centre_y + dy))
            .filter(|&(nx, ny)| (0..limit_x).contains(&nx) && (0..limit_y).contains(&ny))
            .map(|(nx, ny)| (nx as usize, ny as usize)),
    );
    found
}
446
/// Bilinearly samples a row-major `width` x `height` scalar buffer at `(x, y)`.
///
/// The four surrounding texel coordinates are clamped to the buffer edges
/// before lookup, so positions outside the grid return edge values.
fn sample_scalar_bilinear_clamped(
    values: &[f32],
    width: usize,
    height: usize,
    x: f32,
    y: f32,
) -> f32 {
    let max_col = width.saturating_sub(1) as f32;
    let max_row = height.saturating_sub(1) as f32;
    let fetch = |fx: f32, fy: f32| -> f32 {
        let col = fx.clamp(0.0, max_col) as usize;
        let row = fy.clamp(0.0, max_row) as usize;
        values[row * width + col]
    };
    let blend = |a: f32, b: f32, t: f32| a * (1.0 - t) + b * t;

    let left = x.floor();
    let upper = y.floor();
    let right = left + 1.0;
    let lower = upper + 1.0;
    let weight_x = (x - left).clamp(0.0, 1.0);
    let weight_y = (y - upper).clamp(0.0, 1.0);

    let upper_row = blend(fetch(left, upper), fetch(right, upper), weight_x);
    let lower_row = blend(fetch(left, lower), fetch(right, lower), weight_x);
    blend(upper_row, lower_row, weight_y)
}
471
472fn sample_normal_bilinear_clamped(
473    values: &[Normal3],
474    width: usize,
475    height: usize,
476    x: f32,
477    y: f32,
478) -> Normal3 {
479    let x0 = x.floor();
480    let y0 = y.floor();
481    let x1 = x0 + 1.0;
482    let y1 = y0 + 1.0;
483    let tx = (x - x0).clamp(0.0, 1.0);
484    let ty = (y - y0).clamp(0.0, 1.0);
485
486    let sample = |sample_x: f32, sample_y: f32| {
487        let sx = sample_x.clamp(0.0, width.saturating_sub(1) as f32) as usize;
488        let sy = sample_y.clamp(0.0, height.saturating_sub(1) as f32) as usize;
489        values[sy * width + sx]
490    };
491
492    let mix = |a: Normal3, b: Normal3, t: f32| {
493        Normal3::new(
494            a.x + (b.x - a.x) * t,
495            a.y + (b.y - a.y) * t,
496            a.z + (b.z - a.z) * t,
497        )
498    };
499    mix(
500        mix(sample(x0, y0), sample(x1, y0), tx),
501        mix(sample(x0, y1), sample(x1, y1), tx),
502        ty,
503    )
504    .normalized()
505}