// proof_engine/rendergraph/executor.rs

//! Graph execution engine: walks sorted passes, manages barriers, GPU timing,
//! triple-buffered frame-in-flight management, async compute overlap detection,
//! execution statistics, and hot-reload from config.
4
5use std::collections::HashMap;
6use std::fmt;
7use std::time::{Duration, Instant};
8
9use crate::rendergraph::graph::{
10    DependencyKind, GraphConfig, PassType, QueueAffinity, RenderGraph,
11};
12use crate::rendergraph::resources::{
13    MemoryBudget, ResourceDescriptor, ResourceHandle, ResourceLifetime, ResourcePool, TextureFormat,
14};
15
16// ---------------------------------------------------------------------------
17// Barrier types
18// ---------------------------------------------------------------------------
19
/// The kind of GPU barrier inserted between passes.
///
/// `GraphExecutor::compute_barriers` picks a variant from the pass types on
/// both sides of a dependency edge together with the edge's dependency kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BarrierKind {
    /// Render target finished writing, will be read as texture.
    /// Chosen for a graphics -> graphics edge that is a read-after-write.
    RenderToShaderRead,
    /// Compute shader finished writing, will be read by render pass.
    /// Chosen for every compute -> graphics edge.
    ComputeToRender,
    /// Render pass finished, another render pass will write the same target.
    /// Chosen for graphics -> graphics edges that are not read-after-write.
    RenderToRender,
    /// Compute finished writing, another compute will read.
    /// Chosen for every compute -> compute edge.
    ComputeToCompute,
    /// Transfer finished, resource will be read.
    /// Chosen whenever the producing pass is a transfer pass.
    TransferToRead,
    /// Generic full pipeline barrier.
    /// Fallback for any combination not covered by the variants above.
    FullPipeline,
}
36
37/// A barrier that must be issued between two passes.
38#[derive(Debug, Clone)]
39pub struct PassBarrier {
40    pub before_pass: String,
41    pub after_pass: String,
42    pub resource_name: String,
43    pub kind: BarrierKind,
44}
45
46impl fmt::Display for PassBarrier {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        write!(
49            f,
50            "Barrier({:?}): {} -> {} [{}]",
51            self.kind, self.before_pass, self.after_pass, self.resource_name
52        )
53    }
54}
55
56// ---------------------------------------------------------------------------
57// GPU timing queries
58// ---------------------------------------------------------------------------
59
/// Timing record for one pass of one frame. The GPU figure is a simulated
/// estimate rather than a real hardware query.
#[derive(Clone, Debug)]
pub struct PassTimingQuery {
    /// Name of the pass that was timed.
    pub pass_name: String,
    /// Wall-clock time spent on the CPU for this pass.
    pub cpu_time: Duration,
    /// Estimated GPU time for this pass.
    pub gpu_time_estimate: Duration,
    /// Offset of the pass start relative to the start of the frame.
    pub start_offset: Duration,
}

impl PassTimingQuery {
    /// Converts a duration into fractional milliseconds.
    fn to_millis(span: Duration) -> f64 {
        span.as_secs_f64() * 1000.0
    }

    /// CPU time of the pass in milliseconds.
    pub fn cpu_ms(&self) -> f64 {
        Self::to_millis(self.cpu_time)
    }

    /// Estimated GPU time of the pass in milliseconds.
    pub fn gpu_ms(&self) -> f64 {
        Self::to_millis(self.gpu_time_estimate)
    }
}
78
79// ---------------------------------------------------------------------------
80// Pass context
81// ---------------------------------------------------------------------------
82
83/// Context provided to each pass during execution. Gives access to input and
84/// output resources, resolution info, and pass metadata.
85#[derive(Debug)]
86pub struct PassContext {
87    pub pass_name: String,
88    pub pass_index: usize,
89    pub frame_index: u64,
90    pub backbuffer_width: u32,
91    pub backbuffer_height: u32,
92    pub delta_time: f32,
93    /// Input resource handles and names.
94    pub inputs: Vec<(ResourceHandle, String)>,
95    /// Output resource handles and names.
96    pub outputs: Vec<(ResourceHandle, String)>,
97    /// Effective resolution for this pass (after applying resolution scale).
98    pub render_width: u32,
99    pub render_height: u32,
100}
101
102impl PassContext {
103    /// Find an input resource by name.
104    pub fn input(&self, name: &str) -> Option<ResourceHandle> {
105        self.inputs
106            .iter()
107            .find(|(_, n)| n == name)
108            .map(|(h, _)| *h)
109    }
110
111    /// Find an output resource by name.
112    pub fn output(&self, name: &str) -> Option<ResourceHandle> {
113        self.outputs
114            .iter()
115            .find(|(_, n)| n == name)
116            .map(|(h, _)| *h)
117    }
118}
119
120// ---------------------------------------------------------------------------
121// Frame timeline (triple-buffered)
122// ---------------------------------------------------------------------------
123
/// Manages triple-buffered frame-in-flight state.
///
/// Frames live in a fixed-size ring; `current_index` selects the slot that is
/// being recorded and advances (wrapping) each time a frame is submitted.
pub struct FrameTimeline {
    /// Maximum frames in flight; also the modulus used when advancing
    /// `current_index`.
    max_frames_in_flight: usize,
    /// Ring of frame states, one slot per frame in flight.
    frames: Vec<FrameState>,
    /// Index of the current frame being recorded.
    current_index: usize,
    /// Global frame counter, incremented by every `begin_frame` call.
    frame_counter: u64,
}
135
136/// State of a single frame in the pipeline.
137#[derive(Debug, Clone)]
138pub struct FrameState {
139    pub frame_index: u64,
140    pub status: FrameStatus,
141    pub submit_time: Option<Instant>,
142    pub complete_time: Option<Instant>,
143    pub pass_timings: Vec<PassTimingQuery>,
144    pub barriers: Vec<PassBarrier>,
145    pub resource_allocations: usize,
146    pub total_cpu_time: Duration,
147}
148
149impl FrameState {
150    fn new(frame_index: u64) -> Self {
151        Self {
152            frame_index,
153            status: FrameStatus::Available,
154            submit_time: None,
155            complete_time: None,
156            pass_timings: Vec::new(),
157            barriers: Vec::new(),
158            resource_allocations: 0,
159            total_cpu_time: Duration::ZERO,
160        }
161    }
162
163    fn reset(&mut self, frame_index: u64) {
164        self.frame_index = frame_index;
165        self.status = FrameStatus::Recording;
166        self.submit_time = None;
167        self.complete_time = None;
168        self.pass_timings.clear();
169        self.barriers.clear();
170        self.resource_allocations = 0;
171        self.total_cpu_time = Duration::ZERO;
172    }
173}
174
/// Lifecycle state of a frame slot in the timeline.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FrameStatus {
    /// Slot has never been used; the status `FrameState::new` assigns.
    Available,
    /// Slot is being recorded; set by `FrameState::reset`.
    Recording,
    /// Frame was handed off; set by `FrameTimeline::submit_frame`.
    Submitted,
    /// Frame finished; set by `FrameTimeline::complete_frame`.
    Complete,
}
182
183impl FrameTimeline {
184    pub fn new(max_frames_in_flight: usize) -> Self {
185        let frames = (0..max_frames_in_flight)
186            .map(|_| FrameState::new(0))
187            .collect();
188        Self {
189            max_frames_in_flight,
190            frames,
191            current_index: 0,
192            frame_counter: 0,
193        }
194    }
195
196    pub fn triple_buffered() -> Self {
197        Self::new(3)
198    }
199
200    /// Begin recording a new frame. Returns the frame index.
201    pub fn begin_frame(&mut self) -> u64 {
202        self.frame_counter += 1;
203        let idx = self.current_index;
204        self.frames[idx].reset(self.frame_counter);
205        self.frame_counter
206    }
207
208    /// Submit the current frame.
209    pub fn submit_frame(&mut self) {
210        let idx = self.current_index;
211        self.frames[idx].status = FrameStatus::Submitted;
212        self.frames[idx].submit_time = Some(Instant::now());
213        self.current_index = (self.current_index + 1) % self.max_frames_in_flight;
214    }
215
216    /// Mark a frame as complete (GPU finished).
217    pub fn complete_frame(&mut self, frame_index: u64) {
218        for f in &mut self.frames {
219            if f.frame_index == frame_index && f.status == FrameStatus::Submitted {
220                f.status = FrameStatus::Complete;
221                f.complete_time = Some(Instant::now());
222                break;
223            }
224        }
225    }
226
227    /// Get the current recording frame (mutable).
228    pub fn current_frame_mut(&mut self) -> &mut FrameState {
229        &mut self.frames[self.current_index]
230    }
231
232    /// Get the current recording frame.
233    pub fn current_frame(&self) -> &FrameState {
234        &self.frames[self.current_index]
235    }
236
237    /// Get a completed frame by index.
238    pub fn completed_frame(&self, frame_index: u64) -> Option<&FrameState> {
239        self.frames
240            .iter()
241            .find(|f| f.frame_index == frame_index && f.status == FrameStatus::Complete)
242    }
243
244    /// Number of frames currently in flight (submitted but not completed).
245    pub fn frames_in_flight(&self) -> usize {
246        self.frames
247            .iter()
248            .filter(|f| f.status == FrameStatus::Submitted)
249            .count()
250    }
251
252    /// Wait until a frame slot is available.
253    pub fn wait_for_available(&self) -> bool {
254        self.frames
255            .iter()
256            .any(|f| f.status == FrameStatus::Available || f.status == FrameStatus::Complete)
257    }
258
259    pub fn max_frames_in_flight(&self) -> usize {
260        self.max_frames_in_flight
261    }
262
263    pub fn frame_counter(&self) -> u64 {
264        self.frame_counter
265    }
266}
267
268// ---------------------------------------------------------------------------
269// Async compute scheduling
270// ---------------------------------------------------------------------------
271
272/// Identifies passes that can run on the async compute queue, overlapping
273/// with graphics work.
274#[derive(Debug, Clone)]
275pub struct AsyncComputeSchedule {
276    /// Passes that run on the graphics queue (in order).
277    pub graphics_passes: Vec<String>,
278    /// Passes that run on the async compute queue (in order).
279    pub compute_passes: Vec<String>,
280    /// Sync points: (graphics_pass, compute_pass) where compute must finish
281    /// before graphics can continue.
282    pub sync_points: Vec<(String, String)>,
283}
284
285impl AsyncComputeSchedule {
286    /// Analyze a graph and partition passes into graphics and async compute queues.
287    pub fn from_graph(graph: &mut RenderGraph) -> Result<Self, Vec<String>> {
288        let sorted = graph.topological_sort()?;
289        let mut graphics = Vec::new();
290        let mut compute = Vec::new();
291        let mut sync_points = Vec::new();
292
293        for name in &sorted {
294            let pass = graph.get_pass(name).unwrap();
295            if pass.is_async_compute_candidate() {
296                compute.push(name.clone());
297            } else {
298                graphics.push(name.clone());
299            }
300        }
301
302        // Determine sync points: if a graphics pass reads a resource written
303        // by a compute pass, we need a sync point.
304        let edges = graph.edges().to_vec();
305        for edge in &edges {
306            let from_is_compute = compute.contains(&edge.from_pass);
307            let to_is_graphics = graphics.contains(&edge.to_pass);
308            if from_is_compute && to_is_graphics {
309                sync_points.push((edge.to_pass.clone(), edge.from_pass.clone()));
310            }
311        }
312
313        Ok(Self {
314            graphics_passes: graphics,
315            compute_passes: compute,
316            sync_points,
317        })
318    }
319
320    /// Returns the percentage of passes that can run asynchronously.
321    pub fn async_ratio(&self) -> f32 {
322        let total = self.graphics_passes.len() + self.compute_passes.len();
323        if total == 0 {
324            return 0.0;
325        }
326        self.compute_passes.len() as f32 / total as f32
327    }
328}
329
330// ---------------------------------------------------------------------------
331// Execution statistics
332// ---------------------------------------------------------------------------
333
334/// Per-frame execution statistics.
335#[derive(Debug, Clone)]
336pub struct ExecutionStats {
337    pub frame_index: u64,
338    pub total_cpu_time: Duration,
339    pub pass_times: Vec<(String, Duration)>,
340    pub barrier_count: usize,
341    pub resource_allocation_count: usize,
342    pub resource_reuse_count: usize,
343    pub async_compute_passes: usize,
344    pub skipped_passes: usize,
345    pub active_passes: usize,
346    pub memory_budget: Option<MemoryBudget>,
347}
348
349impl ExecutionStats {
350    fn new(frame_index: u64) -> Self {
351        Self {
352            frame_index,
353            total_cpu_time: Duration::ZERO,
354            pass_times: Vec::new(),
355            barrier_count: 0,
356            resource_allocation_count: 0,
357            resource_reuse_count: 0,
358            async_compute_passes: 0,
359            skipped_passes: 0,
360            active_passes: 0,
361            memory_budget: None,
362        }
363    }
364
365    pub fn total_ms(&self) -> f64 {
366        self.total_cpu_time.as_secs_f64() * 1000.0
367    }
368
369    /// Slowest pass name and its time.
370    pub fn slowest_pass(&self) -> Option<(&str, Duration)> {
371        self.pass_times
372            .iter()
373            .max_by_key(|(_, d)| *d)
374            .map(|(n, d)| (n.as_str(), *d))
375    }
376}
377
378impl fmt::Display for ExecutionStats {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        write!(
381            f,
382            "Frame {}: {:.2}ms, {} passes ({} skipped), {} barriers, {} allocs",
383            self.frame_index,
384            self.total_ms(),
385            self.active_passes,
386            self.skipped_passes,
387            self.barrier_count,
388            self.resource_allocation_count,
389        )
390    }
391}
392
393// ---------------------------------------------------------------------------
394// Pass executor callback
395// ---------------------------------------------------------------------------
396
/// Trait for pass execution callbacks. Each built-in pass implements this.
///
/// `GraphExecutor` stores implementations keyed by pass name and calls
/// `execute` once per frame for each executed pass that has one registered.
pub trait PassExecutor {
    /// Execute the pass, given its context.
    ///
    /// The context is borrowed immutably and only for the duration of the
    /// call.
    fn execute(&self, ctx: &PassContext);

    /// Name of the pass (for debugging).
    fn name(&self) -> &str;
}

/// A boxed pass executor, the form in which `GraphExecutor` stores
/// registered executors.
pub type BoxedPassExecutor = Box<dyn PassExecutor>;
408
409/// A simple closure-based pass executor.
410pub struct FnPassExecutor {
411    name: String,
412    func: Box<dyn Fn(&PassContext)>,
413}
414
415impl FnPassExecutor {
416    pub fn new(name: &str, func: impl Fn(&PassContext) + 'static) -> Self {
417        Self {
418            name: name.to_string(),
419            func: Box::new(func),
420        }
421    }
422}
423
424impl PassExecutor for FnPassExecutor {
425    fn execute(&self, ctx: &PassContext) {
426        (self.func)(ctx);
427    }
428
429    fn name(&self) -> &str {
430        &self.name
431    }
432}
433
434// ---------------------------------------------------------------------------
435// Graph executor
436// ---------------------------------------------------------------------------
437
/// The main execution engine. Walks sorted passes, inserts barriers, tracks
/// timing, and manages resources.
pub struct GraphExecutor {
    /// Resource pool for automatic allocation.
    pub resource_pool: ResourcePool,
    /// Frame timeline for triple-buffered frame management.
    pub timeline: FrameTimeline,
    /// Registered pass executors, keyed by pass name.
    executors: HashMap<String, BoxedPassExecutor>,
    /// Backbuffer width in pixels.
    backbuffer_width: u32,
    /// Backbuffer height in pixels.
    backbuffer_height: u32,
    /// Delta time for current frame; copied into every `PassContext`.
    delta_time: f32,
    /// Accumulated statistics for the last N frames, oldest first.
    stats_history: Vec<ExecutionStats>,
    /// Maximum number of stats frames to keep.
    max_stats_history: usize,
    /// Current graph config (for hot-reload).
    current_config: Option<GraphConfig>,
    /// Config file path (for hot-reload watching).
    config_path: Option<String>,
    /// When the config was last set or rebuilt. This is an in-process
    /// `Instant`, not a filesystem modification time.
    last_config_modified: Option<Instant>,
    /// Whether to collect detailed per-pass timing.
    enable_timing: bool,
}
465
466impl GraphExecutor {
467    pub fn new(backbuffer_width: u32, backbuffer_height: u32) -> Self {
468        Self {
469            resource_pool: ResourcePool::new(),
470            timeline: FrameTimeline::triple_buffered(),
471            executors: HashMap::new(),
472            backbuffer_width,
473            backbuffer_height,
474            delta_time: 0.016,
475            stats_history: Vec::new(),
476            max_stats_history: 120,
477            current_config: None,
478            config_path: None,
479            last_config_modified: None,
480            enable_timing: true,
481        }
482    }
483
484    pub fn with_timing(mut self, enable: bool) -> Self {
485        self.enable_timing = enable;
486        self
487    }
488
489    pub fn with_max_stats_history(mut self, n: usize) -> Self {
490        self.max_stats_history = n;
491        self
492    }
493
494    /// Register a pass executor.
495    pub fn register_executor(&mut self, name: &str, executor: BoxedPassExecutor) {
496        self.executors.insert(name.to_string(), executor);
497    }
498
499    /// Register a closure-based executor.
500    pub fn register_fn(
501        &mut self,
502        name: &str,
503        func: impl Fn(&PassContext) + 'static,
504    ) {
505        self.executors.insert(
506            name.to_string(),
507            Box::new(FnPassExecutor::new(name, func)),
508        );
509    }
510
511    /// Set backbuffer dimensions (e.g., on window resize).
512    pub fn resize(&mut self, width: u32, height: u32) {
513        self.backbuffer_width = width;
514        self.backbuffer_height = height;
515    }
516
517    /// Set delta time for the current frame.
518    pub fn set_delta_time(&mut self, dt: f32) {
519        self.delta_time = dt;
520    }
521
522    // -- Barrier insertion ------------------------------------------------
523
524    /// Compute barriers needed between passes based on their resource
525    /// dependencies and queue types.
526    fn compute_barriers(&self, graph: &RenderGraph, sorted: &[String]) -> Vec<PassBarrier> {
527        let mut barriers = Vec::new();
528        let edges = graph.edges();
529
530        for edge in edges {
531            // Find the indices of from/to in sorted order
532            let from_idx = sorted.iter().position(|n| n == &edge.from_pass);
533            let to_idx = sorted.iter().position(|n| n == &edge.to_pass);
534            if from_idx.is_none() || to_idx.is_none() {
535                continue;
536            }
537
538            let from_pass = graph.get_pass(&edge.from_pass);
539            let to_pass = graph.get_pass(&edge.to_pass);
540            if from_pass.is_none() || to_pass.is_none() {
541                continue;
542            }
543            let from_pass = from_pass.unwrap();
544            let to_pass = to_pass.unwrap();
545
546            let kind = match (from_pass.pass_type, to_pass.pass_type, edge.kind) {
547                (PassType::Compute, PassType::Graphics, _) => BarrierKind::ComputeToRender,
548                (PassType::Compute, PassType::Compute, _) => BarrierKind::ComputeToCompute,
549                (PassType::Graphics, PassType::Graphics, DependencyKind::ReadAfterWrite) => {
550                    BarrierKind::RenderToShaderRead
551                }
552                (PassType::Graphics, PassType::Graphics, _) => BarrierKind::RenderToRender,
553                (PassType::Transfer, _, _) => BarrierKind::TransferToRead,
554                _ => BarrierKind::FullPipeline,
555            };
556
557            barriers.push(PassBarrier {
558                before_pass: edge.from_pass.clone(),
559                after_pass: edge.to_pass.clone(),
560                resource_name: edge.resource.clone(),
561                kind,
562            });
563        }
564
565        barriers
566    }
567
568    // -- Main execution ---------------------------------------------------
569
570    /// Execute one frame of the render graph.
571    pub fn execute_frame(&mut self, graph: &mut RenderGraph) -> Result<ExecutionStats, String> {
572        let frame_start = Instant::now();
573
574        // Begin frame
575        let frame_index = self.timeline.begin_frame();
576        self.resource_pool.begin_frame();
577
578        let mut stats = ExecutionStats::new(frame_index);
579
580        // Topological sort
581        let sorted = graph
582            .topological_sort()
583            .map_err(|cycle| format!("Cycle detected: {:?}", cycle))?;
584
585        // Filter to active passes
586        let active_passes = graph.active_passes().unwrap_or_default();
587        let skipped = sorted.len() - active_passes.len();
588        stats.skipped_passes = skipped;
589        stats.active_passes = active_passes.len();
590
591        // Compute barriers
592        let barriers = self.compute_barriers(graph, &active_passes);
593        stats.barrier_count = barriers.len();
594
595        // Store barriers in frame state
596        self.timeline.current_frame_mut().barriers = barriers.clone();
597
598        // Acquire resources
599        let mut allocated = 0usize;
600        for entry in graph.resource_table.entries() {
601            let _handle = self.resource_pool.acquire(
602                entry.descriptor.clone(),
603                entry.lifetime,
604                self.backbuffer_width,
605                self.backbuffer_height,
606            );
607            allocated += 1;
608        }
609        stats.resource_allocation_count = allocated;
610
611        // Record resource read/write for lifetime tracking
612        for (pass_idx, pass_name) in active_passes.iter().enumerate() {
613            if let Some(pass) = graph.get_pass(pass_name) {
614                for &h in &pass.outputs {
615                    self.resource_pool.record_write(h, pass_idx, pass_name);
616                }
617                for &h in &pass.inputs {
618                    self.resource_pool.record_read(h, pass_idx, pass_name);
619                }
620            }
621        }
622
623        // Execute each pass
624        let mut barrier_idx = 0;
625        for (pass_idx, pass_name) in active_passes.iter().enumerate() {
626            // Issue barriers that precede this pass
627            while barrier_idx < barriers.len() && barriers[barrier_idx].after_pass == *pass_name {
628                // In a real implementation, this would call the GPU API
629                barrier_idx += 1;
630            }
631
632            let pass_start = Instant::now();
633
634            let pass = graph.get_pass(pass_name).unwrap();
635
636            // Build pass context
637            let (rw, rh) = {
638                let w = (self.backbuffer_width as f32 * pass.resolution.width_scale) as u32;
639                let h = (self.backbuffer_height as f32 * pass.resolution.height_scale) as u32;
640                (w.max(1), h.max(1))
641            };
642
643            let ctx = PassContext {
644                pass_name: pass_name.clone(),
645                pass_index: pass_idx,
646                frame_index,
647                backbuffer_width: self.backbuffer_width,
648                backbuffer_height: self.backbuffer_height,
649                delta_time: self.delta_time,
650                inputs: pass
651                    .inputs
652                    .iter()
653                    .zip(pass.input_names.iter())
654                    .map(|(&h, n)| (h, n.clone()))
655                    .collect(),
656                outputs: pass
657                    .outputs
658                    .iter()
659                    .zip(pass.output_names.iter())
660                    .map(|(&h, n)| (h, n.clone()))
661                    .collect(),
662                render_width: rw,
663                render_height: rh,
664            };
665
666            // Execute
667            if let Some(executor) = self.executors.get(pass_name) {
668                executor.execute(&ctx);
669            }
670
671            let pass_elapsed = pass_start.elapsed();
672            if self.enable_timing {
673                stats.pass_times.push((pass_name.clone(), pass_elapsed));
674
675                self.timeline
676                    .current_frame_mut()
677                    .pass_timings
678                    .push(PassTimingQuery {
679                        pass_name: pass_name.clone(),
680                        cpu_time: pass_elapsed,
681                        gpu_time_estimate: pass_elapsed, // simulated
682                        start_offset: pass_start.duration_since(frame_start),
683                    });
684            }
685
686            // Count async compute passes
687            if pass.is_async_compute_candidate() {
688                stats.async_compute_passes += 1;
689            }
690        }
691
692        // Compute aliasing
693        self.resource_pool
694            .compute_aliasing(active_passes.len());
695
696        // Memory budget
697        let budget = self
698            .resource_pool
699            .estimate_memory_budget(self.backbuffer_width, self.backbuffer_height);
700        stats.memory_budget = Some(budget);
701
702        // End frame
703        let pool_stats = self.resource_pool.end_frame();
704        stats.resource_allocation_count = pool_stats.active_resources;
705
706        let total_elapsed = frame_start.elapsed();
707        stats.total_cpu_time = total_elapsed;
708        self.timeline.current_frame_mut().total_cpu_time = total_elapsed;
709        self.timeline.current_frame_mut().resource_allocations = pool_stats.active_resources;
710
711        // Submit frame
712        self.timeline.submit_frame();
713
714        // Store stats
715        self.stats_history.push(stats.clone());
716        if self.stats_history.len() > self.max_stats_history {
717            self.stats_history.remove(0);
718        }
719
720        Ok(stats)
721    }
722
723    // -- Hot-reload -------------------------------------------------------
724
725    /// Set a graph config for hot-reload support.
726    pub fn set_config(&mut self, config: GraphConfig) {
727        self.current_config = Some(config);
728        self.last_config_modified = Some(Instant::now());
729    }
730
731    /// Set the path to watch for config changes.
732    pub fn set_config_path(&mut self, path: &str) {
733        self.config_path = Some(path.to_string());
734    }
735
736    /// Rebuild the graph from the current config (hot-reload).
737    pub fn rebuild_from_config(&mut self) -> Option<RenderGraph> {
738        self.current_config.as_ref().map(|config| {
739            self.last_config_modified = Some(Instant::now());
740            config.build()
741        })
742    }
743
744    /// Check if config has been modified and rebuild if needed.
745    /// In a real implementation, this would watch the filesystem.
746    pub fn check_hot_reload(&mut self) -> Option<RenderGraph> {
747        if let Some(ref _path) = self.config_path {
748            // In production: check file modification time against last_config_modified.
749            // For now, this is a no-op; call rebuild_from_config() explicitly.
750        }
751        None
752    }
753
754    // -- Statistics access -------------------------------------------------
755
756    pub fn stats_history(&self) -> &[ExecutionStats] {
757        &self.stats_history
758    }
759
760    pub fn last_stats(&self) -> Option<&ExecutionStats> {
761        self.stats_history.last()
762    }
763
764    /// Average frame time over the last N frames.
765    pub fn average_frame_time(&self, n: usize) -> Duration {
766        let count = self.stats_history.len().min(n);
767        if count == 0 {
768            return Duration::ZERO;
769        }
770        let total: Duration = self.stats_history[self.stats_history.len() - count..]
771            .iter()
772            .map(|s| s.total_cpu_time)
773            .sum();
774        total / count as u32
775    }
776
777    /// Average barrier count over the last N frames.
778    pub fn average_barrier_count(&self, n: usize) -> f32 {
779        let count = self.stats_history.len().min(n);
780        if count == 0 {
781            return 0.0;
782        }
783        let total: usize = self.stats_history[self.stats_history.len() - count..]
784            .iter()
785            .map(|s| s.barrier_count)
786            .sum();
787        total as f32 / count as f32
788    }
789
790    pub fn backbuffer_size(&self) -> (u32, u32) {
791        (self.backbuffer_width, self.backbuffer_height)
792    }
793
794    /// Generate a text report of the last frame's execution.
795    pub fn frame_report(&self) -> String {
796        let mut report = String::new();
797        if let Some(stats) = self.last_stats() {
798            report.push_str(&format!("=== Frame {} Report ===\n", stats.frame_index));
799            report.push_str(&format!(
800                "Total CPU time: {:.3}ms\n",
801                stats.total_ms()
802            ));
803            report.push_str(&format!(
804                "Active passes: {} ({} skipped)\n",
805                stats.active_passes, stats.skipped_passes
806            ));
807            report.push_str(&format!("Barriers: {}\n", stats.barrier_count));
808            report.push_str(&format!(
809                "Resource allocations: {}\n",
810                stats.resource_allocation_count
811            ));
812            report.push_str(&format!(
813                "Async compute passes: {}\n",
814                stats.async_compute_passes
815            ));
816            if let Some(ref budget) = stats.memory_budget {
817                report.push_str(&format!("Memory: {}\n", budget));
818            }
819            report.push_str("\nPer-pass timing:\n");
820            for (name, dur) in &stats.pass_times {
821                report.push_str(&format!(
822                    "  {}: {:.3}ms\n",
823                    name,
824                    dur.as_secs_f64() * 1000.0
825                ));
826            }
827            if let Some((name, dur)) = stats.slowest_pass() {
828                report.push_str(&format!(
829                    "\nSlowest pass: {} ({:.3}ms)\n",
830                    name,
831                    dur.as_secs_f64() * 1000.0
832                ));
833            }
834        } else {
835            report.push_str("No frame data available.\n");
836        }
837        report
838    }
839}
840
841// ---------------------------------------------------------------------------
842// Multi-graph executor
843// ---------------------------------------------------------------------------
844
845/// Executes multiple render graphs in sequence (e.g., main scene + UI overlay).
846pub struct MultiGraphExecutor {
847    executor: GraphExecutor,
848    graphs: Vec<(String, RenderGraph)>,
849}
850
851impl MultiGraphExecutor {
852    pub fn new(executor: GraphExecutor) -> Self {
853        Self {
854            executor,
855            graphs: Vec::new(),
856        }
857    }
858
859    pub fn add_graph(&mut self, name: &str, graph: RenderGraph) {
860        self.graphs.push((name.to_string(), graph));
861    }
862
863    pub fn remove_graph(&mut self, name: &str) {
864        self.graphs.retain(|(n, _)| n != name);
865    }
866
867    pub fn execute_all(&mut self) -> Vec<Result<ExecutionStats, String>> {
868        let mut results = Vec::new();
869        // We need to iterate mutably, which requires indexing
870        for i in 0..self.graphs.len() {
871            let result = self.executor.execute_frame(&mut self.graphs[i].1);
872            results.push(result);
873        }
874        results
875    }
876
877    pub fn executor(&self) -> &GraphExecutor {
878        &self.executor
879    }
880
881    pub fn executor_mut(&mut self) -> &mut GraphExecutor {
882        &mut self.executor
883    }
884
885    pub fn graph(&self, name: &str) -> Option<&RenderGraph> {
886        self.graphs.iter().find(|(n, _)| n == name).map(|(_, g)| g)
887    }
888
889    pub fn graph_mut(&mut self, name: &str) -> Option<&mut RenderGraph> {
890        self.graphs
891            .iter_mut()
892            .find(|(n, _)| n == name)
893            .map(|(_, g)| g)
894    }
895}
896
897// ---------------------------------------------------------------------------
898// Frame pacing
899// ---------------------------------------------------------------------------
900
/// Simple frame pacing utility to target a specific framerate.
///
/// Call [`FramePacer::begin_frame`] at the top of every frame and
/// [`FramePacer::end_frame`] at the bottom. The pacer keeps a sliding window
/// of the most recent frame times (120 samples by default) for the
/// statistics helpers.
pub struct FramePacer {
    /// Desired duration of one frame. `Duration::ZERO` means "do not pace".
    target_frame_time: Duration,
    /// Timestamp taken by the most recent `begin_frame` (or by `new`).
    last_frame_start: Instant,
    /// Sliding window of recent frame times, oldest first.
    frame_times: std::collections::VecDeque<Duration>,
    /// Maximum number of samples kept in `frame_times`.
    max_samples: usize,
}

impl FramePacer {
    /// Creates a pacer targeting `target_fps` frames per second.
    ///
    /// A `target_fps` that is zero, negative, NaN or infinite disables pacing
    /// (`end_frame` never sleeps) instead of panicking.
    pub fn new(target_fps: f64) -> Self {
        let max_samples = 120;
        Self {
            target_frame_time: Self::frame_time_for(target_fps),
            last_frame_start: Instant::now(),
            // One extra slot: a sample is pushed before the oldest is evicted.
            frame_times: std::collections::VecDeque::with_capacity(max_samples + 1),
            max_samples,
        }
    }

    /// Converts a frames-per-second value into a per-frame time budget.
    ///
    /// Non-finite or non-positive rates map to `Duration::ZERO` (pacing
    /// disabled). Rates so small that the frame time does not fit in a
    /// `Duration` saturate to `Duration::MAX`.
    fn frame_time_for(fps: f64) -> Duration {
        if fps.is_finite() && fps > 0.0 {
            Duration::try_from_secs_f64(1.0 / fps).unwrap_or(Duration::MAX)
        } else {
            Duration::ZERO
        }
    }

    /// Call at the start of each frame. Returns delta time in seconds.
    ///
    /// The delta is measured from the previous `begin_frame` call (or from
    /// construction for the very first frame) and is recorded in the sample
    /// window, evicting the oldest sample once the window is full.
    pub fn begin_frame(&mut self) -> f32 {
        let now = Instant::now();
        let dt = now.duration_since(self.last_frame_start);
        self.last_frame_start = now;
        self.frame_times.push_back(dt);
        while self.frame_times.len() > self.max_samples {
            self.frame_times.pop_front();
        }
        dt.as_secs_f32()
    }

    /// Call at the end of each frame. Sleeps if needed to hit target FPS.
    ///
    /// Blocks the calling thread for whatever is left of the frame budget
    /// since the last `begin_frame`; returns immediately if the budget is
    /// already used up.
    pub fn end_frame(&self) {
        let elapsed = self.last_frame_start.elapsed();
        if let Some(remaining) = self.target_frame_time.checked_sub(elapsed) {
            if !remaining.is_zero() {
                std::thread::sleep(remaining);
            }
        }
    }

    /// Average FPS over recent frames.
    ///
    /// Returns `0.0` when no frames have been recorded or when the recorded
    /// frame times sum to zero.
    pub fn average_fps(&self) -> f64 {
        let total: Duration = self.frame_times.iter().sum();
        let total_secs = total.as_secs_f64();
        if total_secs > 0.0 {
            // frames / seconds, computed in floating point so the mean is not
            // truncated to whole nanoseconds first.
            self.frame_times.len() as f64 / total_secs
        } else {
            0.0
        }
    }

    /// 1% low frame time.
    ///
    /// This is the frame time at the 99th percentile of the recorded
    /// samples, i.e. a duration that only the slowest ~1% of recent frames
    /// reach. Returns `Duration::ZERO` when no frames have been recorded.
    pub fn percentile_1_low(&self) -> Duration {
        if self.frame_times.is_empty() {
            return Duration::ZERO;
        }
        let mut sorted: Vec<Duration> = self.frame_times.iter().copied().collect();
        sorted.sort_unstable();
        let last = sorted.len() - 1;
        let idx = ((sorted.len() as f64 * 0.99) as usize).min(last);
        sorted[idx]
    }

    /// Changes the target framerate.
    ///
    /// Invalid values are handled the same way as in [`FramePacer::new`].
    pub fn set_target_fps(&mut self, fps: f64) {
        self.target_frame_time = Self::frame_time_for(fps);
    }
}
969
970// ---------------------------------------------------------------------------
971// Tests
972// ---------------------------------------------------------------------------
973
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rendergraph::graph::RenderGraphBuilder;

    /// Builds a three-pass linear graph: `depth_pre` -> `lighting` -> `tonemap`.
    ///
    /// Each pass reads the texture written by the previous one; `tonemap` is
    /// marked as having side effects.
    fn test_graph() -> RenderGraph {
        let mut b = RenderGraphBuilder::new("test_exec", 1920, 1080);
        let depth = b.texture("depth", TextureFormat::Depth32Float);
        let color = b.texture("color", TextureFormat::Rgba16Float);
        let final_rt = b.texture("final", TextureFormat::Rgba8Unorm);

        b.graphics_pass("depth_pre")
            .writes(depth, "depth")
            .finish();

        b.graphics_pass("lighting")
            .reads(depth, "depth")
            .writes(color, "color")
            .finish();

        b.graphics_pass("tonemap")
            .reads(color, "color")
            .writes(final_rt, "final")
            .side_effects()
            .finish();

        b.build()
    }

    /// Executing the linear graph runs all three passes and emits barriers.
    #[test]
    fn test_execute_frame() {
        let mut graph = test_graph();
        let mut executor = GraphExecutor::new(1920, 1080);
        let stats = executor.execute_frame(&mut graph).unwrap();
        assert_eq!(stats.active_passes, 3);
        assert!(stats.barrier_count > 0);
    }

    /// Frame indices increase from 1 and the in-flight count tracks
    /// submissions and completions.
    #[test]
    fn test_frame_timeline() {
        let mut tl = FrameTimeline::triple_buffered();
        assert_eq!(tl.max_frames_in_flight(), 3);

        let f1 = tl.begin_frame();
        assert_eq!(f1, 1);
        tl.submit_frame();
        assert_eq!(tl.frames_in_flight(), 1);

        let f2 = tl.begin_frame();
        assert_eq!(f2, 2);
        tl.submit_frame();
        assert_eq!(tl.frames_in_flight(), 2);

        tl.complete_frame(1);
        assert_eq!(tl.frames_in_flight(), 1);
    }

    /// The linear graph needs at least one barrier per producer/consumer edge.
    #[test]
    fn test_barrier_computation() {
        let mut graph = test_graph();
        let _ = graph.topological_sort().unwrap();
        let executor = GraphExecutor::new(1920, 1080);
        let sorted = vec![
            "depth_pre".to_string(),
            "lighting".to_string(),
            "tonemap".to_string(),
        ];
        let barriers = executor.compute_barriers(&graph, &sorted);
        assert!(barriers.len() >= 2); // depth->lighting, color->tonemap
    }

    /// A compute pass with compute-queue affinity is picked up by the async
    /// compute schedule.
    #[test]
    fn test_async_compute_schedule() {
        let mut b = RenderGraphBuilder::new("async_test", 1920, 1080);
        let depth = b.texture("depth", TextureFormat::Depth32Float);
        let ssao = b.texture("ssao", TextureFormat::R16Float);
        let color = b.texture("color", TextureFormat::Rgba16Float);

        b.graphics_pass("depth_pre")
            .writes(depth, "depth")
            .finish();

        b.compute_pass("ssao")
            .reads(depth, "depth")
            .writes(ssao, "ssao")
            .queue(QueueAffinity::Compute)
            .finish();

        b.graphics_pass("lighting")
            .reads(depth, "depth")
            .reads(ssao, "ssao")
            .writes(color, "color")
            .finish();

        let mut graph = b.build();
        let schedule = AsyncComputeSchedule::from_graph(&mut graph).unwrap();
        assert_eq!(schedule.compute_passes.len(), 1);
        assert!(schedule.compute_passes.contains(&"ssao".to_string()));
    }

    /// A fresh pacer reports empty statistics; after one frame the single
    /// recorded sample is what the statistics are based on.
    #[test]
    fn test_frame_pacer() {
        let mut pacer = FramePacer::new(60.0);
        assert_eq!(pacer.average_fps(), 0.0);
        assert_eq!(pacer.percentile_1_low(), Duration::ZERO);

        let dt = pacer.begin_frame();
        assert!(dt >= 0.0);
        // With exactly one sample, the percentile is that sample.
        assert_eq!(pacer.percentile_1_low().as_secs_f32(), dt);
    }

    /// A registered pass callback receives the context of its own pass.
    #[test]
    fn test_executor_with_custom_fn() {
        let mut graph = test_graph();
        let mut executor = GraphExecutor::new(1920, 1080);

        executor.register_fn("depth_pre", |ctx| {
            assert_eq!(ctx.pass_name, "depth_pre");
        });

        let stats = executor.execute_frame(&mut graph).unwrap();
        assert_eq!(stats.active_passes, 3);
    }

    /// A graph can be rebuilt from a config set on the executor.
    #[test]
    fn test_hot_reload() {
        use crate::rendergraph::graph::{GraphConfig, PassConfig, ResourceConfig};
        use crate::rendergraph::resources::SizePolicy;
        let config = GraphConfig {
            label: "hot_reload".to_string(),
            resources: vec![ResourceConfig {
                name: "color".to_string(),
                format: TextureFormat::Rgba16Float,
                size: SizePolicy::Relative {
                    width_scale: 1.0,
                    height_scale: 1.0,
                },
                imported: false,
            }],
            passes: vec![PassConfig {
                name: "lighting".to_string(),
                pass_type: PassType::Graphics,
                inputs: vec![],
                outputs: vec!["color".to_string()],
                condition: None,
                resolution_scale: None,
                queue: QueueAffinity::Graphics,
                explicit_deps: vec![],
            }],
            features: vec![],
        };

        let mut executor = GraphExecutor::new(1920, 1080);
        executor.set_config(config);
        let graph = executor.rebuild_from_config();
        assert!(graph.is_some());
        let mut g = graph.unwrap();
        let sorted = g.topological_sort().unwrap();
        assert_eq!(sorted, vec!["lighting"]);
    }

    /// One registered graph yields exactly one successful result.
    #[test]
    fn test_multi_graph_executor() {
        let executor = GraphExecutor::new(1920, 1080);
        let mut multi = MultiGraphExecutor::new(executor);
        multi.add_graph("main", test_graph());
        let results = multi.execute_all();
        assert_eq!(results.len(), 1);
        assert!(results[0].is_ok());
    }

    /// Graphs can be looked up by name and disappear after removal.
    #[test]
    fn test_multi_graph_add_remove() {
        let executor = GraphExecutor::new(1920, 1080);
        let mut multi = MultiGraphExecutor::new(executor);
        assert!(multi.graph("main").is_none());

        multi.add_graph("main", test_graph());
        assert!(multi.graph("main").is_some());
        assert!(multi.graph_mut("main").is_some());
        assert!(multi.graph("ui").is_none());

        multi.remove_graph("main");
        assert!(multi.graph("main").is_none());
        assert!(multi.execute_all().is_empty());
    }

    /// The textual frame report contains the frame header and CPU timing.
    #[test]
    fn test_frame_report() {
        let mut graph = test_graph();
        let mut executor = GraphExecutor::new(1920, 1080);
        let _stats = executor.execute_frame(&mut graph).unwrap();
        let report = executor.frame_report();
        assert!(report.contains("Frame"));
        assert!(report.contains("Total CPU time"));
    }
}