Skip to main content

oxiphysics_gpu/pipeline/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5#[allow(unused_imports)]
6use super::functions::*;
7use std::time::Instant;
8
9/// Schedules compute dispatches to maximise GPU utilisation through overlapping.
10///
11/// Classifies dispatches as either "latency-critical" (must finish before the
12/// next frame) or "background" (can run async on a secondary queue).
13#[derive(Debug)]
14#[allow(dead_code)]
15pub struct ComputeOverlapScheduler {
16    /// Number of latency-critical dispatches submitted this frame.
17    pub critical_count: usize,
18    /// Number of background dispatches submitted this frame.
19    pub background_count: usize,
20    pub(super) recorder: MultiQueueRecorder,
21}
22impl ComputeOverlapScheduler {
23    /// Create a new scheduler.
24    pub fn new() -> Self {
25        Self {
26            critical_count: 0,
27            background_count: 0,
28            recorder: MultiQueueRecorder::new(),
29        }
30    }
31    /// Submit a latency-critical dispatch (→ main queue).
32    pub fn submit_critical(&mut self, batch: DispatchBatch) {
33        self.critical_count += 1;
34        self.recorder.submit(batch, QueueType::Main);
35    }
36    /// Submit a background dispatch (→ async compute queue).
37    pub fn submit_background(&mut self, batch: DispatchBatch) {
38        self.background_count += 1;
39        self.recorder.submit(batch, QueueType::AsyncCompute);
40    }
41    /// Execute all pending work and reset frame counters.
42    pub fn end_frame(&mut self) -> usize {
43        let n = self.recorder.flush_all();
44        self.critical_count = 0;
45        self.background_count = 0;
46        n
47    }
48    /// Returns `true` if there is any pending work.
49    pub fn has_pending(&self) -> bool {
50        self.recorder.pending_total() > 0
51    }
52}
53/// Configuration for the high-level physics pipeline.
54#[derive(Debug, Clone)]
55pub struct PipelineConfig {
56    /// Which stages are active.  Stages not in this list are skipped.
57    pub enabled_stages: Vec<PipelineStage>,
58    /// Number of sub-steps per call to [`PhysicsPipeline::step`].
59    pub substeps: u32,
60    /// Whether a GPU backend should be preferred over the CPU fallback.
61    pub use_gpu: bool,
62}
63impl PipelineConfig {
64    /// Default config: all stages enabled, 1 substep, CPU backend.
65    pub fn new() -> Self {
66        Self {
67            enabled_stages: PipelineStage::all_in_order().to_vec(),
68            substeps: 1,
69            use_gpu: false,
70        }
71    }
72    /// Returns `true` if `stage` is in the enabled list.
73    pub fn is_enabled(&self, stage: PipelineStage) -> bool {
74        self.enabled_stages.contains(&stage)
75    }
76}
77/// Orchestrates a complete simulation step through all enabled pipeline stages.
78///
79/// The stage execution order is guaranteed:
80/// `BroadPhase → NarrowPhase → ConstraintSolve → Integration → PostProcess`
81pub struct PhysicsPipeline {
82    /// Pipeline configuration.
83    pub config: PipelineConfig,
84    /// Cumulative stats across all steps since creation.
85    pub stats: PipelineStats,
86}
87impl PhysicsPipeline {
88    /// Create a new pipeline with the given configuration.
89    pub fn new(config: PipelineConfig) -> Self {
90        Self {
91            config,
92            stats: PipelineStats::default(),
93        }
94    }
95    /// Run one full simulation step (all enabled stages, for `substeps`
96    /// iterations) over `world_state` with time step `dt`.
97    ///
98    /// Returns the [`PipelineStats`] for this step.
99    pub fn step(&mut self, world_state: &mut WorldState, dt: f64) -> PipelineStats {
100        let step_start = Instant::now();
101        let mut step_stats = PipelineStats::default();
102        let sub_dt = if self.config.substeps > 0 {
103            dt / self.config.substeps as f64
104        } else {
105            dt
106        };
107        for _ in 0..self.config.substeps.max(1) {
108            let sub_stats = self.run_stages(world_state, sub_dt);
109            step_stats.accumulate(&sub_stats);
110        }
111        step_stats.total_time_ms = step_start.elapsed().as_secs_f64() * 1000.0;
112        self.stats.accumulate(&step_stats);
113        step_stats
114    }
115    /// Run all enabled stages once for a single sub-step.
116    fn run_stages(&self, world_state: &mut WorldState, dt: f64) -> PipelineStats {
117        let mut stats = PipelineStats::default();
118        for stage in PipelineStage::all_in_order() {
119            if !self.config.is_enabled(stage) {
120                continue;
121            }
122            let mut timer = StageTimer::start();
123            match stage {
124                PipelineStage::BroadPhase => {
125                    let pairs = run_broadphase(world_state);
126                    stats.collision_pairs += pairs as u32;
127                }
128                PipelineStage::NarrowPhase => {}
129                PipelineStage::ConstraintSolve => {
130                    let solved = run_constraint_solve(world_state);
131                    stats.solved_constraints += solved as u32;
132                }
133                PipelineStage::Integration => {
134                    run_integration(world_state, dt);
135                }
136                PipelineStage::PostProcess => {
137                    run_postprocess(world_state);
138                }
139            }
140            timer.stop();
141            match stage {
142                PipelineStage::BroadPhase => stats.broadphase_ms += timer.elapsed_ms,
143                PipelineStage::NarrowPhase => stats.narrowphase_ms += timer.elapsed_ms,
144                PipelineStage::ConstraintSolve => stats.constraint_ms += timer.elapsed_ms,
145                PipelineStage::Integration => stats.integration_ms += timer.elapsed_ms,
146                PipelineStage::PostProcess => stats.postprocess_ms += timer.elapsed_ms,
147            }
148        }
149        stats
150    }
151}
152/// GPU compute pipeline descriptor (wgpu-agnostic, usable in unit tests).
153#[derive(Debug, Clone)]
154pub struct ComputePipeline {
155    /// Human-readable label for debugging.
156    pub label: String,
157    /// Raw WGSL shader source.
158    pub shader_source: String,
159    /// Name of the entry-point function inside the shader.
160    pub entry_point: String,
161    /// Workgroup size declared in the shader `[X, Y, Z]`.
162    pub workgroup_size: [u32; 3],
163}
164impl ComputePipeline {
165    /// Create a new pipeline descriptor.
166    ///
167    /// `workgroup_size` defaults to `[64, 1, 1]` (matching the shaders in
168    /// [`crate::shaders`]).
169    pub fn new(label: &str, shader: &str, entry_point: &str) -> Self {
170        Self {
171            label: label.to_owned(),
172            shader_source: shader.to_owned(),
173            entry_point: entry_point.to_owned(),
174            workgroup_size: [64, 1, 1],
175        }
176    }
177    /// Compute the dispatch grid dimensions needed to cover `n_items` work
178    /// items.
179    ///
180    /// Returns `[ceil(n_items / workgroup_size[0\]), 1, 1]`.
181    pub fn workgroups_needed(&self, n_items: u32) -> [u32; 3] {
182        let x = n_items.div_ceil(self.workgroup_size[0]);
183        [x, 1, 1]
184    }
185}
186/// Describes a memory/execution barrier between two pipeline stages.
187///
188/// On a real GPU this would translate to a `vkCmdPipelineBarrier` or
189/// `wgpu::CommandEncoder::insert_debug_marker`.  Here it is a pure data
190/// type used to validate that the user has not forgotten to insert barriers
191/// between dependent passes.
192#[derive(Debug, Clone, PartialEq)]
193pub struct ResourceBarrier {
194    /// The stage that *writes* the resource.
195    pub src_stage: PipelineStage,
196    /// The stage that *reads* the resource.
197    pub dst_stage: PipelineStage,
198    /// Human-readable name of the protected resource.
199    pub resource_name: String,
200}
201impl ResourceBarrier {
202    /// Create a new barrier descriptor.
203    pub fn new(src: PipelineStage, dst: PipelineStage, name: &str) -> Self {
204        Self {
205            src_stage: src,
206            dst_stage: dst,
207            resource_name: name.to_owned(),
208        }
209    }
210    /// Returns `true` if the barrier is between stages in the correct
211    /// dependency order (src must come before dst).
212    pub fn is_valid_order(&self) -> bool {
213        self.src_stage < self.dst_stage
214    }
215}
216/// An opaque handle to a pool-allocated GPU resource.
217///
218/// Stores the `(offset, size)` pair returned by [`GpuMemoryPool::alloc`] so
219/// that the caller can later free the resource.
220#[derive(Debug, Clone, Copy, PartialEq, Eq)]
221pub struct ResourceHandle {
222    /// Offset into the pool's backing store.
223    pub offset: usize,
224    /// Size of the allocation in `f32` elements.
225    pub size: usize,
226}
227impl ResourceHandle {
228    /// Create a handle from a raw `(offset, size)` pair.
229    pub fn from_alloc(alloc: (usize, usize)) -> Self {
230        Self {
231            offset: alloc.0,
232            size: alloc.1,
233        }
234    }
235}
236/// A simple frame-graph pass descriptor.
237///
238/// Frame graphs describe the rendering / compute workload as a DAG of passes,
239/// enabling automatic barrier insertion and resource aliasing.
240#[derive(Debug, Clone)]
241pub struct FrameGraphPass {
242    /// Unique name for this pass.
243    pub name: String,
244    /// Names of resources this pass reads.
245    pub reads: Vec<String>,
246    /// Names of resources this pass writes.
247    pub writes: Vec<String>,
248    /// Names of passes that must complete before this one.
249    pub dependencies: Vec<String>,
250    /// Which queue this pass runs on.
251    pub queue: QueueType,
252}
253impl FrameGraphPass {
254    /// Create a new pass with no dependencies.
255    pub fn new(name: impl Into<String>, queue: QueueType) -> Self {
256        Self {
257            name: name.into(),
258            reads: Vec::new(),
259            writes: Vec::new(),
260            dependencies: Vec::new(),
261            queue,
262        }
263    }
264    /// Add a resource read.
265    pub fn reads(mut self, resource: impl Into<String>) -> Self {
266        self.reads.push(resource.into());
267        self
268    }
269    /// Add a resource write.
270    pub fn writes(mut self, resource: impl Into<String>) -> Self {
271        self.writes.push(resource.into());
272        self
273    }
274    /// Add a pass dependency.
275    pub fn depends_on(mut self, pass: impl Into<String>) -> Self {
276        self.dependencies.push(pass.into());
277        self
278    }
279}
280/// A minimal frame graph that can validate pass dependencies.
281#[derive(Debug, Default)]
282pub struct FrameGraph {
283    pub(super) passes: Vec<FrameGraphPass>,
284}
285impl FrameGraph {
286    /// Create an empty frame graph.
287    pub fn new() -> Self {
288        Self::default()
289    }
290    /// Add a pass.
291    pub fn add_pass(&mut self, pass: FrameGraphPass) {
292        self.passes.push(pass);
293    }
294    /// Return all pass names in insertion order.
295    pub fn pass_names(&self) -> Vec<&str> {
296        self.passes.iter().map(|p| p.name.as_str()).collect()
297    }
298    /// Return all passes that write to `resource`.
299    pub fn writers_of(&self, resource: &str) -> Vec<&FrameGraphPass> {
300        self.passes
301            .iter()
302            .filter(|p| p.writes.iter().any(|w| w == resource))
303            .collect()
304    }
305    /// Return all passes that read from `resource`.
306    pub fn readers_of(&self, resource: &str) -> Vec<&FrameGraphPass> {
307        self.passes
308            .iter()
309            .filter(|p| p.reads.iter().any(|r| r == resource))
310            .collect()
311    }
312    /// Validate that all declared dependencies reference existing passes.
313    /// Returns a list of invalid dependency references.
314    pub fn validate_dependencies(&self) -> Vec<String> {
315        let names: std::collections::HashSet<&str> =
316            self.passes.iter().map(|p| p.name.as_str()).collect();
317        let mut errors = Vec::new();
318        for pass in &self.passes {
319            for dep in &pass.dependencies {
320                if !names.contains(dep.as_str()) {
321                    errors.push(format!("{}: unknown dependency '{}'", pass.name, dep));
322                }
323            }
324        }
325        errors
326    }
327    /// Count how many passes run on the async compute queue.
328    pub fn async_pass_count(&self) -> usize {
329        self.passes
330            .iter()
331            .filter(|p| p.queue == QueueType::AsyncCompute)
332            .count()
333    }
334}
335/// A distinct stage in the physics simulation pipeline.
336///
337/// Stages run in dependency order:
338/// `BroadPhase → NarrowPhase → ConstraintSolve → Integration → PostProcess`
339#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
340pub enum PipelineStage {
341    /// Broad-phase collision detection (AABB / BVH).
342    BroadPhase,
343    /// Narrow-phase contact generation.
344    NarrowPhase,
345    /// Constraint / impulse solver.
346    ConstraintSolve,
347    /// Velocity and position integration.
348    Integration,
349    /// Post-processing (sleeping, user callbacks, …).
350    PostProcess,
351}
352impl PipelineStage {
353    /// Ordered list of all stages, from first to last.
354    pub fn all_in_order() -> [PipelineStage; 5] {
355        [
356            PipelineStage::BroadPhase,
357            PipelineStage::NarrowPhase,
358            PipelineStage::ConstraintSolve,
359            PipelineStage::Integration,
360            PipelineStage::PostProcess,
361        ]
362    }
363}
364/// A single compute dispatch pass: one pipeline plus its bound buffers.
365#[derive(Debug)]
366pub struct DispatchBatch {
367    /// The pipeline to dispatch.
368    pub pipeline: ComputePipeline,
369    /// Buffers bound to this pass, in binding-slot order.
370    pub bindings: Vec<CpuBuffer>,
371    /// The dispatch grid dimensions `[X, Y, Z]`.
372    pub dispatch_dims: [u32; 3],
373}
374impl DispatchBatch {
375    /// Create a new dispatch batch, computing the required workgroup grid from
376    /// `workitems`.
377    pub fn new(pipeline: ComputePipeline, workitems: u32) -> Self {
378        let dispatch_dims = pipeline.workgroups_needed(workitems);
379        Self {
380            pipeline,
381            bindings: Vec::new(),
382            dispatch_dims,
383        }
384    }
385    /// Append `buffer` to the binding list (next available slot).
386    pub fn bind(&mut self, buffer: CpuBuffer) {
387        self.bindings.push(buffer);
388    }
389}
390/// Models an async compute queue: a FIFO of [`DispatchBatch`]es that can be
391/// submitted independently of the graphics queue.
392///
393/// On a GPU this enables overlapping async compute with graphics work.
394/// Here it is a simple in-memory queue used to test scheduling logic.
395pub struct AsyncComputeQueue {
396    pub(super) queue: std::collections::VecDeque<DispatchBatch>,
397    /// Total number of batches ever enqueued (for profiling).
398    pub total_enqueued: usize,
399    /// Total number of batches ever drained (executed).
400    pub total_executed: usize,
401}
402impl AsyncComputeQueue {
403    /// Create an empty async compute queue.
404    pub fn new() -> Self {
405        Self {
406            queue: std::collections::VecDeque::new(),
407            total_enqueued: 0,
408            total_executed: 0,
409        }
410    }
411    /// Enqueue a dispatch batch.
412    pub fn submit(&mut self, batch: DispatchBatch) {
413        self.total_enqueued += 1;
414        self.queue.push_back(batch);
415    }
416    /// Execute (drain) all pending batches and return the count executed.
417    ///
418    /// In a real backend this would flush the command buffer to the GPU.
419    /// Here it simply clears the queue and updates counters.
420    pub fn flush(&mut self) -> usize {
421        let n = self.queue.len();
422        self.total_executed += n;
423        self.queue.clear();
424        n
425    }
426    /// Number of batches currently waiting in the queue.
427    pub fn pending(&self) -> usize {
428        self.queue.len()
429    }
430    /// Returns `true` when there are no pending batches.
431    pub fn is_idle(&self) -> bool {
432        self.queue.is_empty()
433    }
434}
435/// A dispatched batch annotated with its target queue.
436#[derive(Debug)]
437#[allow(dead_code)]
438pub struct MultiQueueBatch {
439    /// The dispatch batch.
440    pub batch: DispatchBatch,
441    /// Which queue to submit to.
442    pub queue: QueueType,
443    /// Semaphore/fence dependency: must wait for this frame's signal.
444    pub wait_frame: u64,
445}
446/// Minimal world state passed to [`PhysicsPipeline::step`].
447///
448/// Uses pure f64 arrays (no nalgebra) compatible with GPU buffer uploads.
449#[derive(Debug, Clone, Default)]
450pub struct WorldState {
451    /// Flat position array `[x0, y0, z0, x1, y1, z1, ...]`.
452    pub positions: Vec<f64>,
453    /// Flat velocity array `[vx0, vy0, vz0, ...]`.
454    pub velocities: Vec<f64>,
455    /// Inverse masses `[inv_m0, inv_m1, ...]`.
456    pub inverse_masses: Vec<f64>,
457}
458impl WorldState {
459    /// Number of bodies in the world state.
460    pub fn body_count(&self) -> usize {
461        self.inverse_masses.len()
462    }
463}
464/// Records per-stage timing samples and provides basic statistics.
465///
466/// Each call to [`PipelineProfiler::record`] appends one sample for the
467/// named stage.  [`PipelineProfiler::summary`] returns mean ± stddev.
468#[derive(Debug, Default)]
469pub struct PipelineProfiler {
470    pub(super) samples: std::collections::HashMap<String, Vec<f64>>,
471}
472impl PipelineProfiler {
473    /// Create a new, empty profiler.
474    pub fn new() -> Self {
475        Self::default()
476    }
477    /// Append a timing sample (milliseconds) for `stage_name`.
478    pub fn record(&mut self, stage_name: &str, ms: f64) {
479        self.samples
480            .entry(stage_name.to_owned())
481            .or_default()
482            .push(ms);
483    }
484    /// Returns `(mean_ms, stddev_ms, sample_count)` for `stage_name`, or
485    /// `None` if no samples have been recorded.
486    pub fn summary(&self, stage_name: &str) -> Option<(f64, f64, usize)> {
487        let v = self.samples.get(stage_name)?;
488        if v.is_empty() {
489            return None;
490        }
491        let n = v.len() as f64;
492        let mean = v.iter().sum::<f64>() / n;
493        let variance = v.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
494        Some((mean, variance.sqrt(), v.len()))
495    }
496    /// Return all stage names that have recorded samples.
497    pub fn stage_names(&self) -> Vec<&str> {
498        let mut names: Vec<&str> = self.samples.keys().map(String::as_str).collect();
499        names.sort_unstable();
500        names
501    }
502    /// Total number of samples across all stages.
503    pub fn total_samples(&self) -> usize {
504        self.samples.values().map(Vec::len).sum()
505    }
506    /// Clear all recorded samples.
507    pub fn reset(&mut self) {
508        self.samples.clear();
509    }
510}
511/// Models multiple compute queues (graphics queue + async compute queue).
512///
513/// On Vulkan/Metal/DX12 certain hardware can overlap work on the graphics
514/// queue and an async compute queue.  This mock records which queue each
515/// dispatch was submitted to.
516#[derive(Debug, Clone, PartialEq, Eq)]
517pub enum QueueType {
518    /// Main graphics/compute queue.
519    Main,
520    /// Async compute queue (can overlap with graphics).
521    AsyncCompute,
522    /// Transfer/copy queue.
523    Transfer,
524}
525/// Wraps an [`Instant`] and records the elapsed duration of a stage.
526pub struct StageTimer {
527    pub(super) start: Instant,
528    /// Elapsed time recorded by [`StageTimer::stop`].
529    pub elapsed_ms: f64,
530}
531impl StageTimer {
532    /// Create a new timer and start the clock.
533    pub fn start() -> Self {
534        Self {
535            start: Instant::now(),
536            elapsed_ms: 0.0,
537        }
538    }
539    /// Stop the timer and record elapsed time in milliseconds.
540    pub fn stop(&mut self) {
541        self.elapsed_ms = self.start.elapsed().as_secs_f64() * 1000.0;
542    }
543}
544/// Low-level GPU pipeline statistics for one dispatch (mock).
545///
546/// Mirrors the statistics provided by `VkQueryPool` with
547/// `VK_QUERY_TYPE_PIPELINE_STATISTICS` or Metal/D3D equivalents.
548#[derive(Debug, Clone, Default)]
549#[allow(dead_code)]
550pub struct PipelineStatistics {
551    /// Number of invocations of the compute shader.
552    pub cs_invocations: u64,
553    /// Number of work-groups dispatched.
554    pub workgroups_dispatched: u64,
555    /// Estimated FLOP count (mock value).
556    pub flops: u64,
557    /// Estimated bytes read from global memory.
558    pub bytes_read: u64,
559    /// Estimated bytes written to global memory.
560    pub bytes_written: u64,
561}
562impl PipelineStatistics {
563    /// Compute the arithmetic intensity (FLOPs per byte).
564    pub fn arithmetic_intensity(&self) -> f64 {
565        let bytes = self.bytes_read + self.bytes_written;
566        if bytes == 0 {
567            return 0.0;
568        }
569        self.flops as f64 / bytes as f64
570    }
571    /// Estimated memory bandwidth utilization as a fraction of `peak_bw_bytes_s`.
572    pub fn bandwidth_utilization(&self, peak_bw_bytes_s: f64, elapsed_s: f64) -> f64 {
573        if peak_bw_bytes_s <= 0.0 || elapsed_s <= 0.0 {
574            return 0.0;
575        }
576        let used = (self.bytes_read + self.bytes_written) as f64 / elapsed_s;
577        (used / peak_bw_bytes_s).min(1.0)
578    }
579}
580/// Multi-queue command recorder: tracks which batches go to which queue.
581#[derive(Debug, Default)]
582pub struct MultiQueueRecorder {
583    /// Pending batches on the main queue.
584    pub main_queue: Vec<DispatchBatch>,
585    /// Pending batches on the async compute queue.
586    pub async_queue: Vec<DispatchBatch>,
587    /// Pending batches on the transfer queue.
588    pub transfer_queue: Vec<DispatchBatch>,
589    /// Total batches ever recorded.
590    pub total_recorded: usize,
591}
592impl MultiQueueRecorder {
593    /// Create an empty recorder.
594    pub fn new() -> Self {
595        Self::default()
596    }
597    /// Submit a batch to the specified queue.
598    pub fn submit(&mut self, batch: DispatchBatch, queue: QueueType) {
599        self.total_recorded += 1;
600        match queue {
601            QueueType::Main => self.main_queue.push(batch),
602            QueueType::AsyncCompute => self.async_queue.push(batch),
603            QueueType::Transfer => self.transfer_queue.push(batch),
604        }
605    }
606    /// Flush all queues and return total batches executed.
607    pub fn flush_all(&mut self) -> usize {
608        let n = self.main_queue.len() + self.async_queue.len() + self.transfer_queue.len();
609        self.main_queue.clear();
610        self.async_queue.clear();
611        self.transfer_queue.clear();
612        n
613    }
614    /// Number of pending batches across all queues.
615    pub fn pending_total(&self) -> usize {
616        self.main_queue.len() + self.async_queue.len() + self.transfer_queue.len()
617    }
618}
619/// A simple slab-style GPU memory pool that sub-allocates [`CpuBuffer`]s from
620/// a fixed-capacity backing store.
621///
622/// The pool pre-allocates a large buffer and hands out non-overlapping slices
623/// (as [`CpuBuffer`] views backed by offsets).  On a real GPU this avoids per-
624/// allocation overhead from `vkAllocateMemory`.
625///
626/// This CPU-side mock tracks allocations as `(offset, size)` pairs and
627/// simulates fragmentation/free-list behaviour.
628#[derive(Debug)]
629pub struct GpuMemoryPool {
630    /// Total capacity of the pool in `f32` elements.
631    pub capacity: usize,
632    /// Currently allocated `f32` elements.
633    pub allocated: usize,
634    /// Free-list entries `(offset_in_elements, size_in_elements)`.
635    pub(super) free_list: Vec<(usize, usize)>,
636}
637impl GpuMemoryPool {
638    /// Create a pool with the given capacity in `f32` elements.
639    pub fn new(capacity: usize) -> Self {
640        Self {
641            capacity,
642            allocated: 0,
643            free_list: vec![(0, capacity)],
644        }
645    }
646    /// Attempt to allocate `size` `f32` elements from the pool.
647    ///
648    /// Returns `Some((offset, size))` on success, `None` if the pool is full.
649    /// Uses a first-fit strategy.
650    pub fn alloc(&mut self, size: usize) -> Option<(usize, usize)> {
651        for i in 0..self.free_list.len() {
652            let (off, avail) = self.free_list[i];
653            if avail >= size {
654                let alloc_off = off;
655                if avail == size {
656                    self.free_list.remove(i);
657                } else {
658                    self.free_list[i] = (off + size, avail - size);
659                }
660                self.allocated += size;
661                return Some((alloc_off, size));
662            }
663        }
664        None
665    }
666    /// Free a previously allocated block `(offset, size)`.
667    ///
668    /// Returns `Err` if the block was never allocated (invalid free).
669    pub fn free(&mut self, offset: usize, size: usize) -> Result<(), &'static str> {
670        if offset + size > self.capacity {
671            return Err("block out of bounds");
672        }
673        if self.allocated < size {
674            return Err("double-free: allocated count underflow");
675        }
676        self.allocated -= size;
677        self.free_list.push((offset, size));
678        self.free_list.sort_by_key(|&(off, _)| off);
679        let mut merged: Vec<(usize, usize)> = Vec::new();
680        for &(off, sz) in &self.free_list {
681            if let Some(last) = merged.last_mut()
682                && last.0 + last.1 == off
683            {
684                last.1 += sz;
685                continue;
686            }
687            merged.push((off, sz));
688        }
689        self.free_list = merged;
690        Ok(())
691    }
692    /// Remaining free `f32` elements.
693    pub fn free_space(&self) -> usize {
694        self.capacity - self.allocated
695    }
696    /// Returns `true` if the pool has no outstanding allocations.
697    pub fn is_fully_free(&self) -> bool {
698        self.allocated == 0
699    }
700    /// Number of fragmented free-list entries (1 = perfectly contiguous).
701    pub fn fragmentation_count(&self) -> usize {
702        self.free_list.len()
703    }
704    /// Allocate a named [`CpuBuffer`] from the pool, returning the buffer
705    /// and the pool allocation handle `(offset, size)`.
706    pub fn alloc_buffer(
707        &mut self,
708        label: &str,
709        n: usize,
710        usage: BufferUsage,
711    ) -> Option<(CpuBuffer, (usize, usize))> {
712        let handle = self.alloc(n)?;
713        let buf = CpuBuffer::new_zeros(label, n, usage);
714        Some((buf, handle))
715    }
716}
717/// A set of [`TimestampQuery`]s collected during one frame / step.
718#[derive(Debug, Clone, Default)]
719pub struct TimestampQuerySet {
720    pub(super) queries: Vec<TimestampQuery>,
721}
722impl TimestampQuerySet {
723    /// Create an empty query set.
724    pub fn new() -> Self {
725        Self::default()
726    }
727    /// Record a timestamp query.
728    pub fn record(&mut self, query: TimestampQuery) {
729        self.queries.push(query);
730    }
731    /// Return all queries.
732    pub fn queries(&self) -> &[TimestampQuery] {
733        &self.queries
734    }
735    /// Find the query with the longest elapsed time.
736    pub fn slowest_pass(&self) -> Option<&TimestampQuery> {
737        self.queries.iter().max_by(|a, b| {
738            a.elapsed_ms()
739                .partial_cmp(&b.elapsed_ms())
740                .unwrap_or(std::cmp::Ordering::Equal)
741        })
742    }
743    /// Sum of all elapsed times.
744    pub fn total_elapsed_ms(&self) -> f64 {
745        self.queries.iter().map(|q| q.elapsed_ms()).sum()
746    }
747    /// Clear all recorded queries.
748    pub fn clear(&mut self) {
749        self.queries.clear();
750    }
751}
752/// Per-step statistics produced by [`PhysicsPipeline::step`].
753#[derive(Debug, Clone, Default)]
754pub struct PipelineStats {
755    /// Time spent in the broad-phase stage (ms).
756    pub broadphase_ms: f64,
757    /// Time spent in the narrow-phase stage (ms).
758    pub narrowphase_ms: f64,
759    /// Time spent in the constraint-solve stage (ms).
760    pub constraint_ms: f64,
761    /// Time spent in the integration stage (ms).
762    pub integration_ms: f64,
763    /// Time spent in the post-process stage (ms).
764    pub postprocess_ms: f64,
765    /// Total wall-clock time for the step (ms).
766    pub total_time_ms: f64,
767    /// Number of collision pairs found in broad-phase.
768    pub collision_pairs: u32,
769    /// Number of constraints solved.
770    pub solved_constraints: u32,
771}
772impl PipelineStats {
773    /// Accumulate another stats snapshot into `self` (used for sub-step sums).
774    pub fn accumulate(&mut self, other: &PipelineStats) {
775        self.broadphase_ms += other.broadphase_ms;
776        self.narrowphase_ms += other.narrowphase_ms;
777        self.constraint_ms += other.constraint_ms;
778        self.integration_ms += other.integration_ms;
779        self.postprocess_ms += other.postprocess_ms;
780        self.total_time_ms += other.total_time_ms;
781        self.collision_pairs += other.collision_pairs;
782        self.solved_constraints += other.solved_constraints;
783    }
784    /// Sum of all per-stage times.
785    pub fn stage_total_ms(&self) -> f64 {
786        self.broadphase_ms
787            + self.narrowphase_ms
788            + self.constraint_ms
789            + self.integration_ms
790            + self.postprocess_ms
791    }
792}
793/// Optimises a sequence of resource barriers by removing redundant ones.
794///
795/// Two barriers are redundant when a later barrier subsumes an earlier one
796/// (same `src_stage → dst_stage` pair for the same resource).
797#[derive(Debug, Default)]
798pub struct BarrierOptimizer;
799impl BarrierOptimizer {
800    /// Deduplicate `barriers`: for each `(src, dst, resource)` triple,
801    /// keep only the last occurrence.  Returns the optimised set.
802    pub fn optimize(barriers: &[ResourceBarrier]) -> BarrierSet {
803        let mut seen: std::collections::HashMap<
804            (PipelineStage, PipelineStage, &str),
805            &ResourceBarrier,
806        > = std::collections::HashMap::new();
807        for b in barriers {
808            seen.insert((b.src_stage, b.dst_stage, b.resource_name.as_str()), b);
809        }
810        let mut out = BarrierSet::new();
811        for b in seen.values() {
812            out.add(ResourceBarrier::new(
813                b.src_stage,
814                b.dst_stage,
815                &b.resource_name,
816            ));
817        }
818        out
819    }
820    /// Count how many barriers would be removed from `before` to produce
821    /// the optimised set.
822    pub fn savings(barriers: &[ResourceBarrier]) -> usize {
823        let optimized = Self::optimize(barriers);
824        barriers.len().saturating_sub(optimized.len())
825    }
826}
827/// Fluent builder for [`PhysicsPipeline`].
828///
829/// ```no_run
830/// use oxiphysics_gpu::pipeline::{PipelineBuilder, PipelineStage};
831///
832/// let pipeline = PipelineBuilder::new()
833///     .substeps(2)
834///     .use_gpu(false)
835///     .disable_stage(PipelineStage::PostProcess)
836///     .build();
837///
838/// assert_eq!(pipeline.config.substeps, 2);
839/// assert!(!pipeline.config.is_enabled(PipelineStage::PostProcess));
840/// ```
841pub struct PipelineBuilder {
842    pub(super) config: PipelineConfig,
843}
844impl PipelineBuilder {
845    /// Create a builder with a default config (all stages, 1 substep, CPU).
846    pub fn new() -> Self {
847        Self {
848            config: PipelineConfig::new(),
849        }
850    }
851    /// Set the number of sub-steps.
852    pub fn substeps(mut self, n: u32) -> Self {
853        self.config.substeps = n;
854        self
855    }
856    /// Set whether to prefer a GPU backend.
857    pub fn use_gpu(mut self, gpu: bool) -> Self {
858        self.config.use_gpu = gpu;
859        self
860    }
861    /// Add a stage to the enabled list (idempotent).
862    pub fn enable_stage(mut self, stage: PipelineStage) -> Self {
863        if !self.config.enabled_stages.contains(&stage) {
864            self.config.enabled_stages.push(stage);
865            self.config.enabled_stages.sort();
866        }
867        self
868    }
869    /// Remove a stage from the enabled list.
870    pub fn disable_stage(mut self, stage: PipelineStage) -> Self {
871        self.config.enabled_stages.retain(|&s| s != stage);
872        self
873    }
874    /// Consume the builder and produce a [`PhysicsPipeline`].
875    pub fn build(self) -> PhysicsPipeline {
876        PhysicsPipeline::new(self.config)
877    }
878}
879/// A collection of [`ResourceBarrier`]s that form a complete barrier schedule
880/// for one pipeline configuration.
881#[derive(Debug, Clone, Default)]
882pub struct BarrierSet {
883    pub(super) barriers: Vec<ResourceBarrier>,
884}
885impl BarrierSet {
886    /// Create an empty barrier set.
887    pub fn new() -> Self {
888        Self::default()
889    }
890    /// Append a barrier.
891    pub fn add(&mut self, barrier: ResourceBarrier) {
892        self.barriers.push(barrier);
893    }
894    /// Return all barriers whose `src_stage` matches `stage`.
895    pub fn barriers_from(&self, stage: PipelineStage) -> Vec<&ResourceBarrier> {
896        self.barriers
897            .iter()
898            .filter(|b| b.src_stage == stage)
899            .collect()
900    }
901    /// Return all barriers whose `dst_stage` matches `stage`.
902    pub fn barriers_to(&self, stage: PipelineStage) -> Vec<&ResourceBarrier> {
903        self.barriers
904            .iter()
905            .filter(|b| b.dst_stage == stage)
906            .collect()
907    }
908    /// Number of barriers registered.
909    pub fn len(&self) -> usize {
910        self.barriers.len()
911    }
912    /// Returns `true` if no barriers are registered.
913    pub fn is_empty(&self) -> bool {
914        self.barriers.is_empty()
915    }
916    /// Validate that every barrier has `src < dst` (i.e., no backwards
917    /// dependencies).  Returns the list of invalid barriers.
918    pub fn validate(&self) -> Vec<&ResourceBarrier> {
919        self.barriers
920            .iter()
921            .filter(|b| !b.is_valid_order())
922            .collect()
923    }
924}
925/// A timestamp query pair `(begin, end)` for a named pass.
926///
927/// On a real GPU this maps to `VkQueryPool` or `wgpu::QuerySet` writes at the
928/// start and end of a render/compute pass.  Here we store wall-clock
929/// `f64` timestamps in milliseconds.
930#[derive(Debug, Clone)]
931#[allow(dead_code)]
932pub struct TimestampQuery {
933    /// Human-readable label for this pass.
934    pub label: String,
935    /// Begin timestamp in milliseconds (relative to some epoch).
936    pub begin_ms: f64,
937    /// End timestamp in milliseconds.
938    pub end_ms: f64,
939}
940impl TimestampQuery {
941    /// Create a new timestamp query pair.
942    pub fn new(label: impl Into<String>, begin_ms: f64, end_ms: f64) -> Self {
943        Self {
944            label: label.into(),
945            begin_ms,
946            end_ms,
947        }
948    }
949    /// Elapsed duration in milliseconds.
950    pub fn elapsed_ms(&self) -> f64 {
951        self.end_ms - self.begin_ms
952    }
953}
954/// Tracks resource aliasing: two logical resources that share the same physical
955/// memory (pool allocation) at non-overlapping lifetimes.
956///
957/// This is important for reducing peak GPU memory usage.
958#[derive(Debug, Default)]
959pub struct ResourceAliasingTracker {
960    /// Maps physical allocation handle `(offset, size)` to a list of logical
961    /// resource names currently using it.
962    pub(super) aliases: std::collections::HashMap<(usize, usize), Vec<String>>,
963}
964impl ResourceAliasingTracker {
965    /// Create an empty tracker.
966    pub fn new() -> Self {
967        Self::default()
968    }
969    /// Record that `resource_name` uses physical allocation `(offset, size)`.
970    pub fn track(&mut self, resource_name: impl Into<String>, offset: usize, size: usize) {
971        self.aliases
972            .entry((offset, size))
973            .or_default()
974            .push(resource_name.into());
975    }
976    /// Return all resource names sharing the physical allocation `(offset, size)`.
977    pub fn aliases_for(&self, offset: usize, size: usize) -> &[String] {
978        self.aliases
979            .get(&(offset, size))
980            .map(Vec::as_slice)
981            .unwrap_or(&[])
982    }
983    /// Return `true` if two named resources share any physical allocation.
984    pub fn are_aliased(&self, a: &str, b: &str) -> bool {
985        for names in self.aliases.values() {
986            if names.contains(&a.to_string()) && names.contains(&b.to_string()) {
987                return true;
988            }
989        }
990        false
991    }
992    /// Total number of physical allocations tracked.
993    pub fn allocation_count(&self) -> usize {
994        self.aliases.len()
995    }
996    /// Total number of logical resource registrations across all allocations.
997    pub fn total_resource_registrations(&self) -> usize {
998        self.aliases.values().map(Vec::len).sum()
999    }
1000}
1001/// Simulated GPU buffer backed by CPU memory for unit testing without a GPU.
1002#[derive(Debug, Clone)]
1003pub struct CpuBuffer {
1004    /// Human-readable label for debugging.
1005    pub label: String,
1006    /// Buffer contents as 32-bit floats.
1007    pub data: Vec<f32>,
1008    /// Intended usage of this buffer.
1009    pub usage: BufferUsage,
1010}
1011impl CpuBuffer {
1012    /// Create a buffer pre-filled with the given data.
1013    pub fn new_f32(label: &str, data: Vec<f32>, usage: BufferUsage) -> Self {
1014        Self {
1015            label: label.to_owned(),
1016            data,
1017            usage,
1018        }
1019    }
1020    /// Create a buffer of `n` zeros.
1021    pub fn new_zeros(label: &str, n: usize, usage: BufferUsage) -> Self {
1022        Self {
1023            label: label.to_owned(),
1024            data: vec![0.0_f32; n],
1025            usage,
1026        }
1027    }
1028    /// Number of `f32` elements in the buffer.
1029    pub fn len(&self) -> usize {
1030        self.data.len()
1031    }
1032    /// Returns `true` if the buffer contains no elements.
1033    pub fn is_empty(&self) -> bool {
1034        self.data.is_empty()
1035    }
1036}
1037/// Describes how a [`CpuBuffer`] will be used by the pipeline.
1038#[derive(Debug, Clone, Copy, PartialEq)]
1039pub enum BufferUsage {
1040    /// Read-write storage buffer.
1041    Storage,
1042    /// Uniform (read-only, small) buffer.
1043    Uniform,
1044    /// Read-only storage buffer.
1045    StorageReadOnly,
1046}