#[allow(unused_imports)]
use super::functions::*;
use std::time::Instant;
/// Scheduler that counts critical and background batches while forwarding
/// them to an internal `MultiQueueRecorder`.
#[derive(Debug)]
#[allow(dead_code)]
pub struct ComputeOverlapScheduler {
/// Incremented by `submit_critical`; reset to 0 by `end_frame`.
pub critical_count: usize,
/// Incremented by `submit_background`; reset to 0 by `end_frame`.
pub background_count: usize,
/// Recorder that stores submitted batches until `end_frame` flushes them.
pub(super) recorder: MultiQueueRecorder,
}
impl ComputeOverlapScheduler {
    /// Creates a scheduler with both counters at zero and an empty recorder.
    pub fn new() -> Self {
        let recorder = MultiQueueRecorder::new();
        Self {
            critical_count: 0,
            background_count: 0,
            recorder,
        }
    }

    /// Records `batch` on the main queue and counts it as critical work.
    pub fn submit_critical(&mut self, batch: DispatchBatch) {
        self.critical_count += 1;
        let target = QueueType::Main;
        self.recorder.submit(batch, target);
    }

    /// Records `batch` on the async-compute queue and counts it as background work.
    pub fn submit_background(&mut self, batch: DispatchBatch) {
        self.background_count += 1;
        let target = QueueType::AsyncCompute;
        self.recorder.submit(batch, target);
    }

    /// Flushes every recorded batch and resets both counters.
    ///
    /// Returns the number of batches the recorder reported as flushed.
    pub fn end_frame(&mut self) -> usize {
        let flushed = self.recorder.flush_all();
        (self.critical_count, self.background_count) = (0, 0);
        flushed
    }

    /// Returns `true` while the recorder still holds at least one batch.
    pub fn has_pending(&self) -> bool {
        self.recorder.pending_total() != 0
    }
}
/// Configuration consumed by `PhysicsPipeline`.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
/// Stages that `is_enabled` reports as enabled.
pub enabled_stages: Vec<PipelineStage>,
/// Number of sub-iterations per `PhysicsPipeline::step`; 0 is run as one.
pub substeps: u32,
/// Flag set through `PipelineBuilder::use_gpu`.
/// NOTE(review): no code visible in this file reads it — confirm its consumer.
pub use_gpu: bool,
}
impl PipelineConfig {
    /// Default configuration: every stage enabled, one substep, GPU flag off.
    pub fn new() -> Self {
        let enabled_stages = Vec::from(PipelineStage::all_in_order());
        Self {
            enabled_stages,
            substeps: 1,
            use_gpu: false,
        }
    }

    /// Returns `true` when `stage` appears in `enabled_stages`.
    pub fn is_enabled(&self, stage: PipelineStage) -> bool {
        self.enabled_stages.iter().any(|&candidate| candidate == stage)
    }
}
/// Runs the enabled pipeline stages over a `WorldState` and keeps running statistics.
pub struct PhysicsPipeline {
/// Stage selection and substep settings used by `step`.
pub config: PipelineConfig,
/// Statistics accumulated over every `step` call made on this pipeline.
pub stats: PipelineStats,
}
impl PhysicsPipeline {
    /// Builds a pipeline from `config` with zeroed statistics.
    pub fn new(config: PipelineConfig) -> Self {
        let stats = PipelineStats::default();
        Self { config, stats }
    }

    /// Advances `world_state` by `dt`, split evenly across the configured substeps.
    ///
    /// A `substeps` value of 0 is run as a single pass over the full `dt`.
    /// Returns the statistics for this call; the same values are also added
    /// to `self.stats`.
    pub fn step(&mut self, world_state: &mut WorldState, dt: f64) -> PipelineStats {
        let started = Instant::now();
        // At least one pass always runs; dividing by 1 leaves `dt` unchanged.
        let passes = self.config.substeps.max(1);
        let sub_dt = dt / passes as f64;

        let mut totals = PipelineStats::default();
        for _ in 0..passes {
            let pass_stats = self.run_stages(world_state, sub_dt);
            totals.accumulate(&pass_stats);
        }

        // Wall-clock time for the whole call, overwriting the per-pass sum of zeros.
        totals.total_time_ms = started.elapsed().as_secs_f64() * 1000.0;
        self.stats.accumulate(&totals);
        totals
    }

    /// Runs each enabled stage once, in `PipelineStage::all_in_order` order,
    /// timing every stage and collecting its counters.
    fn run_stages(&self, world_state: &mut WorldState, dt: f64) -> PipelineStats {
        let mut stats = PipelineStats::default();
        let enabled = IntoIterator::into_iter(PipelineStage::all_in_order())
            .filter(|&stage| self.config.is_enabled(stage));

        for stage in enabled {
            let mut timer = StageTimer::start();
            match stage {
                PipelineStage::BroadPhase => {
                    let pairs = run_broadphase(world_state);
                    stats.collision_pairs += pairs as u32;
                }
                // No work is performed for the narrow phase here; it is only timed.
                PipelineStage::NarrowPhase => {}
                PipelineStage::ConstraintSolve => {
                    let solved = run_constraint_solve(world_state);
                    stats.solved_constraints += solved as u32;
                }
                PipelineStage::Integration => run_integration(world_state, dt),
                PipelineStage::PostProcess => run_postprocess(world_state),
            }
            timer.stop();

            // Pick the timing field that belongs to the stage just executed.
            let bucket = match stage {
                PipelineStage::BroadPhase => &mut stats.broadphase_ms,
                PipelineStage::NarrowPhase => &mut stats.narrowphase_ms,
                PipelineStage::ConstraintSolve => &mut stats.constraint_ms,
                PipelineStage::Integration => &mut stats.integration_ms,
                PipelineStage::PostProcess => &mut stats.postprocess_ms,
            };
            *bucket += timer.elapsed_ms;
        }
        stats
    }
}
/// Description of a compute pipeline: label, shader source, entry point and
/// workgroup size.
#[derive(Debug, Clone)]
pub struct ComputePipeline {
    /// Human-readable label.
    pub label: String,
    /// Shader source text.
    pub shader_source: String,
    /// Name of the shader entry point.
    pub entry_point: String,
    /// Workgroup size per axis; `new` sets it to `[64, 1, 1]`.
    pub workgroup_size: [u32; 3],
}

impl ComputePipeline {
    /// Creates a pipeline description with the default workgroup size `[64, 1, 1]`.
    pub fn new(label: &str, shader: &str, entry_point: &str) -> Self {
        Self {
            label: label.to_owned(),
            shader_source: shader.to_owned(),
            entry_point: entry_point.to_owned(),
            workgroup_size: [64, 1, 1],
        }
    }

    /// Returns the dispatch dimensions needed to cover `n_items` work items.
    ///
    /// Only the X axis is scaled: the result is
    /// `[ceil(n_items / workgroup_size[0]), 1, 1]`. A zero X workgroup size is
    /// treated as 1 instead of panicking.
    pub fn workgroups_needed(&self, n_items: u32) -> [u32; 3] {
        // `workgroup_size` is a public field and may have been set to 0;
        // clamping keeps `div_ceil` from dividing by zero.
        let group_width = self.workgroup_size[0].max(1);
        [n_items.div_ceil(group_width), 1, 1]
    }
}
/// A barrier on a named resource between a source and a destination stage.
#[derive(Debug, Clone, PartialEq)]
pub struct ResourceBarrier {
/// Stage on the source side of the barrier.
pub src_stage: PipelineStage,
/// Stage on the destination side of the barrier.
pub dst_stage: PipelineStage,
/// Name of the resource the barrier applies to.
pub resource_name: String,
}
impl ResourceBarrier {
    /// Creates a barrier from `src` to `dst` on the resource called `name`.
    pub fn new(src: PipelineStage, dst: PipelineStage, name: &str) -> Self {
        let resource_name = String::from(name);
        Self {
            src_stage: src,
            dst_stage: dst,
            resource_name,
        }
    }

    /// Returns `true` when the source stage orders strictly before the
    /// destination stage.
    pub fn is_valid_order(&self) -> bool {
        let Self {
            src_stage,
            dst_stage,
            ..
        } = self;
        src_stage < dst_stage
    }
}
/// An `(offset, size)` pair describing an allocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ResourceHandle {
    /// Start offset of the allocation.
    pub offset: usize,
    /// Size of the allocation.
    pub size: usize,
}

impl ResourceHandle {
    /// Builds a handle from an `(offset, size)` tuple.
    pub fn from_alloc(alloc: (usize, usize)) -> Self {
        let (offset, size) = alloc;
        Self { offset, size }
    }
}
/// One frame-graph pass: its name, the resources it reads and writes, the
/// passes it depends on, and the queue it is assigned to.
#[derive(Debug, Clone)]
pub struct FrameGraphPass {
/// Pass name; `FrameGraph::validate_dependencies` checks dependencies against it.
pub name: String,
/// Resource names added through the `reads` builder method.
pub reads: Vec<String>,
/// Resource names added through the `writes` builder method.
pub writes: Vec<String>,
/// Pass names added through the `depends_on` builder method.
pub dependencies: Vec<String>,
/// Queue this pass is assigned to.
pub queue: QueueType,
}
impl FrameGraphPass {
    /// Creates a pass called `name` on `queue` with no reads, writes or dependencies.
    pub fn new(name: impl Into<String>, queue: QueueType) -> Self {
        let name: String = name.into();
        Self {
            name,
            reads: vec![],
            writes: vec![],
            dependencies: vec![],
            queue,
        }
    }

    /// Builder step: appends `resource` to the list of resources read.
    pub fn reads(mut self, resource: impl Into<String>) -> Self {
        let resource: String = resource.into();
        self.reads.push(resource);
        self
    }

    /// Builder step: appends `resource` to the list of resources written.
    pub fn writes(mut self, resource: impl Into<String>) -> Self {
        let resource: String = resource.into();
        self.writes.push(resource);
        self
    }

    /// Builder step: appends `pass` to the list of dependencies.
    pub fn depends_on(mut self, pass: impl Into<String>) -> Self {
        let pass: String = pass.into();
        self.dependencies.push(pass);
        self
    }
}
/// Ordered collection of frame-graph passes.
#[derive(Debug, Default)]
pub struct FrameGraph {
/// Passes in the order they were added with `add_pass`.
pub(super) passes: Vec<FrameGraphPass>,
}
impl FrameGraph {
    /// Creates an empty frame graph.
    pub fn new() -> Self {
        Self { passes: Vec::new() }
    }

    /// Appends `pass` to the graph.
    pub fn add_pass(&mut self, pass: FrameGraphPass) {
        self.passes.push(pass);
    }

    /// Names of all passes, in insertion order.
    pub fn pass_names(&self) -> Vec<&str> {
        let mut names = Vec::with_capacity(self.passes.len());
        for pass in &self.passes {
            names.push(pass.name.as_str());
        }
        names
    }

    /// Passes whose `writes` list contains `resource`, in insertion order.
    pub fn writers_of(&self, resource: &str) -> Vec<&FrameGraphPass> {
        let mut writers = Vec::new();
        for pass in &self.passes {
            if pass.writes.iter().any(|w| w.as_str() == resource) {
                writers.push(pass);
            }
        }
        writers
    }

    /// Passes whose `reads` list contains `resource`, in insertion order.
    pub fn readers_of(&self, resource: &str) -> Vec<&FrameGraphPass> {
        let mut readers = Vec::new();
        for pass in &self.passes {
            if pass.reads.iter().any(|r| r.as_str() == resource) {
                readers.push(pass);
            }
        }
        readers
    }

    /// Returns one message per dependency that names a pass not in the graph.
    ///
    /// An empty vector means every dependency resolves to a known pass name.
    pub fn validate_dependencies(&self) -> Vec<String> {
        let known: std::collections::HashSet<&str> = self.pass_names().into_iter().collect();
        let mut errors = Vec::new();
        for pass in &self.passes {
            errors.extend(
                pass.dependencies
                    .iter()
                    .filter(|dep| !known.contains(dep.as_str()))
                    .map(|dep| format!("{}: unknown dependency '{}'", pass.name, dep)),
            );
        }
        errors
    }

    /// Number of passes assigned to `QueueType::AsyncCompute`.
    pub fn async_pass_count(&self) -> usize {
        self.passes
            .iter()
            .filter(|pass| matches!(pass.queue, QueueType::AsyncCompute))
            .count()
    }
}
/// Stages of the physics pipeline; the derived ordering follows declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum PipelineStage {
    BroadPhase,
    NarrowPhase,
    ConstraintSolve,
    Integration,
    PostProcess,
}

impl PipelineStage {
    /// Returns every stage, listed in declaration order.
    pub fn all_in_order() -> [PipelineStage; 5] {
        use PipelineStage::{BroadPhase, ConstraintSolve, Integration, NarrowPhase, PostProcess};
        [
            BroadPhase,
            NarrowPhase,
            ConstraintSolve,
            Integration,
            PostProcess,
        ]
    }
}
/// A compute dispatch: a pipeline, its bound buffers, and the dispatch dimensions.
#[derive(Debug)]
pub struct DispatchBatch {
/// Pipeline the batch dispatches.
pub pipeline: ComputePipeline,
/// Buffers added through `bind`, in binding order.
pub bindings: Vec<CpuBuffer>,
/// Set by `new` from `pipeline.workgroups_needed(workitems)`.
pub dispatch_dims: [u32; 3],
}
impl DispatchBatch {
    /// Creates a batch for `pipeline` sized to cover `workitems` items, with
    /// no buffers bound yet.
    pub fn new(pipeline: ComputePipeline, workitems: u32) -> Self {
        Self {
            // Evaluated before `pipeline` is moved into the struct below.
            dispatch_dims: pipeline.workgroups_needed(workitems),
            bindings: vec![],
            pipeline,
        }
    }

    /// Appends `buffer` to the batch's bindings.
    pub fn bind(&mut self, buffer: CpuBuffer) {
        let bindings = &mut self.bindings;
        bindings.push(buffer);
    }
}
/// FIFO queue of dispatch batches with running submit/flush counters.
pub struct AsyncComputeQueue {
/// Pending batches; `submit` pushes to the back.
pub(super) queue: std::collections::VecDeque<DispatchBatch>,
/// Total number of batches ever passed to `submit`.
pub total_enqueued: usize,
/// Total number of batches removed by `flush`.
pub total_executed: usize,
}
impl AsyncComputeQueue {
    /// Creates an empty queue with both counters at zero.
    pub fn new() -> Self {
        let queue = std::collections::VecDeque::new();
        Self {
            queue,
            total_enqueued: 0,
            total_executed: 0,
        }
    }

    /// Enqueues `batch` at the back and bumps `total_enqueued`.
    pub fn submit(&mut self, batch: DispatchBatch) {
        self.queue.push_back(batch);
        self.total_enqueued += 1;
    }

    /// Removes every pending batch, adds their count to `total_executed`,
    /// and returns that count.
    pub fn flush(&mut self) -> usize {
        let drained = self.queue.drain(..).count();
        self.total_executed += drained;
        drained
    }

    /// Number of batches currently waiting in the queue.
    pub fn pending(&self) -> usize {
        self.queue.len()
    }

    /// Returns `true` when no batch is waiting.
    pub fn is_idle(&self) -> bool {
        self.pending() == 0
    }
}
/// A dispatch batch tagged with a target queue and a frame number.
#[derive(Debug)]
#[allow(dead_code)]
pub struct MultiQueueBatch {
/// The batch itself.
pub batch: DispatchBatch,
/// Queue the batch is tagged with.
pub queue: QueueType,
/// NOTE(review): presumably the frame to wait for before running — no code
/// visible in this file reads or writes it; confirm the intended meaning.
pub wait_frame: u64,
}
/// Simulation state stored as flat `f64` vectors.
#[derive(Debug, Clone, Default)]
pub struct WorldState {
    /// Position data. NOTE(review): per-body layout is not visible here.
    pub positions: Vec<f64>,
    /// Velocity data. NOTE(review): per-body layout is not visible here.
    pub velocities: Vec<f64>,
    /// One entry per body; its length defines `body_count`.
    pub inverse_masses: Vec<f64>,
}

impl WorldState {
    /// Number of bodies, taken as the length of `inverse_masses`.
    pub fn body_count(&self) -> usize {
        let Self { inverse_masses, .. } = self;
        inverse_masses.len()
    }
}
/// Collects timing samples per stage name.
#[derive(Debug, Default)]
pub struct PipelineProfiler {
/// Stage name mapped to the millisecond values passed to `record`, in call order.
pub(super) samples: std::collections::HashMap<String, Vec<f64>>,
}
impl PipelineProfiler {
    /// Creates a profiler with no samples.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends the sample `ms` under `stage_name`, creating the entry if needed.
    pub fn record(&mut self, stage_name: &str, ms: f64) {
        let bucket = self.samples.entry(stage_name.to_owned()).or_default();
        bucket.push(ms);
    }

    /// Returns `(mean, standard deviation, sample count)` for `stage_name`.
    ///
    /// The deviation divides by the sample count (population form). Returns
    /// `None` when the stage is unknown or has no samples.
    pub fn summary(&self, stage_name: &str) -> Option<(f64, f64, usize)> {
        let values = self
            .samples
            .get(stage_name)
            .filter(|values| !values.is_empty())?;
        let count = values.len();
        let n = count as f64;
        let mean = values.iter().sum::<f64>() / n;
        let squared_error = values.iter().map(|&x| (x - mean).powi(2)).sum::<f64>();
        let std_dev = (squared_error / n).sqrt();
        Some((mean, std_dev, count))
    }

    /// All recorded stage names, sorted ascending.
    pub fn stage_names(&self) -> Vec<&str> {
        let mut names: Vec<&str> = Vec::with_capacity(self.samples.len());
        names.extend(self.samples.keys().map(|name| name.as_str()));
        names.sort_unstable();
        names
    }

    /// Total number of samples across every stage.
    pub fn total_samples(&self) -> usize {
        self.samples.values().map(|values| values.len()).sum()
    }

    /// Discards every stage and sample.
    pub fn reset(&mut self) {
        self.samples.clear();
    }
}
/// Identifies the queue a batch or pass is assigned to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueueType {
/// Routed to `main_queue` by `MultiQueueRecorder::submit`.
Main,
/// Routed to `async_queue` by `MultiQueueRecorder::submit`.
AsyncCompute,
/// Routed to `transfer_queue` by `MultiQueueRecorder::submit`.
Transfer,
}
/// Stopwatch used to time one pipeline stage.
pub struct StageTimer {
/// Instant captured when the timer was created by `start()`.
pub(super) start: Instant,
/// 0.0 until `stop()` stores the milliseconds elapsed since `start`.
pub elapsed_ms: f64,
}
impl StageTimer {
    /// Starts a timer at the current instant with `elapsed_ms` set to 0.0.
    pub fn start() -> Self {
        let now = Instant::now();
        Self {
            start: now,
            elapsed_ms: 0.0,
        }
    }

    /// Stores the time since creation, in milliseconds, in `elapsed_ms`.
    ///
    /// Calling it again overwrites the previous value with a newer reading.
    pub fn stop(&mut self) {
        let seconds = self.start.elapsed().as_secs_f64();
        self.elapsed_ms = seconds * 1000.0;
    }
}
/// Raw counters for compute work, with derived efficiency metrics.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct PipelineStatistics {
    /// Compute-shader invocation count.
    pub cs_invocations: u64,
    /// Number of workgroups dispatched.
    pub workgroups_dispatched: u64,
    /// Floating-point operation count used by `arithmetic_intensity`.
    pub flops: u64,
    /// Bytes read.
    pub bytes_read: u64,
    /// Bytes written.
    pub bytes_written: u64,
}

impl PipelineStatistics {
    /// Sum of bytes read and written.
    fn total_bytes(&self) -> u64 {
        self.bytes_read + self.bytes_written
    }

    /// FLOPs per byte moved; 0.0 when no bytes were read or written.
    pub fn arithmetic_intensity(&self) -> f64 {
        match self.total_bytes() {
            0 => 0.0,
            bytes => self.flops as f64 / bytes as f64,
        }
    }

    /// Fraction of `peak_bw_bytes_s` used over `elapsed_s`, capped at 1.0.
    ///
    /// Returns 0.0 when either argument is zero or negative.
    pub fn bandwidth_utilization(&self, peak_bw_bytes_s: f64, elapsed_s: f64) -> f64 {
        let unusable = peak_bw_bytes_s <= 0.0 || elapsed_s <= 0.0;
        if unusable {
            0.0
        } else {
            let achieved = self.total_bytes() as f64 / elapsed_s;
            (achieved / peak_bw_bytes_s).min(1.0)
        }
    }
}
/// Holds recorded batches in three separate queues until they are flushed.
#[derive(Debug, Default)]
pub struct MultiQueueRecorder {
/// Batches submitted with `QueueType::Main`.
pub main_queue: Vec<DispatchBatch>,
/// Batches submitted with `QueueType::AsyncCompute`.
pub async_queue: Vec<DispatchBatch>,
/// Batches submitted with `QueueType::Transfer`.
pub transfer_queue: Vec<DispatchBatch>,
/// Total number of `submit` calls; `flush_all` does not reset it.
pub total_recorded: usize,
}
impl MultiQueueRecorder {
    /// Creates a recorder with three empty queues.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `batch` to the queue selected by `queue` and bumps `total_recorded`.
    pub fn submit(&mut self, batch: DispatchBatch, queue: QueueType) {
        let target = match queue {
            QueueType::Main => &mut self.main_queue,
            QueueType::AsyncCompute => &mut self.async_queue,
            QueueType::Transfer => &mut self.transfer_queue,
        };
        target.push(batch);
        self.total_recorded += 1;
    }

    /// Empties all three queues and returns how many batches they held.
    pub fn flush_all(&mut self) -> usize {
        let flushed = self.pending_total();
        for queue in [
            &mut self.main_queue,
            &mut self.async_queue,
            &mut self.transfer_queue,
        ] {
            queue.clear();
        }
        flushed
    }

    /// Number of batches currently held across all three queues.
    pub fn pending_total(&self) -> usize {
        [&self.main_queue, &self.async_queue, &self.transfer_queue]
            .iter()
            .map(|queue| queue.len())
            .sum()
    }
}
/// Fixed-capacity pool that hands out `(offset, size)` ranges from a free list.
#[derive(Debug)]
pub struct GpuMemoryPool {
/// Total size of the pool.
pub capacity: usize,
/// Sum of the sizes currently handed out by `alloc` and not yet freed.
pub allocated: usize,
/// Free blocks as `(offset, size)` pairs; `free` keeps them sorted by offset
/// and merges blocks that touch.
pub(super) free_list: Vec<(usize, usize)>,
}
impl GpuMemoryPool {
    /// Creates a pool whose whole `capacity` is one free block starting at offset 0.
    pub fn new(capacity: usize) -> Self {
        Self {
            capacity,
            allocated: 0,
            free_list: vec![(0, capacity)],
        }
    }

    /// Reserves `size` units from the first free block that is large enough.
    ///
    /// Returns `Some((offset, size))` on success, or `None` when no single
    /// free block can hold the request.
    pub fn alloc(&mut self, size: usize) -> Option<(usize, usize)> {
        let idx = self
            .free_list
            .iter()
            .position(|&(_, avail)| avail >= size)?;
        let (off, avail) = self.free_list[idx];
        if avail == size {
            // Exact fit: the block disappears from the free list.
            self.free_list.remove(idx);
        } else {
            // Carve the request off the front and keep the remainder.
            self.free_list[idx] = (off + size, avail - size);
        }
        self.allocated += size;
        Some((off, size))
    }

    /// Returns the block `(offset, size)` to the pool and merges it with
    /// free blocks that touch it.
    ///
    /// # Errors
    ///
    /// * `"block out of bounds"` when `offset + size` exceeds `capacity`
    ///   (including when the sum overflows `usize`).
    /// * `"double-free: allocated count underflow"` when `size` is larger
    ///   than the amount currently allocated.
    pub fn free(&mut self, offset: usize, size: usize) -> Result<(), &'static str> {
        // `checked_add` rejects offset/size pairs whose sum would wrap around
        // and otherwise slip past the bounds check.
        match offset.checked_add(size) {
            Some(end) if end <= self.capacity => {}
            _ => return Err("block out of bounds"),
        }
        if self.allocated < size {
            return Err("double-free: allocated count underflow");
        }
        if size == 0 {
            // Nothing to return; avoid adding an empty entry to the free list.
            return Ok(());
        }
        self.allocated -= size;
        self.free_list.push((offset, size));
        self.free_list.sort_by_key(|&(off, _)| off);

        // Coalesce blocks whose end touches the next block's start.
        let mut merged: Vec<(usize, usize)> = Vec::with_capacity(self.free_list.len());
        for &(off, sz) in &self.free_list {
            if let Some(last) = merged.last_mut() {
                if last.0 + last.1 == off {
                    last.1 += sz;
                    continue;
                }
            }
            merged.push((off, sz));
        }
        self.free_list = merged;
        Ok(())
    }

    /// Capacity not currently allocated.
    ///
    /// Saturates at 0 if the public `allocated` field was set above `capacity`.
    pub fn free_space(&self) -> usize {
        self.capacity.saturating_sub(self.allocated)
    }

    /// Returns `true` when nothing is allocated.
    pub fn is_fully_free(&self) -> bool {
        self.allocated == 0
    }

    /// Number of separate blocks in the free list.
    pub fn fragmentation_count(&self) -> usize {
        self.free_list.len()
    }

    /// Reserves `n` units and creates a zero-filled `CpuBuffer` of `n`
    /// elements labelled `label`.
    ///
    /// Returns the buffer together with its `(offset, size)` range, or `None`
    /// when the pool cannot satisfy the request.
    pub fn alloc_buffer(
        &mut self,
        label: &str,
        n: usize,
        usage: BufferUsage,
    ) -> Option<(CpuBuffer, (usize, usize))> {
        let handle = self.alloc(n)?;
        let buf = CpuBuffer::new_zeros(label, n, usage);
        Some((buf, handle))
    }
}
/// Ordered collection of timestamp queries.
#[derive(Debug, Clone, Default)]
pub struct TimestampQuerySet {
/// Queries in the order they were passed to `record`.
pub(super) queries: Vec<TimestampQuery>,
}
impl TimestampQuerySet {
pub fn new() -> Self {
Self::default()
}
pub fn record(&mut self, query: TimestampQuery) {
self.queries.push(query);
}
pub fn queries(&self) -> &[TimestampQuery] {
&self.queries
}
pub fn slowest_pass(&self) -> Option<&TimestampQuery> {
self.queries.iter().max_by(|a, b| {
a.elapsed_ms()
.partial_cmp(&b.elapsed_ms())
.unwrap_or(std::cmp::Ordering::Equal)
})
}
pub fn total_elapsed_ms(&self) -> f64 {
self.queries.iter().map(|q| q.elapsed_ms()).sum()
}
pub fn clear(&mut self) {
self.queries.clear();
}
}
/// Per-stage timings and counters produced by the physics pipeline.
#[derive(Debug, Clone, Default)]
pub struct PipelineStats {
    /// Time spent in the broad phase, in milliseconds.
    pub broadphase_ms: f64,
    /// Time spent in the narrow phase, in milliseconds.
    pub narrowphase_ms: f64,
    /// Time spent solving constraints, in milliseconds.
    pub constraint_ms: f64,
    /// Time spent integrating, in milliseconds.
    pub integration_ms: f64,
    /// Time spent in post-processing, in milliseconds.
    pub postprocess_ms: f64,
    /// Wall-clock time of the whole step, in milliseconds.
    pub total_time_ms: f64,
    /// Number of collision pairs reported by the broad phase.
    pub collision_pairs: u32,
    /// Number of constraints reported as solved.
    pub solved_constraints: u32,
}

impl PipelineStats {
    /// Adds every field of `other` onto `self`.
    ///
    /// The integer counters saturate at `u32::MAX` rather than overflowing,
    /// so long-lived accumulated statistics never panic or wrap around.
    pub fn accumulate(&mut self, other: &PipelineStats) {
        self.broadphase_ms += other.broadphase_ms;
        self.narrowphase_ms += other.narrowphase_ms;
        self.constraint_ms += other.constraint_ms;
        self.integration_ms += other.integration_ms;
        self.postprocess_ms += other.postprocess_ms;
        self.total_time_ms += other.total_time_ms;
        self.collision_pairs = self.collision_pairs.saturating_add(other.collision_pairs);
        self.solved_constraints = self
            .solved_constraints
            .saturating_add(other.solved_constraints);
    }

    /// Sum of the five per-stage timings (excludes `total_time_ms`).
    pub fn stage_total_ms(&self) -> f64 {
        self.broadphase_ms
            + self.narrowphase_ms
            + self.constraint_ms
            + self.integration_ms
            + self.postprocess_ms
    }
}
/// Stateless holder for barrier de-duplication helpers.
#[derive(Debug, Default)]
pub struct BarrierOptimizer;
impl BarrierOptimizer {
    /// Removes duplicate barriers from `barriers`.
    ///
    /// Two barriers are duplicates when source stage, destination stage and
    /// resource name all match. The result keeps one barrier per distinct
    /// triple, in order of first appearance, so the output is deterministic.
    pub fn optimize(barriers: &[ResourceBarrier]) -> BarrierSet {
        let mut seen: std::collections::HashSet<(PipelineStage, PipelineStage, &str)> =
            std::collections::HashSet::with_capacity(barriers.len());
        let mut out = BarrierSet::new();
        for b in barriers {
            // `insert` returns `true` only the first time a key is added.
            if seen.insert((b.src_stage, b.dst_stage, b.resource_name.as_str())) {
                out.add(b.clone());
            }
        }
        out
    }

    /// Number of barriers that `optimize` would remove from `barriers`.
    pub fn savings(barriers: &[ResourceBarrier]) -> usize {
        let optimized = Self::optimize(barriers);
        barriers.len().saturating_sub(optimized.len())
    }
}
/// Builder that assembles a `PipelineConfig` and turns it into a `PhysicsPipeline`.
pub struct PipelineBuilder {
/// Configuration being assembled; `new` starts from `PipelineConfig::new()`.
pub(super) config: PipelineConfig,
}
impl PipelineBuilder {
    /// Starts a builder from `PipelineConfig::new()`.
    pub fn new() -> Self {
        let config = PipelineConfig::new();
        Self { config }
    }

    /// Sets the number of substeps per step.
    pub fn substeps(mut self, n: u32) -> Self {
        self.config.substeps = n;
        self
    }

    /// Sets the `use_gpu` flag.
    pub fn use_gpu(mut self, gpu: bool) -> Self {
        self.config.use_gpu = gpu;
        self
    }

    /// Enables `stage` if it is not already enabled, keeping the list sorted.
    pub fn enable_stage(mut self, stage: PipelineStage) -> Self {
        let stages = &mut self.config.enabled_stages;
        let already_enabled = stages.iter().any(|&existing| existing == stage);
        if !already_enabled {
            stages.push(stage);
            stages.sort();
        }
        self
    }

    /// Removes every occurrence of `stage` from the enabled list.
    pub fn disable_stage(mut self, stage: PipelineStage) -> Self {
        self.config
            .enabled_stages
            .retain(|existing| *existing != stage);
        self
    }

    /// Consumes the builder and creates the pipeline.
    pub fn build(self) -> PhysicsPipeline {
        let Self { config } = self;
        PhysicsPipeline::new(config)
    }
}
/// Ordered collection of resource barriers.
#[derive(Debug, Clone, Default)]
pub struct BarrierSet {
/// Barriers in the order they were passed to `add`.
pub(super) barriers: Vec<ResourceBarrier>,
}
impl BarrierSet {
    /// Creates an empty barrier set.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends `barrier` to the set.
    pub fn add(&mut self, barrier: ResourceBarrier) {
        self.barriers.push(barrier);
    }

    /// Barriers whose source stage equals `stage`, in insertion order.
    pub fn barriers_from(&self, stage: PipelineStage) -> Vec<&ResourceBarrier> {
        let mut matching = Vec::new();
        for barrier in &self.barriers {
            if barrier.src_stage == stage {
                matching.push(barrier);
            }
        }
        matching
    }

    /// Barriers whose destination stage equals `stage`, in insertion order.
    pub fn barriers_to(&self, stage: PipelineStage) -> Vec<&ResourceBarrier> {
        let mut matching = Vec::new();
        for barrier in &self.barriers {
            if barrier.dst_stage == stage {
                matching.push(barrier);
            }
        }
        matching
    }

    /// Number of barriers in the set.
    pub fn len(&self) -> usize {
        self.barriers.len()
    }

    /// Returns `true` when the set holds no barriers.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Barriers for which `is_valid_order` is false, in insertion order.
    pub fn validate(&self) -> Vec<&ResourceBarrier> {
        let mut invalid = Vec::new();
        for barrier in &self.barriers {
            if !barrier.is_valid_order() {
                invalid.push(barrier);
            }
        }
        invalid
    }
}
/// A labelled pair of begin/end timestamps in milliseconds.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct TimestampQuery {
    /// Label identifying the query.
    pub label: String,
    /// Begin timestamp in milliseconds.
    pub begin_ms: f64,
    /// End timestamp in milliseconds.
    pub end_ms: f64,
}

impl TimestampQuery {
    /// Creates a query with the given label and timestamps.
    pub fn new(label: impl Into<String>, begin_ms: f64, end_ms: f64) -> Self {
        let label: String = label.into();
        Self {
            label,
            begin_ms,
            end_ms,
        }
    }

    /// `end_ms` minus `begin_ms`; negative when the end precedes the begin.
    pub fn elapsed_ms(&self) -> f64 {
        let (begin, end) = (self.begin_ms, self.end_ms);
        end - begin
    }
}
/// Tracks which resource names were registered at the same `(offset, size)` range.
#[derive(Debug, Default)]
pub struct ResourceAliasingTracker {
/// `(offset, size)` mapped to the resource names registered there via `track`.
pub(super) aliases: std::collections::HashMap<(usize, usize), Vec<String>>,
}
impl ResourceAliasingTracker {
    /// Creates a tracker with no registrations.
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers `resource_name` as occupying the range `(offset, size)`.
    pub fn track(&mut self, resource_name: impl Into<String>, offset: usize, size: usize) {
        self.aliases
            .entry((offset, size))
            .or_default()
            .push(resource_name.into());
    }

    /// Names registered at exactly `(offset, size)`; empty when there are none.
    pub fn aliases_for(&self, offset: usize, size: usize) -> &[String] {
        self.aliases
            .get(&(offset, size))
            .map(Vec::as_slice)
            .unwrap_or(&[])
    }

    /// Returns `true` when `a` and `b` are both registered at the same
    /// `(offset, size)` range.
    ///
    /// Only identical ranges count; overlapping but different ranges do not.
    pub fn are_aliased(&self, a: &str, b: &str) -> bool {
        // Compare as `&str` so no `String` is allocated per lookup.
        let holds = |names: &[String], wanted: &str| names.iter().any(|n| n.as_str() == wanted);
        self.aliases
            .values()
            .any(|names| holds(names, a) && holds(names, b))
    }

    /// Number of distinct `(offset, size)` ranges with at least one registration.
    pub fn allocation_count(&self) -> usize {
        self.aliases.len()
    }

    /// Total number of `track` registrations across all ranges.
    pub fn total_resource_registrations(&self) -> usize {
        self.aliases.values().map(Vec::len).sum()
    }
}
/// A labelled `f32` buffer with a usage tag.
#[derive(Debug, Clone)]
pub struct CpuBuffer {
/// Label identifying the buffer.
pub label: String,
/// Buffer contents.
pub data: Vec<f32>,
/// Usage tag supplied at construction.
pub usage: BufferUsage,
}
impl CpuBuffer {
    /// Creates a buffer that takes ownership of `data`.
    pub fn new_f32(label: &str, data: Vec<f32>, usage: BufferUsage) -> Self {
        let label = String::from(label);
        Self { label, data, usage }
    }

    /// Creates a buffer of `n` elements, all set to 0.0.
    pub fn new_zeros(label: &str, n: usize, usage: BufferUsage) -> Self {
        let zeros = vec![0.0_f32; n];
        Self::new_f32(label, zeros, usage)
    }

    /// Number of elements in the buffer.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` when the buffer holds no elements.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
/// Usage tag stored on a `CpuBuffer`.
/// NOTE(review): no code visible in this file branches on the variant —
/// confirm how each one is interpreted by consumers.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BufferUsage {
Storage,
Uniform,
StorageReadOnly,
}