use std::fmt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use web_time::Instant;
use crate::context::Context;
/// Expresses `d` as a fractional number of milliseconds.
fn ms(d: Duration) -> f64 {
    const MILLIS_PER_SECOND: f64 = 1000.0;
    let seconds = d.as_secs_f64();
    seconds * MILLIS_PER_SECOND
}
/// Timing breakdown for one rendered frame.
///
/// The `Display` implementation prints these values as a small
/// multi-line report.
#[derive(Clone, Debug, Default)]
pub struct RenderTimings {
    /// Name of the renderer; printed as the first item of the report.
    pub renderer: &'static str,
    /// Wall-clock duration of the frame. When non-zero, the report also
    /// derives an FPS figure from it.
    pub frame_wall: Duration,
    /// Duration reported on the "cpu render" line.
    pub total: Duration,
    /// Duration reported on the "cpu submit" line.
    pub cpu_submit: Duration,
    /// Duration reported on the "cpu present" line.
    pub cpu_present: Duration,
    /// Named GPU step durations, or `None` when no GPU timing is available
    /// (the report then prints "gpu timing unsupported").
    pub gpu_steps: Option<Vec<(&'static str, Duration)>>,
}

impl RenderTimings {
    /// Sum of every entry in `gpu_steps`.
    ///
    /// Returns `None` when `gpu_steps` is `None`, and `Some(Duration::ZERO)`
    /// when the step list is present but empty.
    pub fn gpu_total(&self) -> Option<Duration> {
        let steps = self.gpu_steps.as_deref()?;
        Some(steps.iter().map(|&(_, elapsed)| elapsed).sum())
    }
}
impl fmt::Display for RenderTimings {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.frame_wall > Duration::ZERO {
write!(
f,
"{} — frame {:.2} ms ({:.0} FPS)",
self.renderer,
ms(self.frame_wall),
1.0 / self.frame_wall.as_secs_f64()
)?;
} else {
write!(f, "{}", self.renderer)?;
}
write!(f, "\n cpu render {:>8.3} ms", ms(self.total))?;
write!(f, "\n cpu submit {:>8.3} ms", ms(self.cpu_submit))?;
write!(f, "\n cpu present {:>8.3} ms", ms(self.cpu_present))?;
match &self.gpu_steps {
Some(steps) => {
for (name, dur) in steps {
write!(f, "\n gpu {name:<9}{:>8.3} ms", ms(*dur))?;
}
if let Some(total) = self.gpu_total() {
write!(f, "\n gpu total {:>8.3} ms", ms(total))?;
}
}
None => write!(f, "\n gpu timing unsupported")?,
}
Ok(())
}
}
/// Minimal wall-clock stopwatch for CPU-side work.
pub(crate) struct CpuTimer {
    /// Instant captured when the timer was created.
    start: Instant,
}

impl CpuTimer {
    /// Creates a timer whose reference point is "now".
    pub(crate) fn start() -> CpuTimer {
        let start = Instant::now();
        Self { start }
    }

    /// Runs `f` once and returns its result together with how long the call
    /// took.
    pub(crate) fn time<R>(f: impl FnOnce() -> R) -> (R, Duration) {
        let stopwatch = Self::start();
        let output = f();
        let spent = stopwatch.elapsed();
        (output, spent)
    }

    /// Time elapsed since this timer was started.
    pub(crate) fn elapsed(&self) -> Duration {
        self.start.elapsed()
    }
}
/// Upper bound on the number of begin/end timestamp pairs that can be
/// allocated between two `begin_frame` calls.
const MAX_SCOPES: u32 = 128;
/// Number of timestamp queries in the query set: two (begin and end) per scope.
const QUERY_COUNT: u32 = 2 * MAX_SCOPES;
/// Byte size of the resolve and readback buffers: one `u64` per query.
const BYTES: u64 = (QUERY_COUNT as u64) * (std::mem::size_of::<u64>() as u64);
/// One CPU-readable destination for a frame's resolved timestamps, together
/// with the bookkeeping needed to interpret it once it has been mapped.
struct Readback {
/// Buffer created with `COPY_DST | MAP_READ` usage; `GpuTimer::resolve`
/// copies the resolved timestamps into it.
buffer: wgpu::Buffer,
/// Set to `true` by the `map_async` callback when mapping succeeded;
/// reset to `false` before each mapping request and after collection.
ready: Arc<AtomicBool>,
/// Scope names recorded for the frame stored in `buffer`; entry `i`
/// corresponds to timestamps `2 * i` and `2 * i + 1`.
names: Vec<&'static str>,
/// Number of begin/end pairs recorded for that frame.
pairs: u32,
/// `true` from `GpuTimer::after_submit` until `GpuTimer::try_collect`
/// has read the buffer; `GpuTimer::resolve` skips a slot while it is set.
pending: bool,
}
/// Records GPU pass durations with timestamp queries and reads them back
/// asynchronously through a small set of readback slots.
pub(crate) struct GpuTimer {
/// Whether the device reported `wgpu::Features::TIMESTAMP_QUERY` when the
/// timer was created. When `false`, no scopes are ever allocated.
enabled: bool,
/// Value returned by `Queue::get_timestamp_period`; multiplied with tick
/// deltas to obtain nanoseconds.
period_ns: f32,
/// Timestamp query set with `QUERY_COUNT` entries (`None` when disabled).
query_set: Option<wgpu::QuerySet>,
/// Buffer the query set is resolved into before being copied to a
/// readback slot (`None` when disabled).
resolve: Option<wgpu::Buffer>,
/// Readback slots; the slot used for a frame is `frame % slots.len()`.
slots: Vec<Readback>,
/// Counter incremented (wrapping) by every `after_submit` call.
frame: usize,
/// Number of begin/end pairs allocated since the last `begin_frame`.
next_pair: u32,
/// Names of the scopes allocated since the last `begin_frame`, in
/// allocation order.
names: Vec<&'static str>,
/// Index of the slot that `resolve` recorded a copy into, consumed by
/// `after_submit`.
wrote: Option<usize>,
/// Most recently collected per-name GPU durations, if any.
last_gpu: Option<Vec<(&'static str, Duration)>>,
}
impl GpuTimer {
/// Builds a timer for the device held by the global `Context`.
///
/// When the device does not report `wgpu::Features::TIMESTAMP_QUERY`, the
/// returned timer is disabled and owns no GPU resources. Otherwise this
/// creates a timestamp query set with `QUERY_COUNT` entries, a resolve
/// buffer, and two mappable readback buffers, each `BYTES` long.
pub(crate) fn new() -> GpuTimer {
    let ctxt = Context::get();
    let supported = ctxt
        .device
        .features()
        .contains(wgpu::Features::TIMESTAMP_QUERY);

    // Start from the resource-less configuration; GPU objects are attached
    // below only when timestamp queries are available.
    let mut timer = GpuTimer {
        enabled: supported,
        period_ns: 0.0,
        query_set: None,
        resolve: None,
        slots: Vec::new(),
        frame: 0,
        next_pair: 0,
        names: Vec::new(),
        wrote: None,
        last_gpu: None,
    };
    if !supported {
        return timer;
    }

    timer.query_set = Some(ctxt.device.create_query_set(&wgpu::QuerySetDescriptor {
        label: Some("kiss3d_gpu_timer_queries"),
        ty: wgpu::QueryType::Timestamp,
        count: QUERY_COUNT,
    }));
    timer.resolve = Some(ctxt.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("kiss3d_gpu_timer_resolve"),
        size: BYTES,
        usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    }));

    // Two readback slots, alternated by frame index in `resolve`.
    let make_slot = || Readback {
        buffer: ctxt.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("kiss3d_gpu_timer_readback"),
            size: BYTES,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        }),
        ready: Arc::new(AtomicBool::new(false)),
        names: Vec::new(),
        pairs: 0,
        pending: false,
    };
    timer.slots = std::iter::repeat_with(make_slot).take(2).collect();

    timer.period_ns = ctxt.queue.get_timestamp_period();
    timer
}
/// Returns a copy of the most recently collected GPU step durations, or
/// `None` if nothing has been collected yet.
pub(crate) fn last(&self) -> Option<Vec<(&'static str, Duration)>> {
    self.last_gpu.as_ref().cloned()
}
/// Resets the per-frame scope bookkeeping and harvests any readback that
/// has completed. Does nothing when the timer is disabled.
pub(crate) fn begin_frame(&mut self) {
    if self.enabled {
        // Forget the scopes recorded for the previous frame.
        self.wrote = None;
        self.names.clear();
        self.next_pair = 0;

        // Poll the device (result deliberately ignored) before checking the
        // slots; presumably this lets pending `map_async` callbacks run.
        let _ = Context::get().device.poll(wgpu::PollType::Poll);
        self.try_collect();
    }
}
/// Reads back the first slot whose mapping has completed, converts its
/// timestamps into per-name durations and stores them in `last_gpu`.
///
/// At most one slot is consumed per call. Scopes sharing the same name are
/// merged by summing their durations, keeping first-seen order. When no
/// slot is ready, `last_gpu` is left untouched.
fn try_collect(&mut self) {
    let ns_per_tick = f64::from(self.period_ns);

    // A slot is collectable once it has been submitted (`pending`) and its
    // map callback has flagged success (`ready`).
    let Some(slot) = self
        .slots
        .iter_mut()
        .find(|candidate| candidate.pending && candidate.ready.load(Ordering::Acquire))
    else {
        return;
    };

    let steps = {
        // The mapped view has to be dropped before `unmap`, hence the scope.
        let mapped = slot.buffer.slice(..).get_mapped_range();
        let ticks: &[u64] = bytemuck::cast_slice(&mapped);
        let mut merged: Vec<(&'static str, Duration)> = Vec::new();
        for (idx, &label) in slot.names.iter().enumerate() {
            let (begin, end) = (ticks[2 * idx], ticks[2 * idx + 1]);
            // `saturating_sub` maps an end tick below the begin tick to 0.
            let nanos = end.saturating_sub(begin) as f64 * ns_per_tick;
            let elapsed = Duration::from_nanos(nanos as u64);
            match merged.iter_mut().find(|(seen, _)| *seen == label) {
                Some((_, total)) => *total += elapsed,
                None => merged.push((label, elapsed)),
            }
        }
        merged
    };

    // Hand the slot back so `resolve` can reuse it.
    slot.buffer.unmap();
    slot.ready.store(false, Ordering::Release);
    slot.pending = false;
    // No-op read of `pairs`; presumably kept so the otherwise write-only
    // field does not trigger an unused-field lint.
    let _ = slot.pairs;

    self.last_gpu = Some(steps);
}
/// Reserves a begin/end timestamp pair labelled `name` and returns the
/// timestamp writes to attach to a render pass.
///
/// Returns `None` when the timer is disabled or `MAX_SCOPES` pairs have
/// already been reserved this frame.
pub(crate) fn render_scope(
    &mut self,
    name: &'static str,
) -> Option<wgpu::RenderPassTimestampWrites<'_>> {
    let (begin_index, end_index) = self.alloc_pair(name)?;
    // A pair is only handed out while enabled, and `new` creates the query
    // set whenever it enables the timer.
    let query_set = self.query_set.as_ref().unwrap();
    let writes = wgpu::RenderPassTimestampWrites {
        query_set,
        beginning_of_pass_write_index: Some(begin_index),
        end_of_pass_write_index: Some(end_index),
    };
    Some(writes)
}
/// Reserves a begin/end timestamp pair labelled `name` and returns the
/// timestamp writes to attach to a compute pass.
///
/// Returns `None` when the timer is disabled or `MAX_SCOPES` pairs have
/// already been reserved this frame.
pub(crate) fn compute_scope(
    &mut self,
    name: &'static str,
) -> Option<wgpu::ComputePassTimestampWrites<'_>> {
    let (begin_index, end_index) = self.alloc_pair(name)?;
    // A pair is only handed out while enabled, and `new` creates the query
    // set whenever it enables the timer.
    let query_set = self.query_set.as_ref().unwrap();
    let writes = wgpu::ComputePassTimestampWrites {
        query_set,
        beginning_of_pass_write_index: Some(begin_index),
        end_of_pass_write_index: Some(end_index),
    };
    Some(writes)
}
/// Reserves the next pair of query indices and records `name` for it.
///
/// Returns `(begin, end)` with `end == begin + 1`, or `None` when the timer
/// is disabled or `MAX_SCOPES` pairs are already in use.
fn alloc_pair(&mut self, name: &'static str) -> Option<(u32, u32)> {
    let has_room = self.enabled && self.next_pair < MAX_SCOPES;
    if !has_room {
        return None;
    }
    // Pair `k` owns query indices `2k` (begin) and `2k + 1` (end).
    let begin = 2 * self.next_pair;
    self.names.push(name);
    self.next_pair += 1;
    Some((begin, begin + 1))
}
/// Encodes the commands that resolve this frame's timestamp queries and
/// copy them into the readback slot selected by `frame % slots.len()`.
///
/// Nothing is encoded when the timer is disabled, when no scope was
/// allocated this frame, or when the selected slot is still pending from an
/// earlier frame. On success the slot index is remembered in `wrote` so
/// that `after_submit` can request the mapping.
///
/// # Panics
///
/// Panics if the timer is enabled but the query set or resolve buffer is
/// missing; `new` creates both whenever it enables the timer.
pub(crate) fn resolve(&mut self, encoder: &mut wgpu::CommandEncoder) {
    self.wrote = None;
    if !self.enabled || self.next_pair == 0 {
        return;
    }
    let slot_idx = self.frame % self.slots.len();
    if self.slots[slot_idx].pending {
        // The previous readback through this slot has not been collected
        // yet; skip timing for this frame.
        return;
    }
    let pairs = self.next_pair;
    // Two queries (begin and end) per pair.
    let query_count = pairs * 2;
    let qs = self.query_set.as_ref().unwrap();
    let resolve = self.resolve.as_ref().unwrap();
    encoder.resolve_query_set(qs, 0..query_count, resolve, 0);
    encoder.copy_buffer_to_buffer(
        resolve,
        0,
        &self.slots[slot_idx].buffer,
        0,
        // Each resolved timestamp is a `u64` (8 bytes).
        u64::from(query_count) * 8,
    );
    let slot = &mut self.slots[slot_idx];
    // `clone_from` reuses the slot's existing allocation instead of building
    // a fresh `Vec` every frame.
    slot.names.clone_from(&self.names);
    slot.pairs = pairs;
    self.wrote = Some(slot_idx);
}
pub(crate) fn after_submit(&mut self) {
if let Some(slot_idx) = self.wrote.take() {
let slot = &mut self.slots[slot_idx];
slot.ready.store(false, Ordering::Release);
slot.pending = true;
let ready = slot.ready.clone();
slot.buffer
.slice(..)
.map_async(wgpu::MapMode::Read, move |res| {
if res.is_ok() {
ready.store(true, Ordering::Release);
}
});
}
self.frame = self.frame.wrapping_add(1);
}
}