use crate::backend::Backend;
/// Conditionally begins a probe measurement on backend `B`.
///
/// When `enabled` is `false` nothing is created, `ctx` is left untouched and
/// `None` is returned. Otherwise a timer is obtained from `B::make_timer()`,
/// its start point is recorded against `ctx`, and the running timer is handed
/// back in `Some` so it can later be passed to [`finish_probe_timer`].
pub fn start_probe_timer_if<B: Backend>(enabled: bool, ctx: &mut B::Context) -> Option<B::Timer> {
    if !enabled {
        return None;
    }
    let mut probe = B::make_timer();
    probe.record_start(ctx);
    Some(probe)
}
/// Stops a probe timer and reports how long it ran, in whole microseconds.
///
/// A `None` timer (probing was disabled) yields `None` and leaves `ctx`
/// untouched. For a live timer the end point is recorded against `ctx`, the
/// timer's `elapsed_ms()` reading is scaled by 1000, and the result is
/// converted to `u64` with an `as` cast (fractional microseconds are dropped).
pub fn finish_probe_timer<B: Backend>(
    timer: Option<B::Timer>,
    ctx: &mut B::Context,
) -> Option<u64> {
    timer.map(|mut probe| {
        probe.record_end(ctx);
        let micros = probe.elapsed_ms() * 1000.0;
        micros as u64
    })
}
/// Like [`finish_probe_timer`], but also logs the measurement to the global trace.
///
/// The timer is finished via `finish_probe_timer::<B>`. If that yields a
/// duration, one event is pushed onto `ferrum_bench_core::trace::global_trace()`
/// carrying `name`, `cat`, the duration converted back from microseconds to
/// milliseconds, and `tid`. The microsecond value is then returned. When there
/// is no timer, nothing is pushed and `None` is returned.
pub fn finish_probe_timer_traced<B: Backend>(
    timer: Option<B::Timer>,
    ctx: &mut B::Context,
    name: &str,
    cat: &str,
    tid: u32,
) -> Option<u64> {
    finish_probe_timer::<B>(timer, ctx).map(|micros| {
        let millis = (micros as f64) / 1000.0;
        ferrum_bench_core::trace::global_trace().push(name, cat, millis, tid);
        micros
    })
}
/// Stopwatch abstraction specialised per backend `B`.
///
/// A timer is created empty, told when the measured region starts and ends
/// (each time with access to the backend's context), and then queried for the
/// elapsed time. Implementors must be `Send`.
pub trait BackendTimer<B: Backend>: Send {
/// Creates a timer with no start or end recorded yet.
fn new() -> Self
where
Self: Sized;
/// Marks the beginning of the measured region, using `ctx` as needed by the backend.
fn record_start(&mut self, ctx: &mut B::Context);
/// Marks the end of the measured region, using `ctx` as needed by the backend.
fn record_end(&mut self, ctx: &mut B::Context);
/// Returns the time between the recorded start and end, in milliseconds.
///
/// The implementations in this file return `0.0` when either endpoint has
/// not been recorded.
fn elapsed_ms(&self) -> f64;
}
/// Host-side timer that stores two `std::time::Instant` samples.
///
/// Both samples start out as `None`; they are filled in by the
/// `BackendTimer` implementation for the CPU backend.
#[derive(Default)]
pub struct CpuTimer {
    /// Instant captured when the measured region began, if any.
    start: Option<std::time::Instant>,
    /// Instant captured when the measured region ended, if any.
    end: Option<std::time::Instant>,
}

impl CpuTimer {
    /// Creates a timer with neither endpoint recorded (same as `CpuTimer::default()`).
    pub fn new() -> Self {
        Self::default()
    }
}
/// CPU backend timing: both endpoints are plain `Instant::now()` samples and
/// the backend context is not consulted.
impl BackendTimer<crate::backend::cpu::CpuBackend> for CpuTimer {
    /// Builds an empty timer via the inherent constructor.
    fn new() -> Self {
        CpuTimer::new()
    }

    /// Samples the clock as the start of the measured region.
    fn record_start(&mut self, _ctx: &mut <crate::backend::cpu::CpuBackend as Backend>::Context) {
        let now = std::time::Instant::now();
        self.start = Some(now);
    }

    /// Samples the clock as the end of the measured region.
    fn record_end(&mut self, _ctx: &mut <crate::backend::cpu::CpuBackend as Backend>::Context) {
        let now = std::time::Instant::now();
        self.end = Some(now);
    }

    /// Milliseconds between the two samples, or `0.0` if either one is missing.
    fn elapsed_ms(&self) -> f64 {
        self.start.zip(self.end).map_or(0.0, |(began, ended)| {
            ended.duration_since(began).as_secs_f64() * 1000.0
        })
    }
}
/// Timer for the Metal backend, built from two host-side `Instant` samples.
///
/// Only compiled on macOS with the `metal` feature enabled. Both samples start
/// out as `None`.
#[cfg(all(target_os = "macos", feature = "metal"))]
#[derive(Default)]
pub struct MetalTimer {
    /// Instant captured when the measured region began, if any.
    start: Option<std::time::Instant>,
    /// Instant captured when the measured region ended, if any.
    end: Option<std::time::Instant>,
}

#[cfg(all(target_os = "macos", feature = "metal"))]
impl MetalTimer {
    /// Creates a timer with neither endpoint recorded (same as `MetalTimer::default()`).
    pub fn new() -> Self {
        Self::default()
    }
}
/// Metal backend timing: each endpoint calls `MetalBackend::sync(ctx)` first
/// and only then samples the host clock.
// NOTE(review): the sync presumably waits for previously submitted GPU work so
// the host timestamps bracket it — confirm against `MetalBackend::sync`.
#[cfg(all(target_os = "macos", feature = "metal"))]
impl BackendTimer<crate::backend::metal::MetalBackend> for MetalTimer {
    /// Builds an empty timer via the inherent constructor.
    fn new() -> Self {
        MetalTimer::new()
    }

    /// Syncs the backend, then samples the clock as the start of the region.
    fn record_start(
        &mut self,
        ctx: &mut <crate::backend::metal::MetalBackend as Backend>::Context,
    ) {
        crate::backend::metal::MetalBackend::sync(ctx);
        let now = std::time::Instant::now();
        self.start = Some(now);
    }

    /// Syncs the backend, then samples the clock as the end of the region.
    fn record_end(&mut self, ctx: &mut <crate::backend::metal::MetalBackend as Backend>::Context) {
        crate::backend::metal::MetalBackend::sync(ctx);
        let now = std::time::Instant::now();
        self.end = Some(now);
    }

    /// Milliseconds between the two samples, or `0.0` if either one is missing.
    fn elapsed_ms(&self) -> f64 {
        self.start.zip(self.end).map_or(0.0, |(began, ended)| {
            ended.duration_since(began).as_secs_f64() * 1000.0
        })
    }
}
/// Timer for the CUDA backend, backed by a pair of raw driver event handles.
///
/// Only compiled with the `cuda` feature. The handles are produced by
/// `cuEventCreate` in `CudaTimer::new` and released with `cuEventDestroy_v2`
/// in the `Drop` impl.
#[cfg(feature = "cuda")]
pub struct CudaTimer {
/// Event recorded at the start of the measured region.
start: Option<cudarc::driver::sys::CUevent>,
/// Event recorded at the end of the measured region.
end: Option<cudarc::driver::sys::CUevent>,
/// Set by `record_start`; `elapsed_ms` returns `0.0` while this is `false`.
recorded_start: bool,
/// Set by `record_end`; `elapsed_ms` returns `0.0` while this is `false`.
recorded_end: bool,
}
// The raw `CUevent` handles keep this type from being `Send` automatically;
// `BackendTimer` requires `Send`, hence the manual impl.
// NOTE(review): this assumes the event handles may be used and destroyed from
// a thread other than the one that created them — confirm against the CUDA
// driver API threading rules and how the owning context is managed.
#[cfg(feature = "cuda")]
unsafe impl Send for CudaTimer {}
/// `Default` simply forwards to the inherent constructor, so a defaulted timer
/// goes through the same event creation as `CudaTimer::new()`.
#[cfg(feature = "cuda")]
impl Default for CudaTimer {
    fn default() -> Self {
        CudaTimer::new()
    }
}
/// Releases the driver events owned by the timer.
#[cfg(feature = "cuda")]
impl Drop for CudaTimer {
    /// Destroys the start event, then the end event, skipping any that are absent.
    fn drop(&mut self) {
        use cudarc::driver::sys as cu;
        // `take()` empties both slots, so each handle is handed to the driver at most once.
        let pending = self.start.take().into_iter().chain(self.end.take());
        for event in pending {
            // SAFETY: the handle comes from this timer's own slot and is not
            // used again after this call. The status is discarded because
            // `drop` has no way to report a failure.
            unsafe {
                let _ = cu::cuEventDestroy_v2(event);
            }
        }
    }
}
#[cfg(feature = "cuda")]
impl CudaTimer {
    /// Creates a timer with a start and an end event, neither recorded yet.
    ///
    /// Event creation is best-effort: if the driver refuses to create an event
    /// (for example because no CUDA context is current), the corresponding slot
    /// is left as `None` instead of holding a null handle. Recording on such a
    /// timer is a no-op and `elapsed_ms` reports `0.0`.
    pub fn new() -> Self {
        Self {
            start: Self::create_event(),
            end: Self::create_event(),
            recorded_start: false,
            recorded_end: false,
        }
    }

    /// Asks the driver for one event with default flags; `None` if creation failed.
    fn create_event() -> Option<cudarc::driver::sys::CUevent> {
        use cudarc::driver::sys as cu;
        let mut event: cu::CUevent = std::ptr::null_mut();
        // SAFETY: `event` is a valid, writable location for the out-parameter
        // and lives for the duration of the call.
        let status = unsafe { cu::cuEventCreate(&mut event, 0) };
        // Never expose a handle the driver did not successfully create.
        let created = matches!(status, cu::CUresult::CUDA_SUCCESS) && !event.is_null();
        created.then_some(event)
    }
}
/// CUDA backend timing: both endpoints are driver events recorded on the
/// context's stream, and the elapsed time is read back from the driver.
#[cfg(feature = "cuda")]
impl BackendTimer<crate::backend::cuda::CudaBackend> for CudaTimer {
    /// Builds a timer via the inherent constructor.
    fn new() -> Self {
        CudaTimer::new()
    }

    /// Records the start event on `ctx.stream`.
    ///
    /// The timer only counts as started if the driver accepted the record
    /// call; a failed or impossible record leaves it unstarted, so
    /// `elapsed_ms` reports `0.0` rather than a stale or invalid interval.
    fn record_start(&mut self, ctx: &mut <crate::backend::cuda::CudaBackend as Backend>::Context) {
        use cudarc::driver::sys as cu;
        self.recorded_start = match self.start {
            Some(evt) => {
                // SAFETY: `evt` is the event handle owned by this timer and the
                // stream handle is supplied by the live backend context.
                let status = unsafe { cu::cuEventRecord(evt, ctx.stream.cu_stream()) };
                matches!(status, cu::CUresult::CUDA_SUCCESS)
            }
            // No event handle to record on.
            None => false,
        };
    }

    /// Records the end event on `ctx.stream`.
    ///
    /// As with `record_start`, the timer only counts as ended if the driver
    /// accepted the record call.
    fn record_end(&mut self, ctx: &mut <crate::backend::cuda::CudaBackend as Backend>::Context) {
        use cudarc::driver::sys as cu;
        self.recorded_end = match self.end {
            Some(evt) => {
                // SAFETY: `evt` is the event handle owned by this timer and the
                // stream handle is supplied by the live backend context.
                let status = unsafe { cu::cuEventRecord(evt, ctx.stream.cu_stream()) };
                matches!(status, cu::CUresult::CUDA_SUCCESS)
            }
            // No event handle to record on.
            None => false,
        };
    }

    /// Milliseconds between the start and end events.
    ///
    /// Returns `0.0` when either endpoint was not successfully recorded, when
    /// an event handle is missing, or when the driver cannot report the
    /// elapsed time. Blocks on the end event before querying.
    fn elapsed_ms(&self) -> f64 {
        use cudarc::driver::sys as cu;
        if !(self.recorded_start && self.recorded_end) {
            return 0.0;
        }
        let (Some(s), Some(e)) = (self.start, self.end) else {
            return 0.0;
        };
        // SAFETY: both handles are owned by this timer and were successfully
        // recorded (checked via the flags above).
        unsafe {
            // Wait for the end event; if this fails, the elapsed query below
            // fails as well and is mapped to 0.0.
            let _ = cu::cuEventSynchronize(e);
            cudarc::driver::result::event::elapsed(s, e).map_or(0.0, |ms| ms as f64)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A start/end pair recorded around a short sleep yields a plausible
    /// millisecond reading.
    #[test]
    fn cpu_timer_basic() {
        let mut t = CpuTimer::new();
        assert_eq!(t.elapsed_ms(), 0.0);
        let mut ctx: <crate::backend::cpu::CpuBackend as Backend>::Context = ();
        BackendTimer::<crate::backend::cpu::CpuBackend>::record_start(&mut t, &mut ctx);
        std::thread::sleep(std::time::Duration::from_millis(2));
        BackendTimer::<crate::backend::cpu::CpuBackend>::record_end(&mut t, &mut ctx);
        let ms = BackendTimer::<crate::backend::cpu::CpuBackend>::elapsed_ms(&t);
        // `sleep` guarantees the lower bound. The upper bound only checks the
        // unit (a microsecond reading would be >= 2000), so it is kept loose:
        // a busy machine can easily oversleep by tens of milliseconds.
        assert!(ms >= 2.0 && ms < 1_000.0, "elapsed_ms = {ms}");
    }

    /// A timer that was never started or stopped reports zero.
    #[test]
    fn cpu_timer_returns_zero_if_unrecorded() {
        let t = CpuTimer::new();
        let ms = BackendTimer::<crate::backend::cpu::CpuBackend>::elapsed_ms(&t);
        assert_eq!(ms, 0.0);
    }
}