svod_runtime/profiler.rs
1//! Per-kernel execution profiling.
2//!
3//! Provides structured timing data for kernel execution via
4//! [`ExecutionPlan::execute_profiled()`](crate::ExecutionPlan::execute_profiled).
5
6use std::sync::Arc;
7use std::time::Duration;
8
9use svod_dtype::DeviceSpec;
10
11use crate::kernel_cache::CachedKernel;
12
13/// Per-kernel timing from a profiled execution.
14///
15/// Holds an `Arc<CachedKernel>` for zero-copy access to kernel metadata
16/// (entry point, generated code, global/local size, variable names).
17///
18/// # Example
19///
20/// ```ignore
21/// let plan = tensor.prepare()?;
22/// let profiles = plan.execute_profiled()?;
23///
24/// for (i, p) in profiles.iter().enumerate() {
25/// println!("{:4} {:>8.3}ms {} ({} bufs, {:?})",
26/// i, p.elapsed.as_secs_f64() * 1000.0,
27/// p.kernel.entry_point, p.num_buffers, p.device);
28/// }
29/// ```
30pub struct KernelProfile {
31 /// Compiled kernel (entry_point, code, global_size, local_size, var_names).
32 pub kernel: Arc<CachedKernel>,
33 /// Device this kernel executed on.
34 pub device: DeviceSpec,
35 /// Number of buffer arguments.
36 pub num_buffers: usize,
37 /// Wall-clock execution time.
38 pub elapsed: Duration,
39}