Skip to main content

svod_runtime/
profiler.rs

//! Per-kernel execution profiling.
//!
//! Provides structured timing data for kernel execution via
//! [`ExecutionPlan::execute_profiled()`](crate::ExecutionPlan::execute_profiled).

6use std::sync::Arc;
7use std::time::Duration;
8
9use svod_dtype::DeviceSpec;
10
11use crate::kernel_cache::CachedKernel;
12
13/// Per-kernel timing from a profiled execution.
14///
15/// Holds an `Arc<CachedKernel>` for zero-copy access to kernel metadata
16/// (entry point, generated code, global/local size, variable names).
17///
18/// # Example
19///
20/// ```ignore
21/// let plan = tensor.prepare()?;
22/// let profiles = plan.execute_profiled()?;
23///
24/// for (i, p) in profiles.iter().enumerate() {
25///     println!("{:4} {:>8.3}ms  {}  ({} bufs, {:?})",
26///         i, p.elapsed.as_secs_f64() * 1000.0,
27///         p.kernel.entry_point, p.num_buffers, p.device);
28/// }
29/// ```
30pub struct KernelProfile {
31    /// Compiled kernel (entry_point, code, global_size, local_size, var_names).
32    pub kernel: Arc<CachedKernel>,
33    /// Device this kernel executed on.
34    pub device: DeviceSpec,
35    /// Number of buffer arguments.
36    pub num_buffers: usize,
37    /// Wall-clock execution time.
38    pub elapsed: Duration,
39}