1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
//! Per-kernel execution profiling.
//!
//! Provides structured timing data for kernel execution via
//! [`ExecutionPlan::execute_profiled()`](crate::ExecutionPlan::execute_profiled).
use Arc;
use Duration;
use DeviceSpec;
use crateCachedKernel;
/// Per-kernel timing from a profiled execution.
///
/// Holds an `Arc<CachedKernel>` for zero-copy access to kernel metadata
/// (entry point, generated code, global/local size, variable names).
///
/// # Examples
///
/// ```ignore
/// let plan = tensor.prepare()?;
/// let profiles = plan.execute_profiled()?;
///
/// for (i, p) in profiles.iter().enumerate() {
///     println!("{:4} {:>8.3}ms {} ({} bufs, {:?})",
///         i, p.elapsed.as_secs_f64() * 1000.0,
///         p.kernel.entry_point, p.num_buffers, p.device);
/// }
/// ```