zer_prof/lib.rs
1//! Host-side NVTX profiling annotations for `zer`, consumed by `nsys`.
2//!
3//! Provides macros that wrap a block with RAII NVTX ranges visible in the
4//! **Nsight Systems** (`nsys`) timeline:
5//!
6//! | Macro | NVTX name | Active when | Use for |
7//! |------------------|----------------------|-----------------------|--------------------------------|
8//! | `trace!` | `{name}` | any feature | CPU and GPU host regions |
9//! | `trace_cuda!` | `"CUDA: {name}"` | `cuda` feature only | CUDA kernel dispatch sites |
10//! | `trace_vulkan!` | `"VULKAN: {shader}"` | `vulkan` feature only | Vulkan shader dispatch sites |
11//!
12//! `trace_cuda!` lets `ncu` filter to CUDA-specific regions:
13//! - `ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary`
14//!
15//! `trace_vulkan!` lets `ncu` filter to Vulkan shader regions:
//! - `ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary`
17//!
//! All three macros are **zero-cost no-ops** when no feature is compiled in.
19//!
20//! # Feature flags
21//!
22//! | Feature | Effect |
23//! |----------|-----------------------------------------------------------------------------|
24//! | `nvtx` | Activates NVTX standalone, without any compute backend |
25//! | `cuda` | Activates NVTX; `trace_cuda!` active; `trace_vulkan!` is a no-op |
26//! | `vulkan` | Activates NVTX; `trace_vulkan!` active; `trace_cuda!` is a no-op |
27//! | `avx2` | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops |
28//! | `cpu` | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops |
29//! | *(none)* | All macros expand to bare blocks, zero overhead, no link dep |
30//!
31//! # Usage
32//!
33//! ```rust,ignore
34//! zer_prof::init(); // call once at the start of main()
35//!
36//! // Host-side region, visible in nsys timeline for all backends.
37//! let vectors = zer_prof::trace!("compare_batch", {
38//! comparator.compare_batch(&pairs, &schema)
39//! });
40//!
41//! // CUDA kernel dispatch, filtered by ncu --nvtx-include "regex:^CUDA:.*".
42//! let out = zer_prof::trace_cuda!("em_reduce_mstep", {
43//! backend.run::<EmReduce>(input)
44//! })?;
45//!
//! // Vulkan shader dispatch, filtered by ncu --nvtx-include "regex:^VULKAN:.*".
47//! let out = zer_prof::trace_vulkan!("compare_fields", {
48//! backend.run::<CompareFields>(input)
49//! })?;
50//! ```
51
52// ── NVTX guard module ─────────────────────────────────────────────────────────
53
54#[cfg(any(
55 feature = "nvtx",
56 feature = "cuda",
57 feature = "avx2",
58 feature = "cpu",
59 feature = "vulkan"
60))]
61pub mod nvtx;
62#[cfg(any(
63 feature = "nvtx",
64 feature = "cuda",
65 feature = "avx2",
66 feature = "cpu",
67 feature = "vulkan"
68))]
69pub use nvtx::NvtxGuard;
70
71// ── init ─────────────────────────────────────────────────────────────────────
72
/// Initialise profiling state.
///
/// Currently a no-op: the body is empty and is not gated on any feature.
/// Call it once at the start of `main()`, before any [`trace!`],
/// [`trace_cuda!`] or [`trace_vulkan!`] invocation.
pub fn init() {}
78
79// ── trace! ────────────────────────────────────────────────────────────────────
80
/// Run a block inside a named NVTX range and evaluate to the block's value.
///
/// A guard built with `NvtxGuard::new($label)` is bound to a local before the
/// block runs and stays alive until the expansion's scope ends, so the range
/// covers the whole block. The range shows up in Nsight Systems as a labelled
/// band and in Nsight Compute as a host-side context annotation.
///
/// This definition is compiled when any of the `nvtx`, `cuda`, `avx2`, `cpu`
/// or `vulkan` features is enabled; with none of them the macro expands to the
/// bare block instead.
#[cfg(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
))]
#[macro_export]
macro_rules! trace {
    ($label:expr, $region:block) => {{
        // Named binding (not `_`) so the guard lives until the end of scope.
        let _nvtx_range = $crate::NvtxGuard::new($label);
        $region
    }};
}
100
/// No-op form of `trace!`, compiled when none of the `nvtx`, `cuda`, `avx2`,
/// `cpu` or `vulkan` features is enabled.
///
/// Expands to the block alone and evaluates to its value. The label
/// expression is accepted for call-site compatibility but is discarded
/// without being evaluated.
#[cfg(not(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
)))]
#[macro_export]
macro_rules! trace {
    ($label:expr, $region:block) => { $region };
}
114
115// ── trace_cuda! ───────────────────────────────────────────────────────────────
116
/// Run a CUDA kernel dispatch inside an NVTX range prefixed `"CUDA: {name}"`.
///
/// Evaluates to the block's value. A guard built with
/// `NvtxGuard::new_cuda($kernel)` is bound to a local before the block runs
/// and stays alive until the expansion's scope ends.
///
/// The prefix is what Nsight Compute's NVTX filter anchors on:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary
/// ```
/// With that filter the `.ncu-rep` file contains only the kernels launched
/// inside a `trace_cuda!` region.
///
/// This definition is compiled only with the `cuda` feature; without it the
/// macro expands to the bare block.
#[cfg(feature = "cuda")]
#[macro_export]
macro_rules! trace_cuda {
    ($kernel:expr, $dispatch:block) => {{
        // Named binding (not `_`) so the guard lives until the end of scope.
        let _cuda_range = $crate::NvtxGuard::new_cuda($kernel);
        $dispatch
    }};
}
136
/// No-op form of `trace_cuda!`, compiled when the `cuda` feature is disabled.
///
/// Expands to the block alone and evaluates to its value. The kernel name
/// expression is accepted for call-site compatibility but is discarded
/// without being evaluated.
#[cfg(not(feature = "cuda"))]
#[macro_export]
macro_rules! trace_cuda {
    ($kernel:expr, $dispatch:block) => { $dispatch };
}
144
145// ── trace_vulkan! ─────────────────────────────────────────────────────────────
146
/// Wrap a Vulkan shader dispatch with an NVTX range prefixed `"VULKAN: {shader}"`.
///
/// Evaluates to the block's value. `shader` should be the SPIR-V entry-point
/// name (e.g. `"compare_fields"`), making the range easy to locate in both
/// Nsight Systems and Nsight Compute.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the shader dispatches inside a
/// `trace_vulkan!` region.
///
/// Only active when the `vulkan` feature is compiled in; expands to a bare block
/// otherwise.
// NOTE(review): the filter above was `^GPU:.*`, which cannot match the
// documented `"VULKAN: "` prefix. The actual prefix is produced by
// `NvtxGuard::new_vulkan` — confirm it there and keep the two in sync.
#[cfg(feature = "vulkan")]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block) => {{
        // Named binding (not `_`) so the guard lives until the end of scope.
        let _guard = $crate::NvtxGuard::new_vulkan($shader);
        $body
    }};
}
169
/// No-op form of `trace_vulkan!`, compiled when the `vulkan` feature is
/// disabled.
///
/// Expands to the block alone and evaluates to its value. The shader name
/// expression is accepted for call-site compatibility but is discarded
/// without being evaluated.
#[cfg(not(feature = "vulkan"))]
#[macro_export]
macro_rules! trace_vulkan {
    ($entry_point:expr, $dispatch:block) => { $dispatch };
}