Skip to main content

zer_prof/
lib.rs

1//! Host-side NVTX profiling annotations for `zer`, consumed by `nsys`.
2//!
3//! Provides macros that wrap a block with RAII NVTX ranges visible in the
4//! **Nsight Systems** (`nsys`) timeline:
5//!
6//! | Macro            | NVTX name            | Active when           | Use for                        |
7//! |------------------|----------------------|-----------------------|--------------------------------|
8//! | `trace!`         | `{name}`             | any feature           | CPU and GPU host regions       |
9//! | `trace_cuda!`    | `"CUDA: {name}"`     | `cuda` feature only   | CUDA kernel dispatch sites     |
10//! | `trace_vulkan!`  | `"VULKAN: {shader}"`    | `vulkan` feature only | Vulkan shader dispatch sites   |
11//!
12//! `trace_cuda!` lets `ncu` filter to CUDA-specific regions:
13//!   - `ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary`
14//!
//! `trace_vulkan!` lets `ncu` filter to Vulkan shader regions (the regex must
//! match the `VULKAN: ` range-name prefix shown in the table above):
//!   - `ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary`
17//!
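//! All three macros are **zero-cost no-ops** when no feature is compiled in;
//! `trace_cuda!` and `trace_vulkan!` are additionally no-ops whenever their own
//! backend feature (`cuda` / `vulkan`) is not enabled.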
19//!
20//! # Feature flags
21//!
22//! | Feature  | Effect                                                                      |
23//! |----------|-----------------------------------------------------------------------------|
24//! | `nvtx`   | Activates NVTX standalone, without any compute backend                      |
25//! | `cuda`   | Activates NVTX; `trace_cuda!` active; `trace_vulkan!` is a no-op           |
26//! | `vulkan` | Activates NVTX; `trace_vulkan!` active; `trace_cuda!` is a no-op           |
27//! | `avx2`   | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops               |
28//! | `cpu`    | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops               |
29//! | *(none)* | All macros expand to bare blocks, zero overhead, no link dep               |
30//!
31//! # Usage
32//!
33//! ```rust,ignore
34//! zer_prof::init();  // call once at the start of main()
35//!
36//! // Host-side region, visible in nsys timeline for all backends.
37//! let vectors = zer_prof::trace!("compare_batch", {
38//!     comparator.compare_batch(&pairs, &schema)
39//! });
40//!
41//! // CUDA kernel dispatch, filtered by ncu --nvtx-include "regex:^CUDA:.*".
42//! let out = zer_prof::trace_cuda!("em_reduce_mstep", {
43//!     backend.run::<EmReduce>(input)
44//! })?;
45//!
//! // Vulkan shader dispatch, filtered by ncu --nvtx-include "regex:^VULKAN:.*".
47//! let out = zer_prof::trace_vulkan!("compare_fields", {
48//!     backend.run::<CompareFields>(input)
49//! })?;
50//! ```
51
52// ── NVTX guard module ─────────────────────────────────────────────────────────
53
54#[cfg(any(
55    feature = "nvtx",
56    feature = "cuda",
57    feature = "avx2",
58    feature = "cpu",
59    feature = "vulkan"
60))]
61pub mod nvtx;
62#[cfg(any(
63    feature = "nvtx",
64    feature = "cuda",
65    feature = "avx2",
66    feature = "cpu",
67    feature = "vulkan"
68))]
69pub use nvtx::NvtxGuard;
70
71// ── init ─────────────────────────────────────────────────────────────────────
72
/// Set up profiling state for the current process.
///
/// The body is currently empty, so this is a no-op regardless of which
/// features are enabled. Call it once at the start of `main()`, before the
/// first [`trace!`], [`trace_cuda!`] or [`trace_vulkan!`] invocation.
pub fn init() {
    // Intentionally empty: there is no profiling state to initialise yet.
}
78
79// ── trace! ────────────────────────────────────────────────────────────────────
80
/// Wrap a block with a named NVTX range.
///
/// Evaluates to the block's value.  The range is visible in Nsight Systems as
/// a labelled band and in Nsight Compute as a host-side context annotation.
/// Expands to a bare block when no feature is compiled in.
///
/// # Arguments
///
/// * `$name` - expression passed to `NvtxGuard::new` to label the range.
/// * `$body` - the block to run; a trailing comma after it is accepted.
///
/// Control flow inside `$body` (`?`, `return`, `break`) behaves as it would
/// in a plain block, because the macro expands inline at the call site.
#[cfg(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {{
        // Bind to a named variable rather than `_`: a `let _ = …` would drop
        // the guard immediately instead of keeping it alive across `$body`.
        let _guard = $crate::NvtxGuard::new($name);
        $body
    }};
}

/// No-op form of `trace!`, selected when none of the `nvtx`, `cuda`, `avx2`,
/// `cpu` or `vulkan` features is enabled.
///
/// Expands to `$body` alone and evaluates to the block's value.  `$name` does
/// not appear in the expansion, so it is never evaluated.  A trailing comma
/// after the block is accepted, matching the active form.
#[cfg(not(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
)))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
114
115// ── trace_cuda! ───────────────────────────────────────────────────────────────
116
/// Wrap a CUDA kernel dispatch with an NVTX range prefixed `"CUDA: {name}"`.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the kernels launched inside a
/// `trace_cuda!` region.
///
/// Only active when the `cuda` feature is compiled in; expands to a bare block
/// otherwise.
///
/// # Arguments
///
/// * `$name` - expression passed to `NvtxGuard::new_cuda` to label the range.
/// * `$body` - the block to run; the macro evaluates to its value.  A
///   trailing comma after the block is accepted.
#[cfg(feature = "cuda")]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {{
        // Bind to a named variable rather than `_`: a `let _ = …` would drop
        // the guard immediately instead of keeping it alive across `$body`.
        let _guard = $crate::NvtxGuard::new_cuda($name);
        $body
    }};
}

/// No-op form of `trace_cuda!`, selected when the `cuda` feature is disabled.
///
/// Expands to `$body` alone and evaluates to the block's value.  `$name` does
/// not appear in the expansion, so it is never evaluated.  A trailing comma
/// after the block is accepted, matching the active form.
#[cfg(not(feature = "cuda"))]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
144
145// ── trace_vulkan! ─────────────────────────────────────────────────────────────
146
/// Wrap a Vulkan shader dispatch with an NVTX range prefixed `"VULKAN: {shader}"`.
///
/// `shader` should be the SPIR-V entry-point name (e.g. `"compare_fields"`),
/// making the range easy to locate in both Nsight Systems and Nsight Compute.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter, so the regex
/// must match the `VULKAN: ` prefix documented above:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the shader dispatches inside a
/// `trace_vulkan!` region.
///
/// Only active when the `vulkan` feature is compiled in; expands to a bare block
/// otherwise.
///
/// # Arguments
///
/// * `$shader` - expression passed to `NvtxGuard::new_vulkan` to label the
///   range.
/// * `$body` - the block to run; the macro evaluates to its value.  A
///   trailing comma after the block is accepted.
#[cfg(feature = "vulkan")]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {{
        // Bind to a named variable rather than `_`: a `let _ = …` would drop
        // the guard immediately instead of keeping it alive across `$body`.
        let _guard = $crate::NvtxGuard::new_vulkan($shader);
        $body
    }};
}

/// No-op form of `trace_vulkan!`, selected when the `vulkan` feature is
/// disabled.
///
/// Expands to `$body` alone and evaluates to the block's value.  `$shader`
/// does not appear in the expansion, so it is never evaluated.  A trailing
/// comma after the block is accepted, matching the active form.
#[cfg(not(feature = "vulkan"))]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {
        $body
    };
}