zer_prof/lib.rs
1//! Host-side NVTX profiling annotations for `zer`, consumed by `nsys`.
2//!
3//! Provides macros that wrap a block with RAII NVTX ranges visible in the
4//! **Nsight Systems** (`nsys`) timeline:
5//!
6//! | Macro | NVTX name | Active when | Use for |
7//! |------------------|----------------------|-----------------------|--------------------------------|
8//! | `trace!` | `{name}` | any feature | CPU and GPU host regions |
9//! | `trace_cuda!` | `"CUDA: {name}"` | `cuda` feature only | CUDA kernel dispatch sites |
10//! | `trace_vulkan!` | `"VULKAN: {shader}"` | `vulkan` feature only | Vulkan shader dispatch sites |
11//!
12//! `trace_cuda!` lets `ncu` filter to CUDA-specific regions:
13//! - `ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary`
14//!
//! `trace_vulkan!` lets `ncu` filter to Vulkan shader regions (the regex must
//! match the `"VULKAN: "` prefix listed in the table above):
//! - `ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary`
17//!
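//! All three macros are **zero-cost no-ops** when no feature is compiled in;
//! `trace_cuda!` and `trace_vulkan!` are additionally no-ops whenever their
//! own feature (`cuda` / `vulkan`) is not enabled.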
19//!
20//! # Feature flags
21//!
22//! | Feature | Effect |
23//! |----------|-----------------------------------------------------------------------------|
24//! | `nvtx` | Activates NVTX standalone, without any compute backend |
25//! | `cuda` | Activates NVTX; `trace_cuda!` active; `trace_vulkan!` is a no-op |
26//! | `vulkan` | Activates NVTX; `trace_vulkan!` active; `trace_cuda!` is a no-op |
27//! | `avx2` | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops |
28//! | `cpu` | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops |
29//! | *(none)* | All macros expand to bare blocks, zero overhead, no link dep |
30//!
31//! # Usage
32//!
33//! ```rust,ignore
34//! zer_prof::init(); // call once at the start of main()
35//!
36//! // Host-side region, visible in nsys timeline for all backends.
37//! let vectors = zer_prof::trace!("compare_batch", {
38//! comparator.compare_batch(&pairs, &schema)
39//! });
40//!
41//! // CUDA kernel dispatch, filtered by ncu --nvtx-include "regex:^CUDA:.*".
42//! let out = zer_prof::trace_cuda!("em_reduce_mstep", {
43//! backend.run::<EmReduce>(input)
44//! })?;
45//!
//! // Vulkan shader dispatch, filtered by ncu --nvtx-include "regex:^VULKAN:.*".
47//! let out = zer_prof::trace_vulkan!("compare_fields", {
48//! backend.run::<CompareFields>(input)
49//! })?;
50//! ```
51
52// ── NVTX guard module ─────────────────────────────────────────────────────────
53
54#[cfg(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan"))]
55pub mod nvtx;
56#[cfg(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan"))]
57pub use nvtx::NvtxGuard;
58
59// ── init ─────────────────────────────────────────────────────────────────────
60
/// Set up profiling state for the process.
///
/// At present this does nothing for every backend. It exists so callers have
/// a stable hook: invoke it once at the top of `main()`, ahead of the first
/// [`trace!`], [`trace_cuda!`] or [`trace_vulkan!`] expansion.
pub fn init() {
    // Intentionally empty: no backend needs start-up work right now.
}
66
67// ── trace! ────────────────────────────────────────────────────────────────────
68
/// Wrap a block with a named NVTX range.
///
/// Evaluates to the block's value. The range is visible in Nsight Systems as
/// a labelled band and in Nsight Compute as a host-side context annotation.
/// Expands to a bare block when no feature is compiled in.
///
/// # Arguments
///
/// * `$name` – expression passed to `NvtxGuard::new`; evaluated once, before
///   the block runs.
/// * `$body` – braced block executed inside the range. A trailing comma after
///   the block is accepted.
///
/// The expansion is a plain block, not a closure, so `return` and `?` inside
/// `$body` act on the enclosing function.
#[cfg(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan"))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {{
        // A named binding (rather than `let _ = …`) keeps the guard alive
        // until the end of this block, i.e. until `$body` has been evaluated.
        let _guard = $crate::NvtxGuard::new($name);
        $body
    }};
}

/// No-op form of `trace!`, selected when none of the profiling features is
/// compiled in.
///
/// Expands to `$body` alone: the `$name` expression is discarded without being
/// evaluated, and nothing from the `nvtx` module is referenced. A trailing
/// comma after the block is accepted.
#[cfg(not(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan")))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
90
91// ── trace_cuda! ───────────────────────────────────────────────────────────────
92
/// Wrap a CUDA kernel dispatch with an NVTX range prefixed `"CUDA: {name}"`.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the kernels launched inside a
/// `trace_cuda!` region.
///
/// Only active when the `cuda` feature is compiled in; expands to a bare block
/// otherwise.
///
/// # Arguments
///
/// * `$name` – expression passed to `NvtxGuard::new_cuda`; evaluated once,
///   before the block runs.
/// * `$body` – braced block executed inside the range; the macro evaluates to
///   its value. A trailing comma after the block is accepted.
#[cfg(feature = "cuda")]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {{
        // A named binding (rather than `let _ = …`) keeps the guard alive
        // until the end of this block, i.e. until `$body` has been evaluated.
        let _guard = $crate::NvtxGuard::new_cuda($name);
        $body
    }};
}

/// No-op form of `trace_cuda!`, selected when the `cuda` feature is off.
///
/// Expands to `$body` alone: the `$name` expression is discarded without being
/// evaluated. A trailing comma after the block is accepted.
#[cfg(not(feature = "cuda"))]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
120
121// ── trace_vulkan! ─────────────────────────────────────────────────────────────
122
/// Wrap a Vulkan shader dispatch with an NVTX range prefixed `"VULKAN: {shader}"`.
///
/// `shader` should be the SPIR-V entry-point name (e.g. `"compare_fields"`),
/// making the range easy to locate in both Nsight Systems and Nsight Compute.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter. The regex has to
/// match the prefix produced by `NvtxGuard::new_vulkan` (documented here as
/// `"VULKAN: "`):
/// ```text
/// ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the shader dispatches inside a
/// `trace_vulkan!` region.
///
/// Only active when the `vulkan` feature is compiled in; expands to a bare block
/// otherwise.
///
/// # Arguments
///
/// * `$shader` – expression passed to `NvtxGuard::new_vulkan`; evaluated once,
///   before the block runs.
/// * `$body` – braced block executed inside the range; the macro evaluates to
///   its value. A trailing comma after the block is accepted.
#[cfg(feature = "vulkan")]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {{
        // A named binding (rather than `let _ = …`) keeps the guard alive
        // until the end of this block, i.e. until `$body` has been evaluated.
        let _guard = $crate::NvtxGuard::new_vulkan($shader);
        $body
    }};
}

/// No-op form of `trace_vulkan!`, selected when the `vulkan` feature is off.
///
/// Expands to `$body` alone: the `$shader` expression is discarded without
/// being evaluated. A trailing comma after the block is accepted.
#[cfg(not(feature = "vulkan"))]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {
        $body
    };
}