Skip to main content

zer_prof/
lib.rs

1//! Host-side NVTX profiling annotations for `zer`, consumed by `nsys`.
2//!
3//! Provides macros that wrap a block with RAII NVTX ranges visible in the
4//! **Nsight Systems** (`nsys`) timeline:
5//!
6//! | Macro            | NVTX name            | Active when           | Use for                        |
7//! |------------------|----------------------|-----------------------|--------------------------------|
8//! | `trace!`         | `{name}`             | any feature           | CPU and GPU host regions       |
9//! | `trace_cuda!`    | `"CUDA: {name}"`     | `cuda` feature only   | CUDA kernel dispatch sites     |
10//! | `trace_vulkan!`  | `"VULKAN: {shader}"`    | `vulkan` feature only | Vulkan shader dispatch sites   |
11//!
12//! `trace_cuda!` lets `ncu` filter to CUDA-specific regions:
13//!   - `ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary`
14//!
//! `trace_vulkan!` lets `ncu` filter to Vulkan shader regions:
//!   - `ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary`
17//!
//! All three macros expand to the bare block (**zero-cost no-ops**) when no feature is compiled in.
19//!
20//! # Feature flags
21//!
22//! | Feature  | Effect                                                                      |
23//! |----------|-----------------------------------------------------------------------------|
24//! | `nvtx`   | Activates NVTX standalone, without any compute backend                      |
25//! | `cuda`   | Activates NVTX; `trace_cuda!` active; `trace_vulkan!` is a no-op           |
26//! | `vulkan` | Activates NVTX; `trace_vulkan!` active; `trace_cuda!` is a no-op           |
27//! | `avx2`   | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops               |
28//! | `cpu`    | Activates NVTX; `trace_cuda!` and `trace_vulkan!` are no-ops               |
29//! | *(none)* | All macros expand to bare blocks, zero overhead, no link dep               |
30//!
31//! # Usage
32//!
33//! ```rust,ignore
34//! zer_prof::init();  // call once at the start of main()
35//!
36//! // Host-side region, visible in nsys timeline for all backends.
37//! let vectors = zer_prof::trace!("compare_batch", {
38//!     comparator.compare_batch(&pairs, &schema)
39//! });
40//!
41//! // CUDA kernel dispatch, filtered by ncu --nvtx-include "regex:^CUDA:.*".
42//! let out = zer_prof::trace_cuda!("em_reduce_mstep", {
43//!     backend.run::<EmReduce>(input)
44//! })?;
45//!
//! // Vulkan shader dispatch, filtered by ncu --nvtx-include "regex:^VULKAN:.*".
47//! let out = zer_prof::trace_vulkan!("compare_fields", {
48//!     backend.run::<CompareFields>(input)
49//! })?;
50//! ```
51
52// ── NVTX guard module ─────────────────────────────────────────────────────────
53
54#[cfg(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan"))]
55pub mod nvtx;
56#[cfg(any(feature = "nvtx", feature = "cuda", feature = "avx2", feature = "cpu", feature = "vulkan"))]
57pub use nvtx::NvtxGuard;
58
59// ── init ─────────────────────────────────────────────────────────────────────
60
/// Initialise profiling state.
///
/// Currently a no-op for every feature combination: the body is empty, so
/// calling it more than once is harmless.  Call it once at the start of
/// `main()`, before any [`trace!`], [`trace_cuda!`] or [`trace_vulkan!`]
/// invocation.
///
/// Marked `#[inline]` so the empty body can be inlined into downstream crates
/// instead of being emitted as a cross-crate call.
#[inline]
pub fn init() {}
66
67// ── trace! ────────────────────────────────────────────────────────────────────
68
/// Wrap a block with a named NVTX range.
///
/// Evaluates to the block's value.  The range is visible in Nsight Systems as
/// a labelled band and in Nsight Compute as a host-side context annotation.
///
/// This is the active variant, compiled when any of the `nvtx`, `cuda`,
/// `avx2`, `cpu` or `vulkan` features is enabled.  It passes `$name` to
/// `NvtxGuard::new` and keeps the resulting guard alive while the block runs.
///
/// A trailing comma after the block is accepted.
///
/// ```rust,ignore
/// let vectors = zer_prof::trace!("compare_batch", {
///     comparator.compare_batch(&pairs, &schema)
/// });
/// ```
#[cfg(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {{
        // Bound to a named local (not `_`) so the guard is not dropped until
        // `$body` has been evaluated, including on early exit via `?`/`return`.
        let _guard = $crate::NvtxGuard::new($name);
        $body
    }};
}

/// Wrap a block with a named NVTX range (no-op variant).
///
/// Compiled when none of the `nvtx`, `cuda`, `avx2`, `cpu` or `vulkan`
/// features is enabled.  Expands to the bare block and evaluates to its
/// value; `$name` is discarded and never evaluated.
///
/// A trailing comma after the block is accepted.
#[cfg(not(any(
    feature = "nvtx",
    feature = "cuda",
    feature = "avx2",
    feature = "cpu",
    feature = "vulkan"
)))]
#[macro_export]
macro_rules! trace {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
90
91// ── trace_cuda! ───────────────────────────────────────────────────────────────
92
/// Wrap a CUDA kernel dispatch with an NVTX range prefixed `"CUDA: {name}"`.
///
/// Evaluates to the block's value.  The macro hands `$name` unchanged to
/// `NvtxGuard::new_cuda`, which is expected to add the `CUDA: ` prefix.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^CUDA:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the kernels launched inside a
/// `trace_cuda!` region.
///
/// This is the active variant, compiled only when the `cuda` feature is
/// enabled.  A trailing comma after the block is accepted.
#[cfg(feature = "cuda")]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {{
        // Bound to a named local (not `_`) so the guard is not dropped until
        // `$body` has been evaluated, including on early exit via `?`/`return`.
        let _guard = $crate::NvtxGuard::new_cuda($name);
        $body
    }};
}

/// Wrap a CUDA kernel dispatch with an NVTX range (no-op variant).
///
/// Compiled when the `cuda` feature is not enabled.  Expands to the bare
/// block and evaluates to its value; `$name` is discarded and never
/// evaluated, so no NVTX range is opened.
///
/// A trailing comma after the block is accepted.
#[cfg(not(feature = "cuda"))]
#[macro_export]
macro_rules! trace_cuda {
    ($name:expr, $body:block $(,)?) => {
        $body
    };
}
120
121// ── trace_vulkan! ─────────────────────────────────────────────────────────────
122
/// Wrap a Vulkan shader dispatch with an NVTX range prefixed `"VULKAN: {shader}"`.
///
/// `shader` should be the SPIR-V entry-point name (e.g. `"compare_fields"`),
/// making the range easy to locate in both Nsight Systems and Nsight Compute.
///
/// Evaluates to the block's value.  The macro hands `$shader` unchanged to
/// `NvtxGuard::new_vulkan`, which is expected to add the `VULKAN: ` prefix.
///
/// The prefix is the anchor for Nsight Compute's NVTX filter:
/// ```text
/// ncu --nvtx --nvtx-include "regex:^VULKAN:.*" ./your_binary
/// ```
/// This limits the `.ncu-rep` file to only the shader dispatches inside a
/// `trace_vulkan!` region.
///
/// This is the active variant, compiled only when the `vulkan` feature is
/// enabled.  A trailing comma after the block is accepted.
#[cfg(feature = "vulkan")]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {{
        // Bound to a named local (not `_`) so the guard is not dropped until
        // `$body` has been evaluated, including on early exit via `?`/`return`.
        let _guard = $crate::NvtxGuard::new_vulkan($shader);
        $body
    }};
}

/// Wrap a Vulkan shader dispatch with an NVTX range (no-op variant).
///
/// Compiled when the `vulkan` feature is not enabled.  Expands to the bare
/// block and evaluates to its value; `$shader` is discarded and never
/// evaluated, so no NVTX range is opened.
///
/// A trailing comma after the block is accepted.
#[cfg(not(feature = "vulkan"))]
#[macro_export]
macro_rules! trace_vulkan {
    ($shader:expr, $body:block $(,)?) => {
        $body
    };
}