dynamo_runtime/nvtx.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! NVTX timeline-annotation helpers for Nsight Systems profiling.
5//!
//! Delegates to [`cudarc::nvtx`] for the actual NVTX calls.
7//!
8//! # Gating (two-level)
9//!
//! | Cargo feature `nvtx` | `DYN_ENABLE_RUST_NVTX` env                      | Effect                                   |
//! |----------------------|-------------------------------------------------|------------------------------------------|
//! | off (default)        | any                                             | macros compile to nothing; zero overhead |
//! | on                   | unset, or any value not listed below            | one `Relaxed` load per site (~1 ns)      |
//! | on                   | `1` / `true` / `yes` / `on` (case-insensitive)  | cudarc NVTX calls (~50 ns/annotation)    |
15//!
16//! # Usage
17//!
18//! ```rust,ignore
19//! let _r = dynamo_nvtx_range!("preprocess.tokenize"); // RAII — pops at scope end
20//! dynamo_nvtx_push!("codec.encode");
21//! dynamo_nvtx_pop!();
22//! dynamo_nvtx_name_thread!("tokio-worker-0");
23//! ```
24//!
25//! # Build
26//!
27//! ```bash
28//! cargo build --profile profiling --features nvtx
29//! ```
30//! Requires `libnvToolsExt.so` at runtime (CUDA Toolkit or NVHPC).
31
32#[cfg(feature = "nvtx")]
33use std::sync::atomic::{AtomicBool, Ordering};
34
/// Runtime switch for NVTX annotations.
///
/// Starts out `false`, is written by [`init`], and is read with a `Relaxed` load at
/// every annotation site. Only exists when the `nvtx` Cargo feature is compiled in.
#[cfg(feature = "nvtx")]
static NVTX_ENABLED: AtomicBool = AtomicBool::new(false);
37
38// ── Public API ───────────────────────────────────────────────────────────────
39
/// Read `DYN_ENABLE_RUST_NVTX` and latch the result into the runtime NVTX switch.
///
/// The variable counts as "on" when its value, compared case-insensitively, is one of
/// `1`, `true`, `yes` or `on`. An unset variable, a value that is not valid Unicode, or
/// any other value leaves annotations off.
///
/// Call this once during startup, before any annotation macro runs; until then the
/// switch keeps its initial `false`. An informational log line is emitted only when
/// annotations end up enabled.
///
/// Does nothing when the crate is built without the `nvtx` Cargo feature.
pub fn init() {
    #[cfg(feature = "nvtx")]
    {
        let requested = match std::env::var("DYN_ENABLE_RUST_NVTX") {
            Ok(raw) => matches!(raw.to_lowercase().as_str(), "1" | "true" | "yes" | "on"),
            // Unset or not valid Unicode: treat as disabled.
            Err(_) => false,
        };

        NVTX_ENABLED.store(requested, Ordering::Relaxed);

        if requested {
            tracing::info!("NVTX annotations enabled (DYN_ENABLE_RUST_NVTX)");
        }
    }
}
55
/// Report whether NVTX annotations are currently live.
///
/// Yields `true` only when the crate was built with the `nvtx` Cargo feature **and** the
/// runtime switch has been turned on (see [`init`]). Without the feature the answer is
/// always `false`.
#[inline(always)]
pub fn enabled() -> bool {
    // Exactly one of the two blocks survives cfg-stripping and becomes the tail expression.
    #[cfg(feature = "nvtx")]
    {
        NVTX_ENABLED.load(Ordering::Relaxed)
    }
    #[cfg(not(feature = "nvtx"))]
    {
        false
    }
}
66
/// Open an NVTX range called `name` on the calling thread's range stack.
///
/// With the `nvtx` feature compiled in, the range is pushed only if the runtime switch is
/// on; otherwise the call returns immediately. Without the feature the body is empty.
#[inline(always)]
pub fn push_impl(name: &str) {
    // Keeps `name` "used" in builds where the NVTX call below is compiled out.
    #[cfg(not(feature = "nvtx"))]
    let _ = name;

    #[cfg(feature = "nvtx")]
    {
        if !NVTX_ENABLED.load(Ordering::Relaxed) {
            return;
        }
        cudarc::nvtx::result::range_push(name);
    }
}
79
/// Close the innermost NVTX range on the calling thread's range stack.
///
/// With the `nvtx` feature compiled in, the pop happens only if the runtime switch is on;
/// otherwise the call returns immediately. Without the feature the body is empty.
#[inline(always)]
pub fn pop_impl() {
    #[cfg(feature = "nvtx")]
    {
        if !NVTX_ENABLED.load(Ordering::Relaxed) {
            return;
        }
        cudarc::nvtx::result::range_pop();
    }
}
91
/// Attach `name` to the current OS thread in the Nsight Systems timeline.
///
/// With the `nvtx` feature compiled in and the runtime switch on, the thread id is looked
/// up (via the `gettid` syscall on Linux; `0` is passed on every other target) and handed
/// to NVTX together with `name`. In all other cases the call does nothing.
#[inline(always)]
pub fn name_current_thread_impl(name: &str) {
    // Keeps `name` "used" in builds where the NVTX call below is compiled out.
    #[cfg(not(feature = "nvtx"))]
    let _ = name;

    #[cfg(feature = "nvtx")]
    {
        if !NVTX_ENABLED.load(Ordering::Relaxed) {
            return;
        }

        // SAFETY: `gettid` takes no arguments and touches no caller memory; it only
        // returns the id of the calling thread.
        #[cfg(target_os = "linux")]
        let os_tid = unsafe { libc::syscall(libc::SYS_gettid) as u32 };
        // NOTE(review): no thread-id lookup is implemented off Linux, so 0 is passed —
        // confirm that is an acceptable placeholder for NVTX on those targets.
        #[cfg(not(target_os = "linux"))]
        let os_tid = 0u32;

        cudarc::nvtx::result::name_os_thread(os_tid, name);
    }
}
108
109// ── RAII guard ───────────────────────────────────────────────────────────────
110
/// RAII guard that pops an NVTX range when dropped.
/// Construct with [`dynamo_nvtx_range!`].
///
/// `active` records whether a range was actually pushed when the guard was created, so
/// the drop only pops what this guard itself pushed — even if the runtime switch changes
/// in between.
///
/// The guard is `#[must_use]`: an unbound guard is dropped at the end of its statement,
/// which would close the range immediately.
///
/// NOTE(review): push and pop act on the calling thread's range stack, so a guard that is
/// created on one thread and dropped on another (for example when held across an `.await`
/// on a multi-threaded executor) would presumably unbalance both stacks — confirm before
/// holding a guard across await points.
#[cfg(feature = "nvtx")]
#[derive(Debug)]
#[must_use = "the NVTX range closes as soon as the guard is dropped; bind it, e.g. `let _r = ...`"]
pub struct NvtxRangeGuard {
    active: bool,
}

/// Zero-sized no-op guard used when the `nvtx` feature is off.
///
/// Kept `#[must_use]` so call sites behave the same in both build configurations.
#[cfg(not(feature = "nvtx"))]
#[derive(Debug)]
#[must_use = "the NVTX range closes as soon as the guard is dropped; bind it, e.g. `let _r = ...`"]
pub struct NvtxRangeGuard;

impl NvtxRangeGuard {
    /// Push a range called `name` if annotations are enabled and return the guard that
    /// will pop it. Without the `nvtx` feature this only builds the zero-sized guard.
    #[doc(hidden)]
    pub fn new(name: &str) -> Self {
        // Exactly one of the two blocks survives cfg-stripping and becomes the tail expression.
        #[cfg(feature = "nvtx")]
        {
            let active = NVTX_ENABLED.load(Ordering::Relaxed);
            if active {
                cudarc::nvtx::result::range_push(name);
            }
            Self { active }
        }
        #[cfg(not(feature = "nvtx"))]
        {
            let _ = name;
            Self
        }
    }
}

// A `Drop` impl exists in both configurations; without the feature its body is empty.
impl Drop for NvtxRangeGuard {
    fn drop(&mut self) {
        #[cfg(feature = "nvtx")]
        {
            // Pop only when `new` actually pushed a range.
            if self.active {
                cudarc::nvtx::result::range_pop();
            }
        }
    }
}
154
155// ── Macros ───────────────────────────────────────────────────────────────────
156
/// Open a named NVTX range on the calling thread; close it with [`dynamo_nvtx_pop!`].
///
/// Expands to a plain call to `$crate::nvtx::push_impl`, whose body is empty when the
/// `nvtx` Cargo feature is off. The argument is an ordinary expression passed to that
/// function, so it is still evaluated at the call site even when annotations are off —
/// prefer string literals over freshly formatted strings.
#[macro_export]
macro_rules! dynamo_nvtx_push {
    ($label:expr) => (
        $crate::nvtx::push_impl($label)
    );
}
165
/// Close the innermost NVTX range on the calling thread.
///
/// Expands to a plain call to `$crate::nvtx::pop_impl`, whose body is empty when the
/// `nvtx` Cargo feature is off.
#[macro_export]
macro_rules! dynamo_nvtx_pop {
    () => (
        $crate::nvtx::pop_impl()
    );
}
174
/// Open a named NVTX range tied to a guard value; the range closes when the guard drops.
///
/// ```rust,ignore
/// let _r = dynamo_nvtx_range!("preprocess.tokenize");
/// // ... annotated work ...
/// // `_r` goes out of scope here and the range closes
/// ```
///
/// Bind the result to a named variable such as `_r`: a guard that is not bound is dropped
/// right away, ending the range immediately.
///
/// Expands to `$crate::nvtx::NvtxRangeGuard::new(..)`, which builds a zero-sized no-op
/// guard when the `nvtx` Cargo feature is off. The argument expression is still evaluated
/// at the call site.
#[macro_export]
macro_rules! dynamo_nvtx_range {
    ($label:expr) => (
        $crate::nvtx::NvtxRangeGuard::new($label)
    );
}
188
/// Give the current OS thread a name in the Nsight Systems timeline.
///
/// Expands to a plain call to `$crate::nvtx::name_current_thread_impl`, whose body is
/// empty when the `nvtx` Cargo feature is off. The argument expression is still evaluated
/// at the call site.
#[macro_export]
macro_rules! dynamo_nvtx_name_thread {
    ($label:expr) => (
        $crate::nvtx::name_current_thread_impl($label)
    );
}