Skip to main content

dynamo_runtime/
nvtx.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! NVTX timeline-annotation helpers for Nsight Systems profiling.
5//!
//! Delegates to [`cudarc::nvtx`] for the actual NVTX calls.
7//!
8//! # Gating (two-level)
9//!
//! | Cargo feature `nvtx` | `DYN_ENABLE_RUST_NVTX` env                     | Effect                                    |
//! |----------------------|------------------------------------------------|-------------------------------------------|
//! | off (default)        | any                                            | macros compile to nothing; zero overhead  |
//! | on                   | unset, or any value not listed below           | one `Relaxed` load per site (~1 ns)       |
//! | on                   | `1` / `true` / `yes` / `on` (case-insensitive) | cudarc NVTX calls (~50 ns/annotation)     |
15//!
16//! # Usage
17//!
18//! ```rust,ignore
19//! let _r = dynamo_nvtx_range!("preprocess.tokenize"); // RAII — pops at scope end
20//! dynamo_nvtx_push!("codec.encode");
21//! dynamo_nvtx_pop!();
22//! dynamo_nvtx_name_thread!("tokio-worker-0");
23//! ```
24//!
25//! # Build
26//!
27//! ```bash
28//! cargo build --profile profiling --features nvtx
29//! ```
30//! Requires `libnvToolsExt.so` at runtime (CUDA Toolkit or NVHPC).
31
32#[cfg(feature = "nvtx")]
33use std::sync::atomic::{AtomicBool, Ordering};
34
35#[cfg(feature = "nvtx")]
36static NVTX_ENABLED: AtomicBool = AtomicBool::new(false);
37
38// ── Public API ───────────────────────────────────────────────────────────────
39
/// Read `DYN_ENABLE_RUST_NVTX` and latch the result into the global NVTX switch.
///
/// The value is lower-cased and compared against `1`, `true`, `yes` and `on`.
/// Anything else — including an unset variable or one that cannot be read as
/// a `String` — leaves annotations disabled. An info-level `tracing` event is
/// emitted when annotations are switched on.
///
/// Call this during startup: the switch starts out `false`, so annotation
/// sites that run before `init` do nothing. When the `nvtx` Cargo feature is
/// off the body is compiled out and this function is a no-op.
pub fn init() {
    #[cfg(feature = "nvtx")]
    {
        let requested = match std::env::var("DYN_ENABLE_RUST_NVTX") {
            Ok(raw) => {
                let lowered = raw.to_lowercase();
                matches!(lowered.as_str(), "1" | "true" | "yes" | "on")
            }
            Err(_) => false,
        };
        NVTX_ENABLED.store(requested, Ordering::Relaxed);
        if requested {
            tracing::info!("NVTX annotations enabled (DYN_ENABLE_RUST_NVTX)");
        }
    }
}
55
/// Report whether NVTX annotations are currently live.
///
/// With the `nvtx` feature compiled in, this is a `Relaxed` load of the switch
/// written by [`init`]; without the feature it is the constant `false`.
#[inline(always)]
pub fn enabled() -> bool {
    #[cfg(feature = "nvtx")]
    {
        NVTX_ENABLED.load(Ordering::Relaxed)
    }
    #[cfg(not(feature = "nvtx"))]
    {
        false
    }
}
66
/// Open an NVTX range called `name` on the calling thread's range stack.
///
/// With the `nvtx` feature on, the range is pushed only if the runtime switch
/// is set; otherwise the call costs a single `Relaxed` load. With the feature
/// off the body is compiled out and `name` is simply discarded.
#[inline(always)]
pub fn push_impl(name: &str) {
    #[cfg(feature = "nvtx")]
    {
        let live = NVTX_ENABLED.load(Ordering::Relaxed);
        if live {
            cudarc::nvtx::result::range_push(name);
        }
    }
    // Keeps the parameter "used" in builds where the block above is removed.
    #[cfg(not(feature = "nvtx"))]
    let _ = name;
}
79
/// Close the innermost NVTX range on the calling thread's range stack.
///
/// With the `nvtx` feature on, the pop is issued only if the runtime switch is
/// set. With the feature off the body is compiled out.
#[inline(always)]
pub fn pop_impl() {
    #[cfg(feature = "nvtx")]
    {
        let live = NVTX_ENABLED.load(Ordering::Relaxed);
        if !live {
            return;
        }
        cudarc::nvtx::result::range_pop();
    }
}
91
/// Attach `name` to the calling OS thread for the Nsight Systems timeline.
///
/// With the `nvtx` feature on and the runtime switch set, the thread id is
/// obtained via the `gettid` syscall on Linux and passed to cudarc's
/// `name_os_thread` together with `name`. On other targets the id passed is `0`.
/// With the feature off the body is compiled out and `name` is discarded.
#[inline(always)]
pub fn name_current_thread_impl(name: &str) {
    #[cfg(feature = "nvtx")]
    {
        if NVTX_ENABLED.load(Ordering::Relaxed) {
            #[cfg(target_os = "linux")]
            // SAFETY: `SYS_gettid` takes no arguments and only returns the caller's thread id.
            let os_tid = unsafe { libc::syscall(libc::SYS_gettid) } as u32;
            // NOTE(review): id 0 is a placeholder on non-Linux targets — confirm NVTX treats it sensibly.
            #[cfg(not(target_os = "linux"))]
            let os_tid: u32 = 0;
            cudarc::nvtx::result::name_os_thread(os_tid, name);
        }
    }
    // Keeps the parameter "used" in builds where the block above is removed.
    #[cfg(not(feature = "nvtx"))]
    let _ = name;
}
108
109// ── RAII guard ───────────────────────────────────────────────────────────────
110
/// Scope guard that closes an NVTX range when it is dropped.
///
/// Construct it with [`dynamo_nvtx_range!`] and bind it to a named variable
/// (`let _r = …`). Binding to `_` or discarding the value drops the guard
/// immediately and the range closes on the spot.
///
/// The range is pushed on the thread that calls [`NvtxRangeGuard::new`] and
/// popped on whichever thread drops the guard.
/// NOTE(review): holding a guard across an `.await` on a work-stealing runtime
/// could pop on a different thread than the push — confirm call sites keep
/// guards inside synchronous sections.
#[cfg(feature = "nvtx")]
#[derive(Debug)]
#[must_use = "the NVTX range closes as soon as the guard is dropped; bind it with `let _r = ...`"]
pub struct NvtxRangeGuard {
    /// `true` when `new` actually pushed a range, so `drop` must pop it.
    active: bool,
}

/// Zero-sized no-op guard used when the `nvtx` feature is off.
///
/// Bind it to a named variable exactly as with the real guard so call sites
/// behave the same when the feature is later enabled.
#[cfg(not(feature = "nvtx"))]
#[derive(Debug)]
#[must_use = "the NVTX range closes as soon as the guard is dropped; bind it with `let _r = ...`"]
pub struct NvtxRangeGuard;

impl NvtxRangeGuard {
    /// Open a range called `name` (if annotations are live) and return its guard.
    ///
    /// The runtime switch is sampled once here; the guard remembers the answer so
    /// that `drop` only pops a range that was really pushed, even if the switch
    /// changes in between.
    #[doc(hidden)]
    #[inline(always)]
    pub fn new(name: &str) -> Self {
        #[cfg(feature = "nvtx")]
        {
            let active = NVTX_ENABLED.load(Ordering::Relaxed);
            if active {
                cudarc::nvtx::result::range_push(name);
            }
            NvtxRangeGuard { active }
        }
        #[cfg(not(feature = "nvtx"))]
        {
            let _ = name;
            NvtxRangeGuard
        }
    }
}

#[cfg(feature = "nvtx")]
impl Drop for NvtxRangeGuard {
    /// Pop the range opened by `new`, if one was opened.
    #[inline(always)]
    fn drop(&mut self) {
        if self.active {
            cudarc::nvtx::result::range_pop();
        }
    }
}

#[cfg(not(feature = "nvtx"))]
impl Drop for NvtxRangeGuard {
    /// Intentionally empty: keeps the type `Drop` in both feature configurations.
    #[inline(always)]
    fn drop(&mut self) {}
}
154
155// ── Macros ───────────────────────────────────────────────────────────────────
156
/// Open a named NVTX range on the calling thread; pair it with [`dynamo_nvtx_pop!`].
///
/// Expands to a call to `nvtx::push_impl`, whose body is compiled out when the
/// `nvtx` Cargo feature is off.
#[macro_export]
macro_rules! dynamo_nvtx_push {
    ($label:expr) => {
        $crate::nvtx::push_impl($label)
    };
}
165
/// Close the innermost NVTX range on the calling thread.
///
/// Expands to a call to `nvtx::pop_impl`, whose body is compiled out when the
/// `nvtx` Cargo feature is off.
#[macro_export]
macro_rules! dynamo_nvtx_pop {
    () => {
        $crate::nvtx::pop_impl()
    };
}
174
/// Open a named NVTX range that is closed automatically when the returned guard
/// goes out of scope.
///
/// ```rust,ignore
/// let _r = dynamo_nvtx_range!("preprocess.tokenize");
/// // ... annotated work ...
/// // `_r` is dropped here and the range closes
/// ```
///
/// Expands to `nvtx::NvtxRangeGuard::new`; the guard is a zero-sized no-op type
/// when the `nvtx` Cargo feature is off.
#[macro_export]
macro_rules! dynamo_nvtx_range {
    ($label:expr) => {
        $crate::nvtx::NvtxRangeGuard::new($label)
    };
}
188
/// Give the calling OS thread a name in the Nsight Systems timeline.
///
/// Expands to a call to `nvtx::name_current_thread_impl`, whose body is compiled
/// out when the `nvtx` Cargo feature is off.
#[macro_export]
macro_rules! dynamo_nvtx_name_thread {
    ($label:expr) => {
        $crate::nvtx::name_current_thread_impl($label)
    };
}