Skip to main content

edgefirst_hal/
trace.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4//! Trace capture for performance analysis.
5//!
6//! Provides a simple start/stop API for capturing [`tracing`]-based spans
7//! emitted by HAL crates into Chrome JSON trace files viewable at
8//! <https://ui.perfetto.dev/>.
9//!
10//! # Design
11//!
12//! The HAL library crates (`edgefirst-decoder`, `edgefirst-image`) emit
13//! [`tracing::trace_span!`] spans on hot paths. These have near-zero overhead
14//! when no subscriber is active (a single relaxed atomic load per span site).
15//!
16//! This module installs a **process-wide subscriber** consisting of a Chrome
17//! trace layer writing spans to a JSON file for Perfetto. Existing `log::*`
18//! output (via `env_logger`) continues independently to stderr.
19//!
20//! The subscriber is installed once on the first call to [`start_tracing`].
21//! Only one trace capture session is supported per process lifetime (this is
22//! a limitation of Rust's global subscriber model and is acceptable for
23//! profiling workflows where a single trace per run is the norm).
24//!
25//! # Usage from Rust
26//!
27//! ```no_run
28//! # #[cfg(feature = "tracing")]
29//! # {
30//! use edgefirst_hal::trace::{start_tracing, stop_tracing};
31//!
32//! start_tracing("/tmp/trace.json").expect("start tracing");
33//! // ... run inference pipeline ...
34//! stop_tracing(); // flushes and closes the trace file
35//! # }
36//! ```
37//!
38//! # Usage from Python
39//!
40//! ```python
41//! import edgefirst_hal as hal
42//!
43//! with hal.Tracing("/tmp/trace.json"):
44//!     # ... run inference ...
45//!     pass
46//! # trace file is flushed on __exit__
47//! ```
48//!
49//! # Usage from C
50//!
51//! ```c
52//! #include "edgefirst_hal.h"
53//! hal_start_tracing("/tmp/trace.json");
54//! // ... run inference ...
55//! hal_stop_tracing(); // flushes trace file
56//! ```
57
58use std::sync::atomic::{AtomicBool, Ordering};
59use std::sync::Mutex;
60
61use tracing_chrome::FlushGuard;
62use tracing_subscriber::prelude::*;
63
64/// Global flush guard for the active trace session.
65static GUARD: Mutex<Option<FlushGuard>> = Mutex::new(None);
66
67/// Tracks whether a session has ever been started (remains true after stop).
68static SESSION_USED: AtomicBool = AtomicBool::new(false);
69
/// Errors reported by the trace-capture API.
///
/// The type is cheap to clone and supports equality comparison so callers
/// (and tests) can compare a returned error against an expected variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TracingError {
    /// A trace capture session is already active.
    AlreadyActive,
    /// The single-use trace session was already started and stopped.
    /// Only one session per process lifetime is supported.
    SessionExhausted,
    /// Setting up the global subscriber failed. The payload is a
    /// human-readable reason, for example that another subscriber was
    /// already installed by user code outside the HAL.
    SubscriberInstallFailed(String),
}

impl std::fmt::Display for TracingError {
    /// Writes the user-facing message for each variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Fixed messages need no formatting machinery.
            Self::AlreadyActive => f.write_str("trace capture already active"),
            Self::SessionExhausted => f.write_str(
                "trace session already used (only one session per process lifetime)",
            ),
            Self::SubscriberInstallFailed(reason) => {
                write!(f, "failed to install tracing subscriber: {reason}")
            }
        }
    }
}

// No underlying source error is carried, so the default trait methods suffice.
impl std::error::Error for TracingError {}
99
100/// Start trace capture, writing Chrome JSON to `path`.
101///
102/// Installs a global tracing subscriber (chrome layer only) on first call.
103/// The trace file is created immediately. All `tracing::trace_span!` spans
104/// emitted by HAL crates will be recorded until [`stop_tracing`] is called.
105///
106/// Only one session per process lifetime is supported (a limitation of
107/// Rust's global subscriber model).
108///
109/// # Errors
110///
111/// Returns [`TracingError::AlreadyActive`] if a session is currently capturing.
112/// Returns [`TracingError::SessionExhausted`] if a session was previously
113/// started and stopped (the global subscriber cannot be replaced).
114/// Returns [`TracingError::SubscriberInstallFailed`] if another tracing
115/// subscriber was installed by user code outside the HAL.
116pub fn start_tracing(path: &str) -> Result<(), TracingError> {
117    let mut lock = GUARD.lock().unwrap_or_else(|e| e.into_inner());
118    if lock.is_some() {
119        return Err(TracingError::AlreadyActive);
120    }
121    if SESSION_USED.load(Ordering::Relaxed) {
122        return Err(TracingError::SessionExhausted);
123    }
124
125    // Build chrome layer writing to the specified file.
126    let (chrome_layer, guard) = tracing_chrome::ChromeLayerBuilder::new()
127        .file(path)
128        .include_args(true)
129        .build();
130
131    // Install only the chrome layer. Existing log::* output continues through
132    // env_logger to stderr independently — no conflict.
133    let subscriber = tracing_subscriber::registry().with(chrome_layer);
134
135    tracing::subscriber::set_global_default(subscriber)
136        .map_err(|e| TracingError::SubscriberInstallFailed(e.to_string()))?;
137
138    SESSION_USED.store(true, Ordering::Relaxed);
139    *lock = Some(guard);
140    Ok(())
141}
142
143/// Stop trace capture, flushing all buffered spans to the output file.
144///
145/// No-op if no session is active. After this call the trace file is complete
146/// and can be loaded into <https://ui.perfetto.dev/>.
147pub fn stop_tracing() {
148    let mut lock = GUARD.lock().unwrap_or_else(|e| e.into_inner());
149    // Dropping the FlushGuard flushes remaining spans and closes the file.
150    lock.take();
151}
152
153/// Returns `true` if a trace capture session is currently active.
154pub fn is_tracing_active() -> bool {
155    GUARD.lock().unwrap_or_else(|e| e.into_inner()).is_some()
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // The global subscriber can be installed only once per process, so the
    // whole start/stop/restart lifecycle is exercised in one test.
    #[test]
    fn test_trace_lifecycle() {
        let trace_file = std::env::temp_dir().join("hal_test_trace_lifecycle.json");
        let trace_file_str = trace_file.to_str().unwrap();

        // Remove leftovers from an earlier run; a missing file is fine.
        let _ = std::fs::remove_file(&trace_file);

        // Nothing is capturing before the first start.
        assert!(!is_tracing_active());

        // The initial start succeeds and flips the active flag.
        start_tracing(trace_file_str).expect("start_tracing should succeed");
        assert!(is_tracing_active());

        // Starting again while capturing is rejected with AlreadyActive.
        let err = start_tracing(trace_file_str).unwrap_err();
        assert!(
            matches!(err, TracingError::AlreadyActive),
            "expected AlreadyActive, got: {err:?}"
        );

        // Enter and immediately leave a span so the trace has content.
        {
            let _span = tracing::trace_span!("test_span", key = "value").entered();
        }

        // Stopping ends the session.
        stop_tracing();
        assert!(!is_tracing_active());

        // The trace file must exist and hold some data.
        assert!(Path::new(trace_file_str).exists());
        let content = std::fs::read_to_string(&trace_file).unwrap();
        assert!(!content.is_empty(), "trace file should not be empty");

        // A fresh start after stop is refused: the session is single-use.
        let err = start_tracing(trace_file_str).unwrap_err();
        assert!(
            matches!(err, TracingError::SessionExhausted),
            "expected SessionExhausted, got: {err:?}"
        );

        // Tidy up the artifact.
        let _ = std::fs::remove_file(&trace_file);
    }
}