edgefirst_hal/trace.rs
1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4//! Trace capture for performance analysis.
5//!
6//! Provides a simple start/stop API for capturing [`tracing`]-based spans
7//! emitted by HAL crates into Chrome JSON trace files viewable at
8//! <https://ui.perfetto.dev/>.
9//!
10//! # Design
11//!
12//! The HAL library crates (`edgefirst-decoder`, `edgefirst-image`) emit
13//! [`tracing::trace_span!`] spans on hot paths. These have near-zero overhead
14//! when no subscriber is active (a single relaxed atomic load per span site).
15//!
16//! This module installs a **process-wide subscriber** consisting of a Chrome
17//! trace layer writing spans to a JSON file for Perfetto. Existing `log::*`
18//! output (via `env_logger`) continues independently to stderr.
19//!
20//! The subscriber is installed once on the first call to [`start_tracing`].
21//! Only one trace capture session is supported per process lifetime (this is
22//! a limitation of Rust's global subscriber model and is acceptable for
23//! profiling workflows where a single trace per run is the norm).
24//!
25//! # Usage from Rust
26//!
27//! ```no_run
28//! # #[cfg(feature = "tracing")]
29//! # {
30//! use edgefirst_hal::trace::{start_tracing, stop_tracing};
31//!
32//! start_tracing("/tmp/trace.json").expect("start tracing");
33//! // ... run inference pipeline ...
34//! stop_tracing(); // flushes and closes the trace file
35//! # }
36//! ```
37//!
38//! # Usage from Python
39//!
40//! ```python
41//! import edgefirst_hal as hal
42//!
//! with hal.Tracing("/tmp/trace.json"):
//!     # ... run inference ...
//!     pass
//! # trace file is flushed on __exit__
47//! ```
48//!
49//! # Usage from C
50//!
51//! ```c
52//! #include "edgefirst_hal.h"
53//! hal_start_tracing("/tmp/trace.json");
54//! // ... run inference ...
55//! hal_stop_tracing(); // flushes trace file
56//! ```
57
58use std::sync::atomic::{AtomicBool, Ordering};
59use std::sync::Mutex;
60
61use tracing_chrome::FlushGuard;
62use tracing_subscriber::prelude::*;
63
/// Global flush guard for the active trace session.
///
/// Holds `Some(guard)` while a capture is running and `None` otherwise.
/// [`start_tracing`] stores the guard here and [`stop_tracing`] takes it back
/// out, dropping it. The mutex also serializes the checks performed by
/// [`start_tracing`].
static GUARD: Mutex<Option<FlushGuard>> = Mutex::new(None);
66
/// Tracks whether a session has ever been started (remains true after stop).
///
/// Set to `true` by [`start_tracing`] once the global subscriber has been
/// installed; nothing in this module resets it. It is only read and written
/// while the `GUARD` mutex is held, which is why relaxed ordering is used.
static SESSION_USED: AtomicBool = AtomicBool::new(false);
69
/// Errors from tracing operations.
///
/// Returned by [`start_tracing`]; [`stop_tracing`] and [`is_tracing_active`]
/// do not return errors.
#[derive(Debug)]
pub enum TracingError {
    /// A trace capture session is already active.
    ///
    /// Call [`stop_tracing`] before attempting to start again (note that a
    /// second start will then report [`TracingError::SessionExhausted`]).
    AlreadyActive,
    /// The single-use trace session was already started and stopped.
    /// Only one session per process lifetime is supported.
    SessionExhausted,
    /// Failed to install the global subscriber (another was already set
    /// by user code outside the HAL).
    ///
    /// The payload is the textual form of the underlying error.
    SubscriberInstallFailed(String),
}
82
83impl std::fmt::Display for TracingError {
84 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
85 match self {
86 Self::AlreadyActive => write!(f, "trace capture already active"),
87 Self::SessionExhausted => write!(
88 f,
89 "trace session already used (only one session per process lifetime)"
90 ),
91 Self::SubscriberInstallFailed(e) => {
92 write!(f, "failed to install tracing subscriber: {e}")
93 }
94 }
95 }
96}
97
// Empty impl: relies on the trait's default methods, so no underlying
// `source()` error is exposed. Lets `TracingError` be used wherever a
// `std::error::Error` is expected (e.g. `Box<dyn Error>`).
impl std::error::Error for TracingError {}
99
100/// Start trace capture, writing Chrome JSON to `path`.
101///
102/// Installs a global tracing subscriber (chrome layer only) on first call.
103/// The trace file is created immediately. All `tracing::trace_span!` spans
104/// emitted by HAL crates will be recorded until [`stop_tracing`] is called.
105///
106/// Only one session per process lifetime is supported (a limitation of
107/// Rust's global subscriber model).
108///
109/// # Errors
110///
111/// Returns [`TracingError::AlreadyActive`] if a session is currently capturing.
112/// Returns [`TracingError::SessionExhausted`] if a session was previously
113/// started and stopped (the global subscriber cannot be replaced).
114/// Returns [`TracingError::SubscriberInstallFailed`] if another tracing
115/// subscriber was installed by user code outside the HAL.
116pub fn start_tracing(path: &str) -> Result<(), TracingError> {
117 let mut lock = GUARD.lock().unwrap_or_else(|e| e.into_inner());
118 if lock.is_some() {
119 return Err(TracingError::AlreadyActive);
120 }
121 if SESSION_USED.load(Ordering::Relaxed) {
122 return Err(TracingError::SessionExhausted);
123 }
124
125 // Build chrome layer writing to the specified file.
126 let (chrome_layer, guard) = tracing_chrome::ChromeLayerBuilder::new()
127 .file(path)
128 .include_args(true)
129 .build();
130
131 // Install only the chrome layer. Existing log::* output continues through
132 // env_logger to stderr independently — no conflict.
133 let subscriber = tracing_subscriber::registry().with(chrome_layer);
134
135 tracing::subscriber::set_global_default(subscriber)
136 .map_err(|e| TracingError::SubscriberInstallFailed(e.to_string()))?;
137
138 SESSION_USED.store(true, Ordering::Relaxed);
139 *lock = Some(guard);
140 Ok(())
141}
142
143/// Stop trace capture, flushing all buffered spans to the output file.
144///
145/// No-op if no session is active. After this call the trace file is complete
146/// and can be loaded into <https://ui.perfetto.dev/>.
147pub fn stop_tracing() {
148 let mut lock = GUARD.lock().unwrap_or_else(|e| e.into_inner());
149 // Dropping the FlushGuard flushes remaining spans and closes the file.
150 lock.take();
151}
152
153/// Returns `true` if a trace capture session is currently active.
154pub fn is_tracing_active() -> bool {
155 GUARD.lock().unwrap_or_else(|e| e.into_inner()).is_some()
156}
157
#[cfg(test)]
mod tests {
    use super::*;

    // The global subscriber can be installed only once per process, so the
    // whole start/stop lifecycle is exercised by a single test.
    #[test]
    fn test_trace_lifecycle() {
        let trace_path = std::env::temp_dir().join("hal_test_trace_lifecycle.json");
        let trace_path_str = trace_path.to_str().unwrap();

        // Remove any artifact left by an earlier run; errors are ignored.
        let _ = std::fs::remove_file(&trace_path);

        assert!(!is_tracing_active());

        // The first start installs the subscriber and activates the session.
        start_tracing(trace_path_str).expect("start_tracing should succeed");
        assert!(is_tracing_active());

        // Starting again while capturing must report AlreadyActive.
        let second_start = start_tracing(trace_path_str).unwrap_err();
        assert!(
            matches!(second_start, TracingError::AlreadyActive),
            "expected AlreadyActive, got: {second_start:?}"
        );

        // Enter and immediately leave a span so the file receives content.
        drop(tracing::trace_span!("test_span", key = "value").entered());

        // Stopping deactivates the session.
        stop_tracing();
        assert!(!is_tracing_active());

        // The trace file must exist and be non-empty.
        assert!(trace_path.exists());
        let written = std::fs::read_to_string(&trace_path).unwrap();
        assert!(!written.is_empty(), "trace file should not be empty");

        // A start after stop fails: the single session has been consumed.
        let third_start = start_tracing(trace_path_str).unwrap_err();
        assert!(
            matches!(third_start, TracingError::SessionExhausted),
            "expected SessionExhausted, got: {third_start:?}"
        );

        // Remove the artifact produced by this run.
        let _ = std::fs::remove_file(&trace_path);
    }
}