Skip to main content

hyperlight_guest_tracing/
lib.rs

1/*
2Copyright 2025  The Hyperlight Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8    http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17#![no_std]
18
19/// Expose invariant TSC module
20#[cfg(target_arch = "x86_64")]
21pub mod invariant_tsc;
22
23/// Defines internal guest state
24#[cfg(feature = "trace")]
25mod state;
26
27/// Defines guest tracing Subscriber
28#[cfg(feature = "trace")]
29mod subscriber;
30
31/// Defines a type to iterate over spans/events fields
32#[cfg(feature = "trace")]
33mod visitor;
34
35/// Type to get the relevant information from the internal state
36/// and expose it to the host
37#[cfg(feature = "trace")]
38pub use state::TraceBatchInfo;
39#[cfg(feature = "trace")]
40pub use trace::{
41    end_trace, flush, init_guest_tracing, is_trace_enabled, new_call, reset, serialized_data,
42};
43
/// Guest-side tracing API.
///
/// This module is gated because some of these types are also used on the host, but we want
/// only the guest to allocate and allow the functionality intended for the guest.
#[cfg(feature = "trace")]
mod trace {
    extern crate alloc;
    use alloc::sync::{Arc, Weak};

    use spin::Mutex;
    use tracing_core::LevelFilter;

    use crate::state::GuestState;
    use crate::subscriber::GuestSubscriber;

    /// Weak reference to the guest state so we can manually trigger flush to host
    /// The `GuestState` is ONLY accessed from two places:
    /// - The tracing subscriber, when spans/events are created in the guest
    /// - The guest tracing API, when we want manual control to flush the events to the host
    ///
    /// The mutex ensures safe access to the state from both places.
    static GUEST_STATE: spin::Once<Weak<Mutex<GuestState>>> = spin::Once::new();

    /// Runs `f` with exclusive access to the guest state.
    ///
    /// Returns `None` without calling `f` when `GUEST_STATE` has not been set yet or
    /// when the weak reference can no longer be upgraded; otherwise returns `Some`
    /// with the value produced by `f`.
    ///
    /// # Panics
    ///
    /// Panics with `lock_msg` if the state mutex is already locked.
    ///
    /// We want to protect against re-entrancy issues produced by tracing code that locks
    /// the state and then causes an exception that tries to lock the state again.
    ///
    /// For example:
    /// 1. A span is created, locking the state
    /// 2. An exception occurs while the span is being created (e.g. not enough memory, etc.)
    /// 3. The exception handler uses the tracing API to send the trace data to the host
    ///    or just create spans/events for logging purposes.
    /// 4. The tracing API tries to lock the state again, causing a deadlock.
    ///
    /// To avoid this, we use `try_lock` and if we cannot acquire the lock, we panic to
    /// signal the issue.
    // `track_caller` keeps the reported panic location in the public entry point
    // rather than inside this shared helper.
    #[track_caller]
    fn with_state<R>(lock_msg: &str, f: impl FnOnce(&mut GuestState) -> R) -> Option<R> {
        let state_mutex = GUEST_STATE.get()?.upgrade()?;
        let mut state = state_mutex.try_lock().expect(lock_msg);
        let result = f(&mut state);
        Some(result)
    }

    /// Initialize the guest tracing subscriber as global default.
    ///
    /// Does nothing if a global dispatcher has already been set.
    ///
    /// * `guest_start_tsc` - forwarded to `GuestSubscriber::new`
    /// * `max_log_level` - forwarded to `GuestSubscriber::new`
    pub fn init_guest_tracing(guest_start_tsc: u64, max_log_level: LevelFilter) {
        // Set as global default if not already set.
        if tracing_core::dispatcher::has_been_set() {
            return;
        }
        let sub = GuestSubscriber::new(guest_start_tsc, max_log_level);
        let state = sub.state();
        // Store a `Weak<Mutex<GuestState>>` to use later at runtime
        GUEST_STATE.call_once(|| Arc::downgrade(state));

        // Set global dispatcher. A failure here is deliberately ignored.
        let _ = tracing_core::dispatcher::set_global_default(tracing_core::Dispatch::new(sub));
    }

    /// Ends the current trace by ending all active spans in the
    /// internal state and storing the end timestamps.
    ///
    /// This expects an outb call to send the spans to the host.
    /// After calling this function, the internal state is marked
    /// for cleaning on the next access.
    ///
    /// Does nothing if the guest state is not available.
    ///
    /// NOTE: Panics if unable to lock the guest state.
    pub fn end_trace() {
        with_state(
            "guest_tracing: Unable to lock guest tracing state in `end_trace`",
            |state| state.end_trace(),
        );
    }

    /// Flushes the current trace data to prepare it for reading by the host.
    ///
    /// Does nothing if the guest state is not available.
    ///
    /// NOTE: Panics if unable to lock the guest state.
    pub fn flush() {
        with_state("Unable to lock GuestState in `flush`", |state| {
            state.flush()
        });
    }

    /// Resets the internal trace state for a new guest function call.
    /// This clears any existing spans/events from previous calls ensuring a clean state.
    ///
    /// Does nothing if the guest state is not available.
    ///
    /// NOTE: Panics if unable to lock the guest state.
    pub fn new_call(guest_start_tsc: u64) {
        with_state("Unable to lock GuestState in `new_call`", |state| {
            state.new_call(guest_start_tsc)
        });
    }

    /// Cleans the internal trace state by removing closed spans and events.
    /// This ensures that after a VM exit, we keep the spans that
    /// are still active (in the stack) and remove all other spans and events.
    ///
    /// Does nothing if the guest state is not available.
    ///
    /// NOTE: Panics if unable to lock the guest state.
    pub fn reset() {
        with_state("Unable to lock GuestState in `reset`", |state| {
            state.reset()
        });
    }

    /// Returns information about the current trace state needed by the host to read the spans.
    ///
    /// Returns `None` if the guest state is not available, or if the state itself
    /// reports no serialized data.
    ///
    /// NOTE: Panics if unable to lock the guest state.
    pub fn serialized_data() -> Option<(u64, u64)> {
        with_state("Unable to lock GuestState in `serialized_data`", |state| {
            state.serialized_data()
        })
        .flatten()
    }

    /// Returns true if tracing is enabled, i.e. the guest tracing state has been
    /// initialized and is still alive (the stored weak reference can be upgraded).
    pub fn is_trace_enabled() -> bool {
        GUEST_STATE
            .get()
            .is_some_and(|w| w.upgrade().is_some())
    }
}