dusk_wasmtime/runtime/profiling.rs
1use crate::{instantiate::CompiledModule, AsContext, Module};
2#[allow(unused_imports)]
3use anyhow::bail;
4use anyhow::Result;
5#[cfg(feature = "profiling")]
6use fxprof_processed_profile::{
7 debugid::DebugId, CategoryHandle, Frame, FrameFlags, FrameInfo, LibraryInfo, Profile,
8 ReferenceTimestamp, Symbol, SymbolTable, Timestamp,
9};
10use std::ops::Range;
11use std::sync::Arc;
12use std::time::{Duration, Instant};
13use wasmtime_environ::demangle_function_name_or_index;
14use wasmtime_runtime::Backtrace;
15
16// TODO: collect more data
17// - Provide additional hooks for recording host-guest transitions, to be
18// invoked from a Store::call_hook
19// - On non-Windows, measure thread-local CPU usage between events with
20// rustix::time::clock_gettime(ClockId::ThreadCPUTime)
21// - Report which wasm module, and maybe instance, each frame came from
22
/// Collects basic profiling data for a single WebAssembly guest.
///
/// This profiler can't provide measurements that are as accurate or detailed
/// as a platform-specific profiler, such as `perf` on Linux. On the other
/// hand, this profiler works on every platform that Wasmtime supports. Also,
/// as an embedder you can use this profiler selectively on individual guest
/// instances rather than profiling the entire process.
///
/// To use this, you'll need to arrange to call [`GuestProfiler::sample`] at
/// regular intervals while the guest is on the stack. The most straightforward
/// way to do that is to call it from a callback registered with
/// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
///
/// # Accuracy
///
/// The data collection granularity is limited by the mechanism you use to
/// interrupt guest execution and collect a profiling sample.
///
/// If you use epoch interruption, then samples will only be collected at
/// function entry points and loop headers. This introduces some bias to the
/// results. In addition, samples will only be taken at times when WebAssembly
/// functions are running, not during host-calls.
///
/// It is technically possible to use fuel interruption instead. That
/// introduces worse bias since samples occur after a certain number of
/// WebAssembly instructions, which can take different amounts of time.
///
/// You may instead be able to use platform-specific methods, such as
/// `setitimer(ITIMER_VIRTUAL, ...)` on POSIX-compliant systems, to sample on
/// a more accurate interval. The only current requirement is that the guest
/// you wish to profile must be on the same stack where you call `sample`,
/// and executing within the same thread. However, the `GuestProfiler::sample`
/// method is not currently async-signal-safe, so doing this correctly is not
/// easy.
///
/// # Security
///
/// Profiles produced using this profiler do not include any configuration
/// details from the host, such as virtual memory addresses, or from any
/// WebAssembly modules that you haven't specifically allowed. So for
/// example, these profiles should be safe to share with untrusted users
/// who have provided untrusted code that you are running in a multi-tenancy
/// environment.
///
/// However, the profile does include byte offsets into the text section of
/// the compiled module, revealing some information about the size of the code
/// generated for each module. For user-provided modules, the user could get
/// the same information by compiling the module for themselves using a similar
/// version of Wasmtime on the same target architecture, but for any module
/// where they don't already have the WebAssembly module binary available this
/// could theoretically lead to an undesirable information disclosure. So you
/// should only include user-provided modules in profiles.
#[cfg(feature = "profiling")]
#[derive(Debug)]
pub struct GuestProfiler {
    /// The profile being accumulated; serialized as JSON by `finish`.
    profile: Profile,
    /// For each allowed module, the address range of its text section paired
    /// with the library handle registered in `profile`. `new` sorts this list
    /// by the start address of each range.
    modules: Vec<(Range<usize>, fxprof_processed_profile::LibraryHandle)>,
    /// Handle of the single process that `new` adds to `profile`.
    process: fxprof_processed_profile::ProcessHandle,
    /// Handle of the single thread that `new` adds to `profile`; every sample
    /// is recorded against it.
    thread: fxprof_processed_profile::ThreadHandle,
    /// Instant captured when the profiler was created. Sample and end
    /// timestamps are expressed as time elapsed since this instant.
    start: Instant,
}
84
#[cfg(feature = "profiling")]
impl GuestProfiler {
    /// Begin profiling a new guest. When this function is called, the current
    /// wall-clock time is recorded as the start time for the guest.
    ///
    /// The `module_name` parameter is recorded in the profile to help identify
    /// where the profile came from.
    ///
    /// The `interval` parameter should match the rate at which you intend
    /// to call `sample`. However, this is used as a hint and not required to
    /// exactly match the real sample rate.
    ///
    /// Only modules which are present in the `modules` vector will appear in
    /// stack traces in this profile. Any stack frames which were executing
    /// host code or functions from other modules will be omitted. See the
    /// "Security" section of the [`GuestProfiler`] documentation for guidance
    /// on what modules should not be included in this list.
    ///
    /// Modules for which no symbols can be produced (that is, modules without
    /// any compiled functions) are silently left out of the profile.
    pub fn new(module_name: &str, interval: Duration, modules: Vec<(String, Module)>) -> Self {
        let zero = ReferenceTimestamp::from_millis_since_unix_epoch(0.0);
        let mut profile = Profile::new(module_name, zero, interval.into());

        let mut modules: Vec<_> = modules
            .into_iter()
            .filter_map(|(name, module)| {
                let compiled = module.compiled_module();
                let text = compiled.text().as_ptr_range();
                let address_range = text.start as usize..text.end as usize;
                module_symbols(name, compiled).map(|lib| (address_range, profile.add_lib(lib)))
            })
            .collect();

        // `sample` binary-searches this list by address, so it must be kept
        // in ascending address order.
        modules.sort_unstable_by_key(|(range, _)| range.start);

        profile.set_reference_timestamp(std::time::SystemTime::now().into());
        let process = profile.add_process(module_name, 0, Timestamp::from_nanos_since_reference(0));
        let thread = profile.add_thread(process, 0, Timestamp::from_nanos_since_reference(0), true);
        let start = Instant::now();
        Self {
            profile,
            modules,
            process,
            thread,
            start,
        }
    }

    /// Time elapsed since the profiler was created, expressed as a profile
    /// timestamp relative to the reference timestamp set in `new`.
    fn elapsed_timestamp(&self) -> Timestamp {
        Timestamp::from_nanos_since_reference(self.start.elapsed().as_nanos().try_into().unwrap())
    }

    /// Add a sample to the profile. This function collects a backtrace from
    /// any stack frames for allowed modules on the current stack. It should
    /// typically be called from a callback registered using
    /// [`Store::epoch_deadline_callback()`](crate::Store::epoch_deadline_callback).
    ///
    /// The `delta` parameter is the amount of CPU time that was used by this
    /// guest since the previous sample. It is allowed to pass `Duration::ZERO`
    /// here if recording CPU usage information is not needed.
    ///
    /// # Panics
    ///
    /// Panics if a frame's offset from the start of its module's text section
    /// does not fit in a `u32`.
    pub fn sample(&mut self, store: impl AsContext, delta: Duration) {
        let now = self.elapsed_timestamp();
        let modules = &self.modules;

        let backtrace = Backtrace::new(store.as_context().0.vmruntime_limits());
        let frames = backtrace
            .frames()
            // Samply needs to see the oldest frame first, but we list the newest
            // first, so iterate in reverse.
            .rev()
            .filter_map(|frame| {
                let pc = frame.pc();

                // `modules` is sorted by start address and the text ranges of
                // distinct modules are assumed not to overlap, so the list is
                // also ordered by end address. Skipping every range that ends
                // at or before this PC leaves the only range that could
                // contain it. (Partitioning on `range.start > pc` would be
                // invalid for an ascending list: `partition_point` requires
                // the predicate to be true for a prefix and false afterwards.)
                let idx = modules.partition_point(|(range, _)| range.end <= pc);
                let (range, lib) = modules.get(idx)?;

                // The PC may still fall in a gap before this range, e.g. when
                // the frame belongs to a module that was not allowed.
                if !range.contains(&pc) {
                    return None;
                }

                Some(FrameInfo {
                    frame: Frame::RelativeAddressFromReturnAddress(
                        *lib,
                        u32::try_from(pc - range.start).unwrap(),
                    ),
                    category_pair: CategoryHandle::OTHER.into(),
                    flags: FrameFlags::empty(),
                })
            });

        self.profile
            .add_sample(self.thread, now, frames, delta.into(), 1);
    }

    /// When the guest finishes running, call this function to write the
    /// profile to the given `output`. The output is a JSON-formatted object in
    /// the [Firefox "processed profile format"][fmt]. Files in this format may
    /// be visualized at <https://profiler.firefox.com/>.
    ///
    /// # Errors
    ///
    /// Returns an error if serializing the profile to `output` fails.
    ///
    /// [fmt]: https://github.com/firefox-devtools/profiler/blob/main/docs-developer/processed-profile-format.md
    pub fn finish(mut self, output: impl std::io::Write) -> Result<()> {
        // Close out the thread and the process at the same instant.
        let now = self.elapsed_timestamp();
        self.profile.set_thread_end_time(self.thread, now);
        self.profile.set_process_end_time(self.process, now);

        serde_json::to_writer(output, &self.profile)?;
        Ok(())
    }
}
191
/// Build the profiler's library description for one compiled module.
///
/// One [`Symbol`] is produced per finished function in `compiled`, using the
/// function's location (`start`/`length`) as its address and size, and the
/// demangled function name as its name (presumably falling back to the
/// function index when the module records no name; see
/// `demangle_function_name_or_index`).
///
/// Returns `None` when the module has no finished functions, so there is
/// nothing to symbolize. Otherwise returns a [`LibraryInfo`] named `name`
/// whose only populated metadata is the symbol table.
#[cfg(feature = "profiling")]
fn module_symbols(name: String, compiled: &CompiledModule) -> Option<LibraryInfo> {
    let symbols: Vec<Symbol> = compiled
        .finished_functions()
        .map(|(defined_idx, _)| {
            let loc = compiled.func_loc(defined_idx);
            let func_idx = compiled.module().func_index(defined_idx);

            let mut symbol_name = String::new();
            demangle_function_name_or_index(
                &mut symbol_name,
                compiled.func_name(func_idx),
                defined_idx.as_u32() as usize,
            )
            .unwrap();

            Symbol {
                address: loc.start,
                size: Some(loc.length),
                name: symbol_name,
            }
        })
        .collect();

    // A module without any functions contributes nothing to the profile.
    (!symbols.is_empty()).then(|| LibraryInfo {
        name,
        debug_name: String::new(),
        path: String::new(),
        debug_path: String::new(),
        debug_id: DebugId::nil(),
        code_id: None,
        arch: None,
        symbol_table: Some(Arc::new(SymbolTable::new(symbols))),
    })
}