usdt_impl/lib.rs
1//! Main implementation crate for the USDT package.
2
3// Copyright 2022 Oxide Computer Company
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17use serde::Deserialize;
18use std::cell::RefCell;
19use thiserror::Error;
20
// Probe record parsing required for the standard and stapsdt backends (and the `des` feature used by the `dusty` util)
22#[cfg(any(usdt_backend_standard, usdt_backend_stapsdt, feature = "des"))]
23pub mod record;
24
25#[cfg_attr(usdt_backend_noop, path = "empty.rs")]
26#[cfg_attr(usdt_backend_linker, path = "linker.rs")]
27#[cfg_attr(usdt_backend_standard, path = "no-linker.rs")]
28#[cfg_attr(usdt_backend_stapsdt, path = "stapsdt.rs")]
29mod internal;
30
// Since the `empty` backend is mostly a no-op, parts of the common code will go unused when
// it is selected for use.
33#[cfg_attr(usdt_backend_noop, allow(dead_code))]
34mod common;
35
36/// Register an application's probe points with DTrace.
37///
38/// This function collects information about the probe points defined in an application and ensures
39/// that they are registered with the DTrace kernel module. It is critical to note that if this
40/// method is not called (at some point in an application), _no probes will be visible_ via the
41/// `dtrace(1)` command line tool.
42///
43/// NOTE: This method presents a quandary for library developers, as consumers of their library may
44/// forget to (or choose not to) call this function. There are potential workarounds for this
45/// problem, but each comes with significant tradeoffs. Library developers are encouraged to
46/// re-export this function and document to their users that this function should be called to
47/// guarantee that the library's probes are registered.
48pub fn register_probes() -> Result<(), Error> {
49 crate::internal::register_probes()
50}
51
52/// Errors related to building DTrace probes into Rust code
53#[derive(Error, Debug)]
54pub enum Error {
55 /// Error during parsing of DTrace provider source
56 #[error(transparent)]
57 ParseError(#[from] dtrace_parser::DTraceError),
58 /// Error reading or writing files, or registering DTrace probes
59 #[error(transparent)]
60 IO(#[from] std::io::Error),
61 /// Error related to environment variables, e.g., while running a build script
62 #[error(transparent)]
63 Env(#[from] std::env::VarError),
64 /// An error occurred extracting probe information from the encoded object file sections
65 #[error("The file is not a valid object file")]
66 InvalidFile,
67 /// Error related to calling out to DTrace itself
68 #[error("Failed to call DTrace subprocess")]
69 DTraceError,
70 /// Error converting input to JSON
71 #[error(transparent)]
72 Json(#[from] serde_json::Error),
73}
74
75#[derive(Default, Debug, Deserialize)]
76pub struct CompileProvidersConfig {
77 pub provider: Option<String>,
78 pub probe_format: Option<String>,
79 pub module: Option<String>,
80}
81
82impl CompileProvidersConfig {
83 /// Return the formatted name of a probe.
84 pub fn format_probe(&self, probe_name: &str) -> String {
85 if let Some(fmt) = &self.probe_format {
86 fmt.replace(
87 "{provider}",
88 self.provider
89 .as_ref()
90 .expect("Expected a provider name when formatting a rpobe"),
91 )
92 .replace("{probe}", probe_name)
93 } else {
94 String::from(probe_name)
95 }
96 }
97
98 /// Return the formatted name of the probe as an identifier.
99 pub fn probe_ident(&self, probe_name: &str) -> proc_macro2::Ident {
100 quote::format_ident!("{}", self.format_probe(probe_name))
101 }
102
103 /// Return the formatted module name as an identifier.
104 pub fn module_ident(&self) -> proc_macro2::Ident {
105 let name = self.module.as_ref().unwrap_or_else(|| {
106 self.provider
107 .as_ref()
108 .expect("Expected a provider name when making a module ident")
109 });
110 quote::format_ident!("{}", name)
111 }
112}
113
114// Compile DTrace provider source code into Rust.
115//
116// This function parses a provider definition, and, for each probe, a corresponding Rust macro is
117// returned. This macro may be called throughout Rust code to fire the corresponding DTrace probe
118// (if it's enabled). See [probe_test_macro] for a detailed example.
119//
120// [probe_test_macro]: https://github.com/oxidecomputer/usdt/tree/master/probe-test-macro
121pub fn compile_provider_source(
122 source: &str,
123 config: &CompileProvidersConfig,
124) -> Result<proc_macro2::TokenStream, Error> {
125 crate::internal::compile_provider_source(source, config)
126}
127
128// Compile a DTrace provider from its representation in the USDT crate.
129pub fn compile_provider(
130 provider: &Provider,
131 config: &CompileProvidersConfig,
132) -> proc_macro2::TokenStream {
133 crate::internal::compile_provider_from_definition(provider, config)
134}
135
136/// A data type supported by the `usdt` crate.
137#[derive(Debug, Clone, PartialEq)]
138pub enum DataType {
139 Native(dtrace_parser::DataType),
140 UniqueId,
141 Serializable(syn::Type),
142}
143
144impl DataType {
145 /// Convert a data type to its C type representation as a string.
146 pub fn to_c_type(&self) -> String {
147 match self {
148 DataType::Native(ty) => ty.to_c_type(),
149 DataType::UniqueId => String::from("uint64_t"),
150 DataType::Serializable(_) => String::from("char*"),
151 }
152 }
153
154 /// Return the Rust FFI type representation of this data type.
155 pub fn to_rust_ffi_type(&self) -> syn::Type {
156 match self {
157 DataType::Native(ty) => syn::parse_str(&ty.to_rust_ffi_type()).unwrap(),
158 DataType::UniqueId => syn::parse_str("::std::os::raw::c_ulonglong").unwrap(),
159 DataType::Serializable(_) => syn::parse_str("*const ::std::os::raw::c_char").unwrap(),
160 }
161 }
162
163 /// Return the native Rust type representation of this data type.
164 pub fn to_rust_type(&self) -> syn::Type {
165 match self {
166 DataType::Native(ty) => syn::parse_str(&ty.to_rust_type()).unwrap(),
167 DataType::UniqueId => syn::parse_str("::usdt::UniqueId").unwrap(),
168 DataType::Serializable(ref inner) => inner.clone(),
169 }
170 }
171}
172
173impl From<dtrace_parser::DataType> for DataType {
174 fn from(ty: dtrace_parser::DataType) -> Self {
175 DataType::Native(ty)
176 }
177}
178
179impl From<&syn::Type> for DataType {
180 fn from(t: &syn::Type) -> Self {
181 DataType::Serializable(t.clone())
182 }
183}
184
185/// A single DTrace probe function
186#[derive(Debug, Clone)]
187pub struct Probe {
188 pub name: String,
189 pub types: Vec<DataType>,
190}
191
192impl From<dtrace_parser::Probe> for Probe {
193 fn from(p: dtrace_parser::Probe) -> Self {
194 Self {
195 name: p.name,
196 types: p.types.into_iter().map(DataType::from).collect(),
197 }
198 }
199}
200
201impl Probe {
202 /// Return the representation of this probe in D source code.
203 pub fn to_d_source(&self) -> String {
204 let types = self
205 .types
206 .iter()
207 .map(|typ| typ.to_c_type())
208 .collect::<Vec<_>>()
209 .join(", ");
210 format!("probe {name}({types});", name = self.name, types = types)
211 }
212}
213
214/// The `Provider` represents a single DTrace provider, with a collection of probes.
215#[derive(Debug, Clone)]
216pub struct Provider {
217 pub name: String,
218 pub probes: Vec<Probe>,
219 pub use_statements: Vec<syn::ItemUse>,
220}
221
222impl Provider {
223 /// Return the representation of this provider in D source code.
224 pub fn to_d_source(&self) -> String {
225 let probes = self
226 .probes
227 .iter()
228 .map(|probe| format!("\t{}", probe.to_d_source()))
229 .collect::<Vec<_>>()
230 .join("\n");
231 format!(
232 "provider {provider_name} {{\n{probes}\n}};",
233 provider_name = self.name,
234 probes = probes
235 )
236 }
237}
238
239impl From<dtrace_parser::Provider> for Provider {
240 fn from(p: dtrace_parser::Provider) -> Self {
241 Self {
242 name: p.name,
243 probes: p.probes.into_iter().map(Probe::from).collect(),
244 use_statements: vec![],
245 }
246 }
247}
248
249impl From<&dtrace_parser::Provider> for Provider {
250 fn from(p: &dtrace_parser::Provider) -> Self {
251 Self::from(p.clone())
252 }
253}
254
255/// Convert a serializable type into a JSON string, if possible.
256///
257/// NOTE: This is essentially a re-export of the `serde_json::to_string` function, used to avoid
258/// foisting an explicity dependency on that crate in user's `Cargo.toml`.
259pub fn to_json<T>(x: &T) -> Result<String, Error>
260where
261 T: ?Sized + ::serde::Serialize,
262{
263 ::serde_json::to_string(x).map_err(Error::from)
264}
265
266thread_local! {
267 static CURRENT_ID: RefCell<u32> = const { RefCell::new(0) };
268 static THREAD_ID: RefCell<usize> = RefCell::new(thread_id::get());
269}
270
/// An identifier that can be used to correlate multiple USDT probes with one another.
///
/// Correlating several probes is a common pattern in DTrace scripts. For example, one can time
/// system calls by storing a timestamp in the `syscall:::entry` probe and then computing the
/// elapsed time in the `syscall:::return` probe. Doing so requires some way to "match up" the two
/// probes, so that the elapsed time is attributed to a single system call, and that in turn
/// requires an identifier. User code may already have an ID suited to this purpose; when it does
/// not, a `UniqueId` can be used to correlate the probes fired within a section or span of user
/// code.
///
/// A probe function may accept a `UniqueId`, which appears in D as a `uint64_t`. The value is
/// unique even if multiple threads run the same traced section of code. (See the
/// [notes](#notes) for caveats.) An ID may be shared between threads by calling `clone()` on a
/// constructed ID -- the cloned object carries the same value, so that a traced span running in
/// multiple threads (or asynchronous tasks) reports the same identifier.
///
/// A `UniqueId` is very cheap to construct. The internal value is "materialized" in only two
/// situations:
///
/// - When an _enabled_ probe fires
/// - When the value is cloned (e.g., for sharing with another thread)
///
/// This keeps the disabled-probe effect small, while still allowing a consistent ID to be shared
/// in the case of multithreaded work.
///
/// Example
/// -------
/// ```ignore
/// #[usdt::provider]
/// mod with_id {
///     fn work_started(_: &usdt::UniqueId) {}
///     fn halfway_there(_: &usdt::UniqueId, msg: &str) {}
///     fn work_completed(_: &usdt::UniqueId, result: u64) {}
/// }
///
/// // Constructing an ID is very cheap.
/// let id = usdt::UniqueId::new();
///
/// // The ID will only be materialized if this probe is enabled.
/// with_id_work_started!(|| &id);
///
/// // If the ID has been materialized above, this simply clones the internal value. If the ID has
/// // _not_ yet been materialized, say because the `work_started` probe was not enabled, this will
/// // do so now.
/// let id2 = id.clone();
/// let handle = std::thread::spawn(move || {
///     for i in 0..10 {
///         // Do our work.
///         if i == 5 {
///             with_id_halfway_there!(|| (&id2, "work is half completed"));
///         }
///     }
///     10
/// });
///
/// let result = handle.join().unwrap();
/// with_id_work_completed!(|| (&id, result));
/// ```
///
/// This type is not `Sync`, so a single value cannot accidentally be shared between threads. The
/// only way to track the same ID in work that spans threads is to clone it first, which
/// materializes the internal value. For example, this will fail to compile:
///
/// ```compile_fail
/// #[usdt::provider]
/// mod with_id {
///     fn work_started(_: &usdt::UniqueId) {}
///     fn halfway_there(_: &usdt::UniqueId, msg: &str) {}
///     fn work_completed(_: &usdt::UniqueId, result: u64) {}
/// }
///
/// let id = usdt::UniqueId::new();
/// with_id_work_started!(|| &id);
/// let handle = std::thread::spawn(move || {
///     for i in 0..10 {
///         // Do our work.
///         if i == 5 {
///             // Note that we're using `id`, not a clone as the previous example.
///             with_id_halfway_there!(|| (&id, "work is half completed"));
///         }
///     }
///     10
/// });
/// let result = handle.join().unwrap();
/// with_id_work_completed!(|| (&id, result));
/// ```
///
/// Notes
/// -----
///
/// In any practical situation, the generated ID is unique. Its value is derived from the thread
/// that materializes the `UniqueId`, plus a monotonic thread-local counter. That counter is only
/// 32 bits wide, however, and so wraps around after about 4 billion values. In theory, then,
/// multiple `UniqueId`s could appear to DTrace with the same value, if they are exceptionally
/// long-lived or generated very often.
#[derive(Debug)]
pub struct UniqueId {
    // `None` until the value is materialized. The `RefCell` only provides interior mutability.
    id: RefCell<Option<u64>>,
}
370
371impl UniqueId {
372 /// Construct a new identifier.
373 ///
374 /// A `UniqueId` is cheap to create, and is not materialized into an actual value until it's
375 /// needed, either by a probe function or during `clone`ing to share the value between threads.
376 pub const fn new() -> Self {
377 Self {
378 id: RefCell::new(None),
379 }
380 }
381
382 // Helper function to actually materialize a u64 value internally.
383 //
384 // This method assigns a value on the basis of the current thread and a monotonic counter, in
385 // the upper and lower 32-bits of a u64, respectively.
386 fn materialize(&self) {
387 // Safety: This type is not Sync, which means the current thread maintains the only
388 // reference to the contained ID. A `UniqueId` in another thread is a clone, at which
389 // point the value has been materialized as well. The `id` field of that object is a
390 // different `RefCell` -- that type is here just to enable interior mutability.
391 let mut inner = self.id.borrow_mut();
392 if inner.is_none() {
393 let id = CURRENT_ID.with(|id| {
394 let thread_id = THREAD_ID.with(|id| *id.borrow_mut() as u64);
395 let mut inner = id.borrow_mut();
396 *inner = inner.wrapping_add(1);
397 (thread_id << 32) | (*inner as u64)
398 });
399 inner.replace(id);
400 }
401 }
402
403 /// Return the internal `u64` value, materializing it if needed.
404 #[doc(hidden)]
405 pub fn as_u64(&self) -> u64 {
406 self.materialize();
407 // Safety: This is an immutable borrow, so is safe from multiple threads. The cell cannot
408 // be borrowed mutably at the same time, as that only occurs within the scope of the
409 // `materialize` method. This method can't be called on the _same_ `UniqueId` from multiple
410 // threads, because the type is not `Sync`.
411 self.id.borrow().unwrap()
412 }
413}
414
415impl Clone for UniqueId {
416 fn clone(&self) -> Self {
417 self.materialize();
418 Self {
419 id: self.id.clone(),
420 }
421 }
422}
423
#[cfg(test)]
mod test {
    use super::*;
    use dtrace_parser::{BitWidth, DataType as DType, Integer, Sign};

    // Description of an unsigned 8-bit integer, shared by the tests below.
    fn unsigned_byte() -> Integer {
        Integer {
            sign: Sign::Unsigned,
            width: BitWidth::Bit8,
        }
    }

    #[test]
    fn test_probe_to_d_source() {
        let types = vec![DataType::Native(DType::Pointer(unsigned_byte()))];
        let probe = Probe {
            name: "my_probe".to_string(),
            types,
        };
        assert_eq!(probe.to_d_source(), "probe my_probe(uint8_t*);");
    }

    #[test]
    fn test_provider_to_d_source() {
        let types = vec![DataType::Native(DType::Integer(unsigned_byte()))];
        let probe = Probe {
            name: "my_probe".to_string(),
            types,
        };
        let provider = Provider {
            name: "my_provider".to_string(),
            probes: vec![probe],
            use_statements: Vec::new(),
        };
        let expected = "provider my_provider {\n\tprobe my_probe(uint8_t);\n};";
        assert_eq!(provider.to_d_source(), expected);
    }

    #[test]
    fn test_data_type() {
        let pointer = DataType::Native(DType::Pointer(unsigned_byte()));
        let expected: syn::Type = syn::parse_str("*const u8").unwrap();
        assert_eq!(pointer.to_rust_type(), expected);

        let string = DataType::Native(dtrace_parser::DataType::String);
        let expected: syn::Type = syn::parse_str("&str").unwrap();
        assert_eq!(string.to_rust_type(), expected);

        let unique = DataType::UniqueId;
        let expected: syn::Type = syn::parse_str("::usdt::UniqueId").unwrap();
        assert_eq!(unique.to_rust_type(), expected);
    }

    #[test]
    fn test_unique_id() {
        let id = UniqueId::new();
        // Nothing is materialized until the value is requested.
        assert!(id.id.borrow().is_none());
        let x = id.as_u64();
        // The counter half of the first ID created on this thread is 1.
        assert_eq!(x & 0xFFFF_FFFF, 1);
        assert_eq!(id.id.borrow().unwrap(), x);
    }

    #[test]
    fn test_unique_id_clone() {
        let id = UniqueId::new();
        let id2 = id.clone();
        // Cloning materializes the original, and the clone carries the same value.
        assert!(id.id.borrow().is_some());
        assert!(id2.id.borrow().is_some());
        assert_eq!(id.id.borrow().unwrap(), id2.id.borrow().unwrap());

        // The cells inside the two objects must live at different locations. This matters because
        // a clone sent to a different thread then operates on a different cell, so both can
        // borrow the value (either mutably or immutably) without panics.
        assert_ne!(&(id.id) as *const _, &(id2.id) as *const _);
        assert_ne!(id.id.as_ptr(), id2.id.as_ptr());
    }

    #[test]
    fn test_compile_providers_config() {
        let config = CompileProvidersConfig {
            provider: Some("prov".to_string()),
            probe_format: Some("probe_{probe}".to_string()),
            module: Some("not_prov".to_string()),
        };
        assert_eq!(config.format_probe("prob"), "probe_prob");

        // An explicit module name takes precedence over the provider name.
        let module = config.module_ident();
        let actual = quote::quote! { #module }.to_string();
        let expected = quote::quote! { not_prov }.to_string();
        assert_eq!(actual, expected);
    }
}
521}