usdt_impl/
record.rs

1//! Implementation of construction and extraction of custom linker section records used to store
2//! probe information in an object file.
3
4// Copyright 2024 Oxide Computer Company
5//
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9//
10//     http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18use crate::DataType;
19use byteorder::{NativeEndian, ReadBytesExt};
20use dof::{Probe, Provider, Section};
21use std::collections::BTreeMap;
22use std::mem::size_of;
23use std::sync::atomic::AtomicU8;
24use std::sync::atomic::Ordering;
25
// Version number for probe records containing data about all probes.
//
// This is the value `emit_probe_record` writes into the version byte of every record it
// generates.
//
// NOTE: This must have a maximum of `u8::MAX - 1`. The value `u8::MAX` is reserved as the
// sentinel that `read_record_version` swaps into a record once it has been registered, so that
// later passes over the same section skip it. See `read_record_version` for details.
pub(crate) const PROBE_REC_VERSION: u8 = 1;
31
32/// Extract records for all defined probes from our custom linker sections.
33pub fn process_section(mut data: &mut [u8], register: bool) -> Result<Section, crate::Error> {
34    let mut providers = BTreeMap::new();
35
36    while !data.is_empty() {
37        assert!(
38            data.len() >= size_of::<u32>(),
39            "Not enough bytes for length header"
40        );
41        // Read the length without consuming it
42        let len = (&data[..size_of::<u32>()]).read_u32::<NativeEndian>()? as usize;
43        let (rec, rest) = data.split_at_mut(len);
44        process_probe_record(&mut providers, rec, register)?;
45        data = rest;
46    }
47
48    Ok(Section {
49        providers,
50        ..Default::default()
51    })
52}
53
54#[cfg(all(unix, not(target_os = "freebsd")))]
55/// Convert an address in an object file into a function and file name, if possible.
56pub(crate) fn addr_to_info(addr: u64) -> (Option<String>, Option<String>) {
57    unsafe {
58        let mut info = libc::Dl_info {
59            dli_fname: std::ptr::null(),
60            dli_fbase: std::ptr::null_mut(),
61            dli_sname: std::ptr::null(),
62            dli_saddr: std::ptr::null_mut(),
63        };
64        if libc::dladdr(addr as *const libc::c_void, &mut info as *mut _) == 0 {
65            (None, None)
66        } else {
67            (
68                Some(
69                    std::ffi::CStr::from_ptr(info.dli_sname)
70                        .to_string_lossy()
71                        .to_string(),
72                ),
73                Some(
74                    std::ffi::CStr::from_ptr(info.dli_fname)
75                        .to_string_lossy()
76                        .to_string(),
77                ),
78            )
79        }
80    }
81}
82
83// On FreeBSD, dladdr(3M) only examines the dynamic symbol table. Which is pretty useless as it
84// will always return a dli_sname. To workaround this issue, we use `backtrace_symbols_fmt` from
85// libexecinfo, which internally looks in the executable to determine the symbol of the given
86// address.
87// See: https://man.freebsd.org/cgi/man.cgi?query=backtrace&sektion=3
#[cfg(target_os = "freebsd")]
/// Convert an address in an object file into a function and file name, if possible.
///
/// Uses `backtrace_symbols_fmt` from libexecinfo with a format that yields the function name and
/// the file name separated by a newline. Returns `(None, None)` if the lookup fails or its output
/// does not contain that separator.
pub(crate) fn addr_to_info(addr: u64) -> (Option<String>, Option<String>) {
    unsafe {
        #[link(name = "execinfo")]
        extern "C" {
            pub fn backtrace_symbols_fmt(
                _: *const *mut libc::c_void,
                _: libc::size_t,
                _: *const libc::c_char,
            ) -> *mut *mut libc::c_char;
        }

        let addrs_arr = [addr];
        let addrs = addrs_arr.as_ptr() as *const *mut libc::c_void;

        let format = std::ffi::CString::new("%n\n%f").unwrap();
        let symbols = backtrace_symbols_fmt(addrs, 1, format.as_ptr());
        if symbols.is_null() {
            return (None, None);
        }

        // Copy both names into owned strings before the buffer they live in is released.
        let names = std::ffi::CStr::from_ptr(*symbols)
            .to_string_lossy()
            .split_once('\n')
            .map(|(sname, fname)| (sname.to_string(), fname.to_string()));

        // The array returned by `backtrace_symbols_fmt` is allocated with malloc and owned by
        // the caller; without this call it would be leaked once per processed probe record.
        // Only the array itself is freed, not the individual strings.
        libc::free(symbols as *mut libc::c_void);

        match names {
            Some((sname, fname)) => (Some(sname), Some(fname)),
            None => (None, None),
        }
    }
}
120
#[cfg(not(unix))]
/// Convert an address in an object file into a function and file name, if possible.
///
/// No symbol lookup is attempted on non-Unix targets, so both the function name and the file
/// name are always `None`.
pub(crate) fn addr_to_info(_addr: u64) -> (Option<String>, Option<String>) {
    (None, None)
}
126
// Limits used to keep strings within the DTrace-imposed maxima (see dtrace.h). Each limit
// includes the terminating null byte of the resulting C string, which is why
// `limit_string_length` keeps at most `limit - 1` bytes of the string itself.
//
// DTrace appends the PID to the provider name. The exact size is platform dependent, but use the
// largest known value of 999,999 on illumos. MacOS and the BSDs are 32-99K. We take the log to get
// the number of digits (6), and subtract that from the space otherwise available for a provider
// name.
const MAX_PROVIDER_NAME_LEN: usize = 64 - 6;
const MAX_PROBE_NAME_LEN: usize = 64;
const MAX_FUNC_NAME_LEN: usize = 128;
const MAX_ARG_TYPE_LEN: usize = 128;
// Truncate `s` so that it fits, together with a terminating null byte, into a buffer of `limit`
// bytes. The returned string is therefore at most `limit - 1` bytes long; strings that are
// already short enough are returned unchanged.
//
// The cut is always made on a UTF-8 character boundary: if byte `limit - 1` falls inside a
// multi-byte character, that whole character is dropped rather than panicking on an invalid
// slice index. A `limit` of zero yields an empty string.
fn limit_string_length<S: AsRef<str>>(s: S, limit: usize) -> String {
    let s = s.as_ref();
    let mut end = s.len().min(limit.saturating_sub(1));
    // Index 0 is always a character boundary, so this terminates.
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].to_string()
}
143
144// Return the probe record version, atomically updating it if the probe record will be handled.
145fn read_record_version(version: &mut u8, register: bool) -> u8 {
146    // First check if (1) we need to do anything other than read the version and
147    // (2) if this is a version number this compiled crate could feasibly
148    // handle.
149    let ver = *version;
150    if !register || ver > PROBE_REC_VERSION {
151        return ver;
152    }
153
154    // At this point we know we need to potentially update the version, and that
155    // we also have code that can handle it. We'll exchange it with the sentinel
156    // unconditionally.
157    //
158    // If we get back the sentinel, another thread beat us to the punch. If we
159    // get back anything else, it is a version we are capable of handling.
160    //
161    // TODO-safety: We'd love to use `AtomicU8::from_mut`, but that remains a
162    // nightly-only feature. In the meantime, this is safe because we have a
163    // mutable reference to the data in this method, and atomic types are
164    // guaranteed to have the same layout as their inner type.
165    let ver = unsafe { std::mem::transmute::<&mut u8, &AtomicU8>(version) };
166    ver.swap(u8::MAX, Ordering::SeqCst)
167}
168
169// Process a single record from the custom linker section.
170fn process_probe_record(
171    providers: &mut BTreeMap<String, Provider>,
172    rec: &mut [u8],
173    register: bool,
174) -> Result<(), crate::Error> {
175    // First four bytes are the length, next byte is the version number.
176    let (rec, mut data) = {
177        // We need `rec` to be mutable and have type `&mut [u8]`, and `data` to
178        // be mutable, but have type `&[u8]`. Use `split_at_mut` to get two
179        // `&mut [u8]` and then convert the latter to a shared reference.
180        let (rec, data) = rec.split_at_mut(5);
181        (rec, &*data)
182    };
183    let version = read_record_version(&mut rec[4], register);
184
185    // If this record comes from a future version of the data format, we skip it
186    // and hope that the author of main will *also* include a call to a more
187    // recent version. Note that future versions should handle previous formats.
188    //
189    // NOTE: This version check is also used to implement one-time registration of probes. On the
190    // first pass through the probe section, the version is rewritten to `u8::MAX`, so that any
191    // future read of the section skips all previously-read records.
192    if version > PROBE_REC_VERSION {
193        return Ok(());
194    }
195
196    let n_args = data.read_u8()? as usize;
197    let flags = data.read_u16::<NativeEndian>()?;
198    let address = data.read_u64::<NativeEndian>()?;
199    let provname = data.read_cstr();
200    let probename = data.read_cstr();
201    let args = {
202        let mut args = Vec::with_capacity(n_args);
203        for _ in 0..n_args {
204            args.push(limit_string_length(data.read_cstr(), MAX_ARG_TYPE_LEN));
205        }
206        args
207    };
208
209    let funcname = match addr_to_info(address).0 {
210        Some(s) => limit_string_length(s, MAX_FUNC_NAME_LEN),
211        None => format!("?{:#x}", address),
212    };
213
214    let provname = limit_string_length(provname, MAX_PROVIDER_NAME_LEN);
215    let provider = providers.entry(provname.clone()).or_insert(Provider {
216        name: provname,
217        probes: BTreeMap::new(),
218    });
219
220    let probename = limit_string_length(probename, MAX_PROBE_NAME_LEN);
221    let probe = provider.probes.entry(probename.clone()).or_insert(Probe {
222        name: probename,
223        function: funcname,
224        address,
225        offsets: vec![],
226        enabled_offsets: vec![],
227        arguments: vec![],
228    });
229    probe.arguments = args;
230
231    // We expect to get records in address order for a given probe; our offsets
232    // would be negative otherwise.
233    assert!(address >= probe.address);
234
235    if flags == 0 {
236        probe.offsets.push((address - probe.address) as u32);
237    } else {
238        probe.enabled_offsets.push((address - probe.address) as u32);
239    }
240    Ok(())
241}
242
// Extension for reading NUL-terminated strings off the front of a byte cursor.
trait ReadCstrExt<'a> {
    // Return the bytes up to the next zero byte as a string, and advance the cursor past that
    // zero byte.
    fn read_cstr(&mut self) -> &'a str;
}

impl<'a> ReadCstrExt<'a> for &'a [u8] {
    // Panics if the slice contains no zero byte, or if the bytes before it are not valid UTF-8.
    // On a panic the cursor is left where it was.
    fn read_cstr(&mut self) -> &'a str {
        let bytes: &'a [u8] = *self;
        let terminator = bytes
            .iter()
            .position(|&byte| byte == 0)
            .expect("ran out of bytes before we found a zero");

        let (text, remainder) = bytes.split_at(terminator);
        let text = std::str::from_utf8(text).unwrap();
        // `remainder` starts at the terminator itself, which is not part of the next string.
        *self = &remainder[1..];
        text
    }
}
259
260// Construct the ASM record for a probe. If `types` is `None`, then is is an is-enabled probe.
261#[allow(dead_code)]
262pub(crate) fn emit_probe_record(prov: &str, probe: &str, types: Option<&[DataType]>) -> String {
263    #[cfg(not(target_os = "freebsd"))]
264    let section_ident = r#"set_dtrace_probes,"aw","progbits""#;
265    #[cfg(target_os = "freebsd")]
266    let section_ident = r#"set_dtrace_probes,"awR","progbits""#;
267    let is_enabled = types.is_none();
268    let n_args = types.map_or(0, |typ| typ.len());
269    let arguments = types.map_or_else(String::new, |types| {
270        types
271            .iter()
272            .map(|typ| format!(".asciz \"{}\"", typ.to_c_type()))
273            .collect::<Vec<_>>()
274            .join("\n")
275    });
276    format!(
277        r#"
278                    .pushsection {section_ident}
279                    .balign 8
280            991:
281                    .4byte 992f-991b    // length
282                    .byte {version}
283                    .byte {n_args}
284                    .2byte {flags}
285                    .8byte 990b         // address
286                    .asciz "{prov}"
287                    .asciz "{probe}"
288                    {arguments}         // null-terminated strings for each argument
289                    .balign 8
290            992:    .popsection
291                    {yeet}
292        "#,
293        section_ident = section_ident,
294        version = PROBE_REC_VERSION,
295        n_args = n_args,
296        flags = if is_enabled { 1 } else { 0 },
297        prov = prov,
298        probe = probe.replace("__", "-"),
299        arguments = arguments,
300        yeet = if cfg!(any(target_os = "illumos", target_os = "freebsd")) {
301            // The illumos and FreeBSD linkers may yeet our probes section into the trash under
302            // certain conditions. To counteract this, we yeet references to the
303            // probes section into another section. This causes the linker to
304            // retain the probes section.
305            r#"
306                    .pushsection yeet_dtrace_probes
307                    .8byte 991b
308                    .popsection
309                "#
310        } else {
311            ""
312        },
313    )
314}
315
#[cfg(test)]
mod test {
    use std::collections::BTreeMap;

    use byteorder::{NativeEndian, WriteBytesExt};

    use super::emit_probe_record;
    use super::process_probe_record;
    use super::process_section;
    use super::DataType;
    use super::PROBE_REC_VERSION;
    use super::{MAX_PROBE_NAME_LEN, MAX_PROVIDER_NAME_LEN};
    use dtrace_parser::BitWidth;
    use dtrace_parser::DataType as DType;
    use dtrace_parser::Integer;
    use dtrace_parser::Sign;

    // Probe site addresses used by the synthetic records below.
    const FIRST_ADDRESS: u64 = 0x1234;
    const SECOND_ADDRESS: u64 = 0x12ab;

    // Write a string followed by its terminating null byte.
    trait WriteCstrExt {
        fn write_cstr(&mut self, s: &str);
    }

    impl WriteCstrExt for Vec<u8> {
        fn write_cstr(&mut self, s: &str) {
            self.extend_from_slice(s.as_bytes());
            self.push(0);
        }
    }

    // Append one probe record with no arguments and a zero flags field to `buf`.
    //
    // The length field is first written as a placeholder and patched once the size of the
    // record is known.
    fn append_record(buf: &mut Vec<u8>, version: u8, address: u64, provider: &str, probe: &str) {
        let start = buf.len();
        buf.write_u32::<NativeEndian>(0).unwrap();
        buf.write_u8(version).unwrap();
        buf.write_u8(0).unwrap();
        buf.write_u16::<NativeEndian>(0).unwrap();
        buf.write_u64::<NativeEndian>(address).unwrap();
        buf.write_cstr(provider);
        buf.write_cstr(probe);

        let record_len = buf.len() - start;
        (&mut buf[start..])
            .write_u32::<NativeEndian>(record_len as u32)
            .unwrap();
    }

    // Write two probe records, from the same provider.
    //
    // The version argument is used to control the probe record version, which helps test one-time
    // registration of probes.
    fn make_record(version: u8) -> Vec<u8> {
        let mut data = Vec::<u8>::new();
        append_record(&mut data, version, FIRST_ADDRESS, "provider", "probe");
        append_record(&mut data, version, SECOND_ADDRESS, "provider", "probe");
        data
    }

    // The argument types shared by the `emit_probe_record` tests.
    fn sample_types() -> [DataType; 2] {
        [
            DataType::Native(DType::Pointer(Integer {
                sign: Sign::Unsigned,
                width: BitWidth::Bit8,
            })),
            DataType::Native(DType::String),
        ]
    }

    #[test]
    fn test_process_probe_record() {
        let mut rec = Vec::<u8>::new();
        append_record(
            &mut rec,
            PROBE_REC_VERSION,
            FIRST_ADDRESS,
            "provider",
            "probe",
        );

        let mut providers = BTreeMap::new();
        process_probe_record(&mut providers, &mut rec, true).unwrap();

        let provider = providers.get("provider").unwrap();
        let probe = provider.probes.get("probe").unwrap();

        assert_eq!(probe.name, "probe");
        assert_eq!(probe.address, 0x1234);
    }

    #[test]
    fn test_process_probe_record_long_names() {
        let long_name = "p".repeat(130);
        let mut rec = Vec::<u8>::new();
        append_record(
            &mut rec,
            PROBE_REC_VERSION,
            FIRST_ADDRESS,
            &long_name,
            &long_name,
        );

        let mut providers = BTreeMap::new();
        process_probe_record(&mut providers, &mut rec, true).unwrap();

        // Both names must have been cut down to their limit, less the null terminator.
        let expected_provider_name = &long_name[..MAX_PROVIDER_NAME_LEN - 1];
        let expected_probe_name = &long_name[..MAX_PROBE_NAME_LEN - 1];

        assert!(providers.get(&long_name).is_none());
        let provider = providers.get(expected_provider_name).unwrap();
        let probe = provider.probes.get(expected_probe_name).unwrap();

        assert_eq!(probe.name, expected_probe_name);
        assert_eq!(probe.address, 0x1234);
    }

    #[test]
    fn test_process_section() {
        let mut data = make_record(PROBE_REC_VERSION);
        let section = process_section(&mut data, true).unwrap();
        let provider = section.providers.get("provider").unwrap();
        let probe = provider.probes.get("probe").unwrap();

        assert_eq!(probe.name, "probe");
        assert_eq!(probe.address, 0x1234);
        assert_eq!(probe.offsets, vec![0, 0x12ab - 0x1234]);
    }

    #[test]
    fn test_re_process_section() {
        // Ensure that re-processing the same section returns zero probes, as they should have all
        // been previously processed.
        let mut data = make_record(PROBE_REC_VERSION);

        let first_pass = process_section(&mut data, true).unwrap();
        assert_eq!(first_pass.providers.len(), 1);
        assert_eq!(data[4], u8::MAX);

        let second_pass = process_section(&mut data, true).unwrap();
        assert_eq!(data[4], u8::MAX);
        assert_eq!(second_pass.providers.len(), 0);
    }

    #[test]
    fn test_process_section_future_version() {
        // Ensure that we _don't_ modify a future version number in a probe record, but that the
        // probes are still skipped (since by definition we're ignoring future versions).
        let future_version = PROBE_REC_VERSION + 1;
        let mut data = make_record(future_version);
        let section = process_section(&mut data, true).unwrap();
        assert_eq!(section.providers.len(), 0);
        assert_eq!(data[4], future_version);
    }

    #[test]
    fn test_emit_probe_record() {
        let types = sample_types();
        let record = emit_probe_record("provider", "probe", Some(&types));
        println!("{}", record);

        // The record starts with an empty line, followed by the section directive.
        let mut lines = record.lines().skip(1);
        assert!(lines.next().unwrap().contains(".pushsection"));

        // Skip the alignment, label and length lines to reach the version and argument count.
        let version_line = lines.nth(3).unwrap();
        assert!(version_line.contains(&format!(".byte {}", PROBE_REC_VERSION)));
        let n_args_line = lines.next().unwrap();
        assert!(n_args_line.contains(&format!(".byte {}", types.len())));

        // Skip flags, address, provider and probe name to reach the argument types.
        for (typ, line) in types.iter().zip(lines.skip(4)) {
            assert!(line.contains(&format!(".asciz \"{}\"", typ.to_c_type())));
        }
    }

    #[test]
    fn test_emit_probe_record_dunders() {
        let types = sample_types();
        let record = emit_probe_record("provider", "my__probe", Some(&types));
        assert!(
            record.contains("my-probe"),
            "Expected double-underscores to be translated to a single dash"
        );
    }
}