symbolic_symcache/
lib.rs

1//! Provides SymCache support.
2//!
3//! This includes a reader and writer for the binary format, as well as helper traits and functions
4//! to apply transformations to debugging symbols before they are written to the SymCache.
5//!
6//! # Structure of a SymCache
7//!
//! A SymCache (version 7 or 8) contains the following primary kinds of data, written in the
//! following order:
10//!
11//! 1. Files
12//! 2. Functions
13//! 3. Source Locations
14//! 4. Address Ranges
15//! 5. String Data
16//!
17//! The format uses `u32`s to represent line numbers, addresses, references, and string offsets.
18//! Line numbers use `0` to represent an unknown or invalid value. Addresses, references, and string
19//! offsets instead use `u32::MAX`.
20//!
//! Strings are saved in one contiguous section, each individual string prefixed by its length:
//! a 4-byte `u32` in version 7, or a LEB128-encoded integer in version 8 and later. Functions and
//! files refer to strings by an offset into this string section, hence "string offset".
24//!
25//! ## Files
26//!
27//! A file contains string offsets for its file name, parent directory, and compilation directory.
28//!
29//! ## Functions
30//!
31//! A function contains string offsets for its name and compilation directory, a u32 for its entry
32//! address, and a u32 representing the source language. The name is non-optional, i.e., the name
33//! index should always point to a valid string.
34//!
35//! ## Address Ranges
36//!
37//! Ranges are saved as a contiguous list of `u32`s, representing their starting addresses.
38//!
39//! ## Source Locations
40//!
41//! A source location in a symcache represents a possibly-inlined copy of a line in a source file.
42//! It contains a line number, a reference to a file (see above), a reference to a function (ditto),
43//! and a reference to the source location into which this source location was inlined. All of these
44//! data except for the function are optional.
45//!
46//! ## Mapping From Ranges To Source Locations
47//!
48//! Every range in the SymCache is associated with at least one source location. As mentioned above,
49//! each source location may in turn have a reference to a source location into which it is inlined.
50//! Conceptually, each address range points to a sequence of source locations, representing a
51//! hierarchy of inlined function calls.
52//!
53//! ### Example
54//!
55//! The mapping
56//!
57//! - `0x0001 - 0x002f`
58//!   - `trigger_crash` in file `b.c`, line 12
59//!   - inlined into `main` in file `a.c`, line 10
60//! - `0x002f - 0x004a`
61//!   - `trigger_crash` in file `b.c`, line 13
62//!   - inlined into `main` in file `a.c`, line 10
63//!
64//! is represented like this in the SymCache (function/file name strings inlined for simplicity):
65//! ```text
66//! ranges: [
67//!     0x0001 -> 1
68//!     0x002f -> 2
69//! ]
70//!
71//! source_locations: [{
72//!     file: "a.c"
73//!     line: 10
74//!     function: "main"
75//!     inlined_into: u32::MAX (not inlined)
76//! }, {
77//!     file: "b.c"
78//!     line: 12
79//!     function: "trigger_crash"
80//!     inlined_into: 0 <- index reference to "main"
81//! }, {
82//!     file: "b.c"
83//!     line: 13
84//!     function: "trigger_crash"
85//!     inlined_into: 0 <- index reference to "main"
86//! }]
87//! ```
88//!
89//! # Lookups
90//!
91//! To look up an address `addr` in a SymCache:
92//!
93//! 1. Find the range covering `addr` via binary search.
94//! 2. Find the source location belonging to this range.
95//! 3. Return an iterator over a series of source locations that starts at the source location found
96//!    in step 2. The iterator climbs up through the inlining hierarchy, ending at the root source
97//!    location.
98//!
99//! The returned source locations contain accessor methods for their function, file, and line
100//! number.
101
102#![warn(missing_docs)]
103
104mod error;
105mod lookup;
106mod raw;
107pub mod transform;
108mod writer;
109
110use symbolic_common::Arch;
111use symbolic_common::AsSelf;
112use symbolic_common::DebugId;
113use watto::StringTable;
114use watto::{align_to, Pod};
115
116pub use error::{Error, ErrorKind};
117pub use lookup::*;
118pub use writer::SymCacheConverter;
119
120type Result<T, E = Error> = std::result::Result<T, E>;
121
/// The latest version of the file format.
///
/// # Version history
///
/// - `1`: Initial implementation
/// - `2`: PR #58: Migrate from UUID to Debug ID
/// - `3`: PR #148: Consider all PT_LOAD segments in ELF
/// - `4`: PR #155: Functions with more than 65k line records
/// - `5`: PR #221: Invalid inlinee nesting leading to wrong stack traces
/// - `6`: PR #319: Correct line offsets and spacer line records
/// - `7`: PR #459: A new binary format fundamentally based on addr ranges
/// - `8`: PR #670: Use LEB128-prefixed string table
pub const SYMCACHE_VERSION: u32 = 8;
135
136/// The serialized SymCache binary format.
137///
138/// This can be parsed from a binary buffer via [`SymCache::parse`] and lookups on it can be performed
139/// via the [`SymCache::lookup`] method.
140#[derive(Clone, PartialEq, Eq)]
141pub struct SymCache<'data> {
142    header: &'data raw::Header,
143    files: &'data [raw::File],
144    functions: &'data [raw::Function],
145    source_locations: &'data [raw::SourceLocation],
146    ranges: &'data [raw::Range],
147    string_bytes: &'data [u8],
148}
149
150impl std::fmt::Debug for SymCache<'_> {
151    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152        f.debug_struct("SymCache")
153            .field("version", &self.header.version)
154            .field("debug_id", &self.header.debug_id)
155            .field("arch", &self.header.arch)
156            .field("files", &self.header.num_files)
157            .field("functions", &self.header.num_functions)
158            .field("source_locations", &self.header.num_source_locations)
159            .field("ranges", &self.header.num_ranges)
160            .field("string_bytes", &self.header.string_bytes)
161            .finish()
162    }
163}
164
165impl<'data> SymCache<'data> {
166    /// Parse the SymCache binary format into a convenient type that allows safe access and
167    /// fast lookups.
168    pub fn parse(buf: &'data [u8]) -> Result<Self> {
169        let (header, rest) = raw::Header::ref_from_prefix(buf).ok_or(ErrorKind::InvalidHeader)?;
170        if header.magic == raw::SYMCACHE_MAGIC_FLIPPED {
171            return Err(ErrorKind::WrongEndianness.into());
172        }
173        if header.magic != raw::SYMCACHE_MAGIC {
174            return Err(ErrorKind::WrongFormat.into());
175        }
176        if header.version != SYMCACHE_VERSION && header.version != 7 {
177            return Err(ErrorKind::WrongVersion.into());
178        }
179
180        let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFiles)?;
181        let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize)
182            .ok_or(ErrorKind::InvalidFiles)?;
183
184        let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFunctions)?;
185        let (functions, rest) =
186            raw::Function::slice_from_prefix(rest, header.num_functions as usize)
187                .ok_or(ErrorKind::InvalidFunctions)?;
188
189        let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidSourceLocations)?;
190        let (source_locations, rest) =
191            raw::SourceLocation::slice_from_prefix(rest, header.num_source_locations as usize)
192                .ok_or(ErrorKind::InvalidSourceLocations)?;
193
194        let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidRanges)?;
195        let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize)
196            .ok_or(ErrorKind::InvalidRanges)?;
197
198        let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::UnexpectedStringBytes {
199            expected: header.string_bytes as usize,
200            found: 0,
201        })?;
202        if rest.len() < header.string_bytes as usize {
203            return Err(ErrorKind::UnexpectedStringBytes {
204                expected: header.string_bytes as usize,
205                found: rest.len(),
206            }
207            .into());
208        }
209
210        Ok(SymCache {
211            header,
212            files,
213            functions,
214            source_locations,
215            ranges,
216            string_bytes: rest,
217        })
218    }
219
220    /// Resolves a string reference to the pointed-to `&str` data.
221    fn get_string(&self, offset: u32) -> Option<&'data str> {
222        if self.header.version >= 8 {
223            // version >= 8: string length prefixes are LEB128
224            StringTable::read(self.string_bytes, offset as usize).ok()
225        } else {
226            // version < 8: string length prefixes are u32
227            if offset == u32::MAX {
228                return None;
229            }
230            let len_offset = offset as usize;
231            let len_size = std::mem::size_of::<u32>();
232            let len = u32::from_ne_bytes(
233                self.string_bytes
234                    .get(len_offset..len_offset + len_size)?
235                    .try_into()
236                    .unwrap(),
237            ) as usize;
238
239            let start_offset = len_offset + len_size;
240            let end_offset = start_offset + len;
241            let bytes = self.string_bytes.get(start_offset..end_offset)?;
242
243            std::str::from_utf8(bytes).ok()
244        }
245    }
246
247    /// The version of the SymCache file format.
248    pub fn version(&self) -> u32 {
249        self.header.version
250    }
251
252    /// Returns true if this symcache's version is the current version of the format.
253    pub fn is_latest(&self) -> bool {
254        self.header.version == SYMCACHE_VERSION
255    }
256
257    /// The architecture of the symbol file.
258    pub fn arch(&self) -> Arch {
259        self.header.arch
260    }
261
262    /// The debug identifier of the cache file.
263    pub fn debug_id(&self) -> DebugId {
264        self.header.debug_id
265    }
266}
267
268impl<'slf, 'd: 'slf> AsSelf<'slf> for SymCache<'d> {
269    type Ref = SymCache<'slf>;
270
271    fn as_self(&'slf self) -> &'slf Self::Ref {
272        self
273    }
274}