symbolic_symcache/lib.rs
1//! Provides SymCache support.
2//!
3//! This includes a reader and writer for the binary format, as well as helper traits and functions
4//! to apply transformations to debugging symbols before they are written to the SymCache.
5//!
6//! # Structure of a SymCache
7//!
//! A SymCache (version 7 or 8) contains the following primary kinds of data, written in the
//! following order:
10//!
11//! 1. Files
12//! 2. Functions
13//! 3. Source Locations
14//! 4. Address Ranges
15//! 5. String Data
16//!
17//! The format uses `u32`s to represent line numbers, addresses, references, and string offsets.
18//! Line numbers use `0` to represent an unknown or invalid value. Addresses, references, and string
19//! offsets instead use `u32::MAX`.
20//!
//! Strings are saved in one contiguous section with each individual string prefixed by its
//! length. As of version 8 the length prefix is LEB128-encoded; version 7 uses a 4-byte `u32`
//! prefix instead. Functions and files refer to strings by an offset into this string section,
//! hence "string offset".
24//!
25//! ## Files
26//!
27//! A file contains string offsets for its file name, parent directory, and compilation directory.
28//!
29//! ## Functions
30//!
31//! A function contains string offsets for its name and compilation directory, a u32 for its entry
32//! address, and a u32 representing the source language. The name is non-optional, i.e., the name
33//! index should always point to a valid string.
34//!
35//! ## Address Ranges
36//!
37//! Ranges are saved as a contiguous list of `u32`s, representing their starting addresses.
38//!
39//! ## Source Locations
40//!
41//! A source location in a symcache represents a possibly-inlined copy of a line in a source file.
42//! It contains a line number, a reference to a file (see above), a reference to a function (ditto),
43//! and a reference to the source location into which this source location was inlined. All of these
44//! data except for the function are optional.
45//!
46//! ## Mapping From Ranges To Source Locations
47//!
48//! Every range in the SymCache is associated with at least one source location. As mentioned above,
49//! each source location may in turn have a reference to a source location into which it is inlined.
50//! Conceptually, each address range points to a sequence of source locations, representing a
51//! hierarchy of inlined function calls.
52//!
53//! ### Example
54//!
55//! The mapping
56//!
57//! - `0x0001 - 0x002f`
58//! - `trigger_crash` in file `b.c`, line 12
59//! - inlined into `main` in file `a.c`, line 10
60//! - `0x002f - 0x004a`
61//! - `trigger_crash` in file `b.c`, line 13
62//! - inlined into `main` in file `a.c`, line 10
63//!
64//! is represented like this in the SymCache (function/file name strings inlined for simplicity):
65//! ```text
66//! ranges: [
67//! 0x0001 -> 1
68//! 0x002f -> 2
69//! ]
70//!
71//! source_locations: [{
72//! file: "a.c"
73//! line: 10
74//! function: "main"
75//! inlined_into: u32::MAX (not inlined)
76//! }, {
77//! file: "b.c"
78//! line: 12
79//! function: "trigger_crash"
80//! inlined_into: 0 <- index reference to "main"
81//! }, {
82//! file: "b.c"
83//! line: 13
84//! function: "trigger_crash"
85//! inlined_into: 0 <- index reference to "main"
86//! }]
87//! ```
88//!
89//! # Lookups
90//!
91//! To look up an address `addr` in a SymCache:
92//!
93//! 1. Find the range covering `addr` via binary search.
94//! 2. Find the source location belonging to this range.
95//! 3. Return an iterator over a series of source locations that starts at the source location found
96//! in step 2. The iterator climbs up through the inlining hierarchy, ending at the root source
97//! location.
98//!
99//! The returned source locations contain accessor methods for their function, file, and line
100//! number.
101
102#![warn(missing_docs)]
103
104mod error;
105mod lookup;
106mod raw;
107pub mod transform;
108mod writer;
109
110use symbolic_common::Arch;
111use symbolic_common::AsSelf;
112use symbolic_common::DebugId;
113use watto::StringTable;
114use watto::{align_to, Pod};
115
116pub use error::{Error, ErrorKind};
117pub use lookup::*;
118pub use writer::SymCacheConverter;
119
/// Result alias whose error type defaults to this crate's [`Error`].
type Result<T, E = Error> = std::result::Result<T, E>;
121
/// The latest version of the file format.
///
/// [`SymCache::parse`] accepts files of this version as well as version 7 files.
///
/// Version history:
///
/// 1: Initial implementation
/// 2: PR #58:  Migrate from UUID to Debug ID
/// 3: PR #148: Consider all PT_LOAD segments in ELF
/// 4: PR #155: Functions with more than 65k line records
/// 5: PR #221: Invalid inlinee nesting leading to wrong stack traces
/// 6: PR #319: Correct line offsets and spacer line records
/// 7: PR #459: A new binary format fundamentally based on addr ranges
/// 8: PR #670: Use LEB128-prefixed string table
pub const SYMCACHE_VERSION: u32 = 8;
135
/// The serialized SymCache binary format.
///
/// This can be parsed from a binary buffer via [`SymCache::parse`] and lookups on it can be performed
/// via the [`SymCache::lookup`] method.
///
/// Every field is a reference with the `'data` lifetime of the buffer handed to
/// [`SymCache::parse`].
#[derive(Clone, PartialEq, Eq)]
pub struct SymCache<'data> {
    /// The header read from the start of the buffer; carries the magic, version, debug id,
    /// architecture, and the element counts of the sections below.
    header: &'data raw::Header,
    /// The file records (`header.num_files` entries).
    files: &'data [raw::File],
    /// The function records (`header.num_functions` entries).
    functions: &'data [raw::Function],
    /// The source location records (`header.num_source_locations` entries).
    source_locations: &'data [raw::SourceLocation],
    /// The address range records (`header.num_ranges` entries).
    ranges: &'data [raw::Range],
    /// The string data: everything left in the buffer after the (aligned) ranges section.
    /// `parse` only guarantees that this is at least `header.string_bytes` bytes long.
    string_bytes: &'data [u8],
}
149
150impl std::fmt::Debug for SymCache<'_> {
151 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152 f.debug_struct("SymCache")
153 .field("version", &self.header.version)
154 .field("debug_id", &self.header.debug_id)
155 .field("arch", &self.header.arch)
156 .field("files", &self.header.num_files)
157 .field("functions", &self.header.num_functions)
158 .field("source_locations", &self.header.num_source_locations)
159 .field("ranges", &self.header.num_ranges)
160 .field("string_bytes", &self.header.string_bytes)
161 .finish()
162 }
163}
164
165impl<'data> SymCache<'data> {
166 /// Parse the SymCache binary format into a convenient type that allows safe access and
167 /// fast lookups.
168 pub fn parse(buf: &'data [u8]) -> Result<Self> {
169 let (header, rest) = raw::Header::ref_from_prefix(buf).ok_or(ErrorKind::InvalidHeader)?;
170 if header.magic == raw::SYMCACHE_MAGIC_FLIPPED {
171 return Err(ErrorKind::WrongEndianness.into());
172 }
173 if header.magic != raw::SYMCACHE_MAGIC {
174 return Err(ErrorKind::WrongFormat.into());
175 }
176 if header.version != SYMCACHE_VERSION && header.version != 7 {
177 return Err(ErrorKind::WrongVersion.into());
178 }
179
180 let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFiles)?;
181 let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize)
182 .ok_or(ErrorKind::InvalidFiles)?;
183
184 let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFunctions)?;
185 let (functions, rest) =
186 raw::Function::slice_from_prefix(rest, header.num_functions as usize)
187 .ok_or(ErrorKind::InvalidFunctions)?;
188
189 let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidSourceLocations)?;
190 let (source_locations, rest) =
191 raw::SourceLocation::slice_from_prefix(rest, header.num_source_locations as usize)
192 .ok_or(ErrorKind::InvalidSourceLocations)?;
193
194 let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidRanges)?;
195 let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize)
196 .ok_or(ErrorKind::InvalidRanges)?;
197
198 let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::UnexpectedStringBytes {
199 expected: header.string_bytes as usize,
200 found: 0,
201 })?;
202 if rest.len() < header.string_bytes as usize {
203 return Err(ErrorKind::UnexpectedStringBytes {
204 expected: header.string_bytes as usize,
205 found: rest.len(),
206 }
207 .into());
208 }
209
210 Ok(SymCache {
211 header,
212 files,
213 functions,
214 source_locations,
215 ranges,
216 string_bytes: rest,
217 })
218 }
219
220 /// Resolves a string reference to the pointed-to `&str` data.
221 fn get_string(&self, offset: u32) -> Option<&'data str> {
222 if self.header.version >= 8 {
223 // version >= 8: string length prefixes are LEB128
224 StringTable::read(self.string_bytes, offset as usize).ok()
225 } else {
226 // version < 8: string length prefixes are u32
227 if offset == u32::MAX {
228 return None;
229 }
230 let len_offset = offset as usize;
231 let len_size = std::mem::size_of::<u32>();
232 let len = u32::from_ne_bytes(
233 self.string_bytes
234 .get(len_offset..len_offset + len_size)?
235 .try_into()
236 .unwrap(),
237 ) as usize;
238
239 let start_offset = len_offset + len_size;
240 let end_offset = start_offset + len;
241 let bytes = self.string_bytes.get(start_offset..end_offset)?;
242
243 std::str::from_utf8(bytes).ok()
244 }
245 }
246
247 /// The version of the SymCache file format.
248 pub fn version(&self) -> u32 {
249 self.header.version
250 }
251
252 /// Returns true if this symcache's version is the current version of the format.
253 pub fn is_latest(&self) -> bool {
254 self.header.version == SYMCACHE_VERSION
255 }
256
257 /// The architecture of the symbol file.
258 pub fn arch(&self) -> Arch {
259 self.header.arch
260 }
261
262 /// The debug identifier of the cache file.
263 pub fn debug_id(&self) -> DebugId {
264 self.header.debug_id
265 }
266}
267
/// Allows a `SymCache<'d>` to be viewed as a `SymCache<'slf>` for any shorter borrow `'slf`.
impl<'slf, 'd: 'slf> AsSelf<'slf> for SymCache<'d> {
    type Ref = SymCache<'slf>;

    /// Returns `self`, with the data lifetime shortened to the lifetime of the borrow.
    fn as_self(&'slf self) -> &'slf Self::Ref {
        self
    }
}