samply_symbols/shared.rs
1#[cfg(feature = "partial_read_stats")]
2use std::cell::RefCell;
3use std::fmt::{Debug, Display};
4use std::future::Future;
5use std::marker::PhantomData;
6use std::ops::{Deref, Range};
7use std::str::FromStr;
8use std::sync::Arc;
9
10#[cfg(feature = "partial_read_stats")]
11use bitvec::{bitvec, prelude::BitVec};
12use debugid::DebugId;
13use object::read::ReadRef;
14use object::FileFlags;
15use uuid::Uuid;
16
17use crate::mapped_path::MappedPath;
18use crate::symbol_map::SymbolMapTrait;
19
/// The boxed error type returned by the fallible methods of the helper traits
/// in this file. Any error that is `Send + Sync + 'static` converts into it.
pub type FileAndPathHelperError = Box<dyn std::error::Error + Send + Sync + 'static>;

/// A `Result` whose error type is [`FileAndPathHelperError`].
pub type FileAndPathHelperResult<T> = Result<T, FileAndPathHelperError>;
22
// `OptionallySendFuture` lets a cargo feature decide whether the futures used
// by this crate have to be `Send`.
//
// Why this exists:
// - A "+ Send" bound on the return types of the `FileAndPathHelper` trait
//   methods trickles all the way down to the root async functions exposed by
//   this crate.
// - There are two consumers with opposite needs. One requires `Send` futures
//   from those root functions (hyper/tokio, in profiler-symbol-server). The
//   other cannot return `Send` futures from the trait methods at all (the
//   wasm/js implementation: `JsFuture`s are not `Send`).
//
// The `send_futures` cargo feature picks one of the two definitions below.
// If there is a better way to do this, it would be very welcome.

/// A `Future` which is additionally required to be `Send`, because the
/// `send_futures` feature is enabled.
#[cfg(feature = "send_futures")]
pub trait OptionallySendFuture: Future + Send {}

#[cfg(feature = "send_futures")]
impl<F: Future + Send> OptionallySendFuture for F {}

/// A `Future` with no `Send` requirement, because the `send_futures` feature
/// is disabled.
#[cfg(not(feature = "send_futures"))]
pub trait OptionallySendFuture: Future {}

#[cfg(not(feature = "send_futures"))]
impl<F: Future> OptionallySendFuture for F {}
45
46#[derive(Debug)]
47pub enum CandidatePathInfo<FL: FileLocation> {
48 SingleFile(FL),
49 InDyldCache {
50 dyld_cache_path: FL,
51 dylib_path: String,
52 },
53}
54
/// An address to look up in a `SymbolMap`.
///
/// In most cases `LookupAddress::Relative` is the right choice: it expresses
/// the address relative to an "image base address", and it is the one form
/// which every kind of symbol map supports on every platform.
///
/// A word of caution for testing: in many binaries all three representations
/// happen to have the same numeric value, because the image base address is
/// often zero and each section's address often equals its file offset. Passing
/// an address in the wrong form can therefore go unnoticed until a binary with
/// a more complex layout comes along.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum LookupAddress {
    /// An address relative to the image base address.
    ///
    /// The exact meaning depends on the binary format:
    ///
    /// - Windows: the same thing as an RVA ("relative virtual address") in the
    ///   PE file.
    /// - macOS: relative to the start of the `__TEXT` segment.
    /// - Linux / ELF: relative to the address of the first LOAD command in the
    ///   program header table, i.e. relative to the start of the first segment.
    /// - Jitdump files: the "relative address" space is a conceptual space in
    ///   which the code of all `JIT_CODE_LOAD` records is laid out one after
    ///   the other, starting at 0. The relative address of an instruction in a
    ///   `JIT_CODE_LOAD` record is thus the sum of the `code_size` fields of
    ///   all earlier `JIT_CODE_LOAD` records, plus the instruction's offset
    ///   within the code of its own record.
    ///
    /// See [`relative_address_base`] for more information.
    Relative(u32),
    /// A "stated virtual memory address": a virtual memory address as it is
    /// written down in the binary. For mach-O and ELF this is the address space
    /// used by section addresses and symbol addresses, and it is the kind of
    /// address you would hand to the Linux `addr2line` tool.
    ///
    /// Symbol maps for PDB files or Breakpad files do not support this form.
    Svma(u64),
    /// A raw offset into the binary file, pointing at the stored bytes of the
    /// instruction whose symbols should be looked up.
    ///
    /// On Linux, when starting from an "AVMA" (absolute virtual memory address)
    /// and the process's `/proc/<pid>/maps`, this is probably the easiest form
    /// to compute, since the process maps list file offsets.
    ///
    /// Keep in mind that a file offset is often *not* equal to the SVMA. Using
    /// a file offset where an SVMA is expected will give wrong results; it may
    /// appear to work for some binaries and then break for others.
    ///
    /// Symbol maps for PDB files or Breakpad files do not support file offsets.
    FileOffset(u64),
}
113
114/// In case the loaded binary contains multiple architectures, this specifies
115/// how to resolve the ambiguity. This is only needed on macOS.
116#[derive(Debug, Clone)]
117pub enum MultiArchDisambiguator {
118 /// Disambiguate by CPU architecture (exact match).
119 ///
120 /// This string is a name for what mach-O calls the "CPU type" and "CPU subtype".
121 /// Examples are `x86_64`, `x86_64h`, `arm64`, `arm64e`.
122 ///
123 /// These strings are returned by the mach function `macho_arch_name_for_cpu_type`.
124 Arch(String),
125
126 /// Disambiguate by CPU architecture (best match).
127 ///
128 /// The Vec contains the first choice, followed by acceptable fallback choices.
129 /// Examples are `["arm64e", "arm64"]` or `["x86_64h", "x86_64"]`.
130 /// This is used in cases where you have lost information about the architecture
131 /// you're interested in and just want to hope to get the right one.
132 ///
133 /// The strings are names for what mach-O calls the "CPU type" and "CPU subtype".
134 /// Examples are `x86_64`, `x86_64h`, `arm64`, `arm64e`.
135 ///
136 /// These strings are returned by the mach function `macho_arch_name_for_cpu_type`.
137 BestMatch(Vec<String>),
138
139 /// Disambiguate by CPU architecture and find the best match for the architecture
140 /// that is currently executing this code. This is a heuristic, and should only
141 /// be used in cases where you have lost information about the architecture you're
142 /// interested in.
143 BestMatchForNative,
144
145 /// Disambiguate by `DebugId`.
146 DebugId(DebugId),
147}
148
149/// An enum carrying an identifier for a binary. This is stores the same information
150/// as a [`debugid::CodeId`], but without projecting it down to a string.
151///
152/// All types need to be treated rather differently, see their respective documentation.
153#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
154pub enum CodeId {
155 /// The code ID for a Windows PE file. When combined with the binary name,
156 /// the code ID lets you obtain binaries from symbol servers. It is not useful
157 /// on its own, it has to be paired with the binary name.
158 ///
159 /// On Windows, a binary's code ID is distinct from its debug ID (= pdb GUID + age).
160 /// If you have a binary file, you can get both the code ID and the debug ID
161 /// from it. If you only have a PDB file, you usually *cannot* get the code ID of
162 /// the corresponding binary from it.
163 PeCodeId(PeCodeId),
164
165 /// The code ID for a macOS / iOS binary (mach-O). This is just the mach-O UUID.
166 /// The mach-O UUID is shared between both the binary file and the debug file (dSYM),
167 /// and it can be used on its own to find dSYMs using Spotlight.
168 ///
169 /// The debug ID and the code ID contain the same information; the debug ID
170 /// is literally just the UUID plus a zero at the end.
171 MachoUuid(Uuid),
172
173 /// The code ID for a Linux ELF file. This is the "ELF build ID" (also called "GNU build ID").
174 /// The build ID is usually 20 bytes, commonly written out as 40 hex chars.
175 ///
176 /// It can be used to find debug files on the local file system or to download
177 /// binaries or debug files from a `debuginfod` symbol server. it does not have to be
178 /// paired with the binary name.
179 ///
180 /// An ELF binary's code ID is more useful than its debug ID: The debug ID is truncated
181 /// to 16 bytes (32 hex characters), whereas the code ID is the full ELF build ID.
182 ElfBuildId(ElfBuildId),
183}
184
185impl FromStr for CodeId {
186 type Err = ();
187
188 fn from_str(s: &str) -> Result<Self, Self::Err> {
189 if s.len() <= 17 {
190 // 8 bytes timestamp + 1 to 8 bytes of image size
191 Ok(CodeId::PeCodeId(PeCodeId::from_str(s)?))
192 } else if s.len() == 32 && is_uppercase_hex(s) {
193 // mach-O UUID
194 Ok(CodeId::MachoUuid(Uuid::from_str(s).map_err(|_| ())?))
195 } else {
196 // ELF build ID. These are usually 40 hex characters (= 20 bytes).
197 Ok(CodeId::ElfBuildId(ElfBuildId::from_str(s)?))
198 }
199 }
200}
201
/// Returns true if every character of `s` is one of `0-9` or `A-F`.
/// An empty string counts as uppercase hex.
fn is_uppercase_hex(s: &str) -> bool {
    // Non-ASCII characters consist solely of bytes >= 0x80, so checking bytes
    // gives the same answer as checking chars.
    s.bytes()
        .all(|byte| matches!(byte, b'0'..=b'9' | b'A'..=b'F'))
}
206
207impl std::fmt::Display for CodeId {
208 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209 match self {
210 CodeId::PeCodeId(pe) => std::fmt::Display::fmt(pe, f),
211 CodeId::MachoUuid(uuid) => f.write_fmt(format_args!("{:X}", uuid.simple())),
212 CodeId::ElfBuildId(elf) => std::fmt::Display::fmt(elf, f),
213 }
214 }
215}
216
/// The code ID for a Windows PE file.
///
/// When combined with the binary name, the `PeCodeId` lets you obtain binaries from
/// symbol servers. It is not useful on its own, it has to be paired with the binary name.
///
/// A Windows binary's `PeCodeId` is distinct from its debug ID (= pdb GUID + age).
/// If you have a binary file, you can get both the `PeCodeId` and the debug ID
/// from it. If you only have a PDB file, you usually *cannot* get the `PeCodeId` of
/// the corresponding binary from it.
///
/// The string form is the timestamp as exactly 8 uppercase hex digits, directly
/// followed by the image size as lowercase hex digits without padding.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PeCodeId {
    /// The timestamp part of the code ID.
    pub timestamp: u32,
    /// The image size part of the code ID.
    pub image_size: u32,
}

impl FromStr for PeCodeId {
    type Err = ();

    /// Parses the string form of a PE code ID: 8 hex digits of timestamp
    /// followed by 1 to 8 hex digits of image size (9 to 16 characters total).
    /// Hex digits of either case are accepted.
    ///
    /// Returns `Err(())` if the length is out of range or if `s` contains
    /// anything other than ASCII hex digits. In particular, sign characters and
    /// non-ASCII input are rejected rather than causing a panic.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if !(9..=16).contains(&s.len()) {
            return Err(());
        }
        // `u32::from_str_radix` would accept a leading `+`, and slicing a `str`
        // at a byte offset panics if the offset is inside a multi-byte
        // character. Requiring ASCII hex digits up front rules out both.
        if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(());
        }
        // All bytes are ASCII, so byte offset 8 is a valid char boundary.
        let (timestamp_hex, image_size_hex) = s.split_at(8);
        let timestamp = u32::from_str_radix(timestamp_hex, 16).map_err(|_| ())?;
        let image_size = u32::from_str_radix(image_size_hex, 16).map_err(|_| ())?;
        Ok(Self {
            timestamp,
            image_size,
        })
    }
}

impl std::fmt::Display for PeCodeId {
    /// Writes the timestamp as 8 uppercase hex digits followed by the image
    /// size as unpadded lowercase hex digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:08X}{:x}", self.timestamp, self.image_size)
    }
}
253
/// The build ID for an ELF file (also called "GNU build ID").
///
/// The build ID can be used to find debug files on the local file system or to download
/// binaries or debug files from a `debuginfod` symbol server. It does not have to be
/// paired with the binary name.
///
/// The string form is the lowercase hex encoding of the bytes.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ElfBuildId(pub Vec<u8>);

impl ElfBuildId {
    /// Create a new `ElfBuildId` from a slice of bytes (commonly a sha1 hash
    /// generated by the linker, i.e. 20 bytes).
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self(bytes.to_owned())
    }
}

impl FromStr for ElfBuildId {
    type Err = ();

    /// Parses a hex string into build ID bytes, two hex digits per byte. Hex
    /// digits of either case are accepted.
    ///
    /// If `s` has an odd number of bytes, the final unpaired byte is ignored;
    /// this matches the long-standing behavior of this parser.
    ///
    /// Returns `Err(())` if any pair contains something other than ASCII hex
    /// digits. In particular, sign characters and non-ASCII input are rejected
    /// rather than being accepted or causing a panic.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        /// Decodes one ASCII hex digit into its value.
        fn nibble(byte: u8) -> Result<u8, ()> {
            // Bytes >= 0x80 map to U+0080..=U+00FF, none of which is a hex digit.
            char::from(byte)
                .to_digit(16)
                .map(|digit| digit as u8)
                .ok_or(())
        }

        // Work on bytes rather than on `str` slices: slicing a `str` at a byte
        // offset panics when the offset is inside a multi-byte character, and
        // `u8::from_str_radix` would accept a leading `+`.
        s.as_bytes()
            .chunks_exact(2)
            .map(|pair| -> Result<u8, ()> { Ok((nibble(pair[0])? << 4) | nibble(pair[1])?) })
            .collect::<Result<Vec<u8>, ()>>()
            .map(Self)
    }
}

impl std::fmt::Display for ElfBuildId {
    /// Writes every byte as two lowercase hex digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for byte in &self.0 {
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}
293
294/// Information about a library ("binary" / "module" / "DSO") which allows finding
295/// symbol files for it. The information can be partial.
296#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
297pub struct LibraryInfo {
298 pub debug_name: Option<String>,
299 pub debug_id: Option<DebugId>,
300 pub debug_path: Option<String>,
301 pub name: Option<String>,
302 pub code_id: Option<CodeId>,
303 pub path: Option<String>,
304 pub arch: Option<String>,
305}
306
307impl LibraryInfo {
308 /// Fill all `None` fields on this object with the corresponding fields from `other`.
309 ///
310 /// This should only be called if some minimal matching has been established, for
311 /// example if the `code_id` matches or if the combination pair `debug_name, debug_id`
312 /// matches.
313 pub fn absorb(&mut self, other: &LibraryInfo) {
314 if self.debug_name.is_none() && other.debug_name.is_some() {
315 self.debug_name.clone_from(&other.debug_name);
316 }
317 if self.debug_id.is_none() && other.debug_id.is_some() {
318 self.debug_id = other.debug_id;
319 }
320 if self.debug_path.is_none() && other.debug_path.is_some() {
321 self.debug_path.clone_from(&other.debug_path);
322 }
323 if self.name.is_none() && other.name.is_some() {
324 self.name.clone_from(&other.name);
325 }
326 if self.code_id.is_none() && other.code_id.is_some() {
327 self.code_id.clone_from(&other.code_id);
328 }
329 if self.path.is_none() && other.path.is_some() {
330 self.path.clone_from(&other.path);
331 }
332 if self.arch.is_none() && other.arch.is_some() {
333 self.arch.clone_from(&other.arch);
334 }
335 }
336}
337
338/// This is the trait that consumers need to implement so that they can call
339/// the main entry points of this crate. This crate contains no direct file
340/// access - all access to the file system is via this trait, and its associated
341/// trait `FileContents`.
342pub trait FileAndPathHelper {
343 type F: FileContents + 'static;
344 type FL: FileLocation + 'static;
345
346 /// Given a "debug name" and a "breakpad ID", return a list of file paths
347 /// which may potentially have artifacts containing symbol data for the
348 /// requested binary (executable or library).
349 ///
350 /// The symbolication methods will try these paths one by one, calling
351 /// `load_file` for each until it succeeds and finds a file whose contents
352 /// match the breakpad ID. Any remaining paths are discarded.
353 ///
354 /// # Arguments
355 ///
356 /// - `debug_name`: On Windows, this is the filename of the associated PDB
357 /// file of the executable / DLL, for example "firefox.pdb" or "xul.pdb". On
358 /// non-Windows, this is the filename of the binary, for example "firefox"
359 /// or "XUL" or "libxul.so".
360 /// - `breakpad_id`: A string of 33 hex digits, serving as a hash of the
361 /// contents of the binary / library. On Windows, this is 32 digits "signature"
362 /// plus one digit of "pdbAge". On non-Windows, this is the binary's UUID
363 /// (ELF id or mach-o UUID) plus a "0" digit at the end (replacing the pdbAge).
364 ///
365 fn get_candidate_paths_for_debug_file(
366 &self,
367 info: &LibraryInfo,
368 ) -> FileAndPathHelperResult<Vec<CandidatePathInfo<Self::FL>>>;
369
370 /// TODO
371 fn get_candidate_paths_for_binary(
372 &self,
373 info: &LibraryInfo,
374 ) -> FileAndPathHelperResult<Vec<CandidatePathInfo<Self::FL>>>;
375
376 /// TODO
377 fn get_dyld_shared_cache_paths(
378 &self,
379 arch: Option<&str>,
380 ) -> FileAndPathHelperResult<Vec<Self::FL>>;
381
382 /// TODO
383 fn get_candidate_paths_for_gnu_debug_link_dest(
384 &self,
385 _original_file_location: &Self::FL,
386 _debug_link_name: &str,
387 ) -> FileAndPathHelperResult<Vec<Self::FL>> {
388 Ok(Vec::new())
389 }
390
391 /// TODO
392 fn get_candidate_paths_for_supplementary_debug_file(
393 &self,
394 _original_file_path: &Self::FL,
395 _supplementary_file_path: &str,
396 _supplementary_file_build_id: &ElfBuildId,
397 ) -> FileAndPathHelperResult<Vec<Self::FL>> {
398 Ok(Vec::new())
399 }
400
401 /// This method is the entry point for file access during symbolication.
402 /// The implementer needs to return an object which implements the `FileContents` trait.
403 /// This method is asynchronous, but once it returns, the file data needs to be
404 /// available synchronously because the `FileContents` methods are synchronous.
405 /// If there is no file at the requested path, an error should be returned (or in any
406 /// other error case).
407 fn load_file(
408 &self,
409 location: Self::FL,
410 ) -> std::pin::Pin<Box<dyn OptionallySendFuture<Output = FileAndPathHelperResult<Self::F>> + '_>>;
411
412 /// Ask the helper to return a SymbolMap if it happens to have one available already.
413 fn get_symbol_map_for_library(
414 &self,
415 _info: &LibraryInfo,
416 ) -> Option<(Self::FL, Arc<dyn SymbolMapTrait + Send + Sync>)> {
417 None
418 }
419}
420
421/// Provides synchronous access to the raw bytes of a file.
422/// This trait needs to be implemented by the consumer of this crate.
423pub trait FileContents: Send + Sync {
424 /// Must return the length, in bytes, of this file.
425 fn len(&self) -> u64;
426
427 /// Whether the file is empty.
428 fn is_empty(&self) -> bool {
429 self.len() == 0
430 }
431
432 /// Must return a slice of the file contents, or an error.
433 /// The slice's lifetime must be valid for the entire lifetime of this
434 /// `FileContents` object. This restriction may be a bit cumbersome to satisfy;
435 /// it's a restriction that's inherited from the `object` crate's `ReadRef` trait.
436 fn read_bytes_at(&self, offset: u64, size: u64) -> FileAndPathHelperResult<&[u8]>;
437
438 /// TODO: document
439 fn read_bytes_at_until(
440 &self,
441 range: Range<u64>,
442 delimiter: u8,
443 ) -> FileAndPathHelperResult<&[u8]>;
444
445 /// Append `size` bytes to `buffer`, starting to read at `offset` in the file.
446 /// If successful, `buffer` must have had its len increased exactly by `size`,
447 /// otherwise the caller may panic.
448 fn read_bytes_into(
449 &self,
450 buffer: &mut Vec<u8>,
451 offset: u64,
452 size: usize,
453 ) -> FileAndPathHelperResult<()>;
454}
455
456/// The debug information (function name, file path, line number) for a single frame
457/// at the looked-up address.
458#[derive(Debug, Clone, PartialEq, Eq)]
459pub struct FrameDebugInfo {
460 /// The function name for this frame, if known.
461 pub function: Option<String>,
462 /// The [`SourceFilePath`] for this frame, if known.
463 pub file_path: Option<SourceFilePath>,
464 /// The line number for this frame, if known.
465 pub line_number: Option<u32>,
466}
467
/// An abstraction over the token which is handed to the
/// [`FileAndPathHelper::load_file`] trait method.
///
/// Typically this is something like a `PathBuf`, but it may be more involved.
/// In `wholesym`, for instance, it is an enum which can refer either to a local
/// file or to a file on a symbol server.
///
/// Every method derives a new location from `self`, or returns `None`.
pub trait FileLocation: Clone + Display {
    /// Called on the location of a Dyld shared cache to create the location of
    /// one of its subcaches. Subcaches are separate files with filenames such
    /// as `dyld_shared_cache_arm64e.01`.
    ///
    /// `suffix` starts with a period.
    fn location_for_dyld_subcache(&self, suffix: &str) -> Option<Self>;

    /// Called on the location of a debug file to create the location of an
    /// external object file. `object_file` is an absolute path found in the
    /// "object map" of the original file.
    fn location_for_external_object_file(&self, object_file: &str) -> Option<Self>;

    /// Called on the location of a PE binary to create the location of the
    /// corresponding PDB file. `pdb_path_in_binary` is an absolute PDB path
    /// found in the binary.
    fn location_for_pdb_from_binary(&self, pdb_path_in_binary: &str) -> Option<Self>;

    /// Called on the location of a debug file to create the location of a
    /// source file. `source_file_path` is the source file's path as written
    /// down in the debug file, which is usually an absolute path.
    ///
    /// So far only one case with a relative path has been observed. There, the
    /// "debug file" was a synthetic .so file generated by `perf inject --jit`
    /// from a JITDUMP file which contained relative paths. One could argue that
    /// the application which wrote relative paths into the JITDUMP file
    /// produced bad data and should have written absolute paths. The `perf`
    /// infrastructure nevertheless handled the file fine, because the paths
    /// happened to be relative to the working directory and perf / objdump
    /// resolved them relative to the current working directory.
    fn location_for_source_file(&self, source_file_path: &str) -> Option<Self>;

    /// Called on the location of a Breakpad sym file to get the location of its
    /// corresponding symindex file.
    fn location_for_breakpad_symindex(&self) -> Option<Self>;

    /// Creates the location of a `.dwo` file from a compilation directory and
    /// a path.
    // NOTE(review): presumably called on the location of the ELF file which
    // references the dwo file, with the two strings carried by
    // `ExternalFileRef::ElfExternalDwo` - confirm.
    fn location_for_dwo(&self, comp_dir: &str, path: &str) -> Option<Self>;

    /// Creates the location of a `.dwp` file.
    // NOTE(review): presumably the dwp file which belongs to the file at this
    // location - confirm.
    fn location_for_dwp(&self) -> Option<Self>;
}
512
513/// The path of a source file, as found in the debug info.
514///
515/// This contains both the raw path and an optional "mapped path". The raw path can
516/// refer to a file on this machine or on a different machine (i.e. the original
517/// build machine). The mapped path is something like a permalink which potentially
518/// allows obtaining the source file from a source server or a public hosted repository.
519#[derive(Debug, Clone, PartialEq, Eq)]
520pub struct SourceFilePath {
521 /// The raw path to the source file, as written down in the debug file. This is
522 /// usually an absolute path.
523 raw_path: String,
524
525 /// A variant of the path which may allow obtaining the source code for this file
526 /// from the web.
527 mapped_path: Option<MappedPath>,
528}
529
530impl SourceFilePath {
531 /// Create a new `SourceFilePath`.
532 pub fn new(raw_path: String, mapped_path: Option<MappedPath>) -> Self {
533 Self {
534 raw_path,
535 mapped_path,
536 }
537 }
538
539 /// Create a `SourceFilePath` from a path in a Breakpad .sym file. Such files can
540 /// contain the "special path" serialization of a mapped path, but they can
541 /// also contain absolute paths.
542 pub fn from_breakpad_path(raw_path: String) -> Self {
543 let mapped_path = MappedPath::from_special_path_str(&raw_path);
544 Self {
545 raw_path,
546 mapped_path,
547 }
548 }
549
550 /// A short, display-friendly version of this path.
551 pub fn display_path(&self) -> String {
552 match self.mapped_path() {
553 Some(mapped_path) => mapped_path.display_path(),
554 None => self.raw_path.clone(),
555 }
556 }
557
558 /// The raw path to the source file, as written down in the debug file. This is
559 /// usually an absolute path.
560 ///
561 /// Examples:
562 ///
563 /// - `"/Users/mstange/code/samply/samply-symbols/src/shared.rs"`
564 /// - `"/Users/mstange/code/mozilla/widget/cocoa/nsNativeThemeCocoa.mm"`
565 /// - `"./csu/../csu/libc-start.c"`
566 /// - `"/rustc/69f9c33d71c871fc16ac445211281c6e7a340943/library/core/src/ptr/const_ptr.rs"`
567 /// - `r#"D:\agent\_work\2\s\src\vctools\crt\vcstartup\src\startup\exe_common.inl"#`
568 ///
569 /// If the debug file was produced by compiling code on this machine, then the path
570 /// usually refers to a file on this machine. (An exception to this is debug info
571 /// from the Rust stdlib, which has fake `/rustc/<rev>/...` paths even if the when
572 /// compiling Rust code locally.)
573 ///
574 /// If the code was compiled on a different machine, then the raw path does not refer
575 /// to a file on this machine.
576 ///
577 /// Sometimes this path is a relative path. One such case was observed when the
578 /// "debug file" was a synthetic .so file which was generated by `perf inject --jit`
579 /// based on a JITDUMP file which included relative paths. You could argue
580 /// that the application which emitted relative paths into the JITDUMP file was
581 /// creating bad data and should have written out absolute paths. However, the `perf`
582 /// infrastructure worked fine on this file, because the relative paths happened to
583 /// be relative to the working directory, and because perf / objdump were resolving
584 /// those relative paths relative to the current working directory.
585 pub fn raw_path(&self) -> &str {
586 &self.raw_path
587 }
588
589 /// Returns the raw path while consuming this `SourceFilePath`.
590 pub fn into_raw_path(self) -> String {
591 self.raw_path
592 }
593
594 /// A variant of the path which may allow obtaining the source code for this file
595 /// from the web.
596 ///
597 /// Examples:
598 ///
599 /// - If the source file is from a Rust dependency from crates.io, we detect the
600 /// cargo cache directory in the raw path and create a mapped path of the form [`MappedPath::Cargo`].
601 /// - If the source file can be obtained from a github URL, and we know this either
602 /// from the `srcsrv` stream of a PDB file or because we recognize a path of the
603 /// form `/rustc/<rust-revision>/`, then we create a mapped path of the form [`MappedPath::Git`].
604 pub fn mapped_path(&self) -> Option<&MappedPath> {
605 self.mapped_path.as_ref()
606 }
607
608 /// Returns the mapped path while consuming this `SourceFilePath`.
609 pub fn into_mapped_path(self) -> Option<MappedPath> {
610 self.mapped_path
611 }
612}
613
614/// The "relative address base" is the base address which [`LookupAddress::Relative`]
615/// addresses are relative to. You start with an SVMA (a stated virtual memory address),
616/// you subtract the relative address base, and out comes a relative address.
617///
618/// This function computes that base address. It is defined as follows:
619///
620/// - For Windows binaries, the base address is the "image base address".
621/// - For mach-O binaries, the base address is the vmaddr of the __TEXT segment.
622/// - For ELF binaries, the base address is the vmaddr of the *first* segment,
623/// i.e. the vmaddr of the first "LOAD" ELF command.
624///
625/// In many cases, this base address is simply zero:
626///
627/// - ELF images of dynamic libraries (i.e. not executables) usually have a
628/// base address of zero.
629/// - Stand-alone mach-O dylibs usually have a base address of zero because their
630/// __TEXT segment is at address zero.
631/// - In PDBs, "RVAs" are relative addresses which are already relative to the
632/// image base.
633///
634/// However, in the following cases, the base address is usually non-zero:
635///
636/// - The "image base address" of Windows binaries is usually non-zero.
637/// - mach-O executable files (not dylibs) usually have their __TEXT segment at
638/// address 0x100000000.
639/// - mach-O libraries in the dyld shared cache have a __TEXT segment at some
640/// non-zero address in the cache.
641/// - ELF executables can have non-zero base addresses, e.g. 0x200000 or 0x400000.
642/// - Kernel ELF binaries ("vmlinux") have a large base address such as
643/// 0xffffffff81000000. Moreover, the base address seems to coincide with the
644/// vmaddr of the .text section, which is readily-available in perf.data files
645/// (in a synthetic mapping called "[kernel.kallsyms]_text").
646pub fn relative_address_base<'data>(object_file: &impl object::Object<'data>) -> u64 {
647 use object::read::ObjectSegment;
648 if let Some(text_segment) = object_file
649 .segments()
650 .find(|s| s.name() == Ok(Some("__TEXT")))
651 {
652 // This is a mach-O image. "Relative addresses" are relative to the
653 // vmaddr of the __TEXT segment.
654 return text_segment.address();
655 }
656
657 if let FileFlags::Elf { .. } = object_file.flags() {
658 // This is an ELF image. "Relative addresses" are relative to the
659 // vmaddr of the first segment (the first LOAD command).
660 if let Some(first_segment) = object_file.segments().next() {
661 return first_segment.address();
662 }
663 }
664
665 // For PE binaries, relative_address_base() returns the image base address.
666 object_file.relative_address_base()
667}
668
/// A function symbol.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SymbolInfo {
    /// The address of the function, as a relative address.
    pub address: u32,
    /// The size of the function in bytes. This may be an approximation based
    /// on neighboring symbols.
    pub size: Option<u32>,
    /// The demangled name of the function.
    pub name: String,
}
679
680/// The lookup result for an address.
681#[derive(Debug, Clone, PartialEq, Eq)]
682pub struct AddressInfo {
683 /// Information about the symbol which contains the looked up address.
684 pub symbol: SymbolInfo,
685 /// Information about the frames at the looked up address, if found in the debug info.
686 ///
687 /// This Vec contains the file name and line number of the address.
688 /// If the compiler inlined a function call at this address, then this Vec
689 /// also contains the function name of the inlined function, along with the
690 /// file and line information inside that function.
691 ///
692 /// The Vec begins with the callee-most ("innermost") inlinee, followed by
693 /// its caller, and so on. The last element is always the outer function.
694 pub frames: Option<Vec<FrameDebugInfo>>,
695}
696
697/// The lookup result from `lookup_sync`.
698#[derive(Debug, Clone, PartialEq, Eq)]
699pub struct SyncAddressInfo {
700 /// Information about the symbol which contains the looked up address.
701 pub symbol: SymbolInfo,
702 /// Information about the frames at the looked up address, from the debug info.
703 pub frames: Option<FramesLookupResult>,
704}
705
706/// Contains address debug info (inlined functions, file names, line numbers) if
707/// available.
708#[derive(Debug, Clone, PartialEq, Eq)]
709pub enum FramesLookupResult {
710 /// Debug info for this address was found in the symbol map.
711 ///
712 /// This Vec contains the file name and line number of the address.
713 /// If the compiler inlined a function call at this address, then this Vec
714 /// also contains the function name of the inlined function, along with the
715 /// file and line information inside that function.
716 ///
717 /// The Vec begins with the callee-most ("innermost") inlinee, followed by
718 /// its caller, and so on. The last element is always the outer function.
719 Available(Vec<FrameDebugInfo>),
720
721 /// Debug info for this address was not found in the symbol map, but can
722 /// potentially be found in a different file, with the help of
723 /// [`SymbolMap::lookup_external`](crate::SymbolMap::lookup_external).
724 ///
725 /// This case can currently only be hit on macOS: On macOS, linking multiple
726 /// `.o` files together into a library or an executable does not copy the
727 /// DWARF information into the linked output. Instead, the linker stores the
728 /// paths to those original `.o` files, using 'OSO' stabs entries, and debug
729 /// info must be obtained from those original files.
730 External(ExternalFileAddressRef),
731}
732
733/// Information to find an external file and an address within that file, to be
734/// passed to [`SymbolMap::lookup_external`](crate::SymbolMap::lookup_external) or
735/// [`ExternalFileSymbolMap::lookup`](crate::ExternalFileSymbolMap::lookup).
736#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
737pub struct ExternalFileAddressRef {
738 /// Information needed to find the external file.
739 pub file_ref: ExternalFileRef,
740 /// Information needed to find the address within that external file.
741 pub address_in_file: ExternalFileAddressInFileRef,
742}
743
/// Identifies an external file which contains debug information.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum ExternalFileRef {
    /// An external object file referenced by a mach-O binary.
    MachoExternalObject {
        /// The file's path, as it is given in the object map of the linked
        /// binary.
        file_path: String,
    },
    /// An external `.dwo` file referenced by an ELF file, given by the two
    /// strings which [`FileLocation::location_for_dwo`] takes.
    ElfExternalDwo {
        /// The compilation directory.
        comp_dir: String,
        /// The path of the dwo file.
        // NOTE(review): presumably relative to `comp_dir` - confirm.
        path: String,
    },
}
756
/// Information to find an address within an external file, for debug info lookup.
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum ExternalFileAddressInFileRef {
    /// The external file is itself the object file which contains the symbol.
    MachoOsoObject {
        /// The name of the function symbol, as bytes, for the function which contains the
        /// address we want to look up.
        symbol_name: Vec<u8>,
        /// The address to look up, as a relative offset from the function symbol address.
        offset_from_symbol: u32,
    },
    /// The external file is an archive file (e.g. `libjs_static.a`, created with `ar`),
    /// and the symbol lives in one of its members.
    MachoOsoArchive {
        /// The name of the archive member (e.g. `Unified_cpp_js_src23.o`) which
        /// contains the symbol.
        name_in_archive: String,
        /// The name of the function symbol, as bytes, for the function which contains the
        /// address we want to look up.
        symbol_name: Vec<u8>,
        /// The address to look up, as a relative offset from the function symbol address.
        offset_from_symbol: u32,
    },
    /// An address described by an id and an address value.
    // NOTE(review): presumably `dwo_id` is the split-DWARF unit id and `svma` is a
    // "stated virtual memory address" — confirm against the code that builds this.
    ElfDwo {
        dwo_id: u64,
        svma: u64,
    },
}
783
784/// Implementation for slices.
785impl<T: Deref<Target = [u8]> + Send + Sync> FileContents for T {
786 fn len(&self) -> u64 {
787 <[u8]>::len(self) as u64
788 }
789
790 fn read_bytes_at(&self, offset: u64, size: u64) -> FileAndPathHelperResult<&[u8]> {
791 <[u8]>::get(self, offset as usize..)
792 .and_then(|s| s.get(..size as usize))
793 .ok_or_else(|| {
794 std::io::Error::new(
795 std::io::ErrorKind::UnexpectedEof,
796 "FileContents::read_bytes_at for &[u8] was called with out-of-range indexes",
797 )
798 .into()
799 })
800 }
801
802 fn read_bytes_at_until(
803 &self,
804 range: Range<u64>,
805 delimiter: u8,
806 ) -> FileAndPathHelperResult<&[u8]> {
807 if range.end < range.start {
808 return Err("Invalid range in read_bytes_at_until".into());
809 }
810 let slice = self.read_bytes_at(range.start, range.end - range.start)?;
811 if let Some(pos) = memchr::memchr(delimiter, slice) {
812 Ok(&slice[..pos])
813 } else {
814 Err(Box::new(std::io::Error::new(
815 std::io::ErrorKind::InvalidInput,
816 "Delimiter not found",
817 )))
818 }
819 }
820
821 #[inline]
822 fn read_bytes_into(
823 &self,
824 buffer: &mut Vec<u8>,
825 offset: u64,
826 size: usize,
827 ) -> FileAndPathHelperResult<()> {
828 buffer.extend_from_slice(self.read_bytes_at(offset, size as u64)?);
829 Ok(())
830 }
831}
832
/// Granularity, in bytes, at which partial-read statistics track which parts
/// of a file have been read (32 KiB).
#[cfg(feature = "partial_read_stats")]
const CHUNK_SIZE: u64 = 1 << 15;
835
/// Bookkeeping about how much of a file was read, tracked per `CHUNK_SIZE` chunk.
#[cfg(feature = "partial_read_stats")]
struct FileReadStats {
    /// Running byte total, accumulated by `record_read`.
    bytes_read: u64,
    /// One bit per chunk of the file; a set bit means the chunk was touched by a read.
    unique_chunks_read: BitVec,
    /// Number of recorded reads which touched at least one previously-unread chunk.
    read_call_count: u64,
}
842
#[cfg(feature = "partial_read_stats")]
impl FileReadStats {
    /// Creates empty statistics for a file of `size_in_bytes` bytes.
    ///
    /// The file is divided into chunks of `CHUNK_SIZE` bytes (the last chunk
    /// may be partial). An empty file has zero chunks.
    pub fn new(size_in_bytes: u64) -> Self {
        let chunk_count = match size_in_bytes {
            0 => 0,
            n => (n - 1) / CHUNK_SIZE + 1,
        };
        FileReadStats {
            bytes_read: 0,
            unique_chunks_read: bitvec![0; chunk_count as usize],
            read_call_count: 0,
        }
    }

    /// Records a read of `size` bytes at `offset`.
    ///
    /// Reads which only touch chunks that were already read are not counted.
    /// Otherwise the call is counted, and `bytes_read` grows by the distance
    /// from `offset` to the end of the last touched chunk if the first chunk
    /// was already read, or by the full size of all touched chunks if not.
    ///
    /// Parts of the read which lie beyond the end of the file are ignored, so
    /// out-of-range reads never panic here.
    pub fn record_read(&mut self, offset: u64, size: u64) {
        if size == 0 {
            return;
        }

        let chunk_count = self.unique_chunks_read.len() as u64;
        let start = offset;
        // Saturate so that a bogus `offset + size` cannot overflow.
        let end = offset.saturating_add(size);
        // Clamp to the tracked chunks so that the bit-slice below is always in range.
        let chunk_index_start = (start / CHUNK_SIZE).min(chunk_count);
        let chunk_index_end = ((end - 1) / CHUNK_SIZE + 1).min(chunk_count);
        if chunk_index_start >= chunk_index_end {
            // The read lies entirely outside the file.
            return;
        }

        let chunkbits =
            &mut self.unique_chunks_read[chunk_index_start as usize..chunk_index_end as usize];
        if chunkbits.count_ones() != (chunk_index_end - chunk_index_start) as usize {
            if chunkbits[0] {
                self.bytes_read += chunk_index_end * CHUNK_SIZE - start;
            } else {
                self.bytes_read += (chunk_index_end - chunk_index_start) * CHUNK_SIZE;
            }
            self.read_call_count += 1;
        }
        chunkbits.set_all(true);
    }

    /// Returns the number of touched chunks multiplied by `CHUNK_SIZE`.
    pub fn unique_bytes_read(&self) -> u64 {
        self.unique_chunks_read.count_ones() as u64 * CHUNK_SIZE
    }
}

/// Formats a one-line summary: total bytes, unique bytes, redundancy
/// percentage and the number of counted reads.
#[cfg(feature = "partial_read_stats")]
impl std::fmt::Display for FileReadStats {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let unique_bytes_read = self.unique_bytes_read();
        let repeated_bytes_read = self.bytes_read.saturating_sub(unique_bytes_read);
        // If nothing was read there is nothing to be redundant with; report 0%
        // instead of dividing by zero.
        let redundancy_percentage = if unique_bytes_read == 0 {
            0
        } else {
            repeated_bytes_read.saturating_mul(100) / unique_bytes_read
        };
        write!(
            f,
            "{} total, {} unique, {}% redundancy, {} reads total",
            bytesize::ByteSize(self.bytes_read),
            bytesize::ByteSize(unique_bytes_read),
            redundancy_percentage,
            self.read_call_count
        )
    }
}
899
900/// A wrapper for a FileContents object. The wrapper provides some convenience methods
901/// and, most importantly, implements `ReadRef` for `&FileContentsWrapper`.
902pub struct FileContentsWrapper<T: FileContents> {
903 file_contents: T,
904 len: u64,
905 #[cfg(feature = "partial_read_stats")]
906 partial_read_stats: std::sync::Mutex<FileReadStats>,
907}
908
909impl<T: FileContents> FileContentsWrapper<T> {
910 pub fn new(file_contents: T) -> Self {
911 let len = file_contents.len();
912 Self {
913 file_contents,
914 len,
915 #[cfg(feature = "partial_read_stats")]
916 partial_read_stats: std::sync::Mutex::new(FileReadStats::new(len)),
917 }
918 }
919
920 #[inline]
921 pub fn len(&self) -> u64 {
922 self.len
923 }
924
925 #[inline]
926 pub fn is_empty(&self) -> bool {
927 self.len == 0
928 }
929
930 #[inline]
931 pub fn read_bytes_at(&self, offset: u64, size: u64) -> FileAndPathHelperResult<&[u8]> {
932 #[cfg(feature = "partial_read_stats")]
933 self.partial_read_stats
934 .lock()
935 .unwrap()
936 .record_read(offset, size);
937
938 self.file_contents.read_bytes_at(offset, size)
939 }
940
941 #[inline]
942 pub fn read_bytes_at_until(
943 &self,
944 range: Range<u64>,
945 delimiter: u8,
946 ) -> FileAndPathHelperResult<&[u8]> {
947 #[cfg(feature = "partial_read_stats")]
948 let start = range.start;
949
950 let bytes = self.file_contents.read_bytes_at_until(range, delimiter)?;
951
952 #[cfg(feature = "partial_read_stats")]
953 self.partial_read_stats
954 .lock()
955 .unwrap()
956 .record_read(start, (bytes.len() + 1) as u64);
957
958 Ok(bytes)
959 }
960
961 /// Append `size` bytes to `buffer`, starting to read at `offset` in the file.
962 /// If successful, `buffer` must have had its len increased exactly by `size`,
963 /// otherwise the caller may panic.
964 pub fn read_bytes_into(
965 &self,
966 buffer: &mut Vec<u8>,
967 offset: u64,
968 size: usize,
969 ) -> FileAndPathHelperResult<()> {
970 #[cfg(feature = "partial_read_stats")]
971 self.partial_read_stats
972 .lock()
973 .unwrap()
974 .record_read(offset, size as u64);
975
976 self.file_contents.read_bytes_into(buffer, offset, size)
977 }
978
979 pub fn read_entire_data(&self) -> FileAndPathHelperResult<&[u8]> {
980 self.read_bytes_at(0, self.len())
981 }
982
983 pub fn full_range(&self) -> RangeReadRef<'_, &Self> {
984 RangeReadRef::new(self, 0, self.len)
985 }
986
987 pub fn range(&self, start: u64, size: u64) -> RangeReadRef<'_, &Self> {
988 RangeReadRef::new(self, start, size)
989 }
990}
991
/// Prints the collected read statistics to stderr when the wrapper is dropped.
#[cfg(feature = "partial_read_stats")]
impl<T: FileContents> Drop for FileContentsWrapper<T> {
    fn drop(&mut self) {
        // `Mutex::lock` returns a `LockResult`, which does not implement
        // `Display`. We have exclusive access here, so `get_mut` reaches the
        // stats without locking. A poisoned mutex still yields its contents,
        // because panicking inside `drop` must be avoided.
        let stats = self
            .partial_read_stats
            .get_mut()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        eprintln!("{}", stats);
    }
}
998
999impl<T: FileContents> Debug for FileContentsWrapper<T> {
1000 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1001 write!(f, "FileContentsWrapper({} bytes)", self.len())
1002 }
1003}
1004
1005impl<'data, T: FileContents> ReadRef<'data> for &'data FileContentsWrapper<T> {
1006 #[inline]
1007 fn len(self) -> Result<u64, ()> {
1008 Ok(self.len())
1009 }
1010
1011 #[inline]
1012 fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'data [u8], ()> {
1013 self.read_bytes_at(offset, size).map_err(|_| {
1014 // Note: We're discarding the error from the FileContents method here.
1015 })
1016 }
1017
1018 #[inline]
1019 fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'data [u8], ()> {
1020 self.read_bytes_at_until(range, delimiter).map_err(|_| {
1021 // Note: We're discarding the error from the FileContents method here.
1022 })
1023 }
1024}
1025
/// Compile-time check that `&FileContentsWrapper<T>` is both `Send` and `Sync`
/// whenever `T: FileContents + Sync`. Nothing is executed at runtime; the test
/// passes as soon as it type-checks.
#[test]
fn test_filecontents_readref_is_send_and_sync() {
    fn assert_is_send_and_sync<R: Send + Sync>() {}
    // Only needs to type-check; it is never called.
    #[allow(unused)]
    fn wrapper<C: FileContents + Sync>() {
        assert_is_send_and_sync::<&FileContentsWrapper<C>>();
    }
}
1036
1037#[derive(Clone, Copy)]
1038pub struct RangeReadRef<'data, T: ReadRef<'data>> {
1039 original_readref: T,
1040 range_start: u64,
1041 range_size: u64,
1042 _phantom_data: PhantomData<&'data ()>,
1043}
1044
1045impl<'data, T: ReadRef<'data>> RangeReadRef<'data, T> {
1046 pub fn new(original_readref: T, range_start: u64, range_size: u64) -> Self {
1047 Self {
1048 original_readref,
1049 range_start,
1050 range_size,
1051 _phantom_data: PhantomData,
1052 }
1053 }
1054
1055 pub fn make_subrange(&self, start: u64, size: u64) -> Self {
1056 Self::new(self.original_readref, self.range_start + start, size)
1057 }
1058
1059 pub fn original_readref(&self) -> T {
1060 self.original_readref
1061 }
1062
1063 pub fn range_start(&self) -> u64 {
1064 self.range_start
1065 }
1066
1067 pub fn range_size(&self) -> u64 {
1068 self.range_size
1069 }
1070}
1071
1072impl<'data, T: ReadRef<'data>> ReadRef<'data> for RangeReadRef<'data, T> {
1073 #[inline]
1074 fn len(self) -> Result<u64, ()> {
1075 Ok(self.range_size)
1076 }
1077
1078 #[inline]
1079 fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'data [u8], ()> {
1080 let shifted_offset = self.range_start.checked_add(offset).ok_or(())?;
1081 self.original_readref.read_bytes_at(shifted_offset, size)
1082 }
1083
1084 #[inline]
1085 fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'data [u8], ()> {
1086 if range.end < range.start {
1087 return Err(());
1088 }
1089 let shifted_start = self.range_start.checked_add(range.start).ok_or(())?;
1090 let shifted_end = self.range_start.checked_add(range.end).ok_or(())?;
1091 let range = shifted_start..shifted_end;
1092 self.original_readref.read_bytes_at_until(range, delimiter)
1093 }
1094}
1095
1096pub struct FileContentsCursor<'a, T: FileContents> {
1097 /// The current offset of the cursor. This can be beyond the end of the file!
1098 current_offset: u64,
1099 /// The total length of the file.
1100 total_len: u64,
1101 inner: &'a FileContentsWrapper<T>,
1102}
1103
1104impl<'a, T: FileContents> FileContentsCursor<'a, T> {
1105 pub fn new(inner: &'a FileContentsWrapper<T>) -> Self {
1106 let total_len = inner.len();
1107 Self {
1108 current_offset: 0,
1109 total_len,
1110 inner,
1111 }
1112 }
1113}
1114
1115impl<T: FileContents> std::io::Read for FileContentsCursor<'_, T> {
1116 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1117 if self.current_offset >= self.total_len {
1118 return Ok(0);
1119 }
1120 let remaining_len = self.total_len - self.current_offset;
1121 let read_len = <[u8]>::len(buf).min(remaining_len as usize);
1122 // Make a silly copy
1123 let mut tmp_buf = Vec::with_capacity(read_len);
1124 self.inner
1125 .read_bytes_into(&mut tmp_buf, self.current_offset, read_len)
1126 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
1127 buf[..read_len].copy_from_slice(&tmp_buf);
1128 self.current_offset += read_len as u64;
1129 Ok(read_len)
1130 }
1131}
1132
1133impl<T: FileContents> std::io::Seek for FileContentsCursor<'_, T> {
1134 fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1135 /// Returns None on overflow / underflow.
1136 ///
1137 /// Seeks beyond the file length are allowed.
1138 fn inner(cur: u64, total_len: u64, pos: std::io::SeekFrom) -> Option<u64> {
1139 let new_offset: u64 = match pos {
1140 std::io::SeekFrom::Start(pos) => pos,
1141 std::io::SeekFrom::End(pos) => {
1142 (total_len as i64).checked_add(pos)?.try_into().ok()?
1143 }
1144 std::io::SeekFrom::Current(pos) => {
1145 (cur as i64).checked_add(pos)?.try_into().ok()?
1146 }
1147 };
1148 Some(new_offset)
1149 }
1150
1151 match inner(self.current_offset, self.total_len, pos) {
1152 Some(cur) => {
1153 self.current_offset = cur;
1154 Ok(cur)
1155 }
1156 None => Err(std::io::Error::new(std::io::ErrorKind::Other, "Bad Seek")),
1157 }
1158 }
1159}
1160
#[cfg(test)]
mod test {
    use super::*;

    /// Seeking past the end of the file must succeed, and a read from such a
    /// position must report zero bytes rather than fail.
    #[test]
    fn file_contents_cursor_allows_seeks_beyond_eof() {
        use std::io::{Read, Seek, SeekFrom};

        let contents: &[u8] = b"Test";
        let wrapper = FileContentsWrapper::new(contents);
        let mut cursor = FileContentsCursor::new(&wrapper);
        let mut scratch = [0u8; 10];

        // Read the first three of the four bytes.
        let first_read = cursor.read(&mut scratch[..3]).unwrap();
        assert_eq!(first_read, 3);
        assert_eq!(&scratch[..3], b"Tes");

        // Offset 3 + 2 = 5, which is one byte past the end of the file.
        let offset_after_seek = cursor.seek(SeekFrom::Current(2)).unwrap();
        assert_eq!(offset_after_seek, 5);

        let second_read = cursor.read(&mut scratch[..2]).unwrap();
        assert_eq!(second_read, 0);
    }
}