use async_trait::async_trait;
use debugid::{CodeId, DebugId};
use tracing::trace;
use std::boxed::Box;
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use std::sync::Mutex;
use std::{borrow::Cow, sync::Arc};
pub use minidump_common::{traits::Module, utils::basename};
pub use sym_file::walker;
pub use crate::sym_file::{CfiRules, SymbolFile};
#[cfg(feature = "http")]
pub mod http;
mod sym_file;
#[cfg(feature = "http")]
pub use http::*;
/// Internal items re-exported only when the `fuzz` feature is enabled.
///
/// Hidden from rustdoc; NOTE(review): presumably consumed by fuzz targets and
/// not meant as stable public API — confirm before relying on it elsewhere.
#[doc(hidden)]
#[cfg(feature = "fuzz")]
pub mod fuzzing_private_exports {
pub use crate::sym_file::walker::{eval_win_expr_for_fuzzer, walk_with_stack_cfi};
pub use crate::sym_file::{StackInfoWin, WinStackThing};
}
/// Per-module summary of a symbol lookup, as produced by `Symbolizer::stats`.
#[derive(Default, Debug, Clone)]
pub struct SymbolStats {
/// The `url` recorded on the loaded `SymbolFile`, if there was one.
pub symbol_url: Option<String>,
/// Whether a symbol file was obtained for the module. `Symbolizer::stats` sets
/// this both for files that parsed and for files that failed to parse.
pub loaded_symbols: bool,
/// Whether the symbol file that was obtained failed to parse.
pub corrupt_symbols: bool,
}
/// Running counters describing how many symbol lookups a `Symbolizer` has
/// started and finished.
#[derive(Default, Debug, Clone)]
pub struct PendingSymbolStats {
/// Number of supplier lookups that have completed (successfully or not).
pub symbols_processed: u64,
/// Number of supplier lookups that have been started.
pub symbols_requested: u64,
}
/// A plain-data [`Module`] implementation in which every property is optional.
///
/// Through the `Module` trait, unset numeric fields read back as `0`, an unset
/// `code_file` reads back as `""`, and the remaining accessors return `None`.
#[derive(Default)]
pub struct SimpleModule {
/// Value reported by `Module::base_address`.
pub base_address: Option<u64>,
/// Value reported by `Module::size`.
pub size: Option<u64>,
/// Value reported by `Module::code_file`.
pub code_file: Option<String>,
/// Value reported by `Module::code_identifier`.
pub code_identifier: Option<CodeId>,
/// Value reported by `Module::debug_file`.
pub debug_file: Option<String>,
/// Value reported by `Module::debug_identifier`.
pub debug_id: Option<DebugId>,
/// Value reported by `Module::version`.
pub version: Option<String>,
}
impl SimpleModule {
pub fn new(debug_file: &str, debug_id: DebugId) -> SimpleModule {
SimpleModule {
debug_file: Some(String::from(debug_file)),
debug_id: Some(debug_id),
..SimpleModule::default()
}
}
}
/// `Module` accessors backed directly by the optional fields of `SimpleModule`.
impl Module for SimpleModule {
    /// The stored base address, or `0` when unset.
    fn base_address(&self) -> u64 {
        self.base_address.unwrap_or_default()
    }

    /// The stored size, or `0` when unset.
    fn size(&self) -> u64 {
        self.size.unwrap_or_default()
    }

    /// The stored code file borrowed as-is, or an empty string when unset.
    fn code_file(&self) -> Cow<str> {
        match &self.code_file {
            Some(path) => Cow::Borrowed(path.as_str()),
            None => Cow::Borrowed(""),
        }
    }

    /// A copy of the stored code identifier, if any.
    fn code_identifier(&self) -> Option<CodeId> {
        self.code_identifier.clone()
    }

    /// The stored debug file name, borrowed, if any.
    fn debug_file(&self) -> Option<Cow<str>> {
        self.debug_file.as_deref().map(Cow::Borrowed)
    }

    /// The stored debug id, if any.
    fn debug_identifier(&self) -> Option<DebugId> {
        self.debug_id
    }

    /// The stored version string, borrowed, if any.
    fn version(&self) -> Option<Cow<str>> {
        self.version.as_deref().map(Cow::Borrowed)
    }
}
/// Return the final component of `path`, treating both `/` and `\` as separators.
///
/// If `path` contains no separator it is returned unchanged; a trailing
/// separator yields an empty string.
fn leafname(path: &str) -> &str {
    match path.rfind(|c: char| matches!(c, '/' | '\\')) {
        // Both separators are one byte long, so `idx + 1` is a char boundary.
        Some(idx) => &path[idx + 1..],
        None => path,
    }
}
/// Produce `filename` with `new_extension` as its final extension.
///
/// If the text after the last `.` of `filename`, lower-cased, equals
/// `match_extension`, that extension is replaced; otherwise `new_extension` is
/// appended after a `.`.
fn replace_or_add_extension(filename: &str, match_extension: &str, new_extension: &str) -> String {
    let stem = match filename.rsplit_once('.') {
        Some((stem, ext)) if ext.to_lowercase() == match_extension => stem,
        _ => filename,
    };
    format!("{}.{}", stem, new_extension)
}
/// A pair of relative paths describing where a module's file may be found.
#[derive(Debug, Clone)]
pub struct FileLookup {
/// Relative path joined onto each local search directory by `SimpleSymbolSupplier`.
cache_rel: String,
/// Relative path presumably used when asking a remote symbol server — confirm
/// against the `http` module, which is not visible here.
server_rel: String,
}
/// Compute the lookup paths of the Breakpad `.sym` file for `module`.
///
/// The path has the shape `<debug leaf>/<breakpad debug id>/<sym name>`, where
/// the sym name is the debug leaf with a `pdb` extension replaced by `sym` (or
/// `.sym` appended). Returns `None` when the module lacks a debug file or a
/// debug id.
pub fn breakpad_sym_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
    let debug_file = module.debug_file()?;
    let debug_id = module.debug_identifier()?;

    let leaf = leafname(&debug_file);
    let sym_name = replace_or_add_extension(leaf, "pdb", "sym");
    let rel_path = format!("{}/{}/{}", leaf, debug_id.breakpad(), sym_name);

    // The same relative path is used both locally and on the server.
    Some(FileLookup {
        server_rel: rel_path.clone(),
        cache_rel: rel_path,
    })
}
/// Compute the lookup paths of the extra debug-info file for `module`.
///
/// The path has the shape `<debug leaf>/<breakpad debug id>/<debug leaf>`.
/// Returns `None` when the module lacks a debug file or a debug id.
pub fn extra_debuginfo_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
    let debug_file = module.debug_file()?;
    let debug_id = module.debug_identifier()?;

    let leaf = leafname(&debug_file);
    let rel_path = format!("{}/{}/{}", leaf, debug_id.breakpad(), leaf);

    // The same relative path is used both locally and on the server.
    Some(FileLookup {
        server_rel: rel_path.clone(),
        cache_rel: rel_path,
    })
}
/// Compute the lookup paths of the binary (code file) for `module`.
///
/// * cache path:  `<debug leaf>/<breakpad debug id>/<binary leaf>`
/// * server path: `<binary leaf>/<code id>/<binary leaf>`
///
/// Returns `None` when the module lacks a code id, a debug file or a debug id.
pub fn binary_lookup(module: &(dyn Module + Sync)) -> Option<FileLookup> {
    let code_file = module.code_file();
    let code_id = module.code_identifier()?;
    let debug_file = module.debug_file()?;
    let debug_id = module.debug_identifier()?;

    let bin_leaf = leafname(&code_file);
    let debug_leaf = leafname(&debug_file);
    let code_id_str: &str = code_id.as_ref();

    let cache_rel = format!("{}/{}/{}", debug_leaf, debug_id.breakpad(), bin_leaf);
    let server_rel = format!("{}/{}/{}", bin_leaf, code_id_str, bin_leaf);
    Some(FileLookup {
        cache_rel,
        server_rel,
    })
}
/// Rewrite a lookup so that the last character of its server path is `_`.
///
/// Only `server_rel` is changed; `cache_rel` is passed through untouched.
///
/// # Panics
///
/// Panics if the server path is empty.
pub fn moz_lookup(mut lookup: FileLookup) -> FileLookup {
    let server_rel = &mut lookup.server_rel;
    server_rel.pop().unwrap();
    server_rel.push('_');
    lookup
}
pub fn lookup(module: &(dyn Module + Sync), file_kind: FileKind) -> Option<FileLookup> {
match file_kind {
FileKind::BreakpadSym => breakpad_sym_lookup(module),
FileKind::Binary => binary_lookup(module),
FileKind::ExtraDebugInfo => extra_debuginfo_lookup(module),
}
}
/// Reasons a symbol file could not be produced for a module.
#[derive(Debug, thiserror::Error)]
pub enum SymbolError {
/// No symbol file was found for the module.
#[error("symbol file not found")]
NotFound,
/// The module did not provide a debug file name and/or a debug id.
#[error("the debug file or id were missing")]
MissingDebugFileOrId,
/// Reading the symbol data failed with an I/O error.
#[error("couldn't read input stream")]
LoadError(#[from] std::io::Error),
/// The symbol data was malformed; carries a message and a line number.
#[error("parse error: {0} at line {1}")]
ParseError(&'static str, u64),
}
/// Reasons a per-module file could not be located by `SymbolSupplier::locate_file`.
#[derive(Clone, Debug, thiserror::Error)]
pub enum FileError {
/// The requested file does not exist in any location the supplier searched.
#[error("file not found")]
NotFound,
}
/// Error returned by `Symbolizer::fill_symbol` when no usable symbols could be
/// obtained for the module.
///
/// It has no fields: `fill_symbol` discards the underlying `SymbolError`.
#[derive(Debug)]
pub struct FillSymbolError {
}
impl PartialEq for SymbolError {
fn eq(&self, other: &SymbolError) -> bool {
matches!(
(self, other),
(SymbolError::NotFound, SymbolError::NotFound)
| (SymbolError::LoadError(_), SymbolError::LoadError(_))
| (SymbolError::ParseError(..), SymbolError::ParseError(..))
)
}
}
/// A source of symbol files and other per-module files.
#[async_trait]
pub trait SymbolSupplier {
/// Locate and parse the symbols for `module`.
///
/// The implementations in this file return `SymbolError::NotFound` when no
/// symbols are available for the module.
async fn locate_symbols(&self, module: &(dyn Module + Sync))
-> Result<SymbolFile, SymbolError>;
/// Locate the file of kind `file_kind` associated with `module` and return
/// its path, or `FileError::NotFound` if it is unavailable.
async fn locate_file(
&self,
module: &(dyn Module + Sync),
file_kind: FileKind,
) -> Result<PathBuf, FileError>;
}
/// A `SymbolSupplier` that searches a list of local directories.
pub struct SimpleSymbolSupplier {
/// Directories to search, in order; the first one containing the file wins.
paths: Vec<PathBuf>,
}
impl SimpleSymbolSupplier {
pub fn new(paths: Vec<PathBuf>) -> SimpleSymbolSupplier {
SimpleSymbolSupplier { paths }
}
}
#[async_trait]
impl SymbolSupplier for SimpleSymbolSupplier {
    /// Find the module's Breakpad `.sym` file on disk and parse it.
    ///
    /// Any failure to locate the file is reported as `SymbolError::NotFound`;
    /// failures while reading or parsing it are traced and passed through.
    #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(module = crate::basename(&module.code_file())))]
    async fn locate_symbols(
        &self,
        module: &(dyn Module + Sync),
    ) -> Result<SymbolFile, SymbolError> {
        let located = self.locate_file(module, FileKind::BreakpadSym).await;
        let file_path = match located {
            Ok(path) => path,
            Err(_) => return Err(SymbolError::NotFound),
        };

        let parsed = SymbolFile::from_file(&file_path);
        if let Err(e) = &parsed {
            trace!("SimpleSymbolSupplier failed: {}", e);
        }
        let symbols = parsed?;
        trace!("SimpleSymbolSupplier parsed file!");
        Ok(symbols)
    }

    /// Return the first `<search dir>/<cache-relative path>` that is a regular file.
    ///
    /// Yields `FileError::NotFound` when the module cannot produce a lookup
    /// path or when no search directory contains the file.
    #[tracing::instrument(level = "trace", skip(self, module), fields(module = crate::basename(&module.code_file())))]
    async fn locate_file(
        &self,
        module: &(dyn Module + Sync),
        file_kind: FileKind,
    ) -> Result<PathBuf, FileError> {
        trace!("SimpleSymbolSupplier search");
        let rel = match lookup(module, file_kind) {
            Some(rel) => rel,
            None => {
                trace!("SimpleSymbolSupplier could not build symbol_path");
                return Err(FileError::NotFound);
            }
        };

        // Probe the directories lazily, in order, stopping at the first hit.
        let hit = self
            .paths
            .iter()
            .map(|root| root.join(&rel.cache_rel))
            .find(|candidate| {
                fs::metadata(candidate)
                    .map(|meta| meta.is_file())
                    .unwrap_or(false)
            });

        match hit {
            Some(found) => {
                trace!("SimpleSymbolSupplier found file {}", found.display());
                Ok(found)
            }
            None => Err(FileError::NotFound),
        }
    }
}
/// A `SymbolSupplier` that serves symbol files held in memory as strings.
#[derive(Default, Debug, Clone)]
pub struct StringSymbolSupplier {
/// Symbol file contents, keyed by the module's `code_file` string.
modules: HashMap<String, String>,
}
impl StringSymbolSupplier {
pub fn new(modules: HashMap<String, String>) -> Self {
Self { modules }
}
}
#[async_trait]
impl SymbolSupplier for StringSymbolSupplier {
    /// Parse the in-memory symbol text registered under the module's code file.
    ///
    /// Returns `SymbolError::NotFound` when nothing is registered for the
    /// module; errors from parsing the text are passed through.
    #[tracing::instrument(name = "symbols", level = "trace", skip_all, fields(file = crate::basename(&module.code_file())))]
    async fn locate_symbols(
        &self,
        module: &(dyn Module + Sync),
    ) -> Result<SymbolFile, SymbolError> {
        trace!("StringSymbolSupplier search");
        let text = match self.modules.get(&*module.code_file()) {
            Some(text) => text,
            None => {
                trace!("StringSymbolSupplier could not find file");
                return Err(SymbolError::NotFound);
            }
        };
        trace!("StringSymbolSupplier found file");
        let file = SymbolFile::from_bytes(text.as_bytes())?;
        trace!("StringSymbolSupplier parsed file!");
        Ok(file)
    }

    /// This supplier has no files on disk, so it always reports `NotFound`.
    async fn locate_file(
        &self,
        _module: &(dyn Module + Sync),
        _file_kind: FileKind,
    ) -> Result<PathBuf, FileError> {
        Err(FileError::NotFound)
    }
}
/// Receiver for symbol information about a single stack frame.
///
/// `Symbolizer::fill_symbol` hands an implementor to `SymbolFile::fill_symbol`,
/// which reports what it found through the setters below.
pub trait FrameSymbolizer {
/// The instruction address of the frame to symbolicate.
fn get_instruction(&self) -> u64;
/// Record the function `name`, its `base` address and its `parameter_size`.
fn set_function(&mut self, name: &str, base: u64, parameter_size: u32);
/// Record the source `file`, `line`, and the `base` address of that line record.
fn set_source_file(&mut self, file: &str, line: u32, base: u64);
/// Record an inlined frame. The default implementation ignores the call.
fn add_inline_frame(&mut self, _name: &str, _file: Option<&str>, _line: Option<u32>) {}
}
/// Callback interface used while unwinding one stack frame.
///
/// `Symbolizer::walk_frame` hands an implementor to `SymbolFile::walk_frame`.
///
/// NOTE(review): the per-method notes below are inferred from names and
/// signatures only; the authoritative contract lives with the walker code and
/// the implementors, neither of which is visible here — confirm before relying
/// on them.
pub trait FrameWalker {
/// Presumably the instruction address of the frame being unwound.
fn get_instruction(&self) -> u64;
/// Presumably whether a callee-of-the-callee frame exists.
fn has_grand_callee(&self) -> bool;
/// Presumably the parameter size of that grand-callee frame.
fn get_grand_callee_parameter_size(&self) -> u32;
/// Read a register-sized value at `address`; `None` presumably means unreadable.
fn get_register_at_address(&self, address: u64) -> Option<u64>;
/// Read the callee's register called `name`, if available.
fn get_callee_register(&self, name: &str) -> Option<u64>;
/// Set the caller's register called `name` to `val`; `None` presumably signals failure.
fn set_caller_register(&mut self, name: &str, val: u64) -> Option<()>;
/// Clear any value recorded for the caller's register called `name`.
fn clear_caller_register(&mut self, name: &str);
/// Set the caller's CFA (presumably the canonical frame address) to `val`.
fn set_cfa(&mut self, val: u64) -> Option<()>;
/// Set the caller's return address to `val`.
fn set_ra(&mut self, val: u64) -> Option<()>;
}
/// A minimal `FrameSymbolizer` that simply stores whatever it is told.
#[derive(Debug, Default)]
pub struct SimpleFrame {
/// The instruction address to symbolicate.
pub instruction: u64,
/// Function name, filled in by `set_function`.
pub function: Option<String>,
/// Function base address, filled in by `set_function`.
pub function_base: Option<u64>,
/// Parameter size, filled in by `set_function`.
pub parameter_size: Option<u32>,
/// Source file name, filled in by `set_source_file`.
pub source_file: Option<String>,
/// Source line number, filled in by `set_source_file`.
pub source_line: Option<u32>,
/// Base address of the source line record, filled in by `set_source_file`.
pub source_line_base: Option<u64>,
}
impl SimpleFrame {
pub fn with_instruction(instruction: u64) -> SimpleFrame {
SimpleFrame {
instruction,
..SimpleFrame::default()
}
}
}
/// Stores every reported value in the matching `SimpleFrame` field; inline
/// frames fall through to the trait's no-op default.
impl FrameSymbolizer for SimpleFrame {
    /// The address this frame was created for.
    fn get_instruction(&self) -> u64 {
        self.instruction
    }

    /// Remember the function name, base address and parameter size.
    fn set_function(&mut self, name: &str, base: u64, parameter_size: u32) {
        self.parameter_size = Some(parameter_size);
        self.function_base = Some(base);
        self.function = Some(name.to_owned());
    }

    /// Remember the source file, line number and line base address.
    fn set_source_file(&mut self, file: &str, line: u32, base: u64) {
        self.source_line_base = Some(base);
        self.source_line = Some(line);
        self.source_file = Some(file.to_owned());
    }
}
/// The kinds of per-module file that `lookup` can compute paths for.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileKind {
/// A Breakpad `.sym` file; paths come from `breakpad_sym_lookup`.
BreakpadSym,
/// The module's binary (code file); paths come from `binary_lookup`.
Binary,
/// The module's debug file; paths come from `extra_debuginfo_lookup`.
ExtraDebugInfo,
}
/// Cache key for a module: `(code file, code id, debug file, debug id)`, all as strings.
type ModuleKey = (String, Option<String>, Option<String>, Option<String>);

/// Build the cache key identifying `module` in the symbolizer's symbol cache.
fn module_key(module: &(dyn Module + Sync)) -> ModuleKey {
    let code_file = module.code_file().to_string();
    let code_id = module.code_identifier().map(|id| id.to_string());
    let debug_file = module.debug_file().map(|file| file.to_string());
    let debug_id = module.debug_identifier().map(|id| id.to_string());
    (code_file, code_id, debug_file, debug_id)
}
/// A shared cell that is filled at most once with the outcome of a fallible operation.
type CachedOperation<T, E> = Arc<tokio::sync::OnceCell<Result<T, E>>>;
/// Symbolicates frames for modules, caching each module's symbol lookup result.
pub struct Symbolizer {
/// Where symbol files (and other per-module files) are obtained from.
supplier: Box<dyn SymbolSupplier + Send + Sync + 'static>,
/// Lookup results keyed by `module_key`; failed lookups are cached as well.
symbols: Mutex<HashMap<ModuleKey, CachedOperation<SymbolFile, SymbolError>>>,
/// Counters of lookups started and finished, updated around each supplier call.
pending_stats: Mutex<PendingSymbolStats>,
}
impl Symbolizer {
pub fn new<T: SymbolSupplier + Send + Sync + 'static>(supplier: T) -> Symbolizer {
Symbolizer {
supplier: Box::new(supplier),
symbols: Mutex::new(HashMap::new()),
pending_stats: Mutex::default(),
}
}
pub async fn get_symbol_at_address(
&self,
debug_file: &str,
debug_id: DebugId,
address: u64,
) -> Option<String> {
let k = (debug_file, debug_id);
let mut frame = SimpleFrame::with_instruction(address);
self.fill_symbol(&k, &mut frame).await.ok()?;
frame.function
}
pub async fn fill_symbol(
&self,
module: &(dyn Module + Sync),
frame: &mut (dyn FrameSymbolizer + Send),
) -> Result<(), FillSymbolError> {
let cached_sym = self.get_symbols(module).await;
let sym = cached_sym
.get()
.unwrap()
.as_ref()
.map_err(|_| FillSymbolError {})?;
sym.fill_symbol(module, frame);
Ok(())
}
pub fn stats(&self) -> HashMap<String, SymbolStats> {
self.symbols
.lock()
.unwrap()
.iter()
.map(|(k, res)| {
let res = res.get().expect("Had uninitialized SymbolFile entry?");
let mut stats = SymbolStats::default();
match res {
Ok(sym) => {
stats.symbol_url = sym.url.clone();
stats.loaded_symbols = true;
stats.corrupt_symbols = false;
}
Err(SymbolError::NotFound) => {
stats.loaded_symbols = false;
}
Err(SymbolError::MissingDebugFileOrId) => {
stats.loaded_symbols = false;
}
Err(SymbolError::LoadError(_)) => {
stats.loaded_symbols = false;
}
Err(SymbolError::ParseError(..)) => {
stats.loaded_symbols = true;
stats.corrupt_symbols = true;
}
}
(leafname(&k.0).to_string(), stats)
})
.collect()
}
pub fn pending_stats(&self) -> PendingSymbolStats {
self.pending_stats.lock().unwrap().clone()
}
pub async fn walk_frame(
&self,
module: &(dyn Module + Sync),
walker: &mut (dyn FrameWalker + Send),
) -> Option<()> {
let cached_sym = self.get_symbols(module).await;
let sym = cached_sym.get().unwrap().as_ref();
if let Ok(sym) = sym {
trace!("found symbols for address, searching for cfi entries");
sym.walk_frame(module, walker)
} else {
trace!("couldn't find symbols for address, cannot use cfi");
None
}
}
async fn get_symbols(
&self,
module: &(dyn Module + Sync),
) -> CachedOperation<SymbolFile, SymbolError> {
let k = module_key(module);
let symbol_once = self.symbols.lock().unwrap().entry(k).or_default().clone();
symbol_once
.get_or_init(|| async {
trace!("locating symbols for module {}", module.code_file());
self.pending_stats.lock().unwrap().symbols_requested += 1;
let result = self.supplier.locate_symbols(module).await;
self.pending_stats.lock().unwrap().symbols_processed += 1;
result
})
.await;
symbol_once
}
pub async fn get_file_path(
&self,
module: &(dyn Module + Sync),
file_kind: FileKind,
) -> Result<PathBuf, FileError> {
self.supplier.locate_file(module, file_kind).await
}
}
/// `leafname` strips everything up to the last `/` or `\`.
#[test]
fn test_leafname() {
    let cases = [
        ("c:\\foo\\bar\\test.pdb", "test.pdb"),
        ("c:/foo/bar/test.pdb", "test.pdb"),
        ("test.pdb", "test.pdb"),
        ("test", "test"),
        ("/path/to/test", "test"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(leafname(input), expected);
    }
}
/// `replace_or_add_extension` swaps a matching extension and appends otherwise.
#[test]
fn test_replace_or_add_extension() {
    // (filename, match_extension, new_extension, expected)
    let cases = [
        ("test.pdb", "pdb", "sym", "test.sym"),
        ("TEST.PDB", "pdb", "sym", "TEST.sym"),
        ("test", "pdb", "sym", "test.sym"),
        ("test.x", "pdb", "sym", "test.x.sym"),
        ("", "pdb", "sym", ".sym"),
        ("test.x", "x", "y", "test.y"),
    ];
    for &(filename, match_ext, new_ext, expected) in cases.iter() {
        assert_eq!(
            replace_or_add_extension(filename, match_ext, new_ext),
            expected
        );
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use std::fs;
    use std::fs::File;
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use std::str::FromStr;

    /// Relative `.sym` paths are derived from the debug file name and debug id,
    /// and are unavailable when either is missing.
    #[tokio::test]
    async fn test_relative_symbol_path() {
        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
        let m = SimpleModule::new("foo.pdb", debug_id);
        assert_eq!(
            &breakpad_sym_lookup(&m).unwrap().cache_rel,
            "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
        );
        let m2 = SimpleModule::new("foo.pdb", debug_id);
        assert_eq!(
            &breakpad_sym_lookup(&m2).unwrap().cache_rel,
            "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
        );
        let m3 = SimpleModule::new("foo.xyz", debug_id);
        assert_eq!(
            &breakpad_sym_lookup(&m3).unwrap().cache_rel,
            "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym"
        );
        let m4 = SimpleModule::new("foo.xyz", debug_id);
        assert_eq!(
            &breakpad_sym_lookup(&m4).unwrap().cache_rel,
            "foo.xyz/ABCD1234ABCD1234ABCDABCD12345678a/foo.xyz.sym"
        );

        // Neither debug file nor debug id.
        let bad = SimpleModule::default();
        assert!(breakpad_sym_lookup(&bad).is_none());

        // Debug file only.
        let bad2 = SimpleModule {
            debug_file: Some("foo".to_string()),
            ..SimpleModule::default()
        };
        assert!(breakpad_sym_lookup(&bad2).is_none());

        // Debug id only.
        let bad3 = SimpleModule {
            debug_id: Some(debug_id),
            ..SimpleModule::default()
        };
        assert!(breakpad_sym_lookup(&bad3).is_none());
    }

    /// Only the leaf of an absolute debug file path takes part in the lookup path.
    #[tokio::test]
    async fn test_relative_symbol_path_abs_paths() {
        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
        {
            let m = SimpleModule::new("/path/to/foo.bin", debug_id);
            assert_eq!(
                &breakpad_sym_lookup(&m).unwrap().cache_rel,
                "foo.bin/ABCD1234ABCD1234ABCDABCD12345678a/foo.bin.sym"
            );
        }
        {
            let m = SimpleModule::new("c:/path/to/foo.pdb", debug_id);
            assert_eq!(
                &breakpad_sym_lookup(&m).unwrap().cache_rel,
                "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
            );
        }
        {
            let m = SimpleModule::new("c:\\path\\to\\foo.pdb", debug_id);
            assert_eq!(
                &breakpad_sym_lookup(&m).unwrap().cache_rel,
                "foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"
            );
        }
    }

    /// Create each of `dirs` directly under `path` and return the new paths.
    fn mksubdirs(path: &Path, dirs: &[&str]) -> Vec<PathBuf> {
        dirs.iter()
            .map(|dir| {
                let new_path = path.join(dir);
                fs::create_dir(&new_path).unwrap();
                new_path
            })
            .collect()
    }

    /// Write `contents` to `path`, creating the parent directories if needed.
    fn write_symbol_file(path: &Path, contents: &[u8]) {
        let dir = path.parent().unwrap();
        if !fs::metadata(dir).ok().map_or(false, |m| m.is_dir()) {
            fs::create_dir_all(dir).unwrap();
        }
        let mut f = File::create(path).unwrap();
        f.write_all(contents).unwrap();
    }

    /// Write a minimal symbol file that parses successfully.
    fn write_good_symbol_file(path: &Path) {
        write_symbol_file(path, b"MODULE Linux x86 abcd1234 foo\n");
    }

    /// Write a file that is not a valid symbol file.
    fn write_bad_symbol_file(path: &Path) {
        write_symbol_file(path, b"this is not a symbol file\n");
    }

    /// `SimpleSymbolSupplier` finds files in any of its search directories and
    /// reports missing and malformed files with the right error.
    #[tokio::test]
    async fn test_simple_symbol_supplier() {
        let t = tempfile::tempdir().unwrap();
        let paths = mksubdirs(t.path(), &["one", "two"]);

        let supplier = SimpleSymbolSupplier::new(paths.clone());
        let bad = SimpleModule::default();
        assert_eq!(
            supplier.locate_symbols(&bad).await,
            Err(SymbolError::NotFound)
        );

        // One module per search directory.
        for &(path, file, id, sym) in [
            (
                &paths[0],
                "foo.pdb",
                DebugId::from_str("abcd1234-0000-0000-0000-abcd12345678-a").unwrap(),
                "foo.pdb/ABCD1234000000000000ABCD12345678a/foo.sym",
            ),
            (
                &paths[1],
                "bar.xyz",
                DebugId::from_str("ff990000-0000-0000-0000-abcd12345678-a").unwrap(),
                "bar.xyz/FF990000000000000000ABCD12345678a/bar.xyz.sym",
            ),
        ]
        .iter()
        {
            let m = SimpleModule::new(file, id);
            // No symbols present yet.
            assert_eq!(
                supplier.locate_symbols(&m).await,
                Err(SymbolError::NotFound)
            );
            write_good_symbol_file(&path.join(sym));
            // Now they are found, since this supplier does no caching.
            assert!(
                matches!(supplier.locate_symbols(&m).await, Ok(_)),
                "Located symbols for {}",
                sym
            );
        }

        // A malformed symbol file is reported as a parse error.
        let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap();
        let mal = SimpleModule::new("baz.pdb", debug_id);
        let sym = "baz.pdb/FFFF0000000000000000ABCD12345678a/baz.sym";
        assert_eq!(
            supplier.locate_symbols(&mal).await,
            Err(SymbolError::NotFound)
        );
        write_bad_symbol_file(&paths[0].join(sym));
        let res = supplier.locate_symbols(&mal).await;
        assert!(
            matches!(res, Err(SymbolError::ParseError(..))),
            "Correctly failed to parse {}, result: {:?}",
            sym,
            res
        );
    }

    /// `Symbolizer` fills frames from symbol files and caches lookup results,
    /// including failures.
    #[tokio::test]
    async fn test_symbolizer() {
        let t = tempfile::tempdir().unwrap();
        let path = t.path();

        let supplier = SimpleSymbolSupplier::new(vec![PathBuf::from(path)]);
        let symbolizer = Symbolizer::new(supplier);
        let debug_id = DebugId::from_str("abcd1234-abcd-1234-abcd-abcd12345678-a").unwrap();
        let m1 = SimpleModule::new("foo.pdb", debug_id);
        write_symbol_file(
            &path.join("foo.pdb/ABCD1234ABCD1234ABCDABCD12345678a/foo.sym"),
            b"MODULE Linux x86 ABCD1234ABCD1234ABCDABCD12345678a foo
FILE 1 foo.c
FUNC 1000 30 10 some func
1000 30 100 1
",
        );
        let mut f1 = SimpleFrame::with_instruction(0x1010);
        symbolizer.fill_symbol(&m1, &mut f1).await.unwrap();
        assert_eq!(f1.function.unwrap(), "some func");
        assert_eq!(f1.function_base.unwrap(), 0x1000);
        assert_eq!(f1.source_file.unwrap(), "foo.c");
        assert_eq!(f1.source_line.unwrap(), 100);
        assert_eq!(f1.source_line_base.unwrap(), 0x1000);

        assert_eq!(
            symbolizer
                .get_symbol_at_address("foo.pdb", debug_id, 0x1010)
                .await
                .unwrap(),
            "some func"
        );

        // No symbols exist for this module yet.
        let debug_id = DebugId::from_str("ffff0000-0000-0000-0000-abcd12345678-a").unwrap();
        let m2 = SimpleModule::new("bar.pdb", debug_id);
        let mut f2 = SimpleFrame::with_instruction(0x1010);
        assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err());
        assert!(f2.function.is_none());
        assert!(f2.function_base.is_none());
        assert!(f2.source_file.is_none());
        assert!(f2.source_line.is_none());

        // Write the symbols exactly where the supplier would look (the breakpad
        // id is upper-case hex) and verify they are still unavailable, because
        // the symbolizer cached the earlier failure.
        write_symbol_file(
            &path.join("bar.pdb/FFFF0000000000000000ABCD12345678a/bar.sym"),
            b"MODULE Linux x86 FFFF0000000000000000ABCD12345678a bar
FILE 53 bar.c
FUNC 1000 30 10 another func
1000 30 7 53
",
        );
        assert!(symbolizer.fill_symbol(&m2, &mut f2).await.is_err());
        assert!(f2.function.is_none());
        assert!(f2.function_base.is_none());
        assert!(f2.source_file.is_none());
        assert!(f2.source_line.is_none());
        assert!(symbolizer
            .get_symbol_at_address("bar.pdb", debug_id, 0x1010)
            .await
            .is_none());
    }
}