1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::hash::{BuildHasher, Hasher};
7use std::io::{self, BufWriter, Read, Seek, Write};
8use std::path::{Path, PathBuf};
9
10use log::{debug, info, trace, warn};
11
12use crate::addr_space::AddrSpace;
13use crate::misc::{fast_hex32, fast_hex64, parse_full_name};
14use crate::modules::{Module, Modules};
15use crate::pdbcache::{
16 PdbCache, PdbCacheBuilder, PdbCacheStore, format_symcache_path, format_symsrv_url,
17};
18use crate::pe::{PdbId, Pe, PeId, SymcacheEntry};
19use crate::stats::Stats;
20use crate::{Error, Guid, Result};
21
/// A file that was fetched from a symbol server and written to disk.
#[derive(Debug)]
struct DownloadedFile {
    /// Where the file was stored.
    path: PathBuf,
    /// Number of bytes that were written for it.
    size: u64,
}

impl DownloadedFile {
    /// Build a [`DownloadedFile`] from anything path-like and a byte count.
    fn new(path: impl AsRef<Path>, size: u64) -> Self {
        let path = PathBuf::from(path.as_ref());

        DownloadedFile { path, size }
    }
}
36
/// How a PDB file was obtained.
#[derive(Debug)]
enum PdbLocationKind {
    /// The PDB was found at the path recorded in its identifier.
    Local,
    /// The PDB was already present in the symbol cache.
    LocalCache,
    /// The PDB was downloaded from a symbol server; carries the downloaded
    /// size in bytes.
    Download(u64),
}

/// A PDB file on disk together with how it got there.
#[derive(Debug)]
struct PdbLocation {
    /// How the file was obtained.
    kind: PdbLocationKind,
    /// Where the file lives on disk.
    path: PathBuf,
}

impl PdbLocation {
    /// Pair a location `kind` with the on-disk `path` of the PDB.
    fn new(kind: PdbLocationKind, path: PathBuf) -> Self {
        PdbLocation { kind, path }
    }
}
63
64#[derive(Debug)]
70enum PeLocationKind {
71 LocalCache,
73 Download(u64),
75}
76
77#[derive(Debug)]
78struct PeLocation {
79 kind: PeLocationKind,
80 pdb_id: Option<PdbId>,
81}
82
83impl PeLocation {
84 fn new(kind: PeLocationKind, pdb_id: Option<PdbId>) -> Self {
85 Self { kind, pdb_id }
86 }
87}
88
89fn download_from_symsrv<'s>(
94 symcache: impl AsRef<Path>,
95 symsrvs: impl Iterator<Item = &'s str>,
96 entry: &impl SymcacheEntry,
97) -> Result<Option<DownloadedFile>> {
98 let symcache = symcache.as_ref();
100 let entry_root_dir = symcache.join(entry.name());
101
102 let entry_dir = entry_root_dir.join(entry.index());
104
105 let entry_path = entry_dir.join(entry.name());
107
108 for symsrv in symsrvs {
110 let entry_url = format_symsrv_url(symsrv, entry);
113 debug!("trying to download {entry_url}..");
114
115 let resp = match ureq::get(&entry_url).call() {
116 Ok(o) => o,
117 Err(ureq::Error::StatusCode(404)) => {
120 warn!("got a 404 for {entry_url}");
121 continue;
122 }
123 Err(e) => {
125 return Err(Error::Download {
126 entry_url,
127 e: e.into(),
128 });
129 }
130 };
131
132 if !entry_dir.try_exists()? {
135 debug!("creating {}..", entry_dir.display());
136 fs::create_dir_all(&entry_dir).map_err(|_| {
137 Error::Other(format!("failed to create pdb dir {}", entry_dir.display()))
138 })?;
139 }
140
141 let file = File::create(&entry_path)
143 .map_err(|_| Error::Other(format!("failed to create {}", entry_path.display())))?;
144
145 let size = io::copy(
146 &mut resp.into_body().into_reader(),
147 &mut BufWriter::new(file),
148 )?;
149
150 debug!("downloaded to {}", entry_path.display());
151 return Ok(Some(DownloadedFile::new(entry_path, size)));
152 }
153
154 Ok(None)
155}
156
157fn get_pdb_id_from_symsrvs(
160 pdb_lookup: &PdbLookupConfig,
161 pe_id: &PeId,
162) -> Result<Option<PeLocation>> {
163 Ok(match pdb_lookup.symsrvs() {
164 None => {
165 None
167 }
168
169 Some(symsrvs) => {
170 struct FileAddrSpace(File);
171
172 impl FileAddrSpace {
173 fn new(path: impl AsRef<Path>) -> Result<Self> {
174 Ok(Self(File::open(path.as_ref())?))
175 }
176 }
177
178 impl AddrSpace for FileAddrSpace {
179 fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result<usize> {
180 self.0.seek(io::SeekFrom::Start(addr))?;
181
182 self.0.read(buf)
183 }
184 }
185
186 let symcache = &pdb_lookup.symcache;
187 let mut pe_path = format_symcache_path(symcache, pe_id);
188 let kind = if pe_path.exists() {
189 PeLocationKind::LocalCache
190 } else {
191 let Some(downloaded) = download_from_symsrv(symcache, symsrvs, pe_id)? else {
193 debug!("did not find {pe_id} on any symbol server");
194 return Ok(None);
195 };
196
197 pe_path = downloaded.path;
198
199 PeLocationKind::Download(downloaded.size)
200 };
201
202 debug!("trying to parse {} from disk..", pe_path.display());
203 let mut addr_space = FileAddrSpace::new(pe_path)?;
204 let pe_file = Pe::new(&mut addr_space, 0)?;
205 let pdb_id = pe_file.read_pdbid(&mut addr_space)?;
206
207 debug!("PDB id parsed from the PE: {pdb_id:?}");
208
209 Some(PeLocation::new(kind, pdb_id))
210 }
211 })
212}
213
214fn get_pdb(pdb_lookup: &PdbLookupConfig, pdb_id: &PdbId) -> Result<Option<PdbLocation>> {
216 if pdb_id.path.is_file() {
218 return Ok(Some(PdbLocation::new(
220 PdbLocationKind::Local,
221 pdb_id.path.clone(),
222 )));
223 }
224
225 let symcache = &pdb_lookup.symcache;
227 let local_path = format_symcache_path(symcache, pdb_id);
228 if local_path.is_file() {
229 return Ok(Some(PdbLocation::new(
231 PdbLocationKind::LocalCache,
232 local_path,
233 )));
234 }
235
236 Ok(match pdb_lookup.symsrvs() {
237 None => {
238 None
240 }
241 Some(symsrvs) => {
242 let downloaded_path = download_from_symsrv(symcache, symsrvs, pdb_id)?;
244
245 downloaded_path
246 .map(|file| PdbLocation::new(PdbLocationKind::Download(file.size), file.path))
247 }
248 })
249}
250
/// A pass-through hasher for `u64` keys: the hash of a key is the key itself.
///
/// It doubles as its own [`BuildHasher`], so it can be plugged straight into a
/// `HashMap<u64, _, IdentityHasher>`.
#[derive(Default)]
struct IdentityHasher {
    /// The last value fed to the hasher.
    h: u64,
}

impl Hasher for IdentityHasher {
    fn finish(&self) -> u64 {
        self.h
    }

    /// Accepts exactly 8 bytes, interpreted as a little-endian `u64`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is not 8 bytes long.
    fn write(&mut self, bytes: &[u8]) {
        debug_assert_eq!(bytes.len(), 8);

        self.h = u64::from_le_bytes(
            bytes
                .try_into()
                .expect("IdentityHasher only supports 8-byte writes"),
        );
    }

    /// Stores the key directly.
    ///
    /// The default `write_u64` forwards native-endian bytes to [`write`],
    /// which decodes them as little-endian; on big-endian targets that would
    /// yield the byte-swapped key instead of the key itself.
    ///
    /// [`write`]: Hasher::write
    fn write_u64(&mut self, value: u64) {
        self.h = value;
    }
}

impl BuildHasher for IdentityHasher {
    type Hasher = Self;

    fn build_hasher(&self) -> Self::Hasher {
        Self::default()
    }
}
280
281struct SymbolizerInner<'symbolizer> {
285 stats: &'symbolizer mut Stats,
286 pdb_lookup: &'symbolizer PdbLookupConfig,
287 pdbcache_store: &'symbolizer mut PdbCacheStore,
288}
289
290impl<'symbolizer> SymbolizerInner<'symbolizer> {
291 fn new(
292 stats: &'symbolizer mut Stats,
293 pdb_lookup: &'symbolizer PdbLookupConfig,
294 pdbcache_store: &'symbolizer mut PdbCacheStore,
295 ) -> Self {
296 Self {
297 stats,
298 pdb_lookup,
299 pdbcache_store,
300 }
301 }
302
303 fn get_or_create_module_pdbcache(
304 &'symbolizer mut self,
305 addr_space: &mut impl AddrSpace,
306 module: &Module,
307 ) -> Result<&'symbolizer PdbCache> {
308 let create_pdbcache = || -> Result<PdbCache> {
309 let mut builder = PdbCacheBuilder::new(module);
310
311 let pe = Pe::new(addr_space, module.at.start)?;
314
315 builder.ingest(pe.read_exports(addr_space)?.unwrap_or_default());
317
318 let pdb_id = pe.read_pdbid(addr_space).and_then(|pdb_id| {
321 if pdb_id.is_some() {
322 return Ok(pdb_id);
323 }
324
325 let pe_id = PeId::new(&module.name, pe.timestamp, pe.size);
326 trace!("No PDB information found, trying to download PE file for {pe_id}..");
327
328 let downloaded_pe = get_pdb_id_from_symsrvs(self.pdb_lookup, &pe_id)?;
329
330 Ok(downloaded_pe.and_then(|d| {
331 if let PeLocationKind::Download(size) = d.kind {
332 self.stats.downloaded_pe(pe_id, size);
333 }
334
335 d.pdb_id
336 }))
337 })?;
338
339 if let Some(pdb_id) = pdb_id {
340 trace!("getting PDB information for {module:?}/{pdb_id}..");
341
342 if let Some(downloaded_pdb) = get_pdb(self.pdb_lookup, &pdb_id)? {
344 if let PdbLocationKind::Download(size) = downloaded_pdb.kind {
345 self.stats.downloaded_pdb(pdb_id, size);
346 }
347
348 trace!("Ingesting PDB..");
350 builder.ingest_pdb(downloaded_pdb.path)?;
351 }
352 }
353
354 let pdbcache = builder.build()?;
356
357 Ok(pdbcache)
358 };
359
360 self.pdbcache_store.get_or_create(module, create_pdbcache)
361 }
362
363 fn try_symbolize_addr_from_pdbs(
371 &'symbolizer mut self,
372 addr_space: &mut impl AddrSpace,
373 module: &Module,
374 addr: u64,
375 ) -> Result<Option<String>> {
376 trace!("symbolizing address {addr:#x} from {}..", module.name);
377
378 let pdbcache = self.get_or_create_module_pdbcache(addr_space, module)?;
380
381 let line = pdbcache.symbolize(module.rva(addr));
383
384 Ok(Some(line))
385 }
386}
387
388#[derive(Debug)]
391pub struct PdbLookupConfig {
392 symcache: PathBuf,
395 symsrvs: Option<Vec<String>>,
397}
398
399impl PdbLookupConfig {
400 fn inner_new(symcache: PathBuf, symsrvs: Option<Vec<String>>) -> Result<Self> {
401 if !symcache.is_dir() {
402 return Err(Error::Other(format!(
403 "{} directory does not exist",
404 symcache.display()
405 )));
406 }
407
408 Ok(Self { symcache, symsrvs })
409 }
410
411 pub fn new(symcache: PathBuf) -> Result<Self> {
412 Self::inner_new(symcache, None)
413 }
414
415 pub fn with_msft_symsrv(symcache: PathBuf) -> Result<Self> {
416 Self::with_symsrvs(symcache, vec![
417 "https://msdl.microsoft.com/download/symbols/".to_string(),
418 ])
419 }
420
421 pub fn with_symsrvs(symcache: PathBuf, symsrvs: Vec<String>) -> Result<Self> {
422 Self::inner_new(symcache, Some(symsrvs))
423 }
424
425 #[must_use]
426 pub fn symcache(&self) -> &Path {
427 &self.symcache
428 }
429
430 #[must_use]
431 pub fn is_offline(&self) -> bool {
432 self.symsrvs.is_none()
433 }
434
435 #[must_use]
436 pub fn is_online(&self) -> bool {
437 self.symsrvs.is_some()
438 }
439
440 fn symsrvs(&self) -> Option<impl Iterator<Item = &str>> {
441 self.symsrvs
442 .as_ref()
443 .map(|symsrvs| symsrvs.iter().map(AsRef::as_ref))
444 }
445}
446
447pub struct Symbolizer {
451 stats: Stats,
454 modules: Modules,
457 pdb_lookup: PdbLookupConfig,
459 addr_cache: HashMap<u64, Box<str>, IdentityHasher>,
462 pdbcache_store: PdbCacheStore,
465}
466
467impl Symbolizer {
468 #[must_use]
470 pub fn new(pdb_lookup: PdbLookupConfig, modules: impl IntoIterator<Item = Module>) -> Self {
471 let modules = modules.into_iter().collect();
472
473 Self {
474 stats: Stats::default(),
475 modules: Modules::new(modules),
476 pdb_lookup,
477 addr_cache: HashMap::default(),
478 pdbcache_store: PdbCacheStore::default(),
479 }
480 }
481
482 pub fn with_cache_capacity(
483 pdb_lookup: PdbLookupConfig,
484 modules: impl IntoIterator<Item = Module>,
485 cache_capacity_hint: usize,
486 ) -> Self {
487 let modules = modules.into_iter().collect();
488 let addr_cache =
489 HashMap::with_capacity_and_hasher(cache_capacity_hint, IdentityHasher::default());
490
491 Self {
492 stats: Stats::default(),
493 modules: Modules::new(modules),
494 pdb_lookup,
495 addr_cache,
496 pdbcache_store: PdbCacheStore::default(),
497 }
498 }
499
500 #[must_use]
502 pub fn stats(&self) -> &Stats {
503 &self.stats
504 }
505
506 fn try_symbolize_addr(
512 &mut self,
513 addr_space: &mut impl AddrSpace,
514 addr: u64,
515 ) -> Result<Option<&str>> {
516 use std::collections::hash_map::Entry::{Occupied, Vacant};
517 Ok(match self.addr_cache.entry(addr) {
518 Occupied(o) => {
519 self.stats.cache_hit();
520
521 Some(o.into_mut())
522 }
523 Vacant(v) => {
524 let Some(module) = self.modules.by_addr(addr) else {
525 trace!("address {addr:#x} doesn't belong to any module");
526 return Ok(None);
527 };
528
529 let mut inner = SymbolizerInner::new(
530 &mut self.stats,
531 &self.pdb_lookup,
532 &mut self.pdbcache_store,
533 );
534
535 let Some(symbol) = inner.try_symbolize_addr_from_pdbs(addr_space, module, addr)?
536 else {
537 return Ok(None);
538 };
539
540 Some(v.insert(symbol.into_boxed_str()))
541 }
542 })
543 }
544
545 pub fn symbolize_modoff(&mut self, addr: u64) -> Result<String> {
547 let mut modoff = Vec::new();
548 self.symbolize_modoff_into(addr, &mut modoff)?;
549
550 Ok(String::from_utf8(modoff)?)
551 }
552
553 pub fn symbolize_full(&mut self, addr_space: &mut impl AddrSpace, addr: u64) -> Result<String> {
555 let mut full = Vec::new();
556 self.symbolize_full_into(addr_space, addr, &mut full)?;
557
558 Ok(String::from_utf8(full)?)
559 }
560
561 pub fn symbolize_modoff_into(&mut self, addr: u64, output: &mut impl Write) -> Result<()> {
564 let mut buffer = [0; 16];
565 if let Some(module) = self.modules.by_addr(addr) {
566 output.write_all(module.name.as_bytes())?;
567 output.write_all(b"+0x")?;
568
569 output.write_all(fast_hex32(
570 &mut buffer[0..8].try_into().unwrap(),
571 module.rva(addr),
572 ))
573 } else {
574 output.write_all(b"0x")?;
575
576 output.write_all(fast_hex64(&mut buffer, addr))
577 }
578 .map_err(|_| Error::Other("failed to write symbolized value to output".to_string()))?;
579
580 self.stats.addr_symbolized();
581
582 Ok(())
583 }
584
585 pub fn symbolize_full_into(
588 &mut self,
589 addr_space: &mut impl AddrSpace,
590 addr: u64,
591 output: &mut impl Write,
592 ) -> Result<()> {
593 match self.try_symbolize_addr(addr_space, addr)? {
594 Some(sym) => {
595 output.write_all(sym.as_bytes()).map_err(|_| {
596 Error::Other("failed to write symbolized value to output".to_string())
597 })?;
598
599 self.stats.addr_symbolized();
600
601 Ok(())
602 }
603 None => self.symbolize_modoff_into(addr, output),
604 }
605 }
606
607 pub fn name_to_addr(
610 &mut self,
611 addr_space: &mut impl AddrSpace,
612 name: &str,
613 ) -> Result<Option<u64>> {
614 let Some(parsed_name) = parse_full_name(name) else {
615 return Err(Error::Other(format!("failed to parse {name}")));
616 };
617
618 let Some(module) = self.modules.by_name(parsed_name.module_name) else {
619 return Ok(None);
620 };
621
622 let mut inner =
623 SymbolizerInner::new(&mut self.stats, &self.pdb_lookup, &mut self.pdbcache_store);
624
625 let pdbcache = inner.get_or_create_module_pdbcache(addr_space, module)?;
626
627 Ok(pdbcache
628 .addr_by_name(parsed_name.function_name)
629 .map(|base_addr| u64::from(base_addr).strict_add(parsed_name.offset)))
630 }
631
632 pub fn import_pdbs(&self, dirs: impl IntoIterator<Item = impl AsRef<Path>>) -> Result<()> {
635 for dir in dirs {
636 let dir = dir.as_ref();
637 if !(dir.exists() && dir.is_dir()) {
638 return Err(Error::Other(format!(
639 "cannot import pdb from {} as it doesn't exist or isn't a directory",
640 dir.display()
641 )));
642 }
643
644 for file in dir.read_dir()? {
645 let path = file?.path();
646 if !path.is_file() {
647 debug!("skipping {} because not a file", path.display());
648 continue;
649 }
650
651 let Some(ext) = path.extension() else {
652 debug!(
653 "skipping {} because doesn't have an extension",
654 path.display()
655 );
656 continue;
657 };
658
659 if ext != "pdb" {
660 debug!("skipping {} because not a pdb file", path.display());
661 continue;
662 }
663
664 let Some(filename) = path.file_name() else {
665 debug!("skipping {} because no filename", path.display());
666 continue;
667 };
668
669 let mut pdb = pdb2::PDB::open(File::open(&path)?)?;
670 let info = pdb.pdb_information()?;
671 let debug_info = pdb.debug_information()?;
672 let Some(age) = debug_info.age() else {
673 debug!("skipping {} because no age in debug info", path.display());
674 continue;
675 };
676
677 let pdbid = PdbId::new(filename, Guid::from(info.guid.to_bytes_le()), age)?;
678 let cached_pdb = format_symcache_path(self.pdb_lookup.symcache(), &pdbid);
679 if cached_pdb.exists() {
680 debug!(
681 "skipping {} because already in symbol cache",
682 path.display()
683 );
684 continue;
685 }
686
687 let Some(cached_pdb_dir) = cached_pdb.parent() else {
688 return Err(Error::Other(format!(
689 "{} has no parent",
690 cached_pdb.display()
691 )));
692 };
693
694 info!(
695 "copying {} into the symbol cache at {}",
696 path.display(),
697 cached_pdb.display()
698 );
699 fs::create_dir_all(cached_pdb_dir)?;
700 fs::copy(path, cached_pdb)?;
701 }
702 }
703
704 Ok(())
705 }
706}