vminer_core/symbols/
mod.rs

1pub mod dwarf;
2#[cfg(feature = "std")]
3pub mod pdb;
4pub mod symbols_file;
5
6use super::VirtualAddress;
7use crate::{utils::OnceCell, ResultExt, VmError, VmResult};
8use alloc::{
9    borrow::{Cow, ToOwned},
10    boxed::Box,
11    string::{String, ToString},
12    sync::Arc,
13    vec::Vec,
14};
15use core::{fmt, ops::Range};
16use hashbrown::HashMap;
17#[cfg(not(feature = "std"))]
18use once_map::unsync::OnceMap;
19#[cfg(feature = "std")]
20use once_map::OnceMap;
21#[cfg(feature = "std")]
22use std::{fs, path};
23
24/// Demangles a symbol to a string.
25///
26/// If the symbol was not mangled or if the mangling scheme is unknown, the
27/// symbol is returned as-is.
28pub fn demangle(sym: &str) -> Cow<str> {
29    if let Ok(sym) = rustc_demangle::try_demangle(sym) {
30        return Cow::Owned(sym.to_string());
31    }
32
33    if let Ok(sym) = cpp_demangle::Symbol::new(sym) {
34        return Cow::Owned(sym.to_string());
35    }
36
37    if let Ok(sym) = msvc_demangler::demangle(sym, msvc_demangler::DemangleFlags::NAME_ONLY) {
38        return Cow::Owned(sym);
39    }
40
41    Cow::Borrowed(sym)
42}
43
44/// Demangles a symbol to a writer.
45///
46/// If the symbol was not mangled or if the mangling scheme is unknown, the
47/// symbol is written as-is.
48pub fn demangle_to<W: fmt::Write>(sym: &str, mut writer: W) -> fmt::Result {
49    if let Ok(sym) = rustc_demangle::try_demangle(sym) {
50        writer.write_fmt(format_args!("{sym}"))?;
51        return Ok(());
52    }
53
54    if let Ok(sym) = cpp_demangle::Symbol::new(sym) {
55        writer.write_fmt(format_args!("{sym}"))?;
56        return Ok(());
57    }
58
59    if let Ok(sym) = msvc_demangler::demangle(sym, msvc_demangler::DemangleFlags::NAME_ONLY) {
60        writer.write_str(&sym)?;
61        return Ok(());
62    }
63
64    writer.write_str(sym)
65}
66
/// Primitive types that a [`TypeKind::Primitive`] can carry.
///
/// The variants are `Void` plus signed and unsigned integers named after
/// their bit width. The enum is field-less, so it is cheap to copy, compare
/// and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Primitive {
    Void,

    I8,
    U8,
    I16,
    U16,
    I32,
    U32,
    I64,
    U64,
}
80
81pub type Type = Arc<TypeKind>;
82
/// Generates one associated function on [`TypeKind`] per `name: expr;` entry.
///
/// Each generated function owns a private `static` cell. The entry's
/// expression is wrapped in an `Arc` through `get_or_init`, and every call
/// returns a clone of that `Arc`.
macro_rules! lazy_types {
    ($($name:ident: $init:expr;)*) => {
        impl TypeKind {
            $(
                pub fn $name() -> Type {
                    static CELL: OnceCell<Type> = OnceCell::new();
                    let shared = CELL.get_or_init(|| Arc::new($init));
                    shared.clone()
                }
            )*
        }
    };
}
95#[derive(Debug, Clone)]
96pub enum TypeKind {
97    Primitive(Primitive),
98    Bitfield,
99    Array(Type, u32),
100    Function,
101    Pointer(Type),
102    Struct(String),
103    Union(String),
104    Unknown,
105}
106
107lazy_types! {
108    unknown: TypeKind::Unknown;
109    void: TypeKind::Primitive(Primitive::Void);
110    void_ptr: TypeKind::Pointer(TypeKind::void());
111    i8: TypeKind::Primitive(Primitive::I8);
112    i8_ptr: TypeKind::Pointer(TypeKind::i8());
113    u8: TypeKind::Primitive(Primitive::U8);
114    u8_ptr: TypeKind::Pointer(TypeKind::u8());
115    i16: TypeKind::Primitive(Primitive::I16);
116    i16_ptr: TypeKind::Pointer(TypeKind::i16());
117    u16: TypeKind::Primitive(Primitive::U16);
118    u16_ptr: TypeKind::Pointer(TypeKind::u16());
119    i32: TypeKind::Primitive(Primitive::I32);
120    i32_ptr: TypeKind::Pointer(TypeKind::i32());
121    u32: TypeKind::Primitive(Primitive::U32);
122    u32_ptr: TypeKind::Pointer(TypeKind::u32());
123    i64: TypeKind::Primitive(Primitive::I64);
124    i64_ptr: TypeKind::Pointer(TypeKind::i64());
125    u64: TypeKind::Primitive(Primitive::U64);
126    u64_ptr: TypeKind::Pointer(TypeKind::u64());
127}
128
129#[derive(Debug, Clone)]
130pub struct StructField {
131    pub name: String,
132    pub offset: u64,
133    pub typ: Type,
134}
135
136#[derive(Debug)]
137pub struct Struct {
138    pub size: u64,
139    pub name: String,
140    pub fields: Vec<StructField>,
141}
142
143impl Struct {
144    fn borrow(&self) -> StructRef {
145        StructRef {
146            size: self.size,
147            name: &self.name,
148            fields: &self.fields,
149        }
150    }
151}
152
153#[derive(Debug, Clone, Copy)]
154pub struct StructRef<'a> {
155    pub size: u64,
156    pub name: &'a str,
157    pub fields: &'a [StructField],
158}
159
160impl StructRef<'_> {
161    pub fn find_offset(&self, field_name: &str) -> Option<u64> {
162        self.find_field(field_name).map(|f| f.offset)
163    }
164
165    pub fn require_offset(&self, field_name: &str) -> VmResult<u64> {
166        self.find_offset(field_name)
167            .ok_or_else(|| VmError::missing_field(field_name, self.name))
168    }
169
170    pub fn find_field(&self, field_name: &str) -> Option<&StructField> {
171        self.fields.iter().find(|field| field.name == field_name)
172    }
173
174    pub fn find_offset_and_size(&self, field_name: &str) -> VmResult<(u64, u64)> {
175        let (i, field) = self
176            .fields
177            .iter()
178            .enumerate()
179            .find(|(_, field)| field.name == field_name)
180            .ok_or_else(|| VmError::missing_field(field_name, self.name))?;
181        let size = self.fields.get(i + 1).map_or(self.size, |f| f.offset) - field.offset;
182        Ok((field.offset, size))
183    }
184
185    pub fn into_owned(&self) -> Struct {
186        Struct {
187            size: self.size,
188            name: self.name.to_owned(),
189            fields: self.fields.to_owned(),
190        }
191    }
192}
193
194#[derive(Debug, Default)]
195pub struct ModuleSymbolsBuilder {
196    buffer: String,
197    symbols: Vec<(VirtualAddress, Range<usize>)>,
198    types: HashMap<String, Struct>,
199}
200
201impl ModuleSymbolsBuilder {
202    pub fn new() -> Self {
203        Self::default()
204    }
205
206    pub fn build(self) -> ModuleSymbols {
207        let buffer = self.buffer.into_boxed_str();
208
209        let mut names = self.symbols.into_boxed_slice();
210        names.sort_unstable_by_key(|(addr, _)| *addr);
211
212        let mut addresses = names.clone();
213        addresses.sort_unstable_by_key(|(_, range)| &buffer[range.clone()]);
214
215        ModuleSymbols {
216            buffer,
217            symbols: names,
218            addresses,
219            types: self.types,
220        }
221    }
222
223    pub fn push(&mut self, addr: VirtualAddress, symbol: &str) {
224        let start = self.buffer.len();
225        self.buffer.push_str(symbol);
226        let end = self.buffer.len();
227        self.symbols.push((addr, start..end))
228    }
229
230    pub fn insert_struct(&mut self, structure: Struct) {
231        self.types.insert(structure.name.clone(), structure);
232    }
233
234    #[cfg(feature = "std")]
235    pub fn read_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> VmResult<()> {
236        self.read_file_inner(path.as_ref())
237    }
238
239    #[cfg(feature = "std")]
240    fn read_file_inner(&mut self, path: &std::path::Path) -> VmResult<()> {
241        let content = std::fs::read(path)?;
242        self.read_bytes(&content)
243    }
244
245    pub fn read_bytes(&mut self, content: &[u8]) -> VmResult<()> {
246        if content.starts_with(b"\x7fELF") {
247            let obj = object::File::parse(content).map_err(VmError::new)?;
248            crate::symbols::dwarf::load_types(&obj, self).map_err(VmError::new)?;
249            return Ok(());
250        }
251
252        #[cfg(feature = "std")]
253        if content.starts_with(b"Microsoft C/C++") {
254            let content = std::io::Cursor::new(content);
255            let mut pdb = ::pdb::PDB::open(content).map_err(VmError::new)?;
256
257            pdb::load_syms(&mut pdb, self).map_err(VmError::new)?;
258
259            if let Err(err) = pdb::load_types(&mut pdb, self) {
260                log::warn!("Failed to load types from PDB: {err}");
261            }
262
263            return Ok(());
264        }
265
266        symbols_file::read_from_bytes(content, self)
267    }
268}
269
270impl<S: AsRef<str>> Extend<(VirtualAddress, S)> for ModuleSymbolsBuilder {
271    fn extend<I: IntoIterator<Item = (VirtualAddress, S)>>(&mut self, iter: I) {
272        self.symbols.extend(iter.into_iter().map(|(addr, sym)| {
273            let start = self.buffer.len();
274            self.buffer.push_str(sym.as_ref());
275            let end = self.buffer.len();
276            (addr, (start..end))
277        }))
278    }
279}
280
281impl Extend<Struct> for ModuleSymbolsBuilder {
282    fn extend<I: IntoIterator<Item = Struct>>(&mut self, iter: I) {
283        self.types
284            .extend(iter.into_iter().map(|s| (s.name.clone(), s)))
285    }
286}
287
288#[derive(Default)]
289pub struct ModuleSymbols {
290    buffer: Box<str>,
291
292    /// Sorted by address to find names
293    symbols: Box<[(VirtualAddress, Range<usize>)]>,
294
295    /// Sorted by name to find addresses
296    addresses: Box<[(VirtualAddress, Range<usize>)]>,
297
298    types: HashMap<String, Struct>,
299}
300
301impl ModuleSymbols {
302    #[cfg(feature = "std")]
303    pub fn from_file<P: AsRef<std::path::Path>>(path: P) -> VmResult<Self> {
304        let mut module = ModuleSymbolsBuilder::new();
305        module.read_file_inner(path.as_ref())?;
306        Ok(module.build())
307    }
308
309    pub fn from_bytes(content: &[u8]) -> VmResult<Self> {
310        let mut module = ModuleSymbolsBuilder::new();
311        module.read_bytes(content)?;
312        Ok(module.build())
313    }
314
315    fn symbol(&self, range: Range<usize>) -> &str {
316        &self.buffer[range]
317    }
318
319    pub fn get_symbol(&self, addr: VirtualAddress) -> Option<&str> {
320        let index = self.symbols.binary_search_by_key(&addr, |(a, _)| *a).ok()?;
321        Some(self.symbol(self.symbols[index].1.clone()))
322    }
323
324    pub fn get_symbol_inexact(&self, addr: VirtualAddress) -> Option<(&str, u64)> {
325        let (range, offset) = match self.symbols.binary_search_by_key(&addr, |(a, _)| *a) {
326            Ok(i) => (&self.symbols[i].1, 0),
327            Err(i) => {
328                let i = i.checked_sub(1)?;
329                let (sym_addr, range) = &self.symbols[i];
330                (range, (addr - *sym_addr) as u64)
331            }
332        };
333        Some((self.symbol(range.clone()), offset))
334    }
335
336    pub fn get_address(&self, name: &str) -> Option<VirtualAddress> {
337        let index = self
338            .addresses
339            .binary_search_by_key(&name, |(_, range)| self.symbol(range.clone()))
340            .ok()?;
341        Some(self.addresses[index].0)
342    }
343
344    pub fn require_address(&self, name: &str) -> VmResult<VirtualAddress> {
345        self.get_address(name)
346            .ok_or_else(|| VmError::missing_symbol(name))
347    }
348
349    pub fn iter_symbols(&self) -> impl ExactSizeIterator<Item = (VirtualAddress, &str)> {
350        self.symbols
351            .iter()
352            .map(|(addr, range)| (*addr, self.symbol(range.clone())))
353    }
354
355    pub fn get_struct(&self, name: &str) -> Option<StructRef> {
356        self.types.get(name).map(|s| s.borrow())
357    }
358
359    pub fn require_struct(&self, name: &str) -> VmResult<StructRef> {
360        self.get_struct(name)
361            .ok_or_else(|| VmError::missing_symbol(name))
362    }
363}
364
365impl fmt::Debug for ModuleSymbols {
366    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367        f.debug_map().entries(self.iter_symbols()).finish()
368    }
369}
370
371#[derive(Debug, Default)]
372pub struct SymbolsIndexer {
373    modules: OnceMap<Box<str>, Arc<Option<ModuleSymbols>>>,
374}
375
376impl SymbolsIndexer {
377    pub fn new() -> Self {
378        Self {
379            modules: OnceMap::new(),
380        }
381    }
382
383    pub fn get_addr(&self, lib: &str, name: &str) -> VmResult<VirtualAddress> {
384        self.require_module(lib)?.require_address(name)
385    }
386
387    pub fn get_module(&self, name: &str) -> Option<&ModuleSymbols> {
388        self.modules.get(name)?.as_ref()
389    }
390
391    pub fn require_module(&self, name: &str) -> VmResult<&ModuleSymbols> {
392        self.get_module(name)
393            .ok_or_else(|| VmError::missing_module(name))
394    }
395
396    pub fn load_module(
397        &self,
398        name: Box<str>,
399        f: &mut dyn FnMut(&str) -> VmResult<Arc<Option<ModuleSymbols>>>,
400    ) -> VmResult<Option<&ModuleSymbols>> {
401        let module = self.modules.try_insert(name, |name| {
402            f(name).with_context(|| alloc::format!("failed to load symbols for module \"{name}\""))
403        })?;
404        Ok(module.as_ref())
405    }
406
407    pub fn load_from_bytes(
408        &mut self,
409        name: Box<str>,
410        content: &[u8],
411    ) -> VmResult<Option<&ModuleSymbols>> {
412        self.load_module(name, &mut |_| {
413            ModuleSymbols::from_bytes(content).map(Some).map(Arc::new)
414        })
415    }
416
417    #[cfg(feature = "std")]
418    #[inline]
419    pub fn load_from_file<P: AsRef<std::path::Path>>(
420        &mut self,
421        path: P,
422    ) -> VmResult<Option<&ModuleSymbols>> {
423        self.load_from_file_inner(path.as_ref())
424    }
425
426    #[cfg(feature = "std")]
427    fn load_from_file_inner(&mut self, path: &std::path::Path) -> VmResult<Option<&ModuleSymbols>> {
428        log::debug!("Loading {}", path.display());
429        let name = path
430            .file_name()
431            .context("no file name")?
432            .to_str()
433            .context("non UTF-8 file name")?
434            .into();
435
436        self.load_module(name, &mut |_| {
437            ModuleSymbols::from_file(path).map(Some).map(Arc::new)
438        })
439    }
440
441    #[cfg(feature = "std")]
442    fn load_dir_inner(&mut self, path: &path::Path) -> VmResult<()> {
443        for entry in fs::read_dir(path)? {
444            match entry {
445                Ok(entry) => {
446                    let path = entry.path();
447                    if let Err(err) = self.load_from_file_inner(&path) {
448                        log::warn!("Error reading {}: {err}", path.display());
449                    }
450                }
451                Err(err) => {
452                    log::warn!("Failed to read directory entry: {err}")
453                }
454            };
455        }
456
457        Ok(())
458    }
459
460    /// Reads profile data from the given directory.
461    #[cfg(feature = "std")]
462    #[inline]
463    pub fn load_dir<P: AsRef<path::Path>>(&mut self, path: P) -> VmResult<()> {
464        self.load_dir_inner(path.as_ref())
465    }
466}