1pub mod dwarf;
2#[cfg(feature = "std")]
3pub mod pdb;
4pub mod symbols_file;
5
6use super::VirtualAddress;
7use crate::{utils::OnceCell, ResultExt, VmError, VmResult};
8use alloc::{
9 borrow::{Cow, ToOwned},
10 boxed::Box,
11 string::{String, ToString},
12 sync::Arc,
13 vec::Vec,
14};
15use core::{fmt, ops::Range};
16use hashbrown::HashMap;
17#[cfg(not(feature = "std"))]
18use once_map::unsync::OnceMap;
19#[cfg(feature = "std")]
20use once_map::OnceMap;
21#[cfg(feature = "std")]
22use std::{fs, path};
23
24pub fn demangle(sym: &str) -> Cow<str> {
29 if let Ok(sym) = rustc_demangle::try_demangle(sym) {
30 return Cow::Owned(sym.to_string());
31 }
32
33 if let Ok(sym) = cpp_demangle::Symbol::new(sym) {
34 return Cow::Owned(sym.to_string());
35 }
36
37 if let Ok(sym) = msvc_demangler::demangle(sym, msvc_demangler::DemangleFlags::NAME_ONLY) {
38 return Cow::Owned(sym);
39 }
40
41 Cow::Borrowed(sym)
42}
43
44pub fn demangle_to<W: fmt::Write>(sym: &str, mut writer: W) -> fmt::Result {
49 if let Ok(sym) = rustc_demangle::try_demangle(sym) {
50 writer.write_fmt(format_args!("{sym}"))?;
51 return Ok(());
52 }
53
54 if let Ok(sym) = cpp_demangle::Symbol::new(sym) {
55 writer.write_fmt(format_args!("{sym}"))?;
56 return Ok(());
57 }
58
59 if let Ok(sym) = msvc_demangler::demangle(sym, msvc_demangler::DemangleFlags::NAME_ONLY) {
60 writer.write_str(&sym)?;
61 return Ok(());
62 }
63
64 writer.write_str(sym)
65}
66
/// Tag for the primitive types that a [`TypeKind::Primitive`] can carry.
///
/// Besides `Void`, the variants are named after Rust's fixed-width integer
/// types (`I*` / `U*` at 8, 16, 32 and 64 bits); presumably they stand for the
/// signed and unsigned integers of those widths.
#[derive(Debug, Clone, Copy)]
pub enum Primitive {
    Void,

    I8,
    U8,
    I16,
    U16,
    I32,
    U32,
    I64,
    U64,
}
80
/// Reference-counted handle to a [`TypeKind`]; cloning it only clones the `Arc`.
pub type Type = Arc<TypeKind>;
82
// Generates one associated function `TypeKind::<name>() -> Type` per
// `name: init;` entry.
//
// Each generated function owns its own `static` cell. The `Arc` is built from
// `init` through `get_or_init`, and every call returns a clone of the cell's
// content. NOTE(review): `OnceCell` comes from `crate::utils`; this assumes its
// `get_or_init` only runs the closure for the first call -- confirm there.
macro_rules! lazy_types {
    ( $( $name:ident: $init:expr; )*) => {
        impl TypeKind {
            $(
                pub fn $name() -> Type {
                    // One cell per generated function, shared by all callers.
                    static TYPE: OnceCell<Type> = OnceCell::new();
                    TYPE.get_or_init(|| Arc::new($init)).clone()
                }
            )*
        }
    };
}
/// Description of a type, normally handled through the [`Type`] alias.
///
/// Variants that refer to other types (`Array`, `Pointer`) hold them as
/// [`Type`] handles.
#[derive(Debug, Clone)]
pub enum TypeKind {
    /// One of the [`Primitive`] tags.
    Primitive(Primitive),
    Bitfield,
    /// An inner type plus a `u32` (presumably the element count -- confirm
    /// against the loaders that build this variant).
    Array(Type, u32),
    Function,
    /// Pointer to the given type.
    Pointer(Type),
    /// Carries a `String` (presumably the struct's name).
    Struct(String),
    /// Carries a `String` (presumably the union's name).
    Union(String),
    Unknown,
}
106
// Constructors on `TypeKind` for the unknown type, every primitive, and a
// pointer to every primitive. The `*_ptr` entries reuse the matching
// primitive constructor for their pointee.
lazy_types! {
    unknown: TypeKind::Unknown;
    void: TypeKind::Primitive(Primitive::Void);
    void_ptr: TypeKind::Pointer(TypeKind::void());
    i8: TypeKind::Primitive(Primitive::I8);
    i8_ptr: TypeKind::Pointer(TypeKind::i8());
    u8: TypeKind::Primitive(Primitive::U8);
    u8_ptr: TypeKind::Pointer(TypeKind::u8());
    i16: TypeKind::Primitive(Primitive::I16);
    i16_ptr: TypeKind::Pointer(TypeKind::i16());
    u16: TypeKind::Primitive(Primitive::U16);
    u16_ptr: TypeKind::Pointer(TypeKind::u16());
    i32: TypeKind::Primitive(Primitive::I32);
    i32_ptr: TypeKind::Pointer(TypeKind::i32());
    u32: TypeKind::Primitive(Primitive::U32);
    u32_ptr: TypeKind::Pointer(TypeKind::u32());
    i64: TypeKind::Primitive(Primitive::I64);
    i64_ptr: TypeKind::Pointer(TypeKind::i64());
    u64: TypeKind::Primitive(Primitive::U64);
    u64_ptr: TypeKind::Pointer(TypeKind::u64());
}
128
/// A single named field of a [`Struct`].
#[derive(Debug, Clone)]
pub struct StructField {
    /// Field name, as matched by `StructRef::find_field`.
    pub name: String,
    /// Offset of the field, expressed in the same unit as `Struct::size`
    /// (presumably bytes from the start of the structure).
    pub offset: u64,
    /// Type of the field.
    pub typ: Type,
}
135
/// Owned layout description of a structure: its size, name and fields.
#[derive(Debug)]
pub struct Struct {
    /// Total size of the structure; used as the end bound of the last field
    /// by `StructRef::find_offset_and_size`.
    pub size: u64,
    /// Name of the structure; `ModuleSymbolsBuilder` uses it as the map key.
    pub name: String,
    /// Fields of the structure.
    pub fields: Vec<StructField>,
}
142
143impl Struct {
144 fn borrow(&self) -> StructRef {
145 StructRef {
146 size: self.size,
147 name: &self.name,
148 fields: &self.fields,
149 }
150 }
151}
152
/// Borrowed, `Copy` view of a [`Struct`], with the same three pieces of data.
#[derive(Debug, Clone, Copy)]
pub struct StructRef<'a> {
    /// Total size of the structure.
    pub size: u64,
    /// Name of the structure.
    pub name: &'a str,
    /// Fields of the structure.
    pub fields: &'a [StructField],
}
159
160impl StructRef<'_> {
161 pub fn find_offset(&self, field_name: &str) -> Option<u64> {
162 self.find_field(field_name).map(|f| f.offset)
163 }
164
165 pub fn require_offset(&self, field_name: &str) -> VmResult<u64> {
166 self.find_offset(field_name)
167 .ok_or_else(|| VmError::missing_field(field_name, self.name))
168 }
169
170 pub fn find_field(&self, field_name: &str) -> Option<&StructField> {
171 self.fields.iter().find(|field| field.name == field_name)
172 }
173
174 pub fn find_offset_and_size(&self, field_name: &str) -> VmResult<(u64, u64)> {
175 let (i, field) = self
176 .fields
177 .iter()
178 .enumerate()
179 .find(|(_, field)| field.name == field_name)
180 .ok_or_else(|| VmError::missing_field(field_name, self.name))?;
181 let size = self.fields.get(i + 1).map_or(self.size, |f| f.offset) - field.offset;
182 Ok((field.offset, size))
183 }
184
185 pub fn into_owned(&self) -> Struct {
186 Struct {
187 size: self.size,
188 name: self.name.to_owned(),
189 fields: self.fields.to_owned(),
190 }
191 }
192}
193
/// Accumulates symbols and structure layouts before they are frozen into a
/// [`ModuleSymbols`] by `build`.
#[derive(Debug, Default)]
pub struct ModuleSymbolsBuilder {
    /// All symbol names pushed so far, concatenated back to back.
    buffer: String,
    /// One entry per symbol: its address and the byte range of its name
    /// inside `buffer`.
    symbols: Vec<(VirtualAddress, Range<usize>)>,
    /// Structure layouts, keyed by structure name.
    types: HashMap<String, Struct>,
}
200
201impl ModuleSymbolsBuilder {
202 pub fn new() -> Self {
203 Self::default()
204 }
205
206 pub fn build(self) -> ModuleSymbols {
207 let buffer = self.buffer.into_boxed_str();
208
209 let mut names = self.symbols.into_boxed_slice();
210 names.sort_unstable_by_key(|(addr, _)| *addr);
211
212 let mut addresses = names.clone();
213 addresses.sort_unstable_by_key(|(_, range)| &buffer[range.clone()]);
214
215 ModuleSymbols {
216 buffer,
217 symbols: names,
218 addresses,
219 types: self.types,
220 }
221 }
222
223 pub fn push(&mut self, addr: VirtualAddress, symbol: &str) {
224 let start = self.buffer.len();
225 self.buffer.push_str(symbol);
226 let end = self.buffer.len();
227 self.symbols.push((addr, start..end))
228 }
229
230 pub fn insert_struct(&mut self, structure: Struct) {
231 self.types.insert(structure.name.clone(), structure);
232 }
233
234 #[cfg(feature = "std")]
235 pub fn read_file<P: AsRef<std::path::Path>>(&mut self, path: P) -> VmResult<()> {
236 self.read_file_inner(path.as_ref())
237 }
238
239 #[cfg(feature = "std")]
240 fn read_file_inner(&mut self, path: &std::path::Path) -> VmResult<()> {
241 let content = std::fs::read(path)?;
242 self.read_bytes(&content)
243 }
244
245 pub fn read_bytes(&mut self, content: &[u8]) -> VmResult<()> {
246 if content.starts_with(b"\x7fELF") {
247 let obj = object::File::parse(content).map_err(VmError::new)?;
248 crate::symbols::dwarf::load_types(&obj, self).map_err(VmError::new)?;
249 return Ok(());
250 }
251
252 #[cfg(feature = "std")]
253 if content.starts_with(b"Microsoft C/C++") {
254 let content = std::io::Cursor::new(content);
255 let mut pdb = ::pdb::PDB::open(content).map_err(VmError::new)?;
256
257 pdb::load_syms(&mut pdb, self).map_err(VmError::new)?;
258
259 if let Err(err) = pdb::load_types(&mut pdb, self) {
260 log::warn!("Failed to load types from PDB: {err}");
261 }
262
263 return Ok(());
264 }
265
266 symbols_file::read_from_bytes(content, self)
267 }
268}
269
270impl<S: AsRef<str>> Extend<(VirtualAddress, S)> for ModuleSymbolsBuilder {
271 fn extend<I: IntoIterator<Item = (VirtualAddress, S)>>(&mut self, iter: I) {
272 self.symbols.extend(iter.into_iter().map(|(addr, sym)| {
273 let start = self.buffer.len();
274 self.buffer.push_str(sym.as_ref());
275 let end = self.buffer.len();
276 (addr, (start..end))
277 }))
278 }
279}
280
281impl Extend<Struct> for ModuleSymbolsBuilder {
282 fn extend<I: IntoIterator<Item = Struct>>(&mut self, iter: I) {
283 self.types
284 .extend(iter.into_iter().map(|s| (s.name.clone(), s)))
285 }
286}
287
/// Immutable symbol and type tables of one module, as produced by
/// `ModuleSymbolsBuilder::build`.
#[derive(Default)]
pub struct ModuleSymbols {
    /// Every symbol name, concatenated; entries below index into it.
    buffer: Box<str>,

    /// `(address, name range)` entries, sorted by address by `build`.
    symbols: Box<[(VirtualAddress, Range<usize>)]>,

    /// The same entries, sorted by symbol name by `build`.
    addresses: Box<[(VirtualAddress, Range<usize>)]>,

    /// Structure layouts, keyed by structure name.
    types: HashMap<String, Struct>,
}
300
301impl ModuleSymbols {
302 #[cfg(feature = "std")]
303 pub fn from_file<P: AsRef<std::path::Path>>(path: P) -> VmResult<Self> {
304 let mut module = ModuleSymbolsBuilder::new();
305 module.read_file_inner(path.as_ref())?;
306 Ok(module.build())
307 }
308
309 pub fn from_bytes(content: &[u8]) -> VmResult<Self> {
310 let mut module = ModuleSymbolsBuilder::new();
311 module.read_bytes(content)?;
312 Ok(module.build())
313 }
314
315 fn symbol(&self, range: Range<usize>) -> &str {
316 &self.buffer[range]
317 }
318
319 pub fn get_symbol(&self, addr: VirtualAddress) -> Option<&str> {
320 let index = self.symbols.binary_search_by_key(&addr, |(a, _)| *a).ok()?;
321 Some(self.symbol(self.symbols[index].1.clone()))
322 }
323
324 pub fn get_symbol_inexact(&self, addr: VirtualAddress) -> Option<(&str, u64)> {
325 let (range, offset) = match self.symbols.binary_search_by_key(&addr, |(a, _)| *a) {
326 Ok(i) => (&self.symbols[i].1, 0),
327 Err(i) => {
328 let i = i.checked_sub(1)?;
329 let (sym_addr, range) = &self.symbols[i];
330 (range, (addr - *sym_addr) as u64)
331 }
332 };
333 Some((self.symbol(range.clone()), offset))
334 }
335
336 pub fn get_address(&self, name: &str) -> Option<VirtualAddress> {
337 let index = self
338 .addresses
339 .binary_search_by_key(&name, |(_, range)| self.symbol(range.clone()))
340 .ok()?;
341 Some(self.addresses[index].0)
342 }
343
344 pub fn require_address(&self, name: &str) -> VmResult<VirtualAddress> {
345 self.get_address(name)
346 .ok_or_else(|| VmError::missing_symbol(name))
347 }
348
349 pub fn iter_symbols(&self) -> impl ExactSizeIterator<Item = (VirtualAddress, &str)> {
350 self.symbols
351 .iter()
352 .map(|(addr, range)| (*addr, self.symbol(range.clone())))
353 }
354
355 pub fn get_struct(&self, name: &str) -> Option<StructRef> {
356 self.types.get(name).map(|s| s.borrow())
357 }
358
359 pub fn require_struct(&self, name: &str) -> VmResult<StructRef> {
360 self.get_struct(name)
361 .ok_or_else(|| VmError::missing_symbol(name))
362 }
363}
364
365impl fmt::Debug for ModuleSymbols {
366 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367 f.debug_map().entries(self.iter_symbols()).finish()
368 }
369}
370
/// Collection of per-module symbol tables, keyed by module name.
#[derive(Debug, Default)]
pub struct SymbolsIndexer {
    /// Module name to its symbols. The stored `Option` is `None` for an entry
    /// whose loader callback returned no symbols (see `load_module`).
    modules: OnceMap<Box<str>, Arc<Option<ModuleSymbols>>>,
}
375
376impl SymbolsIndexer {
377 pub fn new() -> Self {
378 Self {
379 modules: OnceMap::new(),
380 }
381 }
382
383 pub fn get_addr(&self, lib: &str, name: &str) -> VmResult<VirtualAddress> {
384 self.require_module(lib)?.require_address(name)
385 }
386
387 pub fn get_module(&self, name: &str) -> Option<&ModuleSymbols> {
388 self.modules.get(name)?.as_ref()
389 }
390
391 pub fn require_module(&self, name: &str) -> VmResult<&ModuleSymbols> {
392 self.get_module(name)
393 .ok_or_else(|| VmError::missing_module(name))
394 }
395
396 pub fn load_module(
397 &self,
398 name: Box<str>,
399 f: &mut dyn FnMut(&str) -> VmResult<Arc<Option<ModuleSymbols>>>,
400 ) -> VmResult<Option<&ModuleSymbols>> {
401 let module = self.modules.try_insert(name, |name| {
402 f(name).with_context(|| alloc::format!("failed to load symbols for module \"{name}\""))
403 })?;
404 Ok(module.as_ref())
405 }
406
407 pub fn load_from_bytes(
408 &mut self,
409 name: Box<str>,
410 content: &[u8],
411 ) -> VmResult<Option<&ModuleSymbols>> {
412 self.load_module(name, &mut |_| {
413 ModuleSymbols::from_bytes(content).map(Some).map(Arc::new)
414 })
415 }
416
417 #[cfg(feature = "std")]
418 #[inline]
419 pub fn load_from_file<P: AsRef<std::path::Path>>(
420 &mut self,
421 path: P,
422 ) -> VmResult<Option<&ModuleSymbols>> {
423 self.load_from_file_inner(path.as_ref())
424 }
425
426 #[cfg(feature = "std")]
427 fn load_from_file_inner(&mut self, path: &std::path::Path) -> VmResult<Option<&ModuleSymbols>> {
428 log::debug!("Loading {}", path.display());
429 let name = path
430 .file_name()
431 .context("no file name")?
432 .to_str()
433 .context("non UTF-8 file name")?
434 .into();
435
436 self.load_module(name, &mut |_| {
437 ModuleSymbols::from_file(path).map(Some).map(Arc::new)
438 })
439 }
440
441 #[cfg(feature = "std")]
442 fn load_dir_inner(&mut self, path: &path::Path) -> VmResult<()> {
443 for entry in fs::read_dir(path)? {
444 match entry {
445 Ok(entry) => {
446 let path = entry.path();
447 if let Err(err) = self.load_from_file_inner(&path) {
448 log::warn!("Error reading {}: {err}", path.display());
449 }
450 }
451 Err(err) => {
452 log::warn!("Failed to read directory entry: {err}")
453 }
454 };
455 }
456
457 Ok(())
458 }
459
460 #[cfg(feature = "std")]
462 #[inline]
463 pub fn load_dir<P: AsRef<path::Path>>(&mut self, path: P) -> VmResult<()> {
464 self.load_dir_inner(path.as_ref())
465 }
466}