ddbug_parser/file/
mod.rs

1use std::borrow::Cow;
2use std::default::Default;
3use std::fs;
4use std::mem;
5use std::sync::Mutex;
6
7mod dwarf;
8
9use fnv::FnvHashMap as HashMap;
10use object::{self, Object, ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable};
11
12use crate::cfi::Cfi;
13use crate::function::{Function, FunctionDetails, FunctionOffset};
14use crate::location::Register;
15use crate::range::{Range, RangeList};
16use crate::types::{Enumerator, Type, TypeOffset};
17use crate::unit::Unit;
18use crate::variable::Variable;
19use crate::{Address, Result, Size};
20
21pub(crate) enum DebugInfo<'input, Endian>
22where
23    Endian: gimli::Endianity + 'input,
24{
25    Dwarf(dwarf::DwarfDebugInfo<'input, Endian>),
26}
27
28impl<'input, Endian> DebugInfo<'input, Endian>
29where
30    Endian: gimli::Endianity + 'input,
31{
32    fn get_type(&self, offset: TypeOffset) -> Option<Type<'input>> {
33        match self {
34            DebugInfo::Dwarf(dwarf) => dwarf.get_type(offset),
35        }
36    }
37
38    fn get_enumerators(&self, offset: TypeOffset) -> Vec<Enumerator<'input>> {
39        match self {
40            DebugInfo::Dwarf(dwarf) => dwarf.get_enumerators(offset),
41        }
42    }
43
44    fn get_function_details(
45        &self,
46        offset: FunctionOffset,
47        hash: &FileHash<'input>,
48    ) -> Option<FunctionDetails<'input>> {
49        match self {
50            DebugInfo::Dwarf(dwarf) => dwarf.get_function_details(offset, hash),
51        }
52    }
53
54    fn get_cfi(&self, range: Range) -> Vec<Cfi> {
55        match self {
56            DebugInfo::Dwarf(dwarf) => dwarf.get_cfi(range),
57        }
58    }
59
60    fn get_register_name(&self, machine: Architecture, register: Register) -> Option<&'static str> {
61        match self {
62            DebugInfo::Dwarf(dwarf) => dwarf.get_register_name(machine, register),
63        }
64    }
65}
66
67pub(crate) struct Arena {
68    // TODO: can these be a single `Vec<Box<dyn ??>>`?
69    buffers: Mutex<Vec<Vec<u8>>>,
70    strings: Mutex<Vec<String>>,
71    #[allow(clippy::vec_box)]
72    relocations: Mutex<Vec<Box<dwarf::RelocationMap>>>,
73}
74
75impl Arena {
76    fn new() -> Self {
77        Arena {
78            buffers: Mutex::new(Vec::new()),
79            strings: Mutex::new(Vec::new()),
80            relocations: Mutex::new(Vec::new()),
81        }
82    }
83
84    fn add_buffer<'input>(&'input self, bytes: Vec<u8>) -> &'input [u8] {
85        let mut buffers = self.buffers.lock().unwrap();
86        let i = buffers.len();
87        buffers.push(bytes);
88        let b = &buffers[i];
89        unsafe { mem::transmute::<&[u8], &'input [u8]>(b) }
90    }
91
92    fn add_string<'input>(&'input self, bytes: &'input [u8]) -> &'input str {
93        // FIXME: this is effectively leaking strings that require lossy conversion,
94        // fix by avoiding duplicates
95        match String::from_utf8_lossy(bytes) {
96            Cow::Borrowed(s) => s,
97            Cow::Owned(s) => {
98                let mut strings = self.strings.lock().unwrap();
99                let i = strings.len();
100                strings.push(s);
101                let s = &strings[i];
102                unsafe { mem::transmute::<&str, &'input str>(s) }
103            }
104        }
105    }
106
107    fn add_relocations<'input>(
108        &'input self,
109        entry: Box<dwarf::RelocationMap>,
110    ) -> &'input dwarf::RelocationMap {
111        let mut relocations = self.relocations.lock().unwrap();
112        let i = relocations.len();
113        relocations.push(entry);
114        let entry = &relocations[i];
115        unsafe { mem::transmute::<&dwarf::RelocationMap, &'input dwarf::RelocationMap>(entry) }
116    }
117}
118
119pub use object::Architecture;
120
121/// The context needed for a parsed file.
122///
123/// The parsed file references the context, so it is included here as well.
124pub struct FileContext {
125    // Self-referential, not actually `static.
126    file: File<'static>,
127    _map: memmap2::Mmap,
128    _arena: Box<Arena>,
129}
130
131impl FileContext {
132    fn new<F>(map: memmap2::Mmap, f: F) -> Result<FileContext>
133    where
134        F: for<'a> FnOnce(&'a [u8], &'a Arena) -> Result<File<'a>>,
135    {
136        let arena = Box::new(Arena::new());
137        let file = f(&map, &arena)?;
138        Ok(FileContext {
139            // `file` only borrows from `map` and `arena`, which we are preserving
140            // without moving.
141            file: unsafe { mem::transmute::<File<'_>, File<'static>>(file) },
142            _map: map,
143            _arena: arena,
144        })
145    }
146
147    /// Return the parsed debuginfo for the file.
148    pub fn file<'a>(&'a self) -> &'a File<'a> {
149        unsafe { mem::transmute::<&'a File<'static>, &'a File<'a>>(&self.file) }
150    }
151}
152
153/// The parsed debuginfo for a single file.
154pub struct File<'input> {
155    pub(crate) path: String,
156    pub(crate) machine: Architecture,
157    pub(crate) segments: Vec<Segment<'input>>,
158    pub(crate) sections: Vec<Section<'input>>,
159    pub(crate) symbols: Vec<Symbol<'input>>,
160    pub(crate) relocations: Vec<Relocation<'input>>,
161    pub(crate) units: Vec<Unit<'input>>,
162    debug_info: DebugInfo<'input, gimli::RunTimeEndian>,
163}
164
165impl<'input> File<'input> {
166    pub(crate) fn get_type(&self, offset: TypeOffset) -> Option<Type<'input>> {
167        self.debug_info.get_type(offset)
168    }
169
170    pub(crate) fn get_enumerators(&self, offset: TypeOffset) -> Vec<Enumerator<'input>> {
171        self.debug_info.get_enumerators(offset)
172    }
173
174    pub(crate) fn get_function_details(
175        &self,
176        offset: FunctionOffset,
177        hash: &FileHash<'input>,
178    ) -> FunctionDetails<'input> {
179        self.debug_info
180            .get_function_details(offset, hash)
181            .unwrap_or_default()
182    }
183
184    pub(crate) fn get_register_name(&self, register: Register) -> Option<&'static str> {
185        self.debug_info.get_register_name(self.machine, register)
186    }
187
188    /// Parse the file with the given path.
189    pub fn parse(path: String) -> Result<FileContext> {
190        let handle = match fs::File::open(&path) {
191            Ok(handle) => handle,
192            Err(e) => {
193                return Err(format!("open failed: {}", e).into());
194            }
195        };
196
197        let map = match unsafe { memmap2::Mmap::map(&handle) } {
198            Ok(map) => map,
199            Err(e) => {
200                return Err(format!("memmap failed: {}", e).into());
201            }
202        };
203
204        // TODO: split DWARF
205        // TODO: PDB
206        FileContext::new(map, |data, strings| {
207            let object = object::File::parse(data)?;
208            File::parse_object(&object, &object, path, strings)
209        })
210    }
211
212    fn parse_object(
213        object: &object::File<'input>,
214        debug_object: &object::File<'input>,
215        path: String,
216        arena: &'input Arena,
217    ) -> Result<File<'input>> {
218        let machine = object.architecture();
219        let mut segments = Vec::new();
220        for segment in object.segments() {
221            if let Ok(bytes) = segment.data() {
222                segments.push(Segment {
223                    address: segment.address(),
224                    bytes,
225                });
226            }
227        }
228
229        let mut sections = Vec::new();
230        for section in object.sections() {
231            let name = Some(section.name()?).map(|x| Cow::Owned(x.to_string()));
232            let segment = section.segment_name()?.map(|x| Cow::Owned(x.to_string()));
233            let address = if section.address() != 0 {
234                Some(section.address())
235            } else {
236                None
237            };
238            let size = section.size();
239            if size != 0 {
240                sections.push(Section {
241                    name,
242                    segment,
243                    address,
244                    size,
245                });
246            }
247        }
248
249        // TODO: symbols from debug_object too?
250        let mut symbols = Vec::new();
251        for symbol in object.symbols() {
252            // TODO: handle relocatable objects
253            let address = symbol.address();
254            if address == 0 {
255                continue;
256            }
257
258            let size = symbol.size();
259            if size == 0 {
260                continue;
261            }
262
263            // TODO: handle SymbolKind::File
264            let kind = match symbol.kind() {
265                object::SymbolKind::Text => SymbolKind::Function,
266                object::SymbolKind::Data | object::SymbolKind::Unknown => SymbolKind::Variable,
267                _ => continue,
268            };
269
270            let name = Some(symbol.name()?);
271
272            symbols.push(Symbol {
273                name,
274                kind,
275                address,
276                size,
277            });
278        }
279
280        let mut relocations = Vec::new();
281        if let (Some(dynamic_symbols), Some(dynamic_relocations)) =
282            (object.dynamic_symbol_table(), object.dynamic_relocations())
283        {
284            for (address, relocation) in dynamic_relocations {
285                let size = relocation.size();
286                match relocation.target() {
287                    object::RelocationTarget::Symbol(index) => {
288                        if let Ok(symbol) = dynamic_symbols.symbol_by_index(index) {
289                            relocations.push(Relocation {
290                                address,
291                                size,
292                                symbol: symbol.name()?,
293                            });
294                        }
295                    }
296                    _ => {}
297                }
298            }
299        }
300
301        let endian = if debug_object.is_little_endian() {
302            gimli::RunTimeEndian::Little
303        } else {
304            gimli::RunTimeEndian::Big
305        };
306
307        let (units, debug_info) = dwarf::parse(endian, debug_object, arena)?;
308        let mut file = File {
309            path,
310            machine,
311            segments,
312            sections,
313            symbols,
314            relocations,
315            units,
316            debug_info,
317        };
318        file.normalize();
319        Ok(file)
320    }
321
322    fn normalize(&mut self) {
323        self.symbols.sort_by(|a, b| a.address.cmp(&b.address));
324        let mut used_symbols = vec![false; self.symbols.len()];
325
326        // Set symbol names on functions/variables.
327        for unit in &mut self.units {
328            for function in &mut unit.functions {
329                if let Some(address) = function.address() {
330                    if let Some(symbol) = Self::get_symbol(
331                        &self.symbols,
332                        &mut used_symbols,
333                        address,
334                        function.linkage_name().or_else(|| function.name()),
335                    ) {
336                        function.symbol_name = symbol.name;
337                    }
338                    // If there are multiple ranges for the function,
339                    // mark any symbols for the remaining ranges as used.
340                    // TODO: change `Function::symbol_name` to a list instead?
341                    for range in function.ranges().iter().skip(1) {
342                        Self::get_symbol(&self.symbols, &mut used_symbols, range.begin, None);
343                    }
344                }
345            }
346
347            for variable in &mut unit.variables {
348                if let Some(address) = variable.address() {
349                    if let Some(symbol) = Self::get_symbol(
350                        &self.symbols,
351                        &mut used_symbols,
352                        address,
353                        variable.linkage_name().or_else(|| variable.name()),
354                    ) {
355                        variable.symbol_name = symbol.name;
356                    }
357                }
358            }
359        }
360
361        // Create a unit for symbols that don't have debuginfo.
362        let mut unit = Unit {
363            name: Some(Cow::Borrowed("<symtab>")),
364            ..Default::default()
365        };
366        for (symbol, used) in self.symbols.iter().zip(used_symbols.iter()) {
367            if *used {
368                continue;
369            }
370            unit.ranges.push(Range {
371                begin: symbol.address,
372                end: symbol.address + symbol.size,
373            });
374            match symbol.kind() {
375                SymbolKind::Variable => {
376                    unit.variables.push(Variable {
377                        name: symbol.name,
378                        linkage_name: symbol.name,
379                        address: Address::new(symbol.address),
380                        size: Size::new(symbol.size),
381                        ..Default::default()
382                    });
383                }
384                SymbolKind::Function => {
385                    let mut ranges = Vec::new();
386                    if symbol.size > 0 {
387                        ranges.push(Range {
388                            begin: symbol.address,
389                            end: symbol.address + symbol.size,
390                        });
391                    }
392                    unit.functions.push(Function {
393                        name: symbol.name,
394                        linkage_name: symbol.name,
395                        address: Address::new(symbol.address),
396                        size: Size::new(symbol.size),
397                        ranges,
398                        ..Default::default()
399                    });
400                }
401            }
402        }
403        unit.ranges.sort();
404        self.units.push(unit);
405
406        // Create a unit for all remaining address ranges.
407        let unit = Unit {
408            name: Some(Cow::Borrowed("<unknown>")),
409            ranges: self.unknown_ranges(),
410            ..Default::default()
411        };
412        self.units.push(unit);
413    }
414
415    // Determine if the symbol at the given address has the given name.
416    // There may be multiple symbols for the same address.
417    // If none match the given name, then return the first one.
418    fn get_symbol<'sym>(
419        symbols: &'sym [Symbol<'input>],
420        used_symbols: &mut [bool],
421        address: u64,
422        name: Option<&str>,
423    ) -> Option<&'sym Symbol<'input>> {
424        if let Ok(mut index) = symbols.binary_search_by(|x| x.address.cmp(&address)) {
425            while index > 0 && symbols[index - 1].address == address {
426                index -= 1;
427            }
428            let mut found = false;
429            for (symbol, used_symbol) in symbols[index..]
430                .iter()
431                .zip(used_symbols[index..].iter_mut())
432            {
433                if symbol.address != address {
434                    break;
435                }
436                *used_symbol = true;
437                if symbol.name() == name {
438                    found = true;
439                }
440            }
441            if found {
442                None
443            } else {
444                Some(&symbols[index])
445            }
446        } else {
447            None
448        }
449    }
450
451    /// The file path.
452    #[inline]
453    pub fn path(&self) -> &str {
454        &self.path
455    }
456
457    /// The machine type that the file contains debuginfo for.
458    #[inline]
459    pub fn machine(&self) -> Architecture {
460        self.machine
461    }
462
463    /// Find the segment data for the given address range.
464    pub fn segment_bytes(&self, range: Range) -> Option<&'input [u8]> {
465        for segment in &self.segments {
466            if range.begin >= segment.address
467                && range.end <= segment.address + segment.bytes.len() as u64
468            {
469                let begin = (range.begin - segment.address) as usize;
470                let len = (range.end - range.begin) as usize;
471                return Some(&segment.bytes[begin..][..len]);
472            }
473        }
474        None
475    }
476
477    /// A list of segments in the file.
478    #[inline]
479    pub fn segments(&self) -> &[Segment<'input>] {
480        &self.segments
481    }
482
483    /// A list of sections in the file.
484    #[inline]
485    pub fn sections(&self) -> &[Section<'input>] {
486        &self.sections
487    }
488
489    /// A list of symbols in the file.
490    #[inline]
491    pub fn symbols(&self) -> &[Symbol<'input>] {
492        &self.symbols
493    }
494
495    /// A list of relocations in the file.
496    #[inline]
497    pub fn relocations(&self) -> &[Relocation<'input>] {
498        &self.relocations
499    }
500
501    /// A list of compilation units in the file.
502    #[inline]
503    pub fn units(&self) -> &[Unit<'input>] {
504        &self.units
505    }
506
507    /// A list of address ranges covered by the compilation units.
508    ///
509    /// This includes both `Unit::ranges` and `Unit::unknown_ranges`.
510    pub fn ranges(&self, hash: &FileHash) -> RangeList {
511        let mut ranges = RangeList::default();
512        for unit in &self.units {
513            for range in unit.ranges(hash).list() {
514                ranges.push(*range);
515            }
516            for range in unit.unknown_ranges(hash).list() {
517                ranges.push(*range);
518            }
519        }
520        ranges.sort();
521        ranges
522    }
523
524    // Used to create <unknown> unit. After creation of that unit
525    // this will return an empty range list.
526    fn unknown_ranges(&self) -> RangeList {
527        // FIXME: don't create this hash twice
528        let hash = FileHash::new(self);
529        let unit_ranges = self.ranges(&hash);
530
531        let mut ranges = RangeList::default();
532        for section in &self.sections {
533            if let Some(range) = section.address() {
534                ranges.push(range);
535            }
536        }
537        ranges.sort();
538        ranges.subtract(&unit_ranges)
539    }
540
541    /// The total size of functions in all compilation units.
542    pub fn function_size(&self) -> u64 {
543        let mut size = 0;
544        for unit in &self.units {
545            size += unit.function_size();
546        }
547        size
548    }
549
550    /// The total size of variables in all compilation units.
551    pub fn variable_size(&self, hash: &FileHash) -> u64 {
552        let mut size = 0;
553        for unit in &self.units {
554            size += unit.variable_size(hash);
555        }
556        size
557    }
558
559    /// Call frame information for the given address range.
560    pub fn cfi(&self, range: Range) -> Vec<Cfi> {
561        self.debug_info.get_cfi(range)
562    }
563}
564
565/// An index of functions and types within a file.
566pub struct FileHash<'input> {
567    /// The file being indexed.
568    pub file: &'input File<'input>,
569    /// All functions by address.
570    pub functions_by_address: HashMap<u64, &'input Function<'input>>,
571    /// All functions by offset.
572    pub functions_by_offset: HashMap<FunctionOffset, &'input Function<'input>>,
573    /// All variables by address.
574    pub variables_by_address: HashMap<u64, &'input Variable<'input>>,
575    /// All types by offset.
576    pub types: HashMap<TypeOffset, &'input Type<'input>>,
577    // The type corresponding to `TypeOffset::none()`.
578    pub(crate) void: Type<'input>,
579}
580
581impl<'input> FileHash<'input> {
582    /// Create a new `FileHash` for the given `File`.
583    pub fn new(file: &'input File<'input>) -> Self {
584        FileHash {
585            file,
586            functions_by_address: FileHash::functions_by_address(file),
587            functions_by_offset: FileHash::functions_by_offset(file),
588            variables_by_address: FileHash::variables_by_address(file),
589            types: FileHash::types(file),
590            void: Type::void(),
591        }
592    }
593
594    /// Returns a map from address to function for all functions in the file.
595    fn functions_by_address<'a>(file: &'a File<'input>) -> HashMap<u64, &'a Function<'input>> {
596        let mut functions = HashMap::default();
597        for unit in &file.units {
598            for function in &unit.functions {
599                if let Some(address) = function.address() {
600                    // TODO: handle duplicate addresses
601                    functions.insert(address, function);
602                }
603            }
604        }
605        functions
606    }
607
608    /// Returns a map from offset to function for all functions in the file.
609    fn functions_by_offset<'a>(
610        file: &'a File<'input>,
611    ) -> HashMap<FunctionOffset, &'a Function<'input>> {
612        let mut functions = HashMap::default();
613        for unit in &file.units {
614            for function in &unit.functions {
615                functions.insert(function.offset, function);
616            }
617        }
618        functions
619    }
620
621    /// Returns a map from address to function for all functions in the file.
622    fn variables_by_address<'a>(file: &'a File<'input>) -> HashMap<u64, &'a Variable<'input>> {
623        let mut variables = HashMap::default();
624        for unit in &file.units {
625            for variable in &unit.variables {
626                if let Some(address) = variable.address() {
627                    // TODO: handle duplicate addresses
628                    variables.insert(address, variable);
629                }
630            }
631        }
632        variables
633    }
634
635    /// Returns a map from offset to type for all types in the file.
636    fn types<'a>(file: &'a File<'input>) -> HashMap<TypeOffset, &'a Type<'input>> {
637        let mut types = HashMap::default();
638        for unit in &file.units {
639            for ty in &unit.types {
640                types.insert(ty.offset, ty);
641            }
642        }
643        types
644    }
645}
646
/// A contiguous range of bytes together with its load address.
#[derive(Debug)]
pub struct Segment<'input> {
    /// The address at which the first byte of `bytes` should be loaded.
    pub address: u64,
    /// The contents of the segment, which may be code or data.
    pub bytes: &'input [u8],
}
655
656/// A named section.
657#[derive(Debug)]
658pub struct Section<'input> {
659    pub(crate) name: Option<Cow<'input, str>>,
660    pub(crate) segment: Option<Cow<'input, str>>,
661    pub(crate) address: Option<u64>,
662    pub(crate) size: u64,
663}
664
665impl<'input> Section<'input> {
666    /// The name of this section.
667    pub fn name(&self) -> Option<&str> {
668        self.name.as_deref()
669    }
670
671    /// The name of the segment containing this section, if applicable.
672    pub fn segment(&self) -> Option<&str> {
673        self.segment.as_deref()
674    }
675
676    /// The address range covered by this section if it is loadable.
677    pub fn address(&self) -> Option<Range> {
678        self.address.map(|address| Range {
679            begin: address,
680            end: address + self.size,
681        })
682    }
683
684    /// The size of the section.
685    #[inline]
686    pub fn size(&self) -> u64 {
687        self.size
688    }
689}
690
/// A symbol kind.
///
/// Kinds can be compared with `==` and used as hash keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SymbolKind {
    /// The symbol is a variable.
    Variable,
    /// The symbol is a function.
    Function,
}
699
700/// A symbol.
701#[derive(Debug, Clone)]
702pub struct Symbol<'input> {
703    pub(crate) name: Option<&'input str>,
704    pub(crate) kind: SymbolKind,
705    pub(crate) address: u64,
706    pub(crate) size: u64,
707}
708
709impl<'input> Symbol<'input> {
710    /// The symbol name.
711    #[inline]
712    pub fn name(&self) -> Option<&str> {
713        self.name
714    }
715
716    /// The symbol kind.
717    #[inline]
718    pub fn kind(&self) -> SymbolKind {
719        self.kind
720    }
721
722    /// The symbol address range.
723    #[inline]
724    pub fn address(&self) -> Range {
725        Range {
726            begin: self.address,
727            end: self.address + self.size,
728        }
729    }
730
731    /// The symbol size range.
732    #[inline]
733    pub fn size(&self) -> u64 {
734        self.size
735    }
736}
737
/// A relocation against a named symbol.
#[derive(Debug, Clone)]
pub struct Relocation<'input> {
    /// The address that the relocation applies to.
    pub(crate) address: u64,
    /// The size of the relocation.
    pub(crate) size: u8,
    /// The name of the symbol that the relocation refers to.
    pub(crate) symbol: &'input str,
}

impl<'input> Relocation<'input> {
    /// The address that the relocation applies to.
    #[inline]
    pub fn address(&self) -> u64 {
        let Self { address, .. } = self;
        *address
    }

    /// The size of the relocation.
    #[inline]
    pub fn size(&self) -> u8 {
        let Self { size, .. } = self;
        *size
    }

    /// The name of the symbol referenced by the relocation.
    ///
    /// The string borrows from the parsed input, not from this `Relocation`.
    #[inline]
    pub fn symbol(&self) -> &'input str {
        let Self { symbol, .. } = self;
        symbol
    }
}