pdb/
omap.rs

1// Copyright 2018 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Utilities for translating addresses between PDB offsets and _Relative Virtual Addresses_ (RVAs).
9
10use std::cmp::{self, Ordering};
11use std::fmt;
12use std::iter::FusedIterator;
13use std::mem;
14use std::ops::Range;
15
16use crate::common::*;
17use crate::msf::Stream;
18use crate::pe::ImageSectionHeader;
19
/// An address translation record from an `OMAPTable`.
///
/// This record applies to the half-open interval [ `record.source_address`,
/// `next_record.source_address` ).
///
/// Both fields are stored in little-endian byte order; use the accessor methods to read them as
/// native-endian values.
#[repr(C)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct OMAPRecord {
    source_address: u32,
    target_address: u32,
}

impl OMAPRecord {
    /// Create a new OMAP record for the given mapping.
    ///
    /// Both addresses are passed in native byte order and converted to little-endian for storage.
    pub fn new(source_address: u32, target_address: u32) -> Self {
        Self {
            source_address: source_address.to_le(),
            target_address: target_address.to_le(),
        }
    }

    /// Returns the address in the source space.
    #[inline]
    pub fn source_address(self) -> u32 {
        u32::from_le(self.source_address)
    }

    /// Returns the start of the mapped portion in the target address space.
    #[inline]
    pub fn target_address(self) -> u32 {
        u32::from_le(self.target_address)
    }

    /// Translate the given address into the target address space.
    ///
    /// The result is `target_address + (address - source_address)`. The arithmetic wraps on
    /// overflow, so a malformed record yields a wrapped address in every build profile instead of
    /// panicking only when overflow checks are enabled.
    #[inline]
    fn translate(self, address: u32) -> u32 {
        // This method is only to be used internally by the OMAP iterator and lookups. The caller
        // must verify that the record is valid to translate an address.
        debug_assert!(self.source_address() <= address);
        let offset = address.wrapping_sub(self.source_address());
        self.target_address().wrapping_add(offset)
    }
}
61
62impl fmt::Debug for OMAPRecord {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        f.debug_struct("OMAPRecord")
65            .field(
66                "source_address",
67                &format_args!("{:#010x}", self.source_address()),
68            )
69            .field(
70                "target_address",
71                &format_args!("{:#010x}", self.target_address()),
72            )
73            .finish()
74    }
75}
76
77impl PartialOrd for OMAPRecord {
78    #[inline]
79    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
80        self.source_address().partial_cmp(&other.source_address())
81    }
82}
83
84impl Ord for OMAPRecord {
85    #[inline]
86    fn cmp(&self, other: &Self) -> Ordering {
87        self.source_address().cmp(&other.source_address())
88    }
89}
90
/// PDBs can contain OMAP tables, which translate relative virtual addresses (RVAs) from one address
/// space into another.
///
/// For more information on the practical use of OMAPs, see the [module level documentation] and
/// [`AddressMap`]. A PDB can contain two OMAPs:
///
///  - `omap_from_src`: A mapping from the original address space to the transformed address space
///    of an optimized binary. Use `PDB::omap_from_src` to obtain an instance of this OMAP. Also,
///    `PdbInternalRva::rva` performs this conversion in a safe manner.
///  - `omap_to_src`: A mapping from the transformed address space back into the original address
///    space of the unoptimized binary. Use `PDB::omap_to_src` to obtain an instance of this OMAP.
///    Also, `Rva::original_rva` performs this conversion in a safe manner.
///
/// # Structure
///
/// OMAP tables are dense arrays, sequentially storing `OMAPRecord` structs sorted by source
/// address.
///
/// Each record applies to a range of addresses: i.e. record N indicates that addresses in the
/// half-open interval [ `record[n].source_address`, `record[n+1].source_address` ) were relocated
/// to a starting address of `record[n].target_address`. If `target_address` is zero, `lookup()`
/// will return `None`, since this indicates a non-existent location in the target address space.
///
/// Given that the table is sorted, lookups by source address can be efficiently serviced using a
/// binary search directly against the underlying data without secondary data structures. This is
/// not the most cache efficient data structure (especially given that half of each cache line is
/// storing target addresses), but given that OMAP tables are an uncommon PDB feature, the obvious
/// binary search implementation seems appropriate.
///
/// [module level documentation]: self
pub(crate) struct OMAPTable<'s> {
    /// The raw stream backing this table; `records()` reinterprets its bytes as `OMAPRecord`s.
    stream: Stream<'s>,
}
124
125impl<'s> OMAPTable<'s> {
126    pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> {
127        match cast_aligned::<OMAPRecord>(stream.as_slice()) {
128            Some(_) => Ok(OMAPTable { stream }),
129            None => Err(Error::InvalidStreamLength("OMAP")),
130        }
131    }
132
133    /// Returns a direct view onto the records stored in this OMAP table.
134    #[inline]
135    pub fn records(&self) -> &[OMAPRecord] {
136        // alignment is checked during parsing, unwrap is safe.
137        cast_aligned(self.stream.as_slice()).unwrap()
138    }
139
140    /// Look up `source_address` to yield a target address.
141    pub fn lookup(&self, source_address: u32) -> Option<u32> {
142        let records = self.records();
143
144        let index = match records.binary_search_by_key(&source_address, |r| r.source_address()) {
145            Ok(i) => i,
146            Err(0) => return None,
147            Err(i) => i - 1,
148        };
149
150        let record = records[index];
151
152        // As a special case, `target_address` can be zero, which indicates that the
153        // `source_address` does not exist in the target address space.
154        if record.target_address() == 0 {
155            return None;
156        }
157
158        Some(record.translate(source_address))
159    }
160
161    /// Look up a the range `start..end` and iterate all mapped sub-ranges.
162    pub fn lookup_range(&self, range: Range<u32>) -> RangeIter<'_> {
163        let Range { start, end } = range;
164        if end <= start {
165            return RangeIter::empty();
166        }
167
168        let records = self.records();
169        let (record, next) = match records.binary_search_by_key(&start, |r| r.source_address()) {
170            Ok(i) => (records[i], &records[i + 1..]),
171            // Insert a dummy record no indicate that the range before the first record is invalid.
172            // The range might still overlap with the first record however, so attempt regular
173            // iteration.
174            Err(0) => (OMAPRecord::new(0, 0), records),
175            Err(i) => (records[i - 1], &records[i..]),
176        };
177
178        RangeIter {
179            records: next.iter(),
180            record,
181            addr: start,
182            end,
183        }
184    }
185}
186
187impl fmt::Debug for OMAPTable<'_> {
188    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
189        f.debug_tuple("OMAPTable").field(&self.records()).finish()
190    }
191}
192
193/// An iterator over mapped target ranges in an OMAP.
194pub(crate) struct RangeIter<'t> {
195    /// Iterator over subsequent OMAP records.
196    records: std::slice::Iter<'t, OMAPRecord>,
197    /// The record that spans the current start address.
198    record: OMAPRecord,
199    /// The start address of the current subrange.
200    addr: u32,
201    /// The final end address of the (last sub-)range.
202    end: u32,
203}
204
205impl<'t> RangeIter<'t> {
206    /// Creates a `RangeIter` that does not yield any ranges.
207    pub fn empty() -> Self {
208        RangeIter {
209            records: [].iter(),
210            record: OMAPRecord::new(0, 0),
211            addr: 0,
212            end: 0,
213        }
214    }
215
216    /// Creates a `RangeIter` that only yields the specified range.
217    pub fn identity(range: Range<u32>) -> Self {
218        // Declare the range `start..` as valid with an identity mapping. We cannot use `0..` here
219        // since the target must be a non-zero value to be recognized as valid mapping. Since there
220        // are no further records, a single subrange `start..end` will be considered.
221        RangeIter {
222            records: [].iter(),
223            record: OMAPRecord::new(range.start, range.start),
224            addr: range.start,
225            end: range.end,
226        }
227    }
228}
229
230impl Default for RangeIter<'_> {
231    fn default() -> Self {
232        Self::empty()
233    }
234}
235
236impl Iterator for RangeIter<'_> {
237    type Item = Range<u32>;
238
239    fn next(&mut self) -> Option<Self::Item> {
240        while self.addr < self.end {
241            // Pull the next record from the list. Since the current record is only valid up to the
242            // next one, this will determine the end of the current sub slice. If there are no more
243            // records, create an unmapped dummy record starting at the end of the source range.
244            let next_record = match self.records.next() {
245                Some(record) => *record,
246                None => OMAPRecord::new(self.end, 0),
247            };
248
249            // Calculate the bounds of the current subrange and write it back for the next
250            // iteration. Likewise, remember the next record as address translation base.
251            let subrange_end = cmp::min(next_record.source_address(), self.end);
252            let subrange_start = mem::replace(&mut self.addr, subrange_end);
253            let last_record = mem::replace(&mut self.record, next_record);
254
255            // Check for the validity of this sub-range or skip it silently:
256            //  2. The sub range covered by the last OMAP record might be empty. This can be an
257            //     artifact of a dummy record used when creating a new iterator.
258            //  3. A `target_address` of zero indicates an unmapped address range.
259            if subrange_start >= subrange_end || last_record.target_address() == 0 {
260                continue;
261            }
262
263            let translated_start = last_record.translate(subrange_start);
264            let translated_end = last_record.translate(subrange_end);
265            return Some(translated_start..translated_end);
266        }
267
268        None
269    }
270}
271
272impl FusedIterator for RangeIter<'_> {}
273
274/// Iterator over [`Rva`] ranges returned by [`AddressMap::rva_ranges`].
275pub struct RvaRangeIter<'t>(RangeIter<'t>);
276
277impl Iterator for RvaRangeIter<'_> {
278    type Item = Range<Rva>;
279
280    fn next(&mut self) -> Option<Self::Item> {
281        self.0.next().map(|range| Rva(range.start)..Rva(range.end))
282    }
283}
284
285impl FusedIterator for RvaRangeIter<'_> {}
286
287/// Iterator over [`PdbInternalRva`] ranges returned by [`AddressMap::internal_rva_ranges`].
288pub struct PdbInternalRvaRangeIter<'t>(RangeIter<'t>);
289
290impl Iterator for PdbInternalRvaRangeIter<'_> {
291    type Item = Range<PdbInternalRva>;
292
293    fn next(&mut self) -> Option<Self::Item> {
294        self.0
295            .next()
296            .map(|range| PdbInternalRva(range.start)..PdbInternalRva(range.end))
297    }
298}
299
300impl FusedIterator for PdbInternalRvaRangeIter<'_> {}
301
302/// A mapping between addresses and offsets used in the PDB and PE file.
303///
/// To obtain an instance of this address map, call `PDB::address_map`. It will determine the correct
305/// translation mode and read all internal state from the PDB. Then use the conversion methods on
306/// the address and offset types to translate addresses.
307///
308/// # Background
309///
310/// Addresses in PDBs are stored as offsets into sections of the PE file. The `AddressMap` contains
311/// the PE's section headers to translate between the offsets and virtual addresses relative to the
312/// image base (RVAs).
313///
314/// Additionally, Microsoft has been reordering the Windows system and application binaries to
315/// optimize them for paging reduction, using a toolset reported to be derived from and/or built on
316/// top of the [Vulcan research project]. Relatively little else is known about the tools or the
317/// methods they use. Looking at Windows system binaries like `ntoskrnl.exe`, it is apparent that
318/// their layout has been rearranged, and their respective symbol files contain _OMAP_ re-mapping
319/// information. The [Microsoft Binary Technologies Projects] may be involved in this.
320///
321/// The internals of this transformation are not well understood. According to [1997 reference
322/// material]:
323///
324/// > Yet another form of debug information is relatively new and undocumented, except for a few
325/// > obscure references in `WINNT.H` and the Win32 SDK help. This type of information is known as
326/// > OMAP. Apparently, as part of Microsoft's internal build procedure, small fragments of code in
327/// > EXEs and DLLs are moved around to put the most commonly used code at the beginning of the code
328/// > section. This presumably keeps the process memory working set as small as possible. However,
329/// > when shifting around the blocks of code, the corresponding debug information isn't updated.
330/// > Instead, OMAP information is created. It lets symbol table code translate between the original
331/// > address in a symbol table and the modified address where the variable or line of code really
332/// > exists in memory.
333///
334/// # Usage
335///
336/// To aid with translating addresses and offsets, this module exposes `AddressMap`, a helper that
337/// contains all information to apply the correct translation of any kind of address or offset to
338/// another. Due to the rearranging optimizations, there are four types involved:
339///
340///  - [`Rva`]: A _Relative Virtual Address_ in the actual binary. This address directly corresponds
341///    to instruction pointers seen in stack traces and symbol addresses reported by debuggers.
342///  - [`PdbInternalRva`]: An RVA as it would have appeared before the optimization. These RVAs are
343///    used in some places and can be converted to an `Rva` in the actual address space.
344///  - [`SectionOffset`]: An offset into a section of the actual binary. A `section` member of _n_
345///    refers to section _n - 1_, which makes a section number of _0_ a null pointer.
346///  - [`PdbInternalSectionOffset`]: An offset into a section of the original binary. These offsets
347///    are used throughout the PDB and can be converted to either `SectionOffset`, or directly to
348///    `Rva` in the actual address space.
349///
350/// For binaries that have not been optimized that way, the `PdbInternal*` values are effectively
351/// equal to their regular counterparts and the conversion between the two are no-ops. Address
352/// translation still has to assume different address spaces, which is why there is no direct
353/// conversion without an `AddressMap`.
354///
355/// # Example
356///
357/// ```rust
358/// # use pdb::{Rva, FallibleIterator};
359/// #
360/// # fn test() -> pdb::Result<()> {
361/// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
362/// let mut pdb = pdb::PDB::open(source)?;
363///
364/// // Compute the address map once and reuse it
365/// let address_map = pdb.address_map()?;
366///
367/// # let symbol_table = pdb.global_symbols()?;
368/// # let symbol = symbol_table.iter().next()?.unwrap();
369/// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => {
370/// // Obtain some section offset, eg from a symbol, and convert it
371/// match pubsym.offset.to_rva(&address_map) {
372///     Some(rva) => {
373///         println!("symbol is at {}", rva);
374/// #       assert_eq!(rva, Rva(26048));
375///     }
376///     None => {
377///         println!("symbol refers to eliminated code");
378/// #       panic!("symbol should exist");
379///     }
380/// }
381/// # } _ => unreachable!() }
382/// # Ok(())
383/// # }
384/// # test().unwrap()
385/// ```
386///
387/// [Vulcan research project]: https://research.microsoft.com/pubs/69850/tr-2001-50.pdf
388/// [Microsoft Binary Technologies Projects]: https://microsoft.com/windows/cse/bit_projects.mspx
389/// [1997 reference material]: https://www.microsoft.com/msj/0597/hood0597.aspx
#[derive(Debug, Default)]
pub struct AddressMap<'s> {
    /// Section headers used to convert between PDB-internal section offsets and PDB-internal
    /// RVAs. They also serve the actual binary whenever `transformed_sections` is `None`.
    pub(crate) original_sections: Vec<ImageSectionHeader>,
    /// Section headers used to convert between `SectionOffset` and `Rva`, if present.
    pub(crate) transformed_sections: Option<Vec<ImageSectionHeader>>,
    /// OMAP consulted when converting an `Rva` into a `PdbInternalRva`. If `None`, the value is
    /// carried over unchanged.
    pub(crate) transformed_to_original: Option<OMAPTable<'s>>,
    /// OMAP consulted when converting a `PdbInternalRva` into an `Rva`. If `None`, the value is
    /// carried over unchanged.
    pub(crate) original_to_transformed: Option<OMAPTable<'s>>,
}
397
398impl<'s> AddressMap<'s> {
399    /// Resolves actual ranges in the executable's address space.
400    ///
401    /// The given internal address range might be split up into multiple ranges in the executable.
402    /// This iterator traverses all mapped ranges in the order of the PDB-internal mapping. All
403    /// empty or eliminated ranges are skipped. Thus, the iterator might be empty even for non-empty
404    /// ranges.
405    pub fn rva_ranges(&self, range: Range<PdbInternalRva>) -> RvaRangeIter<'_> {
406        RvaRangeIter(match self.original_to_transformed {
407            Some(ref omap) => omap.lookup_range(range.start.0..range.end.0),
408            None => RangeIter::identity(range.start.0..range.end.0),
409        })
410    }
411
412    /// Resolves actual ranges in the executable's address space.
413    ///
414    /// The given address range might correspond to multiple ranges in the PDB-internal address
415    /// space. This iterator traverses all mapped ranges in the order of the actual RVA mapping.
416    /// This iterator might be empty even for non-empty ranges if no corresponding original range
417    /// can be found.
418    pub fn internal_rva_ranges(&self, range: Range<Rva>) -> PdbInternalRvaRangeIter<'_> {
419        PdbInternalRvaRangeIter(match self.transformed_to_original {
420            Some(ref omap) => omap.lookup_range(range.start.0..range.end.0),
421            None => RangeIter::identity(range.start.0..range.end.0),
422        })
423    }
424}
425
426fn get_section_offset(sections: &[ImageSectionHeader], address: u32) -> Option<(u16, u32)> {
427    // Section headers are sorted by virtual_address, so we only need to iterate until we exceed
428    // the desired address. Since the number of section headers is relatively low, a sequential
429    // search is the fastest option here.
430    let (index, section) = sections
431        .iter()
432        .take_while(|s| s.virtual_address <= address)
433        .enumerate()
434        .find(|(_, s)| address < s.virtual_address + s.size_of_raw_data)?;
435
436    Some((index as u16 + 1, address - section.virtual_address))
437}
438
439fn get_virtual_address(sections: &[ImageSectionHeader], section: u16, offset: u32) -> Option<u32> {
440    (section as usize)
441        .checked_sub(1)
442        .and_then(|i| sections.get(i))
443        .map(|section| section.virtual_address + offset)
444}
445
446impl Rva {
447    /// Resolves a PDB-internal Relative Virtual Address.
448    ///
449    /// This address is not necessarily compatible with the executable's address space and should
450    /// therefore not be used for debugging purposes.
451    pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
452        match translator.transformed_to_original {
453            Some(ref omap) => omap.lookup(self.0).map(PdbInternalRva),
454            None => Some(PdbInternalRva(self.0)),
455        }
456    }
457
458    /// Resolves the section offset in the PE headers.
459    ///
460    /// This is an offset into PE section headers of the executable. To retrieve section offsets
461    /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead.
462    pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
463        let (section, offset) = match translator.transformed_sections {
464            Some(ref sections) => get_section_offset(sections, self.0)?,
465            None => get_section_offset(&translator.original_sections, self.0)?,
466        };
467
468        Some(SectionOffset { section, offset })
469    }
470
471    /// Resolves the PDB internal section offset.
472    ///
473    /// This is the offset value used in the PDB file. To index into the actual PE section headers,
474    /// use [`to_section_offset`](Self::to_section_offset) instead.
475    pub fn to_internal_offset(
476        self,
477        translator: &AddressMap<'_>,
478    ) -> Option<PdbInternalSectionOffset> {
479        self.to_internal_rva(translator)?
480            .to_internal_offset(translator)
481    }
482}
483
484impl PdbInternalRva {
485    /// Resolves an actual Relative Virtual Address in the executable's address space.
486    pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
487        match translator.original_to_transformed {
488            Some(ref omap) => omap.lookup(self.0).map(Rva),
489            None => Some(Rva(self.0)),
490        }
491    }
492
493    /// Resolves the section offset in the PE headers.
494    ///
495    /// This is an offset into PE section headers of the executable. To retrieve section offsets
496    /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead.
497    pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
498        self.to_rva(translator)?.to_section_offset(translator)
499    }
500
501    /// Resolves the PDB internal section offset.
502    ///
503    /// This is the offset value used in the PDB file. To index into the actual PE section headers,
504    /// use [`to_section_offset`](Self::to_section_offset) instead.
505    pub fn to_internal_offset(
506        self,
507        translator: &AddressMap<'_>,
508    ) -> Option<PdbInternalSectionOffset> {
509        let (section, offset) = get_section_offset(&translator.original_sections, self.0)?;
510        Some(PdbInternalSectionOffset { section, offset })
511    }
512}
513
514impl SectionOffset {
515    /// Resolves an actual Relative Virtual Address in the executable's address space.
516    pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
517        let address = match translator.transformed_sections {
518            Some(ref sections) => get_virtual_address(sections, self.section, self.offset)?,
519            None => get_virtual_address(&translator.original_sections, self.section, self.offset)?,
520        };
521
522        Some(Rva(address))
523    }
524
525    /// Resolves a PDB-internal Relative Virtual Address.
526    ///
527    /// This address is not necessarily compatible with the executable's address space and should
528    /// therefore not be used for debugging purposes.
529    pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
530        self.to_rva(translator)?.to_internal_rva(translator)
531    }
532
533    /// Resolves the PDB internal section offset.
534    pub fn to_internal_offset(
535        self,
536        translator: &AddressMap<'_>,
537    ) -> Option<PdbInternalSectionOffset> {
538        if translator.transformed_sections.is_none() {
539            // Fast path to avoid section table lookups
540            let Self { section, offset } = self;
541            return Some(PdbInternalSectionOffset { section, offset });
542        }
543
544        self.to_internal_rva(translator)?
545            .to_internal_offset(translator)
546    }
547}
548
549impl PdbInternalSectionOffset {
550    /// Resolves an actual Relative Virtual Address in the executable's address space.
551    pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
552        self.to_internal_rva(translator)?.to_rva(translator)
553    }
554
555    /// Resolves a PDB-internal Relative Virtual Address.
556    ///
557    /// This address is not necessarily compatible with the executable's address space and should
558    /// therefore not be used for debugging purposes.
559    pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
560        get_virtual_address(&translator.original_sections, self.section, self.offset)
561            .map(PdbInternalRva)
562    }
563
564    /// Resolves the section offset in the PE headers.
565    pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
566        if translator.transformed_sections.is_none() {
567            // Fast path to avoid section table lookups
568            let Self { section, offset } = self;
569            return Some(SectionOffset { section, offset });
570        }
571
572        self.to_rva(translator)?.to_section_offset(translator)
573    }
574}
575
#[cfg(test)]
mod tests {
    use super::*;

    use std::mem;

    /// The record layout must stay at 8 bytes with 4-byte alignment.
    #[test]
    fn test_omap_record() {
        let size = mem::size_of::<OMAPRecord>();
        let alignment = mem::align_of::<OMAPRecord>();

        assert_eq!(size, 8);
        assert_eq!(alignment, 4);
    }

    /// Section numbers are 1-based: zero and out-of-range numbers resolve to nothing.
    #[test]
    fn test_get_virtual_address() {
        let header = ImageSectionHeader {
            virtual_address: 0x1000_0000,
            ..Default::default()
        };
        let sections = [header];

        assert_eq!(get_virtual_address(&sections, 1, 0x1234), Some(0x1000_1234));
        assert_eq!(get_virtual_address(&sections, 2, 0x1234), None);

        // https://github.com/willglynn/pdb/issues/87
        assert_eq!(get_virtual_address(&sections, 0, 0x1234), None);
    }
}
598        // https://github.com/willglynn/pdb/issues/87
599        assert_eq!(get_virtual_address(&sections, 0, 0x1234), None);
600    }
601}