pdb/omap.rs
1// Copyright 2018 pdb Developers
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! Utilities for translating addresses between PDB offsets and _Relative Virtual Addresses_ (RVAs).
9
10use std::cmp::{self, Ordering};
11use std::fmt;
12use std::iter::FusedIterator;
13use std::mem;
14use std::ops::Range;
15
16use crate::common::*;
17use crate::msf::Stream;
18use crate::pe::ImageSectionHeader;
19
/// An address translation record from an `OMAPTable`.
///
/// This record applies to the half-open interval [ `record.source_address`,
/// `next_record.source_address` ).
///
/// Both fields hold little-endian values: `OMAPRecord::new` stores them with `to_le` and the
/// accessors decode them with `u32::from_le`.
#[repr(C)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct OMAPRecord {
    /// Start of the covered interval in the source address space (stored little-endian).
    source_address: u32,
    /// Address in the target address space that `source_address` maps to (stored little-endian).
    target_address: u32,
}
30
31impl OMAPRecord {
32 /// Create a new OMAP record for the given mapping.
33 pub fn new(source_address: u32, target_address: u32) -> Self {
34 Self {
35 source_address: source_address.to_le(),
36 target_address: target_address.to_le(),
37 }
38 }
39
40 /// Returns the address in the source space.
41 #[inline]
42 pub fn source_address(self) -> u32 {
43 u32::from_le(self.source_address)
44 }
45
46 /// Returns the start of the mapped portion in the target address space.
47 #[inline]
48 pub fn target_address(self) -> u32 {
49 u32::from_le(self.target_address)
50 }
51
52 /// Translate the given address into the target address space.
53 #[inline]
54 fn translate(self, address: u32) -> u32 {
55 // This method is only to be used internally by the OMAP iterator and lookups. The caller
56 // must verify that the record is valid to translate an address.
57 debug_assert!(self.source_address() <= address);
58 (address - self.source_address()) + self.target_address()
59 }
60}
61
62impl fmt::Debug for OMAPRecord {
63 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64 f.debug_struct("OMAPRecord")
65 .field(
66 "source_address",
67 &format_args!("{:#010x}", self.source_address()),
68 )
69 .field(
70 "target_address",
71 &format_args!("{:#010x}", self.target_address()),
72 )
73 .finish()
74 }
75}
76
77impl PartialOrd for OMAPRecord {
78 #[inline]
79 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
80 self.source_address().partial_cmp(&other.source_address())
81 }
82}
83
84impl Ord for OMAPRecord {
85 #[inline]
86 fn cmp(&self, other: &Self) -> Ordering {
87 self.source_address().cmp(&other.source_address())
88 }
89}
90
/// PDBs can contain OMAP tables, which translate relative virtual addresses (RVAs) from one address
/// space into another.
///
/// For more information on the practical use of OMAPs, see the [module level documentation] and
/// [`AddressMap`]. A PDB can contain two OMAPs:
///
/// - `omap_from_src`: A mapping from the original address space to the transformed address space
///   of an optimized binary. Use `PDB::omap_from_src` to obtain an instance of this OMAP. Also,
///   `PdbInternalRva::rva` performs this conversion in a safe manner.
/// - `omap_to_src`: A mapping from the transformed address space back into the original address
///   space of the unoptimized binary. Use `PDB::omap_to_src` to obtain an instance of this OMAP.
///   Also, `Rva::original_rva` performs this conversion in a safe manner.
///
/// # Structure
///
/// OMAP tables are dense arrays, sequentially storing `OMAPRecord` structs sorted by source
/// address.
///
/// Each record applies to a range of addresses: i.e. record N indicates that addresses in the
/// half-open interval [ `record[n].source_address`, `record[n+1].source_address` ) were relocated
/// to a starting address of `record[n].target_address`. If `target_address` is zero, `lookup()`
/// will return `None`, since this indicates a non-existent location in the target address space.
///
/// Given that the table is sorted, lookups by source address can be efficiently serviced using a
/// binary search directly against the underlying data without secondary data structures. This is
/// not the most cache efficient data structure (especially given that half of each cache line is
/// storing target addresses), but given that OMAP tables are an uncommon PDB feature, the obvious
/// binary search implementation seems appropriate.
///
/// [module level documentation]: self
pub(crate) struct OMAPTable<'s> {
    /// Raw stream data; `parse` verified that it can be viewed as a slice of `OMAPRecord`s.
    stream: Stream<'s>,
}
124
125impl<'s> OMAPTable<'s> {
126 pub(crate) fn parse(stream: Stream<'s>) -> Result<Self> {
127 match cast_aligned::<OMAPRecord>(stream.as_slice()) {
128 Some(_) => Ok(OMAPTable { stream }),
129 None => Err(Error::InvalidStreamLength("OMAP")),
130 }
131 }
132
133 /// Returns a direct view onto the records stored in this OMAP table.
134 #[inline]
135 pub fn records(&self) -> &[OMAPRecord] {
136 // alignment is checked during parsing, unwrap is safe.
137 cast_aligned(self.stream.as_slice()).unwrap()
138 }
139
140 /// Look up `source_address` to yield a target address.
141 pub fn lookup(&self, source_address: u32) -> Option<u32> {
142 let records = self.records();
143
144 let index = match records.binary_search_by_key(&source_address, |r| r.source_address()) {
145 Ok(i) => i,
146 Err(0) => return None,
147 Err(i) => i - 1,
148 };
149
150 let record = records[index];
151
152 // As a special case, `target_address` can be zero, which indicates that the
153 // `source_address` does not exist in the target address space.
154 if record.target_address() == 0 {
155 return None;
156 }
157
158 Some(record.translate(source_address))
159 }
160
161 /// Look up a the range `start..end` and iterate all mapped sub-ranges.
162 pub fn lookup_range(&self, range: Range<u32>) -> RangeIter<'_> {
163 let Range { start, end } = range;
164 if end <= start {
165 return RangeIter::empty();
166 }
167
168 let records = self.records();
169 let (record, next) = match records.binary_search_by_key(&start, |r| r.source_address()) {
170 Ok(i) => (records[i], &records[i + 1..]),
171 // Insert a dummy record no indicate that the range before the first record is invalid.
172 // The range might still overlap with the first record however, so attempt regular
173 // iteration.
174 Err(0) => (OMAPRecord::new(0, 0), records),
175 Err(i) => (records[i - 1], &records[i..]),
176 };
177
178 RangeIter {
179 records: next.iter(),
180 record,
181 addr: start,
182 end,
183 }
184 }
185}
186
187impl fmt::Debug for OMAPTable<'_> {
188 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
189 f.debug_tuple("OMAPTable").field(&self.records()).finish()
190 }
191}
192
/// An iterator over mapped target ranges in an OMAP.
///
/// Each step translates the part of the remaining source range `addr..end` that is covered by
/// `record`. Parts that are empty or whose record has a target address of zero are skipped.
pub(crate) struct RangeIter<'t> {
    /// Iterator over subsequent OMAP records.
    records: std::slice::Iter<'t, OMAPRecord>,
    /// The record that spans the current start address.
    record: OMAPRecord,
    /// The start address of the current subrange.
    addr: u32,
    /// The final end address of the (last sub-)range.
    end: u32,
}
204
205impl<'t> RangeIter<'t> {
206 /// Creates a `RangeIter` that does not yield any ranges.
207 pub fn empty() -> Self {
208 RangeIter {
209 records: [].iter(),
210 record: OMAPRecord::new(0, 0),
211 addr: 0,
212 end: 0,
213 }
214 }
215
216 /// Creates a `RangeIter` that only yields the specified range.
217 pub fn identity(range: Range<u32>) -> Self {
218 // Declare the range `start..` as valid with an identity mapping. We cannot use `0..` here
219 // since the target must be a non-zero value to be recognized as valid mapping. Since there
220 // are no further records, a single subrange `start..end` will be considered.
221 RangeIter {
222 records: [].iter(),
223 record: OMAPRecord::new(range.start, range.start),
224 addr: range.start,
225 end: range.end,
226 }
227 }
228}
229
230impl Default for RangeIter<'_> {
231 fn default() -> Self {
232 Self::empty()
233 }
234}
235
236impl Iterator for RangeIter<'_> {
237 type Item = Range<u32>;
238
239 fn next(&mut self) -> Option<Self::Item> {
240 while self.addr < self.end {
241 // Pull the next record from the list. Since the current record is only valid up to the
242 // next one, this will determine the end of the current sub slice. If there are no more
243 // records, create an unmapped dummy record starting at the end of the source range.
244 let next_record = match self.records.next() {
245 Some(record) => *record,
246 None => OMAPRecord::new(self.end, 0),
247 };
248
249 // Calculate the bounds of the current subrange and write it back for the next
250 // iteration. Likewise, remember the next record as address translation base.
251 let subrange_end = cmp::min(next_record.source_address(), self.end);
252 let subrange_start = mem::replace(&mut self.addr, subrange_end);
253 let last_record = mem::replace(&mut self.record, next_record);
254
255 // Check for the validity of this sub-range or skip it silently:
256 // 2. The sub range covered by the last OMAP record might be empty. This can be an
257 // artifact of a dummy record used when creating a new iterator.
258 // 3. A `target_address` of zero indicates an unmapped address range.
259 if subrange_start >= subrange_end || last_record.target_address() == 0 {
260 continue;
261 }
262
263 let translated_start = last_record.translate(subrange_start);
264 let translated_end = last_record.translate(subrange_end);
265 return Some(translated_start..translated_end);
266 }
267
268 None
269 }
270}
271
// Once `addr` has reached `end`, `next` keeps returning `None`, so the iterator is fused.
impl FusedIterator for RangeIter<'_> {}
273
/// Iterator over [`Rva`] ranges returned by [`AddressMap::rva_ranges`].
///
/// Wraps the internal range iterator and converts every yielded range into a range of [`Rva`]s.
pub struct RvaRangeIter<'t>(RangeIter<'t>);
276
277impl Iterator for RvaRangeIter<'_> {
278 type Item = Range<Rva>;
279
280 fn next(&mut self) -> Option<Self::Item> {
281 self.0.next().map(|range| Rva(range.start)..Rva(range.end))
282 }
283}
284
// Fused because `next` only forwards to the wrapped `RangeIter`, which is fused itself.
impl FusedIterator for RvaRangeIter<'_> {}
286
/// Iterator over [`PdbInternalRva`] ranges returned by [`AddressMap::internal_rva_ranges`].
///
/// Wraps the internal range iterator and converts every yielded range into a range of
/// [`PdbInternalRva`]s.
pub struct PdbInternalRvaRangeIter<'t>(RangeIter<'t>);
289
290impl Iterator for PdbInternalRvaRangeIter<'_> {
291 type Item = Range<PdbInternalRva>;
292
293 fn next(&mut self) -> Option<Self::Item> {
294 self.0
295 .next()
296 .map(|range| PdbInternalRva(range.start)..PdbInternalRva(range.end))
297 }
298}
299
// Fused because `next` only forwards to the wrapped `RangeIter`, which is fused itself.
impl FusedIterator for PdbInternalRvaRangeIter<'_> {}
301
302/// A mapping between addresses and offsets used in the PDB and PE file.
303///
/// To obtain an instance of this address map, call `PDB::address_map`. It will determine the
/// correct translation mode and read all internal state from the PDB. Then use the conversion
/// methods on the address and offset types to translate addresses.
307///
308/// # Background
309///
310/// Addresses in PDBs are stored as offsets into sections of the PE file. The `AddressMap` contains
311/// the PE's section headers to translate between the offsets and virtual addresses relative to the
312/// image base (RVAs).
313///
314/// Additionally, Microsoft has been reordering the Windows system and application binaries to
315/// optimize them for paging reduction, using a toolset reported to be derived from and/or built on
316/// top of the [Vulcan research project]. Relatively little else is known about the tools or the
317/// methods they use. Looking at Windows system binaries like `ntoskrnl.exe`, it is apparent that
318/// their layout has been rearranged, and their respective symbol files contain _OMAP_ re-mapping
319/// information. The [Microsoft Binary Technologies Projects] may be involved in this.
320///
321/// The internals of this transformation are not well understood. According to [1997 reference
322/// material]:
323///
324/// > Yet another form of debug information is relatively new and undocumented, except for a few
325/// > obscure references in `WINNT.H` and the Win32 SDK help. This type of information is known as
326/// > OMAP. Apparently, as part of Microsoft's internal build procedure, small fragments of code in
327/// > EXEs and DLLs are moved around to put the most commonly used code at the beginning of the code
328/// > section. This presumably keeps the process memory working set as small as possible. However,
329/// > when shifting around the blocks of code, the corresponding debug information isn't updated.
330/// > Instead, OMAP information is created. It lets symbol table code translate between the original
331/// > address in a symbol table and the modified address where the variable or line of code really
332/// > exists in memory.
333///
334/// # Usage
335///
336/// To aid with translating addresses and offsets, this module exposes `AddressMap`, a helper that
337/// contains all information to apply the correct translation of any kind of address or offset to
338/// another. Due to the rearranging optimizations, there are four types involved:
339///
340/// - [`Rva`]: A _Relative Virtual Address_ in the actual binary. This address directly corresponds
341/// to instruction pointers seen in stack traces and symbol addresses reported by debuggers.
342/// - [`PdbInternalRva`]: An RVA as it would have appeared before the optimization. These RVAs are
343/// used in some places and can be converted to an `Rva` in the actual address space.
344/// - [`SectionOffset`]: An offset into a section of the actual binary. A `section` member of _n_
345/// refers to section _n - 1_, which makes a section number of _0_ a null pointer.
346/// - [`PdbInternalSectionOffset`]: An offset into a section of the original binary. These offsets
347/// are used throughout the PDB and can be converted to either `SectionOffset`, or directly to
348/// `Rva` in the actual address space.
349///
350/// For binaries that have not been optimized that way, the `PdbInternal*` values are effectively
351/// equal to their regular counterparts and the conversion between the two are no-ops. Address
352/// translation still has to assume different address spaces, which is why there is no direct
353/// conversion without an `AddressMap`.
354///
355/// # Example
356///
357/// ```rust
358/// # use pdb::{Rva, FallibleIterator};
359/// #
360/// # fn test() -> pdb::Result<()> {
361/// # let source = std::fs::File::open("fixtures/self/foo.pdb")?;
362/// let mut pdb = pdb::PDB::open(source)?;
363///
364/// // Compute the address map once and reuse it
365/// let address_map = pdb.address_map()?;
366///
367/// # let symbol_table = pdb.global_symbols()?;
368/// # let symbol = symbol_table.iter().next()?.unwrap();
369/// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => {
370/// // Obtain some section offset, eg from a symbol, and convert it
371/// match pubsym.offset.to_rva(&address_map) {
372/// Some(rva) => {
373/// println!("symbol is at {}", rva);
374/// # assert_eq!(rva, Rva(26048));
375/// }
376/// None => {
377/// println!("symbol refers to eliminated code");
378/// # panic!("symbol should exist");
379/// }
380/// }
381/// # } _ => unreachable!() }
382/// # Ok(())
383/// # }
384/// # test().unwrap()
385/// ```
386///
387/// [Vulcan research project]: https://research.microsoft.com/pubs/69850/tr-2001-50.pdf
388/// [Microsoft Binary Technologies Projects]: https://microsoft.com/windows/cse/bit_projects.mspx
389/// [1997 reference material]: https://www.microsoft.com/msj/0597/hood0597.aspx
#[derive(Debug, Default)]
pub struct AddressMap<'s> {
    /// Section headers used for all `PdbInternal*` conversions. They also serve as the fallback
    /// for conversions between `Rva` and `SectionOffset` when `transformed_sections` is `None`.
    pub(crate) original_sections: Vec<ImageSectionHeader>,
    /// Section headers used for conversions between `Rva` and `SectionOffset`, if present.
    pub(crate) transformed_sections: Option<Vec<ImageSectionHeader>>,
    /// OMAP consulted when converting an `Rva` into a `PdbInternalRva`. Without it, the address
    /// value is carried over unchanged.
    pub(crate) transformed_to_original: Option<OMAPTable<'s>>,
    /// OMAP consulted when converting a `PdbInternalRva` into an `Rva`. Without it, the address
    /// value is carried over unchanged.
    pub(crate) original_to_transformed: Option<OMAPTable<'s>>,
}
397
398impl<'s> AddressMap<'s> {
399 /// Resolves actual ranges in the executable's address space.
400 ///
401 /// The given internal address range might be split up into multiple ranges in the executable.
402 /// This iterator traverses all mapped ranges in the order of the PDB-internal mapping. All
403 /// empty or eliminated ranges are skipped. Thus, the iterator might be empty even for non-empty
404 /// ranges.
405 pub fn rva_ranges(&self, range: Range<PdbInternalRva>) -> RvaRangeIter<'_> {
406 RvaRangeIter(match self.original_to_transformed {
407 Some(ref omap) => omap.lookup_range(range.start.0..range.end.0),
408 None => RangeIter::identity(range.start.0..range.end.0),
409 })
410 }
411
412 /// Resolves actual ranges in the executable's address space.
413 ///
414 /// The given address range might correspond to multiple ranges in the PDB-internal address
415 /// space. This iterator traverses all mapped ranges in the order of the actual RVA mapping.
416 /// This iterator might be empty even for non-empty ranges if no corresponding original range
417 /// can be found.
418 pub fn internal_rva_ranges(&self, range: Range<Rva>) -> PdbInternalRvaRangeIter<'_> {
419 PdbInternalRvaRangeIter(match self.transformed_to_original {
420 Some(ref omap) => omap.lookup_range(range.start.0..range.end.0),
421 None => RangeIter::identity(range.start.0..range.end.0),
422 })
423 }
424}
425
426fn get_section_offset(sections: &[ImageSectionHeader], address: u32) -> Option<(u16, u32)> {
427 // Section headers are sorted by virtual_address, so we only need to iterate until we exceed
428 // the desired address. Since the number of section headers is relatively low, a sequential
429 // search is the fastest option here.
430 let (index, section) = sections
431 .iter()
432 .take_while(|s| s.virtual_address <= address)
433 .enumerate()
434 .find(|(_, s)| address < s.virtual_address + s.size_of_raw_data)?;
435
436 Some((index as u16 + 1, address - section.virtual_address))
437}
438
439fn get_virtual_address(sections: &[ImageSectionHeader], section: u16, offset: u32) -> Option<u32> {
440 (section as usize)
441 .checked_sub(1)
442 .and_then(|i| sections.get(i))
443 .map(|section| section.virtual_address + offset)
444}
445
446impl Rva {
447 /// Resolves a PDB-internal Relative Virtual Address.
448 ///
449 /// This address is not necessarily compatible with the executable's address space and should
450 /// therefore not be used for debugging purposes.
451 pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
452 match translator.transformed_to_original {
453 Some(ref omap) => omap.lookup(self.0).map(PdbInternalRva),
454 None => Some(PdbInternalRva(self.0)),
455 }
456 }
457
458 /// Resolves the section offset in the PE headers.
459 ///
460 /// This is an offset into PE section headers of the executable. To retrieve section offsets
461 /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead.
462 pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
463 let (section, offset) = match translator.transformed_sections {
464 Some(ref sections) => get_section_offset(sections, self.0)?,
465 None => get_section_offset(&translator.original_sections, self.0)?,
466 };
467
468 Some(SectionOffset { section, offset })
469 }
470
471 /// Resolves the PDB internal section offset.
472 ///
473 /// This is the offset value used in the PDB file. To index into the actual PE section headers,
474 /// use [`to_section_offset`](Self::to_section_offset) instead.
475 pub fn to_internal_offset(
476 self,
477 translator: &AddressMap<'_>,
478 ) -> Option<PdbInternalSectionOffset> {
479 self.to_internal_rva(translator)?
480 .to_internal_offset(translator)
481 }
482}
483
484impl PdbInternalRva {
485 /// Resolves an actual Relative Virtual Address in the executable's address space.
486 pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
487 match translator.original_to_transformed {
488 Some(ref omap) => omap.lookup(self.0).map(Rva),
489 None => Some(Rva(self.0)),
490 }
491 }
492
493 /// Resolves the section offset in the PE headers.
494 ///
495 /// This is an offset into PE section headers of the executable. To retrieve section offsets
496 /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead.
497 pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
498 self.to_rva(translator)?.to_section_offset(translator)
499 }
500
501 /// Resolves the PDB internal section offset.
502 ///
503 /// This is the offset value used in the PDB file. To index into the actual PE section headers,
504 /// use [`to_section_offset`](Self::to_section_offset) instead.
505 pub fn to_internal_offset(
506 self,
507 translator: &AddressMap<'_>,
508 ) -> Option<PdbInternalSectionOffset> {
509 let (section, offset) = get_section_offset(&translator.original_sections, self.0)?;
510 Some(PdbInternalSectionOffset { section, offset })
511 }
512}
513
514impl SectionOffset {
515 /// Resolves an actual Relative Virtual Address in the executable's address space.
516 pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
517 let address = match translator.transformed_sections {
518 Some(ref sections) => get_virtual_address(sections, self.section, self.offset)?,
519 None => get_virtual_address(&translator.original_sections, self.section, self.offset)?,
520 };
521
522 Some(Rva(address))
523 }
524
525 /// Resolves a PDB-internal Relative Virtual Address.
526 ///
527 /// This address is not necessarily compatible with the executable's address space and should
528 /// therefore not be used for debugging purposes.
529 pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
530 self.to_rva(translator)?.to_internal_rva(translator)
531 }
532
533 /// Resolves the PDB internal section offset.
534 pub fn to_internal_offset(
535 self,
536 translator: &AddressMap<'_>,
537 ) -> Option<PdbInternalSectionOffset> {
538 if translator.transformed_sections.is_none() {
539 // Fast path to avoid section table lookups
540 let Self { section, offset } = self;
541 return Some(PdbInternalSectionOffset { section, offset });
542 }
543
544 self.to_internal_rva(translator)?
545 .to_internal_offset(translator)
546 }
547}
548
549impl PdbInternalSectionOffset {
550 /// Resolves an actual Relative Virtual Address in the executable's address space.
551 pub fn to_rva(self, translator: &AddressMap<'_>) -> Option<Rva> {
552 self.to_internal_rva(translator)?.to_rva(translator)
553 }
554
555 /// Resolves a PDB-internal Relative Virtual Address.
556 ///
557 /// This address is not necessarily compatible with the executable's address space and should
558 /// therefore not be used for debugging purposes.
559 pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option<PdbInternalRva> {
560 get_virtual_address(&translator.original_sections, self.section, self.offset)
561 .map(PdbInternalRva)
562 }
563
564 /// Resolves the section offset in the PE headers.
565 pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option<SectionOffset> {
566 if translator.transformed_sections.is_none() {
567 // Fast path to avoid section table lookups
568 let Self { section, offset } = self;
569 return Some(SectionOffset { section, offset });
570 }
571
572 self.to_rva(translator)?.to_section_offset(translator)
573 }
574}
575
/// Unit tests for the record layout and the section address helpers.
#[cfg(test)]
mod tests {
    use super::*;

    use std::mem;

    /// The record must keep the layout of two consecutive 32-bit values.
    #[test]
    fn test_omap_record() {
        assert_eq!(mem::size_of::<OMAPRecord>(), 8);
        assert_eq!(mem::align_of::<OMAPRecord>(), 4);
    }

    /// Section numbers are 1-based; zero and out-of-range numbers must not resolve.
    #[test]
    fn test_get_virtual_address() {
        let sections = vec![ImageSectionHeader {
            virtual_address: 0x1000_0000,
            ..Default::default()
        }];

        assert_eq!(get_virtual_address(&sections, 1, 0x1234), Some(0x1000_1234));
        assert_eq!(get_virtual_address(&sections, 2, 0x1234), None);

        // https://github.com/willglynn/pdb/issues/87
        assert_eq!(get_virtual_address(&sections, 0, 0x1234), None);
    }
}
601}