Skip to main content

ud_emulator/pe/
mod.rs

1//! PE32 loader — parses Microsoft Portable Executable images,
2//! maps them into the emulator MMU, applies base relocations,
3//! resolves imports against the [`crate::win32::Registry`], and
4//! exposes export-by-name lookup.
5//!
6//! Reference: Microsoft "PE Format" (a.k.a. "Microsoft PE and
7//! COFF Specification"), revision 11.0 (2022-08-26). All struct
8//! field names and offsets in this module match that document.
9//!
10//! Supported subset (per design doc §"The PE loader"):
11//!
12//! * PE32 only. PE32+ (`Magic == 0x20B`) → reject.
13//! * `IMAGE_FILE_MACHINE_I386` only.
14//! * No .NET CLR. `IMAGE_DIRECTORY_ENTRY_COMHEADER` non-zero →
15//!   reject.
16//! * No delay-load imports. The directory entry must be empty.
17//! * No SxS manifest dependencies. (We don't enforce — codecs
18//!   never have one.)
19//!
20//! Reject-paths return [`PeError`]; everything else surfaces as
21//! a well-formed [`Image`].
22
23pub mod exports;
24pub mod header;
25pub mod imports;
26pub mod reloc;
27pub mod sections;
28
29use std::collections::BTreeMap;
30
31use crate::emulator::{mmu::Mmu, Trap};
32use crate::win32::{HostState, Registry};
33
34/// PE-loader-specific error variants.
35#[derive(Debug, Clone, PartialEq, Eq)]
36pub enum PeError {
37    /// File too small to contain a DOS header.
38    TooSmall { got: usize, need: usize },
39    /// "MZ" signature missing at offset 0.
40    NotMz,
41    /// `e_lfanew` points outside the file.
42    BadELfanew { offset: u32, file_len: usize },
43    /// "PE\0\0" signature missing at `e_lfanew`.
44    NotPe,
45    /// Optional-header magic indicates PE32+ (64-bit).
46    Pe32PlusUnsupported,
47    /// Optional-header magic is neither PE32 nor PE32+.
48    BadOptionalHeaderMagic { magic: u16 },
49    /// File-header machine field is not `IMAGE_FILE_MACHINE_I386`.
50    UnsupportedMachine { machine: u16 },
51    /// A directory entry refers to bytes outside the image.
52    DirectoryOutOfRange {
53        name: &'static str,
54        rva: u32,
55        size: u32,
56    },
57    /// `IMAGE_DIRECTORY_ENTRY_COMHEADER` non-zero — managed PE.
58    ManagedPe,
59    /// Referenced DLL is not registered with the Win32 stub
60    /// registry.
61    UnknownImportDll { dll: String },
62    /// Specific function in a known DLL is not registered.
63    UnknownImportFunction { dll: String, name: String },
64    /// Section `SizeOfRawData` overflows the file or
65    /// `VirtualSize` overflows the section table.
66    SectionOutOfRange {
67        name: String,
68        raw_off: u32,
69        raw_size: u32,
70    },
71    /// Base relocation block was malformed.
72    BadRelocBlock { rva: u32, reason: &'static str },
73    /// Memory-map operation traps. Wrapped here so the loader's
74    /// public API exposes a single error type.
75    Trap(Trap),
76}
77
78impl core::fmt::Display for PeError {
79    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
80        match self {
81            PeError::TooSmall { got, need } => {
82                write!(f, "PE file too small: {got} bytes, need ≥ {need}")
83            }
84            PeError::NotMz => f.write_str("missing 'MZ' DOS signature"),
85            PeError::BadELfanew { offset, file_len } => {
86                write!(f, "e_lfanew {offset:#x} outside file (len {file_len})")
87            }
88            PeError::NotPe => f.write_str("missing 'PE\\0\\0' signature"),
89            PeError::Pe32PlusUnsupported => f.write_str("PE32+ (64-bit) not supported"),
90            PeError::BadOptionalHeaderMagic { magic } => {
91                write!(f, "bad optional-header magic {magic:#x}")
92            }
93            PeError::UnsupportedMachine { machine } => {
94                write!(f, "machine {machine:#x} is not IMAGE_FILE_MACHINE_I386")
95            }
96            PeError::DirectoryOutOfRange { name, rva, size } => {
97                write!(
98                    f,
99                    "directory {name} (rva {rva:#x}, size {size}) out of image range"
100                )
101            }
102            PeError::ManagedPe => f.write_str("managed (.NET) PE not supported"),
103            PeError::UnknownImportDll { dll } => {
104                write!(f, "no Round-1 stub registry entry for DLL '{dll}'")
105            }
106            PeError::UnknownImportFunction { dll, name } => {
107                write!(f, "no stub for {dll}!{name}")
108            }
109            PeError::SectionOutOfRange {
110                name,
111                raw_off,
112                raw_size,
113            } => {
114                write!(
115                    f,
116                    "section '{name}' raw bytes [{raw_off:#x}..+{raw_size}] out of file"
117                )
118            }
119            PeError::BadRelocBlock { rva, reason } => {
120                write!(f, "malformed reloc block at rva {rva:#x}: {reason}")
121            }
122            PeError::Trap(t) => write!(f, "MMU trap during load: {t}"),
123        }
124    }
125}
126
127impl From<Trap> for PeError {
128    fn from(t: Trap) -> Self {
129        PeError::Trap(t)
130    }
131}
132
133/// A loaded PE image.
134#[derive(Debug, Clone)]
135pub struct Image {
136    /// Path/identifier used during loading. May be empty.
137    pub name: String,
138    /// Final image base in emulator memory (after relocation if
139    /// the preferred base was occupied; round-1 always uses the
140    /// preferred base).
141    pub image_base: u32,
142    /// `OptionalHeader.AddressOfEntryPoint` resolved to a
143    /// VA (= `image_base + AddressOfEntryPoint`).
144    pub entry_point: u32,
145    /// Total size of the image in memory (rounded to
146    /// `SectionAlignment`).
147    pub size_of_image: u32,
148    /// Section descriptors, in load order.
149    pub sections: Vec<sections::Section>,
150    /// `(name → RVA-of-export)` table, RVA relative to
151    /// `image_base`. Populated by [`exports::parse_exports`].
152    pub exports: BTreeMap<String, u32>,
153}
154
155impl Image {
156    /// Resolve an exported symbol to a guest VA.
157    pub fn export(&self, name: &str) -> Option<u32> {
158        self.exports
159            .get(name)
160            .map(|rva| self.image_base.wrapping_add(*rva))
161    }
162}
163
164/// PE loader options.
165#[derive(Debug, Default, Clone)]
166pub struct LoadOptions {
167    /// Strict (default) vs fail-soft import resolution. Fail-
168    /// soft installs a trap thunk for every unknown import so
169    /// loading succeeds and execution proceeds until the first
170    /// unimplemented API actually gets called.
171    pub imports: imports::ResolveMode,
172    /// When fail-soft is on, every unresolved (dll, name) is
173    /// pushed here for the caller to log. Ignored in strict
174    /// mode (every miss is already a load-time error).
175    pub fail_soft_log: Option<Vec<(String, String)>>,
176    /// Override the PE's preferred image base. When `Some`,
177    /// the loader maps sections at the given address and walks
178    /// the relocation table to fix up absolute references.
179    /// `CreateProcessA` uses this to load each child PE at a
180    /// unique base so multiple processes can coexist in the
181    /// shared MMU. PEs without relocations (`IMAGE_FILE_RELOCS_STRIPPED`)
182    /// are rejected when a target base is requested that
183    /// differs from the preferred one.
184    pub target_image_base: Option<u32>,
185}
186
187impl Default for imports::ResolveMode {
188    fn default() -> Self {
189        Self::Strict
190    }
191}
192
193/// PE loader entry point.
194pub struct Loader<'a> {
195    mmu: &'a mut Mmu,
196    registry: &'a mut Registry,
197    host: &'a mut HostState,
198}
199
200impl<'a> Loader<'a> {
201    pub fn new(mmu: &'a mut Mmu, registry: &'a mut Registry, host: &'a mut HostState) -> Self {
202        Loader {
203            mmu,
204            registry,
205            host,
206        }
207    }
208
209    /// Parse + load a PE image from a byte slice. The image
210    /// starts at offset 0 in `bytes`; `name` is recorded for
211    /// diagnostics + module-handle lookups.
212    pub fn load(&mut self, name: &str, bytes: &[u8]) -> Result<Image, PeError> {
213        self.load_with_options(name, bytes, &mut LoadOptions::default())
214    }
215
216    /// Variant of [`Loader::load`] with explicit options.
217    /// `fail_soft_log` in `options` is populated with the
218    /// `(dll, name)` of every import that received a fallback
219    /// thunk.
220    pub fn load_with_options(
221        &mut self,
222        name: &str,
223        bytes: &[u8],
224        options: &mut LoadOptions,
225    ) -> Result<Image, PeError> {
226        let parsed = header::parse(bytes)?;
227
228        let preferred_base = parsed.optional.image_base;
229        let load_base = options.target_image_base.unwrap_or(preferred_base);
230        let delta = load_base.wrapping_sub(preferred_base);
231
232        // Map sections at the chosen base.
233        let secs = sections::map_sections_at(self.mmu, &parsed, bytes, load_base)?;
234
235        // Apply base relocations. When loading at the preferred
236        // base, delta is 0 and the relocation pass is a no-op
237        // walk (still exercised for malformed-block detection).
238        // When the target base differs, the relocation table
239        // patches absolute references; PEs without a `.reloc`
240        // table can only be loaded at their preferred base.
241        if delta != 0 {
242            reloc::apply(self.mmu, &parsed, load_base, delta)?;
243        }
244
245        // Resolve imports.
246        imports::resolve_with(
247            self.mmu,
248            &parsed,
249            load_base,
250            self.registry,
251            options.imports,
252            options.fail_soft_log.as_mut(),
253        )?;
254
255        // Build export table.
256        let exports = exports::parse_exports(&parsed, bytes, load_base)?;
257
258        // Stamp final permissions per section Characteristics
259        // flags. Done last so write_initializer in earlier steps
260        // does not need W on code pages.
261        for s in &secs {
262            sections::apply_section_permissions(self.mmu, s);
263        }
264
265        let image = Image {
266            name: name.to_string(),
267            image_base: load_base,
268            entry_point: load_base.wrapping_add(parsed.optional.address_of_entry_point),
269            size_of_image: parsed.optional.size_of_image,
270            sections: secs,
271            exports,
272        };
273
274        // Record the module so subsequent LoadLibraryA /
275        // GetModuleHandleA calls return the right ImageBase.
276        self.host
277            .modules
278            .insert(name.to_ascii_lowercase(), load_base);
279
280        // Round 12 — record the resource directory location so
281        // FindResourceA / LoadResource / LockResource can walk it.
282        // PE/COFF Data Directory entry 2 is the Resource Table.
283        let rsrc = parsed.optional.data_directories[2];
284        if rsrc.virtual_address != 0 && rsrc.size != 0 {
285            self.host
286                .module_resource_dirs
287                .insert(load_base, load_base.wrapping_add(rsrc.virtual_address));
288        }
289
290        Ok(image)
291    }
292}
293
294/// Helper to synthesise a minimal valid PE32 DLL byte-by-byte
295/// for tests. Used by both unit tests and the integration test
296/// `tests/m1_load_dll_main.rs`. Always compiled, but only
297/// referenced from `cfg(test)` paths in this crate's own
298/// codebase.
299pub mod test_image;
300
#[cfg(test)]
mod tests {
    use super::test_image::build_minimal_dll;
    use super::*;
    use crate::win32::HostState;

    /// Distance from the "PE\0\0" signature to the optional header:
    /// PE signature (4) + IMAGE_FILE_HEADER (20).
    const OPTIONAL_HEADER_DELTA: usize = 4 + 20;

    /// Read `e_lfanew` (little-endian u32 at file offset 0x3C), i.e.
    /// the file offset of the PE signature.
    fn pe_signature_offset(image: &[u8]) -> usize {
        let mut e_lfanew = [0u8; 4];
        e_lfanew.copy_from_slice(&image[0x3C..0x40]);
        u32::from_le_bytes(e_lfanew) as usize
    }

    /// Load the synthesised DLL as "synth.dll" with kernel32 stubs
    /// registered, panicking if the load fails. The caller keeps
    /// ownership of the MMU and registry so it can inspect them.
    fn load_synth(mmu: &mut Mmu, registry: &mut Registry) -> Image {
        let bytes = build_minimal_dll();
        registry.register_kernel32();
        let mut host = HostState::new(0x6000_0000, 0x7000_0000);
        let mut loader = Loader::new(mmu, registry, &mut host);
        loader.load("synth.dll", &bytes).unwrap()
    }

    /// Attempt to load `image` as "bad.dll" against an empty
    /// registry and a zeroed host state, returning the raw outcome.
    fn load_bad(image: &[u8]) -> Result<Image, PeError> {
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        let mut host = HostState::new(0, 0);
        let mut loader = Loader::new(&mut mmu, &mut registry, &mut host);
        loader.load("bad.dll", image)
    }

    #[test]
    fn load_minimal_synthesised_dll_succeeds() {
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        let img = load_synth(&mut mmu, &mut registry);
        assert_eq!(img.image_base, 0x1000_0000);
        // Entry point + 1 byte (the RET) must be readable+executable.
        assert!(mmu.fetch_x8(img.entry_point).is_ok());
    }

    #[test]
    fn rejects_non_mz() {
        let bytes = vec![0u8; 1024];
        match load_bad(&bytes) {
            Err(PeError::NotMz) => (),
            other => panic!("expected NotMz, got {other:?}"),
        }
    }

    #[test]
    fn rejects_pe32_plus() {
        let mut bytes = build_minimal_dll();
        // Bend the optional-header magic to 0x20B (PE32+).
        let magic_at = pe_signature_offset(&bytes) + OPTIONAL_HEADER_DELTA;
        bytes[magic_at..magic_at + 2].copy_from_slice(&0x020B_u16.to_le_bytes());
        match load_bad(&bytes) {
            Err(PeError::Pe32PlusUnsupported) => (),
            other => panic!("expected Pe32PlusUnsupported, got {other:?}"),
        }
    }

    #[test]
    fn rejects_managed_pe() {
        let mut bytes = build_minimal_dll();
        // Set IMAGE_DIRECTORY_ENTRY_COMHEADER (#14) RVA=1, Size=8.
        // For PE32 the data directories start 96 bytes into the
        // optional header (its standard fields), and each entry is
        // 8 bytes wide.
        let entry_at = pe_signature_offset(&bytes) + OPTIONAL_HEADER_DELTA + 96 + 14 * 8;
        bytes[entry_at..entry_at + 4].copy_from_slice(&1u32.to_le_bytes());
        bytes[entry_at + 4..entry_at + 8].copy_from_slice(&8u32.to_le_bytes());
        match load_bad(&bytes) {
            Err(PeError::ManagedPe) => (),
            other => panic!("expected ManagedPe, got {other:?}"),
        }
    }

    #[test]
    fn export_by_name_resolves_to_va() {
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        let img = load_synth(&mut mmu, &mut registry);
        // The synthesised DLL exports DllMain at the entry-point.
        let p = img.export("DllMain").expect("DllMain export");
        assert_eq!(p, img.entry_point);
    }

    #[test]
    fn iat_is_populated_with_thunks() {
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        let img = load_synth(&mut mmu, &mut registry);
        // The IAT slot for kernel32!GetProcessHeap should now
        // hold the thunk address registered for that stub.
        let expected = registry.resolve("kernel32.dll", "GetProcessHeap").unwrap();
        // The synth DLL plants its IAT for one import — read it
        // back from the image. test_image::IAT_RVA is the RVA.
        let slot_va = img.image_base + super::test_image::IAT_RVA;
        let iat = mmu.load32(slot_va).unwrap();
        assert_eq!(iat, expected);
    }
}