ud-emulator 0.2.0

Pure-Rust 32-bit x86 emulator + PE runtime loader + Win32 host shims. Mirrors oxideav-vfw; intended to grow into the dynamic-analysis backend that informs decompilation (indirect-target recovery, constant-data discovery).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
//! PE32 loader — parses Microsoft Portable Executable images,
//! maps them into the emulator MMU, applies base relocations,
//! resolves imports against the [`crate::win32::Registry`], and
//! exposes export-by-name lookup.
//!
//! Reference: Microsoft "PE Format" (a.k.a. "Microsoft PE and
//! COFF Specification"), revision 11.0 (2022-08-26). All struct
//! field names and offsets in this module match that document.
//!
//! Supported subset (per design doc §"The PE loader"):
//!
//! * PE32 only. PE32+ (`Magic == 0x20B`) → reject.
//! * `IMAGE_FILE_MACHINE_I386` only.
//! * No .NET CLR. `IMAGE_DIRECTORY_ENTRY_COMHEADER` non-zero →
//!   reject.
//! * No delay-load imports. The directory entry must be empty.
//! * No SxS manifest dependencies. (We don't enforce — codecs
//!   never have one.)
//!
//! Reject-paths return [`PeError`]; everything else surfaces as
//! a well-formed [`Image`].

pub mod exports;
pub mod header;
pub mod imports;
pub mod reloc;
pub mod sections;

use std::collections::BTreeMap;

use crate::emulator::{mmu::Mmu, Trap};
use crate::win32::{HostState, Registry};

/// PE-loader-specific error variants.
///
/// This is the single error type returned by [`Loader::load`]
/// and [`Loader::load_with_options`]. MMU traps raised while
/// loading are wrapped in [`PeError::Trap`]; the `From<Trap>`
/// impl in this module lets `?` perform that conversion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PeError {
    /// File too small to contain a DOS header.
    ///
    /// `got` is the number of bytes available, `need` the
    /// minimum number required.
    TooSmall { got: usize, need: usize },
    /// "MZ" signature missing at offset 0.
    NotMz,
    /// `e_lfanew` points outside the file.
    ///
    /// `offset` is the offending `e_lfanew` value, `file_len`
    /// the length of the input in bytes.
    BadELfanew { offset: u32, file_len: usize },
    /// "PE\0\0" signature missing at `e_lfanew`.
    NotPe,
    /// Optional-header magic indicates PE32+ (64-bit).
    Pe32PlusUnsupported,
    /// Optional-header magic is neither PE32 nor PE32+.
    BadOptionalHeaderMagic { magic: u16 },
    /// File-header machine field is not `IMAGE_FILE_MACHINE_I386`.
    UnsupportedMachine { machine: u16 },
    /// A directory entry refers to bytes outside the image.
    ///
    /// `name` identifies the directory; `rva` and `size` are
    /// the values taken from its data-directory entry.
    DirectoryOutOfRange {
        name: &'static str,
        rva: u32,
        size: u32,
    },
    /// `IMAGE_DIRECTORY_ENTRY_COMHEADER` non-zero — managed PE.
    ManagedPe,
    /// Referenced DLL is not registered with the Win32 stub
    /// registry.
    UnknownImportDll { dll: String },
    /// Specific function in a known DLL is not registered.
    UnknownImportFunction { dll: String, name: String },
    /// Section `SizeOfRawData` overflows the file or
    /// `VirtualSize` overflows the section table.
    ///
    /// `raw_off` / `raw_size` describe the section's raw-data
    /// range within the file.
    SectionOutOfRange {
        name: String,
        raw_off: u32,
        raw_size: u32,
    },
    /// Base relocation block was malformed.
    ///
    /// `rva` locates the block; `reason` is a short static
    /// description of what was wrong with it.
    BadRelocBlock { rva: u32, reason: &'static str },
    /// Memory-map operation traps. Wrapped here so the loader's
    /// public API exposes a single error type.
    Trap(Trap),
}

impl core::fmt::Display for PeError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            PeError::TooSmall { got, need } => {
                write!(f, "PE file too small: {got} bytes, need ≥ {need}")
            }
            PeError::NotMz => f.write_str("missing 'MZ' DOS signature"),
            PeError::BadELfanew { offset, file_len } => {
                write!(f, "e_lfanew {offset:#x} outside file (len {file_len})")
            }
            PeError::NotPe => f.write_str("missing 'PE\\0\\0' signature"),
            PeError::Pe32PlusUnsupported => f.write_str("PE32+ (64-bit) not supported"),
            PeError::BadOptionalHeaderMagic { magic } => {
                write!(f, "bad optional-header magic {magic:#x}")
            }
            PeError::UnsupportedMachine { machine } => {
                write!(f, "machine {machine:#x} is not IMAGE_FILE_MACHINE_I386")
            }
            PeError::DirectoryOutOfRange { name, rva, size } => {
                write!(
                    f,
                    "directory {name} (rva {rva:#x}, size {size}) out of image range"
                )
            }
            PeError::ManagedPe => f.write_str("managed (.NET) PE not supported"),
            PeError::UnknownImportDll { dll } => {
                write!(f, "no Round-1 stub registry entry for DLL '{dll}'")
            }
            PeError::UnknownImportFunction { dll, name } => {
                write!(f, "no stub for {dll}!{name}")
            }
            PeError::SectionOutOfRange {
                name,
                raw_off,
                raw_size,
            } => {
                write!(
                    f,
                    "section '{name}' raw bytes [{raw_off:#x}..+{raw_size}] out of file"
                )
            }
            PeError::BadRelocBlock { rva, reason } => {
                write!(f, "malformed reloc block at rva {rva:#x}: {reason}")
            }
            PeError::Trap(t) => write!(f, "MMU trap during load: {t}"),
        }
    }
}

impl From<Trap> for PeError {
    fn from(t: Trap) -> Self {
        PeError::Trap(t)
    }
}

/// A loaded PE image.
///
/// Built by [`Loader::load_with_options`] after sections have
/// been mapped, imports resolved and the export table parsed.
#[derive(Debug, Clone)]
pub struct Image {
    /// Path/identifier used during loading. May be empty.
    pub name: String,
    /// Final image base in emulator memory: the
    /// `LoadOptions::target_image_base` override when one was
    /// supplied, otherwise the preferred `ImageBase` from the
    /// optional header.
    pub image_base: u32,
    /// `OptionalHeader.AddressOfEntryPoint` resolved to a
    /// VA (= `image_base + AddressOfEntryPoint`, wrapping on
    /// 32-bit overflow).
    pub entry_point: u32,
    /// Total size of the image in memory, taken from the parsed
    /// `OptionalHeader.SizeOfImage` (which the PE format
    /// specifies as a multiple of `SectionAlignment`).
    pub size_of_image: u32,
    /// Section descriptors, in load order.
    pub sections: Vec<sections::Section>,
    /// `(name → RVA-of-export)` table, RVA relative to
    /// `image_base`. Populated by [`exports::parse_exports`];
    /// [`Image::export`] adds `image_base` to turn an entry
    /// into a VA.
    pub exports: BTreeMap<String, u32>,
}

impl Image {
    /// Look up `name` in the export table and return its guest
    /// VA (`image_base` + export RVA, wrapping on 32-bit
    /// overflow), or `None` when no export has that name.
    pub fn export(&self, name: &str) -> Option<u32> {
        let rva = *self.exports.get(name)?;
        Some(self.image_base.wrapping_add(rva))
    }
}

/// PE loader options.
///
/// `LoadOptions::default()` selects the default
/// [`imports::ResolveMode`] (strict), no fail-soft log, and no
/// base override (the image loads at its preferred base).
#[derive(Debug, Default, Clone)]
pub struct LoadOptions {
    /// Strict (default) vs fail-soft import resolution. Fail-
    /// soft installs a trap thunk for every unknown import so
    /// loading succeeds and execution proceeds until the first
    /// unimplemented API actually gets called.
    pub imports: imports::ResolveMode,
    /// When fail-soft is on, every unresolved (dll, name) is
    /// pushed here for the caller to log. Ignored in strict
    /// mode (every miss is already a load-time error).
    ///
    /// The loader hands this to `imports::resolve_with` as an
    /// `Option<&mut Vec<_>>`. NOTE(review): with `None` (the
    /// default) misses are presumably not recorded anywhere —
    /// confirm in `imports`.
    pub fail_soft_log: Option<Vec<(String, String)>>,
    /// Override the PE's preferred image base. When `Some`,
    /// the loader maps sections at the given address and, if
    /// that address differs from the preferred base, walks
    /// the relocation table to fix up absolute references.
    /// `CreateProcessA` uses this to load each child PE at a
    /// unique base so multiple processes can coexist in the
    /// shared MMU. PEs without relocations (`IMAGE_FILE_RELOCS_STRIPPED`)
    /// are rejected when a target base is requested that
    /// differs from the preferred one.
    pub target_image_base: Option<u32>,
}

impl Default for imports::ResolveMode {
    fn default() -> Self {
        Self::Strict
    }
}

/// PE loader entry point.
///
/// Holds mutable borrows of the emulator pieces a load touches
/// for the lifetime `'a`; create one with [`Loader::new`] and
/// call [`Loader::load`] / [`Loader::load_with_options`].
pub struct Loader<'a> {
    // Guest memory that sections are mapped into and patched in.
    mmu: &'a mut Mmu,
    // Win32 stub registry that imports are resolved against.
    registry: &'a mut Registry,
    // Host-side state; a successful load records the module's
    // base (and resource directory, if any) here.
    host: &'a mut HostState,
}

impl<'a> Loader<'a> {
    /// Create a loader that maps images into `mmu`, resolves
    /// imports against `registry`, and records successfully
    /// loaded modules in `host`.
    pub fn new(mmu: &'a mut Mmu, registry: &'a mut Registry, host: &'a mut HostState) -> Self {
        Loader {
            mmu,
            registry,
            host,
        }
    }

    /// Parse + load a PE image from a byte slice. The image
    /// starts at offset 0 in `bytes`; `name` is recorded for
    /// diagnostics + module-handle lookups.
    ///
    /// Equivalent to [`Loader::load_with_options`] with
    /// `LoadOptions::default()`.
    ///
    /// # Errors
    ///
    /// Same as [`Loader::load_with_options`].
    pub fn load(&mut self, name: &str, bytes: &[u8]) -> Result<Image, PeError> {
        self.load_with_options(name, bytes, &mut LoadOptions::default())
    }

    /// Variant of [`Loader::load`] with explicit options.
    /// `fail_soft_log` in `options` is populated with the
    /// `(dll, name)` of every import that received a fallback
    /// thunk.
    ///
    /// Steps, in order: parse headers, map sections at the
    /// chosen base, apply base relocations (only when rebased),
    /// resolve imports, parse exports, stamp final section
    /// permissions, then record the module (and its resource
    /// directory, if present) in the host state.
    ///
    /// # Errors
    ///
    /// Returns the first [`PeError`] produced by header parsing,
    /// section mapping, relocation, import resolution or export
    /// parsing. The module is only recorded in the host state
    /// when every step succeeded. NOTE(review): nothing in this
    /// function undoes section mappings made before a later
    /// step fails.
    pub fn load_with_options(
        &mut self,
        name: &str,
        bytes: &[u8],
        options: &mut LoadOptions,
    ) -> Result<Image, PeError> {
        // PE/COFF data-directory index of the Resource Table.
        const IMAGE_DIRECTORY_ENTRY_RESOURCE: usize = 2;

        let parsed = header::parse(bytes)?;

        let preferred_base = parsed.optional.image_base;
        let load_base = options.target_image_base.unwrap_or(preferred_base);
        let delta = load_base.wrapping_sub(preferred_base);

        // Map sections at the chosen base.
        let secs = sections::map_sections_at(self.mmu, &parsed, bytes, load_base)?;

        // Apply base relocations only when the image is rebased.
        // At the preferred base (delta == 0) there is nothing to
        // patch, so the relocation table is not walked at all —
        // which also means malformed blocks go undetected in
        // that case. When the target base differs, the
        // relocation table patches absolute references; PEs
        // without a `.reloc` table can only be loaded at their
        // preferred base.
        if delta != 0 {
            reloc::apply(self.mmu, &parsed, load_base, delta)?;
        }

        // Resolve imports.
        imports::resolve_with(
            self.mmu,
            &parsed,
            load_base,
            self.registry,
            options.imports,
            options.fail_soft_log.as_mut(),
        )?;

        // Build export table.
        let exports = exports::parse_exports(&parsed, bytes, load_base)?;

        // Stamp final permissions per section Characteristics
        // flags. Done last so write_initializer in earlier steps
        // does not need W on code pages.
        for s in &secs {
            sections::apply_section_permissions(self.mmu, s);
        }

        let image = Image {
            name: name.to_string(),
            image_base: load_base,
            entry_point: load_base.wrapping_add(parsed.optional.address_of_entry_point),
            size_of_image: parsed.optional.size_of_image,
            sections: secs,
            exports,
        };

        // Record the module so subsequent LoadLibraryA /
        // GetModuleHandleA calls return the right ImageBase.
        // Keys are ASCII-lowercased.
        self.host
            .modules
            .insert(name.to_ascii_lowercase(), load_base);

        // Round 12 — record the resource directory location so
        // FindResourceA / LoadResource / LockResource can walk it.
        // The PE format lets an image declare fewer data
        // directories than the usual 16, so the entry is looked
        // up with a checked `get` rather than by direct indexing.
        // NOTE(review): if `data_directories` is a fixed-size
        // array the lookup always succeeds.
        let resource_dir = parsed
            .optional
            .data_directories
            .get(IMAGE_DIRECTORY_ENTRY_RESOURCE)
            .filter(|dir| dir.virtual_address != 0 && dir.size != 0);
        if let Some(dir) = resource_dir {
            self.host
                .module_resource_dirs
                .insert(load_base, load_base.wrapping_add(dir.virtual_address));
        }

        Ok(image)
    }
}

/// Helper to synthesise a minimal valid PE32 DLL byte-by-byte
/// for tests. Used by both unit tests and the integration test
/// `tests/m1_load_dll_main.rs`. Always compiled, but only
/// referenced from `cfg(test)` paths in this crate's own
/// codebase.
pub mod test_image;

#[cfg(test)]
mod tests {
    use super::test_image::build_minimal_dll;
    use super::*;
    use crate::win32::HostState;

    /// Size of the "PE\0\0" signature.
    const PE_SIG_LEN: usize = 4;
    /// Size of `IMAGE_FILE_HEADER`.
    const FILE_HEADER_LEN: usize = 20;
    /// Size of the PE32 optional-header fields that precede the
    /// data directories.
    const OPT_STANDARD_LEN: usize = 96;

    /// Read `e_lfanew` (little-endian u32 at DOS-header offset
    /// 0x3C), i.e. the file offset of the PE signature.
    fn pe_signature_offset(bytes: &[u8]) -> usize {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&bytes[0x3C..0x40]);
        u32::from_le_bytes(raw) as usize
    }

    /// Load `bytes` as "bad.dll" into a fresh emulator with an
    /// empty stub registry and return the loader's verdict.
    fn load_bad_dll(bytes: &[u8]) -> Result<Image, PeError> {
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        let mut host = HostState::new(0, 0);
        let mut loader = Loader::new(&mut mmu, &mut registry, &mut host);
        loader.load("bad.dll", bytes)
    }

    /// Load the synthesised DLL as "synth.dll" with kernel32
    /// stubs registered, then hand the MMU, registry and loaded
    /// image to `check`.
    fn with_synth_dll(check: impl FnOnce(&mut Mmu, &mut Registry, &Image)) {
        let bytes = build_minimal_dll();
        let mut mmu = Mmu::new();
        let mut registry = Registry::new();
        registry.register_kernel32();
        let mut host = HostState::new(0x6000_0000, 0x7000_0000);
        let img = Loader::new(&mut mmu, &mut registry, &mut host)
            .load("synth.dll", &bytes)
            .unwrap();
        check(&mut mmu, &mut registry, &img);
    }

    #[test]
    fn load_minimal_synthesised_dll_succeeds() {
        with_synth_dll(|mmu, _registry, img| {
            assert_eq!(img.image_base, 0x1000_0000);
            // The byte at the entry point (the RET) must be
            // fetchable for execution.
            assert!(mmu.fetch_x8(img.entry_point).is_ok());
        });
    }

    #[test]
    fn rejects_non_mz() {
        // All-zero input: no "MZ" at offset 0.
        let bytes = vec![0u8; 1024];
        let outcome = load_bad_dll(&bytes);
        assert!(
            matches!(outcome, Err(PeError::NotMz)),
            "expected NotMz, got {outcome:?}"
        );
    }

    #[test]
    fn rejects_pe32_plus() {
        let mut bytes = build_minimal_dll();
        // Overwrite the optional-header magic with 0x20B (PE32+).
        let opt_magic_off = pe_signature_offset(&bytes) + PE_SIG_LEN + FILE_HEADER_LEN;
        bytes[opt_magic_off..opt_magic_off + 2].copy_from_slice(&0x020Bu16.to_le_bytes());
        let outcome = load_bad_dll(&bytes);
        assert!(
            matches!(outcome, Err(PeError::Pe32PlusUnsupported)),
            "expected Pe32PlusUnsupported, got {outcome:?}"
        );
    }

    #[test]
    fn rejects_managed_pe() {
        let mut bytes = build_minimal_dll();
        // Data directories follow the 96 bytes of PE32 optional-
        // header fields; each entry is 8 bytes (RVA, Size).
        let dirs_off = pe_signature_offset(&bytes) + PE_SIG_LEN + FILE_HEADER_LEN + OPT_STANDARD_LEN;
        // IMAGE_DIRECTORY_ENTRY_COMHEADER is entry #14: set
        // RVA=1, Size=8.
        let comheader_off = dirs_off + 14 * 8;
        let (rva_field, size_field) = bytes[comheader_off..comheader_off + 8].split_at_mut(4);
        rva_field.copy_from_slice(&1u32.to_le_bytes());
        size_field.copy_from_slice(&8u32.to_le_bytes());
        let outcome = load_bad_dll(&bytes);
        assert!(
            matches!(outcome, Err(PeError::ManagedPe)),
            "expected ManagedPe, got {outcome:?}"
        );
    }

    #[test]
    fn export_by_name_resolves_to_va() {
        with_synth_dll(|_mmu, _registry, img| {
            // The synthesised DLL exports DllMain at the entry-point.
            let dll_main = img.export("DllMain").expect("DllMain export");
            assert_eq!(dll_main, img.entry_point);
        });
    }

    #[test]
    fn iat_is_populated_with_thunks() {
        with_synth_dll(|mmu, registry, img| {
            // Thunk address the registry hands out for
            // kernel32!GetProcessHeap.
            let expected = registry.resolve("kernel32.dll", "GetProcessHeap").unwrap();
            // The synth DLL has a single import; its IAT slot
            // lives at test_image::IAT_RVA and must now hold
            // that thunk address.
            let iat = mmu
                .load32(img.image_base + super::test_image::IAT_RVA)
                .unwrap();
            assert_eq!(iat, expected);
        });
    }
}