ud_emulator/win32/mod.rs
1//! Win32 stub registry + per-DLL host implementations of the
2//! functions the loaded codec DLLs import.
3//!
4//! Each stub is a Rust function pointer with the signature
5//! [`StubFn`]. The PE loader, when populating the IAT, looks up
6//! `(dll_name_lowercased, function_name)` in [`Registry`] and
7//! writes the synthetic `StubAddr` (a guest address that lives
8//! in the unmapped "thunk space" near `0xFFFE_0000`) into the
9//! IAT slot.
10//!
11//! At call time, the integer ISA executor sees `eip` jump to a
12//! thunk address. It detects this via [`Registry::is_thunk`]
13//! and dispatches to the stub directly, popping the right number
14//! of bytes off the guest stack for the calling convention.
15//!
//! All stubs are stdcall (callee-cleanup) for round 1; the
//! `arg_dwords` field of [`StubEntry`] carries the number of
//! argument dwords to pop. Round 2 will add cdecl
//! (caller-cleanup) once vfw32 needs it.
19//!
20//! Reference for each function: the corresponding MSDN page
21//! (linked in source comments next to each stub).
22
23use std::collections::BTreeMap;
24
25use crate::emulator::{Cpu, Mmu};
26
27pub mod advapi32;
28pub mod comctl32;
29pub mod gdi32;
30pub mod kernel32;
31pub mod mfplat;
32pub mod msi;
33pub mod msiexec;
34pub mod msvcrt;
35pub mod ole32;
36pub mod shell32;
37pub mod shlwapi;
38pub mod user32;
39pub mod version;
40pub mod vfw32;
41pub mod winmm;
42
/// Guest address of the first synthetic thunk slot.
///
/// Chosen well above any plausible `ImageBase + section.VirtualAddress`
/// so a thunk address cannot be mistaken for a real DLL byte. Each
/// registered stub gets the next 16-byte (`THUNK_STRIDE`) slot.
pub const THUNK_BASE: u32 = 0xFFFE_0000;
/// Distance, in bytes, between two consecutive thunk slots. With the
/// base at `0xFFFE_0000` there is room for `0x2_0000 / 16 = 8192`
/// slots below the top of the 32-bit address space.
const THUNK_STRIDE: u32 = 16;
49
/// Function-pointer signature shared by every Win32 stub.
///
/// # Returns
///
/// On success, the dword to place in `eax` when the stub returns.
/// Failures are reported as a [`Win32Error`].
///
/// # Arguments
///
/// A stub reads its own arguments off the guest stack through the
/// [`Cpu`] / [`Mmu`] handles. Once the stub has returned, the runtime
/// pops `arg_dwords * 4` bytes from the guest stack on its behalf
/// (stdcall callee-cleanup).
///
/// The `&Registry` parameter lets a stub re-enter the run loop and
/// call back into the guest. The round-2 `vfw32` stub surface uses
/// this: it has to dispatch the codec DLL's `DriverProc` before
/// returning to the IAT caller.
pub type StubFn = fn(&mut Cpu, &mut Mmu, &mut HostState, &Registry) -> Result<u32, Win32Error>;
63
/// A single recorded stub invocation, kept for later analysis.
///
/// Records are appended to [`HostState::stub_calls`], in call order,
/// for as long as [`HostState::trace_stubs`] is set.
#[derive(Debug, Clone)]
pub struct StubCall {
    /// DLL the call was aimed at (`"kernel32.dll"`, …).
    pub dll: String,
    /// Name of the called function (`"CreateFileA"`, …).
    pub name: String,
    /// Argument dwords read off the guest stack at call entry, i.e.
    /// before the stub ran. The length is the stub's stdcall
    /// `arg_dwords` count, or a per-call override for known cdecl
    /// shapes.
    pub args: Vec<u32>,
    /// The `eax` value the stub returned.
    pub ret: u32,
    /// EIP of the call site: the return address saved on the guest
    /// stack at call entry, which is where the codec resumes once
    /// the stub returns.
    pub call_site_eip: u32,
}
85
86/// Information stored alongside each stub.
87#[derive(Clone)]
88pub struct StubEntry {
89 pub dll: String,
90 pub name: String,
91 pub func: StubFn,
92 /// Number of dword arguments to pop off the stack (stdcall
93 /// callee-cleanup). cdecl callers will be added in round 2
94 /// with a separate flag.
95 pub arg_dwords: u32,
96 /// The synthetic guest address that, when called, invokes
97 /// this stub.
98 pub thunk_addr: u32,
99}
100
/// Failures a stub (or the machinery around it) can report.
/// Wrapped in `crate::Error::Win32`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Win32Error {
    /// The requested `(dll, name)` pair has no registered stub.
    /// This is a PE-load-time error and surfaces from
    /// `crate::pe::Loader::resolve_imports`.
    UnknownImport { dll: String, name: String },
    /// A stub rejected one of its arguments.
    InvalidArgument { stub: &'static str, reason: String },
    /// A heap call named an allocation the host does not know.
    InvalidHeapBlock { stub: &'static str, addr: u32 },
    /// The per-run budget configured through
    /// [`HostState::instruction_budget`] ran out before the guest
    /// reached `RET_SENTINEL`. Analysis front-ends rely on this to
    /// cap adversarial samples that loop. Whatever was captured up
    /// to that point (coverage map, stub trace, register snapshot)
    /// remains valid.
    BudgetExhausted { executed: u64 },
}

impl core::fmt::Display for Win32Error {
    /// Render the human-readable message for each variant.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Self::UnknownImport { dll, name } => {
                f.write_fmt(format_args!("no Round-1 stub for import {dll}!{name}"))
            }
            Self::InvalidArgument { stub, reason } => {
                f.write_fmt(format_args!("{stub}: {reason}"))
            }
            Self::InvalidHeapBlock { stub, addr } => {
                f.write_fmt(format_args!("{stub}: unknown heap allocation {addr:#010x}"))
            }
            Self::BudgetExhausted { executed } => f.write_fmt(format_args!(
                "instruction budget exhausted after {executed} steps without reaching RET_SENTINEL"
            )),
        }
    }
}
142
/// A row of the open-codec table. In MSDN's vfw32 vocabulary this is
/// a "Handle to Installable Compressor".
#[derive(Clone, Debug)]
pub struct HicEntry {
    /// Four-byte fcc type (`'VIDC'` for video).
    pub fcc_type: u32,
    /// Four-byte fcc handler (`'cvid'` for Cinepak, `'IV50'` for
    /// Indeo 5).
    pub fcc_handler: u32,
    /// Mode the codec was opened in (vfw.h: 1 = `ICMODE_COMPRESS`,
    /// 2 = `ICMODE_DECOMPRESS`, …).
    pub mode: u32,
    /// VA of the `DriverProc` export of the codec DLL, the entry
    /// point every IC* call is dispatched into.
    pub driver_proc_va: u32,
    /// The `dwDriverId` handed back to `DriverProc` on each call;
    /// it is the value `DriverProc(_, _, DRV_OPEN, _, _)` returned.
    pub driver_id: u32,
}
160
161/// Per-process emulator state. Phase 1 of the scheduler refactor
162/// (see `magical-popping-oasis` plan): every field that is
163/// conceptually scoped to a single Win32 *process* lives here.
164/// Today there is exactly one [`ProcessState`] per [`HostState`];
165/// Phase 5 will switch this to a `BTreeMap<pid, ProcessState>`
166/// indexed by an `active_pid` cursor, which keeps stub bodies
167/// unchanged (they reach state through [`HostState`]'s
168/// auto-deref).
169///
170/// The conceptual boundary: anything a child process spawned via
171/// `CreateProcessA` should NOT share with its parent goes here.
172/// Anything truly shared (virtual filesystem, virtual registry,
173/// host-side trace buffers, the clock, the instruction budget)
174/// stays on [`HostState`].
175#[derive(Default)]
176pub struct ProcessState {
177 /// Synthetic process identifier. `1` for the bootstrap
178 /// process; `CreateProcessA` mints monotonically
179 /// increasing values.
180 pub pid: u32,
181 /// PID of the process that called `CreateProcessA` to spawn
182 /// this one. `0` for the bootstrap (no parent).
183 pub parent_pid: u32,
184 /// Image base where the process's primary PE is mapped.
185 /// Each process gets a unique base so child PEs don't
186 /// collide with the parent — `0x00400000` for the bootstrap,
187 /// `0x10000000` / `0x20000000` / … for spawned children.
188 pub image_base: u32,
189 /// Exit code reported by `ExitProcess` / `TerminateProcess`
190 /// on this process; `None` while the process is still
191 /// running. Wakes any `WaitForSingleObject` on the process
192 /// handle when set.
193 pub exit_code: Option<u32>,
194 /// Heap allocations keyed by guest address.
195 pub heap: BTreeMap<u32, Vec<u8>>,
196 /// Cursor for the next heap allocation. Walks through a
197 /// dedicated guest-virtual region (configured by [`HostState::new`]).
198 pub heap_cursor: u32,
199 pub heap_arena_end: u32,
200 /// Default process heap handle returned by `GetProcessHeap`.
201 pub process_heap_handle: u32,
202 /// Loaded-module registry: name → ImageBase.
203 pub modules: BTreeMap<String, u32>,
204 /// Most-recently-loaded codec module's image base — returned
205 /// by `GetModuleHandleA(NULL)`. Set to 0 if no DLL has been
206 /// loaded yet.
207 pub primary_module_base: u32,
208 /// Open codec handles. Synthesised inside the host (no codec
209 /// guest memory is consumed); each handle is a small integer
210 /// the codec sees as an `HIC`.
211 pub hics: BTreeMap<u32, HicEntry>,
212 /// Counter for the next synthetic HIC. Starts at 1; 0 means
213 /// "open failed".
214 pub next_hic: u32,
215 /// Default `DriverProc` VA used when a host caller invokes an
216 /// `IC*` stub but has not staged a real codec image (i.e. for
217 /// the no-fixture unit tests). Set to 0 when no codec is
218 /// loaded — `ICOpen` then refuses to mint a HIC.
219 pub default_driver_proc: u32,
220 /// Set by `kernel32!ExitProcess` (and `TerminateProcess` on
221 /// the current process) to break out of the emulator loop in
222 /// lieu of unwinding to `RET_SENTINEL`. `Some(code)` means
223 /// "this process asked to terminate"; the run-loop converts
224 /// this into a clean return so the calling host code can
225 /// introspect what happened.
226 pub exit_requested: Option<u32>,
227 /// Read-only constant-data arena. Used by stubs like
228 /// `GetCommandLineA` / `GetEnvironmentStrings` that need to
229 /// hand out stable guest pointers to canned strings. The
230 /// slab grows by `arena_const_alloc` and lives at
231 /// `[const_arena_start, const_arena_end)`. Configured by
232 /// [`HostState::new`] like the heap arena.
233 pub const_arena_cursor: u32,
234 pub const_arena_end: u32,
235 /// Cached pointer to the canned `"oxideav-vfw\0"` command
236 /// line. Lazily populated by `GetCommandLineA`.
237 pub command_line_ptr: u32,
238 /// Cached pointer to the canned empty environment block.
239 pub environment_strings_ptr: u32,
240 /// Currently-live `HDC` values handed out by
241 /// `gdi32!CreateCompatibleDC` / `user32!GetDC`. `None` until
242 /// the first DC is allocated, then a populated set.
243 pub gdi_hdcs: Option<std::collections::BTreeSet<u32>>,
244 /// Round 26 — synthetic `HWND` registry. `CreateWindowExA`
245 /// hands out `HWND_BASE + n` values; `IsWindow` consults this
246 /// set; `DestroyWindow` removes from it. None of these HWNDs
247 /// back a real window — DirectShow / VfW codecs only need the
248 /// `HWND` value to feel non-NULL so they fall through to
249 /// their headless code path.
250 pub hwnd_registry: std::collections::BTreeSet<u32>,
251 /// Counter for the next synthetic HWND allocation. Starts at
252 /// 0; first HWND handed out is `HWND_BASE + 0`.
253 pub next_hwnd_index: u32,
254 /// Set of `DriverProc` VAs that have already received the
255 /// one-time `DRV_LOAD` + `DRV_ENABLE` initialisation pair.
256 /// Round 11 — without this, `IR50_32.DLL`'s `DRV_LOAD` handler
257 /// (which allocates the codec's huffman / inverse-DCT tables
258 /// at `[0x1009c770]`) never runs, and `ICDecompress` later
259 /// reads `[0x1009c770] == NULL` and bails with
260 /// `ICERR_BADIMAGE`. We track per-VA so multi-codec sandboxes
261 /// (round 12+) don't double-load the same driver.
262 pub loaded_drivers: std::collections::BTreeSet<u32>,
263 /// Per-loaded-module resource directory location:
264 /// `image_base → resource_dir_va`. Empty if the module has no
265 /// `.rsrc` (PE Data Directory entry 2). Round 12 needs this so
266 /// that `kernel32!FindResourceA` on `IR50_32.DLL` can locate
267 /// the RT_BITMAP/112 entry that holds the codec's huffman /
268 /// inverse-DCT tables. Without it the codec's `DRV_LOAD`
269 /// chain bails at `0x10034d31 (jz 0x10034f61)` and
270 /// `[0x1009c770]` stays NULL.
271 pub module_resource_dirs: BTreeMap<u32, u32>,
272 /// Round 25 — host-side bookkeeping for COM objects the
273 /// guest has handed back to the test harness (live class
274 /// factories + IBaseFilter pointers etc). See
275 /// [`crate::com::ComObjectTable`] for the data layout. Each
276 /// `ole32!CoCreateInstance` and every test-side
277 /// `query_interface` / `add_ref` / `release` updates this
278 /// table so a missing `Release` surfaces as a non-zero
279 /// `total_refcount()` at end-of-test.
280 pub com: crate::com::ComObjectTable,
281 /// Round 55 — PRNG state for `msvcrt!rand` calls from
282 /// sandboxed codec code. Default `1` matches MSVC's
283 /// documented "no `srand` called yet" initial state.
284 /// Updated by both `msvcrt!srand(seed)` (from guest code)
285 /// and by `Sandbox::set_rand_seed` / `with_rand_seed` (from
286 /// host code) — they share the same field so the host can
287 /// observe what the codec did to the state, and the codec
288 /// can override the host-staged seed via its own `srand`.
289 /// LCG step (Knuth-style, mod 2^32, output bits 30..16
290 /// masked to 15 bits per MSVC's documented contract):
291 ///
292 /// ```text
293 /// state = state * 214013 + 2531011 (mod 2^32)
294 /// rand = (state >> 16) & 0x7FFF
295 /// ```
296 pub rand_state: u32,
297 /// Cursor for the next `TlsAlloc` slot. Slot indices are
298 /// process-scoped (each `TlsAlloc` mints a fresh integer)
299 /// but the *values* stored at those indices live in
300 /// per-thread [`ThreadState::tls_slots`]. Phase 2 of the
301 /// scheduler refactor.
302 pub next_tls_slot: u32,
303 /// Bottom of the thread-stack pool. `CreateThread` carves
304 /// stacks from `[bottom, top)` walking down from
305 /// `next_thread_stack_top`. Both are `0` when no pool has
306 /// been configured — `CreateThread` reports an
307 /// `InvalidArgument` error in that case.
308 pub thread_stack_pool_bottom: u32,
309 /// Next available stack-top for the next `CreateThread`.
310 /// Decrements by [`THREAD_STACK_SIZE`] per spawned thread.
311 pub next_thread_stack_top: u32,
312 /// Bottom of the per-thread TIB (Thread Information Block)
313 /// pool. `CreateThread` carves a 4 KiB TIB region per
314 /// spawned thread. Phase 6 of the scheduler refactor.
315 pub tib_pool_bottom: u32,
316 /// Next available TIB base. Increments by
317 /// [`THREAD_TIB_SIZE`] per spawned thread.
318 pub next_tib_addr: u32,
319}
320
/// Per-thread TIB size, in bytes (`0x1000` = 4 KiB). The emulator
/// only needs room for the handful of fields installer / codec CRTs
/// actually touch (SEH chain head at 0x00, self pointer at 0x18,
/// LastError at 0x34, …).
/// NOTE(review): the previous wording compared this to a "much
/// larger" real Windows TEB while also quoting ~4 KiB for it;
/// confirm the intended comparison.
pub const THREAD_TIB_SIZE: u32 = 0x0000_1000;

/// Stride between consecutive child PE image bases (`0x1000_0000`
/// = 256 MiB). That gives each child plenty of room for sections +
/// heap + stack + TIB without colliding with adjacent processes.
pub const CHILD_IMAGE_STRIDE: u32 = 0x1000_0000;

/// Per-child-process heap arena size. Each spawned child carves
/// this much from the host's child-heap pool.
pub const CHILD_HEAP_SIZE: u32 = 0x0100_0000; // 16 MiB

/// Default per-thread stack size, in bytes (`0x1_0000` = 64 KiB).
/// This matches the typical Win32 reserve size; many codec /
/// installer threads use only a few hundred bytes.
pub const THREAD_STACK_SIZE: u32 = 0x0001_0000;
341
342impl ProcessState {
343 /// Construct a fresh process with the heap arena at
344 /// `[heap_start, heap_end)` (caller is responsible for
345 /// mapping that region in the MMU as R+W).
346 #[must_use]
347 pub fn new(heap_start: u32, heap_end: u32) -> Self {
348 ProcessState {
349 pid: 1,
350 heap_cursor: heap_start,
351 heap_arena_end: heap_end,
352 process_heap_handle: 0xDEAD_BEEF,
353 next_hic: 1,
354 rand_state: 1,
355 ..ProcessState::default()
356 }
357 }
358}
359
360/// Per-thread emulator state. Phase 2 of the scheduler refactor:
361/// the live `Cpu` in [`crate::Sandbox`] continues to drive
362/// execution, but every thread-local Win32 surface (TLS slots,
363/// priority, parked CPU register file from past quanta) lives
364/// here. Phase 3 will swap the live `Cpu` value with
365/// `ThreadState::parked_cpu` on context switch.
366///
367/// The `parked_cpu` field is `None` for the currently-running
368/// thread (its register file lives on `Sandbox::cpu`) and
369/// `Some(cpu)` for any thread that has been suspended,
370/// preempted, or is waiting on a synchronization object.
371pub struct ThreadState {
372 /// Synthetic thread identifier. The first thread is `1`;
373 /// `CreateThread` mints monotonically increasing values.
374 pub tid: u32,
375 /// Owning process. For Phase 2 every thread maps to
376 /// process `1`.
377 pub pid: u32,
378 /// Windows thread priority axis. Default is
379 /// `THREAD_PRIORITY_NORMAL = 0`. Range `-15..15` for the
380 /// realtime / idle extremes.
381 pub priority: i32,
382 /// Map of TLS slot → value, set by `TlsSetValue` and read
383 /// by `TlsGetValue`. Slot indices come from
384 /// [`ProcessState::next_tls_slot`].
385 pub tls_slots: BTreeMap<u32, u32>,
386 /// Parked register file. Phase 3 will populate this when
387 /// the scheduler swaps out the live `Cpu`.
388 pub parked_cpu: Option<Cpu>,
389 /// Quantum remaining for the scheduler's current slice.
390 /// Phase 4 will tick this down; for now it's a placeholder
391 /// initialised to the default quantum.
392 pub quantum_remaining: u32,
393 /// Lifecycle state — driven by the scheduler.
394 pub status: crate::sched::ThreadStatus,
395 /// Active wait, if `status == Waiting`. Cleared on wake.
396 pub wait: Option<crate::sched::WaitCondition>,
397 /// Per-thread TIB (Thread Information Block) base — guest
398 /// VA the thread's CPU references via FS:[0]. `0` for the
399 /// bootstrap thread (which uses the runtime's shared
400 /// `TEB_BASE`); `CreateThread` carves a fresh page out of
401 /// the per-process TIB pool for each new thread.
402 pub tib_addr: u32,
403}
404
405impl Default for ThreadState {
406 fn default() -> Self {
407 ThreadState {
408 tid: 0,
409 pid: 0,
410 priority: 0,
411 tls_slots: BTreeMap::new(),
412 parked_cpu: None,
413 quantum_remaining: DEFAULT_QUANTUM,
414 status: crate::sched::ThreadStatus::Ready,
415 wait: None,
416 tib_addr: 0,
417 }
418 }
419}
420
421impl ThreadState {
422 /// Construct a fresh thread bound to the given process.
423 #[must_use]
424 pub fn new(tid: u32, pid: u32) -> Self {
425 ThreadState {
426 tid,
427 pid,
428 ..ThreadState::default()
429 }
430 }
431}
432
/// Default scheduler quantum, measured in guest instructions.
///
/// Phase 4 will start consulting this. Until then the value is
/// purely informational: it exists so that
/// `ThreadState::quantum_remaining` has a sensible initial value.
pub const DEFAULT_QUANTUM: u32 = 10_000;
438
439/// The host-side state every stub may read or mutate.
440///
441/// This is the "operating system" of the sandbox — the heap, the
442/// LastError TLS, the pseudo-tick counter, the loaded-module
443/// registry, etc. One per emulator instance.
444///
445/// `HostState` is the union of (a) **truly shared** state
446/// (virtual filesystem, virtual registry, trace buffers, clock,
447/// instruction budget) and (b) the **current process**'s
448/// [`ProcessState`], exposed through [`std::ops::Deref`] so that
449/// existing stub bodies that read `state.heap` / `state.modules`
450/// / etc. continue to compile unchanged. The split prepares
451/// Phase 5 of the scheduler refactor (`CreateProcessA` spawning
452/// a real child PE) without churning every Win32 stub today.
453pub struct HostState {
454 /// Process table keyed by PID. The bootstrap process has
455 /// `pid = 1`; `CreateProcessA` mints children with
456 /// monotonically increasing PIDs. The
457 /// [`std::ops::Deref`] / [`std::ops::DerefMut`] impls on
458 /// `HostState` resolve `state.heap` / `state.modules` /
459 /// etc. through the *active* process, so stub bodies
460 /// continue to compile unchanged.
461 pub processes: BTreeMap<u32, ProcessState>,
462 /// PID of the currently-running process — points into
463 /// [`Self::processes`]. The scheduler updates this when
464 /// switching threads across process boundaries.
465 pub active_pid: u32,
466 /// Cursor for the next `CreateProcessA` to mint a PID.
467 pub next_pid: u32,
468 /// Image base for the next child PE loaded via
469 /// `CreateProcessA`. `0` until a child-image arena is
470 /// configured (via [`Self::with_child_image_arena`]); the
471 /// runtime walks the cursor forward by [`CHILD_IMAGE_STRIDE`]
472 /// per spawn.
473 pub next_child_image_base: u32,
474 /// Heap-arena pool the next child process carves its
475 /// per-process heap from. `[next_child_heap_base,
476 /// child_heap_arena_end)`. Each spawn takes
477 /// [`CHILD_HEAP_SIZE`] bytes.
478 pub next_child_heap_base: u32,
479 pub child_heap_arena_end: u32,
480 /// Thread table keyed by TID. Phase 2 of the scheduler
481 /// refactor: there is always at least one thread, with
482 /// `tid = 1`, owning the live `Cpu` on `Sandbox`. Phase 3
483 /// will populate more entries when `CreateThread` mints a
484 /// real thread, and the scheduler will move the live `Cpu`
485 /// in/out of `parked_cpu` here on context switch.
486 pub threads: BTreeMap<u32, ThreadState>,
487 /// TID of the currently-executing thread — points into
488 /// [`Self::threads`]. Phase 3 will mutate this on context
489 /// switch.
490 pub active_tid: u32,
491 /// Cursor for the next `CreateThread` to mint a TID. Stays
492 /// monotonic across the lifetime of the sandbox.
493 pub next_tid: u32,
494 /// Last error code (`SetLastError` / `GetLastError`). Phase
495 /// 6 will mirror this through the per-thread TIB at
496 /// FS:[0x34] so guest code reading it directly sees the
497 /// per-thread value. For now (single thread) the field on
498 /// HostState is the source of truth.
499 pub last_error: u32,
500 /// Lazily-allocated guest address of the C-CRT `errno` cell.
501 /// `None` until the first call to `msvcrt::_errno`, then
502 /// stable for the lifetime of the sandbox so repeated calls
503 /// return the same pointer (the contract `int * _errno(void)`
504 /// requires).
505 pub errno_cell: Option<u32>,
506 /// Pseudo-tick counter incremented on every `GetTickCount`.
507 pub tick: u32,
508 /// Lines that the codec wrote to `OutputDebugString*`. Tests
509 /// can introspect to confirm a known string was emitted.
510 pub debug_log: Vec<String>,
511 /// Lines that the codec wrote to `MessageBoxA` (also mirrored
512 /// to `eprintln!`). Distinct from `debug_log` so a test can
513 /// distinguish OutputDebugStringA traffic from real popups.
514 pub message_box_log: Vec<String>,
515 /// Optional per-run instruction budget. Decremented at each
516 /// top-of-loop iteration in [`run_until_sentinel`] (both
517 /// instruction steps and stub dispatches count). When it
518 /// hits zero the run loop bails with
519 /// [`Win32Error::BudgetExhausted`] so adversarial guests
520 /// can't loop the host. `None` (the default) keeps the
521 /// historical unbounded behaviour.
522 pub instruction_budget: Option<u64>,
523 /// Counts how many instructions actually ran in the last
524 /// (or current) run-loop session. Useful for the analysis
525 /// front-ends to report "ran for N instructions, budget
526 /// was M". Reset to zero on each top-level run entry.
527 pub instructions_executed: u64,
528 /// Optional emulation-context layer (virtual filesystem,
529 /// virtual registry, future surfaces). When `None`, the
530 /// Win32 stubs that would consult it fall through to
531 /// their fail-soft default. See [`crate::context::Context`]
532 /// for the contract.
533 pub context: crate::context::Context,
534 /// When `true`, [`dispatch_stub`] appends one line per Win32
535 /// call to [`HostState::stub_trace`]. Off by default; round-8 tests flip
536 /// it on while triaging which stub returns a bad value.
537 pub trace_stubs: bool,
538 /// Per-call trace lines populated when [`HostState::trace_stubs`] is on.
539 /// Format: `dll!name(arg0, arg1, …) → 0xRET`. The args are
540 /// the first `arg_dwords` (or, for known cdecl shapes, the
541 /// override from [`cdecl_trace_arg_count`]) dwords off the
542 /// guest stack, captured BEFORE the stub mutates them.
543 pub stub_trace: Vec<String>,
544 /// Structured per-call log, populated when [`HostState::trace_stubs`]
545 /// is on. Parallel to [`HostState::stub_trace`]; analysis front-ends
546 /// (the `ud analyze` JSON output) consume this directly so
547 /// they don't have to re-parse the formatted string.
548 pub stub_calls: Vec<StubCall>,
549 /// Scheduler-owned wait-object table + global instruction
550 /// clock. Phase 3 of the scheduler refactor.
551 pub scheduler: crate::sched::Scheduler,
552 /// When `Some`, the most recently dispatched stub asked the
553 /// run loop to switch threads. The run loop drains the
554 /// field after every stub return: a `Wait(...)` moves the
555 /// current thread to `Waiting`; `Yield` re-queues it at the
556 /// end of the Ready queue; `Exit { code }` terminates it.
557 pub yield_requested: Option<crate::sched::YieldRequest>,
558}
559
560impl Default for HostState {
561 fn default() -> Self {
562 let mut threads = BTreeMap::new();
563 threads.insert(1, ThreadState::new(1, 1));
564 let mut processes = BTreeMap::new();
565 let mut p = ProcessState::default();
566 p.pid = 1;
567 processes.insert(1, p);
568 HostState {
569 processes,
570 active_pid: 1,
571 next_pid: 2,
572 next_child_image_base: 0,
573 next_child_heap_base: 0,
574 child_heap_arena_end: 0,
575 threads,
576 active_tid: 1,
577 next_tid: 2,
578 last_error: 0,
579 errno_cell: None,
580 tick: 0,
581 debug_log: Vec::new(),
582 message_box_log: Vec::new(),
583 instruction_budget: None,
584 instructions_executed: 0,
585 context: crate::context::Context::default(),
586 trace_stubs: false,
587 stub_trace: Vec::new(),
588 stub_calls: Vec::new(),
589 scheduler: crate::sched::Scheduler::new(),
590 yield_requested: None,
591 }
592 }
593}
594
595impl std::ops::Deref for HostState {
596 type Target = ProcessState;
597 fn deref(&self) -> &ProcessState {
598 self.processes
599 .get(&self.active_pid)
600 .expect("active_pid must always point to a live process")
601 }
602}
603
604impl std::ops::DerefMut for HostState {
605 fn deref_mut(&mut self) -> &mut ProcessState {
606 self.processes
607 .get_mut(&self.active_pid)
608 .expect("active_pid must always point to a live process")
609 }
610}
611
612impl HostState {
613 /// Construct a HostState with the heap arena at `[heap_start,
614 /// heap_end)` (caller is responsible for mapping that region
615 /// in the MMU as R+W).
616 ///
617 /// The const-arena (used for canned strings handed back from
618 /// `GetCommandLineA` / `GetEnvironmentStrings` / etc.) is
619 /// **not** allocated here — call [`Self::with_const_arena`]
620 /// to set it up if those stubs are exercised. Tests that
621 /// don't use them can leave it at zero.
622 pub fn new(heap_start: u32, heap_end: u32) -> Self {
623 let mut s = HostState::default();
624 s.processes
625 .insert(1, ProcessState::new(heap_start, heap_end));
626 s
627 }
628
629 /// Borrow the active process. Resolved through
630 /// [`Self::active_pid`].
631 #[must_use]
632 pub fn cur_process(&self) -> &ProcessState {
633 self.processes
634 .get(&self.active_pid)
635 .expect("active_pid must always point to a live process")
636 }
637
638 /// Mutable borrow of the active process. Pair to
639 /// [`Self::cur_process`]; same invariant.
640 pub fn cur_process_mut(&mut self) -> &mut ProcessState {
641 self.processes
642 .get_mut(&self.active_pid)
643 .expect("active_pid must always point to a live process")
644 }
645
646 /// Borrow a process by PID, if it exists.
647 #[must_use]
648 pub fn process(&self, pid: u32) -> Option<&ProcessState> {
649 self.processes.get(&pid)
650 }
651
652 /// Mutable borrow of a process by PID.
653 pub fn process_mut(&mut self, pid: u32) -> Option<&mut ProcessState> {
654 self.processes.get_mut(&pid)
655 }
656
657 /// Borrow the currently-running thread. Falls back to the
658 /// bootstrap thread (`tid = 1`) on the freshly-constructed
659 /// state.
660 #[must_use]
661 pub fn cur_thread(&self) -> &ThreadState {
662 self.threads
663 .get(&self.active_tid)
664 .expect("active_tid must always point to a live thread (Default initialises tid 1)")
665 }
666
667 /// Mutable borrow of the currently-running thread. Pair to
668 /// [`Self::cur_thread`]; same invariant.
669 pub fn cur_thread_mut(&mut self) -> &mut ThreadState {
670 self.threads
671 .get_mut(&self.active_tid)
672 .expect("active_tid must always point to a live thread (Default initialises tid 1)")
673 }
674
675 /// Configure the const-arena (region for canned read-only
676 /// strings handed back to the codec). `[start, end)` is a
677 /// guest-virtual range the caller has already mapped R+W
678 /// (the arena bytes are written via `write_initializer`,
679 /// so any page perms suffice as long as the page is mapped).
680 pub fn with_const_arena(mut self, start: u32, end: u32) -> Self {
681 let p = self.cur_process_mut();
682 p.const_arena_cursor = start;
683 p.const_arena_end = end;
684 self
685 }
686
687 /// Configure the thread-stack pool. `CreateThread` carves
688 /// per-thread stacks from the top of this region walking
689 /// downward. `[bottom, top)` must already be mapped R+W in
690 /// the MMU.
691 pub fn with_thread_stack_pool(mut self, bottom: u32, top: u32) -> Self {
692 let p = self.cur_process_mut();
693 p.thread_stack_pool_bottom = bottom;
694 p.next_thread_stack_top = top;
695 self
696 }
697
698 /// Configure the per-thread TIB pool. `CreateThread` carves
699 /// 4 KiB TIB regions out of `[bottom, top)` walking upward.
700 /// Both ends must already be mapped R+W in the MMU. The
701 /// bootstrap thread continues to use the runtime's shared
702 /// `TEB_BASE`; only spawned threads consume this pool.
703 pub fn with_tib_pool(mut self, bottom: u32, top: u32) -> Self {
704 let p = self.cur_process_mut();
705 p.tib_pool_bottom = bottom;
706 p.next_tib_addr = bottom;
707 let _ = top; // explicit upper bound is informational
708 self
709 }
710
711 /// Configure the child-process pools: image-base cursor
712 /// + heap arena. `CreateProcessA` carves a child PE into
713 /// `[image_base, image_base + CHILD_IMAGE_STRIDE)` and a
714 /// 16 MiB heap out of `[heap_start, heap_end)`.
715 pub fn with_child_arena(
716 mut self,
717 image_base_cursor: u32,
718 heap_start: u32,
719 heap_end: u32,
720 ) -> Self {
721 self.next_child_image_base = image_base_cursor;
722 self.next_child_heap_base = heap_start;
723 self.child_heap_arena_end = heap_end;
724 self
725 }
726
727 /// Bump-allocate `n` bytes in the const arena. Returns the
728 /// guest address of the new slab. The caller is responsible
729 /// for [`Mmu::write_initializer`]'ing the contents.
730 pub fn arena_const_alloc(&mut self, n: u32) -> Result<u32, Win32Error> {
731 let aligned =
732 n.checked_add(15)
733 .map(|v| v & !15u32)
734 .ok_or_else(|| Win32Error::InvalidArgument {
735 stub: "arena_const_alloc",
736 reason: format!("size overflow: requested {n} (≈ {n:#x})"),
737 })?;
738 let addr = self.const_arena_cursor;
739 let next = addr
740 .checked_add(aligned)
741 .ok_or(Win32Error::InvalidArgument {
742 stub: "arena_const_alloc",
743 reason: "const arena address-space overflow".into(),
744 })?;
745 if next > self.const_arena_end {
746 return Err(Win32Error::InvalidArgument {
747 stub: "arena_const_alloc",
748 reason: format!(
749 "const arena exhausted (need {n}, have {})",
750 self.const_arena_end - addr
751 ),
752 });
753 }
754 self.const_arena_cursor = next;
755 Ok(addr)
756 }
757
758 /// Allocate a fresh slab in the heap arena and return its
759 /// guest address. Used by the round-2 marshalling helpers to
760 /// stage `ICDECOMPRESS` / `BITMAPINFOHEADER` / raw-frame
761 /// buffers in guest memory before calling `DriverProc`.
762 pub fn arena_alloc(&mut self, n: u32) -> Result<u32, Win32Error> {
763 let aligned =
764 n.checked_add(15)
765 .map(|v| v & !15u32)
766 .ok_or_else(|| Win32Error::InvalidArgument {
767 stub: "arena_alloc",
768 reason: format!("size overflow: requested {n} (≈ {n:#x})"),
769 })?;
770 let addr = self.heap_cursor;
771 let next = addr
772 .checked_add(aligned)
773 .ok_or(Win32Error::InvalidArgument {
774 stub: "arena_alloc",
775 reason: "heap address-space overflow".into(),
776 })?;
777 if next > self.heap_arena_end {
778 return Err(Win32Error::InvalidArgument {
779 stub: "arena_alloc",
780 reason: format!(
781 "arena exhausted (need {n}, have {})",
782 self.heap_arena_end - addr
783 ),
784 });
785 }
786 self.heap_cursor = next;
787 self.heap.insert(addr, vec![0u8; n as usize]);
788 Ok(addr)
789 }
790}
791
792/// Stub registry. Created once per emulator instance.
793#[derive(Default)]
794pub struct Registry {
795 by_thunk: BTreeMap<u32, StubEntry>,
796 by_name: BTreeMap<(String, String), u32>,
797 next_slot: u32,
798 /// Per-(dll, name) **data imports**. Some CRT symbols are
799 /// imported by name but are read as data (e.g.
800 /// `msvcrt!_adjust_fdiv`, an `int` flag the FDIV-erratum
801 /// fix-up code consults). The PE loader treats their IAT
802 /// slots as `mov ecx, [iat]; mov edx, [ecx]` — the IAT
803 /// slot is the address OF a 4-byte int, not a function
804 /// pointer. We pre-allocate a small read/write region for
805 /// these and patch the IAT slot to its address. The
806 /// `(value)` is whatever the symbol is documented to hold;
807 /// 0 is the safe default.
808 data_imports: BTreeMap<(String, String), DataImport>,
809 /// Bump cursor in the data-import slot region (assigned
810 /// addresses live in `[DATA_IMPORT_BASE, DATA_IMPORT_BASE +
811 /// DATA_IMPORT_SIZE)`).
812 next_data_slot: u32,
813}
814
815/// One data-import slot, addressed via [`Registry::resolve`].
816#[derive(Clone, Copy, Debug)]
817pub struct DataImport {
818 /// Guest address of the 4-byte slot. The PE loader patches
819 /// the IAT entry with this value.
820 pub addr: u32,
821 /// Initial value to seed into `[addr]` at first slot
822 /// allocation. Subsequent registrations of the same name
823 /// keep the prior value.
824 pub initial: u32,
825}
826
827/// Region reserved for data-import slots — see [`DataImport`].
828/// 4 KiB is plenty: the entire CRT data-import set is fewer
829/// than 16 dwords across all codecs we expect to load.
830pub const DATA_IMPORT_BASE: u32 = 0x7010_0000;
831const DATA_IMPORT_SIZE: u32 = 0x0000_1000;
832const DATA_IMPORT_END: u32 = DATA_IMPORT_BASE + DATA_IMPORT_SIZE;
833
834impl Registry {
835 pub fn new() -> Self {
836 Registry {
837 by_thunk: BTreeMap::new(),
838 by_name: BTreeMap::new(),
839 next_slot: 0,
840 data_imports: BTreeMap::new(),
841 next_data_slot: DATA_IMPORT_BASE,
842 }
843 }
844
845 /// Register a data import — a 4-byte symbol the codec
846 /// reads via `mov reg, [iat]; mov reg, [reg]`. Returns
847 /// the guest address that the IAT slot should point at.
848 /// Subsequent calls with the same `(dll, name)` return the
849 /// previously assigned slot.
850 pub fn register_data(&mut self, dll: &str, name: &str, initial: u32) -> u32 {
851 let key = (dll.to_ascii_lowercase(), name.to_string());
852 if let Some(d) = self.data_imports.get(&key) {
853 return d.addr;
854 }
855 let addr = self.next_data_slot;
856 let next = addr.saturating_add(4);
857 if next > DATA_IMPORT_END {
858 // Caller asked to register more data imports than
859 // we reserved space for. Return 0 — the loader
860 // handles "unresolved" by falling back to a thunk
861 // that will trap loudly.
862 return 0;
863 }
864 self.next_data_slot = next;
865 self.data_imports.insert(key, DataImport { addr, initial });
866 // Also expose it through the by-name resolver so the
867 // PE loader's ordinary lookup picks it up. The
868 // returned address is in the data region (not a thunk
869 // — `is_thunk(addr)` will correctly return false).
870 self.by_name
871 .insert((dll.to_ascii_lowercase(), name.to_string()), addr);
872 addr
873 }
874
875 /// Iterate the registered data imports. The PE loader uses
876 /// this to seed each slot's `initial` value into MMU memory
877 /// after the data-import region has been mapped.
878 pub fn data_imports(&self) -> impl Iterator<Item = (&String, &String, &DataImport)> {
879 self.data_imports
880 .iter()
881 .map(|((dll, name), d)| (dll, name, d))
882 }
883
884 /// Register a stub. Returns the synthetic thunk address that
885 /// the IAT slot should be populated with.
886 pub fn register(&mut self, dll: &str, name: &str, func: StubFn, arg_dwords: u32) -> u32 {
887 let key = (dll.to_ascii_lowercase(), name.to_string());
888 if let Some(addr) = self.by_name.get(&key) {
889 return *addr;
890 }
891 let thunk_addr = THUNK_BASE.wrapping_add(self.next_slot.wrapping_mul(THUNK_STRIDE));
892 self.next_slot += 1;
893 self.by_name.insert(key.clone(), thunk_addr);
894 self.by_thunk.insert(
895 thunk_addr,
896 StubEntry {
897 dll: key.0,
898 name: key.1,
899 func,
900 arg_dwords,
901 thunk_addr,
902 },
903 );
904 thunk_addr
905 }
906
907 /// Resolve an import. The PE loader uses this when populating
908 /// IAT slots. `dll_name` is matched case-insensitively.
909 pub fn resolve(&self, dll: &str, name: &str) -> Option<u32> {
910 let key = (dll.to_ascii_lowercase(), name.to_string());
911 self.by_name.get(&key).copied()
912 }
913
914 /// Register a fail-soft fallback thunk for an import we
915 /// don't have a stub for. The thunk's stub function looks
916 /// itself up in the registry and raises
917 /// [`crate::emulator::Trap::UnresolvedImport`] carrying
918 /// the (dll, name) pair on first call.
919 ///
920 /// The PE loader's fail-soft mode installs one of these
921 /// for every unresolved IAT entry so loading succeeds and
922 /// execution proceeds until the first unknown API actually
923 /// gets called. That's a much better signal than failing
924 /// at load time: the trap names the specific function to
925 /// implement next, and reveals which import paths are
926 /// reachable from the entry point.
927 pub fn register_unknown_fallback(&mut self, dll: &str, name: &str) -> u32 {
928 let key = (dll.to_ascii_lowercase(), name.to_string());
929 if let Some(addr) = self.by_name.get(&key) {
930 return *addr;
931 }
932 let thunk_addr = THUNK_BASE.wrapping_add(self.next_slot.wrapping_mul(THUNK_STRIDE));
933 self.next_slot += 1;
934 self.by_name.insert(key.clone(), thunk_addr);
935 // arg_dwords=0 is wrong for most stdcall APIs but
936 // doesn't matter — the stub traps before returning so
937 // dispatch_stub never reaches the stack-cleanup path.
938 self.by_thunk.insert(
939 thunk_addr,
940 StubEntry {
941 dll: key.0,
942 name: key.1,
943 func: stub_unresolved_fallback,
944 arg_dwords: 0,
945 thunk_addr,
946 },
947 );
948 thunk_addr
949 }
950
951 /// True iff `addr` is a registered thunk address.
952 pub fn is_thunk(&self, addr: u32) -> bool {
953 self.by_thunk.contains_key(&addr)
954 }
955
956 /// Look up the stub entry by its thunk address. Used by the
957 /// runtime when it sees `eip == thunk_addr`.
958 pub fn entry(&self, addr: u32) -> Option<&StubEntry> {
959 self.by_thunk.get(&addr)
960 }
961
962 /// Convenience: register every kernel32 stub. Returns the
963 /// number of stubs registered.
964 pub fn register_kernel32(&mut self) -> usize {
965 let before = self.by_name.len();
966 kernel32::register(self);
967 self.by_name.len() - before
968 }
969
970 /// Register every gdi32 stub. Returns the number registered.
971 pub fn register_gdi32(&mut self) -> usize {
972 let before = self.by_name.len();
973 gdi32::register(self);
974 self.by_name.len() - before
975 }
976
977 /// Register every user32 stub. Returns the number registered.
978 pub fn register_user32(&mut self) -> usize {
979 let before = self.by_name.len();
980 user32::register(self);
981 self.by_name.len() - before
982 }
983
984 /// Register every winmm stub. Returns the number registered.
985 pub fn register_winmm(&mut self) -> usize {
986 let before = self.by_name.len();
987 winmm::register(self);
988 self.by_name.len() - before
989 }
990
991 /// Register every advapi32 stub. Returns the number registered.
992 pub fn register_advapi32(&mut self) -> usize {
993 let before = self.by_name.len();
994 advapi32::register(self);
995 self.by_name.len() - before
996 }
997
998 /// Register every ole32 stub. Returns the number registered.
999 pub fn register_ole32(&mut self) -> usize {
1000 let before = self.by_name.len();
1001 ole32::register(self);
1002 self.by_name.len() - before
1003 }
1004
1005 /// Register every msvcrt stub. Returns the number registered.
1006 pub fn register_msvcrt(&mut self) -> usize {
1007 let before = self.by_name.len();
1008 msvcrt::register(self);
1009 self.by_name.len() - before
1010 }
1011
1012 /// Register the msvcrt stub set under `msvcr71.dll`. Used by
1013 /// codecs from the wmfdist11 era (mp43decd, mp4sdecd,
1014 /// wmvdecod, …) that link MSVC 7.1's runtime by its
1015 /// per-version name. Returns the number registered.
1016 pub fn register_msvcr71(&mut self) -> usize {
1017 let before = self.by_name.len();
1018 msvcrt::register_alias(self, "msvcr71.dll");
1019 self.by_name.len() - before
1020 }
1021
1022 /// Register the msvcrt stub set under `pncrt.dll`. Used by
1023 /// RealNetworks codecs that ship their own CRT fork.
1024 /// Returns the number registered.
1025 pub fn register_pncrt(&mut self) -> usize {
1026 let before = self.by_name.len();
1027 msvcrt::register_alias(self, "pncrt.dll");
1028 self.by_name.len() - before
1029 }
1030
1031 /// Register the msvcrt stub set under `msvcr80.dll` (Visual
1032 /// Studio 2005 CRT). Used by `camstudio-1.4-camcodec.dll`.
1033 pub fn register_msvcr80(&mut self) -> usize {
1034 let before = self.by_name.len();
1035 msvcrt::register_alias(self, "msvcr80.dll");
1036 self.by_name.len() - before
1037 }
1038
1039 /// Register the msvcrt stub set under `msvcr90.dll` (Visual
1040 /// Studio 2008 CRT). Used by `camstudio-1.5-camcodec.dll`.
1041 pub fn register_msvcr90(&mut self) -> usize {
1042 let before = self.by_name.len();
1043 msvcrt::register_alias(self, "msvcr90.dll");
1044 self.by_name.len() - before
1045 }
1046
1047 /// Register every mfplat (Media Foundation platform) stub.
1048 /// Returns the number registered.
1049 pub fn register_mfplat(&mut self) -> usize {
1050 let before = self.by_name.len();
1051 mfplat::register(self);
1052 self.by_name.len() - before
1053 }
1054
1055 /// Register every msi.dll stub — Windows Installer surface
1056 /// touched by application installers (QuickTime, …).
1057 /// Returns the number registered.
1058 pub fn register_msi(&mut self) -> usize {
1059 let before = self.by_name.len();
1060 msi::register(self);
1061 self.by_name.len() - before
1062 }
1063
1064 /// Register the version.dll / comctl32.dll / shell32.dll /
1065 /// shlwapi.dll stub families — the config-dialog and
1066 /// settings-file surface VfW codecs pull in alongside their
1067 /// decode core. Returns the number registered.
1068 pub fn register_shell_support(&mut self) -> usize {
1069 let before = self.by_name.len();
1070 version::register(self);
1071 comctl32::register(self);
1072 shell32::register(self);
1073 shlwapi::register(self);
1074 self.by_name.len() - before
1075 }
1076
1077 /// Register every Round-1+4+8+20 stub family in one call:
1078 /// kernel32, gdi32, user32, winmm, advapi32, ole32, msvcrt,
1079 /// plus the round-27 host-COM thunk family used by
1080 /// [`crate::com::mint_host_filter_graph`]. Returns the total
1081 /// number registered.
1082 pub fn register_all(&mut self) -> usize {
1083 let host_before = self.by_name.len();
1084 crate::com::host_iface::register(self);
1085 crate::com::host_iface_r31::register(self);
1086 let host_count = self.by_name.len() - host_before;
1087 self.register_kernel32()
1088 + self.register_gdi32()
1089 + self.register_user32()
1090 + self.register_winmm()
1091 + self.register_advapi32()
1092 + self.register_ole32()
1093 + self.register_msvcrt()
1094 + self.register_msvcr71()
1095 + self.register_pncrt()
1096 + self.register_msvcr80()
1097 + self.register_msvcr90()
1098 + self.register_mfplat()
1099 + self.register_msi()
1100 + self.register_shell_support()
1101 + host_count
1102 }
1103}
1104
1105/// Read the `n`-th stdcall dword argument off the guest stack.
1106///
1107/// At entry, `esp` points to the saved return address (pushed by
1108/// the caller's CALL); the first argument is at `esp+4`, the
1109/// second at `esp+8`, etc.
1110pub fn arg_dword(cpu: &Cpu, mmu: &Mmu, n: u32) -> Result<u32, crate::emulator::Trap> {
1111 let addr = cpu.regs.esp().wrapping_add(4u32 * (n + 1));
1112 mmu.load32(addr)
1113}
1114
/// Cdecl arg-count override table for trace-event extraction.
///
/// Stdcall stubs already declare their argument count in
/// [`StubEntry::arg_dwords`] (the value the dispatch site uses
/// to pop the stack on return). Cdecl stubs declare `0` because
/// the *caller* cleans the stack — but the args are still on the
/// stack at call entry. For known-shape cdecl entries we return
/// the per-call dword count so the trace probe can read those
/// dwords back into `args[]` on `kind=win32_call` events.
///
/// The table applies to `msvcrt.dll` and to the alias names the
/// same stub set is registered under (`msvcr71.dll`, `pncrt.dll`,
/// `msvcr80.dll`, `msvcr90.dll`). The DLL name is compared
/// ASCII-case-insensitively; the symbol name must match exactly.
///
/// Returns `None` if the `(dll, name)` pair has no override; in
/// that case the trace site falls back to the registered
/// `arg_dwords` (0 for any cdecl stub, leaving `args:[]` as
/// before — so this is purely additive).
///
/// Reference: `docs/video/msmpeg4/audit/06-sandbox-O3-quant-init.md`
/// §5.2.3 — Auditor needs allocation sizes surfaced at call
/// time so the codec-context allocation can be located by size
/// match rather than by return-address differencing.
pub fn cdecl_trace_arg_count(dll: &str, name: &str) -> Option<u32> {
    /// DLL names that carry the msvcrt stub set.
    const CRT_DLLS: [&str; 5] = [
        "msvcrt.dll",
        "msvcr71.dll",
        "pncrt.dll",
        "msvcr80.dll",
        "msvcr90.dll",
    ];
    if !CRT_DLLS.iter().any(|crt| dll.eq_ignore_ascii_case(crt)) {
        return None;
    }
    match name {
        // Heap surface — single-arg shapes.
        //   void* malloc(size_t)                               — 1
        //   void  free(void*)                                  — 1
        //   void* operator new(unsigned int)   ??2@YAPAXI@Z    — 1
        //   void  operator delete(void*)       ??3@YAXPAX@Z    — 1
        "malloc" | "free" | "??2@YAPAXI@Z" | "??3@YAXPAX@Z" => Some(1),
        // Two-arg shapes — not registered today but cheap to
        // pre-declare so a future `register("msvcrt.dll",
        // "calloc"/"realloc", ...)` automatically gets traced
        // args without revisiting this table.
        //   void* calloc(size_t count, size_t size)            — 2
        //   void* realloc(void*, size_t)                       — 2
        "calloc" | "realloc" => Some(2),
        _ => None,
    }
}
1155
1156/// Convert an MMU/CPU [`crate::emulator::Trap`] into a [`Win32Error`]
1157/// so a stub's argument-fetch failure surfaces as
1158/// `Win32Error::InvalidArgument`. Used by the gdi32 / user32 /
1159/// winmm modules.
1160pub fn trap_to_win32_local(stub: &'static str, t: crate::emulator::Trap) -> Win32Error {
1161 Win32Error::InvalidArgument {
1162 stub,
1163 reason: format!("{t}"),
1164 }
1165}
1166
1167/// Read a NUL-terminated 8-bit string from guest memory at `addr`,
1168/// stopping at NUL or after `max` bytes. Used by user32/winmm
1169/// stubs that take an `LPCSTR`.
1170pub fn read_cstr_local(mmu: &Mmu, mut addr: u32, max: u32) -> Result<String, Win32Error> {
1171 let mut bytes = Vec::new();
1172 for _ in 0..max {
1173 let b = mmu
1174 .load8(addr)
1175 .map_err(|t| trap_to_win32_local("read_cstr", t))?;
1176 if b == 0 {
1177 break;
1178 }
1179 bytes.push(b);
1180 addr = addr.wrapping_add(1);
1181 }
1182 Ok(String::from_utf8_lossy(&bytes).into_owned())
1183}
1184
1185/// Read a NUL-terminated UTF-16 string from guest memory at
1186/// `addr`, stopping at NUL or after `max_chars` 16-bit code
1187/// units. Used by every `*W` Win32 stub that takes an
1188/// `LPCWSTR`.
1189pub fn read_wide_cstr_local(mmu: &Mmu, mut addr: u32, max_chars: u32) -> String {
1190 let mut units = Vec::new();
1191 for _ in 0..max_chars {
1192 match mmu.load16(addr) {
1193 Ok(0) => break,
1194 Ok(u) => units.push(u),
1195 Err(_) => break,
1196 }
1197 addr = addr.wrapping_add(2);
1198 }
1199 String::from_utf16_lossy(&units)
1200}
1201
1202/// Stub function used by [`Registry::register_unknown_fallback`].
1203/// Looks up its own (dll, name) by reverse-resolving the entry
1204/// EIP against the registry and raises a
1205/// [`Win32Error::UnknownImport`] that the runtime surfaces as
1206/// `Trap::UnresolvedImport`. Execution halts on first call —
1207/// the operator sees the precise import to implement next.
1208fn stub_unresolved_fallback(
1209 cpu: &mut Cpu,
1210 _mmu: &mut Mmu,
1211 _state: &mut HostState,
1212 registry: &Registry,
1213) -> Result<u32, Win32Error> {
1214 let addr = cpu.regs.eip;
1215 let (dll, name) = registry
1216 .entry(addr)
1217 .map(|e| (e.dll.clone(), e.name.clone()))
1218 .unwrap_or_else(|| ("<unknown>".to_string(), format!("@{addr:#010x}")));
1219 Err(Win32Error::UnknownImport { dll, name })
1220}
1221
/// Dispatch a stub call. The runtime wires this into the executor
/// so that whenever `eip` lands on a thunk address, control
/// transfers here instead of fetching instruction bytes.
///
/// On entry: the guest CALL has already pushed the return
/// address; `eip` is the thunk address. On exit: `eax` holds the
/// stub's return value, `eip` is the popped return address, and
/// `arg_dwords*4` bytes have been removed from the stack
/// (stdcall callee-cleanup).
///
/// When `state.trace_stubs` is set, each successful call is also
/// appended to `state.stub_trace` (formatted text) and
/// `state.stub_calls` (structured record).
///
/// # Errors
///
/// * [`Win32Error::UnknownImport`] (dll `<thunk>`) if `eip` is not
///   a registered thunk address.
/// * Whatever error the stub itself returns. In that case nothing
///   is traced and the guest stack / `eip` / `eax` are left as the
///   stub left them — the return address is not popped.
/// * The error from popping the return address off the guest stack.
pub fn dispatch_stub(
    cpu: &mut Cpu,
    mmu: &mut Mmu,
    registry: &Registry,
    state: &mut HostState,
) -> Result<(), crate::Error> {
    let addr = cpu.regs.eip;
    // Work on an owned copy of the entry for the rest of the call.
    let entry = registry
        .entry(addr)
        .ok_or_else(|| Win32Error::UnknownImport {
            dll: "<thunk>".into(),
            name: format!("@{:#010x}", addr),
        })?
        .clone();
    // Snapshot the call-site EIP (= the saved return address
    // pushed by the guest CALL — the instruction right after
    // the CALL, not the thunk address) and the first few args
    // off the guest stack BEFORE running the stub, since the
    // stub mutates the stack.
    //
    // Argument count: `entry.arg_dwords` carries the stdcall
    // count (the value used to pop the stack on return). For
    // cdecl stubs this is 0 — but for known cdecl shapes
    // (msvcrt heap entries) [`cdecl_trace_arg_count`] supplies a
    // per-call override so the trace surfaces the size / pointer
    // args rather than `args:[]`.
    //
    // The snapshot is always-on when `state.trace_stubs` is set
    // (the structured `stub_calls` vector consumes it) and is
    // additionally emitted as a JSONL event under the `trace`
    // feature flag.
    let capture_args = state.trace_stubs;
    // With the `trace` feature the binding is shadowed so that an
    // attached trace sink also forces the snapshot.
    #[cfg(feature = "trace")]
    let capture_args = capture_args || mmu.trace.has_sink();
    let snapshot: Option<(u32, Vec<u32>)> = if capture_args {
        // Unreadable stack slots are recorded as 0 rather than
        // failing the call — the snapshot is diagnostic only.
        let call_site_eip = mmu.load32(cpu.regs.esp()).unwrap_or(0);
        let n_args = cdecl_trace_arg_count(&entry.dll, &entry.name).unwrap_or(entry.arg_dwords);
        let mut args = Vec::with_capacity(n_args as usize);
        for i in 0..n_args {
            let a = arg_dword(cpu, mmu, i).unwrap_or(0);
            args.push(a);
        }
        Some((call_site_eip, args))
    } else {
        None
    };
    // Run the host-side stub.
    let ret = (entry.func)(cpu, mmu, state, registry)?;
    if state.trace_stubs {
        // Cloned because the `trace`-feature block below consumes
        // `snapshot` as well.
        let (call_site_eip, args) = snapshot.clone().unwrap_or((0, Vec::new()));
        let args_str = args
            .iter()
            .map(|a| format!("{a:#010x}"))
            .collect::<Vec<_>>()
            .join(", ");
        state.stub_trace.push(format!(
            "{}!{}({args_str}) → {:#010x}",
            entry.dll, entry.name, ret
        ));
        state.stub_calls.push(StubCall {
            dll: entry.dll.clone(),
            name: entry.name.clone(),
            args,
            ret,
            call_site_eip,
        });
    }
    // Emit the trace event with the captured args + the actual
    // return value. Done before stack unwind so the EIP we log
    // is the call site, not the post-return PC.
    #[cfg(feature = "trace")]
    if let Some((call_site_eip, args)) = snapshot {
        mmu.trace
            .ev_win32_call(&entry.dll, &entry.name, &args, ret, call_site_eip);
    }
    // stdcall: pop return address, advance esp by arg_dwords*4,
    // set eax to the return value. Note the pop count is always the
    // registered `arg_dwords`; the cdecl trace override above only
    // affects how many dwords were snapshotted.
    let ret_addr = cpu.pop32(mmu)?;
    cpu.regs.set32(crate::emulator::regs::Reg32::Eax, ret);
    let new_esp = cpu
        .regs
        .esp()
        .wrapping_add(entry.arg_dwords.wrapping_mul(4));
    cpu.regs.set_esp(new_esp);
    cpu.regs.eip = ret_addr;
    Ok(())
}
1318
1319/// Run the emulator until `eip == RET_SENTINEL`, dispatching to
1320/// any Win32 stub thunk addresses encountered along the way.
1321///
1322/// This is the shared run-loop body used both by [`crate::Sandbox`]
1323/// and by re-entrant host stubs (notably the `vfw32` surface,
1324/// which dispatches the codec's `DriverProc` synchronously
1325/// inside an outer `IC*` call).
1326/// Process a yield request from a freshly-returned stub. The
1327/// active thread transitions to the requested scheduler state,
1328/// then the run loop picks the next `Ready` thread via
1329/// [`schedule_next_thread`]. The live `Cpu` is parked into the
1330/// previous active thread's `parked_cpu` slot and the new
1331/// thread's parked Cpu replaces it.
1332fn handle_yield(cpu: &mut Cpu, state: &mut HostState, req: crate::sched::YieldRequest) {
1333 use crate::sched::{ThreadStatus, YieldRequest};
1334 match req {
1335 YieldRequest::Wait(cond) => {
1336 let t = state.cur_thread_mut();
1337 t.status = ThreadStatus::Waiting;
1338 t.wait = Some(cond);
1339 }
1340 YieldRequest::Yield => {
1341 let t = state.cur_thread_mut();
1342 t.status = ThreadStatus::Ready;
1343 }
1344 YieldRequest::Exit { code } => {
1345 let tid = state.active_tid;
1346 let t = state.cur_thread_mut();
1347 t.status = ThreadStatus::Terminated;
1348 t.wait = None;
1349 on_thread_terminated(state, tid);
1350 // Signal any pending `WaitForSingleObject` against
1351 // this thread's Thread WaitObject. (Phase 3c will
1352 // implement the wake side; here we just mark the
1353 // state machine so Phase 3c's wake-up sees a
1354 // terminated thread).
1355 let _ = code;
1356 }
1357 }
1358 schedule_next_thread(cpu, state);
1359}
1360
1361/// Park the live `Cpu` into the current thread and resume the
1362/// next `Ready` thread (if one exists). When no other Ready
1363/// thread is available, restores the current thread's CPU
1364/// unchanged — the run loop continues with the same thread
1365/// (which is fine for single-thread Sleep behaviour: the clock
1366/// fast-forward at the top of the loop wakes the same thread).
1367fn schedule_next_thread(cpu: &mut Cpu, state: &mut HostState) {
1368 use crate::sched::ThreadStatus;
1369 // Pick the next runnable thread other than the current
1370 // one — round-robin by TID order. Phase 4 will add
1371 // priority-aware picking.
1372 let cur_tid = state.active_tid;
1373 let next_tid = {
1374 let mut candidates: Vec<(i32, u32)> = state
1375 .threads
1376 .iter()
1377 .filter(|(tid, t)| **tid != cur_tid && matches!(t.status, ThreadStatus::Ready))
1378 .map(|(tid, t)| (t.priority, *tid))
1379 .collect();
1380 // Sort by descending priority then ascending TID for
1381 // deterministic round-robin within the same priority.
1382 candidates.sort_by(|a, b| b.0.cmp(&a.0).then(a.1.cmp(&b.1)));
1383 candidates.into_iter().next().map(|(_, tid)| tid)
1384 };
1385 let cur_is_runnable = matches!(
1386 state
1387 .threads
1388 .get(&cur_tid)
1389 .map(|t| t.status)
1390 .unwrap_or(ThreadStatus::Terminated),
1391 ThreadStatus::Ready | ThreadStatus::Running
1392 );
1393 let Some(next_tid) = next_tid else {
1394 // No other Ready thread. If the current is still
1395 // runnable, just keep going. Otherwise the run loop's
1396 // sleep-clock fast-forward will wake it; if that
1397 // doesn't apply we'd deadlock — but Phase 3b only
1398 // exposes Sleep, so this path is fine.
1399 if cur_is_runnable {
1400 state.cur_thread_mut().status = ThreadStatus::Running;
1401 }
1402 return;
1403 };
1404 // Park the live CPU into the current thread.
1405 let parked = std::mem::take(cpu);
1406 if let Some(t) = state.threads.get_mut(&cur_tid) {
1407 t.parked_cpu = Some(parked);
1408 }
1409 // Restore the next thread's parked CPU into the live one.
1410 let mut new_pid = None;
1411 if let Some(t) = state.threads.get_mut(&next_tid) {
1412 if let Some(c) = t.parked_cpu.take() {
1413 *cpu = c;
1414 }
1415 t.status = ThreadStatus::Running;
1416 new_pid = Some(t.pid);
1417 }
1418 state.active_tid = next_tid;
1419 // When the new thread lives in a different process, update
1420 // `active_pid` so the Deref-resolved per-process state
1421 // (heap arena, modules, hwnd registry, …) points at the
1422 // new process. Phase 5c.
1423 if let Some(pid) = new_pid {
1424 if state.processes.contains_key(&pid) {
1425 state.active_pid = pid;
1426 }
1427 }
1428}
1429
1430/// After a thread terminates, check whether its owning
1431/// process has any live threads left. If not, record the
1432/// process's exit code (defaulting to 0 if not already set)
1433/// and wake every thread blocked on a `WaitObject::Process`
1434/// targeting that PID. Phase 5c — chains the natural Win32
1435/// "last thread out marks the process exited" contract.
1436fn on_thread_terminated(state: &mut HostState, tid: u32) {
1437 let pid = match state.threads.get(&tid) {
1438 Some(t) => t.pid,
1439 None => return,
1440 };
1441 let alive = state
1442 .threads
1443 .values()
1444 .any(|t| t.pid == pid && !matches!(t.status, crate::sched::ThreadStatus::Terminated));
1445 if alive {
1446 return;
1447 }
1448 if let Some(p) = state.processes.get_mut(&pid) {
1449 if p.exit_code.is_none() {
1450 p.exit_code = Some(0);
1451 }
1452 }
1453 // Wake every Process-handle waiter on this PID.
1454 let handles: Vec<u32> = state
1455 .scheduler
1456 .objects
1457 .iter()
1458 .filter_map(|(h, obj)| match obj {
1459 crate::sched::WaitObject::Process { pid: p } if *p == pid => Some(*h),
1460 _ => None,
1461 })
1462 .collect();
1463 for h in handles {
1464 for waiter_tid in crate::sched::waiters_on(&state.threads, h) {
1465 if let Some(t) = state.threads.get_mut(&waiter_tid) {
1466 t.status = crate::sched::ThreadStatus::Ready;
1467 t.wait = None;
1468 }
1469 }
1470 }
1471 // Same for any pending Thread-handle waits on this TID.
1472 let thread_handles: Vec<u32> = state
1473 .scheduler
1474 .objects
1475 .iter()
1476 .filter_map(|(h, obj)| match obj {
1477 crate::sched::WaitObject::Thread { tid: t } if *t == tid => Some(*h),
1478 _ => None,
1479 })
1480 .collect();
1481 for h in thread_handles {
1482 for waiter_tid in crate::sched::waiters_on(&state.threads, h) {
1483 if let Some(t) = state.threads.get_mut(&waiter_tid) {
1484 t.status = crate::sched::ThreadStatus::Ready;
1485 t.wait = None;
1486 }
1487 }
1488 }
1489}
1490
1491/// Earliest `resume_after_instructions` across every
1492/// Sleep-waiting thread, or `None` if no thread is sleeping.
1493fn earliest_sleep_resume(state: &HostState) -> Option<u64> {
1494 state
1495 .threads
1496 .values()
1497 .filter_map(|t| {
1498 if matches!(t.status, crate::sched::ThreadStatus::Waiting) {
1499 if let Some(crate::sched::WaitCondition::Sleep {
1500 resume_after_instructions,
1501 }) = t.wait
1502 {
1503 return Some(resume_after_instructions);
1504 }
1505 }
1506 None
1507 })
1508 .min()
1509}
1510
1511/// Move every `Waiting`-on-Sleep thread whose resume target is
1512/// in the past back to `Ready`. Called from
1513/// [`run_until_sentinel`] after the global clock advances.
1514fn wake_sleep_if_due(state: &mut HostState) {
1515 let now = state.scheduler.instructions_global;
1516 for t in state.threads.values_mut() {
1517 if matches!(t.status, crate::sched::ThreadStatus::Waiting) {
1518 if let Some(crate::sched::WaitCondition::Sleep {
1519 resume_after_instructions,
1520 }) = t.wait
1521 {
1522 if now >= resume_after_instructions {
1523 t.status = crate::sched::ThreadStatus::Ready;
1524 t.wait = None;
1525 }
1526 }
1527 }
1528 }
1529}
1530
1531pub fn run_until_sentinel(
1532 cpu: &mut Cpu,
1533 mmu: &mut Mmu,
1534 registry: &Registry,
1535 state: &mut HostState,
1536) -> Result<(), crate::Error> {
1537 use crate::emulator::isa_int::{StepOk, RET_SENTINEL};
1538 // Reset the per-run instruction counter so analysis
1539 // front-ends can ask "how many did this top-level call
1540 // burn?" without subtracting from a stale snapshot.
1541 state.instructions_executed = 0;
1542 loop {
1543 // Honour any yield request the most recently dispatched
1544 // stub left behind. Phase 3 of the scheduler refactor:
1545 // a `Wait`/`Yield`/`Exit` request handed up from a stub
1546 // suspends the active thread and resumes the next
1547 // `Ready` one. Until Phase 3d ships, only `Sleep` and
1548 // `Yield` (single-thread) are observable here — both
1549 // resolve as "spin until wake-up" without an actual
1550 // context switch.
1551 if let Some(req) = state.yield_requested.take() {
1552 handle_yield(cpu, state, req);
1553 }
1554 // Scheduler nudge: when the active thread isn't
1555 // runnable (Terminated / Waiting because no other
1556 // Ready thread could be picked at yield time), look
1557 // for any thread sleeping on a Sleep wait. The
1558 // earliest wake target fast-forwards the global
1559 // clock; `wake_sleep_if_due` then moves matching
1560 // threads back to Ready, and `schedule_next_thread`
1561 // switches into one of them.
1562 let active_runnable = matches!(
1563 state.cur_thread().status,
1564 crate::sched::ThreadStatus::Ready | crate::sched::ThreadStatus::Running
1565 );
1566 if !active_runnable {
1567 if let Some(earliest) = earliest_sleep_resume(state) {
1568 state.scheduler.instructions_global =
1569 state.scheduler.instructions_global.max(earliest);
1570 wake_sleep_if_due(state);
1571 schedule_next_thread(cpu, state);
1572 }
1573 // Active thread still not runnable AND no Ready
1574 // peer was found — the run is done. Return so the
1575 // outer host caller observes a clean exit rather
1576 // than a busy spin.
1577 if !matches!(
1578 state.cur_thread().status,
1579 crate::sched::ThreadStatus::Ready | crate::sched::ThreadStatus::Running
1580 ) {
1581 cpu.regs.eip = RET_SENTINEL;
1582 return Ok(());
1583 }
1584 state.cur_thread_mut().status = crate::sched::ThreadStatus::Running;
1585 }
1586 if state.exit_requested.is_some() {
1587 // `kernel32!ExitProcess` was called. Force eip to
1588 // the sentinel so the outer caller's stack-frame
1589 // cleanup is consistent and exit cleanly.
1590 cpu.regs.eip = RET_SENTINEL;
1591 return Ok(());
1592 }
1593 if cpu.regs.eip == RET_SENTINEL {
1594 // The active thread has run off the end of its
1595 // top-level callable. If it's the bootstrap thread
1596 // (TID 1), the entire run is done. Otherwise, mark
1597 // the thread Terminated and switch to the next
1598 // Ready one.
1599 if state.active_tid == 1 {
1600 return Ok(());
1601 }
1602 let dead_tid = state.active_tid;
1603 state.cur_thread_mut().status = crate::sched::ThreadStatus::Terminated;
1604 on_thread_terminated(state, dead_tid);
1605 schedule_next_thread(cpu, state);
1606 // After the switch the live CPU points at the next
1607 // thread; if no other was Ready, we're back on the
1608 // bootstrap thread and `schedule_next_thread`
1609 // left the live CPU untouched — so we'll re-enter
1610 // this branch and return.
1611 if state.active_tid == 1
1612 && matches!(
1613 state.cur_thread().status,
1614 crate::sched::ThreadStatus::Ready | crate::sched::ThreadStatus::Running
1615 )
1616 && cpu.regs.eip == RET_SENTINEL
1617 {
1618 return Ok(());
1619 }
1620 continue;
1621 }
1622 // Optional instruction budget — both instruction steps
1623 // and stub dispatches are counted as one "step" each,
1624 // since either is a unit of progress the host attributed
1625 // to the guest. When the budget hits zero, bail with a
1626 // clean `BudgetExhausted` so adversarial samples can't
1627 // loop the analyser host.
1628 if let Some(remaining) = state.instruction_budget.as_mut() {
1629 if *remaining == 0 {
1630 return Err(crate::Error::Win32(Win32Error::BudgetExhausted {
1631 executed: state.instructions_executed,
1632 }));
1633 }
1634 *remaining -= 1;
1635 }
1636 state.instructions_executed = state.instructions_executed.saturating_add(1);
1637 state.scheduler.instructions_global = state.scheduler.instructions_global.saturating_add(1);
1638 // Quantum-based preemption (Phase 4). Each executed
1639 // instruction or stub dispatch counts against the
1640 // current thread's quantum. When it hits zero, ask the
1641 // scheduler to switch — but only when there is another
1642 // Ready thread to switch to, otherwise the current
1643 // thread just keeps the floor with a fresh quantum.
1644 {
1645 let quantum_default = state.scheduler.quantum_default;
1646 let cur_tid = state.active_tid;
1647 let t = state.cur_thread_mut();
1648 if t.quantum_remaining > 0 {
1649 t.quantum_remaining -= 1;
1650 }
1651 let exhausted = t.quantum_remaining == 0;
1652 if exhausted {
1653 t.quantum_remaining = quantum_default;
1654 }
1655 if exhausted {
1656 let has_peer = state.threads.iter().any(|(tid, ts)| {
1657 *tid != cur_tid && matches!(ts.status, crate::sched::ThreadStatus::Ready)
1658 });
1659 if has_peer {
1660 state.yield_requested = Some(crate::sched::YieldRequest::Yield);
1661 }
1662 }
1663 }
1664 if registry.is_thunk(cpu.regs.eip) {
1665 match dispatch_stub(cpu, mmu, registry, state) {
1666 Ok(()) => continue,
1667 Err(e) => {
1668 #[cfg(feature = "trace")]
1669 emit_trap_event(cpu, mmu, &e);
1670 return Err(e);
1671 }
1672 }
1673 }
1674 match cpu.step(mmu) {
1675 Ok(StepOk::Continued) => continue,
1676 Ok(StepOk::Halted) => {
1677 // The active thread executed a `ret` whose
1678 // popped address was `RET_SENTINEL`. For the
1679 // bootstrap thread that's the run's exit; for
1680 // any other thread it means the thread proc
1681 // returned, so we mark it Terminated and let
1682 // the scheduler pick the next runnable peer.
1683 if state.active_tid == 1 {
1684 return Ok(());
1685 }
1686 cpu.regs.eip = RET_SENTINEL;
1687 let dead_tid = state.active_tid;
1688 state.cur_thread_mut().status = crate::sched::ThreadStatus::Terminated;
1689 on_thread_terminated(state, dead_tid);
1690 schedule_next_thread(cpu, state);
1691 continue;
1692 }
1693 Err(t) => {
1694 let e: crate::Error = t.into();
1695 #[cfg(feature = "trace")]
1696 emit_trap_event(cpu, mmu, &e);
1697 return Err(e);
1698 }
1699 }
1700 }
1701}
1702
/// Trace-feature-gated: format the trap variant + register
/// snapshot and push one `kind=trap` JSONL event.
///
/// The event carries three derived fields plus the register dump:
///
/// * `label` — the name of the error variant (`"MemoryFault"`,
///   `"UndefinedOpcode"`, `"Win32"`, ...).
/// * an address slot — for the four memory/protection faults this
///   is the faulting `addr` carried by the trap, *not* the
///   instruction pointer; for traps that embed their own `eip` it is
///   that value; for everything else (`UnresolvedImport`, and the
///   non-`Trap` error families) it is the live `cpu.regs.eip`.
/// * `opcode` — `Some(..)` only for `UndefinedOpcode` and
///   `UnimplementedMmx`, `None` otherwise.
///
/// The register snapshot is the eight general-purpose registers in
/// the order eax, ecx, edx, ebx, esp, ebp, esi, edi.
#[cfg(feature = "trace")]
fn emit_trap_event(cpu: &Cpu, mmu: &Mmu, err: &crate::Error) {
    use crate::emulator::regs::Reg32;
    use crate::emulator::Trap;

    // Both matches are deliberately exhaustive (no `_` arm) so that
    // adding a new error/trap variant forces a decision here.
    let (label, eip, opcode) = match err {
        crate::Error::Trap(t) => match t {
            // Memory faults: report the faulting address.
            Trap::MemoryFault { addr } => ("MemoryFault", *addr, None::<u32>),
            Trap::ReadProtectFault { addr } => ("ReadProtectFault", *addr, None),
            Trap::WriteProtectFault { addr } => ("WriteProtectFault", *addr, None),
            Trap::ExecuteProtectFault { addr } => ("ExecuteProtectFault", *addr, None),
            // Instruction-level traps: report the eip recorded in the trap.
            Trap::UndefinedOpcode { eip, opcode } => ("UndefinedOpcode", *eip, Some(*opcode)),
            Trap::PrivilegedOpcode { eip, .. } => ("PrivilegedOpcode", *eip, None),
            Trap::DivideByZero { eip } => ("DivideByZero", *eip, None),
            // No address is read from this variant: fall back to the live eip.
            Trap::UnresolvedImport { .. } => ("UnresolvedImport", cpu.regs.eip, None),
            Trap::InstructionLimitExceeded { eip, .. } => ("InstructionLimitExceeded", *eip, None),
            Trap::UnimplementedMmx { eip, opcode, .. } => {
                ("UnimplementedMmx", *eip, Some(*opcode))
            }
        },
        // Non-trap errors are attributed to wherever the CPU currently is.
        crate::Error::PeLoader(_) => ("PeLoader", cpu.regs.eip, None),
        crate::Error::Win32(_) => ("Win32", cpu.regs.eip, None),
        crate::Error::NotImplemented => ("NotImplemented", cpu.regs.eip, None),
    };

    let regs = [
        ("eax", cpu.regs.get32(Reg32::Eax)),
        ("ecx", cpu.regs.get32(Reg32::Ecx)),
        ("edx", cpu.regs.get32(Reg32::Edx)),
        ("ebx", cpu.regs.get32(Reg32::Ebx)),
        ("esp", cpu.regs.esp()),
        ("ebp", cpu.regs.get32(Reg32::Ebp)),
        ("esi", cpu.regs.get32(Reg32::Esi)),
        ("edi", cpu.regs.get32(Reg32::Edi)),
    ];
    // The snapshot is passed by reference; the extracted text had
    // `&regs` garbled into a `®` sign, which does not compile.
    mmu.trace.ev_trap(label, eip, opcode, &regs);
}
1747
1748/// Push args right-to-left, push the synthetic `RET_SENTINEL`,
1749/// jump to `target_va`, run the emulator until it returns,
1750/// and report the final `eax` value.
1751///
1752/// This is the building block both `Sandbox::call_dll_main`
1753/// and the round-2 `vfw32` stub surface use to invoke an
1754/// exported guest function with stdcall calling convention.
1755/// On entry, `cpu.regs.eip` may be anything; on exit it is
1756/// the popped return address (= `RET_SENTINEL`). Caller-saved
1757/// registers are not preserved beyond what the guest callee
1758/// preserves itself.
1759pub fn call_guest(
1760 cpu: &mut Cpu,
1761 mmu: &mut Mmu,
1762 registry: &Registry,
1763 state: &mut HostState,
1764 target_va: u32,
1765 args: &[u32],
1766) -> Result<u32, crate::Error> {
1767 use crate::emulator::isa_int::RET_SENTINEL;
1768 use crate::emulator::regs::Reg32;
1769 // Push args right-to-left.
1770 for a in args.iter().rev() {
1771 cpu.push32(mmu, *a)?;
1772 }
1773 cpu.push32(mmu, RET_SENTINEL)?;
1774 cpu.regs.eip = target_va;
1775 run_until_sentinel(cpu, mmu, registry, state)?;
1776 Ok(cpu.regs.get32(Reg32::Eax))
1777}
1778
#[cfg(test)]
mod tests {
    //! Unit tests for the stub registry, the cdecl trace-arg table
    //! and `dispatch_stub`'s stack handling / trace output.

    use super::*;
    use crate::emulator::{mmu::Perm, Mmu};

    /// Stub that ignores all of its inputs and returns `0xCAFE`.
    fn dummy_stub(
        _cpu: &mut Cpu,
        _mmu: &mut Mmu,
        _h: &mut HostState,
        _r: &Registry,
    ) -> Result<u32, Win32Error> {
        Ok(0xCAFE)
    }

    /// Build the MMU + CPU pair every dispatch test starts from:
    /// a read/write mapping registered with `map(0x4000, 0x4000, ..)`
    /// and `esp` set to `0x7000`.
    fn machine_with_stack() -> (Cpu, Mmu) {
        let mut mmu = Mmu::new();
        mmu.map(0x4000, 0x4000, Perm::R | Perm::W);
        let mut cpu = Cpu::new();
        cpu.regs.set_esp(0x7000);
        (cpu, mmu)
    }

    /// Capture sink shared between the trace state (which owns the
    /// boxed writer) and the test (which reads back the JSONL line).
    #[cfg(feature = "trace")]
    struct CapSink(std::sync::Arc<std::sync::Mutex<Vec<u8>>>);

    #[cfg(feature = "trace")]
    impl std::io::Write for CapSink {
        fn write(&mut self, b: &[u8]) -> std::io::Result<usize> {
            self.0.lock().unwrap().extend_from_slice(b);
            Ok(b.len())
        }

        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    /// Install a [`CapSink`] as `mmu`'s trace sink and return the
    /// shared buffer it appends to.
    #[cfg(feature = "trace")]
    fn install_capture_sink(mmu: &mut Mmu) -> std::sync::Arc<std::sync::Mutex<Vec<u8>>> {
        use std::sync::{Arc, Mutex};

        let buf = Arc::new(Mutex::new(Vec::new()));
        mmu.trace.set_sink(Box::new(CapSink(Arc::clone(&buf))));
        buf
    }

    /// Snapshot everything written to the capture buffer as UTF-8.
    #[cfg(feature = "trace")]
    fn captured_text(buf: &std::sync::Mutex<Vec<u8>>) -> String {
        String::from_utf8(buf.lock().unwrap().clone()).unwrap()
    }

    #[test]
    fn registry_assigns_stable_thunk_addresses() {
        let mut r = Registry::new();
        let a = r.register("kernel32.dll", "Foo", dummy_stub, 1);
        let b = r.register("kernel32.dll", "Bar", dummy_stub, 0);
        // Re-registering the same (dll, name) must hand back the
        // address assigned the first time.
        let a2 = r.register("kernel32.dll", "Foo", dummy_stub, 1);
        assert_eq!(a, a2);
        assert_ne!(a, b);
        assert!(r.is_thunk(a));
    }

    #[test]
    fn registry_resolve_is_case_insensitive_on_dll_name() {
        let mut r = Registry::new();
        let addr = r.register("KERNEL32.DLL", "GetProcessHeap", dummy_stub, 0);
        assert_eq!(r.resolve("kernel32.dll", "GetProcessHeap"), Some(addr));
        assert_eq!(r.resolve("Kernel32.Dll", "GetProcessHeap"), Some(addr));
    }

    #[test]
    fn cdecl_trace_arg_count_covers_msvcrt_heap_surface() {
        // Single-arg msvcrt cdecl entries.
        assert_eq!(cdecl_trace_arg_count("msvcrt.dll", "malloc"), Some(1));
        assert_eq!(cdecl_trace_arg_count("msvcrt.dll", "free"), Some(1));
        assert_eq!(
            cdecl_trace_arg_count("msvcrt.dll", "??2@YAPAXI@Z"),
            Some(1),
            "operator new",
        );
        assert_eq!(
            cdecl_trace_arg_count("msvcrt.dll", "??3@YAXPAX@Z"),
            Some(1),
            "operator delete",
        );
        // Two-arg msvcrt cdecl entries (pre-declared for future
        // calloc / realloc registrations).
        assert_eq!(cdecl_trace_arg_count("msvcrt.dll", "calloc"), Some(2));
        assert_eq!(cdecl_trace_arg_count("msvcrt.dll", "realloc"), Some(2));
    }

    #[test]
    fn cdecl_trace_arg_count_returns_none_for_unknown_calls() {
        assert_eq!(
            cdecl_trace_arg_count("kernel32.dll", "GetProcessHeap"),
            None
        );
        assert_eq!(cdecl_trace_arg_count("msvcrt.dll", "memcpy"), None);
        assert_eq!(
            cdecl_trace_arg_count("MSVCRT.DLL", "malloc"),
            None,
            "match is exact-case on dll string per registry contract"
        );
    }

    #[cfg(feature = "trace")]
    #[test]
    fn dispatch_emits_size_arg_for_msvcrt_malloc() {
        // Bring up an MMU + CPU + registry exactly as a real
        // dispatch would see them, with the trace output captured.
        let (mut cpu, mut mmu) = machine_with_stack();
        let buf = install_capture_sink(&mut mmu);

        let mut registry = Registry::new();
        // Register a dummy malloc-shaped stub at the msvcrt slot.
        // The stub returns a known pointer (the value the trace
        // event records as `ret`); the SIZE arg comes from the
        // stack at [esp+4] and must surface as `args:[2928]`.
        fn dummy_malloc_stub(
            _cpu: &mut Cpu,
            _mmu: &mut Mmu,
            _h: &mut HostState,
            _r: &Registry,
        ) -> Result<u32, Win32Error> {
            Ok(0x6000_0000)
        }
        let addr = registry.register("msvcrt.dll", "malloc", dummy_malloc_stub, 0);

        // Cdecl call frame: ret addr at [esp], size at [esp+4].
        // 2928 == 0xb70 — matches the auditor reference value.
        cpu.push32(&mut mmu, 2928).unwrap(); // arg0 (size)
        cpu.push32(&mut mmu, 0x1c218058).unwrap(); // saved ret addr (call-site EIP)

        cpu.regs.eip = addr;
        let mut state = HostState::new(0, 0);
        dispatch_stub(&mut cpu, &mut mmu, &registry, &mut state).unwrap();

        // The captured JSONL line should carry args:[2928] (decimal,
        // matching the existing ev_win32_call format), the dummy
        // pointer in `ret`, and the call-site EIP (NOT the thunk).
        let s = captured_text(&buf);
        assert!(s.contains(r#""kind":"win32_call""#), "line: {s}");
        assert!(s.contains(r#""dll":"msvcrt.dll""#), "line: {s}");
        assert!(s.contains(r#""name":"malloc""#), "line: {s}");
        assert!(
            s.contains(r#""args":[2928]"#),
            "expected args:[2928] (== 0xb70), got: {s}",
        );
        assert!(s.contains(r#""ret":"0x60000000""#), "line: {s}");
        assert!(s.contains(r#""eip":"0x1c218058""#), "line: {s}");
    }

    #[cfg(feature = "trace")]
    #[test]
    fn dispatch_emits_pointer_arg_for_msvcrt_operator_delete() {
        let (mut cpu, mut mmu) = machine_with_stack();
        let buf = install_capture_sink(&mut mmu);

        let mut registry = Registry::new();
        fn dummy_delete_stub(
            _cpu: &mut Cpu,
            _mmu: &mut Mmu,
            _h: &mut HostState,
            _r: &Registry,
        ) -> Result<u32, Win32Error> {
            Ok(0)
        }
        let addr = registry.register("msvcrt.dll", "??3@YAXPAX@Z", dummy_delete_stub, 0);

        cpu.push32(&mut mmu, 0x6000_02c0).unwrap(); // ptr arg
        cpu.push32(&mut mmu, 0x1c237e58).unwrap(); // saved ret

        cpu.regs.eip = addr;
        let mut state = HostState::new(0, 0);
        dispatch_stub(&mut cpu, &mut mmu, &registry, &mut state).unwrap();

        let s = captured_text(&buf);
        assert!(
            s.contains(r#""args":[1610613440]"#),
            "expected args:[1610613440] (== 0x600002c0), got: {s}",
        );
        assert!(s.contains(r#""name":"??3@YAXPAX@Z""#), "line: {s}");
    }

    #[test]
    fn dispatch_pops_return_addr_and_args() {
        let (mut cpu, mut mmu) = machine_with_stack();

        let mut registry = Registry::new();
        let addr = registry.register("kernel32.dll", "Sample", dummy_stub, 2);

        // Lay out a fake call frame: ret addr, arg1, arg2.
        cpu.push32(&mut mmu, 0x4444).unwrap(); // arg2
        cpu.push32(&mut mmu, 0x3333).unwrap(); // arg1
        cpu.push32(&mut mmu, 0x2222).unwrap(); // saved ret addr
        let esp_before = cpu.regs.esp();

        cpu.regs.eip = addr;
        let mut state = HostState::new(0, 0);
        dispatch_stub(&mut cpu, &mut mmu, &registry, &mut state).unwrap();

        // After: eax=0xCAFE, eip = ret addr, esp pops 12 bytes
        // total (1 ret + 2 args).
        assert_eq!(cpu.regs.get32(crate::emulator::regs::Reg32::Eax), 0xCAFE);
        assert_eq!(cpu.regs.eip, 0x2222);
        assert_eq!(cpu.regs.esp(), esp_before + 12);
    }
}