Skip to main content

ud_emulator/
trace.rs

1//! Reverse-engineering trace surface (gated on the `trace` Cargo
2//! feature).
3//!
4//! See `docs/winmf/winmf-emulator.md` §"Trace mode" for the design
5//! contract. With the feature on, an opt-in JSONL event tape is
6//! emitted at four probe sites:
7//!
8//! 1. **Win32 calls** (`kind=win32_call`) — every dispatch through
9//!    [`crate::win32::dispatch_stub`].
10//! 2. **Memory watchpoints** (`kind=mem_write` / `kind=mem_read`)
11//!    — every guest access to a range registered via
12//!    [`crate::Sandbox::watch`].
13//! 3. **Instruction trace** (`kind=exec`) — per-instruction event
14//!    when the `trace-exec` sub-feature is enabled and the
15//!    `Sandbox::set_exec_trace(true)` runtime flag is set.
16//! 4. **Traps** (`kind=trap`) — unconditionally emitted when a
17//!    fault propagates out of the run loop, so something is on
18//!    the trace tape even when the codec misbehaves.
19//!
20//! The schema matches the JSONL shape the rest of the workspace
21//! uses (oxideav-magicyuv / oxideav-tta `--features trace`),
22//! `jq`-line-greppable and `awk`-friendly. Example events from
23//! the design doc are authoritative.
24//!
25//! With the feature OFF, every type and function in this module
26//! is `#[cfg(...)]`'d out; call sites in
27//! [`crate::emulator::mmu`], [`crate::win32`], and
28//! [`crate::runtime`] compile to nothing — release builds pay
29//! zero cost.
30
31#![cfg(feature = "trace")]
32
33use std::cell::RefCell;
34use std::env;
35use std::fs::File;
36use std::io::{self, Write};
37use std::path::PathBuf;
38
39/// Memory-watchpoint trigger discipline.
40#[derive(Copy, Clone, Debug, PartialEq, Eq)]
41pub enum WatchMode {
42    /// Fire only on writes intersecting `[addr, addr+size)`.
43    Write,
44    /// Fire only on reads intersecting `[addr, addr+size)`.
45    Read,
46    /// Fire on both reads and writes.
47    Both,
48}
49
50impl WatchMode {
51    /// Does this mode emit a `mem_write` event?
52    pub const fn watches_writes(self) -> bool {
53        matches!(self, WatchMode::Write | WatchMode::Both)
54    }
55    /// Does this mode emit a `mem_read` event?
56    pub const fn watches_reads(self) -> bool {
57        matches!(self, WatchMode::Read | WatchMode::Both)
58    }
59}
60
61/// One installed watchpoint covering `[addr, addr+size)`.
62#[derive(Copy, Clone, Debug, PartialEq, Eq)]
63pub struct Watchpoint {
64    pub addr: u32,
65    pub size: u32,
66    pub mode: WatchMode,
67}
68
69impl Watchpoint {
70    /// True iff `[hit, hit+hit_size)` overlaps `[self.addr, self.addr+self.size)`.
71    pub fn overlaps(&self, hit: u32, hit_size: u32) -> bool {
72        let a_lo = self.addr;
73        let a_hi = self.addr.wrapping_add(self.size);
74        let b_lo = hit;
75        let b_hi = hit.wrapping_add(hit_size);
76        // Half-open intervals overlap when neither is entirely
77        // below the other. Use saturating to avoid u32 wrap on
78        // the rare top-of-address-space probe.
79        a_lo < b_hi && b_lo < a_hi
80    }
81}
82
83/// Per-sandbox trace state — owned by [`crate::emulator::Mmu`] so
84/// the MMU's hot path can consult watchpoints without an extra
85/// indirection, and shared via `&mut` with the higher layers
86/// (Cpu, Sandbox, Win32 dispatch) that emit their own probe
87/// flavours.
88pub struct TraceState {
89    /// Active watchpoints. Linear-scan inside hot paths; we don't
90    /// expect more than a handful at a time per spec.
91    pub watchpoints: Vec<Watchpoint>,
92    /// Sink that JSONL events flush to. `None` ⇒ events are
93    /// silently dropped (per the design doc), even when the
94    /// feature is on.
95    ///
96    /// Wrapped in `RefCell` so the immutable load paths in the
97    /// MMU (which take `&self`) can still emit a `mem_read`
98    /// event without forcing every caller in the crate onto a
99    /// `&mut Mmu` borrow.
100    pub sink: RefCell<Option<Box<dyn Write + Send>>>,
101    /// `true` when the `trace-exec` sub-feature is on AND the
102    /// runtime has flipped the per-instruction trace on. The
103    /// feature flag alone gates compilation; this flag gates
104    /// emission per-step, so a sandbox can toggle exec trace mid
105    /// run when triaging a specific section.
106    pub exec_on: bool,
107    /// Mirror of `cpu.regs.eip` updated by [`crate::emulator::Cpu::step`]
108    /// before each MMU access — the MMU itself doesn't have a
109    /// reference to the CPU, so we shadow the EIP into the trace
110    /// state on the slow probe path.
111    pub last_eip: u32,
112}
113
114impl Default for TraceState {
115    fn default() -> Self {
116        Self::new()
117    }
118}
119
120impl TraceState {
121    /// Create a TraceState with an empty watchpoint set, the
122    /// default sink (env-var) installed, and exec-trace off.
123    pub fn new() -> Self {
124        TraceState {
125            watchpoints: Vec::new(),
126            sink: RefCell::new(open_default_sink()),
127            exec_on: false,
128            last_eip: 0,
129        }
130    }
131
132    /// Install a watchpoint. Multiple watchpoints may overlap;
133    /// each fires independently. A duplicate
134    /// `(addr, size, mode)` is registered as a separate entry —
135    /// callers wanting de-dup can use [`Self::unwatch`] first.
136    pub fn watch(&mut self, addr: u32, size: u32, mode: WatchMode) {
137        self.watchpoints.push(Watchpoint { addr, size, mode });
138    }
139
140    /// Remove watchpoints whose `(addr, size)` exactly matches.
141    /// Mode is ignored for the match — the design doc treats a
142    /// `(addr, size)` pair as the watchpoint identity.
143    pub fn unwatch(&mut self, addr: u32, size: u32) {
144        self.watchpoints
145            .retain(|w| !(w.addr == addr && w.size == size));
146    }
147
148    /// Override the sink at runtime. Use this from tests to
149    /// capture events into a `Vec<u8>`-backed `Box<dyn Write>`.
150    pub fn set_sink(&mut self, sink: Box<dyn Write + Send>) {
151        *self.sink.borrow_mut() = Some(sink);
152    }
153
154    /// Convenience inverse — tear down the current sink so
155    /// subsequent emits drop silently.
156    pub fn clear_sink(&mut self) {
157        *self.sink.borrow_mut() = None;
158    }
159
160    /// Set the per-step EIP shadow. Called by
161    /// [`crate::emulator::Cpu::step`] once per instruction so the
162    /// MMU's `mem_read` / `mem_write` probes can include the
163    /// faulting EIP without taking another reference to the CPU.
164    pub fn set_eip(&mut self, eip: u32) {
165        self.last_eip = eip;
166    }
167
168    /// Walk the watchpoint list — return the first watchpoint
169    /// whose mode + range matches the access, or `None`.
170    pub fn matched_for_write(&self, addr: u32, size: u32) -> Option<&Watchpoint> {
171        self.watchpoints
172            .iter()
173            .find(|w| w.mode.watches_writes() && w.overlaps(addr, size))
174    }
175
176    /// As above for reads.
177    pub fn matched_for_read(&self, addr: u32, size: u32) -> Option<&Watchpoint> {
178        self.watchpoints
179            .iter()
180            .find(|w| w.mode.watches_reads() && w.overlaps(addr, size))
181    }
182
183    /// Write one already-formatted JSONL line followed by `\n`.
184    /// Errors are silenced — the trace tape is a debugging
185    /// convenience, not part of any correctness contract.
186    pub fn emit_line(&self, line: &str) {
187        if let Some(sink) = self.sink.borrow_mut().as_mut() {
188            let _ = sink.write_all(line.as_bytes());
189            let _ = sink.write_all(b"\n");
190            let _ = sink.flush();
191        }
192    }
193
194    /// True iff a sink is currently installed. Used by emit
195    /// helpers to short-circuit the formatting work when the
196    /// event would be dropped.
197    pub fn has_sink(&self) -> bool {
198        self.sink.borrow().is_some()
199    }
200
201    // ------------- High-level event helpers ---------------------
202
203    /// Emit a `kind=win32_call` event.
204    ///
205    /// `args` is captured from the guest stack at call time;
206    /// `ret` is the dword the stub put back into `eax`.
207    pub fn ev_win32_call(&self, dll: &str, name: &str, args: &[u32], ret: u32, eip: u32) {
208        if !self.has_sink() {
209            return;
210        }
211        let mut s = String::with_capacity(96);
212        s.push_str(r#"{"kind":"win32_call","dll":""#);
213        s.push_str(dll);
214        s.push_str(r#"","name":""#);
215        s.push_str(name);
216        s.push_str(r#"","args":["#);
217        for (i, a) in args.iter().enumerate() {
218            if i > 0 {
219                s.push(',');
220            }
221            // Args printed as decimal per the spec example.
222            use core::fmt::Write as _;
223            let _ = write!(s, "{}", a);
224        }
225        s.push_str(r#"],"ret":""#);
226        push_hex32(&mut s, ret);
227        s.push_str(r#"","eip":""#);
228        push_hex32(&mut s, eip);
229        s.push_str(r#""}"#);
230        self.emit_line(&s);
231    }
232
233    /// Emit a `kind=mem_write` event.
234    pub fn ev_mem_write(&self, addr: u32, size: u32, value: u64, eip: u32) {
235        if !self.has_sink() {
236            return;
237        }
238        let mut s = String::with_capacity(96);
239        s.push_str(r#"{"kind":"mem_write","addr":""#);
240        push_hex32(&mut s, addr);
241        s.push_str(r#"","size":"#);
242        use core::fmt::Write as _;
243        let _ = write!(s, "{}", size);
244        s.push_str(r#","value":""#);
245        push_hex_value(&mut s, size, value);
246        s.push_str(r#"","eip":""#);
247        push_hex32(&mut s, eip);
248        s.push_str(r#""}"#);
249        self.emit_line(&s);
250    }
251
252    /// Emit a `kind=mem_read` event.
253    pub fn ev_mem_read(&self, addr: u32, size: u32, value: u64, eip: u32) {
254        if !self.has_sink() {
255            return;
256        }
257        let mut s = String::with_capacity(96);
258        s.push_str(r#"{"kind":"mem_read","addr":""#);
259        push_hex32(&mut s, addr);
260        s.push_str(r#"","size":"#);
261        use core::fmt::Write as _;
262        let _ = write!(s, "{}", size);
263        s.push_str(r#","value":""#);
264        push_hex_value(&mut s, size, value);
265        s.push_str(r#"","eip":""#);
266        push_hex32(&mut s, eip);
267        s.push_str(r#""}"#);
268        self.emit_line(&s);
269    }
270
271    /// Emit a `kind=exec` event. `bytes` is hex-encoded
272    /// (`opcode_bytes` field in the design doc); `mnemonic` is a
273    /// short SDM-style hint when available, or just the leading
274    /// opcode byte otherwise.
275    pub fn ev_exec(&self, eip: u32, bytes: &[u8], mnemonic: &str, registers: &[(&str, u32)]) {
276        if !self.has_sink() {
277            return;
278        }
279        let mut s = String::with_capacity(192);
280        s.push_str(r#"{"kind":"exec","eip":""#);
281        push_hex32(&mut s, eip);
282        s.push_str(r#"","bytes":""#);
283        for b in bytes {
284            use core::fmt::Write as _;
285            let _ = write!(s, "{:02x}", b);
286        }
287        s.push_str(r#"","mnemonic":""#);
288        // mnemonic should not contain quotes or newlines for our
289        // callers; bypass JSON escaping for that reason.
290        s.push_str(mnemonic);
291        s.push_str(r#"","registers":{"#);
292        for (i, (name, val)) in registers.iter().enumerate() {
293            if i > 0 {
294                s.push(',');
295            }
296            s.push('"');
297            s.push_str(name);
298            s.push_str(r#"":""#);
299            push_hex32(&mut s, *val);
300            s.push('"');
301        }
302        s.push_str(r#"}}"#);
303        self.emit_line(&s);
304    }
305
306    /// Emit a `kind=trap` event.
307    pub fn ev_trap(&self, trap: &str, eip: u32, opcode: Option<u32>, registers: &[(&str, u32)]) {
308        if !self.has_sink() {
309            return;
310        }
311        let mut s = String::with_capacity(160);
312        s.push_str(r#"{"kind":"trap","trap":""#);
313        s.push_str(trap);
314        s.push_str(r#"","eip":""#);
315        push_hex32(&mut s, eip);
316        s.push('"');
317        if let Some(op) = opcode {
318            s.push_str(r#","opcode":""#);
319            use core::fmt::Write as _;
320            let _ = write!(s, "0x{:02x}", op & 0xFF);
321            s.push('"');
322        }
323        s.push_str(r#","registers":{"#);
324        for (i, (name, val)) in registers.iter().enumerate() {
325            if i > 0 {
326                s.push(',');
327            }
328            s.push('"');
329            s.push_str(name);
330            s.push_str(r#"":""#);
331            push_hex32(&mut s, *val);
332            s.push('"');
333        }
334        s.push_str(r#"}}"#);
335        self.emit_line(&s);
336    }
337}
338
339fn push_hex32(s: &mut String, v: u32) {
340    use core::fmt::Write as _;
341    let _ = write!(s, "0x{:08x}", v);
342}
343
344fn push_hex_value(s: &mut String, size: u32, v: u64) {
345    use core::fmt::Write as _;
346    match size {
347        1 => {
348            let _ = write!(s, "0x{:02x}", (v as u8));
349        }
350        2 => {
351            let _ = write!(s, "0x{:04x}", (v as u16));
352        }
353        4 => {
354            let _ = write!(s, "0x{:08x}", (v as u32));
355        }
356        8 => {
357            let _ = write!(s, "0x{:016x}", v);
358        }
359        _ => {
360            let _ = write!(s, "0x{:x}", v);
361        }
362    }
363}
364
365/// Honour `OXIDEAV_VFW_TRACE_FILE`:
366///   * `=2` → stderr.
367///   * any other non-empty value → opened as a file (truncating).
368///   * unset / empty → `None` (caller is on the hook for
369///     [`TraceState::set_sink`] before any event will land).
370fn open_default_sink() -> Option<Box<dyn Write + Send>> {
371    let val = env::var_os("OXIDEAV_VFW_TRACE_FILE")?;
372    if val.is_empty() {
373        return None;
374    }
375    if val == "2" {
376        return Some(Box::new(io::stderr()));
377    }
378    let p = PathBuf::from(val);
379    File::create(&p)
380        .ok()
381        .map(|f| Box::new(f) as Box<dyn Write + Send>)
382}
383
384#[cfg(test)]
385mod tests {
386    use super::*;
387
388    #[test]
389    fn watchpoint_overlap_detects_intersecting_ranges() {
390        let w = Watchpoint {
391            addr: 0x1000,
392            size: 16,
393            mode: WatchMode::Write,
394        };
395        assert!(w.overlaps(0x1000, 1));
396        assert!(w.overlaps(0x100F, 1));
397        assert!(w.overlaps(0x0FF8, 16));
398        assert!(!w.overlaps(0x1010, 1));
399        assert!(!w.overlaps(0x0FFF, 1));
400    }
401
402    #[test]
403    fn watch_mode_predicates_match_design_doc() {
404        assert!(WatchMode::Write.watches_writes());
405        assert!(!WatchMode::Write.watches_reads());
406        assert!(WatchMode::Read.watches_reads());
407        assert!(!WatchMode::Read.watches_writes());
408        assert!(WatchMode::Both.watches_writes());
409        assert!(WatchMode::Both.watches_reads());
410    }
411
412    #[test]
413    fn unwatch_drops_only_exact_addr_size_match() {
414        let mut t = TraceState::new();
415        t.set_sink(Box::new(Vec::<u8>::new())); // discardable
416        t.watch(0x1000, 4, WatchMode::Write);
417        t.watch(0x2000, 4, WatchMode::Read);
418        t.unwatch(0x1000, 4);
419        assert_eq!(t.watchpoints.len(), 1);
420        assert_eq!(t.watchpoints[0].addr, 0x2000);
421    }
422
423    #[test]
424    fn ev_win32_call_emits_jsonl_line() {
425        let mut t = TraceState::default();
426        let buf: Vec<u8> = Vec::new();
427        t.set_sink(Box::new(buf));
428        t.ev_win32_call(
429            "kernel32.dll",
430            "HeapAlloc",
431            &[0xDEADBEEF, 0, 1024],
432            0x10001000,
433            0x10004A17,
434        );
435        // Sink is captured into a moved Box; we can't read it
436        // back from here. The next test does the round trip via
437        // a shared cursor.
438    }
439
440    #[test]
441    fn matched_for_write_finds_overlapping_watch() {
442        let mut t = TraceState::new();
443        t.watch(0x1000, 8, WatchMode::Both);
444        let m = t.matched_for_write(0x1004, 4).unwrap();
445        assert_eq!(m.addr, 0x1000);
446        assert!(t.matched_for_read(0x1004, 4).is_some());
447    }
448
449    /// A sink wrapper around `Vec<u8>` that we own here — used by
450    /// the integration-style tests that need to read events back.
451    pub(crate) struct VecSink(pub std::sync::Arc<std::sync::Mutex<Vec<u8>>>);
452    impl Write for VecSink {
453        fn write(&mut self, b: &[u8]) -> io::Result<usize> {
454            self.0.lock().unwrap().extend_from_slice(b);
455            Ok(b.len())
456        }
457        fn flush(&mut self) -> io::Result<()> {
458            Ok(())
459        }
460    }
461
462    #[test]
463    fn ev_mem_write_round_trip_through_shared_buffer() {
464        use std::sync::{Arc, Mutex};
465        let buf = Arc::new(Mutex::new(Vec::new()));
466        let mut t = TraceState::new();
467        t.set_sink(Box::new(VecSink(Arc::clone(&buf))));
468        t.ev_mem_write(0x1000, 4, 0x40, 0x10004A32);
469        let s = String::from_utf8(buf.lock().unwrap().clone()).unwrap();
470        assert!(s.contains(r#""kind":"mem_write""#));
471        assert!(s.contains(r#""addr":"0x00001000""#));
472        assert!(s.contains(r#""value":"0x00000040""#));
473        assert!(s.contains(r#""eip":"0x10004a32""#));
474    }
475}