vmi-os-windows 0.7.0

Windows OS specific code for VMI
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
//! x64 (AMD64) stack unwinding using .pdata / UNWIND_INFO.
//!
//! Implements the Windows x64 stack unwinding algorithm by reading
//! RUNTIME_FUNCTION entries from the PE exception directory and
//! processing UNWIND_INFO structures to recover caller register state.

use vmi_arch_amd64::Registers;
use vmi_core::{Va, VmiError, VmiState, driver::VmiRead};
use zerocopy::{FromBytes, IntoBytes};

use super::{Frame, Unwinder, Unwound};
use crate::{
    ArchAdapter, WindowsOs,
    pe::{ImageRuntimeFunctionEntry, PeImage},
};

// Reference:
// https://github.com/dotnet/coreclr/blob/a9f3fc16483eecfc47fb79c362811d870be02249/src/unwinder/amd64/unwinder_amd64.cpp
// TODO: epilog unwind

/// Maximum number of `UNW_FLAG_CHAININFO` links followed while walking
/// chained `UNWIND_INFO` entries.
///
/// Limits chain depth to guard against corrupted/circular data.
pub const UNWIND_CHAIN_LIMIT: u32 = 32;

// Unwind operation codes.

/// Push a nonvolatile integer register, decrementing RSP by 8.
///
/// The operation info is the number of the register. Because of the constraints
/// on epilogs, `UWOP_PUSH_NONVOL` unwind codes must appear first in the prolog
/// and correspondingly, last in the unwind code array. This relative ordering
/// applies to all other unwind codes except `UWOP_PUSH_MACHFRAME`.
pub const UWOP_PUSH_NONVOL: u8 = 0;

/// Allocate a large-sized area on the stack.
///
/// There are two forms. If the operation info equals 0, then the size of the
/// allocation divided by 8 is recorded in the next slot, allowing an allocation
/// up to 512K - 8. If the operation info equals 1, then the unscaled size of
/// the allocation is recorded in the next two slots in little-endian format,
/// allowing allocations up to 4GB - 8.
pub const UWOP_ALLOC_LARGE: u8 = 1;

/// Allocate a small-sized area on the stack.
///
/// The size of the allocation is the operation info field * 8 + 8, allowing
/// allocations from 8 to 128 bytes.
///
/// The unwind code for a stack allocation should always use the shortest
/// possible encoding:
///
/// | Allocation Size     | Unwind Code                            |
/// | ------------------- | -------------------------------------- |
/// | 8 to 128 bytes      | `UWOP_ALLOC_SMALL`                     |
/// | 136 to 512K-8 bytes | `UWOP_ALLOC_LARGE`, operation info = 0 |
/// | 512K to 4G-8 bytes  | `UWOP_ALLOC_LARGE`, operation info = 1 |
pub const UWOP_ALLOC_SMALL: u8 = 2;

/// Establish the frame pointer register by setting the register to some offset of the current RSP.
///
/// The offset is equal to the Frame Register offset (scaled) field in the
/// `UNWIND_INFO`, multiplied by 16, allowing offsets from 0 to 240. The use
/// of an offset permits establishing a frame pointer that points to the middle
/// of the fixed stack allocation, helping code density by allowing more
/// accesses to use short instruction forms. The operation info field is
/// reserved and shouldn't be used.
pub const UWOP_SET_FPREG: u8 = 3;

/// Save a nonvolatile integer register on the stack using a MOV instead of a PUSH.
///
/// This code is primarily used for *shrink-wrapping*, where a nonvolatile
/// register is saved to the stack in a position that was previously allocated.
/// The operation info is the number of the register. The scaled-by-8 stack
/// offset is recorded in the next unwind operation code slot.
pub const UWOP_SAVE_NONVOL: u8 = 4;

/// Save a nonvolatile integer register on the stack with a long offset, using a MOV instead of a PUSH.
///
/// This code is primarily used for *shrink-wrapping*, where a nonvolatile
/// register is saved to the stack in a position that was previously allocated.
/// The operation info is the number of the register. The unscaled stack offset
/// is recorded in the next two unwind operation code slots.
pub const UWOP_SAVE_NONVOL_FAR: u8 = 5;

/// Undocumented.
///
/// Treated in this module as a version 2 epilog descriptor: the opcode
/// processing loop does not interpret it and steps over it as a two-slot code.
pub const UWOP_EPILOG: u8 = 6;

/// Undocumented.
///
/// Previously 64-bit `UWOP_SAVE_XMM_FAR`. The opcode processing loop in this
/// module does not interpret it and steps over it as a three-slot code.
pub const UWOP_SPARE_CODE: u8 = 7;

/// Save all 128 bits of a nonvolatile XMM register on the stack.
///
/// The operation info is the number of the register. The scaled-by-16 stack
/// offset is recorded in the next slot.
pub const UWOP_SAVE_XMM128: u8 = 8;

/// Save all 128 bits of a nonvolatile XMM register on the stack with a long offset.
///
/// The operation info is the number of the register. The unscaled stack offset
/// is recorded in the next two slots.
pub const UWOP_SAVE_XMM128_FAR: u8 = 9;

/// Push a machine frame.
///
/// This unwind code is used to record the effect of a hardware interrupt or
/// exception. There are two forms. If the operation info equals 0, one of these
/// frames has been pushed on the stack:
///
/// | Location | Value   |
/// | -------- | ------- |
/// | RSP+32   | SS      |
/// | RSP+24   | Old RSP |
/// | RSP+16   | EFLAGS  |
/// | RSP+8    | CS      |
/// | RSP      | RIP     |
///
/// If the operation info equals 1, then one of these frames has been pushed:
///
/// | Location | Value      |
/// | -------- | ---------- |
/// | RSP+40   | SS         |
/// | RSP+32   | Old RSP    |
/// | RSP+24   | EFLAGS     |
/// | RSP+16   | CS         |
/// | RSP+8    | RIP        |
/// | RSP      | Error code |
///
/// This unwind code always appears in a dummy prolog, which is never actually
/// executed, but instead appears before the real entry point of an interrupt
/// routine, and exists only to provide a place to simulate the push of a
/// machine frame. `UWOP_PUSH_MACHFRAME` records that simulation, which
/// indicates the machine has conceptually done this operation:
///
/// 1. Pop RIP return address from top of stack into *Temp*
/// 2. Push SS
/// 3. Push old RSP
/// 4. Push EFLAGS
/// 5. Push CS
/// 6. Push *Temp*
/// 7. Push Error Code (if op info equals 1)
///
/// The simulated `UWOP_PUSH_MACHFRAME` operation decrements RSP by 40 (op info
/// equals 0) or 48 (op info equals 1).
pub const UWOP_PUSH_MACHFRAME: u8 = 10;

// UNW_FLAG values.

/// The function has no handler.
pub const UNW_FLAG_NHANDLER: u8 = 0x0;

/// The function has an exception handler that should be called.
pub const UNW_FLAG_EHANDLER: u8 = 0x1;

/// The function has a termination handler that should be called when unwinding an exception.
pub const UNW_FLAG_UHANDLER: u8 = 0x2;

/// The FunctionEntry member is the contents of a previous function table entry.
pub const UNW_FLAG_CHAININFO: u8 = 0x4;

/// Corresponds to the first 4 bytes of `UNWIND_INFO`.
///
/// The two bit-field bytes are stored packed; use the accessor methods
/// (`version`, `flags`, `frame_register`, `frame_offset`) to decode them.
/// The unwind code array that follows this header is not part of the struct.
#[allow(non_camel_case_types, non_snake_case)]
#[repr(C)]
#[derive(Debug, Clone, Copy, FromBytes, IntoBytes)]
pub struct UNWIND_INFO {
    /// UCHAR Version : 3;
    /// UCHAR Flags : 5;
    ///
    /// Version occupies the low 3 bits, Flags the high 5 bits.
    pub VersionAndFlags: u8,

    /// UCHAR SizeOfProlog;
    pub SizeOfProlog: u8,

    /// UCHAR CountOfCodes;
    ///
    /// Number of 2-byte unwind code slots following the header.
    pub CountOfCodes: u8,

    /// UCHAR FrameRegister : 4;
    /// UCHAR FrameOffset : 4;
    ///
    /// FrameRegister occupies the low 4 bits, FrameOffset the high 4 bits.
    pub FrameRegisterAndOffset: u8,
}

impl UNWIND_INFO {
    /// Unwind data format version, taken from the low three bits of the
    /// first header byte.
    pub fn version(&self) -> u8 {
        self.VersionAndFlags & 0b0000_0111
    }

    /// `UNW_FLAG_*` bits, taken from the high five bits of the first
    /// header byte.
    pub fn flags(&self) -> u8 {
        // Shifting a `u8` right by 3 already leaves only five bits.
        self.VersionAndFlags >> 3
    }

    /// Length of the function prolog in bytes.
    pub fn size_of_prolog(&self) -> u8 {
        self.SizeOfProlog
    }

    /// Number of 2-byte slots in the unwind code array.
    pub fn count_of_codes(&self) -> u8 {
        self.CountOfCodes
    }

    /// Encoding index of the frame pointer register (low nibble of the
    /// last header byte); 0 means no frame register is used.
    pub fn frame_register(&self) -> u8 {
        self.FrameRegisterAndOffset & 0b0000_1111
    }

    /// Scaled frame offset (high nibble of the last header byte).
    pub fn frame_offset(&self) -> u8 {
        // Shifting a `u8` right by 4 already leaves only four bits.
        self.FrameRegisterAndOffset >> 4
    }

    /// Byte offset, measured from the start of the header, of whatever
    /// follows the unwind code array (a chained `RUNTIME_FUNCTION` or
    /// handler data).
    ///
    /// Codes are 2 bytes each and the array is padded to an even number of
    /// slots so that the following data is 4-byte aligned.
    pub fn codes_end_offset(&self) -> u32 {
        let slot_count = u32::from(self.CountOfCodes);
        // Round up to the next even slot count.
        let padded_slots = (slot_count + 1) & !1;
        4 + padded_slots * 2
    }
}

// x64 register encoding indices, matching the CPU instruction encoding
// (REX.B + reg field) and the UNWIND_CODE.OpInfo numbering. These are
// used to index into the unwind context when processing unwind codes.

/// Index of `AMD64_CONTEXT::Rax`.
pub const REG_RAX: u8 = 0;

/// Index of `AMD64_CONTEXT::Rcx`.
pub const REG_RCX: u8 = 1;

/// Index of `AMD64_CONTEXT::Rdx`.
pub const REG_RDX: u8 = 2;

/// Index of `AMD64_CONTEXT::Rbx`.
pub const REG_RBX: u8 = 3;

/// Index of `AMD64_CONTEXT::Rsp`.
pub const REG_RSP: u8 = 4;

/// Index of `AMD64_CONTEXT::Rbp`.
pub const REG_RBP: u8 = 5;

/// Index of `AMD64_CONTEXT::Rsi`.
pub const REG_RSI: u8 = 6;

/// Index of `AMD64_CONTEXT::Rdi`.
pub const REG_RDI: u8 = 7;

/// Index of `AMD64_CONTEXT::R8`.
pub const REG_R8: u8 = 8;

/// Index of `AMD64_CONTEXT::R9`.
pub const REG_R9: u8 = 9;

/// Index of `AMD64_CONTEXT::R10`.
pub const REG_R10: u8 = 10;

/// Index of `AMD64_CONTEXT::R11`.
pub const REG_R11: u8 = 11;

/// Index of `AMD64_CONTEXT::R12`.
pub const REG_R12: u8 = 12;

/// Index of `AMD64_CONTEXT::R13`.
pub const REG_R13: u8 = 13;

/// Index of `AMD64_CONTEXT::R14`.
pub const REG_R14: u8 = 14;

/// Index of `AMD64_CONTEXT::R15`.
pub const REG_R15: u8 = 15;

/// Unwind context for x64 (AMD64).
///
/// Holds the register state needed for stack unwinding, including
/// the instruction pointer, stack pointer, and all callee-saved
/// (non-volatile) general-purpose registers.
#[derive(Debug, Clone)]
pub struct UnwindContextAmd64 {
    /// Instruction pointer (RIP).
    pub rip: u64,
    /// Stack pointer (RSP).
    pub rsp: u64,
    /// Base pointer (RBP) - callee-saved.
    pub rbp: u64,
    /// RBX - callee-saved.
    pub rbx: u64,
    /// RSI - callee-saved.
    pub rsi: u64,
    /// RDI - callee-saved.
    pub rdi: u64,
    /// R12 - callee-saved.
    pub r12: u64,
    /// R13 - callee-saved.
    pub r13: u64,
    /// R14 - callee-saved.
    pub r14: u64,
    /// R15 - callee-saved.
    pub r15: u64,
}

impl From<&Registers> for UnwindContextAmd64 {
    fn from(value: &Registers) -> Self {
        Self {
            rip: value.rip,
            rsp: value.rsp,
            rbp: value.rbp,
            rbx: value.rbx,
            rsi: value.rsi,
            rdi: value.rdi,
            r12: value.r12,
            r13: value.r13,
            r14: value.r14,
            r15: value.r15,
        }
    }
}

impl UnwindContextAmd64 {
    /// Looks up a tracked register by its x64 encoding index.
    ///
    /// Indices that do not name one of the tracked non-volatile registers
    /// (including RSP and all volatile registers) yield 0.
    pub fn get_register(&self, reg: u8) -> u64 {
        let tracked = match reg {
            REG_RBX => Some(self.rbx),
            REG_RBP => Some(self.rbp),
            REG_RSI => Some(self.rsi),
            REG_RDI => Some(self.rdi),
            REG_R12 => Some(self.r12),
            REG_R13 => Some(self.r13),
            REG_R14 => Some(self.r14),
            REG_R15 => Some(self.r15),
            _ => None,
        };

        tracked.unwrap_or(0)
    }

    /// Stores `value` into the tracked register named by the x64 encoding
    /// index `reg`.
    ///
    /// Only non-volatile registers are tracked; writes to any other index
    /// are silently dropped.
    pub fn set_register(&mut self, reg: u8, value: u64) {
        let target = match reg {
            REG_RBX => &mut self.rbx,
            REG_RBP => &mut self.rbp,
            REG_RSI => &mut self.rsi,
            REG_RDI => &mut self.rdi,
            REG_R12 => &mut self.r12,
            REG_R13 => &mut self.r13,
            REG_R14 => &mut self.r14,
            REG_R15 => &mut self.r15,
            // volatile registers - ignore
            _ => return,
        };

        *target = value;
    }
}

/// x64 stack unwinder.
///
/// Implements stack unwinding for the Windows x64 ABI by reading
/// .pdata RUNTIME_FUNCTION entries and processing UNWIND_INFO
/// structures to recover the caller's register state.
pub struct UnwinderAmd64;

impl<Driver> Unwinder<Driver> for UnwinderAmd64
where
    Driver: VmiRead,
    Driver::Architecture: ArchAdapter<Driver>,
{
    type Context = UnwindContextAmd64;

    /// Unwinds a single frame.
    ///
    /// On entry `context` describes the current (callee) frame; on success
    /// it has been rewritten in place to describe the caller.
    ///
    /// 1. RIP is converted to an RVA relative to `image_base` and looked up
    ///    in the image's exception directory. If no `RUNTIME_FUNCTION`
    ///    covers it, the frame is treated as a leaf (see [`unwind_leaf`]).
    /// 2. The function's `UNWIND_INFO` and every `UNW_FLAG_CHAININFO`
    ///    parent (up to [`UNWIND_CHAIN_LIMIT`] links) are processed, undoing
    ///    each unwind code.
    /// 3. Unless a `UWOP_PUSH_MACHFRAME` code already supplied RIP/RSP, the
    ///    return address is popped from `[RSP]`.
    ///
    /// Returns [`Unwound::End`] when the popped return address is zero,
    /// [`Unwound::MachineEnd`] when a machine frame yields a zero RIP, and
    /// [`Unwound::Frame`] otherwise.
    ///
    /// All stack-pointer arithmetic is wrapping: RSP and the frame register
    /// come from the guest and may hold arbitrary values, so an overflow
    /// must produce a bogus address (and a failed read) rather than a panic
    /// in overflow-checked builds.
    ///
    /// # Errors
    ///
    /// Propagates any [`VmiError`] from the exception directory lookup,
    /// from reading unwind data out of the image, and from reading guest
    /// stack memory. Failures while reading the informational `params` are
    /// not errors; those slots are reported as 0.
    fn unwind(
        &self,
        vmi: &VmiState<WindowsOs<Driver>>,
        image_base: Va,
        image: &impl PeImage,
        context: &mut UnwindContextAmd64,
    ) -> Result<Unwound, VmiError> {
        // Compute RVA of the current instruction pointer.
        // NOTE(review): the cast truncates if RIP lies 4 GiB or more past
        // `image_base`; this presumably relies on the caller passing the
        // image that actually contains RIP - confirm.
        let rva = context.rip.saturating_sub(image_base.0) as u32;

        // Look up RUNTIME_FUNCTION for this RVA.
        let exception_dir = image.exception_directory()?;
        let runtime_function = exception_dir.as_ref().and_then(|dir| dir.find(rva));

        let entry = match runtime_function {
            Some(entry) => entry,
            None => {
                // Leaf function: return address is at [RSP].
                return unwind_leaf(vmi, context);
            }
        };

        let begin_address = entry.begin_address;
        let mut unwind_rva = entry.unwind_info_address_or_data;

        // RIP offset within the function, for prolog detection.
        // Only meaningful for the first (non-chained) UNWIND_INFO.
        let rip_offset = rva.saturating_sub(begin_address);
        let mut is_first = true;
        let mut machine_frame = false;
        let mut chain_count = 0u32;

        loop {
            let header = image.read_struct_at_rva::<UNWIND_INFO>(unwind_rva)?;

            let flags = header.flags();
            let size_of_prolog = header.size_of_prolog();
            let count_of_codes = header.count_of_codes() as usize;
            let frame_register = header.frame_register();
            let frame_offset = header.frame_offset();

            // Read unwind codes (2 bytes per slot, right after the header).
            let codes_size = count_of_codes * 2;
            let mut codes_data = vec![0u8; codes_size];
            if codes_size > 0 {
                image.read_at_rva(unwind_rva + 4, &mut codes_data)?;
            }

            // Compute FrameBase (per Microsoft spec, computed ONCE before
            // processing codes). SAVE_NONVOL offsets are relative to this.
            //
            // `wrapping_sub`: when the prolog has not yet executed the LEA
            // that establishes the frame register, the register holds an
            // unrelated value that may be smaller than `frame_offset * 16`.
            // In that case the RSP restore just below is gated off and the
            // `code_offset > rip_offset` check in the loop skips the
            // SET_FPREG code, so an underflowing computation must not panic.
            let frame_base = if frame_register != 0 {
                context
                    .get_register(frame_register)
                    .wrapping_sub((frame_offset as u64) * 16)
            }
            else {
                context.rsp
            };

            // If frame_register is set and we are past the prolog (or in
            // a chained entry), restore RSP from the frame register.
            if frame_register != 0 && (!is_first || rip_offset >= size_of_prolog as u32) {
                context.rsp = frame_base;
            }

            // Process unwind codes.
            let mut slot = 0;
            while slot < count_of_codes {
                let code_offset = codes_data[slot * 2];
                let op_info = codes_data[slot * 2 + 1];
                let op = op_info & 0x0f;
                let info = (op_info >> 4) & 0x0f;

                // In the first (non-chained) entry, if we are in the prolog,
                // skip codes whose instructions have not yet executed.
                if is_first
                    && (rip_offset < size_of_prolog as u32)
                    && (code_offset as u32 > rip_offset)
                {
                    slot += slots_for_op(op, info);
                    continue;
                }

                match op {
                    UWOP_PUSH_NONVOL => {
                        let value = vmi.read_u64(Va(context.rsp))?;
                        context.set_register(info, value);
                        context.rsp = context.rsp.wrapping_add(8);
                        slot += 1;
                    }
                    UWOP_ALLOC_LARGE => {
                        if info == 0 {
                            // Size / 8 in the next slot.
                            let alloc = read_u16_from_codes(&codes_data, slot + 1) as u64 * 8;
                            context.rsp = context.rsp.wrapping_add(alloc);
                            slot += 2;
                        }
                        else {
                            // Unscaled size in the next two slots.
                            let alloc = read_u32_from_codes(&codes_data, slot + 1) as u64;
                            context.rsp = context.rsp.wrapping_add(alloc);
                            slot += 3;
                        }
                    }
                    UWOP_ALLOC_SMALL => {
                        context.rsp = context.rsp.wrapping_add(info as u64 * 8 + 8);
                        slot += 1;
                    }
                    UWOP_SET_FPREG => {
                        // Restore RSP from the frame register.
                        // frame_base was already computed with the same formula.
                        context.rsp = frame_base;
                        slot += 1;
                    }
                    UWOP_SAVE_NONVOL => {
                        // Offset is relative to FrameBase, NOT current RSP.
                        // `wrapping_add`: if frame_base underflowed above,
                        // the resulting address is bogus and the read will
                        // fail with VmiError instead of panicking.
                        let offset = read_u16_from_codes(&codes_data, slot + 1) as u64 * 8;
                        let value = vmi.read_u64(Va(frame_base.wrapping_add(offset)))?;
                        context.set_register(info, value);
                        slot += 2;
                    }
                    UWOP_SAVE_NONVOL_FAR => {
                        // Offset is relative to FrameBase, NOT current RSP.
                        // `wrapping_add`: see UWOP_SAVE_NONVOL above.
                        let offset = read_u32_from_codes(&codes_data, slot + 1) as u64;
                        let value = vmi.read_u64(Va(frame_base.wrapping_add(offset)))?;
                        context.set_register(info, value);
                        slot += 3;
                    }
                    UWOP_EPILOG => {
                        // v2 epilog descriptor - skip.
                        slot += 2;
                    }
                    UWOP_SPARE_CODE => {
                        // Reserved/undocumented opcode, consumes 3 slots.
                        slot += 3;
                    }
                    UWOP_SAVE_XMM128 => {
                        // XMM registers are not tracked by the context - skip.
                        slot += 2;
                    }
                    UWOP_SAVE_XMM128_FAR => {
                        // XMM registers are not tracked by the context - skip.
                        slot += 3;
                    }
                    UWOP_PUSH_MACHFRAME => {
                        if info == 1 {
                            // skip error code
                            context.rsp = context.rsp.wrapping_add(8);
                        }
                        // Machine frame: RIP at [RSP], RSP at [RSP+24].
                        let new_rip = vmi.read_u64(Va(context.rsp))?;
                        let new_rsp = vmi.read_u64(Va(context.rsp.wrapping_add(24)))?;
                        context.rip = new_rip;
                        context.rsp = new_rsp;
                        machine_frame = true;
                        slot += 1;
                    }
                    _ => {
                        tracing::warn!(op, info, "unknown unwind opcode");
                        slot += 1;
                    }
                }
            }

            // If no chained info, we are done processing codes.
            if flags & UNW_FLAG_CHAININFO == 0 {
                break;
            }

            // Follow the chain: RUNTIME_FUNCTION is after the unwind codes.
            let chain_rva = unwind_rva + header.codes_end_offset();

            // The chained entry is read as 12 raw bytes; the last four
            // (offset 8) hold the RVA of the parent's UNWIND_INFO.
            let mut chain_buf = [0u8; 12];
            image.read_at_rva(chain_rva, &mut chain_buf)?;

            unwind_rva =
                u32::from_le_bytes([chain_buf[8], chain_buf[9], chain_buf[10], chain_buf[11]]);
            is_first = false;
            chain_count += 1;

            if chain_count > UNWIND_CHAIN_LIMIT {
                tracing::warn!("unwind chain limit exceeded");
                break;
            }
        }

        // If a machine frame was encountered, RIP/RSP are already set.
        if machine_frame {
            if context.rip == 0 {
                return Ok(Unwound::MachineEnd);
            }

            // Read home space from the caller's RSP.
            let params = read_params(vmi, Va(context.rsp));

            return Ok(Unwound::Frame(Frame {
                instruction_pointer: Va(context.rip),
                stack_pointer: Va(context.rsp),
                params,
                machine_frame: true,
            }));
        }

        // Pop return address from [RSP].
        let return_addr = vmi.read_u64(Va(context.rsp))?;
        context.rsp = context.rsp.wrapping_add(8);

        if return_addr == 0 {
            return Ok(Unwound::End);
        }

        context.rip = return_addr;

        // Read home space from the caller's RSP (after popping the
        // return address, RSP points to the start of the home space).
        let params = read_params(vmi, Va(context.rsp));

        Ok(Unwound::Frame(Frame {
            instruction_pointer: Va(return_addr),
            stack_pointer: Va(context.rsp),
            params,
            machine_frame: false,
        }))
    }
}

/// Fetches the four 8-byte parameter home-space slots starting at `rsp`.
///
/// Intended to be called once the return address has been popped, so that
/// `rsp` is the start of the caller's home space (P1Home at RSP+0).
/// A slot that cannot be read is reported as 0.
pub fn read_params<Driver>(vmi: &VmiState<WindowsOs<Driver>>, rsp: Va) -> [u64; 4]
where
    Driver: VmiRead,
    Driver::Architecture: ArchAdapter<Driver>,
{
    // Params are informational only - a failed read degrades to 0 instead
    // of aborting the unwind. Slots are read in ascending address order.
    [0, 8, 16, 24].map(|offset| vmi.read_u64(rsp + offset).unwrap_or(0))
}

/// Unwinds a leaf function (no RUNTIME_FUNCTION entry).
///
/// For leaf functions, the return address is at `[RSP]` and the
/// caller's RSP is `RSP + 8`. This is also useful as a fallback
/// when .pdata is unavailable (e.g., file-backed pages missing
/// from a crash dump).
///
/// `context` is updated in place: RSP is advanced past the return address
/// and, unless the return address is zero, RIP is set to it.
///
/// Returns [`Unwound::End`] when the return address is zero, otherwise an
/// [`Unwound::Frame`] for the caller with `machine_frame` set to `false`.
///
/// # Errors
///
/// Returns the [`VmiError`] from reading `[RSP]`. Failures while reading
/// the informational `params` are not errors.
pub fn unwind_leaf<Driver>(
    vmi: &VmiState<WindowsOs<Driver>>,
    context: &mut UnwindContextAmd64,
) -> Result<Unwound, VmiError>
where
    Driver: VmiRead,
    Driver::Architecture: ArchAdapter<Driver>,
{
    let return_addr = vmi.read_u64(Va(context.rsp))?;

    // `wrapping_add`: RSP comes from the guest and may be arbitrary; an
    // overflow must not panic in overflow-checked builds.
    context.rsp = context.rsp.wrapping_add(8);

    if return_addr == 0 {
        return Ok(Unwound::End);
    }

    context.rip = return_addr;

    // Read home space from the caller's RSP.
    let params = read_params(vmi, Va(context.rsp));

    Ok(Unwound::Frame(Frame {
        instruction_pointer: Va(return_addr),
        stack_pointer: Va(context.rsp),
        params,
        machine_frame: false,
    }))
}

/// Maps an RVA to the `BeginAddress` of the primary function that owns it,
/// walking `UNW_FLAG_CHAININFO` links in the exception directory.
///
/// MSVC can split a single function into non-contiguous chunks scattered
/// across the binary (common with SEH and large functions). Each chunk
/// has its own `RUNTIME_FUNCTION` entry in `.pdata`, with the chunk's
/// `UNWIND_INFO` carrying the `UNW_FLAG_CHAININFO` flag and a pointer
/// to the parent entry. Following that chain leads to the primary
/// (non-chained) entry whose `BeginAddress` is the real function start -
/// the address that maps to the symbol in the PDB.
///
/// At most [`UNWIND_CHAIN_LIMIT`] links are followed; if the limit is
/// reached, the `BeginAddress` of the last entry visited is returned.
///
/// Returns `None` if the image has no exception directory or the RVA is
/// not covered by any `RUNTIME_FUNCTION`.
///
/// # Errors
///
/// Propagates any [`VmiError`] from reading the exception directory or the
/// unwind data.
pub fn resolve_primary_function(image: &impl PeImage, rva: u32) -> Result<Option<u32>, VmiError> {
    let directory = image.exception_directory()?;

    let covering = match directory.as_ref().and_then(|dir| dir.find(rva)) {
        Some(found) => found,
        None => return Ok(None),
    };

    let mut function_start = covering.begin_address;
    let mut info_rva = covering.unwind_info_address_or_data;
    let mut hops = 0;

    while hops < UNWIND_CHAIN_LIMIT {
        hops += 1;

        let info = image.read_struct_at_rva::<UNWIND_INFO>(info_rva)?;

        let is_chained = info.flags() & UNW_FLAG_CHAININFO != 0;
        if !is_chained {
            break;
        }

        // The parent RUNTIME_FUNCTION sits right after the (padded) codes.
        let parent_rva = info_rva + info.codes_end_offset();
        let parent = image.read_struct_at_rva::<ImageRuntimeFunctionEntry>(parent_rva)?;

        function_start = parent.begin_address;
        info_rva = parent.unwind_info_address_or_data;
    }

    Ok(Some(function_start))
}

/// Returns the number of code slots consumed by an unwind operation.
fn slots_for_op(op: u8, info: u8) -> usize {
    match op {
        UWOP_PUSH_NONVOL => 1,
        UWOP_ALLOC_LARGE => {
            if info == 0 {
                2
            }
            else {
                3
            }
        }
        UWOP_ALLOC_SMALL => 1,
        UWOP_SET_FPREG => 1,
        UWOP_SAVE_NONVOL => 2,
        UWOP_SAVE_NONVOL_FAR => 3,
        UWOP_SPARE_CODE => 3,
        UWOP_SAVE_XMM128 => 2,
        UWOP_SAVE_XMM128_FAR => 3,
        UWOP_PUSH_MACHFRAME => 1,
        _ => 1,
    }
}

/// Decodes the little-endian `u16` stored in unwind code slot `slot`.
///
/// Slots are 2 bytes wide, so the value starts at byte `slot * 2` of
/// `data`. Returns 0 when the slot lies (even partly) outside `data`.
fn read_u16_from_codes(data: &[u8], slot: usize) -> u16 {
    let start = slot * 2;
    data.get(start..start + 2)
        .map_or(0, |bytes| u16::from_le_bytes([bytes[0], bytes[1]]))
}

/// Decodes the little-endian `u32` that begins at unwind code slot `slot`.
///
/// The value covers two consecutive 2-byte slots, i.e. the 4 bytes starting
/// at byte `slot * 2` of `data`. Returns 0 when those bytes are not all
/// inside `data`.
fn read_u32_from_codes(data: &[u8], slot: usize) -> u32 {
    let start = slot * 2;
    data.get(start..start + 4).map_or(0, |bytes| {
        u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
    })
}