windows_breakpoint_manager/
windows-breakpoint-manager.rs

1use std::sync::{
2    Arc,
3    atomic::{AtomicBool, Ordering},
4};
5
6use isr::{
7    Profile,
8    cache::{IsrCache, JsonCodec},
9    macros::symbols,
10};
11use vmi::{
12    Hex, MemoryAccess, Va, VcpuId, View, VmiContext, VmiCore, VmiDriver, VmiError,
13    VmiEventResponse, VmiHandler, VmiSession,
14    arch::amd64::{Amd64, EventMonitor, EventReason, ExceptionVector, Interrupt},
15    driver::xen::VmiXenDriver,
16    os::{
17        ProcessObject, VmiOsProcess as _,
18        windows::{WindowsOs, WindowsOsExt as _},
19    },
20    utils::{
21        bpm::{Breakpoint, BreakpointController, BreakpointManager},
22        ptm::{PageTableMonitor, PageTableMonitorEvent},
23    },
24};
25use xen::XenStore;
26
// Kernel symbols required by the monitor.
//
// The `symbols!` macro generates the `Symbols` struct; `Symbols::new(profile)`
// fills it from the loaded profile. Each `u64` value is added to the kernel
// image base to obtain the virtual address of the corresponding function.
symbols! {
    #[derive(Debug)]
    pub struct Symbols {
        // File I/O system services that get a breakpoint.
        NtCreateFile: u64,
        NtWriteFile: u64,

        // Process creation / address-space teardown routines that get a
        // breakpoint.
        PspInsertProcess: u64,
        MmCleanProcessAddressSpace: u64,

        // `symbols!` macro also accepts an `Option<u64>` as a value,
        // where `None` means that the symbol is not present in the profile.
        // MiInsertVad: Option<u64>,
        // MiInsertPrivateVad: Option<u64>,
        // MiGetWsAndInsertVad: Option<u64>,
        // MiDeleteVad: Option<u64>,
        // MiDeletePartialVad: Option<u64>,
        // MiDeleteVirtualAddresses: Option<u64>,
        // MiRemoveVadAndView: Option<u64>,
    }
}
47
/// Event handler that hooks a handful of Windows kernel functions with
/// breakpoints and logs their invocations.
pub struct Monitor<Driver>
where
    Driver: VmiDriver<Architecture = Amd64>,
{
    /// Shared flag polled by `check_completion`; once it is set, the handler
    /// reports completion.
    terminate_flag: Arc<AtomicBool>,
    /// View created in `new`, in which the breakpoints are inserted and to
    /// which the VCPU is switched back after a singlestep.
    view: View,
    /// Manager of the breakpoints inserted on the monitored functions.
    bpm: BreakpointManager<BreakpointController<Driver>>,
    /// Monitor of the page table entries that map the breakpoint locations.
    ptm: PageTableMonitor<Driver>,
}
57
58#[expect(non_snake_case)]
59impl<Driver> Monitor<Driver>
60where
61    Driver: VmiDriver<Architecture = Amd64>,
62{
63    pub fn new(
64        session: &VmiSession<Driver, WindowsOs<Driver>>,
65        profile: &Profile,
66        terminate_flag: Arc<AtomicBool>,
67    ) -> Result<Self, VmiError> {
68        // Capture the current state of the VCPU and get the base address of
69        // the kernel.
70        //
71        // This base address is essential to correctly offset monitored
72        // functions.
73        //
74        // NOTE: `kernel_image_base` tries to find the kernel in the memory
75        //       with the help of the CPU registers. On AMD64 architecture,
76        //       the kernel image base is usually found using the `MSR_LSTAR`
77        //       register, which contains the address of the system call
78        //       handler. This register is set by the operating system during
79        //       boot and is left unchanged (unless some rootkits are involved).
80        //
81        //       Therefore, we can take an arbitrary registers at any point
82        //       in time (as long as the OS has booted and the page tables are
83        //       set up) and use them to find the kernel image base.
84        let registers = session.registers(VcpuId(0))?;
85        let vmi = session.with_registers(&registers);
86
87        let kernel_image_base = vmi.os().kernel_image_base()?;
88        tracing::info!(%kernel_image_base);
89
90        // Get the system process.
91        //
92        // The system process is the first process created by the kernel.
93        // In Windows, it is referenced by the kernel symbol `PsInitialSystemProcess`.
94        // To monitor page table entries, we need to locate the translation root
95        // of this process.
96        let system_process = vmi.os().system_process()?;
97        tracing::info!(system_process = %system_process.object()?);
98
99        // Get the translation root of the system process.
100        // This is effectively "the CR3 of the kernel".
101        //
102        // The translation root is the root of the page table hierarchy (also
103        // known as the Directory Table Base or PML4).
104        let root = system_process.translation_root()?;
105        tracing::info!(%root);
106
107        // Load the symbols from the profile.
108        let symbols = Symbols::new(profile)?;
109
110        // Enable monitoring of the INT3 and singlestep events.
111        //
112        // INT3 is used to monitor the execution of specific functions.
113        // Singlestep is used to monitor the modifications of page table
114        // entries.
115        vmi.monitor_enable(EventMonitor::Interrupt(ExceptionVector::Breakpoint))?;
116        vmi.monitor_enable(EventMonitor::Singlestep)?;
117
118        // Create a new view for the monitor.
119        // This view is used for monitoring function calls and memory accesses.
120        let view = vmi.create_view(MemoryAccess::RWX)?;
121        vmi.switch_to_view(view)?;
122
123        // Create a new breakpoint controller.
124        //
125        // The breakpoint controller is used to insert breakpoints for specific
126        // functions.
127        //
128        // From the guest's perspective, these breakpoints are "hidden", since
129        // the breakpoint controller will unset the read/write access to the
130        // physical memory page where the breakpoint is inserted, while keeping
131        // the execute access.
132        //
133        // This way, the guest will be able to execute the code, but attempts to
134        // read or write the memory will trigger the `memory_access` callback.
135        //
136        // When a VCPU tries to execute the breakpoint instruction:
137        // - an `interrupt` callback will be triggered
138        // - the breakpoint will be handled (e.g., log the function call)
139        // - a fast-singlestep[1] will be performed over the INT3 instruction
140        //
141        // When a VCPU tries to read from this page (e.g., a PatchGuard check):
142        // - `memory_access` callback will be triggered (with the `MemoryAccess::R`
143        //   access type)
144        // - fast-singlestep[1] will be performed over the instruction that tried to
145        //   read the memory
146        //
147        // This way, the instruction will read the original memory content.
148        //
149        // [1] Fast-singlestep is a VMI feature that allows to switch the VCPU
150        //     to a different view, execute a single instruction, and then
151        //     switch back to the original view. In this case, the view is
152        //     switched to the `default_view` (which is unmodified).
153        let mut bpm = BreakpointManager::new();
154
155        // Create a new page table monitor.
156        //
157        // The page table monitor is used to monitor the page table entries of
158        // the hooked functions.
159        //
160        // More specifically, it is used to monitor the pages that the breakpoint
161        // was inserted into. This is necessary to handle the case when the
162        // page containing the breakpoint is paged out (and then paged in
163        // again).
164        //
165        // `PageTableMonitor` works by unsetting the write access to the page
166        // tables of the hooked functions. When the page is paged out, the
167        // `PRESENT` bit in the page table entry is unset and, conversely, when
168        // the page is paged in, the `PRESENT` bit is set again.
169        //
170        // When that happens:
171        // - the `memory_access` callback will be triggered (with the `MemoryAccess::R`
172        //   access type)
173        // - the callback will mark the page as dirty in the page table monitor
174        // - a singlestep will be performed over the instruction that tried to modify
175        //   the memory containing the page table entry
176        // - the `singlestep` handler will process the dirty page table entries and
177        //   inform the breakpoint controller to handle the changes
178        let mut ptm = PageTableMonitor::new();
179
180        // Pause the VM to avoid race conditions between inserting breakpoints
181        // and monitoring page table entries. The VM resumes when the pause
182        // guard is dropped.
183        let _pause_guard = vmi.pause_guard()?;
184
185        // Insert breakpoint for the `NtCreateFile` function.
186        let va_NtCreateFile = kernel_image_base + symbols.NtCreateFile;
187        let cx_NtCreateFile = (va_NtCreateFile, root);
188        let bp_NtCreateFile = Breakpoint::new(cx_NtCreateFile, view)
189            .global()
190            .with_tag("NtCreateFile");
191        bpm.insert(&vmi, bp_NtCreateFile)?;
192        ptm.monitor(&vmi, cx_NtCreateFile, view, "NtCreateFile")?;
193        tracing::info!(%va_NtCreateFile);
194
195        // Insert breakpoint for the `NtWriteFile` function.
196        let va_NtWriteFile = kernel_image_base + symbols.NtWriteFile;
197        let cx_NtWriteFile = (va_NtWriteFile, root);
198        let bp_NtWriteFile = Breakpoint::new(cx_NtWriteFile, view)
199            .global()
200            .with_tag("NtWriteFile");
201        bpm.insert(&vmi, bp_NtWriteFile)?;
202        ptm.monitor(&vmi, cx_NtWriteFile, view, "NtWriteFile")?;
203        tracing::info!(%va_NtWriteFile);
204
205        // Insert breakpoint for the `PspInsertProcess` function.
206        let va_PspInsertProcess = kernel_image_base + symbols.PspInsertProcess;
207        let cx_PspInsertProcess = (va_PspInsertProcess, root);
208        let bp_PspInsertProcess = Breakpoint::new(cx_PspInsertProcess, view)
209            .global()
210            .with_tag("PspInsertProcess");
211        bpm.insert(&vmi, bp_PspInsertProcess)?;
212        ptm.monitor(&vmi, cx_PspInsertProcess, view, "PspInsertProcess")?;
213
214        // Insert breakpoint for the `MmCleanProcessAddressSpace` function.
215        let va_MmCleanProcessAddressSpace = kernel_image_base + symbols.MmCleanProcessAddressSpace;
216        let cx_MmCleanProcessAddressSpace = (va_MmCleanProcessAddressSpace, root);
217        let bp_MmCleanProcessAddressSpace = Breakpoint::new(cx_MmCleanProcessAddressSpace, view)
218            .global()
219            .with_tag("MmCleanProcessAddressSpace");
220        bpm.insert(&vmi, bp_MmCleanProcessAddressSpace)?;
221        ptm.monitor(
222            &vmi,
223            cx_MmCleanProcessAddressSpace,
224            view,
225            "MmCleanProcessAddressSpace",
226        )?;
227
228        Ok(Self {
229            terminate_flag,
230            view,
231            bpm,
232            ptm,
233        })
234    }
235
236    #[tracing::instrument(skip_all)]
237    fn memory_access(
238        &mut self,
239        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
240    ) -> Result<VmiEventResponse<Amd64>, VmiError> {
241        let memory_access = vmi.event().reason().as_memory_access();
242
243        tracing::trace!(
244            pa = %memory_access.pa,
245            va = %memory_access.va,
246            access = %memory_access.access,
247        );
248
249        if memory_access.access.contains(MemoryAccess::W) {
250            // It is assumed that a write memory access event is caused by a
251            // page table modification.
252            //
253            // The page table entry is marked as dirty in the page table monitor
254            // and a singlestep is performed to process the dirty entries.
255            self.ptm
256                .mark_dirty_entry(memory_access.pa, self.view, vmi.event().vcpu_id());
257
258            Ok(VmiEventResponse::toggle_singlestep().and_set_view(vmi.default_view()))
259        }
260        else if memory_access.access.contains(MemoryAccess::R) {
261            // When the guest tries to read from the memory, a fast-singlestep
262            // is performed over the instruction that tried to read the memory.
263            // This is done to allow the instruction to read the original memory
264            // content.
265            Ok(VmiEventResponse::toggle_fast_singlestep().and_set_view(vmi.default_view()))
266        }
267        else {
268            panic!("Unhandled memory access: {memory_access:?}");
269        }
270    }
271
272    #[tracing::instrument(skip_all, fields(pid, process))]
273    fn interrupt(
274        &mut self,
275        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
276    ) -> Result<VmiEventResponse<Amd64>, VmiError> {
277        let tag = match self.bpm.get_by_event(vmi.event(), ()) {
278            Some(breakpoints) => {
279                // Breakpoints can have multiple tags, but we have set only one
280                // tag for each breakpoint.
281                let first_breakpoint = breakpoints.into_iter().next().expect("breakpoint");
282                first_breakpoint.tag()
283            }
284            None => {
285                if BreakpointController::is_breakpoint(vmi, vmi.event())? {
286                    // This breakpoint was not set by us. Reinject it.
287                    tracing::warn!("Unknown breakpoint, reinjecting");
288                    return Ok(VmiEventResponse::reinject_interrupt());
289                }
290                else {
291                    // We have received a breakpoint event, but there is no
292                    // breakpoint instruction at the current memory location.
293                    // This can happen if the event was triggered by a breakpoint
294                    // we just removed.
295                    tracing::warn!("Ignoring old breakpoint event");
296                    return Ok(
297                        VmiEventResponse::toggle_fast_singlestep().and_set_view(vmi.default_view())
298                    );
299                }
300            }
301        };
302
303        let process = vmi.os().current_process()?;
304        let process_id = process.id()?;
305        let process_name = process.name()?;
306        tracing::Span::current()
307            .record("pid", process_id.0)
308            .record("process", process_name);
309
310        match tag {
311            "NtCreateFile" => self.NtCreateFile(vmi)?,
312            "NtWriteFile" => self.NtWriteFile(vmi)?,
313            "PspInsertProcess" => self.PspInsertProcess(vmi)?,
314            "MmCleanProcessAddressSpace" => self.MmCleanProcessAddressSpace(vmi)?,
315            _ => panic!("Unhandled tag: {tag}"),
316        }
317
318        Ok(VmiEventResponse::toggle_fast_singlestep().and_set_view(vmi.default_view()))
319    }
320
321    #[tracing::instrument(skip_all)]
322    fn singlestep(
323        &mut self,
324        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
325    ) -> Result<VmiEventResponse<Amd64>, VmiError> {
326        // Get the page table modifications by processing the dirty page table
327        // entries.
328        let ptm_events = self.ptm.process_dirty_entries(vmi, vmi.event().vcpu_id())?;
329
330        for event in &ptm_events {
331            // Log the page table modifications.
332            match &event {
333                PageTableMonitorEvent::PageIn(update) => tracing::debug!(?update, "page-in"),
334                PageTableMonitorEvent::PageOut(update) => tracing::debug!(?update, "page-out"),
335            }
336
337            // Let the breakpoint controller handle the page table modifications.
338            self.bpm.handle_ptm_event(vmi, event)?;
339        }
340
341        // Disable singlestep and switch back to our view.
342        Ok(VmiEventResponse::toggle_singlestep().and_set_view(self.view))
343    }
344
345    #[tracing::instrument(skip_all)]
346    fn NtCreateFile(
347        &mut self,
348        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
349    ) -> Result<(), VmiError> {
350        //
351        // NTSTATUS
352        // NtCreateFile (
353        //     _Out_ PHANDLE FileHandle,
354        //     _In_ ACCESS_MASK DesiredAccess,
355        //     _In_ POBJECT_ATTRIBUTES ObjectAttributes,
356        //     _Out_ PIO_STATUS_BLOCK IoStatusBlock,
357        //     _In_opt_ PLARGE_INTEGER AllocationSize,
358        //     _In_ ULONG FileAttributes,
359        //     _In_ ULONG ShareAccess,
360        //     _In_ ULONG CreateDisposition,
361        //     _In_ ULONG CreateOptions,
362        //     _In_reads_bytes_opt_(EaLength) PVOID EaBuffer,
363        //     _In_ ULONG EaLength
364        //     );
365        //
366
367        let ObjectAttributes = Va(vmi.os().function_argument(2)?);
368
369        let object_attributes = vmi.os().object_attributes(ObjectAttributes)?;
370        let object_name = match object_attributes.object_name()? {
371            Some(object_name) => object_name,
372            None => {
373                tracing::warn!(%ObjectAttributes, "No object name found");
374                return Ok(());
375            }
376        };
377
378        tracing::info!(%object_name);
379
380        Ok(())
381    }
382
383    #[tracing::instrument(skip_all)]
384    fn NtWriteFile(
385        &mut self,
386        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
387    ) -> Result<(), VmiError> {
388        //
389        // NTSTATUS
390        // NtWriteFile (
391        //     _In_ HANDLE FileHandle,
392        //     _In_opt_ HANDLE Event,
393        //     _In_opt_ PIO_APC_ROUTINE ApcRoutine,
394        //     _In_opt_ PVOID ApcContext,
395        //     _Out_ PIO_STATUS_BLOCK IoStatusBlock,
396        //     _In_reads_bytes_(Length) PVOID Buffer,
397        //     _In_ ULONG Length,
398        //     _In_opt_ PLARGE_INTEGER ByteOffset,
399        //     _In_opt_ PULONG Key
400        //     );
401        //
402
403        let FileHandle = vmi.os().function_argument(0)?;
404
405        let handle_table = match vmi.os().current_process()?.handle_table()? {
406            Some(handle_table) => handle_table,
407            None => {
408                tracing::warn!("No handle table found");
409                return Ok(());
410            }
411        };
412
413        let handle_table_entry = match handle_table.lookup(FileHandle)? {
414            Some(handle_table_entry) => handle_table_entry,
415            None => {
416                tracing::warn!(FileHandle = %Hex(FileHandle), "No handle table entry found");
417                return Ok(());
418            }
419        };
420
421        let object = match handle_table_entry.object()? {
422            Some(object) => object,
423            None => {
424                tracing::warn!(FileHandle = %Hex(FileHandle), "No object found");
425                return Ok(());
426            }
427        };
428
429        let file_object = match object.as_file()? {
430            Some(file_object) => file_object,
431            None => {
432                tracing::warn!(FileHandle = %Hex(FileHandle), "Not a file object");
433                return Ok(());
434            }
435        };
436
437        let path = file_object.full_path()?;
438        tracing::info!(%path);
439
440        Ok(())
441    }
442
443    #[tracing::instrument(skip_all)]
444    fn PspInsertProcess(
445        &mut self,
446        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
447    ) -> Result<(), VmiError> {
448        //
449        // NTSTATUS
450        // PspInsertProcess (
451        //     _In_ PEPROCESS NewProcess,
452        //     _In_ PEPROCESS Parent,
453        //     _In_ ULONG DesiredAccess,
454        //     _In_ ULONG CreateFlags,
455        //     ...
456        //     );
457        //
458
459        let NewProcess = vmi.os().function_argument(0)?;
460        let Parent = vmi.os().function_argument(1)?;
461
462        let process = vmi.os().process(ProcessObject(Va(NewProcess)))?;
463        let process_id = process.id()?;
464
465        let parent_process = vmi.os().process(ProcessObject(Va(Parent)))?;
466        let parent_process_id = parent_process.id()?;
467
468        // We rely heavily on the 2nd argument to be the parent process object.
469        // If that ever changes, this assertion should catch it.
470        //
471        // So far it is verified that it works for Windows 7 up to Windows 11
472        // (23H2, build 22631).
473        debug_assert_eq!(parent_process_id, process.parent_id()?);
474
475        let name = process.name()?;
476        let image_base = process.image_base()?;
477        let peb = process.peb()?;
478
479        tracing::info!(
480            %process_id,
481            name,
482            %image_base,
483            ?peb,
484        );
485
486        Ok(())
487    }
488
489    #[tracing::instrument(skip_all)]
490    fn MmCleanProcessAddressSpace(
491        &mut self,
492        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
493    ) -> Result<(), VmiError> {
494        //
495        // VOID
496        // MmCleanProcessAddressSpace (
497        //     _In_ PEPROCESS Process
498        //     );
499        //
500
501        let Process = vmi.os().function_argument(0)?;
502
503        let process = vmi.os().process(ProcessObject(Va(Process)))?;
504        let process_id = process.id()?;
505
506        let name = process.name()?;
507        let image_base = process.image_base()?;
508
509        tracing::info!(%process_id, name, %image_base);
510
511        Ok(())
512    }
513
514    fn dispatch(
515        &mut self,
516        vmi: &VmiContext<'_, Driver, WindowsOs<Driver>>,
517    ) -> Result<VmiEventResponse<Amd64>, VmiError> {
518        let event = vmi.event();
519        let result = match event.reason() {
520            EventReason::MemoryAccess(_) => self.memory_access(vmi),
521            EventReason::Interrupt(_) => self.interrupt(vmi),
522            EventReason::Singlestep(_) => self.singlestep(vmi),
523            _ => panic!("Unhandled event: {:?}", event.reason()),
524        };
525
526        // If VMI tries to read from a page that is not present, it will return
527        // a page fault error. In this case, we inject a page fault interrupt
528        // to the guest.
529        //
530        // Once the guest handles the page fault, it will retry to execute the
531        // instruction that caused the page fault.
532        if let Err(VmiError::Translation(pfs)) = result {
533            tracing::warn!(?pfs, "Page fault, injecting");
534            vmi.inject_interrupt(event.vcpu_id(), Interrupt::page_fault(pfs[0].va, 0))?;
535            return Ok(VmiEventResponse::default());
536        }
537
538        result
539    }
540}
541
542impl<Driver> VmiHandler<Driver, WindowsOs<Driver>> for Monitor<Driver>
543where
544    Driver: VmiDriver<Architecture = Amd64>,
545{
546    type Output = ();
547
548    fn handle_event(
549        &mut self,
550        vmi: VmiContext<'_, Driver, WindowsOs<Driver>>,
551    ) -> VmiEventResponse<Amd64> {
552        // Flush the V2P cache on every event to avoid stale translations.
553        vmi.flush_v2p_cache();
554
555        self.dispatch(&vmi).expect("dispatch")
556    }
557
558    fn check_completion(&self) -> Option<Self::Output> {
559        self.terminate_flag.load(Ordering::Relaxed).then_some(())
560    }
561}
562
563fn main() -> Result<(), Box<dyn std::error::Error>> {
564    tracing_subscriber::fmt()
565        .with_max_level(tracing::Level::DEBUG)
566        .init();
567
568    let domain_id = 'x: {
569        for name in &["win7", "win10", "win11", "ubuntu22"] {
570            if let Some(domain_id) = XenStore::new()?.domain_id_from_name(name)? {
571                break 'x domain_id;
572            }
573        }
574
575        panic!("Domain not found");
576    };
577
578    tracing::debug!(?domain_id);
579
580    // Setup VMI.
581    let driver = VmiXenDriver::<Amd64>::new(domain_id)?;
582    let core = VmiCore::new(driver)?;
583
584    // Try to find the kernel information.
585    // This is necessary in order to load the profile.
586    let kernel_info = {
587        let _pause_guard = core.pause_guard()?;
588        let regs = core.registers(0.into())?;
589
590        WindowsOs::find_kernel(&core, &regs)?.expect("kernel information")
591    };
592
593    // Load the profile.
594    // The profile contains offsets to kernel functions and data structures.
595    let isr = IsrCache::<JsonCodec>::new("cache")?;
596    let entry = isr.entry_from_codeview(kernel_info.codeview)?;
597    let profile = entry.profile()?;
598
599    // Create the VMI session.
600    tracing::info!("Creating VMI session");
601    let terminate_flag = Arc::new(AtomicBool::new(false));
602    signal_hook::flag::register(signal_hook::consts::SIGHUP, terminate_flag.clone())?;
603    signal_hook::flag::register(signal_hook::consts::SIGINT, terminate_flag.clone())?;
604    signal_hook::flag::register(signal_hook::consts::SIGALRM, terminate_flag.clone())?;
605    signal_hook::flag::register(signal_hook::consts::SIGTERM, terminate_flag.clone())?;
606
607    let os = WindowsOs::<VmiXenDriver<Amd64>>::new(&profile)?;
608    let session = VmiSession::new(&core, &os);
609
610    session.handle(|session| Monitor::new(session, &profile, terminate_flag))?;
611
612    Ok(())
613}