//! Everything needed to perform async system call injection, that is, to run system
//! calls within the guest.
//!
//! This feature allows for writing code using Rust's async model in such a manner
//! that guest system calls can be treated as I/O to be performed. This enables
//! writing code that feels synchronous while automatically running the guest
//! concurrently in order to perform any needed tasks such as filesystem access,
//! interacting with processes/signals, mapping memory, etc. all within the guest,
//! while all computation is performed on the host.
//!
//! A system call injector under this API is an async block which can make use of the
//! [`syscall`] function in order to perform system calls. An injector can only be run
//! (or, rather, started) within a syscall enter callback.
//!
//! ## Example
//!
//! ```
//! use panda::prelude::*;
//! use panda::syscall_injection::{run_injector, syscall};
//!
//! async fn getpid() -> target_ulong {
//!     syscall(GET_PID, ()).await
//! }
//!
//! async fn getuid() -> target_ulong {
//!     syscall(GET_UID, ()).await
//! }
//!
//! #[panda::on_all_sys_enter]
//! fn any_syscall(cpu: &mut CPUState, pc: SyscallPc, syscall_num: target_ulong) {
//!     run_injector(pc, async {
//!         println!("PID: {}", getpid().await);
//!         println!("UID: {}", getuid().await);
//!         println!("PID (again): {}", getpid().await);
//!     });
//! }
//!
//! fn main() {
//!     Panda::new()
//!         .generic("x86_64")
//!         .args(&["-loadvm", "root"])
//!         .run();
//! }
//! ```
//!
//! (Full example present in `examples/syscall_injection.rs`)

use std::{
    future::Future,
    pin::Pin,
    sync::atomic::{AtomicBool, AtomicU64, Ordering},
    task::{Context, Poll, RawWaker, RawWakerVTable, Waker},
};

use dashmap::{DashMap, DashSet};
use lazy_static::lazy_static;
use parking_lot::{const_mutex, Mutex};

use crate::prelude::*;
use crate::{
    plugins::{osi::OSI, syscalls2::Syscalls2Callbacks},
    regs, sys, PppCallback,
};

mod arch;
mod conversion;
mod pinned_queue;
mod syscall_future;
mod syscall_regs;
mod syscalls;

pub(crate) use crate::abi::set_is_sysenter;
use {
    arch::{FORK_IS_CLONE, SYSCALL_RET, VFORK},
    pinned_queue::PinnedQueue,
    syscall_future::{INJECTOR_BAIL, WAITING_FOR_SYSCALL},
    syscall_regs::SyscallRegs,
};
pub use {conversion::*, syscall_future::*};

type Injector = dyn Future<Output = ()> + 'static;

/// A unique identifier for a thread of execution. The actual makeup is not important;
/// currently it consists of a process ID and thread ID pair. The only requirement is
/// that it compares equal if and only if it refers to the same thread of execution
/// at a given point in time.
///
/// `ThreadId`s *may* be reused if the thread of execution no longer exists. Previously
/// `ThreadId`s were just ASIDs; however, this may not be enough on all platforms due to
/// things such as `fork(2)` using the same ASID for both processes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct ThreadId {
    pid: target_ulong,
    tid: target_ulong,
}

impl ThreadId {
    fn current() -> Self {
        let cpu = unsafe { &mut *sys::get_cpu() };
        let thread = OSI.get_current_thread(cpu);

        let tid = thread.tid as target_ulong;
        let pid = thread.pid as target_ulong;

        log::trace!("current tid, pid: {:#x?}, {:#x?}", tid, pid);

        Self { tid, pid }
    }
}

lazy_static! {
    /// A list of injectors. Since multiple can run at the same time, we need a mapping
    /// of which threads run which injectors. Injectors can be queued in sequence but
    /// need to be capable of pinning[^1] the current injector, hence the `PinnedQueue`.
    ///
    /// [^1]: Pinning in Rust is a concept of being able to ensure that a value does not
    /// move. This is used by async code due to the fact that "stack" references in an
    /// async function desugar down to references stored inside the `Future` which point
    /// to other data within the `Future`. This means the type backing the `Future` can
    /// be self-referential, so if the underlying `Future` is moved then those references
    /// would be invalid. For more information, see [`std::pin`].
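    ///
    /// A minimal sketch of the self-reference problem described above (hypothetical
    /// code, for illustration only): `r` lives across an `.await`, so it is stored in
    /// the future's own state and points back into that same state, which therefore
    /// must not move while the future is being polled.
    ///
    /// ```ignore
    /// async fn example() {
    ///     let buffer = [0u8; 16];
    ///     let r = &buffer;        // reference into the future's own state
    ///     some_syscall().await;   // `r` must stay valid across this await point
    ///     let _ = r.len();        // so the future must be pinned before polling
    /// }
    /// ```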
    static ref INJECTORS: DashMap<ThreadId, PinnedQueue<Injector>> = DashMap::new();

    /// A list of thread ids which have started forking but not yet returned from the fork
    static ref FORKING_THREADS: DashSet<ThreadId> = DashSet::new();
}

/// The injector to run in the forked child process, along with the parent's
/// backed-up registers to restore once that injector finishes
struct ChildInjector((SyscallRegs, Pin<Box<Injector>>));

unsafe impl Send for ChildInjector {}
unsafe impl Sync for ChildInjector {}

static CHILD_INJECTOR: Mutex<Option<ChildInjector>> = const_mutex(None);

/// The PID of the process currently forking via [`fork`], or `u64::MAX` when no
/// fork is pending
static PARENT_PID: AtomicU64 = AtomicU64::new(u64::MAX);

/// Set when a clone-based fork has just been injected (see [`fork`])
static JUST_CLONED: AtomicBool = AtomicBool::new(false);

/// Fork the guest process being injected into and begin injecting into the child
/// using the provided injector.
///
/// The backed-up registers will also be restored in the child once the child injector
/// completes, unless the child injector bails.
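///
/// ### Example
///
/// A minimal sketch (not compiled here): `GET_PID` is a hypothetical stand-in for the
/// architecture-specific `getpid` syscall number, and the value returned by `fork` is
/// simply the raw return value of the underlying fork/clone system call.
///
/// ```ignore
/// use panda::prelude::*;
/// use panda::syscall_injection::{fork, run_injector, syscall};
///
/// #[panda::on_all_sys_enter]
/// fn any_syscall(cpu: &mut CPUState, pc: SyscallPc, syscall_num: target_ulong) {
///     run_injector(pc, async {
///         let ret = fork(async {
///             // This injector runs inside the newly forked child process
///             println!("child PID: {}", syscall(GET_PID, ()).await);
///         })
///         .await;
///
///         println!("fork returned: {:#x}", ret);
///     });
/// }
/// ```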
pub async fn fork(child_injector: impl Future<Output = ()> + 'static) -> target_ulong {
    // Since all state needs to be copied when forking, we also need to copy *our*
    // state. Since we've backed up the registers to restore once we're done injecting
    // our system calls, we need to copy those registers as well in case the user wants
    // to resume the base program's execution within the child.
    let backed_up_regs = get_backed_up_regs().expect("Fork was run outside of an injector");

    PARENT_PID.store(ThreadId::current().pid as u64, Ordering::SeqCst);

    // Used to keep track of the threads from which parent processes are forking
    FORKING_THREADS.insert(ThreadId::current());

    // This code assumes that we aren't going to be injecting into multiple processes
    // and forking at the same time in an overlapping manner. Effectively this is storing
    // the future (e.g. the second injector the user passes to `fork(...)` to run in the
    // child) so that once the child process starts we can begin syscall injection there.
    CHILD_INJECTOR
        .lock()
        .replace(ChildInjector((backed_up_regs, Box::pin(child_injector))));

    // aarch64 is a new enough Linux target that it deprecates `fork(2)` entirely and
    // replaces it with `clone(2)`. This means that on certain targets our syscall number
    // for fork (`FORK`) is actually the syscall number for clone, which takes a different
    // set of arguments. Currently unsupported.
    if FORK_IS_CLONE {
        const CLONE_FILES: target_ulong = 0x00000400;
        const CLONE_VFORK: target_ulong = 0x00004000;
        const CLONE_NEWPID: target_ulong = 0x20000000;

        const NULL: target_ptr_t = 0;
        const CLONE: target_ulong = VFORK;

        JUST_CLONED.swap(true, Ordering::SeqCst);

        log::debug!("Running clone syscall");
        syscall(
            CLONE,
            (
                CLONE_VFORK | CLONE_NEWPID | CLONE_FILES,
                NULL,
                NULL,
                NULL,
                NULL,
            ),
        )
        .await
    } else {
        syscall(VFORK, ()).await
    }
}

fn get_child_injector() -> Option<(SyscallRegs, Pin<Box<Injector>>)> {
    CHILD_INJECTOR.lock().take().map(|x| x.0)
}

fn restart_syscall(cpu: &mut CPUState, pc: target_ulong) {
    regs::set_pc(cpu, pc);
    unsafe {
        panda::sys::cpu_loop_exit_noexc(cpu);
    }
}

#[cfg(any(feature = "x86_64", feature = "i386"))]
const SYSENTER_INSTR: &[u8] = &[0xf, 0x34];

/// Run a syscall injector in the form of an async block/value to be evaluated. If
/// another injector is already running, it will be queued to start after all previous
/// injectors have finished running.
///
/// This operates by running each injected system call before resuming the original
/// system call, allowing the guest to run until all injected system calls have finished.
///
/// ### Context Requirements
///
/// `run_injector` must be run within a syscall enter callback. This is enforced by
/// means of only accepting [`SyscallPc`] to prevent misuse.
///
/// If you'd like to set up an injector to run during the next system call to avoid this
/// requirement, see [`run_injector_next_syscall`].
///
/// ### Async Execution
///
/// The included async runtime allows non-system-call futures to be awaited; however,
/// the async executor used does not provide any parallelism beyond host/guest
/// parallelism. This means any async I/O performed will be busily polled, wakers are
/// no-ops, and executor-dependent futures will not function.
///
/// There are currently no plans for injectors to become a true async context, so
/// outside of simple futures it is recommended to only use the provided [`syscall`]
/// function and futures built on top of it.
///
/// ### Behavior
///
/// The behavior of injecting into system calls which don't return, fork, or otherwise
/// affect control flow is currently undefined.
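///
/// ### Example
///
/// A minimal sketch of passing arguments to [`syscall`] (not compiled here; `GETPID`,
/// `KILL`, and `SIGSTOP` are hypothetical stand-ins for the architecture-specific
/// syscall numbers and signal value):
///
/// ```ignore
/// use panda::prelude::*;
/// use panda::syscall_injection::{run_injector, syscall};
///
/// #[panda::on_all_sys_enter]
/// fn on_sys_enter(cpu: &mut CPUState, pc: SyscallPc, syscall_num: target_ulong) {
///     run_injector(pc, async {
///         // Arguments are passed as a tuple and converted into guest syscall arguments
///         let pid = syscall(GETPID, ()).await;
///         let ret = syscall(KILL, (pid, SIGSTOP)).await;
///
///         println!("kill({}, SIGSTOP) returned {}", pid, ret);
///     });
/// }
/// ```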
pub fn run_injector(pc: SyscallPc, injector: impl Future<Output = ()> + 'static) {
    let pc = pc.pc();
    log::trace!("Running injector with syscall pc of {:#x?}", pc);

    // If our syscall is a `sysenter` instruction, we need to note this so that
    // we can handle the fact that `sysenter` uses a different syscall ABI involving
    // stack storage.
    #[cfg(any(feature = "x86_64", feature = "i386"))]
    {
        use crate::mem::virtual_memory_read;

        let cpu = unsafe { &mut *sys::get_cpu() };
        let is_sysenter = virtual_memory_read(cpu, pc, 2)
            .ok()
            .map(|bytes| bytes == SYSENTER_INSTR)
            .unwrap_or(false);

        log::trace!("is_sysenter = {}", is_sysenter);
        set_is_sysenter(is_sysenter);
    }

    // Now we push the injector into the queue for the current thread so that we can
    // begin polling it. Since we can't move it once we start polling it, we need to
    // put it in the PinnedQueue before we poll it for the first time.
    let is_first = INJECTORS.is_empty();
    let thread_id = ThreadId::current();
    INJECTORS.entry(thread_id).or_default().push_future(async {
        let backed_up_regs = SyscallRegs::backup();
        set_backed_up_regs(backed_up_regs.clone());

        injector.await;

        log::debug!("Restoring backed up registers");
        backed_up_regs.restore();
        unset_backed_up_regs();
    });

    // We only want to install the callbacks once, so if there are any existing
    // callbacks in place we don't want to install new ones. And if another injector is
    // already running, we don't want to start polling either.
    if is_first {
        log::trace!("Enabling callbacks...");

        // Make callback handles so they can be self-referential in order to uninstall
        // themselves when they are done running all our injectors.
        let sys_enter = PppCallback::new();
        let sys_return = PppCallback::new();

        let disable_callbacks = move || {
            log::trace!("Disabling callbacks...");
            sys_enter.disable();
            sys_return.disable();
        };

        // After the syscall, set the return value for the future, then jump back to
        // the syscall instruction
        sys_return.on_all_sys_return(move |cpu: &mut CPUState, sys_pc, sys_num| {
            if JUST_CLONED.load(Ordering::SeqCst) {
                log::debug!(
                    "on_sys_return: {} @ {:#x?} ({:#x?}?) ({:?})",
                    sys_num,
                    sys_pc.pc(),
                    pc,
                    ThreadId::current(),
                );
            } else {
                log::trace!(
                    "on_sys_return: {} @ {:#x?} ({:#x?}?) ({:?})",
                    sys_num,
                    sys_pc.pc(),
                    pc,
                    ThreadId::current(),
                );
            }

            if sys_num == VFORK {
                log::trace!("ret = {:#x?}", regs::get_reg(cpu, SYSCALL_RET));
            }

            let thread_id = ThreadId::current();
            if FORKING_THREADS.contains(&thread_id) {
                if sys_num != VFORK {
                    log::warn!("Non-fork ({}) return from {:?}", sys_num, thread_id);
                    log::warn!("Non-fork ret = {:#x?}", regs::get_reg(cpu, SYSCALL_RET));

                    if cfg!(not(feature = "arm")) {
                        return;
                    }

                    log::warn!("Returning from fork anyways.");
                    FORKING_THREADS.remove(&thread_id);

                    SHOULD_LOOP_AGAIN.store(true, Ordering::SeqCst);
                    set_ret_value(cpu);
                    restart_syscall(cpu, pc);
                } else {
                    log::debug!("Returning from fork {:?}", &thread_id);
                    FORKING_THREADS.remove(&thread_id);
                }
            }

            let forker_pid = PARENT_PID.load(Ordering::SeqCst);

            //let parent_pid = OSI.get_current_process(cpu).ppid as u64;
            //let is_fork_child = FORKING_THREADS
            //    .iter()
            //    .any(|thread| thread.pid as u64 == parent_pid);

            let is_child_of_forker = forker_pid != u64::MAX
                && OSI
                    .get_current_process(cpu)
                    .map(|proc| proc.ppid as u64 == forker_pid)
                    .unwrap_or_else(|| {
                        log::debug!("Failed to get process");
                        false
                    });

            if is_child_of_forker {
                PARENT_PID.store(u64::MAX, Ordering::SeqCst);
            }

            let is_fork_child = is_child_of_forker;
            //let is_fork = last_injected_syscall() == VFORK || sys_num == VFORK;
            //let is_fork_child =
            //    is_child_of_forker || (is_fork && regs::get_reg(cpu, SYSCALL_RET) == 0);

            if is_fork_child {
                // If we're returning from a fork and are in the child process, retrieve
                // the previously stored child injector. It doesn't need to back up its
                // own registers since we already did that in the parent process; we just
                // need to take the previously backed-up parent process registers in
                // case we end up wanting to restore them.
                if let Some((backed_up_regs, child_injector)) = get_child_injector() {
                    INJECTORS
                        .entry(ThreadId::current())
                        .or_default()
                        .push_future(async move {
                            child_injector.await;
                            backed_up_regs.restore();
                        });
                } else {
                    println!("WARNING: failed to get child injector");
                    return;
                }
            }

            log::trace!("Current asid = {:x}", current_asid());

            // Only restart for the thread we're currently injecting into, unless we just forked
            if is_fork_child || is_current_injector_thread() {
                SHOULD_LOOP_AGAIN.store(true, Ordering::SeqCst);
                if !is_fork_child {
                    set_ret_value(cpu);
                }
                restart_syscall(cpu, pc);
            }
        });

        // Poll the injectors and, if they've all finished running, disable these
        // callbacks
        sys_enter.on_all_sys_enter(move |cpu, sys_pc, sys_num| {
            log::trace!(
                "on_sys_enter: {} @ {:#x?} ({:#x?}?)",
                sys_num,
                sys_pc.pc(),
                pc
            );

            if poll_injectors() {
                disable_callbacks();
            }

            if SHOULD_LOOP_AGAIN.swap(false, Ordering::SeqCst) {
                restart_syscall(cpu, pc);
            }
        });

        // The newly queued injector needs an initial poll too; if it has already
        // finished running, disable the callbacks right away
        if poll_injectors() {
            println!("WARN: Injector seemed to not call any system calls?");
            disable_callbacks();
        }
    }
}

/// Set when the intercepted system call should be restarted again on the next
/// syscall enter callback
static SHOULD_LOOP_AGAIN: AtomicBool = AtomicBool::new(false);

lazy_static! {
    static ref CURRENT_REGS_BACKUP: DashMap<ThreadId, SyscallRegs> = DashMap::new();
}

/// Get the registers set to be restored when the current injector finishes
pub fn get_backed_up_regs() -> Option<SyscallRegs> {
    CURRENT_REGS_BACKUP
        .get(&ThreadId::current())
        .map(|regs| regs.clone())
}

fn set_backed_up_regs(regs: SyscallRegs) {
    CURRENT_REGS_BACKUP.insert(ThreadId::current(), regs);
}

fn unset_backed_up_regs() {
    CURRENT_REGS_BACKUP.remove(&ThreadId::current());
}

fn current_asid() -> target_ulong {
    unsafe { sys::panda_current_asid(sys::get_cpu()) }
}

/// Queue an injector to be run during the next system call.
///
/// For more information or for usage during a system call callback, see [`run_injector`].
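///
/// ### Example
///
/// A minimal sketch (not compiled here; `GET_PID` is a hypothetical stand-in for the
/// architecture-specific `getpid` syscall number), e.g. from plugin initialization
/// where no syscall enter callback is currently active:
///
/// ```ignore
/// use panda::syscall_injection::{run_injector_next_syscall, syscall};
///
/// run_injector_next_syscall(async {
///     println!("PID: {}", syscall(GET_PID, ()).await);
/// });
/// ```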
pub fn run_injector_next_syscall(injector: impl Future<Output = ()> + 'static) {
    let next_syscall = PppCallback::new();
    let mut injector = Some(injector);

    next_syscall.on_all_sys_enter(move |_, pc, _| {
        let injector = injector.take().unwrap();
        run_injector(pc, injector);
        next_syscall.disable();
    });
}

// Injectors are polled directly from the syscall callbacks, so wakers never need to
// do anything (see the "Async Execution" notes on `run_injector`)
fn do_nothing(_ptr: *const ()) {}

fn clone(ptr: *const ()) -> RawWaker {
    RawWaker::new(ptr, &VTABLE)
}

static VTABLE: RawWakerVTable = RawWakerVTable::new(clone, do_nothing, do_nothing, do_nothing);

fn waiting_for_syscall() -> bool {
    WAITING_FOR_SYSCALL.load(Ordering::SeqCst)
}

lazy_static! {
    static ref CURRENT_INJECTOR_THREAD: Mutex<Option<ThreadId>> = Mutex::new(None);
}

fn is_current_injector_thread() -> bool {
    CURRENT_INJECTOR_THREAD
        .lock()
        .as_ref()
        .map(|&id| id == ThreadId::current())
        .unwrap_or(false)
}

/// Returns true if all injectors have been processed
fn poll_injectors() -> bool {
    let raw = RawWaker::new(std::ptr::null(), &VTABLE);
    let waker = unsafe { Waker::from_raw(raw) };
    let mut ctxt = Context::from_waker(&waker);

    // Reset the 'waiting for system call' flag
    WAITING_FOR_SYSCALL.store(false, Ordering::SeqCst);

    // Clear in case we're looping without any injectors, so a stale 'current injector'
    // won't be injected into
    CURRENT_INJECTOR_THREAD.lock().take();

    if let Some(mut injectors) = INJECTORS.get_mut(&ThreadId::current()) {
        while let Some(ref mut current_injector) = injectors.current_mut() {
            //let current_injector = &mut *current_injector;

            CURRENT_INJECTOR_THREAD.lock().replace(ThreadId::current());

            match current_injector.as_mut().poll(&mut ctxt) {
                // If the current injector has finished running, start polling the next
                // injector. This also covers the case where the current injector bails early.
                status
                    if matches!(status, Poll::Ready(_))
                        || INJECTOR_BAIL.swap(false, Ordering::SeqCst) =>
                {
                    injectors.pop();

                    // No more injectors in the current thread
                    if injectors.is_empty() {
                        drop(injectors);
                        INJECTORS.remove(&ThreadId::current());

                        break;
                    }

                    continue;
                }

                // If the future is now waiting on a syscall to be evaluated, return
                // so a system call can be run
                Poll::Pending if waiting_for_syscall() => return false,

                // If the future is not waiting on a system call, we should keep polling
                Poll::Pending => continue,

                _ => unreachable!(),
            }
        }
    } else {
        return false;
    }

    let all_injectors_finished = INJECTORS.is_empty() && CHILD_INJECTOR.lock().is_none();

    all_injectors_finished
}