panda/syscall_injection.rs
//! Everything needed to perform async system call injection, allowing system calls
//! to be run within the guest.
//!
//! This feature lets you use Rust's async model to treat guest system calls as I/O
//! to be performed. This enables writing code that feels synchronous while the guest
//! automatically runs concurrently in order to perform any needed tasks (filesystem
//! access, interacting with processes/signals, mapping memory, etc.) within the
//! guest, while all computation is performed on the host.
//!
//! A system call injector under this API is an async block which can make use of the
//! [`syscall`] function in order to perform system calls. An injector can only be run
//! (or, rather, started) within a syscall enter callback.
//!
//! ## Example
//!
//! ```
//! use panda::prelude::*;
//! use panda::syscall_injection::{run_injector, syscall};
//!
//! async fn getpid() -> target_ulong {
//!     syscall(GET_PID, ()).await
//! }
//!
//! async fn getuid() -> target_ulong {
//!     syscall(GET_UID, ()).await
//! }
//!
//! #[panda::on_all_sys_enter]
//! fn any_syscall(cpu: &mut CPUState, pc: SyscallPc, syscall_num: target_ulong) {
//!     run_injector(pc, async {
//!         println!("PID: {}", getpid().await);
//!         println!("UID: {}", getuid().await);
//!         println!("PID (again): {}", getpid().await);
//!     });
//! }
//!
//! fn main() {
//!     Panda::new()
//!         .generic("x86_64")
//!         .args(&["-loadvm", "root"])
//!         .run();
//! }
//! ```
//!
//! (Full example present in `examples/syscall_injection.rs`)

use std::{
    future::Future,
    pin::Pin,
    sync::atomic::{AtomicBool, AtomicU64, Ordering},
    task::{Context, Poll, RawWaker, RawWakerVTable, Waker},
};

use dashmap::{DashMap, DashSet};
use lazy_static::lazy_static;
use parking_lot::{const_mutex, Mutex};

use crate::prelude::*;
use crate::{
    plugins::{osi::OSI, syscalls2::Syscalls2Callbacks},
    regs, sys, PppCallback,
};

mod arch;
mod conversion;
mod pinned_queue;
mod syscall_future;
mod syscall_regs;
mod syscalls;

pub(crate) use crate::abi::set_is_sysenter;
use {
    arch::{FORK_IS_CLONE, SYSCALL_RET, VFORK},
    pinned_queue::PinnedQueue,
    syscall_future::{INJECTOR_BAIL, WAITING_FOR_SYSCALL},
    syscall_regs::SyscallRegs,
};
pub use {conversion::*, syscall_future::*};

type Injector = dyn Future<Output = ()> + 'static;

/// A unique identifier for a thread of execution. The actual makeup is not relevant
/// to us, but it currently consists of a process ID and thread ID pair. The only
/// requirement is that it compares equal if and only if it refers to the same thread
/// of execution at a given point in time.
///
/// `ThreadId`s *may* be reused if the thread of execution no longer exists. Previously
/// `ThreadId`s were just ASIDs; however, this may not be enough on all platforms, due
/// to things such as `fork(2)` using the same ASID for both processes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct ThreadId {
    pid: target_ulong,
    tid: target_ulong,
}

impl ThreadId {
    fn current() -> Self {
        let cpu = unsafe { &mut *sys::get_cpu() };
        let thread = OSI.get_current_thread(cpu);

        let tid = thread.tid as target_ulong;
        let pid = thread.pid as target_ulong;

        log::trace!("current tid, pid: {:#x?}, {:#x?}", tid, pid);

        Self { tid, pid }
    }
}

lazy_static! {
    /// A list of injectors. Since multiple can run at the same time, we need a mapping
    /// of which threads run which injectors. Injectors can be queued in sequence but
    /// need to be capable of pinning[^1] the current injector, hence the `PinnedQueue`.
    ///
    /// [^1]: Pinning in Rust is the concept of ensuring that a value does not move.
    /// Async code relies on it because "stack" references in an async function desugar
    /// into references inside of the `Future` which point to other data within the
    /// `Future`. This means the type backing the `Future` can be self-referential, so
    /// if the underlying `Future` were moved those references would be invalid. For
    /// more information see [`std::pin`].
    static ref INJECTORS: DashMap<ThreadId, PinnedQueue<Injector>> = DashMap::new();

    /// A list of thread ids which have started forking but not yet returned from the fork
    static ref FORKING_THREADS: DashSet<ThreadId> = DashSet::new();
}

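/// The injector queued up by [`fork`] to run in the child process, paired with the
/// registers that were backed up in the parent so they can be restored once the
/// child's injector finishes.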
struct ChildInjector((SyscallRegs, Pin<Box<Injector>>));

unsafe impl Send for ChildInjector {}
unsafe impl Sync for ChildInjector {}

static CHILD_INJECTOR: Mutex<Option<ChildInjector>> = const_mutex(None);

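/// PID of the process currently performing an injected fork, or `u64::MAX` when no
/// fork is in flight. Used on syscall return to recognize the forked child by its
/// parent PID.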
static PARENT_PID: AtomicU64 = AtomicU64::new(u64::MAX);

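/// Set just before issuing the `clone(2)`-based fork (see [`fork`]) so the
/// syscall-return callback can log the return at debug level.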
static JUST_CLONED: AtomicBool = AtomicBool::new(false);

/// Fork the guest process being injected into and begin injecting into the child
/// using the provided injector.
///
/// Registers will be restored in the child as well, once the child injector
/// completes, unless the child injector bails.
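///
/// ### Example
///
/// A minimal sketch of use from within a running injector (`GET_PID` stands in for
/// the target's `getpid` syscall number):
///
/// ```
/// use panda::prelude::*;
/// use panda::syscall_injection::{fork, syscall};
///
/// async fn injector() {
///     let child_pid = fork(async {
///         // This block runs as an injector inside the forked child process
///         println!("child PID: {}", syscall(GET_PID, ()).await);
///     })
///     .await;
///
///     println!("fork returned: {}", child_pid);
/// }
/// ```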
pub async fn fork(child_injector: impl Future<Output = ()> + 'static) -> target_ulong {
    // Since all state needs to be copied when forking, we also need to copy *our*
    // state. Since we've backed up the registers to restore once we're done injecting
    // our system calls, we need to copy those registers as well in case the user wants
    // to resume the base program's execution within the child.
    let backed_up_regs = get_backed_up_regs().expect("Fork was run outside of an injector");

    PARENT_PID.store(ThreadId::current().pid as u64, Ordering::SeqCst);

    // Used to keep track of the threads from which parent processes are forking
    FORKING_THREADS.insert(ThreadId::current());

    // This code assumes that we aren't going to be injecting into multiple processes
    // and forking at the same time in an overlapping manner. Effectively this is storing
    // the future (i.e. the second injector the user passes to `fork(...)` to run in the
    // child) so that once the child process starts we can begin syscall injection there.
    CHILD_INJECTOR
        .lock()
        .replace(ChildInjector((backed_up_regs, Box::pin(child_injector))));

    // aarch64 is a new enough Linux target that it deprecates `fork(2)` entirely and
    // replaces it with `clone(2)`. This means that on certain targets our syscall
    // number for it (`FORK`) is actually the syscall number for clone, which takes a
    // different set of arguments. Currently unsupported.
    if FORK_IS_CLONE {
        const CLONE_FILES: target_ulong = 0x00000400;
        const CLONE_VFORK: target_ulong = 0x00004000;
        const CLONE_NEWPID: target_ulong = 0x20000000;

        const NULL: target_ptr_t = 0;
        const CLONE: target_ulong = VFORK;

        JUST_CLONED.swap(true, Ordering::SeqCst);

        log::debug!("Running clone syscall");
        syscall(
            CLONE,
            (
                CLONE_VFORK | CLONE_NEWPID | CLONE_FILES,
                NULL,
                NULL,
                NULL,
                NULL,
            ),
        )
        .await
    } else {
        syscall(VFORK, ()).await
    }
}

fn get_child_injector() -> Option<(SyscallRegs, Pin<Box<Injector>>)> {
    CHILD_INJECTOR.lock().take().map(|x| x.0)
}

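/// Point the program counter back at the syscall instruction and exit the CPU loop
/// without raising an exception, causing the guest to re-execute the system call.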
fn restart_syscall(cpu: &mut CPUState, pc: target_ulong) {
    regs::set_pc(cpu, pc);
    unsafe {
        panda::sys::cpu_loop_exit_noexc(cpu);
    }
}

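/// The opcode bytes of the x86 `sysenter` instruction (`0F 34`)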
#[cfg(any(feature = "x86_64", feature = "i386"))]
const SYSENTER_INSTR: &[u8] = &[0xf, 0x34];

/// Run a syscall injector in the form of an async block/value to be evaluated. If
/// another injector is already running, it will be queued to start after all previous
/// injectors have finished running.
///
/// This operates by running each injected system call before resuming the original
/// system call, allowing the guest to run until all injected system calls have
/// finished.
///
/// ### Context Requirements
///
/// `run_injector` must be run within a syscall enter callback. This is enforced by
/// means of only accepting [`SyscallPc`] to prevent misuse.
///
/// If you'd like to set up an injector to run during the next system call to avoid
/// this requirement, see [`run_injector_next_syscall`].
///
/// ### Async Execution
///
/// The included async runtime allows non-system-call futures to be awaited; however,
/// the async executor used does not provide any parallelism beyond host/guest
/// parallelism. This means any async I/O performed will be busily polled, wakers are
/// no-ops, and executor-dependent futures will not function.
///
/// There are currently no plans for injectors to become a true async context, so
/// outside of simple futures it is recommended to only use the provided [`syscall`]
/// function and futures built on top of it.
///
/// ### Behavior
///
/// The behavior of injecting into system calls which don't return, fork, or otherwise
/// affect control flow is currently undefined.
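///
/// ### Example
///
/// A condensed version of the module-level example (`GET_PID` stands in for the
/// target's `getpid` syscall number):
///
/// ```
/// use panda::prelude::*;
/// use panda::syscall_injection::{run_injector, syscall};
///
/// #[panda::on_all_sys_enter]
/// fn any_syscall(cpu: &mut CPUState, pc: SyscallPc, syscall_num: target_ulong) {
///     run_injector(pc, async {
///         println!("PID: {}", syscall(GET_PID, ()).await);
///     });
/// }
/// ```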
pub fn run_injector(pc: SyscallPc, injector: impl Future<Output = ()> + 'static) {
    let pc = pc.pc();
    log::trace!("Running injector with syscall pc of {:#x?}", pc);

    // If our syscall is a `sysenter` instruction, we need to note this so that
    // we can handle the fact that `sysenter` uses a different syscall ABI involving
    // stack storage.
    #[cfg(any(feature = "x86_64", feature = "i386"))]
    {
        use crate::mem::virtual_memory_read;

        let cpu = unsafe { &mut *sys::get_cpu() };
        let is_sysenter = virtual_memory_read(cpu, pc, 2)
            .ok()
            .map(|bytes| bytes == SYSENTER_INSTR)
            .unwrap_or(false);

        log::trace!("is_sysenter = {}", is_sysenter);
        set_is_sysenter(is_sysenter);
    }

    // Now we push the injector into the queue for the current thread so that we can
    // begin polling it. Since we can't move it once we start polling it, we need to
    // put it in the PinnedQueue before polling it for the first time.
    let is_first = INJECTORS.is_empty();
    let thread_id = ThreadId::current();
    INJECTORS.entry(thread_id).or_default().push_future(async {
        let backed_up_regs = SyscallRegs::backup();
        set_backed_up_regs(backed_up_regs.clone());

        injector.await;

        log::debug!("Restoring backed up registers");
        backed_up_regs.restore();
        unset_backed_up_regs();
    });

    // We only want to install the callbacks once, so if there are any existing
    // callbacks in place we don't want to install them again. And if another injector
    // is already running, we don't want to start polling either.
    if is_first {
        log::trace!("Enabling callbacks...");

        // Make the callback handles up front so the closures can reference them in
        // order to uninstall themselves when they are done running all our injectors.
        let sys_enter = PppCallback::new();
        let sys_return = PppCallback::new();

        let disable_callbacks = move || {
            log::trace!("Disabling callbacks...");
            sys_enter.disable();
            sys_return.disable();
        };

        // After the syscall returns, set the return value for the future, then jump
        // back to the syscall instruction.
        sys_return.on_all_sys_return(move |cpu: &mut CPUState, sys_pc, sys_num| {
            if JUST_CLONED.load(Ordering::SeqCst) {
                log::debug!(
                    "on_sys_return: {} @ {:#x?} ({:#x?}?) ({:?})",
                    sys_num,
                    sys_pc.pc(),
                    pc,
                    ThreadId::current(),
                );
            } else {
                log::trace!(
                    "on_sys_return: {} @ {:#x?} ({:#x?}?) ({:?})",
                    sys_num,
                    sys_pc.pc(),
                    pc,
                    ThreadId::current(),
                );
            }

            if sys_num == VFORK {
                log::trace!("ret = {:#x?}", regs::get_reg(cpu, SYSCALL_RET));
            }

            let thread_id = ThreadId::current();
            if FORKING_THREADS.contains(&thread_id) {
                if sys_num != VFORK {
                    log::warn!("Non-fork ({}) return from {:?}", sys_num, thread_id);
                    log::warn!("Non-fork ret = {:#x?}", regs::get_reg(cpu, SYSCALL_RET));

                    if cfg!(not(feature = "arm")) {
                        return;
                    }

                    log::warn!("Returning from fork anyways.");
                    FORKING_THREADS.remove(&thread_id);

                    SHOULD_LOOP_AGAIN.store(true, Ordering::SeqCst);
                    set_ret_value(cpu);
                    restart_syscall(cpu, pc);
                } else {
                    log::debug!("Returning from fork {:?}", &thread_id);
                    FORKING_THREADS.remove(&thread_id);
                }
            }

            let forker_pid = PARENT_PID.load(Ordering::SeqCst);

            //let parent_pid = OSI.get_current_process(cpu).ppid as u64;
            //let is_fork_child = FORKING_THREADS
            //    .iter()
            //    .any(|thread| thread.pid as u64 == parent_pid);

            let is_child_of_forker = forker_pid != u64::MAX
                && OSI
                    .get_current_process(cpu)
                    .map(|proc| proc.ppid as u64 == forker_pid)
                    .unwrap_or_else(|| {
                        log::debug!("Failed to get process");
                        false
                    });

            if is_child_of_forker {
                PARENT_PID.store(u64::MAX, Ordering::SeqCst);
            }

            let is_fork_child = is_child_of_forker;
            //let is_fork = last_injected_syscall() == VFORK || sys_num == VFORK;
            //let is_fork_child =
            //    is_child_of_forker || (is_fork && regs::get_reg(cpu, SYSCALL_RET) == 0);

            if is_fork_child {
                // If we're returning from a fork and are in the child process, retrieve
                // the previously stored child injector. It doesn't need to back up its
                // registers since we already did that in the parent process; we just
                // need to take the previously backed-up parent process registers in
                // case we end up wanting to restore them.
                if let Some((backed_up_regs, child_injector)) = get_child_injector() {
                    INJECTORS
                        .entry(ThreadId::current())
                        .or_default()
                        .push_future(async move {
                            child_injector.await;
                            backed_up_regs.restore();
                        });
                } else {
                    println!("WARNING: failed to get child injector");
                    return;
                }
            }

            log::trace!("Current asid = {:x}", current_asid());

            // Only restart for the thread we're currently injecting into, unless we
            // just forked
            if is_fork_child || is_current_injector_thread() {
                SHOULD_LOOP_AGAIN.store(true, Ordering::SeqCst);
                if !is_fork_child {
                    set_ret_value(cpu);
                }
                restart_syscall(cpu, pc);
            }
        });

        // Poll the injectors, and if they've all finished running, disable these
        // callbacks.
        sys_enter.on_all_sys_enter(move |cpu, sys_pc, sys_num| {
            log::trace!(
                "on_sys_enter: {} @ {:#x?} ({:#x?}?)",
                sys_num,
                sys_pc.pc(),
                pc
            );

            if poll_injectors() {
                disable_callbacks();
            }

            if SHOULD_LOOP_AGAIN.swap(false, Ordering::SeqCst) {
                restart_syscall(cpu, pc);
            }
        });

        // If this is the first syscall the injectors need to be polled here too,
        // disabling the callbacks if they have already finished running.
        if poll_injectors() {
            println!("WARN: Injector seemed to not call any system calls?");
            disable_callbacks();
        }
    }
}

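/// Indicates that the syscall-enter callback should restart the current system call
/// again after the injectors have been polled.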
static SHOULD_LOOP_AGAIN: AtomicBool = AtomicBool::new(false);

lazy_static! {
    static ref CURRENT_REGS_BACKUP: DashMap<ThreadId, SyscallRegs> = DashMap::new();
}

/// Get the registers set to be restored when the current injector finishes
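///
/// Returns `None` if no injector is currently running on this thread.
///
/// A minimal usage sketch from within an injector:
///
/// ```
/// use panda::syscall_injection::get_backed_up_regs;
///
/// async fn injector() {
///     let backed_up = get_backed_up_regs().expect("not running within an injector");
///     // `backed_up` holds the registers captured when this injector started
/// }
/// ```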
pub fn get_backed_up_regs() -> Option<SyscallRegs> {
    CURRENT_REGS_BACKUP
        .get(&ThreadId::current())
        .map(|regs| regs.clone())
}

fn set_backed_up_regs(regs: SyscallRegs) {
    CURRENT_REGS_BACKUP.insert(ThreadId::current(), regs);
}

fn unset_backed_up_regs() {
    CURRENT_REGS_BACKUP.remove(&ThreadId::current());
}

fn current_asid() -> target_ulong {
    unsafe { sys::panda_current_asid(sys::get_cpu()) }
}

/// Queue an injector to be run during the next system call.
///
/// For more information, or for usage during a system call callback, see [`run_injector`].
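///
/// A minimal usage sketch (`GET_PID` stands in for the target's `getpid` syscall
/// number):
///
/// ```
/// use panda::prelude::*;
/// use panda::syscall_injection::{run_injector_next_syscall, syscall};
///
/// run_injector_next_syscall(async {
///     println!("PID: {}", syscall(GET_PID, ()).await);
/// });
/// ```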
pub fn run_injector_next_syscall(injector: impl Future<Output = ()> + 'static) {
    let next_syscall = PppCallback::new();
    let mut injector = Some(injector);

    next_syscall.on_all_sys_enter(move |_, pc, _| {
        let injector = injector.take().unwrap();
        run_injector(pc, injector);
        next_syscall.disable();
    });
}

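// The executor here never parks or wakes tasks: injectors are polled directly from
// the syscall callbacks, so the `Waker` only exists to satisfy the `Future` API and
// all of its operations are no-ops.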
fn do_nothing(_ptr: *const ()) {}

fn clone(ptr: *const ()) -> RawWaker {
    RawWaker::new(ptr, &VTABLE)
}

static VTABLE: RawWakerVTable = RawWakerVTable::new(clone, do_nothing, do_nothing, do_nothing);

fn waiting_for_syscall() -> bool {
    WAITING_FOR_SYSCALL.load(Ordering::SeqCst)
}

lazy_static! {
    static ref CURRENT_INJECTOR_THREAD: Mutex<Option<ThreadId>> = Mutex::new(None);
}

fn is_current_injector_thread() -> bool {
    CURRENT_INJECTOR_THREAD
        .lock()
        .as_ref()
        .map(|&id| id == ThreadId::current())
        .unwrap_or(false)
}

/// Returns true if all injectors have been processed
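///
/// Injectors for the current thread are polled in order; if the current injector is
/// waiting on a system call to be performed, polling stops until that syscall returns.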
fn poll_injectors() -> bool {
    let raw = RawWaker::new(std::ptr::null(), &VTABLE);
    let waker = unsafe { Waker::from_raw(raw) };
    let mut ctxt = Context::from_waker(&waker);

    // Reset the 'waiting for system call' flag
    WAITING_FOR_SYSCALL.store(false, Ordering::SeqCst);

    // Clear in case we're looping without any injectors, so a stale 'current injector'
    // won't be injected into
    CURRENT_INJECTOR_THREAD.lock().take();

    if let Some(mut injectors) = INJECTORS.get_mut(&ThreadId::current()) {
        while let Some(ref mut current_injector) = injectors.current_mut() {
            //let current_injector = &mut *current_injector;

            CURRENT_INJECTOR_THREAD.lock().replace(ThreadId::current());

            match current_injector.as_mut().poll(&mut ctxt) {
                // If the current injector has finished running, start polling the next
                // injector. This includes if the current injector bails early.
                status
                    if matches!(status, Poll::Ready(_))
                        || INJECTOR_BAIL.swap(false, Ordering::SeqCst) =>
                {
                    injectors.pop();

                    // No more injectors in the current thread
                    if injectors.is_empty() {
                        drop(injectors);
                        INJECTORS.remove(&ThreadId::current());

                        break;
                    }

                    continue;
                }

                // If the future is now waiting on a syscall to be evaluated, return
                // so a system call can be run
                Poll::Pending if waiting_for_syscall() => return false,

                // If the future is not waiting on a system call we should keep polling
                Poll::Pending => continue,

                _ => unreachable!(),
            }
        }
    } else {
        return false;
    }

    let all_injectors_finished = INJECTORS.is_empty() && CHILD_INJECTOR.lock().is_none();

    all_injectors_finished
}