polkavm_linux_sandbox/
lib.rs

1#![doc = include_str!("../README.md")]
2#![allow(clippy::collapsible_else_if)]
3#![allow(clippy::len_without_is_empty)]
4#![allow(clippy::manual_range_contains)]
5
6extern crate polkavm_linux_raw as linux_raw;
7
8use polkavm_common::{
9    abi::VM_MAXIMUM_EXTERN_ARG_COUNT,
10    abi::VM_PAGE_SIZE,
11    error::{ExecutionError, Trap},
12    init::GuestProgramInit,
13    program::Reg,
14    utils::{align_to_next_page_usize, slice_assume_init_mut, Access, AsUninitSliceMut},
15    zygote::{
16        SandboxMemoryConfig, VmCtx, SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER, SANDBOX_EMPTY_NTH_INSTRUCTION, VMCTX_FUTEX_BUSY,
17        VMCTX_FUTEX_HOSTCALL, VMCTX_FUTEX_IDLE, VMCTX_FUTEX_INIT, VMCTX_FUTEX_TRAP, VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION,
18        VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION,
19    },
20};
21
22pub use linux_raw::Error;
23
24use core::ffi::{c_int, c_long, c_uint};
25use core::sync::atomic::Ordering;
26use linux_raw::{abort, cstr, syscall_readonly, Fd, Mmap, STDERR_FILENO, STDIN_FILENO};
27use std::time::Instant;
28
/// Options controlling how a sandbox process is spawned.
pub struct SandboxConfig {
    /// Whether to capture the sandbox child's stderr and forward it to the `log` crate.
    pub enable_logger: bool,
}
32
33impl SandboxConfig {
34    pub fn new() -> Self {
35        SandboxConfig { enable_logger: false }
36    }
37
38    pub fn enable_logger(&mut self, value: bool) {
39        self.enable_logger = value;
40    }
41}
42
43impl Default for SandboxConfig {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
/// Argument block passed to the `clone3` syscall.
///
/// NOTE(review): field order and types must mirror the kernel's `struct clone_args`
/// exactly — do not reorder or resize fields.
#[repr(C)]
struct CloneArgs {
    /// Flags.
    flags: u64,
    /// Where to store PID file descriptor. (int *)
    pidfd: *mut c_int,
    /// Where to store child TID in child's memory. (pid_t *)
    child_tid: u64,
    /// Where to store child TID in parent's memory. (pid_t *)
    parent_tid: u64,
    /// Signal to deliver to parent on child termination.
    exit_signal: u64,
    /// Pointer to lowest byte of stack.
    stack: u64,
    /// Size of the stack.
    stack_size: u64,
    /// Location of the new TLS.
    tls: u64,
}
68
69/// Closes all file descriptors except the ones given.
70fn close_other_file_descriptors(preserved_fds: &[c_int]) -> Result<(), Error> {
71    let mut start_at = 0;
72    for &fd in preserved_fds {
73        if start_at == fd {
74            start_at = fd + 1;
75            continue;
76        }
77
78        if start_at > fd {
79            // Preserved file descriptors must be sorted.
80            return Err(Error::from_str("internal error: preserved file descriptors are not sorted"));
81        }
82
83        if linux_raw::sys_close_range(start_at, fd - 1, 0).is_err() {
84            return close_other_file_descriptors_legacy(preserved_fds);
85        }
86
87        start_at = fd + 1;
88    }
89
90    if linux_raw::sys_close_range(start_at, c_int::MAX, 0).is_err() {
91        return close_other_file_descriptors_legacy(preserved_fds);
92    }
93
94    Ok(())
95}
96
97/// Closes all file descriptors except the ones given.
98///
99/// For compatibility with old versions of Linux.
100fn close_other_file_descriptors_legacy(preserved_fds: &[c_int]) -> Result<(), Error> {
101    let dirfd = linux_raw::sys_open(
102        cstr!("/proc/self/fd"),
103        linux_raw::O_RDONLY | linux_raw::O_DIRECTORY | linux_raw::O_CLOEXEC,
104    )?;
105    for dirent in linux_raw::readdir(dirfd.borrow()) {
106        let dirent = dirent?;
107        let name = dirent.d_name();
108        if !name.iter().all(|&byte| byte >= b'0' && byte <= b'9') {
109            continue;
110        }
111
112        let name = core::str::from_utf8(name)
113            .ok()
114            .ok_or_else(|| Error::from_str("entry in '/proc/self/fd' is not valid utf-8"))?;
115        let fd: c_int = name
116            .parse()
117            .ok()
118            .ok_or_else(|| Error::from_str("entry in '/proc/self/fd' is not a number"))?;
119        if fd == dirfd.raw() || preserved_fds.iter().any(|&pfd| pfd == fd) {
120            continue;
121        }
122
123        Fd::from_raw_unchecked(fd).close()?;
124    }
125
126    dirfd.close()?;
127    Ok(())
128}
129
/// Guard which blocks all signals while alive; the previous signal mask is
/// restored on `unblock` or on drop.
struct Sigmask {
    /// The signal mask that was in effect before blocking.
    sigset_original: linux_raw::kernel_sigset_t,
}
133
134impl Sigmask {
135    /// Temporarily blocks all signals from being delivered.
136    fn block_all_signals() -> Result<Self, Error> {
137        let sigset_all: linux_raw::kernel_sigset_t = !0;
138        let mut sigset_original: linux_raw::kernel_sigset_t = 0;
139        unsafe { linux_raw::sys_rt_sigprocmask(linux_raw::SIG_SETMASK, &sigset_all, Some(&mut sigset_original))? };
140
141        Ok(Sigmask { sigset_original })
142    }
143
144    /// Unblocks signal delivery.
145    fn unblock(mut self) -> Result<(), Error> {
146        let result = self.unblock_inplace();
147        core::mem::forget(self);
148        result
149    }
150
151    /// Unblocks signal delivery.
152    fn unblock_inplace(&mut self) -> Result<(), Error> {
153        unsafe { linux_raw::sys_rt_sigprocmask(linux_raw::SIG_SETMASK, &self.sigset_original, None) }
154    }
155}
156
impl Drop for Sigmask {
    fn drop(&mut self) {
        // Best-effort restore of the original mask; errors cannot propagate out of drop.
        let _ = self.unblock_inplace();
    }
}
162
/// Handle to a cloned sandbox child process.
#[derive(Debug)]
struct ChildProcess {
    pid: c_int,
    /// Pidfd for the child when the kernel provided one (CLONE_PIDFD);
    /// preferred over the raw PID where available.
    pidfd: Option<Fd>,
}
168
/// The observed state of a sandbox child process.
#[derive(Debug)]
enum ChildStatus {
    /// The child is still alive.
    Running,
    /// No such child exists (e.g. it was already reaped).
    NotRunning,
    /// The child exited normally with the given exit code.
    Exited(c_int),
    /// The child was terminated by the given signal.
    ExitedDueToSignal(c_int),
}
176
177impl ChildStatus {
178    pub fn is_running(&self) -> bool {
179        matches!(self, Self::Running)
180    }
181}
182
183impl ChildProcess {
184    fn waitid(&mut self, flags: u32) -> Result<linux_raw::siginfo_t, Error> {
185        let mut siginfo: linux_raw::siginfo_t = unsafe { core::mem::zeroed() };
186        let mut result;
187        loop {
188            result = if let Some(ref pidfd) = self.pidfd {
189                linux_raw::sys_waitid(linux_raw::P_PIDFD, pidfd.raw(), &mut siginfo, flags, None)
190            } else {
191                linux_raw::sys_waitid(linux_raw::P_PID, self.pid, &mut siginfo, flags, None)
192            };
193
194            if let Err(error) = result {
195                if error.errno() == linux_raw::EINTR {
196                    // Should not happen since we should be blocking all signals while this is called, but just in case.
197                    continue;
198                }
199
200                return Err(error);
201            }
202
203            return Ok(siginfo);
204        }
205    }
206
207    fn check_status(&mut self, non_blocking: bool) -> Result<ChildStatus, Error> {
208        // The __WALL here is needed since we're not specifying an exit signal
209        // when cloning the child process, so we'd get an ECHILD error without this flag.
210        //
211        // (And we're not using __WCLONE since that doesn't work for children which ran execve.)
212        let mut flags = linux_raw::WEXITED | linux_raw::__WALL;
213        if non_blocking {
214            flags |= linux_raw::WNOHANG;
215        }
216
217        match self.waitid(flags) {
218            Ok(ok) => unsafe {
219                if ok.si_signo() == 0 && ok.si_pid() == 0 {
220                    Ok(ChildStatus::Running)
221                } else {
222                    if linux_raw::WIFSIGNALED(ok.si_status()) {
223                        Ok(ChildStatus::ExitedDueToSignal(linux_raw::WTERMSIG(ok.si_status())))
224                    } else if linux_raw::WIFEXITED(ok.si_status()) {
225                        Ok(ChildStatus::Exited(linux_raw::WEXITSTATUS(ok.si_status())))
226                    } else {
227                        Err(Error::from_last_os_error("waitid failed: internal error: unexpected state"))
228                    }
229                }
230            },
231            Err(error) => {
232                if error.errno() == linux_raw::ECHILD {
233                    Ok(ChildStatus::NotRunning)
234                } else {
235                    Err(error)
236                }
237            }
238        }
239    }
240
241    fn send_signal(&mut self, signal: c_uint) -> Result<(), Error> {
242        unsafe {
243            if let Some(ref pidfd) = self.pidfd {
244                let errcode = syscall_readonly!(linux_raw::SYS_pidfd_send_signal, pidfd, signal, 0, 0);
245                Error::from_syscall("pidfd_send_signal", errcode)
246            } else {
247                linux_raw::sys_kill(self.pid, signal)
248            }
249        }
250    }
251}
252
253impl Drop for ChildProcess {
254    fn drop(&mut self) {
255        if self.send_signal(linux_raw::SIGKILL).is_ok() {
256            // Reap the zombie process.
257            let _ = self.check_status(false);
258        }
259    }
260}
261
/// Returns the native page size of the system via `sysconf(_SC_PAGESIZE)`.
fn get_native_page_size() -> usize {
    // This is literally the only thing we need from `libc`, so instead of
    // pulling in the whole crate we declare the symbol ourselves.
    const _SC_PAGESIZE: c_int = 30;

    extern "C" {
        fn sysconf(name: c_int) -> c_long;
    }

    // SAFETY: `sysconf` is always safe to call with a valid name constant.
    let page_size = unsafe { sysconf(_SC_PAGESIZE) };
    page_size as usize
}
273
// The zygote executable blob which gets copied into a sealed memfd and
// executed inside the sandboxed child.
//
// With `polkavm_dev_use_built_zygote` set, a freshly built zygote is taken
// straight from its cargo target directory; otherwise the prebuilt blob
// shipped alongside this source file is used.
#[cfg(polkavm_dev_use_built_zygote)]
static ZYGOTE_BLOB: &[u8] = include_bytes!("../../polkavm-zygote/target/x86_64-unknown-linux-gnu/release/polkavm-zygote");

#[cfg(not(polkavm_dev_use_built_zygote))]
static ZYGOTE_BLOB: &[u8] = include_bytes!("./polkavm-zygote");
279
/// Creates a memfd named `name` of exactly `length` bytes (which must be a
/// multiple of the native page size), lets `populate` fill its contents
/// through a temporary mapping, and then seals it against sealing, shrinking,
/// growing and writing before returning it.
fn prepare_sealed_memfd(name: &core::ffi::CStr, length: usize, populate: impl FnOnce(&mut [u8])) -> Result<Fd, Error> {
    let native_page_size = get_native_page_size();
    if length % native_page_size != 0 {
        return Err(Error::from_str("memfd size doesn't end on a page boundary"));
    }

    let memfd = linux_raw::sys_memfd_create(name, linux_raw::MFD_CLOEXEC | linux_raw::MFD_ALLOW_SEALING)?;
    linux_raw::sys_ftruncate(memfd.borrow(), length as linux_raw::c_ulong)?;

    // Map the memfd so `populate` can write the payload; it must be unmapped
    // again before sealing with F_SEAL_WRITE can succeed.
    let mut map = unsafe {
        linux_raw::Mmap::map(
            core::ptr::null_mut(),
            length,
            linux_raw::PROT_READ | linux_raw::PROT_WRITE,
            linux_raw::MAP_SHARED,
            Some(memfd.borrow()),
            0,
        )?
    };

    populate(map.as_slice_mut());
    map.unmap()?;

    let timestamp = linux_raw::sys_clock_gettime(linux_raw::CLOCK_MONOTONIC_RAW)?;
    loop {
        if let Err(error) = linux_raw::sys_fcntl(
            memfd.borrow(),
            linux_raw::F_ADD_SEALS,
            linux_raw::F_SEAL_SEAL | linux_raw::F_SEAL_SHRINK | linux_raw::F_SEAL_GROW | linux_raw::F_SEAL_WRITE,
        ) {
            if error.errno() == linux_raw::EBUSY {
                // This will return EBUSY if the fd is still mapped, and since apparently munmap is asynchronous in the presence
                // of multiple threads this can still sometimes randomly fail with EBUSY anyway, even though we did unmap the fd already.
                let elapsed = linux_raw::sys_clock_gettime(linux_raw::CLOCK_MONOTONIC_RAW)? - timestamp;
                if elapsed > core::time::Duration::from_secs(3) {
                    // Just a fail-safe to make sure we don't deadlock.
                    return Err(error);
                }

                continue;
            } else {
                return Err(error);
            }
        }

        break;
    }

    Ok(memfd)
}
330
331fn prepare_zygote() -> Result<Fd, Error> {
332    let native_page_size = get_native_page_size();
333
334    #[allow(clippy::unwrap_used)]
335    // The size of the zygote blob is always going to be much less than the size of usize, so this never fails.
336    let length_aligned = align_to_next_page_usize(native_page_size, ZYGOTE_BLOB.len()).unwrap();
337
338    prepare_sealed_memfd(cstr!("polkavm_zygote"), length_aligned, |buffer| {
339        buffer[..ZYGOTE_BLOB.len()].copy_from_slice(ZYGOTE_BLOB);
340    })
341}
342
343fn prepare_vmctx() -> Result<(Fd, Mmap), Error> {
344    let native_page_size = get_native_page_size();
345
346    #[allow(clippy::unwrap_used)] // The size of VmCtx is always going to be much less than the size of usize, so this never fails.
347    let length_aligned = align_to_next_page_usize(native_page_size, core::mem::size_of::<VmCtx>()).unwrap();
348
349    let memfd = linux_raw::sys_memfd_create(cstr!("polkavm_vmctx"), linux_raw::MFD_CLOEXEC | linux_raw::MFD_ALLOW_SEALING)?;
350    linux_raw::sys_ftruncate(memfd.borrow(), length_aligned as linux_raw::c_ulong)?;
351    linux_raw::sys_fcntl(
352        memfd.borrow(),
353        linux_raw::F_ADD_SEALS,
354        linux_raw::F_SEAL_SEAL | linux_raw::F_SEAL_SHRINK | linux_raw::F_SEAL_GROW,
355    )?;
356
357    let vmctx = unsafe {
358        linux_raw::Mmap::map(
359            core::ptr::null_mut(),
360            length_aligned,
361            linux_raw::PROT_READ | linux_raw::PROT_WRITE,
362            linux_raw::MAP_SHARED,
363            Some(memfd.borrow()),
364            0,
365        )?
366    };
367
368    unsafe {
369        *vmctx.as_mut_ptr().cast::<VmCtx>() = VmCtx::new();
370    }
371
372    Ok((memfd, vmctx))
373}
374
/// Entry point for the freshly cloned sandbox child process.
///
/// Locks the process down step by step — process name, host/domain names,
/// UID/GID maps, file descriptor cleanup, pivot into an empty tmpfs,
/// securebits, rlimits, capability drop — and finally `execveat`s the zygote
/// straight from its memfd.
///
/// # Safety
///
/// Must only be called in the child immediately after `clone`; in particular
/// it must not call into libc, as other threads of the parent may have been
/// holding libc's internal locks at clone time (see the comment at the call
/// site in `Sandbox::spawn`).
unsafe fn child_main(zygote_memfd: Fd, child_socket: Fd, uid_map: &str, gid_map: &str, logging_pipe: Option<Fd>) -> Result<(), Error> {
    // Change the name of the process.
    linux_raw::sys_prctl_set_name(b"polkavm-sandbox\0")?;

    // Overwrite the hostname and domainname.
    linux_raw::sys_sethostname("localhost")?;
    linux_raw::sys_setdomainname("localhost")?;

    // Disable the 'setgroups' syscall. Probably unnecessary since we'll do it though seccomp anyway, but just in case.
    // (See CVE-2014-8989 for more details.)
    let proc_self = linux_raw::sys_open(cstr!("/proc/self"), linux_raw::O_CLOEXEC | linux_raw::O_PATH)?;
    let fd = linux_raw::sys_openat(proc_self.borrow(), cstr!("setgroups"), linux_raw::O_CLOEXEC | linux_raw::O_WRONLY)?;
    linux_raw::sys_write(fd.borrow(), b"deny")?;
    fd.close()?;

    // Set up UID and GID maps. This can only be done once, so if we do it here we'll block the possibility of doing it later.
    let fd = linux_raw::sys_openat(proc_self.borrow(), cstr!("gid_map"), linux_raw::O_CLOEXEC | linux_raw::O_RDWR)?;
    linux_raw::sys_write(fd.borrow(), gid_map.as_bytes())?;
    fd.close()?;

    let fd = linux_raw::sys_openat(proc_self.borrow(), cstr!("uid_map"), linux_raw::O_CLOEXEC | linux_raw::O_RDWR)?;
    linux_raw::sys_write(fd.borrow(), uid_map.as_bytes())?;
    fd.close()?;
    proc_self.close()?;

    if zygote_memfd.raw() == STDIN_FILENO {
        // This should never happen in practice, but can in theory if the user closes STDIN manually.
        return Err(Error::from_str("internal error: zygote memfd was allocated as STDIN"));
    }

    // Replace the stdin fd (which we don't need).
    if child_socket.raw() != STDIN_FILENO {
        linux_raw::sys_dup3(child_socket.raw(), STDIN_FILENO, 0)?;
        child_socket.close()?;
    } else {
        child_socket.leak();
    }

    // Clean up any file descriptors which might have been opened by the host process.
    let mut fds_to_keep = [core::ffi::c_int::MAX; 3];
    let fds_to_keep = {
        let mut index = 1;
        fds_to_keep[0] = STDIN_FILENO;
        if let Some(logging_pipe) = logging_pipe {
            // Route the child's STDERR into the logging pipe.
            if logging_pipe.raw() != STDERR_FILENO {
                linux_raw::sys_dup3(logging_pipe.raw(), STDERR_FILENO, 0)?;
                logging_pipe.close()?;
            } else {
                logging_pipe.leak();
            }

            fds_to_keep[index] = STDERR_FILENO;
            index += 1;
        }

        fds_to_keep[index] = zygote_memfd.raw();
        fds_to_keep.sort_unstable(); // Should be a no-op.
        &fds_to_keep[..index + 1]
    };
    close_other_file_descriptors(fds_to_keep)?;

    // Hide the host filesystem.
    let mount_flags = linux_raw::MS_REC | linux_raw::MS_NODEV | linux_raw::MS_NOEXEC | linux_raw::MS_NOSUID | linux_raw::MS_RDONLY;
    linux_raw::sys_mount(cstr!("none"), cstr!("/mnt"), cstr!("tmpfs"), mount_flags, Some(cstr!("size=0")))?;
    linux_raw::sys_chdir(cstr!("/mnt"))?;
    linux_raw::sys_pivot_root(cstr!("."), cstr!("."))?;
    linux_raw::sys_umount2(cstr!("."), linux_raw::MNT_DETACH)?;

    // Clear all of our ambient capabilities.
    linux_raw::sys_prctl_cap_ambient_clear_all()?;

    // Flag ourselves that we won't ever want to acquire any new priviledges.
    linux_raw::sys_prctl_set_no_new_privs()?;

    linux_raw::sys_prctl_set_securebits(
        // Make UID == 0 have no special priviledges.
        linux_raw::SECBIT_NOROOT |
        linux_raw::SECBIT_NOROOT_LOCKED |
        // Calling 'setuid' from/to UID == 0 doesn't change any priviledges.
        linux_raw::SECBIT_NO_SETUID_FIXUP |
        linux_raw::SECBIT_NO_SETUID_FIXUP_LOCKED |
        // The process cannot add capabilities to its ambient set.
        linux_raw::SECBIT_NO_CAP_AMBIENT_RAISE |
        linux_raw::SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED,
    )?;

    // Set resource limits.
    let max_memory = 8 * 1024 * 1024 * 1024;
    linux_raw::sys_setrlimit(
        linux_raw::RLIMIT_DATA,
        &linux_raw::rlimit {
            rlim_cur: max_memory,
            rlim_max: max_memory,
        },
    )?;
    linux_raw::sys_setrlimit(
        linux_raw::RLIMIT_STACK,
        &linux_raw::rlimit {
            rlim_cur: 16 * 1024,
            rlim_max: 16 * 1024,
        },
    )?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_NOFILE, &linux_raw::rlimit { rlim_cur: 8, rlim_max: 8 })?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_NPROC, &linux_raw::rlimit { rlim_cur: 1, rlim_max: 1 })?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_FSIZE, &linux_raw::rlimit { rlim_cur: 0, rlim_max: 0 })?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_LOCKS, &linux_raw::rlimit { rlim_cur: 0, rlim_max: 0 })?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_MEMLOCK, &linux_raw::rlimit { rlim_cur: 0, rlim_max: 0 })?;
    linux_raw::sys_setrlimit(linux_raw::RLIMIT_MSGQUEUE, &linux_raw::rlimit { rlim_cur: 0, rlim_max: 0 })?;

    // Finally, drop all capabilities.
    linux_raw::sys_capset_drop_all()?;

    // Execute the zygote directly from its sealed memfd (AT_EMPTY_PATH with an empty filename).
    let child_argv: [*const u8; 2] = [b"polkavm-zygote\0".as_ptr(), core::ptr::null()];
    let child_envp: [*const u8; 1] = [core::ptr::null()];
    linux_raw::sys_execveat(
        Some(zygote_memfd.borrow()),
        cstr!(""),
        &child_argv,
        &child_envp,
        linux_raw::AT_EMPTY_PATH,
    )?;

    // This should never happen, but since the never type is still unstable let's return normally.
    Ok(())
}
500
/// A program packed into a sealed memfd, ready to be loaded into a sandbox.
pub struct SandboxProgram {
    memfd: Fd,
    /// Layout of the sections stored inside `memfd`.
    memory_config: SandboxMemoryConfig,
    /// Copied from `SandboxProgramInit::with_sysreturn_address`.
    sysreturn_address: u64,
}
506
/// Builder-style description of a program to load into a sandbox: the guest
/// memory initializer plus the code blob, jump table, and sysreturn address.
#[derive(Copy, Clone)]
pub struct SandboxProgramInit<'a> {
    guest_init: GuestProgramInit<'a>,
    code: &'a [u8],
    jump_table: &'a [u8],
    sysreturn_address: u64,
}
514
515impl<'a> Default for SandboxProgramInit<'a> {
516    fn default() -> Self {
517        Self::new(Default::default())
518    }
519}
520
/// Exposes the wrapped `GuestProgramInit`'s read-only methods (e.g. `ro_data`,
/// `rw_data`) directly on `SandboxProgramInit`.
impl<'a> core::ops::Deref for SandboxProgramInit<'a> {
    type Target = GuestProgramInit<'a>;
    fn deref(&self) -> &Self::Target {
        &self.guest_init
    }
}
527
impl<'a> SandboxProgramInit<'a> {
    /// Wraps `guest_init` with an empty code blob, an empty jump table, and a
    /// zero sysreturn address; fill those in with the `with_*` builders.
    pub fn new(guest_init: GuestProgramInit<'a>) -> Self {
        Self {
            guest_init,
            code: &[],
            jump_table: &[],
            sysreturn_address: 0,
        }
    }

    /// Sets the native machine code blob.
    pub fn with_code(mut self, code: &'a [u8]) -> Self {
        self.code = code;
        self
    }

    /// Sets the jump table blob.
    pub fn with_jump_table(mut self, jump_table: &'a [u8]) -> Self {
        self.jump_table = jump_table;
        self
    }

    /// Sets the sysreturn address.
    pub fn with_sysreturn_address(mut self, address: u64) -> Self {
        self.sysreturn_address = address;
        self
    }

    /// Builds the sandbox memory layout: the guest's own config plus code and
    /// jump table section sizes rounded according to `native_page_size`.
    fn memory_config(&self, native_page_size: usize) -> Result<SandboxMemoryConfig, Error> {
        let mut config = SandboxMemoryConfig::empty();
        config.set_guest_config(self.guest_init.memory_config()?);
        config.set_code_size(native_page_size, self.code.len())?;
        config.set_jump_table_size(native_page_size, self.jump_table.len())?;

        Ok(config)
    }
}
562
impl SandboxProgram {
    /// Packs the program into a single sealed memfd with the section layout:
    /// ro_data, rw_data, code, jump table — each section zero-padded up to the
    /// size recorded in the memory config.
    pub fn new(init: SandboxProgramInit) -> Result<Self, Error> {
        let native_page_size = get_native_page_size();
        // The layout is computed in VM_PAGE_SIZE units, so the native page
        // size must evenly divide it.
        assert!(
            native_page_size <= VM_PAGE_SIZE as usize && VM_PAGE_SIZE as usize % native_page_size == 0,
            "unsupported native page size: {}",
            native_page_size
        );

        let cfg = init.memory_config(native_page_size)?;
        let memfd = prepare_sealed_memfd(
            cstr!("polkavm_program"),
            cfg.ro_data_size() as usize + cfg.rw_data_size() as usize + cfg.code_size() + cfg.jump_table_size(),
            |buffer| {
                let mut offset = 0;
                // Copies `$slice` into the buffer at `offset` and advances
                // `offset` by the section's *configured* size, leaving any
                // tail of the section zero-filled.
                macro_rules! append {
                    ($slice:expr, $length:expr) => {
                        assert!($slice.len() <= $length as usize);
                        buffer[offset..offset + $slice.len()].copy_from_slice($slice);
                        #[allow(unused_assignments)]
                        {
                            offset += $length as usize;
                        }
                    };
                }

                append!(init.ro_data(), cfg.ro_data_size());
                append!(init.rw_data(), cfg.rw_data_size());
                append!(init.code, cfg.code_size());
                append!(init.jump_table, cfg.jump_table_size());
            },
        )?;

        Ok(Self {
            memfd,
            memory_config: cfg,
            sysreturn_address: init.sysreturn_address,
        })
    }
}
603
/// A single memory mapping entry, as parsed by [`Map::parse`] from a line in
/// the format of `/proc/<pid>/maps`.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct Map<'a> {
    pub start: u64,
    pub end: u64,
    pub is_readable: bool,
    pub is_writable: bool,
    pub is_executable: bool,
    pub is_shared: bool,
    pub file_offset: u64,
    pub major: u64,
    pub minor: u64,
    pub inode: u64,
    /// The pathname (or pseudo-name) column; may be empty.
    pub name: &'a [u8],
}
618
/// Parses `input` as a `u64` in the given `radix`; returns `None` if the
/// bytes aren't valid UTF-8 or aren't a valid number in that radix.
fn parse_u64_radix(input: &[u8], radix: u32) -> Option<u64> {
    let text = core::str::from_utf8(input).ok()?;
    u64::from_str_radix(text, radix).ok()
}
622
/// Returns the prefix of `*p` up to (but excluding) `delimiter`, advancing
/// `*p` past the delimiter. If the delimiter is absent, returns the whole
/// slice and leaves `*p` empty.
fn get_until<'a>(p: &mut &'a [u8], delimiter: u8) -> &'a [u8] {
    match p.iter().position(|&ch| ch == delimiter) {
        Some(index) => {
            let (before, after) = p.split_at(index);
            *p = &after[1..];
            before
        }
        None => core::mem::take(p),
    }
}
642
/// Pops and returns the first byte of `*p`, or `None` if the slice is empty.
fn get_char(p: &mut &[u8]) -> Option<u8> {
    let (&first, rest) = p.split_first()?;
    *p = rest;
    Some(first)
}
648
/// Advances `*p` past any leading ASCII space bytes (spaces only, not tabs).
fn skip_whitespace(p: &mut &[u8]) {
    while p.first() == Some(&b' ') {
        *p = &p[1..];
    }
}
658
659impl<'a> Map<'a> {
660    fn parse(mut line: &'a [u8]) -> Option<Self> {
661        let start = parse_u64_radix(get_until(&mut line, b'-'), 16)?;
662        let end = parse_u64_radix(get_until(&mut line, b' '), 16)?;
663        let is_readable = get_char(&mut line)? == b'r';
664        let is_writable = get_char(&mut line)? == b'w';
665        let is_executable = get_char(&mut line)? == b'x';
666        let is_shared = get_char(&mut line)? == b's';
667        get_char(&mut line);
668
669        let file_offset = parse_u64_radix(get_until(&mut line, b' '), 16)?;
670        let major = parse_u64_radix(get_until(&mut line, b':'), 16)?;
671        let minor = parse_u64_radix(get_until(&mut line, b' '), 16)?;
672        let inode = parse_u64_radix(get_until(&mut line, b' '), 10)?;
673        skip_whitespace(&mut line);
674        let name = line;
675
676        Some(Map {
677            start,
678            end,
679            is_readable,
680            is_writable,
681            is_executable,
682            is_shared,
683            file_offset,
684            major,
685            minor,
686            inode,
687            name,
688        })
689    }
690}
691
692fn get_message(vmctx: &VmCtx) -> Option<String> {
693    let message = unsafe {
694        let message_length = *vmctx.message_length.get() as usize;
695        let message = &*vmctx.message_buffer.get();
696        &message[..core::cmp::min(message_length, message.len())]
697    };
698
699    if message.is_empty() {
700        return None;
701    }
702
703    // The message is in shared memory, so clone it first to make sure
704    // it doesn't change under us and violate string's invariants.
705    let message = message.to_vec();
706    String::from_utf8(message).ok()
707}
708
/// Host-side hostcall handler: invoked with the hostcall number and an access
/// handle into the sandbox; an `Err(Trap)` result is propagated as a trap.
pub type OnHostcall<'a> = &'a mut dyn for<'r> FnMut(u64, SandboxAccess<'r>) -> Result<(), Trap>;
710
/// A running sandbox: the child process, the shared `VmCtx` mapping, and the
/// socket used to communicate with it.
pub struct Sandbox {
    vmctx_mmap: Mmap,
    child: ChildProcess,
    socket: Fd,

    // Host-side futex wait-loop statistics; logged on drop.
    count_wait_loop_start: u64,
    count_futex_wait: u64,
}
719
720impl Drop for Sandbox {
721    fn drop(&mut self) {
722        let vmctx = self.vmctx();
723        let child_futex_wait = unsafe { *vmctx.counters.syscall_futex_wait.get() };
724        let child_loop_start = unsafe { *vmctx.counters.syscall_wait_loop_start.get() };
725        log::debug!(
726            "Host futex wait count: {}/{} ({:.02}%)",
727            self.count_futex_wait,
728            self.count_wait_loop_start,
729            self.count_futex_wait as f64 / self.count_wait_loop_start as f64 * 100.0
730        );
731        log::debug!(
732            "Child futex wait count: {}/{} ({:.02}%)",
733            child_futex_wait,
734            child_loop_start,
735            child_futex_wait as f64 / child_loop_start as f64 * 100.0
736        );
737    }
738}
739
740impl Sandbox {
741    pub fn spawn(config: &SandboxConfig) -> Result<Self, Error> {
742        let sigset = Sigmask::block_all_signals()?;
743        let zygote_memfd = prepare_zygote()?;
744        let (vmctx_memfd, vmctx_mmap) = prepare_vmctx()?;
745        let (socket, child_socket) = linux_raw::sys_socketpair(linux_raw::AF_UNIX, linux_raw::SOCK_SEQPACKET, 0)?;
746
747        let sandbox_flags = linux_raw::CLONE_NEWCGROUP as u64
748            | linux_raw::CLONE_NEWIPC as u64
749            | linux_raw::CLONE_NEWNET as u64
750            | linux_raw::CLONE_NEWNS as u64
751            | linux_raw::CLONE_NEWPID as u64
752            | linux_raw::CLONE_NEWUSER as u64
753            | linux_raw::CLONE_NEWUTS as u64;
754
755        let mut pidfd: c_int = -1;
756        let args = CloneArgs {
757            flags: linux_raw::CLONE_CLEAR_SIGHAND | linux_raw::CLONE_PIDFD as u64 | sandbox_flags,
758            pidfd: &mut pidfd,
759            child_tid: 0,
760            parent_tid: 0,
761            exit_signal: 0,
762            stack: 0,
763            stack_size: 0,
764            tls: 0,
765        };
766
767        let uid = linux_raw::sys_getuid()?;
768        let gid = linux_raw::sys_getgid()?;
769
770        let uid_map = format!("0 {} 1\n", uid);
771        let gid_map = format!("0 {} 1\n", gid);
772
773        let (logger_rx, logger_tx) = if config.enable_logger {
774            let (rx, tx) = linux_raw::sys_pipe2(linux_raw::O_CLOEXEC)?;
775            (Some(rx), Some(tx))
776        } else {
777            (None, None)
778        };
779
780        // Fork a new process.
781        let mut child_pid =
782            unsafe { linux_raw::syscall!(linux_raw::SYS_clone3, &args as *const CloneArgs, core::mem::size_of::<CloneArgs>()) };
783
784        if child_pid < 0 {
785            // Fallback for Linux versions older than 5.5.
786            let error = Error::from_last_os_error("clone");
787            child_pid = unsafe { linux_raw::syscall!(linux_raw::SYS_clone, sandbox_flags, 0, 0, 0, 0) };
788
789            if child_pid < 0 {
790                return Err(error);
791            }
792        }
793
794        if child_pid == 0 {
795            // We're in the child.
796            //
797            // Calling into libc from here risks a deadlock as other threads might have
798            // been holding onto internal libc locks while we were cloning ourselves,
799            // so from now on we can't use anything from libc anymore.
800            core::mem::forget(sigset);
801
802            unsafe {
803                match child_main(zygote_memfd, child_socket, &uid_map, &gid_map, logger_tx) {
804                    Ok(()) => {
805                        // This is impossible.
806                        abort();
807                    }
808                    Err(_) => {
809                        // TODO: We should display this error somehow.
810                        abort();
811                    }
812                }
813            }
814        }
815
816        if let Some(logger_rx) = logger_rx {
817            // Hook up the child process' STDERR to our logger.
818            std::thread::Builder::new()
819                .name("polkavm-logger".into())
820                .spawn(move || {
821                    let mut tmp = [0; 4096];
822                    let mut buffer = Vec::new();
823                    loop {
824                        if buffer.len() > 8192 {
825                            // Make sure the child can't exhaust our memory by spamming logs.
826                            buffer.clear();
827                        }
828
829                        match linux_raw::sys_read(logger_rx.borrow(), &mut tmp) {
830                            Err(error) if error.errno() == linux_raw::EINTR => continue,
831                            Err(error) => {
832                                log::warn!("Failed to read from logger: {}", error);
833                                break;
834                            }
835                            Ok(0) => break,
836                            Ok(count) => {
837                                let mut tmp = &tmp[..count];
838                                while !tmp.is_empty() {
839                                    if let Some(index) = tmp.iter().position(|&byte| byte == b'\n') {
840                                        buffer.extend_from_slice(&tmp[..index]);
841                                        tmp = &tmp[index + 1..];
842
843                                        log::trace!(target: "polkavm_zygote", "Child #{}: {}", child_pid, String::from_utf8_lossy(&buffer));
844                                        buffer.clear();
845                                    } else {
846                                        buffer.extend_from_slice(tmp);
847                                        break;
848                                    }
849                                }
850                            }
851                        }
852                    }
853                })
854                .map_err(|error| Error::from_os_error("failed to spawn logger thread", error))?;
855        }
856
857        let mut child = ChildProcess {
858            pid: child_pid as c_int,
859            pidfd: if pidfd < 0 { None } else { Some(Fd::from_raw_unchecked(pidfd)) },
860        };
861
862        // We're in the parent. Restore the signal mask.
863        child_socket.close()?;
864        sigset.unblock()?;
865
866        fn wait_for_futex(vmctx: &VmCtx, child: &mut ChildProcess, current_state: u32, target_state: u32) -> Result<(), Error> {
867            let instant = Instant::now();
868            loop {
869                let state = vmctx.futex.load(Ordering::Relaxed);
870                if state == target_state {
871                    return Ok(());
872                }
873
874                if state != current_state {
875                    return Err(Error::from_str("failed to initialize sandbox process: unexpected futex state"));
876                }
877
878                if !child.check_status(true)?.is_running() {
879                    let message = get_message(vmctx);
880                    if let Some(message) = message {
881                        let error = Error::from(format!("failed to initialize sandbox process: {}", message));
882                        return Err(error);
883                    } else {
884                        return Err(Error::from_str(
885                            "failed to initialize sandbox process: child process unexpectedly quit",
886                        ));
887                    }
888                }
889
890                if instant.elapsed() > core::time::Duration::from_secs(10) {
891                    // This should never happen, but just in case.
892                    return Err(Error::from_str("failed to initialize sandbox process: initialization timeout"));
893                }
894
895                match linux_raw::sys_futex_wait(&vmctx.futex, state, Some(core::time::Duration::from_millis(100))) {
896                    Ok(()) => continue,
897                    Err(error)
898                        if error.errno() == linux_raw::EAGAIN
899                            || error.errno() == linux_raw::EINTR
900                            || error.errno() == linux_raw::ETIMEDOUT =>
901                    {
902                        continue
903                    }
904                    Err(error) => return Err(error),
905                }
906            }
907        }
908
909        let vmctx = unsafe { &*vmctx_mmap.as_ptr().cast::<VmCtx>() };
910
911        // Send the vmctx memfd to the child process.
912        linux_raw::sendfd(socket.borrow(), vmctx_memfd.borrow())?;
913
914        // Wait until the child process receives the vmctx memfd.
915        wait_for_futex(vmctx, &mut child, VMCTX_FUTEX_BUSY, VMCTX_FUTEX_INIT)?;
916
917        // Grab the child process' maps and see what we can unmap.
918        //
919        // The child process can't do it itself as it's too sandboxed.
920        let maps = std::fs::read(format!("/proc/{}/maps", child_pid))
921            .map_err(|error| Error::from_errno("failed to read child's maps", error.raw_os_error().unwrap_or(0)))?;
922
923        for line in maps.split(|&byte| byte == b'\n') {
924            if line.is_empty() {
925                continue;
926            }
927
928            let map = Map::parse(line).ok_or_else(|| Error::from_str("failed to parse the maps of the child process"))?;
929            match map.name {
930                b"[stack]" => {
931                    vmctx.init.stack_address.store(map.start, Ordering::Relaxed);
932                    vmctx.init.stack_length.store(map.end - map.start, Ordering::Relaxed);
933                }
934                b"[vdso]" => {
935                    vmctx.init.vdso_address.store(map.start, Ordering::Relaxed);
936                    vmctx.init.vdso_length.store(map.end - map.start, Ordering::Relaxed);
937                }
938                b"[vvar]" => {
939                    vmctx.init.vvar_address.store(map.start, Ordering::Relaxed);
940                    vmctx.init.vvar_length.store(map.end - map.start, Ordering::Relaxed);
941                }
942                b"[vsyscall]" => {
943                    if map.is_readable {
944                        return Err(Error::from_str("failed to initialize sandbox process: vsyscall region is readable"));
945                    }
946                }
947                _ => {}
948            }
949        }
950
951        // Wake the child so that it finishes initialization.
952        vmctx.futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
953        linux_raw::sys_futex_wake_one(&vmctx.futex)?;
954
955        // Wait for the child to finish initialization.
956        wait_for_futex(vmctx, &mut child, VMCTX_FUTEX_BUSY, VMCTX_FUTEX_IDLE)?;
957
958        Ok(Sandbox {
959            vmctx_mmap,
960            child,
961            socket,
962
963            count_wait_loop_start: 0,
964            count_futex_wait: 0,
965        })
966    }
967
968    #[inline]
969    fn vmctx(&self) -> &VmCtx {
970        unsafe { &*self.vmctx_mmap.as_ptr().cast::<VmCtx>() }
971    }
972
    /// Blocks until the worker process leaves the `BUSY` state.
    ///
    /// Drives the vmctx futex state machine: returns `Ok(())` once the worker
    /// goes `IDLE`, converts a `TRAP` state into `ExecutionError::Trap`, and
    /// dispatches `HOSTCALL` requests to `on_hostcall` (aborting execution when
    /// no handler was supplied). While waiting it periodically verifies that
    /// the child process is still alive.
    #[inline(never)]
    #[cold]
    fn wait(&mut self, mut on_hostcall: Option<OnHostcall>) -> Result<(), ExecutionError<Error>> {
        let mut spin_target = 0;
        'outer: loop {
            self.count_wait_loop_start += 1;

            let state = self.vmctx().futex.load(Ordering::Relaxed);
            if state == VMCTX_FUTEX_IDLE {
                // Pairs with the worker's release store when it went idle.
                core::sync::atomic::fence(Ordering::Acquire);
                return Ok(());
            }

            if state == VMCTX_FUTEX_TRAP {
                core::sync::atomic::fence(Ordering::Acquire);

                // Acknowledge the trap and hand control back to the worker.
                self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
                linux_raw::sys_futex_wake_one(&self.vmctx().futex)?;

                return Err(ExecutionError::Trap(Trap::default()));
            }

            if state == VMCTX_FUTEX_HOSTCALL {
                core::sync::atomic::fence(Ordering::Acquire);

                let on_hostcall = match on_hostcall {
                    Some(ref mut on_hostcall) => &mut *on_hostcall,
                    None => {
                        // No handler set: tell the worker to abort execution
                        // and wake it so it can act on that.
                        unsafe {
                            *self.vmctx().hostcall().get() = polkavm_common::zygote::HOSTCALL_ABORT_EXECUTION;
                        }
                        self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
                        linux_raw::sys_futex_wake_one(&self.vmctx().futex)?;

                        return Err(Error::from_str("hostcall called without any hostcall handler set").into());
                    }
                };

                let hostcall = unsafe { *self.vmctx().hostcall().get() };
                if hostcall == polkavm_common::zygote::HOSTCALL_TRACE {
                    // When tracing aggressively spin to avoid having to call into the kernel.
                    spin_target = 512;
                }

                match on_hostcall(hostcall, self.access()) {
                    Ok(()) => {
                        // Hostcall handled; resume the worker and keep waiting.
                        self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
                        linux_raw::sys_futex_wake_one(&self.vmctx().futex)?;
                        continue;
                    }
                    Err(trap) => {
                        // The handler trapped; instruct the worker to abort.
                        unsafe {
                            *self.vmctx().hostcall().get() = polkavm_common::zygote::HOSTCALL_ABORT_EXECUTION;
                        }
                        self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
                        linux_raw::sys_futex_wake_one(&self.vmctx().futex)?;

                        return Err(ExecutionError::Trap(trap));
                    }
                }
            }

            if state != VMCTX_FUTEX_BUSY {
                return Err(Error::from_str("internal error: unexpected worker process state").into());
            }

            // Spin a little (when enabled) before falling back to a futex wait.
            for _ in 0..spin_target {
                core::hint::spin_loop();
                if self.vmctx().futex.load(Ordering::Relaxed) != VMCTX_FUTEX_BUSY {
                    continue 'outer;
                }
            }

            self.count_futex_wait += 1;
            match linux_raw::sys_futex_wait(&self.vmctx().futex, VMCTX_FUTEX_BUSY, Some(core::time::Duration::from_millis(100))) {
                Ok(()) => continue,
                Err(error) if error.errno() == linux_raw::EAGAIN || error.errno() == linux_raw::EINTR => continue,
                Err(error) if error.errno() == linux_raw::ETIMEDOUT => {
                    // Wait timed out: make sure the child is still alive before
                    // going back to sleep, so a dead worker can't hang us forever.
                    log::trace!("Timeout expired while waiting for child #{}...", self.child.pid);
                    let status = self.child.check_status(true)?;
                    if !status.is_running() {
                        log::trace!("Child #{} is not running anymore!", self.child.pid);
                        let message = get_message(self.vmctx());
                        if let Some(message) = message {
                            return Err(Error::from(message).into());
                        } else {
                            return Err(Error::from_str("worker process unexpectedly quit").into());
                        }
                    }
                }
                Err(error) => return Err(error.into()),
            }
        }
    }
1067
1068    #[inline]
1069    fn wait_if_necessary(&mut self, on_hostcall: Option<OnHostcall>) -> Result<(), ExecutionError<Error>> {
1070        if self.vmctx().futex.load(Ordering::Relaxed) != VMCTX_FUTEX_IDLE {
1071            self.wait(on_hostcall)?;
1072        }
1073
1074        Ok(())
1075    }
1076
    /// Executes a single call in the worker process according to `args`.
    ///
    /// Waits for the worker to become idle, publishes the call parameters into
    /// the shared vmctx, wakes the worker, and then waits for completion,
    /// servicing hostcalls through `args.on_hostcall` along the way.
    pub fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError<Error>> {
        // Reborrow the handler here so `args.on_hostcall` can still be
        // consumed by the final `wait_if_necessary` below.
        self.wait_if_necessary(match args.on_hostcall {
            Some(ref mut on_hostcall) => Some(&mut *on_hostcall),
            None => None,
        })?;

        unsafe {
            *self.vmctx().rpc_address.get() = args.rpc_address;
            *self.vmctx().rpc_flags.get() = args.rpc_flags;
            if let Some(program) = args.program {
                *self.vmctx().new_memory_config.get() = program.memory_config;
                *self.vmctx().new_sysreturn_address.get() = program.sysreturn_address;
            }

            for (reg, value) in Reg::ARG_REGS.into_iter().zip(args.args[..args.arg_count].iter().copied()) {
                self.access().set_reg(reg, value);
            }

            // Publish the parameters written above (release) and wake the worker.
            self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release);
            linux_raw::sys_futex_wake_one(&self.vmctx().futex)?;

            if let Some(program) = args.program {
                // TODO: This can block forever.
                linux_raw::sendfd(self.socket.borrow(), program.memfd.borrow())?;
            }
        }

        self.wait_if_necessary(args.on_hostcall)?;
        Ok(())
    }
1107
1108    #[inline]
1109    pub fn access(&mut self) -> SandboxAccess {
1110        SandboxAccess { sandbox: self }
1111    }
1112}
1113
/// A temporary handle for inspecting and mutating a [`Sandbox`]'s state
/// (guest registers, guest memory, program counters).
pub struct SandboxAccess<'a> {
    sandbox: &'a mut Sandbox,
}
1117
1118impl<'a> Access<'a> for SandboxAccess<'a> {
1119    type Error = linux_raw::Error;
1120
1121    fn get_reg(&self, reg: Reg) -> u32 {
1122        if reg == Reg::Zero {
1123            return 0;
1124        }
1125
1126        let regs = unsafe { &*self.sandbox.vmctx().regs().get() };
1127
1128        regs[reg as usize - 1]
1129    }
1130
1131    fn set_reg(&mut self, reg: Reg, value: u32) {
1132        if reg == Reg::Zero {
1133            return;
1134        }
1135
1136        unsafe {
1137            (*self.sandbox.vmctx().regs().get())[reg as usize - 1] = value;
1138        }
1139    }
1140
1141    fn read_memory_into_slice<'slice, T>(&self, address: u32, buffer: &'slice mut T) -> Result<&'slice mut [u8], Error>
1142    where
1143        T: ?Sized + AsUninitSliceMut,
1144    {
1145        let slice = buffer.as_uninit_slice_mut();
1146        log::trace!(
1147            "Reading memory: 0x{:x}-0x{:x} ({} bytes)",
1148            address,
1149            address as usize + slice.len(),
1150            slice.len()
1151        );
1152
1153        if address as usize + slice.len() > 0xffffffff {
1154            return Err(Error::from_str("out of range read"));
1155        }
1156
1157        let length = slice.len();
1158        let actual_length = linux_raw::vm_read_memory(self.sandbox.child.pid, [slice], [(address as usize, length)])?;
1159        if length != actual_length {
1160            return Err(Error::from_str("incomplete read"));
1161        }
1162
1163        unsafe { Ok(slice_assume_init_mut(slice)) }
1164    }
1165
1166    fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Error> {
1167        log::trace!(
1168            "Writing memory: 0x{:x}-0x{:x} ({} bytes)",
1169            address,
1170            address as usize + data.len(),
1171            data.len()
1172        );
1173        if address as usize + data.len() > 0xffffffff {
1174            return Err(Error::from_str("out of range write"));
1175        }
1176
1177        let length = data.len();
1178        let actual_length = linux_raw::vm_write_memory(self.sandbox.child.pid, [data], [(address as usize, length)])?;
1179        if length != actual_length {
1180            return Err(Error::from_str("incomplete write"));
1181        }
1182
1183        Ok(())
1184    }
1185
1186    fn program_counter(&self) -> Option<u32> {
1187        let value = unsafe { *self.sandbox.vmctx().nth_instruction().get() };
1188
1189        if value == SANDBOX_EMPTY_NTH_INSTRUCTION {
1190            None
1191        } else {
1192            Some(value)
1193        }
1194    }
1195
1196    fn native_program_counter(&self) -> Option<u64> {
1197        let value = unsafe { *self.sandbox.vmctx().rip().get() };
1198
1199        if value == SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER {
1200            None
1201        } else {
1202            Some(value)
1203        }
1204    }
1205}
1206
/// Parameters for a single [`Sandbox::execute`] call.
pub struct ExecuteArgs<'a> {
    // Native address for the worker to call; set via `set_call`, 0 by default.
    rpc_address: u64,
    // Bitmask of `VM_RPC_FLAG_*` values controlling this call.
    rpc_flags: u32,
    // Program to (re)load into the sandbox before the call, if any.
    program: Option<&'a SandboxProgram>,
    // Handler invoked when the guest performs a hostcall.
    on_hostcall: Option<OnHostcall<'a>>,
    // Values for the guest's argument registers; only `arg_count` are used.
    args: [u32; VM_MAXIMUM_EXTERN_ARG_COUNT],
    // Number of valid entries in `args`.
    arg_count: usize,
}
1215
1216impl<'a> Default for ExecuteArgs<'a> {
1217    fn default() -> Self {
1218        Self::new()
1219    }
1220}
1221
1222impl<'a> ExecuteArgs<'a> {
1223    #[inline]
1224    pub fn new() -> Self {
1225        ExecuteArgs {
1226            rpc_address: 0,
1227            rpc_flags: 0,
1228            program: None,
1229            on_hostcall: None,
1230            args: [0; VM_MAXIMUM_EXTERN_ARG_COUNT],
1231            arg_count: 0,
1232        }
1233    }
1234
1235    #[inline]
1236    pub fn set_program(&mut self, program: &'a SandboxProgram) {
1237        self.rpc_flags |= VM_RPC_FLAG_RECONFIGURE;
1238        self.program = Some(program);
1239    }
1240
1241    #[inline]
1242    pub fn set_reset_memory_after_execution(&mut self) {
1243        self.rpc_flags |= VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION;
1244    }
1245
1246    #[inline]
1247    pub fn set_clear_program_after_execution(&mut self) {
1248        self.rpc_flags |= VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION;
1249    }
1250
1251    #[inline]
1252    pub fn set_send_sigstop_before_execution(&mut self) {
1253        self.rpc_flags |= VM_RPC_FLAG_SIGSTOP_BEFORE_EXECUTION;
1254    }
1255
1256    #[inline]
1257    pub fn set_call(&mut self, address: u64) {
1258        self.rpc_address = address;
1259    }
1260
1261    #[inline]
1262    pub fn set_on_hostcall(&mut self, callback: OnHostcall<'a>) {
1263        self.on_hostcall = Some(callback);
1264    }
1265
1266    #[inline]
1267    pub fn set_args(&mut self, args: &[u32]) {
1268        assert!(args.len() <= VM_MAXIMUM_EXTERN_ARG_COUNT);
1269        for (dst, src) in self.args.iter_mut().zip(args.iter().copied()) {
1270            *dst = src;
1271        }
1272        self.arg_count = args.len();
1273    }
1274}
1275
// End-to-end tests which spawn a real sandbox process and drive it with tiny
// hand-assembled x86-64 programs. These only run on Linux with the zygote
// binary available.
#[cfg(test)]
mod tests {
    use super::*;
    use polkavm_assembler::amd64::inst::*;
    use polkavm_assembler::amd64::Reg::*;
    use polkavm_assembler::amd64::{LoadKind, RegSize, StoreKind};
    use polkavm_assembler::Assembler;
    use polkavm_common::zygote::VM_ADDR_NATIVE_CODE;

    // Spawns a sandbox, runs a program which copies a value from the
    // read-only section into the read-write section, and verifies the
    // writes through the memory accessor.
    #[test]
    fn basic_execution_works() {
        let _ = env_logger::try_init();

        let init = GuestProgramInit::new().with_ro_data(&[0xaa, 0xbb]).with_bss(1);
        let init = SandboxProgramInit::new(init);

        let mem = init.memory_config(get_native_page_size()).unwrap();
        let mut asm = Assembler::new();
        // rax = *ro_data (u32); rw_data[0] = al; rw_data[4..6] = ax.
        let code = asm
            .push(load_abs(rax, mem.ro_data_address().try_into().unwrap(), LoadKind::U32))
            .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U8))
            .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap() + 4, rax, StoreKind::U16))
            .push(ret())
            .finalize();

        let program = SandboxProgram::new(init.with_code(code)).unwrap();
        let mut args = ExecuteArgs::new();
        args.set_program(&program);
        args.set_call(VM_ADDR_NATIVE_CODE);

        let mut config = SandboxConfig::default();
        config.enable_logger(true);

        let mut sandbox = Sandbox::spawn(&config).unwrap();
        sandbox.execute(args).unwrap();

        assert_eq!(
            sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 8).unwrap(),
            [0xaa, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0x00, 0x00,]
        );
    }

    // Runs a program which increments a counter in rw memory several times
    // in the same sandbox, checking that memory persists between calls,
    // can be reset, and that clearing the program unmaps it.
    #[test]
    fn program_memory_can_be_reused_and_cleared() {
        let _ = env_logger::try_init();

        let init = GuestProgramInit::new().with_bss(1);
        let init = SandboxProgramInit::new(init);
        let mem = init.memory_config(get_native_page_size()).unwrap();
        let mut asm = Assembler::new();
        // rw_data[0..4] += 1 (as a u32).
        let code = asm
            .push(load_abs(rax, mem.rw_data_address().try_into().unwrap(), LoadKind::U32))
            .push(add_imm(RegSize::R64, rax, 1))
            .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32))
            .push(ret())
            .finalize();

        let program = SandboxProgram::new(init.with_code(code)).unwrap();

        let mut sandbox = Sandbox::spawn(&Default::default()).unwrap();
        // No program loaded yet, so guest memory is unmapped and unreadable.
        assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err());

        {
            // Load the program without calling it; bss starts zeroed.
            let mut args = ExecuteArgs::new();
            args.set_program(&program);
            sandbox.execute(args).unwrap();
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x00, 0x00, 0x00, 0x00]
            );
        }

        {
            // First call: counter becomes 1.
            let mut args = ExecuteArgs::new();
            args.set_call(VM_ADDR_NATIVE_CODE);
            sandbox.execute(args).unwrap();
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x01, 0x00, 0x00, 0x00]
            );
        }

        {
            // Memory persists between calls: counter becomes 2.
            let mut args = ExecuteArgs::new();
            args.set_call(VM_ADDR_NATIVE_CODE);
            sandbox.execute(args).unwrap();
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x02, 0x00, 0x00, 0x00]
            );
        }

        {
            // Resetting memory after execution restores the zeroed bss.
            let mut args = ExecuteArgs::new();
            args.set_call(VM_ADDR_NATIVE_CODE);
            args.set_reset_memory_after_execution();
            sandbox.execute(args).unwrap();
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x00, 0x00, 0x00, 0x00]
            );
        }

        {
            // After the reset the counter starts over from zero.
            let mut args = ExecuteArgs::new();
            args.set_call(VM_ADDR_NATIVE_CODE);
            sandbox.execute(args).unwrap();
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x01, 0x00, 0x00, 0x00]
            );
        }

        {
            // Clearing the program unmaps guest memory again.
            let mut args = ExecuteArgs::new();
            args.set_clear_program_after_execution();
            sandbox.execute(args).unwrap();
            assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err());
        }
    }

    // Runs a program which dereferences address 0, expecting a trap, and
    // checks the sandbox survives the fault and can execute again.
    #[test]
    fn out_of_bounds_memory_access_generates_a_trap() {
        let _ = env_logger::try_init();

        let init = GuestProgramInit::new().with_bss(1);
        let init = SandboxProgramInit::new(init);
        let mem = init.memory_config(get_native_page_size()).unwrap();
        let mut asm = Assembler::new();
        // Increment the counter, then load from address 0 to force a fault.
        let code = asm
            .push(load_abs(rax, mem.rw_data_address().try_into().unwrap(), LoadKind::U32))
            .push(add_imm(RegSize::R64, rax, 1))
            .push(store_abs(i32::try_from(mem.rw_data_address()).unwrap(), rax, StoreKind::U32))
            .push(load_abs(rax, 0, LoadKind::U32))
            .push(ret())
            .finalize();

        let program = SandboxProgram::new(init.with_code(code)).unwrap();

        let mut sandbox = Sandbox::spawn(&Default::default()).unwrap();
        {
            let mut args = ExecuteArgs::new();
            args.set_program(&program);
            args.set_call(VM_ADDR_NATIVE_CODE);
            match sandbox.execute(args) {
                Err(ExecutionError::Trap(_)) => {}
                _ => panic!(),
            }

            // The store before the faulting load still took effect.
            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x01, 0x00, 0x00, 0x00]
            );
        }

        // The VM still works even though it got hit with a SIGSEGV.
        {
            let mut args = ExecuteArgs::new();
            args.set_call(VM_ADDR_NATIVE_CODE);
            match sandbox.execute(args) {
                Err(ExecutionError::Trap(_)) => {}
                _ => panic!(),
            }

            assert_eq!(
                sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(),
                [0x02, 0x00, 0x00, 0x00]
            );
        }
    }

    // Not yet implemented; kept as a reminder that division by zero must
    // also surface as a trap.
    #[ignore]
    #[test]
    fn divide_by_zero_generates_a_trap() {
        todo!()
    }
}