polkavm_common/zygote.rs

//! This module defines the ABI boundary between the host and the zygote.
//!
//! In general everything here can be modified at will, provided the zygote
//! is recompiled.

use core::cell::UnsafeCell;
use core::sync::atomic::{AtomicBool, AtomicI64, AtomicU32, AtomicU64};

// Due to the limitations of Rust's compile time constant evaluation machinery
// we need to define this struct multiple times.
macro_rules! define_address_table {
    (
        $name_raw:ident, $name_packed:ident, $name_table:ident,
        $($name:ident: $type:ty,)+
    ) => {
        #[repr(C)]
        pub struct $name_raw {
            $(pub $name: $type),+
        }

        #[derive(Copy, Clone)]
        #[repr(packed)]
        pub struct $name_packed {
            $(pub $name: u64),+
        }

        #[derive(Copy, Clone)]
        pub struct $name_table {
            $(pub $name: u64),+
        }

        impl $name_table {
            #[allow(clippy::fn_to_numeric_cast_any)]
            #[allow(clippy::fn_to_numeric_cast)]
            #[inline]
            pub fn from_raw(table: $name_raw) -> Self {
                Self {
                    $(
                        $name: table.$name as u64
                    ),+
                }
            }

            pub const fn from_packed(table: &$name_packed) -> Self {
                Self {
                    $(
                        $name: table.$name
                    ),+
                }
            }
        }

        static_assert!(core::mem::size_of::<$name_raw>() == core::mem::size_of::<$name_packed>());
        static_assert!(core::mem::size_of::<$name_raw>() == core::mem::size_of::<$name_table>());
    }
}
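// For reference, each invocation below expands to roughly the following shape (a sketch,
// not the literal expansion):
//
//     #[repr(C)]
//     pub struct AddressTableRaw { pub syscall_hostcall: unsafe extern "C" fn() -> !, /* ... */ }
//
//     #[derive(Copy, Clone)]
//     #[repr(packed)]
//     pub struct AddressTablePacked { pub syscall_hostcall: u64, /* ... */ }
//
//     #[derive(Copy, Clone)]
//     pub struct AddressTable { pub syscall_hostcall: u64, /* ... */ }
//
// plus the `from_raw`/`from_packed` conversions and the size equality assertions.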

// These are the addresses exported from the zygote.
define_address_table! {
    AddressTableRaw, AddressTablePacked, AddressTable,
    syscall_hostcall: unsafe extern "C" fn() -> !,
    syscall_trap: unsafe extern "C" fn() -> !,
    syscall_return: unsafe extern "C" fn() -> !,
    syscall_step: unsafe extern "C" fn() -> !,
    syscall_sbrk: unsafe extern "C" fn(u64) -> u32,
    syscall_not_enough_gas: unsafe extern "C" fn() -> !,
}

define_address_table! {
    ExtTableRaw, ExtTablePacked, ExtTable,
    ext_sbrk: unsafe extern "C" fn() -> !,
    ext_reset_memory: unsafe extern "C" fn() -> !,
    ext_zero_memory_chunk: unsafe extern "C" fn() -> !,
    ext_load_program: unsafe extern "C" fn() -> !,
    ext_recycle: unsafe extern "C" fn() -> !,
    ext_set_accessible_aux_size: unsafe extern "C" fn() -> !,
}

pub const FD_DUMMY_STDIN: i32 = 0;
pub const FD_LOGGER_STDOUT: i32 = 1;
pub const FD_LOGGER_STDERR: i32 = 2;
pub const FD_SHM: i32 = 3;
pub const FD_MEM: i32 = 4;
pub const FD_SOCKET: i32 = 5;
pub const FD_VMCTX: i32 = 6;
pub const FD_LIFETIME_PIPE: i32 = 7;
pub const LAST_USED_FD: i32 = FD_LIFETIME_PIPE;

/// The address where the native code starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_NATIVE_CODE: u64 = 0x100000000;

/// The address where the indirect jump table starts inside of the VM.
///
/// This is not directly accessible by the program running inside of the VM.
pub const VM_ADDR_JUMP_TABLE: u64 = 0x800000000;

/// The address where the return-to-host jump table vector physically resides.
pub const VM_ADDR_JUMP_TABLE_RETURN_TO_HOST: u64 = VM_ADDR_JUMP_TABLE + ((crate::abi::VM_ADDR_RETURN_TO_HOST as u64) << 3);
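// Note: the `<< 3` multiplies by `core::mem::size_of::<u64>()`, i.e. by the size of a
// single jump table entry, so this points at the `VM_ADDR_RETURN_TO_HOST`-th slot.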

/// The address of the global per-VM context struct.
pub const VM_ADDR_VMCTX: u64 = 0x400000000;

/// The address of the signal stack.
pub const VM_ADDR_SIGSTACK: u64 = 0x500000000;

/// The address of the native stack.
pub const VM_ADDR_NATIVE_STACK_LOW: u64 = 0x600000000;

/// The size of the native stack.
pub const VM_ADDR_NATIVE_STACK_SIZE: u64 = 0x4000;

/// The address of the top of the native stack.
pub const VM_ADDR_NATIVE_STACK_HIGH: u64 = VM_ADDR_NATIVE_STACK_LOW + VM_ADDR_NATIVE_STACK_SIZE;

/// Address where the shared memory is mapped.
pub const VM_ADDR_SHARED_MEMORY: u64 = 0x700000000;

/// The size of the shared memory region.
pub const VM_SHARED_MEMORY_SIZE: u64 = u32::MAX as u64;

/// The maximum number of native code bytes that can be emitted by a single VM instruction.
///
/// This does *not* affect the VM ABI and can be changed at will,
/// but should be high enough that it's never hit.
pub const VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH: u32 = 67;

/// The maximum number of bytes the jump table can be.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE: u64 = (crate::abi::VM_MAXIMUM_JUMP_TABLE_ENTRIES as u64 + 1)
    * core::mem::size_of::<u64>() as u64
    * crate::abi::VM_CODE_ADDRESS_ALIGNMENT as u64;

/// The maximum number of bytes the jump table can span in virtual memory.
pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE: u64 = 0x100000000 * core::mem::size_of::<u64>() as u64;
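// That is, 2^32 possible jump table slots at 8 bytes each (32 GiB of virtual address space).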

// TODO: Make this smaller.
/// The maximum number of bytes the native code can be.
pub const VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE: u32 = 2176 * 1024 * 1024 - 1;

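// `rbx`, `rbp` and `r12`-`r15` are the callee-saved registers of the x86-64 System V ABI;
// together with `rip` and `rsp` this presumably serves as a setjmp/longjmp-style buffer
// for entering and leaving the sandboxed code.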
#[repr(C)]
pub struct JmpBuf {
    pub rip: AtomicU64,
    pub rbx: AtomicU64,
    pub rsp: AtomicU64,
    pub rbp: AtomicU64,
    pub r12: AtomicU64,
    pub r13: AtomicU64,
    pub r14: AtomicU64,
    pub r15: AtomicU64,
}

#[repr(C)]
pub struct VmInit {
    pub stack_address: AtomicU64,
    pub stack_length: AtomicU64,
    pub vdso_address: AtomicU64,
    pub vdso_length: AtomicU64,
    pub vvar_address: AtomicU64,
    pub vvar_length: AtomicU64,

    /// Whether userfaultfd-based memory management is available.
    pub uffd_available: AtomicBool,

    /// Whether sandboxing is disabled.
    pub sandbox_disabled: AtomicBool,

    /// Whether the logger is enabled.
    pub logging_enabled: AtomicBool,
}

const MESSAGE_BUFFER_SIZE: usize = 512;

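// 64 bytes matches the cache line size of typical x86-64 CPUs; wrapping a field in this
// gives it its own cache line, presumably to avoid false sharing between the host and
// the sandbox when they touch adjacent fields concurrently.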
#[repr(align(64))]
pub struct CacheAligned<T>(pub T);

impl<T> core::ops::Deref for CacheAligned<T> {
    type Target = T;
    #[inline(always)]
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> core::ops::DerefMut for CacheAligned<T> {
    #[inline(always)]
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

#[repr(C)]
pub struct VmCtxHeapInfo {
    pub heap_top: UnsafeCell<u64>,
    pub heap_threshold: UnsafeCell<u64>,
}

const REG_COUNT: usize = crate::program::Reg::ALL.len();

#[repr(C)]
pub struct VmCtxCounters {
    pub syscall_wait_loop_start: UnsafeCell<u64>,
    pub syscall_futex_wait: UnsafeCell<u64>,
}

#[repr(C)]
pub enum VmFd {
    None,
    Shm,
    Mem,
}

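// The fields of `VmMap` presumably mirror the arguments of an `mmap` call performed by
// the zygote, with `fd` selecting which of the well-known file descriptors above (if any)
// should back the mapping.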
#[repr(C)]
pub struct VmMap {
    pub address: u64,
    pub length: u64,
    pub protection: u32,
    pub flags: u32,
    pub fd: VmFd,
    pub fd_offset: u64,
}

/// The virtual machine context.
///
/// This is mapped in shared memory and used by the sandbox to keep its state in,
/// as well as by the host to communicate with the sandbox.
#[allow(clippy::partial_pub_fields)]
#[repr(C)]
pub struct VmCtx {
    // NOTE: The order of fields here can matter for performance!
    _align_1: CacheAligned<()>,

    /// The futex used to synchronize the sandbox with the host process.
    pub futex: AtomicU32,

    /// The address of the instruction currently being executed.
    pub program_counter: AtomicU32,

    /// The address to jump to.
    pub jump_into: AtomicU64,

    /// The address of the native code to call inside of the VM process, if non-zero.
    pub next_native_program_counter: AtomicU64,

    pub tmp_reg: AtomicU64,
    pub rip: AtomicU64,

    /// The address of the next instruction to be executed.
    pub next_program_counter: AtomicU32,

    /// A multipurpose field:
    ///   - the hostcall number that was triggered,
    ///   - the sbrk argument,
    ///   - the sbrk return value,
    pub arg: AtomicU32,
    pub arg2: AtomicU32,
    pub arg3: AtomicU32,

    _align_2: CacheAligned<()>,

    pub _align_dummy: [u64; 4],

    /// The current gas counter.
    pub gas: AtomicI64,

    _align_3: CacheAligned<()>,

    /// A dump of all of the registers of the VM.
    pub regs: [AtomicU64; REG_COUNT],

    /// The state of the program's heap.
    pub heap_info: VmCtxHeapInfo,

    /// Offset in shared memory to this sandbox's memory map.
    pub shm_memory_map_offset: AtomicU64,
    /// Number of maps to map.
    pub shm_memory_map_count: AtomicU64,
    /// Offset in shared memory to this sandbox's code.
    pub shm_code_offset: AtomicU64,
    /// Length of this sandbox's code.
    pub shm_code_length: AtomicU64,
    /// Offset in shared memory to this sandbox's jump table.
    pub shm_jump_table_offset: AtomicU64,
    /// Length of this sandbox's jump table, in bytes.
    pub shm_jump_table_length: AtomicU64,

    /// Address of the sysreturn routine.
    pub sysreturn_address: AtomicU64,

    /// Whether userfaultfd-based memory management is enabled.
    pub uffd_enabled: AtomicBool,

    /// The address of the base of the heap.
    pub heap_base: UnsafeCell<u32>,

    /// The initial heap growth threshold.
    pub heap_initial_threshold: UnsafeCell<u32>,

    /// The maximum heap size.
    pub heap_max_size: UnsafeCell<u32>,

    /// The page size.
    pub page_size: UnsafeCell<u32>,

    /// Performance counters. Only for debugging.
    pub counters: CacheAligned<VmCtxCounters>,

    /// One-time args used during initialization.
    pub init: VmInit,

    /// Length of the message in the message buffer.
    pub message_length: UnsafeCell<u32>,
    /// A buffer used to marshal error messages.
    pub message_buffer: UnsafeCell<[u8; MESSAGE_BUFFER_SIZE]>,
}

#[test]
fn test_gas_offset() {
    // NOTE: The codegen depends on the gas field being at this *exact* offset.
    #[allow(unsafe_code)]
    let vmctx: VmCtx = unsafe { core::mem::zeroed() };
    assert_eq!(core::ptr::addr_of!(vmctx.gas) as usize - core::ptr::addr_of!(vmctx) as usize, 0x60);
}

// Make sure it fits within a single page on amd64.
static_assert!(core::mem::size_of::<VmCtx>() <= 4096);

/// The VM is busy.
pub const VMCTX_FUTEX_BUSY: u32 = 0;

/// The VM is idle.
pub const VMCTX_FUTEX_IDLE: u32 = 1;

/// The VM has triggered a host call and is idle.
pub const VMCTX_FUTEX_GUEST_ECALLI: u32 = VMCTX_FUTEX_IDLE | (1 << 1);

/// The VM has triggered a trap and is idle.
pub const VMCTX_FUTEX_GUEST_TRAP: u32 = VMCTX_FUTEX_IDLE | (2 << 1);

/// The VM's signal handler was triggered.
pub const VMCTX_FUTEX_GUEST_SIGNAL: u32 = VMCTX_FUTEX_IDLE | (3 << 1);

/// The VM has gone through a single instruction and is idle.
pub const VMCTX_FUTEX_GUEST_STEP: u32 = VMCTX_FUTEX_IDLE | (4 << 1);

/// The VM has run out of gas.
pub const VMCTX_FUTEX_GUEST_NOT_ENOUGH_GAS: u32 = VMCTX_FUTEX_IDLE | (5 << 1);

/// The VM has triggered a page fault.
pub const VMCTX_FUTEX_GUEST_PAGEFAULT: u32 = VMCTX_FUTEX_IDLE | (6 << 1);
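
// Every `VMCTX_FUTEX_GUEST_*` state above sets the `VMCTX_FUTEX_IDLE` bit (bit 0) and
// stores the reason in the remaining bits. A minimal sketch of that invariant, included
// purely as an illustration of the encoding:
#[test]
fn test_futex_state_encoding() {
    for state in [
        VMCTX_FUTEX_GUEST_ECALLI,
        VMCTX_FUTEX_GUEST_TRAP,
        VMCTX_FUTEX_GUEST_SIGNAL,
        VMCTX_FUTEX_GUEST_STEP,
        VMCTX_FUTEX_GUEST_NOT_ENOUGH_GAS,
        VMCTX_FUTEX_GUEST_PAGEFAULT,
    ] {
        // Bit 0 says "idle"; bits 1 and up carry the specific reason.
        assert_eq!(state & 1, VMCTX_FUTEX_IDLE);
        assert_ne!(state >> 1, 0);
    }
    assert_eq!(VMCTX_FUTEX_BUSY & 1, 0);
}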

#[allow(clippy::declare_interior_mutable_const)]
const ATOMIC_U64_ZERO: AtomicU64 = AtomicU64::new(0);

#[allow(clippy::new_without_default)]
impl VmCtx {
    /// Creates a zeroed VM context.
    pub const fn zeroed() -> Self {
        VmCtx {
            _align_1: CacheAligned(()),
            _align_2: CacheAligned(()),
            _align_3: CacheAligned(()),
            _align_dummy: [0; 4],

            gas: AtomicI64::new(0),
            program_counter: AtomicU32::new(0),
            next_program_counter: AtomicU32::new(0),
            arg: AtomicU32::new(0),
            arg2: AtomicU32::new(0),
            arg3: AtomicU32::new(0),
            tmp_reg: AtomicU64::new(0),
            rip: AtomicU64::new(0),
            regs: [ATOMIC_U64_ZERO; REG_COUNT],
            jump_into: AtomicU64::new(0),
            next_native_program_counter: AtomicU64::new(0),

            futex: AtomicU32::new(VMCTX_FUTEX_BUSY),

            shm_memory_map_offset: AtomicU64::new(0),
            shm_memory_map_count: AtomicU64::new(0),
            shm_code_offset: AtomicU64::new(0),
            shm_code_length: AtomicU64::new(0),
            shm_jump_table_offset: AtomicU64::new(0),
            shm_jump_table_length: AtomicU64::new(0),
            uffd_enabled: AtomicBool::new(false),
            sysreturn_address: AtomicU64::new(0),
            heap_base: UnsafeCell::new(0),
            heap_initial_threshold: UnsafeCell::new(0),
            heap_max_size: UnsafeCell::new(0),
            page_size: UnsafeCell::new(0),

            heap_info: VmCtxHeapInfo {
                heap_top: UnsafeCell::new(0),
                heap_threshold: UnsafeCell::new(0),
            },

            counters: CacheAligned(VmCtxCounters {
                syscall_wait_loop_start: UnsafeCell::new(0),
                syscall_futex_wait: UnsafeCell::new(0),
            }),

            init: VmInit {
                stack_address: AtomicU64::new(0),
                stack_length: AtomicU64::new(0),
                vdso_address: AtomicU64::new(0),
                vdso_length: AtomicU64::new(0),
                vvar_address: AtomicU64::new(0),
                vvar_length: AtomicU64::new(0),
                uffd_available: AtomicBool::new(false),
                sandbox_disabled: AtomicBool::new(false),
                logging_enabled: AtomicBool::new(false),
            },

            message_length: UnsafeCell::new(0),
            message_buffer: UnsafeCell::new([0; MESSAGE_BUFFER_SIZE]),
        }
    }

    /// Creates a fresh VM context.
    pub const fn new() -> Self {
        Self::zeroed()
    }
}

static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST > VM_ADDR_JUMP_TABLE);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST % 0x4000 == 0);
static_assert!(VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE <= VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE < VM_ADDR_JUMP_TABLE_RETURN_TO_HOST);
static_assert!(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST < VM_ADDR_JUMP_TABLE + VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE);
static_assert!(VM_ADDR_JUMP_TABLE.count_ones() == 1);
static_assert!((1 << VM_ADDR_JUMP_TABLE.trailing_zeros()) == VM_ADDR_JUMP_TABLE);

static_assert!(VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE >= crate::abi::VM_MAXIMUM_CODE_SIZE * VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH);
static_assert!(VM_ADDR_NATIVE_CODE > 0xffffffff);
static_assert!(VM_ADDR_VMCTX > 0xffffffff);
static_assert!(VM_ADDR_NATIVE_STACK_LOW > 0xffffffff);