polkavm_linux_raw/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3#![deny(clippy::panic)]
4#![deny(clippy::unwrap_used)]
5#![deny(clippy::expect_used)]
6#![deny(clippy::unreachable)]
7#![deny(clippy::indexing_slicing)]
8#![allow(clippy::collapsible_else_if)]
9#![allow(clippy::len_without_is_empty)]
10#![allow(clippy::manual_range_contains)]
11// This crate mostly contains syscall wrappers. If you use them you should know what you're doing.
12#![allow(clippy::missing_safety_doc)]
13#![allow(clippy::undocumented_unsafe_blocks)]
14#![cfg(all(target_os = "linux", target_arch = "x86_64"))]
15
16#[cfg(feature = "std")]
17extern crate std;
18
19mod syscall;
20
21#[cfg(target_arch = "x86_64")]
22#[doc(hidden)]
23pub mod arch_amd64_syscall;
24
25#[cfg(target_arch = "x86_64")]
26#[allow(dead_code)]
27#[allow(non_upper_case_globals)]
28#[allow(non_camel_case_types)]
29#[allow(non_snake_case)]
30#[allow(clippy::ptr_as_ptr)]
31#[allow(clippy::used_underscore_binding)]
32#[allow(clippy::transmute_ptr_to_ptr)]
33mod arch_amd64_bindings;
34
35mod io_uring;
36mod mmap;
37
38pub use io_uring::IoUring;
39pub use mmap::Mmap;
40
41#[cfg(target_arch = "x86_64")]
42#[doc(hidden)]
43pub use arch_amd64_syscall as syscall_impl;
44
45pub use core::ffi::{c_int, c_long, c_uchar, c_uint, c_ulong, c_ushort, c_void};
46
47use core::ffi::CStr;
48use core::marker::PhantomData;
49use core::mem::MaybeUninit;
50use core::sync::atomic::AtomicU32;
51use core::time::Duration;
52
53#[cfg(feature = "std")]
54use std::borrow::Cow;
55
// Private stand-in for the C `size_t` type.
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/88345
#[allow(non_camel_case_types)]
type c_size_t = usize;

/// Public name for the C `size_t` type; an alias of the private `c_size_t`,
/// which is currently `usize`.
#[allow(non_camel_case_types)]
pub type size_t = c_size_t;
62
// These `MNT_*` flags don't appear in the public headers, so they are spelled out by hand.
pub const MNT_FORCE: u32 = 1 << 0;
pub const MNT_DETACH: u32 = 1 << 1;
pub const MNT_EXPIRE: u32 = 1 << 2;

// Special signal disposition values, expressed as plain integers.
pub const SIG_DFL: usize = 0x0;
pub const SIG_IGN: usize = 0x1;

// Bindgen does not seem to want to emit this constant,
// so it is defined manually here.
pub const HWCAP2_FSGSBASE: usize = 0x2;
74
75pub(crate) use crate::arch_amd64_bindings as arch_bindings;
76
77#[rustfmt::skip]
78pub use crate::arch_bindings::{
79    __kernel_gid_t as gid_t,
80    __kernel_pid_t as pid_t,
81    __kernel_uid_t as uid_t,
82    __NR_arch_prctl as SYS_arch_prctl,
83    __NR_capset as SYS_capset,
84    __NR_chdir as SYS_chdir,
85    __NR_clock_gettime as SYS_clock_gettime,
86    __NR_clone as SYS_clone,
87    __NR_clone3 as SYS_clone3,
88    __NR_close as SYS_close,
89    __NR_close_range as SYS_close_range,
90    __NR_dup3 as SYS_dup3,
91    __NR_execveat as SYS_execveat,
92    __NR_exit as SYS_exit,
93    __NR_fallocate as SYS_fallocate,
94    __NR_fchdir as SYS_fchdir,
95    __NR_fcntl as SYS_fcntl,
96    __NR_ftruncate as SYS_ftruncate,
97    __NR_futex as SYS_futex,
98    __NR_getdents64 as SYS_getdents64,
99    __NR_getgid as SYS_getgid,
100    __NR_getpid as SYS_getpid,
101    __NR_getuid as SYS_getuid,
102    __NR_io_uring_enter as SYS_io_uring_enter,
103    __NR_io_uring_register as SYS_io_uring_register,
104    __NR_io_uring_setup as SYS_io_uring_setup,
105    __NR_ioctl as SYS_ioctl,
106    __NR_kill as SYS_kill,
107    __NR_lseek as SYS_lseek,
108    __NR_nanosleep as SYS_nanosleep,
109    __NR_madvise as SYS_madvise,
110    __NR_memfd_create as SYS_memfd_create,
111    __NR_mmap as SYS_mmap,
112    __NR_mount as SYS_mount,
113    __NR_mprotect as SYS_mprotect,
114    __NR_mremap as SYS_mremap,
115    __NR_munmap as SYS_munmap,
116    __NR_open as SYS_open,
117    __NR_openat as SYS_openat,
118    __NR_pidfd_send_signal as SYS_pidfd_send_signal,
119    __NR_pipe2 as SYS_pipe2,
120    __NR_pivot_root as SYS_pivot_root,
121    __NR_prctl as SYS_prctl,
122    __NR_process_vm_readv as SYS_process_vm_readv,
123    __NR_process_vm_writev as SYS_process_vm_writev,
124    __NR_ptrace as SYS_ptrace,
125    __NR_read as SYS_read,
126    __NR_recvmsg as SYS_recvmsg,
127    __NR_rt_sigaction as SYS_rt_sigaction,
128    __NR_rt_sigprocmask as SYS_rt_sigprocmask,
129    __NR_rt_sigreturn as SYS_rt_sigreturn,
130    __NR_sched_yield as SYS_sched_yield,
131    __NR_seccomp as SYS_seccomp,
132    __NR_sendmsg as SYS_sendmsg,
133    __NR_set_tid_address as SYS_set_tid_address,
134    __NR_setdomainname as SYS_setdomainname,
135    __NR_sethostname as SYS_sethostname,
136    __NR_setrlimit as SYS_setrlimit,
137    __NR_sigaltstack as SYS_sigaltstack,
138    __NR_socketpair as SYS_socketpair,
139    __NR_umount2 as SYS_umount2,
140    __NR_uname as SYS_uname,
141    __NR_unshare as SYS_unshare,
142    __NR_userfaultfd as SYS_userfaultfd,
143    __NR_waitid as SYS_waitid,
144    __NR_write as SYS_write,
145    __NR_writev as SYS_writev,
146    __user_cap_data_struct,
147    __user_cap_header_struct,
148    __WALL,
149    _LINUX_CAPABILITY_VERSION_3,
150    ARCH_GET_FS,
151    ARCH_GET_GS,
152    ARCH_SET_FS,
153    ARCH_SET_GS,
154    AT_EMPTY_PATH,
155    AT_HWCAP2,
156    AT_MINSIGSTKSZ,
157    AT_NULL,
158    AT_PAGESZ,
159    AT_SYSINFO_EHDR,
160    CLD_CONTINUED,
161    CLD_DUMPED,
162    CLD_EXITED,
163    CLD_KILLED,
164    CLD_STOPPED,
165    CLD_TRAPPED,
166    CLOCK_MONOTONIC_RAW,
167    CLONE_CLEAR_SIGHAND,
168    CLONE_NEWCGROUP,
169    CLONE_NEWIPC,
170    CLONE_NEWNET,
171    CLONE_NEWNS,
172    CLONE_NEWPID,
173    CLONE_NEWUSER,
174    CLONE_NEWUTS,
175    CLONE_PIDFD,
176    E2BIG,
177    EACCES,
178    EAGAIN,
179    EBADF,
180    EBUSY,
181    ECHILD,
182    EDOM,
183    EEXIST,
184    EFAULT,
185    EFBIG,
186    EINTR,
187    EINVAL,
188    EIO,
189    EISDIR,
190    EMFILE,
191    EMLINK,
192    ENFILE,
193    ENODEV,
194    ENOENT,
195    ENOEXEC,
196    ENOMEM,
197    ENOSPC,
198    ENOTBLK,
199    ENOTDIR,
200    ENOTTY,
201    ENXIO,
202    EOPNOTSUPP,
203    EPERM,
204    EPIPE,
205    ERANGE,
206    EROFS,
207    ESPIPE,
208    ESRCH,
209    ETIMEDOUT,
210    ETOOMANYREFS,
211    ETXTBSY,
212    EXDEV,
213    ERESTARTSYS,
214    F_ADD_SEALS,
215    F_DUPFD,
216    F_GETFD,
217    F_SEAL_EXEC,
218    F_SEAL_FUTURE_WRITE,
219    F_SEAL_GROW,
220    F_SEAL_SEAL,
221    F_SEAL_SHRINK,
222    F_SEAL_WRITE,
223    F_SETFD,
224    F_SETFL,
225    F_SETOWN,
226    F_SETSIG,
227    FALLOC_FL_COLLAPSE_RANGE,
228    FALLOC_FL_INSERT_RANGE,
229    FALLOC_FL_KEEP_SIZE,
230    FALLOC_FL_NO_HIDE_STALE,
231    FALLOC_FL_PUNCH_HOLE,
232    FALLOC_FL_UNSHARE_RANGE,
233    FALLOC_FL_ZERO_RANGE,
234    FUTEX_BITSET_MATCH_ANY,
235    FUTEX_WAIT,
236    FUTEX_WAKE,
237    FUTEX2_SIZE_U32,
238    io_cqring_offsets,
239    io_sqring_offsets,
240    io_uring_buf_reg,
241    io_uring_buf_ring,
242    io_uring_buf_status,
243    io_uring_buf,
244    io_uring_cqe,
245    io_uring_file_index_range,
246    io_uring_files_update,
247    io_uring_getevents_arg,
248    io_uring_napi,
249    io_uring_op_IORING_OP_ACCEPT,
250    io_uring_op_IORING_OP_ASYNC_CANCEL,
251    io_uring_op_IORING_OP_CLOSE,
252    io_uring_op_IORING_OP_CONNECT,
253    io_uring_op_IORING_OP_EPOLL_CTL,
254    io_uring_op_IORING_OP_FADVISE,
255    io_uring_op_IORING_OP_FALLOCATE,
256    io_uring_op_IORING_OP_FGETXATTR,
257    io_uring_op_IORING_OP_FILES_UPDATE,
258    io_uring_op_IORING_OP_FIXED_FD_INSTALL,
259    io_uring_op_IORING_OP_FSETXATTR,
260    io_uring_op_IORING_OP_FSYNC,
261    io_uring_op_IORING_OP_FTRUNCATE,
262    io_uring_op_IORING_OP_FUTEX_WAIT,
263    io_uring_op_IORING_OP_FUTEX_WAITV,
264    io_uring_op_IORING_OP_FUTEX_WAKE,
265    io_uring_op_IORING_OP_GETXATTR,
266    io_uring_op_IORING_OP_LAST,
267    io_uring_op_IORING_OP_LINK_TIMEOUT,
268    io_uring_op_IORING_OP_LINKAT,
269    io_uring_op_IORING_OP_MADVISE,
270    io_uring_op_IORING_OP_MKDIRAT,
271    io_uring_op_IORING_OP_MSG_RING,
272    io_uring_op_IORING_OP_NOP,
273    io_uring_op_IORING_OP_OPENAT,
274    io_uring_op_IORING_OP_OPENAT2,
275    io_uring_op_IORING_OP_POLL_ADD,
276    io_uring_op_IORING_OP_POLL_REMOVE,
277    io_uring_op_IORING_OP_PROVIDE_BUFFERS,
278    io_uring_op_IORING_OP_READ_FIXED,
279    io_uring_op_IORING_OP_READ_MULTISHOT,
280    io_uring_op_IORING_OP_READ,
281    io_uring_op_IORING_OP_READV,
282    io_uring_op_IORING_OP_RECV,
283    io_uring_op_IORING_OP_RECVMSG,
284    io_uring_op_IORING_OP_REMOVE_BUFFERS,
285    io_uring_op_IORING_OP_RENAMEAT,
286    io_uring_op_IORING_OP_SEND_ZC,
287    io_uring_op_IORING_OP_SEND,
288    io_uring_op_IORING_OP_SENDMSG_ZC,
289    io_uring_op_IORING_OP_SENDMSG,
290    io_uring_op_IORING_OP_SETXATTR,
291    io_uring_op_IORING_OP_SHUTDOWN,
292    io_uring_op_IORING_OP_SOCKET,
293    io_uring_op_IORING_OP_SPLICE,
294    io_uring_op_IORING_OP_STATX,
295    io_uring_op_IORING_OP_SYMLINKAT,
296    io_uring_op_IORING_OP_SYNC_FILE_RANGE,
297    io_uring_op_IORING_OP_TEE,
298    io_uring_op_IORING_OP_TIMEOUT_REMOVE,
299    io_uring_op_IORING_OP_TIMEOUT,
300    io_uring_op_IORING_OP_UNLINKAT,
301    io_uring_op_IORING_OP_URING_CMD,
302    io_uring_op_IORING_OP_WAITID,
303    io_uring_op_IORING_OP_WRITE_FIXED,
304    io_uring_op_IORING_OP_WRITE,
305    io_uring_op_IORING_OP_WRITEV,
306    io_uring_params,
307    io_uring_probe_op,
308    io_uring_probe,
309    io_uring_recvmsg_out,
310    io_uring_restriction,
311    io_uring_rsrc_register,
312    io_uring_rsrc_update,
313    io_uring_rsrc_update2,
314    io_uring_sqe,
315    io_uring_sync_cancel_reg,
316    IORING_ACCEPT_MULTISHOT,
317    IORING_ASYNC_CANCEL_ALL,
318    IORING_ASYNC_CANCEL_ANY,
319    IORING_ASYNC_CANCEL_FD_FIXED,
320    IORING_ASYNC_CANCEL_FD,
321    IORING_ASYNC_CANCEL_OP,
322    IORING_ASYNC_CANCEL_USERDATA,
323    IORING_CQ_EVENTFD_DISABLED,
324    IORING_CQE_BUFFER_SHIFT,
325    IORING_CQE_F_BUFFER,
326    IORING_CQE_F_MORE,
327    IORING_CQE_F_NOTIF,
328    IORING_CQE_F_SOCK_NONEMPTY,
329    IORING_ENTER_EXT_ARG,
330    IORING_ENTER_GETEVENTS,
331    IORING_ENTER_REGISTERED_RING,
332    IORING_ENTER_SQ_WAIT,
333    IORING_ENTER_SQ_WAKEUP,
334    IORING_FEAT_CQE_SKIP,
335    IORING_FEAT_CUR_PERSONALITY,
336    IORING_FEAT_EXT_ARG,
337    IORING_FEAT_FAST_POLL,
338    IORING_FEAT_LINKED_FILE,
339    IORING_FEAT_NATIVE_WORKERS,
340    IORING_FEAT_NODROP,
341    IORING_FEAT_POLL_32BITS,
342    IORING_FEAT_REG_REG_RING,
343    IORING_FEAT_RSRC_TAGS,
344    IORING_FEAT_RW_CUR_POS,
345    IORING_FEAT_SINGLE_MMAP,
346    IORING_FEAT_SQPOLL_NONFIXED,
347    IORING_FEAT_SUBMIT_STABLE,
348    IORING_FILE_INDEX_ALLOC,
349    IORING_FIXED_FD_NO_CLOEXEC,
350    IORING_FSYNC_DATASYNC,
351    IORING_LINK_TIMEOUT_UPDATE,
352    IORING_MSG_DATA,
353    IORING_MSG_RING_CQE_SKIP,
354    IORING_MSG_RING_FLAGS_PASS,
355    IORING_MSG_SEND_FD,
356    IORING_NOTIF_USAGE_ZC_COPIED,
357    IORING_OFF_CQ_RING,
358    IORING_OFF_MMAP_MASK,
359    IORING_OFF_PBUF_RING,
360    IORING_OFF_PBUF_SHIFT,
361    IORING_OFF_SQ_RING,
362    IORING_OFF_SQES,
363    IORING_POLL_ADD_LEVEL,
364    IORING_POLL_ADD_MULTI,
365    IORING_POLL_UPDATE_EVENTS,
366    IORING_POLL_UPDATE_USER_DATA,
367    IORING_RECV_MULTISHOT,
368    IORING_RECVSEND_FIXED_BUF,
369    IORING_RECVSEND_POLL_FIRST,
370    IORING_REGISTER_BUFFERS_UPDATE,
371    IORING_REGISTER_BUFFERS,
372    IORING_REGISTER_BUFFERS2,
373    IORING_REGISTER_ENABLE_RINGS,
374    IORING_REGISTER_EVENTFD_ASYNC,
375    IORING_REGISTER_EVENTFD,
376    IORING_REGISTER_FILE_ALLOC_RANGE,
377    IORING_REGISTER_FILES_SKIP,
378    IORING_REGISTER_FILES_UPDATE,
379    IORING_REGISTER_FILES_UPDATE2,
380    IORING_REGISTER_FILES,
381    IORING_REGISTER_FILES2,
382    IORING_REGISTER_IOWQ_AFF,
383    IORING_REGISTER_IOWQ_MAX_WORKERS,
384    IORING_REGISTER_LAST,
385    IORING_REGISTER_NAPI,
386    IORING_REGISTER_PBUF_RING,
387    IORING_REGISTER_PBUF_STATUS,
388    IORING_REGISTER_PERSONALITY,
389    IORING_REGISTER_PROBE,
390    IORING_REGISTER_RESTRICTIONS,
391    IORING_REGISTER_RING_FDS,
392    IORING_REGISTER_SYNC_CANCEL,
393    IORING_REGISTER_USE_REGISTERED_RING,
394    IORING_RESTRICTION_LAST,
395    IORING_RESTRICTION_REGISTER_OP,
396    IORING_RESTRICTION_SQE_FLAGS_ALLOWED,
397    IORING_RESTRICTION_SQE_FLAGS_REQUIRED,
398    IORING_RESTRICTION_SQE_OP,
399    IORING_RSRC_REGISTER_SPARSE,
400    IORING_SEND_ZC_REPORT_USAGE,
401    IORING_SETUP_ATTACH_WQ,
402    IORING_SETUP_CLAMP,
403    IORING_SETUP_COOP_TASKRUN,
404    IORING_SETUP_CQE32,
405    IORING_SETUP_CQSIZE,
406    IORING_SETUP_DEFER_TASKRUN,
407    IORING_SETUP_IOPOLL,
408    IORING_SETUP_NO_MMAP,
409    IORING_SETUP_NO_SQARRAY,
410    IORING_SETUP_R_DISABLED,
411    IORING_SETUP_REGISTERED_FD_ONLY,
412    IORING_SETUP_SINGLE_ISSUER,
413    IORING_SETUP_SQ_AFF,
414    IORING_SETUP_SQE128,
415    IORING_SETUP_SQPOLL,
416    IORING_SETUP_SUBMIT_ALL,
417    IORING_SETUP_TASKRUN_FLAG,
418    IORING_SQ_CQ_OVERFLOW,
419    IORING_SQ_NEED_WAKEUP,
420    IORING_SQ_TASKRUN,
421    IORING_TIMEOUT_ABS,
422    IORING_TIMEOUT_BOOTTIME,
423    IORING_TIMEOUT_CLOCK_MASK,
424    IORING_TIMEOUT_ETIME_SUCCESS,
425    IORING_TIMEOUT_MULTISHOT,
426    IORING_TIMEOUT_REALTIME,
427    IORING_TIMEOUT_UPDATE_MASK,
428    IORING_TIMEOUT_UPDATE,
429    IORING_UNREGISTER_BUFFERS,
430    IORING_UNREGISTER_EVENTFD,
431    IORING_UNREGISTER_FILES,
432    IORING_UNREGISTER_IOWQ_AFF,
433    IORING_UNREGISTER_NAPI,
434    IORING_UNREGISTER_PBUF_RING,
435    IORING_UNREGISTER_PERSONALITY,
436    IORING_UNREGISTER_RING_FDS,
437    IORING_URING_CMD_FIXED,
438    IORING_URING_CMD_MASK,
439    IOSQE_ASYNC_BIT,
440    IOSQE_BUFFER_SELECT_BIT,
441    IOSQE_CQE_SKIP_SUCCESS_BIT,
442    IOSQE_FIXED_FILE_BIT,
443    IOSQE_IO_DRAIN_BIT,
444    IOSQE_IO_HARDLINK_BIT,
445    IOSQE_IO_LINK_BIT,
446    iovec,
447    linux_dirent64,
448    MADV_COLD,
449    MADV_COLLAPSE,
450    MADV_DODUMP,
451    MADV_DOFORK,
452    MADV_DONTDUMP,
453    MADV_DONTFORK,
454    MADV_DONTNEED_LOCKED,
455    MADV_DONTNEED,
456    MADV_FREE,
457    MADV_HUGEPAGE,
458    MADV_HWPOISON,
459    MADV_KEEPONFORK,
460    MADV_MERGEABLE,
461    MADV_NOHUGEPAGE,
462    MADV_NORMAL,
463    MADV_PAGEOUT,
464    MADV_POPULATE_READ,
465    MADV_POPULATE_WRITE,
466    MADV_RANDOM,
467    MADV_REMOVE,
468    MADV_SEQUENTIAL,
469    MADV_SOFT_OFFLINE,
470    MADV_UNMERGEABLE,
471    MADV_WILLNEED,
472    MADV_WIPEONFORK,
473    MAP_ANONYMOUS,
474    MAP_FIXED,
475    MAP_POPULATE,
476    MAP_PRIVATE,
477    MAP_SHARED,
478    MFD_ALLOW_SEALING,
479    MFD_CLOEXEC,
480    MINSIGSTKSZ,
481    MREMAP_FIXED,
482    MREMAP_MAYMOVE,
483    MS_BIND,
484    MS_NODEV,
485    MS_NOEXEC,
486    MS_NOSUID,
487    MS_PRIVATE,
488    MS_RDONLY,
489    MS_REC,
490    new_utsname,
491    O_CLOEXEC,
492    O_DIRECTORY,
493    O_NONBLOCK,
494    O_PATH,
495    O_RDONLY,
496    O_RDWR,
497    O_WRONLY,
498    P_ALL,
499    P_PGID,
500    P_PID,
501    P_PIDFD,
502    PROT_EXEC,
503    PROT_READ,
504    PROT_WRITE,
505    RLIMIT_DATA,
506    RLIMIT_FSIZE,
507    RLIMIT_LOCKS,
508    RLIMIT_MEMLOCK,
509    RLIMIT_MSGQUEUE,
510    RLIMIT_NOFILE,
511    RLIMIT_NPROC,
512    RLIMIT_STACK,
513    rlimit,
514    rusage,
515    SA_NODEFER,
516    SA_ONSTACK,
517    SA_RESTORER,
518    SA_SIGINFO,
519    SECCOMP_RET_ALLOW,
520    SECCOMP_RET_ERRNO,
521    SECCOMP_RET_KILL_THREAD,
522    SECCOMP_SET_MODE_FILTER,
523    SIG_BLOCK,
524    SIG_SETMASK,
525    SIG_UNBLOCK,
526    SIGABRT,
527    sigaction as kernel_sigaction,
528    SIGBUS,
529    SIGCHLD,
530    SIGCONT,
531    SIGFPE,
532    SIGHUP,
533    SIGILL,
534    siginfo_t,
535    SIGINT,
536    SIGIO,
537    SIGKILL,
538    SIGPIPE,
539    SIGSEGV,
540    sigset_t as kernel_sigset_t,
541    SIGSTOP,
542    SIGSYS,
543    SIGTERM,
544    SIGTRAP,
545    timespec,
546    UFFD_EVENT_FORK,
547    UFFD_EVENT_PAGEFAULT,
548    UFFD_EVENT_REMAP,
549    UFFD_EVENT_REMOVE,
550    UFFD_EVENT_UNMAP,
551    UFFD_FEATURE_EVENT_FORK,
552    UFFD_FEATURE_EVENT_REMAP,
553    UFFD_FEATURE_EVENT_REMOVE,
554    UFFD_FEATURE_EVENT_UNMAP,
555    UFFD_FEATURE_EXACT_ADDRESS,
556    UFFD_FEATURE_MINOR_HUGETLBFS,
557    UFFD_FEATURE_MINOR_SHMEM,
558    UFFD_FEATURE_MISSING_HUGETLBFS,
559    UFFD_FEATURE_MISSING_SHMEM,
560    UFFD_FEATURE_MOVE,
561    UFFD_FEATURE_PAGEFAULT_FLAG_WP,
562    UFFD_FEATURE_POISON,
563    UFFD_FEATURE_SIGBUS,
564    UFFD_FEATURE_THREAD_ID,
565    UFFD_FEATURE_WP_ASYNC,
566    UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
567    UFFD_FEATURE_WP_UNPOPULATED,
568    uffd_msg,
569    UFFD_PAGEFAULT_FLAG_MINOR,
570    UFFD_PAGEFAULT_FLAG_WP,
571    UFFD_PAGEFAULT_FLAG_WRITE,
572    UFFD_USER_MODE_ONLY,
573    uffdio_api,
574    uffdio_continue,
575    uffdio_copy,
576    uffdio_move,
577    uffdio_poison,
578    uffdio_range,
579    uffdio_register,
580    uffdio_writeprotect,
581    uffdio_zeropage,
582    WEXITED,
583    WNOHANG,
584};
585
// Bindgen refuses to emit the following userfaultfd constants, so they are defined by hand.
pub const UFFD_API: u64 = 0xAA;

// Mode bits for `uffdio_register`.
pub const UFFDIO_REGISTER_MODE_MISSING: u64 = 0x1;
pub const UFFDIO_REGISTER_MODE_WP: u64 = 0x2;
pub const UFFDIO_REGISTER_MODE_MINOR: u64 = 0x4;

// Mode bits for `uffdio_copy`.
pub const UFFDIO_COPY_MODE_DONTWAKE: u64 = 0x1;
pub const UFFDIO_COPY_MODE_WP: u64 = 0x2;

// Mode bits for `uffdio_zeropage`.
pub const UFFDIO_ZEROPAGE_MODE_DONTWAKE: u64 = 0x1;

// Mode bits for `uffdio_writeprotect`.
pub const UFFDIO_WRITEPROTECT_MODE_WP: u64 = 0x1;
pub const UFFDIO_WRITEPROTECT_MODE_DONTWAKE: u64 = 0x2;

// Mode bits for `uffdio_continue`.
pub const UFFDIO_CONTINUE_MODE_DONTWAKE: u64 = 0x1;
pub const UFFDIO_CONTINUE_MODE_WP: u64 = 0x2;
598
// Assembles an ioctl request number from its four components: each component is
// shifted into place with the matching `_IOC_*SHIFT` constant from the arch
// bindings and the results are OR-ed together.
macro_rules! ioc {
    ($direction:expr, $ioctl_type:expr, $number:expr, $arg_size:expr) => {
        ($direction << $crate::arch_bindings::_IOC_DIRSHIFT)
            | ($ioctl_type << $crate::arch_bindings::_IOC_TYPESHIFT)
            | ($number << $crate::arch_bindings::_IOC_NRSHIFT)
            | ($arg_size << $crate::arch_bindings::_IOC_SIZESHIFT)
    };
}
607
// Builds a request number whose direction is `_IOC_READ`; the size component is
// `size_of` the given argument type, cast to `c_uint`.
macro_rules! ior {
    ($ioctl_type:expr, $number:expr, $arg:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ,
            $ioctl_type,
            $number,
            core::mem::size_of::<$arg>() as $crate::c_uint
        )
    };
}
618
// Builds a request number whose direction is `_IOC_READ | _IOC_WRITE`; the size
// component is `size_of` the given argument type, cast to `c_uint`.
macro_rules! iowr {
    ($ioctl_type:expr, $number:expr, $arg:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ | $crate::arch_bindings::_IOC_WRITE,
            $ioctl_type,
            $number,
            core::mem::size_of::<$arg>() as $crate::c_uint
        )
    };
}
629
630use crate::arch_bindings::UFFDIO;
631
632const UFFDIO_API: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_API, uffdio_api);
633const UFFDIO_REGISTER: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_REGISTER, uffdio_register);
634const UFFDIO_UNREGISTER: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_UNREGISTER, uffdio_range);
635const UFFDIO_WAKE: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_WAKE, uffdio_range);
636const UFFDIO_COPY: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_COPY, uffdio_copy);
637const UFFDIO_ZEROPAGE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_ZEROPAGE, uffdio_zeropage);
638const UFFDIO_MOVE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_MOVE, uffdio_move);
639const UFFDIO_WRITEPROTECT: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_WRITEPROTECT, uffdio_writeprotect);
640const UFFDIO_CONTINUE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_CONTINUE, uffdio_continue);
641const UFFDIO_POISON: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_POISON, uffdio_poison);
642
// Generates one public wrapper function per `(function name, ioctl command,
// argument struct)` triple. Each wrapper passes the address of `arg` to
// `sys_ioctl` as a `c_ulong`, propagates any error with `?` and otherwise
// discards the returned value.
macro_rules! ioctl_wrapper {
    ($(($wrapper:ident, $request:ident, $arg_type:ident),)*) => {
        $(
            pub fn $wrapper(fd: FdRef, arg: &mut $arg_type) -> Result<(), Error> {
                let raw_arg = arg as *mut $arg_type as c_ulong;
                sys_ioctl(fd, $request, raw_arg)?;
                Ok(())
            }
        )*
    };
}
655
656ioctl_wrapper! {
657    (sys_uffdio_api, UFFDIO_API, uffdio_api),
658    (sys_uffdio_register, UFFDIO_REGISTER, uffdio_register),
659    (sys_uffdio_unregister, UFFDIO_UNREGISTER, uffdio_range),
660    (sys_uffdio_wake, UFFDIO_WAKE, uffdio_range),
661    (sys_uffdio_copy, UFFDIO_COPY, uffdio_copy),
662    (sys_uffdio_zeropage, UFFDIO_ZEROPAGE, uffdio_zeropage),
663    (sys_uffdio_move, UFFDIO_MOVE, uffdio_move),
664    (sys_uffdio_writeprotect, UFFDIO_WRITEPROTECT, uffdio_writeprotect),
665    (sys_uffdio_continue, UFFDIO_CONTINUE, uffdio_continue),
666    (sys_uffdio_poison, UFFDIO_POISON, uffdio_poison),
667}
668
// Implements `Default` for every listed type by returning an all-zero value.
//
// The macro is "unsafe" in the sense that whoever lists a type here vouches
// that the all-zero bit pattern is a valid value of that type.
macro_rules! unsafe_impl_zeroed_default {
    ($($zeroable:ident,)*) => {
        $(
            impl Default for $zeroable {
                #[inline]
                fn default() -> Self {
                    unsafe { core::mem::zeroed::<Self>() }
                }
            }
        )*
    };
}
683
684unsafe_impl_zeroed_default! {
685    uffdio_api,
686    uffdio_register,
687    uffdio_range,
688    uffdio_copy,
689    uffdio_zeropage,
690    uffdio_move,
691    uffdio_writeprotect,
692    uffdio_continue,
693    uffdio_poison,
694    uffd_msg,
695    io_uring_params,
696    io_uring_sqe,
697}
698
699impl siginfo_t {
700    pub unsafe fn si_signo(&self) -> c_int {
701        self.__bindgen_anon_1.__bindgen_anon_1.si_signo
702    }
703
704    pub unsafe fn si_code(&self) -> c_int {
705        self.__bindgen_anon_1.__bindgen_anon_1.si_code
706    }
707
708    pub unsafe fn si_pid(&self) -> pid_t {
709        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._pid
710    }
711
712    pub unsafe fn si_status(&self) -> c_int {
713        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._status
714    }
715}
716
/// Returns `true` when the low seven bits of `status` hold a value other than
/// `0` or `0x7f`.
#[allow(non_snake_case)]
pub const fn WIFSIGNALED(status: c_int) -> bool {
    // Adding one and narrowing to `i8` maps 0 to 1 and wraps 0x7f around to
    // -128, so both fall below the threshold of 2.
    let shifted = ((status & 0x7f) + 1) as i8;
    shifted >= 2
}
721
/// Extracts the low seven bits of `status`.
#[allow(non_snake_case)]
pub const fn WTERMSIG(status: c_int) -> c_int {
    const LOW_SEVEN_BITS: c_int = 0x7f;
    status & LOW_SEVEN_BITS
}
726
/// Returns `true` when the low seven bits of `status` are all zero.
#[allow(non_snake_case)]
pub const fn WIFEXITED(status: c_int) -> bool {
    const LOW_SEVEN_BITS: c_int = 0x7f;
    let low_bits = status & LOW_SEVEN_BITS;
    low_bits == 0
}
731
/// Extracts bits 8 through 15 of `status`.
#[allow(non_snake_case)]
pub const fn WEXITSTATUS(status: c_int) -> c_int {
    // Masking first and shifting afterwards selects the same eight bits as
    // shifting first and masking afterwards.
    (status & 0xff00) >> 8
}
736
/// Length type for socket-related fields such as `msghdr::msg_namelen`;
/// a plain 32-bit unsigned integer.
#[allow(non_camel_case_types)]
pub type socklen_t = u32;
739
/// Stack description as laid out by the kernel.
///
/// Source: linux/arch/x86/include/uapi/asm/signal.h
#[repr(C)]
#[derive(Debug)]
pub struct stack_t {
    /// Pointer to the stack memory.
    pub ss_sp: *mut c_void,
    /// Flags for this stack.
    pub ss_flags: c_int,
    /// Size of the stack.
    pub ss_size: usize,
}
748
749// Source: linux/include/uapi/asm-generic/ucontext.h
750#[derive(Debug)]
751#[repr(C)]
752pub struct ucontext {
753    pub uc_flags: c_ulong,
754    pub uc_link: *mut ucontext,
755    pub uc_stack: stack_t,
756    pub uc_mcontext: sigcontext,
757    pub uc_sigmask: kernel_sigset_t,
758}
759
760// Source: linux/arch/x86/include/uapi/asm/sigcontext.h
761#[derive(Debug)]
762#[repr(C)]
763pub struct sigcontext {
764    pub r8: u64,
765    pub r9: u64,
766    pub r10: u64,
767    pub r11: u64,
768    pub r12: u64,
769    pub r13: u64,
770    pub r14: u64,
771    pub r15: u64,
772    pub rdi: u64,
773    pub rsi: u64,
774    pub rbp: u64,
775    pub rbx: u64,
776    pub rdx: u64,
777    pub rax: u64,
778    pub rcx: u64,
779    pub rsp: u64,
780    pub rip: u64,
781    pub eflags: u64,
782    pub cs: u16,
783    pub gs: u16,
784    pub fs: u16,
785    pub ss: u16,
786    pub err: u64,
787    pub trapno: u64,
788    pub oldmask: u64,
789    pub cr2: u64,
790    pub fpstate: *mut fpstate,
791    pub reserved: [u64; 8],
792}
793
794#[repr(C)]
795pub struct fpstate {
796    pub cwd: u16,
797    pub swd: u16,
798    pub twd: u16,
799    pub fop: u16,
800    pub rip: u64,
801    pub rdp: u64,
802    pub mxcsr: u32,
803    pub mxcsr_mask: u32,
804    pub st_space: [u32; 32],  /*  8x  FP registers, 16 bytes each */
805    pub xmm_space: [u32; 64], /* 16x XMM registers, 16 bytes each */
806    pub reserved_1: [u32; 12],
807    pub sw_reserved: fpx_sw_bytes,
808}
809
/// Contents of the `sw_reserved` field of `fpstate`.
#[repr(C)]
pub struct fpx_sw_bytes {
    pub magic1: u32,
    /// Size of the extended state (see `xstate_size` for the other size field).
    pub extended_size: u32,
    pub xfeatures: u64,
    pub xstate_size: u32,
    /// Fills the structure up to 48 bytes.
    pub padding: [u32; 7],
}
818
819#[repr(C)]
820pub struct msghdr {
821    pub msg_name: *mut c_void,
822    pub msg_namelen: socklen_t,
823    pub msg_iov: *mut iovec,
824    pub msg_iovlen: c_size_t,
825    pub msg_control: *mut c_void,
826    pub msg_controllen: c_size_t,
827    pub msg_flags: c_int,
828}
829
830#[repr(C)]
831pub struct cmsghdr {
832    pub cmsg_len: c_size_t,
833    pub cmsg_level: c_int,
834    pub cmsg_type: c_int,
835}
836
837#[repr(C)]
838struct sock_fprog {
839    pub length: c_ushort,
840    pub filter: *const sock_filter,
841}
842
/// A single BPF instruction, as produced by the `bpf!` macro.
#[repr(C)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct sock_filter {
    /// Opcode, built by OR-ing together the `BPF_*` constants.
    pub code: u16,
    /// Jump offset used when the condition is true.
    pub jt: u8,
    /// Jump offset used when the condition is false.
    pub jf: u8,
    /// Generic operand.
    pub k: u32,
}
851
// BPF instruction classes (3 bits, mask: 0x07)
pub const BPF_LD: u16 = 0x00;
pub const BPF_LDX: u16 = 0x01;
pub const BPF_ST: u16 = 0x02;
pub const BPF_STX: u16 = 0x03;
pub const BPF_ALU: u16 = 0x04;
pub const BPF_JMP: u16 = 0x05;
pub const BPF_RET: u16 = 0x06;
pub const BPF_MISC: u16 = 0x07;

// BPF LD/LDX/ST/STX width (2 bits, mask: 0x18)
pub const BPF_W: u16 = 0x00; // 32-bit
pub const BPF_H: u16 = 0x08; // 16-bit
pub const BPF_B: u16 = 0x10; // 8-bit

// BPF LD/LDX/ST/STX addressing mode (3 bits, mask: 0xe0)
pub const BPF_IMM: u16 = 0x00;
pub const BPF_ABS: u16 = 0x20;
pub const BPF_IND: u16 = 0x40;
pub const BPF_MEM: u16 = 0x60;
pub const BPF_LEN: u16 = 0x80;
pub const BPF_MSH: u16 = 0xa0;

// BPF ALU operations (4 bits, mask: 0xf0)
pub const BPF_ADD: u16 = 0x00;
pub const BPF_SUB: u16 = 0x10;
pub const BPF_MUL: u16 = 0x20;
pub const BPF_DIV: u16 = 0x30;
pub const BPF_OR: u16 = 0x40;
pub const BPF_AND: u16 = 0x50;
pub const BPF_LSH: u16 = 0x60;
pub const BPF_RSH: u16 = 0x70;
pub const BPF_NEG: u16 = 0x80;
pub const BPF_MOD: u16 = 0x90;
pub const BPF_XOR: u16 = 0xa0;

// BPF JMP operations (4 bits, mask: 0xf0)
pub const BPF_JA: u16 = 0x00;
pub const BPF_JEQ: u16 = 0x10;
pub const BPF_JGT: u16 = 0x20;
pub const BPF_JGE: u16 = 0x30;
pub const BPF_JSET: u16 = 0x40;

// BPF ALU/JMP source (1 bit, mask: 0x08)
pub const BPF_K: u16 = 0x00;
pub const BPF_X: u16 = 0x08;
898
// Secure-bits flags. Every flag occupies its own bit and is immediately
// followed by its `_LOCKED` counterpart.
pub const SECBIT_NOROOT: u32 = 1 << 0;
pub const SECBIT_NOROOT_LOCKED: u32 = 1 << 1;
pub const SECBIT_NO_SETUID_FIXUP: u32 = 1 << 2;
pub const SECBIT_NO_SETUID_FIXUP_LOCKED: u32 = 1 << 3;
pub const SECBIT_KEEP_CAPS: u32 = 1 << 4;
pub const SECBIT_KEEP_CAPS_LOCKED: u32 = 1 << 5;
pub const SECBIT_NO_CAP_AMBIENT_RAISE: u32 = 1 << 6;
pub const SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED: u32 = 1 << 7;
907
/// Assembles a small BPF program into an array of `sock_filter` instructions.
///
/// The input is a comma-terminated list of parenthesized instructions. Any
/// instruction may be prefixed with `[N]:` to attach the numeric label `N` to
/// it; jumps refer to labels with `@N`.
///
/// Supported instructions:
///
/// ```text
/// a = *abs[ADDR]                   load a 32-bit word from absolute offset ADDR
/// a = syscall_nr                   shorthand for `a = *abs[0]`
/// a = syscall_arg[N]               shorthand for `a = *abs[16 + N * 8]`
/// a &= VALUE                       bitwise AND with a constant
/// if a == VALUE => jump @LABEL     jump when equal
/// if a != VALUE => jump @LABEL     jump when not equal
/// jump @LABEL                      unconditional jump
/// return VALUE                     return a constant
/// seccomp_allow                    return `SECCOMP_RET_ALLOW`
/// seccomp_kill_thread              return `SECCOMP_RET_KILL_THREAD`
/// seccomp_return_error(ERRNO)      return `SECCOMP_RET_ERRNO | ERRNO` (ERRNO is a `u16`)
/// seccomp_return_eperm             shorthand for `seccomp_return_error(EPERM as u16)`
/// ```
///
/// # Panics
///
/// The expansion panics with `"invalid jump"` when a jump target is not
/// between 0 and 255 instructions past the instruction following the jump,
/// and with `"out of range value"` when a comparison constant lies outside
/// `i32::MIN..=u32::MAX`.
#[macro_export]
macro_rules! bpf {
    // Maps any single token tree to the literal `1`; used to count instructions.
    (@const_one $tok:tt) => {
        1
    };

    // Yields the label of a labeled instruction...
    (@get_label_or_zero ([$label:expr]: $($tok:tt)+)) => {
        $label
    };

    // ...and zero for an unlabeled one.
    (@get_label_or_zero ($($tok:tt)+)) => {
        0
    };

    // Number of instructions in the program.
    (@count_instructions
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut count = 0;
        $(
            count += $crate::bpf!(@const_one ($($tok)+));
        )+

        count
    }};

    // Size needed for the label table: the highest label plus one.
    (@max_label_plus_one
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut max = -1;
        $(
            let label = $crate::bpf!(@get_label_or_zero ($($tok)+));
            if label > max {
                max = label;
            }
        )+

        if max < 0 {
            0
        } else {
            (max + 1) as usize
        }
    }};

    // Records the position of a labeled instruction in the label table.
    (@fill_label $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => {
        $labels[$label] = $nth_instruction;
    };

    // Unlabeled instructions leave the label table untouched.
    (@fill_label $labels:expr, $nth_instruction:expr, $($tok:tt)+) => {};

    // Walks all instructions and fills in the label table.
    (@fill_labels
        $labels:expr,
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut nth_instruction = 0;
        $(
            $crate::bpf!(@fill_label $labels, nth_instruction, $($tok)+);
            #[allow(unused_assignments)]
            {
                nth_instruction += 1;
            }
        )+
    }};

    // Converts a label into a jump offset relative to the next instruction.
    (@target $labels:expr, $nth_instruction:expr, $target:expr) => {{
        let target = ($labels[$target] as i32 - $nth_instruction as i32 - 1);
        if target < 0 || target > 255 {
            panic!("invalid jump");
        }

        target as u8
    }};

    // Range-checks a comparison constant and reinterprets it as `u32`.
    // The associated constants are used here instead of the legacy
    // `core::u32` / `core::i32` module constants, so the expansion does not
    // depend on `core` being nameable at the call site.
    (@into_u32 $value:expr) => {{
        let value = $value;
        if value as i128 > u32::MAX as i128 || (value as i128) < i32::MIN as i128 {
            panic!("out of range value");
        }
        value as u32
    }};

    // A labeled instruction encodes exactly like the instruction that follows the label.
    (@op $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => { $crate::bpf!(@op $labels, $nth_instruction, $($tok)+) };

    (@op $labels:expr, $nth_instruction:expr, a = *abs[$addr:expr]) => { $crate::sock_filter { code: $crate::BPF_LD | $crate::BPF_W | $crate::BPF_ABS, jt: 0, jf: 0, k: $addr } };
    (@op $labels:expr, $nth_instruction:expr, a &= $value:expr) => { $crate::sock_filter { code: $crate::BPF_ALU | $crate::BPF_AND | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    (@op $labels:expr, $nth_instruction:expr, if a == $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: $crate::bpf!(@target $labels, $nth_instruction, $target), jf: 0, k: $crate::bpf!(@into_u32 $value) } };
    (@op $labels:expr, $nth_instruction:expr, if a != $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: 0, jf: $crate::bpf!(@target $labels, $nth_instruction, $target), k: $crate::bpf!(@into_u32 $value) } };
    (@op $labels:expr, $nth_instruction:expr, jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JA, jt: 0, jf: 0, k: $crate::bpf!(@target $labels, $nth_instruction, $target) as u32 } };
    (@op $labels:expr, $nth_instruction:expr, return $value:expr) => { $crate::sock_filter { code: $crate::BPF_RET | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    (@op $labels:expr, $nth_instruction:expr, seccomp_allow) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ALLOW) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_kill_thread) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_KILL_THREAD) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_error($errno:expr)) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ERRNO | { let errno: u16 = $errno; errno as u32 }) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_eperm) => { $crate::bpf!(@op $labels, $nth_instruction, seccomp_return_error($crate::EPERM as u16)) };
    (@op $labels:expr, $nth_instruction:expr, a = syscall_nr) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[0]) };
    (@op $labels:expr, $nth_instruction:expr, a = syscall_arg[$nth_arg:expr]) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[16 + $nth_arg * 8]) };

    // Entry point: allocate the output and the label table, resolve the
    // labels, then encode the instructions one by one.
    (
        $(
            ($($tok:tt)+),
        )+
    ) => {{
        let mut filter = [
            $crate::sock_filter { code: 0, jt: 0, jf: 0, k: 0 };
            { $crate::bpf!(@count_instructions $(($($tok)+)),+) }
        ];

        let mut labels = [
            0;
            { $crate::bpf!(@max_label_plus_one $(($($tok)+)),+) }
        ];

        $crate::bpf!(@fill_labels labels, $(($($tok)+)),+);

        {
            let mut nth_instruction = 0;

            $(
                #[allow(clippy::indexing_slicing)]
                {
                    filter[nth_instruction] = $crate::bpf!(@op labels, nth_instruction, $($tok)+);
                }
                nth_instruction += 1;
            )+

            let _ = nth_instruction;
        }

        filter
    }};
}
1043
/// Checks that `bpf!` resolves `jump @label` into the number of instructions to skip.
#[test]
fn test_bpf_jump() {
    // One label: the conditional jump at instruction 0 targets instruction 2.
    let single_label = bpf! {
        (if a == 1234 => jump @0),
        (return 10),
        ([0]: return 20),
    };
    let single_label_expected = [
        sock_filter { code: BPF_JMP | BPF_JEQ | BPF_K, jt: 1, jf: 0, k: 1234 },
        sock_filter { code: BPF_RET, jt: 0, jf: 0, k: 10 },
        sock_filter { code: BPF_RET, jt: 0, jf: 0, k: 20 },
    ];
    assert_eq!(single_label, single_label_expected);

    // Several labels: two different instructions jump to the same label.
    let shared_label = bpf! {
        (if a == 20 => jump @2),
        (if a == 10 => jump @2),
        ([0]: return 0),
        ([1]: return 1),
        ([2]: return 2),
    };
    let shared_label_expected = [
        sock_filter { code: BPF_JMP | BPF_JEQ | BPF_K, jt: 3, jf: 0, k: 20 },
        sock_filter { code: BPF_JMP | BPF_JEQ | BPF_K, jt: 2, jf: 0, k: 10 },
        sock_filter { code: BPF_RET, jt: 0, jf: 0, k: 0 },
        sock_filter { code: BPF_RET, jt: 0, jf: 0, k: 1 },
        sock_filter { code: BPF_RET, jt: 0, jf: 0, k: 2 },
    ];
    assert_eq!(shared_label, shared_label_expected);
}
1116
// File descriptor numbers of the three standard streams.
pub const STDIN_FILENO: c_int = 0;
pub const STDOUT_FILENO: c_int = 1;
pub const STDERR_FILENO: c_int = 2;

// Socket-related constants (address family, socket types/flags, control message levels).
pub const AF_UNIX: u32 = 1;
pub const SOCK_STREAM: u32 = 1;
pub const SOCK_SEQPACKET: u32 = 5;
pub const SOCK_CLOEXEC: u32 = 0x80000;
pub const SOL_SOCKET: c_int = 1;
pub const SCM_RIGHTS: c_int = 1;
pub const MSG_NOSIGNAL: u32 = 0x4000;

// `whence` values accepted by `sys_lseek`.
pub const SEEK_SET: u32 = 0;
pub const SEEK_CUR: u32 = 1;
pub const SEEK_END: u32 = 2;

pub const O_ASYNC: u32 = 0x2000;
1134
/// Rounds `len` up to the next multiple of `size_of::<usize>()`.
#[allow(non_snake_case)]
const fn CMSG_ALIGN(len: usize) -> usize {
    const WORD: usize = core::mem::size_of::<usize>();
    (len + WORD - 1) & !(WORD - 1)
}
1139
1140#[allow(non_snake_case)]
1141pub unsafe fn CMSG_FIRSTHDR(mhdr: *const msghdr) -> *mut cmsghdr {
1142    if (*mhdr).msg_controllen >= core::mem::size_of::<cmsghdr>() {
1143        (*mhdr).msg_control.cast::<cmsghdr>()
1144    } else {
1145        core::ptr::null_mut()
1146    }
1147}
1148
1149#[allow(non_snake_case)]
1150pub unsafe fn CMSG_DATA(cmsg: *mut cmsghdr) -> *mut c_uchar {
1151    cmsg.add(1).cast::<c_uchar>()
1152}
1153
1154#[allow(non_snake_case)]
1155pub const fn CMSG_SPACE(length: usize) -> usize {
1156    CMSG_ALIGN(length) + CMSG_ALIGN(core::mem::size_of::<cmsghdr>())
1157}
1158
1159#[allow(non_snake_case)]
1160pub const fn CMSG_LEN(length: usize) -> usize {
1161    CMSG_ALIGN(core::mem::size_of::<cmsghdr>()) + length
1162}
1163
// The following was copied from the `cstr_core` crate.
//
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
/// Returns `true` when `bytes` ends with a NUL byte and contains no other NUL byte.
#[inline]
#[doc(hidden)]
pub const fn cstr_is_valid(bytes: &[u8]) -> bool {
    // Peel off the mandatory trailing NUL; an empty slice or any other last byte is invalid.
    let mut remaining = match bytes {
        [body @ .., 0] => body,
        _ => return false,
    };

    // Everything in front of the terminator must be non-zero.
    while let [first, rest @ ..] = remaining {
        if *first == 0 {
            return false;
        }
        remaining = rest;
    }

    true
}
1184
/// Builds a `CStr` from `$e` with a NUL terminator appended via `concat!`.
///
/// The resulting bytes are checked with `cstr_is_valid` in a `const` context.
#[macro_export]
macro_rules! cstr {
    ($e:expr) => {{
        const STR: &[u8] = concat!($e, "\0").as_bytes();
        const STR_VALID: bool = $crate::cstr_is_valid(STR);
        // Compile-time assertion: when `STR_VALID` is false the array length is `0 - 1`,
        // which cannot be evaluated as a `usize`.
        let _ = [(); 0 - (!(STR_VALID) as usize)];
        // The validity check above is what justifies skipping the runtime check here.
        #[allow(unused_unsafe)]
        unsafe {
            core::ffi::CStr::from_bytes_with_nul_unchecked(STR)
        }
    }}
}
1197
/// An error made of a message and an optional `errno` value.
#[derive(Clone)]
pub struct Error {
    // Without `std` only static messages can be stored.
    #[cfg(not(feature = "std"))]
    message: &'static str,
    // With `std` the message may also be an owned `String`.
    #[cfg(feature = "std")]
    message: Cow<'static, str>,
    // Zero means "no errno attached"; the formatter omits the errno part in that case.
    errno: c_int,
}
1206
1207impl core::fmt::Debug for Error {
1208    #[cold]
1209    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1210        core::fmt::Display::fmt(self, fmt)
1211    }
1212}
1213
1214impl core::fmt::Display for Error {
1215    #[cold]
1216    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1217        let mut is_err = false;
1218        self.fmt_to_string(|chunk| {
1219            if fmt.write_str(chunk).is_err() {
1220                is_err = true;
1221            }
1222        });
1223
1224        if is_err {
1225            Err(core::fmt::Error)
1226        } else {
1227            Ok(())
1228        }
1229    }
1230}
1231
// Marker impl only; all provided methods keep their defaults.
#[cfg(feature = "std")]
impl std::error::Error for Error {}
1234
#[cfg(feature = "std")]
impl From<std::string::String> for Error {
    /// Wraps an owned message; no errno is attached.
    fn from(message: std::string::String) -> Self {
        Self {
            message: Cow::Owned(message),
            errno: 0,
        }
    }
}
1244
1245impl From<&'static str> for Error {
1246    fn from(message: &'static str) -> Self {
1247        Error::from_str(message)
1248    }
1249}
1250
/// Emits the decimal representation of `value`, calling `write_str` once per digit,
/// most significant digit first.
fn write_number(value: u32, write_str: &mut dyn FnMut(&str)) {
    // Emit all the higher-order digits before the last one.
    if value >= 10 {
        write_number(value / 10, write_str);
    }

    let digit = [b'0' + (value % 10) as u8];
    // An ASCII digit is always valid UTF-8.
    write_str(unsafe { core::str::from_utf8_unchecked(&digit) });
}
1263
1264impl Error {
1265    pub fn fmt_to_string(&self, mut write_str: impl FnMut(&str)) {
1266        self.fmt_to_string_impl(&mut write_str);
1267    }
1268
1269    // Avoid pulling in core::fmt machinery to keep the code size low.
1270    #[cold]
1271    fn fmt_to_string_impl(&self, write_str: &mut dyn FnMut(&str)) {
1272        write_str(&self.message);
1273
1274        if self.errno == 0 {
1275            return;
1276        }
1277
1278        write_str(" (errno = ");
1279        write_number(self.errno as u32, write_str);
1280
1281        let errno = match self.errno as u32 {
1282            EPERM => Some("EPERM"),
1283            ENOENT => Some("ENOENT"),
1284            ESRCH => Some("ESRCH"),
1285            EINTR => Some("EINTR"),
1286            EIO => Some("EIO"),
1287            ENXIO => Some("ENXIO"),
1288            E2BIG => Some("E2BIG"),
1289            ENOEXEC => Some("ENOEXEC"),
1290            EBADF => Some("EBADF"),
1291            ECHILD => Some("ECHILD"),
1292            EAGAIN => Some("EAGAIN"),
1293            ENOMEM => Some("ENOMEM"),
1294            EACCES => Some("EACCES"),
1295            EFAULT => Some("EFAULT"),
1296            ENOTBLK => Some("ENOTBLK"),
1297            EBUSY => Some("EBUSY"),
1298            EEXIST => Some("EEXIST"),
1299            EXDEV => Some("EXDEV"),
1300            ENODEV => Some("ENODEV"),
1301            ENOTDIR => Some("ENOTDIR"),
1302            EISDIR => Some("EISDIR"),
1303            EINVAL => Some("EINVAL"),
1304            ENFILE => Some("ENFILE"),
1305            EMFILE => Some("EMFILE"),
1306            ENOTTY => Some("ENOTTY"),
1307            ETXTBSY => Some("ETXTBSY"),
1308            EFBIG => Some("EFBIG"),
1309            ENOSPC => Some("ENOSPC"),
1310            ESPIPE => Some("ESPIPE"),
1311            EROFS => Some("EROFS"),
1312            EMLINK => Some("EMLINK"),
1313            EPIPE => Some("EPIPE"),
1314            EDOM => Some("EDOM"),
1315            ERANGE => Some("ERANGE"),
1316            EOPNOTSUPP => Some("EOPNOTSUPP"),
1317            ETOOMANYREFS => Some("ETOOMANYREFS"),
1318            ERESTARTSYS => Some("ERESTARTSYS"),
1319            _ => None,
1320        };
1321
1322        if let Some(errno) = errno {
1323            write_str(" (");
1324            write_str(errno);
1325            write_str(")");
1326        }
1327
1328        write_str(")");
1329    }
1330
1331    #[cfg(feature = "std")]
1332    #[cold]
1333    pub fn from_os_error(message: &'static str, error: std::io::Error) -> Self {
1334        Self {
1335            message: message.into(),
1336            errno: error.raw_os_error().unwrap_or(0),
1337        }
1338    }
1339
1340    #[cfg(feature = "std")]
1341    #[cold]
1342    pub fn from_last_os_error(message: &'static str) -> Self {
1343        Self {
1344            message: message.into(),
1345            errno: std::io::Error::last_os_error().raw_os_error().unwrap_or(0),
1346        }
1347    }
1348
1349    #[cold]
1350    pub const fn from_errno(message: &'static str, errno: i32) -> Self {
1351        Self {
1352            #[cfg(not(feature = "std"))]
1353            message,
1354            #[cfg(feature = "std")]
1355            message: Cow::Borrowed(message),
1356
1357            errno,
1358        }
1359    }
1360
1361    #[cold]
1362    pub const fn from_str(message: &'static str) -> Self {
1363        Self {
1364            #[cfg(not(feature = "std"))]
1365            message,
1366            #[cfg(feature = "std")]
1367            message: Cow::Borrowed(message),
1368
1369            errno: 0,
1370        }
1371    }
1372
1373    #[inline]
1374    pub fn from_syscall(message: &'static str, result: i64) -> Result<(), Self> {
1375        if result >= -4095 && result < 0 {
1376            Err(Self::from_syscall_unchecked(message, result))
1377        } else {
1378            Ok(())
1379        }
1380    }
1381
1382    #[cold]
1383    #[inline]
1384    const fn from_syscall_unchecked(message: &'static str, result: i64) -> Self {
1385        Self {
1386            #[cfg(not(feature = "std"))]
1387            message,
1388            #[cfg(feature = "std")]
1389            message: Cow::Borrowed(message),
1390
1391            errno: -result as i32,
1392        }
1393    }
1394
1395    #[inline]
1396    pub fn errno(&self) -> u32 {
1397        self.errno as u32
1398    }
1399}
1400
/// Terminates execution by running the `ud2` instruction; never returns.
#[cfg(target_arch = "x86_64")]
#[inline(never)]
#[cold]
pub fn abort() -> ! {
    // `core::hint::unreachable_unchecked` happens to emit the same instruction
    // in practice, but calling it is undefined behavior which could leak into
    // unrelated code, hence the explicit `asm!`.
    unsafe { core::arch::asm!("ud2", options(nostack, noreturn)) }
}
1413
/// An owned file descriptor. Will be automatically closed on drop.
///
/// A negative inner value marks a descriptor that was already closed or leaked;
/// closing such a value is a no-op.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct Fd(c_int);
1418
/// An unowned file descriptor.
///
/// The lifetime ties the reference to the `Fd` it was borrowed from; the
/// `PhantomData` field stores no data.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct FdRef<'a>(c_int, PhantomData<&'a Fd>);
1423
1424impl Fd {
1425    pub fn raw(&self) -> c_int {
1426        self.0
1427    }
1428
1429    #[inline]
1430    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1431        Self(fd)
1432    }
1433
1434    pub fn borrow(&self) -> FdRef {
1435        FdRef(self.0, PhantomData)
1436    }
1437
1438    pub fn close(mut self) -> Result<(), Error> {
1439        self.close_inplace()?;
1440        Ok(())
1441    }
1442
1443    pub fn leak(mut self) -> c_int {
1444        core::mem::replace(&mut self.0, -1)
1445    }
1446
1447    fn close_inplace(&mut self) -> Result<(), Error> {
1448        if self.raw() < 0 {
1449            return Ok(());
1450        }
1451
1452        let fd = core::mem::replace(&mut self.0, -1);
1453        let result = unsafe { syscall_readonly!(SYS_close, fd) };
1454        Error::from_syscall("close", result)
1455    }
1456}
1457
1458impl Drop for Fd {
1459    fn drop(&mut self) {
1460        let _ = self.close_inplace();
1461    }
1462}
1463
1464impl<'a> FdRef<'a> {
1465    pub fn raw(&self) -> c_int {
1466        self.0
1467    }
1468
1469    #[inline]
1470    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1471        Self(fd, PhantomData)
1472    }
1473}
1474
1475impl<'a> From<&'a Fd> for FdRef<'a> {
1476    fn from(fd: &'a Fd) -> Self {
1477        FdRef(fd.0, PhantomData)
1478    }
1479}
1480
1481impl<'a> From<&'a mut Fd> for FdRef<'a> {
1482    fn from(fd: &'a mut Fd) -> Self {
1483        FdRef(fd.0, PhantomData)
1484    }
1485}
1486
1487impl core::fmt::Write for Fd {
1488    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1489        FdRef::from(self).write_str(string)
1490    }
1491}
1492
1493impl<'a> core::fmt::Write for FdRef<'a> {
1494    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1495        let mut bytes = string.as_bytes();
1496        while !bytes.is_empty() {
1497            let count = sys_write(*self, bytes).map_err(|_| core::fmt::Error)?;
1498            if count == 0 {
1499                return Err(core::fmt::Error);
1500            }
1501            bytes = bytes.get(count..).ok_or(core::fmt::Error)?;
1502        }
1503
1504        Ok(())
1505    }
1506}
1507
1508pub fn sys_uname() -> Result<new_utsname, Error> {
1509    let mut out: new_utsname = unsafe { core::mem::zeroed() };
1510    let result = unsafe { syscall!(SYS_uname, core::ptr::addr_of_mut!(out)) };
1511    Error::from_syscall("uname", result)?;
1512    Ok(out)
1513}
1514
1515pub fn sys_io_uring_setup(entries: u32, params: &mut io_uring_params) -> Result<Fd, Error> {
1516    let fd = unsafe { syscall!(SYS_io_uring_setup, entries, params as *mut io_uring_params) };
1517    Error::from_syscall("io_uring_setup", fd)?;
1518    Ok(Fd::from_raw_unchecked(fd as c_int))
1519}
1520
1521pub fn sys_io_uring_register(fd: FdRef, opcode: u32, arg: *const c_void, arg_count: u32) -> Result<(), Error> {
1522    let result = unsafe { syscall!(SYS_io_uring_register, fd, opcode, arg, arg_count) };
1523    Error::from_syscall("io_uring_register", result)?;
1524    Ok(())
1525}
1526
1527pub unsafe fn sys_io_uring_enter(
1528    fd: FdRef,
1529    to_submit: u32,
1530    min_complete: u32,
1531    flags: u32,
1532    arg: *const c_void,
1533    argsz: usize,
1534) -> Result<u32, Error> {
1535    let result = unsafe { syscall!(SYS_io_uring_enter, fd, to_submit, min_complete, flags, arg, argsz) };
1536    Error::from_syscall("io_uring_enter", result)?;
1537    Ok(result as u32)
1538}
1539
1540pub fn sys_ioctl(fd: FdRef, cmd: c_uint, arg: c_ulong) -> Result<c_int, Error> {
1541    let result = unsafe { syscall!(SYS_ioctl, fd, cmd, arg) };
1542    Error::from_syscall("ioctl", result)?;
1543    Ok(result as c_int)
1544}
1545
1546pub fn sys_userfaultfd(flags: c_uint) -> Result<Fd, Error> {
1547    let fd = unsafe { syscall_readonly!(SYS_userfaultfd, flags) };
1548    Error::from_syscall("userfaultfd", fd)?;
1549    Ok(Fd::from_raw_unchecked(fd as c_int))
1550}
1551
1552fn sys_getdents64(fd: FdRef, buffer: &mut [u8]) -> Result<Option<usize>, Error> {
1553    let length = buffer.len();
1554    let bytes_read = unsafe { syscall!(SYS_getdents64, fd.raw(), buffer, length) };
1555    Error::from_syscall("getdents64", bytes_read)?;
1556
1557    if bytes_read == 0 {
1558        Ok(None)
1559    } else {
1560        Ok(Some(bytes_read as usize))
1561    }
1562}
1563
1564pub unsafe fn sys_arch_prctl_set_gs(value: usize) -> Result<(), Error> {
1565    let result = syscall_readonly!(SYS_arch_prctl, ARCH_SET_GS, value);
1566    Error::from_syscall("arch_prctl(ARCH_SET_GS)", result)?;
1567    Ok(())
1568}
1569
1570pub fn sys_sched_yield() -> Result<(), Error> {
1571    // On Linux this always succeeds, although technically it could fail
1572    // due to a seccomp sandbox, so let's return an error anyway.
1573    let result = unsafe { syscall_readonly!(SYS_sched_yield) };
1574    Error::from_syscall("sched_yield", result)?;
1575    Ok(())
1576}
1577
1578pub fn sys_socketpair(domain: u32, kind: u32, protocol: u32) -> Result<(Fd, Fd), Error> {
1579    let mut output: [c_int; 2] = [-1, -1];
1580    let fd = unsafe { syscall_readonly!(SYS_socketpair, domain, kind, protocol, &mut output[..]) };
1581    Error::from_syscall("socketpair", fd)?;
1582    Ok((Fd(output[0] as c_int), Fd(output[1] as c_int)))
1583}
1584
1585pub fn sys_pipe2(flags: c_uint) -> Result<(Fd, Fd), Error> {
1586    let mut pipes: [c_int; 2] = [-1, -1];
1587    let result = unsafe { syscall_readonly!(SYS_pipe2, pipes.as_mut_ptr(), flags) };
1588    Error::from_syscall("pipe2", result)?;
1589    Ok((Fd::from_raw_unchecked(pipes[0]), Fd::from_raw_unchecked(pipes[1])))
1590}
1591
1592pub fn sys_open(path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1593    let fd = unsafe { syscall_readonly!(SYS_open, path.as_ptr(), flags, 0) };
1594    Error::from_syscall("open", fd)?;
1595    Ok(Fd(fd as c_int))
1596}
1597
1598pub fn sys_openat(dir: FdRef, path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1599    let fd = unsafe { syscall_readonly!(SYS_openat, dir, path.as_ptr(), flags, 0) };
1600    Error::from_syscall("openat", fd)?;
1601    Ok(Fd(fd as c_int))
1602}
1603
1604pub fn sys_memfd_create(name: &CStr, flags: c_uint) -> Result<Fd, Error> {
1605    let fd = unsafe { syscall_readonly!(SYS_memfd_create, name.as_ptr(), flags) };
1606    Error::from_syscall("memfd_create", fd)?;
1607    Ok(Fd(fd as c_int))
1608}
1609
1610pub fn sys_fcntl(fd: FdRef, cmd: u32, arg: u32) -> Result<i32, Error> {
1611    let result = unsafe { syscall_readonly!(SYS_fcntl, fd, cmd, arg) };
1612    Error::from_syscall("fcntl", result)?;
1613    Ok(result as i32)
1614}
1615
1616pub fn sys_fcntl_dupfd(fd: FdRef, min: c_int) -> Result<Fd, Error> {
1617    let fd = sys_fcntl(fd, F_DUPFD, min as u32)?;
1618    Ok(Fd::from_raw_unchecked(fd))
1619}
1620
1621pub fn sys_close_range(first_fd: c_int, last_fd: c_int, flags: c_uint) -> Result<(), Error> {
1622    let result = unsafe { syscall_readonly!(SYS_close_range, first_fd, last_fd, flags) };
1623    Error::from_syscall("close_range", result)
1624}
1625
1626pub fn sys_fallocate(fd: FdRef, mode: c_uint, offset: u64, length: u64) -> Result<(), Error> {
1627    let result = unsafe { syscall!(SYS_fallocate, fd, mode, offset, length) };
1628    Error::from_syscall("fallocate", result)
1629}
1630
1631pub fn sys_ftruncate(fd: FdRef, length: c_ulong) -> Result<(), Error> {
1632    let result = unsafe { syscall!(SYS_ftruncate, fd, length) };
1633    Error::from_syscall("ftruncate", result)
1634}
1635
1636pub fn sys_chdir(path: &CStr) -> Result<(), Error> {
1637    let result = unsafe { syscall_readonly!(SYS_chdir, path.as_ptr()) };
1638    Error::from_syscall("chdir", result)
1639}
1640
1641pub fn sys_fchdir(fd: FdRef) -> Result<(), Error> {
1642    let result = unsafe { syscall_readonly!(SYS_fchdir, fd) };
1643    Error::from_syscall("fchdir", result)
1644}
1645
1646pub unsafe fn sys_mmap(
1647    address: *mut c_void,
1648    length: c_size_t,
1649    protection: c_uint,
1650    flags: c_uint,
1651    fd: Option<FdRef>,
1652    offset: c_ulong,
1653) -> Result<*mut c_void, Error> {
1654    let result = syscall!(SYS_mmap, address, length, protection, flags, fd, offset);
1655    Error::from_syscall("mmap", result)?;
1656    Ok(result as *mut c_void)
1657}
1658
1659pub unsafe fn sys_munmap(address: *mut c_void, length: c_size_t) -> Result<(), Error> {
1660    let result = syscall!(SYS_munmap, address, length);
1661    Error::from_syscall("munmap", result)
1662}
1663
1664pub unsafe fn sys_mremap(
1665    address: *mut c_void,
1666    old_length: c_size_t,
1667    new_length: c_size_t,
1668    flags: c_uint,
1669    new_address: *mut c_void,
1670) -> Result<*mut c_void, Error> {
1671    let result = syscall!(SYS_mremap, address, old_length, new_length, flags, new_address);
1672    Error::from_syscall("mremap", result)?;
1673    Ok(result as *mut c_void)
1674}
1675
1676pub unsafe fn sys_mprotect(address: *mut c_void, length: c_size_t, protection: c_uint) -> Result<(), Error> {
1677    let result = syscall!(SYS_mprotect, address, length, protection);
1678    Error::from_syscall("mprotect", result)
1679}
1680
1681pub unsafe fn sys_madvise(address: *mut c_void, length: c_size_t, advice: c_uint) -> Result<(), Error> {
1682    let result = syscall!(SYS_madvise, address, length, advice);
1683    Error::from_syscall("madvise", result)
1684}
1685
1686pub fn sys_getpid() -> Result<pid_t, Error> {
1687    let result = unsafe { syscall_readonly!(SYS_getpid) };
1688    Error::from_syscall("getpid", result)?;
1689    Ok(result as pid_t)
1690}
1691
1692pub fn sys_getuid() -> Result<uid_t, Error> {
1693    let result = unsafe { syscall_readonly!(SYS_getuid) };
1694    Error::from_syscall("getuid", result)?;
1695    Ok(result as u32)
1696}
1697
1698pub fn sys_getgid() -> Result<gid_t, Error> {
1699    let result = unsafe { syscall_readonly!(SYS_getgid) };
1700    Error::from_syscall("getgid", result)?;
1701    Ok(result as u32)
1702}
1703
1704pub fn sys_kill(pid: pid_t, signal: c_uint) -> Result<(), Error> {
1705    let result = unsafe { syscall_readonly!(SYS_kill, pid, signal) };
1706    Error::from_syscall("kill", result)?;
1707    Ok(())
1708}
1709
1710pub unsafe fn sys_read_raw(fd: FdRef, buffer: *mut u8, length: usize) -> Result<c_size_t, Error> {
1711    let result = unsafe { syscall!(SYS_read, fd.raw(), buffer, length) };
1712    Error::from_syscall("read", result)?;
1713    Ok(result as c_size_t)
1714}
1715
1716pub fn sys_read(fd: FdRef, buffer: &mut [u8]) -> Result<c_size_t, Error> {
1717    unsafe { sys_read_raw(fd, buffer.as_mut_ptr(), buffer.len()) }
1718}
1719
1720pub fn sys_write(fd: FdRef, buffer: &[u8]) -> Result<c_size_t, Error> {
1721    let result = unsafe { syscall_readonly!(SYS_write, fd.raw(), buffer.as_ptr(), buffer.len()) };
1722    Error::from_syscall("write", result)?;
1723    Ok(result as c_size_t)
1724}
1725
1726pub fn sys_lseek(fd: FdRef, offset: i64, whence: u32) -> Result<u64, Error> {
1727    let result = unsafe { syscall_readonly!(SYS_lseek, fd.raw(), offset, whence) };
1728    Error::from_syscall("lseek", result)?;
1729    Ok(result as u64)
1730}
1731
1732pub unsafe fn sys_process_vm_readv(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1733    let result = unsafe {
1734        syscall!(
1735            SYS_process_vm_readv,
1736            pid,
1737            local_iovec,
1738            local_iovec.len(),
1739            remote_iovec,
1740            remote_iovec.len(),
1741            0
1742        )
1743    };
1744    Error::from_syscall("process_vm_readv", result)?;
1745    Ok(result as usize)
1746}
1747
1748pub unsafe fn sys_process_vm_writev(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1749    let result = unsafe {
1750        syscall!(
1751            SYS_process_vm_writev,
1752            pid,
1753            local_iovec,
1754            local_iovec.len(),
1755            remote_iovec,
1756            remote_iovec.len(),
1757            0
1758        )
1759    };
1760    Error::from_syscall("process_vm_writev", result)?;
1761    Ok(result as usize)
1762}
1763
1764pub unsafe fn sys_writev(fd: FdRef, iv: &[iovec]) -> Result<usize, Error> {
1765    let result = unsafe { syscall!(SYS_writev, fd, iv, iv.len()) };
1766    Error::from_syscall("writev", result)?;
1767    Ok(result as usize)
1768}
1769
1770pub fn sys_sendmsg(fd: FdRef, message: &msghdr, flags: u32) -> Result<usize, Error> {
1771    let result = unsafe { syscall_readonly!(SYS_sendmsg, fd.raw(), message as *const msghdr, flags) };
1772    Error::from_syscall("sendmsg", result)?;
1773    Ok(result as usize)
1774}
1775
1776pub fn sys_recvmsg(fd: FdRef, message: &mut msghdr, flags: u32) -> Result<usize, Error> {
1777    let result = unsafe { syscall!(SYS_recvmsg, fd.raw(), message as *mut msghdr, flags) };
1778    Error::from_syscall("recvmsg", result)?;
1779    Ok(result as usize)
1780}
1781
1782pub fn sys_exit(errcode: c_int) -> Result<(), Error> {
1783    let result = unsafe { syscall_readonly!(SYS_exit, errcode) };
1784    Error::from_syscall("exit", result)?;
1785    Ok(())
1786}
1787
1788pub fn sys_dup3(old_fd: c_int, new_fd: c_int, flags: c_uint) -> Result<(), Error> {
1789    let result = unsafe { syscall_readonly!(SYS_dup3, old_fd, new_fd, flags) };
1790    Error::from_syscall("dup3", result)?;
1791    Ok(())
1792}
1793
1794pub unsafe fn sys_execveat(
1795    dirfd: Option<FdRef>,
1796    path: &CStr,
1797    argv: &[*const c_uchar],
1798    envp: &[*const c_uchar],
1799    flags: c_uint,
1800) -> Result<(), Error> {
1801    let result = unsafe { syscall_readonly!(SYS_execveat, dirfd, path.as_ptr(), argv, envp, flags) };
1802    Error::from_syscall("execveat", result)?;
1803    Ok(())
1804}
1805
1806pub fn sys_ptrace_traceme() -> Result<(), Error> {
1807    let result = unsafe { syscall_readonly!(SYS_ptrace, 0, 0, 0) };
1808    Error::from_syscall("ptrace (PTRACE_TRACEME)", result)?;
1809    Ok(())
1810}
1811
1812pub fn sys_ptrace_interrupt(pid: pid_t) -> Result<(), Error> {
1813    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_INTERRUPT, pid, 0, 0) };
1814    Error::from_syscall("ptrace (PTRACE_INTERRUPT)", result)?;
1815    Ok(())
1816}
1817
1818pub fn sys_ptrace_attach(pid: pid_t) -> Result<(), Error> {
1819    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_ATTACH, pid, 0, 0) };
1820    Error::from_syscall("ptrace (PTRACE_ATTACH)", result)?;
1821    Ok(())
1822}
1823
1824pub fn sys_ptrace_seize(pid: pid_t) -> Result<(), Error> {
1825    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SEIZE, pid, 0, 0) };
1826    Error::from_syscall("ptrace (PTRACE_SEIZE)", result)?;
1827    Ok(())
1828}
1829
1830pub fn sys_ptrace_continue(pid: pid_t, signal: Option<u32>) -> Result<(), Error> {
1831    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_CONT, pid, 0, signal.unwrap_or(0)) };
1832    Error::from_syscall("ptrace (PTRACE_CONT)", result)?;
1833    Ok(())
1834}
1835
1836pub fn sys_ptrace_detach(pid: pid_t) -> Result<(), Error> {
1837    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_DETACH, pid, 0, 0) };
1838    Error::from_syscall("ptrace (PTRACE_DETACH)", result)?;
1839    Ok(())
1840}
1841
1842pub fn sys_ptrace_get_siginfo(pid: pid_t) -> Result<siginfo_t, Error> {
1843    let mut siginfo: siginfo_t = unsafe { core::mem::zeroed() };
1844    let result = unsafe {
1845        syscall!(
1846            SYS_ptrace,
1847            crate::arch_bindings::PTRACE_GETSIGINFO,
1848            pid,
1849            0,
1850            core::ptr::addr_of_mut!(siginfo)
1851        )
1852    };
1853    Error::from_syscall("ptrace (PTRACE_GETSIGINFO)", result)?;
1854    Ok(siginfo)
1855}
1856
/// Register set exchanged with the kernel by `sys_ptrace_getregs` and
/// `sys_ptrace_setregs`.
///
/// The layout is `repr(C)`, so the field order must not be changed.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
#[derive(Default, Debug)]
pub struct user_regs_struct {
    pub r15: c_ulong,
    pub r14: c_ulong,
    pub r13: c_ulong,
    pub r12: c_ulong,
    pub rbp: c_ulong,
    pub rbx: c_ulong,
    pub r11: c_ulong,
    pub r10: c_ulong,
    pub r9: c_ulong,
    pub r8: c_ulong,
    pub rax: c_ulong,
    pub rcx: c_ulong,
    pub rdx: c_ulong,
    pub rsi: c_ulong,
    pub rdi: c_ulong,
    pub orig_rax: c_ulong,
    pub rip: c_ulong,
    pub cs: c_ulong,
    pub flags: c_ulong,
    pub sp: c_ulong,
    pub ss: c_ulong,
    pub fs_base: c_ulong,
    pub gs_base: c_ulong,
    pub ds: c_ulong,
    pub es: c_ulong,
    pub fs: c_ulong,
    pub gs: c_ulong,
}
1889
1890pub fn sys_ptrace_getregs(pid: pid_t) -> Result<user_regs_struct, Error> {
1891    let mut output: MaybeUninit<user_regs_struct> = MaybeUninit::uninit();
1892    let result = unsafe { syscall!(SYS_ptrace, crate::arch_bindings::PTRACE_GETREGS, pid, 0, output.as_mut_ptr()) };
1893    Error::from_syscall("ptrace (PTRACE_GETREGS)", result)?;
1894
1895    unsafe { Ok(output.assume_init()) }
1896}
1897
1898pub fn sys_ptrace_setregs(pid: pid_t, regs: &user_regs_struct) -> Result<(), Error> {
1899    let regs: *const user_regs_struct = regs;
1900    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SETREGS, pid, 0, regs) };
1901    Error::from_syscall("ptrace (PTRACE_SETREGS)", result)?;
1902    Ok(())
1903}
1904
1905pub fn sys_prctl_set_no_new_privs() -> Result<(), Error> {
1906    const PR_SET_NO_NEW_PRIVS: usize = 38;
1907    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
1908    Error::from_syscall("prctl(PR_SET_NO_NEW_PRIVS)", result)
1909}
1910
1911pub fn sys_prctl_cap_ambient_clear_all() -> Result<(), Error> {
1912    const PR_CAP_AMBIENT: usize = 47;
1913    const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
1914    let result = unsafe { syscall_readonly!(SYS_prctl, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) };
1915    Error::from_syscall("prctl(PR_CAP_AMBIENT)", result)
1916}
1917
1918pub fn sys_prctl_set_securebits(bits: u32) -> Result<(), Error> {
1919    const PR_SET_SECUREBITS: usize = 28;
1920    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_SECUREBITS, bits, 0, 0, 0) };
1921    Error::from_syscall("prctl(PR_SET_SECUREBITS)", result)
1922}
1923
1924pub fn sys_prctl_set_name(name: &[u8; 16]) -> Result<(), Error> {
1925    const PR_SET_NAME: usize = 15;
1926    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NAME, name.as_ptr(), 0, 0, 0) };
1927    Error::from_syscall("prctl(PR_SET_NAME)", result)
1928}
1929
1930pub fn sys_prctl_set_dumpable(value: bool) -> Result<(), Error> {
1931    const PR_SET_DUMPABLE: usize = 4;
1932    let value = usize::from(value);
1933    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_DUMPABLE, value, 0, 0, 0) };
1934    Error::from_syscall("prctl(PR_SET_DUMPABLE)", result)
1935}
1936
1937pub fn sys_prctl_get_dumpable() -> Result<bool, Error> {
1938    const PR_GET_DUMPABLE: usize = 3;
1939    let result = unsafe { syscall_readonly!(SYS_prctl, PR_GET_DUMPABLE, 0, 0, 0, 0) };
1940    Error::from_syscall("prctl(PR_GET_DUMPABLE)", result)?;
1941    if result == 0 {
1942        Ok(false)
1943    } else {
1944        Ok(true)
1945    }
1946}
1947
1948pub fn sys_capset(header: &__user_cap_header_struct, data: &[__user_cap_data_struct; 2]) -> Result<(), Error> {
1949    let result = unsafe {
1950        syscall_readonly!(
1951            SYS_capset,
1952            header as *const __user_cap_header_struct,
1953            data as *const __user_cap_data_struct
1954        )
1955    };
1956    Error::from_syscall("capset", result)
1957}
1958
1959pub fn sys_capset_drop_all() -> Result<(), Error> {
1960    let cap_user_header = __user_cap_header_struct {
1961        version: _LINUX_CAPABILITY_VERSION_3,
1962        pid: 0,
1963    };
1964    let cap_user_data = [__user_cap_data_struct {
1965        effective: 0,
1966        inheritable: 0,
1967        permitted: 0,
1968    }; 2];
1969
1970    sys_capset(&cap_user_header, &cap_user_data)
1971}
1972
1973pub fn sys_seccomp_set_mode_filter(filter: &[sock_filter]) -> Result<(), Error> {
1974    let filter = sock_fprog {
1975        length: if let Ok(length) = c_ushort::try_from(filter.len()) {
1976            length
1977        } else {
1978            return Err(Error::from_errno("seccomp(SECCOMP_SET_MODE_FILTER)", EINVAL as i32));
1979        },
1980        filter: filter.as_ptr(),
1981    };
1982
1983    let result = unsafe { syscall_readonly!(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, core::ptr::addr_of!(filter)) };
1984    Error::from_syscall("seccomp(SECCOMP_SET_MODE_FILTER)", result)
1985}
1986
1987pub fn sys_setrlimit(resource: u32, limit: &rlimit) -> Result<(), Error> {
1988    let result = unsafe { syscall_readonly!(SYS_setrlimit, resource, limit as *const rlimit) };
1989    Error::from_syscall("setrlimit", result)
1990}
1991
1992pub fn sys_sethostname(name: &str) -> Result<(), Error> {
1993    let result = unsafe { syscall_readonly!(SYS_sethostname, name.as_ptr(), name.len()) };
1994    Error::from_syscall("sethostname", result)
1995}
1996
1997pub fn sys_setdomainname(name: &str) -> Result<(), Error> {
1998    let result = unsafe { syscall_readonly!(SYS_setdomainname, name.as_ptr(), name.len()) };
1999    Error::from_syscall("setdomainname", result)
2000}
2001
2002pub fn sys_mount(dev_name: &CStr, dir_name: &CStr, kind: &CStr, flags: u32, data: Option<&CStr>) -> Result<(), Error> {
2003    let result = unsafe {
2004        syscall_readonly!(
2005            SYS_mount,
2006            dev_name.as_ptr(),
2007            dir_name.as_ptr(),
2008            kind.as_ptr(),
2009            flags,
2010            data.map_or(core::ptr::null(), |data| data.as_ptr())
2011        )
2012    };
2013    Error::from_syscall("mount", result)
2014}
2015
2016pub fn sys_umount2(target: &CStr, flags: u32) -> Result<(), Error> {
2017    let result = unsafe { syscall_readonly!(SYS_umount2, target.as_ptr(), flags) };
2018    Error::from_syscall("umount2", result)
2019}
2020
2021pub fn sys_pivot_root(new_root: &CStr, old_root: &CStr) -> Result<(), Error> {
2022    let result = unsafe { syscall_readonly!(SYS_pivot_root, new_root.as_ptr(), old_root.as_ptr()) };
2023    Error::from_syscall("pivot_root", result)
2024}
2025
2026pub fn sys_unshare(flags: u32) -> Result<(), Error> {
2027    let result = unsafe { syscall_readonly!(SYS_unshare, flags) };
2028    Error::from_syscall("unshare", result)
2029}
2030
2031/// Calls the `futex` syscall with `FUTEX_WAIT` operation.
2032///
2033/// This will block *if* the value of the `futex` is equal to the `expected_value`.
2034///
2035/// Possible non-fatal errors:
2036///   - `EAGAIN`: the value of `futex` is not equal to `expected_value`
2037///   - `EINTR`: the syscall was interrupted by a signal
2038///   - `ETIMEDOUT`: the specified timeout has elapsed without the futex being woken up
2039pub fn sys_futex_wait(futex: &AtomicU32, expected_value: u32, timeout: Option<Duration>) -> Result<(), Error> {
2040    let ts: Option<timespec> = timeout.map(|timeout| timespec {
2041        tv_sec: timeout.as_secs() as i64,
2042        tv_nsec: u64::from(timeout.subsec_nanos()) as i64,
2043    });
2044
2045    let result = unsafe {
2046        syscall!(
2047            SYS_futex,
2048            futex as *const AtomicU32,
2049            FUTEX_WAIT,
2050            expected_value,
2051            ts.as_ref().map_or(core::ptr::null(), |ts| ts as *const timespec)
2052        )
2053    };
2054    Error::from_syscall("futex (wait)", result)
2055}
2056
2057/// Wakes up at most one thread waiting on `futex`.
2058///
2059/// Will return `true` if anybody was woken up.
2060pub fn sys_futex_wake_one(futex: &AtomicU32) -> Result<bool, Error> {
2061    let result = unsafe { syscall_readonly!(SYS_futex, futex as *const AtomicU32, FUTEX_WAKE, 1) };
2062    Error::from_syscall("futex (wake)", result)?;
2063    Ok(result == 1)
2064}
2065
2066pub fn sys_set_tid_address(address: *const u32) -> Result<(), Error> {
2067    let result = unsafe { syscall_readonly!(SYS_set_tid_address, address) };
2068    Error::from_syscall("set_tid_address", result)?;
2069    Ok(())
2070}
2071
2072pub unsafe fn sys_rt_sigaction(signal: u32, new_action: &kernel_sigaction, old_action: Option<&mut kernel_sigaction>) -> Result<(), Error> {
2073    let result = unsafe {
2074        syscall_readonly!(
2075            SYS_rt_sigaction,
2076            signal,
2077            new_action as *const kernel_sigaction,
2078            old_action.map_or(core::ptr::null_mut(), |old_action| old_action as *mut kernel_sigaction),
2079            core::mem::size_of::<kernel_sigset_t>()
2080        )
2081    };
2082    Error::from_syscall("rt_sigaction", result)?;
2083    Ok(())
2084}
2085
2086pub unsafe fn sys_rt_sigprocmask(how: u32, new_sigset: &kernel_sigset_t, old_sigset: Option<&mut kernel_sigset_t>) -> Result<(), Error> {
2087    let result = unsafe {
2088        syscall_readonly!(
2089            SYS_rt_sigprocmask,
2090            how,
2091            new_sigset as *const kernel_sigset_t,
2092            old_sigset.map_or(core::ptr::null_mut(), |old_sigset| old_sigset as *mut kernel_sigset_t),
2093            core::mem::size_of::<kernel_sigset_t>()
2094        )
2095    };
2096    Error::from_syscall("rt_sigprocmask", result)?;
2097    Ok(())
2098}
2099
2100pub unsafe fn sys_sigaltstack(new_stack: &stack_t, old_stack: Option<&mut stack_t>) -> Result<(), Error> {
2101    let result = unsafe {
2102        syscall_readonly!(
2103            SYS_sigaltstack,
2104            new_stack as *const stack_t,
2105            old_stack.map_or(core::ptr::null_mut(), |old_stack| old_stack as *mut stack_t)
2106        )
2107    };
2108    Error::from_syscall("sigaltstack", result)?;
2109    Ok(())
2110}
2111
2112pub fn sys_clock_gettime(clock_id: u32) -> Result<Duration, Error> {
2113    let mut output = timespec { tv_sec: 0, tv_nsec: 0 };
2114    let result = unsafe { syscall_readonly!(SYS_clock_gettime, clock_id, core::ptr::addr_of_mut!(output)) };
2115    Error::from_syscall("clock_gettime", result)?;
2116
2117    let duration = Duration::new(output.tv_sec as u64, output.tv_nsec as u32);
2118    Ok(duration)
2119}
2120
2121pub fn sys_nanosleep(duration: Duration) -> Result<Option<Duration>, Error> {
2122    let duration = timespec {
2123        tv_sec: duration.as_secs() as i64,
2124        tv_nsec: u64::from(duration.subsec_nanos()) as i64,
2125    };
2126
2127    let mut remaining = timespec { tv_sec: 0, tv_nsec: 0 };
2128    let result = unsafe { syscall_readonly!(SYS_nanosleep, core::ptr::addr_of!(duration), core::ptr::addr_of_mut!(remaining)) };
2129    let error = Error::from_syscall("nanosleep", result);
2130    if let Err(error) = error {
2131        if error.errno() == EINTR {
2132            let remaining = Duration::new(remaining.tv_sec as u64, remaining.tv_nsec as u32);
2133            Ok(Some(remaining))
2134        } else {
2135            Err(error)
2136        }
2137    } else {
2138        Ok(None)
2139    }
2140}
2141
2142pub fn sys_waitid(which: u32, pid: pid_t, info: &mut siginfo_t, options: u32, usage: Option<&mut rusage>) -> Result<(), Error> {
2143    let result = unsafe {
2144        syscall_readonly!(
2145            SYS_waitid,
2146            which,
2147            pid,
2148            info as *mut siginfo_t,
2149            options,
2150            usage.map_or(core::ptr::null_mut(), |usage| usage as *mut rusage)
2151        )
2152    };
2153
2154    Error::from_syscall("waitid", result)?;
2155    Ok(())
2156}
2157
2158pub fn vm_read_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2159    pid: pid_t,
2160    local: [&mut [MaybeUninit<u8>]; N_LOCAL],
2161    remote: [(usize, usize); N_REMOTE],
2162) -> Result<usize, Error> {
2163    let local_iovec = local.map(|slice| iovec {
2164        iov_base: slice.as_mut_ptr().cast(),
2165        iov_len: slice.len() as u64,
2166    });
2167    let remote_iovec = remote.map(|(address, length)| iovec {
2168        iov_base: address as *mut c_void,
2169        iov_len: length as u64,
2170    });
2171    unsafe { sys_process_vm_readv(pid, &local_iovec, &remote_iovec) }
2172}
2173
2174pub fn vm_write_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2175    pid: pid_t,
2176    local: [&[u8]; N_LOCAL],
2177    remote: [(usize, usize); N_REMOTE],
2178) -> Result<usize, Error> {
2179    let local_iovec = local.map(|slice| iovec {
2180        iov_base: slice.as_ptr().cast_mut().cast(),
2181        iov_len: slice.len() as u64,
2182    });
2183    let remote_iovec = remote.map(|(address, length)| iovec {
2184        iov_base: address as *mut c_void,
2185        iov_len: length as u64,
2186    });
2187    unsafe { sys_process_vm_writev(pid, &local_iovec, &remote_iovec) }
2188}
2189
2190pub fn writev<const N: usize>(fd: FdRef, list: [&[u8]; N]) -> Result<usize, Error> {
2191    let iv = list.map(|slice| iovec {
2192        iov_base: slice.as_ptr().cast_mut().cast(),
2193        iov_len: slice.len() as u64,
2194    });
2195    unsafe { sys_writev(fd, &iv) }
2196}
2197
2198#[inline(always)] // To prevent the buffer from being copied.
2199pub fn readdir(dirfd: FdRef) -> Dirent64Iter {
2200    Dirent64Iter {
2201        dirfd,
2202        buffer: [0; 1024], // TODO: Use MaybeUninit.
2203        bytes_available: 0,
2204        position: 0,
2205    }
2206}
2207
2208#[repr(transparent)]
2209pub struct Dirent64<'a> {
2210    raw: linux_dirent64,
2211    _lifetime: core::marker::PhantomData<&'a [u8]>,
2212}
2213
2214impl<'a> Dirent64<'a> {
2215    pub fn d_type(&self) -> c_uchar {
2216        self.raw.d_type
2217    }
2218
2219    pub fn d_name(&self) -> &'a [u8] {
2220        unsafe {
2221            let name = self.raw.d_name.as_ptr();
2222            let length = {
2223                let mut p = self.raw.d_name.as_ptr();
2224                while *p != 0 {
2225                    p = p.add(1);
2226                }
2227
2228                p as usize - name as usize
2229            };
2230
2231            core::slice::from_raw_parts(name.cast(), length)
2232        }
2233    }
2234}
2235
2236pub struct Dirent64Iter<'a> {
2237    dirfd: FdRef<'a>,
2238    buffer: [u8; 1024],
2239    bytes_available: usize,
2240    position: usize,
2241}
2242
2243impl<'a> Iterator for Dirent64Iter<'a> {
2244    type Item = Result<Dirent64<'a>, Error>;
2245    fn next(&mut self) -> Option<Self::Item> {
2246        loop {
2247            if self.position < self.bytes_available {
2248                let dirent = unsafe { core::ptr::read_unaligned(self.buffer.as_ptr().add(self.position).cast::<Dirent64>()) };
2249
2250                self.position += usize::from(dirent.raw.d_reclen);
2251                return Some(Ok(dirent));
2252            }
2253
2254            match sys_getdents64(self.dirfd, &mut self.buffer) {
2255                Ok(Some(bytes_available)) => self.bytes_available = bytes_available,
2256                Ok(None) => return None,
2257                Err(error) => return Some(Err(error)),
2258            };
2259        }
2260    }
2261}
2262
2263pub fn sendfd(socket: FdRef, fd: FdRef) -> Result<(), Error> {
2264    let mut dummy: c_int = 0;
2265    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2266
2267    let mut iov = iovec {
2268        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2269        iov_len: core::mem::size_of_val(&dummy) as u64,
2270    };
2271
2272    let mut header = msghdr {
2273        msg_name: core::ptr::null_mut(),
2274        msg_namelen: 0,
2275        msg_iov: &mut iov,
2276        msg_iovlen: 1,
2277        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2278        msg_controllen: core::mem::size_of_val(&buffer),
2279        msg_flags: 0,
2280    };
2281
2282    let control_header = cmsghdr {
2283        cmsg_len: CMSG_LEN(core::mem::size_of::<c_int>()),
2284        cmsg_level: SOL_SOCKET,
2285        cmsg_type: SCM_RIGHTS,
2286    };
2287
2288    #[allow(clippy::cast_ptr_alignment)]
2289    unsafe {
2290        core::ptr::write_unaligned(CMSG_FIRSTHDR(&header), control_header);
2291        core::ptr::write_unaligned(CMSG_DATA(buffer.as_mut_ptr().cast::<cmsghdr>()).cast::<c_int>(), fd.raw());
2292    }
2293
2294    header.msg_controllen = CMSG_LEN(core::mem::size_of::<c_int>());
2295    sys_sendmsg(socket, &header, MSG_NOSIGNAL)?;
2296
2297    Ok(())
2298}
2299
2300pub fn recvfd(socket: FdRef) -> Result<Fd, Error> {
2301    let mut dummy: c_int = 0;
2302    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2303
2304    let mut iov = iovec {
2305        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2306        iov_len: core::mem::size_of_val(&dummy) as u64,
2307    };
2308
2309    let mut header = msghdr {
2310        msg_name: core::ptr::null_mut(),
2311        msg_namelen: 0,
2312        msg_iov: &mut iov,
2313        msg_iovlen: 1,
2314        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2315        msg_controllen: core::mem::size_of_val(&buffer),
2316        msg_flags: 0,
2317    };
2318
2319    let count = sys_recvmsg(socket, &mut header, 0)?;
2320    if count == 0 {
2321        return Err(Error::from_str("recvfd failed: received zero bytes"));
2322    }
2323
2324    if count != core::mem::size_of::<c_int>() {
2325        return Err(Error::from_str("recvfd failed: received unexpected number of bytes"));
2326    }
2327
2328    if header.msg_controllen != CMSG_SPACE(core::mem::size_of::<c_int>()) {
2329        return Err(Error::from_str("recvfd failed: invalid control message size"));
2330    }
2331
2332    let control_header = unsafe { &mut *header.msg_control.cast::<cmsghdr>() };
2333
2334    if control_header.cmsg_level != SOL_SOCKET {
2335        return Err(Error::from_str("recvfd failed: invalid control message level"));
2336    }
2337
2338    if control_header.cmsg_type != SCM_RIGHTS {
2339        return Err(Error::from_str("recvfd failed: invalid control message type"));
2340    }
2341
2342    let fd = unsafe { core::ptr::read_unaligned(CMSG_DATA(control_header).cast::<c_int>()) };
2343
2344    Ok(Fd::from_raw_unchecked(fd))
2345}