polkavm_linux_raw/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3#![deny(clippy::panic)]
4#![deny(clippy::unwrap_used)]
5#![deny(clippy::expect_used)]
6#![deny(clippy::unreachable)]
7#![deny(clippy::indexing_slicing)]
8#![allow(clippy::collapsible_else_if)]
9#![allow(clippy::len_without_is_empty)]
10#![allow(clippy::manual_range_contains)]
11// This crate mostly contains syscall wrappers. If you use them you should know what you're doing.
12#![allow(clippy::missing_safety_doc)]
13#![allow(clippy::undocumented_unsafe_blocks)]
14#![cfg(all(target_os = "linux", target_arch = "x86_64"))]
15
16#[cfg(feature = "std")]
17extern crate std;
18
19mod syscall;
20
21#[cfg(target_arch = "x86_64")]
22#[doc(hidden)]
23pub mod arch_amd64_syscall;
24
25#[cfg(target_arch = "x86_64")]
26#[allow(dead_code)]
27#[allow(non_upper_case_globals)]
28#[allow(non_camel_case_types)]
29#[allow(non_snake_case)]
30#[allow(clippy::ptr_as_ptr)]
31#[allow(clippy::used_underscore_binding)]
32#[allow(clippy::transmute_ptr_to_ptr)]
33mod arch_amd64_bindings;
34
35mod io_uring;
36mod mmap;
37
38pub use io_uring::IoUring;
39pub use mmap::Mmap;
40
41#[cfg(target_arch = "x86_64")]
42#[doc(hidden)]
43pub use arch_amd64_syscall as syscall_impl;
44
45pub use core::ffi::{c_int, c_long, c_uchar, c_uint, c_ulong, c_ushort, c_void};
46
47use core::ffi::CStr;
48use core::marker::PhantomData;
49use core::mem::MaybeUninit;
50use core::sync::atomic::AtomicU32;
51use core::time::Duration;
52
53#[cfg(feature = "std")]
54use std::borrow::Cow;
55
56// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/88345
// Crate-private spelling of the C `size_t` type; see the TODO above for why it is defined here.
#[allow(non_camel_case_types)]
type c_size_t = usize;

/// Public C-style name for the pointer-sized unsigned integer used by the bindings.
#[allow(non_camel_case_types)]
pub type size_t = c_size_t;

// Doesn't appear in public headers, so each flag bit is written out by hand.
pub const MNT_FORCE: u32 = 1 << 0;
pub const MNT_DETACH: u32 = 1 << 1;
pub const MNT_EXPIRE: u32 = 1 << 2;

// Special handler values (default / ignore).
pub const SIG_DFL: usize = 0x0;
pub const SIG_IGN: usize = 0x1;

// Bindgen seems to not want to emit this constant,
// so it is defined manually (bit 1).
pub const HWCAP2_FSGSBASE: usize = 0b10;
74
75pub(crate) use crate::arch_amd64_bindings as arch_bindings;
76
77#[rustfmt::skip]
78pub use crate::arch_bindings::{
79    __kernel_gid_t as gid_t,
80    __kernel_pid_t as pid_t,
81    __kernel_uid_t as uid_t,
82    __NR_arch_prctl as SYS_arch_prctl,
83    __NR_capset as SYS_capset,
84    __NR_chdir as SYS_chdir,
85    __NR_clock_gettime as SYS_clock_gettime,
86    __NR_clone as SYS_clone,
87    __NR_clone3 as SYS_clone3,
88    __NR_close as SYS_close,
89    __NR_close_range as SYS_close_range,
90    __NR_dup3 as SYS_dup3,
91    __NR_execveat as SYS_execveat,
92    __NR_exit as SYS_exit,
93    __NR_fallocate as SYS_fallocate,
94    __NR_fchdir as SYS_fchdir,
95    __NR_fcntl as SYS_fcntl,
96    __NR_ftruncate as SYS_ftruncate,
97    __NR_futex as SYS_futex,
98    __NR_getdents64 as SYS_getdents64,
99    __NR_getgid as SYS_getgid,
100    __NR_getpid as SYS_getpid,
101    __NR_getuid as SYS_getuid,
102    __NR_io_uring_enter as SYS_io_uring_enter,
103    __NR_io_uring_register as SYS_io_uring_register,
104    __NR_io_uring_setup as SYS_io_uring_setup,
105    __NR_ioctl as SYS_ioctl,
106    __NR_kill as SYS_kill,
107    __NR_lseek as SYS_lseek,
108    __NR_nanosleep as SYS_nanosleep,
109    __NR_madvise as SYS_madvise,
110    __NR_memfd_create as SYS_memfd_create,
111    __NR_mmap as SYS_mmap,
112    __NR_mlock as SYS_mlock,
113    __NR_mlockall as SYS_mlockall,
114    __NR_mount as SYS_mount,
115    __NR_mprotect as SYS_mprotect,
116    __NR_mremap as SYS_mremap,
117    __NR_munmap as SYS_munmap,
118    __NR_open as SYS_open,
119    __NR_openat as SYS_openat,
120    __NR_perf_event_open as SYS_perf_event_open,
121    __NR_pidfd_send_signal as SYS_pidfd_send_signal,
122    __NR_pipe2 as SYS_pipe2,
123    __NR_pivot_root as SYS_pivot_root,
124    __NR_prctl as SYS_prctl,
125    __NR_process_vm_readv as SYS_process_vm_readv,
126    __NR_process_vm_writev as SYS_process_vm_writev,
127    __NR_ptrace as SYS_ptrace,
128    __NR_read as SYS_read,
129    __NR_recvmsg as SYS_recvmsg,
130    __NR_rseq as SYS_rseq,
131    __NR_rt_sigaction as SYS_rt_sigaction,
132    __NR_rt_sigprocmask as SYS_rt_sigprocmask,
133    __NR_rt_sigreturn as SYS_rt_sigreturn,
134    __NR_sched_yield as SYS_sched_yield,
135    __NR_seccomp as SYS_seccomp,
136    __NR_sendmsg as SYS_sendmsg,
137    __NR_set_tid_address as SYS_set_tid_address,
138    __NR_setdomainname as SYS_setdomainname,
139    __NR_sethostname as SYS_sethostname,
140    __NR_setrlimit as SYS_setrlimit,
141    __NR_sigaltstack as SYS_sigaltstack,
142    __NR_socketpair as SYS_socketpair,
143    __NR_umount2 as SYS_umount2,
144    __NR_uname as SYS_uname,
145    __NR_unshare as SYS_unshare,
146    __NR_userfaultfd as SYS_userfaultfd,
147    __NR_waitid as SYS_waitid,
148    __NR_write as SYS_write,
149    __NR_writev as SYS_writev,
150    __user_cap_data_struct,
151    __user_cap_header_struct,
152    __WALL,
153    _LINUX_CAPABILITY_VERSION_3,
154    ARCH_GET_FS,
155    ARCH_GET_GS,
156    ARCH_SET_FS,
157    ARCH_SET_GS,
158    AT_EMPTY_PATH,
159    AT_HWCAP2,
160    AT_MINSIGSTKSZ,
161    AT_NULL,
162    AT_PAGESZ,
163    AT_SYSINFO_EHDR,
164    CLD_CONTINUED,
165    CLD_DUMPED,
166    CLD_EXITED,
167    CLD_KILLED,
168    CLD_STOPPED,
169    CLD_TRAPPED,
170    CLOCK_MONOTONIC_RAW,
171    CLONE_CLEAR_SIGHAND,
172    CLONE_NEWCGROUP,
173    CLONE_NEWIPC,
174    CLONE_NEWNET,
175    CLONE_NEWNS,
176    CLONE_NEWPID,
177    CLONE_NEWUSER,
178    CLONE_NEWUTS,
179    CLONE_PIDFD,
180    E2BIG,
181    EACCES,
182    EAGAIN,
183    EBADF,
184    EBUSY,
185    ECHILD,
186    EDOM,
187    EEXIST,
188    EFAULT,
189    EFBIG,
190    EINTR,
191    EINVAL,
192    EIO,
193    EISDIR,
194    EMFILE,
195    EMLINK,
196    ENFILE,
197    ENODEV,
198    ENOENT,
199    ENOEXEC,
200    ENOMEM,
201    ENOSPC,
202    ENOTBLK,
203    ENOTDIR,
204    ENOTTY,
205    ENXIO,
206    EOPNOTSUPP,
207    EPERM,
208    EPIPE,
209    ERANGE,
210    EROFS,
211    ESPIPE,
212    ESRCH,
213    ETIMEDOUT,
214    ETOOMANYREFS,
215    ETXTBSY,
216    EXDEV,
217    ERESTARTSYS,
218    F_ADD_SEALS,
219    F_DUPFD,
220    F_GETFD,
221    F_SEAL_EXEC,
222    F_SEAL_FUTURE_WRITE,
223    F_SEAL_GROW,
224    F_SEAL_SEAL,
225    F_SEAL_SHRINK,
226    F_SEAL_WRITE,
227    F_SETFD,
228    F_SETFL,
229    F_SETOWN,
230    F_SETSIG,
231    FALLOC_FL_COLLAPSE_RANGE,
232    FALLOC_FL_INSERT_RANGE,
233    FALLOC_FL_KEEP_SIZE,
234    FALLOC_FL_NO_HIDE_STALE,
235    FALLOC_FL_PUNCH_HOLE,
236    FALLOC_FL_UNSHARE_RANGE,
237    FALLOC_FL_ZERO_RANGE,
238    FUTEX_BITSET_MATCH_ANY,
239    FUTEX_WAIT,
240    FUTEX_WAKE,
241    FUTEX2_SIZE_U32,
242    io_cqring_offsets,
243    io_sqring_offsets,
244    io_uring_buf_reg,
245    io_uring_buf_ring,
246    io_uring_buf_status,
247    io_uring_buf,
248    io_uring_cqe,
249    io_uring_file_index_range,
250    io_uring_files_update,
251    io_uring_getevents_arg,
252    io_uring_napi,
253    io_uring_op_IORING_OP_ACCEPT,
254    io_uring_op_IORING_OP_ASYNC_CANCEL,
255    io_uring_op_IORING_OP_CLOSE,
256    io_uring_op_IORING_OP_CONNECT,
257    io_uring_op_IORING_OP_EPOLL_CTL,
258    io_uring_op_IORING_OP_FADVISE,
259    io_uring_op_IORING_OP_FALLOCATE,
260    io_uring_op_IORING_OP_FGETXATTR,
261    io_uring_op_IORING_OP_FILES_UPDATE,
262    io_uring_op_IORING_OP_FIXED_FD_INSTALL,
263    io_uring_op_IORING_OP_FSETXATTR,
264    io_uring_op_IORING_OP_FSYNC,
265    io_uring_op_IORING_OP_FTRUNCATE,
266    io_uring_op_IORING_OP_FUTEX_WAIT,
267    io_uring_op_IORING_OP_FUTEX_WAITV,
268    io_uring_op_IORING_OP_FUTEX_WAKE,
269    io_uring_op_IORING_OP_GETXATTR,
270    io_uring_op_IORING_OP_LAST,
271    io_uring_op_IORING_OP_LINK_TIMEOUT,
272    io_uring_op_IORING_OP_LINKAT,
273    io_uring_op_IORING_OP_MADVISE,
274    io_uring_op_IORING_OP_MKDIRAT,
275    io_uring_op_IORING_OP_MSG_RING,
276    io_uring_op_IORING_OP_NOP,
277    io_uring_op_IORING_OP_OPENAT,
278    io_uring_op_IORING_OP_OPENAT2,
279    io_uring_op_IORING_OP_POLL_ADD,
280    io_uring_op_IORING_OP_POLL_REMOVE,
281    io_uring_op_IORING_OP_PROVIDE_BUFFERS,
282    io_uring_op_IORING_OP_READ_FIXED,
283    io_uring_op_IORING_OP_READ_MULTISHOT,
284    io_uring_op_IORING_OP_READ,
285    io_uring_op_IORING_OP_READV,
286    io_uring_op_IORING_OP_RECV,
287    io_uring_op_IORING_OP_RECVMSG,
288    io_uring_op_IORING_OP_REMOVE_BUFFERS,
289    io_uring_op_IORING_OP_RENAMEAT,
290    io_uring_op_IORING_OP_SEND_ZC,
291    io_uring_op_IORING_OP_SEND,
292    io_uring_op_IORING_OP_SENDMSG_ZC,
293    io_uring_op_IORING_OP_SENDMSG,
294    io_uring_op_IORING_OP_SETXATTR,
295    io_uring_op_IORING_OP_SHUTDOWN,
296    io_uring_op_IORING_OP_SOCKET,
297    io_uring_op_IORING_OP_SPLICE,
298    io_uring_op_IORING_OP_STATX,
299    io_uring_op_IORING_OP_SYMLINKAT,
300    io_uring_op_IORING_OP_SYNC_FILE_RANGE,
301    io_uring_op_IORING_OP_TEE,
302    io_uring_op_IORING_OP_TIMEOUT_REMOVE,
303    io_uring_op_IORING_OP_TIMEOUT,
304    io_uring_op_IORING_OP_UNLINKAT,
305    io_uring_op_IORING_OP_URING_CMD,
306    io_uring_op_IORING_OP_WAITID,
307    io_uring_op_IORING_OP_WRITE_FIXED,
308    io_uring_op_IORING_OP_WRITE,
309    io_uring_op_IORING_OP_WRITEV,
310    io_uring_params,
311    io_uring_probe_op,
312    io_uring_probe,
313    io_uring_recvmsg_out,
314    io_uring_restriction,
315    io_uring_rsrc_register,
316    io_uring_rsrc_update,
317    io_uring_rsrc_update2,
318    io_uring_sqe,
319    io_uring_sync_cancel_reg,
320    IORING_ACCEPT_MULTISHOT,
321    IORING_ASYNC_CANCEL_ALL,
322    IORING_ASYNC_CANCEL_ANY,
323    IORING_ASYNC_CANCEL_FD_FIXED,
324    IORING_ASYNC_CANCEL_FD,
325    IORING_ASYNC_CANCEL_OP,
326    IORING_ASYNC_CANCEL_USERDATA,
327    IORING_CQ_EVENTFD_DISABLED,
328    IORING_CQE_BUFFER_SHIFT,
329    IORING_CQE_F_BUFFER,
330    IORING_CQE_F_MORE,
331    IORING_CQE_F_NOTIF,
332    IORING_CQE_F_SOCK_NONEMPTY,
333    IORING_ENTER_EXT_ARG,
334    IORING_ENTER_GETEVENTS,
335    IORING_ENTER_REGISTERED_RING,
336    IORING_ENTER_SQ_WAIT,
337    IORING_ENTER_SQ_WAKEUP,
338    IORING_FEAT_CQE_SKIP,
339    IORING_FEAT_CUR_PERSONALITY,
340    IORING_FEAT_EXT_ARG,
341    IORING_FEAT_FAST_POLL,
342    IORING_FEAT_LINKED_FILE,
343    IORING_FEAT_NATIVE_WORKERS,
344    IORING_FEAT_NODROP,
345    IORING_FEAT_POLL_32BITS,
346    IORING_FEAT_REG_REG_RING,
347    IORING_FEAT_RSRC_TAGS,
348    IORING_FEAT_RW_CUR_POS,
349    IORING_FEAT_SINGLE_MMAP,
350    IORING_FEAT_SQPOLL_NONFIXED,
351    IORING_FEAT_SUBMIT_STABLE,
352    IORING_FILE_INDEX_ALLOC,
353    IORING_FIXED_FD_NO_CLOEXEC,
354    IORING_FSYNC_DATASYNC,
355    IORING_LINK_TIMEOUT_UPDATE,
356    IORING_MSG_DATA,
357    IORING_MSG_RING_CQE_SKIP,
358    IORING_MSG_RING_FLAGS_PASS,
359    IORING_MSG_SEND_FD,
360    IORING_NOTIF_USAGE_ZC_COPIED,
361    IORING_OFF_CQ_RING,
362    IORING_OFF_MMAP_MASK,
363    IORING_OFF_PBUF_RING,
364    IORING_OFF_PBUF_SHIFT,
365    IORING_OFF_SQ_RING,
366    IORING_OFF_SQES,
367    IORING_POLL_ADD_LEVEL,
368    IORING_POLL_ADD_MULTI,
369    IORING_POLL_UPDATE_EVENTS,
370    IORING_POLL_UPDATE_USER_DATA,
371    IORING_RECV_MULTISHOT,
372    IORING_RECVSEND_FIXED_BUF,
373    IORING_RECVSEND_POLL_FIRST,
374    IORING_REGISTER_BUFFERS_UPDATE,
375    IORING_REGISTER_BUFFERS,
376    IORING_REGISTER_BUFFERS2,
377    IORING_REGISTER_ENABLE_RINGS,
378    IORING_REGISTER_EVENTFD_ASYNC,
379    IORING_REGISTER_EVENTFD,
380    IORING_REGISTER_FILE_ALLOC_RANGE,
381    IORING_REGISTER_FILES_SKIP,
382    IORING_REGISTER_FILES_UPDATE,
383    IORING_REGISTER_FILES_UPDATE2,
384    IORING_REGISTER_FILES,
385    IORING_REGISTER_FILES2,
386    IORING_REGISTER_IOWQ_AFF,
387    IORING_REGISTER_IOWQ_MAX_WORKERS,
388    IORING_REGISTER_LAST,
389    IORING_REGISTER_NAPI,
390    IORING_REGISTER_PBUF_RING,
391    IORING_REGISTER_PBUF_STATUS,
392    IORING_REGISTER_PERSONALITY,
393    IORING_REGISTER_PROBE,
394    IORING_REGISTER_RESTRICTIONS,
395    IORING_REGISTER_RING_FDS,
396    IORING_REGISTER_SYNC_CANCEL,
397    IORING_REGISTER_USE_REGISTERED_RING,
398    IORING_RESTRICTION_LAST,
399    IORING_RESTRICTION_REGISTER_OP,
400    IORING_RESTRICTION_SQE_FLAGS_ALLOWED,
401    IORING_RESTRICTION_SQE_FLAGS_REQUIRED,
402    IORING_RESTRICTION_SQE_OP,
403    IORING_RSRC_REGISTER_SPARSE,
404    IORING_SEND_ZC_REPORT_USAGE,
405    IORING_SETUP_ATTACH_WQ,
406    IORING_SETUP_CLAMP,
407    IORING_SETUP_COOP_TASKRUN,
408    IORING_SETUP_CQE32,
409    IORING_SETUP_CQSIZE,
410    IORING_SETUP_DEFER_TASKRUN,
411    IORING_SETUP_IOPOLL,
412    IORING_SETUP_NO_MMAP,
413    IORING_SETUP_NO_SQARRAY,
414    IORING_SETUP_R_DISABLED,
415    IORING_SETUP_REGISTERED_FD_ONLY,
416    IORING_SETUP_SINGLE_ISSUER,
417    IORING_SETUP_SQ_AFF,
418    IORING_SETUP_SQE128,
419    IORING_SETUP_SQPOLL,
420    IORING_SETUP_SUBMIT_ALL,
421    IORING_SETUP_TASKRUN_FLAG,
422    IORING_SQ_CQ_OVERFLOW,
423    IORING_SQ_NEED_WAKEUP,
424    IORING_SQ_TASKRUN,
425    IORING_TIMEOUT_ABS,
426    IORING_TIMEOUT_BOOTTIME,
427    IORING_TIMEOUT_CLOCK_MASK,
428    IORING_TIMEOUT_ETIME_SUCCESS,
429    IORING_TIMEOUT_MULTISHOT,
430    IORING_TIMEOUT_REALTIME,
431    IORING_TIMEOUT_UPDATE_MASK,
432    IORING_TIMEOUT_UPDATE,
433    IORING_UNREGISTER_BUFFERS,
434    IORING_UNREGISTER_EVENTFD,
435    IORING_UNREGISTER_FILES,
436    IORING_UNREGISTER_IOWQ_AFF,
437    IORING_UNREGISTER_NAPI,
438    IORING_UNREGISTER_PBUF_RING,
439    IORING_UNREGISTER_PERSONALITY,
440    IORING_UNREGISTER_RING_FDS,
441    IORING_URING_CMD_FIXED,
442    IORING_URING_CMD_MASK,
443    IOSQE_ASYNC_BIT,
444    IOSQE_BUFFER_SELECT_BIT,
445    IOSQE_CQE_SKIP_SUCCESS_BIT,
446    IOSQE_FIXED_FILE_BIT,
447    IOSQE_IO_DRAIN_BIT,
448    IOSQE_IO_HARDLINK_BIT,
449    IOSQE_IO_LINK_BIT,
450    iovec,
451    linux_dirent64,
452    MADV_COLD,
453    MADV_COLLAPSE,
454    MADV_DODUMP,
455    MADV_DOFORK,
456    MADV_DONTDUMP,
457    MADV_DONTFORK,
458    MADV_DONTNEED_LOCKED,
459    MADV_DONTNEED,
460    MADV_FREE,
461    MADV_HUGEPAGE,
462    MADV_HWPOISON,
463    MADV_KEEPONFORK,
464    MADV_MERGEABLE,
465    MADV_NOHUGEPAGE,
466    MADV_NORMAL,
467    MADV_PAGEOUT,
468    MADV_POPULATE_READ,
469    MADV_POPULATE_WRITE,
470    MADV_RANDOM,
471    MADV_REMOVE,
472    MADV_SEQUENTIAL,
473    MADV_SOFT_OFFLINE,
474    MADV_UNMERGEABLE,
475    MADV_WILLNEED,
476    MADV_WIPEONFORK,
477    MAP_ANONYMOUS,
478    MAP_FIXED,
479    MAP_HUGE_1GB,
480    MAP_HUGE_2MB,
481    MAP_HUGETLB,
482    MAP_POPULATE,
483    MAP_PRIVATE,
484    MAP_SHARED,
485    MCL_CURRENT,
486    MCL_FUTURE,
487    MCL_ONFAULT,
488    MFD_ALLOW_SEALING,
489    MFD_CLOEXEC,
490    MFD_HUGE_1GB,
491    MFD_HUGE_2MB,
492    MFD_HUGETLB,
493    MINSIGSTKSZ,
494    MREMAP_FIXED,
495    MREMAP_MAYMOVE,
496    MS_BIND,
497    MS_NODEV,
498    MS_NOEXEC,
499    MS_NOSUID,
500    MS_PRIVATE,
501    MS_RDONLY,
502    MS_REC,
503    new_utsname,
504    O_CLOEXEC,
505    O_DIRECTORY,
506    O_NONBLOCK,
507    O_PATH,
508    O_RDONLY,
509    O_RDWR,
510    O_WRONLY,
511    P_ALL,
512    P_PGID,
513    P_PID,
514    P_PIDFD,
515    PROT_EXEC,
516    PROT_READ,
517    PROT_WRITE,
518    RLIMIT_DATA,
519    RLIMIT_FSIZE,
520    RLIMIT_LOCKS,
521    RLIMIT_MEMLOCK,
522    RLIMIT_MSGQUEUE,
523    RLIMIT_NOFILE,
524    RLIMIT_NPROC,
525    RLIMIT_STACK,
526    rlimit,
527    rseq_flags_RSEQ_FLAG_UNREGISTER as RSEQ_FLAG_UNREGISTER,
528    rseq,
529    rseq_cs,
530    rusage,
531    SA_NODEFER,
532    SA_ONSTACK,
533    SA_RESTORER,
534    SA_SIGINFO,
535    SECCOMP_RET_ALLOW,
536    SECCOMP_RET_ERRNO,
537    SECCOMP_RET_KILL_THREAD,
538    SECCOMP_SET_MODE_FILTER,
539    SIG_BLOCK,
540    SIG_SETMASK,
541    SIG_UNBLOCK,
542    SIGABRT,
543    sigaction as kernel_sigaction,
544    SIGBUS,
545    SIGCHLD,
546    SIGCONT,
547    SIGFPE,
548    SIGHUP,
549    SIGILL,
550    siginfo_t,
551    SIGINT,
552    SIGIO,
553    SIGKILL,
554    SIGPIPE,
555    SIGSEGV,
556    sigset_t as kernel_sigset_t,
557    SIGSTOP,
558    SIGSYS,
559    SIGTERM,
560    SIGTRAP,
561    timespec,
562    UFFD_EVENT_FORK,
563    UFFD_EVENT_PAGEFAULT,
564    UFFD_EVENT_REMAP,
565    UFFD_EVENT_REMOVE,
566    UFFD_EVENT_UNMAP,
567    UFFD_FEATURE_EVENT_FORK,
568    UFFD_FEATURE_EVENT_REMAP,
569    UFFD_FEATURE_EVENT_REMOVE,
570    UFFD_FEATURE_EVENT_UNMAP,
571    UFFD_FEATURE_EXACT_ADDRESS,
572    UFFD_FEATURE_MINOR_HUGETLBFS,
573    UFFD_FEATURE_MINOR_SHMEM,
574    UFFD_FEATURE_MISSING_HUGETLBFS,
575    UFFD_FEATURE_MISSING_SHMEM,
576    UFFD_FEATURE_MOVE,
577    UFFD_FEATURE_PAGEFAULT_FLAG_WP,
578    UFFD_FEATURE_POISON,
579    UFFD_FEATURE_SIGBUS,
580    UFFD_FEATURE_THREAD_ID,
581    UFFD_FEATURE_WP_ASYNC,
582    UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
583    UFFD_FEATURE_WP_UNPOPULATED,
584    uffd_msg,
585    UFFD_PAGEFAULT_FLAG_MINOR,
586    UFFD_PAGEFAULT_FLAG_WP,
587    UFFD_PAGEFAULT_FLAG_WRITE,
588    UFFD_USER_MODE_ONLY,
589    uffdio_api,
590    uffdio_continue,
591    uffdio_copy,
592    uffdio_move,
593    uffdio_poison,
594    uffdio_range,
595    uffdio_register,
596    uffdio_writeprotect,
597    uffdio_zeropage,
598    WEXITED,
599    WNOHANG,
600};
601
602// For some reason bindgen just refuses to emit these.
/// API magic value for userfaultfd.
pub const UFFD_API: u64 = 0xAA;

// Mode bits for the register request.
pub const UFFDIO_REGISTER_MODE_MISSING: u64 = 0b001;
pub const UFFDIO_REGISTER_MODE_WP: u64 = 0b010;
pub const UFFDIO_REGISTER_MODE_MINOR: u64 = 0b100;

// Mode bits for the copy request.
pub const UFFDIO_COPY_MODE_DONTWAKE: u64 = 0b01;
pub const UFFDIO_COPY_MODE_WP: u64 = 0b10;

// Mode bits for the zeropage request.
pub const UFFDIO_ZEROPAGE_MODE_DONTWAKE: u64 = 0b01;

// Mode bits for the writeprotect request.
pub const UFFDIO_WRITEPROTECT_MODE_WP: u64 = 0b01;
pub const UFFDIO_WRITEPROTECT_MODE_DONTWAKE: u64 = 0b10;

// Mode bits for the continue request.
pub const UFFDIO_CONTINUE_MODE_DONTWAKE: u64 = 0b01;
pub const UFFDIO_CONTINUE_MODE_WP: u64 = 0b10;
614
// Assembles an ioctl request number from its four fields.
//
// Each field is shifted to the position given by the matching `_IOC_*SHIFT`
// constant from the arch bindings and the results are OR-ed together.
// The expansion is a plain block expression, so it is usable in `const` items.
macro_rules! ioc {
    ($dir:expr, $type:expr, $nr:expr, $size:expr) => {{
        let direction_bits = $dir << $crate::arch_bindings::_IOC_DIRSHIFT;
        let type_bits = $type << $crate::arch_bindings::_IOC_TYPESHIFT;
        let number_bits = $nr << $crate::arch_bindings::_IOC_NRSHIFT;
        let size_bits = $size << $crate::arch_bindings::_IOC_SIZESHIFT;
        direction_bits | type_bits | number_bits | size_bits
    }};
}
623
// Builds an ioctl request number whose direction field is `_IOC_READ`.
//
// `$magic` and `$number` are forwarded unchanged to `ioc!`; the size field is
// the byte size of the `$payload` type, cast to `c_uint`.
macro_rules! ior {
    ($magic:expr, $number:expr, $payload:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ,
            $magic,
            $number,
            core::mem::size_of::<$payload>() as $crate::c_uint
        )
    };
}
634
// Builds an ioctl request number whose direction field has both `_IOC_READ`
// and `_IOC_WRITE` set.
//
// `$magic` and `$number` are forwarded unchanged to `ioc!`; the size field is
// the byte size of the `$payload` type, cast to `c_uint`.
macro_rules! iowr {
    ($magic:expr, $number:expr, $payload:ty) => {
        ioc!(
            $crate::arch_bindings::_IOC_READ | $crate::arch_bindings::_IOC_WRITE,
            $magic,
            $number,
            core::mem::size_of::<$payload>() as $crate::c_uint
        )
    };
}
645
646use crate::arch_bindings::UFFDIO;
647
648const UFFDIO_API: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_API, uffdio_api);
649const UFFDIO_REGISTER: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_REGISTER, uffdio_register);
650const UFFDIO_UNREGISTER: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_UNREGISTER, uffdio_range);
651const UFFDIO_WAKE: c_uint = ior!(UFFDIO, crate::arch_bindings::_UFFDIO_WAKE, uffdio_range);
652const UFFDIO_COPY: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_COPY, uffdio_copy);
653const UFFDIO_ZEROPAGE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_ZEROPAGE, uffdio_zeropage);
654const UFFDIO_MOVE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_MOVE, uffdio_move);
655const UFFDIO_WRITEPROTECT: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_WRITEPROTECT, uffdio_writeprotect);
656const UFFDIO_CONTINUE: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_CONTINUE, uffdio_continue);
657const UFFDIO_POISON: c_uint = iowr!(UFFDIO, crate::arch_bindings::_UFFDIO_POISON, uffdio_poison);
658
// Generates one public wrapper function per `(name, request, payload type)` entry.
//
// Each generated function passes the address of `arg` to `sys_ioctl` together
// with the given request constant, propagates any error with `?`, and discards
// the successful return value.
macro_rules! ioctl_wrapper {
    ($(
        ($wrapper:ident, $request:ident, $payload:ident),
    )*) => {
        $(
            pub fn $wrapper(fd: FdRef, arg: &mut $payload) -> Result<(), Error> {
                // The ioctl argument is the payload's address as an integer.
                let arg_address = arg as *mut $payload as c_ulong;
                let _ = sys_ioctl(fd, $request, arg_address)?;
                Ok(())
            }
        )*
    }
}
671
672ioctl_wrapper! {
673    (sys_uffdio_api, UFFDIO_API, uffdio_api),
674    (sys_uffdio_register, UFFDIO_REGISTER, uffdio_register),
675    (sys_uffdio_unregister, UFFDIO_UNREGISTER, uffdio_range),
676    (sys_uffdio_wake, UFFDIO_WAKE, uffdio_range),
677    (sys_uffdio_copy, UFFDIO_COPY, uffdio_copy),
678    (sys_uffdio_zeropage, UFFDIO_ZEROPAGE, uffdio_zeropage),
679    (sys_uffdio_move, UFFDIO_MOVE, uffdio_move),
680    (sys_uffdio_writeprotect, UFFDIO_WRITEPROTECT, uffdio_writeprotect),
681    (sys_uffdio_continue, UFFDIO_CONTINUE, uffdio_continue),
682    (sys_uffdio_poison, UFFDIO_POISON, uffdio_poison),
683}
684
// Implements `Default` for every listed type by producing an all-zero value.
//
// The "unsafe" in the name is a warning to the caller of the macro: it must
// only be used with types for which the all-zero bit pattern is a valid value.
macro_rules! unsafe_impl_zeroed_default {
    ($(
        $target:ident,
    )*) => {
        $(
            impl Default for $target {
                #[inline]
                fn default() -> Self {
                    let zeroed = core::mem::MaybeUninit::<Self>::zeroed();
                    unsafe { zeroed.assume_init() }
                }
            }
        )*
    }
}
699
700unsafe_impl_zeroed_default! {
701    uffdio_api,
702    uffdio_register,
703    uffdio_range,
704    uffdio_copy,
705    uffdio_zeropage,
706    uffdio_move,
707    uffdio_writeprotect,
708    uffdio_continue,
709    uffdio_poison,
710    uffd_msg,
711    io_uring_params,
712    io_uring_sqe,
713}
714
715impl siginfo_t {
716    pub unsafe fn si_signo(&self) -> c_int {
717        self.__bindgen_anon_1.__bindgen_anon_1.si_signo
718    }
719
720    pub unsafe fn si_code(&self) -> c_int {
721        self.__bindgen_anon_1.__bindgen_anon_1.si_code
722    }
723
724    pub unsafe fn si_pid(&self) -> pid_t {
725        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._pid
726    }
727
728    pub unsafe fn si_status(&self) -> c_int {
729        self.__bindgen_anon_1.__bindgen_anon_1._sifields._sigchld._status
730    }
731}
732
/// Reports whether the wait status describes termination by a signal.
///
/// This holds when the low seven bits are neither `0` nor `0x7f`.
#[allow(non_snake_case)]
pub const fn WIFSIGNALED(status: c_int) -> bool {
    let low_bits = status & 0x7f;
    low_bits != 0 && low_bits != 0x7f
}
737
/// Extracts the low seven bits of a wait status (the terminating signal number).
#[allow(non_snake_case)]
pub const fn WTERMSIG(status: c_int) -> c_int {
    const SIGNAL_MASK: c_int = 0x7f;
    status & SIGNAL_MASK
}
742
/// Reports whether the wait status describes a normal exit, i.e. its low seven bits are zero.
#[allow(non_snake_case)]
pub const fn WIFEXITED(status: c_int) -> bool {
    matches!(status & 0x7f, 0)
}
747
/// Extracts bits 8..=15 of a wait status (the exit code of a normally exited process).
#[allow(non_snake_case)]
pub const fn WEXITSTATUS(status: c_int) -> c_int {
    const EXIT_CODE_BITS: c_int = 0xff00;
    (status & EXIT_CODE_BITS) >> 8
}
752
/// 32-bit length type; used in this file for `msghdr::msg_namelen`.
#[allow(non_camel_case_types)]
pub type socklen_t = u32;
755
// Source: linux/arch/x86/include/uapi/asm/signal.h
/// Signal stack descriptor with C layout, following the kernel header cited above.
///
/// Embedded in [`ucontext`] as its `uc_stack` field.
#[derive(Debug)]
#[repr(C)]
pub struct stack_t {
    /// Raw pointer field; presumably the base of the stack memory (confirm against the header).
    pub ss_sp: *mut c_void,
    /// Flags word.
    pub ss_flags: c_int,
    /// Size field; presumably the stack size in bytes (confirm against the header).
    pub ss_size: usize,
}
764
// Source: linux/include/uapi/asm-generic/ucontext.h
/// User context structure with C layout, following the kernel header cited above.
///
/// NOTE(review): presumably what a `SA_SIGINFO` handler receives as its third
/// argument; no use is visible in this part of the file.
#[derive(Debug)]
#[repr(C)]
pub struct ucontext {
    /// Flags word.
    pub uc_flags: c_ulong,
    /// Raw pointer to another `ucontext`.
    pub uc_link: *mut ucontext,
    /// Signal stack descriptor.
    pub uc_stack: stack_t,
    /// Saved machine state; see [`sigcontext`].
    pub uc_mcontext: sigcontext,
    /// Signal mask, using the kernel's `sigset_t` binding.
    pub uc_sigmask: kernel_sigset_t,
}
775
// Source: linux/arch/x86/include/uapi/asm/sigcontext.h
/// Saved machine state with C layout, following the kernel header cited above.
///
/// Field order is significant because of `#[repr(C)]`; do not reorder.
#[derive(Debug)]
#[repr(C)]
pub struct sigcontext {
    // General-purpose register slots, 64 bits each.
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub rdi: u64,
    pub rsi: u64,
    pub rbp: u64,
    pub rbx: u64,
    pub rdx: u64,
    pub rax: u64,
    pub rcx: u64,
    pub rsp: u64,
    // Instruction pointer and flags register slots.
    pub rip: u64,
    pub eflags: u64,
    // Four 16-bit slots named after segment registers.
    pub cs: u16,
    pub gs: u16,
    pub fs: u16,
    pub ss: u16,
    // NOTE(review): presumably fault/trap details filled in by the kernel; confirm against the header.
    pub err: u64,
    pub trapno: u64,
    pub oldmask: u64,
    pub cr2: u64,
    /// Raw pointer to the floating-point state; see [`fpstate`].
    pub fpstate: *mut fpstate,
    pub reserved: [u64; 8],
}
809
/// Floating-point state with C layout, referenced by `sigcontext::fpstate`.
///
/// NOTE(review): no source header is cited for this struct; presumably it
/// mirrors the kernel's 64-bit `_fpstate` layout. Confirm before changing.
#[repr(C)]
pub struct fpstate {
    pub cwd: u16,
    pub swd: u16,
    pub twd: u16,
    pub fop: u16,
    pub rip: u64,
    pub rdp: u64,
    pub mxcsr: u32,
    pub mxcsr_mask: u32,
    pub st_space: [u32; 32],  /*  8x  FP registers, 16 bytes each */
    pub xmm_space: [u32; 64], /* 16x XMM registers, 16 bytes each */
    pub reserved_1: [u32; 12],
    /// Trailing software-reserved area; see [`fpx_sw_bytes`].
    pub sw_reserved: fpx_sw_bytes,
}
825
/// Software-reserved trailer of [`fpstate`] (its `sw_reserved` field), with C layout.
///
/// NOTE(review): presumably mirrors the kernel's `_fpx_sw_bytes`; the meaning
/// of the individual fields is not established anywhere in this file.
#[repr(C)]
pub struct fpx_sw_bytes {
    pub magic1: u32,
    pub extended_size: u32,
    pub xfeatures: u64,
    pub xstate_size: u32,
    pub padding: [u32; 7],
}
834
/// Message header with C layout.
///
/// NOTE(review): presumably the argument struct for the `sendmsg`/`recvmsg`
/// syscalls whose numbers are re-exported above; the callers are not visible here.
#[repr(C)]
pub struct msghdr {
    /// Raw pointer paired with `msg_namelen`.
    pub msg_name: *mut c_void,
    pub msg_namelen: socklen_t,
    /// Raw pointer to `iovec` entries, paired with `msg_iovlen`.
    pub msg_iov: *mut iovec,
    pub msg_iovlen: c_size_t,
    /// Raw pointer paired with `msg_controllen`.
    pub msg_control: *mut c_void,
    pub msg_controllen: c_size_t,
    pub msg_flags: c_int,
}
845
/// Control message header with C layout: a length followed by two integer tags.
///
/// NOTE(review): presumably found in the buffer that `msghdr::msg_control`
/// points to; confirm against the callers.
#[repr(C)]
pub struct cmsghdr {
    pub cmsg_len: c_size_t,
    pub cmsg_level: c_int,
    pub cmsg_type: c_int,
}
852
/// Crate-private pairing of a 16-bit length with a pointer to [`sock_filter`] entries, with C layout.
///
/// NOTE(review): presumably describes a filter program handed to the kernel
/// (e.g. for seccomp); the code that uses it is not visible in this chunk.
#[repr(C)]
struct sock_fprog {
    pub length: c_ushort,
    pub filter: *const sock_filter,
}
858
/// A single BPF instruction, as produced by the `bpf!` macro in this file.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct sock_filter {
    /// Opcode, OR-ed together from the `BPF_*` constants.
    pub code: u16,
    /// Jump offset written by `bpf!` for `if a == ...` instructions.
    pub jt: u8,
    /// Jump offset written by `bpf!` for `if a != ...` instructions.
    pub jf: u8,
    /// 32-bit operand (load address, comparison constant, jump offset, or return value).
    pub k: u32,
}
867
// BPF opcode fields, written in hexadecimal so they can be compared directly
// with the values in the kernel's filter headers.

// Instruction classes (low 3 bits, mask: 0x07)
pub const BPF_LD: u16 = 0x00;
pub const BPF_LDX: u16 = 0x01;
pub const BPF_ST: u16 = 0x02;
pub const BPF_STX: u16 = 0x03;
pub const BPF_ALU: u16 = 0x04;
pub const BPF_JMP: u16 = 0x05;
pub const BPF_RET: u16 = 0x06;
pub const BPF_MISC: u16 = 0x07;

// LD/LDX/ST/STX width (2 bits, mask: 0x18)
pub const BPF_W: u16 = 0x00; // 32-bit
pub const BPF_H: u16 = 0x08; // 16-bit
pub const BPF_B: u16 = 0x10; // 8-bit

// LD/LDX/ST/STX addressing mode (3 bits, mask: 0xe0)
pub const BPF_IMM: u16 = 0x00;
pub const BPF_ABS: u16 = 0x20;
pub const BPF_IND: u16 = 0x40;
pub const BPF_MEM: u16 = 0x60;
pub const BPF_LEN: u16 = 0x80;
pub const BPF_MSH: u16 = 0xa0;

// ALU operations (4 bits, mask: 0xf0)
pub const BPF_ADD: u16 = 0x00;
pub const BPF_SUB: u16 = 0x10;
pub const BPF_MUL: u16 = 0x20;
pub const BPF_DIV: u16 = 0x30;
pub const BPF_OR: u16 = 0x40;
pub const BPF_AND: u16 = 0x50;
pub const BPF_LSH: u16 = 0x60;
pub const BPF_RSH: u16 = 0x70;
pub const BPF_NEG: u16 = 0x80;
pub const BPF_MOD: u16 = 0x90;
pub const BPF_XOR: u16 = 0xa0;

// JMP operations (4 bits, mask: 0xf0)
pub const BPF_JA: u16 = 0x00;
pub const BPF_JEQ: u16 = 0x10;
pub const BPF_JGT: u16 = 0x20;
pub const BPF_JGE: u16 = 0x30;
pub const BPF_JSET: u16 = 0x40;

// ALU/JMP operand source (1 bit, mask: 0x08)
pub const BPF_K: u16 = 0x00;
pub const BPF_X: u16 = 0x08;
914
// Secure-bits flags. Each flag is immediately followed by its `_LOCKED`
// companion, which occupies the next bit up.
pub const SECBIT_NOROOT: u32 = 1 << 0;
pub const SECBIT_NOROOT_LOCKED: u32 = 1 << 1;
pub const SECBIT_NO_SETUID_FIXUP: u32 = 1 << 2;
pub const SECBIT_NO_SETUID_FIXUP_LOCKED: u32 = 1 << 3;
pub const SECBIT_KEEP_CAPS: u32 = 1 << 4;
pub const SECBIT_KEEP_CAPS_LOCKED: u32 = 1 << 5;
pub const SECBIT_NO_CAP_AMBIENT_RAISE: u32 = 1 << 6;
pub const SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED: u32 = 1 << 7;
923
/// Assembles a BPF program into a fixed-size array of `sock_filter` values.
///
/// The input is a comma-terminated list of parenthesized instructions. An
/// instruction may be prefixed with `[N]:` to attach the numeric label `N` to
/// it; jump instructions refer to a label with `@N`. Supported instructions
/// are the ones matched by the `@op` rules below.
///
/// # Panics
///
/// The expansion panics with `"invalid jump"` when a jump target lies before
/// the jump or more than 255 instructions after it, and with
/// `"out of range value"` when a comparison constant is above `u32::MAX` or
/// below `i32::MIN`.
#[macro_export]
macro_rules! bpf {
    // Expands to `1` for any single token tree; used to count instructions.
    (@const_one $tok:tt) => {
        1
    };

    // Yields the label of a `[label]: ...` instruction...
    (@get_label_or_zero ([$label:expr]: $($tok:tt)+)) => {
        $label
    };

    // ...and `0` for an instruction without a label.
    (@get_label_or_zero ($($tok:tt)+)) => {
        0
    };

    // Number of instructions in the program: adds one per parenthesized group.
    (@count_instructions
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut count = 0;
        $(
            count += $crate::bpf!(@const_one ($($tok)+));
        )+

        count
    }};

    // Size of the label table: the largest label plus one. Unlabeled
    // instructions contribute `0` via `@get_label_or_zero`, so the table gets
    // at least one slot even when the program has no labels at all.
    (@max_label_plus_one
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut max = -1;
        $(
            let label = $crate::bpf!(@get_label_or_zero ($($tok)+));
            if label > max {
                max = label;
            }
        )+

        if max < 0 {
            0
        } else {
            (max + 1) as usize
        }
    }};

    // Records the index of a labeled instruction in the label table...
    (@fill_label $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => {
        $labels[$label] = $nth_instruction;
    };

    // ...and does nothing for an unlabeled one.
    (@fill_label $labels:expr, $nth_instruction:expr, $($tok:tt)+) => {};

    // Walks the instructions in order, filling the label table with the index
    // of each labeled instruction.
    (@fill_labels
        $labels:expr,
        $(
            ($($tok:tt)+)
        ),+
    ) => {{
        let mut nth_instruction = 0;
        $(
            $crate::bpf!(@fill_label $labels, nth_instruction, $($tok)+);
            #[allow(unused_assignments)]
            {
                nth_instruction += 1;
            }
        )+
    }};

    // Converts a label into a relative jump offset, counted from the
    // instruction that follows the jump. Only forward offsets that fit in a
    // `u8` (0..=255) are accepted.
    (@target $labels:expr, $nth_instruction:expr, $target:expr) => {{
        let target = ($labels[$target] as i32 - $nth_instruction as i32 - 1);
        if target < 0 || target > 255 {
            panic!("invalid jump");
        }

        target as u8
    }};

    // Casts a constant to `u32` after checking that it lies within
    // `i32::MIN..=u32::MAX`; negative values wrap through the `as` cast.
    (@into_u32 $value:expr) => {{
        let value = $value;
        #[allow(clippy::cast_lossless)]
        if value as i128 > u32::MAX as i128 || (value as i128) < i32::MIN as i128 {
            panic!("out of range value");
        }
        value as u32
    }};

    // Instruction encoders. A leading `[label]:` is stripped first, then the
    // remaining tokens are matched against the instruction forms below.
    (@op $labels:expr, $nth_instruction:expr, [$label:expr]: $($tok:tt)+) => { $crate::bpf!(@op $labels, $nth_instruction, $($tok)+) };

    // Primitive instructions; each one expands directly to a `sock_filter`.
    (@op $labels:expr, $nth_instruction:expr, a = *abs[$addr:expr]) => { $crate::sock_filter { code: $crate::BPF_LD | $crate::BPF_W | $crate::BPF_ABS, jt: 0, jf: 0, k: $addr } };
    (@op $labels:expr, $nth_instruction:expr, a &= $value:expr) => { $crate::sock_filter { code: $crate::BPF_ALU | $crate::BPF_AND | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    (@op $labels:expr, $nth_instruction:expr, if a == $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: $crate::bpf!(@target $labels, $nth_instruction, $target), jf: 0, k: $crate::bpf!(@into_u32 $value) } };
    // `!=` reuses the JEQ opcode but places the offset in `jf` instead of `jt`.
    (@op $labels:expr, $nth_instruction:expr, if a != $value:expr => jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JEQ | $crate::BPF_K, jt: 0, jf: $crate::bpf!(@target $labels, $nth_instruction, $target), k: $crate::bpf!(@into_u32 $value) } };
    // The unconditional jump stores its offset in `k`.
    (@op $labels:expr, $nth_instruction:expr, jump @$target:expr) => { $crate::sock_filter { code: $crate::BPF_JMP | $crate::BPF_JA, jt: 0, jf: 0, k: $crate::bpf!(@target $labels, $nth_instruction, $target) as u32 } };
    (@op $labels:expr, $nth_instruction:expr, return $value:expr) => { $crate::sock_filter { code: $crate::BPF_RET | $crate::BPF_K, jt: 0, jf: 0, k: $value } };
    // Convenience forms; each one rewrites itself into a primitive instruction above.
    (@op $labels:expr, $nth_instruction:expr, seccomp_allow) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ALLOW) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_kill_thread) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_KILL_THREAD) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_error($errno:expr)) => { $crate::bpf!(@op $labels, $nth_instruction, return $crate::SECCOMP_RET_ERRNO | { let errno: u16 = $errno; errno as u32 }) };
    (@op $labels:expr, $nth_instruction:expr, seccomp_return_eperm) => { $crate::bpf!(@op $labels, $nth_instruction, seccomp_return_error($crate::EPERM as u16)) };
    // The syscall number is loaded from offset 0; argument `n` from offset `16 + n * 8`.
    (@op $labels:expr, $nth_instruction:expr, a = syscall_nr) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[0]) };
    (@op $labels:expr, $nth_instruction:expr, a = syscall_arg[$nth_arg:expr]) => { $crate::bpf!(@op $labels, $nth_instruction, a = *abs[16 + $nth_arg * 8]) };

    // Entry point: allocate the output array, resolve every label to an
    // instruction index, then encode the instructions one by one.
    (
        $(
            ($($tok:tt)+),
        )+
    ) => {{
        // One zeroed slot per instruction; each slot is overwritten below.
        let mut filter = [
            $crate::sock_filter { code: 0, jt: 0, jf: 0, k: 0 };
            { $crate::bpf!(@count_instructions $(($($tok)+)),+) }
        ];

        // labels[N] holds the index of the instruction carrying label N.
        let mut labels = [
            0;
            { $crate::bpf!(@max_label_plus_one $(($($tok)+)),+) }
        ];

        $crate::bpf!(@fill_labels labels, $(($($tok)+)),+);

        {
            let mut nth_instruction = 0;

            $(
                #[allow(clippy::indexing_slicing)]
                {
                    filter[nth_instruction] = $crate::bpf!(@op labels, nth_instruction, $($tok)+);
                }
                nth_instruction += 1;
            )+

            // Reads the counter once more so the final increment is not reported as unused.
            let _ = nth_instruction;
        }

        filter
    }};
}
1060
/// Checks that `bpf!` turns `jump @label` targets into relative `jt` offsets.
#[test]
fn test_bpf_jump() {
    // Builders for the two instruction encodings the programs below expand to.
    let jump_if_equal = |value, offset| sock_filter {
        code: BPF_JMP | BPF_JEQ | BPF_K,
        jt: offset,
        jf: 0,
        k: value,
    };
    let ret = |value| sock_filter {
        code: BPF_RET,
        jt: 0,
        jf: 0,
        k: value,
    };

    // A single forward jump which skips exactly one instruction.
    let single_label = bpf! {
        (if a == 1234 => jump @0),
        (return 10),
        ([0]: return 20),
    };
    assert_eq!(single_label, [jump_if_equal(1234, 1), ret(10), ret(20)]);

    // Two jumps from different positions which share the same target label.
    let shared_label = bpf! {
        (if a == 20 => jump @2),
        (if a == 10 => jump @2),
        ([0]: return 0),
        ([1]: return 1),
        ([2]: return 2),
    };
    assert_eq!(
        shared_label,
        [jump_if_equal(20, 3), jump_if_equal(10, 2), ret(0), ret(1), ret(2)]
    );
}
1133
// Conventional descriptor numbers of the three standard streams.
pub const STDIN_FILENO: c_int = 0;
pub const STDOUT_FILENO: c_int = 1;
pub const STDERR_FILENO: c_int = 2;

// Socket-related constants: address family, socket types and flags,
// the socket option level, the control message type and a send flag.
pub const AF_UNIX: u32 = 1;
pub const SOCK_STREAM: u32 = 1;
pub const SOCK_SEQPACKET: u32 = 5;
pub const SOCK_CLOEXEC: u32 = 0x80000;
pub const SOL_SOCKET: c_int = 1;
pub const SCM_RIGHTS: c_int = 1;
pub const MSG_NOSIGNAL: u32 = 0x4000;

// `whence` values; `sys_lseek` takes its `whence` argument as a `u32`.
pub const SEEK_SET: u32 = 0;
pub const SEEK_CUR: u32 = 1;
pub const SEEK_END: u32 = 2;

// NOTE(review): presumably a file status flag for `open`/`fcntl` — confirm against the kernel headers.
pub const O_ASYNC: u32 = 0x2000;
1151
/// Rounds `len` up to the next multiple of `size_of::<usize>()`.
#[allow(non_snake_case)]
const fn CMSG_ALIGN(len: usize) -> usize {
    const WORD: usize = core::mem::size_of::<usize>();

    let padded = len + WORD - 1;
    padded & !(WORD - 1)
}
1156
1157#[allow(non_snake_case)]
1158pub unsafe fn CMSG_FIRSTHDR(mhdr: *const msghdr) -> *mut cmsghdr {
1159    if (*mhdr).msg_controllen >= core::mem::size_of::<cmsghdr>() {
1160        (*mhdr).msg_control.cast::<cmsghdr>()
1161    } else {
1162        core::ptr::null_mut()
1163    }
1164}
1165
1166#[allow(non_snake_case)]
1167pub unsafe fn CMSG_DATA(cmsg: *mut cmsghdr) -> *mut c_uchar {
1168    cmsg.add(1).cast::<c_uchar>()
1169}
1170
1171#[allow(non_snake_case)]
1172pub const fn CMSG_SPACE(length: usize) -> usize {
1173    CMSG_ALIGN(length) + CMSG_ALIGN(core::mem::size_of::<cmsghdr>())
1174}
1175
1176#[allow(non_snake_case)]
1177pub const fn CMSG_LEN(length: usize) -> usize {
1178    CMSG_ALIGN(core::mem::size_of::<cmsghdr>()) + length
1179}
1180
// The following was copied from the `cstr_core` crate.
//
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
/// Returns `true` if `bytes` is non-empty, ends with a NUL byte and contains
/// no other NUL byte.
#[inline]
#[doc(hidden)]
#[allow(clippy::indexing_slicing)]
pub const fn cstr_is_valid(bytes: &[u8]) -> bool {
    let length = bytes.len();
    if length == 0 {
        return false;
    }

    // A NUL byte must appear at the last position and nowhere else.
    let last = length - 1;
    let mut position = 0;
    while position < length {
        let is_nul = bytes[position] == 0;
        let is_last = position == last;
        if is_nul != is_last {
            return false;
        }
        position += 1;
    }

    true
}
1201
/// Builds a `&CStr` from the given expression by appending a NUL terminator
/// with `concat!`.
///
/// The resulting bytes are checked with `cstr_is_valid` in a constant, so an
/// input with an interior NUL byte is rejected at compile time.
#[macro_export]
macro_rules! cstr {
    ($e:expr) => {{
        const STR: &[u8] = concat!($e, "\0").as_bytes();
        const STR_VALID: bool = $crate::cstr_is_valid(STR);
        // Compile-time assertion: if `STR_VALID` is false the array length
        // becomes `0 - 1`, which underflows and fails the build.
        let _ = [(); 0 - (!(STR_VALID) as usize)];
        // The bytes were validated above, so the unchecked constructor is used.
        #[allow(unused_unsafe)]
        unsafe {
            core::ffi::CStr::from_bytes_with_nul_unchecked(STR)
        }
    }}
}
1214
/// An error made of a message and an optional errno value.
#[derive(Clone)]
pub struct Error {
    // Without `std` only static messages can be stored.
    #[cfg(not(feature = "std"))]
    message: &'static str,
    // With `std` the message may also be an owned string.
    #[cfg(feature = "std")]
    message: Cow<'static, str>,
    // Zero means that no errno is attached; it is then omitted when formatting.
    errno: c_int,
}
1223
1224impl core::fmt::Debug for Error {
1225    #[cold]
1226    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1227        core::fmt::Display::fmt(self, fmt)
1228    }
1229}
1230
1231impl core::fmt::Display for Error {
1232    #[cold]
1233    fn fmt(&self, fmt: &mut core::fmt::Formatter) -> core::fmt::Result {
1234        let mut is_err = false;
1235        self.fmt_to_string(|chunk| {
1236            if fmt.write_str(chunk).is_err() {
1237                is_err = true;
1238            }
1239        });
1240
1241        if is_err {
1242            Err(core::fmt::Error)
1243        } else {
1244            Ok(())
1245        }
1246    }
1247}
1248
// Only the trait's default methods are used; nothing is overridden here.
impl core::error::Error for Error {}
1250
/// Wraps an owned message into an `Error` without an errno.
#[cfg(feature = "std")]
impl From<std::string::String> for Error {
    fn from(message: std::string::String) -> Self {
        Self {
            message: Cow::Owned(message),
            errno: 0,
        }
    }
}
1260
1261impl From<&'static str> for Error {
1262    fn from(message: &'static str) -> Self {
1263        Error::from_str(message)
1264    }
1265}
1266
/// Emits `value` in decimal through `write_str`, one call per digit,
/// most significant digit first.
fn write_number(value: u32, write_str: &mut dyn FnMut(&str)) {
    // `u32::MAX` has ten decimal digits, so this always suffices.
    let mut digits = [0u8; 10];
    let mut count = 0;

    // Collect the digits starting from the least significant one.
    let mut remaining = value;
    for slot in digits.iter_mut() {
        *slot = b'0' + (remaining % 10) as u8;
        count += 1;
        remaining /= 10;
        if remaining == 0 {
            break;
        }
    }

    // Replay them in reverse, so the most significant digit is written first.
    for digit in digits.iter().take(count).rev() {
        // Every stored byte is an ASCII digit, hence valid UTF-8.
        let s = unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(digit)) };
        write_str(s);
    }
}
1279
1280impl Error {
1281    pub fn fmt_to_string(&self, mut write_str: impl FnMut(&str)) {
1282        self.fmt_to_string_impl(&mut write_str);
1283    }
1284
1285    // Avoid pulling in core::fmt machinery to keep the code size low.
1286    #[cold]
1287    fn fmt_to_string_impl(&self, write_str: &mut dyn FnMut(&str)) {
1288        #[allow(clippy::needless_borrow)]
1289        write_str(&self.message);
1290
1291        if self.errno == 0 {
1292            return;
1293        }
1294
1295        write_str(" (errno = ");
1296        write_number(self.errno as u32, write_str);
1297
1298        let errno = match self.errno as u32 {
1299            EPERM => Some("EPERM"),
1300            ENOENT => Some("ENOENT"),
1301            ESRCH => Some("ESRCH"),
1302            EINTR => Some("EINTR"),
1303            EIO => Some("EIO"),
1304            ENXIO => Some("ENXIO"),
1305            E2BIG => Some("E2BIG"),
1306            ENOEXEC => Some("ENOEXEC"),
1307            EBADF => Some("EBADF"),
1308            ECHILD => Some("ECHILD"),
1309            EAGAIN => Some("EAGAIN"),
1310            ENOMEM => Some("ENOMEM"),
1311            EACCES => Some("EACCES"),
1312            EFAULT => Some("EFAULT"),
1313            ENOTBLK => Some("ENOTBLK"),
1314            EBUSY => Some("EBUSY"),
1315            EEXIST => Some("EEXIST"),
1316            EXDEV => Some("EXDEV"),
1317            ENODEV => Some("ENODEV"),
1318            ENOTDIR => Some("ENOTDIR"),
1319            EISDIR => Some("EISDIR"),
1320            EINVAL => Some("EINVAL"),
1321            ENFILE => Some("ENFILE"),
1322            EMFILE => Some("EMFILE"),
1323            ENOTTY => Some("ENOTTY"),
1324            ETXTBSY => Some("ETXTBSY"),
1325            EFBIG => Some("EFBIG"),
1326            ENOSPC => Some("ENOSPC"),
1327            ESPIPE => Some("ESPIPE"),
1328            EROFS => Some("EROFS"),
1329            EMLINK => Some("EMLINK"),
1330            EPIPE => Some("EPIPE"),
1331            EDOM => Some("EDOM"),
1332            ERANGE => Some("ERANGE"),
1333            EOPNOTSUPP => Some("EOPNOTSUPP"),
1334            ETOOMANYREFS => Some("ETOOMANYREFS"),
1335            ERESTARTSYS => Some("ERESTARTSYS"),
1336            _ => None,
1337        };
1338
1339        if let Some(errno) = errno {
1340            write_str(" (");
1341            write_str(errno);
1342            write_str(")");
1343        }
1344
1345        write_str(")");
1346    }
1347
1348    #[cfg(feature = "std")]
1349    #[cold]
1350    pub fn from_os_error(message: &'static str, error: std::io::Error) -> Self {
1351        Self {
1352            message: message.into(),
1353            errno: error.raw_os_error().unwrap_or(0),
1354        }
1355    }
1356
1357    #[cfg(feature = "std")]
1358    #[cold]
1359    pub fn from_last_os_error(message: &'static str) -> Self {
1360        Self {
1361            message: message.into(),
1362            errno: std::io::Error::last_os_error().raw_os_error().unwrap_or(0),
1363        }
1364    }
1365
1366    #[cold]
1367    pub const fn from_errno(message: &'static str, errno: i32) -> Self {
1368        Self {
1369            #[cfg(not(feature = "std"))]
1370            message,
1371            #[cfg(feature = "std")]
1372            message: Cow::Borrowed(message),
1373
1374            errno,
1375        }
1376    }
1377
1378    #[cold]
1379    pub const fn from_str(message: &'static str) -> Self {
1380        Self {
1381            #[cfg(not(feature = "std"))]
1382            message,
1383            #[cfg(feature = "std")]
1384            message: Cow::Borrowed(message),
1385
1386            errno: 0,
1387        }
1388    }
1389
1390    #[inline]
1391    pub fn from_syscall(message: &'static str, result: i64) -> Result<(), Self> {
1392        if result >= -4095 && result < 0 {
1393            Err(Self::from_syscall_unchecked(message, result))
1394        } else {
1395            Ok(())
1396        }
1397    }
1398
1399    #[cold]
1400    #[inline]
1401    const fn from_syscall_unchecked(message: &'static str, result: i64) -> Self {
1402        Self {
1403            #[cfg(not(feature = "std"))]
1404            message,
1405            #[cfg(feature = "std")]
1406            message: Cow::Borrowed(message),
1407
1408            errno: -result as i32,
1409        }
1410    }
1411
1412    #[inline]
1413    pub fn errno(&self) -> u32 {
1414        self.errno as u32
1415    }
1416}
1417
/// Aborts execution by running the `ud2` instruction; never returns.
#[cfg(target_arch = "x86_64")]
#[inline(never)]
#[cold]
pub fn abort() -> ! {
    // In practice `core::hint::unreachable_unchecked` emits this,
    // but technically calling it is undefined behavior which could
    // affect unrelated code, so let's just call it through `asm!`.

    unsafe {
        core::arch::asm!("ud2", options(noreturn, nostack));
    }
}
1430
/// An owned file descriptor. Will be automatically closed on drop.
///
/// A negative inner value marks a descriptor which is not open (for example
/// after `close` or `leak`); closing such a value is a no-op.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct Fd(c_int);
1435
/// An unowned file descriptor.
///
/// The lifetime parameter ties the reference to a borrowed [`Fd`];
/// dropping an `FdRef` never closes the descriptor.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct FdRef<'a>(c_int, PhantomData<&'a Fd>);
1440
1441impl Fd {
1442    pub fn raw(&self) -> c_int {
1443        self.0
1444    }
1445
1446    #[inline]
1447    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1448        Self(fd)
1449    }
1450
1451    pub fn borrow(&self) -> FdRef {
1452        FdRef(self.0, PhantomData)
1453    }
1454
1455    pub fn close(mut self) -> Result<(), Error> {
1456        self.close_inplace()?;
1457        Ok(())
1458    }
1459
1460    pub fn leak(mut self) -> c_int {
1461        core::mem::replace(&mut self.0, -1)
1462    }
1463
1464    fn close_inplace(&mut self) -> Result<(), Error> {
1465        if self.raw() < 0 {
1466            return Ok(());
1467        }
1468
1469        let fd = core::mem::replace(&mut self.0, -1);
1470        let result = unsafe { syscall_readonly!(SYS_close, fd) };
1471        Error::from_syscall("close", result)
1472    }
1473}
1474
1475impl Drop for Fd {
1476    fn drop(&mut self) {
1477        let _ = self.close_inplace();
1478    }
1479}
1480
1481impl<'a> FdRef<'a> {
1482    pub fn raw(&self) -> c_int {
1483        self.0
1484    }
1485
1486    #[inline]
1487    pub const fn from_raw_unchecked(fd: c_int) -> Self {
1488        Self(fd, PhantomData)
1489    }
1490}
1491
1492impl<'a> From<&'a Fd> for FdRef<'a> {
1493    fn from(fd: &'a Fd) -> Self {
1494        FdRef(fd.0, PhantomData)
1495    }
1496}
1497
1498impl<'a> From<&'a mut Fd> for FdRef<'a> {
1499    fn from(fd: &'a mut Fd) -> Self {
1500        FdRef(fd.0, PhantomData)
1501    }
1502}
1503
1504impl core::fmt::Write for Fd {
1505    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1506        FdRef::from(self).write_str(string)
1507    }
1508}
1509
1510impl<'a> core::fmt::Write for FdRef<'a> {
1511    fn write_str(&mut self, string: &str) -> core::fmt::Result {
1512        let mut bytes = string.as_bytes();
1513        while !bytes.is_empty() {
1514            let count = sys_write(*self, bytes).map_err(|_| core::fmt::Error)?;
1515            if count == 0 {
1516                return Err(core::fmt::Error);
1517            }
1518            bytes = bytes.get(count..).ok_or(core::fmt::Error)?;
1519        }
1520
1521        Ok(())
1522    }
1523}
1524
1525pub fn sys_uname() -> Result<new_utsname, Error> {
1526    let mut out: new_utsname = unsafe { core::mem::zeroed() };
1527    let result = unsafe { syscall!(SYS_uname, core::ptr::addr_of_mut!(out)) };
1528    Error::from_syscall("uname", result)?;
1529    Ok(out)
1530}
1531
1532pub fn sys_io_uring_setup(entries: u32, params: &mut io_uring_params) -> Result<Fd, Error> {
1533    let fd = unsafe { syscall!(SYS_io_uring_setup, entries, params as *mut io_uring_params) };
1534    Error::from_syscall("io_uring_setup", fd)?;
1535    Ok(Fd::from_raw_unchecked(fd as c_int))
1536}
1537
1538pub fn sys_io_uring_register(fd: FdRef, opcode: u32, arg: *const c_void, arg_count: u32) -> Result<(), Error> {
1539    let result = unsafe { syscall!(SYS_io_uring_register, fd, opcode, arg, arg_count) };
1540    Error::from_syscall("io_uring_register", result)?;
1541    Ok(())
1542}
1543
1544pub unsafe fn sys_io_uring_enter(
1545    fd: FdRef,
1546    to_submit: u32,
1547    min_complete: u32,
1548    flags: u32,
1549    arg: *const c_void,
1550    argsz: usize,
1551) -> Result<u32, Error> {
1552    let result = unsafe { syscall!(SYS_io_uring_enter, fd, to_submit, min_complete, flags, arg, argsz) };
1553    Error::from_syscall("io_uring_enter", result)?;
1554    Ok(result as u32)
1555}
1556
1557pub fn sys_ioctl(fd: FdRef, cmd: c_uint, arg: c_ulong) -> Result<c_int, Error> {
1558    let result = unsafe { syscall!(SYS_ioctl, fd, cmd, arg) };
1559    Error::from_syscall("ioctl", result)?;
1560    Ok(result as c_int)
1561}
1562
1563pub fn sys_userfaultfd(flags: c_uint) -> Result<Fd, Error> {
1564    let fd = unsafe { syscall_readonly!(SYS_userfaultfd, flags) };
1565    Error::from_syscall("userfaultfd", fd)?;
1566    Ok(Fd::from_raw_unchecked(fd as c_int))
1567}
1568
1569fn sys_getdents64(fd: FdRef, buffer: &mut [u8]) -> Result<Option<usize>, Error> {
1570    let length = buffer.len();
1571    let bytes_read = unsafe { syscall!(SYS_getdents64, fd.raw(), buffer, length) };
1572    Error::from_syscall("getdents64", bytes_read)?;
1573
1574    if bytes_read == 0 {
1575        Ok(None)
1576    } else {
1577        Ok(Some(bytes_read as usize))
1578    }
1579}
1580
1581pub unsafe fn sys_arch_prctl_set_gs(value: usize) -> Result<(), Error> {
1582    let result = syscall_readonly!(SYS_arch_prctl, ARCH_SET_GS, value);
1583    Error::from_syscall("arch_prctl(ARCH_SET_GS)", result)?;
1584    Ok(())
1585}
1586
1587pub fn sys_sched_yield() -> Result<(), Error> {
1588    // On Linux this always succeeds, although technically it could fail
1589    // due to a seccomp sandbox, so let's return an error anyway.
1590    let result = unsafe { syscall_readonly!(SYS_sched_yield) };
1591    Error::from_syscall("sched_yield", result)?;
1592    Ok(())
1593}
1594
1595pub fn sys_socketpair(domain: u32, kind: u32, protocol: u32) -> Result<(Fd, Fd), Error> {
1596    let mut output: [c_int; 2] = [-1, -1];
1597    let fd = unsafe { syscall_readonly!(SYS_socketpair, domain, kind, protocol, &mut output[..]) };
1598    Error::from_syscall("socketpair", fd)?;
1599    Ok((Fd(output[0] as c_int), Fd(output[1] as c_int)))
1600}
1601
1602pub fn sys_pipe2(flags: c_uint) -> Result<(Fd, Fd), Error> {
1603    let mut pipes: [c_int; 2] = [-1, -1];
1604    let result = unsafe { syscall_readonly!(SYS_pipe2, pipes.as_mut_ptr(), flags) };
1605    Error::from_syscall("pipe2", result)?;
1606    Ok((Fd::from_raw_unchecked(pipes[0]), Fd::from_raw_unchecked(pipes[1])))
1607}
1608
1609pub fn sys_open(path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1610    let fd = unsafe { syscall_readonly!(SYS_open, path.as_ptr(), flags, 0) };
1611    Error::from_syscall("open", fd)?;
1612    Ok(Fd(fd as c_int))
1613}
1614
1615pub fn sys_openat(dir: FdRef, path: &CStr, flags: c_uint) -> Result<Fd, Error> {
1616    let fd = unsafe { syscall_readonly!(SYS_openat, dir, path.as_ptr(), flags, 0) };
1617    Error::from_syscall("openat", fd)?;
1618    Ok(Fd(fd as c_int))
1619}
1620
1621pub fn sys_memfd_create(name: &CStr, flags: c_uint) -> Result<Fd, Error> {
1622    let fd = unsafe { syscall_readonly!(SYS_memfd_create, name.as_ptr(), flags) };
1623    Error::from_syscall("memfd_create", fd)?;
1624    Ok(Fd(fd as c_int))
1625}
1626
1627pub fn sys_fcntl(fd: FdRef, cmd: u32, arg: u32) -> Result<i32, Error> {
1628    let result = unsafe { syscall_readonly!(SYS_fcntl, fd, cmd, arg) };
1629    Error::from_syscall("fcntl", result)?;
1630    Ok(result as i32)
1631}
1632
1633pub fn sys_fcntl_dupfd(fd: FdRef, min: c_int) -> Result<Fd, Error> {
1634    let fd = sys_fcntl(fd, F_DUPFD, min as u32)?;
1635    Ok(Fd::from_raw_unchecked(fd))
1636}
1637
1638pub fn sys_close_range(first_fd: c_int, last_fd: c_int, flags: c_uint) -> Result<(), Error> {
1639    let result = unsafe { syscall_readonly!(SYS_close_range, first_fd, last_fd, flags) };
1640    Error::from_syscall("close_range", result)
1641}
1642
1643pub fn sys_fallocate(fd: FdRef, mode: c_uint, offset: u64, length: u64) -> Result<(), Error> {
1644    let result = unsafe { syscall!(SYS_fallocate, fd, mode, offset, length) };
1645    Error::from_syscall("fallocate", result)
1646}
1647
1648pub fn sys_ftruncate(fd: FdRef, length: c_ulong) -> Result<(), Error> {
1649    let result = unsafe { syscall!(SYS_ftruncate, fd, length) };
1650    Error::from_syscall("ftruncate", result)
1651}
1652
1653pub fn sys_chdir(path: &CStr) -> Result<(), Error> {
1654    let result = unsafe { syscall_readonly!(SYS_chdir, path.as_ptr()) };
1655    Error::from_syscall("chdir", result)
1656}
1657
1658pub fn sys_fchdir(fd: FdRef) -> Result<(), Error> {
1659    let result = unsafe { syscall_readonly!(SYS_fchdir, fd) };
1660    Error::from_syscall("fchdir", result)
1661}
1662
1663pub unsafe fn sys_mmap(
1664    address: *mut c_void,
1665    length: c_size_t,
1666    protection: c_uint,
1667    flags: c_uint,
1668    fd: Option<FdRef>,
1669    offset: c_ulong,
1670) -> Result<*mut c_void, Error> {
1671    let result = syscall!(SYS_mmap, address, length, protection, flags, fd, offset);
1672    Error::from_syscall("mmap", result)?;
1673    Ok(result as *mut c_void)
1674}
1675
1676pub unsafe fn sys_munmap(address: *mut c_void, length: c_size_t) -> Result<(), Error> {
1677    let result = syscall!(SYS_munmap, address, length);
1678    Error::from_syscall("munmap", result)
1679}
1680
1681pub unsafe fn sys_mremap(
1682    address: *mut c_void,
1683    old_length: c_size_t,
1684    new_length: c_size_t,
1685    flags: c_uint,
1686    new_address: *mut c_void,
1687) -> Result<*mut c_void, Error> {
1688    let result = syscall!(SYS_mremap, address, old_length, new_length, flags, new_address);
1689    Error::from_syscall("mremap", result)?;
1690    Ok(result as *mut c_void)
1691}
1692
1693pub unsafe fn sys_mprotect(address: *mut c_void, length: c_size_t, protection: c_uint) -> Result<(), Error> {
1694    let result = syscall!(SYS_mprotect, address, length, protection);
1695    Error::from_syscall("mprotect", result)
1696}
1697
1698pub unsafe fn sys_madvise(address: *mut c_void, length: c_size_t, advice: c_uint) -> Result<(), Error> {
1699    let result = syscall!(SYS_madvise, address, length, advice);
1700    Error::from_syscall("madvise", result)
1701}
1702
1703pub fn sys_getpid() -> Result<pid_t, Error> {
1704    let result = unsafe { syscall_readonly!(SYS_getpid) };
1705    Error::from_syscall("getpid", result)?;
1706    Ok(result as pid_t)
1707}
1708
1709pub fn sys_getuid() -> Result<uid_t, Error> {
1710    let result = unsafe { syscall_readonly!(SYS_getuid) };
1711    Error::from_syscall("getuid", result)?;
1712    Ok(result as u32)
1713}
1714
1715pub fn sys_getgid() -> Result<gid_t, Error> {
1716    let result = unsafe { syscall_readonly!(SYS_getgid) };
1717    Error::from_syscall("getgid", result)?;
1718    Ok(result as u32)
1719}
1720
1721pub fn sys_kill(pid: pid_t, signal: c_uint) -> Result<(), Error> {
1722    let result = unsafe { syscall_readonly!(SYS_kill, pid, signal) };
1723    Error::from_syscall("kill", result)?;
1724    Ok(())
1725}
1726
1727pub unsafe fn sys_read_raw(fd: FdRef, buffer: *mut u8, length: usize) -> Result<c_size_t, Error> {
1728    let result = unsafe { syscall!(SYS_read, fd.raw(), buffer, length) };
1729    Error::from_syscall("read", result)?;
1730    Ok(result as c_size_t)
1731}
1732
1733pub fn sys_read(fd: FdRef, buffer: &mut [u8]) -> Result<c_size_t, Error> {
1734    unsafe { sys_read_raw(fd, buffer.as_mut_ptr(), buffer.len()) }
1735}
1736
1737pub fn sys_write(fd: FdRef, buffer: &[u8]) -> Result<c_size_t, Error> {
1738    let result = unsafe { syscall_readonly!(SYS_write, fd.raw(), buffer.as_ptr(), buffer.len()) };
1739    Error::from_syscall("write", result)?;
1740    Ok(result as c_size_t)
1741}
1742
1743pub fn sys_lseek(fd: FdRef, offset: i64, whence: u32) -> Result<u64, Error> {
1744    let result = unsafe { syscall_readonly!(SYS_lseek, fd.raw(), offset, whence) };
1745    Error::from_syscall("lseek", result)?;
1746    Ok(result as u64)
1747}
1748
1749pub unsafe fn sys_process_vm_readv(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1750    let result = unsafe {
1751        syscall!(
1752            SYS_process_vm_readv,
1753            pid,
1754            local_iovec,
1755            local_iovec.len(),
1756            remote_iovec,
1757            remote_iovec.len(),
1758            0
1759        )
1760    };
1761    Error::from_syscall("process_vm_readv", result)?;
1762    Ok(result as usize)
1763}
1764
1765pub unsafe fn sys_process_vm_writev(pid: pid_t, local_iovec: &[iovec], remote_iovec: &[iovec]) -> Result<usize, Error> {
1766    let result = unsafe {
1767        syscall!(
1768            SYS_process_vm_writev,
1769            pid,
1770            local_iovec,
1771            local_iovec.len(),
1772            remote_iovec,
1773            remote_iovec.len(),
1774            0
1775        )
1776    };
1777    Error::from_syscall("process_vm_writev", result)?;
1778    Ok(result as usize)
1779}
1780
1781pub unsafe fn sys_writev(fd: FdRef, iv: &[iovec]) -> Result<usize, Error> {
1782    let result = unsafe { syscall!(SYS_writev, fd, iv, iv.len()) };
1783    Error::from_syscall("writev", result)?;
1784    Ok(result as usize)
1785}
1786
1787pub fn sys_sendmsg(fd: FdRef, message: &msghdr, flags: u32) -> Result<usize, Error> {
1788    let result = unsafe { syscall_readonly!(SYS_sendmsg, fd.raw(), message as *const msghdr, flags) };
1789    Error::from_syscall("sendmsg", result)?;
1790    Ok(result as usize)
1791}
1792
1793pub fn sys_recvmsg(fd: FdRef, message: &mut msghdr, flags: u32) -> Result<usize, Error> {
1794    let result = unsafe { syscall!(SYS_recvmsg, fd.raw(), message as *mut msghdr, flags) };
1795    Error::from_syscall("recvmsg", result)?;
1796    Ok(result as usize)
1797}
1798
1799pub fn sys_exit(errcode: c_int) -> Result<(), Error> {
1800    let result = unsafe { syscall_readonly!(SYS_exit, errcode) };
1801    Error::from_syscall("exit", result)?;
1802    Ok(())
1803}
1804
1805pub fn sys_dup3(old_fd: c_int, new_fd: c_int, flags: c_uint) -> Result<(), Error> {
1806    let result = unsafe { syscall_readonly!(SYS_dup3, old_fd, new_fd, flags) };
1807    Error::from_syscall("dup3", result)?;
1808    Ok(())
1809}
1810
1811pub unsafe fn sys_execveat(
1812    dirfd: Option<FdRef>,
1813    path: &CStr,
1814    argv: &[*const c_uchar],
1815    envp: &[*const c_uchar],
1816    flags: c_uint,
1817) -> Result<(), Error> {
1818    let result = unsafe { syscall_readonly!(SYS_execveat, dirfd, path.as_ptr(), argv, envp, flags) };
1819    Error::from_syscall("execveat", result)?;
1820    Ok(())
1821}
1822
1823pub fn sys_ptrace_traceme() -> Result<(), Error> {
1824    let result = unsafe { syscall_readonly!(SYS_ptrace, 0, 0, 0) };
1825    Error::from_syscall("ptrace (PTRACE_TRACEME)", result)?;
1826    Ok(())
1827}
1828
1829pub fn sys_ptrace_interrupt(pid: pid_t) -> Result<(), Error> {
1830    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_INTERRUPT, pid, 0, 0) };
1831    Error::from_syscall("ptrace (PTRACE_INTERRUPT)", result)?;
1832    Ok(())
1833}
1834
1835pub fn sys_ptrace_attach(pid: pid_t) -> Result<(), Error> {
1836    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_ATTACH, pid, 0, 0) };
1837    Error::from_syscall("ptrace (PTRACE_ATTACH)", result)?;
1838    Ok(())
1839}
1840
1841pub fn sys_ptrace_seize(pid: pid_t) -> Result<(), Error> {
1842    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SEIZE, pid, 0, 0) };
1843    Error::from_syscall("ptrace (PTRACE_SEIZE)", result)?;
1844    Ok(())
1845}
1846
1847pub fn sys_ptrace_continue(pid: pid_t, signal: Option<u32>) -> Result<(), Error> {
1848    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_CONT, pid, 0, signal.unwrap_or(0)) };
1849    Error::from_syscall("ptrace (PTRACE_CONT)", result)?;
1850    Ok(())
1851}
1852
1853pub fn sys_ptrace_detach(pid: pid_t) -> Result<(), Error> {
1854    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_DETACH, pid, 0, 0) };
1855    Error::from_syscall("ptrace (PTRACE_DETACH)", result)?;
1856    Ok(())
1857}
1858
1859pub fn sys_ptrace_get_siginfo(pid: pid_t) -> Result<siginfo_t, Error> {
1860    let mut siginfo: siginfo_t = unsafe { core::mem::zeroed() };
1861    let result = unsafe {
1862        syscall!(
1863            SYS_ptrace,
1864            crate::arch_bindings::PTRACE_GETSIGINFO,
1865            pid,
1866            0,
1867            core::ptr::addr_of_mut!(siginfo)
1868        )
1869    };
1870    Error::from_syscall("ptrace (PTRACE_GETSIGINFO)", result)?;
1871    Ok(siginfo)
1872}
1873
/// Register block read by `sys_ptrace_getregs` and written by `sys_ptrace_setregs`.
///
/// NOTE(review): as a `#[repr(C)]` struct handed to the kernel, the field order
/// presumably has to match the kernel's `user_regs_struct` for x86_64 — do not
/// reorder the fields without confirming.
#[cfg(target_arch = "x86_64")]
#[repr(C)]
#[derive(Default, Debug)]
pub struct user_regs_struct {
    pub r15: c_ulong,
    pub r14: c_ulong,
    pub r13: c_ulong,
    pub r12: c_ulong,
    pub rbp: c_ulong,
    pub rbx: c_ulong,
    pub r11: c_ulong,
    pub r10: c_ulong,
    pub r9: c_ulong,
    pub r8: c_ulong,
    pub rax: c_ulong,
    pub rcx: c_ulong,
    pub rdx: c_ulong,
    pub rsi: c_ulong,
    pub rdi: c_ulong,
    pub orig_rax: c_ulong,
    pub rip: c_ulong,
    pub cs: c_ulong,
    pub flags: c_ulong,
    pub sp: c_ulong,
    pub ss: c_ulong,
    pub fs_base: c_ulong,
    pub gs_base: c_ulong,
    pub ds: c_ulong,
    pub es: c_ulong,
    pub fs: c_ulong,
    pub gs: c_ulong,
}
1906
1907pub fn sys_ptrace_getregs(pid: pid_t) -> Result<user_regs_struct, Error> {
1908    let mut output: MaybeUninit<user_regs_struct> = MaybeUninit::uninit();
1909    let result = unsafe { syscall!(SYS_ptrace, crate::arch_bindings::PTRACE_GETREGS, pid, 0, output.as_mut_ptr()) };
1910    Error::from_syscall("ptrace (PTRACE_GETREGS)", result)?;
1911
1912    unsafe { Ok(output.assume_init()) }
1913}
1914
1915pub fn sys_ptrace_setregs(pid: pid_t, regs: &user_regs_struct) -> Result<(), Error> {
1916    let regs: *const user_regs_struct = regs;
1917    let result = unsafe { syscall_readonly!(SYS_ptrace, crate::arch_bindings::PTRACE_SETREGS, pid, 0, regs) };
1918    Error::from_syscall("ptrace (PTRACE_SETREGS)", result)?;
1919    Ok(())
1920}
1921
1922pub fn sys_prctl_set_no_new_privs() -> Result<(), Error> {
1923    const PR_SET_NO_NEW_PRIVS: usize = 38;
1924    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) };
1925    Error::from_syscall("prctl(PR_SET_NO_NEW_PRIVS)", result)
1926}
1927
1928pub fn sys_prctl_cap_ambient_clear_all() -> Result<(), Error> {
1929    const PR_CAP_AMBIENT: usize = 47;
1930    const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
1931    let result = unsafe { syscall_readonly!(SYS_prctl, PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) };
1932    Error::from_syscall("prctl(PR_CAP_AMBIENT)", result)
1933}
1934
1935pub fn sys_prctl_set_securebits(bits: u32) -> Result<(), Error> {
1936    const PR_SET_SECUREBITS: usize = 28;
1937    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_SECUREBITS, bits, 0, 0, 0) };
1938    Error::from_syscall("prctl(PR_SET_SECUREBITS)", result)
1939}
1940
1941pub fn sys_prctl_set_name(name: &[u8; 16]) -> Result<(), Error> {
1942    const PR_SET_NAME: usize = 15;
1943    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_NAME, name.as_ptr(), 0, 0, 0) };
1944    Error::from_syscall("prctl(PR_SET_NAME)", result)
1945}
1946
1947pub fn sys_prctl_set_dumpable(value: bool) -> Result<(), Error> {
1948    const PR_SET_DUMPABLE: usize = 4;
1949    let value = usize::from(value);
1950    let result = unsafe { syscall_readonly!(SYS_prctl, PR_SET_DUMPABLE, value, 0, 0, 0) };
1951    Error::from_syscall("prctl(PR_SET_DUMPABLE)", result)
1952}
1953
1954pub fn sys_prctl_get_dumpable() -> Result<bool, Error> {
1955    const PR_GET_DUMPABLE: usize = 3;
1956    let result = unsafe { syscall_readonly!(SYS_prctl, PR_GET_DUMPABLE, 0, 0, 0, 0) };
1957    Error::from_syscall("prctl(PR_GET_DUMPABLE)", result)?;
1958    if result == 0 {
1959        Ok(false)
1960    } else {
1961        Ok(true)
1962    }
1963}
1964
1965pub fn sys_capset(header: &__user_cap_header_struct, data: &[__user_cap_data_struct; 2]) -> Result<(), Error> {
1966    let result = unsafe {
1967        syscall_readonly!(
1968            SYS_capset,
1969            header as *const __user_cap_header_struct,
1970            data as *const __user_cap_data_struct
1971        )
1972    };
1973    Error::from_syscall("capset", result)
1974}
1975
1976pub fn sys_capset_drop_all() -> Result<(), Error> {
1977    let cap_user_header = __user_cap_header_struct {
1978        version: _LINUX_CAPABILITY_VERSION_3,
1979        pid: 0,
1980    };
1981    let cap_user_data = [__user_cap_data_struct {
1982        effective: 0,
1983        inheritable: 0,
1984        permitted: 0,
1985    }; 2];
1986
1987    sys_capset(&cap_user_header, &cap_user_data)
1988}
1989
1990pub fn sys_seccomp_set_mode_filter(filter: &[sock_filter]) -> Result<(), Error> {
1991    let filter = sock_fprog {
1992        length: if let Ok(length) = c_ushort::try_from(filter.len()) {
1993            length
1994        } else {
1995            return Err(Error::from_errno("seccomp(SECCOMP_SET_MODE_FILTER)", EINVAL as i32));
1996        },
1997        filter: filter.as_ptr(),
1998    };
1999
2000    let result = unsafe { syscall_readonly!(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, core::ptr::addr_of!(filter)) };
2001    Error::from_syscall("seccomp(SECCOMP_SET_MODE_FILTER)", result)
2002}
2003
2004pub fn sys_setrlimit(resource: u32, limit: &rlimit) -> Result<(), Error> {
2005    let result = unsafe { syscall_readonly!(SYS_setrlimit, resource, limit as *const rlimit) };
2006    Error::from_syscall("setrlimit", result)
2007}
2008
2009pub fn sys_sethostname(name: &str) -> Result<(), Error> {
2010    let result = unsafe { syscall_readonly!(SYS_sethostname, name.as_ptr(), name.len()) };
2011    Error::from_syscall("sethostname", result)
2012}
2013
2014pub fn sys_setdomainname(name: &str) -> Result<(), Error> {
2015    let result = unsafe { syscall_readonly!(SYS_setdomainname, name.as_ptr(), name.len()) };
2016    Error::from_syscall("setdomainname", result)
2017}
2018
2019pub fn sys_mount(dev_name: &CStr, dir_name: &CStr, kind: &CStr, flags: u32, data: Option<&CStr>) -> Result<(), Error> {
2020    let result = unsafe {
2021        syscall_readonly!(
2022            SYS_mount,
2023            dev_name.as_ptr(),
2024            dir_name.as_ptr(),
2025            kind.as_ptr(),
2026            flags,
2027            data.map_or(core::ptr::null(), |data| data.as_ptr())
2028        )
2029    };
2030    Error::from_syscall("mount", result)
2031}
2032
2033pub fn sys_umount2(target: &CStr, flags: u32) -> Result<(), Error> {
2034    let result = unsafe { syscall_readonly!(SYS_umount2, target.as_ptr(), flags) };
2035    Error::from_syscall("umount2", result)
2036}
2037
2038pub fn sys_pivot_root(new_root: &CStr, old_root: &CStr) -> Result<(), Error> {
2039    let result = unsafe { syscall_readonly!(SYS_pivot_root, new_root.as_ptr(), old_root.as_ptr()) };
2040    Error::from_syscall("pivot_root", result)
2041}
2042
2043pub fn sys_unshare(flags: u32) -> Result<(), Error> {
2044    let result = unsafe { syscall_readonly!(SYS_unshare, flags) };
2045    Error::from_syscall("unshare", result)
2046}
2047
2048/// Calls the `futex` syscall with `FUTEX_WAIT` operation.
2049///
2050/// This will block *if* the value of the `futex` is equal to the `expected_value`.
2051///
2052/// Possible non-fatal errors:
2053///   - `EAGAIN`: the value of `futex` is not equal to `expected_value`
2054///   - `EINTR`: the syscall was interrupted by a signal
2055///   - `ETIMEDOUT`: the specified timeout has elapsed without the futex being woken up
2056pub fn sys_futex_wait(futex: &AtomicU32, expected_value: u32, timeout: Option<Duration>) -> Result<(), Error> {
2057    let ts: Option<timespec> = timeout.map(|timeout| timespec {
2058        tv_sec: timeout.as_secs() as i64,
2059        tv_nsec: u64::from(timeout.subsec_nanos()) as i64,
2060    });
2061
2062    let result = unsafe {
2063        syscall!(
2064            SYS_futex,
2065            futex as *const AtomicU32,
2066            FUTEX_WAIT,
2067            expected_value,
2068            ts.as_ref().map_or(core::ptr::null(), |ts| ts as *const timespec)
2069        )
2070    };
2071    Error::from_syscall("futex (wait)", result)
2072}
2073
2074/// Wakes up at most one thread waiting on `futex`.
2075///
2076/// Will return `true` if anybody was woken up.
2077pub fn sys_futex_wake_one(futex: &AtomicU32) -> Result<bool, Error> {
2078    let result = unsafe { syscall_readonly!(SYS_futex, futex as *const AtomicU32, FUTEX_WAKE, 1) };
2079    Error::from_syscall("futex (wake)", result)?;
2080    Ok(result == 1)
2081}
2082
2083pub fn sys_set_tid_address(address: *const u32) -> Result<(), Error> {
2084    let result = unsafe { syscall_readonly!(SYS_set_tid_address, address) };
2085    Error::from_syscall("set_tid_address", result)?;
2086    Ok(())
2087}
2088
2089pub unsafe fn sys_rt_sigaction(signal: u32, new_action: &kernel_sigaction, old_action: Option<&mut kernel_sigaction>) -> Result<(), Error> {
2090    let result = unsafe {
2091        syscall_readonly!(
2092            SYS_rt_sigaction,
2093            signal,
2094            new_action as *const kernel_sigaction,
2095            old_action.map_or(core::ptr::null_mut(), |old_action| old_action as *mut kernel_sigaction),
2096            core::mem::size_of::<kernel_sigset_t>()
2097        )
2098    };
2099    Error::from_syscall("rt_sigaction", result)?;
2100    Ok(())
2101}
2102
2103pub unsafe fn sys_rt_sigprocmask(how: u32, new_sigset: &kernel_sigset_t, old_sigset: Option<&mut kernel_sigset_t>) -> Result<(), Error> {
2104    let result = unsafe {
2105        syscall_readonly!(
2106            SYS_rt_sigprocmask,
2107            how,
2108            new_sigset as *const kernel_sigset_t,
2109            old_sigset.map_or(core::ptr::null_mut(), |old_sigset| old_sigset as *mut kernel_sigset_t),
2110            core::mem::size_of::<kernel_sigset_t>()
2111        )
2112    };
2113    Error::from_syscall("rt_sigprocmask", result)?;
2114    Ok(())
2115}
2116
2117pub unsafe fn sys_sigaltstack(new_stack: &stack_t, old_stack: Option<&mut stack_t>) -> Result<(), Error> {
2118    let result = unsafe {
2119        syscall_readonly!(
2120            SYS_sigaltstack,
2121            new_stack as *const stack_t,
2122            old_stack.map_or(core::ptr::null_mut(), |old_stack| old_stack as *mut stack_t)
2123        )
2124    };
2125    Error::from_syscall("sigaltstack", result)?;
2126    Ok(())
2127}
2128
2129pub fn sys_clock_gettime(clock_id: u32) -> Result<Duration, Error> {
2130    let mut output = timespec { tv_sec: 0, tv_nsec: 0 };
2131    let result = unsafe { syscall_readonly!(SYS_clock_gettime, clock_id, core::ptr::addr_of_mut!(output)) };
2132    Error::from_syscall("clock_gettime", result)?;
2133
2134    let duration = Duration::new(output.tv_sec as u64, output.tv_nsec as u32);
2135    Ok(duration)
2136}
2137
2138pub fn sys_nanosleep(duration: Duration) -> Result<Option<Duration>, Error> {
2139    let duration = timespec {
2140        tv_sec: duration.as_secs() as i64,
2141        tv_nsec: u64::from(duration.subsec_nanos()) as i64,
2142    };
2143
2144    let mut remaining = timespec { tv_sec: 0, tv_nsec: 0 };
2145    let result = unsafe { syscall_readonly!(SYS_nanosleep, core::ptr::addr_of!(duration), core::ptr::addr_of_mut!(remaining)) };
2146    let error = Error::from_syscall("nanosleep", result);
2147    if let Err(error) = error {
2148        if error.errno() == EINTR {
2149            let remaining = Duration::new(remaining.tv_sec as u64, remaining.tv_nsec as u32);
2150            Ok(Some(remaining))
2151        } else {
2152            Err(error)
2153        }
2154    } else {
2155        Ok(None)
2156    }
2157}
2158
2159pub fn sys_waitid(which: u32, pid: pid_t, info: &mut siginfo_t, options: u32, usage: Option<&mut rusage>) -> Result<(), Error> {
2160    let result = unsafe {
2161        syscall_readonly!(
2162            SYS_waitid,
2163            which,
2164            pid,
2165            info as *mut siginfo_t,
2166            options,
2167            usage.map_or(core::ptr::null_mut(), |usage| usage as *mut rusage)
2168        )
2169    };
2170
2171    Error::from_syscall("waitid", result)?;
2172    Ok(())
2173}
2174
2175pub fn vm_read_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2176    pid: pid_t,
2177    local: [&mut [MaybeUninit<u8>]; N_LOCAL],
2178    remote: [(usize, usize); N_REMOTE],
2179) -> Result<usize, Error> {
2180    let local_iovec = local.map(|slice| iovec {
2181        iov_base: slice.as_mut_ptr().cast(),
2182        iov_len: slice.len() as u64,
2183    });
2184    let remote_iovec = remote.map(|(address, length)| iovec {
2185        iov_base: address as *mut c_void,
2186        iov_len: length as u64,
2187    });
2188    unsafe { sys_process_vm_readv(pid, &local_iovec, &remote_iovec) }
2189}
2190
2191pub fn vm_write_memory<const N_LOCAL: usize, const N_REMOTE: usize>(
2192    pid: pid_t,
2193    local: [&[u8]; N_LOCAL],
2194    remote: [(usize, usize); N_REMOTE],
2195) -> Result<usize, Error> {
2196    let local_iovec = local.map(|slice| iovec {
2197        iov_base: slice.as_ptr().cast_mut().cast(),
2198        iov_len: slice.len() as u64,
2199    });
2200    let remote_iovec = remote.map(|(address, length)| iovec {
2201        iov_base: address as *mut c_void,
2202        iov_len: length as u64,
2203    });
2204    unsafe { sys_process_vm_writev(pid, &local_iovec, &remote_iovec) }
2205}
2206
2207pub fn writev<const N: usize>(fd: FdRef, list: [&[u8]; N]) -> Result<usize, Error> {
2208    let iv = list.map(|slice| iovec {
2209        iov_base: slice.as_ptr().cast_mut().cast(),
2210        iov_len: slice.len() as u64,
2211    });
2212    unsafe { sys_writev(fd, &iv) }
2213}
2214
2215#[inline(always)] // To prevent the buffer from being copied.
2216pub fn readdir(dirfd: FdRef) -> Dirent64Iter {
2217    Dirent64Iter {
2218        dirfd,
2219        buffer: [0; 1024], // TODO: Use MaybeUninit.
2220        bytes_available: 0,
2221        position: 0,
2222    }
2223}
2224
2225#[repr(transparent)]
2226pub struct Dirent64<'a> {
2227    raw: linux_dirent64,
2228    _lifetime: core::marker::PhantomData<&'a [u8]>,
2229}
2230
2231impl<'a> Dirent64<'a> {
2232    pub fn d_type(&self) -> c_uchar {
2233        self.raw.d_type
2234    }
2235
2236    pub fn d_name(&self) -> &'a [u8] {
2237        unsafe {
2238            let name = self.raw.d_name.as_ptr();
2239            let length = {
2240                let mut p = self.raw.d_name.as_ptr();
2241                while *p != 0 {
2242                    p = p.add(1);
2243                }
2244
2245                p as usize - name as usize
2246            };
2247
2248            core::slice::from_raw_parts(name.cast(), length)
2249        }
2250    }
2251}
2252
2253pub struct Dirent64Iter<'a> {
2254    dirfd: FdRef<'a>,
2255    buffer: [u8; 1024],
2256    bytes_available: usize,
2257    position: usize,
2258}
2259
2260impl<'a> Iterator for Dirent64Iter<'a> {
2261    type Item = Result<Dirent64<'a>, Error>;
2262    fn next(&mut self) -> Option<Self::Item> {
2263        loop {
2264            if self.position < self.bytes_available {
2265                let dirent = unsafe { core::ptr::read_unaligned(self.buffer.as_ptr().add(self.position).cast::<Dirent64>()) };
2266
2267                self.position += usize::from(dirent.raw.d_reclen);
2268                return Some(Ok(dirent));
2269            }
2270
2271            match sys_getdents64(self.dirfd, &mut self.buffer) {
2272                Ok(Some(bytes_available)) => self.bytes_available = bytes_available,
2273                Ok(None) => return None,
2274                Err(error) => return Some(Err(error)),
2275            };
2276        }
2277    }
2278}
2279
2280pub fn sendfd(socket: FdRef, fd: FdRef) -> Result<(), Error> {
2281    let mut dummy: c_int = 0;
2282    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2283
2284    let mut iov = iovec {
2285        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2286        iov_len: core::mem::size_of_val(&dummy) as u64,
2287    };
2288
2289    let mut header = msghdr {
2290        msg_name: core::ptr::null_mut(),
2291        msg_namelen: 0,
2292        msg_iov: &mut iov,
2293        msg_iovlen: 1,
2294        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2295        msg_controllen: core::mem::size_of_val(&buffer),
2296        msg_flags: 0,
2297    };
2298
2299    let control_header = cmsghdr {
2300        cmsg_len: CMSG_LEN(core::mem::size_of::<c_int>()),
2301        cmsg_level: SOL_SOCKET,
2302        cmsg_type: SCM_RIGHTS,
2303    };
2304
2305    #[allow(clippy::cast_ptr_alignment)]
2306    unsafe {
2307        core::ptr::write_unaligned(CMSG_FIRSTHDR(&header), control_header);
2308        core::ptr::write_unaligned(CMSG_DATA(buffer.as_mut_ptr().cast::<cmsghdr>()).cast::<c_int>(), fd.raw());
2309    }
2310
2311    header.msg_controllen = CMSG_LEN(core::mem::size_of::<c_int>());
2312    sys_sendmsg(socket, &header, MSG_NOSIGNAL)?;
2313
2314    Ok(())
2315}
2316
2317pub fn recvfd(socket: FdRef) -> Result<Fd, Error> {
2318    let mut dummy: c_int = 0;
2319    let mut buffer = [0; CMSG_SPACE(core::mem::size_of::<c_int>())];
2320
2321    let mut iov = iovec {
2322        iov_base: core::ptr::addr_of_mut!(dummy).cast::<c_void>(),
2323        iov_len: core::mem::size_of_val(&dummy) as u64,
2324    };
2325
2326    let mut header = msghdr {
2327        msg_name: core::ptr::null_mut(),
2328        msg_namelen: 0,
2329        msg_iov: &mut iov,
2330        msg_iovlen: 1,
2331        msg_control: buffer.as_mut_ptr().cast::<c_void>(),
2332        msg_controllen: core::mem::size_of_val(&buffer),
2333        msg_flags: 0,
2334    };
2335
2336    let count = sys_recvmsg(socket, &mut header, 0)?;
2337    if count == 0 {
2338        return Err(Error::from_str("recvfd failed: received zero bytes"));
2339    }
2340
2341    if count != core::mem::size_of::<c_int>() {
2342        return Err(Error::from_str("recvfd failed: received unexpected number of bytes"));
2343    }
2344
2345    if header.msg_controllen != CMSG_SPACE(core::mem::size_of::<c_int>()) {
2346        return Err(Error::from_str("recvfd failed: invalid control message size"));
2347    }
2348
2349    let control_header = unsafe { &mut *header.msg_control.cast::<cmsghdr>() };
2350
2351    if control_header.cmsg_level != SOL_SOCKET {
2352        return Err(Error::from_str("recvfd failed: invalid control message level"));
2353    }
2354
2355    if control_header.cmsg_type != SCM_RIGHTS {
2356        return Err(Error::from_str("recvfd failed: invalid control message type"));
2357    }
2358
2359    let fd = unsafe { core::ptr::read_unaligned(CMSG_DATA(control_header).cast::<c_int>()) };
2360
2361    Ok(Fd::from_raw_unchecked(fd))
2362}