rustix/backend/linux_raw/runtime/
syscalls.rs

1//! linux_raw syscalls supporting `rustix::runtime`.
2//!
3//! # Safety
4//!
5//! See the `rustix::backend` module documentation for details.
6#![allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
7
8use crate::backend::c;
9#[cfg(target_arch = "x86")]
10use crate::backend::conv::by_mut;
11#[cfg(target_arch = "x86_64")]
12use crate::backend::conv::c_uint;
13use crate::backend::conv::{
14    by_ref, c_int, opt_ref, ret, ret_c_int, ret_c_int_infallible, ret_error, ret_infallible,
15    ret_void_star, size_of, zero,
16};
17#[cfg(feature = "fs")]
18use crate::fd::BorrowedFd;
19use crate::ffi::CStr;
20#[cfg(feature = "fs")]
21use crate::fs::AtFlags;
22use crate::io;
23use crate::pid::{Pid, RawPid};
24use crate::runtime::{Fork, How, KernelSigSet, KernelSigaction, Siginfo, Stack};
25use crate::signal::Signal;
26use crate::timespec::Timespec;
27use core::ffi::c_void;
28use core::mem::MaybeUninit;
29#[cfg(all(target_pointer_width = "32", not(feature = "linux_5_1")))]
30use linux_raw_sys::general::__kernel_old_timespec;
31#[cfg(target_arch = "x86_64")]
32use linux_raw_sys::general::ARCH_SET_FS;
33
34#[inline]
35pub(crate) unsafe fn kernel_fork() -> io::Result<Fork> {
36    let mut child_pid = MaybeUninit::<RawPid>::uninit();
37
38    // Unix `fork` only returns the child PID in the parent; we'd like it in
39    // the child too, so set `CLONE_CHILD_SETTID` and pass in the address of a
40    // memory location to store it to in the child.
41    //
42    // Architectures differ on the order of the parameters.
43    #[cfg(target_arch = "x86_64")]
44    let pid = ret_c_int(syscall!(
45        __NR_clone,
46        c_int(c::SIGCHLD | c::CLONE_CHILD_SETTID),
47        zero(),
48        zero(),
49        &mut child_pid,
50        zero()
51    ))?;
52    #[cfg(any(
53        target_arch = "aarch64",
54        target_arch = "arm",
55        target_arch = "mips",
56        target_arch = "mips32r6",
57        target_arch = "mips64",
58        target_arch = "mips64r6",
59        target_arch = "powerpc",
60        target_arch = "powerpc64",
61        target_arch = "riscv64",
62        target_arch = "s390x",
63        target_arch = "x86"
64    ))]
65    let pid = ret_c_int(syscall!(
66        __NR_clone,
67        c_int(c::SIGCHLD | c::CLONE_CHILD_SETTID),
68        zero(),
69        zero(),
70        zero(),
71        &mut child_pid
72    ))?;
73
74    Ok(if let Some(pid) = Pid::from_raw(pid) {
75        Fork::ParentOf(pid)
76    } else {
77        Fork::Child(Pid::from_raw_unchecked(child_pid.assume_init()))
78    })
79}
80
/// Issues the `execveat` syscall, forwarding `dirfd`, `path`, `args`,
/// `env_vars`, and `flags` unchanged.
///
/// The result is a bare [`io::Errno`] rather than an `io::Result`: per
/// execveat(2), a successful call replaces the process image and does not
/// return to the caller.
///
/// # Safety
///
/// See the `rustix::backend` module documentation for details.
///
/// NOTE(review): `args` and `env_vars` are presumably NULL-terminated arrays
/// of NUL-terminated strings, as execve(2) requires — confirm against
/// callers.
#[cfg(feature = "fs")]
pub(crate) unsafe fn execveat(
    dirfd: BorrowedFd<'_>,
    path: &CStr,
    args: *const *const u8,
    env_vars: *const *const u8,
    flags: AtFlags,
) -> io::Errno {
    let raw = syscall_readonly!(__NR_execveat, dirfd, path, args, env_vars, flags);
    ret_error(raw)
}
98
99pub(crate) unsafe fn execve(
100    path: &CStr,
101    args: *const *const u8,
102    env_vars: *const *const u8,
103) -> io::Errno {
104    ret_error(syscall_readonly!(__NR_execve, path, args, env_vars))
105}
106
107pub(crate) mod tls {
108    use super::*;
109    #[cfg(target_arch = "x86")]
110    use crate::backend::runtime::tls::UserDesc;
111
112    #[cfg(target_arch = "x86")]
113    #[inline]
114    pub(crate) unsafe fn set_thread_area(u_info: &mut UserDesc) -> io::Result<()> {
115        ret(syscall!(__NR_set_thread_area, by_mut(u_info)))
116    }
117
118    #[cfg(target_arch = "arm")]
119    #[inline]
120    pub(crate) unsafe fn arm_set_tls(data: *mut c::c_void) -> io::Result<()> {
121        ret(syscall_readonly!(__ARM_NR_set_tls, data))
122    }
123
124    #[cfg(target_arch = "x86_64")]
125    #[inline]
126    pub(crate) unsafe fn set_fs(data: *mut c::c_void) {
127        ret_infallible(syscall_readonly!(
128            __NR_arch_prctl,
129            c_uint(ARCH_SET_FS),
130            data,
131            zero(),
132            zero(),
133            zero()
134        ))
135    }
136
137    #[inline]
138    pub(crate) unsafe fn set_tid_address(data: *mut c::c_void) -> Pid {
139        let tid: i32 = ret_c_int_infallible(syscall_readonly!(__NR_set_tid_address, data));
140        Pid::from_raw_unchecked(tid)
141    }
142
143    #[inline]
144    pub(crate) fn exit_thread(code: c::c_int) -> ! {
145        unsafe { syscall_noreturn!(__NR_exit, c_int(code)) }
146    }
147}
148
149#[inline]
150pub(crate) unsafe fn kernel_sigaction(
151    signal: Signal,
152    new: Option<KernelSigaction>,
153) -> io::Result<KernelSigaction> {
154    let mut old = MaybeUninit::<KernelSigaction>::uninit();
155    let new = opt_ref(new.as_ref());
156    ret(syscall!(
157        __NR_rt_sigaction,
158        signal,
159        new,
160        &mut old,
161        size_of::<KernelSigSet, _>()
162    ))?;
163    Ok(old.assume_init())
164}
165
166#[inline]
167pub(crate) unsafe fn kernel_sigaltstack(new: Option<Stack>) -> io::Result<Stack> {
168    let mut old = MaybeUninit::<Stack>::uninit();
169    let new = opt_ref(new.as_ref());
170    ret(syscall!(__NR_sigaltstack, new, &mut old))?;
171    Ok(old.assume_init())
172}
173
174#[inline]
175pub(crate) unsafe fn tkill(tid: Pid, sig: Signal) -> io::Result<()> {
176    ret(syscall_readonly!(__NR_tkill, tid, sig))
177}
178
179#[inline]
180pub(crate) unsafe fn kernel_sigprocmask(
181    how: How,
182    new: Option<&KernelSigSet>,
183) -> io::Result<KernelSigSet> {
184    let mut old = MaybeUninit::<KernelSigSet>::uninit();
185    let new = opt_ref(new);
186    ret(syscall!(
187        __NR_rt_sigprocmask,
188        how,
189        new,
190        &mut old,
191        size_of::<KernelSigSet, _>()
192    ))?;
193    Ok(old.assume_init())
194}
195
196#[inline]
197pub(crate) fn kernel_sigpending() -> KernelSigSet {
198    let mut pending = MaybeUninit::<KernelSigSet>::uninit();
199    unsafe {
200        ret_infallible(syscall!(
201            __NR_rt_sigpending,
202            &mut pending,
203            size_of::<KernelSigSet, _>()
204        ));
205        pending.assume_init()
206    }
207}
208
209#[inline]
210pub(crate) fn kernel_sigsuspend(set: &KernelSigSet) -> io::Result<()> {
211    unsafe {
212        ret(syscall_readonly!(
213            __NR_rt_sigsuspend,
214            by_ref(set),
215            size_of::<KernelSigSet, _>()
216        ))
217    }
218}
219
220#[inline]
221pub(crate) unsafe fn kernel_sigwait(set: &KernelSigSet) -> io::Result<Signal> {
222    Ok(Signal::from_raw_unchecked(ret_c_int(syscall_readonly!(
223        __NR_rt_sigtimedwait,
224        by_ref(set),
225        zero(),
226        zero(),
227        size_of::<KernelSigSet, _>()
228    ))?))
229}
230
231#[inline]
232pub(crate) unsafe fn kernel_sigwaitinfo(set: &KernelSigSet) -> io::Result<Siginfo> {
233    let mut info = MaybeUninit::<Siginfo>::uninit();
234    let _signum = ret_c_int(syscall!(
235        __NR_rt_sigtimedwait,
236        by_ref(set),
237        &mut info,
238        zero(),
239        size_of::<KernelSigSet, _>()
240    ))?;
241    Ok(info.assume_init())
242}
243
244#[inline]
245pub(crate) unsafe fn kernel_sigtimedwait(
246    set: &KernelSigSet,
247    timeout: Option<&Timespec>,
248) -> io::Result<Siginfo> {
249    let mut info = MaybeUninit::<Siginfo>::uninit();
250
251    // `rt_sigtimedwait_time64` was introduced in Linux 5.1. The old
252    // `rt_sigtimedwait` syscall is not y2038-compatible on 32-bit
253    // architectures.
254    #[cfg(target_pointer_width = "32")]
255    {
256        // If we don't have Linux 5.1, and the timeout fits in a
257        // `__kernel_old_timespec`, use plain `rt_sigtimedwait`.
258        //
259        // We do this unconditionally, rather than trying
260        // `rt_sigtimedwait_time64` and falling back on `Errno::NOSYS`, because
261        // seccomp configurations will sometimes abort the process on syscalls
262        // they don't recognize.
263        #[cfg(not(feature = "linux_5_1"))]
264        {
265            // If we don't have a timeout, or if we can convert the timeout to
266            // a `__kernel_old_timespec`, the use `__NR_futex`.
267            fn convert(timeout: &Timespec) -> Option<__kernel_old_timespec> {
268                Some(__kernel_old_timespec {
269                    tv_sec: timeout.tv_sec.try_into().ok()?,
270                    tv_nsec: timeout.tv_nsec.try_into().ok()?,
271                })
272            }
273            let old_timeout = if let Some(timeout) = timeout {
274                match convert(timeout) {
275                    // Could not convert timeout.
276                    None => None,
277                    // Could convert timeout. Ok!
278                    Some(old_timeout) => Some(Some(old_timeout)),
279                }
280            } else {
281                // No timeout. Ok!
282                Some(None)
283            };
284            if let Some(old_timeout) = old_timeout {
285                return ret_c_int(syscall!(
286                    __NR_rt_sigtimedwait,
287                    by_ref(set),
288                    &mut info,
289                    opt_ref(old_timeout.as_ref()),
290                    size_of::<KernelSigSet, _>()
291                ))
292                .map(|sig| {
293                    debug_assert_eq!(
294                        sig,
295                        info.assume_init_ref()
296                            .__bindgen_anon_1
297                            .__bindgen_anon_1
298                            .si_signo
299                    );
300                    info.assume_init()
301                });
302            }
303        }
304
305        ret_c_int(syscall!(
306            __NR_rt_sigtimedwait_time64,
307            by_ref(set),
308            &mut info,
309            opt_ref(timeout),
310            size_of::<KernelSigSet, _>()
311        ))
312        .map(|sig| {
313            debug_assert_eq!(
314                sig,
315                info.assume_init_ref()
316                    .__bindgen_anon_1
317                    .__bindgen_anon_1
318                    .si_signo
319            );
320            info.assume_init()
321        })
322    }
323
324    #[cfg(target_pointer_width = "64")]
325    {
326        let _signum = ret_c_int(syscall!(
327            __NR_rt_sigtimedwait,
328            by_ref(set),
329            &mut info,
330            opt_ref(timeout),
331            size_of::<KernelSigSet, _>()
332        ))?;
333        Ok(info.assume_init())
334    }
335}
336
337#[inline]
338pub(crate) fn exit_group(code: c::c_int) -> ! {
339    unsafe { syscall_noreturn!(__NR_exit_group, c_int(code)) }
340}
341
342#[inline]
343pub(crate) unsafe fn kernel_brk(addr: *mut c::c_void) -> io::Result<*mut c_void> {
344    // This is non-`readonly`, to prevent loads from being reordered past it.
345    ret_void_star(syscall!(__NR_brk, addr))
346}