maelstrom_worker_child/
lib.rs

1//! Helper library for maelstrom-worker.
2//!
3//! This code is run in the child process after the call to `clone`. In this environment, since the
4//! cloning process is multi-threaded, there is very little that we can do safely. In particular,
5//! we can't allocate from the heap. This library is separate so we can make it `no_std` and manage
6//! its dependencies carefully.
7#![no_std]
8
9use core::{cell::UnsafeCell, ffi::CStr, fmt::Write as _, result};
10use maelstrom_linux::{
11    self as linux, AccessMode, CloseRangeFirst, CloseRangeFlags, CloseRangeLast, Errno, Fd,
12    FileMode, FsconfigCommand, FsmountFlags, FsopenFlags, Gid, MountAttrs, MountFlags,
13    MoveMountFlags, OpenFlags, OpenTreeFlags, OwnedFd, Sockaddr, SocketDomain, SocketProtocol,
14    SocketType, Uid, UmountFlags,
15};
16
/// A [`core::fmt::Write`] sink that formats into a caller-supplied byte slice.
///
/// Nothing is allocated: bytes are appended to `slice` starting at `offset`, and a write that
/// does not fit in the remaining space is rejected.
struct SliceFmt<'a> {
    /// Destination buffer.
    slice: &'a mut [u8],
    /// Number of bytes of `slice` that have been filled so far.
    offset: usize,
}

impl<'a> SliceFmt<'a> {
    /// Create a writer that starts filling `slice` from its first byte.
    fn new(slice: &'a mut [u8]) -> Self {
        SliceFmt { slice, offset: 0 }
    }
}

impl core::fmt::Write for SliceFmt<'_> {
    /// Append `s` to the buffer.
    ///
    /// Returns `Err` without copying anything if `s` is longer than the space that is left.
    fn write_str(&mut self, s: &str) -> core::fmt::Result {
        let src = s.as_bytes();
        let end = self.offset + src.len();

        // `get_mut` yields `None` exactly when `end` runs past the end of the buffer.
        let dest = self
            .slice
            .get_mut(self.offset..end)
            .ok_or(core::fmt::Error)?;
        dest.copy_from_slice(src);
        self.offset = end;

        Ok(())
    }
}
41
42#[derive(Clone, Copy)]
43pub struct FdSlot<'a>(&'a UnsafeCell<Fd>);
44
45impl<'a> FdSlot<'a> {
46    pub fn new(slot: &'a UnsafeCell<Fd>) -> Self {
47        Self(slot)
48    }
49
50    pub fn set(&self, fd: Fd) {
51        let fd_ptr = self.0.get();
52        unsafe { *fd_ptr = fd };
53    }
54
55    pub fn get(&self) -> Fd {
56        let fd_ptr = self.0.get();
57        unsafe { *fd_ptr }
58    }
59}
60
61impl linux::AsFd for FdSlot<'_> {
62    fn fd(&self) -> Fd {
63        self.get()
64    }
65}
66
/// A syscall to call. Syscalls are meant to be placed in a slice, which we refer to as a script.
///
/// Variants that produce a file descriptor store it in an [`FdSlot`] (their `out` field), and
/// several variants read the descriptor they operate on from an [`FdSlot`]. Because copies of an
/// [`FdSlot`] share one cell, a descriptor produced by one script entry can be consumed by
/// another entry that holds a copy of the same slot.
pub enum Syscall<'a> {
    /// Call `linux::bind` on the descriptor in `fd` with address `addr`.
    Bind {
        fd: FdSlot<'a>,
        addr: &'a Sockaddr,
    },
    /// Call `linux::chdir` with `path`.
    Chdir {
        path: &'a CStr,
    },
    /// Call `linux::close_range` with `first`, `last` and `flags`.
    CloseRange {
        first: CloseRangeFirst,
        last: CloseRangeLast,
        flags: CloseRangeFlags,
    },
    /// Call `linux::dup2` with `from` and `to`. The value it returns on success is discarded.
    Dup2 {
        from: Fd,
        to: Fd,
    },
    /// Call `linux::execve` with `path`, `argv` and `envp`.
    Execve {
        path: &'a CStr,
        argv: &'a [Option<&'a u8>],
        envp: &'a [Option<&'a u8>],
    },
    /// Try each entry of `paths` in order and `execve` the first one for which
    /// `linux::access(path, AccessMode::X)` succeeds. If none passes the check, `execve`
    /// `fallback` instead.
    ExecveList {
        paths: &'a [&'a CStr],
        fallback: &'a CStr,
        argv: &'a [Option<&'a u8>],
        envp: &'a [Option<&'a u8>],
    },
    /// Call `linux::fsconfig` on the descriptor in `fd` with `command`, `key`, `value` and `aux`.
    Fsconfig {
        fd: FdSlot<'a>,
        command: FsconfigCommand,
        key: Option<&'a CStr>,
        value: Option<&'a u8>,
        aux: Option<i32>,
    },
    /// Call `linux::fsmount` on the descriptor in `fd` and store the resulting descriptor in
    /// `out`.
    Fsmount {
        fd: FdSlot<'a>,
        flags: FsmountFlags,
        mount_attrs: MountAttrs,
        out: FdSlot<'a>,
    },
    /// Call `linux::fsopen` with `fsname` and `flags` and store the resulting descriptor in
    /// `out`.
    Fsopen {
        fsname: &'a CStr,
        flags: FsopenFlags,
        out: FdSlot<'a>,
    },
    /// Call `linux::mount` with `source`, `target`, filesystem type `fuse` and `flags`. The mount
    /// data is a buffer containing
    /// `fd=<fuse_fd>,rootmode=<root_mode in octal>,user_id=<uid>,group_id=<gid>` followed by a
    /// NUL byte.
    FuseMount {
        source: &'a CStr,
        target: &'a CStr,
        flags: MountFlags,
        root_mode: u32,
        uid: Uid,
        gid: Gid,
        fuse_fd: FdSlot<'a>,
    },
    /// Call `linux::ioctl_tiocsctty` on `fd` with `arg`.
    IoctlTiocsctty {
        fd: Fd,
        arg: i32,
    },
    /// Call `linux::mkdir` with `path` and `mode`.
    Mkdir {
        path: &'a CStr,
        mode: FileMode,
    },
    /// Call `linux::mount` with the given `source`, `target`, `fstype`, `flags` and `data`.
    Mount {
        source: Option<&'a CStr>,
        target: &'a CStr,
        fstype: Option<&'a CStr>,
        flags: MountFlags,
        data: Option<&'a [u8]>,
    },
    /// Call `linux::move_mount` with the descriptor in `from_dirfd`, `from_path`, `to_dirfd`,
    /// `to_path` and `flags`.
    MoveMount {
        from_dirfd: FdSlot<'a>,
        from_path: &'a CStr,
        to_dirfd: Fd,
        to_path: &'a CStr,
        flags: MoveMountFlags,
    },
    /// Call `linux::open` with `path`, `flags` and `mode` and store the resulting descriptor in
    /// `out`.
    Open {
        path: &'a CStr,
        flags: OpenFlags,
        mode: FileMode,
        out: FdSlot<'a>,
    },
    /// Call `linux::open_tree` with `dirfd`, `path` and `flags` and store the resulting
    /// descriptor in `out`.
    OpenTree {
        dirfd: Fd,
        path: &'a CStr,
        flags: OpenTreeFlags,
        out: FdSlot<'a>,
    },
    /// Call `linux::pivot_root` with `new_root` and `put_old`.
    PivotRoot {
        new_root: &'a CStr,
        put_old: &'a CStr,
    },
    /// Call `linux::read` on the descriptor in `fd`, reading into `buf`. The value it returns on
    /// success is discarded.
    Read {
        fd: FdSlot<'a>,
        buf: &'a mut [u8],
    },
    /// Send `buf` together with the descriptor in `fd_to_send` over the socket that is passed to
    /// `Syscall::call`, using `send_with_fd`. Panics unless the reported count equals
    /// `buf.len()`.
    SendMsg {
        buf: &'a [u8],
        fd_to_send: FdSlot<'a>,
    },
    /// Call `linux::setsid`.
    SetSid,
    /// Call `linux::socket` with `domain`, `type_` and `protocol` and store the resulting
    /// descriptor in `out`.
    Socket {
        domain: SocketDomain,
        type_: SocketType,
        protocol: SocketProtocol,
        out: FdSlot<'a>,
    },
    /// Call `linux::umount2` with `path` and `flags`.
    Umount2 {
        path: &'a CStr,
        flags: UmountFlags,
    },
    /// Call `linux::write` on the descriptor in `fd` with `buf`. The value it returns on success
    /// is discarded.
    Write {
        fd: FdSlot<'a>,
        buf: &'a [u8],
    },
}
186
187impl Syscall<'_> {
188    fn call(&mut self, write_sock: &linux::UnixStream) -> result::Result<(), Errno> {
189        match self {
190            Syscall::Bind { fd, addr } => linux::bind(fd, addr),
191            Syscall::Chdir { path } => linux::chdir(path),
192            Syscall::CloseRange { first, last, flags } => linux::close_range(*first, *last, *flags),
193            Syscall::Dup2 { from, to } => linux::dup2(&*from, &*to).map(drop),
194            Syscall::Execve { path, argv, envp } => linux::execve(path, argv, envp),
195            Syscall::ExecveList {
196                paths,
197                fallback,
198                argv,
199                envp,
200            } => {
201                for path in paths.iter() {
202                    if linux::access(path, AccessMode::X).is_ok() {
203                        return linux::execve(path, argv, envp);
204                    }
205                }
206                linux::execve(fallback, argv, envp)
207            }
208            Syscall::Fsmount {
209                fd,
210                flags,
211                mount_attrs,
212                out,
213            } => {
214                out.set(linux::fsmount(fd, *flags, *mount_attrs).map(OwnedFd::into_fd)?);
215                Ok(())
216            }
217            Syscall::Fsopen { fsname, flags, out } => {
218                out.set(linux::fsopen(fsname, *flags).map(OwnedFd::into_fd)?);
219                Ok(())
220            }
221            Syscall::Fsconfig {
222                fd,
223                command,
224                key,
225                value,
226                aux,
227            } => linux::fsconfig(fd, *command, *key, *value, *aux),
228            Syscall::FuseMount {
229                source,
230                target,
231                flags,
232                root_mode,
233                uid,
234                gid,
235                fuse_fd,
236            } => {
237                let mut options = [0; 100];
238                write!(
239                    SliceFmt::new(&mut options),
240                    "fd={},rootmode={:o},user_id={},group_id={}\0",
241                    fuse_fd.get().as_c_int(),
242                    root_mode,
243                    uid.as_u32(),
244                    gid.as_u32()
245                )
246                .unwrap();
247                let source = Some(*source);
248                let fstype = Some(c"fuse");
249                linux::mount(source, target, fstype, *flags, Some(options.as_slice()))
250            }
251            Syscall::IoctlTiocsctty { fd, arg } => linux::ioctl_tiocsctty(fd, *arg),
252            Syscall::Mkdir { path, mode } => linux::mkdir(path, *mode),
253            Syscall::Mount {
254                source,
255                target,
256                fstype,
257                flags,
258                data,
259            } => linux::mount(*source, target, *fstype, *flags, *data),
260            Syscall::MoveMount {
261                from_dirfd,
262                from_path,
263                to_dirfd,
264                to_path,
265                flags,
266            } => linux::move_mount(from_dirfd, from_path, to_dirfd, to_path, *flags),
267            Syscall::Open {
268                path,
269                flags,
270                mode,
271                out,
272            } => {
273                out.set(linux::open(path, *flags, *mode).map(OwnedFd::into_fd)?);
274                Ok(())
275            }
276            Syscall::OpenTree {
277                dirfd,
278                path,
279                flags,
280                out,
281            } => {
282                out.set(linux::open_tree(dirfd, path, *flags).map(OwnedFd::into_fd)?);
283                Ok(())
284            }
285            Syscall::PivotRoot { new_root, put_old } => linux::pivot_root(new_root, put_old),
286            Syscall::Read { fd, buf } => linux::read(fd, buf).map(drop),
287            Syscall::SendMsg { buf, fd_to_send } => {
288                let count = write_sock.send_with_fd(buf, fd_to_send.get())?;
289                assert_eq!(count, buf.len());
290                Ok(())
291            }
292            Syscall::SetSid => linux::setsid(),
293            Syscall::Socket {
294                domain,
295                type_,
296                protocol,
297                out,
298            } => {
299                out.set(linux::socket(*domain, *type_, *protocol).map(OwnedFd::into_fd)?);
300                Ok(())
301            }
302            Syscall::Umount2 { path, flags } => linux::umount2(path, *flags),
303            Syscall::Write { fd, buf } => linux::write(fd, buf).map(drop),
304        }
305    }
306}
307
308/// The guts of the child code. This function shouldn't return on success, because in that case,
309/// the last syscall should be an execve. If this function returns, than an error was encountered.
310/// In that case, the script item index and the errno will be returned.
311fn start_and_exec_in_child_inner(
312    write_sock: &linux::UnixStream,
313    syscalls: &mut [Syscall],
314) -> (usize, Errno) {
315    for (index, syscall) in syscalls.iter_mut().enumerate() {
316        if let Err(errno) = syscall.call(write_sock) {
317            return (index, errno);
318        }
319    }
320    panic!("should not reach here");
321}
322
323/// Run the provided syscall script in `syscalls`.
324///
325/// It is assumed that the last syscall won't return (i.e. will be `execve`). If there is an error,
326/// write an 8-byte value to `write_sock` describing the error in little-endian format.
327/// The upper 32 bits will be the index in the script of the syscall that errored, and the lower 32
328/// bits will be the errno value.
329///
330/// The caller should ensure that `write_sock` is marked close-on-exec. This way, upon
331/// normal completion, no bytes will be written to the file descriptor and the worker can
332/// distinguish between an error and no error.
333pub fn start_and_exec_in_child(write_sock: linux::UnixStream, syscalls: &mut [Syscall]) -> ! {
334    let (index, errno) = start_and_exec_in_child_inner(&write_sock, syscalls);
335    let result = (index as u64) << 32 | errno.as_u64();
336    // There's not really much to do if this write fails. Therefore, we just ignore the result.
337    // However, it's hard to imagine any case where this could fail and we'd actually care.
338    let _ = write_sock.send(result.to_ne_bytes().as_slice());
339    linux::_exit(linux::ExitCode::from_u8(1));
340}
341
342pub struct ChildArgs<'a, 'b> {
343    pub write_sock: linux::Fd,
344    pub syscalls: &'a mut [Syscall<'b>],
345}
346
347pub extern "C" fn start_and_exec_in_child_trampoline(arg: *mut core::ffi::c_void) -> i32 {
348    let args = unsafe { &mut *(arg as *mut ChildArgs<'_, '_>) };
349    start_and_exec_in_child(
350        linux::OwnedFd::from_fd(args.write_sock).into(),
351        args.syscalls,
352    )
353}