1use std::any::Any;
3use std::ffi::{CStr, CString, OsStr};
4use std::os::fd::{BorrowedFd, FromRawFd, RawFd};
5use std::os::unix::ffi::OsStrExt;
6use std::os::unix::fs::symlink;
7use std::os::unix::io::{AsRawFd, OwnedFd};
8use std::path::Path;
9use std::str::FromStr;
10use std::sync::Arc;
11use std::{mem, ptr};
12
13use caps::{CapSet, CapsHashSet};
14use libc::{c_char, setdomainname, uid_t};
15use nix::dir::Dir;
16use nix::fcntl;
17use nix::fcntl::{OFlag, open};
18use nix::mount::{MntFlags, MsFlags, mount, umount2};
19use nix::sched::{CloneFlags, unshare};
20use nix::sys::stat::{Mode, SFlag, mknod};
21use nix::unistd::{Gid, Uid, chown, chroot, close, fchdir, pivot_root, sethostname};
22use oci_spec::runtime::PosixRlimit;
23use pathrs::flags::OpenFlags;
24use pathrs::procfs::{ProcfsBase, ProcfsHandle};
25
26use super::{Result, Syscall, SyscallError};
27use crate::capabilities;
28use crate::config::PersonalityDomain;
29
/// `AT_*` flag requesting that an operation apply to a whole mount subtree.
pub const AT_RECURSIVE: u32 = 0x8000;
/// `AT_*` flag allowing an empty path so the operation acts on the dirfd itself.
pub const AT_EMPTY_PATH: u32 = 0x1000;

/// Mask covering the access-time related attribute bits; it is OR-ed into
/// the clear mask built by `MountAttr::all`.
// The doubled underscore is kept as-is (presumably to match the kernel's
// spelling), which is why the naming lint is silenced for this one item.
#[allow(non_upper_case_globals)]
pub const MOUNT_ATTR__ATIME: u64 = 0x0000_0070;
/// Mount attribute bit: read-only.
pub const MOUNT_ATTR_RDONLY: u64 = 0x0000_0001;
/// Mount attribute bit: ignore set-user-ID / set-group-ID bits.
pub const MOUNT_ATTR_NOSUID: u64 = 0x0000_0002;
/// Mount attribute bit: disallow access to device special files.
pub const MOUNT_ATTR_NODEV: u64 = 0x0000_0004;
/// Mount attribute bit: disallow program execution.
pub const MOUNT_ATTR_NOEXEC: u64 = 0x0000_0008;
/// Access-time mode "relatime"; note that its value is zero, so OR-ing it
/// into a mask contributes no bits.
pub const MOUNT_ATTR_RELATIME: u64 = 0x0000_0000;
/// Access-time mode "noatime".
pub const MOUNT_ATTR_NOATIME: u64 = 0x0000_0010;
/// Access-time mode "strictatime".
pub const MOUNT_ATTR_STRICTATIME: u64 = 0x0000_0020;
/// Mount attribute bit: do not update directory access times.
pub const MOUNT_ATTR_NODIRATIME: u64 = 0x0000_0080;
/// Mount attribute bit: do not follow symlinks.
pub const MOUNT_ATTR_NOSYMFOLLOW: u64 = 0x0020_0000;
/// `move_mount` flag: the source path is empty, operate on the source fd.
pub const MOVE_MOUNT_F_EMPTY_PATH: u32 = 0x0000_0004;
/// `move_mount` flag: the target path is empty, operate on the target fd.
pub const MOVE_MOUNT_T_EMPTY_PATH: u32 = 0x0000_0040;
47
// Command numbers for the `fsconfig` syscall, numbered consecutively from 0.

/// Set a parameter that takes no value.
pub const FSCONFIG_SET_FLAG: u64 = 0;
/// Set a parameter from a string value.
pub const FSCONFIG_SET_STRING: u64 = 1;
/// Set a parameter from a binary blob.
pub const FSCONFIG_SET_BINARY: u64 = 2;
/// Set a parameter from a path.
pub const FSCONFIG_SET_PATH: u64 = 3;
/// Set a parameter from a path that may be empty.
pub const FSCONFIG_SET_PATH_EMPTY: u64 = 4;
/// Set a parameter from a file descriptor.
pub const FSCONFIG_SET_FD: u64 = 5;
/// Create the filesystem described by the configuration so far.
pub const FSCONFIG_CMD_CREATE: u64 = 6;
/// Reconfigure an existing filesystem.
pub const FSCONFIG_CMD_RECONFIGURE: u64 = 7;
/// Create the filesystem, exclusive variant.
pub const FSCONFIG_CMD_CREATE_EXCL: u64 = 8;
58
59pub enum MountOption {
61 Defaults(bool, MsFlags),
62 Ro(bool, MsFlags),
63 Rw(bool, MsFlags),
64 Suid(bool, MsFlags),
65 Nosuid(bool, MsFlags),
66 Dev(bool, MsFlags),
67 Nodev(bool, MsFlags),
68 Exec(bool, MsFlags),
69 Noexec(bool, MsFlags),
70 Sync(bool, MsFlags),
71 Async(bool, MsFlags),
72 Dirsync(bool, MsFlags),
73 Remount(bool, MsFlags),
74 Mand(bool, MsFlags),
75 Nomand(bool, MsFlags),
76 Atime(bool, MsFlags),
77 Noatime(bool, MsFlags),
78 Diratime(bool, MsFlags),
79 Nodiratime(bool, MsFlags),
80 Bind(bool, MsFlags),
81 Rbind(bool, MsFlags),
82 Unbindable(bool, MsFlags),
83 Runbindable(bool, MsFlags),
84 Private(bool, MsFlags),
85 Rprivate(bool, MsFlags),
86 Shared(bool, MsFlags),
87 Rshared(bool, MsFlags),
88 Slave(bool, MsFlags),
89 Rslave(bool, MsFlags),
90 Relatime(bool, MsFlags),
91 Norelatime(bool, MsFlags),
92 Strictatime(bool, MsFlags),
93 Nostrictatime(bool, MsFlags),
94}
95
96impl MountOption {
97 pub fn known_options() -> Vec<String> {
99 [
100 "defaults",
101 "ro",
102 "rw",
103 "suid",
104 "nosuid",
105 "dev",
106 "nodev",
107 "exec",
108 "noexec",
109 "sync",
110 "async",
111 "dirsync",
112 "remount",
113 "mand",
114 "nomand",
115 "atime",
116 "noatime",
117 "diratime",
118 "nodiratime",
119 "bind",
120 "rbind",
121 "unbindable",
122 "runbindable",
123 "private",
124 "rprivate",
125 "shared",
126 "rshared",
127 "slave",
128 "rslave",
129 "relatime",
130 "norelatime",
131 "strictatime",
132 "nostrictatime",
133 ]
134 .iter()
135 .map(|s| s.to_string())
136 .collect()
137 }
138}
139
140impl FromStr for MountOption {
141 type Err = String;
142
143 fn from_str(option: &str) -> std::result::Result<Self, Self::Err> {
144 match option {
145 "defaults" => Ok(MountOption::Defaults(false, MsFlags::empty())),
146 "ro" => Ok(MountOption::Ro(false, MsFlags::MS_RDONLY)),
147 "rw" => Ok(MountOption::Rw(true, MsFlags::MS_RDONLY)),
148 "suid" => Ok(MountOption::Suid(true, MsFlags::MS_NOSUID)),
149 "nosuid" => Ok(MountOption::Nosuid(false, MsFlags::MS_NOSUID)),
150 "dev" => Ok(MountOption::Dev(true, MsFlags::MS_NODEV)),
151 "nodev" => Ok(MountOption::Nodev(false, MsFlags::MS_NODEV)),
152 "exec" => Ok(MountOption::Exec(true, MsFlags::MS_NOEXEC)),
153 "noexec" => Ok(MountOption::Noexec(false, MsFlags::MS_NOEXEC)),
154 "sync" => Ok(MountOption::Sync(false, MsFlags::MS_SYNCHRONOUS)),
155 "async" => Ok(MountOption::Async(true, MsFlags::MS_SYNCHRONOUS)),
156 "dirsync" => Ok(MountOption::Dirsync(false, MsFlags::MS_DIRSYNC)),
157 "remount" => Ok(MountOption::Remount(false, MsFlags::MS_REMOUNT)),
158 "mand" => Ok(MountOption::Mand(false, MsFlags::MS_MANDLOCK)),
159 "nomand" => Ok(MountOption::Nomand(true, MsFlags::MS_MANDLOCK)),
160 "atime" => Ok(MountOption::Atime(true, MsFlags::MS_NOATIME)),
161 "noatime" => Ok(MountOption::Noatime(false, MsFlags::MS_NOATIME)),
162 "diratime" => Ok(MountOption::Diratime(true, MsFlags::MS_NODIRATIME)),
163 "nodiratime" => Ok(MountOption::Nodiratime(false, MsFlags::MS_NODIRATIME)),
164 "bind" => Ok(MountOption::Bind(false, MsFlags::MS_BIND)),
165 "rbind" => Ok(MountOption::Rbind(
166 false,
167 MsFlags::MS_BIND | MsFlags::MS_REC,
168 )),
169 "unbindable" => Ok(MountOption::Unbindable(false, MsFlags::MS_UNBINDABLE)),
170 "runbindable" => Ok(MountOption::Runbindable(
171 false,
172 MsFlags::MS_UNBINDABLE | MsFlags::MS_REC,
173 )),
174 "private" => Ok(MountOption::Private(true, MsFlags::MS_PRIVATE)),
175 "rprivate" => Ok(MountOption::Rprivate(
176 true,
177 MsFlags::MS_PRIVATE | MsFlags::MS_REC,
178 )),
179 "shared" => Ok(MountOption::Shared(true, MsFlags::MS_SHARED)),
180 "rshared" => Ok(MountOption::Rshared(
181 true,
182 MsFlags::MS_SHARED | MsFlags::MS_REC,
183 )),
184 "slave" => Ok(MountOption::Slave(true, MsFlags::MS_SLAVE)),
185 "rslave" => Ok(MountOption::Rslave(
186 true,
187 MsFlags::MS_SLAVE | MsFlags::MS_REC,
188 )),
189 "relatime" => Ok(MountOption::Relatime(false, MsFlags::MS_RELATIME)),
190 "norelatime" => Ok(MountOption::Norelatime(true, MsFlags::MS_RELATIME)),
191 "strictatime" => Ok(MountOption::Strictatime(false, MsFlags::MS_STRICTATIME)),
192 "nostrictatime" => Ok(MountOption::Nostrictatime(true, MsFlags::MS_STRICTATIME)),
193 _ => Err(option.to_string()),
194 }
195 }
196}
197
/// A recursive mount attribute option (the `r`-prefixed option names).
///
/// Each variant stores a `bool` and the `MOUNT_ATTR_*` bit(s) it refers to.
///
/// NOTE(review): the `bool` appears to mean "clear this attribute" (e.g.
/// `rrw` parses to `Rdonly(true, ..)` and `rro` to `Rdonly(false, ..)`) —
/// confirm against the consumer of these values.
pub enum MountRecursive {
    /// Read-only attribute.
    Rdonly(bool, u64),
    /// Set-user-ID / set-group-ID suppression attribute.
    Nosuid(bool, u64),
    /// Device-file access suppression attribute.
    Nodev(bool, u64),
    /// Execution suppression attribute.
    Noexec(bool, u64),
    /// Access-time attribute.
    Atime(bool, u64),
    /// "relatime" access-time mode.
    Relatime(bool, u64),
    /// "noatime" access-time mode.
    Noatime(bool, u64),
    /// "strictatime" access-time mode.
    StrictAtime(bool, u64),
    /// Directory access-time suppression attribute.
    NoDiratime(bool, u64),
    /// Symlink-following suppression attribute.
    Nosymfollow(bool, u64),
}
230
231impl FromStr for MountRecursive {
232 type Err = SyscallError;
233
234 fn from_str(option: &str) -> std::result::Result<Self, Self::Err> {
235 match option {
236 "rro" => Ok(MountRecursive::Rdonly(false, MOUNT_ATTR_RDONLY)),
237 "rrw" => Ok(MountRecursive::Rdonly(true, MOUNT_ATTR_RDONLY)),
238 "rnosuid" => Ok(MountRecursive::Nosuid(false, MOUNT_ATTR_NOSUID)),
239 "rsuid" => Ok(MountRecursive::Nosuid(true, MOUNT_ATTR_NOSUID)),
240 "rnodev" => Ok(MountRecursive::Nodev(false, MOUNT_ATTR_NODEV)),
241 "rdev" => Ok(MountRecursive::Nodev(true, MOUNT_ATTR_NODEV)),
242 "rnoexec" => Ok(MountRecursive::Noexec(false, MOUNT_ATTR_NOEXEC)),
243 "rexec" => Ok(MountRecursive::Noexec(true, MOUNT_ATTR_NOEXEC)),
244 "rnodiratime" => Ok(MountRecursive::NoDiratime(false, MOUNT_ATTR_NODIRATIME)),
245 "rdiratime" => Ok(MountRecursive::NoDiratime(true, MOUNT_ATTR_NODIRATIME)),
246 "rrelatime" => Ok(MountRecursive::Relatime(false, MOUNT_ATTR_RELATIME)),
247 "rnorelatime" => Ok(MountRecursive::Relatime(true, MOUNT_ATTR_RELATIME)),
248 "rnoatime" => Ok(MountRecursive::Noatime(false, MOUNT_ATTR_NOATIME)),
249 "ratime" => Ok(MountRecursive::Noatime(true, MOUNT_ATTR_NOATIME)),
250 "rstrictatime" => Ok(MountRecursive::StrictAtime(false, MOUNT_ATTR_STRICTATIME)),
251 "rnostrictatime" => Ok(MountRecursive::StrictAtime(true, MOUNT_ATTR_STRICTATIME)),
252 "rnosymfollow" => Ok(MountRecursive::Nosymfollow(false, MOUNT_ATTR_NOSYMFOLLOW)),
253 "rsymfollow" => Ok(MountRecursive::Nosymfollow(true, MOUNT_ATTR_NOSYMFOLLOW)),
254 _ => Err(SyscallError::UnexpectedMountRecursiveOption(
256 option.to_string(),
257 )),
258 }
259 }
260}
261
/// Argument structure handed by pointer to the `mount_setattr` syscall.
///
/// The layout is fixed with `#[repr(C)]` because the value is passed to the
/// kernel as a raw pointer; it consists of four consecutive `u64` fields.
#[repr(C)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MountAttr {
    /// Attribute bits to set (`MOUNT_ATTR_*`).
    pub attr_set: u64,

    /// Attribute bits to clear (`MOUNT_ATTR_*`).
    pub attr_clr: u64,

    /// Mount propagation type to apply.
    pub propagation: u64,

    /// User namespace file descriptor.
    pub userns_fd: u64,
}
278
279impl MountAttr {
280 pub fn all() -> Self {
283 MountAttr {
284 attr_set: MOUNT_ATTR_RDONLY
285 | MOUNT_ATTR_NOSUID
286 | MOUNT_ATTR_NODEV
287 | MOUNT_ATTR_NOEXEC
288 | MOUNT_ATTR_NODIRATIME
289 | MOUNT_ATTR_RELATIME
290 | MOUNT_ATTR_NOATIME
291 | MOUNT_ATTR_STRICTATIME
292 | MOUNT_ATTR_NOSYMFOLLOW,
293 attr_clr: MOUNT_ATTR_RDONLY
294 | MOUNT_ATTR_NOSUID
295 | MOUNT_ATTR_NODEV
296 | MOUNT_ATTR_NOEXEC
297 | MOUNT_ATTR_NODIRATIME
298 | MOUNT_ATTR_RELATIME
299 | MOUNT_ATTR_NOATIME
300 | MOUNT_ATTR_STRICTATIME
301 | MOUNT_ATTR_NOSYMFOLLOW
302 | MOUNT_ATTR__ATIME,
303 propagation: 0,
304 userns_fd: 0,
305 }
306 }
307}
308
/// Stateless handle whose `Syscall` implementation issues real Linux
/// system calls.
#[derive(Clone)]
pub struct LinuxSyscall;
312
313impl LinuxSyscall {
314 unsafe fn from_raw_buf<'a, T>(p: *const c_char) -> T
315 where
316 T: From<&'a OsStr>,
317 {
318 unsafe { T::from(OsStr::from_bytes(CStr::from_ptr(p).to_bytes())) }
319 }
320
321 unsafe fn passwd_to_user(passwd: libc::passwd) -> Arc<OsStr> {
323 let name: Arc<OsStr> = unsafe { Self::from_raw_buf(passwd.pw_name) };
324 name
325 }
326
327 fn emulate_close_range(preserve_fds: i32) -> Result<()> {
328 let open_fds = Self::get_open_fds()?;
329 let min_fd = preserve_fds + 3;
331 let to_be_cleaned_up_fds: Vec<i32> = open_fds
332 .iter()
333 .filter_map(|&fd| if fd >= min_fd { Some(fd) } else { None })
334 .collect();
335
336 to_be_cleaned_up_fds.iter().for_each(|&fd| {
337 let _ = fcntl::fcntl(fd, fcntl::F_SETFD(fcntl::FdFlag::FD_CLOEXEC));
340 });
341
342 Ok(())
343 }
344
345 fn get_open_fds() -> Result<Vec<i32>> {
347 let dir = ProcfsHandle::new()?.open(
348 ProcfsBase::ProcSelf,
349 Path::new("fd"),
350 OpenFlags::O_DIRECTORY | OpenFlags::O_CLOEXEC,
351 )?;
352
353 let fds = Dir::from(dir)?
354 .into_iter()
355 .filter_map(|entry| entry.ok())
356 .filter_map(|entry| {
357 entry
361 .file_name()
362 .to_str()
363 .ok()
364 .and_then(|name| name.parse::<i32>().ok())
365 })
366 .collect();
367
368 Ok(fds)
369 }
370}
371
372impl Syscall for LinuxSyscall {
373 fn as_any(&self) -> &dyn Any {
376 self
377 }
378
379 fn pivot_rootfs(&self, path: &Path) -> Result<()> {
381 let newroot = open(
383 path,
384 OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC,
385 Mode::empty(),
386 )
387 .inspect_err(|errno| {
388 tracing::error!(?errno, ?path, "failed to open the new root for pivot root");
389 })?;
390
391 pivot_root(path, path).inspect_err(|errno| {
400 tracing::error!(?errno, ?path, "failed to pivot root to");
401 })?;
402
403 mount(
406 None::<&str>,
407 "/",
408 None::<&str>,
409 MsFlags::MS_SLAVE | MsFlags::MS_REC,
410 None::<&str>,
411 )
412 .inspect_err(|errno| {
413 tracing::error!(?errno, "failed to make original root directory rslave");
414 })?;
415
416 umount2("/", MntFlags::MNT_DETACH).inspect_err(|errno| {
421 tracing::error!(?errno, "failed to unmount old root directory");
422 })?;
423 fchdir(newroot).inspect_err(|errno| {
425 tracing::error!(?errno, ?newroot, "failed to change directory to new root");
426 })?;
427
428 close(newroot).inspect_err(|errno| {
429 tracing::error!(?errno, ?newroot, "failed to close new root directory");
430 })?;
431
432 Ok(())
433 }
434
435 fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()> {
437 let fd = unsafe { BorrowedFd::borrow_raw(rawfd) };
438 nix::sched::setns(fd, nstype)?;
439 Ok(())
440 }
441
442 fn set_id(&self, uid: Uid, gid: Gid) -> Result<()> {
444 prctl::set_keep_capabilities(true).map_err(|errno| {
445 tracing::error!(?errno, "failed to set keep capabilities to true");
446 nix::errno::Errno::from_raw(errno)
447 })?;
448 if unsafe { libc::syscall(libc::SYS_setresgid, gid, gid, gid) } == -1 {
453 let err = nix::errno::Errno::last();
454 tracing::error!(
455 ?err,
456 ?gid,
457 "failed to set real, effective and saved set gid"
458 );
459 return Err(err.into());
460 }
461
462 if unsafe { libc::syscall(libc::SYS_setresuid, uid, uid, uid) } == -1 {
465 let err = nix::errno::Errno::last();
466 tracing::error!(
467 ?err,
468 ?uid,
469 "failed to set real, effective and saved set uid"
470 );
471 return Err(err.into());
472 }
473
474 if uid != Uid::from_raw(0) {
478 capabilities::reset_effective(self)?;
479 }
480 prctl::set_keep_capabilities(false).map_err(|errno| {
481 tracing::error!(?errno, "failed to set keep capabilities to false");
482 nix::errno::Errno::from_raw(errno)
483 })?;
484 Ok(())
485 }
486
487 fn unshare(&self, flags: CloneFlags) -> Result<()> {
490 unshare(flags)?;
491
492 Ok(())
493 }
494 fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<()> {
496 match cset {
497 CapSet::Bounding => {
500 let all = caps::read(None, CapSet::Bounding)?;
502 for c in all.difference(value) {
507 caps::drop(None, CapSet::Bounding, *c)?
508 }
509 }
510 CapSet::Ambient => {
511 caps::clear(None, CapSet::Ambient)?;
519 for c in value {
520 if let Err(e) = caps::raise(None, CapSet::Ambient, *c) {
521 tracing::warn!(?e, ?c, "can't raise ambient capability");
522 }
523 }
524 }
525 _ => {
526 caps::set(None, cset, value)?;
527 }
528 }
529 Ok(())
530 }
531
532 fn set_hostname(&self, hostname: &str) -> Result<()> {
534 sethostname(hostname)?;
535 Ok(())
536 }
537
538 fn set_domainname(&self, domainname: &str) -> Result<()> {
541 let ptr = domainname.as_bytes().as_ptr() as *const c_char;
542 let len = domainname.len();
543 match unsafe { setdomainname(ptr, len) } {
544 0 => Ok(()),
545 -1 => Err(nix::Error::last()),
546
547 _ => Err(nix::Error::UnknownErrno),
548 }?;
549
550 Ok(())
551 }
552
553 fn set_rlimit(&self, rlimit: &PosixRlimit) -> Result<()> {
555 let rlim = &libc::rlimit {
556 rlim_cur: rlimit.soft(),
557 rlim_max: rlimit.hard(),
558 };
559
560 #[cfg(not(target_env = "musl"))]
562 let res = unsafe { libc::setrlimit(rlimit.typ() as u32, rlim) };
563 #[cfg(target_env = "musl")]
564 let res = unsafe { libc::setrlimit(rlimit.typ() as i32, rlim) };
565
566 match res {
567 0 => Ok(()),
568 -1 => Err(SyscallError::Nix(nix::Error::last())),
569 _ => Err(SyscallError::Nix(nix::Error::UnknownErrno)),
570 }?;
571
572 Ok(())
573 }
574
575 fn get_pwuid(&self, uid: uid_t) -> Option<Arc<OsStr>> {
577 let mut passwd = unsafe { mem::zeroed::<libc::passwd>() };
578 let mut buf = vec![0; 2048];
579 let mut result = ptr::null_mut::<libc::passwd>();
580
581 loop {
582 let r = unsafe {
583 libc::getpwuid_r(uid, &mut passwd, buf.as_mut_ptr(), buf.len(), &mut result)
584 };
585
586 if r != libc::ERANGE {
587 break;
588 }
589
590 let newsize = buf.len().checked_mul(2)?;
591 buf.resize(newsize, 0);
592 }
593
594 if result.is_null() {
595 return None;
598 }
599
600 if result != &mut passwd {
601 return None;
603 }
604
605 let user = unsafe { Self::passwd_to_user(result.read()) };
606 Some(user)
607 }
608
609 fn chroot(&self, path: &Path) -> Result<()> {
610 chroot(path)?;
611
612 Ok(())
613 }
614
615 fn mount(
616 &self,
617 source: Option<&Path>,
618 target: &Path,
619 fstype: Option<&str>,
620 flags: MsFlags,
621 data: Option<&str>,
622 ) -> Result<()> {
623 mount(source, target, fstype, flags, data)?;
624 Ok(())
625 }
626
627 fn mount_from_fd(&self, source_fd: &OwnedFd, target: &Path) -> Result<()> {
628 let parent = target.parent().ok_or_else(|| {
629 tracing::error!(?target, "target has no parent");
630 SyscallError::Nix(nix::Error::EINVAL)
631 })?;
632 let name = target.file_name().ok_or_else(|| {
633 tracing::error!(?target, "target has no file name");
634 SyscallError::Nix(nix::Error::EINVAL)
635 })?;
636
637 let parent_fd = unsafe {
638 OwnedFd::from_raw_fd(open(
639 parent,
640 OFlag::O_PATH | OFlag::O_CLOEXEC | OFlag::O_DIRECTORY,
641 Mode::empty(),
642 )?)
643 };
644
645 let open_tree_flags: libc::c_uint = (libc::OPEN_TREE_CLOEXEC as libc::c_uint)
646 | (libc::OPEN_TREE_CLONE as libc::c_uint)
647 | (libc::AT_EMPTY_PATH as libc::c_uint);
648
649 const EMPTY_PATH: [libc::c_char; 1] = [0];
650
651 let mount_fd_raw = unsafe {
652 libc::syscall(
653 libc::SYS_open_tree,
654 source_fd.as_raw_fd(),
655 EMPTY_PATH.as_ptr(),
656 open_tree_flags,
657 )
658 };
659
660 if mount_fd_raw < 0 {
661 let err = nix::errno::Errno::last();
662 tracing::error!(?err, "open_tree from fd failed");
663 return Err(SyscallError::Nix(err));
664 }
665 let mount_fd = unsafe { OwnedFd::from_raw_fd(mount_fd_raw as RawFd) };
666
667 let name_cstr = CString::new(name.as_bytes()).map_err(|err| {
668 tracing::error!(?target, ?err, "failed to convert file name to cstring");
669 SyscallError::Nix(nix::Error::EINVAL)
670 })?;
671
672 let res = unsafe {
673 libc::syscall(
674 libc::SYS_move_mount,
675 mount_fd.as_raw_fd(),
676 EMPTY_PATH.as_ptr(),
677 parent_fd.as_raw_fd(),
678 name_cstr.as_ptr(),
679 MOVE_MOUNT_F_EMPTY_PATH as libc::c_uint,
680 )
681 };
682
683 if res < 0 {
684 let err = nix::errno::Errno::last();
685 tracing::error!(?target, ?err, "move_mount failed");
686 return Err(SyscallError::Nix(err));
687 }
688
689 Ok(())
690 }
691
692 fn move_mount(
693 &self,
694 from_dirfd: BorrowedFd<'_>,
695 from_path: Option<&str>,
696 to_dirfd: BorrowedFd<'_>,
697 to_path: Option<&str>,
698 flags: u32,
699 ) -> Result<()> {
700 const EMPTY_PATH: [libc::c_char; 1] = [0];
701
702 let from_cstr: Option<CString> = from_path
703 .and_then(|s| if s.is_empty() { None } else { Some(s) })
704 .map(|s| CString::new(s).map_err(|_| nix::Error::EINVAL))
705 .transpose()?;
706 let from_ptr = from_cstr
707 .as_ref()
708 .map_or(EMPTY_PATH.as_ptr(), |c| c.as_ptr());
709
710 let to_cstr: Option<CString> = to_path
711 .and_then(|s| if s.is_empty() { None } else { Some(s) })
712 .map(|s| CString::new(s).map_err(|_| nix::Error::EINVAL))
713 .transpose()?;
714 let to_ptr = to_cstr.as_ref().map_or(EMPTY_PATH.as_ptr(), |c| c.as_ptr());
715
716 let rc = unsafe {
717 libc::syscall(
718 libc::SYS_move_mount,
719 from_dirfd,
720 from_ptr,
721 to_dirfd,
722 to_ptr,
723 flags as libc::c_uint,
724 )
725 };
726
727 match rc {
728 0 => Ok(()),
729 -1 => Err(nix::Error::last().into()),
730 _ => Err(nix::Error::UnknownErrno.into()),
731 }
732 }
733
734 fn fsopen(&self, fstype: Option<&str>, flags: u32) -> Result<OwnedFd> {
735 let t_cstr: Option<CString> = fstype
736 .map(|t| CString::new(t).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
737 .transpose()?;
738
739 let t_ptr = t_cstr.as_ref().map_or(std::ptr::null(), |c| c.as_ptr());
740
741 let fd =
742 unsafe { libc::syscall(libc::SYS_fsopen, t_ptr, flags as libc::c_uint) } as libc::c_int;
743 if fd < 0 {
744 return Err(SyscallError::Nix(nix::Error::last()));
745 }
746 Ok(unsafe { OwnedFd::from_raw_fd(fd) })
747 }
748
749 fn fsconfig(
750 &self,
751 fsfd: BorrowedFd<'_>,
752 cmd: u32,
753 key: Option<&str>,
754 val: Option<&str>,
755 aux: libc::c_int,
756 ) -> Result<()> {
757 let k_cstr: Option<CString> = key
758 .map(|k| CString::new(k).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
759 .transpose()?;
760 let k_ptr = k_cstr.as_ref().map_or(std::ptr::null(), |k| k.as_ptr());
761
762 let v_cstr: Option<CString> = val
763 .map(|v| CString::new(v).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
764 .transpose()?;
765 let v_ptr = v_cstr
766 .as_ref()
767 .map_or(std::ptr::null(), |v| v.as_ptr() as *const libc::c_void);
768
769 let rc = unsafe {
770 libc::syscall(
771 libc::SYS_fsconfig,
772 fsfd.as_raw_fd() as libc::c_int,
773 cmd as libc::c_uint,
774 k_ptr,
775 v_ptr,
776 aux,
777 )
778 };
779 if rc == -1 {
780 return Err(SyscallError::Nix(nix::Error::last()));
781 }
782 Ok(())
783 }
784
785 fn fsmount(
786 &self,
787 fsfd: BorrowedFd<'_>,
788 flags: u32,
789 attr_flags: Option<u64>,
790 ) -> Result<OwnedFd> {
791 let attr = attr_flags.unwrap_or(0);
792
793 let ret = unsafe {
794 libc::syscall(
795 libc::SYS_fsmount,
796 fsfd.as_raw_fd() as libc::c_int,
797 flags as libc::c_uint,
798 attr as libc::c_ulong,
799 )
800 } as libc::c_int;
801
802 if ret < 0 {
803 return Err(SyscallError::Nix(nix::Error::last()));
804 }
805 Ok(unsafe { std::os::fd::OwnedFd::from_raw_fd(ret) })
806 }
807
808 fn open_tree(&self, dirfd: RawFd, path: Option<&str>, flags: u32) -> Result<OwnedFd> {
810 static EMPTY: [libc::c_char; 1] = [0];
811 let path_cstr: Option<CString> = path
812 .map(|s| CString::new(s).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
813 .transpose()?;
814 let c_path: *const c_char = match path_cstr.as_ref() {
815 Some(cs) => cs.as_ptr(),
816 None => EMPTY.as_ptr(),
817 };
818
819 let fd = unsafe {
820 libc::syscall(
821 libc::SYS_open_tree,
822 dirfd as libc::c_int,
823 c_path,
824 flags as libc::c_uint,
825 )
826 } as libc::c_int;
827
828 if fd < 0 {
829 return Err(SyscallError::Nix(nix::Error::last()));
830 }
831 Ok(unsafe { OwnedFd::from_raw_fd(fd) })
832 }
833
834 fn symlink(&self, original: &Path, link: &Path) -> Result<()> {
835 symlink(original, link)?;
836
837 Ok(())
838 }
839
840 fn mknod(&self, path: &Path, kind: SFlag, perm: Mode, dev: u64) -> Result<()> {
841 mknod(path, kind, perm, dev)?;
842
843 Ok(())
844 }
845
846 fn chown(&self, path: &Path, owner: Option<Uid>, group: Option<Gid>) -> Result<()> {
847 chown(path, owner, group)?;
848
849 Ok(())
850 }
851
852 fn set_groups(&self, groups: &[Gid]) -> Result<()> {
853 let n_groups = groups.len() as libc::size_t;
854 let groups_ptr = groups.as_ptr() as *const libc::gid_t;
855
856 if unsafe { libc::syscall(libc::SYS_setgroups, n_groups, groups_ptr) } == -1 {
859 let err = nix::errno::Errno::last();
860 tracing::error!(?err, ?groups, "failed to set groups");
861 return Err(err.into());
862 }
863 Ok(())
864 }
865
866 #[tracing::instrument(skip(self))]
867 fn close_range(&self, preserve_fds: i32) -> Result<()> {
868 match unsafe {
869 libc::syscall(
870 libc::SYS_close_range,
871 3 + preserve_fds,
872 libc::c_int::MAX,
873 libc::CLOSE_RANGE_CLOEXEC,
874 )
875 } {
876 0 => Ok(()),
877 -1 => {
878 match nix::errno::Errno::last() {
879 nix::errno::Errno::ENOSYS | nix::errno::Errno::EINVAL => {
880 Self::emulate_close_range(preserve_fds)
883 }
884 e => Err(SyscallError::Nix(e)),
885 }
886 }
887 _ => Err(SyscallError::Nix(nix::errno::Errno::UnknownErrno)),
888 }?;
889
890 Ok(())
891 }
892
893 fn mount_setattr(
894 &self,
895 dirfd: BorrowedFd<'_>,
896 pathname: &Path,
897 flags: u32,
898 mount_attr: &MountAttr,
899 size: libc::size_t,
900 ) -> Result<()> {
901 let path_c_string = pathname
902 .to_path_buf()
903 .to_str()
904 .map(CString::new)
905 .ok_or_else(|| {
906 tracing::error!(path = ?pathname, "failed to convert path to string");
907 nix::Error::EINVAL
908 })?
909 .map_err(|err| {
910 tracing::error!(path = ?pathname, ?err, "failed to convert path to string");
911 nix::Error::EINVAL
912 })?;
913
914 match unsafe {
915 libc::syscall(
916 libc::SYS_mount_setattr,
917 dirfd,
918 path_c_string.as_ptr(),
919 flags,
920 mount_attr as *const MountAttr,
921 size,
922 )
923 } {
924 0 => Ok(()),
925 -1 => Err(nix::Error::last()),
926 _ => Err(nix::Error::UnknownErrno),
927 }?;
928 Ok(())
929 }
930
931 fn set_io_priority(&self, class: i64, priority: i64) -> Result<()> {
932 let ioprio_who_progress: libc::c_int = 1;
933 let ioprio_who_pid = 0;
934 let iop = (class << 13) | priority;
935 match unsafe {
936 libc::syscall(
937 libc::SYS_ioprio_set,
938 ioprio_who_progress,
939 ioprio_who_pid,
940 iop as libc::c_ulong,
941 )
942 } {
943 0 => Ok(()),
944 -1 => Err(nix::Error::last()),
945 _ => Err(nix::Error::UnknownErrno),
946 }?;
947 Ok(())
948 }
949
950 fn set_mempolicy(&self, mode: i32, nodemask: &[libc::c_ulong], maxnode: u64) -> Result<()> {
951 let libc_nodemask = if nodemask.is_empty() {
953 std::ptr::null()
954 } else {
955 nodemask.as_ptr()
956 };
957 let libc_maxnode = maxnode as libc::c_ulong;
958
959 match unsafe {
960 libc::syscall(
961 libc::SYS_set_mempolicy,
962 mode as libc::c_long,
963 libc_nodemask,
964 libc_maxnode,
965 )
966 } {
967 0 => Ok(()),
968 -1 => Err(SyscallError::Nix(nix::Error::last())),
969 _ => Err(SyscallError::Nix(nix::Error::UnknownErrno)),
970 }
971 }
972
973 fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
974 umount2(target, flags)?;
975 Ok(())
976 }
977
978 fn get_uid(&self) -> Uid {
979 nix::unistd::getuid()
980 }
981
982 fn get_gid(&self) -> Gid {
983 nix::unistd::getgid()
984 }
985
986 fn get_euid(&self) -> Uid {
987 nix::unistd::geteuid()
988 }
989
990 fn get_egid(&self) -> Gid {
991 nix::unistd::getegid()
992 }
993
994 fn personality(&self, domain: PersonalityDomain) -> Result<()> {
995 let domain = nix::sys::personality::Persona::from_bits_retain(domain as i32);
996 nix::sys::personality::set(domain)
997 .map(|_| ())
998 .map_err(|e| e.into())
999 }
1000}
1001
1002#[cfg(test)]
1003mod tests {
1004 use std::fs;
1010 use std::os::unix::prelude::AsRawFd;
1011 use std::str::FromStr;
1012
1013 use anyhow::{Context, Result, bail};
1014 use nix::{fcntl, sys, unistd};
1015 use serial_test::serial;
1016
1017 use super::{LinuxSyscall, MountOption};
1018 use crate::syscall::Syscall;
1019
/// `get_open_fds` must report a freshly opened file as well as the three
/// stdio descriptors.
#[test]
#[serial]
fn test_get_open_fds() -> Result<()> {
    let file = fs::File::open("/dev/null")?;
    let null_fd = file.as_raw_fd();
    let open_fds = LinuxSyscall::get_open_fds()?;

    let found_null = open_fds.iter().any(|&open_fd| open_fd == null_fd);
    if !found_null {
        bail!("failed to find the opened dev null fds: {:?}", open_fds);
    }

    // Close the file explicitly; only the captured listing is used below.
    drop(file);

    let missing_stdio = [0, 1, 2]
        .iter()
        .any(|stdio_fd| !open_fds.contains(stdio_fd));
    if missing_stdio {
        bail!("failed to find the stdio fds: {:?}", open_fds);
    }

    Ok(())
}
1044
/// The userspace fallback must set `FD_CLOEXEC` on an fd opened without it.
#[test]
#[serial]
fn test_close_range_userspace() -> Result<()> {
    let fd = fcntl::open("/dev/null", fcntl::OFlag::O_RDWR, sys::stat::Mode::empty())?;

    LinuxSyscall::emulate_close_range(0).context("failed to clean up the fds")?;

    let flags_after = fcntl::fcntl(fd, fcntl::F_GETFD)?;
    let cloexec_set = (flags_after & fcntl::FdFlag::FD_CLOEXEC.bits()) != 0;
    if !cloexec_set {
        bail!("CLOEXEC flag is not set correctly");
    }

    unistd::close(fd)?;
    Ok(())
}
1061
/// `Syscall::close_range` must set `FD_CLOEXEC` on an fd opened without it.
#[test]
#[serial]
fn test_close_range_native() -> Result<()> {
    let fd = fcntl::open("/dev/null", fcntl::OFlag::O_RDWR, sys::stat::Mode::empty())?;

    let syscall = LinuxSyscall {};
    Syscall::close_range(&syscall, 0).context("failed to clean up the fds")?;

    let flags_after = fcntl::fcntl(fd, fcntl::F_GETFD)?;
    let cloexec_set = (flags_after & fcntl::FdFlag::FD_CLOEXEC.bits()) != 0;
    if !cloexec_set {
        bail!("CLOEXEC flag is not set correctly");
    }

    unistd::close(fd)?;
    Ok(())
}
1079
/// Every name listed by `MountOption::known_options` must be parseable.
#[test]
fn test_known_mount_options_implemented() -> Result<()> {
    for option in MountOption::known_options() {
        if let Err(e) = MountOption::from_str(&option) {
            bail!("failed to parse mount option: {}", e);
        }
    }
    Ok(())
}
1090}