use std::any::Any;
use std::ffi::{CStr, CString, OsStr};
use std::os::fd::{BorrowedFd, FromRawFd, RawFd};
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs::symlink;
use std::os::unix::io::{AsRawFd, OwnedFd};
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::{mem, ptr};
use caps::{CapSet, CapsHashSet};
use libc::{c_char, setdomainname, uid_t};
use nix::dir::Dir;
use nix::fcntl;
use nix::fcntl::{OFlag, open};
use nix::mount::{MntFlags, MsFlags, mount, umount2};
use nix::sched::{CloneFlags, unshare};
use nix::sys::stat::{Mode, SFlag, mknod};
use nix::unistd::{Gid, Uid, chown, chroot, close, fchdir, pivot_root, sethostname};
use oci_spec::runtime::PosixRlimit;
use pathrs::flags::OpenFlags;
use pathrs::procfs::{ProcfsBase, ProcfsHandle};
use super::{Result, Syscall, SyscallError};
use crate::capabilities;
use crate::config::PersonalityDomain;
// Flag bits for the `*at`-style calls issued in this file. The values are expected
// to mirror the Linux UAPI headers — verify against the kernel headers when changing.
pub const AT_RECURSIVE: u32 = 0x00008000; pub const AT_EMPTY_PATH: u32 = 0x00001000;
// `MOUNT_ATTR_*` bits carried in `MountAttr::attr_set` / `MountAttr::attr_clr`.
// `MOUNT_ATTR__ATIME` (0x70) is a multi-bit value that includes the NOATIME (0x10)
// and STRICTATIME (0x20) bits; presumably it is the mask of the access-time mode
// field — TODO confirm. The `allow` below is presumably needed because of the
// double underscore in its name.
#[allow(non_upper_case_globals)]
pub const MOUNT_ATTR__ATIME: u64 = 0x00000070; pub const MOUNT_ATTR_RDONLY: u64 = 0x00000001;
pub const MOUNT_ATTR_NOSUID: u64 = 0x00000002;
pub const MOUNT_ATTR_NODEV: u64 = 0x00000004;
pub const MOUNT_ATTR_NOEXEC: u64 = 0x00000008;
// Zero: OR-ing this value into a mask does not change the mask.
pub const MOUNT_ATTR_RELATIME: u64 = 0x00000000;
pub const MOUNT_ATTR_NOATIME: u64 = 0x00000010;
pub const MOUNT_ATTR_STRICTATIME: u64 = 0x00000020;
pub const MOUNT_ATTR_NODIRATIME: u64 = 0x00000080;
pub const MOUNT_ATTR_NOSYMFOLLOW: u64 = 0x00200000;
// Flags for the `move_mount` syscall. `mount_from_fd` passes the `F` variant
// together with an empty source path.
pub const MOVE_MOUNT_F_EMPTY_PATH: u32 = 0x00000004;
pub const MOVE_MOUNT_T_EMPTY_PATH: u32 = 0x00000040;
// Command numbers for the `fsconfig` syscall.
// NOTE(review): these are declared as `u64` while `Syscall::fsconfig` in this file
// takes `cmd: u32`, so callers have to convert.
pub const FSCONFIG_SET_FLAG: u64 = 0;
pub const FSCONFIG_SET_STRING: u64 = 1;
pub const FSCONFIG_SET_BINARY: u64 = 2;
pub const FSCONFIG_SET_PATH: u64 = 3;
pub const FSCONFIG_SET_PATH_EMPTY: u64 = 4;
pub const FSCONFIG_SET_FD: u64 = 5;
pub const FSCONFIG_CMD_CREATE: u64 = 6;
pub const FSCONFIG_CMD_RECONFIGURE: u64 = 7;
pub const FSCONFIG_CMD_CREATE_EXCL: u64 = 8;
/// Mount options that can be parsed from their textual name (see the `FromStr` impl).
///
/// Every variant carries a `(bool, MsFlags)` pair that `from_str` fills in. Paired
/// options such as `ro`/`rw` carry the same flag and differ only in the boolean, so
/// the boolean presumably tells the consumer whether the flag is to be cleared
/// (`true`) or set (`false`) — TODO confirm against the code that consumes these.
pub enum MountOption {
Defaults(bool, MsFlags),
Ro(bool, MsFlags),
Rw(bool, MsFlags),
Suid(bool, MsFlags),
Nosuid(bool, MsFlags),
Dev(bool, MsFlags),
Nodev(bool, MsFlags),
Exec(bool, MsFlags),
Noexec(bool, MsFlags),
Sync(bool, MsFlags),
Async(bool, MsFlags),
Dirsync(bool, MsFlags),
Remount(bool, MsFlags),
Mand(bool, MsFlags),
Nomand(bool, MsFlags),
Atime(bool, MsFlags),
Noatime(bool, MsFlags),
Diratime(bool, MsFlags),
Nodiratime(bool, MsFlags),
Bind(bool, MsFlags),
Rbind(bool, MsFlags),
Unbindable(bool, MsFlags),
Runbindable(bool, MsFlags),
Private(bool, MsFlags),
Rprivate(bool, MsFlags),
Shared(bool, MsFlags),
Rshared(bool, MsFlags),
Slave(bool, MsFlags),
Rslave(bool, MsFlags),
Relatime(bool, MsFlags),
Norelatime(bool, MsFlags),
Strictatime(bool, MsFlags),
Nostrictatime(bool, MsFlags),
}
impl MountOption {
    /// Returns the textual name of every mount option this type knows about,
    /// as owned strings.
    pub fn known_options() -> Vec<String> {
        const NAMES: &[&str] = &[
            "defaults",
            "ro",
            "rw",
            "suid",
            "nosuid",
            "dev",
            "nodev",
            "exec",
            "noexec",
            "sync",
            "async",
            "dirsync",
            "remount",
            "mand",
            "nomand",
            "atime",
            "noatime",
            "diratime",
            "nodiratime",
            "bind",
            "rbind",
            "unbindable",
            "runbindable",
            "private",
            "rprivate",
            "shared",
            "rshared",
            "slave",
            "rslave",
            "relatime",
            "norelatime",
            "strictatime",
            "nostrictatime",
        ];
        NAMES.iter().copied().map(String::from).collect()
    }
}
impl FromStr for MountOption {
    type Err = String;

    /// Parses one mount option name into its variant.
    ///
    /// # Errors
    /// Returns the unrecognised option text itself as the error value.
    fn from_str(option: &str) -> std::result::Result<Self, Self::Err> {
        let parsed = match option {
            "defaults" => Self::Defaults(false, MsFlags::empty()),
            "ro" => Self::Ro(false, MsFlags::MS_RDONLY),
            "rw" => Self::Rw(true, MsFlags::MS_RDONLY),
            "suid" => Self::Suid(true, MsFlags::MS_NOSUID),
            "nosuid" => Self::Nosuid(false, MsFlags::MS_NOSUID),
            "dev" => Self::Dev(true, MsFlags::MS_NODEV),
            "nodev" => Self::Nodev(false, MsFlags::MS_NODEV),
            "exec" => Self::Exec(true, MsFlags::MS_NOEXEC),
            "noexec" => Self::Noexec(false, MsFlags::MS_NOEXEC),
            "sync" => Self::Sync(false, MsFlags::MS_SYNCHRONOUS),
            "async" => Self::Async(true, MsFlags::MS_SYNCHRONOUS),
            "dirsync" => Self::Dirsync(false, MsFlags::MS_DIRSYNC),
            "remount" => Self::Remount(false, MsFlags::MS_REMOUNT),
            "mand" => Self::Mand(false, MsFlags::MS_MANDLOCK),
            "nomand" => Self::Nomand(true, MsFlags::MS_MANDLOCK),
            "atime" => Self::Atime(true, MsFlags::MS_NOATIME),
            "noatime" => Self::Noatime(false, MsFlags::MS_NOATIME),
            "diratime" => Self::Diratime(true, MsFlags::MS_NODIRATIME),
            "nodiratime" => Self::Nodiratime(false, MsFlags::MS_NODIRATIME),
            "bind" => Self::Bind(false, MsFlags::MS_BIND),
            "rbind" => Self::Rbind(false, MsFlags::MS_BIND | MsFlags::MS_REC),
            "unbindable" => Self::Unbindable(false, MsFlags::MS_UNBINDABLE),
            "runbindable" => Self::Runbindable(false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
            "private" => Self::Private(true, MsFlags::MS_PRIVATE),
            "rprivate" => Self::Rprivate(true, MsFlags::MS_PRIVATE | MsFlags::MS_REC),
            "shared" => Self::Shared(true, MsFlags::MS_SHARED),
            "rshared" => Self::Rshared(true, MsFlags::MS_SHARED | MsFlags::MS_REC),
            "slave" => Self::Slave(true, MsFlags::MS_SLAVE),
            "rslave" => Self::Rslave(true, MsFlags::MS_SLAVE | MsFlags::MS_REC),
            "relatime" => Self::Relatime(false, MsFlags::MS_RELATIME),
            "norelatime" => Self::Norelatime(true, MsFlags::MS_RELATIME),
            "strictatime" => Self::Strictatime(false, MsFlags::MS_STRICTATIME),
            "nostrictatime" => Self::Nostrictatime(true, MsFlags::MS_STRICTATIME),
            unknown => return Err(unknown.to_string()),
        };
        Ok(parsed)
    }
}
/// Recursive mount attribute options, parsed from the `r`-prefixed option names
/// handled by the `FromStr` impl (e.g. `rro`, `rnosuid`).
///
/// Each variant carries a `(bool, u64)` pair; the `u64` is one of the
/// `MOUNT_ATTR_*` constants. Paired names such as `rro`/`rrw` map to the same
/// variant and differ only in the boolean, which presumably selects clearing
/// (`true`) versus setting (`false`) the attribute — TODO confirm with the consumer.
/// Note that `from_str` never produces the `Atime` variant.
pub enum MountRecursive {
Rdonly(bool, u64),
Nosuid(bool, u64),
Nodev(bool, u64),
Noexec(bool, u64),
Atime(bool, u64),
Relatime(bool, u64),
Noatime(bool, u64),
StrictAtime(bool, u64),
NoDiratime(bool, u64),
Nosymfollow(bool, u64),
}
impl FromStr for MountRecursive {
type Err = SyscallError;
fn from_str(option: &str) -> std::result::Result<Self, Self::Err> {
match option {
"rro" => Ok(MountRecursive::Rdonly(false, MOUNT_ATTR_RDONLY)),
"rrw" => Ok(MountRecursive::Rdonly(true, MOUNT_ATTR_RDONLY)),
"rnosuid" => Ok(MountRecursive::Nosuid(false, MOUNT_ATTR_NOSUID)),
"rsuid" => Ok(MountRecursive::Nosuid(true, MOUNT_ATTR_NOSUID)),
"rnodev" => Ok(MountRecursive::Nodev(false, MOUNT_ATTR_NODEV)),
"rdev" => Ok(MountRecursive::Nodev(true, MOUNT_ATTR_NODEV)),
"rnoexec" => Ok(MountRecursive::Noexec(false, MOUNT_ATTR_NOEXEC)),
"rexec" => Ok(MountRecursive::Noexec(true, MOUNT_ATTR_NOEXEC)),
"rnodiratime" => Ok(MountRecursive::NoDiratime(false, MOUNT_ATTR_NODIRATIME)),
"rdiratime" => Ok(MountRecursive::NoDiratime(true, MOUNT_ATTR_NODIRATIME)),
"rrelatime" => Ok(MountRecursive::Relatime(false, MOUNT_ATTR_RELATIME)),
"rnorelatime" => Ok(MountRecursive::Relatime(true, MOUNT_ATTR_RELATIME)),
"rnoatime" => Ok(MountRecursive::Noatime(false, MOUNT_ATTR_NOATIME)),
"ratime" => Ok(MountRecursive::Noatime(true, MOUNT_ATTR_NOATIME)),
"rstrictatime" => Ok(MountRecursive::StrictAtime(false, MOUNT_ATTR_STRICTATIME)),
"rnostrictatime" => Ok(MountRecursive::StrictAtime(true, MOUNT_ATTR_STRICTATIME)),
"rnosymfollow" => Ok(MountRecursive::Nosymfollow(false, MOUNT_ATTR_NOSYMFOLLOW)),
"rsymfollow" => Ok(MountRecursive::Nosymfollow(true, MOUNT_ATTR_NOSYMFOLLOW)),
_ => Err(SyscallError::UnexpectedMountRecursiveOption(
option.to_string(),
)),
}
}
}
/// Argument structure for the `mount_setattr` syscall: `Syscall::mount_setattr`
/// in this file passes a raw pointer to it, together with a caller-supplied size.
///
/// `#[repr(C)]` fixes the field order and layout so the struct can be handed to
/// the kernel as-is. `attr_set` / `attr_clr` hold `MOUNT_ATTR_*` bits.
#[repr(C)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MountAttr {
pub attr_set: u64,
pub attr_clr: u64,
pub propagation: u64,
pub userns_fd: u64,
}
impl MountAttr {
    /// Builds a `MountAttr` with every individual `MOUNT_ATTR_*` bit present in
    /// both `attr_set` and `attr_clr`; `attr_clr` additionally includes
    /// `MOUNT_ATTR__ATIME`. `propagation` and `userns_fd` are zero.
    pub fn all() -> Self {
        let every_flag = MOUNT_ATTR_RDONLY
            | MOUNT_ATTR_NOSUID
            | MOUNT_ATTR_NODEV
            | MOUNT_ATTR_NOEXEC
            | MOUNT_ATTR_NODIRATIME
            | MOUNT_ATTR_RELATIME
            | MOUNT_ATTR_NOATIME
            | MOUNT_ATTR_STRICTATIME
            | MOUNT_ATTR_NOSYMFOLLOW;

        Self {
            attr_set: every_flag,
            attr_clr: every_flag | MOUNT_ATTR__ATIME,
            propagation: 0,
            userns_fd: 0,
        }
    }
}
/// Field-less handle type; its `Syscall` implementation in this file forwards
/// each operation to the corresponding `nix` / `libc` / `caps` call.
#[derive(Clone)]
pub struct LinuxSyscall;
impl LinuxSyscall {
/// Builds a `T` from the NUL-terminated C string at `p`.
///
/// # Safety
/// `p` must be non-null and point to a valid NUL-terminated string that stays
/// alive and unmodified for the lifetime `'a`.
unsafe fn from_raw_buf<'a, T>(p: *const c_char) -> T
where
    T: From<&'a OsStr>,
{
    // SAFETY: the caller guarantees `p` is a valid NUL-terminated C string.
    let raw: &'a CStr = unsafe { CStr::from_ptr(p) };
    T::from(OsStr::from_bytes(raw.to_bytes()))
}
/// Copies the `pw_name` field of a `passwd` record into an `Arc<OsStr>`.
///
/// # Safety
/// `passwd.pw_name` must point to a valid NUL-terminated string.
unsafe fn passwd_to_user(passwd: libc::passwd) -> Arc<OsStr> {
    // SAFETY: relies on the caller's guarantee about `pw_name`.
    unsafe { Self::from_raw_buf(passwd.pw_name) }
}
/// Userspace fallback for `close_range`: marks every open descriptor numbered
/// `preserve_fds + 3` or higher as close-on-exec.
///
/// # Errors
/// Fails only if the list of open descriptors cannot be read; failures while
/// flagging an individual descriptor are ignored.
fn emulate_close_range(preserve_fds: i32) -> Result<()> {
    let open_fds = Self::get_open_fds()?;
    // The first three descriptors (0, 1, 2) are always kept in addition to
    // the `preserve_fds` requested by the caller.
    let first_unpreserved = preserve_fds + 3;

    for fd in open_fds {
        if fd < first_unpreserved {
            continue;
        }
        // Best effort: the result is deliberately discarded.
        let _ = fcntl::fcntl(fd, fcntl::F_SETFD(fcntl::FdFlag::FD_CLOEXEC));
    }

    Ok(())
}
/// Lists the file descriptors currently open in this process by reading the
/// `fd` directory under the process's own procfs entry.
///
/// Directory entries that cannot be read, or whose name is not a valid `i32`
/// (such as `.` and `..`), are skipped.
fn get_open_fds() -> Result<Vec<i32>> {
    // Keep the procfs handle a temporary so it is gone before the directory
    // is enumerated.
    let dir = ProcfsHandle::new()?.open(
        ProcfsBase::ProcSelf,
        Path::new("fd"),
        OpenFlags::O_DIRECTORY | OpenFlags::O_CLOEXEC,
    )?;

    let mut fds = Vec::new();
    for entry in Dir::from(dir)? {
        let Ok(entry) = entry else { continue };
        let parsed = entry
            .file_name()
            .to_str()
            .ok()
            .and_then(|name| name.parse::<i32>().ok());
        if let Some(fd) = parsed {
            fds.push(fd);
        }
    }
    Ok(fds)
}
}
impl Syscall for LinuxSyscall {
/// Returns `self` as `&dyn Any` (presumably so callers holding a `dyn Syscall`
/// can downcast to the concrete type).
fn as_any(&self) -> &dyn Any {
self
}
/// Makes `path` the root filesystem of the calling process.
///
/// Sequence: open `path` as a directory, `pivot_root(path, path)`, mark `/`
/// as recursively slave, lazily unmount `/` (the old root), `fchdir` into the
/// directory opened at the start, then close that descriptor.
///
/// # Errors
/// Returns the first failing step's error. The directory descriptor opened at
/// the start is closed on every path, including the error paths.
fn pivot_rootfs(&self, path: &Path) -> Result<()> {
    let newroot = open(
        path,
        OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC,
        Mode::empty(),
    )
    .inspect_err(|errno| {
        tracing::error!(?errno, ?path, "failed to open the new root for pivot root");
    })?;

    // Run the fallible steps inside a closure so that `newroot` is closed
    // below no matter which step fails (previously it leaked on any error).
    let switch_root = || -> Result<()> {
        pivot_root(path, path).inspect_err(|errno| {
            tracing::error!(?errno, ?path, "failed to pivot root to");
        })?;
        mount(
            None::<&str>,
            "/",
            None::<&str>,
            MsFlags::MS_SLAVE | MsFlags::MS_REC,
            None::<&str>,
        )
        .inspect_err(|errno| {
            tracing::error!(?errno, "failed to make original root directory rslave");
        })?;
        umount2("/", MntFlags::MNT_DETACH).inspect_err(|errno| {
            tracing::error!(?errno, "failed to unmount old root directory");
        })?;
        fchdir(newroot).inspect_err(|errno| {
            tracing::error!(?errno, ?newroot, "failed to change directory to new root");
        })?;
        Ok(())
    };
    let switched = switch_root();

    let closed = close(newroot).inspect_err(|errno| {
        tracing::error!(?errno, ?newroot, "failed to close new root directory");
    });

    // Report the earlier failure first; a close failure only surfaces when
    // everything before it succeeded.
    switched?;
    closed?;
    Ok(())
}
/// Moves the calling thread into the namespace referred to by `rawfd`,
/// forwarding `nstype` to `setns`.
///
/// `rawfd` is only borrowed for the duration of the call and is not closed here.
fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()> {
    // SAFETY: assumes the caller passes a descriptor that is open for the
    // duration of this call — TODO confirm at call sites.
    let ns_fd = unsafe { BorrowedFd::borrow_raw(rawfd) };
    nix::sched::setns(ns_fd, nstype).map_err(Into::into)
}
/// Sets the real, effective and saved group id to `gid` and the real,
/// effective and saved user id to `uid`, in that order.
///
/// # Errors
/// Returns the errno of the first failing step (`prctl`, `setresgid`,
/// `setresuid`) or the error from `capabilities::reset_effective`.
fn set_id(&self, uid: Uid, gid: Gid) -> Result<()> {
// Keep-capabilities is switched on for the duration of the id change and
// switched off again at the end of this function.
prctl::set_keep_capabilities(true).map_err(|errno| {
tracing::error!(?errno, "failed to set keep capabilities to true");
nix::errno::Errno::from_raw(errno)
})?;
// Raw syscalls are used instead of libc wrappers; the three arguments are the
// real, effective and saved ids. The group is changed before the user.
if unsafe { libc::syscall(libc::SYS_setresgid, gid, gid, gid) } == -1 {
let err = nix::errno::Errno::last();
tracing::error!(
?err,
?gid,
"failed to set real, effective and saved set gid"
);
return Err(err.into());
}
if unsafe { libc::syscall(libc::SYS_setresuid, uid, uid, uid) } == -1 {
let err = nix::errno::Errno::last();
tracing::error!(
?err,
?uid,
"failed to set real, effective and saved set uid"
);
return Err(err.into());
}
// Only for non-root targets: the effective capability set is reset through
// the project's `capabilities` module.
if uid != Uid::from_raw(0) {
capabilities::reset_effective(self)?;
}
prctl::set_keep_capabilities(false).map_err(|errno| {
tracing::error!(?errno, "failed to set keep capabilities to false");
nix::errno::Errno::from_raw(errno)
})?;
Ok(())
}
/// Calls `nix::sched::unshare` with `flags`, converting its error.
fn unshare(&self, flags: CloneFlags) -> Result<()> {
    unshare(flags).map_err(Into::into)
}
/// Applies `value` to the capability set `cset` of the current thread.
///
/// * `Bounding`: every capability currently in the bounding set but absent
///   from `value` is dropped.
/// * `Ambient`: the set is cleared, then each capability in `value` is raised;
///   a failure to raise one is logged as a warning and not returned.
/// * any other set: `value` is installed with `caps::set`.
fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<()> {
    match cset {
        CapSet::Bounding => {
            let current = caps::read(None, CapSet::Bounding)?;
            current
                .difference(value)
                .try_for_each(|cap| caps::drop(None, CapSet::Bounding, *cap))?;
        }
        CapSet::Ambient => {
            caps::clear(None, CapSet::Ambient)?;
            value.iter().for_each(|c| {
                if let Err(e) = caps::raise(None, CapSet::Ambient, *c) {
                    tracing::warn!(?e, ?c, "can't raise ambient capability");
                }
            });
        }
        _ => caps::set(None, cset, value)?,
    }
    Ok(())
}
/// Sets the hostname via `nix::unistd::sethostname`.
fn set_hostname(&self, hostname: &str) -> Result<()> {
    sethostname(hostname).map_err(Into::into)
}
fn set_domainname(&self, domainname: &str) -> Result<()> {
let ptr = domainname.as_bytes().as_ptr() as *const c_char;
let len = domainname.len();
match unsafe { setdomainname(ptr, len) } {
0 => Ok(()),
-1 => Err(nix::Error::last()),
_ => Err(nix::Error::UnknownErrno),
}?;
Ok(())
}
/// Applies the soft and hard limit from `rlimit` with `libc::setrlimit`.
///
/// # Errors
/// A return value of `-1` yields the current errno; any other non-zero return
/// value yields `UnknownErrno`.
fn set_rlimit(&self, rlimit: &PosixRlimit) -> Result<()> {
    let limits = libc::rlimit {
        rlim_cur: rlimit.soft(),
        rlim_max: rlimit.hard(),
    };
    // The resource argument is cast to a different integer type on musl
    // than on the other targets.
    #[cfg(not(target_env = "musl"))]
    let resource = rlimit.typ() as u32;
    #[cfg(target_env = "musl")]
    let resource = rlimit.typ() as i32;

    let rc = unsafe { libc::setrlimit(resource, &limits) };
    match rc {
        0 => Ok(()),
        -1 => Err(SyscallError::Nix(nix::Error::last())),
        _ => Err(SyscallError::Nix(nix::Error::UnknownErrno)),
    }
}
/// Looks up the login name for `uid` with `getpwuid_r`.
///
/// Returns `None` when the lookup yields no entry, when the result pointer is
/// not the record passed in, or when doubling the buffer size would overflow.
fn get_pwuid(&self, uid: uid_t) -> Option<Arc<OsStr>> {
let mut passwd = unsafe { mem::zeroed::<libc::passwd>() };
// Scratch buffer for the strings `getpwuid_r` writes; starts at 2048 bytes.
let mut buf = vec![0; 2048];
let mut result = ptr::null_mut::<libc::passwd>();
loop {
let r = unsafe {
libc::getpwuid_r(uid, &mut passwd, buf.as_mut_ptr(), buf.len(), &mut result)
};
// Only ERANGE triggers a retry with a larger buffer; every other return
// value (success or failure) ends the loop.
if r != libc::ERANGE {
break;
}
let newsize = buf.len().checked_mul(2)?;
buf.resize(newsize, 0);
}
// A null result means no entry was produced (not found or an error).
if result.is_null() {
return None;
}
// The result is expected to point at the `passwd` record supplied above.
if result != &mut passwd {
return None;
}
// `buf` is still alive here, so the string pointers inside the record remain
// valid while the name is copied out.
let user = unsafe { Self::passwd_to_user(result.read()) };
Some(user)
}
/// Changes the root directory to `path` via `nix::unistd::chroot`.
fn chroot(&self, path: &Path) -> Result<()> {
    chroot(path).map_err(Into::into)
}
/// Forwards all arguments unchanged to `nix::mount::mount`.
fn mount(
    &self,
    source: Option<&Path>,
    target: &Path,
    fstype: Option<&str>,
    flags: MsFlags,
    data: Option<&str>,
) -> Result<()> {
    mount(source, target, fstype, flags, data).map_err(Into::into)
}
/// Attaches the tree referred to by `source_fd` at `target`.
///
/// The source is cloned with `open_tree` using an empty path relative to
/// `source_fd`, and the resulting mount descriptor is attached with
/// `move_mount` at `<parent of target>/<file name of target>`.
///
/// # Errors
/// `EINVAL` if `target` has no parent or file name, or if the file name
/// contains a NUL byte; otherwise the errno of the failing `open`,
/// `open_tree` or `move_mount` call.
fn mount_from_fd(&self, source_fd: &OwnedFd, target: &Path) -> Result<()> {
// NOTE(review): for a single-component relative target, `Path::parent` returns
// an empty path, which `open` below will reject — confirm callers always pass
// a path with a non-empty parent.
let parent = target.parent().ok_or_else(|| {
tracing::error!(?target, "target has no parent");
SyscallError::Nix(nix::Error::EINVAL)
})?;
let name = target.file_name().ok_or_else(|| {
tracing::error!(?target, "target has no file name");
SyscallError::Nix(nix::Error::EINVAL)
})?;
// The parent directory is opened with O_PATH; wrapping it in `OwnedFd` closes
// it when this function returns.
let parent_fd = unsafe {
OwnedFd::from_raw_fd(open(
parent,
OFlag::O_PATH | OFlag::O_CLOEXEC | OFlag::O_DIRECTORY,
Mode::empty(),
)?)
};
let open_tree_flags: libc::c_uint = (libc::OPEN_TREE_CLOEXEC as libc::c_uint)
| (libc::OPEN_TREE_CLONE as libc::c_uint)
| (libc::AT_EMPTY_PATH as libc::c_uint);
// Empty C string: combined with the EMPTY_PATH flags, the descriptor argument
// itself names the object.
const EMPTY_PATH: [libc::c_char; 1] = [0];
let mount_fd_raw = unsafe {
libc::syscall(
libc::SYS_open_tree,
source_fd.as_raw_fd(),
EMPTY_PATH.as_ptr(),
open_tree_flags,
)
};
if mount_fd_raw < 0 {
let err = nix::errno::Errno::last();
tracing::error!(?err, "open_tree from fd failed");
return Err(SyscallError::Nix(err));
}
// Take ownership right away so the descriptor is closed on every later path.
let mount_fd = unsafe { OwnedFd::from_raw_fd(mount_fd_raw as RawFd) };
let name_cstr = CString::new(name.as_bytes()).map_err(|err| {
tracing::error!(?target, ?err, "failed to convert file name to cstring");
SyscallError::Nix(nix::Error::EINVAL)
})?;
// Source: `mount_fd` with an empty path. Destination: `name_cstr` resolved
// relative to `parent_fd`.
let res = unsafe {
libc::syscall(
libc::SYS_move_mount,
mount_fd.as_raw_fd(),
EMPTY_PATH.as_ptr(),
parent_fd.as_raw_fd(),
name_cstr.as_ptr(),
MOVE_MOUNT_F_EMPTY_PATH as libc::c_uint,
)
};
if res < 0 {
let err = nix::errno::Errno::last();
tracing::error!(?target, ?err, "move_mount failed");
return Err(SyscallError::Nix(err));
}
Ok(())
}
fn move_mount(
&self,
from_dirfd: BorrowedFd<'_>,
from_path: Option<&str>,
to_dirfd: BorrowedFd<'_>,
to_path: Option<&str>,
flags: u32,
) -> Result<()> {
const EMPTY_PATH: [libc::c_char; 1] = [0];
let from_cstr: Option<CString> = from_path
.and_then(|s| if s.is_empty() { None } else { Some(s) })
.map(|s| CString::new(s).map_err(|_| nix::Error::EINVAL))
.transpose()?;
let from_ptr = from_cstr
.as_ref()
.map_or(EMPTY_PATH.as_ptr(), |c| c.as_ptr());
let to_cstr: Option<CString> = to_path
.and_then(|s| if s.is_empty() { None } else { Some(s) })
.map(|s| CString::new(s).map_err(|_| nix::Error::EINVAL))
.transpose()?;
let to_ptr = to_cstr.as_ref().map_or(EMPTY_PATH.as_ptr(), |c| c.as_ptr());
let rc = unsafe {
libc::syscall(
libc::SYS_move_mount,
from_dirfd,
from_ptr,
to_dirfd,
to_ptr,
flags as libc::c_uint,
)
};
match rc {
0 => Ok(()),
-1 => Err(nix::Error::last().into()),
_ => Err(nix::Error::UnknownErrno.into()),
}
}
fn fsopen(&self, fstype: Option<&str>, flags: u32) -> Result<OwnedFd> {
let t_cstr: Option<CString> = fstype
.map(|t| CString::new(t).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
.transpose()?;
let t_ptr = t_cstr.as_ref().map_or(std::ptr::null(), |c| c.as_ptr());
let fd =
unsafe { libc::syscall(libc::SYS_fsopen, t_ptr, flags as libc::c_uint) } as libc::c_int;
if fd < 0 {
return Err(SyscallError::Nix(nix::Error::last()));
}
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}
fn fsconfig(
&self,
fsfd: BorrowedFd<'_>,
cmd: u32,
key: Option<&str>,
val: Option<&str>,
aux: libc::c_int,
) -> Result<()> {
let k_cstr: Option<CString> = key
.map(|k| CString::new(k).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
.transpose()?;
let k_ptr = k_cstr.as_ref().map_or(std::ptr::null(), |k| k.as_ptr());
let v_cstr: Option<CString> = val
.map(|v| CString::new(v).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
.transpose()?;
let v_ptr = v_cstr
.as_ref()
.map_or(std::ptr::null(), |v| v.as_ptr() as *const libc::c_void);
let rc = unsafe {
libc::syscall(
libc::SYS_fsconfig,
fsfd.as_raw_fd() as libc::c_int,
cmd as libc::c_uint,
k_ptr,
v_ptr,
aux,
)
};
if rc == -1 {
return Err(SyscallError::Nix(nix::Error::last()));
}
Ok(())
}
fn fsmount(
&self,
fsfd: BorrowedFd<'_>,
flags: u32,
attr_flags: Option<u64>,
) -> Result<OwnedFd> {
let attr = attr_flags.unwrap_or(0);
let ret = unsafe {
libc::syscall(
libc::SYS_fsmount,
fsfd.as_raw_fd() as libc::c_int,
flags as libc::c_uint,
attr as libc::c_ulong,
)
} as libc::c_int;
if ret < 0 {
return Err(SyscallError::Nix(nix::Error::last()));
}
Ok(unsafe { std::os::fd::OwnedFd::from_raw_fd(ret) })
}
fn open_tree(&self, dirfd: RawFd, path: Option<&str>, flags: u32) -> Result<OwnedFd> {
static EMPTY: [libc::c_char; 1] = [0];
let path_cstr: Option<CString> = path
.map(|s| CString::new(s).map_err(|_| SyscallError::Nix(nix::errno::Errno::EINVAL)))
.transpose()?;
let c_path: *const c_char = match path_cstr.as_ref() {
Some(cs) => cs.as_ptr(),
None => EMPTY.as_ptr(),
};
let fd = unsafe {
libc::syscall(
libc::SYS_open_tree,
dirfd as libc::c_int,
c_path,
flags as libc::c_uint,
)
} as libc::c_int;
if fd < 0 {
return Err(SyscallError::Nix(nix::Error::last()));
}
Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}
/// Creates a symbolic link at `link` pointing to `original`, using
/// `std::os::unix::fs::symlink`.
fn symlink(&self, original: &Path, link: &Path) -> Result<()> {
    symlink(original, link).map_err(Into::into)
}
/// Forwards all arguments unchanged to `nix::sys::stat::mknod`.
fn mknod(&self, path: &Path, kind: SFlag, perm: Mode, dev: u64) -> Result<()> {
    mknod(path, kind, perm, dev).map_err(Into::into)
}
/// Forwards all arguments unchanged to `nix::unistd::chown`.
fn chown(&self, path: &Path, owner: Option<Uid>, group: Option<Gid>) -> Result<()> {
    chown(path, owner, group).map_err(Into::into)
}
/// Sets the supplementary group list through the raw `setgroups` syscall.
///
/// The `Gid` slice is handed to the kernel as an array of `gid_t`
/// (assumes `Gid` has the same layout as `gid_t` — TODO confirm).
///
/// # Errors
/// The current errno when the syscall returns `-1`; the failure is also logged.
fn set_groups(&self, groups: &[Gid]) -> Result<()> {
    let count = groups.len() as libc::size_t;
    let list = groups.as_ptr() as *const libc::gid_t;

    let rc = unsafe { libc::syscall(libc::SYS_setgroups, count, list) };
    if rc != -1 {
        return Ok(());
    }

    let err = nix::errno::Errno::last();
    tracing::error!(?err, ?groups, "failed to set groups");
    Err(err.into())
}
/// Marks every descriptor numbered `3 + preserve_fds` or higher as
/// close-on-exec using the `close_range` syscall with `CLOSE_RANGE_CLOEXEC`.
///
/// If the syscall fails with `ENOSYS` or `EINVAL` (presumably a kernel that
/// lacks the syscall or the flag), the userspace emulation is used instead.
///
/// # Errors
/// Any other errno from the syscall, `UnknownErrno` for an unexpected return
/// value, or the emulation's error.
#[tracing::instrument(skip(self))]
fn close_range(&self, preserve_fds: i32) -> Result<()> {
    let rc = unsafe {
        libc::syscall(
            libc::SYS_close_range,
            3 + preserve_fds,
            libc::c_int::MAX,
            libc::CLOSE_RANGE_CLOEXEC,
        )
    };

    if rc == 0 {
        return Ok(());
    }
    if rc != -1 {
        return Err(SyscallError::Nix(nix::errno::Errno::UnknownErrno));
    }

    match nix::errno::Errno::last() {
        nix::errno::Errno::ENOSYS | nix::errno::Errno::EINVAL => {
            Self::emulate_close_range(preserve_fds)
        }
        other => Err(SyscallError::Nix(other)),
    }
}
fn mount_setattr(
&self,
dirfd: BorrowedFd<'_>,
pathname: &Path,
flags: u32,
mount_attr: &MountAttr,
size: libc::size_t,
) -> Result<()> {
let path_c_string = pathname
.to_path_buf()
.to_str()
.map(CString::new)
.ok_or_else(|| {
tracing::error!(path = ?pathname, "failed to convert path to string");
nix::Error::EINVAL
})?
.map_err(|err| {
tracing::error!(path = ?pathname, ?err, "failed to convert path to string");
nix::Error::EINVAL
})?;
match unsafe {
libc::syscall(
libc::SYS_mount_setattr,
dirfd,
path_c_string.as_ptr(),
flags,
mount_attr as *const MountAttr,
size,
)
} {
0 => Ok(()),
-1 => Err(nix::Error::last()),
_ => Err(nix::Error::UnknownErrno),
}?;
Ok(())
}
fn set_io_priority(&self, class: i64, priority: i64) -> Result<()> {
let ioprio_who_progress: libc::c_int = 1;
let ioprio_who_pid = 0;
let iop = (class << 13) | priority;
match unsafe {
libc::syscall(
libc::SYS_ioprio_set,
ioprio_who_progress,
ioprio_who_pid,
iop as libc::c_ulong,
)
} {
0 => Ok(()),
-1 => Err(nix::Error::last()),
_ => Err(nix::Error::UnknownErrno),
}?;
Ok(())
}
/// Thin wrapper around the `set_mempolicy` syscall.
///
/// An empty `nodemask` is passed to the kernel as a null pointer.
///
/// # Errors
/// The current errno if the syscall returns `-1`; `UnknownErrno` for any
/// other non-zero return value.
fn set_mempolicy(&self, mode: i32, nodemask: &[libc::c_ulong], maxnode: u64) -> Result<()> {
    let mask_ptr = match nodemask {
        [] => std::ptr::null(),
        mask => mask.as_ptr(),
    };

    let rc = unsafe {
        libc::syscall(
            libc::SYS_set_mempolicy,
            mode as libc::c_long,
            mask_ptr,
            maxnode as libc::c_ulong,
        )
    };
    match rc {
        0 => Ok(()),
        -1 => Err(SyscallError::Nix(nix::Error::last())),
        _ => Err(SyscallError::Nix(nix::Error::UnknownErrno)),
    }
}
/// Forwards both arguments unchanged to `nix::mount::umount2`.
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
    umount2(target, flags).map_err(Into::into)
}
/// Returns the result of `nix::unistd::getuid` (the real user id of the caller).
fn get_uid(&self) -> Uid {
nix::unistd::getuid()
}
/// Returns the result of `nix::unistd::getgid` (the real group id of the caller).
fn get_gid(&self) -> Gid {
nix::unistd::getgid()
}
/// Returns the result of `nix::unistd::geteuid` (the effective user id of the caller).
fn get_euid(&self) -> Uid {
nix::unistd::geteuid()
}
/// Returns the result of `nix::unistd::getegid` (the effective group id of the caller).
fn get_egid(&self) -> Gid {
nix::unistd::getegid()
}
/// Sets the process execution domain via `nix::sys::personality::set`.
///
/// `domain` is cast to `i32` and wrapped with `Persona::from_bits_retain`, so
/// no bits are dropped. The value returned by `set` is discarded.
fn personality(&self, domain: PersonalityDomain) -> Result<()> {
    let persona = nix::sys::personality::Persona::from_bits_retain(domain as i32);
    nix::sys::personality::set(persona)?;
    Ok(())
}
}
// Unit tests for the helpers in this file. The descriptor-related tests are
// marked `#[serial]` so they do not run concurrently with each other.
#[cfg(test)]
mod tests {
use std::fs;
use std::os::unix::prelude::AsRawFd;
use std::str::FromStr;
use anyhow::{Context, Result, bail};
use nix::{fcntl, sys, unistd};
use serial_test::serial;
use super::{LinuxSyscall, MountOption};
use crate::syscall::Syscall;
/// A freshly opened `/dev/null` and the stdio descriptors 0, 1, 2 must all
/// appear in the list returned by `get_open_fds`.
#[test]
#[serial]
fn test_get_open_fds() -> Result<()> {
let file = fs::File::open("/dev/null")?;
let fd = file.as_raw_fd();
let open_fds = LinuxSyscall::get_open_fds()?;
if !open_fds.contains(&fd) {
bail!("failed to find the opened dev null fds: {:?}", open_fds);
}
// The file is kept open until after the listing was taken.
drop(file);
if ![0, 1, 2]
.iter()
.all(|&stdio_fd| open_fds.contains(&stdio_fd))
{
bail!("failed to find the stdio fds: {:?}", open_fds);
}
Ok(())
}
/// The userspace emulation must set FD_CLOEXEC on a descriptor opened
/// without it.
#[test]
#[serial]
fn test_close_range_userspace() -> Result<()> {
let fd = fcntl::open("/dev/null", fcntl::OFlag::O_RDWR, sys::stat::Mode::empty())?;
LinuxSyscall::emulate_close_range(0).context("failed to clean up the fds")?;
let fd_flag = fcntl::fcntl(fd, fcntl::F_GETFD)?;
if (fd_flag & fcntl::FdFlag::FD_CLOEXEC.bits()) == 0 {
bail!("CLOEXEC flag is not set correctly");
}
unistd::close(fd)?;
Ok(())
}
/// Same check as above, but through `Syscall::close_range`, which tries the
/// native syscall first.
#[test]
#[serial]
fn test_close_range_native() -> Result<()> {
let fd = fcntl::open("/dev/null", fcntl::OFlag::O_RDWR, sys::stat::Mode::empty())?;
let syscall = LinuxSyscall {};
syscall
.close_range(0)
.context("failed to clean up the fds")?;
let fd_flag = fcntl::fcntl(fd, fcntl::F_GETFD)?;
if (fd_flag & fcntl::FdFlag::FD_CLOEXEC.bits()) == 0 {
bail!("CLOEXEC flag is not set correctly");
}
unistd::close(fd)?;
Ok(())
}
/// Every name returned by `MountOption::known_options` must be accepted by
/// `MountOption::from_str`.
#[test]
fn test_known_mount_options_implemented() -> Result<()> {
for option in MountOption::known_options() {
match MountOption::from_str(&option) {
Ok(_) => {}
Err(e) => bail!("failed to parse mount option: {}", e),
}
}
Ok(())
}
}