use std::cmp::Ordering;
use std::ffi::{CStr, CString};
use std::fs::{self, File};
use std::io::Error as IoError;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::fs as unixfs;
use std::path::{Component, Path, PathBuf};
use std::{env, io, mem, ptr};
use bitflags::bitflags;
use crate::linux::PathExceptions;
/// Directory on which the sandbox's tmpfs root is mounted before the process
/// pivots into it.
const NEW_ROOT: &str = "/tmp/birdcage-root";
pub(crate) fn setup_mount_namespace(exceptions: PathExceptions) -> io::Result<()> {
let new_root = PathBuf::from(NEW_ROOT);
if !new_root.exists() {
fs::create_dir_all(&new_root)?;
}
let new_root_c = CString::new(new_root.as_os_str().as_bytes()).unwrap();
mount_tmpfs(&new_root_c)?;
let mut bind_mounts: Vec<_> = exceptions.bind_mounts.into_iter().collect();
bind_mounts.sort_unstable_by(|(a_path, a_flags), (b_path, b_flags)| {
match a_path.components().count().cmp(&b_path.components().count()) {
Ordering::Equal => (a_path, a_flags).cmp(&(b_path, b_flags)),
ord => ord,
}
});
for (path, flags) in bind_mounts {
let src_c = CString::new(path.as_os_str().as_bytes()).unwrap();
let unrooted_path = path.strip_prefix("/").unwrap();
let dst = new_root.join(unrooted_path);
let dst_c = CString::new(dst.as_os_str().as_bytes()).unwrap();
if let Err(err) = copy_tree(&path, &new_root) {
log::error!("skipping birdcage exception {path:?}: {err}");
continue;
}
bind_mount(&src_c, &dst_c)?;
update_mount_flags(&dst_c, flags | MountAttrFlags::NOSUID)?;
}
create_symlinks(&new_root, exceptions.symlinks)?;
let old_proc_c = CString::new("/proc").unwrap();
let new_proc = new_root.join("proc");
let new_proc_c = CString::new(new_proc.as_os_str().as_bytes()).unwrap();
fs::create_dir_all(&new_proc)?;
bind_mount(&old_proc_c, &new_proc_c)?;
pivot_root(&new_root_c, &new_root_c)?;
let root_c = CString::new("/").unwrap();
umount(&root_c)?;
deny_mount_propagation()?;
Ok(())
}
/// Recreate `(symlink, target)` pairs below `new_root`.
///
/// Each absolute `symlink` path is mirrored under `new_root`. Entries whose mirrored
/// location already exists (for example because something was mounted there) and
/// entries without a parent directory are skipped. Missing parent directories are
/// created with [`copy_tree`] before the link itself is made.
///
/// # Errors
///
/// Returns the first error from creating parent directories or the symlink.
///
/// # Panics
///
/// Panics if a symlink path is not absolute.
fn create_symlinks(new_root: &Path, symlinks: Vec<(PathBuf, PathBuf)>) -> io::Result<()> {
    for (link, target) in symlinks {
        let link_in_root = new_root.join(link.strip_prefix("/").unwrap());

        // `symlink_metadata` does not follow links, so dangling links count as
        // existing too.
        if link_in_root.symlink_metadata().is_ok() {
            continue;
        }

        if let Some(parent) = link.parent() {
            copy_tree(parent, new_root)?;
            unixfs::symlink(target, link_in_root)?;
        }
    }

    Ok(())
}
/// Recreate the directory skeleton of `src` underneath `dst`.
///
/// Walks `src` component by component and creates the matching node below `dst`:
/// a directory where the source is a directory, an empty file otherwise. File
/// contents are not copied; only the permissions of each newly created node are
/// taken from its source counterpart. Nodes that already exist below `dst` are
/// left untouched.
///
/// # Errors
///
/// Fails if a source component's metadata cannot be read, or if creating a node or
/// setting its permissions fails.
fn copy_tree(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> io::Result<()> {
    let mut mirror = dst.as_ref().to_path_buf();
    let mut origin = PathBuf::new();

    for component in src.as_ref().components() {
        // The source side keeps the leading `/`; the mirror stays below `dst`.
        origin.push(component);
        if matches!(component, Component::RootDir) {
            continue;
        }
        mirror.push(component);

        if mirror.exists() {
            continue;
        }

        let metadata = origin.metadata()?;
        if metadata.is_dir() {
            fs::create_dir(&mirror)?;
        } else {
            File::create(&mirror)?;
        }
        fs::set_permissions(&mirror, metadata.permissions())?;
    }

    Ok(())
}
/// Mount a new tmpfs at `dst` with no mount flags set.
///
/// # Errors
///
/// Returns the OS error reported by `mount` on failure.
fn mount_tmpfs(dst: &CStr) -> io::Result<()> {
    let fstype = CString::new("tmpfs").unwrap();
    let mount_flags = MountFlags::empty().bits();

    // SAFETY: `dst` and `fstype` are valid NUL-terminated strings that outlive the
    // call; the source and data pointers are intentionally null.
    let status = unsafe {
        libc::mount(ptr::null(), dst.as_ptr(), fstype.as_ptr(), mount_flags, ptr::null())
    };

    match status {
        0 => Ok(()),
        _ => Err(IoError::last_os_error()),
    }
}
/// Mount a new procfs instance at `dst` with `NOSUID`, `NODEV` and `NOEXEC` set.
///
/// The string `"proc"` is used both as the mount source and as the filesystem type.
///
/// # Errors
///
/// Returns the OS error reported by `mount` on failure.
pub fn mount_proc(dst: &CStr) -> io::Result<()> {
    let proc_name = CString::new("proc").unwrap();
    let mount_flags = (MountFlags::NOSUID | MountFlags::NODEV | MountFlags::NOEXEC).bits();

    // SAFETY: `dst` and `proc_name` are valid NUL-terminated strings that outlive
    // the call; the data pointer is intentionally null.
    let status = unsafe {
        libc::mount(proc_name.as_ptr(), dst.as_ptr(), proc_name.as_ptr(), mount_flags, ptr::null())
    };

    match status {
        0 => Ok(()),
        _ => Err(IoError::last_os_error()),
    }
}
/// Recursively bind-mount `src` onto `dst` (`BIND | RECURSIVE`).
///
/// # Errors
///
/// Returns the OS error reported by `mount` on failure.
fn bind_mount(src: &CStr, dst: &CStr) -> io::Result<()> {
    let mount_flags = (MountFlags::BIND | MountFlags::RECURSIVE).bits();

    // SAFETY: `src` and `dst` are valid NUL-terminated strings that outlive the
    // call; the filesystem type and data pointers are intentionally null.
    let status = unsafe {
        libc::mount(src.as_ptr(), dst.as_ptr(), ptr::null(), mount_flags, ptr::null())
    };

    match status {
        0 => Ok(()),
        _ => Err(IoError::last_os_error()),
    }
}
fn update_mount_flags(mount: &CStr, flags: MountAttrFlags) -> io::Result<()> {
let attrs = MountAttr { attr_set: flags.bits(), ..Default::default() };
let res = unsafe {
libc::syscall(
libc::SYS_mount_setattr,
libc::AT_FDCWD,
mount.as_ptr(),
libc::AT_RECURSIVE,
&attrs as *const _,
mem::size_of::<MountAttr>(),
)
};
if res == 0 {
Ok(())
} else {
Err(IoError::last_os_error())
}
}
/// Recursively change the propagation type of every mount under `/` to private
/// (`PRIVATE | RECURSIVE`).
///
/// # Errors
///
/// Returns the OS error reported by `mount` on failure.
fn deny_mount_propagation() -> io::Result<()> {
    let target = CString::new("/").unwrap();
    let mount_flags = (MountFlags::PRIVATE | MountFlags::RECURSIVE).bits();

    // SAFETY: `target` is a valid NUL-terminated string that outlives the call;
    // source, filesystem type and data pointers are intentionally null.
    let status = unsafe {
        libc::mount(ptr::null(), target.as_ptr(), ptr::null(), mount_flags, ptr::null())
    };

    match status {
        0 => Ok(()),
        _ => Err(IoError::last_os_error()),
    }
}
/// Make `new_root` the root mount of the calling process, moving the previous root
/// to `put_old`.
///
/// The working directory is recorded before the pivot (falling back to `/` when it
/// cannot be read) and re-entered by path afterwards, so it is looked up again in
/// the new root. If that path does not resolve there, the working directory is set
/// to `/` instead.
///
/// # Errors
///
/// Returns the OS error of the `pivot_root` syscall, or the error from switching to
/// `/` when the original working directory could not be restored.
fn pivot_root(new_root: &CStr, put_old: &CStr) -> io::Result<()> {
    let previous_cwd = env::current_dir().unwrap_or_else(|_| PathBuf::from("/"));

    // SAFETY: both arguments are valid NUL-terminated strings that outlive the call.
    let status =
        unsafe { libc::syscall(libc::SYS_pivot_root, new_root.as_ptr(), put_old.as_ptr()) };
    if status != 0 {
        return Err(IoError::last_os_error());
    }

    env::set_current_dir(previous_cwd).or_else(|_| env::set_current_dir("/"))
}
/// Lazily unmount `target` (`umount2` with `MNT_DETACH`).
///
/// # Errors
///
/// Returns the OS error reported by `umount2` on failure.
fn umount(target: &CStr) -> io::Result<()> {
    // SAFETY: `target` is a valid NUL-terminated string that outlives the call.
    let status = unsafe { libc::umount2(target.as_ptr(), libc::MNT_DETACH) };

    if status == 0 {
        Ok(())
    } else {
        Err(IoError::last_os_error())
    }
}
/// Move the calling process into a new user namespace (plus `extra_namespaces`)
/// and map its IDs.
///
/// The effective UID/GID are read *before* unsharing and are then mapped to
/// `child_uid` / `child_gid` inside the new namespace.
///
/// # Errors
///
/// Returns the error from `unshare` or from writing the ID maps.
pub fn create_user_namespace(
    child_uid: u32,
    child_gid: u32,
    extra_namespaces: Namespaces,
) -> io::Result<()> {
    // SAFETY: `geteuid` and `getegid` take no arguments and cannot fail.
    let (outer_uid, outer_gid) = unsafe { (libc::geteuid(), libc::getegid()) };

    unshare(Namespaces::USER | extra_namespaces)?;

    map_ids(outer_uid, outer_gid, child_uid, child_gid)
}
/// Write single-entry UID and GID maps for the current process.
///
/// Maps `child_uid` to `parent_euid` and `child_gid` to `parent_egid`, each with a
/// range of one ID. `deny` is written to `/proc/self/setgroups` before the GID map.
///
/// # Errors
///
/// Returns the first I/O error from writing any of the three `/proc/self` files.
pub fn map_ids(
    parent_euid: u32,
    parent_egid: u32,
    child_uid: u32,
    child_gid: u32,
) -> io::Result<()> {
    fs::write("/proc/self/uid_map", format!("{child_uid} {parent_euid} 1\n"))?;

    // Must precede the GID map write.
    fs::write("/proc/self/setgroups", b"deny")?;

    fs::write("/proc/self/gid_map", format!("{child_gid} {parent_egid} 1\n"))
}
/// Thin wrapper around `libc::unshare` taking typed [`Namespaces`] flags.
///
/// # Errors
///
/// Returns the OS error reported by `unshare` on failure.
fn unshare(namespaces: Namespaces) -> io::Result<()> {
    // SAFETY: `unshare` only receives an integer flag set.
    let status = unsafe { libc::unshare(namespaces.bits()) };

    if status == 0 {
        Ok(())
    } else {
        Err(IoError::last_os_error())
    }
}
bitflags! {
/// Flags passed as the `mountflags` argument of the `mount` syscall.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MountFlags: libc::c_ulong {
/// `MS_NOSUID`: do not honor set-user-ID and set-group-ID bits.
const NOSUID = libc::MS_NOSUID;
/// `MS_NODEV`: do not allow access to device special files.
const NODEV = libc::MS_NODEV;
/// `MS_NOEXEC`: do not allow programs to be executed.
const NOEXEC = libc::MS_NOEXEC;
/// `MS_BIND`: create a bind mount.
const BIND = libc::MS_BIND;
/// `MS_REC`: apply the operation recursively; combined in this file with
/// `BIND` and with `PRIVATE`.
const RECURSIVE = libc::MS_REC;
/// `MS_PRIVATE`: set the propagation type to private.
const PRIVATE = libc::MS_PRIVATE;
/// Do not follow symlinks. The literal 256 matches `MS_NOSYMFOLLOW` in the
/// Linux headers. NOTE(review): presumably hard-coded because the `libc`
/// crate in use does not export it; confirm.
const NOSYMFOLLOW = 256;
}
}
/// Argument structure handed to the `mount_setattr` syscall.
///
/// `#[repr(C)]` keeps the field order and layout fixed, since a pointer to this
/// struct and its size are passed straight to the kernel. The `Default` value is
/// all zeroes.
#[repr(C)]
#[derive(Default)]
struct MountAttr {
// Attribute bits to set on the mount.
attr_set: u64,
// Attribute bits to clear on the mount (per `mount_setattr(2)`); left zero here.
attr_clr: u64,
// Propagation type to apply (per `mount_setattr(2)`); left zero here.
propagation: u64,
// User namespace file descriptor used for ID-mapped mounts (per
// `mount_setattr(2)`); left zero here.
userns_fd: u64,
}
bitflags! {
/// Attribute bits for the `attr_set` field of the `mount_setattr` argument.
///
/// The numeric values match the kernel's `MOUNT_ATTR_*` constants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MountAttrFlags: u64 {
/// Mount read-only.
const RDONLY = 0x00000001;
/// Ignore set-user-ID and set-group-ID bits.
const NOSUID = 0x00000002;
/// Disallow access to device special files.
const NODEV = 0x00000004;
/// Disallow program execution.
const NOEXEC = 0x00000008;
/// Mask covering the access-time mode bits (`MOUNT_ATTR__ATIME`).
const _ATIME = 0x00000070;
/// Access-time mode: relative atime updates. Value of `MOUNT_ATTR_RELATIME`;
/// note that this flag is zero.
const RELATI = 0x00000000;
/// Access-time mode: do not update access times. Value of `MOUNT_ATTR_NOATIME`.
const NOATIM = 0x00000010;
/// Access-time mode: always update access times.
const STRICTATIME = 0x00000020;
/// Do not update directory access times.
const NODIRATIME = 0x00000080;
/// Create an ID-mapped mount (uses `userns_fd` of the argument struct).
const IDMAP = 0x00100000;
/// Do not follow symlinks.
const NOSYMFOLLOW = 0x00200000;
}
}
bitflags! {
/// Flags accepted by the `unshare` syscall.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Namespaces: libc::c_int {
/// `CLONE_FILES`: unshare the file descriptor table.
const FILES = libc::CLONE_FILES;
/// `CLONE_FS`: unshare filesystem attributes (root, working directory, umask).
const FS = libc::CLONE_FS;
/// `CLONE_NEWCGROUP`: new cgroup namespace.
const CGROUP = libc::CLONE_NEWCGROUP;
/// `CLONE_NEWIPC`: new IPC namespace.
const IPC = libc::CLONE_NEWIPC;
/// `CLONE_NEWNET`: new network namespace.
const NETWORK = libc::CLONE_NEWNET;
/// `CLONE_NEWNS`: new mount namespace.
const MOUNT = libc::CLONE_NEWNS;
/// `CLONE_NEWPID`: new PID namespace.
const PID = libc::CLONE_NEWPID;
/// New time namespace. The literal 0x80 matches `CLONE_NEWTIME` in the Linux
/// headers. NOTE(review): presumably hard-coded because the `libc` crate in
/// use does not export it; confirm.
const TIME = 0x80;
/// `CLONE_NEWUSER`: new user namespace.
const USER = libc::CLONE_NEWUSER;
/// `CLONE_NEWUTS`: new UTS namespace.
const UTS = libc::CLONE_NEWUTS;
/// `CLONE_SYSVSEM`: unshare System V semaphore adjustment values.
const SYSVSEM = libc::CLONE_SYSVSEM;
}
}