use std::collections::HashMap;
use std::ffi::CString;
use std::io::{Error as IoError, ErrorKind as IoErrorKind};
use std::os::fd::OwnedFd;
use std::os::unix::ffi::OsStrExt;
use std::path::{Component, Path, PathBuf};
use std::{env, fs, io, ptr};
use rustix::pipe::pipe;
use rustix::process::{Gid, Pid, Uid, WaitOptions};
use crate::error::{Error, Result};
use crate::linux::namespaces::{MountAttrFlags, Namespaces};
use crate::linux::seccomp::SyscallFilter;
use crate::{Child, Command, Exception, Sandbox};
mod namespaces;
mod seccomp;
/// Linux sandbox configuration, collected before the sandboxee is spawned.
#[derive(Default)]
pub struct LinuxSandbox {
    /// Filesystem paths the sandboxee may access, together with their mount
    /// flags.
    path_exceptions: PathExceptions,
    /// Environment variable names handed to `crate::restrict_env_variables`
    /// on spawn, unless `full_env` is set.
    env_exceptions: Vec<String>,
    /// When `true`, the environment is not restricted on spawn.
    full_env: bool,
    /// When `false`, the sandbox is cloned with `CLONE_NEWNET`.
    allow_networking: bool,
}
impl Sandbox for LinuxSandbox {
    /// Create a sandbox with no exceptions configured.
    fn new() -> Self {
        Self::default()
    }

    /// Record a single exception to the sandbox restrictions.
    ///
    /// # Errors
    ///
    /// Path exceptions propagate any error returned by
    /// `PathExceptions::update`.
    fn add_exception(&mut self, exception: Exception) -> Result<&mut Self> {
        // Non-path exceptions only touch local state and return right away;
        // path exceptions are reduced to their (write, execute) pair so they
        // can share a single `update` call below.
        let (path, write, execute) = match exception {
            Exception::Environment(key) => {
                self.env_exceptions.push(key);
                return Ok(self);
            },
            Exception::FullEnvironment => {
                self.full_env = true;
                return Ok(self);
            },
            Exception::Networking => {
                self.allow_networking = true;
                return Ok(self);
            },
            Exception::Read(path) => (path, false, false),
            Exception::WriteAndRead(path) => (path, true, false),
            Exception::ExecuteAndRead(path) => (path, false, true),
        };

        self.path_exceptions.update(path, write, execute)?;
        Ok(self)
    }

    /// Spawn `sandboxee` inside a freshly created sandbox.
    ///
    /// # Panics
    ///
    /// Panics unless `thread_count` reports exactly one thread for the
    /// calling process.
    ///
    /// # Errors
    ///
    /// Fails if any stdio or exit-signal pipe cannot be created, if the
    /// sandbox init process cannot be spawned, or if `Child::new` fails.
    fn spawn(self, sandboxee: Command) -> Result<Child> {
        // A failed thread count lookup is treated like a multi-threaded process.
        assert!(
            matches!(thread_count(), Ok(1)),
            "`Sandbox::spawn` must be called from a single-threaded process"
        );

        // Strip the environment down to the configured exceptions.
        if !self.full_env {
            crate::restrict_env_variables(&self.env_exceptions);
        }

        // Pipes connecting the caller to the init process' stdio, plus one for
        // reporting the sandboxee's terminating signal.
        let stdin_pipe = sandboxee.stdin.make_pipe(true)?;
        let stdout_pipe = sandboxee.stdout.make_pipe(false)?;
        let stderr_pipe = sandboxee.stderr.make_pipe(false)?;
        let exit_signal_pipe = pipe().map_err(IoError::from)?;

        // `self` is consumed by `ProcessInitArg::new`, so copy the flag first.
        let allow_networking = self.allow_networking;
        let init_arg = spawn_sandbox_init(
            ProcessInitArg::new(
                self,
                sandboxee,
                exit_signal_pipe,
                stdin_pipe,
                stdout_pipe,
                stderr_pipe,
            ),
            allow_networking,
        )?;

        // Take apart the init argument inside its own scope: every field that
        // is not returned from the block is bound to a local and therefore
        // dropped when the block ends, before `Child::new` runs.
        let (pid, stdin_tx, stdout_rx, stderr_rx, exit_signal_rx) = {
            let ProcessInitArg {
                pid,
                stdin_tx,
                stdout_rx,
                stderr_rx,
                exit_signal_rx,
                path_exceptions: _path_exceptions,
                exit_signal_tx: _exit_signal_tx,
                parent_euid: _parent_euid,
                parent_egid: _parent_egid,
                stdout_tx: _stdout_tx,
                stderr_tx: _stderr_tx,
                sandboxee: _sandboxee,
                stdin_rx: _stdin_rx,
            } = init_arg;
            (pid, stdin_tx, stdout_rx, stderr_rx, exit_signal_rx)
        };

        let child = Child::new(pid, exit_signal_rx, stdin_tx, stdout_rx, stderr_rx)?;
        Ok(child)
    }
}
/// Create the sandbox namespaces and spawn the sandbox init process in them.
///
/// `sandbox_init` is cloned into new IPC, mount, PID and user namespaces (plus
/// a new network namespace unless `allow_networking` is set) and receives a
/// boxed `init_arg` as its argument.
///
/// On success `init_arg` is handed back with `pid` set to the value returned
/// by `clone`.
///
/// # Errors
///
/// Returns the OS error if mapping the init stack or cloning fails. The stack
/// mapping and the boxed argument are released in the calling process on
/// every path.
fn spawn_sandbox_init(init_arg: ProcessInitArg, allow_networking: bool) -> Result<ProcessInitArg> {
    /// Size of the stack mapped for the cloned init process.
    const STACK_SIZE: usize = 1024 * 1024;

    // Construct clone flags with the required namespaces.
    let mut flags =
        libc::CLONE_NEWIPC | libc::CLONE_NEWNS | libc::CLONE_NEWPID | libc::CLONE_NEWUSER;
    if !allow_networking {
        flags |= libc::CLONE_NEWNET;
    }

    // SAFETY: Requests a fresh anonymous private mapping without an address
    // hint, so no existing memory is affected.
    let child_stack = unsafe {
        libc::mmap(
            ptr::null_mut(),
            STACK_SIZE,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_STACK,
            -1,
            0,
        )
    };
    if child_stack == libc::MAP_FAILED {
        return Err(IoError::last_os_error().into());
    }

    // `clone` expects the topmost address of the child's stack.
    //
    // SAFETY: `child_stack` is the start of a mapping of exactly `STACK_SIZE`
    // bytes, so the one-past-the-end pointer stays within bounds.
    let stack_top = unsafe { child_stack.add(STACK_SIZE) };

    let init_arg_raw = Box::into_raw(Box::new(init_arg));

    // SAFETY: `stack_top` and `init_arg_raw` point to live allocations, and
    // `sandbox_init` expects exactly a boxed `ProcessInitArg` as its argument.
    let init_pid = unsafe {
        libc::clone(sandbox_init, stack_top, flags | libc::SIGCHLD, init_arg_raw as _)
    };

    // Read errno before the cleanup below gets a chance to overwrite it.
    let clone_error = (init_pid == -1).then(IoError::last_os_error);

    // Reclaim the argument in this process regardless of the outcome, so its
    // descriptors are closed rather than leaked when `clone` failed.
    //
    // SAFETY: `init_arg_raw` comes from `Box::into_raw` above and is reclaimed
    // exactly once here. `CLONE_VM` is not set, so a successfully cloned child
    // works on its own copy of the allocation.
    let mut init_arg = unsafe { Box::from_raw(init_arg_raw) };

    // The child runs on its own copy of the stack (no `CLONE_VM`), so this
    // process' mapping is no longer needed. Failure to unmap is ignored since
    // it cannot be handled meaningfully here.
    //
    // SAFETY: `child_stack`/`STACK_SIZE` describe the mapping created above,
    // which this process never accesses again.
    unsafe {
        libc::munmap(child_stack, STACK_SIZE);
    }

    match clone_error {
        Some(err) => Err(err.into()),
        None => {
            init_arg.pid = init_pid;
            Ok(*init_arg)
        },
    }
}
/// Entry point of the sandbox init process, invoked through `libc::clone`.
///
/// `arg` must be the pointer produced by `Box::into_raw` on a
/// `Box<ProcessInitArg>`. The return value is the init process' exit code;
/// failures inside `sandbox_init_inner` are reported on stderr and turned into
/// exit code `1`.
extern "C" fn sandbox_init(arg: *mut libc::c_void) -> libc::c_int {
    // SAFETY: `spawn_sandbox_init` passes a pointer obtained from
    // `Box::into_raw(Box<ProcessInitArg>)` as the clone argument.
    let init_arg = unsafe { Box::from_raw(arg.cast::<ProcessInitArg>()) };

    sandbox_init_inner(*init_arg).unwrap_or_else(|err| {
        eprintln!("sandboxing failure: {err}");
        1
    })
}
/// Body of the sandbox init process: set up isolation, run the sandboxee and
/// reap children until the sandboxee exits.
///
/// Returns the exit code for the init process: the sandboxee's exit status (or
/// `1` when no exit status is available), or `1` after the sandboxee was
/// terminated by a signal, in which case the signal number is first written to
/// `exit_signal_tx` as little-endian bytes.
///
/// # Errors
///
/// Any failing setup step, spawn, wait or pipe write is returned as an
/// `io::Error`.
fn sandbox_init_inner(mut init_arg: ProcessInitArg) -> io::Result<libc::c_int> {
// Drop the descriptors that `LinuxSandbox::spawn` hands to `Child::new`; they
// are not used anywhere below.
init_arg.stdin_tx.take();
init_arg.stdout_rx.take();
init_arg.stderr_rx.take();
drop(init_arg.exit_signal_rx);
// Redirect this process' stdio to the provided pipe ends, where present.
if let Some(stdin_pipe) = &mut init_arg.stdin_rx {
rustix::stdio::dup2_stdin(stdin_pipe)?;
}
if let Some(stdout_pipe) = &init_arg.stdout_tx {
rustix::stdio::dup2_stdout(stdout_pipe)?;
}
if let Some(stderr_pipe) = &init_arg.stderr_tx {
rustix::stdio::dup2_stderr(stderr_pipe)?;
}
// NOTE(review): presumably maps the parent's effective UID/GID to 0/0 inside
// the new user namespace — confirm against `namespaces::map_ids`.
namespaces::map_ids(init_arg.parent_euid.as_raw(), init_arg.parent_egid.as_raw(), 0, 0)?;
// Build the mount namespace from the configured path exceptions.
namespaces::setup_mount_namespace(init_arg.path_exceptions)?;
// Mount procfs at `/proc`.
let new_proc_c = CString::new("/proc")?;
namespaces::mount_proc(&new_proc_c)?;
// NOTE(review): looks like this enters another user namespace mapped back to
// the parent's IDs, with no additional namespaces requested — confirm against
// `namespaces::create_user_namespace`.
namespaces::create_user_namespace(
init_arg.parent_euid.as_raw(),
init_arg.parent_egid.as_raw(),
Namespaces::empty(),
)?;
// Install the syscall filter; its error is wrapped into an `io::Error`.
SyscallFilter::apply().map_err(|err| IoError::new(IoErrorKind::Other, err))?;
rustix::thread::set_no_new_privs(true)?;
// Spawn the sandboxee, inheriting the stdio set up above.
let mut std_command = std::process::Command::from(init_arg.sandboxee);
std_command.stdin(std::process::Stdio::inherit());
std_command.stdout(std::process::Stdio::inherit());
std_command.stderr(std::process::Stdio::inherit());
let child = std_command.spawn()?;
let child_pid = Pid::from_raw(child.id() as i32);
// Wait for children until the sandboxee itself exits; other children are
// reaped and ignored.
loop {
match rustix::process::wait(WaitOptions::empty())? {
Some((pid, status)) if Some(pid) == child_pid => match status.terminating_signal() {
Some(signal) => {
// Report the terminating signal through the exit signal pipe.
rustix::io::write(init_arg.exit_signal_tx, &signal.to_le_bytes())?;
return Ok(1);
},
None => return Ok(status.exit_status().unwrap_or(1) as i32),
},
Some(_) => (),
// `wait` is called with empty options, so per the message below a `None`
// result is not expected here.
None => unreachable!("none without nohang"),
}
}
}
/// State handed from `LinuxSandbox::spawn` to the sandbox init process.
///
/// The value is passed to `sandbox_init` through `clone` and also returned to
/// the caller by `spawn_sandbox_init`, so it carries the descriptors used on
/// both sides.
struct ProcessInitArg {
/// Path exceptions passed to `namespaces::setup_mount_namespace` by init.
path_exceptions: PathExceptions,
/// Command spawned by init once the sandbox is set up.
sandboxee: Command,
/// Effective UID of the process that called `ProcessInitArg::new`.
parent_euid: Uid,
/// Effective GID of the process that called `ProcessInitArg::new`.
parent_egid: Gid,
/// Descriptor init `dup2`s onto its stdin, if present.
stdin_rx: Option<OwnedFd>,
/// Descriptor init `dup2`s onto its stdout, if present.
stdout_tx: Option<OwnedFd>,
/// Descriptor init `dup2`s onto its stderr, if present.
stderr_tx: Option<OwnedFd>,
/// Descriptor init writes the sandboxee's terminating signal to.
exit_signal_tx: OwnedFd,
/// Passed to `Child::new` by `LinuxSandbox::spawn`; dropped by init.
stdin_tx: Option<OwnedFd>,
/// Passed to `Child::new` by `LinuxSandbox::spawn`; dropped by init.
stdout_rx: Option<OwnedFd>,
/// Passed to `Child::new` by `LinuxSandbox::spawn`; dropped by init.
stderr_rx: Option<OwnedFd>,
/// Passed to `Child::new` by `LinuxSandbox::spawn`; dropped by init.
exit_signal_rx: OwnedFd,
/// `-1` until `spawn_sandbox_init` stores the PID returned by `clone`.
pid: i32,
}
impl ProcessInitArg {
    /// Bundle everything the sandbox init process needs.
    ///
    /// Each stdio pair is `(stdin_rx, stdin_tx)`, `(stdout_rx, stdout_tx)` and
    /// `(stderr_rx, stderr_tx)` respectively; `exit_signal` is
    /// `(exit_signal_rx, exit_signal_tx)`. The effective UID/GID of the calling
    /// process are captured at construction time and `pid` starts out as `-1`.
    fn new(
        sandbox: LinuxSandbox,
        sandboxee: Command,
        exit_signal: (OwnedFd, OwnedFd),
        stdin: (Option<OwnedFd>, Option<OwnedFd>),
        stdout: (Option<OwnedFd>, Option<OwnedFd>),
        stderr: (Option<OwnedFd>, Option<OwnedFd>),
    ) -> Self {
        let (exit_signal_rx, exit_signal_tx) = exit_signal;
        let (stdin_rx, stdin_tx) = stdin;
        let (stdout_rx, stdout_tx) = stdout;
        let (stderr_rx, stderr_tx) = stderr;

        Self {
            parent_euid: rustix::process::geteuid(),
            parent_egid: rustix::process::getegid(),
            path_exceptions: sandbox.path_exceptions,
            sandboxee,
            stdin_rx,
            stdin_tx,
            stdout_rx,
            stdout_tx,
            stderr_rx,
            stderr_tx,
            exit_signal_rx,
            exit_signal_tx,
            pid: -1,
        }
    }
}
/// Filesystem exceptions collected for the sandbox.
#[derive(Default)]
pub(crate) struct PathExceptions {
    /// `(normalized original path, canonical target)` pairs recorded for
    /// exception paths that contain a symlink.
    symlinks: Vec<(PathBuf, PathBuf)>,
    /// Mount flags for every excepted path, keyed by canonical path.
    bind_mounts: HashMap<PathBuf, MountAttrFlags>,
}
impl PathExceptions {
fn update(&mut self, path: PathBuf, write: bool, execute: bool) -> Result<()> {
let canonical_path = match path.canonicalize() {
Ok(path) => path,
Err(_) => return Err(Error::InvalidPath(path)),
};
if path_has_symlinks(&path) {
let absolute = absolute(&path)?;
let normalized = normalize_path(&absolute);
self.symlinks.push((normalized, canonical_path.clone()));
}
let flags = self
.bind_mounts
.entry(canonical_path)
.or_insert(MountAttrFlags::RDONLY | MountAttrFlags::NOEXEC);
if write {
flags.remove(MountAttrFlags::RDONLY);
}
if execute {
flags.remove(MountAttrFlags::NOEXEC);
}
Ok(())
}
}
/// Make `path` absolute without touching the filesystem.
///
/// Relative paths are resolved against the current working directory, after
/// dropping a leading `.` component. Absolute paths are rebuilt from their
/// components; a prefix of exactly two slashes is kept as `//`, while three or
/// more collapse into a single root. A trailing slash on the input is kept.
///
/// # Errors
///
/// Fails if `path` is relative and the current directory cannot be determined.
fn absolute(path: &Path) -> io::Result<PathBuf> {
    let raw = path.as_os_str().as_bytes();
    let mut parts = path.strip_prefix(".").unwrap_or(path).components();

    let has_double_slash_prefix = raw.starts_with(b"//") && !raw.starts_with(b"///");
    let mut resolved = match (path.is_absolute(), has_double_slash_prefix) {
        (true, true) => {
            // Swap the root component for the literal double slash.
            parts.next();
            PathBuf::from("//")
        },
        (true, false) => PathBuf::new(),
        (false, _) => env::current_dir()?,
    };

    for part in parts {
        resolved.push(part);
    }

    // Pushing an empty segment re-adds the trailing separator.
    if raw.last() == Some(&b'/') {
        resolved.push("");
    }

    Ok(resolved)
}
/// Lexically resolve `.` and `..` components of `path`.
///
/// The filesystem is never consulted, so symlinks are not followed. A `..`
/// with nothing left to remove is ignored.
///
/// # Panics
///
/// Panics on a prefix component, which the implementation does not expect to
/// encounter.
fn normalize_path(path: &Path) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut resolved, part| {
        match part {
            Component::Normal(segment) => resolved.push(segment),
            Component::RootDir => resolved.push("/"),
            Component::ParentDir => {
                resolved.pop();
            },
            Component::CurDir => {},
            Component::Prefix(_) => unreachable!("impl does not consider windows"),
        }
        resolved
    })
}
/// Check whether `path` or any of its ancestors is a symlink.
///
/// A component counts as a symlink when reading its link target succeeds;
/// every failure, including a missing path, counts as "not a symlink".
fn path_has_symlinks(path: &Path) -> bool {
    for ancestor in path.ancestors() {
        if fs::read_link(ancestor).is_ok() {
            return true;
        }
    }
    false
}
/// Read the calling process' thread count from `/proc/self/status`.
///
/// The count is the first whitespace-separated token following the `Threads:`
/// label.
///
/// # Errors
///
/// Fails if the file cannot be read, or with `InvalidData` if the label is
/// missing, no token follows it, or the token is not a valid `usize`.
fn thread_count() -> io::Result<usize> {
    let status = fs::read_to_string("/proc/self/status")?;

    let Some((_, after_label)) = status.split_once("Threads:") else {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "/proc/self/status missing \"Threads:\"",
        ));
    };

    let Some(raw_count) = after_label.split_whitespace().next() else {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "/proc/self/status output malformed",
        ));
    };

    raw_count.parse::<usize>().map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
}