use std::{
borrow::Cow,
env,
ffi::{CStr, OsStr},
net::{IpAddr, SocketAddrV4, SocketAddrV6},
os::{
fd::{AsFd, AsRawFd},
unix::{ffi::OsStrExt, net::UnixStream, process::CommandExt},
},
process::{Command, Stdio},
};
use libc::setdomainname;
use memchr::arch::all::is_equal;
use nix::{
errno::Errno,
fcntl::{AtFlags, OFlag, AT_FDCWD},
mount::{umount2, MntFlags, MsFlags},
sys::{
signal::Signal,
socket::{bind, socket, SockFlag, SockaddrIn, SockaddrIn6},
stat::{mkdirat, mknodat, umask, Mode, SFlag},
},
time::{clock_gettime, ClockId},
unistd::{chdir, fchdir, linkat, pivot_root, sethostname, setsid, symlinkat, Gid, Pid, Uid},
NixPath,
};
use crate::{
caps::{securebits::set_keepcaps, CapSet},
compat::{
openat2, set_name, set_no_new_privs, set_pdeathsig, try_from_bytes, ResolveFlag, SockType,
},
config::{HAVE_NAMESPACED_PID_MAX, LANDLOCK_ABI, NPROC},
confine::{confine_landlock_scope, safe_drop_caps},
err::{err2no, SydResult},
error,
fd::{is_dev_null, pidfd_open, send_with_fd, set_cloexec, SafeOwnedFd, AT_BADFD},
info,
landlock::{AccessFs, AccessNet},
log_enabled,
lookup::{file_type, safe_mkdir_all, safe_open_how},
mount::{
api::MountAttrFlags,
util::{mount_bind, mount_fs, set_root_mount_propagation},
},
path::{XPath, XPathBuf},
proc::{proc_map_user, proc_pid_max, proc_set_time},
retry::retry_on_eintr,
sandbox::MountOp,
syslog::LogLevel,
warn,
};
/// Sets up the user namespace for the current process.
///
/// Maps `uid`/`gid` through `fd_proc` (honouring `map_root`), enables
/// keep-capabilities, copies the permitted capability set into the
/// inheritable set and raises every permitted capability in the ambient set.
///
/// # Errors
///
/// Returns the error of the first failing step. A failed uid/gid mapping is
/// additionally logged together with a configuration tip.
pub fn ns_setup_user<Fd: AsFd>(fd_proc: Fd, uid: Uid, gid: Gid, map_root: bool) -> SydResult<()> {
    if let Err(errno) = proc_map_user(fd_proc, uid, gid, map_root) {
        error!("ctx": "setup_user_namespace", "op": "map_user",
            "uid": uid.as_raw(), "gid": gid.as_raw(),
            "map_root": map_root, "err": errno as i32,
            "msg": format!("set up uid/gid mapping for user namespace failed: {errno}"),
            "tip": "configure your system to allow user namespaces");
        return Err(errno.into());
    }

    set_keepcaps(true)?;

    // Everything that is permitted becomes inheritable and ambient.
    let permitted = crate::caps::read(None, CapSet::Permitted)?;
    crate::caps::set(None, CapSet::Inheritable, permitted)?;
    for flag in permitted {
        crate::caps::raise(None, CapSet::Ambient, flag.try_into()?)?;
    }

    Ok(())
}
pub fn ns_setup_time<Fd: AsFd>(
fd_proc: Fd,
boottime: Option<i64>,
monotime: Option<i64>,
) -> SydResult<()> {
let boffset = if let Some(boffset) = boottime {
boffset
} else {
let btime = clock_gettime(ClockId::CLOCK_BOOTTIME)?;
#[expect(clippy::useless_conversion)]
btime.tv_sec().checked_neg().ok_or(Errno::EOVERFLOW)?.into()
};
let moffset = if let Some(moffset) = monotime {
moffset
} else {
let mtime = clock_gettime(ClockId::CLOCK_MONOTONIC)?;
#[expect(clippy::useless_conversion)]
mtime.tv_sec().checked_neg().ok_or(Errno::EOVERFLOW)?.into()
};
if boffset != 0 || moffset != 0 {
proc_set_time(fd_proc, boffset, moffset).inspect_err(|errno| {
error!("ctx": "setup_time_namespace", "op": "set_boot_time",
"err": *errno as i32,
"msg": format!("set boot time in time namespace failed: {errno}"),
"tip": "configure your system to allow unprivileged user namespaces");
})?;
info!("ctx": "setup_time_namespace", "op": "set_boot_time",
"timens_offsets": [moffset, boffset],
"msg": "set boot time in time namespace");
}
Ok(())
}
/// Sets the NIS/YP domain name and the host name of the UTS namespace.
///
/// Either value may be `None`, in which case it is left untouched. The domain
/// name is applied before the host name.
///
/// # Errors
///
/// Returns the error of the first failing call; each failure is logged with a
/// configuration tip.
pub fn ns_setup_uts(hostname: Option<&CStr>, domainname: Option<&CStr>) -> SydResult<()> {
    if let Some(domain) = domainname {
        // Length excludes the trailing NUL byte.
        let len = domain.to_bytes().len();
        let ptr = domain.as_ptr().cast();
        // SAFETY: `ptr` and `len` describe the buffer of `domain`, which
        // stays borrowed for the duration of the call.
        let rc = unsafe { setdomainname(ptr, len) };
        if let Err(errno) = Errno::result(rc) {
            error!("ctx": "setup_uts_namespace", "op": "set_domain_name",
                "msg": format!("set NIS/YP domain name failed: {errno}"),
                "tip": "configure `uts/domain:none'",
                "err": errno as i32);
            return Err(errno.into());
        }
    }

    if let Some(host) = hostname {
        let host = OsStr::from_bytes(host.to_bytes());
        if let Err(errno) = sethostname(host) {
            error!("ctx": "setup_uts_namespace", "op": "set_host_name",
                "msg": format!("set host name failed: {errno}"),
                "tip": "configure `uts/host:none'",
                "err": errno as i32);
            return Err(errno.into());
        }
    }

    Ok(())
}
/// Sets the namespaced `kernel.pid_max` sysctl through `fd_proc`.
///
/// Does nothing when `HAVE_NAMESPACED_PID_MAX` is false. The requested
/// `pid_max` is raised to at least `RESERVED_PIDS` and to at least
/// `PIDS_PER_CPU_MIN` times `NPROC` before it is written.
///
/// # Errors
///
/// Returns the error of `proc_pid_max`.
pub fn ns_setup_pid<Fd: AsFd>(fd_proc: Fd, pid_max: u64) -> SydResult<()> {
    const PIDS_PER_CPU_MIN: u64 = 8;
    const RESERVED_PIDS: u64 = if cfg!(target_arch = "s390x") {
        512
    } else {
        301
    };

    if *HAVE_NAMESPACED_PID_MAX {
        #[expect(clippy::arithmetic_side_effects)]
        let per_cpu_floor = PIDS_PER_CPU_MIN * (*NPROC as u64);
        let pid_max = pid_max.max(RESERVED_PIDS).max(per_cpu_floor);

        proc_pid_max(fd_proc, pid_max)?;
        info!("ctx": "setup_pid_namespace", "op": "set_pid_max", "max": pid_max,
            "msg": format!("set namespaced kernel.pid_max sysctl to {pid_max}"));
    }

    Ok(())
}
/// Sets up the mount namespace under a restrictive umask.
///
/// The umask is switched to mask all group and other permission bits while
/// `do_setup_mnt` runs, and the previous umask is restored afterwards
/// regardless of the outcome.
///
/// # Errors
///
/// Propagates the `Errno` returned by `do_setup_mnt`.
pub fn ns_setup_mnt(
    root: Option<&XPath>,
    mnt_ops: Option<&[MountOp]>,
    workdir: Option<&XPath>,
    restrict_proc_files: bool,
) -> Result<(), Errno> {
    let saved_umask = umask(Mode::S_IRWXG | Mode::S_IRWXO);
    let outcome = do_setup_mnt(root, mnt_ops, workdir, restrict_proc_files);
    // Restore the caller's umask on success and failure alike.
    umask(saved_umask);
    outcome
}
/// Builds the mount layout of the mount namespace.
///
/// Steps, in order:
/// 1. Set the root mount propagation with `MS_PRIVATE`.
/// 2. When `root` is given, prepare the new root: the keywords `ramfs` and
///    `tmpfs` mount a fresh filesystem of that type over `/proc/<pid>/fdinfo`
///    and create `dev` and `proc` inside it; any other value is bind mounted
///    onto itself and gets a tmpfs mounted on its `dev` directory.
/// 3. Apply `mnt_ops`. Link and node operations are only carried out on a
///    temporary root and are skipped with a warning otherwise.
/// 4. Mount procfs with `hidepid=4`, plus `subset=pid` when
///    `restrict_proc_files` is set.
/// 5. When a new root exists: bind mount `/dev/null` into it, create the
///    `stdin`/`stdout`/`stderr` symlinks, pivot into the new root, detach the
///    old root and optionally change into `workdir`.
///
/// # Errors
///
/// Returns the `Errno` of the first failing step. Creation of the standard
/// stream symlinks is best-effort: their errors are ignored.
#[expect(clippy::cognitive_complexity)]
fn do_setup_mnt(
root: Option<&XPath>,
mnt_ops: Option<&[MountOp]>,
workdir: Option<&XPath>,
restrict_proc_files: bool,
) -> Result<(), Errno> {
// Open-how templates: `how*` forbid symlinks and magic links during
// resolution, `how_xdev*` are built with `RESOLVE_NO_XDEV` instead.
let how = safe_open_how(OFlag::O_PATH | OFlag::O_NOFOLLOW, ResolveFlag::empty())
.resolve(ResolveFlag::RESOLVE_NO_MAGICLINKS | ResolveFlag::RESOLVE_NO_SYMLINKS);
let how_dir =
how.flags(OFlag::O_CLOEXEC | OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_DIRECTORY);
let how_xdev = safe_open_how(
OFlag::O_PATH | OFlag::O_NOFOLLOW,
ResolveFlag::RESOLVE_NO_XDEV,
);
let how_xdev_dir =
how_xdev.flags(OFlag::O_CLOEXEC | OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_DIRECTORY);
// Used to create the `null` mount target exclusively, readable by owner only.
let how_xdev_new = how_xdev
.flags(OFlag::O_CLOEXEC | OFlag::O_RDONLY | OFlag::O_CREAT | OFlag::O_EXCL)
.mode(Mode::S_IRUSR);
set_root_mount_propagation(MsFlags::MS_PRIVATE)?;
let (root_fd, root_is_tmpfs) = if let Some(root) = root {
// The literal values `ramfs` and `tmpfs` request a temporary root.
let root_tmp = if root.is_equal(b"ramfs") {
Some("ramfs")
} else if root.is_equal(b"tmpfs") {
Some("tmpfs")
} else {
None
};
// A temporary root is mounted over `/proc/<pid>/fdinfo`.
let root = if root_tmp.is_some() {
let mut pfd = XPathBuf::from("/proc");
pfd.push_pid(Pid::this());
pfd.push(b"fdinfo");
Cow::Owned(pfd)
} else {
Cow::Borrowed(root)
};
#[expect(clippy::disallowed_methods)]
let mut fd = retry_on_eintr(|| openat2(AT_BADFD, root.as_ref(), how_dir))?;
#[expect(clippy::disallowed_methods)]
if let Some(root_tmp) = root_tmp {
mount_fs(
OsStr::new(root_tmp),
fd,
MountAttrFlags::MOUNT_ATTR_NOSUID
| MountAttrFlags::MOUNT_ATTR_NODEV
| MountAttrFlags::MOUNT_ATTR_NOEXEC
| MountAttrFlags::MOUNT_ATTR_NOATIME
| MountAttrFlags::MOUNT_ATTR_NOSYMFOLLOW,
Some("mode=700"),
)?;
// Reopen the root after the mount, make it the working directory and
// create the `dev` and `proc` directories inside it.
fd = retry_on_eintr(|| openat2(AT_BADFD, root.as_ref(), how_dir))?;
retry_on_eintr(|| fchdir(&fd))?;
retry_on_eintr(|| mkdirat(&fd, "dev", Mode::S_IRWXU))?;
retry_on_eintr(|| mkdirat(&fd, "proc", Mode::S_IRWXU))?;
} else {
// Bind the root directory onto itself, then reopen it.
mount_bind(&fd, &fd, MountAttrFlags::empty())?;
drop(fd);
fd = retry_on_eintr(|| openat2(AT_BADFD, root.as_ref(), how_dir))?;
// Mount a tmpfs on the `dev` directory of the new root; this inner `fd`
// shadows the root descriptor only for the rest of this branch.
#[expect(clippy::disallowed_methods)]
let fd = retry_on_eintr(|| openat2(&fd, "dev", how_dir))?;
mount_fs(
OsStr::new("tmpfs"),
fd,
MountAttrFlags::MOUNT_ATTR_NOSUID
| MountAttrFlags::MOUNT_ATTR_NOEXEC
| MountAttrFlags::MOUNT_ATTR_NOATIME
| MountAttrFlags::MOUNT_ATTR_NOSYMFOLLOW,
Some("mode=700"),
)?;
}
(Some(fd), root_tmp.is_some())
} else {
(None, false)
};
if let Some(mnt_ops) = mnt_ops {
for mnt_op in mnt_ops {
match mnt_op {
MountOp::Bind { .. } => ns_setup_bind(mnt_op, root_fd.as_ref(), root_is_tmpfs)?,
// `root_is_tmpfs` is only true when `root_fd` is `Some`, so the
// `unwrap()` calls in the guarded arms below cannot fail.
MountOp::Link { sym: false, .. } if root_is_tmpfs => {
#[expect(clippy::disallowed_methods)]
ns_setup_link(mnt_op, root_fd.as_ref().unwrap())?
}
MountOp::Link { sym: true, .. } if root_is_tmpfs => {
#[expect(clippy::disallowed_methods)]
ns_setup_symlink(mnt_op, root_fd.as_ref().unwrap())?
}
MountOp::MakeNod { .. } if root_is_tmpfs => {
#[expect(clippy::disallowed_methods)]
ns_setup_mknod(mnt_op, root_fd.as_ref().unwrap())?
}
// Without a temporary root these operations are skipped with a warning.
MountOp::Link { sym: false, .. } => {
warn!("ctx": "setup_mount_namespace", "op": "make_link",
"mnt": mnt_op, "err": Errno::EOPNOTSUPP as i32,
"msg": format!("skip create link `{mnt_op}' due to root isn't tmpfs"));
}
MountOp::Link { sym: true, .. } => {
warn!("ctx": "setup_mount_namespace", "op": "make_symlink",
"mnt": mnt_op, "err": Errno::EOPNOTSUPP as i32,
"msg": format!("skip create symlink `{mnt_op}' due to root isn't tmpfs"));
}
MountOp::MakeNod { .. } => {
warn!("ctx": "setup_mount_namespace", "op": "make_node",
"mnt": mnt_op, "err": Errno::EOPNOTSUPP as i32,
"msg": format!("skip create node `{mnt_op}' due to root isn't tmpfs"));
}
}
}
}
// Mount procfs on `proc` of the new root, or on `/proc` when there is no new root.
#[expect(clippy::disallowed_methods)]
let proc_fd = if let Some(ref root_fd) = root_fd {
retry_on_eintr(|| openat2(root_fd, "proc", how_xdev_dir))
} else {
retry_on_eintr(|| openat2(AT_BADFD, "/proc", how_dir))
}?;
let flags = MountAttrFlags::MOUNT_ATTR_NOSUID
| MountAttrFlags::MOUNT_ATTR_NOEXEC
| MountAttrFlags::MOUNT_ATTR_NODEV;
let mut opts = "hidepid=4".to_string();
if restrict_proc_files {
opts.push_str(",subset=pid");
}
mount_fs(OsStr::new("proc"), proc_fd, flags, Some(opts.as_str()))?;
// The `MountOp` below is built for logging only.
if log_enabled!(LogLevel::Info) {
let mnt = MountOp::Bind {
src: "proc".into(),
dst: "/proc".into(),
opt: flags,
dat: Some(opts),
r#try: false,
};
info!("ctx": "setup_mount_namespace", "op": "mount_procfs", "mnt": &mnt,
"msg": format!("mounted procfs `{mnt}'"));
}
if let Some(ref root_fd) = root_fd {
// Provide `/dev/null` inside the new root; the source must pass `is_dev_null`.
#[expect(clippy::disallowed_methods)]
let src_fd = retry_on_eintr(|| openat2(AT_BADFD, "/dev/null", how))?;
if !is_dev_null(&src_fd).unwrap_or(false) {
let errno = Errno::ENODEV;
error!("ctx": "setup_mount_namespace", "op": "null_mount", "err": errno as i32,
"msg": format!("/dev/null is not a character device"));
return Err(errno);
}
// On a non-temporary root `dev` is a separate tmpfs mount, hence the
// open-how without `RESOLVE_NO_XDEV` in that case.
#[expect(clippy::disallowed_methods)]
let dev_fd = retry_on_eintr(|| {
openat2(
root_fd,
"dev",
if root_is_tmpfs { how_xdev_dir } else { how_dir },
)
})?;
#[expect(clippy::disallowed_methods)]
let dst_fd = retry_on_eintr(|| openat2(&dev_fd, "null", how_xdev_new))?;
mount_bind(src_fd, dst_fd, MountAttrFlags::empty())?;
// Best-effort standard stream symlinks; errors are deliberately ignored.
let _ = retry_on_eintr(|| symlinkat("/proc/thread-self/fd/0", &dev_fd, "stdin"));
let _ = retry_on_eintr(|| symlinkat("/proc/thread-self/fd/1", &dev_fd, "stdout"));
let _ = retry_on_eintr(|| symlinkat("/proc/thread-self/fd/2", &dev_fd, "stderr"));
drop(dev_fd);
// Switch to the new root: pivot with the new root as both arguments, then
// lazily unmount `.` and move to `/`.
retry_on_eintr(|| fchdir(root_fd))?;
retry_on_eintr(|| pivot_root(".", "."))?;
retry_on_eintr(|| umount2(".", MntFlags::MNT_DETACH))?;
retry_on_eintr(|| chdir("/"))?;
if let Some(workdir) = workdir {
#[expect(clippy::disallowed_methods)]
retry_on_eintr(|| openat2(AT_BADFD, workdir, how_dir))
.and_then(|fd| retry_on_eintr(|| fchdir(&fd)))?;
}
}
Ok(())
}
#[expect(clippy::cognitive_complexity)]
fn ns_setup_bind(
bind_op: &MountOp,
root_fd: Option<&SafeOwnedFd>,
root_is_tmpfs: bool,
) -> Result<(), Errno> {
let how = safe_open_how(OFlag::O_PATH | OFlag::O_NOFOLLOW, ResolveFlag::empty())
.resolve(ResolveFlag::RESOLVE_NO_MAGICLINKS | ResolveFlag::RESOLVE_NO_SYMLINKS);
let how_dir =
how.flags(OFlag::O_CLOEXEC | OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_DIRECTORY);
let how_xdev = safe_open_how(
OFlag::O_PATH | OFlag::O_NOFOLLOW,
ResolveFlag::RESOLVE_NO_XDEV,
);
let how_xdev_dir =
how_xdev.flags(OFlag::O_CLOEXEC | OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_DIRECTORY);
let (src, dst, opt, dat, r#try) = if let MountOp::Bind {
src,
dst,
opt,
dat,
r#try,
} = bind_op
{
(src, dst, opt, dat, r#try)
} else {
panic!("ns_setup_bind without MountOp::Bind")
};
if root_fd.is_some() && dst.is_root() {
let errno = Errno::EINVAL;
error!("ctx": "setup_mount_namespace", "op": "spec_mount",
"mnt": bind_op, "err": errno as i32,
"msg": format!("mount over rootfs is not permitted: {errno}"));
return Err(errno);
}
if dst.is_relative() || src.has_parent_dot() || dst.has_parent_dot() {
return Err(Errno::EINVAL);
}
if src.is_equal(b"proc") || src.is_procfs() || src.is_proc() {
return Err(Errno::EPERM);
}
if src.is_relative() {
let dst_fd = if let Some(root_fd) = root_fd {
let mut dst = dst.clone();
dst.clean_consecutive_slashes();
dst.remove(0); if !dst.is_relative() {
return Err(Errno::EINVAL);
}
if root_is_tmpfs && !dst.is_empty() {
safe_mkdir_all(root_fd, &dst)
} else {
#[expect(clippy::disallowed_methods)]
retry_on_eintr(|| openat2(root_fd, &dst, how_xdev_dir))
}
} else {
#[expect(clippy::disallowed_methods)]
retry_on_eintr(|| openat2(AT_BADFD, dst, how_dir))
}?;
match mount_fs(src.as_os_str(), dst_fd, *opt, dat.as_deref()) {
Ok(_) => {
info!("ctx": "setup_mount_namespace", "op": "spec_mount", "mnt": bind_op,
"msg": format!("mounted special-fs `{bind_op}'"));
}
Err(errno) => {
error!("ctx": "setup_mount_namespace", "op": "spec_mount",
"mnt": bind_op, "err": errno as i32,
"msg": format!("mount special-fs `{bind_op}' failed: {errno}"));
return Err(errno);
}
}
} else {
#[expect(clippy::disallowed_methods)]
let src_fd = match retry_on_eintr(|| openat2(AT_BADFD, src, how)) {
Ok(fd) => fd,
Err(Errno::ENOENT) if *r#try => {
warn!("ctx": "setup_mount_namespace", "op": "bind_mount",
"mnt": bind_op, "err": Errno::ENOENT as i32,
"msg": format!("skip non-existent file `{bind_op}' due to bind-try"));
return Ok(());
}
Err(errno) => return Err(errno),
};
let is_dir = file_type(&src_fd, None, false)?.is_dir();
let dst_fd = if let Some(root_fd) = root_fd {
let mut dst = dst.clone();
dst.clean_consecutive_slashes();
dst.remove(0); if !dst.is_relative() {
return Err(Errno::EINVAL);
}
let empty = dst.is_empty();
if root_is_tmpfs && !empty {
if is_dir {
safe_mkdir_all(root_fd, &dst).map(drop)?;
} else {
let (parent, base) = dst.split();
if base.is_empty() || base.is_root() {
retry_on_eintr(|| mknodat(root_fd, base, SFlag::S_IFREG, Mode::S_IRUSR, 0))
} else {
safe_mkdir_all(root_fd, parent).and_then(|fd| {
retry_on_eintr(|| mknodat(&fd, base, SFlag::S_IFREG, Mode::S_IRUSR, 0))
})
}?;
}
}
if dst.is_empty() {
dst.append_byte(b'.');
}
#[expect(clippy::disallowed_methods)]
retry_on_eintr(|| openat2(root_fd, &dst, if is_dir { how_xdev_dir } else { how_xdev }))
} else {
#[expect(clippy::disallowed_methods)]
retry_on_eintr(|| openat2(AT_BADFD, dst, if is_dir { how_dir } else { how }))
}?;
match mount_bind(src_fd, dst_fd, *opt) {
Ok(_) => {
info!("ctx": "setup_mount_namespace", "op": "bind_mount", "mnt": bind_op,
"msg": format!("bind mounted `{bind_op}'"));
}
Err(errno) => {
error!("ctx": "setup_mount_namespace", "op": "bind_mount",
"mnt": bind_op, "err": errno as i32,
"msg": format!("bind mount `{bind_op}' failed: {errno}"));
return Err(errno);
}
}
}
Ok(())
}
/// Creates the hard link described by a `MountOp::Link` beneath `root_fd`.
///
/// The link source is opened relative to `root_fd` and then referenced
/// through `/proc/thread-self/fd/<fd>`; missing parent directories of the
/// destination are created with `safe_mkdir_all`.
///
/// # Errors
///
/// * `EINVAL` for relative paths, paths with parent-directory components, or
///   a destination whose last component is `.`.
/// * `EEXIST` when the destination has no final component or already exists,
///   unless the operation is marked `try`, in which case it is skipped.
/// * Any other error from opening the source or creating the link.
///
/// # Panics
///
/// Panics if `link_op` is not a `MountOp::Link` or if it describes a
/// symbolic link.
#[expect(clippy::cognitive_complexity)]
fn ns_setup_link(link_op: &MountOp, root_fd: &SafeOwnedFd) -> Result<(), Errno> {
    let how = safe_open_how(
        OFlag::O_PATH | OFlag::O_NOFOLLOW,
        ResolveFlag::RESOLVE_NO_XDEV,
    );

    let MountOp::Link {
        dst,
        src,
        sym,
        r#try: skip_existing,
    } = link_op
    else {
        panic!("ns_setup_link without MountOp::Link")
    };
    let skip_existing = *skip_existing;
    assert!(!*sym, "ns_setup_link with symbolic link");

    if src.is_relative() || src.has_parent_dot() || dst.is_relative() || dst.has_parent_dot() {
        return Err(Errno::EINVAL);
    }

    // Strip the leading slash so that the source resolves beneath `root_fd`.
    let mut rel_src = src.clone();
    rel_src.clean_consecutive_slashes();
    rel_src.remove(0);
    if !rel_src.is_relative() {
        return Err(Errno::EINVAL);
    }
    #[expect(clippy::disallowed_methods)]
    let src_fd = retry_on_eintr(|| openat2(root_fd, &rel_src, how))?;

    // Refer to the opened source through its procfs entry.
    let mut proc_src = XPathBuf::from("/proc/thread-self/fd");
    proc_src.push_fd(src_fd.as_raw_fd());

    let (parent, base) = dst.split();
    if base.is_dot() {
        return Err(Errno::EINVAL);
    }

    let result = if base.is_empty() || base.is_root() {
        // The destination has no final component to create.
        if skip_existing {
            Ok(())
        } else {
            Err(Errno::EEXIST)
        }
    } else {
        // Pick the directory that receives the link: the root itself or a
        // freshly created parent directory chain.
        let created_parent;
        let dir_fd = if parent.is_empty() || parent.is_root() {
            root_fd
        } else {
            let mut rel_parent = parent.to_owned();
            rel_parent.clean_consecutive_slashes();
            rel_parent.remove(0);
            if !rel_parent.is_relative() {
                return Err(Errno::EINVAL);
            }
            created_parent = safe_mkdir_all(root_fd, &rel_parent)?;
            &created_parent
        };
        retry_on_eintr(|| linkat(AT_FDCWD, &proc_src, dir_fd, base, AtFlags::AT_SYMLINK_FOLLOW))
    };

    match result {
        Err(Errno::EEXIST) if skip_existing => {
            warn!("ctx": "setup_mount_namespace", "op": "make_link",
                "mnt": link_op, "err": Errno::EEXIST as i32,
                "msg": format!("skip existing link `{link_op}' due to try"));
            Ok(())
        }
        Err(errno) => {
            error!("ctx": "setup_mount_namespace", "op": "make_link",
                "mnt": link_op, "err": errno as i32,
                "msg": format!("create link `{link_op}' failed: {errno}"));
            Err(errno)
        }
        Ok(_) => {
            info!("ctx": "setup_mount_namespace", "op": "make_link", "mnt": link_op,
                "msg": format!("created link `{link_op}'"));
            Ok(())
        }
    }
}
/// Creates the symbolic link described by a `MountOp::Link` beneath `root_fd`.
///
/// The link target (`src`) is stored as given; missing parent directories of
/// the destination are created with `safe_mkdir_all`.
///
/// # Errors
///
/// * `EINVAL` if the destination is relative, if either path contains a
///   parent-directory component, or if the destination's last component is
///   `.`.
/// * `EEXIST` when the destination has no final component or already exists,
///   unless the operation is marked `try`, in which case it is skipped.
/// * Any other error from creating the parent directories or the symlink.
///
/// # Panics
///
/// Panics if `link_op` is not a `MountOp::Link` or if it describes a hard
/// link.
#[expect(clippy::cognitive_complexity)]
fn ns_setup_symlink(link_op: &MountOp, root_fd: &SafeOwnedFd) -> Result<(), Errno> {
    let MountOp::Link {
        dst,
        src,
        sym,
        r#try: skip_existing,
    } = link_op
    else {
        // Name this function (not `ns_setup_link`) so that a failure is
        // attributed to the right call site.
        panic!("ns_setup_symlink without MountOp::Link")
    };
    let skip_existing = *skip_existing;
    assert!(*sym, "ns_setup_symlink with hard link");

    if dst.is_relative() || dst.has_parent_dot() {
        return Err(Errno::EINVAL);
    }
    if src.has_parent_dot() {
        return Err(Errno::EINVAL);
    }

    let (parent, base) = dst.split();
    if base.is_dot() {
        return Err(Errno::EINVAL);
    }

    let result = if base.is_empty() || base.is_root() {
        // The destination has no final component to create.
        if skip_existing {
            Ok(())
        } else {
            Err(Errno::EEXIST)
        }
    } else if parent.is_empty() || parent.is_root() {
        retry_on_eintr(|| symlinkat(src, root_fd, base))
    } else {
        // Strip the leading slash so that the parent resolves beneath `root_fd`.
        let mut parent = parent.to_owned();
        parent.clean_consecutive_slashes();
        parent.remove(0);
        if !parent.is_relative() {
            return Err(Errno::EINVAL);
        }
        let parent_fd = safe_mkdir_all(root_fd, &parent)?;
        retry_on_eintr(|| symlinkat(src, &parent_fd, base))
    };

    match result {
        Ok(_) => {
            info!("ctx": "setup_mount_namespace", "op": "make_symlink", "mnt": link_op,
                "msg": format!("created symlink `{link_op}'"));
            Ok(())
        }
        Err(Errno::EEXIST) if skip_existing => {
            warn!("ctx": "setup_mount_namespace", "op": "make_symlink",
                "mnt": link_op, "err": Errno::EEXIST as i32,
                "msg": format!("skip existing symlink `{link_op}' due to try"));
            Ok(())
        }
        Err(errno) => {
            error!("ctx": "setup_mount_namespace", "op": "make_symlink",
                "mnt": link_op, "err": errno as i32,
                "msg": format!("create symlink `{link_op}' failed: {errno}"));
            Err(errno)
        }
    }
}
#[expect(clippy::cognitive_complexity)]
fn ns_setup_mknod(file_op: &MountOp, root_fd: &SafeOwnedFd) -> Result<(), Errno> {
let (kind, path, mut mode, r#try) = if let MountOp::MakeNod {
kind,
path,
mode,
r#try,
} = file_op
{
(*kind, path, *mode, *r#try)
} else {
panic!("ns_setup_mknod without MountOp::MakeNod")
};
mode.remove(Mode::S_IRWXG | Mode::S_IRWXO | Mode::S_ISUID | Mode::S_ISGID);
if !matches!(kind, SFlag::S_IFREG | SFlag::S_IFDIR | SFlag::S_IFIFO) {
return Err(Errno::EINVAL);
}
if path.is_relative() || path.has_parent_dot() {
return Err(Errno::EINVAL);
}
let (parent, base) = path.split();
if base.is_dot() {
return Err(Errno::EINVAL);
}
let result = if base.is_empty() || base.is_root() {
if r#try {
Ok(())
} else {
Err(Errno::EEXIST)
}
} else if parent.is_empty() || parent.is_root() {
match kind {
SFlag::S_IFREG | SFlag::S_IFIFO => {
retry_on_eintr(|| mknodat(root_fd, base, kind, mode, 0))
}
SFlag::S_IFDIR => retry_on_eintr(|| mkdirat(root_fd, base, mode)),
_ => return Err(Errno::EINVAL),
}
} else {
let mut parent = parent.to_owned();
parent.clean_consecutive_slashes();
parent.remove(0); if !parent.is_relative() {
return Err(Errno::EINVAL);
}
let parent_fd = safe_mkdir_all(root_fd, &parent)?;
match kind {
SFlag::S_IFREG | SFlag::S_IFIFO => {
retry_on_eintr(|| mknodat(&parent_fd, base, kind, mode, 0))
}
SFlag::S_IFDIR => retry_on_eintr(|| mkdirat(&parent_fd, base, mode)),
_ => return Err(Errno::EINVAL),
}
};
match result {
Ok(_) => {
info!("ctx": "setup_mount_namespace", "op": "make_node", "mnt": file_op,
"msg": format!("created node `{file_op}'"));
Ok(())
}
Err(Errno::EEXIST) if r#try => {
warn!("ctx": "setup_mount_namespace", "op": "make_node",
"mnt": file_op, "err": Errno::EEXIST as i32,
"msg": format!("skip existing node `{file_op}' due to try"));
Ok(())
}
Err(errno) => {
error!("ctx": "setup_mount_namespace", "op": "make_node",
"mnt": file_op, "err": errno as i32,
"msg": format!("create node `{file_op}' failed: {errno}"));
Err(errno)
}
}
}
/// Sets up the network namespace: brings up loopback and hands a bound proxy
/// socket to syd-tor.
///
/// The loopback device is always brought up. When `proxy` is `Some`, a
/// non-blocking, close-on-exec TCP socket is bound to
/// `proxy_addr`:`proxy_port` and its descriptor is sent over `proxy`.
/// With `proxy_debug` set, progress is logged at warning level instead of
/// info level.
///
/// # Errors
///
/// Returns the error of the first failing step (loopback setup, socket
/// creation, bind, or sending the descriptor); each failure is logged.
#[expect(clippy::cognitive_complexity)]
pub fn ns_setup_net<Fd: AsFd>(
    proxy: Option<Fd>,
    proxy_addr: IpAddr,
    proxy_port: u16,
    proxy_debug: bool,
) -> SydResult<()> {
    let loindex = match loopback_setup() {
        Ok(index) => index,
        Err(errno) => {
            error!("ctx": "setup_network_namespace", "op": "setup_loopback",
                "err": errno as i32,
                "msg": format!("set up loopback network device failed: {errno}"),
                "tip": "configure your system to allow network namespaces");
            return Err(errno.into());
        }
    };
    if proxy_debug {
        warn!("ctx": "setup_network_namespace", "op": "setup_loopback",
            "idx": loindex,
            "msg": format!("loopback network device is up with index:{loindex:#x}"));
    } else {
        info!("ctx": "setup_network_namespace", "op": "setup_loopback",
            "idx": loindex,
            "msg": format!("loopback network device is up with index:{loindex:#x}"));
    }

    // Without a proxy channel there is nothing more to do.
    let Some(stream_child) = proxy else {
        return Ok(());
    };

    let (ipv, fml) = match proxy_addr {
        IpAddr::V6(_) => (6, nix::sys::socket::AddressFamily::Inet6),
        IpAddr::V4(_) => (4, nix::sys::socket::AddressFamily::Inet),
    };
    let listener = socket(
        fml,
        SockType::Stream.try_into()?,
        SockFlag::SOCK_NONBLOCK | SockFlag::SOCK_CLOEXEC,
        None,
    )?;

    let bound = match proxy_addr {
        IpAddr::V4(addr_v4) => {
            let sockaddr = SockaddrIn::from(SocketAddrV4::new(addr_v4, proxy_port));
            bind(listener.as_raw_fd(), &sockaddr)
        }
        IpAddr::V6(addr_v6) => {
            let sockaddr = SockaddrIn6::from(SocketAddrV6::new(addr_v6, proxy_port, 0, 0));
            bind(listener.as_raw_fd(), &sockaddr)
        }
    };
    if let Err(errno) = bound {
        error!("ctx": "setup_network_namespace", "op": "bind_proxy",
            "msg": format!("bind proxy to IPv{ipv} {proxy_addr}!{proxy_port} failed: {errno}"),
            "err": errno as i32);
        return Err(errno.into());
    }
    if proxy_debug {
        warn!("ctx": "setup_network_namespace", "op": "bind_proxy",
            "msg": format!("proxy is now listening incoming IPv{ipv} requests from {proxy_addr}!{proxy_port}"));
    } else {
        info!("ctx": "setup_network_namespace", "op": "bind_proxy",
            "msg": format!("proxy is now listening incoming IPv{ipv} requests from {proxy_addr}!{proxy_port}"));
    }

    // Pass the bound socket over the channel; `listener` stays open until
    // this function returns.
    let lfd = listener.as_raw_fd();
    if let Err(errno) = send_with_fd(&stream_child, &[0u8; 1], &[lfd]) {
        error!("ctx": "setup_network_namespace", "op": "send_proxy_fd",
            "fd": lfd, "err": errno as i32,
            "msg": format!("send proxy file descriptor {lfd} to syd-tor failed: {errno}"));
        return Err(errno.into());
    }
    if proxy_debug {
        warn!("ctx": "setup_network_namespace", "op": "send_proxy_fd", "fd": lfd,
            "msg": format!("sent proxy fd {lfd} to syd-tor, IPv{ipv} traffic forwarding is now started \\o/"));
        warn!("ctx": "setup_network_namespace", "op": "send_proxy_fd", "syd": "ping",
            "msg": "Change return success. Going and coming without error.");
    } else {
        info!("ctx": "setup_network_namespace", "op": "send_proxy_fd", "fd": lfd,
            "msg": format!("sent proxy fd {lfd} to syd-tor, IPv{ipv} traffic forwarding is now started \\o/"));
        info!("ctx": "setup_network_namespace", "op": "send_proxy_fd", "syd": "ping",
            "msg": "Change return success. Going and coming without error.");
    }

    Ok(())
}
/// Spawns the `syd-tor` helper and returns the caller's end of its socket pair.
///
/// The helper is started by re-executing `/proc/self/exe` under the name
/// `syd-tor` with a cleared environment (only `LD_LIBRARY_PATH` and
/// `SYD_TOR_RULES` are passed on). It receives a pidfd of this process (`-p`),
/// one end of a Unix socket pair (`-i`) and the external endpoint: either the
/// Unix socket path (`-u`) or `addr:port` (`-o`). With `proxy_debug` set, `-d`
/// is passed and standard error is inherited. The helper's PID is exported in
/// the `SYD_PID_TOR` environment variable.
///
/// # Errors
///
/// Returns the error of the first failing step; pidfd, socket pair and spawn
/// failures are logged.
pub fn ns_setup_tor(
    proxy_ext_addr: IpAddr,
    proxy_ext_port: u16,
    proxy_ext_unix: Option<&XPath>,
    proxy_repr: &str,
    proxy_debug: bool,
) -> SydResult<SafeOwnedFd> {
    const TIP: &str = "set sandbox/proxy:off";

    #[expect(clippy::cast_sign_loss)]
    let pidfd_flags = OFlag::O_NONBLOCK.bits() as u32;
    let pidfd = pidfd_open(Pid::this(), pidfd_flags).inspect_err(|errno| {
        error!("ctx": "setup_tor", "op": "pidfd_open",
            "msg": format!("syd-tor pidfd_open error: {errno}"),
            "tip": TIP, "err": *errno as i32);
    })?;
    let (stream_parent, stream_child) = UnixStream::pair().inspect_err(|error| {
        error!("ctx": "setup_tor", "op": "socketpair",
            "msg": format!("syd-tor socketpair error: {error}"),
            "tip": TIP, "err": err2no(error) as i32);
    })?;

    // Both descriptors must survive the exec of the helper.
    set_cloexec(&pidfd, false)?;
    set_cloexec(&stream_parent, false)?;

    // Environment variables the helper is allowed to inherit.
    let inherited: &[&[u8]] = &[b"LD_LIBRARY_PATH", b"SYD_TOR_RULES"];

    let mut cmd = Command::new("/proc/self/exe");
    cmd.arg0("syd-tor")
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(if proxy_debug {
            Stdio::inherit()
        } else {
            Stdio::null()
        })
        .env_clear()
        .envs(env::vars_os().filter(|(name, _)| {
            inherited
                .iter()
                .any(|allowed| is_equal(name.as_bytes(), allowed))
        }));
    if proxy_debug {
        cmd.arg("-d");
    }

    let mut buf = itoa::Buffer::new();
    cmd.arg("-p");
    cmd.arg(buf.format(pidfd.as_raw_fd()));
    cmd.arg("-i");
    cmd.arg(buf.format(stream_parent.as_raw_fd()));
    match proxy_ext_unix {
        Some(unix_path) => {
            cmd.arg("-u");
            cmd.arg(unix_path);
        }
        None => {
            cmd.arg("-o");
            cmd.arg(format!("{proxy_ext_addr}:{proxy_ext_port}"));
        }
    }

    // SAFETY: `pre_exec` requires the hook to be safe to run between fork and
    // exec. NOTE(review): confirm that everything `ns_init_tor` calls meets
    // that requirement.
    unsafe { cmd.pre_exec(|| Ok(ns_init_tor()?)) };

    let syd_tor = cmd.spawn().inspect_err(|error| {
        let errno = err2no(error);
        error!("ctx": "setup_tor", "op": "spawn",
            "msg": format!("syd-tor spawn error: {error}"),
            "tip": TIP, "err": errno as i32);
    })?;
    drop(pidfd);

    // Publish the helper's PID through the environment.
    let mut pid_buf = itoa::Buffer::new();
    env::set_var("SYD_PID_TOR", pid_buf.format(syd_tor.id()));

    if proxy_debug {
        warn!("ctx": "setup_tor", "op": "forward_net",
            "msg": format!("syd-tor is now forwarding external traffic to {proxy_repr}"));
    } else {
        info!("ctx": "setup_tor", "op": "forward_net",
            "msg": format!("syd-tor is now forwarding external traffic to {proxy_repr}"));
    }

    Ok(stream_child.into())
}
/// Pre-exec hook for the `syd-tor` child; registered via `pre_exec` in
/// `ns_setup_tor`.
///
/// Renames the process, requests `SIGKILL` as parent-death signal, drops
/// capabilities, sets no-new-privs, starts a new session, applies the
/// Landlock scope confinement and finally sets the umask to `0o777`.
///
/// # Errors
///
/// Returns the error of the first failing step. A failure to set the process
/// name is ignored.
fn ns_init_tor() -> SydResult<()> {
// Best-effort: the process name is only cosmetic.
let _ = set_name(c"syd-tor");
set_pdeathsig(Some(Signal::SIGKILL))?;
safe_drop_caps()?;
set_no_new_privs()?;
setsid()?;
// NOTE(review): the exact meaning of the boolean argument is defined by
// `confine_landlock_scope` — confirm against its documentation.
confine_landlock_scope(
None::<SafeOwnedFd>,
AccessFs::from_write(*LANDLOCK_ABI),
AccessNet::BindTcp,
true,
)?;
// Mask every permission bit for files created from here on.
umask(Mode::from_bits_truncate(0o777));
Ok(())
}
// `ioctl` request numbers used by `loopback_setup`, taken from `libc` and
// stored as `u64`; call sites cast them to `libc::c_ulong`.

/// Request number for querying an interface index.
#[expect(clippy::unnecessary_cast)]
const SIOCGIFINDEX: u64 = libc::SIOCGIFINDEX as u64;
/// Request number for reading interface flags.
#[expect(clippy::unnecessary_cast)]
const SIOCGIFFLAGS: u64 = libc::SIOCGIFFLAGS as u64;
/// Request number for writing interface flags.
#[expect(clippy::unnecessary_cast)]
const SIOCSIFFLAGS: u64 = libc::SIOCSIFFLAGS as u64;
/// Brings up the loopback interface `lo` and returns its interface index.
///
/// The index is queried with `SIOCGIFINDEX`, BIG TCP limits are applied on a
/// best-effort basis through `loopback_set_bigtcp`, and finally `IFF_UP` and
/// `IFF_RUNNING` are OR-ed into the interface flags read with `SIOCGIFFLAGS`
/// and written back with `SIOCSIFFLAGS`.
///
/// # Errors
///
/// Returns the `Errno` of a failing `socket` call or `ioctl`. A failure of
/// `loopback_set_bigtcp` is only logged.
pub fn loopback_setup() -> Result<i32, Errno> {
// Any socket serves as a handle for the interface ioctls below.
let sock = socket(
nix::sys::socket::AddressFamily::Inet,
SockType::Stream.try_into()?,
SockFlag::empty(),
None,
)?;
// Request structure naming the interface "lo" (NUL-padded).
let mut ifreq = libc::ifreq {
#[expect(clippy::cast_possible_wrap)]
ifr_name: [
b'l' as libc::c_char,
b'o' as libc::c_char,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
],
ifr_ifru: unsafe { std::mem::zeroed() },
};
// Query the interface index using a separate, zeroed request.
let loindex: i32 = unsafe {
let mut ifr_index: libc::ifreq = std::mem::zeroed();
ifr_index.ifr_name = ifreq.ifr_name;
Errno::result(libc::syscall(
libc::SYS_ioctl,
sock.as_raw_fd(),
SIOCGIFINDEX as libc::c_ulong,
&mut ifr_index,
))?;
// NOTE(review): the index is read through the `ifru_mtu` union member,
// presumably because it shares storage and type with the index field —
// confirm against the `libc` definition of `ifreq`.
ifr_index.ifr_ifru.ifru_mtu
};
use crate::config::LOOPBACK_BIGTCP_MAX;
// Best-effort: both outcomes are logged at info level and do not abort setup.
match loopback_set_bigtcp(loindex, LOOPBACK_BIGTCP_MAX) {
Ok(_) => {
info!("ctx": "setup_network_namespace", "op": "set_bigtcp_loopback",
"msg": "loopback network device has BIGTCP set",
"max": LOOPBACK_BIGTCP_MAX);
}
Err(errno) => {
info!("ctx": "setup_network_namespace", "op": "set_bigtcp_loopback",
"msg": format!("set BIGTCP for loopback network device error: {errno}"),
"err": errno as i32);
}
};
// Read the current interface flags ...
Errno::result(unsafe {
libc::syscall(
libc::SYS_ioctl,
sock.as_raw_fd(),
SIOCGIFFLAGS as libc::c_ulong,
&mut ifreq,
)
})?;
// ... add UP and RUNNING ...
#[expect(clippy::cast_possible_truncation)]
unsafe {
ifreq.ifr_ifru.ifru_flags |= (libc::IFF_UP | libc::IFF_RUNNING) as libc::c_short
};
// ... and write them back.
Errno::result(unsafe {
libc::syscall(
libc::SYS_ioctl,
sock.as_raw_fd(),
SIOCSIFFLAGS as libc::c_ulong,
&mut ifreq,
)
})?;
Ok(loindex)
}
/// Header of a netlink attribute as written by `loopback_set_bigtcp`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct nlattr {
// Attribute length in bytes; the writer sets it to header size plus payload size.
nla_len: u16,
// Attribute type; the writer stores one of the `IFLA_*` constants here.
nla_type: u16,
}
/// Request buffer sent by `loopback_set_bigtcp`: netlink message header,
/// interface info payload and room for attribute data.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct nlmsg {
// Netlink message header.
hdr: libc::nlmsghdr,
// Interface info payload that follows the header.
info: ifinfomsg,
// Raw attribute area; attributes are written back to back from offset 0.
attrs: [u8; 64],
}
/// Interface info payload of the link request built by `loopback_set_bigtcp`.
///
/// NOTE(review): presumably mirrors the kernel's `struct ifinfomsg` —
/// confirm the field layout against `linux/rtnetlink.h`.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct ifinfomsg {
// Address family; the request sets it to `AF_UNSPEC`.
family: u8,
// Padding byte; left zeroed by the request.
pad: u8,
// In order: device type (left zeroed), interface index (set to the target
// interface), device flags (left zeroed) and change mask (set to `u32::MAX`).
ifi_type: u16, index: i32, flags: u32, change: u32, }
// Link attribute types that `loopback_set_bigtcp` writes into its request,
// each carrying the same `u32` maximum size.
// NOTE(review): the numeric values presumably mirror `linux/if_link.h` —
// confirm against the kernel headers.
const IFLA_GRO_IPV4_MAX_SIZE: libc::c_ushort = 0x40;
const IFLA_GRO_MAX_SIZE: libc::c_ushort = 0x3a;
const IFLA_GSO_IPV4_MAX_SIZE: libc::c_ushort = 0x3f;
const IFLA_GSO_MAX_SIZE: libc::c_ushort = 0x29;
/// Sets the GRO/GSO maximum sizes of the interface `ifindex` to `max_size`.
///
/// Builds a single `RTM_NEWLINK` request carrying the four attributes
/// `IFLA_GRO_IPV4_MAX_SIZE`, `IFLA_GRO_MAX_SIZE`, `IFLA_GSO_IPV4_MAX_SIZE` and
/// `IFLA_GSO_MAX_SIZE`, sends it over a `NETLINK_ROUTE` socket and inspects
/// the acknowledgement.
///
/// # Errors
///
/// * The OS error of a failing socket operation, or `ENOSYS` when the error
///   carries no OS error code.
/// * `EINVAL` when a reply message has an inconsistent length.
/// * The error reported in an `NLMSG_ERROR` reply with a non-zero code.
pub fn loopback_set_bigtcp(ifindex: i32, max_size: u32) -> Result<(), Errno> {
use netlink_sys::{constants::*, Socket, SocketAddr};
let mut sock = Socket::new(NETLINK_ROUTE)
.map_err(|e| Errno::from_raw(e.raw_os_error().unwrap_or(libc::ENOSYS)))?;
sock.bind(&SocketAddr::new(0, 0))
.map_err(|e| Errno::from_raw(e.raw_os_error().unwrap_or(libc::ENOSYS)))?;
// Start from an all-zero request; the fields of `nlmsg` declared in this
// file are plain integers and a byte array.
let mut msg: nlmsg = unsafe { std::mem::zeroed() };
let nl_hdr = &mut msg.hdr;
#[expect(clippy::arithmetic_side_effects)]
#[expect(clippy::cast_possible_truncation)]
{
// Length of header plus payload; the attribute bytes are added below.
nl_hdr.nlmsg_len = (size_of::<libc::nlmsghdr>() + size_of::<ifinfomsg>()) as u32;
nl_hdr.nlmsg_type = libc::RTM_NEWLINK;
nl_hdr.nlmsg_flags = (libc::NLM_F_REQUEST | libc::NLM_F_ACK) as u16;
nl_hdr.nlmsg_seq = 1;
nl_hdr.nlmsg_pid = 0;
}
let info = &mut msg.info;
#[expect(clippy::cast_possible_truncation)]
{
info.family = libc::AF_UNSPEC as u8;
info.index = ifindex;
info.change = u32::MAX;
}
// Append one attribute (header followed by a `u32` value) per kind.
let mut offset = 0;
for &kind in &[
IFLA_GRO_IPV4_MAX_SIZE,
IFLA_GRO_MAX_SIZE,
IFLA_GSO_IPV4_MAX_SIZE,
IFLA_GSO_MAX_SIZE,
] {
#[expect(clippy::arithmetic_side_effects)]
#[expect(clippy::cast_possible_truncation)]
unsafe {
let attr = nlattr {
nla_type: kind,
nla_len: (size_of::<nlattr>() + size_of::<u32>()) as u16,
};
// Unaligned writes into the byte array: four attributes of
// `size_of::<nlattr>() + size_of::<u32>()` bytes each fit into the
// 64-byte `attrs` area.
let attr_ptr = msg.attrs.as_mut_ptr().add(offset);
std::ptr::write_unaligned(attr_ptr.cast::<nlattr>(), attr);
let val_ptr = attr_ptr.add(size_of::<nlattr>());
std::ptr::write_unaligned(val_ptr.cast::<u32>(), max_size);
}
#[expect(clippy::arithmetic_side_effects)]
#[expect(clippy::cast_lossless)]
#[expect(clippy::cast_sign_loss)]
#[expect(clippy::cast_possible_truncation)]
{
// Advance by the aligned attribute length.
let nla_len = (size_of::<nlattr>() + size_of::<u32>()) as u16;
offset += unsafe { libc::NLA_ALIGN(nla_len as libc::c_int) } as usize;
}
}
#[expect(clippy::arithmetic_side_effects)]
#[expect(clippy::cast_possible_truncation)]
{
msg.hdr.nlmsg_len += offset as u32;
}
// View the first `nlmsg_len` bytes of the request as the wire message.
let buf = unsafe {
std::slice::from_raw_parts(
std::ptr::addr_of!(msg) as *const u8,
msg.hdr.nlmsg_len as usize,
)
};
sock.send(buf, 0)
.map_err(|e| Errno::from_raw(e.raw_os_error().unwrap_or(libc::ENOSYS)))?;
let (buf, _) = sock
.recv_from_full()
.map_err(|e| Errno::from_raw(e.raw_os_error().unwrap_or(libc::ENOSYS)))?;
// Walk the reply messages and surface the first reported error.
let mut offset = 0;
#[expect(clippy::arithmetic_side_effects)]
while offset + size_of::<libc::nlmsghdr>() <= buf.len() {
let hdr: libc::nlmsghdr = try_from_bytes(&buf[offset..])?;
let len = hdr.nlmsg_len as usize;
// Reject lengths shorter than a header or running past the buffer.
if len < size_of::<libc::nlmsghdr>() || offset + len > buf.len() {
return Err(Errno::EINVAL);
}
#[expect(clippy::cast_possible_truncation)]
if hdr.nlmsg_type == libc::NLMSG_ERROR as libc::c_ushort
&& len >= size_of::<libc::nlmsghdr>() + size_of::<libc::nlmsgerr>()
{
let err_offset = offset
.checked_add(size_of::<libc::nlmsghdr>())
.ok_or(Errno::EINVAL)?;
let err: libc::nlmsgerr = try_from_bytes(&buf[err_offset..])?;
// A zero code is an acknowledgement; the code is negated to obtain the errno.
if err.error != 0 {
return Err(Errno::from_raw(-err.error));
}
}
#[expect(clippy::cast_possible_truncation)]
#[expect(clippy::cast_possible_wrap)]
#[expect(clippy::cast_sign_loss)]
{
// Move on to the next message at the aligned length.
offset += unsafe { libc::NLA_ALIGN(len as i32) as usize };
}
}
Ok(())
}