use crate::{
container::ContainerProcessState,
process::{
args::{ContainerArgs, ContainerType},
channel, container_intermediate_process, fork,
},
rootless::Rootless,
seccomp, utils,
};
use anyhow::{Context, Result};
use nix::{
sys::{
socket::{self, UnixAddr},
wait::{waitpid, WaitStatus},
},
unistd::{self, Pid},
};
use oci_spec::runtime;
use std::{io::IoSlice, path::Path};
pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, Pid)> {
let (main_sender, main_receiver) = &mut channel::main_channel()?;
let inter_chan = &mut channel::intermediate_channel()?;
let init_chan = &mut channel::init_channel()?;
let intermediate_pid = fork::container_fork(|| {
let container_pid = container_intermediate_process::container_intermediate_process(
container_args,
inter_chan,
init_chan,
main_sender,
)?;
if matches!(
container_args.container_type,
ContainerType::TenantContainer { exec_notify_fd: _ }
) && !container_args.detached
{
match waitpid(container_pid, None)? {
WaitStatus::Exited(_, s) => Ok(s),
WaitStatus::Signaled(_, sig, _) => Ok(sig as i32),
_ => Ok(0),
}
} else {
Ok(0)
}
})?;
main_sender
.close()
.context("failed to close unused sender")?;
let (inter_sender, _) = inter_chan;
let (init_sender, _) = init_chan;
if let Some(rootless) = &container_args.rootless {
main_receiver.wait_for_mapping_request()?;
setup_mapping(rootless, intermediate_pid)?;
inter_sender.mapping_written()?;
}
inter_sender
.close()
.context("failed to close unused intermediate sender")?;
let init_pid = main_receiver.wait_for_intermediate_ready()?;
if let Some(linux) = container_args.spec.linux() {
if let Some(seccomp) = linux.seccomp() {
let state = ContainerProcessState {
oci_version: container_args.spec.version().to_string(),
fds: vec![String::from("seccompFd")],
pid: init_pid.as_raw(),
metadata: seccomp.listener_metadata().to_owned().unwrap_or_default(),
state: container_args
.container
.as_ref()
.context("container state is required")?
.state
.clone(),
};
sync_seccomp(seccomp, &state, init_sender, main_receiver)
.context("failed to sync seccomp with init")?;
}
}
init_sender
.close()
.context("failed to close unused init sender")?;
main_receiver
.wait_for_init_ready()
.context("failed to wait for init ready")?;
log::debug!("init pid is {:?}", init_pid);
Ok((intermediate_pid, init_pid))
}
fn sync_seccomp(
seccomp: &runtime::LinuxSeccomp,
state: &ContainerProcessState,
init_sender: &mut channel::InitSender,
main_receiver: &mut channel::MainReceiver,
) -> Result<()> {
if seccomp::is_notify(seccomp) {
log::debug!("main process waiting for sync seccomp");
let seccomp_fd = main_receiver.wait_for_seccomp_request()?;
let listener_path = seccomp
.listener_path()
.as_ref()
.context("notify will require seccomp listener path to be set")?;
let encoded_state =
serde_json::to_vec(state).context("failed to encode container process state")?;
sync_seccomp_send_msg(listener_path, &encoded_state, seccomp_fd)
.context("failed to send msg to seccomp listener")?;
init_sender.seccomp_notify_done()?;
let _ = unistd::close(seccomp_fd);
}
Ok(())
}
fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<()> {
let socket = socket::socket(
socket::AddressFamily::Unix,
socket::SockType::Stream,
socket::SockFlag::empty(),
None,
)
.context("failed to create unix domain socket for seccomp listener")?;
let unix_addr = socket::UnixAddr::new(listener_path).context("failed to create unix addr")?;
socket::connect(socket, &unix_addr).with_context(|| {
format!(
"failed to connect to seccomp notify listerner path: {:?}",
listener_path
)
})?;
let iov = [IoSlice::new(msg)];
let fds = [fd];
let cmsgs = socket::ControlMessage::ScmRights(&fds);
socket::sendmsg::<UnixAddr>(socket, &iov, &[cmsgs], socket::MsgFlags::empty(), None)
.context("failed to write container state to seccomp listener")?;
let _ = unistd::close(socket);
Ok(())
}
fn setup_mapping(rootless: &Rootless, pid: Pid) -> Result<()> {
log::debug!("write mapping for pid {:?}", pid);
if !rootless.privileged {
utils::write_file(format!("/proc/{}/setgroups", pid), "deny")?;
}
rootless
.write_uid_mapping(pid)
.context(format!("failed to map uid of pid {}", pid))?;
rootless
.write_gid_mapping(pid)
.context(format!("failed to map gid of pid {}", pid))?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::process::channel::{intermediate_channel, main_channel};
use crate::rootless::{get_gid_path, get_uid_path};
use nix::{
sched::{unshare, CloneFlags},
unistd::{self, getgid, getuid},
};
use oci_spec::runtime::{
LinuxIdMappingBuilder, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder,
};
use serial_test::serial;
use std::fs;
use crate::utils::TempDir;
#[test]
#[serial]
fn setup_uid_mapping_should_succeed() -> Result<()> {
let uid_mapping = LinuxIdMappingBuilder::default()
.host_id(getuid())
.container_id(0u32)
.size(1u32)
.build()?;
let uid_mappings = vec![uid_mapping];
let rootless = Rootless {
uid_mappings: Some(&uid_mappings),
privileged: true,
..Default::default()
};
let (mut parent_sender, mut parent_receiver) = main_channel()?;
let (mut child_sender, mut child_receiver) = intermediate_channel()?;
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
parent_receiver.wait_for_mapping_request()?;
parent_receiver.close()?;
let tempdir = TempDir::new(get_uid_path(&child).parent().unwrap())?;
let uid_map_path = tempdir.join("uid_map");
let _ = fs::File::create(&uid_map_path)?;
let tempdir = TempDir::new(get_gid_path(&child).parent().unwrap())?;
let gid_map_path = tempdir.join("gid_map");
let _ = fs::File::create(&gid_map_path)?;
setup_mapping(&rootless, child)?;
let line = fs::read_to_string(uid_map_path)?;
let line_splited = line.split_whitespace();
for (act, expect) in line_splited.zip([
uid_mapping.container_id().to_string(),
uid_mapping.host_id().to_string(),
uid_mapping.size().to_string(),
]) {
assert_eq!(act, expect);
}
child_sender.mapping_written()?;
child_sender.close()?;
}
unistd::ForkResult::Child => {
prctl::set_dumpable(true).unwrap();
unshare(CloneFlags::CLONE_NEWUSER)?;
parent_sender.identifier_mapping_request()?;
parent_sender.close()?;
child_receiver.wait_for_mapping_ack()?;
child_receiver.close()?;
std::process::exit(0);
}
}
Ok(())
}
#[test]
#[serial]
fn setup_gid_mapping_should_successed() -> Result<()> {
let gid_mapping = LinuxIdMappingBuilder::default()
.host_id(getgid())
.container_id(0u32)
.size(1u32)
.build()?;
let gid_mappings = vec![gid_mapping];
let rootless = Rootless {
gid_mappings: Some(&gid_mappings),
..Default::default()
};
let (mut parent_sender, mut parent_receiver) = main_channel()?;
let (mut child_sender, mut child_receiver) = intermediate_channel()?;
match unsafe { unistd::fork()? } {
unistd::ForkResult::Parent { child } => {
parent_receiver.wait_for_mapping_request()?;
parent_receiver.close()?;
let tempdir = TempDir::new(get_uid_path(&child).parent().unwrap())?;
let uid_map_path = tempdir.join("uid_map");
let _ = fs::File::create(&uid_map_path)?;
let tempdir = TempDir::new(get_gid_path(&child).parent().unwrap())?;
let gid_map_path = tempdir.join("gid_map");
let _ = fs::File::create(&gid_map_path)?;
setup_mapping(&rootless, child)?;
let line = fs::read_to_string(gid_map_path)?;
let line_splited = line.split_whitespace();
for (act, expect) in line_splited.zip([
gid_mapping.container_id().to_string(),
gid_mapping.host_id().to_string(),
gid_mapping.size().to_string(),
]) {
assert_eq!(act, expect);
}
assert_eq!(
fs::read_to_string(format!("/proc/{}/setgroups", child.as_raw()))?,
"deny\n",
);
child_sender.mapping_written()?;
child_sender.close()?;
}
unistd::ForkResult::Child => {
prctl::set_dumpable(true).unwrap();
unshare(CloneFlags::CLONE_NEWUSER)?;
parent_sender.identifier_mapping_request()?;
parent_sender.close()?;
child_receiver.wait_for_mapping_ack()?;
child_receiver.close()?;
std::process::exit(0);
}
}
Ok(())
}
#[test]
#[serial]
fn test_sync_seccomp() -> Result<()> {
use std::io::Read;
use std::os::unix::io::IntoRawFd;
use std::os::unix::net::UnixListener;
use std::thread;
use utils::create_temp_dir;
let tmp_dir = create_temp_dir("test_sync_seccomp")?;
let scmp_file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(tmp_dir.path().join("scmp_file"))?;
std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(tmp_dir.path().join("socket_file.sock"))?;
let (mut main_sender, mut main_receiver) = channel::main_channel()?;
let (mut init_sender, mut init_receiver) = channel::init_channel()?;
let socket_path = tmp_dir.path().join("socket_file.sock");
let socket_path_seccomp_th = socket_path.clone();
let state = ContainerProcessState::default();
let want = serde_json::to_string(&state)?;
let th = thread::spawn(move || {
sync_seccomp(
&LinuxSeccompBuilder::default()
.listener_path(socket_path_seccomp_th)
.syscalls(vec![LinuxSyscallBuilder::default()
.action(LinuxSeccompAction::ScmpActNotify)
.build()
.unwrap()])
.build()
.unwrap(),
&state,
&mut init_sender,
&mut main_receiver,
)
.unwrap();
});
let fd = scmp_file.into_raw_fd();
assert!(main_sender.seccomp_notify_request(fd).is_ok());
fs::remove_file(socket_path.clone())?;
let lis = UnixListener::bind(socket_path)?;
let (mut socket, _) = lis.accept()?;
let mut got = String::new();
socket.read_to_string(&mut got)?;
assert!(init_receiver.wait_for_seccomp_request_done().is_ok());
assert_eq!(want, got);
assert!(th.join().is_ok());
Ok(())
}
}