use std::os::fd::FromRawFd;
use libcgroups::common::CgroupManager;
use nix::unistd::{Gid, Pid, Uid, close, getpid, write};
use oci_spec::runtime::{LinuxNamespace, LinuxNamespaceType, LinuxResources};
use super::args::{ContainerArgs, ContainerType};
use super::channel::{IntermediateReceiver, MainSender};
use super::fork::CloneCb;
use super::init::process as init_process;
use crate::error::MissingSpecError;
use crate::namespaces::Namespaces;
use crate::process::{channel, cpu_affinity, fork};
#[derive(Debug, thiserror::Error)]
pub enum IntermediateProcessError {
#[error(transparent)]
Channel(#[from] channel::ChannelError),
#[error(transparent)]
Namespace(#[from] crate::namespaces::NamespaceError),
#[error(transparent)]
Syscall(#[from] crate::syscall::SyscallError),
#[error("failed to launch init process")]
InitProcess(#[source] fork::CloneError),
#[error("cgroup error: {0}")]
Cgroup(String),
#[error(transparent)]
Procfs(#[from] procfs::ProcError),
#[error("exec notify failed")]
ExecNotify(#[source] nix::Error),
#[error(transparent)]
MissingSpec(#[from] crate::error::MissingSpecError),
#[error("CPU affinity error {0}")]
CpuAffinity(#[from] cpu_affinity::CPUAffinityError),
#[error("other error")]
Other(String),
}
/// Shorthand for a `Result` whose error type is [`IntermediateProcessError`].
type Result<T> = std::result::Result<T, IntermediateProcessError>;
pub fn container_intermediate_process(
args: &ContainerArgs,
intermediate_chan: &mut (channel::IntermediateSender, channel::IntermediateReceiver),
init_chan: &mut (channel::InitSender, channel::InitReceiver),
main_sender: &mut channel::MainSender,
) -> Result<()> {
let (inter_sender, inter_receiver) = intermediate_chan;
let (init_sender, init_receiver) = init_chan;
let command = args.syscall.create_syscall();
let spec = &args.spec;
let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?;
let cgroup_manager = libcgroups::common::create_cgroup_manager(args.cgroup_config.to_owned())
.map_err(|e| IntermediateProcessError::Cgroup(e.to_string()))?;
let current_pid = Pid::this();
if matches!(args.container_type, ContainerType::TenantContainer { .. }) {
if let Some(exec_cpu_affinity) = spec
.process()
.as_ref()
.and_then(|p| p.exec_cpu_affinity().as_ref())
{
if let Some(initial) = exec_cpu_affinity.initial() {
cpu_affinity::set_cpuset_affinity_from_string(current_pid, initial)?;
}
}
}
let _ = cpu_affinity::log_cpu_affinity();
apply_cgroups(
&cgroup_manager,
linux.resources().as_ref(),
matches!(args.container_type, ContainerType::InitContainer),
)?;
if matches!(args.container_type, ContainerType::TenantContainer { .. }) {
if let Some(exec_cpu_affinity) = spec
.process()
.as_ref()
.and_then(|p| p.exec_cpu_affinity().as_ref())
{
if let Some(cpu_affinity_final) = exec_cpu_affinity.cpu_affinity_final() {
cpu_affinity::set_cpuset_affinity_from_string(current_pid, cpu_affinity_final)?;
}
}
}
if let Some(user_namespace) = namespaces.get(LinuxNamespaceType::User)? {
setup_userns(&namespaces, user_namespace, main_sender, inter_receiver)?;
command.set_id(Uid::from_raw(0), Gid::from_raw(0))?;
}
let proc = spec.process().as_ref().ok_or(MissingSpecError::Process)?;
if let Some(rlimits) = proc.rlimits() {
for rlimit in rlimits {
command.set_rlimit(rlimit).map_err(|err| {
tracing::error!(?err, ?rlimit, "failed to set rlimit");
err
})?;
}
}
if let Some(pid_namespace) = namespaces.get(LinuxNamespaceType::Pid)? {
namespaces.unshare_or_setns(pid_namespace)?;
}
let cb: CloneCb = {
Box::new(|| {
if let Err(ret) = prctl::set_name("youki:[2:INIT]") {
tracing::error!(?ret, "failed to set name for child process");
return ret;
}
if let Err(err) = init_sender.close() {
tracing::error!(?err, "failed to close receiver in init process");
return -1;
}
if let Err(err) = inter_sender.close() {
tracing::error!(?err, "failed to close sender in the intermediate process");
return -1;
}
match init_process::container_init_process(args, main_sender, init_receiver) {
Ok(_) => 0,
Err(e) => {
tracing::error!("failed to initialize container process: {e}");
if let Err(err) = main_sender.exec_failed(e.to_string()) {
tracing::error!(?err, "failed sending error to main sender");
}
if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type {
let buf = format!("{e}");
let exec_notify_fd =
unsafe { std::os::fd::OwnedFd::from_raw_fd(exec_notify_fd) };
if let Err(err) = write(&exec_notify_fd, buf.as_bytes()) {
tracing::error!(?err, "failed to write to exec notify fd");
}
drop(exec_notify_fd);
}
-1
}
}
})
};
let pid = fork::container_clone_sibling(cb).map_err(|err| {
tracing::error!("failed to fork init process: {}", err);
IntermediateProcessError::InitProcess(err)
})?;
if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type {
close(exec_notify_fd).map_err(|err| {
tracing::error!("failed to close exec notify fd: {}", err);
IntermediateProcessError::ExecNotify(err)
})?;
}
main_sender.intermediate_ready(pid).map_err(|err| {
tracing::error!("failed to wait on intermediate process: {}", err);
err
})?;
main_sender.close().map_err(|err| {
tracing::error!("failed to close unused main sender: {}", err);
err
})?;
inter_sender.close().map_err(|err| {
tracing::error!(
"failed to close sender in the intermediate process: {}",
err
);
err
})?;
init_sender.close().map_err(|err| {
tracing::error!("failed to close unused init sender: {}", err);
err
})?;
Ok(())
}
/// Enters the user namespace described by `user_namespace` and, when the
/// namespace has no path, requests the id mapping from the main process.
///
/// When `user_namespace` has a path, the function returns right after
/// `unshare_or_setns`. Otherwise it makes the process dumpable, sends an id
/// mapping request through `sender`, waits on `receiver` for the
/// acknowledgement and then makes the process non-dumpable again.
///
/// # Errors
///
/// Returns an error if entering the namespace, toggling the dumpable flag, or
/// either channel operation fails.
fn setup_userns(
    namespaces: &Namespaces,
    user_namespace: &LinuxNamespace,
    sender: &mut MainSender,
    receiver: &mut IntermediateReceiver,
) -> Result<()> {
    namespaces.unshare_or_setns(user_namespace)?;
    if user_namespace.path().is_some() {
        return Ok(());
    }

    tracing::debug!("creating new user namespace");

    // Both toggles report failures the same way, so build the error in one
    // place. `prctl::set_dumpable` yields a raw errno on failure.
    let mark_dumpable = |enabled: bool| {
        prctl::set_dumpable(enabled).map_err(|errno| {
            IntermediateProcessError::Other(format!(
                "error in setting dumpable to {enabled} : {}",
                nix::errno::Errno::from_raw(errno)
            ))
        })
    };

    // NOTE(review): presumably the process must be dumpable so that the main
    // process is permitted to write its uid/gid maps — confirm.
    mark_dumpable(true)?;

    sender.identifier_mapping_request().map_err(|err| {
        tracing::error!("failed to send id mapping request: {}", err);
        err
    })?;
    receiver.wait_for_mapping_ack().map_err(|err| {
        tracing::error!("failed to receive id mapping ack: {}", err);
        err
    })?;

    mark_dumpable(false)?;
    Ok(())
}
/// Adds the current process to the cgroup managed by `cmanager` and, for an
/// init container with resources, applies those resources.
///
/// * `cmanager` - cgroup manager that receives the task and the resources.
/// * `resources` - resources from the spec, if any.
/// * `init` - whether this is the init container; resources are applied only
///   when this is `true`.
///
/// # Errors
///
/// Returns [`IntermediateProcessError::Cgroup`] with the stringified manager
/// error when adding the task or applying the resources fails.
fn apply_cgroups<
    C: CgroupManager<Error = E> + ?Sized,
    E: std::error::Error + Send + Sync + 'static,
>(
    cmanager: &C,
    resources: Option<&LinuxResources>,
    init: bool,
) -> Result<()> {
    let pid = getpid();

    if let Err(err) = cmanager.add_task(pid) {
        tracing::error!(?pid, ?err, ?init, "failed to add task to cgroup");
        return Err(IntermediateProcessError::Cgroup(err.to_string()));
    }

    // Nothing more to do unless there are resources and this is the init
    // container.
    let resources = match resources {
        Some(resources) if init => resources,
        _ => return Ok(()),
    };

    let controller_opt = libcgroups::common::ControllerOpt {
        resources,
        freezer_state: None,
        oom_score_adj: None,
        disable_oom_killer: false,
    };

    if let Err(err) = cmanager.apply(&controller_opt) {
        tracing::error!(?pid, ?err, ?init, "failed to apply cgroup");
        return Err(IntermediateProcessError::Cgroup(err.to_string()));
    }

    Ok(())
}
#[cfg(test)]
mod tests {
    use anyhow::Result;
    use libcgroups::test_manager::TestManager;
    use nix::unistd::Pid;
    use oci_spec::runtime::LinuxResources;
    use procfs::process::Process;

    use super::*;

    /// Asserts that `add_task` was called exactly once on `cmanager`, with the
    /// pid of the current process.
    fn assert_only_current_process_added(cmanager: &TestManager) -> Result<()> {
        let added = cmanager.get_add_task_args();
        assert_eq!(added.len(), 1);
        assert_eq!(added[0], Pid::from_raw(Process::myself()?.pid()));
        Ok(())
    }

    /// Init container with resources: the task is added and resources applied.
    #[test]
    fn apply_cgroup_init() -> Result<()> {
        let cmanager = TestManager::default();
        let resources = LinuxResources::default();

        apply_cgroups(&cmanager, Some(&resources), true)?;

        assert_only_current_process_added(&cmanager)?;
        assert!(cmanager.apply_called());
        Ok(())
    }

    /// Tenant container: the task is added but resources are not applied.
    #[test]
    fn apply_cgroup_tenant() -> Result<()> {
        let cmanager = TestManager::default();
        let resources = LinuxResources::default();

        apply_cgroups(&cmanager, Some(&resources), false)?;

        assert_only_current_process_added(&cmanager)?;
        assert!(!cmanager.apply_called());
        Ok(())
    }

    /// No resources in the spec: the task is added and nothing is applied.
    #[test]
    fn apply_cgroup_no_resources() -> Result<()> {
        let cmanager = TestManager::default();

        apply_cgroups(&cmanager, None, true)?;

        assert_only_current_process_added(&cmanager)?;
        assert!(!cmanager.apply_called());
        Ok(())
    }
}