#![expect(clippy::disallowed_types)]
use std::{
collections::{HashMap, HashSet},
env,
ffi::{OsStr, OsString},
fmt::Write as FmtWrite,
fs::{self, DirBuilder},
io::{BufReader, BufWriter, Write as IOWrite},
os::{
fd::{AsRawFd, OwnedFd, RawFd},
linux::fs::MetadataExt,
unix::fs::{DirBuilderExt, PermissionsExt},
},
path::{Path, PathBuf},
process::{exit, ExitCode},
rc::Rc,
str::FromStr,
};
use clap::Parser;
use libcgroups::common::{CgroupManager, ControllerOpt};
use libcontainer::{
apparmor,
config::YoukiConfig,
container::{
builder::ContainerBuilder, state::State as ContainerState, Container, ContainerStatus,
},
error::{ErrInvalidSpec, LibcontainerError, MissingSpecError},
notify_socket::{NotifyListener, NotifySocket, NOTIFY_FILE},
process,
process::{args::ContainerArgs, intel_rdt::delete_resctrl_subdirectory},
signal::Signal,
syscall::{
linux::MountOption,
syscall::{create_syscall, SyscallType},
},
tty,
user_ns::UserNamespaceConfig,
utils,
utils::{rootless_required, PathBufExt},
workload::{Executor, ExecutorError, ExecutorValidationError},
};
use liboci_cli::{
Checkpoint, CommonCmd, Create, Delete, Events, Exec, Features, GlobalOpts, Kill, List, Pause,
Ps, Resume, Run, StandardCmd, Start, State, Update,
};
use nix::{
errno::Errno,
fcntl::OFlag,
sys::{
signal,
signal::kill,
signalfd::SigSet,
stat::{fchmod, Mode},
wait::{Id, WaitPidFlag},
},
unistd::{pipe2, read, Gid, Pid, Uid},
};
use oci_spec::runtime::{
ApparmorBuilder, Capabilities as SpecCapabilities, Capability, CgroupBuilder, FeaturesBuilder,
IDMapBuilder, IntelRdtBuilder, LinuxBuilder, LinuxCapabilities, LinuxCapabilitiesBuilder,
LinuxFeatureBuilder, LinuxIdMappingBuilder, LinuxNamespace, LinuxNamespaceBuilder,
LinuxNamespaceType, LinuxPidsBuilder, LinuxResources, LinuxResourcesBuilder,
LinuxSchedulerPolicy, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscall,
LinuxSyscallBuilder, Mount, MountExtensionsBuilder, Process, ProcessBuilder, SelinuxBuilder,
Spec, UserBuilder, VERSION,
};
use procfs::process::Namespace;
use serde_json::to_writer_pretty;
use syd::{
compat::{openat2, set_dumpable, set_name, set_no_new_privs, waitid, ResolveFlag, WaitStatus},
config::ENV_SKIP_SCMP,
confine::secure_getenv,
err::{err2io, SydError, SydResult},
fd::AT_BADFD,
hook::Supervisor,
id::SydId,
ignore_signals,
log::log_init,
lookup::{safe_open_file2, safe_open_how},
path::{XPath, XPathBuf},
retry::retry_on_eintr,
sandbox::Sandbox,
syslog::LogLevel,
IgnoreSignalOpts,
};
use tabwriter::TabWriter;
use tracing_subscriber::layer::SubscriberExt;
// Install `hardened_malloc` as the global allocator, but only when every
// condition below holds: not a coverage build, not a "prof" build, not
// Android, not riscv64, 4k page size and 64-bit pointers.
#[cfg(all(
not(coverage),
not(feature = "prof"),
not(target_os = "android"),
not(target_arch = "riscv64"),
target_page_size_4k,
target_pointer_width = "64"
))]
#[global_allocator]
static GLOBAL: hardened_malloc::HardenedMalloc = hardened_malloc::HardenedMalloc;
// Builds with the "prof" feature use tcmalloc as the global allocator instead.
#[cfg(feature = "prof")]
#[global_allocator]
static GLOBAL: tcmalloc::TCMalloc = tcmalloc::TCMalloc;
/// `tracing` layer that routes events emitted through `tracing` into
/// Syd's own debug log.
pub struct SydLayer;

impl<S: tracing::Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>>
    tracing_subscriber::layer::Layer<S> for SydLayer
{
    /// An event is only considered when Syd's debug log level is enabled.
    fn event_enabled(
        &self,
        _ev: &tracing::Event,
        _cx: tracing_subscriber::layer::Context<S>,
    ) -> bool {
        syd::log_enabled!(LogLevel::Debug)
    }

    /// Logs the `Debug` rendering of the event under the `oci_trace` context.
    fn on_event(&self, event: &tracing::Event, _cx: tracing_subscriber::layer::Context<S>) {
        syd::debug!("ctx": "oci_trace", "event": format!("{event:?}"));
    }
}
/// Stateless workload executor handed to libcontainer; its `Executor`
/// implementation runs the container process under the Syd supervisor.
#[derive(Clone)]
struct SydExecutor {}
impl Executor for SydExecutor {
/// Runs the process described by `spec` under the Syd supervisor.
///
/// On success this does not return: the supervisor's result is converted
/// to an `i32` and passed to `exit`. An error is returned when a setup
/// step fails (`ExecutorError::CantHandle`) or when the supervisor itself
/// fails (`ExecutorError::Execution`).
///
/// Panics if the spec has no `process`, or if `process.args` is absent or
/// empty (see the `expect` calls below).
fn exec(&self, spec: &Spec) -> Result<(), ExecutorError> {
// Process hardening first: no-new-privs always, non-dumpable unless the
// "trusted" feature is compiled in.
set_no_new_privs().or(Err(ExecutorError::CantHandle(
"Failed to set no-new-privs attribute!",
)))?;
if !cfg!(feature = "trusted") {
set_dumpable(false).or(Err(ExecutorError::CantHandle(
"Failed to unset dumpable attribute!",
)))?;
}
// Best effort: a failure to set the process name is ignored.
let _ = set_name(SydId::get_cname(c"syd-oci"));
syd::t!(
"Syd-OCI-Spec: {}",
serde_json::to_string(spec).unwrap_or("?".to_string())
);
#[expect(clippy::disallowed_methods)]
let proc = spec
.process()
.as_ref()
.expect("oci_spec::runtime::Spec::process");
#[expect(clippy::disallowed_methods)]
let argv = proc
.args()
.as_ref()
.expect("oci_spec::runtime::Process::args!None")
.iter()
.map(OsString::from)
.collect::<Vec<_>>();
// Split into the command (first element) and the remaining arguments.
#[expect(clippy::disallowed_methods)]
let (comm, argv) = argv
.split_first()
.map(|(arg0, argv)| (arg0, argv.to_vec()))
.expect("oci_spec::runtime::Process::args!Empty");
// Import `KEY=VALUE` entries from the spec, but never override a variable
// that is already present in this process's environment. Entries without
// an '=' are skipped.
if let Some(env) = proc.env() {
for var in env {
if let Some((var, val)) = var.split_once('=') {
let var = OsString::from(var);
if env::var_os(&var).is_none() {
env::set_var(var, OsString::from(val));
}
}
}
}
// NOTE(review): presumably strips Syd-internal variables from the
// environment before the sandbox is built; confirm against `Sandbox::clear_env`.
Sandbox::clear_env();
let sandbox = Sandbox::try_from(spec)
.or(Err(ExecutorError::CantHandle("Failed to initialize Syd!")))?;
syd::t!(
"Syd-OCI-Sandbox: {}",
serde_json::to_string(&sandbox).unwrap_or("?".to_string())
);
// Core-dump signals are additionally skipped when the sandbox allows
// unsafe prlimit.
let mut opts = IgnoreSignalOpts::SkipIgnoreAlarm;
if sandbox.options.allow_unsafe_prlimit() {
opts.insert(IgnoreSignalOpts::SkipIgnoreCoreDump);
}
ignore_signals(opts).or(Err(ExecutorError::CantHandle("Failed to ignore signals!")))?;
// On `Ok` the closure chain calls `exit`, so only the error path returns.
Supervisor::run(sandbox, comm, argv, None)
.map(i32::from)
.map(exit)
.map_err(|err| ExecutorError::Execution(err.into()))?
}
/// Checks that the process described by `spec` can plausibly be executed.
///
/// The spec must contain a `process`. When `process.args` is present it
/// must be non-empty, `PATH` must be set in `process.env`, and the first
/// argument must resolve (via `get_executable_path`) to a file that
/// `is_executable` accepts. A spec without `process.args` passes.
///
/// # Errors
///
/// Returns `ExecutorValidationError::ArgValidationError` describing the
/// first failed check.
fn validate(&self, spec: &Spec) -> Result<(), ExecutorValidationError> {
    let proc = spec.process().as_ref().ok_or_else(|| {
        ExecutorValidationError::ArgValidationError("spec did not contain process".into())
    })?;
    let Some(args) = proc.args() else {
        return Ok(());
    };
    // An empty argument vector used to panic on `args[0]`; report it as a
    // validation error instead.
    let Some(arg0) = args.first() else {
        syd::t!("process args are empty");
        return Err(ExecutorValidationError::ArgValidationError(
            "process args must not be empty".into(),
        ));
    };
    // Take the first `PATH=` entry. `strip_prefix` removes the prefix exactly
    // once, so a value that itself starts with "PATH=" is left intact.
    let path_var = proc
        .env()
        .as_ref()
        .and_then(|envs| envs.iter().find_map(|entry| entry.strip_prefix("PATH=")));
    let Some(path_var) = path_var else {
        syd::t!("PATH environment variable is not set");
        return Err(ExecutorValidationError::ArgValidationError(
            "PATH environment variable is not set".into(),
        ));
    };
    let Some(path) = get_executable_path(arg0, path_var) else {
        syd::t!("executable for container process not found in PATH");
        return Err(ExecutorValidationError::ArgValidationError(format!(
            "executable '{}' not found in $PATH",
            arg0
        )));
    };
    match is_executable(&path) {
        Ok(true) => {
            syd::t!("found executable in executor");
            Ok(())
        }
        Ok(false) => {
            syd::t!("executable does not have correct permission set");
            Err(ExecutorValidationError::ArgValidationError(format!(
                "executable '{}' at path '{:?}' does not have correct permissions",
                arg0, path
            )))
        }
        Err(err) => {
            syd::t!("failed to check permissions for executable: {err}");
            Err(ExecutorValidationError::ArgValidationError(format!(
                "failed to check permissions for executable '{}' at path '{:?}' : {}",
                arg0, path, err
            )))
        }
    }
}
}
// Namespace names looked up in the init process's namespace map when a
// tenant process joins an existing container (see `get_namespaces`).
const NAMESPACE_TYPES: &[&str] = &["ipc", "uts", "net", "pid", "mnt", "cgroup"];
// File-name prefix of the randomly named notify socket of a tenant process.
const TENANT_NOTIFY: &str = "tenant-notify-";
// File-name prefix of the randomly named console socket of a tenant process.
const TENANT_TTY: &str = "tenant-tty-";
/// Computes the capability sets for a tenant process.
///
/// `additional` holds capability names that are parsed and added to the
/// bounding, effective and permitted sets of the spec's process. When the
/// spec declares an inheritable set it is carried over unchanged and the
/// ambient set is extended as well; otherwise inheritable and ambient are
/// both cleared. If the spec's process declares no capabilities at all,
/// only the additional capabilities are granted.
///
/// # Errors
///
/// Fails when a name in `additional` cannot be parsed, when the spec has
/// no `process`, or when building the capability structure fails.
fn get_capabilities(
    additional: &[String],
    spec: &Spec,
) -> Result<LinuxCapabilities, LibcontainerError> {
    let parsed = additional
        .iter()
        .map(|name| {
            syd::caps::Capability::from_str(name)
                .map_err(|e| LibcontainerError::Other(e.to_string()))
        })
        .collect::<Result<Vec<syd::caps::Capability>, LibcontainerError>>()?;
    let extra: SpecCapabilities = parsed.iter().map(|c| c.spec()).collect();

    let process = spec.process().as_ref().ok_or(MissingSpecError::Process)?;
    let Some(spec_caps) = process.capabilities() else {
        // The spec grants nothing on its own: use the additional set only.
        let mut granted = LinuxCapabilitiesBuilder::default()
            .bounding(extra.clone())
            .effective(extra.clone())
            .permitted(extra.clone())
            .build()?;
        granted.set_inheritable(None);
        granted.set_ambient(None);
        return Ok(granted);
    };

    // Union of a (possibly absent) declared set with the additional set.
    let with_extra = |declared: &Option<SpecCapabilities>| -> SpecCapabilities {
        match declared {
            Some(set) => set.union(&extra).copied().collect(),
            None => extra.clone(),
        }
    };

    let builder = LinuxCapabilitiesBuilder::default()
        .bounding(with_extra(spec_caps.bounding()))
        .effective(with_extra(spec_caps.effective()))
        .permitted(with_extra(spec_caps.permitted()));

    match spec_caps.inheritable() {
        Some(inheritable) => {
            let granted = builder
                .ambient(with_extra(spec_caps.ambient()))
                .inheritable(inheritable.clone())
                .build()?;
            Ok(granted)
        }
        None => {
            let mut granted = builder.build()?;
            granted.set_inheritable(None);
            granted.set_ambient(None);
            Ok(granted)
        }
    }
}
/// Builder for a tenant process, i.e. an additional process executed inside
/// an already existing container.
struct SydTenantContainerBuilder {
/// Underlying libcontainer builder; its stdin/stdout/stderr are forwarded in `build`.
base: ContainerBuilder,
/// Extra environment variables; they replace same-named entries of the spec.
env: HashMap<String, String>,
/// Working directory of the tenant process; must be absolute when set.
cwd: Option<PathBuf>,
/// Argument vector of the tenant process; must be non-empty unless `process` is set.
args: Vec<String>,
/// Explicit no-new-privileges request, if any.
no_new_privs: Option<bool>,
/// Names of additional capabilities to grant.
capabilities: Vec<String>,
/// Path to a JSON process description that replaces the generated process.
process: Option<PathBuf>,
/// Whether the process is started detached.
detached: bool,
/// Forwarded to the builder implementation; initialised to `false` in `new`.
as_sibling: bool,
/// Supplementary group ids of the tenant process.
additional_gids: Vec<u32>,
/// User id of the tenant process.
user: Option<u32>,
/// Group id of the tenant process.
group: Option<u32>,
/// Allow building when the container is paused.
ignore_paused: bool,
/// Sub-cgroup path forwarded to the builder implementation.
sub_cgroup: Option<String>,
/// Process label forwarded to the builder implementation.
process_label: Option<String>,
/// AppArmor profile applied to the generated process.
apparmor: Option<String>,
/// Syscall implementation selector.
syscall: SyscallType,
/// Id of the container to join.
container_id: String,
/// Canonicalized pid file path, if requested.
pid_file: Option<PathBuf>,
/// Number of file descriptors to preserve (one more when a log is configured).
preserve_fds: i32,
/// Executor that runs the workload.
executor: Box<dyn Executor>,
/// Root directory holding the per-container state directories.
root_path: PathBuf,
/// Console socket path, if provided.
console_socket: Option<PathBuf>,
}
impl SydTenantContainerBuilder {
fn new(opt: GlobalOpts, args: Exec) -> SydResult<Self> {
let syscall = SyscallType::default();
let container_id = args.container_id.clone();
let pid_file = if let Some(ref p) = args.pid_file {
Some(p.canonicalize_safely()?)
} else {
None
};
let executor = Box::new(SydExecutor {});
let mut preserve_fds = args.preserve_fds;
if opt.log.is_some() {
preserve_fds += 1; }
#[expect(clippy::disallowed_methods)]
let builder = ContainerBuilder::new(container_id.clone(), syscall)
.with_executor(SydExecutor {})
.with_root_path(opt.root.clone().unwrap())?
.with_console_socket(args.console_socket.as_ref())
.with_pid_file(pid_file.clone())?
.validate_id()?;
#[expect(clippy::disallowed_methods)]
Ok(Self {
base: builder,
env: HashMap::new(),
cwd: None,
args: Vec::new(),
no_new_privs: None,
capabilities: Vec::new(),
process: None,
detached: false,
as_sibling: false,
additional_gids: vec![],
user: None,
group: None,
ignore_paused: false,
sub_cgroup: None,
process_label: None,
apparmor: None,
syscall,
container_id,
pid_file,
preserve_fds,
executor,
root_path: opt.root.unwrap(),
console_socket: args.console_socket,
})
}
/// Sets the extra environment variables of the tenant process.
pub fn with_env(mut self, env: HashMap<String, String>) -> Self {
self.env = env;
self
}
/// Sets the working directory of the tenant process (`None` leaves it unset).
pub fn with_cwd<P: Into<PathBuf>>(mut self, path: Option<P>) -> Self {
self.cwd = path.map(|p| p.into());
self
}
/// Sets the argument vector of the tenant process.
pub fn with_container_args(mut self, args: Vec<String>) -> Self {
self.args = args;
self
}
/// Records an explicit no-new-privileges request for the tenant process.
pub fn with_no_new_privs(mut self, no_new_privs: bool) -> Self {
self.no_new_privs = Some(no_new_privs);
self
}
/// Sets the names of the additional capabilities to grant.
pub fn with_capabilities(mut self, capabilities: Vec<String>) -> Self {
self.capabilities = capabilities;
self
}
/// Sets the path of a JSON process description to use instead of a
/// generated process (`None` leaves it unset).
pub fn with_process<P: Into<PathBuf>>(mut self, path: Option<P>) -> Self {
self.process = path.map(|p| p.into());
self
}
/// Sets the supplementary group ids of the tenant process.
pub fn with_additional_gids(mut self, gids: Vec<u32>) -> Self {
self.additional_gids = gids;
self
}
/// Sets the user id of the tenant process.
pub fn with_user(mut self, user: Option<u32>) -> Self {
self.user = user;
self
}
/// Sets the group id of the tenant process.
pub fn with_group(mut self, group: Option<u32>) -> Self {
self.group = group;
self
}
/// Controls whether a paused container is accepted as a target.
pub fn with_ignore_paused(mut self, ignore_paused: bool) -> Self {
self.ignore_paused = ignore_paused;
self
}
/// Sets the sub-cgroup path forwarded to the builder implementation.
pub fn with_sub_cgroup(mut self, sub_cgroup: Option<String>) -> Self {
self.sub_cgroup = sub_cgroup;
self
}
/// Sets the process label forwarded to the builder implementation.
#[expect(dead_code)]
pub fn with_process_label(mut self, process_label: Option<String>) -> Self {
self.process_label = process_label;
self
}
/// Sets the AppArmor profile applied to the generated process.
pub fn with_apparmor(mut self, apparmor: Option<String>) -> Self {
self.apparmor = apparmor;
self
}
/// Controls whether the tenant process is started detached.
pub fn with_detach(mut self, detached: bool) -> Self {
self.detached = detached;
self
}
/// Creates the tenant process inside the existing container and returns
/// its pid.
///
/// The container state and its original spec are loaded, the spec is
/// adapted for the tenant (process, namespaces), and the process is created
/// through `SydContainerBuilderImpl`. Afterwards the start notification is
/// sent and the read end of a close-on-exec pipe is drained until EOF.
/// Any bytes received on that pipe are treated as an error message.
///
/// # Errors
///
/// Returns an error if any preparation step fails, if creating or
/// notifying the process fails, or — as `LibcontainerError::Other` carrying
/// the received text — if anything was written to the notification pipe.
pub fn build(self) -> Result<Pid, LibcontainerError> {
    let container_dir = self.lookup_container_dir()?;
    let container = self.load_container_state(container_dir.clone())?;
    let mut spec = self.load_init_spec(&container)?;
    self.adapt_spec_for_tenant(&mut spec, &container)?;
    self.check_terminal(&spec)?;
    syd::t!("{spec:?}");
    let notify_path = Self::setup_notify_listener(&container_dir)?;
    let rootfs = fs::canonicalize(spec.root().as_ref().ok_or(MissingSpecError::Root)?.path())
        .map_err(LibcontainerError::OtherIO)?;
    let csocketfd = self.setup_tty_socket(&container_dir)?;
    let use_systemd = self.should_use_systemd(&container);
    let user_ns_config = UserNamespaceConfig::new(&spec)?;
    let (read_end, write_end) = pipe2(OFlag::O_CLOEXEC)
        .map_err(|e| LibcontainerError::OtherIO(std::io::Error::from_raw_os_error(e as i32)))?;
    let mut builder_impl = SydContainerBuilderImpl {
        container_type: ContainerType::SydTenantContainer {
            exec_notify_fd: write_end.as_raw_fd(),
        },
        syscall: self.syscall,
        container_id: self.container_id,
        pid_file: self.pid_file,
        console_socket: csocketfd,
        use_systemd,
        spec: Rc::new(spec),
        rootfs,
        user_ns_config,
        notify_path: notify_path.clone(),
        container: None,
        preserve_fds: self.preserve_fds,
        detached: self.detached,
        executor: self.executor,
        no_pivot: false,
        stdin: self.base.stdin,
        stdout: self.base.stdout,
        stderr: self.base.stderr,
        as_sibling: self.as_sibling,
        sub_cgroup_path: self.sub_cgroup,
        process_label: self.process_label,
    };
    let pid = builder_impl.create()?;
    let mut notify_socket = NotifySocket::new(notify_path);
    notify_socket.notify_container_start()?;
    // Close our copy of the write end so that the read loop below sees EOF
    // once no other holder of the descriptor remains.
    drop(write_end);

    let mut err_str_buf = Vec::new();
    loop {
        let mut buf = [0u8; 3];
        #[expect(clippy::disallowed_methods)]
        let n = read(&read_end, &mut buf).map_err(|e| {
            LibcontainerError::OtherIO(std::io::Error::from_raw_os_error(e as i32))
        })?;
        if n == 0 {
            break;
        }
        // Append only the bytes actually read: a short read must not add
        // the unused (zeroed) tail of the buffer to the message.
        err_str_buf.extend_from_slice(&buf[..n]);
    }

    if err_str_buf.is_empty() {
        Ok(pid)
    } else {
        Err(LibcontainerError::Other(
            String::from_utf8_lossy(&err_str_buf).to_string(),
        ))
    }
}
/// Returns the state directory of the target container
/// (`root_path/container_id`), or `LibcontainerError::NoDirectory` when it
/// does not exist.
fn lookup_container_dir(&self) -> Result<PathBuf, LibcontainerError> {
    let dir = self.root_path.join(&self.container_id);
    if XPath::new(&dir).exists(true) {
        Ok(dir)
    } else {
        syd::t!("container dir does not exist");
        Err(LibcontainerError::NoDirectory)
    }
}
/// Loads `config.json` from the container's bundle, validates it and
/// canonicalizes its rootfs relative to the bundle.
fn load_init_spec(&self, container: &Container) -> Result<Spec, LibcontainerError> {
let spec_path = container.bundle().join("config.json");
let mut spec = syd_spec_load(spec_path)?;
// Validate before canonicalizing so an invalid spec is rejected untouched.
Self::validate_spec(&spec)?;
spec.canonicalize_rootfs(container.bundle())?;
Ok(spec)
}
/// Validates the parts of `spec` that matter for a tenant process.
///
/// Checks, in order: the spec version (must start with "1."), the process
/// I/O priority, the process scheduler settings, the mount options, and
/// finally the user-namespace and network-device constraints via the
/// libcontainer helpers.
///
/// Returns the matching `ErrInvalidSpec` variant (or the helper's error)
/// for the first check that fails.
fn validate_spec(spec: &Spec) -> Result<(), LibcontainerError> {
let version = spec.version();
if !version.starts_with("1.") {
syd::t!(
"runtime spec has incompatible version '{}'. Only 1.X.Y is supported",
spec.version()
);
Err(ErrInvalidSpec::UnsupportedVersion)?;
}
if let Some(process) = spec.process() {
if let Some(io_priority) = process.io_priority() {
let priority = io_priority.priority();
// The class is serialized only to make sure it has a valid
// representation (and for the trace message).
let iop_class_res = serde_json::to_string(&io_priority.class());
match iop_class_res {
Ok(_iop_class) => {
if !(0..=7).contains(&priority) {
syd::t!("io priority '{}' not between 0 and 7 (inclusive), class '{}' not in (IO_PRIO_CLASS_RT,IO_PRIO_CLASS_BE,IO_PRIO_CLASS_IDLE)",
priority, _iop_class);
Err(ErrInvalidSpec::IoPriority)?;
}
}
Err(_e) => {
syd::t!("failed to parse io priority class: {_e}");
Err(ErrInvalidSpec::IoPriority)?;
}
}
}
if let Some(sc) = process.scheduler() {
let policy = sc.policy();
// Nice values are range-checked only for SchedBatch and SchedOther.
if let Some(nice) = sc.nice() {
if (*policy == LinuxSchedulerPolicy::SchedBatch
|| *policy == LinuxSchedulerPolicy::SchedOther)
&& (*nice < -20 || *nice > 19)
{
syd::t!("invalid scheduler.nice: '{nice}', must be within -20 to 19");
Err(ErrInvalidSpec::Scheduler)?;
}
}
// A non-zero priority is only accepted for SchedFifo and SchedRr.
if let Some(priority) = sc.priority() {
if *priority != 0
&& (*policy != LinuxSchedulerPolicy::SchedFifo
&& *policy != LinuxSchedulerPolicy::SchedRr)
{
syd::t!("scheduler.priority can only be specified for SchedFIFO or SchedRR policy");
Err(ErrInvalidSpec::Scheduler)?;
}
}
// Non-zero runtime/deadline/period are only accepted for SchedDeadline.
if *policy != LinuxSchedulerPolicy::SchedDeadline {
if let Some(runtime) = sc.runtime() {
if *runtime != 0 {
syd::t!(
"scheduler runtime can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
if let Some(deadline) = sc.deadline() {
if *deadline != 0 {
syd::t!(
"scheduler deadline can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
if let Some(period) = sc.period() {
if *period != 0 {
syd::t!(
"scheduler period can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
}
}
}
if let Some(mounts) = spec.mounts() {
utils::validate_mount_options(mounts)?;
}
let syscall = create_syscall();
utils::validate_spec_for_new_user_ns(spec, &*syscall)?;
utils::validate_spec_for_net_devices(spec, &*syscall)
.map_err(LibcontainerError::NetDevicesError)?;
Ok(())
}
/// Verifies that a console socket was supplied exactly when one is needed.
///
/// A console socket is needed when the process is detached and the spec
/// requests a terminal; supplying one in any other situation is an error,
/// as is omitting it when needed.
fn check_terminal(&self, spec: &Spec) -> Result<(), LibcontainerError> {
    let wants_terminal = matches!(
        spec.process().as_ref().and_then(|p| p.terminal()),
        Some(true)
    );
    let socket_required = self.detached && wants_terminal;
    let socket_given = self.console_socket.is_some();

    let message = match (socket_required, socket_given) {
        (true, false) => "terminal: true requires a console socket when detached",
        (false, true) => {
            "console socket provided but not needed (terminal is false or not detached)"
        }
        _ => return Ok(()),
    };
    Err(LibcontainerError::Other(message.into()))
}
/// Loads the container state from `container_dir` and checks that a
/// process may be executed in it.
///
/// Only a running container is accepted, or a paused one when
/// `ignore_paused` is set; any other status yields
/// `LibcontainerError::IncorrectStatus`.
fn load_container_state(&self, container_dir: PathBuf) -> Result<Container, LibcontainerError> {
    let container = Container::load(container_dir)?;
    let may_exec = match container.status() {
        ContainerStatus::Running => true,
        ContainerStatus::Paused => self.ignore_paused,
        _ => false,
    };
    if may_exec {
        return Ok(container);
    }
    syd::t!("cannot exec: invalid container state");
    Err(LibcontainerError::IncorrectStatus(container.status()))
}
/// Rewrites `spec` in place so that it describes the tenant process.
///
/// The `process` section is replaced either by the contents of the process
/// file (when `self.process` is set) or by a process assembled from the
/// builder's settings. The `linux` section is replaced by one that contains
/// the namespaces of the container's init process plus the cgroups path and
/// personality carried over from the original spec.
///
/// Panics if the original spec has no `linux` section (see the `unwrap`
/// below).
fn adapt_spec_for_tenant(
&self,
spec: &mut Spec,
container: &Container,
) -> Result<(), LibcontainerError> {
let process = if let Some(process) = &self.process {
self.get_process(process)?
} else {
// Start from the environment of the original spec, if any.
let spec_env = spec
.process()
.as_ref()
.and_then(|p| p.env().as_ref().cloned())
.unwrap_or_default();
let mut process_builder = ProcessBuilder::default()
.args(self.get_args()?)
.env(self.get_environment(spec_env));
if let Some(cwd) = self.get_working_dir()? {
process_builder = process_builder.cwd(cwd);
}
// The exec CPU affinity is inherited from the original process section.
if let Some(process) = spec.process() {
if let Some(cpu_affinity) = process.exec_cpu_affinity() {
process_builder = process_builder.exec_cpu_affinity(cpu_affinity.clone());
}
}
if let Some(no_new_priv) = self.get_no_new_privileges(spec) {
process_builder = process_builder.no_new_privileges(no_new_priv);
}
if let Some(ref apparmor) = self.apparmor {
process_builder = process_builder.apparmor_profile(apparmor)
}
let capabilities = get_capabilities(&self.capabilities, spec)?;
process_builder = process_builder.capabilities(capabilities);
let mut user_builder = UserBuilder::default();
if !self.additional_gids.is_empty() {
user_builder = user_builder.additional_gids(self.additional_gids.clone());
}
if let Some(uid) = self.user {
user_builder = user_builder.uid(uid);
}
if let Some(gid) = self.group {
user_builder = user_builder.gid(gid);
}
process_builder = process_builder.user(user_builder.build()?);
process_builder.build()?
};
let container_pid = container.pid().ok_or(LibcontainerError::Other(
"could not retrieve container init pid".into(),
))?;
// Any failure to inspect the init process is reported as ESRCH.
let init_process = procfs::process::Process::new(container_pid.as_raw()).map_err(|_| {
LibcontainerError::OtherIO(std::io::Error::from_raw_os_error(libc::ESRCH))
})?;
let ns = self.get_namespaces(
init_process
.namespaces()
.map_err(|_| {
LibcontainerError::OtherIO(std::io::Error::from_raw_os_error(libc::ESRCH))
})?
.0,
)?;
#[expect(clippy::disallowed_methods)]
let spec_linux = spec.linux().as_ref().unwrap();
// Only namespaces, cgroups path and personality end up in the new
// `linux` section; everything else from the original is dropped.
let mut linux_builder = LinuxBuilder::default().namespaces(ns);
if let Some(ref cgroup_path) = spec_linux.cgroups_path() {
linux_builder = linux_builder.cgroups_path(cgroup_path.clone());
}
if let Some(personality) = spec_linux.personality() {
linux_builder = linux_builder.personality(personality.clone());
}
let linux = linux_builder.build()?;
spec.set_process(Some(process)).set_linux(Some(linux));
Ok(())
}
/// Reads and deserializes the JSON process description at `process`.
///
/// Fails with `LibcontainerError::Other` when the file does not exist, and
/// with the respective I/O or serialization error when it cannot be opened
/// or parsed.
fn get_process(&self, process: &Path) -> Result<Process, LibcontainerError> {
    if XPath::new(process).exists(true) {
        let file = utils::open(process).map_err(LibcontainerError::OtherIO)?;
        serde_json::from_reader(BufReader::new(file))
            .map_err(LibcontainerError::OtherSerialization)
    } else {
        syd::t!("process.json file does not exist");
        Err(LibcontainerError::Other(
            "process.json file does not exist".into(),
        ))
    }
}
/// Returns the configured working directory, if any.
///
/// A relative working directory is rejected with
/// `LibcontainerError::Other`.
fn get_working_dir(&self) -> Result<Option<PathBuf>, LibcontainerError> {
    match &self.cwd {
        None => Ok(None),
        Some(cwd) if cwd.is_relative() => {
            syd::t!("current working directory must be an absolute path");
            Err(LibcontainerError::Other(
                "current working directory must be an absolute path".into(),
            ))
        }
        Some(cwd) => Ok(Some(cwd.clone())),
    }
}
/// Returns a copy of the configured argument vector, or
/// `MissingSpecError::Args` when it is empty.
fn get_args(&self) -> Result<Vec<String>, LibcontainerError> {
    if self.args.is_empty() {
        return Err(MissingSpecError::Args.into());
    }
    Ok(self.args.clone())
}
/// Merges the spec's environment with the builder's overrides.
///
/// Entries of `spec_env` whose name (the text before the first '=') is also
/// a key of `self.env` are dropped; the remaining entries come first,
/// followed by one `KEY=VALUE` entry per override.
fn get_environment(&self, spec_env: Vec<String>) -> Vec<String> {
    let inherited = spec_env.into_iter().filter(|entry| {
        let name = entry
            .split_once('=')
            .map_or(entry.as_str(), |(name, _)| name);
        !self.env.contains_key(name)
    });
    let overrides = self.env.iter().map(|(k, v)| format!("{k}={v}"));
    inherited.chain(overrides).collect()
}
/// Decides the no-new-privileges setting of the tenant process.
///
/// An explicit `true` on the builder wins; in every other case (unset or
/// explicit `false`) the value from the spec's process, if any, is used.
fn get_no_new_privileges(&self, spec: &Spec) -> Option<bool> {
    match self.no_new_privs {
        Some(true) => Some(true),
        _ => spec.process().as_ref().and_then(|p| p.no_new_privileges()),
    }
}
/// Builds the namespace list of the tenant from the namespaces of the
/// container's init process.
///
/// For every name in `NAMESPACE_TYPES` that is present in
/// `init_namespaces`, a namespace entry pointing at the init process's
/// namespace path is produced, in the order of `NAMESPACE_TYPES`.
fn get_namespaces(
    &self,
    init_namespaces: HashMap<OsString, Namespace>,
) -> Result<Vec<LinuxNamespace>, LibcontainerError> {
    let mut joined = Vec::with_capacity(init_namespaces.len());
    for ns_type in NAMESPACE_TYPES.iter().copied() {
        let Some(init_ns) = init_namespaces.get(OsStr::new(ns_type)) else {
            continue;
        };
        let typ = LinuxNamespaceType::try_from(ns_type)?;
        let namespace = LinuxNamespaceBuilder::default()
            .typ(typ)
            .path(init_ns.path.clone())
            .build()?;
        joined.push(namespace);
    }
    Ok(joined)
}
/// Reports whether the existing container uses systemd, as recorded in its state.
fn should_use_systemd(&self, container: &Container) -> bool {
container.systemd()
}
/// Picks a fresh, randomly named notify socket path inside `container_dir`.
///
/// Only the path is computed here; nothing is created on disk.
fn setup_notify_listener(container_dir: &Path) -> Result<PathBuf, LibcontainerError> {
    Ok(container_dir.join(Self::generate_name(container_dir, TENANT_NOTIFY)))
}
/// Sets up the console socket for the tenant process, if one was given.
///
/// A random socket name is generated in any case; it is passed to
/// `tty::setup_console_socket` only when a console socket path is
/// configured. Returns `None` otherwise.
fn setup_tty_socket(&self, container_dir: &Path) -> Result<Option<OwnedFd>, LibcontainerError> {
    let tty_name = Self::generate_name(container_dir, TENANT_TTY);
    match &self.console_socket {
        Some(console_socket) => {
            let fd = tty::setup_console_socket(container_dir, console_socket, &tty_name)?;
            Ok(Some(fd))
        }
        None => Ok(None),
    }
}
/// Generates a socket file name of the form `{prefix}{hex}.sock` that does
/// not yet exist in `dir`.
///
/// The hexadecimal part is four random bytes from `getrandom(2)`. Names
/// are generated until an unused one is found.
///
/// # Panics
///
/// Panics if `getrandom(2)` fails with anything other than `EINTR`.
fn generate_name(dir: &Path, prefix: &str) -> String {
    loop {
        let mut rand_buf = [0u8; 4];
        loop {
            // SAFETY: the pointer and length describe `rand_buf`, a live,
            // writable local array; getrandom writes at most `len` bytes.
            let n = unsafe { libc::getrandom(rand_buf.as_mut_ptr().cast(), rand_buf.len(), 0) };
            if n < 0 {
                match Errno::last() {
                    // Interrupted by a signal before any data was produced: retry.
                    Errno::EINTR => continue,
                    errno => panic!("getrandom: {errno}"),
                }
            }
            // Only accept a completely filled buffer.
            if usize::try_from(n).is_ok_and(|got| got == rand_buf.len()) {
                break;
            }
        }
        let rand = i32::from_be_bytes(rand_buf);
        let name = format!("{prefix}{rand:x}.sock");
        if !XPath::new(&dir.join(&name)).exists(true) {
            return name;
        }
    }
}
}
/// Builder for the init process of a new container.
struct SydInitContainerBuilder {
/// Underlying libcontainer builder; its stdin/stdout/stderr are forwarded in `build`.
base: ContainerBuilder,
/// Bundle directory containing `config.json`.
bundle: PathBuf,
/// Whether systemd is used (taken from the global options, see `with_systemd`).
use_systemd: bool,
/// Whether the container is started detached.
detached: bool,
/// Forwarded to the builder implementation as `no_pivot`.
no_pivot: bool,
/// Forwarded to the builder implementation as `as_sibling`.
as_sibling: bool,
/// Console socket path, if provided.
console_socket: Option<PathBuf>,
/// Syscall implementation selector.
syscall: SyscallType,
/// Id of the container to create.
container_id: String,
/// Canonicalized pid file path, if requested.
pid_file: Option<PathBuf>,
/// Number of file descriptors to preserve (one more when a log is configured).
preserve_fds: i32,
/// Executor that runs the workload.
executor: Box<dyn Executor>,
/// Root directory under which the container's state directory is created.
root_path: PathBuf,
}
impl TryFrom<(GlobalOpts, Create)> for SydInitContainerBuilder {
type Error = SydError;
fn try_from(options: (GlobalOpts, Create)) -> SydResult<Self> {
let (opt, args) = options;
let syscall = SyscallType::default();
let container_id = args.container_id.clone();
let pid_file = if let Some(ref p) = args.pid_file {
Some(p.canonicalize_safely()?)
} else {
None
};
let executor = Box::new(SydExecutor {});
let mut preserve_fds = args.preserve_fds;
if opt.log.is_some() {
preserve_fds += 1; }
#[expect(clippy::disallowed_methods)]
let builder = ContainerBuilder::new(container_id.clone(), syscall)
.with_executor(SydExecutor {})
.with_pid_file(pid_file.clone())?
.with_console_socket(args.console_socket.as_ref())
.with_root_path(opt.root.clone().unwrap())?
.with_preserved_fds(preserve_fds)
.validate_id()?;
#[expect(clippy::disallowed_methods)]
Ok(Self {
base: builder,
bundle: args.bundle,
use_systemd: opt.systemd_cgroup,
detached: true,
no_pivot: false,
as_sibling: false,
container_id,
executor,
pid_file,
syscall,
console_socket: args.console_socket,
preserve_fds,
root_path: opt.root.unwrap(),
})
}
}
impl TryFrom<(GlobalOpts, Run)> for SydInitContainerBuilder {
type Error = SydError;
fn try_from(options: (GlobalOpts, Run)) -> SydResult<Self> {
let (opt, args) = options;
let syscall = SyscallType::default();
let container_id = args.container_id.clone();
let pid_file = if let Some(ref p) = args.pid_file {
Some(p.canonicalize_safely()?)
} else {
None
};
let executor = Box::new(SydExecutor {});
let mut preserve_fds = args.preserve_fds;
if opt.log.is_some() {
preserve_fds += 1; }
#[expect(clippy::disallowed_methods)]
let builder = ContainerBuilder::new(container_id.clone(), syscall)
.with_executor(SydExecutor {})
.with_pid_file(pid_file.clone())?
.with_console_socket(args.console_socket.as_ref())
.with_root_path(opt.root.clone().unwrap())?
.with_preserved_fds(preserve_fds)
.validate_id()?;
#[expect(clippy::disallowed_methods)]
Ok(Self {
base: builder,
bundle: args.bundle,
use_systemd: opt.systemd_cgroup,
detached: true,
no_pivot: false,
as_sibling: false,
container_id,
executor,
pid_file,
syscall,
console_socket: args.console_socket,
preserve_fds,
root_path: opt.root.unwrap(),
})
}
}
impl SydInitContainerBuilder {
/// Overrides whether systemd is used for the container.
pub fn with_systemd(mut self, should_use: bool) -> Self {
self.use_systemd = should_use;
self
}
/// Sets the `as_sibling` flag forwarded to the builder implementation.
#[expect(dead_code)]
#[expect(clippy::wrong_self_convention)]
pub fn as_sibling(mut self, as_sibling: bool) -> Self {
self.as_sibling = as_sibling;
self
}
/// Sets the `no_pivot` flag forwarded to the builder implementation.
pub fn with_no_pivot(mut self, no_pivot: bool) -> Self {
self.no_pivot = no_pivot;
self
}
/// Controls whether the container is started detached.
pub fn with_detach(mut self, detached: bool) -> Self {
self.detached = detached;
self
}
/// Replaces the bundle directory from which `config.json` is loaded.
#[expect(dead_code)]
pub fn with_bundle<P: Into<PathBuf>>(mut self, bundle: P) -> Self {
self.bundle = bundle.into();
self
}
/// Creates the container and returns its state handle.
///
/// Steps, in order: load and check the spec, create the container
/// directory and state, record the systemd flag and annotations, set up the
/// optional console socket, derive the user-namespace configuration, save
/// the Youki config into the container directory, create the init process
/// through `SydContainerBuilderImpl`, and finally refresh the state.
///
/// Any failing step aborts the build with its error.
pub fn build(self) -> Result<Container, LibcontainerError> {
let spec = self.load_spec()?;
self.check_terminal(&spec)?;
let container_dir = self.create_container_dir()?;
let mut container = self.create_container_state(&container_dir)?;
container
.set_systemd(self.use_systemd)
.set_annotations(spec.annotations().clone());
let notify_path = container_dir.join(NOTIFY_FILE);
let rootfs = fs::canonicalize(spec.root().as_ref().ok_or(MissingSpecError::Root)?.path())
.map_err(LibcontainerError::OtherIO)?;
let csocketfd = if let Some(console_socket) = &self.console_socket {
Some(tty::setup_console_socket(
&container_dir,
console_socket,
"console-socket",
)?)
} else {
None
};
syd::t!("parsing user namespace config");
let user_ns_config = UserNamespaceConfig::new(&spec)?;
syd::t!("parsing youki config");
let config = YoukiConfig::from_spec(&spec, container.id())?;
config.save(&container_dir).map_err(|err| {
syd::t!("failed to save config: {err}");
err
})?;
// The builder implementation gets its own copy of the container state;
// the local `container` is refreshed and returned after creation.
let mut builder_impl = SydContainerBuilderImpl {
container_type: ContainerType::SydInitContainer,
syscall: self.syscall,
container_id: self.container_id,
pid_file: self.pid_file,
console_socket: csocketfd,
use_systemd: self.use_systemd,
spec: Rc::new(spec),
rootfs,
user_ns_config,
notify_path,
container: Some(container.clone()),
preserve_fds: self.preserve_fds,
detached: self.detached,
executor: self.executor,
no_pivot: self.no_pivot,
stdin: self.base.stdin,
stdout: self.base.stdout,
stderr: self.base.stderr,
as_sibling: self.as_sibling,
sub_cgroup_path: None,
process_label: None,
};
builder_impl.create()?;
container.refresh_state()?;
Ok(container)
}
/// Creates the state directory of the new container
/// (`root_path/container_id`) and returns its path.
///
/// Fails with `LibcontainerError::Exist` when the path already exists, and
/// with the I/O error when the directory cannot be created.
fn create_container_dir(&self) -> Result<PathBuf, LibcontainerError> {
    let container_dir = self.root_path.join(&self.container_id);
    syd::t!("container directory will be {container_dir:?}");

    if XPath::new(&container_dir).exists(false) {
        syd::t!("container already exists");
        return Err(LibcontainerError::Exist);
    }

    #[expect(clippy::disallowed_methods)]
    let created = std::fs::create_dir_all(&container_dir);
    match created {
        Ok(()) => Ok(container_dir),
        Err(err) => {
            syd::t!("failed to create container directory: {err}");
            Err(LibcontainerError::OtherIO(err))
        }
    }
}
/// Loads, validates and canonicalizes the OCI spec of the bundle, and
/// installs a Syd configuration file into the container's rootfs when one
/// is available.
///
/// The configuration directory is `/etc/syd/oci` when rootless mode is not
/// required, otherwise `$XDG_CONFIG_HOME/syd/oci` or `$HOME/.syd/oci`; the
/// lookup is skipped entirely when the `ENV_OCI_NO_CONFIG` variable is set.
/// Candidate files are tried in this order, depending on which of hostname
/// and domainname the spec sets: `<domainname>.syd-3`,
/// `<hostname>.<domainname>.syd-3`, `<hostname>.syd-3`, and finally
/// `default.syd-3`. The first one that can be opened is copied to
/// `<rootfs>/.oci.syd-3`, which is created exclusively and then set to
/// mode 0444.
///
/// # Errors
///
/// Fails when the spec cannot be loaded, validated or canonicalized, when
/// the spec has no `root` section (`MissingSpecError::Root`), when the
/// hostname or domainname is rejected by `check_name`, or when opening,
/// copying or changing the mode of a configuration file fails. A candidate
/// that does not exist (`ENOENT`) is skipped silently.
fn load_spec(&self) -> Result<Spec, LibcontainerError> {
    let source_spec_path = self.bundle.join("config.json");
    let mut spec = syd_spec_load(source_spec_path).inspect_err(|err| {
        syd::t!("failed to load OCI spec: {err}");
    })?;
    Self::validate_spec(&spec).inspect_err(|err| {
        syd::t!("failed to validate OCI spec: {err}");
    })?;
    spec.canonicalize_rootfs(&self.bundle).inspect_err(|err| {
        syd::t!("failed to canonicalize rootfs: {err}");
    })?;
    let dot_oci = if let Some(root) = spec.root() {
        XPathBuf::from(root.path().clone()).join(b".oci.syd-3")
    } else {
        // A missing root section is a missing-spec error, not a version problem.
        return Err(MissingSpecError::Root.into());
    };
    let is_rootless = rootless_required(&*create_syscall())
        .map_err(LibcontainerError::OtherIO)
        .inspect_err(|err| {
            syd::t!("failed to determine rootless required: {err}");
        })?;
    let syd_dir = if env::var_os(syd::config::ENV_OCI_NO_CONFIG).is_some() {
        None
    } else if !is_rootless {
        Some(XPathBuf::from("/etc/syd/oci"))
    } else if let Some(path) = env::var_os("XDG_CONFIG_HOME") {
        Some(XPathBuf::from(path).join(b"syd").join(b"oci"))
    } else {
        env::var_os("HOME").map(|path| XPathBuf::from(path).join(b".syd").join(b"oci"))
    };
    if let Some(syd_dir) = syd_dir {
        // Candidate configuration files, most preferred first.
        let mut sources = vec![];
        match (spec.hostname(), spec.domainname()) {
            (Some(hostname), Some(domainname)) => {
                let hname = XPathBuf::from(hostname.clone());
                let dname = XPathBuf::from(domainname.clone());
                hname.check_name().map_err(err2io).inspect_err(|err| {
                    syd::t!("detected unsafe hostname in OCI spec: {err}");
                })?;
                dname.check_name().map_err(err2io).inspect_err(|err| {
                    syd::t!("detected unsafe domainname in OCI spec: {err}");
                })?;
                let mut name = dname.clone();
                name.append_bytes(b".syd-3");
                sources.push(syd_dir.join(name.as_bytes()));
                let mut name = hname.clone();
                name.append_byte(b'.');
                name.append_bytes(dname.as_bytes());
                name.append_bytes(b".syd-3");
                sources.push(syd_dir.join(name.as_bytes()));
                let mut name = hname.clone();
                name.append_bytes(b".syd-3");
                sources.push(syd_dir.join(name.as_bytes()));
            }
            (None, Some(domainname)) => {
                let dname = XPathBuf::from(domainname.clone());
                dname.check_name().map_err(err2io).inspect_err(|err| {
                    syd::t!("detected unsafe domainname in OCI spec: {err}");
                })?;
                let mut name = dname.clone();
                name.append_bytes(b".syd-3");
                sources.push(syd_dir.join(name.as_bytes()));
            }
            (Some(hostname), None) => {
                let hname = XPathBuf::from(hostname.clone());
                hname.check_name().map_err(err2io).inspect_err(|err| {
                    syd::t!("detected unsafe hostname in OCI spec: {err}");
                })?;
                let mut name = hname.clone();
                name.append_bytes(b".syd-3");
                sources.push(syd_dir.join(name.as_bytes()));
            }
            _ => {}
        };
        sources.push(syd_dir.join(b"default.syd-3"));
        // Use the first candidate that can be opened; a missing file just
        // moves on to the next one, any other error aborts.
        let mut config_fd = None;
        for path in sources {
            match safe_open_file2(AT_BADFD, &path) {
                Ok((fd, _)) => {
                    config_fd = Some(fd);
                    break;
                }
                Err(Errno::ENOENT) => {}
                Err(errno) => {
                    syd::t!("error opening Syd configuration file `{path}' for read: {errno}");
                    return Err(err2io(errno));
                }
            };
        }
        if let Some(mut config_fd) = config_fd {
            // O_EXCL: refuse to overwrite a file that is already present in
            // the rootfs; symlinks and magic links are not followed.
            let how = safe_open_how(OFlag::O_WRONLY | OFlag::O_CREAT | OFlag::O_EXCL, ResolveFlag::empty())
                .resolve(ResolveFlag::RESOLVE_NO_MAGICLINKS | ResolveFlag::RESOLVE_NO_SYMLINKS);
            #[expect(clippy::disallowed_methods)]
            let mut oci_fd = retry_on_eintr(|| openat2(AT_BADFD, &dot_oci, how))
                .map_err(err2io)
                .inspect_err(|err| {
                    syd::t!(
                        "error opening Syd configuration file `{dot_oci}' for write: {err}"
                    );
                })?;
            syd::io::copy(&mut config_fd, &mut oci_fd)
                .map_err(err2io)
                .inspect_err(|err| {
                    syd::t!("error writing Syd configuration file `{dot_oci}': {err}");
                })?;
            fchmod(oci_fd, Mode::from_bits_retain(0o444))
                .map_err(err2io)
                .inspect_err(|err| {
                    syd::t!("error changing mode of Syd configuration file `{dot_oci}': {err}");
                })?;
        }
    }
    Ok(spec)
}
/// Checks that `spec` is something this runtime can execute.
///
/// The checks run in this order and the first failure is returned:
/// spec version, AppArmor availability, I/O priority, scheduler settings,
/// mount options, user-namespace settings and network devices.
///
/// # Errors
///
/// Returns the matching `ErrInvalidSpec` variant (converted into
/// `LibcontainerError` by `?`) for the checks done inline, and whatever the
/// `utils::validate_*` helpers report for the delegated checks.
fn validate_spec(spec: &Spec) -> Result<(), LibcontainerError> {
// Only specs whose version string starts with "1." are accepted.
let version = spec.version();
if !version.starts_with("1.") {
syd::t!(
"runtime spec has incompatible version '{}'. Only 1.X.Y is supported",
spec.version()
);
Err(ErrInvalidSpec::UnsupportedVersion)?;
}
if let Some(process) = spec.process() {
// An AppArmor profile in the spec is only acceptable when AppArmor is
// enabled; the profile name itself is not inspected here.
if let Some(_profile) = process.apparmor_profile() {
let apparmor_is_enabled = apparmor::is_enabled().map_err(|err| {
syd::t!("failed to check if apparmor is enabled");
LibcontainerError::OtherIO(err)
})?;
if !apparmor_is_enabled {
syd::t!("apparmor profile exists in spec, but apparmor is not activated on this system");
Err(ErrInvalidSpec::AppArmorNotEnabled)?;
}
}
if let Some(io_priority) = process.io_priority() {
let priority = io_priority.priority();
// The class is serialized only so it can be shown in the log
// message; a serialization failure is itself treated as invalid.
let iop_class_res = serde_json::to_string(&io_priority.class());
match iop_class_res {
Ok(_iop_class) => {
// Only the numeric priority range is enforced here.
if !(0..=7).contains(&priority) {
syd::t!("io priority '{}' not between 0 and 7 (inclusive), class '{}' not in (IO_PRIO_CLASS_RT,IO_PRIO_CLASS_BE,IO_PRIO_CLASS_IDLE)",
priority, _iop_class);
Err(ErrInvalidSpec::IoPriority)?;
}
}
Err(_e) => {
syd::t!("failed to parse io priority class: {_e}");
Err(ErrInvalidSpec::IoPriority)?;
}
}
}
if let Some(sc) = process.scheduler() {
let policy = sc.policy();
// `nice` is range-checked only for the SchedBatch/SchedOther policies.
if let Some(nice) = sc.nice() {
if (*policy == LinuxSchedulerPolicy::SchedBatch
|| *policy == LinuxSchedulerPolicy::SchedOther)
&& (*nice < -20 || *nice > 19)
{
syd::t!("invalid scheduler.nice: '{nice}', must be within -20 to 19");
Err(ErrInvalidSpec::Scheduler)?;
}
}
// A non-zero priority is only accepted for SchedFifo and SchedRr.
if let Some(priority) = sc.priority() {
if *priority != 0
&& (*policy != LinuxSchedulerPolicy::SchedFifo
&& *policy != LinuxSchedulerPolicy::SchedRr)
{
syd::t!("scheduler.priority can only be specified for SchedFIFO or SchedRR policy");
Err(ErrInvalidSpec::Scheduler)?;
}
}
// runtime/deadline/period must be absent or zero unless the policy
// is SchedDeadline.
if *policy != LinuxSchedulerPolicy::SchedDeadline {
if let Some(runtime) = sc.runtime() {
if *runtime != 0 {
syd::t!(
"scheduler runtime can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
if let Some(deadline) = sc.deadline() {
if *deadline != 0 {
syd::t!(
"scheduler deadline can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
if let Some(period) = sc.period() {
if *period != 0 {
syd::t!(
"scheduler period can only be specified for SchedDeadline policy"
);
Err(ErrInvalidSpec::Scheduler)?;
}
}
}
}
}
// The remaining checks are delegated to libcontainer's helpers.
if let Some(mounts) = spec.mounts() {
utils::validate_mount_options(mounts)?;
}
let syscall = create_syscall();
utils::validate_spec_for_new_user_ns(spec, &*syscall)?;
utils::validate_spec_for_net_devices(spec, &*syscall)
.map_err(LibcontainerError::NetDevicesError)?;
Ok(())
}
/// Verifies that the console socket option agrees with the spec.
///
/// A console socket is required exactly when the container is detached and
/// the spec asks for a terminal. Both a missing required socket and a
/// superfluous socket are reported as `LibcontainerError::Other`.
fn check_terminal(&self, spec: &Spec) -> Result<(), LibcontainerError> {
    // A spec without a process section, or without `terminal`, counts as
    // "no terminal".
    let wants_tty = match spec.process().as_ref() {
        Some(proc_spec) => proc_spec.terminal().unwrap_or(false),
        None => false,
    };
    let socket_needed = self.detached && wants_tty;
    let socket_given = self.console_socket.is_some();

    match (socket_needed, socket_given) {
        (true, false) => Err(LibcontainerError::Other(
            "terminal: true requires a console socket when detached".into(),
        )),
        (false, true) => Err(LibcontainerError::Other(
            "console socket provided but not needed (terminal is false or not detached)".into(),
        )),
        _ => Ok(()),
    }
}
/// Creates the state object for a new container rooted at `container_dir`
/// and saves it right away.
///
/// The container starts in the `Creating` status with no pid recorded.
fn create_container_state(&self, container_dir: &Path) -> Result<Container, LibcontainerError> {
    let fresh = Container::new(
        &self.container_id,
        ContainerStatus::Creating,
        None,
        &self.bundle,
        container_dir,
    )?;
    match fresh.save() {
        Ok(_) => Ok(fresh),
        Err(err) => Err(err.into()),
    }
}
}
/// Which kind of container process `SydContainerBuilderImpl` is launching.
///
/// `run_container` maps these one-to-one onto libcontainer's
/// `process::args::ContainerType`.
#[derive(Debug, Copy, Clone)]
enum ContainerType {
/// The container's init process; the only kind cleaned up on failure.
SydInitContainer,
/// An additional process for an existing container; `exec_notify_fd` is
/// forwarded unchanged to libcontainer's `TenantContainer`.
SydTenantContainer { exec_notify_fd: RawFd },
}
/// Settings gathered by the init/tenant builders, consumed by
/// `SydContainerBuilderImpl::create` to launch the container process.
///
/// Most fields are forwarded as-is into libcontainer's `ContainerArgs`.
struct SydContainerBuilderImpl {
/// Init or tenant; decides whether failed creation triggers cleanup.
pub container_type: ContainerType,
/// Syscall implementation selector handed to libcontainer.
pub syscall: SyscallType,
/// Request systemd-managed cgroups (also forced when `user_ns_config` is set).
pub use_systemd: bool,
/// Container identifier; also used for the default cgroup path.
pub container_id: String,
/// The loaded OCI runtime spec.
pub spec: Rc<Spec>,
/// Root filesystem path passed to the container process.
pub rootfs: PathBuf,
/// Optional pid file path, forwarded to libcontainer.
pub pid_file: Option<PathBuf>,
/// Console socket; only its raw fd is passed on, ownership stays here.
pub console_socket: Option<OwnedFd>,
/// User namespace configuration, if any.
pub user_ns_config: Option<UserNamespaceConfig>,
/// Path at which the notify listener socket is created.
pub notify_path: PathBuf,
/// Container state to update after a successful launch, if tracked.
pub container: Option<Container>,
/// Forwarded to `ContainerArgs::preserve_fds`.
pub preserve_fds: i32,
/// Whether the container runs detached.
pub detached: bool,
/// Workload executor, cloned into `ContainerArgs`.
pub executor: Box<dyn Executor>,
/// Forwarded to `ContainerArgs::no_pivot`.
pub no_pivot: bool,
/// Optional stdio descriptors; only the raw fds are passed on.
pub stdin: Option<OwnedFd>,
pub stdout: Option<OwnedFd>,
pub stderr: Option<OwnedFd>,
/// Forwarded to `ContainerArgs::as_sibling`.
pub as_sibling: bool,
/// Optional sub-cgroup, joined below the base cgroup path ("/" means none).
pub sub_cgroup_path: Option<String>,
/// Not read anywhere in this type (hence the `dead_code` expectation).
#[expect(dead_code)]
pub process_label: Option<String>,
}
// Launch and cleanup logic shared by the init and tenant container builders.
impl SydContainerBuilderImpl {
/// Launches the container process and returns its pid.
///
/// If launching fails and this is an init container, the partially created
/// container is cleaned up. When cleanup fails too, both messages are
/// merged into a single `LibcontainerError::Other`, so the original error
/// variant is lost in that case; otherwise the launch error is returned
/// unchanged.
fn create(&mut self) -> Result<Pid, LibcontainerError> {
match self.run_container() {
Ok(pid) => Ok(pid),
Err(outer) => {
// Tenant containers are never cleaned up here.
let cleanup_err = if self.is_init_container() {
self.cleanup_container().err()
} else {
None
};
if let Some(cleanup_err) = cleanup_err {
Err(LibcontainerError::Other(format!(
"{outer}; cleanup error: {cleanup_err}"
)))
} else {
Err(outer)
}
}
}
}
/// Returns `true` when this builder launches the container's init process.
fn is_init_container(&self) -> bool {
matches!(self.container_type, ContainerType::SydInitContainer)
}
/// Prepares cgroup configuration, the notify socket and the OOM score,
/// then hands over to libcontainer's main process routine.
///
/// On success the tracked container state (if any) is marked `Created`
/// with the creator uid and init pid, and saved.
///
/// # Errors
///
/// Fails when the spec lacks a `linux` or `process` section, when
/// `sub_cgroup_path` escapes the base cgroup path, or when any of the
/// setup steps or the container main process fails.
fn run_container(&mut self) -> Result<Pid, LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let base_cgroups_path = get_cgroup_path(linux.cgroups_path(), &self.container_id);
let mut final_cgroups_path = base_cgroups_path;
if let Some(sub_cgroup_path) = &self.sub_cgroup_path {
// "/" means "no sub cgroup".
if sub_cgroup_path != "/" {
let potential_path = final_cgroups_path.join(sub_cgroup_path);
let normalized = potential_path.normalize();
// Reject paths that leave the base cgroup after normalization
// (an absolute path or `..` components, for example).
if !normalized.starts_with(&final_cgroups_path) {
return Err(LibcontainerError::OtherCgroup(format!(
"{} is not a sub cgroup path",
sub_cgroup_path
)));
}
final_cgroups_path = normalized;
}
}
let cgroup_config = libcgroups::common::CgroupConfig {
cgroup_path: final_cgroups_path,
// systemd cgroups are used on request, and always when a user
// namespace configuration is present.
systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(),
container_name: self.container_id.to_owned(),
};
let process = self
.spec
.process()
.as_ref()
.ok_or(MissingSpecError::Process)?;
// The notify listener is created before the container process is started.
let notify_listener = NotifyListener::new(&self.notify_path)?;
// Apply the spec's OOM score adjustment to the current process.
// NOTE(review): presumably inherited by the container process — confirm.
#[expect(clippy::disallowed_methods)]
if let Some(oom_score_adj) = process.oom_score_adj() {
syd::t!("Set OOM score to {oom_score_adj}");
let mut f = fs::File::create("/proc/self/oom_score_adj").map_err(|err| {
syd::t!("failed to open /proc/self/oom_score_adj: {err}");
LibcontainerError::OtherIO(err)
})?;
f.write_all(oom_score_adj.to_string().as_bytes())
.map_err(|err| {
syd::t!("failed to write to /proc/self/oom_score_adj: {err}");
LibcontainerError::OtherIO(err)
})?;
}
let container_args = ContainerArgs {
container_type: match self.container_type {
ContainerType::SydInitContainer => process::args::ContainerType::InitContainer,
ContainerType::SydTenantContainer { exec_notify_fd } => {
process::args::ContainerType::TenantContainer { exec_notify_fd }
}
},
syscall: self.syscall,
spec: Rc::clone(&self.spec),
rootfs: self.rootfs.to_owned(),
// Only raw fds are handed over; `self` keeps owning the descriptors.
console_socket: self.console_socket.as_ref().map(|c| c.as_raw_fd()),
notify_listener,
preserve_fds: self.preserve_fds,
container: self.container.to_owned(),
user_ns_config: self.user_ns_config.to_owned(),
cgroup_config,
detached: self.detached,
executor: self.executor.clone(),
no_pivot: self.no_pivot,
stdin: self.stdin.as_ref().map(|x| x.as_raw_fd()),
stdout: self.stdout.as_ref().map(|x| x.as_raw_fd()),
stderr: self.stderr.as_ref().map(|x| x.as_raw_fd()),
as_sibling: self.as_sibling,
pid_file: self.pid_file.to_owned(),
};
let (init_pid, need_to_clean_up_intel_rdt_dir) =
process::container_main_process::container_main_process(&container_args).map_err(
|err| {
syd::t!("failed to run container process: {err}");
LibcontainerError::MainProcess(err)
},
)?;
// Record the outcome in the saved container state, when one is tracked.
if let Some(container) = &mut self.container {
container
.set_status(ContainerStatus::Created)
.set_creator(nix::unistd::geteuid().as_raw())
.set_pid(init_pid.as_raw())
.set_clean_up_intel_rdt_directory(need_to_clean_up_intel_rdt_dir)
.save()?;
}
// The pid is rebuilt from its raw value.
// NOTE(review): presumably bridges two `Pid` types — confirm.
Ok(Pid::from_raw(init_pid.as_raw()))
}
/// Best-effort removal of what a failed init container left behind: its
/// cgroup, its resctrl subdirectory (when flagged) and its state directory.
///
/// All steps are attempted; failures are collected and reported together
/// as one `LibcontainerError::Other`. Note that the cgroup path used here
/// is the base path; `sub_cgroup_path` is not applied.
fn cleanup_container(&self) -> Result<(), LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let cgroups_path = get_cgroup_path(linux.cgroups_path(), &self.container_id);
let cmanager =
libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig {
cgroup_path: cgroups_path,
systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(),
container_name: self.container_id.to_string(),
})?;
let mut errors = Vec::new();
if let Err(e) = cmanager.remove() {
syd::t!("failed to remove cgroup manager: {e}");
errors.push(e.to_string());
}
if let Some(container) = &self.container {
// Only delete the resctrl subdirectory when the state says so.
if let Some(true) = container.clean_up_intel_rdt_subdirectory() {
if let Err(e) = delete_resctrl_subdirectory(container.id()) {
syd::t!("failed to delete resctrl subdirectory: {e}");
errors.push(e.to_string());
}
}
if XPath::new(&container.root).exists(true) {
if let Err(e) = fs::remove_dir_all(&container.root) {
syd::t!("failed to delete container root: {e}");
errors.push(e.to_string());
}
}
}
if !errors.is_empty() {
return Err(LibcontainerError::Other(format!(
"failed to cleanup container: {}",
errors.join(";")
)));
}
Ok(())
}
}
/// Expands to a compile-time string literal of the form
/// `version <CARGO_PKG_VERSION>\ncommit: <SYD_GIT_COMMIT>`.
///
/// Both values are read with `env!`, so `SYD_GIT_COMMIT` must be set in the
/// build environment or compilation fails.
#[macro_export]
macro_rules! syd_oci_version {
() => {
concat!(
"version ",
env!("CARGO_PKG_VERSION"),
"\ncommit: ",
env!("SYD_GIT_COMMIT"),
)
};
}
// Top-level subcommand set: both liboci_cli command groups are flattened
// into one namespace. Payloads are boxed; `syd::main!` unboxes them when
// dispatching.
// (Plain `//` comments on purpose: clap-derive turns `///` into help text.)
#[derive(Parser, Debug)]
enum SubCommand {
// Commands from liboci_cli's `StandardCmd` (create/start/state/kill/delete).
#[clap(flatten)]
Standard(Box<StandardCmd>),
// Commands from liboci_cli's `CommonCmd` (exec, run, ps, list, ...).
#[clap(flatten)]
Common(Box<CommonCmd>),
}
// Command line of the `syd-oci` binary: liboci_cli's global options plus
// one subcommand. The version string comes from `syd_oci_version!`.
// (Plain `//` comments on purpose: clap-derive turns `///` into help text.)
#[derive(Parser, Debug)]
#[clap(
name = "syd-oci",
version = syd_oci_version!(),
about = "Syd's OCI container runtime",
author = "Ali Polatel <alip@chesswob.org>",
)]
struct Opts {
// Options shared by every subcommand (root directory, debug, ...).
#[clap(flatten)]
global: GlobalOpts,
// The subcommand to run.
#[clap(subcommand)]
subcmd: SubCommand,
}
// Program entry point: harden the process, parse the command line, set up
// logging, make sure the state root exists, then dispatch the subcommand.
syd::main! {
// Without the quick-boot variable, make sure we run from a sealed image.
// NOTE(review): `ensure_sealed` presumably re-executes from a sealed copy
// of the binary — confirm in syd::seal.
if env::var_os(syd::config::ENV_QUICK_BOOT).is_none() {
syd::seal::ensure_sealed()?;
} else {
// Mirror RUST_BACKTRACE into SYD_RUST_BACKTRACE (or clear the latter).
match env::var_os("RUST_BACKTRACE") {
Some(val) => env::set_var("SYD_RUST_BACKTRACE", val),
None => env::remove_var("SYD_RUST_BACKTRACE"),
};
// Backtraces are switched off unless ENV_SKIP_SCMP is set.
if secure_getenv(ENV_SKIP_SCMP).is_none() {
env::set_var("RUST_BACKTRACE", "0");
}
}
// Never let the seccomp-dump variable leak in from the caller's environment.
env::remove_var(syd::config::ENV_DUMP_SCMP);
let mut opts = Opts::parse();
// `--debug` raises both syd's log level and the tracing filter.
let (level, trace_level) = if opts.global.debug {
(LogLevel::Debug, tracing::Level::DEBUG)
} else {
(LogLevel::Info, tracing::Level::INFO)
};
log_init(level, None)?;
let log_level_filter = tracing_subscriber::filter::LevelFilter::from(trace_level);
// The fmt layer writes to a sink, i.e. its output is discarded.
// NOTE(review): `SydLayer` presumably forwards tracing events to syd's
// logger — confirm.
let format_layer = tracing_subscriber::fmt::layer()
.with_writer(std::io::sink) .with_span_events(tracing_subscriber::fmt::format::FmtSpan::NONE);
let subscriber = tracing_subscriber::registry()
.with(format_layer)
.with(log_level_filter)
.with(SydLayer);
tracing::subscriber::set_global_default(subscriber)?;
// After this call `opts.global.root` is always `Some(..)`; the command
// handlers below rely on that when they unwrap it.
make_root(&mut opts.global)?;
match opts.subcmd {
SubCommand::Standard(cmd) => match *cmd {
StandardCmd::Create(subopts) => cmd_create(opts.global, subopts),
StandardCmd::Start(subopts) => cmd_start(opts.global, subopts),
StandardCmd::State(subopts) => cmd_state(opts.global, subopts),
StandardCmd::Kill(subopts) => cmd_kill(opts.global, subopts),
StandardCmd::Delete(subopts) => cmd_delete(opts.global, subopts),
},
SubCommand::Common(cmd) => match *cmd {
CommonCmd::Features(subopts) => cmd_features(opts.global, subopts),
CommonCmd::Ps(subopts) => cmd_ps(opts.global, subopts),
CommonCmd::List(subopts) => cmd_list(opts.global, subopts),
CommonCmd::Spec(subopts) => cmd_spec(opts.global, subopts),
CommonCmd::Pause(subopts) => cmd_pause(opts.global, subopts),
CommonCmd::Resume(subopts) => cmd_resume(opts.global, subopts),
CommonCmd::Events(subopts) => cmd_events(opts.global, subopts),
CommonCmd::Update(subopts) => cmd_update(opts.global, subopts),
// NOTE(review): the double "t" presumably mirrors the variant name
// as declared by liboci_cli — verify there before "fixing" it.
CommonCmd::Checkpointt(subopts) => cmd_checkpoint(opts.global, subopts),
CommonCmd::Exec(subopts) => cmd_exec(opts.global, subopts),
CommonCmd::Run(subopts) => cmd_run(opts.global, subopts),
},
}
}
/// Returns the state directory of `container_id` below `root_path`.
///
/// `root_path` is canonicalized first; the container directory itself is
/// not required to exist.
///
/// # Errors
///
/// Any canonicalization failure is reported as `ENOENT`.
fn construct_container_root(root_path: &Path, container_id: &str) -> SydResult<PathBuf> {
    match fs::canonicalize(root_path) {
        Ok(resolved) => Ok(resolved.join(container_id)),
        Err(e) => {
            syd::t!("failed to canonicalize {root_path:?} for container {container_id}: {e}");
            Err(SydError::from(Errno::ENOENT))
        }
    }
}
/// Loads the saved state of `container_id` from below `root_path`.
///
/// # Errors
///
/// Returns `ENOENT` when the container's state directory does not exist,
/// and propagates failures from path resolution or state loading.
fn load_container(root_path: &Path, container_id: &str) -> SydResult<Container> {
    let container_root = construct_container_root(root_path, container_id)?;
    if XPath::new(&container_root).exists(true) {
        return Ok(Container::load(container_root)?);
    }
    syd::t!("container {container_id} does not exist");
    Err(Errno::ENOENT.into())
}
/// `create` subcommand: builds the init container, always detached,
/// without starting it.
fn cmd_create(opt: GlobalOpts, args: Create) -> SydResult<ExitCode> {
    // Copy the flags out first: `opt` and `args` are consumed by the builder.
    let (use_systemd, skip_pivot) = (opt.systemd_cgroup, args.no_pivot);
    SydInitContainerBuilder::try_from((opt, args))?
        .with_systemd(use_systemd)
        .with_detach(true)
        .with_no_pivot(skip_pivot)
        .build()?;
    Ok(ExitCode::SUCCESS)
}
/// `start` subcommand: starts a previously created container.
fn cmd_start(opt: GlobalOpts, args: Start) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut target = load_container(&state_root, &args.container_id)?;
    target.start()?;
    Ok(ExitCode::SUCCESS)
}
/// `state` subcommand: prints the container state as pretty JSON, then
/// terminates the process with status 0 (it never returns normally on
/// success).
fn cmd_state(opt: GlobalOpts, args: State) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let container = load_container(&state_root, &args.container_id)?;
    let rendered = serde_json::to_string_pretty(&container.state)?;
    println!("{}", rendered);
    std::process::exit(0);
}
/// `kill` subcommand: sends `args.signal` to the container.
///
/// # Errors
///
/// When delivery fails and the container is already stopped, `ESRCH` is
/// returned instead of the underlying error.
fn cmd_kill(opt: GlobalOpts, args: Kill) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut container = load_container(&state_root, &args.container_id)?;
    let signal: Signal = args.signal.as_str().try_into()?;

    let Err(e) = container.kill(signal, args.all) else {
        return Ok(ExitCode::SUCCESS);
    };
    if container.status() == ContainerStatus::Stopped {
        syd::t!("container not running");
        return Err(SydError::from(Errno::ESRCH));
    }
    syd::t!("failed to kill container: {e}");
    Err(e.into())
}
/// `delete` subcommand: removes a container.
///
/// With `--force`, a container whose state directory is already gone is
/// treated as successfully deleted.
fn cmd_delete(opt: GlobalOpts, args: Delete) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let root_path = opt.root.unwrap();
    let container_root = construct_container_root(&root_path, &args.container_id)?;

    let already_gone = !XPath::new(&container_root).exists(false);
    if already_gone && args.force {
        return Ok(ExitCode::SUCCESS);
    }

    Container::load(container_root)?.delete(args.force)?;
    Ok(ExitCode::SUCCESS)
}
/// `features` subcommand: prints the runtime's feature document as pretty
/// JSON on stdout.
///
/// A failure to query namespaces or capabilities is reported on stderr and
/// the corresponding list is left empty.
#[expect(clippy::disallowed_methods)]
fn cmd_features(_opt: GlobalOpts, _args: Features) -> SydResult<ExitCode> {
    let namespaces = query_supported_namespaces().unwrap_or_else(|e| {
        eprintln!("Error querying supported namespaces: {e}");
        Vec::new()
    });
    let capabilities = query_caps().unwrap_or_else(|e| {
        eprintln!("Error querying available capabilities: {e}");
        Vec::new()
    });

    // Build the individual feature sections first, then assemble them.
    let cgroup_feature = CgroupBuilder::default()
        .v1(true)
        .v2(true)
        .systemd(true)
        .systemd_user(true)
        .rdma(false)
        .build()
        .unwrap();
    let apparmor_feature = ApparmorBuilder::default().enabled(true).build().unwrap();
    let idmap_feature = IDMapBuilder::default().enabled(false).build().unwrap();
    let mount_extensions = MountExtensionsBuilder::default()
        .idmap(idmap_feature)
        .build()
        .unwrap();
    let selinux_feature = SelinuxBuilder::default().enabled(false).build().unwrap();
    let intel_rdt_feature = IntelRdtBuilder::default().enabled(true).build().unwrap();

    let linux = LinuxFeatureBuilder::default()
        .namespaces(namespaces)
        .capabilities(capabilities)
        .cgroup(cgroup_feature)
        .apparmor(apparmor_feature)
        .mount_extensions(mount_extensions)
        .selinux(selinux_feature)
        .intel_rdt(intel_rdt_feature)
        .build()
        .unwrap();

    let features = FeaturesBuilder::default()
        .oci_version_max(VERSION)
        .oci_version_min(String::from("1.0.0"))
        .hooks(known_hooks())
        .mount_options(MountOption::known_options())
        .linux(linux)
        .build()
        .unwrap();

    let pretty_json_str = serde_json::to_string_pretty(&features)?;
    println!("{pretty_json_str}");
    Ok(ExitCode::SUCCESS)
}
/// `ps` subcommand: lists the processes in the container's cgroup.
///
/// With format `json` the pids are printed as a JSON array. With format
/// `table` the output of `ps` (default options `-ef`) is filtered down to
/// the header plus the rows whose PID column is one of the container's
/// pids. Any other format prints nothing.
fn cmd_ps(opt: GlobalOpts, args: Ps) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let container = load_container(&state_root, &args.container_id)?;

    let cgroup_config = libcgroups::common::CgroupConfig {
        cgroup_path: container.spec()?.cgroup_path,
        systemd_cgroup: container.systemd(),
        container_name: container.id().to_string(),
    };
    let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?;

    let mut pids: Vec<i32> = Vec::new();
    for member in cmanager.get_all_pids()?.iter() {
        pids.push(member.as_raw());
    }

    match args.format.as_str() {
        "json" => println!("{}", serde_json::to_string(&pids)?),
        "table" => {
            let default_ps_options = vec![String::from("-ef")];
            let ps_options = match args.ps_options.is_empty() {
                true => &default_ps_options,
                false => &args.ps_options,
            };
            let output = std::process::Command::new("ps").args(ps_options).output()?;
            if output.status.success() {
                let text = std::str::from_utf8(&output.stdout)?;
                // `split` always yields at least one item, so a header row
                // (possibly empty) is always present.
                let mut rows = text.split('\n');
                let header = rows.next().unwrap_or_default();
                let pid_index = get_pid_index(header)?;
                println!("{}", header);
                for line in rows.filter(|row| !row.is_empty()) {
                    let fields: Vec<&str> = line.split_whitespace().collect();
                    let pid: i32 = fields[pid_index].parse()?;
                    if pids.contains(&pid) {
                        println!("{line}");
                    }
                }
            } else {
                // A failing `ps` has its stderr relayed on our stdout.
                println!("{}", std::str::from_utf8(&output.stderr)?);
            }
        }
        _ => {}
    }
    Ok(ExitCode::SUCCESS)
}
/// `list` subcommand: prints one tab-aligned row per container found in the
/// state root (id, pid, status, bundle, creation time, creator).
///
/// Directory entries without a state file are skipped.
fn cmd_list(opt: GlobalOpts, _args: List) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let root_path = fs::canonicalize(opt.root.unwrap())?;

    // Rows are collected first and emitted after the header below.
    let mut content = String::new();
    #[expect(clippy::disallowed_methods)]
    for entry in fs::read_dir(root_path)? {
        let container_dir = entry?.path();
        let state_file = ContainerState::file_path(&container_dir);
        if !XPath::new(&state_file).exists(true) {
            continue;
        }

        let container = Container::load(container_dir)?;
        // Missing pid / creation time are shown as empty cells.
        let pid = container
            .pid()
            .map(|pid| pid.to_string())
            .unwrap_or_default();
        let user_name = container.creator().unwrap_or_default();
        let created = container
            .created()
            .map(|utc| utc.to_rfc3339())
            .unwrap_or_default();

        let _ = writeln!(
            content,
            "{}\t{}\t{}\t{}\t{}\t{}",
            container.id(),
            pid,
            container.status(),
            container.bundle().display(),
            created,
            user_name.to_string_lossy()
        );
    }

    let mut tab_writer = TabWriter::new(std::io::stdout());
    writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tCREATOR")?;
    write!(&mut tab_writer, "{content}")?;
    tab_writer.flush()?;
    Ok(ExitCode::SUCCESS)
}
/// `spec` subcommand: writes a default (or rootless) runtime spec as pretty
/// JSON to `config.json` in the current directory.
fn cmd_spec(_opt: GlobalOpts, args: liboci_cli::Spec) -> SydResult<ExitCode> {
    let spec = if !args.rootless {
        Spec::default()
    } else {
        get_rootless_spec()?
    };

    #[expect(clippy::disallowed_methods)]
    let mut writer = BufWriter::new(fs::File::create("config.json")?);
    to_writer_pretty(&mut writer, &spec)?;
    // Flush explicitly so write errors are reported, not lost on drop.
    writer.flush()?;
    Ok(ExitCode::SUCCESS)
}
/// `pause` subcommand: pauses the container.
fn cmd_pause(opt: GlobalOpts, args: Pause) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut target = load_container(&state_root, &args.container_id)?;
    target.pause()?;
    Ok(ExitCode::SUCCESS)
}
/// `resume` subcommand: resumes the container.
fn cmd_resume(opt: GlobalOpts, args: Resume) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut target = load_container(&state_root, &args.container_id)?;
    target.resume()?;
    Ok(ExitCode::SUCCESS)
}
/// `events` subcommand: forwards `interval` and `stats` to the container's
/// `events` method.
fn cmd_events(opt: GlobalOpts, args: Events) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut target = load_container(&state_root, &args.container_id)?;
    target.events(args.interval, args.stats)?;
    Ok(ExitCode::SUCCESS)
}
/// `update` subcommand: applies new resource limits to the container's
/// cgroup.
///
/// The limits come from a JSON document (`-` means stdin, anything else is
/// a file path) or, when no document is given, from `--pids-limit` alone.
fn cmd_update(opt: GlobalOpts, args: Update) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let container = load_container(&state_root, &args.container_id)?;

    let cgroup_config = libcgroups::common::CgroupConfig {
        cgroup_path: container.spec()?.cgroup_path,
        systemd_cgroup: container.systemd(),
        container_name: container.id().to_string(),
    };
    let cmanager = libcgroups::common::create_cgroup_manager(cgroup_config)?;

    #[expect(clippy::disallowed_methods)]
    let linux_res: LinuxResources = match args.resources {
        Some(resources_path) if resources_path.to_string_lossy() == "-" => {
            serde_json::from_reader(std::io::stdin())?
        }
        Some(resources_path) => {
            let reader = BufReader::new(fs::File::open(resources_path)?);
            serde_json::from_reader(reader)?
        }
        None => {
            let mut builder = LinuxResourcesBuilder::default();
            if let Some(new_pids_limit) = args.pids_limit {
                let pids = LinuxPidsBuilder::default().limit(new_pids_limit).build()?;
                builder = builder.pids(pids);
            }
            builder.build()?
        }
    };

    let controller_opt = ControllerOpt {
        resources: &linux_res,
        disable_oom_killer: false,
        oom_score_adj: None,
        freezer_state: None,
    };
    cmanager.apply(&controller_opt)?;
    Ok(ExitCode::SUCCESS)
}
/// `checkpoint` subcommand: checkpoints the container, passing the
/// command-line options through to libcontainer one-to-one.
fn cmd_checkpoint(opt: GlobalOpts, args: Checkpoint) -> SydResult<ExitCode> {
    // `make_root` has populated `opt.root` before dispatch.
    #[expect(clippy::disallowed_methods)]
    let state_root = opt.root.unwrap();
    let mut container = load_container(&state_root, &args.container_id)?;

    container.checkpoint(&libcontainer::container::CheckpointOptions {
        ext_unix_sk: args.ext_unix_sk,
        file_locks: args.file_locks,
        image_path: args.image_path,
        leave_running: args.leave_running,
        shell_job: args.shell_job,
        tcp_established: args.tcp_established,
        work_path: args.work_path,
    })?;
    Ok(ExitCode::SUCCESS)
}
/// `exec` subcommand: runs an additional process inside a container.
///
/// When detached, returns success as soon as the process is launched.
/// Otherwise waits for it and maps its fate to an exit code: the exit
/// status for a normal exit, `128 + signal` when killed by a signal.
fn cmd_exec(opt: GlobalOpts, args: Exec) -> SydResult<ExitCode> {
    // Everything needed later is copied out before `args` is consumed
    // by the builder's constructor.
    let detach = args.detach;
    let no_new_privs = args.no_new_privs;
    let ignore_paused = args.ignore_paused;
    let (user, group) = match args.user {
        Some((uid, gid)) => (Some(uid), gid),
        None => (None, None),
    };
    let cwd = args.cwd.clone();
    let env = args.env.clone().into_iter().collect();
    let command = args.command.clone();
    let process = args.process.clone();
    let additional_gids = args.additional_gids.clone();
    let cap = args.cap.clone();
    let cgroup = args.cgroup.clone();
    let apparmor = args.apparmor.clone();

    let pid = SydTenantContainerBuilder::new(opt, args)?
        .with_detach(detach)
        .with_cwd(cwd)
        .with_env(env)
        .with_process(process)
        .with_no_new_privs(no_new_privs)
        .with_container_args(command)
        .with_additional_gids(additional_gids)
        .with_user(user)
        .with_group(group)
        .with_capabilities(cap)
        .with_ignore_paused(ignore_paused)
        .with_sub_cgroup(cgroup)
        .with_apparmor(apparmor)
        .build()?;
    if detach {
        return Ok(ExitCode::SUCCESS);
    }

    // Wait for the process, retrying when interrupted by a signal.
    loop {
        match waitid(Id::Pid(Pid::from_raw(pid.as_raw())), WaitPidFlag::WEXITED) {
            Err(Errno::EINTR) => {}
            Err(errno) => return Err(errno.into()),
            Ok(WaitStatus::Exited(_, status)) => return Ok(ExitCode::from(status as u8)),
            Ok(WaitStatus::Signaled(_, sig, _)) => {
                return Ok(ExitCode::from(128 + (sig as u8)));
            }
            Ok(_) => return Ok(ExitCode::SUCCESS),
        }
    }
}
/// `run` subcommand: creates and starts a container in one step.
///
/// When detached, returns right after start. Otherwise stays in the
/// foreground until the init process ends, deletes the container, and
/// returns the value reported by `handle_foreground` truncated to `u8`.
fn cmd_run(opt: GlobalOpts, args: Run) -> SydResult<ExitCode> {
    // Copy the flags out first: `opt` and `args` are consumed by the builder.
    let (detach, no_pivot, systemd_cgroup) = (args.detach, args.no_pivot, opt.systemd_cgroup);

    let mut container = SydInitContainerBuilder::try_from((opt, args))?
        .with_systemd(systemd_cgroup)
        .with_detach(detach)
        .with_no_pivot(no_pivot)
        .build()?;
    container.start()?;
    if detach {
        return Ok(ExitCode::SUCCESS);
    }

    debug_assert!(
        container.pid().is_some(),
        "expects a container init pid in the container state"
    );
    #[expect(clippy::disallowed_methods)]
    let init_pid = Pid::from_raw(container.pid().unwrap().as_raw());
    let foreground_result = handle_foreground(init_pid);

    // Delete first: a deletion failure takes precedence over the
    // foreground result.
    container.delete(true)?;
    foreground_result.map(|exit_code| ExitCode::from(exit_code as u8))
}
/// Loads the OCI runtime spec from `config` and adjusts it for Syd.
///
/// Two adjustments are made:
///
/// 1. If the spec carries a seccomp section, the syscalls listed in
///    `syd::config::OCI_SYSCALLS` are removed from every rule that is not a
///    plain allow (action other than `ScmpActAllow`, or an errno return
///    set), and one extra allow rule covering all of `OCI_SYSCALLS` is
///    appended. All other seccomp settings are carried over.
/// 2. Unless rootless mode is required, `Capability::SysPtrace` is added to
///    the bounding, effective and permitted sets of the process; the
///    inheritable and ambient sets are kept as given (or set empty).
///
/// # Errors
///
/// Propagates spec loading, builder and rootless-detection failures.
fn syd_spec_load<P: AsRef<Path>>(config: P) -> Result<Spec, LibcontainerError> {
let mut spec = Spec::load(&config)?;
if let Some(linux) = spec.linux() {
if let Some(seccomp) = linux.seccomp() {
syd::t!(
"Syd-OCI-Seccomp-Pre: {}",
serde_json::to_string(&seccomp).unwrap_or("?".to_string())
);
let mut syscalls = if let Some(syscalls) = seccomp.syscalls() {
syscalls
.iter()
.cloned()
.map(|mut entry| {
// Rules other than a plain allow lose the OCI_SYSCALLS names.
if entry.action() != LinuxSeccompAction::ScmpActAllow
|| entry.errno_ret().is_some()
{
let filtered = entry
.names()
.iter()
.filter(|n| {
// Keep names NOT found in OCI_SYSCALLS.
// NOTE(review): binary_search assumes the
// table is sorted — confirm in syd::config.
syd::config::OCI_SYSCALLS
.binary_search(&n.as_str())
.is_err()
})
.cloned()
.collect::<Vec<String>>();
entry.set_names(filtered);
}
entry
})
.collect::<Vec<LinuxSyscall>>()
} else {
Vec::new()
};
// Append an explicit allow rule for every syscall in OCI_SYSCALLS.
let sydallowlist = LinuxSyscallBuilder::default()
.action(LinuxSeccompAction::ScmpActAllow)
.names(
syd::config::OCI_SYSCALLS
.iter()
.map(ToString::to_string)
.collect::<Vec<_>>(),
)
.build()?;
syscalls.push(sydallowlist);
// Rebuild the seccomp section, copying each optional field that is set.
let mut builder = LinuxSeccompBuilder::default()
.default_action(seccomp.default_action())
.syscalls(syscalls);
if let Some(default_errno_ret) = seccomp.default_errno_ret() {
builder = builder.default_errno_ret(default_errno_ret)
}
if let Some(flags) = seccomp.flags() {
builder = builder.flags(flags.clone());
}
if let Some(architectures) = seccomp.architectures() {
builder = builder.architectures(architectures.clone());
}
if let Some(listener_path) = seccomp.listener_path() {
builder = builder.listener_path(listener_path);
}
if let Some(listener_metadata) = seccomp.listener_metadata() {
builder = builder.listener_metadata(listener_metadata);
}
let seccomp = builder.build()?;
syd::t!(
"Syd-OCI-Seccomp-Post: {}",
serde_json::to_string(&seccomp).unwrap_or("?".to_string())
);
let mut linux = linux.clone();
linux.set_seccomp(Some(seccomp));
spec.set_linux(Some(linux));
}
}
if let Some(process) = spec.process() {
let syscall = create_syscall();
// In rootless mode the capability sets are left untouched; the
// seccomp changes above are still part of the returned spec.
if rootless_required(&*syscall).map_err(LibcontainerError::OtherIO)? {
return Ok(spec);
}
let mut p = process.clone();
if let Some(capabilities) = process.capabilities() {
let mut caps = LinuxCapabilitiesBuilder::default();
// SysPtrace is added to bounding, effective and permitted ...
if let Some(c) = capabilities.bounding() {
let mut c = c.clone();
c.insert(Capability::SysPtrace);
caps = caps.bounding(c);
} else {
caps = caps.bounding(HashSet::from([Capability::SysPtrace]));
}
if let Some(c) = capabilities.effective() {
let mut c = c.clone();
c.insert(Capability::SysPtrace);
caps = caps.effective(c);
} else {
caps = caps.effective(HashSet::from([Capability::SysPtrace]));
}
if let Some(c) = capabilities.permitted() {
let mut c = c.clone();
c.insert(Capability::SysPtrace);
caps = caps.permitted(c);
} else {
caps = caps.permitted(HashSet::from([Capability::SysPtrace]));
}
// ... while inheritable and ambient are copied unchanged.
if let Some(c) = capabilities.inheritable() {
caps = caps.inheritable(c.clone());
} else {
caps = caps.inheritable(HashSet::new());
}
if let Some(c) = capabilities.ambient() {
caps = caps.ambient(c.clone());
} else {
caps = caps.ambient(HashSet::new());
}
let caps = caps.build()?;
p.set_capabilities(Some(caps));
} else {
// No capabilities in the spec: grant SysPtrace only.
let caps = LinuxCapabilitiesBuilder::default()
.bounding(HashSet::from([Capability::SysPtrace]))
.effective(HashSet::from([Capability::SysPtrace]))
.permitted(HashSet::from([Capability::SysPtrace]))
.inheritable(HashSet::new())
.ambient(HashSet::new())
.build()?;
p.set_capabilities(Some(caps));
}
spec.set_process(Some(p));
}
Ok(spec)
}
/// Waits in the foreground until the container init process terminates.
///
/// All signals are blocked for the calling thread and received
/// synchronously. `SIGCHLD` triggers reaping of every terminated child;
/// `SIGURG` and `SIGWINCH` are ignored; every other signal is forwarded to
/// `init_pid` on a best-effort basis.
///
/// Returns the exit status of the init process, or the number of the
/// signal that terminated it.
///
/// # Errors
///
/// Fails when blocking or waiting for signals fails, or when `waitid`
/// reports an error other than `EINTR`/`ECHILD`.
fn handle_foreground(init_pid: Pid) -> SydResult<i32> {
    syd::t!("waiting for container init process to exit");
    let signal_set = SigSet::all();
    signal_set.thread_block()?;
    loop {
        match signal_set.wait()? {
            signal::SIGCHLD => {
                syd::t!("reaping child processes");
                // One SIGCHLD may stand for several terminated children,
                // so keep reaping until nothing is left to collect.
                loop {
                    // waitid(2) needs at least one of WEXITED, WSTOPPED or
                    // WCONTINUED; WNOHANG alone is rejected with EINVAL.
                    match waitid(Id::All, WaitPidFlag::WEXITED | WaitPidFlag::WNOHANG) {
                        Ok(WaitStatus::Exited(pid, status)) => {
                            if pid.eq(&init_pid) {
                                return Ok(status);
                            }
                        }
                        Ok(WaitStatus::Signaled(pid, signal, _)) => {
                            if pid.eq(&init_pid) {
                                return Ok(signal);
                            }
                        }
                        // No more children have changed state.
                        Ok(WaitStatus::StillAlive) => {
                            break;
                        }
                        Ok(_) | Err(Errno::EINTR) => {}
                        // No children left at all.
                        Err(Errno::ECHILD) => {
                            break;
                        }
                        Err(errno) => return Err(errno.into()),
                    }
                }
            }
            // Deliberately not forwarded to the container.
            signal::SIGURG => {}
            signal::SIGWINCH => {}
            signal => {
                syd::t!("forwarding signal {}", signal as i32);
                // Forwarding is best effort: a failure is logged, not fatal.
                #[expect(clippy::disallowed_methods)]
                let _ = kill(init_pid, Some(signal)).map_err(|_err| {
                    syd::t!("failed to forward signal to container init process: {_err}")
                });
            }
        }
    }
}
/// Builds a default runtime spec suitable for rootless use.
///
/// Compared to the default spec: the network namespace is dropped, the
/// user namespace is (re-)added, the effective uid/gid are mapped to id 0
/// in the container, `/sys` becomes a read-only recursive bind mount and
/// `uid=`/`gid=` options are stripped from all other mounts.
fn get_rootless_spec() -> SydResult<Spec> {
    // Drop network and user namespaces, then add a fresh user namespace.
    let mut namespaces: Vec<LinuxNamespace> =
        libcontainer::oci_spec::runtime::get_default_namespaces();
    namespaces.retain(|ns| {
        !matches!(
            ns.typ(),
            LinuxNamespaceType::Network | LinuxNamespaceType::User
        )
    });
    let user_ns = LinuxNamespaceBuilder::default()
        .typ(LinuxNamespaceType::User)
        .build()?;
    namespaces.push(user_ns);

    // Map exactly one id: the caller's effective id becomes id 0 inside.
    let uid = Uid::effective().as_raw();
    let gid = Gid::effective().as_raw();
    let single_id_mapping = |host_id: u32| {
        LinuxIdMappingBuilder::default()
            .host_id(host_id)
            .container_id(0_u32)
            .size(1_u32)
            .build()
    };
    let linux = LinuxBuilder::default()
        .namespaces(namespaces)
        .uid_mappings(vec![single_id_mapping(uid)?])
        .gid_mappings(vec![single_id_mapping(gid)?])
        .build()?;

    let mut mounts: Vec<Mount> = libcontainer::oci_spec::runtime::get_default_mounts();
    for mount in mounts.iter_mut() {
        if mount.destination().as_path() == Path::new("/sys") {
            let sys_options: Vec<String> = ["rbind", "nosuid", "noexec", "nodev", "ro"]
                .iter()
                .map(|opt| opt.to_string())
                .collect();
            mount
                .set_source(Some(PathBuf::from("/sys")))
                .set_typ(Some(String::from("none")))
                .set_options(Some(sys_options));
            continue;
        }
        // Every other mount keeps its options minus the uid=/gid= ones;
        // a mount without options ends up with an empty list.
        let options: Vec<String> = match mount.options() {
            Some(existing) => existing
                .iter()
                .filter(|o| !(o.starts_with("gid=") || o.starts_with("uid=")))
                .cloned()
                .collect(),
            None => Vec::new(),
        };
        mount.set_options(Some(options));
    }

    let mut spec = Spec::default();
    spec.set_linux(Some(linux)).set_mounts(Some(mounts));
    Ok(spec)
}
/// Returns the zero-based position of the `PID` column in a `ps` header
/// line, counting whitespace-separated fields.
///
/// # Errors
///
/// Returns `ENOENT` when the header has no `PID` column.
fn get_pid_index(title: &str) -> SydResult<usize> {
    title
        .split_whitespace()
        .position(|name| name == "PID")
        .ok_or_else(|| Errno::ENOENT.into())
}
fn make_root(opt: &mut GlobalOpts) -> SydResult<()> {
let uid = Uid::current();
#[expect(clippy::disallowed_methods)]
if opt.root.is_none() {
let syscall = create_syscall();
let is_rootless_required = rootless_required(&*syscall)?;
opt.root = Some(if !is_rootless_required {
PathBuf::from("/run/syd")
} else if let Ok(path) = env::var("XDG_RUNTIME_DIR") {
PathBuf::from(format!("{path}/syd"))
} else {
PathBuf::from(format!("/run/user/{uid}/syd"))
});
};
let path = match opt.root {
Some(ref path) => path,
_ => unreachable!(),
};
mkdir_p(path, Mode::S_IRWXU | Mode::S_ISVTX)?;
let path = path.canonicalize()?;
assert_eq!(path_uid(&path)?, uid, "UID mismatch on root directory!");
opt.root = Some(path);
Ok(())
}
/// Creates `dir` and any missing parents, like `mkdir -p`, requesting
/// `mode` for the directories it creates.
fn mkdir_p<P: AsRef<Path>>(dir: P, mode: Mode) -> SydResult<()> {
    let mut builder = DirBuilder::new();
    builder.recursive(true).mode(mode.bits());
    builder.create(&dir)?;
    Ok(())
}
/// Returns the uid owning `path`, taken from `fs::metadata`.
fn path_uid<P: AsRef<Path>>(path: P) -> SydResult<Uid> {
    let meta = fs::metadata(&path)?;
    let owner = meta.st_uid();
    Ok(Uid::from_raw(owner))
}
/// Resolves `name` to an existing path.
///
/// A name containing `/` that exists is returned as-is. Otherwise each
/// `:`-separated directory of `path_var` is tried in order and the first
/// existing `<dir>/<name>` is returned. Only existence is checked, not
/// executability.
fn get_executable_path(name: &str, path_var: &str) -> Option<PathBuf> {
    if name.contains('/') && XPath::new(name).exists(true) {
        return Some(PathBuf::from(name));
    }
    path_var
        .split(':')
        .map(|dir| PathBuf::from(dir).join(name))
        .find(|candidate| XPath::new(candidate).exists(true))
}
/// Tells whether `path` is a regular file with the "others" execute bit
/// (`0o001`) set.
///
/// Directories are never reported as executable.
///
/// # Errors
///
/// Returns the I/O error from reading the file's metadata.
fn is_executable(path: &Path) -> std::result::Result<bool, std::io::Error> {
    let metadata = path.metadata()?;
    if !metadata.is_file() {
        return Ok(false);
    }
    let mode_bits = metadata.permissions().mode();
    Ok(mode_bits & 0o001 != 0)
}
/// Returns the cgroup path for a container: the path from the spec when
/// one is given, otherwise the default `:syd:<container_id>`.
fn get_cgroup_path(cgroups_path: &Option<PathBuf>, container_id: &str) -> PathBuf {
    cgroups_path
        .as_ref()
        .map_or_else(|| PathBuf::from(format!(":syd:{container_id}")), Clone::clone)
}
/// Returns the `Debug` name of every capability in
/// `syd::caps::Capabilities::all()`.
fn query_caps() -> SydResult<Vec<String>> {
    let mut names = Vec::new();
    for cap in syd::caps::Capabilities::all().iter() {
        names.push(format!("{cap:?}"));
    }
    Ok(names)
}
/// Returns the fixed list of namespace types advertised by the `features`
/// subcommand. This never fails; the `Result` only matches the signature
/// shape of `query_caps`.
fn query_supported_namespaces() -> SydResult<Vec<LinuxNamespaceType>> {
    let supported = Vec::from([
        LinuxNamespaceType::Pid,
        LinuxNamespaceType::Network,
        LinuxNamespaceType::Uts,
        LinuxNamespaceType::Ipc,
        LinuxNamespaceType::Mount,
        LinuxNamespaceType::User,
        LinuxNamespaceType::Cgroup,
        LinuxNamespaceType::Time,
    ]);
    Ok(supported)
}
/// Returns the hook names advertised by the `features` subcommand, in a
/// fixed order.
fn known_hooks() -> Vec<String> {
    const HOOK_NAMES: [&str; 6] = [
        "prestart",
        "createRuntime",
        "createContainer",
        "startContainer",
        "poststart",
        "poststop",
    ];
    HOOK_NAMES.into_iter().map(String::from).collect()
}