use crate::cdi::{self, CdiContainerEdits, CdiRegistry};
use crate::error::{AgentError, Result};
use crate::runtime::ContainerId;
use oci_spec::runtime::{
Capability, Hook, HookBuilder, Hooks, HooksBuilder, LinuxBuilder, LinuxCapabilitiesBuilder,
LinuxCpuBuilder, LinuxDeviceBuilder, LinuxDeviceCgroupBuilder, LinuxDeviceType,
LinuxMemoryBuilder, LinuxNamespaceBuilder, LinuxNamespaceType, LinuxResourcesBuilder, Mount,
MountBuilder, PosixRlimit, PosixRlimitBuilder, PosixRlimitType, ProcessBuilder, RootBuilder,
Spec, SpecBuilder, UserBuilder,
};
#[cfg(unix)]
use oci_spec::runtime::LinuxIdMappingBuilder;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use tokio::fs;
use zlayer_secrets::SecretsProvider;
use zlayer_spec::{GpuSharingMode, ServiceSpec, StorageSpec, StorageTier};
/// Host directory used for the CUDA MPS pipe when the GPU spec does not set `mps_pipe_dir`.
const DEFAULT_MPS_PIPE_DIR: &str = "/tmp/nvidia-mps";
/// Host directory used for CUDA MPS logs when the GPU spec does not set `mps_log_dir`.
const DEFAULT_MPS_LOG_DIR: &str = "/tmp/nvidia-log";
/// In-container destination of the bind-mounted GPU time-slicing config file.
const TIMESLICE_CONFIG_CONTAINER_PATH: &str = "/etc/nvidia/gpu-time-slicing.yaml";
/// Host directories backing CUDA MPS, as resolved by `resolve_mps_dirs`.
struct MpsDirs {
// Directory exported to the container as `CUDA_MPS_PIPE_DIRECTORY`.
pipe_dir: PathBuf,
// Directory exported to the container as `CUDA_MPS_LOG_DIRECTORY`.
log_dir: PathBuf,
}
/// Resolves the host directories used for CUDA MPS sharing.
///
/// Returns `Ok(None)` when the GPU spec does not request [`GpuSharingMode::Mps`].
/// Otherwise the pipe and log directories come from the spec, falling back to
/// [`DEFAULT_MPS_PIPE_DIR`] / [`DEFAULT_MPS_LOG_DIR`].
///
/// # Errors
///
/// Returns [`AgentError::GpuSharingUnavailable`] when either directory is not
/// an existing directory on the host. The pipe directory is checked first.
fn resolve_mps_dirs(gpu: &zlayer_spec::GpuSpec) -> Result<Option<MpsDirs>> {
    if !matches!(gpu.sharing, Some(GpuSharingMode::Mps)) {
        return Ok(None);
    }

    let pipe_dir = PathBuf::from(gpu.mps_pipe_dir.as_deref().unwrap_or(DEFAULT_MPS_PIPE_DIR));
    let log_dir = PathBuf::from(gpu.mps_log_dir.as_deref().unwrap_or(DEFAULT_MPS_LOG_DIR));

    // Both directories get the same check and the same diagnostic, differing
    // only in the word naming which directory is missing.
    for (label, dir) in [("pipe", &pipe_dir), ("log", &log_dir)] {
        if dir.is_dir() {
            continue;
        }
        return Err(AgentError::GpuSharingUnavailable {
            mode: "mps".to_string(),
            reason: format!(
                "MPS {label} directory {} does not exist; ensure nvidia-cuda-mps-control is running",
                dir.display()
            ),
        });
    }

    Ok(Some(MpsDirs { pipe_dir, log_dir }))
}
/// Converts a CDI device node into an OCI `LinuxDevice`.
///
/// Values the CDI node does not carry are probed from the host path (the
/// node's `host_path`, or its `path` when no host path is given):
/// * device type: falls back to a character device when probing fails;
/// * major/minor: both must be present on the node, otherwise they are
///   probed, falling back to `(0, 0)`;
/// * file mode defaults to `0o666`, uid and gid default to `0`.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when the OCI device builder rejects the
/// assembled values.
fn cdi_node_to_oci_device(
    node: &crate::cdi::CdiDeviceNode,
) -> Result<oci_spec::runtime::LinuxDevice> {
    let host_path = node.host_path.as_deref().unwrap_or(&node.path);

    let dev_type = match node.device_type.as_deref() {
        Some("b") => LinuxDeviceType::B,
        Some("p") => LinuxDeviceType::P,
        Some("c" | "u") => LinuxDeviceType::C,
        _ => get_device_type(host_path).unwrap_or(LinuxDeviceType::C),
    };

    let (major, minor) = match (node.major, node.minor) {
        (Some(maj), Some(min)) => (maj, min),
        _ => get_device_major_minor(host_path).unwrap_or((0, 0)),
    };

    let base = LinuxDeviceBuilder::default()
        .path(node.path.clone())
        .typ(dev_type)
        .major(major)
        .minor(minor);
    let with_mode = match node.file_mode {
        Some(mode) => base.file_mode(mode),
        None => base.file_mode(0o666u32),
    };
    let with_owner = with_mode
        .uid(node.uid.unwrap_or(0))
        .gid(node.gid.unwrap_or(0));

    match with_owner.build() {
        Ok(device) => Ok(device),
        Err(e) => Err(AgentError::InvalidSpec(format!(
            "failed to build CDI device {path}: {e}",
            path = node.path
        ))),
    }
}
/// Converts a CDI hook description into an OCI runtime [`Hook`].
///
/// `args` and `env` are only set on the OCI hook when the CDI hook provides a
/// non-empty list for them.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when the hook builder fails.
fn convert_cdi_hook(cdi_hook: &crate::cdi::CdiHook) -> Result<Hook> {
    let with_path = HookBuilder::default().path(PathBuf::from(&cdi_hook.path));

    let with_args = if cdi_hook.args.is_empty() {
        with_path
    } else {
        with_path.args(cdi_hook.args.clone())
    };

    let with_env = if cdi_hook.env.is_empty() {
        with_args
    } else {
        with_args.env(cdi_hook.env.clone())
    };

    match with_env.build() {
        Ok(hook) => Ok(hook),
        Err(e) => Err(AgentError::InvalidSpec(format!(
            "failed to build CDI hook: {e}"
        ))),
    }
}
/// Capability set granted to privileged containers.
///
/// `build_capabilities` copies this list into the bounding, effective and
/// permitted sets when `spec.privileged` is true.
const ALL_CAPABILITIES: &[Capability] = &[
Capability::AuditControl,
Capability::AuditRead,
Capability::AuditWrite,
Capability::BlockSuspend,
Capability::Bpf,
Capability::CheckpointRestore,
Capability::Chown,
Capability::DacOverride,
Capability::DacReadSearch,
Capability::Fowner,
Capability::Fsetid,
Capability::IpcLock,
Capability::IpcOwner,
Capability::Kill,
Capability::Lease,
Capability::LinuxImmutable,
Capability::MacAdmin,
Capability::MacOverride,
Capability::Mknod,
Capability::NetAdmin,
Capability::NetBindService,
Capability::NetBroadcast,
Capability::NetRaw,
Capability::Perfmon,
Capability::Setfcap,
Capability::Setgid,
Capability::Setpcap,
Capability::Setuid,
Capability::SysAdmin,
Capability::SysBoot,
Capability::SysChroot,
Capability::SysModule,
Capability::SysNice,
Capability::SysPacct,
Capability::SysPtrace,
Capability::SysRawio,
Capability::SysResource,
Capability::SysTime,
Capability::SysTtyConfig,
Capability::Syslog,
Capability::WakeAlarm,
];
/// Renders the contents of a `resolv.conf` file.
///
/// Emits one `nameserver <addr>` line per entry of `nameservers`, in order,
/// followed by a final `options edns0` line. The options line is emitted even
/// when `nameservers` is empty.
#[must_use]
pub fn generate_resolv_conf(nameservers: &[String]) -> String {
    let mut contents: String = nameservers
        .iter()
        .flat_map(|ns| ["nameserver ", ns.as_str(), "\n"])
        .collect();
    contents.push_str("options edns0\n");
    contents
}
/// Parses a human-readable memory quantity into a byte count.
///
/// Accepted forms (surrounding whitespace is ignored):
/// * binary suffixes `Ki`, `Mi`, `Gi`, `Ti` (powers of 1024);
/// * decimal suffixes `K`/`k`, `M`/`m`, `G`/`g`, `T`/`t` (powers of 1000);
/// * a bare integer, interpreted as bytes.
///
/// # Errors
///
/// Returns a descriptive message when the input is empty, when the numeric
/// part is not a valid `u64`, or when the scaled value does not fit in a
/// `u64`.
pub fn parse_memory_string(s: &str) -> std::result::Result<u64, String> {
    // Two-letter binary suffixes are listed first so they are tried before the
    // single-letter decimal ones.
    const UNITS: [(&str, u64); 12] = [
        ("Ki", 1024),
        ("Mi", 1024 * 1024),
        ("Gi", 1024 * 1024 * 1024),
        ("Ti", 1024 * 1024 * 1024 * 1024),
        ("K", 1000),
        ("k", 1000),
        ("M", 1000 * 1000),
        ("m", 1000 * 1000),
        ("G", 1000 * 1000 * 1000),
        ("g", 1000 * 1000 * 1000),
        ("T", 1000 * 1000 * 1000 * 1000),
        ("t", 1000 * 1000 * 1000 * 1000),
    ];

    let s = s.trim();
    if s.is_empty() {
        return Err("empty memory string".to_string());
    }

    let (num_str, multiplier) = UNITS
        .iter()
        .find_map(|&(suffix, factor)| s.strip_suffix(suffix).map(|digits| (digits, factor)))
        .unwrap_or((s, 1));

    let num: u64 = num_str
        .parse()
        .map_err(|e| format!("invalid number: {e}"))?;

    // A plain `num * multiplier` would panic in debug builds and silently wrap
    // in release builds for values such as "18446744073709551615Ki".
    num.checked_mul(multiplier)
        .ok_or_else(|| format!("memory value `{s}` overflows u64"))
}
/// Reads the `(major, minor)` device numbers of the node at `path`.
///
/// The raw `st_rdev` value is decoded with the Linux/glibc `dev_t` layout:
/// the major number occupies bits 8..20 and 32..52, the minor number bits
/// 0..8 and 20..32. Decoding only the low 8 bits of each would truncate
/// majors or minors above 255.
///
/// # Errors
///
/// Returns the underlying I/O error when `path` cannot be stat'ed.
#[cfg(unix)]
// Both components are masked to at most 32 bits, so the casts cannot wrap.
#[allow(clippy::cast_possible_wrap)]
fn get_device_major_minor(path: &str) -> std::io::Result<(i64, i64)> {
    use std::os::unix::fs::MetadataExt;
    let rdev = std::fs::metadata(path)?.rdev();
    let major = ((rdev >> 32) & 0xffff_f000) | ((rdev >> 8) & 0x0000_0fff);
    let minor = ((rdev >> 12) & 0xffff_ff00) | (rdev & 0x0000_00ff);
    Ok((major as i64, minor as i64))
}
/// Non-Unix stand-in: device numbers cannot be probed, so this always fails
/// with [`std::io::ErrorKind::Unsupported`].
#[cfg(not(unix))]
fn get_device_major_minor(_path: &str) -> std::io::Result<(i64, i64)> {
    Err(std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "device-cgroup probes require Unix",
    ))
}
/// Maps a ulimit name (for example `nofile`) to its OCI rlimit type.
///
/// Matching is ASCII case-insensitive. Returns `None` for names that are not
/// in the table below, including the empty string.
fn ulimit_name_to_posix(name: &str) -> Option<PosixRlimitType> {
    let lowered = name.to_ascii_lowercase();
    match lowered.as_str() {
        "cpu" => Some(PosixRlimitType::RlimitCpu),
        "fsize" => Some(PosixRlimitType::RlimitFsize),
        "data" => Some(PosixRlimitType::RlimitData),
        "stack" => Some(PosixRlimitType::RlimitStack),
        "core" => Some(PosixRlimitType::RlimitCore),
        "rss" => Some(PosixRlimitType::RlimitRss),
        "nproc" => Some(PosixRlimitType::RlimitNproc),
        "nofile" => Some(PosixRlimitType::RlimitNofile),
        "memlock" => Some(PosixRlimitType::RlimitMemlock),
        "as" => Some(PosixRlimitType::RlimitAs),
        "locks" => Some(PosixRlimitType::RlimitLocks),
        "sigpending" => Some(PosixRlimitType::RlimitSigpending),
        "msgqueue" => Some(PosixRlimitType::RlimitMsgqueue),
        "nice" => Some(PosixRlimitType::RlimitNice),
        "rtprio" => Some(PosixRlimitType::RlimitRtprio),
        "rttime" => Some(PosixRlimitType::RlimitRttime),
        _ => None,
    }
}
/// Unit tests for the ulimit-name to rlimit-type translation.
#[cfg(test)]
mod ulimit_translation_tests {
    use super::{ulimit_name_to_posix, PosixRlimitType};

    /// Known names resolve to their rlimit type regardless of ASCII case.
    #[test]
    fn known_names_map() {
        let cases = [
            ("nofile", PosixRlimitType::RlimitNofile),
            ("NOFILE", PosixRlimitType::RlimitNofile),
            ("nproc", PosixRlimitType::RlimitNproc),
            ("as", PosixRlimitType::RlimitAs),
        ];
        for (name, expected) in cases {
            assert_eq!(ulimit_name_to_posix(name), Some(expected));
        }
    }

    /// Names outside the table, including the empty string, yield `None`.
    #[test]
    fn unknown_names_return_none() {
        for name in ["not_a_real_ulimit", ""] {
            assert!(ulimit_name_to_posix(name).is_none());
        }
    }
}
/// Classifies the file at `path` as an OCI device type.
///
/// Character devices map to `C`, block devices to `B`, and every other file
/// type to `U`.
///
/// # Errors
///
/// Returns the underlying I/O error when `path` cannot be stat'ed.
#[cfg(unix)]
fn get_device_type(path: &str) -> std::io::Result<LinuxDeviceType> {
    use std::os::unix::fs::FileTypeExt;
    let kind = std::fs::metadata(path)?.file_type();
    let detected = match (kind.is_char_device(), kind.is_block_device()) {
        (true, _) => LinuxDeviceType::C,
        (false, true) => LinuxDeviceType::B,
        (false, false) => LinuxDeviceType::U,
    };
    Ok(detected)
}
/// Non-Unix stand-in: device types cannot be probed, so this always fails
/// with [`std::io::ErrorKind::Unsupported`].
#[cfg(not(unix))]
fn get_device_type(_path: &str) -> std::io::Result<LinuxDeviceType> {
    let unsupported = std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "device-cgroup probes require Unix",
    );
    Err(unsupported)
}
/// Builder that assembles an OCI bundle (rootfs link plus `config.json`) for a
/// container from a `ServiceSpec` and optional overrides.
#[derive(Clone)]
pub struct BundleBuilder {
// Directory the bundle is created in; `build` writes `config.json` here.
bundle_dir: PathBuf,
// When set, `build` symlinks `<bundle_dir>/rootfs` to this path; otherwise an
// empty `rootfs` directory is created inside the bundle.
rootfs_path: Option<PathBuf>,
// Hostname for the OCI spec; defaults to the container id's string form.
hostname: Option<String>,
// Extra `KEY=VALUE` pairs applied after the spec's env, replacing same-named keys.
extra_env: Vec<(String, String)>,
// Working directory override; takes precedence over the spec's workdir and
// the image config's working dir.
cwd: Option<String>,
// Process arguments override; when unset the command is resolved from the
// spec and image config.
args: Option<Vec<String>>,
// Prepared volume paths that `build` passes on when generating the spec.
volume_paths: HashMap<String, PathBuf>,
// Image configuration consulted for the user, env and working directory.
image_config: Option<zlayer_registry::ImageConfig>,
// Set via `with_host_network`.
// NOTE(review): not read by the code visible in this section — confirm usage.
host_network: bool,
// Secrets provider used for env resolution; only takes effect together with
// `deployment_scope`.
secrets_provider: Option<Arc<dyn SecretsProvider>>,
// Scope passed to secret-aware env resolution.
deployment_scope: Option<String>,
// Host path bind-mounted read-only at the default ZLayer socket path.
socket_path: Option<String>,
// Explicit CDI registry; when set, CDI resolution failures are hard errors.
cdi_registry: Option<Arc<CdiRegistry>>,
}
/// Manual `Debug` implementation: the secrets provider and the CDI registry
/// are rendered as booleans indicating whether they are set, every other
/// field is printed as-is.
impl std::fmt::Debug for BundleBuilder {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive destructuring makes the compiler flag this impl whenever
        // a field is added to the struct.
        let Self {
            bundle_dir,
            rootfs_path,
            hostname,
            extra_env,
            cwd,
            args,
            volume_paths,
            image_config,
            host_network,
            secrets_provider,
            deployment_scope,
            socket_path,
            cdi_registry,
        } = self;
        let has_secrets_provider = secrets_provider.is_some();
        let has_cdi_registry = cdi_registry.is_some();

        let mut out = f.debug_struct("BundleBuilder");
        out.field("bundle_dir", bundle_dir);
        out.field("rootfs_path", rootfs_path);
        out.field("hostname", hostname);
        out.field("extra_env", extra_env);
        out.field("cwd", cwd);
        out.field("args", args);
        out.field("volume_paths", volume_paths);
        out.field("image_config", image_config);
        out.field("host_network", host_network);
        out.field("secrets_provider", &has_secrets_provider);
        out.field("deployment_scope", deployment_scope);
        out.field("socket_path", socket_path);
        out.field("cdi_registry", &has_cdi_registry);
        out.finish()
    }
}
/// Builds the id mappings for a rootless container.
///
/// The first mapping always maps container id `0` to `host_id` with size 1.
/// When `username` is non-empty and `read_subid_range` finds a range for it in
/// `subid_path`, a second mapping is appended that starts at container id `1`
/// and covers that subordinate range.
///
/// # Panics
///
/// Panics if the id-mapping builder rejects the values.
#[cfg(unix)]
fn build_rootless_id_mappings(
    host_id: u32,
    subid_path: &str,
    username: &str,
) -> Vec<oci_spec::runtime::LinuxIdMapping> {
    let root_mapping = LinuxIdMappingBuilder::default()
        .container_id(0_u32)
        .host_id(host_id)
        .size(1_u32)
        .build()
        .unwrap();

    // No lookup is attempted at all for an empty username.
    let subordinate_range = if username.is_empty() {
        None
    } else {
        read_subid_range(subid_path, username)
    };

    let subordinate_mapping = subordinate_range.map(|(start, count)| {
        LinuxIdMappingBuilder::default()
            .container_id(1_u32)
            .host_id(start)
            .size(count)
            .build()
            .unwrap()
    });

    std::iter::once(root_mapping)
        .chain(subordinate_mapping)
        .collect()
}
/// Looks up the first usable subordinate id range for `username` in a
/// `subuid`/`subgid`-style file (`name:start:count` per line).
///
/// Returns `Some((start, count))` for the first line whose name field equals
/// `username` and whose `start` and `count` both parse as `u32`. Whitespace
/// around the line and around each field is ignored. Lines that do not match
/// the user, or that are malformed, are skipped rather than aborting the
/// search, so a broken entry does not hide a later valid one.
///
/// Returns `None` when the file cannot be read or no usable entry exists.
#[cfg(unix)]
fn read_subid_range(path: &str, username: &str) -> Option<(u32, u32)> {
    let contents = std::fs::read_to_string(path).ok()?;
    contents.lines().find_map(|line| {
        // `?` inside this closure only rejects the current line.
        let mut fields = line.trim().splitn(3, ':');
        if fields.next()?.trim() != username {
            return None;
        }
        let start = fields.next()?.trim().parse::<u32>().ok()?;
        let count = fields.next()?.trim().parse::<u32>().ok()?;
        Some((start, count))
    })
}
impl BundleBuilder {
/// Creates a builder that will place the bundle in `bundle_dir`.
///
/// Every optional setting starts unset, the env and volume collections start
/// empty, and host networking starts disabled.
#[must_use]
pub fn new(bundle_dir: PathBuf) -> Self {
    Self {
        // Collections start out empty.
        extra_env: Vec::default(),
        volume_paths: HashMap::default(),
        // Flags start out disabled.
        host_network: false,
        // Optional overrides start out unset.
        rootfs_path: None,
        hostname: None,
        cwd: None,
        args: None,
        image_config: None,
        secrets_provider: None,
        deployment_scope: None,
        socket_path: None,
        cdi_registry: None,
        bundle_dir,
    }
}
#[must_use]
pub fn with_cdi_registry(mut self, registry: Arc<CdiRegistry>) -> Self {
self.cdi_registry = Some(registry);
self
}
/// Creates a builder whose bundle directory is the system-default bundles
/// directory joined with the string form of `container_id`.
#[must_use]
pub fn for_container(container_id: &ContainerId) -> Self {
    let dirs = zlayer_paths::ZLayerDirs::system_default();
    let bundle_dir = dirs.bundles().join(container_id.to_string());
    Self::new(bundle_dir)
}
#[must_use]
pub fn with_rootfs(mut self, rootfs_path: PathBuf) -> Self {
self.rootfs_path = Some(rootfs_path);
self
}
#[must_use]
pub fn with_hostname(mut self, hostname: String) -> Self {
self.hostname = Some(hostname);
self
}
/// Appends one extra environment variable.
///
/// Extra variables are applied after the spec's environment and replace
/// earlier entries with the same key.
#[must_use]
pub fn with_env(self, key: String, value: String) -> Self {
    let mut builder = self;
    builder.extra_env.extend([(key, value)]);
    builder
}
#[must_use]
pub fn with_cwd(mut self, cwd: String) -> Self {
self.cwd = Some(cwd);
self
}
#[must_use]
pub fn with_args(mut self, args: Vec<String>) -> Self {
self.args = Some(args);
self
}
#[must_use]
pub fn with_volume_paths(mut self, volume_paths: HashMap<String, PathBuf>) -> Self {
self.volume_paths = volume_paths;
self
}
#[must_use]
pub fn with_image_config(mut self, config: zlayer_registry::ImageConfig) -> Self {
self.image_config = Some(config);
self
}
#[must_use]
pub fn with_host_network(mut self, host_network: bool) -> Self {
self.host_network = host_network;
self
}
#[must_use]
pub fn with_secrets_provider(mut self, provider: Arc<dyn SecretsProvider>) -> Self {
self.secrets_provider = Some(provider);
self
}
#[must_use]
pub fn with_deployment_scope(mut self, scope: String) -> Self {
self.deployment_scope = Some(scope);
self
}
#[must_use]
pub fn with_socket_mount(mut self, path: impl Into<String>) -> Self {
self.socket_path = Some(path.into());
self
}
/// Returns the directory this builder creates the bundle in.
#[must_use]
pub fn bundle_dir(&self) -> &Path {
    self.bundle_dir.as_path()
}
/// Materializes the OCI bundle on disk and returns the bundle directory.
///
/// Steps, in order:
/// 1. create the bundle directory;
/// 2. provide `<bundle>/rootfs`: a symlink to the configured rootfs path when
///    one is set (any existing file or empty directory at that location is
///    removed first, best-effort), otherwise an empty directory;
/// 3. generate the OCI spec using the builder's volume paths;
/// 4. write it as pretty-printed JSON to `<bundle>/config.json`.
///
/// # Errors
///
/// Returns [`AgentError::CreateFailed`] when a filesystem operation or the
/// JSON serialization fails, and propagates any error from spec generation.
#[cfg(unix)]
pub async fn build(&self, container_id: &ContainerId, spec: &ServiceSpec) -> Result<PathBuf> {
    // Every failure in this method is reported the same way; only the reason differs.
    let create_failed = |reason: String| AgentError::CreateFailed {
        id: container_id.to_string(),
        reason,
    };

    fs::create_dir_all(&self.bundle_dir)
        .await
        .map_err(|e| create_failed(format!("failed to create bundle directory: {e}")))?;

    let rootfs_in_bundle = self.bundle_dir.join("rootfs");
    if let Some(ref rootfs_path) = self.rootfs_path {
        // Best-effort cleanup of a previous link or empty directory. Failures
        // are deliberately ignored: if something is still in the way, the
        // symlink call below reports it.
        let _ = fs::remove_file(&rootfs_in_bundle).await;
        let _ = fs::remove_dir(&rootfs_in_bundle).await;

        // This method only exists on Unix, so the Unix symlink call is the
        // only one that can ever be compiled here.
        tokio::fs::symlink(rootfs_path, &rootfs_in_bundle)
            .await
            .map_err(|e| {
                create_failed(format!(
                    "failed to symlink rootfs from {} to {}: {}",
                    rootfs_path.display(),
                    rootfs_in_bundle.display(),
                    e
                ))
            })?;
    } else {
        fs::create_dir_all(&rootfs_in_bundle)
            .await
            .map_err(|e| create_failed(format!("failed to create rootfs directory: {e}")))?;
    }

    let oci_spec = self
        .build_spec_only(container_id, spec, &self.volume_paths)
        .await?;

    let config_path = self.bundle_dir.join("config.json");
    let config_json = serde_json::to_string_pretty(&oci_spec)
        .map_err(|e| create_failed(format!("failed to serialize OCI spec: {e}")))?;
    fs::write(&config_path, config_json)
        .await
        .map_err(|e| create_failed(format!("failed to write config.json: {e}")))?;

    tracing::debug!(
        "Created OCI bundle at {} for container {}",
        self.bundle_dir.display(),
        container_id
    );
    Ok(self.bundle_dir.clone())
}
/// Generates the OCI runtime spec for `spec` using the supplied
/// `volume_paths`.
///
/// This is a public wrapper around the private spec generator.
///
/// # Errors
///
/// Propagates any error from spec generation.
pub async fn build_spec_only(
    &self,
    container_id: &ContainerId,
    spec: &ServiceSpec,
    volume_paths: &std::collections::HashMap<String, PathBuf>,
) -> Result<oci_spec::runtime::Spec> {
    let generated = self
        .build_oci_spec(container_id, spec, volume_paths)
        .await?;
    Ok(generated)
}
/// Resolves CDI container edits for the GPUs requested by `spec`.
///
/// Returns `Ok(None)` when the spec requests no GPU, when the vendor has no
/// CDI kind, or when no registry was configured and the discovered one is
/// empty. Devices are requested by index: `"0"`, `"1"`, … up to the requested
/// GPU count.
///
/// # Errors
///
/// With an explicitly configured registry, a resolution failure is returned
/// as [`AgentError::InvalidSpec`]. With a discovered registry the failure is
/// logged as a warning and `Ok(None)` is returned instead.
fn resolve_cdi_edits(&self, spec: &ServiceSpec) -> Result<Option<Vec<CdiContainerEdits>>> {
    let gpu = match spec.resources.gpu.as_ref() {
        Some(gpu) => gpu,
        None => return Ok(None),
    };
    let kind = match cdi::vendor_to_cdi_kind(&gpu.vendor) {
        Some(kind) => kind,
        None => return Ok(None),
    };

    // An explicitly configured registry is authoritative ("strict"); a
    // discovered one is best-effort.
    let (registry, strict) = match self.cdi_registry.as_ref() {
        Some(configured) => (Arc::clone(configured), true),
        None => {
            let discovered = Arc::new(CdiRegistry::discover());
            if discovered.is_empty() {
                return Ok(None);
            }
            (discovered, false)
        }
    };

    let device_names: Vec<String> = (0..gpu.count).map(|i| i.to_string()).collect();
    let err = match registry.resolve_for_kind(kind, &device_names) {
        Ok(edits) => return Ok(Some(edits)),
        Err(err) => err,
    };

    if strict {
        return Err(AgentError::InvalidSpec(format!(
            "CDI resolution failed for vendor '{}': {err}",
            gpu.vendor
        )));
    }

    tracing::warn!(
        vendor = %gpu.vendor,
        kind = %kind,
        error = %err,
        "CDI resolution failed; falling back to baked-in GPU device passthrough"
    );
    Ok(None)
}
/// Assembles the complete OCI runtime spec for one container.
///
/// The spec is built in this order: CDI edits, process user, environment
/// (image env, default `PATH`/`TERM`, spec env, builder extras, GPU-related
/// variables), capabilities, working directory, arguments, rlimits, root,
/// mounts, Linux config, hostname and finally CDI hooks.
///
/// Side effect: when the spec lists DNS servers, host networking is off and
/// the bundle directory exists, a `resolv.conf` file is written into the
/// bundle directory and bind-mounted read-only at `/etc/resolv.conf`.
///
/// # Errors
///
/// Returns `AgentError::InvalidSpec` when a builder or env resolution fails,
/// `AgentError::GpuSharingUnavailable` when MPS directories or the
/// time-slicing config are missing on the host, and propagates errors from
/// the helper methods it calls.
#[allow(clippy::too_many_lines)]
async fn build_oci_spec(
&self,
container_id: &ContainerId,
spec: &ServiceSpec,
volume_paths: &std::collections::HashMap<String, PathBuf>,
) -> Result<Spec> {
// Resolved once up front; reused below for env, mounts, Linux config and hooks.
let cdi_edits = self.resolve_cdi_edits(spec)?;
// Process user: taken from the image config's `uid[:gid]` string when present.
// Parts that do not parse as u32 fall back to 0; a missing gid reuses the uid.
// NOTE(review): a symbolic user name (e.g. "nginx") therefore becomes uid 0 —
// confirm this is intended.
let user = {
let (uid, gid) = if let Some(user_str) = self
.image_config
.as_ref()
.and_then(|c| c.user.as_ref())
.filter(|u| !u.is_empty())
{
let parts: Vec<&str> = user_str.splitn(2, ':').collect();
let uid = parts[0].parse::<u32>().unwrap_or(0);
let gid = if parts.len() > 1 {
parts[1].parse::<u32>().unwrap_or(0)
} else {
uid
};
(uid, gid)
} else {
(0u32, 0u32)
};
UserBuilder::default()
.uid(uid)
.gid(gid)
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build user: {e}")))?
};
// `env` holds the final `KEY=VALUE` list; `env_keys` tracks which keys are
// already present so later layers can replace earlier entries.
let mut env: Vec<String> = Vec::new();
let mut env_keys: HashSet<String> = HashSet::new();
// Layer 1: environment baked into the image.
if let Some(img_env) = self.image_config.as_ref().and_then(|c| c.env.as_ref()) {
for entry in img_env {
if let Some(key) = entry.split('=').next() {
env_keys.insert(key.to_string());
}
env.push(entry.clone());
}
}
// Layer 2: default PATH and TERM when the image did not provide them.
if !env_keys.contains("PATH") {
env.push(
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
);
env_keys.insert("PATH".to_string());
}
if !env_keys.contains("TERM") {
env.push("TERM=xterm".to_string());
env_keys.insert("TERM".to_string());
}
// Layer 3: the service spec's env. Secret-aware resolution is used only when
// both a secrets provider and a deployment scope are configured; otherwise
// the plain resolver runs and its warnings are logged.
if let (Some(secrets_provider), Some(scope)) =
(&self.secrets_provider, &self.deployment_scope)
{
let resolved_map =
crate::env::resolve_env_with_secrets(&spec.env, secrets_provider.as_ref(), scope)
.await
.map_err(|e| {
AgentError::InvalidSpec(format!(
"environment variable resolution failed: {e}"
))
})?;
for (key, value) in &resolved_map {
if env_keys.contains(key.as_str()) {
env.retain(|e| e.split('=').next() != Some(key.as_str()));
}
env_keys.insert(key.clone());
env.push(format!("{key}={value}"));
}
} else {
let resolved = crate::env::resolve_env_vars_with_warnings(&spec.env).map_err(|e| {
AgentError::InvalidSpec(format!("environment variable resolution failed: {e}"))
})?;
for warning in &resolved.warnings {
tracing::warn!(container = %container_id, "{}", warning);
}
for var in &resolved.vars {
if let Some(key) = var.split('=').next() {
if env_keys.contains(key) {
env.retain(|e| e.split('=').next() != Some(key));
}
env_keys.insert(key.to_string());
}
env.push(var.clone());
}
}
// Layer 4: extra variables set on the builder replace same-named entries.
for (key, value) in &self.extra_env {
if env_keys.contains(key.as_str()) {
env.retain(|e| e.split('=').next() != Some(key.as_str()));
}
env_keys.insert(key.clone());
env.push(format!("{key}={value}"));
}
// Layer 5: GPU visibility. CDI-provided env wins when CDI edits were
// resolved; otherwise vendor-specific device-list variables are added.
if let Some(ref edits_per_device) = cdi_edits {
for edits in edits_per_device {
for entry in &edits.env {
if let Some(key) = entry.split('=').next() {
if env_keys.contains(key) {
env.retain(|e| e.split('=').next() != Some(key));
}
env_keys.insert(key.to_string());
}
env.push(entry.clone());
}
}
} else if let Some(ref gpu) = spec.resources.gpu {
let indices: Vec<String> = (0..gpu.count).map(|i| i.to_string()).collect();
let device_list = indices.join(",");
// NOTE(review): these pushes neither remove existing entries nor update
// `env_keys`, so a same-named variable from an earlier layer ends up
// duplicated in the list — confirm whether replacement is intended.
match gpu.vendor.as_str() {
"nvidia" => {
env.push(format!("NVIDIA_VISIBLE_DEVICES={device_list}"));
env.push(format!("CUDA_VISIBLE_DEVICES={device_list}"));
}
"amd" => {
env.push(format!("ROCR_VISIBLE_DEVICES={device_list}"));
env.push(format!("HIP_VISIBLE_DEVICES={device_list}"));
}
"intel" => {
env.push(format!("ZE_AFFINITY_MASK={device_list}"));
}
_ => {}
}
}
// MPS sharing: resolve (and validate) the host directories, then point the
// container at them, replacing any earlier values of the two variables.
let mps_dirs = if let Some(ref gpu) = spec.resources.gpu {
resolve_mps_dirs(gpu)?
} else {
None
};
if let Some(ref dirs) = mps_dirs {
let pipe = format!("CUDA_MPS_PIPE_DIRECTORY={}", dirs.pipe_dir.display());
let log = format!("CUDA_MPS_LOG_DIRECTORY={}", dirs.log_dir.display());
if env_keys.contains("CUDA_MPS_PIPE_DIRECTORY") {
env.retain(|e| e.split('=').next() != Some("CUDA_MPS_PIPE_DIRECTORY"));
}
if env_keys.contains("CUDA_MPS_LOG_DIRECTORY") {
env.retain(|e| e.split('=').next() != Some("CUDA_MPS_LOG_DIRECTORY"));
}
env_keys.insert("CUDA_MPS_PIPE_DIRECTORY".to_string());
env_keys.insert("CUDA_MPS_LOG_DIRECTORY".to_string());
env.push(pipe);
env.push(log);
}
// Time-slicing with an explicit index pins CUDA_VISIBLE_DEVICES to that
// index. The retain is unconditional because the vendor fallback above may
// have pushed the variable without recording it in `env_keys`.
if let Some(ref gpu) = spec.resources.gpu {
if gpu.sharing == Some(GpuSharingMode::TimeSlice) {
if let Some(idx) = gpu.time_slice_index {
env.retain(|e| e.split('=').next() != Some("CUDA_VISIBLE_DEVICES"));
env_keys.insert("CUDA_VISIBLE_DEVICES".to_string());
env.push(format!("CUDA_VISIBLE_DEVICES={idx}"));
}
}
}
// Distributed settings: fixed single-process values are appended, with the
// service name used as the master address.
// NOTE(review): these are pushed without de-duplication against earlier
// layers — confirm whether user-provided values should take precedence.
if let Some(ref gpu) = spec.resources.gpu {
if let Some(ref dist) = gpu.distributed {
env.push(format!("MASTER_PORT={}", dist.master_port));
env.push(format!("MASTER_ADDR={}", container_id.service));
env.push("WORLD_SIZE=1".to_string());
env.push("RANK=0".to_string());
env.push("LOCAL_RANK=0".to_string());
match dist.backend.as_str() {
"nccl" => env.push("NCCL_SOCKET_IFNAME=eth0".to_string()),
"gloo" => env.push("GLOO_SOCKET_IFNAME=eth0".to_string()),
_ => {}
}
}
}
let capabilities = self.build_capabilities(spec)?;
// Working directory precedence: builder override, then the spec's workdir,
// then the image's non-empty working dir, then "/".
let cwd = self
.cwd
.clone()
.or_else(|| spec.command.workdir.clone())
.or_else(|| {
self.image_config
.as_ref()
.and_then(|c| c.working_dir.as_ref())
.filter(|w| !w.is_empty())
.cloned()
})
.unwrap_or_else(|| "/".to_string());
// Arguments: builder override, otherwise resolved from spec and image config.
let process_args = if let Some(ref args) = self.args {
args.clone()
} else {
Self::resolve_command_from_spec(spec, self.image_config.as_ref())
};
// `no_new_privileges` is enabled only when the container is neither
// privileged nor requests any explicit capabilities.
let mut process_builder = ProcessBuilder::default()
.terminal(false)
.user(user)
.env(env)
.args(process_args)
.cwd(cwd)
.no_new_privileges(!spec.privileged && spec.capabilities.is_empty());
if let Some(caps) = capabilities {
process_builder = process_builder.capabilities(caps);
}
// Resource limits: unknown names are rejected; negative soft/hard values are
// clamped to 0 before conversion to u64.
let mut rlimits: Vec<PosixRlimit> = Vec::with_capacity(spec.ulimits.len());
for (name, limit) in &spec.ulimits {
let typ = ulimit_name_to_posix(name).ok_or_else(|| {
AgentError::InvalidSpec(format!(
"unknown ulimit name `{name}` (expected one of: cpu, fsize, data, stack, \
core, rss, nproc, nofile, memlock, as, locks, sigpending, msgqueue, nice, \
rtprio, rttime)"
))
})?;
let entry = PosixRlimitBuilder::default()
.typ(typ)
.soft(u64::try_from(limit.soft.max(0)).unwrap_or(0))
.hard(u64::try_from(limit.hard.max(0)).unwrap_or(0))
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build rlimit `{name}`: {e}"))
})?;
rlimits.push(entry);
}
if !rlimits.is_empty() {
process_builder = process_builder.rlimits(rlimits);
}
let process = process_builder
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build process: {e}")))?;
// Root filesystem: the bundle-relative `rootfs` entry, mounted writable.
let root = RootBuilder::default()
.path("rootfs".to_string())
.readonly(false)
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build root: {e}")))?;
// Mounts: defaults first, then storage mounts from the spec.
let mut mounts = self.build_default_mounts(spec)?;
let storage_mounts = self.build_storage_mounts(spec, volume_paths)?;
mounts.extend(storage_mounts);
// Optional read-only bind mount of the configured socket path at the
// default ZLayer socket path. Unlike the other mounts, a builder failure
// here panics via `expect` instead of returning an error.
if let Some(ref socket_path) = self.socket_path {
mounts.push(
MountBuilder::default()
.destination(zlayer_paths::ZLayerDirs::default_socket_path())
.typ("bind")
.source(socket_path.clone())
.options(vec!["rbind".into(), "ro".into()])
.build()
.expect("valid socket mount"),
);
}
// Custom DNS: write resolv.conf into the bundle and bind-mount it read-only.
// Skipped for host networking, when no DNS servers are listed, or when the
// bundle directory does not exist. Note this reads `spec.host_network`, not
// the builder's own `host_network` field.
if !spec.host_network && !spec.dns.is_empty() && self.bundle_dir.exists() {
let resolv_path = self.bundle_dir.join("resolv.conf");
let contents = generate_resolv_conf(&spec.dns);
fs::write(&resolv_path, contents).await.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to write resolv.conf to bundle at {}: {e}",
resolv_path.display()
))
})?;
mounts.push(
MountBuilder::default()
.destination("/etc/resolv.conf".to_string())
.typ("bind")
.source(resolv_path.to_string_lossy().to_string())
.options(vec!["rbind".to_string(), "ro".to_string()])
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build resolv.conf mount: {e}"))
})?,
);
}
// CDI mounts: added as bind mounts; `rbind` is appended when the CDI options
// contain neither `bind` nor `rbind`.
if let Some(ref edits_per_device) = cdi_edits {
for edits in edits_per_device {
for cdi_mount in &edits.mounts {
let mut opts = cdi_mount.options.clone();
if !opts.iter().any(|o| o == "bind" || o == "rbind") {
opts.push("rbind".to_string());
}
mounts.push(
MountBuilder::default()
.destination(cdi_mount.container_path.clone())
.typ("bind")
.source(cdi_mount.host_path.clone())
.options(opts)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build CDI mount: {e}"))
})?,
);
}
}
}
// MPS directories are bind-mounted read-write at the same path inside the
// container as on the host, matching the env variables set above.
if let Some(ref dirs) = mps_dirs {
mounts.push(
MountBuilder::default()
.destination(dirs.pipe_dir.clone())
.typ("bind")
.source(dirs.pipe_dir.clone())
.options(vec!["rbind".into(), "rw".into()])
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build MPS pipe mount: {e}"))
})?,
);
mounts.push(
MountBuilder::default()
.destination(dirs.log_dir.clone())
.typ("bind")
.source(dirs.log_dir.clone())
.options(vec!["rbind".into(), "rw".into()])
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build MPS log mount: {e}"))
})?,
);
}
// Time-slicing config: when a host path is given it must be a regular file;
// it is then bind-mounted read-only at TIMESLICE_CONFIG_CONTAINER_PATH.
if let Some(ref gpu) = spec.resources.gpu {
if gpu.sharing == Some(GpuSharingMode::TimeSlice) {
if let Some(ref cfg_path) = gpu.time_slicing_config_path {
let host = PathBuf::from(cfg_path);
if !host.is_file() {
return Err(AgentError::GpuSharingUnavailable {
mode: "time-slice".to_string(),
reason: format!(
"time-slicing config {} is not a regular file on the host",
host.display()
),
});
}
mounts.push(
MountBuilder::default()
.destination(PathBuf::from(TIMESLICE_CONFIG_CONTAINER_PATH))
.typ("bind")
.source(host)
.options(vec!["rbind".into(), "ro".into()])
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build time-slicing config mount: {e}"
))
})?,
);
}
}
}
let linux = self.build_linux_config(container_id, spec, cdi_edits.as_deref())?;
// Hostname: builder override, otherwise the container id's string form.
let hostname = self
.hostname
.clone()
.unwrap_or_else(|| container_id.to_string());
let mut spec_builder = SpecBuilder::default()
.version("1.0.2".to_string())
.root(root)
.process(process)
.hostname(hostname)
.mounts(mounts)
.linux(linux);
// Hooks are only attached when the CDI edits actually define some.
if let Some(ref edits_per_device) = cdi_edits {
if let Some(hooks) = Self::build_hooks_from_cdi(edits_per_device)? {
spec_builder = spec_builder.hooks(hooks);
}
}
let oci_spec = spec_builder
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build OCI spec: {e}")))?;
Ok(oci_spec)
}
fn build_hooks_from_cdi(edits_per_device: &[CdiContainerEdits]) -> Result<Option<Hooks>> {
let mut prestart: Vec<Hook> = Vec::new();
let mut create_runtime: Vec<Hook> = Vec::new();
let mut create_container: Vec<Hook> = Vec::new();
let mut start_container: Vec<Hook> = Vec::new();
let mut poststart: Vec<Hook> = Vec::new();
let mut poststop: Vec<Hook> = Vec::new();
for edits in edits_per_device {
let Some(ref h) = edits.hooks else { continue };
for hook in &h.prestart {
prestart.push(convert_cdi_hook(hook)?);
}
for hook in &h.create_runtime {
create_runtime.push(convert_cdi_hook(hook)?);
}
for hook in &h.create_container {
create_container.push(convert_cdi_hook(hook)?);
}
for hook in &h.start_container {
start_container.push(convert_cdi_hook(hook)?);
}
for hook in &h.poststart {
poststart.push(convert_cdi_hook(hook)?);
}
for hook in &h.poststop {
poststop.push(convert_cdi_hook(hook)?);
}
}
if prestart.is_empty()
&& create_runtime.is_empty()
&& create_container.is_empty()
&& start_container.is_empty()
&& poststart.is_empty()
&& poststop.is_empty()
{
return Ok(None);
}
let mut builder = HooksBuilder::default();
if !prestart.is_empty() {
#[allow(deprecated)]
{
builder = builder.prestart(prestart);
}
}
if !create_runtime.is_empty() {
builder = builder.create_runtime(create_runtime);
}
if !create_container.is_empty() {
builder = builder.create_container(create_container);
}
if !start_container.is_empty() {
builder = builder.start_container(start_container);
}
if !poststart.is_empty() {
builder = builder.poststart(poststart);
}
if !poststop.is_empty() {
builder = builder.poststop(poststop);
}
let hooks = builder
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build CDI hooks: {e}")))?;
Ok(Some(hooks))
}
#[allow(clippy::unused_self)]
fn build_capabilities(
&self,
spec: &ServiceSpec,
) -> Result<Option<oci_spec::runtime::LinuxCapabilities>> {
if spec.privileged {
let all_caps: HashSet<Capability> = ALL_CAPABILITIES.iter().copied().collect();
let empty_caps: HashSet<Capability> = HashSet::new();
let caps = LinuxCapabilitiesBuilder::default()
.bounding(all_caps.clone())
.effective(all_caps.clone())
.permitted(all_caps)
.inheritable(empty_caps.clone())
.ambient(empty_caps)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
})?;
Ok(Some(caps))
} else if !spec.capabilities.is_empty() {
let caps: HashSet<Capability> = spec
.capabilities
.iter()
.filter_map(|c| {
let cap_name = if c.starts_with("CAP_") {
c.to_uppercase()
} else {
format!("CAP_{}", c.to_uppercase())
};
Capability::from_str(&cap_name).ok()
})
.collect();
let empty_caps: HashSet<Capability> = HashSet::new();
let built_caps = LinuxCapabilitiesBuilder::default()
.bounding(caps.clone())
.effective(caps.clone())
.permitted(caps)
.inheritable(empty_caps.clone())
.ambient(empty_caps)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
})?;
Ok(Some(built_caps))
} else {
let default_caps: HashSet<Capability> = [
Capability::Chown,
Capability::DacOverride,
Capability::Fsetid,
Capability::Fowner,
Capability::Mknod,
Capability::NetRaw,
Capability::Setgid,
Capability::Setuid,
Capability::Setfcap,
Capability::Setpcap,
Capability::NetBindService,
Capability::SysChroot,
Capability::Kill,
Capability::AuditWrite,
]
.into_iter()
.collect();
let empty_caps: HashSet<Capability> = HashSet::new();
let built_caps = LinuxCapabilitiesBuilder::default()
.bounding(default_caps.clone())
.effective(default_caps.clone())
.permitted(default_caps)
.inheritable(empty_caps.clone())
.ambient(empty_caps)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
})?;
Ok(Some(built_caps))
}
}
/// Builds the mounts every container gets, in this order: `/proc`, `/dev`,
/// `/dev/pts`, `/dev/shm`, `/dev/mqueue`, `/sys` and `/sys/fs/cgroup`.
///
/// `/sys` is mounted read-only unless the container is privileged.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when a mount builder fails.
#[allow(clippy::unused_self, clippy::too_many_lines)]
fn build_default_mounts(&self, spec: &ServiceSpec) -> Result<Vec<Mount>> {
    /// Builds one mount; `label` only names the mount in the error message.
    fn default_mount(
        destination: &str,
        fs_type: &str,
        source: &str,
        options: &[&str],
        label: &str,
    ) -> Result<Mount> {
        let options: Vec<String> = options.iter().map(|opt| (*opt).to_string()).collect();
        MountBuilder::default()
            .destination(destination.to_string())
            .typ(fs_type.to_string())
            .source(source.to_string())
            .options(options)
            .build()
            .map_err(|e| AgentError::InvalidSpec(format!("failed to build {label} mount: {e}")))
    }

    // Privileged containers get a writable /sys; everyone else gets it read-only.
    let sys_options: &[&str] = if spec.privileged {
        &["nosuid", "noexec", "nodev"]
    } else {
        &["nosuid", "noexec", "nodev", "ro"]
    };

    let mounts = vec![
        default_mount(
            "/proc",
            "proc",
            "proc",
            &["nosuid", "noexec", "nodev"],
            "/proc",
        )?,
        default_mount(
            "/dev",
            "tmpfs",
            "tmpfs",
            &["nosuid", "strictatime", "mode=755", "size=65536k"],
            "/dev",
        )?,
        default_mount(
            "/dev/pts",
            "devpts",
            "devpts",
            &[
                "nosuid",
                "noexec",
                "newinstance",
                "ptmxmode=0666",
                "mode=0620",
                "gid=5",
            ],
            "/dev/pts",
        )?,
        default_mount(
            "/dev/shm",
            "tmpfs",
            "shm",
            &["nosuid", "noexec", "nodev", "mode=1777", "size=65536k"],
            "/dev/shm",
        )?,
        default_mount(
            "/dev/mqueue",
            "mqueue",
            "mqueue",
            &["nosuid", "noexec", "nodev"],
            "/dev/mqueue",
        )?,
        default_mount("/sys", "sysfs", "sysfs", sys_options, "/sys")?,
        default_mount(
            "/sys/fs/cgroup",
            "cgroup2",
            "cgroup",
            &["nosuid", "noexec", "nodev", "relatime"],
            "cgroup",
        )?,
    ];
    Ok(mounts)
}
/// Translates every entry of `spec.storage` into an OCI [`Mount`].
///
/// Bind, named, anonymous and S3 entries become `rbind` mounts of type
/// `none`; tmpfs entries become `tmpfs` mounts carrying `nosuid,nodev` plus
/// the optional `size=` / `mode=` options. Named, anonymous and S3 entries
/// take their host-side source from `volume_paths`, keyed respectively by the
/// volume name, `_anon_<target with '/' replaced by '_'>`, and
/// `_s3_<bucket>_<prefix>`.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when a required key is missing from
/// `volume_paths` or when the mount builder rejects a mount.
#[allow(clippy::unused_self, clippy::too_many_lines)]
fn build_storage_mounts(
    &self,
    spec: &ServiceSpec,
    volume_paths: &std::collections::HashMap<String, PathBuf>,
) -> Result<Vec<Mount>> {
    /// Option list shared by every bind-style mount: `rbind` plus `ro`/`rw`.
    fn rbind_options(read_only: bool) -> Vec<String> {
        let access = if read_only { "ro" } else { "rw" };
        vec!["rbind".to_string(), access.to_string()]
    }

    /// Runs the mount builder; `label` names the mount kind in the error text.
    fn assemble(
        label: &str,
        destination: &str,
        fs_type: &str,
        source: PathBuf,
        options: Vec<String>,
    ) -> Result<Mount> {
        MountBuilder::default()
            .destination(destination.to_string())
            .typ(fs_type.to_string())
            .source(source)
            .options(options)
            .build()
            .map_err(|e| {
                AgentError::InvalidSpec(format!(
                    "failed to build {label} for {destination}: {e}"
                ))
            })
    }

    spec.storage
        .iter()
        .map(|entry| -> Result<Mount> {
            match entry {
                StorageSpec::Bind {
                    source,
                    target,
                    readonly,
                } => assemble(
                    "bind mount",
                    target,
                    "none",
                    PathBuf::from(source.clone()),
                    rbind_options(*readonly),
                ),
                StorageSpec::Named {
                    name,
                    target,
                    readonly,
                    tier,
                    ..
                } => {
                    // The host directory must have been prepared beforehand.
                    let host_dir = volume_paths.get(name).ok_or_else(|| {
                        AgentError::InvalidSpec(format!(
                            "volume '{name}' not prepared - ensure StorageManager.ensure_volume() was called"
                        ))
                    })?;
                    if matches!(tier, StorageTier::Network) {
                        tracing::warn!(
                            volume = %name,
                            tier = ?tier,
                            "Network storage tier is NOT SQLite-safe. Avoid using SQLite databases on this volume."
                        );
                    }
                    assemble(
                        "named volume mount",
                        target,
                        "none",
                        PathBuf::from(host_dir.to_string_lossy().to_string()),
                        rbind_options(*readonly),
                    )
                }
                StorageSpec::Anonymous { target, tier } => {
                    let lookup_key =
                        format!("_anon_{}", target.trim_start_matches('/').replace('/', "_"));
                    let host_dir = volume_paths.get(&lookup_key).ok_or_else(|| {
                        AgentError::InvalidSpec(format!(
                            "anonymous volume for '{target}' not prepared"
                        ))
                    })?;
                    if matches!(tier, StorageTier::Network) {
                        tracing::warn!(
                            target = %target,
                            tier = ?tier,
                            "Network storage tier is NOT SQLite-safe."
                        );
                    }
                    // Anonymous volumes are always mounted read-write.
                    assemble(
                        "anonymous volume mount",
                        target,
                        "none",
                        PathBuf::from(host_dir.to_string_lossy().to_string()),
                        rbind_options(false),
                    )
                }
                StorageSpec::Tmpfs { target, size, mode } => {
                    let mut tmpfs_options = vec!["nosuid".to_string(), "nodev".to_string()];
                    if let Some(size_str) = size {
                        tmpfs_options.push(format!("size={size_str}"));
                    }
                    if let Some(mode_val) = mode {
                        // The mode is rendered in octal.
                        tmpfs_options.push(format!("mode={mode_val:o}"));
                    }
                    assemble(
                        "tmpfs mount",
                        target,
                        "tmpfs",
                        PathBuf::from("tmpfs".to_string()),
                        tmpfs_options,
                    )
                }
                StorageSpec::S3 {
                    bucket,
                    prefix,
                    target,
                    readonly,
                    endpoint: _,
                    credentials: _,
                } => {
                    let lookup_key =
                        format!("_s3_{}_{}", bucket, prefix.as_deref().unwrap_or(""));
                    let host_dir = volume_paths.get(&lookup_key).ok_or_else(|| {
                        AgentError::InvalidSpec(format!(
                            "S3 volume for bucket '{bucket}' not mounted - ensure StorageManager.mount_s3() was called"
                        ))
                    })?;
                    tracing::warn!(
                        bucket = %bucket,
                        target = %target,
                        "S3 storage is NOT SQLite-safe. Use for read-heavy workloads only."
                    );
                    assemble(
                        "S3 mount",
                        target,
                        "none",
                        PathBuf::from(host_dir.to_string_lossy().to_string()),
                        rbind_options(*readonly),
                    )
                }
            }
        })
        .collect()
}
/// Builds the `linux` section of the OCI runtime spec for one container.
///
/// Assembles, in order: namespaces, rootless uid/gid mappings, cgroup
/// resources, device nodes (including any CDI-provided nodes), rootfs
/// propagation, masked/read-only paths and the cgroup path.
///
/// `cdi_edits`, when present, suppresses the built-in GPU device defaults
/// (`build_devices` is called with `skip_gpu_defaults = true`) and its device
/// nodes are appended instead.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when a nested builder fails or, on
/// Linux, when no cgroup parent could be resolved and the capability survey
/// reports the cgroup root as not writable. Other errors from
/// `build_resources`, `build_devices` and `cdi_node_to_oci_device` are
/// propagated unchanged.
///
/// # Panics
///
/// Panics if building one of the namespace entries fails (`unwrap`), and via
/// `unreachable!()` if a cgroup parent is set with an unknown source tag.
#[allow(clippy::similar_names)] #[allow(clippy::too_many_lines)]
fn build_linux_config(
&self,
container_id: &ContainerId,
spec: &ServiceSpec,
cdi_edits: Option<&[CdiContainerEdits]>,
) -> Result<oci_spec::runtime::Linux> {
// Namespaces every container gets: pid, ipc, uts and mount.
let mut namespaces = vec![
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Pid)
.build()
.unwrap(),
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Ipc)
.build()
.unwrap(),
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Uts)
.build()
.unwrap(),
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Mount)
.build()
.unwrap(),
];
// A network namespace is only added when the builder is not in host-network mode.
if !self.host_network {
namespaces.push(
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Network)
.build()
.unwrap(),
);
}
// "Rootless" means the effective uid is not root; never true on non-unix targets.
#[cfg(unix)]
let rootless = !nix::unistd::geteuid().is_root();
#[cfg(not(unix))]
let rootless = false;
// Rootless containers additionally get user and cgroup namespaces.
if rootless {
namespaces.push(
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::User)
.build()
.unwrap(),
);
namespaces.push(
LinuxNamespaceBuilder::default()
.typ(LinuxNamespaceType::Cgroup)
.build()
.unwrap(),
);
}
let mut linux_builder = LinuxBuilder::default().namespaces(namespaces);
// Rootless: derive uid/gid mappings from the effective ids and the
// /etc/subuid and /etc/subgid files, looked up by the current user name.
#[cfg(unix)]
if rootless {
let euid = nix::unistd::geteuid();
let egid = nix::unistd::getegid();
// A failed or empty user lookup degrades to an empty user name.
let username = nix::unistd::User::from_uid(euid)
.ok()
.flatten()
.map(|u| u.name)
.unwrap_or_default();
linux_builder = linux_builder
.uid_mappings(build_rootless_id_mappings(
euid.as_raw(),
"/etc/subuid",
&username,
))
.gid_mappings(build_rootless_id_mappings(
egid.as_raw(),
"/etc/subgid",
&username,
));
}
// Resource limits are only attached when `build_resources` produced any.
let resources = self.build_resources(spec)?;
if let Some(resources) = resources {
linux_builder = linux_builder.resources(resources);
}
// Built-in GPU device defaults are skipped when CDI edits are supplied;
// the CDI device nodes are appended below instead.
let mut devices = self.build_devices(spec, None, cdi_edits.is_some())?;
if let Some(edits_per_device) = cdi_edits {
for edits in edits_per_device {
for node in &edits.device_nodes {
devices.push(cdi_node_to_oci_device(node)?);
}
}
}
if !devices.is_empty() {
linux_builder = linux_builder.devices(devices);
}
linux_builder = linux_builder.rootfs_propagation("private".to_string());
// Privileged containers get explicitly empty masked/read-only path lists;
// everything else gets the fixed lists below.
if spec.privileged {
linux_builder = linux_builder.masked_paths(vec![]).readonly_paths(vec![]);
} else {
let masked_paths = vec![
"/proc/acpi".to_string(),
"/proc/asound".to_string(),
"/proc/kcore".to_string(),
"/proc/keys".to_string(),
"/proc/latency_stats".to_string(),
"/proc/timer_list".to_string(),
"/proc/timer_stats".to_string(),
"/proc/sched_debug".to_string(),
"/proc/scsi".to_string(),
"/sys/firmware".to_string(),
];
let readonly_paths = vec![
"/proc/bus".to_string(),
"/proc/fs".to_string(),
"/proc/irq".to_string(),
"/proc/sys".to_string(),
"/proc/sysrq-trigger".to_string(),
];
linux_builder = linux_builder
.masked_paths(masked_paths)
.readonly_paths(readonly_paths);
}
let cid = container_id.to_string();
// Cgroup parent resolution, highest precedence first:
//   1. non-empty `spec.cgroup_parent`             (source tag "spec")
//   2. non-empty `ZLAYER_CGROUP_PARENT` env var   (source tag "env")
//   3. Linux only: `ensure_daemon_leaf_and_container_parent()` ("auto-init")
//   4. Linux only: `current_cgroup_v2_path()`     ("auto")
let explicit_parent: Option<(String, &'static str)> =
if let Some(p) = spec.cgroup_parent.as_deref().filter(|s| !s.is_empty()) {
Some((p.to_string(), "spec"))
} else if let Some(p) = std::env::var("ZLAYER_CGROUP_PARENT")
.ok()
.filter(|s| !s.is_empty())
{
Some((p, "env"))
} else {
None
};
// NOTE: the automatic candidates are evaluated even when an explicit parent was found.
#[cfg(target_os = "linux")]
let auto_parent: Option<(String, &'static str)> =
if let Some(p) = crate::capability::ensure_daemon_leaf_and_container_parent() {
Some((p, "auto-init"))
} else if let Some(p) = crate::capability::current_cgroup_v2_path() {
Some((p, "auto"))
} else {
None
};
#[cfg(not(target_os = "linux"))]
let auto_parent: Option<(String, &'static str)> = None;
// Source tag is "none" exactly when no parent was resolved.
let (cgroup_parent_value, cgroup_parent_source): (Option<String>, &'static str) =
explicit_parent
.or(auto_parent)
.map_or((None, "none"), |(p, s)| (Some(p), s));
#[cfg(target_os = "linux")]
if cgroup_parent_value.is_none() && crate::capability::DaemonCapabilities::get().is_nested {
tracing::warn!(
container_id = %cid,
"capability survey reports nested daemon but cgroup_parent could not be resolved — proceeding with v2 root"
);
}
if let Some(parent) = cgroup_parent_value {
// The container's cgroup is `<parent>/<container id>`, with trailing
// slashes stripped from the parent first.
let parent = parent.trim_end_matches('/');
let full = format!("{parent}/{cid}");
match cgroup_parent_source {
"spec" => tracing::info!(
container_id = %cid,
source = "spec",
path = %full,
"cgroup_parent selected"
),
"env" => tracing::info!(
container_id = %cid,
source = "env",
path = %full,
"cgroup_parent selected"
),
"auto" => tracing::info!(
container_id = %cid,
source = "auto",
path = %full,
"cgroup_parent selected (from /proc/self/cgroup)"
),
"auto-init" => tracing::info!(
container_id = %cid,
source = "auto-init",
path = %full,
"cgroup_parent selected (migrated daemon to <scope>/init; containers go under <scope>/containers)"
),
// A resolved parent always carries one of the four tags assigned above.
_ => unreachable!(),
}
linux_builder = linux_builder.cgroups_path(std::path::PathBuf::from(full));
} else {
// No parent resolved: on Linux this is only acceptable when the
// capability survey says the cgroup root itself is writable.
#[cfg(target_os = "linux")]
{
let caps = crate::capability::DaemonCapabilities::get();
if !caps.can_write_cgroup_root {
return Err(AgentError::InvalidSpec(format!(
"cannot create container {cid}: no writable cgroup parent. \
/proc/self/cgroup reports the cgroup-v2 root, and \
/sys/fs/cgroup is read-only to this process. Fix one of: \
(a) run the daemon's outer container with --cgroupns=host \
so /proc/self/cgroup reports a real parent; \
(b) set ZLAYER_CGROUP_PARENT=/path/to/writable/cgroup; \
(c) grant the daemon write access to /sys/fs/cgroup."
)));
}
tracing::info!(
container_id = %cid,
"cgroup_parent unset — libcontainer will use v2 root (cgroup root is writable here)"
);
}
#[cfg(not(target_os = "linux"))]
tracing::debug!(
container_id = %cid,
"non-Linux host — cgroup_parent unset; libcontainer inside the WSL distro will resolve a parent from its cgroup-v2 root"
);
}
linux_builder
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build linux config: {e}")))
}
/// Builds the cgroup resource limits (`linux.resources`) for `spec`.
///
/// * `spec.resources.cpu` becomes a CFS quota of `cpu * 100_000` against a
///   fixed period of `100_000`.
/// * `spec.resources.memory` is parsed with `parse_memory_string` and used as
///   the memory limit in bytes.
/// * Device cgroup rules come from `build_device_cgroup_rules`.
///
/// Returns `Ok(None)` when none of the three produced anything.
///
/// # Errors
///
/// Returns [`AgentError::InvalidSpec`] when the memory string cannot be
/// parsed, when the parsed byte count does not fit in the signed 64-bit field
/// the OCI spec uses, or when one of the builders fails.
#[allow(clippy::cast_possible_truncation)]
fn build_resources(
    &self,
    spec: &ServiceSpec,
) -> Result<Option<oci_spec::runtime::LinuxResources>> {
    let mut resources_builder = LinuxResourcesBuilder::default();
    let mut has_resources = false;

    if let Some(cpu_limit) = spec.resources.cpu {
        // Fractional CPUs scaled to the period below; the float-to-int cast
        // truncates toward zero.
        let quota = (cpu_limit * 100_000.0) as i64;
        let cpu = LinuxCpuBuilder::default()
            .quota(quota)
            .period(100_000u64)
            .build()
            .map_err(|e| AgentError::InvalidSpec(format!("failed to build CPU limits: {e}")))?;
        resources_builder = resources_builder.cpu(cpu);
        has_resources = true;
    }

    if let Some(ref memory_str) = spec.resources.memory {
        let bytes = parse_memory_string(memory_str)
            .map_err(|e| AgentError::InvalidSpec(format!("invalid memory limit: {e}")))?;
        // A plain `as i64` cast would wrap values above `i64::MAX` into a
        // negative limit; reject them instead of emitting a bogus spec.
        let limit = i64::try_from(bytes).map_err(|e| {
            AgentError::InvalidSpec(format!(
                "invalid memory limit: {memory_str} exceeds the maximum supported size: {e}"
            ))
        })?;
        let memory = LinuxMemoryBuilder::default()
            .limit(limit)
            .build()
            .map_err(|e| {
                AgentError::InvalidSpec(format!("failed to build memory limits: {e}"))
            })?;
        resources_builder = resources_builder.memory(memory);
        has_resources = true;
    }

    let device_rules = self.build_device_cgroup_rules(spec, None)?;
    if !device_rules.is_empty() {
        resources_builder = resources_builder.devices(device_rules);
        has_resources = true;
    }

    if has_resources {
        let resources = resources_builder
            .build()
            .map_err(|e| AgentError::InvalidSpec(format!("failed to build resources: {e}")))?;
        Ok(Some(resources))
    } else {
        Ok(None)
    }
}
#[allow(clippy::unused_self, clippy::too_many_lines)]
fn build_device_cgroup_rules(
&self,
spec: &ServiceSpec,
_gpu_indices: Option<&[u32]>,
) -> Result<Vec<oci_spec::runtime::LinuxDeviceCgroup>> {
let mut rules = Vec::new();
if spec.privileged {
let rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!("failed to build device cgroup rule: {e}"))
})?;
rules.push(rule);
} else {
let deny_all = LinuxDeviceCgroupBuilder::default()
.allow(false)
.access("rwm".to_string())
.build()
.map_err(|e| AgentError::InvalidSpec(format!("failed to build deny rule: {e}")))?;
rules.push(deny_all);
let standard_char_devices = [
(1, 3, "rwm"), (1, 5, "rwm"), (1, 7, "rwm"), (1, 8, "rwm"), (1, 9, "rwm"), (5, 0, "rwm"), (5, 1, "rwm"), (5, 2, "rwm"), (136, -1, "rwm"), ];
for (major, minor, access) in standard_char_devices {
let mut builder = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(i64::from(major))
.access(access.to_string());
if minor >= 0 {
builder = builder.minor(i64::from(minor));
}
let rule = builder.build().map_err(|e| {
AgentError::InvalidSpec(format!("failed to build char device rule: {e}"))
})?;
rules.push(rule);
}
#[cfg(unix)]
for device in &spec.devices {
if let Ok((major, minor)) = get_device_major_minor(&device.path) {
let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
let mut access = String::new();
if device.read {
access.push('r');
}
if device.write {
access.push('w');
}
if device.mknod {
access.push('m');
}
if access.is_empty() {
access = "rw".to_string();
}
let rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(dev_type)
.major(major)
.minor(minor)
.access(access)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build device rule for {}: {}",
device.path, e
))
})?;
rules.push(rule);
} else {
tracing::warn!("Failed to get device info for {}, skipping", device.path);
}
}
if let Some(ref gpu) = spec.resources.gpu {
match gpu.vendor.as_str() {
"nvidia" => {
let rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(195i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU cgroup rule: {e}"
))
})?;
rules.push(rule);
let uvm_rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(510i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU UVM cgroup rule: {e}"
))
})?;
rules.push(uvm_rule);
}
"amd" => {
let dri_rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(226i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build AMD DRI cgroup rule: {e}"
))
})?;
rules.push(dri_rule);
let kfd_rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(234i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build AMD KFD cgroup rule: {e}"
))
})?;
rules.push(kfd_rule);
}
"intel" => {
let dri_rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(226i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build Intel DRI cgroup rule: {e}"
))
})?;
rules.push(dri_rule);
}
other => {
tracing::warn!(
vendor = %other,
"Unknown GPU vendor, allowing DRI devices (major 226)"
);
let dri_rule = LinuxDeviceCgroupBuilder::default()
.allow(true)
.typ(LinuxDeviceType::C)
.major(226i64)
.access("rwm".to_string())
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU DRI cgroup rule: {e}"
))
})?;
rules.push(dri_rule);
}
}
}
}
Ok(rules)
}
#[allow(clippy::unused_self, clippy::too_many_lines)]
#[cfg_attr(not(unix), allow(clippy::unnecessary_wraps, clippy::needless_return))]
fn build_devices(
&self,
spec: &ServiceSpec,
gpu_indices: Option<&[u32]>,
skip_gpu_defaults: bool,
) -> Result<Vec<oci_spec::runtime::LinuxDevice>> {
#[cfg(not(unix))]
{
let _ = (spec, gpu_indices, skip_gpu_defaults);
return Ok(Vec::new());
}
#[cfg(unix)]
{
let mut devices = Vec::new();
for device in &spec.devices {
if let Ok((major, minor)) = get_device_major_minor(&device.path) {
let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(device.path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build device {}: {}",
device.path, e
))
})?;
devices.push(linux_device);
}
}
if skip_gpu_defaults {
return Ok(devices);
}
if let Some(ref gpu) = spec.resources.gpu {
let indices: Vec<u32> =
gpu_indices.map_or_else(|| (0..gpu.count).collect(), <[u32]>::to_vec);
match gpu.vendor.as_str() {
"nvidia" => {
let always_devices =
["/dev/nvidiactl", "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"];
for dev_path in &always_devices {
if let Ok((major, minor)) = get_device_major_minor(dev_path) {
let dev_type =
get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path((*dev_path).to_string())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
for i in &indices {
let dev_path = format!("/dev/nvidia{i}");
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
}
"amd" => {
let amd_always_devices = ["/dev/kfd"];
for dev_path in &amd_always_devices {
if let Ok((major, minor)) = get_device_major_minor(dev_path) {
let dev_type =
get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path((*dev_path).to_string())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
for i in &indices {
let dev_path = format!("/dev/dri/renderD{}", 128 + i);
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
for i in &indices {
let dev_path = format!("/dev/dri/card{i}");
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
}
"intel" => {
for i in &indices {
let dev_path = format!("/dev/dri/renderD{}", 128 + i);
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
for i in &indices {
let dev_path = format!("/dev/dri/card{i}");
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
}
other => {
tracing::warn!(
vendor = %other,
"Unknown GPU vendor, attempting DRI device passthrough"
);
for i in &indices {
let dev_path = format!("/dev/dri/renderD{}", 128 + i);
if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
let dev_type =
get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
let linux_device = LinuxDeviceBuilder::default()
.path(dev_path.clone())
.typ(dev_type)
.major(major)
.minor(minor)
.file_mode(0o666u32)
.uid(0u32)
.gid(0u32)
.build()
.map_err(|e| {
AgentError::InvalidSpec(format!(
"failed to build GPU device {dev_path}: {e}"
))
})?;
devices.push(linux_device);
} else {
tracing::warn!(
"GPU device {} not found on host, skipping",
dev_path
);
}
}
}
}
}
Ok(devices)
} }
/// Generates the OCI runtime spec for `container_id` and writes it as
/// pretty-printed JSON to `config.json` inside the bundle directory.
///
/// Returns the bundle directory, not the path of the written file.
///
/// # Errors
///
/// Propagates any error from `build_spec_only`, and returns
/// [`AgentError::CreateFailed`] when serialization or the file write fails.
pub async fn write_config(
    &self,
    container_id: &ContainerId,
    spec: &ServiceSpec,
) -> Result<PathBuf> {
    let runtime_spec = self
        .build_spec_only(container_id, spec, &self.volume_paths)
        .await?;

    // Both failure paths below report against the same container id.
    let create_failed = |reason: String| AgentError::CreateFailed {
        id: container_id.to_string(),
        reason,
    };

    let rendered = serde_json::to_string_pretty(&runtime_spec)
        .map_err(|e| create_failed(format!("failed to serialize OCI spec: {e}")))?;

    let destination = self.bundle_dir.join("config.json");
    fs::write(&destination, rendered)
        .await
        .map_err(|e| create_failed(format!("failed to write config.json: {e}")))?;

    tracing::debug!(
        "Wrote OCI config.json at {} for container {}",
        destination.display(),
        container_id
    );
    Ok(self.bundle_dir.clone())
}
/// Picks the argv the container process is started with.
///
/// Precedence:
/// 1. `spec.command.entrypoint` followed by `spec.command.args` (if any);
/// 2. non-empty `spec.command.args` alone;
/// 3. the image's `full_command()` when it is present and non-empty;
/// 4. `/bin/sh`.
fn resolve_command_from_spec(
    spec: &ServiceSpec,
    image_config: Option<&zlayer_registry::ImageConfig>,
) -> Vec<String> {
    let entrypoint = spec.command.entrypoint.as_deref();
    let trailing = spec.command.args.as_deref();

    match (entrypoint, trailing) {
        // An explicit entrypoint always wins; args are appended when given.
        (Some(entry), extra) => entry
            .iter()
            .chain(extra.unwrap_or(&[]))
            .cloned()
            .collect(),
        (None, Some(only_args)) if !only_args.is_empty() => only_args.to_vec(),
        // Nothing usable in the spec: fall back to the image, then to a shell.
        _ => {
            let mut fallback = Vec::new();
            match image_config.and_then(zlayer_registry::ImageConfig::full_command) {
                Some(img_cmd) if !img_cmd.is_empty() => fallback.extend(img_cmd),
                _ => fallback.push("/bin/sh".to_string()),
            }
            fallback
        }
    }
}
/// Removes the bundle directory and everything beneath it.
///
/// A bundle directory that does not exist counts as success, so the call can
/// safely be repeated.
///
/// # Errors
///
/// Returns [`AgentError::CreateFailed`] (with id `"cleanup"`) when the removal
/// fails for any reason other than the directory being absent.
pub async fn cleanup(&self) -> Result<()> {
    // Remove directly instead of probing with `exists()` first: the probe is
    // a blocking call inside async code and leaves a window in which the
    // directory can vanish between the check and the removal.
    match fs::remove_dir_all(&self.bundle_dir).await {
        Ok(()) => Ok(()),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(AgentError::CreateFailed {
            id: "cleanup".to_string(),
            reason: format!(
                "failed to remove bundle directory {}: {}",
                self.bundle_dir.display(),
                e
            ),
        }),
    }
}
}
/// Convenience wrapper: builds the bundle for `container_id` in its default
/// bundle location, honouring `spec.host_network` and, when given, using
/// `rootfs_path` as the rootfs.
///
/// # Errors
///
/// Propagates any error from `BundleBuilder::build`.
#[cfg(unix)]
pub async fn create_bundle(
    container_id: &ContainerId,
    spec: &ServiceSpec,
    rootfs_path: Option<PathBuf>,
) -> Result<PathBuf> {
    let base = BundleBuilder::for_container(container_id).with_host_network(spec.host_network);
    // The option holds at most one path, so the fold applies `with_rootfs`
    // zero or one times.
    rootfs_path
        .into_iter()
        .fold(base, |builder, rootfs| builder.with_rootfs(rootfs))
        .build(container_id, spec)
        .await
}
/// Removes the default bundle directory belonging to `container_id`.
///
/// # Errors
///
/// Propagates any error from `BundleBuilder::cleanup`.
pub async fn cleanup_bundle(container_id: &ContainerId) -> Result<()> {
    BundleBuilder::for_container(container_id).cleanup().await
}
#[cfg(test)]
mod tests {
use super::*;
use zlayer_spec::*;
/// Minimal fixture: parses an inline deployment document and returns its
/// single service, `test` (image `test:latest`, one HTTP endpoint on 8080).
///
/// Panics if the document fails to parse or lacks the `test` service.
fn mock_spec() -> ServiceSpec {
serde_yaml::from_str::<DeploymentSpec>(
r"
version: v1
deployment: test
services:
test:
rtype: service
image:
name: test:latest
endpoints:
- name: http
protocol: http
port: 8080
",
)
.unwrap()
.services
.remove("test")
.unwrap()
}
/// Fixture like `mock_spec`, with CPU (`0.5`) and memory (`512Mi`) limits
/// plus two environment variables (`MY_VAR`, `ANOTHER`).
///
/// Panics if the document fails to parse or lacks the `test` service.
#[cfg(target_os = "linux")]
fn mock_spec_with_resources() -> ServiceSpec {
serde_yaml::from_str::<DeploymentSpec>(
r"
version: v1
deployment: test
services:
test:
rtype: service
image:
name: test:latest
resources:
cpu: 0.5
memory: 512Mi
env:
MY_VAR: my_value
ANOTHER: value2
endpoints:
- name: http
protocol: http
port: 8080
",
)
.unwrap()
.services
.remove("test")
.unwrap()
}
/// Fixture like `mock_spec`, with `privileged: true` set on the service.
///
/// Panics if the document fails to parse or lacks the `test` service.
#[cfg(target_os = "linux")]
fn mock_privileged_spec() -> ServiceSpec {
serde_yaml::from_str::<DeploymentSpec>(
r"
version: v1
deployment: test
services:
test:
rtype: service
image:
name: test:latest
privileged: true
endpoints:
- name: http
protocol: http
port: 8080
",
)
.unwrap()
.services
.remove("test")
.unwrap()
}
/// Binary (`Ki`/`Mi`/`Gi`) suffixes, the decimal `G` suffix and bare byte
/// counts all parse to the expected number of bytes.
#[test]
fn test_parse_memory_string() {
    // (input, expected byte count)
    let cases = [
        ("512Mi", 512 * 1024 * 1024),
        ("1Gi", 1024 * 1024 * 1024),
        ("2G", 2 * 1000 * 1000 * 1000),
        ("1024", 1024),
        ("512Ki", 512 * 1024),
    ];
    for &(input, expected) in &cases {
        assert_eq!(parse_memory_string(input).unwrap(), expected);
    }
}
/// Empty, non-numeric and fractional inputs are all rejected.
#[test]
fn test_parse_memory_string_errors() {
    for &rejected in &["", "abc", "12.5Mi"] {
        assert!(parse_memory_string(rejected).is_err());
    }
}
/// One nameserver yields one `nameserver` line followed by the options line.
#[test]
fn test_generate_resolv_conf_single_nameserver() {
    let servers = ["10.42.0.1".to_string()];
    let rendered = generate_resolv_conf(&servers);
    assert_eq!(rendered, "nameserver 10.42.0.1\noptions edns0\n");
}
/// Nameservers are emitted in input order, IPv4 and IPv6 alike.
#[test]
fn test_generate_resolv_conf_two_nameservers() {
    let servers = ["10.42.0.1".to_string(), "fd00::1".to_string()];
    let rendered = generate_resolv_conf(&servers);
    assert_eq!(
        rendered,
        "nameserver 10.42.0.1\nnameserver fd00::1\noptions edns0\n"
    );
}
/// With `spec.dns` set, the generated spec mounts a resolv.conf whose host
/// source file contains the rendered nameserver list.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_injects_resolv_conf_mount() {
    let bundle_root = tempfile::tempdir().unwrap();
    let builder = BundleBuilder::new(bundle_root.path().to_path_buf());
    let container = ContainerId::new("test".to_string(), 1);
    let mut service = mock_spec();
    service.dns = vec!["10.42.0.1".to_string()];

    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let resolv_mount = generated
        .mounts()
        .as_ref()
        .expect("mounts present")
        .iter()
        .find(|m| m.destination() == Path::new("/etc/resolv.conf"))
        .expect("resolv.conf mount injected");
    let host_file = resolv_mount.source().as_ref().unwrap();
    let contents = std::fs::read_to_string(host_file).unwrap();
    assert_eq!(contents, "nameserver 10.42.0.1\noptions edns0\n");
}
/// When the spec configures no DNS servers, no resolv.conf mount is added.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_no_resolv_conf_when_dns_empty() {
    let bundle_root = tempfile::tempdir().unwrap();
    let builder = BundleBuilder::new(bundle_root.path().to_path_buf());
    let container = ContainerId::new("test".to_string(), 1);
    // The fixture is used as-is, without touching `dns`.
    let service = mock_spec();

    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let has_resolv_mount = generated
        .mounts()
        .as_ref()
        .expect("mounts present")
        .iter()
        .any(|m| m.destination() == Path::new("/etc/resolv.conf"));
    assert!(
        !has_resolv_mount,
        "no resolv.conf mount should be injected for empty spec.dns"
    );
}
/// Even with DNS servers configured, `host_network` suppresses the
/// resolv.conf mount.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_no_resolv_conf_when_host_network() {
    let bundle_root = tempfile::tempdir().unwrap();
    let builder = BundleBuilder::new(bundle_root.path().to_path_buf());
    let container = ContainerId::new("test".to_string(), 1);
    let mut service = mock_spec();
    service.dns = vec!["10.42.0.1".to_string()];
    service.host_network = true;

    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let has_resolv_mount = generated
        .mounts()
        .as_ref()
        .expect("mounts present")
        .iter()
        .any(|m| m.destination() == Path::new("/etc/resolv.conf"));
    assert!(
        !has_resolv_mount,
        "host_network containers must inherit the host resolv.conf"
    );
}
/// `new` stores the bundle directory and starts without a rootfs override.
#[test]
fn test_bundle_builder_new() {
    let bundle_dir = "/tmp/test-bundle";
    let fresh = BundleBuilder::new(bundle_dir.into());
    assert_eq!(fresh.bundle_dir(), Path::new(bundle_dir));
    assert!(fresh.rootfs_path.is_none());
}
/// `for_container` places the bundle under the system bundles directory, in a
/// sub-directory named `myservice-rep-1` for this container id.
#[test]
fn test_bundle_builder_for_container() {
    let container = ContainerId::new("myservice".to_string(), 1);
    let expected_dir = zlayer_paths::ZLayerDirs::system_default()
        .bundles()
        .join("myservice-rep-1");
    let builder = BundleBuilder::for_container(&container);
    assert_eq!(builder.bundle_dir(), expected_dir);
}
/// `with_rootfs` records the given path as the rootfs override.
#[test]
fn test_bundle_builder_with_rootfs() {
    let image_rootfs = zlayer_paths::ZLayerDirs::system_default()
        .rootfs()
        .join("myimage");
    let builder = BundleBuilder::new("/tmp/test-bundle".into()).with_rootfs(image_rootfs.clone());
    assert_eq!(builder.rootfs_path, Some(image_rootfs));
}
/// The minimal fixture yields a spec at version 1.0.2 with a `rootfs` root
/// and both a process and a linux section.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_basic() {
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let service = mock_spec();
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    assert_eq!(generated.version(), "1.0.2");

    let root = generated.root();
    assert!(root.is_some());
    assert_eq!(
        root.as_ref().unwrap().path(),
        std::path::Path::new("rootfs")
    );

    assert!(generated.process().is_some());
    assert!(generated.linux().is_some());
}
/// `cpu: 0.5` becomes a 50_000 quota over a 100_000 period, and
/// `memory: 512Mi` becomes a limit of 512 MiB in bytes.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_with_resources() {
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let service = mock_spec_with_resources();
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let limits = generated
        .linux()
        .as_ref()
        .unwrap()
        .resources()
        .as_ref()
        .unwrap();

    let cpu = limits.cpu().as_ref().unwrap();
    assert_eq!(cpu.quota(), Some(50_000));
    assert_eq!(cpu.period(), Some(100_000));

    let memory = limits.memory().as_ref().unwrap();
    assert_eq!(memory.limit(), Some(512 * 1024 * 1024));
}
/// Spec ulimits are translated into process rlimits: an explicit `nofile`
/// appears exactly once with the requested values, and negative `nproc`
/// values are clamped to zero.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_translates_ulimits() {
    let mut service = mock_spec();
    let open_files = UlimitSpec {
        soft: 100_000,
        hard: 200_000,
    };
    let processes = UlimitSpec { soft: -1, hard: -5 };
    service.ulimits.insert("nofile".to_string(), open_files);
    service.ulimits.insert("nproc".to_string(), processes);

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let rlimits = generated
        .process()
        .as_ref()
        .expect("process present")
        .rlimits()
        .as_ref()
        .expect("rlimits present");

    let nofile_entries: Vec<_> = rlimits
        .iter()
        .filter(|r| r.typ() == PosixRlimitType::RlimitNofile)
        .collect();
    assert_eq!(nofile_entries.len(), 1, "nofile must not be duplicated");
    assert_eq!(nofile_entries[0].soft(), 100_000);
    assert_eq!(nofile_entries[0].hard(), 200_000);

    let nproc_entry = rlimits
        .iter()
        .find(|r| r.typ() == PosixRlimitType::RlimitNproc)
        .expect("nproc rlimit present");
    assert_eq!(nproc_entry.soft(), 0, "negative soft clamps to 0");
    assert_eq!(nproc_entry.hard(), 0, "negative hard clamps to 0");
}
/// An unrecognised ulimit name fails spec generation, and the error text
/// names the offending key.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_rejects_unknown_ulimit() {
    let mut service = mock_spec();
    let bogus = UlimitSpec { soft: 1, hard: 1 };
    service
        .ulimits
        .insert("not_a_real_ulimit".to_string(), bogus);

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let no_volumes = std::collections::HashMap::new();
    let err = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .expect_err("unknown ulimit name must be rejected");

    let mentions_key = err.to_string().contains("not_a_real_ulimit");
    assert!(
        mentions_key,
        "error should name the unknown ulimit: {err}"
    );
}
/// Without spec ulimits, the default `nofile` rlimit (1024/1024) is kept.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_keeps_oci_default_rlimits_when_ulimits_empty() {
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let service = mock_spec();
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let default_nofile = generated
        .process()
        .as_ref()
        .expect("process present")
        .rlimits()
        .as_ref()
        .expect("oci default rlimits present")
        .iter()
        .find(|r| r.typ() == PosixRlimitType::RlimitNofile)
        .expect("default nofile rlimit present");
    assert_eq!(default_nofile.soft(), 1024);
    assert_eq!(default_nofile.hard(), 1024);
}
/// A privileged spec gets `SysAdmin` and `NetAdmin` in its bounding set and
/// no masked paths.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_privileged() {
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let container = ContainerId::new("test".to_string(), 1);
    let service = mock_privileged_spec();
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let bounding = generated
        .process()
        .as_ref()
        .unwrap()
        .capabilities()
        .as_ref()
        .unwrap()
        .bounding()
        .as_ref()
        .unwrap();
    for required in &[Capability::SysAdmin, Capability::NetAdmin] {
        assert!(bounding.contains(required));
    }

    // Either an absent list or an empty one counts as "nothing masked".
    let linux = generated.linux().as_ref().unwrap();
    let nothing_masked = match linux.masked_paths().as_ref() {
        None => true,
        Some(paths) => paths.is_empty(),
    };
    assert!(nothing_masked);
}
/// The process environment contains the spec's variables, the variable added
/// through `with_env`, and a `PATH` entry.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_oci_spec_environment() {
    let builder = BundleBuilder::new("/tmp/test-bundle".into())
        .with_env("EXTRA_VAR".to_string(), "extra_value".to_string());
    let container = ContainerId::new("test".to_string(), 1);
    let service = mock_spec_with_resources();
    let no_volumes = std::collections::HashMap::new();
    let generated = builder
        .build_spec_only(&container, &service, &no_volumes)
        .await
        .unwrap();

    let env = generated
        .process()
        .as_ref()
        .unwrap()
        .env()
        .as_ref()
        .unwrap();
    for &exact in &["MY_VAR=my_value", "ANOTHER=value2", "EXTRA_VAR=extra_value"] {
        assert!(env.iter().any(|e| e == exact));
    }
    assert!(env.iter().any(|e| e.starts_with("PATH=")));
}
/// Checks that a default build declares PID, IPC, UTS, mount and network
/// namespaces.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_namespaces() {
    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_spec();
    let bundle = BundleBuilder::new("/tmp/test-bundle".into());
    let generated = bundle
        .build_spec_only(&container_id, &service, &std::collections::HashMap::new())
        .await
        .unwrap();

    let declared: Vec<_> = generated
        .linux()
        .as_ref()
        .unwrap()
        .namespaces()
        .as_ref()
        .unwrap()
        .iter()
        .map(|ns| ns.typ())
        .collect();

    for wanted in [
        LinuxNamespaceType::Pid,
        LinuxNamespaceType::Ipc,
        LinuxNamespaceType::Uts,
        LinuxNamespaceType::Mount,
        LinuxNamespaceType::Network,
    ] {
        assert!(declared.contains(&wanted));
    }
}
/// Checks that with `with_host_network(true)` the spec still declares PID,
/// IPC, UTS and mount namespaces but omits the network namespace.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_build_namespaces_host_network() {
    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_spec();
    let bundle = BundleBuilder::new("/tmp/test-bundle".into()).with_host_network(true);
    let generated = bundle
        .build_spec_only(&container_id, &service, &std::collections::HashMap::new())
        .await
        .unwrap();

    let declared: Vec<_> = generated
        .linux()
        .as_ref()
        .unwrap()
        .namespaces()
        .as_ref()
        .unwrap()
        .iter()
        .map(|ns| ns.typ())
        .collect();

    for kept in [
        LinuxNamespaceType::Pid,
        LinuxNamespaceType::Ipc,
        LinuxNamespaceType::Uts,
        LinuxNamespaceType::Mount,
    ] {
        assert!(declared.contains(&kept));
    }

    let has_network = declared.contains(&LinuxNamespaceType::Network);
    assert!(
        !has_network,
        "Network namespace should NOT be present in host_network mode"
    );
}
/// Checks that `build_default_mounts` includes `/proc`, `/dev`, `/dev/pts`,
/// `/dev/shm` and `/sys` among its mount destinations.
#[test]
fn test_build_default_mounts() {
    let service = mock_spec();
    let bundle = BundleBuilder::new("/tmp/test-bundle".into());
    let mounts = bundle.build_default_mounts(&service).unwrap();

    let destinations: Vec<String> = mounts
        .iter()
        .map(|mount| mount.destination().to_string_lossy().into_owned())
        .collect();

    for expected in ["/proc", "/dev", "/dev/pts", "/dev/shm", "/sys"] {
        assert!(destinations.iter().any(|dest| dest == expected));
    }
}
/// Checks that a read-only `bind` storage entry produces exactly one mount
/// from `/host/data` to `/app/data` carrying the `rbind` and `ro` options.
#[test]
fn test_build_storage_mounts_bind() {
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`, and each storage field under its list item. The
    // fixture therefore starts at column 0 and indents relative to that.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /host/data
        target: /app/data
        readonly: true
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    // Bind mounts need no pre-resolved volume paths.
    let volume_paths = std::collections::HashMap::new();
    let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();

    assert_eq!(mounts.len(), 1);
    assert_eq!(mounts[0].destination().to_string_lossy(), "/app/data");
    assert_eq!(
        mounts[0]
            .source()
            .as_ref()
            .map(|s| s.to_string_lossy().to_string()),
        Some("/host/data".to_string())
    );

    let options = mounts[0].options().as_ref().unwrap();
    assert!(options.contains(&"rbind".to_string()));
    assert!(options.contains(&"ro".to_string()));
}
/// Checks that a `named` storage entry is mounted at its target using the
/// host path supplied for that volume name in `volume_paths`.
#[test]
fn test_build_storage_mounts_named() {
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`, and each storage field under its list item.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: named
        name: my-volume
        target: /app/data
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let dirs = zlayer_paths::ZLayerDirs::system_default();
    let expected_source = dirs.volumes().join("my-volume");
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let mut volume_paths = std::collections::HashMap::new();
    volume_paths.insert("my-volume".to_string(), expected_source.clone());

    let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();

    assert_eq!(mounts.len(), 1);
    assert_eq!(mounts[0].destination().to_string_lossy(), "/app/data");
    assert_eq!(
        mounts[0]
            .source()
            .as_ref()
            .map(|s| s.to_string_lossy().to_string()),
        Some(expected_source.to_string_lossy().into_owned())
    );
}
/// Checks that a `tmpfs` storage entry produces one mount of type `tmpfs`
/// at its target with `size=` and `mode=` options.
#[test]
fn test_build_storage_mounts_tmpfs() {
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`, and each storage field under its list item.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: tmpfs
        target: /app/tmp
        size: 256Mi
        mode: 1777
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let volume_paths = std::collections::HashMap::new();
    let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();

    assert_eq!(mounts.len(), 1);
    assert_eq!(mounts[0].destination().to_string_lossy(), "/app/tmp");
    assert_eq!(mounts[0].typ().as_ref().map(String::as_str), Some("tmpfs"));

    // Only the presence of the options is pinned, not their rendered values.
    let options = mounts[0].options().as_ref().unwrap();
    assert!(options.iter().any(|o| o.starts_with("size=")));
    assert!(options.iter().any(|o| o.starts_with("mode=")));
}
/// Checks that a service with bind, named and tmpfs storage entries yields
/// three mounts, one per declared target.
#[test]
fn test_build_storage_mounts_multiple() {
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`, and each storage field under its list item.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /etc/config
        target: /app/config
        readonly: true
      - type: named
        name: app-data
        target: /app/data
      - type: tmpfs
        target: /app/tmp
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let dirs = zlayer_paths::ZLayerDirs::system_default();
    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    // Only the named volume needs a resolved host path.
    let mut volume_paths = std::collections::HashMap::new();
    volume_paths.insert("app-data".to_string(), dirs.volumes().join("app-data"));

    let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();
    assert_eq!(mounts.len(), 3);

    let destinations: Vec<String> = mounts
        .iter()
        .map(|m| m.destination().to_string_lossy().to_string())
        .collect();
    for expected in ["/app/config", "/app/data", "/app/tmp"] {
        assert!(destinations.iter().any(|dest| dest == expected));
    }
}
/// Checks that `build_storage_mounts` returns an error for an `anonymous`
/// storage entry when `volume_paths` is empty.
#[test]
fn test_build_storage_mounts_anonymous_missing_path() {
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`. The fixture has to parse cleanly so that the failure
    // observed below comes from `build_storage_mounts`, not from `unwrap()`.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: anonymous
        target: /app/cache
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let volume_paths = std::collections::HashMap::new();
    let result = builder.build_storage_mounts(&spec, &volume_paths);
    assert!(result.is_err());
}
/// Checks that the full OCI spec lists both default mounts (`/proc`, `/dev`)
/// and the service's storage mounts (`/app/data`, `/app/tmp`).
#[cfg(target_os = "linux")]
#[tokio::test]
async fn test_oci_spec_includes_storage_mounts() {
    let id = ContainerId::new("test".to_string(), 1);
    // YAML nesting is indentation-sensitive: the service must sit under
    // `services.test`, and each storage field under its list item.
    let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
        r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /host/data
        target: /app/data
      - type: tmpfs
        target: /app/tmp
",
    )
    .unwrap()
    .services
    .remove("test")
    .unwrap();

    let builder = BundleBuilder::new("/tmp/test-bundle".into());
    let volume_paths = std::collections::HashMap::new();
    let oci_spec = builder
        .build_spec_only(&id, &spec, &volume_paths)
        .await
        .unwrap();

    let mounts = oci_spec.mounts().as_ref().unwrap();
    let destinations: Vec<String> = mounts
        .iter()
        .map(|m| m.destination().to_string_lossy().to_string())
        .collect();

    // Two default mounts followed by the two storage targets.
    for expected in ["/proc", "/dev", "/app/data", "/app/tmp"] {
        assert!(destinations.iter().any(|dest| dest == expected));
    }
}
/// Builds the `test` service of a one-service deployment that requests
/// `count` GPUs from `vendor` and exposes one HTTP endpoint on port 8080.
///
/// Panics if the generated YAML does not deserialize or lacks the `test`
/// service.
fn mock_gpu_spec(vendor: &str, count: u32) -> ServiceSpec {
    // YAML nesting is indentation-sensitive: `gpu` must sit under
    // `resources`, which must sit under `services.test`. The template
    // therefore starts at column 0 and indents relative to that.
    let yaml = format!(
        "
version: v1
deployment: test
services:
  test:
    rtype: service
    image:
      name: test:latest
    resources:
      gpu:
        count: {count}
        vendor: {vendor}
    endpoints:
      - name: http
        protocol: http
        port: 8080
"
    );
    serde_yaml::from_str::<DeploymentSpec>(&yaml)
        .unwrap()
        .services
        .remove("test")
        .unwrap()
}
/// Writes `json` to a file named `nvidia.json` inside `dir`.
///
/// Panics if the file cannot be written.
fn write_nvidia_cdi_fixture(dir: &std::path::Path, json: &str) {
    let spec_file = dir.join("nvidia.json");
    std::fs::write(spec_file, json).unwrap();
}
/// Returns a CDI JSON document of kind `nvidia.com/gpu` declaring a single
/// device named `0` with one device node (`/dev/nvidia0`), one environment
/// variable and one `createContainer` hook.
fn nvidia_cdi_fixture() -> &'static str {
    const FIXTURE: &str = r#"{
"cdiVersion": "0.6.0",
"kind": "nvidia.com/gpu",
"devices": [{
"name": "0",
"containerEdits": {
"deviceNodes": [
{"path": "/dev/nvidia0", "type": "c", "major": 195, "minor": 0}
],
"env": ["NVIDIA_VISIBLE_DEVICES=0"],
"hooks": {
"createContainer": [{
"path": "/usr/bin/nvidia-container-runtime-hook",
"args": ["nvidia-container-runtime-hook", "prestart"]
}]
}
}
}]
}"#;
    FIXTURE
}
/// Builds a one-GPU NVIDIA spec against the single-device CDI fixture and
/// checks that the fixture's device node, environment variable and
/// `createContainer` hook all appear in the generated OCI spec.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn gpu_spec_translates_to_cdi_device_nodes() {
    let cdi_dir = tempfile::tempdir().unwrap();
    write_nvidia_cdi_fixture(cdi_dir.path(), nvidia_cdi_fixture());
    let registry =
        std::sync::Arc::new(crate::cdi::CdiRegistry::discover_from(&[cdi_dir.path()]));

    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_gpu_spec("nvidia", 1);
    let bundle = BundleBuilder::new("/tmp/test-bundle-cdi".into()).with_cdi_registry(registry);
    let generated = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect("build with CDI fixture");

    // Device node declared by the fixture.
    let linux = generated.linux().as_ref().expect("linux config present");
    let devices = linux.devices().as_ref().expect("devices present");
    let device_paths: Vec<_> = devices
        .iter()
        .map(oci_spec::runtime::LinuxDevice::path)
        .collect();
    let wanted_node = std::path::Path::new("/dev/nvidia0");
    assert!(
        device_paths.iter().any(|p| p.as_path() == wanted_node),
        "expected /dev/nvidia0 from CDI fixture; got {:?}",
        device_paths
    );

    // Environment variable declared by the fixture.
    let process = generated.process().as_ref().expect("process present");
    let env = process.env().as_ref().expect("env present");
    let has_visible_devices = env.iter().any(|e| e == "NVIDIA_VISIBLE_DEVICES=0");
    assert!(
        has_visible_devices,
        "expected NVIDIA_VISIBLE_DEVICES=0 in env; got {env:?}"
    );

    // Hook declared by the fixture.
    let create_container = generated
        .hooks()
        .as_ref()
        .expect("hooks present")
        .create_container()
        .as_ref()
        .expect("createContainer hooks present");
    assert_eq!(create_container.len(), 1);
    let expected_hook = std::path::PathBuf::from("/usr/bin/nvidia-container-runtime-hook");
    assert_eq!(create_container[0].path(), &expected_hook);
}
/// Checks that building a GPU spec against a registry discovered from an
/// empty directory fails with `AgentError::InvalidSpec` whose message
/// mentions `nvidia` or `CDI`.
#[tokio::test]
async fn gpu_spec_with_missing_cdi_returns_error() {
    // No fixture is written, so discovery sees an empty directory.
    let empty_dir = tempfile::tempdir().unwrap();
    let registry =
        std::sync::Arc::new(crate::cdi::CdiRegistry::discover_from(&[empty_dir.path()]));

    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_gpu_spec("nvidia", 1);
    let bundle =
        BundleBuilder::new("/tmp/test-bundle-cdi-missing".into()).with_cdi_registry(registry);
    let failure = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect_err("should fail when CDI registry is empty");

    let msg = match failure {
        AgentError::InvalidSpec(msg) => msg,
        other => panic!("expected InvalidSpec, got {other:?}"),
    };
    let mentions_cdi = msg.contains("nvidia") || msg.contains("CDI");
    assert!(
        mentions_cdi,
        "error should mention CDI / vendor; got: {msg}"
    );
}
/// Checks that requesting two GPUs when the CDI fixture declares only device
/// `0` fails with `AgentError::InvalidSpec` whose message mentions `'1'` or
/// `device`.
#[tokio::test]
async fn gpu_spec_with_unknown_device_returns_error() {
    let cdi_dir = tempfile::tempdir().unwrap();
    write_nvidia_cdi_fixture(cdi_dir.path(), nvidia_cdi_fixture());
    let registry =
        std::sync::Arc::new(crate::cdi::CdiRegistry::discover_from(&[cdi_dir.path()]));

    let container_id = ContainerId::new("test".to_string(), 1);
    // Two GPUs requested against a one-device fixture.
    let service = mock_gpu_spec("nvidia", 2);
    let bundle =
        BundleBuilder::new("/tmp/test-bundle-cdi-unknown".into()).with_cdi_registry(registry);
    let failure = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect_err("should fail when device '1' is not declared");

    let msg = match failure {
        AgentError::InvalidSpec(msg) => msg,
        other => panic!("expected InvalidSpec, got {other:?}"),
    };
    let names_device = msg.contains("'1'") || msg.contains("device");
    assert!(
        names_device,
        "error should mention the missing device; got: {msg}"
    );
}
/// Uses a two-device CDI fixture to check that resolving `all` returns two
/// edit sets, and that a two-GPU spec exposes both `/dev/nvidia0` and
/// `/dev/nvidia1` in the generated OCI spec.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn gpu_spec_with_all_devices_expands_to_all_in_spec() {
    const TWO_DEVICE_FIXTURE: &str = r#"{
"cdiVersion": "0.6.0",
"kind": "nvidia.com/gpu",
"devices": [
{
"name": "0",
"containerEdits": {
"env": ["NVIDIA_VISIBLE_DEVICES=0"],
"deviceNodes": [
{"path": "/dev/nvidia0", "type": "c", "major": 195, "minor": 0}
]
}
},
{
"name": "1",
"containerEdits": {
"env": ["NVIDIA_VISIBLE_DEVICES=1"],
"deviceNodes": [
{"path": "/dev/nvidia1", "type": "c", "major": 195, "minor": 1}
]
}
}
]
}"#;

    let cdi_dir = tempfile::tempdir().unwrap();
    write_nvidia_cdi_fixture(cdi_dir.path(), TWO_DEVICE_FIXTURE);
    let registry =
        std::sync::Arc::new(crate::cdi::CdiRegistry::discover_from(&[cdi_dir.path()]));

    // Registry-level check: `all` resolves to one edit set per device.
    let resolved = registry
        .resolve_for_kind("nvidia.com/gpu", &["all".to_string()])
        .expect("resolve all");
    assert_eq!(resolved.len(), 2);

    // Spec-level check: both device nodes are present.
    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_gpu_spec("nvidia", 2);
    let bundle =
        BundleBuilder::new("/tmp/test-bundle-cdi-all".into()).with_cdi_registry(registry);
    let generated = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect("build with 2-device fixture");

    let devices = generated
        .linux()
        .as_ref()
        .unwrap()
        .devices()
        .as_ref()
        .expect("devices present");
    let paths: Vec<_> = devices.iter().map(|d| d.path().clone()).collect();
    for expected in ["/dev/nvidia0", "/dev/nvidia1"] {
        assert!(paths.contains(&std::path::PathBuf::from(expected)));
    }
}
/// Writes the single-device NVIDIA CDI fixture into `dir` and returns a
/// shared registry discovered from that directory.
fn build_nvidia_cdi_registry(dir: &std::path::Path) -> std::sync::Arc<crate::cdi::CdiRegistry> {
    write_nvidia_cdi_fixture(dir, nvidia_cdi_fixture());
    let discovered = crate::cdi::CdiRegistry::discover_from(&[dir]);
    std::sync::Arc::new(discovered)
}
/// With MPS sharing enabled and both MPS directories present on disk, checks
/// that the generated spec sets `CUDA_MPS_PIPE_DIRECTORY` and
/// `CUDA_MPS_LOG_DIRECTORY` and mounts each directory onto the same path.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn gpu_spec_with_mps_sharing_injects_env_and_mounts() {
    let cdi_dir = tempfile::tempdir().unwrap();
    let mps_root = tempfile::tempdir().unwrap();
    let pipe_dir = mps_root.path().join("nvidia-mps");
    let log_dir = mps_root.path().join("nvidia-log");
    // Both directories must exist before the spec is built.
    for dir in [&pipe_dir, &log_dir] {
        std::fs::create_dir(dir).unwrap();
    }
    let registry = build_nvidia_cdi_registry(cdi_dir.path());

    let container_id = ContainerId::new("test".to_string(), 1);
    let mut service = mock_gpu_spec("nvidia", 1);
    {
        let gpu = service.resources.gpu.as_mut().expect("gpu spec set");
        gpu.sharing = Some(zlayer_spec::GpuSharingMode::Mps);
        gpu.mps_pipe_dir = Some(pipe_dir.to_string_lossy().into_owned());
        gpu.mps_log_dir = Some(log_dir.to_string_lossy().into_owned());
    }

    let bundle =
        BundleBuilder::new("/tmp/test-bundle-mps-env".into()).with_cdi_registry(registry);
    let generated = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect("build with MPS sharing");

    let env = generated
        .process()
        .as_ref()
        .and_then(|p| p.env().as_ref())
        .expect("env present");
    let pipe_expect = format!("CUDA_MPS_PIPE_DIRECTORY={}", pipe_dir.display());
    let log_expect = format!("CUDA_MPS_LOG_DIRECTORY={}", log_dir.display());
    let has_pipe_env = env.iter().any(|e| e == &pipe_expect);
    assert!(has_pipe_env, "expected {pipe_expect} in env; got {env:?}");
    let has_log_env = env.iter().any(|e| e == &log_expect);
    assert!(has_log_env, "expected {log_expect} in env; got {env:?}");

    let mounts = generated.mounts().as_ref().expect("mounts present");
    // A directory counts as mounted when source and destination are both that path.
    let mounted_onto_itself = |dir: &std::path::PathBuf| {
        mounts
            .iter()
            .any(|m| m.destination() == dir && m.source().as_ref() == Some(dir))
    };
    assert!(
        mounted_onto_itself(&pipe_dir),
        "expected bind mount of MPS pipe dir {}; got destinations {:?}",
        pipe_dir.display(),
        mounts.iter().map(Mount::destination).collect::<Vec<_>>()
    );
    assert!(
        mounted_onto_itself(&log_dir),
        "expected bind mount of MPS log dir {}",
        log_dir.display()
    );
}
/// Checks that enabling MPS sharing with a pipe directory that does not
/// exist fails with `AgentError::GpuSharingUnavailable` for mode `mps`, and
/// that the reason names the missing path.
#[tokio::test]
async fn gpu_spec_with_mps_sharing_fails_when_pipe_dir_missing() {
    let cdi_dir = tempfile::tempdir().unwrap();
    let registry = build_nvidia_cdi_registry(cdi_dir.path());
    let id = ContainerId::new("test".to_string(), 1);
    let mut spec = mock_gpu_spec("nvidia", 1);
    let gpu = spec.resources.gpu.as_mut().expect("gpu spec set");
    gpu.sharing = Some(zlayer_spec::GpuSharingMode::Mps);

    // A child of a fresh temp dir that is never created, so the path is
    // guaranteed not to exist while `missing` keeps the parent alive.
    let missing = tempfile::tempdir().unwrap();
    let missing_path = missing.path().join("definitely-not-here");
    gpu.mps_pipe_dir = Some(missing_path.to_string_lossy().into_owned());

    let builder =
        BundleBuilder::new("/tmp/test-bundle-mps-missing".into()).with_cdi_registry(registry);
    let err = builder
        .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
        .await
        .expect_err("should fail when MPS pipe dir is missing");

    match err {
        AgentError::GpuSharingUnavailable { mode, reason } => {
            assert_eq!(mode, "mps");
            // Require the path itself: matching on the word "pipe" alone
            // would pass for any pipe-related message and would not prove
            // that the offending directory is reported.
            let missing_display = missing_path.display().to_string();
            assert!(
                reason.contains(&missing_display),
                "reason should mention the missing path; got: {reason}"
            );
        }
        other => panic!("expected GpuSharingUnavailable, got {other:?}"),
    }
}
/// With time-slice sharing and `time_slice_index = 2`, checks that the
/// environment holds exactly one `CUDA_VISIBLE_DEVICES` entry and that its
/// value is `2`.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn gpu_spec_with_timeslicing_injects_visible_devices() {
    let cdi_dir = tempfile::tempdir().unwrap();
    let registry = build_nvidia_cdi_registry(cdi_dir.path());

    let container_id = ContainerId::new("test".to_string(), 1);
    let mut service = mock_gpu_spec("nvidia", 1);
    {
        let gpu = service.resources.gpu.as_mut().expect("gpu spec set");
        gpu.sharing = Some(zlayer_spec::GpuSharingMode::TimeSlice);
        gpu.time_slice_index = Some(2);
    }

    let bundle =
        BundleBuilder::new("/tmp/test-bundle-timeslice".into()).with_cdi_registry(registry);
    let generated = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect("build with time-slicing");

    let env = generated
        .process()
        .as_ref()
        .and_then(|p| p.env().as_ref())
        .expect("env present");

    // Collect every entry for the variable so duplicates are detected too.
    let mut cuda_entries: Vec<&String> = Vec::new();
    for entry in env {
        if entry.starts_with("CUDA_VISIBLE_DEVICES=") {
            cuda_entries.push(entry);
        }
    }
    assert_eq!(
        cuda_entries.len(),
        1,
        "exactly one CUDA_VISIBLE_DEVICES expected; got {cuda_entries:?}"
    );
    assert_eq!(cuda_entries[0], "CUDA_VISIBLE_DEVICES=2");
}
/// With no sharing mode set, checks that the generated spec has no
/// `CUDA_MPS_*` environment entries and no mount whose destination contains
/// `nvidia-mps`.
#[cfg(target_os = "linux")]
#[tokio::test]
async fn gpu_spec_without_sharing_omits_mps_env() {
    let cdi_dir = tempfile::tempdir().unwrap();
    let registry = build_nvidia_cdi_registry(cdi_dir.path());

    let container_id = ContainerId::new("test".to_string(), 1);
    let service = mock_gpu_spec("nvidia", 1);
    // Precondition: the mock really has no sharing mode configured.
    assert!(service.resources.gpu.as_ref().unwrap().sharing.is_none());

    let bundle =
        BundleBuilder::new("/tmp/test-bundle-no-sharing".into()).with_cdi_registry(registry);
    let generated = bundle
        .build_oci_spec(&container_id, &service, &std::collections::HashMap::new())
        .await
        .expect("build without sharing");

    let env = generated
        .process()
        .as_ref()
        .and_then(|p| p.env().as_ref())
        .expect("env present");
    let env_free_of_mps = env.iter().all(|e| !e.starts_with("CUDA_MPS_"));
    assert!(
        env_free_of_mps,
        "no CUDA_MPS_* env should be present without sharing; got {env:?}"
    );

    let mounts = generated.mounts().as_ref().expect("mounts present");
    let mounts_free_of_mps = mounts
        .iter()
        .all(|m| !m.destination().to_string_lossy().contains("nvidia-mps"));
    assert!(
        mounts_free_of_mps,
        "no MPS pipe mount should be present without sharing"
    );
}
#[cfg(unix)]
mod subid_tests {
use super::super::read_subid_range;
use std::io::Write;
#[test]
fn read_subid_range_returns_range_for_user() {
let mut tmp = tempfile::NamedTempFile::new().unwrap();
writeln!(tmp, "alice:100000:65536").unwrap();
writeln!(tmp, "bob:165536:65536").unwrap();
tmp.flush().unwrap();
let path = tmp.path().to_str().unwrap();
assert_eq!(read_subid_range(path, "bob"), Some((165_536, 65_536)));
assert_eq!(read_subid_range(path, "alice"), Some((100_000, 65_536)));
}
#[test]
fn read_subid_range_returns_none_for_unknown_user() {
let mut tmp = tempfile::NamedTempFile::new().unwrap();
writeln!(tmp, "alice:100000:65536").unwrap();
tmp.flush().unwrap();
assert_eq!(
read_subid_range(tmp.path().to_str().unwrap(), "carol"),
None
);
}
#[test]
fn read_subid_range_returns_none_on_missing_file() {
assert_eq!(
read_subid_range("/this/path/does/not/exist/subuid", "anyone"),
None
);
}
}
}