Skip to main content

zlayer_agent/
bundle.rs

1//! OCI Bundle Creation
2//!
3//! Creates OCI-compliant bundles for container runtimes using libcontainer (youki).
4//! A bundle consists of a directory with:
5//! - config.json: OCI runtime specification
6//! - rootfs/: Container filesystem (symlink or bind mount target)
7
8use crate::error::{AgentError, Result};
9use crate::runtime::ContainerId;
10use oci_spec::runtime::{
11    Capability, LinuxBuilder, LinuxCapabilitiesBuilder, LinuxCpuBuilder, LinuxDeviceBuilder,
12    LinuxDeviceCgroupBuilder, LinuxDeviceType, LinuxMemoryBuilder, LinuxNamespaceBuilder,
13    LinuxNamespaceType, LinuxResourcesBuilder, Mount, MountBuilder, ProcessBuilder, RootBuilder,
14    Spec, SpecBuilder, UserBuilder,
15};
16use std::collections::{HashMap, HashSet};
17use std::path::{Path, PathBuf};
18use std::str::FromStr;
19use std::sync::Arc;
20use tokio::fs;
21use zlayer_secrets::SecretsProvider;
22use zlayer_spec::{ServiceSpec, StorageSpec, StorageTier};
23
/// All Linux capabilities for privileged mode
///
/// `build_capabilities` copies this table into the bounding, effective and
/// permitted sets when `spec.privileged` is set. Entries are kept in
/// alphabetical order by variant name so additions are easy to review.
const ALL_CAPABILITIES: &[Capability] = &[
    Capability::AuditControl,
    Capability::AuditRead,
    Capability::AuditWrite,
    Capability::BlockSuspend,
    Capability::Bpf,
    Capability::CheckpointRestore,
    Capability::Chown,
    Capability::DacOverride,
    Capability::DacReadSearch,
    Capability::Fowner,
    Capability::Fsetid,
    Capability::IpcLock,
    Capability::IpcOwner,
    Capability::Kill,
    Capability::Lease,
    Capability::LinuxImmutable,
    Capability::MacAdmin,
    Capability::MacOverride,
    Capability::Mknod,
    Capability::NetAdmin,
    Capability::NetBindService,
    Capability::NetBroadcast,
    Capability::NetRaw,
    Capability::Perfmon,
    Capability::Setfcap,
    Capability::Setgid,
    Capability::Setpcap,
    Capability::Setuid,
    Capability::SysAdmin,
    Capability::SysBoot,
    Capability::SysChroot,
    Capability::SysModule,
    Capability::SysNice,
    Capability::SysPacct,
    Capability::SysPtrace,
    Capability::SysRawio,
    Capability::SysResource,
    Capability::SysTime,
    Capability::SysTtyConfig,
    Capability::Syslog,
    Capability::WakeAlarm,
];
68
/// Parse memory string like "512Mi", "1Gi" to bytes
///
/// Supports both IEC (binary) and SI (decimal) units:
/// - IEC: Ki, Mi, Gi, Ti (powers of 1024)
/// - SI: K/k, M/m, G/g, T/t (powers of 1000)
/// - No suffix: bytes
///
/// Leading and trailing whitespace around the whole string is ignored. The
/// numeric part must be an unsigned integer; fractional values are rejected.
///
/// # Examples
/// ```ignore
/// assert_eq!(parse_memory_string("512Mi").unwrap(), 512 * 1024 * 1024);
/// assert_eq!(parse_memory_string("1Gi").unwrap(), 1024 * 1024 * 1024);
/// assert_eq!(parse_memory_string("2G").unwrap(), 2 * 1000 * 1000 * 1000);
/// ```
///
/// # Errors
/// Returns an error if the string is empty, the numeric part is not a valid
/// `u64`, or the resulting byte count does not fit in a `u64`.
pub fn parse_memory_string(s: &str) -> std::result::Result<u64, String> {
    const KIB: u64 = 1024;
    const KB: u64 = 1000;
    // Checked top to bottom; the first suffix that matches wins.
    const UNITS: &[(&str, u64)] = &[
        ("Ki", KIB),
        ("Mi", KIB * KIB),
        ("Gi", KIB * KIB * KIB),
        ("Ti", KIB * KIB * KIB * KIB),
        ("K", KB),
        ("k", KB),
        ("M", KB * KB),
        ("m", KB * KB),
        ("G", KB * KB * KB),
        ("g", KB * KB * KB),
        ("T", KB * KB * KB * KB),
        ("t", KB * KB * KB * KB),
    ];

    let s = s.trim();
    if s.is_empty() {
        return Err("empty memory string".to_string());
    }

    let (num_str, multiplier) = UNITS
        .iter()
        .find_map(|&(suffix, factor)| s.strip_suffix(suffix).map(|digits| (digits, factor)))
        .unwrap_or((s, 1));

    let num: u64 = num_str
        .parse()
        .map_err(|e| format!("invalid number: {e}"))?;

    // A plain `*` would panic in debug builds and silently wrap in release
    // builds for inputs such as "99999999999Ti"; report it as an error instead.
    num.checked_mul(multiplier)
        .ok_or_else(|| format!("memory size out of range: {s}"))
}
117
/// Get major and minor device numbers from a device path
///
/// Reads the path's metadata (following symlinks) and decodes its `st_rdev`
/// value with [`split_linux_rdev`].
///
/// # Errors
/// Returns the underlying I/O error if the path's metadata cannot be read.
#[cfg(unix)]
fn get_device_major_minor(path: &str) -> std::io::Result<(i64, i64)> {
    use std::os::unix::fs::MetadataExt;
    let rdev = std::fs::metadata(path)?.rdev();
    Ok(split_linux_rdev(rdev))
}

/// Split a Linux `dev_t` value into `(major, minor)`.
///
/// Uses the glibc/Linux layout (see `makedev(3)`): the major number occupies
/// bits 8..20 plus bits 44..64, and the minor number occupies bits 0..8 plus
/// bits 20..44. Masking only 8 bits for each half truncates any major or minor
/// number above 255.
///
/// Other Unix kernels encode `dev_t` differently; this decoder is only
/// correct for Linux device numbers.
#[cfg(unix)]
// Both halves are masked to at most 32 bits, so the casts to i64 cannot wrap.
#[allow(clippy::cast_possible_wrap)]
fn split_linux_rdev(rdev: u64) -> (i64, i64) {
    let major = ((rdev >> 32) & 0xffff_f000) | ((rdev >> 8) & 0x0000_0fff);
    let minor = ((rdev >> 12) & 0xffff_ff00) | (rdev & 0x0000_00ff);
    (major as i64, minor as i64)
}

/// Non-Unix stub: device-cgroup probes require Unix; callers use `if let Ok(..)` to skip.
///
/// # Errors
/// Always returns an [`std::io::ErrorKind::Unsupported`] error.
#[cfg(not(unix))]
fn get_device_major_minor(_path: &str) -> std::io::Result<(i64, i64)> {
    Err(std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "device-cgroup probes require Unix",
    ))
}
139
140/// Detect device type from path
141#[cfg(unix)]
142fn get_device_type(path: &str) -> std::io::Result<LinuxDeviceType> {
143    use std::os::unix::fs::FileTypeExt;
144    let metadata = std::fs::metadata(path)?;
145    let file_type = metadata.file_type();
146    if file_type.is_char_device() {
147        Ok(LinuxDeviceType::C)
148    } else if file_type.is_block_device() {
149        Ok(LinuxDeviceType::B)
150    } else {
151        Ok(LinuxDeviceType::U) // Unknown/other
152    }
153}
154
155/// Non-Unix stub: device-cgroup probes require Unix; callers use `.unwrap_or(..)` to skip.
156#[cfg(not(unix))]
157fn get_device_type(_path: &str) -> std::io::Result<LinuxDeviceType> {
158    Err(std::io::Error::new(
159        std::io::ErrorKind::Unsupported,
160        "device-cgroup probes require Unix",
161    ))
162}
163
/// Builder for OCI container bundles
///
/// Creates the directory structure and config.json required for OCI-compliant
/// container runtimes like runc or youki.
///
/// Every field except `bundle_dir` starts out empty/disabled (see
/// [`BundleBuilder::new`]) and is filled in through the `with_*` methods.
/// `Debug` is implemented by hand so the secrets provider is printed only as
/// a present/absent flag.
///
/// # Example
/// ```ignore
/// let dirs = zlayer_paths::ZLayerDirs::system_default();
/// let builder = BundleBuilder::new(dirs.bundles().join("mycontainer"))
///     .with_rootfs(dirs.rootfs().join("myimage"));
///
/// let bundle_path = builder.build(&container_id, &service_spec).await?;
/// ```
#[derive(Clone)]
pub struct BundleBuilder {
    /// Base directory for the bundle
    bundle_dir: PathBuf,
    /// Path to the unpacked rootfs (from image layers); when `None`, an empty
    /// `rootfs/` directory is created inside the bundle instead of a symlink
    rootfs_path: Option<PathBuf>,
    /// Custom hostname (defaults to container ID)
    hostname: Option<String>,
    /// Additional environment variables; applied after image and spec
    /// variables, replacing earlier entries with the same key
    extra_env: Vec<(String, String)>,
    /// Custom working directory (takes precedence over the spec and image config)
    cwd: Option<String>,
    /// Custom command/args to run (overrides image default)
    args: Option<Vec<String>>,
    /// Pre-resolved volume paths from `StorageManager`
    volume_paths: HashMap<String, PathBuf>,
    /// Image configuration from the OCI registry (entrypoint, cmd, env, workdir, user)
    image_config: Option<zlayer_registry::ImageConfig>,
    /// Use host networking (skip Network namespace, container shares host network)
    host_network: bool,
    /// Secrets provider for resolving $S: prefixed env vars; only consulted
    /// when `deployment_scope` is also set
    secrets_provider: Option<Arc<dyn SecretsProvider>>,
    /// Deployment scope for secret lookups (e.g., deployment name)
    deployment_scope: Option<String>,
    /// Host-side Unix socket path to bind-mount into the container
    socket_path: Option<String>,
}
204
205impl std::fmt::Debug for BundleBuilder {
206    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207        f.debug_struct("BundleBuilder")
208            .field("bundle_dir", &self.bundle_dir)
209            .field("rootfs_path", &self.rootfs_path)
210            .field("hostname", &self.hostname)
211            .field("extra_env", &self.extra_env)
212            .field("cwd", &self.cwd)
213            .field("args", &self.args)
214            .field("volume_paths", &self.volume_paths)
215            .field("image_config", &self.image_config)
216            .field("host_network", &self.host_network)
217            .field("secrets_provider", &self.secrets_provider.is_some())
218            .field("deployment_scope", &self.deployment_scope)
219            .field("socket_path", &self.socket_path)
220            .finish()
221    }
222}
223
224impl BundleBuilder {
225    /// Create a new `BundleBuilder` with the specified bundle directory
226    ///
227    /// The bundle directory will be created if it doesn't exist.
228    /// The structure will be:
229    /// ```text
230    /// {bundle_dir}/
231    /// ├── config.json
232    /// └── rootfs/  (symlink to actual rootfs or mount point)
233    /// ```
234    #[must_use]
235    pub fn new(bundle_dir: PathBuf) -> Self {
236        Self {
237            bundle_dir,
238            rootfs_path: None,
239            hostname: None,
240            extra_env: Vec::new(),
241            cwd: None,
242            args: None,
243            volume_paths: HashMap::new(),
244            image_config: None,
245            host_network: false,
246            secrets_provider: None,
247            deployment_scope: None,
248            socket_path: None,
249        }
250    }
251
252    /// Create a `BundleBuilder` for a container in the default bundle location
253    #[must_use]
254    pub fn for_container(container_id: &ContainerId) -> Self {
255        let bundle_dir = zlayer_paths::ZLayerDirs::system_default()
256            .bundles()
257            .join(container_id.to_string());
258        Self::new(bundle_dir)
259    }
260
261    /// Set the rootfs path (from unpacked image layers)
262    ///
263    /// This path will be symlinked into the bundle as `rootfs/`
264    #[must_use]
265    pub fn with_rootfs(mut self, rootfs_path: PathBuf) -> Self {
266        self.rootfs_path = Some(rootfs_path);
267        self
268    }
269
270    /// Set a custom hostname for the container
271    #[must_use]
272    pub fn with_hostname(mut self, hostname: String) -> Self {
273        self.hostname = Some(hostname);
274        self
275    }
276
277    /// Add extra environment variables
278    #[must_use]
279    pub fn with_env(mut self, key: String, value: String) -> Self {
280        self.extra_env.push((key, value));
281        self
282    }
283
284    /// Set the working directory
285    #[must_use]
286    pub fn with_cwd(mut self, cwd: String) -> Self {
287        self.cwd = Some(cwd);
288        self
289    }
290
291    /// Set the command/args to run
292    #[must_use]
293    pub fn with_args(mut self, args: Vec<String>) -> Self {
294        self.args = Some(args);
295        self
296    }
297
298    /// Set pre-resolved volume paths from `StorageManager`
299    ///
300    /// These are used to map named/anonymous/S3 volumes to their host paths
301    /// when building storage mounts in the OCI spec.
302    #[must_use]
303    pub fn with_volume_paths(mut self, volume_paths: HashMap<String, PathBuf>) -> Self {
304        self.volume_paths = volume_paths;
305        self
306    }
307
308    /// Set the OCI image configuration (entrypoint, cmd, env, workdir, user)
309    ///
310    /// When set, the image config provides defaults for the container process
311    /// that are used when the deployment spec doesn't override them.
312    #[must_use]
313    pub fn with_image_config(mut self, config: zlayer_registry::ImageConfig) -> Self {
314        self.image_config = Some(config);
315        self
316    }
317
318    /// Enable host networking mode
319    ///
320    /// When true, the container will NOT get its own network namespace and will
321    /// share the host's network stack. This is equivalent to Docker's `--network host`.
322    /// Use this when overlay networking is unavailable or not desired.
323    #[must_use]
324    pub fn with_host_network(mut self, host_network: bool) -> Self {
325        self.host_network = host_network;
326        self
327    }
328
329    /// Set the secrets provider for resolving `$S:` prefixed environment variables
330    ///
331    /// When set, environment variables with `$S:secret-name` syntax will be resolved
332    /// from this provider at bundle creation time.
333    #[must_use]
334    pub fn with_secrets_provider(mut self, provider: Arc<dyn SecretsProvider>) -> Self {
335        self.secrets_provider = Some(provider);
336        self
337    }
338
339    /// Set the deployment scope for secret lookups
340    ///
341    /// This is typically the deployment name and is used as the scope when
342    /// resolving `$S:` prefixed environment variables.
343    #[must_use]
344    pub fn with_deployment_scope(mut self, scope: String) -> Self {
345        self.deployment_scope = Some(scope);
346        self
347    }
348
349    /// Set a host-side Unix socket path to bind-mount into the container at
350    /// the default `ZLayer` socket path (read-only).
351    #[must_use]
352    pub fn with_socket_mount(mut self, path: impl Into<String>) -> Self {
353        self.socket_path = Some(path.into());
354        self
355    }
356
357    /// Get the bundle directory path
358    #[must_use]
359    pub fn bundle_dir(&self) -> &Path {
360        &self.bundle_dir
361    }
362
363    /// Build the OCI bundle from a `ServiceSpec`
364    ///
365    /// Creates the bundle directory structure and generates config.json
366    /// based on the provided service specification.
367    ///
368    /// # Returns
369    /// The path to the bundle directory on success
370    ///
371    /// # Errors
372    /// - `AgentError::CreateFailed` if directory creation fails
373    /// - `AgentError::InvalidSpec` if the OCI spec generation fails
374    pub async fn build(&self, container_id: &ContainerId, spec: &ServiceSpec) -> Result<PathBuf> {
375        // Create bundle directory
376        fs::create_dir_all(&self.bundle_dir)
377            .await
378            .map_err(|e| AgentError::CreateFailed {
379                id: container_id.to_string(),
380                reason: format!("failed to create bundle directory: {e}"),
381            })?;
382
383        // Set up rootfs (symlink or create empty directory)
384        let rootfs_in_bundle = self.bundle_dir.join("rootfs");
385        if let Some(ref rootfs_path) = self.rootfs_path {
386            // Remove existing rootfs symlink/dir if present
387            let _ = fs::remove_file(&rootfs_in_bundle).await;
388            let _ = fs::remove_dir(&rootfs_in_bundle).await;
389
390            // Create symlink to actual rootfs.
391            // On Unix: `tokio::fs::symlink` (unified file/dir symlink).
392            // On Windows: `tokio::fs::symlink_dir` (wraps CreateSymbolicLinkW with
393            // SYMBOLIC_LINK_FLAG_DIRECTORY) — rootfs is always an OCI layer directory.
394            #[cfg(unix)]
395            tokio::fs::symlink(rootfs_path, &rootfs_in_bundle)
396                .await
397                .map_err(|e| AgentError::CreateFailed {
398                    id: container_id.to_string(),
399                    reason: format!(
400                        "failed to symlink rootfs from {} to {}: {}",
401                        rootfs_path.display(),
402                        rootfs_in_bundle.display(),
403                        e
404                    ),
405                })?;
406
407            #[cfg(windows)]
408            tokio::fs::symlink_dir(rootfs_path, &rootfs_in_bundle)
409                .await
410                .map_err(|e| AgentError::CreateFailed {
411                    id: container_id.to_string(),
412                    reason: format!(
413                        "failed to symlink rootfs from {} to {}: {}",
414                        rootfs_path.display(),
415                        rootfs_in_bundle.display(),
416                        e
417                    ),
418                })?;
419        } else {
420            // Create empty rootfs directory (for bind mounts)
421            fs::create_dir_all(&rootfs_in_bundle)
422                .await
423                .map_err(|e| AgentError::CreateFailed {
424                    id: container_id.to_string(),
425                    reason: format!("failed to create rootfs directory: {e}"),
426                })?;
427        }
428
429        // Generate OCI runtime spec
430        let oci_spec = self
431            .build_oci_spec(container_id, spec, &self.volume_paths)
432            .await?;
433
434        // Write config.json
435        let config_path = self.bundle_dir.join("config.json");
436        let config_json =
437            serde_json::to_string_pretty(&oci_spec).map_err(|e| AgentError::CreateFailed {
438                id: container_id.to_string(),
439                reason: format!("failed to serialize OCI spec: {e}"),
440            })?;
441
442        fs::write(&config_path, config_json)
443            .await
444            .map_err(|e| AgentError::CreateFailed {
445                id: container_id.to_string(),
446                reason: format!("failed to write config.json: {e}"),
447            })?;
448
449        tracing::debug!(
450            "Created OCI bundle at {} for container {}",
451            self.bundle_dir.display(),
452            container_id
453        );
454
455        Ok(self.bundle_dir.clone())
456    }
457
458    /// Render the OCI runtime spec without creating a bundle directory
459    /// or writing `config.json`.
460    ///
461    /// Used by the WSL2 delegate runtime (`runtimes/wsl2_delegate.rs`):
462    /// the Windows host renders the spec, then streams the JSON into the
463    /// WSL distro filesystem where `youki` will consume it. The bundle
464    /// path passed to `BundleBuilder::new` is purely informational in
465    /// that flow; this method never touches the filesystem.
466    ///
467    /// # Errors
468    ///
469    /// Returns [`AgentError::InvalidSpec`] if the spec generation fails.
470    pub async fn build_spec_only(
471        &self,
472        container_id: &ContainerId,
473        spec: &ServiceSpec,
474        volume_paths: &std::collections::HashMap<String, PathBuf>,
475    ) -> Result<oci_spec::runtime::Spec> {
476        self.build_oci_spec(container_id, spec, volume_paths).await
477    }
478
479    /// Build the OCI runtime spec from `ServiceSpec`
480    #[allow(clippy::too_many_lines)]
481    async fn build_oci_spec(
482        &self,
483        container_id: &ContainerId,
484        spec: &ServiceSpec,
485        volume_paths: &std::collections::HashMap<String, PathBuf>,
486    ) -> Result<Spec> {
487        // Build user: image config user > root (spec doesn't currently have user override)
488        let user = {
489            let (uid, gid) = if let Some(user_str) = self
490                .image_config
491                .as_ref()
492                .and_then(|c| c.user.as_ref())
493                .filter(|u| !u.is_empty())
494            {
495                // Parse "uid:gid" or "uid" format from image config
496                let parts: Vec<&str> = user_str.splitn(2, ':').collect();
497                let uid = parts[0].parse::<u32>().unwrap_or(0);
498                let gid = if parts.len() > 1 {
499                    parts[1].parse::<u32>().unwrap_or(0)
500                } else {
501                    uid
502                };
503                (uid, gid)
504            } else {
505                (0u32, 0u32)
506            };
507
508            UserBuilder::default()
509                .uid(uid)
510                .gid(gid)
511                .build()
512                .map_err(|e| AgentError::InvalidSpec(format!("failed to build user: {e}")))?
513        };
514
515        // Build environment variables
516        // Layer: image config env (base) -> defaults -> spec env -> builder extra env
517        let mut env: Vec<String> = Vec::new();
518        let mut env_keys: HashSet<String> = HashSet::new();
519
520        // Seed with image config env first (lowest priority)
521        if let Some(img_env) = self.image_config.as_ref().and_then(|c| c.env.as_ref()) {
522            for entry in img_env {
523                if let Some(key) = entry.split('=').next() {
524                    env_keys.insert(key.to_string());
525                }
526                env.push(entry.clone());
527            }
528        }
529
530        // If image config didn't provide PATH, add the default
531        if !env_keys.contains("PATH") {
532            env.push(
533                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
534            );
535            env_keys.insert("PATH".to_string());
536        }
537
538        // Add TERM for interactive compatibility (if not already set)
539        if !env_keys.contains("TERM") {
540            env.push("TERM=xterm".to_string());
541            env_keys.insert("TERM".to_string());
542        }
543
544        // Add service-specific env vars, resolving $S: and $E: prefixed references
545        // These override image config env for same keys
546        //
547        // When a secrets provider is available, use the full secrets-aware resolver
548        // that handles both $S: (secret) and $E: (env) prefixed values.
549        // Otherwise fall back to the env-only resolver.
550        if let (Some(secrets_provider), Some(scope)) =
551            (&self.secrets_provider, &self.deployment_scope)
552        {
553            let resolved_map =
554                crate::env::resolve_env_with_secrets(&spec.env, secrets_provider.as_ref(), scope)
555                    .await
556                    .map_err(|e| {
557                        AgentError::InvalidSpec(format!(
558                            "environment variable resolution failed: {e}"
559                        ))
560                    })?;
561
562            for (key, value) in &resolved_map {
563                if env_keys.contains(key.as_str()) {
564                    env.retain(|e| e.split('=').next() != Some(key.as_str()));
565                }
566                env_keys.insert(key.clone());
567                env.push(format!("{key}={value}"));
568            }
569        } else {
570            let resolved = crate::env::resolve_env_vars_with_warnings(&spec.env).map_err(|e| {
571                AgentError::InvalidSpec(format!("environment variable resolution failed: {e}"))
572            })?;
573
574            // Log any warnings about resolved env vars
575            for warning in &resolved.warnings {
576                tracing::warn!(container = %container_id, "{}", warning);
577            }
578
579            // Merge spec env: spec values take precedence over image config for same keys
580            for var in &resolved.vars {
581                if let Some(key) = var.split('=').next() {
582                    if env_keys.contains(key) {
583                        // Remove the old entry from image config
584                        env.retain(|e| e.split('=').next() != Some(key));
585                    }
586                    env_keys.insert(key.to_string());
587                }
588                env.push(var.clone());
589            }
590        }
591
592        // Add extra env vars from builder (highest priority)
593        for (key, value) in &self.extra_env {
594            if env_keys.contains(key.as_str()) {
595                env.retain(|e| e.split('=').next() != Some(key.as_str()));
596            }
597            env_keys.insert(key.clone());
598            env.push(format!("{key}={value}"));
599        }
600
601        // Inject GPU device visibility environment variables based on vendor
602        // and allocated indices so runtimes (CUDA, ROCm, oneAPI) see only
603        // the GPUs assigned to this container.
604        if let Some(ref gpu) = spec.resources.gpu {
605            // Default to 0..count when no explicit indices are provided
606            let indices: Vec<String> = (0..gpu.count).map(|i| i.to_string()).collect();
607            let device_list = indices.join(",");
608            match gpu.vendor.as_str() {
609                "nvidia" => {
610                    env.push(format!("NVIDIA_VISIBLE_DEVICES={device_list}"));
611                    env.push(format!("CUDA_VISIBLE_DEVICES={device_list}"));
612                }
613                "amd" => {
614                    env.push(format!("ROCR_VISIBLE_DEVICES={device_list}"));
615                    env.push(format!("HIP_VISIBLE_DEVICES={device_list}"));
616                }
617                "intel" => {
618                    env.push(format!("ZE_AFFINITY_MASK={device_list}"));
619                }
620                _ => {}
621            }
622        }
623
624        // Inject distributed training coordination env vars when configured.
625        // MASTER_ADDR uses the service DNS name (resolved by the overlay DNS).
626        // RANK defaults to 0 (overridden by the agent when placing specific replicas).
627        if let Some(ref gpu) = spec.resources.gpu {
628            if let Some(ref dist) = gpu.distributed {
629                env.push(format!("MASTER_PORT={}", dist.master_port));
630                env.push(format!("MASTER_ADDR={}", container_id.service));
631                env.push("WORLD_SIZE=1".to_string());
632                env.push("RANK=0".to_string());
633                env.push("LOCAL_RANK=0".to_string());
634                match dist.backend.as_str() {
635                    "nccl" => env.push("NCCL_SOCKET_IFNAME=eth0".to_string()),
636                    "gloo" => env.push("GLOO_SOCKET_IFNAME=eth0".to_string()),
637                    _ => {}
638                }
639            }
640        }
641
642        // Build capabilities
643        let capabilities = self.build_capabilities(spec)?;
644
645        // Determine working directory: builder override > spec.command.workdir > image config > "/"
646        let cwd = self
647            .cwd
648            .clone()
649            .or_else(|| spec.command.workdir.clone())
650            .or_else(|| {
651                self.image_config
652                    .as_ref()
653                    .and_then(|c| c.working_dir.as_ref())
654                    .filter(|w| !w.is_empty())
655                    .cloned()
656            })
657            .unwrap_or_else(|| "/".to_string());
658
659        // Resolve process args: builder override > spec command > image config > /bin/sh
660        let process_args = if let Some(ref args) = self.args {
661            args.clone()
662        } else {
663            Self::resolve_command_from_spec(spec, self.image_config.as_ref())
664        };
665
666        // Build process
667        let mut process_builder = ProcessBuilder::default()
668            .terminal(false)
669            .user(user)
670            .env(env)
671            .args(process_args)
672            .cwd(cwd)
673            .no_new_privileges(!spec.privileged && spec.capabilities.is_empty());
674
675        // Set capabilities if we have them
676        if let Some(caps) = capabilities {
677            process_builder = process_builder.capabilities(caps);
678        }
679
680        let process = process_builder
681            .build()
682            .map_err(|e| AgentError::InvalidSpec(format!("failed to build process: {e}")))?;
683
684        // Build root filesystem config
685        // Note: "rootfs" is relative to the bundle directory per OCI spec
686        let root = RootBuilder::default()
687            .path("rootfs".to_string())
688            .readonly(false)
689            .build()
690            .map_err(|e| AgentError::InvalidSpec(format!("failed to build root: {e}")))?;
691
692        // Build default mounts
693        let mut mounts = self.build_default_mounts(spec)?;
694
695        // Add storage mounts from spec
696        let storage_mounts = self.build_storage_mounts(spec, volume_paths)?;
697        mounts.extend(storage_mounts);
698
699        // Add ZLayer API socket bind-mount if configured.
700        // Use typ("bind") so libcontainer's mount code handles the source path
701        // correctly for sockets (canonicalize + file-based mount point creation).
702        if let Some(ref socket_path) = self.socket_path {
703            mounts.push(
704                MountBuilder::default()
705                    .destination(zlayer_paths::ZLayerDirs::default_socket_path())
706                    .typ("bind")
707                    .source(socket_path.clone())
708                    .options(vec!["rbind".into(), "ro".into()])
709                    .build()
710                    .expect("valid socket mount"),
711            );
712        }
713
714        // Build Linux-specific config
715        let linux = self.build_linux_config(spec)?;
716
717        // Determine hostname
718        let hostname = self
719            .hostname
720            .clone()
721            .unwrap_or_else(|| container_id.to_string());
722
723        // Build the complete spec
724        let oci_spec = SpecBuilder::default()
725            .version("1.0.2".to_string())
726            .root(root)
727            .process(process)
728            .hostname(hostname)
729            .mounts(mounts)
730            .linux(linux)
731            .build()
732            .map_err(|e| AgentError::InvalidSpec(format!("failed to build OCI spec: {e}")))?;
733
734        Ok(oci_spec)
735    }
736
737    /// Build Linux capabilities configuration
738    #[allow(clippy::unused_self)]
739    fn build_capabilities(
740        &self,
741        spec: &ServiceSpec,
742    ) -> Result<Option<oci_spec::runtime::LinuxCapabilities>> {
743        if spec.privileged {
744            // Privileged mode: all capabilities
745            let all_caps: HashSet<Capability> = ALL_CAPABILITIES.iter().copied().collect();
746            let empty_caps: HashSet<Capability> = HashSet::new();
747
748            let caps = LinuxCapabilitiesBuilder::default()
749                .bounding(all_caps.clone())
750                .effective(all_caps.clone())
751                .permitted(all_caps)
752                .inheritable(empty_caps.clone())
753                .ambient(empty_caps)
754                .build()
755                .map_err(|e| {
756                    AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
757                })?;
758
759            Ok(Some(caps))
760        } else if !spec.capabilities.is_empty() {
761            // Specific capabilities requested
762            let caps: HashSet<Capability> = spec
763                .capabilities
764                .iter()
765                .filter_map(|c| {
766                    // Normalize capability name (add CAP_ prefix if missing, uppercase)
767                    let cap_name = if c.starts_with("CAP_") {
768                        c.to_uppercase()
769                    } else {
770                        format!("CAP_{}", c.to_uppercase())
771                    };
772                    Capability::from_str(&cap_name).ok()
773                })
774                .collect();
775
776            let empty_caps: HashSet<Capability> = HashSet::new();
777
778            let built_caps = LinuxCapabilitiesBuilder::default()
779                .bounding(caps.clone())
780                .effective(caps.clone())
781                .permitted(caps)
782                .inheritable(empty_caps.clone())
783                .ambient(empty_caps)
784                .build()
785                .map_err(|e| {
786                    AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
787                })?;
788
789            Ok(Some(built_caps))
790        } else {
791            // Default: minimal capabilities for basic container operation
792            let default_caps: HashSet<Capability> = [
793                Capability::Chown,
794                Capability::DacOverride,
795                Capability::Fsetid,
796                Capability::Fowner,
797                Capability::Mknod,
798                Capability::NetRaw,
799                Capability::Setgid,
800                Capability::Setuid,
801                Capability::Setfcap,
802                Capability::Setpcap,
803                Capability::NetBindService,
804                Capability::SysChroot,
805                Capability::Kill,
806                Capability::AuditWrite,
807            ]
808            .into_iter()
809            .collect();
810
811            let empty_caps: HashSet<Capability> = HashSet::new();
812
813            let built_caps = LinuxCapabilitiesBuilder::default()
814                .bounding(default_caps.clone())
815                .effective(default_caps.clone())
816                .permitted(default_caps)
817                .inheritable(empty_caps.clone())
818                .ambient(empty_caps)
819                .build()
820                .map_err(|e| {
821                    AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
822                })?;
823
824            Ok(Some(built_caps))
825        }
826    }
827
828    /// Build default filesystem mounts for the container
829    #[allow(clippy::unused_self, clippy::too_many_lines)]
830    fn build_default_mounts(&self, spec: &ServiceSpec) -> Result<Vec<Mount>> {
831        let mut mounts = Vec::new();
832
833        // /proc
834        mounts.push(
835            MountBuilder::default()
836                .destination("/proc".to_string())
837                .typ("proc".to_string())
838                .source("proc".to_string())
839                .options(vec![
840                    "nosuid".to_string(),
841                    "noexec".to_string(),
842                    "nodev".to_string(),
843                ])
844                .build()
845                .map_err(|e| {
846                    AgentError::InvalidSpec(format!("failed to build /proc mount: {e}"))
847                })?,
848        );
849
850        // /dev
851        mounts.push(
852            MountBuilder::default()
853                .destination("/dev".to_string())
854                .typ("tmpfs".to_string())
855                .source("tmpfs".to_string())
856                .options(vec![
857                    "nosuid".to_string(),
858                    "strictatime".to_string(),
859                    "mode=755".to_string(),
860                    "size=65536k".to_string(),
861                ])
862                .build()
863                .map_err(|e| AgentError::InvalidSpec(format!("failed to build /dev mount: {e}")))?,
864        );
865
866        // /dev/pts
867        mounts.push(
868            MountBuilder::default()
869                .destination("/dev/pts".to_string())
870                .typ("devpts".to_string())
871                .source("devpts".to_string())
872                .options(vec![
873                    "nosuid".to_string(),
874                    "noexec".to_string(),
875                    "newinstance".to_string(),
876                    "ptmxmode=0666".to_string(),
877                    "mode=0620".to_string(),
878                    "gid=5".to_string(),
879                ])
880                .build()
881                .map_err(|e| {
882                    AgentError::InvalidSpec(format!("failed to build /dev/pts mount: {e}"))
883                })?,
884        );
885
886        // /dev/shm
887        mounts.push(
888            MountBuilder::default()
889                .destination("/dev/shm".to_string())
890                .typ("tmpfs".to_string())
891                .source("shm".to_string())
892                .options(vec![
893                    "nosuid".to_string(),
894                    "noexec".to_string(),
895                    "nodev".to_string(),
896                    "mode=1777".to_string(),
897                    "size=65536k".to_string(),
898                ])
899                .build()
900                .map_err(|e| {
901                    AgentError::InvalidSpec(format!("failed to build /dev/shm mount: {e}"))
902                })?,
903        );
904
905        // /dev/mqueue
906        mounts.push(
907            MountBuilder::default()
908                .destination("/dev/mqueue".to_string())
909                .typ("mqueue".to_string())
910                .source("mqueue".to_string())
911                .options(vec![
912                    "nosuid".to_string(),
913                    "noexec".to_string(),
914                    "nodev".to_string(),
915                ])
916                .build()
917                .map_err(|e| {
918                    AgentError::InvalidSpec(format!("failed to build /dev/mqueue mount: {e}"))
919                })?,
920        );
921
922        // /sys - read-only unless privileged
923        let sys_options = if spec.privileged {
924            vec![
925                "nosuid".to_string(),
926                "noexec".to_string(),
927                "nodev".to_string(),
928            ]
929        } else {
930            vec![
931                "nosuid".to_string(),
932                "noexec".to_string(),
933                "nodev".to_string(),
934                "ro".to_string(),
935            ]
936        };
937
938        mounts.push(
939            MountBuilder::default()
940                .destination("/sys".to_string())
941                .typ("sysfs".to_string())
942                .source("sysfs".to_string())
943                .options(sys_options)
944                .build()
945                .map_err(|e| AgentError::InvalidSpec(format!("failed to build /sys mount: {e}")))?,
946        );
947
948        // /sys/fs/cgroup - for cgroup access
949        mounts.push(
950            MountBuilder::default()
951                .destination("/sys/fs/cgroup".to_string())
952                .typ("cgroup2".to_string())
953                .source("cgroup".to_string())
954                .options(vec![
955                    "nosuid".to_string(),
956                    "noexec".to_string(),
957                    "nodev".to_string(),
958                    "relatime".to_string(),
959                ])
960                .build()
961                .map_err(|e| {
962                    AgentError::InvalidSpec(format!("failed to build cgroup mount: {e}"))
963                })?,
964        );
965
966        Ok(mounts)
967    }
968
969    /// Build storage mounts from `ServiceSpec` storage entries
970    ///
971    /// Converts `StorageSpec` entries to OCI Mount entries.
972    /// Note: Named and Anonymous volumes require `StorageManager` to prepare paths.
973    /// S3 volumes require s3fs FUSE mount (handled separately).
974    #[allow(clippy::unused_self, clippy::too_many_lines)]
975    fn build_storage_mounts(
976        &self,
977        spec: &ServiceSpec,
978        volume_paths: &std::collections::HashMap<String, PathBuf>,
979    ) -> Result<Vec<Mount>> {
980        let mut mounts = Vec::new();
981
982        for storage in &spec.storage {
983            let mount = match storage {
984                StorageSpec::Bind {
985                    source,
986                    target,
987                    readonly,
988                } => {
989                    let mut options = vec!["rbind".to_string()];
990                    if *readonly {
991                        options.push("ro".to_string());
992                    } else {
993                        options.push("rw".to_string());
994                    }
995
996                    MountBuilder::default()
997                        .destination(target.clone())
998                        .typ("none".to_string())
999                        .source(source.clone())
1000                        .options(options)
1001                        .build()
1002                        .map_err(|e| {
1003                            AgentError::InvalidSpec(format!(
1004                                "failed to build bind mount for {target}: {e}"
1005                            ))
1006                        })?
1007                }
1008
1009                StorageSpec::Named {
1010                    name,
1011                    target,
1012                    readonly,
1013                    tier,
1014                    ..
1015                } => {
1016                    // Get the prepared volume path from StorageManager
1017                    let source = volume_paths.get(name).ok_or_else(|| {
1018                        AgentError::InvalidSpec(format!(
1019                            "volume '{name}' not prepared - ensure StorageManager.ensure_volume() was called"
1020                        ))
1021                    })?;
1022
1023                    // Warn about SQLite safety for non-local tiers
1024                    if matches!(tier, StorageTier::Network) {
1025                        tracing::warn!(
1026                            volume = %name,
1027                            tier = ?tier,
1028                            "Network storage tier is NOT SQLite-safe. Avoid using SQLite databases on this volume."
1029                        );
1030                    }
1031
1032                    let mut options = vec!["rbind".to_string()];
1033                    if *readonly {
1034                        options.push("ro".to_string());
1035                    } else {
1036                        options.push("rw".to_string());
1037                    }
1038
1039                    MountBuilder::default()
1040                        .destination(target.clone())
1041                        .typ("none".to_string())
1042                        .source(source.to_string_lossy().to_string())
1043                        .options(options)
1044                        .build()
1045                        .map_err(|e| {
1046                            AgentError::InvalidSpec(format!(
1047                                "failed to build named volume mount for {target}: {e}"
1048                            ))
1049                        })?
1050                }
1051
1052                StorageSpec::Anonymous { target, tier } => {
1053                    // Anonymous volumes should have been created by StorageManager
1054                    // and the path passed in volume_paths with key "_anon_{target}"
1055                    let key = format!("_anon_{}", target.trim_start_matches('/').replace('/', "_"));
1056                    let source = volume_paths.get(&key).ok_or_else(|| {
1057                        AgentError::InvalidSpec(format!(
1058                            "anonymous volume for '{target}' not prepared"
1059                        ))
1060                    })?;
1061
1062                    if matches!(tier, StorageTier::Network) {
1063                        tracing::warn!(
1064                            target = %target,
1065                            tier = ?tier,
1066                            "Network storage tier is NOT SQLite-safe."
1067                        );
1068                    }
1069
1070                    let options = vec!["rbind".to_string(), "rw".to_string()];
1071
1072                    MountBuilder::default()
1073                        .destination(target.clone())
1074                        .typ("none".to_string())
1075                        .source(source.to_string_lossy().to_string())
1076                        .options(options)
1077                        .build()
1078                        .map_err(|e| {
1079                            AgentError::InvalidSpec(format!(
1080                                "failed to build anonymous volume mount for {target}: {e}"
1081                            ))
1082                        })?
1083                }
1084
1085                StorageSpec::Tmpfs { target, size, mode } => {
1086                    let mut options = vec!["nosuid".to_string(), "nodev".to_string()];
1087
1088                    if let Some(size_str) = size {
1089                        options.push(format!("size={size_str}"));
1090                    }
1091
1092                    if let Some(mode_val) = mode {
1093                        options.push(format!("mode={mode_val:o}"));
1094                    }
1095
1096                    MountBuilder::default()
1097                        .destination(target.clone())
1098                        .typ("tmpfs".to_string())
1099                        .source("tmpfs".to_string())
1100                        .options(options)
1101                        .build()
1102                        .map_err(|e| {
1103                            AgentError::InvalidSpec(format!(
1104                                "failed to build tmpfs mount for {target}: {e}"
1105                            ))
1106                        })?
1107                }
1108
1109                StorageSpec::S3 {
1110                    bucket,
1111                    prefix,
1112                    target,
1113                    readonly,
1114                    endpoint: _,
1115                    credentials: _,
1116                } => {
1117                    // S3 mounts are handled via s3fs FUSE
1118                    // The StorageManager should have mounted the bucket and passed the path
1119                    let key = format!("_s3_{}_{}", bucket, prefix.as_deref().unwrap_or(""));
1120                    let source = volume_paths.get(&key).ok_or_else(|| {
1121                        AgentError::InvalidSpec(format!(
1122                            "S3 volume for bucket '{bucket}' not mounted - ensure StorageManager.mount_s3() was called"
1123                        ))
1124                    })?;
1125
1126                    tracing::warn!(
1127                        bucket = %bucket,
1128                        target = %target,
1129                        "S3 storage is NOT SQLite-safe. Use for read-heavy workloads only."
1130                    );
1131
1132                    let mut options = vec!["rbind".to_string()];
1133                    if *readonly {
1134                        options.push("ro".to_string());
1135                    } else {
1136                        options.push("rw".to_string());
1137                    }
1138
1139                    MountBuilder::default()
1140                        .destination(target.clone())
1141                        .typ("none".to_string())
1142                        .source(source.to_string_lossy().to_string())
1143                        .options(options)
1144                        .build()
1145                        .map_err(|e| {
1146                            AgentError::InvalidSpec(format!(
1147                                "failed to build S3 mount for {target}: {e}"
1148                            ))
1149                        })?
1150                }
1151            };
1152
1153            mounts.push(mount);
1154        }
1155
1156        Ok(mounts)
1157    }
1158
1159    /// Build Linux-specific configuration
1160    fn build_linux_config(&self, spec: &ServiceSpec) -> Result<oci_spec::runtime::Linux> {
1161        // Build namespaces
1162        let mut namespaces = vec![
1163            LinuxNamespaceBuilder::default()
1164                .typ(LinuxNamespaceType::Pid)
1165                .build()
1166                .unwrap(),
1167            LinuxNamespaceBuilder::default()
1168                .typ(LinuxNamespaceType::Ipc)
1169                .build()
1170                .unwrap(),
1171            LinuxNamespaceBuilder::default()
1172                .typ(LinuxNamespaceType::Uts)
1173                .build()
1174                .unwrap(),
1175            LinuxNamespaceBuilder::default()
1176                .typ(LinuxNamespaceType::Mount)
1177                .build()
1178                .unwrap(),
1179        ];
1180
1181        // Only add Network namespace when NOT using host networking.
1182        // In host networking mode, the container shares the host's network stack
1183        // (like Docker's --network host).
1184        if !self.host_network {
1185            namespaces.push(
1186                LinuxNamespaceBuilder::default()
1187                    .typ(LinuxNamespaceType::Network)
1188                    .build()
1189                    .unwrap(),
1190            );
1191        }
1192
1193        let mut linux_builder = LinuxBuilder::default().namespaces(namespaces);
1194
1195        // Build resources (CPU, memory, devices)
1196        let resources = self.build_resources(spec)?;
1197        if let Some(resources) = resources {
1198            linux_builder = linux_builder.resources(resources);
1199        }
1200
1201        // Build device entries for passthrough
1202        let devices = self.build_devices(spec, None)?;
1203        if !devices.is_empty() {
1204            linux_builder = linux_builder.devices(devices);
1205        }
1206
1207        // Set rootfs propagation (matches Docker default)
1208        linux_builder = linux_builder.rootfs_propagation("private".to_string());
1209
1210        // Set masked/readonly paths based on privileged mode
1211        if spec.privileged {
1212            // Privileged containers get no masked paths (full access)
1213            linux_builder = linux_builder.masked_paths(vec![]).readonly_paths(vec![]);
1214        } else {
1215            // Set masked paths for security (hide sensitive host info)
1216            let masked_paths = vec![
1217                "/proc/acpi".to_string(),
1218                "/proc/asound".to_string(),
1219                "/proc/kcore".to_string(),
1220                "/proc/keys".to_string(),
1221                "/proc/latency_stats".to_string(),
1222                "/proc/timer_list".to_string(),
1223                "/proc/timer_stats".to_string(),
1224                "/proc/sched_debug".to_string(),
1225                "/proc/scsi".to_string(),
1226                "/sys/firmware".to_string(),
1227            ];
1228
1229            // Set readonly paths for security
1230            let readonly_paths = vec![
1231                "/proc/bus".to_string(),
1232                "/proc/fs".to_string(),
1233                "/proc/irq".to_string(),
1234                "/proc/sys".to_string(),
1235                "/proc/sysrq-trigger".to_string(),
1236            ];
1237
1238            linux_builder = linux_builder
1239                .masked_paths(masked_paths)
1240                .readonly_paths(readonly_paths);
1241        }
1242
1243        linux_builder
1244            .build()
1245            .map_err(|e| AgentError::InvalidSpec(format!("failed to build linux config: {e}")))
1246    }
1247
1248    /// Build resource limits (CPU, memory, device cgroups)
1249    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
1250    fn build_resources(
1251        &self,
1252        spec: &ServiceSpec,
1253    ) -> Result<Option<oci_spec::runtime::LinuxResources>> {
1254        let mut resources_builder = LinuxResourcesBuilder::default();
1255        let mut has_resources = false;
1256
1257        // CPU limits
1258        if let Some(cpu_limit) = spec.resources.cpu {
1259            // Convert CPU cores to microseconds quota
1260            // 100000 microseconds = 1 core's worth of time per period
1261            let quota = (cpu_limit * 100_000.0) as i64;
1262            let cpu = LinuxCpuBuilder::default()
1263                .quota(quota)
1264                .period(100_000u64)
1265                .build()
1266                .map_err(|e| AgentError::InvalidSpec(format!("failed to build CPU limits: {e}")))?;
1267
1268            resources_builder = resources_builder.cpu(cpu);
1269            has_resources = true;
1270        }
1271
1272        // Memory limits
1273        if let Some(ref memory_str) = spec.resources.memory {
1274            let bytes = parse_memory_string(memory_str)
1275                .map_err(|e| AgentError::InvalidSpec(format!("invalid memory limit: {e}")))?;
1276
1277            let memory = LinuxMemoryBuilder::default()
1278                .limit(bytes as i64)
1279                .build()
1280                .map_err(|e| {
1281                    AgentError::InvalidSpec(format!("failed to build memory limits: {e}"))
1282                })?;
1283
1284            resources_builder = resources_builder.memory(memory);
1285            has_resources = true;
1286        }
1287
1288        // Device cgroup rules
1289        let device_rules = self.build_device_cgroup_rules(spec, None)?;
1290        if !device_rules.is_empty() {
1291            resources_builder = resources_builder.devices(device_rules);
1292            has_resources = true;
1293        }
1294
1295        if has_resources {
1296            let resources = resources_builder
1297                .build()
1298                .map_err(|e| AgentError::InvalidSpec(format!("failed to build resources: {e}")))?;
1299            Ok(Some(resources))
1300        } else {
1301            Ok(None)
1302        }
1303    }
1304
1305    /// Build device cgroup rules
1306    #[allow(clippy::unused_self, clippy::too_many_lines)]
1307    fn build_device_cgroup_rules(
1308        &self,
1309        spec: &ServiceSpec,
1310        _gpu_indices: Option<&[u32]>,
1311    ) -> Result<Vec<oci_spec::runtime::LinuxDeviceCgroup>> {
1312        let mut rules = Vec::new();
1313
1314        if spec.privileged {
1315            // Privileged mode: allow all devices
1316            let rule = LinuxDeviceCgroupBuilder::default()
1317                .allow(true)
1318                .access("rwm".to_string())
1319                .build()
1320                .map_err(|e| {
1321                    AgentError::InvalidSpec(format!("failed to build device cgroup rule: {e}"))
1322                })?;
1323            rules.push(rule);
1324        } else {
1325            // Default: deny all, then allow specific devices
1326            let deny_all = LinuxDeviceCgroupBuilder::default()
1327                .allow(false)
1328                .access("rwm".to_string())
1329                .build()
1330                .map_err(|e| AgentError::InvalidSpec(format!("failed to build deny rule: {e}")))?;
1331            rules.push(deny_all);
1332
1333            // Allow standard container devices
1334            // /dev/null, /dev/zero, /dev/full, /dev/random, /dev/urandom, /dev/tty
1335            let standard_char_devices = [
1336                (1, 3, "rwm"),    // /dev/null
1337                (1, 5, "rwm"),    // /dev/zero
1338                (1, 7, "rwm"),    // /dev/full
1339                (1, 8, "rwm"),    // /dev/random
1340                (1, 9, "rwm"),    // /dev/urandom
1341                (5, 0, "rwm"),    // /dev/tty
1342                (5, 1, "rwm"),    // /dev/console
1343                (5, 2, "rwm"),    // /dev/ptmx
1344                (136, -1, "rwm"), // /dev/pts/* (wildcard minor)
1345            ];
1346
1347            for (major, minor, access) in standard_char_devices {
1348                let mut builder = LinuxDeviceCgroupBuilder::default()
1349                    .allow(true)
1350                    .typ(LinuxDeviceType::C)
1351                    .major(i64::from(major))
1352                    .access(access.to_string());
1353
1354                if minor >= 0 {
1355                    builder = builder.minor(i64::from(minor));
1356                }
1357
1358                let rule = builder.build().map_err(|e| {
1359                    AgentError::InvalidSpec(format!("failed to build char device rule: {e}"))
1360                })?;
1361                rules.push(rule);
1362            }
1363
1364            // Allow specific devices from spec
1365            for device in &spec.devices {
1366                if let Ok((major, minor)) = get_device_major_minor(&device.path) {
1367                    let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
1368
1369                    // Build access string
1370                    let mut access = String::new();
1371                    if device.read {
1372                        access.push('r');
1373                    }
1374                    if device.write {
1375                        access.push('w');
1376                    }
1377                    if device.mknod {
1378                        access.push('m');
1379                    }
1380                    if access.is_empty() {
1381                        access = "rw".to_string();
1382                    }
1383
1384                    let rule = LinuxDeviceCgroupBuilder::default()
1385                        .allow(true)
1386                        .typ(dev_type)
1387                        .major(major)
1388                        .minor(minor)
1389                        .access(access)
1390                        .build()
1391                        .map_err(|e| {
1392                            AgentError::InvalidSpec(format!(
1393                                "failed to build device rule for {}: {}",
1394                                device.path, e
1395                            ))
1396                        })?;
1397                    rules.push(rule);
1398                } else {
1399                    tracing::warn!("Failed to get device info for {}, skipping", device.path);
1400                }
1401            }
1402
1403            // Auto-allow GPU devices in cgroup when gpu spec is set
1404            if let Some(ref gpu) = spec.resources.gpu {
1405                match gpu.vendor.as_str() {
1406                    "nvidia" => {
1407                        // Allow all nvidia devices (major 195 for nvidia GPUs)
1408                        let rule = LinuxDeviceCgroupBuilder::default()
1409                            .allow(true)
1410                            .typ(LinuxDeviceType::C)
1411                            .major(195i64)
1412                            .access("rwm".to_string())
1413                            .build()
1414                            .map_err(|e| {
1415                                AgentError::InvalidSpec(format!(
1416                                    "failed to build GPU cgroup rule: {e}"
1417                                ))
1418                            })?;
1419                        rules.push(rule);
1420
1421                        // nvidia-uvm (major 510 or check dynamically)
1422                        let uvm_rule = LinuxDeviceCgroupBuilder::default()
1423                            .allow(true)
1424                            .typ(LinuxDeviceType::C)
1425                            .major(510i64)
1426                            .access("rwm".to_string())
1427                            .build()
1428                            .map_err(|e| {
1429                                AgentError::InvalidSpec(format!(
1430                                    "failed to build GPU UVM cgroup rule: {e}"
1431                                ))
1432                            })?;
1433                        rules.push(uvm_rule);
1434                    }
1435                    "amd" => {
1436                        // AMD ROCm: /dev/dri/renderD* and /dev/dri/card* (major 226)
1437                        let dri_rule = LinuxDeviceCgroupBuilder::default()
1438                            .allow(true)
1439                            .typ(LinuxDeviceType::C)
1440                            .major(226i64)
1441                            .access("rwm".to_string())
1442                            .build()
1443                            .map_err(|e| {
1444                                AgentError::InvalidSpec(format!(
1445                                    "failed to build AMD DRI cgroup rule: {e}"
1446                                ))
1447                            })?;
1448                        rules.push(dri_rule);
1449
1450                        // /dev/kfd - AMD Kernel Fusion Driver for compute (major 234)
1451                        let kfd_rule = LinuxDeviceCgroupBuilder::default()
1452                            .allow(true)
1453                            .typ(LinuxDeviceType::C)
1454                            .major(234i64)
1455                            .access("rwm".to_string())
1456                            .build()
1457                            .map_err(|e| {
1458                                AgentError::InvalidSpec(format!(
1459                                    "failed to build AMD KFD cgroup rule: {e}"
1460                                ))
1461                            })?;
1462                        rules.push(kfd_rule);
1463                    }
1464                    "intel" => {
1465                        // Intel GPU: /dev/dri/renderD* and /dev/dri/card* (major 226)
1466                        let dri_rule = LinuxDeviceCgroupBuilder::default()
1467                            .allow(true)
1468                            .typ(LinuxDeviceType::C)
1469                            .major(226i64)
1470                            .access("rwm".to_string())
1471                            .build()
1472                            .map_err(|e| {
1473                                AgentError::InvalidSpec(format!(
1474                                    "failed to build Intel DRI cgroup rule: {e}"
1475                                ))
1476                            })?;
1477                        rules.push(dri_rule);
1478                    }
1479                    other => {
1480                        // Unknown vendor - allow DRI devices as a reasonable default
1481                        tracing::warn!(
1482                            vendor = %other,
1483                            "Unknown GPU vendor, allowing DRI devices (major 226)"
1484                        );
1485                        let dri_rule = LinuxDeviceCgroupBuilder::default()
1486                            .allow(true)
1487                            .typ(LinuxDeviceType::C)
1488                            .major(226i64)
1489                            .access("rwm".to_string())
1490                            .build()
1491                            .map_err(|e| {
1492                                AgentError::InvalidSpec(format!(
1493                                    "failed to build GPU DRI cgroup rule: {e}"
1494                                ))
1495                            })?;
1496                        rules.push(dri_rule);
1497                    }
1498                }
1499            }
1500        }
1501
1502        Ok(rules)
1503    }
1504
1505    /// Build Linux device entries for passthrough
1506    #[allow(clippy::unused_self, clippy::too_many_lines)]
1507    fn build_devices(
1508        &self,
1509        spec: &ServiceSpec,
1510        gpu_indices: Option<&[u32]>,
1511    ) -> Result<Vec<oci_spec::runtime::LinuxDevice>> {
1512        let mut devices = Vec::new();
1513
1514        for device in &spec.devices {
1515            if let Ok((major, minor)) = get_device_major_minor(&device.path) {
1516                let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
1517
1518                let linux_device = LinuxDeviceBuilder::default()
1519                    .path(device.path.clone())
1520                    .typ(dev_type)
1521                    .major(major)
1522                    .minor(minor)
1523                    .file_mode(0o666u32)
1524                    .uid(0u32)
1525                    .gid(0u32)
1526                    .build()
1527                    .map_err(|e| {
1528                        AgentError::InvalidSpec(format!(
1529                            "failed to build device {}: {}",
1530                            device.path, e
1531                        ))
1532                    })?;
1533
1534                devices.push(linux_device);
1535            }
1536        }
1537
1538        // Auto-inject GPU devices when gpu spec is set
1539        if let Some(ref gpu) = spec.resources.gpu {
1540            let indices: Vec<u32> =
1541                gpu_indices.map_or_else(|| (0..gpu.count).collect(), <[u32]>::to_vec);
1542
1543            match gpu.vendor.as_str() {
1544                "nvidia" => {
1545                    // Always needed: nvidiactl, nvidia-uvm, nvidia-uvm-tools
1546                    let always_devices =
1547                        ["/dev/nvidiactl", "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"];
1548                    for dev_path in &always_devices {
1549                        if let Ok((major, minor)) = get_device_major_minor(dev_path) {
1550                            let dev_type = get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
1551                            let linux_device = LinuxDeviceBuilder::default()
1552                                .path((*dev_path).to_string())
1553                                .typ(dev_type)
1554                                .major(major)
1555                                .minor(minor)
1556                                .file_mode(0o666u32)
1557                                .uid(0u32)
1558                                .gid(0u32)
1559                                .build()
1560                                .map_err(|e| {
1561                                    AgentError::InvalidSpec(format!(
1562                                        "failed to build GPU device {dev_path}: {e}"
1563                                    ))
1564                                })?;
1565                            devices.push(linux_device);
1566                        } else {
1567                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1568                        }
1569                    }
1570
1571                    // Per-GPU devices: /dev/nvidia0, /dev/nvidia1, etc.
1572                    for i in &indices {
1573                        let dev_path = format!("/dev/nvidia{i}");
1574                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1575                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1576                            let linux_device = LinuxDeviceBuilder::default()
1577                                .path(dev_path.clone())
1578                                .typ(dev_type)
1579                                .major(major)
1580                                .minor(minor)
1581                                .file_mode(0o666u32)
1582                                .uid(0u32)
1583                                .gid(0u32)
1584                                .build()
1585                                .map_err(|e| {
1586                                    AgentError::InvalidSpec(format!(
1587                                        "failed to build GPU device {dev_path}: {e}"
1588                                    ))
1589                                })?;
1590                            devices.push(linux_device);
1591                        } else {
1592                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1593                        }
1594                    }
1595                }
1596                "amd" => {
1597                    // AMD ROCm: /dev/kfd is always required for compute
1598                    let amd_always_devices = ["/dev/kfd"];
1599                    for dev_path in &amd_always_devices {
1600                        if let Ok((major, minor)) = get_device_major_minor(dev_path) {
1601                            let dev_type = get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
1602                            let linux_device = LinuxDeviceBuilder::default()
1603                                .path((*dev_path).to_string())
1604                                .typ(dev_type)
1605                                .major(major)
1606                                .minor(minor)
1607                                .file_mode(0o666u32)
1608                                .uid(0u32)
1609                                .gid(0u32)
1610                                .build()
1611                                .map_err(|e| {
1612                                    AgentError::InvalidSpec(format!(
1613                                        "failed to build GPU device {dev_path}: {e}"
1614                                    ))
1615                                })?;
1616                            devices.push(linux_device);
1617                        } else {
1618                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1619                        }
1620                    }
1621
1622                    // DRI render nodes: /dev/dri/renderD128, renderD129, etc.
1623                    for i in &indices {
1624                        let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1625                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1626                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1627                            let linux_device = LinuxDeviceBuilder::default()
1628                                .path(dev_path.clone())
1629                                .typ(dev_type)
1630                                .major(major)
1631                                .minor(minor)
1632                                .file_mode(0o666u32)
1633                                .uid(0u32)
1634                                .gid(0u32)
1635                                .build()
1636                                .map_err(|e| {
1637                                    AgentError::InvalidSpec(format!(
1638                                        "failed to build GPU device {dev_path}: {e}"
1639                                    ))
1640                                })?;
1641                            devices.push(linux_device);
1642                        } else {
1643                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1644                        }
1645                    }
1646
1647                    // DRI card nodes: /dev/dri/card0, card1, etc.
1648                    for i in &indices {
1649                        let dev_path = format!("/dev/dri/card{i}");
1650                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1651                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1652                            let linux_device = LinuxDeviceBuilder::default()
1653                                .path(dev_path.clone())
1654                                .typ(dev_type)
1655                                .major(major)
1656                                .minor(minor)
1657                                .file_mode(0o666u32)
1658                                .uid(0u32)
1659                                .gid(0u32)
1660                                .build()
1661                                .map_err(|e| {
1662                                    AgentError::InvalidSpec(format!(
1663                                        "failed to build GPU device {dev_path}: {e}"
1664                                    ))
1665                                })?;
1666                            devices.push(linux_device);
1667                        } else {
1668                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1669                        }
1670                    }
1671                }
1672                "intel" => {
1673                    // Intel GPU: DRI render nodes /dev/dri/renderD128, etc.
1674                    for i in &indices {
1675                        let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1676                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1677                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1678                            let linux_device = LinuxDeviceBuilder::default()
1679                                .path(dev_path.clone())
1680                                .typ(dev_type)
1681                                .major(major)
1682                                .minor(minor)
1683                                .file_mode(0o666u32)
1684                                .uid(0u32)
1685                                .gid(0u32)
1686                                .build()
1687                                .map_err(|e| {
1688                                    AgentError::InvalidSpec(format!(
1689                                        "failed to build GPU device {dev_path}: {e}"
1690                                    ))
1691                                })?;
1692                            devices.push(linux_device);
1693                        } else {
1694                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1695                        }
1696                    }
1697
1698                    // Intel DRI card nodes: /dev/dri/card0, card1, etc.
1699                    for i in &indices {
1700                        let dev_path = format!("/dev/dri/card{i}");
1701                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1702                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1703                            let linux_device = LinuxDeviceBuilder::default()
1704                                .path(dev_path.clone())
1705                                .typ(dev_type)
1706                                .major(major)
1707                                .minor(minor)
1708                                .file_mode(0o666u32)
1709                                .uid(0u32)
1710                                .gid(0u32)
1711                                .build()
1712                                .map_err(|e| {
1713                                    AgentError::InvalidSpec(format!(
1714                                        "failed to build GPU device {dev_path}: {e}"
1715                                    ))
1716                                })?;
1717                            devices.push(linux_device);
1718                        } else {
1719                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1720                        }
1721                    }
1722                }
1723                other => {
1724                    // Unknown vendor - try DRI render nodes as default
1725                    tracing::warn!(
1726                        vendor = %other,
1727                        "Unknown GPU vendor, attempting DRI device passthrough"
1728                    );
1729                    for i in &indices {
1730                        let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1731                        if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1732                            let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1733                            let linux_device = LinuxDeviceBuilder::default()
1734                                .path(dev_path.clone())
1735                                .typ(dev_type)
1736                                .major(major)
1737                                .minor(minor)
1738                                .file_mode(0o666u32)
1739                                .uid(0u32)
1740                                .gid(0u32)
1741                                .build()
1742                                .map_err(|e| {
1743                                    AgentError::InvalidSpec(format!(
1744                                        "failed to build GPU device {dev_path}: {e}"
1745                                    ))
1746                                })?;
1747                            devices.push(linux_device);
1748                        } else {
1749                            tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1750                        }
1751                    }
1752                }
1753            }
1754        }
1755
1756        Ok(devices)
1757    }
1758
1759    /// Generate the OCI spec and write config.json to the bundle directory
1760    ///
1761    /// Unlike `build()`, this does NOT create the bundle directory or set up rootfs.
1762    /// Use this when the bundle directory and rootfs already exist (e.g., rootfs was
1763    /// extracted directly by `LayerUnpacker`).
1764    ///
1765    /// # Errors
1766    /// Returns an error if the OCI spec cannot be built or config.json cannot be written.
1767    ///
1768    /// # Returns
1769    /// The path to the bundle directory on success
1770    pub async fn write_config(
1771        &self,
1772        container_id: &ContainerId,
1773        spec: &ServiceSpec,
1774    ) -> Result<PathBuf> {
1775        // Generate OCI runtime spec
1776        let oci_spec = self
1777            .build_oci_spec(container_id, spec, &self.volume_paths)
1778            .await?;
1779
1780        // Write config.json
1781        let config_path = self.bundle_dir.join("config.json");
1782        let config_json =
1783            serde_json::to_string_pretty(&oci_spec).map_err(|e| AgentError::CreateFailed {
1784                id: container_id.to_string(),
1785                reason: format!("failed to serialize OCI spec: {e}"),
1786            })?;
1787
1788        fs::write(&config_path, config_json)
1789            .await
1790            .map_err(|e| AgentError::CreateFailed {
1791                id: container_id.to_string(),
1792                reason: format!("failed to write config.json: {e}"),
1793            })?;
1794
1795        tracing::debug!(
1796            "Wrote OCI config.json at {} for container {}",
1797            config_path.display(),
1798            container_id
1799        );
1800
1801        Ok(self.bundle_dir.clone())
1802    }
1803
1804    /// Resolve command from `ServiceSpec` and optional image config following Docker/OCI semantics
1805    ///
1806    /// Resolution order:
1807    /// 1. spec entrypoint + args -> use those
1808    /// 2. spec entrypoint only -> use entrypoint
1809    /// 3. spec args only -> use args
1810    /// 4. `image_config` entrypoint/cmd -> use `image_config.full_command()`
1811    /// 5. fallback to /bin/sh
1812    fn resolve_command_from_spec(
1813        spec: &ServiceSpec,
1814        image_config: Option<&zlayer_registry::ImageConfig>,
1815    ) -> Vec<String> {
1816        let mut args = Vec::new();
1817
1818        match (&spec.command.entrypoint, &spec.command.args) {
1819            (Some(entrypoint), Some(cmd_args)) => {
1820                args.extend_from_slice(entrypoint);
1821                args.extend_from_slice(cmd_args);
1822            }
1823            (Some(entrypoint), None) => {
1824                args.extend_from_slice(entrypoint);
1825            }
1826            (None, Some(cmd_args)) if !cmd_args.is_empty() => {
1827                args.extend_from_slice(cmd_args);
1828            }
1829            _ => {
1830                // No spec command - try image config
1831                if let Some(img_cmd) =
1832                    image_config.and_then(zlayer_registry::ImageConfig::full_command)
1833                {
1834                    if img_cmd.is_empty() {
1835                        args.push("/bin/sh".to_string());
1836                    } else {
1837                        args.extend(img_cmd);
1838                    }
1839                } else {
1840                    args.push("/bin/sh".to_string());
1841                }
1842            }
1843        }
1844
1845        args
1846    }
1847
1848    /// Clean up a bundle directory
1849    ///
1850    /// Removes the bundle directory and all its contents.
1851    ///
1852    /// # Errors
1853    /// Returns an error if the bundle directory cannot be removed.
1854    pub async fn cleanup(&self) -> Result<()> {
1855        if self.bundle_dir.exists() {
1856            fs::remove_dir_all(&self.bundle_dir)
1857                .await
1858                .map_err(|e| AgentError::CreateFailed {
1859                    id: "cleanup".to_string(),
1860                    reason: format!(
1861                        "failed to remove bundle directory {}: {}",
1862                        self.bundle_dir.display(),
1863                        e
1864                    ),
1865                })?;
1866        }
1867        Ok(())
1868    }
1869}
1870
1871/// Create a bundle for a container
1872///
1873/// Convenience function that creates a bundle in the default location.
1874///
1875/// # Errors
1876/// Returns an error if bundle creation fails.
1877pub async fn create_bundle(
1878    container_id: &ContainerId,
1879    spec: &ServiceSpec,
1880    rootfs_path: Option<PathBuf>,
1881) -> Result<PathBuf> {
1882    let mut builder =
1883        BundleBuilder::for_container(container_id).with_host_network(spec.host_network);
1884
1885    if let Some(rootfs) = rootfs_path {
1886        builder = builder.with_rootfs(rootfs);
1887    }
1888
1889    builder.build(container_id, spec).await
1890}
1891
1892/// Clean up a container's bundle
1893///
1894/// Convenience function to remove a bundle from the default location.
1895///
1896/// # Errors
1897/// Returns an error if cleanup fails.
1898pub async fn cleanup_bundle(container_id: &ContainerId) -> Result<()> {
1899    let builder = BundleBuilder::for_container(container_id);
1900    builder.cleanup().await
1901}
1902
1903#[cfg(test)]
1904mod tests {
1905    use super::*;
1906    use zlayer_spec::*;
1907
1908    fn mock_spec() -> ServiceSpec {
1909        serde_yaml::from_str::<DeploymentSpec>(
1910            r"
1911version: v1
1912deployment: test
1913services:
1914  test:
1915    rtype: service
1916    image:
1917      name: test:latest
1918    endpoints:
1919      - name: http
1920        protocol: http
1921        port: 8080
1922",
1923        )
1924        .unwrap()
1925        .services
1926        .remove("test")
1927        .unwrap()
1928    }
1929
1930    fn mock_spec_with_resources() -> ServiceSpec {
1931        serde_yaml::from_str::<DeploymentSpec>(
1932            r"
1933version: v1
1934deployment: test
1935services:
1936  test:
1937    rtype: service
1938    image:
1939      name: test:latest
1940    resources:
1941      cpu: 0.5
1942      memory: 512Mi
1943    env:
1944      MY_VAR: my_value
1945      ANOTHER: value2
1946    endpoints:
1947      - name: http
1948        protocol: http
1949        port: 8080
1950",
1951        )
1952        .unwrap()
1953        .services
1954        .remove("test")
1955        .unwrap()
1956    }
1957
1958    fn mock_privileged_spec() -> ServiceSpec {
1959        serde_yaml::from_str::<DeploymentSpec>(
1960            r"
1961version: v1
1962deployment: test
1963services:
1964  test:
1965    rtype: service
1966    image:
1967      name: test:latest
1968    privileged: true
1969    endpoints:
1970      - name: http
1971        protocol: http
1972        port: 8080
1973",
1974        )
1975        .unwrap()
1976        .services
1977        .remove("test")
1978        .unwrap()
1979    }
1980
1981    #[test]
1982    fn test_parse_memory_string() {
1983        assert_eq!(parse_memory_string("512Mi").unwrap(), 512 * 1024 * 1024);
1984        assert_eq!(parse_memory_string("1Gi").unwrap(), 1024 * 1024 * 1024);
1985        assert_eq!(parse_memory_string("2G").unwrap(), 2 * 1000 * 1000 * 1000);
1986        assert_eq!(parse_memory_string("1024").unwrap(), 1024);
1987        assert_eq!(parse_memory_string("512Ki").unwrap(), 512 * 1024);
1988    }
1989
1990    #[test]
1991    fn test_parse_memory_string_errors() {
1992        assert!(parse_memory_string("").is_err());
1993        assert!(parse_memory_string("abc").is_err());
1994        assert!(parse_memory_string("12.5Mi").is_err());
1995    }
1996
1997    #[test]
1998    fn test_bundle_builder_new() {
1999        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2000        assert_eq!(builder.bundle_dir(), Path::new("/tmp/test-bundle"));
2001        assert!(builder.rootfs_path.is_none());
2002    }
2003
2004    #[test]
2005    fn test_bundle_builder_for_container() {
2006        let dirs = zlayer_paths::ZLayerDirs::system_default();
2007        let id = ContainerId {
2008            service: "myservice".to_string(),
2009            replica: 1,
2010        };
2011        let builder = BundleBuilder::for_container(&id);
2012        assert_eq!(builder.bundle_dir(), dirs.bundles().join("myservice-rep-1"));
2013    }
2014
2015    #[test]
2016    fn test_bundle_builder_with_rootfs() {
2017        let dirs = zlayer_paths::ZLayerDirs::system_default();
2018        let builder = BundleBuilder::new("/tmp/test-bundle".into())
2019            .with_rootfs(dirs.rootfs().join("myimage"));
2020        assert_eq!(builder.rootfs_path, Some(dirs.rootfs().join("myimage")));
2021    }
2022
2023    #[tokio::test]
2024    async fn test_build_oci_spec_basic() {
2025        let id = ContainerId {
2026            service: "test".to_string(),
2027            replica: 1,
2028        };
2029        let spec = mock_spec();
2030        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2031
2032        let oci_spec = builder
2033            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2034            .await
2035            .unwrap();
2036
2037        assert_eq!(oci_spec.version(), "1.0.2");
2038        assert!(oci_spec.root().is_some());
2039        assert_eq!(
2040            oci_spec.root().as_ref().unwrap().path(),
2041            std::path::Path::new("rootfs")
2042        );
2043        assert!(oci_spec.process().is_some());
2044        assert!(oci_spec.linux().is_some());
2045    }
2046
2047    #[tokio::test]
2048    async fn test_build_oci_spec_with_resources() {
2049        let id = ContainerId {
2050            service: "test".to_string(),
2051            replica: 1,
2052        };
2053        let spec = mock_spec_with_resources();
2054        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2055
2056        let oci_spec = builder
2057            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2058            .await
2059            .unwrap();
2060
2061        // Check that resources are set
2062        let linux = oci_spec.linux().as_ref().unwrap();
2063        let resources = linux.resources().as_ref().unwrap();
2064
2065        // Check CPU
2066        let cpu = resources.cpu().as_ref().unwrap();
2067        assert_eq!(cpu.quota(), Some(50_000)); // 0.5 cores * 100000
2068        assert_eq!(cpu.period(), Some(100_000));
2069
2070        // Check memory
2071        let memory = resources.memory().as_ref().unwrap();
2072        assert_eq!(memory.limit(), Some(512 * 1024 * 1024)); // 512Mi
2073    }
2074
2075    #[tokio::test]
2076    async fn test_build_oci_spec_privileged() {
2077        let id = ContainerId {
2078            service: "test".to_string(),
2079            replica: 1,
2080        };
2081        let spec = mock_privileged_spec();
2082        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2083
2084        let oci_spec = builder
2085            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2086            .await
2087            .unwrap();
2088
2089        // Check that all capabilities are set
2090        let process = oci_spec.process().as_ref().unwrap();
2091        let caps = process.capabilities().as_ref().unwrap();
2092        let bounding = caps.bounding().as_ref().unwrap();
2093
2094        // Should have all capabilities
2095        assert!(bounding.contains(&Capability::SysAdmin));
2096        assert!(bounding.contains(&Capability::NetAdmin));
2097
2098        // Check that masked paths are NOT set for privileged
2099        let linux = oci_spec.linux().as_ref().unwrap();
2100        assert!(
2101            linux.masked_paths().is_none() || linux.masked_paths().as_ref().unwrap().is_empty()
2102        );
2103    }
2104
2105    #[tokio::test]
2106    async fn test_build_oci_spec_environment() {
2107        let id = ContainerId {
2108            service: "test".to_string(),
2109            replica: 1,
2110        };
2111        let spec = mock_spec_with_resources();
2112        let builder = BundleBuilder::new("/tmp/test-bundle".into())
2113            .with_env("EXTRA_VAR".to_string(), "extra_value".to_string());
2114
2115        let oci_spec = builder
2116            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2117            .await
2118            .unwrap();
2119
2120        let process = oci_spec.process().as_ref().unwrap();
2121        let env = process.env().as_ref().unwrap();
2122
2123        // Check service env vars are present
2124        assert!(env.iter().any(|e| e == "MY_VAR=my_value"));
2125        assert!(env.iter().any(|e| e == "ANOTHER=value2"));
2126        // Check extra env var is present
2127        assert!(env.iter().any(|e| e == "EXTRA_VAR=extra_value"));
2128        // Check PATH is present
2129        assert!(env.iter().any(|e| e.starts_with("PATH=")));
2130    }
2131
2132    #[tokio::test]
2133    async fn test_build_namespaces() {
2134        let id = ContainerId {
2135            service: "test".to_string(),
2136            replica: 1,
2137        };
2138        let spec = mock_spec();
2139        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2140
2141        let oci_spec = builder
2142            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2143            .await
2144            .unwrap();
2145        let linux = oci_spec.linux().as_ref().unwrap();
2146        let namespaces = linux.namespaces().as_ref().unwrap();
2147
2148        // Check we have the expected namespaces
2149        let namespace_types: Vec<_> = namespaces
2150            .iter()
2151            .map(oci_spec::runtime::LinuxNamespace::typ)
2152            .collect();
2153        assert!(namespace_types.contains(&LinuxNamespaceType::Pid));
2154        assert!(namespace_types.contains(&LinuxNamespaceType::Ipc));
2155        assert!(namespace_types.contains(&LinuxNamespaceType::Uts));
2156        assert!(namespace_types.contains(&LinuxNamespaceType::Mount));
2157        assert!(namespace_types.contains(&LinuxNamespaceType::Network));
2158    }
2159
2160    #[tokio::test]
2161    async fn test_build_namespaces_host_network() {
2162        let id = ContainerId {
2163            service: "test".to_string(),
2164            replica: 1,
2165        };
2166        let spec = mock_spec();
2167        let builder = BundleBuilder::new("/tmp/test-bundle".into()).with_host_network(true);
2168
2169        let oci_spec = builder
2170            .build_oci_spec(&id, &spec, &std::collections::HashMap::new())
2171            .await
2172            .unwrap();
2173        let linux = oci_spec.linux().as_ref().unwrap();
2174        let namespaces = linux.namespaces().as_ref().unwrap();
2175
2176        // Check we have the expected namespaces (NO Network namespace)
2177        let namespace_types: Vec<_> = namespaces
2178            .iter()
2179            .map(oci_spec::runtime::LinuxNamespace::typ)
2180            .collect();
2181        assert!(namespace_types.contains(&LinuxNamespaceType::Pid));
2182        assert!(namespace_types.contains(&LinuxNamespaceType::Ipc));
2183        assert!(namespace_types.contains(&LinuxNamespaceType::Uts));
2184        assert!(namespace_types.contains(&LinuxNamespaceType::Mount));
2185        assert!(
2186            !namespace_types.contains(&LinuxNamespaceType::Network),
2187            "Network namespace should NOT be present in host_network mode"
2188        );
2189    }
2190
2191    #[test]
2192    fn test_build_default_mounts() {
2193        let spec = mock_spec();
2194        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2195
2196        let mounts = builder.build_default_mounts(&spec).unwrap();
2197
2198        // Check we have the expected mounts
2199        let mount_destinations: Vec<_> = mounts
2200            .iter()
2201            .map(|m| m.destination().to_string_lossy().to_string())
2202            .collect();
2203        assert!(mount_destinations.contains(&"/proc".to_string()));
2204        assert!(mount_destinations.contains(&"/dev".to_string()));
2205        assert!(mount_destinations.contains(&"/dev/pts".to_string()));
2206        assert!(mount_destinations.contains(&"/dev/shm".to_string()));
2207        assert!(mount_destinations.contains(&"/sys".to_string()));
2208    }
2209
2210    #[test]
2211    fn test_build_storage_mounts_bind() {
2212        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2213            r"
2214version: v1
2215deployment: test
2216services:
2217  test:
2218    image:
2219      name: test:latest
2220    storage:
2221      - type: bind
2222        source: /host/data
2223        target: /app/data
2224        readonly: true
2225",
2226        )
2227        .unwrap()
2228        .services
2229        .remove("test")
2230        .unwrap();
2231
2232        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2233        let volume_paths = std::collections::HashMap::new();
2234
2235        let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();
2236
2237        assert_eq!(mounts.len(), 1);
2238        assert_eq!(mounts[0].destination().to_string_lossy(), "/app/data");
2239        assert_eq!(
2240            mounts[0]
2241                .source()
2242                .as_ref()
2243                .map(|s| s.to_string_lossy().to_string()),
2244            Some("/host/data".to_string())
2245        );
2246        let options = mounts[0].options().as_ref().unwrap();
2247        assert!(options.contains(&"rbind".to_string()));
2248        assert!(options.contains(&"ro".to_string()));
2249    }
2250
2251    #[test]
2252    fn test_build_storage_mounts_named() {
2253        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2254            r"
2255version: v1
2256deployment: test
2257services:
2258  test:
2259    image:
2260      name: test:latest
2261    storage:
2262      - type: named
2263        name: my-volume
2264        target: /app/data
2265",
2266        )
2267        .unwrap()
2268        .services
2269        .remove("test")
2270        .unwrap();
2271
2272        let dirs = zlayer_paths::ZLayerDirs::system_default();
2273        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2274        let mut volume_paths = std::collections::HashMap::new();
2275        volume_paths.insert("my-volume".to_string(), dirs.volumes().join("my-volume"));
2276
2277        let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();
2278
2279        assert_eq!(mounts.len(), 1);
2280        assert_eq!(mounts[0].destination().to_string_lossy(), "/app/data");
2281        assert_eq!(
2282            mounts[0]
2283                .source()
2284                .as_ref()
2285                .map(|s| s.to_string_lossy().to_string()),
2286            Some(
2287                dirs.volumes()
2288                    .join("my-volume")
2289                    .to_string_lossy()
2290                    .into_owned()
2291            )
2292        );
2293    }
2294
2295    #[test]
2296    fn test_build_storage_mounts_tmpfs() {
2297        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2298            r"
2299version: v1
2300deployment: test
2301services:
2302  test:
2303    image:
2304      name: test:latest
2305    storage:
2306      - type: tmpfs
2307        target: /app/tmp
2308        size: 256Mi
2309        mode: 1777
2310",
2311        )
2312        .unwrap()
2313        .services
2314        .remove("test")
2315        .unwrap();
2316
2317        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2318        let volume_paths = std::collections::HashMap::new();
2319
2320        let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();
2321
2322        assert_eq!(mounts.len(), 1);
2323        assert_eq!(mounts[0].destination().to_string_lossy(), "/app/tmp");
2324        assert_eq!(mounts[0].typ().as_ref().map(String::as_str), Some("tmpfs"));
2325        let options = mounts[0].options().as_ref().unwrap();
2326        assert!(options.iter().any(|o| o.starts_with("size=")));
2327        assert!(options.iter().any(|o| o.starts_with("mode=")));
2328    }
2329
2330    #[test]
2331    fn test_build_storage_mounts_multiple() {
2332        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2333            r"
2334version: v1
2335deployment: test
2336services:
2337  test:
2338    image:
2339      name: test:latest
2340    storage:
2341      - type: bind
2342        source: /etc/config
2343        target: /app/config
2344        readonly: true
2345      - type: named
2346        name: app-data
2347        target: /app/data
2348      - type: tmpfs
2349        target: /app/tmp
2350",
2351        )
2352        .unwrap()
2353        .services
2354        .remove("test")
2355        .unwrap();
2356
2357        let dirs = zlayer_paths::ZLayerDirs::system_default();
2358        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2359        let mut volume_paths = std::collections::HashMap::new();
2360        volume_paths.insert("app-data".to_string(), dirs.volumes().join("app-data"));
2361
2362        let mounts = builder.build_storage_mounts(&spec, &volume_paths).unwrap();
2363
2364        assert_eq!(mounts.len(), 3);
2365
2366        // Verify each mount is correct type
2367        let destinations: Vec<String> = mounts
2368            .iter()
2369            .map(|m| m.destination().to_string_lossy().to_string())
2370            .collect();
2371        assert!(destinations.contains(&"/app/config".to_string()));
2372        assert!(destinations.contains(&"/app/data".to_string()));
2373        assert!(destinations.contains(&"/app/tmp".to_string()));
2374    }
2375
2376    #[test]
2377    fn test_build_storage_mounts_anonymous_missing_path() {
2378        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2379            r"
2380version: v1
2381deployment: test
2382services:
2383  test:
2384    image:
2385      name: test:latest
2386    storage:
2387      - type: anonymous
2388        target: /app/cache
2389",
2390        )
2391        .unwrap()
2392        .services
2393        .remove("test")
2394        .unwrap();
2395
2396        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2397        let volume_paths = std::collections::HashMap::new(); // No path provided
2398
2399        let result = builder.build_storage_mounts(&spec, &volume_paths);
2400
2401        // Should fail because anonymous volume path not prepared
2402        assert!(result.is_err());
2403    }
2404
2405    #[tokio::test]
2406    async fn test_oci_spec_includes_storage_mounts() {
2407        let id = ContainerId {
2408            service: "test".to_string(),
2409            replica: 1,
2410        };
2411        let spec = serde_yaml::from_str::<zlayer_spec::DeploymentSpec>(
2412            r"
2413version: v1
2414deployment: test
2415services:
2416  test:
2417    image:
2418      name: test:latest
2419    storage:
2420      - type: bind
2421        source: /host/data
2422        target: /app/data
2423      - type: tmpfs
2424        target: /app/tmp
2425",
2426        )
2427        .unwrap()
2428        .services
2429        .remove("test")
2430        .unwrap();
2431
2432        let builder = BundleBuilder::new("/tmp/test-bundle".into());
2433        let volume_paths = std::collections::HashMap::new();
2434
2435        let oci_spec = builder
2436            .build_oci_spec(&id, &spec, &volume_paths)
2437            .await
2438            .unwrap();
2439
2440        // Verify the OCI spec includes storage mounts
2441        let mounts = oci_spec.mounts().as_ref().unwrap();
2442        let destinations: Vec<String> = mounts
2443            .iter()
2444            .map(|m| m.destination().to_string_lossy().to_string())
2445            .collect();
2446
2447        // Should include both default mounts and storage mounts
2448        assert!(destinations.contains(&"/proc".to_string())); // default
2449        assert!(destinations.contains(&"/dev".to_string())); // default
2450        assert!(destinations.contains(&"/app/data".to_string())); // storage bind
2451        assert!(destinations.contains(&"/app/tmp".to_string())); // storage tmpfs
2452    }
2453}