1use crate::error::{AgentError, Result};
9use crate::runtime::ContainerId;
10use oci_spec::runtime::{
11 Capability, LinuxBuilder, LinuxCapabilitiesBuilder, LinuxCpuBuilder, LinuxDeviceBuilder,
12 LinuxDeviceCgroupBuilder, LinuxDeviceType, LinuxMemoryBuilder, LinuxNamespaceBuilder,
13 LinuxNamespaceType, LinuxResourcesBuilder, Mount, MountBuilder, ProcessBuilder, RootBuilder,
14 Spec, SpecBuilder, UserBuilder,
15};
16use std::collections::{HashMap, HashSet};
17use std::path::{Path, PathBuf};
18use std::str::FromStr;
19use std::sync::Arc;
20use tokio::fs;
21use zlayer_secrets::SecretsProvider;
22use zlayer_spec::{ServiceSpec, StorageSpec, StorageTier};
23
/// Capability set granted to privileged services.
///
/// `build_capabilities` copies this slice into the bounding, effective and
/// permitted sets when `spec.privileged` is true. The entries are kept in
/// alphabetical order by variant name.
///
/// NOTE(review): this is presumably meant to list every variant of
/// `oci_spec::runtime::Capability`; re-check it when upgrading `oci_spec`.
const ALL_CAPABILITIES: &[Capability] = &[
    Capability::AuditControl,
    Capability::AuditRead,
    Capability::AuditWrite,
    Capability::BlockSuspend,
    Capability::Bpf,
    Capability::CheckpointRestore,
    Capability::Chown,
    Capability::DacOverride,
    Capability::DacReadSearch,
    Capability::Fowner,
    Capability::Fsetid,
    Capability::IpcLock,
    Capability::IpcOwner,
    Capability::Kill,
    Capability::Lease,
    Capability::LinuxImmutable,
    Capability::MacAdmin,
    Capability::MacOverride,
    Capability::Mknod,
    Capability::NetAdmin,
    Capability::NetBindService,
    Capability::NetBroadcast,
    Capability::NetRaw,
    Capability::Perfmon,
    Capability::Setfcap,
    Capability::Setgid,
    Capability::Setpcap,
    Capability::Setuid,
    Capability::SysAdmin,
    Capability::SysBoot,
    Capability::SysChroot,
    Capability::SysModule,
    Capability::SysNice,
    Capability::SysPacct,
    Capability::SysPtrace,
    Capability::SysRawio,
    Capability::SysResource,
    Capability::SysTime,
    Capability::SysTtyConfig,
    Capability::Syslog,
    Capability::WakeAlarm,
];
68
/// Parses a human-readable memory quantity into a number of bytes.
///
/// Surrounding whitespace is ignored. The numeric part must be an unsigned
/// integer, optionally followed by one suffix:
///
/// * `Ki`, `Mi`, `Gi`, `Ti` — powers of 1024
/// * `K`/`k`, `M`/`m`, `G`/`g`, `T`/`t` — powers of 1000
/// * no suffix — plain bytes
///
/// # Errors
///
/// Returns a descriptive message when the input is empty, when the numeric
/// part is not a valid `u64`, or when the scaled value does not fit in `u64`.
pub fn parse_memory_string(s: &str) -> std::result::Result<u64, String> {
    const KIB: u64 = 1024;
    const KB: u64 = 1000;
    // Two-letter binary suffixes come first; none of them ends in a character
    // that a one-letter decimal suffix could match, so order is not otherwise
    // significant.
    const SUFFIXES: [(&str, u64); 12] = [
        ("Ki", KIB),
        ("Mi", KIB * KIB),
        ("Gi", KIB * KIB * KIB),
        ("Ti", KIB * KIB * KIB * KIB),
        ("K", KB),
        ("k", KB),
        ("M", KB * KB),
        ("m", KB * KB),
        ("G", KB * KB * KB),
        ("g", KB * KB * KB),
        ("T", KB * KB * KB * KB),
        ("t", KB * KB * KB * KB),
    ];

    let s = s.trim();
    if s.is_empty() {
        return Err("empty memory string".to_string());
    }

    let (num_str, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, multiplier)| s.strip_suffix(suffix).map(|n| (n, multiplier)))
        .unwrap_or((s, 1));

    let num: u64 = num_str
        .parse()
        .map_err(|e| format!("invalid number: {e}"))?;

    // A plain `*` would panic in debug builds and silently wrap in release
    // builds for inputs such as "18446744073709551615Ki".
    num.checked_mul(multiplier)
        .ok_or_else(|| format!("memory value out of range: {s}"))
}
117
/// Returns the `(major, minor)` device numbers of the node at `path`.
///
/// The numbers are decoded from `st_rdev` using the Linux/glibc `dev_t`
/// layout (the same bit arrangement as `gnu_dev_major` / `gnu_dev_minor`):
/// the major number occupies bits 8-19 and 44-63, the minor number bits 0-7
/// and 20-43. Masking both with `0xff` would truncate any major or minor
/// number above 255.
///
/// NOTE(review): other Unix systems encode `dev_t` differently; this decoding
/// is only meaningful on Linux, which is where device cgroups exist.
///
/// # Errors
///
/// Propagates the I/O error from `std::fs::metadata` (for example when the
/// path does not exist).
#[cfg(unix)]
#[allow(clippy::cast_possible_wrap)]
fn get_device_major_minor(path: &str) -> std::io::Result<(i64, i64)> {
    use std::os::unix::fs::MetadataExt;

    let rdev = std::fs::metadata(path)?.rdev();

    // Low 12 bits of the major live at bits 8-19, the rest at bits 44-63.
    let major = ((rdev >> 8) & 0x0000_0fff) | ((rdev >> 32) & 0xffff_f000);
    // Low 8 bits of the minor live at bits 0-7, the rest at bits 20-43.
    let minor = (rdev & 0x0000_00ff) | ((rdev >> 12) & 0xffff_ff00);

    // Both values are at most 32 bits wide after masking, so the casts cannot wrap.
    Ok((major as i64, minor as i64))
}
130
/// Non-Unix stand-in for the device-number probe.
///
/// Always fails with `ErrorKind::Unsupported`; the path argument is ignored.
#[cfg(not(unix))]
fn get_device_major_minor(_path: &str) -> std::io::Result<(i64, i64)> {
    let unsupported = std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "device-cgroup probes require Unix",
    );
    Err(unsupported)
}
139
140#[cfg(unix)]
142fn get_device_type(path: &str) -> std::io::Result<LinuxDeviceType> {
143 use std::os::unix::fs::FileTypeExt;
144 let metadata = std::fs::metadata(path)?;
145 let file_type = metadata.file_type();
146 if file_type.is_char_device() {
147 Ok(LinuxDeviceType::C)
148 } else if file_type.is_block_device() {
149 Ok(LinuxDeviceType::B)
150 } else {
151 Ok(LinuxDeviceType::U) }
153}
154
/// Non-Unix stand-in for the device-type probe.
///
/// Always fails with `ErrorKind::Unsupported`; the path argument is ignored.
#[cfg(not(unix))]
fn get_device_type(_path: &str) -> std::io::Result<LinuxDeviceType> {
    let unsupported = std::io::Error::new(
        std::io::ErrorKind::Unsupported,
        "device-cgroup probes require Unix",
    );
    Err(unsupported)
}
163
/// Builder that assembles an OCI runtime bundle (a directory holding a
/// `rootfs` entry and a `config.json`) for one container.
///
/// Configure it with the `with_*` methods, then call `build` to write the
/// bundle to disk or `build_spec_only` to obtain just the runtime spec.
#[derive(Clone)]
pub struct BundleBuilder {
    /// Directory in which `build` creates `rootfs` and `config.json`.
    bundle_dir: PathBuf,
    /// Existing root filesystem to symlink as `<bundle_dir>/rootfs`; when
    /// `None`, `build` creates an empty `rootfs` directory instead.
    rootfs_path: Option<PathBuf>,
    /// Hostname override; the container id string is used when `None`.
    hostname: Option<String>,
    /// Extra `(key, value)` environment pairs, applied after the image and
    /// service-spec environment so they replace earlier values for a key.
    extra_env: Vec<(String, String)>,
    /// Working-directory override; takes precedence over the service spec's
    /// workdir and the image's working directory.
    cwd: Option<String>,
    /// Process argument override; when `None` the command is resolved from
    /// the service spec and image configuration.
    args: Option<Vec<String>>,
    /// Prepared host paths keyed by volume key, used by `build` to resolve
    /// storage mounts.
    volume_paths: HashMap<String, PathBuf>,
    /// Image configuration supplying defaults for user, environment and
    /// working directory.
    image_config: Option<zlayer_registry::ImageConfig>,
    /// When `true`, no network namespace is added to the Linux configuration.
    host_network: bool,
    /// Provider used to resolve environment values with secrets; only
    /// consulted when `deployment_scope` is also set.
    secrets_provider: Option<Arc<dyn SecretsProvider>>,
    /// Scope passed to secret resolution alongside `secrets_provider`.
    deployment_scope: Option<String>,
    /// Host path bind-mounted read-only at the default socket path inside
    /// the container.
    socket_path: Option<String>,
}
204
205impl std::fmt::Debug for BundleBuilder {
206 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207 f.debug_struct("BundleBuilder")
208 .field("bundle_dir", &self.bundle_dir)
209 .field("rootfs_path", &self.rootfs_path)
210 .field("hostname", &self.hostname)
211 .field("extra_env", &self.extra_env)
212 .field("cwd", &self.cwd)
213 .field("args", &self.args)
214 .field("volume_paths", &self.volume_paths)
215 .field("image_config", &self.image_config)
216 .field("host_network", &self.host_network)
217 .field("secrets_provider", &self.secrets_provider.is_some())
218 .field("deployment_scope", &self.deployment_scope)
219 .field("socket_path", &self.socket_path)
220 .finish()
221 }
222}
223
224impl BundleBuilder {
225 #[must_use]
235 pub fn new(bundle_dir: PathBuf) -> Self {
236 Self {
237 bundle_dir,
238 rootfs_path: None,
239 hostname: None,
240 extra_env: Vec::new(),
241 cwd: None,
242 args: None,
243 volume_paths: HashMap::new(),
244 image_config: None,
245 host_network: false,
246 secrets_provider: None,
247 deployment_scope: None,
248 socket_path: None,
249 }
250 }
251
252 #[must_use]
254 pub fn for_container(container_id: &ContainerId) -> Self {
255 let bundle_dir = zlayer_paths::ZLayerDirs::system_default()
256 .bundles()
257 .join(container_id.to_string());
258 Self::new(bundle_dir)
259 }
260
261 #[must_use]
265 pub fn with_rootfs(mut self, rootfs_path: PathBuf) -> Self {
266 self.rootfs_path = Some(rootfs_path);
267 self
268 }
269
270 #[must_use]
272 pub fn with_hostname(mut self, hostname: String) -> Self {
273 self.hostname = Some(hostname);
274 self
275 }
276
277 #[must_use]
279 pub fn with_env(mut self, key: String, value: String) -> Self {
280 self.extra_env.push((key, value));
281 self
282 }
283
284 #[must_use]
286 pub fn with_cwd(mut self, cwd: String) -> Self {
287 self.cwd = Some(cwd);
288 self
289 }
290
291 #[must_use]
293 pub fn with_args(mut self, args: Vec<String>) -> Self {
294 self.args = Some(args);
295 self
296 }
297
298 #[must_use]
303 pub fn with_volume_paths(mut self, volume_paths: HashMap<String, PathBuf>) -> Self {
304 self.volume_paths = volume_paths;
305 self
306 }
307
308 #[must_use]
313 pub fn with_image_config(mut self, config: zlayer_registry::ImageConfig) -> Self {
314 self.image_config = Some(config);
315 self
316 }
317
318 #[must_use]
324 pub fn with_host_network(mut self, host_network: bool) -> Self {
325 self.host_network = host_network;
326 self
327 }
328
329 #[must_use]
334 pub fn with_secrets_provider(mut self, provider: Arc<dyn SecretsProvider>) -> Self {
335 self.secrets_provider = Some(provider);
336 self
337 }
338
339 #[must_use]
344 pub fn with_deployment_scope(mut self, scope: String) -> Self {
345 self.deployment_scope = Some(scope);
346 self
347 }
348
349 #[must_use]
352 pub fn with_socket_mount(mut self, path: impl Into<String>) -> Self {
353 self.socket_path = Some(path.into());
354 self
355 }
356
357 #[must_use]
359 pub fn bundle_dir(&self) -> &Path {
360 &self.bundle_dir
361 }
362
363 pub async fn build(&self, container_id: &ContainerId, spec: &ServiceSpec) -> Result<PathBuf> {
375 fs::create_dir_all(&self.bundle_dir)
377 .await
378 .map_err(|e| AgentError::CreateFailed {
379 id: container_id.to_string(),
380 reason: format!("failed to create bundle directory: {e}"),
381 })?;
382
383 let rootfs_in_bundle = self.bundle_dir.join("rootfs");
385 if let Some(ref rootfs_path) = self.rootfs_path {
386 let _ = fs::remove_file(&rootfs_in_bundle).await;
388 let _ = fs::remove_dir(&rootfs_in_bundle).await;
389
390 #[cfg(unix)]
395 tokio::fs::symlink(rootfs_path, &rootfs_in_bundle)
396 .await
397 .map_err(|e| AgentError::CreateFailed {
398 id: container_id.to_string(),
399 reason: format!(
400 "failed to symlink rootfs from {} to {}: {}",
401 rootfs_path.display(),
402 rootfs_in_bundle.display(),
403 e
404 ),
405 })?;
406
407 #[cfg(windows)]
408 tokio::fs::symlink_dir(rootfs_path, &rootfs_in_bundle)
409 .await
410 .map_err(|e| AgentError::CreateFailed {
411 id: container_id.to_string(),
412 reason: format!(
413 "failed to symlink rootfs from {} to {}: {}",
414 rootfs_path.display(),
415 rootfs_in_bundle.display(),
416 e
417 ),
418 })?;
419 } else {
420 fs::create_dir_all(&rootfs_in_bundle)
422 .await
423 .map_err(|e| AgentError::CreateFailed {
424 id: container_id.to_string(),
425 reason: format!("failed to create rootfs directory: {e}"),
426 })?;
427 }
428
429 let oci_spec = self
431 .build_oci_spec(container_id, spec, &self.volume_paths)
432 .await?;
433
434 let config_path = self.bundle_dir.join("config.json");
436 let config_json =
437 serde_json::to_string_pretty(&oci_spec).map_err(|e| AgentError::CreateFailed {
438 id: container_id.to_string(),
439 reason: format!("failed to serialize OCI spec: {e}"),
440 })?;
441
442 fs::write(&config_path, config_json)
443 .await
444 .map_err(|e| AgentError::CreateFailed {
445 id: container_id.to_string(),
446 reason: format!("failed to write config.json: {e}"),
447 })?;
448
449 tracing::debug!(
450 "Created OCI bundle at {} for container {}",
451 self.bundle_dir.display(),
452 container_id
453 );
454
455 Ok(self.bundle_dir.clone())
456 }
457
458 pub async fn build_spec_only(
471 &self,
472 container_id: &ContainerId,
473 spec: &ServiceSpec,
474 volume_paths: &std::collections::HashMap<String, PathBuf>,
475 ) -> Result<oci_spec::runtime::Spec> {
476 self.build_oci_spec(container_id, spec, volume_paths).await
477 }
478
479 #[allow(clippy::too_many_lines)]
481 async fn build_oci_spec(
482 &self,
483 container_id: &ContainerId,
484 spec: &ServiceSpec,
485 volume_paths: &std::collections::HashMap<String, PathBuf>,
486 ) -> Result<Spec> {
487 let user = {
489 let (uid, gid) = if let Some(user_str) = self
490 .image_config
491 .as_ref()
492 .and_then(|c| c.user.as_ref())
493 .filter(|u| !u.is_empty())
494 {
495 let parts: Vec<&str> = user_str.splitn(2, ':').collect();
497 let uid = parts[0].parse::<u32>().unwrap_or(0);
498 let gid = if parts.len() > 1 {
499 parts[1].parse::<u32>().unwrap_or(0)
500 } else {
501 uid
502 };
503 (uid, gid)
504 } else {
505 (0u32, 0u32)
506 };
507
508 UserBuilder::default()
509 .uid(uid)
510 .gid(gid)
511 .build()
512 .map_err(|e| AgentError::InvalidSpec(format!("failed to build user: {e}")))?
513 };
514
515 let mut env: Vec<String> = Vec::new();
518 let mut env_keys: HashSet<String> = HashSet::new();
519
520 if let Some(img_env) = self.image_config.as_ref().and_then(|c| c.env.as_ref()) {
522 for entry in img_env {
523 if let Some(key) = entry.split('=').next() {
524 env_keys.insert(key.to_string());
525 }
526 env.push(entry.clone());
527 }
528 }
529
530 if !env_keys.contains("PATH") {
532 env.push(
533 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
534 );
535 env_keys.insert("PATH".to_string());
536 }
537
538 if !env_keys.contains("TERM") {
540 env.push("TERM=xterm".to_string());
541 env_keys.insert("TERM".to_string());
542 }
543
544 if let (Some(secrets_provider), Some(scope)) =
551 (&self.secrets_provider, &self.deployment_scope)
552 {
553 let resolved_map =
554 crate::env::resolve_env_with_secrets(&spec.env, secrets_provider.as_ref(), scope)
555 .await
556 .map_err(|e| {
557 AgentError::InvalidSpec(format!(
558 "environment variable resolution failed: {e}"
559 ))
560 })?;
561
562 for (key, value) in &resolved_map {
563 if env_keys.contains(key.as_str()) {
564 env.retain(|e| e.split('=').next() != Some(key.as_str()));
565 }
566 env_keys.insert(key.clone());
567 env.push(format!("{key}={value}"));
568 }
569 } else {
570 let resolved = crate::env::resolve_env_vars_with_warnings(&spec.env).map_err(|e| {
571 AgentError::InvalidSpec(format!("environment variable resolution failed: {e}"))
572 })?;
573
574 for warning in &resolved.warnings {
576 tracing::warn!(container = %container_id, "{}", warning);
577 }
578
579 for var in &resolved.vars {
581 if let Some(key) = var.split('=').next() {
582 if env_keys.contains(key) {
583 env.retain(|e| e.split('=').next() != Some(key));
585 }
586 env_keys.insert(key.to_string());
587 }
588 env.push(var.clone());
589 }
590 }
591
592 for (key, value) in &self.extra_env {
594 if env_keys.contains(key.as_str()) {
595 env.retain(|e| e.split('=').next() != Some(key.as_str()));
596 }
597 env_keys.insert(key.clone());
598 env.push(format!("{key}={value}"));
599 }
600
601 if let Some(ref gpu) = spec.resources.gpu {
605 let indices: Vec<String> = (0..gpu.count).map(|i| i.to_string()).collect();
607 let device_list = indices.join(",");
608 match gpu.vendor.as_str() {
609 "nvidia" => {
610 env.push(format!("NVIDIA_VISIBLE_DEVICES={device_list}"));
611 env.push(format!("CUDA_VISIBLE_DEVICES={device_list}"));
612 }
613 "amd" => {
614 env.push(format!("ROCR_VISIBLE_DEVICES={device_list}"));
615 env.push(format!("HIP_VISIBLE_DEVICES={device_list}"));
616 }
617 "intel" => {
618 env.push(format!("ZE_AFFINITY_MASK={device_list}"));
619 }
620 _ => {}
621 }
622 }
623
624 if let Some(ref gpu) = spec.resources.gpu {
628 if let Some(ref dist) = gpu.distributed {
629 env.push(format!("MASTER_PORT={}", dist.master_port));
630 env.push(format!("MASTER_ADDR={}", container_id.service));
631 env.push("WORLD_SIZE=1".to_string());
632 env.push("RANK=0".to_string());
633 env.push("LOCAL_RANK=0".to_string());
634 match dist.backend.as_str() {
635 "nccl" => env.push("NCCL_SOCKET_IFNAME=eth0".to_string()),
636 "gloo" => env.push("GLOO_SOCKET_IFNAME=eth0".to_string()),
637 _ => {}
638 }
639 }
640 }
641
642 let capabilities = self.build_capabilities(spec)?;
644
645 let cwd = self
647 .cwd
648 .clone()
649 .or_else(|| spec.command.workdir.clone())
650 .or_else(|| {
651 self.image_config
652 .as_ref()
653 .and_then(|c| c.working_dir.as_ref())
654 .filter(|w| !w.is_empty())
655 .cloned()
656 })
657 .unwrap_or_else(|| "/".to_string());
658
659 let process_args = if let Some(ref args) = self.args {
661 args.clone()
662 } else {
663 Self::resolve_command_from_spec(spec, self.image_config.as_ref())
664 };
665
666 let mut process_builder = ProcessBuilder::default()
668 .terminal(false)
669 .user(user)
670 .env(env)
671 .args(process_args)
672 .cwd(cwd)
673 .no_new_privileges(!spec.privileged && spec.capabilities.is_empty());
674
675 if let Some(caps) = capabilities {
677 process_builder = process_builder.capabilities(caps);
678 }
679
680 let process = process_builder
681 .build()
682 .map_err(|e| AgentError::InvalidSpec(format!("failed to build process: {e}")))?;
683
684 let root = RootBuilder::default()
687 .path("rootfs".to_string())
688 .readonly(false)
689 .build()
690 .map_err(|e| AgentError::InvalidSpec(format!("failed to build root: {e}")))?;
691
692 let mut mounts = self.build_default_mounts(spec)?;
694
695 let storage_mounts = self.build_storage_mounts(spec, volume_paths)?;
697 mounts.extend(storage_mounts);
698
699 if let Some(ref socket_path) = self.socket_path {
703 mounts.push(
704 MountBuilder::default()
705 .destination(zlayer_paths::ZLayerDirs::default_socket_path())
706 .typ("bind")
707 .source(socket_path.clone())
708 .options(vec!["rbind".into(), "ro".into()])
709 .build()
710 .expect("valid socket mount"),
711 );
712 }
713
714 let linux = self.build_linux_config(spec)?;
716
717 let hostname = self
719 .hostname
720 .clone()
721 .unwrap_or_else(|| container_id.to_string());
722
723 let oci_spec = SpecBuilder::default()
725 .version("1.0.2".to_string())
726 .root(root)
727 .process(process)
728 .hostname(hostname)
729 .mounts(mounts)
730 .linux(linux)
731 .build()
732 .map_err(|e| AgentError::InvalidSpec(format!("failed to build OCI spec: {e}")))?;
733
734 Ok(oci_spec)
735 }
736
737 #[allow(clippy::unused_self)]
739 fn build_capabilities(
740 &self,
741 spec: &ServiceSpec,
742 ) -> Result<Option<oci_spec::runtime::LinuxCapabilities>> {
743 if spec.privileged {
744 let all_caps: HashSet<Capability> = ALL_CAPABILITIES.iter().copied().collect();
746 let empty_caps: HashSet<Capability> = HashSet::new();
747
748 let caps = LinuxCapabilitiesBuilder::default()
749 .bounding(all_caps.clone())
750 .effective(all_caps.clone())
751 .permitted(all_caps)
752 .inheritable(empty_caps.clone())
753 .ambient(empty_caps)
754 .build()
755 .map_err(|e| {
756 AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
757 })?;
758
759 Ok(Some(caps))
760 } else if !spec.capabilities.is_empty() {
761 let caps: HashSet<Capability> = spec
763 .capabilities
764 .iter()
765 .filter_map(|c| {
766 let cap_name = if c.starts_with("CAP_") {
768 c.to_uppercase()
769 } else {
770 format!("CAP_{}", c.to_uppercase())
771 };
772 Capability::from_str(&cap_name).ok()
773 })
774 .collect();
775
776 let empty_caps: HashSet<Capability> = HashSet::new();
777
778 let built_caps = LinuxCapabilitiesBuilder::default()
779 .bounding(caps.clone())
780 .effective(caps.clone())
781 .permitted(caps)
782 .inheritable(empty_caps.clone())
783 .ambient(empty_caps)
784 .build()
785 .map_err(|e| {
786 AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
787 })?;
788
789 Ok(Some(built_caps))
790 } else {
791 let default_caps: HashSet<Capability> = [
793 Capability::Chown,
794 Capability::DacOverride,
795 Capability::Fsetid,
796 Capability::Fowner,
797 Capability::Mknod,
798 Capability::NetRaw,
799 Capability::Setgid,
800 Capability::Setuid,
801 Capability::Setfcap,
802 Capability::Setpcap,
803 Capability::NetBindService,
804 Capability::SysChroot,
805 Capability::Kill,
806 Capability::AuditWrite,
807 ]
808 .into_iter()
809 .collect();
810
811 let empty_caps: HashSet<Capability> = HashSet::new();
812
813 let built_caps = LinuxCapabilitiesBuilder::default()
814 .bounding(default_caps.clone())
815 .effective(default_caps.clone())
816 .permitted(default_caps)
817 .inheritable(empty_caps.clone())
818 .ambient(empty_caps)
819 .build()
820 .map_err(|e| {
821 AgentError::InvalidSpec(format!("failed to build capabilities: {e}"))
822 })?;
823
824 Ok(Some(built_caps))
825 }
826 }
827
828 #[allow(clippy::unused_self, clippy::too_many_lines)]
830 fn build_default_mounts(&self, spec: &ServiceSpec) -> Result<Vec<Mount>> {
831 let mut mounts = Vec::new();
832
833 mounts.push(
835 MountBuilder::default()
836 .destination("/proc".to_string())
837 .typ("proc".to_string())
838 .source("proc".to_string())
839 .options(vec![
840 "nosuid".to_string(),
841 "noexec".to_string(),
842 "nodev".to_string(),
843 ])
844 .build()
845 .map_err(|e| {
846 AgentError::InvalidSpec(format!("failed to build /proc mount: {e}"))
847 })?,
848 );
849
850 mounts.push(
852 MountBuilder::default()
853 .destination("/dev".to_string())
854 .typ("tmpfs".to_string())
855 .source("tmpfs".to_string())
856 .options(vec![
857 "nosuid".to_string(),
858 "strictatime".to_string(),
859 "mode=755".to_string(),
860 "size=65536k".to_string(),
861 ])
862 .build()
863 .map_err(|e| AgentError::InvalidSpec(format!("failed to build /dev mount: {e}")))?,
864 );
865
866 mounts.push(
868 MountBuilder::default()
869 .destination("/dev/pts".to_string())
870 .typ("devpts".to_string())
871 .source("devpts".to_string())
872 .options(vec![
873 "nosuid".to_string(),
874 "noexec".to_string(),
875 "newinstance".to_string(),
876 "ptmxmode=0666".to_string(),
877 "mode=0620".to_string(),
878 "gid=5".to_string(),
879 ])
880 .build()
881 .map_err(|e| {
882 AgentError::InvalidSpec(format!("failed to build /dev/pts mount: {e}"))
883 })?,
884 );
885
886 mounts.push(
888 MountBuilder::default()
889 .destination("/dev/shm".to_string())
890 .typ("tmpfs".to_string())
891 .source("shm".to_string())
892 .options(vec![
893 "nosuid".to_string(),
894 "noexec".to_string(),
895 "nodev".to_string(),
896 "mode=1777".to_string(),
897 "size=65536k".to_string(),
898 ])
899 .build()
900 .map_err(|e| {
901 AgentError::InvalidSpec(format!("failed to build /dev/shm mount: {e}"))
902 })?,
903 );
904
905 mounts.push(
907 MountBuilder::default()
908 .destination("/dev/mqueue".to_string())
909 .typ("mqueue".to_string())
910 .source("mqueue".to_string())
911 .options(vec![
912 "nosuid".to_string(),
913 "noexec".to_string(),
914 "nodev".to_string(),
915 ])
916 .build()
917 .map_err(|e| {
918 AgentError::InvalidSpec(format!("failed to build /dev/mqueue mount: {e}"))
919 })?,
920 );
921
922 let sys_options = if spec.privileged {
924 vec![
925 "nosuid".to_string(),
926 "noexec".to_string(),
927 "nodev".to_string(),
928 ]
929 } else {
930 vec![
931 "nosuid".to_string(),
932 "noexec".to_string(),
933 "nodev".to_string(),
934 "ro".to_string(),
935 ]
936 };
937
938 mounts.push(
939 MountBuilder::default()
940 .destination("/sys".to_string())
941 .typ("sysfs".to_string())
942 .source("sysfs".to_string())
943 .options(sys_options)
944 .build()
945 .map_err(|e| AgentError::InvalidSpec(format!("failed to build /sys mount: {e}")))?,
946 );
947
948 mounts.push(
950 MountBuilder::default()
951 .destination("/sys/fs/cgroup".to_string())
952 .typ("cgroup2".to_string())
953 .source("cgroup".to_string())
954 .options(vec![
955 "nosuid".to_string(),
956 "noexec".to_string(),
957 "nodev".to_string(),
958 "relatime".to_string(),
959 ])
960 .build()
961 .map_err(|e| {
962 AgentError::InvalidSpec(format!("failed to build cgroup mount: {e}"))
963 })?,
964 );
965
966 Ok(mounts)
967 }
968
969 #[allow(clippy::unused_self, clippy::too_many_lines)]
975 fn build_storage_mounts(
976 &self,
977 spec: &ServiceSpec,
978 volume_paths: &std::collections::HashMap<String, PathBuf>,
979 ) -> Result<Vec<Mount>> {
980 let mut mounts = Vec::new();
981
982 for storage in &spec.storage {
983 let mount = match storage {
984 StorageSpec::Bind {
985 source,
986 target,
987 readonly,
988 } => {
989 let mut options = vec!["rbind".to_string()];
990 if *readonly {
991 options.push("ro".to_string());
992 } else {
993 options.push("rw".to_string());
994 }
995
996 MountBuilder::default()
997 .destination(target.clone())
998 .typ("none".to_string())
999 .source(source.clone())
1000 .options(options)
1001 .build()
1002 .map_err(|e| {
1003 AgentError::InvalidSpec(format!(
1004 "failed to build bind mount for {target}: {e}"
1005 ))
1006 })?
1007 }
1008
1009 StorageSpec::Named {
1010 name,
1011 target,
1012 readonly,
1013 tier,
1014 ..
1015 } => {
1016 let source = volume_paths.get(name).ok_or_else(|| {
1018 AgentError::InvalidSpec(format!(
1019 "volume '{name}' not prepared - ensure StorageManager.ensure_volume() was called"
1020 ))
1021 })?;
1022
1023 if matches!(tier, StorageTier::Network) {
1025 tracing::warn!(
1026 volume = %name,
1027 tier = ?tier,
1028 "Network storage tier is NOT SQLite-safe. Avoid using SQLite databases on this volume."
1029 );
1030 }
1031
1032 let mut options = vec!["rbind".to_string()];
1033 if *readonly {
1034 options.push("ro".to_string());
1035 } else {
1036 options.push("rw".to_string());
1037 }
1038
1039 MountBuilder::default()
1040 .destination(target.clone())
1041 .typ("none".to_string())
1042 .source(source.to_string_lossy().to_string())
1043 .options(options)
1044 .build()
1045 .map_err(|e| {
1046 AgentError::InvalidSpec(format!(
1047 "failed to build named volume mount for {target}: {e}"
1048 ))
1049 })?
1050 }
1051
1052 StorageSpec::Anonymous { target, tier } => {
1053 let key = format!("_anon_{}", target.trim_start_matches('/').replace('/', "_"));
1056 let source = volume_paths.get(&key).ok_or_else(|| {
1057 AgentError::InvalidSpec(format!(
1058 "anonymous volume for '{target}' not prepared"
1059 ))
1060 })?;
1061
1062 if matches!(tier, StorageTier::Network) {
1063 tracing::warn!(
1064 target = %target,
1065 tier = ?tier,
1066 "Network storage tier is NOT SQLite-safe."
1067 );
1068 }
1069
1070 let options = vec!["rbind".to_string(), "rw".to_string()];
1071
1072 MountBuilder::default()
1073 .destination(target.clone())
1074 .typ("none".to_string())
1075 .source(source.to_string_lossy().to_string())
1076 .options(options)
1077 .build()
1078 .map_err(|e| {
1079 AgentError::InvalidSpec(format!(
1080 "failed to build anonymous volume mount for {target}: {e}"
1081 ))
1082 })?
1083 }
1084
1085 StorageSpec::Tmpfs { target, size, mode } => {
1086 let mut options = vec!["nosuid".to_string(), "nodev".to_string()];
1087
1088 if let Some(size_str) = size {
1089 options.push(format!("size={size_str}"));
1090 }
1091
1092 if let Some(mode_val) = mode {
1093 options.push(format!("mode={mode_val:o}"));
1094 }
1095
1096 MountBuilder::default()
1097 .destination(target.clone())
1098 .typ("tmpfs".to_string())
1099 .source("tmpfs".to_string())
1100 .options(options)
1101 .build()
1102 .map_err(|e| {
1103 AgentError::InvalidSpec(format!(
1104 "failed to build tmpfs mount for {target}: {e}"
1105 ))
1106 })?
1107 }
1108
1109 StorageSpec::S3 {
1110 bucket,
1111 prefix,
1112 target,
1113 readonly,
1114 endpoint: _,
1115 credentials: _,
1116 } => {
1117 let key = format!("_s3_{}_{}", bucket, prefix.as_deref().unwrap_or(""));
1120 let source = volume_paths.get(&key).ok_or_else(|| {
1121 AgentError::InvalidSpec(format!(
1122 "S3 volume for bucket '{bucket}' not mounted - ensure StorageManager.mount_s3() was called"
1123 ))
1124 })?;
1125
1126 tracing::warn!(
1127 bucket = %bucket,
1128 target = %target,
1129 "S3 storage is NOT SQLite-safe. Use for read-heavy workloads only."
1130 );
1131
1132 let mut options = vec!["rbind".to_string()];
1133 if *readonly {
1134 options.push("ro".to_string());
1135 } else {
1136 options.push("rw".to_string());
1137 }
1138
1139 MountBuilder::default()
1140 .destination(target.clone())
1141 .typ("none".to_string())
1142 .source(source.to_string_lossy().to_string())
1143 .options(options)
1144 .build()
1145 .map_err(|e| {
1146 AgentError::InvalidSpec(format!(
1147 "failed to build S3 mount for {target}: {e}"
1148 ))
1149 })?
1150 }
1151 };
1152
1153 mounts.push(mount);
1154 }
1155
1156 Ok(mounts)
1157 }
1158
1159 fn build_linux_config(&self, spec: &ServiceSpec) -> Result<oci_spec::runtime::Linux> {
1161 let mut namespaces = vec![
1163 LinuxNamespaceBuilder::default()
1164 .typ(LinuxNamespaceType::Pid)
1165 .build()
1166 .unwrap(),
1167 LinuxNamespaceBuilder::default()
1168 .typ(LinuxNamespaceType::Ipc)
1169 .build()
1170 .unwrap(),
1171 LinuxNamespaceBuilder::default()
1172 .typ(LinuxNamespaceType::Uts)
1173 .build()
1174 .unwrap(),
1175 LinuxNamespaceBuilder::default()
1176 .typ(LinuxNamespaceType::Mount)
1177 .build()
1178 .unwrap(),
1179 ];
1180
1181 if !self.host_network {
1185 namespaces.push(
1186 LinuxNamespaceBuilder::default()
1187 .typ(LinuxNamespaceType::Network)
1188 .build()
1189 .unwrap(),
1190 );
1191 }
1192
1193 let mut linux_builder = LinuxBuilder::default().namespaces(namespaces);
1194
1195 let resources = self.build_resources(spec)?;
1197 if let Some(resources) = resources {
1198 linux_builder = linux_builder.resources(resources);
1199 }
1200
1201 let devices = self.build_devices(spec, None)?;
1203 if !devices.is_empty() {
1204 linux_builder = linux_builder.devices(devices);
1205 }
1206
1207 linux_builder = linux_builder.rootfs_propagation("private".to_string());
1209
1210 if spec.privileged {
1212 linux_builder = linux_builder.masked_paths(vec![]).readonly_paths(vec![]);
1214 } else {
1215 let masked_paths = vec![
1217 "/proc/acpi".to_string(),
1218 "/proc/asound".to_string(),
1219 "/proc/kcore".to_string(),
1220 "/proc/keys".to_string(),
1221 "/proc/latency_stats".to_string(),
1222 "/proc/timer_list".to_string(),
1223 "/proc/timer_stats".to_string(),
1224 "/proc/sched_debug".to_string(),
1225 "/proc/scsi".to_string(),
1226 "/sys/firmware".to_string(),
1227 ];
1228
1229 let readonly_paths = vec![
1231 "/proc/bus".to_string(),
1232 "/proc/fs".to_string(),
1233 "/proc/irq".to_string(),
1234 "/proc/sys".to_string(),
1235 "/proc/sysrq-trigger".to_string(),
1236 ];
1237
1238 linux_builder = linux_builder
1239 .masked_paths(masked_paths)
1240 .readonly_paths(readonly_paths);
1241 }
1242
1243 linux_builder
1244 .build()
1245 .map_err(|e| AgentError::InvalidSpec(format!("failed to build linux config: {e}")))
1246 }
1247
1248 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
1250 fn build_resources(
1251 &self,
1252 spec: &ServiceSpec,
1253 ) -> Result<Option<oci_spec::runtime::LinuxResources>> {
1254 let mut resources_builder = LinuxResourcesBuilder::default();
1255 let mut has_resources = false;
1256
1257 if let Some(cpu_limit) = spec.resources.cpu {
1259 let quota = (cpu_limit * 100_000.0) as i64;
1262 let cpu = LinuxCpuBuilder::default()
1263 .quota(quota)
1264 .period(100_000u64)
1265 .build()
1266 .map_err(|e| AgentError::InvalidSpec(format!("failed to build CPU limits: {e}")))?;
1267
1268 resources_builder = resources_builder.cpu(cpu);
1269 has_resources = true;
1270 }
1271
1272 if let Some(ref memory_str) = spec.resources.memory {
1274 let bytes = parse_memory_string(memory_str)
1275 .map_err(|e| AgentError::InvalidSpec(format!("invalid memory limit: {e}")))?;
1276
1277 let memory = LinuxMemoryBuilder::default()
1278 .limit(bytes as i64)
1279 .build()
1280 .map_err(|e| {
1281 AgentError::InvalidSpec(format!("failed to build memory limits: {e}"))
1282 })?;
1283
1284 resources_builder = resources_builder.memory(memory);
1285 has_resources = true;
1286 }
1287
1288 let device_rules = self.build_device_cgroup_rules(spec, None)?;
1290 if !device_rules.is_empty() {
1291 resources_builder = resources_builder.devices(device_rules);
1292 has_resources = true;
1293 }
1294
1295 if has_resources {
1296 let resources = resources_builder
1297 .build()
1298 .map_err(|e| AgentError::InvalidSpec(format!("failed to build resources: {e}")))?;
1299 Ok(Some(resources))
1300 } else {
1301 Ok(None)
1302 }
1303 }
1304
1305 #[allow(clippy::unused_self, clippy::too_many_lines)]
1307 fn build_device_cgroup_rules(
1308 &self,
1309 spec: &ServiceSpec,
1310 _gpu_indices: Option<&[u32]>,
1311 ) -> Result<Vec<oci_spec::runtime::LinuxDeviceCgroup>> {
1312 let mut rules = Vec::new();
1313
1314 if spec.privileged {
1315 let rule = LinuxDeviceCgroupBuilder::default()
1317 .allow(true)
1318 .access("rwm".to_string())
1319 .build()
1320 .map_err(|e| {
1321 AgentError::InvalidSpec(format!("failed to build device cgroup rule: {e}"))
1322 })?;
1323 rules.push(rule);
1324 } else {
1325 let deny_all = LinuxDeviceCgroupBuilder::default()
1327 .allow(false)
1328 .access("rwm".to_string())
1329 .build()
1330 .map_err(|e| AgentError::InvalidSpec(format!("failed to build deny rule: {e}")))?;
1331 rules.push(deny_all);
1332
1333 let standard_char_devices = [
1336 (1, 3, "rwm"), (1, 5, "rwm"), (1, 7, "rwm"), (1, 8, "rwm"), (1, 9, "rwm"), (5, 0, "rwm"), (5, 1, "rwm"), (5, 2, "rwm"), (136, -1, "rwm"), ];
1346
1347 for (major, minor, access) in standard_char_devices {
1348 let mut builder = LinuxDeviceCgroupBuilder::default()
1349 .allow(true)
1350 .typ(LinuxDeviceType::C)
1351 .major(i64::from(major))
1352 .access(access.to_string());
1353
1354 if minor >= 0 {
1355 builder = builder.minor(i64::from(minor));
1356 }
1357
1358 let rule = builder.build().map_err(|e| {
1359 AgentError::InvalidSpec(format!("failed to build char device rule: {e}"))
1360 })?;
1361 rules.push(rule);
1362 }
1363
1364 for device in &spec.devices {
1366 if let Ok((major, minor)) = get_device_major_minor(&device.path) {
1367 let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
1368
1369 let mut access = String::new();
1371 if device.read {
1372 access.push('r');
1373 }
1374 if device.write {
1375 access.push('w');
1376 }
1377 if device.mknod {
1378 access.push('m');
1379 }
1380 if access.is_empty() {
1381 access = "rw".to_string();
1382 }
1383
1384 let rule = LinuxDeviceCgroupBuilder::default()
1385 .allow(true)
1386 .typ(dev_type)
1387 .major(major)
1388 .minor(minor)
1389 .access(access)
1390 .build()
1391 .map_err(|e| {
1392 AgentError::InvalidSpec(format!(
1393 "failed to build device rule for {}: {}",
1394 device.path, e
1395 ))
1396 })?;
1397 rules.push(rule);
1398 } else {
1399 tracing::warn!("Failed to get device info for {}, skipping", device.path);
1400 }
1401 }
1402
1403 if let Some(ref gpu) = spec.resources.gpu {
1405 match gpu.vendor.as_str() {
1406 "nvidia" => {
1407 let rule = LinuxDeviceCgroupBuilder::default()
1409 .allow(true)
1410 .typ(LinuxDeviceType::C)
1411 .major(195i64)
1412 .access("rwm".to_string())
1413 .build()
1414 .map_err(|e| {
1415 AgentError::InvalidSpec(format!(
1416 "failed to build GPU cgroup rule: {e}"
1417 ))
1418 })?;
1419 rules.push(rule);
1420
1421 let uvm_rule = LinuxDeviceCgroupBuilder::default()
1423 .allow(true)
1424 .typ(LinuxDeviceType::C)
1425 .major(510i64)
1426 .access("rwm".to_string())
1427 .build()
1428 .map_err(|e| {
1429 AgentError::InvalidSpec(format!(
1430 "failed to build GPU UVM cgroup rule: {e}"
1431 ))
1432 })?;
1433 rules.push(uvm_rule);
1434 }
1435 "amd" => {
1436 let dri_rule = LinuxDeviceCgroupBuilder::default()
1438 .allow(true)
1439 .typ(LinuxDeviceType::C)
1440 .major(226i64)
1441 .access("rwm".to_string())
1442 .build()
1443 .map_err(|e| {
1444 AgentError::InvalidSpec(format!(
1445 "failed to build AMD DRI cgroup rule: {e}"
1446 ))
1447 })?;
1448 rules.push(dri_rule);
1449
1450 let kfd_rule = LinuxDeviceCgroupBuilder::default()
1452 .allow(true)
1453 .typ(LinuxDeviceType::C)
1454 .major(234i64)
1455 .access("rwm".to_string())
1456 .build()
1457 .map_err(|e| {
1458 AgentError::InvalidSpec(format!(
1459 "failed to build AMD KFD cgroup rule: {e}"
1460 ))
1461 })?;
1462 rules.push(kfd_rule);
1463 }
1464 "intel" => {
1465 let dri_rule = LinuxDeviceCgroupBuilder::default()
1467 .allow(true)
1468 .typ(LinuxDeviceType::C)
1469 .major(226i64)
1470 .access("rwm".to_string())
1471 .build()
1472 .map_err(|e| {
1473 AgentError::InvalidSpec(format!(
1474 "failed to build Intel DRI cgroup rule: {e}"
1475 ))
1476 })?;
1477 rules.push(dri_rule);
1478 }
1479 other => {
1480 tracing::warn!(
1482 vendor = %other,
1483 "Unknown GPU vendor, allowing DRI devices (major 226)"
1484 );
1485 let dri_rule = LinuxDeviceCgroupBuilder::default()
1486 .allow(true)
1487 .typ(LinuxDeviceType::C)
1488 .major(226i64)
1489 .access("rwm".to_string())
1490 .build()
1491 .map_err(|e| {
1492 AgentError::InvalidSpec(format!(
1493 "failed to build GPU DRI cgroup rule: {e}"
1494 ))
1495 })?;
1496 rules.push(dri_rule);
1497 }
1498 }
1499 }
1500 }
1501
1502 Ok(rules)
1503 }
1504
1505 #[allow(clippy::unused_self, clippy::too_many_lines)]
1507 fn build_devices(
1508 &self,
1509 spec: &ServiceSpec,
1510 gpu_indices: Option<&[u32]>,
1511 ) -> Result<Vec<oci_spec::runtime::LinuxDevice>> {
1512 let mut devices = Vec::new();
1513
1514 for device in &spec.devices {
1515 if let Ok((major, minor)) = get_device_major_minor(&device.path) {
1516 let dev_type = get_device_type(&device.path).unwrap_or(LinuxDeviceType::C);
1517
1518 let linux_device = LinuxDeviceBuilder::default()
1519 .path(device.path.clone())
1520 .typ(dev_type)
1521 .major(major)
1522 .minor(minor)
1523 .file_mode(0o666u32)
1524 .uid(0u32)
1525 .gid(0u32)
1526 .build()
1527 .map_err(|e| {
1528 AgentError::InvalidSpec(format!(
1529 "failed to build device {}: {}",
1530 device.path, e
1531 ))
1532 })?;
1533
1534 devices.push(linux_device);
1535 }
1536 }
1537
1538 if let Some(ref gpu) = spec.resources.gpu {
1540 let indices: Vec<u32> =
1541 gpu_indices.map_or_else(|| (0..gpu.count).collect(), <[u32]>::to_vec);
1542
1543 match gpu.vendor.as_str() {
1544 "nvidia" => {
1545 let always_devices =
1547 ["/dev/nvidiactl", "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools"];
1548 for dev_path in &always_devices {
1549 if let Ok((major, minor)) = get_device_major_minor(dev_path) {
1550 let dev_type = get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
1551 let linux_device = LinuxDeviceBuilder::default()
1552 .path((*dev_path).to_string())
1553 .typ(dev_type)
1554 .major(major)
1555 .minor(minor)
1556 .file_mode(0o666u32)
1557 .uid(0u32)
1558 .gid(0u32)
1559 .build()
1560 .map_err(|e| {
1561 AgentError::InvalidSpec(format!(
1562 "failed to build GPU device {dev_path}: {e}"
1563 ))
1564 })?;
1565 devices.push(linux_device);
1566 } else {
1567 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1568 }
1569 }
1570
1571 for i in &indices {
1573 let dev_path = format!("/dev/nvidia{i}");
1574 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1575 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1576 let linux_device = LinuxDeviceBuilder::default()
1577 .path(dev_path.clone())
1578 .typ(dev_type)
1579 .major(major)
1580 .minor(minor)
1581 .file_mode(0o666u32)
1582 .uid(0u32)
1583 .gid(0u32)
1584 .build()
1585 .map_err(|e| {
1586 AgentError::InvalidSpec(format!(
1587 "failed to build GPU device {dev_path}: {e}"
1588 ))
1589 })?;
1590 devices.push(linux_device);
1591 } else {
1592 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1593 }
1594 }
1595 }
1596 "amd" => {
1597 let amd_always_devices = ["/dev/kfd"];
1599 for dev_path in &amd_always_devices {
1600 if let Ok((major, minor)) = get_device_major_minor(dev_path) {
1601 let dev_type = get_device_type(dev_path).unwrap_or(LinuxDeviceType::C);
1602 let linux_device = LinuxDeviceBuilder::default()
1603 .path((*dev_path).to_string())
1604 .typ(dev_type)
1605 .major(major)
1606 .minor(minor)
1607 .file_mode(0o666u32)
1608 .uid(0u32)
1609 .gid(0u32)
1610 .build()
1611 .map_err(|e| {
1612 AgentError::InvalidSpec(format!(
1613 "failed to build GPU device {dev_path}: {e}"
1614 ))
1615 })?;
1616 devices.push(linux_device);
1617 } else {
1618 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1619 }
1620 }
1621
1622 for i in &indices {
1624 let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1625 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1626 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1627 let linux_device = LinuxDeviceBuilder::default()
1628 .path(dev_path.clone())
1629 .typ(dev_type)
1630 .major(major)
1631 .minor(minor)
1632 .file_mode(0o666u32)
1633 .uid(0u32)
1634 .gid(0u32)
1635 .build()
1636 .map_err(|e| {
1637 AgentError::InvalidSpec(format!(
1638 "failed to build GPU device {dev_path}: {e}"
1639 ))
1640 })?;
1641 devices.push(linux_device);
1642 } else {
1643 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1644 }
1645 }
1646
1647 for i in &indices {
1649 let dev_path = format!("/dev/dri/card{i}");
1650 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1651 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1652 let linux_device = LinuxDeviceBuilder::default()
1653 .path(dev_path.clone())
1654 .typ(dev_type)
1655 .major(major)
1656 .minor(minor)
1657 .file_mode(0o666u32)
1658 .uid(0u32)
1659 .gid(0u32)
1660 .build()
1661 .map_err(|e| {
1662 AgentError::InvalidSpec(format!(
1663 "failed to build GPU device {dev_path}: {e}"
1664 ))
1665 })?;
1666 devices.push(linux_device);
1667 } else {
1668 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1669 }
1670 }
1671 }
1672 "intel" => {
1673 for i in &indices {
1675 let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1676 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1677 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1678 let linux_device = LinuxDeviceBuilder::default()
1679 .path(dev_path.clone())
1680 .typ(dev_type)
1681 .major(major)
1682 .minor(minor)
1683 .file_mode(0o666u32)
1684 .uid(0u32)
1685 .gid(0u32)
1686 .build()
1687 .map_err(|e| {
1688 AgentError::InvalidSpec(format!(
1689 "failed to build GPU device {dev_path}: {e}"
1690 ))
1691 })?;
1692 devices.push(linux_device);
1693 } else {
1694 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1695 }
1696 }
1697
1698 for i in &indices {
1700 let dev_path = format!("/dev/dri/card{i}");
1701 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1702 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1703 let linux_device = LinuxDeviceBuilder::default()
1704 .path(dev_path.clone())
1705 .typ(dev_type)
1706 .major(major)
1707 .minor(minor)
1708 .file_mode(0o666u32)
1709 .uid(0u32)
1710 .gid(0u32)
1711 .build()
1712 .map_err(|e| {
1713 AgentError::InvalidSpec(format!(
1714 "failed to build GPU device {dev_path}: {e}"
1715 ))
1716 })?;
1717 devices.push(linux_device);
1718 } else {
1719 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1720 }
1721 }
1722 }
1723 other => {
1724 tracing::warn!(
1726 vendor = %other,
1727 "Unknown GPU vendor, attempting DRI device passthrough"
1728 );
1729 for i in &indices {
1730 let dev_path = format!("/dev/dri/renderD{}", 128 + i);
1731 if let Ok((major, minor)) = get_device_major_minor(&dev_path) {
1732 let dev_type = get_device_type(&dev_path).unwrap_or(LinuxDeviceType::C);
1733 let linux_device = LinuxDeviceBuilder::default()
1734 .path(dev_path.clone())
1735 .typ(dev_type)
1736 .major(major)
1737 .minor(minor)
1738 .file_mode(0o666u32)
1739 .uid(0u32)
1740 .gid(0u32)
1741 .build()
1742 .map_err(|e| {
1743 AgentError::InvalidSpec(format!(
1744 "failed to build GPU device {dev_path}: {e}"
1745 ))
1746 })?;
1747 devices.push(linux_device);
1748 } else {
1749 tracing::warn!("GPU device {} not found on host, skipping", dev_path);
1750 }
1751 }
1752 }
1753 }
1754 }
1755
1756 Ok(devices)
1757 }
1758
1759 pub async fn write_config(
1771 &self,
1772 container_id: &ContainerId,
1773 spec: &ServiceSpec,
1774 ) -> Result<PathBuf> {
1775 let oci_spec = self
1777 .build_oci_spec(container_id, spec, &self.volume_paths)
1778 .await?;
1779
1780 let config_path = self.bundle_dir.join("config.json");
1782 let config_json =
1783 serde_json::to_string_pretty(&oci_spec).map_err(|e| AgentError::CreateFailed {
1784 id: container_id.to_string(),
1785 reason: format!("failed to serialize OCI spec: {e}"),
1786 })?;
1787
1788 fs::write(&config_path, config_json)
1789 .await
1790 .map_err(|e| AgentError::CreateFailed {
1791 id: container_id.to_string(),
1792 reason: format!("failed to write config.json: {e}"),
1793 })?;
1794
1795 tracing::debug!(
1796 "Wrote OCI config.json at {} for container {}",
1797 config_path.display(),
1798 container_id
1799 );
1800
1801 Ok(self.bundle_dir.clone())
1802 }
1803
1804 fn resolve_command_from_spec(
1813 spec: &ServiceSpec,
1814 image_config: Option<&zlayer_registry::ImageConfig>,
1815 ) -> Vec<String> {
1816 let mut args = Vec::new();
1817
1818 match (&spec.command.entrypoint, &spec.command.args) {
1819 (Some(entrypoint), Some(cmd_args)) => {
1820 args.extend_from_slice(entrypoint);
1821 args.extend_from_slice(cmd_args);
1822 }
1823 (Some(entrypoint), None) => {
1824 args.extend_from_slice(entrypoint);
1825 }
1826 (None, Some(cmd_args)) if !cmd_args.is_empty() => {
1827 args.extend_from_slice(cmd_args);
1828 }
1829 _ => {
1830 if let Some(img_cmd) =
1832 image_config.and_then(zlayer_registry::ImageConfig::full_command)
1833 {
1834 if img_cmd.is_empty() {
1835 args.push("/bin/sh".to_string());
1836 } else {
1837 args.extend(img_cmd);
1838 }
1839 } else {
1840 args.push("/bin/sh".to_string());
1841 }
1842 }
1843 }
1844
1845 args
1846 }
1847
1848 pub async fn cleanup(&self) -> Result<()> {
1855 if self.bundle_dir.exists() {
1856 fs::remove_dir_all(&self.bundle_dir)
1857 .await
1858 .map_err(|e| AgentError::CreateFailed {
1859 id: "cleanup".to_string(),
1860 reason: format!(
1861 "failed to remove bundle directory {}: {}",
1862 self.bundle_dir.display(),
1863 e
1864 ),
1865 })?;
1866 }
1867 Ok(())
1868 }
1869}
1870
1871pub async fn create_bundle(
1878 container_id: &ContainerId,
1879 spec: &ServiceSpec,
1880 rootfs_path: Option<PathBuf>,
1881) -> Result<PathBuf> {
1882 let mut builder =
1883 BundleBuilder::for_container(container_id).with_host_network(spec.host_network);
1884
1885 if let Some(rootfs) = rootfs_path {
1886 builder = builder.with_rootfs(rootfs);
1887 }
1888
1889 builder.build(container_id, spec).await
1890}
1891
1892pub async fn cleanup_bundle(container_id: &ContainerId) -> Result<()> {
1899 let builder = BundleBuilder::for_container(container_id);
1900 builder.cleanup().await
1901}
1902
#[cfg(test)]
mod tests {
    use super::*;
    use zlayer_spec::*;

    /// Parses `manifest` as a deployment spec and returns its service named `test`.
    fn service_from_yaml(manifest: &str) -> ServiceSpec {
        let mut deployment = serde_yaml::from_str::<DeploymentSpec>(manifest).unwrap();
        deployment.services.remove("test").unwrap()
    }

    /// Container id used by the tests that build a full OCI spec.
    fn test_container_id() -> ContainerId {
        ContainerId {
            service: "test".to_string(),
            replica: 1,
        }
    }

    /// Builder pointing at the throwaway bundle directory used by the tests.
    fn test_builder() -> BundleBuilder {
        BundleBuilder::new("/tmp/test-bundle".into())
    }

    /// Builds the OCI spec for `service` with no named volumes resolved.
    async fn build_test_spec(builder: &BundleBuilder, service: &ServiceSpec) -> Spec {
        let no_volumes = std::collections::HashMap::new();
        builder
            .build_oci_spec(&test_container_id(), service, &no_volumes)
            .await
            .unwrap()
    }

    /// Collects the destination path of every mount as a string.
    fn destinations_of(mounts: &[Mount]) -> Vec<String> {
        mounts
            .iter()
            .map(|m| m.destination().to_string_lossy().to_string())
            .collect()
    }

    /// Collects the namespace types declared in the spec's `linux` section.
    fn namespace_types_of(built: &Spec) -> Vec<LinuxNamespaceType> {
        let linux = built.linux().as_ref().unwrap();
        linux
            .namespaces()
            .as_ref()
            .unwrap()
            .iter()
            .map(oci_spec::runtime::LinuxNamespace::typ)
            .collect()
    }

    /// Minimal service with a single HTTP endpoint.
    fn mock_spec() -> ServiceSpec {
        service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    rtype: service
    image:
      name: test:latest
    endpoints:
      - name: http
        protocol: http
        port: 8080
",
        )
    }

    /// Service with CPU/memory limits and two environment variables.
    fn mock_spec_with_resources() -> ServiceSpec {
        service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    rtype: service
    image:
      name: test:latest
    resources:
      cpu: 0.5
      memory: 512Mi
    env:
      MY_VAR: my_value
      ANOTHER: value2
    endpoints:
      - name: http
        protocol: http
        port: 8080
",
        )
    }

    /// Service that requests privileged mode.
    fn mock_privileged_spec() -> ServiceSpec {
        service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    rtype: service
    image:
      name: test:latest
    privileged: true
    endpoints:
      - name: http
        protocol: http
        port: 8080
",
        )
    }

    #[test]
    fn test_parse_memory_string() {
        let cases: [(&str, u64); 5] = [
            ("512Mi", 512 * 1024 * 1024),
            ("1Gi", 1024 * 1024 * 1024),
            ("2G", 2 * 1000 * 1000 * 1000),
            ("1024", 1024),
            ("512Ki", 512 * 1024),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_memory_string(input).unwrap(), expected);
        }
    }

    #[test]
    fn test_parse_memory_string_errors() {
        for bad_input in ["", "abc", "12.5Mi"] {
            assert!(parse_memory_string(bad_input).is_err());
        }
    }

    #[test]
    fn test_bundle_builder_new() {
        let builder = test_builder();
        assert_eq!(builder.bundle_dir(), Path::new("/tmp/test-bundle"));
        assert!(builder.rootfs_path.is_none());
    }

    #[test]
    fn test_bundle_builder_for_container() {
        let dirs = zlayer_paths::ZLayerDirs::system_default();
        let id = ContainerId {
            service: "myservice".to_string(),
            replica: 1,
        };
        let builder = BundleBuilder::for_container(&id);
        assert_eq!(builder.bundle_dir(), dirs.bundles().join("myservice-rep-1"));
    }

    #[test]
    fn test_bundle_builder_with_rootfs() {
        let dirs = zlayer_paths::ZLayerDirs::system_default();
        let builder = test_builder().with_rootfs(dirs.rootfs().join("myimage"));
        assert_eq!(builder.rootfs_path, Some(dirs.rootfs().join("myimage")));
    }

    #[tokio::test]
    async fn test_build_oci_spec_basic() {
        let oci_spec = build_test_spec(&test_builder(), &mock_spec()).await;

        assert_eq!(oci_spec.version(), "1.0.2");
        assert!(oci_spec.root().is_some());
        assert_eq!(
            oci_spec.root().as_ref().unwrap().path(),
            std::path::Path::new("rootfs")
        );
        assert!(oci_spec.process().is_some());
        assert!(oci_spec.linux().is_some());
    }

    #[tokio::test]
    async fn test_build_oci_spec_with_resources() {
        let oci_spec = build_test_spec(&test_builder(), &mock_spec_with_resources()).await;

        let linux = oci_spec.linux().as_ref().unwrap();
        let resources = linux.resources().as_ref().unwrap();

        // cpu: 0.5 is expected as a 50ms quota over a 100ms period.
        let cpu = resources.cpu().as_ref().unwrap();
        assert_eq!(cpu.quota(), Some(50_000));
        assert_eq!(cpu.period(), Some(100_000));

        // memory: 512Mi is expected in bytes.
        let memory = resources.memory().as_ref().unwrap();
        assert_eq!(memory.limit(), Some(512 * 1024 * 1024));
    }

    #[tokio::test]
    async fn test_build_oci_spec_privileged() {
        let oci_spec = build_test_spec(&test_builder(), &mock_privileged_spec()).await;

        let process = oci_spec.process().as_ref().unwrap();
        let caps = process.capabilities().as_ref().unwrap();
        let bounding = caps.bounding().as_ref().unwrap();

        for expected in [Capability::SysAdmin, Capability::NetAdmin] {
            assert!(bounding.contains(&expected));
        }

        // Privileged containers must not have any masked paths.
        let linux = oci_spec.linux().as_ref().unwrap();
        assert!(linux
            .masked_paths()
            .as_ref()
            .map_or(true, |paths| paths.is_empty()));
    }

    #[tokio::test]
    async fn test_build_oci_spec_environment() {
        let builder =
            test_builder().with_env("EXTRA_VAR".to_string(), "extra_value".to_string());

        let oci_spec = build_test_spec(&builder, &mock_spec_with_resources()).await;

        let process = oci_spec.process().as_ref().unwrap();
        let env = process.env().as_ref().unwrap();

        // Variables from the spec plus the one added through the builder.
        for expected in ["MY_VAR=my_value", "ANOTHER=value2", "EXTRA_VAR=extra_value"] {
            assert!(env.iter().any(|e| e == expected));
        }
        // A PATH entry must always be present.
        assert!(env.iter().any(|e| e.starts_with("PATH=")));
    }

    #[tokio::test]
    async fn test_build_namespaces() {
        let oci_spec = build_test_spec(&test_builder(), &mock_spec()).await;
        let namespace_types = namespace_types_of(&oci_spec);

        assert!(namespace_types.contains(&LinuxNamespaceType::Pid));
        assert!(namespace_types.contains(&LinuxNamespaceType::Ipc));
        assert!(namespace_types.contains(&LinuxNamespaceType::Uts));
        assert!(namespace_types.contains(&LinuxNamespaceType::Mount));
        assert!(namespace_types.contains(&LinuxNamespaceType::Network));
    }

    #[tokio::test]
    async fn test_build_namespaces_host_network() {
        let builder = test_builder().with_host_network(true);

        let oci_spec = build_test_spec(&builder, &mock_spec()).await;
        let namespace_types = namespace_types_of(&oci_spec);

        assert!(namespace_types.contains(&LinuxNamespaceType::Pid));
        assert!(namespace_types.contains(&LinuxNamespaceType::Ipc));
        assert!(namespace_types.contains(&LinuxNamespaceType::Uts));
        assert!(namespace_types.contains(&LinuxNamespaceType::Mount));
        assert!(
            !namespace_types.contains(&LinuxNamespaceType::Network),
            "Network namespace should NOT be present in host_network mode"
        );
    }

    #[test]
    fn test_build_default_mounts() {
        let mounts = test_builder().build_default_mounts(&mock_spec()).unwrap();
        let mount_destinations = destinations_of(&mounts);

        for expected in ["/proc", "/dev", "/dev/pts", "/dev/shm", "/sys"] {
            assert!(mount_destinations.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_build_storage_mounts_bind() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /host/data
        target: /app/data
        readonly: true
",
        );

        let no_volumes = std::collections::HashMap::new();
        let mounts = test_builder()
            .build_storage_mounts(&spec, &no_volumes)
            .unwrap();

        assert_eq!(mounts.len(), 1);
        let bind = &mounts[0];
        assert_eq!(bind.destination().to_string_lossy(), "/app/data");
        assert_eq!(
            bind.source()
                .as_ref()
                .map(|s| s.to_string_lossy().to_string()),
            Some("/host/data".to_string())
        );
        let options = bind.options().as_ref().unwrap();
        assert!(options.contains(&"rbind".to_string()));
        assert!(options.contains(&"ro".to_string()));
    }

    #[test]
    fn test_build_storage_mounts_named() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: named
        name: my-volume
        target: /app/data
",
        );

        let dirs = zlayer_paths::ZLayerDirs::system_default();
        let mut volume_paths = std::collections::HashMap::new();
        volume_paths.insert("my-volume".to_string(), dirs.volumes().join("my-volume"));

        let mounts = test_builder()
            .build_storage_mounts(&spec, &volume_paths)
            .unwrap();

        assert_eq!(mounts.len(), 1);
        let named = &mounts[0];
        assert_eq!(named.destination().to_string_lossy(), "/app/data");
        assert_eq!(
            named
                .source()
                .as_ref()
                .map(|s| s.to_string_lossy().to_string()),
            Some(
                dirs.volumes()
                    .join("my-volume")
                    .to_string_lossy()
                    .into_owned()
            )
        );
    }

    #[test]
    fn test_build_storage_mounts_tmpfs() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: tmpfs
        target: /app/tmp
        size: 256Mi
        mode: 1777
",
        );

        let no_volumes = std::collections::HashMap::new();
        let mounts = test_builder()
            .build_storage_mounts(&spec, &no_volumes)
            .unwrap();

        assert_eq!(mounts.len(), 1);
        let tmpfs = &mounts[0];
        assert_eq!(tmpfs.destination().to_string_lossy(), "/app/tmp");
        assert_eq!(tmpfs.typ().as_ref().map(String::as_str), Some("tmpfs"));
        let options = tmpfs.options().as_ref().unwrap();
        assert!(options.iter().any(|o| o.starts_with("size=")));
        assert!(options.iter().any(|o| o.starts_with("mode=")));
    }

    #[test]
    fn test_build_storage_mounts_multiple() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /etc/config
        target: /app/config
        readonly: true
      - type: named
        name: app-data
        target: /app/data
      - type: tmpfs
        target: /app/tmp
",
        );

        let dirs = zlayer_paths::ZLayerDirs::system_default();
        let mut volume_paths = std::collections::HashMap::new();
        volume_paths.insert("app-data".to_string(), dirs.volumes().join("app-data"));

        let mounts = test_builder()
            .build_storage_mounts(&spec, &volume_paths)
            .unwrap();

        assert_eq!(mounts.len(), 3);

        let destinations = destinations_of(&mounts);
        for expected in ["/app/config", "/app/data", "/app/tmp"] {
            assert!(destinations.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_build_storage_mounts_anonymous_missing_path() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: anonymous
        target: /app/cache
",
        );

        // No path was registered for the anonymous volume, so mounting must fail.
        let no_volumes = std::collections::HashMap::new();
        let result = test_builder().build_storage_mounts(&spec, &no_volumes);

        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_oci_spec_includes_storage_mounts() {
        let spec = service_from_yaml(
            r"
version: v1
deployment: test
services:
  test:
    image:
      name: test:latest
    storage:
      - type: bind
        source: /host/data
        target: /app/data
      - type: tmpfs
        target: /app/tmp
",
        );

        let oci_spec = build_test_spec(&test_builder(), &spec).await;

        let mounts = oci_spec.mounts().as_ref().unwrap();
        let destinations = destinations_of(mounts);

        // Default mounts and storage mounts must both be present.
        for expected in ["/proc", "/dev", "/app/data", "/app/tmp"] {
            assert!(destinations.contains(&expected.to_string()));
        }
    }
}