1use crate::container::OciStatus;
2use crate::error::{NucleusError, Result};
3use crate::filesystem::normalize_container_destination;
4use crate::isolation::{IdMapping, NamespaceConfig, UserNamespaceConfig};
5use crate::resources::ResourceLimits;
6use serde::{Deserialize, Serialize};
7use std::collections::{BTreeSet, HashMap};
8use std::fs;
9use std::fs::OpenOptions;
10use std::io::Write;
11use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
12use std::path::{Path, PathBuf};
13use tracing::{debug, info, warn};
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct OciConfig {
21 #[serde(rename = "ociVersion")]
22 pub oci_version: String,
23
24 pub root: OciRoot,
25 pub process: OciProcess,
26 pub hostname: Option<String>,
27 pub mounts: Vec<OciMount>,
28 pub linux: Option<OciLinux>,
29 #[serde(default, skip_serializing_if = "Option::is_none")]
30 pub hooks: Option<OciHooks>,
31 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
32 pub annotations: HashMap<String, String>,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct OciRoot {
37 pub path: String,
38 pub readonly: bool,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct OciProcess {
43 pub terminal: bool,
44 pub user: OciUser,
45 pub args: Vec<String>,
46 pub env: Vec<String>,
47 pub cwd: String,
48 #[serde(rename = "noNewPrivileges")]
49 pub no_new_privileges: bool,
50 pub capabilities: Option<OciCapabilities>,
51 #[serde(default, skip_serializing_if = "Vec::is_empty")]
52 pub rlimits: Vec<OciRlimit>,
53 #[serde(
54 rename = "consoleSize",
55 default,
56 skip_serializing_if = "Option::is_none"
57 )]
58 pub console_size: Option<OciConsoleSize>,
59 #[serde(
60 rename = "apparmorProfile",
61 default,
62 skip_serializing_if = "Option::is_none"
63 )]
64 pub apparmor_profile: Option<String>,
65 #[serde(
66 rename = "selinuxLabel",
67 default,
68 skip_serializing_if = "Option::is_none"
69 )]
70 pub selinux_label: Option<String>,
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct OciUser {
75 pub uid: u32,
76 pub gid: u32,
77 #[serde(skip_serializing_if = "Option::is_none")]
78 pub additional_gids: Option<Vec<u32>>,
79}
80
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct OciCapabilities {
83 pub bounding: Vec<String>,
84 pub effective: Vec<String>,
85 pub inheritable: Vec<String>,
86 pub permitted: Vec<String>,
87 pub ambient: Vec<String>,
88}
89
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct OciMount {
92 pub destination: String,
93 pub source: String,
94 #[serde(rename = "type")]
95 pub mount_type: String,
96 pub options: Vec<String>,
97}
98
99#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct OciLinux {
101 #[serde(skip_serializing_if = "Option::is_none")]
102 pub namespaces: Option<Vec<OciNamespace>>,
103 #[serde(skip_serializing_if = "Option::is_none")]
104 pub resources: Option<OciResources>,
105 #[serde(rename = "uidMappings", skip_serializing_if = "Vec::is_empty", default)]
106 pub uid_mappings: Vec<OciIdMapping>,
107 #[serde(rename = "gidMappings", skip_serializing_if = "Vec::is_empty", default)]
108 pub gid_mappings: Vec<OciIdMapping>,
109 #[serde(rename = "maskedPaths", skip_serializing_if = "Vec::is_empty", default)]
110 pub masked_paths: Vec<String>,
111 #[serde(
112 rename = "readonlyPaths",
113 skip_serializing_if = "Vec::is_empty",
114 default
115 )]
116 pub readonly_paths: Vec<String>,
117 #[serde(default, skip_serializing_if = "Vec::is_empty")]
118 pub devices: Vec<OciDevice>,
119 #[serde(default, skip_serializing_if = "Option::is_none")]
120 pub seccomp: Option<OciSeccomp>,
121 #[serde(
122 rename = "rootfsPropagation",
123 default,
124 skip_serializing_if = "Option::is_none"
125 )]
126 pub rootfs_propagation: Option<String>,
127 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
128 pub sysctl: HashMap<String, String>,
129 #[serde(
130 rename = "cgroupsPath",
131 default,
132 skip_serializing_if = "Option::is_none"
133 )]
134 pub cgroups_path: Option<String>,
135 #[serde(rename = "intelRdt", default, skip_serializing_if = "Option::is_none")]
136 pub intel_rdt: Option<OciIntelRdt>,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct OciNamespace {
141 #[serde(rename = "type")]
142 pub namespace_type: String,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
146pub struct OciIdMapping {
147 #[serde(rename = "containerID")]
148 pub container_id: u32,
149 #[serde(rename = "hostID")]
150 pub host_id: u32,
151 pub size: u32,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct OciResources {
156 #[serde(skip_serializing_if = "Option::is_none")]
157 pub memory: Option<OciMemory>,
158 #[serde(skip_serializing_if = "Option::is_none")]
159 pub cpu: Option<OciCpu>,
160 #[serde(skip_serializing_if = "Option::is_none")]
161 pub pids: Option<OciPids>,
162}
163
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct OciMemory {
166 #[serde(skip_serializing_if = "Option::is_none")]
167 pub limit: Option<i64>,
168}
169
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct OciCpu {
172 #[serde(skip_serializing_if = "Option::is_none")]
173 pub quota: Option<i64>,
174 #[serde(skip_serializing_if = "Option::is_none")]
175 pub period: Option<u64>,
176}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct OciPids {
180 pub limit: i64,
181}
182
183#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct OciRlimit {
188 #[serde(rename = "type")]
190 pub limit_type: String,
191 pub hard: u64,
193 pub soft: u64,
195}
196
197#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct OciConsoleSize {
200 pub height: u32,
201 pub width: u32,
202}
203
204#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct OciDevice {
209 #[serde(rename = "type")]
211 pub device_type: String,
212 pub path: String,
214 #[serde(skip_serializing_if = "Option::is_none")]
216 pub major: Option<i64>,
217 #[serde(skip_serializing_if = "Option::is_none")]
219 pub minor: Option<i64>,
220 #[serde(rename = "fileMode", skip_serializing_if = "Option::is_none")]
222 pub file_mode: Option<u32>,
223 #[serde(skip_serializing_if = "Option::is_none")]
225 pub uid: Option<u32>,
226 #[serde(skip_serializing_if = "Option::is_none")]
228 pub gid: Option<u32>,
229}
230
231#[derive(Debug, Clone, Serialize, Deserialize)]
235pub struct OciSeccomp {
236 #[serde(rename = "defaultAction")]
238 pub default_action: String,
239 #[serde(default, skip_serializing_if = "Vec::is_empty")]
241 pub architectures: Vec<String>,
242 #[serde(default, skip_serializing_if = "Vec::is_empty")]
244 pub syscalls: Vec<OciSeccompSyscall>,
245}
246
247#[derive(Debug, Clone, Serialize, Deserialize)]
249pub struct OciSeccompSyscall {
250 pub names: Vec<String>,
252 pub action: String,
254 #[serde(default, skip_serializing_if = "Vec::is_empty")]
256 pub args: Vec<OciSeccompArg>,
257}
258
259#[derive(Debug, Clone, Serialize, Deserialize)]
261pub struct OciSeccompArg {
262 pub index: u32,
264 pub value: u64,
266 #[serde(rename = "valueTwo", default, skip_serializing_if = "is_zero")]
268 pub value_two: u64,
269 pub op: String,
271}
272
/// Serde `skip_serializing_if` predicate: true exactly when the value is 0.
fn is_zero(v: &u64) -> bool {
    matches!(*v, 0)
}
276
277#[derive(Debug, Clone, Serialize, Deserialize)]
281pub struct OciIntelRdt {
282 #[serde(rename = "closID", default, skip_serializing_if = "Option::is_none")]
284 pub clos_id: Option<String>,
285 #[serde(
287 rename = "l3CacheSchema",
288 default,
289 skip_serializing_if = "Option::is_none"
290 )]
291 pub l3_cache_schema: Option<String>,
292 #[serde(
294 rename = "memBwSchema",
295 default,
296 skip_serializing_if = "Option::is_none"
297 )]
298 pub mem_bw_schema: Option<String>,
299}
300
301#[derive(Debug, Clone, Serialize, Deserialize)]
305pub struct OciHook {
306 pub path: String,
308 #[serde(default, skip_serializing_if = "Vec::is_empty")]
310 pub args: Vec<String>,
311 #[serde(default, skip_serializing_if = "Vec::is_empty")]
313 pub env: Vec<String>,
314 #[serde(default, skip_serializing_if = "Option::is_none")]
316 pub timeout: Option<u32>,
317}
318
319#[derive(Debug, Clone, Default, Serialize, Deserialize)]
323pub struct OciHooks {
324 #[serde(
326 rename = "createRuntime",
327 default,
328 skip_serializing_if = "Vec::is_empty"
329 )]
330 pub create_runtime: Vec<OciHook>,
331 #[serde(
333 rename = "createContainer",
334 default,
335 skip_serializing_if = "Vec::is_empty"
336 )]
337 pub create_container: Vec<OciHook>,
338 #[serde(
340 rename = "startContainer",
341 default,
342 skip_serializing_if = "Vec::is_empty"
343 )]
344 pub start_container: Vec<OciHook>,
345 #[serde(default, skip_serializing_if = "Vec::is_empty")]
347 pub poststart: Vec<OciHook>,
348 #[serde(default, skip_serializing_if = "Vec::is_empty")]
350 pub poststop: Vec<OciHook>,
351}
352
353#[derive(Debug, Clone, Serialize)]
357pub struct OciContainerState {
358 #[serde(rename = "ociVersion")]
359 pub oci_version: String,
360 pub id: String,
361 pub status: OciStatus,
362 pub pid: u32,
363 pub bundle: String,
364}
365
366impl OciHooks {
367 pub fn is_empty(&self) -> bool {
369 self.create_runtime.is_empty()
370 && self.create_container.is_empty()
371 && self.start_container.is_empty()
372 && self.poststart.is_empty()
373 && self.poststop.is_empty()
374 }
375
376 pub fn run_hooks(hooks: &[OciHook], state: &OciContainerState, phase: &str) -> Result<()> {
380 let state_json = serde_json::to_string(state).map_err(|e| {
381 NucleusError::HookError(format!(
382 "Failed to serialize container state for hook: {}",
383 e
384 ))
385 })?;
386
387 for (i, hook) in hooks.iter().enumerate() {
388 info!(
389 "Running {} hook [{}/{}]: {}",
390 phase,
391 i + 1,
392 hooks.len(),
393 hook.path
394 );
395 Self::execute_hook(hook, &state_json, phase)?;
396 }
397
398 Ok(())
399 }
400
401 pub fn run_hooks_best_effort(hooks: &[OciHook], state: &OciContainerState, phase: &str) {
406 let state_json = match serde_json::to_string(state) {
407 Ok(json) => json,
408 Err(e) => {
409 warn!(
410 "Failed to serialize container state for {} hooks: {}",
411 phase, e
412 );
413 return;
414 }
415 };
416
417 for (i, hook) in hooks.iter().enumerate() {
418 info!(
419 "Running {} hook [{}/{}]: {}",
420 phase,
421 i + 1,
422 hooks.len(),
423 hook.path
424 );
425 if let Err(e) = Self::execute_hook(hook, &state_json, phase) {
426 warn!("{} hook [{}] failed (continuing): {}", phase, i + 1, e);
427 }
428 }
429 }
430
431 fn execute_hook(hook: &OciHook, state_json: &str, phase: &str) -> Result<()> {
432 #[cfg(not(test))]
433 use std::os::unix::process::CommandExt;
434 use std::process::{Command, Stdio};
435
436 let hook_path = Path::new(&hook.path);
437 if !hook_path.is_absolute() {
438 return Err(NucleusError::HookError(format!(
439 "{} hook path must be absolute: {}",
440 phase, hook.path
441 )));
442 }
443
444 #[cfg(not(test))]
448 {
449 const TRUSTED_HOOK_PREFIXES: &[&str] = &[
450 "/usr/bin/",
451 "/usr/sbin/",
452 "/usr/lib/",
453 "/usr/libexec/",
454 "/usr/local/bin/",
455 "/usr/local/sbin/",
456 "/usr/local/libexec/",
457 "/bin/",
458 "/sbin/",
459 "/nix/store/",
460 "/opt/",
461 ];
462 if !TRUSTED_HOOK_PREFIXES
463 .iter()
464 .any(|prefix| hook.path.starts_with(prefix))
465 {
466 return Err(NucleusError::HookError(format!(
467 "{} hook path '{}' is not under a trusted directory ({:?})",
468 phase, hook.path, TRUSTED_HOOK_PREFIXES
469 )));
470 }
471 }
472
473 match std::fs::symlink_metadata(hook_path) {
477 Ok(meta) if meta.file_type().is_symlink() => {
478 return Err(NucleusError::HookError(format!(
479 "{} hook path is a symlink (refusing to follow): {}",
480 phase, hook.path
481 )));
482 }
483 Err(_) => {
484 return Err(NucleusError::HookError(format!(
485 "{} hook binary not found: {}",
486 phase, hook.path
487 )));
488 }
489 Ok(_) => {}
490 }
491
492 Self::validate_hook_binary(hook_path, phase)?;
497
498 let mut cmd = Command::new(&hook.path);
499 if !hook.args.is_empty() {
500 cmd.args(&hook.args[1..]);
502 }
503
504 if !hook.env.is_empty() {
505 cmd.env_clear();
506 for entry in &hook.env {
507 if let Some((key, value)) = entry.split_once('=') {
508 cmd.env(key, value);
509 }
510 }
511 }
512
513 cmd.stdin(Stdio::piped());
517 cmd.stdout(Stdio::piped());
518 cmd.stderr(Stdio::piped());
519
520 #[cfg(not(test))]
524 unsafe {
525 cmd.pre_exec(|| {
526 if libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0 {
529 return Err(std::io::Error::last_os_error());
530 }
531
532 let rlim_nproc = libc::rlimit {
533 rlim_cur: 1024,
534 rlim_max: 1024,
535 };
536 if libc::setrlimit(libc::RLIMIT_NPROC, &rlim_nproc) != 0 {
537 return Err(std::io::Error::last_os_error());
538 }
539
540 let rlim_nofile = libc::rlimit {
541 rlim_cur: 1024,
542 rlim_max: 1024,
543 };
544 if libc::setrlimit(libc::RLIMIT_NOFILE, &rlim_nofile) != 0 {
545 return Err(std::io::Error::last_os_error());
546 }
547
548 Ok(())
549 });
550 }
551
552 const TEXT_FILE_BUSY_SPAWN_RETRIES: usize = 100;
553 const TEXT_FILE_BUSY_RETRY_DELAY: std::time::Duration =
554 std::time::Duration::from_millis(10);
555
556 let mut text_file_busy_retries = 0;
557 let mut child = loop {
558 match cmd.spawn() {
559 Ok(child) => break child,
560 Err(e)
561 if e.raw_os_error() == Some(libc::ETXTBSY)
562 && text_file_busy_retries < TEXT_FILE_BUSY_SPAWN_RETRIES =>
563 {
564 text_file_busy_retries += 1;
565 debug!(
566 "{} hook {} was busy during spawn; retrying ({}/{})",
567 phase, hook.path, text_file_busy_retries, TEXT_FILE_BUSY_SPAWN_RETRIES
568 );
569 std::thread::sleep(TEXT_FILE_BUSY_RETRY_DELAY);
570 }
571 Err(e) => {
572 return Err(NucleusError::HookError(format!(
573 "Failed to spawn {} hook {}: {}",
574 phase, hook.path, e
575 )));
576 }
577 }
578 };
579
580 if let Some(mut stdin) = child.stdin.take() {
581 use std::io::Write as IoWrite;
582 let _ = stdin.write_all(state_json.as_bytes());
583 }
584
585 let timeout_secs = hook.timeout.unwrap_or(30) as u64;
586 let start = std::time::Instant::now();
587 let timeout = std::time::Duration::from_secs(timeout_secs);
588
589 loop {
590 match child.try_wait() {
591 Ok(Some(status)) => {
592 if status.success() {
593 debug!("{} hook {} completed successfully", phase, hook.path);
594 return Ok(());
595 } else {
596 let stderr = child
597 .stderr
598 .take()
599 .map(|mut e| {
600 let mut buf = String::new();
601 use std::io::Read;
602 let _ = e.read_to_string(&mut buf);
603 buf
604 })
605 .unwrap_or_default();
606 return Err(NucleusError::HookError(format!(
607 "{} hook {} exited with status: {}{}",
608 phase,
609 hook.path,
610 status,
611 if stderr.is_empty() {
612 String::new()
613 } else {
614 format!(" (stderr: {})", stderr.trim())
615 }
616 )));
617 }
618 }
619 Ok(None) => {
620 if start.elapsed() >= timeout {
621 let _ = child.kill();
622 let _ = child.wait();
623 return Err(NucleusError::HookError(format!(
624 "{} hook {} timed out after {}s",
625 phase, hook.path, timeout_secs
626 )));
627 }
628 std::thread::sleep(std::time::Duration::from_millis(50));
629 }
630 Err(e) => {
631 return Err(NucleusError::HookError(format!(
632 "Failed to wait for {} hook {}: {}",
633 phase, hook.path, e
634 )));
635 }
636 }
637 }
638 }
639
640 fn validate_hook_binary(hook_path: &Path, phase: &str) -> Result<()> {
646 let metadata = std::fs::symlink_metadata(hook_path).map_err(|e| {
650 NucleusError::HookError(format!(
651 "Failed to stat {} hook {}: {}",
652 phase,
653 hook_path.display(),
654 e
655 ))
656 })?;
657
658 use std::os::unix::fs::MetadataExt;
659 let mode = metadata.mode();
660 let uid = metadata.uid();
661 let gid = metadata.gid();
662 let effective_uid = nix::unistd::Uid::effective().as_raw();
663
664 if mode & 0o002 != 0 {
666 return Err(NucleusError::HookError(format!(
667 "{} hook {} is world-writable (mode {:04o}) – refusing to execute",
668 phase,
669 hook_path.display(),
670 mode & 0o7777
671 )));
672 }
673
674 if mode & 0o020 != 0 && uid != 0 {
676 return Err(NucleusError::HookError(format!(
677 "{} hook {} is group-writable and not owned by root (mode {:04o}, uid {}) – refusing to execute",
678 phase,
679 hook_path.display(),
680 mode & 0o7777,
681 uid
682 )));
683 }
684
685 if uid != 0 && uid != effective_uid {
687 return Err(NucleusError::HookError(format!(
688 "{} hook {} is owned by UID {} (expected 0 or {}) – refusing to execute",
689 phase,
690 hook_path.display(),
691 uid,
692 effective_uid
693 )));
694 }
695
696 if mode & 0o6000 != 0 {
698 return Err(NucleusError::HookError(format!(
699 "{} hook {} has setuid/setgid bits (mode {:04o}) – refusing to execute",
700 phase,
701 hook_path.display(),
702 mode & 0o7777
703 )));
704 }
705
706 debug!(
707 "{} hook {} validation passed (uid={}, gid={}, mode={:04o})",
708 phase,
709 hook_path.display(),
710 uid,
711 gid,
712 mode & 0o7777
713 );
714
715 Ok(())
716 }
717}
718
719impl OciConfig {
720 pub fn new(command: Vec<String>, hostname: Option<String>) -> Self {
722 Self {
723 oci_version: "1.0.2".to_string(),
724 root: OciRoot {
725 path: "rootfs".to_string(),
726 readonly: true,
727 },
728 process: OciProcess {
729 terminal: false,
730 user: OciUser {
731 uid: 0,
732 gid: 0,
733 additional_gids: None,
734 },
735 args: command,
736 env: vec![
737 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
738 ],
739 cwd: "/".to_string(),
740 no_new_privileges: true,
741 capabilities: Some(OciCapabilities {
742 bounding: vec![],
743 effective: vec![],
744 inheritable: vec![],
745 permitted: vec![],
746 ambient: vec![],
747 }),
748 rlimits: vec![],
749 console_size: None,
750 apparmor_profile: None,
751 selinux_label: None,
752 },
753 hostname,
754 mounts: vec![
755 OciMount {
756 destination: "/proc".to_string(),
757 source: "proc".to_string(),
758 mount_type: "proc".to_string(),
759 options: vec![
760 "nosuid".to_string(),
761 "noexec".to_string(),
762 "nodev".to_string(),
763 ],
764 },
765 OciMount {
766 destination: "/dev".to_string(),
767 source: "tmpfs".to_string(),
768 mount_type: "tmpfs".to_string(),
769 options: vec![
770 "nosuid".to_string(),
771 "noexec".to_string(),
772 "strictatime".to_string(),
773 "mode=755".to_string(),
774 "size=65536k".to_string(),
775 ],
776 },
777 OciMount {
778 destination: "/dev/shm".to_string(),
779 source: "shm".to_string(),
780 mount_type: "tmpfs".to_string(),
781 options: vec![
782 "nosuid".to_string(),
783 "noexec".to_string(),
784 "nodev".to_string(),
785 "mode=1777".to_string(),
786 "size=65536k".to_string(),
787 ],
788 },
789 OciMount {
790 destination: "/tmp".to_string(),
791 source: "tmpfs".to_string(),
792 mount_type: "tmpfs".to_string(),
793 options: vec![
794 "nosuid".to_string(),
795 "nodev".to_string(),
796 "noexec".to_string(),
797 "mode=1777".to_string(),
798 "size=65536k".to_string(),
799 ],
800 },
801 OciMount {
802 destination: "/sys".to_string(),
803 source: "sysfs".to_string(),
804 mount_type: "sysfs".to_string(),
805 options: vec![
806 "nosuid".to_string(),
807 "noexec".to_string(),
808 "nodev".to_string(),
809 "ro".to_string(),
810 ],
811 },
812 ],
813 hooks: None,
814 annotations: HashMap::new(),
815 linux: Some(OciLinux {
816 namespaces: Some(vec![
817 OciNamespace {
818 namespace_type: "pid".to_string(),
819 },
820 OciNamespace {
821 namespace_type: "network".to_string(),
822 },
823 OciNamespace {
824 namespace_type: "ipc".to_string(),
825 },
826 OciNamespace {
827 namespace_type: "uts".to_string(),
828 },
829 OciNamespace {
830 namespace_type: "mount".to_string(),
831 },
832 ]),
833 resources: None,
834 uid_mappings: vec![],
835 gid_mappings: vec![],
836 masked_paths: vec![
838 "/proc/acpi".to_string(),
839 "/proc/asound".to_string(),
840 "/proc/kcore".to_string(),
841 "/proc/keys".to_string(),
842 "/proc/latency_stats".to_string(),
843 "/proc/sched_debug".to_string(),
844 "/proc/scsi".to_string(),
845 "/proc/timer_list".to_string(),
846 "/proc/timer_stats".to_string(),
847 "/proc/sysrq-trigger".to_string(), "/proc/kpagecount".to_string(),
849 "/proc/kpageflags".to_string(),
850 "/proc/kpagecgroup".to_string(),
851 "/proc/config.gz".to_string(),
852 "/proc/kallsyms".to_string(),
853 "/sys/firmware".to_string(),
854 ],
855 readonly_paths: vec![
856 "/proc/bus".to_string(),
857 "/proc/fs".to_string(),
858 "/proc/irq".to_string(),
859 "/proc/sys".to_string(),
860 ],
861 devices: vec![
862 OciDevice {
863 device_type: "c".to_string(),
864 path: "/dev/null".to_string(),
865 major: Some(1),
866 minor: Some(3),
867 file_mode: Some(0o666),
868 uid: Some(0),
869 gid: Some(0),
870 },
871 OciDevice {
872 device_type: "c".to_string(),
873 path: "/dev/zero".to_string(),
874 major: Some(1),
875 minor: Some(5),
876 file_mode: Some(0o666),
877 uid: Some(0),
878 gid: Some(0),
879 },
880 OciDevice {
881 device_type: "c".to_string(),
882 path: "/dev/full".to_string(),
883 major: Some(1),
884 minor: Some(7),
885 file_mode: Some(0o666),
886 uid: Some(0),
887 gid: Some(0),
888 },
889 OciDevice {
890 device_type: "c".to_string(),
891 path: "/dev/random".to_string(),
892 major: Some(1),
893 minor: Some(8),
894 file_mode: Some(0o666),
895 uid: Some(0),
896 gid: Some(0),
897 },
898 OciDevice {
899 device_type: "c".to_string(),
900 path: "/dev/urandom".to_string(),
901 major: Some(1),
902 minor: Some(9),
903 file_mode: Some(0o666),
904 uid: Some(0),
905 gid: Some(0),
906 },
907 ],
908 seccomp: None,
909 rootfs_propagation: Some("rprivate".to_string()),
910 sysctl: HashMap::new(),
911 cgroups_path: None,
912 intel_rdt: None,
913 }),
914 }
915 }
916
917 pub fn with_resources(mut self, limits: &ResourceLimits) -> Self {
919 let mut resources = OciResources {
920 memory: None,
921 cpu: None,
922 pids: None,
923 };
924
925 if let Some(memory_bytes) = limits.memory_bytes {
926 resources.memory = Some(OciMemory {
927 limit: Some(memory_bytes as i64),
928 });
929 }
930
931 if let Some(quota_us) = limits.cpu_quota_us {
932 resources.cpu = Some(OciCpu {
933 quota: Some(quota_us as i64),
934 period: Some(limits.cpu_period_us),
935 });
936 }
937
938 if let Some(pids_max) = limits.pids_max {
939 resources.pids = Some(OciPids {
940 limit: pids_max as i64,
941 });
942 }
943
944 if let Some(linux) = &mut self.linux {
945 linux.resources = Some(resources);
946 }
947
948 self
949 }
950
951 pub fn with_no_new_privileges(mut self, enabled: bool) -> Self {
953 self.process.no_new_privileges = enabled;
954 self
955 }
956
957 pub fn with_env(mut self, vars: &[(String, String)]) -> Self {
959 for (key, value) in vars {
960 self.process.env.push(format!("{}={}", key, value));
961 }
962 self
963 }
964
965 pub fn with_sd_notify(mut self) -> Self {
967 if let Ok(notify_socket) = std::env::var("NOTIFY_SOCKET") {
968 self.process
969 .env
970 .push(format!("NOTIFY_SOCKET={}", notify_socket));
971 }
972 self
973 }
974
975 pub fn with_secret_mounts(mut self, secrets: &[crate::container::SecretMount]) -> Self {
977 for secret in secrets {
978 self.mounts.push(OciMount {
979 destination: secret.dest.to_string_lossy().to_string(),
980 source: secret.source.to_string_lossy().to_string(),
981 mount_type: "bind".to_string(),
982 options: vec![
983 "bind".to_string(),
984 "ro".to_string(),
985 "nosuid".to_string(),
986 "nodev".to_string(),
987 "noexec".to_string(),
988 ],
989 });
990 }
991 self
992 }
993
994 pub fn with_process_identity(mut self, identity: &crate::container::ProcessIdentity) -> Self {
996 self.process.user.uid = identity.uid;
997 self.process.user.gid = identity.gid;
998 self.process.user.additional_gids = if identity.additional_gids.is_empty() {
999 None
1000 } else {
1001 Some(identity.additional_gids.clone())
1002 };
1003 self
1004 }
1005
1006 pub fn with_inmemory_secret_mounts(
1010 mut self,
1011 stage_dir: &Path,
1012 secrets: &[crate::container::SecretMount],
1013 ) -> Result<Self> {
1014 self.mounts.push(OciMount {
1015 destination: "/run/secrets".to_string(),
1016 source: stage_dir.to_string_lossy().to_string(),
1017 mount_type: "bind".to_string(),
1018 options: vec![
1019 "bind".to_string(),
1020 "ro".to_string(),
1021 "nosuid".to_string(),
1022 "nodev".to_string(),
1023 "noexec".to_string(),
1024 ],
1025 });
1026
1027 for secret in secrets {
1028 let dest = normalize_container_destination(&secret.dest)?;
1029 if !secret.source.starts_with(stage_dir) {
1030 return Err(NucleusError::ConfigError(format!(
1031 "Staged secret source {:?} must live under {:?}",
1032 secret.source, stage_dir
1033 )));
1034 }
1035 self.mounts.push(OciMount {
1036 destination: dest.to_string_lossy().to_string(),
1037 source: secret.source.to_string_lossy().to_string(),
1038 mount_type: "bind".to_string(),
1039 options: vec![
1040 "bind".to_string(),
1041 "ro".to_string(),
1042 "nosuid".to_string(),
1043 "nodev".to_string(),
1044 "noexec".to_string(),
1045 ],
1046 });
1047 }
1048
1049 Ok(self)
1050 }
1051
1052 pub fn with_volume_mounts(mut self, volumes: &[crate::container::VolumeMount]) -> Result<Self> {
1054 use crate::container::VolumeSource;
1055
1056 for volume in volumes {
1057 let dest = normalize_container_destination(&volume.dest)?;
1058 match &volume.source {
1059 VolumeSource::Bind { source } => {
1060 crate::filesystem::validate_bind_mount_source(source)?;
1061 let mut options = vec![
1062 "bind".to_string(),
1063 "nosuid".to_string(),
1064 "nodev".to_string(),
1065 ];
1066 if volume.read_only {
1067 options.push("ro".to_string());
1068 }
1069 self.mounts.push(OciMount {
1070 destination: dest.to_string_lossy().to_string(),
1071 source: source.to_string_lossy().to_string(),
1072 mount_type: "bind".to_string(),
1073 options,
1074 });
1075 }
1076 VolumeSource::Tmpfs { size } => {
1077 let mut options = vec![
1078 "nosuid".to_string(),
1079 "nodev".to_string(),
1080 "mode=0755".to_string(),
1081 ];
1082 if volume.read_only {
1083 options.push("ro".to_string());
1084 }
1085 if let Some(size) = size {
1086 options.push(format!("size={}", size));
1087 }
1088 self.mounts.push(OciMount {
1089 destination: dest.to_string_lossy().to_string(),
1090 source: "tmpfs".to_string(),
1091 mount_type: "tmpfs".to_string(),
1092 options,
1093 });
1094 }
1095 }
1096 }
1097
1098 Ok(self)
1099 }
1100
1101 pub fn with_context_bind(mut self, context_dir: &std::path::Path) -> Self {
1106 self.mounts.push(OciMount {
1107 destination: "/context".to_string(),
1108 source: context_dir.to_string_lossy().to_string(),
1109 mount_type: "bind".to_string(),
1110 options: vec![
1111 "bind".to_string(),
1112 "ro".to_string(),
1113 "nosuid".to_string(),
1114 "nodev".to_string(),
1115 ],
1116 });
1117 self
1118 }
1119
1120 pub fn with_rootfs_binds(mut self, rootfs_path: &std::path::Path) -> Self {
1122 let subdirs = ["bin", "sbin", "lib", "lib64", "usr", "etc", "nix"];
1123 for subdir in &subdirs {
1124 let source = rootfs_path.join(subdir);
1125 if source.exists() {
1126 self.mounts.push(OciMount {
1127 destination: format!("/{}", subdir),
1128 source: source.to_string_lossy().to_string(),
1129 mount_type: "bind".to_string(),
1130 options: vec![
1131 "bind".to_string(),
1132 "ro".to_string(),
1133 "nosuid".to_string(),
1134 "nodev".to_string(),
1135 ],
1136 });
1137 }
1138 }
1139 self
1140 }
1141
1142 pub fn with_namespace_config(mut self, config: &NamespaceConfig) -> Self {
1144 let mut namespaces = Vec::new();
1145
1146 if config.pid {
1147 namespaces.push(OciNamespace {
1148 namespace_type: "pid".to_string(),
1149 });
1150 }
1151 if config.net {
1152 namespaces.push(OciNamespace {
1153 namespace_type: "network".to_string(),
1154 });
1155 }
1156 if config.ipc {
1157 namespaces.push(OciNamespace {
1158 namespace_type: "ipc".to_string(),
1159 });
1160 }
1161 if config.uts {
1162 namespaces.push(OciNamespace {
1163 namespace_type: "uts".to_string(),
1164 });
1165 }
1166 if config.mnt {
1167 namespaces.push(OciNamespace {
1168 namespace_type: "mount".to_string(),
1169 });
1170 }
1171 if config.cgroup {
1172 namespaces.push(OciNamespace {
1173 namespace_type: "cgroup".to_string(),
1174 });
1175 }
1176 if config.time {
1177 namespaces.push(OciNamespace {
1178 namespace_type: "time".to_string(),
1179 });
1180 }
1181 if config.user {
1182 namespaces.push(OciNamespace {
1183 namespace_type: "user".to_string(),
1184 });
1185 }
1186
1187 if let Some(linux) = &mut self.linux {
1188 linux.namespaces = Some(namespaces);
1189 }
1190
1191 self
1192 }
1193
1194 pub fn with_host_runtime_binds(mut self) -> Self {
1200 let host_paths: BTreeSet<String> =
1203 ["/bin", "/sbin", "/usr", "/lib", "/lib64", "/nix/store"]
1204 .iter()
1205 .map(|s| s.to_string())
1206 .collect();
1207
1208 for host_path in host_paths {
1209 let source = Path::new(&host_path);
1210 if !source.exists() {
1211 continue;
1212 }
1213
1214 self.mounts.push(OciMount {
1215 destination: host_path.clone(),
1216 source: source.to_string_lossy().to_string(),
1217 mount_type: "bind".to_string(),
1218 options: vec![
1219 "bind".to_string(),
1220 "ro".to_string(),
1221 "nosuid".to_string(),
1222 "nodev".to_string(),
1223 ],
1224 });
1225 }
1226 self
1227 }
1228
1229 pub fn with_user_namespace(mut self) -> Self {
1231 if let Some(linux) = &mut self.linux {
1232 if let Some(namespaces) = &mut linux.namespaces {
1233 namespaces.push(OciNamespace {
1234 namespace_type: "user".to_string(),
1235 });
1236 }
1237 }
1238 self
1239 }
1240
1241 pub fn without_network_namespace(mut self) -> Self {
1244 if let Some(linux) = &mut self.linux {
1245 if let Some(namespaces) = &mut linux.namespaces {
1246 namespaces.retain(|ns| ns.namespace_type != "network");
1247 }
1248 }
1249
1250 self
1251 }
1252
1253 pub fn with_rootless_user_namespace(mut self, config: &UserNamespaceConfig) -> Self {
1260 if let Some(linux) = &mut self.linux {
1261 if let Some(namespaces) = &mut linux.namespaces {
1262 namespaces.retain(|ns| ns.namespace_type != "network");
1263 if !namespaces.iter().any(|ns| ns.namespace_type == "user") {
1264 namespaces.push(OciNamespace {
1265 namespace_type: "user".to_string(),
1266 });
1267 }
1268 }
1269 linux.uid_mappings = config.uid_mappings.iter().map(OciIdMapping::from).collect();
1270 linux.gid_mappings = config.gid_mappings.iter().map(OciIdMapping::from).collect();
1271 }
1272 self
1273 }
1274
1275 pub fn with_hooks(mut self, hooks: OciHooks) -> Self {
1277 if hooks.is_empty() {
1278 self.hooks = None;
1279 } else {
1280 self.hooks = Some(hooks);
1281 }
1282 self
1283 }
1284
1285 pub fn with_rlimits(mut self, limits: &ResourceLimits) -> Self {
1290 let mut rlimits = Vec::with_capacity(3);
1291
1292 if let Some(nproc_limit) = limits.pids_max {
1293 rlimits.push(OciRlimit {
1294 limit_type: "RLIMIT_NPROC".to_string(),
1295 hard: nproc_limit,
1296 soft: nproc_limit,
1297 });
1298 }
1299
1300 rlimits.push(OciRlimit {
1301 limit_type: "RLIMIT_NOFILE".to_string(),
1302 hard: 1024,
1303 soft: 1024,
1304 });
1305
1306 let memlock_limit = limits.memlock_bytes.unwrap_or(64 * 1024);
1307 rlimits.push(OciRlimit {
1308 limit_type: "RLIMIT_MEMLOCK".to_string(),
1309 hard: memlock_limit,
1310 soft: memlock_limit,
1311 });
1312
1313 self.process.rlimits = rlimits;
1314 self
1315 }
1316
1317 pub fn with_seccomp(mut self, seccomp: OciSeccomp) -> Self {
1319 if let Some(linux) = &mut self.linux {
1320 linux.seccomp = Some(seccomp);
1321 }
1322 self
1323 }
1324
1325 pub fn with_cgroups_path(mut self, path: String) -> Self {
1327 if let Some(linux) = &mut self.linux {
1328 linux.cgroups_path = Some(path);
1329 }
1330 self
1331 }
1332
1333 pub fn with_sysctl(mut self, sysctl: HashMap<String, String>) -> Self {
1335 if let Some(linux) = &mut self.linux {
1336 linux.sysctl = sysctl;
1337 }
1338 self
1339 }
1340
1341 pub fn with_annotations(mut self, annotations: HashMap<String, String>) -> Self {
1343 self.annotations = annotations;
1344 self
1345 }
1346}
1347
1348impl From<&IdMapping> for OciIdMapping {
1349 fn from(mapping: &IdMapping) -> Self {
1350 Self {
1351 container_id: mapping.container_id,
1352 host_id: mapping.host_id,
1353 size: mapping.count,
1354 }
1355 }
1356}
1357
/// An OCI bundle: a directory on disk paired with the configuration that
/// `create` writes into it as `config.json`.
pub struct OciBundle {
    /// Directory that holds `config.json` and the `rootfs` subdirectory.
    bundle_path: PathBuf,
    /// Configuration serialized into the bundle.
    config: OciConfig,
}
1365
1366impl OciBundle {
1367 pub fn new(bundle_path: PathBuf, config: OciConfig) -> Self {
1369 Self {
1370 bundle_path,
1371 config,
1372 }
1373 }
1374
1375 pub fn create(&self) -> Result<()> {
1377 info!("Creating OCI bundle at {:?}", self.bundle_path);
1378
1379 fs::create_dir_all(&self.bundle_path).map_err(|e| {
1381 NucleusError::GVisorError(format!(
1382 "Failed to create bundle directory {:?}: {}",
1383 self.bundle_path, e
1384 ))
1385 })?;
1386 fs::set_permissions(&self.bundle_path, fs::Permissions::from_mode(0o700)).map_err(|e| {
1387 NucleusError::GVisorError(format!(
1388 "Failed to secure bundle directory permissions {:?}: {}",
1389 self.bundle_path, e
1390 ))
1391 })?;
1392
1393 let rootfs = self.bundle_path.join("rootfs");
1395 fs::create_dir_all(&rootfs).map_err(|e| {
1396 NucleusError::GVisorError(format!("Failed to create rootfs directory: {}", e))
1397 })?;
1398 fs::set_permissions(&rootfs, fs::Permissions::from_mode(0o755)).map_err(|e| {
1403 NucleusError::GVisorError(format!(
1404 "Failed to set rootfs directory permissions {:?}: {}",
1405 rootfs, e
1406 ))
1407 })?;
1408
1409 let config_path = self.bundle_path.join("config.json");
1411 let config_json = serde_json::to_string_pretty(&self.config).map_err(|e| {
1412 NucleusError::GVisorError(format!("Failed to serialize OCI config: {}", e))
1413 })?;
1414
1415 let mut file = OpenOptions::new()
1417 .create(true)
1418 .truncate(true)
1419 .write(true)
1420 .mode(0o600)
1421 .custom_flags(libc::O_NOFOLLOW)
1422 .open(&config_path)
1423 .map_err(|e| NucleusError::GVisorError(format!("Failed to open config.json: {}", e)))?;
1424 file.write_all(config_json.as_bytes()).map_err(|e| {
1425 NucleusError::GVisorError(format!("Failed to write config.json: {}", e))
1426 })?;
1427 file.sync_all()
1428 .map_err(|e| NucleusError::GVisorError(format!("Failed to sync config.json: {}", e)))?;
1429
1430 debug!("Created OCI bundle structure at {:?}", self.bundle_path);
1431
1432 Ok(())
1433 }
1434
1435 pub fn rootfs_path(&self) -> PathBuf {
1437 self.bundle_path.join("rootfs")
1438 }
1439
1440 pub fn bundle_path(&self) -> &Path {
1442 &self.bundle_path
1443 }
1444
1445 pub fn cleanup(&self) -> Result<()> {
1447 if self.bundle_path.exists() {
1448 fs::remove_dir_all(&self.bundle_path).map_err(|e| {
1449 NucleusError::GVisorError(format!("Failed to cleanup bundle: {}", e))
1450 })?;
1451 debug!("Cleaned up OCI bundle at {:?}", self.bundle_path);
1452 }
1453 Ok(())
1454 }
1455}
1456
1457#[cfg(test)]
1458mod tests {
1459 use super::*;
1460 use tempfile::TempDir;
1461
// A freshly constructed config carries the expected version, root path,
// process arguments and hostname.
#[test]
fn test_oci_config_new() {
    let args = vec!["/bin/sh".to_string()];
    let hostname = Some("test".to_string());

    let config = OciConfig::new(args, hostname);

    assert_eq!(config.oci_version, "1.0.2");
    assert_eq!(config.root.path, "rootfs");
    assert_eq!(config.process.args, vec!["/bin/sh"]);
    assert_eq!(config.hostname, Some("test".to_string()));
}
1471
// Memory and CPU limits must both show up in the Linux resources section.
#[test]
fn test_oci_config_with_resources() {
    let limits = ResourceLimits::unlimited()
        .with_memory("512M")
        .unwrap()
        .with_cpu_cores(2.0)
        .unwrap();

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_resources(&limits);

    let resources = config
        .linux
        .expect("linux section must be present")
        .resources
        .expect("resources section must be present");
    assert!(resources.memory.is_some());
    assert!(resources.cpu.is_some());
}
1490
// `create` must lay out the bundle directory, `rootfs` and `config.json`;
// `cleanup` must remove all of it again.
#[test]
fn test_oci_bundle_create() {
    let scratch = TempDir::new().unwrap();
    let bundle_dir = scratch.path().join("test-bundle");
    let bundle = OciBundle::new(
        bundle_dir.clone(),
        OciConfig::new(vec!["/bin/sh".to_string()], None),
    );

    bundle.create().unwrap();

    let expected_entries = [
        bundle_dir.clone(),
        bundle_dir.join("rootfs"),
        bundle_dir.join("config.json"),
    ];
    assert!(expected_entries.iter().all(|entry| entry.exists()));

    bundle.cleanup().unwrap();
    assert!(!bundle_dir.exists());
}
1508
// The config must serialize with the expected keys and survive a JSON
// round trip.
#[test]
fn test_oci_config_serialization() {
    let original = OciConfig::new(vec!["/bin/sh".to_string()], Some("test".to_string()));

    let json = serde_json::to_string_pretty(&original).unwrap();
    for expected in ["ociVersion", "1.0.2", "/bin/sh"].iter() {
        assert!(json.contains(*expected));
    }

    let roundtripped: OciConfig = serde_json::from_str(&json).unwrap();
    assert_eq!(roundtripped.oci_version, original.oci_version);
    assert_eq!(roundtripped.process.args, original.process.args);
}
1523
// Host runtime bind mounts must come from a fixed set of paths and must never
// be derived from the host `PATH`.
#[test]
fn test_host_runtime_binds_uses_fixed_paths_not_host_path() {
    // NOTE: this overwrites `PATH` for the whole test process and does not
    // restore it afterwards.
    std::env::set_var("PATH", "/tmp/evil-inject-path/bin:/opt/attacker/sbin");
    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_host_runtime_binds();

    for injected in &["/tmp/evil-inject-path", "/opt/attacker"] {
        let in_destinations = config
            .mounts
            .iter()
            .any(|mount| mount.destination.as_str().contains(*injected));
        assert!(
            !in_destinations,
            "with_host_runtime_binds must not use host $PATH – found {:?} in mount destinations",
            injected
        );

        let in_sources = config
            .mounts
            .iter()
            .any(|mount| mount.source.as_str().contains(*injected));
        assert!(
            !in_sources,
            "with_host_runtime_binds must not use host $PATH – found {:?} in mount sources",
            injected
        );
    }

    let allowed_prefixes = ["/bin", "/sbin", "/usr", "/lib", "/lib64", "/nix/store"];
    let bind_mounts = config
        .mounts
        .iter()
        .filter(|mount| mount.mount_type == "bind");
    for bind in bind_mounts {
        let permitted = allowed_prefixes
            .iter()
            .any(|prefix| bind.destination.starts_with(prefix));
        assert!(
            permitted,
            "unexpected bind mount destination: {} – only FHS paths allowed",
            bind.destination
        );
    }
}
1565
// A read-only bind volume and a sized tmpfs volume must both be translated
// into mounts carrying the matching type and option.
#[test]
fn test_volume_mounts_include_bind_and_tmpfs_options() {
    let host_dir = tempfile::TempDir::new().unwrap();

    let bind_volume = crate::container::VolumeMount {
        source: crate::container::VolumeSource::Bind {
            source: host_dir.path().to_path_buf(),
        },
        dest: std::path::PathBuf::from("/var/lib/app"),
        read_only: true,
    };
    let tmpfs_volume = crate::container::VolumeMount {
        source: crate::container::VolumeSource::Tmpfs {
            size: Some("64M".to_string()),
        },
        dest: std::path::PathBuf::from("/var/cache/app"),
        read_only: false,
    };

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None)
        .with_volume_mounts(&[bind_volume, tmpfs_volume])
        .unwrap();

    // True when some mount matches destination, type and carries the option.
    let has_mount = |destination: &str, kind: &str, option: &str| {
        config.mounts.iter().any(|mount| {
            mount.destination == destination
                && mount.mount_type == kind
                && mount.options.contains(&option.to_string())
        })
    };

    assert!(has_mount("/var/lib/app", "bind", "ro"));
    assert!(has_mount("/var/cache/app", "tmpfs", "size=64M"));
}
1599
// Binding `/proc/sys` from the host must be refused with a
// "sensitive host path" error.
#[test]
fn test_volume_mounts_reject_sensitive_host_sources() {
    let proc_bind = crate::container::VolumeMount {
        source: crate::container::VolumeSource::Bind {
            source: std::path::PathBuf::from("/proc/sys"),
        },
        dest: std::path::PathBuf::from("/host-proc"),
        read_only: true,
    };

    let outcome =
        OciConfig::new(vec!["/bin/sh".to_string()], None).with_volume_mounts(&[proc_bind]);

    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("sensitive host path"));
}
1614
// uid, gid and supplementary groups must be copied onto the process user.
#[test]
fn test_oci_config_with_process_identity() {
    let identity = crate::container::ProcessIdentity {
        uid: 1001,
        gid: 1002,
        additional_gids: vec![1003, 1004],
    };

    let config =
        OciConfig::new(vec!["/bin/sh".to_string()], None).with_process_identity(&identity);

    let user = &config.process.user;
    assert_eq!(user.uid, 1001);
    assert_eq!(user.gid, 1002);
    assert_eq!(user.additional_gids, Some(vec![1003, 1004]));
}
1629
// Configured pids and memlock limits must appear as matching soft/hard
// rlimits.
#[test]
fn test_oci_config_with_rlimits_uses_configured_memlock() {
    let limits = ResourceLimits::default()
        .with_pids(99)
        .unwrap()
        .with_memlock("8M")
        .unwrap();

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_rlimits(&limits);
    let rlimits = &config.process.rlimits;

    let nproc_matches = rlimits
        .iter()
        .filter(|limit| limit.limit_type == "RLIMIT_NPROC")
        .any(|limit| limit.soft == 99 && limit.hard == 99);
    assert!(nproc_matches);

    let memlock_matches = rlimits
        .iter()
        .filter(|limit| limit.limit_type == "RLIMIT_MEMLOCK")
        .any(|limit| limit.soft == 8 * 1024 * 1024 && limit.hard == 8 * 1024 * 1024);
    assert!(memlock_matches);
}
1649
// With no pids limit configured, no `RLIMIT_NPROC` entry may be emitted.
#[test]
fn test_oci_config_with_rlimits_omits_nproc_when_unlimited() {
    let limits = ResourceLimits {
        pids_max: None,
        ..ResourceLimits::default()
    };

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_rlimits(&limits);

    let nproc_absent = config
        .process
        .rlimits
        .iter()
        .all(|limit| limit.limit_type != "RLIMIT_NPROC");
    assert!(
        nproc_absent,
        "RLIMIT_NPROC must be omitted when pids_max is unlimited"
    );
}
1668
// The container `PATH` must be the fixed default, regardless of the host
// environment.
#[test]
fn test_oci_config_uses_hardcoded_path_not_host() {
    // NOTE: this overwrites `PATH` for the whole test process and does not
    // restore it afterwards.
    std::env::set_var("PATH", "/nix/store/secret-hash/bin:/home/user/.local/bin");

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None);
    let path_env = config
        .process
        .env
        .iter()
        .map(String::as_str)
        .find(|entry| entry.starts_with("PATH="))
        .expect("PATH env must be set");

    assert_eq!(
        path_env, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
        "OCI config must not leak host PATH"
    );
    assert!(
        !path_env.contains("/nix/store/secret"),
        "Host PATH must not leak into container"
    );
}
1690
// Hooks must serialize under their camelCase keys, leave empty lists out of
// the JSON, and deserialize back to the same content.
#[test]
fn test_oci_hooks_serialization_roundtrip() {
    let runtime_hook = OciHook {
        path: "/usr/bin/hook1".to_string(),
        args: vec!["hook1".to_string(), "--arg1".to_string()],
        env: vec!["FOO=bar".to_string()],
        timeout: Some(10),
    };
    let poststart_hook = OciHook {
        path: "/usr/bin/hook2".to_string(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: None,
    };
    let hooks = OciHooks {
        create_runtime: vec![runtime_hook],
        create_container: Vec::new(),
        start_container: Vec::new(),
        poststart: vec![poststart_hook],
        poststop: Vec::new(),
    };

    let json = serde_json::to_string_pretty(&hooks).unwrap();
    assert!(json.contains("createRuntime"));
    assert!(json.contains("/usr/bin/hook1"));
    // The empty `create_container` list must not appear in the output.
    assert!(!json.contains("createContainer"));

    let restored: OciHooks = serde_json::from_str(&json).unwrap();
    assert_eq!(restored.create_runtime.len(), 1);
    let first = &restored.create_runtime[0];
    assert_eq!(first.path, "/usr/bin/hook1");
    assert_eq!(first.timeout, Some(10));
    assert_eq!(restored.poststart.len(), 1);
    assert!(restored.create_container.is_empty());
}
1723
// `is_empty` is true for the default hook set and false once any hook exists.
#[test]
fn test_oci_hooks_is_empty() {
    assert!(OciHooks::default().is_empty());

    let cleanup_hook = OciHook {
        path: "/bin/cleanup".to_string(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: None,
    };
    let populated = OciHooks {
        poststop: vec![cleanup_hook],
        ..Default::default()
    };
    assert!(!populated.is_empty());
}
1740
// Non-empty hooks must be stored on the config and survive a JSON round trip.
#[test]
fn test_oci_config_with_hooks() {
    let setup_hook = OciHook {
        path: "/usr/bin/setup".to_string(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: None,
    };
    let hooks = OciHooks {
        create_runtime: vec![setup_hook],
        ..Default::default()
    };

    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_hooks(hooks);
    assert!(config.hooks.is_some());

    let json = serde_json::to_string_pretty(&config).unwrap();
    for expected in ["hooks", "createRuntime"].iter() {
        assert!(json.contains(*expected));
    }

    let roundtripped: OciConfig = serde_json::from_str(&json).unwrap();
    let restored_hooks = roundtripped
        .hooks
        .expect("hooks must survive the round trip");
    assert_eq!(restored_hooks.create_runtime.len(), 1);
}
1764
// An empty hook set must be stored as `None` and leave no `hooks` key in the
// serialized JSON.
#[test]
fn test_oci_config_with_empty_hooks_serializes_without_hooks() {
    let no_hooks = OciHooks::default();
    let config = OciConfig::new(vec!["/bin/sh".to_string()], None).with_hooks(no_hooks);
    assert!(config.hooks.is_none());

    let json = serde_json::to_string_pretty(&config).unwrap();
    assert!(!json.contains("hooks"));
}
1774
// A hook whose path is relative must be rejected with an error that mentions
// "absolute".
#[test]
fn test_oci_hook_rejects_relative_path() {
    let state = OciContainerState {
        oci_version: "1.0.2".to_string(),
        id: "test".to_string(),
        status: OciStatus::Creating,
        pid: 1234,
        bundle: "/tmp/bundle".to_string(),
    };
    let relative_hook = OciHook {
        path: "relative/path".to_string(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: None,
    };

    let err_msg = OciHooks::run_hooks(&[relative_hook], &state, "test")
        .expect_err("a relative hook path must be rejected")
        .to_string();
    assert!(err_msg.contains("absolute"), "error: {}", err_msg);
}
1795
/// Returns the value of the first `PATH=` entry found in
/// `/proc/self/environ`.
///
/// Entries that are not valid UTF-8 are skipped. An empty string is returned
/// when the file cannot be read or contains no `PATH=` entry.
fn original_path() -> String {
    std::fs::read("/proc/self/environ")
        .ok()
        .and_then(|raw| {
            // Entries in the file are separated by NUL bytes.
            raw.split(|&byte| byte == 0)
                .filter_map(|entry| std::str::from_utf8(entry).ok())
                .find_map(|entry| entry.strip_prefix("PATH="))
                .map(str::to_string)
        })
        .unwrap_or_default()
}
1813
1814 fn find_bash() -> String {
1816 let candidates = ["/bin/bash", "/usr/bin/bash"];
1817 for c in &candidates {
1818 if std::path::Path::new(c).exists() {
1819 return c.to_string();
1820 }
1821 }
1822 for dir in original_path().split(':') {
1823 let candidate = std::path::PathBuf::from(dir).join("bash");
1824 if candidate.exists() {
1825 return candidate.to_string_lossy().to_string();
1826 }
1827 }
1828 panic!("Cannot find bash binary for test");
1829 }
1830
1831 fn write_script(path: &std::path::Path, body: &str) {
1835 use std::io::Write as IoWrite;
1836 let bash = find_bash();
1837 let orig_path = original_path();
1838 let content = format!("#!{}\nexport PATH='{}'\n{}", bash, orig_path, body);
1839 let mut f = OpenOptions::new()
1840 .create(true)
1841 .truncate(true)
1842 .write(true)
1843 .mode(0o755)
1844 .open(path)
1845 .unwrap();
1846 f.write_all(content.as_bytes()).unwrap();
1847 f.sync_all().unwrap();
1848 drop(f);
1849 }
1850
// A hook script that copies its stdin to a file must receive the container
// state as JSON.
#[test]
fn test_oci_hook_executes_successfully() {
    let scratch = TempDir::new().unwrap();
    let script_path = scratch.path().join("hook.sh");
    let captured_state = scratch.path().join("output.json");

    let script_body = format!("cat > {}\n", captured_state.to_string_lossy());
    write_script(&script_path, &script_body);

    let state = OciContainerState {
        oci_version: "1.0.2".to_string(),
        id: "test-container".to_string(),
        status: OciStatus::Creating,
        pid: 12345,
        bundle: "/tmp/test-bundle".to_string(),
    };
    let hook = OciHook {
        path: script_path.to_string_lossy().into_owned(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: Some(5),
    };

    OciHooks::run_hooks(&[hook], &state, "createRuntime").unwrap();

    let written = std::fs::read_to_string(&captured_state).unwrap();
    let parsed: serde_json::Value = serde_json::from_str(&written).unwrap();
    assert_eq!(parsed["id"], "test-container");
    assert_eq!(parsed["pid"], 12345);
    assert_eq!(parsed["status"], "creating");
}
1885
// `run_hooks` must still succeed when another thread holds the hook script
// open for writing at the moment the hook is launched.
//
// NOTE(review): going by the test name, the open write handle is expected to
// make the first spawn attempt fail with "text file busy" (ETXTBSY), which
// `run_hooks` then retries — confirm against the `run_hooks` implementation.
#[test]
fn test_oci_hook_retries_text_file_busy_spawn() {
    let temp_dir = TempDir::new().unwrap();
    let hook_script = temp_dir.path().join("hook.sh");
    let output_file = temp_dir.path().join("output.json");

    // The script copies its stdin (the container state) into `output_file`.
    write_script(
        &hook_script,
        &format!("cat > {}\n", output_file.to_string_lossy()),
    );

    // Background thread: open the script for writing, signal readiness, and
    // keep the handle open for another 100 ms before dropping it.
    let (ready_tx, ready_rx) = std::sync::mpsc::channel();
    let busy_script = hook_script.clone();
    let busy_handle = std::thread::spawn(move || {
        let _busy_file = OpenOptions::new().write(true).open(&busy_script).unwrap();
        ready_tx.send(()).unwrap();
        std::thread::sleep(std::time::Duration::from_millis(100));
    });
    // Do not start the hook until the write handle is known to be open.
    ready_rx.recv().unwrap();

    let hook = OciHook {
        path: hook_script.to_string_lossy().to_string(),
        args: vec![],
        env: vec![],
        timeout: Some(5),
    };
    let state = OciContainerState {
        oci_version: "1.0.2".to_string(),
        id: "test-container".to_string(),
        status: OciStatus::Creating,
        pid: 12345,
        bundle: "/tmp/test-bundle".to_string(),
    };

    // Join the background thread before unwrapping, so a hook failure does
    // not panic while that thread is still running.
    let result = OciHooks::run_hooks(&[hook], &state, "createRuntime");
    busy_handle.join().unwrap();
    result.unwrap();

    // The hook must eventually have run and captured the state JSON.
    let written = std::fs::read_to_string(&output_file).unwrap();
    let parsed: serde_json::Value = serde_json::from_str(&written).unwrap();
    assert_eq!(parsed["id"], "test-container");
}
1928
// A hook that exits with a non-zero status must surface as an error that
// mentions the exit status.
#[test]
fn test_oci_hook_nonzero_exit_is_error() {
    let scratch = TempDir::new().unwrap();
    let failing_script = scratch.path().join("fail.sh");
    write_script(&failing_script, "exit 1\n");

    let state = OciContainerState {
        oci_version: "1.0.2".to_string(),
        id: "test".to_string(),
        status: OciStatus::Creating,
        pid: 1,
        bundle: "".to_string(),
    };
    let hook = OciHook {
        path: failing_script.to_string_lossy().into_owned(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: Some(5),
    };

    let failure = OciHooks::run_hooks(&[hook], &state, "test")
        .expect_err("a hook exiting non-zero must be reported as an error");
    let message = failure.to_string();
    assert!(message.contains("exited with status"));
}
1956
// In best-effort mode a failing hook must not prevent later hooks from
// running.
#[test]
fn test_oci_hooks_best_effort_continues_on_failure() {
    let scratch = TempDir::new().unwrap();

    let fail_script = scratch.path().join("fail.sh");
    write_script(&fail_script, "exit 1\n");

    // The second hook leaves this marker file behind when it runs.
    let marker = scratch.path().join("ran");
    let ok_script = scratch.path().join("ok.sh");
    let ok_body = format!("touch {}\n", marker.to_string_lossy());
    write_script(&ok_script, &ok_body);

    let hook_for = |script: &std::path::Path| OciHook {
        path: script.to_string_lossy().into_owned(),
        args: Vec::new(),
        env: Vec::new(),
        timeout: Some(5),
    };
    let hooks = vec![hook_for(&fail_script), hook_for(&ok_script)];

    let state = OciContainerState {
        oci_version: "1.0.2".to_string(),
        id: "test".to_string(),
        status: OciStatus::Stopped,
        pid: 0,
        bundle: "".to_string(),
    };

    OciHooks::run_hooks_best_effort(&hooks, &state, "poststop");

    assert!(marker.exists(), "second hook should run after first fails");
}
1994}