Skip to main content

nucleus/container/
config.rs

1use crate::filesystem::{
2    normalize_container_destination, normalize_volume_destination, validate_production_rootfs_path,
3};
4use crate::isolation::{NamespaceConfig, UserNamespaceConfig};
5use crate::network::EgressPolicy;
6use crate::resources::ResourceLimits;
7use crate::security::GVisorPlatform;
8use std::fs::OpenOptions;
9use std::os::unix::fs::FileTypeExt;
10use std::os::unix::fs::OpenOptionsExt;
11use std::path::PathBuf;
12use std::time::Duration;
13
14fn open_dev_urandom() -> crate::error::Result<std::fs::File> {
15    let file = OpenOptions::new()
16        .read(true)
17        .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
18        .open("/dev/urandom")
19        .map_err(|e| {
20            crate::error::NucleusError::ConfigError(format!(
21                "Failed to open /dev/urandom for container ID generation: {}",
22                e
23            ))
24        })?;
25
26    let metadata = file.metadata().map_err(|e| {
27        crate::error::NucleusError::ConfigError(format!("Failed to stat /dev/urandom: {}", e))
28    })?;
29    if !metadata.file_type().is_char_device() {
30        return Err(crate::error::NucleusError::ConfigError(
31            "/dev/urandom is not a character device".to_string(),
32        ));
33    }
34
35    Ok(file)
36}
37
38/// Generate a unique 32-hex-char container ID (128-bit) using /dev/urandom.
39pub fn generate_container_id() -> crate::error::Result<String> {
40    use std::io::Read;
41
42    let mut buf = [0u8; 16];
43    let mut file = open_dev_urandom()?;
44    file.read_exact(&mut buf).map_err(|e| {
45        crate::error::NucleusError::ConfigError(format!(
46            "Failed to read secure random bytes for container ID generation: {}",
47            e
48        ))
49    })?;
50    Ok(hex::encode(buf))
51}
52
/// Trust level for a container workload.
///
/// Determines the minimum isolation guarantees the runtime must enforce.
/// The default is [`TrustLevel::Untrusted`].
// NOTE(review): the `clap::ValueEnum` derive presumably surfaces the variant
// doc comments below as CLI help text, so treat them as user-facing strings.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    clap::ValueEnum,
    serde::Serialize,
    serde::Deserialize,
)]
pub enum TrustLevel {
    /// Native kernel isolation (namespaces + seccomp + Landlock) is acceptable.
    Trusted,
    /// Requires gVisor; refuses to start without it unless degraded mode is allowed.
    #[default]
    Untrusted,
}
74
/// Service mode for the container.
///
/// Determines whether the container runs as an ephemeral agent sandbox
/// or a long-running production service with stricter requirements.
/// The default is [`ServiceMode::Agent`]; the production invariants are
/// checked by `ContainerConfig::validate_production_mode`.
// NOTE(review): the `clap::ValueEnum` derive presumably surfaces the variant
// doc comments below as CLI help text, so treat them as user-facing strings.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    clap::ValueEnum,
    serde::Serialize,
    serde::Deserialize,
)]
pub enum ServiceMode {
    /// Ephemeral agent workload (default). Allows degraded fallbacks.
    #[default]
    Agent,
    /// Long-running production service. Enforces strict security invariants:
    /// - Forbids degraded security, chroot fallback, and host networking
    /// - Requires cgroup resource limits
    /// - Requires pivot_root (no chroot fallback)
    /// - Requires explicit rootfs path (no host bind mounts)
    Production,
}
101
/// CLI-level runtime selection.
///
/// Parsed by clap at argument time – invalid values are caught immediately.
/// The variant triggers additional logic in `apply_runtime_selection`.
///
/// The accepted CLI spellings are fixed by the `#[value(name = ...)]`
/// attributes: `gvisor` and `native`.
// NOTE(review): this enum does not derive `Default`; the "(default)" wording on
// `GVisor` is presumably enforced where the CLI argument is declared — confirm.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, serde::Serialize, serde::Deserialize,
)]
pub enum RuntimeSelection {
    /// gVisor sandbox runtime (default). Provides kernel-level isolation.
    #[value(name = "gvisor")]
    GVisor,
    /// Native kernel isolation (namespaces + seccomp + Landlock).
    #[value(name = "native")]
    Native,
}
117
/// CLI-level network mode selection.
///
/// Parsed by clap at argument time. The `bridge` variant carries additional
/// configuration that is attached after parsing.
///
/// The accepted CLI spellings are fixed by the `#[value(name = ...)]`
/// attributes: `none`, `host` and `bridge`.
// NOTE(review): this enum does not derive `Default`; the "(default)" wording on
// `None` is presumably enforced where the CLI argument is declared — confirm.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, serde::Serialize, serde::Deserialize,
)]
pub enum NetworkModeArg {
    /// No network (default).
    #[value(name = "none")]
    None,
    /// Share host network namespace (dangerous).
    #[value(name = "host")]
    Host,
    /// Virtual bridge with veth pair.
    #[value(name = "bridge")]
    Bridge,
}
136
/// Required host kernel lockdown mode, when asserted by the runtime.
///
/// `Confidentiality` is the stricter of the two modes: a requirement of
/// `Integrity` is satisfied by either mode, while a requirement of
/// `Confidentiality` is satisfied only by `Confidentiality` (see `accepts`).
// NOTE(review): the `clap::ValueEnum` derive presumably surfaces the variant
// doc comments below as CLI help text, so treat them as user-facing strings.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, serde::Serialize, serde::Deserialize,
)]
pub enum KernelLockdownMode {
    /// Integrity mode blocks kernel writes from privileged userspace.
    Integrity,
    /// Confidentiality mode additionally blocks kernel data disclosure paths.
    Confidentiality,
}
147
148impl KernelLockdownMode {
149    pub fn as_str(self) -> &'static str {
150        match self {
151            Self::Integrity => "integrity",
152            Self::Confidentiality => "confidentiality",
153        }
154    }
155
156    pub fn accepts(self, active: Self) -> bool {
157        match self {
158            Self::Integrity => matches!(active, Self::Integrity | Self::Confidentiality),
159            Self::Confidentiality => matches!(active, Self::Confidentiality),
160        }
161    }
162}
163
/// Health check settings for a long-running service.
#[derive(Debug, Clone)]
pub struct HealthCheck {
    /// Command (argv) executed inside the container to probe health.
    pub command: Vec<String>,
    /// Time between successive health checks.
    pub interval: Duration,
    /// How many consecutive failures are needed before marking unhealthy.
    pub retries: u32,
    /// Grace period after start before the first health check.
    pub start_period: Duration,
    /// Time limit for a single health check execution.
    pub timeout: Duration,
}

impl Default for HealthCheck {
    /// Empty command, 30 s interval, 3 retries, and 5 s for both the start
    /// period and the per-check timeout.
    fn default() -> Self {
        let five_seconds = Duration::from_secs(5);
        HealthCheck {
            command: vec![],
            interval: Duration::from_secs(30),
            retries: 3,
            start_period: five_seconds,
            timeout: five_seconds,
        }
    }
}
190
/// Secrets configuration for mounting secret files into the container.
///
/// `ContainerConfig::validate_runtime_support` passes `dest` through
/// `normalize_container_destination`.
#[derive(Debug, Clone)]
pub struct SecretMount {
    /// Source path on the host (or Nix store path).
    pub source: PathBuf,
    /// Destination path inside the container.
    pub dest: PathBuf,
    /// File mode (default: 0o400, read-only by owner).
    // NOTE(review): no `Default` impl is visible in this file, so the documented
    // default is presumably applied by whoever constructs the value — confirm.
    pub mode: u32,
}
201
/// User and group identity the workload process runs as inside the container.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessIdentity {
    /// Primary user ID of the workload process.
    pub uid: u32,
    /// Primary group ID of the workload process.
    pub gid: u32,
    /// Supplementary group IDs of the workload process.
    pub additional_gids: Vec<u32>,
}

impl ProcessIdentity {
    /// The root identity (uid 0, gid 0, no supplementary groups), which is
    /// also the `Default`.
    pub fn root() -> Self {
        ProcessIdentity {
            uid: 0,
            gid: 0,
            additional_gids: vec![],
        }
    }

    /// True only for the exact root identity: uid 0, gid 0 and an empty
    /// supplementary group list.
    pub fn is_root(&self) -> bool {
        matches!(
            self,
            Self { uid: 0, gid: 0, additional_gids } if additional_gids.is_empty()
        )
    }
}

impl Default for ProcessIdentity {
    /// Defaults to [`ProcessIdentity::root`].
    fn default() -> Self {
        ProcessIdentity::root()
    }
}
234
/// Source backing for a volume mount.
///
/// `ContainerConfig::validate_runtime_support` requires a `Bind` source to be
/// an absolute, existing path; `Tmpfs` sources get no extra checks there.
#[derive(Debug, Clone)]
pub enum VolumeSource {
    /// Bind mount a host path into the container.
    Bind { source: PathBuf },
    /// Mount a fresh tmpfs at the destination.
    // NOTE(review): the expected format of `size` is not shown in this file —
    // confirm against the code that performs the mount.
    Tmpfs { size: Option<String> },
}
243
/// Volume configuration for mounting persistent or ephemeral storage.
///
/// `ContainerConfig::validate_runtime_support` passes `dest` through
/// `normalize_volume_destination`.
#[derive(Debug, Clone)]
pub struct VolumeMount {
    /// Backing storage for the volume.
    pub source: VolumeSource,
    /// Destination path inside the container.
    pub dest: PathBuf,
    /// Whether the volume is mounted read-only.
    pub read_only: bool,
}
254
/// Readiness probe configuration.
///
/// Stored in `ContainerConfig::readiness_probe`; how each variant is
/// evaluated is implemented outside this file.
#[derive(Debug, Clone)]
pub enum ReadinessProbe {
    /// Run a command; ready when it exits 0.
    Exec { command: Vec<String> },
    /// Check TCP port connectivity.
    TcpPort(u16),
    /// Use sd_notify protocol (service sends READY=1).
    SdNotify,
}
265
/// Container configuration
///
/// Built with `ContainerConfig::try_new` / `try_new_with_id`, which fill in
/// the defaults, and then refined through the `with_*` builder methods.
/// `validate_production_mode` and `validate_runtime_support` check the
/// assembled configuration.
#[derive(Debug, Clone)]
pub struct ContainerConfig {
    /// Unique container ID (auto-generated 32 hex chars, 128-bit)
    pub id: String,

    /// User-supplied container name (optional, defaults to ID)
    pub name: String,

    /// Command to execute in the container
    pub command: Vec<String>,

    /// Context directory to pre-populate (optional)
    pub context_dir: Option<PathBuf>,

    /// Resource limits
    pub limits: ResourceLimits,

    /// Namespace configuration
    pub namespaces: NamespaceConfig,

    /// User namespace configuration (for rootless mode)
    pub user_ns_config: Option<UserNamespaceConfig>,

    /// Hostname to set in UTS namespace (optional)
    // `try_new_with_id` initialises this to `Some(name)`.
    pub hostname: Option<String>,

    /// Whether to use gVisor runtime
    // `try_new_with_id` initialises this to `true`.
    pub use_gvisor: bool,

    /// Trust level for this workload
    pub trust_level: TrustLevel,

    /// Network mode
    // `try_new_with_id` initialises this to `NetworkMode::None`.
    pub network: crate::network::NetworkMode,

    /// Context mode (copy or bind mount)
    // `try_new_with_id` initialises this to `ContextMode::Copy`.
    pub context_mode: crate::filesystem::ContextMode,

    /// Allow degraded security behavior if a hardening layer cannot be applied
    pub allow_degraded_security: bool,

    /// Allow chroot fallback when pivot_root fails (weaker isolation)
    pub allow_chroot_fallback: bool,

    /// Require explicit opt-in for host networking
    pub allow_host_network: bool,

    /// Mount /proc read-only inside the container
    // `try_new_with_id` initialises this to `true`.
    pub proc_readonly: bool,

    /// Service mode (agent vs production)
    pub service_mode: ServiceMode,

    /// Pre-built rootfs path (Nix store path). When set, this is bind-mounted
    /// as the container root instead of bind-mounting host /bin, /usr, /lib, etc.
    pub rootfs_path: Option<PathBuf>,

    /// Egress policy for audited outbound network access.
    pub egress_policy: Option<EgressPolicy>,

    /// Health check configuration for long-running services.
    pub health_check: Option<HealthCheck>,

    /// Readiness probe for service startup detection.
    pub readiness_probe: Option<ReadinessProbe>,

    /// Secret files to mount into the container.
    pub secrets: Vec<SecretMount>,

    /// Volume mounts to attach to the container filesystem.
    pub volumes: Vec<VolumeMount>,

    /// Environment variables to pass to the container process.
    pub environment: Vec<(String, String)>,

    /// Runtime uid/gid and supplementary groups for the workload process.
    pub process_identity: ProcessIdentity,

    /// Desired topology config hash for reconciliation change detection.
    pub config_hash: Option<u64>,

    /// Enable sd_notify integration (pass NOTIFY_SOCKET into container).
    pub sd_notify: bool,

    /// Require the host kernel to be in at least this lockdown mode.
    pub required_kernel_lockdown: Option<KernelLockdownMode>,

    /// Verify context contents before executing the workload.
    pub verify_context_integrity: bool,

    /// Verify rootfs attestation manifest before mounting it.
    pub verify_rootfs_attestation: bool,

    /// Request kernel logging for denied seccomp decisions when supported.
    pub seccomp_log_denied: bool,

    /// Select the gVisor platform backend.
    pub gvisor_platform: GVisorPlatform,

    /// Path to a per-service seccomp profile (JSON, OCI subset format).
    /// When set, this profile is used instead of the built-in allowlist.
    pub seccomp_profile: Option<PathBuf>,

    /// Expected SHA-256 hash of the seccomp profile file for integrity verification.
    pub seccomp_profile_sha256: Option<String>,

    /// Seccomp operating mode.
    pub seccomp_mode: SeccompMode,

    /// Path to write seccomp trace log (NDJSON) when seccomp_mode == Trace.
    pub seccomp_trace_log: Option<PathBuf>,

    /// Additional syscalls to allow beyond the built-in default allowlist.
    /// Each entry is a syscall name (e.g. "io_uring_setup", "sysinfo").
    /// These are merged into the built-in filter; they do NOT replace it.
    pub seccomp_allow_syscalls: Vec<String>,

    /// Path to capability policy file (TOML).
    pub caps_policy: Option<PathBuf>,

    /// Expected SHA-256 hash of the capability policy file.
    pub caps_policy_sha256: Option<String>,

    /// Path to Landlock policy file (TOML).
    pub landlock_policy: Option<PathBuf>,

    /// Expected SHA-256 hash of the Landlock policy file.
    pub landlock_policy_sha256: Option<String>,

    /// OCI lifecycle hooks to execute at various container lifecycle points.
    pub hooks: Option<crate::security::OciHooks>,

    /// Path to write the container PID (OCI --pid-file).
    pub pid_file: Option<PathBuf>,

    /// Path to AF_UNIX socket for console pseudo-terminal master (OCI --console-socket).
    pub console_socket: Option<PathBuf>,

    /// Override OCI bundle directory path (OCI --bundle).
    pub bundle_dir: Option<PathBuf>,

    /// Override root directory for state storage (--root).
    /// When set, ContainerStateManager uses this instead of the default.
    pub state_root: Option<PathBuf>,
}
412
/// Seccomp operating mode.
///
/// The default is [`SeccompMode::Enforce`]. `ContainerConfig` validation
/// rejects `Trace` in production mode, under the gVisor runtime, and when no
/// trace log path is configured.
// NOTE(review): the `clap::ValueEnum` derive presumably surfaces the variant
// doc comments below as CLI help text, so treat them as user-facing strings.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    clap::ValueEnum,
    serde::Serialize,
    serde::Deserialize,
)]
pub enum SeccompMode {
    /// Normal enforcement – deny unlisted syscalls.
    #[default]
    Enforce,
    /// Trace mode – allow all syscalls but log them for profile generation.
    /// Development only; rejected in production mode.
    Trace,
}
433
434impl ContainerConfig {
    /// Create a new container config with a random ID.
    ///
    /// Delegates to [`Self::try_new_with_id`] with no preset ID, so the ID is
    /// always freshly generated.
    ///
    /// # Errors
    /// Returns an error (it does not panic) if secure random bytes cannot be
    /// read from `/dev/urandom`.
    pub fn try_new(name: Option<String>, command: Vec<String>) -> crate::error::Result<Self> {
        Self::try_new_with_id(None, name, command)
    }
442
443    /// Create a new container config, optionally using a pre-generated ID.
444    ///
445    /// When `preset_id` is `Some`, it is used as the container ID instead of
446    /// generating a new one. This is used by `--detach` to ensure the outer
447    /// CLI process and the systemd-managed inner process share the same ID.
448    pub fn try_new_with_id(
449        preset_id: Option<String>,
450        name: Option<String>,
451        command: Vec<String>,
452    ) -> crate::error::Result<Self> {
453        let id = match preset_id {
454            Some(id) => {
455                // Validate preset ID: must be exactly 32 hex chars
456                if id.len() != 32 || !id.chars().all(|c| c.is_ascii_hexdigit()) {
457                    return Err(crate::error::NucleusError::ConfigError(format!(
458                        "Invalid preset container ID '{}': must be 32 hex characters",
459                        id
460                    )));
461                }
462                id
463            }
464            None => generate_container_id()?,
465        };
466        let name = name.unwrap_or_else(|| id.clone());
467        Ok(Self {
468            id,
469            name: name.clone(),
470            command,
471            context_dir: None,
472            limits: ResourceLimits::default(),
473            namespaces: NamespaceConfig::default(),
474            user_ns_config: None,
475            hostname: Some(name),
476            use_gvisor: true,
477            trust_level: TrustLevel::default(),
478            network: crate::network::NetworkMode::None,
479            context_mode: crate::filesystem::ContextMode::Copy,
480            allow_degraded_security: false,
481            allow_chroot_fallback: false,
482            allow_host_network: false,
483            proc_readonly: true,
484            service_mode: ServiceMode::default(),
485            rootfs_path: None,
486            egress_policy: None,
487            health_check: None,
488            readiness_probe: None,
489            secrets: Vec::new(),
490            volumes: Vec::new(),
491            environment: Vec::new(),
492            process_identity: ProcessIdentity::default(),
493            config_hash: None,
494            sd_notify: false,
495            required_kernel_lockdown: None,
496            verify_context_integrity: false,
497            verify_rootfs_attestation: false,
498            seccomp_log_denied: false,
499            gvisor_platform: GVisorPlatform::default(),
500            seccomp_profile: None,
501            seccomp_profile_sha256: None,
502            seccomp_mode: SeccompMode::default(),
503            seccomp_trace_log: None,
504            seccomp_allow_syscalls: Vec::new(),
505            caps_policy: None,
506            caps_policy_sha256: None,
507            landlock_policy: None,
508            landlock_policy_sha256: None,
509            hooks: None,
510            pid_file: None,
511            console_socket: None,
512            bundle_dir: None,
513            state_root: None,
514        })
515    }
516
517    /// Enable rootless mode with user namespace mapping
518    #[must_use]
519    pub fn with_rootless(mut self) -> Self {
520        self.namespaces.user = true;
521        self.user_ns_config = Some(UserNamespaceConfig::rootless());
522        self
523    }
524
525    /// Configure custom user namespace mapping
526    #[must_use]
527    pub fn with_user_namespace(mut self, config: UserNamespaceConfig) -> Self {
528        self.namespaces.user = true;
529        self.user_ns_config = Some(config);
530        self
531    }
532
533    #[must_use]
534    pub fn with_context(mut self, dir: PathBuf) -> Self {
535        self.context_dir = Some(dir);
536        self
537    }
538
539    #[must_use]
540    pub fn with_limits(mut self, limits: ResourceLimits) -> Self {
541        self.limits = limits;
542        self
543    }
544
545    #[must_use]
546    pub fn with_namespaces(mut self, namespaces: NamespaceConfig) -> Self {
547        self.namespaces = namespaces;
548        self
549    }
550
551    #[must_use]
552    pub fn with_hostname(mut self, hostname: Option<String>) -> Self {
553        self.hostname = hostname;
554        self
555    }
556
557    #[must_use]
558    pub fn with_gvisor(mut self, enabled: bool) -> Self {
559        self.use_gvisor = enabled;
560        self
561    }
562
563    #[must_use]
564    pub fn with_trust_level(mut self, level: TrustLevel) -> Self {
565        self.trust_level = level;
566        self
567    }
568
569    /// Enable OCI bundle runtime path (always OCI for gVisor).
570    #[must_use]
571    pub fn with_oci_bundle(mut self) -> Self {
572        self.use_gvisor = true;
573        self
574    }
575
576    #[must_use]
577    pub fn with_network(mut self, mode: crate::network::NetworkMode) -> Self {
578        self.network = mode;
579        self
580    }
581
582    #[must_use]
583    pub fn with_context_mode(mut self, mode: crate::filesystem::ContextMode) -> Self {
584        self.context_mode = mode;
585        self
586    }
587
588    #[must_use]
589    pub fn with_allow_degraded_security(mut self, allow: bool) -> Self {
590        self.allow_degraded_security = allow;
591        self
592    }
593
594    #[must_use]
595    pub fn with_allow_chroot_fallback(mut self, allow: bool) -> Self {
596        self.allow_chroot_fallback = allow;
597        self
598    }
599
600    #[must_use]
601    pub fn with_allow_host_network(mut self, allow: bool) -> Self {
602        self.allow_host_network = allow;
603        self
604    }
605
606    #[must_use]
607    pub fn with_proc_readonly(mut self, proc_readonly: bool) -> Self {
608        self.proc_readonly = proc_readonly;
609        self
610    }
611
612    #[must_use]
613    pub fn with_service_mode(mut self, mode: ServiceMode) -> Self {
614        self.service_mode = mode;
615        self
616    }
617
618    #[must_use]
619    pub fn with_rootfs_path(mut self, path: PathBuf) -> Self {
620        self.rootfs_path = Some(path);
621        self
622    }
623
624    #[must_use]
625    pub fn with_egress_policy(mut self, policy: EgressPolicy) -> Self {
626        self.egress_policy = Some(policy);
627        self
628    }
629
630    #[must_use]
631    pub fn with_health_check(mut self, hc: HealthCheck) -> Self {
632        self.health_check = Some(hc);
633        self
634    }
635
636    #[must_use]
637    pub fn with_readiness_probe(mut self, probe: ReadinessProbe) -> Self {
638        self.readiness_probe = Some(probe);
639        self
640    }
641
642    #[must_use]
643    pub fn with_secret(mut self, secret: SecretMount) -> Self {
644        self.secrets.push(secret);
645        self
646    }
647
648    #[must_use]
649    pub fn with_volume(mut self, volume: VolumeMount) -> Self {
650        self.volumes.push(volume);
651        self
652    }
653
654    #[must_use]
655    pub fn with_env(mut self, key: String, value: String) -> Self {
656        self.environment.push((key, value));
657        self
658    }
659
660    #[must_use]
661    pub fn with_process_identity(mut self, identity: ProcessIdentity) -> Self {
662        self.process_identity = identity;
663        self
664    }
665
666    #[must_use]
667    pub fn with_config_hash(mut self, hash: u64) -> Self {
668        self.config_hash = Some(hash);
669        self
670    }
671
672    #[must_use]
673    pub fn with_sd_notify(mut self, enabled: bool) -> Self {
674        self.sd_notify = enabled;
675        self
676    }
677
678    #[must_use]
679    pub fn with_required_kernel_lockdown(mut self, mode: KernelLockdownMode) -> Self {
680        self.required_kernel_lockdown = Some(mode);
681        self
682    }
683
684    #[must_use]
685    pub fn with_verify_context_integrity(mut self, enabled: bool) -> Self {
686        self.verify_context_integrity = enabled;
687        self
688    }
689
690    #[must_use]
691    pub fn with_verify_rootfs_attestation(mut self, enabled: bool) -> Self {
692        self.verify_rootfs_attestation = enabled;
693        self
694    }
695
696    #[must_use]
697    pub fn with_seccomp_log_denied(mut self, enabled: bool) -> Self {
698        self.seccomp_log_denied = enabled;
699        self
700    }
701
702    #[must_use]
703    pub fn with_gvisor_platform(mut self, platform: GVisorPlatform) -> Self {
704        self.gvisor_platform = platform;
705        self
706    }
707
708    #[must_use]
709    pub fn with_seccomp_profile(mut self, path: PathBuf) -> Self {
710        self.seccomp_profile = Some(path);
711        self
712    }
713
714    #[must_use]
715    pub fn with_seccomp_profile_sha256(mut self, hash: String) -> Self {
716        self.seccomp_profile_sha256 = Some(hash);
717        self
718    }
719
720    #[must_use]
721    pub fn with_seccomp_mode(mut self, mode: SeccompMode) -> Self {
722        self.seccomp_mode = mode;
723        self
724    }
725
726    #[must_use]
727    pub fn with_seccomp_trace_log(mut self, path: PathBuf) -> Self {
728        self.seccomp_trace_log = Some(path);
729        self
730    }
731
732    #[must_use]
733    pub fn with_seccomp_allow_syscalls(mut self, syscalls: Vec<String>) -> Self {
734        self.seccomp_allow_syscalls = syscalls;
735        self
736    }
737
738    #[must_use]
739    pub fn with_caps_policy(mut self, path: PathBuf) -> Self {
740        self.caps_policy = Some(path);
741        self
742    }
743
744    #[must_use]
745    pub fn with_caps_policy_sha256(mut self, hash: String) -> Self {
746        self.caps_policy_sha256 = Some(hash);
747        self
748    }
749
750    #[must_use]
751    pub fn with_landlock_policy(mut self, path: PathBuf) -> Self {
752        self.landlock_policy = Some(path);
753        self
754    }
755
756    #[must_use]
757    pub fn with_landlock_policy_sha256(mut self, hash: String) -> Self {
758        self.landlock_policy_sha256 = Some(hash);
759        self
760    }
761
762    #[must_use]
763    pub fn with_pid_file(mut self, path: PathBuf) -> Self {
764        self.pid_file = Some(path);
765        self
766    }
767
768    #[must_use]
769    pub fn with_console_socket(mut self, path: PathBuf) -> Self {
770        self.console_socket = Some(path);
771        self
772    }
773
774    #[must_use]
775    pub fn with_bundle_dir(mut self, path: PathBuf) -> Self {
776        self.bundle_dir = Some(path);
777        self
778    }
779
780    pub fn with_state_root(mut self, root: PathBuf) -> Self {
781        self.state_root = Some(root);
782        self
783    }
784
785    /// Validate that production mode invariants are satisfied.
786    /// Called before container startup when service_mode == Production.
787    pub fn validate_production_mode(&self) -> crate::error::Result<()> {
788        if self.service_mode != ServiceMode::Production {
789            return Ok(());
790        }
791
792        if self.allow_degraded_security {
793            return Err(crate::error::NucleusError::ConfigError(
794                "Production mode forbids --allow-degraded-security".to_string(),
795            ));
796        }
797
798        if self.allow_chroot_fallback {
799            return Err(crate::error::NucleusError::ConfigError(
800                "Production mode forbids --allow-chroot-fallback".to_string(),
801            ));
802        }
803
804        if self.allow_host_network {
805            return Err(crate::error::NucleusError::ConfigError(
806                "Production mode forbids --allow-host-network".to_string(),
807            ));
808        }
809
810        if matches!(self.network, crate::network::NetworkMode::Host) {
811            return Err(crate::error::NucleusError::ConfigError(
812                "Production mode forbids host network mode".to_string(),
813            ));
814        }
815
816        // Production mode requires explicit rootfs (no host bind mount fallback)
817        let Some(rootfs_path) = self.rootfs_path.as_ref() else {
818            return Err(crate::error::NucleusError::ConfigError(
819                "Production mode requires explicit --rootfs path (no host bind mounts)".to_string(),
820            ));
821        };
822
823        if self.seccomp_mode == SeccompMode::Trace {
824            return Err(crate::error::NucleusError::ConfigError(
825                "Production mode forbids --seccomp-mode trace".to_string(),
826            ));
827        }
828
829        if !self.seccomp_allow_syscalls.is_empty() {
830            let allow_network = !matches!(self.network, crate::network::NetworkMode::None);
831            crate::security::SeccompManager::validate_extra_syscalls_for_production(
832                allow_network,
833                &self.seccomp_allow_syscalls,
834            )?;
835        }
836
837        // L6: Policy files must have SHA-256 verification in production
838        if self.caps_policy.is_some() && self.caps_policy_sha256.is_none() {
839            return Err(crate::error::NucleusError::ConfigError(
840                "Production mode requires --caps-policy-sha256 when using --caps-policy"
841                    .to_string(),
842            ));
843        }
844        if self.landlock_policy.is_some() && self.landlock_policy_sha256.is_none() {
845            return Err(crate::error::NucleusError::ConfigError(
846                "Production mode requires --landlock-policy-sha256 when using --landlock-policy"
847                    .to_string(),
848            ));
849        }
850        if self.seccomp_profile.is_some() && self.seccomp_profile_sha256.is_none() {
851            return Err(crate::error::NucleusError::ConfigError(
852                "Production mode requires --seccomp-profile-sha256 when using --seccomp-profile"
853                    .to_string(),
854            ));
855        }
856
857        // Production mode requires explicit resource limits
858        if self.limits.memory_bytes.is_none() {
859            return Err(crate::error::NucleusError::ConfigError(
860                "Production mode requires explicit --memory limit".to_string(),
861            ));
862        }
863
864        if self.limits.cpu_quota_us.is_none() {
865            return Err(crate::error::NucleusError::ConfigError(
866                "Production mode requires explicit --cpus limit".to_string(),
867            ));
868        }
869
870        if !self.verify_rootfs_attestation {
871            return Err(crate::error::NucleusError::ConfigError(
872                "Production mode requires --verify-rootfs-attestation".to_string(),
873            ));
874        }
875
876        validate_production_rootfs_path(rootfs_path)?;
877
878        Ok(())
879    }
880
881    /// Validate runtime-specific feature support.
882    pub fn validate_runtime_support(&self) -> crate::error::Result<()> {
883        self.limits.validate_runtime_sanity()?;
884
885        if let Some(user_ns_config) = &self.user_ns_config {
886            if !self.process_identity.additional_gids.is_empty() {
887                return Err(crate::error::NucleusError::ConfigError(
888                    "Supplementary groups are currently unsupported with user namespaces"
889                        .to_string(),
890                ));
891            }
892
893            let uid_mapped = user_ns_config.uid_mappings.iter().any(|mapping| {
894                self.process_identity.uid >= mapping.container_id
895                    && self.process_identity.uid
896                        < mapping.container_id.saturating_add(mapping.count)
897            });
898            if !uid_mapped {
899                return Err(crate::error::NucleusError::ConfigError(format!(
900                    "Process uid {} is not mapped in the configured user namespace",
901                    self.process_identity.uid
902                )));
903            }
904
905            let gid_mapped = user_ns_config.gid_mappings.iter().any(|mapping| {
906                self.process_identity.gid >= mapping.container_id
907                    && self.process_identity.gid
908                        < mapping.container_id.saturating_add(mapping.count)
909            });
910            if !gid_mapped {
911                return Err(crate::error::NucleusError::ConfigError(format!(
912                    "Process gid {} is not mapped in the configured user namespace",
913                    self.process_identity.gid
914                )));
915            }
916        }
917
918        if self.seccomp_mode == SeccompMode::Trace && self.seccomp_trace_log.is_none() {
919            return Err(crate::error::NucleusError::ConfigError(
920                "Seccomp trace mode requires --seccomp-log / seccomp_trace_log".to_string(),
921            ));
922        }
923
924        for secret in &self.secrets {
925            normalize_container_destination(&secret.dest)?;
926        }
927
928        for volume in &self.volumes {
929            normalize_volume_destination(&volume.dest)?;
930            match &volume.source {
931                VolumeSource::Bind { source } => {
932                    if !source.is_absolute() {
933                        return Err(crate::error::NucleusError::ConfigError(format!(
934                            "Volume source must be absolute: {:?}",
935                            source
936                        )));
937                    }
938                    if !source.exists() {
939                        return Err(crate::error::NucleusError::ConfigError(format!(
940                            "Volume source does not exist: {:?}",
941                            source
942                        )));
943                    }
944                    crate::filesystem::validate_bind_mount_source(source)?;
945                }
946                VolumeSource::Tmpfs { .. } => {}
947            }
948        }
949
950        if !self.use_gvisor {
951            return Ok(());
952        }
953
954        if self.seccomp_mode == SeccompMode::Trace {
955            return Err(crate::error::NucleusError::ConfigError(
956                "gVisor runtime does not support --seccomp-mode trace; use --runtime native"
957                    .to_string(),
958            ));
959        }
960
961        if self.seccomp_log_denied {
962            return Err(crate::error::NucleusError::ConfigError(
963                "gVisor runtime does not support seccomp deny logging; use --runtime native"
964                    .to_string(),
965            ));
966        }
967
968        if !self.seccomp_allow_syscalls.is_empty() {
969            return Err(crate::error::NucleusError::ConfigError(
970                "gVisor runtime does not support --seccomp-allow; use a custom --seccomp-profile or --runtime native"
971                    .to_string(),
972            ));
973        }
974
975        if self.caps_policy.is_some() {
976            return Err(crate::error::NucleusError::ConfigError(
977                "gVisor runtime does not support capability policy files; use --runtime native"
978                    .to_string(),
979            ));
980        }
981
982        if self.landlock_policy.is_some() {
983            return Err(crate::error::NucleusError::ConfigError(
984                "gVisor runtime does not support Landlock policy files; use --runtime native"
985                    .to_string(),
986            ));
987        }
988
989        if self.health_check.is_some() {
990            return Err(crate::error::NucleusError::ConfigError(
991                "gVisor runtime does not support exec health checks; use --runtime native or remove --health-cmd"
992                    .to_string(),
993            ));
994        }
995
996        if matches!(
997            self.readiness_probe.as_ref(),
998            Some(ReadinessProbe::Exec { .. }) | Some(ReadinessProbe::TcpPort(_))
999        ) {
1000            return Err(crate::error::NucleusError::ConfigError(
1001                "gVisor runtime does not support exec/TCP readiness probes; use --runtime native or --readiness-sd-notify"
1002                    .to_string(),
1003            ));
1004        }
1005
1006        if self.verify_context_integrity
1007            && self.context_dir.is_some()
1008            && matches!(self.context_mode, crate::filesystem::ContextMode::BindMount)
1009        {
1010            return Err(crate::error::NucleusError::ConfigError(
1011                "gVisor runtime cannot verify bind-mounted context integrity; use --context-mode copy or disable --verify-context-integrity"
1012                    .to_string(),
1013            ));
1014        }
1015
1016        Ok(())
1017    }
1018
1019    /// Apply runtime selection (native vs gVisor) and OCI bundle mode.
1020    pub fn apply_runtime_selection(
1021        mut self,
1022        runtime: RuntimeSelection,
1023        oci: bool,
1024    ) -> crate::error::Result<Self> {
1025        match runtime {
1026            RuntimeSelection::Native => {
1027                if oci {
1028                    return Err(crate::error::NucleusError::ConfigError(
1029                        "--bundle requires gVisor runtime; use --runtime gvisor".to_string(),
1030                    ));
1031                }
1032                self = self.with_gvisor(false);
1033            }
1034            RuntimeSelection::GVisor => {
1035                self = self.with_gvisor(true);
1036                if !oci {
1037                    tracing::info!(
1038                        "Security hardening: enabling OCI bundle mode for gVisor runtime"
1039                    );
1040                }
1041                self = self.with_oci_bundle();
1042            }
1043        }
1044        Ok(self)
1045    }
1046}
1047
1048/// Validate a container name for safe use.
1049pub fn validate_container_name(name: &str) -> crate::error::Result<()> {
1050    if name.is_empty() || name.len() > 128 {
1051        return Err(crate::error::NucleusError::ConfigError(
1052            "Invalid container name: must be 1-128 characters".to_string(),
1053        ));
1054    }
1055    if !name
1056        .chars()
1057        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.')
1058    {
1059        return Err(crate::error::NucleusError::ConfigError(
1060            "Invalid container name: allowed characters are a-zA-Z0-9, '-', '_', '.'".to_string(),
1061        ));
1062    }
1063    Ok(())
1064}
1065
1066/// Validate a hostname according to RFC 1123.
1067pub fn validate_hostname(hostname: &str) -> crate::error::Result<()> {
1068    if hostname.is_empty() || hostname.len() > 253 {
1069        return Err(crate::error::NucleusError::ConfigError(
1070            "Invalid hostname: must be 1-253 characters".to_string(),
1071        ));
1072    }
1073
1074    for label in hostname.split('.') {
1075        if label.is_empty() || label.len() > 63 {
1076            return Err(crate::error::NucleusError::ConfigError(format!(
1077                "Invalid hostname label: '{}'",
1078                label
1079            )));
1080        }
1081        if label.starts_with('-') || label.ends_with('-') {
1082            return Err(crate::error::NucleusError::ConfigError(format!(
1083                "Invalid hostname label '{}': cannot start or end with '-'",
1084                label
1085            )));
1086        }
1087        if !label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
1088            return Err(crate::error::NucleusError::ConfigError(format!(
1089                "Invalid hostname label '{}': allowed characters are a-zA-Z0-9 and '-'",
1090                label
1091            )));
1092        }
1093    }
1094
1095    Ok(())
1096}
1097
#[cfg(test)]
// NOTE(review): presumably some APIs exercised below are deprecated; confirm
// which ones and narrow this allow if possible.
#[allow(deprecated)]
mod tests {
    use super::*;
    use crate::network::NetworkMode;

    /// Baseline configuration shared by the tests: `None` as the first
    /// `try_new` argument and `/bin/sh` as the command.
    fn sh_config() -> ContainerConfig {
        ContainerConfig::try_new(None, vec!["/bin/sh".to_string()]).unwrap()
    }

    /// Resource limits with only a 512M memory limit configured.
    fn memory_only_limits() -> crate::resources::ResourceLimits {
        crate::resources::ResourceLimits::default()
            .with_memory("512M")
            .unwrap()
    }

    /// Resource limits with a 512M memory limit and 2 CPU cores configured.
    fn production_limits() -> crate::resources::ResourceLimits {
        memory_only_limits().with_cpu_cores(2.0).unwrap()
    }

    /// Rootfs path used by the production-mode tests.
    fn test_rootfs_path() -> std::path::PathBuf {
        std::path::PathBuf::from("/nix/store")
    }

    #[test]
    fn test_generate_container_id_is_32_hex_chars() {
        let id = generate_container_id().unwrap();
        assert_eq!(
            id.len(),
            32,
            "Container ID must be full 128-bit (32 hex chars), got {}",
            id.len()
        );
        assert!(
            id.chars().all(|c| c.is_ascii_hexdigit()),
            "Container ID must be hex: {}",
            id
        );
    }

    #[test]
    fn test_generate_container_id_is_unique() {
        let id1 = generate_container_id().unwrap();
        let id2 = generate_container_id().unwrap();
        assert_ne!(id1, id2, "Two consecutive IDs must differ");
    }

    #[test]
    fn test_config_security_defaults_are_hardened() {
        let cfg = sh_config();
        assert!(!cfg.allow_degraded_security);
        assert!(!cfg.allow_chroot_fallback);
        assert!(!cfg.allow_host_network);
        assert!(cfg.proc_readonly);
        assert_eq!(cfg.service_mode, ServiceMode::Agent);
        assert!(cfg.rootfs_path.is_none());
        assert!(cfg.egress_policy.is_none());
        assert!(cfg.secrets.is_empty());
        assert!(cfg.volumes.is_empty());
        assert!(!cfg.sd_notify);
        assert!(cfg.required_kernel_lockdown.is_none());
        assert!(!cfg.verify_context_integrity);
        assert!(!cfg.verify_rootfs_attestation);
        assert!(!cfg.seccomp_log_denied);
        assert_eq!(cfg.gvisor_platform, GVisorPlatform::Systrap);
    }

    #[test]
    fn test_production_mode_rejects_degraded_flags() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_allow_degraded_security(true)
            .with_rootfs_path(std::path::PathBuf::from("/nix/store/fake-rootfs"))
            .with_limits(production_limits());
        assert!(cfg.validate_production_mode().is_err());
    }

    #[test]
    fn test_production_mode_rejects_chroot_fallback() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_allow_chroot_fallback(true)
            .with_rootfs_path(std::path::PathBuf::from("/nix/store/fake-rootfs"))
            .with_limits(production_limits());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(
            err.to_string().contains("chroot"),
            "Production mode must reject chroot fallback"
        );
    }

    #[test]
    fn test_production_mode_requires_rootfs() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_limits(memory_only_limits());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(err.to_string().contains("--rootfs"));
    }

    #[test]
    fn test_production_mode_requires_memory_limit() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(test_rootfs_path());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(err.to_string().contains("--memory"));
    }

    #[test]
    fn test_production_mode_valid_config() {
        let rootfs = test_rootfs_path();
        if !rootfs.is_dir() {
            // The positive case needs the rootfs directory to exist on the host;
            // make the skip visible instead of passing silently.
            eprintln!(
                "skipping test_production_mode_valid_config: {:?} is not a directory",
                rootfs
            );
            return;
        }
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(rootfs)
            .with_verify_rootfs_attestation(true)
            .with_limits(production_limits());
        let result = cfg.validate_production_mode();
        assert!(
            result.is_ok(),
            "expected a valid production config, got {:?}",
            result.err()
        );
    }

    #[test]
    fn test_production_mode_rejects_rootfs_parent_traversal() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(std::path::PathBuf::from("/nix/store/../../tmp/evil-rootfs"))
            .with_verify_rootfs_attestation(true)
            .with_limits(production_limits());

        let err = cfg.validate_production_mode().unwrap_err();

        assert!(
            err.to_string().contains("parent traversal"),
            "Production mode must reject raw rootfs traversal before canonicalization"
        );
    }

    #[test]
    fn test_production_mode_rejects_out_of_store_rootfs() {
        let temp = tempfile::TempDir::new().unwrap();
        let rootfs = temp.path().join("rootfs");
        std::fs::create_dir(&rootfs).unwrap();
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(rootfs)
            .with_verify_rootfs_attestation(true)
            .with_limits(production_limits());

        let err = cfg.validate_production_mode().unwrap_err();

        assert!(
            err.to_string().contains("/nix/store"),
            "Production mode must reject rootfs paths that resolve outside /nix/store"
        );
    }

    #[test]
    fn test_production_mode_requires_rootfs_attestation() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(test_rootfs_path())
            .with_limits(production_limits());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(err.to_string().contains("attestation"));
    }

    #[test]
    fn test_production_mode_rejects_seccomp_trace() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(test_rootfs_path())
            .with_seccomp_mode(SeccompMode::Trace)
            .with_limits(production_limits());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(
            err.to_string().contains("trace"),
            "Production mode must reject seccomp trace mode"
        );
    }

    #[test]
    fn test_production_mode_rejects_security_critical_seccomp_allow() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(test_rootfs_path())
            .with_verify_rootfs_attestation(true)
            .with_seccomp_allow_syscalls(vec!["keyctl".to_string()])
            .with_limits(production_limits());

        let err = cfg.validate_production_mode().unwrap_err();
        assert!(err.to_string().contains("seccomp-allow"));
        assert!(err.to_string().contains("keyctl"));
    }

    #[test]
    fn test_production_mode_requires_cpu_limit() {
        let cfg = sh_config()
            .with_service_mode(ServiceMode::Production)
            .with_rootfs_path(test_rootfs_path())
            .with_limits(memory_only_limits());
        let err = cfg.validate_production_mode().unwrap_err();
        assert!(err.to_string().contains("--cpus"));
    }

    #[test]
    fn test_config_security_builders_override_defaults() {
        let cfg = sh_config()
            .with_allow_degraded_security(true)
            .with_allow_chroot_fallback(true)
            .with_allow_host_network(true)
            .with_proc_readonly(false)
            .with_network(NetworkMode::Host);

        assert!(cfg.allow_degraded_security);
        assert!(cfg.allow_chroot_fallback);
        assert!(cfg.allow_host_network);
        assert!(!cfg.proc_readonly);
        assert!(matches!(cfg.network, NetworkMode::Host));
    }

    #[test]
    fn test_hardening_builders_override_defaults() {
        let cfg = sh_config()
            .with_required_kernel_lockdown(KernelLockdownMode::Confidentiality)
            .with_verify_context_integrity(true)
            .with_verify_rootfs_attestation(true)
            .with_seccomp_log_denied(true)
            .with_gvisor_platform(GVisorPlatform::Kvm);

        assert_eq!(
            cfg.required_kernel_lockdown,
            Some(KernelLockdownMode::Confidentiality)
        );
        assert!(cfg.verify_context_integrity);
        assert!(cfg.verify_rootfs_attestation);
        assert!(cfg.seccomp_log_denied);
        assert_eq!(cfg.gvisor_platform, GVisorPlatform::Kvm);
    }

    #[test]
    fn test_seccomp_trace_requires_log_path() {
        let cfg = sh_config()
            .with_gvisor(false)
            .with_seccomp_mode(SeccompMode::Trace);

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("seccomp-log"));
    }

    #[test]
    fn test_gvisor_allows_custom_seccomp_profile_but_rejects_native_policy_files() {
        let cfg = sh_config()
            .with_seccomp_profile(PathBuf::from("/tmp/seccomp.json"))
            .with_caps_policy(PathBuf::from("/tmp/caps.toml"));

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("capability policy"));
    }

    #[test]
    fn test_gvisor_accepts_custom_seccomp_profile() {
        let cfg = sh_config().with_seccomp_profile(PathBuf::from("/tmp/seccomp.json"));

        cfg.validate_runtime_support().unwrap();
    }

    #[test]
    fn test_gvisor_rejects_landlock_policy_file() {
        let cfg = sh_config().with_landlock_policy(PathBuf::from("/tmp/landlock.toml"));

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("Landlock"));
    }

    #[test]
    fn test_gvisor_rejects_trace_mode_even_with_log_path() {
        let cfg = sh_config()
            .with_seccomp_mode(SeccompMode::Trace)
            .with_seccomp_trace_log(PathBuf::from("/tmp/trace.ndjson"));

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("gVisor runtime"));
    }

    #[test]
    fn test_gvisor_rejects_seccomp_allow_without_custom_profile_projection() {
        let cfg = sh_config().with_seccomp_allow_syscalls(vec!["io_uring_setup".to_string()]);

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("seccomp-allow"));
    }

    #[test]
    fn test_secret_dest_must_be_absolute() {
        let cfg = sh_config().with_secret(crate::container::SecretMount {
            source: PathBuf::from("/run/secrets/api-key"),
            dest: PathBuf::from("secrets/api-key"),
            mode: 0o400,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("absolute"));
    }

    #[test]
    fn test_secret_dest_rejects_parent_traversal() {
        let cfg = sh_config().with_secret(crate::container::SecretMount {
            source: PathBuf::from("/run/secrets/api-key"),
            dest: PathBuf::from("/../../etc/passwd"),
            mode: 0o400,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("parent traversal"));
    }

    #[test]
    fn test_bind_volume_source_must_exist() {
        let cfg = sh_config().with_volume(VolumeMount {
            source: VolumeSource::Bind {
                source: PathBuf::from("/tmp/definitely-missing-nucleus-volume"),
            },
            dest: PathBuf::from("/var/lib/app"),
            read_only: false,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("Volume source does not exist"));
    }

    #[test]
    fn test_bind_volume_source_rejects_sensitive_host_subtrees() {
        let cfg = sh_config().with_volume(VolumeMount {
            source: VolumeSource::Bind {
                source: PathBuf::from("/proc/sys"),
            },
            dest: PathBuf::from("/host-proc"),
            read_only: true,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("sensitive host path"));
    }

    #[test]
    fn test_bind_volume_dest_must_be_absolute() {
        let dir = tempfile::TempDir::new().unwrap();
        let cfg = sh_config().with_volume(VolumeMount {
            source: VolumeSource::Bind {
                source: dir.path().to_path_buf(),
            },
            dest: PathBuf::from("var/lib/app"),
            read_only: false,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("absolute"));
    }

    #[test]
    fn test_bind_volume_dest_rejects_reserved_container_paths() {
        let dir = tempfile::TempDir::new().unwrap();
        let cfg = sh_config().with_volume(VolumeMount {
            source: VolumeSource::Bind {
                source: dir.path().to_path_buf(),
            },
            dest: PathBuf::from("/etc"),
            read_only: false,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("reserved"));
    }

    #[test]
    fn test_tmpfs_volume_rejects_parent_traversal() {
        let cfg = sh_config().with_volume(VolumeMount {
            source: VolumeSource::Tmpfs {
                size: Some("64M".to_string()),
            },
            dest: PathBuf::from("/../../var/lib/app"),
            read_only: false,
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("parent traversal"));
    }

    #[test]
    fn test_gvisor_rejects_bind_mount_context_integrity_verification() {
        let cfg = sh_config()
            .with_context(PathBuf::from("/tmp/context"))
            .with_context_mode(crate::filesystem::ContextMode::BindMount)
            .with_verify_context_integrity(true);

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("context integrity"));
    }

    #[test]
    fn test_gvisor_rejects_exec_health_checks() {
        let cfg = sh_config().with_health_check(HealthCheck {
            command: vec!["/bin/sh".to_string(), "-c".to_string(), "true".to_string()],
            interval: Duration::from_secs(30),
            retries: 3,
            start_period: Duration::from_secs(1),
            timeout: Duration::from_secs(5),
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("health checks"));
    }

    #[test]
    fn test_gvisor_rejects_exec_readiness_probes() {
        let cfg = sh_config().with_readiness_probe(ReadinessProbe::Exec {
            command: vec!["/bin/sh".to_string(), "-c".to_string(), "true".to_string()],
        });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("readiness"));
    }

    #[test]
    fn test_gvisor_allows_copy_mode_context_integrity_verification() {
        let cfg = sh_config()
            .with_context(PathBuf::from("/tmp/context"))
            .with_context_mode(crate::filesystem::ContextMode::Copy)
            .with_verify_context_integrity(true);

        assert!(cfg.validate_runtime_support().is_ok());
    }

    #[test]
    fn test_user_namespace_rejects_unmapped_process_identity() {
        let cfg = sh_config()
            .with_rootless()
            .with_process_identity(ProcessIdentity {
                uid: 1000,
                gid: 1000,
                additional_gids: Vec::new(),
            });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("not mapped"));
    }

    #[test]
    fn test_user_namespace_rejects_supplementary_groups() {
        let cfg = sh_config()
            .with_rootless()
            .with_process_identity(ProcessIdentity {
                uid: 0,
                gid: 0,
                additional_gids: vec![1],
            });

        let err = cfg.validate_runtime_support().unwrap_err();
        assert!(err.to_string().contains("Supplementary groups"));
    }

    #[test]
    fn test_native_runtime_disables_gvisor() {
        // --runtime native selects the native runtime without changing trust policy.
        let cfg = sh_config()
            .apply_runtime_selection(RuntimeSelection::Native, false)
            .unwrap();
        assert!(!cfg.use_gvisor, "native runtime must disable gVisor");
        assert_eq!(
            cfg.trust_level,
            TrustLevel::Untrusted,
            "native runtime must preserve the default Untrusted trust level"
        );
    }

    #[test]
    fn test_native_runtime_preserves_explicit_trusted_policy() {
        let cfg = sh_config()
            .with_trust_level(TrustLevel::Trusted)
            .apply_runtime_selection(RuntimeSelection::Native, false)
            .unwrap();

        assert!(!cfg.use_gvisor, "native runtime must disable gVisor");
        assert_eq!(
            cfg.trust_level,
            TrustLevel::Trusted,
            "native runtime must preserve explicit Trusted trust level"
        );
    }

    #[test]
    fn test_default_config_has_gvisor_enabled() {
        let cfg = sh_config();
        assert!(cfg.use_gvisor, "default must have gVisor enabled");
        assert_eq!(
            cfg.trust_level,
            TrustLevel::Untrusted,
            "default must be Untrusted"
        );
    }

    #[test]
    fn test_generate_container_id_returns_result() {
        // BUG-07: generate_container_id must return Result, not panic.
        // Verify by calling it and checking the Ok value is valid hex.
        let id: crate::error::Result<String> = generate_container_id();
        let id = id.expect("generate_container_id must return Ok, not panic");
        assert_eq!(id.len(), 32, "container ID must be 32 hex chars");
        assert!(
            id.chars().all(|c| c.is_ascii_hexdigit()),
            "container ID must be valid hex: {}",
            id
        );
    }
}