1use crate::error::{NucleusError, Result};
2use nix::mount::{mount, MsFlags};
3use nix::sys::stat::{makedev, mknod, Mode, SFlag};
4use nix::unistd::chroot;
5use std::path::{Component, Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// One entry of the mount audit: a mount point and the options it is expected to carry.
struct ExpectedMount {
    /// Mount point as it appears in the second field of a mount table line.
    path: &'static str,
    /// Options that must each be present in the entry's comma-separated option list.
    required_flags: &'static [&'static str],
    /// Whether the absence of this mount counts as a violation in production mode.
    critical: bool,
}
16
17const PRODUCTION_MOUNT_EXPECTATIONS: &[ExpectedMount] = &[
19 ExpectedMount {
20 path: "/bin",
21 required_flags: &["ro", "nosuid", "nodev"],
22 critical: true,
23 },
24 ExpectedMount {
25 path: "/usr",
26 required_flags: &["ro", "nosuid", "nodev"],
27 critical: true,
28 },
29 ExpectedMount {
30 path: "/lib",
31 required_flags: &["ro", "nosuid", "nodev"],
32 critical: false, },
34 ExpectedMount {
35 path: "/lib64",
36 required_flags: &["ro", "nosuid", "nodev"],
37 critical: false, },
39 ExpectedMount {
40 path: "/etc",
41 required_flags: &["ro", "nosuid", "nodev"],
42 critical: true,
43 },
44 ExpectedMount {
45 path: "/nix",
46 required_flags: &["ro", "nosuid", "nodev"],
47 critical: false, },
49 ExpectedMount {
50 path: "/sbin",
51 required_flags: &["ro", "nosuid", "nodev"],
52 critical: false, },
54 ExpectedMount {
55 path: "/proc",
56 required_flags: &["nosuid", "nodev", "noexec"],
57 critical: true,
58 },
59 ExpectedMount {
60 path: "/run/secrets",
61 required_flags: &["nosuid", "nodev", "noexec"],
62 critical: false, },
64];
65
66pub fn normalize_container_destination(dest: &Path) -> Result<PathBuf> {
71 if !dest.is_absolute() {
72 return Err(NucleusError::ConfigError(format!(
73 "Container destination must be absolute: {:?}",
74 dest
75 )));
76 }
77
78 let mut normalized = PathBuf::from("/");
79 let mut saw_component = false;
80
81 for component in dest.components() {
82 match component {
83 Component::RootDir => {}
84 Component::CurDir => {}
85 Component::Normal(part) => {
86 normalized.push(part);
87 saw_component = true;
88 }
89 Component::ParentDir => {
90 return Err(NucleusError::ConfigError(format!(
91 "Container destination must not contain parent traversal: {:?}",
92 dest
93 )));
94 }
95 Component::Prefix(_) => {
96 return Err(NucleusError::ConfigError(format!(
97 "Unsupported container destination prefix: {:?}",
98 dest
99 )));
100 }
101 }
102 }
103
104 if !saw_component {
105 return Err(NucleusError::ConfigError(format!(
106 "Container destination must not be the root directory: {:?}",
107 dest
108 )));
109 }
110
111 Ok(normalized)
112}
113
114pub fn resolve_container_destination(root: &Path, dest: &Path) -> Result<PathBuf> {
116 let normalized = normalize_container_destination(dest)?;
117 let relative = normalized.strip_prefix("/").map_err(|_| {
118 NucleusError::ConfigError(format!(
119 "Container destination is not absolute after normalization: {:?}",
120 normalized
121 ))
122 })?;
123 Ok(root.join(relative))
124}
125
126pub fn audit_mounts(production_mode: bool) -> Result<()> {
132 let mounts_content = std::fs::read_to_string("/proc/self/mounts").map_err(|e| {
133 NucleusError::FilesystemError(format!("Failed to read /proc/self/mounts: {}", e))
134 })?;
135
136 let mut violations = Vec::new();
137
138 for expectation in PRODUCTION_MOUNT_EXPECTATIONS {
139 let mount_entry = mounts_content.lines().find(|line| {
141 let parts: Vec<&str> = line.split_whitespace().collect();
142 parts.len() >= 4 && parts[1] == expectation.path
143 });
144
145 if let Some(entry) = mount_entry {
146 let parts: Vec<&str> = entry.split_whitespace().collect();
147 if parts.len() >= 4 {
148 let options = parts[3];
149 for &flag in expectation.required_flags {
150 if !options.split(',').any(|opt| opt == flag) {
151 violations.push(format!(
152 "Mount {} missing required flag '{}' (has: {})",
153 expectation.path, flag, options
154 ));
155 }
156 }
157 }
158 } else if expectation.critical && production_mode {
159 violations.push(format!(
160 "Critical mount {} is missing from the mount namespace",
161 expectation.path
162 ));
163 }
164 }
165
166 if violations.is_empty() {
167 info!("Mount audit passed: all expected flags verified");
168 Ok(())
169 } else if production_mode {
170 Err(NucleusError::FilesystemError(format!(
171 "Mount audit failed in production mode:\n {}",
172 violations.join("\n ")
173 )))
174 } else {
175 for v in &violations {
176 warn!("Mount audit: {}", v);
177 }
178 Ok(())
179 }
180}
181
182pub fn create_minimal_fs(root: &Path) -> Result<()> {
184 info!("Creating minimal filesystem structure at {:?}", root);
185
186 let dirs = vec![
188 "dev",
189 "proc",
190 "sys",
191 "tmp",
192 "bin",
193 "sbin",
194 "usr",
195 "lib",
196 "lib64",
197 "etc",
198 "nix",
199 "nix/store",
200 "run",
201 "context",
202 ];
203
204 for dir in dirs {
205 let path = root.join(dir);
206 std::fs::create_dir_all(&path).map_err(|e| {
207 NucleusError::FilesystemError(format!("Failed to create directory {:?}: {}", path, e))
208 })?;
209 }
210
211 info!("Created minimal filesystem structure");
212
213 Ok(())
214}
215
216pub fn create_dev_nodes(dev_path: &Path, include_tty: bool) -> Result<()> {
220 info!("Creating device nodes at {:?}", dev_path);
221
222 let mut devices = vec![
224 ("null", SFlag::S_IFCHR, 1, 3),
225 ("zero", SFlag::S_IFCHR, 1, 5),
226 ("full", SFlag::S_IFCHR, 1, 7),
227 ("random", SFlag::S_IFCHR, 1, 8),
228 ("urandom", SFlag::S_IFCHR, 1, 9),
229 ];
230 if include_tty {
231 devices.push(("tty", SFlag::S_IFCHR, 5, 0));
232 }
233
234 let mut created_count = 0;
235 let mut failed_count = 0;
236
237 for (name, dev_type, major, minor) in devices {
238 let path = dev_path.join(name);
239 let mode = Mode::from_bits_truncate(0o660);
240 let dev = makedev(major, minor);
241
242 match mknod(&path, dev_type, mode, dev) {
243 Ok(_) => {
244 info!("Created device node: {:?}", path);
245 created_count += 1;
246 }
247 Err(e) => {
248 warn!(
250 "Failed to create device node {:?}: {} (this is normal in rootless mode)",
251 path, e
252 );
253 failed_count += 1;
254 }
255 }
256 }
257
258 if created_count > 0 {
259 info!("Successfully created {} device nodes", created_count);
260 }
261 if failed_count > 0 {
262 info!("Skipped {} device nodes (rootless mode)", failed_count);
263 }
264
265 Ok(())
266}
267
268pub fn bind_mount_rootfs(root: &Path, rootfs_path: &Path) -> Result<()> {
273 info!(
274 "Bind mounting production rootfs {:?} into container {:?}",
275 rootfs_path, root
276 );
277
278 if std::fs::symlink_metadata(rootfs_path).is_err() {
279 return Err(NucleusError::FilesystemError(format!(
280 "Rootfs path does not exist: {:?}",
281 rootfs_path
282 )));
283 }
284
285 let subdirs = ["bin", "sbin", "lib", "lib64", "usr", "etc", "nix"];
289
290 for subdir in &subdirs {
291 let source = rootfs_path.join(subdir);
292 if !source.exists() {
293 debug!("Rootfs subdir {} not present, skipping", subdir);
294 continue;
295 }
296
297 let target = root.join(subdir);
298 std::fs::create_dir_all(&target).map_err(|e| {
299 NucleusError::FilesystemError(format!(
300 "Failed to create mount point {:?}: {}",
301 target, e
302 ))
303 })?;
304
305 mount(
306 Some(&source),
307 &target,
308 None::<&str>,
309 MsFlags::MS_BIND | MsFlags::MS_REC,
310 None::<&str>,
311 )
312 .map_err(|e| {
313 NucleusError::FilesystemError(format!(
314 "Failed to bind mount rootfs {:?} -> {:?}: {}",
315 source, target, e
316 ))
317 })?;
318
319 mount(
321 None::<&str>,
322 &target,
323 None::<&str>,
324 MsFlags::MS_REMOUNT
325 | MsFlags::MS_BIND
326 | MsFlags::MS_RDONLY
327 | MsFlags::MS_REC
328 | MsFlags::MS_NOSUID
329 | MsFlags::MS_NODEV,
330 None::<&str>,
331 )
332 .map_err(|e| {
333 NucleusError::FilesystemError(format!(
334 "Failed to remount rootfs {:?} read-only: {}",
335 target, e
336 ))
337 })?;
338
339 info!("Mounted rootfs/{} read-only", subdir);
340 }
341
342 Ok(())
343}
344
345pub fn bind_mount_host_paths(root: &Path, best_effort: bool) -> Result<()> {
350 info!("Bind mounting host paths into container");
351
352 let host_paths = vec![
354 "/bin", "/usr", "/lib", "/lib64", "/nix", ];
356
357 for host_path in host_paths {
358 let host = Path::new(host_path);
359
360 if !host.exists() {
362 debug!("Skipping {} (not present on host)", host_path);
363 continue;
364 }
365
366 let container_path = root.join(host_path.trim_start_matches('/'));
367
368 if let Err(e) = std::fs::create_dir_all(&container_path) {
370 if best_effort {
371 warn!("Failed to create mount point {:?}: {}", container_path, e);
372 continue;
373 }
374 return Err(NucleusError::FilesystemError(format!(
375 "Failed to create mount point {:?}: {}",
376 container_path, e
377 )));
378 }
379
380 match mount(
384 Some(host),
385 &container_path,
386 None::<&str>,
387 MsFlags::MS_BIND | MsFlags::MS_REC,
388 None::<&str>,
389 ) {
390 Ok(_) => {
391 mount(
393 None::<&str>,
394 &container_path,
395 None::<&str>,
396 MsFlags::MS_REMOUNT
397 | MsFlags::MS_BIND
398 | MsFlags::MS_RDONLY
399 | MsFlags::MS_REC
400 | MsFlags::MS_NOSUID
401 | MsFlags::MS_NODEV,
402 None::<&str>,
403 )
404 .map_err(|e| {
405 NucleusError::FilesystemError(format!(
406 "Failed to remount {} as read-only: {}",
407 host_path, e
408 ))
409 })?;
410 info!(
411 "Bind mounted {} to {:?} (read-only)",
412 host_path, container_path
413 );
414 }
415 Err(e) => {
416 if best_effort {
417 warn!(
418 "Failed to bind mount {}: {} (continuing anyway)",
419 host_path, e
420 );
421 } else {
422 return Err(NucleusError::FilesystemError(format!(
423 "Failed to bind mount {}: {}",
424 host_path, e
425 )));
426 }
427 }
428 }
429 }
430
431 Ok(())
432}
433
/// Host paths that are refused as the source of a user-requested bind mount.
const DENIED_BIND_MOUNT_SOURCES: &[&str] = &[
    // Whole-system and kernel-interface trees.
    "/",
    "/proc",
    "/sys",
    "/dev",
    "/boot",
    // Individual account and privilege files.
    "/etc/shadow",
    "/etc/sudoers",
    "/etc/passwd",
    "/etc/gshadow",
];
446
447fn validate_bind_mount_source(source: &Path) -> Result<()> {
449 let source_str = source.to_string_lossy();
450 for denied in DENIED_BIND_MOUNT_SOURCES {
451 if source_str == *denied {
452 return Err(NucleusError::FilesystemError(format!(
453 "Bind mount source '{}' is a sensitive host path and cannot be mounted into containers",
454 source.display()
455 )));
456 }
457 }
458 Ok(())
459}
460
461pub fn mount_volumes(root: &Path, volumes: &[crate::container::VolumeMount]) -> Result<()> {
463 use crate::container::VolumeSource;
464
465 if volumes.is_empty() {
466 return Ok(());
467 }
468
469 info!("Mounting {} volume(s) into container", volumes.len());
470
471 for volume in volumes {
472 let dest = resolve_container_destination(root, &volume.dest)?;
473
474 match &volume.source {
475 VolumeSource::Bind { source } => {
476 validate_bind_mount_source(source)?;
478
479 if std::fs::symlink_metadata(source).is_err() {
482 return Err(NucleusError::FilesystemError(format!(
483 "Volume source does not exist: {:?}",
484 source
485 )));
486 }
487
488 if let Some(parent) = dest.parent() {
489 std::fs::create_dir_all(parent).map_err(|e| {
490 NucleusError::FilesystemError(format!(
491 "Failed to create volume mount parent {:?}: {}",
492 parent, e
493 ))
494 })?;
495 }
496
497 let recursive = source.is_dir();
498 if source.is_file() {
499 std::fs::write(&dest, "").map_err(|e| {
500 NucleusError::FilesystemError(format!(
501 "Failed to create volume mount point {:?}: {}",
502 dest, e
503 ))
504 })?;
505 } else {
506 std::fs::create_dir_all(&dest).map_err(|e| {
507 NucleusError::FilesystemError(format!(
508 "Failed to create volume mount dir {:?}: {}",
509 dest, e
510 ))
511 })?;
512 }
513
514 let initial_flags = if recursive {
515 MsFlags::MS_BIND | MsFlags::MS_REC
516 } else {
517 MsFlags::MS_BIND
518 };
519 mount(
520 Some(source.as_path()),
521 &dest,
522 None::<&str>,
523 initial_flags,
524 None::<&str>,
525 )
526 .map_err(|e| {
527 NucleusError::FilesystemError(format!(
528 "Failed to bind mount volume {:?} -> {:?}: {}",
529 source, dest, e
530 ))
531 })?;
532
533 let mut remount_flags =
534 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
535 if recursive {
536 remount_flags |= MsFlags::MS_REC;
537 }
538 if volume.read_only {
539 remount_flags |= MsFlags::MS_RDONLY;
540 }
541
542 mount(
543 None::<&str>,
544 &dest,
545 None::<&str>,
546 remount_flags,
547 None::<&str>,
548 )
549 .map_err(|e| {
550 NucleusError::FilesystemError(format!(
551 "Failed to remount volume {:?} with final flags: {}",
552 dest, e
553 ))
554 })?;
555
556 info!(
557 "Mounted bind volume {:?} -> {:?} ({})",
558 source,
559 volume.dest,
560 if volume.read_only { "ro" } else { "rw" }
561 );
562 }
563 VolumeSource::Tmpfs { size } => {
564 std::fs::create_dir_all(&dest).map_err(|e| {
565 NucleusError::FilesystemError(format!(
566 "Failed to create tmpfs mount dir {:?}: {}",
567 dest, e
568 ))
569 })?;
570
571 if let Some(value) = size.as_ref() {
574 let valid = value
575 .chars()
576 .all(|c| c.is_ascii_digit() || "kKmMgG".contains(c));
577 if !valid || value.is_empty() {
578 return Err(NucleusError::FilesystemError(format!(
579 "Invalid tmpfs size value '{}': only digits with optional K/M/G suffix allowed",
580 value
581 )));
582 }
583 }
584
585 let mount_data = size
588 .as_ref()
589 .map(|value| format!("size={},mode=0700", value))
590 .unwrap_or_else(|| "size=64M,mode=0700".to_string());
591
592 let mut flags = MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
593 if volume.read_only {
594 flags |= MsFlags::MS_RDONLY;
595 }
596 mount(
597 Some("tmpfs"),
598 &dest,
599 Some("tmpfs"),
600 flags,
601 Some(mount_data.as_str()),
602 )
603 .map_err(|e| {
604 NucleusError::FilesystemError(format!(
605 "Failed to mount tmpfs volume at {:?}: {}",
606 dest, e
607 ))
608 })?;
609
610 info!(
611 "Mounted tmpfs volume at {:?}{}{}",
612 volume.dest,
613 size.as_ref()
614 .map(|value| format!(" (size={})", value))
615 .unwrap_or_default(),
616 if volume.read_only { " (ro)" } else { "" }
617 );
618 }
619 }
620 }
621
622 Ok(())
623}
624
625pub fn mount_procfs(
631 proc_path: &Path,
632 best_effort: bool,
633 read_only: bool,
634 hide_pids: bool,
635) -> Result<()> {
636 info!(
637 "Mounting procfs at {:?} (hidepid={})",
638 proc_path,
639 if hide_pids { "2" } else { "0" }
640 );
641
642 let mount_data: Option<&str> = if hide_pids { Some("hidepid=2") } else { None };
643
644 match mount(
645 Some("proc"),
646 proc_path,
647 Some("proc"),
648 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
649 mount_data,
650 ) {
651 Ok(_) => {
652 if read_only {
653 mount(
654 None::<&str>,
655 proc_path,
656 None::<&str>,
657 MsFlags::MS_REMOUNT
658 | MsFlags::MS_RDONLY
659 | MsFlags::MS_NOSUID
660 | MsFlags::MS_NODEV
661 | MsFlags::MS_NOEXEC,
662 None::<&str>,
663 )
664 .map_err(|e| {
665 NucleusError::FilesystemError(format!(
666 "Failed to remount procfs read-only: {}",
667 e
668 ))
669 })?;
670 info!("Successfully mounted procfs (read-only)");
671 } else {
672 info!("Successfully mounted procfs");
673 }
674 Ok(())
675 }
676 Err(e) => {
677 if best_effort {
678 warn!("Failed to mount procfs: {} (continuing anyway)", e);
679 Ok(())
680 } else {
681 Err(NucleusError::FilesystemError(format!(
682 "Failed to mount procfs: {}",
683 e
684 )))
685 }
686 }
687 }
688}
689
/// Names of entries under `/proc` that are hidden by bind-mounting `/dev/null` over them.
pub const PROC_NULL_MASKED: &[&str] = &[
    "kallsyms",
    "kcore",
    "sched_debug",
    "timer_list",
    "timer_stats",
    "keys",
    "latency_stats",
    "config.gz",
    "sysrq-trigger",
    // Per-page accounting files.
    "kpagecount",
    "kpageflags",
    "kpagecgroup",
];
707
/// Names of entries under `/proc` that are hidden by mounting a read-only tmpfs over them.
pub const PROC_TMPFS_MASKED: &[&str] = &[
    "acpi",
    "bus",
    "irq",
    "scsi",
    "sys",
];
710
711pub fn mask_proc_paths(proc_path: &Path, production: bool) -> Result<()> {
719 info!("Masking sensitive /proc paths");
720
721 const CRITICAL_PROC_PATHS: &[&str] = &["kcore", "kallsyms", "sysrq-trigger"];
722
723 let dev_null = Path::new("/dev/null");
724
725 for name in PROC_NULL_MASKED {
726 let target = proc_path.join(name);
727 if !target.exists() {
728 continue;
729 }
730 match mount(
731 Some(dev_null),
732 &target,
733 None::<&str>,
734 MsFlags::MS_BIND,
735 None::<&str>,
736 ) {
737 Ok(_) => {
738 if let Err(e) = mount(
741 None::<&str>,
742 &target,
743 None::<&str>,
744 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_RDONLY,
745 None::<&str>,
746 ) {
747 if production && CRITICAL_PROC_PATHS.contains(name) {
748 return Err(NucleusError::FilesystemError(format!(
749 "Failed to remount /proc/{} read-only in production mode: {}",
750 name, e
751 )));
752 }
753 warn!(
754 "Failed to remount /proc/{} read-only: {} (continuing)",
755 name, e
756 );
757 }
758 debug!("Masked /proc/{} (read-only)", name);
759 }
760 Err(e) => {
761 if production && CRITICAL_PROC_PATHS.contains(name) {
762 return Err(NucleusError::FilesystemError(format!(
763 "Failed to mask critical /proc/{} in production mode: {}",
764 name, e
765 )));
766 }
767 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
768 }
769 }
770 }
771
772 for name in PROC_TMPFS_MASKED {
773 let target = proc_path.join(name);
774 if !target.exists() {
775 continue;
776 }
777 match mount(
778 Some("tmpfs"),
779 &target,
780 Some("tmpfs"),
781 MsFlags::MS_RDONLY | MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
782 Some("size=0"),
783 ) {
784 Ok(_) => debug!("Masked /proc/{}", name),
785 Err(e) => {
786 if production {
787 return Err(NucleusError::FilesystemError(format!(
788 "Failed to mask /proc/{} in production mode: {}",
789 name, e
790 )));
791 }
792 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
793 }
794 }
795 }
796
797 info!("Finished masking sensitive /proc paths");
798 Ok(())
799}
800
801pub fn switch_root(new_root: &Path, allow_chroot_fallback: bool) -> Result<()> {
806 info!("Switching root to {:?}", new_root);
807
808 match pivot_root_impl(new_root) {
809 Ok(()) => {
810 info!("Successfully switched root using pivot_root");
811 Ok(())
812 }
813 Err(e) => {
814 if allow_chroot_fallback {
815 warn!(
816 "pivot_root failed ({}), falling back to chroot due to explicit \
817 configuration",
818 e
819 );
820 chroot_impl(new_root)
821 } else {
822 Err(NucleusError::PivotRootError(format!(
823 "pivot_root failed: {}. chroot fallback is disabled by default; use \
824 --allow-chroot-fallback to allow weaker isolation",
825 e
826 )))
827 }
828 }
829 }
830}
831
832fn pivot_root_impl(new_root: &Path) -> Result<()> {
838 use nix::unistd::pivot_root;
839
840 let old_root = new_root.join(".old_root");
844 std::fs::create_dir_all(&old_root).map_err(|e| {
845 NucleusError::PivotRootError(format!("Failed to create old_root directory: {}", e))
846 })?;
847
848 pivot_root(new_root, &old_root)
850 .map_err(|e| NucleusError::PivotRootError(format!("pivot_root syscall failed: {}", e)))?;
851
852 std::env::set_current_dir("/")
854 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
855
856 nix::mount::umount2("/.old_root", nix::mount::MntFlags::MNT_DETACH)
858 .map_err(|e| NucleusError::PivotRootError(format!("Failed to unmount old root: {}", e)))?;
859
860 let _ = std::fs::remove_dir("/.old_root");
862
863 Ok(())
864}
865
866fn chroot_impl(new_root: &Path) -> Result<()> {
870 chroot(new_root)
871 .map_err(|e| NucleusError::PivotRootError(format!("chroot syscall failed: {}", e)))?;
872
873 std::env::set_current_dir("/")
875 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
876
877 if let Err(e) = caps::drop(
880 None,
881 caps::CapSet::Bounding,
882 caps::Capability::CAP_SYS_CHROOT,
883 ) {
884 debug!(
885 "Could not drop CAP_SYS_CHROOT after chroot: {} (may not be present)",
886 e
887 );
888 }
889 if let Err(e) = caps::drop(
890 None,
891 caps::CapSet::Effective,
892 caps::Capability::CAP_SYS_CHROOT,
893 ) {
894 debug!(
895 "Could not drop effective CAP_SYS_CHROOT: {} (may not be present)",
896 e
897 );
898 }
899 if let Err(e) = caps::drop(
900 None,
901 caps::CapSet::Permitted,
902 caps::Capability::CAP_SYS_CHROOT,
903 ) {
904 debug!(
905 "Could not drop permitted CAP_SYS_CHROOT: {} (may not be present)",
906 e
907 );
908 }
909
910 info!("Successfully switched root using chroot (CAP_SYS_CHROOT dropped)");
911
912 Ok(())
913}
914
915pub fn mount_secrets(root: &Path, secrets: &[crate::container::SecretMount]) -> Result<()> {
920 if secrets.is_empty() {
921 return Ok(());
922 }
923
924 info!("Mounting {} secret(s) into container", secrets.len());
925
926 for secret in secrets {
927 let meta = std::fs::symlink_metadata(&secret.source).map_err(|_| {
931 NucleusError::FilesystemError(format!(
932 "Secret source does not exist: {:?}",
933 secret.source
934 ))
935 })?;
936 if meta.file_type().is_symlink() {
937 return Err(NucleusError::FilesystemError(format!(
938 "Secret source {:?} is a symlink; refusing to mount (TOCTOU mitigation)",
939 secret.source
940 )));
941 }
942
943 let dest = resolve_container_destination(root, &secret.dest)?;
945
946 if let Some(parent) = dest.parent() {
948 std::fs::create_dir_all(parent).map_err(|e| {
949 NucleusError::FilesystemError(format!(
950 "Failed to create secret mount parent {:?}: {}",
951 parent, e
952 ))
953 })?;
954 }
955
956 if secret.source.is_file() {
958 std::fs::write(&dest, "").map_err(|e| {
959 NucleusError::FilesystemError(format!(
960 "Failed to create secret mount point {:?}: {}",
961 dest, e
962 ))
963 })?;
964 } else {
965 std::fs::create_dir_all(&dest).map_err(|e| {
966 NucleusError::FilesystemError(format!(
967 "Failed to create secret mount dir {:?}: {}",
968 dest, e
969 ))
970 })?;
971 }
972
973 mount(
975 Some(secret.source.as_path()),
976 &dest,
977 None::<&str>,
978 MsFlags::MS_BIND,
979 None::<&str>,
980 )
981 .map_err(|e| {
982 NucleusError::FilesystemError(format!(
983 "Failed to bind mount secret {:?}: {}",
984 secret.source, e
985 ))
986 })?;
987
988 mount(
989 None::<&str>,
990 &dest,
991 None::<&str>,
992 MsFlags::MS_REMOUNT
993 | MsFlags::MS_BIND
994 | MsFlags::MS_RDONLY
995 | MsFlags::MS_NOSUID
996 | MsFlags::MS_NODEV
997 | MsFlags::MS_NOEXEC,
998 None::<&str>,
999 )
1000 .map_err(|e| {
1001 NucleusError::FilesystemError(format!(
1002 "Failed to remount secret {:?} read-only: {}",
1003 dest, e
1004 ))
1005 })?;
1006
1007 if secret.source.is_file() {
1009 use std::os::unix::fs::PermissionsExt;
1010 let perms = std::fs::Permissions::from_mode(secret.mode);
1011 if let Err(e) = std::fs::set_permissions(&dest, perms) {
1012 warn!(
1013 "Failed to set mode {:04o} on secret {:?}: {} (bind mount may override)",
1014 secret.mode, dest, e
1015 );
1016 }
1017 }
1018
1019 debug!(
1020 "Mounted secret {:?} -> {:?} (mode {:04o})",
1021 secret.source, secret.dest, secret.mode
1022 );
1023 }
1024
1025 Ok(())
1026}
1027
1028pub fn mount_secrets_inmemory(
1034 root: &Path,
1035 secrets: &[crate::container::SecretMount],
1036 identity: &crate::container::ProcessIdentity,
1037) -> Result<()> {
1038 if secrets.is_empty() {
1039 return Ok(());
1040 }
1041
1042 info!("Mounting {} secret(s) on in-memory tmpfs", secrets.len());
1043
1044 let secrets_dir = root.join("run/secrets");
1045 std::fs::create_dir_all(&secrets_dir).map_err(|e| {
1046 NucleusError::FilesystemError(format!(
1047 "Failed to create secrets dir {:?}: {}",
1048 secrets_dir, e
1049 ))
1050 })?;
1051
1052 if let Err(e) = mount(
1054 Some("tmpfs"),
1055 &secrets_dir,
1056 Some("tmpfs"),
1057 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
1058 Some("size=16m,mode=0700"),
1059 ) {
1060 let _ = std::fs::remove_dir_all(&secrets_dir);
1061 return Err(NucleusError::FilesystemError(format!(
1062 "Failed to mount secrets tmpfs at {:?}: {}",
1063 secrets_dir, e
1064 )));
1065 }
1066
1067 if !identity.is_root() {
1068 nix::unistd::chown(
1069 &secrets_dir,
1070 Some(nix::unistd::Uid::from_raw(identity.uid)),
1071 Some(nix::unistd::Gid::from_raw(identity.gid)),
1072 )
1073 .map_err(|e| {
1074 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
1075 let _ = std::fs::remove_dir_all(&secrets_dir);
1076 NucleusError::FilesystemError(format!(
1077 "Failed to set /run/secrets owner to {}:{}: {}",
1078 identity.uid, identity.gid, e
1079 ))
1080 })?;
1081 }
1082
1083 let result = mount_secrets_inmemory_inner(&secrets_dir, root, secrets, identity);
1085 if let Err(ref e) = result {
1086 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
1087 let _ = std::fs::remove_dir_all(&secrets_dir);
1088 return Err(NucleusError::FilesystemError(format!(
1089 "Secret mount failed (rolled back): {}",
1090 e
1091 )));
1092 }
1093
1094 info!("All secrets mounted on in-memory tmpfs");
1095 Ok(())
1096}
1097
1098fn mount_secrets_inmemory_inner(
1099 secrets_dir: &Path,
1100 root: &Path,
1101 secrets: &[crate::container::SecretMount],
1102 identity: &crate::container::ProcessIdentity,
1103) -> Result<()> {
1104 for secret in secrets {
1105 if std::fs::symlink_metadata(&secret.source).is_err() {
1108 return Err(NucleusError::FilesystemError(format!(
1109 "Secret source does not exist: {:?}",
1110 secret.source
1111 )));
1112 }
1113
1114 let mut content = std::fs::read(&secret.source).map_err(|e| {
1116 NucleusError::FilesystemError(format!(
1117 "Failed to read secret {:?}: {}",
1118 secret.source, e
1119 ))
1120 })?;
1121
1122 let dest = resolve_container_destination(secrets_dir, &secret.dest)?;
1124
1125 if let Some(parent) = dest.parent() {
1127 std::fs::create_dir_all(parent).map_err(|e| {
1128 NucleusError::FilesystemError(format!(
1129 "Failed to create secret parent dir {:?}: {}",
1130 parent, e
1131 ))
1132 })?;
1133 }
1134
1135 std::fs::write(&dest, &content).map_err(|e| {
1137 NucleusError::FilesystemError(format!("Failed to write secret to {:?}: {}", dest, e))
1138 })?;
1139
1140 {
1142 use std::os::unix::fs::PermissionsExt;
1143 let perms = std::fs::Permissions::from_mode(secret.mode);
1144 std::fs::set_permissions(&dest, perms).map_err(|e| {
1145 NucleusError::FilesystemError(format!(
1146 "Failed to set permissions on secret {:?}: {}",
1147 dest, e
1148 ))
1149 })?;
1150 }
1151
1152 if !identity.is_root() {
1153 nix::unistd::chown(
1154 &dest,
1155 Some(nix::unistd::Uid::from_raw(identity.uid)),
1156 Some(nix::unistd::Gid::from_raw(identity.gid)),
1157 )
1158 .map_err(|e| {
1159 NucleusError::FilesystemError(format!(
1160 "Failed to set permissions owner on secret {:?} to {}:{}: {}",
1161 dest, identity.uid, identity.gid, e
1162 ))
1163 })?;
1164 }
1165
1166 zeroize::Zeroize::zeroize(&mut content);
1168 drop(content);
1169
1170 let container_dest = resolve_container_destination(root, &secret.dest)?;
1172 if container_dest != dest {
1173 if let Some(parent) = container_dest.parent() {
1174 std::fs::create_dir_all(parent).map_err(|e| {
1175 NucleusError::FilesystemError(format!(
1176 "Failed to create secret mount parent {:?}: {}",
1177 parent, e
1178 ))
1179 })?;
1180 }
1181
1182 if secret.source.is_file() {
1183 std::fs::write(&container_dest, "").map_err(|e| {
1184 NucleusError::FilesystemError(format!(
1185 "Failed to create secret mount point {:?}: {}",
1186 container_dest, e
1187 ))
1188 })?;
1189 }
1190
1191 mount(
1192 Some(dest.as_path()),
1193 &container_dest,
1194 None::<&str>,
1195 MsFlags::MS_BIND,
1196 None::<&str>,
1197 )
1198 .map_err(|e| {
1199 NucleusError::FilesystemError(format!(
1200 "Failed to bind mount secret {:?} -> {:?}: {}",
1201 dest, container_dest, e
1202 ))
1203 })?;
1204
1205 mount(
1206 None::<&str>,
1207 &container_dest,
1208 None::<&str>,
1209 MsFlags::MS_REMOUNT
1210 | MsFlags::MS_BIND
1211 | MsFlags::MS_RDONLY
1212 | MsFlags::MS_NOSUID
1213 | MsFlags::MS_NODEV
1214 | MsFlags::MS_NOEXEC,
1215 None::<&str>,
1216 )
1217 .map_err(|e| {
1218 NucleusError::FilesystemError(format!(
1219 "Failed to remount secret {:?} read-only: {}",
1220 container_dest, e
1221 ))
1222 })?;
1223 }
1224
1225 debug!(
1226 "Secret {:?} -> {:?} (in-memory tmpfs, mode {:04o})",
1227 secret.source, secret.dest, secret.mode
1228 );
1229 }
1230
1231 Ok(())
1232}
1233
#[cfg(test)]
mod tests {
    use super::*;

    /// `sysrq-trigger` must be part of the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_sysrq_trigger() {
        let masked = PROC_NULL_MASKED.contains(&"sysrq-trigger");
        assert!(
            masked,
            "/proc/sysrq-trigger must be masked to prevent host DoS"
        );
    }

    /// `timer_stats` must be part of the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_timer_stats() {
        let masked = PROC_NULL_MASKED.contains(&"timer_stats");
        assert!(
            masked,
            "/proc/timer_stats must be masked to prevent kernel info leakage"
        );
    }

    /// All three `kpage*` files must be part of the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_kpage_files() {
        const KPAGE_FILES: &[&str] = &["kpagecount", "kpageflags", "kpagecgroup"];
        for path in KPAGE_FILES {
            assert!(
                PROC_NULL_MASKED.contains(path),
                "/proc/{} must be masked to prevent host memory layout leakage",
                path
            );
        }
    }

    /// The listed standard paths must appear in the expected mask list.
    #[test]
    fn test_proc_mask_includes_oci_standard_paths() {
        const NULL_MASKED: &[&str] = &["kallsyms", "kcore", "sched_debug", "keys", "config.gz"];
        const TMPFS_MASKED: &[&str] = &["acpi", "bus", "scsi", "sys"];

        for path in NULL_MASKED {
            assert!(
                PROC_NULL_MASKED.contains(path),
                "/proc/{} must be in null-masked list (OCI spec)",
                path
            );
        }
        for path in TMPFS_MASKED {
            assert!(
                PROC_TMPFS_MASKED.contains(path),
                "/proc/{} must be in tmpfs-masked list (OCI spec)",
                path
            );
        }
    }
}