1use crate::error::{NucleusError, Result};
2use nix::mount::{mount, MsFlags};
3use nix::sys::stat::{makedev, mknod, Mode, SFlag};
4use nix::unistd::chroot;
5use std::path::{Component, Path, PathBuf};
6use tracing::{debug, info, warn};
7
/// One row of the mount audit table: a mount point and the options it must carry.
struct ExpectedMount {
    /// Mount point, compared verbatim against the second field of a
    /// `/proc/self/mounts` line.
    path: &'static str,
    /// Options that must each appear in the entry's comma-separated option list.
    required_flags: &'static [&'static str],
    /// When `true`, the absence of this mount is itself a violation in production mode.
    critical: bool,
}
16
17const PRODUCTION_MOUNT_EXPECTATIONS: &[ExpectedMount] = &[
19 ExpectedMount {
20 path: "/bin",
21 required_flags: &["ro", "nosuid", "nodev"],
22 critical: true,
23 },
24 ExpectedMount {
25 path: "/usr",
26 required_flags: &["ro", "nosuid", "nodev"],
27 critical: true,
28 },
29 ExpectedMount {
30 path: "/lib",
31 required_flags: &["ro", "nosuid", "nodev"],
32 critical: false, },
34 ExpectedMount {
35 path: "/lib64",
36 required_flags: &["ro", "nosuid", "nodev"],
37 critical: false, },
39 ExpectedMount {
40 path: "/etc",
41 required_flags: &["ro", "nosuid", "nodev"],
42 critical: true,
43 },
44 ExpectedMount {
45 path: "/nix",
46 required_flags: &["ro", "nosuid", "nodev"],
47 critical: false, },
49 ExpectedMount {
50 path: "/sbin",
51 required_flags: &["ro", "nosuid", "nodev"],
52 critical: false, },
54 ExpectedMount {
55 path: "/proc",
56 required_flags: &["nosuid", "nodev", "noexec"],
57 critical: true,
58 },
59 ExpectedMount {
60 path: "/run/secrets",
61 required_flags: &["nosuid", "nodev", "noexec"],
62 critical: false, },
64];
65
66pub fn normalize_container_destination(dest: &Path) -> Result<PathBuf> {
71 if !dest.is_absolute() {
72 return Err(NucleusError::ConfigError(format!(
73 "Container destination must be absolute: {:?}",
74 dest
75 )));
76 }
77
78 let mut normalized = PathBuf::from("/");
79 let mut saw_component = false;
80
81 for component in dest.components() {
82 match component {
83 Component::RootDir => {}
84 Component::CurDir => {}
85 Component::Normal(part) => {
86 normalized.push(part);
87 saw_component = true;
88 }
89 Component::ParentDir => {
90 return Err(NucleusError::ConfigError(format!(
91 "Container destination must not contain parent traversal: {:?}",
92 dest
93 )));
94 }
95 Component::Prefix(_) => {
96 return Err(NucleusError::ConfigError(format!(
97 "Unsupported container destination prefix: {:?}",
98 dest
99 )));
100 }
101 }
102 }
103
104 if !saw_component {
105 return Err(NucleusError::ConfigError(format!(
106 "Container destination must not be the root directory: {:?}",
107 dest
108 )));
109 }
110
111 Ok(normalized)
112}
113
114pub fn resolve_container_destination(root: &Path, dest: &Path) -> Result<PathBuf> {
116 let normalized = normalize_container_destination(dest)?;
117 let relative = normalized
118 .strip_prefix("/")
119 .expect("normalized container destination is always absolute");
120 Ok(root.join(relative))
121}
122
123pub fn audit_mounts(production_mode: bool) -> Result<()> {
129 let mounts_content = std::fs::read_to_string("/proc/self/mounts").map_err(|e| {
130 NucleusError::FilesystemError(format!("Failed to read /proc/self/mounts: {}", e))
131 })?;
132
133 let mut violations = Vec::new();
134
135 for expectation in PRODUCTION_MOUNT_EXPECTATIONS {
136 let mount_entry = mounts_content.lines().find(|line| {
138 let parts: Vec<&str> = line.split_whitespace().collect();
139 parts.len() >= 4 && parts[1] == expectation.path
140 });
141
142 if let Some(entry) = mount_entry {
143 let parts: Vec<&str> = entry.split_whitespace().collect();
144 if parts.len() >= 4 {
145 let options = parts[3];
146 for &flag in expectation.required_flags {
147 if !options.split(',').any(|opt| opt == flag) {
148 violations.push(format!(
149 "Mount {} missing required flag '{}' (has: {})",
150 expectation.path, flag, options
151 ));
152 }
153 }
154 }
155 } else if expectation.critical && production_mode {
156 violations.push(format!(
157 "Critical mount {} is missing from the mount namespace",
158 expectation.path
159 ));
160 }
161 }
162
163 if violations.is_empty() {
164 info!("Mount audit passed: all expected flags verified");
165 Ok(())
166 } else if production_mode {
167 Err(NucleusError::FilesystemError(format!(
168 "Mount audit failed in production mode:\n {}",
169 violations.join("\n ")
170 )))
171 } else {
172 for v in &violations {
173 warn!("Mount audit: {}", v);
174 }
175 Ok(())
176 }
177}
178
179pub fn create_minimal_fs(root: &Path) -> Result<()> {
181 info!("Creating minimal filesystem structure at {:?}", root);
182
183 let dirs = vec![
185 "dev",
186 "proc",
187 "sys",
188 "tmp",
189 "bin",
190 "sbin",
191 "usr",
192 "lib",
193 "lib64",
194 "etc",
195 "nix",
196 "nix/store",
197 "run",
198 "context",
199 ];
200
201 for dir in dirs {
202 let path = root.join(dir);
203 std::fs::create_dir_all(&path).map_err(|e| {
204 NucleusError::FilesystemError(format!("Failed to create directory {:?}: {}", path, e))
205 })?;
206 }
207
208 info!("Created minimal filesystem structure");
209
210 Ok(())
211}
212
213pub fn create_dev_nodes(dev_path: &Path, include_tty: bool) -> Result<()> {
217 info!("Creating device nodes at {:?}", dev_path);
218
219 let mut devices = vec![
221 ("null", SFlag::S_IFCHR, 1, 3),
222 ("zero", SFlag::S_IFCHR, 1, 5),
223 ("full", SFlag::S_IFCHR, 1, 7),
224 ("random", SFlag::S_IFCHR, 1, 8),
225 ("urandom", SFlag::S_IFCHR, 1, 9),
226 ];
227 if include_tty {
228 devices.push(("tty", SFlag::S_IFCHR, 5, 0));
229 }
230
231 let mut created_count = 0;
232 let mut failed_count = 0;
233
234 for (name, dev_type, major, minor) in devices {
235 let path = dev_path.join(name);
236 let mode = Mode::from_bits_truncate(0o666);
237 let dev = makedev(major, minor);
238
239 match mknod(&path, dev_type, mode, dev) {
240 Ok(_) => {
241 info!("Created device node: {:?}", path);
242 created_count += 1;
243 }
244 Err(e) => {
245 warn!(
247 "Failed to create device node {:?}: {} (this is normal in rootless mode)",
248 path, e
249 );
250 failed_count += 1;
251 }
252 }
253 }
254
255 if created_count > 0 {
256 info!("Successfully created {} device nodes", created_count);
257 }
258 if failed_count > 0 {
259 info!("Skipped {} device nodes (rootless mode)", failed_count);
260 }
261
262 Ok(())
263}
264
265pub fn bind_mount_rootfs(root: &Path, rootfs_path: &Path) -> Result<()> {
270 info!(
271 "Bind mounting production rootfs {:?} into container {:?}",
272 rootfs_path, root
273 );
274
275 if !rootfs_path.exists() {
276 return Err(NucleusError::FilesystemError(format!(
277 "Rootfs path does not exist: {:?}",
278 rootfs_path
279 )));
280 }
281
282 let subdirs = ["bin", "sbin", "lib", "lib64", "usr", "etc", "nix"];
286
287 for subdir in &subdirs {
288 let source = rootfs_path.join(subdir);
289 if !source.exists() {
290 debug!("Rootfs subdir {} not present, skipping", subdir);
291 continue;
292 }
293
294 let target = root.join(subdir);
295 std::fs::create_dir_all(&target).map_err(|e| {
296 NucleusError::FilesystemError(format!(
297 "Failed to create mount point {:?}: {}",
298 target, e
299 ))
300 })?;
301
302 mount(
303 Some(&source),
304 &target,
305 None::<&str>,
306 MsFlags::MS_BIND | MsFlags::MS_REC,
307 None::<&str>,
308 )
309 .map_err(|e| {
310 NucleusError::FilesystemError(format!(
311 "Failed to bind mount rootfs {:?} -> {:?}: {}",
312 source, target, e
313 ))
314 })?;
315
316 mount(
318 None::<&str>,
319 &target,
320 None::<&str>,
321 MsFlags::MS_REMOUNT
322 | MsFlags::MS_BIND
323 | MsFlags::MS_RDONLY
324 | MsFlags::MS_REC
325 | MsFlags::MS_NOSUID
326 | MsFlags::MS_NODEV,
327 None::<&str>,
328 )
329 .map_err(|e| {
330 NucleusError::FilesystemError(format!(
331 "Failed to remount rootfs {:?} read-only: {}",
332 target, e
333 ))
334 })?;
335
336 info!("Mounted rootfs/{} read-only", subdir);
337 }
338
339 Ok(())
340}
341
342pub fn bind_mount_host_paths(root: &Path, best_effort: bool) -> Result<()> {
347 info!("Bind mounting host paths into container");
348
349 let host_paths = vec![
351 "/bin", "/usr", "/lib", "/lib64", "/nix", ];
353
354 for host_path in host_paths {
355 let host = Path::new(host_path);
356
357 if !host.exists() {
359 debug!("Skipping {} (not present on host)", host_path);
360 continue;
361 }
362
363 let container_path = root.join(host_path.trim_start_matches('/'));
364
365 if let Err(e) = std::fs::create_dir_all(&container_path) {
367 if best_effort {
368 warn!("Failed to create mount point {:?}: {}", container_path, e);
369 continue;
370 }
371 return Err(NucleusError::FilesystemError(format!(
372 "Failed to create mount point {:?}: {}",
373 container_path, e
374 )));
375 }
376
377 match mount(
381 Some(host),
382 &container_path,
383 None::<&str>,
384 MsFlags::MS_BIND | MsFlags::MS_REC,
385 None::<&str>,
386 ) {
387 Ok(_) => {
388 mount(
390 None::<&str>,
391 &container_path,
392 None::<&str>,
393 MsFlags::MS_REMOUNT
394 | MsFlags::MS_BIND
395 | MsFlags::MS_RDONLY
396 | MsFlags::MS_REC
397 | MsFlags::MS_NOSUID
398 | MsFlags::MS_NODEV,
399 None::<&str>,
400 )
401 .map_err(|e| {
402 NucleusError::FilesystemError(format!(
403 "Failed to remount {} as read-only: {}",
404 host_path, e
405 ))
406 })?;
407 info!(
408 "Bind mounted {} to {:?} (read-only)",
409 host_path, container_path
410 );
411 }
412 Err(e) => {
413 if best_effort {
414 warn!(
415 "Failed to bind mount {}: {} (continuing anyway)",
416 host_path, e
417 );
418 } else {
419 return Err(NucleusError::FilesystemError(format!(
420 "Failed to bind mount {}: {}",
421 host_path, e
422 )));
423 }
424 }
425 }
426 }
427
428 Ok(())
429}
430
431pub fn mount_volumes(root: &Path, volumes: &[crate::container::VolumeMount]) -> Result<()> {
433 use crate::container::VolumeSource;
434
435 if volumes.is_empty() {
436 return Ok(());
437 }
438
439 info!("Mounting {} volume(s) into container", volumes.len());
440
441 for volume in volumes {
442 let dest = resolve_container_destination(root, &volume.dest)?;
443
444 match &volume.source {
445 VolumeSource::Bind { source } => {
446 if !source.exists() {
447 return Err(NucleusError::FilesystemError(format!(
448 "Volume source does not exist: {:?}",
449 source
450 )));
451 }
452
453 if let Some(parent) = dest.parent() {
454 std::fs::create_dir_all(parent).map_err(|e| {
455 NucleusError::FilesystemError(format!(
456 "Failed to create volume mount parent {:?}: {}",
457 parent, e
458 ))
459 })?;
460 }
461
462 let recursive = source.is_dir();
463 if source.is_file() {
464 std::fs::write(&dest, "").map_err(|e| {
465 NucleusError::FilesystemError(format!(
466 "Failed to create volume mount point {:?}: {}",
467 dest, e
468 ))
469 })?;
470 } else {
471 std::fs::create_dir_all(&dest).map_err(|e| {
472 NucleusError::FilesystemError(format!(
473 "Failed to create volume mount dir {:?}: {}",
474 dest, e
475 ))
476 })?;
477 }
478
479 let initial_flags = if recursive {
480 MsFlags::MS_BIND | MsFlags::MS_REC
481 } else {
482 MsFlags::MS_BIND
483 };
484 mount(
485 Some(source.as_path()),
486 &dest,
487 None::<&str>,
488 initial_flags,
489 None::<&str>,
490 )
491 .map_err(|e| {
492 NucleusError::FilesystemError(format!(
493 "Failed to bind mount volume {:?} -> {:?}: {}",
494 source, dest, e
495 ))
496 })?;
497
498 let mut remount_flags =
499 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
500 if recursive {
501 remount_flags |= MsFlags::MS_REC;
502 }
503 if volume.read_only {
504 remount_flags |= MsFlags::MS_RDONLY;
505 }
506
507 mount(
508 None::<&str>,
509 &dest,
510 None::<&str>,
511 remount_flags,
512 None::<&str>,
513 )
514 .map_err(|e| {
515 NucleusError::FilesystemError(format!(
516 "Failed to remount volume {:?} with final flags: {}",
517 dest, e
518 ))
519 })?;
520
521 info!(
522 "Mounted bind volume {:?} -> {:?} ({})",
523 source,
524 volume.dest,
525 if volume.read_only { "ro" } else { "rw" }
526 );
527 }
528 VolumeSource::Tmpfs { size } => {
529 std::fs::create_dir_all(&dest).map_err(|e| {
530 NucleusError::FilesystemError(format!(
531 "Failed to create tmpfs mount dir {:?}: {}",
532 dest, e
533 ))
534 })?;
535
536 let mount_data = size
537 .as_ref()
538 .map(|value| format!("size={},mode=0755", value))
539 .unwrap_or_else(|| "mode=0755".to_string());
540
541 let mut flags = MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
542 if volume.read_only {
543 flags |= MsFlags::MS_RDONLY;
544 }
545 mount(
546 Some("tmpfs"),
547 &dest,
548 Some("tmpfs"),
549 flags,
550 Some(mount_data.as_str()),
551 )
552 .map_err(|e| {
553 NucleusError::FilesystemError(format!(
554 "Failed to mount tmpfs volume at {:?}: {}",
555 dest, e
556 ))
557 })?;
558
559 info!(
560 "Mounted tmpfs volume at {:?}{}{}",
561 volume.dest,
562 size.as_ref()
563 .map(|value| format!(" (size={})", value))
564 .unwrap_or_default(),
565 if volume.read_only { " (ro)" } else { "" }
566 );
567 }
568 }
569 }
570
571 Ok(())
572}
573
574pub fn mount_procfs(
580 proc_path: &Path,
581 best_effort: bool,
582 read_only: bool,
583 hide_pids: bool,
584) -> Result<()> {
585 info!(
586 "Mounting procfs at {:?} (hidepid={})",
587 proc_path,
588 if hide_pids { "2" } else { "0" }
589 );
590
591 let mount_data: Option<&str> = if hide_pids { Some("hidepid=2") } else { None };
592
593 match mount(
594 Some("proc"),
595 proc_path,
596 Some("proc"),
597 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
598 mount_data,
599 ) {
600 Ok(_) => {
601 if read_only {
602 mount(
603 None::<&str>,
604 proc_path,
605 None::<&str>,
606 MsFlags::MS_REMOUNT
607 | MsFlags::MS_RDONLY
608 | MsFlags::MS_NOSUID
609 | MsFlags::MS_NODEV
610 | MsFlags::MS_NOEXEC,
611 None::<&str>,
612 )
613 .map_err(|e| {
614 NucleusError::FilesystemError(format!(
615 "Failed to remount procfs read-only: {}",
616 e
617 ))
618 })?;
619 info!("Successfully mounted procfs (read-only)");
620 } else {
621 info!("Successfully mounted procfs");
622 }
623 Ok(())
624 }
625 Err(e) => {
626 if best_effort {
627 warn!("Failed to mount procfs: {} (continuing anyway)", e);
628 Ok(())
629 } else {
630 Err(NucleusError::FilesystemError(format!(
631 "Failed to mount procfs: {}",
632 e
633 )))
634 }
635 }
636 }
637}
638
/// Names directly under `/proc` that `mask_proc_paths` hides by bind-mounting
/// `/dev/null` over them.
///
/// Entries are processed in the order listed here.
pub const PROC_NULL_MASKED: &[&str] = &[
    // Kernel symbols and memory image.
    "kallsyms",
    "kcore",
    // Scheduler, timer and latency statistics.
    "sched_debug",
    "timer_list",
    "timer_stats",
    "keys",
    "latency_stats",
    "config.gz",
    "sysrq-trigger",
    // Per-page information.
    "kpagecount",
    "kpageflags",
    "kpagecgroup",
];
656
/// Names directly under `/proc` that `mask_proc_paths` hides by mounting a read-only
/// tmpfs over them.
pub const PROC_TMPFS_MASKED: &[&str] = &[
    "acpi",
    "bus",
    "irq",
    "scsi",
    "sys",
];
659
660pub fn mask_proc_paths(proc_path: &Path, production: bool) -> Result<()> {
668 info!("Masking sensitive /proc paths");
669
670 const CRITICAL_PROC_PATHS: &[&str] = &["kcore", "kallsyms", "sysrq-trigger"];
671
672 let dev_null = Path::new("/dev/null");
673
674 for name in PROC_NULL_MASKED {
675 let target = proc_path.join(name);
676 if !target.exists() {
677 continue;
678 }
679 match mount(
680 Some(dev_null),
681 &target,
682 None::<&str>,
683 MsFlags::MS_BIND,
684 None::<&str>,
685 ) {
686 Ok(_) => {
687 if let Err(e) = mount(
690 None::<&str>,
691 &target,
692 None::<&str>,
693 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_RDONLY,
694 None::<&str>,
695 ) {
696 if production && CRITICAL_PROC_PATHS.contains(name) {
697 return Err(NucleusError::FilesystemError(format!(
698 "Failed to remount /proc/{} read-only in production mode: {}",
699 name, e
700 )));
701 }
702 warn!(
703 "Failed to remount /proc/{} read-only: {} (continuing)",
704 name, e
705 );
706 }
707 debug!("Masked /proc/{} (read-only)", name);
708 }
709 Err(e) => {
710 if production && CRITICAL_PROC_PATHS.contains(name) {
711 return Err(NucleusError::FilesystemError(format!(
712 "Failed to mask critical /proc/{} in production mode: {}",
713 name, e
714 )));
715 }
716 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
717 }
718 }
719 }
720
721 for name in PROC_TMPFS_MASKED {
722 let target = proc_path.join(name);
723 if !target.exists() {
724 continue;
725 }
726 match mount(
727 Some("tmpfs"),
728 &target,
729 Some("tmpfs"),
730 MsFlags::MS_RDONLY | MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
731 Some("size=0"),
732 ) {
733 Ok(_) => debug!("Masked /proc/{}", name),
734 Err(e) => {
735 if production {
736 return Err(NucleusError::FilesystemError(format!(
737 "Failed to mask /proc/{} in production mode: {}",
738 name, e
739 )));
740 }
741 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
742 }
743 }
744 }
745
746 info!("Finished masking sensitive /proc paths");
747 Ok(())
748}
749
750pub fn switch_root(new_root: &Path, allow_chroot_fallback: bool) -> Result<()> {
755 info!("Switching root to {:?}", new_root);
756
757 match pivot_root_impl(new_root) {
758 Ok(()) => {
759 info!("Successfully switched root using pivot_root");
760 Ok(())
761 }
762 Err(e) => {
763 if allow_chroot_fallback {
764 warn!(
765 "pivot_root failed ({}), falling back to chroot due to explicit \
766 configuration",
767 e
768 );
769 chroot_impl(new_root)
770 } else {
771 Err(NucleusError::PivotRootError(format!(
772 "pivot_root failed: {}. chroot fallback is disabled by default; use \
773 --allow-chroot-fallback to allow weaker isolation",
774 e
775 )))
776 }
777 }
778 }
779}
780
781fn pivot_root_impl(new_root: &Path) -> Result<()> {
787 use nix::unistd::pivot_root;
788
789 let old_root = new_root.join(".old_root");
793 std::fs::create_dir_all(&old_root).map_err(|e| {
794 NucleusError::PivotRootError(format!("Failed to create old_root directory: {}", e))
795 })?;
796
797 pivot_root(new_root, &old_root)
799 .map_err(|e| NucleusError::PivotRootError(format!("pivot_root syscall failed: {}", e)))?;
800
801 std::env::set_current_dir("/")
803 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
804
805 nix::mount::umount2("/.old_root", nix::mount::MntFlags::MNT_DETACH)
807 .map_err(|e| NucleusError::PivotRootError(format!("Failed to unmount old root: {}", e)))?;
808
809 let _ = std::fs::remove_dir("/.old_root");
811
812 Ok(())
813}
814
815fn chroot_impl(new_root: &Path) -> Result<()> {
819 chroot(new_root)
820 .map_err(|e| NucleusError::PivotRootError(format!("chroot syscall failed: {}", e)))?;
821
822 std::env::set_current_dir("/")
824 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
825
826 info!("Successfully switched root using chroot");
827
828 Ok(())
829}
830
831pub fn mount_secrets(root: &Path, secrets: &[crate::container::SecretMount]) -> Result<()> {
836 if secrets.is_empty() {
837 return Ok(());
838 }
839
840 info!("Mounting {} secret(s) into container", secrets.len());
841
842 for secret in secrets {
843 if !secret.source.exists() {
844 return Err(NucleusError::FilesystemError(format!(
845 "Secret source does not exist: {:?}",
846 secret.source
847 )));
848 }
849
850 let dest = resolve_container_destination(root, &secret.dest)?;
852
853 if let Some(parent) = dest.parent() {
855 std::fs::create_dir_all(parent).map_err(|e| {
856 NucleusError::FilesystemError(format!(
857 "Failed to create secret mount parent {:?}: {}",
858 parent, e
859 ))
860 })?;
861 }
862
863 if secret.source.is_file() {
865 std::fs::write(&dest, "").map_err(|e| {
866 NucleusError::FilesystemError(format!(
867 "Failed to create secret mount point {:?}: {}",
868 dest, e
869 ))
870 })?;
871 } else {
872 std::fs::create_dir_all(&dest).map_err(|e| {
873 NucleusError::FilesystemError(format!(
874 "Failed to create secret mount dir {:?}: {}",
875 dest, e
876 ))
877 })?;
878 }
879
880 mount(
882 Some(secret.source.as_path()),
883 &dest,
884 None::<&str>,
885 MsFlags::MS_BIND,
886 None::<&str>,
887 )
888 .map_err(|e| {
889 NucleusError::FilesystemError(format!(
890 "Failed to bind mount secret {:?}: {}",
891 secret.source, e
892 ))
893 })?;
894
895 mount(
896 None::<&str>,
897 &dest,
898 None::<&str>,
899 MsFlags::MS_REMOUNT
900 | MsFlags::MS_BIND
901 | MsFlags::MS_RDONLY
902 | MsFlags::MS_NOSUID
903 | MsFlags::MS_NODEV
904 | MsFlags::MS_NOEXEC,
905 None::<&str>,
906 )
907 .map_err(|e| {
908 NucleusError::FilesystemError(format!(
909 "Failed to remount secret {:?} read-only: {}",
910 dest, e
911 ))
912 })?;
913
914 if secret.source.is_file() {
916 use std::os::unix::fs::PermissionsExt;
917 let perms = std::fs::Permissions::from_mode(secret.mode);
918 if let Err(e) = std::fs::set_permissions(&dest, perms) {
919 warn!(
920 "Failed to set mode {:04o} on secret {:?}: {} (bind mount may override)",
921 secret.mode, dest, e
922 );
923 }
924 }
925
926 debug!(
927 "Mounted secret {:?} -> {:?} (mode {:04o})",
928 secret.source, secret.dest, secret.mode
929 );
930 }
931
932 Ok(())
933}
934
935pub fn mount_secrets_inmemory(
941 root: &Path,
942 secrets: &[crate::container::SecretMount],
943 identity: &crate::container::ProcessIdentity,
944) -> Result<()> {
945 if secrets.is_empty() {
946 return Ok(());
947 }
948
949 info!("Mounting {} secret(s) on in-memory tmpfs", secrets.len());
950
951 let secrets_dir = root.join("run/secrets");
952 std::fs::create_dir_all(&secrets_dir).map_err(|e| {
953 NucleusError::FilesystemError(format!(
954 "Failed to create secrets dir {:?}: {}",
955 secrets_dir, e
956 ))
957 })?;
958
959 if let Err(e) = mount(
961 Some("tmpfs"),
962 &secrets_dir,
963 Some("tmpfs"),
964 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
965 Some("size=16m,mode=0700"),
966 ) {
967 let _ = std::fs::remove_dir_all(&secrets_dir);
968 return Err(NucleusError::FilesystemError(format!(
969 "Failed to mount secrets tmpfs at {:?}: {}",
970 secrets_dir, e
971 )));
972 }
973
974 if !identity.is_root() {
975 nix::unistd::chown(
976 &secrets_dir,
977 Some(nix::unistd::Uid::from_raw(identity.uid)),
978 Some(nix::unistd::Gid::from_raw(identity.gid)),
979 )
980 .map_err(|e| {
981 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
982 let _ = std::fs::remove_dir_all(&secrets_dir);
983 NucleusError::FilesystemError(format!(
984 "Failed to set /run/secrets owner to {}:{}: {}",
985 identity.uid, identity.gid, e
986 ))
987 })?;
988 }
989
990 let result = mount_secrets_inmemory_inner(&secrets_dir, root, secrets, identity);
992 if let Err(ref e) = result {
993 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
994 let _ = std::fs::remove_dir_all(&secrets_dir);
995 return Err(NucleusError::FilesystemError(format!(
996 "Secret mount failed (rolled back): {}",
997 e
998 )));
999 }
1000
1001 info!("All secrets mounted on in-memory tmpfs");
1002 Ok(())
1003}
1004
1005fn mount_secrets_inmemory_inner(
1006 secrets_dir: &Path,
1007 root: &Path,
1008 secrets: &[crate::container::SecretMount],
1009 identity: &crate::container::ProcessIdentity,
1010) -> Result<()> {
1011 for secret in secrets {
1012 if !secret.source.exists() {
1013 return Err(NucleusError::FilesystemError(format!(
1014 "Secret source does not exist: {:?}",
1015 secret.source
1016 )));
1017 }
1018
1019 let mut content = std::fs::read(&secret.source).map_err(|e| {
1021 NucleusError::FilesystemError(format!(
1022 "Failed to read secret {:?}: {}",
1023 secret.source, e
1024 ))
1025 })?;
1026
1027 let dest = resolve_container_destination(secrets_dir, &secret.dest)?;
1029
1030 if let Some(parent) = dest.parent() {
1032 std::fs::create_dir_all(parent).map_err(|e| {
1033 NucleusError::FilesystemError(format!(
1034 "Failed to create secret parent dir {:?}: {}",
1035 parent, e
1036 ))
1037 })?;
1038 }
1039
1040 std::fs::write(&dest, &content).map_err(|e| {
1042 NucleusError::FilesystemError(format!("Failed to write secret to {:?}: {}", dest, e))
1043 })?;
1044
1045 {
1047 use std::os::unix::fs::PermissionsExt;
1048 let perms = std::fs::Permissions::from_mode(secret.mode);
1049 std::fs::set_permissions(&dest, perms).map_err(|e| {
1050 NucleusError::FilesystemError(format!(
1051 "Failed to set permissions on secret {:?}: {}",
1052 dest, e
1053 ))
1054 })?;
1055 }
1056
1057 if !identity.is_root() {
1058 nix::unistd::chown(
1059 &dest,
1060 Some(nix::unistd::Uid::from_raw(identity.uid)),
1061 Some(nix::unistd::Gid::from_raw(identity.gid)),
1062 )
1063 .map_err(|e| {
1064 NucleusError::FilesystemError(format!(
1065 "Failed to set permissions owner on secret {:?} to {}:{}: {}",
1066 dest, identity.uid, identity.gid, e
1067 ))
1068 })?;
1069 }
1070
1071 zeroize::Zeroize::zeroize(&mut content);
1073 drop(content);
1074
1075 let container_dest = resolve_container_destination(root, &secret.dest)?;
1077 if container_dest != dest {
1078 if let Some(parent) = container_dest.parent() {
1079 std::fs::create_dir_all(parent).map_err(|e| {
1080 NucleusError::FilesystemError(format!(
1081 "Failed to create secret mount parent {:?}: {}",
1082 parent, e
1083 ))
1084 })?;
1085 }
1086
1087 if secret.source.is_file() {
1088 std::fs::write(&container_dest, "").map_err(|e| {
1089 NucleusError::FilesystemError(format!(
1090 "Failed to create secret mount point {:?}: {}",
1091 container_dest, e
1092 ))
1093 })?;
1094 }
1095
1096 mount(
1097 Some(dest.as_path()),
1098 &container_dest,
1099 None::<&str>,
1100 MsFlags::MS_BIND,
1101 None::<&str>,
1102 )
1103 .map_err(|e| {
1104 NucleusError::FilesystemError(format!(
1105 "Failed to bind mount secret {:?} -> {:?}: {}",
1106 dest, container_dest, e
1107 ))
1108 })?;
1109
1110 mount(
1111 None::<&str>,
1112 &container_dest,
1113 None::<&str>,
1114 MsFlags::MS_REMOUNT
1115 | MsFlags::MS_BIND
1116 | MsFlags::MS_RDONLY
1117 | MsFlags::MS_NOSUID
1118 | MsFlags::MS_NODEV
1119 | MsFlags::MS_NOEXEC,
1120 None::<&str>,
1121 )
1122 .map_err(|e| {
1123 NucleusError::FilesystemError(format!(
1124 "Failed to remount secret {:?} read-only: {}",
1125 container_dest, e
1126 ))
1127 })?;
1128 }
1129
1130 debug!(
1131 "Secret {:?} -> {:?} (in-memory tmpfs, mode {:04o})",
1132 secret.source, secret.dest, secret.mode
1133 );
1134 }
1135
1136 Ok(())
1137}
1138
#[cfg(test)]
mod tests {
    use super::*;

    /// `sysrq-trigger` must be covered by the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_sysrq_trigger() {
        let masked = PROC_NULL_MASKED.iter().any(|entry| *entry == "sysrq-trigger");
        assert!(
            masked,
            "/proc/sysrq-trigger must be masked to prevent host DoS"
        );
    }

    /// `timer_stats` must be covered by the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_timer_stats() {
        let masked = PROC_NULL_MASKED.iter().any(|entry| *entry == "timer_stats");
        assert!(
            masked,
            "/proc/timer_stats must be masked to prevent kernel info leakage"
        );
    }

    /// All three `kpage*` files must be covered by the `/dev/null` mask list.
    #[test]
    fn test_proc_mask_includes_kpage_files() {
        let required = ["kpagecount", "kpageflags", "kpagecgroup"];
        for path in required.iter() {
            assert!(
                PROC_NULL_MASKED.contains(path),
                "/proc/{} must be masked to prevent host memory layout leakage",
                path
            );
        }
    }

    /// Both mask lists must contain the entries this test names as OCI-standard.
    #[test]
    fn test_proc_mask_includes_oci_standard_paths() {
        let null_masked = ["kallsyms", "kcore", "sched_debug", "keys", "config.gz"];
        for path in null_masked.iter() {
            assert!(
                PROC_NULL_MASKED.contains(path),
                "/proc/{} must be in null-masked list (OCI spec)",
                path
            );
        }

        let tmpfs_masked = ["acpi", "bus", "scsi", "sys"];
        for path in tmpfs_masked.iter() {
            assert!(
                PROC_TMPFS_MASKED.contains(path),
                "/proc/{} must be in tmpfs-masked list (OCI spec)",
                path
            );
        }
    }
}