1use crate::error::{NucleusError, Result};
2use nix::fcntl::{open, OFlag};
3use nix::mount::{mount, MsFlags};
4use nix::sys::stat::{fstat, makedev, mknod, Mode, SFlag};
5use nix::unistd::chroot;
6use std::fs::OpenOptions;
7use std::io::Read;
8use std::os::fd::AsRawFd;
9use std::os::unix::fs::OpenOptionsExt;
10use std::path::{Component, Path, PathBuf};
11use tracing::{debug, info, warn};
12
/// One row of the mount-audit table: a mount point together with the mount
/// options it must carry.
struct ExpectedMount {
    /// Absolute mount point, used as the lookup key into the parsed mount table.
    path: &'static str,
    /// Per-mount options (e.g. `ro`, `nosuid`) that must all be present.
    required_flags: &'static [&'static str],
    /// When `true`, the mount being absent altogether counts as a violation
    /// in production mode.
    critical: bool,
}
21
22const PRODUCTION_MOUNT_EXPECTATIONS: &[ExpectedMount] = &[
24 ExpectedMount {
25 path: "/bin",
26 required_flags: &["ro", "nosuid", "nodev"],
27 critical: true,
28 },
29 ExpectedMount {
30 path: "/usr",
31 required_flags: &["ro", "nosuid", "nodev"],
32 critical: true,
33 },
34 ExpectedMount {
35 path: "/lib",
36 required_flags: &["ro", "nosuid", "nodev"],
37 critical: false, },
39 ExpectedMount {
40 path: "/lib64",
41 required_flags: &["ro", "nosuid", "nodev"],
42 critical: false, },
44 ExpectedMount {
45 path: "/etc",
46 required_flags: &["ro", "nosuid", "nodev"],
47 critical: true,
48 },
49 ExpectedMount {
50 path: "/nix",
51 required_flags: &["ro", "nosuid", "nodev"],
52 critical: false, },
54 ExpectedMount {
55 path: "/sbin",
56 required_flags: &["ro", "nosuid", "nodev"],
57 critical: false, },
59 ExpectedMount {
60 path: "/proc",
61 required_flags: &["nosuid", "nodev", "noexec"],
62 critical: true,
63 },
64 ExpectedMount {
65 path: "/run/secrets",
66 required_flags: &["nosuid", "nodev", "noexec"],
67 critical: false, },
69];
70
71pub fn normalize_container_destination(dest: &Path) -> Result<PathBuf> {
76 if !dest.is_absolute() {
77 return Err(NucleusError::ConfigError(format!(
78 "Container destination must be absolute: {:?}",
79 dest
80 )));
81 }
82
83 let mut normalized = PathBuf::from("/");
84 let mut saw_component = false;
85
86 for component in dest.components() {
87 match component {
88 Component::RootDir => {}
89 Component::CurDir => {}
90 Component::Normal(part) => {
91 normalized.push(part);
92 saw_component = true;
93 }
94 Component::ParentDir => {
95 return Err(NucleusError::ConfigError(format!(
96 "Container destination must not contain parent traversal: {:?}",
97 dest
98 )));
99 }
100 Component::Prefix(_) => {
101 return Err(NucleusError::ConfigError(format!(
102 "Unsupported container destination prefix: {:?}",
103 dest
104 )));
105 }
106 }
107 }
108
109 if !saw_component {
110 return Err(NucleusError::ConfigError(format!(
111 "Container destination must not be the root directory: {:?}",
112 dest
113 )));
114 }
115
116 Ok(normalized)
117}
118
119pub fn resolve_container_destination(root: &Path, dest: &Path) -> Result<PathBuf> {
121 let normalized = normalize_container_destination(dest)?;
122 let relative = normalized.strip_prefix("/").map_err(|_| {
123 NucleusError::ConfigError(format!(
124 "Container destination is not absolute after normalization: {:?}",
125 normalized
126 ))
127 })?;
128 Ok(root.join(relative))
129}
130
131fn validate_rootfs_path_under_store(rootfs_path: &Path, store_root: &Path) -> Result<PathBuf> {
132 if !rootfs_path.is_absolute() {
133 return Err(NucleusError::ConfigError(format!(
134 "Production rootfs path must be absolute: {}",
135 rootfs_path.display()
136 )));
137 }
138
139 for component in rootfs_path.components() {
140 match component {
141 Component::ParentDir => {
142 return Err(NucleusError::ConfigError(format!(
143 "Production rootfs path must not contain parent traversal: {}",
144 rootfs_path.display()
145 )));
146 }
147 Component::Prefix(_) => {
148 return Err(NucleusError::ConfigError(format!(
149 "Unsupported production rootfs path prefix: {}",
150 rootfs_path.display()
151 )));
152 }
153 Component::RootDir | Component::CurDir | Component::Normal(_) => {}
154 }
155 }
156
157 let canonical = std::fs::canonicalize(rootfs_path).map_err(|e| {
158 NucleusError::ConfigError(format!(
159 "Failed to canonicalize production rootfs path '{}': {}",
160 rootfs_path.display(),
161 e
162 ))
163 })?;
164
165 if !canonical.starts_with(store_root) {
166 return Err(NucleusError::ConfigError(format!(
167 "Production mode requires rootfs path to resolve under {}: {} -> {}",
168 store_root.display(),
169 rootfs_path.display(),
170 canonical.display()
171 )));
172 }
173
174 if !canonical.is_dir() {
175 return Err(NucleusError::ConfigError(format!(
176 "Production rootfs path must resolve to a directory: {}",
177 canonical.display()
178 )));
179 }
180
181 Ok(canonical)
182}
183
184pub fn validate_production_rootfs_path(rootfs_path: &Path) -> Result<PathBuf> {
189 validate_rootfs_path_under_store(rootfs_path, Path::new("/nix/store"))
190}
191
192pub(crate) fn read_regular_file_nofollow(path: &Path) -> Result<Vec<u8>> {
193 let mut file = OpenOptions::new()
194 .read(true)
195 .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
196 .open(path)
197 .map_err(|e| {
198 NucleusError::FilesystemError(format!(
199 "Failed to open file {:?} with O_NOFOLLOW: {}",
200 path, e
201 ))
202 })?;
203
204 let metadata = file.metadata().map_err(|e| {
205 NucleusError::FilesystemError(format!("Failed to stat file {:?}: {}", path, e))
206 })?;
207 if !metadata.is_file() {
208 return Err(NucleusError::FilesystemError(format!(
209 "Expected regular file for {:?}, found non-file source",
210 path
211 )));
212 }
213
214 let mut content = Vec::new();
215 file.read_to_end(&mut content).map_err(|e| {
216 NucleusError::FilesystemError(format!("Failed to read file {:?}: {}", path, e))
217 })?;
218 Ok(content)
219}
220
/// Decodes the octal escapes found in a `/proc/self/mountinfo` field.
///
/// Recognised sequences are `\040` (space), `\011` (tab), `\012` (newline)
/// and `\134` (backslash). After any other backslash, the backslash and the
/// (up to) three characters following it are copied through unchanged.
fn decode_mountinfo_field(field: &str) -> String {
    let mut decoded = String::with_capacity(field.len());
    let mut rest = field;

    while let Some(backslash_at) = rest.find('\\') {
        decoded.push_str(&rest[..backslash_at]);
        let after = &rest[backslash_at + 1..];

        // Take at most three characters (not bytes) as the escape code.
        let code_len = after
            .char_indices()
            .nth(3)
            .map_or(after.len(), |(idx, _)| idx);
        let (code, tail) = after.split_at(code_len);

        match code {
            "040" => decoded.push(' '),
            "011" => decoded.push('\t'),
            "012" => decoded.push('\n'),
            "134" => decoded.push('\\'),
            unknown => {
                decoded.push('\\');
                decoded.push_str(unknown);
            }
        }
        rest = tail;
    }

    decoded.push_str(rest);
    decoded
}
245
246fn parse_mountinfo_line(line: &str) -> Option<(String, std::collections::HashSet<String>)> {
247 let (left, _) = line.split_once(" - ")?;
248 let fields: Vec<&str> = left.split_whitespace().collect();
249 if fields.len() < 6 {
250 return None;
251 }
252
253 let mount_point = decode_mountinfo_field(fields[4]);
254 let options = fields[5]
255 .split(',')
256 .map(str::trim)
257 .filter(|opt| !opt.is_empty())
258 .map(str::to_string)
259 .collect();
260
261 Some((mount_point, options))
262}
263
264pub fn audit_mounts(production_mode: bool) -> Result<()> {
270 let mounts_content = std::fs::read_to_string("/proc/self/mountinfo").map_err(|e| {
271 NucleusError::FilesystemError(format!("Failed to read /proc/self/mountinfo: {}", e))
272 })?;
273 let mount_table: std::collections::HashMap<String, std::collections::HashSet<String>> =
274 mounts_content
275 .lines()
276 .filter_map(parse_mountinfo_line)
277 .collect();
278
279 let mut violations = Vec::new();
280
281 for expectation in PRODUCTION_MOUNT_EXPECTATIONS {
282 if let Some(options) = mount_table.get(expectation.path) {
283 for &flag in expectation.required_flags {
284 if !options.contains(flag) {
285 let rendered = options
286 .iter()
287 .map(String::as_str)
288 .collect::<Vec<_>>()
289 .join(",");
290 violations.push(format!(
291 "Mount {} missing required flag '{}' (has: {})",
292 expectation.path, flag, rendered
293 ));
294 }
295 }
296 } else if expectation.critical && production_mode {
297 violations.push(format!(
298 "Critical mount {} is missing from the mount namespace",
299 expectation.path
300 ));
301 }
302 }
303
304 if violations.is_empty() {
305 info!("Mount audit passed: all expected flags verified");
306 Ok(())
307 } else if production_mode {
308 Err(NucleusError::FilesystemError(format!(
309 "Mount audit failed in production mode:\n {}",
310 violations.join("\n ")
311 )))
312 } else {
313 for v in &violations {
314 warn!("Mount audit: {}", v);
315 }
316 Ok(())
317 }
318}
319
320pub fn create_minimal_fs(root: &Path) -> Result<()> {
322 info!("Creating minimal filesystem structure at {:?}", root);
323
324 let dirs = vec![
326 "dev",
327 "proc",
328 "sys",
329 "tmp",
330 "bin",
331 "sbin",
332 "usr",
333 "lib",
334 "lib64",
335 "etc",
336 "nix",
337 "nix/store",
338 "run",
339 "context",
340 ];
341
342 for dir in dirs {
343 let path = root.join(dir);
344 std::fs::create_dir_all(&path).map_err(|e| {
345 NucleusError::FilesystemError(format!("Failed to create directory {:?}: {}", path, e))
346 })?;
347 }
348
349 info!("Created minimal filesystem structure");
350
351 Ok(())
352}
353
354pub fn create_dev_nodes(dev_path: &Path, include_tty: bool) -> Result<()> {
358 info!("Creating device nodes at {:?}", dev_path);
359
360 let mut devices = vec![
362 ("null", SFlag::S_IFCHR, 1, 3),
363 ("zero", SFlag::S_IFCHR, 1, 5),
364 ("full", SFlag::S_IFCHR, 1, 7),
365 ("random", SFlag::S_IFCHR, 1, 8),
366 ("urandom", SFlag::S_IFCHR, 1, 9),
367 ];
368 if include_tty {
369 devices.push(("tty", SFlag::S_IFCHR, 5, 0));
370 }
371
372 let mut created_count = 0;
373 let mut failed_count = 0;
374
375 for (name, dev_type, major, minor) in devices {
376 let path = dev_path.join(name);
377 let mode = Mode::from_bits_truncate(0o660);
378 let dev = makedev(major, minor);
379
380 match mknod(&path, dev_type, mode, dev) {
381 Ok(_) => {
382 info!("Created device node: {:?}", path);
383 created_count += 1;
384 }
385 Err(e) => {
386 warn!(
388 "Failed to create device node {:?}: {} (this is normal in rootless mode)",
389 path, e
390 );
391 failed_count += 1;
392 }
393 }
394 }
395
396 if created_count > 0 {
397 info!("Successfully created {} device nodes", created_count);
398 }
399 if failed_count > 0 {
400 info!("Skipped {} device nodes (rootless mode)", failed_count);
401 }
402
403 Ok(())
404}
405
406pub fn bind_mount_rootfs(root: &Path, rootfs_path: &Path) -> Result<()> {
411 info!(
412 "Bind mounting production rootfs {:?} into container {:?}",
413 rootfs_path, root
414 );
415
416 if std::fs::symlink_metadata(rootfs_path).is_err() {
417 return Err(NucleusError::FilesystemError(format!(
418 "Rootfs path does not exist: {:?}",
419 rootfs_path
420 )));
421 }
422
423 let subdirs = ["bin", "sbin", "lib", "lib64", "usr", "etc", "nix"];
427
428 for subdir in &subdirs {
429 let source = rootfs_path.join(subdir);
430 if !source.exists() {
431 debug!("Rootfs subdir {} not present, skipping", subdir);
432 continue;
433 }
434
435 let target = root.join(subdir);
436 std::fs::create_dir_all(&target).map_err(|e| {
437 NucleusError::FilesystemError(format!(
438 "Failed to create mount point {:?}: {}",
439 target, e
440 ))
441 })?;
442
443 mount(
444 Some(&source),
445 &target,
446 None::<&str>,
447 MsFlags::MS_BIND | MsFlags::MS_REC,
448 None::<&str>,
449 )
450 .map_err(|e| {
451 NucleusError::FilesystemError(format!(
452 "Failed to bind mount rootfs {:?} -> {:?}: {}",
453 source, target, e
454 ))
455 })?;
456
457 mount(
459 None::<&str>,
460 &target,
461 None::<&str>,
462 MsFlags::MS_REMOUNT
463 | MsFlags::MS_BIND
464 | MsFlags::MS_RDONLY
465 | MsFlags::MS_REC
466 | MsFlags::MS_NOSUID
467 | MsFlags::MS_NODEV,
468 None::<&str>,
469 )
470 .map_err(|e| {
471 NucleusError::FilesystemError(format!(
472 "Failed to remount rootfs {:?} read-only: {}",
473 target, e
474 ))
475 })?;
476
477 info!("Mounted rootfs/{} read-only", subdir);
478 }
479
480 Ok(())
481}
482
483pub fn bind_mount_host_paths(root: &Path, best_effort: bool) -> Result<()> {
488 info!("Bind mounting host paths into container");
489
490 let host_paths = vec![
492 "/bin", "/usr", "/lib", "/lib64", "/nix", ];
494
495 for host_path in host_paths {
496 let host = Path::new(host_path);
497
498 if !host.exists() {
500 debug!("Skipping {} (not present on host)", host_path);
501 continue;
502 }
503
504 let container_path = root.join(host_path.trim_start_matches('/'));
505
506 if let Err(e) = std::fs::create_dir_all(&container_path) {
508 if best_effort {
509 warn!("Failed to create mount point {:?}: {}", container_path, e);
510 continue;
511 }
512 return Err(NucleusError::FilesystemError(format!(
513 "Failed to create mount point {:?}: {}",
514 container_path, e
515 )));
516 }
517
518 match mount(
522 Some(host),
523 &container_path,
524 None::<&str>,
525 MsFlags::MS_BIND | MsFlags::MS_REC,
526 None::<&str>,
527 ) {
528 Ok(_) => {
529 mount(
531 None::<&str>,
532 &container_path,
533 None::<&str>,
534 MsFlags::MS_REMOUNT
535 | MsFlags::MS_BIND
536 | MsFlags::MS_RDONLY
537 | MsFlags::MS_REC
538 | MsFlags::MS_NOSUID
539 | MsFlags::MS_NODEV,
540 None::<&str>,
541 )
542 .map_err(|e| {
543 NucleusError::FilesystemError(format!(
544 "Failed to remount {} as read-only: {}",
545 host_path, e
546 ))
547 })?;
548 info!(
549 "Bind mounted {} to {:?} (read-only)",
550 host_path, container_path
551 );
552 }
553 Err(e) => {
554 if best_effort {
555 warn!(
556 "Failed to bind mount {}: {} (continuing anyway)",
557 host_path, e
558 );
559 } else {
560 return Err(NucleusError::FilesystemError(format!(
561 "Failed to bind mount {}: {}",
562 host_path, e
563 )));
564 }
565 }
566 }
567 }
568
569 Ok(())
570}
571
/// Host paths that may never be a bind mount source when matched exactly.
const DENIED_BIND_MOUNT_SOURCES_EXACT: &[&str] = &[
    "/",
];

/// Host paths that may never be a bind mount source, either themselves or
/// through anything located beneath them.
const DENIED_BIND_MOUNT_SOURCE_PREFIXES: &[&str] = &[
    "/boot",
    "/dev",
    "/etc",
    "/home",
    "/proc",
    "/root",
    "/run",
    "/sys",
    "/var/log",
    "/var/run",
];
579
/// In-container paths that user volumes may not target, either directly or
/// through any path beneath them.
#[rustfmt::skip]
const RESERVED_VOLUME_DESTINATION_PREFIXES: &[&str] = &[
    "/bin", "/boot", "/dev", "/etc",
    "/lib", "/lib64", "/nix", "/proc",
    "/run/secrets", "/sbin", "/sys", "/usr",
];
599
600fn normalize_bind_mount_source_for_policy(source: &Path) -> Result<PathBuf> {
601 if !source.is_absolute() {
602 return Err(NucleusError::ConfigError(format!(
603 "Bind mount source must be absolute: {:?}",
604 source
605 )));
606 }
607
608 let mut normalized = PathBuf::from("/");
609
610 for component in source.components() {
611 match component {
612 Component::RootDir => {}
613 Component::CurDir => {}
614 Component::Normal(part) => normalized.push(part),
615 Component::ParentDir => {
616 normalized.pop();
617 if normalized.as_os_str().is_empty() {
618 normalized.push("/");
619 }
620 }
621 Component::Prefix(_) => {
622 return Err(NucleusError::ConfigError(format!(
623 "Unsupported bind mount source prefix: {:?}",
624 source
625 )));
626 }
627 }
628 }
629
630 Ok(normalized)
631}
632
633fn reject_denied_bind_mount_source(source: &Path) -> Result<()> {
634 for denied in DENIED_BIND_MOUNT_SOURCES_EXACT {
635 if source == Path::new(denied) {
636 return Err(NucleusError::ConfigError(format!(
637 "Bind mount source '{}' is a sensitive host path and cannot be mounted into containers",
638 source.display()
639 )));
640 }
641 }
642
643 for denied in DENIED_BIND_MOUNT_SOURCE_PREFIXES {
644 let denied_path = Path::new(denied);
645 if source == denied_path || source.starts_with(denied_path) {
646 return Err(NucleusError::ConfigError(format!(
647 "Bind mount source '{}' is under sensitive host path '{}' and cannot be mounted into containers",
648 source.display(),
649 denied
650 )));
651 }
652 }
653
654 Ok(())
655}
656
657pub fn validate_bind_mount_source_policy(source: &Path) -> Result<PathBuf> {
662 let normalized = normalize_bind_mount_source_for_policy(source)?;
663 reject_denied_bind_mount_source(&normalized)?;
664 Ok(normalized)
665}
666
667pub fn validate_bind_mount_source(source: &Path) -> Result<()> {
669 validate_bind_mount_source_policy(source)?;
670
671 let canonical = std::fs::canonicalize(source).map_err(|e| {
672 NucleusError::ConfigError(format!(
673 "Failed to resolve bind mount source {:?}: {}",
674 source, e
675 ))
676 })?;
677 reject_denied_bind_mount_source(&canonical)
678}
679
680fn reject_reserved_volume_destination(dest: &Path) -> Result<()> {
681 for reserved in RESERVED_VOLUME_DESTINATION_PREFIXES {
682 let reserved_path = Path::new(reserved);
683 if dest == reserved_path || dest.starts_with(reserved_path) {
684 return Err(NucleusError::ConfigError(format!(
685 "Volume destination '{}' is reserved for trusted container/runtime paths and cannot be overlaid",
686 dest.display()
687 )));
688 }
689 }
690
691 Ok(())
692}
693
694pub fn normalize_volume_destination(dest: &Path) -> Result<PathBuf> {
696 let normalized = normalize_container_destination(dest)?;
697 reject_reserved_volume_destination(&normalized)?;
698 Ok(normalized)
699}
700
701pub fn resolve_volume_destination(root: &Path, dest: &Path) -> Result<PathBuf> {
703 let normalized = normalize_volume_destination(dest)?;
704 let relative = normalized.strip_prefix("/").map_err(|_| {
705 NucleusError::ConfigError(format!(
706 "Volume destination is not absolute after normalization: {:?}",
707 normalized
708 ))
709 })?;
710 Ok(root.join(relative))
711}
712
713pub fn mount_volumes(root: &Path, volumes: &[crate::container::VolumeMount]) -> Result<()> {
715 use crate::container::VolumeSource;
716
717 if volumes.is_empty() {
718 return Ok(());
719 }
720
721 info!("Mounting {} volume(s) into container", volumes.len());
722
723 for volume in volumes {
724 let dest = resolve_volume_destination(root, &volume.dest)?;
725
726 match &volume.source {
727 VolumeSource::Bind { source } => {
728 validate_bind_mount_source(source)?;
730
731 if std::fs::symlink_metadata(source).is_err() {
734 return Err(NucleusError::FilesystemError(format!(
735 "Volume source does not exist: {:?}",
736 source
737 )));
738 }
739
740 if let Some(parent) = dest.parent() {
741 std::fs::create_dir_all(parent).map_err(|e| {
742 NucleusError::FilesystemError(format!(
743 "Failed to create volume mount parent {:?}: {}",
744 parent, e
745 ))
746 })?;
747 }
748
749 let recursive = source.is_dir();
750 if source.is_file() {
751 std::fs::write(&dest, "").map_err(|e| {
752 NucleusError::FilesystemError(format!(
753 "Failed to create volume mount point {:?}: {}",
754 dest, e
755 ))
756 })?;
757 } else {
758 std::fs::create_dir_all(&dest).map_err(|e| {
759 NucleusError::FilesystemError(format!(
760 "Failed to create volume mount dir {:?}: {}",
761 dest, e
762 ))
763 })?;
764 }
765
766 let initial_flags = if recursive {
767 MsFlags::MS_BIND | MsFlags::MS_REC
768 } else {
769 MsFlags::MS_BIND
770 };
771 mount(
772 Some(source.as_path()),
773 &dest,
774 None::<&str>,
775 initial_flags,
776 None::<&str>,
777 )
778 .map_err(|e| {
779 NucleusError::FilesystemError(format!(
780 "Failed to bind mount volume {:?} -> {:?}: {}",
781 source, dest, e
782 ))
783 })?;
784
785 let mut remount_flags =
786 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
787 if recursive {
788 remount_flags |= MsFlags::MS_REC;
789 }
790 if volume.read_only {
791 remount_flags |= MsFlags::MS_RDONLY;
792 }
793
794 mount(
795 None::<&str>,
796 &dest,
797 None::<&str>,
798 remount_flags,
799 None::<&str>,
800 )
801 .map_err(|e| {
802 NucleusError::FilesystemError(format!(
803 "Failed to remount volume {:?} with final flags: {}",
804 dest, e
805 ))
806 })?;
807
808 info!(
809 "Mounted bind volume {:?} -> {:?} ({})",
810 source,
811 volume.dest,
812 if volume.read_only { "ro" } else { "rw" }
813 );
814 }
815 VolumeSource::Tmpfs { size } => {
816 std::fs::create_dir_all(&dest).map_err(|e| {
817 NucleusError::FilesystemError(format!(
818 "Failed to create tmpfs mount dir {:?}: {}",
819 dest, e
820 ))
821 })?;
822
823 if let Some(value) = size.as_ref() {
826 let valid = value
827 .chars()
828 .all(|c| c.is_ascii_digit() || "kKmMgG".contains(c));
829 if !valid || value.is_empty() {
830 return Err(NucleusError::FilesystemError(format!(
831 "Invalid tmpfs size value '{}': only digits with optional K/M/G suffix allowed",
832 value
833 )));
834 }
835 }
836
837 let mount_data = size
840 .as_ref()
841 .map(|value| format!("size={},mode=0700", value))
842 .unwrap_or_else(|| "size=64M,mode=0700".to_string());
843
844 let mut flags = MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
845 if volume.read_only {
846 flags |= MsFlags::MS_RDONLY;
847 }
848 mount(
849 Some("tmpfs"),
850 &dest,
851 Some("tmpfs"),
852 flags,
853 Some(mount_data.as_str()),
854 )
855 .map_err(|e| {
856 NucleusError::FilesystemError(format!(
857 "Failed to mount tmpfs volume at {:?}: {}",
858 dest, e
859 ))
860 })?;
861
862 info!(
863 "Mounted tmpfs volume at {:?}{}{}",
864 volume.dest,
865 size.as_ref()
866 .map(|value| format!(" (size={})", value))
867 .unwrap_or_default(),
868 if volume.read_only { " (ro)" } else { "" }
869 );
870 }
871 }
872 }
873
874 Ok(())
875}
876
877pub fn mount_procfs(
884 proc_path: &Path,
885 best_effort: bool,
886 read_only: bool,
887 hide_pids: bool,
888) -> Result<()> {
889 info!(
890 "Mounting procfs at {:?} (hidepid={})",
891 proc_path,
892 if hide_pids { "2" } else { "0" }
893 );
894
895 let mount_data: Option<&str> = if hide_pids { Some("hidepid=2") } else { None };
896
897 let mounted = match mount(
898 Some("proc"),
899 proc_path,
900 Some("proc"),
901 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
902 mount_data,
903 ) {
904 Ok(_) => true,
905 Err(e) => handle_procfs_mount_failure(e, best_effort, hide_pids)?,
906 };
907
908 if mounted {
909 if read_only {
910 mount(
911 None::<&str>,
912 proc_path,
913 None::<&str>,
914 MsFlags::MS_REMOUNT
915 | MsFlags::MS_RDONLY
916 | MsFlags::MS_NOSUID
917 | MsFlags::MS_NODEV
918 | MsFlags::MS_NOEXEC,
919 mount_data,
920 )
921 .map_err(|e| {
922 NucleusError::FilesystemError(format!("Failed to remount procfs read-only: {}", e))
923 })?;
924 info!("Successfully mounted procfs (read-only)");
925 } else {
926 info!("Successfully mounted procfs");
927 }
928 }
929
930 Ok(())
931}
932
933fn handle_procfs_mount_failure(
934 e: nix::errno::Errno,
935 best_effort: bool,
936 hide_pids: bool,
937) -> Result<bool> {
938 if hide_pids {
939 return Err(NucleusError::FilesystemError(format!(
940 "Failed to mount procfs with required hidepid=2: {}",
941 e
942 )));
943 }
944
945 if best_effort {
946 warn!("Failed to mount procfs: {} (continuing anyway)", e);
947 Ok(false)
948 } else {
949 Err(NucleusError::FilesystemError(format!(
950 "Failed to mount procfs: {}",
951 e
952 )))
953 }
954}
955
/// Entries directly under `/proc` that are masked by bind mounting
/// `/dev/null` over them.
#[rustfmt::skip]
pub const PROC_NULL_MASKED: &[&str] = &[
    "kallsyms", "kcore", "sched_debug", "timer_list",
    "timer_stats", "keys", "latency_stats", "config.gz",
    "sysrq-trigger", "kpagecount", "kpageflags", "kpagecgroup",
];

/// Entries directly under `/proc` that are remounted read-only.
pub const PROC_READONLY_PATHS: &[&str] = &[
    "bus",
    "fs",
    "irq",
    "sys",
];

/// Entries directly under `/proc` that are masked with an empty read-only tmpfs.
pub const PROC_TMPFS_MASKED: &[&str] = &[
    "acpi",
    "scsi",
];
979
980fn remount_proc_path_readonly(target: &Path) -> Result<()> {
981 mount(
982 Some(target),
983 target,
984 None::<&str>,
985 MsFlags::MS_BIND | MsFlags::MS_REC,
986 None::<&str>,
987 )
988 .map_err(|e| {
989 NucleusError::FilesystemError(format!(
990 "Failed to bind-mount {:?} onto itself for read-only remount: {}",
991 target, e
992 ))
993 })?;
994
995 mount(
996 None::<&str>,
997 target,
998 None::<&str>,
999 MsFlags::MS_REMOUNT
1000 | MsFlags::MS_BIND
1001 | MsFlags::MS_RDONLY
1002 | MsFlags::MS_NOSUID
1003 | MsFlags::MS_NODEV
1004 | MsFlags::MS_NOEXEC,
1005 None::<&str>,
1006 )
1007 .map_err(|e| {
1008 NucleusError::FilesystemError(format!("Failed to remount {:?} read-only: {}", target, e))
1009 })?;
1010
1011 Ok(())
1012}
1013
1014pub fn mask_proc_paths(proc_path: &Path, production: bool) -> Result<()> {
1022 info!("Masking sensitive /proc paths");
1023
1024 const CRITICAL_PROC_PATHS: &[&str] = &["kcore", "kallsyms", "sysrq-trigger"];
1025
1026 for name in PROC_READONLY_PATHS {
1027 let target = proc_path.join(name);
1028 if !target.exists() {
1029 continue;
1030 }
1031 match remount_proc_path_readonly(&target) {
1032 Ok(_) => debug!("Remounted /proc/{} read-only", name),
1033 Err(e) => {
1034 if production {
1035 return Err(NucleusError::FilesystemError(format!(
1036 "Failed to remount /proc/{} read-only in production mode: {}",
1037 name, e
1038 )));
1039 }
1040 warn!(
1041 "Failed to remount /proc/{} read-only: {} (continuing)",
1042 name, e
1043 );
1044 }
1045 }
1046 }
1047
1048 let dev_null = Path::new("/dev/null");
1049
1050 for name in PROC_NULL_MASKED {
1051 let target = proc_path.join(name);
1052 if !target.exists() {
1053 continue;
1054 }
1055 match mount(
1056 Some(dev_null),
1057 &target,
1058 None::<&str>,
1059 MsFlags::MS_BIND,
1060 None::<&str>,
1061 ) {
1062 Ok(_) => {
1063 if let Err(e) = mount(
1066 None::<&str>,
1067 &target,
1068 None::<&str>,
1069 MsFlags::MS_REMOUNT | MsFlags::MS_BIND | MsFlags::MS_RDONLY,
1070 None::<&str>,
1071 ) {
1072 if production && CRITICAL_PROC_PATHS.contains(name) {
1073 return Err(NucleusError::FilesystemError(format!(
1074 "Failed to remount /proc/{} read-only in production mode: {}",
1075 name, e
1076 )));
1077 }
1078 warn!(
1079 "Failed to remount /proc/{} read-only: {} (continuing)",
1080 name, e
1081 );
1082 }
1083 debug!("Masked /proc/{} (read-only)", name);
1084 }
1085 Err(e) => {
1086 if production && CRITICAL_PROC_PATHS.contains(name) {
1087 return Err(NucleusError::FilesystemError(format!(
1088 "Failed to mask critical /proc/{} in production mode: {}",
1089 name, e
1090 )));
1091 }
1092 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
1093 }
1094 }
1095 }
1096
1097 for name in PROC_TMPFS_MASKED {
1098 let target = proc_path.join(name);
1099 if !target.exists() {
1100 continue;
1101 }
1102 match mount(
1103 Some("tmpfs"),
1104 &target,
1105 Some("tmpfs"),
1106 MsFlags::MS_RDONLY | MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
1107 Some("size=0"),
1108 ) {
1109 Ok(_) => debug!("Masked /proc/{}", name),
1110 Err(e) => {
1111 if production {
1112 return Err(NucleusError::FilesystemError(format!(
1113 "Failed to mask /proc/{} in production mode: {}",
1114 name, e
1115 )));
1116 }
1117 warn!("Failed to mask /proc/{}: {} (continuing)", name, e);
1118 }
1119 }
1120 }
1121
1122 info!("Finished masking sensitive /proc paths");
1123 Ok(())
1124}
1125
1126pub fn switch_root(new_root: &Path, allow_chroot_fallback: bool) -> Result<()> {
1131 info!("Switching root to {:?}", new_root);
1132
1133 match pivot_root_impl(new_root) {
1134 Ok(()) => {
1135 info!("Successfully switched root using pivot_root");
1136 Ok(())
1137 }
1138 Err(e) => {
1139 if allow_chroot_fallback {
1140 warn!(
1141 "pivot_root failed ({}), falling back to chroot due to explicit \
1142 configuration",
1143 e
1144 );
1145 chroot_impl(new_root)
1146 } else {
1147 Err(NucleusError::PivotRootError(format!(
1148 "pivot_root failed: {}. chroot fallback is disabled by default; use \
1149 --allow-chroot-fallback to allow weaker isolation",
1150 e
1151 )))
1152 }
1153 }
1154 }
1155}
1156
1157fn pivot_root_impl(new_root: &Path) -> Result<()> {
1163 use nix::unistd::pivot_root;
1164
1165 let old_root = new_root.join(".old_root");
1169 std::fs::create_dir_all(&old_root).map_err(|e| {
1170 NucleusError::PivotRootError(format!("Failed to create old_root directory: {}", e))
1171 })?;
1172
1173 pivot_root(new_root, &old_root)
1175 .map_err(|e| NucleusError::PivotRootError(format!("pivot_root syscall failed: {}", e)))?;
1176
1177 std::env::set_current_dir("/")
1179 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
1180
1181 nix::mount::umount2("/.old_root", nix::mount::MntFlags::MNT_DETACH)
1183 .map_err(|e| NucleusError::PivotRootError(format!("Failed to unmount old root: {}", e)))?;
1184
1185 let _ = std::fs::remove_dir("/.old_root");
1187
1188 Ok(())
1189}
1190
1191fn chroot_impl(new_root: &Path) -> Result<()> {
1195 fn close_non_stdio_fds_after_chroot() -> Result<()> {
1196 let ret = unsafe { libc::syscall(libc::SYS_close_range, 3u32, u32::MAX, 0u32) };
1199 if ret == 0 {
1200 return Ok(());
1201 }
1202
1203 let max_fd = match unsafe { libc::sysconf(libc::_SC_OPEN_MAX) } {
1204 n if n > 3 && n <= i32::MAX as libc::c_long => n as i32,
1205 _ => 1024,
1206 };
1207
1208 for fd in 3..max_fd {
1209 if unsafe { libc::close(fd) } != 0 {
1210 let err = std::io::Error::last_os_error();
1211 if err.raw_os_error() != Some(libc::EBADF) {
1212 return Err(NucleusError::PivotRootError(format!(
1213 "Failed to close inherited fd {} after chroot: {}",
1214 fd, err
1215 )));
1216 }
1217 }
1218 }
1219
1220 Ok(())
1221 }
1222
1223 chroot(new_root)
1224 .map_err(|e| NucleusError::PivotRootError(format!("chroot syscall failed: {}", e)))?;
1225
1226 std::env::set_current_dir("/")
1228 .map_err(|e| NucleusError::PivotRootError(format!("Failed to chdir to /: {}", e)))?;
1229
1230 close_non_stdio_fds_after_chroot()?;
1231
1232 if let Err(e) = caps::drop(
1234 None,
1235 caps::CapSet::Bounding,
1236 caps::Capability::CAP_SYS_CHROOT,
1237 ) {
1238 debug!(
1239 "Could not drop CAP_SYS_CHROOT after chroot: {} (may not be present)",
1240 e
1241 );
1242 }
1243 if let Err(e) = caps::drop(
1244 None,
1245 caps::CapSet::Effective,
1246 caps::Capability::CAP_SYS_CHROOT,
1247 ) {
1248 debug!(
1249 "Could not drop effective CAP_SYS_CHROOT: {} (may not be present)",
1250 e
1251 );
1252 }
1253 if let Err(e) = caps::drop(
1254 None,
1255 caps::CapSet::Permitted,
1256 caps::Capability::CAP_SYS_CHROOT,
1257 ) {
1258 debug!(
1259 "Could not drop permitted CAP_SYS_CHROOT: {} (may not be present)",
1260 e
1261 );
1262 }
1263
1264 info!("Successfully switched root using chroot (CAP_SYS_CHROOT dropped)");
1265
1266 Ok(())
1267}
1268
1269pub fn mount_secrets(root: &Path, secrets: &[crate::container::SecretMount]) -> Result<()> {
1274 if secrets.is_empty() {
1275 return Ok(());
1276 }
1277
1278 info!("Mounting {} secret(s) into container", secrets.len());
1279
1280 for secret in secrets {
1281 let source_fd = open(
1282 &secret.source,
1283 OFlag::O_PATH | OFlag::O_NOFOLLOW | OFlag::O_CLOEXEC,
1284 Mode::empty(),
1285 )
1286 .map_err(|e| {
1287 NucleusError::FilesystemError(format!(
1288 "Failed to open secret source {:?} with O_NOFOLLOW: {}",
1289 secret.source, e
1290 ))
1291 })?;
1292 let source_stat = fstat(&source_fd).map_err(|e| {
1293 NucleusError::FilesystemError(format!(
1294 "Failed to stat secret source {:?}: {}",
1295 secret.source, e
1296 ))
1297 })?;
1298 let source_kind = SFlag::from_bits_truncate(source_stat.st_mode);
1299 let source_is_file = source_kind == SFlag::S_IFREG;
1300 let source_is_dir = source_kind == SFlag::S_IFDIR;
1301 if !source_is_file && !source_is_dir {
1302 return Err(NucleusError::FilesystemError(format!(
1303 "Secret source {:?} must be a regular file or directory",
1304 secret.source
1305 )));
1306 }
1307 let source_fd_path = PathBuf::from(format!("/proc/self/fd/{}", source_fd.as_raw_fd()));
1308
1309 let dest = resolve_container_destination(root, &secret.dest)?;
1311
1312 if let Some(parent) = dest.parent() {
1314 std::fs::create_dir_all(parent).map_err(|e| {
1315 NucleusError::FilesystemError(format!(
1316 "Failed to create secret mount parent {:?}: {}",
1317 parent, e
1318 ))
1319 })?;
1320 }
1321
1322 if source_is_file {
1324 std::fs::write(&dest, "").map_err(|e| {
1325 NucleusError::FilesystemError(format!(
1326 "Failed to create secret mount point {:?}: {}",
1327 dest, e
1328 ))
1329 })?;
1330 } else {
1331 std::fs::create_dir_all(&dest).map_err(|e| {
1332 NucleusError::FilesystemError(format!(
1333 "Failed to create secret mount dir {:?}: {}",
1334 dest, e
1335 ))
1336 })?;
1337 }
1338
1339 mount(
1341 Some(source_fd_path.as_path()),
1342 &dest,
1343 None::<&str>,
1344 MsFlags::MS_BIND,
1345 None::<&str>,
1346 )
1347 .map_err(|e| {
1348 NucleusError::FilesystemError(format!(
1349 "Failed to bind mount secret {:?}: {}",
1350 secret.source, e
1351 ))
1352 })?;
1353
1354 mount(
1355 None::<&str>,
1356 &dest,
1357 None::<&str>,
1358 MsFlags::MS_REMOUNT
1359 | MsFlags::MS_BIND
1360 | MsFlags::MS_RDONLY
1361 | MsFlags::MS_NOSUID
1362 | MsFlags::MS_NODEV
1363 | MsFlags::MS_NOEXEC,
1364 None::<&str>,
1365 )
1366 .map_err(|e| {
1367 NucleusError::FilesystemError(format!(
1368 "Failed to remount secret {:?} read-only: {}",
1369 dest, e
1370 ))
1371 })?;
1372
1373 if source_is_file {
1375 use std::os::unix::fs::PermissionsExt;
1376 let perms = std::fs::Permissions::from_mode(secret.mode);
1377 if let Err(e) = std::fs::set_permissions(&dest, perms) {
1378 warn!(
1379 "Failed to set mode {:04o} on secret {:?}: {} (bind mount may override)",
1380 secret.mode, dest, e
1381 );
1382 }
1383 }
1384
1385 debug!(
1386 "Mounted secret {:?} -> {:?} (mode {:04o})",
1387 secret.source, secret.dest, secret.mode
1388 );
1389 }
1390
1391 Ok(())
1392}
1393
1394pub fn mount_secrets_inmemory(
1400 root: &Path,
1401 secrets: &[crate::container::SecretMount],
1402 identity: &crate::container::ProcessIdentity,
1403) -> Result<()> {
1404 if secrets.is_empty() {
1405 return Ok(());
1406 }
1407
1408 info!("Mounting {} secret(s) on in-memory tmpfs", secrets.len());
1409
1410 let secrets_dir = root.join("run/secrets");
1411 std::fs::create_dir_all(&secrets_dir).map_err(|e| {
1412 NucleusError::FilesystemError(format!(
1413 "Failed to create secrets dir {:?}: {}",
1414 secrets_dir, e
1415 ))
1416 })?;
1417
1418 if let Err(e) = mount(
1420 Some("tmpfs"),
1421 &secrets_dir,
1422 Some("tmpfs"),
1423 MsFlags::MS_NOSUID | MsFlags::MS_NODEV | MsFlags::MS_NOEXEC,
1424 Some("size=16m,mode=0700"),
1425 ) {
1426 let _ = std::fs::remove_dir_all(&secrets_dir);
1427 return Err(NucleusError::FilesystemError(format!(
1428 "Failed to mount secrets tmpfs at {:?}: {}",
1429 secrets_dir, e
1430 )));
1431 }
1432
1433 if !identity.is_root() {
1434 nix::unistd::chown(
1435 &secrets_dir,
1436 Some(nix::unistd::Uid::from_raw(identity.uid)),
1437 Some(nix::unistd::Gid::from_raw(identity.gid)),
1438 )
1439 .map_err(|e| {
1440 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
1441 let _ = std::fs::remove_dir_all(&secrets_dir);
1442 NucleusError::FilesystemError(format!(
1443 "Failed to set /run/secrets owner to {}:{}: {}",
1444 identity.uid, identity.gid, e
1445 ))
1446 })?;
1447 }
1448
1449 let result = mount_secrets_inmemory_inner(&secrets_dir, root, secrets, identity);
1451 if let Err(ref e) = result {
1452 let _ = nix::mount::umount2(&secrets_dir, nix::mount::MntFlags::MNT_DETACH);
1453 let _ = std::fs::remove_dir_all(&secrets_dir);
1454 return Err(NucleusError::FilesystemError(format!(
1455 "Secret mount failed (rolled back): {}",
1456 e
1457 )));
1458 }
1459
1460 info!("All secrets mounted on in-memory tmpfs");
1461 Ok(())
1462}
1463
1464fn mount_secrets_inmemory_inner(
1465 secrets_dir: &Path,
1466 root: &Path,
1467 secrets: &[crate::container::SecretMount],
1468 identity: &crate::container::ProcessIdentity,
1469) -> Result<()> {
1470 for secret in secrets {
1471 let mut content = read_regular_file_nofollow(&secret.source)?;
1472
1473 let dest = resolve_container_destination(secrets_dir, &secret.dest)?;
1475
1476 if let Some(parent) = dest.parent() {
1478 std::fs::create_dir_all(parent).map_err(|e| {
1479 NucleusError::FilesystemError(format!(
1480 "Failed to create secret parent dir {:?}: {}",
1481 parent, e
1482 ))
1483 })?;
1484 }
1485
1486 std::fs::write(&dest, &content).map_err(|e| {
1488 NucleusError::FilesystemError(format!("Failed to write secret to {:?}: {}", dest, e))
1489 })?;
1490
1491 {
1493 use std::os::unix::fs::PermissionsExt;
1494 let perms = std::fs::Permissions::from_mode(secret.mode);
1495 std::fs::set_permissions(&dest, perms).map_err(|e| {
1496 NucleusError::FilesystemError(format!(
1497 "Failed to set permissions on secret {:?}: {}",
1498 dest, e
1499 ))
1500 })?;
1501 }
1502
1503 if !identity.is_root() {
1504 nix::unistd::chown(
1505 &dest,
1506 Some(nix::unistd::Uid::from_raw(identity.uid)),
1507 Some(nix::unistd::Gid::from_raw(identity.gid)),
1508 )
1509 .map_err(|e| {
1510 NucleusError::FilesystemError(format!(
1511 "Failed to set permissions owner on secret {:?} to {}:{}: {}",
1512 dest, identity.uid, identity.gid, e
1513 ))
1514 })?;
1515 }
1516
1517 zeroize::Zeroize::zeroize(&mut content);
1519 drop(content);
1520
1521 let container_dest = resolve_container_destination(root, &secret.dest)?;
1523 if container_dest != dest {
1524 if let Some(parent) = container_dest.parent() {
1525 std::fs::create_dir_all(parent).map_err(|e| {
1526 NucleusError::FilesystemError(format!(
1527 "Failed to create secret mount parent {:?}: {}",
1528 parent, e
1529 ))
1530 })?;
1531 }
1532
1533 std::fs::write(&container_dest, "").map_err(|e| {
1534 NucleusError::FilesystemError(format!(
1535 "Failed to create secret mount point {:?}: {}",
1536 container_dest, e
1537 ))
1538 })?;
1539
1540 mount(
1541 Some(dest.as_path()),
1542 &container_dest,
1543 None::<&str>,
1544 MsFlags::MS_BIND,
1545 None::<&str>,
1546 )
1547 .map_err(|e| {
1548 NucleusError::FilesystemError(format!(
1549 "Failed to bind mount secret {:?} -> {:?}: {}",
1550 dest, container_dest, e
1551 ))
1552 })?;
1553
1554 mount(
1555 None::<&str>,
1556 &container_dest,
1557 None::<&str>,
1558 MsFlags::MS_REMOUNT
1559 | MsFlags::MS_BIND
1560 | MsFlags::MS_RDONLY
1561 | MsFlags::MS_NOSUID
1562 | MsFlags::MS_NODEV
1563 | MsFlags::MS_NOEXEC,
1564 None::<&str>,
1565 )
1566 .map_err(|e| {
1567 NucleusError::FilesystemError(format!(
1568 "Failed to remount secret {:?} read-only: {}",
1569 container_dest, e
1570 ))
1571 })?;
1572 }
1573
1574 debug!(
1575 "Secret {:?} -> {:?} (in-memory tmpfs, mode {:04o})",
1576 secret.source, secret.dest, secret.mode
1577 );
1578 }
1579
1580 Ok(())
1581}
1582
#[cfg(test)]
mod tests {
    use super::*;
    use std::os::unix::fs::symlink;

    /// Each listed host path must be refused as a bind-mount source with a
    /// "sensitive host path" error.
    #[test]
    fn test_validate_bind_mount_source_rejects_sensitive_subtrees() {
        let sensitive_sources = [
            "/",
            "/boot",
            "/dev/kmsg",
            "/etc",
            "/etc/passwd",
            "/home/alice/.ssh",
            "/proc/sys",
            "/root/.ssh",
            "/run/secrets",
            "/sys/fs/cgroup",
            "/var/log",
        ];

        for path in sensitive_sources {
            let err = validate_bind_mount_source(Path::new(path)).unwrap_err();
            let message = err.to_string();
            assert!(
                message.contains("sensitive host path"),
                "expected sensitive-path rejection for {path}, got: {err}"
            );
        }
    }

    /// A freshly created directory inside a temp dir is accepted as a source.
    #[test]
    fn test_validate_bind_mount_source_allows_regular_host_paths() {
        let scratch = tempfile::TempDir::new().unwrap();
        let data_dir = scratch.path().join("data");
        std::fs::create_dir(&data_dir).unwrap();

        validate_bind_mount_source(&data_dir).unwrap();
    }

    /// A path that reaches the same directory through `..` is still accepted.
    #[test]
    fn test_validate_bind_mount_source_normalizes_parent_components_before_filtering() {
        let scratch = tempfile::TempDir::new().unwrap();
        let data_dir = scratch.path().join("data");
        std::fs::create_dir(&data_dir).unwrap();

        let via_parent = data_dir.join("../data");
        validate_bind_mount_source(&via_parent).unwrap();
    }

    /// The policy check rejects a `..` traversal into `/etc` for a path that
    /// this test never creates.
    #[test]
    fn test_bind_mount_source_policy_rejects_sensitive_paths_before_creation() {
        let traversal = Path::new("/tmp/../../etc/nucleus-volume");
        let err = validate_bind_mount_source_policy(traversal).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("sensitive host path"),
            "expected sensitive-path rejection before path creation, got: {err}"
        );
    }

    /// Volume destinations below the listed container paths are rejected with
    /// a "reserved" error.
    #[test]
    fn test_volume_destinations_reject_reserved_container_paths() {
        let reserved_destinations = [
            "/bin/tool",
            "/dev/null",
            "/etc/app",
            "/lib64/ld-linux-x86-64.so.2",
            "/nix/store/data",
            "/proc/sys",
            "/run/secrets/token",
            "/usr/local/bin",
        ];

        for path in reserved_destinations {
            let err = normalize_volume_destination(Path::new(path)).unwrap_err();
            let message = err.to_string();
            assert!(
                message.contains("reserved"),
                "expected reserved destination rejection for {path}, got: {err}"
            );
        }
    }

    /// Ordinary data paths are returned unchanged.
    #[test]
    fn test_volume_destinations_allow_data_paths() {
        for data_path in ["/var/lib/app", "/opt/app/data"] {
            let normalized = normalize_volume_destination(Path::new(data_path)).unwrap();
            assert_eq!(normalized, PathBuf::from(data_path));
        }
    }

    /// A rootfs path containing `..` is rejected with a "parent traversal" error.
    #[test]
    fn test_production_rootfs_path_rejects_parent_traversal() {
        let scratch = tempfile::TempDir::new().unwrap();
        let store = scratch.path().join("store");
        std::fs::create_dir(&store).unwrap();

        let escaping = store.join("../outside-rootfs");
        let err = validate_rootfs_path_under_store(&escaping, &store).unwrap_err();

        let message = err.to_string();
        assert!(
            message.contains("parent traversal"),
            "expected parent traversal rejection, got: {err}"
        );
    }

    /// A symlink inside the store that points outside of it is rejected.
    #[test]
    fn test_production_rootfs_path_rejects_symlink_escape() {
        let scratch = tempfile::TempDir::new().unwrap();
        let store = scratch.path().join("store");
        let outside = scratch.path().join("outside-rootfs");
        std::fs::create_dir(&store).unwrap();
        std::fs::create_dir(&outside).unwrap();

        let link = store.join("rootfs-link");
        symlink(&outside, &link).unwrap();

        let err = validate_rootfs_path_under_store(&link, &store).unwrap_err();

        let message = err.to_string();
        assert!(
            message.contains("resolve under"),
            "expected symlink escape rejection, got: {err}"
        );
    }

    /// A symlink that stays inside the store resolves to the canonical target.
    #[test]
    fn test_production_rootfs_path_returns_canonical_store_target() {
        let scratch = tempfile::TempDir::new().unwrap();
        let store = scratch.path().join("store");
        let rootfs = store.join("abcd-rootfs");
        std::fs::create_dir(&store).unwrap();
        std::fs::create_dir(&rootfs).unwrap();

        let link = store.join("rootfs-link");
        symlink(&rootfs, &link).unwrap();

        let canonical = validate_rootfs_path_under_store(&link, &store).unwrap();
        let expected = std::fs::canonicalize(rootfs).unwrap();

        assert_eq!(canonical, expected);
    }

    /// `sysrq-trigger` must be part of the null-masked /proc entries.
    #[test]
    fn test_proc_mask_includes_sysrq_trigger() {
        let masked = PROC_NULL_MASKED.contains(&"sysrq-trigger");
        assert!(
            masked,
            "/proc/sysrq-trigger must be masked to prevent host DoS"
        );
    }

    /// `timer_stats` must be part of the null-masked /proc entries.
    #[test]
    fn test_proc_mask_includes_timer_stats() {
        let masked = PROC_NULL_MASKED.contains(&"timer_stats");
        assert!(
            masked,
            "/proc/timer_stats must be masked to prevent kernel info leakage"
        );
    }

    /// All three kpage* files must be part of the null-masked /proc entries.
    #[test]
    fn test_proc_mask_includes_kpage_files() {
        for entry in ["kpagecount", "kpageflags", "kpagecgroup"].iter() {
            assert!(
                PROC_NULL_MASKED.contains(entry),
                "/proc/{} must be masked to prevent host memory layout leakage",
                entry
            );
        }
    }

    /// Checks which of the three /proc lists each well-known entry belongs to:
    /// null-masked, tmpfs-masked, or read-only (and not tmpfs-masked).
    #[test]
    fn test_proc_mask_includes_oci_standard_paths() {
        for entry in ["kallsyms", "kcore", "sched_debug", "keys", "config.gz"].iter() {
            assert!(
                PROC_NULL_MASKED.contains(entry),
                "/proc/{} must be in null-masked list (OCI spec)",
                entry
            );
        }

        for entry in ["acpi", "scsi"].iter() {
            assert!(
                PROC_TMPFS_MASKED.contains(entry),
                "/proc/{} must be in tmpfs-masked list (OCI spec)",
                entry
            );
        }

        for entry in ["bus", "fs", "irq", "sys"].iter() {
            assert!(
                PROC_READONLY_PATHS.contains(entry),
                "/proc/{} must be in read-only remount list (OCI spec)",
                entry
            );
            assert!(
                !PROC_TMPFS_MASKED.contains(entry),
                "/proc/{} must stay visible read-only, not hidden behind tmpfs",
                entry
            );
        }
    }

    /// With both boolean arguments set to `true`, an `EINVAL` mount failure is
    /// reported as an error mentioning "required hidepid=2".
    #[test]
    fn test_procfs_hidepid_failure_fails_closed_even_best_effort() {
        let outcome = handle_procfs_mount_failure(nix::errno::Errno::EINVAL, true, true);
        let err = outcome.unwrap_err();

        let message = err.to_string();
        assert!(
            message.contains("required hidepid=2"),
            "hidepid=2 failures must remain fatal in production/rootless paths, got: {err}"
        );
    }

    /// With the arguments `(EPERM, true, false)` the handler returns `Ok(false)`.
    #[test]
    fn test_procfs_best_effort_only_applies_without_hidepid() {
        let outcome = handle_procfs_mount_failure(nix::errno::Errno::EPERM, true, false).unwrap();
        assert!(
            !outcome,
            "best-effort procfs mount failures may only continue when hidepid was not requested"
        );
    }

    /// The mount point and the per-mount options are taken from a mountinfo line.
    #[test]
    fn test_parse_mountinfo_line_uses_mountinfo_mount_point_and_flags() {
        let line =
            "36 25 0:32 / /run/secrets rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,size=1024k";
        let parsed = parse_mountinfo_line(line).unwrap();
        let (mount_point, flags) = parsed;

        assert_eq!(mount_point, "/run/secrets");
        assert!(flags.contains("nosuid"));
        assert!(flags.contains("nodev"));
        assert!(flags.contains("noexec"));
    }

    /// Octal escapes such as `\040` in the mount point are decoded.
    #[test]
    fn test_parse_mountinfo_line_decodes_escaped_mount_points() {
        let line = "41 25 0:40 / /path\\040with\\040spaces ro,nosuid,nodev - ext4 /dev/root ro";
        let parsed = parse_mountinfo_line(line).unwrap();
        let (mount_point, flags) = parsed;

        assert_eq!(mount_point, "/path with spaces");
        assert!(flags.contains("ro"));
    }

    /// Scans this file's own source text for the chroot fallback function and
    /// checks that the text up to its matching closing brace contains the
    /// fd-closing call.
    #[test]
    fn test_chroot_impl_closes_non_stdio_fds() {
        let source = include_str!("mount.rs");
        let signature_at = source.find("fn chroot_impl").unwrap();
        let tail = &source[signature_at..];
        let brace_at = tail.find('{').unwrap();

        // Walk forward from the opening brace, tracking nesting, until the
        // brace that closes the function is found.
        let mut nesting = 0u32;
        let mut body_end = brace_at;
        for (offset, ch) in tail[brace_at..].char_indices() {
            if ch == '{' {
                nesting += 1;
            } else if ch == '}' {
                nesting -= 1;
                if nesting == 0 {
                    body_end = brace_at + offset + 1;
                    break;
                }
            }
        }

        let body = &tail[..body_end];
        assert!(
            body.contains("close_non_stdio_fds_after_chroot()?"),
            "chroot fallback must close inherited non-stdio fds before continuing setup"
        );
    }

    /// A regular file is read back with its exact contents.
    #[test]
    fn test_read_regular_file_nofollow_reads_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let secret_path = scratch.path().join("secret.txt");
        std::fs::write(&secret_path, "supersecret").unwrap();

        let content = read_regular_file_nofollow(&secret_path).unwrap();
        assert_eq!(content, b"supersecret");
    }

    /// Reading through a symlink fails with an error mentioning `O_NOFOLLOW`.
    #[test]
    fn test_read_regular_file_nofollow_rejects_symlink() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("target.txt");
        let link = scratch.path().join("secret-link");
        std::fs::write(&target, "supersecret").unwrap();
        symlink(&target, &link).unwrap();

        let err = read_regular_file_nofollow(&link).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("O_NOFOLLOW"),
            "symlink reads must fail via O_NOFOLLOW"
        );
    }
}