1use super::landlock::LandlockManager;
2use crate::error::{NucleusError, Result};
3use crate::oci::OciBundle;
4use nix::unistd::Uid;
5use sha2::{Digest, Sha256};
6use std::ffi::CString;
7use std::fs::{self, DirBuilder, OpenOptions};
8use std::io;
9use std::os::unix::fs::{DirBuilderExt, MetadataExt, OpenOptionsExt, PermissionsExt};
10use std::path::{Component, Path, PathBuf};
11use std::process::Command;
12use tracing::{debug, info, warn};
13
/// Nix store root; compiled only for tests, where it serves as a sample
/// execute-allowlist root.
#[cfg(test)]
const NIX_STORE_EXEC_ROOT: &str = "/nix/store";
16
/// Network mode handed to runsc through its `--network` flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GVisorNetworkMode {
    /// Rendered as `--network none`.
    None,
    /// Rendered as `--network sandbox`.
    Sandbox,
    /// Rendered as `--network host`.
    Host,
}
28
/// gVisor platform handed to runsc through its `--platform` flag.
///
/// Derives `clap::ValueEnum` and the serde traits so it can be used as a CLI
/// value and in serialized configuration.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    clap::ValueEnum,
    serde::Serialize,
    serde::Deserialize,
)]
pub enum GVisorPlatform {
    /// Default platform; rendered as `systrap`.
    #[default]
    Systrap,
    /// Rendered as `kvm`.
    Kvm,
    /// Rendered as `ptrace`.
    Ptrace,
}
50
51impl GVisorPlatform {
52 pub fn as_flag(self) -> &'static str {
53 match self {
54 Self::Systrap => "systrap",
55 Self::Kvm => "kvm",
56 Self::Ptrace => "ptrace",
57 }
58 }
59}
60
/// Options controlling how `runsc run` is invoked for an OCI bundle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GVisorOciRunOptions {
    /// Value rendered after `--network`.
    pub network_mode: GVisorNetworkMode,
    /// When true, `--ignore-cgroups` is added to the runsc command line.
    pub ignore_cgroups: bool,
    /// When true, `--rootless` is added to the runsc command line and the
    /// supervisor execute allowlist is applied before exec.
    pub runsc_rootless: bool,
    /// When true, failing to apply the supervisor execute allowlist is an
    /// error instead of a warning.
    pub require_supervisor_exec_policy: bool,
    /// Value rendered after `--platform`.
    pub platform: GVisorPlatform,
}
75
76impl Default for GVisorOciRunOptions {
77 fn default() -> Self {
78 Self {
79 network_mode: GVisorNetworkMode::None,
80 ignore_cgroups: false,
81 runsc_rootless: false,
82 require_supervisor_exec_policy: false,
83 platform: GVisorPlatform::default(),
84 }
85 }
86}
87
88impl GVisorOciRunOptions {
89 fn network_flag(self) -> &'static str {
90 match self.network_mode {
91 GVisorNetworkMode::None => "none",
92 GVisorNetworkMode::Sandbox => "sandbox",
93 GVisorNetworkMode::Host => "host",
94 }
95 }
96}
97
/// Handle to a located `runsc` binary, used to launch OCI bundles under gVisor.
pub struct GVisorRuntime {
    /// Filesystem path of the runsc binary this runtime will execute.
    runsc_path: String,
}
105
106impl GVisorRuntime {
107 pub fn new() -> Result<Self> {
111 let runsc_path = Self::find_runsc()?;
112 info!("Found runsc at: {}", runsc_path);
113 Ok(Self { runsc_path })
114 }
115
    /// Builds a runtime around an explicit runsc path.
    ///
    /// The path is stored as given; no lookup or validation happens here.
    pub fn with_path(runsc_path: String) -> Self {
        Self { runsc_path }
    }
124
    /// Locates and validates a runsc binary, returning its path without
    /// constructing a runtime.
    pub fn resolve_path() -> Result<String> {
        Self::find_runsc()
    }
131
132 fn find_runsc() -> Result<String> {
134 let paths = vec![
136 "/usr/local/bin/runsc",
137 "/usr/bin/runsc",
138 "/opt/gvisor/runsc",
139 ];
140
141 for path in &paths {
142 if let Some(validated) = Self::validate_runsc_path(Path::new(path))? {
143 return Ok(validated);
144 }
145 }
146
147 if Uid::effective().is_root() {
150 return Err(NucleusError::GVisorError(
151 "runsc binary not found in trusted system paths".to_string(),
152 ));
153 }
154
155 if let Some(path_var) = std::env::var_os("PATH") {
157 for dir in std::env::split_paths(&path_var) {
158 let candidate = dir.join("runsc");
159 if let Some(validated) = Self::validate_runsc_path(&candidate)? {
160 return Ok(validated);
161 }
162 }
163 }
164
165 Err(NucleusError::GVisorError(
166 "runsc binary not found. Please install gVisor.".to_string(),
167 ))
168 }
169
170 fn validate_runsc_path(path: &Path) -> Result<Option<String>> {
171 if !path.exists() {
172 return Ok(None);
173 }
174 if !path.is_file() {
175 return Ok(None);
176 }
177
178 let canonical = std::fs::canonicalize(path).map_err(|e| {
179 NucleusError::GVisorError(format!(
180 "Failed to canonicalize runsc path {:?}: {}",
181 path, e
182 ))
183 })?;
184
185 let resolved = Self::unwrap_nix_wrapper(&canonical).unwrap_or_else(|| canonical.clone());
190
191 let metadata = std::fs::metadata(&resolved).map_err(|e| {
192 NucleusError::GVisorError(format!("Failed to stat runsc path {:?}: {}", resolved, e))
193 })?;
194
195 let mode = metadata.permissions().mode();
196 if mode & 0o022 != 0 {
197 return Err(NucleusError::GVisorError(format!(
198 "Refusing insecure runsc binary permissions at {:?} (mode {:o})",
199 resolved, mode
200 )));
201 }
202 if mode & 0o111 == 0 {
203 return Ok(None);
204 }
205
206 use std::os::unix::fs::MetadataExt;
209 let owner = metadata.uid();
210 let current_uid = nix::unistd::Uid::effective().as_raw();
211 if !Self::is_trusted_runsc_owner(&resolved, owner, current_uid) {
212 return Err(NucleusError::GVisorError(format!(
213 "Refusing runsc binary at {:?} owned by uid {} (expected root, current user {}, or immutable /nix/store artifact)",
214 resolved, owner, current_uid
215 )));
216 }
217
218 Ok(Some(resolved.to_string_lossy().to_string()))
219 }
220
221 fn is_trusted_runsc_owner(path: &Path, owner: u32, current_uid: u32) -> bool {
222 if owner == 0 || owner == current_uid {
223 return true;
224 }
225
226 if path.starts_with("/nix/store") {
232 if let Ok(meta) = std::fs::metadata(path) {
233 let mode = meta.permissions().mode();
234 if mode & 0o200 != 0 {
236 return false;
237 }
238 } else {
239 return false;
240 }
241 if let Some(parent) = path.parent() {
243 if let Ok(parent_meta) = std::fs::metadata(parent) {
244 let parent_mode = parent_meta.permissions().mode();
245 if parent_mode & 0o222 != 0 {
246 return false;
247 }
248 } else {
249 return false;
250 }
251 }
252 return true;
253 }
254
255 false
256 }
257
258 fn unwrap_nix_wrapper(path: &Path) -> Option<std::path::PathBuf> {
264 let content = std::fs::read_to_string(path).ok()?;
265 if content.len() > 4096 || !content.starts_with("#!") {
267 return None;
268 }
269 for line in content.lines().rev() {
271 let trimmed = line.trim();
272 if trimmed.starts_with("exec ") {
273 for token in trimmed.split_whitespace() {
276 let unquoted = token.trim_matches('"');
277 if unquoted.starts_with('/') && unquoted.contains("runsc") {
278 let candidate = std::path::PathBuf::from(unquoted);
279 if candidate.exists() && candidate.is_file() {
280 debug!("Resolved Nix wrapper {:?} → {:?}", path, candidate);
281 return Some(candidate);
282 }
283 }
284 }
285 }
286 }
287 None
288 }
289
    /// Execs runsc for `bundle` using `GVisorOciRunOptions::default()`.
    ///
    /// See `exec_with_oci_bundle_options` for the full behaviour.
    pub fn exec_with_oci_bundle(&self, container_id: &str, bundle: &OciBundle) -> Result<()> {
        self.exec_with_oci_bundle_options(container_id, bundle, GVisorOciRunOptions::default())
    }
298
    /// Replaces the current process with `runsc run` for the given OCI bundle.
    ///
    /// Prepares a hardened runsc root and runtime directory, stages a private
    /// copy of the runsc binary, builds the argument vector and a fixed
    /// environment, optionally applies the supervisor execute allowlist, and
    /// finally calls `execve`.
    ///
    /// # Errors
    /// Returns an error if any preparation step fails, if an argument contains
    /// an interior NUL byte, or if `execve` itself fails.
    pub fn exec_with_oci_bundle_options(
        &self,
        container_id: &str,
        bundle: &OciBundle,
        options: GVisorOciRunOptions,
    ) -> Result<()> {
        info!(
            "Executing with gVisor using OCI bundle at {:?} (network: {:?}, platform: {:?})",
            bundle.bundle_path(),
            options.network_mode,
            options.platform,
        );

        // State lives under a hardened artifact directory keyed by container id.
        let runsc_root = Self::secure_runsc_root(container_id)?;

        let runsc_runtime_dir = runsc_root.join("runtime");
        Self::ensure_secure_runsc_dir(&runsc_runtime_dir, "runsc runtime directory")?;

        // Stage a private copy of runsc; that copy is what actually gets exec'd.
        let (program_path, exec_allow_roots) =
            self.prepare_supervisor_runsc_program(&runsc_root)?;

        let mut args = self.build_oci_run_args(container_id, bundle, &runsc_root, options);
        // argv[0] must name the staged binary rather than the original path.
        args[0] = program_path.to_string_lossy().to_string();

        debug!("runsc OCI args: {:?}", args);

        let program = CString::new(program_path.to_string_lossy().as_ref())
            .map_err(|e| NucleusError::GVisorError(format!("Invalid runsc path: {}", e)))?;

        let c_args: Result<Vec<CString>> = args
            .iter()
            .map(|arg| {
                CString::new(arg.as_str())
                    .map_err(|e| NucleusError::GVisorError(format!("Invalid argument: {}", e)))
            })
            .collect();
        let c_args = c_args?;

        let c_env = self.exec_environment(&runsc_runtime_dir)?;

        // Everything fallible above runs before the allowlist is applied, so
        // the policy is the last step before exec.
        if options.runsc_rootless {
            self.apply_supervisor_exec_policy(
                &exec_allow_roots,
                options.require_supervisor_exec_policy,
            )?;
        }

        // On success execve does not return; `?` surfaces the failure case.
        nix::unistd::execve::<std::ffi::CString, std::ffi::CString>(&program, &c_args, &c_env)?;

        Ok(())
    }
374
    /// Flat-argument variant of `exec_with_oci_bundle_options`.
    ///
    /// Each trailing parameter maps one-to-one onto the `GVisorOciRunOptions`
    /// field of the same name.
    // The lint is silenced because this signature spells out every option
    // field as its own parameter.
    #[allow(clippy::too_many_arguments)]
    pub fn exec_with_oci_bundle_network(
        &self,
        container_id: &str,
        bundle: &OciBundle,
        network_mode: GVisorNetworkMode,
        ignore_cgroups: bool,
        runsc_rootless: bool,
        require_supervisor_exec_policy: bool,
        platform: GVisorPlatform,
    ) -> Result<()> {
        self.exec_with_oci_bundle_options(
            container_id,
            bundle,
            GVisorOciRunOptions {
                network_mode,
                ignore_cgroups,
                runsc_rootless,
                require_supervisor_exec_policy,
                platform,
            },
        )
    }
401
    /// Reports whether a runsc binary can be located and passes validation.
    ///
    /// Any lookup error, including a rejected insecure binary, yields `false`.
    pub fn is_available() -> bool {
        Self::find_runsc().is_ok()
    }
406
407 pub fn version(&self) -> Result<String> {
409 let output = Command::new(&self.runsc_path)
410 .arg("--version")
411 .output()
412 .map_err(|e| NucleusError::GVisorError(format!("Failed to get version: {}", e)))?;
413
414 if !output.status.success() {
415 return Err(NucleusError::GVisorError(
416 "Failed to get runsc version".to_string(),
417 ));
418 }
419
420 let version = String::from_utf8_lossy(&output.stdout).to_string();
421 Ok(version.trim().to_string())
422 }
423
424 fn exec_environment(&self, runtime_dir: &Path) -> Result<Vec<CString>> {
425 let mut env = Vec::new();
426 let mut push = |key: &str, value: String| -> Result<()> {
427 env.push(
428 CString::new(format!("{}={}", key, value))
429 .map_err(|e| NucleusError::GVisorError(format!("Invalid {}: {}", key, e)))?,
430 );
431 Ok(())
432 };
433
434 push(
437 "PATH",
438 "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
439 )?;
440 let runtime_dir = runtime_dir.to_string_lossy().to_string();
441 push("TMPDIR", runtime_dir.clone())?;
442 push("XDG_RUNTIME_DIR", runtime_dir)?;
443
444 push("HOME", "/root".to_string())?;
448 push("USER", "root".to_string())?;
449 push("LOGNAME", "root".to_string())?;
450
451 Ok(env)
452 }
453
454 fn prepare_supervisor_runsc_program(
455 &self,
456 runsc_root: &Path,
457 ) -> Result<(PathBuf, Vec<PathBuf>)> {
458 let canonical = fs::canonicalize(&self.runsc_path).map_err(|e| {
459 NucleusError::GVisorError(format!(
460 "Failed to canonicalize runsc path {:?}: {}",
461 self.runsc_path, e
462 ))
463 })?;
464
465 Self::ensure_secure_runsc_dir(runsc_root, "runsc root directory")?;
466 let private_dir = runsc_root.join("exec-allow");
467 Self::ensure_secure_runsc_dir(&private_dir, "private runsc exec directory")?;
468
469 let stage_dir = Self::create_unique_runsc_stage_dir(&private_dir)?;
474 let staged = stage_dir.join("runsc");
475 Self::copy_runsc_nofollow(&canonical, &staged)?;
476
477 Ok((staged, Self::supervisor_exec_allow_roots(private_dir)))
478 }
479
480 fn supervisor_exec_allow_roots(program_root: PathBuf) -> Vec<PathBuf> {
481 vec![program_root]
486 }
487
488 fn secure_runsc_root(container_id: &str) -> Result<PathBuf> {
489 let artifact_base = Self::gvisor_artifact_base()?;
490 let artifact_dir = artifact_base.join(Self::runsc_state_component(container_id));
491
492 if Self::host_root_requires_trusted_runsc_ancestry() {
493 Self::ensure_trusted_host_root_runsc_ancestry(
494 &artifact_base,
495 "gVisor runsc artifact base",
496 )?;
497 }
498
499 Self::ensure_secure_runsc_dir(&artifact_base, "gVisor runsc artifact base")?;
500 Self::ensure_secure_runsc_dir(&artifact_dir, "gVisor runsc artifact directory")?;
501
502 let runsc_root = artifact_dir.join("runsc-root");
503 Self::ensure_secure_runsc_dir(&runsc_root, "runsc root directory")?;
504 Ok(runsc_root)
505 }
506
507 fn gvisor_artifact_base() -> Result<PathBuf> {
508 if let Some(path) =
509 std::env::var_os("NUCLEUS_GVISOR_ARTIFACT_BASE").filter(|path| !path.is_empty())
510 {
511 return Self::absolute_path(Path::new(&path), "gVisor artifact base");
512 }
513
514 if !Uid::effective().is_root() || Self::root_uid_maps_to_unprivileged_host_uid_from_proc() {
515 if let Some(dir) = dirs::runtime_dir() {
516 return Ok(dir.join("nucleus-gvisor"));
517 }
518 }
519
520 if Uid::effective().is_root() {
521 Ok(PathBuf::from("/run/nucleus-gvisor"))
522 } else {
523 Ok(std::env::temp_dir().join(format!("nucleus-gvisor-{}", Uid::effective().as_raw())))
524 }
525 }
526
527 fn absolute_path(path: &Path, label: &str) -> Result<PathBuf> {
528 if path.is_absolute() {
529 return Ok(path.to_path_buf());
530 }
531
532 std::env::current_dir()
533 .map(|cwd| cwd.join(path))
534 .map_err(|e| {
535 NucleusError::GVisorError(format!(
536 "Failed to resolve current directory for {} {:?}: {}",
537 label, path, e
538 ))
539 })
540 }
541
542 fn runsc_state_component(container_id: &str) -> String {
543 if container_id.len() == 32 && container_id.chars().all(|c| c.is_ascii_hexdigit()) {
544 return container_id.to_string();
545 }
546
547 let digest = Sha256::digest(container_id.as_bytes());
548 format!("id-{}", hex::encode(&digest[..16]))
549 }
550
551 fn root_uid_maps_to_unprivileged_host_uid_from_proc() -> bool {
552 fs::read_to_string("/proc/self/uid_map")
553 .map(|uid_map| Self::root_uid_maps_to_unprivileged_host_uid(&uid_map))
554 .unwrap_or(false)
555 }
556
557 fn root_uid_maps_to_unprivileged_host_uid(uid_map: &str) -> bool {
558 for line in uid_map.lines() {
559 let mut fields = line.split_whitespace();
560 let Some(namespace_start) = fields.next() else {
561 continue;
562 };
563 let Some(host_start) = fields.next() else {
564 continue;
565 };
566 let Some(length) = fields.next() else {
567 continue;
568 };
569 if fields.next().is_some() {
570 continue;
571 }
572
573 let Ok(namespace_start) = namespace_start.parse::<u64>() else {
574 continue;
575 };
576 let Ok(host_start) = host_start.parse::<u64>() else {
577 continue;
578 };
579 let Ok(length) = length.parse::<u64>() else {
580 continue;
581 };
582
583 if namespace_start == 0 && length > 0 {
584 return host_start != 0;
585 }
586 }
587
588 false
589 }
590
    /// True when the effective uid is root and uid 0 does not map to an
    /// unprivileged host uid, i.e. the ancestry check should be enforced.
    fn host_root_requires_trusted_runsc_ancestry() -> bool {
        Uid::effective().is_root() && !Self::root_uid_maps_to_unprivileged_host_uid_from_proc()
    }
594
595 fn ensure_trusted_host_root_runsc_ancestry(path: &Path, label: &str) -> Result<()> {
596 let path = Self::absolute_path(path, label)?;
597
598 let mut current = PathBuf::new();
599 for component in path.components() {
600 match component {
601 Component::Prefix(prefix) => current.push(prefix.as_os_str()),
602 Component::RootDir => current.push(component.as_os_str()),
603 Component::CurDir => {}
604 Component::ParentDir => {
605 return Err(NucleusError::GVisorError(format!(
606 "{} {:?} contains a parent-directory component",
607 label, path
608 )));
609 }
610 Component::Normal(name) => {
611 current.push(name);
612 match fs::symlink_metadata(¤t) {
613 Ok(metadata) => Self::ensure_trusted_host_root_runsc_ancestor_component(
614 ¤t, metadata, label,
615 )?,
616 Err(e) if e.kind() == io::ErrorKind::NotFound => break,
617 Err(e) => {
618 return Err(NucleusError::GVisorError(format!(
619 "Failed to stat {} ancestor {:?}: {}",
620 label, current, e
621 )));
622 }
623 }
624 }
625 }
626 }
627
628 Ok(())
629 }
630
631 fn ensure_trusted_host_root_runsc_ancestor_component(
632 path: &Path,
633 metadata: fs::Metadata,
634 label: &str,
635 ) -> Result<()> {
636 if metadata.file_type().is_symlink() {
637 return Err(NucleusError::GVisorError(format!(
638 "Refusing symlink {} ancestor {:?}",
639 label, path
640 )));
641 }
642 if !metadata.file_type().is_dir() {
643 return Err(NucleusError::GVisorError(format!(
644 "{} ancestor {:?} is not a directory",
645 label, path
646 )));
647 }
648
649 let owner = metadata.uid();
650 if owner != 0 {
651 return Err(NucleusError::GVisorError(format!(
652 "{} ancestor {:?} is owned by uid {} (expected root)",
653 label, path, owner
654 )));
655 }
656
657 let mode = metadata.permissions().mode();
658 if mode & 0o022 != 0 && mode & 0o1000 == 0 {
659 return Err(NucleusError::GVisorError(format!(
660 "{} ancestor {:?} has unsafe permissions {:o}",
661 label,
662 path,
663 mode & 0o7777
664 )));
665 }
666
667 Ok(())
668 }
669
    /// Ensures `path` is a private directory owned by the effective user.
    ///
    /// Steps: verify the parent is trusted; refuse symlinks and non-directories;
    /// create the directory with mode 0700 if it is missing; reopen it without
    /// following symlinks; verify type and ownership on the opened handle; and
    /// force the mode to 0700 if it differs. `label` is only used in messages.
    ///
    /// # Errors
    /// Fails when any check or filesystem operation above fails.
    fn ensure_secure_runsc_dir(path: &Path, label: &str) -> Result<()> {
        // A path with no (or an empty) parent skips the parent trust check.
        if let Some(parent) = path
            .parent()
            .filter(|parent| !parent.as_os_str().is_empty())
        {
            Self::ensure_trusted_runsc_parent(parent, label)?;
        }

        let mut created = false;
        // symlink_metadata inspects the entry itself rather than its target.
        match fs::symlink_metadata(path) {
            Ok(metadata) if metadata.file_type().is_symlink() => {
                return Err(NucleusError::GVisorError(format!(
                    "Refusing symlink {} {:?}",
                    label, path
                )));
            }
            Ok(metadata) if !metadata.file_type().is_dir() => {
                return Err(NucleusError::GVisorError(format!(
                    "{} {:?} is not a directory",
                    label, path
                )));
            }
            Ok(_) => {}
            Err(e) if e.kind() == io::ErrorKind::NotFound => {
                match DirBuilder::new().mode(0o700).create(path) {
                    Ok(()) => {
                        created = true;
                    }
                    // Something created it between the stat and the mkdir; the
                    // checks on the opened handle below still apply to it.
                    Err(create_err) if create_err.kind() == io::ErrorKind::AlreadyExists => {}
                    Err(create_err) => {
                        return Err(NucleusError::GVisorError(format!(
                            "Failed to create {} {:?}: {}",
                            label, path, create_err
                        )));
                    }
                }
            }
            Err(e) => {
                return Err(NucleusError::GVisorError(format!(
                    "Failed to stat {} {:?}: {}",
                    label, path, e
                )));
            }
        }

        // The mode given to mkdir is filtered by the umask, so set it explicitly.
        if created {
            fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
                NucleusError::GVisorError(format!(
                    "Failed to secure newly-created {} permissions {:?}: {}",
                    label, path, e
                ))
            })?;
        }

        // Open with O_NOFOLLOW | O_DIRECTORY so the remaining checks run on the
        // opened handle rather than on whatever the path names later.
        let dir = OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC | libc::O_DIRECTORY)
            .open(path)
            .map_err(|e| {
                NucleusError::GVisorError(format!(
                    "Failed to open {} {:?} without following symlinks: {}",
                    label, path, e
                ))
            })?;

        let metadata = dir.metadata().map_err(|e| {
            NucleusError::GVisorError(format!("Failed to stat {} {:?}: {}", label, path, e))
        })?;
        if !metadata.file_type().is_dir() {
            return Err(NucleusError::GVisorError(format!(
                "{} {:?} is not a directory",
                label, path
            )));
        }

        // Only a directory owned by the effective uid is accepted.
        let owner = metadata.uid();
        let expected = Uid::effective().as_raw();
        if owner != expected {
            return Err(NucleusError::GVisorError(format!(
                "{} {:?} is owned by uid {} (expected {})",
                label, path, owner, expected
            )));
        }

        // Tighten a pre-existing directory to 0700 through the open handle.
        let mode = metadata.permissions().mode() & 0o777;
        if mode != 0o700 {
            dir.set_permissions(fs::Permissions::from_mode(0o700))
                .map_err(|e| {
                    NucleusError::GVisorError(format!(
                        "Failed to secure {} permissions {:?}: {}",
                        label, path, e
                    ))
                })?;
        }

        Ok(())
    }
767
768 fn ensure_trusted_runsc_parent(parent: &Path, label: &str) -> Result<()> {
769 let metadata = fs::symlink_metadata(parent).map_err(|e| {
770 NucleusError::GVisorError(format!(
771 "Failed to stat parent for {} {:?}: {}",
772 label, parent, e
773 ))
774 })?;
775 if metadata.file_type().is_symlink() {
776 return Err(NucleusError::GVisorError(format!(
777 "Refusing symlink parent for {} {:?}",
778 label, parent
779 )));
780 }
781 if !metadata.file_type().is_dir() {
782 return Err(NucleusError::GVisorError(format!(
783 "Parent for {} {:?} is not a directory",
784 label, parent
785 )));
786 }
787
788 let owner = metadata.uid();
789 let current = Uid::effective().as_raw();
790 let owner_trusted = owner == current || owner == 0;
791 let mode = metadata.permissions().mode();
792 let unsafe_writable = mode & 0o022 != 0 && mode & 0o1000 == 0;
793 if !owner_trusted || unsafe_writable {
794 return Err(NucleusError::GVisorError(format!(
795 "Parent for {} {:?} is not trusted (owner uid {}, mode {:o})",
796 label,
797 parent,
798 owner,
799 mode & 0o7777
800 )));
801 }
802
803 Ok(())
804 }
805
806 fn create_unique_runsc_stage_dir(private_dir: &Path) -> Result<PathBuf> {
807 let nonce = std::time::SystemTime::now()
808 .duration_since(std::time::UNIX_EPOCH)
809 .map(|duration| duration.as_nanos())
810 .unwrap_or_default();
811
812 for attempt in 0..100u32 {
813 let stage_dir = private_dir.join(format!(
814 "stage-{}-{}-{}",
815 std::process::id(),
816 nonce,
817 attempt
818 ));
819 match DirBuilder::new().mode(0o700).create(&stage_dir) {
820 Ok(()) => {
821 Self::ensure_secure_runsc_dir(&stage_dir, "runsc stage directory")?;
822 return Ok(stage_dir);
823 }
824 Err(e) if e.kind() == io::ErrorKind::AlreadyExists => continue,
825 Err(e) => {
826 return Err(NucleusError::GVisorError(format!(
827 "Failed to create runsc stage directory {:?}: {}",
828 stage_dir, e
829 )));
830 }
831 }
832 }
833
834 Err(NucleusError::GVisorError(format!(
835 "Failed to create unique runsc stage directory under {:?}",
836 private_dir
837 )))
838 }
839
    /// Copies the runsc binary at `source` to the new file `staged`.
    ///
    /// The destination is created exclusively (it must not already exist),
    /// opened with `O_NOFOLLOW`, given mode 0500, and synced to disk.
    ///
    /// # Errors
    /// Fails when the source cannot be opened or is not a regular file, when
    /// the destination cannot be created, or when the copy, chmod or sync fails.
    fn copy_runsc_nofollow(source: &Path, staged: &Path) -> Result<()> {
        // NOTE(review): only the destination is opened with O_NOFOLLOW; the
        // source open follows symlinks. The visible caller passes a
        // canonicalized source path - confirm that is the intended contract.
        let mut source_file = OpenOptions::new()
            .read(true)
            .custom_flags(libc::O_CLOEXEC)
            .open(source)
            .map_err(|e| {
                NucleusError::GVisorError(format!(
                    "Failed to open runsc source {:?}: {}",
                    source, e
                ))
            })?;

        // Stat the opened handle so the type check applies to the file being read.
        let source_meta = source_file.metadata().map_err(|e| {
            NucleusError::GVisorError(format!("Failed to stat runsc source {:?}: {}", source, e))
        })?;
        if !source_meta.file_type().is_file() {
            return Err(NucleusError::GVisorError(format!(
                "runsc source {:?} is not a regular file",
                source
            )));
        }

        // create_new makes the open fail if anything already exists at `staged`.
        let mut staged_file = OpenOptions::new()
            .write(true)
            .create_new(true)
            .mode(0o500)
            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
            .open(staged)
            .map_err(|e| {
                NucleusError::GVisorError(format!(
                    "Failed to create staged runsc binary {:?}: {}",
                    staged, e
                ))
            })?;

        io::copy(&mut source_file, &mut staged_file).map_err(|e| {
            NucleusError::GVisorError(format!(
                "Failed to stage runsc binary from {:?} to {:?}: {}",
                source, staged, e
            ))
        })?;
        // The creation mode is subject to the umask, so set 0500 explicitly.
        staged_file
            .set_permissions(fs::Permissions::from_mode(0o500))
            .map_err(|e| {
                NucleusError::GVisorError(format!(
                    "Failed to secure staged runsc binary {:?}: {}",
                    staged, e
                ))
            })?;
        staged_file.sync_all().map_err(|e| {
            NucleusError::GVisorError(format!(
                "Failed to sync staged runsc binary {:?}: {}",
                staged, e
            ))
        })?;

        Ok(())
    }
898
899 fn apply_supervisor_exec_policy(
900 &self,
901 allowed_roots: &[PathBuf],
902 required: bool,
903 ) -> Result<()> {
904 let mut landlock = LandlockManager::new();
905 let applied = landlock.apply_execute_allowlist_policy(allowed_roots, !required)?;
906 if applied {
907 info!(
908 allowed_roots = ?allowed_roots,
909 "Applied gVisor supervisor execute allowlist"
910 );
911 } else if required {
912 return Err(NucleusError::LandlockError(
913 "Required gVisor supervisor execute allowlist was not applied".to_string(),
914 ));
915 } else {
916 warn!(
917 allowed_roots = ?allowed_roots,
918 "gVisor supervisor execute allowlist unavailable"
919 );
920 }
921 Ok(())
922 }
923
924 fn build_oci_run_args(
925 &self,
926 container_id: &str,
927 bundle: &OciBundle,
928 runsc_root: &Path,
929 options: GVisorOciRunOptions,
930 ) -> Vec<String> {
931 let mut args = vec![
932 self.runsc_path.clone(),
933 "--root".to_string(),
934 runsc_root.to_string_lossy().to_string(),
935 ];
936
937 if options.runsc_rootless {
938 args.push("--rootless".to_string());
939 }
940
941 if options.ignore_cgroups {
942 args.push("--ignore-cgroups".to_string());
943 }
944
945 args.extend([
946 "--network".to_string(),
947 options.network_flag().to_string(),
948 "--platform".to_string(),
949 options.platform.as_flag().to_string(),
950 "run".to_string(),
951 "--bundle".to_string(),
952 bundle.bundle_path().to_string_lossy().to_string(),
953 container_id.to_string(),
954 ]);
955
956 args
957 }
958}
959
960#[cfg(test)]
961mod tests {
962 use super::*;
963 use crate::oci::OciConfig;
964 use std::path::{Path, PathBuf};
965 use std::sync::{Mutex, MutexGuard};
966
967 static ENV_LOCK: Mutex<()> = Mutex::new(());
968
969 struct EnvLock {
970 _guard: MutexGuard<'static, ()>,
971 }
972
973 impl EnvLock {
974 fn acquire() -> Self {
975 Self {
976 _guard: ENV_LOCK.lock().unwrap(),
977 }
978 }
979 }
980
981 struct EnvVarGuard {
982 key: &'static str,
983 previous: Option<std::ffi::OsString>,
984 }
985
986 impl EnvVarGuard {
987 fn set(key: &'static str, value: impl AsRef<std::ffi::OsStr>) -> Self {
988 let previous = std::env::var_os(key);
989 std::env::set_var(key, value);
990 Self { key, previous }
991 }
992
993 fn remove(key: &'static str) -> Self {
994 let previous = std::env::var_os(key);
995 std::env::remove_var(key);
996 Self { key, previous }
997 }
998 }
999
1000 impl Drop for EnvVarGuard {
1001 fn drop(&mut self) {
1002 match &self.previous {
1003 Some(value) => std::env::set_var(self.key, value),
1004 None => std::env::remove_var(self.key),
1005 }
1006 }
1007 }
1008
1009 #[test]
1010 fn test_gvisor_availability() {
1011 let available = GVisorRuntime::is_available();
1014 println!("gVisor available: {}", available);
1015 }
1016
1017 #[test]
1018 fn test_gvisor_new() {
1019 let runtime = GVisorRuntime::new();
1020 if let Ok(rt) = runtime {
1021 println!("Found runsc at: {}", rt.runsc_path);
1022 if let Ok(version) = rt.version() {
1023 println!("runsc version: {}", version);
1024 }
1025 }
1026 }
1027
1028 #[test]
1029 fn test_find_runsc() {
1030 match GVisorRuntime::find_runsc() {
1032 Ok(path) => {
1033 println!("Found runsc at: {}", path);
1034 assert!(!path.is_empty());
1035 }
1036 Err(e) => {
1037 println!("runsc not found (expected if gVisor not installed): {}", e);
1038 }
1039 }
1040 }
1041
    /// A candidate with mode 0777 must be rejected with an error.
    #[test]
    fn test_validate_runsc_rejects_world_writable() {
        let dir = tempfile::tempdir().unwrap();
        let fake_runsc = dir.path().join("runsc");
        std::fs::write(&fake_runsc, "#!/bin/sh\necho fake").unwrap();
        // Set the mode explicitly; the creation mode is filtered by the umask.
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o777)).unwrap();

        let result = GVisorRuntime::validate_runsc_path(&fake_runsc);
        assert!(
            result.is_err(),
            "validate_runsc_path must reject world-writable binaries"
        );
    }
1056
    /// A candidate with mode 0775 must be rejected with an error.
    #[test]
    fn test_validate_runsc_rejects_group_writable() {
        let dir = tempfile::tempdir().unwrap();
        let fake_runsc = dir.path().join("runsc");
        std::fs::write(&fake_runsc, "#!/bin/sh\necho fake").unwrap();
        // Set the mode explicitly; the creation mode is filtered by the umask.
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o775)).unwrap();

        let result = GVisorRuntime::validate_runsc_path(&fake_runsc);
        assert!(
            result.is_err(),
            "validate_runsc_path must reject group-writable binaries"
        );
    }
1071
1072 #[test]
1073 fn test_runsc_owner_accepts_nix_store_artifact_owner() {
1074 let nix_binary = std::fs::read_dir("/nix/store")
1078 .ok()
1079 .and_then(|mut entries| {
1080 entries.find_map(|e| {
1081 let dir = e.ok()?.path();
1082 let candidate = dir.join("bin/runsc");
1083 if candidate.exists() {
1084 Some(candidate)
1085 } else {
1086 None
1087 }
1088 })
1089 });
1090
1091 let path = match nix_binary {
1092 Some(p) => p,
1093 None => {
1094 eprintln!("skipping: no runsc binary found in /nix/store");
1095 return;
1096 }
1097 };
1098
1099 assert!(GVisorRuntime::is_trusted_runsc_owner(&path, 65534, 1000));
1100 }
1101
1102 #[test]
1103 fn test_exec_environment_uses_hardcoded_path() {
1104 std::env::set_var("PATH", "/tmp/evil-inject/bin:/opt/attacker/sbin");
1109 let rt = GVisorRuntime::with_path("/fake/runsc".to_string());
1110 let tmp = tempfile::tempdir().unwrap();
1111 let env = rt.exec_environment(tmp.path()).unwrap();
1112 let path_entry = env
1113 .iter()
1114 .find(|e| e.to_str().is_ok_and(|s| s.starts_with("PATH=")))
1115 .expect("exec_environment must set PATH");
1116 let path_val = path_entry.to_str().unwrap();
1117 assert!(
1118 !path_val.contains("evil-inject") && !path_val.contains("attacker"),
1119 "exec_environment must use hardcoded PATH, not host PATH. Got: {}",
1120 path_val
1121 );
1122 assert_eq!(
1123 path_val, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1124 "exec_environment PATH must be the standard hardcoded value"
1125 );
1126 }
1127
1128 #[test]
1129 fn test_precreated_rootless_args_pass_runsc_rootless() {
1130 let rt = GVisorRuntime::with_path("/nix/store/fake-runsc/bin/runsc".to_string());
1131 let tmp = tempfile::tempdir().unwrap();
1132 let bundle = OciBundle::new(
1133 tmp.path().join("bundle"),
1134 OciConfig::new(vec!["/bin/true".to_string()], None),
1135 );
1136
1137 let args = rt.build_oci_run_args(
1138 "container-id",
1139 &bundle,
1140 tmp.path(),
1141 GVisorOciRunOptions {
1142 network_mode: GVisorNetworkMode::Host,
1143 ignore_cgroups: true,
1144 runsc_rootless: true,
1145 require_supervisor_exec_policy: false,
1146 platform: GVisorPlatform::Systrap,
1147 },
1148 );
1149
1150 assert!(args.iter().any(|arg| arg == "--rootless"));
1151 assert!(args.iter().any(|arg| arg == "--ignore-cgroups"));
1152 }
1153
1154 #[test]
1155 fn test_rootless_oci_args_do_not_pass_runsc_rootless() {
1156 let rt = GVisorRuntime::with_path("/nix/store/fake-runsc/bin/runsc".to_string());
1157 let tmp = tempfile::tempdir().unwrap();
1158 let bundle = OciBundle::new(
1159 tmp.path().join("bundle"),
1160 OciConfig::new(vec!["/bin/true".to_string()], None),
1161 );
1162
1163 let args = rt.build_oci_run_args(
1164 "container-id",
1165 &bundle,
1166 tmp.path(),
1167 GVisorOciRunOptions {
1168 network_mode: GVisorNetworkMode::Host,
1169 ignore_cgroups: true,
1170 runsc_rootless: false,
1171 require_supervisor_exec_policy: false,
1172 platform: GVisorPlatform::Systrap,
1173 },
1174 );
1175
1176 assert!(!args.iter().any(|arg| arg == "--rootless"));
1177 assert!(args.iter().any(|arg| arg == "--ignore-cgroups"));
1178 }
1179
    /// Staging must copy the binary byte-for-byte into `exec-allow` with mode
    /// 0500 and report `exec-allow` as the only allowed root.
    #[test]
    fn test_non_nix_runsc_is_staged_for_supervisor_exec_policy() {
        let tmp = tempfile::tempdir().unwrap();
        let fake_runsc = tmp.path().join("runsc-source");
        std::fs::write(&fake_runsc, b"fake-runsc").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o500)).unwrap();

        let rt = GVisorRuntime::with_path(fake_runsc.to_string_lossy().to_string());
        let runsc_root = tmp.path().join("runsc-root");
        let (program, allow_roots) = rt.prepare_supervisor_runsc_program(&runsc_root).unwrap();

        assert!(program.starts_with(runsc_root.join("exec-allow")));
        assert_eq!(allow_roots, vec![runsc_root.join("exec-allow")]);
        assert_eq!(std::fs::read(&program).unwrap(), b"fake-runsc");
        let mode = std::fs::metadata(&program).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o500);
    }
1197
    /// The allow roots must be exactly the program root; `/proc` must never
    /// be added.
    #[test]
    fn test_supervisor_exec_allow_roots_do_not_include_procfs() {
        let roots = GVisorRuntime::supervisor_exec_allow_roots(PathBuf::from(NIX_STORE_EXEC_ROOT));

        assert_eq!(roots, vec![PathBuf::from(NIX_STORE_EXEC_ROOT)]);
        assert!(
            !roots.iter().any(|root| root == Path::new("/proc")),
            "the supervisor policy must not allow recursive procfs execution"
        );
    }
1208
    /// The runsc root must live under the configured artifact base, not next
    /// to a bundle placed in a world-writable directory.
    #[test]
    fn test_runsc_root_uses_hardened_artifact_dir_not_bundle_parent() {
        // Environment variables are process-global; serialize and restore them.
        let _env_lock = EnvLock::acquire();
        let tmp = tempfile::tempdir().unwrap();
        let artifact_base = tmp.path().join("gvisor-artifacts");
        let _artifact_base = EnvVarGuard::set("NUCLEUS_GVISOR_ARTIFACT_BASE", &artifact_base);
        let _runtime = EnvVarGuard::remove("XDG_RUNTIME_DIR");

        // A deliberately unsafe (0777) parent for the bundle itself.
        let bundle_parent = tmp.path().join("shared");
        std::fs::create_dir_all(&bundle_parent).unwrap();
        std::fs::set_permissions(&bundle_parent, std::fs::Permissions::from_mode(0o777)).unwrap();
        let bundle = OciBundle::new(
            bundle_parent.join("bundle"),
            OciConfig::new(vec!["/bin/true".to_string()], None),
        );

        let runsc_root = GVisorRuntime::secure_runsc_root("container-id").unwrap();

        assert!(runsc_root
            .starts_with(artifact_base.join(GVisorRuntime::runsc_state_component("container-id"))));
        assert!(
            !runsc_root.starts_with(bundle.bundle_path().parent().unwrap()),
            "runsc root must not be derived from a custom bundle parent"
        );
    }
1234
    /// If `exec-allow` is a symlink, staging must fail and must not write
    /// into the symlink's target.
    #[test]
    fn test_runsc_staging_rejects_symlink_exec_allow_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let fake_runsc = tmp.path().join("runsc-source");
        std::fs::write(&fake_runsc, b"fake-runsc").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o500)).unwrap();

        let runsc_root = tmp.path().join("runsc-root");
        std::fs::create_dir(&runsc_root).unwrap();
        std::fs::set_permissions(&runsc_root, std::fs::Permissions::from_mode(0o700)).unwrap();
        // Plant a symlink where the private exec directory is expected.
        let victim_dir = tmp.path().join("victim");
        std::fs::create_dir(&victim_dir).unwrap();
        std::os::unix::fs::symlink(&victim_dir, runsc_root.join("exec-allow")).unwrap();

        let rt = GVisorRuntime::with_path(fake_runsc.to_string_lossy().to_string());
        let err = rt
            .prepare_supervisor_runsc_program(&runsc_root)
            .unwrap_err()
            .to_string();

        assert!(
            err.contains("Refusing symlink private runsc exec directory"),
            "unexpected error: {}",
            err
        );
        assert!(
            !victim_dir.join("runsc").exists(),
            "staging must not follow the exec-allow symlink"
        );
    }
1265
    /// An owner that is neither root nor the current uid must be rejected for
    /// a path outside `/nix/store`.
    #[test]
    fn test_runsc_owner_rejects_untrusted_non_store_owner() {
        assert!(!GVisorRuntime::is_trusted_runsc_owner(
            Path::new("/tmp/runsc"),
            4242,
            1000
        ));
    }
1274}