1use super::landlock::LandlockManager;
2use crate::error::{NucleusError, Result};
3use crate::oci::OciBundle;
4use nix::unistd::Uid;
5use sha2::{Digest, Sha256};
6use std::ffi::CString;
7use std::fs::{self, DirBuilder, OpenOptions};
8use std::io;
9use std::os::unix::fs::{DirBuilderExt, MetadataExt, OpenOptionsExt, PermissionsExt};
10use std::path::{Component, Path, PathBuf};
11use std::process::Command;
12use tracing::{debug, info, warn};
13
/// Network mode handed to runsc through its `--network` flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GVisorNetworkMode {
    /// Rendered as `--network none`.
    None,
    /// Rendered as `--network sandbox`.
    Sandbox,
    /// Rendered as `--network host`.
    Host,
}
25
/// Platform selector handed to runsc through its `--platform` flag.
///
/// The type is usable as a clap value enum and is (de)serializable via serde.
// NOTE: the variants carry plain `//` comments on purpose; `///` doc comments on
// `clap::ValueEnum` variants are picked up as CLI help text.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    Default,
    clap::ValueEnum,
    serde::Serialize,
    serde::Deserialize,
)]
pub enum GVisorPlatform {
    // Default platform; rendered as `--platform systrap`.
    #[default]
    Systrap,
    // Rendered as `--platform kvm`.
    Kvm,
    // Rendered as `--platform ptrace`.
    Ptrace,
}
47
48impl GVisorPlatform {
49 pub fn as_flag(self) -> &'static str {
50 match self {
51 Self::Systrap => "systrap",
52 Self::Kvm => "kvm",
53 Self::Ptrace => "ptrace",
54 }
55 }
56}
57
/// Options that shape a single `runsc run` invocation against an OCI bundle.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GVisorOciRunOptions {
    /// Value rendered into the `--network` flag.
    pub network_mode: GVisorNetworkMode,
    /// When true, `--ignore-cgroups` is passed to runsc.
    pub ignore_cgroups: bool,
    /// When true, `--rootless` is passed to runsc and the supervisor execute
    /// allowlist is applied before exec.
    pub runsc_rootless: bool,
    /// When true, failing to apply the supervisor execute allowlist is an error
    /// rather than a warning.
    pub require_supervisor_exec_policy: bool,
    /// Value rendered into the `--platform` flag.
    pub platform: GVisorPlatform,
}
72
73impl Default for GVisorOciRunOptions {
74 fn default() -> Self {
75 Self {
76 network_mode: GVisorNetworkMode::None,
77 ignore_cgroups: false,
78 runsc_rootless: false,
79 require_supervisor_exec_policy: false,
80 platform: GVisorPlatform::default(),
81 }
82 }
83}
84
85impl GVisorOciRunOptions {
86 fn network_flag(self) -> &'static str {
87 match self.network_mode {
88 GVisorNetworkMode::None => "none",
89 GVisorNetworkMode::Sandbox => "sandbox",
90 GVisorNetworkMode::Host => "host",
91 }
92 }
93}
94
/// Handle for launching containers through the gVisor `runsc` binary.
pub struct GVisorRuntime {
    /// Filesystem path of the runsc binary this runtime invokes.
    runsc_path: String,
}
102
103impl GVisorRuntime {
104 pub fn new() -> Result<Self> {
108 let runsc_path = Self::find_runsc()?;
109 info!("Found runsc at: {}", runsc_path);
110 Ok(Self { runsc_path })
111 }
112
113 pub fn with_path(runsc_path: String) -> Self {
119 Self { runsc_path }
120 }
121
122 pub fn resolve_path() -> Result<String> {
126 Self::find_runsc()
127 }
128
129 fn find_runsc() -> Result<String> {
131 let paths = vec![
133 "/usr/local/bin/runsc",
134 "/usr/bin/runsc",
135 "/opt/gvisor/runsc",
136 ];
137
138 for path in &paths {
139 if let Some(validated) = Self::validate_runsc_path(Path::new(path))? {
140 return Ok(validated);
141 }
142 }
143
144 if Uid::effective().is_root() {
147 return Err(NucleusError::GVisorError(
148 "runsc binary not found in trusted system paths".to_string(),
149 ));
150 }
151
152 if let Some(path_var) = std::env::var_os("PATH") {
154 for dir in std::env::split_paths(&path_var) {
155 let candidate = dir.join("runsc");
156 if let Some(validated) = Self::validate_runsc_path(&candidate)? {
157 return Ok(validated);
158 }
159 }
160 }
161
162 Err(NucleusError::GVisorError(
163 "runsc binary not found. Please install gVisor.".to_string(),
164 ))
165 }
166
167 fn validate_runsc_path(path: &Path) -> Result<Option<String>> {
168 if !path.exists() {
169 return Ok(None);
170 }
171 if !path.is_file() {
172 return Ok(None);
173 }
174
175 let canonical = std::fs::canonicalize(path).map_err(|e| {
176 NucleusError::GVisorError(format!(
177 "Failed to canonicalize runsc path {:?}: {}",
178 path, e
179 ))
180 })?;
181
182 let resolved = Self::unwrap_nix_wrapper(&canonical).unwrap_or_else(|| canonical.clone());
187
188 let metadata = std::fs::metadata(&resolved).map_err(|e| {
189 NucleusError::GVisorError(format!("Failed to stat runsc path {:?}: {}", resolved, e))
190 })?;
191
192 let mode = metadata.permissions().mode();
193 if mode & 0o022 != 0 {
194 return Err(NucleusError::GVisorError(format!(
195 "Refusing insecure runsc binary permissions at {:?} (mode {:o})",
196 resolved, mode
197 )));
198 }
199 if mode & 0o111 == 0 {
200 return Ok(None);
201 }
202
203 use std::os::unix::fs::MetadataExt;
206 let owner = metadata.uid();
207 let current_uid = nix::unistd::Uid::effective().as_raw();
208 if !Self::is_trusted_runsc_owner(&resolved, owner, current_uid) {
209 return Err(NucleusError::GVisorError(format!(
210 "Refusing runsc binary at {:?} owned by uid {} (expected root, current user {}, or immutable /nix/store artifact)",
211 resolved, owner, current_uid
212 )));
213 }
214
215 Ok(Some(resolved.to_string_lossy().to_string()))
216 }
217
/// Decides whether a runsc binary owned by `owner` may be used by `current_uid`.
///
/// Root-owned and self-owned binaries are always accepted. Any other owner is
/// accepted only for paths under `/nix/store`, and only when the file has no
/// owner-write bit and its parent directory (if any) has no write bit at all.
/// Any failure to stat the file or its parent rejects the binary.
fn is_trusted_runsc_owner(path: &Path, owner: u32, current_uid: u32) -> bool {
    if owner == 0 || owner == current_uid {
        return true;
    }
    if !path.starts_with("/nix/store") {
        return false;
    }

    let file_is_read_only = std::fs::metadata(path)
        .map(|meta| meta.permissions().mode() & 0o200 == 0)
        .unwrap_or(false);
    if !file_is_read_only {
        return false;
    }

    match path.parent() {
        Some(parent) => std::fs::metadata(parent)
            .map(|meta| meta.permissions().mode() & 0o222 == 0)
            .unwrap_or(false),
        None => true,
    }
}
254
255 fn unwrap_nix_wrapper(path: &Path) -> Option<std::path::PathBuf> {
261 let content = std::fs::read_to_string(path).ok()?;
262 if content.len() > 4096 || !content.starts_with("#!") {
264 return None;
265 }
266 for line in content.lines().rev() {
268 let trimmed = line.trim();
269 if trimmed.starts_with("exec ") {
270 for token in trimmed.split_whitespace() {
273 let unquoted = token.trim_matches('"');
274 if unquoted.starts_with('/') && unquoted.contains("runsc") {
275 let candidate = std::path::PathBuf::from(unquoted);
276 if candidate.exists() && candidate.is_file() {
277 debug!("Resolved Nix wrapper {:?} → {:?}", path, candidate);
278 return Some(candidate);
279 }
280 }
281 }
282 }
283 }
284 None
285 }
286
287 pub fn exec_with_oci_bundle(&self, container_id: &str, bundle: &OciBundle) -> Result<()> {
293 self.exec_with_oci_bundle_options(container_id, bundle, GVisorOciRunOptions::default())
294 }
295
296 pub fn exec_with_oci_bundle_options(
307 &self,
308 container_id: &str,
309 bundle: &OciBundle,
310 options: GVisorOciRunOptions,
311 ) -> Result<()> {
312 info!(
313 "Executing with gVisor using OCI bundle at {:?} (network: {:?}, platform: {:?})",
314 bundle.bundle_path(),
315 options.network_mode,
316 options.platform,
317 );
318
319 let runsc_root = Self::secure_runsc_root(container_id)?;
324
325 let runsc_runtime_dir = runsc_root.join("runtime");
326 Self::ensure_secure_runsc_dir(&runsc_runtime_dir, "runsc runtime directory")?;
327
328 let (program_path, exec_allow_roots) =
329 self.prepare_supervisor_runsc_program(&runsc_root)?;
330
331 let mut args = self.build_oci_run_args(container_id, bundle, &runsc_root, options);
335 args[0] = program_path.to_string_lossy().to_string();
336
337 debug!("runsc OCI args: {:?}", args);
338
339 let program = CString::new(program_path.to_string_lossy().as_ref())
341 .map_err(|e| NucleusError::GVisorError(format!("Invalid runsc path: {}", e)))?;
342
343 let c_args: Result<Vec<CString>> = args
344 .iter()
345 .map(|arg| {
346 CString::new(arg.as_str())
347 .map_err(|e| NucleusError::GVisorError(format!("Invalid argument: {}", e)))
348 })
349 .collect();
350 let c_args = c_args?;
351
352 let c_env = self.exec_environment(&runsc_runtime_dir)?;
353
354 if options.runsc_rootless {
364 self.apply_supervisor_exec_policy(
365 &exec_allow_roots,
366 options.require_supervisor_exec_policy,
367 )?;
368 }
369
370 nix::unistd::execve::<std::ffi::CString, std::ffi::CString>(&program, &c_args, &c_env)?;
372
373 Ok(())
375 }
376
377 #[allow(clippy::too_many_arguments)]
381 pub fn exec_with_oci_bundle_network(
382 &self,
383 container_id: &str,
384 bundle: &OciBundle,
385 network_mode: GVisorNetworkMode,
386 ignore_cgroups: bool,
387 runsc_rootless: bool,
388 require_supervisor_exec_policy: bool,
389 platform: GVisorPlatform,
390 ) -> Result<()> {
391 self.exec_with_oci_bundle_options(
392 container_id,
393 bundle,
394 GVisorOciRunOptions {
395 network_mode,
396 ignore_cgroups,
397 runsc_rootless,
398 require_supervisor_exec_policy,
399 platform,
400 },
401 )
402 }
403
404 pub fn is_available() -> bool {
406 Self::find_runsc().is_ok()
407 }
408
409 pub fn version(&self) -> Result<String> {
411 let output = Command::new(&self.runsc_path)
412 .arg("--version")
413 .output()
414 .map_err(|e| NucleusError::GVisorError(format!("Failed to get version: {}", e)))?;
415
416 if !output.status.success() {
417 return Err(NucleusError::GVisorError(
418 "Failed to get runsc version".to_string(),
419 ));
420 }
421
422 let version = String::from_utf8_lossy(&output.stdout).to_string();
423 Ok(version.trim().to_string())
424 }
425
426 fn exec_environment(&self, runtime_dir: &Path) -> Result<Vec<CString>> {
427 let mut env = Vec::new();
428 let mut push = |key: &str, value: String| -> Result<()> {
429 env.push(
430 CString::new(format!("{}={}", key, value))
431 .map_err(|e| NucleusError::GVisorError(format!("Invalid {}: {}", key, e)))?,
432 );
433 Ok(())
434 };
435
436 push(
439 "PATH",
440 "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin".to_string(),
441 )?;
442 let runtime_dir = runtime_dir.to_string_lossy().to_string();
443 push("TMPDIR", runtime_dir.clone())?;
444 push("XDG_RUNTIME_DIR", runtime_dir)?;
445
446 push("HOME", "/root".to_string())?;
450 push("USER", "root".to_string())?;
451 push("LOGNAME", "root".to_string())?;
452
453 Ok(env)
454 }
455
456 fn prepare_supervisor_runsc_program(
457 &self,
458 runsc_root: &Path,
459 ) -> Result<(PathBuf, Vec<PathBuf>)> {
460 let canonical = fs::canonicalize(&self.runsc_path).map_err(|e| {
461 NucleusError::GVisorError(format!(
462 "Failed to canonicalize runsc path {:?}: {}",
463 self.runsc_path, e
464 ))
465 })?;
466
467 if canonical.starts_with("/nix/store") {
468 return Ok((canonical, vec![PathBuf::from("/nix/store")]));
469 }
470
471 Self::ensure_secure_runsc_dir(runsc_root, "runsc root directory")?;
472 let private_dir = runsc_root.join("exec-allow");
473 Self::ensure_secure_runsc_dir(&private_dir, "private runsc exec directory")?;
474
475 let stage_dir = Self::create_unique_runsc_stage_dir(&private_dir)?;
476 let staged = stage_dir.join("runsc");
477 Self::copy_runsc_nofollow(&canonical, &staged)?;
478
479 Ok((staged, vec![private_dir]))
480 }
481
482 fn secure_runsc_root(container_id: &str) -> Result<PathBuf> {
483 let artifact_base = Self::gvisor_artifact_base()?;
484 let artifact_dir = artifact_base.join(Self::runsc_state_component(container_id));
485
486 if Self::host_root_requires_trusted_runsc_ancestry() {
487 Self::ensure_trusted_host_root_runsc_ancestry(
488 &artifact_base,
489 "gVisor runsc artifact base",
490 )?;
491 }
492
493 Self::ensure_secure_runsc_dir(&artifact_base, "gVisor runsc artifact base")?;
494 Self::ensure_secure_runsc_dir(&artifact_dir, "gVisor runsc artifact directory")?;
495
496 let runsc_root = artifact_dir.join("runsc-root");
497 Self::ensure_secure_runsc_dir(&runsc_root, "runsc root directory")?;
498 Ok(runsc_root)
499 }
500
501 fn gvisor_artifact_base() -> Result<PathBuf> {
502 if let Some(path) =
503 std::env::var_os("NUCLEUS_GVISOR_ARTIFACT_BASE").filter(|path| !path.is_empty())
504 {
505 return Self::absolute_path(Path::new(&path), "gVisor artifact base");
506 }
507
508 if !Uid::effective().is_root() || Self::root_uid_maps_to_unprivileged_host_uid_from_proc() {
509 if let Some(dir) = dirs::runtime_dir() {
510 return Ok(dir.join("nucleus-gvisor"));
511 }
512 }
513
514 if Uid::effective().is_root() {
515 Ok(PathBuf::from("/run/nucleus-gvisor"))
516 } else {
517 Ok(std::env::temp_dir().join(format!("nucleus-gvisor-{}", Uid::effective().as_raw())))
518 }
519 }
520
521 fn absolute_path(path: &Path, label: &str) -> Result<PathBuf> {
522 if path.is_absolute() {
523 return Ok(path.to_path_buf());
524 }
525
526 std::env::current_dir()
527 .map(|cwd| cwd.join(path))
528 .map_err(|e| {
529 NucleusError::GVisorError(format!(
530 "Failed to resolve current directory for {} {:?}: {}",
531 label, path, e
532 ))
533 })
534 }
535
536 fn runsc_state_component(container_id: &str) -> String {
537 if container_id.len() == 32 && container_id.chars().all(|c| c.is_ascii_hexdigit()) {
538 return container_id.to_string();
539 }
540
541 let digest = Sha256::digest(container_id.as_bytes());
542 format!("id-{}", hex::encode(&digest[..16]))
543 }
544
545 fn root_uid_maps_to_unprivileged_host_uid_from_proc() -> bool {
546 fs::read_to_string("/proc/self/uid_map")
547 .map(|uid_map| Self::root_uid_maps_to_unprivileged_host_uid(&uid_map))
548 .unwrap_or(false)
549 }
550
/// Parses `uid_map` text and reports whether namespace uid 0 maps to a
/// non-zero host uid.
///
/// Each line must hold exactly three whitespace-separated unsigned integers
/// (namespace start, host start, length); other lines are skipped. The first
/// line with namespace start 0 and a non-zero length decides the result.
/// Returns `false` when no such line exists.
fn root_uid_maps_to_unprivileged_host_uid(uid_map: &str) -> bool {
    let root_mapping = uid_map
        .lines()
        .filter_map(|entry| {
            let columns: Vec<&str> = entry.split_whitespace().collect();
            let [namespace_start, host_start, length] = columns.as_slice() else {
                return None;
            };
            Some((
                namespace_start.parse::<u64>().ok()?,
                host_start.parse::<u64>().ok()?,
                length.parse::<u64>().ok()?,
            ))
        })
        .find(|&(namespace_start, _, length)| namespace_start == 0 && length > 0);

    matches!(root_mapping, Some((_, host_start, _)) if host_start != 0)
}
584
585 fn host_root_requires_trusted_runsc_ancestry() -> bool {
586 Uid::effective().is_root() && !Self::root_uid_maps_to_unprivileged_host_uid_from_proc()
587 }
588
589 fn ensure_trusted_host_root_runsc_ancestry(path: &Path, label: &str) -> Result<()> {
590 let path = Self::absolute_path(path, label)?;
591
592 let mut current = PathBuf::new();
593 for component in path.components() {
594 match component {
595 Component::Prefix(prefix) => current.push(prefix.as_os_str()),
596 Component::RootDir => current.push(component.as_os_str()),
597 Component::CurDir => {}
598 Component::ParentDir => {
599 return Err(NucleusError::GVisorError(format!(
600 "{} {:?} contains a parent-directory component",
601 label, path
602 )));
603 }
604 Component::Normal(name) => {
605 current.push(name);
606 match fs::symlink_metadata(¤t) {
607 Ok(metadata) => Self::ensure_trusted_host_root_runsc_ancestor_component(
608 ¤t, metadata, label,
609 )?,
610 Err(e) if e.kind() == io::ErrorKind::NotFound => break,
611 Err(e) => {
612 return Err(NucleusError::GVisorError(format!(
613 "Failed to stat {} ancestor {:?}: {}",
614 label, current, e
615 )));
616 }
617 }
618 }
619 }
620 }
621
622 Ok(())
623 }
624
625 fn ensure_trusted_host_root_runsc_ancestor_component(
626 path: &Path,
627 metadata: fs::Metadata,
628 label: &str,
629 ) -> Result<()> {
630 if metadata.file_type().is_symlink() {
631 return Err(NucleusError::GVisorError(format!(
632 "Refusing symlink {} ancestor {:?}",
633 label, path
634 )));
635 }
636 if !metadata.file_type().is_dir() {
637 return Err(NucleusError::GVisorError(format!(
638 "{} ancestor {:?} is not a directory",
639 label, path
640 )));
641 }
642
643 let owner = metadata.uid();
644 if owner != 0 {
645 return Err(NucleusError::GVisorError(format!(
646 "{} ancestor {:?} is owned by uid {} (expected root)",
647 label, path, owner
648 )));
649 }
650
651 let mode = metadata.permissions().mode();
652 if mode & 0o022 != 0 && mode & 0o1000 == 0 {
653 return Err(NucleusError::GVisorError(format!(
654 "{} ancestor {:?} has unsafe permissions {:o}",
655 label,
656 path,
657 mode & 0o7777
658 )));
659 }
660
661 Ok(())
662 }
663
664 fn ensure_secure_runsc_dir(path: &Path, label: &str) -> Result<()> {
665 if let Some(parent) = path
666 .parent()
667 .filter(|parent| !parent.as_os_str().is_empty())
668 {
669 Self::ensure_trusted_runsc_parent(parent, label)?;
670 }
671
672 let mut created = false;
673 match fs::symlink_metadata(path) {
674 Ok(metadata) if metadata.file_type().is_symlink() => {
675 return Err(NucleusError::GVisorError(format!(
676 "Refusing symlink {} {:?}",
677 label, path
678 )));
679 }
680 Ok(metadata) if !metadata.file_type().is_dir() => {
681 return Err(NucleusError::GVisorError(format!(
682 "{} {:?} is not a directory",
683 label, path
684 )));
685 }
686 Ok(_) => {}
687 Err(e) if e.kind() == io::ErrorKind::NotFound => {
688 match DirBuilder::new().mode(0o700).create(path) {
689 Ok(()) => {
690 created = true;
691 }
692 Err(create_err) if create_err.kind() == io::ErrorKind::AlreadyExists => {}
693 Err(create_err) => {
694 return Err(NucleusError::GVisorError(format!(
695 "Failed to create {} {:?}: {}",
696 label, path, create_err
697 )));
698 }
699 }
700 }
701 Err(e) => {
702 return Err(NucleusError::GVisorError(format!(
703 "Failed to stat {} {:?}: {}",
704 label, path, e
705 )));
706 }
707 }
708
709 if created {
710 fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
711 NucleusError::GVisorError(format!(
712 "Failed to secure newly-created {} permissions {:?}: {}",
713 label, path, e
714 ))
715 })?;
716 }
717
718 let dir = OpenOptions::new()
719 .read(true)
720 .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC | libc::O_DIRECTORY)
721 .open(path)
722 .map_err(|e| {
723 NucleusError::GVisorError(format!(
724 "Failed to open {} {:?} without following symlinks: {}",
725 label, path, e
726 ))
727 })?;
728
729 let metadata = dir.metadata().map_err(|e| {
730 NucleusError::GVisorError(format!("Failed to stat {} {:?}: {}", label, path, e))
731 })?;
732 if !metadata.file_type().is_dir() {
733 return Err(NucleusError::GVisorError(format!(
734 "{} {:?} is not a directory",
735 label, path
736 )));
737 }
738
739 let owner = metadata.uid();
740 let expected = Uid::effective().as_raw();
741 if owner != expected {
742 return Err(NucleusError::GVisorError(format!(
743 "{} {:?} is owned by uid {} (expected {})",
744 label, path, owner, expected
745 )));
746 }
747
748 let mode = metadata.permissions().mode() & 0o777;
749 if mode != 0o700 {
750 dir.set_permissions(fs::Permissions::from_mode(0o700))
751 .map_err(|e| {
752 NucleusError::GVisorError(format!(
753 "Failed to secure {} permissions {:?}: {}",
754 label, path, e
755 ))
756 })?;
757 }
758
759 Ok(())
760 }
761
762 fn ensure_trusted_runsc_parent(parent: &Path, label: &str) -> Result<()> {
763 let metadata = fs::symlink_metadata(parent).map_err(|e| {
764 NucleusError::GVisorError(format!(
765 "Failed to stat parent for {} {:?}: {}",
766 label, parent, e
767 ))
768 })?;
769 if metadata.file_type().is_symlink() {
770 return Err(NucleusError::GVisorError(format!(
771 "Refusing symlink parent for {} {:?}",
772 label, parent
773 )));
774 }
775 if !metadata.file_type().is_dir() {
776 return Err(NucleusError::GVisorError(format!(
777 "Parent for {} {:?} is not a directory",
778 label, parent
779 )));
780 }
781
782 let owner = metadata.uid();
783 let current = Uid::effective().as_raw();
784 let owner_trusted = owner == current || owner == 0;
785 let mode = metadata.permissions().mode();
786 let unsafe_writable = mode & 0o022 != 0 && mode & 0o1000 == 0;
787 if !owner_trusted || unsafe_writable {
788 return Err(NucleusError::GVisorError(format!(
789 "Parent for {} {:?} is not trusted (owner uid {}, mode {:o})",
790 label,
791 parent,
792 owner,
793 mode & 0o7777
794 )));
795 }
796
797 Ok(())
798 }
799
800 fn create_unique_runsc_stage_dir(private_dir: &Path) -> Result<PathBuf> {
801 let nonce = std::time::SystemTime::now()
802 .duration_since(std::time::UNIX_EPOCH)
803 .map(|duration| duration.as_nanos())
804 .unwrap_or_default();
805
806 for attempt in 0..100u32 {
807 let stage_dir = private_dir.join(format!(
808 "stage-{}-{}-{}",
809 std::process::id(),
810 nonce,
811 attempt
812 ));
813 match DirBuilder::new().mode(0o700).create(&stage_dir) {
814 Ok(()) => {
815 Self::ensure_secure_runsc_dir(&stage_dir, "runsc stage directory")?;
816 return Ok(stage_dir);
817 }
818 Err(e) if e.kind() == io::ErrorKind::AlreadyExists => continue,
819 Err(e) => {
820 return Err(NucleusError::GVisorError(format!(
821 "Failed to create runsc stage directory {:?}: {}",
822 stage_dir, e
823 )));
824 }
825 }
826 }
827
828 Err(NucleusError::GVisorError(format!(
829 "Failed to create unique runsc stage directory under {:?}",
830 private_dir
831 )))
832 }
833
834 fn copy_runsc_nofollow(source: &Path, staged: &Path) -> Result<()> {
835 let mut source_file = OpenOptions::new()
836 .read(true)
837 .custom_flags(libc::O_CLOEXEC)
838 .open(source)
839 .map_err(|e| {
840 NucleusError::GVisorError(format!(
841 "Failed to open runsc source {:?}: {}",
842 source, e
843 ))
844 })?;
845
846 let source_meta = source_file.metadata().map_err(|e| {
847 NucleusError::GVisorError(format!("Failed to stat runsc source {:?}: {}", source, e))
848 })?;
849 if !source_meta.file_type().is_file() {
850 return Err(NucleusError::GVisorError(format!(
851 "runsc source {:?} is not a regular file",
852 source
853 )));
854 }
855
856 let mut staged_file = OpenOptions::new()
857 .write(true)
858 .create_new(true)
859 .mode(0o500)
860 .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
861 .open(staged)
862 .map_err(|e| {
863 NucleusError::GVisorError(format!(
864 "Failed to create staged runsc binary {:?}: {}",
865 staged, e
866 ))
867 })?;
868
869 io::copy(&mut source_file, &mut staged_file).map_err(|e| {
870 NucleusError::GVisorError(format!(
871 "Failed to stage runsc binary from {:?} to {:?}: {}",
872 source, staged, e
873 ))
874 })?;
875 staged_file
876 .set_permissions(fs::Permissions::from_mode(0o500))
877 .map_err(|e| {
878 NucleusError::GVisorError(format!(
879 "Failed to secure staged runsc binary {:?}: {}",
880 staged, e
881 ))
882 })?;
883 staged_file.sync_all().map_err(|e| {
884 NucleusError::GVisorError(format!(
885 "Failed to sync staged runsc binary {:?}: {}",
886 staged, e
887 ))
888 })?;
889
890 Ok(())
891 }
892
893 fn apply_supervisor_exec_policy(
894 &self,
895 allowed_roots: &[PathBuf],
896 required: bool,
897 ) -> Result<()> {
898 let mut landlock = LandlockManager::new();
899 let applied = landlock.apply_execute_allowlist_policy(allowed_roots, !required)?;
900 if applied {
901 info!(
902 allowed_roots = ?allowed_roots,
903 "Applied gVisor supervisor execute allowlist"
904 );
905 } else if required {
906 return Err(NucleusError::LandlockError(
907 "Required gVisor supervisor execute allowlist was not applied".to_string(),
908 ));
909 } else {
910 warn!(
911 allowed_roots = ?allowed_roots,
912 "gVisor supervisor execute allowlist unavailable"
913 );
914 }
915 Ok(())
916 }
917
918 fn build_oci_run_args(
919 &self,
920 container_id: &str,
921 bundle: &OciBundle,
922 runsc_root: &Path,
923 options: GVisorOciRunOptions,
924 ) -> Vec<String> {
925 let mut args = vec![
926 self.runsc_path.clone(),
927 "--root".to_string(),
928 runsc_root.to_string_lossy().to_string(),
929 ];
930
931 if options.runsc_rootless {
932 args.push("--rootless".to_string());
933 }
934
935 if options.ignore_cgroups {
936 args.push("--ignore-cgroups".to_string());
937 }
938
939 args.extend([
940 "--network".to_string(),
941 options.network_flag().to_string(),
942 "--platform".to_string(),
943 options.platform.as_flag().to_string(),
944 "run".to_string(),
945 "--bundle".to_string(),
946 bundle.bundle_path().to_string_lossy().to_string(),
947 container_id.to_string(),
948 ]);
949
950 args
951 }
952}
953
#[cfg(test)]
mod tests {
    use super::*;
    use crate::oci::OciConfig;
    use std::path::Path;
    use std::sync::{Mutex, MutexGuard};

    // Serializes tests that mutate process-wide environment variables.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII holder for `ENV_LOCK`; the lock is released when this is dropped.
    struct EnvLock {
        _guard: MutexGuard<'static, ()>,
    }

    impl EnvLock {
        /// Blocks until the environment lock is held.
        fn acquire() -> Self {
            Self {
                // The mutex guards no data, so a poisoned lock (an earlier test
                // panicked while holding it) is still safe to reuse. Recovering
                // keeps one failing test from cascading into the others.
                _guard: ENV_LOCK
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner()),
            }
        }
    }

    /// Sets or removes an environment variable and restores the previous state
    /// on drop.
    struct EnvVarGuard {
        key: &'static str,
        previous: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Sets `key` to `value`, remembering what was there before.
        fn set(key: &'static str, value: impl AsRef<std::ffi::OsStr>) -> Self {
            let previous = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, previous }
        }

        /// Removes `key`, remembering what was there before.
        fn remove(key: &'static str) -> Self {
            let previous = std::env::var_os(key);
            std::env::remove_var(key);
            Self { key, previous }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            match &self.previous {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    #[test]
    fn test_gvisor_availability() {
        // The outcome depends on the host; this only checks the call does not panic.
        let available = GVisorRuntime::is_available();
        println!("gVisor available: {}", available);
    }

    #[test]
    fn test_gvisor_new() {
        let runtime = GVisorRuntime::new();
        if let Ok(rt) = runtime {
            println!("Found runsc at: {}", rt.runsc_path);
            if let Ok(version) = rt.version() {
                println!("runsc version: {}", version);
            }
        }
    }

    #[test]
    fn test_find_runsc() {
        match GVisorRuntime::find_runsc() {
            Ok(path) => {
                println!("Found runsc at: {}", path);
                assert!(!path.is_empty());
            }
            Err(e) => {
                println!("runsc not found (expected if gVisor not installed): {}", e);
            }
        }
    }

    #[test]
    fn test_validate_runsc_rejects_world_writable() {
        let dir = tempfile::tempdir().unwrap();
        let fake_runsc = dir.path().join("runsc");
        std::fs::write(&fake_runsc, "#!/bin/sh\necho fake").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o777)).unwrap();

        let result = GVisorRuntime::validate_runsc_path(&fake_runsc);
        assert!(
            result.is_err(),
            "validate_runsc_path must reject world-writable binaries"
        );
    }

    #[test]
    fn test_validate_runsc_rejects_group_writable() {
        let dir = tempfile::tempdir().unwrap();
        let fake_runsc = dir.path().join("runsc");
        std::fs::write(&fake_runsc, "#!/bin/sh\necho fake").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o775)).unwrap();

        let result = GVisorRuntime::validate_runsc_path(&fake_runsc);
        assert!(
            result.is_err(),
            "validate_runsc_path must reject group-writable binaries"
        );
    }

    #[test]
    fn test_runsc_owner_accepts_nix_store_artifact_owner() {
        // Needs a real runsc under /nix/store; skipped on hosts without one.
        let nix_binary = std::fs::read_dir("/nix/store")
            .ok()
            .and_then(|mut entries| {
                entries.find_map(|e| {
                    let dir = e.ok()?.path();
                    let candidate = dir.join("bin/runsc");
                    if candidate.exists() {
                        Some(candidate)
                    } else {
                        None
                    }
                })
            });

        let path = match nix_binary {
            Some(p) => p,
            None => {
                eprintln!("skipping: no runsc binary found in /nix/store");
                return;
            }
        };

        assert!(GVisorRuntime::is_trusted_runsc_owner(&path, 65534, 1000));
    }

    #[test]
    fn test_exec_environment_uses_hardcoded_path() {
        // PATH is process-wide: hold the env lock and restore the previous value
        // on drop so the poisoned PATH cannot leak into other tests. Locals drop
        // in reverse order, so PATH is restored before the lock is released.
        let _env_lock = EnvLock::acquire();
        let _path = EnvVarGuard::set("PATH", "/tmp/evil-inject/bin:/opt/attacker/sbin");

        let rt = GVisorRuntime::with_path("/fake/runsc".to_string());
        let tmp = tempfile::tempdir().unwrap();
        let env = rt.exec_environment(tmp.path()).unwrap();
        let path_entry = env
            .iter()
            .find(|e| e.to_str().is_ok_and(|s| s.starts_with("PATH=")))
            .expect("exec_environment must set PATH");
        let path_val = path_entry.to_str().unwrap();
        assert!(
            !path_val.contains("evil-inject") && !path_val.contains("attacker"),
            "exec_environment must use hardcoded PATH, not host PATH. Got: {}",
            path_val
        );
        assert_eq!(
            path_val, "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "exec_environment PATH must be the standard hardcoded value"
        );
    }

    #[test]
    fn test_precreated_rootless_args_pass_runsc_rootless() {
        let rt = GVisorRuntime::with_path("/nix/store/fake-runsc/bin/runsc".to_string());
        let tmp = tempfile::tempdir().unwrap();
        let bundle = OciBundle::new(
            tmp.path().join("bundle"),
            OciConfig::new(vec!["/bin/true".to_string()], None),
        );

        let args = rt.build_oci_run_args(
            "container-id",
            &bundle,
            tmp.path(),
            GVisorOciRunOptions {
                network_mode: GVisorNetworkMode::Host,
                ignore_cgroups: true,
                runsc_rootless: true,
                require_supervisor_exec_policy: false,
                platform: GVisorPlatform::Systrap,
            },
        );

        assert!(args.iter().any(|arg| arg == "--rootless"));
        assert!(args.iter().any(|arg| arg == "--ignore-cgroups"));
    }

    #[test]
    fn test_rootless_oci_args_do_not_pass_runsc_rootless() {
        let rt = GVisorRuntime::with_path("/nix/store/fake-runsc/bin/runsc".to_string());
        let tmp = tempfile::tempdir().unwrap();
        let bundle = OciBundle::new(
            tmp.path().join("bundle"),
            OciConfig::new(vec!["/bin/true".to_string()], None),
        );

        let args = rt.build_oci_run_args(
            "container-id",
            &bundle,
            tmp.path(),
            GVisorOciRunOptions {
                network_mode: GVisorNetworkMode::Host,
                ignore_cgroups: true,
                runsc_rootless: false,
                require_supervisor_exec_policy: false,
                platform: GVisorPlatform::Systrap,
            },
        );

        assert!(!args.iter().any(|arg| arg == "--rootless"));
        assert!(args.iter().any(|arg| arg == "--ignore-cgroups"));
    }

    #[test]
    fn test_non_nix_runsc_is_staged_for_supervisor_exec_policy() {
        let tmp = tempfile::tempdir().unwrap();
        let fake_runsc = tmp.path().join("runsc-source");
        std::fs::write(&fake_runsc, b"fake-runsc").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o500)).unwrap();

        let rt = GVisorRuntime::with_path(fake_runsc.to_string_lossy().to_string());
        let runsc_root = tmp.path().join("runsc-root");
        let (program, allow_roots) = rt.prepare_supervisor_runsc_program(&runsc_root).unwrap();

        assert!(program.starts_with(runsc_root.join("exec-allow")));
        assert_eq!(allow_roots, vec![runsc_root.join("exec-allow")]);
        assert_eq!(std::fs::read(&program).unwrap(), b"fake-runsc");
        let mode = std::fs::metadata(&program).unwrap().permissions().mode() & 0o777;
        assert_eq!(mode, 0o500);
    }

    #[test]
    fn test_runsc_root_uses_hardened_artifact_dir_not_bundle_parent() {
        let _env_lock = EnvLock::acquire();
        let tmp = tempfile::tempdir().unwrap();
        let artifact_base = tmp.path().join("gvisor-artifacts");
        let _artifact_base = EnvVarGuard::set("NUCLEUS_GVISOR_ARTIFACT_BASE", &artifact_base);
        let _runtime = EnvVarGuard::remove("XDG_RUNTIME_DIR");

        let bundle_parent = tmp.path().join("shared");
        std::fs::create_dir_all(&bundle_parent).unwrap();
        std::fs::set_permissions(&bundle_parent, std::fs::Permissions::from_mode(0o777)).unwrap();
        let bundle = OciBundle::new(
            bundle_parent.join("bundle"),
            OciConfig::new(vec!["/bin/true".to_string()], None),
        );

        let runsc_root = GVisorRuntime::secure_runsc_root("container-id").unwrap();

        assert!(runsc_root
            .starts_with(artifact_base.join(GVisorRuntime::runsc_state_component("container-id"))));
        assert!(
            !runsc_root.starts_with(bundle.bundle_path().parent().unwrap()),
            "runsc root must not be derived from a custom bundle parent"
        );
    }

    #[test]
    fn test_runsc_staging_rejects_symlink_exec_allow_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let fake_runsc = tmp.path().join("runsc-source");
        std::fs::write(&fake_runsc, b"fake-runsc").unwrap();
        std::fs::set_permissions(&fake_runsc, std::fs::Permissions::from_mode(0o500)).unwrap();

        let runsc_root = tmp.path().join("runsc-root");
        std::fs::create_dir(&runsc_root).unwrap();
        std::fs::set_permissions(&runsc_root, std::fs::Permissions::from_mode(0o700)).unwrap();
        let victim_dir = tmp.path().join("victim");
        std::fs::create_dir(&victim_dir).unwrap();
        std::os::unix::fs::symlink(&victim_dir, runsc_root.join("exec-allow")).unwrap();

        let rt = GVisorRuntime::with_path(fake_runsc.to_string_lossy().to_string());
        let err = rt
            .prepare_supervisor_runsc_program(&runsc_root)
            .unwrap_err()
            .to_string();

        assert!(
            err.contains("Refusing symlink private runsc exec directory"),
            "unexpected error: {}",
            err
        );
        assert!(
            !victim_dir.join("runsc").exists(),
            "staging must not follow the exec-allow symlink"
        );
    }

    #[test]
    fn test_runsc_owner_rejects_untrusted_non_store_owner() {
        assert!(!GVisorRuntime::is_trusted_runsc_owner(
            Path::new("/tmp/runsc"),
            4242,
            1000
        ));
    }
}