1use crate::{idmap, oslib, passthrough, util};
6use idmap::{GidMap, IdMapSetUpPipeMessage, UidMap};
7use std::ffi::CString;
8use std::fs::{self, File};
9use std::io::{Read, Write};
10use std::os::fd::OwnedFd;
11use std::os::unix::io::{AsRawFd, FromRawFd};
12use std::path::Path;
13use std::process::{self, Command};
14use std::str::FromStr;
15use std::{error, fmt, io};
16use vhost::vhost_user::Listener;
17
/// Errors that can occur while setting up or entering the sandbox.
///
/// Variants carrying an [`io::Error`] wrap the OS error of the failing step;
/// variants carrying a `String` wrap a pre-formatted message.
#[derive(Debug)]
pub enum Error {
    /// Bind-mounting `/proc/self/fd` failed.
    BindMountProcSelfFd(io::Error),
    /// Bind-mounting the shared directory onto itself failed.
    BindMountSharedDir(io::Error),
    /// Changing the working directory to the old root failed.
    ChdirOldRoot(io::Error),
    /// Changing the working directory to the new root failed.
    ChdirNewRoot(io::Error),
    /// `chroot(2)` into the shared directory failed.
    Chroot(io::Error),
    /// `chdir("/")` after `chroot(2)` failed.
    ChrootChdir(io::Error),
    /// Changing the mount propagation type (`MS_SLAVE | MS_REC`) failed.
    CleanMount(io::Error),
    /// Presumably: creating a temporary directory failed (not raised by the
    /// sandbox code in this module).
    CreateTempDir(io::Error),
    /// Dropping the supplemental groups failed, or the `/proc/self` files
    /// needed to decide whether to drop them could not be read.
    DropSupplementalGroups(io::Error),
    /// Forking a helper/child process failed.
    Fork(io::Error),
    /// Querying the number of supplemental groups failed.
    GetSupplementalGroups(io::Error),
    /// Presumably: a bind mount failed (not raised by the sandbox code in
    /// this module).
    MountBind(io::Error),
    /// Presumably: mounting the old root failed (not raised by the sandbox
    /// code in this module).
    MountOldRoot(io::Error),
    /// Mounting a fresh `proc` filesystem on `/proc` failed.
    MountProc(io::Error),
    /// Presumably: mounting the new root failed (not raised by the sandbox
    /// code in this module).
    MountNewRoot(io::Error),
    /// Presumably: mounting a target failed (not raised by the sandbox code
    /// in this module).
    MountTarget(io::Error),
    /// Opening the process's `mountinfo` file failed.
    OpenMountinfo(io::Error),
    /// Opening the new root (the shared directory) failed.
    OpenNewRoot(io::Error),
    /// Opening the old root (`/`) failed.
    OpenOldRoot(io::Error),
    /// `statx` on the new root failed.
    StatNewRoot(io::Error),
    /// `statx` on the old root failed.
    StatOldRoot(io::Error),
    /// Opening `/proc/self` failed.
    OpenProcSelf(io::Error),
    /// Opening the `/proc/self/fd` directory failed.
    OpenProcSelfFd(io::Error),
    /// The `pivot_root(2)` system call failed.
    PivotRoot(io::Error),
    /// Presumably: removing a temporary directory failed (not raised by the
    /// sandbox code in this module).
    RmdirTempDir(io::Error),
    /// Lazily unmounting the old root failed.
    UmountOldRoot(io::Error),
    /// Presumably: unmounting a temporary directory failed (not raised by the
    /// sandbox code in this module).
    UmountTempDir(io::Error),
    /// `unshare(2)` failed.
    Unshare(io::Error),
    /// Writing the GID map (or `setgroups`) of the sandboxed process failed.
    WriteGidMap(String),
    /// Presumably: writing `/proc/<pid>/setgroups` failed (not raised by the
    /// sandbox code in this module).
    WriteSetGroups(io::Error),
    /// Writing the UID map of the sandboxed process failed.
    WriteUidMap(String),
    /// Sandbox mode `chroot` was requested by a non-root user.
    SandboxModeInvalidUID,
    /// A UID map was supplied although the process is root or the sandbox
    /// mode is not `namespace`.
    SandboxModeInvalidUidMap,
    /// A GID map was supplied although the process is root or the sandbox
    /// mode is not `namespace`.
    SandboxModeInvalidGidMap,
}
89
90impl error::Error for Error {}
91
92impl fmt::Display for Error {
93 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
94 use self::Error::{
95 SandboxModeInvalidGidMap, SandboxModeInvalidUID, SandboxModeInvalidUidMap, WriteGidMap,
96 WriteUidMap,
97 };
98 match self {
99 SandboxModeInvalidUID => {
100 write!(
101 f,
102 "sandbox mode 'chroot' can only be used by \
103 root (Use '--sandbox namespace' instead)"
104 )
105 }
106 SandboxModeInvalidUidMap => {
107 write!(
108 f,
109 "uid_map can only be used by unprivileged user where sandbox mod is namespace \
110 (Use '--sandbox namespace' instead)"
111 )
112 }
113 SandboxModeInvalidGidMap => {
114 write!(
115 f,
116 "gid_map can only be used by unprivileged user where sandbox mod is namespace \
117 (Use '--sandbox namespace' instead)"
118 )
119 }
120 WriteUidMap(msg) => write!(f, "write to uid map failed: {msg}"),
121 WriteGidMap(msg) => write!(f, "write to gid map failed: {msg}"),
122 _ => write!(f, "{self:?}"),
123 }
124 }
125}
126
/// Mechanism used to confine the daemon to the shared directory.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SandboxMode {
    /// Enter new namespaces (PID, mount, network, and a user namespace when
    /// not running as root) and switch the root to the shared directory.
    Namespace,
    /// `chroot(2)` into the shared directory; only allowed for root.
    Chroot,
    /// Do not sandbox at all.
    None,
}
137
138impl FromStr for SandboxMode {
139 type Err = &'static str;
140 fn from_str(s: &str) -> Result<Self, Self::Err> {
141 match s.to_lowercase().as_str() {
142 "namespace" => Ok(SandboxMode::Namespace),
143 "chroot" => Ok(SandboxMode::Chroot),
144 "none" => Ok(SandboxMode::None),
145 _ => Err("Unknown sandbox mode"),
146 }
147 }
148}
149
150pub struct Sandbox {
152 shared_dir: String,
155 proc_self_fd: Option<File>,
157 mountinfo_fd: Option<File>,
159 sandbox_mode: SandboxMode,
161 uid_map: Vec<UidMap>,
163 gid_map: Vec<GidMap>,
165}
166
167impl Sandbox {
168 pub fn new(
169 shared_dir: String,
170 sandbox_mode: SandboxMode,
171 uid_map: Vec<UidMap>,
172 gid_map: Vec<GidMap>,
173 ) -> io::Result<Self> {
174 let shared_dir_rp = fs::canonicalize(shared_dir)?;
175 let shared_dir_rp_str = shared_dir_rp
176 .to_str()
177 .ok_or_else(|| io::Error::from_raw_os_error(libc::EINVAL))?;
178
179 Ok(Sandbox {
180 shared_dir: shared_dir_rp_str.into(),
181 proc_self_fd: None,
182 mountinfo_fd: None,
183 sandbox_mode,
184 uid_map,
185 gid_map,
186 })
187 }
188
189 fn setup_mounts(&mut self) -> Result<(), Error> {
199 let c_proc_self = CString::new("/proc/self").unwrap();
203 let proc_self_raw = unsafe { libc::open(c_proc_self.as_ptr(), libc::O_PATH) };
204 if proc_self_raw < 0 {
205 return Err(Error::OpenProcSelf(std::io::Error::last_os_error()));
206 }
207
208 let proc_self = unsafe { File::from_raw_fd(proc_self_raw) };
211
212 oslib::mount(None, "/", None, libc::MS_SLAVE | libc::MS_REC).map_err(Error::CleanMount)?;
215
216 oslib::mount(
218 "proc".into(),
219 "/proc",
220 "proc".into(),
221 libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID | libc::MS_RELATIME,
222 )
223 .map_err(Error::MountProc)?;
224
225 oslib::mount("/proc/self/fd".into(), "/proc", None, libc::MS_BIND)
228 .map_err(Error::BindMountProcSelfFd)?;
229
230 let c_proc_dir = CString::new("/proc").unwrap();
232 let proc_self_fd = unsafe { libc::open(c_proc_dir.as_ptr(), libc::O_PATH) };
233 if proc_self_fd < 0 {
234 return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error()));
235 }
236 self.proc_self_fd = Some(unsafe { File::from_raw_fd(proc_self_fd) });
238
239 oslib::mount(
241 self.shared_dir.as_str().into(),
242 self.shared_dir.as_str(),
243 None,
244 libc::MS_BIND | libc::MS_REC,
245 )
246 .map_err(Error::BindMountSharedDir)?;
247
248 let c_root_dir = CString::new("/").unwrap();
250 let oldroot_fd = unsafe {
251 libc::open(
252 c_root_dir.as_ptr(),
253 libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
254 )
255 };
256 if oldroot_fd < 0 {
257 return Err(Error::OpenOldRoot(std::io::Error::last_os_error()));
258 }
259
260 let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap();
262 let newroot_fd = unsafe {
263 libc::open(
264 c_shared_dir.as_ptr(),
265 libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
266 )
267 };
268 if newroot_fd < 0 {
269 return Err(Error::OpenNewRoot(std::io::Error::last_os_error()));
270 }
271
272 oslib::fchdir(newroot_fd).map_err(Error::ChdirNewRoot)?;
274
275 let old_st = passthrough::statx(&oldroot_fd, None).map_err(Error::StatOldRoot)?;
277 let new_st = passthrough::statx(&newroot_fd, None).map_err(Error::StatNewRoot)?;
278 let switch_to_current_rootfs = (old_st.mnt_id == new_st.mnt_id)
279 && (old_st.st.st_dev == new_st.st.st_dev)
280 && (old_st.st.st_ino == new_st.st.st_ino);
281
282 if !switch_to_current_rootfs {
283 let c_current_dir = CString::new(".").unwrap();
285 let ret = unsafe {
286 libc::syscall(
287 libc::SYS_pivot_root,
288 c_current_dir.as_ptr(),
289 c_current_dir.as_ptr(),
290 )
291 };
292 if ret < 0 {
293 return Err(Error::PivotRoot(std::io::Error::last_os_error()));
294 }
295
296 oslib::fchdir(oldroot_fd).map_err(Error::ChdirOldRoot)?;
298
299 oslib::mount(None, ".", None, libc::MS_SLAVE | libc::MS_REC)
301 .map_err(Error::CleanMount)?;
302
303 oslib::umount2(".", libc::MNT_DETACH).map_err(Error::UmountOldRoot)?;
305
306 oslib::fchdir(newroot_fd).map_err(Error::ChdirNewRoot)?;
308 }
309
310 unsafe { libc::close(newroot_fd) };
312 unsafe { libc::close(oldroot_fd) };
313
314 let c_mountinfo = CString::new("mountinfo").unwrap();
316 let mountinfo_fd =
317 unsafe { libc::openat(proc_self.as_raw_fd(), c_mountinfo.as_ptr(), libc::O_RDONLY) };
318 if mountinfo_fd < 0 {
319 return Err(Error::OpenMountinfo(std::io::Error::last_os_error()));
320 }
321 self.mountinfo_fd = Some(unsafe { File::from_raw_fd(mountinfo_fd) });
323
324 Ok(())
325 }
326
327 fn setup_id_mappings(
329 &self,
330 uid_map: &[UidMap],
331 gid_map: &[GidMap],
332 pid: i32,
333 ) -> Result<(), Error> {
334 let current_uid = unsafe { libc::geteuid() };
335 let current_gid = unsafe { libc::getegid() };
336
337 let default_uid_map = vec![UidMap {
339 outside_uid: current_uid,
340 inside_uid: current_uid,
341 count: 1,
342 }];
343 let uid_map = if uid_map.is_empty() {
344 &default_uid_map
345 } else {
346 uid_map
347 };
348
349 let default_gid_map = vec![GidMap {
351 outside_gid: current_gid,
352 inside_gid: current_gid,
353 count: 1,
354 }];
355 let gid_map = if gid_map.is_empty() {
356 &default_gid_map
357 } else {
358 gid_map
359 };
360
361 if uid_map.len() != 1 || uid_map[0].outside_uid != current_uid || uid_map[0].count > 1 {
366 let mut newuidmap = Command::new("newuidmap");
367 newuidmap.arg(pid.to_string());
368 for entry in uid_map.iter() {
369 newuidmap.arg(entry.inside_uid.to_string());
370 newuidmap.arg(entry.outside_uid.to_string());
371 newuidmap.arg(entry.count.to_string());
372 }
373 let output = newuidmap.output().map_err(|_| {
374 Error::WriteUidMap(format!(
375 "failed to execute newuidmap: {}",
376 io::Error::last_os_error()
377 ))
378 })?;
379 if !output.status.success() {
380 return Err(Error::WriteUidMap(
381 String::from_utf8_lossy(&output.stderr).to_string(),
382 ));
383 }
384 } else {
385 std::fs::write(
387 format!("/proc/{pid}/uid_map"),
388 format!("{} {} 1", uid_map[0].inside_uid, uid_map[0].outside_uid),
389 )
390 .map_err(|e| Error::WriteUidMap(e.to_string()))?;
391 }
392
393 if gid_map.len() != 1 || gid_map[0].outside_gid != current_gid || gid_map[0].count > 1 {
394 let mut newgidmap = Command::new("newgidmap");
395 newgidmap.arg(pid.to_string());
396 for entry in gid_map.iter() {
397 newgidmap.arg(entry.inside_gid.to_string());
398 newgidmap.arg(entry.outside_gid.to_string());
399 newgidmap.arg(entry.count.to_string());
400 }
401 let output = newgidmap.output().map_err(|_| {
402 Error::WriteGidMap(format!(
403 "failed to execute newgidmap: {}",
404 io::Error::last_os_error()
405 ))
406 })?;
407 if !output.status.success() {
408 return Err(Error::WriteGidMap(
409 String::from_utf8_lossy(&output.stderr).to_string(),
410 ));
411 }
412 } else {
413 std::fs::write(format!("/proc/{pid}/setgroups"), b"deny")
415 .map_err(|e| Error::WriteGidMap(e.to_string()))?;
416 std::fs::write(
417 format!("/proc/{pid}/gid_map"),
418 format!("{} {} 1", gid_map[0].inside_gid, gid_map[0].outside_gid),
419 )
420 .map_err(|e| Error::WriteGidMap(e.to_string()))?;
421 }
422 Ok(())
423 }
424
425 pub fn enter_namespace(&mut self, listener: Listener) -> Result<Listener, Error> {
426 let uid = unsafe { libc::geteuid() };
427
428 let flags = if uid == 0 {
429 libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET
430 } else {
431 libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET | libc::CLONE_NEWUSER
433 };
434
435 let (mut x_reader, mut x_writer) = oslib::pipe().unwrap();
436 let (mut y_reader, mut y_writer) = oslib::pipe().unwrap();
437
438 let pid = util::sfork().map_err(Error::Fork)?;
439 let mut output = [0];
440
441 if pid == 0 {
446 drop(x_writer);
449 drop(y_reader);
450
451 x_reader.read_exact(&mut output).unwrap();
453 assert_eq!(output[0], IdMapSetUpPipeMessage::Request as u8);
454
455 if uid != 0 {
457 let ppid = unsafe { libc::getppid() };
458 if let Err(error) = self.setup_id_mappings(&self.uid_map, &self.gid_map, ppid) {
459 drop(x_reader);
463 drop(y_writer);
464 error!("sandbox: couldn't setup id mappings: {error}");
465 process::exit(1);
466 };
467 }
468
469 y_writer
471 .write_all(&[IdMapSetUpPipeMessage::Done as u8])
472 .unwrap_or_else(|_| process::exit(1));
473
474 process::exit(0);
476 } else {
477 let ret = unsafe { libc::unshare(flags) };
479 if ret != 0 {
480 return Err(Error::Unshare(std::io::Error::last_os_error()));
481 }
482
483 drop(x_reader);
485 drop(y_writer);
486
487 x_writer
489 .write_all(&[IdMapSetUpPipeMessage::Request as u8])
490 .unwrap();
491
492 y_reader
496 .read_exact(&mut output)
497 .unwrap_or_else(|_| process::exit(1));
498 assert_eq!(output[0], IdMapSetUpPipeMessage::Done as u8);
499
500 let mut status = 0_i32;
501 let _ = unsafe { libc::waitpid(pid, &mut status, 0) };
502
503 let mut ret = unsafe { libc::setresuid(0, 0, 0) };
505 if ret != 0 {
506 warn!("Couldn't set the process uid as root: {ret}");
507 }
508 ret = unsafe { libc::setresgid(0, 0, 0) };
509 if ret != 0 {
510 warn!("Couldn't set the process gid as root: {ret}");
511 }
512
513 let child = util::sfork().map_err(Error::Fork)?;
514 if child == 0 {
515 self.setup_mounts()?;
517 Ok(listener)
518 } else {
519 let fd = listener.as_raw_fd();
529
530 std::mem::forget(listener);
532
533 let fd = unsafe { OwnedFd::from_raw_fd(fd) };
536 drop(fd);
537
538 util::wait_for_child(child); }
540 }
541 }
542
543 pub fn enter_chroot(&mut self) -> Result<(), Error> {
544 let c_proc_self_fd = CString::new("/proc/self/fd").unwrap();
545 let proc_self_fd = unsafe { libc::open(c_proc_self_fd.as_ptr(), libc::O_PATH) };
546 if proc_self_fd < 0 {
547 return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error()));
548 }
549 self.proc_self_fd = Some(unsafe { File::from_raw_fd(proc_self_fd) });
551
552 let c_mountinfo = CString::new("/proc/self/mountinfo").unwrap();
553 let mountinfo_fd = unsafe { libc::open(c_mountinfo.as_ptr(), libc::O_RDONLY) };
554 if mountinfo_fd < 0 {
555 return Err(Error::OpenMountinfo(std::io::Error::last_os_error()));
556 }
557 self.mountinfo_fd = Some(unsafe { File::from_raw_fd(mountinfo_fd) });
559
560 let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap();
561 let ret = unsafe { libc::chroot(c_shared_dir.as_ptr()) };
562 if ret != 0 {
563 return Err(Error::Chroot(std::io::Error::last_os_error()));
564 }
565
566 let c_root_dir = CString::new("/").unwrap();
567 let ret = unsafe { libc::chdir(c_root_dir.as_ptr()) };
568 if ret != 0 {
569 return Err(Error::ChrootChdir(std::io::Error::last_os_error()));
570 }
571
572 Ok(())
573 }
574
575 fn must_drop_supplemental_groups(&self) -> Result<bool, Error> {
576 let uid = unsafe { libc::geteuid() };
577 if uid != 0 {
578 return Ok(false);
579 }
580
581 if !Path::new("/proc/self/ns/user").exists() {
584 return Ok(true);
585 }
586
587 let uid_mmap_data =
588 fs::read_to_string("/proc/self/uid_map").map_err(Error::DropSupplementalGroups)?;
589 let uid_map: Vec<_> = uid_mmap_data.split_whitespace().collect();
590
591 let gid_map_data =
592 fs::read_to_string("/proc/self/gid_map").map_err(Error::DropSupplementalGroups)?;
593 let gid_map: Vec<_> = gid_map_data.split_whitespace().collect();
594
595 let setgroups =
596 fs::read_to_string("/proc/self/setgroups").map_err(Error::DropSupplementalGroups)?;
597
598 let single_uid_mapping = uid_map.len() == 3 && uid_map[2] == "1";
601 let single_gid_mapping = gid_map.len() == 3 && gid_map[2] == "1";
602
603 Ok(setgroups.trim() != "deny" || !single_uid_mapping || !single_gid_mapping)
604 }
605
606 fn drop_supplemental_groups(&self) -> Result<(), Error> {
607 let ngroups = unsafe { libc::getgroups(0, std::ptr::null_mut()) };
608 if ngroups < 0 {
609 return Err(Error::GetSupplementalGroups(std::io::Error::last_os_error()));
610 } else if ngroups != 0 {
611 let ret = unsafe { libc::setgroups(0, std::ptr::null()) };
612 if ret != 0 {
613 return Err(Error::DropSupplementalGroups(
614 std::io::Error::last_os_error(),
615 ));
616 }
617 }
618
619 Ok(())
620 }
621
622 pub fn enter(&mut self, listener: Listener) -> Result<Listener, Error> {
624 let uid = unsafe { libc::geteuid() };
625 if uid != 0 && self.sandbox_mode == SandboxMode::Chroot {
626 return Err(Error::SandboxModeInvalidUID);
627 }
628
629 if !self.uid_map.is_empty() && (uid == 0 || self.sandbox_mode != SandboxMode::Namespace) {
630 return Err(Error::SandboxModeInvalidUidMap);
631 }
632
633 if !self.gid_map.is_empty() && (uid == 0 || self.sandbox_mode != SandboxMode::Namespace) {
634 return Err(Error::SandboxModeInvalidGidMap);
635 }
636
637 let must_drop_supplemental_groups = match self.must_drop_supplemental_groups() {
646 Ok(must_drop) => must_drop,
647 Err(error) => {
648 warn!(
649 "Failed to determine whether supplemental groups must be dropped: {error}; \
650 defaulting to trying to drop supplemental groups"
651 );
652 true
653 }
654 };
655
656 if must_drop_supplemental_groups {
657 self.drop_supplemental_groups()?;
658 }
659
660 match self.sandbox_mode {
661 SandboxMode::Namespace => self.enter_namespace(listener),
662 SandboxMode::Chroot => self.enter_chroot().and(Ok(listener)),
663 SandboxMode::None => Ok(listener),
664 }
665 }
666
667 pub fn get_proc_self_fd(&mut self) -> Option<File> {
668 self.proc_self_fd.take()
669 }
670
671 pub fn get_mountinfo_fd(&mut self) -> Option<File> {
672 self.mountinfo_fd.take()
673 }
674
675 pub fn get_root_dir(&self) -> String {
676 match self.sandbox_mode {
677 SandboxMode::Namespace | SandboxMode::Chroot => "/".to_string(),
678 SandboxMode::None => self.shared_dir.clone(),
679 }
680 }
681
682 pub fn get_mountinfo_prefix(&self) -> Option<String> {
685 match self.sandbox_mode {
686 SandboxMode::Namespace | SandboxMode::None => None,
687 SandboxMode::Chroot => Some(self.shared_dir.clone()),
688 }
689 }
690}