virtiofsd/
sandbox.rs

1// Copyright 2020 Red Hat, Inc. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{idmap, oslib, passthrough, util};
6use idmap::{GidMap, IdMapSetUpPipeMessage, UidMap};
7use std::ffi::CString;
8use std::fs::{self, File};
9use std::io::{Read, Write};
10use std::os::fd::OwnedFd;
11use std::os::unix::io::{AsRawFd, FromRawFd};
12use std::path::Path;
13use std::process::{self, Command};
14use std::str::FromStr;
15use std::{error, fmt, io};
16use vhost::vhost_user::Listener;
17
/// Errors that can occur while setting up the sandbox.
///
/// Most variants wrap the `io::Error` reported by the failing operation. The
/// `WriteUidMap`/`WriteGidMap` variants carry a human-readable description
/// instead, because the failure may come from the stderr of an external helper.
#[derive(Debug)]
pub enum Error {
    /// Failed to bind mount `/proc/self/fd` into a temporary directory.
    BindMountProcSelfFd(io::Error),
    /// Failed to bind mount shared directory.
    BindMountSharedDir(io::Error),
    /// Failed to change to the old root directory.
    ChdirOldRoot(io::Error),
    /// Failed to change to the new root directory.
    ChdirNewRoot(io::Error),
    /// Call to libc::chroot returned an error.
    Chroot(io::Error),
    /// Failed to change to the root directory after the chroot call.
    ChrootChdir(io::Error),
    /// Failed to clean the properties of the mount point.
    CleanMount(io::Error),
    /// Failed to create a temporary directory.
    CreateTempDir(io::Error),
    /// Failed to drop supplemental groups.
    DropSupplementalGroups(io::Error),
    /// Call to libc::fork returned an error.
    Fork(io::Error),
    /// Failed to get the number of supplemental groups.
    GetSupplementalGroups(io::Error),
    /// Error bind-mounting a directory.
    MountBind(io::Error),
    /// Failed to mount old root.
    MountOldRoot(io::Error),
    /// Error mounting proc.
    MountProc(io::Error),
    /// Failed to mount new root.
    MountNewRoot(io::Error),
    /// Error mounting target directory.
    MountTarget(io::Error),
    /// Failed to open `/proc/self/mountinfo`.
    OpenMountinfo(io::Error),
    /// Failed to open new root.
    OpenNewRoot(io::Error),
    /// Failed to open old root.
    OpenOldRoot(io::Error),
    /// Failed to stat new root.
    StatNewRoot(io::Error),
    /// Failed to stat old root.
    StatOldRoot(io::Error),
    /// Failed to open `/proc/self`.
    OpenProcSelf(io::Error),
    /// Failed to open `/proc/self/fd`.
    OpenProcSelfFd(io::Error),
    /// Error switching root directory.
    PivotRoot(io::Error),
    /// Failed to remove temporary directory.
    RmdirTempDir(io::Error),
    /// Failed to lazily unmount old root.
    UmountOldRoot(io::Error),
    /// Failed to lazily unmount temporary directory.
    UmountTempDir(io::Error),
    /// Call to libc::unshare returned an error.
    Unshare(io::Error),
    /// Failed to set up the gid mapping, either by executing `newgidmap(1)` or by writing
    /// directly to the `setgroups`/`gid_map` files under `/proc/<pid>`. The payload is a
    /// description of the failure.
    WriteGidMap(String),
    /// Failed to write to `/proc/self/setgroups`.
    WriteSetGroups(io::Error),
    /// Failed to set up the uid mapping, either by executing `newuidmap(1)` or by writing
    /// directly to `/proc/<pid>/uid_map`. The payload is a description of the failure.
    WriteUidMap(String),
    /// Sandbox mode 'chroot' is unavailable for non-privileged users
    SandboxModeInvalidUID,
    /// Setting uid_map is only allowed inside a namespace for non-privileged users
    SandboxModeInvalidUidMap,
    /// Setting gid_map is only allowed inside a namespace for non-privileged users
    SandboxModeInvalidGidMap,
}

impl error::Error for Error {}

impl fmt::Display for Error {
    /// Formats a user-facing message.
    ///
    /// The configuration errors and the id-map failures get a dedicated message; every other
    /// variant falls back to its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use self::Error::{
            SandboxModeInvalidGidMap, SandboxModeInvalidUID, SandboxModeInvalidUidMap, WriteGidMap,
            WriteUidMap,
        };
        match self {
            SandboxModeInvalidUID => {
                write!(
                    f,
                    "sandbox mode 'chroot' can only be used by \
                    root (Use '--sandbox namespace' instead)"
                )
            }
            SandboxModeInvalidUidMap => {
                write!(
                    f,
                    "uid_map can only be used by unprivileged user where sandbox mode is namespace \
                    (Use '--sandbox namespace' instead)"
                )
            }
            SandboxModeInvalidGidMap => {
                write!(
                    f,
                    "gid_map can only be used by unprivileged user where sandbox mode is namespace \
                    (Use '--sandbox namespace' instead)"
                )
            }
            WriteUidMap(msg) => write!(f, "write to uid map failed: {msg}"),
            WriteGidMap(msg) => write!(f, "write to gid map failed: {msg}"),
            // All remaining variants are shown through their `Debug` output.
            _ => write!(f, "{self:?}"),
        }
    }
}
126
/// Strategy used to isolate the service from the rest of the system.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SandboxMode {
    /// Build the sandbox out of Linux namespaces.
    Namespace,
    /// Build the sandbox with chroot.
    Chroot,
    /// Run without any sandbox isolation.
    None,
}

impl FromStr for SandboxMode {
    type Err = &'static str;

    /// Parses a sandbox mode name, ignoring letter case.
    ///
    /// Accepts `namespace`, `chroot` and `none`; any other input yields
    /// `Err("Unknown sandbox mode")`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Lookup table from the lowercase spelling to the corresponding mode.
        const KNOWN_MODES: [(&str, SandboxMode); 3] = [
            ("namespace", SandboxMode::Namespace),
            ("chroot", SandboxMode::Chroot),
            ("none", SandboxMode::None),
        ];

        let requested = s.to_lowercase();
        KNOWN_MODES
            .iter()
            .find(|(name, _)| *name == requested)
            .map(|&(_, mode)| mode)
            .ok_or("Unknown sandbox mode")
    }
}
149
/// A helper for creating a sandbox for isolating the service.
///
/// The sandbox is entered with `enter()`, which dispatches on `sandbox_mode`. After a
/// successful setup the file descriptors gathered along the way can be retrieved (once)
/// through `get_proc_self_fd()` and `get_mountinfo_fd()`.
pub struct Sandbox {
    /// The directory that is going to be shared with the VM. The sandbox will be constructed on top
    /// of this directory. Stored in canonicalized form (see `new()`).
    shared_dir: String,
    /// A `File` object for `/proc/self/fd` obtained from the sandboxed context.
    /// `None` until the namespace or chroot setup has run, and again after it has been taken.
    proc_self_fd: Option<File>,
    /// A `File` object for `/proc/self/mountinfo` obtained from the sandboxed context.
    /// `None` until the namespace or chroot setup has run, and again after it has been taken.
    mountinfo_fd: Option<File>,
    /// Mechanism to be used for setting up the sandbox.
    sandbox_mode: SandboxMode,
    /// UidMap to be used for `newuidmap(1)` command line arguments.
    /// When empty, a 1-to-1 mapping of the current effective uid is used instead.
    uid_map: Vec<UidMap>,
    /// GidMap to be used for `newgidmap(1)` command line arguments.
    /// When empty, a 1-to-1 mapping of the current effective gid is used instead.
    gid_map: Vec<GidMap>,
}
166
167impl Sandbox {
168    pub fn new(
169        shared_dir: String,
170        sandbox_mode: SandboxMode,
171        uid_map: Vec<UidMap>,
172        gid_map: Vec<GidMap>,
173    ) -> io::Result<Self> {
174        let shared_dir_rp = fs::canonicalize(shared_dir)?;
175        let shared_dir_rp_str = shared_dir_rp
176            .to_str()
177            .ok_or_else(|| io::Error::from_raw_os_error(libc::EINVAL))?;
178
179        Ok(Sandbox {
180            shared_dir: shared_dir_rp_str.into(),
181            proc_self_fd: None,
182            mountinfo_fd: None,
183            sandbox_mode,
184            uid_map,
185            gid_map,
186        })
187    }
188
189    // Make `self.shared_dir` our root directory, and get isolated file descriptors for
190    // `/proc/self/fd` and '/proc/self/mountinfo`.
191    //
192    // This is based on virtiofsd's setup_namespaces() and setup_mounts(), and it's very similar to
193    // the strategy used in containers. Consists on a careful sequence of mounts and bind-mounts to
194    // ensure it's not possible to escape the sandbox through `self.shared_dir` nor the file
195    // descriptor obtained for `/proc/self/fd`.
196    //
197    // It's ugly, but it's the only way until Linux implements a proper containerization API.
198    fn setup_mounts(&mut self) -> Result<(), Error> {
199        // Open an FD to `/proc/self` so we can later open `/proc/self/mountinfo`.
200        // (If we opened `/proc/self/mountinfo` now, it would appear empty by the end of this
201        // function, which is why we need to defer opening it until then.)
202        let c_proc_self = CString::new("/proc/self").unwrap();
203        let proc_self_raw = unsafe { libc::open(c_proc_self.as_ptr(), libc::O_PATH) };
204        if proc_self_raw < 0 {
205            return Err(Error::OpenProcSelf(std::io::Error::last_os_error()));
206        }
207
208        // Encapsulate the `/proc/self` FD in a `File` object so it is closed when this function
209        // returns
210        let proc_self = unsafe { File::from_raw_fd(proc_self_raw) };
211
212        // Ensure our mount changes don't affect the parent mount namespace.
213
214        oslib::mount(None, "/", None, libc::MS_SLAVE | libc::MS_REC).map_err(Error::CleanMount)?;
215
216        // Mount `/proc` in this context.
217        oslib::mount(
218            "proc".into(),
219            "/proc",
220            "proc".into(),
221            libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID | libc::MS_RELATIME,
222        )
223        .map_err(Error::MountProc)?;
224
225        // Bind-mount `/proc/self/fd` onto /proc preventing access to ancestor
226        // directories.
227        oslib::mount("/proc/self/fd".into(), "/proc", None, libc::MS_BIND)
228            .map_err(Error::BindMountProcSelfFd)?;
229
230        // Obtain a file descriptor to /proc/self/fd/ by opening bind-mounted /proc directory.
231        let c_proc_dir = CString::new("/proc").unwrap();
232        let proc_self_fd = unsafe { libc::open(c_proc_dir.as_ptr(), libc::O_PATH) };
233        if proc_self_fd < 0 {
234            return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error()));
235        }
236        // Safe because we just opened this fd.
237        self.proc_self_fd = Some(unsafe { File::from_raw_fd(proc_self_fd) });
238
239        // Bind-mount `self.shared_dir` on itself so we can use as new root on `pivot_root` syscall.
240        oslib::mount(
241            self.shared_dir.as_str().into(),
242            self.shared_dir.as_str(),
243            None,
244            libc::MS_BIND | libc::MS_REC,
245        )
246        .map_err(Error::BindMountSharedDir)?;
247
248        // Get a file descriptor to our old root so we can reference it after switching root.
249        let c_root_dir = CString::new("/").unwrap();
250        let oldroot_fd = unsafe {
251            libc::open(
252                c_root_dir.as_ptr(),
253                libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
254            )
255        };
256        if oldroot_fd < 0 {
257            return Err(Error::OpenOldRoot(std::io::Error::last_os_error()));
258        }
259
260        // Get a file descriptor to the new root so we can reference it after switching root.
261        let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap();
262        let newroot_fd = unsafe {
263            libc::open(
264                c_shared_dir.as_ptr(),
265                libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC,
266            )
267        };
268        if newroot_fd < 0 {
269            return Err(Error::OpenNewRoot(std::io::Error::last_os_error()));
270        }
271
272        // Change to new root directory to prepare for `pivot_root` syscall.
273        oslib::fchdir(newroot_fd).map_err(Error::ChdirNewRoot)?;
274
275        // Check if we are supposed to switch to our current rootfs
276        let old_st = passthrough::statx(&oldroot_fd, None).map_err(Error::StatOldRoot)?;
277        let new_st = passthrough::statx(&newroot_fd, None).map_err(Error::StatNewRoot)?;
278        let switch_to_current_rootfs = (old_st.mnt_id == new_st.mnt_id)
279            && (old_st.st.st_dev == new_st.st.st_dev)
280            && (old_st.st.st_ino == new_st.st.st_ino);
281
282        if !switch_to_current_rootfs {
283            // Call to `pivot_root` using `.` as both new and old root.
284            let c_current_dir = CString::new(".").unwrap();
285            let ret = unsafe {
286                libc::syscall(
287                    libc::SYS_pivot_root,
288                    c_current_dir.as_ptr(),
289                    c_current_dir.as_ptr(),
290                )
291            };
292            if ret < 0 {
293                return Err(Error::PivotRoot(std::io::Error::last_os_error()));
294            }
295
296            // Change to old root directory to prepare for cleaning up and unmounting it.
297            oslib::fchdir(oldroot_fd).map_err(Error::ChdirOldRoot)?;
298
299            // Clean up old root to avoid mount namespace propagation.
300            oslib::mount(None, ".", None, libc::MS_SLAVE | libc::MS_REC)
301                .map_err(Error::CleanMount)?;
302
303            // Lazily unmount old root.
304            oslib::umount2(".", libc::MNT_DETACH).map_err(Error::UmountOldRoot)?;
305
306            // Change to new root.
307            oslib::fchdir(newroot_fd).map_err(Error::ChdirNewRoot)?;
308        }
309
310        // We no longer need these file descriptors, so close them.
311        unsafe { libc::close(newroot_fd) };
312        unsafe { libc::close(oldroot_fd) };
313
314        // Open `/proc/self/mountinfo` now
315        let c_mountinfo = CString::new("mountinfo").unwrap();
316        let mountinfo_fd =
317            unsafe { libc::openat(proc_self.as_raw_fd(), c_mountinfo.as_ptr(), libc::O_RDONLY) };
318        if mountinfo_fd < 0 {
319            return Err(Error::OpenMountinfo(std::io::Error::last_os_error()));
320        }
321        // Safe because we just opened this fd.
322        self.mountinfo_fd = Some(unsafe { File::from_raw_fd(mountinfo_fd) });
323
324        Ok(())
325    }
326
327    /// Sets mappings for the given uid and gid.
328    fn setup_id_mappings(
329        &self,
330        uid_map: &[UidMap],
331        gid_map: &[GidMap],
332        pid: i32,
333    ) -> Result<(), Error> {
334        let current_uid = unsafe { libc::geteuid() };
335        let current_gid = unsafe { libc::getegid() };
336
337        // Take uid map or set up a 1-to-1 mapping for our current euid.
338        let default_uid_map = vec![UidMap {
339            outside_uid: current_uid,
340            inside_uid: current_uid,
341            count: 1,
342        }];
343        let uid_map = if uid_map.is_empty() {
344            &default_uid_map
345        } else {
346            uid_map
347        };
348
349        // Take gid map or set up a 1-to-1 mapping for our current gid.
350        let default_gid_map = vec![GidMap {
351            outside_gid: current_gid,
352            inside_gid: current_gid,
353            count: 1,
354        }];
355        let gid_map = if gid_map.is_empty() {
356            &default_gid_map
357        } else {
358            gid_map
359        };
360
361        // Unprivileged user can not set any mapping without any restriction.
362        // Therefore, newuidmap/newgidmap is used instead of writing directly
363        // into proc/[pid]/{uid,gid}_map if a potentially privileged action is
364        // requested (outside {u,g}id != e{u,g}id or count > 1).
365        if uid_map.len() != 1 || uid_map[0].outside_uid != current_uid || uid_map[0].count > 1 {
366            let mut newuidmap = Command::new("newuidmap");
367            newuidmap.arg(pid.to_string());
368            for entry in uid_map.iter() {
369                newuidmap.arg(entry.inside_uid.to_string());
370                newuidmap.arg(entry.outside_uid.to_string());
371                newuidmap.arg(entry.count.to_string());
372            }
373            let output = newuidmap.output().map_err(|_| {
374                Error::WriteUidMap(format!(
375                    "failed to execute newuidmap: {}",
376                    io::Error::last_os_error()
377                ))
378            })?;
379            if !output.status.success() {
380                return Err(Error::WriteUidMap(
381                    String::from_utf8_lossy(&output.stderr).to_string(),
382                ));
383            }
384        } else {
385            // Unprivileged part, we can driectly write to /proc/[pid]/uid_map.
386            std::fs::write(
387                format!("/proc/{pid}/uid_map"),
388                format!("{} {} 1", uid_map[0].inside_uid, uid_map[0].outside_uid),
389            )
390            .map_err(|e| Error::WriteUidMap(e.to_string()))?;
391        }
392
393        if gid_map.len() != 1 || gid_map[0].outside_gid != current_gid || gid_map[0].count > 1 {
394            let mut newgidmap = Command::new("newgidmap");
395            newgidmap.arg(pid.to_string());
396            for entry in gid_map.iter() {
397                newgidmap.arg(entry.inside_gid.to_string());
398                newgidmap.arg(entry.outside_gid.to_string());
399                newgidmap.arg(entry.count.to_string());
400            }
401            let output = newgidmap.output().map_err(|_| {
402                Error::WriteGidMap(format!(
403                    "failed to execute newgidmap: {}",
404                    io::Error::last_os_error()
405                ))
406            })?;
407            if !output.status.success() {
408                return Err(Error::WriteGidMap(
409                    String::from_utf8_lossy(&output.stderr).to_string(),
410                ));
411            }
412        } else {
413            // Unprivileged part, we can driectly write to /proc/[pid]/gid_map.
414            std::fs::write(format!("/proc/{pid}/setgroups"), b"deny")
415                .map_err(|e| Error::WriteGidMap(e.to_string()))?;
416            std::fs::write(
417                format!("/proc/{pid}/gid_map"),
418                format!("{} {} 1", gid_map[0].inside_gid, gid_map[0].outside_gid),
419            )
420            .map_err(|e| Error::WriteGidMap(e.to_string()))?;
421        }
422        Ok(())
423    }
424
425    pub fn enter_namespace(&mut self, listener: Listener) -> Result<Listener, Error> {
426        let uid = unsafe { libc::geteuid() };
427
428        let flags = if uid == 0 {
429            libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET
430        } else {
431            // If running as an unprivileged user, rely on user_namespaces(7) for isolation.
432            libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET | libc::CLONE_NEWUSER
433        };
434
435        let (mut x_reader, mut x_writer) = oslib::pipe().unwrap();
436        let (mut y_reader, mut y_writer) = oslib::pipe().unwrap();
437
438        let pid = util::sfork().map_err(Error::Fork)?;
439        let mut output = [0];
440
441        // First child is only responsible to setup id mapping
442        // from outside of the main thread's namespace.
443        // Pipe is used for synchronization between the main thread and the first child.
444        // That will guarantee the mapping is done before the main thread gets running.
445        if pid == 0 {
446            // First child
447            // Dropping the other end of the pipes
448            drop(x_writer);
449            drop(y_reader);
450
451            // This is waiting until unshare() returns
452            x_reader.read_exact(&mut output).unwrap();
453            assert_eq!(output[0], IdMapSetUpPipeMessage::Request as u8);
454
455            // Setup uid/gid mappings
456            if uid != 0 {
457                let ppid = unsafe { libc::getppid() };
458                if let Err(error) = self.setup_id_mappings(&self.uid_map, &self.gid_map, ppid) {
459                    // We don't really need to close the pipes here, since the OS will close the FDs
460                    // after the process exits. But let's do it explicitly to signal an error to the
461                    // other end of the pipe.
462                    drop(x_reader);
463                    drop(y_writer);
464                    error!("sandbox: couldn't setup id mappings: {error}");
465                    process::exit(1);
466                };
467            }
468
469            // Signal that mapping is done
470            y_writer
471                .write_all(&[IdMapSetUpPipeMessage::Done as u8])
472                .unwrap_or_else(|_| process::exit(1));
473
474            // Terminate this child
475            process::exit(0);
476        } else {
477            // This is the parent
478            let ret = unsafe { libc::unshare(flags) };
479            if ret != 0 {
480                return Err(Error::Unshare(std::io::Error::last_os_error()));
481            }
482
483            // Dropping the other end of the pipes
484            drop(x_reader);
485            drop(y_writer);
486
487            // Signal the first child to go ahead and setup the id mappings
488            x_writer
489                .write_all(&[IdMapSetUpPipeMessage::Request as u8])
490                .unwrap();
491
492            // Receive the signal that mapping is done. If the child process exits
493            // before setting up the mapping, closing the pipe before sending the
494            // message, `read_exact()` will fail with `UnexpectedEof`.
495            y_reader
496                .read_exact(&mut output)
497                .unwrap_or_else(|_| process::exit(1));
498            assert_eq!(output[0], IdMapSetUpPipeMessage::Done as u8);
499
500            let mut status = 0_i32;
501            let _ = unsafe { libc::waitpid(pid, &mut status, 0) };
502
503            // Set the process inside the user namespace as root
504            let mut ret = unsafe { libc::setresuid(0, 0, 0) };
505            if ret != 0 {
506                warn!("Couldn't set the process uid as root: {ret}");
507            }
508            ret = unsafe { libc::setresgid(0, 0, 0) };
509            if ret != 0 {
510                warn!("Couldn't set the process gid as root: {ret}");
511            }
512
513            let child = util::sfork().map_err(Error::Fork)?;
514            if child == 0 {
515                // Second child
516                self.setup_mounts()?;
517                Ok(listener)
518            } else {
519                // This is the parent
520
521                // The child process drops the `vhost::Listener` after the first
522                // `accept()`. However, since the parent just waits until the child
523                // ends, keeping all the FDs open, as well as the socket's FD in a
524                // listen state. This is problematic because nothing prevents a
525                // miss-configured VMM to try to connect twice to the same socket
526                // leaving the VMM waiting forever. So, let's close the listener
527                // before waiting for the child.
528                let fd = listener.as_raw_fd();
529
530                // `vhost::Listener` beside closing the FD, it will remove the socket, if dropped
531                std::mem::forget(listener);
532
533                // Let's close the FD without removing the socket file
534                // SAFETY: `fd` is open and nobody owns it
535                let fd = unsafe { OwnedFd::from_raw_fd(fd) };
536                drop(fd);
537
538                util::wait_for_child(child); // This never returns.
539            }
540        }
541    }
542
543    pub fn enter_chroot(&mut self) -> Result<(), Error> {
544        let c_proc_self_fd = CString::new("/proc/self/fd").unwrap();
545        let proc_self_fd = unsafe { libc::open(c_proc_self_fd.as_ptr(), libc::O_PATH) };
546        if proc_self_fd < 0 {
547            return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error()));
548        }
549        // Safe because we just opened this fd.
550        self.proc_self_fd = Some(unsafe { File::from_raw_fd(proc_self_fd) });
551
552        let c_mountinfo = CString::new("/proc/self/mountinfo").unwrap();
553        let mountinfo_fd = unsafe { libc::open(c_mountinfo.as_ptr(), libc::O_RDONLY) };
554        if mountinfo_fd < 0 {
555            return Err(Error::OpenMountinfo(std::io::Error::last_os_error()));
556        }
557        // Safe because we just opened this fd.
558        self.mountinfo_fd = Some(unsafe { File::from_raw_fd(mountinfo_fd) });
559
560        let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap();
561        let ret = unsafe { libc::chroot(c_shared_dir.as_ptr()) };
562        if ret != 0 {
563            return Err(Error::Chroot(std::io::Error::last_os_error()));
564        }
565
566        let c_root_dir = CString::new("/").unwrap();
567        let ret = unsafe { libc::chdir(c_root_dir.as_ptr()) };
568        if ret != 0 {
569            return Err(Error::ChrootChdir(std::io::Error::last_os_error()));
570        }
571
572        Ok(())
573    }
574
575    fn must_drop_supplemental_groups(&self) -> Result<bool, Error> {
576        let uid = unsafe { libc::geteuid() };
577        if uid != 0 {
578            return Ok(false);
579        }
580
581        // If we are running as root and the system does not support user namespaces,
582        // we must drop supplemental groups.
583        if !Path::new("/proc/self/ns/user").exists() {
584            return Ok(true);
585        }
586
587        let uid_mmap_data =
588            fs::read_to_string("/proc/self/uid_map").map_err(Error::DropSupplementalGroups)?;
589        let uid_map: Vec<_> = uid_mmap_data.split_whitespace().collect();
590
591        let gid_map_data =
592            fs::read_to_string("/proc/self/gid_map").map_err(Error::DropSupplementalGroups)?;
593        let gid_map: Vec<_> = gid_map_data.split_whitespace().collect();
594
595        let setgroups =
596            fs::read_to_string("/proc/self/setgroups").map_err(Error::DropSupplementalGroups)?;
597
598        // A single line mapping only has 3 fields, and the 'count' field should
599        // be 1.
600        let single_uid_mapping = uid_map.len() == 3 && uid_map[2] == "1";
601        let single_gid_mapping = gid_map.len() == 3 && gid_map[2] == "1";
602
603        Ok(setgroups.trim() != "deny" || !single_uid_mapping || !single_gid_mapping)
604    }
605
606    fn drop_supplemental_groups(&self) -> Result<(), Error> {
607        let ngroups = unsafe { libc::getgroups(0, std::ptr::null_mut()) };
608        if ngroups < 0 {
609            return Err(Error::GetSupplementalGroups(std::io::Error::last_os_error()));
610        } else if ngroups != 0 {
611            let ret = unsafe { libc::setgroups(0, std::ptr::null()) };
612            if ret != 0 {
613                return Err(Error::DropSupplementalGroups(
614                    std::io::Error::last_os_error(),
615                ));
616            }
617        }
618
619        Ok(())
620    }
621
622    /// Set up sandbox,
623    pub fn enter(&mut self, listener: Listener) -> Result<Listener, Error> {
624        let uid = unsafe { libc::geteuid() };
625        if uid != 0 && self.sandbox_mode == SandboxMode::Chroot {
626            return Err(Error::SandboxModeInvalidUID);
627        }
628
629        if !self.uid_map.is_empty() && (uid == 0 || self.sandbox_mode != SandboxMode::Namespace) {
630            return Err(Error::SandboxModeInvalidUidMap);
631        }
632
633        if !self.gid_map.is_empty() && (uid == 0 || self.sandbox_mode != SandboxMode::Namespace) {
634            return Err(Error::SandboxModeInvalidGidMap);
635        }
636
637        // We must drop supplemental groups membership if we support switching
638        // between arbitrary uids/gids, unless the following conditions are met:
639        // we're not running as root or we are inside a user namespace with only
640        // one uid and gid mapping and '/proc/self/setgroups' is equal to
641        // "deny". In both of these cases, no arbitrary uid/gid switching is
642        // possible and thus there's no need to drop supplemental groups. In
643        // both of these scenarios calling setgroups() is also not allowed so we
644        // avoid calling it since we know it will return a privilege error.
645        let must_drop_supplemental_groups = match self.must_drop_supplemental_groups() {
646            Ok(must_drop) => must_drop,
647            Err(error) => {
648                warn!(
649                    "Failed to determine whether supplemental groups must be dropped: {error}; \
650                    defaulting to trying to drop supplemental groups"
651                );
652                true
653            }
654        };
655
656        if must_drop_supplemental_groups {
657            self.drop_supplemental_groups()?;
658        }
659
660        match self.sandbox_mode {
661            SandboxMode::Namespace => self.enter_namespace(listener),
662            SandboxMode::Chroot => self.enter_chroot().and(Ok(listener)),
663            SandboxMode::None => Ok(listener),
664        }
665    }
666
667    pub fn get_proc_self_fd(&mut self) -> Option<File> {
668        self.proc_self_fd.take()
669    }
670
671    pub fn get_mountinfo_fd(&mut self) -> Option<File> {
672        self.mountinfo_fd.take()
673    }
674
675    pub fn get_root_dir(&self) -> String {
676        match self.sandbox_mode {
677            SandboxMode::Namespace | SandboxMode::Chroot => "/".to_string(),
678            SandboxMode::None => self.shared_dir.clone(),
679        }
680    }
681
682    /// Return the prefix to strip from /proc/self/mountinfo entries to get paths that are actually
683    /// accessible in our sandbox
684    pub fn get_mountinfo_prefix(&self) -> Option<String> {
685        match self.sandbox_mode {
686            SandboxMode::Namespace | SandboxMode::None => None,
687            SandboxMode::Chroot => Some(self.shared_dir.clone()),
688        }
689    }
690}