tabox/linux/
mod.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4// SPDX-License-Identifier: MPL-2.0
5
6//! This module contains the sandbox for Linux
7
8use std::fs::File;
9use std::os::unix::process::CommandExt;
10use std::path::Path;
11use std::process::Command;
12use std::ptr::null;
13use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
14use std::sync::Arc;
15use std::thread::{self, JoinHandle};
16use std::time::Instant;
17
18use anyhow::{anyhow, bail, Context};
19use nix::sys::signal::{kill, Signal};
20use nix::unistd::{self, Gid, Pid, Uid};
21
22use crate::configuration::SandboxConfiguration;
23use crate::result::{ExitStatus, ResourceUsage, SandboxExecutionResult};
24use crate::util::{setup_resource_limits, start_wall_time_watcher, strerror, wait};
25use crate::{Result, Sandbox};
26
27mod filesystem;
28mod seccomp_filter;
29
/// PID of the child process, used to kill the child when SIGTERM or SIGINT is received.
///
/// Holds `-1` while no child PID is known. This is a plain `static` atomic (its constructor is
/// `const`), so reading it never has to go through lazy initialization or reference counting.
static CHILD_PID: AtomicI32 = AtomicI32::new(-1);
34
35/// Handler of the SIGINT and SIGTERM signals. If the child PID is available a SIGKILL will be sent
36/// to that process.
37fn sigterm_handler() {
38    let child_pid = CHILD_PID.load(Ordering::SeqCst);
39    if child_pid > 0 {
40        match kill(Pid::from_raw(child_pid), Signal::SIGKILL) {
41            Ok(()) => info!("Killed child process {}", child_pid),
42            Err(e) => error!("Cannot kill {}: {:?}", child_pid, e),
43        }
44    } else {
45        warn!("Cannot stop the child since the pid is unknown");
46    }
47}
48
49pub struct LinuxSandbox {
50    child_thread: JoinHandle<Result<SandboxExecutionResult>>,
51}
52
53impl Sandbox for LinuxSandbox {
54    fn run(config: SandboxConfiguration) -> Result<Self> {
55        trace!("Run LinuxSandbox with config {:?}", config);
56
57        // Register a signal handler that kills the child
58        unsafe { signal_hook::register(signal_hook::SIGTERM, sigterm_handler) }
59            .context("Failed to register SIGTERM handler")?;
60        unsafe { signal_hook::register(signal_hook::SIGINT, sigterm_handler) }
61            .context("Failed to register SIGINT handler")?;
62
63        // Start a child process to setup the sandbox
64        let handle = thread::Builder::new()
65            .name("Sandbox watcher".into())
66            .spawn(move || watcher(config))
67            .context("Failed to spawn sandbox watcher thread")?;
68
69        Ok(LinuxSandbox {
70            child_thread: handle,
71        })
72    }
73
74    fn wait(self) -> Result<SandboxExecutionResult> {
75        let result = self
76            .child_thread
77            .join()
78            .map_err(|e| anyhow!("Watcher thread panicked: {:?}", e))?
79            .context("Watcher thread failed")?;
80        Ok(result)
81    }
82
83    fn is_secure() -> bool {
84        true
85    }
86}
87fn watcher(config: SandboxConfiguration) -> Result<SandboxExecutionResult> {
88    let tempdir = tempfile::TempDir::new().context("Failed to create sandbox tempdir")?;
89    let sandbox_path = tempdir.path();
90
91    // uid/gid from outside the sandbox
92    let uid = unistd::getuid();
93    let gid = unistd::getgid();
94
95    trace!(
96        "Watcher process started, PID = {}, uid = {}, gid = {}",
97        unistd::getpid(),
98        uid,
99        gid
100    );
101
102    #[allow(clippy::large_enum_variant)]
103    enum ErrorMessage {
104        NoError,
105        Error(usize, [char; 1024]),
106    }
107
108    // Allocate some memory that the forked process can use to write the error. This memory is
109    // page-aligned, which is hopefully enough for ErrorMessage.
110    let shared = unsafe {
111        std::mem::transmute::<*mut libc::c_void, *mut ErrorMessage>(libc::mmap(
112            std::ptr::null_mut(),
113            std::mem::size_of::<ErrorMessage>(),
114            libc::PROT_READ | libc::PROT_WRITE,
115            libc::MAP_ANONYMOUS | libc::MAP_SHARED,
116            0,
117            0,
118        ))
119    };
120    // Cleanup the shared memory: by default there is no error (we cannot set it after because the
121    // child process execs and this memory will be unreachable).
122    unsafe { std::ptr::write(shared, ErrorMessage::NoError) };
123
124    let child_pid = spawn_child(|| {
125        if let Err(err) = child(&config, sandbox_path, uid, gid) {
126            error!("Child failed: {:?}", err);
127
128            // prepare a buffer where to write the error message
129            let message = format!("{:?}", err);
130            let message = message.chars().take(1024).collect::<Vec<_>>();
131            let mut buffer = ['\0'; 1024];
132            buffer[..message.len()].copy_from_slice(&message);
133
134            // Write the error message to the shared memory. This is safe since the parent will not
135            // read from it until this process has completely exited.
136            let error = ErrorMessage::Error(message.len(), buffer);
137            unsafe { std::ptr::write(shared, error) };
138        } else {
139            unreachable!("The child process must exec");
140        }
141    })
142    .context("Failed to spawn child process")?;
143
144    // Store the PID of the child process for letting the signal handler kill the child
145    CHILD_PID.store(child_pid, Ordering::SeqCst);
146
147    let start_time = Instant::now();
148
149    let killed = Arc::new(AtomicBool::new(false));
150
151    // Start a thread that kills the process when the wall limit expires
152    if let Some(limit) = config.wall_time_limit {
153        start_wall_time_watcher(limit, child_pid, killed.clone())?;
154    }
155
156    // Wait child for completion
157    let (status, resource_usage) = wait(child_pid).context("Failed to wait for child process")?;
158
159    // Read from shared memory if there was an error with the sandbox. At this point the child
160    // process has for sure exited, so it's safe to read.
161    if let ErrorMessage::Error(len, error) = unsafe { std::ptr::read(shared) } {
162        let message = error.iter().take(len).collect::<String>();
163        bail!("{}", message);
164    }
165
166    Ok(SandboxExecutionResult {
167        status: if killed.load(Ordering::SeqCst) {
168            ExitStatus::Killed
169        } else {
170            status
171        },
172        resource_usage: ResourceUsage {
173            wall_time_usage: (Instant::now() - start_time).as_secs_f64(),
174            ..resource_usage
175        },
176    })
177}
178
179/// Spawn the child process inside of an unshared environment.
180///
181/// This makes sure the child process exits when it's done.
182fn spawn_child(child: impl FnOnce()) -> Result<libc::pid_t> {
183    let child_pid = unsafe {
184        libc::syscall(
185            libc::SYS_clone,
186            libc::CLONE_NEWIPC
187                | libc::CLONE_NEWNET
188                | libc::CLONE_NEWNS
189                | libc::CLONE_NEWPID
190                | libc::CLONE_NEWUSER
191                | libc::CLONE_NEWUTS
192                | libc::SIGCHLD,
193            null::<libc::c_void>(),
194        )
195    } as libc::pid_t;
196
197    if child_pid < 0 {
198        bail!("clone() error: {}", strerror());
199    }
200
201    if child_pid == 0 {
202        child();
203
204        // make sure the child process exits
205        std::process::exit(1);
206    }
207
208    Ok(child_pid)
209}
210
211/// Child process
212fn child(config: &SandboxConfiguration, sandbox_path: &Path, uid: Uid, gid: Gid) -> Result<()> {
213    // Map current uid/gid to root/root inside the sandbox
214    std::fs::write("/proc/self/setgroups", "deny")
215        .context("Failed to write /proc/self/setgroups")?;
216    std::fs::write(
217        "/proc/self/uid_map",
218        format!("{} {} 1", config.uid, uid.as_raw()),
219    )
220    .context("Failed to write /proc/self/uid_map")?;
221    std::fs::write(
222        "/proc/self/gid_map",
223        format!("{} {} 1", config.gid, gid.as_raw()),
224    )
225    .context("Failed to write /proc/self/gid_map")?;
226
227    // When parent dies, I want to die too
228    if unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGKILL) < 0 } {
229        bail!("Error calling prctl(): {}", strerror());
230    };
231
232    assert_eq!(unistd::getpid().as_raw(), 1);
233
234    let mut command = Command::new(&config.executable);
235
236    command
237        .env_clear()
238        .envs(config.env.clone())
239        .args(&config.args);
240
241    if let Some(stdin) = &config.stdin {
242        let stdin = File::open(stdin)
243            .with_context(|| format!("Failed to open stdin file at {}", stdin.display()))?;
244        command.stdin(stdin);
245    }
246
247    if let Some(stdout) = &config.stdout {
248        let stdout = File::create(stdout)
249            .with_context(|| format!("Failed to open stdout file at {}", stdout.display()))?;
250        command.stdout(stdout);
251    }
252
253    if let Some(stderr) = &config.stderr {
254        let stderr = File::create(stderr)
255            .with_context(|| format!("Failed to open stderr file at {}", stderr.display()))?;
256        command.stderr(stderr);
257    }
258
259    filesystem::create(config, sandbox_path).context("Failed to create sandbox filesystem")?;
260    setup_thread_affinity(config).context("Failed to setup thread affinity")?;
261    enter_chroot(config, sandbox_path).context("Failed to enter chroot")?;
262    setup_resource_limits(config).context("Failed to setup rlimits")?;
263    setup_syscall_filter(config).context("Failed to setup syscall filter")?;
264
265    // This can only return Err... nice!
266    Err(command.exec()).context("Failed to exec child process")
267}
268
269/// Set cpu affinity
270fn setup_thread_affinity(config: &SandboxConfiguration) -> Result<()> {
271    if let Some(core) = config.cpu_core {
272        let mut cpu_set = nix::sched::CpuSet::new();
273        cpu_set.set(core)?;
274        nix::sched::sched_setaffinity(Pid::from_raw(0), &cpu_set)
275            .with_context(|| format!("Failed to set sched_setaffinity(0, {:?})", cpu_set))?
276    }
277    Ok(())
278}
279
280/// Enter the sandbox chroot and change directory
281fn enter_chroot(config: &SandboxConfiguration, sandbox_path: &Path) -> Result<()> {
282    // Chroot into the sandbox
283    unistd::chroot(sandbox_path).context("Failed to chroot")?;
284
285    // Check that things exits inside
286    if !config.executable.exists() {
287        bail!("Executable doesn't exist inside the sandbox chroot. Perhaps you need to mount some directories?");
288    }
289    if !config.working_directory.exists() {
290        bail!("Working directory doesn't exists inside chroot. Maybe you need to mount it?");
291    }
292
293    // Change to working directory
294    unistd::chdir(&config.working_directory).context("Failed to chdir")?;
295    Ok(())
296}
297
298/// Setup the Syscall filter
299fn setup_syscall_filter(config: &SandboxConfiguration) -> Result<()> {
300    if let Some(syscall_filter) = &config.syscall_filter {
301        let mut filter = seccomp_filter::SeccompFilter::new(syscall_filter.default_action)
302            .context("Failed to setup SeccompFilter")?;
303        for (syscall, action) in &syscall_filter.rules {
304            filter.filter(syscall, *action).with_context(|| {
305                format!("Failed to add syscall filter: {} {:?}", syscall, action)
306            })?;
307        }
308        filter.load().context("Failed to load syscall filter")?;
309    }
310    Ok(())
311}