Skip to main content

sandbox_rs/execution/
process.rs

//! Process execution within sandbox namespaces.
//!
//! Key changes from the original implementation:
//! - Stack size: the child stack is 128 KiB (was 8 KiB)
//! - Memory leak fix: the config is moved into the child closure instead of using `Box::into_raw`
//! - Seccomp: no root check (loading a filter only needs `PR_SET_NO_NEW_PRIVS`)
//! - User namespace: a sync pipe lets the parent write the UID/GID mapping before the child proceeds
//! - Resource limits: `RlimitConfig` is applied in the child before `execve`
9
10use sandbox_cgroup::RlimitConfig;
11use sandbox_core::{Result, SandboxError};
12use sandbox_namespace::NamespaceConfig;
13use sandbox_seccomp::{SeccompBpf, SeccompFilter};
14
15use log::warn;
16use nix::sched::clone;
17use nix::sys::signal::Signal;
18use nix::unistd::{AccessFlags, Pid, access, chdir, chroot, execve};
19use std::ffi::CString;
20use std::mem;
21use std::os::fd::IntoRawFd;
22use std::os::unix::io::AsRawFd;
23use std::path::Path;
24use std::thread;
25
26use crate::execution::stream::{ProcessStream, spawn_fd_reader};
27
28/// Process execution configuration
29#[derive(Debug, Clone)]
30pub struct ProcessConfig {
31    /// Program to execute
32    pub program: String,
33    /// Program arguments
34    pub args: Vec<String>,
35    /// Environment variables
36    pub env: Vec<(String, String)>,
37    /// Working directory (inside sandbox)
38    pub cwd: Option<String>,
39    /// Root directory for chroot
40    pub chroot_dir: Option<String>,
41    /// UID to run as
42    pub uid: Option<u32>,
43    /// GID to run as
44    pub gid: Option<u32>,
45    /// Seccomp filter
46    pub seccomp: Option<SeccompFilter>,
47    /// Resource limits (unprivileged fallback via setrlimit)
48    pub rlimits: Option<RlimitConfig>,
49    /// Whether to inherit the parent environment (with optional overrides)
50    pub inherit_env: bool,
51    /// Whether to set up user namespace UID/GID mapping
52    pub use_user_namespace: bool,
53}
54
55impl Default for ProcessConfig {
56    fn default() -> Self {
57        Self {
58            program: String::new(),
59            args: Vec::new(),
60            env: Vec::new(),
61            cwd: None,
62            chroot_dir: None,
63            uid: None,
64            gid: None,
65            seccomp: None,
66            rlimits: None,
67            inherit_env: true,
68            use_user_namespace: false,
69        }
70    }
71}
72
73impl ProcessConfig {
74    /// Ensure the environment vector reflects the inherited parent environment (plus overrides)
75    fn prepare_environment(&mut self) {
76        if !self.inherit_env {
77            return;
78        }
79
80        let overrides = mem::take(&mut self.env);
81        let mut combined: Vec<(String, String)> = std::env::vars().collect();
82
83        if overrides.is_empty() {
84            self.env = combined;
85            return;
86        }
87
88        for (key, value) in overrides {
89            if let Some((_, existing)) = combined.iter_mut().find(|(k, _)| k == &key) {
90                *existing = value;
91            } else {
92                combined.push((key, value));
93            }
94        }
95
96        self.env = combined;
97    }
98}
99
100/// Resolve a program name to an absolute path using PATH semantics.
101fn resolve_program_path(
102    program: &str,
103    env: &[(String, String)],
104) -> std::result::Result<String, String> {
105    if program.contains('/') {
106        return Ok(program.to_string());
107    }
108
109    const DEFAULT_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
110    let path_value = env
111        .iter()
112        .find(|(key, _)| key == "PATH")
113        .map(|(_, value)| value.as_str())
114        .unwrap_or(DEFAULT_PATH);
115
116    for entry in path_value.split(':') {
117        let dir = if entry.is_empty() { "." } else { entry };
118        let candidate = Path::new(dir).join(program);
119
120        if access(&candidate, AccessFlags::X_OK).is_ok() {
121            return Ok(candidate.to_string_lossy().into_owned());
122        }
123    }
124
125    Err(format!("execve failed: command not found: {}", program))
126}
127
128/// Result of process execution
129#[derive(Debug, Clone)]
130pub struct ProcessResult {
131    /// Process ID
132    pub pid: Pid,
133    /// Exit status
134    pub exit_status: i32,
135    /// Signal if killed
136    pub signal: Option<i32>,
137    /// Execution time in milliseconds
138    pub exec_time_ms: u64,
139}
140
141/// Process executor
142pub struct ProcessExecutor;
143
144impl ProcessExecutor {
145    /// Clone a child process with optional user namespace synchronization.
146    ///
147    /// When user namespace is enabled, creates a sync pipe so the parent can
148    /// write uid_map/gid_map before the child proceeds with setup.
149    fn clone_child(
150        mut child_fn: Box<dyn FnMut() -> isize>,
151        child_stack: &mut [u8],
152        namespace_config: &NamespaceConfig,
153        use_user_namespace: bool,
154    ) -> Result<Pid> {
155        let flags = namespace_config.to_clone_flags();
156
157        if use_user_namespace && namespace_config.user {
158            // Create sync pipe for parent→child signaling
159            let (sync_read, sync_write) =
160                nix::unistd::pipe().map_err(|e| SandboxError::Syscall(format!("pipe: {}", e)))?;
161            let sync_read_raw = sync_read.as_raw_fd();
162            let sync_write_raw = sync_write.as_raw_fd();
163
164            // Wrap the child function to wait for parent's user namespace setup
165            let wrapped = Box::new(move || -> isize {
166                // SAFETY: raw FD operations in child process after clone
167                unsafe {
168                    // Close child's copy of the write end
169                    libc::close(sync_write_raw);
170                    // Wait for parent to signal (parent writes 1 byte after uid_map setup)
171                    let mut buf = [0u8; 1];
172                    libc::read(sync_read_raw, buf.as_mut_ptr() as *mut libc::c_void, 1);
173                    libc::close(sync_read_raw);
174                }
175                child_fn()
176            });
177
178            let result =
179                unsafe { clone(wrapped, child_stack, flags, Some(Signal::SIGCHLD as i32)) };
180
181            // Parent: close our copy of the read end
182            drop(sync_read);
183
184            match result {
185                Ok(child_pid) => {
186                    // Write UID/GID mapping for the child's user namespace
187                    let uid = sandbox_core::util::get_uid();
188                    let gid = sandbox_core::util::get_gid();
189                    if let Err(e) =
190                        sandbox_namespace::user_ns::setup_user_namespace(child_pid, uid, gid)
191                    {
192                        warn!("User namespace setup failed: {}", e);
193                    }
194
195                    // Signal child to proceed
196                    // SAFETY: sync_write is a valid FD, writing 1 byte
197                    unsafe {
198                        let signal_byte: [u8; 1] = [1];
199                        libc::write(
200                            sync_write.as_raw_fd(),
201                            signal_byte.as_ptr() as *const libc::c_void,
202                            1,
203                        );
204                    }
205                    drop(sync_write);
206                    Ok(child_pid)
207                }
208                Err(e) => Err(SandboxError::Syscall(format!("clone failed: {}", e))),
209            }
210        } else {
211            // No user namespace - clone directly
212            let result =
213                unsafe { clone(child_fn, child_stack, flags, Some(Signal::SIGCHLD as i32)) };
214            result.map_err(|e| SandboxError::Syscall(format!("clone failed: {}", e)))
215        }
216    }
217
218    /// Execute process with namespace isolation
219    pub fn execute(
220        mut config: ProcessConfig,
221        namespace_config: NamespaceConfig,
222    ) -> Result<ProcessResult> {
223        let mut child_stack = vec![0u8; 131072]; // 128KB stack (was 8KB)
224
225        config.prepare_environment();
226        let use_user_ns = config.use_user_namespace;
227
228        // Move config into closure (fixes memory leak from Box::into_raw pattern)
229        let mut child_config = Some(config);
230
231        let child_pid = Self::clone_child(
232            Box::new(move || Self::child_setup(child_config.take().unwrap())),
233            &mut child_stack,
234            &namespace_config,
235            use_user_ns,
236        )?;
237
238        let start = std::time::Instant::now();
239        let status = wait_for_child(child_pid)?;
240        let exec_time_ms = start.elapsed().as_millis() as u64;
241
242        Ok(ProcessResult {
243            pid: child_pid,
244            exit_status: status,
245            signal: None,
246            exec_time_ms,
247        })
248    }
249
250    /// Execute process with streaming output
251    pub fn execute_with_stream(
252        mut config: ProcessConfig,
253        namespace_config: NamespaceConfig,
254        enable_streams: bool,
255    ) -> Result<(ProcessResult, Option<ProcessStream>)> {
256        if !enable_streams {
257            let result = Self::execute(config, namespace_config)?;
258            return Ok((result, None));
259        }
260
261        let (stdout_read, stdout_write) = nix::unistd::pipe()
262            .map_err(|e| SandboxError::Io(std::io::Error::other(format!("pipe failed: {}", e))))?;
263        let (stderr_read, stderr_write) = nix::unistd::pipe()
264            .map_err(|e| SandboxError::Io(std::io::Error::other(format!("pipe failed: {}", e))))?;
265
266        let mut child_stack = vec![0u8; 131072]; // 128KB stack
267
268        config.prepare_environment();
269        let use_user_ns = config.use_user_namespace;
270        let stdout_write_fd = stdout_write.as_raw_fd();
271        let stderr_write_fd = stderr_write.as_raw_fd();
272
273        let mut child_config = Some(config);
274
275        let child_pid = Self::clone_child(
276            Box::new(move || {
277                Self::child_setup_with_pipes(
278                    child_config.take().unwrap(),
279                    stdout_write_fd,
280                    stderr_write_fd,
281                )
282            }),
283            &mut child_stack,
284            &namespace_config,
285            use_user_ns,
286        )?;
287
288        // Parent: close write ends (child has copies via clone)
289        drop(stdout_write);
290        drop(stderr_write);
291
292        let (stream_writer, process_stream) = ProcessStream::new();
293
294        let tx1 = stream_writer.tx.clone();
295        let tx2 = stream_writer.tx.clone();
296
297        spawn_fd_reader(stdout_read.into_raw_fd(), false, tx1).map_err(|e| {
298            SandboxError::Io(std::io::Error::other(format!("spawn reader failed: {}", e)))
299        })?;
300        spawn_fd_reader(stderr_read.into_raw_fd(), true, tx2).map_err(|e| {
301            SandboxError::Io(std::io::Error::other(format!("spawn reader failed: {}", e)))
302        })?;
303
304        thread::spawn(move || match wait_for_child(child_pid) {
305            Ok(status) => {
306                let _ = stream_writer.send_exit(status, None);
307            }
308            Err(_) => {
309                let _ = stream_writer.send_exit(1, None);
310            }
311        });
312
313        let process_result = ProcessResult {
314            pid: child_pid,
315            exit_status: 0,
316            signal: None,
317            exec_time_ms: 0,
318        };
319
320        Ok((process_result, Some(process_stream)))
321    }
322
323    /// Setup child process environment.
324    ///
325    /// Order of operations:
326    /// 1. Apply resource limits (before seccomp locks things down)
327    /// 2. Chroot (if specified)
328    /// 3. Chdir
329    /// 4. Drop privileges (setgid/setuid)
330    /// 5. Apply seccomp filter (last - irreversible lockdown before execve)
331    /// 6. Execve
332    fn child_setup(config: ProcessConfig) -> isize {
333        let ProcessConfig {
334            program,
335            args,
336            env,
337            cwd,
338            chroot_dir,
339            uid,
340            gid,
341            seccomp,
342            rlimits,
343            inherit_env: _,
344            use_user_namespace: _,
345        } = config;
346
347        // 1. Apply resource limits (before seccomp, which may restrict setrlimit)
348        if let Some(ref rlimits) = rlimits
349            && let Err(e) = rlimits.apply()
350        {
351            eprintln!("Failed to apply rlimits: {}", e);
352            return 1;
353        }
354
355        // 2. Change root if specified (no root check - fails explicitly if unprivileged)
356        if let Some(chroot_path) = &chroot_dir
357            && let Err(e) = chroot(chroot_path.as_str())
358        {
359            eprintln!("chroot failed: {}", e);
360            return 1;
361        }
362
363        // 3. Change directory
364        let cwd = cwd.as_deref().unwrap_or("/");
365        if let Err(e) = chdir(cwd) {
366            eprintln!("chdir failed: {}", e);
367            return 1;
368        }
369
370        // 4. Drop privileges if specified (no root check - fails explicitly if needed)
371        if let Some(gid) = gid
372            && unsafe { libc::setgid(gid) } != 0
373        {
374            eprintln!("setgid failed");
375            return 1;
376        }
377
378        if let Some(uid) = uid
379            && unsafe { libc::setuid(uid) } != 0
380        {
381            eprintln!("setuid failed");
382            return 1;
383        }
384
385        // 5. Apply seccomp filter - NO root check!
386        // Seccomp only needs PR_SET_NO_NEW_PRIVS, which works for any process.
387        if let Some(filter) = &seccomp
388            && let Err(e) = SeccompBpf::load(filter)
389        {
390            eprintln!("Failed to load seccomp: {}", e);
391            return 1;
392        }
393
394        // 6. Prepare environment and execute
395        let env_vars: Vec<CString> = env
396            .iter()
397            .map(|(k, v)| CString::new(format!("{}={}", k, v)).unwrap())
398            .collect();
399
400        let env_refs: Vec<&CString> = env_vars.iter().collect();
401
402        let resolved_program = match resolve_program_path(&program, &env) {
403            Ok(path) => path,
404            Err(err) => {
405                eprintln!("{}", err);
406                return 1;
407            }
408        };
409
410        let program_cstring = match CString::new(resolved_program) {
411            Ok(s) => s,
412            Err(_) => {
413                eprintln!("program name contains nul byte");
414                return 1;
415            }
416        };
417
418        let args_cstrings: Vec<CString> = args
419            .iter()
420            .map(|s| CString::new(s.clone()).unwrap_or_else(|_| CString::new("").unwrap()))
421            .collect();
422
423        let mut args_refs: Vec<&CString> = vec![&program_cstring];
424        args_refs.extend(args_cstrings.iter());
425
426        match execve(&program_cstring, &args_refs, &env_refs) {
427            Ok(_) => 0,
428            Err(e) => {
429                eprintln!("execve failed: {}", e);
430                1
431            }
432        }
433    }
434
435    /// Setup child process with pipe redirection
436    fn child_setup_with_pipes(config: ProcessConfig, stdout_fd: i32, stderr_fd: i32) -> isize {
437        // SAFETY: FDs are valid from parent and we're in a child process about to exec
438        unsafe {
439            if libc::dup2(stdout_fd, 1) < 0 {
440                eprintln!("dup2 stdout failed");
441                return 1;
442            }
443            if libc::dup2(stderr_fd, 2) < 0 {
444                eprintln!("dup2 stderr failed");
445                return 1;
446            }
447            _ = libc::close(stdout_fd);
448            _ = libc::close(stderr_fd);
449        }
450
451        Self::child_setup(config)
452    }
453}
454
455/// Wait for child process and get exit status
456fn wait_for_child(pid: Pid) -> Result<i32> {
457    use nix::sys::wait::{WaitStatus, waitpid};
458
459    loop {
460        match waitpid(pid, None) {
461            Ok(WaitStatus::Exited(_, status)) => return Ok(status),
462            Ok(WaitStatus::Signaled(_, signal, _)) => {
463                return Ok(128 + signal as i32);
464            }
465            Ok(_) => continue,
466            Err(e) => return Err(SandboxError::Syscall(format!("waitpid failed: {}", e))),
467        }
468    }
469}
470
#[cfg(test)]
mod tests {
    use super::*;
    use nix::unistd::{ForkResult, fork};

    /// Namespace configuration with every namespace switched off, so the
    /// tests run without special privileges.
    fn no_namespaces() -> NamespaceConfig {
        NamespaceConfig {
            pid: false,
            ipc: false,
            net: false,
            mount: false,
            uts: false,
            user: false,
        }
    }

    #[test]
    fn test_process_config_default() {
        let config = ProcessConfig::default();
        assert!(config.program.is_empty());
        assert!(config.args.is_empty());
        assert!(config.rlimits.is_none());
        assert!(!config.use_user_namespace);
    }

    #[test]
    fn test_process_config_with_args() {
        let config = ProcessConfig {
            program: "echo".to_string(),
            args: vec!["hello".to_string(), "world".to_string()],
            ..Default::default()
        };

        assert_eq!(config.program, "echo");
        assert_eq!(config.args.len(), 2);
    }

    #[test]
    fn test_process_config_with_env() {
        let config = ProcessConfig {
            env: vec![("MY_VAR".to_string(), "my_value".to_string())],
            ..Default::default()
        };

        assert_eq!(config.env.len(), 1);
        assert_eq!(config.env[0].0, "MY_VAR");
    }

    #[test]
    fn test_process_result() {
        let result = ProcessResult {
            pid: Pid::from_raw(123),
            exit_status: 0,
            signal: None,
            exec_time_ms: 100,
        };

        assert_eq!(result.pid, Pid::from_raw(123));
        assert_eq!(result.exit_status, 0);
        assert!(result.signal.is_none());
        assert_eq!(result.exec_time_ms, 100);
    }

    #[test]
    fn test_process_result_with_signal() {
        let result = ProcessResult {
            pid: Pid::from_raw(456),
            exit_status: 0,
            signal: Some(9),
            exec_time_ms: 50,
        };

        assert_eq!(result.signal, Some(9));
    }

    #[test]
    fn wait_for_child_returns_exit_status() {
        // SAFETY: the forked child only calls `_exit`, which is
        // async-signal-safe. `std::process::exit` would run exit handlers
        // and could block on locks held by other test threads at fork time.
        match unsafe { fork() } {
            Ok(ForkResult::Child) => unsafe { libc::_exit(42) },
            Ok(ForkResult::Parent { child }) => {
                let status = wait_for_child(child).unwrap();
                assert_eq!(status, 42);
            }
            Err(e) => panic!("fork failed: {}", e),
        }
    }

    #[test]
    fn wait_for_child_with_signal() {
        // SAFETY: the forked child only calls async-signal-safe functions.
        match unsafe { fork() } {
            Ok(ForkResult::Child) => unsafe {
                // Make sure SIGTERM really terminates the child even if the
                // test process inherited a non-default disposition.
                libc::signal(libc::SIGTERM, libc::SIG_DFL);
                libc::raise(libc::SIGTERM);
                // Only reached if the signal did not terminate the child.
                libc::_exit(1)
            },
            Ok(ForkResult::Parent { child }) => {
                let status = wait_for_child(child).unwrap();
                // Death by signal is reported shell-style as 128 + signo.
                assert_eq!(status, 128 + libc::SIGTERM);
            }
            Err(e) => panic!("fork failed: {}", e),
        }
    }

    #[test]
    fn process_executor_runs_program_without_namespaces() {
        let config = ProcessConfig {
            program: "/bin/echo".to_string(),
            args: vec!["sandbox".to_string()],
            env: vec![("TEST_EXEC".to_string(), "1".to_string())],
            ..Default::default()
        };

        let result = ProcessExecutor::execute(config, no_namespaces()).unwrap();
        assert_eq!(result.exit_status, 0);
    }

    #[test]
    fn execute_with_stream_disabled() {
        let config = ProcessConfig {
            program: "/bin/echo".to_string(),
            args: vec!["test_output".to_string()],
            ..Default::default()
        };

        let (result, stream) =
            ProcessExecutor::execute_with_stream(config, no_namespaces(), false).unwrap();
        assert_eq!(result.exit_status, 0);
        assert!(stream.is_none());
    }

    #[test]
    fn execute_with_stream_enabled() {
        let config = ProcessConfig {
            program: "/bin/echo".to_string(),
            args: vec!["streamed_output".to_string()],
            ..Default::default()
        };

        let (result, stream) =
            ProcessExecutor::execute_with_stream(config, no_namespaces(), true).unwrap();
        // In streaming mode the immediate result carries a placeholder
        // status of 0; the real one is delivered through the stream.
        assert_eq!(result.exit_status, 0);
        assert!(stream.is_some());
    }

    #[test]
    fn resolve_program_path_uses_env_path() {
        let env = vec![("PATH".to_string(), "/bin:/usr/bin".to_string())];
        let resolved = resolve_program_path("ls", &env).unwrap();
        assert!(
            resolved.ends_with("/ls"),
            "expected ls in path, got {}",
            resolved
        );
    }

    #[test]
    fn resolve_program_path_reports_missing_binary() {
        let env = vec![("PATH".to_string(), "/nonexistent".to_string())];
        let err = resolve_program_path("definitely_missing_cmd", &env).unwrap_err();
        assert!(err.contains("command not found"));
    }
}
650}