Skip to main content

apcore_cli/security/
sandbox.rs

1// apcore-cli — Subprocess sandbox for module execution.
2// Protocol spec: SEC-04 (Sandbox, ModuleExecutionError)
3
4use tokio::io::AsyncReadExt;
5
6use serde_json::Value;
7use thiserror::Error;
8
9// ---------------------------------------------------------------------------
10// Constants
11// ---------------------------------------------------------------------------
12
13/// Environment variable prefixes allowed through the sandbox env whitelist.
14const SANDBOX_ALLOWED_ENV_PREFIXES: &[&str] = &["APCORE_"];
15
16/// Exact environment variable names allowed through the sandbox env whitelist.
17const SANDBOX_ALLOWED_ENV_KEYS: &[&str] = &["PATH", "LANG", "LC_ALL"];
18
19/// Environment variable prefixes denied even when matched by the allow list.
20/// Credential-bearing namespaces must never reach the sandboxed child process.
21const SANDBOX_DENIED_ENV_PREFIXES: &[&str] = &["APCORE_AUTH_"];
22
23/// Exact environment variable names denied regardless of prefix match.
24const SANDBOX_DENIED_ENV_KEYS: &[&str] = &["APCORE_AUTH_API_KEY"];
25
26/// Maximum bytes collected from sandbox stdout or stderr before the child is
27/// killed and OutputParseFailed is returned. Guards against OOM from hostile
28/// or buggy modules that write unboundedly.
29const SANDBOX_OUTPUT_SIZE_LIMIT_BYTES: usize = 64 * 1024 * 1024; // 64 MiB (aligned with Python/TS)
30
31// ---------------------------------------------------------------------------
32// ModuleExecutionError
33// ---------------------------------------------------------------------------
34
35/// Errors produced during sandboxed module execution.
36#[derive(Debug, Error)]
37pub enum ModuleExecutionError {
38    /// The subprocess exited with a non-zero exit code. The captured
39    /// stderr is preserved on the error so callers can surface it for
40    /// debuggability (the subprocess panics, tracebacks, and user-facing
41    /// error prints all land here).
42    #[error("module '{module_id}' exited with code {exit_code}{}",
43            if stderr.is_empty() { String::new() } else { format!(": {stderr}") })]
44    NonZeroExit {
45        module_id: String,
46        exit_code: i32,
47        stderr: String,
48    },
49
50    /// The subprocess timed out.
51    #[error("module '{module_id}' timed out after {timeout_secs}s")]
52    Timeout {
53        module_id: String,
54        timeout_secs: u64,
55    },
56
57    /// The subprocess output could not be parsed.
58    #[error("failed to parse sandbox output for module '{module_id}': {reason}")]
59    OutputParseFailed { module_id: String, reason: String },
60
61    /// Failed to spawn the sandbox subprocess.
62    #[error("failed to spawn sandbox process: {0}")]
63    SpawnFailed(String),
64
65    /// A module-level error from the in-process apcore executor on the disabled
66    /// passthrough path. Preserved as a variant (rather than stringified) so
67    /// callers can map the underlying `ErrorCode` via
68    /// `crate::cli::map_module_error_to_exit_code`, keeping exit-code taxonomy
69    /// consistent between `--sandbox` and direct execution paths.
70    #[error(transparent)]
71    ModuleError(#[from] apcore::errors::ModuleError),
72}
73
74// ---------------------------------------------------------------------------
75// Auxiliary error types (API parity with Python/TypeScript)
76// ---------------------------------------------------------------------------
77
78/// Raised when the requested module ID is not registered in the registry.
79/// Exported for cross-SDK API parity with Python and TypeScript.
80#[derive(Debug, Error)]
81#[error("Module not found: {module_id}")]
82pub struct ModuleNotFoundError {
83    pub module_id: String,
84}
85
86/// Raised when a module's JSON Schema is structurally invalid or fails
87/// against the validator. Exported for cross-SDK API parity.
88#[derive(Debug, Error)]
89#[error("Schema validation error: {detail}")]
90pub struct SchemaValidationError {
91    pub detail: String,
92}
93
94// Sandbox
95// ---------------------------------------------------------------------------
96
97/// Executes modules in an isolated subprocess for security isolation.
98///
99/// When `enabled` is `false`, execution is performed in-process (no sandbox).
100/// When `enabled` is `true`, a child process running `sandbox_runner` handles
101/// the execution and communicates results via JSON over stdin/stdout.
102pub struct Sandbox {
103    enabled: bool,
104    timeout_secs: u64,
105}
106
107impl Sandbox {
108    /// Create a new `Sandbox`.
109    ///
110    /// # Arguments
111    /// * `enabled`    — enable subprocess isolation
112    /// * `timeout_secs` — subprocess timeout in seconds (0 = use default 300 s)
113    pub fn new(enabled: bool, timeout_secs: u64) -> Self {
114        Self {
115            enabled,
116            timeout_secs,
117        }
118    }
119
120    /// Return `true` when subprocess isolation is enabled.
121    pub fn is_enabled(&self) -> bool {
122        self.enabled
123    }
124
125    /// Execute a module, optionally in an isolated subprocess.
126    ///
127    /// # Arguments
128    /// * `module_id`  — identifier of the module to execute
129    /// * `input_data` — JSON input for the module
130    ///
131    /// Returns the module output as a `serde_json::Value`.
132    ///
133    /// # Errors
134    /// Returns `ModuleExecutionError` on timeout, non-zero exit, or parse failure.
135    ///
136    /// When `enabled` is `false`, delegates directly to `executor.call()` and
137    /// returns the result (or maps the apcore module error into a
138    /// `ModuleExecutionError::SpawnFailed`). This passthrough makes Sandbox
139    /// safe to call unconditionally from the dispatcher: callers no longer
140    /// need to branch on the `--sandbox` flag at every call site.
141    ///
142    /// When `enabled` is `true`, runs `module_id` in an isolated subprocess
143    /// via `sandbox_runner` and returns the parsed JSON output. The executor
144    /// argument is intentionally unused in this branch — the subprocess loads
145    /// its own apcore environment from the inherited `APCORE_*` env vars.
146    pub async fn execute(
147        &self,
148        module_id: &str,
149        input_data: Value,
150        executor: &apcore::Executor,
151    ) -> Result<Value, ModuleExecutionError> {
152        if !self.enabled {
153            // Passthrough: delegate to the in-process apcore::Executor and
154            // preserve the ModuleError variant so callers can map to the
155            // protocol-spec exit code.
156            return executor
157                .call(module_id, input_data, None, None)
158                .await
159                .map_err(ModuleExecutionError::ModuleError);
160        }
161        self._sandboxed_execute(module_id, input_data).await
162    }
163
164    async fn _sandboxed_execute(
165        &self,
166        module_id: &str,
167        input_data: Value,
168    ) -> Result<Value, ModuleExecutionError> {
169        use std::process::Stdio;
170        use tokio::io::AsyncWriteExt;
171        use tokio::process::Command;
172        use tokio::time::{timeout, Duration};
173
174        // Build restricted environment from whitelist.
175        let mut env: Vec<(String, String)> = Vec::new();
176        let host_env: std::collections::HashMap<String, String> = std::env::vars().collect();
177
178        for key in SANDBOX_ALLOWED_ENV_KEYS {
179            if let Some(val) = host_env.get(*key) {
180                env.push((key.to_string(), val.clone()));
181            }
182        }
183        for (k, v) in &host_env {
184            if SANDBOX_ALLOWED_ENV_PREFIXES
185                .iter()
186                .any(|prefix| k.starts_with(prefix))
187                && !SANDBOX_DENIED_ENV_PREFIXES
188                    .iter()
189                    .any(|prefix| k.starts_with(prefix))
190                && !SANDBOX_DENIED_ENV_KEYS.contains(&k.as_str())
191            {
192                env.push((k.clone(), v.clone()));
193            }
194        }
195
196        // Create temp dir for HOME/TMPDIR isolation.
197        let tmpdir = tempfile::TempDir::new()
198            .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
199        let tmpdir_path = tmpdir.path().to_string_lossy().to_string();
200        env.push(("HOME".to_string(), tmpdir_path.clone()));
201        env.push(("TMPDIR".to_string(), tmpdir_path.clone()));
202
203        // Serialise input.
204        let input_json = serde_json::to_string(&input_data)
205            .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
206
207        // Locate current binary.
208        let binary = std::env::current_exe()
209            .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
210
211        let mut child = Command::new(&binary)
212            .arg("--internal-sandbox-runner")
213            .arg(module_id)
214            .stdin(Stdio::piped())
215            .stdout(Stdio::piped())
216            .stderr(Stdio::piped())
217            .env_clear()
218            .envs(env)
219            .current_dir(&tmpdir_path)
220            // Ensure the child is killed if this future is dropped (e.g. on
221            // timeout or SIGINT) — tokio's default is kill_on_drop=false,
222            // which would leak the subprocess past Err(Timeout).
223            .kill_on_drop(true)
224            .spawn()
225            .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
226
227        // Write input to stdin.
228        if let Some(mut stdin) = child.stdin.take() {
229            stdin
230                .write_all(input_json.as_bytes())
231                .await
232                .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
233        }
234
235        // Await with timeout, collecting stdout/stderr up to the cap.
236        let timeout_dur = if self.timeout_secs > 0 {
237            Duration::from_secs(self.timeout_secs)
238        } else {
239            Duration::from_secs(300)
240        };
241
242        // Take pipe handles before the join so the child struct can also be
243        // awaited for the exit status in the same async block.
244        let stdout_pipe = child.stdout.take();
245        let stderr_pipe = child.stderr.take();
246
247        let cap = SANDBOX_OUTPUT_SIZE_LIMIT_BYTES;
248        let collect_result = timeout(timeout_dur, async {
249            let (stdout_res, stderr_res) = tokio::join!(
250                async {
251                    let mut buf = Vec::new();
252                    if let Some(r) = stdout_pipe {
253                        let _ = r.take(cap as u64 + 1).read_to_end(&mut buf).await;
254                    }
255                    buf
256                },
257                async {
258                    let mut buf = Vec::new();
259                    if let Some(r) = stderr_pipe {
260                        let _ = r.take(cap as u64 + 1).read_to_end(&mut buf).await;
261                    }
262                    buf
263                },
264            );
265            let status = child
266                .wait()
267                .await
268                .map_err(|e| ModuleExecutionError::SpawnFailed(e.to_string()))?;
269            Ok::<_, ModuleExecutionError>((stdout_res, stderr_res, status))
270        })
271        .await
272        .map_err(|_| ModuleExecutionError::Timeout {
273            module_id: module_id.to_string(),
274            timeout_secs: self.timeout_secs,
275        })??;
276
277        let (stdout_bytes, stderr_bytes, status) = collect_result;
278
279        if stdout_bytes.len() > cap || stderr_bytes.len() > cap {
280            return Err(ModuleExecutionError::OutputParseFailed {
281                module_id: module_id.to_string(),
282                reason: format!("sandbox output exceeded {} bytes", cap),
283            });
284        }
285
286        if !status.success() {
287            let exit_code = status.code().unwrap_or(-1);
288            let stderr = String::from_utf8_lossy(&stderr_bytes).into_owned();
289            return Err(ModuleExecutionError::NonZeroExit {
290                module_id: module_id.to_string(),
291                exit_code,
292                stderr,
293            });
294        }
295
296        let stdout = String::from_utf8_lossy(&stdout_bytes).to_string();
297        crate::sandbox_runner::decode_result(&stdout).map_err(|e| {
298            ModuleExecutionError::OutputParseFailed {
299                module_id: module_id.to_string(),
300                reason: e.to_string(),
301            }
302        })
303    }
304}
305
306// ---------------------------------------------------------------------------
307// Unit tests
308// ---------------------------------------------------------------------------
309
310#[cfg(test)]
311mod tests {
312    use super::*;
313    use serde_json::json;
314
315    #[tokio::test]
316    async fn test_sandbox_disabled_delegates_to_executor() {
317        // Audit A-003 (v0.6.x): the disabled path now passes through to the
318        // injected apcore::Executor instead of returning a "not wired" stub.
319        // We can't easily build a real executor in unit tests (it needs a
320        // Registry + Config + module discovery), so we verify the API surface
321        // accepts the executor parameter. End-to-end passthrough is exercised
322        // by tests/test_e2e.rs which constructs a real executor.
323        let sandbox = Sandbox::new(false, 5); // 5 seconds (unit is now seconds per A-D-006 fix)
324                                              // Compile-time check: signature accepts (&str, Value, &apcore::Executor).
325                                              // The body is dead code at runtime; it exists only to keep the type
326                                              // checker honest about the new signature.
327        let _check: fn(&Sandbox, &str, Value, &apcore::Executor) = |s, id, v, e| {
328            drop(s.execute(id, v, e));
329        };
330        let _ = sandbox; // suppress unused warning
331    }
332
333    #[tokio::test]
334    async fn test_sandbox_enabled_path_still_runs_subprocess() {
335        // Use a 1-second timeout — still quick enough for a unit compile-check.
336        // We don't actually invoke execute() here; just verify the API surface.
337        let sandbox = Sandbox::new(true, 1); // 1 second per A-D-006 fix (was 1ms)
338        let _check: fn(&Sandbox, &str, Value, &apcore::Executor) = |s, id, v, e| {
339            drop(s.execute(id, v, e));
340        };
341        let _ = sandbox;
342    }
343
344    #[test]
345    fn test_decode_result_valid_json() {
346        use crate::sandbox_runner::decode_result;
347        let v = decode_result(r#"{"ok":true}"#).unwrap();
348        assert_eq!(v["ok"], true);
349    }
350
351    #[test]
352    fn test_decode_result_invalid_json() {
353        use crate::sandbox_runner::decode_result;
354        assert!(decode_result("not json").is_err());
355    }
356
357    #[test]
358    fn test_encode_result_roundtrip() {
359        use crate::sandbox_runner::{decode_result, encode_result};
360        let v = json!({"result": 42});
361        let encoded = encode_result(&v);
362        let decoded = decode_result(&encoded).unwrap();
363        assert_eq!(decoded["result"], 42);
364    }
365
366    #[test]
367    fn test_sandbox_env_does_not_include_auth_api_key() {
368        // APCORE_AUTH_API_KEY must never be forwarded to the sandboxed child
369        // even though it sits under the APCORE_ prefix whitelist.
370        unsafe { std::env::set_var("APCORE_AUTH_API_KEY", "secret-key-12345") };
371        let host_env: std::collections::HashMap<String, String> = std::env::vars().collect();
372
373        let mut env: Vec<(String, String)> = Vec::new();
374        for key in SANDBOX_ALLOWED_ENV_KEYS {
375            if let Some(val) = host_env.get(*key) {
376                env.push((key.to_string(), val.clone()));
377            }
378        }
379        for (k, v) in &host_env {
380            if SANDBOX_ALLOWED_ENV_PREFIXES
381                .iter()
382                .any(|prefix| k.starts_with(prefix))
383                && !SANDBOX_DENIED_ENV_PREFIXES
384                    .iter()
385                    .any(|prefix| k.starts_with(prefix))
386                && !SANDBOX_DENIED_ENV_KEYS.contains(&k.as_str())
387            {
388                env.push((k.clone(), v.clone()));
389            }
390        }
391
392        unsafe { std::env::remove_var("APCORE_AUTH_API_KEY") };
393
394        assert!(
395            !env.iter().any(|(k, _)| k == "APCORE_AUTH_API_KEY"),
396            "APCORE_AUTH_API_KEY must not be forwarded to the sandbox environment"
397        );
398    }
399
400    #[test]
401    fn test_sandbox_env_does_not_include_auth_prefix() {
402        unsafe {
403            std::env::set_var("APCORE_AUTH_TOKEN", "bearer-xyz");
404            std::env::set_var("APCORE_AUTH_SECRET", "shh");
405        }
406        let host_env: std::collections::HashMap<String, String> = std::env::vars().collect();
407
408        let env: Vec<(String, String)> = host_env
409            .iter()
410            .filter(|(k, _)| {
411                SANDBOX_ALLOWED_ENV_PREFIXES
412                    .iter()
413                    .any(|p| k.starts_with(p))
414                    && !SANDBOX_DENIED_ENV_PREFIXES.iter().any(|p| k.starts_with(p))
415                    && !SANDBOX_DENIED_ENV_KEYS.contains(&k.as_str())
416            })
417            .map(|(k, v)| (k.clone(), v.clone()))
418            .collect();
419
420        unsafe {
421            std::env::remove_var("APCORE_AUTH_TOKEN");
422            std::env::remove_var("APCORE_AUTH_SECRET");
423        }
424
425        let leaked: Vec<_> = env
426            .iter()
427            .filter(|(k, _)| k.starts_with("APCORE_AUTH_"))
428            .collect();
429        assert!(
430            leaked.is_empty(),
431            "APCORE_AUTH_* vars must not leak into sandbox env: {leaked:?}"
432        );
433    }
434}