Skip to main content

harn_hostlib/process/
handle.rs

//! Process abstraction traits used by `tools/proc` and
//! `tools/long_running`.
//!
//! Tier 1C of the de-flake epic (#1057). Production code spawns through
//! the [`ProcessSpawner`] trait — the default implementation in
//! `process::real` wraps `std::process::Child` and goes through
//! `harn_vm::process_sandbox`. Tests install a `MockSpawner` (see
//! `process::mock`) that returns deterministic [`MockProcess`] handles,
//! so process-tool tests no longer depend on real subprocess scheduling
//! or wall-clock timing.
11
12use std::collections::BTreeMap;
13use std::io::{self, Read, Write};
14use std::path::PathBuf;
15use std::sync::Arc;
16use std::time::Duration;
17
/// Exit information for a process that has finished.
///
/// Carries only the parts of `std::process::ExitStatus` that the
/// process-tool builtins surface: the exit code and, when applicable, the
/// terminating signal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ExitStatus {
    /// Code passed to `exit(2)` / `_exit(2)`. `None` means the process did
    /// not exit normally (it was terminated by a signal).
    pub code: Option<i32>,
    /// Unix signal that terminated the process, if any. `None` on non-Unix
    /// targets or when the process exited normally.
    pub signal: Option<i32>,
}

impl ExitStatus {
    /// Build the status of a process that exited normally with `code`.
    ///
    /// The returned value has `signal == None`.
    pub fn from_code(code: i32) -> Self {
        let code = Some(code);
        ExitStatus { code, signal: None }
    }

    /// Build the status of a process that was terminated by `signal`.
    ///
    /// The returned value has `code == None`.
    pub fn from_signal(signal: i32) -> Self {
        let signal = Some(signal);
        ExitStatus { code: None, signal }
    }
}
47
/// Policy for combining the parent's environment with a spawn's `env` map.
/// Mirrors the legacy `EnvMode` from `tools/proc.rs`.
///
/// NOTE(review): as documented here, [`EnvMode::InheritClean`] and
/// [`EnvMode::Patch`] both read "inherit the parent's environment, then
/// apply `env`". The difference between them is not visible in this file —
/// confirm against the spawner implementation before relying on either.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EnvMode {
    /// Start from the parent's environment, then apply the `env` overrides.
    InheritClean,
    /// Start from an empty environment, then apply `env`.
    Replace,
    /// Start from the parent's environment and apply `env`. Documented as
    /// the default behaviour.
    Patch,
}
59
/// Environment variable names that are always treated as secret-bearing.
///
/// The agent's `run`/`command_run` tool must never pass these to a child
/// process: the child's stdout is returned to the model as the tool result,
/// so anything the child can read could end up in the model context.
///
/// Entries are stored upper-case; [`is_sensitive_env_name`] upper-cases the
/// candidate before comparing, which makes the match case-insensitive. This
/// list is checked in addition to that function's prefix and suffix rules.
const EXPLICIT_SENSITIVE_ENV_NAMES: &[&str] = &[
    "GITHUB_TOKEN",
    "GH_TOKEN",
    "HARN_CLOUD_API_KEY",
    "BURIN_ADMIN_TOKEN",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_SESSION_TOKEN",
];
73
/// Provider namespaces whose variables are all treated as secret-bearing,
/// whatever the rest of the name says (e.g. `ANTHROPIC_API_KEY`,
/// `OPENAI_ORG_ID`).
///
/// Each entry is an upper-case prefix including the trailing underscore. It
/// is compared against the start of the upper-cased variable name, so the
/// match is case-insensitive.
const SENSITIVE_ENV_PREFIXES: &[&str] = &[
    "ANTHROPIC_",
    "OPENAI_",
    "OPENROUTER_",
    "FIREWORKS_",
    "TOGETHER_",
    "XAI_",
    "GROQ_",
];
86
87/// Returns `true` when an environment variable name looks like it carries a
88/// secret (provider API key, access token, OAuth client secret, etc.) and so
89/// must be stripped from a child process spawned by the agent's `run` tool.
90///
91/// The check is deliberately conservative about credentials but permissive
92/// about ordinary build/toolchain variables: `PATH`, `HOME`, `LANG`,
93/// `CARGO_HOME`, language toolchain vars, etc. are *not* sensitive and stay
94/// in the child environment so builds and tests still work.
95///
96/// Matching is case-insensitive and covers:
97/// - suffix patterns `_API_KEY`, `_TOKEN`, `_SECRET`, `_KEY`;
98/// - the provider prefixes in [`SENSITIVE_ENV_PREFIXES`];
99/// - the explicit names in [`EXPLICIT_SENSITIVE_ENV_NAMES`].
100pub fn is_sensitive_env_name(name: &str) -> bool {
101    let upper = name.to_ascii_uppercase();
102    if EXPLICIT_SENSITIVE_ENV_NAMES.contains(&upper.as_str()) {
103        return true;
104    }
105    if SENSITIVE_ENV_PREFIXES
106        .iter()
107        .any(|prefix| upper.starts_with(prefix))
108    {
109        return true;
110    }
111    // Suffix patterns catch the long tail of provider/service credentials
112    // (`*_API_KEY`, `*_TOKEN`, `*_SECRET`, `*_KEY`) without enumerating every
113    // vendor. `_KEY` is last and broadest; it still excludes benign names
114    // like `PATH`/`HOME`/`LANG` that don't end in these suffixes.
115    upper.ends_with("_API_KEY")
116        || upper.ends_with("_TOKEN")
117        || upper.ends_with("_SECRET")
118        || upper.ends_with("_KEY")
119}
120
121/// Parameters describing a single spawn. The spawner is responsible for any
122/// sandbox setup (Linux seccomp/landlock, macOS sandbox-exec, etc.) and for
123/// configuring the child's process group when requested.
124#[derive(Clone, Debug)]
125pub struct SpawnSpec {
126    /// Builtin name surfaced in error messages (e.g. `"hostlib_tools_run_command"`).
127    pub builtin: &'static str,
128    /// Program to execute. Must be non-empty (validated by the spawner).
129    pub program: String,
130    /// Arguments to pass to the program.
131    pub args: Vec<String>,
132    /// Working directory for the child. `None` inherits the parent's cwd.
133    pub cwd: Option<PathBuf>,
134    /// Environment overrides to apply (interpretation depends on `env_mode`).
135    pub env: BTreeMap<String, String>,
136    /// How to treat the parent's environment.
137    pub env_mode: EnvMode,
138    /// Whether stdin will be written to (`true`) or piped to /dev/null (`false`).
139    pub use_stdin: bool,
140    /// Set the child's process group to its own pid (`setpgid(0, 0)`). Used
141    /// for long-running handles so the kill-by-pgid path works.
142    pub configure_process_group: bool,
143}
144
145/// Handle to a running (or finished) process. Used by both the synchronous
146/// `proc::run` path and the long-running waiter thread.
147///
148/// The trait is intentionally small: the legacy code already managed
149/// stdout/stderr drain on dedicated threads, and stdin is written once after
150/// spawn — wrapping those reads/writes via boxed trait objects keeps the
151/// real and mock paths uniform without forcing async into the rest of the
152/// hostlib.
153pub trait ProcessHandle: Send {
154    /// OS process id, when available.
155    fn pid(&self) -> Option<u32>;
156
157    /// OS process group id, when available. Falls back to [`Self::pid`] on
158    /// platforms that don't expose process groups.
159    fn process_group_id(&self) -> Option<u32>;
160
161    /// Returns a killer that can terminate the process even after the
162    /// stdout/stderr/wait halves have been moved into the waiter thread.
163    fn killer(&self) -> Arc<dyn ProcessKiller>;
164
165    /// Take ownership of the stdin pipe, if the spawn requested one.
166    fn take_stdin(&mut self) -> Option<Box<dyn Write + Send>>;
167
168    /// Take ownership of the stdout reader.
169    fn take_stdout(&mut self) -> Option<Box<dyn Read + Send>>;
170
171    /// Take ownership of the stderr reader.
172    fn take_stderr(&mut self) -> Option<Box<dyn Read + Send>>;
173
174    /// Wait for the process to exit, optionally with a timeout, while
175    /// polling `interrupt`. On timeout the spawner kills the child
176    /// (SIGKILL, historical semantics) and reports
177    /// [`WaitOutcome::TimedOut`]. When `interrupt` returns `true` (scope
178    /// cancellation, `deadline` expiry — see `harn_vm::op_interrupt`) the
179    /// spawner gracefully terminates the child's process group (SIGTERM,
180    /// then SIGKILL after `harn_vm::op_interrupt::SUBPROCESS_TERM_GRACE`)
181    /// and reports [`WaitOutcome::Interrupted`].
182    fn wait_with_timeout(
183        &mut self,
184        timeout: Option<Duration>,
185        interrupt: &dyn Fn() -> bool,
186    ) -> io::Result<WaitOutcome>;
187
188    /// Block until the process exits, no timeout, no interrupt polling.
189    /// Used by the background (`background: true`) waiter thread, whose
190    /// children deliberately outlive scope cancellation and deadlines.
191    fn wait(&mut self) -> io::Result<ExitStatus>;
192}
193
194/// How a [`ProcessHandle::wait_with_timeout`] ended.
195#[derive(Clone, Copy, Debug, PartialEq, Eq)]
196pub enum WaitOutcome {
197    /// The process exited on its own.
198    Exited(ExitStatus),
199    /// The timeout elapsed; the spawner killed the child (group).
200    TimedOut,
201    /// The interrupt callback fired; the spawner gracefully terminated the
202    /// child's process group.
203    Interrupted,
204}
205
/// The kill half of a [`ProcessHandle`].
///
/// Handed out behind an `Arc` so cancellation still works once the waiter
/// thread has taken ownership of the handle itself.
pub trait ProcessKiller: Send + Sync {
    /// Send SIGKILL to the process (and to its process group, when
    /// applicable).
    fn kill(&self);
}
212
213/// Spawner abstraction: produces [`ProcessHandle`] instances.
214pub trait ProcessSpawner: Send + Sync {
215    /// Spawn the configured process.
216    fn spawn(&self, spec: SpawnSpec) -> Result<Box<dyn ProcessHandle>, ProcessError>;
217}
218
219/// Errors raised by a spawner. These map onto `HostlibError::Backend` /
220/// `HostlibError::InvalidParameter` at the call site so the script-side
221/// surface stays unchanged.
222#[derive(Clone, Debug, thiserror::Error)]
223pub enum ProcessError {
224    /// `argv` was empty or otherwise malformed.
225    #[error("invalid argv: {0}")]
226    InvalidArgv(String),
227    /// Sandbox setup (e.g. landlock policy assembly) failed.
228    #[error("sandbox setup failed: {0}")]
229    SandboxSetup(String),
230    /// Sandbox rejected the supplied cwd.
231    #[error("sandbox cwd rejected: {0}")]
232    SandboxCwd(String),
233    /// Sandbox rejected the spawn at execve time.
234    #[error("sandbox rejected spawn: {0}")]
235    SandboxSpawn(String),
236    /// Generic spawn failure (typically io::Error from `Command::spawn`).
237    #[error("spawn failed: {0}")]
238    Spawn(String),
239}
240
241use std::cell::RefCell;
242
243thread_local! {
244    static THREAD_SPAWNER: RefCell<Option<Arc<dyn ProcessSpawner>>> = const { RefCell::new(None) };
245}
246
247/// Install a per-thread spawner used by `spawn_process` from this thread.
248/// Returns a guard that restores the previous spawner on drop. Tests use
249/// this to install a [`super::mock::MockSpawner`]; production never calls
250/// it (the default real spawner runs whenever no per-thread spawner is
251/// installed).
252///
253/// Thread-local rather than global so parallel test execution is safe.
254/// Process-tool spawns happen on the test's thread; the long-running
255/// waiter threads operate on the handle that was already returned, so
256/// they don't perform spawner lookups themselves.
257pub fn install_spawner(spawner: Arc<dyn ProcessSpawner>) -> SpawnerGuard {
258    let prev = THREAD_SPAWNER.with(|slot| slot.replace(Some(spawner)));
259    SpawnerGuard { prev: Some(prev) }
260}
261
262/// Guard returned by [`install_spawner`]. Restores the previous spawner on
263/// drop so installs nest correctly across tests.
264pub struct SpawnerGuard {
265    // Outer Option distinguishes "guard already restored" (None) from
266    // "guard owes a restore" (Some(_)); inner Option carries the previous
267    // spawner slot value (which can itself be None when no spawner was set).
268    #[allow(clippy::option_option)]
269    prev: Option<Option<Arc<dyn ProcessSpawner>>>,
270}
271
272impl Drop for SpawnerGuard {
273    fn drop(&mut self) {
274        if let Some(prev) = self.prev.take() {
275            THREAD_SPAWNER.with(|slot| {
276                *slot.borrow_mut() = prev;
277            });
278        }
279    }
280}
281
282/// Return the currently installed spawner for this thread, falling back
283/// to the default real spawner.
284pub fn current_spawner() -> Arc<dyn ProcessSpawner> {
285    THREAD_SPAWNER
286        .with(|slot| slot.borrow().clone())
287        .unwrap_or_else(super::real::default_spawner)
288}
289
290/// Spawn a process via the currently installed spawner.
291pub fn spawn_process(spec: SpawnSpec) -> Result<Box<dyn ProcessHandle>, ProcessError> {
292    current_spawner().spawn(spec)
293}
294
#[cfg(test)]
mod tests {
    use super::is_sensitive_env_name;

    /// Names that must be stripped, grouped by the rule that catches them.
    #[test]
    fn denies_secret_bearing_names() {
        // Provider-namespaced keys. These match the provider prefix as well
        // as the key suffix, so they do not isolate either rule.
        assert!(is_sensitive_env_name("ANTHROPIC_API_KEY"));
        assert!(is_sensitive_env_name("OPENAI_API_KEY"));
        // Suffix patterns, on names outside every provider namespace and
        // outside the explicit list, so the suffix rule alone decides.
        assert!(is_sensitive_env_name("SOME_VENDOR_API_KEY"));
        assert!(is_sensitive_env_name("SOME_VENDOR_TOKEN"));
        assert!(is_sensitive_env_name("MY_CLIENT_SECRET"));
        assert!(is_sensitive_env_name("RANDOM_KEY"));
        // Explicit names.
        assert!(is_sensitive_env_name("GITHUB_TOKEN"));
        assert!(is_sensitive_env_name("GH_TOKEN"));
        assert!(is_sensitive_env_name("HARN_CLOUD_API_KEY"));
        assert!(is_sensitive_env_name("BURIN_ADMIN_TOKEN"));
        assert!(is_sensitive_env_name("AWS_SECRET_ACCESS_KEY"));
        assert!(is_sensitive_env_name("AWS_SESSION_TOKEN"));
        // Provider prefixes (even without a key/token suffix) — one case
        // per configured prefix.
        assert!(is_sensitive_env_name("ANTHROPIC_BASE_URL"));
        assert!(is_sensitive_env_name("OPENAI_ORG_ID"));
        assert!(is_sensitive_env_name("OPENROUTER_BASE_URL"));
        assert!(is_sensitive_env_name("FIREWORKS_ACCOUNT"));
        assert!(is_sensitive_env_name("TOGETHER_ORG"));
        assert!(is_sensitive_env_name("XAI_REGION"));
        assert!(is_sensitive_env_name("GROQ_PROJECT"));
    }

    /// Ordinary build/toolchain variables must survive the filter.
    #[test]
    fn allows_benign_build_and_toolchain_names() {
        assert!(!is_sensitive_env_name("PATH"));
        assert!(!is_sensitive_env_name("HOME"));
        assert!(!is_sensitive_env_name("CARGO_HOME"));
        assert!(!is_sensitive_env_name("LANG"));
        assert!(!is_sensitive_env_name("LC_ALL"));
        assert!(!is_sensitive_env_name("TERM"));
        assert!(!is_sensitive_env_name("USER"));
        assert!(!is_sensitive_env_name("RUSTUP_HOME"));
        assert!(!is_sensitive_env_name("CARGO_TARGET_DIR"));
        assert!(!is_sensitive_env_name("SHELL"));
        // Near misses: the sensitive word appears, but not as a `_`-joined
        // suffix, and the name only resembles a provider prefix.
        assert!(!is_sensitive_env_name("KEYBOARD_LAYOUT"));
        assert!(!is_sensitive_env_name("TOKENIZERS_PARALLELISM"));
        assert!(!is_sensitive_env_name("XAUTHORITY"));
        // Degenerate input.
        assert!(!is_sensitive_env_name(""));
    }

    /// Every rule (explicit, prefix, suffix) must ignore ASCII case.
    #[test]
    fn matches_case_insensitively() {
        assert!(is_sensitive_env_name("anthropic_api_key"));
        assert!(is_sensitive_env_name("github_token"));
        assert!(!is_sensitive_env_name("path"));
        // Mixed case, one per rule: explicit name, provider prefix, suffix.
        assert!(is_sensitive_env_name("Gh_Token"));
        assert!(is_sensitive_env_name("openrouter_base_url"));
        assert!(is_sensitive_env_name("My_Client_Secret"));
    }
}
342}