Skip to main content

harn_hostlib/sandbox/
mod.rs

1//! The runtime arm of the permission primitive: pluggable sandbox
2//! backends that *enforce* a declared policy rather than merely gating
3//! tool dispatch.
4//!
5//! A sandbox is the runtime answer to a permission policy. The
6//! authoritative policy model lives in `harn-serve`'s `permissions`
7//! module (the `policy { read, write, exec, net }` block); this module
8//! is where that policy becomes true at execution time. `harn-serve`
9//! lowers a `PermissionPolicy` into a [`SandboxSpec`] and a backend
10//! makes the spec real:
11//!
12//! - **filesystem** — mounts scope what the spawned process can touch;
13//!   reads and writes outside the declared roots are rejected by the
14//!   underlying OS sandbox.
15//! - **process** — every command runs through `harn-vm`'s process
16//!   sandbox, which maps the policy onto Landlock/seccomp (Linux),
17//!   `sandbox-exec` (macOS), Job Objects (Windows), and `pledge`/
18//!   `unveil` (OpenBSD).
19//! - **network** — egress is governed by [`NetworkPolicy`]; a backend
20//!   advertises whether it can honour a per-host allowlist via
21//!   [`SandboxCapabilities::network_policy`].
22//!
23//! The [`LocalSandbox`] backend ships here because the process/fs
24//! enforcement it relies on already lives in `harn-vm`; remote backends
25//! (Fly Machines, Modal, E2B, …) implement the same [`SandboxBackend`]
26//! contract from wherever they run.
27
28mod local;
29
30pub use local::{LocalSandbox, LocalSandboxConfig};
31
32use std::collections::BTreeMap;
33use std::path::PathBuf;
34use std::time::Duration;
35
36use async_trait::async_trait;
37use serde::{Deserialize, Serialize};
38use thiserror::Error;
39
/// Canonical guest mount for durable agent memory, read-only by
/// default. Backends expose its host path through the `HARN_MEMORY_DIR`
/// environment variable.
///
/// The value is an absolute path with no trailing slash and no `..`
/// component, so `normalized_mount_target` returns it unchanged.
pub const MEMORY_MOUNT: &str = "/mnt/memory";
44
/// Canonical guest mount for a session's writable scratch/output
/// directory. Backends expose its host path through the
/// `HARN_OUTPUTS_DIR` environment variable.
///
/// The value is an absolute path with no trailing slash and no `..`
/// component, so `normalized_mount_target` returns it unchanged.
pub const OUTPUTS_MOUNT: &str = "/mnt/session/outputs";
49
/// Errors surfaced by a [`SandboxBackend`].
///
/// The `Io`, `Json`, and `Task` variants are marked `#[from]`, so the `?`
/// operator lifts `std::io::Error`, `serde_json::Error`, and
/// `tokio::task::JoinError` into this type. The remaining variants carry a
/// message (or a backend/operation pair) supplied at the point of failure.
#[derive(Debug, Error)]
pub enum SandboxError {
    /// No live session matches the supplied id.
    ///
    /// The payload is the session id, interpolated into the message.
    #[error("sandbox session `{0}` was not found")]
    SessionNotFound(String),
    /// The backend cannot honour the requested operation (e.g. a local
    /// backend asked for a per-host egress allowlist).
    #[error("backend `{backend}` does not support {operation}")]
    Unsupported {
        /// The backend that rejected the operation.
        backend: &'static str,
        /// A human-readable name for the unsupported operation.
        operation: &'static str,
    },
    /// The request was malformed (empty command, relative mount, …).
    ///
    /// Also used in this module for blank session ids and for mount
    /// targets that fail normalisation.
    #[error("sandbox request was invalid: {0}")]
    InvalidRequest(String),
    /// A provision/suspend/resume/terminate step failed.
    #[error("sandbox lifecycle operation failed: {0}")]
    Lifecycle(String),
    /// Executing the requested command failed.
    #[error("sandbox exec failed: {0}")]
    Exec(String),
    /// Applying or enforcing a network policy failed.
    #[error("sandbox network policy failed: {0}")]
    NetworkPolicy(String),
    /// An underlying I/O operation failed.
    #[error("sandbox I/O failed: {0}")]
    Io(#[from] std::io::Error),
    /// JSON (de)serialisation failed.
    #[error("sandbox JSON failed: {0}")]
    Json(#[from] serde_json::Error),
    /// A spawned async task failed to join.
    #[error("sandbox task failed: {0}")]
    Task(#[from] tokio::task::JoinError),
}
87
/// Result alias for sandbox operations; the error side is always
/// [`SandboxError`].
pub type SandboxResult<T> = Result<T, SandboxError>;
90
/// Stable identifier for a provisioned sandbox session.
///
/// Because of `#[serde(transparent)]` the id (de)serialises as a bare
/// string rather than a wrapper object. The inner `String` is public, so
/// the blank-value check in [`SandboxSessionId::new`] only applies to ids
/// built through that constructor.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(transparent)]
pub struct SandboxSessionId(pub String);
95
96impl SandboxSessionId {
97    /// Construct a session id, rejecting blank values.
98    pub fn new(value: impl Into<String>) -> SandboxResult<Self> {
99        let value = value.into();
100        if value.trim().is_empty() {
101            return Err(SandboxError::InvalidRequest(
102                "session id cannot be empty".to_string(),
103            ));
104        }
105        Ok(Self(value))
106    }
107}
108
109impl std::fmt::Display for SandboxSessionId {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        f.write_str(&self.0)
112    }
113}
114
/// Egress policy for a sandbox session. The wire shape matches the
/// Anthropic sandbox network-policy contract so cloud backends can
/// forward it verbatim.
///
/// The enum is internally tagged on `mode` with snake_case variant names,
/// so a limited policy serialises as
/// `{"mode": "limited", "allowed_hosts": [...]}`. The default is
/// [`NetworkPolicy::Unrestricted`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum NetworkPolicy {
    /// No egress restrictions.
    #[default]
    Unrestricted,
    /// Egress restricted to the listed hosts. An empty list denies all
    /// network access.
    Limited {
        /// Host allowlist; empty means deny-all.
        allowed_hosts: Vec<String>,
    },
}
131
/// Whether a mount is writable by the guest.
///
/// Serialised in snake_case, i.e. `read_only` / `read_write`.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FilesystemAccess {
    /// The guest may read but not write.
    ReadOnly,
    /// The guest may read and write.
    ReadWrite,
}
141
/// A requested mount: a host `source` exposed to the guest at `target`.
///
/// This is plain data with public fields: constructing or deserialising
/// it performs no validation of either path.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct FilesystemMount {
    /// Host path to expose. Empty means "allocate a fresh directory
    /// under the session root".
    pub source: PathBuf,
    /// Absolute guest path the source is mounted at.
    pub target: String,
    /// Read-only or read-write.
    pub access: FilesystemAccess,
}
153
/// Resource ceilings applied to a session.
///
/// Every field is optional; the derived `Default` leaves all of them
/// `None`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ResourceLimits {
    /// Maximum wall-clock duration for any single exec.
    pub wall_time: Option<Duration>,
    /// CPU count hint for backends that can allocate it.
    pub cpu_count: Option<u32>,
    /// Memory ceiling in megabytes.
    pub memory_mb: Option<u32>,
    /// Idle timeout before a backend may suspend the session.
    pub idle_timeout: Option<Duration>,
}
166
/// The full request to provision a session: the runtime lowering of a
/// declared permission policy.
///
/// The derived `Default` yields no session id, no labels,
/// [`NetworkPolicy::Unrestricted`], no extra mounts, and no limits.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxSpec {
    /// Optional caller-chosen id; backends mint one when absent.
    pub session_id: Option<SandboxSessionId>,
    /// Free-form labels propagated to the backend (tenant, persona, …).
    pub labels: BTreeMap<String, String>,
    /// Egress policy.
    pub network_policy: NetworkPolicy,
    /// Mounts beyond the canonical memory/outputs pair.
    pub mounts: Vec<FilesystemMount>,
    /// Resource ceilings.
    pub limits: ResourceLimits,
}
182
/// Lifecycle state of a session.
///
/// Serialised in snake_case (`provisioned`, `running`, `suspended`,
/// `terminated`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SandboxState {
    /// Provisioned but not yet running.
    Provisioned,
    /// Live and accepting exec requests.
    Running,
    /// Suspended; resumes on next exec.
    Suspended,
    /// Torn down.
    Terminated,
}
196
/// A provisioned session as seen by callers.
///
/// This is the value returned by the provisioning, mount, network-policy,
/// and resume methods of [`SandboxBackend`].
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxSession {
    /// Session id.
    pub id: SandboxSessionId,
    /// Name of the backend that owns the session.
    pub backend: String,
    /// Current lifecycle state.
    pub state: SandboxState,
    /// Mounts resolved to their host/guest paths.
    pub mounts: Vec<ResolvedMount>,
    /// Backend-specific metadata (e.g. the session root path).
    pub metadata: BTreeMap<String, String>,
}
211
/// A mount resolved to concrete host/guest paths.
///
/// Unlike [`FilesystemMount`], the host side is optional here.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct ResolvedMount {
    /// Absolute guest path.
    pub target: String,
    /// Read-only or read-write.
    pub access: FilesystemAccess,
    /// Host path, when the backend exposes one (remote guests may not).
    pub host_path: Option<PathBuf>,
}
222
/// A command to run inside a session.
///
/// The derived `Default` leaves `command` empty, and an empty command is
/// listed as a malformed request under [`SandboxError::InvalidRequest`],
/// so set `command` before submitting a defaulted request.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExecRequest {
    /// Executable or shell builtin to run.
    pub command: String,
    /// Arguments.
    pub args: Vec<String>,
    /// Working directory; resolved against mounts then the session root.
    pub cwd: Option<String>,
    /// Extra environment variables.
    pub env: BTreeMap<String, String>,
    /// Data piped to the command's stdin.
    pub stdin: Option<String>,
    /// Per-exec timeout; falls back to [`ResourceLimits::wall_time`].
    pub timeout: Option<Duration>,
}
239
/// The outcome of an [`ExecRequest`].
///
/// Use [`ExecResult::success`] to test for a clean exit; it considers both
/// `exit_code` and `timed_out`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExecResult {
    /// Captured stdout.
    pub stdout: String,
    /// Captured stderr.
    pub stderr: String,
    /// Process exit code.
    pub exit_code: i32,
    /// Whether the exec hit its timeout.
    pub timed_out: bool,
}
252
253impl ExecResult {
254    /// True when the command exited zero and did not time out.
255    pub fn success(&self) -> bool {
256        self.exit_code == 0 && !self.timed_out
257    }
258}
259
/// A point-in-time snapshot handle for a session.
///
/// Returned by [`SandboxBackend::snapshot`]. It holds identifiers and
/// metadata only, not the snapshot contents.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxSnapshot {
    /// Session the snapshot belongs to.
    pub session_id: SandboxSessionId,
    /// Backend that produced it.
    pub backend: String,
    /// Backend-specific snapshot identifier.
    pub snapshot_id: String,
    /// Snapshot metadata.
    pub metadata: BTreeMap<String, String>,
}
272
/// What a backend can do, so callers can degrade gracefully.
///
/// Returned by [`SandboxBackend::capabilities`]. There is no `Default`
/// impl, so every flag must be stated explicitly by the backend.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SandboxCapabilities {
    /// Enforces an OS-level process sandbox locally.
    pub local_process_sandbox: bool,
    /// Honours a per-host network allowlist.
    pub network_policy: bool,
    /// Supports snapshots.
    pub snapshot: bool,
    /// Supports resuming a suspended session.
    pub resume: bool,
    /// Suspends sessions after an idle timeout.
    pub suspend_on_idle: bool,
}
287
/// Pluggable enforcement backend. Implementations make a [`SandboxSpec`]
/// (the runtime lowering of a permission policy) real and run commands
/// under it.
///
/// Implementors must be `Send + Sync`. The async methods are provided
/// through `#[async_trait]`, and every fallible method reports failures as
/// a [`SandboxError`] via [`SandboxResult`].
#[async_trait]
pub trait SandboxBackend: Send + Sync {
    /// Stable backend name (used in [`SandboxSession::backend`]).
    ///
    /// It is a `&'static str`, the same type as the `backend` field of
    /// [`SandboxError::Unsupported`].
    fn name(&self) -> &'static str;

    /// What this backend can enforce.
    fn capabilities(&self) -> SandboxCapabilities;

    /// Provision a session from a spec.
    ///
    /// `spec.session_id` is optional; per [`SandboxSpec`], a backend mints
    /// an id when none is supplied.
    async fn provision(&self, spec: SandboxSpec) -> SandboxResult<SandboxSession>;

    /// Attach an additional mount to a live session.
    ///
    /// Returns the session view after the call.
    async fn attach_filesystem(
        &self,
        session_id: &SandboxSessionId,
        mount: FilesystemMount,
    ) -> SandboxResult<SandboxSession>;

    /// Apply (or update) the egress policy on a live session.
    ///
    /// Whether a backend can honour a per-host allowlist is advertised by
    /// [`SandboxCapabilities::network_policy`].
    async fn apply_network_policy(
        &self,
        session_id: &SandboxSessionId,
        policy: NetworkPolicy,
    ) -> SandboxResult<SandboxSession>;

    /// Run a command inside a session.
    ///
    /// A non-zero exit or a timeout is carried inside the returned
    /// [`ExecResult`] (see [`ExecResult::success`]).
    // NOTE(review): which conditions surface as `Err` instead is decided by
    // each backend and is not visible from this trait — confirm per backend.
    async fn exec(
        &self,
        session_id: &SandboxSessionId,
        request: ExecRequest,
    ) -> SandboxResult<ExecResult>;

    /// Snapshot a session.
    async fn snapshot(&self, session_id: &SandboxSessionId) -> SandboxResult<SandboxSnapshot>;

    /// Resume a suspended session.
    async fn resume(&self, session_id: &SandboxSessionId) -> SandboxResult<SandboxSession>;

    /// Tear a session down.
    async fn terminate(&self, session_id: &SandboxSessionId) -> SandboxResult<()>;
}
332
333/// Normalise a guest mount target: trim trailing slashes and require an
334/// absolute path.
335pub(crate) fn normalized_mount_target(target: &str) -> SandboxResult<String> {
336    let trimmed = target.trim().trim_end_matches('/');
337    if !trimmed.starts_with('/') {
338        return Err(SandboxError::InvalidRequest(format!(
339            "mount target `{target}` must be absolute"
340        )));
341    }
342    if trimmed.split('/').any(|segment| segment == "..") {
343        return Err(SandboxError::InvalidRequest(format!(
344            "mount target `{target}` must not contain a `..` component"
345        )));
346    }
347    Ok(trimmed.to_string())
348}
349
/// Wrap a value in single quotes so a POSIX shell treats it as one
/// literal word.
///
/// Each embedded single quote is emitted as `'"'"'`: close the quoted
/// run, add a double-quoted `'`, then reopen. An empty input yields `''`.
pub(crate) fn sh_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('\'');
    for ch in value.chars() {
        if ch == '\'' {
            quoted.push_str("'\"'\"'");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
358
/// Render a value as a double-quoted Harn string literal.
///
/// Backslash, double quote, newline, carriage return, and tab are written
/// as the escapes `\\`, `\"`, `\n`, `\r`, and `\t`; every other character
/// is copied through unchanged.
pub(crate) fn harn_string(value: &str) -> String {
    let mut literal = String::with_capacity(value.len() + 2);
    literal.push('"');
    for ch in value.chars() {
        let escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(sequence) = escape {
            literal.push_str(sequence);
        } else {
            literal.push(ch);
        }
    }
    literal.push('"');
    literal
}
376
/// Convert a duration to whole seconds, never returning less than one so
/// `timeout(1)` is not handed a zero argument.
///
/// Fractional seconds are dropped rather than rounded, so anything under
/// two seconds comes back as `1`.
pub(crate) fn duration_secs(duration: Duration) -> u64 {
    match duration.as_secs() {
        0 => 1,
        whole => whole,
    }
}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// A limited policy must serialise with a `mode` tag next to its
    /// `allowed_hosts` list.
    #[test]
    fn network_policy_uses_anthropic_compatible_shape() {
        let policy = NetworkPolicy::Limited {
            allowed_hosts: vec![String::from("api.github.com")],
        };
        let expected = serde_json::json!({
            "mode": "limited",
            "allowed_hosts": ["api.github.com"]
        });

        let actual = serde_json::to_value(policy).unwrap();

        assert_eq!(actual, expected);
    }

    /// Embedded single quotes and the empty string are both quoted safely.
    #[test]
    fn quotes_shell_values() {
        let cases = [("a'b", "'a'\"'\"'b'"), ("", "''")];
        for (input, expected) in cases.iter() {
            assert_eq!(sh_quote(input), *expected);
        }
    }

    /// A target containing `..` is refused with the traversal message.
    #[test]
    fn normalized_mount_target_rejects_parent_traversal() {
        let message = normalized_mount_target("/mnt/memory/../../etc/passwd")
            .unwrap_err()
            .to_string();
        assert!(message.contains("must not contain a `..` component"));
    }
}