Skip to main content

harn_hostlib/sandbox/
mod.rs

1//! The runtime arm of the permission primitive: pluggable sandbox
2//! backends that *enforce* a declared policy rather than merely gating
3//! tool dispatch.
4//!
5//! A sandbox is the runtime answer to a permission policy. The
6//! authoritative policy model lives in `harn-serve`'s `permissions`
7//! module (the `policy { read, write, exec, net }` block); this module
8//! is where that policy becomes true at execution time. `harn-serve`
9//! lowers a `PermissionPolicy` into a [`SandboxSpec`] and a backend
10//! makes the spec real:
11//!
12//! - **filesystem** — mounts scope what the spawned process can touch;
13//!   reads and writes outside the declared roots are rejected by the
14//!   underlying OS sandbox.
15//! - **process** — every command runs through `harn-vm`'s process
16//!   sandbox, which maps the policy onto Landlock/seccomp (Linux),
17//!   `sandbox-exec` (macOS), Job Objects (Windows), and `pledge`/
18//!   `unveil` (OpenBSD).
19//! - **network** — egress is governed by [`NetworkPolicy`]; a backend
20//!   advertises whether it can honour a per-host allowlist via
21//!   [`SandboxCapabilities::network_policy`].
22//!
23//! The [`LocalSandbox`] backend ships here because the process/fs
24//! enforcement it relies on already lives in `harn-vm`; remote backends
25//! (Fly Machines, Modal, E2B, …) implement the same [`SandboxBackend`]
26//! contract from wherever they run.
27
28mod local;
29
30pub use local::{LocalSandbox, LocalSandboxConfig};
31
32use std::collections::BTreeMap;
33use std::path::PathBuf;
34use std::time::Duration;
35
36use async_trait::async_trait;
37use serde::{Deserialize, Serialize};
38use thiserror::Error;
39
/// Guest path where durable agent memory is mounted (read-only by
/// default). Backends surface the corresponding host path via the
/// `HARN_MEMORY_DIR` environment variable.
pub const MEMORY_MOUNT: &str = "/mnt/memory";
44
/// Guest path of a session's writable scratch/output directory.
/// Backends surface the corresponding host path via the
/// `HARN_OUTPUTS_DIR` environment variable.
pub const OUTPUTS_MOUNT: &str = "/mnt/session/outputs";
49
50/// Errors surfaced by a [`SandboxBackend`].
51#[derive(Debug, Error)]
52pub enum SandboxError {
53    /// No live session matches the supplied id.
54    #[error("sandbox session `{0}` was not found")]
55    SessionNotFound(String),
56    /// The backend cannot honour the requested operation (e.g. a local
57    /// backend asked for a per-host egress allowlist).
58    #[error("backend `{backend}` does not support {operation}")]
59    Unsupported {
60        /// The backend that rejected the operation.
61        backend: &'static str,
62        /// A human-readable name for the unsupported operation.
63        operation: &'static str,
64    },
65    /// The request was malformed (empty command, relative mount, …).
66    #[error("sandbox request was invalid: {0}")]
67    InvalidRequest(String),
68    /// A provision/suspend/resume/terminate step failed.
69    #[error("sandbox lifecycle operation failed: {0}")]
70    Lifecycle(String),
71    /// Executing the requested command failed.
72    #[error("sandbox exec failed: {0}")]
73    Exec(String),
74    /// Applying or enforcing a network policy failed.
75    #[error("sandbox network policy failed: {0}")]
76    NetworkPolicy(String),
77    /// An underlying I/O operation failed.
78    #[error("sandbox I/O failed: {0}")]
79    Io(#[from] std::io::Error),
80    /// JSON (de)serialisation failed.
81    #[error("sandbox JSON failed: {0}")]
82    Json(#[from] serde_json::Error),
83    /// A spawned async task failed to join.
84    #[error("sandbox task failed: {0}")]
85    Task(#[from] tokio::task::JoinError),
86}
87
88/// Result alias for sandbox operations.
89pub type SandboxResult<T> = Result<T, SandboxError>;
90
91/// Stable identifier for a provisioned sandbox session.
92#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
93#[serde(transparent)]
94pub struct SandboxSessionId(pub String);
95
96impl SandboxSessionId {
97    /// Construct a session id, rejecting blank values.
98    pub fn new(value: impl Into<String>) -> SandboxResult<Self> {
99        let value = value.into();
100        if value.trim().is_empty() {
101            return Err(SandboxError::InvalidRequest(
102                "session id cannot be empty".to_string(),
103            ));
104        }
105        Ok(Self(value))
106    }
107}
108
109impl std::fmt::Display for SandboxSessionId {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        f.write_str(&self.0)
112    }
113}
114
115/// Egress policy for a sandbox session. The wire shape matches the
116/// Anthropic sandbox network-policy contract so cloud backends can
117/// forward it verbatim.
118#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
119#[serde(tag = "mode", rename_all = "snake_case")]
120pub enum NetworkPolicy {
121    /// No egress restrictions.
122    Unrestricted,
123    /// Egress restricted to the listed hosts. An empty list denies all
124    /// network access.
125    Limited {
126        /// Host allowlist; empty means deny-all.
127        allowed_hosts: Vec<String>,
128    },
129}
130
131/// Deny egress by default. A [`SandboxSpec`] built without an explicit network
132/// policy gets no network access, so new call sites are secure-by-default and
133/// must opt into egress with a host allowlist (or [`NetworkPolicy::Unrestricted`]).
134/// This matches the deny-by-default lowering in `harn-serve`
135/// (`PermissionPolicy::to_network_policy`, where an empty allowlist denies all).
136impl Default for NetworkPolicy {
137    fn default() -> Self {
138        Self::Limited {
139            allowed_hosts: Vec::new(),
140        }
141    }
142}
143
144/// Whether a mount is writable by the guest.
145#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
146#[serde(rename_all = "snake_case")]
147pub enum FilesystemAccess {
148    /// The guest may read but not write.
149    ReadOnly,
150    /// The guest may read and write.
151    ReadWrite,
152}
153
154/// A requested mount: a host `source` exposed to the guest at `target`.
155#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
156pub struct FilesystemMount {
157    /// Host path to expose. Empty means "allocate a fresh directory
158    /// under the session root".
159    pub source: PathBuf,
160    /// Absolute guest path the source is mounted at.
161    pub target: String,
162    /// Read-only or read-write.
163    pub access: FilesystemAccess,
164}
165
166/// Resource ceilings applied to a session.
167#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
168pub struct ResourceLimits {
169    /// Maximum wall-clock duration for any single exec.
170    pub wall_time: Option<Duration>,
171    /// CPU count hint for backends that can allocate it.
172    pub cpu_count: Option<u32>,
173    /// Memory ceiling in megabytes.
174    pub memory_mb: Option<u32>,
175    /// Idle timeout before a backend may suspend the session.
176    pub idle_timeout: Option<Duration>,
177}
178
179/// The full request to provision a session: the runtime lowering of a
180/// declared permission policy.
181#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
182pub struct SandboxSpec {
183    /// Optional caller-chosen id; backends mint one when absent.
184    pub session_id: Option<SandboxSessionId>,
185    /// Free-form labels propagated to the backend (tenant, persona, …).
186    pub labels: BTreeMap<String, String>,
187    /// Egress policy.
188    pub network_policy: NetworkPolicy,
189    /// Mounts beyond the canonical memory/outputs pair.
190    pub mounts: Vec<FilesystemMount>,
191    /// Resource ceilings.
192    pub limits: ResourceLimits,
193}
194
195/// Lifecycle state of a session.
196#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
197#[serde(rename_all = "snake_case")]
198pub enum SandboxState {
199    /// Provisioned but not yet running.
200    Provisioned,
201    /// Live and accepting exec requests.
202    Running,
203    /// Suspended; resumes on next exec.
204    Suspended,
205    /// Torn down.
206    Terminated,
207}
208
209/// A provisioned session as seen by callers.
210#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
211pub struct SandboxSession {
212    /// Session id.
213    pub id: SandboxSessionId,
214    /// Name of the backend that owns the session.
215    pub backend: String,
216    /// Current lifecycle state.
217    pub state: SandboxState,
218    /// Mounts resolved to their host/guest paths.
219    pub mounts: Vec<ResolvedMount>,
220    /// Backend-specific metadata (e.g. the session root path).
221    pub metadata: BTreeMap<String, String>,
222}
223
224/// A mount resolved to concrete host/guest paths.
225#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
226pub struct ResolvedMount {
227    /// Absolute guest path.
228    pub target: String,
229    /// Read-only or read-write.
230    pub access: FilesystemAccess,
231    /// Host path, when the backend exposes one (remote guests may not).
232    pub host_path: Option<PathBuf>,
233}
234
235/// A command to run inside a session.
236#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
237pub struct ExecRequest {
238    /// Executable or shell builtin to run.
239    pub command: String,
240    /// Arguments.
241    pub args: Vec<String>,
242    /// Working directory; resolved against mounts then the session root.
243    pub cwd: Option<String>,
244    /// Extra environment variables.
245    pub env: BTreeMap<String, String>,
246    /// Data piped to the command's stdin.
247    pub stdin: Option<String>,
248    /// Per-exec timeout; falls back to [`ResourceLimits::wall_time`].
249    pub timeout: Option<Duration>,
250}
251
252/// The outcome of an [`ExecRequest`].
253#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
254pub struct ExecResult {
255    /// Captured stdout.
256    pub stdout: String,
257    /// Captured stderr.
258    pub stderr: String,
259    /// Process exit code.
260    pub exit_code: i32,
261    /// Whether the exec hit its timeout.
262    pub timed_out: bool,
263}
264
265impl ExecResult {
266    /// True when the command exited zero and did not time out.
267    pub fn success(&self) -> bool {
268        self.exit_code == 0 && !self.timed_out
269    }
270}
271
272/// A point-in-time snapshot handle for a session.
273#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
274pub struct SandboxSnapshot {
275    /// Session the snapshot belongs to.
276    pub session_id: SandboxSessionId,
277    /// Backend that produced it.
278    pub backend: String,
279    /// Backend-specific snapshot identifier.
280    pub snapshot_id: String,
281    /// Snapshot metadata.
282    pub metadata: BTreeMap<String, String>,
283}
284
285/// What a backend can do, so callers can degrade gracefully.
286#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
287pub struct SandboxCapabilities {
288    /// Enforces an OS-level process sandbox locally.
289    pub local_process_sandbox: bool,
290    /// Honours a per-host network allowlist.
291    pub network_policy: bool,
292    /// Supports snapshots.
293    pub snapshot: bool,
294    /// Supports resuming a suspended session.
295    pub resume: bool,
296    /// Suspends sessions after an idle timeout.
297    pub suspend_on_idle: bool,
298}
299
/// Pluggable enforcement backend. Implementations make a [`SandboxSpec`]
/// (the runtime lowering of a permission policy) real and run commands
/// under it.
///
/// Implementors must be `Send + Sync`. The async methods are declared
/// through `#[async_trait]`. Every fallible method reports failures as a
/// [`SandboxError`]; which variant is used for a given failure is up to
/// the backend.
#[async_trait]
pub trait SandboxBackend: Send + Sync {
    /// Stable backend name (used in [`SandboxSession::backend`]).
    fn name(&self) -> &'static str;

    /// What this backend can enforce. Callers can inspect the returned
    /// flags before requesting an optional feature.
    fn capabilities(&self) -> SandboxCapabilities;

    /// Provision a session from a spec.
    ///
    /// Takes the spec by value and, on success, yields the
    /// [`SandboxSession`] describing the new session.
    async fn provision(&self, spec: SandboxSpec) -> SandboxResult<SandboxSession>;

    /// Attach an additional mount to a live session.
    ///
    /// On success yields a [`SandboxSession`] for `session_id`.
    async fn attach_filesystem(
        &self,
        session_id: &SandboxSessionId,
        mount: FilesystemMount,
    ) -> SandboxResult<SandboxSession>;

    /// Apply (or update) the egress policy on a live session.
    ///
    /// On success yields a [`SandboxSession`] for `session_id`.
    // NOTE(review): a backend whose `capabilities().network_policy` is
    // false presumably answers with `SandboxError::Unsupported` — confirm
    // against each implementation.
    async fn apply_network_policy(
        &self,
        session_id: &SandboxSessionId,
        policy: NetworkPolicy,
    ) -> SandboxResult<SandboxSession>;

    /// Run a command inside a session and return its captured
    /// [`ExecResult`].
    async fn exec(
        &self,
        session_id: &SandboxSessionId,
        request: ExecRequest,
    ) -> SandboxResult<ExecResult>;

    /// Snapshot a session, returning a [`SandboxSnapshot`] handle.
    async fn snapshot(&self, session_id: &SandboxSessionId) -> SandboxResult<SandboxSnapshot>;

    /// Resume a suspended session.
    async fn resume(&self, session_id: &SandboxSessionId) -> SandboxResult<SandboxSession>;

    /// Tear a session down.
    async fn terminate(&self, session_id: &SandboxSessionId) -> SandboxResult<()>;
}
344
345/// Normalise a guest mount target: trim trailing slashes and require an
346/// absolute path.
347pub(crate) fn normalized_mount_target(target: &str) -> SandboxResult<String> {
348    let trimmed = target.trim().trim_end_matches('/');
349    if !trimmed.starts_with('/') {
350        return Err(SandboxError::InvalidRequest(format!(
351            "mount target `{target}` must be absolute"
352        )));
353    }
354    if trimmed.split('/').any(|segment| segment == "..") {
355        return Err(SandboxError::InvalidRequest(format!(
356            "mount target `{target}` must not contain a `..` component"
357        )));
358    }
359    Ok(trimmed.to_string())
360}
361
/// Render `value` as a double-quoted Harn string literal.
///
/// Backslash, double quote, newline, carriage return and tab are
/// replaced by their two-character escapes (`\\`, `\"`, `\n`, `\r`,
/// `\t`); every other character is copied through unchanged.
// NOTE(review): other control characters (e.g. NUL) are emitted
// verbatim — confirm the Harn lexer accepts them inside a literal.
pub(crate) fn harn_string(value: &str) -> String {
    // Two extra bytes for the surrounding quotes; escapes may grow it.
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for c in value.chars() {
        let escape = match c {
            '\\' => "\\\\",
            '"' => "\\\"",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            _ => {
                quoted.push(c);
                continue;
            }
        };
        quoted.push_str(escape);
    }
    quoted.push('"');
    quoted
}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// A limited policy must serialise to the tagged
    /// `{"mode": ..., "allowed_hosts": [...]}` object.
    #[test]
    fn network_policy_uses_anthropic_compatible_shape() {
        let policy = NetworkPolicy::Limited {
            allowed_hosts: vec![String::from("api.github.com")],
        };
        let expected = serde_json::json!({
            "mode": "limited",
            "allowed_hosts": ["api.github.com"]
        });

        let actual = serde_json::to_value(policy).unwrap();

        assert_eq!(actual, expected);
    }

    /// A target that climbs out through `..` must be refused.
    #[test]
    fn normalized_mount_target_rejects_parent_traversal() {
        let rejection = normalized_mount_target("/mnt/memory/../../etc/passwd").unwrap_err();
        let message = rejection.to_string();

        assert!(message.contains("must not contain a `..` component"));
    }
}