use std::path::Path;
use std::process::Stdio;
use std::time::Duration;
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
use crate::error::IsolationError;
use crate::podman;
/// Name of the container CLI binary invoked for every sandbox operation.
const PODMAN: &str = "podman";
/// Image used when a session requests none, or when it requests a different
/// image while `SandboxPolicy::allow_untrusted_image` is off.
pub const DEFAULT_IMAGE: &str = "docker.io/library/alpine:3.20";
/// Linux capabilities removed from every session container; each entry is
/// passed to `podman run` as its own `--cap-drop <CAP>` pair.
const DROPPED_CAPS: &[&str] = &[
    "SYS_ADMIN", "SYS_PTRACE", "SYS_MODULE", "SYS_RAWIO", "SYS_BOOT", "SYS_TIME", "NET_ADMIN", "NET_RAW", "DAC_READ_SEARCH", "MKNOD", "AUDIT_WRITE", ];
/// Value handed to `--pids-limit` when resource limits are applied.
const PIDS_LIMIT: &str = "512";
/// Network mode selected for a session container.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SandboxNetwork {
    /// No `--network` flag is passed to `podman run`; the requested ports are
    /// published with `-p`. This is the default variant.
    #[default]
    Bridge,
    /// The container is started with `--network none` and no ports are
    /// published, whatever ports were requested.
    None,
}
/// Security knobs consulted when a session container is started.
#[derive(Debug, Clone)]
pub struct SandboxPolicy {
    /// Run the post-create commands supplied to `SessionSandbox::start`.
    /// When `false` they are skipped and a warning is logged instead.
    pub allow_post_create: bool,
    /// Honour a requested image other than `DEFAULT_IMAGE`. When `false`,
    /// such a request falls back to `DEFAULT_IMAGE` with a warning.
    pub allow_untrusted_image: bool,
    /// Network mode for the container (see `SandboxNetwork`).
    pub network: SandboxNetwork,
    /// When `true`, a cgroup-related failure while applying memory/CPU/pid
    /// limits aborts startup instead of retrying without limits.
    pub require_resource_limits: bool,
}
impl Default for SandboxPolicy {
    /// Builds the policy with every opt-in switched off and the default
    /// network mode (`SandboxNetwork::Bridge`).
    fn default() -> Self {
        // All three boolean opt-ins start disabled.
        let (allow_post_create, allow_untrusted_image, require_resource_limits) =
            (false, false, false);
        Self {
            allow_post_create,
            allow_untrusted_image,
            network: SandboxNetwork::default(),
            require_resource_limits,
        }
    }
}
/// Captured outcome of a command executed inside a session container.
#[derive(Debug, Clone)]
pub struct ExecResult {
    /// Everything the command wrote to standard output.
    pub stdout: String,
    /// Everything the command wrote to standard error.
    pub stderr: String,
    /// Exit status of the command.
    pub exit_code: i32,
}

impl ExecResult {
    /// Reports whether the command finished with exit code zero.
    pub fn ok(&self) -> bool {
        matches!(self.exit_code, 0)
    }
}
/// Serializable snapshot of a background task, as returned by
/// `SessionSandbox::list_tasks`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct BgTask {
    /// Identifier handed out by `SessionSandbox::spawn_background`.
    pub id: String,
    /// The shell command the task was started with.
    pub command: String,
    /// Current state text: `running`, `failed: …`, `exited (<code>)` or
    /// `error: …`.
    pub status: String,
    /// Copy of the task's captured output at snapshot time.
    pub log: String,
}
/// Live bookkeeping entry for one background task. `status` and `log` are
/// shared with the spawned task that runs the command and updates them.
struct BgTaskHandle {
    /// Identifier handed out by `SessionSandbox::spawn_background`.
    id: String,
    /// The shell command the task was started with.
    command: String,
    /// State text written by the spawned task.
    status: std::sync::Arc<std::sync::Mutex<String>>,
    /// Captured output appended by the spawned task.
    log: std::sync::Arc<std::sync::Mutex<String>>,
}
/// Handle to one session's podman container, plus the background tasks and
/// PTY terminals started through this handle.
pub struct SessionSandbox {
    /// Name of the podman container (`axo-ses-<session id>` when created by
    /// `start`).
    container: String,
    /// Host directory of the session; `start` bind-mounts it read-write at
    /// the same path inside the container and uses it as the working dir.
    working_dir: std::path::PathBuf,
    /// Background tasks registered by `spawn_background`.
    tasks: std::sync::Mutex<Vec<BgTaskHandle>>,
    /// Terminals registered by `spawn_pty`.
    terminals: std::sync::Mutex<Vec<std::sync::Arc<crate::pty::PtyTerminal>>>,
}
impl SessionSandbox {
/// Creates, starts and provisions the sandbox container for a session.
///
/// Steps, in order:
/// 1. `podman::ensure_ready()` must succeed.
/// 2. The image is resolved: a requested image other than [`DEFAULT_IMAGE`]
///    is only honoured when `policy.allow_untrusted_image` is set.
/// 3. Any container already named `axo-ses-{session_id}` is force-removed.
/// 4. `podman run` is attempted and, on failure, retried in degraded form:
///    without resource limits after a cgroup error (unless
///    `policy.require_resource_limits`), and without the conflicting
///    published port(s) after a port conflict.
/// 5. Dev essentials are installed, then `post_create_commands` are run if
///    `policy.allow_post_create` is set (failures there are only logged).
///
/// `working_dir` is bind-mounted read-write at the same path inside the
/// container and used as its working directory.
///
/// # Errors
///
/// Propagates the error from `podman::ensure_ready()`, and returns
/// [`IsolationError::OciContainerFailed`] when the container cannot be
/// started. Whatever a failed `podman run` left behind under the session's
/// container name is removed before the error is returned.
pub async fn start(
    session_id: &str,
    working_dir: &Path,
    image: Option<&str>,
    exposed_ports: &[u16],
    post_create_commands: &[String],
    policy: &SandboxPolicy,
) -> Result<Self, IsolationError> {
    podman::ensure_ready().await?;
    let container = format!("axo-ses-{session_id}");
    let dir = working_dir.to_string_lossy().to_string();
    let image = match image {
        Some(img) if img != DEFAULT_IMAGE && !policy.allow_untrusted_image => {
            tracing::warn!(
                "session requested non-default image '{img}', but \
                 sandbox.allow_untrusted_images is off — using the trusted \
                 default ({DEFAULT_IMAGE}). Enable it to opt in."
            );
            DEFAULT_IMAGE
        }
        Some(img) => img,
        None => DEFAULT_IMAGE,
    };

    // Clear out any stale container still registered under this name.
    Self::force_remove(&container).await;

    let mount = format!("{dir}:{dir}:rw");
    let mut with_limits = true;
    let mut publish: Vec<u16> = exposed_ports.to_vec();
    loop {
        let e = match Self::run_container(
            &container,
            &mount,
            &dir,
            image,
            with_limits,
            &publish,
            policy,
        )
        .await
        {
            Ok(()) => break,
            Err(e) => e,
        };

        // A failed `podman run` may leave a container behind under our
        // name. Remove it after every failed attempt, including the fatal
        // ones below, so an aborted start does not leak a container.
        Self::force_remove(&container).await;

        if with_limits && e.contains("cgroup") {
            if policy.require_resource_limits {
                return Err(IsolationError::OciContainerFailed(format!(
                    "resource limits required but unavailable on this host \
                     (cgroup delegation missing): {e}. Set \
                     sandbox.require_resource_limits = false to allow an \
                     uncapped sandbox."
                )));
            }
            tracing::warn!(
                "this host cannot apply container resource limits \
                 (rootless podman / no cgroup delegation) — starting \
                 the sandbox without memory/CPU caps"
            );
            with_limits = false;
        } else if Self::is_port_conflict(&e) && !publish.is_empty() {
            match Self::extract_conflicting_port(&e) {
                Some(bad) if publish.contains(&bad) => {
                    tracing::warn!(
                        "host port {bad} already in use — dropping it \
                         from this session's published ports (other \
                         ports stay mapped). Free the port and \
                         recreate the session to get it back."
                    );
                    publish.retain(|p| *p != bad);
                }
                _ => {
                    tracing::warn!(
                        "port conflict but couldn't identify which \
                         port ({e}) — dropping all port forwarding \
                         for this session"
                    );
                    publish.clear();
                }
            }
        } else {
            return Err(IsolationError::OciContainerFailed(e));
        }
    }

    Self::install_dev_essentials(&container).await;

    if policy.allow_post_create {
        for script in post_create_commands {
            Self::run_post_create(&container, script).await;
        }
    } else if !post_create_commands.is_empty() {
        tracing::warn!(
            "skipping {} project setup script(s) (postCreateCommand) for \
             session container ({container}): these come from the opened \
             repository and are not run automatically. Set \
             sandbox.allow_post_create_command = true to enable.",
            post_create_commands.len()
        );
    }

    Ok(Self::attach(&container, working_dir))
}

/// Best-effort `podman rm -f <container>`; the outcome is deliberately
/// ignored because the container may simply not exist.
async fn force_remove(container: &str) {
    let _ = Command::new(PODMAN)
        .args(["rm", "-f", container])
        .output()
        .await;
}

/// Runs one post-create script via `sh -c` inside the container. A failing
/// or unrunnable script is logged as a warning and never aborts startup.
async fn run_post_create(container: &str, script: &str) {
    tracing::info!(
        "running post-create script in session container ({container}): {script}"
    );
    let out = Command::new(PODMAN)
        .args(["exec", container, "sh", "-c", script])
        .output()
        .await;
    match out {
        Ok(o) if !o.status.success() => tracing::warn!(
            "post-create script failed (exit {:?}): {}",
            o.status.code(),
            String::from_utf8_lossy(&o.stderr).trim()
        ),
        Err(e) => tracing::warn!("post-create script could not run: {e}"),
        _ => {}
    }
}
/// Host directory this sandbox was created or attached for.
pub fn root(&self) -> &Path {
    self.working_dir.as_path()
}
/// Name of the podman container backing this sandbox.
pub fn container(&self) -> &str {
    self.container.as_str()
}
/// Wraps an already existing container in a handle without running any
/// podman command. The handle starts with no registered background tasks
/// or terminals.
pub fn attach(container: &str, working_dir: &Path) -> Self {
    let tasks = std::sync::Mutex::default();
    let terminals = std::sync::Mutex::default();
    Self {
        container: container.to_owned(),
        working_dir: working_dir.to_owned(),
        tasks,
        terminals,
    }
}
/// Best-effort installation of a basic developer toolset inside the
/// container. The shell script only calls `apk` when it is present and ends
/// in `|| true`; the result of the `podman exec` call itself is ignored too.
async fn install_dev_essentials(container: &str) {
    tracing::info!("provisioning session container ({container}): installing dev essentials");
    let packages = "bash vim nano less git curl wget \
                    python3 py3-pip nodejs npm coreutils";
    let script = format!("command -v apk >/dev/null 2>&1 && apk add --no-cache {packages} >/dev/null 2>&1 || true");
    let mut provision = Command::new(PODMAN);
    provision
        .arg("exec")
        .arg(container)
        .arg("sh")
        .arg("-c")
        .arg(&script);
    // Provisioning must never block session startup, so the outcome is dropped.
    let _ = provision.output().await;
}
/// Returns `true` when podman's error text looks like a failure to bind a
/// published host port. Matching is case-insensitive substring search.
fn is_port_conflict(stderr: &str) -> bool {
    // Fragments that identify a port-binding failure.
    const MARKERS: [&str; 5] = [
        "port is already allocated",
        "address already in use",
        "bind: address",
        "rootlessport",
        "proxy already running",
    ];
    let haystack = stderr.to_lowercase();
    MARKERS.iter().any(|&marker| haystack.contains(marker))
}
/// Force-removes every `axo-ses-*` container whose session id is not listed
/// in `known_ids`.
///
/// Best-effort: if listing containers fails the function returns silently,
/// and the result of each individual removal is ignored.
pub async fn reap_orphans(known_ids: &[String]) {
    let listing = Command::new(PODMAN)
        .args([
            "ps",
            "-a",
            "--filter",
            "name=axo-ses-",
            "--format",
            "{{.Names}}",
        ])
        .output()
        .await;
    let raw = match listing {
        Ok(out) if out.status.success() => out.stdout,
        _ => return,
    };
    let names = String::from_utf8_lossy(&raw);
    for line in names.lines() {
        let name = line.trim();
        if name.is_empty() {
            continue;
        }
        // Skip names without our prefix and containers that still have a
        // matching session.
        match name.strip_prefix("axo-ses-") {
            Some(sid) if !known_ids.iter().any(|k| k.as_str() == sid) => {}
            _ => continue,
        }
        tracing::info!(
            container = name,
            "reaping orphaned session sandbox container (no matching session)"
        );
        let _ = Command::new(PODMAN).args(["rm", "-f", name]).output().await;
    }
}
/// Pulls the first plausible port number out of an error message.
///
/// Every `:` in `stderr` is examined in order. The run of ASCII digits that
/// directly follows it is a candidate when it is 1 to 5 digits long, parses
/// as a `u16`, and is at least 1024. The first candidate wins; `None` is
/// returned when there is none.
fn extract_conflicting_port(stderr: &str) -> Option<u16> {
    stderr
        .split(':')
        // The first piece precedes any colon, so it can never be a port.
        .skip(1)
        .filter_map(|after_colon| {
            let digits = after_colon
                .bytes()
                .take_while(u8::is_ascii_digit)
                .count();
            if digits == 0 || digits > 5 {
                return None;
            }
            // ASCII digits are single bytes, so this slice is always valid.
            after_colon[..digits].parse::<u16>().ok()
        })
        .find(|port| *port >= 1024)
}
/// Assembles the argument vector for `podman run` (without the binary name).
///
/// Emission order: the detached/name/mount/workdir preamble, the
/// `no-new-privileges` option, one `--cap-drop` pair per [`DROPPED_CAPS`]
/// entry, `--network none` when the policy disables networking, the
/// memory/CPU/pid limits when `with_limits` is set, one `-p P:P` pair per
/// published port, and finally the image followed by `sleep infinity`.
/// Ports are never published when the policy's network is `None`.
fn build_run_args(
    container: &str,
    mount: &str,
    dir: &str,
    image: &str,
    with_limits: bool,
    ports: &[u16],
    policy: &SandboxPolicy,
) -> Vec<String> {
    let mut argv: Vec<String> = [
        "run",
        "-d",
        "--name",
        container,
        "-v",
        mount,
        "-w",
        dir,
        "--security-opt=no-new-privileges",
    ]
    .iter()
    .map(|piece| piece.to_string())
    .collect();

    argv.extend(
        DROPPED_CAPS
            .iter()
            .flat_map(|cap| ["--cap-drop".to_string(), cap.to_string()]),
    );

    // With networking disabled nothing can be published, so the requested
    // ports are replaced by an empty list.
    let published: &[u16] = match policy.network {
        SandboxNetwork::Bridge => ports,
        SandboxNetwork::None => {
            argv.extend(["--network", "none"].map(String::from));
            &[]
        }
    };

    if with_limits {
        argv.extend(
            ["--memory", "2g", "--cpus", "2", "--pids-limit", PIDS_LIMIT].map(String::from),
        );
    }

    argv.extend(
        published
            .iter()
            .flat_map(|p| ["-p".to_string(), format!("{p}:{p}")]),
    );

    argv.extend([image, "sleep", "infinity"].map(String::from));
    argv
}
/// Runs `podman run` with the arguments produced by `build_run_args`.
///
/// Returns `Ok(())` when podman exits successfully. Otherwise returns a
/// message: either the spawn error, or podman's trimmed stderr prefixed with
/// `starting session container: `.
async fn run_container(
    container: &str,
    mount: &str,
    dir: &str,
    image: &str,
    with_limits: bool,
    ports: &[u16],
    policy: &SandboxPolicy,
) -> Result<(), String> {
    let argv = Self::build_run_args(container, mount, dir, image, with_limits, ports, policy);
    match Command::new(PODMAN).args(&argv).output().await {
        Err(e) => Err(format!("spawning podman: {e}")),
        Ok(finished) if finished.status.success() => Ok(()),
        Ok(finished) => Err(format!(
            "starting session container: {}",
            String::from_utf8_lossy(&finished.stderr).trim()
        )),
    }
}
/// Runs `argv` inside the container via `podman exec` and collects its
/// output.
///
/// stdout and stderr are decoded lossily as UTF-8. `exit_code` is `-1` when
/// the process reports no exit code.
///
/// # Errors
///
/// * [`IsolationError::Timeout`] when the command does not finish within
///   `timeout`. The `podman exec` client process is killed in that case
///   rather than being left running unobserved.
/// * [`IsolationError::OciContainerFailed`] when podman cannot be spawned or
///   its output cannot be collected.
pub async fn exec(
    &self,
    argv: &[&str],
    timeout: Duration,
) -> Result<ExecResult, IsolationError> {
    let mut cmd = Command::new(PODMAN);
    cmd.arg("exec")
        .arg(&self.container)
        .args(argv)
        // Without this, dropping the pending `output()` future on timeout
        // would leave the child process running.
        .kill_on_drop(true);
    let out = tokio::time::timeout(timeout, cmd.output())
        .await
        .map_err(|_| IsolationError::Timeout(timeout))?
        .map_err(|e| IsolationError::OciContainerFailed(e.to_string()))?;
    Ok(ExecResult {
        stdout: String::from_utf8_lossy(&out.stdout).into_owned(),
        stderr: String::from_utf8_lossy(&out.stderr).into_owned(),
        exit_code: out.status.code().unwrap_or(-1),
    })
}
/// Runs `argv` inside the container via `podman exec -i`, feeding `stdin`
/// to the command's standard input, and collects its output.
///
/// The whole interaction (writing `stdin`, closing the pipe, waiting for
/// the command) is bounded by `timeout`. stdout and stderr are decoded
/// lossily as UTF-8. `exit_code` is `-1` when the process reports no exit
/// code.
///
/// # Errors
///
/// * [`IsolationError::Timeout`] when the interaction exceeds `timeout`.
/// * [`IsolationError::Io`] when writing to the command's stdin fails.
/// * [`IsolationError::OciContainerFailed`] when podman cannot be spawned or
///   its output cannot be collected.
///
/// In every error case the `podman exec` client process is killed rather
/// than being left running.
pub async fn exec_stdin(
    &self,
    argv: &[&str],
    stdin: &str,
    timeout: Duration,
) -> Result<ExecResult, IsolationError> {
    let mut child = Command::new(PODMAN)
        .arg("exec")
        .arg("-i")
        .arg(&self.container)
        .args(argv)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        // Kill the child whenever its handle is dropped early (timeout or
        // stdin write failure).
        .kill_on_drop(true)
        .spawn()
        .map_err(|e| IsolationError::OciContainerFailed(e.to_string()))?;

    // The stdin write is part of the timed section: a command that never
    // drains its stdin would otherwise block this call indefinitely once
    // the pipe buffer is full.
    let interaction = async move {
        if let Some(mut sink) = child.stdin.take() {
            sink.write_all(stdin.as_bytes())
                .await
                .map_err(IsolationError::Io)?;
            // `sink` is dropped here, closing the pipe so the command sees EOF.
        }
        child
            .wait_with_output()
            .await
            .map_err(|e| IsolationError::OciContainerFailed(e.to_string()))
    };

    let out = tokio::time::timeout(timeout, interaction)
        .await
        .map_err(|_| IsolationError::Timeout(timeout))??;
    Ok(ExecResult {
        stdout: String::from_utf8_lossy(&out.stdout).into_owned(),
        stderr: String::from_utf8_lossy(&out.stderr).into_owned(),
        exit_code: out.status.code().unwrap_or(-1),
    })
}
/// Starts `command` (via `sh -c`, with stderr merged into stdout) inside the
/// container as a detached tokio task and returns the new task's id.
///
/// The task is registered so that `list_tasks` can report it. Its status
/// goes from `running` to `failed: …` (podman could not be spawned),
/// `exited (<code>)` or `error: …`. Only the most recent ~64 KiB of output
/// are kept in the log.
///
/// Must be called from within a tokio runtime, since it uses `tokio::spawn`.
pub fn spawn_background(&self, command: &str) -> String {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Process-wide sequence number. A millisecond timestamp alone gives two
    // tasks started within the same millisecond the same id.
    static NEXT_TASK_SEQ: AtomicUsize = AtomicUsize::new(0);
    // Upper bound on the retained log, in bytes.
    const LOG_CAP: usize = 64 * 1024;

    /// Drops the oldest bytes so that `log` holds at most `cap` bytes.
    /// The cut is moved forward to the next char boundary, because
    /// `String::drain` panics on a range that splits a multi-byte character.
    fn keep_tail(log: &mut String, cap: usize) {
        if log.len() <= cap {
            return;
        }
        let mut cut = log.len() - cap;
        while !log.is_char_boundary(cut) {
            cut += 1;
        }
        log.drain(..cut);
    }

    let millis = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_millis())
        .unwrap_or(0);
    let seq = NEXT_TASK_SEQ.fetch_add(1, Ordering::Relaxed);
    let id = format!("task-{millis:x}-{seq:x}");

    let status = std::sync::Arc::new(std::sync::Mutex::new("running".to_string()));
    let log = std::sync::Arc::new(std::sync::Mutex::new(String::new()));
    if let Ok(mut tasks) = self.tasks.lock() {
        tasks.push(BgTaskHandle {
            id: id.clone(),
            command: command.to_string(),
            status: status.clone(),
            log: log.clone(),
        });
    }
    let container = self.container.clone();
    let script = format!("{command} 2>&1");
    tokio::spawn(async move {
        use tokio::io::AsyncReadExt;
        let mut child = match Command::new(PODMAN)
            .args(["exec", &container, "sh", "-c", &script])
            .stdout(Stdio::piped())
            .stderr(Stdio::null())
            .spawn()
        {
            Ok(c) => c,
            Err(e) => {
                if let Ok(mut s) = status.lock() {
                    *s = format!("failed: {e}");
                }
                return;
            }
        };
        if let Some(mut out) = child.stdout.take() {
            let mut buf = [0u8; 4096];
            loop {
                match out.read(&mut buf).await {
                    // EOF or a read error both end log capture.
                    Ok(0) | Err(_) => break,
                    Ok(n) => {
                        if let Ok(mut l) = log.lock() {
                            l.push_str(&String::from_utf8_lossy(&buf[..n]));
                            keep_tail(&mut l, LOG_CAP);
                        }
                    }
                }
            }
        }
        let st = child.wait().await;
        if let Ok(mut s) = status.lock() {
            *s = match st {
                Ok(code) => format!("exited ({})", code.code().unwrap_or(-1)),
                Err(e) => format!("error: {e}"),
            };
        }
    });
    id
}
/// Spawns `command` on a new PTY terminal of the given size via
/// `crate::pty::PtyTerminal::spawn`, registers it with this sandbox and
/// returns a shared handle to it.
///
/// # Errors
///
/// Returns the error string produced by `PtyTerminal::spawn`.
pub fn spawn_pty(
    &self,
    command: &str,
    rows: u16,
    cols: u16,
) -> Result<std::sync::Arc<crate::pty::PtyTerminal>, String> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Process-wide sequence number. A millisecond timestamp alone gives two
    // terminals opened within the same millisecond the same id, and
    // `get_terminal` / `kill_terminal` look terminals up by id.
    static NEXT_TERM_SEQ: AtomicUsize = AtomicUsize::new(0);

    let millis = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_millis())
        .unwrap_or(0);
    let seq = NEXT_TERM_SEQ.fetch_add(1, Ordering::Relaxed);
    let id = format!("term-{millis:x}-{seq:x}");

    let term = crate::pty::PtyTerminal::spawn(id, &self.container, command, rows, cols)?;
    let arc = std::sync::Arc::new(term);
    if let Ok(mut t) = self.terminals.lock() {
        t.push(arc.clone());
    }
    Ok(arc)
}
/// Looks up a registered terminal by id. Returns `None` when no terminal
/// has that id or when the terminal registry lock is poisoned.
pub fn get_terminal(&self, id: &str) -> Option<std::sync::Arc<crate::pty::PtyTerminal>> {
    let registry = self.terminals.lock().ok()?;
    for term in registry.iter() {
        if term.id == id {
            return Some(std::sync::Arc::clone(term));
        }
    }
    None
}
/// Unregisters every terminal with the given id and reports whether any
/// was removed. Returns `false` when the registry lock is poisoned.
///
/// NOTE(review): this only drops the registry's handle; whether the
/// terminal process itself is terminated presumably depends on
/// `PtyTerminal`'s drop behaviour and on other outstanding handles —
/// confirm against `crate::pty`.
pub fn kill_terminal(&self, id: &str) -> bool {
    match self.terminals.lock() {
        Ok(mut registry) => {
            let count_before = registry.len();
            registry.retain(|term| term.id != id);
            registry.len() < count_before
        }
        Err(_) => false,
    }
}
/// Lists registered terminals as `(id, command, is_alive)` tuples, in
/// registration order. Yields an empty list when the registry lock is
/// poisoned.
pub fn list_terminals(&self) -> Vec<(String, String, bool)> {
    let Ok(registry) = self.terminals.lock() else {
        return Vec::new();
    };
    let mut rows = Vec::with_capacity(registry.len());
    for term in registry.iter() {
        rows.push((term.id.clone(), term.command.clone(), term.is_alive()));
    }
    rows
}
/// Snapshots every registered background task, in registration order.
///
/// Yields an empty list when the task registry lock is poisoned. A task
/// whose own status or log lock is poisoned reports an empty string for
/// that field.
pub fn list_tasks(&self) -> Vec<BgTask> {
    /// Copies the current text out of a shared cell, or yields an empty
    /// string when its lock is poisoned.
    fn snapshot(cell: &std::sync::Mutex<String>) -> String {
        match cell.lock() {
            Ok(text) => text.clone(),
            Err(_) => String::new(),
        }
    }

    let Ok(registry) = self.tasks.lock() else {
        return Vec::new();
    };
    let mut report = Vec::with_capacity(registry.len());
    for handle in registry.iter() {
        report.push(BgTask {
            id: handle.id.clone(),
            command: handle.command.clone(),
            status: snapshot(&handle.status),
            log: snapshot(&handle.log),
        });
    }
    report
}
/// Force-removes the session container with `podman rm -f`. Best-effort:
/// the command's outcome is ignored.
pub async fn stop(&self) {
    let mut removal = Command::new(PODMAN);
    removal.arg("rm").arg("-f").arg(&self.container);
    let _ = removal.output().await;
}
}
// Unit tests for the pure helpers (error classification, argument building,
// policy defaults) plus one ignored integration test that needs podman.
#[cfg(test)]
mod tests {
    use super::*;
    // Each known podman error wording must be classified as a port conflict,
    // an unrelated error must not, and a message without any `:<port>` must
    // yield no conflicting port.
    #[test]
    fn port_conflict_detection_covers_proxy_already_running() {
        assert!(SessionSandbox::is_port_conflict(
            "Error: something went wrong with the request: \"proxy already running\\n\""
        ));
        assert!(SessionSandbox::is_port_conflict(
            "rootlessport listen tcp 0.0.0.0:3000: bind: address already in use"
        ));
        assert!(SessionSandbox::is_port_conflict(
            "port is already allocated"
        ));
        assert!(!SessionSandbox::is_port_conflict("no such image"));
        assert_eq!(
            SessionSandbox::extract_conflicting_port("proxy already running"),
            None
        );
    }
    // `ok()` is true only for exit code 0.
    #[test]
    fn exec_result_ok() {
        let r = ExecResult {
            stdout: String::new(),
            stderr: String::new(),
            exit_code: 0,
        };
        assert!(r.ok());
        let r = ExecResult { exit_code: 1, ..r };
        assert!(!r.ok());
    }
    // Pins the default policy: both opt-ins off, bridge networking, and
    // resource limits not mandatory.
    #[test]
    fn default_policy_is_secure() {
        let p = SandboxPolicy::default();
        assert!(!p.allow_post_create, "post-create must be off by default");
        assert!(
            !p.allow_untrusted_image,
            "untrusted images must be off by default"
        );
        assert_eq!(p.network, SandboxNetwork::Bridge);
        assert!(!p.require_resource_limits);
    }
    // With the default policy and limits enabled, the run arguments must
    // carry no-new-privileges, every capability drop, the resource limits
    // and the port mapping, and must not disable networking.
    #[test]
    fn run_args_always_apply_hardening() {
        let args = SessionSandbox::build_run_args(
            "axo-ses-x",
            "/w:/w:rw",
            "/w",
            DEFAULT_IMAGE,
            true,
            &[3000],
            &SandboxPolicy::default(),
        );
        assert!(args.iter().any(|a| a == "--security-opt=no-new-privileges"));
        for cap in DROPPED_CAPS {
            assert!(
                args.windows(2)
                    .any(|w| w[0] == "--cap-drop" && w[1] == *cap),
                "missing --cap-drop {cap}"
            );
        }
        assert!(args.windows(2).any(|w| w[0] == "--pids-limit"));
        assert!(args.iter().any(|a| a == "--memory"));
        assert!(args.windows(2).any(|w| w[0] == "-p" && w[1] == "3000:3000"));
        assert!(!args
            .windows(2)
            .any(|w| w[0] == "--network" && w[1] == "none"));
    }
    // With networking set to `None`, `--network none` is passed and no port
    // is published even though ports were requested; with limits disabled no
    // `--pids-limit` appears.
    #[test]
    fn run_args_network_none_cuts_off_publishing() {
        let policy = SandboxPolicy {
            network: SandboxNetwork::None,
            ..SandboxPolicy::default()
        };
        let args = SessionSandbox::build_run_args(
            "axo-ses-x",
            "/w:/w:rw",
            "/w",
            DEFAULT_IMAGE,
            false,
            &[3000, 5173],
            &policy,
        );
        assert!(args
            .windows(2)
            .any(|w| w[0] == "--network" && w[1] == "none"));
        assert!(!args.iter().any(|a| a == "-p"));
        assert!(!args.iter().any(|a| a == "--pids-limit"));
    }
    // End-to-end check against a real podman: start a sandbox on a temp
    // directory, run a command, write a file through `exec_stdin` using a
    // path relative to the container's working directory, and verify the
    // file appears in the host directory.
    #[tokio::test]
    #[ignore = "requires podman; run with: cargo test -p axocoatl-isolation -- --ignored"]
    async fn sandbox_runs_commands_and_jails_the_directory() {
        let dir = tempfile::tempdir().unwrap();
        let sb = SessionSandbox::start(
            "test",
            dir.path(),
            None,
            &[],
            &[],
            &SandboxPolicy::default(),
        )
        .await
        .expect("sandbox should start");
        let r = sb
            .exec(&["echo", "hello-sandbox"], Duration::from_secs(20))
            .await
            .unwrap();
        assert!(r.ok());
        assert!(r.stdout.contains("hello-sandbox"));
        // `$1` is `probe.txt`; the extra `sh` fills `$0`.
        sb.exec_stdin(
            &["sh", "-c", "cat > \"$1\"", "sh", "probe.txt"],
            "from-inside",
            Duration::from_secs(20),
        )
        .await
        .unwrap();
        let host = std::fs::read_to_string(dir.path().join("probe.txt")).unwrap();
        assert_eq!(host, "from-inside");
        sb.stop().await;
    }
}