use std::process::Command;
use std::sync::Arc;
use std::time::Duration;
use crate::evaluator::Evaluator;
use crate::evaluators::EvaluatorError;
use crate::evaluators::code::extractor::CodeExtractor;
use crate::score::Score;
use crate::types::{EvalCase, EvalMetricResult, Invocation};
/// Unix-only sandbox backend; this module is compiled only when
/// `target_family = "unix"`.
///
/// `run_sandboxed` in this file forwards to `posix::run_sandboxed_unix` on
/// those targets.
#[cfg(target_family = "unix")]
pub(crate) mod posix;
/// Resource ceilings passed to [`run_sandboxed`] for one execution.
///
/// Enforcement is performed by the platform backend, which is not part of this
/// section of the file; the per-field notes below restate the field names and
/// should be confirmed against that backend.
#[derive(Debug, Clone)]
pub struct SandboxLimits {
    /// Wall-clock time budget.
    pub wall_clock: Duration,
    /// CPU time budget.
    pub cpu: Duration,
    /// Memory budget, in bytes.
    pub memory_bytes: u64,
    /// Upper bound on simultaneously open files.
    pub max_open_files: u64,
    /// Whether the sandboxed process may use the network.
    pub allow_network: bool,
}

impl Default for SandboxLimits {
    /// Builds the default limits: 120 s wall clock, 60 s CPU, 1 GiB of memory,
    /// 256 open files, and networking disabled.
    fn default() -> Self {
        const ONE_GIB: u64 = 1 << 30;
        const DEFAULT_OPEN_FILES: u64 = 256;

        let cpu = Duration::from_secs(60);
        let wall_clock = Duration::from_secs(2 * 60);

        Self {
            wall_clock,
            cpu,
            memory_bytes: ONE_GIB,
            max_open_files: DEFAULT_OPEN_FILES,
            allow_network: false,
        }
    }
}
/// What a sandboxed run produced, as returned by [`run_sandboxed`].
///
/// The fields are filled in by the platform backend (not shown in this part of
/// the file); the notes below describe how [`SandboxOutcome::summary`] reads
/// them.
#[derive(Debug, Clone)]
pub struct SandboxOutcome {
    /// When `true` and no limit was exceeded, the summary is `"ok"`.
    pub success: bool,
    /// Exit code, if any; used in the summary when `stderr` has no usable text.
    pub exit_code: Option<i32>,
    /// Signal number, if any; used in the summary when there is neither usable
    /// `stderr` text nor an exit code.
    pub signal: Option<i32>,
    /// Text whose first non-blank lines form the failure summary.
    pub stderr: String,
    /// Name of the exceeded limit, if any; takes precedence over everything
    /// else in the summary.
    pub limit_exceeded: Option<String>,
}

impl SandboxOutcome {
    /// Produces a short, human-readable description of the outcome.
    ///
    /// Precedence, highest first:
    /// 1. an exceeded limit -> `"sandbox limit exceeded: <limit>"`;
    /// 2. success -> `"ok"`;
    /// 3. the first eight non-blank `stderr` lines, joined with `\n`;
    /// 4. `"exit status <code>"`, then `"signal <sig>"`, then `"non-zero exit"`.
    #[must_use]
    pub fn summary(&self) -> String {
        if let Some(limit) = &self.limit_exceeded {
            return format!("sandbox limit exceeded: {limit}");
        }
        if self.success {
            return String::from("ok");
        }

        // Keep at most eight lines that contain something besides whitespace.
        let kept = self
            .stderr
            .lines()
            .filter(|line| !line.trim().is_empty())
            .take(8);
        let mut detail = String::new();
        for (idx, line) in kept.enumerate() {
            if idx > 0 {
                detail.push('\n');
            }
            detail.push_str(line);
        }
        if !detail.is_empty() {
            return detail;
        }

        // Nothing useful on stderr: fall back to the process status.
        if let Some(code) = self.exit_code {
            format!("exit status {code}")
        } else if let Some(sig) = self.signal {
            format!("signal {sig}")
        } else {
            String::from("non-zero exit")
        }
    }
}
/// Runs `command` under `limits` using the platform's sandbox backend.
///
/// On Unix-family targets this forwards to `posix::run_sandboxed_unix`. On
/// every other target the command is not spawned and an error is returned.
///
/// # Errors
///
/// * [`EvaluatorError::UnsupportedPlatform`] on any non-Unix target.
/// * On Unix, whatever error the POSIX backend reports.
pub fn run_sandboxed(
    command: Command,
    limits: &SandboxLimits,
) -> Result<SandboxOutcome, EvaluatorError> {
    #[cfg(target_family = "unix")]
    {
        posix::run_sandboxed_unix(command, limits)
    }
    // Gate on `not(unix)` rather than `windows`: a target that is neither
    // (e.g. wasm) would otherwise be left with an empty body that evaluates to
    // `()` and fails to type-check against the declared `Result`.
    #[cfg(not(target_family = "unix"))]
    {
        let _ = (command, limits);
        Err(EvaluatorError::UnsupportedPlatform {
            reason: "SandboxedExecutionEvaluator is Unix-only (Linux/macOS); \
                     FR-017 defines Windows as unsupported for this evaluator"
                .to_string(),
        })
    }
}
/// Strategy for turning a piece of extracted code into a runnable [`Command`].
///
/// `SandboxedExecutionEvaluator` stores a runner as `Arc<dyn SandboxRunner>`;
/// the `Send + Sync` bound is presumably there so that evaluator can be shared
/// across threads (confirm against the `Evaluator` trait's own bounds).
pub trait SandboxRunner: Send + Sync {
/// Prepares the command that will execute `code`, using `workdir` as the
/// directory the runner may place files in.
///
/// # Errors
///
/// Returns an I/O error when the command cannot be prepared.
fn command(&self, code: &str, workdir: &std::path::Path) -> std::io::Result<Command>;
}
/// [`SandboxRunner`] that saves the code as a shell script and hands it to
/// `/bin/sh`.
#[derive(Debug, Default, Clone)]
pub struct ShellRunner;

impl SandboxRunner for ShellRunner {
    /// Writes `code` to `snippet.sh` inside `workdir` and returns a `/bin/sh`
    /// command whose only argument is that script and whose working directory
    /// is `workdir`. The command is built, not executed.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error if the script file cannot be written.
    fn command(&self, code: &str, workdir: &std::path::Path) -> std::io::Result<Command> {
        let mut script_path = workdir.to_path_buf();
        script_path.push("snippet.sh");
        std::fs::write(&script_path, code.as_bytes())?;

        let mut shell = Command::new("/bin/sh");
        shell.arg(&script_path).current_dir(workdir);
        Ok(shell)
    }
}
/// Evaluator that extracts code from a response, runs it through
/// [`run_sandboxed`], and scores the run as pass or fail.
///
/// Construct it with [`SandboxedExecutionEvaluator::new`] and adjust it with
/// the `with_*` builder methods.
pub struct SandboxedExecutionEvaluator {
    /// Name reported by the evaluator and stamped on every metric result.
    name: &'static str,
    /// Pulls the code to execute out of the final response.
    extractor: Arc<CodeExtractor>,
    /// Limits forwarded to [`run_sandboxed`].
    limits: SandboxLimits,
    /// Builds the command that executes the extracted code.
    runner: Arc<dyn SandboxRunner>,
}

impl SandboxedExecutionEvaluator {
    /// Creates an evaluator named `"sandboxed_execution"` that uses
    /// [`SandboxLimits::default`] and a [`ShellRunner`].
    #[must_use]
    pub fn new(extractor: Arc<CodeExtractor>) -> Self {
        let runner: Arc<dyn SandboxRunner> = Arc::new(ShellRunner);
        let limits = SandboxLimits::default();
        Self {
            name: "sandboxed_execution",
            extractor,
            limits,
            runner,
        }
    }

    /// Replaces the evaluator name.
    #[must_use]
    pub const fn with_name(mut self, name: &'static str) -> Self {
        // Plain field assignment keeps this usable as a `const fn`.
        self.name = name;
        self
    }

    /// Replaces the sandbox limits.
    #[must_use]
    pub fn with_limits(self, limits: SandboxLimits) -> Self {
        Self { limits, ..self }
    }

    /// Replaces the runner that turns extracted code into a command.
    #[must_use]
    pub fn with_runner(self, runner: Arc<dyn SandboxRunner>) -> Self {
        Self { runner, ..self }
    }
}
impl Evaluator for SandboxedExecutionEvaluator {
    /// Returns the configured evaluator name.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Extracts code from the invocation's final response, runs it in the
    /// sandbox, and reports a pass/fail metric.
    ///
    /// Returns `None` when the invocation has no final response or the
    /// extractor yields nothing. Every later failure (temp dir creation,
    /// command preparation, sandbox errors) is reported as a failing score
    /// whose `details` explain what went wrong.
    fn evaluate(&self, _case: &EvalCase, invocation: &Invocation) -> Option<EvalMetricResult> {
        let response = invocation.final_response.as_ref()?;
        let code = crate::evaluators::block_on(self.extractor.extract(response))?;

        // Single place where the metric result is assembled.
        let report = |score, details: String| {
            Some(EvalMetricResult {
                evaluator_name: self.name.to_string(),
                score,
                details: Some(details),
            })
        };

        // The directory must outlive the sandboxed run, so it is bound to a
        // local that is only dropped when this function returns.
        let sandbox_dir = match tempfile::TempDir::new() {
            Ok(dir) => dir,
            Err(err) => {
                return report(Score::fail(), format!("tempdir creation failed: {err}"));
            }
        };

        let command = match self.runner.command(&code, sandbox_dir.path()) {
            Ok(prepared) => prepared,
            Err(err) => return report(Score::fail(), format!("runner failed: {err}")),
        };

        match run_sandboxed(command, &self.limits) {
            Ok(outcome) if outcome.success => report(Score::pass(), outcome.summary()),
            Ok(outcome) => report(Score::fail(), outcome.summary()),
            Err(EvaluatorError::UnsupportedPlatform { reason }) => {
                report(Score::fail(), format!("unsupported platform: {reason}"))
            }
            Err(EvaluatorError::SandboxLimitExceeded { limit }) => {
                report(Score::fail(), format!("sandbox limit exceeded: {limit}"))
            }
            Err(err) => report(Score::fail(), err.into_metric_details()),
        }
    }
}