use std::collections::HashMap;
use std::io::Write;
use std::process::{Command, Stdio};
use std::time::Duration;
use rusqlite::{params, Connection};
use crate::error::{Result, SqzError};
/// Name prefixes of the environment variables that `build_credential_env`
/// copies out of the current process environment.
///
/// Entries are compared with `str::starts_with`, so every entry acts as a
/// prefix: `AWS_` selects the whole `AWS_*` family, and a bare name such as
/// `HOME` also selects any longer name that begins with it.
// NOTE(review): bare names (`HOME`, `PATH`, `USER`, `LANG`, `TERM`, `SHELL`,
// `TMPDIR`, `KUBECONFIG`) look like they were meant as exact names — confirm
// whether prefix matching is intended for them.
const CREDENTIAL_ENV_PREFIXES: &[&str] = &[
// Cloud / tooling credential families.
"AWS_",
"GCLOUD_",
"GOOGLE_",
"CLOUDSDK_",
"GH_",
"GITHUB_",
"KUBECONFIG",
"DOCKER_",
// Basic process environment.
"HOME",
"PATH",
"USER",
"LANG",
"TERM",
"SHELL",
"TMPDIR",
"XDG_",
];
/// Description of one detected language runtime.
#[derive(Debug, Clone)]
pub struct RuntimeInfo {
/// Name of the candidate binary that was found during detection.
pub name: &'static str,
/// Program passed to `Command::new` when running code with this runtime.
pub binary: String,
/// Language label (for example `"js"`, `"python"`, `"shell"`).
pub language: &'static str,
}
/// Outcome of one sandboxed execution.
#[derive(Debug, Clone)]
pub struct SandboxResult {
/// Captured standard output, decoded lossily as UTF-8. After intent
/// filtering this holds only the matched sections.
pub stdout: String,
/// Exit code of the process; a fallback value is used when the process
/// reports no exit code.
pub status_code: i32,
/// `true` when the raw output exceeded the executor's byte limit and was cut.
pub was_truncated: bool,
/// `true` when `stdout` was replaced by intent-filtered sections.
pub was_indexed: bool,
}
/// Default stdout size in bytes (5 KiB) above which `execute_with_intent`
/// applies intent filtering.
const DEFAULT_FILTER_THRESHOLD: usize = 5 * 1024;
/// Result of filtering output against a search intent.
#[derive(Debug, Clone)]
pub struct FilteredOutput {
/// Chunk bodies that matched the intent, in the order returned by the
/// ranked query.
pub matched_sections: Vec<String>,
/// Terms taken from the index of the chunk bodies.
pub vocabulary: Vec<String>,
/// Number of chunks the output was split into.
pub total_chunks: usize,
/// Number of entries in `matched_sections`.
pub matched_chunks: usize,
}
/// Runs code snippets through locally detected language runtimes.
pub struct SandboxExecutor {
/// Maximum time to wait for a spawned process before killing it.
timeout: Duration,
/// Maximum number of raw stdout bytes kept per execution.
max_output_bytes: usize,
/// Stdout size in bytes above which intent filtering is applied.
filter_threshold: usize,
/// Detected runtimes keyed by lowercase language key.
runtimes: HashMap<String, RuntimeInfo>,
}
/// Stateless namespace for the FTS5-based output filtering functions.
pub(crate) struct OutputFilter;
impl OutputFilter {
/// Indexes `text` in a throwaway in-memory FTS5 table and returns the
/// chunks that match `intent`, together with the indexed vocabulary.
///
/// The text is split into chunks, each chunk is inserted as one row, and
/// the table is queried with the terms of `intent`.
///
/// # Errors
///
/// Returns `SqzError::Other` when opening the in-memory database, creating
/// the schema, inserting a chunk, or running either query fails.
pub fn filter(text: &str, intent: &str) -> Result<FilteredOutput> {
    let chunks = Self::chunk_output(text);
    let total_chunks = chunks.len();

    let conn = Connection::open_in_memory()
        .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;

    // `chunk_id` is declared UNINDEXED so its numeric value is stored but
    // never tokenized; otherwise an intent term such as "42" would match
    // the chunk whose id happens to be 42.
    conn.execute_batch(
        r#"
CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_fts USING fts5(
chunk_id UNINDEXED,
body,
tokenize='porter ascii'
);
"#,
    )
    .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;

    {
        // Prepare the statement once and reuse it for every chunk.
        let mut insert = conn
            .prepare("INSERT INTO sandbox_fts(chunk_id, body) VALUES (?1, ?2)")
            .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
        for (i, chunk) in chunks.iter().enumerate() {
            insert
                .execute(params![i.to_string(), chunk])
                .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
        }
    }

    let matched_sections = Self::bm25_search(&conn, intent, &chunks)?;
    let matched_chunks = matched_sections.len();
    let vocabulary = Self::extract_vocabulary(&conn)?;

    Ok(FilteredOutput {
        matched_sections,
        vocabulary,
        total_chunks,
        matched_chunks,
    })
}
/// Splits `text` into trimmed, non-empty chunks.
///
/// Paragraphs are separated by a blank line (`"\n\n"`). A paragraph of at
/// most 512 bytes becomes one chunk; a longer paragraph is packed line by
/// line into chunks, starting a new chunk whenever appending the next line
/// (plus its newline) would push the current one past 512 bytes. A single
/// line longer than the limit is kept whole.
fn chunk_output(text: &str) -> Vec<String> {
    const MAX_CHUNK_BYTES: usize = 512;

    let mut out: Vec<String> = Vec::new();
    let blocks = text
        .split("\n\n")
        .map(str::trim)
        .filter(|block| !block.is_empty());

    for block in blocks {
        if block.len() <= MAX_CHUNK_BYTES {
            out.push(block.to_owned());
            continue;
        }

        // Oversized paragraph: pack whole lines greedily.
        let mut pending = String::new();
        for row in block.lines() {
            if pending.is_empty() {
                pending.push_str(row);
                continue;
            }
            if pending.len() + row.len() + 1 > MAX_CHUNK_BYTES {
                out.push(std::mem::take(&mut pending));
            } else {
                pending.push('\n');
            }
            pending.push_str(row);
        }
        if !pending.is_empty() {
            out.push(pending);
        }
    }

    // Fallback: non-blank input always yields at least one chunk.
    if out.is_empty() {
        let whole = text.trim();
        if !whole.is_empty() {
            out.push(whole.to_owned());
        }
    }
    out
}
/// Runs a ranked FTS5 query for the terms of `intent` and returns the
/// bodies of up to 20 matching chunks, ordered by `rank`.
///
/// Every character of `intent` that is neither alphanumeric nor whitespace
/// is replaced by a space; the remaining terms are OR-ed together. An
/// intent with no usable terms yields an empty result without querying.
///
/// `_chunks` is unused and kept only for signature compatibility.
///
/// # Errors
///
/// Returns `SqzError::Other` when preparing, executing, or reading the
/// query fails.
fn bm25_search(conn: &Connection, intent: &str, _chunks: &[String]) -> Result<Vec<String>> {
    let sanitized: String = intent
        .chars()
        .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
        .collect();

    // Each term is wrapped in double quotes so FTS5 treats it as a string
    // rather than a bareword. Without the quotes, a term that is itself an
    // FTS5 operator (`AND`, `OR`, `NOT`, `NEAR`) makes the query a syntax
    // error. Terms cannot contain `"` because of the sanitizing above.
    let quoted: Vec<String> = sanitized
        .split_whitespace()
        .map(|term| format!("\"{term}\""))
        .collect();
    if quoted.is_empty() {
        return Ok(Vec::new());
    }
    let fts_query = quoted.join(" OR ");

    let mut stmt = conn
        .prepare(
            r#"SELECT body FROM sandbox_fts
WHERE sandbox_fts MATCH ?1
ORDER BY rank
LIMIT 20"#,
        )
        .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
    let rows = stmt
        .query_map(params![fts_query], |row| row.get::<_, String>(0))
        .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;

    let mut results = Vec::new();
    for row in rows {
        results.push(
            row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?,
        );
    }
    Ok(results)
}
/// Returns up to 100 terms indexed for the `body` column, ordered by the
/// vocabulary table's `doc` column, highest first.
///
/// Creates the `sandbox_vocab` fts5vocab table over `sandbox_fts` if it
/// does not exist yet.
///
/// # Errors
///
/// Returns `SqzError::Other` when creating the vocabulary table or
/// preparing, executing, or reading the query fails.
fn extract_vocabulary(conn: &Connection) -> Result<Vec<String>> {
    const CREATE_VOCAB: &str =
        "CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_vocab USING fts5vocab(sandbox_fts, col);";
    const SELECT_TERMS: &str = r#"SELECT term FROM sandbox_vocab
WHERE col = 'body'
ORDER BY doc DESC
LIMIT 100"#;

    conn.execute_batch(CREATE_VOCAB)
        .map_err(|e| SqzError::Other(format!("FTS5 vocab table creation failed: {e}")))?;

    let mut term_query = conn
        .prepare(SELECT_TERMS)
        .map_err(|e| SqzError::Other(format!("vocab query prepare failed: {e}")))?;
    let term_rows = term_query
        .query_map([], |row| row.get::<_, String>(0))
        .map_err(|e| SqzError::Other(format!("vocab query failed: {e}")))?;

    // Collecting into `Result` stops at the first row that fails to read.
    let terms = term_rows
        .map(|row| row.map_err(|e| SqzError::Other(format!("vocab row read failed: {e}"))))
        .collect::<Result<Vec<String>>>()?;
    Ok(terms)
}
}
impl SandboxExecutor {
/// Timeout, in seconds, used by `new`.
pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// Stdout byte limit used by `new` (1 MiB).
pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 1_048_576;
/// Creates an executor with the default timeout and output limit.
pub fn new() -> Self {
    let default_timeout = Duration::from_secs(Self::DEFAULT_TIMEOUT_SECS);
    Self::with_config(default_timeout, Self::DEFAULT_MAX_OUTPUT_BYTES)
}
/// Creates an executor with an explicit `timeout` and stdout byte limit.
///
/// Runtime detection runs as part of construction; the filter threshold
/// is set to `DEFAULT_FILTER_THRESHOLD`.
pub fn with_config(timeout: Duration, max_output_bytes: usize) -> Self {
    Self {
        timeout,
        max_output_bytes,
        filter_threshold: DEFAULT_FILTER_THRESHOLD,
        runtimes: detect_runtimes(),
    }
}
/// Runs `code` with the runtime registered for `language`.
///
/// The language name is matched case-insensitively. `go` and `rust` have
/// dedicated execution paths; every other language is run as a script.
///
/// # Errors
///
/// Returns `SqzError::Other` when no runtime is registered for the
/// language, or whatever error the selected execution path produces.
pub fn execute(&self, code: &str, language: &str) -> Result<SandboxResult> {
    let lang = language.to_lowercase();
    let runtime = match self.runtimes.get(&lang) {
        Some(info) => info,
        None => {
            return Err(SqzError::Other(format!(
                "unsupported or unavailable runtime: {lang}"
            )))
        }
    };

    let env = build_credential_env();
    match lang.as_str() {
        "go" => self.execute_go(code, runtime, &env),
        "rust" => self.execute_rust(code, runtime, &env),
        _ => self.execute_interpreted(code, runtime, &env),
    }
}
/// Runs `code` and, for large outputs, narrows stdout to the sections
/// relevant to `intent`.
///
/// Filtering happens only when stdout is longer than the filter threshold
/// and `intent` is present and not blank. In that case `stdout` is
/// replaced by the matched sections joined with a blank line,
/// `was_indexed` is set, and the filter details are returned alongside
/// the result. Otherwise the result is returned untouched with `None`.
///
/// # Errors
///
/// Propagates errors from `execute` and from the output filter.
pub fn execute_with_intent(
    &self,
    code: &str,
    language: &str,
    intent: Option<&str>,
) -> Result<(SandboxResult, Option<FilteredOutput>)> {
    let mut result = self.execute(code, language)?;

    let query = match intent {
        Some(text)
            if result.stdout.len() > self.filter_threshold && !text.trim().is_empty() =>
        {
            text
        }
        _ => return Ok((result, None)),
    };

    let filtered = OutputFilter::filter(&result.stdout, query)?;
    result.was_indexed = true;
    result.stdout = filtered.matched_sections.join("\n\n");
    Ok((result, Some(filtered)))
}
/// Returns the language labels of all detected runtimes.
///
/// The order follows the runtime map's iteration order and is therefore
/// unspecified.
pub fn available_languages(&self) -> Vec<&str> {
    let mut labels = Vec::with_capacity(self.runtimes.len());
    for info in self.runtimes.values() {
        labels.push(info.language);
    }
    labels
}
/// Returns the fixed list of language keys this executor knows about,
/// whether or not a runtime for them was detected.
pub fn supported_languages(&self) -> &[&str] {
    const SUPPORTED: &[&str] = &["js", "ts", "python", "shell", "ruby", "go", "rust"];
    SUPPORTED
}
/// Reports whether a runtime was detected for `language`
/// (matched case-insensitively).
pub fn is_available(&self, language: &str) -> bool {
    let key = language.to_lowercase();
    self.runtimes.get(&key).is_some()
}
/// Returns the configured execution timeout.
pub fn timeout(&self) -> Duration {
self.timeout
}
/// Returns the configured limit on captured stdout bytes.
pub fn max_output_bytes(&self) -> usize {
self.max_output_bytes
}
/// Writes `code` to a script file in a fresh temporary directory and runs
/// it with the runtime's binary, capturing stdout and discarding stderr.
///
/// The script's file extension is chosen from the runtime's language
/// label. When TypeScript is served by `npx`, `tsx` is passed as the first
/// argument ahead of the script path.
///
/// # Errors
///
/// Returns an error when the temporary directory or script file cannot be
/// created or written, or when running the process fails or times out.
fn execute_interpreted(
    &self,
    code: &str,
    runtime: &RuntimeInfo,
    env: &HashMap<String, String>,
) -> Result<SandboxResult> {
    let ext = match runtime.language {
        "js" => "js",
        "ts" => "ts",
        "python" => "py",
        "shell" => "sh",
        "ruby" => "rb",
        _ => "tmp",
    };

    // The directory (and the script in it) is removed when `workdir` drops,
    // which happens only after the process has finished.
    let workdir = tempfile::tempdir().map_err(SqzError::Io)?;
    let script_file = workdir.path().join(format!("sandbox_script.{ext}"));
    // The temporary `File` is closed at the end of this statement.
    std::fs::File::create(&script_file)?.write_all(code.as_bytes())?;

    let needs_tsx = runtime.language == "ts" && runtime.name == "npx";
    let mut command = Command::new(&runtime.binary);
    if needs_tsx {
        command.arg("tsx");
    }
    command.arg(&script_file);
    command.stdout(Stdio::piped());
    command.stderr(Stdio::null());
    command.envs(env);

    let context = format!("runtime={}", runtime.language);
    self.run_with_timeout(command, &context)
}
/// Writes `code` to `main.go` in a fresh temporary directory and runs it
/// with `<binary> run main.go`, capturing stdout and discarding stderr.
///
/// # Errors
///
/// Returns an error when the temporary directory or source file cannot be
/// created or written, or when running the process fails or times out.
fn execute_go(
    &self,
    code: &str,
    runtime: &RuntimeInfo,
    env: &HashMap<String, String>,
) -> Result<SandboxResult> {
    // `workdir` must outlive the process; it is cleaned up on drop.
    let workdir = tempfile::tempdir()?;
    let source_file = workdir.path().join("main.go");
    // The temporary `File` is closed at the end of this statement.
    std::fs::File::create(&source_file)?.write_all(code.as_bytes())?;

    let mut go_run = Command::new(&runtime.binary);
    go_run.arg("run");
    go_run.arg(&source_file);
    go_run.stdout(Stdio::piped());
    go_run.stderr(Stdio::null());
    go_run.envs(env);

    self.run_with_timeout(go_run, "runtime=go")
}
fn execute_rust(
&self,
code: &str,
runtime: &RuntimeInfo,
env: &HashMap<String, String>,
) -> Result<SandboxResult> {
let tmp_dir = tempfile::tempdir()?;
let src_path = tmp_dir.path().join("sandbox.rs");
let bin_path = tmp_dir.path().join("sandbox_bin");
{
let mut f = std::fs::File::create(&src_path)?;
f.write_all(code.as_bytes())?;
}
let compile = Command::new(&runtime.binary)
.arg(&src_path)
.arg("-o")
.arg(&bin_path)
.stdout(Stdio::null())
.stderr(Stdio::null())
.envs(env)
.status();
match compile {
Ok(status) if status.success() => {}
Ok(status) => {
return Ok(SandboxResult {
stdout: String::new(),
status_code: status.code().unwrap_or(1),
was_truncated: false,
was_indexed: false,
});
}
Err(e) => return Err(SqzError::Io(e)),
}
let mut cmd = Command::new(&bin_path);
cmd.stdout(Stdio::piped())
.stderr(Stdio::null())
.envs(env);
self.run_with_timeout(cmd, "runtime=rust")
}
/// Spawns `cmd`, waits for it up to the configured timeout, and returns its
/// captured stdout and exit code.
///
/// Stdout is drained on a helper thread while the process runs. Reading
/// only after exit would deadlock once the child fills the OS pipe buffer:
/// the child blocks on write, never exits, and is reported as a timeout.
/// At most `max_output_bytes + 1` bytes are kept in memory; anything
/// beyond that is read and discarded so the child can keep writing.
///
/// `context` is included in the timeout error message.
///
/// # Errors
///
/// Returns `SqzError::Io` when the process cannot be spawned and
/// `SqzError::Other` when waiting fails or the timeout elapses, in which
/// case the process is killed.
fn run_with_timeout(&self, mut cmd: Command, context: &str) -> Result<SandboxResult> {
    use std::io::Read;

    let mut child = cmd.spawn().map_err(SqzError::Io)?;

    let limit = self.max_output_bytes;
    let reader = child.stdout.take().map(|mut pipe| {
        std::thread::spawn(move || {
            // One byte past the limit is enough to detect truncation.
            // `usize` to `u64` is lossless on supported targets.
            let keep = (limit as u64).saturating_add(1);
            let mut buf = Vec::new();
            // Read errors are treated as end of output (best effort).
            let _ = pipe.by_ref().take(keep).read_to_end(&mut buf);
            let _ = std::io::copy(&mut pipe, &mut std::io::sink());
            buf
        })
    });

    let status = match wait_with_timeout(&mut child, self.timeout) {
        Ok(status) => status,
        Err(_) => {
            let _ = child.kill();
            let _ = child.wait();
            // The reader thread is left detached; it finishes once the pipe
            // is closed.
            return Err(SqzError::Other(format!(
                "sandbox execution timed out after {}s ({})",
                self.timeout.as_secs(),
                context
            )));
        }
    };

    let stdout_raw = reader
        .and_then(|handle| handle.join().ok())
        .unwrap_or_default();

    let truncated = stdout_raw.len() > self.max_output_bytes;
    let stdout_bytes = if truncated {
        &stdout_raw[..self.max_output_bytes]
    } else {
        &stdout_raw[..]
    };
    let stdout = String::from_utf8_lossy(stdout_bytes).into_owned();

    Ok(SandboxResult {
        stdout,
        // No exit code is reported as -1.
        status_code: status.code().unwrap_or(-1),
        was_truncated: truncated,
        was_indexed: false,
    })
}
}
/// Polls `child` every 50 ms until it exits or `timeout` has elapsed.
///
/// Returns the exit status on success. Returns `Err(())` both when the
/// timeout elapses and when polling the child fails; the child is not
/// killed here.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> std::result::Result<std::process::ExitStatus, ()> {
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    let started = std::time::Instant::now();
    loop {
        if let Some(status) = child.try_wait().map_err(|_| ())? {
            return Ok(status);
        }
        if started.elapsed() >= timeout {
            return Err(());
        }
        std::thread::sleep(POLL_INTERVAL);
    }
}
/// Collects the environment variables whose names start with one of
/// `CREDENTIAL_ENV_PREFIXES`.
///
/// Variables whose name or value is not valid Unicode are skipped.
/// `std::env::vars()` would panic on them, so the environment is read with
/// `vars_os()` and converted entry by entry.
// NOTE(review): callers in this file pass this map to `Command::envs`
// without calling `env_clear`, so it does not by itself restrict what the
// child process sees — confirm that is intended.
fn build_credential_env() -> HashMap<String, String> {
    std::env::vars_os()
        .filter_map(|(key, value)| {
            let key = key.into_string().ok()?;
            let selected = CREDENTIAL_ENV_PREFIXES
                .iter()
                .any(|prefix| key.starts_with(prefix));
            if !selected {
                return None;
            }
            let value = value.into_string().ok()?;
            Some((key, value))
        })
        .collect()
}
/// Probes the host for known language runtimes.
///
/// For each language the candidate binaries are tried in order and the
/// first one found is registered under the language key. Languages with no
/// available binary are omitted from the map.
fn detect_runtimes() -> HashMap<String, RuntimeInfo> {
    // (language key, candidate binaries in preference order, language label)
    const CANDIDATES: &[(&str, &[&str], &str)] = &[
        ("js", &["node", "bun"], "js"),
        ("ts", &["bun", "npx"], "ts"),
        ("python", &["python3", "python"], "python"),
        ("shell", &["bash", "sh"], "shell"),
        ("ruby", &["ruby"], "ruby"),
        ("go", &["go"], "go"),
        ("rust", &["rustc"], "rust"),
    ];

    let mut found = HashMap::new();
    for &(lang_key, binaries, lang_label) in CANDIDATES {
        let first_available = binaries
            .iter()
            .copied()
            .find(|candidate| is_binary_available(candidate));
        if let Some(bin) = first_available {
            found.insert(
                lang_key.to_string(),
                RuntimeInfo {
                    name: bin,
                    binary: bin.to_string(),
                    language: lang_label,
                },
            );
        }
    }
    found
}
/// Reports whether `name` resolves to an executable, by running `where`
/// (on Windows) or `which` (elsewhere) with its output discarded.
///
/// Returns `false` when the lookup tool exits unsuccessfully or cannot be
/// run at all.
fn is_binary_available(name: &str) -> bool {
    let locator = if cfg!(windows) { "where" } else { "which" };
    let outcome = Command::new(locator)
        .arg(name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    match outcome {
        Ok(exit) => exit.success(),
        Err(_) => false,
    }
}
// Unit and property tests for the sandbox executor and the output filter.
// Tests that run code return early when the required runtime was not detected.
#[cfg(test)]
mod tests {
use super::*;
// Constructing an executor should detect at least one runtime on the host.
#[test]
fn test_new_detects_runtimes() {
let executor = SandboxExecutor::new();
assert!(
!executor.runtimes.is_empty(),
"should detect at least one runtime"
);
}
// The static list of supported languages contains the expected keys.
#[test]
fn test_supported_languages_list() {
let executor = SandboxExecutor::new();
let supported = executor.supported_languages();
assert!(supported.len() >= 6, "should list at least 6 supported languages");
assert!(supported.contains(&"js"));
assert!(supported.contains(&"python"));
assert!(supported.contains(&"shell"));
assert!(supported.contains(&"ruby"));
assert!(supported.contains(&"go"));
assert!(supported.contains(&"rust"));
}
// `new` uses the documented default timeout and output limit.
#[test]
fn test_default_config() {
let executor = SandboxExecutor::new();
assert_eq!(executor.timeout(), Duration::from_secs(30));
assert_eq!(executor.max_output_bytes(), 1_048_576);
}
// `with_config` stores the values it is given.
#[test]
fn test_custom_config() {
let executor = SandboxExecutor::with_config(Duration::from_secs(10), 4096);
assert_eq!(executor.timeout(), Duration::from_secs(10));
assert_eq!(executor.max_output_bytes(), 4096);
}
// A simple shell command runs and its stdout is captured.
#[test]
#[cfg(not(windows))]
fn test_execute_shell_echo() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return; }
let result = executor.execute("echo hello sandbox", "shell").unwrap();
assert_eq!(result.status_code, 0);
assert_eq!(result.stdout.trim(), "hello sandbox");
assert!(!result.was_truncated);
}
// Output written to stderr must not appear in the captured stdout.
#[test]
#[cfg(not(windows))]
fn test_execute_shell_captures_only_stdout() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let code = r#"echo "visible"
echo "hidden" >&2
echo "also visible""#;
let result = executor.execute(code, "shell").unwrap();
assert!(result.stdout.contains("visible"));
assert!(result.stdout.contains("also visible"));
assert!(!result.stdout.contains("hidden"));
}
// A Python one-liner runs when a Python runtime is available.
#[test]
fn test_execute_python() {
let executor = SandboxExecutor::new();
if !executor.is_available("python") {
return;
}
let result = executor.execute("print('hello from python')", "python").unwrap();
assert_eq!(result.status_code, 0);
assert_eq!(result.stdout.trim(), "hello from python");
}
// A non-zero exit code is reported as-is rather than as an error.
#[test]
#[cfg(not(windows))]
fn test_execute_nonzero_exit() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let result = executor.execute("exit 42", "shell").unwrap();
assert_eq!(result.status_code, 42);
}
// A process that outlives the timeout yields a "timed out" error.
#[test]
#[cfg(not(windows))]
fn test_execute_timeout() {
let executor = SandboxExecutor::with_config(Duration::from_secs(1), 1024);
if !executor.is_available("shell") {
return;
}
let result = executor.execute("sleep 30", "shell");
assert!(result.is_err());
let err_msg = format!("{}", result.unwrap_err());
assert!(err_msg.contains("timed out"));
}
// Output longer than the configured limit is cut and flagged as truncated.
// NOTE(review): relies on `seq` and POSIX shell syntax but, unlike the
// neighbouring shell tests, is not gated with `#[cfg(not(windows))]` — confirm.
#[test]
fn test_execute_output_truncation() {
let executor = SandboxExecutor::with_config(Duration::from_secs(10), 32);
if !executor.is_available("shell") {
return;
}
let result = executor
.execute("for i in $(seq 1 100); do echo \"line $i\"; done", "shell")
.unwrap();
assert!(result.was_truncated);
assert!(result.stdout.len() <= 32);
}
// An unknown language name is rejected with a descriptive error.
#[test]
fn test_unsupported_runtime() {
let executor = SandboxExecutor::new();
let result = executor.execute("code", "brainfuck");
assert!(result.is_err());
let err_msg = format!("{}", result.unwrap_err());
assert!(err_msg.contains("unsupported or unavailable runtime"));
}
// Language names are matched case-insensitively.
#[test]
fn test_case_insensitive_language() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let result = executor.execute("echo ok", "Shell");
assert!(result.is_ok());
}
// `PATH` is part of the environment handed to child processes.
#[test]
#[cfg(not(windows))]
fn test_credential_env_includes_path() {
let env = build_credential_env();
assert!(env.contains_key("PATH"), "PATH should be inherited");
}
// Variables with an `AWS_` prefix are picked up.
// NOTE(review): mutates the process-wide environment; this may race with
// other tests that read the environment when tests run in parallel — confirm.
#[test]
fn test_credential_env_includes_aws() {
std::env::set_var("AWS_TEST_SANDBOX", "test_value");
let env = build_credential_env();
assert_eq!(env.get("AWS_TEST_SANDBOX").map(|s| s.as_str()), Some("test_value"));
std::env::remove_var("AWS_TEST_SANDBOX");
}
// Binary lookup finds a real binary and rejects a bogus one.
// NOTE(review): asserts that `sh` exists but is not gated with
// `#[cfg(not(windows))]` — confirm this is expected to pass on Windows.
#[test]
fn test_is_binary_available() {
assert!(is_binary_available("sh"));
assert!(!is_binary_available("definitely_not_a_real_binary_xyz"));
}
// Paragraphs separated by a blank line become separate chunks.
#[test]
fn test_chunk_output_splits_on_double_newline() {
let text = "first paragraph\n\nsecond paragraph\n\nthird paragraph";
let chunks = OutputFilter::chunk_output(text);
assert_eq!(chunks.len(), 3);
assert_eq!(chunks[0], "first paragraph");
assert_eq!(chunks[1], "second paragraph");
assert_eq!(chunks[2], "third paragraph");
}
// A paragraph above the chunk size limit is split into several chunks.
// The size assertion below allows up to 600 bytes per chunk.
#[test]
fn test_chunk_output_splits_large_paragraphs() {
let line = "a]".repeat(30); let big_para = (0..20).map(|i| format!("{line} line{i}")).collect::<Vec<_>>().join("\n");
assert!(big_para.len() > 512);
let chunks = OutputFilter::chunk_output(&big_para);
assert!(chunks.len() > 1, "large paragraph should be sub-split");
for chunk in &chunks {
assert!(chunk.len() <= 600, "each sub-chunk should be roughly ≤512 bytes");
}
}
// Empty input produces no chunks.
#[test]
fn test_chunk_output_empty_input() {
let chunks = OutputFilter::chunk_output("");
assert!(chunks.is_empty());
}
// Filtering returns the chunks that mention the intent terms.
#[test]
fn test_filter_returns_matching_sections() {
let text = "error: compilation failed at line 42\n\n\
warning: unused variable `x`\n\n\
info: build started at 10:00\n\n\
error: type mismatch in function foo\n\n\
success: 3 tests passed";
let result = OutputFilter::filter(text, "error compilation").unwrap();
assert!(!result.matched_sections.is_empty(), "should find error-related chunks");
assert!(
result.matched_sections.iter().any(|s| s.contains("error")),
"matched sections should contain the intent keyword"
);
assert!(result.total_chunks >= 4);
}
// Filtering also reports vocabulary drawn from the indexed text.
#[test]
fn test_filter_returns_vocabulary() {
let text = "the quick brown fox jumps over the lazy dog\n\n\
rust programming language is fast and safe\n\n\
memory safety without garbage collection";
let result = OutputFilter::filter(text, "rust").unwrap();
assert!(!result.vocabulary.is_empty(), "vocabulary should not be empty");
let vocab_joined = result.vocabulary.join(" ");
assert!(
vocab_joined.contains("rust") || vocab_joined.contains("fast") || vocab_joined.contains("safe"),
"vocabulary should contain terms from the indexed content"
);
}
// An intent that matches nothing yields no sections and a zero match count.
#[test]
fn test_filter_no_match_returns_empty() {
let text = "hello world\n\nfoo bar baz";
let result = OutputFilter::filter(text, "zzzznonexistent").unwrap();
assert!(result.matched_sections.is_empty());
assert_eq!(result.matched_chunks, 0);
}
// Punctuation in the intent must not break the query.
#[test]
fn test_filter_special_chars_in_intent() {
let text = "error: something went wrong\n\nwarning: check this";
let result = OutputFilter::filter(text, "error: (something) [wrong]");
assert!(result.is_ok(), "special chars in intent should be sanitized");
}
// Output below the filter threshold is returned unfiltered.
#[test]
#[cfg(not(windows))]
fn test_execute_with_intent_small_output_no_filter() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let (result, filtered) = executor
.execute_with_intent("echo hello", "shell", Some("hello"))
.unwrap();
assert_eq!(result.status_code, 0);
assert!(!result.was_indexed);
assert!(filtered.is_none());
}
// Large output without an intent is returned unfiltered.
#[test]
fn test_execute_with_intent_no_intent_no_filter() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let code = "for i in $(seq 1 1000); do echo \"line $i: some padding text to make it bigger\"; done";
let (result, filtered) = executor
.execute_with_intent(code, "shell", None)
.unwrap();
assert!(!result.was_indexed);
assert!(filtered.is_none());
}
// Large output with an intent is indexed and filtered.
#[test]
#[cfg(not(windows))]
fn test_execute_with_intent_large_output_filters() {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return;
}
let code = r#"
for i in $(seq 1 50); do echo "error: compilation failed at module $i"; done
echo ""
for i in $(seq 1 50); do echo "info: processing file $i of 200"; done
echo ""
for i in $(seq 1 50); do echo "warning: deprecated API usage in handler $i"; done
echo ""
for i in $(seq 1 50); do echo "success: test suite $i passed with 100% coverage"; done
"#;
let (result, filtered) = executor
.execute_with_intent(code, "shell", Some("error compilation"))
.unwrap();
assert!(result.was_indexed, "large output with intent should be indexed");
let filtered = filtered.expect("should have filtered output");
assert!(!filtered.matched_sections.is_empty(), "should have matched sections");
assert!(!filtered.vocabulary.is_empty(), "should have vocabulary");
assert!(filtered.total_chunks > 0);
}
// Property-based tests; these spawn a shell for every generated case.
#[cfg(not(windows))]
mod proptests {
use super::*;
use proptest::prelude::*;
// Generates short alphanumeric labels that are safe to embed in shell code.
fn safe_label() -> impl Strategy<Value = String> {
"[a-zA-Z0-9]{1,20}"
}
proptest! {
// Only text written to stdout is captured; stderr text never leaks in.
#[test]
fn prop_only_stdout_captured(
label in safe_label(),
) {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return Ok(());
}
let stdout_msg = format!("OUT_{label}");
let stderr_msg = format!("ERR_{label}");
let code = format!(
"echo \"{stdout_msg}\"\necho \"{stderr_msg}\" >&2"
);
let result = executor.execute(&code, "shell").unwrap();
prop_assert!(
result.stdout.contains(&stdout_msg),
"stdout should contain the stdout message '{}', got: '{}'",
stdout_msg, result.stdout
);
prop_assert!(
!result.stdout.contains(&stderr_msg),
"stdout should NOT contain the stderr message '{}', got: '{}'",
stderr_msg, result.stdout
);
}
}
proptest! {
// A variable exported in one execution is not visible in the next one.
#[test]
fn prop_no_shared_state_between_executions(
var_name in "[A-Z]{3,8}",
var_value in "[a-z0-9]{1,10}",
) {
let executor = SandboxExecutor::new();
if !executor.is_available("shell") {
return Ok(());
}
let unique_var = format!("SQZ_PROP_{var_name}");
let code1 = format!(
"export {unique_var}={var_value}\necho \"set {unique_var}\""
);
let result1 = executor.execute(&code1, "shell").unwrap();
prop_assert!(
result1.stdout.contains(&format!("set {unique_var}")),
"first execution should succeed"
);
let code2 = format!(
"echo \"val=${{{unique_var}:-UNSET}}\""
);
let result2 = executor.execute(&code2, "shell").unwrap();
prop_assert!(
result2.stdout.contains("val=UNSET"),
"env var from first execution should not leak into second; got: '{}'",
result2.stdout
);
}
}
}
}