sqz_engine/
sandbox_executor.rs

1use std::collections::HashMap;
2use std::io::Write;
3use std::process::{Command, Stdio};
4use std::time::Duration;
5
6use rusqlite::{params, Connection};
7
8use crate::error::{Result, SqzError};
9
/// Environment variable name *prefixes* selected for credential passthrough.
///
/// `build_credential_env` copies a variable from the parent process when its
/// name starts with any entry below, so bare names such as `HOME` or `PATH`
/// also select longer names that share the prefix. The selected variables let
/// sandbox code use authenticated CLIs (gh, aws, gcloud, kubectl, docker)
/// without exposing credentials to the conversation context.
const CREDENTIAL_ENV_PREFIXES: &[&str] = &[
    // AWS
    "AWS_",
    // Google Cloud
    "GCLOUD_",
    "GOOGLE_",
    "CLOUDSDK_",
    // GitHub CLI
    "GH_",
    "GITHUB_",
    // Kubernetes
    "KUBECONFIG",
    // Docker
    "DOCKER_",
    // General (needed by most runtimes to locate binaries and config)
    "HOME",
    "PATH",
    "USER",
    "LANG",
    "TERM",
    "SHELL",
    "TMPDIR",
    "XDG_",
];
38
/// A detected runtime with its binary path.
///
/// Instances are produced by `detect_runtimes`, one per language key.
#[derive(Debug, Clone)]
pub struct RuntimeInfo {
    /// Name of the candidate binary that was found during probing
    /// (e.g. `node`, `bun`, `npx`).
    pub name: &'static str,
    /// Program passed to `Command::new` when running code.
    pub binary: String,
    /// Language label (e.g. `js`, `python`, `shell`).
    pub language: &'static str,
}
46
/// Output captured from a single sandbox subprocess run.
#[derive(Debug, Clone)]
pub struct SandboxResult {
    /// Text written to stdout — the only data that enters the context window.
    /// Invalid UTF-8 is replaced lossily when the bytes are decoded.
    pub stdout: String,
    /// Process exit status code. `-1` is reported when the process ended
    /// without an exit code; a failed Rust compile without a code reports `1`.
    pub status_code: i32,
    /// Set when stdout was cut short due to the `max_output_bytes` limit.
    pub was_truncated: bool,
    /// Set when stdout was routed through FTS5 intent filtering.
    pub was_indexed: bool,
}
59
/// Default threshold in bytes above which intent-driven filtering kicks in.
///
/// Compared against the byte length of stdout with a strict `>`, so output of
/// exactly this size is still returned unfiltered.
const DEFAULT_FILTER_THRESHOLD: usize = 5 * 1024; // 5 KB
62
/// Result of intent-driven output filtering via FTS5 BM25 search.
#[derive(Debug, Clone)]
pub struct FilteredOutput {
    /// BM25-matched sections from the original output, best match first
    /// (the search query caps this at 20 sections).
    pub matched_sections: Vec<String>,
    /// Vocabulary of searchable terms for follow-up queries
    /// (the vocabulary query caps this at 100 terms).
    pub vocabulary: Vec<String>,
    /// Total number of chunks the output was split into.
    pub total_chunks: usize,
    /// Number of chunks that matched the intent
    /// (always equal to `matched_sections.len()`).
    pub matched_chunks: usize,
}
75
/// Executes code in isolated subprocesses.
///
/// Only stdout enters the context window — stderr, file system side effects,
/// and environment variables never leak into the LLM context.
pub struct SandboxExecutor {
    /// Maximum wall-clock time a spawned process may run before it is killed.
    timeout: Duration,
    /// Upper bound, in bytes, on the stdout returned from a run.
    max_output_bytes: usize,
    /// Stdout size in bytes above which intent filtering is applied.
    filter_threshold: usize,
    /// Detected runtimes keyed by lower-case language name.
    runtimes: HashMap<String, RuntimeInfo>,
}
86
87// ── OutputFilter ──────────────────────────────────────────────────────────────
88
/// Indexes large text output into an in-memory FTS5 table and returns
/// BM25-matched sections plus a vocabulary of searchable terms.
///
/// Stateless: all functionality is exposed through associated functions.
pub(crate) struct OutputFilter;
92
93impl OutputFilter {
94    /// Chunk `text` by double-newline paragraphs (or every ~512 bytes for
95    /// long runs without blank lines), index into FTS5, and return the
96    /// BM25-matched sections for `intent`.
97    pub fn filter(text: &str, intent: &str) -> Result<FilteredOutput> {
98        let chunks = Self::chunk_output(text);
99        let total_chunks = chunks.len();
100
101        let conn = Connection::open_in_memory()
102            .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;
103
104        conn.execute_batch(
105            r#"
106            CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_fts USING fts5(
107                chunk_id,
108                body,
109                tokenize='porter ascii'
110            );
111            "#,
112        )
113        .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;
114
115        // Insert chunks
116        for (i, chunk) in chunks.iter().enumerate() {
117            conn.execute(
118                "INSERT INTO sandbox_fts(chunk_id, body) VALUES (?1, ?2)",
119                params![i.to_string(), chunk],
120            )
121            .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
122        }
123
124        // BM25 search
125        let matched_sections = Self::bm25_search(&conn, intent, &chunks)?;
126        let matched_chunks = matched_sections.len();
127
128        // Extract vocabulary
129        let vocabulary = Self::extract_vocabulary(&conn)?;
130
131        Ok(FilteredOutput {
132            matched_sections,
133            vocabulary,
134            total_chunks,
135            matched_chunks,
136        })
137    }
138
139    /// Split output into chunks on double-newline boundaries. If a chunk
140    /// exceeds 512 bytes, split it further on single newlines.
141    fn chunk_output(text: &str) -> Vec<String> {
142        const MAX_CHUNK_BYTES: usize = 512;
143
144        let paragraphs: Vec<&str> = text.split("\n\n").collect();
145        let mut chunks = Vec::new();
146
147        for para in paragraphs {
148            let trimmed = para.trim();
149            if trimmed.is_empty() {
150                continue;
151            }
152            if trimmed.len() <= MAX_CHUNK_BYTES {
153                chunks.push(trimmed.to_string());
154            } else {
155                // Sub-split on single newlines
156                let mut current = String::new();
157                for line in trimmed.lines() {
158                    if !current.is_empty() && current.len() + line.len() + 1 > MAX_CHUNK_BYTES {
159                        chunks.push(std::mem::take(&mut current));
160                    }
161                    if !current.is_empty() {
162                        current.push('\n');
163                    }
164                    current.push_str(line);
165                }
166                if !current.is_empty() {
167                    chunks.push(current);
168                }
169            }
170        }
171
172        // Guarantee at least one chunk even for empty-ish input
173        if chunks.is_empty() && !text.trim().is_empty() {
174            chunks.push(text.trim().to_string());
175        }
176
177        chunks
178    }
179
180    /// Query the FTS5 table with the intent and return matching chunk bodies
181    /// ranked by BM25.
182    fn bm25_search(conn: &Connection, intent: &str, _chunks: &[String]) -> Result<Vec<String>> {
183        // Sanitize intent for FTS5 query: keep alphanumeric and spaces
184        let sanitized: String = intent
185            .chars()
186            .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
187            .collect();
188        let terms: Vec<&str> = sanitized.split_whitespace().collect();
189        if terms.is_empty() {
190            return Ok(Vec::new());
191        }
192
193        // Build an OR query so partial matches still return results
194        let fts_query = terms.join(" OR ");
195
196        let mut stmt = conn
197            .prepare(
198                r#"SELECT body FROM sandbox_fts
199                   WHERE sandbox_fts MATCH ?1
200                   ORDER BY rank
201                   LIMIT 20"#,
202            )
203            .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
204
205        let rows = stmt
206            .query_map(params![fts_query], |row| row.get::<_, String>(0))
207            .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;
208
209        let mut results = Vec::new();
210        for row in rows {
211            results.push(
212                row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?,
213            );
214        }
215        Ok(results)
216    }
217
218    /// Extract a vocabulary of distinct searchable terms from the indexed
219    /// content. Uses the FTS5 `vocab` virtual table to pull out tokens.
220    fn extract_vocabulary(conn: &Connection) -> Result<Vec<String>> {
221        // Create a vocab table over the FTS5 index using 'col' detail
222        // which gives (term, col, doc, cnt) columns.
223        conn.execute_batch(
224            "CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_vocab USING fts5vocab(sandbox_fts, col);",
225        )
226        .map_err(|e| SqzError::Other(format!("FTS5 vocab table creation failed: {e}")))?;
227
228        let mut stmt = conn
229            .prepare(
230                r#"SELECT term FROM sandbox_vocab
231                   WHERE col = 'body'
232                   ORDER BY doc DESC
233                   LIMIT 100"#,
234            )
235            .map_err(|e| SqzError::Other(format!("vocab query prepare failed: {e}")))?;
236
237        let rows = stmt
238            .query_map([], |row| row.get::<_, String>(0))
239            .map_err(|e| SqzError::Other(format!("vocab query failed: {e}")))?;
240
241        let mut vocab = Vec::new();
242        for row in rows {
243            vocab.push(
244                row.map_err(|e| SqzError::Other(format!("vocab row read failed: {e}")))?,
245            );
246        }
247        Ok(vocab)
248    }
249}
250
251impl SandboxExecutor {
    /// Default timeout: 30 seconds. Used by [`SandboxExecutor::new`].
    pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
    /// Default max output: 1 MB (1,048,576 bytes). Used by [`SandboxExecutor::new`].
    pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 1_048_576;
256
257    /// Create a new executor, auto-detecting available runtimes.
258    pub fn new() -> Self {
259        Self::with_config(
260            Duration::from_secs(Self::DEFAULT_TIMEOUT_SECS),
261            Self::DEFAULT_MAX_OUTPUT_BYTES,
262        )
263    }
264
265    /// Create with custom timeout and max output size.
266    pub fn with_config(timeout: Duration, max_output_bytes: usize) -> Self {
267        let runtimes = detect_runtimes();
268        Self {
269            timeout,
270            max_output_bytes,
271            filter_threshold: DEFAULT_FILTER_THRESHOLD,
272            runtimes,
273        }
274    }
275
276    /// Execute code in the given language runtime.
277    ///
278    /// Only stdout is captured and returned. Stderr is discarded.
279    /// Credentials for gh, aws, gcloud, kubectl, docker are passed through
280    /// via environment variable inheritance.
281    pub fn execute(&self, code: &str, language: &str) -> Result<SandboxResult> {
282        let lang = language.to_lowercase();
283        let runtime = self
284            .runtimes
285            .get(&lang)
286            .ok_or_else(|| SqzError::Other(format!("unsupported or unavailable runtime: {lang}")))?;
287
288        let env = build_credential_env();
289
290        let result = match lang.as_str() {
291            "go" => self.execute_go(code, runtime, &env),
292            "rust" => self.execute_rust(code, runtime, &env),
293            _ => self.execute_interpreted(code, runtime, &env),
294        }?;
295
296        Ok(result)
297    }
298
299    /// Execute code and, when stdout exceeds 5 KB and `intent` is provided,
300    /// index the full output into an in-memory FTS5 table and return only
301    /// BM25-matched sections plus a vocabulary of searchable terms.
302    ///
303    /// When the output is small or no intent is given, behaves identically
304    /// to [`execute`] (returns full stdout, `filtered` is `None`).
305    pub fn execute_with_intent(
306        &self,
307        code: &str,
308        language: &str,
309        intent: Option<&str>,
310    ) -> Result<(SandboxResult, Option<FilteredOutput>)> {
311        let mut result = self.execute(code, language)?;
312
313        let should_filter = result.stdout.len() > self.filter_threshold
314            && intent.map_or(false, |i| !i.trim().is_empty());
315
316        if should_filter {
317            let intent_str = intent.unwrap(); // safe: checked above
318            let filtered = OutputFilter::filter(&result.stdout, intent_str)?;
319            result.was_indexed = true;
320            // Replace stdout with only the matched sections so the LLM
321            // context window receives the filtered view.
322            result.stdout = filtered.matched_sections.join("\n\n");
323            Ok((result, Some(filtered)))
324        } else {
325            Ok((result, None))
326        }
327    }
328
329    /// Languages that this executor currently supports (only those detected on this system).
330    pub fn available_languages(&self) -> Vec<&str> {
331        self.runtimes.values().map(|r| r.language).collect()
332    }
333
334    /// All languages the executor can potentially support.
335    pub fn supported_languages(&self) -> &[&str] {
336        &["js", "ts", "python", "shell", "ruby", "go", "rust"]
337    }
338
339    /// Check whether a specific language runtime is available.
340    pub fn is_available(&self, language: &str) -> bool {
341        self.runtimes.contains_key(&language.to_lowercase())
342    }
343
344    /// Current timeout setting.
345    pub fn timeout(&self) -> Duration {
346        self.timeout
347    }
348
349    /// Current max output size in bytes.
350    pub fn max_output_bytes(&self) -> usize {
351        self.max_output_bytes
352    }
353
354    // ── Private helpers ───────────────────────────────────────────────────
355
356    /// Execute an interpreted language (JS, TS, Python, Shell, Ruby) by
357    /// writing code to a temp file and invoking the runtime binary.
358    fn execute_interpreted(
359        &self,
360        code: &str,
361        runtime: &RuntimeInfo,
362        env: &HashMap<String, String>,
363    ) -> Result<SandboxResult> {
364        let ext = match runtime.language {
365            "js" => "js",
366            "ts" => "ts",
367            "python" => "py",
368            "shell" => "sh",
369            "ruby" => "rb",
370            _ => "tmp",
371        };
372
373        let tmp_dir = tempfile::tempdir().map_err(|e| SqzError::Io(e))?;
374        let script_path = tmp_dir.path().join(format!("sandbox_script.{ext}"));
375        {
376            let mut f = std::fs::File::create(&script_path)?;
377            f.write_all(code.as_bytes())?;
378        }
379
380        let mut cmd = Command::new(&runtime.binary);
381
382        // Special case: TypeScript via npx needs `tsx` as the first argument
383        if runtime.language == "ts" && runtime.name == "npx" {
384            cmd.arg("tsx");
385        }
386
387        cmd.arg(&script_path)
388            .stdout(Stdio::piped())
389            .stderr(Stdio::null()) // stderr never enters context
390            .envs(env);
391
392        self.run_with_timeout(cmd, &format!("runtime={}", runtime.language))
393    }
394
395    /// Execute Go code: write to temp file, run with `go run`.
396    fn execute_go(
397        &self,
398        code: &str,
399        runtime: &RuntimeInfo,
400        env: &HashMap<String, String>,
401    ) -> Result<SandboxResult> {
402        let tmp_dir = tempfile::tempdir()?;
403        let script_path = tmp_dir.path().join("main.go");
404        {
405            let mut f = std::fs::File::create(&script_path)?;
406            f.write_all(code.as_bytes())?;
407        }
408
409        let mut cmd = Command::new(&runtime.binary);
410        cmd.arg("run")
411            .arg(&script_path)
412            .stdout(Stdio::piped())
413            .stderr(Stdio::null())
414            .envs(env);
415
416        self.run_with_timeout(cmd, "runtime=go")
417    }
418
419    /// Execute Rust code: write to temp file, compile with rustc, then run.
420    fn execute_rust(
421        &self,
422        code: &str,
423        runtime: &RuntimeInfo,
424        env: &HashMap<String, String>,
425    ) -> Result<SandboxResult> {
426        let tmp_dir = tempfile::tempdir()?;
427        let src_path = tmp_dir.path().join("sandbox.rs");
428        let bin_path = tmp_dir.path().join("sandbox_bin");
429        {
430            let mut f = std::fs::File::create(&src_path)?;
431            f.write_all(code.as_bytes())?;
432        }
433
434        // Compile
435        let compile = Command::new(&runtime.binary)
436            .arg(&src_path)
437            .arg("-o")
438            .arg(&bin_path)
439            .stdout(Stdio::null())
440            .stderr(Stdio::null())
441            .envs(env)
442            .status();
443
444        match compile {
445            Ok(status) if status.success() => {}
446            Ok(status) => {
447                return Ok(SandboxResult {
448                    stdout: String::new(),
449                    status_code: status.code().unwrap_or(1),
450                    was_truncated: false,
451                    was_indexed: false,
452                });
453            }
454            Err(e) => return Err(SqzError::Io(e)),
455        }
456
457        // Run the compiled binary
458        let mut cmd = Command::new(&bin_path);
459        cmd.stdout(Stdio::piped())
460            .stderr(Stdio::null())
461            .envs(env);
462
463        self.run_with_timeout(cmd, "runtime=rust")
464    }
465
466    /// Spawn the command, enforce timeout, capture stdout, and truncate if needed.
467    fn run_with_timeout(&self, mut cmd: Command, context: &str) -> Result<SandboxResult> {
468        let mut child = cmd.spawn().map_err(SqzError::Io)?;
469
470        // Wait with timeout
471        let status = match wait_with_timeout(&mut child, self.timeout) {
472            Ok(status) => status,
473            Err(_) => {
474                // Timeout — kill the process
475                let _ = child.kill();
476                let _ = child.wait();
477                return Err(SqzError::Other(format!(
478                    "sandbox execution timed out after {}s ({})",
479                    self.timeout.as_secs(),
480                    context
481                )));
482            }
483        };
484
485        // Read stdout
486        let stdout_raw = if let Some(mut stdout) = child.stdout.take() {
487            use std::io::Read;
488            let mut buf = Vec::new();
489            let _ = stdout.read_to_end(&mut buf);
490            buf
491        } else {
492            Vec::new()
493        };
494
495        // Truncate if needed
496        let truncated = stdout_raw.len() > self.max_output_bytes;
497        let stdout_bytes = if truncated {
498            &stdout_raw[..self.max_output_bytes]
499        } else {
500            &stdout_raw[..]
501        };
502
503        let stdout = String::from_utf8_lossy(stdout_bytes).into_owned();
504
505        Ok(SandboxResult {
506            stdout,
507            status_code: status.code().unwrap_or(-1),
508            was_truncated: truncated,
509            was_indexed: false,
510        })
511    }
512}
513
514// ── Free functions ────────────────────────────────────────────────────────────
515
/// Poll `child` until it exits or `timeout` has elapsed.
///
/// Returns the exit status when the process finishes in time. Returns
/// `Err(())` when the timeout is reached first or when polling the process
/// fails; the child is *not* killed here, that is left to the caller.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> std::result::Result<std::process::ExitStatus, ()> {
    /// Pause between two polls of the child.
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    let started = std::time::Instant::now();
    loop {
        if let Some(status) = child.try_wait().map_err(|_| ())? {
            return Ok(status);
        }
        if started.elapsed() >= timeout {
            return Err(());
        }
        std::thread::sleep(POLL_INTERVAL);
    }
}
538
539/// Build an environment map containing only credential-related variables
540/// from the current process environment.
541fn build_credential_env() -> HashMap<String, String> {
542    let mut env = HashMap::new();
543    for (key, value) in std::env::vars() {
544        if CREDENTIAL_ENV_PREFIXES
545            .iter()
546            .any(|prefix| key.starts_with(prefix))
547        {
548            env.insert(key, value);
549        }
550    }
551    env
552}
553
554/// Probe the system for available runtimes.
555fn detect_runtimes() -> HashMap<String, RuntimeInfo> {
556    let mut runtimes = HashMap::new();
557
558    let candidates: &[(&str, &[&str], &str)] = &[
559        // (language key, [binary candidates], language label)
560        ("js", &["node", "bun"], "js"),
561        ("ts", &["bun", "npx"], "ts"),
562        ("python", &["python3", "python"], "python"),
563        ("shell", &["bash", "sh"], "shell"),
564        ("ruby", &["ruby"], "ruby"),
565        ("go", &["go"], "go"),
566        ("rust", &["rustc"], "rust"),
567    ];
568
569    for &(lang_key, binaries, lang_label) in candidates {
570        for &bin in binaries {
571            if is_binary_available(bin) {
572                // For ts via npx, we use `npx tsx` as the actual command
573                let effective_binary = if lang_key == "ts" && bin == "npx" {
574                    "npx".to_string()
575                } else {
576                    bin.to_string()
577                };
578
579                runtimes.insert(
580                    lang_key.to_string(),
581                    RuntimeInfo {
582                        name: bin,
583                        binary: effective_binary,
584                        language: lang_label,
585                    },
586                );
587                break; // use first available binary
588            }
589        }
590    }
591
592    runtimes
593}
594
/// Check if a binary is available on PATH.
///
/// Runs `where <name>` on Windows and `which <name>` elsewhere with all
/// output discarded. Returns `true` only when that lookup exits
/// successfully, and `false` when it fails or cannot be started.
fn is_binary_available(name: &str) -> bool {
    let locator = if cfg!(windows) { "where" } else { "which" };

    let outcome = Command::new(locator)
        .arg(name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    match outcome {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
606
607// ── Tests ─────────────────────────────────────────────────────────────────────
608
609#[cfg(test)]
610mod tests {
611    use super::*;
612
613    #[test]
614    fn test_new_detects_runtimes() {
615        let executor = SandboxExecutor::new();
616        // At minimum, shell (bash/sh) should be available on any Unix system
617        assert!(
618            !executor.runtimes.is_empty(),
619            "should detect at least one runtime"
620        );
621    }
622
623    #[test]
624    fn test_supported_languages_list() {
625        let executor = SandboxExecutor::new();
626        let supported = executor.supported_languages();
627        assert!(supported.len() >= 6, "should list at least 6 supported languages");
628        assert!(supported.contains(&"js"));
629        assert!(supported.contains(&"python"));
630        assert!(supported.contains(&"shell"));
631        assert!(supported.contains(&"ruby"));
632        assert!(supported.contains(&"go"));
633        assert!(supported.contains(&"rust"));
634    }
635
636    #[test]
637    fn test_default_config() {
638        let executor = SandboxExecutor::new();
639        assert_eq!(executor.timeout(), Duration::from_secs(30));
640        assert_eq!(executor.max_output_bytes(), 1_048_576);
641    }
642
643    #[test]
644    fn test_custom_config() {
645        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 4096);
646        assert_eq!(executor.timeout(), Duration::from_secs(10));
647        assert_eq!(executor.max_output_bytes(), 4096);
648    }
649
650    #[test]
651    #[cfg(not(windows))]
652    fn test_execute_shell_echo() {
653        let executor = SandboxExecutor::new();
654        if !executor.is_available("shell") {
655            return; // skip if no shell
656        }
657        let result = executor.execute("echo hello sandbox", "shell").unwrap();
658        assert_eq!(result.status_code, 0);
659        assert_eq!(result.stdout.trim(), "hello sandbox");
660        assert!(!result.was_truncated);
661    }
662
663    #[test]
664    #[cfg(not(windows))]
665    fn test_execute_shell_captures_only_stdout() {
666        let executor = SandboxExecutor::new();
667        if !executor.is_available("shell") {
668            return;
669        }
670        // Write to both stdout and stderr — only stdout should appear
671        let code = r#"echo "visible"
672echo "hidden" >&2
673echo "also visible""#;
674        let result = executor.execute(code, "shell").unwrap();
675        assert!(result.stdout.contains("visible"));
676        assert!(result.stdout.contains("also visible"));
677        assert!(!result.stdout.contains("hidden"));
678    }
679
680    #[test]
681    fn test_execute_python() {
682        let executor = SandboxExecutor::new();
683        if !executor.is_available("python") {
684            return;
685        }
686        let result = executor.execute("print('hello from python')", "python").unwrap();
687        assert_eq!(result.status_code, 0);
688        assert_eq!(result.stdout.trim(), "hello from python");
689    }
690
691    #[test]
692    #[cfg(not(windows))]
693    fn test_execute_nonzero_exit() {
694        let executor = SandboxExecutor::new();
695        if !executor.is_available("shell") {
696            return;
697        }
698        let result = executor.execute("exit 42", "shell").unwrap();
699        assert_eq!(result.status_code, 42);
700    }
701
702    #[test]
703    #[cfg(not(windows))]
704    fn test_execute_timeout() {
705        let executor = SandboxExecutor::with_config(Duration::from_secs(1), 1024);
706        if !executor.is_available("shell") {
707            return;
708        }
709        let result = executor.execute("sleep 30", "shell");
710        assert!(result.is_err());
711        let err_msg = format!("{}", result.unwrap_err());
712        assert!(err_msg.contains("timed out"));
713    }
714
715    #[test]
716    fn test_execute_output_truncation() {
717        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 32);
718        if !executor.is_available("shell") {
719            return;
720        }
721        // Generate output larger than 32 bytes
722        let result = executor
723            .execute("for i in $(seq 1 100); do echo \"line $i\"; done", "shell")
724            .unwrap();
725        assert!(result.was_truncated);
726        assert!(result.stdout.len() <= 32);
727    }
728
729    #[test]
730    fn test_unsupported_runtime() {
731        let executor = SandboxExecutor::new();
732        let result = executor.execute("code", "brainfuck");
733        assert!(result.is_err());
734        let err_msg = format!("{}", result.unwrap_err());
735        assert!(err_msg.contains("unsupported or unavailable runtime"));
736    }
737
738    #[test]
739    fn test_case_insensitive_language() {
740        let executor = SandboxExecutor::new();
741        if !executor.is_available("shell") {
742            return;
743        }
744        let result = executor.execute("echo ok", "Shell");
745        assert!(result.is_ok());
746    }
747
748    #[test]
749    #[cfg(not(windows))]
750    fn test_credential_env_includes_path() {
751        let env = build_credential_env();
752        assert!(env.contains_key("PATH"), "PATH should be inherited");
753    }
754
755    #[test]
756    fn test_credential_env_includes_aws() {
757        // Temporarily set an AWS var to verify it's picked up
758        std::env::set_var("AWS_TEST_SANDBOX", "test_value");
759        let env = build_credential_env();
760        assert_eq!(env.get("AWS_TEST_SANDBOX").map(|s| s.as_str()), Some("test_value"));
761        std::env::remove_var("AWS_TEST_SANDBOX");
762    }
763
764    #[test]
765    fn test_is_binary_available() {
766        // `sh` should always be available on Unix
767        assert!(is_binary_available("sh"));
768        assert!(!is_binary_available("definitely_not_a_real_binary_xyz"));
769    }
770
771    // ── OutputFilter unit tests ───────────────────────────────────────────
772
773    #[test]
774    fn test_chunk_output_splits_on_double_newline() {
775        let text = "first paragraph\n\nsecond paragraph\n\nthird paragraph";
776        let chunks = OutputFilter::chunk_output(text);
777        assert_eq!(chunks.len(), 3);
778        assert_eq!(chunks[0], "first paragraph");
779        assert_eq!(chunks[1], "second paragraph");
780        assert_eq!(chunks[2], "third paragraph");
781    }
782
783    #[test]
784    fn test_chunk_output_splits_large_paragraphs() {
785        // Build a single paragraph > 512 bytes with many lines
786        let line = "a]".repeat(30); // 60 chars per line
787        let big_para = (0..20).map(|i| format!("{line} line{i}")).collect::<Vec<_>>().join("\n");
788        assert!(big_para.len() > 512);
789
790        let chunks = OutputFilter::chunk_output(&big_para);
791        assert!(chunks.len() > 1, "large paragraph should be sub-split");
792        for chunk in &chunks {
793            assert!(chunk.len() <= 600, "each sub-chunk should be roughly ≤512 bytes");
794        }
795    }
796
797    #[test]
798    fn test_chunk_output_empty_input() {
799        let chunks = OutputFilter::chunk_output("");
800        assert!(chunks.is_empty());
801    }
802
803    #[test]
804    fn test_filter_returns_matching_sections() {
805        let text = "error: compilation failed at line 42\n\n\
806                    warning: unused variable `x`\n\n\
807                    info: build started at 10:00\n\n\
808                    error: type mismatch in function foo\n\n\
809                    success: 3 tests passed";
810        let result = OutputFilter::filter(text, "error compilation").unwrap();
811        assert!(!result.matched_sections.is_empty(), "should find error-related chunks");
812        // At least one matched section should contain "error"
813        assert!(
814            result.matched_sections.iter().any(|s| s.contains("error")),
815            "matched sections should contain the intent keyword"
816        );
817        assert!(result.total_chunks >= 4);
818    }
819
820    #[test]
821    fn test_filter_returns_vocabulary() {
822        let text = "the quick brown fox jumps over the lazy dog\n\n\
823                    rust programming language is fast and safe\n\n\
824                    memory safety without garbage collection";
825        let result = OutputFilter::filter(text, "rust").unwrap();
826        assert!(!result.vocabulary.is_empty(), "vocabulary should not be empty");
827        // Vocabulary should contain stemmed terms from the content
828        // (porter stemmer may stem words, so check for presence of some terms)
829        let vocab_joined = result.vocabulary.join(" ");
830        assert!(
831            vocab_joined.contains("rust") || vocab_joined.contains("fast") || vocab_joined.contains("safe"),
832            "vocabulary should contain terms from the indexed content"
833        );
834    }
835
836    #[test]
837    fn test_filter_no_match_returns_empty() {
838        let text = "hello world\n\nfoo bar baz";
839        let result = OutputFilter::filter(text, "zzzznonexistent").unwrap();
840        assert!(result.matched_sections.is_empty());
841        assert_eq!(result.matched_chunks, 0);
842    }
843
844    #[test]
845    fn test_filter_special_chars_in_intent() {
846        // Intent with special characters should not crash FTS5
847        let text = "error: something went wrong\n\nwarning: check this";
848        let result = OutputFilter::filter(text, "error: (something) [wrong]");
849        assert!(result.is_ok(), "special chars in intent should be sanitized");
850    }
851
852    #[test]
853    #[cfg(not(windows))]
854    fn test_execute_with_intent_small_output_no_filter() {
855        let executor = SandboxExecutor::new();
856        if !executor.is_available("shell") {
857            return;
858        }
859        // Small output (< 5KB) should not trigger filtering
860        let (result, filtered) = executor
861            .execute_with_intent("echo hello", "shell", Some("hello"))
862            .unwrap();
863        assert_eq!(result.status_code, 0);
864        assert!(!result.was_indexed);
865        assert!(filtered.is_none());
866    }
867
868    #[test]
869    fn test_execute_with_intent_no_intent_no_filter() {
870        let executor = SandboxExecutor::new();
871        if !executor.is_available("shell") {
872            return;
873        }
874        // Even large output without intent should not filter
875        let code = "for i in $(seq 1 1000); do echo \"line $i: some padding text to make it bigger\"; done";
876        let (result, filtered) = executor
877            .execute_with_intent(code, "shell", None)
878            .unwrap();
879        assert!(!result.was_indexed);
880        assert!(filtered.is_none());
881    }
882
883    #[test]
884    #[cfg(not(windows))]
885    fn test_execute_with_intent_large_output_filters() {
886        let executor = SandboxExecutor::new();
887        if !executor.is_available("shell") {
888            return;
889        }
890        // Generate > 5KB of output with identifiable sections
891        let code = r#"
892for i in $(seq 1 50); do echo "error: compilation failed at module $i"; done
893echo ""
894for i in $(seq 1 50); do echo "info: processing file $i of 200"; done
895echo ""
896for i in $(seq 1 50); do echo "warning: deprecated API usage in handler $i"; done
897echo ""
898for i in $(seq 1 50); do echo "success: test suite $i passed with 100% coverage"; done
899"#;
900        let (result, filtered) = executor
901            .execute_with_intent(code, "shell", Some("error compilation"))
902            .unwrap();
903        assert!(result.was_indexed, "large output with intent should be indexed");
904        let filtered = filtered.expect("should have filtered output");
905        assert!(!filtered.matched_sections.is_empty(), "should have matched sections");
906        assert!(!filtered.vocabulary.is_empty(), "should have vocabulary");
907        assert!(filtered.total_chunks > 0);
908    }
909
910    // ── Property-based tests (Unix only — depend on bash/shell) ──────────
911
912    #[cfg(not(windows))]
913    mod proptests {
914        use super::*;
915        use proptest::prelude::*;
916
917        /// Generate a random alphanumeric label safe for use in shell echo commands.
918        fn safe_label() -> impl Strategy<Value = String> {
919            "[a-zA-Z0-9]{1,20}"
920        }
921
922        // **Validates: Requirements 30.1, 30.2**
923        //
924        // Property 35: Sandbox execution isolation — stdout only
925        //
926        // For any code execution that writes to both stdout and stderr,
927        // only stdout content appears in the returned SandboxResult.
928        // We use distinct prefixes to guarantee stdout and stderr
929        // messages are distinguishable.
930        proptest! {
931            #[test]
932            fn prop_only_stdout_captured(
933                label in safe_label(),
934            ) {
935                let executor = SandboxExecutor::new();
936                if !executor.is_available("shell") {
937                    return Ok(());
938                }
939
940                let stdout_msg = format!("OUT_{label}");
941                let stderr_msg = format!("ERR_{label}");
942
943                // Script writes distinct messages to stdout and stderr
944                let code = format!(
945                    "echo \"{stdout_msg}\"\necho \"{stderr_msg}\" >&2"
946                );
947                let result = executor.execute(&code, "shell").unwrap();
948
949                // stdout content must be present
950                prop_assert!(
951                    result.stdout.contains(&stdout_msg),
952                    "stdout should contain the stdout message '{}', got: '{}'",
953                    stdout_msg, result.stdout
954                );
955                // stderr content must never appear
956                prop_assert!(
957                    !result.stdout.contains(&stderr_msg),
958                    "stdout should NOT contain the stderr message '{}', got: '{}'",
959                    stderr_msg, result.stdout
960                );
961            }
962        }
963
964        // **Validates: Requirements 30.1, 30.2**
965        //
966        // Property 35: Sandbox execution isolation — subprocess isolation
967        //
968        // Each execution runs in an isolated subprocess with no shared
969        // state. Setting an env var in one execution must not be visible
970        // in a subsequent execution.
971        proptest! {
972            #[test]
973            fn prop_no_shared_state_between_executions(
974                var_name in "[A-Z]{3,8}",
975                var_value in "[a-z0-9]{1,10}",
976            ) {
977                let executor = SandboxExecutor::new();
978                if !executor.is_available("shell") {
979                    return Ok(());
980                }
981
982                let unique_var = format!("SQZ_PROP_{var_name}");
983
984                // First execution: export an env var
985                let code1 = format!(
986                    "export {unique_var}={var_value}\necho \"set {unique_var}\""
987                );
988                let result1 = executor.execute(&code1, "shell").unwrap();
989                prop_assert!(
990                    result1.stdout.contains(&format!("set {unique_var}")),
991                    "first execution should succeed"
992                );
993
994                // Second execution: try to read that env var — it should be empty
995                let code2 = format!(
996                    "echo \"val=${{{unique_var}:-UNSET}}\""
997                );
998                let result2 = executor.execute(&code2, "shell").unwrap();
999                prop_assert!(
1000                    result2.stdout.contains("val=UNSET"),
1001                    "env var from first execution should not leak into second; got: '{}'",
1002                    result2.stdout
1003                );
1004            }
1005        }
1006    }
1007}
sqz_engine/sandbox_executor.rs

sqz_engine/
sandbox_executor.rs