Skip to main content

sqz_engine/
sandbox_executor.rs

1use std::collections::HashMap;
2use std::io::Write;
3use std::process::{Command, Stdio};
4use std::time::Duration;
5
6use rusqlite::{params, Connection};
7
8use crate::error::{Result, SqzError};
9
/// Allow-list used by `build_credential_env` to decide which variables of
/// the parent process are copied into the sandbox environment map.
///
/// Each entry is compared with `str::starts_with`, so it behaves as a
/// *prefix*: `"AWS_"` admits `AWS_PROFILE`, and a bare name such as `"HOME"`
/// also admits any longer variable name that begins with it.
///
/// The purpose is to let sandboxed code drive authenticated CLIs
/// (gh, aws, gcloud, kubectl, docker) without the credentials themselves
/// ever being written into the conversation context.
const CREDENTIAL_ENV_PREFIXES: &[&str] = &[
    // Cloud providers: AWS first, then the Google Cloud families.
    "AWS_",
    "GCLOUD_",
    "GOOGLE_",
    "CLOUDSDK_",
    // Developer tooling: GitHub CLI, Kubernetes, Docker.
    "GH_",
    "GITHUB_",
    "KUBECONFIG",
    "DOCKER_",
    // Baseline process environment most CLIs expect to find.
    "HOME",
    "PATH",
    "USER",
    "LANG",
    "TERM",
    "SHELL",
    "TMPDIR",
    "XDG_",
];
38
/// One language runtime discovered on this machine.
#[derive(Debug, Clone)]
pub struct RuntimeInfo {
    /// Name of the executable that was probed (for example `"node"` or `"npx"`).
    pub name: &'static str,
    /// Program handed to `Command::new` when code is executed.
    pub binary: String,
    /// Language label this runtime serves (for example `"js"` or `"python"`).
    pub language: &'static str,
}
46
/// What a single sandboxed subprocess run produced.
#[derive(Debug, Clone)]
pub struct SandboxResult {
    /// Captured standard output; this is the only stream that is ever
    /// forwarded into the context window.
    pub stdout: String,
    /// Exit code reported by the process.
    pub status_code: i32,
    /// `true` when stdout was longer than `max_output_bytes` and was cut.
    pub was_truncated: bool,
    /// `true` when stdout was replaced by FTS5 intent-filtered sections.
    pub was_indexed: bool,
}
59
/// Stdout must be strictly larger than this many bytes (5 KiB) before
/// intent-driven filtering is applied.
const OUTPUT_FILTER_THRESHOLD: usize = 5 << 10;
62
/// Outcome of running a large output through the FTS5/BM25 intent filter.
#[derive(Debug, Clone)]
pub struct FilteredOutput {
    /// Chunks of the original output that matched the intent, best rank first.
    pub matched_sections: Vec<String>,
    /// Indexed terms a caller can use to phrase follow-up queries.
    pub vocabulary: Vec<String>,
    /// How many chunks the output was divided into before indexing.
    pub total_chunks: usize,
    /// How many of those chunks matched the intent.
    pub matched_chunks: usize,
}
75
76/// Executes code in isolated subprocesses.
77///
78/// Only stdout enters the context window — stderr, file system side effects,
79/// and environment variables never leak into the LLM context.
80pub struct SandboxExecutor {
81    timeout: Duration,
82    max_output_bytes: usize,
83    runtimes: HashMap<String, RuntimeInfo>,
84}
85
86// ── OutputFilter ──────────────────────────────────────────────────────────────
87
/// Stateless helper that loads large text output into a throw-away
/// in-memory FTS5 table, then reports the BM25-ranked matches together with
/// a vocabulary of indexed terms.
pub(crate) struct OutputFilter;
91
92impl OutputFilter {
93    /// Chunk `text` by double-newline paragraphs (or every ~512 bytes for
94    /// long runs without blank lines), index into FTS5, and return the
95    /// BM25-matched sections for `intent`.
96    pub fn filter(text: &str, intent: &str) -> Result<FilteredOutput> {
97        let chunks = Self::chunk_output(text);
98        let total_chunks = chunks.len();
99
100        let conn = Connection::open_in_memory()
101            .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;
102
103        conn.execute_batch(
104            r#"
105            CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_fts USING fts5(
106                chunk_id,
107                body,
108                tokenize='porter ascii'
109            );
110            "#,
111        )
112        .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;
113
114        // Insert chunks
115        for (i, chunk) in chunks.iter().enumerate() {
116            conn.execute(
117                "INSERT INTO sandbox_fts(chunk_id, body) VALUES (?1, ?2)",
118                params![i.to_string(), chunk],
119            )
120            .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
121        }
122
123        // BM25 search
124        let matched_sections = Self::bm25_search(&conn, intent, &chunks)?;
125        let matched_chunks = matched_sections.len();
126
127        // Extract vocabulary
128        let vocabulary = Self::extract_vocabulary(&conn)?;
129
130        Ok(FilteredOutput {
131            matched_sections,
132            vocabulary,
133            total_chunks,
134            matched_chunks,
135        })
136    }
137
138    /// Split output into chunks on double-newline boundaries. If a chunk
139    /// exceeds 512 bytes, split it further on single newlines.
140    fn chunk_output(text: &str) -> Vec<String> {
141        const MAX_CHUNK_BYTES: usize = 512;
142
143        let paragraphs: Vec<&str> = text.split("\n\n").collect();
144        let mut chunks = Vec::new();
145
146        for para in paragraphs {
147            let trimmed = para.trim();
148            if trimmed.is_empty() {
149                continue;
150            }
151            if trimmed.len() <= MAX_CHUNK_BYTES {
152                chunks.push(trimmed.to_string());
153            } else {
154                // Sub-split on single newlines
155                let mut current = String::new();
156                for line in trimmed.lines() {
157                    if !current.is_empty() && current.len() + line.len() + 1 > MAX_CHUNK_BYTES {
158                        chunks.push(std::mem::take(&mut current));
159                    }
160                    if !current.is_empty() {
161                        current.push('\n');
162                    }
163                    current.push_str(line);
164                }
165                if !current.is_empty() {
166                    chunks.push(current);
167                }
168            }
169        }
170
171        // Guarantee at least one chunk even for empty-ish input
172        if chunks.is_empty() && !text.trim().is_empty() {
173            chunks.push(text.trim().to_string());
174        }
175
176        chunks
177    }
178
179    /// Query the FTS5 table with the intent and return matching chunk bodies
180    /// ranked by BM25.
181    fn bm25_search(conn: &Connection, intent: &str, _chunks: &[String]) -> Result<Vec<String>> {
182        // Sanitize intent for FTS5 query: keep alphanumeric and spaces
183        let sanitized: String = intent
184            .chars()
185            .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
186            .collect();
187        let terms: Vec<&str> = sanitized.split_whitespace().collect();
188        if terms.is_empty() {
189            return Ok(Vec::new());
190        }
191
192        // Build an OR query so partial matches still return results
193        let fts_query = terms.join(" OR ");
194
195        let mut stmt = conn
196            .prepare(
197                r#"SELECT body FROM sandbox_fts
198                   WHERE sandbox_fts MATCH ?1
199                   ORDER BY rank
200                   LIMIT 20"#,
201            )
202            .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
203
204        let rows = stmt
205            .query_map(params![fts_query], |row| row.get::<_, String>(0))
206            .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;
207
208        let mut results = Vec::new();
209        for row in rows {
210            results.push(
211                row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?,
212            );
213        }
214        Ok(results)
215    }
216
217    /// Extract a vocabulary of distinct searchable terms from the indexed
218    /// content. Uses the FTS5 `vocab` virtual table to pull out tokens.
219    fn extract_vocabulary(conn: &Connection) -> Result<Vec<String>> {
220        // Create a vocab table over the FTS5 index using 'col' detail
221        // which gives (term, col, doc, cnt) columns.
222        conn.execute_batch(
223            "CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_vocab USING fts5vocab(sandbox_fts, col);",
224        )
225        .map_err(|e| SqzError::Other(format!("FTS5 vocab table creation failed: {e}")))?;
226
227        let mut stmt = conn
228            .prepare(
229                r#"SELECT term FROM sandbox_vocab
230                   WHERE col = 'body'
231                   ORDER BY doc DESC
232                   LIMIT 100"#,
233            )
234            .map_err(|e| SqzError::Other(format!("vocab query prepare failed: {e}")))?;
235
236        let rows = stmt
237            .query_map([], |row| row.get::<_, String>(0))
238            .map_err(|e| SqzError::Other(format!("vocab query failed: {e}")))?;
239
240        let mut vocab = Vec::new();
241        for row in rows {
242            vocab.push(
243                row.map_err(|e| SqzError::Other(format!("vocab row read failed: {e}")))?,
244            );
245        }
246        Ok(vocab)
247    }
248}
249
250impl SandboxExecutor {
251    /// Default timeout: 30 seconds.
252    pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
253    /// Default max output: 1 MB.
254    pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 1_048_576;
255
256    /// Create a new executor, auto-detecting available runtimes.
257    pub fn new() -> Self {
258        Self::with_config(
259            Duration::from_secs(Self::DEFAULT_TIMEOUT_SECS),
260            Self::DEFAULT_MAX_OUTPUT_BYTES,
261        )
262    }
263
264    /// Create with custom timeout and max output size.
265    pub fn with_config(timeout: Duration, max_output_bytes: usize) -> Self {
266        let runtimes = detect_runtimes();
267        Self {
268            timeout,
269            max_output_bytes,
270            runtimes,
271        }
272    }
273
274    /// Execute code in the given language runtime.
275    ///
276    /// Only stdout is captured and returned. Stderr is discarded.
277    /// Credentials for gh, aws, gcloud, kubectl, docker are passed through
278    /// via environment variable inheritance.
279    pub fn execute(&self, code: &str, language: &str) -> Result<SandboxResult> {
280        let lang = language.to_lowercase();
281        let runtime = self
282            .runtimes
283            .get(&lang)
284            .ok_or_else(|| SqzError::Other(format!("unsupported or unavailable runtime: {lang}")))?;
285
286        let env = build_credential_env();
287
288        let result = match lang.as_str() {
289            "go" => self.execute_go(code, runtime, &env),
290            "rust" => self.execute_rust(code, runtime, &env),
291            _ => self.execute_interpreted(code, runtime, &env),
292        }?;
293
294        Ok(result)
295    }
296
297    /// Execute code and, when stdout exceeds 5 KB and `intent` is provided,
298    /// index the full output into an in-memory FTS5 table and return only
299    /// BM25-matched sections plus a vocabulary of searchable terms.
300    ///
301    /// When the output is small or no intent is given, behaves identically
302    /// to [`execute`] (returns full stdout, `filtered` is `None`).
303    pub fn execute_with_intent(
304        &self,
305        code: &str,
306        language: &str,
307        intent: Option<&str>,
308    ) -> Result<(SandboxResult, Option<FilteredOutput>)> {
309        let mut result = self.execute(code, language)?;
310
311        let should_filter = result.stdout.len() > OUTPUT_FILTER_THRESHOLD
312            && intent.map_or(false, |i| !i.trim().is_empty());
313
314        if should_filter {
315            let intent_str = intent.unwrap(); // safe: checked above
316            let filtered = OutputFilter::filter(&result.stdout, intent_str)?;
317            result.was_indexed = true;
318            // Replace stdout with only the matched sections so the LLM
319            // context window receives the filtered view.
320            result.stdout = filtered.matched_sections.join("\n\n");
321            Ok((result, Some(filtered)))
322        } else {
323            Ok((result, None))
324        }
325    }
326
327    /// Languages that this executor currently supports (only those detected on this system).
328    pub fn available_languages(&self) -> Vec<&str> {
329        self.runtimes.values().map(|r| r.language).collect()
330    }
331
332    /// All languages the executor can potentially support.
333    pub fn supported_languages(&self) -> &[&str] {
334        &["js", "ts", "python", "shell", "ruby", "go", "rust"]
335    }
336
337    /// Check whether a specific language runtime is available.
338    pub fn is_available(&self, language: &str) -> bool {
339        self.runtimes.contains_key(&language.to_lowercase())
340    }
341
342    /// Current timeout setting.
343    pub fn timeout(&self) -> Duration {
344        self.timeout
345    }
346
347    /// Current max output size in bytes.
348    pub fn max_output_bytes(&self) -> usize {
349        self.max_output_bytes
350    }
351
352    // ── Private helpers ───────────────────────────────────────────────────
353
354    /// Execute an interpreted language (JS, TS, Python, Shell, Ruby) by
355    /// writing code to a temp file and invoking the runtime binary.
356    fn execute_interpreted(
357        &self,
358        code: &str,
359        runtime: &RuntimeInfo,
360        env: &HashMap<String, String>,
361    ) -> Result<SandboxResult> {
362        let ext = match runtime.language {
363            "js" => "js",
364            "ts" => "ts",
365            "python" => "py",
366            "shell" => "sh",
367            "ruby" => "rb",
368            _ => "tmp",
369        };
370
371        let tmp_dir = tempfile::tempdir().map_err(|e| SqzError::Io(e))?;
372        let script_path = tmp_dir.path().join(format!("sandbox_script.{ext}"));
373        {
374            let mut f = std::fs::File::create(&script_path)?;
375            f.write_all(code.as_bytes())?;
376        }
377
378        let mut cmd = Command::new(&runtime.binary);
379
380        // Special case: TypeScript via npx needs `tsx` as the first argument
381        if runtime.language == "ts" && runtime.name == "npx" {
382            cmd.arg("tsx");
383        }
384
385        cmd.arg(&script_path)
386            .stdout(Stdio::piped())
387            .stderr(Stdio::null()) // stderr never enters context
388            .envs(env);
389
390        self.run_with_timeout(cmd)
391    }
392
393    /// Execute Go code: write to temp file, run with `go run`.
394    fn execute_go(
395        &self,
396        code: &str,
397        runtime: &RuntimeInfo,
398        env: &HashMap<String, String>,
399    ) -> Result<SandboxResult> {
400        let tmp_dir = tempfile::tempdir()?;
401        let script_path = tmp_dir.path().join("main.go");
402        {
403            let mut f = std::fs::File::create(&script_path)?;
404            f.write_all(code.as_bytes())?;
405        }
406
407        let mut cmd = Command::new(&runtime.binary);
408        cmd.arg("run")
409            .arg(&script_path)
410            .stdout(Stdio::piped())
411            .stderr(Stdio::null())
412            .envs(env);
413
414        self.run_with_timeout(cmd)
415    }
416
417    /// Execute Rust code: write to temp file, compile with rustc, then run.
418    fn execute_rust(
419        &self,
420        code: &str,
421        runtime: &RuntimeInfo,
422        env: &HashMap<String, String>,
423    ) -> Result<SandboxResult> {
424        let tmp_dir = tempfile::tempdir()?;
425        let src_path = tmp_dir.path().join("sandbox.rs");
426        let bin_path = tmp_dir.path().join("sandbox_bin");
427        {
428            let mut f = std::fs::File::create(&src_path)?;
429            f.write_all(code.as_bytes())?;
430        }
431
432        // Compile
433        let compile = Command::new(&runtime.binary)
434            .arg(&src_path)
435            .arg("-o")
436            .arg(&bin_path)
437            .stdout(Stdio::null())
438            .stderr(Stdio::null())
439            .envs(env)
440            .status();
441
442        match compile {
443            Ok(status) if status.success() => {}
444            Ok(status) => {
445                return Ok(SandboxResult {
446                    stdout: String::new(),
447                    status_code: status.code().unwrap_or(1),
448                    was_truncated: false,
449                    was_indexed: false,
450                });
451            }
452            Err(e) => return Err(SqzError::Io(e)),
453        }
454
455        // Run the compiled binary
456        let mut cmd = Command::new(&bin_path);
457        cmd.stdout(Stdio::piped())
458            .stderr(Stdio::null())
459            .envs(env);
460
461        self.run_with_timeout(cmd)
462    }
463
464    /// Spawn the command, enforce timeout, capture stdout, and truncate if needed.
465    fn run_with_timeout(&self, mut cmd: Command) -> Result<SandboxResult> {
466        let mut child = cmd.spawn().map_err(SqzError::Io)?;
467
468        // Wait with timeout
469        let status = match wait_with_timeout(&mut child, self.timeout) {
470            Ok(status) => status,
471            Err(_) => {
472                // Timeout — kill the process
473                let _ = child.kill();
474                let _ = child.wait();
475                return Err(SqzError::Other(format!(
476                    "sandbox execution timed out after {}s",
477                    self.timeout.as_secs()
478                )));
479            }
480        };
481
482        // Read stdout
483        let stdout_raw = if let Some(mut stdout) = child.stdout.take() {
484            use std::io::Read;
485            let mut buf = Vec::new();
486            let _ = stdout.read_to_end(&mut buf);
487            buf
488        } else {
489            Vec::new()
490        };
491
492        // Truncate if needed
493        let truncated = stdout_raw.len() > self.max_output_bytes;
494        let stdout_bytes = if truncated {
495            &stdout_raw[..self.max_output_bytes]
496        } else {
497            &stdout_raw[..]
498        };
499
500        let stdout = String::from_utf8_lossy(stdout_bytes).into_owned();
501
502        Ok(SandboxResult {
503            stdout,
504            status_code: status.code().unwrap_or(-1),
505            was_truncated: truncated,
506            was_indexed: false,
507        })
508    }
509}
510
511// ── Free functions ────────────────────────────────────────────────────────────
512
/// Poll `child` until it exits or `timeout` has elapsed.
///
/// Returns the exit status when the process finishes in time. Returns
/// `Err(())` when the timeout expires first, or when the process status
/// cannot be queried. The child is *not* killed here; that is left to the
/// caller.
///
/// The process is polled every 50 ms, but the last sleep is shortened to the
/// time actually remaining so the function does not overshoot the deadline
/// by up to a full polling interval.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> std::result::Result<std::process::ExitStatus, ()> {
    const POLL_INTERVAL: Duration = Duration::from_millis(50);
    let started = std::time::Instant::now();

    loop {
        match child.try_wait() {
            Ok(Some(status)) => return Ok(status),
            Ok(None) => {
                let elapsed = started.elapsed();
                if elapsed >= timeout {
                    return Err(());
                }
                // `elapsed < timeout` here, so the subtraction cannot underflow.
                std::thread::sleep(POLL_INTERVAL.min(timeout - elapsed));
            }
            Err(_) => return Err(()),
        }
    }
}
535
536/// Build an environment map containing only credential-related variables
537/// from the current process environment.
538fn build_credential_env() -> HashMap<String, String> {
539    let mut env = HashMap::new();
540    for (key, value) in std::env::vars() {
541        if CREDENTIAL_ENV_PREFIXES
542            .iter()
543            .any(|prefix| key.starts_with(prefix))
544        {
545            env.insert(key, value);
546        }
547    }
548    env
549}
550
551/// Probe the system for available runtimes.
552fn detect_runtimes() -> HashMap<String, RuntimeInfo> {
553    let mut runtimes = HashMap::new();
554
555    let candidates: &[(&str, &[&str], &str)] = &[
556        // (language key, [binary candidates], language label)
557        ("js", &["node", "bun"], "js"),
558        ("ts", &["bun", "npx"], "ts"),
559        ("python", &["python3", "python"], "python"),
560        ("shell", &["bash", "sh"], "shell"),
561        ("ruby", &["ruby"], "ruby"),
562        ("go", &["go"], "go"),
563        ("rust", &["rustc"], "rust"),
564    ];
565
566    for &(lang_key, binaries, lang_label) in candidates {
567        for &bin in binaries {
568            if is_binary_available(bin) {
569                // For ts via npx, we use `npx tsx` as the actual command
570                let effective_binary = if lang_key == "ts" && bin == "npx" {
571                    "npx".to_string()
572                } else {
573                    bin.to_string()
574                };
575
576                runtimes.insert(
577                    lang_key.to_string(),
578                    RuntimeInfo {
579                        name: bin,
580                        binary: effective_binary,
581                        language: lang_label,
582                    },
583                );
584                break; // use first available binary
585            }
586        }
587    }
588
589    runtimes
590}
591
/// Check whether an executable called `name` can be found on `PATH`.
///
/// The directories of `PATH` are searched directly instead of spawning the
/// external `which` program: `which` does not exist on Windows and is absent
/// from some minimal images, where every runtime would wrongly be reported
/// as unavailable.
///
/// A candidate counts when it is a regular file and, on Unix, has at least
/// one execute permission bit set. On Windows the extensions listed in
/// `PATHEXT` (default `.COM;.EXE;.BAT;.CMD`) are tried in addition to the
/// bare name.
///
/// Returns `false` when `PATH` is not set.
fn is_binary_available(name: &str) -> bool {
    #[cfg(unix)]
    fn mode_allows_exec(meta: &std::fs::Metadata) -> bool {
        use std::os::unix::fs::PermissionsExt;
        meta.permissions().mode() & 0o111 != 0
    }

    #[cfg(not(unix))]
    fn mode_allows_exec(_meta: &std::fs::Metadata) -> bool {
        true
    }

    /// True when `path` is a regular file that may be executed.
    fn is_executable(path: &std::path::Path) -> bool {
        match std::fs::metadata(path) {
            Ok(meta) => meta.is_file() && mode_allows_exec(&meta),
            Err(_) => false,
        }
    }

    let path_var = match std::env::var_os("PATH") {
        Some(value) => value,
        None => return false,
    };

    // Extra file extensions are only meaningful on Windows.
    let extensions: Vec<String> = if cfg!(windows) {
        std::env::var("PATHEXT")
            .unwrap_or_else(|_| String::from(".COM;.EXE;.BAT;.CMD"))
            .split(';')
            .filter(|ext| !ext.is_empty())
            .map(str::to_string)
            .collect()
    } else {
        Vec::new()
    };

    std::env::split_paths(&path_var).any(|dir| {
        is_executable(&dir.join(name))
            || extensions
                .iter()
                .any(|ext| is_executable(&dir.join(format!("{name}{ext}"))))
    })
}
602
603// ── Tests ─────────────────────────────────────────────────────────────────────
604
605#[cfg(test)]
606mod tests {
607    use super::*;
608
609    #[test]
610    fn test_new_detects_runtimes() {
611        let executor = SandboxExecutor::new();
612        // At minimum, shell (bash/sh) should be available on any Unix system
613        assert!(
614            !executor.runtimes.is_empty(),
615            "should detect at least one runtime"
616        );
617    }
618
619    #[test]
620    fn test_supported_languages_list() {
621        let executor = SandboxExecutor::new();
622        let supported = executor.supported_languages();
623        assert!(supported.len() >= 6, "should list at least 6 supported languages");
624        assert!(supported.contains(&"js"));
625        assert!(supported.contains(&"python"));
626        assert!(supported.contains(&"shell"));
627        assert!(supported.contains(&"ruby"));
628        assert!(supported.contains(&"go"));
629        assert!(supported.contains(&"rust"));
630    }
631
632    #[test]
633    fn test_default_config() {
634        let executor = SandboxExecutor::new();
635        assert_eq!(executor.timeout(), Duration::from_secs(30));
636        assert_eq!(executor.max_output_bytes(), 1_048_576);
637    }
638
639    #[test]
640    fn test_custom_config() {
641        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 4096);
642        assert_eq!(executor.timeout(), Duration::from_secs(10));
643        assert_eq!(executor.max_output_bytes(), 4096);
644    }
645
646    #[test]
647    #[cfg(not(windows))]
648    fn test_execute_shell_echo() {
649        let executor = SandboxExecutor::new();
650        if !executor.is_available("shell") {
651            return; // skip if no shell
652        }
653        let result = executor.execute("echo hello sandbox", "shell").unwrap();
654        assert_eq!(result.status_code, 0);
655        assert_eq!(result.stdout.trim(), "hello sandbox");
656        assert!(!result.was_truncated);
657    }
658
659    #[test]
660    #[cfg(not(windows))]
661    fn test_execute_shell_captures_only_stdout() {
662        let executor = SandboxExecutor::new();
663        if !executor.is_available("shell") {
664            return;
665        }
666        // Write to both stdout and stderr — only stdout should appear
667        let code = r#"echo "visible"
668echo "hidden" >&2
669echo "also visible""#;
670        let result = executor.execute(code, "shell").unwrap();
671        assert!(result.stdout.contains("visible"));
672        assert!(result.stdout.contains("also visible"));
673        assert!(!result.stdout.contains("hidden"));
674    }
675
676    #[test]
677    fn test_execute_python() {
678        let executor = SandboxExecutor::new();
679        if !executor.is_available("python") {
680            return;
681        }
682        let result = executor.execute("print('hello from python')", "python").unwrap();
683        assert_eq!(result.status_code, 0);
684        assert_eq!(result.stdout.trim(), "hello from python");
685    }
686
687    #[test]
688    #[cfg(not(windows))]
689    fn test_execute_nonzero_exit() {
690        let executor = SandboxExecutor::new();
691        if !executor.is_available("shell") {
692            return;
693        }
694        let result = executor.execute("exit 42", "shell").unwrap();
695        assert_eq!(result.status_code, 42);
696    }
697
698    #[test]
699    #[cfg(not(windows))]
700    fn test_execute_timeout() {
701        let executor = SandboxExecutor::with_config(Duration::from_secs(1), 1024);
702        if !executor.is_available("shell") {
703            return;
704        }
705        let result = executor.execute("sleep 30", "shell");
706        assert!(result.is_err());
707        let err_msg = format!("{}", result.unwrap_err());
708        assert!(err_msg.contains("timed out"));
709    }
710
711    #[test]
712    fn test_execute_output_truncation() {
713        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 32);
714        if !executor.is_available("shell") {
715            return;
716        }
717        // Generate output larger than 32 bytes
718        let result = executor
719            .execute("for i in $(seq 1 100); do echo \"line $i\"; done", "shell")
720            .unwrap();
721        assert!(result.was_truncated);
722        assert!(result.stdout.len() <= 32);
723    }
724
725    #[test]
726    fn test_unsupported_runtime() {
727        let executor = SandboxExecutor::new();
728        let result = executor.execute("code", "brainfuck");
729        assert!(result.is_err());
730        let err_msg = format!("{}", result.unwrap_err());
731        assert!(err_msg.contains("unsupported or unavailable runtime"));
732    }
733
734    #[test]
735    fn test_case_insensitive_language() {
736        let executor = SandboxExecutor::new();
737        if !executor.is_available("shell") {
738            return;
739        }
740        let result = executor.execute("echo ok", "Shell");
741        assert!(result.is_ok());
742    }
743
744    #[test]
745    #[cfg(not(windows))]
746    fn test_credential_env_includes_path() {
747        let env = build_credential_env();
748        assert!(env.contains_key("PATH"), "PATH should be inherited");
749    }
750
751    #[test]
752    fn test_credential_env_includes_aws() {
753        // Temporarily set an AWS var to verify it's picked up
754        std::env::set_var("AWS_TEST_SANDBOX", "test_value");
755        let env = build_credential_env();
756        assert_eq!(env.get("AWS_TEST_SANDBOX").map(|s| s.as_str()), Some("test_value"));
757        std::env::remove_var("AWS_TEST_SANDBOX");
758    }
759
760    #[test]
761    fn test_is_binary_available() {
762        // `sh` should always be available on Unix
763        assert!(is_binary_available("sh"));
764        assert!(!is_binary_available("definitely_not_a_real_binary_xyz"));
765    }
766
767    // ── OutputFilter unit tests ───────────────────────────────────────────
768
769    #[test]
770    fn test_chunk_output_splits_on_double_newline() {
771        let text = "first paragraph\n\nsecond paragraph\n\nthird paragraph";
772        let chunks = OutputFilter::chunk_output(text);
773        assert_eq!(chunks.len(), 3);
774        assert_eq!(chunks[0], "first paragraph");
775        assert_eq!(chunks[1], "second paragraph");
776        assert_eq!(chunks[2], "third paragraph");
777    }
778
779    #[test]
780    fn test_chunk_output_splits_large_paragraphs() {
781        // Build a single paragraph > 512 bytes with many lines
782        let line = "a]".repeat(30); // 60 chars per line
783        let big_para = (0..20).map(|i| format!("{line} line{i}")).collect::<Vec<_>>().join("\n");
784        assert!(big_para.len() > 512);
785
786        let chunks = OutputFilter::chunk_output(&big_para);
787        assert!(chunks.len() > 1, "large paragraph should be sub-split");
788        for chunk in &chunks {
789            assert!(chunk.len() <= 600, "each sub-chunk should be roughly ≤512 bytes");
790        }
791    }
792
793    #[test]
794    fn test_chunk_output_empty_input() {
795        let chunks = OutputFilter::chunk_output("");
796        assert!(chunks.is_empty());
797    }
798
799    #[test]
800    fn test_filter_returns_matching_sections() {
801        let text = "error: compilation failed at line 42\n\n\
802                    warning: unused variable `x`\n\n\
803                    info: build started at 10:00\n\n\
804                    error: type mismatch in function foo\n\n\
805                    success: 3 tests passed";
806        let result = OutputFilter::filter(text, "error compilation").unwrap();
807        assert!(!result.matched_sections.is_empty(), "should find error-related chunks");
808        // At least one matched section should contain "error"
809        assert!(
810            result.matched_sections.iter().any(|s| s.contains("error")),
811            "matched sections should contain the intent keyword"
812        );
813        assert!(result.total_chunks >= 4);
814    }
815
816    #[test]
817    fn test_filter_returns_vocabulary() {
818        let text = "the quick brown fox jumps over the lazy dog\n\n\
819                    rust programming language is fast and safe\n\n\
820                    memory safety without garbage collection";
821        let result = OutputFilter::filter(text, "rust").unwrap();
822        assert!(!result.vocabulary.is_empty(), "vocabulary should not be empty");
823        // Vocabulary should contain stemmed terms from the content
824        // (porter stemmer may stem words, so check for presence of some terms)
825        let vocab_joined = result.vocabulary.join(" ");
826        assert!(
827            vocab_joined.contains("rust") || vocab_joined.contains("fast") || vocab_joined.contains("safe"),
828            "vocabulary should contain terms from the indexed content"
829        );
830    }
831
832    #[test]
833    fn test_filter_no_match_returns_empty() {
834        let text = "hello world\n\nfoo bar baz";
835        let result = OutputFilter::filter(text, "zzzznonexistent").unwrap();
836        assert!(result.matched_sections.is_empty());
837        assert_eq!(result.matched_chunks, 0);
838    }
839
840    #[test]
841    fn test_filter_special_chars_in_intent() {
842        // Intent with special characters should not crash FTS5
843        let text = "error: something went wrong\n\nwarning: check this";
844        let result = OutputFilter::filter(text, "error: (something) [wrong]");
845        assert!(result.is_ok(), "special chars in intent should be sanitized");
846    }
847
848    #[test]
849    #[cfg(not(windows))]
850    fn test_execute_with_intent_small_output_no_filter() {
851        let executor = SandboxExecutor::new();
852        if !executor.is_available("shell") {
853            return;
854        }
855        // Small output (< 5KB) should not trigger filtering
856        let (result, filtered) = executor
857            .execute_with_intent("echo hello", "shell", Some("hello"))
858            .unwrap();
859        assert_eq!(result.status_code, 0);
860        assert!(!result.was_indexed);
861        assert!(filtered.is_none());
862    }
863
/// Without an intent, output is never indexed or filtered, however large it is.
///
/// Gated to non-Windows targets like the other shell-script tests in this
/// module, because the script relies on POSIX shell syntax and `seq`.
#[test]
#[cfg(not(windows))]
fn test_execute_with_intent_no_intent_no_filter() {
    // Local copy of the 5 KB size above which intent-driven filtering applies.
    const FILTER_THRESHOLD_BYTES: usize = 5 * 1024;

    let executor = SandboxExecutor::new();
    if !executor.is_available("shell") {
        return;
    }

    // 1000 padded lines: far more than the filtering threshold.
    let code = "for i in $(seq 1 1000); do echo \"line $i: some padding text to make it bigger\"; done";
    let (result, filtered) = executor
        .execute_with_intent(code, "shell", None)
        .unwrap();

    // Precondition: the run really produced large output. Without this check
    // the test would pass vacuously if the script printed little or nothing.
    // Truncated output also counts, since it was cut for being too large.
    assert!(
        result.was_truncated || result.stdout.len() > FILTER_THRESHOLD_BYTES,
        "script should produce output above the filter threshold, got {} bytes",
        result.stdout.len()
    );

    // Even large output without intent should not filter.
    assert!(!result.was_indexed);
    assert!(filtered.is_none());
}
878
/// Output above the filtering threshold, combined with an intent, is indexed
/// and comes back with matched sections and a vocabulary.
#[test]
#[cfg(not(windows))]
fn test_execute_with_intent_large_output_filters() {
    let sandbox = SandboxExecutor::new();
    if !sandbox.is_available("shell") {
        return;
    }

    // Four blank-line-separated groups of 50 lines each (error / info /
    // warning / success), together more than 5 KB of output.
    let script = r#"
for i in $(seq 1 50); do echo "error: compilation failed at module $i"; done
echo ""
for i in $(seq 1 50); do echo "info: processing file $i of 200"; done
echo ""
for i in $(seq 1 50); do echo "warning: deprecated API usage in handler $i"; done
echo ""
for i in $(seq 1 50); do echo "success: test suite $i passed with 100% coverage"; done
"#;

    let outcome = sandbox.execute_with_intent(script, "shell", Some("error compilation"));
    let (run, maybe_filtered) = outcome.unwrap();
    assert!(run.was_indexed, "large output with intent should be indexed");

    let sections = maybe_filtered.expect("should have filtered output");
    assert!(!sections.matched_sections.is_empty(), "should have matched sections");
    assert!(!sections.vocabulary.is_empty(), "should have vocabulary");
    assert!(sections.total_chunks > 0);
}
905
906    // ── Property-based tests (Unix only — depend on bash/shell) ──────────
907
908    #[cfg(not(windows))]
909    mod proptests {
910        use super::*;
911        use proptest::prelude::*;
912
913        /// Generate a random alphanumeric label safe for use in shell echo commands.
914        fn safe_label() -> impl Strategy<Value = String> {
915            "[a-zA-Z0-9]{1,20}"
916        }
917
918        // **Validates: Requirements 30.1, 30.2**
919        //
920        // Property 35: Sandbox execution isolation — stdout only
921        //
922        // For any code execution that writes to both stdout and stderr,
923        // only stdout content appears in the returned SandboxResult.
924        // We use distinct prefixes to guarantee stdout and stderr
925        // messages are distinguishable.
926        proptest! {
927            #[test]
928            fn prop_only_stdout_captured(
929                label in safe_label(),
930            ) {
931                let executor = SandboxExecutor::new();
932                if !executor.is_available("shell") {
933                    return Ok(());
934                }
935
936                let stdout_msg = format!("OUT_{label}");
937                let stderr_msg = format!("ERR_{label}");
938
939                // Script writes distinct messages to stdout and stderr
940                let code = format!(
941                    "echo \"{stdout_msg}\"\necho \"{stderr_msg}\" >&2"
942                );
943                let result = executor.execute(&code, "shell").unwrap();
944
945                // stdout content must be present
946                prop_assert!(
947                    result.stdout.contains(&stdout_msg),
948                    "stdout should contain the stdout message '{}', got: '{}'",
949                    stdout_msg, result.stdout
950                );
951                // stderr content must never appear
952                prop_assert!(
953                    !result.stdout.contains(&stderr_msg),
954                    "stdout should NOT contain the stderr message '{}', got: '{}'",
955                    stderr_msg, result.stdout
956                );
957            }
958        }
959
960        // **Validates: Requirements 30.1, 30.2**
961        //
962        // Property 35: Sandbox execution isolation — subprocess isolation
963        //
964        // Each execution runs in an isolated subprocess with no shared
965        // state. Setting an env var in one execution must not be visible
966        // in a subsequent execution.
967        proptest! {
968            #[test]
969            fn prop_no_shared_state_between_executions(
970                var_name in "[A-Z]{3,8}",
971                var_value in "[a-z0-9]{1,10}",
972            ) {
973                let executor = SandboxExecutor::new();
974                if !executor.is_available("shell") {
975                    return Ok(());
976                }
977
978                let unique_var = format!("SQZ_PROP_{var_name}");
979
980                // First execution: export an env var
981                let code1 = format!(
982                    "export {unique_var}={var_value}\necho \"set {unique_var}\""
983                );
984                let result1 = executor.execute(&code1, "shell").unwrap();
985                prop_assert!(
986                    result1.stdout.contains(&format!("set {unique_var}")),
987                    "first execution should succeed"
988                );
989
990                // Second execution: try to read that env var — it should be empty
991                let code2 = format!(
992                    "echo \"val=${{{unique_var}:-UNSET}}\""
993                );
994                let result2 = executor.execute(&code2, "shell").unwrap();
995                prop_assert!(
996                    result2.stdout.contains("val=UNSET"),
997                    "env var from first execution should not leak into second; got: '{}'",
998                    result2.stdout
999                );
1000            }
1001        }
1002    }
1003}