1use std::collections::HashMap;
2use std::io::Write;
3use std::process::{Command, Stdio};
4use std::time::Duration;
5
6use rusqlite::{params, Connection};
7
8use crate::error::{Result, SqzError};
9
/// Name prefixes of the environment variables that `build_credential_env`
/// copies out of the current process environment.
///
/// Matching is done with `str::starts_with`, so the bare names in this list
/// (`HOME`, `PATH`, ...) also match any longer variable name that begins with
/// them.
const CREDENTIAL_ENV_PREFIXES: &[&str] = &[
    // Prefixes ending in `_` select whole families of variables.
    "AWS_",
    "GCLOUD_",
    "GOOGLE_",
    "CLOUDSDK_",
    "GH_",
    "GITHUB_",
    "KUBECONFIG",
    "DOCKER_",
    // Basic process environment.
    "HOME",
    "PATH",
    "USER",
    "LANG",
    "TERM",
    "SHELL",
    "TMPDIR",
    "XDG_",
];
38
/// One language runtime that was found on this machine.
#[derive(Clone, Debug)]
pub struct RuntimeInfo {
    /// Name of the executable that was probed for and found.
    pub name: &'static str,
    /// Program handed to `Command::new` when code is run with this runtime.
    pub binary: String,
    /// Language label of the runtime (for example `"js"` or `"python"`).
    pub language: &'static str,
}
46
/// Outcome of running one piece of code in the sandbox.
#[derive(Clone, Debug)]
pub struct SandboxResult {
    /// Captured standard output, decoded lossily as UTF-8.
    pub stdout: String,
    /// Exit code of the process, or a fallback value when it reported none.
    pub status_code: i32,
    /// `true` when the captured output exceeded the configured byte limit.
    pub was_truncated: bool,
    /// `true` when `stdout` was replaced by intent-filtered sections.
    pub was_indexed: bool,
}
59
/// Output size, in bytes, above which intent filtering is applied (5 KiB).
const DEFAULT_FILTER_THRESHOLD: usize = 5 * 1024;

/// Result of filtering sandbox output against a caller-supplied intent.
#[derive(Clone, Debug)]
pub struct FilteredOutput {
    /// Chunks of the output that matched the intent, best match first.
    pub matched_sections: Vec<String>,
    /// Terms collected from the indexed output.
    pub vocabulary: Vec<String>,
    /// Number of chunks the output was split into.
    pub total_chunks: usize,
    /// Number of chunks that matched (the length of `matched_sections`).
    pub matched_chunks: usize,
}
75
76pub struct SandboxExecutor {
81 timeout: Duration,
82 max_output_bytes: usize,
83 filter_threshold: usize,
84 runtimes: HashMap<String, RuntimeInfo>,
85}
86
87pub(crate) struct OutputFilter;
92
93impl OutputFilter {
94 pub fn filter(text: &str, intent: &str) -> Result<FilteredOutput> {
98 let chunks = Self::chunk_output(text);
99 let total_chunks = chunks.len();
100
101 let conn = Connection::open_in_memory()
102 .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;
103
104 conn.execute_batch(
105 r#"
106 CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_fts USING fts5(
107 chunk_id,
108 body,
109 tokenize='porter ascii'
110 );
111 "#,
112 )
113 .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;
114
115 for (i, chunk) in chunks.iter().enumerate() {
117 conn.execute(
118 "INSERT INTO sandbox_fts(chunk_id, body) VALUES (?1, ?2)",
119 params![i.to_string(), chunk],
120 )
121 .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
122 }
123
124 let matched_sections = Self::bm25_search(&conn, intent, &chunks)?;
126 let matched_chunks = matched_sections.len();
127
128 let vocabulary = Self::extract_vocabulary(&conn)?;
130
131 Ok(FilteredOutput {
132 matched_sections,
133 vocabulary,
134 total_chunks,
135 matched_chunks,
136 })
137 }
138
139 fn chunk_output(text: &str) -> Vec<String> {
142 const MAX_CHUNK_BYTES: usize = 512;
143
144 let paragraphs: Vec<&str> = text.split("\n\n").collect();
145 let mut chunks = Vec::new();
146
147 for para in paragraphs {
148 let trimmed = para.trim();
149 if trimmed.is_empty() {
150 continue;
151 }
152 if trimmed.len() <= MAX_CHUNK_BYTES {
153 chunks.push(trimmed.to_string());
154 } else {
155 let mut current = String::new();
157 for line in trimmed.lines() {
158 if !current.is_empty() && current.len() + line.len() + 1 > MAX_CHUNK_BYTES {
159 chunks.push(std::mem::take(&mut current));
160 }
161 if !current.is_empty() {
162 current.push('\n');
163 }
164 current.push_str(line);
165 }
166 if !current.is_empty() {
167 chunks.push(current);
168 }
169 }
170 }
171
172 if chunks.is_empty() && !text.trim().is_empty() {
174 chunks.push(text.trim().to_string());
175 }
176
177 chunks
178 }
179
180 fn bm25_search(conn: &Connection, intent: &str, _chunks: &[String]) -> Result<Vec<String>> {
183 let sanitized: String = intent
185 .chars()
186 .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
187 .collect();
188 let terms: Vec<&str> = sanitized.split_whitespace().collect();
189 if terms.is_empty() {
190 return Ok(Vec::new());
191 }
192
193 let fts_query = terms.join(" OR ");
195
196 let mut stmt = conn
197 .prepare(
198 r#"SELECT body FROM sandbox_fts
199 WHERE sandbox_fts MATCH ?1
200 ORDER BY rank
201 LIMIT 20"#,
202 )
203 .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
204
205 let rows = stmt
206 .query_map(params![fts_query], |row| row.get::<_, String>(0))
207 .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;
208
209 let mut results = Vec::new();
210 for row in rows {
211 results.push(
212 row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?,
213 );
214 }
215 Ok(results)
216 }
217
218 fn extract_vocabulary(conn: &Connection) -> Result<Vec<String>> {
221 conn.execute_batch(
224 "CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_vocab USING fts5vocab(sandbox_fts, col);",
225 )
226 .map_err(|e| SqzError::Other(format!("FTS5 vocab table creation failed: {e}")))?;
227
228 let mut stmt = conn
229 .prepare(
230 r#"SELECT term FROM sandbox_vocab
231 WHERE col = 'body'
232 ORDER BY doc DESC
233 LIMIT 100"#,
234 )
235 .map_err(|e| SqzError::Other(format!("vocab query prepare failed: {e}")))?;
236
237 let rows = stmt
238 .query_map([], |row| row.get::<_, String>(0))
239 .map_err(|e| SqzError::Other(format!("vocab query failed: {e}")))?;
240
241 let mut vocab = Vec::new();
242 for row in rows {
243 vocab.push(
244 row.map_err(|e| SqzError::Other(format!("vocab row read failed: {e}")))?,
245 );
246 }
247 Ok(vocab)
248 }
249}
250
251impl SandboxExecutor {
252 pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
254 pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 1_048_576;
256
257 pub fn new() -> Self {
259 Self::with_config(
260 Duration::from_secs(Self::DEFAULT_TIMEOUT_SECS),
261 Self::DEFAULT_MAX_OUTPUT_BYTES,
262 )
263 }
264
265 pub fn with_config(timeout: Duration, max_output_bytes: usize) -> Self {
267 let runtimes = detect_runtimes();
268 Self {
269 timeout,
270 max_output_bytes,
271 filter_threshold: DEFAULT_FILTER_THRESHOLD,
272 runtimes,
273 }
274 }
275
276 pub fn execute(&self, code: &str, language: &str) -> Result<SandboxResult> {
282 let lang = language.to_lowercase();
283 let runtime = self
284 .runtimes
285 .get(&lang)
286 .ok_or_else(|| SqzError::Other(format!("unsupported or unavailable runtime: {lang}")))?;
287
288 let env = build_credential_env();
289
290 let result = match lang.as_str() {
291 "go" => self.execute_go(code, runtime, &env),
292 "rust" => self.execute_rust(code, runtime, &env),
293 _ => self.execute_interpreted(code, runtime, &env),
294 }?;
295
296 Ok(result)
297 }
298
299 pub fn execute_with_intent(
306 &self,
307 code: &str,
308 language: &str,
309 intent: Option<&str>,
310 ) -> Result<(SandboxResult, Option<FilteredOutput>)> {
311 let mut result = self.execute(code, language)?;
312
313 let should_filter = result.stdout.len() > self.filter_threshold
314 && intent.map_or(false, |i| !i.trim().is_empty());
315
316 if should_filter {
317 let intent_str = intent.unwrap(); let filtered = OutputFilter::filter(&result.stdout, intent_str)?;
319 result.was_indexed = true;
320 result.stdout = filtered.matched_sections.join("\n\n");
323 Ok((result, Some(filtered)))
324 } else {
325 Ok((result, None))
326 }
327 }
328
329 pub fn available_languages(&self) -> Vec<&str> {
331 self.runtimes.values().map(|r| r.language).collect()
332 }
333
334 pub fn supported_languages(&self) -> &[&str] {
336 &["js", "ts", "python", "shell", "ruby", "go", "rust"]
337 }
338
339 pub fn is_available(&self, language: &str) -> bool {
341 self.runtimes.contains_key(&language.to_lowercase())
342 }
343
344 pub fn timeout(&self) -> Duration {
346 self.timeout
347 }
348
349 pub fn max_output_bytes(&self) -> usize {
351 self.max_output_bytes
352 }
353
354 fn execute_interpreted(
359 &self,
360 code: &str,
361 runtime: &RuntimeInfo,
362 env: &HashMap<String, String>,
363 ) -> Result<SandboxResult> {
364 let ext = match runtime.language {
365 "js" => "js",
366 "ts" => "ts",
367 "python" => "py",
368 "shell" => "sh",
369 "ruby" => "rb",
370 _ => "tmp",
371 };
372
373 let tmp_dir = tempfile::tempdir().map_err(|e| SqzError::Io(e))?;
374 let script_path = tmp_dir.path().join(format!("sandbox_script.{ext}"));
375 {
376 let mut f = std::fs::File::create(&script_path)?;
377 f.write_all(code.as_bytes())?;
378 }
379
380 let mut cmd = Command::new(&runtime.binary);
381
382 if runtime.language == "ts" && runtime.name == "npx" {
384 cmd.arg("tsx");
385 }
386
387 cmd.arg(&script_path)
388 .stdout(Stdio::piped())
389 .stderr(Stdio::null()) .envs(env);
391
392 self.run_with_timeout(cmd, &format!("runtime={}", runtime.language))
393 }
394
395 fn execute_go(
397 &self,
398 code: &str,
399 runtime: &RuntimeInfo,
400 env: &HashMap<String, String>,
401 ) -> Result<SandboxResult> {
402 let tmp_dir = tempfile::tempdir()?;
403 let script_path = tmp_dir.path().join("main.go");
404 {
405 let mut f = std::fs::File::create(&script_path)?;
406 f.write_all(code.as_bytes())?;
407 }
408
409 let mut cmd = Command::new(&runtime.binary);
410 cmd.arg("run")
411 .arg(&script_path)
412 .stdout(Stdio::piped())
413 .stderr(Stdio::null())
414 .envs(env);
415
416 self.run_with_timeout(cmd, "runtime=go")
417 }
418
419 fn execute_rust(
421 &self,
422 code: &str,
423 runtime: &RuntimeInfo,
424 env: &HashMap<String, String>,
425 ) -> Result<SandboxResult> {
426 let tmp_dir = tempfile::tempdir()?;
427 let src_path = tmp_dir.path().join("sandbox.rs");
428 let bin_path = tmp_dir.path().join("sandbox_bin");
429 {
430 let mut f = std::fs::File::create(&src_path)?;
431 f.write_all(code.as_bytes())?;
432 }
433
434 let compile = Command::new(&runtime.binary)
436 .arg(&src_path)
437 .arg("-o")
438 .arg(&bin_path)
439 .stdout(Stdio::null())
440 .stderr(Stdio::null())
441 .envs(env)
442 .status();
443
444 match compile {
445 Ok(status) if status.success() => {}
446 Ok(status) => {
447 return Ok(SandboxResult {
448 stdout: String::new(),
449 status_code: status.code().unwrap_or(1),
450 was_truncated: false,
451 was_indexed: false,
452 });
453 }
454 Err(e) => return Err(SqzError::Io(e)),
455 }
456
457 let mut cmd = Command::new(&bin_path);
459 cmd.stdout(Stdio::piped())
460 .stderr(Stdio::null())
461 .envs(env);
462
463 self.run_with_timeout(cmd, "runtime=rust")
464 }
465
466 fn run_with_timeout(&self, mut cmd: Command, context: &str) -> Result<SandboxResult> {
468 let mut child = cmd.spawn().map_err(SqzError::Io)?;
469
470 let status = match wait_with_timeout(&mut child, self.timeout) {
472 Ok(status) => status,
473 Err(_) => {
474 let _ = child.kill();
476 let _ = child.wait();
477 return Err(SqzError::Other(format!(
478 "sandbox execution timed out after {}s ({})",
479 self.timeout.as_secs(),
480 context
481 )));
482 }
483 };
484
485 let stdout_raw = if let Some(mut stdout) = child.stdout.take() {
487 use std::io::Read;
488 let mut buf = Vec::new();
489 let _ = stdout.read_to_end(&mut buf);
490 buf
491 } else {
492 Vec::new()
493 };
494
495 let truncated = stdout_raw.len() > self.max_output_bytes;
497 let stdout_bytes = if truncated {
498 &stdout_raw[..self.max_output_bytes]
499 } else {
500 &stdout_raw[..]
501 };
502
503 let stdout = String::from_utf8_lossy(stdout_bytes).into_owned();
504
505 Ok(SandboxResult {
506 stdout,
507 status_code: status.code().unwrap_or(-1),
508 was_truncated: truncated,
509 was_indexed: false,
510 })
511 }
512}
513
/// Polls `child` until it exits or `timeout` has elapsed.
///
/// Returns the exit status when the process finished in time. Returns
/// `Err(())` when the timeout elapsed first or when querying the process
/// failed; the process is left running in that case, so the caller is
/// responsible for killing it.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> std::result::Result<std::process::ExitStatus, ()> {
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    let started = std::time::Instant::now();
    loop {
        if let Some(status) = child.try_wait().map_err(|_| ())? {
            return Ok(status);
        }
        if started.elapsed() >= timeout {
            return Err(());
        }
        std::thread::sleep(POLL_INTERVAL);
    }
}
538
539fn build_credential_env() -> HashMap<String, String> {
542 let mut env = HashMap::new();
543 for (key, value) in std::env::vars() {
544 if CREDENTIAL_ENV_PREFIXES
545 .iter()
546 .any(|prefix| key.starts_with(prefix))
547 {
548 env.insert(key, value);
549 }
550 }
551 env
552}
553
554fn detect_runtimes() -> HashMap<String, RuntimeInfo> {
556 let mut runtimes = HashMap::new();
557
558 let candidates: &[(&str, &[&str], &str)] = &[
559 ("js", &["node", "bun"], "js"),
561 ("ts", &["bun", "npx"], "ts"),
562 ("python", &["python3", "python"], "python"),
563 ("shell", &["bash", "sh"], "shell"),
564 ("ruby", &["ruby"], "ruby"),
565 ("go", &["go"], "go"),
566 ("rust", &["rustc"], "rust"),
567 ];
568
569 for &(lang_key, binaries, lang_label) in candidates {
570 for &bin in binaries {
571 if is_binary_available(bin) {
572 let effective_binary = if lang_key == "ts" && bin == "npx" {
574 "npx".to_string()
575 } else {
576 bin.to_string()
577 };
578
579 runtimes.insert(
580 lang_key.to_string(),
581 RuntimeInfo {
582 name: bin,
583 binary: effective_binary,
584 language: lang_label,
585 },
586 );
587 break; }
589 }
590 }
591
592 runtimes
593}
594
/// Reports whether the executable `name` can be located.
///
/// Runs `which` (`where` on Windows) with its output discarded and returns
/// `true` only when that command ran and exited successfully.
fn is_binary_available(name: &str) -> bool {
    let locator = if cfg!(windows) { "where" } else { "which" };
    let outcome = Command::new(locator)
        .arg(name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    matches!(outcome, Ok(status) if status.success())
}
606
/// Unit and property tests for the sandbox executor and the output filter.
///
/// Tests that need a runtime return early when it is not installed. Tests
/// that rely on POSIX shell tools are compiled only on non-Windows targets.
#[cfg(test)]
mod tests {
    use super::*;

    /// At least one runtime is expected on the machine running the tests.
    #[test]
    fn test_new_detects_runtimes() {
        let executor = SandboxExecutor::new();
        assert!(
            !executor.runtimes.is_empty(),
            "should detect at least one runtime"
        );
    }

    /// The static list of supported languages contains every known language.
    #[test]
    fn test_supported_languages_list() {
        let executor = SandboxExecutor::new();
        let supported = executor.supported_languages();
        assert!(supported.len() >= 6, "should list at least 6 supported languages");
        assert!(supported.contains(&"js"));
        assert!(supported.contains(&"python"));
        assert!(supported.contains(&"shell"));
        assert!(supported.contains(&"ruby"));
        assert!(supported.contains(&"go"));
        assert!(supported.contains(&"rust"));
    }

    /// `new` applies the documented defaults.
    #[test]
    fn test_default_config() {
        let executor = SandboxExecutor::new();
        assert_eq!(executor.timeout(), Duration::from_secs(30));
        assert_eq!(executor.max_output_bytes(), 1_048_576);
    }

    /// `with_config` stores the values it is given.
    #[test]
    fn test_custom_config() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 4096);
        assert_eq!(executor.timeout(), Duration::from_secs(10));
        assert_eq!(executor.max_output_bytes(), 4096);
    }

    /// A trivial shell script runs and its stdout is captured.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_shell_echo() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("echo hello sandbox", "shell").unwrap();
        assert_eq!(result.status_code, 0);
        assert_eq!(result.stdout.trim(), "hello sandbox");
        assert!(!result.was_truncated);
    }

    /// Output written to stderr never shows up in the result.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_shell_captures_only_stdout() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = r#"echo "visible"
echo "hidden" >&2
echo "also visible""#;
        let result = executor.execute(code, "shell").unwrap();
        assert!(result.stdout.contains("visible"));
        assert!(result.stdout.contains("also visible"));
        assert!(!result.stdout.contains("hidden"));
    }

    /// A trivial Python script runs and its stdout is captured.
    #[test]
    fn test_execute_python() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("python") {
            return;
        }
        let result = executor.execute("print('hello from python')", "python").unwrap();
        assert_eq!(result.status_code, 0);
        assert_eq!(result.stdout.trim(), "hello from python");
    }

    /// The exit code of the script is reported unchanged.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_nonzero_exit() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("exit 42", "shell").unwrap();
        assert_eq!(result.status_code, 42);
    }

    /// A script that outlives the timeout produces a "timed out" error.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_timeout() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(1), 1024);
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("sleep 30", "shell");
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("timed out"));
    }

    /// Output beyond the configured limit is cut off and flagged.
    ///
    /// Uses `seq`, so it is limited to non-Windows targets like the other
    /// shell-script tests.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_output_truncation() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 32);
        if !executor.is_available("shell") {
            return;
        }
        let result = executor
            .execute("for i in $(seq 1 100); do echo \"line $i\"; done", "shell")
            .unwrap();
        assert!(result.was_truncated);
        assert!(result.stdout.len() <= 32);
    }

    /// An unknown language is rejected with a descriptive error.
    #[test]
    fn test_unsupported_runtime() {
        let executor = SandboxExecutor::new();
        let result = executor.execute("code", "brainfuck");
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("unsupported or unavailable runtime"));
    }

    /// Language names are matched case-insensitively.
    #[test]
    #[cfg(not(windows))]
    fn test_case_insensitive_language() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("echo ok", "Shell");
        assert!(result.is_ok());
    }

    /// `PATH` is part of the environment passed to sandboxed processes.
    #[test]
    #[cfg(not(windows))]
    fn test_credential_env_includes_path() {
        let env = build_credential_env();
        assert!(env.contains_key("PATH"), "PATH should be inherited");
    }

    /// Variables with an allow-listed prefix are passed through.
    #[test]
    fn test_credential_env_includes_aws() {
        std::env::set_var("AWS_TEST_SANDBOX", "test_value");
        let env = build_credential_env();
        // Clean up before asserting so that a failing assertion cannot leak
        // the variable into other tests running in this process.
        std::env::remove_var("AWS_TEST_SANDBOX");
        assert_eq!(env.get("AWS_TEST_SANDBOX").map(|s| s.as_str()), Some("test_value"));
    }

    /// `sh` is found and a nonsense name is not.
    ///
    /// Asserts that `sh` exists, so it is limited to non-Windows targets.
    #[test]
    #[cfg(not(windows))]
    fn test_is_binary_available() {
        assert!(is_binary_available("sh"));
        assert!(!is_binary_available("definitely_not_a_real_binary_xyz"));
    }

    /// Paragraphs separated by a blank line become separate chunks.
    #[test]
    fn test_chunk_output_splits_on_double_newline() {
        let text = "first paragraph\n\nsecond paragraph\n\nthird paragraph";
        let chunks = OutputFilter::chunk_output(text);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0], "first paragraph");
        assert_eq!(chunks[1], "second paragraph");
        assert_eq!(chunks[2], "third paragraph");
    }

    /// A paragraph above the chunk size limit is split on line boundaries.
    #[test]
    fn test_chunk_output_splits_large_paragraphs() {
        let line = "a]".repeat(30);
        let big_para = (0..20)
            .map(|i| format!("{line} line{i}"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(big_para.len() > 512);

        let chunks = OutputFilter::chunk_output(&big_para);
        assert!(chunks.len() > 1, "large paragraph should be sub-split");
        for chunk in &chunks {
            assert!(chunk.len() <= 600, "each sub-chunk should be roughly ≤512 bytes");
        }
    }

    /// Empty input produces no chunks.
    #[test]
    fn test_chunk_output_empty_input() {
        let chunks = OutputFilter::chunk_output("");
        assert!(chunks.is_empty());
    }

    /// Chunks containing the intent terms are returned.
    #[test]
    fn test_filter_returns_matching_sections() {
        let text = "error: compilation failed at line 42\n\n\
                    warning: unused variable `x`\n\n\
                    info: build started at 10:00\n\n\
                    error: type mismatch in function foo\n\n\
                    success: 3 tests passed";
        let result = OutputFilter::filter(text, "error compilation").unwrap();
        assert!(!result.matched_sections.is_empty(), "should find error-related chunks");
        assert!(
            result.matched_sections.iter().any(|s| s.contains("error")),
            "matched sections should contain the intent keyword"
        );
        assert!(result.total_chunks >= 4);
    }

    /// The vocabulary contains terms taken from the indexed text.
    #[test]
    fn test_filter_returns_vocabulary() {
        let text = "the quick brown fox jumps over the lazy dog\n\n\
                    rust programming language is fast and safe\n\n\
                    memory safety without garbage collection";
        let result = OutputFilter::filter(text, "rust").unwrap();
        assert!(!result.vocabulary.is_empty(), "vocabulary should not be empty");
        let vocab_joined = result.vocabulary.join(" ");
        assert!(
            vocab_joined.contains("rust") || vocab_joined.contains("fast") || vocab_joined.contains("safe"),
            "vocabulary should contain terms from the indexed content"
        );
    }

    /// An intent that matches nothing yields no sections.
    #[test]
    fn test_filter_no_match_returns_empty() {
        let text = "hello world\n\nfoo bar baz";
        let result = OutputFilter::filter(text, "zzzznonexistent").unwrap();
        assert!(result.matched_sections.is_empty());
        assert_eq!(result.matched_chunks, 0);
    }

    /// Punctuation in the intent does not break the search query.
    #[test]
    fn test_filter_special_chars_in_intent() {
        let text = "error: something went wrong\n\nwarning: check this";
        let result = OutputFilter::filter(text, "error: (something) [wrong]");
        assert!(result.is_ok(), "special chars in intent should be sanitized");
    }

    /// Output below the threshold is returned unfiltered.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_small_output_no_filter() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let (result, filtered) = executor
            .execute_with_intent("echo hello", "shell", Some("hello"))
            .unwrap();
        assert_eq!(result.status_code, 0);
        assert!(!result.was_indexed);
        assert!(filtered.is_none());
    }

    /// Large output without an intent is returned unfiltered.
    ///
    /// Uses `seq`, so it is limited to non-Windows targets like the other
    /// shell-script tests.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_no_intent_no_filter() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = "for i in $(seq 1 1000); do echo \"line $i: some padding text to make it bigger\"; done";
        let (result, filtered) = executor
            .execute_with_intent(code, "shell", None)
            .unwrap();
        assert!(!result.was_indexed);
        assert!(filtered.is_none());
    }

    /// Large output with an intent is indexed and filtered.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_large_output_filters() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = r#"
for i in $(seq 1 50); do echo "error: compilation failed at module $i"; done
echo ""
for i in $(seq 1 50); do echo "info: processing file $i of 200"; done
echo ""
for i in $(seq 1 50); do echo "warning: deprecated API usage in handler $i"; done
echo ""
for i in $(seq 1 50); do echo "success: test suite $i passed with 100% coverage"; done
"#;
        let (result, filtered) = executor
            .execute_with_intent(code, "shell", Some("error compilation"))
            .unwrap();
        assert!(result.was_indexed, "large output with intent should be indexed");
        let filtered = filtered.expect("should have filtered output");
        assert!(!filtered.matched_sections.is_empty(), "should have matched sections");
        assert!(!filtered.vocabulary.is_empty(), "should have vocabulary");
        assert!(filtered.total_chunks > 0);
    }

    /// Property tests that run generated shell scripts.
    #[cfg(not(windows))]
    mod proptests {
        use super::*;
        use proptest::prelude::*;

        /// Strategy for short labels that are safe to embed in a shell script.
        fn safe_label() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9]{1,20}"
        }

        proptest! {
            /// Whatever is written to stderr never reaches the captured stdout.
            #[test]
            fn prop_only_stdout_captured(
                label in safe_label(),
            ) {
                let executor = SandboxExecutor::new();
                if !executor.is_available("shell") {
                    return Ok(());
                }

                let stdout_msg = format!("OUT_{label}");
                let stderr_msg = format!("ERR_{label}");

                let code = format!(
                    "echo \"{stdout_msg}\"\necho \"{stderr_msg}\" >&2"
                );
                let result = executor.execute(&code, "shell").unwrap();

                prop_assert!(
                    result.stdout.contains(&stdout_msg),
                    "stdout should contain the stdout message '{}', got: '{}'",
                    stdout_msg, result.stdout
                );
                prop_assert!(
                    !result.stdout.contains(&stderr_msg),
                    "stdout should NOT contain the stderr message '{}', got: '{}'",
                    stderr_msg, result.stdout
                );
            }
        }

        proptest! {
            /// A variable exported by one execution is not visible to the next.
            #[test]
            fn prop_no_shared_state_between_executions(
                var_name in "[A-Z]{3,8}",
                var_value in "[a-z0-9]{1,10}",
            ) {
                let executor = SandboxExecutor::new();
                if !executor.is_available("shell") {
                    return Ok(());
                }

                let unique_var = format!("SQZ_PROP_{var_name}");

                let code1 = format!(
                    "export {unique_var}={var_value}\necho \"set {unique_var}\""
                );
                let result1 = executor.execute(&code1, "shell").unwrap();
                prop_assert!(
                    result1.stdout.contains(&format!("set {unique_var}")),
                    "first execution should succeed"
                );

                let code2 = format!(
                    "echo \"val=${{{unique_var}:-UNSET}}\""
                );
                let result2 = executor.execute(&code2, "shell").unwrap();
                prop_assert!(
                    result2.stdout.contains("val=UNSET"),
                    "env var from first execution should not leak into second; got: '{}'",
                    result2.stdout
                );
            }
        }
    }
}