1use std::collections::HashMap;
2use std::io::Write;
3use std::process::{Command, Stdio};
4use std::time::Duration;
5
6use rusqlite::{params, Connection};
7
8use crate::error::{Result, SqzError};
9
/// Environment variable names and name prefixes forwarded to sandboxed
/// processes by `build_credential_env`.
///
/// Every entry is matched with `str::starts_with` against the variable name,
/// so entries without a trailing underscore (for example `HOME` or `PATH`)
/// also match longer names that merely begin with them.
const CREDENTIAL_ENV_PREFIXES: &[&str] = &[
    // Cloud / tooling variables.
    "AWS_",
    "GCLOUD_",
    "GOOGLE_",
    "CLOUDSDK_",
    "GH_",
    "GITHUB_",
    "KUBECONFIG",
    "DOCKER_",
    // General process environment.
    "HOME",
    "PATH",
    "USER",
    "LANG",
    "TERM",
    "SHELL",
    "TMPDIR",
    "XDG_",
];
38
/// Describes one runtime that was found on this machine.
#[derive(Debug, Clone)]
pub struct RuntimeInfo {
    /// Name of the executable that was probed and found (e.g. `node`, `npx`).
    pub name: &'static str,
    /// Program handed to `Command::new` when running code with this runtime.
    pub binary: String,
    /// Language label this runtime serves (e.g. `js`, `python`, `shell`).
    pub language: &'static str,
}
46
/// Outcome of running a snippet in the sandbox.
#[derive(Debug, Clone)]
pub struct SandboxResult {
    /// Captured standard output, lossily decoded as UTF-8.
    pub stdout: String,
    /// Exit code of the process; a fallback value is used when the OS reports
    /// no exit code.
    pub status_code: i32,
    /// `true` when the process produced more output than the configured limit
    /// and `stdout` was cut short.
    pub was_truncated: bool,
    /// `true` when `stdout` was replaced by intent-filtered sections.
    pub was_indexed: bool,
}
59
/// Stdout must be strictly larger than this many bytes before intent-based
/// filtering is applied.
const OUTPUT_FILTER_THRESHOLD: usize = 5 * 1024;

/// Result of filtering captured output against a search intent.
#[derive(Debug, Clone)]
pub struct FilteredOutput {
    /// Chunk bodies that matched the intent, in the order the search returned them.
    pub matched_sections: Vec<String>,
    /// Terms taken from the indexed output.
    pub vocabulary: Vec<String>,
    /// Number of chunks the output was split into before indexing.
    pub total_chunks: usize,
    /// Number of chunks that matched the intent.
    pub matched_chunks: usize,
}
75
76pub struct SandboxExecutor {
81 timeout: Duration,
82 max_output_bytes: usize,
83 runtimes: HashMap<String, RuntimeInfo>,
84}
85
/// Stateless namespace for the intent-based output filtering routines.
pub(crate) struct OutputFilter;
91
92impl OutputFilter {
93 pub fn filter(text: &str, intent: &str) -> Result<FilteredOutput> {
97 let chunks = Self::chunk_output(text);
98 let total_chunks = chunks.len();
99
100 let conn = Connection::open_in_memory()
101 .map_err(|e| SqzError::Other(format!("FTS5 in-memory open failed: {e}")))?;
102
103 conn.execute_batch(
104 r#"
105 CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_fts USING fts5(
106 chunk_id,
107 body,
108 tokenize='porter ascii'
109 );
110 "#,
111 )
112 .map_err(|e| SqzError::Other(format!("FTS5 schema creation failed: {e}")))?;
113
114 for (i, chunk) in chunks.iter().enumerate() {
116 conn.execute(
117 "INSERT INTO sandbox_fts(chunk_id, body) VALUES (?1, ?2)",
118 params![i.to_string(), chunk],
119 )
120 .map_err(|e| SqzError::Other(format!("FTS5 insert failed: {e}")))?;
121 }
122
123 let matched_sections = Self::bm25_search(&conn, intent, &chunks)?;
125 let matched_chunks = matched_sections.len();
126
127 let vocabulary = Self::extract_vocabulary(&conn)?;
129
130 Ok(FilteredOutput {
131 matched_sections,
132 vocabulary,
133 total_chunks,
134 matched_chunks,
135 })
136 }
137
138 fn chunk_output(text: &str) -> Vec<String> {
141 const MAX_CHUNK_BYTES: usize = 512;
142
143 let paragraphs: Vec<&str> = text.split("\n\n").collect();
144 let mut chunks = Vec::new();
145
146 for para in paragraphs {
147 let trimmed = para.trim();
148 if trimmed.is_empty() {
149 continue;
150 }
151 if trimmed.len() <= MAX_CHUNK_BYTES {
152 chunks.push(trimmed.to_string());
153 } else {
154 let mut current = String::new();
156 for line in trimmed.lines() {
157 if !current.is_empty() && current.len() + line.len() + 1 > MAX_CHUNK_BYTES {
158 chunks.push(std::mem::take(&mut current));
159 }
160 if !current.is_empty() {
161 current.push('\n');
162 }
163 current.push_str(line);
164 }
165 if !current.is_empty() {
166 chunks.push(current);
167 }
168 }
169 }
170
171 if chunks.is_empty() && !text.trim().is_empty() {
173 chunks.push(text.trim().to_string());
174 }
175
176 chunks
177 }
178
179 fn bm25_search(conn: &Connection, intent: &str, _chunks: &[String]) -> Result<Vec<String>> {
182 let sanitized: String = intent
184 .chars()
185 .map(|c| if c.is_alphanumeric() || c.is_whitespace() { c } else { ' ' })
186 .collect();
187 let terms: Vec<&str> = sanitized.split_whitespace().collect();
188 if terms.is_empty() {
189 return Ok(Vec::new());
190 }
191
192 let fts_query = terms.join(" OR ");
194
195 let mut stmt = conn
196 .prepare(
197 r#"SELECT body FROM sandbox_fts
198 WHERE sandbox_fts MATCH ?1
199 ORDER BY rank
200 LIMIT 20"#,
201 )
202 .map_err(|e| SqzError::Other(format!("FTS5 query prepare failed: {e}")))?;
203
204 let rows = stmt
205 .query_map(params![fts_query], |row| row.get::<_, String>(0))
206 .map_err(|e| SqzError::Other(format!("FTS5 query failed: {e}")))?;
207
208 let mut results = Vec::new();
209 for row in rows {
210 results.push(
211 row.map_err(|e| SqzError::Other(format!("FTS5 row read failed: {e}")))?,
212 );
213 }
214 Ok(results)
215 }
216
217 fn extract_vocabulary(conn: &Connection) -> Result<Vec<String>> {
220 conn.execute_batch(
223 "CREATE VIRTUAL TABLE IF NOT EXISTS sandbox_vocab USING fts5vocab(sandbox_fts, col);",
224 )
225 .map_err(|e| SqzError::Other(format!("FTS5 vocab table creation failed: {e}")))?;
226
227 let mut stmt = conn
228 .prepare(
229 r#"SELECT term FROM sandbox_vocab
230 WHERE col = 'body'
231 ORDER BY doc DESC
232 LIMIT 100"#,
233 )
234 .map_err(|e| SqzError::Other(format!("vocab query prepare failed: {e}")))?;
235
236 let rows = stmt
237 .query_map([], |row| row.get::<_, String>(0))
238 .map_err(|e| SqzError::Other(format!("vocab query failed: {e}")))?;
239
240 let mut vocab = Vec::new();
241 for row in rows {
242 vocab.push(
243 row.map_err(|e| SqzError::Other(format!("vocab row read failed: {e}")))?,
244 );
245 }
246 Ok(vocab)
247 }
248}
249
250impl SandboxExecutor {
251 pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
253 pub const DEFAULT_MAX_OUTPUT_BYTES: usize = 1_048_576;
255
256 pub fn new() -> Self {
258 Self::with_config(
259 Duration::from_secs(Self::DEFAULT_TIMEOUT_SECS),
260 Self::DEFAULT_MAX_OUTPUT_BYTES,
261 )
262 }
263
264 pub fn with_config(timeout: Duration, max_output_bytes: usize) -> Self {
266 let runtimes = detect_runtimes();
267 Self {
268 timeout,
269 max_output_bytes,
270 runtimes,
271 }
272 }
273
274 pub fn execute(&self, code: &str, language: &str) -> Result<SandboxResult> {
280 let lang = language.to_lowercase();
281 let runtime = self
282 .runtimes
283 .get(&lang)
284 .ok_or_else(|| SqzError::Other(format!("unsupported or unavailable runtime: {lang}")))?;
285
286 let env = build_credential_env();
287
288 let result = match lang.as_str() {
289 "go" => self.execute_go(code, runtime, &env),
290 "rust" => self.execute_rust(code, runtime, &env),
291 _ => self.execute_interpreted(code, runtime, &env),
292 }?;
293
294 Ok(result)
295 }
296
297 pub fn execute_with_intent(
304 &self,
305 code: &str,
306 language: &str,
307 intent: Option<&str>,
308 ) -> Result<(SandboxResult, Option<FilteredOutput>)> {
309 let mut result = self.execute(code, language)?;
310
311 let should_filter = result.stdout.len() > OUTPUT_FILTER_THRESHOLD
312 && intent.map_or(false, |i| !i.trim().is_empty());
313
314 if should_filter {
315 let intent_str = intent.unwrap(); let filtered = OutputFilter::filter(&result.stdout, intent_str)?;
317 result.was_indexed = true;
318 result.stdout = filtered.matched_sections.join("\n\n");
321 Ok((result, Some(filtered)))
322 } else {
323 Ok((result, None))
324 }
325 }
326
327 pub fn available_languages(&self) -> Vec<&str> {
329 self.runtimes.values().map(|r| r.language).collect()
330 }
331
332 pub fn supported_languages(&self) -> &[&str] {
334 &["js", "ts", "python", "shell", "ruby", "go", "rust"]
335 }
336
337 pub fn is_available(&self, language: &str) -> bool {
339 self.runtimes.contains_key(&language.to_lowercase())
340 }
341
342 pub fn timeout(&self) -> Duration {
344 self.timeout
345 }
346
347 pub fn max_output_bytes(&self) -> usize {
349 self.max_output_bytes
350 }
351
352 fn execute_interpreted(
357 &self,
358 code: &str,
359 runtime: &RuntimeInfo,
360 env: &HashMap<String, String>,
361 ) -> Result<SandboxResult> {
362 let ext = match runtime.language {
363 "js" => "js",
364 "ts" => "ts",
365 "python" => "py",
366 "shell" => "sh",
367 "ruby" => "rb",
368 _ => "tmp",
369 };
370
371 let tmp_dir = tempfile::tempdir().map_err(|e| SqzError::Io(e))?;
372 let script_path = tmp_dir.path().join(format!("sandbox_script.{ext}"));
373 {
374 let mut f = std::fs::File::create(&script_path)?;
375 f.write_all(code.as_bytes())?;
376 }
377
378 let mut cmd = Command::new(&runtime.binary);
379
380 if runtime.language == "ts" && runtime.name == "npx" {
382 cmd.arg("tsx");
383 }
384
385 cmd.arg(&script_path)
386 .stdout(Stdio::piped())
387 .stderr(Stdio::null()) .envs(env);
389
390 self.run_with_timeout(cmd)
391 }
392
393 fn execute_go(
395 &self,
396 code: &str,
397 runtime: &RuntimeInfo,
398 env: &HashMap<String, String>,
399 ) -> Result<SandboxResult> {
400 let tmp_dir = tempfile::tempdir()?;
401 let script_path = tmp_dir.path().join("main.go");
402 {
403 let mut f = std::fs::File::create(&script_path)?;
404 f.write_all(code.as_bytes())?;
405 }
406
407 let mut cmd = Command::new(&runtime.binary);
408 cmd.arg("run")
409 .arg(&script_path)
410 .stdout(Stdio::piped())
411 .stderr(Stdio::null())
412 .envs(env);
413
414 self.run_with_timeout(cmd)
415 }
416
417 fn execute_rust(
419 &self,
420 code: &str,
421 runtime: &RuntimeInfo,
422 env: &HashMap<String, String>,
423 ) -> Result<SandboxResult> {
424 let tmp_dir = tempfile::tempdir()?;
425 let src_path = tmp_dir.path().join("sandbox.rs");
426 let bin_path = tmp_dir.path().join("sandbox_bin");
427 {
428 let mut f = std::fs::File::create(&src_path)?;
429 f.write_all(code.as_bytes())?;
430 }
431
432 let compile = Command::new(&runtime.binary)
434 .arg(&src_path)
435 .arg("-o")
436 .arg(&bin_path)
437 .stdout(Stdio::null())
438 .stderr(Stdio::null())
439 .envs(env)
440 .status();
441
442 match compile {
443 Ok(status) if status.success() => {}
444 Ok(status) => {
445 return Ok(SandboxResult {
446 stdout: String::new(),
447 status_code: status.code().unwrap_or(1),
448 was_truncated: false,
449 was_indexed: false,
450 });
451 }
452 Err(e) => return Err(SqzError::Io(e)),
453 }
454
455 let mut cmd = Command::new(&bin_path);
457 cmd.stdout(Stdio::piped())
458 .stderr(Stdio::null())
459 .envs(env);
460
461 self.run_with_timeout(cmd)
462 }
463
464 fn run_with_timeout(&self, mut cmd: Command) -> Result<SandboxResult> {
466 let mut child = cmd.spawn().map_err(SqzError::Io)?;
467
468 let status = match wait_with_timeout(&mut child, self.timeout) {
470 Ok(status) => status,
471 Err(_) => {
472 let _ = child.kill();
474 let _ = child.wait();
475 return Err(SqzError::Other(format!(
476 "sandbox execution timed out after {}s",
477 self.timeout.as_secs()
478 )));
479 }
480 };
481
482 let stdout_raw = if let Some(mut stdout) = child.stdout.take() {
484 use std::io::Read;
485 let mut buf = Vec::new();
486 let _ = stdout.read_to_end(&mut buf);
487 buf
488 } else {
489 Vec::new()
490 };
491
492 let truncated = stdout_raw.len() > self.max_output_bytes;
494 let stdout_bytes = if truncated {
495 &stdout_raw[..self.max_output_bytes]
496 } else {
497 &stdout_raw[..]
498 };
499
500 let stdout = String::from_utf8_lossy(stdout_bytes).into_owned();
501
502 Ok(SandboxResult {
503 stdout,
504 status_code: status.code().unwrap_or(-1),
505 was_truncated: truncated,
506 was_indexed: false,
507 })
508 }
509}
510
/// Polls `child` every 50 ms until it exits or `timeout` has elapsed.
///
/// Returns the exit status on success. Returns `Err(())` when the deadline
/// passes first or when polling the child fails; the child is not killed here.
fn wait_with_timeout(
    child: &mut std::process::Child,
    timeout: Duration,
) -> std::result::Result<std::process::ExitStatus, ()> {
    let started = std::time::Instant::now();

    loop {
        let polled = child.try_wait().map_err(|_| ())?;
        if let Some(status) = polled {
            return Ok(status);
        }
        if started.elapsed() >= timeout {
            return Err(());
        }
        std::thread::sleep(Duration::from_millis(50));
    }
}
535
536fn build_credential_env() -> HashMap<String, String> {
539 let mut env = HashMap::new();
540 for (key, value) in std::env::vars() {
541 if CREDENTIAL_ENV_PREFIXES
542 .iter()
543 .any(|prefix| key.starts_with(prefix))
544 {
545 env.insert(key, value);
546 }
547 }
548 env
549}
550
551fn detect_runtimes() -> HashMap<String, RuntimeInfo> {
553 let mut runtimes = HashMap::new();
554
555 let candidates: &[(&str, &[&str], &str)] = &[
556 ("js", &["node", "bun"], "js"),
558 ("ts", &["bun", "npx"], "ts"),
559 ("python", &["python3", "python"], "python"),
560 ("shell", &["bash", "sh"], "shell"),
561 ("ruby", &["ruby"], "ruby"),
562 ("go", &["go"], "go"),
563 ("rust", &["rustc"], "rust"),
564 ];
565
566 for &(lang_key, binaries, lang_label) in candidates {
567 for &bin in binaries {
568 if is_binary_available(bin) {
569 let effective_binary = if lang_key == "ts" && bin == "npx" {
571 "npx".to_string()
572 } else {
573 bin.to_string()
574 };
575
576 runtimes.insert(
577 lang_key.to_string(),
578 RuntimeInfo {
579 name: bin,
580 binary: effective_binary,
581 language: lang_label,
582 },
583 );
584 break; }
586 }
587 }
588
589 runtimes
590}
591
/// Reports whether `which <name>` exits successfully.
///
/// Returns `false` when `which` itself cannot be run.
fn is_binary_available(name: &str) -> bool {
    let lookup = Command::new("which")
        .arg(name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    match lookup {
        Ok(exit) => exit.success(),
        Err(_) => false,
    }
}
602
/// Unit and property tests for the sandbox executor and the output filter.
///
/// Tests that need a shell return early when no shell runtime was detected.
/// Tests that rely on POSIX tools are compiled out on Windows.
#[cfg(test)]
mod tests {
    use super::*;

    // ── Construction and configuration ──────────────────────────────────

    #[test]
    fn test_new_detects_runtimes() {
        let executor = SandboxExecutor::new();
        assert!(
            !executor.runtimes.is_empty(),
            "should detect at least one runtime"
        );
    }

    #[test]
    fn test_supported_languages_list() {
        let executor = SandboxExecutor::new();
        let supported = executor.supported_languages();
        assert!(supported.len() >= 6, "should list at least 6 supported languages");
        assert!(supported.contains(&"js"));
        assert!(supported.contains(&"python"));
        assert!(supported.contains(&"shell"));
        assert!(supported.contains(&"ruby"));
        assert!(supported.contains(&"go"));
        assert!(supported.contains(&"rust"));
    }

    #[test]
    fn test_default_config() {
        let executor = SandboxExecutor::new();
        assert_eq!(executor.timeout(), Duration::from_secs(30));
        assert_eq!(executor.max_output_bytes(), 1_048_576);
    }

    #[test]
    fn test_custom_config() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 4096);
        assert_eq!(executor.timeout(), Duration::from_secs(10));
        assert_eq!(executor.max_output_bytes(), 4096);
    }

    // ── Execution ───────────────────────────────────────────────────────

    #[test]
    #[cfg(not(windows))]
    fn test_execute_shell_echo() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("echo hello sandbox", "shell").unwrap();
        assert_eq!(result.status_code, 0);
        assert_eq!(result.stdout.trim(), "hello sandbox");
        assert!(!result.was_truncated);
    }

    #[test]
    #[cfg(not(windows))]
    fn test_execute_shell_captures_only_stdout() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = r#"echo "visible"
echo "hidden" >&2
echo "also visible""#;
        let result = executor.execute(code, "shell").unwrap();
        assert!(result.stdout.contains("visible"));
        assert!(result.stdout.contains("also visible"));
        assert!(!result.stdout.contains("hidden"));
    }

    #[test]
    fn test_execute_python() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("python") {
            return;
        }
        let result = executor.execute("print('hello from python')", "python").unwrap();
        assert_eq!(result.status_code, 0);
        assert_eq!(result.stdout.trim(), "hello from python");
    }

    #[test]
    #[cfg(not(windows))]
    fn test_execute_nonzero_exit() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("exit 42", "shell").unwrap();
        assert_eq!(result.status_code, 42);
    }

    #[test]
    #[cfg(not(windows))]
    fn test_execute_timeout() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(1), 1024);
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("sleep 30", "shell");
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("timed out"));
    }

    // Uses `seq`, so it is limited to non-Windows like the other shell tests.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_output_truncation() {
        let executor = SandboxExecutor::with_config(Duration::from_secs(10), 32);
        if !executor.is_available("shell") {
            return;
        }
        let result = executor
            .execute("for i in $(seq 1 100); do echo \"line $i\"; done", "shell")
            .unwrap();
        assert!(result.was_truncated);
        assert!(result.stdout.len() <= 32);
    }

    #[test]
    fn test_unsupported_runtime() {
        let executor = SandboxExecutor::new();
        let result = executor.execute("code", "brainfuck");
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("unsupported or unavailable runtime"));
    }

    #[test]
    fn test_case_insensitive_language() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let result = executor.execute("echo ok", "Shell");
        assert!(result.is_ok());
    }

    // ── Environment and runtime detection ───────────────────────────────

    #[test]
    #[cfg(not(windows))]
    fn test_credential_env_includes_path() {
        let env = build_credential_env();
        assert!(env.contains_key("PATH"), "PATH should be inherited");
    }

    #[test]
    fn test_credential_env_includes_aws() {
        std::env::set_var("AWS_TEST_SANDBOX", "test_value");
        let env = build_credential_env();
        assert_eq!(env.get("AWS_TEST_SANDBOX").map(|s| s.as_str()), Some("test_value"));
        std::env::remove_var("AWS_TEST_SANDBOX");
    }

    // Relies on `which` and `sh`, neither of which is guaranteed on Windows.
    #[test]
    #[cfg(not(windows))]
    fn test_is_binary_available() {
        assert!(is_binary_available("sh"));
        assert!(!is_binary_available("definitely_not_a_real_binary_xyz"));
    }

    // ── Output chunking and filtering ───────────────────────────────────

    #[test]
    fn test_chunk_output_splits_on_double_newline() {
        let text = "first paragraph\n\nsecond paragraph\n\nthird paragraph";
        let chunks = OutputFilter::chunk_output(text);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0], "first paragraph");
        assert_eq!(chunks[1], "second paragraph");
        assert_eq!(chunks[2], "third paragraph");
    }

    #[test]
    fn test_chunk_output_splits_large_paragraphs() {
        let line = "a]".repeat(30);
        let big_para = (0..20)
            .map(|i| format!("{line} line{i}"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(big_para.len() > 512);

        let chunks = OutputFilter::chunk_output(&big_para);
        assert!(chunks.len() > 1, "large paragraph should be sub-split");
        for chunk in &chunks {
            assert!(chunk.len() <= 600, "each sub-chunk should be roughly ≤512 bytes");
        }
    }

    #[test]
    fn test_chunk_output_empty_input() {
        let chunks = OutputFilter::chunk_output("");
        assert!(chunks.is_empty());
    }

    #[test]
    fn test_filter_returns_matching_sections() {
        let text = "error: compilation failed at line 42\n\n\
                    warning: unused variable `x`\n\n\
                    info: build started at 10:00\n\n\
                    error: type mismatch in function foo\n\n\
                    success: 3 tests passed";
        let result = OutputFilter::filter(text, "error compilation").unwrap();
        assert!(!result.matched_sections.is_empty(), "should find error-related chunks");
        assert!(
            result.matched_sections.iter().any(|s| s.contains("error")),
            "matched sections should contain the intent keyword"
        );
        assert!(result.total_chunks >= 4);
    }

    #[test]
    fn test_filter_returns_vocabulary() {
        let text = "the quick brown fox jumps over the lazy dog\n\n\
                    rust programming language is fast and safe\n\n\
                    memory safety without garbage collection";
        let result = OutputFilter::filter(text, "rust").unwrap();
        assert!(!result.vocabulary.is_empty(), "vocabulary should not be empty");
        let vocab_joined = result.vocabulary.join(" ");
        assert!(
            vocab_joined.contains("rust") || vocab_joined.contains("fast") || vocab_joined.contains("safe"),
            "vocabulary should contain terms from the indexed content"
        );
    }

    #[test]
    fn test_filter_no_match_returns_empty() {
        let text = "hello world\n\nfoo bar baz";
        let result = OutputFilter::filter(text, "zzzznonexistent").unwrap();
        assert!(result.matched_sections.is_empty());
        assert_eq!(result.matched_chunks, 0);
    }

    #[test]
    fn test_filter_special_chars_in_intent() {
        let text = "error: something went wrong\n\nwarning: check this";
        let result = OutputFilter::filter(text, "error: (something) [wrong]");
        assert!(result.is_ok(), "special chars in intent should be sanitized");
    }

    // ── Intent-aware execution ──────────────────────────────────────────

    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_small_output_no_filter() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let (result, filtered) = executor
            .execute_with_intent("echo hello", "shell", Some("hello"))
            .unwrap();
        assert_eq!(result.status_code, 0);
        assert!(!result.was_indexed);
        assert!(filtered.is_none());
    }

    // Uses `seq`, so it is limited to non-Windows like the other shell tests.
    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_no_intent_no_filter() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = "for i in $(seq 1 1000); do echo \"line $i: some padding text to make it bigger\"; done";
        let (result, filtered) = executor
            .execute_with_intent(code, "shell", None)
            .unwrap();
        assert!(!result.was_indexed);
        assert!(filtered.is_none());
    }

    #[test]
    #[cfg(not(windows))]
    fn test_execute_with_intent_large_output_filters() {
        let executor = SandboxExecutor::new();
        if !executor.is_available("shell") {
            return;
        }
        let code = r#"
for i in $(seq 1 50); do echo "error: compilation failed at module $i"; done
echo ""
for i in $(seq 1 50); do echo "info: processing file $i of 200"; done
echo ""
for i in $(seq 1 50); do echo "warning: deprecated API usage in handler $i"; done
echo ""
for i in $(seq 1 50); do echo "success: test suite $i passed with 100% coverage"; done
"#;
        let (result, filtered) = executor
            .execute_with_intent(code, "shell", Some("error compilation"))
            .unwrap();
        assert!(result.was_indexed, "large output with intent should be indexed");
        let filtered = filtered.expect("should have filtered output");
        assert!(!filtered.matched_sections.is_empty(), "should have matched sections");
        assert!(!filtered.vocabulary.is_empty(), "should have vocabulary");
        assert!(filtered.total_chunks > 0);
    }

    // ── Property tests ──────────────────────────────────────────────────

    #[cfg(not(windows))]
    mod proptests {
        use super::*;
        use proptest::prelude::*;

        /// Labels made only of characters that need no shell quoting.
        fn safe_label() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9]{1,20}"
        }

        proptest! {
            /// Whatever is written to stderr never shows up in the captured stdout.
            #[test]
            fn prop_only_stdout_captured(
                label in safe_label(),
            ) {
                let executor = SandboxExecutor::new();
                if !executor.is_available("shell") {
                    return Ok(());
                }

                let stdout_msg = format!("OUT_{label}");
                let stderr_msg = format!("ERR_{label}");

                let code = format!(
                    "echo \"{stdout_msg}\"\necho \"{stderr_msg}\" >&2"
                );
                let result = executor.execute(&code, "shell").unwrap();

                prop_assert!(
                    result.stdout.contains(&stdout_msg),
                    "stdout should contain the stdout message '{}', got: '{}'",
                    stdout_msg, result.stdout
                );
                prop_assert!(
                    !result.stdout.contains(&stderr_msg),
                    "stdout should NOT contain the stderr message '{}', got: '{}'",
                    stderr_msg, result.stdout
                );
            }
        }

        proptest! {
            /// A variable exported by one execution is not visible to the next one.
            #[test]
            fn prop_no_shared_state_between_executions(
                var_name in "[A-Z]{3,8}",
                var_value in "[a-z0-9]{1,10}",
            ) {
                let executor = SandboxExecutor::new();
                if !executor.is_available("shell") {
                    return Ok(());
                }

                let unique_var = format!("SQZ_PROP_{var_name}");

                let code1 = format!(
                    "export {unique_var}={var_value}\necho \"set {unique_var}\""
                );
                let result1 = executor.execute(&code1, "shell").unwrap();
                prop_assert!(
                    result1.stdout.contains(&format!("set {unique_var}")),
                    "first execution should succeed"
                );

                let code2 = format!(
                    "echo \"val=${{{unique_var}:-UNSET}}\""
                );
                let result2 = executor.execute(&code2, "shell").unwrap();
                prop_assert!(
                    result2.stdout.contains("val=UNSET"),
                    "env var from first execution should not leak into second; got: '{}'",
                    result2.stdout
                );
            }
        }
    }
}