statespace_tool_runtime/
eval.rs

1//! Component block processing for dynamic markdown content.
2
3use crate::env_validation::is_reserved_env_key;
4use crate::executor::ExecutionLimits;
5use crate::sandbox::SandboxEnv;
6use std::collections::HashMap;
7use std::fmt::Write;
8use std::path::Path;
9use tokio::process::Command;
10use tracing::warn;
11
/// Upper bound on how many `component` blocks are executed for one document.
///
/// `process_eval_blocks_with_sandbox` truncates the parsed block list to this
/// length (logging a warning); blocks past the limit are neither executed nor
/// replaced in the returned text.
pub const EVAL_MAX_BLOCKS_PER_DOCUMENT: usize = 32;
13
/// A fenced `component` code block located inside a markdown document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvalBlock {
    /// Byte range of the entire fenced block (including the ``` delimiters).
    ///
    /// Stored as `(start, end)` with `end` exclusive: it points just past the
    /// three backticks of the closing fence, so a newline following the fence
    /// is not part of the range.
    pub range: (usize, usize),
    /// Text between the opening fence line and the closing fence, with
    /// trailing newlines removed.
    pub code: String,
}
20
/// Outcome of running a single eval block.
#[derive(Debug, Clone)]
pub struct EvalResult {
    /// Text that replaces the block in the document: the command's stdout
    /// (trailing whitespace trimmed, length-capped) on success, otherwise a
    /// bracketed `[eval error: …]` message.
    pub output: String,
    /// `true` only when the command was spawned, finished within the timeout
    /// and exited with a success status.
    pub success: bool,
}
26
27pub fn parse_eval_blocks(content: &str) -> Vec<EvalBlock> {
28    let mut blocks = Vec::new();
29    let mut search_from = 0;
30
31    while let Some(block) = find_next_eval_block(content, search_from) {
32        search_from = block.range.1;
33        blocks.push(block);
34    }
35
36    blocks
37}
38
39fn find_next_eval_block(content: &str, start: usize) -> Option<EvalBlock> {
40    let haystack = &content[start..];
41
42    let mut pos = 0;
43    loop {
44        let remaining = &haystack[pos..];
45        let fence_pos = remaining.find("```")?;
46        let abs_fence_start = start + pos + fence_pos;
47
48        if abs_fence_start > 0 && content.as_bytes()[abs_fence_start - 1] != b'\n' {
49            pos += fence_pos + 3;
50            continue;
51        }
52
53        let after_backticks = &content[abs_fence_start + 3..];
54
55        let Some(newline_pos) = after_backticks.find('\n') else {
56            pos += fence_pos + 3;
57            continue;
58        };
59
60        let info_string = after_backticks[..newline_pos].trim();
61
62        if !is_eval_info_string(info_string) {
63            pos += fence_pos + 3;
64            continue;
65        }
66
67        let code_start = abs_fence_start + 3 + newline_pos + 1;
68        let code_region = &content[code_start..];
69        let close_pos = find_closing_fence(code_region)?;
70        let code = &content[code_start..code_start + close_pos];
71        let block_end = code_start + close_pos + 3;
72
73        return Some(EvalBlock {
74            range: (abs_fence_start, block_end),
75            code: code.trim_end_matches('\n').to_string(),
76        });
77    }
78}
79
/// Returns the byte offset of the first ``` in `content` that starts a line.
///
/// "Starts a line" means the fence is at offset 0 or immediately preceded by
/// a newline byte. Fences appearing mid-line are ignored. Yields `None` when
/// no such fence exists.
fn find_closing_fence(content: &str) -> Option<usize> {
    let bytes = content.as_bytes();

    content
        .match_indices("```")
        .map(|(offset, _)| offset)
        .find(|&offset| offset == 0 || bytes[offset - 1] == b'\n')
}
94
/// Tells whether a fence info string marks an executable block.
///
/// Only the exact, case-sensitive string `component` qualifies.
fn is_eval_info_string(info: &str) -> bool {
    matches!(info, "component")
}
98
99/// Merge request-scoped env vars with trusted env vars for eval execution.
100///
101/// Untrusted caller-provided keys are applied first, then trusted keys are
102/// layered on top so trusted values always win when names collide.
103#[must_use]
104#[allow(clippy::implicit_hasher)]
105pub fn merge_eval_env(
106    trusted_env: &HashMap<String, String>,
107    untrusted_env: &HashMap<String, String>,
108) -> HashMap<String, String> {
109    let mut merged = HashMap::with_capacity(trusted_env.len() + untrusted_env.len());
110
111    for (key, value) in untrusted_env {
112        if !is_reserved_env_key(key) {
113            merged.insert(key.clone(), value.clone());
114        }
115    }
116
117    for (key, value) in trusted_env {
118        if !is_reserved_env_key(key) {
119            merged.insert(key.clone(), value.clone());
120        }
121    }
122
123    merged
124}
125
126#[allow(clippy::implicit_hasher)]
127pub async fn execute_eval_block(
128    block: &EvalBlock,
129    working_dir: &Path,
130    scratch_dir: Option<&Path>,
131    workspace_dir: Option<&Path>,
132    user_env: &HashMap<String, String>,
133) -> EvalResult {
134    execute_eval_block_with_sandbox(
135        block,
136        working_dir,
137        scratch_dir,
138        workspace_dir,
139        user_env,
140        &SandboxEnv::default(),
141        &ExecutionLimits::default(),
142    )
143    .await
144}
145
146#[allow(clippy::implicit_hasher)]
147pub async fn execute_eval_block_with_sandbox(
148    block: &EvalBlock,
149    working_dir: &Path,
150    scratch_dir: Option<&Path>,
151    workspace_dir: Option<&Path>,
152    user_env: &HashMap<String, String>,
153    sandbox_env: &SandboxEnv,
154    limits: &ExecutionLimits,
155) -> EvalResult {
156    let mut command = Command::new("sh");
157    command
158        .args(["-c", &block.code])
159        .current_dir(working_dir)
160        .env_clear()
161        .env("PATH", sandbox_env.path())
162        .env("HOME", sandbox_env.home())
163        .env("LANG", sandbox_env.lang())
164        .env("LC_ALL", sandbox_env.lc_all())
165        .kill_on_drop(true);
166
167    for (k, v) in user_env {
168        if !is_reserved_env_key(k) {
169            command.env(k, v);
170        }
171    }
172
173    if let Some(dir) = scratch_dir {
174        command.env("STATESPACE_SCRATCH", dir);
175    }
176    if let Some(dir) = workspace_dir {
177        command.env("STATESPACE_WORKSPACE", dir);
178    }
179
180    let fut = command.output();
181
182    let Ok(result) = tokio::time::timeout(limits.timeout, fut).await else {
183        warn!("Eval block timed out after {:?}", limits.timeout);
184        return EvalResult {
185            output: format!(
186                "[eval error: timed out after {}s]",
187                limits.timeout.as_secs()
188            ),
189            success: false,
190        };
191    };
192
193    match result {
194        Ok(output) => {
195            let stdout = String::from_utf8_lossy(&output.stdout);
196            let stderr = String::from_utf8_lossy(&output.stderr);
197
198            if output.status.success() {
199                let mut out = stdout.trim_end().to_string();
200                if out.len() > limits.max_output_bytes {
201                    let mut limit = limits.max_output_bytes;
202                    while !out.is_char_boundary(limit) {
203                        limit -= 1;
204                    }
205                    out.truncate(limit);
206                }
207                EvalResult {
208                    output: out,
209                    success: true,
210                }
211            } else {
212                let code = output.status.code().unwrap_or(-1);
213                let mut msg = format!("[eval error: exit {code}");
214                let combined = if stderr.is_empty() {
215                    stdout.trim_end().to_string()
216                } else {
217                    stderr.trim_end().to_string()
218                };
219                if !combined.is_empty() {
220                    let mut detail = combined;
221                    if detail.len() > 256 {
222                        let mut limit = 256;
223                        while !detail.is_char_boundary(limit) {
224                            limit -= 1;
225                        }
226                        detail.truncate(limit);
227                        detail.push('…');
228                    }
229                    let _ = write!(msg, " — {detail}");
230                }
231                msg.push(']');
232                warn!(exit_code = code, "Eval block failed");
233                EvalResult {
234                    output: msg,
235                    success: false,
236                }
237            }
238        }
239        Err(e) => {
240            warn!(error = %e, "Eval block execution failed");
241            EvalResult {
242                output: format!("[eval error: {e}]"),
243                success: false,
244            }
245        }
246    }
247}
248
249#[allow(clippy::implicit_hasher)]
250pub async fn process_eval_blocks(
251    content: &str,
252    working_dir: &Path,
253    user_env: &HashMap<String, String>,
254) -> String {
255    process_eval_blocks_with_sandbox(
256        content,
257        working_dir,
258        user_env,
259        &SandboxEnv::default(),
260        &ExecutionLimits::default(),
261    )
262    .await
263}
264
265#[allow(clippy::implicit_hasher)]
266pub async fn process_eval_blocks_with_sandbox(
267    content: &str,
268    working_dir: &Path,
269    user_env: &HashMap<String, String>,
270    sandbox_env: &SandboxEnv,
271    limits: &ExecutionLimits,
272) -> String {
273    let mut blocks = parse_eval_blocks(content);
274
275    if blocks.is_empty() {
276        return content.to_string();
277    }
278
279    if blocks.len() > EVAL_MAX_BLOCKS_PER_DOCUMENT {
280        warn!(
281            count = blocks.len(),
282            limit = EVAL_MAX_BLOCKS_PER_DOCUMENT,
283            "Truncating eval blocks to limit"
284        );
285        blocks.truncate(EVAL_MAX_BLOCKS_PER_DOCUMENT);
286    }
287
288    let block_ranges: Vec<(usize, (usize, usize))> = blocks
289        .iter()
290        .enumerate()
291        .map(|(i, b)| (i, b.range))
292        .collect();
293
294    let user_env = std::sync::Arc::new(user_env.clone());
295    let sandbox_env = std::sync::Arc::new(sandbox_env.clone());
296    let limits = std::sync::Arc::new(limits.clone());
297    let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(4));
298    let mut tasks = tokio::task::JoinSet::new();
299
300    for (i, block) in blocks.into_iter().enumerate() {
301        let sem = semaphore.clone();
302        let wd = working_dir.to_path_buf();
303        let env = user_env.clone();
304        let sandbox_env = sandbox_env.clone();
305        let limits = limits.clone();
306        tasks.spawn(async move {
307            let Ok(_permit) = sem.acquire().await else {
308                return (
309                    i,
310                    block.range,
311                    EvalResult {
312                        output: "[eval error: internal]".to_string(),
313                        success: false,
314                    },
315                );
316            };
317            let result = execute_eval_block_with_sandbox(
318                &block,
319                &wd,
320                None,
321                None,
322                &env,
323                &sandbox_env,
324                &limits,
325            )
326            .await;
327            (i, block.range, result)
328        });
329    }
330
331    let mut outputs: Vec<(usize, (usize, usize), EvalResult)> =
332        Vec::with_capacity(block_ranges.len());
333    while let Some(res) = tasks.join_next().await {
334        match res {
335            Ok(item) => outputs.push(item),
336            Err(e) => {
337                warn!("eval block task panicked: {e}");
338            }
339        }
340    }
341
342    let completed: std::collections::HashSet<usize> = outputs.iter().map(|(i, _, _)| *i).collect();
343    for (i, range) in &block_ranges {
344        if !completed.contains(i) {
345            outputs.push((
346                *i,
347                *range,
348                EvalResult {
349                    output: "[eval error: internal failure]".to_string(),
350                    success: false,
351                },
352            ));
353        }
354    }
355
356    outputs.sort_by(|a, b| b.1.0.cmp(&a.1.0));
357
358    let mut result = content.to_string();
359    for (_, (start, end), eval_result) in &outputs {
360        result.replace_range(*start..*end, &eval_result.output);
361    }
362
363    result
364}
365
/// Unit tests for fence parsing, block execution, document processing and
/// environment merging. The execution tests spawn a real `sh` in `/tmp`.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use std::collections::HashMap;

    use crate::eval::{is_eval_info_string, parse_eval_blocks};

    /// Environment with no user-supplied variables.
    fn empty_env() -> HashMap<String, String> {
        HashMap::new()
    }

    // ---- info-string recognition ----

    #[test]
    fn info_string_component() {
        assert!(is_eval_info_string("component"));
    }

    #[test]
    fn info_string_rejects_non_component() {
        assert!(!is_eval_info_string("eval"));
        assert!(!is_eval_info_string("rust"));
        assert!(!is_eval_info_string("json"));
        assert!(!is_eval_info_string(""));
    }

    // ---- parsing ----

    #[test]
    fn parse_single_component_block() {
        let md = "# Title\n\n```component\necho hello\n```\n\nMore text\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello");
    }

    #[test]
    fn parse_multiple_component_blocks() {
        let md = "```component\necho one\n```\n\ntext\n\n```component\necho two\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].code, "echo one");
        assert_eq!(blocks[1].code, "echo two");
    }

    #[test]
    fn skip_non_component_code_blocks() {
        let md = "```rust\nfn main() {}\n```\n\n```component\necho hi\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hi");
    }

    #[test]
    fn no_component_blocks() {
        let md = "# Just a doc\n\nSome text.\n\n```json\n{}\n```\n";
        let blocks = parse_eval_blocks(md);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiline_component_block() {
        let md = "```component\necho hello\necho world\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello\necho world");
    }

    /// The range covers both fences but stops before the newline that
    /// follows the closing fence.
    #[test]
    fn component_block_preserves_range() {
        let prefix = "# Title\n\n";
        let block_with_newline = "```component\necho hi\n```\n";
        let block_without_newline = "```component\necho hi\n```";
        let suffix = "\nMore text\n";
        let md = format!("{prefix}{block_with_newline}{suffix}");
        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].range.0, prefix.len());
        assert_eq!(
            blocks[0].range.1,
            prefix.len() + block_without_newline.len()
        );
    }

    // ---- single-block execution ----

    #[tokio::test]
    async fn execute_eval_block_success() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "echo hello".to_string(),
        };
        let result = execute_eval_block(
            &block,
            std::path::Path::new("/tmp"),
            None,
            None,
            &empty_env(),
        )
        .await;
        assert!(result.success);
        assert_eq!(result.output, "hello");
    }

    #[tokio::test]
    async fn execute_eval_block_failure() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "exit 42".to_string(),
        };
        let result = execute_eval_block(
            &block,
            std::path::Path::new("/tmp"),
            None,
            None,
            &empty_env(),
        )
        .await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
        assert!(result.output.contains("42"));
    }

    #[tokio::test]
    async fn execute_eval_block_command_not_found() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "nonexistent_command_xyz_123".to_string(),
        };
        let result = execute_eval_block(
            &block,
            std::path::Path::new("/tmp"),
            None,
            None,
            &empty_env(),
        )
        .await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
    }

    // ---- whole-document processing ----

    #[tokio::test]
    async fn process_replaces_component_blocks() {
        use crate::eval::process_eval_blocks;
        let md = "# Title\n\n```component\necho 42\n```\n\nEnd\n";
        let result = process_eval_blocks(md, std::path::Path::new("/tmp"), &empty_env()).await;
        assert!(result.contains("42"));
        assert!(!result.contains("```component"));
        assert!(result.contains("# Title"));
        assert!(result.contains("End"));
    }

    #[tokio::test]
    async fn process_no_component_blocks_returns_unchanged() {
        use crate::eval::process_eval_blocks;
        let md = "# Just text\n\n```json\n{}\n```\n";
        let result = process_eval_blocks(md, std::path::Path::new("/tmp"), &empty_env()).await;
        assert_eq!(result, md);
    }

    #[tokio::test]
    async fn process_multiple_blocks_replaced_in_order() {
        use crate::eval::process_eval_blocks;
        let md = "A\n\n```component\necho first\n```\n\nB\n\n```component\necho second\n```\n\nC\n";
        let result = process_eval_blocks(md, std::path::Path::new("/tmp"), &empty_env()).await;
        let first_pos = result.find("first").expect("first should be present");
        let second_pos = result.find("second").expect("second should be present");
        assert!(first_pos < second_pos);
        assert!(result.contains("A\n"));
        assert!(result.contains("B\n"));
        assert!(result.contains("C\n"));
    }

    /// A block that never terminates must be cut off by the default timeout.
    #[tokio::test]
    async fn execute_eval_block_timeout() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "while true; do :; done".to_string(),
        };
        let result = execute_eval_block(
            &block,
            std::path::Path::new("/tmp"),
            None,
            None,
            &empty_env(),
        )
        .await;
        assert!(!result.success);
        assert!(result.output.contains("timed out"));
    }

    // ---- environment handling ----

    #[tokio::test]
    async fn user_env_injected_into_subprocess() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "echo $MY_SECRET".to_string(),
        };
        let env = HashMap::from([("MY_SECRET".into(), "hunter2".into())]);
        let result =
            execute_eval_block(&block, std::path::Path::new("/tmp"), None, None, &env).await;
        assert!(result.success);
        assert_eq!(result.output, "hunter2");
    }

    #[tokio::test]
    async fn reserved_env_keys_not_overridden() {
        use crate::eval::{EvalBlock, execute_eval_block};
        let block = EvalBlock {
            range: (0, 0),
            code: "echo $HOME".to_string(),
        };
        let env = HashMap::from([("HOME".into(), "/evil".into())]);
        let result =
            execute_eval_block(&block, std::path::Path::new("/tmp"), None, None, &env).await;
        assert!(result.success);
        assert_ne!(result.output, "/evil");
    }

    #[tokio::test]
    async fn process_eval_blocks_with_user_env() {
        use crate::eval::process_eval_blocks;
        let md = "```component\necho $DB\n```\n";
        let env = HashMap::from([("DB".into(), "postgresql://localhost/test".into())]);
        let result = process_eval_blocks(md, std::path::Path::new("/tmp"), &env).await;
        assert!(result.contains("postgresql://localhost/test"));
        assert!(!result.contains("```component"));
    }

    /// The parser itself is not subject to the per-document execution limit:
    /// it must report every block even when the document holds more than
    /// `EVAL_MAX_BLOCKS_PER_DOCUMENT` of them.
    #[test]
    fn parse_finds_all_component_blocks_beyond_limit() {
        use crate::eval::EVAL_MAX_BLOCKS_PER_DOCUMENT;
        use std::fmt::Write;

        // Derive the count from the constant so the test keeps exceeding the
        // limit if the limit is ever raised.
        let total = EVAL_MAX_BLOCKS_PER_DOCUMENT + 5;
        let mut md = String::new();
        for i in 0..total {
            let _ = write!(md, "```component\necho {i}\n```\n\n");
        }
        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), total);
    }

    #[test]
    fn merge_eval_env_trusted_overrides_untrusted() {
        use crate::eval::merge_eval_env;

        let trusted = HashMap::from([("USER_ID".to_string(), "42".to_string())]);
        let untrusted = HashMap::from([
            ("USER_ID".to_string(), "7".to_string()),
            ("PAGE".to_string(), "stats".to_string()),
        ]);

        let merged = merge_eval_env(&trusted, &untrusted);
        assert_eq!(merged.get("USER_ID"), Some(&"42".to_string()));
        assert_eq!(merged.get("PAGE"), Some(&"stats".to_string()));
    }

    #[test]
    fn merge_eval_env_filters_reserved_keys() {
        use crate::eval::merge_eval_env;

        let trusted = HashMap::from([("AWS_SECRET_ACCESS_KEY".to_string(), "x".to_string())]);
        let untrusted = HashMap::from([
            ("LD_PRELOAD".to_string(), "y".to_string()),
            ("PATH".to_string(), "/tmp/evil".to_string()),
        ]);

        let merged = merge_eval_env(&trusted, &untrusted);
        assert!(!merged.contains_key("AWS_SECRET_ACCESS_KEY"));
        assert!(!merged.contains_key("LD_PRELOAD"));
        assert!(!merged.contains_key("PATH"));
    }
}
statespace_tool_runtime/eval.rs

statespace_tool_runtime/
eval.rs