1use crate::env_validation::is_reserved_env_key;
4use crate::executor::ExecutionLimits;
5use crate::sandbox::SandboxEnv;
6use std::collections::HashMap;
7use std::fmt::Write;
8use std::path::Path;
9use tokio::process::Command;
10use tracing::warn;
11
/// Upper bound on how many eval blocks are executed for a single document.
///
/// Blocks found past this count are dropped from the work list before
/// execution, so they are not run and their text is left as-is.
pub const EVAL_MAX_BLOCKS_PER_DOCUMENT: usize = 32;
13
/// A fenced eval block located inside a document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvalBlock {
    /// Byte offsets `(start, end)` into the source text: `start` is the first
    /// backtick of the opening fence, `end` is one past the third backtick of
    /// the closing fence.
    pub range: (usize, usize),
    /// The text between the fence lines, with trailing newlines removed.
    pub code: String,
}
20
/// Outcome of running one eval block.
#[derive(Debug, Clone)]
pub struct EvalResult {
    /// Text that replaces the block in the document: the captured output on
    /// success, or a bracketed `[eval error: …]` message on failure.
    pub output: String,
    /// `true` only when the block ran and exited successfully.
    pub success: bool,
}
26
27pub fn parse_eval_blocks(content: &str) -> Vec<EvalBlock> {
28 let mut blocks = Vec::new();
29 let mut search_from = 0;
30
31 while let Some(block) = find_next_eval_block(content, search_from) {
32 search_from = block.range.1;
33 blocks.push(block);
34 }
35
36 blocks
37}
38
39fn find_next_eval_block(content: &str, start: usize) -> Option<EvalBlock> {
40 let haystack = &content[start..];
41
42 let mut pos = 0;
43 loop {
44 let remaining = &haystack[pos..];
45 let fence_pos = remaining.find("```")?;
46 let abs_fence_start = start + pos + fence_pos;
47
48 if abs_fence_start > 0 && content.as_bytes()[abs_fence_start - 1] != b'\n' {
49 pos += fence_pos + 3;
50 continue;
51 }
52
53 let after_backticks = &content[abs_fence_start + 3..];
54
55 let Some(newline_pos) = after_backticks.find('\n') else {
56 pos += fence_pos + 3;
57 continue;
58 };
59
60 let info_string = after_backticks[..newline_pos].trim();
61
62 if !is_eval_info_string(info_string) {
63 pos += fence_pos + 3;
64 continue;
65 }
66
67 let code_start = abs_fence_start + 3 + newline_pos + 1;
68 let code_region = &content[code_start..];
69 let close_pos = find_closing_fence(code_region)?;
70 let code = &content[code_start..code_start + close_pos];
71 let block_end = code_start + close_pos + 3;
72
73 return Some(EvalBlock {
74 range: (abs_fence_start, block_end),
75 code: code.trim_end_matches('\n').to_string(),
76 });
77 }
78}
79
/// Returns the byte offset of the first "```" in `content` that starts a
/// line, i.e. sits at offset 0 or immediately after a `\n`.
///
/// Backtick runs in the middle of a line are ignored. Returns `None` when no
/// line-initial fence exists.
fn find_closing_fence(content: &str) -> Option<usize> {
    let bytes = content.as_bytes();
    content
        .match_indices("```")
        .map(|(offset, _)| offset)
        .find(|&offset| offset == 0 || bytes[offset - 1] == b'\n')
}
94
/// Reports whether a fence info string marks an eval block.
///
/// Only the exact string `component` qualifies.
fn is_eval_info_string(info: &str) -> bool {
    matches!(info, "component")
}
98
99#[must_use]
104#[allow(clippy::implicit_hasher)]
105pub fn merge_eval_env(
106 trusted_env: &HashMap<String, String>,
107 untrusted_env: &HashMap<String, String>,
108) -> HashMap<String, String> {
109 let mut merged = HashMap::with_capacity(trusted_env.len() + untrusted_env.len());
110
111 for (key, value) in untrusted_env {
112 if !is_reserved_env_key(key) {
113 merged.insert(key.clone(), value.clone());
114 }
115 }
116
117 for (key, value) in trusted_env {
118 if !is_reserved_env_key(key) {
119 merged.insert(key.clone(), value.clone());
120 }
121 }
122
123 merged
124}
125
126#[allow(clippy::implicit_hasher)]
127pub async fn execute_eval_block(
128 block: &EvalBlock,
129 working_dir: &Path,
130 scratch_dir: Option<&Path>,
131 workspace_dir: Option<&Path>,
132 user_env: &HashMap<String, String>,
133) -> EvalResult {
134 execute_eval_block_with_sandbox(
135 block,
136 working_dir,
137 scratch_dir,
138 workspace_dir,
139 user_env,
140 &SandboxEnv::default(),
141 &ExecutionLimits::default(),
142 )
143 .await
144}
145
146#[allow(clippy::implicit_hasher)]
147pub async fn execute_eval_block_with_sandbox(
148 block: &EvalBlock,
149 working_dir: &Path,
150 scratch_dir: Option<&Path>,
151 workspace_dir: Option<&Path>,
152 user_env: &HashMap<String, String>,
153 sandbox_env: &SandboxEnv,
154 limits: &ExecutionLimits,
155) -> EvalResult {
156 let mut command = Command::new("sh");
157 command
158 .args(["-c", &block.code])
159 .current_dir(working_dir)
160 .env_clear()
161 .env("PATH", sandbox_env.path())
162 .env("HOME", sandbox_env.home())
163 .env("LANG", sandbox_env.lang())
164 .env("LC_ALL", sandbox_env.lc_all())
165 .kill_on_drop(true);
166
167 for (k, v) in user_env {
168 if !is_reserved_env_key(k) {
169 command.env(k, v);
170 }
171 }
172
173 if let Some(dir) = scratch_dir {
174 command.env("STATESPACE_SCRATCH", dir);
175 }
176 if let Some(dir) = workspace_dir {
177 command.env("STATESPACE_WORKSPACE", dir);
178 }
179
180 let fut = command.output();
181
182 let Ok(result) = tokio::time::timeout(limits.timeout, fut).await else {
183 warn!("Eval block timed out after {:?}", limits.timeout);
184 return EvalResult {
185 output: format!(
186 "[eval error: timed out after {}s]",
187 limits.timeout.as_secs()
188 ),
189 success: false,
190 };
191 };
192
193 match result {
194 Ok(output) => {
195 let stdout = String::from_utf8_lossy(&output.stdout);
196 let stderr = String::from_utf8_lossy(&output.stderr);
197
198 if output.status.success() {
199 let mut out = stdout.trim_end().to_string();
200 if out.len() > limits.max_output_bytes {
201 let mut limit = limits.max_output_bytes;
202 while !out.is_char_boundary(limit) {
203 limit -= 1;
204 }
205 out.truncate(limit);
206 }
207 EvalResult {
208 output: out,
209 success: true,
210 }
211 } else {
212 let code = output.status.code().unwrap_or(-1);
213 let mut msg = format!("[eval error: exit {code}");
214 let combined = if stderr.is_empty() {
215 stdout.trim_end().to_string()
216 } else {
217 stderr.trim_end().to_string()
218 };
219 if !combined.is_empty() {
220 let mut detail = combined;
221 if detail.len() > 256 {
222 let mut limit = 256;
223 while !detail.is_char_boundary(limit) {
224 limit -= 1;
225 }
226 detail.truncate(limit);
227 detail.push('…');
228 }
229 let _ = write!(msg, " — {detail}");
230 }
231 msg.push(']');
232 warn!(exit_code = code, "Eval block failed");
233 EvalResult {
234 output: msg,
235 success: false,
236 }
237 }
238 }
239 Err(e) => {
240 warn!(error = %e, "Eval block execution failed");
241 EvalResult {
242 output: format!("[eval error: {e}]"),
243 success: false,
244 }
245 }
246 }
247}
248
249#[allow(clippy::implicit_hasher)]
250pub async fn process_eval_blocks(
251 content: &str,
252 working_dir: &Path,
253 user_env: &HashMap<String, String>,
254) -> String {
255 process_eval_blocks_with_sandbox(
256 content,
257 working_dir,
258 user_env,
259 &SandboxEnv::default(),
260 &ExecutionLimits::default(),
261 )
262 .await
263}
264
265#[allow(clippy::implicit_hasher)]
266pub async fn process_eval_blocks_with_sandbox(
267 content: &str,
268 working_dir: &Path,
269 user_env: &HashMap<String, String>,
270 sandbox_env: &SandboxEnv,
271 limits: &ExecutionLimits,
272) -> String {
273 let mut blocks = parse_eval_blocks(content);
274
275 if blocks.is_empty() {
276 return content.to_string();
277 }
278
279 if blocks.len() > EVAL_MAX_BLOCKS_PER_DOCUMENT {
280 warn!(
281 count = blocks.len(),
282 limit = EVAL_MAX_BLOCKS_PER_DOCUMENT,
283 "Truncating eval blocks to limit"
284 );
285 blocks.truncate(EVAL_MAX_BLOCKS_PER_DOCUMENT);
286 }
287
288 let block_ranges: Vec<(usize, (usize, usize))> = blocks
289 .iter()
290 .enumerate()
291 .map(|(i, b)| (i, b.range))
292 .collect();
293
294 let user_env = std::sync::Arc::new(user_env.clone());
295 let sandbox_env = std::sync::Arc::new(sandbox_env.clone());
296 let limits = std::sync::Arc::new(limits.clone());
297 let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(4));
298 let mut tasks = tokio::task::JoinSet::new();
299
300 for (i, block) in blocks.into_iter().enumerate() {
301 let sem = semaphore.clone();
302 let wd = working_dir.to_path_buf();
303 let env = user_env.clone();
304 let sandbox_env = sandbox_env.clone();
305 let limits = limits.clone();
306 tasks.spawn(async move {
307 let Ok(_permit) = sem.acquire().await else {
308 return (
309 i,
310 block.range,
311 EvalResult {
312 output: "[eval error: internal]".to_string(),
313 success: false,
314 },
315 );
316 };
317 let result = execute_eval_block_with_sandbox(
318 &block,
319 &wd,
320 None,
321 None,
322 &env,
323 &sandbox_env,
324 &limits,
325 )
326 .await;
327 (i, block.range, result)
328 });
329 }
330
331 let mut outputs: Vec<(usize, (usize, usize), EvalResult)> =
332 Vec::with_capacity(block_ranges.len());
333 while let Some(res) = tasks.join_next().await {
334 match res {
335 Ok(item) => outputs.push(item),
336 Err(e) => {
337 warn!("eval block task panicked: {e}");
338 }
339 }
340 }
341
342 let completed: std::collections::HashSet<usize> = outputs.iter().map(|(i, _, _)| *i).collect();
343 for (i, range) in &block_ranges {
344 if !completed.contains(i) {
345 outputs.push((
346 *i,
347 *range,
348 EvalResult {
349 output: "[eval error: internal failure]".to_string(),
350 success: false,
351 },
352 ));
353 }
354 }
355
356 outputs.sort_by(|a, b| b.1.0.cmp(&a.1.0));
357
358 let mut result = content.to_string();
359 for (_, (start, end), eval_result) in &outputs {
360 result.replace_range(*start..*end, &eval_result.output);
361 }
362
363 result
364}
365
/// Unit tests for fence parsing, environment merging and block execution.
///
/// The execution tests spawn `sh` with `/tmp` as the working directory.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;

    use crate::eval::{
        EVAL_MAX_BLOCKS_PER_DOCUMENT, EvalBlock, EvalResult, execute_eval_block,
        is_eval_info_string, merge_eval_env, parse_eval_blocks, process_eval_blocks,
    };

    fn empty_env() -> HashMap<String, String> {
        HashMap::new()
    }

    /// Runs `code` as a single eval block in `/tmp` with default settings.
    async fn run(code: &str, env: &HashMap<String, String>) -> EvalResult {
        let block = EvalBlock {
            range: (0, 0),
            code: code.to_string(),
        };
        execute_eval_block(&block, Path::new("/tmp"), None, None, env).await
    }

    /// Renders `md` in `/tmp` with default settings.
    async fn render(md: &str, env: &HashMap<String, String>) -> String {
        process_eval_blocks(md, Path::new("/tmp"), env).await
    }

    #[test]
    fn info_string_component() {
        assert!(is_eval_info_string("component"));
    }

    #[test]
    fn info_string_rejects_non_component() {
        assert!(!is_eval_info_string("eval"));
        assert!(!is_eval_info_string("rust"));
        assert!(!is_eval_info_string("json"));
        assert!(!is_eval_info_string(""));
    }

    #[test]
    fn parse_single_component_block() {
        let md = "# Title\n\n```component\necho hello\n```\n\nMore text\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello");
    }

    #[test]
    fn parse_multiple_component_blocks() {
        let md = "```component\necho one\n```\n\ntext\n\n```component\necho two\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].code, "echo one");
        assert_eq!(blocks[1].code, "echo two");
    }

    #[test]
    fn skip_non_component_code_blocks() {
        let md = "```rust\nfn main() {}\n```\n\n```component\necho hi\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hi");
    }

    #[test]
    fn no_component_blocks() {
        let md = "# Just a doc\n\nSome text.\n\n```json\n{}\n```\n";
        let blocks = parse_eval_blocks(md);
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiline_component_block() {
        let md = "```component\necho hello\necho world\n```\n";
        let blocks = parse_eval_blocks(md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello\necho world");
    }

    #[test]
    fn component_block_preserves_range() {
        let prefix = "# Title\n\n";
        let block_with_newline = "```component\necho hi\n```\n";
        let block_without_newline = "```component\necho hi\n```";
        let suffix = "\nMore text\n";
        let md = format!("{prefix}{block_with_newline}{suffix}");
        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].range.0, prefix.len());
        // The range ends right after the closing backticks, before the newline.
        assert_eq!(
            blocks[0].range.1,
            prefix.len() + block_without_newline.len()
        );
    }

    #[tokio::test]
    async fn execute_eval_block_success() {
        let result = run("echo hello", &empty_env()).await;
        assert!(result.success);
        assert_eq!(result.output, "hello");
    }

    #[tokio::test]
    async fn execute_eval_block_failure() {
        let result = run("exit 42", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
        assert!(result.output.contains("42"));
    }

    #[tokio::test]
    async fn execute_eval_block_command_not_found() {
        let result = run("nonexistent_command_xyz_123", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
    }

    #[tokio::test]
    async fn process_replaces_component_blocks() {
        let md = "# Title\n\n```component\necho 42\n```\n\nEnd\n";
        let result = render(md, &empty_env()).await;
        assert!(result.contains("42"));
        assert!(!result.contains("```component"));
        assert!(result.contains("# Title"));
        assert!(result.contains("End"));
    }

    #[tokio::test]
    async fn process_no_component_blocks_returns_unchanged() {
        let md = "# Just text\n\n```json\n{}\n```\n";
        let result = render(md, &empty_env()).await;
        assert_eq!(result, md);
    }

    #[tokio::test]
    async fn process_multiple_blocks_replaced_in_order() {
        let md =
            "A\n\n```component\necho first\n```\n\nB\n\n```component\necho second\n```\n\nC\n";
        let result = render(md, &empty_env()).await;
        let first_pos = result.find("first").expect("first should be present");
        let second_pos = result.find("second").expect("second should be present");
        assert!(first_pos < second_pos);
        assert!(result.contains("A\n"));
        assert!(result.contains("B\n"));
        assert!(result.contains("C\n"));
    }

    #[tokio::test]
    async fn execute_eval_block_timeout() {
        let result = run("while true; do :; done", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("timed out"));
    }

    #[tokio::test]
    async fn user_env_injected_into_subprocess() {
        let env: HashMap<String, String> =
            HashMap::from([("MY_SECRET".into(), "hunter2".into())]);
        let result = run("echo $MY_SECRET", &env).await;
        assert!(result.success);
        assert_eq!(result.output, "hunter2");
    }

    #[tokio::test]
    async fn reserved_env_keys_not_overridden() {
        let env: HashMap<String, String> = HashMap::from([("HOME".into(), "/evil".into())]);
        let result = run("echo $HOME", &env).await;
        assert!(result.success);
        assert_ne!(result.output, "/evil");
    }

    #[tokio::test]
    async fn process_eval_blocks_with_user_env() {
        let md = "```component\necho $DB\n```\n";
        let env: HashMap<String, String> =
            HashMap::from([("DB".into(), "postgresql://localhost/test".into())]);
        let result = render(md, &env).await;
        assert!(result.contains("postgresql://localhost/test"));
        assert!(!result.contains("```component"));
    }

    #[test]
    fn parse_finds_all_component_blocks_beyond_limit() {
        use std::fmt::Write;

        // Derived from the constant so the test keeps exceeding the execution
        // limit even if the limit changes; parsing itself must not cap.
        let count = EVAL_MAX_BLOCKS_PER_DOCUMENT + 5;
        let mut md = String::new();
        for i in 0..count {
            let _ = write!(md, "```component\necho {i}\n```\n\n");
        }
        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), count);
    }

    #[test]
    fn merge_eval_env_trusted_overrides_untrusted() {
        let trusted = HashMap::from([("USER_ID".to_string(), "42".to_string())]);
        let untrusted = HashMap::from([
            ("USER_ID".to_string(), "7".to_string()),
            ("PAGE".to_string(), "stats".to_string()),
        ]);

        let merged = merge_eval_env(&trusted, &untrusted);
        assert_eq!(merged.get("USER_ID"), Some(&"42".to_string()));
        assert_eq!(merged.get("PAGE"), Some(&"stats".to_string()));
    }

    #[test]
    fn merge_eval_env_filters_reserved_keys() {
        let trusted = HashMap::from([("AWS_SECRET_ACCESS_KEY".to_string(), "x".to_string())]);
        let untrusted = HashMap::from([
            ("LD_PRELOAD".to_string(), "y".to_string()),
            ("PATH".to_string(), "/tmp/evil".to_string()),
        ]);

        let merged = merge_eval_env(&trusted, &untrusted);
        assert!(!merged.contains_key("AWS_SECRET_ACCESS_KEY"));
        assert!(!merged.contains_key("LD_PRELOAD"));
        assert!(!merged.contains_key("PATH"));
    }
}