1use crate::env_validation::is_reserved_env_key;
4use crate::sandbox::SandboxEnv;
5use std::collections::HashMap;
6use std::fmt::Write;
7use std::path::Path;
8use std::time::Duration;
9use tokio::process::Command;
10use tracing::warn;
11
/// Wall-clock budget granted to a single eval block before it is abandoned.
pub const EVAL_BLOCK_TIMEOUT: Duration = Duration::from_secs(5);

/// Upper bound on how many eval blocks are executed for one document.
pub const EVAL_MAX_BLOCKS_PER_DOCUMENT: usize = 20;

/// Upper bound, in bytes, on the captured stdout kept from a successful block (1 MiB).
pub const EVAL_MAX_OUTPUT_BYTES: usize = 1 << 20;

/// A fenced eval block located inside a document.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EvalBlock {
    /// Byte offsets `(start, end)` into the source document: `start` is the
    /// first backtick of the opening fence, `end` is one past the last
    /// backtick of the closing fence.
    pub range: (usize, usize),
    /// The text between the fences, with trailing newlines removed.
    pub code: String,
}
22
/// Outcome of running one eval block.
#[derive(Clone, Debug)]
pub struct EvalResult {
    /// Text that replaces the block in the rendered document: the captured
    /// output on success, or a bracketed `[eval error: …]` message otherwise.
    pub output: String,
    /// `true` only when the command ran and exited successfully.
    pub success: bool,
}
28
29pub fn parse_eval_blocks(content: &str) -> Vec<EvalBlock> {
30 let mut blocks = Vec::new();
31 let mut search_from = 0;
32
33 while let Some(block) = find_next_eval_block(content, search_from) {
34 search_from = block.range.1;
35 blocks.push(block);
36 }
37
38 blocks
39}
40
41fn find_next_eval_block(content: &str, start: usize) -> Option<EvalBlock> {
42 let haystack = &content[start..];
43
44 let mut pos = 0;
45 loop {
46 let remaining = &haystack[pos..];
47 let fence_pos = remaining.find("```")?;
48 let abs_fence_start = start + pos + fence_pos;
49
50 if abs_fence_start > 0 && content.as_bytes()[abs_fence_start - 1] != b'\n' {
51 pos += fence_pos + 3;
52 continue;
53 }
54
55 let after_backticks = &content[abs_fence_start + 3..];
56
57 let Some(newline_pos) = after_backticks.find('\n') else {
58 pos += fence_pos + 3;
59 continue;
60 };
61
62 let info_string = after_backticks[..newline_pos].trim();
63
64 if !is_eval_info_string(info_string) {
65 pos += fence_pos + 3;
66 continue;
67 }
68
69 let code_start = abs_fence_start + 3 + newline_pos + 1;
70 let code_region = &content[code_start..];
71 let close_pos = find_closing_fence(code_region)?;
72 let code = &content[code_start..code_start + close_pos];
73 let block_end = code_start + close_pos + 3; return Some(EvalBlock {
76 range: (abs_fence_start, block_end),
77 code: code.trim_end_matches('\n').to_string(),
78 });
79 }
80}
81
/// Returns the byte offset of the first triple backtick in `content` that sits
/// at the start of a line (offset 0 or right after `\n`), or `None` if there
/// is none.
fn find_closing_fence(content: &str) -> Option<usize> {
    let bytes = content.as_bytes();

    content
        .match_indices("```")
        .map(|(offset, _)| offset)
        .find(|&offset| offset == 0 || bytes[offset - 1] == b'\n')
}
96
/// Tells whether a fence info string marks an eval block.
///
/// Only the exact, case-sensitive string `component` qualifies.
fn is_eval_info_string(info: &str) -> bool {
    matches!(info, "component")
}
100
101#[must_use]
106#[allow(clippy::implicit_hasher)]
107pub fn merge_eval_env(
108 trusted_env: &HashMap<String, String>,
109 untrusted_env: &HashMap<String, String>,
110) -> HashMap<String, String> {
111 let mut merged = HashMap::with_capacity(trusted_env.len() + untrusted_env.len());
112
113 for (key, value) in untrusted_env {
114 if !is_reserved_env_key(key) {
115 merged.insert(key.clone(), value.clone());
116 }
117 }
118
119 for (key, value) in trusted_env {
120 if !is_reserved_env_key(key) {
121 merged.insert(key.clone(), value.clone());
122 }
123 }
124
125 merged
126}
127
128#[allow(clippy::implicit_hasher)]
129pub async fn execute_eval_block(
130 block: &EvalBlock,
131 working_dir: &Path,
132 scratch_dir: Option<&Path>,
133 workspace_dir: Option<&Path>,
134 user_env: &HashMap<String, String>,
135) -> EvalResult {
136 execute_eval_block_with_sandbox(
137 block,
138 working_dir,
139 scratch_dir,
140 workspace_dir,
141 user_env,
142 &SandboxEnv::default(),
143 )
144 .await
145}
146
147#[allow(clippy::implicit_hasher)]
148pub async fn execute_eval_block_with_sandbox(
149 block: &EvalBlock,
150 working_dir: &Path,
151 scratch_dir: Option<&Path>,
152 workspace_dir: Option<&Path>,
153 user_env: &HashMap<String, String>,
154 sandbox_env: &SandboxEnv,
155) -> EvalResult {
156 let mut command = Command::new("sh");
157 command
158 .args(["-c", &block.code])
159 .current_dir(working_dir)
160 .env_clear()
161 .env("PATH", sandbox_env.path())
162 .env("HOME", sandbox_env.home())
163 .env("LANG", sandbox_env.lang())
164 .env("LC_ALL", sandbox_env.lc_all())
165 .kill_on_drop(true);
166
167 for (k, v) in user_env {
168 if !is_reserved_env_key(k) {
169 command.env(k, v);
170 }
171 }
172
173 if let Some(dir) = scratch_dir {
174 command.env("STATESPACE_SCRATCH", dir);
175 }
176 if let Some(dir) = workspace_dir {
177 command.env("STATESPACE_WORKSPACE", dir);
178 }
179
180 let fut = command.output();
181
182 let Ok(result) = tokio::time::timeout(EVAL_BLOCK_TIMEOUT, fut).await else {
183 warn!("Eval block timed out after {:?}", EVAL_BLOCK_TIMEOUT);
184 return EvalResult {
185 output: "[eval error: timed out after 5s]".to_string(),
186 success: false,
187 };
188 };
189
190 match result {
191 Ok(output) => {
192 let stdout = String::from_utf8_lossy(&output.stdout);
193 let stderr = String::from_utf8_lossy(&output.stderr);
194
195 if output.status.success() {
196 let mut out = stdout.trim_end().to_string();
197 if out.len() > EVAL_MAX_OUTPUT_BYTES {
198 let mut limit = EVAL_MAX_OUTPUT_BYTES;
199 while !out.is_char_boundary(limit) {
200 limit -= 1;
201 }
202 out.truncate(limit);
203 }
204 EvalResult {
205 output: out,
206 success: true,
207 }
208 } else {
209 let code = output.status.code().unwrap_or(-1);
210 let mut msg = format!("[eval error: exit {code}");
211 let combined = if stderr.is_empty() {
212 stdout.trim_end().to_string()
213 } else {
214 stderr.trim_end().to_string()
215 };
216 if !combined.is_empty() {
217 let mut detail = combined;
218 if detail.len() > 256 {
219 let mut limit = 256;
220 while !detail.is_char_boundary(limit) {
221 limit -= 1;
222 }
223 detail.truncate(limit);
224 detail.push('…');
225 }
226 let _ = write!(msg, " — {detail}");
227 }
228 msg.push(']');
229 warn!(exit_code = code, "Eval block failed");
230 EvalResult {
231 output: msg,
232 success: false,
233 }
234 }
235 }
236 Err(e) => {
237 warn!(error = %e, "Eval block execution failed");
238 EvalResult {
239 output: format!("[eval error: {e}]"),
240 success: false,
241 }
242 }
243 }
244}
245
246#[allow(clippy::implicit_hasher)]
247pub async fn process_eval_blocks(
248 content: &str,
249 working_dir: &Path,
250 user_env: &HashMap<String, String>,
251) -> String {
252 process_eval_blocks_with_sandbox(content, working_dir, user_env, &SandboxEnv::default()).await
253}
254
255#[allow(clippy::implicit_hasher)]
256pub async fn process_eval_blocks_with_sandbox(
257 content: &str,
258 working_dir: &Path,
259 user_env: &HashMap<String, String>,
260 sandbox_env: &SandboxEnv,
261) -> String {
262 let mut blocks = parse_eval_blocks(content);
263
264 if blocks.is_empty() {
265 return content.to_string();
266 }
267
268 if blocks.len() > EVAL_MAX_BLOCKS_PER_DOCUMENT {
269 warn!(
270 count = blocks.len(),
271 limit = EVAL_MAX_BLOCKS_PER_DOCUMENT,
272 "Truncating eval blocks to limit"
273 );
274 blocks.truncate(EVAL_MAX_BLOCKS_PER_DOCUMENT);
275 }
276
277 let block_ranges: Vec<(usize, (usize, usize))> = blocks
278 .iter()
279 .enumerate()
280 .map(|(i, b)| (i, b.range))
281 .collect();
282
283 let user_env = std::sync::Arc::new(user_env.clone());
284 let sandbox_env = std::sync::Arc::new(sandbox_env.clone());
285 let sem = std::sync::Arc::new(tokio::sync::Semaphore::new(4));
286 let mut set = tokio::task::JoinSet::new();
287
288 for (i, block) in blocks.into_iter().enumerate() {
289 let sem = sem.clone();
290 let wd = working_dir.to_path_buf();
291 let env = user_env.clone();
292 let sandbox_env = sandbox_env.clone();
293 set.spawn(async move {
294 let Ok(_permit) = sem.acquire().await else {
295 return (
296 i,
297 block.range,
298 EvalResult {
299 output: "[eval error: internal]".to_string(),
300 success: false,
301 },
302 );
303 };
304 let result =
305 execute_eval_block_with_sandbox(&block, &wd, None, None, &env, &sandbox_env).await;
306 (i, block.range, result)
307 });
308 }
309
310 let mut outputs: Vec<(usize, (usize, usize), EvalResult)> =
311 Vec::with_capacity(block_ranges.len());
312 while let Some(res) = set.join_next().await {
313 match res {
314 Ok(item) => outputs.push(item),
315 Err(e) => {
316 warn!("eval block task panicked: {e}");
317 }
318 }
319 }
320
321 let completed: std::collections::HashSet<usize> = outputs.iter().map(|(i, _, _)| *i).collect();
322 for (i, range) in &block_ranges {
323 if !completed.contains(i) {
324 outputs.push((
325 *i,
326 *range,
327 EvalResult {
328 output: "[eval error: internal failure]".to_string(),
329 success: false,
330 },
331 ));
332 }
333 }
334
335 outputs.sort_by(|a, b| b.1.0.cmp(&a.1.0));
336
337 let mut result = content.to_string();
338 for (_, (start, end), eval_result) in &outputs {
339 result.replace_range(*start..*end, &eval_result.output);
340 }
341
342 result
343}
344
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;

    use crate::eval::{
        EvalBlock, EvalResult, execute_eval_block, is_eval_info_string, merge_eval_env,
        parse_eval_blocks, process_eval_blocks,
    };

    /// An environment map with no entries.
    fn empty_env() -> HashMap<String, String> {
        HashMap::new()
    }

    /// Builds an owned environment map from string pairs.
    fn env_of(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(k, v)| (k.to_string(), v.to_string()))
            .collect()
    }

    /// Working directory used by every subprocess test.
    fn tmp_dir() -> &'static Path {
        Path::new("/tmp")
    }

    /// Executes `code` as a stand-alone eval block in `/tmp`.
    async fn run_shell(code: &str, env: &HashMap<String, String>) -> EvalResult {
        let block = EvalBlock {
            range: (0, 0),
            code: code.to_string(),
        };
        execute_eval_block(&block, tmp_dir(), None, None, env).await
    }

    // ---- info string -----------------------------------------------------

    #[test]
    fn info_string_component() {
        assert!(is_eval_info_string("component"));
    }

    #[test]
    fn info_string_rejects_non_component() {
        for other in ["eval", "rust", "json", ""] {
            assert!(!is_eval_info_string(other));
        }
    }

    // ---- parsing ---------------------------------------------------------

    #[test]
    fn parse_single_component_block() {
        let blocks = parse_eval_blocks("# Title\n\n```component\necho hello\n```\n\nMore text\n");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello");
    }

    #[test]
    fn parse_multiple_component_blocks() {
        let blocks =
            parse_eval_blocks("```component\necho one\n```\n\ntext\n\n```component\necho two\n```\n");
        assert_eq!(blocks.len(), 2);
        assert_eq!(blocks[0].code, "echo one");
        assert_eq!(blocks[1].code, "echo two");
    }

    #[test]
    fn skip_non_component_code_blocks() {
        let blocks =
            parse_eval_blocks("```rust\nfn main() {}\n```\n\n```component\necho hi\n```\n");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hi");
    }

    #[test]
    fn no_component_blocks() {
        let blocks = parse_eval_blocks("# Just a doc\n\nSome text.\n\n```json\n{}\n```\n");
        assert!(blocks.is_empty());
    }

    #[test]
    fn multiline_component_block() {
        let blocks = parse_eval_blocks("```component\necho hello\necho world\n```\n");
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].code, "echo hello\necho world");
    }

    #[test]
    fn component_block_preserves_range() {
        let prefix = "# Title\n\n";
        // The range ends right after the closing fence, before its newline.
        let fenced = "```component\necho hi\n```";
        let md = format!("{prefix}{fenced}\n\nMore text\n");

        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].range.0, prefix.len());
        assert_eq!(blocks[0].range.1, prefix.len() + fenced.len());
    }

    #[test]
    fn parse_finds_all_component_blocks_beyond_limit() {
        let md: String = (0..25)
            .map(|i| format!("```component\necho {i}\n```\n\n"))
            .collect();
        let blocks = parse_eval_blocks(&md);
        assert_eq!(blocks.len(), 25);
    }

    // ---- single block execution -----------------------------------------

    #[tokio::test]
    async fn execute_eval_block_success() {
        let result = run_shell("echo hello", &empty_env()).await;
        assert!(result.success);
        assert_eq!(result.output, "hello");
    }

    #[tokio::test]
    async fn execute_eval_block_failure() {
        let result = run_shell("exit 42", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
        assert!(result.output.contains("42"));
    }

    #[tokio::test]
    async fn execute_eval_block_command_not_found() {
        let result = run_shell("nonexistent_command_xyz_123", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("eval error"));
    }

    #[tokio::test]
    async fn execute_eval_block_timeout() {
        let result = run_shell("while true; do :; done", &empty_env()).await;
        assert!(!result.success);
        assert!(result.output.contains("timed out"));
    }

    #[tokio::test]
    async fn user_env_injected_into_subprocess() {
        let env = env_of(&[("MY_SECRET", "hunter2")]);
        let result = run_shell("echo $MY_SECRET", &env).await;
        assert!(result.success);
        assert_eq!(result.output, "hunter2");
    }

    #[tokio::test]
    async fn reserved_env_keys_not_overridden() {
        let env = env_of(&[("HOME", "/evil")]);
        let result = run_shell("echo $HOME", &env).await;
        assert!(result.success);
        assert_ne!(result.output, "/evil");
    }

    // ---- whole-document processing --------------------------------------

    #[tokio::test]
    async fn process_replaces_component_blocks() {
        let md = "# Title\n\n```component\necho 42\n```\n\nEnd\n";
        let result = process_eval_blocks(md, tmp_dir(), &empty_env()).await;
        assert!(result.contains("42"));
        assert!(!result.contains("```component"));
        assert!(result.contains("# Title"));
        assert!(result.contains("End"));
    }

    #[tokio::test]
    async fn process_no_component_blocks_returns_unchanged() {
        let md = "# Just text\n\n```json\n{}\n```\n";
        let result = process_eval_blocks(md, tmp_dir(), &empty_env()).await;
        assert_eq!(result, md);
    }

    #[tokio::test]
    async fn process_multiple_blocks_replaced_in_order() {
        let md = "A\n\n```component\necho first\n```\n\nB\n\n```component\necho second\n```\n\nC\n";
        let result = process_eval_blocks(md, tmp_dir(), &empty_env()).await;

        let first_pos = result.find("first").expect("first should be present");
        let second_pos = result.find("second").expect("second should be present");
        assert!(first_pos < second_pos);

        for heading in ["A\n", "B\n", "C\n"] {
            assert!(result.contains(heading));
        }
    }

    #[tokio::test]
    async fn process_eval_blocks_with_user_env() {
        let env = env_of(&[("DB", "postgresql://localhost/test")]);
        let result = process_eval_blocks("```component\necho $DB\n```\n", tmp_dir(), &env).await;
        assert!(result.contains("postgresql://localhost/test"));
        assert!(!result.contains("```component"));
    }

    // ---- environment merging --------------------------------------------

    #[test]
    fn merge_eval_env_trusted_overrides_untrusted() {
        let trusted = env_of(&[("USER_ID", "42")]);
        let untrusted = env_of(&[("USER_ID", "7"), ("PAGE", "stats")]);

        let merged = merge_eval_env(&trusted, &untrusted);
        assert_eq!(merged.get("USER_ID"), Some(&"42".to_string()));
        assert_eq!(merged.get("PAGE"), Some(&"stats".to_string()));
    }

    #[test]
    fn merge_eval_env_filters_reserved_keys() {
        let trusted = env_of(&[("AWS_SECRET_ACCESS_KEY", "x")]);
        let untrusted = env_of(&[("LD_PRELOAD", "y"), ("PATH", "/tmp/evil")]);

        let merged = merge_eval_env(&trusted, &untrusted);
        for reserved in ["AWS_SECRET_ACCESS_KEY", "LD_PRELOAD", "PATH"] {
            assert!(!merged.contains_key(reserved));
        }
    }
}