Skip to main content

harn_vm/stdlib/
process.rs

1use std::cell::RefCell;
2use std::collections::BTreeMap;
3use std::io::Write as _;
4use std::path::PathBuf;
5use std::process::Stdio;
6use std::rc::Rc;
7use std::sync::mpsc;
8use std::time::{Duration, Instant};
9
10use crate::orchestration::RunExecutionRecord;
11use crate::value::{VmError, VmValue};
12use crate::vm::Vm;
13
/// Name of the environment variable that `env_override` forces to `"1"`
/// while the current trigger dispatch is a replay.
const HARN_REPLAY_ENV: &str = "HARN_REPLAY";

thread_local! {
    /// Source directory for the current thread. Written by
    /// `set_thread_source_dir` and cleared by `reset_process_state`.
    pub(crate) static VM_SOURCE_DIR: RefCell<Option<PathBuf>> = const { RefCell::new(None) };
    /// Execution record for the current thread. Written by
    /// `set_thread_execution_context` and cleared by `reset_process_state`.
    static VM_EXECUTION_CONTEXT: RefCell<Option<RunExecutionRecord>> = const { RefCell::new(None) };
}
20
21/// Set the source directory for the current thread (called by VM on file execution).
22pub(crate) fn set_thread_source_dir(dir: &std::path::Path) {
23    VM_SOURCE_DIR.with(|sd| *sd.borrow_mut() = Some(normalize_context_path(dir)));
24}
25
/// Make `path` absolute by joining it onto the process working directory.
///
/// Absolute inputs are returned unchanged. When the working directory cannot
/// be determined, the relative path is handed back as-is. No filesystem
/// canonicalization is performed.
pub(crate) fn normalize_context_path(path: &std::path::Path) -> PathBuf {
    if !path.is_absolute() {
        if let Ok(cwd) = std::env::current_dir() {
            return cwd.join(path);
        }
    }
    path.to_path_buf()
}
34
35pub fn set_thread_execution_context(context: Option<RunExecutionRecord>) {
36    VM_EXECUTION_CONTEXT.with(|current| *current.borrow_mut() = context);
37}
38
39pub(crate) fn current_execution_context() -> Option<RunExecutionRecord> {
40    VM_EXECUTION_CONTEXT.with(|current| current.borrow().clone())
41}
42
43/// Reset thread-local process state (for test isolation).
44pub(crate) fn reset_process_state() {
45    VM_SOURCE_DIR.with(|sd| *sd.borrow_mut() = None);
46    VM_EXECUTION_CONTEXT.with(|current| *current.borrow_mut() = None);
47}
48
49pub fn execution_root_path() -> PathBuf {
50    current_execution_context()
51        .and_then(|context| context.cwd.map(PathBuf::from))
52        .or_else(|| std::env::current_dir().ok())
53        .unwrap_or_else(|| PathBuf::from("."))
54}
55
56pub fn source_root_path() -> PathBuf {
57    VM_SOURCE_DIR
58        .with(|sd| sd.borrow().clone())
59        .or_else(|| {
60            current_execution_context().and_then(|context| context.source_dir.map(PathBuf::from))
61        })
62        .or_else(|| current_execution_context().and_then(|context| context.cwd.map(PathBuf::from)))
63        .or_else(|| std::env::current_dir().ok())
64        .unwrap_or_else(|| PathBuf::from("."))
65}
66
/// Root directory that source assets resolve against. This currently
/// delegates to `source_root_path()` unchanged.
pub fn asset_root_path() -> PathBuf {
    source_root_path()
}
70
71fn env_override(name: &str) -> Option<String> {
72    (name == HARN_REPLAY_ENV && crate::triggers::dispatcher::current_dispatch_is_replay())
73        .then(|| "1".to_string())
74}
75
76pub(crate) fn read_env_value(name: &str) -> Option<String> {
77    env_override(name)
78        .or_else(|| current_execution_context().and_then(|context| context.env.get(name).cloned()))
79        .or_else(|| std::env::var(name).ok())
80}
81
82pub fn runtime_root_base() -> PathBuf {
83    find_project_root(&execution_root_path())
84        .or_else(|| find_project_root(&source_root_path()))
85        .unwrap_or_else(source_root_path)
86}
87
/// Resolve `.` and `..` components of `path` purely lexically.
///
/// `.` components are dropped and each `..` removes the preceding normal
/// component. Returns `None` when a `..` has no normal component to remove,
/// i.e. it would climb above the path's root, prefix, or starting point.
/// The filesystem is never consulted, so symlinks are not followed.
fn lexically_collapse(path: &std::path::Path) -> Option<PathBuf> {
    use std::path::Component;
    let mut kept: Vec<Component> = Vec::new();
    for part in path.components() {
        if matches!(part, Component::CurDir) {
            continue;
        }
        if matches!(part, Component::ParentDir) {
            // Only a normal name may be cancelled by `..`; popping a root,
            // a prefix, or nothing at all means the path escapes its anchor.
            match kept.pop() {
                Some(Component::Normal(_)) => continue,
                _ => return None,
            }
        }
        kept.push(part);
    }
    Some(kept.into_iter().collect())
}
109
110pub fn resolve_source_relative_path(path: &str) -> PathBuf {
111    let candidate = PathBuf::from(path);
112    if candidate.is_absolute() {
113        return candidate;
114    }
115    let root = execution_root_path();
116    let joined = root.join(&candidate);
117    // Defense-in-depth path-traversal check (paired with the deferred
118    // F3 sandbox-by-default fix): refuse to resolve a path that
119    // escapes the project root via `..` components. We anchor against
120    // `runtime_root_base()` (the project root), which is broader than
121    // `execution_root_path()` and lets benign sibling-dir walks like
122    // `read_file("../fixtures/payload.json")` from `tests/` succeed.
123    if path_escapes_project_root(&joined) {
124        return root.join("__harn_rejected_parent_dir_traversal__");
125    }
126    joined
127}
128
129pub fn resolve_source_asset_path(path: &str) -> PathBuf {
130    let candidate = PathBuf::from(path);
131    if candidate.is_absolute() {
132        return candidate;
133    }
134    let root = asset_root_path();
135    let joined = root.join(&candidate);
136    if path_escapes_project_root(&joined) {
137        return root.join("__harn_rejected_parent_dir_traversal__");
138    }
139    joined
140}
141
142/// Returns `true` when `joined` (which may contain raw `..`
143/// components) cannot be lexically collapsed without popping past its
144/// root component — i.e. the relative input had more `..` than the
145/// joined depth allows, escaping the filesystem root.
146///
147/// This is intentionally a narrow check: it doesn't try to enforce
148/// that the path stays inside a logical "project root", because the
149/// project root isn't always reliably resolvable (and benign uses
150/// like `../fixtures/x.json` from a `tests/` subdir are legitimate).
151/// The sandbox layer remains the authoritative defense for arbitrary
152/// `..` traversal; this guard plugs the most egregious escapes
153/// (`../../../../etc/passwd`) for the no-sandbox-by-default
154/// `harn run` path.
155fn path_escapes_project_root(joined: &std::path::Path) -> bool {
156    lexically_collapse(joined).is_none()
157}
158
159pub(crate) fn register_process_builtins(vm: &mut Vm) {
160    vm.register_builtin("env", |args, _out| {
161        let name = args.first().map(|a| a.display()).unwrap_or_default();
162        if let Some(value) = read_env_value(&name) {
163            return Ok(VmValue::String(Rc::from(value)));
164        }
165        Ok(VmValue::Nil)
166    });
167
168    vm.register_builtin("env_or", |args, _out| {
169        let name = args.first().map(|a| a.display()).unwrap_or_default();
170        let default = args.get(1).cloned().unwrap_or(VmValue::Nil);
171        if let Some(value) = read_env_value(&name) {
172            return Ok(VmValue::String(Rc::from(value)));
173        }
174        Ok(default)
175    });
176
177    // `timestamp` / `elapsed` are now registered by clock.rs so they
178    // honor mock_time / advance_time. Don't register here.
179
180    vm.register_builtin("exit", |args, _out| {
181        let code = args.first().and_then(|a| a.as_int()).unwrap_or(0);
182        std::process::exit(code as i32);
183    });
184
185    vm.register_builtin("exec", |args, _out| {
186        if args.is_empty() {
187            return Err(VmError::Thrown(VmValue::String(Rc::from(
188                "exec: command is required",
189            ))));
190        }
191        let cmd = args[0].display();
192        let cmd_args: Vec<String> = args[1..].iter().map(|a| a.display()).collect();
193        let output = exec_command(None, &cmd, &cmd_args)?;
194        Ok(vm_output_to_value(output))
195    });
196
197    vm.register_builtin("shell", |args, _out| {
198        let cmd = args.first().map(|a| a.display()).unwrap_or_default();
199        if cmd.is_empty() {
200            return Err(VmError::Thrown(VmValue::String(Rc::from(
201                "shell: command string is required",
202            ))));
203        }
204        let invocation = crate::shells::default_shell_invocation(&cmd)
205            .map_err(|error| VmError::Runtime(format!("shell: {error}")))?;
206        let output = exec_shell_args(None, &invocation.program, &invocation.args)?;
207        Ok(vm_output_to_value(output))
208    });
209
210    vm.register_builtin("exec_at", |args, _out| {
211        if args.len() < 2 {
212            return Err(VmError::Thrown(VmValue::String(Rc::from(
213                "exec_at: directory and command are required",
214            ))));
215        }
216        let dir = args[0].display();
217        let cmd = args[1].display();
218        let cmd_args: Vec<String> = args[2..].iter().map(|a| a.display()).collect();
219        let output = exec_command(Some(dir.as_str()), &cmd, &cmd_args)?;
220        Ok(vm_output_to_value(output))
221    });
222
223    vm.register_builtin("shell_at", |args, _out| {
224        if args.len() < 2 {
225            return Err(VmError::Thrown(VmValue::String(Rc::from(
226                "shell_at: directory and command string are required",
227            ))));
228        }
229        let dir = args[0].display();
230        let cmd = args[1].display();
231        if cmd.is_empty() {
232            return Err(VmError::Thrown(VmValue::String(Rc::from(
233                "shell_at: command string is required",
234            ))));
235        }
236        let invocation = crate::shells::default_shell_invocation(&cmd)
237            .map_err(|error| VmError::Runtime(format!("shell_at: {error}")))?;
238        let output = exec_shell_args(Some(dir.as_str()), &invocation.program, &invocation.args)?;
239        Ok(vm_output_to_value(output))
240    });
241
242    // `elapsed` registered by clock.rs (mockable). See note above.
243
244    vm.register_builtin("username", |_args, _out| {
245        let user = std::env::var("USER")
246            .or_else(|_| std::env::var("USERNAME"))
247            .unwrap_or_default();
248        Ok(VmValue::String(Rc::from(user)))
249    });
250
251    vm.register_builtin("hostname", |_args, _out| {
252        let name = std::env::var("HOSTNAME")
253            .or_else(|_| std::env::var("COMPUTERNAME"))
254            .or_else(|_| {
255                std::process::Command::new("hostname")
256                    .output()
257                    .ok()
258                    .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
259                    .ok_or(std::env::VarError::NotPresent)
260            })
261            .unwrap_or_default();
262        Ok(VmValue::String(Rc::from(name)))
263    });
264
265    vm.register_builtin("platform", |_args, _out| {
266        let os = if cfg!(target_os = "macos") {
267            "darwin"
268        } else if cfg!(target_os = "linux") {
269            "linux"
270        } else if cfg!(target_os = "windows") {
271            "windows"
272        } else {
273            std::env::consts::OS
274        };
275        Ok(VmValue::String(Rc::from(os)))
276    });
277
278    vm.register_builtin("arch", |_args, _out| {
279        Ok(VmValue::String(Rc::from(std::env::consts::ARCH)))
280    });
281
282    vm.register_builtin("home_dir", |_args, _out| {
283        let home = std::env::var("HOME")
284            .or_else(|_| std::env::var("USERPROFILE"))
285            .unwrap_or_default();
286        Ok(VmValue::String(Rc::from(home)))
287    });
288
289    vm.register_builtin("pid", |_args, _out| {
290        Ok(VmValue::Int(std::process::id() as i64))
291    });
292
293    vm.register_builtin("date_iso", |_args, _out| {
294        // `date_iso` reads the OS wall clock directly (it predates the
295        // unified `clock_mock`). Routing through `leak_audit::wall_now`
296        // keeps the production behavior unchanged but surfaces the call
297        // in `testbench_clock_leaks()` whenever a script invokes it
298        // under a paused testbench session, so fidelity hazards are
299        // visible instead of silently corrupting tapes.
300        let now = crate::clock_mock::leak_audit::wall_now("stdlib/date_iso");
301        let dt: chrono::DateTime<chrono::Utc> = now.into();
302        Ok(VmValue::String(Rc::from(
303            dt.to_rfc3339_opts(chrono::SecondsFormat::Millis, true),
304        )))
305    });
306
307    vm.register_builtin("cwd", |_args, _out| {
308        let dir = current_execution_context()
309            .and_then(|context| context.cwd)
310            .or_else(|| {
311                std::env::current_dir()
312                    .ok()
313                    .map(|p| p.to_string_lossy().into_owned())
314            })
315            .unwrap_or_default();
316        Ok(VmValue::String(Rc::from(dir)))
317    });
318
319    vm.register_builtin("execution_root", |_args, _out| {
320        Ok(VmValue::String(Rc::from(
321            execution_root_path().to_string_lossy().into_owned(),
322        )))
323    });
324
325    vm.register_builtin("asset_root", |_args, _out| {
326        Ok(VmValue::String(Rc::from(
327            asset_root_path().to_string_lossy().into_owned(),
328        )))
329    });
330
331    vm.register_builtin("runtime_paths", |_args, _out| {
332        let runtime_base = runtime_root_base();
333        let mut paths = BTreeMap::new();
334        paths.insert(
335            "execution_root".to_string(),
336            VmValue::String(Rc::from(
337                execution_root_path().to_string_lossy().into_owned(),
338            )),
339        );
340        paths.insert(
341            "asset_root".to_string(),
342            VmValue::String(Rc::from(asset_root_path().to_string_lossy().into_owned())),
343        );
344        paths.insert(
345            "state_root".to_string(),
346            VmValue::String(Rc::from(
347                crate::runtime_paths::state_root(&runtime_base)
348                    .to_string_lossy()
349                    .into_owned(),
350            )),
351        );
352        paths.insert(
353            "run_root".to_string(),
354            VmValue::String(Rc::from(
355                crate::runtime_paths::run_root(&runtime_base)
356                    .to_string_lossy()
357                    .into_owned(),
358            )),
359        );
360        paths.insert(
361            "worktree_root".to_string(),
362            VmValue::String(Rc::from(
363                crate::runtime_paths::worktree_root(&runtime_base)
364                    .to_string_lossy()
365                    .into_owned(),
366            )),
367        );
368        Ok(VmValue::Dict(Rc::new(paths)))
369    });
370
371    // `spawn_captured(opts)` runs an external command synchronously and
372    // returns its captured output. Unlike `exec(...)` (which is variadic
373    // positional and inherits the parent's stdin), this builtin takes a
374    // structured options dict and supports feeding a stdin payload,
375    // overriding the cwd/env, and bounding execution time:
376    //
377    //   spawn_captured({
378    //     cmd: "git",
379    //     args: ["log", "--oneline", "-n", "5"],
380    //     cwd: "/path/to/repo",       // optional
381    //     env: {KEY: "value"},        // optional, merged onto inherited env
382    //     stdin: "payload",           // optional string or bytes
383    //     timeout_ms: 5000,           // optional, kills child on expiry
384    //   })
385    //
386    // Returns `{exit_code: int, stdout: string, stderr: string,
387    // duration_ms: int, success: bool, timed_out: bool}`. When the
388    // process times out, `exit_code` is -1, `success` is false, and
389    // `timed_out` is true. This is the free-builtin landing site for
390    // the deferred `harness.process.spawn_captured` sub-handle
391    // (see #2297).
392    vm.register_builtin("spawn_captured", |args, _out| {
393        let opts = match args.first() {
394            Some(VmValue::Dict(opts)) => opts.clone(),
395            _ => {
396                return Err(VmError::Runtime(
397                    "spawn_captured: options dict is required".to_string(),
398                ));
399            }
400        };
401        let cmd = match opts.get("cmd").map(|v| v.display()).unwrap_or_default() {
402            s if s.is_empty() => {
403                return Err(VmError::Runtime(
404                    "spawn_captured: opts.cmd is required".to_string(),
405                ));
406            }
407            s => s,
408        };
409        let cmd_args: Vec<String> = match opts.get("args") {
410            Some(VmValue::List(items)) => items.iter().map(|v| v.display()).collect(),
411            None | Some(VmValue::Nil) => Vec::new(),
412            Some(other) => {
413                return Err(VmError::Runtime(format!(
414                    "spawn_captured: opts.args must be a list of strings, got {}",
415                    other.type_name()
416                )));
417            }
418        };
419        let cwd = opts
420            .get("cwd")
421            .map(|v| v.display())
422            .filter(|s| !s.is_empty());
423        let env_overrides: Vec<(String, String)> = match opts.get("env") {
424            Some(VmValue::Dict(env)) => env.iter().map(|(k, v)| (k.clone(), v.display())).collect(),
425            None | Some(VmValue::Nil) => Vec::new(),
426            Some(other) => {
427                return Err(VmError::Runtime(format!(
428                    "spawn_captured: opts.env must be a dict, got {}",
429                    other.type_name()
430                )));
431            }
432        };
433        let stdin_bytes: Option<Vec<u8>> = match opts.get("stdin") {
434            Some(VmValue::Bytes(bytes)) => Some(bytes.as_slice().to_vec()),
435            Some(VmValue::String(s)) => Some(s.as_bytes().to_vec()),
436            None | Some(VmValue::Nil) => None,
437            Some(other) => {
438                return Err(VmError::Runtime(format!(
439                    "spawn_captured: opts.stdin must be string or bytes, got {}",
440                    other.type_name()
441                )));
442            }
443        };
444        let timeout = opts
445            .get("timeout_ms")
446            .and_then(|v| v.as_int())
447            .filter(|n| *n > 0)
448            .map(|n| Duration::from_millis(n as u64));
449
450        let mut command = std::process::Command::new(&cmd);
451        command.args(&cmd_args);
452        if let Some(cwd) = cwd.as_ref() {
453            command.current_dir(cwd);
454        }
455        for (key, value) in &env_overrides {
456            command.env(key, value);
457        }
458        command.stdout(Stdio::piped()).stderr(Stdio::piped());
459        if stdin_bytes.is_some() {
460            command.stdin(Stdio::piped());
461        } else {
462            command.stdin(Stdio::null());
463        }
464
465        let started = Instant::now();
466        let mut child = command.spawn().map_err(|error| {
467            VmError::Thrown(VmValue::String(Rc::from(format!(
468                "spawn_captured: failed to spawn '{cmd}': {error}"
469            ))))
470        })?;
471
472        if let (Some(payload), Some(mut stdin)) = (stdin_bytes, child.stdin.take()) {
473            // Best-effort write; if the child closes stdin early we still
474            // surface stdout/stderr/exit_code rather than failing outright.
475            let _ = stdin.write_all(&payload);
476        }
477
478        let (output, timed_out) = match timeout {
479            None => match child.wait_with_output() {
480                Ok(output) => (output, false),
481                Err(error) => {
482                    return Err(VmError::Thrown(VmValue::String(Rc::from(format!(
483                        "spawn_captured: wait failed: {error}"
484                    )))));
485                }
486            },
487            Some(limit) => {
488                // Poll via a small `try_wait` loop instead of pulling in
489                // a full async runtime. This keeps the builtin
490                // dependency-free and matches the synchronous shape that
491                // ported `.harn` scripts expect.
492                let deadline = started + limit;
493                let mut timed_out = false;
494                loop {
495                    match child.try_wait() {
496                        Ok(Some(_)) => break,
497                        Ok(None) => {
498                            if Instant::now() >= deadline {
499                                let _ = child.kill();
500                                let _ = child.wait();
501                                timed_out = true;
502                                break;
503                            }
504                            std::thread::sleep(Duration::from_millis(10));
505                        }
506                        Err(error) => {
507                            return Err(VmError::Thrown(VmValue::String(Rc::from(format!(
508                                "spawn_captured: poll failed: {error}"
509                            )))));
510                        }
511                    }
512                }
513                if timed_out {
514                    // Drain pipes via channels so a slow stderr reader
515                    // doesn't block forever after we've killed the child.
516                    let stdout_handle = child.stdout.take();
517                    let stderr_handle = child.stderr.take();
518                    let (tx_out, rx_out) = mpsc::channel::<Vec<u8>>();
519                    let (tx_err, rx_err) = mpsc::channel::<Vec<u8>>();
520                    if let Some(mut s) = stdout_handle {
521                        std::thread::spawn(move || {
522                            use std::io::Read as _;
523                            let mut buf = Vec::new();
524                            let _ = s.read_to_end(&mut buf);
525                            let _ = tx_out.send(buf);
526                        });
527                    }
528                    if let Some(mut s) = stderr_handle {
529                        std::thread::spawn(move || {
530                            use std::io::Read as _;
531                            let mut buf = Vec::new();
532                            let _ = s.read_to_end(&mut buf);
533                            let _ = tx_err.send(buf);
534                        });
535                    }
536                    let stdout = rx_out
537                        .recv_timeout(Duration::from_millis(100))
538                        .unwrap_or_default();
539                    let stderr = rx_err
540                        .recv_timeout(Duration::from_millis(100))
541                        .unwrap_or_default();
542                    (
543                        std::process::Output {
544                            status: std::process::ExitStatus::default(),
545                            stdout,
546                            stderr,
547                        },
548                        true,
549                    )
550                } else {
551                    // The child has already exited; wait_with_output
552                    // reaps + drains the pipes in one call.
553                    match child.wait_with_output() {
554                        Ok(output) => (output, false),
555                        Err(error) => {
556                            return Err(VmError::Thrown(VmValue::String(Rc::from(format!(
557                                "spawn_captured: wait failed: {error}"
558                            )))));
559                        }
560                    }
561                }
562            }
563        };
564
565        let duration_ms = started.elapsed().as_millis() as i64;
566        let exit_code = if timed_out {
567            -1
568        } else {
569            output.status.code().unwrap_or(-1) as i64
570        };
571        let success = if timed_out {
572            false
573        } else {
574            output.status.success()
575        };
576        let mut result = BTreeMap::new();
577        result.insert("exit_code".to_string(), VmValue::Int(exit_code));
578        result.insert(
579            "stdout".to_string(),
580            VmValue::String(Rc::from(String::from_utf8_lossy(&output.stdout).as_ref())),
581        );
582        result.insert(
583            "stderr".to_string(),
584            VmValue::String(Rc::from(String::from_utf8_lossy(&output.stderr).as_ref())),
585        );
586        result.insert("duration_ms".to_string(), VmValue::Int(duration_ms));
587        result.insert("success".to_string(), VmValue::Bool(success));
588        result.insert("timed_out".to_string(), VmValue::Bool(timed_out));
589        Ok(VmValue::Dict(Rc::new(result)))
590    });
591
592    // `term_width()` / `term_height()` return the current terminal
593    // dimensions in columns and rows. Reads `COLUMNS` / `LINES` env vars
594    // first (so test harnesses can pin a value), falls back to the
595    // platform `ioctl` size, and finally defaults to 80x24 when neither
596    // is available (e.g. when stdout is not a TTY). These are the
597    // free-builtin landing sites for the deferred `harness.term.*`
598    // sub-handles (see #2297). `std/tui` already exposes
599    // `__tui_terminal_width` for its renderer; these builtins give
600    // ported subcommands a stable, non-prefixed name they can call
601    // without importing the tui module.
602    vm.register_builtin("term_width", |_args, _out| {
603        Ok(VmValue::Int(read_term_dimension("COLUMNS", true) as i64))
604    });
605    vm.register_builtin("term_height", |_args, _out| {
606        Ok(VmValue::Int(read_term_dimension("LINES", false) as i64))
607    });
608}
609
610const DEFAULT_TERM_WIDTH: usize = 80;
611const DEFAULT_TERM_HEIGHT: usize = 24;
612
613fn read_term_dimension(env_var: &str, is_width: bool) -> usize {
614    if let Ok(raw) = std::env::var(env_var) {
615        if let Ok(parsed) = raw.trim().parse::<usize>() {
616            if parsed > 0 {
617                return parsed;
618            }
619        }
620    }
621    platform_term_dimensions()
622        .map(|(w, h)| if is_width { w } else { h })
623        .unwrap_or(if is_width {
624            DEFAULT_TERM_WIDTH
625        } else {
626            DEFAULT_TERM_HEIGHT
627        })
628}
629
630#[cfg(unix)]
631fn platform_term_dimensions() -> Option<(usize, usize)> {
632    let mut winsize = std::mem::MaybeUninit::<libc::winsize>::zeroed();
633    let rc = unsafe { libc::ioctl(libc::STDOUT_FILENO, libc::TIOCGWINSZ, winsize.as_mut_ptr()) };
634    if rc != 0 {
635        return None;
636    }
637    let winsize = unsafe { winsize.assume_init() };
638    if winsize.ws_col == 0 && winsize.ws_row == 0 {
639        return None;
640    }
641    Some((winsize.ws_col as usize, winsize.ws_row as usize))
642}
643
644#[cfg(not(unix))]
645fn platform_term_dimensions() -> Option<(usize, usize)> {
646    None
647}
648
/// Locate the project root for `base`: the nearest directory, starting at
/// `base` itself and walking up through its parents, that contains a
/// `harn.toml`. Returns `None` when no ancestor has one.
pub fn find_project_root(base: &std::path::Path) -> Option<std::path::PathBuf> {
    base.ancestors()
        .find(|dir| dir.join("harn.toml").exists())
        .map(std::path::Path::to_path_buf)
}
661
662/// Register builtins that depend on source directory context.
663pub(crate) fn register_path_builtins(vm: &mut Vm) {
664    vm.register_builtin("source_dir", |_args, _out| {
665        let dir = VM_SOURCE_DIR.with(|sd| sd.borrow().clone());
666        match dir {
667            Some(d) => Ok(VmValue::String(Rc::from(d.to_string_lossy().into_owned()))),
668            None => {
669                let cwd = std::env::current_dir()
670                    .map(|p| p.to_string_lossy().into_owned())
671                    .unwrap_or_default();
672                Ok(VmValue::String(Rc::from(cwd)))
673            }
674        }
675    });
676
677    vm.register_builtin("project_root", |_args, _out| {
678        let base = current_execution_context()
679            .and_then(|context| context.cwd.map(PathBuf::from))
680            .or_else(|| VM_SOURCE_DIR.with(|sd| sd.borrow().clone()))
681            .or_else(|| std::env::current_dir().ok())
682            .unwrap_or_else(|| PathBuf::from("."));
683        match find_project_root(&base) {
684            Some(root) => Ok(VmValue::String(Rc::from(
685                root.to_string_lossy().into_owned(),
686            ))),
687            None => Ok(VmValue::Nil),
688        }
689    });
690}
691
692fn vm_output_to_value(output: std::process::Output) -> VmValue {
693    let mut result = BTreeMap::new();
694    result.insert(
695        "stdout".to_string(),
696        VmValue::String(Rc::from(String::from_utf8_lossy(&output.stdout).as_ref())),
697    );
698    result.insert(
699        "stderr".to_string(),
700        VmValue::String(Rc::from(String::from_utf8_lossy(&output.stderr).as_ref())),
701    );
702    result.insert(
703        "status".to_string(),
704        VmValue::Int(output.status.code().unwrap_or(-1) as i64),
705    );
706    result.insert(
707        "success".to_string(),
708        VmValue::Bool(output.status.success()),
709    );
710    VmValue::Dict(Rc::new(result))
711}
712
713fn exec_command(
714    dir: Option<&str>,
715    cmd: &str,
716    args: &[String],
717) -> Result<std::process::Output, VmError> {
718    let config = process_command_config(dir)?;
719    crate::stdlib::sandbox::command_output(cmd, args, &config)
720        .map_err(|error| prefix_process_error(error, "exec"))
721}
722
/// Test-only convenience: run `shell flag script` (e.g. `sh -c "..."`) via
/// `exec_shell_args`.
#[cfg(test)]
fn exec_shell(
    dir: Option<&str>,
    shell: &str,
    flag: &str,
    script: &str,
) -> Result<std::process::Output, VmError> {
    let shell_args = [flag, script].map(String::from);
    exec_shell_args(dir, shell, &shell_args)
}
733
734fn exec_shell_args(
735    dir: Option<&str>,
736    shell: &str,
737    args: &[String],
738) -> Result<std::process::Output, VmError> {
739    let config = process_command_config(dir)?;
740    crate::stdlib::sandbox::command_output(shell, args, &config)
741        .map_err(|error| prefix_process_error(error, "shell"))
742}
743
744fn process_command_config(
745    dir: Option<&str>,
746) -> Result<crate::stdlib::sandbox::ProcessCommandConfig, VmError> {
747    let mut config = crate::stdlib::sandbox::ProcessCommandConfig {
748        stdin_null: true,
749        ..Default::default()
750    };
751    if let Some(dir) = dir {
752        let resolved = resolve_command_dir(dir);
753        crate::stdlib::sandbox::enforce_process_cwd(&resolved)?;
754        config.cwd = Some(resolved);
755    } else if let Some(context) = current_execution_context() {
756        if let Some(cwd) = context.cwd.filter(|cwd| !cwd.is_empty()) {
757            crate::stdlib::sandbox::enforce_process_cwd(std::path::Path::new(&cwd))?;
758            config.cwd = Some(std::path::PathBuf::from(cwd));
759        }
760        if !context.env.is_empty() {
761            config.env.extend(context.env);
762        }
763    }
764    if let Some(value) = env_override(HARN_REPLAY_ENV) {
765        config.env.push((HARN_REPLAY_ENV.to_string(), value));
766    }
767    Ok(config)
768}
769
770fn prefix_process_error(error: VmError, prefix: &str) -> VmError {
771    match error {
772        VmError::Thrown(VmValue::String(message)) => VmError::Thrown(VmValue::String(Rc::from(
773            format!("{prefix} failed: {message}"),
774        ))),
775        other => other,
776    }
777}
778
779fn resolve_command_dir(dir: &str) -> PathBuf {
780    let candidate = PathBuf::from(dir);
781    if candidate.is_absolute() {
782        return candidate;
783    }
784    if let Some(cwd) = current_execution_context().and_then(|context| context.cwd) {
785        return PathBuf::from(cwd).join(candidate);
786    }
787    if let Some(source_dir) = VM_SOURCE_DIR.with(|sd| sd.borrow().clone()) {
788        return source_dir.join(candidate);
789    }
790    candidate
791}
792
#[cfg(test)]
mod tests {
    use super::*;

    /// Test fixture that clears the thread-local process state on creation and
    /// again on drop, and removes every scratch directory it handed out.
    ///
    /// Doing the cleanup in `Drop` means it also runs when an assertion panics,
    /// so a failing test does not leave state or temp directories behind.
    struct ProcessStateGuard {
        scratch_dirs: Vec<PathBuf>,
    }

    impl ProcessStateGuard {
        fn new() -> Self {
            reset_process_state();
            Self {
                scratch_dirs: Vec::new(),
            }
        }

        /// Creates a uniquely named directory under the system temp dir and
        /// registers it for removal when the guard is dropped.
        fn scratch_dir(&mut self, prefix: &str) -> PathBuf {
            let dir = std::env::temp_dir().join(format!("{prefix}-{}", uuid::Uuid::now_v7()));
            std::fs::create_dir_all(&dir).unwrap();
            self.scratch_dirs.push(dir.clone());
            dir
        }
    }

    impl Drop for ProcessStateGuard {
        fn drop(&mut self) {
            reset_process_state();
            for dir in &self.scratch_dirs {
                // Best-effort cleanup: a leftover temp dir must not fail the test.
                let _ = std::fs::remove_dir_all(dir);
            }
        }
    }

    /// Sets a process environment variable and removes it again on drop, so the
    /// variable does not leak into other tests when an assertion panics.
    struct EnvVarGuard {
        key: &'static str,
    }

    impl EnvVarGuard {
        fn set(key: &'static str, value: &str) -> Self {
            std::env::set_var(key, value);
            Self { key }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            std::env::remove_var(self.key);
        }
    }

    /// Builds an execution record with the given `cwd` and optional
    /// `source_dir`; every other field takes its `Default` value.
    fn execution_record(
        cwd: &std::path::Path,
        source_dir: Option<&std::path::Path>,
    ) -> RunExecutionRecord {
        RunExecutionRecord {
            cwd: Some(cwd.to_string_lossy().into_owned()),
            source_dir: source_dir.map(|dir| dir.to_string_lossy().into_owned()),
            ..Default::default()
        }
    }

    #[test]
    fn lexically_collapse_resolves_sibling_walk() {
        let path = PathBuf::from("/tmp/project/tests/../fixtures/x.json");
        let collapsed = lexically_collapse(&path).expect("sibling walk");
        assert_eq!(collapsed, PathBuf::from("/tmp/project/fixtures/x.json"));
    }

    #[test]
    fn lexically_collapse_blocks_escape_past_root() {
        // `/app/../../etc/passwd` would lexically resolve to `/etc/passwd`,
        // but after the first `..` consumes `app`, the second pop hits a
        // RootDir which is not Normal — refuse.
        let path = PathBuf::from("/app/../../etc/passwd");
        assert!(lexically_collapse(&path).is_none());
    }

    #[test]
    fn lexically_collapse_strips_curdir() {
        let path = PathBuf::from("/app/./logs/today.txt");
        let collapsed = lexically_collapse(&path).expect("curdir is benign");
        assert_eq!(collapsed, PathBuf::from("/app/logs/today.txt"));
    }

    #[test]
    fn resolve_source_relative_path_blocks_obvious_escape() {
        let mut state = ProcessStateGuard::new();
        let dir = state.scratch_dir("harn-process-escape");
        set_thread_source_dir(&dir);
        set_thread_execution_context(Some(execution_record(&dir, Some(&dir))));
        // A long string of `..` should escape the temp-root and trip
        // the rejection sentinel, so the file read fails NotFound
        // instead of escaping to a different filesystem location.
        let resolved = resolve_source_relative_path("../../../../../../../../etc/passwd");
        assert!(
            resolved
                .to_string_lossy()
                .contains("__harn_rejected_parent_dir_traversal__"),
            "expected rejection sentinel, got {resolved:?}"
        );
    }

    #[test]
    fn resolve_source_relative_path_ignores_thread_source_dir_without_execution_context() {
        let mut state = ProcessStateGuard::new();
        let dir = state.scratch_dir("harn-process");
        let current_dir = std::env::current_dir().unwrap();
        set_thread_source_dir(&dir);
        let resolved = resolve_source_relative_path("templates/prompt.txt");
        assert_eq!(resolved, current_dir.join("templates/prompt.txt"));
    }

    #[test]
    fn resolve_source_relative_path_prefers_execution_cwd_over_source_dir() {
        let mut state = ProcessStateGuard::new();
        let cwd = state.scratch_dir("harn-process-cwd");
        let source_dir = state.scratch_dir("harn-process-source");
        set_thread_source_dir(&source_dir);
        set_thread_execution_context(Some(execution_record(&cwd, Some(&source_dir))));
        let resolved = resolve_source_relative_path("templates/prompt.txt");
        assert_eq!(resolved, cwd.join("templates/prompt.txt"));
    }

    #[test]
    fn resolve_source_asset_path_prefers_execution_source_dir_over_cwd() {
        let mut state = ProcessStateGuard::new();
        let cwd = state.scratch_dir("harn-asset-cwd");
        let source_dir = state.scratch_dir("harn-asset-source");
        set_thread_source_dir(&source_dir);
        set_thread_execution_context(Some(execution_record(&cwd, Some(&source_dir))));
        let resolved = resolve_source_asset_path("templates/prompt.txt");
        assert_eq!(resolved, source_dir.join("templates/prompt.txt"));
    }

    #[test]
    fn set_thread_source_dir_absolutizes_relative_paths() {
        let _state = ProcessStateGuard::new();
        let current_dir = std::env::current_dir().unwrap();
        set_thread_source_dir(std::path::Path::new("scripts"));
        assert_eq!(source_root_path(), current_dir.join("scripts"));
    }

    #[test]
    fn exec_context_sets_default_cwd_and_env() {
        let mut state = ProcessStateGuard::new();
        let dir = state.scratch_dir("harn-process-ctx");
        std::fs::write(dir.join("marker.txt"), "ok").unwrap();
        set_thread_execution_context(Some(RunExecutionRecord {
            cwd: Some(dir.to_string_lossy().into_owned()),
            env: BTreeMap::from([("HARN_PROCESS_TEST".to_string(), "present".to_string())]),
            ..Default::default()
        }));
        // The child must see both the context env var and the context cwd
        // (marker.txt only exists inside `dir`).
        let output = exec_shell(
            None,
            "sh",
            "-c",
            "printf '%s:' \"$HARN_PROCESS_TEST\" && test -f marker.txt",
        )
        .unwrap();
        assert!(output.status.success());
        assert_eq!(String::from_utf8_lossy(&output.stdout), "present:");
    }

    #[test]
    fn exec_at_resolves_relative_to_execution_cwd() {
        let mut state = ProcessStateGuard::new();
        let dir = state.scratch_dir("harn-process-rel");
        std::fs::create_dir_all(dir.join("nested")).unwrap();
        std::fs::write(dir.join("nested").join("marker.txt"), "ok").unwrap();
        set_thread_execution_context(Some(execution_record(&dir, None)));
        let output = exec_shell(Some("nested"), "sh", "-c", "test -f marker.txt").unwrap();
        assert!(output.status.success());
    }

    #[test]
    fn runtime_paths_uses_configurable_state_roots() {
        let mut state = ProcessStateGuard::new();
        let base = state.scratch_dir("harn-process-runtime");
        // Environment variables are process-global; the guards remove them
        // again even if one of the assertions below panics.
        let _state_dir = EnvVarGuard::set(crate::runtime_paths::HARN_STATE_DIR_ENV, ".custom-harn");
        let _run_dir = EnvVarGuard::set(crate::runtime_paths::HARN_RUN_DIR_ENV, ".custom-runs");
        let _worktree_dir = EnvVarGuard::set(
            crate::runtime_paths::HARN_WORKTREE_DIR_ENV,
            ".custom-worktrees",
        );
        set_thread_execution_context(Some(execution_record(&base, None)));

        let mut vm = crate::vm::Vm::new();
        register_process_builtins(&mut vm);
        let mut out = String::new();
        let builtin = vm
            .builtins
            .get("runtime_paths")
            .expect("runtime_paths builtin");
        let paths = match builtin(&[], &mut out).unwrap() {
            VmValue::Dict(map) => map,
            other => panic!("expected dict, got {other:?}"),
        };
        assert_eq!(
            paths.get("state_root").unwrap().display(),
            base.join(".custom-harn").display().to_string()
        );
        assert_eq!(
            paths.get("run_root").unwrap().display(),
            base.join(".custom-runs").display().to_string()
        );
        assert_eq!(
            paths.get("worktree_root").unwrap().display(),
            base.join(".custom-worktrees").display().to_string()
        );
    }
}