// harn_vm/vm.rs

1mod format;
2mod imports;
3mod methods;
4mod ops;
5
6use std::cell::RefCell;
7use std::collections::{BTreeMap, HashSet};
8use std::future::Future;
9use std::pin::Pin;
10use std::rc::Rc;
11use std::time::Instant;
12
13use crate::chunk::{Chunk, CompiledFunction, Constant};
14use crate::value::{
15    ErrorCategory, ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmClosure, VmEnv,
16    VmError, VmTaskHandle, VmValue,
17};
18
// Per-thread stack of `Vm` values, initially empty.
// NOTE(review): judging by the name, this presumably tracks the child VM made
// available to the async builtin currently running on this thread — confirm
// against the code that pushes to and pops from it.
thread_local! {
    static CURRENT_ASYNC_BUILTIN_CHILD_VM: RefCell<Vec<Vm>> = const { RefCell::new(Vec::new()) };
}
22
23/// RAII guard that starts a tracing span on creation and ends it on drop.
24struct ScopeSpan(u64);
25
26impl ScopeSpan {
27    fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
28        Self(crate::tracing::span_start(kind, name))
29    }
30}
31
32impl Drop for ScopeSpan {
33    fn drop(&mut self) {
34        crate::tracing::span_end(self.0);
35    }
36}
37
/// Call frame for function execution.
///
/// The VM keeps a stack of these in `Vm::frames`; the last element is the
/// frame currently being executed.
pub(crate) struct CallFrame {
    /// Bytecode chunk executed by this frame.
    pub(crate) chunk: Chunk,
    /// Index into `chunk.code` of the next instruction to execute.
    pub(crate) ip: usize,
    /// Value-stack length to truncate back to when this frame returns.
    pub(crate) stack_base: usize,
    /// Environment restored into the VM's `env` when this frame is popped.
    pub(crate) saved_env: VmEnv,
    /// Iterator stack depth to restore when this frame unwinds.
    pub(crate) saved_iterator_depth: usize,
    /// Function name for stack traces (empty for top-level pipeline).
    pub(crate) fn_name: String,
    /// Number of arguments actually passed by the caller (for default arg support).
    pub(crate) argc: usize,
    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
    /// Set when entering a closure that originated from an imported module.
    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
    /// Module-local named functions available to symbolic calls within this frame.
    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
    /// Shared module-level env for top-level `var` / `let` bindings of
    /// this frame's originating module. Looked up after `self.env` and
    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
    /// its own live static state that persists across calls. See the
    /// `module_state` field on `VmClosure` for the full rationale.
    pub(crate) module_state: Option<crate::value::ModuleState>,
}
62
/// Exception handler for try/catch.
///
/// Records the VM state to unwind to when an error is caught by this handler.
pub(crate) struct ExceptionHandler {
    /// Instruction index the handling frame jumps to when the error is caught.
    pub(crate) catch_ip: usize,
    /// Value-stack length to truncate to before the caught value is pushed.
    pub(crate) stack_depth: usize,
    /// Call frames are popped until at most this many remain.
    pub(crate) frame_depth: usize,
    /// Environment scope depth to truncate to after unwinding.
    pub(crate) env_scope_depth: usize,
    /// If non-empty, this catch only handles errors whose enum_name matches.
    /// An empty string makes the handler catch every error.
    pub(crate) error_type: String,
}
72
/// Debug action returned by the debug hook.
///
/// The enum carries no data, so it is `Copy` and fully comparable (`Eq`);
/// values can be passed and compared freely without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DebugAction {
    /// Continue execution normally.
    Continue,
    /// Stop (breakpoint hit, step complete).
    Stop,
}
81
/// Information about current execution state for the debugger.
#[derive(Debug, Clone)]
pub struct DebugState {
    /// Current source line (0 when no line information is available).
    pub line: usize,
    /// Variables reported by the VM environment, keyed by name.
    pub variables: BTreeMap<String, VmValue>,
    /// Display name of the current frame.
    pub frame_name: String,
    /// Number of call frames on the VM's frame stack.
    pub frame_depth: usize,
}
90
/// Iterator state for for-in loops: either a pre-collected vec, an async channel, or a generator.
pub(crate) enum IterState {
    /// Iteration over a pre-collected list of values.
    Vec {
        /// The collected values.
        items: Vec<VmValue>,
        /// Cursor into `items` (presumably the index of the next value to
        /// yield — confirm against the iteration opcode).
        idx: usize,
    },
    /// Iteration over values received from an async channel.
    Channel {
        /// Receiving end of the channel, behind an async mutex.
        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
        /// Flag shared with the channel's other users (presumably set once
        /// the channel has been closed — confirm against the channel builtins).
        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
    },
    /// Iteration over values produced by a generator.
    Generator {
        /// The generator being driven.
        gen: crate::value::VmGenerator,
    },
}
105
/// Entry stored in the VM's module cache for a loaded module.
#[derive(Clone)]
pub(crate) struct LoadedModule {
    /// The module's functions, keyed by name.
    pub(crate) functions: BTreeMap<String, Rc<VmClosure>>,
    /// Names the module treats as public (presumably the subset of
    /// `functions` visible to importers — confirm against the import code).
    pub(crate) public_names: HashSet<String>,
}
111
/// The Harn bytecode virtual machine.
pub struct Vm {
    /// Value stack used by the executing bytecode.
    pub(crate) stack: Vec<VmValue>,
    /// Current variable environment.
    pub(crate) env: VmEnv,
    /// Captured output, exposed through `output()`.
    pub(crate) output: String,
    /// Registered sync builtins, keyed by name.
    pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
    /// Registered async builtins, keyed by name.
    pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
    /// Iterator state for for-in loops.
    pub(crate) iterators: Vec<IterState>,
    /// Call frame stack.
    pub(crate) frames: Vec<CallFrame>,
    /// Exception handler stack.
    pub(crate) exception_handlers: Vec<ExceptionHandler>,
    /// Spawned async task handles.
    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
    /// Counter for generating unique task IDs.
    pub(crate) task_counter: u64,
    /// Active deadline stack: (deadline_instant, frame_depth).
    pub(crate) deadlines: Vec<(Instant, usize)>,
    /// Breakpoints (source line numbers).
    pub(crate) breakpoints: Vec<usize>,
    /// Whether the VM is in step mode.
    pub(crate) step_mode: bool,
    /// The frame depth at which stepping started (for step-over).
    pub(crate) step_frame_depth: usize,
    /// Whether the VM is currently stopped at a debug point.
    pub(crate) stopped: bool,
    /// Last source line executed (to detect line changes).
    pub(crate) last_line: usize,
    /// Source directory for resolving imports.
    pub(crate) source_dir: Option<std::path::PathBuf>,
    /// Modules currently being imported (cycle prevention).
    pub(crate) imported_paths: Vec<std::path::PathBuf>,
    /// Loaded module cache keyed by canonical or synthetic module path.
    pub(crate) module_cache: BTreeMap<std::path::PathBuf, LoadedModule>,
    /// Source file path for error reporting.
    pub(crate) source_file: Option<String>,
    /// Source text for error reporting.
    pub(crate) source_text: Option<String>,
    /// Optional bridge for delegating unknown builtins in bridge mode.
    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
    pub(crate) denied_builtins: HashSet<String>,
    /// Cancellation token for cooperative graceful shutdown (set by parent).
    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
    /// Captured stack trace from the most recent error, one
    /// (fn_name, line, col, source_file) tuple per call frame.
    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
    /// Yield channel sender for generator execution. When set, `Op::Yield`
    /// sends values through this channel instead of being a no-op.
    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
    /// Project root directory (detected via harn.toml).
    /// Used as base directory for metadata, store, and checkpoint operations.
    pub(crate) project_root: Option<std::path::PathBuf>,
    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
    /// after the environment, so user-defined variables can shadow them.
    pub(crate) globals: BTreeMap<String, VmValue>,
}
169
170impl Vm {
171    pub fn new() -> Self {
172        Self {
173            stack: Vec::with_capacity(256),
174            env: VmEnv::new(),
175            output: String::new(),
176            builtins: BTreeMap::new(),
177            async_builtins: BTreeMap::new(),
178            iterators: Vec::new(),
179            frames: Vec::new(),
180            exception_handlers: Vec::new(),
181            spawned_tasks: BTreeMap::new(),
182            task_counter: 0,
183            deadlines: Vec::new(),
184            breakpoints: Vec::new(),
185            step_mode: false,
186            step_frame_depth: 0,
187            stopped: false,
188            last_line: 0,
189            source_dir: None,
190            imported_paths: Vec::new(),
191            module_cache: BTreeMap::new(),
192            source_file: None,
193            source_text: None,
194            bridge: None,
195            denied_builtins: HashSet::new(),
196            cancel_token: None,
197            error_stack_trace: Vec::new(),
198            yield_sender: None,
199            project_root: None,
200            globals: BTreeMap::new(),
201        }
202    }
203
    /// Set the bridge for delegating unknown builtins in bridge mode.
    /// Replaces any bridge that was set earlier.
    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
        self.bridge = Some(bridge);
    }
208
    /// Set builtins that are denied in sandbox mode.
    /// When called, the given builtin names will produce a permission error.
    /// The given set replaces the previous one; it is not merged with it.
    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
        self.denied_builtins = denied;
    }
214
215    /// Set source info for error reporting (file path and source text).
216    pub fn set_source_info(&mut self, file: &str, text: &str) {
217        self.source_file = Some(file.to_string());
218        self.source_text = Some(text.to_string());
219    }
220
    /// Set breakpoints by source line number.
    /// The given list replaces any breakpoints set earlier.
    pub fn set_breakpoints(&mut self, lines: Vec<usize>) {
        self.breakpoints = lines;
    }
225
226    /// Enable step mode (stop at next line).
227    pub fn set_step_mode(&mut self, step: bool) {
228        self.step_mode = step;
229        self.step_frame_depth = self.frames.len();
230    }
231
232    /// Enable step-over mode (stop at next line at same or lower frame depth).
233    pub fn set_step_over(&mut self) {
234        self.step_mode = true;
235        self.step_frame_depth = self.frames.len();
236    }
237
238    /// Enable step-out mode (stop when returning from current frame).
239    pub fn set_step_out(&mut self) {
240        self.step_mode = true;
241        self.step_frame_depth = self.frames.len().saturating_sub(1);
242    }
243
    /// Check if the VM is stopped at a debug point.
    /// Returns the flag that `step_execute` sets when it stops and clears
    /// before it executes an instruction.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
248
249    /// Get the current debug state (variables, line, etc.).
250    pub fn debug_state(&self) -> DebugState {
251        let line = self.current_line();
252        let variables = self.env.all_variables();
253        let frame_name = if self.frames.len() > 1 {
254            format!("frame_{}", self.frames.len() - 1)
255        } else {
256            "pipeline".to_string()
257        };
258        DebugState {
259            line,
260            variables,
261            frame_name,
262            frame_depth: self.frames.len(),
263        }
264    }
265
266    /// Get all stack frames for the debugger.
267    pub fn debug_stack_frames(&self) -> Vec<(String, usize)> {
268        let mut frames = Vec::new();
269        for (i, frame) in self.frames.iter().enumerate() {
270            let line = if frame.ip > 0 && frame.ip - 1 < frame.chunk.lines.len() {
271                frame.chunk.lines[frame.ip - 1] as usize
272            } else {
273                0
274            };
275            let name = if frame.fn_name.is_empty() {
276                if i == 0 {
277                    "pipeline".to_string()
278                } else {
279                    format!("fn_{}", i)
280                }
281            } else {
282                frame.fn_name.clone()
283            };
284            frames.push((name, line));
285        }
286        frames
287    }
288
289    /// Get the current source line.
290    fn current_line(&self) -> usize {
291        if let Some(frame) = self.frames.last() {
292            let ip = if frame.ip > 0 { frame.ip - 1 } else { 0 };
293            if ip < frame.chunk.lines.len() {
294                return frame.chunk.lines[ip] as usize;
295            }
296        }
297        0
298    }
299
300    /// Execute one instruction, returning whether to stop (breakpoint/step).
301    /// Returns Ok(None) to continue, Ok(Some(val)) on program end, Err on error.
302    pub async fn step_execute(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
303        // Check if we need to stop at this line
304        let current_line = self.current_line();
305        let line_changed = current_line != self.last_line && current_line > 0;
306
307        if line_changed {
308            self.last_line = current_line;
309
310            // Check breakpoints
311            if self.breakpoints.contains(&current_line) {
312                self.stopped = true;
313                return Ok(Some((VmValue::Nil, true))); // true = stopped
314            }
315
316            // Check step mode
317            if self.step_mode && self.frames.len() <= self.step_frame_depth + 1 {
318                self.step_mode = false;
319                self.stopped = true;
320                return Ok(Some((VmValue::Nil, true))); // true = stopped
321            }
322        }
323
324        // Execute one instruction cycle
325        self.stopped = false;
326        self.execute_one_cycle().await
327    }
328
329    /// Execute a single instruction cycle.
330    async fn execute_one_cycle(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
331        // Check deadline
332        if let Some(&(deadline, _)) = self.deadlines.last() {
333            if Instant::now() > deadline {
334                self.deadlines.pop();
335                let err = VmError::Thrown(VmValue::String(Rc::from("Deadline exceeded")));
336                match self.handle_error(err) {
337                    Ok(None) => return Ok(None),
338                    Ok(Some(val)) => return Ok(Some((val, false))),
339                    Err(e) => return Err(e),
340                }
341            }
342        }
343
344        // Get current frame
345        let frame = match self.frames.last_mut() {
346            Some(f) => f,
347            None => {
348                let val = self.stack.pop().unwrap_or(VmValue::Nil);
349                return Ok(Some((val, false)));
350            }
351        };
352
353        // Check if we've reached end of chunk
354        if frame.ip >= frame.chunk.code.len() {
355            let val = self.stack.pop().unwrap_or(VmValue::Nil);
356            let popped_frame = self.frames.pop().unwrap();
357            if self.frames.is_empty() {
358                return Ok(Some((val, false)));
359            } else {
360                self.iterators.truncate(popped_frame.saved_iterator_depth);
361                self.env = popped_frame.saved_env;
362                self.stack.truncate(popped_frame.stack_base);
363                self.stack.push(val);
364                return Ok(None);
365            }
366        }
367
368        let op = frame.chunk.code[frame.ip];
369        frame.ip += 1;
370
371        match self.execute_op(op).await {
372            Ok(Some(val)) => Ok(Some((val, false))),
373            Ok(None) => Ok(None),
374            Err(VmError::Return(val)) => {
375                if let Some(popped_frame) = self.frames.pop() {
376                    if let Some(ref dir) = popped_frame.saved_source_dir {
377                        crate::stdlib::set_thread_source_dir(dir);
378                    }
379                    let current_depth = self.frames.len();
380                    self.exception_handlers
381                        .retain(|h| h.frame_depth <= current_depth);
382                    if self.frames.is_empty() {
383                        return Ok(Some((val, false)));
384                    }
385                    self.iterators.truncate(popped_frame.saved_iterator_depth);
386                    self.env = popped_frame.saved_env;
387                    self.stack.truncate(popped_frame.stack_base);
388                    self.stack.push(val);
389                    Ok(None)
390                } else {
391                    Ok(Some((val, false)))
392                }
393            }
394            Err(e) => {
395                if self.error_stack_trace.is_empty() {
396                    self.error_stack_trace = self.capture_stack_trace();
397                }
398                match self.handle_error(e) {
399                    Ok(None) => {
400                        self.error_stack_trace.clear();
401                        Ok(None)
402                    }
403                    Ok(Some(val)) => Ok(Some((val, false))),
404                    Err(e) => Err(self.enrich_error_with_line(e)),
405                }
406            }
407        }
408    }
409
410    /// Initialize execution (push the initial frame).
411    pub fn start(&mut self, chunk: &Chunk) {
412        self.frames.push(CallFrame {
413            chunk: chunk.clone(),
414            ip: 0,
415            stack_base: self.stack.len(),
416            saved_env: self.env.clone(),
417            saved_iterator_depth: self.iterators.len(),
418            fn_name: String::new(),
419            argc: 0,
420            saved_source_dir: None,
421            module_functions: None,
422            module_state: None,
423        });
424    }
425
426    /// Register a sync builtin function.
427    pub fn register_builtin<F>(&mut self, name: &str, f: F)
428    where
429        F: Fn(&[VmValue], &mut String) -> Result<VmValue, VmError> + 'static,
430    {
431        self.builtins.insert(name.to_string(), Rc::new(f));
432    }
433
    /// Remove a sync builtin (so an async version can take precedence).
    /// Does nothing when no sync builtin is registered under `name`.
    pub fn unregister_builtin(&mut self, name: &str) {
        self.builtins.remove(name);
    }
438
    /// Register an async builtin function.
    ///
    /// `f` receives the call arguments by value and returns a future. The
    /// future is boxed and pinned so that every async builtin is stored under
    /// the same `VmAsyncBuiltinFn` type. An async builtin already registered
    /// under `name` is replaced.
    pub fn register_async_builtin<F, Fut>(&mut self, name: &str, f: F)
    where
        F: Fn(Vec<VmValue>) -> Fut + 'static,
        Fut: Future<Output = Result<VmValue, VmError>> + 'static,
    {
        self.async_builtins
            .insert(name.to_string(), Rc::new(move |args| Box::pin(f(args))));
    }
448
449    /// Create a child VM that shares builtins and env but has fresh execution state.
450    /// Used for parallel/spawn to fork the VM for concurrent tasks.
451    fn child_vm(&self) -> Vm {
452        Vm {
453            stack: Vec::with_capacity(64),
454            env: self.env.clone(),
455            output: String::new(),
456            builtins: self.builtins.clone(),
457            async_builtins: self.async_builtins.clone(),
458            iterators: Vec::new(),
459            frames: Vec::new(),
460            exception_handlers: Vec::new(),
461            spawned_tasks: BTreeMap::new(),
462            task_counter: 0,
463            deadlines: self.deadlines.clone(),
464            breakpoints: Vec::new(),
465            step_mode: false,
466            step_frame_depth: 0,
467            stopped: false,
468            last_line: 0,
469            source_dir: self.source_dir.clone(),
470            imported_paths: Vec::new(),
471            module_cache: self.module_cache.clone(),
472            source_file: self.source_file.clone(),
473            source_text: self.source_text.clone(),
474            bridge: self.bridge.clone(),
475            denied_builtins: self.denied_builtins.clone(),
476            cancel_token: None,
477            error_stack_trace: Vec::new(),
478            yield_sender: None,
479            project_root: self.project_root.clone(),
480            globals: self.globals.clone(),
481        }
482    }
483
484    /// Set the source directory for import resolution and introspection.
485    /// Also auto-detects the project root if not already set.
486    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
487        self.source_dir = Some(dir.to_path_buf());
488        crate::stdlib::set_thread_source_dir(dir);
489        // Auto-detect project root if not explicitly set.
490        if self.project_root.is_none() {
491            self.project_root = crate::stdlib::process::find_project_root(dir);
492        }
493    }
494
495    /// Explicitly set the project root directory.
496    /// Used by ACP/CLI to override auto-detection.
497    pub fn set_project_root(&mut self, root: &std::path::Path) {
498        self.project_root = Some(root.to_path_buf());
499    }
500
501    /// Get the project root directory, falling back to source_dir.
502    pub fn project_root(&self) -> Option<&std::path::Path> {
503        self.project_root.as_deref().or(self.source_dir.as_deref())
504    }
505
506    /// Return all registered builtin names (sync + async).
507    pub fn builtin_names(&self) -> Vec<String> {
508        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
509        names.extend(self.async_builtins.keys().cloned());
510        names
511    }
512
513    /// Set a global constant (e.g. `pi`, `e`).
514    /// Stored separately from the environment so user-defined variables can shadow them.
515    pub fn set_global(&mut self, name: &str, value: VmValue) {
516        self.globals.insert(name.to_string(), value);
517    }
518
519    /// Get the captured output.
520    pub fn output(&self) -> &str {
521        &self.output
522    }
523
524    /// Execute a compiled chunk.
525    pub async fn execute(&mut self, chunk: &Chunk) -> Result<VmValue, VmError> {
526        let span_id = crate::tracing::span_start(crate::tracing::SpanKind::Pipeline, "main".into());
527        let result = self.run_chunk(chunk).await;
528        crate::tracing::span_end(span_id);
529        result
530    }
531
532    /// Convert a VmError into either a handled exception (returning Ok) or a propagated error.
533    fn handle_error(&mut self, error: VmError) -> Result<Option<VmValue>, VmError> {
534        // Extract the thrown value from the error
535        let thrown_value = match &error {
536            VmError::Thrown(v) => v.clone(),
537            other => VmValue::String(Rc::from(other.to_string())),
538        };
539
540        if let Some(handler) = self.exception_handlers.pop() {
541            // Check if this is a typed catch that doesn't match the thrown value
542            if !handler.error_type.is_empty() {
543                let matches = match &thrown_value {
544                    VmValue::EnumVariant { enum_name, .. } => *enum_name == handler.error_type,
545                    _ => false,
546                };
547                if !matches {
548                    // This handler doesn't match — try the next one
549                    return self.handle_error(error);
550                }
551            }
552
553            // Unwind call frames back to the handler's frame depth
554            while self.frames.len() > handler.frame_depth {
555                if let Some(frame) = self.frames.pop() {
556                    if let Some(ref dir) = frame.saved_source_dir {
557                        crate::stdlib::set_thread_source_dir(dir);
558                    }
559                    self.iterators.truncate(frame.saved_iterator_depth);
560                    self.env = frame.saved_env;
561                }
562            }
563
564            // Clean up deadlines from unwound frames
565            while self
566                .deadlines
567                .last()
568                .is_some_and(|d| d.1 > handler.frame_depth)
569            {
570                self.deadlines.pop();
571            }
572
573            self.env.truncate_scopes(handler.env_scope_depth);
574
575            // Restore stack to handler's depth
576            self.stack.truncate(handler.stack_depth);
577
578            // Push the error value onto the stack (catch body can access it)
579            self.stack.push(thrown_value);
580
581            // Set the IP in the current frame to the catch handler
582            if let Some(frame) = self.frames.last_mut() {
583                frame.ip = handler.catch_ip;
584            }
585
586            Ok(None) // Continue execution
587        } else {
588            Err(error) // No handler, propagate
589        }
590    }
591
    /// Run `chunk` to completion as a plain entry point: no arguments, no
    /// saved source dir, no module functions and no module state.
    async fn run_chunk(&mut self, chunk: &Chunk) -> Result<VmValue, VmError> {
        self.run_chunk_entry(chunk, 0, None, None, None).await
    }
595
    /// Push a frame for `chunk` and run the interpreter loop until a final
    /// value or an unhandled error is produced.
    ///
    /// `argc`, `saved_source_dir`, `module_functions` and `module_state` are
    /// stored on the pushed frame unchanged.
    ///
    /// Each loop iteration checks the innermost deadline, handles the end of
    /// the current frame's chunk, and then executes one opcode. A
    /// `VmError::Return` pops the current frame and hands its value to the
    /// caller frame. Any other error is routed through `handle_error`; an
    /// error that is not caught is returned with line information added by
    /// `enrich_error_with_line`.
    ///
    /// NOTE(review): the loop returns only when the frame stack becomes empty
    /// (or `execute_op` yields a final value), not when the frame pushed here
    /// is popped — confirm this is what callers with a non-empty frame stack
    /// expect.
    async fn run_chunk_entry(
        &mut self,
        chunk: &Chunk,
        argc: usize,
        saved_source_dir: Option<std::path::PathBuf>,
        module_functions: Option<ModuleFunctionRegistry>,
        module_state: Option<crate::value::ModuleState>,
    ) -> Result<VmValue, VmError> {
        self.frames.push(CallFrame {
            chunk: chunk.clone(),
            ip: 0,
            stack_base: self.stack.len(),
            saved_env: self.env.clone(),
            saved_iterator_depth: self.iterators.len(),
            fn_name: String::new(),
            argc,
            saved_source_dir,
            module_functions,
            module_state,
        });

        loop {
            // Check deadline before each instruction
            if let Some(&(deadline, _)) = self.deadlines.last() {
                if Instant::now() > deadline {
                    // Pop the expired deadline first so it cannot fire again.
                    self.deadlines.pop();
                    let err = VmError::Thrown(VmValue::String(Rc::from("Deadline exceeded")));
                    match self.handle_error(err) {
                        Ok(None) => continue,
                        Ok(Some(val)) => return Ok(val),
                        Err(e) => return Err(e),
                    }
                }
            }

            // Get current frame
            let frame = match self.frames.last_mut() {
                Some(f) => f,
                None => return Ok(self.stack.pop().unwrap_or(VmValue::Nil)),
            };

            // Check if we've reached end of chunk
            if frame.ip >= frame.chunk.code.len() {
                // The value on top of the stack (or Nil) is the frame's result.
                let val = self.stack.pop().unwrap_or(VmValue::Nil);
                let popped_frame = self.frames.pop().unwrap();
                if let Some(ref dir) = popped_frame.saved_source_dir {
                    crate::stdlib::set_thread_source_dir(dir);
                }

                if self.frames.is_empty() {
                    // We're done with the top-level chunk
                    return Ok(val);
                } else {
                    // Returning from a function call
                    self.iterators.truncate(popped_frame.saved_iterator_depth);
                    self.env = popped_frame.saved_env;
                    self.stack.truncate(popped_frame.stack_base);
                    self.stack.push(val);
                    continue;
                }
            }

            // Fetch the opcode and advance `ip` before executing it.
            let op = frame.chunk.code[frame.ip];
            frame.ip += 1;

            match self.execute_op(op).await {
                Ok(Some(val)) => return Ok(val),
                Ok(None) => continue,
                Err(VmError::Return(val)) => {
                    // Pop the current frame
                    if let Some(popped_frame) = self.frames.pop() {
                        if let Some(ref dir) = popped_frame.saved_source_dir {
                            crate::stdlib::set_thread_source_dir(dir);
                        }
                        // Clean up exception handlers from the returned frame
                        let current_depth = self.frames.len();
                        self.exception_handlers
                            .retain(|h| h.frame_depth <= current_depth);

                        if self.frames.is_empty() {
                            return Ok(val);
                        }
                        self.iterators.truncate(popped_frame.saved_iterator_depth);
                        self.env = popped_frame.saved_env;
                        self.stack.truncate(popped_frame.stack_base);
                        self.stack.push(val);
                    } else {
                        return Ok(val);
                    }
                }
                Err(e) => {
                    // Capture stack trace before error handling unwinds frames
                    if self.error_stack_trace.is_empty() {
                        self.error_stack_trace = self.capture_stack_trace();
                    }
                    match self.handle_error(e) {
                        Ok(None) => {
                            // The error was caught, so the trace is no longer needed.
                            self.error_stack_trace.clear();
                            continue; // Handler found, continue
                        }
                        Ok(Some(val)) => return Ok(val),
                        Err(e) => return Err(self.enrich_error_with_line(e)),
                    }
                }
            }
        }
    }
703
704    /// Capture the current call stack as (fn_name, line, col, source_file) tuples.
705    fn capture_stack_trace(&self) -> Vec<(String, usize, usize, Option<String>)> {
706        self.frames
707            .iter()
708            .map(|f| {
709                let idx = if f.ip > 0 { f.ip - 1 } else { 0 };
710                let line = f.chunk.lines.get(idx).copied().unwrap_or(0) as usize;
711                let col = f.chunk.columns.get(idx).copied().unwrap_or(0) as usize;
712                (f.fn_name.clone(), line, col, f.chunk.source_file.clone())
713            })
714            .collect()
715    }
716
717    /// Enrich a VmError with source line information from the captured stack
718    /// trace. Appends ` (line N)` to error variants whose messages don't
719    /// already carry location context.
720    fn enrich_error_with_line(&self, error: VmError) -> VmError {
721        // Determine the line from the captured stack trace (innermost frame).
722        let line = self
723            .error_stack_trace
724            .last()
725            .map(|(_, l, _, _)| *l)
726            .unwrap_or_else(|| self.current_line());
727        if line == 0 {
728            return error;
729        }
730        let suffix = format!(" (line {line})");
731        match error {
732            VmError::Runtime(msg) => VmError::Runtime(format!("{msg}{suffix}")),
733            VmError::TypeError(msg) => VmError::TypeError(format!("{msg}{suffix}")),
734            VmError::DivisionByZero => VmError::Runtime(format!("Division by zero{suffix}")),
735            VmError::UndefinedVariable(name) => {
736                VmError::Runtime(format!("Undefined variable: {name}{suffix}"))
737            }
738            VmError::UndefinedBuiltin(name) => {
739                VmError::Runtime(format!("Undefined builtin: {name}{suffix}"))
740            }
741            VmError::ImmutableAssignment(name) => VmError::Runtime(format!(
742                "Cannot assign to immutable binding: {name}{suffix}"
743            )),
744            VmError::StackOverflow => {
745                VmError::Runtime(format!("Stack overflow: too many nested calls{suffix}"))
746            }
747            // Leave these untouched:
748            // - Thrown: user-thrown errors should not be silently modified
749            // - CategorizedError: structured errors for agent orchestration
750            // - Return: control flow, not a real error
751            // - StackUnderflow / InvalidInstruction: internal VM bugs
752            other => other,
753        }
754    }
755
    /// Upper bound on the number of call frames.
    /// NOTE(review): presumably checked when a frame is pushed, raising
    /// `VmError::StackOverflow` once exceeded — confirm at the use site.
    const MAX_FRAMES: usize = 512;
757
758    /// Build the call-time env for a closure invocation.
759    ///
760    /// Harn is **lexically scoped for data**: a closure sees exactly the
761    /// data names it captured at creation time, plus its parameters,
762    /// plus names from its originating module's `module_state`, plus
763    /// the module-function registry. The caller's *data* locals are
764    /// intentionally not visible — that would be dynamic scoping, which
765    /// is neither what Harn's TS-flavored surface suggests to users nor
766    /// something real stdlib code relies on.
767    ///
768    /// **Exception: closure-typed bindings.** Function *names* are
769    /// late-bound, Python-`LOAD_GLOBAL`-style. When a local recursive
770    /// fn is declared in a pipeline body (or inside another function),
771    /// the closure is created BEFORE its own name is defined in the
772    /// enclosing scope, so `closure.env` captures a snapshot that is
773    /// missing the self-reference. To make `fn fact(n) { fact(n-1) }`
774    /// work without a letrec trick, we merge closure-typed entries
775    /// from the caller's scope stack — but only closure-typed ones.
776    /// Data locals are never leaked across call boundaries, so the
777    /// surprising "caller's variable magically visible in callee"
778    /// semantic is ruled out.
779    ///
780    /// Imported module closures have `module_state` set, at which
781    /// point the full lexical environment is already available via
782    /// `closure.env` + `module_state`, and we skip the closure merge
783    /// entirely as a fast path. This is the hot path for context-
784    /// builder workloads (~65% of VM CPU before this optimization).
785    fn closure_call_env(caller_env: &VmEnv, closure: &VmClosure) -> VmEnv {
786        if closure.module_state.is_some() {
787            return closure.env.clone();
788        }
789        let mut call_env = closure.env.clone();
790        // Late-bind only closure-typed names from the caller — enough
791        // for local recursive / mutually-recursive fns to self-reference
792        // without leaking caller-local data into the callee.
793        for scope in &caller_env.scopes {
794            for (name, (val, mutable)) in &scope.vars {
795                if matches!(val, VmValue::Closure(_)) && call_env.get(name).is_none() {
796                    let _ = call_env.define(name, val.clone(), *mutable);
797                }
798            }
799        }
800        call_env
801    }
802
803    fn resolve_named_closure(&self, name: &str) -> Option<Rc<VmClosure>> {
804        if let Some(VmValue::Closure(closure)) = self.env.get(name) {
805            return Some(closure);
806        }
807        self.frames
808            .last()
809            .and_then(|frame| frame.module_functions.as_ref())
810            .and_then(|registry| registry.borrow().get(name).cloned())
811    }
812
813    /// Push a new call frame for a closure invocation.
814    fn push_closure_frame(
815        &mut self,
816        closure: &VmClosure,
817        args: &[VmValue],
818        _parent_functions: &[CompiledFunction],
819    ) -> Result<(), VmError> {
820        if self.frames.len() >= Self::MAX_FRAMES {
821            return Err(VmError::StackOverflow);
822        }
823        let saved_env = self.env.clone();
824
825        // If this closure originated from an imported module, switch
826        // the thread-local source dir so that render() and other
827        // source-relative builtins resolve relative to the module.
828        let saved_source_dir = if let Some(ref dir) = closure.source_dir {
829            let prev = crate::stdlib::process::VM_SOURCE_DIR.with(|sd| sd.borrow().clone());
830            crate::stdlib::set_thread_source_dir(dir);
831            prev
832        } else {
833            None
834        };
835
836        let mut call_env = Self::closure_call_env(&saved_env, closure);
837        call_env.push_scope();
838
839        let default_start = closure
840            .func
841            .default_start
842            .unwrap_or(closure.func.params.len());
843        let param_count = closure.func.params.len();
844        for (i, param) in closure.func.params.iter().enumerate() {
845            if closure.func.has_rest_param && i == param_count - 1 {
846                // Rest parameter: collect remaining args into a list
847                let rest_args = if i < args.len() {
848                    args[i..].to_vec()
849                } else {
850                    Vec::new()
851                };
852                let _ = call_env.define(param, VmValue::List(std::rc::Rc::new(rest_args)), false);
853            } else if i < args.len() {
854                let _ = call_env.define(param, args[i].clone(), false);
855            } else if i < default_start {
856                let _ = call_env.define(param, VmValue::Nil, false);
857            }
858        }
859
860        self.env = call_env;
861
862        self.frames.push(CallFrame {
863            chunk: closure.func.chunk.clone(),
864            ip: 0,
865            stack_base: self.stack.len(),
866            saved_env,
867            saved_iterator_depth: self.iterators.len(),
868            fn_name: closure.func.name.clone(),
869            argc: args.len(),
870            saved_source_dir,
871            module_functions: closure.module_functions.clone(),
872            module_state: closure.module_state.clone(),
873        });
874
875        Ok(())
876    }
877
878    /// Create a generator value by spawning the closure body as an async task.
879    /// The generator body communicates yielded values through an mpsc channel.
880    pub(crate) fn create_generator(&self, closure: &VmClosure, args: &[VmValue]) -> VmValue {
881        use crate::value::VmGenerator;
882
883        // Buffer size of 1: the generator produces one value at a time.
884        let (tx, rx) = tokio::sync::mpsc::channel::<VmValue>(1);
885
886        let mut child = self.child_vm();
887        child.yield_sender = Some(tx);
888
889        // Set up the environment for the generator body. The generator
890        // body runs in its own child VM; closure_call_env walks the
891        // current (parent) env so locally-defined generator closures
892        // can self-reference via the narrow closure-only merge. See
893        // `Vm::closure_call_env`.
894        let parent_env = self.env.clone();
895        let mut call_env = Self::closure_call_env(&parent_env, closure);
896        call_env.push_scope();
897
898        let default_start = closure
899            .func
900            .default_start
901            .unwrap_or(closure.func.params.len());
902        let param_count = closure.func.params.len();
903        for (i, param) in closure.func.params.iter().enumerate() {
904            if closure.func.has_rest_param && i == param_count - 1 {
905                let rest_args = if i < args.len() {
906                    args[i..].to_vec()
907                } else {
908                    Vec::new()
909                };
910                let _ = call_env.define(param, VmValue::List(std::rc::Rc::new(rest_args)), false);
911            } else if i < args.len() {
912                let _ = call_env.define(param, args[i].clone(), false);
913            } else if i < default_start {
914                let _ = call_env.define(param, VmValue::Nil, false);
915            }
916        }
917        child.env = call_env;
918
919        let chunk = closure.func.chunk.clone();
920        let saved_source_dir = if let Some(ref dir) = closure.source_dir {
921            let prev = crate::stdlib::process::VM_SOURCE_DIR.with(|sd| sd.borrow().clone());
922            crate::stdlib::set_thread_source_dir(dir);
923            prev
924        } else {
925            None
926        };
927        let module_functions = closure.module_functions.clone();
928        let module_state = closure.module_state.clone();
929        let argc = args.len();
930        // Spawn the generator body as an async task.
931        // The task will execute until return, sending yielded values through the channel.
932        tokio::task::spawn_local(async move {
933            let _ = child
934                .run_chunk_entry(
935                    &chunk,
936                    argc,
937                    saved_source_dir,
938                    module_functions,
939                    module_state,
940                )
941                .await;
942            // When the generator body finishes (return or fall-through),
943            // the sender is dropped, signaling completion to the receiver.
944        });
945
946        VmValue::Generator(VmGenerator {
947            done: Rc::new(std::cell::Cell::new(false)),
948            receiver: Rc::new(tokio::sync::Mutex::new(rx)),
949        })
950    }
951
952    fn pop(&mut self) -> Result<VmValue, VmError> {
953        self.stack.pop().ok_or(VmError::StackUnderflow)
954    }
955
956    fn peek(&self) -> Result<&VmValue, VmError> {
957        self.stack.last().ok_or(VmError::StackUnderflow)
958    }
959
960    fn const_string(c: &Constant) -> Result<String, VmError> {
961        match c {
962            Constant::String(s) => Ok(s.clone()),
963            _ => Err(VmError::TypeError("expected string constant".into())),
964        }
965    }
966
967    /// Call a closure (used by method calls like .map/.filter etc.)
968    /// Uses recursive execution for simplicity in method dispatch.
969    fn call_closure<'a>(
970        &'a mut self,
971        closure: &'a VmClosure,
972        args: &'a [VmValue],
973        _parent_functions: &'a [CompiledFunction],
974    ) -> Pin<Box<dyn Future<Output = Result<VmValue, VmError>> + 'a>> {
975        Box::pin(async move {
976            let saved_env = self.env.clone();
977            let saved_frames = std::mem::take(&mut self.frames);
978            let saved_handlers = std::mem::take(&mut self.exception_handlers);
979            let saved_iterators = std::mem::take(&mut self.iterators);
980            let saved_deadlines = std::mem::take(&mut self.deadlines);
981
982            let mut call_env = Self::closure_call_env(&saved_env, closure);
983            call_env.push_scope();
984
985            let default_start = closure
986                .func
987                .default_start
988                .unwrap_or(closure.func.params.len());
989            let param_count = closure.func.params.len();
990            for (i, param) in closure.func.params.iter().enumerate() {
991                if closure.func.has_rest_param && i == param_count - 1 {
992                    let rest_args = if i < args.len() {
993                        args[i..].to_vec()
994                    } else {
995                        Vec::new()
996                    };
997                    let _ =
998                        call_env.define(param, VmValue::List(std::rc::Rc::new(rest_args)), false);
999                } else if i < args.len() {
1000                    let _ = call_env.define(param, args[i].clone(), false);
1001                } else if i < default_start {
1002                    let _ = call_env.define(param, VmValue::Nil, false);
1003                }
1004            }
1005
1006            self.env = call_env;
1007            let argc = args.len();
1008            let saved_source_dir = if let Some(ref dir) = closure.source_dir {
1009                let prev = crate::stdlib::process::VM_SOURCE_DIR.with(|sd| sd.borrow().clone());
1010                crate::stdlib::set_thread_source_dir(dir);
1011                prev
1012            } else {
1013                None
1014            };
1015            let result = self
1016                .run_chunk_entry(
1017                    &closure.func.chunk,
1018                    argc,
1019                    saved_source_dir,
1020                    closure.module_functions.clone(),
1021                    closure.module_state.clone(),
1022                )
1023                .await;
1024
1025            self.env = saved_env;
1026            self.frames = saved_frames;
1027            self.exception_handlers = saved_handlers;
1028            self.iterators = saved_iterators;
1029            self.deadlines = saved_deadlines;
1030
1031            result
1032        })
1033    }
1034
1035    /// Invoke a value as a callable. Supports `VmValue::Closure` and
1036    /// `VmValue::BuiltinRef`, so builtin names passed by reference (e.g.
1037    /// `dict.rekey(snake_to_camel)`) dispatch through the same code path as
1038    /// user-defined closures.
1039    #[allow(clippy::manual_async_fn)]
1040    fn call_callable_value<'a>(
1041        &'a mut self,
1042        callable: &'a VmValue,
1043        args: &'a [VmValue],
1044        functions: &'a [CompiledFunction],
1045    ) -> Pin<Box<dyn Future<Output = Result<VmValue, VmError>> + 'a>> {
1046        Box::pin(async move {
1047            match callable {
1048                VmValue::Closure(closure) => self.call_closure(closure, args, functions).await,
1049                VmValue::BuiltinRef(name) => {
1050                    let name_owned = name.to_string();
1051                    self.call_named_builtin(&name_owned, args.to_vec()).await
1052                }
1053                other => Err(VmError::TypeError(format!(
1054                    "expected callable, got {}",
1055                    other.type_name()
1056                ))),
1057            }
1058        })
1059    }
1060
1061    /// Returns true if `v` is callable via `call_callable_value`.
1062    fn is_callable_value(v: &VmValue) -> bool {
1063        matches!(v, VmValue::Closure(_) | VmValue::BuiltinRef(_))
1064    }
1065
1066    /// Public wrapper for `call_closure`, used by the MCP server to invoke
1067    /// tool handler closures from outside the VM execution loop.
1068    pub async fn call_closure_pub(
1069        &mut self,
1070        closure: &VmClosure,
1071        args: &[VmValue],
1072        functions: &[CompiledFunction],
1073    ) -> Result<VmValue, VmError> {
1074        self.call_closure(closure, args, functions).await
1075    }
1076
1077    /// Resolve a named builtin: sync builtins → async builtins → bridge → error.
1078    /// Used by Call, TailCall, and Pipe handlers to avoid duplicating this lookup.
1079    async fn call_named_builtin(
1080        &mut self,
1081        name: &str,
1082        args: Vec<VmValue>,
1083    ) -> Result<VmValue, VmError> {
1084        // Auto-trace LLM calls and tool calls
1085        let span_kind = match name {
1086            "llm_call" | "llm_stream" | "agent_loop" => Some(crate::tracing::SpanKind::LlmCall),
1087            "mcp_call" => Some(crate::tracing::SpanKind::ToolCall),
1088            _ => None,
1089        };
1090        let _span = span_kind.map(|kind| ScopeSpan::new(kind, name.to_string()));
1091
1092        // Sandbox check: deny builtins blocked by --deny/--allow flags.
1093        if self.denied_builtins.contains(name) {
1094            return Err(VmError::CategorizedError {
1095                message: format!("Tool '{}' is not permitted.", name),
1096                category: ErrorCategory::ToolRejected,
1097            });
1098        }
1099        crate::orchestration::enforce_current_policy_for_builtin(name, &args)?;
1100        if let Some(builtin) = self.builtins.get(name).cloned() {
1101            builtin(&args, &mut self.output)
1102        } else if let Some(async_builtin) = self.async_builtins.get(name).cloned() {
1103            CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1104                slot.borrow_mut().push(self.child_vm());
1105            });
1106            let result = async_builtin(args).await;
1107            CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1108                slot.borrow_mut().pop();
1109            });
1110            result
1111        } else if let Some(bridge) = &self.bridge {
1112            crate::orchestration::enforce_current_policy_for_bridge_builtin(name)?;
1113            let args_json: Vec<serde_json::Value> =
1114                args.iter().map(crate::llm::vm_value_to_json).collect();
1115            let result = bridge
1116                .call(
1117                    "builtin_call",
1118                    serde_json::json!({"name": name, "args": args_json}),
1119                )
1120                .await?;
1121            Ok(crate::bridge::json_result_to_vm_value(&result))
1122        } else {
1123            let all_builtins = self
1124                .builtins
1125                .keys()
1126                .chain(self.async_builtins.keys())
1127                .map(|s| s.as_str());
1128            if let Some(suggestion) = crate::value::closest_match(name, all_builtins) {
1129                return Err(VmError::Runtime(format!(
1130                    "Undefined builtin: {name} (did you mean `{suggestion}`?)"
1131                )));
1132            }
1133            Err(VmError::UndefinedBuiltin(name.to_string()))
1134        }
1135    }
1136}
1137
1138/// Clone the VM at the top of the async-builtin child VM stack, returning a
1139/// fresh `Vm` instance that callers own and can use without coordinating
1140/// with other concurrent users of the stack. This replaces the legacy
1141/// `take/restore` pattern: that pattern serialized access because only one
1142/// consumer could hold the single stack entry at a time, which prevented
1143/// any form of concurrent tool-handler execution within a single
1144/// agent_loop iteration. Cloning is cheap — the VM struct shares its
1145/// heavy state (env, builtins, bridge, module_cache) via `Arc`/`Rc` — so
1146/// multiple concurrent handlers can each have their own execution context.
1147///
1148/// Returns `None` if no parent VM is currently pushed on the stack.
1149pub fn clone_async_builtin_child_vm() -> Option<Vm> {
1150    CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| slot.borrow().last().map(|vm| vm.child_vm()))
1151}
1152
1153/// Legacy API preserved for backward compatibility with any out-of-tree
1154/// callers. New code should use `clone_async_builtin_child_vm()` instead
1155/// — `take` serializes concurrent callers because only one can hold the
1156/// popped value at a time. Internally this now delegates to a clone so
1157/// even legacy callers don't deadlock each other, but the name is kept
1158/// until external callers migrate.
1159#[deprecated(
1160    note = "use clone_async_builtin_child_vm() — take/restore serialized concurrent callers"
1161)]
1162pub fn take_async_builtin_child_vm() -> Option<Vm> {
1163    clone_async_builtin_child_vm()
1164}
1165
1166/// Legacy API — now a no-op because `take_async_builtin_child_vm` returns
1167/// a clone rather than popping the stack, so there is nothing to restore.
1168/// Kept for backward compatibility.
1169#[deprecated(note = "clone_async_builtin_child_vm does not need a matching restore call")]
1170pub fn restore_async_builtin_child_vm(_vm: Vm) {
1171    // No-op: the new clone-based API doesn't require restoration since
1172    // the caller owns a fresh clone and the stack is never mutated.
1173    CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1174        // Intentionally ignore — kept as a syntactic no-op block so the
1175        // function signature remains stable.
1176        let _ = slot;
1177    });
1178}
1179
1180impl Default for Vm {
1181    fn default() -> Self {
1182        Self::new()
1183    }
1184}
1185
1186#[cfg(test)]
1187mod tests {
1188    use super::*;
1189    use crate::compiler::Compiler;
1190    use crate::stdlib::register_vm_stdlib;
1191    use harn_lexer::Lexer;
1192    use harn_parser::Parser;
1193
1194    fn run_harn(source: &str) -> (String, VmValue) {
1195        let rt = tokio::runtime::Builder::new_current_thread()
1196            .enable_all()
1197            .build()
1198            .unwrap();
1199        rt.block_on(async {
1200            let local = tokio::task::LocalSet::new();
1201            local
1202                .run_until(async {
1203                    let mut lexer = Lexer::new(source);
1204                    let tokens = lexer.tokenize().unwrap();
1205                    let mut parser = Parser::new(tokens);
1206                    let program = parser.parse().unwrap();
1207                    let chunk = Compiler::new().compile(&program).unwrap();
1208
1209                    let mut vm = Vm::new();
1210                    register_vm_stdlib(&mut vm);
1211                    let result = vm.execute(&chunk).await.unwrap();
1212                    (vm.output().to_string(), result)
1213                })
1214                .await
1215        })
1216    }
1217
1218    fn run_output(source: &str) -> String {
1219        run_harn(source).0.trim_end().to_string()
1220    }
1221
1222    fn run_harn_result(source: &str) -> Result<(String, VmValue), VmError> {
1223        let rt = tokio::runtime::Builder::new_current_thread()
1224            .enable_all()
1225            .build()
1226            .unwrap();
1227        rt.block_on(async {
1228            let local = tokio::task::LocalSet::new();
1229            local
1230                .run_until(async {
1231                    let mut lexer = Lexer::new(source);
1232                    let tokens = lexer.tokenize().unwrap();
1233                    let mut parser = Parser::new(tokens);
1234                    let program = parser.parse().unwrap();
1235                    let chunk = Compiler::new().compile(&program).unwrap();
1236
1237                    let mut vm = Vm::new();
1238                    register_vm_stdlib(&mut vm);
1239                    let result = vm.execute(&chunk).await?;
1240                    Ok((vm.output().to_string(), result))
1241                })
1242                .await
1243        })
1244    }
1245
1246    #[test]
1247    fn test_arithmetic() {
1248        let out =
1249            run_output("pipeline t(task) { log(2 + 3)\nlog(10 - 4)\nlog(3 * 5)\nlog(10 / 3) }");
1250        assert_eq!(out, "[harn] 5\n[harn] 6\n[harn] 15\n[harn] 3");
1251    }
1252
1253    #[test]
1254    fn test_mixed_arithmetic() {
1255        let out = run_output("pipeline t(task) { log(3 + 1.5)\nlog(10 - 2.5) }");
1256        assert_eq!(out, "[harn] 4.5\n[harn] 7.5");
1257    }
1258
1259    #[test]
1260    fn test_comparisons() {
1261        let out =
1262            run_output("pipeline t(task) { log(1 < 2)\nlog(2 > 3)\nlog(1 == 1)\nlog(1 != 2) }");
1263        assert_eq!(out, "[harn] true\n[harn] false\n[harn] true\n[harn] true");
1264    }
1265
1266    #[test]
1267    fn test_let_var() {
1268        let out = run_output("pipeline t(task) { let x = 42\nlog(x)\nvar y = 1\ny = 2\nlog(y) }");
1269        assert_eq!(out, "[harn] 42\n[harn] 2");
1270    }
1271
1272    #[test]
1273    fn test_if_else() {
1274        let out = run_output(
1275            r#"pipeline t(task) { if true { log("yes") } if false { log("wrong") } else { log("no") } }"#,
1276        );
1277        assert_eq!(out, "[harn] yes\n[harn] no");
1278    }
1279
1280    #[test]
1281    fn test_while_loop() {
1282        let out = run_output("pipeline t(task) { var i = 0\n while i < 5 { i = i + 1 }\n log(i) }");
1283        assert_eq!(out, "[harn] 5");
1284    }
1285
1286    #[test]
1287    fn test_for_in() {
1288        let out = run_output("pipeline t(task) { for item in [1, 2, 3] { log(item) } }");
1289        assert_eq!(out, "[harn] 1\n[harn] 2\n[harn] 3");
1290    }
1291
1292    #[test]
1293    fn test_inner_for_return_does_not_leak_iterator_into_caller() {
1294        let out = run_output(
1295            r#"pipeline t(task) {
1296  fn first_match() {
1297    for pattern in ["a", "b"] {
1298      return pattern
1299    }
1300    return ""
1301  }
1302
1303  var seen = []
1304  for path in ["outer"] {
1305    seen = seen + [path + ":" + first_match()]
1306  }
1307  log(join(seen, ","))
1308}"#,
1309        );
1310        assert_eq!(out, "[harn] outer:a");
1311    }
1312
1313    #[test]
1314    fn test_fn_decl_and_call() {
1315        let out = run_output("pipeline t(task) { fn add(a, b) { return a + b }\nlog(add(3, 4)) }");
1316        assert_eq!(out, "[harn] 7");
1317    }
1318
1319    #[test]
1320    fn test_closure() {
1321        let out = run_output("pipeline t(task) { let double = { x -> x * 2 }\nlog(double(5)) }");
1322        assert_eq!(out, "[harn] 10");
1323    }
1324
1325    #[test]
1326    fn test_closure_capture() {
1327        let out = run_output(
1328            "pipeline t(task) { let base = 10\nfn offset(x) { return x + base }\nlog(offset(5)) }",
1329        );
1330        assert_eq!(out, "[harn] 15");
1331    }
1332
1333    #[test]
1334    fn test_string_concat() {
1335        let out = run_output(
1336            r#"pipeline t(task) { let a = "hello" + " " + "world"
1337log(a) }"#,
1338        );
1339        assert_eq!(out, "[harn] hello world");
1340    }
1341
1342    #[test]
1343    fn test_list_map() {
1344        let out = run_output(
1345            "pipeline t(task) { let doubled = [1, 2, 3].map({ x -> x * 2 })\nlog(doubled) }",
1346        );
1347        assert_eq!(out, "[harn] [2, 4, 6]");
1348    }
1349
1350    #[test]
1351    fn test_list_filter() {
1352        let out = run_output(
1353            "pipeline t(task) { let big = [1, 2, 3, 4, 5].filter({ x -> x > 3 })\nlog(big) }",
1354        );
1355        assert_eq!(out, "[harn] [4, 5]");
1356    }
1357
1358    #[test]
1359    fn test_list_reduce() {
1360        let out = run_output(
1361            "pipeline t(task) { let sum = [1, 2, 3, 4].reduce(0, { acc, x -> acc + x })\nlog(sum) }",
1362        );
1363        assert_eq!(out, "[harn] 10");
1364    }
1365
1366    #[test]
1367    fn test_dict_access() {
1368        let out = run_output(
1369            r#"pipeline t(task) { let d = {name: "test", value: 42}
1370log(d.name)
1371log(d.value) }"#,
1372        );
1373        assert_eq!(out, "[harn] test\n[harn] 42");
1374    }
1375
1376    #[test]
1377    fn test_dict_methods() {
1378        let out = run_output(
1379            r#"pipeline t(task) { let d = {a: 1, b: 2}
1380log(d.keys())
1381log(d.values())
1382log(d.has("a"))
1383log(d.has("z")) }"#,
1384        );
1385        assert_eq!(
1386            out,
1387            "[harn] [a, b]\n[harn] [1, 2]\n[harn] true\n[harn] false"
1388        );
1389    }
1390
1391    #[test]
1392    fn test_pipe_operator() {
1393        let out = run_output(
1394            "pipeline t(task) { fn double(x) { return x * 2 }\nlet r = 5 |> double\nlog(r) }",
1395        );
1396        assert_eq!(out, "[harn] 10");
1397    }
1398
1399    #[test]
1400    fn test_pipe_with_closure() {
1401        let out = run_output(
1402            r#"pipeline t(task) { let r = "hello world" |> { s -> s.split(" ") }
1403log(r) }"#,
1404        );
1405        assert_eq!(out, "[harn] [hello, world]");
1406    }
1407
1408    #[test]
1409    fn test_nil_coalescing() {
1410        let out = run_output(
1411            r#"pipeline t(task) { let a = nil ?? "fallback"
1412log(a)
1413let b = "present" ?? "fallback"
1414log(b) }"#,
1415        );
1416        assert_eq!(out, "[harn] fallback\n[harn] present");
1417    }
1418
1419    #[test]
1420    fn test_logical_operators() {
1421        let out =
1422            run_output("pipeline t(task) { log(true && false)\nlog(true || false)\nlog(!true) }");
1423        assert_eq!(out, "[harn] false\n[harn] true\n[harn] false");
1424    }
1425
1426    #[test]
1427    fn test_match() {
1428        let out = run_output(
1429            r#"pipeline t(task) { let x = "b"
1430match x { "a" -> { log("first") } "b" -> { log("second") } "c" -> { log("third") } } }"#,
1431        );
1432        assert_eq!(out, "[harn] second");
1433    }
1434
1435    #[test]
1436    fn test_subscript() {
1437        let out = run_output("pipeline t(task) { let arr = [10, 20, 30]\nlog(arr[1]) }");
1438        assert_eq!(out, "[harn] 20");
1439    }
1440
1441    #[test]
1442    fn test_string_methods() {
1443        let out = run_output(
1444            r#"pipeline t(task) { log("hello world".replace("world", "harn"))
1445log("a,b,c".split(","))
1446log("  hello  ".trim())
1447log("hello".starts_with("hel"))
1448log("hello".ends_with("lo"))
1449log("hello".substring(1, 3)) }"#,
1450        );
1451        assert_eq!(
1452            out,
1453            "[harn] hello harn\n[harn] [a, b, c]\n[harn] hello\n[harn] true\n[harn] true\n[harn] el"
1454        );
1455    }
1456
1457    #[test]
1458    fn test_list_properties() {
1459        let out = run_output(
1460            "pipeline t(task) { let list = [1, 2, 3]\nlog(list.count)\nlog(list.empty)\nlog(list.first)\nlog(list.last) }",
1461        );
1462        assert_eq!(out, "[harn] 3\n[harn] false\n[harn] 1\n[harn] 3");
1463    }
1464
1465    #[test]
1466    fn test_recursive_function() {
1467        let out = run_output(
1468            "pipeline t(task) { fn fib(n) { if n <= 1 { return n } return fib(n - 1) + fib(n - 2) }\nlog(fib(10)) }",
1469        );
1470        assert_eq!(out, "[harn] 55");
1471    }
1472
1473    #[test]
1474    fn test_ternary() {
1475        let out = run_output(
1476            r#"pipeline t(task) { let x = 5
1477let r = x > 0 ? "positive" : "non-positive"
1478log(r) }"#,
1479        );
1480        assert_eq!(out, "[harn] positive");
1481    }
1482
1483    #[test]
1484    fn test_for_in_dict() {
1485        let out = run_output(
1486            "pipeline t(task) { let d = {a: 1, b: 2}\nfor entry in d { log(entry.key) } }",
1487        );
1488        assert_eq!(out, "[harn] a\n[harn] b");
1489    }
1490
1491    #[test]
1492    fn test_list_any_all() {
1493        let out = run_output(
1494            "pipeline t(task) { let nums = [2, 4, 6]\nlog(nums.any({ x -> x > 5 }))\nlog(nums.all({ x -> x > 0 }))\nlog(nums.all({ x -> x > 3 })) }",
1495        );
1496        assert_eq!(out, "[harn] true\n[harn] true\n[harn] false");
1497    }
1498
1499    #[test]
1500    fn test_disassembly() {
1501        let mut lexer = Lexer::new("pipeline t(task) { log(2 + 3) }");
1502        let tokens = lexer.tokenize().unwrap();
1503        let mut parser = Parser::new(tokens);
1504        let program = parser.parse().unwrap();
1505        let chunk = Compiler::new().compile(&program).unwrap();
1506        let disasm = chunk.disassemble("test");
1507        assert!(disasm.contains("CONSTANT"));
1508        assert!(disasm.contains("ADD"));
1509        assert!(disasm.contains("CALL"));
1510    }
1511
1512    // --- Error handling tests ---
1513
1514    #[test]
1515    fn test_try_catch_basic() {
1516        let out = run_output(
1517            r#"pipeline t(task) { try { throw "oops" } catch(e) { log("caught: " + e) } }"#,
1518        );
1519        assert_eq!(out, "[harn] caught: oops");
1520    }
1521
1522    #[test]
1523    fn test_try_no_error() {
1524        let out = run_output(
1525            r#"pipeline t(task) {
1526var result = 0
1527try { result = 42 } catch(e) { result = 0 }
1528log(result)
1529}"#,
1530        );
1531        assert_eq!(out, "[harn] 42");
1532    }
1533
1534    #[test]
1535    fn test_throw_uncaught() {
1536        let result = run_harn_result(r#"pipeline t(task) { throw "boom" }"#);
1537        assert!(result.is_err());
1538    }
1539
1540    // --- Additional test coverage ---
1541
1542    fn run_vm(source: &str) -> String {
1543        let rt = tokio::runtime::Builder::new_current_thread()
1544            .enable_all()
1545            .build()
1546            .unwrap();
1547        rt.block_on(async {
1548            let local = tokio::task::LocalSet::new();
1549            local
1550                .run_until(async {
1551                    let mut lexer = Lexer::new(source);
1552                    let tokens = lexer.tokenize().unwrap();
1553                    let mut parser = Parser::new(tokens);
1554                    let program = parser.parse().unwrap();
1555                    let chunk = Compiler::new().compile(&program).unwrap();
1556                    let mut vm = Vm::new();
1557                    register_vm_stdlib(&mut vm);
1558                    vm.execute(&chunk).await.unwrap();
1559                    vm.output().to_string()
1560                })
1561                .await
1562        })
1563    }
1564
1565    fn run_vm_err(source: &str) -> String {
1566        let rt = tokio::runtime::Builder::new_current_thread()
1567            .enable_all()
1568            .build()
1569            .unwrap();
1570        rt.block_on(async {
1571            let local = tokio::task::LocalSet::new();
1572            local
1573                .run_until(async {
1574                    let mut lexer = Lexer::new(source);
1575                    let tokens = lexer.tokenize().unwrap();
1576                    let mut parser = Parser::new(tokens);
1577                    let program = parser.parse().unwrap();
1578                    let chunk = Compiler::new().compile(&program).unwrap();
1579                    let mut vm = Vm::new();
1580                    register_vm_stdlib(&mut vm);
1581                    match vm.execute(&chunk).await {
1582                        Err(e) => format!("{}", e),
1583                        Ok(_) => panic!("Expected error"),
1584                    }
1585                })
1586                .await
1587        })
1588    }
1589
1590    #[test]
1591    fn test_hello_world() {
1592        let out = run_vm(r#"pipeline default(task) { log("hello") }"#);
1593        assert_eq!(out, "[harn] hello\n");
1594    }
1595
1596    #[test]
1597    fn test_arithmetic_new() {
1598        let out = run_vm("pipeline default(task) { log(2 + 3) }");
1599        assert_eq!(out, "[harn] 5\n");
1600    }
1601
1602    #[test]
1603    fn test_string_concat_new() {
1604        let out = run_vm(r#"pipeline default(task) { log("a" + "b") }"#);
1605        assert_eq!(out, "[harn] ab\n");
1606    }
1607
1608    #[test]
1609    fn test_if_else_new() {
1610        let out = run_vm("pipeline default(task) { if true { log(1) } else { log(2) } }");
1611        assert_eq!(out, "[harn] 1\n");
1612    }
1613
1614    #[test]
1615    fn test_for_loop_new() {
1616        let out = run_vm("pipeline default(task) { for i in [1, 2, 3] { log(i) } }");
1617        assert_eq!(out, "[harn] 1\n[harn] 2\n[harn] 3\n");
1618    }
1619
1620    #[test]
1621    fn test_while_loop_new() {
1622        let out = run_vm("pipeline default(task) { var i = 0\nwhile i < 3 { log(i)\ni = i + 1 } }");
1623        assert_eq!(out, "[harn] 0\n[harn] 1\n[harn] 2\n");
1624    }
1625
1626    #[test]
1627    fn test_function_call_new() {
1628        let out =
1629            run_vm("pipeline default(task) { fn add(a, b) { return a + b }\nlog(add(2, 3)) }");
1630        assert_eq!(out, "[harn] 5\n");
1631    }
1632
1633    #[test]
1634    fn test_closure_new() {
1635        let out = run_vm("pipeline default(task) { let f = { x -> x * 2 }\nlog(f(5)) }");
1636        assert_eq!(out, "[harn] 10\n");
1637    }
1638
1639    #[test]
1640    fn test_recursion() {
1641        let out = run_vm("pipeline default(task) { fn fact(n) { if n <= 1 { return 1 }\nreturn n * fact(n - 1) }\nlog(fact(5)) }");
1642        assert_eq!(out, "[harn] 120\n");
1643    }
1644
1645    #[test]
1646    fn test_try_catch_new() {
1647        let out = run_vm(r#"pipeline default(task) { try { throw "err" } catch (e) { log(e) } }"#);
1648        assert_eq!(out, "[harn] err\n");
1649    }
1650
1651    #[test]
1652    fn test_try_no_error_new() {
1653        let out = run_vm("pipeline default(task) { try { log(1) } catch (e) { log(2) } }");
1654        assert_eq!(out, "[harn] 1\n");
1655    }
1656
1657    #[test]
1658    fn test_list_map_new() {
1659        let out =
1660            run_vm("pipeline default(task) { let r = [1, 2, 3].map({ x -> x * 2 })\nlog(r) }");
1661        assert_eq!(out, "[harn] [2, 4, 6]\n");
1662    }
1663
1664    #[test]
1665    fn test_list_filter_new() {
1666        let out = run_vm(
1667            "pipeline default(task) { let r = [1, 2, 3, 4].filter({ x -> x > 2 })\nlog(r) }",
1668        );
1669        assert_eq!(out, "[harn] [3, 4]\n");
1670    }
1671
1672    #[test]
1673    fn test_dict_access_new() {
1674        let out = run_vm("pipeline default(task) { let d = {name: \"Alice\"}\nlog(d.name) }");
1675        assert_eq!(out, "[harn] Alice\n");
1676    }
1677
1678    #[test]
1679    fn test_string_interpolation() {
1680        let out = run_vm("pipeline default(task) { let x = 42\nlog(\"val=${x}\") }");
1681        assert_eq!(out, "[harn] val=42\n");
1682    }
1683
1684    #[test]
1685    fn test_match_new() {
1686        let out = run_vm(
1687            "pipeline default(task) { let x = \"b\"\nmatch x { \"a\" -> { log(1) } \"b\" -> { log(2) } } }",
1688        );
1689        assert_eq!(out, "[harn] 2\n");
1690    }
1691
1692    #[test]
1693    fn test_json_roundtrip() {
1694        let out = run_vm("pipeline default(task) { let s = json_stringify({a: 1})\nlog(s) }");
1695        assert!(out.contains("\"a\""));
1696        assert!(out.contains("1"));
1697    }
1698
1699    #[test]
1700    fn test_type_of() {
1701        let out = run_vm("pipeline default(task) { log(type_of(42))\nlog(type_of(\"hi\")) }");
1702        assert_eq!(out, "[harn] int\n[harn] string\n");
1703    }
1704
1705    #[test]
1706    fn test_stack_overflow() {
1707        let err = run_vm_err("pipeline default(task) { fn f() { f() }\nf() }");
1708        assert!(
1709            err.contains("stack") || err.contains("overflow") || err.contains("recursion"),
1710            "Expected stack overflow error, got: {}",
1711            err
1712        );
1713    }
1714
1715    #[test]
1716    fn test_division_by_zero() {
1717        let err = run_vm_err("pipeline default(task) { log(1 / 0) }");
1718        assert!(
1719            err.contains("Division by zero") || err.contains("division"),
1720            "Expected division by zero error, got: {}",
1721            err
1722        );
1723    }
1724
1725    #[test]
1726    fn test_float_division_by_zero_uses_ieee_values() {
1727        let out = run_vm(
1728            "pipeline default(task) { log(is_nan(0.0 / 0.0))\nlog(is_infinite(1.0 / 0.0))\nlog(is_infinite(-1.0 / 0.0)) }",
1729        );
1730        assert_eq!(out, "[harn] true\n[harn] true\n[harn] true\n");
1731    }
1732
1733    #[test]
1734    fn test_reusing_catch_binding_name_in_same_block() {
1735        let out = run_vm(
1736            r#"pipeline default(task) {
1737try {
1738    throw "a"
1739} catch e {
1740    log(e)
1741}
1742try {
1743    throw "b"
1744} catch e {
1745    log(e)
1746}
1747}"#,
1748        );
1749        assert_eq!(out, "[harn] a\n[harn] b\n");
1750    }
1751
1752    #[test]
1753    fn test_try_catch_nested() {
1754        let out = run_output(
1755            r#"pipeline t(task) {
1756try {
1757    try {
1758        throw "inner"
1759    } catch(e) {
1760        log("inner caught: " + e)
1761        throw "outer"
1762    }
1763} catch(e2) {
1764    log("outer caught: " + e2)
1765}
1766}"#,
1767        );
1768        assert_eq!(
1769            out,
1770            "[harn] inner caught: inner\n[harn] outer caught: outer"
1771        );
1772    }
1773
1774    // --- Concurrency tests ---
1775
1776    #[test]
1777    fn test_parallel_basic() {
1778        let out = run_output(
1779            "pipeline t(task) { let results = parallel(3) { i -> i * 10 }\nlog(results) }",
1780        );
1781        assert_eq!(out, "[harn] [0, 10, 20]");
1782    }
1783
1784    #[test]
1785    fn test_parallel_no_variable() {
1786        let out = run_output("pipeline t(task) { let results = parallel(3) { 42 }\nlog(results) }");
1787        assert_eq!(out, "[harn] [42, 42, 42]");
1788    }
1789
1790    #[test]
1791    fn test_parallel_map_basic() {
1792        let out = run_output(
1793            "pipeline t(task) { let results = parallel_map([1, 2, 3]) { x -> x * x }\nlog(results) }",
1794        );
1795        assert_eq!(out, "[harn] [1, 4, 9]");
1796    }
1797
1798    #[test]
1799    fn test_spawn_await() {
1800        let out = run_output(
1801            r#"pipeline t(task) {
1802let handle = spawn { log("spawned") }
1803let result = await(handle)
1804log("done")
1805}"#,
1806        );
1807        assert_eq!(out, "[harn] spawned\n[harn] done");
1808    }
1809
1810    #[test]
1811    fn test_spawn_cancel() {
1812        let out = run_output(
1813            r#"pipeline t(task) {
1814let handle = spawn { log("should be cancelled") }
1815cancel(handle)
1816log("cancelled")
1817}"#,
1818        );
1819        assert_eq!(out, "[harn] cancelled");
1820    }
1821
1822    #[test]
1823    fn test_spawn_returns_value() {
1824        let out = run_output("pipeline t(task) { let h = spawn { 42 }\nlet r = await(h)\nlog(r) }");
1825        assert_eq!(out, "[harn] 42");
1826    }
1827
1828    // --- Deadline tests ---
1829
1830    #[test]
1831    fn test_deadline_success() {
1832        let out = run_output(
1833            r#"pipeline t(task) {
1834let result = deadline 5s { log("within deadline")
183542 }
1836log(result)
1837}"#,
1838        );
1839        assert_eq!(out, "[harn] within deadline\n[harn] 42");
1840    }
1841
1842    #[test]
1843    fn test_deadline_exceeded() {
1844        let result = run_harn_result(
1845            r#"pipeline t(task) {
1846deadline 1ms {
1847  var i = 0
1848  while i < 1000000 { i = i + 1 }
1849}
1850}"#,
1851        );
1852        assert!(result.is_err());
1853    }
1854
1855    #[test]
1856    fn test_deadline_caught_by_try() {
1857        let out = run_output(
1858            r#"pipeline t(task) {
1859try {
1860  deadline 1ms {
1861    var i = 0
1862    while i < 1000000 { i = i + 1 }
1863  }
1864} catch(e) {
1865  log("caught")
1866}
1867}"#,
1868        );
1869        assert_eq!(out, "[harn] caught");
1870    }
1871
1872    /// Helper that runs Harn source with a set of denied builtins.
1873    fn run_harn_with_denied(
1874        source: &str,
1875        denied: HashSet<String>,
1876    ) -> Result<(String, VmValue), VmError> {
1877        let rt = tokio::runtime::Builder::new_current_thread()
1878            .enable_all()
1879            .build()
1880            .unwrap();
1881        rt.block_on(async {
1882            let local = tokio::task::LocalSet::new();
1883            local
1884                .run_until(async {
1885                    let mut lexer = Lexer::new(source);
1886                    let tokens = lexer.tokenize().unwrap();
1887                    let mut parser = Parser::new(tokens);
1888                    let program = parser.parse().unwrap();
1889                    let chunk = Compiler::new().compile(&program).unwrap();
1890
1891                    let mut vm = Vm::new();
1892                    register_vm_stdlib(&mut vm);
1893                    vm.set_denied_builtins(denied);
1894                    let result = vm.execute(&chunk).await?;
1895                    Ok((vm.output().to_string(), result))
1896                })
1897                .await
1898        })
1899    }
1900
1901    #[test]
1902    fn test_sandbox_deny_builtin() {
1903        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1904        let result = run_harn_with_denied(
1905            r#"pipeline t(task) {
1906let xs = [1, 2]
1907push(xs, 3)
1908}"#,
1909            denied,
1910        );
1911        let err = result.unwrap_err();
1912        let msg = format!("{err}");
1913        assert!(
1914            msg.contains("not permitted"),
1915            "expected not permitted, got: {msg}"
1916        );
1917        assert!(
1918            msg.contains("push"),
1919            "expected builtin name in error, got: {msg}"
1920        );
1921    }
1922
1923    #[test]
1924    fn test_sandbox_allowed_builtin_works() {
1925        // Denying "push" should not block "log"
1926        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1927        let result = run_harn_with_denied(r#"pipeline t(task) { log("hello") }"#, denied);
1928        let (output, _) = result.unwrap();
1929        assert_eq!(output.trim(), "[harn] hello");
1930    }
1931
1932    #[test]
1933    fn test_sandbox_empty_denied_set() {
1934        // With an empty denied set, everything should work.
1935        let result = run_harn_with_denied(r#"pipeline t(task) { log("ok") }"#, HashSet::new());
1936        let (output, _) = result.unwrap();
1937        assert_eq!(output.trim(), "[harn] ok");
1938    }
1939
1940    #[test]
1941    fn test_sandbox_propagates_to_spawn() {
1942        // Denied builtins should propagate to spawned VMs.
1943        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1944        let result = run_harn_with_denied(
1945            r#"pipeline t(task) {
1946let handle = spawn {
1947  let xs = [1, 2]
1948  push(xs, 3)
1949}
1950await(handle)
1951}"#,
1952            denied,
1953        );
1954        let err = result.unwrap_err();
1955        let msg = format!("{err}");
1956        assert!(
1957            msg.contains("not permitted"),
1958            "expected not permitted in spawned VM, got: {msg}"
1959        );
1960    }
1961
1962    #[test]
1963    fn test_sandbox_propagates_to_parallel() {
1964        // Denied builtins should propagate to parallel VMs.
1965        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1966        let result = run_harn_with_denied(
1967            r#"pipeline t(task) {
1968let results = parallel(2) { i ->
1969  let xs = [1, 2]
1970  push(xs, 3)
1971}
1972}"#,
1973            denied,
1974        );
1975        let err = result.unwrap_err();
1976        let msg = format!("{err}");
1977        assert!(
1978            msg.contains("not permitted"),
1979            "expected not permitted in parallel VM, got: {msg}"
1980        );
1981    }
1982
1983    #[test]
1984    fn test_if_else_has_lexical_block_scope() {
1985        let out = run_output(
1986            r#"pipeline t(task) {
1987let x = "outer"
1988if true {
1989  let x = "inner"
1990  log(x)
1991} else {
1992  let x = "other"
1993  log(x)
1994}
1995log(x)
1996}"#,
1997        );
1998        assert_eq!(out, "[harn] inner\n[harn] outer");
1999    }
2000
2001    #[test]
2002    fn test_loop_and_catch_bindings_are_block_scoped() {
2003        let out = run_output(
2004            r#"pipeline t(task) {
2005let label = "outer"
2006for item in [1, 2] {
2007  let label = "loop ${item}"
2008  log(label)
2009}
2010try {
2011  throw("boom")
2012} catch (label) {
2013  log(label)
2014}
2015log(label)
2016}"#,
2017        );
2018        assert_eq!(
2019            out,
2020            "[harn] loop 1\n[harn] loop 2\n[harn] boom\n[harn] outer"
2021        );
2022    }
2023}