harn_vm/
vm.rs

1mod format;
2mod methods;
3mod ops;
4
5use std::cell::RefCell;
6use std::collections::{BTreeMap, HashSet};
7use std::future::Future;
8use std::pin::Pin;
9use std::rc::Rc;
10use std::time::Instant;
11
12use crate::chunk::{Chunk, CompiledFunction, Constant};
13use crate::value::{
14    ErrorCategory, VmAsyncBuiltinFn, VmBuiltinFn, VmClosure, VmEnv, VmError, VmTaskHandle, VmValue,
15};
16
thread_local! {
    /// Per-thread stack of [`Vm`] values, initially empty.
    ///
    /// NOTE(review): the name suggests this holds the child VM made available to
    /// the async builtin that is currently running; the code that pushes and pops
    /// entries is outside this excerpt — confirm against its users.
    static CURRENT_ASYNC_BUILTIN_CHILD_VM: RefCell<Vec<Vm>> = const { RefCell::new(Vec::new()) };
}
20
21/// RAII guard that starts a tracing span on creation and ends it on drop.
22struct ScopeSpan(u64);
23
24impl ScopeSpan {
25    fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
26        Self(crate::tracing::span_start(kind, name))
27    }
28}
29
30impl Drop for ScopeSpan {
31    fn drop(&mut self) {
32        crate::tracing::span_end(self.0);
33    }
34}
35
/// Call frame for function execution.
pub(crate) struct CallFrame {
    /// Bytecode chunk this frame executes (the frame holds its own copy).
    pub(crate) chunk: Chunk,
    /// Index into `chunk.code` of the next instruction to execute.
    pub(crate) ip: usize,
    /// Length of the VM value stack when the frame was pushed; the stack is
    /// truncated back to this length when the frame returns.
    pub(crate) stack_base: usize,
    /// Environment captured when the frame was pushed; reinstated as the VM's
    /// current environment when the frame is popped.
    pub(crate) saved_env: VmEnv,
    /// Function name for stack traces (empty for top-level pipeline).
    pub(crate) fn_name: String,
    /// Number of arguments actually passed by the caller (for default arg support).
    pub(crate) argc: usize,
    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
    /// Set when entering a closure that originated from an imported module.
    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
}
50
/// Exception handler for try/catch.
pub(crate) struct ExceptionHandler {
    /// Instruction pointer the handling frame jumps to when this handler catches.
    pub(crate) catch_ip: usize,
    /// Value-stack length to truncate to before the thrown value is pushed.
    pub(crate) stack_depth: usize,
    /// Call-frame count to unwind to before resuming at `catch_ip`.
    pub(crate) frame_depth: usize,
    /// If non-empty, this catch only handles errors whose enum_name matches.
    pub(crate) error_type: String,
}
59
/// Debug action returned by the debug hook.
///
/// The enum carries no data, so equality is total; `Eq` is derived alongside
/// `PartialEq` so the type can be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DebugAction {
    /// Continue execution normally.
    Continue,
    /// Stop (breakpoint hit, step complete).
    Stop,
}
68
/// Information about current execution state for the debugger.
#[derive(Debug, Clone)]
pub struct DebugState {
    /// Source line of the innermost frame (0 when no line is available).
    pub line: usize,
    /// Variables reported by the VM's current environment, keyed by name.
    pub variables: BTreeMap<String, VmValue>,
    /// Display name of the innermost frame.
    pub frame_name: String,
    /// Number of call frames on the VM's frame stack.
    pub frame_depth: usize,
}
77
/// Iterator state for for-in loops: either a pre-collected vec, an async channel, or a generator.
pub(crate) enum IterState {
    /// Iteration over a pre-collected list of values.
    Vec {
        /// The collected values.
        items: Vec<VmValue>,
        /// Position within `items` — presumably the index of the next value to
        /// yield; confirm against the iteration opcodes.
        idx: usize,
    },
    /// Iteration over values received from an async channel.
    Channel {
        /// Shared, mutex-guarded receiving end of the channel.
        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
        /// Shared flag — presumably set once the channel has been closed; TODO confirm.
        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
    },
    /// Iteration driven by a generator value.
    Generator {
        /// The generator being iterated.
        gen: crate::value::VmGenerator,
    },
}
92
/// The Harn bytecode virtual machine.
pub struct Vm {
    /// Value stack shared by all call frames; each frame records the stack
    /// length at entry in `CallFrame::stack_base`.
    pub(crate) stack: Vec<VmValue>,
    /// Current variable environment; saved into a frame when it is pushed and
    /// restored from it when the frame is popped.
    pub(crate) env: VmEnv,
    /// Captured program output, exposed through `Vm::output`.
    pub(crate) output: String,
    /// Registered synchronous builtins, keyed by name.
    pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
    /// Registered asynchronous builtins, keyed by name.
    pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
    /// Iterator state for for-in loops.
    pub(crate) iterators: Vec<IterState>,
    /// Call frame stack.
    pub(crate) frames: Vec<CallFrame>,
    /// Exception handler stack.
    pub(crate) exception_handlers: Vec<ExceptionHandler>,
    /// Spawned async task handles.
    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
    /// Counter for generating unique task IDs.
    pub(crate) task_counter: u64,
    /// Active deadline stack: (deadline_instant, frame_depth).
    pub(crate) deadlines: Vec<(Instant, usize)>,
    /// Breakpoints (source line numbers).
    pub(crate) breakpoints: Vec<usize>,
    /// Whether the VM is in step mode.
    pub(crate) step_mode: bool,
    /// The frame depth at which stepping started (for step-over).
    pub(crate) step_frame_depth: usize,
    /// Whether the VM is currently stopped at a debug point.
    pub(crate) stopped: bool,
    /// Last source line executed (to detect line changes).
    pub(crate) last_line: usize,
    /// Source directory for resolving imports.
    pub(crate) source_dir: Option<std::path::PathBuf>,
    /// Already-imported file paths (cycle prevention).
    pub(crate) imported_paths: Vec<std::path::PathBuf>,
    /// Source file path for error reporting.
    pub(crate) source_file: Option<String>,
    /// Source text for error reporting.
    pub(crate) source_text: Option<String>,
    /// Optional bridge for delegating unknown builtins in bridge mode.
    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
    pub(crate) denied_builtins: HashSet<String>,
    /// Cancellation token for cooperative graceful shutdown (set by parent).
    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
    /// Captured stack trace from the most recent error (fn_name, line, col).
    pub(crate) error_stack_trace: Vec<(String, usize, usize)>,
    /// Yield channel sender for generator execution. When set, `Op::Yield`
    /// sends values through this channel instead of being a no-op.
    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
    /// Project root directory (detected via harn.toml).
    /// Used as base directory for metadata, store, and checkpoint operations.
    pub(crate) project_root: Option<std::path::PathBuf>,
    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
    /// after the environment, so user-defined variables can shadow them.
    pub(crate) globals: BTreeMap<String, VmValue>,
}
148
149impl Vm {
150    pub fn new() -> Self {
151        Self {
152            stack: Vec::with_capacity(256),
153            env: VmEnv::new(),
154            output: String::new(),
155            builtins: BTreeMap::new(),
156            async_builtins: BTreeMap::new(),
157            iterators: Vec::new(),
158            frames: Vec::new(),
159            exception_handlers: Vec::new(),
160            spawned_tasks: BTreeMap::new(),
161            task_counter: 0,
162            deadlines: Vec::new(),
163            breakpoints: Vec::new(),
164            step_mode: false,
165            step_frame_depth: 0,
166            stopped: false,
167            last_line: 0,
168            source_dir: None,
169            imported_paths: Vec::new(),
170            source_file: None,
171            source_text: None,
172            bridge: None,
173            denied_builtins: HashSet::new(),
174            cancel_token: None,
175            error_stack_trace: Vec::new(),
176            yield_sender: None,
177            project_root: None,
178            globals: BTreeMap::new(),
179        }
180    }
181
    /// Set the bridge for delegating unknown builtins in bridge mode.
    ///
    /// Replaces any bridge that was configured earlier.
    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
        self.bridge = Some(bridge);
    }
186
    /// Set builtins that are denied in sandbox mode.
    /// When called, the given builtin names will produce a permission error.
    ///
    /// The given set replaces the previous deny list; it is not merged with it.
    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
        self.denied_builtins = denied;
    }
192
193    /// Set source info for error reporting (file path and source text).
194    pub fn set_source_info(&mut self, file: &str, text: &str) {
195        self.source_file = Some(file.to_string());
196        self.source_text = Some(text.to_string());
197    }
198
    /// Set breakpoints by source line number.
    ///
    /// The given list replaces all previously set breakpoints.
    pub fn set_breakpoints(&mut self, lines: Vec<usize>) {
        self.breakpoints = lines;
    }
203
204    /// Enable step mode (stop at next line).
205    pub fn set_step_mode(&mut self, step: bool) {
206        self.step_mode = step;
207        self.step_frame_depth = self.frames.len();
208    }
209
210    /// Enable step-over mode (stop at next line at same or lower frame depth).
211    pub fn set_step_over(&mut self) {
212        self.step_mode = true;
213        self.step_frame_depth = self.frames.len();
214    }
215
216    /// Enable step-out mode (stop when returning from current frame).
217    pub fn set_step_out(&mut self) {
218        self.step_mode = true;
219        self.step_frame_depth = self.frames.len().saturating_sub(1);
220    }
221
    /// Check if the VM is stopped at a debug point.
    ///
    /// The flag is set by `step_execute` when a breakpoint or step stop is hit
    /// and cleared again when it goes on to execute an instruction.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
226
227    /// Get the current debug state (variables, line, etc.).
228    pub fn debug_state(&self) -> DebugState {
229        let line = self.current_line();
230        let variables = self.env.all_variables();
231        let frame_name = if self.frames.len() > 1 {
232            format!("frame_{}", self.frames.len() - 1)
233        } else {
234            "pipeline".to_string()
235        };
236        DebugState {
237            line,
238            variables,
239            frame_name,
240            frame_depth: self.frames.len(),
241        }
242    }
243
244    /// Get all stack frames for the debugger.
245    pub fn debug_stack_frames(&self) -> Vec<(String, usize)> {
246        let mut frames = Vec::new();
247        for (i, frame) in self.frames.iter().enumerate() {
248            let line = if frame.ip > 0 && frame.ip - 1 < frame.chunk.lines.len() {
249                frame.chunk.lines[frame.ip - 1] as usize
250            } else {
251                0
252            };
253            let name = if frame.fn_name.is_empty() {
254                if i == 0 {
255                    "pipeline".to_string()
256                } else {
257                    format!("fn_{}", i)
258                }
259            } else {
260                frame.fn_name.clone()
261            };
262            frames.push((name, line));
263        }
264        frames
265    }
266
267    /// Get the current source line.
268    fn current_line(&self) -> usize {
269        if let Some(frame) = self.frames.last() {
270            let ip = if frame.ip > 0 { frame.ip - 1 } else { 0 };
271            if ip < frame.chunk.lines.len() {
272                return frame.chunk.lines[ip] as usize;
273            }
274        }
275        0
276    }
277
278    /// Execute one instruction, returning whether to stop (breakpoint/step).
279    /// Returns Ok(None) to continue, Ok(Some(val)) on program end, Err on error.
280    pub async fn step_execute(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
281        // Check if we need to stop at this line
282        let current_line = self.current_line();
283        let line_changed = current_line != self.last_line && current_line > 0;
284
285        if line_changed {
286            self.last_line = current_line;
287
288            // Check breakpoints
289            if self.breakpoints.contains(&current_line) {
290                self.stopped = true;
291                return Ok(Some((VmValue::Nil, true))); // true = stopped
292            }
293
294            // Check step mode
295            if self.step_mode && self.frames.len() <= self.step_frame_depth + 1 {
296                self.step_mode = false;
297                self.stopped = true;
298                return Ok(Some((VmValue::Nil, true))); // true = stopped
299            }
300        }
301
302        // Execute one instruction cycle
303        self.stopped = false;
304        self.execute_one_cycle().await
305    }
306
    /// Execute a single instruction cycle.
    ///
    /// Returns `Ok(None)` when execution should continue with another cycle,
    /// `Ok(Some((value, false)))` when a final value has been produced (the
    /// `false` means "not paused by the debugger"), and `Err` when an error
    /// was not caught by any exception handler.
    async fn execute_one_cycle(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
        // Check deadline
        if let Some(&(deadline, _)) = self.deadlines.last() {
            if Instant::now() > deadline {
                // The expired deadline is removed before the error is raised.
                self.deadlines.pop();
                let err = VmError::Thrown(VmValue::String(Rc::from("Deadline exceeded")));
                match self.handle_error(err) {
                    Ok(None) => return Ok(None),
                    Ok(Some(val)) => return Ok(Some((val, false))),
                    Err(e) => return Err(e),
                }
            }
        }

        // Get current frame
        let frame = match self.frames.last_mut() {
            Some(f) => f,
            None => {
                // No frames left: whatever is on top of the stack is the result.
                let val = self.stack.pop().unwrap_or(VmValue::Nil);
                return Ok(Some((val, false)));
            }
        };

        // Check if we've reached end of chunk
        // NOTE(review): unlike the `VmError::Return` path below, this implicit
        // return neither restores `saved_source_dir` nor prunes exception
        // handlers belonging to the popped frame — confirm that is intended.
        if frame.ip >= frame.chunk.code.len() {
            let val = self.stack.pop().unwrap_or(VmValue::Nil);
            let popped_frame = self.frames.pop().unwrap();
            if self.frames.is_empty() {
                return Ok(Some((val, false)));
            } else {
                // Hand the value to the caller: restore its environment, drop
                // the callee's stack slots and push the result.
                self.env = popped_frame.saved_env;
                self.stack.truncate(popped_frame.stack_base);
                self.stack.push(val);
                return Ok(None);
            }
        }

        // Fetch the instruction and advance `ip` past it before executing.
        let op = frame.chunk.code[frame.ip];
        frame.ip += 1;

        match self.execute_op(op).await {
            Ok(Some(val)) => Ok(Some((val, false))),
            Ok(None) => Ok(None),
            // An explicit return is delivered as `VmError::Return` and handled here.
            Err(VmError::Return(val)) => {
                if let Some(popped_frame) = self.frames.pop() {
                    if let Some(ref dir) = popped_frame.saved_source_dir {
                        crate::stdlib::set_thread_source_dir(dir);
                    }
                    // Drop handlers that were registered inside the returned frame.
                    let current_depth = self.frames.len();
                    self.exception_handlers
                        .retain(|h| h.frame_depth <= current_depth);
                    if self.frames.is_empty() {
                        return Ok(Some((val, false)));
                    }
                    self.env = popped_frame.saved_env;
                    self.stack.truncate(popped_frame.stack_base);
                    self.stack.push(val);
                    Ok(None)
                } else {
                    Ok(Some((val, false)))
                }
            }
            Err(e) => {
                // Capture the trace once, before handlers unwind any frames.
                if self.error_stack_trace.is_empty() {
                    self.error_stack_trace = self.capture_stack_trace();
                }
                match self.handle_error(e) {
                    Ok(None) => {
                        // The error was caught, so the trace is no longer needed.
                        self.error_stack_trace.clear();
                        Ok(None)
                    }
                    Ok(Some(val)) => Ok(Some((val, false))),
                    Err(e) => Err(self.enrich_error_with_line(e)),
                }
            }
        }
    }
385
386    /// Initialize execution (push the initial frame).
387    pub fn start(&mut self, chunk: &Chunk) {
388        self.frames.push(CallFrame {
389            chunk: chunk.clone(),
390            ip: 0,
391            stack_base: self.stack.len(),
392            saved_env: self.env.clone(),
393            fn_name: String::new(),
394            argc: 0,
395            saved_source_dir: None,
396        });
397    }
398
399    /// Register a sync builtin function.
400    pub fn register_builtin<F>(&mut self, name: &str, f: F)
401    where
402        F: Fn(&[VmValue], &mut String) -> Result<VmValue, VmError> + 'static,
403    {
404        self.builtins.insert(name.to_string(), Rc::new(f));
405    }
406
    /// Remove a sync builtin (so an async version can take precedence).
    ///
    /// Does nothing when no sync builtin with that name is registered.
    pub fn unregister_builtin(&mut self, name: &str) {
        self.builtins.remove(name);
    }
411
    /// Register an async builtin function.
    ///
    /// `f` receives the call arguments by value and returns a future; the
    /// future is boxed and pinned so builtins with different future types can
    /// share one map. An async builtin already registered under `name` is
    /// replaced.
    pub fn register_async_builtin<F, Fut>(&mut self, name: &str, f: F)
    where
        F: Fn(Vec<VmValue>) -> Fut + 'static,
        Fut: Future<Output = Result<VmValue, VmError>> + 'static,
    {
        self.async_builtins
            .insert(name.to_string(), Rc::new(move |args| Box::pin(f(args))));
    }
421
422    /// Create a child VM that shares builtins and env but has fresh execution state.
423    /// Used for parallel/spawn to fork the VM for concurrent tasks.
424    fn child_vm(&self) -> Vm {
425        Vm {
426            stack: Vec::with_capacity(64),
427            env: self.env.clone(),
428            output: String::new(),
429            builtins: self.builtins.clone(),
430            async_builtins: self.async_builtins.clone(),
431            iterators: Vec::new(),
432            frames: Vec::new(),
433            exception_handlers: Vec::new(),
434            spawned_tasks: BTreeMap::new(),
435            task_counter: 0,
436            deadlines: self.deadlines.clone(),
437            breakpoints: Vec::new(),
438            step_mode: false,
439            step_frame_depth: 0,
440            stopped: false,
441            last_line: 0,
442            source_dir: self.source_dir.clone(),
443            imported_paths: Vec::new(),
444            source_file: self.source_file.clone(),
445            source_text: self.source_text.clone(),
446            bridge: self.bridge.clone(),
447            denied_builtins: self.denied_builtins.clone(),
448            cancel_token: None,
449            error_stack_trace: Vec::new(),
450            yield_sender: None,
451            project_root: self.project_root.clone(),
452            globals: self.globals.clone(),
453        }
454    }
455
456    /// Set the source directory for import resolution and introspection.
457    /// Also auto-detects the project root if not already set.
458    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
459        self.source_dir = Some(dir.to_path_buf());
460        crate::stdlib::set_thread_source_dir(dir);
461        // Auto-detect project root if not explicitly set.
462        if self.project_root.is_none() {
463            self.project_root = crate::stdlib::process::find_project_root(dir);
464        }
465    }
466
    /// Explicitly set the project root directory.
    /// Used by ACP/CLI to override auto-detection.
    ///
    /// Once a root is set, `set_source_dir` no longer auto-detects one.
    pub fn set_project_root(&mut self, root: &std::path::Path) {
        self.project_root = Some(root.to_path_buf());
    }
472
473    /// Get the project root directory, falling back to source_dir.
474    pub fn project_root(&self) -> Option<&std::path::Path> {
475        self.project_root.as_deref().or(self.source_dir.as_deref())
476    }
477
478    /// Return all registered builtin names (sync + async).
479    pub fn builtin_names(&self) -> Vec<String> {
480        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
481        names.extend(self.async_builtins.keys().cloned());
482        names
483    }
484
    /// Set a global constant (e.g. `pi`, `e`).
    /// Stored separately from the environment so user-defined variables can shadow them.
    ///
    /// Setting a name that already exists overwrites its value.
    pub fn set_global(&mut self, name: &str, value: VmValue) {
        self.globals.insert(name.to_string(), value);
    }
490
491    /// Execute an import, reading and running the file's declarations.
492    fn execute_import<'a>(
493        &'a mut self,
494        path: &'a str,
495        selected_names: Option<&'a [String]>,
496    ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + 'a>> {
497        Box::pin(async move {
498            use std::path::PathBuf;
499            let _import_span = ScopeSpan::new(crate::tracing::SpanKind::Import, path.to_string());
500
501            // ── Embedded stdlib modules (import "std/...") ──────────────
502            if let Some(module) = path.strip_prefix("std/") {
503                if let Some(source) = crate::stdlib_modules::get_stdlib_source(module) {
504                    let synthetic = PathBuf::from(format!("<stdlib>/{module}.harn"));
505                    if self.imported_paths.contains(&synthetic) {
506                        return Ok(());
507                    }
508                    self.imported_paths.push(synthetic);
509
510                    let mut lexer = harn_lexer::Lexer::new(source);
511                    let tokens = lexer.tokenize().map_err(|e| {
512                        VmError::Runtime(format!("stdlib lex error in std/{module}: {e}"))
513                    })?;
514                    let mut parser = harn_parser::Parser::new(tokens);
515                    let program = parser.parse().map_err(|e| {
516                        VmError::Runtime(format!("stdlib parse error in std/{module}: {e}"))
517                    })?;
518
519                    self.import_declarations(&program, selected_names, None)
520                        .await?;
521                    return Ok(());
522                }
523                return Err(VmError::Runtime(format!(
524                    "Unknown stdlib module: std/{module}"
525                )));
526            }
527
528            // ── Filesystem-based imports ────────────────────────────────
529            let base = self
530                .source_dir
531                .clone()
532                .unwrap_or_else(|| PathBuf::from("."));
533            let mut file_path = base.join(path);
534
535            // Try with .harn extension if no extension
536            if !file_path.exists() && file_path.extension().is_none() {
537                file_path.set_extension("harn");
538            }
539
540            // Try .harn/packages/ fallback (then .burin/packages/ for compat)
541            if !file_path.exists() {
542                for pkg_dir in [".harn/packages", ".burin/packages"] {
543                    let pkg_path = base.join(pkg_dir).join(path);
544                    if pkg_path.exists() {
545                        file_path = if pkg_path.is_dir() {
546                            let lib = pkg_path.join("lib.harn");
547                            if lib.exists() {
548                                lib
549                            } else {
550                                pkg_path
551                            }
552                        } else {
553                            pkg_path
554                        };
555                        break;
556                    }
557                    let mut pkg_harn = pkg_path.clone();
558                    pkg_harn.set_extension("harn");
559                    if pkg_harn.exists() {
560                        file_path = pkg_harn;
561                        break;
562                    }
563                }
564            }
565
566            // Cycle detection
567            let canonical = file_path
568                .canonicalize()
569                .unwrap_or_else(|_| file_path.clone());
570            if self.imported_paths.contains(&canonical) {
571                return Ok(()); // already imported
572            }
573            self.imported_paths.push(canonical);
574
575            // Read, lex, parse
576            let source = std::fs::read_to_string(&file_path).map_err(|e| {
577                VmError::Runtime(format!(
578                    "Import error: cannot read '{}': {e}",
579                    file_path.display()
580                ))
581            })?;
582
583            let mut lexer = harn_lexer::Lexer::new(&source);
584            let tokens = lexer
585                .tokenize()
586                .map_err(|e| VmError::Runtime(format!("Import lex error: {e}")))?;
587            let mut parser = harn_parser::Parser::new(tokens);
588            let program = parser
589                .parse()
590                .map_err(|e| VmError::Runtime(format!("Import parse error: {e}")))?;
591
592            self.import_declarations(&program, selected_names, Some(&file_path))
593                .await?;
594
595            Ok(())
596        })
597    }
598
599    /// Process top-level declarations from an imported module.
600    /// `file_path` is `None` for embedded stdlib modules.
601    fn import_declarations<'a>(
602        &'a mut self,
603        program: &'a [harn_parser::SNode],
604        selected_names: Option<&'a [String]>,
605        file_path: Option<&'a std::path::Path>,
606    ) -> Pin<Box<dyn Future<Output = Result<(), VmError>> + 'a>> {
607        Box::pin(async move {
608            let has_pub = program
609                .iter()
610                .any(|n| matches!(&n.node, harn_parser::Node::FnDecl { is_pub: true, .. }));
611
612            for node in program {
613                match &node.node {
614                    harn_parser::Node::FnDecl {
615                        name,
616                        params,
617                        body,
618                        is_pub,
619                        ..
620                    } => {
621                        // For selective imports: import any function that was explicitly named
622                        // For wildcard imports: if module has pub fns, only import pub ones;
623                        //   if no pub fns, import everything (backward compat)
624                        if selected_names.is_none() && has_pub && !is_pub {
625                            continue;
626                        }
627                        if let Some(names) = selected_names {
628                            if !names.contains(name) {
629                                continue;
630                            }
631                        }
632                        // Check for import collision before compiling
633                        if let Some(VmValue::Closure(_)) = self.env.get(name) {
634                            let module = file_path
635                                .map(|p| p.display().to_string())
636                                .unwrap_or_else(|| "<stdlib>".to_string());
637                            return Err(VmError::Runtime(format!(
638                                "Import collision: '{name}' is already defined when importing {module}. \
639                                 Use selective imports to disambiguate: import {{ {name} }} from \"...\""
640                            )));
641                        }
642                        // Compile the function body into a closure and define it
643                        let mut compiler = crate::Compiler::new();
644                        let func_chunk = compiler
645                            .compile_fn_body(params, body)
646                            .map_err(|e| VmError::Runtime(format!("Import compile error: {e}")))?;
647                        let closure = VmClosure {
648                            func: func_chunk,
649                            env: self.env.clone(),
650                            source_dir: file_path
651                                .and_then(|fp| fp.parent().map(|p| p.to_path_buf())),
652                        };
653                        self.env
654                            .define(name, VmValue::Closure(Rc::new(closure)), false)?;
655                    }
656                    harn_parser::Node::ImportDecl { path: sub_path } => {
657                        let old_dir = self.source_dir.clone();
658                        if let Some(fp) = file_path {
659                            if let Some(parent) = fp.parent() {
660                                self.source_dir = Some(parent.to_path_buf());
661                            }
662                        }
663                        self.execute_import(sub_path, None).await?;
664                        self.source_dir = old_dir;
665                    }
666                    harn_parser::Node::SelectiveImport {
667                        names,
668                        path: sub_path,
669                    } => {
670                        let old_dir = self.source_dir.clone();
671                        if let Some(fp) = file_path {
672                            if let Some(parent) = fp.parent() {
673                                self.source_dir = Some(parent.to_path_buf());
674                            }
675                        }
676                        self.execute_import(sub_path, Some(names)).await?;
677                        self.source_dir = old_dir;
678                    }
679                    _ => {} // Skip other top-level nodes (pipelines, enums, etc.)
680                }
681            }
682
683            Ok(())
684        })
685    }
686
    /// Get the captured output.
    ///
    /// Returns a borrowed view of the VM's output buffer; nothing is cleared.
    pub fn output(&self) -> &str {
        &self.output
    }
691
692    /// Execute a compiled chunk.
693    pub async fn execute(&mut self, chunk: &Chunk) -> Result<VmValue, VmError> {
694        let span_id = crate::tracing::span_start(crate::tracing::SpanKind::Pipeline, "main".into());
695        let result = self.run_chunk(chunk).await;
696        crate::tracing::span_end(span_id);
697        result
698    }
699
700    /// Convert a VmError into either a handled exception (returning Ok) or a propagated error.
701    fn handle_error(&mut self, error: VmError) -> Result<Option<VmValue>, VmError> {
702        // Extract the thrown value from the error
703        let thrown_value = match &error {
704            VmError::Thrown(v) => v.clone(),
705            other => VmValue::String(Rc::from(other.to_string())),
706        };
707
708        if let Some(handler) = self.exception_handlers.pop() {
709            // Check if this is a typed catch that doesn't match the thrown value
710            if !handler.error_type.is_empty() {
711                let matches = match &thrown_value {
712                    VmValue::EnumVariant { enum_name, .. } => *enum_name == handler.error_type,
713                    _ => false,
714                };
715                if !matches {
716                    // This handler doesn't match — try the next one
717                    return self.handle_error(error);
718                }
719            }
720
721            // Unwind call frames back to the handler's frame depth
722            while self.frames.len() > handler.frame_depth {
723                if let Some(frame) = self.frames.pop() {
724                    if let Some(ref dir) = frame.saved_source_dir {
725                        crate::stdlib::set_thread_source_dir(dir);
726                    }
727                    self.env = frame.saved_env;
728                }
729            }
730
731            // Clean up deadlines from unwound frames
732            while self
733                .deadlines
734                .last()
735                .is_some_and(|d| d.1 > handler.frame_depth)
736            {
737                self.deadlines.pop();
738            }
739
740            // Restore stack to handler's depth
741            self.stack.truncate(handler.stack_depth);
742
743            // Push the error value onto the stack (catch body can access it)
744            self.stack.push(thrown_value);
745
746            // Set the IP in the current frame to the catch handler
747            if let Some(frame) = self.frames.last_mut() {
748                frame.ip = handler.catch_ip;
749            }
750
751            Ok(None) // Continue execution
752        } else {
753            Err(error) // No handler, propagate
754        }
755    }
756
    /// Runs `chunk` as a new frame that was passed zero arguments.
    ///
    /// Thin wrapper around `run_chunk_with_argc`.
    async fn run_chunk(&mut self, chunk: &Chunk) -> Result<VmValue, VmError> {
        self.run_chunk_with_argc(chunk, 0).await
    }
760
761    async fn run_chunk_with_argc(
762        &mut self,
763        chunk: &Chunk,
764        argc: usize,
765    ) -> Result<VmValue, VmError> {
766        self.frames.push(CallFrame {
767            chunk: chunk.clone(),
768            ip: 0,
769            stack_base: self.stack.len(),
770            saved_env: self.env.clone(),
771            fn_name: String::new(),
772            argc,
773            saved_source_dir: None,
774        });
775
776        loop {
777            // Check deadline before each instruction
778            if let Some(&(deadline, _)) = self.deadlines.last() {
779                if Instant::now() > deadline {
780                    self.deadlines.pop();
781                    let err = VmError::Thrown(VmValue::String(Rc::from("Deadline exceeded")));
782                    match self.handle_error(err) {
783                        Ok(None) => continue,
784                        Ok(Some(val)) => return Ok(val),
785                        Err(e) => return Err(e),
786                    }
787                }
788            }
789
790            // Get current frame
791            let frame = match self.frames.last_mut() {
792                Some(f) => f,
793                None => return Ok(self.stack.pop().unwrap_or(VmValue::Nil)),
794            };
795
796            // Check if we've reached end of chunk
797            if frame.ip >= frame.chunk.code.len() {
798                let val = self.stack.pop().unwrap_or(VmValue::Nil);
799                let popped_frame = self.frames.pop().unwrap();
800
801                if self.frames.is_empty() {
802                    // We're done with the top-level chunk
803                    return Ok(val);
804                } else {
805                    // Returning from a function call
806                    self.env = popped_frame.saved_env;
807                    self.stack.truncate(popped_frame.stack_base);
808                    self.stack.push(val);
809                    continue;
810                }
811            }
812
813            let op = frame.chunk.code[frame.ip];
814            frame.ip += 1;
815
816            match self.execute_op(op).await {
817                Ok(Some(val)) => return Ok(val),
818                Ok(None) => continue,
819                Err(VmError::Return(val)) => {
820                    // Pop the current frame
821                    if let Some(popped_frame) = self.frames.pop() {
822                        if let Some(ref dir) = popped_frame.saved_source_dir {
823                            crate::stdlib::set_thread_source_dir(dir);
824                        }
825                        // Clean up exception handlers from the returned frame
826                        let current_depth = self.frames.len();
827                        self.exception_handlers
828                            .retain(|h| h.frame_depth <= current_depth);
829
830                        if self.frames.is_empty() {
831                            return Ok(val);
832                        }
833                        self.env = popped_frame.saved_env;
834                        self.stack.truncate(popped_frame.stack_base);
835                        self.stack.push(val);
836                    } else {
837                        return Ok(val);
838                    }
839                }
840                Err(e) => {
841                    // Capture stack trace before error handling unwinds frames
842                    if self.error_stack_trace.is_empty() {
843                        self.error_stack_trace = self.capture_stack_trace();
844                    }
845                    match self.handle_error(e) {
846                        Ok(None) => {
847                            self.error_stack_trace.clear();
848                            continue; // Handler found, continue
849                        }
850                        Ok(Some(val)) => return Ok(val),
851                        Err(e) => return Err(self.enrich_error_with_line(e)),
852                    }
853                }
854            }
855        }
856    }
857
858    /// Capture the current call stack as (fn_name, line, col) tuples.
859    fn capture_stack_trace(&self) -> Vec<(String, usize, usize)> {
860        self.frames
861            .iter()
862            .map(|f| {
863                let idx = if f.ip > 0 { f.ip - 1 } else { 0 };
864                let line = f.chunk.lines.get(idx).copied().unwrap_or(0) as usize;
865                let col = f.chunk.columns.get(idx).copied().unwrap_or(0) as usize;
866                (f.fn_name.clone(), line, col)
867            })
868            .collect()
869    }
870
871    /// Enrich a VmError with source line information from the captured stack
872    /// trace. Appends ` (line N)` to error variants whose messages don't
873    /// already carry location context.
874    fn enrich_error_with_line(&self, error: VmError) -> VmError {
875        // Determine the line from the captured stack trace (innermost frame).
876        let line = self
877            .error_stack_trace
878            .last()
879            .map(|(_, l, _)| *l)
880            .unwrap_or_else(|| self.current_line());
881        if line == 0 {
882            return error;
883        }
884        let suffix = format!(" (line {line})");
885        match error {
886            VmError::Runtime(msg) => VmError::Runtime(format!("{msg}{suffix}")),
887            VmError::TypeError(msg) => VmError::TypeError(format!("{msg}{suffix}")),
888            VmError::DivisionByZero => VmError::Runtime(format!("Division by zero{suffix}")),
889            VmError::UndefinedVariable(name) => {
890                VmError::Runtime(format!("Undefined variable: {name}{suffix}"))
891            }
892            VmError::UndefinedBuiltin(name) => {
893                VmError::Runtime(format!("Undefined builtin: {name}{suffix}"))
894            }
895            VmError::ImmutableAssignment(name) => VmError::Runtime(format!(
896                "Cannot assign to immutable binding: {name}{suffix}"
897            )),
898            VmError::StackOverflow => {
899                VmError::Runtime(format!("Stack overflow: too many nested calls{suffix}"))
900            }
901            // Leave these untouched:
902            // - Thrown: user-thrown errors should not be silently modified
903            // - CategorizedError: structured errors for agent orchestration
904            // - Return: control flow, not a real error
905            // - StackUnderflow / InvalidInstruction: internal VM bugs
906            other => other,
907        }
908    }
909
    /// Upper bound on live call frames: `push_closure_frame` fails with
    /// `VmError::StackOverflow` once `frames.len()` reaches this value.
    const MAX_FRAMES: usize = 512;
911
912    /// Merge the caller's env into a closure's captured env for function calls.
913    fn merge_env_into_closure(caller_env: &VmEnv, closure: &VmClosure) -> VmEnv {
914        let mut call_env = closure.env.clone();
915        for scope in &caller_env.scopes {
916            for (name, (val, mutable)) in &scope.vars {
917                if call_env.get(name).is_none() {
918                    let _ = call_env.define(name, val.clone(), *mutable);
919                }
920            }
921        }
922        call_env
923    }
924
925    /// Push a new call frame for a closure invocation.
926    fn push_closure_frame(
927        &mut self,
928        closure: &VmClosure,
929        args: &[VmValue],
930        _parent_functions: &[CompiledFunction],
931    ) -> Result<(), VmError> {
932        if self.frames.len() >= Self::MAX_FRAMES {
933            return Err(VmError::StackOverflow);
934        }
935        let saved_env = self.env.clone();
936
937        // If this closure originated from an imported module, switch
938        // the thread-local source dir so that render() and other
939        // source-relative builtins resolve relative to the module.
940        let saved_source_dir = if let Some(ref dir) = closure.source_dir {
941            let prev = crate::stdlib::process::VM_SOURCE_DIR.with(|sd| sd.borrow().clone());
942            crate::stdlib::set_thread_source_dir(dir);
943            prev
944        } else {
945            None
946        };
947
948        let mut call_env = Self::merge_env_into_closure(&saved_env, closure);
949        call_env.push_scope();
950
951        let default_start = closure
952            .func
953            .default_start
954            .unwrap_or(closure.func.params.len());
955        for (i, param) in closure.func.params.iter().enumerate() {
956            if i < args.len() {
957                let _ = call_env.define(param, args[i].clone(), false);
958            } else if i < default_start {
959                let _ = call_env.define(param, VmValue::Nil, false);
960            }
961        }
962
963        self.env = call_env;
964
965        self.frames.push(CallFrame {
966            chunk: closure.func.chunk.clone(),
967            ip: 0,
968            stack_base: self.stack.len(),
969            saved_env,
970            fn_name: closure.func.name.clone(),
971            argc: args.len(),
972            saved_source_dir,
973        });
974
975        Ok(())
976    }
977
978    /// Create a generator value by spawning the closure body as an async task.
979    /// The generator body communicates yielded values through an mpsc channel.
980    pub(crate) fn create_generator(&self, closure: &VmClosure, args: &[VmValue]) -> VmValue {
981        use crate::value::VmGenerator;
982
983        // Buffer size of 1: the generator produces one value at a time.
984        let (tx, rx) = tokio::sync::mpsc::channel::<VmValue>(1);
985
986        let mut child = self.child_vm();
987        child.yield_sender = Some(tx);
988
989        // Set up the environment for the generator body
990        let saved_env = child.env.clone();
991        let mut call_env = Self::merge_env_into_closure(&saved_env, closure);
992        call_env.push_scope();
993
994        let default_start = closure
995            .func
996            .default_start
997            .unwrap_or(closure.func.params.len());
998        for (i, param) in closure.func.params.iter().enumerate() {
999            if i < args.len() {
1000                let _ = call_env.define(param, args[i].clone(), false);
1001            } else if i < default_start {
1002                let _ = call_env.define(param, VmValue::Nil, false);
1003            }
1004        }
1005        child.env = call_env;
1006
1007        let chunk = closure.func.chunk.clone();
1008        // Spawn the generator body as an async task.
1009        // The task will execute until return, sending yielded values through the channel.
1010        tokio::task::spawn_local(async move {
1011            let _ = child.run_chunk(&chunk).await;
1012            // When the generator body finishes (return or fall-through),
1013            // the sender is dropped, signaling completion to the receiver.
1014        });
1015
1016        VmValue::Generator(VmGenerator {
1017            done: Rc::new(std::cell::Cell::new(false)),
1018            receiver: Rc::new(tokio::sync::Mutex::new(rx)),
1019        })
1020    }
1021
1022    fn pop(&mut self) -> Result<VmValue, VmError> {
1023        self.stack.pop().ok_or(VmError::StackUnderflow)
1024    }
1025
1026    fn peek(&self) -> Result<&VmValue, VmError> {
1027        self.stack.last().ok_or(VmError::StackUnderflow)
1028    }
1029
1030    fn const_string(c: &Constant) -> Result<String, VmError> {
1031        match c {
1032            Constant::String(s) => Ok(s.clone()),
1033            _ => Err(VmError::TypeError("expected string constant".into())),
1034        }
1035    }
1036
1037    /// Call a closure (used by method calls like .map/.filter etc.)
1038    /// Uses recursive execution for simplicity in method dispatch.
1039    fn call_closure<'a>(
1040        &'a mut self,
1041        closure: &'a VmClosure,
1042        args: &'a [VmValue],
1043        _parent_functions: &'a [CompiledFunction],
1044    ) -> Pin<Box<dyn Future<Output = Result<VmValue, VmError>> + 'a>> {
1045        Box::pin(async move {
1046            let saved_env = self.env.clone();
1047            let saved_frames = std::mem::take(&mut self.frames);
1048            let saved_handlers = std::mem::take(&mut self.exception_handlers);
1049            let saved_iterators = std::mem::take(&mut self.iterators);
1050            let saved_deadlines = std::mem::take(&mut self.deadlines);
1051
1052            let mut call_env = Self::merge_env_into_closure(&saved_env, closure);
1053            call_env.push_scope();
1054
1055            let default_start = closure
1056                .func
1057                .default_start
1058                .unwrap_or(closure.func.params.len());
1059            for (i, param) in closure.func.params.iter().enumerate() {
1060                if i < args.len() {
1061                    let _ = call_env.define(param, args[i].clone(), false);
1062                } else if i < default_start {
1063                    let _ = call_env.define(param, VmValue::Nil, false);
1064                }
1065            }
1066
1067            self.env = call_env;
1068            let argc = args.len();
1069            let result = self.run_chunk_with_argc(&closure.func.chunk, argc).await;
1070
1071            self.env = saved_env;
1072            self.frames = saved_frames;
1073            self.exception_handlers = saved_handlers;
1074            self.iterators = saved_iterators;
1075            self.deadlines = saved_deadlines;
1076
1077            result
1078        })
1079    }
1080
1081    /// Public wrapper for `call_closure`, used by the MCP server to invoke
1082    /// tool handler closures from outside the VM execution loop.
1083    pub async fn call_closure_pub(
1084        &mut self,
1085        closure: &VmClosure,
1086        args: &[VmValue],
1087        functions: &[CompiledFunction],
1088    ) -> Result<VmValue, VmError> {
1089        self.call_closure(closure, args, functions).await
1090    }
1091
1092    /// Resolve a named builtin: sync builtins → async builtins → bridge → error.
1093    /// Used by Call, TailCall, and Pipe handlers to avoid duplicating this lookup.
1094    async fn call_named_builtin(
1095        &mut self,
1096        name: &str,
1097        args: Vec<VmValue>,
1098    ) -> Result<VmValue, VmError> {
1099        // Auto-trace LLM calls and tool calls
1100        let span_kind = match name {
1101            "llm_call" | "llm_stream" | "agent_loop" => Some(crate::tracing::SpanKind::LlmCall),
1102            "mcp_call" => Some(crate::tracing::SpanKind::ToolCall),
1103            _ => None,
1104        };
1105        let _span = span_kind.map(|kind| ScopeSpan::new(kind, name.to_string()));
1106
1107        // Sandbox check: deny builtins blocked by --deny/--allow flags.
1108        if self.denied_builtins.contains(name) {
1109            return Err(VmError::CategorizedError {
1110                message: format!("Tool '{}' is not permitted.", name),
1111                category: ErrorCategory::ToolRejected,
1112            });
1113        }
1114        crate::orchestration::enforce_current_policy_for_builtin(name, &args)?;
1115        if let Some(builtin) = self.builtins.get(name).cloned() {
1116            builtin(&args, &mut self.output)
1117        } else if let Some(async_builtin) = self.async_builtins.get(name).cloned() {
1118            CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1119                slot.borrow_mut().push(self.child_vm());
1120            });
1121            let result = async_builtin(args).await;
1122            CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1123                slot.borrow_mut().pop();
1124            });
1125            result
1126        } else if let Some(bridge) = &self.bridge {
1127            crate::orchestration::enforce_current_policy_for_bridge_builtin(name)?;
1128            let args_json: Vec<serde_json::Value> =
1129                args.iter().map(crate::llm::vm_value_to_json).collect();
1130            let result = bridge
1131                .call(
1132                    "builtin_call",
1133                    serde_json::json!({"name": name, "args": args_json}),
1134                )
1135                .await?;
1136            Ok(crate::bridge::json_result_to_vm_value(&result))
1137        } else {
1138            let all_builtins = self
1139                .builtins
1140                .keys()
1141                .chain(self.async_builtins.keys())
1142                .map(|s| s.as_str());
1143            if let Some(suggestion) = crate::value::closest_match(name, all_builtins) {
1144                return Err(VmError::Runtime(format!(
1145                    "Undefined builtin: {name} (did you mean `{suggestion}`?)"
1146                )));
1147            }
1148            Err(VmError::UndefinedBuiltin(name.to_string()))
1149        }
1150    }
1151}
1152
1153pub fn take_async_builtin_child_vm() -> Option<Vm> {
1154    CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| slot.borrow_mut().pop())
1155}
1156
1157pub fn restore_async_builtin_child_vm(vm: Vm) {
1158    CURRENT_ASYNC_BUILTIN_CHILD_VM.with(|slot| {
1159        slot.borrow_mut().push(vm);
1160    });
1161}
1162
1163impl Default for Vm {
1164    fn default() -> Self {
1165        Self::new()
1166    }
1167}
1168
1169#[cfg(test)]
1170mod tests {
1171    use super::*;
1172    use crate::compiler::Compiler;
1173    use crate::stdlib::register_vm_stdlib;
1174    use harn_lexer::Lexer;
1175    use harn_parser::Parser;
1176
1177    fn run_harn(source: &str) -> (String, VmValue) {
1178        let rt = tokio::runtime::Builder::new_current_thread()
1179            .enable_all()
1180            .build()
1181            .unwrap();
1182        rt.block_on(async {
1183            let local = tokio::task::LocalSet::new();
1184            local
1185                .run_until(async {
1186                    let mut lexer = Lexer::new(source);
1187                    let tokens = lexer.tokenize().unwrap();
1188                    let mut parser = Parser::new(tokens);
1189                    let program = parser.parse().unwrap();
1190                    let chunk = Compiler::new().compile(&program).unwrap();
1191
1192                    let mut vm = Vm::new();
1193                    register_vm_stdlib(&mut vm);
1194                    let result = vm.execute(&chunk).await.unwrap();
1195                    (vm.output().to_string(), result)
1196                })
1197                .await
1198        })
1199    }
1200
1201    fn run_output(source: &str) -> String {
1202        run_harn(source).0.trim_end().to_string()
1203    }
1204
1205    fn run_harn_result(source: &str) -> Result<(String, VmValue), VmError> {
1206        let rt = tokio::runtime::Builder::new_current_thread()
1207            .enable_all()
1208            .build()
1209            .unwrap();
1210        rt.block_on(async {
1211            let local = tokio::task::LocalSet::new();
1212            local
1213                .run_until(async {
1214                    let mut lexer = Lexer::new(source);
1215                    let tokens = lexer.tokenize().unwrap();
1216                    let mut parser = Parser::new(tokens);
1217                    let program = parser.parse().unwrap();
1218                    let chunk = Compiler::new().compile(&program).unwrap();
1219
1220                    let mut vm = Vm::new();
1221                    register_vm_stdlib(&mut vm);
1222                    let result = vm.execute(&chunk).await?;
1223                    Ok((vm.output().to_string(), result))
1224                })
1225                .await
1226        })
1227    }
1228
1229    #[test]
1230    fn test_arithmetic() {
1231        let out =
1232            run_output("pipeline t(task) { log(2 + 3)\nlog(10 - 4)\nlog(3 * 5)\nlog(10 / 3) }");
1233        assert_eq!(out, "[harn] 5\n[harn] 6\n[harn] 15\n[harn] 3");
1234    }
1235
1236    #[test]
1237    fn test_mixed_arithmetic() {
1238        let out = run_output("pipeline t(task) { log(3 + 1.5)\nlog(10 - 2.5) }");
1239        assert_eq!(out, "[harn] 4.5\n[harn] 7.5");
1240    }
1241
1242    #[test]
1243    fn test_comparisons() {
1244        let out =
1245            run_output("pipeline t(task) { log(1 < 2)\nlog(2 > 3)\nlog(1 == 1)\nlog(1 != 2) }");
1246        assert_eq!(out, "[harn] true\n[harn] false\n[harn] true\n[harn] true");
1247    }
1248
1249    #[test]
1250    fn test_let_var() {
1251        let out = run_output("pipeline t(task) { let x = 42\nlog(x)\nvar y = 1\ny = 2\nlog(y) }");
1252        assert_eq!(out, "[harn] 42\n[harn] 2");
1253    }
1254
1255    #[test]
1256    fn test_if_else() {
1257        let out = run_output(
1258            r#"pipeline t(task) { if true { log("yes") } if false { log("wrong") } else { log("no") } }"#,
1259        );
1260        assert_eq!(out, "[harn] yes\n[harn] no");
1261    }
1262
1263    #[test]
1264    fn test_while_loop() {
1265        let out = run_output("pipeline t(task) { var i = 0\n while i < 5 { i = i + 1 }\n log(i) }");
1266        assert_eq!(out, "[harn] 5");
1267    }
1268
1269    #[test]
1270    fn test_for_in() {
1271        let out = run_output("pipeline t(task) { for item in [1, 2, 3] { log(item) } }");
1272        assert_eq!(out, "[harn] 1\n[harn] 2\n[harn] 3");
1273    }
1274
1275    #[test]
1276    fn test_fn_decl_and_call() {
1277        let out = run_output("pipeline t(task) { fn add(a, b) { return a + b }\nlog(add(3, 4)) }");
1278        assert_eq!(out, "[harn] 7");
1279    }
1280
1281    #[test]
1282    fn test_closure() {
1283        let out = run_output("pipeline t(task) { let double = { x -> x * 2 }\nlog(double(5)) }");
1284        assert_eq!(out, "[harn] 10");
1285    }
1286
1287    #[test]
1288    fn test_closure_capture() {
1289        let out = run_output(
1290            "pipeline t(task) { let base = 10\nfn offset(x) { return x + base }\nlog(offset(5)) }",
1291        );
1292        assert_eq!(out, "[harn] 15");
1293    }
1294
1295    #[test]
1296    fn test_string_concat() {
1297        let out = run_output(
1298            r#"pipeline t(task) { let a = "hello" + " " + "world"
1299log(a) }"#,
1300        );
1301        assert_eq!(out, "[harn] hello world");
1302    }
1303
1304    #[test]
1305    fn test_list_map() {
1306        let out = run_output(
1307            "pipeline t(task) { let doubled = [1, 2, 3].map({ x -> x * 2 })\nlog(doubled) }",
1308        );
1309        assert_eq!(out, "[harn] [2, 4, 6]");
1310    }
1311
1312    #[test]
1313    fn test_list_filter() {
1314        let out = run_output(
1315            "pipeline t(task) { let big = [1, 2, 3, 4, 5].filter({ x -> x > 3 })\nlog(big) }",
1316        );
1317        assert_eq!(out, "[harn] [4, 5]");
1318    }
1319
1320    #[test]
1321    fn test_list_reduce() {
1322        let out = run_output(
1323            "pipeline t(task) { let sum = [1, 2, 3, 4].reduce(0, { acc, x -> acc + x })\nlog(sum) }",
1324        );
1325        assert_eq!(out, "[harn] 10");
1326    }
1327
1328    #[test]
1329    fn test_dict_access() {
1330        let out = run_output(
1331            r#"pipeline t(task) { let d = {name: "test", value: 42}
1332log(d.name)
1333log(d.value) }"#,
1334        );
1335        assert_eq!(out, "[harn] test\n[harn] 42");
1336    }
1337
1338    #[test]
1339    fn test_dict_methods() {
1340        let out = run_output(
1341            r#"pipeline t(task) { let d = {a: 1, b: 2}
1342log(d.keys())
1343log(d.values())
1344log(d.has("a"))
1345log(d.has("z")) }"#,
1346        );
1347        assert_eq!(
1348            out,
1349            "[harn] [a, b]\n[harn] [1, 2]\n[harn] true\n[harn] false"
1350        );
1351    }
1352
1353    #[test]
1354    fn test_pipe_operator() {
1355        let out = run_output(
1356            "pipeline t(task) { fn double(x) { return x * 2 }\nlet r = 5 |> double\nlog(r) }",
1357        );
1358        assert_eq!(out, "[harn] 10");
1359    }
1360
1361    #[test]
1362    fn test_pipe_with_closure() {
1363        let out = run_output(
1364            r#"pipeline t(task) { let r = "hello world" |> { s -> s.split(" ") }
1365log(r) }"#,
1366        );
1367        assert_eq!(out, "[harn] [hello, world]");
1368    }
1369
1370    #[test]
1371    fn test_nil_coalescing() {
1372        let out = run_output(
1373            r#"pipeline t(task) { let a = nil ?? "fallback"
1374log(a)
1375let b = "present" ?? "fallback"
1376log(b) }"#,
1377        );
1378        assert_eq!(out, "[harn] fallback\n[harn] present");
1379    }
1380
1381    #[test]
1382    fn test_logical_operators() {
1383        let out =
1384            run_output("pipeline t(task) { log(true && false)\nlog(true || false)\nlog(!true) }");
1385        assert_eq!(out, "[harn] false\n[harn] true\n[harn] false");
1386    }
1387
1388    #[test]
1389    fn test_match() {
1390        let out = run_output(
1391            r#"pipeline t(task) { let x = "b"
1392match x { "a" -> { log("first") } "b" -> { log("second") } "c" -> { log("third") } } }"#,
1393        );
1394        assert_eq!(out, "[harn] second");
1395    }
1396
1397    #[test]
1398    fn test_subscript() {
1399        let out = run_output("pipeline t(task) { let arr = [10, 20, 30]\nlog(arr[1]) }");
1400        assert_eq!(out, "[harn] 20");
1401    }
1402
1403    #[test]
1404    fn test_string_methods() {
1405        let out = run_output(
1406            r#"pipeline t(task) { log("hello world".replace("world", "harn"))
1407log("a,b,c".split(","))
1408log("  hello  ".trim())
1409log("hello".starts_with("hel"))
1410log("hello".ends_with("lo"))
1411log("hello".substring(1, 3)) }"#,
1412        );
1413        assert_eq!(
1414            out,
1415            "[harn] hello harn\n[harn] [a, b, c]\n[harn] hello\n[harn] true\n[harn] true\n[harn] el"
1416        );
1417    }
1418
1419    #[test]
1420    fn test_list_properties() {
1421        let out = run_output(
1422            "pipeline t(task) { let list = [1, 2, 3]\nlog(list.count)\nlog(list.empty)\nlog(list.first)\nlog(list.last) }",
1423        );
1424        assert_eq!(out, "[harn] 3\n[harn] false\n[harn] 1\n[harn] 3");
1425    }
1426
1427    #[test]
1428    fn test_recursive_function() {
1429        let out = run_output(
1430            "pipeline t(task) { fn fib(n) { if n <= 1 { return n } return fib(n - 1) + fib(n - 2) }\nlog(fib(10)) }",
1431        );
1432        assert_eq!(out, "[harn] 55");
1433    }
1434
1435    #[test]
1436    fn test_ternary() {
1437        let out = run_output(
1438            r#"pipeline t(task) { let x = 5
1439let r = x > 0 ? "positive" : "non-positive"
1440log(r) }"#,
1441        );
1442        assert_eq!(out, "[harn] positive");
1443    }
1444
1445    #[test]
1446    fn test_for_in_dict() {
1447        let out = run_output(
1448            "pipeline t(task) { let d = {a: 1, b: 2}\nfor entry in d { log(entry.key) } }",
1449        );
1450        assert_eq!(out, "[harn] a\n[harn] b");
1451    }
1452
1453    #[test]
1454    fn test_list_any_all() {
1455        let out = run_output(
1456            "pipeline t(task) { let nums = [2, 4, 6]\nlog(nums.any({ x -> x > 5 }))\nlog(nums.all({ x -> x > 0 }))\nlog(nums.all({ x -> x > 3 })) }",
1457        );
1458        assert_eq!(out, "[harn] true\n[harn] true\n[harn] false");
1459    }
1460
1461    #[test]
1462    fn test_disassembly() {
1463        let mut lexer = Lexer::new("pipeline t(task) { log(2 + 3) }");
1464        let tokens = lexer.tokenize().unwrap();
1465        let mut parser = Parser::new(tokens);
1466        let program = parser.parse().unwrap();
1467        let chunk = Compiler::new().compile(&program).unwrap();
1468        let disasm = chunk.disassemble("test");
1469        assert!(disasm.contains("CONSTANT"));
1470        assert!(disasm.contains("ADD"));
1471        assert!(disasm.contains("CALL"));
1472    }
1473
1474    // --- Error handling tests ---
1475
1476    #[test]
1477    fn test_try_catch_basic() {
1478        let out = run_output(
1479            r#"pipeline t(task) { try { throw "oops" } catch(e) { log("caught: " + e) } }"#,
1480        );
1481        assert_eq!(out, "[harn] caught: oops");
1482    }
1483
1484    #[test]
1485    fn test_try_no_error() {
1486        let out = run_output(
1487            r#"pipeline t(task) {
1488var result = 0
1489try { result = 42 } catch(e) { result = 0 }
1490log(result)
1491}"#,
1492        );
1493        assert_eq!(out, "[harn] 42");
1494    }
1495
1496    #[test]
1497    fn test_throw_uncaught() {
1498        let result = run_harn_result(r#"pipeline t(task) { throw "boom" }"#);
1499        assert!(result.is_err());
1500    }
1501
1502    // --- Additional test coverage ---
1503
1504    fn run_vm(source: &str) -> String {
1505        let rt = tokio::runtime::Builder::new_current_thread()
1506            .enable_all()
1507            .build()
1508            .unwrap();
1509        rt.block_on(async {
1510            let local = tokio::task::LocalSet::new();
1511            local
1512                .run_until(async {
1513                    let mut lexer = Lexer::new(source);
1514                    let tokens = lexer.tokenize().unwrap();
1515                    let mut parser = Parser::new(tokens);
1516                    let program = parser.parse().unwrap();
1517                    let chunk = Compiler::new().compile(&program).unwrap();
1518                    let mut vm = Vm::new();
1519                    register_vm_stdlib(&mut vm);
1520                    vm.execute(&chunk).await.unwrap();
1521                    vm.output().to_string()
1522                })
1523                .await
1524        })
1525    }
1526
1527    fn run_vm_err(source: &str) -> String {
1528        let rt = tokio::runtime::Builder::new_current_thread()
1529            .enable_all()
1530            .build()
1531            .unwrap();
1532        rt.block_on(async {
1533            let local = tokio::task::LocalSet::new();
1534            local
1535                .run_until(async {
1536                    let mut lexer = Lexer::new(source);
1537                    let tokens = lexer.tokenize().unwrap();
1538                    let mut parser = Parser::new(tokens);
1539                    let program = parser.parse().unwrap();
1540                    let chunk = Compiler::new().compile(&program).unwrap();
1541                    let mut vm = Vm::new();
1542                    register_vm_stdlib(&mut vm);
1543                    match vm.execute(&chunk).await {
1544                        Err(e) => format!("{}", e),
1545                        Ok(_) => panic!("Expected error"),
1546                    }
1547                })
1548                .await
1549        })
1550    }
1551
1552    #[test]
1553    fn test_hello_world() {
1554        let out = run_vm(r#"pipeline default(task) { log("hello") }"#);
1555        assert_eq!(out, "[harn] hello\n");
1556    }
1557
1558    #[test]
1559    fn test_arithmetic_new() {
1560        let out = run_vm("pipeline default(task) { log(2 + 3) }");
1561        assert_eq!(out, "[harn] 5\n");
1562    }
1563
1564    #[test]
1565    fn test_string_concat_new() {
1566        let out = run_vm(r#"pipeline default(task) { log("a" + "b") }"#);
1567        assert_eq!(out, "[harn] ab\n");
1568    }
1569
1570    #[test]
1571    fn test_if_else_new() {
1572        let out = run_vm("pipeline default(task) { if true { log(1) } else { log(2) } }");
1573        assert_eq!(out, "[harn] 1\n");
1574    }
1575
1576    #[test]
1577    fn test_for_loop_new() {
1578        let out = run_vm("pipeline default(task) { for i in [1, 2, 3] { log(i) } }");
1579        assert_eq!(out, "[harn] 1\n[harn] 2\n[harn] 3\n");
1580    }
1581
1582    #[test]
1583    fn test_while_loop_new() {
1584        let out = run_vm("pipeline default(task) { var i = 0\nwhile i < 3 { log(i)\ni = i + 1 } }");
1585        assert_eq!(out, "[harn] 0\n[harn] 1\n[harn] 2\n");
1586    }
1587
1588    #[test]
1589    fn test_function_call_new() {
1590        let out =
1591            run_vm("pipeline default(task) { fn add(a, b) { return a + b }\nlog(add(2, 3)) }");
1592        assert_eq!(out, "[harn] 5\n");
1593    }
1594
1595    #[test]
1596    fn test_closure_new() {
1597        let out = run_vm("pipeline default(task) { let f = { x -> x * 2 }\nlog(f(5)) }");
1598        assert_eq!(out, "[harn] 10\n");
1599    }
1600
1601    #[test]
1602    fn test_recursion() {
1603        let out = run_vm("pipeline default(task) { fn fact(n) { if n <= 1 { return 1 }\nreturn n * fact(n - 1) }\nlog(fact(5)) }");
1604        assert_eq!(out, "[harn] 120\n");
1605    }
1606
1607    #[test]
1608    fn test_try_catch_new() {
1609        let out = run_vm(r#"pipeline default(task) { try { throw "err" } catch (e) { log(e) } }"#);
1610        assert_eq!(out, "[harn] err\n");
1611    }
1612
1613    #[test]
1614    fn test_try_no_error_new() {
1615        let out = run_vm("pipeline default(task) { try { log(1) } catch (e) { log(2) } }");
1616        assert_eq!(out, "[harn] 1\n");
1617    }
1618
1619    #[test]
1620    fn test_list_map_new() {
1621        let out =
1622            run_vm("pipeline default(task) { let r = [1, 2, 3].map({ x -> x * 2 })\nlog(r) }");
1623        assert_eq!(out, "[harn] [2, 4, 6]\n");
1624    }
1625
1626    #[test]
1627    fn test_list_filter_new() {
1628        let out = run_vm(
1629            "pipeline default(task) { let r = [1, 2, 3, 4].filter({ x -> x > 2 })\nlog(r) }",
1630        );
1631        assert_eq!(out, "[harn] [3, 4]\n");
1632    }
1633
1634    #[test]
1635    fn test_dict_access_new() {
1636        let out = run_vm("pipeline default(task) { let d = {name: \"Alice\"}\nlog(d.name) }");
1637        assert_eq!(out, "[harn] Alice\n");
1638    }
1639
1640    #[test]
1641    fn test_string_interpolation() {
1642        let out = run_vm("pipeline default(task) { let x = 42\nlog(\"val=${x}\") }");
1643        assert_eq!(out, "[harn] val=42\n");
1644    }
1645
1646    #[test]
1647    fn test_match_new() {
1648        let out = run_vm(
1649            "pipeline default(task) { let x = \"b\"\nmatch x { \"a\" -> { log(1) } \"b\" -> { log(2) } } }",
1650        );
1651        assert_eq!(out, "[harn] 2\n");
1652    }
1653
1654    #[test]
1655    fn test_json_roundtrip() {
1656        let out = run_vm("pipeline default(task) { let s = json_stringify({a: 1})\nlog(s) }");
1657        assert!(out.contains("\"a\""));
1658        assert!(out.contains("1"));
1659    }
1660
1661    #[test]
1662    fn test_type_of() {
1663        let out = run_vm("pipeline default(task) { log(type_of(42))\nlog(type_of(\"hi\")) }");
1664        assert_eq!(out, "[harn] int\n[harn] string\n");
1665    }
1666
1667    #[test]
1668    fn test_stack_overflow() {
1669        let err = run_vm_err("pipeline default(task) { fn f() { f() }\nf() }");
1670        assert!(
1671            err.contains("stack") || err.contains("overflow") || err.contains("recursion"),
1672            "Expected stack overflow error, got: {}",
1673            err
1674        );
1675    }
1676
1677    #[test]
1678    fn test_division_by_zero() {
1679        let err = run_vm_err("pipeline default(task) { log(1 / 0) }");
1680        assert!(
1681            err.contains("Division by zero") || err.contains("division"),
1682            "Expected division by zero error, got: {}",
1683            err
1684        );
1685    }
1686
1687    #[test]
1688    fn test_float_division_by_zero_uses_ieee_values() {
1689        let out = run_vm(
1690            "pipeline default(task) { log(is_nan(0.0 / 0.0))\nlog(is_infinite(1.0 / 0.0))\nlog(is_infinite(-1.0 / 0.0)) }",
1691        );
1692        assert_eq!(out, "[harn] true\n[harn] true\n[harn] true\n");
1693    }
1694
1695    #[test]
1696    fn test_reusing_catch_binding_name_in_same_block() {
1697        let out = run_vm(
1698            r#"pipeline default(task) {
1699try {
1700    throw "a"
1701} catch e {
1702    log(e)
1703}
1704try {
1705    throw "b"
1706} catch e {
1707    log(e)
1708}
1709}"#,
1710        );
1711        assert_eq!(out, "[harn] a\n[harn] b\n");
1712    }
1713
1714    #[test]
1715    fn test_try_catch_nested() {
1716        let out = run_output(
1717            r#"pipeline t(task) {
1718try {
1719    try {
1720        throw "inner"
1721    } catch(e) {
1722        log("inner caught: " + e)
1723        throw "outer"
1724    }
1725} catch(e2) {
1726    log("outer caught: " + e2)
1727}
1728}"#,
1729        );
1730        assert_eq!(
1731            out,
1732            "[harn] inner caught: inner\n[harn] outer caught: outer"
1733        );
1734    }
1735
1736    // --- Concurrency tests ---
1737
1738    #[test]
1739    fn test_parallel_basic() {
1740        let out = run_output(
1741            "pipeline t(task) { let results = parallel(3) { i -> i * 10 }\nlog(results) }",
1742        );
1743        assert_eq!(out, "[harn] [0, 10, 20]");
1744    }
1745
1746    #[test]
1747    fn test_parallel_no_variable() {
1748        let out = run_output("pipeline t(task) { let results = parallel(3) { 42 }\nlog(results) }");
1749        assert_eq!(out, "[harn] [42, 42, 42]");
1750    }
1751
1752    #[test]
1753    fn test_parallel_map_basic() {
1754        let out = run_output(
1755            "pipeline t(task) { let results = parallel_map([1, 2, 3]) { x -> x * x }\nlog(results) }",
1756        );
1757        assert_eq!(out, "[harn] [1, 4, 9]");
1758    }
1759
1760    #[test]
1761    fn test_spawn_await() {
1762        let out = run_output(
1763            r#"pipeline t(task) {
1764let handle = spawn { log("spawned") }
1765let result = await(handle)
1766log("done")
1767}"#,
1768        );
1769        assert_eq!(out, "[harn] spawned\n[harn] done");
1770    }
1771
1772    #[test]
1773    fn test_spawn_cancel() {
1774        let out = run_output(
1775            r#"pipeline t(task) {
1776let handle = spawn { log("should be cancelled") }
1777cancel(handle)
1778log("cancelled")
1779}"#,
1780        );
1781        assert_eq!(out, "[harn] cancelled");
1782    }
1783
1784    #[test]
1785    fn test_spawn_returns_value() {
1786        let out = run_output("pipeline t(task) { let h = spawn { 42 }\nlet r = await(h)\nlog(r) }");
1787        assert_eq!(out, "[harn] 42");
1788    }
1789
1790    // --- Deadline tests ---
1791
1792    #[test]
1793    fn test_deadline_success() {
1794        let out = run_output(
1795            r#"pipeline t(task) {
1796let result = deadline 5s { log("within deadline")
179742 }
1798log(result)
1799}"#,
1800        );
1801        assert_eq!(out, "[harn] within deadline\n[harn] 42");
1802    }
1803
1804    #[test]
1805    fn test_deadline_exceeded() {
1806        let result = run_harn_result(
1807            r#"pipeline t(task) {
1808deadline 1ms {
1809  var i = 0
1810  while i < 1000000 { i = i + 1 }
1811}
1812}"#,
1813        );
1814        assert!(result.is_err());
1815    }
1816
1817    #[test]
1818    fn test_deadline_caught_by_try() {
1819        let out = run_output(
1820            r#"pipeline t(task) {
1821try {
1822  deadline 1ms {
1823    var i = 0
1824    while i < 1000000 { i = i + 1 }
1825  }
1826} catch(e) {
1827  log("caught")
1828}
1829}"#,
1830        );
1831        assert_eq!(out, "[harn] caught");
1832    }
1833
1834    /// Helper that runs Harn source with a set of denied builtins.
1835    fn run_harn_with_denied(
1836        source: &str,
1837        denied: HashSet<String>,
1838    ) -> Result<(String, VmValue), VmError> {
1839        let rt = tokio::runtime::Builder::new_current_thread()
1840            .enable_all()
1841            .build()
1842            .unwrap();
1843        rt.block_on(async {
1844            let local = tokio::task::LocalSet::new();
1845            local
1846                .run_until(async {
1847                    let mut lexer = Lexer::new(source);
1848                    let tokens = lexer.tokenize().unwrap();
1849                    let mut parser = Parser::new(tokens);
1850                    let program = parser.parse().unwrap();
1851                    let chunk = Compiler::new().compile(&program).unwrap();
1852
1853                    let mut vm = Vm::new();
1854                    register_vm_stdlib(&mut vm);
1855                    vm.set_denied_builtins(denied);
1856                    let result = vm.execute(&chunk).await?;
1857                    Ok((vm.output().to_string(), result))
1858                })
1859                .await
1860        })
1861    }
1862
1863    #[test]
1864    fn test_sandbox_deny_builtin() {
1865        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1866        let result = run_harn_with_denied(
1867            r#"pipeline t(task) {
1868let xs = [1, 2]
1869push(xs, 3)
1870}"#,
1871            denied,
1872        );
1873        let err = result.unwrap_err();
1874        let msg = format!("{err}");
1875        assert!(
1876            msg.contains("not permitted"),
1877            "expected not permitted, got: {msg}"
1878        );
1879        assert!(
1880            msg.contains("push"),
1881            "expected builtin name in error, got: {msg}"
1882        );
1883    }
1884
1885    #[test]
1886    fn test_sandbox_allowed_builtin_works() {
1887        // Denying "push" should not block "log"
1888        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1889        let result = run_harn_with_denied(r#"pipeline t(task) { log("hello") }"#, denied);
1890        let (output, _) = result.unwrap();
1891        assert_eq!(output.trim(), "[harn] hello");
1892    }
1893
1894    #[test]
1895    fn test_sandbox_empty_denied_set() {
1896        // With an empty denied set, everything should work.
1897        let result = run_harn_with_denied(r#"pipeline t(task) { log("ok") }"#, HashSet::new());
1898        let (output, _) = result.unwrap();
1899        assert_eq!(output.trim(), "[harn] ok");
1900    }
1901
1902    #[test]
1903    fn test_sandbox_propagates_to_spawn() {
1904        // Denied builtins should propagate to spawned VMs.
1905        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1906        let result = run_harn_with_denied(
1907            r#"pipeline t(task) {
1908let handle = spawn {
1909  let xs = [1, 2]
1910  push(xs, 3)
1911}
1912await(handle)
1913}"#,
1914            denied,
1915        );
1916        let err = result.unwrap_err();
1917        let msg = format!("{err}");
1918        assert!(
1919            msg.contains("not permitted"),
1920            "expected not permitted in spawned VM, got: {msg}"
1921        );
1922    }
1923
1924    #[test]
1925    fn test_sandbox_propagates_to_parallel() {
1926        // Denied builtins should propagate to parallel VMs.
1927        let denied: HashSet<String> = ["push".to_string()].into_iter().collect();
1928        let result = run_harn_with_denied(
1929            r#"pipeline t(task) {
1930let results = parallel(2) { i ->
1931  let xs = [1, 2]
1932  push(xs, 3)
1933}
1934}"#,
1935            denied,
1936        );
1937        let err = result.unwrap_err();
1938        let msg = format!("{err}");
1939        assert!(
1940            msg.contains("not permitted"),
1941            "expected not permitted in parallel VM, got: {msg}"
1942        );
1943    }
1944}
harn_vm/vm.rs

harn_vm/
vm.rs