Skip to main content

harn_vm/vm/
state.rs

1use std::collections::{BTreeMap, HashSet};
2use std::rc::Rc;
3use std::sync::Arc;
4use std::time::Instant;
5
6use crate::chunk::{Chunk, ChunkRef, Constant};
7use crate::runtime_limits::RuntimeLimits;
8use crate::value::{
9    ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
10};
11use crate::BuiltinId;
12
13use super::debug::DebugHook;
14use super::modules::LoadedModule;
15use super::VmBuiltinMetadata;
16
17/// RAII guard that starts a tracing span on creation and ends it on drop.
18pub(crate) struct ScopeSpan(u64);
19
20impl ScopeSpan {
21    pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
22        Self(crate::tracing::span_start(kind, name))
23    }
24}
25
26impl Drop for ScopeSpan {
27    fn drop(&mut self) {
28        crate::tracing::span_end(self.0);
29    }
30}
31
32#[derive(Clone)]
33pub(crate) struct LocalSlot {
34    pub(crate) value: VmValue,
35    pub(crate) initialized: bool,
36    pub(crate) synced: bool,
37}
38
39#[derive(Clone)]
40pub(crate) struct InterruptHandler {
41    pub(crate) handle: i64,
42    pub(crate) signals: Vec<String>,
43    pub(crate) once: bool,
44    pub(crate) graceful_timeout_ms: Option<u64>,
45    pub(crate) handler: VmValue,
46}
47
48/// Call frame for function execution.
49pub(crate) struct CallFrame {
50    pub(crate) chunk: ChunkRef,
51    pub(crate) ip: usize,
52    pub(crate) stack_base: usize,
53    pub(crate) saved_env: VmEnv,
54    /// Env snapshot captured at call-time, *after* argument binding. Used
55    /// by the debugger's `restartFrame` to rewind this frame to its
56    /// entry state (re-binding args from the original values) without
57    /// re-entering the call site. Cheap to clone because `VmEnv` is
58    /// already cloned into `saved_env` on every call. `None` for
59    /// scratch frames (evaluate, import init) where restart isn't
60    /// meaningful.
61    pub(crate) initial_env: Option<VmEnv>,
62    pub(crate) initial_local_slots: Option<Vec<LocalSlot>>,
63    /// Iterator stack depth to restore when this frame unwinds.
64    pub(crate) saved_iterator_depth: usize,
65    /// Function name for stack traces (empty for top-level pipeline).
66    pub(crate) fn_name: String,
67    /// Number of arguments actually passed by the caller (for default arg support).
68    pub(crate) argc: usize,
69    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
70    /// Set when entering a closure that originated from an imported module.
71    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
72    /// Module-local named functions available to symbolic calls within this frame.
73    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
74    /// Shared module-level env for top-level `var` / `let` bindings of
75    /// this frame's originating module. Looked up after `self.env` and
76    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
77    /// its own live static state that persists across calls. See the
78    /// `module_state` field on `VmClosure` for the full rationale.
79    pub(crate) module_state: Option<crate::value::ModuleState>,
80    /// Slot-indexed locals for compiler-resolved names in this frame.
81    pub(crate) local_slots: Vec<LocalSlot>,
82    /// Env scope index that corresponds to compiler local scope depth 0.
83    pub(crate) local_scope_base: usize,
84    /// Current compiler local scope depth, updated by PushScope/PopScope.
85    pub(crate) local_scope_depth: usize,
86}
87
/// An active try/catch handler on the VM's exception-handler stack.
pub(crate) struct ExceptionHandler {
    /// Instruction index of the catch target.
    pub(crate) catch_ip: usize,
    /// Value-stack depth recorded for this handler.
    /// NOTE(review): presumably the depth to truncate to when unwinding — confirm.
    pub(crate) stack_depth: usize,
    /// Call-frame depth recorded for this handler.
    pub(crate) frame_depth: usize,
    /// Env scope depth recorded for this handler.
    pub(crate) env_scope_depth: usize,
    /// When set, the catch only handles errors whose enum_name matches.
    pub(crate) error_type: Option<Rc<str>>,
}
97
98/// Iterator state for for-in loops.
99pub(crate) enum IterState {
100    Vec {
101        items: Rc<Vec<VmValue>>,
102        idx: usize,
103    },
104    Dict {
105        entries: Rc<BTreeMap<String, VmValue>>,
106        keys: Vec<String>,
107        idx: usize,
108    },
109    Channel {
110        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
111        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
112    },
113    Generator {
114        gen: std::rc::Rc<crate::value::VmGenerator>,
115    },
116    Stream {
117        stream: std::rc::Rc<crate::value::VmStream>,
118    },
119    /// Step through a lazy range without materializing a Vec.
120    /// Inclusive ranges keep `end` as an actual value so `i64::MAX to i64::MAX`
121    /// still yields one item instead of overflowing a one-past-end sentinel.
122    Range {
123        next: i64,
124        end: i64,
125        inclusive: bool,
126        done: bool,
127    },
128    VmIter {
129        handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
130    },
131}
132
133#[derive(Clone)]
134pub(crate) enum VmBuiltinDispatch {
135    Sync(VmBuiltinFn),
136    Async(VmAsyncBuiltinFn),
137}
138
139#[derive(Clone)]
140pub(crate) struct VmBuiltinEntry {
141    pub(crate) name: Rc<str>,
142    pub(crate) dispatch: VmBuiltinDispatch,
143}
144
145pub(crate) type DeferredBuiltinRegistrar = fn(&mut Vm);
146
147/// The Harn bytecode virtual machine.
148pub struct Vm {
149    pub(crate) stack: Vec<VmValue>,
150    pub(crate) env: VmEnv,
151    pub(crate) output: String,
152    pub(crate) builtins: Rc<BTreeMap<String, VmBuiltinFn>>,
153    pub(crate) async_builtins: Rc<BTreeMap<String, VmAsyncBuiltinFn>>,
154    pub(crate) builtin_metadata: Rc<BTreeMap<String, VmBuiltinMetadata>>,
155    /// Numeric side index for builtins. Name-keyed maps remain authoritative;
156    /// this index is the hot path for direct builtin bytecode and callback refs.
157    pub(crate) builtins_by_id: Rc<BTreeMap<BuiltinId, VmBuiltinEntry>>,
158    /// IDs with detected name collisions. Collided names safely fall back to
159    /// the authoritative name-keyed lookup path.
160    pub(crate) builtin_id_collisions: Rc<HashSet<BuiltinId>>,
161    /// Builtins whose registration can be deferred until first use.
162    pub(crate) deferred_builtin_registrars: Rc<BTreeMap<String, DeferredBuiltinRegistrar>>,
163    /// Iterator state for for-in loops.
164    pub(crate) iterators: Vec<IterState>,
165    /// Call frame stack.
166    pub(crate) frames: Vec<CallFrame>,
167    /// Exception handler stack.
168    pub(crate) exception_handlers: Vec<ExceptionHandler>,
169    /// Spawned async task handles.
170    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
171    /// Shared process-local synchronization primitives inherited by child VMs.
172    pub(crate) sync_runtime: Arc<crate::synchronization::VmSyncRuntime>,
173    /// Shared process-local cells, maps, and mailboxes inherited by child VMs.
174    pub(crate) shared_state_runtime: Rc<crate::shared_state::VmSharedStateRuntime>,
175    /// Permits acquired by lexical synchronization blocks in this VM.
176    pub(crate) held_sync_guards: Vec<crate::synchronization::VmSyncHeldGuard>,
177    /// Counter for generating unique task IDs.
178    pub(crate) task_counter: u64,
179    /// Counter for logical runtime-context task groups.
180    pub(crate) runtime_context_counter: u64,
181    /// Logical runtime task context visible through `runtime_context()`.
182    pub(crate) runtime_context: crate::runtime_context::RuntimeContext,
183    /// Active deadline stack: (deadline_instant, frame_depth).
184    pub(crate) deadlines: Vec<(Instant, usize)>,
185    /// Breakpoints, keyed by source-file path so a breakpoint at line N
186    /// in `auto.harn` doesn't also fire when execution hits line N in an
187    /// imported lib. The empty-string key is a wildcard used by callers
188    /// that don't track source paths (legacy `set_breakpoints` API).
189    pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
190    /// Function-name breakpoints. Any closure call whose
191    /// `CompiledFunction.name` matches an entry here raises a stop on
192    /// entry, regardless of the call site's file or line. Lets the IDE
193    /// break on `llm_call` / `host_run_pipeline` / any user pipeline
194    /// function without pinning down a source location first.
195    pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
196    /// Latched on `push_closure_frame` when the callee's name matches
197    /// `function_breakpoints`; consumed by the next step so the stop is
198    /// reported with reason="function breakpoint" and the breakpoint
199    /// name available for the DAP `stopped` event.
200    pub(crate) pending_function_bp: Option<String>,
201    /// Whether the VM is in step mode.
202    pub(crate) step_mode: bool,
203    /// The frame depth at which stepping started (for step-over).
204    pub(crate) step_frame_depth: usize,
205    /// Whether the VM is currently stopped at a debug point.
206    pub(crate) stopped: bool,
207    /// Last source line executed (to detect line changes).
208    pub(crate) last_line: usize,
209    /// Source directory for resolving imports.
210    pub(crate) source_dir: Option<std::path::PathBuf>,
211    /// Modules currently being imported (cycle prevention).
212    pub(crate) imported_paths: Vec<std::path::PathBuf>,
213    /// Loaded module cache keyed by canonical or synthetic module path.
214    pub(crate) module_cache: Rc<BTreeMap<std::path::PathBuf, LoadedModule>>,
215    /// Source text keyed by canonical or synthetic module path for debugger retrieval.
216    pub(crate) source_cache: Rc<BTreeMap<std::path::PathBuf, String>>,
217    /// Source file path for error reporting.
218    pub(crate) source_file: Option<String>,
219    /// Source text for error reporting.
220    pub(crate) source_text: Option<String>,
221    /// Optional bridge for delegating unknown builtins in bridge mode.
222    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
223    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
224    pub(crate) denied_builtins: Rc<HashSet<String>>,
225    /// Cancellation token for cooperative graceful shutdown (set by parent).
226    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
227    pub(crate) interrupt_signal_token: Option<std::sync::Arc<std::sync::Mutex<Option<String>>>>,
228    /// Remaining instruction-boundary checks before a requested host
229    /// cancellation is forcefully raised. This gives `is_cancelled()` loops a
230    /// deterministic chance to return cleanly without letting non-cooperative
231    /// CPU-bound code run forever.
232    pub(crate) cancel_grace_instructions_remaining: Option<usize>,
233    /// User-visible interrupt handlers registered through `std/signal`.
234    pub(crate) interrupt_handlers: Vec<InterruptHandler>,
235    pub(crate) next_interrupt_handle: i64,
236    pub(crate) pending_interrupt_signal: Option<String>,
237    pub(crate) interrupted: bool,
238    pub(crate) dispatching_interrupt: bool,
239    pub(crate) interrupt_handler_deadline: Option<Instant>,
240    /// Captured stack trace from the most recent error (fn_name, line, col).
241    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
242    /// Yield channel sender for generator execution. When set, `Op::Yield`
243    /// sends values through this channel instead of being a no-op.
244    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<Result<VmValue, VmError>>>,
245    /// Project root directory (detected via harn.toml).
246    /// Used as base directory for metadata, store, and checkpoint operations.
247    pub(crate) project_root: Option<std::path::PathBuf>,
248    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
249    /// after the environment, so user-defined variables can shadow them.
250    pub(crate) globals: Rc<BTreeMap<String, VmValue>>,
251    /// Optional debugger hook invoked when execution advances to a new source line.
252    pub(crate) debug_hook: Option<Box<DebugHook>>,
253    /// Effective runtime ceilings for this VM execution.
254    pub(crate) runtime_limits: RuntimeLimits,
255}
256
257/// Reusable VM baseline for hosts that need many clean executions with the
258/// same stable builtin/source setup.
259///
260/// The baseline intentionally does not snapshot execution state. Each
261/// instantiation gets fresh stacks, frames, tasks, cancellation fields, sync
262/// primitives, shared cells/maps/mailboxes, and debug state. Builtin tables are
263/// shared through `Rc` until a per-execution rebind needs copy-on-write.
264#[derive(Clone)]
265pub struct VmBaseline {
266    builtins: Rc<BTreeMap<String, VmBuiltinFn>>,
267    async_builtins: Rc<BTreeMap<String, VmAsyncBuiltinFn>>,
268    builtin_metadata: Rc<BTreeMap<String, VmBuiltinMetadata>>,
269    builtins_by_id: Rc<BTreeMap<BuiltinId, VmBuiltinEntry>>,
270    builtin_id_collisions: Rc<HashSet<BuiltinId>>,
271    deferred_builtin_registrars: Rc<BTreeMap<String, DeferredBuiltinRegistrar>>,
272    source_dir: Option<std::path::PathBuf>,
273    source_file: Option<String>,
274    source_text: Option<String>,
275    project_root: Option<std::path::PathBuf>,
276    globals: Rc<BTreeMap<String, VmValue>>,
277    denied_builtins: Rc<HashSet<String>>,
278    runtime_limits: RuntimeLimits,
279}
280
281impl VmBaseline {
282    pub fn from_vm(vm: &Vm) -> Self {
283        Self {
284            builtins: Rc::clone(&vm.builtins),
285            async_builtins: Rc::clone(&vm.async_builtins),
286            builtin_metadata: Rc::clone(&vm.builtin_metadata),
287            builtins_by_id: Rc::clone(&vm.builtins_by_id),
288            builtin_id_collisions: Rc::clone(&vm.builtin_id_collisions),
289            deferred_builtin_registrars: Rc::clone(&vm.deferred_builtin_registrars),
290            source_dir: vm.source_dir.clone(),
291            source_file: vm.source_file.clone(),
292            source_text: vm.source_text.clone(),
293            project_root: vm.project_root.clone(),
294            globals: Rc::clone(&vm.globals),
295            denied_builtins: Rc::clone(&vm.denied_builtins),
296            runtime_limits: vm.runtime_limits,
297        }
298    }
299
300    pub fn instantiate(&self) -> Vm {
301        let mut source_cache = BTreeMap::new();
302        if let (Some(file), Some(text)) = (&self.source_file, &self.source_text) {
303            source_cache.insert(std::path::PathBuf::from(file), text.clone());
304        }
305        if let Some(dir) = &self.source_dir {
306            crate::stdlib::set_thread_source_dir(dir);
307        }
308
309        let mut vm = Vm {
310            stack: Vec::with_capacity(256),
311            env: VmEnv::new(),
312            output: String::new(),
313            builtins: Rc::clone(&self.builtins),
314            async_builtins: Rc::clone(&self.async_builtins),
315            builtin_metadata: Rc::clone(&self.builtin_metadata),
316            builtins_by_id: Rc::clone(&self.builtins_by_id),
317            builtin_id_collisions: Rc::clone(&self.builtin_id_collisions),
318            deferred_builtin_registrars: Rc::clone(&self.deferred_builtin_registrars),
319            iterators: Vec::new(),
320            frames: Vec::new(),
321            exception_handlers: Vec::new(),
322            spawned_tasks: BTreeMap::new(),
323            sync_runtime: Arc::new(crate::synchronization::VmSyncRuntime::new()),
324            shared_state_runtime: Rc::new(crate::shared_state::VmSharedStateRuntime::new()),
325            held_sync_guards: Vec::new(),
326            task_counter: 0,
327            runtime_context_counter: 0,
328            runtime_context: crate::runtime_context::RuntimeContext::root(),
329            deadlines: Vec::new(),
330            breakpoints: BTreeMap::new(),
331            function_breakpoints: std::collections::BTreeSet::new(),
332            pending_function_bp: None,
333            step_mode: false,
334            step_frame_depth: 0,
335            stopped: false,
336            last_line: 0,
337            source_dir: self.source_dir.clone(),
338            imported_paths: Vec::new(),
339            module_cache: Rc::new(BTreeMap::new()),
340            source_cache: Rc::new(source_cache),
341            source_file: self.source_file.clone(),
342            source_text: self.source_text.clone(),
343            bridge: None,
344            denied_builtins: Rc::clone(&self.denied_builtins),
345            cancel_token: None,
346            interrupt_signal_token: None,
347            cancel_grace_instructions_remaining: None,
348            interrupt_handlers: Vec::new(),
349            next_interrupt_handle: 1,
350            pending_interrupt_signal: None,
351            interrupted: false,
352            dispatching_interrupt: false,
353            interrupt_handler_deadline: None,
354            error_stack_trace: Vec::new(),
355            yield_sender: None,
356            project_root: self.project_root.clone(),
357            globals: Rc::clone(&self.globals),
358            debug_hook: None,
359            runtime_limits: self.runtime_limits,
360        };
361
362        crate::stdlib::rebind_execution_state_builtins(&mut vm);
363        vm
364    }
365}
366
367impl Vm {
368    pub(crate) fn fresh_local_slots(chunk: &Chunk) -> Vec<LocalSlot> {
369        chunk
370            .local_slots
371            .iter()
372            .map(|_| LocalSlot {
373                value: VmValue::Nil,
374                initialized: false,
375                synced: false,
376            })
377            .collect()
378    }
379
380    pub(crate) fn bind_param_slots(
381        slots: &mut [LocalSlot],
382        func: &crate::chunk::CompiledFunction,
383        args: &[VmValue],
384        synced: bool,
385    ) {
386        Self::bind_param_slots_args(slots, func, &super::CallArgs::Slice(args), synced);
387    }
388
389    pub(crate) fn bind_param_slots_args(
390        slots: &mut [LocalSlot],
391        func: &crate::chunk::CompiledFunction,
392        args: &super::CallArgs<'_>,
393        synced: bool,
394    ) {
395        let param_count = func.params.len();
396        for (i, _param) in func.params.iter().enumerate() {
397            if i >= slots.len() {
398                break;
399            }
400            if func.has_rest_param && i == param_count - 1 {
401                let rest_args = args.to_vec_from(i);
402                slots[i].value = VmValue::List(Rc::new(rest_args));
403                slots[i].initialized = true;
404                slots[i].synced = synced;
405            } else if let Some(arg) = args.get(i) {
406                slots[i].value = arg.clone();
407                slots[i].initialized = true;
408                slots[i].synced = synced;
409            }
410        }
411    }
412
413    pub(crate) fn visible_variables(&self) -> BTreeMap<String, VmValue> {
414        let mut vars = self.env.all_variables();
415        let Some(frame) = self.frames.last() else {
416            return vars;
417        };
418        for (slot, info) in frame.local_slots.iter().zip(frame.chunk.local_slots.iter()) {
419            if slot.initialized && info.scope_depth <= frame.local_scope_depth {
420                vars.insert(info.name.clone(), slot.value.clone());
421            }
422        }
423        vars
424    }
425
426    pub(crate) fn sync_current_frame_locals_to_env(&mut self) {
427        let frames = &mut self.frames;
428        let env = &mut self.env;
429        let Some(frame) = frames.last_mut() else {
430            return;
431        };
432        let local_scope_base = frame.local_scope_base;
433        let local_scope_depth = frame.local_scope_depth;
434        for (slot, info) in frame
435            .local_slots
436            .iter_mut()
437            .zip(frame.chunk.local_slots.iter())
438        {
439            if slot.initialized && !slot.synced && info.scope_depth <= local_scope_depth {
440                slot.synced = true;
441                let scope_idx = local_scope_base + info.scope_depth;
442                while env.scopes.len() <= scope_idx {
443                    env.push_scope();
444                }
445                Rc::make_mut(&mut env.scopes[scope_idx].vars)
446                    .insert(info.name.clone(), (slot.value.clone(), info.mutable));
447            }
448        }
449    }
450
451    pub(crate) fn closure_call_env_for_current_frame(
452        &self,
453        closure: &crate::value::VmClosure,
454    ) -> VmEnv {
455        if closure.module_state.is_some() {
456            return closure.env.clone();
457        }
458        let call_env = Self::closure_call_env(&self.env, closure);
459        // Same compile-time short-circuit as the env walk in
460        // `closure_call_env`: when the callee body never resolves an
461        // outer name through the env, injecting closure-typed *slot*
462        // locals from the caller's frame is wasted work too.
463        if !closure.func.chunk.references_outer_names {
464            return call_env;
465        }
466        let mut call_env = call_env;
467        let Some(frame) = self.frames.last() else {
468            return call_env;
469        };
470        for (slot, info) in frame
471            .local_slots
472            .iter()
473            .zip(frame.chunk.local_slots.iter())
474            .filter(|(slot, info)| slot.initialized && info.scope_depth <= frame.local_scope_depth)
475        {
476            if matches!(slot.value, VmValue::Closure(_)) && !call_env.contains(&info.name) {
477                let _ = call_env.define(&info.name, slot.value.clone(), info.mutable);
478            }
479        }
480        call_env
481    }
482
483    pub(crate) fn active_local_slot_value(&self, name: &str) -> Option<VmValue> {
484        let frame = self.frames.last()?;
485        let idx = self.active_local_slot_index(name)?;
486        frame.local_slots.get(idx).map(|slot| slot.value.clone())
487    }
488
489    /// Returns the slot index of an initialized active local with the given
490    /// name, walking from innermost to outermost scope. Used by hot paths
491    /// (subscript-store, etc.) that want to mutate the slot value in place
492    /// without paying a defensive `VmValue::clone` first.
493    pub(crate) fn active_local_slot_index(&self, name: &str) -> Option<usize> {
494        let frame = self.frames.last()?;
495        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
496            if info.name == name && info.scope_depth <= frame.local_scope_depth {
497                if let Some(slot) = frame.local_slots.get(idx) {
498                    if slot.initialized {
499                        return Some(idx);
500                    }
501                }
502            }
503        }
504        None
505    }
506
507    pub(crate) fn assign_active_local_slot(
508        &mut self,
509        name: &str,
510        value: VmValue,
511        debug: bool,
512    ) -> Result<bool, VmError> {
513        let Some(frame) = self.frames.last_mut() else {
514            return Ok(false);
515        };
516        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
517            if info.name == name && info.scope_depth <= frame.local_scope_depth {
518                if !debug && !info.mutable {
519                    return Err(VmError::ImmutableAssignment(name.to_string()));
520                }
521                if let Some(slot) = frame.local_slots.get_mut(idx) {
522                    slot.value = value;
523                    slot.initialized = true;
524                    slot.synced = false;
525                    return Ok(true);
526                }
527            }
528        }
529        Ok(false)
530    }
531
532    pub fn new() -> Self {
533        Self {
534            stack: Vec::with_capacity(256),
535            env: VmEnv::new(),
536            output: String::new(),
537            builtins: Rc::new(BTreeMap::new()),
538            async_builtins: Rc::new(BTreeMap::new()),
539            builtin_metadata: Rc::new(BTreeMap::new()),
540            builtins_by_id: Rc::new(BTreeMap::new()),
541            builtin_id_collisions: Rc::new(HashSet::new()),
542            deferred_builtin_registrars: Rc::new(BTreeMap::new()),
543            iterators: Vec::new(),
544            frames: Vec::new(),
545            exception_handlers: Vec::new(),
546            spawned_tasks: BTreeMap::new(),
547            sync_runtime: Arc::new(crate::synchronization::VmSyncRuntime::new()),
548            shared_state_runtime: Rc::new(crate::shared_state::VmSharedStateRuntime::new()),
549            held_sync_guards: Vec::new(),
550            task_counter: 0,
551            runtime_context_counter: 0,
552            runtime_context: crate::runtime_context::RuntimeContext::root(),
553            deadlines: Vec::new(),
554            breakpoints: BTreeMap::new(),
555            function_breakpoints: std::collections::BTreeSet::new(),
556            pending_function_bp: None,
557            step_mode: false,
558            step_frame_depth: 0,
559            stopped: false,
560            last_line: 0,
561            source_dir: None,
562            imported_paths: Vec::new(),
563            module_cache: Rc::new(BTreeMap::new()),
564            source_cache: Rc::new(BTreeMap::new()),
565            source_file: None,
566            source_text: None,
567            bridge: None,
568            denied_builtins: Rc::new(HashSet::new()),
569            cancel_token: None,
570            interrupt_signal_token: None,
571            cancel_grace_instructions_remaining: None,
572            interrupt_handlers: Vec::new(),
573            next_interrupt_handle: 1,
574            pending_interrupt_signal: None,
575            interrupted: false,
576            dispatching_interrupt: false,
577            interrupt_handler_deadline: None,
578            error_stack_trace: Vec::new(),
579            yield_sender: None,
580            project_root: None,
581            globals: Rc::new(BTreeMap::new()),
582            debug_hook: None,
583            runtime_limits: RuntimeLimits::default(),
584        }
585    }
586
587    pub fn baseline(&self) -> VmBaseline {
588        VmBaseline::from_vm(self)
589    }
590
591    /// Return the effective runtime limit profile for this VM.
592    pub fn runtime_limits(&self) -> RuntimeLimits {
593        self.runtime_limits
594    }
595
596    /// Return a host/debug report describing the VM's effective runtime limits.
597    pub fn runtime_limit_report(&self) -> crate::RuntimeLimitsReport {
598        self.runtime_limits.report()
599    }
600
601    /// Returns true if any debugging affordance is active — DAP hook,
602    /// line breakpoints, or function breakpoints. Call-site code uses
603    /// this to decide whether to capture per-frame restart snapshots
604    /// (`initial_env`, `initial_local_slots`); without a debugger those
605    /// snapshots are dead weight, so skipping them removes two
606    /// allocations from every function call hot path.
607    ///
608    /// All three signals are stable across a function call's lifetime
609    /// (they're set before pipeline execution starts), so the gate is
610    /// consistent between frame creation and any later `restart_frame`
611    /// invocation. The three `is_empty` checks compile to a handful of
612    /// branch-predicted memory probes — cheaper than a single
613    /// `BTreeMap` clone, which is what we're avoiding.
614    #[inline]
615    pub(crate) fn debugger_attached(&self) -> bool {
616        self.debug_hook.is_some()
617            || !self.breakpoints.is_empty()
618            || !self.function_breakpoints.is_empty()
619    }
620
621    /// Set the bridge for delegating unknown builtins in bridge mode.
622    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
623        self.bridge = Some(bridge);
624    }
625
626    /// Set builtins that are denied in sandbox mode.
627    /// When called, the given builtin names will produce a permission error.
628    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
629        self.denied_builtins = Rc::new(denied);
630    }
631
632    /// Set source info for error reporting (file path and source text).
633    pub fn set_source_info(&mut self, file: &str, text: &str) {
634        self.source_file = Some(file.to_string());
635        self.source_text = Some(text.to_string());
636        Rc::make_mut(&mut self.source_cache)
637            .insert(std::path::PathBuf::from(file), text.to_string());
638    }
639
640    /// Initialize execution (push the initial frame).
641    pub fn start(&mut self, chunk: &Chunk) {
642        // The top-level pipeline frame captures env at start so
643        // restartFrame on the outermost frame rewinds to the
644        // pre-pipeline state — basically "restart session" in
645        // debugger terms. Skipped when no debugger is attached:
646        // the snapshot is dead weight in that case and dominates
647        // call-overhead bench numbers (~5-10%).
648        let debugger = self.debugger_attached();
649        let initial_env = if debugger {
650            Some(self.env.clone())
651        } else {
652            None
653        };
654        let initial_local_slots = if debugger {
655            Some(Self::fresh_local_slots(chunk))
656        } else {
657            None
658        };
659        self.frames.push(CallFrame {
660            chunk: Rc::new(chunk.clone()),
661            ip: 0,
662            stack_base: self.stack.len(),
663            saved_env: self.env.clone(),
664            initial_env,
665            initial_local_slots,
666            saved_iterator_depth: self.iterators.len(),
667            fn_name: String::new(),
668            argc: 0,
669            saved_source_dir: None,
670            module_functions: None,
671            module_state: None,
672            local_slots: Self::fresh_local_slots(chunk),
673            local_scope_base: self.env.scope_depth().saturating_sub(1),
674            local_scope_depth: 0,
675        });
676    }
677
678    /// Create a child VM that shares builtins and env but has fresh execution state.
679    /// Used for parallel/spawn to fork the VM for concurrent tasks.
680    pub(crate) fn child_vm(&self) -> Vm {
681        Vm {
682            stack: Vec::with_capacity(64),
683            env: self.env.clone(),
684            output: String::new(),
685            builtins: Rc::clone(&self.builtins),
686            async_builtins: Rc::clone(&self.async_builtins),
687            builtin_metadata: Rc::clone(&self.builtin_metadata),
688            builtins_by_id: Rc::clone(&self.builtins_by_id),
689            builtin_id_collisions: Rc::clone(&self.builtin_id_collisions),
690            deferred_builtin_registrars: Rc::clone(&self.deferred_builtin_registrars),
691            iterators: Vec::new(),
692            frames: Vec::new(),
693            exception_handlers: Vec::new(),
694            spawned_tasks: BTreeMap::new(),
695            sync_runtime: self.sync_runtime.clone(),
696            shared_state_runtime: self.shared_state_runtime.clone(),
697            held_sync_guards: Vec::new(),
698            task_counter: 0,
699            runtime_context_counter: self.runtime_context_counter,
700            runtime_context: self.runtime_context.clone(),
701            deadlines: self.deadlines.clone(),
702            breakpoints: BTreeMap::new(),
703            function_breakpoints: std::collections::BTreeSet::new(),
704            pending_function_bp: None,
705            step_mode: false,
706            step_frame_depth: 0,
707            stopped: false,
708            last_line: 0,
709            source_dir: self.source_dir.clone(),
710            imported_paths: Vec::new(),
711            module_cache: Rc::clone(&self.module_cache),
712            source_cache: Rc::clone(&self.source_cache),
713            source_file: self.source_file.clone(),
714            source_text: self.source_text.clone(),
715            bridge: self.bridge.clone(),
716            denied_builtins: Rc::clone(&self.denied_builtins),
717            cancel_token: self.cancel_token.clone(),
718            interrupt_signal_token: self.interrupt_signal_token.clone(),
719            cancel_grace_instructions_remaining: None,
720            interrupt_handlers: Vec::new(),
721            next_interrupt_handle: 1,
722            pending_interrupt_signal: None,
723            interrupted: self.interrupted,
724            dispatching_interrupt: false,
725            interrupt_handler_deadline: None,
726            error_stack_trace: Vec::new(),
727            yield_sender: None,
728            project_root: self.project_root.clone(),
729            globals: Rc::clone(&self.globals),
730            debug_hook: None,
731            runtime_limits: self.runtime_limits,
732        }
733    }
734
735    /// Create a child VM for external adapters that need to invoke Harn
736    /// closures while sharing the parent's builtins, globals, and module state.
737    pub(crate) fn child_vm_for_host(&self) -> Vm {
738        self.child_vm()
739    }
740
741    /// Request cancellation for every outstanding child task owned by this VM
742    /// and then abort the join handles. This prevents un-awaited spawned tasks
743    /// from outliving their parent execution scope.
744    pub(crate) fn cancel_spawned_tasks(&mut self) {
745        for (_, task) in std::mem::take(&mut self.spawned_tasks) {
746            task.cancel_token
747                .store(true, std::sync::atomic::Ordering::SeqCst);
748            task.handle.abort();
749        }
750    }
751
752    /// Set the source directory for import resolution and introspection.
753    /// Also auto-detects the project root if not already set.
754    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
755        let dir = crate::stdlib::process::normalize_context_path(dir);
756        self.source_dir = Some(dir.clone());
757        crate::stdlib::set_thread_source_dir(&dir);
758        // Auto-detect project root if not explicitly set.
759        if self.project_root.is_none() {
760            self.project_root = crate::stdlib::process::find_project_root(&dir);
761        }
762    }
763
764    /// Explicitly set the project root directory.
765    /// Used by ACP/CLI to override auto-detection.
766    pub fn set_project_root(&mut self, root: &std::path::Path) {
767        self.project_root = Some(root.to_path_buf());
768    }
769
770    /// Get the project root directory, falling back to source_dir.
771    pub fn project_root(&self) -> Option<&std::path::Path> {
772        self.project_root.as_deref().or(self.source_dir.as_deref())
773    }
774
775    /// Return all registered builtin names (sync + async).
776    pub fn builtin_names(&self) -> Vec<String> {
777        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
778        names.extend(self.async_builtins.keys().cloned());
779        names
780    }
781
782    /// Return discoverable metadata for registered builtins.
783    pub fn builtin_metadata(&self) -> Vec<VmBuiltinMetadata> {
784        self.builtin_metadata.values().cloned().collect()
785    }
786
787    /// Return discoverable metadata for a registered builtin name.
788    pub fn builtin_metadata_for(&self, name: &str) -> Option<&VmBuiltinMetadata> {
789        self.builtin_metadata.get(name)
790    }
791
792    /// Set a global constant (e.g. `pi`, `e`).
793    /// Stored separately from the environment so user-defined variables can shadow them.
794    pub fn set_global(&mut self, name: &str, value: VmValue) {
795        Rc::make_mut(&mut self.globals).insert(name.to_string(), value);
796    }
797
798    /// Install the script's `Harness` capability handle as the `harness`
799    /// global so the auto-call emitted by `Compiler::compile()` (for
800    /// `fn main(harness: Harness)` entrypoints) can read it. Hosts that
801    /// drive the VM directly (CLI, MCP server, composition runtime) call
802    /// this once before `execute()`.
803    pub fn set_harness(&mut self, harness: crate::harness::Harness) {
804        self.set_global("harness", harness.into_vm_value());
805    }
806
807    /// Get the captured output.
808    pub fn output(&self) -> &str {
809        &self.output
810    }
811
812    /// Drain and return the captured output, leaving the buffer empty.
813    /// Used by the async-builtin dispatch path to forward closure output
814    /// from a child VM back to its parent.
815    pub fn take_output(&mut self) -> String {
816        std::mem::take(&mut self.output)
817    }
818
819    /// Append text to this VM's captured output. Used to forward output
820    /// from child VMs (e.g. closures invoked via `call_closure_pub`)
821    /// back into the parent stream.
822    pub fn append_output(&mut self, text: &str) {
823        self.output.push_str(text);
824    }
825
826    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
827        self.stack.pop().ok_or(VmError::StackUnderflow)
828    }
829
830    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
831        self.stack.last().ok_or(VmError::StackUnderflow)
832    }
833
834    pub(crate) fn const_str(c: &Constant) -> Result<&str, VmError> {
835        match c {
836            Constant::String(s) => Ok(s.as_str()),
837            _ => Err(VmError::TypeError("expected string constant".into())),
838        }
839    }
840
841    pub(crate) fn release_sync_guards_for_current_scope(&mut self) {
842        let depth = self.env.scope_depth();
843        self.held_sync_guards
844            .retain(|guard| guard.env_scope_depth < depth);
845    }
846
847    pub(crate) fn release_sync_guards_after_unwind(
848        &mut self,
849        frame_depth: usize,
850        env_scope_depth: usize,
851    ) {
852        self.held_sync_guards.retain(|guard| {
853            guard.frame_depth <= frame_depth && guard.env_scope_depth <= env_scope_depth
854        });
855    }
856
857    pub(crate) fn release_sync_guards_for_frame(&mut self, frame_depth: usize) {
858        self.held_sync_guards
859            .retain(|guard| guard.frame_depth != frame_depth);
860    }
861}
862
863impl Drop for Vm {
864    fn drop(&mut self) {
865        self.cancel_spawned_tasks();
866    }
867}
868
869impl Default for Vm {
870    fn default() -> Self {
871        Self::new()
872    }
873}
874
#[cfg(test)]
mod tests {
    use std::rc::Rc;

    use super::*;

    /// Build a baseline from a VM that has the stdlib registered, `source`
    /// recorded as its source text, and a `stable_global` global set to the
    /// string "baseline".
    fn baseline_with_stdlib(source: &str) -> VmBaseline {
        let mut template = Vm::new();
        crate::register_vm_stdlib(&mut template);
        template.set_source_info("baseline_test.harn", source);
        let marker = VmValue::String(Rc::from("baseline"));
        template.set_global("stable_global", marker);
        template.baseline()
    }

    /// Mutating one instantiated VM must not affect the next instance created
    /// from the same baseline.
    #[test]
    fn vm_baseline_instantiates_clean_mutable_execution_state() {
        let baseline = baseline_with_stdlib("pipeline main() { __io_println(stable_global) }");

        // Scribble over the mutable execution state of a first instance.
        let mut dirty = baseline.instantiate();
        dirty.stack.push(VmValue::Int(42));
        dirty.output += "dirty";
        dirty.task_counter = 9;
        dirty.runtime_context_counter = 7;
        let trace_entry = ("main".to_string(), 1, 1, None);
        dirty.error_stack_trace.push(trace_entry);

        // A second instance must start from a blank slate.
        let clean = baseline.instantiate();
        assert!(clean.stack.is_empty());
        assert!(clean.output.is_empty());
        assert!(clean.frames.is_empty());
        assert!(clean.exception_handlers.is_empty());
        assert!(clean.spawned_tasks.is_empty());
        assert!(clean.held_sync_guards.is_empty());
        assert_eq!(clean.task_counter, 0);
        assert_eq!(clean.runtime_context_counter, 0);
        assert!(clean.deadlines.is_empty());
        assert!(clean.cancel_token.is_none());
        assert!(clean.interrupt_handlers.is_empty());
        assert!(clean.error_stack_trace.is_empty());
        assert!(clean.bridge.is_none());

        // Globals installed before the baseline was taken are still present.
        assert!(matches!(
            clean.globals.get("stable_global"),
            Some(value) if value.display() == "baseline"
        ));
    }

    /// A fresh VM reports the default limits, and both child VMs and baseline
    /// instances report the same limits as the VM they came from.
    #[test]
    fn vm_reports_effective_runtime_limits() {
        let vm = Vm::new();
        let limits = vm.runtime_limits();

        assert_eq!(limits, RuntimeLimits::default());

        let reported_entries = vm.runtime_limit_report().entries.len();
        assert_eq!(reported_entries, crate::RUNTIME_LIMIT_DESCRIPTIONS.len());

        assert_eq!(vm.child_vm().runtime_limits(), limits);
        assert_eq!(vm.baseline().instantiate().runtime_limits(), limits);
    }

    /// Two instances of one baseline run the same program; the second must
    /// not observe the shared cell value written by the first.
    #[tokio::test(flavor = "current_thread")]
    async fn vm_baseline_rebinds_shared_state_builtins_per_instance() {
        const SOURCE: &str = r#"
pipeline main() {
  let cell = shared_cell({scope: "task_group", key: "turn", initial: 0})
  __io_println(shared_get(cell))
  shared_set(cell, shared_get(cell) + 1)
}"#;

        let local = tokio::task::LocalSet::new();
        let scenario = async {
            let chunk = crate::compile_source(SOURCE).expect("compile");
            let baseline = baseline_with_stdlib(SOURCE);

            let mut first = baseline.instantiate();
            first.execute(&chunk).await.expect("first execute");
            assert_eq!(first.output(), "0\n");

            let mut second = baseline.instantiate();
            second.execute(&chunk).await.expect("second execute");
            assert_eq!(
                second.output(),
                "0\n",
                "shared state created by the first VM must not leak into the next baseline instance"
            );
        };
        local.run_until(scenario).await;
    }
}