Skip to main content

harn_vm/vm/
state.rs

1use std::collections::{BTreeMap, HashSet};
2use std::rc::Rc;
3use std::sync::Arc;
4use std::time::Instant;
5
6use crate::chunk::{Chunk, ChunkRef, Constant};
7use crate::value::{
8    ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
9};
10use crate::BuiltinId;
11
12use super::debug::DebugHook;
13use super::modules::LoadedModule;
14use super::VmBuiltinMetadata;
15
/// Drop guard around a tracing span.
///
/// Constructing the guard opens a span through `crate::tracing::span_start`;
/// dropping it closes that span through `crate::tracing::span_end`. The
/// wrapped `u64` is the value `span_start` returned.
pub(crate) struct ScopeSpan(u64);
18
19impl ScopeSpan {
20    pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
21        Self(crate::tracing::span_start(kind, name))
22    }
23}
24
25impl Drop for ScopeSpan {
26    fn drop(&mut self) {
27        crate::tracing::span_end(self.0);
28    }
29}
30
31#[derive(Clone)]
32pub(crate) struct LocalSlot {
33    pub(crate) value: VmValue,
34    pub(crate) initialized: bool,
35    pub(crate) synced: bool,
36}
37
38#[derive(Clone)]
39pub(crate) struct InterruptHandler {
40    pub(crate) handle: i64,
41    pub(crate) signals: Vec<String>,
42    pub(crate) once: bool,
43    pub(crate) graceful_timeout_ms: Option<u64>,
44    pub(crate) handler: VmValue,
45}
46
47/// Call frame for function execution.
48pub(crate) struct CallFrame {
49    pub(crate) chunk: ChunkRef,
50    pub(crate) ip: usize,
51    pub(crate) stack_base: usize,
52    pub(crate) saved_env: VmEnv,
53    /// Env snapshot captured at call-time, *after* argument binding. Used
54    /// by the debugger's `restartFrame` to rewind this frame to its
55    /// entry state (re-binding args from the original values) without
56    /// re-entering the call site. Cheap to clone because `VmEnv` is
57    /// already cloned into `saved_env` on every call. `None` for
58    /// scratch frames (evaluate, import init) where restart isn't
59    /// meaningful.
60    pub(crate) initial_env: Option<VmEnv>,
61    pub(crate) initial_local_slots: Option<Vec<LocalSlot>>,
62    /// Iterator stack depth to restore when this frame unwinds.
63    pub(crate) saved_iterator_depth: usize,
64    /// Function name for stack traces (empty for top-level pipeline).
65    pub(crate) fn_name: String,
66    /// Number of arguments actually passed by the caller (for default arg support).
67    pub(crate) argc: usize,
68    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
69    /// Set when entering a closure that originated from an imported module.
70    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
71    /// Module-local named functions available to symbolic calls within this frame.
72    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
73    /// Shared module-level env for top-level `var` / `let` bindings of
74    /// this frame's originating module. Looked up after `self.env` and
75    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
76    /// its own live static state that persists across calls. See the
77    /// `module_state` field on `VmClosure` for the full rationale.
78    pub(crate) module_state: Option<crate::value::ModuleState>,
79    /// Slot-indexed locals for compiler-resolved names in this frame.
80    pub(crate) local_slots: Vec<LocalSlot>,
81    /// Env scope index that corresponds to compiler local scope depth 0.
82    pub(crate) local_scope_base: usize,
83    /// Current compiler local scope depth, updated by PushScope/PopScope.
84    pub(crate) local_scope_depth: usize,
85}
86
/// An installed try/catch handler on the VM's exception-handler stack.
pub(crate) struct ExceptionHandler {
    /// Instruction index of the catch target.
    pub(crate) catch_ip: usize,
    /// Value-stack depth recorded for this handler.
    pub(crate) stack_depth: usize,
    /// Call-frame depth recorded for this handler.
    pub(crate) frame_depth: usize,
    /// Env scope depth recorded for this handler.
    pub(crate) env_scope_depth: usize,
    /// When non-empty, the catch applies only to errors whose enum_name
    /// equals this string; an empty string places no restriction.
    pub(crate) error_type: String,
}
96
97/// Iterator state for for-in loops.
98pub(crate) enum IterState {
99    Vec {
100        items: Rc<Vec<VmValue>>,
101        idx: usize,
102    },
103    Dict {
104        entries: Rc<BTreeMap<String, VmValue>>,
105        keys: Vec<String>,
106        idx: usize,
107    },
108    Channel {
109        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
110        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
111    },
112    Generator {
113        gen: std::rc::Rc<crate::value::VmGenerator>,
114    },
115    Stream {
116        stream: std::rc::Rc<crate::value::VmStream>,
117    },
118    /// Step through a lazy range without materializing a Vec.
119    /// Inclusive ranges keep `end` as an actual value so `i64::MAX to i64::MAX`
120    /// still yields one item instead of overflowing a one-past-end sentinel.
121    Range {
122        next: i64,
123        end: i64,
124        inclusive: bool,
125        done: bool,
126    },
127    VmIter {
128        handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
129    },
130}
131
132#[derive(Clone)]
133pub(crate) enum VmBuiltinDispatch {
134    Sync(VmBuiltinFn),
135    Async(VmAsyncBuiltinFn),
136}
137
138#[derive(Clone)]
139pub(crate) struct VmBuiltinEntry {
140    pub(crate) name: Rc<str>,
141    pub(crate) dispatch: VmBuiltinDispatch,
142}
143
144pub(crate) type DeferredBuiltinRegistrar = fn(&mut Vm);
145
146/// The Harn bytecode virtual machine.
147pub struct Vm {
148    pub(crate) stack: Vec<VmValue>,
149    pub(crate) env: VmEnv,
150    pub(crate) output: String,
151    pub(crate) builtins: Rc<BTreeMap<String, VmBuiltinFn>>,
152    pub(crate) async_builtins: Rc<BTreeMap<String, VmAsyncBuiltinFn>>,
153    pub(crate) builtin_metadata: Rc<BTreeMap<String, VmBuiltinMetadata>>,
154    /// Numeric side index for builtins. Name-keyed maps remain authoritative;
155    /// this index is the hot path for direct builtin bytecode and callback refs.
156    pub(crate) builtins_by_id: Rc<BTreeMap<BuiltinId, VmBuiltinEntry>>,
157    /// IDs with detected name collisions. Collided names safely fall back to
158    /// the authoritative name-keyed lookup path.
159    pub(crate) builtin_id_collisions: Rc<HashSet<BuiltinId>>,
160    /// Builtins whose registration can be deferred until first use.
161    pub(crate) deferred_builtin_registrars: Rc<BTreeMap<String, DeferredBuiltinRegistrar>>,
162    /// Iterator state for for-in loops.
163    pub(crate) iterators: Vec<IterState>,
164    /// Call frame stack.
165    pub(crate) frames: Vec<CallFrame>,
166    /// Exception handler stack.
167    pub(crate) exception_handlers: Vec<ExceptionHandler>,
168    /// Spawned async task handles.
169    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
170    /// Shared process-local synchronization primitives inherited by child VMs.
171    pub(crate) sync_runtime: Arc<crate::synchronization::VmSyncRuntime>,
172    /// Shared process-local cells, maps, and mailboxes inherited by child VMs.
173    pub(crate) shared_state_runtime: Rc<crate::shared_state::VmSharedStateRuntime>,
174    /// Permits acquired by lexical synchronization blocks in this VM.
175    pub(crate) held_sync_guards: Vec<crate::synchronization::VmSyncHeldGuard>,
176    /// Counter for generating unique task IDs.
177    pub(crate) task_counter: u64,
178    /// Counter for logical runtime-context task groups.
179    pub(crate) runtime_context_counter: u64,
180    /// Logical runtime task context visible through `runtime_context()`.
181    pub(crate) runtime_context: crate::runtime_context::RuntimeContext,
182    /// Active deadline stack: (deadline_instant, frame_depth).
183    pub(crate) deadlines: Vec<(Instant, usize)>,
184    /// Breakpoints, keyed by source-file path so a breakpoint at line N
185    /// in `auto.harn` doesn't also fire when execution hits line N in an
186    /// imported lib. The empty-string key is a wildcard used by callers
187    /// that don't track source paths (legacy `set_breakpoints` API).
188    pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
189    /// Function-name breakpoints. Any closure call whose
190    /// `CompiledFunction.name` matches an entry here raises a stop on
191    /// entry, regardless of the call site's file or line. Lets the IDE
192    /// break on `llm_call` / `host_run_pipeline` / any user pipeline
193    /// function without pinning down a source location first.
194    pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
195    /// Latched on `push_closure_frame` when the callee's name matches
196    /// `function_breakpoints`; consumed by the next step so the stop is
197    /// reported with reason="function breakpoint" and the breakpoint
198    /// name available for the DAP `stopped` event.
199    pub(crate) pending_function_bp: Option<String>,
200    /// Whether the VM is in step mode.
201    pub(crate) step_mode: bool,
202    /// The frame depth at which stepping started (for step-over).
203    pub(crate) step_frame_depth: usize,
204    /// Whether the VM is currently stopped at a debug point.
205    pub(crate) stopped: bool,
206    /// Last source line executed (to detect line changes).
207    pub(crate) last_line: usize,
208    /// Source directory for resolving imports.
209    pub(crate) source_dir: Option<std::path::PathBuf>,
210    /// Modules currently being imported (cycle prevention).
211    pub(crate) imported_paths: Vec<std::path::PathBuf>,
212    /// Loaded module cache keyed by canonical or synthetic module path.
213    pub(crate) module_cache: Rc<BTreeMap<std::path::PathBuf, LoadedModule>>,
214    /// Source text keyed by canonical or synthetic module path for debugger retrieval.
215    pub(crate) source_cache: Rc<BTreeMap<std::path::PathBuf, String>>,
216    /// Source file path for error reporting.
217    pub(crate) source_file: Option<String>,
218    /// Source text for error reporting.
219    pub(crate) source_text: Option<String>,
220    /// Optional bridge for delegating unknown builtins in bridge mode.
221    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
222    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
223    pub(crate) denied_builtins: Rc<HashSet<String>>,
224    /// Cancellation token for cooperative graceful shutdown (set by parent).
225    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
226    pub(crate) interrupt_signal_token: Option<std::sync::Arc<std::sync::Mutex<Option<String>>>>,
227    /// Remaining instruction-boundary checks before a requested host
228    /// cancellation is forcefully raised. This gives `is_cancelled()` loops a
229    /// deterministic chance to return cleanly without letting non-cooperative
230    /// CPU-bound code run forever.
231    pub(crate) cancel_grace_instructions_remaining: Option<usize>,
232    /// User-visible interrupt handlers registered through `std/signal`.
233    pub(crate) interrupt_handlers: Vec<InterruptHandler>,
234    pub(crate) next_interrupt_handle: i64,
235    pub(crate) pending_interrupt_signal: Option<String>,
236    pub(crate) interrupted: bool,
237    pub(crate) dispatching_interrupt: bool,
238    pub(crate) interrupt_handler_deadline: Option<Instant>,
239    /// Captured stack trace from the most recent error (fn_name, line, col).
240    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
241    /// Yield channel sender for generator execution. When set, `Op::Yield`
242    /// sends values through this channel instead of being a no-op.
243    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<Result<VmValue, VmError>>>,
244    /// Project root directory (detected via harn.toml).
245    /// Used as base directory for metadata, store, and checkpoint operations.
246    pub(crate) project_root: Option<std::path::PathBuf>,
247    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
248    /// after the environment, so user-defined variables can shadow them.
249    pub(crate) globals: Rc<BTreeMap<String, VmValue>>,
250    /// Optional debugger hook invoked when execution advances to a new source line.
251    pub(crate) debug_hook: Option<Box<DebugHook>>,
252}
253
254/// Reusable VM baseline for hosts that need many clean executions with the
255/// same stable builtin/source setup.
256///
257/// The baseline intentionally does not snapshot execution state. Each
258/// instantiation gets fresh stacks, frames, tasks, cancellation fields, sync
259/// primitives, shared cells/maps/mailboxes, and debug state. Builtin tables are
260/// shared through `Rc` until a per-execution rebind needs copy-on-write.
261#[derive(Clone)]
262pub struct VmBaseline {
263    builtins: Rc<BTreeMap<String, VmBuiltinFn>>,
264    async_builtins: Rc<BTreeMap<String, VmAsyncBuiltinFn>>,
265    builtin_metadata: Rc<BTreeMap<String, VmBuiltinMetadata>>,
266    builtins_by_id: Rc<BTreeMap<BuiltinId, VmBuiltinEntry>>,
267    builtin_id_collisions: Rc<HashSet<BuiltinId>>,
268    deferred_builtin_registrars: Rc<BTreeMap<String, DeferredBuiltinRegistrar>>,
269    source_dir: Option<std::path::PathBuf>,
270    source_file: Option<String>,
271    source_text: Option<String>,
272    project_root: Option<std::path::PathBuf>,
273    globals: Rc<BTreeMap<String, VmValue>>,
274    denied_builtins: Rc<HashSet<String>>,
275}
276
277impl VmBaseline {
278    pub fn from_vm(vm: &Vm) -> Self {
279        Self {
280            builtins: Rc::clone(&vm.builtins),
281            async_builtins: Rc::clone(&vm.async_builtins),
282            builtin_metadata: Rc::clone(&vm.builtin_metadata),
283            builtins_by_id: Rc::clone(&vm.builtins_by_id),
284            builtin_id_collisions: Rc::clone(&vm.builtin_id_collisions),
285            deferred_builtin_registrars: Rc::clone(&vm.deferred_builtin_registrars),
286            source_dir: vm.source_dir.clone(),
287            source_file: vm.source_file.clone(),
288            source_text: vm.source_text.clone(),
289            project_root: vm.project_root.clone(),
290            globals: Rc::clone(&vm.globals),
291            denied_builtins: Rc::clone(&vm.denied_builtins),
292        }
293    }
294
295    pub fn instantiate(&self) -> Vm {
296        let mut source_cache = BTreeMap::new();
297        if let (Some(file), Some(text)) = (&self.source_file, &self.source_text) {
298            source_cache.insert(std::path::PathBuf::from(file), text.clone());
299        }
300        if let Some(dir) = &self.source_dir {
301            crate::stdlib::set_thread_source_dir(dir);
302        }
303
304        let mut vm = Vm {
305            stack: Vec::with_capacity(256),
306            env: VmEnv::new(),
307            output: String::new(),
308            builtins: Rc::clone(&self.builtins),
309            async_builtins: Rc::clone(&self.async_builtins),
310            builtin_metadata: Rc::clone(&self.builtin_metadata),
311            builtins_by_id: Rc::clone(&self.builtins_by_id),
312            builtin_id_collisions: Rc::clone(&self.builtin_id_collisions),
313            deferred_builtin_registrars: Rc::clone(&self.deferred_builtin_registrars),
314            iterators: Vec::new(),
315            frames: Vec::new(),
316            exception_handlers: Vec::new(),
317            spawned_tasks: BTreeMap::new(),
318            sync_runtime: Arc::new(crate::synchronization::VmSyncRuntime::new()),
319            shared_state_runtime: Rc::new(crate::shared_state::VmSharedStateRuntime::new()),
320            held_sync_guards: Vec::new(),
321            task_counter: 0,
322            runtime_context_counter: 0,
323            runtime_context: crate::runtime_context::RuntimeContext::root(),
324            deadlines: Vec::new(),
325            breakpoints: BTreeMap::new(),
326            function_breakpoints: std::collections::BTreeSet::new(),
327            pending_function_bp: None,
328            step_mode: false,
329            step_frame_depth: 0,
330            stopped: false,
331            last_line: 0,
332            source_dir: self.source_dir.clone(),
333            imported_paths: Vec::new(),
334            module_cache: Rc::new(BTreeMap::new()),
335            source_cache: Rc::new(source_cache),
336            source_file: self.source_file.clone(),
337            source_text: self.source_text.clone(),
338            bridge: None,
339            denied_builtins: Rc::clone(&self.denied_builtins),
340            cancel_token: None,
341            interrupt_signal_token: None,
342            cancel_grace_instructions_remaining: None,
343            interrupt_handlers: Vec::new(),
344            next_interrupt_handle: 1,
345            pending_interrupt_signal: None,
346            interrupted: false,
347            dispatching_interrupt: false,
348            interrupt_handler_deadline: None,
349            error_stack_trace: Vec::new(),
350            yield_sender: None,
351            project_root: self.project_root.clone(),
352            globals: Rc::clone(&self.globals),
353            debug_hook: None,
354        };
355
356        crate::stdlib::rebind_execution_state_builtins(&mut vm);
357        vm
358    }
359}
360
361impl Vm {
362    pub(crate) fn fresh_local_slots(chunk: &Chunk) -> Vec<LocalSlot> {
363        chunk
364            .local_slots
365            .iter()
366            .map(|_| LocalSlot {
367                value: VmValue::Nil,
368                initialized: false,
369                synced: false,
370            })
371            .collect()
372    }
373
374    pub(crate) fn bind_param_slots(
375        slots: &mut [LocalSlot],
376        func: &crate::chunk::CompiledFunction,
377        args: &[VmValue],
378        synced: bool,
379    ) {
380        Self::bind_param_slots_args(slots, func, &super::CallArgs::Slice(args), synced);
381    }
382
383    pub(crate) fn bind_param_slots_args(
384        slots: &mut [LocalSlot],
385        func: &crate::chunk::CompiledFunction,
386        args: &super::CallArgs<'_>,
387        synced: bool,
388    ) {
389        let param_count = func.params.len();
390        for (i, _param) in func.params.iter().enumerate() {
391            if i >= slots.len() {
392                break;
393            }
394            if func.has_rest_param && i == param_count - 1 {
395                let rest_args = args.to_vec_from(i);
396                slots[i].value = VmValue::List(Rc::new(rest_args));
397                slots[i].initialized = true;
398                slots[i].synced = synced;
399            } else if let Some(arg) = args.get(i) {
400                slots[i].value = arg.clone();
401                slots[i].initialized = true;
402                slots[i].synced = synced;
403            }
404        }
405    }
406
407    pub(crate) fn visible_variables(&self) -> BTreeMap<String, VmValue> {
408        let mut vars = self.env.all_variables();
409        let Some(frame) = self.frames.last() else {
410            return vars;
411        };
412        for (slot, info) in frame.local_slots.iter().zip(frame.chunk.local_slots.iter()) {
413            if slot.initialized && info.scope_depth <= frame.local_scope_depth {
414                vars.insert(info.name.clone(), slot.value.clone());
415            }
416        }
417        vars
418    }
419
420    pub(crate) fn sync_current_frame_locals_to_env(&mut self) {
421        let frames = &mut self.frames;
422        let env = &mut self.env;
423        let Some(frame) = frames.last_mut() else {
424            return;
425        };
426        let local_scope_base = frame.local_scope_base;
427        let local_scope_depth = frame.local_scope_depth;
428        for (slot, info) in frame
429            .local_slots
430            .iter_mut()
431            .zip(frame.chunk.local_slots.iter())
432        {
433            if slot.initialized && !slot.synced && info.scope_depth <= local_scope_depth {
434                slot.synced = true;
435                let scope_idx = local_scope_base + info.scope_depth;
436                while env.scopes.len() <= scope_idx {
437                    env.push_scope();
438                }
439                Rc::make_mut(&mut env.scopes[scope_idx].vars)
440                    .insert(info.name.clone(), (slot.value.clone(), info.mutable));
441            }
442        }
443    }
444
445    pub(crate) fn closure_call_env_for_current_frame(
446        &self,
447        closure: &crate::value::VmClosure,
448    ) -> VmEnv {
449        if closure.module_state.is_some() {
450            return closure.env.clone();
451        }
452        let mut call_env = Self::closure_call_env(&self.env, closure);
453        let Some(frame) = self.frames.last() else {
454            return call_env;
455        };
456        for (slot, info) in frame
457            .local_slots
458            .iter()
459            .zip(frame.chunk.local_slots.iter())
460            .filter(|(slot, info)| slot.initialized && info.scope_depth <= frame.local_scope_depth)
461        {
462            if matches!(slot.value, VmValue::Closure(_)) && !call_env.contains(&info.name) {
463                let _ = call_env.define(&info.name, slot.value.clone(), info.mutable);
464            }
465        }
466        call_env
467    }
468
469    pub(crate) fn active_local_slot_value(&self, name: &str) -> Option<VmValue> {
470        let frame = self.frames.last()?;
471        let idx = self.active_local_slot_index(name)?;
472        frame.local_slots.get(idx).map(|slot| slot.value.clone())
473    }
474
475    /// Returns the slot index of an initialized active local with the given
476    /// name, walking from innermost to outermost scope. Used by hot paths
477    /// (subscript-store, etc.) that want to mutate the slot value in place
478    /// without paying a defensive `VmValue::clone` first.
479    pub(crate) fn active_local_slot_index(&self, name: &str) -> Option<usize> {
480        let frame = self.frames.last()?;
481        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
482            if info.name == name && info.scope_depth <= frame.local_scope_depth {
483                if let Some(slot) = frame.local_slots.get(idx) {
484                    if slot.initialized {
485                        return Some(idx);
486                    }
487                }
488            }
489        }
490        None
491    }
492
493    pub(crate) fn assign_active_local_slot(
494        &mut self,
495        name: &str,
496        value: VmValue,
497        debug: bool,
498    ) -> Result<bool, VmError> {
499        let Some(frame) = self.frames.last_mut() else {
500            return Ok(false);
501        };
502        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
503            if info.name == name && info.scope_depth <= frame.local_scope_depth {
504                if !debug && !info.mutable {
505                    return Err(VmError::ImmutableAssignment(name.to_string()));
506                }
507                if let Some(slot) = frame.local_slots.get_mut(idx) {
508                    slot.value = value;
509                    slot.initialized = true;
510                    slot.synced = false;
511                    return Ok(true);
512                }
513            }
514        }
515        Ok(false)
516    }
517
518    pub fn new() -> Self {
519        Self {
520            stack: Vec::with_capacity(256),
521            env: VmEnv::new(),
522            output: String::new(),
523            builtins: Rc::new(BTreeMap::new()),
524            async_builtins: Rc::new(BTreeMap::new()),
525            builtin_metadata: Rc::new(BTreeMap::new()),
526            builtins_by_id: Rc::new(BTreeMap::new()),
527            builtin_id_collisions: Rc::new(HashSet::new()),
528            deferred_builtin_registrars: Rc::new(BTreeMap::new()),
529            iterators: Vec::new(),
530            frames: Vec::new(),
531            exception_handlers: Vec::new(),
532            spawned_tasks: BTreeMap::new(),
533            sync_runtime: Arc::new(crate::synchronization::VmSyncRuntime::new()),
534            shared_state_runtime: Rc::new(crate::shared_state::VmSharedStateRuntime::new()),
535            held_sync_guards: Vec::new(),
536            task_counter: 0,
537            runtime_context_counter: 0,
538            runtime_context: crate::runtime_context::RuntimeContext::root(),
539            deadlines: Vec::new(),
540            breakpoints: BTreeMap::new(),
541            function_breakpoints: std::collections::BTreeSet::new(),
542            pending_function_bp: None,
543            step_mode: false,
544            step_frame_depth: 0,
545            stopped: false,
546            last_line: 0,
547            source_dir: None,
548            imported_paths: Vec::new(),
549            module_cache: Rc::new(BTreeMap::new()),
550            source_cache: Rc::new(BTreeMap::new()),
551            source_file: None,
552            source_text: None,
553            bridge: None,
554            denied_builtins: Rc::new(HashSet::new()),
555            cancel_token: None,
556            interrupt_signal_token: None,
557            cancel_grace_instructions_remaining: None,
558            interrupt_handlers: Vec::new(),
559            next_interrupt_handle: 1,
560            pending_interrupt_signal: None,
561            interrupted: false,
562            dispatching_interrupt: false,
563            interrupt_handler_deadline: None,
564            error_stack_trace: Vec::new(),
565            yield_sender: None,
566            project_root: None,
567            globals: Rc::new(BTreeMap::new()),
568            debug_hook: None,
569        }
570    }
571
572    pub fn baseline(&self) -> VmBaseline {
573        VmBaseline::from_vm(self)
574    }
575
576    /// Returns true if any debugging affordance is active — DAP hook,
577    /// line breakpoints, or function breakpoints. Call-site code uses
578    /// this to decide whether to capture per-frame restart snapshots
579    /// (`initial_env`, `initial_local_slots`); without a debugger those
580    /// snapshots are dead weight, so skipping them removes two
581    /// allocations from every function call hot path.
582    ///
583    /// All three signals are stable across a function call's lifetime
584    /// (they're set before pipeline execution starts), so the gate is
585    /// consistent between frame creation and any later `restart_frame`
586    /// invocation. The three `is_empty` checks compile to a handful of
587    /// branch-predicted memory probes — cheaper than a single
588    /// `BTreeMap` clone, which is what we're avoiding.
589    #[inline]
590    pub(crate) fn debugger_attached(&self) -> bool {
591        self.debug_hook.is_some()
592            || !self.breakpoints.is_empty()
593            || !self.function_breakpoints.is_empty()
594    }
595
596    /// Set the bridge for delegating unknown builtins in bridge mode.
597    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
598        self.bridge = Some(bridge);
599    }
600
601    /// Set builtins that are denied in sandbox mode.
602    /// When called, the given builtin names will produce a permission error.
603    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
604        self.denied_builtins = Rc::new(denied);
605    }
606
607    /// Set source info for error reporting (file path and source text).
608    pub fn set_source_info(&mut self, file: &str, text: &str) {
609        self.source_file = Some(file.to_string());
610        self.source_text = Some(text.to_string());
611        Rc::make_mut(&mut self.source_cache)
612            .insert(std::path::PathBuf::from(file), text.to_string());
613    }
614
615    /// Initialize execution (push the initial frame).
616    pub fn start(&mut self, chunk: &Chunk) {
617        // The top-level pipeline frame captures env at start so
618        // restartFrame on the outermost frame rewinds to the
619        // pre-pipeline state — basically "restart session" in
620        // debugger terms. Skipped when no debugger is attached:
621        // the snapshot is dead weight in that case and dominates
622        // call-overhead bench numbers (~5-10%).
623        let debugger = self.debugger_attached();
624        let initial_env = if debugger {
625            Some(self.env.clone())
626        } else {
627            None
628        };
629        let initial_local_slots = if debugger {
630            Some(Self::fresh_local_slots(chunk))
631        } else {
632            None
633        };
634        self.frames.push(CallFrame {
635            chunk: Rc::new(chunk.clone()),
636            ip: 0,
637            stack_base: self.stack.len(),
638            saved_env: self.env.clone(),
639            initial_env,
640            initial_local_slots,
641            saved_iterator_depth: self.iterators.len(),
642            fn_name: String::new(),
643            argc: 0,
644            saved_source_dir: None,
645            module_functions: None,
646            module_state: None,
647            local_slots: Self::fresh_local_slots(chunk),
648            local_scope_base: self.env.scope_depth().saturating_sub(1),
649            local_scope_depth: 0,
650        });
651    }
652
653    /// Create a child VM that shares builtins and env but has fresh execution state.
654    /// Used for parallel/spawn to fork the VM for concurrent tasks.
655    pub(crate) fn child_vm(&self) -> Vm {
656        Vm {
657            stack: Vec::with_capacity(64),
658            env: self.env.clone(),
659            output: String::new(),
660            builtins: Rc::clone(&self.builtins),
661            async_builtins: Rc::clone(&self.async_builtins),
662            builtin_metadata: Rc::clone(&self.builtin_metadata),
663            builtins_by_id: Rc::clone(&self.builtins_by_id),
664            builtin_id_collisions: Rc::clone(&self.builtin_id_collisions),
665            deferred_builtin_registrars: Rc::clone(&self.deferred_builtin_registrars),
666            iterators: Vec::new(),
667            frames: Vec::new(),
668            exception_handlers: Vec::new(),
669            spawned_tasks: BTreeMap::new(),
670            sync_runtime: self.sync_runtime.clone(),
671            shared_state_runtime: self.shared_state_runtime.clone(),
672            held_sync_guards: Vec::new(),
673            task_counter: 0,
674            runtime_context_counter: self.runtime_context_counter,
675            runtime_context: self.runtime_context.clone(),
676            deadlines: self.deadlines.clone(),
677            breakpoints: BTreeMap::new(),
678            function_breakpoints: std::collections::BTreeSet::new(),
679            pending_function_bp: None,
680            step_mode: false,
681            step_frame_depth: 0,
682            stopped: false,
683            last_line: 0,
684            source_dir: self.source_dir.clone(),
685            imported_paths: Vec::new(),
686            module_cache: Rc::clone(&self.module_cache),
687            source_cache: Rc::clone(&self.source_cache),
688            source_file: self.source_file.clone(),
689            source_text: self.source_text.clone(),
690            bridge: self.bridge.clone(),
691            denied_builtins: Rc::clone(&self.denied_builtins),
692            cancel_token: self.cancel_token.clone(),
693            interrupt_signal_token: self.interrupt_signal_token.clone(),
694            cancel_grace_instructions_remaining: None,
695            interrupt_handlers: Vec::new(),
696            next_interrupt_handle: 1,
697            pending_interrupt_signal: None,
698            interrupted: self.interrupted,
699            dispatching_interrupt: false,
700            interrupt_handler_deadline: None,
701            error_stack_trace: Vec::new(),
702            yield_sender: None,
703            project_root: self.project_root.clone(),
704            globals: Rc::clone(&self.globals),
705            debug_hook: None,
706        }
707    }
708
709    /// Create a child VM for external adapters that need to invoke Harn
710    /// closures while sharing the parent's builtins, globals, and module state.
711    pub(crate) fn child_vm_for_host(&self) -> Vm {
712        self.child_vm()
713    }
714
715    /// Request cancellation for every outstanding child task owned by this VM
716    /// and then abort the join handles. This prevents un-awaited spawned tasks
717    /// from outliving their parent execution scope.
718    pub(crate) fn cancel_spawned_tasks(&mut self) {
719        for (_, task) in std::mem::take(&mut self.spawned_tasks) {
720            task.cancel_token
721                .store(true, std::sync::atomic::Ordering::SeqCst);
722            task.handle.abort();
723        }
724    }
725
726    /// Set the source directory for import resolution and introspection.
727    /// Also auto-detects the project root if not already set.
728    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
729        let dir = crate::stdlib::process::normalize_context_path(dir);
730        self.source_dir = Some(dir.clone());
731        crate::stdlib::set_thread_source_dir(&dir);
732        // Auto-detect project root if not explicitly set.
733        if self.project_root.is_none() {
734            self.project_root = crate::stdlib::process::find_project_root(&dir);
735        }
736    }
737
738    /// Explicitly set the project root directory.
739    /// Used by ACP/CLI to override auto-detection.
740    pub fn set_project_root(&mut self, root: &std::path::Path) {
741        self.project_root = Some(root.to_path_buf());
742    }
743
744    /// Get the project root directory, falling back to source_dir.
745    pub fn project_root(&self) -> Option<&std::path::Path> {
746        self.project_root.as_deref().or(self.source_dir.as_deref())
747    }
748
749    /// Return all registered builtin names (sync + async).
750    pub fn builtin_names(&self) -> Vec<String> {
751        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
752        names.extend(self.async_builtins.keys().cloned());
753        names
754    }
755
756    /// Return discoverable metadata for registered builtins.
757    pub fn builtin_metadata(&self) -> Vec<VmBuiltinMetadata> {
758        self.builtin_metadata.values().cloned().collect()
759    }
760
761    /// Return discoverable metadata for a registered builtin name.
762    pub fn builtin_metadata_for(&self, name: &str) -> Option<&VmBuiltinMetadata> {
763        self.builtin_metadata.get(name)
764    }
765
766    /// Set a global constant (e.g. `pi`, `e`).
767    /// Stored separately from the environment so user-defined variables can shadow them.
768    pub fn set_global(&mut self, name: &str, value: VmValue) {
769        Rc::make_mut(&mut self.globals).insert(name.to_string(), value);
770    }
771
772    /// Install the script's `Harness` capability handle as the `harness`
773    /// global so the auto-call emitted by `Compiler::compile()` (for
774    /// `fn main(harness: Harness)` entrypoints) can read it. Hosts that
775    /// drive the VM directly (CLI, MCP server, composition runtime) call
776    /// this once before `execute()`.
777    pub fn set_harness(&mut self, harness: crate::harness::Harness) {
778        self.set_global("harness", harness.into_vm_value());
779    }
780
781    /// Get the captured output.
782    pub fn output(&self) -> &str {
783        &self.output
784    }
785
786    /// Drain and return the captured output, leaving the buffer empty.
787    /// Used by the async-builtin dispatch path to forward closure output
788    /// from a child VM back to its parent.
789    pub fn take_output(&mut self) -> String {
790        std::mem::take(&mut self.output)
791    }
792
793    /// Append text to this VM's captured output. Used to forward output
794    /// from child VMs (e.g. closures invoked via `call_closure_pub`)
795    /// back into the parent stream.
796    pub fn append_output(&mut self, text: &str) {
797        self.output.push_str(text);
798    }
799
800    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
801        self.stack.pop().ok_or(VmError::StackUnderflow)
802    }
803
804    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
805        self.stack.last().ok_or(VmError::StackUnderflow)
806    }
807
808    pub(crate) fn const_string(c: &Constant) -> Result<String, VmError> {
809        match c {
810            Constant::String(s) => Ok(s.clone()),
811            _ => Err(VmError::TypeError("expected string constant".into())),
812        }
813    }
814
815    pub(crate) fn const_str(c: &Constant) -> Result<&str, VmError> {
816        match c {
817            Constant::String(s) => Ok(s.as_str()),
818            _ => Err(VmError::TypeError("expected string constant".into())),
819        }
820    }
821
822    pub(crate) fn release_sync_guards_for_current_scope(&mut self) {
823        let depth = self.env.scope_depth();
824        self.held_sync_guards
825            .retain(|guard| guard.env_scope_depth < depth);
826    }
827
828    pub(crate) fn release_sync_guards_after_unwind(
829        &mut self,
830        frame_depth: usize,
831        env_scope_depth: usize,
832    ) {
833        self.held_sync_guards.retain(|guard| {
834            guard.frame_depth <= frame_depth && guard.env_scope_depth <= env_scope_depth
835        });
836    }
837
838    pub(crate) fn release_sync_guards_for_frame(&mut self, frame_depth: usize) {
839        self.held_sync_guards
840            .retain(|guard| guard.frame_depth != frame_depth);
841    }
842}
843
844impl Drop for Vm {
845    fn drop(&mut self) {
846        self.cancel_spawned_tasks();
847    }
848}
849
850impl Default for Vm {
851    fn default() -> Self {
852        Self::new()
853    }
854}
855
#[cfg(test)]
mod tests {
    use std::rc::Rc;

    use super::*;

    /// Script used by the shared-state test: prints the cell's current value
    /// and then increments it.
    const SHARED_CELL_SOURCE: &str = r#"
pipeline main() {
  let cell = shared_cell({scope: "task_group", key: "turn", initial: 0})
  __io_println(shared_get(cell))
  shared_set(cell, shared_get(cell) + 1)
}"#;

    /// Build a baseline from a VM that has the stdlib registered, source
    /// info attached, and one known global installed.
    fn baseline_with_stdlib(source: &str) -> VmBaseline {
        let mut template = Vm::new();
        crate::register_vm_stdlib(&mut template);
        template.set_source_info("baseline_test.harn", source);
        template.set_global("stable_global", VmValue::String(Rc::from("baseline")));
        template.baseline()
    }

    #[test]
    fn vm_baseline_instantiates_clean_mutable_execution_state() {
        let baseline = baseline_with_stdlib("pipeline main() { __io_println(stable_global) }");

        // Scribble over one instance's mutable state ...
        let mut polluted = baseline.instantiate();
        polluted.stack.push(VmValue::Int(42));
        polluted.output.push_str("dirty");
        polluted.task_counter = 9;
        polluted.runtime_context_counter = 7;
        polluted
            .error_stack_trace
            .push(("main".to_string(), 1, 1, None));

        // ... and check that a later instance starts from a pristine state.
        let fresh = baseline.instantiate();
        assert!(fresh.stack.is_empty());
        assert!(fresh.output.is_empty());
        assert!(fresh.frames.is_empty());
        assert!(fresh.exception_handlers.is_empty());
        assert!(fresh.spawned_tasks.is_empty());
        assert!(fresh.held_sync_guards.is_empty());
        assert_eq!(fresh.task_counter, 0);
        assert_eq!(fresh.runtime_context_counter, 0);
        assert!(fresh.deadlines.is_empty());
        assert!(fresh.cancel_token.is_none());
        assert!(fresh.interrupt_handlers.is_empty());
        assert!(fresh.error_stack_trace.is_empty());
        assert!(fresh.bridge.is_none());

        // Globals installed before the baseline was taken must still be there.
        let stable_global_survives = fresh
            .globals
            .get("stable_global")
            .is_some_and(|global| global.display() == "baseline");
        assert!(stable_global_survives);
    }

    #[tokio::test(flavor = "current_thread")]
    async fn vm_baseline_rebinds_shared_state_builtins_per_instance() {
        let local_set = tokio::task::LocalSet::new();
        local_set
            .run_until(async {
                let chunk = crate::compile_source(SHARED_CELL_SOURCE).expect("compile");
                let baseline = baseline_with_stdlib(SHARED_CELL_SOURCE);

                let mut first_vm = baseline.instantiate();
                first_vm.execute(&chunk).await.expect("first execute");
                assert_eq!(first_vm.output(), "0\n");

                let mut second_vm = baseline.instantiate();
                second_vm.execute(&chunk).await.expect("second execute");
                assert_eq!(
                    second_vm.output(),
                    "0\n",
                    "shared state created by the first VM must not leak into the next baseline instance"
                );
            })
            .await;
    }
}