Skip to main content

harn_vm/vm/
state.rs

1use std::collections::{BTreeMap, HashSet};
2use std::rc::Rc;
3use std::sync::Arc;
4use std::time::Instant;
5
6use crate::chunk::{Chunk, ChunkRef, Constant};
7use crate::value::{
8    ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
9};
10use crate::BuiltinId;
11
12use super::debug::DebugHook;
13use super::modules::LoadedModule;
14
15/// RAII guard that starts a tracing span on creation and ends it on drop.
16pub(crate) struct ScopeSpan(u64);
17
18impl ScopeSpan {
19    pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
20        Self(crate::tracing::span_start(kind, name))
21    }
22}
23
24impl Drop for ScopeSpan {
25    fn drop(&mut self) {
26        crate::tracing::span_end(self.0);
27    }
28}
29
/// Storage for one slot-indexed local variable of a call frame.
#[derive(Clone)]
pub(crate) struct LocalSlot {
    /// Current value; `VmValue::Nil` in a freshly created slot.
    pub(crate) value: VmValue,
    /// `false` until the slot is bound or assigned. Readers in this file
    /// skip slots that are not initialized.
    pub(crate) initialized: bool,
    /// Whether the current value has been copied into the name-keyed env.
    /// Cleared by `assign_active_local_slot` and set by
    /// `sync_current_frame_locals_to_env`.
    pub(crate) synced: bool,
}
36
/// Call frame for function execution.
pub(crate) struct CallFrame {
    /// Chunk associated with this frame; its `local_slots` metadata is
    /// zipped with this frame's `local_slots` values.
    pub(crate) chunk: ChunkRef,
    /// Presumably the instruction pointer into `chunk` (the top-level frame
    /// starts at 0) — confirm against the dispatch loop.
    pub(crate) ip: usize,
    /// For the top-level frame this is the value-stack length at the moment
    /// the frame was pushed (see `Vm::start`).
    pub(crate) stack_base: usize,
    /// Env captured when the frame was pushed. Presumably restored when the
    /// frame is popped — confirm against the frame-pop code.
    pub(crate) saved_env: VmEnv,
    /// Env snapshot captured at call-time, *after* argument binding. Used
    /// by the debugger's `restartFrame` to rewind this frame to its
    /// entry state (re-binding args from the original values) without
    /// re-entering the call site. Cheap to clone because `VmEnv` is
    /// already cloned into `saved_env` on every call. `None` for
    /// scratch frames (evaluate, import init) where restart isn't
    /// meaningful.
    pub(crate) initial_env: Option<VmEnv>,
    /// Local-slot snapshot stored alongside `initial_env`; `Vm::start`
    /// stores a fresh, uninitialized slot vector here. Presumably also
    /// consumed by `restartFrame` — TODO confirm.
    pub(crate) initial_local_slots: Option<Vec<LocalSlot>>,
    /// Iterator stack depth to restore when this frame unwinds.
    pub(crate) saved_iterator_depth: usize,
    /// Function name for stack traces (empty for top-level pipeline).
    pub(crate) fn_name: String,
    /// Number of arguments actually passed by the caller (for default arg support).
    pub(crate) argc: usize,
    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
    /// Set when entering a closure that originated from an imported module.
    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
    /// Module-local named functions available to symbolic calls within this frame.
    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
    /// Shared module-level env for top-level `var` / `let` bindings of
    /// this frame's originating module. Looked up after `self.env` and
    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
    /// its own live static state that persists across calls. See the
    /// `module_state` field on `VmClosure` for the full rationale.
    pub(crate) module_state: Option<crate::value::ModuleState>,
    /// Slot-indexed locals for compiler-resolved names in this frame.
    pub(crate) local_slots: Vec<LocalSlot>,
    /// Env scope index that corresponds to compiler local scope depth 0.
    pub(crate) local_scope_base: usize,
    /// Current compiler local scope depth, updated by PushScope/PopScope.
    pub(crate) local_scope_depth: usize,
}
76
/// Exception handler for try/catch.
///
/// NOTE(review): `catch_ip` presumably is the jump target for the catch
/// block, and the three `*_depth` fields presumably record the value-stack,
/// call-frame, and env-scope depths to unwind to — confirm against the
/// throw/unwind code, which is not part of this file.
pub(crate) struct ExceptionHandler {
    pub(crate) catch_ip: usize,
    pub(crate) stack_depth: usize,
    pub(crate) frame_depth: usize,
    pub(crate) env_scope_depth: usize,
    /// If non-empty, this catch only handles errors whose enum_name matches.
    pub(crate) error_type: String,
}
86
87/// Iterator state for for-in loops.
88pub(crate) enum IterState {
89    Vec {
90        items: Rc<Vec<VmValue>>,
91        idx: usize,
92    },
93    Dict {
94        entries: Rc<BTreeMap<String, VmValue>>,
95        keys: Vec<String>,
96        idx: usize,
97    },
98    Channel {
99        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
100        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
101    },
102    Generator {
103        gen: crate::value::VmGenerator,
104    },
105    /// Step through a lazy range without materializing a Vec.
106    /// `next` holds the value to emit on the next IterNext; `stop` is
107    /// the first value that terminates the iteration (one past the end).
108    Range {
109        next: i64,
110        stop: i64,
111    },
112    VmIter {
113        handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
114    },
115}
116
/// Callable stored in a builtin-table entry: either a synchronous builtin
/// function or an asynchronous one.
#[derive(Clone)]
pub(crate) enum VmBuiltinDispatch {
    /// Wraps a `VmBuiltinFn`.
    Sync(VmBuiltinFn),
    /// Wraps a `VmAsyncBuiltinFn`.
    Async(VmAsyncBuiltinFn),
}
122
/// Entry of the id-keyed builtin index (`Vm::builtins_by_id`): the builtin's
/// name together with the function to dispatch to.
#[derive(Clone)]
pub(crate) struct VmBuiltinEntry {
    /// Builtin name, stored as a shared `Rc<str>`.
    pub(crate) name: Rc<str>,
    /// Sync or async implementation to invoke.
    pub(crate) dispatch: VmBuiltinDispatch,
}
128
129/// The Harn bytecode virtual machine.
130pub struct Vm {
131    pub(crate) stack: Vec<VmValue>,
132    pub(crate) env: VmEnv,
133    pub(crate) output: String,
134    pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
135    pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
136    /// Numeric side index for builtins. Name-keyed maps remain authoritative;
137    /// this index is the hot path for direct builtin bytecode and callback refs.
138    pub(crate) builtins_by_id: BTreeMap<BuiltinId, VmBuiltinEntry>,
139    /// IDs with detected name collisions. Collided names safely fall back to
140    /// the authoritative name-keyed lookup path.
141    pub(crate) builtin_id_collisions: HashSet<BuiltinId>,
142    /// Iterator state for for-in loops.
143    pub(crate) iterators: Vec<IterState>,
144    /// Call frame stack.
145    pub(crate) frames: Vec<CallFrame>,
146    /// Exception handler stack.
147    pub(crate) exception_handlers: Vec<ExceptionHandler>,
148    /// Spawned async task handles.
149    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
150    /// Shared process-local synchronization primitives inherited by child VMs.
151    pub(crate) sync_runtime: Arc<crate::synchronization::VmSyncRuntime>,
152    /// Shared process-local cells, maps, and mailboxes inherited by child VMs.
153    pub(crate) shared_state_runtime: Rc<crate::shared_state::VmSharedStateRuntime>,
154    /// Permits acquired by lexical synchronization blocks in this VM.
155    pub(crate) held_sync_guards: Vec<crate::synchronization::VmSyncHeldGuard>,
156    /// Counter for generating unique task IDs.
157    pub(crate) task_counter: u64,
158    /// Counter for logical runtime-context task groups.
159    pub(crate) runtime_context_counter: u64,
160    /// Logical runtime task context visible through `runtime_context()`.
161    pub(crate) runtime_context: crate::runtime_context::RuntimeContext,
162    /// Active deadline stack: (deadline_instant, frame_depth).
163    pub(crate) deadlines: Vec<(Instant, usize)>,
164    /// Breakpoints, keyed by source-file path so a breakpoint at line N
165    /// in `auto.harn` doesn't also fire when execution hits line N in an
166    /// imported lib. The empty-string key is a wildcard used by callers
167    /// that don't track source paths (legacy `set_breakpoints` API).
168    pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
169    /// Function-name breakpoints. Any closure call whose
170    /// `CompiledFunction.name` matches an entry here raises a stop on
171    /// entry, regardless of the call site's file or line. Lets the IDE
172    /// break on `llm_call` / `host_run_pipeline` / any user pipeline
173    /// function without pinning down a source location first.
174    pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
175    /// Latched on `push_closure_frame` when the callee's name matches
176    /// `function_breakpoints`; consumed by the next step so the stop is
177    /// reported with reason="function breakpoint" and the breakpoint
178    /// name available for the DAP `stopped` event.
179    pub(crate) pending_function_bp: Option<String>,
180    /// Whether the VM is in step mode.
181    pub(crate) step_mode: bool,
182    /// The frame depth at which stepping started (for step-over).
183    pub(crate) step_frame_depth: usize,
184    /// Whether the VM is currently stopped at a debug point.
185    pub(crate) stopped: bool,
186    /// Last source line executed (to detect line changes).
187    pub(crate) last_line: usize,
188    /// Source directory for resolving imports.
189    pub(crate) source_dir: Option<std::path::PathBuf>,
190    /// Modules currently being imported (cycle prevention).
191    pub(crate) imported_paths: Vec<std::path::PathBuf>,
192    /// Loaded module cache keyed by canonical or synthetic module path.
193    pub(crate) module_cache: BTreeMap<std::path::PathBuf, LoadedModule>,
194    /// Source file path for error reporting.
195    pub(crate) source_file: Option<String>,
196    /// Source text for error reporting.
197    pub(crate) source_text: Option<String>,
198    /// Optional bridge for delegating unknown builtins in bridge mode.
199    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
200    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
201    pub(crate) denied_builtins: HashSet<String>,
202    /// Cancellation token for cooperative graceful shutdown (set by parent).
203    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
204    /// Captured stack trace from the most recent error (fn_name, line, col).
205    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
206    /// Yield channel sender for generator execution. When set, `Op::Yield`
207    /// sends values through this channel instead of being a no-op.
208    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
209    /// Project root directory (detected via harn.toml).
210    /// Used as base directory for metadata, store, and checkpoint operations.
211    pub(crate) project_root: Option<std::path::PathBuf>,
212    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
213    /// after the environment, so user-defined variables can shadow them.
214    pub(crate) globals: BTreeMap<String, VmValue>,
215    /// Optional debugger hook invoked when execution advances to a new source line.
216    pub(crate) debug_hook: Option<Box<DebugHook>>,
217}
218
219impl Vm {
220    pub(crate) fn fresh_local_slots(chunk: &Chunk) -> Vec<LocalSlot> {
221        chunk
222            .local_slots
223            .iter()
224            .map(|_| LocalSlot {
225                value: VmValue::Nil,
226                initialized: false,
227                synced: false,
228            })
229            .collect()
230    }
231
232    pub(crate) fn bind_param_slots(
233        slots: &mut [LocalSlot],
234        func: &crate::chunk::CompiledFunction,
235        args: &[VmValue],
236        synced: bool,
237    ) {
238        let default_start = func.default_start.unwrap_or(func.params.len());
239        let param_count = func.params.len();
240        for (i, _param) in func.params.iter().enumerate() {
241            if i >= slots.len() {
242                break;
243            }
244            if func.has_rest_param && i == param_count - 1 {
245                let rest_args = if i < args.len() {
246                    args[i..].to_vec()
247                } else {
248                    Vec::new()
249                };
250                slots[i].value = VmValue::List(Rc::new(rest_args));
251                slots[i].initialized = true;
252                slots[i].synced = synced;
253            } else if i < args.len() {
254                slots[i].value = args[i].clone();
255                slots[i].initialized = true;
256                slots[i].synced = synced;
257            } else if i < default_start {
258                slots[i].value = VmValue::Nil;
259                slots[i].initialized = true;
260                slots[i].synced = synced;
261            }
262        }
263    }
264
265    pub(crate) fn visible_variables(&self) -> BTreeMap<String, VmValue> {
266        let mut vars = self.env.all_variables();
267        let Some(frame) = self.frames.last() else {
268            return vars;
269        };
270        for (slot, info) in frame.local_slots.iter().zip(frame.chunk.local_slots.iter()) {
271            if slot.initialized && info.scope_depth <= frame.local_scope_depth {
272                vars.insert(info.name.clone(), slot.value.clone());
273            }
274        }
275        vars
276    }
277
278    pub(crate) fn sync_current_frame_locals_to_env(&mut self) {
279        let Some(frame) = self.frames.last_mut() else {
280            return;
281        };
282        let local_scope_base = frame.local_scope_base;
283        let local_scope_depth = frame.local_scope_depth;
284        let entries = frame
285            .local_slots
286            .iter_mut()
287            .zip(frame.chunk.local_slots.iter())
288            .filter_map(|(slot, info)| {
289                if slot.initialized && !slot.synced && info.scope_depth <= local_scope_depth {
290                    slot.synced = true;
291                    Some((
292                        local_scope_base + info.scope_depth,
293                        info.name.clone(),
294                        slot.value.clone(),
295                        info.mutable,
296                    ))
297                } else {
298                    None
299                }
300            })
301            .collect::<Vec<_>>();
302        for (scope_idx, name, value, mutable) in entries {
303            while self.env.scopes.len() <= scope_idx {
304                self.env.push_scope();
305            }
306            self.env.scopes[scope_idx]
307                .vars
308                .insert(name, (value, mutable));
309        }
310    }
311
312    pub(crate) fn closure_call_env_for_current_frame(
313        &self,
314        closure: &crate::value::VmClosure,
315    ) -> VmEnv {
316        if closure.module_state.is_some() {
317            return closure.env.clone();
318        }
319        let mut call_env = Self::closure_call_env(&self.env, closure);
320        let Some(frame) = self.frames.last() else {
321            return call_env;
322        };
323        for (slot, info) in frame
324            .local_slots
325            .iter()
326            .zip(frame.chunk.local_slots.iter())
327            .filter(|(slot, info)| slot.initialized && info.scope_depth <= frame.local_scope_depth)
328        {
329            if matches!(slot.value, VmValue::Closure(_)) && call_env.get(&info.name).is_none() {
330                let _ = call_env.define(&info.name, slot.value.clone(), info.mutable);
331            }
332        }
333        call_env
334    }
335
336    pub(crate) fn active_local_slot_value(&self, name: &str) -> Option<VmValue> {
337        let frame = self.frames.last()?;
338        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
339            if info.name == name && info.scope_depth <= frame.local_scope_depth {
340                let slot = frame.local_slots.get(idx)?;
341                if slot.initialized {
342                    return Some(slot.value.clone());
343                }
344            }
345        }
346        None
347    }
348
349    pub(crate) fn assign_active_local_slot(
350        &mut self,
351        name: &str,
352        value: VmValue,
353        debug: bool,
354    ) -> Result<bool, VmError> {
355        let Some(frame) = self.frames.last_mut() else {
356            return Ok(false);
357        };
358        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
359            if info.name == name && info.scope_depth <= frame.local_scope_depth {
360                if !debug && !info.mutable {
361                    return Err(VmError::ImmutableAssignment(name.to_string()));
362                }
363                if let Some(slot) = frame.local_slots.get_mut(idx) {
364                    slot.value = value;
365                    slot.initialized = true;
366                    slot.synced = false;
367                    return Ok(true);
368                }
369            }
370        }
371        Ok(false)
372    }
373
    /// Creates a VM with empty execution state: no builtins, frames,
    /// breakpoints, modules, or source info; a fresh env; freshly created
    /// sync and shared-state runtimes; and the root runtime context.
    pub fn new() -> Self {
        Self {
            // Core execution state.
            stack: Vec::with_capacity(256),
            env: VmEnv::new(),
            output: String::new(),
            // Builtin tables start empty.
            builtins: BTreeMap::new(),
            async_builtins: BTreeMap::new(),
            builtins_by_id: BTreeMap::new(),
            builtin_id_collisions: HashSet::new(),
            iterators: Vec::new(),
            frames: Vec::new(),
            exception_handlers: Vec::new(),
            // Task and synchronization state.
            spawned_tasks: BTreeMap::new(),
            sync_runtime: Arc::new(crate::synchronization::VmSyncRuntime::new()),
            shared_state_runtime: Rc::new(crate::shared_state::VmSharedStateRuntime::new()),
            held_sync_guards: Vec::new(),
            task_counter: 0,
            runtime_context_counter: 0,
            runtime_context: crate::runtime_context::RuntimeContext::root(),
            deadlines: Vec::new(),
            // Debugger state: no breakpoints, not stepping, not stopped.
            breakpoints: BTreeMap::new(),
            function_breakpoints: std::collections::BTreeSet::new(),
            pending_function_bp: None,
            step_mode: false,
            step_frame_depth: 0,
            stopped: false,
            last_line: 0,
            // Source and module bookkeeping; set later via the setters.
            source_dir: None,
            imported_paths: Vec::new(),
            module_cache: BTreeMap::new(),
            source_file: None,
            source_text: None,
            bridge: None,
            denied_builtins: HashSet::new(),
            cancel_token: None,
            error_stack_trace: Vec::new(),
            yield_sender: None,
            project_root: None,
            globals: BTreeMap::new(),
            debug_hook: None,
        }
    }
416
    /// Set the bridge for delegating unknown builtins in bridge mode.
    /// Replaces any bridge that was set before.
    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
        self.bridge = Some(bridge);
    }
421
    /// Set builtins that are denied in sandbox mode.
    /// When called, the given builtin names will produce a permission error.
    /// The previous deny set is replaced, not merged.
    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
        self.denied_builtins = denied;
    }
427
428    /// Set source info for error reporting (file path and source text).
429    pub fn set_source_info(&mut self, file: &str, text: &str) {
430        self.source_file = Some(file.to_string());
431        self.source_text = Some(text.to_string());
432    }
433
434    /// Initialize execution (push the initial frame).
435    pub fn start(&mut self, chunk: &Chunk) {
436        let initial_env = self.env.clone();
437        self.frames.push(CallFrame {
438            chunk: Rc::new(chunk.clone()),
439            ip: 0,
440            stack_base: self.stack.len(),
441            saved_env: self.env.clone(),
442            // The top-level pipeline frame captures env at start so
443            // restartFrame on the outermost frame rewinds to the
444            // pre-pipeline state — basically "restart session" in
445            // debugger terms.
446            initial_env: Some(initial_env),
447            initial_local_slots: Some(Self::fresh_local_slots(chunk)),
448            saved_iterator_depth: self.iterators.len(),
449            fn_name: String::new(),
450            argc: 0,
451            saved_source_dir: None,
452            module_functions: None,
453            module_state: None,
454            local_slots: Self::fresh_local_slots(chunk),
455            local_scope_base: self.env.scope_depth().saturating_sub(1),
456            local_scope_depth: 0,
457        });
458    }
459
    /// Create a child VM that shares builtins and env but has fresh execution state.
    /// Used for parallel/spawn to fork the VM for concurrent tasks.
    ///
    /// Copied from the parent: env, builtin tables, runtime context and its
    /// counter, deadlines, source/module info, bridge, deny list, cancel
    /// token, project root, and globals. The sync and shared-state runtimes
    /// are shared by cloning their `Arc` / `Rc` handles. Stack, frames,
    /// iterators, handlers, tasks, held guards, debugger state, and the
    /// yield sender all start empty.
    pub(crate) fn child_vm(&self) -> Vm {
        Vm {
            // Fresh execution state (smaller initial stack than `new`).
            stack: Vec::with_capacity(64),
            env: self.env.clone(),
            output: String::new(),
            // Builtin tables are cloned from the parent.
            builtins: self.builtins.clone(),
            async_builtins: self.async_builtins.clone(),
            builtins_by_id: self.builtins_by_id.clone(),
            builtin_id_collisions: self.builtin_id_collisions.clone(),
            iterators: Vec::new(),
            frames: Vec::new(),
            exception_handlers: Vec::new(),
            spawned_tasks: BTreeMap::new(),
            // Handle clones: parent and child refer to the same runtimes.
            sync_runtime: self.sync_runtime.clone(),
            shared_state_runtime: self.shared_state_runtime.clone(),
            held_sync_guards: Vec::new(),
            task_counter: 0,
            runtime_context_counter: self.runtime_context_counter,
            runtime_context: self.runtime_context.clone(),
            // NOTE(review): the inherited entries carry the parent's
            // `frame_depth` values while the child starts with no frames —
            // confirm the deadline-pop logic handles that.
            deadlines: self.deadlines.clone(),
            // Debugger state is not inherited.
            breakpoints: BTreeMap::new(),
            function_breakpoints: std::collections::BTreeSet::new(),
            pending_function_bp: None,
            step_mode: false,
            step_frame_depth: 0,
            stopped: false,
            last_line: 0,
            source_dir: self.source_dir.clone(),
            // The in-progress import list starts empty; the module cache is copied.
            imported_paths: Vec::new(),
            module_cache: self.module_cache.clone(),
            source_file: self.source_file.clone(),
            source_text: self.source_text.clone(),
            bridge: self.bridge.clone(),
            denied_builtins: self.denied_builtins.clone(),
            cancel_token: self.cancel_token.clone(),
            error_stack_trace: Vec::new(),
            yield_sender: None,
            project_root: self.project_root.clone(),
            globals: self.globals.clone(),
            debug_hook: None,
        }
    }
504
    /// Create a child VM for external adapters that need to invoke Harn
    /// closures while sharing the parent's builtins, globals, and module state.
    /// Currently a plain delegation to `child_vm`.
    pub(crate) fn child_vm_for_host(&self) -> Vm {
        self.child_vm()
    }
510
511    /// Set the source directory for import resolution and introspection.
512    /// Also auto-detects the project root if not already set.
513    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
514        let dir = crate::stdlib::process::normalize_context_path(dir);
515        self.source_dir = Some(dir.clone());
516        crate::stdlib::set_thread_source_dir(&dir);
517        // Auto-detect project root if not explicitly set.
518        if self.project_root.is_none() {
519            self.project_root = crate::stdlib::process::find_project_root(&dir);
520        }
521    }
522
    /// Explicitly set the project root directory.
    /// Used by ACP/CLI to override auto-detection.
    /// The path is stored as given, and a root set here stops later
    /// `set_source_dir` calls from auto-detecting one.
    pub fn set_project_root(&mut self, root: &std::path::Path) {
        self.project_root = Some(root.to_path_buf());
    }
528
529    /// Get the project root directory, falling back to source_dir.
530    pub fn project_root(&self) -> Option<&std::path::Path> {
531        self.project_root.as_deref().or(self.source_dir.as_deref())
532    }
533
534    /// Return all registered builtin names (sync + async).
535    pub fn builtin_names(&self) -> Vec<String> {
536        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
537        names.extend(self.async_builtins.keys().cloned());
538        names
539    }
540
    /// Set a global constant (e.g. `pi`, `e`).
    /// Stored separately from the environment so user-defined variables can shadow them.
    /// An existing global with the same name is overwritten.
    pub fn set_global(&mut self, name: &str, value: VmValue) {
        self.globals.insert(name.to_string(), value);
    }
546
    /// Get the captured output.
    /// Returns a borrowed view of the VM's `output` buffer.
    pub fn output(&self) -> &str {
        &self.output
    }
551
    /// Removes and returns the top value of the stack, or
    /// `VmError::StackUnderflow` when the stack is empty.
    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
        self.stack.pop().ok_or(VmError::StackUnderflow)
    }
555
    /// Borrows the top value of the stack without removing it, or returns
    /// `VmError::StackUnderflow` when the stack is empty.
    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
        self.stack.last().ok_or(VmError::StackUnderflow)
    }
559
560    pub(crate) fn const_string(c: &Constant) -> Result<String, VmError> {
561        match c {
562            Constant::String(s) => Ok(s.clone()),
563            _ => Err(VmError::TypeError("expected string constant".into())),
564        }
565    }
566
567    pub(crate) fn release_sync_guards_for_current_scope(&mut self) {
568        let depth = self.env.scope_depth();
569        self.held_sync_guards
570            .retain(|guard| guard.env_scope_depth < depth);
571    }
572
573    pub(crate) fn release_sync_guards_after_unwind(
574        &mut self,
575        frame_depth: usize,
576        env_scope_depth: usize,
577    ) {
578        self.held_sync_guards.retain(|guard| {
579            guard.frame_depth <= frame_depth && guard.env_scope_depth <= env_scope_depth
580        });
581    }
582
    /// Drops every held sync guard recorded at exactly `frame_depth`;
    /// guards recorded at any other frame depth are kept.
    pub(crate) fn release_sync_guards_for_frame(&mut self, frame_depth: usize) {
        self.held_sync_guards
            .retain(|guard| guard.frame_depth != frame_depth);
    }
587}
588
589impl Default for Vm {
590    fn default() -> Self {
591        Self::new()
592    }
593}