Skip to main content

harn_vm/vm/
state.rs

1use std::collections::{BTreeMap, HashSet};
2use std::rc::Rc;
3use std::time::Instant;
4
5use crate::chunk::{Chunk, ChunkRef, Constant};
6use crate::value::{
7    ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
8};
9use crate::BuiltinId;
10
11use super::debug::DebugHook;
12use super::modules::LoadedModule;
13
14/// RAII guard that starts a tracing span on creation and ends it on drop.
15pub(crate) struct ScopeSpan(u64);
16
17impl ScopeSpan {
18    pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
19        Self(crate::tracing::span_start(kind, name))
20    }
21}
22
23impl Drop for ScopeSpan {
24    fn drop(&mut self) {
25        crate::tracing::span_end(self.0);
26    }
27}
28
29#[derive(Clone)]
30pub(crate) struct LocalSlot {
31    pub(crate) value: VmValue,
32    pub(crate) initialized: bool,
33    pub(crate) synced: bool,
34}
35
36/// Call frame for function execution.
37pub(crate) struct CallFrame {
38    pub(crate) chunk: ChunkRef,
39    pub(crate) ip: usize,
40    pub(crate) stack_base: usize,
41    pub(crate) saved_env: VmEnv,
42    /// Env snapshot captured at call-time, *after* argument binding. Used
43    /// by the debugger's `restartFrame` to rewind this frame to its
44    /// entry state (re-binding args from the original values) without
45    /// re-entering the call site. Cheap to clone because `VmEnv` is
46    /// already cloned into `saved_env` on every call. `None` for
47    /// scratch frames (evaluate, import init) where restart isn't
48    /// meaningful.
49    pub(crate) initial_env: Option<VmEnv>,
50    pub(crate) initial_local_slots: Option<Vec<LocalSlot>>,
51    /// Iterator stack depth to restore when this frame unwinds.
52    pub(crate) saved_iterator_depth: usize,
53    /// Function name for stack traces (empty for top-level pipeline).
54    pub(crate) fn_name: String,
55    /// Number of arguments actually passed by the caller (for default arg support).
56    pub(crate) argc: usize,
57    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
58    /// Set when entering a closure that originated from an imported module.
59    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
60    /// Module-local named functions available to symbolic calls within this frame.
61    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
62    /// Shared module-level env for top-level `var` / `let` bindings of
63    /// this frame's originating module. Looked up after `self.env` and
64    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
65    /// its own live static state that persists across calls. See the
66    /// `module_state` field on `VmClosure` for the full rationale.
67    pub(crate) module_state: Option<crate::value::ModuleState>,
68    /// Slot-indexed locals for compiler-resolved names in this frame.
69    pub(crate) local_slots: Vec<LocalSlot>,
70    /// Env scope index that corresponds to compiler local scope depth 0.
71    pub(crate) local_scope_base: usize,
72    /// Current compiler local scope depth, updated by PushScope/PopScope.
73    pub(crate) local_scope_depth: usize,
74}
75
/// Bookkeeping for one active try/catch handler.
///
/// NOTE(review): the unwinding code is not in this file; the per-field notes
/// marked "presumably" are read off the field names and should be confirmed
/// against the code that pushes and consumes handlers.
pub(crate) struct ExceptionHandler {
    /// Presumably the instruction position of the catch block.
    pub(crate) catch_ip: usize,
    /// Presumably the value-stack depth recorded when the handler was installed.
    pub(crate) stack_depth: usize,
    /// Presumably the call-frame depth recorded when the handler was installed.
    pub(crate) frame_depth: usize,
    /// Presumably the env scope depth recorded when the handler was installed.
    pub(crate) env_scope_depth: usize,
    /// Type filter: when non-empty, the catch only handles errors whose
    /// enum_name matches this string. An empty string means no filter.
    pub(crate) error_type: String,
}
85
86/// Iterator state for for-in loops.
87pub(crate) enum IterState {
88    Vec {
89        items: Rc<Vec<VmValue>>,
90        idx: usize,
91    },
92    Dict {
93        entries: Rc<BTreeMap<String, VmValue>>,
94        keys: Vec<String>,
95        idx: usize,
96    },
97    Channel {
98        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
99        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
100    },
101    Generator {
102        gen: crate::value::VmGenerator,
103    },
104    /// Step through a lazy range without materializing a Vec.
105    /// `next` holds the value to emit on the next IterNext; `stop` is
106    /// the first value that terminates the iteration (one past the end).
107    Range {
108        next: i64,
109        stop: i64,
110    },
111    VmIter {
112        handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
113    },
114}
115
116#[derive(Clone)]
117pub(crate) enum VmBuiltinDispatch {
118    Sync(VmBuiltinFn),
119    Async(VmAsyncBuiltinFn),
120}
121
122#[derive(Clone)]
123pub(crate) struct VmBuiltinEntry {
124    pub(crate) name: Rc<str>,
125    pub(crate) dispatch: VmBuiltinDispatch,
126}
127
128/// The Harn bytecode virtual machine.
129pub struct Vm {
130    pub(crate) stack: Vec<VmValue>,
131    pub(crate) env: VmEnv,
132    pub(crate) output: String,
133    pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
134    pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
135    /// Numeric side index for builtins. Name-keyed maps remain authoritative;
136    /// this index is the hot path for direct builtin bytecode and callback refs.
137    pub(crate) builtins_by_id: BTreeMap<BuiltinId, VmBuiltinEntry>,
138    /// IDs with detected name collisions. Collided names safely fall back to
139    /// the authoritative name-keyed lookup path.
140    pub(crate) builtin_id_collisions: HashSet<BuiltinId>,
141    /// Iterator state for for-in loops.
142    pub(crate) iterators: Vec<IterState>,
143    /// Call frame stack.
144    pub(crate) frames: Vec<CallFrame>,
145    /// Exception handler stack.
146    pub(crate) exception_handlers: Vec<ExceptionHandler>,
147    /// Spawned async task handles.
148    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
149    /// Counter for generating unique task IDs.
150    pub(crate) task_counter: u64,
151    /// Counter for logical runtime-context task groups.
152    pub(crate) runtime_context_counter: u64,
153    /// Logical runtime task context visible through `runtime_context()`.
154    pub(crate) runtime_context: crate::runtime_context::RuntimeContext,
155    /// Active deadline stack: (deadline_instant, frame_depth).
156    pub(crate) deadlines: Vec<(Instant, usize)>,
157    /// Breakpoints, keyed by source-file path so a breakpoint at line N
158    /// in `auto.harn` doesn't also fire when execution hits line N in an
159    /// imported lib. The empty-string key is a wildcard used by callers
160    /// that don't track source paths (legacy `set_breakpoints` API).
161    pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
162    /// Function-name breakpoints. Any closure call whose
163    /// `CompiledFunction.name` matches an entry here raises a stop on
164    /// entry, regardless of the call site's file or line. Lets the IDE
165    /// break on `llm_call` / `host_run_pipeline` / any user pipeline
166    /// function without pinning down a source location first.
167    pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
168    /// Latched on `push_closure_frame` when the callee's name matches
169    /// `function_breakpoints`; consumed by the next step so the stop is
170    /// reported with reason="function breakpoint" and the breakpoint
171    /// name available for the DAP `stopped` event.
172    pub(crate) pending_function_bp: Option<String>,
173    /// Whether the VM is in step mode.
174    pub(crate) step_mode: bool,
175    /// The frame depth at which stepping started (for step-over).
176    pub(crate) step_frame_depth: usize,
177    /// Whether the VM is currently stopped at a debug point.
178    pub(crate) stopped: bool,
179    /// Last source line executed (to detect line changes).
180    pub(crate) last_line: usize,
181    /// Source directory for resolving imports.
182    pub(crate) source_dir: Option<std::path::PathBuf>,
183    /// Modules currently being imported (cycle prevention).
184    pub(crate) imported_paths: Vec<std::path::PathBuf>,
185    /// Loaded module cache keyed by canonical or synthetic module path.
186    pub(crate) module_cache: BTreeMap<std::path::PathBuf, LoadedModule>,
187    /// Source file path for error reporting.
188    pub(crate) source_file: Option<String>,
189    /// Source text for error reporting.
190    pub(crate) source_text: Option<String>,
191    /// Optional bridge for delegating unknown builtins in bridge mode.
192    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
193    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
194    pub(crate) denied_builtins: HashSet<String>,
195    /// Cancellation token for cooperative graceful shutdown (set by parent).
196    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
197    /// Captured stack trace from the most recent error (fn_name, line, col).
198    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
199    /// Yield channel sender for generator execution. When set, `Op::Yield`
200    /// sends values through this channel instead of being a no-op.
201    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
202    /// Project root directory (detected via harn.toml).
203    /// Used as base directory for metadata, store, and checkpoint operations.
204    pub(crate) project_root: Option<std::path::PathBuf>,
205    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
206    /// after the environment, so user-defined variables can shadow them.
207    pub(crate) globals: BTreeMap<String, VmValue>,
208    /// Optional debugger hook invoked when execution advances to a new source line.
209    pub(crate) debug_hook: Option<Box<DebugHook>>,
210}
211
212impl Vm {
213    pub(crate) fn fresh_local_slots(chunk: &Chunk) -> Vec<LocalSlot> {
214        chunk
215            .local_slots
216            .iter()
217            .map(|_| LocalSlot {
218                value: VmValue::Nil,
219                initialized: false,
220                synced: false,
221            })
222            .collect()
223    }
224
225    pub(crate) fn bind_param_slots(
226        slots: &mut [LocalSlot],
227        func: &crate::chunk::CompiledFunction,
228        args: &[VmValue],
229        synced: bool,
230    ) {
231        let default_start = func.default_start.unwrap_or(func.params.len());
232        let param_count = func.params.len();
233        for (i, _param) in func.params.iter().enumerate() {
234            if i >= slots.len() {
235                break;
236            }
237            if func.has_rest_param && i == param_count - 1 {
238                let rest_args = if i < args.len() {
239                    args[i..].to_vec()
240                } else {
241                    Vec::new()
242                };
243                slots[i].value = VmValue::List(Rc::new(rest_args));
244                slots[i].initialized = true;
245                slots[i].synced = synced;
246            } else if i < args.len() {
247                slots[i].value = args[i].clone();
248                slots[i].initialized = true;
249                slots[i].synced = synced;
250            } else if i < default_start {
251                slots[i].value = VmValue::Nil;
252                slots[i].initialized = true;
253                slots[i].synced = synced;
254            }
255        }
256    }
257
258    pub(crate) fn visible_variables(&self) -> BTreeMap<String, VmValue> {
259        let mut vars = self.env.all_variables();
260        let Some(frame) = self.frames.last() else {
261            return vars;
262        };
263        for (slot, info) in frame.local_slots.iter().zip(frame.chunk.local_slots.iter()) {
264            if slot.initialized && info.scope_depth <= frame.local_scope_depth {
265                vars.insert(info.name.clone(), slot.value.clone());
266            }
267        }
268        vars
269    }
270
271    pub(crate) fn sync_current_frame_locals_to_env(&mut self) {
272        let Some(frame) = self.frames.last_mut() else {
273            return;
274        };
275        let local_scope_base = frame.local_scope_base;
276        let local_scope_depth = frame.local_scope_depth;
277        let entries = frame
278            .local_slots
279            .iter_mut()
280            .zip(frame.chunk.local_slots.iter())
281            .filter_map(|(slot, info)| {
282                if slot.initialized && !slot.synced && info.scope_depth <= local_scope_depth {
283                    slot.synced = true;
284                    Some((
285                        local_scope_base + info.scope_depth,
286                        info.name.clone(),
287                        slot.value.clone(),
288                        info.mutable,
289                    ))
290                } else {
291                    None
292                }
293            })
294            .collect::<Vec<_>>();
295        for (scope_idx, name, value, mutable) in entries {
296            while self.env.scopes.len() <= scope_idx {
297                self.env.push_scope();
298            }
299            self.env.scopes[scope_idx]
300                .vars
301                .insert(name, (value, mutable));
302        }
303    }
304
305    pub(crate) fn closure_call_env_for_current_frame(
306        &self,
307        closure: &crate::value::VmClosure,
308    ) -> VmEnv {
309        if closure.module_state.is_some() {
310            return closure.env.clone();
311        }
312        let mut call_env = Self::closure_call_env(&self.env, closure);
313        let Some(frame) = self.frames.last() else {
314            return call_env;
315        };
316        for (slot, info) in frame
317            .local_slots
318            .iter()
319            .zip(frame.chunk.local_slots.iter())
320            .filter(|(slot, info)| slot.initialized && info.scope_depth <= frame.local_scope_depth)
321        {
322            if matches!(slot.value, VmValue::Closure(_)) && call_env.get(&info.name).is_none() {
323                let _ = call_env.define(&info.name, slot.value.clone(), info.mutable);
324            }
325        }
326        call_env
327    }
328
329    pub(crate) fn active_local_slot_value(&self, name: &str) -> Option<VmValue> {
330        let frame = self.frames.last()?;
331        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
332            if info.name == name && info.scope_depth <= frame.local_scope_depth {
333                let slot = frame.local_slots.get(idx)?;
334                if slot.initialized {
335                    return Some(slot.value.clone());
336                }
337            }
338        }
339        None
340    }
341
342    pub(crate) fn assign_active_local_slot(
343        &mut self,
344        name: &str,
345        value: VmValue,
346        debug: bool,
347    ) -> Result<bool, VmError> {
348        let Some(frame) = self.frames.last_mut() else {
349            return Ok(false);
350        };
351        for (idx, info) in frame.chunk.local_slots.iter().enumerate().rev() {
352            if info.name == name && info.scope_depth <= frame.local_scope_depth {
353                if !debug && !info.mutable {
354                    return Err(VmError::ImmutableAssignment(name.to_string()));
355                }
356                if let Some(slot) = frame.local_slots.get_mut(idx) {
357                    slot.value = value;
358                    slot.initialized = true;
359                    slot.synced = false;
360                    return Ok(true);
361                }
362            }
363        }
364        Ok(false)
365    }
366
367    pub fn new() -> Self {
368        Self {
369            stack: Vec::with_capacity(256),
370            env: VmEnv::new(),
371            output: String::new(),
372            builtins: BTreeMap::new(),
373            async_builtins: BTreeMap::new(),
374            builtins_by_id: BTreeMap::new(),
375            builtin_id_collisions: HashSet::new(),
376            iterators: Vec::new(),
377            frames: Vec::new(),
378            exception_handlers: Vec::new(),
379            spawned_tasks: BTreeMap::new(),
380            task_counter: 0,
381            runtime_context_counter: 0,
382            runtime_context: crate::runtime_context::RuntimeContext::root(),
383            deadlines: Vec::new(),
384            breakpoints: BTreeMap::new(),
385            function_breakpoints: std::collections::BTreeSet::new(),
386            pending_function_bp: None,
387            step_mode: false,
388            step_frame_depth: 0,
389            stopped: false,
390            last_line: 0,
391            source_dir: None,
392            imported_paths: Vec::new(),
393            module_cache: BTreeMap::new(),
394            source_file: None,
395            source_text: None,
396            bridge: None,
397            denied_builtins: HashSet::new(),
398            cancel_token: None,
399            error_stack_trace: Vec::new(),
400            yield_sender: None,
401            project_root: None,
402            globals: BTreeMap::new(),
403            debug_hook: None,
404        }
405    }
406
407    /// Set the bridge for delegating unknown builtins in bridge mode.
408    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
409        self.bridge = Some(bridge);
410    }
411
412    /// Set builtins that are denied in sandbox mode.
413    /// When called, the given builtin names will produce a permission error.
414    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
415        self.denied_builtins = denied;
416    }
417
418    /// Set source info for error reporting (file path and source text).
419    pub fn set_source_info(&mut self, file: &str, text: &str) {
420        self.source_file = Some(file.to_string());
421        self.source_text = Some(text.to_string());
422    }
423
424    /// Initialize execution (push the initial frame).
425    pub fn start(&mut self, chunk: &Chunk) {
426        let initial_env = self.env.clone();
427        self.frames.push(CallFrame {
428            chunk: Rc::new(chunk.clone()),
429            ip: 0,
430            stack_base: self.stack.len(),
431            saved_env: self.env.clone(),
432            // The top-level pipeline frame captures env at start so
433            // restartFrame on the outermost frame rewinds to the
434            // pre-pipeline state — basically "restart session" in
435            // debugger terms.
436            initial_env: Some(initial_env),
437            initial_local_slots: Some(Self::fresh_local_slots(chunk)),
438            saved_iterator_depth: self.iterators.len(),
439            fn_name: String::new(),
440            argc: 0,
441            saved_source_dir: None,
442            module_functions: None,
443            module_state: None,
444            local_slots: Self::fresh_local_slots(chunk),
445            local_scope_base: self.env.scope_depth().saturating_sub(1),
446            local_scope_depth: 0,
447        });
448    }
449
450    /// Create a child VM that shares builtins and env but has fresh execution state.
451    /// Used for parallel/spawn to fork the VM for concurrent tasks.
452    pub(crate) fn child_vm(&self) -> Vm {
453        Vm {
454            stack: Vec::with_capacity(64),
455            env: self.env.clone(),
456            output: String::new(),
457            builtins: self.builtins.clone(),
458            async_builtins: self.async_builtins.clone(),
459            builtins_by_id: self.builtins_by_id.clone(),
460            builtin_id_collisions: self.builtin_id_collisions.clone(),
461            iterators: Vec::new(),
462            frames: Vec::new(),
463            exception_handlers: Vec::new(),
464            spawned_tasks: BTreeMap::new(),
465            task_counter: 0,
466            runtime_context_counter: self.runtime_context_counter,
467            runtime_context: self.runtime_context.clone(),
468            deadlines: self.deadlines.clone(),
469            breakpoints: BTreeMap::new(),
470            function_breakpoints: std::collections::BTreeSet::new(),
471            pending_function_bp: None,
472            step_mode: false,
473            step_frame_depth: 0,
474            stopped: false,
475            last_line: 0,
476            source_dir: self.source_dir.clone(),
477            imported_paths: Vec::new(),
478            module_cache: self.module_cache.clone(),
479            source_file: self.source_file.clone(),
480            source_text: self.source_text.clone(),
481            bridge: self.bridge.clone(),
482            denied_builtins: self.denied_builtins.clone(),
483            cancel_token: self.cancel_token.clone(),
484            error_stack_trace: Vec::new(),
485            yield_sender: None,
486            project_root: self.project_root.clone(),
487            globals: self.globals.clone(),
488            debug_hook: None,
489        }
490    }
491
492    /// Create a child VM for external adapters that need to invoke Harn
493    /// closures while sharing the parent's builtins, globals, and module state.
494    pub(crate) fn child_vm_for_host(&self) -> Vm {
495        self.child_vm()
496    }
497
498    /// Set the source directory for import resolution and introspection.
499    /// Also auto-detects the project root if not already set.
500    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
501        let dir = crate::stdlib::process::normalize_context_path(dir);
502        self.source_dir = Some(dir.clone());
503        crate::stdlib::set_thread_source_dir(&dir);
504        // Auto-detect project root if not explicitly set.
505        if self.project_root.is_none() {
506            self.project_root = crate::stdlib::process::find_project_root(&dir);
507        }
508    }
509
510    /// Explicitly set the project root directory.
511    /// Used by ACP/CLI to override auto-detection.
512    pub fn set_project_root(&mut self, root: &std::path::Path) {
513        self.project_root = Some(root.to_path_buf());
514    }
515
516    /// Get the project root directory, falling back to source_dir.
517    pub fn project_root(&self) -> Option<&std::path::Path> {
518        self.project_root.as_deref().or(self.source_dir.as_deref())
519    }
520
521    /// Return all registered builtin names (sync + async).
522    pub fn builtin_names(&self) -> Vec<String> {
523        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
524        names.extend(self.async_builtins.keys().cloned());
525        names
526    }
527
528    /// Set a global constant (e.g. `pi`, `e`).
529    /// Stored separately from the environment so user-defined variables can shadow them.
530    pub fn set_global(&mut self, name: &str, value: VmValue) {
531        self.globals.insert(name.to_string(), value);
532    }
533
534    /// Get the captured output.
535    pub fn output(&self) -> &str {
536        &self.output
537    }
538
539    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
540        self.stack.pop().ok_or(VmError::StackUnderflow)
541    }
542
543    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
544        self.stack.last().ok_or(VmError::StackUnderflow)
545    }
546
547    pub(crate) fn const_string(c: &Constant) -> Result<String, VmError> {
548        match c {
549            Constant::String(s) => Ok(s.clone()),
550            _ => Err(VmError::TypeError("expected string constant".into())),
551        }
552    }
553}
554
555impl Default for Vm {
556    fn default() -> Self {
557        Self::new()
558    }
559}