Skip to main content

harn_vm/vm/
state.rs

1use std::collections::{BTreeMap, HashSet};
2use std::rc::Rc;
3use std::time::Instant;
4
5use crate::chunk::{Chunk, Constant};
6use crate::value::{
7    ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
8};
9
10use super::debug::DebugHook;
11use super::modules::LoadedModule;
12
13/// RAII guard that starts a tracing span on creation and ends it on drop.
14pub(crate) struct ScopeSpan(u64);
15
16impl ScopeSpan {
17    pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
18        Self(crate::tracing::span_start(kind, name))
19    }
20}
21
22impl Drop for ScopeSpan {
23    fn drop(&mut self) {
24        crate::tracing::span_end(self.0);
25    }
26}
27
28/// Call frame for function execution.
29pub(crate) struct CallFrame {
30    pub(crate) chunk: Chunk,
31    pub(crate) ip: usize,
32    pub(crate) stack_base: usize,
33    pub(crate) saved_env: VmEnv,
34    /// Env snapshot captured at call-time, *after* argument binding. Used
35    /// by the debugger's `restartFrame` to rewind this frame to its
36    /// entry state (re-binding args from the original values) without
37    /// re-entering the call site. Cheap to clone because `VmEnv` is
38    /// already cloned into `saved_env` on every call. `None` for
39    /// scratch frames (evaluate, import init) where restart isn't
40    /// meaningful.
41    pub(crate) initial_env: Option<VmEnv>,
42    /// Iterator stack depth to restore when this frame unwinds.
43    pub(crate) saved_iterator_depth: usize,
44    /// Function name for stack traces (empty for top-level pipeline).
45    pub(crate) fn_name: String,
46    /// Number of arguments actually passed by the caller (for default arg support).
47    pub(crate) argc: usize,
48    /// Saved VM_SOURCE_DIR to restore when this frame is popped.
49    /// Set when entering a closure that originated from an imported module.
50    pub(crate) saved_source_dir: Option<std::path::PathBuf>,
51    /// Module-local named functions available to symbolic calls within this frame.
52    pub(crate) module_functions: Option<ModuleFunctionRegistry>,
53    /// Shared module-level env for top-level `var` / `let` bindings of
54    /// this frame's originating module. Looked up after `self.env` and
55    /// before `self.globals` by `GetVar` / `SetVar`, giving each module
56    /// its own live static state that persists across calls. See the
57    /// `module_state` field on `VmClosure` for the full rationale.
58    pub(crate) module_state: Option<crate::value::ModuleState>,
59}
60
/// A registered try/catch handler.
///
/// NOTE(review): the depth fields presumably record how far to unwind the
/// value stack, call-frame stack, and env scopes before jumping to
/// `catch_ip` — confirm against the throw/unwind code.
pub(crate) struct ExceptionHandler {
    /// Instruction pointer of the catch block.
    pub(crate) catch_ip: usize,
    /// Value-stack depth associated with this handler.
    pub(crate) stack_depth: usize,
    /// Call-frame depth associated with this handler.
    pub(crate) frame_depth: usize,
    /// Environment scope depth associated with this handler.
    pub(crate) env_scope_depth: usize,
    /// Error-type filter. When non-empty, the catch only handles errors
    /// whose enum_name matches this string.
    pub(crate) error_type: String,
}
70
71/// Iterator state for for-in loops: either a pre-collected vec, an async channel, or a generator.
72pub(crate) enum IterState {
73    Vec {
74        items: Vec<VmValue>,
75        idx: usize,
76    },
77    Channel {
78        receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
79        closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
80    },
81    Generator {
82        gen: crate::value::VmGenerator,
83    },
84    /// Step through a lazy range without materializing a Vec.
85    /// `next` holds the value to emit on the next IterNext; `stop` is
86    /// the first value that terminates the iteration (one past the end).
87    Range {
88        next: i64,
89        stop: i64,
90    },
91    VmIter {
92        handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
93    },
94}
95
96/// The Harn bytecode virtual machine.
97pub struct Vm {
98    pub(crate) stack: Vec<VmValue>,
99    pub(crate) env: VmEnv,
100    pub(crate) output: String,
101    pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
102    pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
103    /// Iterator state for for-in loops.
104    pub(crate) iterators: Vec<IterState>,
105    /// Call frame stack.
106    pub(crate) frames: Vec<CallFrame>,
107    /// Exception handler stack.
108    pub(crate) exception_handlers: Vec<ExceptionHandler>,
109    /// Spawned async task handles.
110    pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
111    /// Counter for generating unique task IDs.
112    pub(crate) task_counter: u64,
113    /// Active deadline stack: (deadline_instant, frame_depth).
114    pub(crate) deadlines: Vec<(Instant, usize)>,
115    /// Breakpoints, keyed by source-file path so a breakpoint at line N
116    /// in `auto.harn` doesn't also fire when execution hits line N in an
117    /// imported lib. The empty-string key is a wildcard used by callers
118    /// that don't track source paths (legacy `set_breakpoints` API).
119    pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
120    /// Function-name breakpoints. Any closure call whose
121    /// `CompiledFunction.name` matches an entry here raises a stop on
122    /// entry, regardless of the call site's file or line. Lets the IDE
123    /// break on `llm_call` / `host_run_pipeline` / any user pipeline
124    /// function without pinning down a source location first.
125    pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
126    /// Latched on `push_closure_frame` when the callee's name matches
127    /// `function_breakpoints`; consumed by the next step so the stop is
128    /// reported with reason="function breakpoint" and the breakpoint
129    /// name available for the DAP `stopped` event.
130    pub(crate) pending_function_bp: Option<String>,
131    /// Whether the VM is in step mode.
132    pub(crate) step_mode: bool,
133    /// The frame depth at which stepping started (for step-over).
134    pub(crate) step_frame_depth: usize,
135    /// Whether the VM is currently stopped at a debug point.
136    pub(crate) stopped: bool,
137    /// Last source line executed (to detect line changes).
138    pub(crate) last_line: usize,
139    /// Source directory for resolving imports.
140    pub(crate) source_dir: Option<std::path::PathBuf>,
141    /// Modules currently being imported (cycle prevention).
142    pub(crate) imported_paths: Vec<std::path::PathBuf>,
143    /// Loaded module cache keyed by canonical or synthetic module path.
144    pub(crate) module_cache: BTreeMap<std::path::PathBuf, LoadedModule>,
145    /// Source file path for error reporting.
146    pub(crate) source_file: Option<String>,
147    /// Source text for error reporting.
148    pub(crate) source_text: Option<String>,
149    /// Optional bridge for delegating unknown builtins in bridge mode.
150    pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
151    /// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
152    pub(crate) denied_builtins: HashSet<String>,
153    /// Cancellation token for cooperative graceful shutdown (set by parent).
154    pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
155    /// Captured stack trace from the most recent error (fn_name, line, col).
156    pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
157    /// Yield channel sender for generator execution. When set, `Op::Yield`
158    /// sends values through this channel instead of being a no-op.
159    pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
160    /// Project root directory (detected via harn.toml).
161    /// Used as base directory for metadata, store, and checkpoint operations.
162    pub(crate) project_root: Option<std::path::PathBuf>,
163    /// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
164    /// after the environment, so user-defined variables can shadow them.
165    pub(crate) globals: BTreeMap<String, VmValue>,
166    /// Optional debugger hook invoked when execution advances to a new source line.
167    pub(crate) debug_hook: Option<Box<DebugHook>>,
168}
169
170impl Vm {
171    pub fn new() -> Self {
172        Self {
173            stack: Vec::with_capacity(256),
174            env: VmEnv::new(),
175            output: String::new(),
176            builtins: BTreeMap::new(),
177            async_builtins: BTreeMap::new(),
178            iterators: Vec::new(),
179            frames: Vec::new(),
180            exception_handlers: Vec::new(),
181            spawned_tasks: BTreeMap::new(),
182            task_counter: 0,
183            deadlines: Vec::new(),
184            breakpoints: BTreeMap::new(),
185            function_breakpoints: std::collections::BTreeSet::new(),
186            pending_function_bp: None,
187            step_mode: false,
188            step_frame_depth: 0,
189            stopped: false,
190            last_line: 0,
191            source_dir: None,
192            imported_paths: Vec::new(),
193            module_cache: BTreeMap::new(),
194            source_file: None,
195            source_text: None,
196            bridge: None,
197            denied_builtins: HashSet::new(),
198            cancel_token: None,
199            error_stack_trace: Vec::new(),
200            yield_sender: None,
201            project_root: None,
202            globals: BTreeMap::new(),
203            debug_hook: None,
204        }
205    }
206
207    /// Set the bridge for delegating unknown builtins in bridge mode.
208    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
209        self.bridge = Some(bridge);
210    }
211
212    /// Set builtins that are denied in sandbox mode.
213    /// When called, the given builtin names will produce a permission error.
214    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
215        self.denied_builtins = denied;
216    }
217
218    /// Set source info for error reporting (file path and source text).
219    pub fn set_source_info(&mut self, file: &str, text: &str) {
220        self.source_file = Some(file.to_string());
221        self.source_text = Some(text.to_string());
222    }
223
224    /// Initialize execution (push the initial frame).
225    pub fn start(&mut self, chunk: &Chunk) {
226        let initial_env = self.env.clone();
227        self.frames.push(CallFrame {
228            chunk: chunk.clone(),
229            ip: 0,
230            stack_base: self.stack.len(),
231            saved_env: self.env.clone(),
232            // The top-level pipeline frame captures env at start so
233            // restartFrame on the outermost frame rewinds to the
234            // pre-pipeline state — basically "restart session" in
235            // debugger terms.
236            initial_env: Some(initial_env),
237            saved_iterator_depth: self.iterators.len(),
238            fn_name: String::new(),
239            argc: 0,
240            saved_source_dir: None,
241            module_functions: None,
242            module_state: None,
243        });
244    }
245
246    /// Create a child VM that shares builtins and env but has fresh execution state.
247    /// Used for parallel/spawn to fork the VM for concurrent tasks.
248    pub(crate) fn child_vm(&self) -> Vm {
249        Vm {
250            stack: Vec::with_capacity(64),
251            env: self.env.clone(),
252            output: String::new(),
253            builtins: self.builtins.clone(),
254            async_builtins: self.async_builtins.clone(),
255            iterators: Vec::new(),
256            frames: Vec::new(),
257            exception_handlers: Vec::new(),
258            spawned_tasks: BTreeMap::new(),
259            task_counter: 0,
260            deadlines: self.deadlines.clone(),
261            breakpoints: BTreeMap::new(),
262            function_breakpoints: std::collections::BTreeSet::new(),
263            pending_function_bp: None,
264            step_mode: false,
265            step_frame_depth: 0,
266            stopped: false,
267            last_line: 0,
268            source_dir: self.source_dir.clone(),
269            imported_paths: Vec::new(),
270            module_cache: self.module_cache.clone(),
271            source_file: self.source_file.clone(),
272            source_text: self.source_text.clone(),
273            bridge: self.bridge.clone(),
274            denied_builtins: self.denied_builtins.clone(),
275            cancel_token: self.cancel_token.clone(),
276            error_stack_trace: Vec::new(),
277            yield_sender: None,
278            project_root: self.project_root.clone(),
279            globals: self.globals.clone(),
280            debug_hook: None,
281        }
282    }
283
284    /// Create a child VM for external adapters that need to invoke Harn
285    /// closures while sharing the parent's builtins, globals, and module state.
286    pub(crate) fn child_vm_for_host(&self) -> Vm {
287        self.child_vm()
288    }
289
290    /// Set the source directory for import resolution and introspection.
291    /// Also auto-detects the project root if not already set.
292    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
293        let dir = crate::stdlib::process::normalize_context_path(dir);
294        self.source_dir = Some(dir.clone());
295        crate::stdlib::set_thread_source_dir(&dir);
296        // Auto-detect project root if not explicitly set.
297        if self.project_root.is_none() {
298            self.project_root = crate::stdlib::process::find_project_root(&dir);
299        }
300    }
301
302    /// Explicitly set the project root directory.
303    /// Used by ACP/CLI to override auto-detection.
304    pub fn set_project_root(&mut self, root: &std::path::Path) {
305        self.project_root = Some(root.to_path_buf());
306    }
307
308    /// Get the project root directory, falling back to source_dir.
309    pub fn project_root(&self) -> Option<&std::path::Path> {
310        self.project_root.as_deref().or(self.source_dir.as_deref())
311    }
312
313    /// Return all registered builtin names (sync + async).
314    pub fn builtin_names(&self) -> Vec<String> {
315        let mut names: Vec<String> = self.builtins.keys().cloned().collect();
316        names.extend(self.async_builtins.keys().cloned());
317        names
318    }
319
320    /// Set a global constant (e.g. `pi`, `e`).
321    /// Stored separately from the environment so user-defined variables can shadow them.
322    pub fn set_global(&mut self, name: &str, value: VmValue) {
323        self.globals.insert(name.to_string(), value);
324    }
325
326    /// Get the captured output.
327    pub fn output(&self) -> &str {
328        &self.output
329    }
330
331    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
332        self.stack.pop().ok_or(VmError::StackUnderflow)
333    }
334
335    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
336        self.stack.last().ok_or(VmError::StackUnderflow)
337    }
338
339    pub(crate) fn const_string(c: &Constant) -> Result<String, VmError> {
340        match c {
341            Constant::String(s) => Ok(s.clone()),
342            _ => Err(VmError::TypeError("expected string constant".into())),
343        }
344    }
345}
346
347impl Default for Vm {
348    fn default() -> Self {
349        Self::new()
350    }
351}