//! Core VM state: the `Vm` struct, its call frames, exception handlers, and
//! for-in iterator state, plus VM construction and basic accessor methods.
use std::collections::{BTreeMap, HashSet};
use std::rc::Rc;
use std::time::Instant;
use crate::chunk::{Chunk, Constant};
use crate::value::{
ModuleFunctionRegistry, VmAsyncBuiltinFn, VmBuiltinFn, VmEnv, VmError, VmTaskHandle, VmValue,
};
use super::debug::DebugHook;
use super::modules::LoadedModule;
/// RAII guard that starts a tracing span on creation and ends it on drop.
///
/// The wrapped `u64` is the span id returned by `crate::tracing::span_start`;
/// the `Drop` impl hands it back to `crate::tracing::span_end`. Bind the guard
/// to a named local (e.g. `let _span = ScopeSpan::new(..)`) so it lives for
/// the whole scope: a guard that is not bound is dropped immediately and the
/// span ends right away.
#[must_use = "the span ends as soon as the guard is dropped; bind it to a named local"]
pub(crate) struct ScopeSpan(u64);
impl ScopeSpan {
pub(crate) fn new(kind: crate::tracing::SpanKind, name: String) -> Self {
Self(crate::tracing::span_start(kind, name))
}
}
impl Drop for ScopeSpan {
    /// Ends the span whose id this guard holds.
    fn drop(&mut self) {
        let span_id = self.0;
        crate::tracing::span_end(span_id);
    }
}
/// Call frame for function execution.
///
/// `Vm::start` pushes the initial (top-level) frame; the fields below record
/// what is needed to run the frame's chunk and to restore VM state afterwards.
pub(crate) struct CallFrame {
/// Chunk this frame executes (`Vm::start` stores a clone of the chunk it is given).
pub(crate) chunk: Chunk,
/// Position within `chunk`; `Vm::start` initialises it to 0.
/// NOTE(review): presumably the index of the next instruction to execute — confirm.
pub(crate) ip: usize,
/// Length of the VM value stack at the moment the frame was pushed
/// (`Vm::start` records `self.stack.len()`).
pub(crate) stack_base: usize,
/// Clone of the VM env taken when the frame was pushed (see `Vm::start`).
/// NOTE(review): presumably restored into the VM when the frame is popped — confirm.
pub(crate) saved_env: VmEnv,
/// Env snapshot captured at call-time, *after* argument binding. Used
/// by the debugger's `restartFrame` to rewind this frame to its
/// entry state (re-binding args from the original values) without
/// re-entering the call site. Cheap to clone because `VmEnv` is
/// already cloned into `saved_env` on every call. `None` for
/// scratch frames (evaluate, import init) where restart isn't
/// meaningful.
pub(crate) initial_env: Option<VmEnv>,
/// Iterator stack depth to restore when this frame unwinds.
pub(crate) saved_iterator_depth: usize,
/// Function name for stack traces (empty for top-level pipeline).
pub(crate) fn_name: String,
/// Number of arguments actually passed by the caller (for default arg support).
pub(crate) argc: usize,
/// Saved VM_SOURCE_DIR to restore when this frame is popped.
/// Set when entering a closure that originated from an imported module.
pub(crate) saved_source_dir: Option<std::path::PathBuf>,
/// Module-local named functions available to symbolic calls within this frame.
pub(crate) module_functions: Option<ModuleFunctionRegistry>,
/// Shared module-level env for top-level `var` / `let` bindings of
/// this frame's originating module. Looked up after `self.env` and
/// before `self.globals` by `GetVar` / `SetVar`, giving each module
/// its own live static state that persists across calls. See the
/// `module_state` field on `VmClosure` for the full rationale.
pub(crate) module_state: Option<crate::value::ModuleState>,
}
/// Exception handler for try/catch.
///
/// NOTE(review): the code that registers and fires handlers is not in this
/// part of the file, so the per-field notes marked "presumably" are inferred
/// from the field names and should be confirmed against that code.
pub(crate) struct ExceptionHandler {
/// Presumably the instruction offset of the catch block to jump to.
pub(crate) catch_ip: usize,
/// Presumably the value-stack length to restore when the handler fires.
pub(crate) stack_depth: usize,
/// Presumably the call-frame count recorded when the handler was registered.
pub(crate) frame_depth: usize,
/// Presumably the env scope depth to restore when the handler fires.
pub(crate) env_scope_depth: usize,
/// If non-empty, this catch only handles errors whose enum_name matches.
pub(crate) error_type: String,
}
/// Iterator state for for-in loops. One variant per iteration source: a
/// pre-collected vec, an async channel, a generator, a lazy integer range,
/// or a shared `VmIter` handle.
pub(crate) enum IterState {
/// Items collected up front.
/// NOTE(review): `idx` is presumably the index of the next item to yield — confirm.
Vec {
items: Vec<VmValue>,
idx: usize,
},
/// Values received from a tokio mpsc channel; the receiver sits behind a
/// shared async mutex and is paired with a shared atomic `closed` flag.
/// NOTE(review): who sets `closed`, and when, is not visible here — confirm.
Channel {
receiver: std::sync::Arc<tokio::sync::Mutex<tokio::sync::mpsc::Receiver<VmValue>>>,
closed: std::sync::Arc<std::sync::atomic::AtomicBool>,
},
/// Values produced by a `VmGenerator`.
Generator {
gen: crate::value::VmGenerator,
},
/// Step through a lazy range without materializing a Vec.
/// `next` holds the value to emit on the next IterNext; `stop` is
/// the first value that terminates the iteration (one past the end).
Range {
next: i64,
stop: i64,
},
/// Iteration driven by a `VmIter` held through a shared `Rc<RefCell<..>>` handle.
VmIter {
handle: std::rc::Rc<std::cell::RefCell<crate::vm::iter::VmIter>>,
},
}
/// The Harn bytecode virtual machine.
///
/// Holds the execution state (value stack, env, frames, iterators, exception
/// handlers), the registered builtins, debugger state, and source/module
/// bookkeeping. `Vm::new` creates an empty machine; `child_vm` forks one that
/// inherits builtins, env, and globals but starts with fresh execution state.
pub struct Vm {
/// Value stack. `pop` / `peek` operate on it and report
/// `VmError::StackUnderflow` when it is empty.
pub(crate) stack: Vec<VmValue>,
/// Current variable environment. Cloned into each frame's `saved_env`
/// (see `start`) and into child VMs (see `child_vm`).
pub(crate) env: VmEnv,
/// Captured output, exposed read-only through `output()`.
pub(crate) output: String,
/// Registered synchronous builtins, keyed by name.
pub(crate) builtins: BTreeMap<String, VmBuiltinFn>,
/// Registered asynchronous builtins, keyed by name.
pub(crate) async_builtins: BTreeMap<String, VmAsyncBuiltinFn>,
/// Iterator state for for-in loops.
pub(crate) iterators: Vec<IterState>,
/// Call frame stack.
pub(crate) frames: Vec<CallFrame>,
/// Exception handler stack.
pub(crate) exception_handlers: Vec<ExceptionHandler>,
/// Spawned async task handles.
pub(crate) spawned_tasks: BTreeMap<String, VmTaskHandle>,
/// Counter for generating unique task IDs.
pub(crate) task_counter: u64,
/// Active deadline stack: (deadline_instant, frame_depth).
pub(crate) deadlines: Vec<(Instant, usize)>,
/// Breakpoints, keyed by source-file path so a breakpoint at line N
/// in `auto.harn` doesn't also fire when execution hits line N in an
/// imported lib. The empty-string key is a wildcard used by callers
/// that don't track source paths (legacy `set_breakpoints` API).
pub(crate) breakpoints: BTreeMap<String, std::collections::BTreeSet<usize>>,
/// Function-name breakpoints. Any closure call whose
/// `CompiledFunction.name` matches an entry here raises a stop on
/// entry, regardless of the call site's file or line. Lets the IDE
/// break on `llm_call` / `host_run_pipeline` / any user pipeline
/// function without pinning down a source location first.
pub(crate) function_breakpoints: std::collections::BTreeSet<String>,
/// Latched on `push_closure_frame` when the callee's name matches
/// `function_breakpoints`; consumed by the next step so the stop is
/// reported with reason="function breakpoint" and the breakpoint
/// name available for the DAP `stopped` event.
pub(crate) pending_function_bp: Option<String>,
/// Whether the VM is in step mode.
pub(crate) step_mode: bool,
/// The frame depth at which stepping started (for step-over).
pub(crate) step_frame_depth: usize,
/// Whether the VM is currently stopped at a debug point.
pub(crate) stopped: bool,
/// Last source line executed (to detect line changes).
pub(crate) last_line: usize,
/// Source directory for resolving imports.
pub(crate) source_dir: Option<std::path::PathBuf>,
/// Modules currently being imported (cycle prevention).
pub(crate) imported_paths: Vec<std::path::PathBuf>,
/// Loaded module cache keyed by canonical or synthetic module path.
pub(crate) module_cache: BTreeMap<std::path::PathBuf, LoadedModule>,
/// Source file path for error reporting.
pub(crate) source_file: Option<String>,
/// Source text for error reporting.
pub(crate) source_text: Option<String>,
/// Optional bridge for delegating unknown builtins in bridge mode.
pub(crate) bridge: Option<Rc<crate::bridge::HostBridge>>,
/// Builtins denied by sandbox mode (`--deny` / `--allow` flags).
pub(crate) denied_builtins: HashSet<String>,
/// Cancellation token for cooperative graceful shutdown (set by parent).
pub(crate) cancel_token: Option<std::sync::Arc<std::sync::atomic::AtomicBool>>,
/// Captured stack trace from the most recent error. Each entry is
/// (fn_name, line, col, optional string).
/// NOTE(review): the fourth element is not described anywhere visible —
/// presumably the source file of the frame; confirm.
pub(crate) error_stack_trace: Vec<(String, usize, usize, Option<String>)>,
/// Yield channel sender for generator execution. When set, `Op::Yield`
/// sends values through this channel instead of being a no-op.
pub(crate) yield_sender: Option<tokio::sync::mpsc::Sender<VmValue>>,
/// Project root directory (detected via harn.toml).
/// Used as base directory for metadata, store, and checkpoint operations.
pub(crate) project_root: Option<std::path::PathBuf>,
/// Global constants (e.g. `pi`, `e`). Checked as a fallback in `GetVar`
/// after the environment, so user-defined variables can shadow them.
pub(crate) globals: BTreeMap<String, VmValue>,
/// Optional debugger hook invoked when execution advances to a new source line.
pub(crate) debug_hook: Option<Box<DebugHook>>,
}
impl Vm {
    /// Builds a VM with empty execution state: no builtins, globals, frames,
    /// breakpoints, or source information, and a value stack pre-sized for
    /// 256 entries.
    pub fn new() -> Self {
        Vm {
            // Core execution state.
            stack: Vec::with_capacity(256),
            env: VmEnv::new(),
            output: String::new(),
            iterators: Vec::new(),
            frames: Vec::new(),
            exception_handlers: Vec::new(),
            error_stack_trace: Vec::new(),
            globals: BTreeMap::new(),
            // Builtins and host integration.
            builtins: BTreeMap::new(),
            async_builtins: BTreeMap::new(),
            bridge: None,
            denied_builtins: HashSet::new(),
            // Async tasks, deadlines, and cancellation.
            spawned_tasks: BTreeMap::new(),
            task_counter: 0,
            deadlines: Vec::new(),
            cancel_token: None,
            yield_sender: None,
            // Debugger state.
            breakpoints: BTreeMap::new(),
            function_breakpoints: std::collections::BTreeSet::new(),
            pending_function_bp: None,
            step_mode: false,
            step_frame_depth: 0,
            stopped: false,
            last_line: 0,
            debug_hook: None,
            // Source locations and module loading.
            source_dir: None,
            imported_paths: Vec::new(),
            module_cache: BTreeMap::new(),
            source_file: None,
            source_text: None,
            project_root: None,
        }
    }

    /// Installs the host bridge that unknown builtins are delegated to in
    /// bridge mode.
    pub fn set_bridge(&mut self, bridge: Rc<crate::bridge::HostBridge>) {
        self.bridge = Some(bridge);
    }

    /// Replaces the set of builtins denied in sandbox mode.
    /// Calling one of the named builtins afterwards produces a permission error.
    pub fn set_denied_builtins(&mut self, denied: HashSet<String>) {
        self.denied_builtins = denied;
    }

    /// Records the source file path and source text used for error reporting.
    /// Both arguments are copied into owned strings.
    pub fn set_source_info(&mut self, file: &str, text: &str) {
        self.source_file = Some(String::from(file));
        self.source_text = Some(String::from(text));
    }

    /// Begins execution of `chunk` by pushing the top-level frame.
    pub fn start(&mut self, chunk: &Chunk) {
        // Snapshot of the env before the pipeline runs: `restartFrame` on the
        // outermost frame rewinds to this state, which in debugger terms is
        // effectively "restart session".
        let entry_env = self.env.clone();
        let top_level = CallFrame {
            chunk: chunk.clone(),
            ip: 0,
            stack_base: self.stack.len(),
            saved_env: self.env.clone(),
            initial_env: Some(entry_env),
            saved_iterator_depth: self.iterators.len(),
            // The top-level pipeline has no function name.
            fn_name: String::new(),
            argc: 0,
            saved_source_dir: None,
            module_functions: None,
            module_state: None,
        };
        self.frames.push(top_level);
    }

    /// Forks a child VM for parallel/spawn. The child inherits clones of the
    /// parent's env, builtins, globals, deadlines, module cache, and source
    /// settings, but starts with fresh execution and debugger state.
    pub(crate) fn child_vm(&self) -> Vm {
        Self {
            // Inherited from the parent (cloned).
            env: self.env.clone(),
            builtins: self.builtins.clone(),
            async_builtins: self.async_builtins.clone(),
            deadlines: self.deadlines.clone(),
            source_dir: self.source_dir.clone(),
            module_cache: self.module_cache.clone(),
            source_file: self.source_file.clone(),
            source_text: self.source_text.clone(),
            bridge: self.bridge.clone(),
            denied_builtins: self.denied_builtins.clone(),
            project_root: self.project_root.clone(),
            globals: self.globals.clone(),
            // Fresh per-child execution state.
            stack: Vec::with_capacity(64),
            output: String::new(),
            iterators: Vec::new(),
            frames: Vec::new(),
            exception_handlers: Vec::new(),
            spawned_tasks: BTreeMap::new(),
            task_counter: 0,
            imported_paths: Vec::new(),
            cancel_token: None,
            error_stack_trace: Vec::new(),
            yield_sender: None,
            // Debugger state is never inherited.
            breakpoints: BTreeMap::new(),
            function_breakpoints: std::collections::BTreeSet::new(),
            pending_function_bp: None,
            step_mode: false,
            step_frame_depth: 0,
            stopped: false,
            last_line: 0,
            debug_hook: None,
        }
    }

    /// Forks a child VM for external adapters that need to invoke Harn
    /// closures while sharing the parent's builtins, globals, and module
    /// state. Currently identical to [`Vm::child_vm`].
    pub(crate) fn child_vm_for_host(&self) -> Vm {
        self.child_vm()
    }

    /// Sets the source directory used for import resolution and
    /// introspection, and publishes it via `set_thread_source_dir`.
    /// If no project root is set yet, one is auto-detected from `dir`.
    pub fn set_source_dir(&mut self, dir: &std::path::Path) {
        self.source_dir = Some(dir.to_path_buf());
        crate::stdlib::set_thread_source_dir(dir);
        // A root that is already set is kept; only detect when missing.
        if self.project_root.is_some() {
            return;
        }
        self.project_root = crate::stdlib::process::find_project_root(dir);
    }

    /// Sets the project root explicitly.
    /// Used by ACP/CLI to override auto-detection.
    pub fn set_project_root(&mut self, root: &std::path::Path) {
        self.project_root = Some(root.to_path_buf());
    }

    /// Returns the project root, or the source directory when no root is
    /// set; `None` when neither is known.
    pub fn project_root(&self) -> Option<&std::path::Path> {
        match self.project_root.as_deref() {
            Some(root) => Some(root),
            None => self.source_dir.as_deref(),
        }
    }

    /// Lists the names of all registered builtins: synchronous ones first,
    /// followed by asynchronous ones.
    pub fn builtin_names(&self) -> Vec<String> {
        self.builtins
            .keys()
            .chain(self.async_builtins.keys())
            .cloned()
            .collect()
    }

    /// Registers a global constant (e.g. `pi`, `e`), replacing any previous
    /// value under the same name. Globals live outside the environment so
    /// user-defined variables can shadow them.
    pub fn set_global(&mut self, name: &str, value: VmValue) {
        self.globals.insert(name.to_owned(), value);
    }

    /// Returns the output captured so far.
    pub fn output(&self) -> &str {
        self.output.as_str()
    }

    /// Pops the top of the value stack, or fails with
    /// `VmError::StackUnderflow` when the stack is empty.
    pub(crate) fn pop(&mut self) -> Result<VmValue, VmError> {
        match self.stack.pop() {
            Some(value) => Ok(value),
            None => Err(VmError::StackUnderflow),
        }
    }

    /// Borrows the top of the value stack without removing it, or fails with
    /// `VmError::StackUnderflow` when the stack is empty.
    pub(crate) fn peek(&self) -> Result<&VmValue, VmError> {
        match self.stack.last() {
            Some(top) => Ok(top),
            None => Err(VmError::StackUnderflow),
        }
    }

    /// Extracts an owned copy of a string constant; any other constant kind
    /// yields a `VmError::TypeError`.
    pub(crate) fn const_string(c: &Constant) -> Result<String, VmError> {
        if let Constant::String(text) = c {
            Ok(text.clone())
        } else {
            Err(VmError::TypeError("expected string constant".into()))
        }
    }
}
impl Default for Vm {
fn default() -> Self {
Self::new()
}
}