// harn_vm/vm/debug.rs
1use std::rc::Rc;
2
3use crate::chunk::{Chunk, Constant};
4use crate::value::{VmError, VmValue};
5
6use super::{CallFrame, Vm};
7
/// Debug action returned by the debug hook.
///
/// Fieldless two-variant enum, so it derives `Copy` and `Eq` in
/// addition to `Clone`/`PartialEq` — callers can compare and pass it
/// by value without cloning. Backward compatible with existing
/// `matches!`/`==` call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DebugAction {
    /// Continue execution normally.
    Continue,
    /// Stop (breakpoint hit, step complete).
    Stop,
}
16
/// Information about current execution state for the debugger.
///
/// Snapshot handed to the debug hook and to the DAP adapter whenever
/// the VM pauses; populated by `Vm::debug_state`.
#[derive(Debug, Clone)]
pub struct DebugState {
    /// Source line about to execute (0 when unknown).
    pub line: usize,
    /// Name → value for every binding visible in the current scope.
    /// BTreeMap keeps the listing deterministically sorted for the UI.
    pub variables: std::collections::BTreeMap<String, VmValue>,
    /// Display name of the innermost frame ("pipeline" at the root,
    /// "frame_{n}" otherwise — see `Vm::debug_state`).
    pub frame_name: String,
    /// Number of live call frames.
    pub frame_depth: usize,
}
25
26pub(super) type DebugHook = dyn FnMut(&DebugState) -> DebugAction;
27
28impl Vm {
29 /// Replace breakpoints for a single source file. Pass an empty string
30 /// (or call `set_breakpoints` for the wildcard equivalent) to install
31 /// breakpoints that match every file — useful for ad-hoc CLI runs
32 /// where the embedder doesn't track per-file source paths.
33 pub fn set_breakpoints_for_file(&mut self, file: &str, lines: Vec<usize>) {
34 if lines.is_empty() {
35 self.breakpoints.remove(file);
36 return;
37 }
38 self.breakpoints
39 .insert(file.to_string(), lines.into_iter().collect());
40 }
41
    /// Backwards-compatible wildcard form. Stores all lines under the
    /// empty-string key, which matches *any* source file at the check
    /// site. Existing embedders that don't track file scoping still work.
    ///
    /// An empty `lines` vec clears the wildcard set (the delegate
    /// removes the "" entry instead of inserting an empty one).
    pub fn set_breakpoints(&mut self, lines: Vec<usize>) {
        self.set_breakpoints_for_file("", lines);
    }
48
49 /// Replace the function-breakpoint set. Every subsequent closure
50 /// call whose name matches one of the provided strings will pause
51 /// on entry. Empty vec clears the set.
52 pub fn set_function_breakpoints(&mut self, names: Vec<String>) {
53 self.function_breakpoints = names.into_iter().collect();
54 // Clear any pending latch so a stale entry from the previous
55 // configuration doesn't fire once.
56 self.pending_function_bp = None;
57 }
58
    /// Returns the current function-breakpoint name set. Used by the
    /// DAP adapter to build the `setFunctionBreakpoints` response with
    /// verified=true per registered name.
    ///
    /// NOTE(review): ordering follows the backing set's iteration
    /// order — callers should not depend on any particular order.
    pub fn function_breakpoint_names(&self) -> Vec<String> {
        self.function_breakpoints.iter().cloned().collect()
    }
65
    /// Drain any pending function-breakpoint name latched by the most
    /// recent closure entry. Returns `Some(name)` exactly once per hit
    /// so the caller can emit a single `stopped` event.
    ///
    /// `Option::take` resets the latch to `None`, which is what keeps
    /// the hit from being reported twice.
    pub fn take_pending_function_bp(&mut self) -> Option<String> {
        self.pending_function_bp.take()
    }
72
73 /// Source file path of the currently executing frame, if known.
74 pub(crate) fn current_source_file(&self) -> Option<&str> {
75 self.frames
76 .last()
77 .and_then(|f| f.chunk.source_file.as_deref())
78 }
79
80 /// True when a breakpoint at `line` is set for the current frame's
81 /// source file (or the wildcard set covers it).
82 pub(crate) fn breakpoint_matches(&self, line: usize) -> bool {
83 if let Some(wild) = self.breakpoints.get("") {
84 if wild.contains(&line) {
85 return true;
86 }
87 }
88 if let Some(file) = self.current_source_file() {
89 if let Some(set) = self.breakpoints.get(file) {
90 if set.contains(&line) {
91 return true;
92 }
93 }
94 // Some callers send a relative or differently-prefixed path
95 // than the chunk records; fall back to suffix comparison so
96 // foo.harn matches /abs/path/foo.harn and vice-versa.
97 for (key, set) in &self.breakpoints {
98 if key.is_empty() {
99 continue;
100 }
101 if (file.ends_with(key.as_str()) || key.ends_with(file)) && set.contains(&line) {
102 return true;
103 }
104 }
105 }
106 false
107 }
108
    /// Enable step mode (stop at the next source line regardless of
    /// frame depth — i.e. step-in semantics, descending into calls).
    pub fn set_step_mode(&mut self, step: bool) {
        self.step_mode = step;
        // usize::MAX means "any frame depth is acceptable for the next
        // stop" — the step check compares `frames.len() <= this value`.
        self.step_frame_depth = usize::MAX;
    }
115
    /// Enable step-over mode (stop at the next source line in the current
    /// frame or a shallower one, skipping past any nested calls).
    pub fn set_step_over(&mut self) {
        self.step_mode = true;
        // Storing the current frame count means nested calls (which push
        // frames above it) never satisfy `frames.len() <= depth`.
        self.step_frame_depth = self.frames.len();
    }
122
    /// Register a debug hook invoked whenever execution advances to a new source line.
    ///
    /// The hook receives a [`DebugState`] snapshot and may return
    /// [`DebugAction::Stop`] to pause the VM; in `step_execute` it is
    /// consulted before breakpoint and step checks.
    pub fn set_debug_hook<F>(&mut self, hook: F)
    where
        F: FnMut(&DebugState) -> DebugAction + 'static,
    {
        self.debug_hook = Some(Box::new(hook));
    }
130
    /// Clear the current debug hook. Subsequent line changes fall
    /// through to breakpoint and step checks only.
    pub fn clear_debug_hook(&mut self) {
        self.debug_hook = None;
    }
135
    /// Enable step-out mode (stop at the next source line *after* the
    /// current frame has returned — strictly shallower than where the
    /// user requested the step-out). `saturating_sub` keeps the depth
    /// at 0 when invoked on the outermost frame instead of wrapping.
    pub fn set_step_out(&mut self) {
        self.step_mode = true;
        // Condition site compares `frames.len() <= step_frame_depth`, so
        // storing N-1 makes the stop fire only after the current frame
        // pops (frames.len() drops from N to N-1 or less). Clamp to 0 for
        // the top frame — caller handles that via the usize::MAX sentinel
        // if they wanted step-in semantics.
        self.step_frame_depth = self.frames.len().saturating_sub(1);
    }
148
    /// Check if the VM is stopped at a debug point.
    ///
    /// Set by `step_execute` when a hook, breakpoint, or step check
    /// fires; cleared again just before the next executed instruction.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
153
154 /// Get the current debug state (variables, line, etc.).
155 pub fn debug_state(&self) -> DebugState {
156 let line = self.current_line();
157 let variables = self.visible_variables();
158 let frame_name = if self.frames.len() > 1 {
159 format!("frame_{}", self.frames.len() - 1)
160 } else {
161 "pipeline".to_string()
162 };
163 DebugState {
164 line,
165 variables,
166 frame_name,
167 frame_depth: self.frames.len(),
168 }
169 }
170
171 /// Call sites (name + ip) on `line` within the current frame's
172 /// chunk — drives DAP `stepInTargets` (#112). Walks the chunk's
173 /// parallel lines array, surfaces every Call / MethodCall /
174 /// CallSpread and pairs it with the name of the constant or
175 /// identifier preceding the call when we can derive it cheaply.
176 pub fn call_sites_on_line(&self, line: u32) -> Vec<(u32, String)> {
177 let Some(frame) = self.frames.last() else {
178 return Vec::new();
179 };
180 let chunk = &frame.chunk;
181 let mut out = Vec::new();
182 let code = &chunk.code;
183 let lines = &chunk.lines;
184 let mut ip: usize = 0;
185 while ip < code.len() {
186 let op = code[ip];
187 if ip < lines.len() && lines[ip] == line {
188 // 0x00 .. 0x99 covers the opcode space the compiler
189 // emits for calls. Rather than decode every op, we
190 // pattern-match on the Call-family opcodes via
191 // their numeric tag — stable because harn-vm locks
192 // opcodes with pin tests.
193 if matches!(op, 0x40..=0x44) {
194 // Best-effort label: take the most recent
195 // LoadConst / LoadGlobal constant value.
196 let label = Self::label_preceding_call(chunk, ip);
197 out.push((ip as u32, label));
198 }
199 }
200 ip += 1;
201 }
202 out
203 }
204
205 fn label_preceding_call(chunk: &Chunk, call_ip: usize) -> String {
206 // Walk backwards a few instructions to find a LoadConst that
207 // resolves to a string (the callee name). Good enough for
208 // the IDE menu; deep callee resolution can land later if
209 // needed.
210 let mut back = call_ip.saturating_sub(6);
211 while back < call_ip {
212 let op = chunk.code[back];
213 // LoadConst opcodes (range covers the two-byte tag) —
214 // fall back to "call" when none found.
215 if (op == 0x01 || op == 0x02) && back + 2 < chunk.code.len() {
216 let idx = (u16::from(chunk.code[back + 1]) << 8) | u16::from(chunk.code[back + 2]);
217 if let Some(Constant::String(s)) = chunk.constants.get(idx as usize) {
218 return s.clone();
219 }
220 }
221 back += 1;
222 }
223 "call".to_string()
224 }
225
    /// Install (or replace) the cooperative cancellation token on
    /// this VM. Callers (DAP adapter, embedded host) flip the
    /// wrapped AtomicBool to request graceful shutdown; the step
    /// loop checks `is_cancel_requested()` at every instruction and
    /// exits with `VmError::Cancelled` when set.
    ///
    /// NOTE(review): `step_execute` actually surfaces cancellation as
    /// `VmError::Thrown` with a "kind:cancelled:" payload — confirm
    /// whether the `VmError::Cancelled` wording above is stale.
    pub fn install_cancel_token(&mut self, token: std::sync::Arc<std::sync::atomic::AtomicBool>) {
        self.cancel_token = Some(token);
    }
234
235 /// Signal cooperative cancellation on this VM — the step loop
236 /// unwinds on its next instruction check. Lazily allocates a
237 /// fresh token when none is installed so hosts don't need to
238 /// pre-plumb it on every launch. Returns the Arc so the caller
239 /// can hold onto it and re-signal later if needed.
240 pub fn signal_cancel(&mut self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
241 let token = self.cancel_token.clone().unwrap_or_else(|| {
242 let t = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
243 self.cancel_token = Some(t.clone());
244 t
245 });
246 token.store(true, std::sync::atomic::Ordering::SeqCst);
247 token
248 }
249
250 /// True when cooperative cancellation has been requested.
251 pub fn is_cancel_requested(&self) -> bool {
252 self.cancel_token
253 .as_ref()
254 .map(|t| t.load(std::sync::atomic::Ordering::SeqCst))
255 .unwrap_or(false)
256 }
257
258 /// Identifiers visible at the given frame's scope — locals plus
259 /// every registered builtin + async builtin. Drives DAP
260 /// `completions` (#109) so the REPL autocomplete surfaces
261 /// everything the unified evaluator can reach.
262 pub fn identifiers_in_scope(&self, _frame_id: usize) -> Vec<String> {
263 let mut out: Vec<String> = self.visible_variables().keys().cloned().collect();
264 out.extend(self.builtins.keys().cloned());
265 out.extend(self.async_builtins.keys().cloned());
266 out.sort();
267 out.dedup();
268 out
269 }
270
271 /// Get all stack frames for the debugger.
272 pub fn debug_stack_frames(&self) -> Vec<(String, usize)> {
273 let mut frames = Vec::new();
274 for (i, frame) in self.frames.iter().enumerate() {
275 let line = if frame.ip > 0 && frame.ip - 1 < frame.chunk.lines.len() {
276 frame.chunk.lines[frame.ip - 1] as usize
277 } else {
278 0
279 };
280 let name = if frame.fn_name.is_empty() {
281 if i == 0 {
282 "pipeline".to_string()
283 } else {
284 format!("fn_{}", i)
285 }
286 } else {
287 frame.fn_name.clone()
288 };
289 frames.push((name, line));
290 }
291 frames
292 }
293
294 /// Get the current source line.
295 pub(crate) fn current_line(&self) -> usize {
296 if let Some(frame) = self.frames.last() {
297 let ip = if frame.ip > 0 { frame.ip - 1 } else { 0 };
298 if ip < frame.chunk.lines.len() {
299 return frame.chunk.lines[ip] as usize;
300 }
301 }
302 0
303 }
304
    /// Execute one instruction, returning whether to stop (breakpoint/step).
    /// Returns Ok(None) to continue, Ok(Some(val)) on program end, Err on error.
    /// A debug stop is reported as `Ok(Some((Nil, true)))` with
    /// `self.stopped` set — callers distinguish it via `is_stopped()`.
    ///
    /// Stop checks run in priority order: debug hook → line breakpoint
    /// → function-breakpoint latch → step mode.
    ///
    /// Line-change detection reads the line of the instruction we're
    /// *about to execute* (`lines[ip]`) rather than the byte before
    /// `ip`. After a jump, `ip-1` still points into the skipped region,
    /// which previously reported phantom stops on the tail of a
    /// not-taken branch (e.g. `host_metadata_save()` highlighted even
    /// though `any_stale` was false). Using `lines[ip]` — combined with
    /// cleanup ops emitted at line 0 after branch/loop exits — keeps
    /// the debugger aligned with what's actually going to run.
    pub async fn step_execute(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
        // Cooperative cancellation (#108): the DAP adapter flips the
        // shared flag when the IDE presses the Stop pill. Check here
        // before any instruction work so the loop unwinds promptly
        // on the next tick.
        if self.is_cancel_requested() {
            return Err(VmError::Thrown(VmValue::String(Rc::from(
                "kind:cancelled:VM cancelled by host",
            ))));
        }
        let current_line = self.upcoming_line();
        // `current_line > 0` filters the synthetic line-0 cleanup ops so
        // they never register as a line change.
        let line_changed = current_line != self.last_line && current_line > 0;

        if line_changed {
            self.last_line = current_line;

            let state = self.debug_state();
            if let Some(hook) = self.debug_hook.as_mut() {
                if matches!(hook(&state), DebugAction::Stop) {
                    self.stopped = true;
                    return Ok(Some((VmValue::Nil, true)));
                }
            }

            if self.breakpoint_matches(current_line) {
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }

            // Function-breakpoint latch: set by push_closure_frame when
            // the callee's name is in `function_breakpoints`. Stop with
            // the same shape as a line BP so the DAP adapter's
            // classify_breakpoint_hit emits a standard stopped event.
            if self.pending_function_bp.is_some() {
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }

            // step_frame_depth is the deepest frame count at which a stop
            // is acceptable. set_step_mode uses usize::MAX (any depth,
            // step-in), set_step_over uses N (same frame or shallower),
            // set_step_out uses N-1 (strictly shallower than where the
            // step-out was requested).
            if self.step_mode && self.frames.len() <= self.step_frame_depth {
                self.step_mode = false;
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }
        }

        // No stop fired: clear the paused flag and run one instruction.
        self.stopped = false;
        self.execute_one_cycle().await
    }
369
370 /// Line of the instruction *about to execute* — used by the
371 /// debugger for line-change detection so the first cycle after a
372 /// jump doesn't report a stale line from the skipped region.
373 pub(crate) fn upcoming_line(&self) -> usize {
374 if let Some(frame) = self.frames.last() {
375 if frame.ip < frame.chunk.lines.len() {
376 return frame.chunk.lines[frame.ip] as usize;
377 }
378 }
379 0
380 }
381
    /// Number of live call frames. Used by the DAP adapter to
    /// translate stackTrace ids (1-based, innermost first) back to
    /// the VM's 0-based outermost-first index when processing
    /// `restartFrame`.
    ///
    /// NOTE(review): presumably 0 only when the VM is idle — confirm.
    pub fn frame_count(&self) -> usize {
        self.frames.len()
    }
389
    /// Rewind the given frame to its entry state so stepping resumes
    /// from the first instruction of the function with the original
    /// arguments re-bound. Higher frames above `frame_id` are dropped.
    /// Returns an error if the frame has no captured `initial_env`
    /// (scratch / evaluator frames don't) or if the id is out of range.
    ///
    /// Side effects already performed by the restarted frame (tool
    /// calls, file writes, host_call round-trips) are *not* rolled
    /// back — DAP leaves that to the adapter's discretion. The IDE
    /// should warn on frames whose source text contains obvious
    /// side-effectful calls before invoking restartFrame.
    ///
    /// # Errors
    /// `VmError::Runtime` when `frame_id` is out of range or the target
    /// frame was not captured for restart.
    pub fn restart_frame(&mut self, frame_id: usize) -> Result<(), VmError> {
        if frame_id >= self.frames.len() {
            return Err(VmError::Runtime(format!(
                "restartFrame: frame id {frame_id} out of range (have {} frames)",
                self.frames.len()
            )));
        }
        let Some(initial_env) = self.frames[frame_id].initial_env.clone() else {
            return Err(VmError::Runtime(
                "restartFrame: target frame was not captured for restart (scratch / evaluator frame)"
                    .into(),
            ));
        };
        let initial_local_slots = self.frames[frame_id].initial_local_slots.clone();
        // Drop every frame above the target. Each pop restores its
        // saved_iterator_depth into `self.iterators` so iterator state
        // unwinds consistently.
        while self.frames.len() > frame_id + 1 {
            let popped = self.frames.pop().expect("bounds checked above");
            self.iterators.truncate(popped.saved_iterator_depth);
        }
        // Rewind the target frame.
        let frame = self
            .frames
            .last_mut()
            .expect("frame_id within bounds guarantees a frame");
        frame.ip = 0;
        let stack_base = frame.stack_base;
        let saved_iter_depth = frame.saved_iterator_depth;
        // Discard any values and iterators the frame had pushed so far.
        self.stack.truncate(stack_base);
        self.iterators.truncate(saved_iter_depth);
        if let Some(initial_local_slots) = initial_local_slots {
            frame.local_slots = initial_local_slots;
            frame.local_scope_depth = 0;
        }
        self.env = initial_env;
        // Reset the line-change baseline so the first re-executed
        // instruction reports as a fresh line, and clear the paused flag.
        self.last_line = 0;
        self.stopped = false;
        Ok(())
    }
441
    /// Assign a new value to a named binding in the paused VM's env.
    /// Returns the value that was actually stored (after coercion, if
    /// the VM performed any) so the caller can echo it back to the
    /// DAP client. Fails if the name does not resolve to a mutable
    /// binding in any live scope.
    ///
    /// The provided `value_expr` goes through the unified evaluator so
    /// callers can type expressions like `plan.tasks.len() + 1` in the
    /// Locals inline-edit field, not just literals.
    ///
    /// # Errors
    /// Propagates any parse/evaluation error from `value_expr`, and
    /// returns `VmError::Runtime` when `name` is not in scope.
    pub async fn set_variable_in_frame(
        &mut self,
        name: &str,
        value_expr: &str,
        frame_id: usize,
    ) -> Result<VmValue, VmError> {
        let value = self.evaluate_in_frame(value_expr, frame_id).await?;
        // Debug-specific assign: bypasses the `let` immutability gate
        // because the user is explicitly editing in the IDE, and
        // almost every pipeline binding is `let`. The underlying
        // binding's mutability flag is preserved so runtime behavior
        // after the override is unchanged.
        // Try the frame's local slots first; fall back to the env chain.
        if !self.assign_active_local_slot(name, value.clone(), true)? {
            self.env
                .assign_debug(name, value.clone())
                .map_err(|e| match e {
                    VmError::UndefinedVariable(n) => {
                        VmError::Runtime(format!("setVariable: '{n}' is not in the current scope"))
                    }
                    other => other,
                })?;
        }
        Ok(value)
    }
475
    /// Evaluate a Harn expression against the currently paused frame's
    /// scope and return its value. This is the single evaluation path
    /// used by hover tips, watch expressions, conditional breakpoints,
    /// logpoint interpolation, and `setVariable` / `setExpression`.
    /// Before we had a unified evaluator there were four separate
    /// mini-parsers, each with its own rough edges (see burin-code #85).
    ///
    /// The expression is wrapped as a pipeline returning `(<expr>)` so
    /// arbitrary infix chains, ternaries, and access paths parse
    /// uniformly. A scratch `CallFrame` runs the wrapped bytecode with
    /// `saved_env` pointing at the caller's env, so the compiled
    /// expression sees every local in scope. When the scratch frame
    /// pops, the caller's env is automatically restored.
    ///
    /// A fixed instruction budget guards against runaway expressions
    /// (infinite loops, accidental recursion) wedging the debugger.
    /// Side effects — including `llm_call`, `host_*`, and file mutators
    /// — are not blocked here; callers that invoke this for read-only
    /// surfaces (hover, watch) should reject obviously-side-effectful
    /// expressions before calling.
    pub async fn evaluate_in_frame(
        &mut self,
        expr: &str,
        _frame_id: usize,
    ) -> Result<VmValue, VmError> {
        let trimmed = expr.trim();
        if trimmed.is_empty() {
            return Err(VmError::Runtime("evaluate: empty expression".into()));
        }

        // Wrap as a pipeline whose body *returns* the expression. The
        // explicit `return` compiles to `push value + Op::Return`, and
        // Op::Return's frame-exit path pushes that value onto the
        // caller's stack — which is where we read it from below.
        // Avoids the script-mode compile path that trails a Pop+Nil
        // sequence after every expression statement, which would
        // clobber the result before we could capture it.
        let wrapped = format!("pipeline default() {{\n return ({trimmed})\n}}\n");
        let program = harn_parser::check_source_strict(&wrapped)
            .map_err(|e| VmError::Runtime(format!("evaluate: parse error: {e}")))?;
        let mut chunk = crate::compiler::Compiler::new()
            .compile(&program)
            .map_err(|e| VmError::Runtime(format!("evaluate: compile error: {e}")))?;
        // Inherit the current frame's source file so any runtime error
        // enriched with `(line N)` attributes cleanly.
        if let Some(current) = self.frames.last() {
            chunk.source_file = current.chunk.source_file.clone();
        }

        // Snapshot every piece of VM state the scratch frame could
        // perturb. Evaluation MUST be transparent: step state, scope
        // depth, iterator depth, and the line-change baseline all
        // restore on exit so the paused session continues exactly as
        // before the user typed an expression into the REPL.
        self.sync_current_frame_locals_to_env();
        let saved_stack_len = self.stack.len();
        let saved_frame_count = self.frames.len();
        let saved_iter_depth = self.iterators.len();
        let saved_scope_depth = self.env.scope_depth();
        let saved_last_line = self.last_line;
        let saved_step_mode = self.step_mode;
        let saved_step_frame_depth = self.step_frame_depth;
        let saved_stopped = self.stopped;
        let saved_env = self.env.clone();

        // Disable stepping during evaluation; otherwise the debug hook
        // would fire on every synthetic line and block the pause UI.
        self.step_mode = false;
        self.stopped = false;

        let local_slots = Self::fresh_local_slots(&chunk);
        self.frames.push(CallFrame {
            chunk: Rc::new(chunk),
            ip: 0,
            stack_base: saved_stack_len,
            saved_env,
            // Scratch evaluator frames never accept restartFrame — the
            // REPL/watch user expects read-only inspection semantics,
            // not replay — so skip the clone.
            initial_env: None,
            initial_local_slots: None,
            saved_iterator_depth: saved_iter_depth,
            fn_name: "<eval>".to_string(),
            argc: 0,
            saved_source_dir: self.source_dir.clone(),
            module_functions: None,
            module_state: None,
            local_slots,
            local_scope_base: self.env.scope_depth().saturating_sub(1),
            local_scope_depth: 0,
        });

        // Drive one op at a time with a fixed budget. A pure expression
        // is typically < 20 instructions; 10k gives plenty of headroom
        // for e.g. a list comprehension without letting a bad loop
        // hang the debugger forever.
        const MAX_EVAL_STEPS: usize = 10_000;
        let mut err: Option<VmError> = None;
        for _ in 0..MAX_EVAL_STEPS {
            if self.frames.len() <= saved_frame_count {
                break;
            }
            match self.execute_one_cycle().await {
                Ok(_) => {
                    // Re-check after the cycle: Op::Return popping the
                    // scratch frame is the completion signal.
                    if self.frames.len() <= saved_frame_count {
                        break;
                    }
                }
                Err(e) => {
                    err = Some(e);
                    break;
                }
            }
        }

        // Read the result before restoring the stack — frame exit
        // pushes the last-computed value onto the caller's stack, so
        // it sits at `saved_stack_len` if execution completed cleanly.
        let result = if self.stack.len() > saved_stack_len {
            Some(self.stack[saved_stack_len].clone())
        } else {
            None
        };

        // Unconditional cleanup so a mid-execution error doesn't leak
        // scratch state into the live session.
        self.frames.truncate(saved_frame_count);
        self.stack.truncate(saved_stack_len);
        self.iterators.truncate(saved_iter_depth);
        self.env.truncate_scopes(saved_scope_depth);
        self.last_line = saved_last_line;
        self.step_mode = saved_step_mode;
        self.step_frame_depth = saved_step_frame_depth;
        self.stopped = saved_stopped;

        // Report the runtime error in preference to the missing-result
        // fallback — both can be true after an early break.
        if let Some(e) = err {
            return Err(e);
        }
        result.ok_or_else(|| {
            VmError::Runtime(
                "evaluate: step budget exceeded before the expression produced a value".into(),
            )
        })
    }
620}