// harn_vm/vm/debug.rs
1use std::rc::Rc;
2
3use crate::chunk::{Chunk, Constant};
4use crate::value::{VmError, VmValue};
5
6use super::{CallFrame, Vm};
7
8/// Debug action returned by the debug hook.
/// Debug action returned by the debug hook.
///
/// The hook fires once per source-line change during stepped execution;
/// returning [`DebugAction::Stop`] pauses the VM at that line, while
/// [`DebugAction::Continue`] lets execution proceed.
#[derive(Debug, Clone, PartialEq)]
pub enum DebugAction {
    /// Continue execution normally.
    Continue,
    /// Stop (breakpoint hit, step complete).
    Stop,
}
16
17/// Information about current execution state for the debugger.
/// Information about current execution state for the debugger.
#[derive(Debug, Clone)]
pub struct DebugState {
    /// Source line of the instruction most recently reached; 0 when unknown.
    pub line: usize,
    /// Name → value snapshot of the bindings visible in the current scope.
    /// BTreeMap keeps the listing in stable sorted order for the UI.
    pub variables: std::collections::BTreeMap<String, VmValue>,
    /// Display label for the current frame ("pipeline" for the outermost,
    /// a synthetic "frame_N" otherwise).
    pub frame_name: String,
    /// Number of live call frames at snapshot time.
    pub frame_depth: usize,
}
25
/// Boxed-closure signature for the per-line debug hook installed via
/// `Vm::set_debug_hook`: inspects the current [`DebugState`] and decides
/// whether execution continues or pauses.
pub(super) type DebugHook = dyn FnMut(&DebugState) -> DebugAction;
27
28impl Vm {
29 /// Replace breakpoints for a single source file. Pass an empty string
30 /// (or call `set_breakpoints` for the wildcard equivalent) to install
31 /// breakpoints that match every file — useful for ad-hoc CLI runs
32 /// where the embedder doesn't track per-file source paths.
33 pub fn set_breakpoints_for_file(&mut self, file: &str, lines: Vec<usize>) {
34 if lines.is_empty() {
35 self.breakpoints.remove(file);
36 return;
37 }
38 self.breakpoints
39 .insert(file.to_string(), lines.into_iter().collect());
40 }
41
    /// Backwards-compatible wildcard form. Stores all lines under the
    /// empty-string key, which matches *any* source file at the check
    /// site. Existing embedders that don't track file scoping still work.
    /// Passing an empty vec clears the wildcard set.
    pub fn set_breakpoints(&mut self, lines: Vec<usize>) {
        self.set_breakpoints_for_file("", lines);
    }
48
49 /// Replace the function-breakpoint set. Every subsequent closure
50 /// call whose name matches one of the provided strings will pause
51 /// on entry. Empty vec clears the set.
52 pub fn set_function_breakpoints(&mut self, names: Vec<String>) {
53 self.function_breakpoints = names.into_iter().collect();
54 // Clear any pending latch so a stale entry from the previous
55 // configuration doesn't fire once.
56 self.pending_function_bp = None;
57 }
58
59 /// Returns the current function-breakpoint name set. Used by the
60 /// DAP adapter to build the `setFunctionBreakpoints` response with
61 /// verified=true per registered name.
62 pub fn function_breakpoint_names(&self) -> Vec<String> {
63 self.function_breakpoints.iter().cloned().collect()
64 }
65
66 /// Drain any pending function-breakpoint name latched by the most
67 /// recent closure entry. Returns `Some(name)` exactly once per hit
68 /// so the caller can emit a single `stopped` event.
69 pub fn take_pending_function_bp(&mut self) -> Option<String> {
70 self.pending_function_bp.take()
71 }
72
73 /// Source file path of the currently executing frame, if known.
74 pub(crate) fn current_source_file(&self) -> Option<&str> {
75 self.frames
76 .last()
77 .and_then(|f| f.chunk.source_file.as_deref())
78 }
79
80 /// True when a breakpoint at `line` is set for the current frame's
81 /// source file (or the wildcard set covers it).
82 pub(crate) fn breakpoint_matches(&self, line: usize) -> bool {
83 if let Some(wild) = self.breakpoints.get("") {
84 if wild.contains(&line) {
85 return true;
86 }
87 }
88 if let Some(file) = self.current_source_file() {
89 if let Some(set) = self.breakpoints.get(file) {
90 if set.contains(&line) {
91 return true;
92 }
93 }
94 // Some callers send a relative or differently-prefixed path
95 // than the chunk records; fall back to suffix comparison so
96 // foo.harn matches /abs/path/foo.harn and vice-versa.
97 for (key, set) in &self.breakpoints {
98 if key.is_empty() {
99 continue;
100 }
101 if (file.ends_with(key.as_str()) || key.ends_with(file)) && set.contains(&line) {
102 return true;
103 }
104 }
105 }
106 false
107 }
108
109 /// Enable step mode (stop at the next source line regardless of
110 /// frame depth — i.e. step-in semantics, descending into calls).
111 pub fn set_step_mode(&mut self, step: bool) {
112 self.step_mode = step;
113 self.step_frame_depth = usize::MAX;
114 }
115
116 /// Enable step-over mode (stop at the next source line in the current
117 /// frame or a shallower one, skipping past any nested calls).
118 pub fn set_step_over(&mut self) {
119 self.step_mode = true;
120 self.step_frame_depth = self.frames.len();
121 }
122
123 /// Register a debug hook invoked whenever execution advances to a new source line.
124 pub fn set_debug_hook<F>(&mut self, hook: F)
125 where
126 F: FnMut(&DebugState) -> DebugAction + 'static,
127 {
128 self.debug_hook = Some(Box::new(hook));
129 }
130
131 /// Clear the current debug hook.
132 pub fn clear_debug_hook(&mut self) {
133 self.debug_hook = None;
134 }
135
136 /// Enable step-out mode (stop at the next source line *after* the
137 /// current frame has returned — strictly shallower than where the
138 /// user requested the step-out).
139 pub fn set_step_out(&mut self) {
140 self.step_mode = true;
141 // Condition site compares `frames.len() <= step_frame_depth`, so
142 // storing N-1 makes the stop fire only after the current frame
143 // pops (frames.len() drops from N to N-1 or less). Clamp to 0 for
144 // the top frame — caller handles that via the usize::MAX sentinel
145 // if they wanted step-in semantics.
146 self.step_frame_depth = self.frames.len().saturating_sub(1);
147 }
148
    /// True while the VM is paused at a debug stop (breakpoint hit,
    /// step completed, or debug-hook request). The flag is cleared
    /// again at the top of the next executed instruction cycle.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
153
154 /// Get the current debug state (variables, line, etc.).
155 pub fn debug_state(&self) -> DebugState {
156 let line = self.current_line();
157 let variables = self.visible_variables();
158 let frame_name = if self.frames.len() > 1 {
159 format!("frame_{}", self.frames.len() - 1)
160 } else {
161 "pipeline".to_string()
162 };
163 DebugState {
164 line,
165 variables,
166 frame_name,
167 frame_depth: self.frames.len(),
168 }
169 }
170
171 /// Call sites (name + ip) on `line` within the current frame's
172 /// chunk — drives DAP `stepInTargets` (#112). Walks the chunk's
173 /// parallel lines array, surfaces every Call / MethodCall /
174 /// CallSpread and pairs it with the name of the constant or
175 /// identifier preceding the call when we can derive it cheaply.
176 pub fn call_sites_on_line(&self, line: u32) -> Vec<(u32, String)> {
177 let Some(frame) = self.frames.last() else {
178 return Vec::new();
179 };
180 let chunk = &frame.chunk;
181 let mut out = Vec::new();
182 let code = &chunk.code;
183 let lines = &chunk.lines;
184 let mut ip: usize = 0;
185 while ip < code.len() {
186 let op = code[ip];
187 if ip < lines.len() && lines[ip] == line {
188 // 0x00 .. 0x99 covers the opcode space the compiler
189 // emits for calls. Rather than decode every op, we
190 // pattern-match on the Call-family opcodes via
191 // their numeric tag — stable because harn-vm locks
192 // opcodes with pin tests.
193 if matches!(op, 0x40..=0x44) {
194 // Best-effort label: take the most recent
195 // LoadConst / LoadGlobal constant value.
196 let label = Self::label_preceding_call(chunk, ip);
197 out.push((ip as u32, label));
198 }
199 }
200 ip += 1;
201 }
202 out
203 }
204
205 fn label_preceding_call(chunk: &Chunk, call_ip: usize) -> String {
206 // Walk backwards a few instructions to find a LoadConst that
207 // resolves to a string (the callee name). Good enough for
208 // the IDE menu; deep callee resolution can land later if
209 // needed.
210 let mut back = call_ip.saturating_sub(6);
211 while back < call_ip {
212 let op = chunk.code[back];
213 // LoadConst opcodes (range covers the two-byte tag) —
214 // fall back to "call" when none found.
215 if (op == 0x01 || op == 0x02) && back + 2 < chunk.code.len() {
216 let idx = (u16::from(chunk.code[back + 1]) << 8) | u16::from(chunk.code[back + 2]);
217 if let Some(Constant::String(s)) = chunk.constants.get(idx as usize) {
218 return s.clone();
219 }
220 }
221 back += 1;
222 }
223 "call".to_string()
224 }
225
226 /// Install (or replace) the cooperative cancellation token on
227 /// this VM. Callers (DAP adapter, embedded host) flip the
228 /// wrapped AtomicBool to request graceful shutdown; the step
229 /// loop checks `is_cancel_requested()` at every instruction and
230 /// exits with `VmError::Cancelled` when set.
231 pub fn install_cancel_token(&mut self, token: std::sync::Arc<std::sync::atomic::AtomicBool>) {
232 self.cancel_token = Some(token);
233 self.cancel_grace_instructions_remaining = None;
234 }
235
236 /// Signal cooperative cancellation on this VM — the step loop
237 /// unwinds on its next instruction check. Lazily allocates a
238 /// fresh token when none is installed so hosts don't need to
239 /// pre-plumb it on every launch. Returns the Arc so the caller
240 /// can hold onto it and re-signal later if needed.
241 pub fn signal_cancel(&mut self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
242 let token = self.cancel_token.clone().unwrap_or_else(|| {
243 let t = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
244 self.cancel_token = Some(t.clone());
245 t
246 });
247 token.store(true, std::sync::atomic::Ordering::SeqCst);
248 token
249 }
250
251 /// True when cooperative cancellation has been requested.
252 pub fn is_cancel_requested(&self) -> bool {
253 self.cancel_token
254 .as_ref()
255 .map(|t| t.load(std::sync::atomic::Ordering::SeqCst))
256 .unwrap_or(false)
257 }
258
259 /// Identifiers visible at the given frame's scope — locals plus
260 /// every registered builtin + async builtin. Drives DAP
261 /// `completions` (#109) so the REPL autocomplete surfaces
262 /// everything the unified evaluator can reach.
263 pub fn identifiers_in_scope(&self, _frame_id: usize) -> Vec<String> {
264 let mut out: Vec<String> = self.visible_variables().keys().cloned().collect();
265 out.extend(self.builtins.keys().cloned());
266 out.extend(self.async_builtins.keys().cloned());
267 out.sort();
268 out.dedup();
269 out
270 }
271
272 /// Get all stack frames for the debugger.
273 pub fn debug_stack_frames(&self) -> Vec<(String, usize)> {
274 let mut frames = Vec::new();
275 for (i, frame) in self.frames.iter().enumerate() {
276 let line = if frame.ip > 0 && frame.ip - 1 < frame.chunk.lines.len() {
277 frame.chunk.lines[frame.ip - 1] as usize
278 } else {
279 0
280 };
281 let name = if frame.fn_name.is_empty() {
282 if i == 0 {
283 "pipeline".to_string()
284 } else {
285 format!("fn_{}", i)
286 }
287 } else {
288 frame.fn_name.clone()
289 };
290 frames.push((name, line));
291 }
292 frames
293 }
294
295 /// Get the current source line.
296 pub(crate) fn current_line(&self) -> usize {
297 if let Some(frame) = self.frames.last() {
298 let ip = if frame.ip > 0 { frame.ip - 1 } else { 0 };
299 if ip < frame.chunk.lines.len() {
300 return frame.chunk.lines[ip] as usize;
301 }
302 }
303 0
304 }
305
    /// Execute one instruction, returning whether to stop (breakpoint/step).
    /// Returns Ok(None) to continue, Ok(Some(val)) on program end, Err on error.
    /// A stop is reported as `Ok(Some((VmValue::Nil, true)))` with
    /// `self.stopped` set, so the caller can distinguish it via `is_stopped()`.
    ///
    /// Stop checks run in a fixed priority order on each line change:
    /// debug hook, then line breakpoints, then the function-breakpoint
    /// latch, then step mode. Do not reorder them.
    ///
    /// Line-change detection reads the line of the instruction we're
    /// *about to execute* (`lines[ip]`) rather than the byte before
    /// `ip`. After a jump, `ip-1` still points into the skipped region,
    /// which previously reported phantom stops on the tail of a
    /// not-taken branch (e.g. `host_metadata_save()` highlighted even
    /// though `any_stale` was false). Using `lines[ip]` — combined with
    /// cleanup ops emitted at line 0 after branch/loop exits — keeps
    /// the debugger aligned with what's actually going to run.
    pub async fn step_execute(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
        // Cooperative cancellation (#108): the DAP adapter flips the
        // shared flag when the IDE presses the Stop pill. Check here
        // before any instruction work so the loop unwinds promptly
        // on the next tick.
        if self.is_cancel_requested() {
            return Err(VmError::Thrown(VmValue::String(Rc::from(
                "kind:cancelled:VM cancelled by host",
            ))));
        }
        let current_line = self.upcoming_line();
        // `current_line > 0` skips synthetic cleanup ops emitted at line 0.
        let line_changed = current_line != self.last_line && current_line > 0;

        if line_changed {
            self.last_line = current_line;

            let state = self.debug_state();
            if let Some(hook) = self.debug_hook.as_mut() {
                if matches!(hook(&state), DebugAction::Stop) {
                    self.stopped = true;
                    return Ok(Some((VmValue::Nil, true)));
                }
            }

            if self.breakpoint_matches(current_line) {
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }

            // Function-breakpoint latch: set by push_closure_frame when
            // the callee's name is in `function_breakpoints`. Stop with
            // the same shape as a line BP so the DAP adapter's
            // classify_breakpoint_hit emits a standard stopped event.
            if self.pending_function_bp.is_some() {
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }

            // step_frame_depth is the deepest frame count at which a stop
            // is acceptable. set_step_mode uses usize::MAX (any depth,
            // step-in), set_step_over uses N (same frame or shallower),
            // set_step_out uses N-1 (strictly shallower than where the
            // step-out was requested).
            if self.step_mode && self.frames.len() <= self.step_frame_depth {
                self.step_mode = false;
                self.stopped = true;
                return Ok(Some((VmValue::Nil, true)));
            }
        }

        self.stopped = false;
        self.execute_one_cycle().await
    }
370
371 /// Line of the instruction *about to execute* — used by the
372 /// debugger for line-change detection so the first cycle after a
373 /// jump doesn't report a stale line from the skipped region.
374 pub(crate) fn upcoming_line(&self) -> usize {
375 if let Some(frame) = self.frames.last() {
376 if frame.ip < frame.chunk.lines.len() {
377 return frame.chunk.lines[frame.ip] as usize;
378 }
379 }
380 0
381 }
382
    /// Number of live call frames. Used by the DAP adapter to
    /// translate stackTrace ids (1-based, innermost first) back to
    /// the VM's 0-based outermost-first index when processing
    /// `restartFrame`.
    pub fn frame_count(&self) -> usize {
        self.frames.len()
    }
390
    /// Rewind the given frame to its entry state so stepping resumes
    /// from the first instruction of the function with the original
    /// arguments re-bound. Higher frames above `frame_id` are dropped.
    /// Returns an error if the frame has no captured `initial_env`
    /// (scratch / evaluator frames don't) or if the id is out of range.
    ///
    /// Side effects already performed by the restarted frame (tool
    /// calls, file writes, host_call round-trips) are *not* rolled
    /// back — DAP leaves that to the adapter's discretion. The IDE
    /// should warn on frames whose source text contains obvious
    /// side-effectful calls before invoking restartFrame.
    pub fn restart_frame(&mut self, frame_id: usize) -> Result<(), VmError> {
        if frame_id >= self.frames.len() {
            return Err(VmError::Runtime(format!(
                "restartFrame: frame id {frame_id} out of range (have {} frames)",
                self.frames.len()
            )));
        }
        let Some(initial_env) = self.frames[frame_id].initial_env.clone() else {
            return Err(VmError::Runtime(
                "restartFrame: target frame was not captured for restart (scratch / evaluator frame)"
                    .into(),
            ));
        };
        let initial_local_slots = self.frames[frame_id].initial_local_slots.clone();
        // Drop every frame above the target. Each pop restores its
        // saved_iterator_depth into `self.iterators` so iterator state
        // unwinds consistently.
        while self.frames.len() > frame_id + 1 {
            let popped = self.frames.pop().expect("bounds checked above");
            self.iterators.truncate(popped.saved_iterator_depth);
        }
        // Rewind the target frame.
        let frame = self
            .frames
            .last_mut()
            .expect("frame_id within bounds guarantees a frame");
        frame.ip = 0;
        // Snapshot the truncation targets before releasing the &mut
        // borrow of the frame.
        let stack_base = frame.stack_base;
        let saved_iter_depth = frame.saved_iterator_depth;
        self.stack.truncate(stack_base);
        self.iterators.truncate(saved_iter_depth);
        if let Some(initial_local_slots) = initial_local_slots {
            frame.local_slots = initial_local_slots;
            frame.local_scope_depth = 0;
        }
        self.env = initial_env;
        // Reset the line-change baseline and stop flag so stepping
        // reports the function's first line again.
        self.last_line = 0;
        self.stopped = false;
        Ok(())
    }
442
443 /// Assign a new value to a named binding in the paused VM's env.
444 /// Returns the value that was actually stored (after coercion, if
445 /// the VM performed any) so the caller can echo it back to the
446 /// DAP client. Fails if the name does not resolve to a mutable
447 /// binding in any live scope.
448 ///
449 /// The provided `value_expr` goes through the unified evaluator so
450 /// callers can type expressions like `plan.tasks.len() + 1` in the
451 /// Locals inline-edit field, not just literals.
452 pub async fn set_variable_in_frame(
453 &mut self,
454 name: &str,
455 value_expr: &str,
456 frame_id: usize,
457 ) -> Result<VmValue, VmError> {
458 let value = self.evaluate_in_frame(value_expr, frame_id).await?;
459 // Debug-specific assign: bypasses the `let` immutability gate
460 // because the user is explicitly editing in the IDE, and
461 // almost every pipeline binding is `let`. The underlying
462 // binding's mutability flag is preserved so runtime behavior
463 // after the override is unchanged.
464 if !self.assign_active_local_slot(name, value.clone(), true)? {
465 self.env
466 .assign_debug(name, value.clone())
467 .map_err(|e| match e {
468 VmError::UndefinedVariable(n) => {
469 VmError::Runtime(format!("setVariable: '{n}' is not in the current scope"))
470 }
471 other => other,
472 })?;
473 }
474 Ok(value)
475 }
476
    /// Evaluate a Harn expression against the currently paused frame's
    /// scope and return its value. This is the single evaluation path
    /// used by hover tips, watch expressions, conditional breakpoints,
    /// logpoint interpolation, and `setVariable` / `setExpression` —
    /// before we had a unified evaluator there were four separate
    /// mini-parsers, each with its own rough edges (see burin-code #85).
    ///
    /// The expression is wrapped as a pipeline that returns `(<expr>)`
    /// so arbitrary infix chains, ternaries, and access paths parse
    /// uniformly. A scratch `CallFrame` runs the wrapped bytecode with
    /// `saved_env` pointing at the caller's env, so the compiled
    /// expression sees every local in scope. When the scratch frame
    /// pops, the caller's env is automatically restored.
    ///
    /// A fixed instruction budget guards against runaway expressions
    /// (infinite loops, accidental recursion) wedging the debugger.
    /// Side effects — including `llm_call`, `host_*`, and file mutators
    /// — are not blocked here; callers that invoke this for read-only
    /// surfaces (hover, watch) should reject obviously-side-effectful
    /// expressions before calling.
    pub async fn evaluate_in_frame(
        &mut self,
        expr: &str,
        _frame_id: usize,
    ) -> Result<VmValue, VmError> {
        let trimmed = expr.trim();
        if trimmed.is_empty() {
            return Err(VmError::Runtime("evaluate: empty expression".into()));
        }

        // Wrap as a pipeline whose body *returns* the expression. The
        // explicit `return` compiles to `push value + Op::Return`, and
        // Op::Return's frame-exit path pushes that value onto the
        // caller's stack — which is where we read it from below.
        // Avoids the script-mode compile path that trails a Pop+Nil
        // sequence after every expression statement, which would
        // clobber the result before we could capture it.
        let wrapped = format!("pipeline default() {{\n return ({trimmed})\n}}\n");
        let program = harn_parser::check_source_strict(&wrapped)
            .map_err(|e| VmError::Runtime(format!("evaluate: parse error: {e}")))?;
        let mut chunk = crate::compiler::Compiler::new()
            .compile(&program)
            .map_err(|e| VmError::Runtime(format!("evaluate: compile error: {e}")))?;
        // Inherit the current frame's source file so any runtime error
        // enriched with `(line N)` attributes cleanly.
        if let Some(current) = self.frames.last() {
            chunk.source_file = current.chunk.source_file.clone();
        }

        // Snapshot every piece of VM state the scratch frame could
        // perturb. Evaluation MUST be transparent: step state, scope
        // depth, iterator depth, and the line-change baseline all
        // restore on exit so the paused session continues exactly as
        // before the user typed an expression into the REPL.
        self.sync_current_frame_locals_to_env();
        let saved_stack_len = self.stack.len();
        let saved_frame_count = self.frames.len();
        let saved_iter_depth = self.iterators.len();
        let saved_scope_depth = self.env.scope_depth();
        let saved_last_line = self.last_line;
        let saved_step_mode = self.step_mode;
        let saved_step_frame_depth = self.step_frame_depth;
        let saved_stopped = self.stopped;
        let saved_env = self.env.clone();

        // Disable stepping during evaluation; otherwise the debug hook
        // would fire on every synthetic line and block the pause UI.
        self.step_mode = false;
        self.stopped = false;

        let local_slots = Self::fresh_local_slots(&chunk);
        self.frames.push(CallFrame {
            chunk: Rc::new(chunk),
            ip: 0,
            stack_base: saved_stack_len,
            saved_env,
            // Scratch evaluator frames never accept restartFrame — the
            // REPL/watch user expects read-only inspection semantics,
            // not replay — so skip the clone.
            initial_env: None,
            initial_local_slots: None,
            saved_iterator_depth: saved_iter_depth,
            fn_name: "<eval>".to_string(),
            argc: 0,
            saved_source_dir: self.source_dir.clone(),
            module_functions: None,
            module_state: None,
            local_slots,
            local_scope_base: self.env.scope_depth().saturating_sub(1),
            local_scope_depth: 0,
        });

        // Drive one op at a time with a fixed budget. A pure expression
        // is typically < 20 instructions; 10k gives plenty of headroom
        // for e.g. a list comprehension without letting a bad loop
        // hang the debugger forever.
        const MAX_EVAL_STEPS: usize = 10_000;
        let mut err: Option<VmError> = None;
        for _ in 0..MAX_EVAL_STEPS {
            // Frame count back at (or below) the baseline means the
            // scratch frame has popped — evaluation is complete.
            if self.frames.len() <= saved_frame_count {
                break;
            }
            match self.execute_one_cycle().await {
                Ok(_) => {
                    if self.frames.len() <= saved_frame_count {
                        break;
                    }
                }
                Err(e) => {
                    // Defer the error: cleanup below must run first.
                    err = Some(e);
                    break;
                }
            }
        }

        // Read the result before restoring the stack — frame exit
        // pushes the last-computed value onto the caller's stack, so
        // it sits at `saved_stack_len` if execution completed cleanly.
        let result = if self.stack.len() > saved_stack_len {
            Some(self.stack[saved_stack_len].clone())
        } else {
            None
        };

        // Unconditional cleanup so a mid-execution error doesn't leak
        // scratch state into the live session.
        self.frames.truncate(saved_frame_count);
        self.stack.truncate(saved_stack_len);
        self.iterators.truncate(saved_iter_depth);
        self.env.truncate_scopes(saved_scope_depth);
        self.last_line = saved_last_line;
        self.step_mode = saved_step_mode;
        self.step_frame_depth = saved_step_frame_depth;
        self.stopped = saved_stopped;

        if let Some(e) = err {
            return Err(e);
        }
        result.ok_or_else(|| {
            VmError::Runtime(
                "evaluate: step budget exceeded before the expression produced a value".into(),
            )
        })
    }
621}