harn_vm/vm/debug.rs
1use std::rc::Rc;
2
3use crate::chunk::{Chunk, Constant};
4use crate::value::{VmError, VmValue};
5
6use super::{CallFrame, Vm};
7
/// Verdict a debug hook returns after inspecting the current `DebugState`.
///
/// `Vm::step_execute` pauses the VM when the hook answers [`DebugAction::Stop`]
/// and keeps evaluating its other stop conditions on
/// [`DebugAction::Continue`].
///
/// The enum is fieldless, so equality is total: it derives `Eq` alongside
/// `PartialEq`, which lets it be used wherever a full equivalence relation
/// is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DebugAction {
    /// Continue execution normally.
    Continue,
    /// Stop (breakpoint hit, step complete).
    Stop,
}
16
/// Information about current execution state for the debugger.
///
/// Built by `Vm::debug_state` and handed by reference to the debug hook
/// each time execution reaches a new source line.
#[derive(Debug, Clone)]
pub struct DebugState {
    /// Source line reported by `Vm::current_line` when the snapshot was taken
    /// (0 when no line could be resolved).
    pub line: usize,
    /// Bindings returned by the VM environment's `all_variables()`, keyed by
    /// variable name.
    pub variables: std::collections::BTreeMap<String, VmValue>,
    /// `"pipeline"` while at most one call frame is live, otherwise
    /// `"frame_<n>"` where `n` is the live frame count minus one.
    pub frame_name: String,
    /// Number of live call frames when the snapshot was taken.
    pub frame_depth: usize,
}
25
/// Unsized callback type for the debug hook: it receives the current
/// [`DebugState`] and answers with a [`DebugAction`]. `Vm::set_debug_hook`
/// boxes a concrete closure of this shape before storing it.
pub(super) type DebugHook = dyn FnMut(&DebugState) -> DebugAction;
27
28impl Vm {
29 /// Replace breakpoints for a single source file. Pass an empty string
30 /// (or call `set_breakpoints` for the wildcard equivalent) to install
31 /// breakpoints that match every file — useful for ad-hoc CLI runs
32 /// where the embedder doesn't track per-file source paths.
33 pub fn set_breakpoints_for_file(&mut self, file: &str, lines: Vec<usize>) {
34 if lines.is_empty() {
35 self.breakpoints.remove(file);
36 return;
37 }
38 self.breakpoints
39 .insert(file.to_string(), lines.into_iter().collect());
40 }
41
    /// Backwards-compatible wildcard form. Stores all lines under the
    /// empty-string key, which matches *any* source file at the check
    /// site (`breakpoint_matches` consults that key before looking at the
    /// current file). Existing embedders that don't track file scoping
    /// still work.
    ///
    /// Passing an empty `lines` vector clears the wildcard set, because
    /// `set_breakpoints_for_file` removes the key when given no lines.
    pub fn set_breakpoints(&mut self, lines: Vec<usize>) {
        self.set_breakpoints_for_file("", lines);
    }
48
49 /// Replace the function-breakpoint set. Every subsequent closure
50 /// call whose name matches one of the provided strings will pause
51 /// on entry. Empty vec clears the set.
52 pub fn set_function_breakpoints(&mut self, names: Vec<String>) {
53 self.function_breakpoints = names.into_iter().collect();
54 // Clear any pending latch so a stale entry from the previous
55 // configuration doesn't fire once.
56 self.pending_function_bp = None;
57 }
58
59 /// Returns the current function-breakpoint name set. Used by the
60 /// DAP adapter to build the `setFunctionBreakpoints` response with
61 /// verified=true per registered name.
62 pub fn function_breakpoint_names(&self) -> Vec<String> {
63 self.function_breakpoints.iter().cloned().collect()
64 }
65
66 /// Drain any pending function-breakpoint name latched by the most
67 /// recent closure entry. Returns `Some(name)` exactly once per hit
68 /// so the caller can emit a single `stopped` event.
69 pub fn take_pending_function_bp(&mut self) -> Option<String> {
70 self.pending_function_bp.take()
71 }
72
73 /// Source file path of the currently executing frame, if known.
74 pub(crate) fn current_source_file(&self) -> Option<&str> {
75 self.frames
76 .last()
77 .and_then(|f| f.chunk.source_file.as_deref())
78 }
79
80 /// True when a breakpoint at `line` is set for the current frame's
81 /// source file (or the wildcard set covers it).
82 pub(crate) fn breakpoint_matches(&self, line: usize) -> bool {
83 if let Some(wild) = self.breakpoints.get("") {
84 if wild.contains(&line) {
85 return true;
86 }
87 }
88 if let Some(file) = self.current_source_file() {
89 if let Some(set) = self.breakpoints.get(file) {
90 if set.contains(&line) {
91 return true;
92 }
93 }
94 // Some callers send a relative or differently-prefixed path
95 // than the chunk records; fall back to suffix comparison so
96 // foo.harn matches /abs/path/foo.harn and vice-versa.
97 for (key, set) in &self.breakpoints {
98 if key.is_empty() {
99 continue;
100 }
101 if (file.ends_with(key.as_str()) || key.ends_with(file)) && set.contains(&line) {
102 return true;
103 }
104 }
105 }
106 false
107 }
108
109 /// Enable step mode (stop at the next source line regardless of
110 /// frame depth — i.e. step-in semantics, descending into calls).
111 pub fn set_step_mode(&mut self, step: bool) {
112 self.step_mode = step;
113 self.step_frame_depth = usize::MAX;
114 }
115
116 /// Enable step-over mode (stop at the next source line in the current
117 /// frame or a shallower one, skipping past any nested calls).
118 pub fn set_step_over(&mut self) {
119 self.step_mode = true;
120 self.step_frame_depth = self.frames.len();
121 }
122
123 /// Register a debug hook invoked whenever execution advances to a new source line.
124 pub fn set_debug_hook<F>(&mut self, hook: F)
125 where
126 F: FnMut(&DebugState) -> DebugAction + 'static,
127 {
128 self.debug_hook = Some(Box::new(hook));
129 }
130
    /// Clear the current debug hook. Once cleared, `step_execute` no longer
    /// consults a hook on line changes; breakpoints and step mode are
    /// unaffected.
    pub fn clear_debug_hook(&mut self) {
        self.debug_hook = None;
    }
135
136 /// Enable step-out mode (stop at the next source line *after* the
137 /// current frame has returned — strictly shallower than where the
138 /// user requested the step-out).
139 pub fn set_step_out(&mut self) {
140 self.step_mode = true;
141 // Condition site compares `frames.len() <= step_frame_depth`, so
142 // storing N-1 makes the stop fire only after the current frame
143 // pops (frames.len() drops from N to N-1 or less). Clamp to 0 for
144 // the top frame — caller handles that via the usize::MAX sentinel
145 // if they wanted step-in semantics.
146 self.step_frame_depth = self.frames.len().saturating_sub(1);
147 }
148
    /// Check if the VM is stopped at a debug point. Reports the flag that
    /// `step_execute` sets to `true` when it pauses and clears before it
    /// executes an instruction.
    pub fn is_stopped(&self) -> bool {
        self.stopped
    }
153
154 /// Get the current debug state (variables, line, etc.).
155 pub fn debug_state(&self) -> DebugState {
156 let line = self.current_line();
157 let variables = self.env.all_variables();
158 let frame_name = if self.frames.len() > 1 {
159 format!("frame_{}", self.frames.len() - 1)
160 } else {
161 "pipeline".to_string()
162 };
163 DebugState {
164 line,
165 variables,
166 frame_name,
167 frame_depth: self.frames.len(),
168 }
169 }
170
171 /// Call sites (name + ip) on `line` within the current frame's
172 /// chunk — drives DAP `stepInTargets` (#112). Walks the chunk's
173 /// parallel lines array, surfaces every Call / MethodCall /
174 /// CallSpread and pairs it with the name of the constant or
175 /// identifier preceding the call when we can derive it cheaply.
176 pub fn call_sites_on_line(&self, line: u32) -> Vec<(u32, String)> {
177 let Some(frame) = self.frames.last() else {
178 return Vec::new();
179 };
180 let chunk = &frame.chunk;
181 let mut out = Vec::new();
182 let code = &chunk.code;
183 let lines = &chunk.lines;
184 let mut ip: usize = 0;
185 while ip < code.len() {
186 let op = code[ip];
187 if ip < lines.len() && lines[ip] == line {
188 // 0x00 .. 0x99 covers the opcode space the compiler
189 // emits for calls. Rather than decode every op, we
190 // pattern-match on the Call-family opcodes via
191 // their numeric tag — stable because harn-vm locks
192 // opcodes with pin tests.
193 if matches!(op, 0x40..=0x44) {
194 // Best-effort label: take the most recent
195 // LoadConst / LoadGlobal constant value.
196 let label = Self::label_preceding_call(chunk, ip);
197 out.push((ip as u32, label));
198 }
199 }
200 ip += 1;
201 }
202 out
203 }
204
205 fn label_preceding_call(chunk: &Chunk, call_ip: usize) -> String {
206 // Walk backwards a few instructions to find a LoadConst that
207 // resolves to a string (the callee name). Good enough for
208 // the IDE menu; deep callee resolution can land later if
209 // needed.
210 let mut back = call_ip.saturating_sub(6);
211 while back < call_ip {
212 let op = chunk.code[back];
213 // LoadConst opcodes (range covers the two-byte tag) —
214 // fall back to "call" when none found.
215 if (op == 0x01 || op == 0x02) && back + 2 < chunk.code.len() {
216 let idx = (u16::from(chunk.code[back + 1]) << 8) | u16::from(chunk.code[back + 2]);
217 if let Some(Constant::String(s)) = chunk.constants.get(idx as usize) {
218 return s.clone();
219 }
220 }
221 back += 1;
222 }
223 "call".to_string()
224 }
225
    /// Install (or replace) the cooperative cancellation token on
    /// this VM. Callers (DAP adapter, embedded host) flip the
    /// wrapped AtomicBool to request graceful shutdown; `step_execute`
    /// checks `is_cancel_requested()` before each instruction and, when
    /// the flag is set, returns a thrown `kind:cancelled:...` error
    /// instead of executing.
    ///
    /// NOTE(review): other run loops may surface cancellation with a
    /// different error shape; only the `step_execute` path is visible here.
    pub fn install_cancel_token(&mut self, token: std::sync::Arc<std::sync::atomic::AtomicBool>) {
        self.cancel_token = Some(token);
    }
234
235 /// Signal cooperative cancellation on this VM — the step loop
236 /// unwinds on its next instruction check. Lazily allocates a
237 /// fresh token when none is installed so hosts don't need to
238 /// pre-plumb it on every launch. Returns the Arc so the caller
239 /// can hold onto it and re-signal later if needed.
240 pub fn signal_cancel(&mut self) -> std::sync::Arc<std::sync::atomic::AtomicBool> {
241 let token = self.cancel_token.clone().unwrap_or_else(|| {
242 let t = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
243 self.cancel_token = Some(t.clone());
244 t
245 });
246 token.store(true, std::sync::atomic::Ordering::SeqCst);
247 token
248 }
249
250 /// True when cooperative cancellation has been requested.
251 pub fn is_cancel_requested(&self) -> bool {
252 self.cancel_token
253 .as_ref()
254 .map(|t| t.load(std::sync::atomic::Ordering::SeqCst))
255 .unwrap_or(false)
256 }
257
258 /// Identifiers visible at the given frame's scope — locals plus
259 /// every registered builtin + async builtin. Drives DAP
260 /// `completions` (#109) so the REPL autocomplete surfaces
261 /// everything the unified evaluator can reach.
262 pub fn identifiers_in_scope(&self, _frame_id: usize) -> Vec<String> {
263 let mut out: Vec<String> = self.env.all_variables().keys().cloned().collect();
264 out.extend(self.builtins.keys().cloned());
265 out.extend(self.async_builtins.keys().cloned());
266 out.sort();
267 out.dedup();
268 out
269 }
270
271 /// Get all stack frames for the debugger.
272 pub fn debug_stack_frames(&self) -> Vec<(String, usize)> {
273 let mut frames = Vec::new();
274 for (i, frame) in self.frames.iter().enumerate() {
275 let line = if frame.ip > 0 && frame.ip - 1 < frame.chunk.lines.len() {
276 frame.chunk.lines[frame.ip - 1] as usize
277 } else {
278 0
279 };
280 let name = if frame.fn_name.is_empty() {
281 if i == 0 {
282 "pipeline".to_string()
283 } else {
284 format!("fn_{}", i)
285 }
286 } else {
287 frame.fn_name.clone()
288 };
289 frames.push((name, line));
290 }
291 frames
292 }
293
294 /// Get the current source line.
295 pub(crate) fn current_line(&self) -> usize {
296 if let Some(frame) = self.frames.last() {
297 let ip = if frame.ip > 0 { frame.ip - 1 } else { 0 };
298 if ip < frame.chunk.lines.len() {
299 return frame.chunk.lines[ip] as usize;
300 }
301 }
302 0
303 }
304
305 /// Execute one instruction, returning whether to stop (breakpoint/step).
306 /// Returns Ok(None) to continue, Ok(Some(val)) on program end, Err on error.
307 ///
308 /// Line-change detection reads the line of the instruction we're
309 /// *about to execute* (`lines[ip]`) rather than the byte before
310 /// `ip`. After a jump, `ip-1` still points into the skipped region,
311 /// which previously reported phantom stops on the tail of a
312 /// not-taken branch (e.g. `host_metadata_save()` highlighted even
313 /// though `any_stale` was false). Using `lines[ip]` — combined with
314 /// cleanup ops emitted at line 0 after branch/loop exits — keeps
315 /// the debugger aligned with what's actually going to run.
316 pub async fn step_execute(&mut self) -> Result<Option<(VmValue, bool)>, VmError> {
317 // Cooperative cancellation (#108): the DAP adapter flips the
318 // shared flag when the IDE presses the Stop pill. Check here
319 // before any instruction work so the loop unwinds promptly
320 // on the next tick.
321 if self.is_cancel_requested() {
322 return Err(VmError::Thrown(VmValue::String(Rc::from(
323 "kind:cancelled:VM cancelled by host",
324 ))));
325 }
326 let current_line = self.upcoming_line();
327 let line_changed = current_line != self.last_line && current_line > 0;
328
329 if line_changed {
330 self.last_line = current_line;
331
332 let state = self.debug_state();
333 if let Some(hook) = self.debug_hook.as_mut() {
334 if matches!(hook(&state), DebugAction::Stop) {
335 self.stopped = true;
336 return Ok(Some((VmValue::Nil, true)));
337 }
338 }
339
340 if self.breakpoint_matches(current_line) {
341 self.stopped = true;
342 return Ok(Some((VmValue::Nil, true)));
343 }
344
345 // Function-breakpoint latch: set by push_closure_frame when
346 // the callee's name is in `function_breakpoints`. Stop with
347 // the same shape as a line BP so the DAP adapter's
348 // classify_breakpoint_hit emits a standard stopped event.
349 if self.pending_function_bp.is_some() {
350 self.stopped = true;
351 return Ok(Some((VmValue::Nil, true)));
352 }
353
354 // step_frame_depth is the deepest frame count at which a stop
355 // is acceptable. set_step_mode uses usize::MAX (any depth,
356 // step-in), set_step_over uses N (same frame or shallower),
357 // set_step_out uses N-1 (strictly shallower than where the
358 // step-out was requested).
359 if self.step_mode && self.frames.len() <= self.step_frame_depth {
360 self.step_mode = false;
361 self.stopped = true;
362 return Ok(Some((VmValue::Nil, true)));
363 }
364 }
365
366 self.stopped = false;
367 self.execute_one_cycle().await
368 }
369
370 /// Line of the instruction *about to execute* — used by the
371 /// debugger for line-change detection so the first cycle after a
372 /// jump doesn't report a stale line from the skipped region.
373 pub(crate) fn upcoming_line(&self) -> usize {
374 if let Some(frame) = self.frames.last() {
375 if frame.ip < frame.chunk.lines.len() {
376 return frame.chunk.lines[frame.ip] as usize;
377 }
378 }
379 0
380 }
381
    /// Number of live call frames. Used by the DAP adapter to
    /// translate stackTrace ids (1-based, innermost first) back to
    /// the VM's 0-based outermost-first index when processing
    /// `restartFrame`. Valid indices for `restart_frame` are therefore
    /// `0..frame_count()`.
    pub fn frame_count(&self) -> usize {
        self.frames.len()
    }
389
390 /// Rewind the given frame to its entry state so stepping resumes
391 /// from the first instruction of the function with the original
392 /// arguments re-bound. Higher frames above `frame_id` are dropped.
393 /// Returns an error if the frame has no captured `initial_env`
394 /// (scratch / evaluator frames don't) or if the id is out of range.
395 ///
396 /// Side effects already performed by the restarted frame (tool
397 /// calls, file writes, host_call round-trips) are *not* rolled
398 /// back — DAP leaves that to the adapter's discretion. The IDE
399 /// should warn on frames whose source text contains obvious
400 /// side-effectful calls before invoking restartFrame.
401 pub fn restart_frame(&mut self, frame_id: usize) -> Result<(), VmError> {
402 if frame_id >= self.frames.len() {
403 return Err(VmError::Runtime(format!(
404 "restartFrame: frame id {frame_id} out of range (have {} frames)",
405 self.frames.len()
406 )));
407 }
408 let Some(initial_env) = self.frames[frame_id].initial_env.clone() else {
409 return Err(VmError::Runtime(
410 "restartFrame: target frame was not captured for restart (scratch / evaluator frame)"
411 .into(),
412 ));
413 };
414 // Drop every frame above the target. Each pop restores its
415 // saved_iterator_depth into `self.iterators` so iterator state
416 // unwinds consistently.
417 while self.frames.len() > frame_id + 1 {
418 let popped = self.frames.pop().expect("bounds checked above");
419 self.iterators.truncate(popped.saved_iterator_depth);
420 }
421 // Rewind the target frame.
422 let frame = self
423 .frames
424 .last_mut()
425 .expect("frame_id within bounds guarantees a frame");
426 frame.ip = 0;
427 let stack_base = frame.stack_base;
428 let saved_iter_depth = frame.saved_iterator_depth;
429 self.stack.truncate(stack_base);
430 self.iterators.truncate(saved_iter_depth);
431 self.env = initial_env;
432 self.last_line = 0;
433 self.stopped = false;
434 Ok(())
435 }
436
437 /// Assign a new value to a named binding in the paused VM's env.
438 /// Returns the value that was actually stored (after coercion, if
439 /// the VM performed any) so the caller can echo it back to the
440 /// DAP client. Fails if the name does not resolve to a mutable
441 /// binding in any live scope.
442 ///
443 /// The provided `value_expr` goes through the unified evaluator so
444 /// callers can type expressions like `plan.tasks.len() + 1` in the
445 /// Locals inline-edit field, not just literals.
446 pub async fn set_variable_in_frame(
447 &mut self,
448 name: &str,
449 value_expr: &str,
450 frame_id: usize,
451 ) -> Result<VmValue, VmError> {
452 let value = self.evaluate_in_frame(value_expr, frame_id).await?;
453 // Debug-specific assign: bypasses the `let` immutability gate
454 // because the user is explicitly editing in the IDE, and
455 // almost every pipeline binding is `let`. The underlying
456 // binding's mutability flag is preserved so runtime behavior
457 // after the override is unchanged.
458 self.env
459 .assign_debug(name, value.clone())
460 .map_err(|e| match e {
461 VmError::UndefinedVariable(n) => {
462 VmError::Runtime(format!("setVariable: '{n}' is not in the current scope"))
463 }
464 other => other,
465 })?;
466 Ok(value)
467 }
468
469 /// Evaluate a Harn expression against the currently paused frame's
470 /// scope and return its value. This is the single evaluation path
471 /// used by hover tips, watch expressions, conditional breakpoints,
472 /// logpoint interpolation, and `setVariable` / `setExpression`
473 /// before we had a unified evaluator there were four separate
474 /// mini-parsers, each with its own rough edges (see burin-code #85).
475 ///
476 /// The expression is wrapped as `let __r = (<expr>)` so arbitrary
477 /// infix chains, ternaries, and access paths parse uniformly. A
478 /// scratch `CallFrame` runs the wrapped bytecode with `saved_env`
479 /// pointing at the caller's env, so the compiled expression sees
480 /// every local in scope. When the scratch frame pops, the caller's
481 /// env is automatically restored.
482 ///
483 /// A fixed instruction budget guards against runaway expressions
484 /// (infinite loops, accidental recursion) wedging the debugger.
485 /// Side effects — including `llm_call`, `host_*`, and file mutators
486 /// — are not blocked here; callers that invoke this for read-only
487 /// surfaces (hover, watch) should reject obviously-side-effectful
488 /// expressions before calling.
489 pub async fn evaluate_in_frame(
490 &mut self,
491 expr: &str,
492 _frame_id: usize,
493 ) -> Result<VmValue, VmError> {
494 let trimmed = expr.trim();
495 if trimmed.is_empty() {
496 return Err(VmError::Runtime("evaluate: empty expression".into()));
497 }
498
499 // Wrap as a pipeline whose body *returns* the expression. The
500 // explicit `return` compiles to `push value + Op::Return`, and
501 // Op::Return's frame-exit path pushes that value onto the
502 // caller's stack — which is where we read it from below.
503 // Avoids the script-mode compile path that trails a Pop+Nil
504 // sequence after every expression statement, which would
505 // clobber the result before we could capture it.
506 let wrapped = format!("pipeline default() {{\n return ({trimmed})\n}}\n");
507 let program = harn_parser::check_source_strict(&wrapped)
508 .map_err(|e| VmError::Runtime(format!("evaluate: parse error: {e}")))?;
509 let mut chunk = crate::compiler::Compiler::new()
510 .compile(&program)
511 .map_err(|e| VmError::Runtime(format!("evaluate: compile error: {e}")))?;
512 // Inherit the current frame's source file so any runtime error
513 // enriched with `(line N)` attributes cleanly.
514 if let Some(current) = self.frames.last() {
515 chunk.source_file = current.chunk.source_file.clone();
516 }
517
518 // Snapshot every piece of VM state the scratch frame could
519 // perturb. Evaluation MUST be transparent: step state, scope
520 // depth, iterator depth, and the line-change baseline all
521 // restore on exit so the paused session continues exactly as
522 // before the user typed an expression into the REPL.
523 let saved_stack_len = self.stack.len();
524 let saved_frame_count = self.frames.len();
525 let saved_iter_depth = self.iterators.len();
526 let saved_scope_depth = self.env.scope_depth();
527 let saved_last_line = self.last_line;
528 let saved_step_mode = self.step_mode;
529 let saved_step_frame_depth = self.step_frame_depth;
530 let saved_stopped = self.stopped;
531 let saved_env = self.env.clone();
532
533 // Disable stepping during evaluation; otherwise the debug hook
534 // would fire on every synthetic line and block the pause UI.
535 self.step_mode = false;
536 self.stopped = false;
537
538 self.frames.push(CallFrame {
539 chunk,
540 ip: 0,
541 stack_base: saved_stack_len,
542 saved_env,
543 // Scratch evaluator frames never accept restartFrame — the
544 // REPL/watch user expects read-only inspection semantics,
545 // not replay — so skip the clone.
546 initial_env: None,
547 saved_iterator_depth: saved_iter_depth,
548 fn_name: "<eval>".to_string(),
549 argc: 0,
550 saved_source_dir: self.source_dir.clone(),
551 module_functions: None,
552 module_state: None,
553 });
554
555 // Drive one op at a time with a fixed budget. A pure expression
556 // is typically < 20 instructions; 10k gives plenty of headroom
557 // for e.g. a list comprehension without letting a bad loop
558 // hang the debugger forever.
559 const MAX_EVAL_STEPS: usize = 10_000;
560 let mut err: Option<VmError> = None;
561 for _ in 0..MAX_EVAL_STEPS {
562 if self.frames.len() <= saved_frame_count {
563 break;
564 }
565 match self.execute_one_cycle().await {
566 Ok(_) => {
567 if self.frames.len() <= saved_frame_count {
568 break;
569 }
570 }
571 Err(e) => {
572 err = Some(e);
573 break;
574 }
575 }
576 }
577
578 // Read the result before restoring the stack — frame exit
579 // pushes the last-computed value onto the caller's stack, so
580 // it sits at `saved_stack_len` if execution completed cleanly.
581 let result = if self.stack.len() > saved_stack_len {
582 Some(self.stack[saved_stack_len].clone())
583 } else {
584 None
585 };
586
587 // Unconditional cleanup so a mid-execution error doesn't leak
588 // scratch state into the live session.
589 self.frames.truncate(saved_frame_count);
590 self.stack.truncate(saved_stack_len);
591 self.iterators.truncate(saved_iter_depth);
592 self.env.truncate_scopes(saved_scope_depth);
593 self.last_line = saved_last_line;
594 self.step_mode = saved_step_mode;
595 self.step_frame_depth = saved_step_frame_depth;
596 self.stopped = saved_stopped;
597
598 if let Some(e) = err {
599 return Err(e);
600 }
601 result.ok_or_else(|| {
602 VmError::Runtime(
603 "evaluate: step budget exceeded before the expression produced a value".into(),
604 )
605 })
606 }
607}