plotnik_lib/engine/
vm.rs

1//! Virtual machine for executing compiled Plotnik queries.
2
3use arborium_tree_sitter::{Node, Tree};
4
5use crate::bytecode::{
6    Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7    StepAddr, Trampoline,
8};
9
10/// Get the nav for continue_search (always a sibling move).
11/// Up/Stay/Epsilon variants return Next as a default since they don't do sibling search.
12fn continuation_nav(nav: Nav) -> Nav {
13    match nav {
14        Nav::Down | Nav::Next => Nav::Next,
15        Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16        Nav::DownExact | Nav::NextExact => Nav::NextExact,
17        Nav::Epsilon
18        | Nav::Up(_)
19        | Nav::UpSkipTrivia(_)
20        | Nav::UpExact(_)
21        | Nav::Stay
22        | Nav::StayExact => Nav::Next,
23    }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29/// Derive skip policy from navigation mode without navigating.
30/// Used when retrying a Call to determine the policy for the next checkpoint.
31/// Epsilon/Stay/Up variants return None since they don't retry among siblings.
32fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33    match nav {
34        Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35        Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36        Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37        Nav::Epsilon
38        | Nav::Stay
39        | Nav::StayExact
40        | Nav::Up(_)
41        | Nav::UpSkipTrivia(_)
42        | Nav::UpExact(_) => None,
43    }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50/// Runtime limits for query execution.
51#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53    /// Maximum total steps (default: 1,000,000).
54    pub(crate) exec_fuel: u32,
55    /// Maximum call depth (default: 1,024).
56    pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60    fn default() -> Self {
61        Self {
62            exec_fuel: 1_000_000,
63            recursion_limit: 1024,
64        }
65    }
66}
67
68impl FuelLimits {
69    /// Create new fuel limits with defaults.
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Set the execution fuel limit.
75    pub fn exec_fuel(mut self, fuel: u32) -> Self {
76        self.exec_fuel = fuel;
77        self
78    }
79
80    /// Set the recursion limit.
81    pub fn recursion_limit(mut self, limit: u32) -> Self {
82        self.recursion_limit = limit;
83        self
84    }
85
86    pub fn get_exec_fuel(&self) -> u32 {
87        self.exec_fuel
88    }
89    pub fn get_recursion_limit(&self) -> u32 {
90        self.recursion_limit
91    }
92}
93
94/// Virtual machine state for query execution.
95pub struct VM<'t> {
96    pub(crate) cursor: CursorWrapper<'t>,
97    /// Current instruction pointer (raw u16, 0 is valid at runtime).
98    pub(crate) ip: u16,
99    pub(crate) frames: FrameArena,
100    pub(crate) checkpoints: CheckpointStack,
101    pub(crate) effects: EffectLog<'t>,
102    pub(crate) matched_node: Option<Node<'t>>,
103
104    // Fuel tracking
105    pub(crate) exec_fuel: u32,
106    pub(crate) recursion_depth: u32,
107    pub(crate) limits: FuelLimits,
108
109    /// When true, the next Call instruction should skip navigation (use Stay).
110    /// This is set when backtracking to a Call retry checkpoint after advancing
111    /// the cursor to a new sibling. The Call's navigation was already done, and
112    /// we're now at the correct position for the callee to match.
113    pub(crate) skip_call_nav: bool,
114
115    /// Suppression depth counter. When > 0, effects are suppressed (not emitted to log).
116    /// Incremented by SuppressBegin, decremented by SuppressEnd.
117    pub(crate) suppress_depth: u16,
118
119    /// Target address for Trampoline instruction.
120    /// Set from entrypoint before execution; Trampoline jumps to this address.
121    pub(crate) entrypoint_target: u16,
122}
123
124/// Builder for VM instances.
125pub struct VMBuilder<'t> {
126    tree: &'t Tree,
127    trivia_types: Vec<u16>,
128    limits: FuelLimits,
129}
130
131impl<'t> VMBuilder<'t> {
132    /// Create a new VM builder.
133    pub fn new(tree: &'t Tree) -> Self {
134        Self {
135            tree,
136            trivia_types: Vec::new(),
137            limits: FuelLimits::default(),
138        }
139    }
140
141    /// Set the trivia types to skip during navigation.
142    pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
143        self.trivia_types = types;
144        self
145    }
146
147    /// Set the fuel limits.
148    pub fn limits(mut self, limits: FuelLimits) -> Self {
149        self.limits = limits;
150        self
151    }
152
153    /// Set the execution fuel limit.
154    pub fn exec_fuel(mut self, fuel: u32) -> Self {
155        self.limits = self.limits.exec_fuel(fuel);
156        self
157    }
158
159    /// Set the recursion limit.
160    pub fn recursion_limit(mut self, limit: u32) -> Self {
161        self.limits = self.limits.recursion_limit(limit);
162        self
163    }
164
165    /// Build the VM.
166    pub fn build(self) -> VM<'t> {
167        VM {
168            cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
169            ip: 0,
170            frames: FrameArena::new(),
171            checkpoints: CheckpointStack::new(),
172            effects: EffectLog::new(),
173            matched_node: None,
174            exec_fuel: self.limits.get_exec_fuel(),
175            recursion_depth: 0,
176            limits: self.limits,
177            skip_call_nav: false,
178            suppress_depth: 0,
179            entrypoint_target: 0,
180        }
181    }
182}
183
184impl<'t> VM<'t> {
185    /// Create a VM builder.
186    pub fn builder(tree: &'t Tree) -> VMBuilder<'t> {
187        VMBuilder::new(tree)
188    }
189
190    /// Create a new VM for execution.
191    #[deprecated(note = "Use VM::builder(tree).trivia_types(...).build() instead")]
192    pub fn new(tree: &'t Tree, trivia_types: Vec<u16>, limits: FuelLimits) -> Self {
193        Self::builder(tree)
194            .trivia_types(trivia_types)
195            .limits(limits)
196            .build()
197    }
198
199    /// Helper for internal checkpoint creation (eliminates duplication).
200    fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
201        Checkpoint::new(
202            self.cursor.descendant_index(),
203            self.effects.len(),
204            self.frames.current(),
205            self.recursion_depth,
206            ip,
207            skip_policy,
208            self.suppress_depth,
209        )
210    }
211
212    /// Execute query from entrypoint, returning effect log.
213    ///
214    /// This is a convenience method that uses `NoopTracer`, which gets
215    /// completely optimized away at compile time.
216    pub fn execute(
217        self,
218        module: &Module,
219        bootstrap: StepAddr,
220        entrypoint: &Entrypoint,
221    ) -> Result<EffectLog<'t>, RuntimeError> {
222        self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
223    }
224
225    /// Execute query with a tracer for debugging.
226    ///
227    /// The tracer is generic, so `NoopTracer` calls are optimized away
228    /// while `PrintTracer` calls collect execution trace.
229    ///
230    /// `bootstrap` is the preamble entry point - caller decides which preamble to use.
231    pub fn execute_with<T: Tracer>(
232        mut self,
233        module: &Module,
234        bootstrap: StepAddr,
235        entrypoint: &Entrypoint,
236        tracer: &mut T,
237    ) -> Result<EffectLog<'t>, RuntimeError> {
238        // Bootstrap address: where VM starts execution (preamble entry point).
239        // Caller provides this, enabling different preamble types (root-match, recursive, etc.).
240        self.ip = bootstrap;
241        self.entrypoint_target = entrypoint.target;
242        tracer.trace_enter_preamble();
243
244        loop {
245            // Fuel check
246            if self.exec_fuel == 0 {
247                return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
248            }
249            self.exec_fuel -= 1;
250
251            // Fetch and dispatch
252            let instr = module.decode_step(self.ip);
253            tracer.trace_instruction(self.ip, &instr);
254
255            let result = match instr {
256                Instruction::Match(m) => self.exec_match(m, tracer),
257                Instruction::Call(c) => self.exec_call(c, tracer),
258                Instruction::Return(_) => self.exec_return(tracer),
259                Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
260            };
261
262            match result {
263                Ok(()) | Err(RuntimeError::Backtracked) => continue,
264                Err(RuntimeError::Accept) => return Ok(self.effects),
265                Err(e) => return Err(e),
266            }
267        }
268    }
269
270    fn exec_match<T: Tracer>(&mut self, m: Match<'_>, tracer: &mut T) -> Result<(), RuntimeError> {
271        for effect_op in m.pre_effects() {
272            self.emit_effect(effect_op, tracer);
273        }
274
275        // Only clear matched_node for non-epsilon transitions.
276        // For epsilon, preserve matched_node from previous match or return.
277        if !m.is_epsilon() {
278            self.matched_node = None;
279            self.navigate_and_match(m, tracer)?;
280        }
281
282        for effect_op in m.post_effects() {
283            self.emit_effect(effect_op, tracer);
284        }
285
286        self.branch_to_successors(m, tracer)
287    }
288
289    fn navigate_and_match<T: Tracer>(
290        &mut self,
291        m: Match<'_>,
292        tracer: &mut T,
293    ) -> Result<(), RuntimeError> {
294        let Some(policy) = self.cursor.navigate(m.nav) else {
295            tracer.trace_nav_failure(m.nav);
296            return self.backtrack(tracer);
297        };
298        tracer.trace_nav(m.nav, self.cursor.node());
299
300        let cont_nav = continuation_nav(m.nav);
301        loop {
302            if !self.node_matches(m, tracer) {
303                self.advance_or_backtrack(policy, cont_nav, tracer)?;
304                continue;
305            }
306            break;
307        }
308
309        tracer.trace_match_success(self.cursor.node());
310        if let Some(field_id) = m.node_field {
311            tracer.trace_field_success(field_id);
312        }
313
314        self.matched_node = Some(self.cursor.node());
315
316        for field_id in m.neg_fields() {
317            if self.cursor.node().child_by_field_id(field_id).is_some() {
318                return self.backtrack(tracer);
319            }
320        }
321
322        Ok(())
323    }
324
325    /// Check if current node matches type and field constraints.
326    fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
327        let node = self.cursor.node();
328
329        // Check node type constraint
330        match m.node_type {
331            NodeTypeIR::Any => {
332                // Any node matches - no check needed
333            }
334            NodeTypeIR::Named(None) => {
335                // `(_)` wildcard: must be a named node
336                if !node.is_named() {
337                    tracer.trace_match_failure(node);
338                    return false;
339                }
340            }
341            NodeTypeIR::Named(Some(expected)) => {
342                // Specific named type: check kind_id
343                if node.kind_id() != expected.get() {
344                    tracer.trace_match_failure(node);
345                    return false;
346                }
347            }
348            NodeTypeIR::Anonymous(None) => {
349                // Any anonymous node: must NOT be named
350                if node.is_named() {
351                    tracer.trace_match_failure(node);
352                    return false;
353                }
354            }
355            NodeTypeIR::Anonymous(Some(expected)) => {
356                // Specific anonymous type: check kind_id
357                if node.kind_id() != expected.get() {
358                    tracer.trace_match_failure(node);
359                    return false;
360                }
361            }
362        }
363
364        // Check field constraint
365        if let Some(expected) = m.node_field
366            && self.cursor.field_id() != Some(expected)
367        {
368            tracer.trace_field_failure(node);
369            return false;
370        }
371        true
372    }
373
374    fn branch_to_successors<T: Tracer>(
375        &mut self,
376        m: Match<'_>,
377        tracer: &mut T,
378    ) -> Result<(), RuntimeError> {
379        if m.succ_count() == 0 {
380            return Err(RuntimeError::Accept);
381        }
382
383        // Push checkpoints for alternate branches (in reverse order)
384        for i in (1..m.succ_count()).rev() {
385            self.checkpoints
386                .push(self.create_checkpoint(m.successor(i).get(), None));
387            tracer.trace_checkpoint_created(self.ip);
388        }
389
390        self.ip = m.successor(0).get();
391        Ok(())
392    }
393
394    fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
395        if self.recursion_depth >= self.limits.recursion_limit {
396            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
397        }
398
399        // Get skip policy: from navigation (normal) or from nav mode (retry)
400        let skip_policy = if self.skip_call_nav {
401            // Retry: skip navigation, just check field, derive policy from nav mode
402            self.skip_call_nav = false;
403            self.check_field(c.node_field, tracer)?;
404            skip_policy_for_nav(c.nav)
405        } else {
406            // Normal: navigate and capture skip policy
407            self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
408        };
409
410        // Push checkpoint for retry (both normal and retry paths need this)
411        if let Some(policy) = skip_policy
412            && policy != SkipPolicy::Exact
413        {
414            self.checkpoints
415                .push(self.create_checkpoint(self.ip, Some(policy)));
416            tracer.trace_checkpoint_created(self.ip);
417        }
418
419        // Save tree depth AFTER navigation. On Return, we go up to this depth.
420        let saved_depth = self.cursor.depth();
421        tracer.trace_call(c.target.get());
422        self.frames.push(c.next.get(), saved_depth);
423        self.recursion_depth += 1;
424        self.ip = c.target.get();
425        Ok(())
426    }
427
428    /// Execute a Trampoline instruction.
429    ///
430    /// Trampoline is like Call, but the target comes from VM context (entrypoint_target)
431    /// rather than being encoded in the instruction. Used for universal entry preamble.
432    fn exec_trampoline<T: Tracer>(
433        &mut self,
434        t: Trampoline,
435        tracer: &mut T,
436    ) -> Result<(), RuntimeError> {
437        if self.recursion_depth >= self.limits.recursion_limit {
438            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
439        }
440
441        // Trampoline doesn't navigate - it's always at root, cursor stays at root
442        let saved_depth = self.cursor.depth();
443        tracer.trace_call(self.entrypoint_target);
444        self.frames.push(t.next.get(), saved_depth);
445        self.recursion_depth += 1;
446        self.ip = self.entrypoint_target;
447        Ok(())
448    }
449
450    /// Navigate to a field and return the skip policy for retry support.
451    ///
452    /// Returns `Some(policy)` if navigation was performed, `None` if Stay nav was used.
453    fn navigate_to_field_with_policy<T: Tracer>(
454        &mut self,
455        nav: Nav,
456        field: Option<std::num::NonZeroU16>,
457        tracer: &mut T,
458    ) -> Result<Option<SkipPolicy>, RuntimeError> {
459        if nav == Nav::Stay || nav == Nav::StayExact {
460            self.check_field(field, tracer)?;
461            return Ok(None);
462        }
463
464        let Some(policy) = self.cursor.navigate(nav) else {
465            tracer.trace_nav_failure(nav);
466            return Err(self.backtrack(tracer).unwrap_err());
467        };
468        tracer.trace_nav(nav, self.cursor.node());
469
470        let Some(field_id) = field else {
471            return Ok(Some(policy));
472        };
473
474        let cont_nav = continuation_nav(nav);
475        loop {
476            if self.cursor.field_id() == Some(field_id) {
477                tracer.trace_field_success(field_id);
478                return Ok(Some(policy));
479            }
480            tracer.trace_field_failure(self.cursor.node());
481            self.advance_or_backtrack(policy, cont_nav, tracer)?;
482        }
483    }
484
485    fn check_field<T: Tracer>(
486        &mut self,
487        field: Option<std::num::NonZeroU16>,
488        tracer: &mut T,
489    ) -> Result<(), RuntimeError> {
490        let Some(field_id) = field else {
491            return Ok(());
492        };
493        if self.cursor.field_id() != Some(field_id) {
494            tracer.trace_field_failure(self.cursor.node());
495            return self.backtrack(tracer);
496        }
497        tracer.trace_field_success(field_id);
498        Ok(())
499    }
500
501    fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
502        tracer.trace_return();
503
504        // If no frames, we're returning from top-level entrypoint → Accept
505        if self.frames.is_empty() {
506            return Err(RuntimeError::Accept);
507        }
508
509        let (return_addr, saved_depth) = self.frames.pop();
510        self.recursion_depth -= 1;
511
512        // Prune frames (O(1) amortized)
513        self.frames.prune(self.checkpoints.max_frame_ref());
514
515        // Set matched_node BEFORE going up so effects after
516        // a Call can capture the node that the callee matched.
517        self.matched_node = Some(self.cursor.node());
518
519        // Go up to saved depth level. This preserves sibling advances
520        // (continue_search at same level) while restoring level when
521        // the callee descended into children.
522        while self.cursor.depth() > saved_depth {
523            if !self.cursor.goto_parent() {
524                break;
525            }
526        }
527
528        self.ip = return_addr;
529        Ok(())
530    }
531
532    fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
533        let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
534        tracer.trace_backtrack();
535        self.cursor.goto_descendant(cp.descendant_index);
536        self.effects.truncate(cp.effect_watermark);
537        self.frames.restore(cp.frame_index);
538        self.recursion_depth = cp.recursion_depth;
539        self.suppress_depth = cp.suppress_depth;
540
541        // Call retry: advance cursor to next sibling before re-executing
542        if let Some(policy) = cp.skip_policy {
543            if !self.cursor.continue_search(policy) {
544                return self.backtrack(tracer);
545            }
546            tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
547            self.skip_call_nav = true;
548        }
549
550        self.ip = cp.ip;
551        Err(RuntimeError::Backtracked)
552    }
553
554    /// Advance to next sibling or backtrack if search exhausted.
555    fn advance_or_backtrack<T: Tracer>(
556        &mut self,
557        policy: SkipPolicy,
558        cont_nav: Nav,
559        tracer: &mut T,
560    ) -> Result<(), RuntimeError> {
561        if !self.cursor.continue_search(policy) {
562            return self.backtrack(tracer);
563        }
564        tracer.trace_nav(cont_nav, self.cursor.node());
565        Ok(())
566    }
567
568    fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
569        use EffectOpcode::*;
570
571        let effect = match op.opcode {
572            // Suppress control: trace then update depth
573            SuppressBegin => {
574                tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
575                self.suppress_depth += 1;
576                return;
577            }
578            SuppressEnd => {
579                self.suppress_depth = self
580                    .suppress_depth
581                    .checked_sub(1)
582                    .expect("SuppressEnd without matching SuppressBegin");
583                tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
584                return;
585            }
586
587            // Skip data effects when suppressing, but trace them
588            _ if self.suppress_depth > 0 => {
589                tracer.trace_effect_suppressed(op.opcode, op.payload);
590                return;
591            }
592
593            // Data effects
594            Node => {
595                RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
596            }
597            Text => {
598                RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
599            }
600            Arr => RuntimeEffect::Arr,
601            Push => RuntimeEffect::Push,
602            EndArr => RuntimeEffect::EndArr,
603            Obj => RuntimeEffect::Obj,
604            EndObj => RuntimeEffect::EndObj,
605            Set => RuntimeEffect::Set(op.payload as u16),
606            Enum => RuntimeEffect::Enum(op.payload as u16),
607            EndEnum => RuntimeEffect::EndEnum,
608            Clear => RuntimeEffect::Clear,
609            Null => RuntimeEffect::Null,
610        };
611
612        tracer.trace_effect(&effect);
613        self.effects.push(effect);
614    }
615}