plotnik_vm/engine/
vm.rs

1//! Virtual machine for executing compiled Plotnik queries.
2
3use arborium_tree_sitter::{Node, Tree};
4
5use plotnik_bytecode::{
6    Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7    PredicateOp, StepAddr, Trampoline,
8};
9
10/// Get the nav for continue_search (always a sibling move).
11/// Up/Stay/Epsilon variants return Next as a default since they don't do sibling search.
12fn continuation_nav(nav: Nav) -> Nav {
13    match nav {
14        Nav::Down | Nav::Next => Nav::Next,
15        Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16        Nav::DownExact | Nav::NextExact => Nav::NextExact,
17        Nav::Epsilon
18        | Nav::Up(_)
19        | Nav::UpSkipTrivia(_)
20        | Nav::UpExact(_)
21        | Nav::Stay
22        | Nav::StayExact => Nav::Next,
23    }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29/// Derive skip policy from navigation mode without navigating.
30/// Used when retrying a Call to determine the policy for the next checkpoint.
31/// Epsilon/Stay/Up variants return None since they don't retry among siblings.
32fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33    match nav {
34        Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35        Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36        Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37        Nav::Epsilon
38        | Nav::Stay
39        | Nav::StayExact
40        | Nav::Up(_)
41        | Nav::UpSkipTrivia(_)
42        | Nav::UpExact(_) => None,
43    }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50/// Runtime limits for query execution.
51#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53    /// Maximum total steps (default: 1,000,000).
54    pub(crate) exec_fuel: u32,
55    /// Maximum call depth (default: 1,024).
56    pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60    fn default() -> Self {
61        Self {
62            exec_fuel: 1_000_000,
63            recursion_limit: 1024,
64        }
65    }
66}
67
68impl FuelLimits {
69    /// Create new fuel limits with defaults.
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Set the execution fuel limit.
75    pub fn exec_fuel(mut self, fuel: u32) -> Self {
76        self.exec_fuel = fuel;
77        self
78    }
79
80    /// Set the recursion limit.
81    pub fn recursion_limit(mut self, limit: u32) -> Self {
82        self.recursion_limit = limit;
83        self
84    }
85
86    pub fn get_exec_fuel(&self) -> u32 {
87        self.exec_fuel
88    }
89    pub fn get_recursion_limit(&self) -> u32 {
90        self.recursion_limit
91    }
92}
93
94/// Virtual machine state for query execution.
95pub struct VM<'t> {
96    pub(crate) cursor: CursorWrapper<'t>,
97    /// Current instruction pointer (raw u16, 0 is valid at runtime).
98    pub(crate) ip: u16,
99    pub(crate) frames: FrameArena,
100    pub(crate) checkpoints: CheckpointStack,
101    pub(crate) effects: EffectLog<'t>,
102    pub(crate) matched_node: Option<Node<'t>>,
103
104    // Fuel tracking
105    pub(crate) exec_fuel: u32,
106    pub(crate) recursion_depth: u32,
107    pub(crate) limits: FuelLimits,
108
109    /// When true, the next Call instruction should skip navigation (use Stay).
110    /// This is set when backtracking to a Call retry checkpoint after advancing
111    /// the cursor to a new sibling. The Call's navigation was already done, and
112    /// we're now at the correct position for the callee to match.
113    pub(crate) skip_call_nav: bool,
114
115    /// Suppression depth counter. When > 0, effects are suppressed (not emitted to log).
116    /// Incremented by SuppressBegin, decremented by SuppressEnd.
117    pub(crate) suppress_depth: u16,
118
119    /// Target address for Trampoline instruction.
120    /// Set from entrypoint before execution; Trampoline jumps to this address.
121    pub(crate) entrypoint_target: u16,
122
123    /// Source text for predicate evaluation.
124    pub(crate) source: &'t str,
125}
126
127/// Builder for VM instances.
128pub struct VMBuilder<'t> {
129    source: &'t str,
130    tree: &'t Tree,
131    trivia_types: Vec<u16>,
132    limits: FuelLimits,
133}
134
135impl<'t> VMBuilder<'t> {
136    /// Create a new VM builder.
137    pub fn new(source: &'t str, tree: &'t Tree) -> Self {
138        Self {
139            source,
140            tree,
141            trivia_types: Vec::new(),
142            limits: FuelLimits::default(),
143        }
144    }
145
146    /// Set the trivia types to skip during navigation.
147    pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
148        self.trivia_types = types;
149        self
150    }
151
152    /// Set the fuel limits.
153    pub fn limits(mut self, limits: FuelLimits) -> Self {
154        self.limits = limits;
155        self
156    }
157
158    /// Set the execution fuel limit.
159    pub fn exec_fuel(mut self, fuel: u32) -> Self {
160        self.limits = self.limits.exec_fuel(fuel);
161        self
162    }
163
164    /// Set the recursion limit.
165    pub fn recursion_limit(mut self, limit: u32) -> Self {
166        self.limits = self.limits.recursion_limit(limit);
167        self
168    }
169
170    /// Build the VM.
171    pub fn build(self) -> VM<'t> {
172        VM {
173            cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
174            ip: 0,
175            frames: FrameArena::new(),
176            checkpoints: CheckpointStack::new(),
177            effects: EffectLog::new(),
178            matched_node: None,
179            exec_fuel: self.limits.get_exec_fuel(),
180            recursion_depth: 0,
181            limits: self.limits,
182            skip_call_nav: false,
183            suppress_depth: 0,
184            entrypoint_target: 0,
185            source: self.source,
186        }
187    }
188}
189
190impl<'t> VM<'t> {
191    /// Create a VM builder.
192    pub fn builder(source: &'t str, tree: &'t Tree) -> VMBuilder<'t> {
193        VMBuilder::new(source, tree)
194    }
195
196    /// Create a new VM for execution.
197    #[deprecated(note = "Use VM::builder(source, tree).trivia_types(...).build() instead")]
198    pub fn new(
199        source: &'t str,
200        tree: &'t Tree,
201        trivia_types: Vec<u16>,
202        limits: FuelLimits,
203    ) -> Self {
204        Self::builder(source, tree)
205            .trivia_types(trivia_types)
206            .limits(limits)
207            .build()
208    }
209
210    /// Helper for internal checkpoint creation (eliminates duplication).
211    fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
212        Checkpoint::new(
213            self.cursor.descendant_index(),
214            self.effects.len(),
215            self.frames.current(),
216            self.recursion_depth,
217            ip,
218            skip_policy,
219            self.suppress_depth,
220        )
221    }
222
223    /// Execute query from entrypoint, returning effect log.
224    ///
225    /// This is a convenience method that uses `NoopTracer`, which gets
226    /// completely optimized away at compile time.
227    pub fn execute(
228        self,
229        module: &Module,
230        bootstrap: StepAddr,
231        entrypoint: &Entrypoint,
232    ) -> Result<EffectLog<'t>, RuntimeError> {
233        self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
234    }
235
236    /// Execute query with a tracer for debugging.
237    ///
238    /// The tracer is generic, so `NoopTracer` calls are optimized away
239    /// while `PrintTracer` calls collect execution trace.
240    ///
241    /// `bootstrap` is the preamble entry point - caller decides which preamble to use.
242    pub fn execute_with<T: Tracer>(
243        mut self,
244        module: &Module,
245        bootstrap: StepAddr,
246        entrypoint: &Entrypoint,
247        tracer: &mut T,
248    ) -> Result<EffectLog<'t>, RuntimeError> {
249        // Bootstrap address: where VM starts execution (preamble entry point).
250        // Caller provides this, enabling different preamble types (root-match, recursive, etc.).
251        self.ip = bootstrap;
252        self.entrypoint_target = entrypoint.target;
253        tracer.trace_enter_preamble();
254
255        loop {
256            // Fuel check
257            if self.exec_fuel == 0 {
258                return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
259            }
260            self.exec_fuel -= 1;
261
262            // Fetch and dispatch
263            let instr = module.decode_step(self.ip);
264            tracer.trace_instruction(self.ip, &instr);
265
266            let result = match instr {
267                Instruction::Match(m) => self.exec_match(m, module, tracer),
268                Instruction::Call(c) => self.exec_call(c, tracer),
269                Instruction::Return(_) => self.exec_return(tracer),
270                Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
271            };
272
273            match result {
274                Ok(()) | Err(RuntimeError::Backtracked) => continue,
275                Err(RuntimeError::Accept) => return Ok(self.effects),
276                Err(e) => return Err(e),
277            }
278        }
279    }
280
281    fn exec_match<T: Tracer>(
282        &mut self,
283        m: Match<'_>,
284        module: &Module,
285        tracer: &mut T,
286    ) -> Result<(), RuntimeError> {
287        for effect_op in m.pre_effects() {
288            self.emit_effect(effect_op, tracer);
289        }
290
291        // Only clear matched_node for non-epsilon transitions.
292        // For epsilon, preserve matched_node from previous match or return.
293        if !m.is_epsilon() {
294            self.matched_node = None;
295            self.navigate_and_match(m, module, tracer)?;
296        }
297
298        for effect_op in m.post_effects() {
299            self.emit_effect(effect_op, tracer);
300        }
301
302        self.branch_to_successors(m, tracer)
303    }
304
305    fn navigate_and_match<T: Tracer>(
306        &mut self,
307        m: Match<'_>,
308        module: &Module,
309        tracer: &mut T,
310    ) -> Result<(), RuntimeError> {
311        let Some(policy) = self.cursor.navigate(m.nav) else {
312            tracer.trace_nav_failure(m.nav);
313            return self.backtrack(tracer);
314        };
315        tracer.trace_nav(m.nav, self.cursor.node());
316
317        let cont_nav = continuation_nav(m.nav);
318        loop {
319            if !self.node_matches(m, tracer) {
320                self.advance_or_backtrack(policy, cont_nav, tracer)?;
321                continue;
322            }
323            break;
324        }
325
326        tracer.trace_match_success(self.cursor.node());
327        if let Some(field_id) = m.node_field {
328            tracer.trace_field_success(field_id);
329        }
330
331        self.matched_node = Some(self.cursor.node());
332
333        for field_id in m.neg_fields() {
334            if self.cursor.node().child_by_field_id(field_id).is_some() {
335                return self.backtrack(tracer);
336            }
337        }
338
339        // Check predicate if present
340        if let Some((op, is_regex, value_ref)) = m.predicate()
341            && !self.evaluate_predicate(op, is_regex, value_ref, module)
342        {
343            return self.backtrack(tracer);
344        }
345
346        Ok(())
347    }
348
349    /// Evaluate a predicate against the matched node's text.
350    ///
351    /// - `op`: 0=Eq, 1=Ne, 2=StartsWith, 3=EndsWith, 4=Contains, 5=RegexMatch, 6=RegexNoMatch
352    /// - `is_regex`: true if value_ref is a RegexTable index
353    /// - `value_ref`: index into StringTable or RegexTable
354    fn evaluate_predicate(&self, op: u8, is_regex: bool, value_ref: u16, module: &Module) -> bool {
355        let node = self.cursor.node();
356        let node_text = &self.source[node.start_byte()..node.end_byte()];
357        let op = PredicateOp::from_byte(op);
358
359        if is_regex {
360            let regex_bytes = module.regexes().get_by_index(value_ref as usize);
361            assert!(
362                !regex_bytes.is_empty(),
363                "regex predicate references empty DFA bytes"
364            );
365            let dfa = plotnik_bytecode::deserialize_dfa(regex_bytes)
366                .expect("regex DFA deserialization failed");
367
368            use regex_automata::Input;
369            use regex_automata::dfa::Automaton;
370            let input = Input::new(node_text);
371            let matched = dfa.try_search_fwd(&input).ok().flatten().is_some();
372
373            match op {
374                PredicateOp::RegexMatch => matched,
375                PredicateOp::RegexNoMatch => !matched,
376                _ => unreachable!("non-regex op with is_regex=true"),
377            }
378        } else {
379            let target = module.strings().get_by_index(value_ref as usize);
380
381            match op {
382                PredicateOp::Eq => node_text == target,
383                PredicateOp::Ne => node_text != target,
384                PredicateOp::StartsWith => node_text.starts_with(target),
385                PredicateOp::EndsWith => node_text.ends_with(target),
386                PredicateOp::Contains => node_text.contains(target),
387                _ => unreachable!("regex op with is_regex=false"),
388            }
389        }
390    }
391
392    /// Check if current node matches type and field constraints.
393    fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
394        let node = self.cursor.node();
395
396        // Check node type constraint
397        match m.node_type {
398            NodeTypeIR::Any => {
399                // Any node matches - no check needed
400            }
401            NodeTypeIR::Named(None) => {
402                // `(_)` wildcard: must be a named node
403                if !node.is_named() {
404                    tracer.trace_match_failure(node);
405                    return false;
406                }
407            }
408            NodeTypeIR::Named(Some(expected)) => {
409                // Specific named type: check kind_id
410                if node.kind_id() != expected.get() {
411                    tracer.trace_match_failure(node);
412                    return false;
413                }
414            }
415            NodeTypeIR::Anonymous(None) => {
416                // Any anonymous node: must NOT be named
417                if node.is_named() {
418                    tracer.trace_match_failure(node);
419                    return false;
420                }
421            }
422            NodeTypeIR::Anonymous(Some(expected)) => {
423                // Specific anonymous type: check kind_id
424                if node.kind_id() != expected.get() {
425                    tracer.trace_match_failure(node);
426                    return false;
427                }
428            }
429        }
430
431        // Check field constraint
432        if let Some(expected) = m.node_field
433            && self.cursor.field_id() != Some(expected)
434        {
435            tracer.trace_field_failure(node);
436            return false;
437        }
438        true
439    }
440
441    fn branch_to_successors<T: Tracer>(
442        &mut self,
443        m: Match<'_>,
444        tracer: &mut T,
445    ) -> Result<(), RuntimeError> {
446        if m.succ_count() == 0 {
447            return Err(RuntimeError::Accept);
448        }
449
450        // Push checkpoints for alternate branches (in reverse order)
451        for i in (1..m.succ_count()).rev() {
452            self.checkpoints
453                .push(self.create_checkpoint(m.successor(i).get(), None));
454            tracer.trace_checkpoint_created(self.ip);
455        }
456
457        self.ip = m.successor(0).get();
458        Ok(())
459    }
460
461    fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
462        if self.recursion_depth >= self.limits.recursion_limit {
463            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
464        }
465
466        // Get skip policy: from navigation (normal) or from nav mode (retry)
467        let skip_policy = if self.skip_call_nav {
468            // Retry: skip navigation, just check field, derive policy from nav mode
469            self.skip_call_nav = false;
470            self.check_field(c.node_field, tracer)?;
471            skip_policy_for_nav(c.nav)
472        } else {
473            // Normal: navigate and capture skip policy
474            self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
475        };
476
477        // Push checkpoint for retry (both normal and retry paths need this)
478        if let Some(policy) = skip_policy
479            && policy != SkipPolicy::Exact
480        {
481            self.checkpoints
482                .push(self.create_checkpoint(self.ip, Some(policy)));
483            tracer.trace_checkpoint_created(self.ip);
484        }
485
486        // Save tree depth AFTER navigation. On Return, we go up to this depth.
487        let saved_depth = self.cursor.depth();
488        tracer.trace_call(c.target.get());
489        self.frames.push(c.next.get(), saved_depth);
490        self.recursion_depth += 1;
491        self.ip = c.target.get();
492        Ok(())
493    }
494
495    /// Execute a Trampoline instruction.
496    ///
497    /// Trampoline is like Call, but the target comes from VM context (entrypoint_target)
498    /// rather than being encoded in the instruction. Used for universal entry preamble.
499    fn exec_trampoline<T: Tracer>(
500        &mut self,
501        t: Trampoline,
502        tracer: &mut T,
503    ) -> Result<(), RuntimeError> {
504        if self.recursion_depth >= self.limits.recursion_limit {
505            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
506        }
507
508        // Trampoline doesn't navigate - it's always at root, cursor stays at root
509        let saved_depth = self.cursor.depth();
510        tracer.trace_call(self.entrypoint_target);
511        self.frames.push(t.next.get(), saved_depth);
512        self.recursion_depth += 1;
513        self.ip = self.entrypoint_target;
514        Ok(())
515    }
516
517    /// Navigate to a field and return the skip policy for retry support.
518    ///
519    /// Returns `Some(policy)` if navigation was performed, `None` if Stay nav was used.
520    fn navigate_to_field_with_policy<T: Tracer>(
521        &mut self,
522        nav: Nav,
523        field: Option<std::num::NonZeroU16>,
524        tracer: &mut T,
525    ) -> Result<Option<SkipPolicy>, RuntimeError> {
526        if nav == Nav::Stay || nav == Nav::StayExact {
527            self.check_field(field, tracer)?;
528            return Ok(None);
529        }
530
531        let Some(policy) = self.cursor.navigate(nav) else {
532            tracer.trace_nav_failure(nav);
533            return Err(self.backtrack(tracer).unwrap_err());
534        };
535        tracer.trace_nav(nav, self.cursor.node());
536
537        let Some(field_id) = field else {
538            return Ok(Some(policy));
539        };
540
541        let cont_nav = continuation_nav(nav);
542        loop {
543            if self.cursor.field_id() == Some(field_id) {
544                tracer.trace_field_success(field_id);
545                return Ok(Some(policy));
546            }
547            tracer.trace_field_failure(self.cursor.node());
548            self.advance_or_backtrack(policy, cont_nav, tracer)?;
549        }
550    }
551
552    fn check_field<T: Tracer>(
553        &mut self,
554        field: Option<std::num::NonZeroU16>,
555        tracer: &mut T,
556    ) -> Result<(), RuntimeError> {
557        let Some(field_id) = field else {
558            return Ok(());
559        };
560        if self.cursor.field_id() != Some(field_id) {
561            tracer.trace_field_failure(self.cursor.node());
562            return self.backtrack(tracer);
563        }
564        tracer.trace_field_success(field_id);
565        Ok(())
566    }
567
568    fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
569        tracer.trace_return();
570
571        // If no frames, we're returning from top-level entrypoint → Accept
572        if self.frames.is_empty() {
573            return Err(RuntimeError::Accept);
574        }
575
576        let (return_addr, saved_depth) = self.frames.pop();
577        self.recursion_depth -= 1;
578
579        // Prune frames (O(1) amortized)
580        self.frames.prune(self.checkpoints.max_frame_ref());
581
582        // Set matched_node BEFORE going up so effects after
583        // a Call can capture the node that the callee matched.
584        self.matched_node = Some(self.cursor.node());
585
586        // Go up to saved depth level. This preserves sibling advances
587        // (continue_search at same level) while restoring level when
588        // the callee descended into children.
589        while self.cursor.depth() > saved_depth {
590            if !self.cursor.goto_parent() {
591                break;
592            }
593        }
594
595        self.ip = return_addr;
596        Ok(())
597    }
598
599    fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
600        let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
601        tracer.trace_backtrack();
602        self.cursor.goto_descendant(cp.descendant_index);
603        self.effects.truncate(cp.effect_watermark);
604        self.frames.restore(cp.frame_index);
605        self.recursion_depth = cp.recursion_depth;
606        self.suppress_depth = cp.suppress_depth;
607
608        // Call retry: advance cursor to next sibling before re-executing
609        if let Some(policy) = cp.skip_policy {
610            if !self.cursor.continue_search(policy) {
611                return self.backtrack(tracer);
612            }
613            tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
614            self.skip_call_nav = true;
615        }
616
617        self.ip = cp.ip;
618        Err(RuntimeError::Backtracked)
619    }
620
621    /// Advance to next sibling or backtrack if search exhausted.
622    fn advance_or_backtrack<T: Tracer>(
623        &mut self,
624        policy: SkipPolicy,
625        cont_nav: Nav,
626        tracer: &mut T,
627    ) -> Result<(), RuntimeError> {
628        if !self.cursor.continue_search(policy) {
629            return self.backtrack(tracer);
630        }
631        tracer.trace_nav(cont_nav, self.cursor.node());
632        Ok(())
633    }
634
635    fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
636        use EffectOpcode::*;
637
638        let effect = match op.opcode {
639            // Suppress control: trace then update depth
640            SuppressBegin => {
641                tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
642                self.suppress_depth += 1;
643                return;
644            }
645            SuppressEnd => {
646                self.suppress_depth = self
647                    .suppress_depth
648                    .checked_sub(1)
649                    .expect("SuppressEnd without matching SuppressBegin");
650                tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
651                return;
652            }
653
654            // Skip data effects when suppressing, but trace them
655            _ if self.suppress_depth > 0 => {
656                tracer.trace_effect_suppressed(op.opcode, op.payload);
657                return;
658            }
659
660            // Data effects
661            Node => {
662                RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
663            }
664            Text => {
665                RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
666            }
667            Arr => RuntimeEffect::Arr,
668            Push => RuntimeEffect::Push,
669            EndArr => RuntimeEffect::EndArr,
670            Obj => RuntimeEffect::Obj,
671            EndObj => RuntimeEffect::EndObj,
672            Set => RuntimeEffect::Set(op.payload as u16),
673            Enum => RuntimeEffect::Enum(op.payload as u16),
674            EndEnum => RuntimeEffect::EndEnum,
675            Clear => RuntimeEffect::Clear,
676            Null => RuntimeEffect::Null,
677        };
678
679        tracer.trace_effect(&effect);
680        self.effects.push(effect);
681    }
682}