plotnik_vm/engine/
vm.rs

1//! Virtual machine for executing compiled Plotnik queries.
2
3use arborium_tree_sitter::{Node, Tree};
4
5use plotnik_bytecode::{
6    Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7    PredicateOp, StepAddr, Trampoline,
8};
9
10/// Get the nav for continue_search (always a sibling move).
11/// Up/Stay/Epsilon variants return Next as a default since they don't do sibling search.
12fn continuation_nav(nav: Nav) -> Nav {
13    match nav {
14        Nav::Down | Nav::Next => Nav::Next,
15        Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16        Nav::DownExact | Nav::NextExact => Nav::NextExact,
17        Nav::Epsilon
18        | Nav::Up(_)
19        | Nav::UpSkipTrivia(_)
20        | Nav::UpExact(_)
21        | Nav::Stay
22        | Nav::StayExact => Nav::Next,
23    }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29/// Derive skip policy from navigation mode without navigating.
30/// Used when retrying a Call to determine the policy for the next checkpoint.
31/// Epsilon/Stay/Up variants return None since they don't retry among siblings.
32fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33    match nav {
34        Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35        Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36        Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37        Nav::Epsilon
38        | Nav::Stay
39        | Nav::StayExact
40        | Nav::Up(_)
41        | Nav::UpSkipTrivia(_)
42        | Nav::UpExact(_) => None,
43    }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50/// Runtime limits for query execution.
51#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53    /// Maximum total steps (default: 1,000,000).
54    pub(crate) exec_fuel: u32,
55    /// Maximum call depth (default: 1,024).
56    pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60    fn default() -> Self {
61        Self {
62            exec_fuel: 1_000_000,
63            recursion_limit: 1024,
64        }
65    }
66}
67
68impl FuelLimits {
69    /// Create new fuel limits with defaults.
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Set the execution fuel limit.
75    pub fn exec_fuel(mut self, fuel: u32) -> Self {
76        self.exec_fuel = fuel;
77        self
78    }
79
80    /// Set the recursion limit.
81    pub fn recursion_limit(mut self, limit: u32) -> Self {
82        self.recursion_limit = limit;
83        self
84    }
85
86    pub fn get_exec_fuel(&self) -> u32 {
87        self.exec_fuel
88    }
89    pub fn get_recursion_limit(&self) -> u32 {
90        self.recursion_limit
91    }
92}
93
94/// Virtual machine state for query execution.
95pub struct VM<'t> {
96    pub(crate) cursor: CursorWrapper<'t>,
97    /// Current instruction pointer (raw u16, 0 is valid at runtime).
98    pub(crate) ip: u16,
99    pub(crate) frames: FrameArena,
100    pub(crate) checkpoints: CheckpointStack,
101    pub(crate) effects: EffectLog<'t>,
102    pub(crate) matched_node: Option<Node<'t>>,
103
104    // Fuel tracking
105    pub(crate) exec_fuel: u32,
106    pub(crate) recursion_depth: u32,
107    pub(crate) limits: FuelLimits,
108
109    /// When true, the next Call instruction should skip navigation (use Stay).
110    /// This is set when backtracking to a Call retry checkpoint after advancing
111    /// the cursor to a new sibling. The Call's navigation was already done, and
112    /// we're now at the correct position for the callee to match.
113    pub(crate) skip_call_nav: bool,
114
115    /// Suppression depth counter. When > 0, effects are suppressed (not emitted to log).
116    /// Incremented by SuppressBegin, decremented by SuppressEnd.
117    pub(crate) suppress_depth: u16,
118
119    /// Target address for Trampoline instruction.
120    /// Set from entrypoint before execution; Trampoline jumps to this address.
121    pub(crate) entrypoint_target: u16,
122
123    /// Source text for predicate evaluation.
124    pub(crate) source: &'t str,
125}
126
127/// Builder for VM instances.
128pub struct VMBuilder<'t> {
129    source: &'t str,
130    tree: &'t Tree,
131    trivia_types: Vec<u16>,
132    limits: FuelLimits,
133}
134
135impl<'t> VMBuilder<'t> {
136    /// Create a new VM builder.
137    pub fn new(source: &'t str, tree: &'t Tree) -> Self {
138        Self {
139            source,
140            tree,
141            trivia_types: Vec::new(),
142            limits: FuelLimits::default(),
143        }
144    }
145
146    /// Set the trivia types to skip during navigation.
147    pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
148        self.trivia_types = types;
149        self
150    }
151
152    /// Set the fuel limits.
153    pub fn limits(mut self, limits: FuelLimits) -> Self {
154        self.limits = limits;
155        self
156    }
157
158    /// Set the execution fuel limit.
159    pub fn exec_fuel(mut self, fuel: u32) -> Self {
160        self.limits = self.limits.exec_fuel(fuel);
161        self
162    }
163
164    /// Set the recursion limit.
165    pub fn recursion_limit(mut self, limit: u32) -> Self {
166        self.limits = self.limits.recursion_limit(limit);
167        self
168    }
169
170    /// Build the VM.
171    pub fn build(self) -> VM<'t> {
172        VM {
173            cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
174            ip: 0,
175            frames: FrameArena::new(),
176            checkpoints: CheckpointStack::new(),
177            effects: EffectLog::new(),
178            matched_node: None,
179            exec_fuel: self.limits.get_exec_fuel(),
180            recursion_depth: 0,
181            limits: self.limits,
182            skip_call_nav: false,
183            suppress_depth: 0,
184            entrypoint_target: 0,
185            source: self.source,
186        }
187    }
188}
189
190impl<'t> VM<'t> {
191    /// Create a VM builder.
192    pub fn builder(source: &'t str, tree: &'t Tree) -> VMBuilder<'t> {
193        VMBuilder::new(source, tree)
194    }
195
196    /// Create a new VM for execution.
197    #[deprecated(note = "Use VM::builder(source, tree).trivia_types(...).build() instead")]
198    pub fn new(
199        source: &'t str,
200        tree: &'t Tree,
201        trivia_types: Vec<u16>,
202        limits: FuelLimits,
203    ) -> Self {
204        Self::builder(source, tree)
205            .trivia_types(trivia_types)
206            .limits(limits)
207            .build()
208    }
209
210    /// Helper for internal checkpoint creation (eliminates duplication).
211    fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
212        Checkpoint::new(
213            self.cursor.descendant_index(),
214            self.effects.len(),
215            self.frames.current(),
216            self.recursion_depth,
217            ip,
218            skip_policy,
219            self.suppress_depth,
220        )
221    }
222
223    /// Execute query from entrypoint, returning effect log.
224    ///
225    /// This is a convenience method that uses `NoopTracer`, which gets
226    /// completely optimized away at compile time.
227    pub fn execute(
228        self,
229        module: &Module,
230        bootstrap: StepAddr,
231        entrypoint: &Entrypoint,
232    ) -> Result<EffectLog<'t>, RuntimeError> {
233        self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
234    }
235
236    /// Execute query with a tracer for debugging.
237    ///
238    /// The tracer is generic, so `NoopTracer` calls are optimized away
239    /// while `PrintTracer` calls collect execution trace.
240    ///
241    /// `bootstrap` is the preamble entry point - caller decides which preamble to use.
242    pub fn execute_with<T: Tracer>(
243        mut self,
244        module: &Module,
245        bootstrap: StepAddr,
246        entrypoint: &Entrypoint,
247        tracer: &mut T,
248    ) -> Result<EffectLog<'t>, RuntimeError> {
249        // Bootstrap address: where VM starts execution (preamble entry point).
250        // Caller provides this, enabling different preamble types (root-match, recursive, etc.).
251        self.ip = bootstrap;
252        self.entrypoint_target = entrypoint.target;
253        tracer.trace_enter_preamble();
254
255        loop {
256            // Fuel check
257            if self.exec_fuel == 0 {
258                return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
259            }
260            self.exec_fuel -= 1;
261
262            // Fetch and dispatch
263            let instr = module.decode_step(self.ip);
264            tracer.trace_instruction(self.ip, &instr);
265
266            let result = match instr {
267                Instruction::Match(m) => self.exec_match(m, module, tracer),
268                Instruction::Call(c) => self.exec_call(c, tracer),
269                Instruction::Return(_) => self.exec_return(tracer),
270                Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
271            };
272
273            match result {
274                Ok(()) | Err(RuntimeError::Backtracked) => continue,
275                Err(RuntimeError::Accept) => return Ok(self.effects),
276                Err(e) => return Err(e),
277            }
278        }
279    }
280
281    fn exec_match<T: Tracer>(
282        &mut self,
283        m: Match<'_>,
284        module: &Module,
285        tracer: &mut T,
286    ) -> Result<(), RuntimeError> {
287        for effect_op in m.pre_effects() {
288            self.emit_effect(effect_op, tracer);
289        }
290
291        // Only clear matched_node for non-epsilon transitions.
292        // For epsilon, preserve matched_node from previous match or return.
293        if !m.is_epsilon() {
294            self.matched_node = None;
295            self.navigate_and_match(m, module, tracer)?;
296        }
297
298        for effect_op in m.post_effects() {
299            self.emit_effect(effect_op, tracer);
300        }
301
302        self.branch_to_successors(m, tracer)
303    }
304
305    fn navigate_and_match<T: Tracer>(
306        &mut self,
307        m: Match<'_>,
308        module: &Module,
309        tracer: &mut T,
310    ) -> Result<(), RuntimeError> {
311        let Some(policy) = self.cursor.navigate(m.nav) else {
312            tracer.trace_nav_failure(m.nav);
313            return self.backtrack(tracer);
314        };
315        tracer.trace_nav(m.nav, self.cursor.node());
316
317        let cont_nav = continuation_nav(m.nav);
318        loop {
319            if !self.node_matches(m, tracer) {
320                self.advance_or_backtrack(policy, cont_nav, tracer)?;
321                continue;
322            }
323            break;
324        }
325
326        tracer.trace_match_success(self.cursor.node());
327        if let Some(field_id) = m.node_field {
328            tracer.trace_field_success(field_id);
329        }
330
331        self.matched_node = Some(self.cursor.node());
332
333        for field_id in m.neg_fields() {
334            if self.cursor.node().child_by_field_id(field_id).is_some() {
335                return self.backtrack(tracer);
336            }
337        }
338
339        // Check predicate if present
340        if let Some((op, is_regex, value_ref)) = m.predicate()
341            && !self.evaluate_predicate(op, is_regex, value_ref, module)
342        {
343            return self.backtrack(tracer);
344        }
345
346        Ok(())
347    }
348
349    /// Evaluate a predicate against the matched node's text.
350    ///
351    /// - `op`: 0=Eq, 1=Ne, 2=StartsWith, 3=EndsWith, 4=Contains, 5=RegexMatch, 6=RegexNoMatch
352    /// - `is_regex`: true if value_ref is a RegexTable index
353    /// - `value_ref`: index into StringTable or RegexTable
354    fn evaluate_predicate(&self, op: u8, is_regex: bool, value_ref: u16, module: &Module) -> bool {
355        let node = self.cursor.node();
356        let node_text = &self.source[node.start_byte()..node.end_byte()];
357        let op = PredicateOp::from_byte(op);
358
359        if is_regex {
360            let regex_bytes = module.regexes().get_by_index(value_ref as usize);
361            assert!(
362                !regex_bytes.is_empty(),
363                "regex predicate references empty DFA bytes"
364            );
365            let dfa = plotnik_bytecode::deserialize_dfa(regex_bytes)
366                .expect("regex DFA deserialization failed");
367
368            use regex_automata::Input;
369            use regex_automata::dfa::Automaton;
370            let input = Input::new(node_text);
371            let matched = dfa
372                .try_search_fwd(&input)
373                .expect("DFA search failed")
374                .is_some();
375
376            match op {
377                PredicateOp::RegexMatch => matched,
378                PredicateOp::RegexNoMatch => !matched,
379                _ => unreachable!("non-regex op with is_regex=true"),
380            }
381        } else {
382            let target = module.strings().get_by_index(value_ref as usize);
383
384            match op {
385                PredicateOp::Eq => node_text == target,
386                PredicateOp::Ne => node_text != target,
387                PredicateOp::StartsWith => node_text.starts_with(target),
388                PredicateOp::EndsWith => node_text.ends_with(target),
389                PredicateOp::Contains => node_text.contains(target),
390                _ => unreachable!("regex op with is_regex=false"),
391            }
392        }
393    }
394
395    /// Check if current node matches type and field constraints.
396    fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
397        let node = self.cursor.node();
398
399        // Check node type constraint
400        match m.node_type {
401            NodeTypeIR::Any => {
402                // Any node matches - no check needed
403            }
404            NodeTypeIR::Named(None) => {
405                // `(_)` wildcard: must be a named node
406                if !node.is_named() {
407                    tracer.trace_match_failure(node);
408                    return false;
409                }
410            }
411            NodeTypeIR::Named(Some(expected)) => {
412                // Specific named type: check kind_id
413                if node.kind_id() != expected.get() {
414                    tracer.trace_match_failure(node);
415                    return false;
416                }
417            }
418            NodeTypeIR::Anonymous(None) => {
419                // Any anonymous node: must NOT be named
420                if node.is_named() {
421                    tracer.trace_match_failure(node);
422                    return false;
423                }
424            }
425            NodeTypeIR::Anonymous(Some(expected)) => {
426                // Specific anonymous type: check kind_id
427                if node.kind_id() != expected.get() {
428                    tracer.trace_match_failure(node);
429                    return false;
430                }
431            }
432        }
433
434        // Check field constraint
435        if let Some(expected) = m.node_field
436            && self.cursor.field_id() != Some(expected)
437        {
438            tracer.trace_field_failure(node);
439            return false;
440        }
441        true
442    }
443
444    fn branch_to_successors<T: Tracer>(
445        &mut self,
446        m: Match<'_>,
447        tracer: &mut T,
448    ) -> Result<(), RuntimeError> {
449        if m.succ_count() == 0 {
450            return Err(RuntimeError::Accept);
451        }
452
453        // Push checkpoints for alternate branches (in reverse order)
454        for i in (1..m.succ_count()).rev() {
455            self.checkpoints
456                .push(self.create_checkpoint(m.successor(i).get(), None));
457            tracer.trace_checkpoint_created(self.ip);
458        }
459
460        self.ip = m.successor(0).get();
461        Ok(())
462    }
463
464    fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
465        if self.recursion_depth >= self.limits.recursion_limit {
466            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
467        }
468
469        // Get skip policy: from navigation (normal) or from nav mode (retry)
470        let skip_policy = if self.skip_call_nav {
471            // Retry: skip navigation, just check field, derive policy from nav mode
472            self.skip_call_nav = false;
473            self.check_field(c.node_field, tracer)?;
474            skip_policy_for_nav(c.nav)
475        } else {
476            // Normal: navigate and capture skip policy
477            self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
478        };
479
480        // Push checkpoint for retry (both normal and retry paths need this)
481        if let Some(policy) = skip_policy
482            && policy != SkipPolicy::Exact
483        {
484            self.checkpoints
485                .push(self.create_checkpoint(self.ip, Some(policy)));
486            tracer.trace_checkpoint_created(self.ip);
487        }
488
489        // Save tree depth AFTER navigation. On Return, we go up to this depth.
490        let saved_depth = self.cursor.depth();
491        tracer.trace_call(c.target.get());
492        self.frames.push(c.next.get(), saved_depth);
493        self.recursion_depth += 1;
494        self.ip = c.target.get();
495        Ok(())
496    }
497
498    /// Execute a Trampoline instruction.
499    ///
500    /// Trampoline is like Call, but the target comes from VM context (entrypoint_target)
501    /// rather than being encoded in the instruction. Used for universal entry preamble.
502    fn exec_trampoline<T: Tracer>(
503        &mut self,
504        t: Trampoline,
505        tracer: &mut T,
506    ) -> Result<(), RuntimeError> {
507        if self.recursion_depth >= self.limits.recursion_limit {
508            return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
509        }
510
511        // Trampoline doesn't navigate - it's always at root, cursor stays at root
512        let saved_depth = self.cursor.depth();
513        tracer.trace_call(self.entrypoint_target);
514        self.frames.push(t.next.get(), saved_depth);
515        self.recursion_depth += 1;
516        self.ip = self.entrypoint_target;
517        Ok(())
518    }
519
520    /// Navigate to a field and return the skip policy for retry support.
521    ///
522    /// Returns `Some(policy)` if navigation was performed, `None` if Stay nav was used.
523    fn navigate_to_field_with_policy<T: Tracer>(
524        &mut self,
525        nav: Nav,
526        field: Option<std::num::NonZeroU16>,
527        tracer: &mut T,
528    ) -> Result<Option<SkipPolicy>, RuntimeError> {
529        if nav == Nav::Stay || nav == Nav::StayExact {
530            self.check_field(field, tracer)?;
531            return Ok(None);
532        }
533
534        let Some(policy) = self.cursor.navigate(nav) else {
535            tracer.trace_nav_failure(nav);
536            return Err(self.backtrack(tracer).unwrap_err());
537        };
538        tracer.trace_nav(nav, self.cursor.node());
539
540        let Some(field_id) = field else {
541            return Ok(Some(policy));
542        };
543
544        let cont_nav = continuation_nav(nav);
545        loop {
546            if self.cursor.field_id() == Some(field_id) {
547                tracer.trace_field_success(field_id);
548                return Ok(Some(policy));
549            }
550            tracer.trace_field_failure(self.cursor.node());
551            self.advance_or_backtrack(policy, cont_nav, tracer)?;
552        }
553    }
554
555    fn check_field<T: Tracer>(
556        &mut self,
557        field: Option<std::num::NonZeroU16>,
558        tracer: &mut T,
559    ) -> Result<(), RuntimeError> {
560        let Some(field_id) = field else {
561            return Ok(());
562        };
563        if self.cursor.field_id() != Some(field_id) {
564            tracer.trace_field_failure(self.cursor.node());
565            return self.backtrack(tracer);
566        }
567        tracer.trace_field_success(field_id);
568        Ok(())
569    }
570
571    fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
572        tracer.trace_return();
573
574        // If no frames, we're returning from top-level entrypoint → Accept
575        if self.frames.is_empty() {
576            return Err(RuntimeError::Accept);
577        }
578
579        let (return_addr, saved_depth) = self.frames.pop();
580        self.recursion_depth -= 1;
581
582        // Prune frames (O(1) amortized)
583        self.frames.prune(self.checkpoints.max_frame_ref());
584
585        // Set matched_node BEFORE going up so effects after
586        // a Call can capture the node that the callee matched.
587        self.matched_node = Some(self.cursor.node());
588
589        // Go up to saved depth level. This preserves sibling advances
590        // (continue_search at same level) while restoring level when
591        // the callee descended into children.
592        while self.cursor.depth() > saved_depth {
593            if !self.cursor.goto_parent() {
594                break;
595            }
596        }
597
598        self.ip = return_addr;
599        Ok(())
600    }
601
602    fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
603        let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
604        tracer.trace_backtrack();
605        self.cursor.goto_descendant(cp.descendant_index);
606        self.effects.truncate(cp.effect_watermark);
607        self.frames.restore(cp.frame_index);
608        self.recursion_depth = cp.recursion_depth;
609        self.suppress_depth = cp.suppress_depth;
610
611        // Call retry: advance cursor to next sibling before re-executing
612        if let Some(policy) = cp.skip_policy {
613            if !self.cursor.continue_search(policy) {
614                return self.backtrack(tracer);
615            }
616            tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
617            self.skip_call_nav = true;
618        }
619
620        self.ip = cp.ip;
621        Err(RuntimeError::Backtracked)
622    }
623
624    /// Advance to next sibling or backtrack if search exhausted.
625    fn advance_or_backtrack<T: Tracer>(
626        &mut self,
627        policy: SkipPolicy,
628        cont_nav: Nav,
629        tracer: &mut T,
630    ) -> Result<(), RuntimeError> {
631        if !self.cursor.continue_search(policy) {
632            return self.backtrack(tracer);
633        }
634        tracer.trace_nav(cont_nav, self.cursor.node());
635        Ok(())
636    }
637
638    fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
639        use EffectOpcode::*;
640
641        let effect = match op.opcode {
642            // Suppress control: trace then update depth
643            SuppressBegin => {
644                tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
645                self.suppress_depth += 1;
646                return;
647            }
648            SuppressEnd => {
649                self.suppress_depth = self
650                    .suppress_depth
651                    .checked_sub(1)
652                    .expect("SuppressEnd without matching SuppressBegin");
653                tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
654                return;
655            }
656
657            // Skip data effects when suppressing, but trace them
658            _ if self.suppress_depth > 0 => {
659                tracer.trace_effect_suppressed(op.opcode, op.payload);
660                return;
661            }
662
663            // Data effects
664            Node => {
665                RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
666            }
667            Text => {
668                RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
669            }
670            Arr => RuntimeEffect::Arr,
671            Push => RuntimeEffect::Push,
672            EndArr => RuntimeEffect::EndArr,
673            Obj => RuntimeEffect::Obj,
674            EndObj => RuntimeEffect::EndObj,
675            Set => RuntimeEffect::Set(op.payload as u16),
676            Enum => RuntimeEffect::Enum(op.payload as u16),
677            EndEnum => RuntimeEffect::EndEnum,
678            Clear => RuntimeEffect::Clear,
679            Null => RuntimeEffect::Null,
680        };
681
682        tracer.trace_effect(&effect);
683        self.effects.push(effect);
684    }
685}