1use arborium_tree_sitter::{Node, Tree};
4
5use plotnik_bytecode::{
6 Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7 PredicateOp, StepAddr, Trampoline,
8};
9
10fn continuation_nav(nav: Nav) -> Nav {
13 match nav {
14 Nav::Down | Nav::Next => Nav::Next,
15 Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16 Nav::DownExact | Nav::NextExact => Nav::NextExact,
17 Nav::Epsilon
18 | Nav::Up(_)
19 | Nav::UpSkipTrivia(_)
20 | Nav::UpExact(_)
21 | Nav::Stay
22 | Nav::StayExact => Nav::Next,
23 }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33 match nav {
34 Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35 Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36 Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37 Nav::Epsilon
38 | Nav::Stay
39 | Nav::StayExact
40 | Nav::Up(_)
41 | Nav::UpSkipTrivia(_)
42 | Nav::UpExact(_) => None,
43 }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53 pub(crate) exec_fuel: u32,
55 pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60 fn default() -> Self {
61 Self {
62 exec_fuel: 1_000_000,
63 recursion_limit: 1024,
64 }
65 }
66}
67
68impl FuelLimits {
69 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn exec_fuel(mut self, fuel: u32) -> Self {
76 self.exec_fuel = fuel;
77 self
78 }
79
80 pub fn recursion_limit(mut self, limit: u32) -> Self {
82 self.recursion_limit = limit;
83 self
84 }
85
86 pub fn get_exec_fuel(&self) -> u32 {
87 self.exec_fuel
88 }
89 pub fn get_recursion_limit(&self) -> u32 {
90 self.recursion_limit
91 }
92}
93
94pub struct VM<'t> {
96 pub(crate) cursor: CursorWrapper<'t>,
97 pub(crate) ip: u16,
99 pub(crate) frames: FrameArena,
100 pub(crate) checkpoints: CheckpointStack,
101 pub(crate) effects: EffectLog<'t>,
102 pub(crate) matched_node: Option<Node<'t>>,
103
104 pub(crate) exec_fuel: u32,
106 pub(crate) recursion_depth: u32,
107 pub(crate) limits: FuelLimits,
108
109 pub(crate) skip_call_nav: bool,
114
115 pub(crate) suppress_depth: u16,
118
119 pub(crate) entrypoint_target: u16,
122
123 pub(crate) source: &'t str,
125}
126
127pub struct VMBuilder<'t> {
129 source: &'t str,
130 tree: &'t Tree,
131 trivia_types: Vec<u16>,
132 limits: FuelLimits,
133}
134
135impl<'t> VMBuilder<'t> {
136 pub fn new(source: &'t str, tree: &'t Tree) -> Self {
138 Self {
139 source,
140 tree,
141 trivia_types: Vec::new(),
142 limits: FuelLimits::default(),
143 }
144 }
145
146 pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
148 self.trivia_types = types;
149 self
150 }
151
152 pub fn limits(mut self, limits: FuelLimits) -> Self {
154 self.limits = limits;
155 self
156 }
157
158 pub fn exec_fuel(mut self, fuel: u32) -> Self {
160 self.limits = self.limits.exec_fuel(fuel);
161 self
162 }
163
164 pub fn recursion_limit(mut self, limit: u32) -> Self {
166 self.limits = self.limits.recursion_limit(limit);
167 self
168 }
169
170 pub fn build(self) -> VM<'t> {
172 VM {
173 cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
174 ip: 0,
175 frames: FrameArena::new(),
176 checkpoints: CheckpointStack::new(),
177 effects: EffectLog::new(),
178 matched_node: None,
179 exec_fuel: self.limits.get_exec_fuel(),
180 recursion_depth: 0,
181 limits: self.limits,
182 skip_call_nav: false,
183 suppress_depth: 0,
184 entrypoint_target: 0,
185 source: self.source,
186 }
187 }
188}
189
190impl<'t> VM<'t> {
191 pub fn builder(source: &'t str, tree: &'t Tree) -> VMBuilder<'t> {
193 VMBuilder::new(source, tree)
194 }
195
196 #[deprecated(note = "Use VM::builder(source, tree).trivia_types(...).build() instead")]
198 pub fn new(
199 source: &'t str,
200 tree: &'t Tree,
201 trivia_types: Vec<u16>,
202 limits: FuelLimits,
203 ) -> Self {
204 Self::builder(source, tree)
205 .trivia_types(trivia_types)
206 .limits(limits)
207 .build()
208 }
209
210 fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
212 Checkpoint::new(
213 self.cursor.descendant_index(),
214 self.effects.len(),
215 self.frames.current(),
216 self.recursion_depth,
217 ip,
218 skip_policy,
219 self.suppress_depth,
220 )
221 }
222
223 pub fn execute(
228 self,
229 module: &Module,
230 bootstrap: StepAddr,
231 entrypoint: &Entrypoint,
232 ) -> Result<EffectLog<'t>, RuntimeError> {
233 self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
234 }
235
236 pub fn execute_with<T: Tracer>(
243 mut self,
244 module: &Module,
245 bootstrap: StepAddr,
246 entrypoint: &Entrypoint,
247 tracer: &mut T,
248 ) -> Result<EffectLog<'t>, RuntimeError> {
249 self.ip = bootstrap;
252 self.entrypoint_target = entrypoint.target;
253 tracer.trace_enter_preamble();
254
255 loop {
256 if self.exec_fuel == 0 {
258 return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
259 }
260 self.exec_fuel -= 1;
261
262 let instr = module.decode_step(self.ip);
264 tracer.trace_instruction(self.ip, &instr);
265
266 let result = match instr {
267 Instruction::Match(m) => self.exec_match(m, module, tracer),
268 Instruction::Call(c) => self.exec_call(c, tracer),
269 Instruction::Return(_) => self.exec_return(tracer),
270 Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
271 };
272
273 match result {
274 Ok(()) | Err(RuntimeError::Backtracked) => continue,
275 Err(RuntimeError::Accept) => return Ok(self.effects),
276 Err(e) => return Err(e),
277 }
278 }
279 }
280
281 fn exec_match<T: Tracer>(
282 &mut self,
283 m: Match<'_>,
284 module: &Module,
285 tracer: &mut T,
286 ) -> Result<(), RuntimeError> {
287 for effect_op in m.pre_effects() {
288 self.emit_effect(effect_op, tracer);
289 }
290
291 if !m.is_epsilon() {
294 self.matched_node = None;
295 self.navigate_and_match(m, module, tracer)?;
296 }
297
298 for effect_op in m.post_effects() {
299 self.emit_effect(effect_op, tracer);
300 }
301
302 self.branch_to_successors(m, tracer)
303 }
304
305 fn navigate_and_match<T: Tracer>(
306 &mut self,
307 m: Match<'_>,
308 module: &Module,
309 tracer: &mut T,
310 ) -> Result<(), RuntimeError> {
311 let Some(policy) = self.cursor.navigate(m.nav) else {
312 tracer.trace_nav_failure(m.nav);
313 return self.backtrack(tracer);
314 };
315 tracer.trace_nav(m.nav, self.cursor.node());
316
317 let cont_nav = continuation_nav(m.nav);
318 loop {
319 if !self.node_matches(m, tracer) {
320 self.advance_or_backtrack(policy, cont_nav, tracer)?;
321 continue;
322 }
323 break;
324 }
325
326 tracer.trace_match_success(self.cursor.node());
327 if let Some(field_id) = m.node_field {
328 tracer.trace_field_success(field_id);
329 }
330
331 self.matched_node = Some(self.cursor.node());
332
333 for field_id in m.neg_fields() {
334 if self.cursor.node().child_by_field_id(field_id).is_some() {
335 return self.backtrack(tracer);
336 }
337 }
338
339 if let Some((op, is_regex, value_ref)) = m.predicate()
341 && !self.evaluate_predicate(op, is_regex, value_ref, module)
342 {
343 return self.backtrack(tracer);
344 }
345
346 Ok(())
347 }
348
349 fn evaluate_predicate(&self, op: u8, is_regex: bool, value_ref: u16, module: &Module) -> bool {
355 let node = self.cursor.node();
356 let node_text = &self.source[node.start_byte()..node.end_byte()];
357 let op = PredicateOp::from_byte(op);
358
359 if is_regex {
360 let regex_bytes = module.regexes().get_by_index(value_ref as usize);
361 assert!(
362 !regex_bytes.is_empty(),
363 "regex predicate references empty DFA bytes"
364 );
365 let dfa = plotnik_bytecode::deserialize_dfa(regex_bytes)
366 .expect("regex DFA deserialization failed");
367
368 use regex_automata::Input;
369 use regex_automata::dfa::Automaton;
370 let input = Input::new(node_text);
371 let matched = dfa
372 .try_search_fwd(&input)
373 .expect("DFA search failed")
374 .is_some();
375
376 match op {
377 PredicateOp::RegexMatch => matched,
378 PredicateOp::RegexNoMatch => !matched,
379 _ => unreachable!("non-regex op with is_regex=true"),
380 }
381 } else {
382 let target = module.strings().get_by_index(value_ref as usize);
383
384 match op {
385 PredicateOp::Eq => node_text == target,
386 PredicateOp::Ne => node_text != target,
387 PredicateOp::StartsWith => node_text.starts_with(target),
388 PredicateOp::EndsWith => node_text.ends_with(target),
389 PredicateOp::Contains => node_text.contains(target),
390 _ => unreachable!("regex op with is_regex=false"),
391 }
392 }
393 }
394
395 fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
397 let node = self.cursor.node();
398
399 match m.node_type {
401 NodeTypeIR::Any => {
402 }
404 NodeTypeIR::Named(None) => {
405 if !node.is_named() {
407 tracer.trace_match_failure(node);
408 return false;
409 }
410 }
411 NodeTypeIR::Named(Some(expected)) => {
412 if node.kind_id() != expected.get() {
414 tracer.trace_match_failure(node);
415 return false;
416 }
417 }
418 NodeTypeIR::Anonymous(None) => {
419 if node.is_named() {
421 tracer.trace_match_failure(node);
422 return false;
423 }
424 }
425 NodeTypeIR::Anonymous(Some(expected)) => {
426 if node.kind_id() != expected.get() {
428 tracer.trace_match_failure(node);
429 return false;
430 }
431 }
432 }
433
434 if let Some(expected) = m.node_field
436 && self.cursor.field_id() != Some(expected)
437 {
438 tracer.trace_field_failure(node);
439 return false;
440 }
441 true
442 }
443
444 fn branch_to_successors<T: Tracer>(
445 &mut self,
446 m: Match<'_>,
447 tracer: &mut T,
448 ) -> Result<(), RuntimeError> {
449 if m.succ_count() == 0 {
450 return Err(RuntimeError::Accept);
451 }
452
453 for i in (1..m.succ_count()).rev() {
455 self.checkpoints
456 .push(self.create_checkpoint(m.successor(i).get(), None));
457 tracer.trace_checkpoint_created(self.ip);
458 }
459
460 self.ip = m.successor(0).get();
461 Ok(())
462 }
463
464 fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
465 if self.recursion_depth >= self.limits.recursion_limit {
466 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
467 }
468
469 let skip_policy = if self.skip_call_nav {
471 self.skip_call_nav = false;
473 self.check_field(c.node_field, tracer)?;
474 skip_policy_for_nav(c.nav)
475 } else {
476 self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
478 };
479
480 if let Some(policy) = skip_policy
482 && policy != SkipPolicy::Exact
483 {
484 self.checkpoints
485 .push(self.create_checkpoint(self.ip, Some(policy)));
486 tracer.trace_checkpoint_created(self.ip);
487 }
488
489 let saved_depth = self.cursor.depth();
491 tracer.trace_call(c.target.get());
492 self.frames.push(c.next.get(), saved_depth);
493 self.recursion_depth += 1;
494 self.ip = c.target.get();
495 Ok(())
496 }
497
498 fn exec_trampoline<T: Tracer>(
503 &mut self,
504 t: Trampoline,
505 tracer: &mut T,
506 ) -> Result<(), RuntimeError> {
507 if self.recursion_depth >= self.limits.recursion_limit {
508 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
509 }
510
511 let saved_depth = self.cursor.depth();
513 tracer.trace_call(self.entrypoint_target);
514 self.frames.push(t.next.get(), saved_depth);
515 self.recursion_depth += 1;
516 self.ip = self.entrypoint_target;
517 Ok(())
518 }
519
520 fn navigate_to_field_with_policy<T: Tracer>(
524 &mut self,
525 nav: Nav,
526 field: Option<std::num::NonZeroU16>,
527 tracer: &mut T,
528 ) -> Result<Option<SkipPolicy>, RuntimeError> {
529 if nav == Nav::Stay || nav == Nav::StayExact {
530 self.check_field(field, tracer)?;
531 return Ok(None);
532 }
533
534 let Some(policy) = self.cursor.navigate(nav) else {
535 tracer.trace_nav_failure(nav);
536 return Err(self.backtrack(tracer).unwrap_err());
537 };
538 tracer.trace_nav(nav, self.cursor.node());
539
540 let Some(field_id) = field else {
541 return Ok(Some(policy));
542 };
543
544 let cont_nav = continuation_nav(nav);
545 loop {
546 if self.cursor.field_id() == Some(field_id) {
547 tracer.trace_field_success(field_id);
548 return Ok(Some(policy));
549 }
550 tracer.trace_field_failure(self.cursor.node());
551 self.advance_or_backtrack(policy, cont_nav, tracer)?;
552 }
553 }
554
555 fn check_field<T: Tracer>(
556 &mut self,
557 field: Option<std::num::NonZeroU16>,
558 tracer: &mut T,
559 ) -> Result<(), RuntimeError> {
560 let Some(field_id) = field else {
561 return Ok(());
562 };
563 if self.cursor.field_id() != Some(field_id) {
564 tracer.trace_field_failure(self.cursor.node());
565 return self.backtrack(tracer);
566 }
567 tracer.trace_field_success(field_id);
568 Ok(())
569 }
570
571 fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
572 tracer.trace_return();
573
574 if self.frames.is_empty() {
576 return Err(RuntimeError::Accept);
577 }
578
579 let (return_addr, saved_depth) = self.frames.pop();
580 self.recursion_depth -= 1;
581
582 self.frames.prune(self.checkpoints.max_frame_ref());
584
585 self.matched_node = Some(self.cursor.node());
588
589 while self.cursor.depth() > saved_depth {
593 if !self.cursor.goto_parent() {
594 break;
595 }
596 }
597
598 self.ip = return_addr;
599 Ok(())
600 }
601
602 fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
603 let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
604 tracer.trace_backtrack();
605 self.cursor.goto_descendant(cp.descendant_index);
606 self.effects.truncate(cp.effect_watermark);
607 self.frames.restore(cp.frame_index);
608 self.recursion_depth = cp.recursion_depth;
609 self.suppress_depth = cp.suppress_depth;
610
611 if let Some(policy) = cp.skip_policy {
613 if !self.cursor.continue_search(policy) {
614 return self.backtrack(tracer);
615 }
616 tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
617 self.skip_call_nav = true;
618 }
619
620 self.ip = cp.ip;
621 Err(RuntimeError::Backtracked)
622 }
623
624 fn advance_or_backtrack<T: Tracer>(
626 &mut self,
627 policy: SkipPolicy,
628 cont_nav: Nav,
629 tracer: &mut T,
630 ) -> Result<(), RuntimeError> {
631 if !self.cursor.continue_search(policy) {
632 return self.backtrack(tracer);
633 }
634 tracer.trace_nav(cont_nav, self.cursor.node());
635 Ok(())
636 }
637
638 fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
639 use EffectOpcode::*;
640
641 let effect = match op.opcode {
642 SuppressBegin => {
644 tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
645 self.suppress_depth += 1;
646 return;
647 }
648 SuppressEnd => {
649 self.suppress_depth = self
650 .suppress_depth
651 .checked_sub(1)
652 .expect("SuppressEnd without matching SuppressBegin");
653 tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
654 return;
655 }
656
657 _ if self.suppress_depth > 0 => {
659 tracer.trace_effect_suppressed(op.opcode, op.payload);
660 return;
661 }
662
663 Node => {
665 RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
666 }
667 Text => {
668 RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
669 }
670 Arr => RuntimeEffect::Arr,
671 Push => RuntimeEffect::Push,
672 EndArr => RuntimeEffect::EndArr,
673 Obj => RuntimeEffect::Obj,
674 EndObj => RuntimeEffect::EndObj,
675 Set => RuntimeEffect::Set(op.payload as u16),
676 Enum => RuntimeEffect::Enum(op.payload as u16),
677 EndEnum => RuntimeEffect::EndEnum,
678 Clear => RuntimeEffect::Clear,
679 Null => RuntimeEffect::Null,
680 };
681
682 tracer.trace_effect(&effect);
683 self.effects.push(effect);
684 }
685}