1use arborium_tree_sitter::{Node, Tree};
4
5use plotnik_bytecode::{
6 Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7 PredicateOp, StepAddr, Trampoline,
8};
9
10fn continuation_nav(nav: Nav) -> Nav {
13 match nav {
14 Nav::Down | Nav::Next => Nav::Next,
15 Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16 Nav::DownExact | Nav::NextExact => Nav::NextExact,
17 Nav::Epsilon
18 | Nav::Up(_)
19 | Nav::UpSkipTrivia(_)
20 | Nav::UpExact(_)
21 | Nav::Stay
22 | Nav::StayExact => Nav::Next,
23 }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33 match nav {
34 Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35 Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36 Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37 Nav::Epsilon
38 | Nav::Stay
39 | Nav::StayExact
40 | Nav::Up(_)
41 | Nav::UpSkipTrivia(_)
42 | Nav::UpExact(_) => None,
43 }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53 pub(crate) exec_fuel: u32,
55 pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60 fn default() -> Self {
61 Self {
62 exec_fuel: 1_000_000,
63 recursion_limit: 1024,
64 }
65 }
66}
67
68impl FuelLimits {
69 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn exec_fuel(mut self, fuel: u32) -> Self {
76 self.exec_fuel = fuel;
77 self
78 }
79
80 pub fn recursion_limit(mut self, limit: u32) -> Self {
82 self.recursion_limit = limit;
83 self
84 }
85
86 pub fn get_exec_fuel(&self) -> u32 {
87 self.exec_fuel
88 }
89 pub fn get_recursion_limit(&self) -> u32 {
90 self.recursion_limit
91 }
92}
93
94pub struct VM<'t> {
96 pub(crate) cursor: CursorWrapper<'t>,
97 pub(crate) ip: u16,
99 pub(crate) frames: FrameArena,
100 pub(crate) checkpoints: CheckpointStack,
101 pub(crate) effects: EffectLog<'t>,
102 pub(crate) matched_node: Option<Node<'t>>,
103
104 pub(crate) exec_fuel: u32,
106 pub(crate) recursion_depth: u32,
107 pub(crate) limits: FuelLimits,
108
109 pub(crate) skip_call_nav: bool,
114
115 pub(crate) suppress_depth: u16,
118
119 pub(crate) entrypoint_target: u16,
122
123 pub(crate) source: &'t str,
125}
126
127pub struct VMBuilder<'t> {
129 source: &'t str,
130 tree: &'t Tree,
131 trivia_types: Vec<u16>,
132 limits: FuelLimits,
133}
134
135impl<'t> VMBuilder<'t> {
136 pub fn new(source: &'t str, tree: &'t Tree) -> Self {
138 Self {
139 source,
140 tree,
141 trivia_types: Vec::new(),
142 limits: FuelLimits::default(),
143 }
144 }
145
146 pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
148 self.trivia_types = types;
149 self
150 }
151
152 pub fn limits(mut self, limits: FuelLimits) -> Self {
154 self.limits = limits;
155 self
156 }
157
158 pub fn exec_fuel(mut self, fuel: u32) -> Self {
160 self.limits = self.limits.exec_fuel(fuel);
161 self
162 }
163
164 pub fn recursion_limit(mut self, limit: u32) -> Self {
166 self.limits = self.limits.recursion_limit(limit);
167 self
168 }
169
170 pub fn build(self) -> VM<'t> {
172 VM {
173 cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
174 ip: 0,
175 frames: FrameArena::new(),
176 checkpoints: CheckpointStack::new(),
177 effects: EffectLog::new(),
178 matched_node: None,
179 exec_fuel: self.limits.get_exec_fuel(),
180 recursion_depth: 0,
181 limits: self.limits,
182 skip_call_nav: false,
183 suppress_depth: 0,
184 entrypoint_target: 0,
185 source: self.source,
186 }
187 }
188}
189
190impl<'t> VM<'t> {
191 pub fn builder(source: &'t str, tree: &'t Tree) -> VMBuilder<'t> {
193 VMBuilder::new(source, tree)
194 }
195
196 #[deprecated(note = "Use VM::builder(source, tree).trivia_types(...).build() instead")]
198 pub fn new(
199 source: &'t str,
200 tree: &'t Tree,
201 trivia_types: Vec<u16>,
202 limits: FuelLimits,
203 ) -> Self {
204 Self::builder(source, tree)
205 .trivia_types(trivia_types)
206 .limits(limits)
207 .build()
208 }
209
210 fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
212 Checkpoint::new(
213 self.cursor.descendant_index(),
214 self.effects.len(),
215 self.frames.current(),
216 self.recursion_depth,
217 ip,
218 skip_policy,
219 self.suppress_depth,
220 )
221 }
222
223 pub fn execute(
228 self,
229 module: &Module,
230 bootstrap: StepAddr,
231 entrypoint: &Entrypoint,
232 ) -> Result<EffectLog<'t>, RuntimeError> {
233 self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
234 }
235
236 pub fn execute_with<T: Tracer>(
243 mut self,
244 module: &Module,
245 bootstrap: StepAddr,
246 entrypoint: &Entrypoint,
247 tracer: &mut T,
248 ) -> Result<EffectLog<'t>, RuntimeError> {
249 self.ip = bootstrap;
252 self.entrypoint_target = entrypoint.target;
253 tracer.trace_enter_preamble();
254
255 loop {
256 if self.exec_fuel == 0 {
258 return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
259 }
260 self.exec_fuel -= 1;
261
262 let instr = module.decode_step(self.ip);
264 tracer.trace_instruction(self.ip, &instr);
265
266 let result = match instr {
267 Instruction::Match(m) => self.exec_match(m, module, tracer),
268 Instruction::Call(c) => self.exec_call(c, tracer),
269 Instruction::Return(_) => self.exec_return(tracer),
270 Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
271 };
272
273 match result {
274 Ok(()) | Err(RuntimeError::Backtracked) => continue,
275 Err(RuntimeError::Accept) => return Ok(self.effects),
276 Err(e) => return Err(e),
277 }
278 }
279 }
280
281 fn exec_match<T: Tracer>(
282 &mut self,
283 m: Match<'_>,
284 module: &Module,
285 tracer: &mut T,
286 ) -> Result<(), RuntimeError> {
287 for effect_op in m.pre_effects() {
288 self.emit_effect(effect_op, tracer);
289 }
290
291 if !m.is_epsilon() {
294 self.matched_node = None;
295 self.navigate_and_match(m, module, tracer)?;
296 }
297
298 for effect_op in m.post_effects() {
299 self.emit_effect(effect_op, tracer);
300 }
301
302 self.branch_to_successors(m, tracer)
303 }
304
305 fn navigate_and_match<T: Tracer>(
306 &mut self,
307 m: Match<'_>,
308 module: &Module,
309 tracer: &mut T,
310 ) -> Result<(), RuntimeError> {
311 let Some(policy) = self.cursor.navigate(m.nav) else {
312 tracer.trace_nav_failure(m.nav);
313 return self.backtrack(tracer);
314 };
315 tracer.trace_nav(m.nav, self.cursor.node());
316
317 let cont_nav = continuation_nav(m.nav);
318 loop {
319 if !self.node_matches(m, tracer) {
320 self.advance_or_backtrack(policy, cont_nav, tracer)?;
321 continue;
322 }
323 break;
324 }
325
326 tracer.trace_match_success(self.cursor.node());
327 if let Some(field_id) = m.node_field {
328 tracer.trace_field_success(field_id);
329 }
330
331 self.matched_node = Some(self.cursor.node());
332
333 for field_id in m.neg_fields() {
334 if self.cursor.node().child_by_field_id(field_id).is_some() {
335 return self.backtrack(tracer);
336 }
337 }
338
339 if let Some((op, is_regex, value_ref)) = m.predicate()
341 && !self.evaluate_predicate(op, is_regex, value_ref, module)
342 {
343 return self.backtrack(tracer);
344 }
345
346 Ok(())
347 }
348
349 fn evaluate_predicate(&self, op: u8, is_regex: bool, value_ref: u16, module: &Module) -> bool {
355 let node = self.cursor.node();
356 let node_text = &self.source[node.start_byte()..node.end_byte()];
357 let op = PredicateOp::from_byte(op);
358
359 if is_regex {
360 let regex_bytes = module.regexes().get_by_index(value_ref as usize);
361 assert!(
362 !regex_bytes.is_empty(),
363 "regex predicate references empty DFA bytes"
364 );
365 let dfa = plotnik_bytecode::deserialize_dfa(regex_bytes)
366 .expect("regex DFA deserialization failed");
367
368 use regex_automata::Input;
369 use regex_automata::dfa::Automaton;
370 let input = Input::new(node_text);
371 let matched = dfa.try_search_fwd(&input).ok().flatten().is_some();
372
373 match op {
374 PredicateOp::RegexMatch => matched,
375 PredicateOp::RegexNoMatch => !matched,
376 _ => unreachable!("non-regex op with is_regex=true"),
377 }
378 } else {
379 let target = module.strings().get_by_index(value_ref as usize);
380
381 match op {
382 PredicateOp::Eq => node_text == target,
383 PredicateOp::Ne => node_text != target,
384 PredicateOp::StartsWith => node_text.starts_with(target),
385 PredicateOp::EndsWith => node_text.ends_with(target),
386 PredicateOp::Contains => node_text.contains(target),
387 _ => unreachable!("regex op with is_regex=false"),
388 }
389 }
390 }
391
392 fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
394 let node = self.cursor.node();
395
396 match m.node_type {
398 NodeTypeIR::Any => {
399 }
401 NodeTypeIR::Named(None) => {
402 if !node.is_named() {
404 tracer.trace_match_failure(node);
405 return false;
406 }
407 }
408 NodeTypeIR::Named(Some(expected)) => {
409 if node.kind_id() != expected.get() {
411 tracer.trace_match_failure(node);
412 return false;
413 }
414 }
415 NodeTypeIR::Anonymous(None) => {
416 if node.is_named() {
418 tracer.trace_match_failure(node);
419 return false;
420 }
421 }
422 NodeTypeIR::Anonymous(Some(expected)) => {
423 if node.kind_id() != expected.get() {
425 tracer.trace_match_failure(node);
426 return false;
427 }
428 }
429 }
430
431 if let Some(expected) = m.node_field
433 && self.cursor.field_id() != Some(expected)
434 {
435 tracer.trace_field_failure(node);
436 return false;
437 }
438 true
439 }
440
441 fn branch_to_successors<T: Tracer>(
442 &mut self,
443 m: Match<'_>,
444 tracer: &mut T,
445 ) -> Result<(), RuntimeError> {
446 if m.succ_count() == 0 {
447 return Err(RuntimeError::Accept);
448 }
449
450 for i in (1..m.succ_count()).rev() {
452 self.checkpoints
453 .push(self.create_checkpoint(m.successor(i).get(), None));
454 tracer.trace_checkpoint_created(self.ip);
455 }
456
457 self.ip = m.successor(0).get();
458 Ok(())
459 }
460
461 fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
462 if self.recursion_depth >= self.limits.recursion_limit {
463 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
464 }
465
466 let skip_policy = if self.skip_call_nav {
468 self.skip_call_nav = false;
470 self.check_field(c.node_field, tracer)?;
471 skip_policy_for_nav(c.nav)
472 } else {
473 self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
475 };
476
477 if let Some(policy) = skip_policy
479 && policy != SkipPolicy::Exact
480 {
481 self.checkpoints
482 .push(self.create_checkpoint(self.ip, Some(policy)));
483 tracer.trace_checkpoint_created(self.ip);
484 }
485
486 let saved_depth = self.cursor.depth();
488 tracer.trace_call(c.target.get());
489 self.frames.push(c.next.get(), saved_depth);
490 self.recursion_depth += 1;
491 self.ip = c.target.get();
492 Ok(())
493 }
494
495 fn exec_trampoline<T: Tracer>(
500 &mut self,
501 t: Trampoline,
502 tracer: &mut T,
503 ) -> Result<(), RuntimeError> {
504 if self.recursion_depth >= self.limits.recursion_limit {
505 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
506 }
507
508 let saved_depth = self.cursor.depth();
510 tracer.trace_call(self.entrypoint_target);
511 self.frames.push(t.next.get(), saved_depth);
512 self.recursion_depth += 1;
513 self.ip = self.entrypoint_target;
514 Ok(())
515 }
516
517 fn navigate_to_field_with_policy<T: Tracer>(
521 &mut self,
522 nav: Nav,
523 field: Option<std::num::NonZeroU16>,
524 tracer: &mut T,
525 ) -> Result<Option<SkipPolicy>, RuntimeError> {
526 if nav == Nav::Stay || nav == Nav::StayExact {
527 self.check_field(field, tracer)?;
528 return Ok(None);
529 }
530
531 let Some(policy) = self.cursor.navigate(nav) else {
532 tracer.trace_nav_failure(nav);
533 return Err(self.backtrack(tracer).unwrap_err());
534 };
535 tracer.trace_nav(nav, self.cursor.node());
536
537 let Some(field_id) = field else {
538 return Ok(Some(policy));
539 };
540
541 let cont_nav = continuation_nav(nav);
542 loop {
543 if self.cursor.field_id() == Some(field_id) {
544 tracer.trace_field_success(field_id);
545 return Ok(Some(policy));
546 }
547 tracer.trace_field_failure(self.cursor.node());
548 self.advance_or_backtrack(policy, cont_nav, tracer)?;
549 }
550 }
551
552 fn check_field<T: Tracer>(
553 &mut self,
554 field: Option<std::num::NonZeroU16>,
555 tracer: &mut T,
556 ) -> Result<(), RuntimeError> {
557 let Some(field_id) = field else {
558 return Ok(());
559 };
560 if self.cursor.field_id() != Some(field_id) {
561 tracer.trace_field_failure(self.cursor.node());
562 return self.backtrack(tracer);
563 }
564 tracer.trace_field_success(field_id);
565 Ok(())
566 }
567
568 fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
569 tracer.trace_return();
570
571 if self.frames.is_empty() {
573 return Err(RuntimeError::Accept);
574 }
575
576 let (return_addr, saved_depth) = self.frames.pop();
577 self.recursion_depth -= 1;
578
579 self.frames.prune(self.checkpoints.max_frame_ref());
581
582 self.matched_node = Some(self.cursor.node());
585
586 while self.cursor.depth() > saved_depth {
590 if !self.cursor.goto_parent() {
591 break;
592 }
593 }
594
595 self.ip = return_addr;
596 Ok(())
597 }
598
599 fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
600 let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
601 tracer.trace_backtrack();
602 self.cursor.goto_descendant(cp.descendant_index);
603 self.effects.truncate(cp.effect_watermark);
604 self.frames.restore(cp.frame_index);
605 self.recursion_depth = cp.recursion_depth;
606 self.suppress_depth = cp.suppress_depth;
607
608 if let Some(policy) = cp.skip_policy {
610 if !self.cursor.continue_search(policy) {
611 return self.backtrack(tracer);
612 }
613 tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
614 self.skip_call_nav = true;
615 }
616
617 self.ip = cp.ip;
618 Err(RuntimeError::Backtracked)
619 }
620
621 fn advance_or_backtrack<T: Tracer>(
623 &mut self,
624 policy: SkipPolicy,
625 cont_nav: Nav,
626 tracer: &mut T,
627 ) -> Result<(), RuntimeError> {
628 if !self.cursor.continue_search(policy) {
629 return self.backtrack(tracer);
630 }
631 tracer.trace_nav(cont_nav, self.cursor.node());
632 Ok(())
633 }
634
635 fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
636 use EffectOpcode::*;
637
638 let effect = match op.opcode {
639 SuppressBegin => {
641 tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
642 self.suppress_depth += 1;
643 return;
644 }
645 SuppressEnd => {
646 self.suppress_depth = self
647 .suppress_depth
648 .checked_sub(1)
649 .expect("SuppressEnd without matching SuppressBegin");
650 tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
651 return;
652 }
653
654 _ if self.suppress_depth > 0 => {
656 tracer.trace_effect_suppressed(op.opcode, op.payload);
657 return;
658 }
659
660 Node => {
662 RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
663 }
664 Text => {
665 RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
666 }
667 Arr => RuntimeEffect::Arr,
668 Push => RuntimeEffect::Push,
669 EndArr => RuntimeEffect::EndArr,
670 Obj => RuntimeEffect::Obj,
671 EndObj => RuntimeEffect::EndObj,
672 Set => RuntimeEffect::Set(op.payload as u16),
673 Enum => RuntimeEffect::Enum(op.payload as u16),
674 EndEnum => RuntimeEffect::EndEnum,
675 Clear => RuntimeEffect::Clear,
676 Null => RuntimeEffect::Null,
677 };
678
679 tracer.trace_effect(&effect);
680 self.effects.push(effect);
681 }
682}