1use arborium_tree_sitter::{Node, Tree};
4
5use plotnik_bytecode::{
6 Call, EffectOp, EffectOpcode, Entrypoint, Instruction, Match, Module, Nav, NodeTypeIR,
7 PredicateOp, StepAddr, Trampoline,
8};
9
10fn continuation_nav(nav: Nav) -> Nav {
13 match nav {
14 Nav::Down | Nav::Next => Nav::Next,
15 Nav::DownSkip | Nav::NextSkip => Nav::NextSkip,
16 Nav::DownExact | Nav::NextExact => Nav::NextExact,
17 Nav::Epsilon
18 | Nav::Up(_)
19 | Nav::UpSkipTrivia(_)
20 | Nav::UpExact(_)
21 | Nav::Stay
22 | Nav::StayExact => Nav::Next,
23 }
24}
25
26use super::checkpoint::{Checkpoint, CheckpointStack};
27use super::cursor::{CursorWrapper, SkipPolicy};
28
29fn skip_policy_for_nav(nav: Nav) -> Option<SkipPolicy> {
33 match nav {
34 Nav::Down | Nav::Next => Some(SkipPolicy::Any),
35 Nav::DownSkip | Nav::NextSkip => Some(SkipPolicy::Trivia),
36 Nav::DownExact | Nav::NextExact => Some(SkipPolicy::Exact),
37 Nav::Epsilon
38 | Nav::Stay
39 | Nav::StayExact
40 | Nav::Up(_)
41 | Nav::UpSkipTrivia(_)
42 | Nav::UpExact(_) => None,
43 }
44}
45use super::effect::{EffectLog, RuntimeEffect};
46use super::error::RuntimeError;
47use super::frame::FrameArena;
48use super::trace::{NoopTracer, Tracer};
49
50#[derive(Clone, Copy, Debug)]
52pub struct FuelLimits {
53 pub(crate) exec_fuel: u32,
55 pub(crate) recursion_limit: u32,
57}
58
59impl Default for FuelLimits {
60 fn default() -> Self {
61 Self {
62 exec_fuel: 1_000_000,
63 recursion_limit: 1024,
64 }
65 }
66}
67
68impl FuelLimits {
69 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn exec_fuel(mut self, fuel: u32) -> Self {
76 self.exec_fuel = fuel;
77 self
78 }
79
80 pub fn recursion_limit(mut self, limit: u32) -> Self {
82 self.recursion_limit = limit;
83 self
84 }
85
86 pub fn get_exec_fuel(&self) -> u32 {
87 self.exec_fuel
88 }
89 pub fn get_recursion_limit(&self) -> u32 {
90 self.recursion_limit
91 }
92}
93
94pub struct VM<'t> {
96 pub(crate) cursor: CursorWrapper<'t>,
97 pub(crate) ip: u16,
99 pub(crate) frames: FrameArena,
100 pub(crate) checkpoints: CheckpointStack,
101 pub(crate) effects: EffectLog<'t>,
102 pub(crate) matched_node: Option<Node<'t>>,
103
104 pub(crate) exec_fuel: u32,
106 pub(crate) recursion_depth: u32,
107 pub(crate) limits: FuelLimits,
108
109 pub(crate) skip_call_nav: bool,
114
115 pub(crate) suppress_depth: u16,
118
119 pub(crate) entrypoint_target: u16,
122
123 pub(crate) source: &'t str,
125}
126
127pub struct VMBuilder<'t> {
129 source: &'t str,
130 tree: &'t Tree,
131 trivia_types: Vec<u16>,
132 limits: FuelLimits,
133}
134
135impl<'t> VMBuilder<'t> {
136 pub fn new(source: &'t str, tree: &'t Tree) -> Self {
138 Self {
139 source,
140 tree,
141 trivia_types: Vec::new(),
142 limits: FuelLimits::default(),
143 }
144 }
145
146 pub fn trivia_types(mut self, types: Vec<u16>) -> Self {
148 self.trivia_types = types;
149 self
150 }
151
152 pub fn limits(mut self, limits: FuelLimits) -> Self {
154 self.limits = limits;
155 self
156 }
157
158 pub fn exec_fuel(mut self, fuel: u32) -> Self {
160 self.limits = self.limits.exec_fuel(fuel);
161 self
162 }
163
164 pub fn recursion_limit(mut self, limit: u32) -> Self {
166 self.limits = self.limits.recursion_limit(limit);
167 self
168 }
169
170 pub fn build(self) -> VM<'t> {
172 VM {
173 cursor: CursorWrapper::new(self.tree.walk(), self.trivia_types),
174 ip: 0,
175 frames: FrameArena::new(),
176 checkpoints: CheckpointStack::new(),
177 effects: EffectLog::new(),
178 matched_node: None,
179 exec_fuel: self.limits.get_exec_fuel(),
180 recursion_depth: 0,
181 limits: self.limits,
182 skip_call_nav: false,
183 suppress_depth: 0,
184 entrypoint_target: 0,
185 source: self.source,
186 }
187 }
188}
189
190impl<'t> VM<'t> {
191 pub fn builder(source: &'t str, tree: &'t Tree) -> VMBuilder<'t> {
193 VMBuilder::new(source, tree)
194 }
195
196 #[deprecated(note = "Use VM::builder(source, tree).trivia_types(...).build() instead")]
198 pub fn new(source: &'t str, tree: &'t Tree, trivia_types: Vec<u16>, limits: FuelLimits) -> Self {
199 Self::builder(source, tree)
200 .trivia_types(trivia_types)
201 .limits(limits)
202 .build()
203 }
204
205 fn create_checkpoint(&self, ip: u16, skip_policy: Option<SkipPolicy>) -> Checkpoint {
207 Checkpoint::new(
208 self.cursor.descendant_index(),
209 self.effects.len(),
210 self.frames.current(),
211 self.recursion_depth,
212 ip,
213 skip_policy,
214 self.suppress_depth,
215 )
216 }
217
218 pub fn execute(
223 self,
224 module: &Module,
225 bootstrap: StepAddr,
226 entrypoint: &Entrypoint,
227 ) -> Result<EffectLog<'t>, RuntimeError> {
228 self.execute_with(module, bootstrap, entrypoint, &mut NoopTracer)
229 }
230
231 pub fn execute_with<T: Tracer>(
238 mut self,
239 module: &Module,
240 bootstrap: StepAddr,
241 entrypoint: &Entrypoint,
242 tracer: &mut T,
243 ) -> Result<EffectLog<'t>, RuntimeError> {
244 self.ip = bootstrap;
247 self.entrypoint_target = entrypoint.target;
248 tracer.trace_enter_preamble();
249
250 loop {
251 if self.exec_fuel == 0 {
253 return Err(RuntimeError::ExecFuelExhausted(self.limits.exec_fuel));
254 }
255 self.exec_fuel -= 1;
256
257 let instr = module.decode_step(self.ip);
259 tracer.trace_instruction(self.ip, &instr);
260
261 let result = match instr {
262 Instruction::Match(m) => self.exec_match(m, module, tracer),
263 Instruction::Call(c) => self.exec_call(c, tracer),
264 Instruction::Return(_) => self.exec_return(tracer),
265 Instruction::Trampoline(t) => self.exec_trampoline(t, tracer),
266 };
267
268 match result {
269 Ok(()) | Err(RuntimeError::Backtracked) => continue,
270 Err(RuntimeError::Accept) => return Ok(self.effects),
271 Err(e) => return Err(e),
272 }
273 }
274 }
275
276 fn exec_match<T: Tracer>(
277 &mut self,
278 m: Match<'_>,
279 module: &Module,
280 tracer: &mut T,
281 ) -> Result<(), RuntimeError> {
282 for effect_op in m.pre_effects() {
283 self.emit_effect(effect_op, tracer);
284 }
285
286 if !m.is_epsilon() {
289 self.matched_node = None;
290 self.navigate_and_match(m, module, tracer)?;
291 }
292
293 for effect_op in m.post_effects() {
294 self.emit_effect(effect_op, tracer);
295 }
296
297 self.branch_to_successors(m, tracer)
298 }
299
300 fn navigate_and_match<T: Tracer>(
301 &mut self,
302 m: Match<'_>,
303 module: &Module,
304 tracer: &mut T,
305 ) -> Result<(), RuntimeError> {
306 let Some(policy) = self.cursor.navigate(m.nav) else {
307 tracer.trace_nav_failure(m.nav);
308 return self.backtrack(tracer);
309 };
310 tracer.trace_nav(m.nav, self.cursor.node());
311
312 let cont_nav = continuation_nav(m.nav);
313 loop {
314 if !self.node_matches(m, tracer) {
315 self.advance_or_backtrack(policy, cont_nav, tracer)?;
316 continue;
317 }
318 break;
319 }
320
321 tracer.trace_match_success(self.cursor.node());
322 if let Some(field_id) = m.node_field {
323 tracer.trace_field_success(field_id);
324 }
325
326 self.matched_node = Some(self.cursor.node());
327
328 for field_id in m.neg_fields() {
329 if self.cursor.node().child_by_field_id(field_id).is_some() {
330 return self.backtrack(tracer);
331 }
332 }
333
334 if let Some((op, is_regex, value_ref)) = m.predicate()
336 && !self.evaluate_predicate(op, is_regex, value_ref, module)
337 {
338 return self.backtrack(tracer);
339 }
340
341 Ok(())
342 }
343
344 fn evaluate_predicate(&self, op: u8, is_regex: bool, value_ref: u16, module: &Module) -> bool {
350 let node = self.cursor.node();
351 let node_text = &self.source[node.start_byte()..node.end_byte()];
352 let op = PredicateOp::from_byte(op);
353
354 if is_regex {
355 let regex_bytes = module.regexes().get_by_index(value_ref as usize);
356 assert!(
357 !regex_bytes.is_empty(),
358 "regex predicate references empty DFA bytes"
359 );
360 let dfa = plotnik_bytecode::deserialize_dfa(regex_bytes)
361 .expect("regex DFA deserialization failed");
362
363 use regex_automata::dfa::Automaton;
364 use regex_automata::Input;
365 let input = Input::new(node_text);
366 let matched = dfa.try_search_fwd(&input).ok().flatten().is_some();
367
368 match op {
369 PredicateOp::RegexMatch => matched,
370 PredicateOp::RegexNoMatch => !matched,
371 _ => unreachable!("non-regex op with is_regex=true"),
372 }
373 } else {
374 let target = module.strings().get_by_index(value_ref as usize);
375
376 match op {
377 PredicateOp::Eq => node_text == target,
378 PredicateOp::Ne => node_text != target,
379 PredicateOp::StartsWith => node_text.starts_with(target),
380 PredicateOp::EndsWith => node_text.ends_with(target),
381 PredicateOp::Contains => node_text.contains(target),
382 _ => unreachable!("regex op with is_regex=false"),
383 }
384 }
385 }
386
387 fn node_matches<T: Tracer>(&self, m: Match<'_>, tracer: &mut T) -> bool {
389 let node = self.cursor.node();
390
391 match m.node_type {
393 NodeTypeIR::Any => {
394 }
396 NodeTypeIR::Named(None) => {
397 if !node.is_named() {
399 tracer.trace_match_failure(node);
400 return false;
401 }
402 }
403 NodeTypeIR::Named(Some(expected)) => {
404 if node.kind_id() != expected.get() {
406 tracer.trace_match_failure(node);
407 return false;
408 }
409 }
410 NodeTypeIR::Anonymous(None) => {
411 if node.is_named() {
413 tracer.trace_match_failure(node);
414 return false;
415 }
416 }
417 NodeTypeIR::Anonymous(Some(expected)) => {
418 if node.kind_id() != expected.get() {
420 tracer.trace_match_failure(node);
421 return false;
422 }
423 }
424 }
425
426 if let Some(expected) = m.node_field
428 && self.cursor.field_id() != Some(expected)
429 {
430 tracer.trace_field_failure(node);
431 return false;
432 }
433 true
434 }
435
436 fn branch_to_successors<T: Tracer>(
437 &mut self,
438 m: Match<'_>,
439 tracer: &mut T,
440 ) -> Result<(), RuntimeError> {
441 if m.succ_count() == 0 {
442 return Err(RuntimeError::Accept);
443 }
444
445 for i in (1..m.succ_count()).rev() {
447 self.checkpoints
448 .push(self.create_checkpoint(m.successor(i).get(), None));
449 tracer.trace_checkpoint_created(self.ip);
450 }
451
452 self.ip = m.successor(0).get();
453 Ok(())
454 }
455
456 fn exec_call<T: Tracer>(&mut self, c: Call, tracer: &mut T) -> Result<(), RuntimeError> {
457 if self.recursion_depth >= self.limits.recursion_limit {
458 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
459 }
460
461 let skip_policy = if self.skip_call_nav {
463 self.skip_call_nav = false;
465 self.check_field(c.node_field, tracer)?;
466 skip_policy_for_nav(c.nav)
467 } else {
468 self.navigate_to_field_with_policy(c.nav, c.node_field, tracer)?
470 };
471
472 if let Some(policy) = skip_policy
474 && policy != SkipPolicy::Exact
475 {
476 self.checkpoints
477 .push(self.create_checkpoint(self.ip, Some(policy)));
478 tracer.trace_checkpoint_created(self.ip);
479 }
480
481 let saved_depth = self.cursor.depth();
483 tracer.trace_call(c.target.get());
484 self.frames.push(c.next.get(), saved_depth);
485 self.recursion_depth += 1;
486 self.ip = c.target.get();
487 Ok(())
488 }
489
490 fn exec_trampoline<T: Tracer>(
495 &mut self,
496 t: Trampoline,
497 tracer: &mut T,
498 ) -> Result<(), RuntimeError> {
499 if self.recursion_depth >= self.limits.recursion_limit {
500 return Err(RuntimeError::RecursionLimitExceeded(self.recursion_depth));
501 }
502
503 let saved_depth = self.cursor.depth();
505 tracer.trace_call(self.entrypoint_target);
506 self.frames.push(t.next.get(), saved_depth);
507 self.recursion_depth += 1;
508 self.ip = self.entrypoint_target;
509 Ok(())
510 }
511
512 fn navigate_to_field_with_policy<T: Tracer>(
516 &mut self,
517 nav: Nav,
518 field: Option<std::num::NonZeroU16>,
519 tracer: &mut T,
520 ) -> Result<Option<SkipPolicy>, RuntimeError> {
521 if nav == Nav::Stay || nav == Nav::StayExact {
522 self.check_field(field, tracer)?;
523 return Ok(None);
524 }
525
526 let Some(policy) = self.cursor.navigate(nav) else {
527 tracer.trace_nav_failure(nav);
528 return Err(self.backtrack(tracer).unwrap_err());
529 };
530 tracer.trace_nav(nav, self.cursor.node());
531
532 let Some(field_id) = field else {
533 return Ok(Some(policy));
534 };
535
536 let cont_nav = continuation_nav(nav);
537 loop {
538 if self.cursor.field_id() == Some(field_id) {
539 tracer.trace_field_success(field_id);
540 return Ok(Some(policy));
541 }
542 tracer.trace_field_failure(self.cursor.node());
543 self.advance_or_backtrack(policy, cont_nav, tracer)?;
544 }
545 }
546
547 fn check_field<T: Tracer>(
548 &mut self,
549 field: Option<std::num::NonZeroU16>,
550 tracer: &mut T,
551 ) -> Result<(), RuntimeError> {
552 let Some(field_id) = field else {
553 return Ok(());
554 };
555 if self.cursor.field_id() != Some(field_id) {
556 tracer.trace_field_failure(self.cursor.node());
557 return self.backtrack(tracer);
558 }
559 tracer.trace_field_success(field_id);
560 Ok(())
561 }
562
563 fn exec_return<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
564 tracer.trace_return();
565
566 if self.frames.is_empty() {
568 return Err(RuntimeError::Accept);
569 }
570
571 let (return_addr, saved_depth) = self.frames.pop();
572 self.recursion_depth -= 1;
573
574 self.frames.prune(self.checkpoints.max_frame_ref());
576
577 self.matched_node = Some(self.cursor.node());
580
581 while self.cursor.depth() > saved_depth {
585 if !self.cursor.goto_parent() {
586 break;
587 }
588 }
589
590 self.ip = return_addr;
591 Ok(())
592 }
593
594 fn backtrack<T: Tracer>(&mut self, tracer: &mut T) -> Result<(), RuntimeError> {
595 let cp = self.checkpoints.pop().ok_or(RuntimeError::NoMatch)?;
596 tracer.trace_backtrack();
597 self.cursor.goto_descendant(cp.descendant_index);
598 self.effects.truncate(cp.effect_watermark);
599 self.frames.restore(cp.frame_index);
600 self.recursion_depth = cp.recursion_depth;
601 self.suppress_depth = cp.suppress_depth;
602
603 if let Some(policy) = cp.skip_policy {
605 if !self.cursor.continue_search(policy) {
606 return self.backtrack(tracer);
607 }
608 tracer.trace_nav(continuation_nav(Nav::Down), self.cursor.node());
609 self.skip_call_nav = true;
610 }
611
612 self.ip = cp.ip;
613 Err(RuntimeError::Backtracked)
614 }
615
616 fn advance_or_backtrack<T: Tracer>(
618 &mut self,
619 policy: SkipPolicy,
620 cont_nav: Nav,
621 tracer: &mut T,
622 ) -> Result<(), RuntimeError> {
623 if !self.cursor.continue_search(policy) {
624 return self.backtrack(tracer);
625 }
626 tracer.trace_nav(cont_nav, self.cursor.node());
627 Ok(())
628 }
629
630 fn emit_effect<T: Tracer>(&mut self, op: EffectOp, tracer: &mut T) {
631 use EffectOpcode::*;
632
633 let effect = match op.opcode {
634 SuppressBegin => {
636 tracer.trace_suppress_control(SuppressBegin, self.suppress_depth > 0);
637 self.suppress_depth += 1;
638 return;
639 }
640 SuppressEnd => {
641 self.suppress_depth = self
642 .suppress_depth
643 .checked_sub(1)
644 .expect("SuppressEnd without matching SuppressBegin");
645 tracer.trace_suppress_control(SuppressEnd, self.suppress_depth > 0);
646 return;
647 }
648
649 _ if self.suppress_depth > 0 => {
651 tracer.trace_effect_suppressed(op.opcode, op.payload);
652 return;
653 }
654
655 Node => {
657 RuntimeEffect::Node(self.matched_node.expect("Node effect without matched_node"))
658 }
659 Text => {
660 RuntimeEffect::Text(self.matched_node.expect("Text effect without matched_node"))
661 }
662 Arr => RuntimeEffect::Arr,
663 Push => RuntimeEffect::Push,
664 EndArr => RuntimeEffect::EndArr,
665 Obj => RuntimeEffect::Obj,
666 EndObj => RuntimeEffect::EndObj,
667 Set => RuntimeEffect::Set(op.payload as u16),
668 Enum => RuntimeEffect::Enum(op.payload as u16),
669 EndEnum => RuntimeEffect::EndEnum,
670 Clear => RuntimeEffect::Clear,
671 Null => RuntimeEffect::Null,
672 };
673
674 tracer.trace_effect(&effect);
675 self.effects.push(effect);
676 }
677}