1#![allow(
42 clippy::type_complexity,
43 reason = "The private Classifier type is very complex, but we specifically macro it out."
44)]
45use crate::{
46 automaton::{error::CompilerError, Automaton, State},
47 classification::{
48 simd::{self, config_simd, dispatch_simd, Simd, SimdConfiguration},
49 structural::{BracketType, Structural, StructuralIterator as _},
50 },
51 debug,
52 depth::Depth,
53 engine::{
54 error::EngineError,
55 head_skipping::{CanHeadSkip, HeadSkip, ResumeState},
56 select_root_query,
57 tail_skipping::TailSkip,
58 Compiler, Engine, Input,
59 },
60 input::error::InputErrorConvertible as _,
61 result::{
62 approx_span::ApproxSpanRecorder, count::CountRecorder, index::IndexRecorder, nodes::NodesRecorder, Match,
63 MatchCount, MatchIndex, MatchSpan, MatchedNodeType, Recorder, Sink,
64 },
65 string_pattern::StringPattern,
66 FallibleIterator as _, MaskType, BLOCK_SIZE,
67};
68use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
69use smallvec::{smallvec, SmallVec};
70
71#[derive(Clone, Debug)]
76pub struct MainEngine {
77 automaton: Automaton,
78 simd: SimdConfiguration,
79}
80
81static_assertions::assert_impl_all!(MainEngine: Send, Sync);
82
83impl MainEngine {
84 #[inline(always)]
86 #[must_use]
87 pub fn automaton(&self) -> &Automaton {
88 &self.automaton
89 }
90}
91
92impl Compiler for MainEngine {
93 type E = Self;
94
95 #[inline(always)]
96 fn compile_query(query: &JsonPathQuery) -> Result<Self, CompilerError> {
97 let automaton = Automaton::new(query)?;
98 debug!("DFA:\n {}", automaton);
99 let simd = simd::configure();
100 log::info!("SIMD configuration:\n {simd}");
101 Ok(Self { automaton, simd })
102 }
103
104 #[inline(always)]
105 fn from_compiled_query(automaton: Automaton) -> Self::E {
106 let simd = simd::configure();
107 log::info!("SIMD configuration:\n {simd}");
108 Self { automaton, simd }
109 }
110}
111
112impl Engine for MainEngine {
123 #[inline]
124 fn count<I>(&self, input: &I) -> Result<MatchCount, EngineError>
125 where
126 I: Input,
127 {
128 if self.automaton.is_select_root_query() {
129 return select_root_query::count(input);
130 }
131 if self.automaton.is_empty_query() {
132 return Ok(0);
133 }
134
135 let recorder = CountRecorder::new();
136 config_simd!(self.simd => |simd| {
137 let executor = query_executor(&self.automaton, input, &recorder, simd);
138 executor.run()
139 })?;
140
141 Ok(recorder.into())
142 }
143
144 #[inline]
145 fn indices<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
146 where
147 I: Input,
148 S: Sink<MatchIndex>,
149 {
150 if self.automaton.is_select_root_query() {
151 return select_root_query::index(input, sink);
152 }
153 if self.automaton.is_empty_query() {
154 return Ok(());
155 }
156
157 let recorder = IndexRecorder::new(sink, input.leading_padding_len());
158 config_simd!(self.simd => |simd| {
159 let executor = query_executor(&self.automaton, input, &recorder, simd);
160 executor.run()
161 })?;
162
163 Ok(())
164 }
165
166 #[inline]
167 fn approximate_spans<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
168 where
169 I: Input,
170 S: Sink<MatchSpan>,
171 {
172 if self.automaton.is_select_root_query() {
173 return select_root_query::approx_span(input, sink);
174 }
175 if self.automaton.is_empty_query() {
176 return Ok(());
177 }
178
179 let recorder = ApproxSpanRecorder::new(sink, input.leading_padding_len());
180 config_simd!(self.simd => |simd| {
181 let executor = query_executor(&self.automaton, input, &recorder, simd);
182 executor.run()
183 })?;
184
185 Ok(())
186 }
187
188 #[inline]
189 fn matches<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
190 where
191 I: Input,
192 S: Sink<Match>,
193 {
194 if self.automaton.is_select_root_query() {
195 return select_root_query::match_(input, sink);
196 }
197 if self.automaton.is_empty_query() {
198 return Ok(());
199 }
200
201 let recorder = NodesRecorder::build_recorder(sink, input.leading_padding_len());
202 config_simd!(self.simd => |simd| {
203 let executor = query_executor(&self.automaton, input, &recorder, simd);
204 executor.run()
205 })?;
206
207 Ok(())
208 }
209}
210
211macro_rules! Classifier {
215 () => {
216 TailSkip<
217 'i,
218 I::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
219 V::QuotesClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
220 V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
221 V,
222 BLOCK_SIZE>
223 };
224}
225
226struct Executor<'i, 'r, I, R, V> {
228 depth: Depth,
230 state: State,
232 next_event: Option<Structural>,
234 is_list: bool,
236 array_count: JsonUInt,
239 stack: SmallStack,
241 automaton: &'i Automaton,
243 input: &'i I,
245 recorder: &'r R,
247 simd: V,
249}
250
251fn query_executor<'i, 'r, I, R, V: Simd>(
253 automaton: &'i Automaton,
254 input: &'i I,
255 recorder: &'r R,
256 simd: V,
257) -> Executor<'i, 'r, I, R, V>
258where
259 I: Input,
260 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
261{
262 Executor {
263 depth: Depth::ZERO,
264 state: automaton.initial_state(),
265 stack: SmallStack::new(),
266 automaton,
267 input,
268 recorder,
269 simd,
270 next_event: None,
271 is_list: false,
272 array_count: JsonUInt::ZERO,
273 }
274}
275
276impl<'i, 'r, I, R, V> Executor<'i, 'r, I, R, V>
277where
278 'i: 'r,
279 I: Input,
280 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
281 V: Simd,
282{
283 fn run(mut self) -> Result<(), EngineError> {
284 let mb_head_skip = HeadSkip::new(self.input, self.automaton, self.simd);
288
289 match mb_head_skip {
290 Some(head_skip) => head_skip.run_head_skipping(&mut self),
291 None => self.run_and_exit(),
292 }
293 }
294
295 fn run_and_exit(mut self) -> Result<(), EngineError> {
298 let iter = self.input.iter_blocks(self.recorder);
299 let quote_classifier = self.simd.classify_quoted_sequences(iter);
300 let structural_classifier = self.simd.classify_structural_characters(quote_classifier);
301 let mut classifier = TailSkip::new(structural_classifier, self.simd);
302
303 self.run_on_subtree(&mut classifier)?;
304
305 self.verify_subtree_closed()
306 }
307
308 fn run_on_subtree(&mut self, classifier: &mut Classifier!()) -> Result<(), EngineError> {
312 dispatch_simd!(self.simd; self, classifier =>
313 fn<'i, 'r, I, R, V>(eng: &mut Executor<'i, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError>
314 where
315 'i: 'r,
316 I: Input,
317 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
318 V: Simd
319 {
320 loop {
321 if eng.next_event.is_none() {
323 eng.next_event = match classifier.next() {
324 Ok(e) => e,
325 Err(err) => return Err(EngineError::InputError(err)),
326 };
327 }
328 if let Some(event) = eng.next_event.take() {
329 debug!("====================");
330 debug!("Event = {:?}", event);
331 debug!("Depth = {:?}", eng.depth);
332 debug!("Stack = {:?}", eng.stack);
333 debug!("State = {:?}", eng.state);
334 debug!("====================");
335
336 match event {
337 Structural::Colon(idx) => eng.handle_colon(classifier, idx)?,
338 Structural::Comma(idx) => eng.handle_comma(classifier, idx)?,
339 Structural::Opening(b, idx) => eng.handle_opening(classifier, b, idx)?,
340 Structural::Closing(_, idx) => {
341 eng.handle_closing(classifier, idx)?;
342
343 if eng.depth == Depth::ZERO {
344 break;
345 }
346 }
347 }
348 } else {
349 break;
350 }
351 }
352
353 Ok(())
354 })
355 }
356
357 #[inline(always)]
361 fn handle_colon(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
362 debug!("Colon");
363
364 if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
367 if c == b'{' || c == b'[' {
368 return Ok(());
369 }
370 }
371
372 let mut any_matched = false;
375
376 for (member_name, target) in self.automaton[self.state].member_transitions() {
377 if self.automaton.is_accepting(*target) && self.is_match(idx, member_name.as_ref())? {
378 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
379 any_matched = true;
380 break;
381 }
382 }
383 let fallback_state = self.automaton[self.state].fallback_state();
385 if !any_matched && self.automaton.is_accepting(fallback_state) {
386 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
387 }
388
389 if any_matched && self.automaton.is_unitary(self.state) {
393 self.next_event = classifier.next()?;
399 match self.next_event {
400 None | Some(Structural::Closing(_, _)) => {
401 return Ok(());
402 }
403 Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
404 Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
405 }
406 let bracket_type = self.current_node_bracket_type();
407 debug!("Skipping unique state from {bracket_type:?}");
408 let stop_at = classifier.skip(bracket_type)?;
409 self.next_event = Some(Structural::Closing(bracket_type, stop_at));
412 }
413
414 Ok(())
415 }
416
417 #[inline(always)]
421 fn handle_comma(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
422 debug!("Comma");
423
424 self.recorder.record_value_terminator(idx, self.depth)?;
425
426 if self.is_list {
427 if self.array_count.try_increment().is_err() {
429 return Ok(());
430 }
431
432 if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
435 if c == b'{' || c == b'[' {
436 return Ok(());
437 }
438 }
439
440 let is_fallback_accepting = self.automaton.is_accepting(self.automaton[self.state].fallback_state());
442
443 if is_fallback_accepting
444 || self
445 .automaton
446 .has_array_index_transition_to_accepting(self.state, &self.array_count)
447 {
448 debug!("Accepting list item on comma.");
449 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
450
451 if self.automaton.is_unitary(self.state) {
452 self.next_event = classifier.next()?;
453 match self.next_event {
454 None | Some(Structural::Closing(_, _)) => {
455 return Ok(());
456 }
457 Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
458 Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
459 }
460 debug!("Skipping unique state from {:?}", BracketType::Square);
461 let stop_at = classifier.skip(BracketType::Square)?;
462 self.next_event = Some(Structural::Closing(BracketType::Square, stop_at));
463 }
464 }
465 }
466
467 Ok(())
468 }
469
470 #[inline(always)]
472 fn handle_opening(
473 &mut self,
474 classifier: &mut Classifier!(),
475 bracket_type: BracketType,
476 idx: usize,
477 ) -> Result<(), EngineError> {
478 debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
479
480 let mut any_matched = false;
482 if self.is_list {
483 for trans in self.automaton[self.state].array_transitions() {
484 if trans.matches(self.array_count) {
485 let target = trans.target_state();
486 any_matched = true;
487 self.transition_to(target, bracket_type);
488 if self.automaton.is_accepting(target) {
489 debug!("Accept {idx}");
490 self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
491 }
492 break;
493 }
494 }
495 } else {
496 let colon_idx = self.find_preceding_colon(idx);
497
498 for (member_name, target) in self.automaton[self.state].member_transitions() {
499 if let Some(colon_idx) = colon_idx {
500 if self.is_match(colon_idx, member_name.as_ref())? {
501 any_matched = true;
502 self.transition_to(*target, bracket_type);
503 if self.automaton.is_accepting(*target) {
504 debug!("Accept {idx}");
505 self.record_match_detected_at(colon_idx + 1, NodeType::Complex(bracket_type))?;
506 }
507 break;
508 }
509 }
510 }
511 }
512
513 if !any_matched && self.depth != Depth::ZERO {
515 let fallback = self.automaton[self.state].fallback_state();
516 debug!("Falling back to {fallback}");
517
518 if self.automaton.is_rejecting(fallback) {
519 let closing_idx = classifier.skip(bracket_type)?;
522 return self.recorder.record_value_terminator(closing_idx, self.depth);
523 } else {
524 self.transition_to(fallback, bracket_type);
525 }
526
527 if self.automaton.is_accepting(fallback) {
528 self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
529 }
530 }
531
532 self.depth
534 .increment()
535 .map_err(|err| EngineError::DepthAboveLimit(idx, err))?;
536
537 self.is_list = bracket_type == BracketType::Square;
538 let mut needs_commas = false;
539
540 if self.is_list {
546 let fallback = self.automaton[self.state].fallback_state();
547 let is_fallback_accepting = self.automaton.is_accepting(fallback);
548
549 if is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state) {
550 needs_commas = true;
551 self.array_count = JsonUInt::ZERO;
552 debug!("Initialized array count to {}", self.array_count);
553
554 let wants_first_item =
555 is_fallback_accepting || self.automaton.has_first_array_index_transition_to_accepting(self.state);
556
557 if wants_first_item {
558 let next = self.input.seek_non_whitespace_forward(idx + 1).e()?;
559
560 match next {
567 Some((_, b'[' | b'{' | b']')) => (), Some((value_idx, _)) => {
569 self.record_match_detected_at(value_idx, NodeType::Atomic)?;
570 }
571 _ => (),
572 }
573 }
574 }
575 }
576
577 if !self.is_list && self.automaton.has_transition_to_accepting(self.state) {
579 classifier.turn_colons_and_commas_on(idx);
583 } else if needs_commas {
584 classifier.turn_colons_off();
585 classifier.turn_commas_on(idx);
586 } else {
587 classifier.turn_colons_and_commas_off();
588 }
589
590 Ok(())
591 }
592
593 #[inline(always)]
595 fn handle_closing(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
596 debug!("Closing, decreasing depth and popping stack.");
597
598 self.depth
599 .decrement()
600 .map_err(|err| EngineError::DepthBelowZero(idx, err))?;
601 self.recorder.record_value_terminator(idx, self.depth)?;
602
603 if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
605 self.state = stack_frame.state;
606 self.is_list = stack_frame.is_list;
607 self.array_count = stack_frame.array_count;
608
609 debug!("Restored array count to {}", self.array_count);
610
611 if self.automaton.is_unitary(self.state) {
614 let bracket_type = self.current_node_bracket_type();
615 debug!("Skipping unique state from {bracket_type:?}");
616 let close_idx = classifier.skip(bracket_type)?;
617 self.next_event = Some(Structural::Closing(bracket_type, close_idx));
620 return Ok(());
621 }
622 }
623
624 if self.is_list {
625 if self.automaton.is_accepting(self.automaton[self.state].fallback_state())
626 || self.automaton.has_any_array_item_transition(self.state)
627 {
628 classifier.turn_commas_on(idx);
629 } else {
630 classifier.turn_commas_off();
631 }
632 } else if self.automaton.has_transition_to_accepting(self.state) {
633 classifier.turn_colons_and_commas_on(idx);
634 } else {
635 classifier.turn_colons_off();
636 }
637
638 Ok(())
639 }
640
641 #[inline(always)]
644 fn transition_to(&mut self, target: State, opening: BracketType) {
645 let target_is_list = opening == BracketType::Square;
646
647 let fallback = self.automaton[self.state].fallback_state();
648 let is_fallback_accepting = self.automaton.is_accepting(fallback);
649 let searching_list = is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state);
650
651 if target != self.state || target_is_list != self.is_list || searching_list {
653 debug!(
654 "push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
655 self.state, self.array_count
656 );
657
658 self.stack.push(StackFrame {
659 depth: *self.depth,
660 state: self.state,
661 is_list: self.is_list,
662 array_count: self.array_count,
663 });
664 self.state = target;
665 }
666 }
667
668 fn find_preceding_colon(&self, idx: usize) -> Option<usize> {
670 if self.depth == Depth::ZERO {
671 None
672 } else {
673 let (char_idx, char) = self.input.seek_non_whitespace_backward(idx - 1)?;
674
675 (char == b':').then_some(char_idx)
676 }
677 }
678
679 #[inline(always)]
681 fn is_match(&self, idx: usize, member_name: &StringPattern) -> Result<bool, EngineError> {
682 let len = member_name.quoted().len();
683
684 let closing_quote_idx = match self.input.seek_backward(idx - 1, b'"') {
686 Some(x) => x,
687 None => return Err(EngineError::MalformedStringQuotes(idx - 1)),
688 };
689
690 if closing_quote_idx + 1 < len {
692 return Ok(false);
693 }
694
695 let start_idx = closing_quote_idx + 1 - len;
697 self.input
698 .is_member_match(start_idx, closing_quote_idx + 1, member_name)
699 .map_err(|x| x.into().into())
700 }
701
702 fn record_match_detected_at(&self, start_idx: usize, ty: NodeType) -> Result<(), EngineError> {
704 debug!("Reporting result somewhere after {start_idx} with node type {ty:?}");
705
706 let index = match ty {
707 NodeType::Complex(BracketType::Curly) => self.input.seek_forward(start_idx, [b'{']).e()?,
708 NodeType::Complex(BracketType::Square) => self.input.seek_forward(start_idx, [b'[']).e()?,
709 NodeType::Atomic => self.input.seek_non_whitespace_forward(start_idx).e()?,
710 }
711 .map(|x| x.0);
712
713 match index {
714 Some(idx) => self.recorder.record_match(idx, self.depth, ty.into()),
715 None => Err(EngineError::MissingItem()),
716 }
717 }
718
719 fn verify_subtree_closed(&self) -> Result<(), EngineError> {
721 if self.depth != Depth::ZERO {
722 Err(EngineError::MissingClosingCharacter())
723 } else {
724 Ok(())
725 }
726 }
727
728 fn current_node_bracket_type(&self) -> BracketType {
730 if self.is_list {
731 BracketType::Square
732 } else {
733 BracketType::Curly
734 }
735 }
736}
737
738#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
741struct StackFrame {
742 depth: u8,
743 state: State,
744 is_list: bool,
745 array_count: JsonUInt,
746}
747
748#[derive(Debug)]
749struct SmallStack {
750 contents: SmallVec<[StackFrame; 128]>,
751}
752
753impl SmallStack {
754 fn new() -> Self {
755 Self { contents: smallvec![] }
756 }
757
758 #[inline]
759 fn peek(&self) -> Option<StackFrame> {
760 self.contents.last().copied()
761 }
762
763 #[inline]
764 fn pop_if_at_or_below(&mut self, depth: u8) -> Option<StackFrame> {
765 if let Some(stack_frame) = self.peek() {
766 if depth <= stack_frame.depth {
767 return self.contents.pop();
768 }
769 }
770 None
771 }
772
773 #[inline]
774 fn push(&mut self, value: StackFrame) {
775 self.contents.push(value)
776 }
777}
778
779impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, 'r, I, R, V>
780where
781 I: Input,
782 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
783 V: Simd,
784 'i: 'r,
785{
786 fn run_on_subtree(
787 &mut self,
788 next_event: Structural,
789 state: State,
790 structural_classifier: V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
791 ) -> Result<ResumeState<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>, V, MaskType>, EngineError> {
792 let mut classifier = TailSkip::new(structural_classifier, self.simd);
793
794 self.state = state;
795 self.next_event = Some(next_event);
796
797 self.run_on_subtree(&mut classifier)?;
798 self.verify_subtree_closed()?;
799
800 Ok(ResumeState(classifier.stop()))
801 }
802
803 fn recorder(&mut self) -> &'r R {
804 self.recorder
805 }
806}
807
808#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
809enum NodeType {
810 Atomic,
811 Complex(BracketType),
812}
813
814impl From<NodeType> for MatchedNodeType {
815 #[inline(always)]
816 fn from(value: NodeType) -> Self {
817 match value {
818 NodeType::Atomic => Self::Atomic,
819 NodeType::Complex(_) => Self::Complex,
820 }
821 }
822}