1#![allow(clippy::type_complexity)] use crate::{
43 automaton::{error::CompilerError, Automaton, State},
44 classification::{
45 simd::{self, config_simd, dispatch_simd, Simd, SimdConfiguration},
46 structural::{BracketType, Structural, StructuralIterator},
47 },
48 debug,
49 depth::Depth,
50 engine::{
51 error::EngineError,
52 head_skipping::{CanHeadSkip, HeadSkip, ResumeState},
53 select_root_query,
54 tail_skipping::TailSkip,
55 Compiler, Engine, Input,
56 },
57 input::error::InputErrorConvertible,
58 result::{
59 approx_span::ApproxSpanRecorder, count::CountRecorder, index::IndexRecorder, nodes::NodesRecorder, Match,
60 MatchCount, MatchIndex, MatchSpan, MatchedNodeType, Recorder, Sink,
61 },
62 string_pattern::StringPattern,
63 FallibleIterator, MaskType, BLOCK_SIZE,
64};
65use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
66use smallvec::{smallvec, SmallVec};
67
68#[derive(Clone, Debug)]
73pub struct MainEngine {
74 automaton: Automaton,
75 simd: SimdConfiguration,
76}
77
78static_assertions::assert_impl_all!(MainEngine: Send, Sync);
79
80impl MainEngine {
81 #[inline(always)]
83 #[must_use]
84 pub fn automaton(&self) -> &Automaton {
85 &self.automaton
86 }
87}
88
89impl Compiler for MainEngine {
90 type E = Self;
91
92 #[must_use = "compiling the query only creates an engine instance that should be used"]
93 #[inline(always)]
94 fn compile_query(query: &JsonPathQuery) -> Result<Self, CompilerError> {
95 let automaton = Automaton::new(query)?;
96 debug!("DFA:\n {}", automaton);
97 let simd = simd::configure();
98 log::info!("SIMD configuration:\n {}", simd);
99 Ok(Self { automaton, simd })
100 }
101
102 #[inline(always)]
103 fn from_compiled_query(automaton: Automaton) -> Self::E {
104 let simd = simd::configure();
105 log::info!("SIMD configuration:\n {}", simd);
106 Self { automaton, simd }
107 }
108}
109
110impl Engine for MainEngine {
121 #[inline]
122 fn count<I>(&self, input: &I) -> Result<MatchCount, EngineError>
123 where
124 I: Input,
125 {
126 if self.automaton.is_select_root_query() {
127 return select_root_query::count(input);
128 }
129 if self.automaton.is_empty_query() {
130 return Ok(0);
131 }
132
133 let recorder = CountRecorder::new();
134 config_simd!(self.simd => |simd| {
135 let executor = query_executor(&self.automaton, input, &recorder, simd);
136 executor.run()
137 })?;
138
139 Ok(recorder.into())
140 }
141
142 #[inline]
143 fn indices<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
144 where
145 I: Input,
146 S: Sink<MatchIndex>,
147 {
148 if self.automaton.is_select_root_query() {
149 return select_root_query::index(input, sink);
150 }
151 if self.automaton.is_empty_query() {
152 return Ok(());
153 }
154
155 let recorder = IndexRecorder::new(sink, input.leading_padding_len());
156 config_simd!(self.simd => |simd| {
157 let executor = query_executor(&self.automaton, input, &recorder, simd);
158 executor.run()
159 })?;
160
161 Ok(())
162 }
163
164 #[inline]
165 fn approximate_spans<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
166 where
167 I: Input,
168 S: Sink<MatchSpan>,
169 {
170 if self.automaton.is_select_root_query() {
171 return select_root_query::approx_span(input, sink);
172 }
173 if self.automaton.is_empty_query() {
174 return Ok(());
175 }
176
177 let recorder = ApproxSpanRecorder::new(sink, input.leading_padding_len());
178 config_simd!(self.simd => |simd| {
179 let executor = query_executor(&self.automaton, input, &recorder, simd);
180 executor.run()
181 })?;
182
183 Ok(())
184 }
185
186 #[inline]
187 fn matches<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
188 where
189 I: Input,
190 S: Sink<Match>,
191 {
192 if self.automaton.is_select_root_query() {
193 return select_root_query::match_(input, sink);
194 }
195 if self.automaton.is_empty_query() {
196 return Ok(());
197 }
198
199 let recorder = NodesRecorder::build_recorder(sink, input.leading_padding_len());
200 config_simd!(self.simd => |simd| {
201 let executor = query_executor(&self.automaton, input, &recorder, simd);
202 executor.run()
203 })?;
204
205 Ok(())
206 }
207}
208
209macro_rules! Classifier {
213 () => {
214 TailSkip<
215 'i,
216 I::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
217 V::QuotesClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
218 V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
219 V,
220 BLOCK_SIZE>
221 };
222}
223
224struct Executor<'i, 'r, I, R, V> {
226 depth: Depth,
228 state: State,
230 next_event: Option<Structural>,
232 is_list: bool,
234 array_count: JsonUInt,
237 stack: SmallStack,
239 automaton: &'i Automaton,
241 input: &'i I,
243 recorder: &'r R,
245 simd: V,
247}
248
249fn query_executor<'i, 'r, I, R, V: Simd>(
251 automaton: &'i Automaton,
252 input: &'i I,
253 recorder: &'r R,
254 simd: V,
255) -> Executor<'i, 'r, I, R, V>
256where
257 I: Input,
258 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
259{
260 Executor {
261 depth: Depth::ZERO,
262 state: automaton.initial_state(),
263 stack: SmallStack::new(),
264 automaton,
265 input,
266 recorder,
267 simd,
268 next_event: None,
269 is_list: false,
270 array_count: JsonUInt::ZERO,
271 }
272}
273
274impl<'i, 'r, I, R, V> Executor<'i, 'r, I, R, V>
275where
276 'i: 'r,
277 I: Input,
278 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
279 V: Simd,
280{
281 fn run(mut self) -> Result<(), EngineError> {
282 let mb_head_skip = HeadSkip::new(self.input, self.automaton, self.simd);
286
287 match mb_head_skip {
288 Some(head_skip) => head_skip.run_head_skipping(&mut self),
289 None => self.run_and_exit(),
290 }
291 }
292
293 fn run_and_exit(mut self) -> Result<(), EngineError> {
296 let iter = self.input.iter_blocks(self.recorder);
297 let quote_classifier = self.simd.classify_quoted_sequences(iter);
298 let structural_classifier = self.simd.classify_structural_characters(quote_classifier);
299 let mut classifier = TailSkip::new(structural_classifier, self.simd);
300
301 self.run_on_subtree(&mut classifier)?;
302
303 self.verify_subtree_closed()
304 }
305
306 fn run_on_subtree(&mut self, classifier: &mut Classifier!()) -> Result<(), EngineError> {
310 dispatch_simd!(self.simd; self, classifier =>
311 fn<'i, 'r, I, R, V>(eng: &mut Executor<'i, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError>
312 where
313 'i: 'r,
314 I: Input,
315 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
316 V: Simd
317 {
318 loop {
319 if eng.next_event.is_none() {
321 eng.next_event = match classifier.next() {
322 Ok(e) => e,
323 Err(err) => return Err(EngineError::InputError(err)),
324 };
325 }
326 if let Some(event) = eng.next_event.take() {
327 debug!("====================");
328 debug!("Event = {:?}", event);
329 debug!("Depth = {:?}", eng.depth);
330 debug!("Stack = {:?}", eng.stack);
331 debug!("State = {:?}", eng.state);
332 debug!("====================");
333
334 match event {
335 Structural::Colon(idx) => eng.handle_colon(classifier, idx)?,
336 Structural::Comma(idx) => eng.handle_comma(classifier, idx)?,
337 Structural::Opening(b, idx) => eng.handle_opening(classifier, b, idx)?,
338 Structural::Closing(_, idx) => {
339 eng.handle_closing(classifier, idx)?;
340
341 if eng.depth == Depth::ZERO {
342 break;
343 }
344 }
345 }
346 } else {
347 break;
348 }
349 }
350
351 Ok(())
352 })
353 }
354
355 #[inline(always)]
359 fn handle_colon(
360 &mut self,
361 #[allow(unused_variables)] classifier: &mut Classifier!(),
362 idx: usize,
363 ) -> Result<(), EngineError> {
364 debug!("Colon");
365
366 if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
369 if c == b'{' || c == b'[' {
370 return Ok(());
371 }
372 }
373
374 let mut any_matched = false;
377
378 for (member_name, target) in self.automaton[self.state].member_transitions() {
379 if self.automaton.is_accepting(*target) && self.is_match(idx, member_name.as_ref())? {
380 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
381 any_matched = true;
382 break;
383 }
384 }
385 let fallback_state = self.automaton[self.state].fallback_state();
387 if !any_matched && self.automaton.is_accepting(fallback_state) {
388 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
389 }
390
391 if any_matched && self.automaton.is_unitary(self.state) {
395 self.next_event = classifier.next()?;
401 match self.next_event {
402 None | Some(Structural::Closing(_, _)) => {
403 return Ok(());
404 }
405 Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
406 Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
407 }
408 let bracket_type = self.current_node_bracket_type();
409 debug!("Skipping unique state from {bracket_type:?}");
410 let stop_at = classifier.skip(bracket_type)?;
411 self.next_event = Some(Structural::Closing(bracket_type, stop_at));
414 }
415
416 Ok(())
417 }
418
419 #[inline(always)]
423 fn handle_comma(&mut self, _classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
424 debug!("Comma");
425
426 self.recorder.record_value_terminator(idx, self.depth)?;
427
428 if self.is_list {
429 if self.array_count.try_increment().is_err() {
431 return Ok(());
432 }
433
434 if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
437 if c == b'{' || c == b'[' {
438 return Ok(());
439 }
440 }
441
442 let is_fallback_accepting = self.automaton.is_accepting(self.automaton[self.state].fallback_state());
444
445 if is_fallback_accepting
446 || self
447 .automaton
448 .has_array_index_transition_to_accepting(self.state, &self.array_count)
449 {
450 debug!("Accepting list item on comma.");
451 self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
452 }
453 }
454
455 Ok(())
456 }
457
458 #[inline(always)]
460 fn handle_opening(
461 &mut self,
462 classifier: &mut Classifier!(),
463 bracket_type: BracketType,
464 idx: usize,
465 ) -> Result<(), EngineError> {
466 debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
467
468 let mut any_matched = false;
470 if self.is_list {
471 for trans in self.automaton[self.state].array_transitions() {
472 if trans.matches(self.array_count) {
473 let target = trans.target_state();
474 any_matched = true;
475 self.transition_to(target, bracket_type);
476 if self.automaton.is_accepting(target) {
477 debug!("Accept {idx}");
478 self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
479 }
480 break;
481 }
482 }
483 } else {
484 let colon_idx = self.find_preceding_colon(idx);
485
486 for (member_name, target) in self.automaton[self.state].member_transitions() {
487 if let Some(colon_idx) = colon_idx {
488 if self.is_match(colon_idx, member_name.as_ref())? {
489 any_matched = true;
490 self.transition_to(*target, bracket_type);
491 if self.automaton.is_accepting(*target) {
492 debug!("Accept {idx}");
493 self.record_match_detected_at(colon_idx + 1, NodeType::Complex(bracket_type))?;
494 }
495 break;
496 }
497 }
498 }
499 }
500
501 if !any_matched && self.depth != Depth::ZERO {
503 let fallback = self.automaton[self.state].fallback_state();
504 debug!("Falling back to {fallback}");
505
506 if self.automaton.is_rejecting(fallback) {
507 let closing_idx = classifier.skip(bracket_type)?;
510 return self.recorder.record_value_terminator(closing_idx, self.depth);
511 } else {
512 self.transition_to(fallback, bracket_type);
513 }
514
515 if self.automaton.is_accepting(fallback) {
516 self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
517 }
518 }
519
520 self.depth
522 .increment()
523 .map_err(|err| EngineError::DepthAboveLimit(idx, err))?;
524
525 self.is_list = bracket_type == BracketType::Square;
526 let mut needs_commas = false;
527
528 if self.is_list {
534 let fallback = self.automaton[self.state].fallback_state();
535 let is_fallback_accepting = self.automaton.is_accepting(fallback);
536
537 if is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state) {
538 needs_commas = true;
539 self.array_count = JsonUInt::ZERO;
540 debug!("Initialized array count to {}", self.array_count);
541
542 let wants_first_item =
543 is_fallback_accepting || self.automaton.has_first_array_index_transition_to_accepting(self.state);
544
545 if wants_first_item {
546 let next = self.input.seek_non_whitespace_forward(idx + 1).e()?;
547
548 match next {
555 Some((_, b'[' | b'{' | b']')) => (), Some((value_idx, _)) => {
557 self.record_match_detected_at(value_idx, NodeType::Atomic)?;
558 }
559 _ => (),
560 }
561 }
562 }
563 }
564
565 if !self.is_list && self.automaton.has_transition_to_accepting(self.state) {
567 classifier.turn_colons_and_commas_on(idx);
571 } else if needs_commas {
572 classifier.turn_colons_off();
573 classifier.turn_commas_on(idx);
574 } else {
575 classifier.turn_colons_and_commas_off();
576 }
577
578 Ok(())
579 }
580
581 #[inline(always)]
583 fn handle_closing(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
584 debug!("Closing, decreasing depth and popping stack.");
585
586 self.depth
587 .decrement()
588 .map_err(|err| EngineError::DepthBelowZero(idx, err))?;
589 self.recorder.record_value_terminator(idx, self.depth)?;
590
591 if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
593 self.state = stack_frame.state;
594 self.is_list = stack_frame.is_list;
595 self.array_count = stack_frame.array_count;
596
597 debug!("Restored array count to {}", self.array_count);
598
599 if self.automaton.is_unitary(self.state) {
602 let bracket_type = self.current_node_bracket_type();
603 debug!("Skipping unique state from {bracket_type:?}");
604 let close_idx = classifier.skip(bracket_type)?;
605 self.next_event = Some(Structural::Closing(bracket_type, close_idx));
608 return Ok(());
609 }
610 }
611
612 if self.is_list {
613 if self.automaton.is_accepting(self.automaton[self.state].fallback_state())
614 || self.automaton.has_any_array_item_transition(self.state)
615 {
616 classifier.turn_commas_on(idx);
617 } else {
618 classifier.turn_commas_off();
619 }
620 } else if self.automaton.has_transition_to_accepting(self.state) {
621 classifier.turn_colons_and_commas_on(idx);
622 } else {
623 classifier.turn_colons_off();
624 }
625
626 Ok(())
627 }
628
629 #[inline(always)]
632 fn transition_to(&mut self, target: State, opening: BracketType) {
633 let target_is_list = opening == BracketType::Square;
634
635 let fallback = self.automaton[self.state].fallback_state();
636 let is_fallback_accepting = self.automaton.is_accepting(fallback);
637 let searching_list = is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state);
638
639 if target != self.state || target_is_list != self.is_list || searching_list {
641 debug!(
642 "push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
643 self.state, self.array_count
644 );
645
646 self.stack.push(StackFrame {
647 depth: *self.depth,
648 state: self.state,
649 is_list: self.is_list,
650 array_count: self.array_count,
651 });
652 self.state = target;
653 }
654 }
655
656 fn find_preceding_colon(&self, idx: usize) -> Option<usize> {
658 if self.depth == Depth::ZERO {
659 None
660 } else {
661 let (char_idx, char) = self.input.seek_non_whitespace_backward(idx - 1)?;
662
663 (char == b':').then_some(char_idx)
664 }
665 }
666
667 #[inline(always)]
669 fn is_match(&self, idx: usize, member_name: &StringPattern) -> Result<bool, EngineError> {
670 let len = member_name.quoted().len();
671
672 let closing_quote_idx = match self.input.seek_backward(idx - 1, b'"') {
674 Some(x) => x,
675 None => return Err(EngineError::MalformedStringQuotes(idx - 1)),
676 };
677
678 if closing_quote_idx + 1 < len {
680 return Ok(false);
681 }
682
683 let start_idx = closing_quote_idx + 1 - len;
685 self.input
686 .is_member_match(start_idx, closing_quote_idx + 1, member_name)
687 .map_err(|x| x.into().into())
688 }
689
690 fn record_match_detected_at(&mut self, start_idx: usize, ty: NodeType) -> Result<(), EngineError> {
692 debug!("Reporting result somewhere after {start_idx} with node type {ty:?}");
693
694 let index = match ty {
695 NodeType::Complex(BracketType::Curly) => self.input.seek_forward(start_idx, [b'{']).e()?,
696 NodeType::Complex(BracketType::Square) => self.input.seek_forward(start_idx, [b'[']).e()?,
697 NodeType::Atomic => self.input.seek_non_whitespace_forward(start_idx).e()?,
698 }
699 .map(|x| x.0);
700
701 match index {
702 Some(idx) => self.recorder.record_match(idx, self.depth, ty.into()),
703 None => Err(EngineError::MissingItem()),
704 }
705 }
706
707 fn verify_subtree_closed(&self) -> Result<(), EngineError> {
709 if self.depth != Depth::ZERO {
710 Err(EngineError::MissingClosingCharacter())
711 } else {
712 Ok(())
713 }
714 }
715
716 fn current_node_bracket_type(&self) -> BracketType {
718 if self.is_list {
719 BracketType::Square
720 } else {
721 BracketType::Curly
722 }
723 }
724}
725
726#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
729struct StackFrame {
730 depth: u8,
731 state: State,
732 is_list: bool,
733 array_count: JsonUInt,
734}
735
736#[derive(Debug)]
737struct SmallStack {
738 contents: SmallVec<[StackFrame; 128]>,
739}
740
741impl SmallStack {
742 fn new() -> Self {
743 Self { contents: smallvec![] }
744 }
745
746 #[inline]
747 fn peek(&mut self) -> Option<StackFrame> {
748 self.contents.last().copied()
749 }
750
751 #[inline]
752 fn pop_if_at_or_below(&mut self, depth: u8) -> Option<StackFrame> {
753 if let Some(stack_frame) = self.peek() {
754 if depth <= stack_frame.depth {
755 return self.contents.pop();
756 }
757 }
758 None
759 }
760
761 #[inline]
762 fn push(&mut self, value: StackFrame) {
763 self.contents.push(value)
764 }
765}
766
767impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, 'r, I, R, V>
768where
769 I: Input,
770 R: Recorder<I::Block<'i, BLOCK_SIZE>>,
771 V: Simd,
772 'i: 'r,
773{
774 fn run_on_subtree(
775 &mut self,
776 next_event: Structural,
777 state: State,
778 structural_classifier: V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
779 ) -> Result<ResumeState<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>, V, MaskType>, EngineError> {
780 let mut classifier = TailSkip::new(structural_classifier, self.simd);
781
782 self.state = state;
783 self.next_event = Some(next_event);
784
785 self.run_on_subtree(&mut classifier)?;
786 self.verify_subtree_closed()?;
787
788 Ok(ResumeState(classifier.stop()))
789 }
790
791 fn recorder(&mut self) -> &'r R {
792 self.recorder
793 }
794}
795
796#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
797enum NodeType {
798 Atomic,
799 Complex(BracketType),
800}
801
802impl From<NodeType> for MatchedNodeType {
803 #[inline(always)]
804 fn from(value: NodeType) -> Self {
805 match value {
806 NodeType::Atomic => Self::Atomic,
807 NodeType::Complex(_) => Self::Complex,
808 }
809 }
810}