rsonpath/engine/
main.rs

1//! Main implementation of a JSONPath query engine.
2//!
3//! Core engine for processing of JSONPath queries, based on the
4//! [Stackless Processing of Streamed Trees](https://hal.archives-ouvertes.fr/hal-03021960) paper.
5//! Entire query execution is done without recursion, with an explicit minimal stack, linearly through
6//! the JSON structure, which allows efficient SIMD operations and optimized register usage.
7//!
8
9/* ## Overview
10 *
11 * This is the most complex part of the engine. It glues together all the moving parts from the other modules
12 * and executes the main loop, iterating over the stream of [`Structural`] characters from the classifier.
13 *
14 * The main job is to do as little work as possible, and skip as much as possible. Head skipping is handled
15 * independently in [`HeadSkip`], but the engine needs to drive tail-skipping. This is done by inspecting the
16 * automaton at various points and skipping if we are:
17 * - in a rejecting state, in which case nothing really matters until end of the subtree;
18 * - in a unitary state after having already taken the only possible non-rejecting transition.
19 *
20 * The base work that has to be done at all times is reacting to opening and closing characters that dictate
21 * the tree structure. Member name before an opening character can be easily found with a quick look-behind (skipping whitespace).
22 * Most of the time atomic values don't matter - they're an empty subtree. They need to be handled in only two cases:
23 *   - we are in a state that can accept (has a transition to an accepting state) and need to know the locations
24 *     of atomics to possibly record them as matches;
25 *   - we are in a list and the concrete value of the current index matters for the automaton, in which
26 *     case we need to count atomics to advance the counter.
27 *
28 * In the first case, we require colons and commas to delimit atomics within objects and lists.
29 * The "special" case of the first element in a list is handled separately and rather ungracefully.
30 * In the second case, we only need commas to use as "milestones" to count our way through the list.
31 *
32 * ## Executor state
33 *
34 * The core driver of control is the current state of the query automaton.
35 * We also need an auxiliary piece of information on whether we are in a list, and what
36 * is the current list index. When entering a subtree and changing the state, it has to be preserved
37 * on a stack. The stack used is supposed to be "small", in the sense that we only push when any part of the
38 * state changes.
39 */
40
41#![allow(clippy::type_complexity)] // The private Classifier type is very complex, but we specifically macro it out.
42use crate::{
43    automaton::{error::CompilerError, Automaton, State},
44    classification::{
45        simd::{self, config_simd, dispatch_simd, Simd, SimdConfiguration},
46        structural::{BracketType, Structural, StructuralIterator},
47    },
48    debug,
49    depth::Depth,
50    engine::{
51        error::EngineError,
52        head_skipping::{CanHeadSkip, HeadSkip, ResumeState},
53        select_root_query,
54        tail_skipping::TailSkip,
55        Compiler, Engine, Input,
56    },
57    input::error::InputErrorConvertible,
58    result::{
59        approx_span::ApproxSpanRecorder, count::CountRecorder, index::IndexRecorder, nodes::NodesRecorder, Match,
60        MatchCount, MatchIndex, MatchSpan, MatchedNodeType, Recorder, Sink,
61    },
62    string_pattern::StringPattern,
63    FallibleIterator, MaskType, BLOCK_SIZE,
64};
65use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
66use smallvec::{smallvec, SmallVec};
67
68/// Main engine for a fixed JSONPath query.
69///
70/// The engine is stateless, meaning that it can be executed
71/// on any number of separate inputs, even on separate threads.
72#[derive(Clone, Debug)]
73pub struct MainEngine {
74    automaton: Automaton,
75    simd: SimdConfiguration,
76}
77
78static_assertions::assert_impl_all!(MainEngine: Send, Sync);
79
80impl MainEngine {
81    /// Get a reference to the underlying compiled query.
82    #[inline(always)]
83    #[must_use]
84    pub fn automaton(&self) -> &Automaton {
85        &self.automaton
86    }
87}
88
89impl Compiler for MainEngine {
90    type E = Self;
91
92    #[must_use = "compiling the query only creates an engine instance that should be used"]
93    #[inline(always)]
94    fn compile_query(query: &JsonPathQuery) -> Result<Self, CompilerError> {
95        let automaton = Automaton::new(query)?;
96        debug!("DFA:\n {}", automaton);
97        let simd = simd::configure();
98        log::info!("SIMD configuration:\n {}", simd);
99        Ok(Self { automaton, simd })
100    }
101
102    #[inline(always)]
103    fn from_compiled_query(automaton: Automaton) -> Self::E {
104        let simd = simd::configure();
105        log::info!("SIMD configuration:\n {}", simd);
106        Self { automaton, simd }
107    }
108}
109
110/* The engine has multiple entry methods depending on what type of result is required.
111 * This allows more efficient implementations for simpler results. For example,
112 * getting full Match objects is the most expensive, while a simple count is very fast in comparison.
113 *
114 * The logic for each entry point is analogous:
115 * - we separately handle an empty query, which immediately returns an empty result,
116 *   and a root-only query, which can be much faster in many cases.
117 * - we set up an appropriate Recorder impl for the result type.
118 * - we configure SIMD and run the Executor in its context.
119 */
120impl Engine for MainEngine {
121    #[inline]
122    fn count<I>(&self, input: &I) -> Result<MatchCount, EngineError>
123    where
124        I: Input,
125    {
126        if self.automaton.is_select_root_query() {
127            return select_root_query::count(input);
128        }
129        if self.automaton.is_empty_query() {
130            return Ok(0);
131        }
132
133        let recorder = CountRecorder::new();
134        config_simd!(self.simd => |simd| {
135            let executor = query_executor(&self.automaton, input, &recorder, simd);
136            executor.run()
137        })?;
138
139        Ok(recorder.into())
140    }
141
142    #[inline]
143    fn indices<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
144    where
145        I: Input,
146        S: Sink<MatchIndex>,
147    {
148        if self.automaton.is_select_root_query() {
149            return select_root_query::index(input, sink);
150        }
151        if self.automaton.is_empty_query() {
152            return Ok(());
153        }
154
155        let recorder = IndexRecorder::new(sink, input.leading_padding_len());
156        config_simd!(self.simd => |simd| {
157            let executor = query_executor(&self.automaton, input, &recorder, simd);
158            executor.run()
159        })?;
160
161        Ok(())
162    }
163
164    #[inline]
165    fn approximate_spans<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
166    where
167        I: Input,
168        S: Sink<MatchSpan>,
169    {
170        if self.automaton.is_select_root_query() {
171            return select_root_query::approx_span(input, sink);
172        }
173        if self.automaton.is_empty_query() {
174            return Ok(());
175        }
176
177        let recorder = ApproxSpanRecorder::new(sink, input.leading_padding_len());
178        config_simd!(self.simd => |simd| {
179            let executor = query_executor(&self.automaton, input, &recorder, simd);
180            executor.run()
181        })?;
182
183        Ok(())
184    }
185
186    #[inline]
187    fn matches<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
188    where
189        I: Input,
190        S: Sink<Match>,
191    {
192        if self.automaton.is_select_root_query() {
193            return select_root_query::match_(input, sink);
194        }
195        if self.automaton.is_empty_query() {
196            return Ok(());
197        }
198
199        let recorder = NodesRecorder::build_recorder(sink, input.leading_padding_len());
200        config_simd!(self.simd => |simd| {
201            let executor = query_executor(&self.automaton, input, &recorder, simd);
202            executor.run()
203        })?;
204
205        Ok(())
206    }
207}
208
209// This is a convenience macro to hide the enormous type of the classifier.
210// It expects generic types `I` (the Input implementation), `R` (the Recorder),
211// and `V` (the SIMD context).
212macro_rules! Classifier {
213    () => {
214        TailSkip<
215            'i,
216            I::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
217            V::QuotesClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
218            V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
219            V,
220            BLOCK_SIZE>
221    };
222}
223
224/// This is the heart of an Engine run that holds the entire execution state.
225struct Executor<'i, 'r, I, R, V> {
226    /// Current depth in the JSON tree.
227    depth: Depth,
228    /// Current automaton state.
229    state: State,
230    /// Lookahead of at most one Structural character.
231    next_event: Option<Structural>,
232    /// Whether the current JSON node is a list.
233    is_list: bool,
234    /// Index of the next element in the list, if is_list is true.
235    // FIXME: This and is_list can probably be merged into Option<JsonUInt> carrying all the information.
236    array_count: JsonUInt,
237    /// Execution stack.
238    stack: SmallStack,
239    /// Read-only access to the query automaton.
240    automaton: &'i Automaton,
241    /// Handle to the input.
242    input: &'i I,
243    /// Handle to the recorder.
244    recorder: &'r R,
245    /// Resolved SIMD context.
246    simd: V,
247}
248
249/// Initialize the [`Executor`] for the initial state of a query.
250fn query_executor<'i, 'r, I, R, V: Simd>(
251    automaton: &'i Automaton,
252    input: &'i I,
253    recorder: &'r R,
254    simd: V,
255) -> Executor<'i, 'r, I, R, V>
256where
257    I: Input,
258    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
259{
260    Executor {
261        depth: Depth::ZERO,
262        state: automaton.initial_state(),
263        stack: SmallStack::new(),
264        automaton,
265        input,
266        recorder,
267        simd,
268        next_event: None,
269        is_list: false,
270        array_count: JsonUInt::ZERO,
271    }
272}
273
274impl<'i, 'r, I, R, V> Executor<'i, 'r, I, R, V>
275where
276    'i: 'r,
277    I: Input,
278    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
279    V: Simd,
280{
281    fn run(mut self) -> Result<(), EngineError> {
282        // First we check if head-skipping is possible for a given query automaton.
283        // If yes, delegate the control to HeadSkip and give it full access to this Executor.
284        // Otherwise, we run our normal one-shot engine.
285        let mb_head_skip = HeadSkip::new(self.input, self.automaton, self.simd);
286
287        match mb_head_skip {
288            Some(head_skip) => head_skip.run_head_skipping(&mut self),
289            None => self.run_and_exit(),
290        }
291    }
292
293    /// One-shot run of the engine on whatever JSON tree starts at the current input.
294    /// As soon as the tree is closed, the engine exits.
295    fn run_and_exit(mut self) -> Result<(), EngineError> {
296        let iter = self.input.iter_blocks(self.recorder);
297        let quote_classifier = self.simd.classify_quoted_sequences(iter);
298        let structural_classifier = self.simd.classify_structural_characters(quote_classifier);
299        let mut classifier = TailSkip::new(structural_classifier, self.simd);
300
301        self.run_on_subtree(&mut classifier)?;
302
303        self.verify_subtree_closed()
304    }
305
306    /// This is _the_ main loop, the heart and the soul of the engine.
307    /// We loop through the document based on the `classifier`'s outputs and handle each event.
308    /// Once the perceived depth of the document goes to zero, this method terminates.
309    fn run_on_subtree(&mut self, classifier: &mut Classifier!()) -> Result<(), EngineError> {
310        dispatch_simd!(self.simd; self, classifier =>
311        fn<'i, 'r, I, R, V>(eng: &mut Executor<'i, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError>
312        where
313            'i: 'r,
314            I: Input,
315            R: Recorder<I::Block<'i, BLOCK_SIZE>>,
316            V: Simd
317        {
318            loop {
319                // Fetch the next element only if the lookahead is empty.
320                if eng.next_event.is_none() {
321                    eng.next_event = match classifier.next() {
322                        Ok(e) => e,
323                        Err(err) => return Err(EngineError::InputError(err)),
324                    };
325                }
326                if let Some(event) = eng.next_event.take() {
327                    debug!("====================");
328                    debug!("Event = {:?}", event);
329                    debug!("Depth = {:?}", eng.depth);
330                    debug!("Stack = {:?}", eng.stack);
331                    debug!("State = {:?}", eng.state);
332                    debug!("====================");
333
334                    match event {
335                        Structural::Colon(idx) => eng.handle_colon(classifier, idx)?,
336                        Structural::Comma(idx) => eng.handle_comma(classifier, idx)?,
337                        Structural::Opening(b, idx) => eng.handle_opening(classifier, b, idx)?,
338                        Structural::Closing(_, idx) => {
339                            eng.handle_closing(classifier, idx)?;
340
341                            if eng.depth == Depth::ZERO {
342                                break;
343                            }
344                        }
345                    }
346                } else {
347                    break;
348                }
349            }
350
351            Ok(())
352        })
353    }
354
355    /// Handle a colon at index `idx`.
356    /// This method only handles atomic values after the colon.
357    /// Objects and arrays are processed at their respective opening character.
358    #[inline(always)]
359    fn handle_colon(
360        &mut self,
361        #[allow(unused_variables)] classifier: &mut Classifier!(),
362        idx: usize,
363    ) -> Result<(), EngineError> {
364        debug!("Colon");
365
366        // Lookahead to see if the next character is an opening.
367        // If yes, the logic will be handled in handle_opening and we bail.
368        if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
369            if c == b'{' || c == b'[' {
370                return Ok(());
371            }
372        }
373
374        // Atomic values are only relevant if the automaton accepts.
375        // Look at accepting transitions and try to match them with the label.
376        let mut any_matched = false;
377
378        for (member_name, target) in self.automaton[self.state].member_transitions() {
379            if self.automaton.is_accepting(*target) && self.is_match(idx, member_name.as_ref())? {
380                self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
381                any_matched = true;
382                break;
383            }
384        }
385        // Alternatively, match consider the fallback transition if it accepts.
386        let fallback_state = self.automaton[self.state].fallback_state();
387        if !any_matched && self.automaton.is_accepting(fallback_state) {
388            self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
389        }
390
391        // Tail skipping.
392        // If we are in a unitary state and have matched a transition, we can skip the rest of the subtree,
393        // since member names are unique.
394        if any_matched && self.automaton.is_unitary(self.state) {
395            // We need to look ahead for some bookkeeping.
396            // 1. If the next event is closing then there's no reason to spin up the skipping machinery,
397            //    since it would exit immediately anyway.
398            // 2. If the next character is a comma then we need to notify the recorder.
399            // 3. Realistically, a colon should never happen. An opening is not interesting and will be skipped.
400            self.next_event = classifier.next()?;
401            match self.next_event {
402                None | Some(Structural::Closing(_, _)) => {
403                    return Ok(());
404                }
405                Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
406                Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
407            }
408            let bracket_type = self.current_node_bracket_type();
409            debug!("Skipping unique state from {bracket_type:?}");
410            let stop_at = classifier.skip(bracket_type)?;
411            // Skipping stops at the closing character *and consumes it*. We still need the main loop to properly
412            // handle a closing, so we set the lookahead to the correct character.
413            self.next_event = Some(Structural::Closing(bracket_type, stop_at));
414        }
415
416        Ok(())
417    }
418
419    /// Handle a comma at index `idx`.
420    /// This method only handles atomic values after the comma.
421    /// Objects and arrays are processed at their respective opening character.
422    #[inline(always)]
423    fn handle_comma(&mut self, _classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
424        debug!("Comma");
425
426        self.recorder.record_value_terminator(idx, self.depth)?;
427
428        if self.is_list {
429            // If the index increment exceeds the field's limit, give up.
430            if self.array_count.try_increment().is_err() {
431                return Ok(());
432            }
433
434            // Lookahead to see if the next character is an opening.
435            // If yes, the logic will be handled in handle_opening and we bail.
436            if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
437                if c == b'{' || c == b'[' {
438                    return Ok(());
439                }
440            }
441
442            // Check the fallback transition first since it's cheap, then check for the specific index.
443            let is_fallback_accepting = self.automaton.is_accepting(self.automaton[self.state].fallback_state());
444
445            if is_fallback_accepting
446                || self
447                    .automaton
448                    .has_array_index_transition_to_accepting(self.state, &self.array_count)
449            {
450                debug!("Accepting list item on comma.");
451                self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
452            }
453        }
454
455        Ok(())
456    }
457
458    /// Handle the opening of a subtree with given `bracket_type` at index `idx`.
459    #[inline(always)]
460    fn handle_opening(
461        &mut self,
462        classifier: &mut Classifier!(),
463        bracket_type: BracketType,
464        idx: usize,
465    ) -> Result<(), EngineError> {
466        debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
467
468        // Check all transitions relevant to the current subtree - array if in list, member if not.
469        let mut any_matched = false;
470        if self.is_list {
471            for trans in self.automaton[self.state].array_transitions() {
472                if trans.matches(self.array_count) {
473                    let target = trans.target_state();
474                    any_matched = true;
475                    self.transition_to(target, bracket_type);
476                    if self.automaton.is_accepting(target) {
477                        debug!("Accept {idx}");
478                        self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
479                    }
480                    break;
481                }
482            }
483        } else {
484            let colon_idx = self.find_preceding_colon(idx);
485
486            for (member_name, target) in self.automaton[self.state].member_transitions() {
487                if let Some(colon_idx) = colon_idx {
488                    if self.is_match(colon_idx, member_name.as_ref())? {
489                        any_matched = true;
490                        self.transition_to(*target, bracket_type);
491                        if self.automaton.is_accepting(*target) {
492                            debug!("Accept {idx}");
493                            self.record_match_detected_at(colon_idx + 1, NodeType::Complex(bracket_type))?;
494                        }
495                        break;
496                    }
497                }
498            }
499        }
500
501        // If nothing matched trigger the fallback transition.
502        if !any_matched && self.depth != Depth::ZERO {
503            let fallback = self.automaton[self.state].fallback_state();
504            debug!("Falling back to {fallback}");
505
506            if self.automaton.is_rejecting(fallback) {
507                // Tail skipping. Skip the entire subtree. The skipping consumes the closing character.
508                // We still need to notify the recorder - in case the value being skipped was actually accepted.
509                let closing_idx = classifier.skip(bracket_type)?;
510                return self.recorder.record_value_terminator(closing_idx, self.depth);
511            } else {
512                self.transition_to(fallback, bracket_type);
513            }
514
515            if self.automaton.is_accepting(fallback) {
516                self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
517            }
518        }
519
520        // At this point we will be actually digging into the subtree.
521        self.depth
522            .increment()
523            .map_err(|err| EngineError::DepthAboveLimit(idx, err))?;
524
525        self.is_list = bracket_type == BracketType::Square;
526        let mut needs_commas = false;
527
528        // If we're starting a list, there's a very hairy problem of accepting the first element in the list,
529        // if it is atomic. We process objects and arrays on their opening character, and atomics on their preceding comma.
530        // The first element doesn't have a preceding comma, so if it needs to be accepted we need to handle it now.
531        //
532        // Additionally, whether to enable commas or not depends on whether an item of the list can ever be accepted.
533        if self.is_list {
534            let fallback = self.automaton[self.state].fallback_state();
535            let is_fallback_accepting = self.automaton.is_accepting(fallback);
536
537            if is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state) {
538                needs_commas = true;
539                self.array_count = JsonUInt::ZERO;
540                debug!("Initialized array count to {}", self.array_count);
541
542                let wants_first_item =
543                    is_fallback_accepting || self.automaton.has_first_array_index_transition_to_accepting(self.state);
544
545                if wants_first_item {
546                    let next = self.input.seek_non_whitespace_forward(idx + 1).e()?;
547
548                    // We only handle the match if it exists and is atomic. The possible cases
549                    // in a well-formed JSON for the next character are:
550                    // - '[', for an array value
551                    // - '{' for an object value
552                    // - ']' if the list was empty and has no values
553                    // - otherwise it's the first character of an atomic value.
554                    match next {
555                        Some((_, b'[' | b'{' | b']')) => (), // Complex value or empty list.
556                        Some((value_idx, _)) => {
557                            self.record_match_detected_at(value_idx, NodeType::Atomic)?;
558                        }
559                        _ => (),
560                    }
561                }
562            }
563        }
564
565        // Decide which structural characters need to be handled in this subtree.
566        if !self.is_list && self.automaton.has_transition_to_accepting(self.state) {
567            // When accepting values in an object we need colons for the member names,
568            // and commas to report where atomic values end (for the Recorder).
569            // This is the only case that needs colons.
570            classifier.turn_colons_and_commas_on(idx);
571        } else if needs_commas {
572            classifier.turn_colons_off();
573            classifier.turn_commas_on(idx);
574        } else {
575            classifier.turn_colons_and_commas_off();
576        }
577
578        Ok(())
579    }
580
581    /// Handle the closing of a subtree at index `idx`.
582    #[inline(always)]
583    fn handle_closing(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
584        debug!("Closing, decreasing depth and popping stack.");
585
586        self.depth
587            .decrement()
588            .map_err(|err| EngineError::DepthBelowZero(idx, err))?;
589        self.recorder.record_value_terminator(idx, self.depth)?;
590
591        // Restore the state from the stack if the transition was not a loop.
592        if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
593            self.state = stack_frame.state;
594            self.is_list = stack_frame.is_list;
595            self.array_count = stack_frame.array_count;
596
597            debug!("Restored array count to {}", self.array_count);
598
599            // We have taken a transition when entering the just-closed subtree. If the state is unitary
600            // we can just skip the rest of the current subtree.
601            if self.automaton.is_unitary(self.state) {
602                let bracket_type = self.current_node_bracket_type();
603                debug!("Skipping unique state from {bracket_type:?}");
604                let close_idx = classifier.skip(bracket_type)?;
605                // Skipping stops at the closing character *and consumes it*. We still need the main loop to properly
606                // handle a closing, so we set the lookahead to the correct character.
607                self.next_event = Some(Structural::Closing(bracket_type, close_idx));
608                return Ok(());
609            }
610        }
611
612        if self.is_list {
613            if self.automaton.is_accepting(self.automaton[self.state].fallback_state())
614                || self.automaton.has_any_array_item_transition(self.state)
615            {
616                classifier.turn_commas_on(idx);
617            } else {
618                classifier.turn_commas_off();
619            }
620        } else if self.automaton.has_transition_to_accepting(self.state) {
621            classifier.turn_colons_and_commas_on(idx);
622        } else {
623            classifier.turn_colons_off();
624        }
625
626        Ok(())
627    }
628
629    /// Trigger the transition to the `target` state into a new subtree
630    /// that opened with `opening`.
631    #[inline(always)]
632    fn transition_to(&mut self, target: State, opening: BracketType) {
633        let target_is_list = opening == BracketType::Square;
634
635        let fallback = self.automaton[self.state].fallback_state();
636        let is_fallback_accepting = self.automaton.is_accepting(fallback);
637        let searching_list = is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state);
638
639        // To keep the stack small, we only push if the state only changes in any meaningful way.
640        if target != self.state || target_is_list != self.is_list || searching_list {
641            debug!(
642                "push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
643                self.state, self.array_count
644            );
645
646            self.stack.push(StackFrame {
647                depth: *self.depth,
648                state: self.state,
649                is_list: self.is_list,
650                array_count: self.array_count,
651            });
652            self.state = target;
653        }
654    }
655
656    /// Find the preceding non-whitespace character and return its index if it's a colon.
657    fn find_preceding_colon(&self, idx: usize) -> Option<usize> {
658        if self.depth == Depth::ZERO {
659            None
660        } else {
661            let (char_idx, char) = self.input.seek_non_whitespace_backward(idx - 1)?;
662
663            (char == b':').then_some(char_idx)
664        }
665    }
666
667    /// Check if the label ended with a colon at index `idx` matches the `member_name`.
668    #[inline(always)]
669    fn is_match(&self, idx: usize, member_name: &StringPattern) -> Result<bool, EngineError> {
670        let len = member_name.quoted().len();
671
672        // The colon can be preceded by whitespace before the actual label.
673        let closing_quote_idx = match self.input.seek_backward(idx - 1, b'"') {
674            Some(x) => x,
675            None => return Err(EngineError::MalformedStringQuotes(idx - 1)),
676        };
677
678        // First check if the length matches.
679        if closing_quote_idx + 1 < len {
680            return Ok(false);
681        }
682
683        // Do the expensive memcmp.
684        let start_idx = closing_quote_idx + 1 - len;
685        self.input
686            .is_member_match(start_idx, closing_quote_idx + 1, member_name)
687            .map_err(|x| x.into().into())
688    }
689
690    /// Pass information to the Recorder that we found a match of type `ty` at `start_idx`.
691    fn record_match_detected_at(&mut self, start_idx: usize, ty: NodeType) -> Result<(), EngineError> {
692        debug!("Reporting result somewhere after {start_idx} with node type {ty:?}");
693
694        let index = match ty {
695            NodeType::Complex(BracketType::Curly) => self.input.seek_forward(start_idx, [b'{']).e()?,
696            NodeType::Complex(BracketType::Square) => self.input.seek_forward(start_idx, [b'[']).e()?,
697            NodeType::Atomic => self.input.seek_non_whitespace_forward(start_idx).e()?,
698        }
699        .map(|x| x.0);
700
701        match index {
702            Some(idx) => self.recorder.record_match(idx, self.depth, ty.into()),
703            None => Err(EngineError::MissingItem()),
704        }
705    }
706
707    /// Verify that we have reached zero depth, raise an error if not.
708    fn verify_subtree_closed(&self) -> Result<(), EngineError> {
709        if self.depth != Depth::ZERO {
710            Err(EngineError::MissingClosingCharacter())
711        } else {
712            Ok(())
713        }
714    }
715
716    /// Get the [`BracketType`] of current subtree.
717    fn current_node_bracket_type(&self) -> BracketType {
718        if self.is_list {
719            BracketType::Square
720        } else {
721            BracketType::Curly
722        }
723    }
724}
725
726/// A single frame on the [`Executor`]'s stack enabling restoration of the entire
727/// execution state to before a subtree opening.
728#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
729struct StackFrame {
730    depth: u8,
731    state: State,
732    is_list: bool,
733    array_count: JsonUInt,
734}
735
736#[derive(Debug)]
737struct SmallStack {
738    contents: SmallVec<[StackFrame; 128]>,
739}
740
741impl SmallStack {
742    fn new() -> Self {
743        Self { contents: smallvec![] }
744    }
745
746    #[inline]
747    fn peek(&mut self) -> Option<StackFrame> {
748        self.contents.last().copied()
749    }
750
751    #[inline]
752    fn pop_if_at_or_below(&mut self, depth: u8) -> Option<StackFrame> {
753        if let Some(stack_frame) = self.peek() {
754            if depth <= stack_frame.depth {
755                return self.contents.pop();
756            }
757        }
758        None
759    }
760
761    #[inline]
762    fn push(&mut self, value: StackFrame) {
763        self.contents.push(value)
764    }
765}
766
767impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, 'r, I, R, V>
768where
769    I: Input,
770    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
771    V: Simd,
772    'i: 'r,
773{
774    fn run_on_subtree(
775        &mut self,
776        next_event: Structural,
777        state: State,
778        structural_classifier: V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
779    ) -> Result<ResumeState<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>, V, MaskType>, EngineError> {
780        let mut classifier = TailSkip::new(structural_classifier, self.simd);
781
782        self.state = state;
783        self.next_event = Some(next_event);
784
785        self.run_on_subtree(&mut classifier)?;
786        self.verify_subtree_closed()?;
787
788        Ok(ResumeState(classifier.stop()))
789    }
790
791    fn recorder(&mut self) -> &'r R {
792        self.recorder
793    }
794}
795
796#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
797enum NodeType {
798    Atomic,
799    Complex(BracketType),
800}
801
802impl From<NodeType> for MatchedNodeType {
803    #[inline(always)]
804    fn from(value: NodeType) -> Self {
805        match value {
806            NodeType::Atomic => Self::Atomic,
807            NodeType::Complex(_) => Self::Complex,
808        }
809    }
810}