Skip to main content

rsonpath/engine/
main.rs

1//! Main implementation of a JSONPath query engine.
2//!
3//! Core engine for processing of JSONPath queries, based on the
4//! [Stackless Processing of Streamed Trees](https://hal.archives-ouvertes.fr/hal-03021960) paper.
5//! Entire query execution is done without recursion, with an explicit minimal stack, linearly through
6//! the JSON structure, which allows efficient SIMD operations and optimized register usage.
7//!
8
9/* ## Overview
10 *
11 * This is the most complex part of the engine. It glues together all the moving parts from the other modules
12 * and executes the main loop, iterating over the stream of [`Structural`] characters from the classifier.
13 *
14 * The main job is to do as little work as possible, and skip as much as possible. Head skipping is handled
15 * independently in [`HeadSkip`], but the engine needs to drive tail-skipping. This is done by inspecting the
16 * automaton at various points and skipping if we are:
17 * - in a rejecting state, in which case nothing really matters until end of the subtree;
18 * - in a unitary state after having already taken the only possible non-rejecting transition.
19 *
20 * The base work that has to be done at all times is reacting to opening and closing characters that dictate
21 * the tree structure. Member name before an opening character can be easily found with a quick look-behind (skipping whitespace).
22 * Most of the time atomic values don't matter - they're an empty subtree. They need to be handled in only two cases:
23 *   - we are in a state that can accept (has a transition to an accepting state) and need to know the locations
24 *     of atomics to possibly record them as matches;
25 *   - we are in a list and the concrete value of the current index matters for the automaton, in which
26 *     case we need to count atomics to advance the counter.
27 *
28 * In the first case, we require colons and commas to delimit atomics within objects and lists.
29 * The "special" case of the first element in a list is handled separately and rather ungracefully.
30 * In the second case, we only need commas to use as "milestones" to count our way through the list.
31 *
32 * ## Executor state
33 *
34 * The core driver of control is the current state of the query automaton.
35 * We also need an auxiliary piece of information on whether we are in a list, and what
36 * is the current list index. When entering a subtree and changing the state, it has to be preserved
37 * on a stack. The stack used is supposed to be "small", in the sense that we only push when any part of the
38 * state changes.
39 */
40
41#![allow(
42    clippy::type_complexity,
43    reason = "The private Classifier type is very complex, but we specifically macro it out."
44)]
45use crate::{
46    automaton::{error::CompilerError, Automaton, State},
47    classification::{
48        simd::{self, config_simd, dispatch_simd, Simd, SimdConfiguration},
49        structural::{BracketType, Structural, StructuralIterator as _},
50    },
51    debug,
52    depth::Depth,
53    engine::{
54        error::EngineError,
55        head_skipping::{CanHeadSkip, HeadSkip, ResumeState},
56        select_root_query,
57        tail_skipping::TailSkip,
58        Compiler, Engine, Input,
59    },
60    input::error::InputErrorConvertible as _,
61    result::{
62        approx_span::ApproxSpanRecorder, count::CountRecorder, index::IndexRecorder, nodes::NodesRecorder, Match,
63        MatchCount, MatchIndex, MatchSpan, MatchedNodeType, Recorder, Sink,
64    },
65    string_pattern::StringPattern,
66    FallibleIterator as _, MaskType, BLOCK_SIZE,
67};
68use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
69use smallvec::{smallvec, SmallVec};
70
71/// Main engine for a fixed JSONPath query.
72///
73/// The engine is stateless, meaning that it can be executed
74/// on any number of separate inputs, even on separate threads.
75#[derive(Clone, Debug)]
76pub struct MainEngine {
77    automaton: Automaton,
78    simd: SimdConfiguration,
79}
80
81static_assertions::assert_impl_all!(MainEngine: Send, Sync);
82
83impl MainEngine {
84    /// Get a reference to the underlying compiled query.
85    #[inline(always)]
86    #[must_use]
87    pub fn automaton(&self) -> &Automaton {
88        &self.automaton
89    }
90}
91
92impl Compiler for MainEngine {
93    type E = Self;
94
95    #[inline(always)]
96    fn compile_query(query: &JsonPathQuery) -> Result<Self, CompilerError> {
97        let automaton = Automaton::new(query)?;
98        debug!("DFA:\n {}", automaton);
99        let simd = simd::configure();
100        log::info!("SIMD configuration:\n {simd}");
101        Ok(Self { automaton, simd })
102    }
103
104    #[inline(always)]
105    fn from_compiled_query(automaton: Automaton) -> Self::E {
106        let simd = simd::configure();
107        log::info!("SIMD configuration:\n {simd}");
108        Self { automaton, simd }
109    }
110}
111
112/* The engine has multiple entry methods depending on what type of result is required.
113 * This allows more efficient implementations for simpler results. For example,
114 * getting full Match objects is the most expensive, while a simple count is very fast in comparison.
115 *
116 * The logic for each entry point is analogous:
117 * - we separately handle an empty query, which immediately returns an empty result,
118 *   and a root-only query, which can be much faster in many cases.
119 * - we set up an appropriate Recorder impl for the result type.
120 * - we configure SIMD and run the Executor in its context.
121 */
122impl Engine for MainEngine {
123    #[inline]
124    fn count<I>(&self, input: &I) -> Result<MatchCount, EngineError>
125    where
126        I: Input,
127    {
128        if self.automaton.is_select_root_query() {
129            return select_root_query::count(input);
130        }
131        if self.automaton.is_empty_query() {
132            return Ok(0);
133        }
134
135        let recorder = CountRecorder::new();
136        config_simd!(self.simd => |simd| {
137            let executor = query_executor(&self.automaton, input, &recorder, simd);
138            executor.run()
139        })?;
140
141        Ok(recorder.into())
142    }
143
144    #[inline]
145    fn indices<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
146    where
147        I: Input,
148        S: Sink<MatchIndex>,
149    {
150        if self.automaton.is_select_root_query() {
151            return select_root_query::index(input, sink);
152        }
153        if self.automaton.is_empty_query() {
154            return Ok(());
155        }
156
157        let recorder = IndexRecorder::new(sink, input.leading_padding_len());
158        config_simd!(self.simd => |simd| {
159            let executor = query_executor(&self.automaton, input, &recorder, simd);
160            executor.run()
161        })?;
162
163        Ok(())
164    }
165
166    #[inline]
167    fn approximate_spans<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
168    where
169        I: Input,
170        S: Sink<MatchSpan>,
171    {
172        if self.automaton.is_select_root_query() {
173            return select_root_query::approx_span(input, sink);
174        }
175        if self.automaton.is_empty_query() {
176            return Ok(());
177        }
178
179        let recorder = ApproxSpanRecorder::new(sink, input.leading_padding_len());
180        config_simd!(self.simd => |simd| {
181            let executor = query_executor(&self.automaton, input, &recorder, simd);
182            executor.run()
183        })?;
184
185        Ok(())
186    }
187
188    #[inline]
189    fn matches<I, S>(&self, input: &I, sink: &mut S) -> Result<(), EngineError>
190    where
191        I: Input,
192        S: Sink<Match>,
193    {
194        if self.automaton.is_select_root_query() {
195            return select_root_query::match_(input, sink);
196        }
197        if self.automaton.is_empty_query() {
198            return Ok(());
199        }
200
201        let recorder = NodesRecorder::build_recorder(sink, input.leading_padding_len());
202        config_simd!(self.simd => |simd| {
203            let executor = query_executor(&self.automaton, input, &recorder, simd);
204            executor.run()
205        })?;
206
207        Ok(())
208    }
209}
210
211// This is a convenience macro to hide the enormous type of the classifier.
212// It expects generic types `I` (the Input implementation), `R` (the Recorder),
213// and `V` (the SIMD context).
214macro_rules! Classifier {
215    () => {
216        TailSkip<
217            'i,
218            I::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
219            V::QuotesClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
220            V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
221            V,
222            BLOCK_SIZE>
223    };
224}
225
226/// This is the heart of an Engine run that holds the entire execution state.
227struct Executor<'i, 'r, I, R, V> {
228    /// Current depth in the JSON tree.
229    depth: Depth,
230    /// Current automaton state.
231    state: State,
232    /// Lookahead of at most one Structural character.
233    next_event: Option<Structural>,
234    /// Whether the current JSON node is a list.
235    is_list: bool,
236    /// Index of the next element in the list, if is_list is true.
237    // FIXME: This and is_list can probably be merged into Option<JsonUInt> carrying all the information.
238    array_count: JsonUInt,
239    /// Execution stack.
240    stack: SmallStack,
241    /// Read-only access to the query automaton.
242    automaton: &'i Automaton,
243    /// Handle to the input.
244    input: &'i I,
245    /// Handle to the recorder.
246    recorder: &'r R,
247    /// Resolved SIMD context.
248    simd: V,
249}
250
251/// Initialize the [`Executor`] for the initial state of a query.
252fn query_executor<'i, 'r, I, R, V: Simd>(
253    automaton: &'i Automaton,
254    input: &'i I,
255    recorder: &'r R,
256    simd: V,
257) -> Executor<'i, 'r, I, R, V>
258where
259    I: Input,
260    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
261{
262    Executor {
263        depth: Depth::ZERO,
264        state: automaton.initial_state(),
265        stack: SmallStack::new(),
266        automaton,
267        input,
268        recorder,
269        simd,
270        next_event: None,
271        is_list: false,
272        array_count: JsonUInt::ZERO,
273    }
274}
275
276impl<'i, 'r, I, R, V> Executor<'i, 'r, I, R, V>
277where
278    'i: 'r,
279    I: Input,
280    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
281    V: Simd,
282{
283    fn run(mut self) -> Result<(), EngineError> {
284        // First we check if head-skipping is possible for a given query automaton.
285        // If yes, delegate the control to HeadSkip and give it full access to this Executor.
286        // Otherwise, we run our normal one-shot engine.
287        let mb_head_skip = HeadSkip::new(self.input, self.automaton, self.simd);
288
289        match mb_head_skip {
290            Some(head_skip) => head_skip.run_head_skipping(&mut self),
291            None => self.run_and_exit(),
292        }
293    }
294
295    /// One-shot run of the engine on whatever JSON tree starts at the current input.
296    /// As soon as the tree is closed, the engine exits.
297    fn run_and_exit(mut self) -> Result<(), EngineError> {
298        let iter = self.input.iter_blocks(self.recorder);
299        let quote_classifier = self.simd.classify_quoted_sequences(iter);
300        let structural_classifier = self.simd.classify_structural_characters(quote_classifier);
301        let mut classifier = TailSkip::new(structural_classifier, self.simd);
302
303        self.run_on_subtree(&mut classifier)?;
304
305        self.verify_subtree_closed()
306    }
307
308    /// This is _the_ main loop, the heart and the soul of the engine.
309    /// We loop through the document based on the `classifier`'s outputs and handle each event.
310    /// Once the perceived depth of the document goes to zero, this method terminates.
311    fn run_on_subtree(&mut self, classifier: &mut Classifier!()) -> Result<(), EngineError> {
312        dispatch_simd!(self.simd; self, classifier =>
313        fn<'i, 'r, I, R, V>(eng: &mut Executor<'i, 'r, I, R, V>, classifier: &mut Classifier!()) -> Result<(), EngineError>
314        where
315            'i: 'r,
316            I: Input,
317            R: Recorder<I::Block<'i, BLOCK_SIZE>>,
318            V: Simd
319        {
320            loop {
321                // Fetch the next element only if the lookahead is empty.
322                if eng.next_event.is_none() {
323                    eng.next_event = match classifier.next() {
324                        Ok(e) => e,
325                        Err(err) => return Err(EngineError::InputError(err)),
326                    };
327                }
328                if let Some(event) = eng.next_event.take() {
329                    debug!("====================");
330                    debug!("Event = {:?}", event);
331                    debug!("Depth = {:?}", eng.depth);
332                    debug!("Stack = {:?}", eng.stack);
333                    debug!("State = {:?}", eng.state);
334                    debug!("====================");
335
336                    match event {
337                        Structural::Colon(idx) => eng.handle_colon(classifier, idx)?,
338                        Structural::Comma(idx) => eng.handle_comma(classifier, idx)?,
339                        Structural::Opening(b, idx) => eng.handle_opening(classifier, b, idx)?,
340                        Structural::Closing(_, idx) => {
341                            eng.handle_closing(classifier, idx)?;
342
343                            if eng.depth == Depth::ZERO {
344                                break;
345                            }
346                        }
347                    }
348                } else {
349                    break;
350                }
351            }
352
353            Ok(())
354        })
355    }
356
357    /// Handle a colon at index `idx`.
358    /// This method only handles atomic values after the colon.
359    /// Objects and arrays are processed at their respective opening character.
360    #[inline(always)]
361    fn handle_colon(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
362        debug!("Colon");
363
364        // Lookahead to see if the next character is an opening.
365        // If yes, the logic will be handled in handle_opening and we bail.
366        if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
367            if c == b'{' || c == b'[' {
368                return Ok(());
369            }
370        }
371
372        // Atomic values are only relevant if the automaton accepts.
373        // Look at accepting transitions and try to match them with the label.
374        let mut any_matched = false;
375
376        for (member_name, target) in self.automaton[self.state].member_transitions() {
377            if self.automaton.is_accepting(*target) && self.is_match(idx, member_name.as_ref())? {
378                self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
379                any_matched = true;
380                break;
381            }
382        }
383        // Alternatively, match consider the fallback transition if it accepts.
384        let fallback_state = self.automaton[self.state].fallback_state();
385        if !any_matched && self.automaton.is_accepting(fallback_state) {
386            self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
387        }
388
389        // Tail skipping.
390        // If we are in a unitary state and have matched a transition, we can skip the rest of the subtree,
391        // since member names are unique.
392        if any_matched && self.automaton.is_unitary(self.state) {
393            // We need to look ahead for some bookkeeping.
394            // 1. If the next event is closing then there's no reason to spin up the skipping machinery,
395            //    since it would exit immediately anyway.
396            // 2. If the next character is a comma then we need to notify the recorder.
397            // 3. Realistically, a colon should never happen. An opening is not interesting and will be skipped.
398            self.next_event = classifier.next()?;
399            match self.next_event {
400                None | Some(Structural::Closing(_, _)) => {
401                    return Ok(());
402                }
403                Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
404                Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
405            }
406            let bracket_type = self.current_node_bracket_type();
407            debug!("Skipping unique state from {bracket_type:?}");
408            let stop_at = classifier.skip(bracket_type)?;
409            // Skipping stops at the closing character *and consumes it*. We still need the main loop to properly
410            // handle a closing, so we set the lookahead to the correct character.
411            self.next_event = Some(Structural::Closing(bracket_type, stop_at));
412        }
413
414        Ok(())
415    }
416
417    /// Handle a comma at index `idx`.
418    /// This method only handles atomic values after the comma.
419    /// Objects and arrays are processed at their respective opening character.
420    #[inline(always)]
421    fn handle_comma(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
422        debug!("Comma");
423
424        self.recorder.record_value_terminator(idx, self.depth)?;
425
426        if self.is_list {
427            // If the index increment exceeds the field's limit, give up.
428            if self.array_count.try_increment().is_err() {
429                return Ok(());
430            }
431
432            // Lookahead to see if the next character is an opening.
433            // If yes, the logic will be handled in handle_opening and we bail.
434            if let Some((_, c)) = self.input.seek_non_whitespace_forward(idx + 1).e()? {
435                if c == b'{' || c == b'[' {
436                    return Ok(());
437                }
438            }
439
440            // Check the fallback transition first since it's cheap, then check for the specific index.
441            let is_fallback_accepting = self.automaton.is_accepting(self.automaton[self.state].fallback_state());
442
443            if is_fallback_accepting
444                || self
445                    .automaton
446                    .has_array_index_transition_to_accepting(self.state, &self.array_count)
447            {
448                debug!("Accepting list item on comma.");
449                self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
450
451                if self.automaton.is_unitary(self.state) {
452                    self.next_event = classifier.next()?;
453                    match self.next_event {
454                        None | Some(Structural::Closing(_, _)) => {
455                            return Ok(());
456                        }
457                        Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
458                        Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
459                    }
460                    debug!("Skipping unique state from {:?}", BracketType::Square);
461                    let stop_at = classifier.skip(BracketType::Square)?;
462                    self.next_event = Some(Structural::Closing(BracketType::Square, stop_at));
463                }
464            }
465        }
466
467        Ok(())
468    }
469
470    /// Handle the opening of a subtree with given `bracket_type` at index `idx`.
471    #[inline(always)]
472    fn handle_opening(
473        &mut self,
474        classifier: &mut Classifier!(),
475        bracket_type: BracketType,
476        idx: usize,
477    ) -> Result<(), EngineError> {
478        debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
479
480        // Check all transitions relevant to the current subtree - array if in list, member if not.
481        let mut any_matched = false;
482        if self.is_list {
483            for trans in self.automaton[self.state].array_transitions() {
484                if trans.matches(self.array_count) {
485                    let target = trans.target_state();
486                    any_matched = true;
487                    self.transition_to(target, bracket_type);
488                    if self.automaton.is_accepting(target) {
489                        debug!("Accept {idx}");
490                        self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
491                    }
492                    break;
493                }
494            }
495        } else {
496            let colon_idx = self.find_preceding_colon(idx);
497
498            for (member_name, target) in self.automaton[self.state].member_transitions() {
499                if let Some(colon_idx) = colon_idx {
500                    if self.is_match(colon_idx, member_name.as_ref())? {
501                        any_matched = true;
502                        self.transition_to(*target, bracket_type);
503                        if self.automaton.is_accepting(*target) {
504                            debug!("Accept {idx}");
505                            self.record_match_detected_at(colon_idx + 1, NodeType::Complex(bracket_type))?;
506                        }
507                        break;
508                    }
509                }
510            }
511        }
512
513        // If nothing matched trigger the fallback transition.
514        if !any_matched && self.depth != Depth::ZERO {
515            let fallback = self.automaton[self.state].fallback_state();
516            debug!("Falling back to {fallback}");
517
518            if self.automaton.is_rejecting(fallback) {
519                // Tail skipping. Skip the entire subtree. The skipping consumes the closing character.
520                // We still need to notify the recorder - in case the value being skipped was actually accepted.
521                let closing_idx = classifier.skip(bracket_type)?;
522                return self.recorder.record_value_terminator(closing_idx, self.depth);
523            } else {
524                self.transition_to(fallback, bracket_type);
525            }
526
527            if self.automaton.is_accepting(fallback) {
528                self.record_match_detected_at(idx, NodeType::Complex(bracket_type))?;
529            }
530        }
531
532        // At this point we will be actually digging into the subtree.
533        self.depth
534            .increment()
535            .map_err(|err| EngineError::DepthAboveLimit(idx, err))?;
536
537        self.is_list = bracket_type == BracketType::Square;
538        let mut needs_commas = false;
539
540        // If we're starting a list, there's a very hairy problem of accepting the first element in the list,
541        // if it is atomic. We process objects and arrays on their opening character, and atomics on their preceding comma.
542        // The first element doesn't have a preceding comma, so if it needs to be accepted we need to handle it now.
543        //
544        // Additionally, whether to enable commas or not depends on whether an item of the list can ever be accepted.
545        if self.is_list {
546            let fallback = self.automaton[self.state].fallback_state();
547            let is_fallback_accepting = self.automaton.is_accepting(fallback);
548
549            if is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state) {
550                needs_commas = true;
551                self.array_count = JsonUInt::ZERO;
552                debug!("Initialized array count to {}", self.array_count);
553
554                let wants_first_item =
555                    is_fallback_accepting || self.automaton.has_first_array_index_transition_to_accepting(self.state);
556
557                if wants_first_item {
558                    let next = self.input.seek_non_whitespace_forward(idx + 1).e()?;
559
560                    // We only handle the match if it exists and is atomic. The possible cases
561                    // in a well-formed JSON for the next character are:
562                    // - '[', for an array value
563                    // - '{' for an object value
564                    // - ']' if the list was empty and has no values
565                    // - otherwise it's the first character of an atomic value.
566                    match next {
567                        Some((_, b'[' | b'{' | b']')) => (), // Complex value or empty list.
568                        Some((value_idx, _)) => {
569                            self.record_match_detected_at(value_idx, NodeType::Atomic)?;
570                        }
571                        _ => (),
572                    }
573                }
574            }
575        }
576
577        // Decide which structural characters need to be handled in this subtree.
578        if !self.is_list && self.automaton.has_transition_to_accepting(self.state) {
579            // When accepting values in an object we need colons for the member names,
580            // and commas to report where atomic values end (for the Recorder).
581            // This is the only case that needs colons.
582            classifier.turn_colons_and_commas_on(idx);
583        } else if needs_commas {
584            classifier.turn_colons_off();
585            classifier.turn_commas_on(idx);
586        } else {
587            classifier.turn_colons_and_commas_off();
588        }
589
590        Ok(())
591    }
592
593    /// Handle the closing of a subtree at index `idx`.
594    #[inline(always)]
595    fn handle_closing(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
596        debug!("Closing, decreasing depth and popping stack.");
597
598        self.depth
599            .decrement()
600            .map_err(|err| EngineError::DepthBelowZero(idx, err))?;
601        self.recorder.record_value_terminator(idx, self.depth)?;
602
603        // Restore the state from the stack if the transition was not a loop.
604        if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
605            self.state = stack_frame.state;
606            self.is_list = stack_frame.is_list;
607            self.array_count = stack_frame.array_count;
608
609            debug!("Restored array count to {}", self.array_count);
610
611            // We have taken a transition when entering the just-closed subtree. If the state is unitary
612            // we can just skip the rest of the current subtree.
613            if self.automaton.is_unitary(self.state) {
614                let bracket_type = self.current_node_bracket_type();
615                debug!("Skipping unique state from {bracket_type:?}");
616                let close_idx = classifier.skip(bracket_type)?;
617                // Skipping stops at the closing character *and consumes it*. We still need the main loop to properly
618                // handle a closing, so we set the lookahead to the correct character.
619                self.next_event = Some(Structural::Closing(bracket_type, close_idx));
620                return Ok(());
621            }
622        }
623
624        if self.is_list {
625            if self.automaton.is_accepting(self.automaton[self.state].fallback_state())
626                || self.automaton.has_any_array_item_transition(self.state)
627            {
628                classifier.turn_commas_on(idx);
629            } else {
630                classifier.turn_commas_off();
631            }
632        } else if self.automaton.has_transition_to_accepting(self.state) {
633            classifier.turn_colons_and_commas_on(idx);
634        } else {
635            classifier.turn_colons_off();
636        }
637
638        Ok(())
639    }
640
641    /// Trigger the transition to the `target` state into a new subtree
642    /// that opened with `opening`.
643    #[inline(always)]
644    fn transition_to(&mut self, target: State, opening: BracketType) {
645        let target_is_list = opening == BracketType::Square;
646
647        let fallback = self.automaton[self.state].fallback_state();
648        let is_fallback_accepting = self.automaton.is_accepting(fallback);
649        let searching_list = is_fallback_accepting || self.automaton.has_any_array_item_transition(self.state);
650
651        // To keep the stack small, we only push if the state only changes in any meaningful way.
652        if target != self.state || target_is_list != self.is_list || searching_list {
653            debug!(
654                "push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
655                self.state, self.array_count
656            );
657
658            self.stack.push(StackFrame {
659                depth: *self.depth,
660                state: self.state,
661                is_list: self.is_list,
662                array_count: self.array_count,
663            });
664            self.state = target;
665        }
666    }
667
668    /// Find the preceding non-whitespace character and return its index if it's a colon.
669    fn find_preceding_colon(&self, idx: usize) -> Option<usize> {
670        if self.depth == Depth::ZERO {
671            None
672        } else {
673            let (char_idx, char) = self.input.seek_non_whitespace_backward(idx - 1)?;
674
675            (char == b':').then_some(char_idx)
676        }
677    }
678
679    /// Check if the label ended with a colon at index `idx` matches the `member_name`.
680    #[inline(always)]
681    fn is_match(&self, idx: usize, member_name: &StringPattern) -> Result<bool, EngineError> {
682        let len = member_name.quoted().len();
683
684        // The colon can be preceded by whitespace before the actual label.
685        let closing_quote_idx = match self.input.seek_backward(idx - 1, b'"') {
686            Some(x) => x,
687            None => return Err(EngineError::MalformedStringQuotes(idx - 1)),
688        };
689
690        // First check if the length matches.
691        if closing_quote_idx + 1 < len {
692            return Ok(false);
693        }
694
695        // Do the expensive memcmp.
696        let start_idx = closing_quote_idx + 1 - len;
697        self.input
698            .is_member_match(start_idx, closing_quote_idx + 1, member_name)
699            .map_err(|x| x.into().into())
700    }
701
702    /// Pass information to the Recorder that we found a match of type `ty` at `start_idx`.
703    fn record_match_detected_at(&self, start_idx: usize, ty: NodeType) -> Result<(), EngineError> {
704        debug!("Reporting result somewhere after {start_idx} with node type {ty:?}");
705
706        let index = match ty {
707            NodeType::Complex(BracketType::Curly) => self.input.seek_forward(start_idx, [b'{']).e()?,
708            NodeType::Complex(BracketType::Square) => self.input.seek_forward(start_idx, [b'[']).e()?,
709            NodeType::Atomic => self.input.seek_non_whitespace_forward(start_idx).e()?,
710        }
711        .map(|x| x.0);
712
713        match index {
714            Some(idx) => self.recorder.record_match(idx, self.depth, ty.into()),
715            None => Err(EngineError::MissingItem()),
716        }
717    }
718
719    /// Verify that we have reached zero depth, raise an error if not.
720    fn verify_subtree_closed(&self) -> Result<(), EngineError> {
721        if self.depth != Depth::ZERO {
722            Err(EngineError::MissingClosingCharacter())
723        } else {
724            Ok(())
725        }
726    }
727
728    /// Get the [`BracketType`] of current subtree.
729    fn current_node_bracket_type(&self) -> BracketType {
730        if self.is_list {
731            BracketType::Square
732        } else {
733            BracketType::Curly
734        }
735    }
736}
737
738/// A single frame on the [`Executor`]'s stack enabling restoration of the entire
739/// execution state to before a subtree opening.
740#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
741struct StackFrame {
742    depth: u8,
743    state: State,
744    is_list: bool,
745    array_count: JsonUInt,
746}
747
748#[derive(Debug)]
749struct SmallStack {
750    contents: SmallVec<[StackFrame; 128]>,
751}
752
753impl SmallStack {
754    fn new() -> Self {
755        Self { contents: smallvec![] }
756    }
757
758    #[inline]
759    fn peek(&self) -> Option<StackFrame> {
760        self.contents.last().copied()
761    }
762
763    #[inline]
764    fn pop_if_at_or_below(&mut self, depth: u8) -> Option<StackFrame> {
765        if let Some(stack_frame) = self.peek() {
766            if depth <= stack_frame.depth {
767                return self.contents.pop();
768            }
769        }
770        None
771    }
772
773    #[inline]
774    fn push(&mut self, value: StackFrame) {
775        self.contents.push(value)
776    }
777}
778
779impl<'i, 'r, I, R, V> CanHeadSkip<'i, 'r, I, R, V> for Executor<'i, 'r, I, R, V>
780where
781    I: Input,
782    R: Recorder<I::Block<'i, BLOCK_SIZE>>,
783    V: Simd,
784    'i: 'r,
785{
786    fn run_on_subtree(
787        &mut self,
788        next_event: Structural,
789        state: State,
790        structural_classifier: V::StructuralClassifier<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>>,
791    ) -> Result<ResumeState<'i, I::BlockIterator<'i, 'r, R, BLOCK_SIZE>, V, MaskType>, EngineError> {
792        let mut classifier = TailSkip::new(structural_classifier, self.simd);
793
794        self.state = state;
795        self.next_event = Some(next_event);
796
797        self.run_on_subtree(&mut classifier)?;
798        self.verify_subtree_closed()?;
799
800        Ok(ResumeState(classifier.stop()))
801    }
802
803    fn recorder(&mut self) -> &'r R {
804        self.recorder
805    }
806}
807
808#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
809enum NodeType {
810    Atomic,
811    Complex(BracketType),
812}
813
814impl From<NodeType> for MatchedNodeType {
815    #[inline(always)]
816    fn from(value: NodeType) -> Self {
817        match value {
818            NodeType::Atomic => Self::Atomic,
819            NodeType::Complex(_) => Self::Complex,
820        }
821    }
822}