rsonpath/
automaton.rs

1//! Automaton representations of a JSONPath query.
2mod array_transition_set;
3pub mod error;
4mod minimizer;
5mod nfa;
6mod small_set;
7mod state;
8
9pub use state::{State, StateAttributes};
10
11use crate::{automaton::error::CompilerError, debug, string_pattern::StringPattern};
12use nfa::NondeterministicAutomaton;
13use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
14use smallvec::SmallVec;
15use std::{fmt::Display, ops::Index, sync::Arc};
16
17/// A minimal, deterministic automaton representing a JSONPath query.
18#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
19#[derive(Clone, Debug, PartialEq, Eq)]
20pub struct Automaton {
21    states: Vec<StateTable>,
22}
23
24/// Transition when a JSON member name matches a [`StringPattern`].
25pub type MemberTransition = (Arc<StringPattern>, State);
26
27/// Transition on elements of an array with indices specified by either a single index
28/// or a simple slice expression.
29#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
30#[derive(Clone, Debug, PartialEq, Eq)]
31pub struct ArrayTransition {
32    label: ArrayTransitionLabel,
33    target: State,
34}
35
36/// Represent the distinct methods of moving on a match between states.
37#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38#[derive(Debug, Copy, PartialEq, Clone, Eq)]
39pub(super) enum ArrayTransitionLabel {
40    /// Transition on the n-th element of an array, with n specified by a [`JsonUInt`].
41    Index(JsonUInt),
42    /// Transition on elements of array matched by a slice expression - bounds and a step.
43    Slice(SimpleSlice),
44}
45
46/// A transition table of a single [`State`] of an [`Automaton`].
47///
48/// Contains transitions triggered by matching member names or array indices, and a fallback transition
49/// triggered when none of the labelled transitions match.
50#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
51#[derive(Debug, Clone)]
52pub struct StateTable {
53    attributes: StateAttributes,
54    member_transitions: SmallVec<[MemberTransition; 2]>,
55    array_transitions: SmallVec<[ArrayTransition; 2]>,
56    fallback_state: State,
57}
58
59#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
60#[derive(Debug, Copy, PartialEq, Clone, Eq)]
61pub(crate) struct SimpleSlice {
62    start: JsonUInt,
63    end: Option<JsonUInt>,
64    step: JsonUInt,
65}
66
67impl Default for StateTable {
68    #[inline]
69    fn default() -> Self {
70        Self {
71            attributes: StateAttributes::default(),
72            member_transitions: SmallVec::default(),
73            array_transitions: SmallVec::default(),
74            fallback_state: State(0),
75        }
76    }
77}
78
79impl PartialEq for StateTable {
80    #[inline]
81    fn eq(&self, other: &Self) -> bool {
82        return self.fallback_state == other.fallback_state
83            && set_eq(&self.array_transitions, &other.array_transitions)
84            && set_eq(&self.member_transitions, &other.member_transitions)
85            && self.attributes == other.attributes;
86
87        #[inline(always)]
88        fn set_eq<T: Eq, A: smallvec::Array<Item = T>>(left: &SmallVec<A>, right: &SmallVec<A>) -> bool {
89            left.len() == right.len()
90                && left.iter().all(|x| right.contains(x))
91                && right.iter().all(|x| left.contains(x))
92        }
93    }
94}
95
96impl Eq for StateTable {}
97
98impl Index<State> for Automaton {
99    type Output = StateTable;
100
101    #[inline(always)]
102    fn index(&self, index: State) -> &Self::Output {
103        &self.states[index.0 as usize]
104    }
105}
106
107impl ArrayTransition {
108    pub(crate) fn new(label: ArrayTransitionLabel, target: State) -> Self {
109        Self { label, target }
110    }
111
112    #[inline(always)]
113    pub(crate) fn target_state(&self) -> State {
114        self.target
115    }
116
117    #[inline(always)]
118    pub(crate) fn matches(&self, index: JsonUInt) -> bool {
119        self.label.matches(index)
120    }
121}
122
123impl ArrayTransitionLabel {
124    pub(crate) fn matches(&self, index: JsonUInt) -> bool {
125        match self {
126            Self::Index(i) => index.eq(i),
127            Self::Slice(s) => s.contains(index),
128        }
129    }
130
131    fn matches_at_most_once(&self) -> bool {
132        match self {
133            Self::Index(_) => true,
134            Self::Slice(slice) => {
135                slice.step == JsonUInt::ZERO && slice.end.map_or(false, |end| slice.start.as_u64() + 1 >= end.as_u64())
136            }
137        }
138    }
139}
140
141impl From<JsonUInt> for ArrayTransitionLabel {
142    #[must_use]
143    #[inline(always)]
144    fn from(index: JsonUInt) -> Self {
145        Self::Index(index)
146    }
147}
148
149impl From<SimpleSlice> for ArrayTransitionLabel {
150    #[must_use]
151    #[inline(always)]
152    fn from(slice: SimpleSlice) -> Self {
153        Self::Slice(slice)
154    }
155}
156
157impl Automaton {
158    /// Convert a [`JsonPathQuery`] into a minimal deterministic automaton.
159    ///
160    /// # Errors
161    /// - [`CompilerError::QueryTooComplex`] raised if the query is too complex
162    ///   and the automaton size was exceeded.
163    /// - [`CompilerError::NotSupported`] raised if the query contains elements
164    ///   not yet supported by the compiler.
165    #[inline]
166    pub fn new(query: &JsonPathQuery) -> Result<Self, CompilerError> {
167        let nfa = NondeterministicAutomaton::new(query)?;
168        debug!("NFA: {}", nfa);
169        Self::minimize(nfa)
170    }
171
172    /// Returns whether this automaton represents the select-root JSONPath query ('$').
173    ///
174    /// # Examples
175    /// ```rust
176    /// # use rsonpath::automaton::*;
177    /// let query = rsonpath_syntax::parse("$").unwrap();
178    /// let automaton = Automaton::new(&query).unwrap();
179    ///
180    /// assert!(automaton.is_select_root_query());
181    /// ```
182    ///
183    /// ```rust
184    /// # use rsonpath::automaton::*;
185    /// let query = rsonpath_syntax::parse("$.a").unwrap();
186    /// let automaton = Automaton::new(&query).unwrap();
187    ///
188    /// assert!(!automaton.is_select_root_query());
189    /// ```
190    #[must_use]
191    #[inline(always)]
192    pub fn is_select_root_query(&self) -> bool {
193        self.states.len() == 2
194            && self.states[1].array_transitions.is_empty()
195            && self.states[1].member_transitions.is_empty()
196            && self.states[1].fallback_state == State(0)
197            && self.states[1].attributes.is_accepting()
198    }
199
200    /// Returns whether this automaton represents an empty JSONPath query that cannot accept anything.
201    ///
202    /// A query like this can be created by, for example, putting a trivially false filter
203    /// or an empty slice into the query.
204    ///
205    /// # Examples
206    /// ```rust
207    /// # use rsonpath::automaton::*;
208    /// let query = rsonpath_syntax::parse("$[::0]").unwrap();
209    /// let automaton = Automaton::new(&query).unwrap();
210    ///
211    /// assert!(automaton.is_empty_query());
212    /// ```
213    ///
214    /// ```rust
215    /// # use rsonpath::automaton::*;
216    /// let query = rsonpath_syntax::parse("$").unwrap();
217    /// let automaton = Automaton::new(&query).unwrap();
218    ///
219    /// assert!(!automaton.is_empty_query());
220    /// ```
221    #[must_use]
222    #[inline(always)]
223    pub fn is_empty_query(&self) -> bool {
224        self.states.len() == 2
225            && self.states[1].array_transitions.is_empty()
226            && self.states[1].member_transitions.is_empty()
227            && self.states[1].fallback_state == State(0)
228            && !self.states[1].attributes.is_accepting()
229    }
230
231    /// Returns the rejecting state of the automaton.
232    ///
233    /// The state is defined as the unique state from which there
234    /// exists no accepting run. If the query automaton reaches this state,
235    /// the current subtree is guaranteed to have no matches.
236    #[must_use]
237    #[inline(always)]
238    #[allow(clippy::unused_self)] /* This is for stability. If the implementation changes so that
239                                   * this is not always a 0 we don't want to have to change callsites.
240                                   */
241    pub fn rejecting_state(&self) -> State {
242        State(0)
243    }
244
245    /// Returns the initial state of the automaton.
246    ///
247    /// Query execution should start from this state.
248    #[must_use]
249    #[inline(always)]
250    #[allow(clippy::unused_self)] /* This is for stability. If the implementation changes so that
251                                   * this is not always a 1 we don't want to have to change callsites.
252                                   */
253    pub fn initial_state(&self) -> State {
254        State(1)
255    }
256
257    /// Returns whether the given state is accepting.
258    ///
259    /// # Example
260    /// ```rust
261    /// # use rsonpath::automaton::*;
262    /// let query = rsonpath_syntax::parse("$.a").unwrap();
263    /// let automaton = Automaton::new(&query).unwrap();
264    /// let state_2 = automaton[automaton.initial_state()].member_transitions()[0].1;
265    ///
266    /// assert!(automaton.is_accepting(state_2));
267    /// ```
268    #[must_use]
269    #[inline(always)]
270    pub fn is_accepting(&self, state: State) -> bool {
271        self[state].attributes.is_accepting()
272    }
273
274    /// Returns whether the given state transitions to any list.
275    ///
276    /// # Example
277    /// ```rust
278    /// # use rsonpath::automaton::*;
279    /// let query = rsonpath_syntax::parse("$[2]").unwrap();
280    /// let automaton = Automaton::new(&query).unwrap();
281    /// let state = automaton.initial_state();
282    ///
283    /// assert!(automaton.has_any_array_item_transition(state));
284    /// ```
285    #[must_use]
286    #[inline(always)]
287    pub fn has_any_array_item_transition(&self, state: State) -> bool {
288        self[state].attributes.has_array_transition()
289    }
290
291    /// Returns whether the given state is accepting the first item in a list.
292    ///
293    /// # Example
294    /// ```rust
295    /// # use rsonpath::automaton::*;
296    /// let query = rsonpath_syntax::parse("$[0]").unwrap();
297    /// let automaton = Automaton::new(&query).unwrap();
298    /// let state = automaton.initial_state();
299    ///
300    /// assert!(automaton.has_first_array_index_transition_to_accepting(state));
301    /// ```
302    /// ```rust
303    /// # use rsonpath::automaton::*;
304    /// let query = rsonpath_syntax::parse("$[1]").unwrap();
305    /// let automaton = Automaton::new(&query).unwrap();
306    /// let state = automaton.initial_state();
307    ///
308    /// assert!(!automaton.has_first_array_index_transition_to_accepting(state));
309    /// ```
310    #[must_use]
311    #[inline(always)]
312    pub fn has_first_array_index_transition_to_accepting(&self, state: State) -> bool {
313        self.has_array_index_transition_to_accepting(state, &JsonUInt::ZERO)
314    }
315
316    /// Returns whether the given state is accepting the item at a given index in a list.
317    ///
318    /// # Example
319    /// ```rust
320    /// # use rsonpath_syntax::num::JsonUInt;
321    /// # use rsonpath::automaton::*;
322    /// let query = rsonpath_syntax::parse("$[1]").unwrap();
323    /// let automaton = Automaton::new(&query).unwrap();
324    /// let state = automaton.initial_state();
325    /// let match_index_1 = JsonUInt::try_from(1).unwrap();
326    /// let match_index_2 = JsonUInt::try_from(2).unwrap();
327    ///
328    /// assert!(automaton.has_array_index_transition_to_accepting(state, &match_index_1));
329    /// assert!(!automaton.has_array_index_transition_to_accepting(state, &match_index_2));
330    /// ```
331    #[must_use]
332    #[inline(always)]
333    pub fn has_array_index_transition_to_accepting(&self, state: State, match_index: &JsonUInt) -> bool {
334        let state = &self[state];
335        state.attributes.has_array_transition_to_accepting()
336            && state
337                .array_transitions()
338                .iter()
339                .any(|trans| self.is_accepting(trans.target) && trans.matches(*match_index))
340    }
341
342    /// Returns whether the given state has any transitions
343    /// (labelled or fallback) to an accepting state.
344    ///
345    /// # Example
346    /// ```rust
347    /// # use rsonpath::automaton::*;
348    /// let query = rsonpath_syntax::parse("$.a").unwrap();
349    /// let automaton = Automaton::new(&query).unwrap();
350    ///
351    /// assert!(automaton.has_transition_to_accepting(automaton.initial_state()));
352    /// ```
353    #[must_use]
354    #[inline(always)]
355    pub fn has_transition_to_accepting(&self, state: State) -> bool {
356        self[state].attributes.has_transition_to_accepting()
357    }
358
359    /// Returns whether the given state is rejecting, i.e.
360    /// there exist no accepting runs from it.
361    ///
362    /// # Example
363    /// ```rust
364    /// # use rsonpath::automaton::*;
365    /// let query = rsonpath_syntax::parse("$.a").unwrap();
366    /// let automaton = Automaton::new(&query).unwrap();
367    ///
368    /// assert!(automaton.is_rejecting(automaton.rejecting_state()));
369    /// ```
370    #[must_use]
371    #[inline(always)]
372    pub fn is_rejecting(&self, state: State) -> bool {
373        self[state].attributes.is_rejecting()
374    }
375
376    /// Returns whether the given state is unitary.
377    /// A unitary state is one that has exactly one labelled transition
378    /// and its fallback targets the rejecting state.
379    ///
380    /// Intuitively, there exists only one label that progresses towards
381    /// acceptance from this state.
382    ///
383    /// # Example
384    /// ```rust
385    /// # use rsonpath::automaton::*;
386    /// let query = rsonpath_syntax::parse("$.a").unwrap();
387    /// let automaton = Automaton::new(&query).unwrap();
388    ///
389    /// assert!(automaton.is_unitary(automaton.initial_state()));
390    /// ```
391    #[must_use]
392    #[inline(always)]
393    pub fn is_unitary(&self, state: State) -> bool {
394        self[state].attributes.is_unitary()
395    }
396
397    fn minimize(nfa: NondeterministicAutomaton) -> Result<Self, CompilerError> {
398        minimizer::minimize(nfa)
399    }
400}
401
402impl StateTable {
403    /// Returns the state to which a fallback transition leads.
404    ///
405    /// A fallback transition is the catch-all transition triggered
406    /// if none of the transitions were triggered.
407    #[must_use]
408    #[inline(always)]
409    pub fn fallback_state(&self) -> State {
410        self.fallback_state
411    }
412
413    /// Returns the collection of labelled array transitions from this state.
414    ///
415    /// A transition is triggered if the [`ArrayTransition`] is matched and leads
416    /// to the contained [`State`].
417    #[must_use]
418    #[inline(always)]
419    pub fn array_transitions(&self) -> &[ArrayTransition] {
420        &self.array_transitions
421    }
422
423    /// Returns the collection of labelled member transitions from this state.
424    ///
425    /// A transition is triggered if the [`MemberTransition`] is matched and leads
426    /// to the contained [`State`].
427    #[must_use]
428    #[inline(always)]
429    pub fn member_transitions(&self) -> &[MemberTransition] {
430        &self.member_transitions
431    }
432}
433
434impl Display for ArrayTransitionLabel {
435    #[inline(always)]
436    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
437        match self {
438            Self::Index(index) => write!(f, "{}", index.as_u64()),
439            Self::Slice(slice) => {
440                if let Some(end) = slice.end {
441                    write!(f, "[{}:{}:{}]", slice.start, end, slice.step)
442                } else {
443                    write!(f, "[{}::{}]", slice.start, slice.step)
444                }
445            }
446        }
447    }
448}
449
450impl Display for Automaton {
451    #[inline]
452    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
453        writeln!(f, "digraph {{")?;
454
455        for (i, state) in self.states.iter().enumerate() {
456            let mut color_one = "fillcolor=\"white;0.5";
457            let mut color_two = ":white\"";
458            let mut shape = "shape=circle";
459
460            if state.attributes.is_accepting() {
461                shape = "shape=doublecircle";
462            }
463            if state.attributes.is_unitary() {
464                color_one = "fillcolor=\"darkgoldenrod2;0.5";
465            }
466            if state.attributes.has_transition_to_accepting() {
467                color_two = ":dodgerblue\"";
468            }
469            if state.attributes.is_rejecting() {
470                color_one = "fillcolor=gray";
471                color_two = "";
472            }
473
474            let attrs = [shape, "style=filled", "gradientangle=45", color_one, color_two].join(" ");
475
476            writeln!(f, "node [{attrs}]; {i}")?;
477        }
478
479        for (i, transitions) in self.states.iter().enumerate() {
480            for array_transition in &transitions.array_transitions {
481                match array_transition.label {
482                    ArrayTransitionLabel::Index(label) => writeln!(
483                        f,
484                        "  {i} -> {} [label=\"[{}]\"]",
485                        array_transition.target.0,
486                        label.as_u64()
487                    )?,
488                    ArrayTransitionLabel::Slice(label) => {
489                        if let Some(end) = label.end {
490                            writeln!(
491                                f,
492                                "  {i} -> {} [label=\"[{}:{}:{}]\"]",
493                                array_transition.target.0,
494                                label.start.as_u64(),
495                                end.as_u64(),
496                                label.step.as_u64()
497                            )?
498                        } else {
499                            writeln!(
500                                f,
501                                "  {i} -> {} [label=\"[{}::{}]\"]",
502                                array_transition.target.0,
503                                label.start.as_u64(),
504                                label.step.as_u64()
505                            )?
506                        }
507                    }
508                }
509            }
510            for (label, state) in &transitions.member_transitions {
511                writeln!(
512                    f,
513                    "  {i} -> {} [label=\"{}\"]",
514                    state.0,
515                    std::str::from_utf8(label.unquoted()).expect("labels to be valid utf8")
516                )?
517            }
518            writeln!(f, "  {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?;
519        }
520        write!(f, "}}")?;
521        Ok(())
522    }
523}
524
525impl SimpleSlice {
526    fn new(start: JsonUInt, end: Option<JsonUInt>, step: JsonUInt) -> Self {
527        Self { start, end, step }
528    }
529
530    #[inline(always)]
531    #[must_use]
532    fn contains(&self, index: JsonUInt) -> bool {
533        if index < self.start {
534            return false;
535        }
536        let offset = index.as_u64() - self.start.as_u64();
537        if let Some(end) = self.end {
538            index < end && offset % self.step.as_u64() == 0
539        } else {
540            offset % self.step.as_u64() == 0
541        }
542    }
543}
544
545#[cfg(test)]
546mod tests {
547    use super::SimpleSlice;
548    use rsonpath_syntax::num::JsonUInt;
549    use test_case::test_case;
550
551    #[test_case(0.into(), None, 1.into(), 0.into() => true)]
552    #[test_case(2.into(), None, 1.into(), 3.into() => true)]
553    #[test_case(2.into(), None, 2.into(), 3.into() => false)]
554    #[test_case(3.into(), None, 2.into(), 3.into() => true)]
555    #[test_case(2.into(), None, 2.into(), 4.into() => true)]
556    #[test_case(2.into(), Some(6.into()), 2.into(), 2.into() => true)]
557    #[test_case(2.into(), Some(6.into()), 2.into(), 6.into() => false)]
558    fn simple_slice_containment(start: JsonUInt, end: Option<JsonUInt>, step: JsonUInt, idx: JsonUInt) -> bool {
559        let slice = SimpleSlice::new(start, end, step);
560        slice.contains(idx)
561    }
562}