Skip to main content

rsonpath/
automaton.rs

1//! Automaton representations of a JSONPath query.
2mod array_transition_set;
3pub mod error;
4mod minimizer;
5mod nfa;
6mod small_set;
7mod state;
8
9pub use state::{State, StateAttributes};
10
11use crate::{automaton::error::CompilerError, debug, string_pattern::StringPattern};
12use nfa::NondeterministicAutomaton;
13use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
14use smallvec::SmallVec;
15use std::{fmt::Display, ops::Index, sync::Arc};
16
17/// A minimal, deterministic automaton representing a JSONPath query.
18#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
19#[derive(Clone, Debug, PartialEq, Eq)]
20pub struct Automaton {
21    states: Vec<StateTable>,
22}
23
24/// Transition when a JSON member name matches a [`StringPattern`].
25pub type MemberTransition = (Arc<StringPattern>, State);
26
27/// Transition on elements of an array with indices specified by either a single index
28/// or a simple slice expression.
29#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
30#[derive(Clone, Debug, PartialEq, Eq)]
31pub struct ArrayTransition {
32    label: ArrayTransitionLabel,
33    target: State,
34}
35
36/// Represent the distinct methods of moving on a match between states.
37#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
38#[derive(Debug, Copy, PartialEq, Clone, Eq)]
39pub(super) enum ArrayTransitionLabel {
40    /// Transition on the n-th element of an array, with n specified by a [`JsonUInt`].
41    Index(JsonUInt),
42    /// Transition on elements of array matched by a slice expression - bounds and a step.
43    Slice(SimpleSlice),
44}
45
46/// A transition table of a single [`State`] of an [`Automaton`].
47///
48/// Contains transitions triggered by matching member names or array indices, and a fallback transition
49/// triggered when none of the labelled transitions match.
50#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
51#[derive(Debug, Clone)]
52pub struct StateTable {
53    attributes: StateAttributes,
54    member_transitions: SmallVec<[MemberTransition; 2]>,
55    array_transitions: SmallVec<[ArrayTransition; 2]>,
56    fallback_state: State,
57}
58
59#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
60#[derive(Debug, Copy, PartialEq, Clone, Eq)]
61pub(crate) struct SimpleSlice {
62    start: JsonUInt,
63    end: Option<JsonUInt>,
64    step: JsonUInt,
65}
66
67impl Default for StateTable {
68    #[inline]
69    fn default() -> Self {
70        Self {
71            attributes: StateAttributes::default(),
72            member_transitions: SmallVec::default(),
73            array_transitions: SmallVec::default(),
74            fallback_state: State(0),
75        }
76    }
77}
78
79impl PartialEq for StateTable {
80    #[inline]
81    fn eq(&self, other: &Self) -> bool {
82        return self.fallback_state == other.fallback_state
83            && set_eq(&self.array_transitions, &other.array_transitions)
84            && set_eq(&self.member_transitions, &other.member_transitions)
85            && self.attributes == other.attributes;
86
87        #[inline(always)]
88        fn set_eq<T: Eq, A: smallvec::Array<Item = T>>(left: &SmallVec<A>, right: &SmallVec<A>) -> bool {
89            left.len() == right.len()
90                && left.iter().all(|x| right.contains(x))
91                && right.iter().all(|x| left.contains(x))
92        }
93    }
94}
95
96impl Eq for StateTable {}
97
98impl Index<State> for Automaton {
99    type Output = StateTable;
100
101    #[inline(always)]
102    fn index(&self, index: State) -> &Self::Output {
103        &self.states[index.0 as usize]
104    }
105}
106
107impl ArrayTransition {
108    pub(crate) fn new(label: ArrayTransitionLabel, target: State) -> Self {
109        Self { label, target }
110    }
111
112    #[inline(always)]
113    pub(crate) fn target_state(&self) -> State {
114        self.target
115    }
116
117    #[inline(always)]
118    pub(crate) fn matches(&self, index: JsonUInt) -> bool {
119        self.label.matches(index)
120    }
121}
122
123impl ArrayTransitionLabel {
124    pub(crate) fn matches(&self, index: JsonUInt) -> bool {
125        match self {
126            Self::Index(i) => index.eq(i),
127            Self::Slice(s) => s.contains(index),
128        }
129    }
130
131    fn matches_at_most_once(&self) -> bool {
132        match self {
133            Self::Index(_) => true,
134            Self::Slice(slice) => {
135                slice.step == JsonUInt::ZERO && slice.end.is_some_and(|end| slice.start.as_u64() + 1 >= end.as_u64())
136            }
137        }
138    }
139}
140
141impl From<JsonUInt> for ArrayTransitionLabel {
142    #[inline(always)]
143    fn from(index: JsonUInt) -> Self {
144        Self::Index(index)
145    }
146}
147
148impl From<SimpleSlice> for ArrayTransitionLabel {
149    #[inline(always)]
150    fn from(slice: SimpleSlice) -> Self {
151        Self::Slice(slice)
152    }
153}
154
155impl Automaton {
156    /// Convert a [`JsonPathQuery`] into a minimal deterministic automaton.
157    ///
158    /// # Errors
159    /// - [`CompilerError::QueryTooComplex`] raised if the query is too complex
160    ///   and the automaton size was exceeded.
161    /// - [`CompilerError::NotSupported`] raised if the query contains elements
162    ///   not yet supported by the compiler.
163    #[inline]
164    pub fn new(query: &JsonPathQuery) -> Result<Self, CompilerError> {
165        let nfa = NondeterministicAutomaton::new(query)?;
166        debug!("NFA: {}", nfa);
167        Self::minimize(nfa)
168    }
169
170    /// Returns whether this automaton represents the select-root JSONPath query ('$').
171    ///
172    /// # Examples
173    /// ```rust
174    /// # use rsonpath::automaton::*;
175    /// let query = rsonpath_syntax::parse("$").unwrap();
176    /// let automaton = Automaton::new(&query).unwrap();
177    ///
178    /// assert!(automaton.is_select_root_query());
179    /// ```
180    ///
181    /// ```rust
182    /// # use rsonpath::automaton::*;
183    /// let query = rsonpath_syntax::parse("$.a").unwrap();
184    /// let automaton = Automaton::new(&query).unwrap();
185    ///
186    /// assert!(!automaton.is_select_root_query());
187    /// ```
188    #[must_use]
189    #[inline(always)]
190    pub fn is_select_root_query(&self) -> bool {
191        self.states.len() == 2
192            && self.states[1].array_transitions.is_empty()
193            && self.states[1].member_transitions.is_empty()
194            && self.states[1].fallback_state == State(0)
195            && self.states[1].attributes.is_accepting()
196    }
197
198    /// Returns whether this automaton represents an empty JSONPath query that cannot accept anything.
199    ///
200    /// A query like this can be created by, for example, putting a trivially false filter
201    /// or an empty slice into the query.
202    ///
203    /// # Examples
204    /// ```rust
205    /// # use rsonpath::automaton::*;
206    /// let query = rsonpath_syntax::parse("$[::0]").unwrap();
207    /// let automaton = Automaton::new(&query).unwrap();
208    ///
209    /// assert!(automaton.is_empty_query());
210    /// ```
211    ///
212    /// ```rust
213    /// # use rsonpath::automaton::*;
214    /// let query = rsonpath_syntax::parse("$").unwrap();
215    /// let automaton = Automaton::new(&query).unwrap();
216    ///
217    /// assert!(!automaton.is_empty_query());
218    /// ```
219    #[must_use]
220    #[inline(always)]
221    pub fn is_empty_query(&self) -> bool {
222        self.states.len() == 2
223            && self.states[1].array_transitions.is_empty()
224            && self.states[1].member_transitions.is_empty()
225            && self.states[1].fallback_state == State(0)
226            && !self.states[1].attributes.is_accepting()
227    }
228
229    /// Returns the rejecting state of the automaton.
230    ///
231    /// The state is defined as the unique state from which there
232    /// exists no accepting run. If the query automaton reaches this state,
233    /// the current subtree is guaranteed to have no matches.
234    #[must_use]
235    #[inline(always)]
236    #[allow(
237        clippy::unused_self,
238        reason = "This is for stability. If the implementation changes so that
239                                   this is not always a 0 we don't want to have to change callsites."
240    )]
241    pub fn rejecting_state(&self) -> State {
242        State(0)
243    }
244
245    /// Returns the initial state of the automaton.
246    ///
247    /// Query execution should start from this state.
248    #[must_use]
249    #[inline(always)]
250    #[allow(
251        clippy::unused_self,
252        reason = "This is for stability. If the implementation changes so that
253                                   this is not always a 1 we don't want to have to change callsites."
254    )]
255    pub fn initial_state(&self) -> State {
256        State(1)
257    }
258
259    /// Returns whether the given state is accepting.
260    ///
261    /// # Example
262    /// ```rust
263    /// # use rsonpath::automaton::*;
264    /// let query = rsonpath_syntax::parse("$.a").unwrap();
265    /// let automaton = Automaton::new(&query).unwrap();
266    /// let state_2 = automaton[automaton.initial_state()].member_transitions()[0].1;
267    ///
268    /// assert!(automaton.is_accepting(state_2));
269    /// ```
270    #[must_use]
271    #[inline(always)]
272    pub fn is_accepting(&self, state: State) -> bool {
273        self[state].attributes.is_accepting()
274    }
275
276    /// Returns whether the given state transitions to any list.
277    ///
278    /// # Example
279    /// ```rust
280    /// # use rsonpath::automaton::*;
281    /// let query = rsonpath_syntax::parse("$[2]").unwrap();
282    /// let automaton = Automaton::new(&query).unwrap();
283    /// let state = automaton.initial_state();
284    ///
285    /// assert!(automaton.has_any_array_item_transition(state));
286    /// ```
287    #[must_use]
288    #[inline(always)]
289    pub fn has_any_array_item_transition(&self, state: State) -> bool {
290        self[state].attributes.has_array_transition()
291    }
292
293    /// Returns whether the given state is accepting the first item in a list.
294    ///
295    /// # Example
296    /// ```rust
297    /// # use rsonpath::automaton::*;
298    /// let query = rsonpath_syntax::parse("$[0]").unwrap();
299    /// let automaton = Automaton::new(&query).unwrap();
300    /// let state = automaton.initial_state();
301    ///
302    /// assert!(automaton.has_first_array_index_transition_to_accepting(state));
303    /// ```
304    /// ```rust
305    /// # use rsonpath::automaton::*;
306    /// let query = rsonpath_syntax::parse("$[1]").unwrap();
307    /// let automaton = Automaton::new(&query).unwrap();
308    /// let state = automaton.initial_state();
309    ///
310    /// assert!(!automaton.has_first_array_index_transition_to_accepting(state));
311    /// ```
312    #[must_use]
313    #[inline(always)]
314    pub fn has_first_array_index_transition_to_accepting(&self, state: State) -> bool {
315        self.has_array_index_transition_to_accepting(state, &JsonUInt::ZERO)
316    }
317
318    /// Returns whether the given state is accepting the item at a given index in a list.
319    ///
320    /// # Example
321    /// ```rust
322    /// # use rsonpath_syntax::num::JsonUInt;
323    /// # use rsonpath::automaton::*;
324    /// let query = rsonpath_syntax::parse("$[1]").unwrap();
325    /// let automaton = Automaton::new(&query).unwrap();
326    /// let state = automaton.initial_state();
327    /// let match_index_1 = JsonUInt::try_from(1).unwrap();
328    /// let match_index_2 = JsonUInt::try_from(2).unwrap();
329    ///
330    /// assert!(automaton.has_array_index_transition_to_accepting(state, &match_index_1));
331    /// assert!(!automaton.has_array_index_transition_to_accepting(state, &match_index_2));
332    /// ```
333    #[must_use]
334    #[inline(always)]
335    pub fn has_array_index_transition_to_accepting(&self, state: State, match_index: &JsonUInt) -> bool {
336        let state = &self[state];
337        state.attributes.has_array_transition_to_accepting()
338            && state
339                .array_transitions()
340                .iter()
341                .any(|trans| self.is_accepting(trans.target) && trans.matches(*match_index))
342    }
343
344    /// Returns whether the given state has any transitions
345    /// (labelled or fallback) to an accepting state.
346    ///
347    /// # Example
348    /// ```rust
349    /// # use rsonpath::automaton::*;
350    /// let query = rsonpath_syntax::parse("$.a").unwrap();
351    /// let automaton = Automaton::new(&query).unwrap();
352    ///
353    /// assert!(automaton.has_transition_to_accepting(automaton.initial_state()));
354    /// ```
355    #[must_use]
356    #[inline(always)]
357    pub fn has_transition_to_accepting(&self, state: State) -> bool {
358        self[state].attributes.has_transition_to_accepting()
359    }
360
361    /// Returns whether the given state is rejecting, i.e.
362    /// there exist no accepting runs from it.
363    ///
364    /// # Example
365    /// ```rust
366    /// # use rsonpath::automaton::*;
367    /// let query = rsonpath_syntax::parse("$.a").unwrap();
368    /// let automaton = Automaton::new(&query).unwrap();
369    ///
370    /// assert!(automaton.is_rejecting(automaton.rejecting_state()));
371    /// ```
372    #[must_use]
373    #[inline(always)]
374    pub fn is_rejecting(&self, state: State) -> bool {
375        self[state].attributes.is_rejecting()
376    }
377
378    /// Returns whether the given state is unitary.
379    /// A unitary state is one that has exactly one labelled transition
380    /// and its fallback targets the rejecting state.
381    ///
382    /// Intuitively, there exists only one label that progresses towards
383    /// acceptance from this state.
384    ///
385    /// # Example
386    /// ```rust
387    /// # use rsonpath::automaton::*;
388    /// let query = rsonpath_syntax::parse("$.a").unwrap();
389    /// let automaton = Automaton::new(&query).unwrap();
390    ///
391    /// assert!(automaton.is_unitary(automaton.initial_state()));
392    /// ```
393    #[must_use]
394    #[inline(always)]
395    pub fn is_unitary(&self, state: State) -> bool {
396        self[state].attributes.is_unitary()
397    }
398
399    fn minimize(nfa: NondeterministicAutomaton) -> Result<Self, CompilerError> {
400        minimizer::minimize(nfa)
401    }
402}
403
404impl StateTable {
405    /// Returns the state to which a fallback transition leads.
406    ///
407    /// A fallback transition is the catch-all transition triggered
408    /// if none of the transitions were triggered.
409    #[must_use]
410    #[inline(always)]
411    pub fn fallback_state(&self) -> State {
412        self.fallback_state
413    }
414
415    /// Returns the collection of labelled array transitions from this state.
416    ///
417    /// A transition is triggered if the [`ArrayTransition`] is matched and leads
418    /// to the contained [`State`].
419    #[must_use]
420    #[inline(always)]
421    pub fn array_transitions(&self) -> &[ArrayTransition] {
422        &self.array_transitions
423    }
424
425    /// Returns the collection of labelled member transitions from this state.
426    ///
427    /// A transition is triggered if the [`MemberTransition`] is matched and leads
428    /// to the contained [`State`].
429    #[must_use]
430    #[inline(always)]
431    pub fn member_transitions(&self) -> &[MemberTransition] {
432        &self.member_transitions
433    }
434}
435
436impl Display for ArrayTransitionLabel {
437    #[inline(always)]
438    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
439        match self {
440            Self::Index(index) => write!(f, "{}", index.as_u64()),
441            Self::Slice(slice) => {
442                if let Some(end) = slice.end {
443                    write!(f, "[{}:{}:{}]", slice.start, end, slice.step)
444                } else {
445                    write!(f, "[{}::{}]", slice.start, slice.step)
446                }
447            }
448        }
449    }
450}
451
452impl Display for Automaton {
453    #[inline]
454    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455        writeln!(f, "digraph {{")?;
456
457        for (i, state) in self.states.iter().enumerate() {
458            let mut color_one = "fillcolor=\"white;0.5";
459            let mut color_two = ":white\"";
460            let mut shape = "shape=circle";
461
462            if state.attributes.is_accepting() {
463                shape = "shape=doublecircle";
464            }
465            if state.attributes.is_unitary() {
466                color_one = "fillcolor=\"darkgoldenrod2;0.5";
467            }
468            if state.attributes.has_transition_to_accepting() {
469                color_two = ":dodgerblue\"";
470            }
471            if state.attributes.is_rejecting() {
472                color_one = "fillcolor=gray";
473                color_two = "";
474            }
475
476            let attrs = [shape, "style=filled", "gradientangle=45", color_one, color_two].join(" ");
477
478            writeln!(f, "node [{attrs}]; {i}")?;
479        }
480
481        for (i, transitions) in self.states.iter().enumerate() {
482            for array_transition in &transitions.array_transitions {
483                match array_transition.label {
484                    ArrayTransitionLabel::Index(label) => writeln!(
485                        f,
486                        "  {i} -> {} [label=\"[{}]\"]",
487                        array_transition.target.0,
488                        label.as_u64()
489                    )?,
490                    ArrayTransitionLabel::Slice(label) => {
491                        if let Some(end) = label.end {
492                            writeln!(
493                                f,
494                                "  {i} -> {} [label=\"[{}:{}:{}]\"]",
495                                array_transition.target.0,
496                                label.start.as_u64(),
497                                end.as_u64(),
498                                label.step.as_u64()
499                            )?
500                        } else {
501                            writeln!(
502                                f,
503                                "  {i} -> {} [label=\"[{}::{}]\"]",
504                                array_transition.target.0,
505                                label.start.as_u64(),
506                                label.step.as_u64()
507                            )?
508                        }
509                    }
510                }
511            }
512            for (label, state) in &transitions.member_transitions {
513                writeln!(
514                    f,
515                    "  {i} -> {} [label=\"{}\"]",
516                    state.0,
517                    std::str::from_utf8(label.unquoted()).expect("labels to be valid utf8")
518                )?
519            }
520            writeln!(f, "  {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?;
521        }
522        write!(f, "}}")?;
523        Ok(())
524    }
525}
526
527impl SimpleSlice {
528    fn new(start: JsonUInt, end: Option<JsonUInt>, step: JsonUInt) -> Self {
529        Self { start, end, step }
530    }
531
532    #[inline(always)]
533    #[must_use]
534    fn contains(&self, index: JsonUInt) -> bool {
535        if index < self.start {
536            return false;
537        }
538        let offset = index.as_u64() - self.start.as_u64();
539        if let Some(end) = self.end {
540            index < end && offset.is_multiple_of(self.step.as_u64())
541        } else {
542            offset.is_multiple_of(self.step.as_u64())
543        }
544    }
545}
546
547#[cfg(test)]
548mod tests {
549    use super::SimpleSlice;
550    use rsonpath_syntax::num::JsonUInt;
551    use test_case::test_case;
552
553    #[test_case(0.into(), None, 1.into(), 0.into() => true)]
554    #[test_case(2.into(), None, 1.into(), 3.into() => true)]
555    #[test_case(2.into(), None, 2.into(), 3.into() => false)]
556    #[test_case(3.into(), None, 2.into(), 3.into() => true)]
557    #[test_case(2.into(), None, 2.into(), 4.into() => true)]
558    #[test_case(2.into(), Some(6.into()), 2.into(), 2.into() => true)]
559    #[test_case(2.into(), Some(6.into()), 2.into(), 6.into() => false)]
560    fn simple_slice_containment(start: JsonUInt, end: Option<JsonUInt>, step: JsonUInt, idx: JsonUInt) -> bool {
561        let slice = SimpleSlice::new(start, end, step);
562        slice.contains(idx)
563    }
564}