scopegraphs_regular_expressions/lib.rs
1//! # Scope Graphs Regular Expressions
2//!
3//! To query a scope graph,
4//! you have to specify what paths are valid for the query to take.
5//! You do so using a regular expression over the labels on the edges of the scope graph.
6//!
7//! See <https://docs.rs/scopegraphs> for all documentation
8#![warn(missing_docs)]
9#![cfg_attr(not(docsrs), allow(rustdoc::broken_intra_doc_links))]
10
11use proc_macro2::{LexError, TokenStream};
12use thiserror::Error;
13
14mod compile;
15#[cfg(feature = "dynamic")]
16mod dynamic;
17mod parse;
18mod regex;
19
20mod emit;
21
22#[cfg(feature = "dot")]
23mod dot;
24
25pub use compile::Automaton;
26#[cfg(feature = "dynamic")]
27pub use dynamic::DynamicMatcher;
28pub use regex::Regex;
29
30/// A type that can match a regex. Can be created at compile time
31/// through the [`compile_regex`](scopegraphs::compile_regex) macro,
32/// or at runtime with the `dynamic` feature through [`Automaton::matcher`].
33pub trait RegexMatcher<A>: Clone {
34 /// Takes a transition in the state machine, accepting a single symbol.
35 ///
36 /// If the symbol was not accepted, because the regex doesn't allow it, the new state is empty,
37 /// which can be checked using [`RegexMatcher::is_empty`].
38 fn step(&mut self, inp: A);
39
40 /// Steps once for each element in the iterator
41 fn step_many(&mut self, inp: impl IntoIterator<Item = A>) {
42 for i in inp {
43 self.step(i);
44 }
45 }
46
47 /// Steps once for each element in the iterator
48 fn step_unless_empty(&mut self, inp: impl IntoIterator<Item = A>) {
49 for i in inp {
50 self.step(i);
51 if self.is_empty() {
52 return;
53 }
54 }
55 }
56
57 /// Returns true if the regular expression accepts the input iterator
58 fn accepts(&self, iter: impl IntoIterator<Item = A>) -> bool {
59 let mut this = self.clone();
60 this.step_unless_empty(iter);
61 this.is_accepting()
62 }
63
64 /// Returns true if the regular expression accepts a word of which the input iterator is a prefix
65 fn accepts_prefix(&self, iter: impl IntoIterator<Item = A>) -> bool {
66 let mut this = self.clone();
67 this.step_unless_empty(iter);
68 !this.is_empty()
69 }
70
71 /// Returns whether this state is a final state.
72 ///
73 /// A Final state is a state without outgoing edges
74 fn is_final(&self) -> bool;
75
76 /// Returns whether the current state would accept the regular expression.
77 fn is_accepting(&self) -> bool;
78
79 /// Returns whether the regular expression is stuck:
80 /// there are no more outgoing edges and it's not currently accepting.
81 fn is_empty(&self) -> bool {
82 self.is_final() && !self.is_accepting()
83 }
84}
85
86/// An error that can ocur while parsing a regular expression.
87#[derive(Error, Debug)]
88pub enum ParseError {
89 /// You used tokens that aren't valid Rust tokens
90 ///
91 /// (scope graph regular expressions need to both be valid Rust tokens,
92 /// as well as having a valid regular expression syntax)
93 #[error("lex error: {0}")]
94 Lex(#[from] LexError),
95
96 /// Incorrect syntax for regular expressions.
97 ///
98 /// For correct syntax, see [`parse_regex`]
99 #[error("parse error: {0}")]
100 Parse(#[from] syn::Error),
101}
102
103/// parse a string to a regular expression
104///
105/// the syntax of a regular expression is:
106/// ```bnf
107/// E -> 0
108/// E -> e
109/// E -> <ident>
110/// E -> ~ E
111/// E -> E *
112/// E -> E +
113/// E -> E ?
114/// E -> E E
115/// E -> E | E
116/// E -> E & E
117/// ```
118///
119/// The `<ident>` needs to be a valid rust identifier
120pub fn parse_regex(input: impl AsRef<str>) -> Result<Regex, ParseError> {
121 let stream: TokenStream = input.as_ref().parse()?;
122 Ok(parse_regex_token_stream(stream)?)
123}
124
125/// parse a rust [`TokenStream`](TokenStream) to a regular expression
126///
127/// // TODO: figure out to add a link to scopegraphs-macros
128/// Implementation detail for [`scopegraphs-macros`]()
129#[doc(hidden)]
130pub fn parse_regex_token_stream(input: TokenStream) -> syn::Result<Regex> {
131 syn::parse2(input)
132}