brzozowski_regex/lib.rs
1// Copyright 2024 Hendrik van Antwerpen
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! A regular expression library over sequences of arbitrary symbols, based on
16//! the regular expression derivatives of Brzozowski (1964). The implementation
17//! is is heavily based on the description by Owens et al (2009).
18//!
19//! Regular expressions can be defined over any alphabet type that implements the
20//! traits `Clone`, `Eq`, `Hash`, and `Ord`. The following constructs are supported:
21//!
22//! - The empty set `∅`
23//! - The empty string `ε`
24//! - A symbol _s_
25//! - Concatenation `R S`
26//! - Closure `R*`
27//! - Disjunction `R | S`
28//! - Conjunction `R & R`
29//! - Complement `¬R`
30//!
31//! ## Constructing regular expressions
32//!
33//! Regular expressions can be constructed using builder methods or, more conveniently,
34//! using short-hands.
35//!
36//! An example using builder methods:
37//!
38//! ```
39//! use brzozowski_regex::Regex;
40//!
41//! let r = Regex::and(
42//! Regex::complement(Regex::empty_set()),
43//! Regex::concat(
44//! Regex::concat(
45//! Regex::closure(Regex::symbol(5)),
46//! Regex::symbol(7),
47//! ),
48//! Regex::or(Regex::empty_string(), Regex::symbol(11)),
49//! ),
50//! );
51//! ```
52//!
53//! Constructing regular expression susing these builder methods can be quite verbose.
54//! A set of short-hands is available to write regular expression literals more compactly:
55//!
56//! - `EXPR.s()` builds a symbol from the expressions value.
57//! - `().r()` builds an empty set.
58//! - `[ REGEX* ].r()` builds a concatenation from the given regular expressions. If
59//! the array is empty, it builds the empty string.
60//! - `REGEX.c()` build the closure of the given regular expression.
61//!
62//! Additionally, the following operations are overloaded for regular expressions:
63//!
64//! - `REGEX + REGEX` builds the concatenation of the two given regular expressions.
65//! - `REGEX | REGEX` builds the logical _or_ of the two given regular expressions.
66//! - `REGEX & REGEX` builds the logical _and_ of the two given regular expressions.
67//! - `!REGEX` builds the complement of the given regular expression.
68//!
69//! The same example using short-hands and operators:
70//!
71//! ```
72//! use brzozowski_regex::syntax::*; // imports short-hands
73//! use brzozowski_regex::Regex;
74//!
75//! let r = !().r() & [ 5.s().c(), 7.s(), ([].r() | 11.s()) ].r();
76//! ```
77//!
78//! ## Matching with automata
79//!
80//! The recommended way to match regular expressions is to turn them into an automaton.
81//! One can either match a whole sequence of symbols at once against the automaton, or
82//! create a matcher for incremental matching.
83//!
84//! Matching a whole sequence at once works as follows:
85//!
86//! ```
87//! use brzozowski_regex::syntax::*; // import short-hands
88//! use brzozowski_regex::Regex;
89//!
90//! let r = !().r() & [ 5.s().c(), 7.s(), ([].r() | 11.s()) ].r();
91//! let a = r.to_automaton();
92//! assert!(a.matches([5, 5, 7, 11]));
93//! ```
94//!
95//! Using a matcher allows for incremental matching. After matching a symbol or
96//! sequence of symbols the matcher returns one of the following results:
97//!
98//! - _accept_: the sequence until now matches the regular expression
99//! - _may-accept_: the sequence is not a match, but adding more symbols may
100//! lead to an accept.
101//! - _reject_: the sequence is not a match, and adding more symbols will not
102//! change that.
103//!
104//! The matcher can also return the current residual regular expression.
105//!
106//! A matcher is used as follows:
107//!
108//! ```
109//! use brzozowski_regex::syntax::*; // import short-hands
110//! use brzozowski_regex::Acceptance;
111//! use brzozowski_regex::Regex;
112//!
113//! let r = !().r() & [ 5.s().c(), 7.s(), ([].r() | 11.s()) ].r();
114//! let a = r.to_automaton();
115//! let mut m = a.to_matcher();
116//! assert_eq!(Acceptance::MayAccept, m.next(&5));
117//! assert_eq!(Acceptance::Accept, m.next_iter([5, 7]));
118//! assert_eq!(&(11.s() | [].r()), m.regex());
119//! assert_eq!(Acceptance::Reject, m.next(&13));
120//! ```
121//!
122//! ## Directly working with derivatives
123//!
124//! It is also possible to run operations on regular expressions directly, such as
125//! testing for nullability, computing the derivative w.r.t. a symbol or a sequence
126//! of symbols, and matching a sequence of symbols. _Note that matching directly
127//! against regular expression value is very inefficient compared to constructing an
128//! automaton first!_
129//!
130//! Examples of operation on regular expression values:
131//!
132//! ```
133//! use brzozowski_regex::syntax::*; // import short-hands
134//! use brzozowski_regex::Regex;
135//!
136//! let r = [7.s(), 11.s().c()].r();
137//! assert!(r.matches([7, 11]));
138//!
139//! let s = r.derive(&7);
140//! assert!(s.is_nullable());
141//!
142//! let s = s.derive_iter([11, 13]);
143//! assert!(!s.is_nullable());
144//! ```
145//!
146//! ## References
147//!
148//! - Brzozowski, Janusz A. “Derivatives of Regular Expressions.” J. ACM 11, no. 4 (October 1964): 481–94. <https://doi.org/10.1145/321239.321249>.
149//! - Owens, Scott, John Reppy, and Aaron Turon. “Regular-Expression Derivatives Re-Examined.” Journal of Functional Programming 19, no. 2 (March 2009): 173–90. <https://doi.org/10.1017/S0956796808007090>.
150
151use std::hash::Hash;
152
153mod automaton;
154pub mod builder;
155mod derivation;
156mod display;
157mod nullability;
158mod ops;
159
160/// Trait describing types that can be used as an alphabet for regular expression
161/// queries.
162pub trait Alphabet: Clone + Eq + Hash + Ord {}
163
164impl<S> Alphabet for S where S: Clone + Eq + Hash + Ord {}
165
166/// Regular expressions over symbols of the given alphabet.
167pub type Regex<S> = builder::Regex<builder::Default<S>>;
168
169#[doc(inline)]
170pub use automaton::Acceptance;
171#[doc(inline)]
172pub use automaton::FiniteAutomaton;
173#[doc(inline)]
174pub use automaton::Matcher;
175
176pub mod syntax {
177 //! Short-hand syntax for writing regular expression literals.
178
179 use super::*;
180
181 pub trait IntoRegex<S: Alphabet> {
182 fn r(self) -> Regex<S>;
183 }
184
185 impl<S: Alphabet, T> IntoRegex<S> for T
186 where
187 T: ops::IntoRegex<builder::Default<S>>,
188 {
189 fn r(self) -> Regex<S> {
190 <Self as ops::IntoRegex<builder::Default<S>>>::r(self)
191 }
192 }
193
194 pub trait IntoSymbol<S: Alphabet> {
195 fn s(self) -> Regex<S>;
196 }
197
198 impl<S: Alphabet, T> IntoSymbol<S> for T
199 where
200 T: ops::IntoSymbol<builder::Default<S>>,
201 {
202 fn s(self) -> Regex<S> {
203 <Self as ops::IntoSymbol<builder::Default<S>>>::s(self)
204 }
205 }
206
207 pub trait IntoClosure<S: Alphabet> {
208 fn c(self) -> Regex<S>;
209 }
210
211 impl<S: Alphabet, T> IntoClosure<S> for T
212 where
213 T: ops::IntoClosure<builder::Default<S>>,
214 {
215 fn c(self) -> Regex<S> {
216 <Self as ops::IntoClosure<builder::Default<S>>>::c(self)
217 }
218 }
219}