inator/
lib.rs

1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
5 */
6
7//! Nondeterministic finite automata with epsilon transitions and algorithms to compile them into optimal deterministic finite automata.
8
9#![deny(warnings)]
10#![allow(unknown_lints)]
11#![warn(
12    clippy::all,
13    clippy::missing_docs_in_private_items,
14    clippy::nursery,
15    clippy::pedantic,
16    clippy::perf,
17    clippy::restriction,
18    clippy::cargo,
19    elided_lifetimes_in_paths,
20    missing_docs,
21    rustdoc::all
22)]
23// https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html
24#![warn(
25    absolute_paths_not_starting_with_crate,
26    elided_lifetimes_in_paths,
27    explicit_outlives_requirements,
28    keyword_idents,
29    let_underscore_drop,
30    macro_use_extern_crate,
31    meta_variable_misuse,
32    missing_abi,
33    missing_copy_implementations,
34    missing_debug_implementations,
35    missing_docs,
36    non_ascii_idents,
37    noop_method_call,
38    pointer_structural_match,
39    rust_2021_incompatible_closure_captures,
40    rust_2021_incompatible_or_patterns,
41    rust_2021_prefixes_incompatible_syntax,
42    rust_2021_prelude_collisions,
43    single_use_lifetimes,
44    trivial_casts,
45    trivial_numeric_casts,
46    unreachable_pub,
47    unsafe_code,
48    unsafe_op_in_unsafe_fn,
49    unstable_features,
50    unused_crate_dependencies,
51    unused_extern_crates,
52    unused_import_braces,
53    unused_lifetimes,
54    unused_macro_rules,
55    unused_qualifications,
56    unused_results,
57    unused_tuple_struct_fields,
58    variant_size_differences
59)]
60#![allow(
61    clippy::blanket_clippy_restriction_lints,
62    clippy::cargo_common_metadata,
63    clippy::expect_used,
64    clippy::implicit_return,
65    clippy::inline_always,
66    clippy::let_underscore_untyped,
67    clippy::min_ident_chars,
68    clippy::missing_trait_methods,
69    clippy::mod_module_files,
70    clippy::multiple_unsafe_ops_per_block,
71    clippy::needless_borrowed_reference,
72    clippy::partial_pub_fields,
73    clippy::pub_use,
74    clippy::pub_with_shorthand,
75    clippy::question_mark_used,
76    clippy::redundant_pub_crate,
77    clippy::ref_patterns,
78    clippy::semicolon_outside_block,
79    clippy::separated_literal_suffix,
80    clippy::similar_names,
81    clippy::single_call_fn,
82    clippy::single_char_lifetime_names,
83    clippy::std_instead_of_alloc,
84    clippy::string_add,
85    clippy::use_self,
86    clippy::wildcard_imports
87)]
88
89/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
90#[cfg(any(debug_assertions, test))]
91macro_rules! unwrap {
92    ($expr:expr) => {
93        $expr.unwrap()
94    };
95}
96
97/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
98#[cfg(not(any(debug_assertions, test)))]
99macro_rules! unwrap {
100    ($expr:expr) => {{
101        #[allow(unsafe_code)]
102        let result = unsafe { $expr.unwrap_unchecked() };
103        result
104    }};
105}
106
107/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
108#[cfg(any(debug_assertions, test))]
109macro_rules! get {
110    ($expr:expr, $index:expr) => {
111        $expr.get($index).unwrap()
112    };
113}
114
115/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
116#[cfg(not(any(debug_assertions, test)))]
117macro_rules! get {
118    ($expr:expr, $index:expr) => {{
119        #[allow(unsafe_code)]
120        let result = unsafe { $expr.get_unchecked($index) };
121        result
122    }};
123}
124
125/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
126#[cfg(any(debug_assertions, test))]
127macro_rules! get_mut {
128    ($expr:expr, $index:expr) => {
129        $expr.get_mut($index).unwrap()
130    };
131}
132
133/// Unwrap if we're debugging but `unwrap_unchecked` if we're not.
134#[cfg(not(any(debug_assertions, test)))]
135macro_rules! get_mut {
136    ($expr:expr, $index:expr) => {{
137        #[allow(unsafe_code)]
138        let result = unsafe { $expr.get_unchecked_mut($index) };
139        result
140    }};
141}
142
143// TODO: write an inherent impl for `Nfa<char>` with a bunch of stuff like `parenthesized`
144
145// TODO: have a recommended path for each thing, e.g. instead of `optional` have `encouraged` and `discouraged` then use this to format
146
147mod brzozowski;
148mod dfa;
149mod expr;
150mod fuzz;
151mod nfa;
152mod ops;
153mod powerset_construction;
154
155#[cfg(test)]
156mod test;
157
158pub use {
159    dfa::Graph as Compiled,
160    expr::Expression,
161    fuzz::{Fuzzer, NeverAccepts},
162    ops::Lazy as Parser,
163};
164
165use ops::Lazy;
166
167/// Promise a parser that can't be computed yet (usually because it nests itself).
168#[must_use]
169#[inline(always)]
170#[allow(clippy::ref_option_ref)]
171pub const fn postponed<I: Clone + Ord>() -> Lazy<I> {
172    Lazy::Postponed
173}
174
175/// Accept only the empty string.
176#[must_use]
177#[inline(always)]
178pub fn empty<I: Clone + Ord>() -> Lazy<I> {
179    Lazy::Immediate(nfa::Graph::empty())
180}
181
182/// Accept this token if we see it here, but throw it away.
183#[must_use]
184#[inline(always)]
185pub fn ignore<I: Clone + Ord>(token: I) -> Lazy<I> {
186    Lazy::Immediate(nfa::Graph::unit(token, None))
187}
188
189/// Accept this token if we see it here, then call this user-defined function on it.
190#[must_use]
191#[inline(always)]
192pub fn on<I: Clone + Ord>(token: I, fn_name: &'static str) -> Lazy<I> {
193    Lazy::Immediate(nfa::Graph::unit(token, Some(fn_name)))
194}
195
196/// Accept this sequence of tokens if we see it here, then call this user-defined function on it.
197#[must_use]
198#[inline(always)]
199#[allow(clippy::arithmetic_side_effects)]
200pub fn on_seq<I: Clone + Ord, II: IntoIterator<Item = I>>(
201    tokens: II,
202    fn_name: &'static str,
203) -> Lazy<I> {
204    let mut v: Vec<_> = tokens.into_iter().collect();
205    let Some(last) = v.pop() else {
206        return empty();
207    };
208    seq(v.into_iter().map(|token| ignore(token))) >> on(last, fn_name)
209}
210
211/// Accept either this token or nothing.
212#[inline]
213#[must_use]
214pub fn opt<I: Clone + Ord>(token: I) -> Lazy<I> {
215    ignore(token).optional()
216}
217
218/// A single character of whitespace (or exactly one "\r\n").
219#[inline]
220#[must_use]
221#[allow(clippy::arithmetic_side_effects)]
222pub fn single_space() -> Lazy<char> {
223    ignore(' ') | ignore('\n') | (ignore('\r') >> ignore('\n'))
224}
225
226/// Any amount of whitespace.
227#[inline]
228#[must_use]
229#[allow(clippy::arithmetic_side_effects)]
230pub fn space() -> Lazy<char> {
231    single_space().star()
232}
233
234/// Surround this language in parentheses.
235/// Note that whitespace around the language--e.g. "( A )"--is fine.
236#[inline]
237#[must_use]
238#[allow(clippy::arithmetic_side_effects)]
239pub fn parenthesized(p: Lazy<char>) -> Lazy<char> {
240    ignore('(') + p + ignore(')')
241}
242
243/// Accept anything accepted by any of these parsers.
244#[inline]
245#[must_use]
246pub fn any<I: Clone + Ord, II: IntoIterator<Item = Lazy<I>>>(alternatives: II) -> Lazy<I> {
247    alternatives
248        .into_iter()
249        .fold(Lazy::Immediate(nfa::Graph::void()), |acc, p| acc | p)
250}
251
252/// Accept if and only if each parser accepts in order.
253#[inline]
254#[must_use]
255#[allow(clippy::arithmetic_side_effects)]
256pub fn seq<I: Clone + Ord, II: IntoIterator<Item = Lazy<I>>>(alternatives: II) -> Lazy<I> {
257    alternatives
258        .into_iter()
259        .fold(Lazy::Immediate(nfa::Graph::void()), |acc, p| acc >> p)
260}