fn_bnf/
lib.rs

1#![no_std]
2#![cfg_attr(any(docsrs, feature = "__internal_test_docs"), allow(internal_features))]
3#![cfg_attr(any(docsrs, feature = "__internal_test_docs"), feature(rustdoc_internals))]
4#![forbid(unsafe_code)]
5#![cfg_attr(feature = "error_in_core", feature(error_in_core))]
6
7/*!
8
9[![Repository](https://img.shields.io/badge/-GitHub-%23181717?style=flat&logo=github&labelColor=%23555555&color=%23181717)](https://github.com/balt-dev/fn-bnf)
10[![Latest version](https://img.shields.io/crates/v/fn-bnf.svg)](
11https://crates.io/crates/fn-bnf)
12[![Documentation](https://docs.rs/fn-bnf/badge.svg)](
13https://docs.rs/fn-bnf)
14[![MSRV](https://img.shields.io/badge/MSRV-1.81.0-red)](
15https://gist.github.com/alexheretic/d1e98d8433b602e57f5d0a9637927e0c)
16[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](
17https://github.com/rust-secure-code/safety-dance/)
18[![License](https://img.shields.io/crates/l/fn-bnf.svg)](
19https://github.com/balt-dev/fn-bnf/blob/master/LICENSE-MIT)
20
21# fn-bnf
22
23This crate contains a `no_std` compatible, low-allocation parsing library
24that uses a BNF-like syntax with the [`define!`] macro to allow for
25using arbitrary Rust items as grammar rules, 
26and for parsing both `str`s and any `[T]` (for example, `[u8]` or `[Token]`).
27
28If you just want to skip to writing grammars, look at the documentation for [`define!`]. 
29
30# Feature flags
31
32This crate has two feature flags:
33- `more_tuple_impls`, raising the amount of elements [`Rule`] is implemented for
34on tuples of [`Rule`]s from 16 to 256 - however, enabling this will raise compilation times dramatically
35- `error_in_core`, enabling use of this library before Rust 1.81.0 on `nightly` compilers - 
36  however, continued support for versions below 1.81.0 is not guaranteed
37
38# A note about the stack
39
40This library's very lifeblood is deep - and likely recursive - function calls.
41You may run into stack overflow issues if you have an overly complex grammar, or are blindly parsing malicious input.
42
43# Licensing
44
45This crate is dual-licensed under the Apache 2.0 or MIT licenses.
46*/
47
48extern crate self as fn_bnf;
49
50use core::{marker::PhantomData, ops::{Index, RangeTo}};
51
52extern crate alloc;
53#[doc(hidden)]
54pub use alloc::{boxed::Box, vec::Vec};
55#[doc(hidden)]
56pub use core::{error::Error, fmt::Write};
57
58mod rules;
59pub use rules::*;
60
61/// Contains some common error types.
62pub mod errors {
63    crate::err! {
64        pub UnmatchedInput: "expected input to match function",
65        pub UnexpectedMatch: "matched input unexpectedly",
66        pub UnexpectedEOF: "unexpected end of input",
67        pub ExhaustedInput: "no options matched for rule",
68        pub NoMatch: "could not match literal"
69    }
70
71    /// A canonical error for when a grammar simply finds something it doesn't expect.
72    #[derive(Debug, Copy, Clone)]
73    pub struct Unexpected<T: core::fmt::Debug> { val: T }
74    impl<T: core::fmt::Debug> Unexpected<T> {
75        /// Creates a new instance of this type.
76        #[inline]
77        pub const fn new(val: T) -> Self {
78            Self { val }
79        }
80    }
81
82    impl<T: core::fmt::Debug> core::fmt::Display for Unexpected<T> {
83        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
84            write!(f, "unexpected token: {:?}", self.val)
85        }
86    }
87
88    impl<T: core::fmt::Debug> core::error::Error for Unexpected<T> {}
89}
90use errors::*;
91
92mod def_tuple;
93mod definitions;
94
95#[derive(Default, Debug)]
96/// Holds data about something that went wrong while parsing a rule.
97pub struct ParseError {
98    /// The underlying error that caused parsing to halt, if any.
99    /// 
100    /// Note that for ZST [`Error`] implementations, like those created by [`crate::err!`],
101    /// `Box<dyn Error>` doesn't allocate.
102    pub cause: Option<Box<dyn Error>>,
103    /// The name of the rule that failed.
104    pub rule_name: Option<&'static str>, 
105    /// The slice index where the rule failed to parse.
106    pub index: usize,
107}
108
109impl ParseError {
110    #[must_use]
111    /// Creates a new instance of a parsing error.
112    pub const fn new(cause: Option<Box<dyn Error>>, rule_name: Option<&'static str>, index: usize) -> Self {
113        Self { cause, rule_name, index }
114    }
115}
116
117impl core::fmt::Display for ParseError {
118    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
119        if !f.alternate() {
120            if let Some(cause) = self.cause.as_ref() {
121                return write!(f, "{cause}");
122            }
123        }
124        write!(f, "error")?;
125        if let Some(name) = self.rule_name {
126            write!(f, " in rule {name}")?;
127        }
128        write!(f, " at index {}", self.index)?;
129        if let Some(cause) = self.cause.as_ref() {
130            write!(f, ": {cause}")?;
131        }
132        Ok(())
133    }
134}
135
136impl Error for ParseError {
137    fn source(&self) -> Option<&(dyn Error + 'static)> {
138        self.cause.as_deref()
139    }
140}
141
142/// Defines a rule's name separate from the [`Rule`] trait.
143/// 
144/// Most of the time, the [derive macro](fn_bnf_macro::NamedRule) works well enough for this purpose.
145/// 
146/// # Why a separate trait?
147/// Since [`Rule`] is bound by its slice type,
148/// accessing this function would require knowing that type unambiguously.
149/// 
150/// However, within [`crate::define`], the macro can only call `.name()`,
151/// as it doesn't know the type of the underlying rule,
152/// _meaning_ that it can't resolve any ambiguity caused by a rule that's generic over multiple slice types.
153/// 
154/// However, as this trait doesn't include the slice type anywhere, there is no ambiguity.
155pub trait NamedRule {
156    /// Defines the name printed in errors including this rule.
157    fn name(&self) -> Option<&'static str> { None }
158}
159
160/// Trait dictating that something can be used as a rule within a parsing grammar.
161/// 
162/// # Implementation
163/// Imlpementing this trait means that anyone can use your struct as a rule
164/// in any of their their grammars with a supported slice type.
165/// 
166/// If you're defining simple rules that don't depend on the input,
167/// you _can_ make rules generic over all slice types!
168/// 
169/// This is done for most of the "helper rules", like [`Any`], in this crate.
170pub trait Rule<'input, SliceType: ?Sized + 'input>: NamedRule {
171    /// The type of the value of a successful parse of this rule.
172    type Output;
173
174    /// Parses a rule at a given index with a given input.
175    /// 
176    /// # Errors
177    /// Errors if the rule fails to parse.
178    /// 
179    /// # Correctness
180    /// When a parse succeeds, you must replace the borrowed input and index
181    /// with a slice of it past the index you stopped parsing at - for example,
182    /// ```ignore
183    /// *input = &input[2..];
184    /// *index += 2;
185    /// ```
186    /// 
187    /// You also must reset the values of `input` and `index` if an error occurs.
188    /// 
189    /// For example, this can be done as follows:
190    /// ```ignore
191    /// let before = (*input, *index);
192    /// // later...
193    /// let res = match inner_rule.parse_at(input, index) {
194    ///     Ok(v) => v,
195    ///     Err(err) => {
196    ///         (*input, *index) = before;
197    ///         return Err(err);
198    ///     }
199    /// }
200    /// ```
201    /// 
202    /// Fsilure to do uphold of these could cause other code using your rule to misbehave,
203    /// potentially inducing panics and/or non-termination.
204    /// 
205    /// **As this is not a safety contract, implementors cannot rely on this for soundness in `unsafe` code.** 
206    fn parse_at<'cursor, 'this, 'index>(&'this self, input: &'cursor mut &'input SliceType, index: &'index mut usize)
207        -> Result<Self::Output, ParseError> where 'input: 'this;
208    
209    /// Parses a given rule at the start of some input.
210    /// 
211    /// # Errors
212    /// Errors if the rule fails to parse.
213    fn parse<'this>(&'this self, input: &'input SliceType)
214        -> Result<(&'input SliceType, Self::Output), ParseError>
215        where 'input: 'this
216    {
217        let input = &mut &*input;
218        self.parse_at(input, &mut 0)
219            .map(|tree| (*input, tree))
220    }
221
222    /// Maps an infallible function onto the output of a rule.
223    #[inline]
224    fn map_parsed<Output, F: Fn(Self::Output) -> Output>(self, function: F) -> Map<'input, SliceType, Self, Output, F> where Self: Sized {
225        Map { inner: self, func: function, _p: PhantomData }
226    }
227
228    /// Maps a function onto the output of a rule, passing the error back if it fails.
229    #[inline]
230    fn try_map_parsed<Output, E: Error + 'static, F: Fn(Self::Output) -> Result<Output, E>>(self, function: F) -> TryMap<'input, SliceType, Self, Output, E, F> where Self: Sized {
231        TryMap { inner: self, func: function, _p: PhantomData }
232    }
233
234    /// Matches when this fails, and vice versa.
235    #[inline]
236    fn prevent(self) -> Not<Self> where Self: Sized {
237        Not(self)
238    }
239
240    /// Repeats this rule a known amount of times.
241    #[inline]
242    fn repeat_for<const REPS: usize>(self) -> RepeatFor<'input, SliceType, Self, REPS> where Self: Sized {
243        RepeatFor::<'input, SliceType, Self, REPS>(self, PhantomData)
244    }
245
246    /// Repeats this rule a set amount of times.
247    #[inline]
248    fn repeat(self, reps: usize) -> Repeat<'input, SliceType, Self> where Self: Sized {
249        Repeat::<'input, SliceType, Self>(self, reps, PhantomData)
250    }
251
252    /// Repeats this rule at most a set amount of times.
253    #[inline]
254    fn take(self, at_most: usize) -> Many<'input, SliceType, Self> where Self: Sized {
255        Many::limited(self, at_most)
256    }
257
258    /// Repeats this rule forever until it fails.
259    #[inline]
260    fn hoard(self) -> Many<'input, SliceType, Self> where Self: Sized {
261        Many::unlimited(self)
262    }
263
264    /// Repeats this rule at most a set amount of times, separated by another rule.
265    #[inline]
266    fn take_sep<R: Rule<'input, SliceType>>(self, separator: R, at_most: usize) -> Separated<'input, SliceType, Self, R> where Self: Sized {
267        Separated::limited(self, separator, at_most)
268    }
269
270    /// Repeats this rule forever until it fails, separated by another rule.
271    #[inline]
272    fn hoard_sep<R: Rule<'input, SliceType>>(self, separator: R) -> Separated<'input, SliceType, Self, R> where Self: Sized {
273        Separated::unlimited(self, separator)
274    }
275
276    /// Repeats this rule until the end of input, failing if it ever does.
277    #[inline]
278    fn consume_all(self) -> Consume<'input, SliceType, Self> 
279        where Self: Sized, 
280        Consume<'input, SliceType, Self>: Rule<'input, SliceType, Output = Vec<Self::Output>>
281    {
282        Consume(self, PhantomData)
283    }
284
285    /// Captures the output and span of this rule, returning them along with the output in a [`Span`].
286    #[inline]
287    fn spanned(self) -> Spanned<'input, SliceType, Self>
288        where Self: Sized, SliceType: Index<RangeTo<usize>, Output = SliceType>
289    {
290        Spanned { rule: self, _p: PhantomData }
291    }
292
293    /// Tries to capture this rule, returning [`None`] if it doesn't match.
294    #[inline]
295    fn attempt(self) -> Attempt<'input, SliceType, Self>
296        where Self: Sized
297    {
298        Attempt(self, PhantomData)
299    }
300}
301
302/// Derive macro generating an impl of the trait `NamedRule`.
303pub use fn_bnf_macro::NamedRule;
304
305// TO EXPLAIN:
306//     The basic syntax
307//     'input and arg_#
308//     AnyRule
309//     Also, linkhere to [`Rule`] and explain there how to impl manually!
310
311/// Allows defining multiple custom rules using a PEG-like syntax.
312/// 
313/// # Syntax and explanation
314/// 
315/// First, we have the name, visibility, and "slice type" of the grammar:
316/// ```ignore
317/// pub grammar Name<Type>
318/// ```
319/// The visibility and name are self-explanatory, 
320/// and the "slice type" determines what type this grammar parses.
321/// 
322/// If you want to parse a string, you can use `str` as your slice type,
323/// or if you want to parse bytes, you create a grammar with type `[u8]`.
324/// 
325/// Note that the type inside a `[T]` can be just about anything, so this works fine:
326/// ```ignore
327/// enum Token { LParen, RParen, Number(f64), Plus, Minus, Asterisk, Slash }
328/// define! {
329///     pub grammar Expression<[Token]> { /* ... */ }
330/// }
331/// ```
332/// However, due to limitations caused by trait implementation collisions[^1], 
333/// you can't use a `T` as a rule for a `[T]` grammar -
334/// you have to use a 1-long array.
335/// 
336/// Moving on, we come to defining individual rules,
337/// which is better described with a few examples:
338/// 
339/// ```ignore
340/// /// Parses either `Tap`, `Tag`, or `Box`.
341/// Sample -> (&'input str, Option<char>) = "Ta", ('p', 'g') : "Box";
342/// ```
343/// 
344/// We see here the basic syntax of defining a rule:
345/// - Any attributes/documentation
346/// - The rule's name
347/// - The rule's return type
348/// - `=`
349/// - A `:` separated list of potential ways to parse the rule (we call these "parsing paths"),
350///   where each option is a list of `,` separated [`Rule`]s.
351/// 
352/// A rule's return type is a tuple with arity determined by its options.
353/// If two different paths have different arity, then
354/// any return types past the minimum amount between all options
355/// will be wrapped in an [`Option`], returning `None` for rules which don't have them.
356/// 
357/// For example, this:
358/// ```ignore
359/// A -> (char, Option<char>) = 'b', 'c' : 'd';
360/// ```
361/// will return `('b', Some('c'))` for `"bc"`, but `("d", None)` for `"d"`.
362/// 
363/// Paths are parsed sequentially, so in the above example, 
364/// 
365/// As a special case, a rule with arity 1 will return its inner type instead of a 1-tuple,
366/// and a rule with arity 0 will return `()`.
367/// 
368/// Let's look at a more complicated example:
369/// ```ignore
370/// /// Parses an unsigned  number, potentially surrounded by parentheses.
371/// pub ParenNumber -> u32 = _ '(', ParenNumber, _ ')' : Number;
372/// /// Parses an unsigned number.
373/// Number -> u32 try_from(u32::from_str) = While::from(char::is_ascii_digit);
374/// ```
375/// 
376/// Here, we see a few things.
377/// 
378/// Firstly, rules support arbitrary documentation and attributes,
379/// all of which will be applied to the generated `struct`.
380/// 
381/// Second, there's support for a visibility in front of a rule - 
382/// a rule must be `pub` (or `pub(super)`, or `pub(crate)`, etc.)
383/// to be accessible outside of the grammar's definition.
384/// 
385/// After that, we see some new syntax in the form of `_`.
386/// Prefixing any part of a path with an underscore makes the grammar
387/// not store its output, and not save it as an argument.
388/// 
389/// Finally, we see `try_from`.
390/// This, along with its infallible counterpart `from`,
391/// takes the output of your entire rule and passes it into a given function.
392/// 
393/// This function will need to take the amount and type of arguments 
394/// equal to the arity and elements of the tuple it would usually output -
395/// in layman's terms, the tuple is "unpacked" into the function, similar to `...` in JavaScript or `*` in Python.
396/// 
397/// A function given to `from` must return the type of the rule, 
398/// and a function given to `try_from` must return a `Result<T, E>`,
399/// where T is the type of the rule, and `E` is any type that implements [`Error`]. 
400/// 
401/// We also see that rules can _recurse_ - although this should be avoided if reasonably possible.
402/// If a rule recurses too deep, then Rust's call stack will be exhausted and your program will crash!
403/// If you truly need deep recursion, look into crates like [`stacker`](https://docs.rs/stacker/).
404/// 
405/// Finally, let's look at an advanced example:
406/// ```ignore
407/// pub Doubled<'input, R> {rule: R, _p: PhantomData<&'input str>} -> &'input str
408///     where R: Rule<'input, str, Output = &'input str>
409///     = &self.rule, _ arg_0;
410/// ```
411/// 
412/// We see here that rules are simply structs, and can take generics and fields as such.
413/// 
414/// Also to note is the usage of `arg_0`.
415/// Previously parsed arguments (ignoring silenced ones) are left in scope as `arg_N`,
416/// where N is the index of the argument.
417/// 
418/// Importantly, `'input` is special - it's always in scope, even if not declared,
419/// and is the lifetime of the data that's being passed in.
420/// 
421/// Anything that implements `for<'input> Rule<'input, T>`
422/// will work in _any_ grammar of slice type `T` - 
423/// they don't have to even be from the same macro!
424/// 
425/// For example, this is fine:
426/// ```ignore
427/// define! {
428///     grammar Theta<str> {
429///         pub Foo -> &'input str = "foo"; 
430///     }
431/// }
432///
433/// define! {
434///     grammar Delta<str> {
435///         pub Bar -> &'input str = Theta::Foo;
436///     }
437/// }
438/// ```
439/// 
440/// Alternatively, if, <sub>God forbid, </sub>something happens so that you need to implement a [`Rule`] manually, 
441/// without the help of this macro, look at the documentation for that trait - it'll tell you everything you need to know.
442/// 
443/// # Example
444/// Below is the entire source code for `examples/math.rs`, showing a simple CLI calculator app.
445/// ```rust
446#[doc = include_str!("../examples/math.rs")]
447/// ```
448/// 
449/// [^1]: It's ambiguous between `[T]` and `[&T]`, and between `[T]` and `[(T, )]`.
450///       Ideally, the library could `impl !Rule` for these, but even disregarding
451///       the fact that that's nightly, it doesn't work - at least, for now.
452///       Said bounds would be equivalent to [specialization](https://github.com/rust-lang/rust/issues/31844),
453///       which is a long way off.
454pub use fn_bnf_macro::define;
455
456// Reimplementing never_say_never because it has documentation that messes with ours
457// never_say_never is under the MIT license, among others: 
458// https://github.com/danielhenrymantilla/never-say-never.rs/blob/master/LICENSE-MIT
459mod fn_trait {
460    pub trait FnOnce<Args> { type Output; }
461    impl<F, R> FnOnce<()> for F where F: core::ops::FnOnce() -> R { type Output = R; }
462}
463use fn_trait::FnOnce;
464
465/// The [`!`] type.
466/// 
467/// This type is (meant to be) unstable, but it's much more useful than
468/// [`core::convert::Infallible`] for writing grammars due to its coercion rules,
469/// so it is exported here.
470/// 
471pub type Never = <fn() -> ! as FnOnce<()>>::Output;