fn_bnf/lib.rs
1#![no_std]
2#![cfg_attr(any(docsrs, feature = "__internal_test_docs"), allow(internal_features))]
3#![cfg_attr(any(docsrs, feature = "__internal_test_docs"), feature(rustdoc_internals))]
4#![forbid(unsafe_code)]
5#![cfg_attr(feature = "error_in_core", feature(error_in_core))]
6
7/*!
8
9[](https://github.com/balt-dev/fn-bnf)
10[](
11https://crates.io/crates/fn-bnf)
12[](
13https://docs.rs/fn-bnf)
14[](
15https://gist.github.com/alexheretic/d1e98d8433b602e57f5d0a9637927e0c)
16[](
17https://github.com/rust-secure-code/safety-dance/)
18[](
19https://github.com/balt-dev/fn-bnf/blob/master/LICENSE-MIT)
20
21# fn-bnf
22
23This crate contains a `no_std` compatible, low-allocation parsing library
24that uses a BNF-like syntax with the [`define!`] macro to allow for
25using arbitrary Rust items as grammar rules,
26and for parsing both `str`s and any `[T]` (for example, `[u8]` or `[Token]`).
27
28If you just want to skip to writing grammars, look at the documentation for [`define!`].
29
30# Feature flags
31
32This crate has two feature flags:
33- `more_tuple_impls`, raising the amount of elements [`Rule`] is implemented for
34on tuples of [`Rule`]s from 16 to 256 - however, enabling this will raise compilation times dramatically
35- `error_in_core`, enabling use of this library before Rust 1.81.0 on `nightly` compilers -
36 however, continued support for versions below 1.81.0 is not guaranteed
37
38# A note about the stack
39
40This library's very lifeblood is deep - and likely recursive - function calls.
41You may run into stack overflow issues if you have an overly complex grammar, or are blindly parsing malicious input.
42
43# Licensing
44
45This crate is dual-licensed under the Apache 2.0 or MIT licenses.
46*/
47
48extern crate self as fn_bnf;
49
50use core::{marker::PhantomData, ops::{Index, RangeTo}};
51
52extern crate alloc;
53#[doc(hidden)]
54pub use alloc::{boxed::Box, vec::Vec};
55#[doc(hidden)]
56pub use core::{error::Error, fmt::Write};
57
58mod rules;
59pub use rules::*;
60
61/// Contains some common error types.
62pub mod errors {
63 crate::err! {
64 pub UnmatchedInput: "expected input to match function",
65 pub UnexpectedMatch: "matched input unexpectedly",
66 pub UnexpectedEOF: "unexpected end of input",
67 pub ExhaustedInput: "no options matched for rule",
68 pub NoMatch: "could not match literal"
69 }
70
71 /// A canonical error for when a grammar simply finds something it doesn't expect.
72 #[derive(Debug, Copy, Clone)]
73 pub struct Unexpected<T: core::fmt::Debug> { val: T }
74 impl<T: core::fmt::Debug> Unexpected<T> {
75 /// Creates a new instance of this type.
76 #[inline]
77 pub const fn new(val: T) -> Self {
78 Self { val }
79 }
80 }
81
82 impl<T: core::fmt::Debug> core::fmt::Display for Unexpected<T> {
83 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
84 write!(f, "unexpected token: {:?}", self.val)
85 }
86 }
87
88 impl<T: core::fmt::Debug> core::error::Error for Unexpected<T> {}
89}
90use errors::*;
91
92mod def_tuple;
93mod definitions;
94
95#[derive(Default, Debug)]
96/// Holds data about something that went wrong while parsing a rule.
97pub struct ParseError {
98 /// The underlying error that caused parsing to halt, if any.
99 ///
100 /// Note that for ZST [`Error`] implementations, like those created by [`crate::err!`],
101 /// `Box<dyn Error>` doesn't allocate.
102 pub cause: Option<Box<dyn Error>>,
103 /// The name of the rule that failed.
104 pub rule_name: Option<&'static str>,
105 /// The slice index where the rule failed to parse.
106 pub index: usize,
107}
108
109impl ParseError {
110 #[must_use]
111 /// Creates a new instance of a parsing error.
112 pub const fn new(cause: Option<Box<dyn Error>>, rule_name: Option<&'static str>, index: usize) -> Self {
113 Self { cause, rule_name, index }
114 }
115}
116
117impl core::fmt::Display for ParseError {
118 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
119 if !f.alternate() {
120 if let Some(cause) = self.cause.as_ref() {
121 return write!(f, "{cause}");
122 }
123 }
124 write!(f, "error")?;
125 if let Some(name) = self.rule_name {
126 write!(f, " in rule {name}")?;
127 }
128 write!(f, " at index {}", self.index)?;
129 if let Some(cause) = self.cause.as_ref() {
130 write!(f, ": {cause}")?;
131 }
132 Ok(())
133 }
134}
135
136impl Error for ParseError {
137 fn source(&self) -> Option<&(dyn Error + 'static)> {
138 self.cause.as_deref()
139 }
140}
141
142/// Defines a rule's name separate from the [`Rule`] trait.
143///
144/// Most of the time, the [derive macro](fn_bnf_macro::NamedRule) works well enough for this purpose.
145///
146/// # Why a separate trait?
147/// Since [`Rule`] is bound by its slice type,
148/// accessing this function would require knowing that type unambiguously.
149///
150/// However, within [`crate::define`], the macro can only call `.name()`,
151/// as it doesn't know the type of the underlying rule,
152/// _meaning_ that it can't resolve any ambiguity caused by a rule that's generic over multiple slice types.
153///
154/// However, as this trait doesn't include the slice type anywhere, there is no ambiguity.
155pub trait NamedRule {
156 /// Defines the name printed in errors including this rule.
157 fn name(&self) -> Option<&'static str> { None }
158}
159
160/// Trait dictating that something can be used as a rule within a parsing grammar.
161///
162/// # Implementation
163/// Imlpementing this trait means that anyone can use your struct as a rule
164/// in any of their their grammars with a supported slice type.
165///
166/// If you're defining simple rules that don't depend on the input,
167/// you _can_ make rules generic over all slice types!
168///
169/// This is done for most of the "helper rules", like [`Any`], in this crate.
170pub trait Rule<'input, SliceType: ?Sized + 'input>: NamedRule {
171 /// The type of the value of a successful parse of this rule.
172 type Output;
173
174 /// Parses a rule at a given index with a given input.
175 ///
176 /// # Errors
177 /// Errors if the rule fails to parse.
178 ///
179 /// # Correctness
180 /// When a parse succeeds, you must replace the borrowed input and index
181 /// with a slice of it past the index you stopped parsing at - for example,
182 /// ```ignore
183 /// *input = &input[2..];
184 /// *index += 2;
185 /// ```
186 ///
187 /// You also must reset the values of `input` and `index` if an error occurs.
188 ///
189 /// For example, this can be done as follows:
190 /// ```ignore
191 /// let before = (*input, *index);
192 /// // later...
193 /// let res = match inner_rule.parse_at(input, index) {
194 /// Ok(v) => v,
195 /// Err(err) => {
196 /// (*input, *index) = before;
197 /// return Err(err);
198 /// }
199 /// }
200 /// ```
201 ///
202 /// Fsilure to do uphold of these could cause other code using your rule to misbehave,
203 /// potentially inducing panics and/or non-termination.
204 ///
205 /// **As this is not a safety contract, implementors cannot rely on this for soundness in `unsafe` code.**
206 fn parse_at<'cursor, 'this, 'index>(&'this self, input: &'cursor mut &'input SliceType, index: &'index mut usize)
207 -> Result<Self::Output, ParseError> where 'input: 'this;
208
209 /// Parses a given rule at the start of some input.
210 ///
211 /// # Errors
212 /// Errors if the rule fails to parse.
213 fn parse<'this>(&'this self, input: &'input SliceType)
214 -> Result<(&'input SliceType, Self::Output), ParseError>
215 where 'input: 'this
216 {
217 let input = &mut &*input;
218 self.parse_at(input, &mut 0)
219 .map(|tree| (*input, tree))
220 }
221
222 /// Maps an infallible function onto the output of a rule.
223 #[inline]
224 fn map_parsed<Output, F: Fn(Self::Output) -> Output>(self, function: F) -> Map<'input, SliceType, Self, Output, F> where Self: Sized {
225 Map { inner: self, func: function, _p: PhantomData }
226 }
227
228 /// Maps a function onto the output of a rule, passing the error back if it fails.
229 #[inline]
230 fn try_map_parsed<Output, E: Error + 'static, F: Fn(Self::Output) -> Result<Output, E>>(self, function: F) -> TryMap<'input, SliceType, Self, Output, E, F> where Self: Sized {
231 TryMap { inner: self, func: function, _p: PhantomData }
232 }
233
234 /// Matches when this fails, and vice versa.
235 #[inline]
236 fn prevent(self) -> Not<Self> where Self: Sized {
237 Not(self)
238 }
239
240 /// Repeats this rule a known amount of times.
241 #[inline]
242 fn repeat_for<const REPS: usize>(self) -> RepeatFor<'input, SliceType, Self, REPS> where Self: Sized {
243 RepeatFor::<'input, SliceType, Self, REPS>(self, PhantomData)
244 }
245
246 /// Repeats this rule a set amount of times.
247 #[inline]
248 fn repeat(self, reps: usize) -> Repeat<'input, SliceType, Self> where Self: Sized {
249 Repeat::<'input, SliceType, Self>(self, reps, PhantomData)
250 }
251
252 /// Repeats this rule at most a set amount of times.
253 #[inline]
254 fn take(self, at_most: usize) -> Many<'input, SliceType, Self> where Self: Sized {
255 Many::limited(self, at_most)
256 }
257
258 /// Repeats this rule forever until it fails.
259 #[inline]
260 fn hoard(self) -> Many<'input, SliceType, Self> where Self: Sized {
261 Many::unlimited(self)
262 }
263
264 /// Repeats this rule at most a set amount of times, separated by another rule.
265 #[inline]
266 fn take_sep<R: Rule<'input, SliceType>>(self, separator: R, at_most: usize) -> Separated<'input, SliceType, Self, R> where Self: Sized {
267 Separated::limited(self, separator, at_most)
268 }
269
270 /// Repeats this rule forever until it fails, separated by another rule.
271 #[inline]
272 fn hoard_sep<R: Rule<'input, SliceType>>(self, separator: R) -> Separated<'input, SliceType, Self, R> where Self: Sized {
273 Separated::unlimited(self, separator)
274 }
275
276 /// Repeats this rule until the end of input, failing if it ever does.
277 #[inline]
278 fn consume_all(self) -> Consume<'input, SliceType, Self>
279 where Self: Sized,
280 Consume<'input, SliceType, Self>: Rule<'input, SliceType, Output = Vec<Self::Output>>
281 {
282 Consume(self, PhantomData)
283 }
284
285 /// Captures the output and span of this rule, returning them along with the output in a [`Span`].
286 #[inline]
287 fn spanned(self) -> Spanned<'input, SliceType, Self>
288 where Self: Sized, SliceType: Index<RangeTo<usize>, Output = SliceType>
289 {
290 Spanned { rule: self, _p: PhantomData }
291 }
292
293 /// Tries to capture this rule, returning [`None`] if it doesn't match.
294 #[inline]
295 fn attempt(self) -> Attempt<'input, SliceType, Self>
296 where Self: Sized
297 {
298 Attempt(self, PhantomData)
299 }
300}
301
302/// Derive macro generating an impl of the trait `NamedRule`.
303pub use fn_bnf_macro::NamedRule;
304
305// TO EXPLAIN:
306// The basic syntax
307// 'input and arg_#
308// AnyRule
309// Also, linkhere to [`Rule`] and explain there how to impl manually!
310
311/// Allows defining multiple custom rules using a PEG-like syntax.
312///
313/// # Syntax and explanation
314///
315/// First, we have the name, visibility, and "slice type" of the grammar:
316/// ```ignore
317/// pub grammar Name<Type>
318/// ```
319/// The visibility and name are self-explanatory,
320/// and the "slice type" determines what type this grammar parses.
321///
322/// If you want to parse a string, you can use `str` as your slice type,
323/// or if you want to parse bytes, you create a grammar with type `[u8]`.
324///
325/// Note that the type inside a `[T]` can be just about anything, so this works fine:
326/// ```ignore
327/// enum Token { LParen, RParen, Number(f64), Plus, Minus, Asterisk, Slash }
328/// define! {
329/// pub grammar Expression<[Token]> { /* ... */ }
330/// }
331/// ```
332/// However, due to limitations caused by trait implementation collisions[^1],
333/// you can't use a `T` as a rule for a `[T]` grammar -
334/// you have to use a 1-long array.
335///
336/// Moving on, we come to defining individual rules,
337/// which is better described with a few examples:
338///
339/// ```ignore
340/// /// Parses either `Tap`, `Tag`, or `Box`.
341/// Sample -> (&'input str, Option<char>) = "Ta", ('p', 'g') : "Box";
342/// ```
343///
344/// We see here the basic syntax of defining a rule:
345/// - Any attributes/documentation
346/// - The rule's name
347/// - The rule's return type
348/// - `=`
349/// - A `:` separated list of potential ways to parse the rule (we call these "parsing paths"),
350/// where each option is a list of `,` separated [`Rule`]s.
351///
352/// A rule's return type is a tuple with arity determined by its options.
353/// If two different paths have different arity, then
354/// any return types past the minimum amount between all options
355/// will be wrapped in an [`Option`], returning `None` for rules which don't have them.
356///
357/// For example, this:
358/// ```ignore
359/// A -> (char, Option<char>) = 'b', 'c' : 'd';
360/// ```
361/// will return `('b', Some('c'))` for `"bc"`, but `("d", None)` for `"d"`.
362///
363/// Paths are parsed sequentially, so in the above example,
364///
365/// As a special case, a rule with arity 1 will return its inner type instead of a 1-tuple,
366/// and a rule with arity 0 will return `()`.
367///
368/// Let's look at a more complicated example:
369/// ```ignore
370/// /// Parses an unsigned number, potentially surrounded by parentheses.
371/// pub ParenNumber -> u32 = _ '(', ParenNumber, _ ')' : Number;
372/// /// Parses an unsigned number.
373/// Number -> u32 try_from(u32::from_str) = While::from(char::is_ascii_digit);
374/// ```
375///
376/// Here, we see a few things.
377///
378/// Firstly, rules support arbitrary documentation and attributes,
379/// all of which will be applied to the generated `struct`.
380///
381/// Second, there's support for a visibility in front of a rule -
382/// a rule must be `pub` (or `pub(super)`, or `pub(crate)`, etc.)
383/// to be accessible outside of the grammar's definition.
384///
385/// After that, we see some new syntax in the form of `_`.
386/// Prefixing any part of a path with an underscore makes the grammar
387/// not store its output, and not save it as an argument.
388///
389/// Finally, we see `try_from`.
390/// This, along with its infallible counterpart `from`,
391/// takes the output of your entire rule and passes it into a given function.
392///
393/// This function will need to take the amount and type of arguments
394/// equal to the arity and elements of the tuple it would usually output -
395/// in layman's terms, the tuple is "unpacked" into the function, similar to `...` in JavaScript or `*` in Python.
396///
397/// A function given to `from` must return the type of the rule,
398/// and a function given to `try_from` must return a `Result<T, E>`,
399/// where T is the type of the rule, and `E` is any type that implements [`Error`].
400///
401/// We also see that rules can _recurse_ - although this should be avoided if reasonably possible.
402/// If a rule recurses too deep, then Rust's call stack will be exhausted and your program will crash!
403/// If you truly need deep recursion, look into crates like [`stacker`](https://docs.rs/stacker/).
404///
405/// Finally, let's look at an advanced example:
406/// ```ignore
407/// pub Doubled<'input, R> {rule: R, _p: PhantomData<&'input str>} -> &'input str
408/// where R: Rule<'input, str, Output = &'input str>
409/// = &self.rule, _ arg_0;
410/// ```
411///
412/// We see here that rules are simply structs, and can take generics and fields as such.
413///
414/// Also to note is the usage of `arg_0`.
415/// Previously parsed arguments (ignoring silenced ones) are left in scope as `arg_N`,
416/// where N is the index of the argument.
417///
418/// Importantly, `'input` is special - it's always in scope, even if not declared,
419/// and is the lifetime of the data that's being passed in.
420///
421/// Anything that implements `for<'input> Rule<'input, T>`
422/// will work in _any_ grammar of slice type `T` -
423/// they don't have to even be from the same macro!
424///
425/// For example, this is fine:
426/// ```ignore
427/// define! {
428/// grammar Theta<str> {
429/// pub Foo -> &'input str = "foo";
430/// }
431/// }
432///
433/// define! {
434/// grammar Delta<str> {
435/// pub Bar -> &'input str = Theta::Foo;
436/// }
437/// }
438/// ```
439///
440/// Alternatively, if, <sub>God forbid, </sub>something happens so that you need to implement a [`Rule`] manually,
441/// without the help of this macro, look at the documentation for that trait - it'll tell you everything you need to know.
442///
443/// # Example
444/// Below is the entire source code for `examples/math.rs`, showing a simple CLI calculator app.
445/// ```rust
446#[doc = include_str!("../examples/math.rs")]
447/// ```
448///
449/// [^1]: It's ambiguous between `[T]` and `[&T]`, and between `[T]` and `[(T, )]`.
450/// Ideally, the library could `impl !Rule` for these, but even disregarding
451/// the fact that that's nightly, it doesn't work - at least, for now.
452/// Said bounds would be equivalent to [specialization](https://github.com/rust-lang/rust/issues/31844),
453/// which is a long way off.
454pub use fn_bnf_macro::define;
455
456// Reimplementing never_say_never because it has documentation that messes with ours
457// never_say_never is under the MIT license, among others:
458// https://github.com/danielhenrymantilla/never-say-never.rs/blob/master/LICENSE-MIT
459mod fn_trait {
460 pub trait FnOnce<Args> { type Output; }
461 impl<F, R> FnOnce<()> for F where F: core::ops::FnOnce() -> R { type Output = R; }
462}
463use fn_trait::FnOnce;
464
465/// The [`!`] type.
466///
467/// This type is (meant to be) unstable, but it's much more useful than
468/// [`core::convert::Infallible`] for writing grammars due to its coercion rules,
469/// so it is exported here.
470///
471pub type Never = <fn() -> ! as FnOnce<()>>::Output;