shrimple_parser/
lib.rs

1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! a parser expected.
//! A [`ParsingError`] can also have no reason, which means that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36    feature = "nightly",
37    feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48    error::{FullParsingError, ParsingError, ParsingResult},
49    input::Input,
50    loc::{FullLocation, Location},
51    pattern::Pattern,
52};
53
54#[cfg(feature = "proc-macro2")]
55pub use loc::LineColumnToLocationError;
56
57use {
58    core::{
59        convert::Infallible,
60        fmt::{Debug, Display},
61        iter::FusedIterator,
62        marker::PhantomData,
63        mem::take,
64    },
65    tuple::{map_second, tuple, Tuple},
66    utils::PathLike,
67};
68
/// A trait alias for a function that maps from the input & intermediate output to the rest of the
/// input & a different output.
///
/// Used in [`Parser::map`].
///
/// See [`match_out`] for a convenient way to create such a mapper.
///
/// The trait declares no items of its own: everything it requires is expressed by its
/// supertraits, and `Reason` defaults to [`Infallible`].
pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
    Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
79
// Blanket implementation: every `FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>` is
// a `MappingParser`, so closures & plain functions can be used as one without any extra code.
impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
    F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
84
85/// A trait representing a function that takes some string-like input and
86/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
87pub trait Parser<In: Input, Out, Reason = Infallible>:
88    Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
89{
90    /// Use the parser to produce the output.
91    #[expect(clippy::missing_errors_doc)]
92    fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
93        self(input)
94    }
95
96    /// Turns output into a recoverable error if the output doesn't meet a condition.
97    fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
98        move |src| match self(src.clone()) {
99            Ok((rest, res)) if f(&res) => Ok((rest, res)),
100            Ok(_) => Err(ParsingError::new_recoverable(src)),
101            Err(err) => Err(err),
102        }
103    }
104
105    /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
106    // TODO: better name maybe?
107    fn filter_fatal(
108        mut self,
109        reason: Reason,
110        mut f: impl FnMut(&Out) -> bool,
111    ) -> impl Parser<In, Out, Reason>
112    where
113        Reason: Clone,
114    {
115        move |src| match self(src.clone()) {
116            Ok((rest, res)) if f(&res) => Ok((rest, res)),
117            Ok(_) => Err(ParsingError::new(src, reason.clone())),
118            Err(err) => Err(err),
119        }
120    }
121
122    /// Changes the error reason by passing it through `f`.
123    fn map_reason<NewReason>(
124        mut self,
125        mut f: impl FnMut(Reason) -> NewReason,
126    ) -> impl Parser<In, Out, NewReason> {
127        move |src| self(src).map_err(|e| e.map_reason(&mut f))
128    }
129
130    /// Converts the reason, if present, to another type using the [`From`] trait.
131    fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
132    where
133        Infallible: From<Reason>,
134    {
135        move |i| self(i).map_err(ParsingError::adapt_reason)
136    }
137
138    /// Transforms the input & the output of the parser, if present.
139    ///
140    /// The argument is a function that maps the input & the current output of the parser to the
141    /// rest of the input & the new output.
142    ///
143    /// See [`match_out`]
144    fn map<NewOut>(
145        mut self,
146        mut parser: impl MappingParser<In, Out, NewOut, Reason>,
147    ) -> impl Parser<In, NewOut, Reason> {
148        move |src| self(src).and_then(|(i, o)| parser(i, o))
149    }
150
151    /// Like [`Parser::map`], but only maps the current output, if present.
152    fn map_out<NewOut>(
153        mut self,
154        mut f: impl FnMut(Out) -> NewOut,
155    ) -> impl Parser<In, NewOut, Reason> {
156        move |src| self(src).map(map_second(&mut f))
157    }
158
159    /// Tranforms the output of the parser, if present, or try parsing the next value.
160    fn map_until<NewOut>(
161        mut self,
162        mut f: impl FnMut(Out) -> Option<NewOut>,
163    ) -> impl Parser<In, NewOut, Reason> {
164        move |mut src| loop {
165            let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
166            src = rest;
167            let Some(value) = value else {
168                continue;
169            };
170            return Ok((src, value));
171        }
172    }
173
174    /// Like [`Parser::map`], but calls the provdied function using the Nightly [`FnMut::call_mut`]
175    /// method, effectively spreading the output as the arguments of the function.
176    ///
177    /// The following nIghtly Rust code:
178    /// ```ignore
179    /// use shrimple_parser::Parser;
180    /// parser.call(u32::pow)
181    /// ```
182    /// is equivalent to the following stable Rust code:
183    /// ```ignore
184    /// use shrimple_parser::Parser;
185    /// parser.map(|(x, y)| u32::pow(x, y))
186    /// ```
187    /// `T` for this method is constrained not by the [`crate::Tuple`] trait, but by the unstable
188    /// standard trait [`core::marker::Tuple`], which means that `T` can be a tuple of absolutely
189    /// any length.
190    ///
191    /// See also: [`crate::call`], a macro for a stable alternative to this method.
192    #[cfg(feature = "nightly")]
193    fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
194    where
195        F: FnMut<Out>,
196        Out: core::marker::Tuple,
197    {
198        move |src| self(src).map(map_second(|x| f.call_mut(x)))
199    }
200
201    /// Replaces a recoverable error with the result of `parser`.
202    ///
203    /// The input fed into the second parser is the rest of the input returned by the first parser.
204    ///
205    /// # Warning
206    /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
207    fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
208        move |src| {
209            let fallback = src.clone();
210            match self(src) {
211                Ok(res) => Ok(res),
212                Err(err) if err.is_recoverable() => parser(fallback),
213                Err(err) => Err(err),
214            }
215        }
216    }
217
218    /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
219    ///
220    /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
221    /// somewhere down the line.
222    fn or_nonempty(
223        mut self,
224        mut parser: impl Parser<In, Out, Reason>,
225    ) -> impl Parser<In, Out, Reason> {
226        move |src| {
227            let fallback = src.clone();
228            match self(src) {
229                Ok(res) => Ok(res),
230                Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
231                Err(err) => Err(err),
232            }
233        }
234    }
235
236    /// Replaces a recoverable error with the transformed remains of the input.
237    /// If the rest of the input in the recoverable error is already empty, does nothing.
238    /// The returned remains of the input are an empty string.
239    fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
240        move |src| {
241            let fallback = src.clone();
242            match self(src) {
243                Ok(res) => Ok(res),
244                Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
245                    Ok((In::default(), f(fallback)))
246                }
247                Err(err) => Err(err),
248            }
249        }
250    }
251
252    /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
253    ///
254    /// Be aware that `value` will be cloned every time it's to be returned.
255    ///
256    /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
257    fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
258    where
259        Out: Clone,
260    {
261        move |src| {
262            let fallback = src.clone();
263            match self(src) {
264                Ok(res) => Ok(res),
265                Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
266                Err(err) => Err(err),
267            }
268        }
269    }
270
271    /// Parses the rest of the input after the first parser, returning both outputs
272    /// & short-circuiting on an error.
273    ///
274    /// The reason for the errors of the first parser is adapted to the one of the second parser.
275    ///
276    /// See also [`Parser::add`], [`Parser::and_value`].
277    fn and<Other>(
278        mut self,
279        mut parser: impl Parser<In, Other, Reason>,
280    ) -> impl Parser<In, (Out, Other), Reason> {
281        move |src| {
282            let (rest, out) = self(src)?;
283            let (rest, new_out) = parser(rest)?;
284            Ok((rest, (out, new_out)))
285        }
286    }
287
288    /// Adds a value to the output of the parser
289    ///
290    /// Be aware that `value` will be cloned every time it's to be returned.
291    ///
292    /// See [`Parser::and`].
293    fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
294        move |src| {
295            let (rest, out) = self(src)?;
296            Ok((rest, (out, value.clone())))
297        }
298    }
299
300    /// Like [`Parser::and`], but specific to parsers that output a tuple:
301    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
302    fn add<New>(
303        mut self,
304        mut parser: impl Parser<In, New, Reason>,
305    ) -> impl Parser<In, Out::Appended<New>, Reason>
306    where
307        Out: Tuple,
308    {
309        move |src| {
310            let (rest, out) = self(src)?;
311            let (rest, new_out) = parser(rest)?;
312            Ok((rest, out.append(new_out)))
313        }
314    }
315
316    /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
317    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
318    fn add_value<Other: Clone>(
319        mut self,
320        value: Other,
321    ) -> impl Parser<In, Out::Appended<Other>, Reason>
322    where
323        Out: Tuple,
324    {
325        move |src| {
326            let (rest, out) = self(src)?;
327            Ok((rest, out.append(value.clone())))
328        }
329    }
330
331    /// Like [`Parser::and`], but discards the output of the first parser.
332    /// The reason for the errors of the first parser is adapted to the one of the second parser.
333    fn then<NewOut>(
334        mut self,
335        mut parser: impl Parser<In, NewOut, Reason>,
336    ) -> impl Parser<In, NewOut, Reason> {
337        move |src| {
338            let rest = self(src)?.0;
339            let (rest, out) = parser(rest)?;
340            Ok((rest, out))
341        }
342    }
343
344    /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
345    ///
346    /// Effectively, all this function does is advance the input to right after the second parser,
347    /// if it succeeds, otherwise the input stays as if only the first parser was called.
348    fn skip<Skipped>(
349        mut self,
350        mut parser: impl Parser<In, Skipped, Reason>,
351    ) -> impl Parser<In, Out, Reason> {
352        move |src| {
353            let (rest, out) = self(src)?;
354            let rest = match parser(rest) {
355                Ok((rest, _)) => rest,
356                Err(err) if err.is_recoverable() => err.rest,
357                Err(err) => return Err(err),
358            };
359            Ok((rest, out))
360        }
361    }
362
363    /// Sets the reason for errors returned from the parser, making all errors fatal.
364    fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
365        move |src| self(src).map_err(|e| e.reason(expected.clone()))
366    }
367
368    /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
369    /// nothing is changed.
370    fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
371    where
372        Reason: Clone,
373    {
374        move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
375    }
376
377    /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
378    ///
379    /// Be aware that `reason` is cloned every time it's to be returned.
380    fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
381    where
382        Reason: Clone,
383    {
384        move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
385    }
386
387    /// Adds the part of the input that was consumed by the parser to the outputs.
388    ///
389    /// If the input increased in length after the parser (which should not happen), an empty
390    /// string is added.
391    /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
392    fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
393        self.map_out(tuple).add_span()
394    }
395
396    /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
397    /// [`Tuple`] trait.
398    fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
399    where
400        Out: Tuple,
401    {
402        move |src| {
403            let (rest, out) = self(src.clone())?;
404            let end = src.len().saturating_sub(rest.len());
405            let consumed = src.before(end);
406            Ok((rest, out.append(consumed)))
407        }
408    }
409
410    /// Adds a copy of rest of the input to the output.
411    fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
412        self.map_out(tuple).add_rest()
413    }
414
415    /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
416    /// [`Tuple`] trait.
417    fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
418    where
419        Out: Tuple,
420    {
421        move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
422    }
423
424    /// Replaces a recoverable error with `None`, making the output optional.
425    fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
426        move |src| match self(src) {
427            Ok((rest, out)) => Ok((rest, Some(out))),
428            Err(err) if err.is_recoverable() => Ok((err.rest, None)),
429            Err(err) => Err(err),
430        }
431    }
432
433    /// Replaces the output with `true` and a recoverable error with `false`
434    fn ok(mut self) -> impl Parser<In, bool, Reason> {
435        move |src| match self(src) {
436            Ok((rest, _)) => Ok((rest, true)),
437            Err(err) if err.is_recoverable() => Ok((err.rest, false)),
438            Err(err) => Err(err),
439        }
440    }
441
442    /// Repeats the parser until an error is met, discarding all the output.
443    fn repeat(mut self) -> impl Parser<In, (), Reason> {
444        move |mut src| loop {
445            match self(src) {
446                Ok((rest, _)) => src = rest,
447                Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
448                Err(err) => return Err(err),
449            }
450        }
451    }
452
453    /// Applies the parser repeatedly, collecting the output into a collection, until an error is
454    /// met.
455    fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
456        move |mut src| {
457            let mut res = C::default();
458            loop {
459                match self(src) {
460                    Ok((rest, new)) => {
461                        res.extend([new]);
462                        src = rest;
463                    }
464                    Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
465                    Err(err) => return Err(err),
466                }
467            }
468        }
469    }
470
471    /// Prints the output using its `Debug` implementation & the first 16 bytes of the rest of the
472    /// input, all along with a custom provided message.
473    fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
474    where
475        In: Input,
476        Out: Debug,
477        Reason: Debug,
478    {
479        move |src| match self(src) {
480            Ok((rest, out)) => {
481                let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
482                let r = &rest[..until].escape_debug();
483                eprintln!("{label}: Ok({out:?}) : {r}...");
484                Ok((rest, out))
485            }
486            Err(err) => {
487                let until = err
488                    .rest
489                    .char_indices()
490                    .nth(16)
491                    .map_or(err.rest.len(), |x| x.0);
492                let r = &err.rest[..until].escape_debug();
493                eprintln!("{label}: Err({:?}) : {r}...", err.reason);
494                Err(err)
495            }
496        }
497    }
498
499    /// Turns the parser into an iterator that yields output until the first recoverable error.
500    /// If an error is yielded from the iterator, it's guaranteed to be fatal.
501    fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
502        Iter {
503            input: Some(input),
504            parser: self,
505            _params: PhantomData,
506        }
507    }
508
509    /// Augments the parsing error, if present, with location in the `input`.
510    /// `path` is the reported path to the file where the error occured.
511    /// Note that the `input` passed here is only used for error reporting, not as the input to the
512    /// parser.
513    fn with_full_error<'a>(
514        mut self,
515        path: impl PathLike<'a>,
516        full_src: &'a str,
517    ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
518    where
519        In: Input,
520    {
521        move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
522    }
523}
524
// Blanket implementation: every `FnMut(In) -> ParsingResult<In, Out, Reason>` over an `Input`
// is a `Parser`, which makes all the provided combinator methods available on plain functions
// & closures.
impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
where
    In: Input,
    F: FnMut(In) -> ParsingResult<In, Out, Reason>,
{
}
531
/// Iterator returned by [`Parser::iter`]
pub struct Iter<In, Out, Reason, P> {
    /// The input left to parse. It's taken out on every step & only put back after
    /// a successful parse, so it's `None` once the iterator has stopped.
    input: Option<In>,
    /// The parser that is applied to `input` on every step.
    parser: P,
    /// Ties the otherwise unused `Out` & `Reason` type parameters to the struct.
    _params: PhantomData<(Out, Reason)>,
}
538
539impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
540where
541    In: Input,
542    P: Parser<In, Out, Reason>,
543{
544    type Item = Result<Out, ParsingError<In, Reason>>;
545
546    fn next(&mut self) -> Option<Self::Item> {
547        let input = self.input.take()?;
548        match (self.parser)(input) {
549            Ok((rest, res)) => {
550                self.input = Some(rest);
551                Some(Ok(res))
552            }
553            Err(err) if err.is_recoverable() => None,
554            Err(err) => Some(Err(err)),
555        }
556    }
557}
558
// `Iter::next` only stores the input back after a successful parse, so once it has returned
// `None` the stored input stays `None` & every later call returns `None` right away.
impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
}
565
impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
    /// Returns the part of the input that hasn't been processed by the parser yet.
    ///
    /// Returns `None` once the iterator has stopped, whether on a recoverable or a fatal error,
    /// because the input is not stored back in either case.
    pub const fn remainder(&self) -> Option<&In> {
        self.input.as_ref()
    }
}
576
577/// Returns a parser that always returns the provided value.
578///
579/// Beware that the value is always cloned.
580pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
581    move |i| Ok((i, value.clone()))
582}
583
584/// Parses any 1 character from the input.
585///
586/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
587///
588/// # Errors
589/// Returns a recoverable error if the input is empty.
590pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
591    match input.chars().next() {
592        Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
593        None => Err(ParsingError::new_recoverable(input)),
594    }
595}
596
597/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
598/// that.
599///
600/// # Errors
601/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
602/// is empty.
603pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
604    let ws_len = input.len() - input.trim_start().len();
605    Ok(input.split_at(ws_len).rev())
606}
607
608/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
609/// that.
610///
611/// # Errors
612/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
613/// is empty.
614pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
615    let ws_len = input.len() - input.trim_ascii_start().len();
616    Ok(input.split_at(ws_len).rev())
617}