shrimple_parser/
lib.rs

1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! a parser expected.
23//! A [`ParsingError`] can also have no reason, which will mean that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36    feature = "nightly",
37    feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48    error::{FullParsingError, ParsingError, ParsingResult},
49    input::Input,
50    loc::{FullLocation, Location},
51    pattern::Pattern,
52};
53
54#[cfg(feature = "proc-macro2")]
55pub use loc::LineColumnToLocationError;
56
57use {
58    core::{
59        convert::Infallible,
60        fmt::{Debug, Display},
61        iter::FusedIterator,
62        marker::PhantomData,
63        mem::take,
64    },
65    tuple::{map_second, tuple, Tuple},
66    utils::PathLike,
67};
68
69/// A trait alias for a function that maps from the input & intermediate output to the rest of the
70/// input & a different output.
71///
72/// Used in [`Parser::map`].
73///
74/// See [`match_out`] for a convenient way to create such a mapper.
75pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
76    Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
77{
78}
79
80impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
81    F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
82{
83}
84
85/// A trait representing a function that takes some string-like input and
86/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
87pub trait Parser<In: Input, Out, Reason = Infallible>:
88    Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
89{
90    /// Use the parser to produce the output.
91    #[expect(clippy::missing_errors_doc)]
92    fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
93        self(input)
94    }
95
96    /// Turns output into a recoverable error if the output doesn't meet a condition.
97    fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
98        move |src| match self(src.clone()) {
99            Ok((rest, res)) if f(&res) => Ok((rest, res)),
100            Ok(_) => Err(ParsingError::new_recoverable(src)),
101            Err(err) => Err(err),
102        }
103    }
104
105    /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
106    // TODO: better name maybe?
107    fn filter_fatal(
108        mut self,
109        reason: Reason,
110        mut f: impl FnMut(&Out) -> bool,
111    ) -> impl Parser<In, Out, Reason>
112    where
113        Reason: Clone,
114    {
115        move |src| match self(src.clone()) {
116            Ok((rest, res)) if f(&res) => Ok((rest, res)),
117            Ok(_) => Err(ParsingError::new(src, reason.clone())),
118            Err(err) => Err(err),
119        }
120    }
121
122    /// Changes the error reason by passing it through `f`.
123    fn map_reason<NewReason>(
124        mut self,
125        mut f: impl FnMut(Reason) -> NewReason,
126    ) -> impl Parser<In, Out, NewReason> {
127        move |src| self(src).map_err(|e| e.map_reason(&mut f))
128    }
129
130    /// Converts the reason, if present, to another type using the [`From`] trait.
131    fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
132    where
133        Infallible: From<Reason>,
134    {
135        move |i| self(i).map_err(ParsingError::adapt_reason)
136    }
137
138    /// Transforms the input & the output of the parser, if present.
139    ///
140    /// The argument is a function that maps the input & the current output of the parser to the
141    /// rest of the input & the new output.
142    ///
143    /// See [`match_out`]
144    fn map<NewOut>(
145        mut self,
146        mut parser: impl MappingParser<In, Out, NewOut, Reason>,
147    ) -> impl Parser<In, NewOut, Reason> {
148        move |src| self(src).and_then(|(i, o)| parser(i, o))
149    }
150
151    /// Like [`Parser::map`], but only maps the current output, if present.
152    fn map_out<NewOut>(
153        mut self,
154        mut f: impl FnMut(Out) -> NewOut,
155    ) -> impl Parser<In, NewOut, Reason> {
156        move |src| self(src).map(map_second(&mut f))
157    }
158
159    /// Tranforms the output of the parser, if present, or try parsing the next value.
160    fn map_until<NewOut>(
161        mut self,
162        mut f: impl FnMut(Out) -> Option<NewOut>,
163    ) -> impl Parser<In, NewOut, Reason> {
164        move |mut src| loop {
165            let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
166            src = rest;
167            let Some(value) = value else {
168                continue;
169            };
170            return Ok((src, value));
171        }
172    }
173
174    /// Like [`Parser::map`], but calls the provdied function using the Nightly [`FnMut::call_mut`]
175    /// method, effectively spreading the output as the arguments of the function.
176    ///
177    /// The following nIghtly Rust code:
178    /// ```ignore
179    /// use shrimple_parser::Parser;
180    /// parser.call(u32::pow)
181    /// ```
182    /// is equivalent to the following stable Rust code:
183    /// ```ignore
184    /// use shrimple_parser::Parser;
185    /// parser.map(|(x, y)| u32::pow(x, y))
186    /// ```
187    /// `T` for this method is constrained not by the [`crate::Tuple`] trait, but by the unstable
188    /// standard trait [`core::marker::Tuple`], which means that `T` can be a tuple of absolutely
189    /// any length.
190    ///
191    /// See also: [`crate::call`], a macro for a stable alternative to this method.
192    #[cfg(feature = "nightly")]
193    fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
194    where
195        F: FnMut<Out>,
196        Out: core::marker::Tuple,
197    {
198        move |src| self(src).map(map_second(|x| f.call_mut(x)))
199    }
200
201    /// Replaces a recoverable error with the result of `parser`.
202    ///
203    /// The input fed into the second parser is the rest of the input returned by the first parser.
204    ///
205    /// # Warning
206    /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
207    fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
208        move |src| {
209            let fallback = src.clone();
210            match self(src) {
211                Ok(res) => Ok(res),
212                Err(err) if err.is_recoverable() => parser(fallback),
213                Err(err) => Err(err),
214            }
215        }
216    }
217
218    /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
219    ///
220    /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
221    /// somewhere down the line.
222    fn or_nonempty(
223        mut self,
224        mut parser: impl Parser<In, Out, Reason>,
225    ) -> impl Parser<In, Out, Reason> {
226        move |src| {
227            let fallback = src.clone();
228            match self(src) {
229                Ok(res) => Ok(res),
230                Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
231                Err(err) => Err(err),
232            }
233        }
234    }
235
236    /// Replaces a recoverable error with the transformed remains of the input.
237    /// If the rest of the input in the recoverable error is already empty, does nothing.
238    /// The returned remains of the input are an empty string.
239    fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
240        move |src| {
241            let fallback = src.clone();
242            match self(src) {
243                Ok(res) => Ok(res),
244                Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
245                    Ok((In::default(), f(fallback)))
246                }
247                Err(err) => Err(err),
248            }
249        }
250    }
251
252    /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
253    ///
254    /// Be aware that `value` will be cloned every time it's to be returned.
255    ///
256    /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
257    fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
258    where
259        Out: Clone,
260    {
261        move |src| {
262            let fallback = src.clone();
263            match self(src) {
264                Ok(res) => Ok(res),
265                Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
266                Err(err) => Err(err),
267            }
268        }
269    }
270
271    /// Parses the rest of the input after the first parser, returning both outputs
272    /// & short-circuiting on an error.
273    ///
274    /// The reason for the errors of the first parser is adapted to the one of the second parser.
275    ///
276    /// See also [`Parser::add`], [`Parser::and_value`].
277    fn and<Other>(
278        mut self,
279        mut parser: impl Parser<In, Other, Reason>,
280    ) -> impl Parser<In, (Out, Other), Reason> {
281        move |src| {
282            let (rest, out) = self(src.clone())?;
283            match parser(rest) {
284                Ok((rest, new_out)) => Ok((rest, (out, new_out))),
285                Err(mut err) => {
286                    if err.is_recoverable() {
287                        err.rest = src;
288                    }
289                    Err(err)
290                }
291            }
292        }
293    }
294
295    /// Adds a value to the output of the parser
296    ///
297    /// Be aware that `value` will be cloned every time it's to be returned.
298    ///
299    /// See [`Parser::and`].
300    fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
301        move |src| {
302            let (rest, out) = self(src)?;
303            Ok((rest, (out, value.clone())))
304        }
305    }
306
307    /// Like [`Parser::and`], but specific to parsers that output a tuple:
308    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
309    fn add<New>(
310        mut self,
311        mut parser: impl Parser<In, New, Reason>,
312    ) -> impl Parser<In, Out::Appended<New>, Reason>
313    where
314        Out: Tuple,
315    {
316        move |src| {
317            let (rest, out) = self(src.clone())?;
318            match parser(rest) {
319                Ok((rest, new_out)) => Ok((rest, out.append(new_out))),
320                Err(mut err) => {
321                    if err.is_recoverable() {
322                        err.rest = src;
323                    }
324                    Err(err)
325                }
326            }
327        }
328    }
329
330    /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
331    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
332    fn add_value<Other: Clone>(
333        mut self,
334        value: Other,
335    ) -> impl Parser<In, Out::Appended<Other>, Reason>
336    where
337        Out: Tuple,
338    {
339        move |src| {
340            let (rest, out) = self(src)?;
341            Ok((rest, out.append(value.clone())))
342        }
343    }
344
345    /// Like [`Parser::and`], but discards the output of the first parser.
346    /// The reason for the errors of the first parser is adapted to the one of the second parser.
347    fn then<NewOut>(
348        mut self,
349        mut parser: impl Parser<In, NewOut, Reason>,
350    ) -> impl Parser<In, NewOut, Reason> {
351        move |src| {
352            let rest = self(src.clone())?.0;
353            parser(rest).map_err(|mut err| {
354                if err.is_recoverable() {
355                    err.rest = src;
356                }
357                err
358            })
359        }
360    }
361
362    /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
363    ///
364    /// Effectively, all this function does is advance the input to right after the second parser,
365    /// if it succeeds, otherwise the input stays as if only the first parser was called.
366    fn skip<Skipped>(
367        mut self,
368        mut parser: impl Parser<In, Skipped, Reason>,
369    ) -> impl Parser<In, Out, Reason> {
370        move |src| {
371            let (rest, out) = self(src.clone())?;
372            match parser(rest) {
373                Ok((rest, _)) => Ok((rest, out)),
374                Err(mut err) => {
375                    if err.is_recoverable() {
376                        err.rest = src;
377                    }
378                    Err(err)
379                }
380            }
381        }
382    }
383
384    /// Sets the reason for errors returned from the parser, making all errors fatal.
385    fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
386        move |src| self(src).map_err(|e| e.reason(expected.clone()))
387    }
388
389    /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
390    /// nothing is changed.
391    fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
392    where
393        Reason: Clone,
394    {
395        move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
396    }
397
398    /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
399    ///
400    /// Be aware that `reason` is cloned every time it's to be returned.
401    fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
402    where
403        Reason: Clone,
404    {
405        move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
406    }
407
408    /// Adds the part of the input that was consumed by the parser to the outputs.
409    ///
410    /// If the input increased in length after the parser (which should not happen), an empty
411    /// string is added.
412    /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
413    fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
414        self.map_out(tuple).add_span()
415    }
416
417    /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
418    /// [`Tuple`] trait.
419    fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
420    where
421        Out: Tuple,
422    {
423        move |src| {
424            let (rest, out) = self(src.clone())?;
425            let end = src.len().saturating_sub(rest.len());
426            let consumed = src.before(end);
427            Ok((rest, out.append(consumed)))
428        }
429    }
430
431    /// Adds a copy of rest of the input to the output.
432    fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
433        self.map_out(tuple).add_rest()
434    }
435
436    /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
437    /// [`Tuple`] trait.
438    fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
439    where
440        Out: Tuple,
441    {
442        move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
443    }
444
445    /// Replaces a recoverable error with `None`, making the output optional.
446    fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
447        move |src| match self(src) {
448            Ok((rest, out)) => Ok((rest, Some(out))),
449            Err(err) if err.is_recoverable() => Ok((err.rest, None)),
450            Err(err) => Err(err),
451        }
452    }
453
454    /// Replaces the output with `true` and a recoverable error with `false`
455    fn ok(mut self) -> impl Parser<In, bool, Reason> {
456        move |src| match self(src) {
457            Ok((rest, _)) => Ok((rest, true)),
458            Err(err) if err.is_recoverable() => Ok((err.rest, false)),
459            Err(err) => Err(err),
460        }
461    }
462
463    /// Repeats the parser until an error is met, discarding all the output.
464    fn repeat(mut self) -> impl Parser<In, (), Reason> {
465        move |mut src| loop {
466            match self(src) {
467                Ok((rest, _)) => src = rest,
468                Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
469                Err(err) => return Err(err),
470            }
471        }
472    }
473
474    /// Applies the parser repeatedly, collecting the output into a collection, until an error is
475    /// met.
476    fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
477        move |mut src| {
478            let mut res = C::default();
479            loop {
480                match self(src) {
481                    Ok((rest, new)) => {
482                        res.extend([new]);
483                        src = rest;
484                    }
485                    Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
486                    Err(err) => return Err(err),
487                }
488            }
489        }
490    }
491
492    /// Prints the output using its `Debug` implementation & the first 16 bytes of the rest of the
493    /// input, all along with a custom provided message.
494    fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
495    where
496        In: Input,
497        Out: Debug,
498        Reason: Debug,
499    {
500        move |src| match self(src) {
501            Ok((rest, out)) => {
502                let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
503                let r = &rest[..until].escape_debug();
504                eprintln!("{label}: Ok({out:?}) : {r}...");
505                Ok((rest, out))
506            }
507            Err(err) => {
508                let until = err
509                    .rest
510                    .char_indices()
511                    .nth(16)
512                    .map_or(err.rest.len(), |x| x.0);
513                let r = &err.rest[..until].escape_debug();
514                eprintln!("{label}: Err({:?}) : {r}...", err.reason);
515                Err(err)
516            }
517        }
518    }
519
520    /// Turns the parser into an iterator that yields output until the first recoverable error.
521    /// If an error is yielded from the iterator, it's guaranteed to be fatal.
522    fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
523        Iter {
524            input: Some(input),
525            parser: self,
526            _params: PhantomData,
527        }
528    }
529
530    /// Augments the parsing error, if present, with location in the `input`.
531    /// `path` is the reported path to the file where the error occured.
532    /// Note that the `input` passed here is only used for error reporting, not as the input to the
533    /// parser.
534    fn with_full_error<'a>(
535        mut self,
536        path: impl PathLike<'a>,
537        full_src: &'a str,
538    ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
539    where
540        In: Input,
541    {
542        move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
543    }
544}
545
546impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
547where
548    In: Input,
549    F: FnMut(In) -> ParsingResult<In, Out, Reason>,
550{
551}
552
/// Iterator returned by [`Parser::iter`].
///
/// Yields the outputs of a parser applied repeatedly to what is left of the input.
pub struct Iter<In, Out, Reason, P> {
    /// The parser applied on every step of the iteration.
    parser: P,
    /// The part of the input that wasn't parsed yet; `None` once the parser returned an error.
    input: Option<In>,
    /// Ties the otherwise unused `Out` & `Reason` type parameters to the struct.
    _params: PhantomData<(Out, Reason)>,
}
559
560impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
561where
562    In: Input,
563    P: Parser<In, Out, Reason>,
564{
565    type Item = Result<Out, ParsingError<In, Reason>>;
566
567    fn next(&mut self) -> Option<Self::Item> {
568        let input = self.input.take()?;
569        match (self.parser)(input) {
570            Ok((rest, res)) => {
571                self.input = Some(rest);
572                Some(Ok(res))
573            }
574            Err(err) if err.is_recoverable() => None,
575            Err(err) => Some(Err(err)),
576        }
577    }
578}
579
580impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
581where
582    In: Input,
583    P: Parser<In, Out, Reason>,
584{
585}
586
587impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
588where
589    In: Input,
590    P: Parser<In, Out, Reason>,
591{
592    /// Returned the part of the input that hasn't been processed by the parser yet.
593    pub const fn remainder(&self) -> Option<&In> {
594        self.input.as_ref()
595    }
596}
597
598/// Returns a parser that always returns the provided value.
599///
600/// Beware that the value is always cloned.
601pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
602    move |i| Ok((i, value.clone()))
603}
604
605/// Parses any 1 character from the input.
606///
607/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
608///
609/// # Errors
610/// Returns a recoverable error if the input is empty.
611pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
612    match input.chars().next() {
613        Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
614        None => Err(ParsingError::new_recoverable(input)),
615    }
616}
617
618/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
619/// that.
620///
621/// # Errors
622/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
623/// is empty.
624pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
625    let ws_len = input.len() - input.trim_start().len();
626    Ok(input.split_at(ws_len).rev())
627}
628
629/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
630/// that.
631///
632/// # Errors
633/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
634/// is empty.
635pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
636    let ws_len = input.len() - input.trim_ascii_start().len();
637    Ok(input.split_at(ws_len).rev())
638}