shrimple_parser/
lib.rs

1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! a parser expected.
23//! A [`ParsingError`] can also have no reason, which will mean that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36    feature = "nightly",
37    feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48    error::{FullParsingError, ParsingError, ParsingResult},
49    input::Input,
50    loc::{FullLocation, Location},
51    pattern::Pattern,
52};
53
54use {
55    core::{
56        convert::Infallible,
57        fmt::{Debug, Display},
58        iter::FusedIterator,
59        marker::PhantomData,
60        mem::take,
61    },
62    tuple::{map_second, tuple, Tuple},
63    utils::PathLike,
64};
65
/// A trait alias for a function that maps from the input & intermediate output to the rest of the
/// input & a different output.
///
/// The trait has no methods of its own; it only names the bound
/// `FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>`, with `Reason` defaulting to
/// [`Infallible`].
///
/// Used in [`Parser::map`].
///
/// See [`match_out`] for a convenient way to create such a mapper.
pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
    Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
76
// Blanket implementation: every function or closure with the right signature is a
// `MappingParser`, so the trait never has to be implemented by hand.
impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
    F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
81
82/// A trait representing a function that takes some string-like input and
83/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
84pub trait Parser<In: Input, Out, Reason = Infallible>:
85    Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
86{
    /// Use the parser to produce the output.
    ///
    /// This is the same as calling the parser directly as a function: `input` is passed to it &
    /// its result is returned unchanged.
    #[expect(clippy::missing_errors_doc)]
    fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
        self(input)
    }
92
93    /// Turns output into a recoverable error if the output doesn't meet a condition.
94    fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
95        move |src| match self(src.clone()) {
96            Ok((rest, res)) if f(&res) => Ok((rest, res)),
97            Ok(_) => Err(ParsingError::new_recoverable(src)),
98            Err(err) => Err(err),
99        }
100    }
101
102    /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
103    // TODO: better name maybe?
104    fn filter_fatal(
105        mut self,
106        reason: Reason,
107        mut f: impl FnMut(&Out) -> bool,
108    ) -> impl Parser<In, Out, Reason>
109    where
110        Reason: Clone,
111    {
112        move |src| match self(src.clone()) {
113            Ok((rest, res)) if f(&res) => Ok((rest, res)),
114            Ok(_) => Err(ParsingError::new(src, reason.clone())),
115            Err(err) => Err(err),
116        }
117    }
118
119    /// Changes the error reason by passing it through `f`.
120    fn map_reason<NewReason>(
121        mut self,
122        mut f: impl FnMut(Reason) -> NewReason,
123    ) -> impl Parser<In, Out, NewReason> {
124        move |src| self(src).map_err(|e| e.map_reason(&mut f))
125    }
126
127    /// Converts the reason, if present, to another type using the [`From`] trait.
128    fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
129    where
130        Infallible: From<Reason>,
131    {
132        move |i| self(i).map_err(ParsingError::adapt_reason)
133    }
134
135    /// Transforms the input & the output of the parser, if present.
136    ///
137    /// The argument is a function that maps the input & the current output of the parser to the
138    /// rest of the input & the new output.
139    ///
140    /// See [`match_out`]
141    fn map<NewOut>(
142        mut self,
143        mut parser: impl MappingParser<In, Out, NewOut, Reason>,
144    ) -> impl Parser<In, NewOut, Reason> {
145        move |src| self(src).and_then(|(i, o)| parser(i, o))
146    }
147
148    /// Like [`Parser::map`], but only maps the current output, if present.
149    fn map_out<NewOut>(
150        mut self,
151        mut f: impl FnMut(Out) -> NewOut,
152    ) -> impl Parser<In, NewOut, Reason> {
153        move |src| self(src).map(map_second(&mut f))
154    }
155
156    /// Tranforms the output of the parser, if present, or try parsing the next value.
157    fn map_until<NewOut>(
158        mut self,
159        mut f: impl FnMut(Out) -> Option<NewOut>,
160    ) -> impl Parser<In, NewOut, Reason> {
161        move |mut src| loop {
162            let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
163            src = rest;
164            let Some(value) = value else {
165                continue;
166            };
167            return Ok((src, value));
168        }
169    }
170
    /// Like [`Parser::map_out`], but calls the provided function using the Nightly
    /// [`FnMut::call_mut`] method, effectively spreading the output as the arguments of the
    /// function.
    ///
    /// The following nightly Rust code:
    /// ```ignore
    /// use shrimple_parser::Parser;
    /// parser.call(u32::pow)
    /// ```
    /// is equivalent to the following stable Rust code:
    /// ```ignore
    /// use shrimple_parser::Parser;
    /// parser.map_out(|(x, y)| u32::pow(x, y))
    /// ```
    /// `T` for this method is constrained not by the [`crate::Tuple`] trait, but by the unstable
    /// standard trait [`core::marker::Tuple`], which means that `T` can be a tuple of absolutely
    /// any length.
    ///
    /// Only available with the `nightly` feature enabled.
    ///
    /// See also: [`crate::call`], a macro for a stable alternative to this method.
    #[cfg(feature = "nightly")]
    fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
    where
        F: FnMut<Out>,
        Out: core::marker::Tuple,
    {
        move |src| self(src).map(map_second(|x| f.call_mut(x)))
    }
197
198    /// Replaces a recoverable error with the result of `parser`.
199    ///
200    /// The input fed into the second parser is the rest of the input returned by the first parser.
201    ///
202    /// # Warning
203    /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
204    fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
205        move |src| {
206            let fallback = src.clone();
207            match self(src) {
208                Ok(res) => Ok(res),
209                Err(err) if err.is_recoverable() => parser(fallback),
210                Err(err) => Err(err),
211            }
212        }
213    }
214
215    /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
216    ///
217    /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
218    /// somewhere down the line.
219    fn or_nonempty(
220        mut self,
221        mut parser: impl Parser<In, Out, Reason>,
222    ) -> impl Parser<In, Out, Reason> {
223        move |src| {
224            let fallback = src.clone();
225            match self(src) {
226                Ok(res) => Ok(res),
227                Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
228                Err(err) => Err(err),
229            }
230        }
231    }
232
233    /// Replaces a recoverable error with the transformed remains of the input.
234    /// If the rest of the input in the recoverable error is already empty, does nothing.
235    /// The returned remains of the input are an empty string.
236    fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
237        move |src| {
238            let fallback = src.clone();
239            match self(src) {
240                Ok(res) => Ok(res),
241                Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
242                    Ok((In::default(), f(fallback)))
243                }
244                Err(err) => Err(err),
245            }
246        }
247    }
248
249    /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
250    ///
251    /// Be aware that `value` will be cloned every time it's to be returned.
252    ///
253    /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
254    fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
255    where
256        Out: Clone,
257    {
258        move |src| {
259            let fallback = src.clone();
260            match self(src) {
261                Ok(res) => Ok(res),
262                Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
263                Err(err) => Err(err),
264            }
265        }
266    }
267
268    /// Parses the rest of the input after the first parser, returning both outputs
269    /// & short-circuiting on an error.
270    ///
271    /// The reason for the errors of the first parser is adapted to the one of the second parser.
272    ///
273    /// See also [`Parser::add`], [`Parser::and_value`].
274    fn and<Other>(
275        mut self,
276        mut parser: impl Parser<In, Other, Reason>,
277    ) -> impl Parser<In, (Out, Other), Reason> {
278        move |src| {
279            let (rest, out) = self(src)?;
280            let (rest, new_out) = parser(rest)?;
281            Ok((rest, (out, new_out)))
282        }
283    }
284
285    /// Adds a value to the output of the parser
286    ///
287    /// Be aware that `value` will be cloned every time it's to be returned.
288    ///
289    /// See [`Parser::and`].
290    fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
291        move |src| {
292            let (rest, out) = self(src)?;
293            Ok((rest, (out, value.clone())))
294        }
295    }
296
297    /// Like [`Parser::and`], but specific to parsers that output a tuple:
298    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
299    fn add<New>(
300        mut self,
301        mut parser: impl Parser<In, New, Reason>,
302    ) -> impl Parser<In, Out::Appended<New>, Reason>
303    where
304        Out: Tuple,
305    {
306        move |src| {
307            let (rest, out) = self(src)?;
308            let (rest, new_out) = parser(rest)?;
309            Ok((rest, out.append(new_out)))
310        }
311    }
312
313    /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
314    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
315    fn add_value<Other: Clone>(
316        mut self,
317        value: Other,
318    ) -> impl Parser<In, Out::Appended<Other>, Reason>
319    where
320        Out: Tuple,
321    {
322        move |src| {
323            let (rest, out) = self(src)?;
324            Ok((rest, out.append(value.clone())))
325        }
326    }
327
328    /// Like [`Parser::and`], but discards the output of the first parser.
329    /// The reason for the errors of the first parser is adapted to the one of the second parser.
330    fn then<NewOut>(
331        mut self,
332        mut parser: impl Parser<In, NewOut, Reason>,
333    ) -> impl Parser<In, NewOut, Reason> {
334        move |src| {
335            let rest = self(src)?.0;
336            let (rest, out) = parser(rest)?;
337            Ok((rest, out))
338        }
339    }
340
341    /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
342    ///
343    /// Effectively, all this function does is advance the input to right after the second parser,
344    /// if it succeeds, otherwise the input stays as if only the first parser was called.
345    fn skip<Skipped>(
346        mut self,
347        mut parser: impl Parser<In, Skipped, Reason>,
348    ) -> impl Parser<In, Out, Reason> {
349        move |src| {
350            let (rest, out) = self(src)?;
351            let rest = match parser(rest) {
352                Ok((rest, _)) => rest,
353                Err(err) if err.is_recoverable() => err.rest,
354                Err(err) => return Err(err),
355            };
356            Ok((rest, out))
357        }
358    }
359
360    /// Sets the reason for errors returned from the parser, making all errors fatal.
361    fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
362        move |src| self(src).map_err(|e| e.reason(expected.clone()))
363    }
364
365    /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
366    /// nothing is changed.
367    fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
368    where
369        Reason: Clone,
370    {
371        move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
372    }
373
374    /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
375    ///
376    /// Be aware that `reason` is cloned every time it's to be returned.
377    fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
378    where
379        Reason: Clone,
380    {
381        move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
382    }
383
384    /// Adds the part of the input that was consumed by the parser to the outputs.
385    ///
386    /// If the input increased in length after the parser (which should not happen), an empty
387    /// string is added.
388    /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
389    fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
390        self.map_out(tuple).add_span()
391    }
392
393    /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
394    /// [`Tuple`] trait.
395    fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
396    where
397        Out: Tuple,
398    {
399        move |src| {
400            let (rest, out) = self(src.clone())?;
401            let end = src.len().saturating_sub(rest.len());
402            let consumed = src.before(end);
403            Ok((rest, out.append(consumed)))
404        }
405    }
406
407    /// Adds a copy of rest of the input to the output.
408    fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
409        self.map_out(tuple).add_rest()
410    }
411
412    /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
413    /// [`Tuple`] trait.
414    fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
415    where
416        Out: Tuple,
417    {
418        move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
419    }
420
421    /// Replaces a recoverable error with `None`, making the output optional.
422    fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
423        move |src| match self(src) {
424            Ok((rest, out)) => Ok((rest, Some(out))),
425            Err(err) if err.is_recoverable() => Ok((err.rest, None)),
426            Err(err) => Err(err),
427        }
428    }
429
430    /// Replaces the output with `true` and a recoverable error with `false`
431    fn ok(mut self) -> impl Parser<In, bool, Reason> {
432        move |src| match self(src) {
433            Ok((rest, _)) => Ok((rest, true)),
434            Err(err) if err.is_recoverable() => Ok((err.rest, false)),
435            Err(err) => Err(err),
436        }
437    }
438
439    /// Repeats the parser until an error is met, discarding all the output.
440    fn repeat(mut self) -> impl Parser<In, (), Reason> {
441        move |mut src| loop {
442            match self(src) {
443                Ok((rest, _)) => src = rest,
444                Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
445                Err(err) => return Err(err),
446            }
447        }
448    }
449
450    /// Applies the parser repeatedly, collecting the output into a collection, until an error is
451    /// met.
452    fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
453        move |mut src| {
454            let mut res = C::default();
455            loop {
456                match self(src) {
457                    Ok((rest, new)) => {
458                        res.extend([new]);
459                        src = rest;
460                    }
461                    Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
462                    Err(err) => return Err(err),
463                }
464            }
465        }
466    }
467
468    /// Prints the output using its `Debug` implementation & the first 16 bytes of the rest of the
469    /// input, all along with a custom provided message.
470    fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
471    where
472        In: Input,
473        Out: Debug,
474        Reason: Debug,
475    {
476        move |src| match self(src) {
477            Ok((rest, out)) => {
478                let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
479                let r = &rest[..until].escape_debug();
480                println!("{label}: Ok({out:?}) : {r}...");
481                Ok((rest, out))
482            }
483            Err(err) => {
484                let until = err
485                    .rest
486                    .char_indices()
487                    .nth(16)
488                    .map_or(err.rest.len(), |x| x.0);
489                let r = &err.rest[..until].escape_debug();
490                println!("{label}: Err({:?}) : {r}...", err.reason);
491                Err(err)
492            }
493        }
494    }
495
496    /// Turns the parser into an iterator that yields output until the first recoverable error.
497    /// If an error is yielded from the iterator, it's guaranteed to be fatal.
498    fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
499        Iter {
500            input: Some(input),
501            parser: self,
502            _params: PhantomData,
503        }
504    }
505
506    /// Augments the parsing error, if present, with location in the `input`.
507    /// `path` is the reported path to the file where the error occured.
508    /// Note that the `input` passed here is only used for error reporting, not as the input to the
509    /// parser.
510    fn with_full_error<'a>(
511        mut self,
512        path: impl PathLike<'a>,
513        full_src: &'a str,
514    ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
515    where
516        In: Input,
517    {
518        move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
519    }
520}
521
// Blanket implementation: every function or closure that maps an input to a `ParsingResult`
// is a `Parser` and gets all the combinators above for free.
impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
where
    In: Input,
    F: FnMut(In) -> ParsingResult<In, Out, Reason>,
{
}
528
/// Iterator returned by [`Parser::iter`]
pub struct Iter<In, Out, Reason, P> {
    /// The input that is yet to be parsed; `None` once the parser has returned an error.
    input: Option<In>,
    /// The parser that is applied to the input on every iteration.
    parser: P,
    /// Marks `Out` & `Reason` as used: no field stores a value of either type.
    _params: PhantomData<(Out, Reason)>,
}
535
536impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
537where
538    In: Input,
539    P: Parser<In, Out, Reason>,
540{
541    type Item = Result<Out, ParsingError<In, Reason>>;
542
543    fn next(&mut self) -> Option<Self::Item> {
544        let input = self.input.take()?;
545        match (self.parser)(input) {
546            Ok((rest, res)) => {
547                self.input = Some(rest);
548                Some(Ok(res))
549            }
550            Err(err) if err.is_recoverable() => None,
551            Err(err) => Some(Err(err)),
552        }
553    }
554}
555
// `next` takes the input out of the iterator & only puts it back after a successful parse,
// so once `None` or an error has been returned, every further call returns `None`.
impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
}
562
impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
    /// Returns the part of the input that hasn't been processed by the parser yet.
    ///
    /// `None` is returned once the iteration has ended, since the input is not kept around
    /// after the parser returns an error.
    pub const fn remainder(&self) -> Option<&In> {
        self.input.as_ref()
    }
}
573
574/// Returns a parser that always returns the provided value.
575///
576/// Beware that the value is always cloned.
577pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
578    move |i| Ok((i, value.clone()))
579}
580
581/// Parses any 1 character from the input.
582///
583/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
584///
585/// # Errors
586/// Returns a recoverable error if the input is empty.
587pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
588    match input.chars().next() {
589        Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
590        None => Err(ParsingError::new_recoverable(input)),
591    }
592}
593
594/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
595/// that.
596///
597/// # Errors
598/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
599/// is empty.
600pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
601    let ws_len = input.len() - input.trim_start().len();
602    Ok(input.split_at(ws_len).rev())
603}
604
605/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
606/// that.
607///
608/// # Errors
609/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
610/// is empty.
611pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
612    let ws_len = input.len() - input.trim_ascii_start().len();
613    Ok(input.split_at(ws_len).rev())
614}