shrimple_parser/lib.rs
1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! the parser expected.
//! A [`ParsingError`] can also have no reason, which means that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36 feature = "nightly",
37 feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48 error::{FullParsingError, ParsingError, ParsingResult},
49 input::Input,
50 loc::{FullLocation, Location},
51 pattern::Pattern,
52};
53
54#[cfg(feature = "proc-macro2")]
55pub use loc::LineColumnToLocationError;
56
57use {
58 core::{
59 convert::Infallible,
60 fmt::{Debug, Display},
61 iter::FusedIterator,
62 marker::PhantomData,
63 mem::take,
64 },
65 tuple::{map_second, tuple, Tuple},
66 utils::PathLike,
67};
68
/// A trait alias for a function that maps from the input & intermediate output to the rest of the
/// input & a different output.
///
/// The function receives two arguments, the input (`In`) & the current output (`Out`), and
/// returns a [`ParsingResult`] carrying the new output (`NewOut`). The reason type defaults to
/// [`Infallible`].
///
/// Used in [`Parser::map`].
///
/// See [`match_out`] for a convenient way to create such a mapper.
pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
    Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
79
// Blanket implementation: any sized `FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>` is a
// `MappingParser`, so plain functions & closures can be used wherever one is expected.
impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
    F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
84
85/// A trait representing a function that takes some string-like input and
86/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
87pub trait Parser<In: Input, Out, Reason = Infallible>:
88 Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
89{
90 /// Use the parser to produce the output.
91 #[expect(clippy::missing_errors_doc)]
92 fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
93 self(input)
94 }
95
96 /// Turns output into a recoverable error if the output doesn't meet a condition.
97 fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
98 move |src| match self(src.clone()) {
99 Ok((rest, res)) if f(&res) => Ok((rest, res)),
100 Ok(_) => Err(ParsingError::new_recoverable(src)),
101 Err(err) => Err(err),
102 }
103 }
104
105 /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
106 // TODO: better name maybe?
107 fn filter_fatal(
108 mut self,
109 reason: Reason,
110 mut f: impl FnMut(&Out) -> bool,
111 ) -> impl Parser<In, Out, Reason>
112 where
113 Reason: Clone,
114 {
115 move |src| match self(src.clone()) {
116 Ok((rest, res)) if f(&res) => Ok((rest, res)),
117 Ok(_) => Err(ParsingError::new(src, reason.clone())),
118 Err(err) => Err(err),
119 }
120 }
121
122 /// Changes the error reason by passing it through `f`.
123 fn map_reason<NewReason>(
124 mut self,
125 mut f: impl FnMut(Reason) -> NewReason,
126 ) -> impl Parser<In, Out, NewReason> {
127 move |src| self(src).map_err(|e| e.map_reason(&mut f))
128 }
129
130 /// Converts the reason, if present, to another type using the [`From`] trait.
131 fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
132 where
133 Infallible: From<Reason>,
134 {
135 move |i| self(i).map_err(ParsingError::adapt_reason)
136 }
137
138 /// Transforms the input & the output of the parser, if present.
139 ///
140 /// The argument is a function that maps the input & the current output of the parser to the
141 /// rest of the input & the new output.
142 ///
143 /// See [`match_out`]
144 fn map<NewOut>(
145 mut self,
146 mut parser: impl MappingParser<In, Out, NewOut, Reason>,
147 ) -> impl Parser<In, NewOut, Reason> {
148 move |src| self(src).and_then(|(i, o)| parser(i, o))
149 }
150
151 /// Like [`Parser::map`], but only maps the current output, if present.
152 fn map_out<NewOut>(
153 mut self,
154 mut f: impl FnMut(Out) -> NewOut,
155 ) -> impl Parser<In, NewOut, Reason> {
156 move |src| self(src).map(map_second(&mut f))
157 }
158
159 /// Tranforms the output of the parser, if present, or try parsing the next value.
160 fn map_until<NewOut>(
161 mut self,
162 mut f: impl FnMut(Out) -> Option<NewOut>,
163 ) -> impl Parser<In, NewOut, Reason> {
164 move |mut src| loop {
165 let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
166 src = rest;
167 let Some(value) = value else {
168 continue;
169 };
170 return Ok((src, value));
171 }
172 }
173
    /// Like [`Parser::map_out`], but calls the provided function using the Nightly
    /// [`FnMut::call_mut`] method, effectively spreading the output as the arguments of the
    /// function.
    ///
    /// The following nightly Rust code:
    /// ```ignore
    /// use shrimple_parser::Parser;
    /// parser.call(u32::pow)
    /// ```
    /// is equivalent to the following stable Rust code:
    /// ```ignore
    /// use shrimple_parser::Parser;
    /// parser.map_out(|(x, y)| u32::pow(x, y))
    /// ```
    /// `T` for this method is constrained not by the [`crate::Tuple`] trait, but by the unstable
    /// standard trait [`core::marker::Tuple`], which means that `T` can be a tuple of absolutely
    /// any length.
    ///
    /// See also: [`crate::call`], a macro for a stable alternative to this method.
    #[cfg(feature = "nightly")]
    fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
    where
        F: FnMut<Out>,
        Out: core::marker::Tuple,
    {
        // Only the output is transformed; the rest of the input is passed along unchanged.
        move |src| self(src).map(map_second(|x| f.call_mut(x)))
    }
200
201 /// Replaces a recoverable error with the result of `parser`.
202 ///
203 /// The input fed into the second parser is the rest of the input returned by the first parser.
204 ///
205 /// # Warning
206 /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
207 fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
208 move |src| {
209 let fallback = src.clone();
210 match self(src) {
211 Ok(res) => Ok(res),
212 Err(err) if err.is_recoverable() => parser(fallback),
213 Err(err) => Err(err),
214 }
215 }
216 }
217
218 /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
219 ///
220 /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
221 /// somewhere down the line.
222 fn or_nonempty(
223 mut self,
224 mut parser: impl Parser<In, Out, Reason>,
225 ) -> impl Parser<In, Out, Reason> {
226 move |src| {
227 let fallback = src.clone();
228 match self(src) {
229 Ok(res) => Ok(res),
230 Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
231 Err(err) => Err(err),
232 }
233 }
234 }
235
236 /// Replaces a recoverable error with the transformed remains of the input.
237 /// If the rest of the input in the recoverable error is already empty, does nothing.
238 /// The returned remains of the input are an empty string.
239 fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
240 move |src| {
241 let fallback = src.clone();
242 match self(src) {
243 Ok(res) => Ok(res),
244 Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
245 Ok((In::default(), f(fallback)))
246 }
247 Err(err) => Err(err),
248 }
249 }
250 }
251
252 /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
253 ///
254 /// Be aware that `value` will be cloned every time it's to be returned.
255 ///
256 /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
257 fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
258 where
259 Out: Clone,
260 {
261 move |src| {
262 let fallback = src.clone();
263 match self(src) {
264 Ok(res) => Ok(res),
265 Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
266 Err(err) => Err(err),
267 }
268 }
269 }
270
271 /// Parses the rest of the input after the first parser, returning both outputs
272 /// & short-circuiting on an error.
273 ///
274 /// The reason for the errors of the first parser is adapted to the one of the second parser.
275 ///
276 /// See also [`Parser::add`], [`Parser::and_value`].
277 fn and<Other>(
278 mut self,
279 mut parser: impl Parser<In, Other, Reason>,
280 ) -> impl Parser<In, (Out, Other), Reason> {
281 move |src| {
282 let (rest, out) = self(src.clone())?;
283 match parser(rest) {
284 Ok((rest, new_out)) => Ok((rest, (out, new_out))),
285 Err(mut err) => {
286 if err.is_recoverable() {
287 err.rest = src;
288 }
289 Err(err)
290 }
291 }
292 }
293 }
294
295 /// Adds a value to the output of the parser
296 ///
297 /// Be aware that `value` will be cloned every time it's to be returned.
298 ///
299 /// See [`Parser::and`].
300 fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
301 move |src| {
302 let (rest, out) = self(src)?;
303 Ok((rest, (out, value.clone())))
304 }
305 }
306
307 /// Like [`Parser::and`], but specific to parsers that output a tuple:
308 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
309 fn add<New>(
310 mut self,
311 mut parser: impl Parser<In, New, Reason>,
312 ) -> impl Parser<In, Out::Appended<New>, Reason>
313 where
314 Out: Tuple,
315 {
316 move |src| {
317 let (rest, out) = self(src.clone())?;
318 match parser(rest) {
319 Ok((rest, new_out)) => Ok((rest, out.append(new_out))),
320 Err(mut err) => {
321 if err.is_recoverable() {
322 err.rest = src;
323 }
324 Err(err)
325 }
326 }
327 }
328 }
329
330 /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
331 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
332 fn add_value<Other: Clone>(
333 mut self,
334 value: Other,
335 ) -> impl Parser<In, Out::Appended<Other>, Reason>
336 where
337 Out: Tuple,
338 {
339 move |src| {
340 let (rest, out) = self(src)?;
341 Ok((rest, out.append(value.clone())))
342 }
343 }
344
345 /// Like [`Parser::and`], but discards the output of the first parser.
346 /// The reason for the errors of the first parser is adapted to the one of the second parser.
347 fn then<NewOut>(
348 mut self,
349 mut parser: impl Parser<In, NewOut, Reason>,
350 ) -> impl Parser<In, NewOut, Reason> {
351 move |src| {
352 let rest = self(src.clone())?.0;
353 parser(rest).map_err(|mut err| {
354 if err.is_recoverable() {
355 err.rest = src;
356 }
357 err
358 })
359 }
360 }
361
362 /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
363 ///
364 /// Effectively, all this function does is advance the input to right after the second parser,
365 /// if it succeeds, otherwise the input stays as if only the first parser was called.
366 fn skip<Skipped>(
367 mut self,
368 mut parser: impl Parser<In, Skipped, Reason>,
369 ) -> impl Parser<In, Out, Reason> {
370 move |src| {
371 let (rest, out) = self(src.clone())?;
372 match parser(rest) {
373 Ok((rest, _)) => Ok((rest, out)),
374 Err(mut err) => {
375 if err.is_recoverable() {
376 err.rest = src;
377 }
378 Err(err)
379 }
380 }
381 }
382 }
383
384 /// Sets the reason for errors returned from the parser, making all errors fatal.
385 fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
386 move |src| self(src).map_err(|e| e.reason(expected.clone()))
387 }
388
389 /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
390 /// nothing is changed.
391 fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
392 where
393 Reason: Clone,
394 {
395 move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
396 }
397
398 /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
399 ///
400 /// Be aware that `reason` is cloned every time it's to be returned.
401 fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
402 where
403 Reason: Clone,
404 {
405 move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
406 }
407
408 /// Adds the part of the input that was consumed by the parser to the outputs.
409 ///
410 /// If the input increased in length after the parser (which should not happen), an empty
411 /// string is added.
412 /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
413 fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
414 self.map_out(tuple).add_span()
415 }
416
417 /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
418 /// [`Tuple`] trait.
419 fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
420 where
421 Out: Tuple,
422 {
423 move |src| {
424 let (rest, out) = self(src.clone())?;
425 let end = src.len().saturating_sub(rest.len());
426 let consumed = src.before(end);
427 Ok((rest, out.append(consumed)))
428 }
429 }
430
431 /// Adds a copy of rest of the input to the output.
432 fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
433 self.map_out(tuple).add_rest()
434 }
435
436 /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
437 /// [`Tuple`] trait.
438 fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
439 where
440 Out: Tuple,
441 {
442 move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
443 }
444
445 /// Replaces a recoverable error with `None`, making the output optional.
446 fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
447 move |src| match self(src) {
448 Ok((rest, out)) => Ok((rest, Some(out))),
449 Err(err) if err.is_recoverable() => Ok((err.rest, None)),
450 Err(err) => Err(err),
451 }
452 }
453
454 /// Replaces the output with `true` and a recoverable error with `false`
455 fn ok(mut self) -> impl Parser<In, bool, Reason> {
456 move |src| match self(src) {
457 Ok((rest, _)) => Ok((rest, true)),
458 Err(err) if err.is_recoverable() => Ok((err.rest, false)),
459 Err(err) => Err(err),
460 }
461 }
462
463 /// Repeats the parser until an error is met, discarding all the output.
464 fn repeat(mut self) -> impl Parser<In, (), Reason> {
465 move |mut src| loop {
466 match self(src) {
467 Ok((rest, _)) => src = rest,
468 Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
469 Err(err) => return Err(err),
470 }
471 }
472 }
473
474 /// Applies the parser repeatedly, collecting the output into a collection, until an error is
475 /// met.
476 fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
477 move |mut src| {
478 let mut res = C::default();
479 loop {
480 match self(src) {
481 Ok((rest, new)) => {
482 res.extend([new]);
483 src = rest;
484 }
485 Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
486 Err(err) => return Err(err),
487 }
488 }
489 }
490 }
491
492 /// Prints the output using its `Debug` implementation & the first 16 bytes of the rest of the
493 /// input, all along with a custom provided message.
494 fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
495 where
496 In: Input,
497 Out: Debug,
498 Reason: Debug,
499 {
500 move |src| match self(src) {
501 Ok((rest, out)) => {
502 let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
503 let r = &rest[..until].escape_debug();
504 eprintln!("{label}: Ok({out:?}) : {r}...");
505 Ok((rest, out))
506 }
507 Err(err) => {
508 let until = err
509 .rest
510 .char_indices()
511 .nth(16)
512 .map_or(err.rest.len(), |x| x.0);
513 let r = &err.rest[..until].escape_debug();
514 eprintln!("{label}: Err({:?}) : {r}...", err.reason);
515 Err(err)
516 }
517 }
518 }
519
520 /// Turns the parser into an iterator that yields output until the first recoverable error.
521 /// If an error is yielded from the iterator, it's guaranteed to be fatal.
522 fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
523 Iter {
524 input: Some(input),
525 parser: self,
526 _params: PhantomData,
527 }
528 }
529
530 /// Augments the parsing error, if present, with location in the `input`.
531 /// `path` is the reported path to the file where the error occured.
532 /// Note that the `input` passed here is only used for error reporting, not as the input to the
533 /// parser.
534 fn with_full_error<'a>(
535 mut self,
536 path: impl PathLike<'a>,
537 full_src: &'a str,
538 ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
539 where
540 In: Input,
541 {
542 move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
543 }
544}
545
// Blanket implementation: every `FnMut(In) -> ParsingResult<In, Out, Reason>` over an `Input` is
// a `Parser`, which gives plain functions & closures all of the combinator methods of the trait.
impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
where
    In: Input,
    F: FnMut(In) -> ParsingResult<In, Out, Reason>,
{
}
552
/// Iterator returned by [`Parser::iter`]
pub struct Iter<In, Out, Reason, P> {
    /// The input that is yet to be parsed. It is taken out on every call to `next` & only put
    /// back after a successful parse, so it's `None` once the iterator has stopped.
    input: Option<In>,
    /// The parser that is applied on every iteration.
    parser: P,
    /// Ties the `Out` & `Reason` type parameters, which no field stores, to the struct.
    _params: PhantomData<(Out, Reason)>,
}
559
560impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
561where
562 In: Input,
563 P: Parser<In, Out, Reason>,
564{
565 type Item = Result<Out, ParsingError<In, Reason>>;
566
567 fn next(&mut self) -> Option<Self::Item> {
568 let input = self.input.take()?;
569 match (self.parser)(input) {
570 Ok((rest, res)) => {
571 self.input = Some(rest);
572 Some(Ok(res))
573 }
574 Err(err) if err.is_recoverable() => None,
575 Err(err) => Some(Err(err)),
576 }
577 }
578}
579
// `Iter::next` takes the stored input out & only puts it back after a successful parse, so once
// it has returned `None`, the input stays `None` & every later call returns `None` as well.
impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
}
586
587impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
588where
589 In: Input,
590 P: Parser<In, Out, Reason>,
591{
592 /// Returned the part of the input that hasn't been processed by the parser yet.
593 pub const fn remainder(&self) -> Option<&In> {
594 self.input.as_ref()
595 }
596}
597
598/// Returns a parser that always returns the provided value.
599///
600/// Beware that the value is always cloned.
601pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
602 move |i| Ok((i, value.clone()))
603}
604
605/// Parses any 1 character from the input.
606///
607/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
608///
609/// # Errors
610/// Returns a recoverable error if the input is empty.
611pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
612 match input.chars().next() {
613 Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
614 None => Err(ParsingError::new_recoverable(input)),
615 }
616}
617
618/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
619/// that.
620///
621/// # Errors
622/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
623/// is empty.
624pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
625 let ws_len = input.len() - input.trim_start().len();
626 Ok(input.split_at(ws_len).rev())
627}
628
629/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
630/// that.
631///
632/// # Errors
633/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
634/// is empty.
635pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
636 let ws_len = input.len() - input.trim_ascii_start().len();
637 Ok(input.split_at(ws_len).rev())
638}