shrimple_parser/lib.rs
1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! a parser expected.
23//! A [`ParsingError`] can also have no reason, which will mean that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36 feature = "nightly",
37 feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48 error::{FullParsingError, ParsingError, ParsingResult},
49 input::Input,
50 loc::{FullLocation, Location},
51 pattern::Pattern,
52};
53
54#[cfg(feature = "proc-macro2")]
55pub use loc::LineColumnToLocationError;
56
57use {
58 core::{
59 convert::Infallible,
60 fmt::{Debug, Display},
61 iter::FusedIterator,
62 marker::PhantomData,
63 mem::take,
64 },
65 tuple::{map_second, tuple, Tuple},
66 utils::PathLike,
67};
68
/// A trait alias for a function that maps from the input & intermediate output to the rest of the
/// input & a different output.
///
/// Any `FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>` qualifies; the trait has no methods
/// of its own. `Reason` defaults to [`Infallible`].
///
/// Used in [`Parser::map`].
///
/// See [`match_out`] for a convenient way to create such a mapper.
pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
    Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
79
// Blanket implementation: every sized closure or function with the matching signature is a
// `MappingParser`, so users never implement the trait by hand.
impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
    F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
{
}
84
85/// A trait representing a function that takes some string-like input and
86/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
87pub trait Parser<In: Input, Out, Reason = Infallible>:
88 Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
89{
90 /// Use the parser to produce the output.
91 #[expect(clippy::missing_errors_doc)]
92 fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
93 self(input)
94 }
95
96 /// Turns output into a recoverable error if the output doesn't meet a condition.
97 fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
98 move |src| match self(src.clone()) {
99 Ok((rest, res)) if f(&res) => Ok((rest, res)),
100 Ok(_) => Err(ParsingError::new_recoverable(src)),
101 Err(err) => Err(err),
102 }
103 }
104
105 /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
106 // TODO: better name maybe?
107 fn filter_fatal(
108 mut self,
109 reason: Reason,
110 mut f: impl FnMut(&Out) -> bool,
111 ) -> impl Parser<In, Out, Reason>
112 where
113 Reason: Clone,
114 {
115 move |src| match self(src.clone()) {
116 Ok((rest, res)) if f(&res) => Ok((rest, res)),
117 Ok(_) => Err(ParsingError::new(src, reason.clone())),
118 Err(err) => Err(err),
119 }
120 }
121
122 /// Changes the error reason by passing it through `f`.
123 fn map_reason<NewReason>(
124 mut self,
125 mut f: impl FnMut(Reason) -> NewReason,
126 ) -> impl Parser<In, Out, NewReason> {
127 move |src| self(src).map_err(|e| e.map_reason(&mut f))
128 }
129
130 /// Converts the reason, if present, to another type using the [`From`] trait.
131 fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
132 where
133 Infallible: From<Reason>,
134 {
135 move |i| self(i).map_err(ParsingError::adapt_reason)
136 }
137
138 /// Transforms the input & the output of the parser, if present.
139 ///
140 /// The argument is a function that maps the input & the current output of the parser to the
141 /// rest of the input & the new output.
142 ///
143 /// See [`match_out`]
144 fn map<NewOut>(
145 mut self,
146 mut parser: impl MappingParser<In, Out, NewOut, Reason>,
147 ) -> impl Parser<In, NewOut, Reason> {
148 move |src| self(src).and_then(|(i, o)| parser(i, o))
149 }
150
151 /// Like [`Parser::map`], but only maps the current output, if present.
152 fn map_out<NewOut>(
153 mut self,
154 mut f: impl FnMut(Out) -> NewOut,
155 ) -> impl Parser<In, NewOut, Reason> {
156 move |src| self(src).map(map_second(&mut f))
157 }
158
159 /// Tranforms the output of the parser, if present, or try parsing the next value.
160 fn map_until<NewOut>(
161 mut self,
162 mut f: impl FnMut(Out) -> Option<NewOut>,
163 ) -> impl Parser<In, NewOut, Reason> {
164 move |mut src| loop {
165 let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
166 src = rest;
167 let Some(value) = value else {
168 continue;
169 };
170 return Ok((src, value));
171 }
172 }
173
174 /// Like [`Parser::map`], but calls the provdied function using the Nightly [`FnMut::call_mut`]
175 /// method, effectively spreading the output as the arguments of the function.
176 ///
177 /// The following nIghtly Rust code:
178 /// ```ignore
179 /// use shrimple_parser::Parser;
180 /// parser.call(u32::pow)
181 /// ```
182 /// is equivalent to the following stable Rust code:
183 /// ```ignore
184 /// use shrimple_parser::Parser;
185 /// parser.map(|(x, y)| u32::pow(x, y))
186 /// ```
187 /// `T` for this method is constrained not by the [`crate::Tuple`] trait, but by the unstable
188 /// standard trait [`core::marker::Tuple`], which means that `T` can be a tuple of absolutely
189 /// any length.
190 ///
191 /// See also: [`crate::call`], a macro for a stable alternative to this method.
192 #[cfg(feature = "nightly")]
193 fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
194 where
195 F: FnMut<Out>,
196 Out: core::marker::Tuple,
197 {
198 move |src| self(src).map(map_second(|x| f.call_mut(x)))
199 }
200
201 /// Replaces a recoverable error with the result of `parser`.
202 ///
203 /// The input fed into the second parser is the rest of the input returned by the first parser.
204 ///
205 /// # Warning
206 /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
207 fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
208 move |src| {
209 let fallback = src.clone();
210 match self(src) {
211 Ok(res) => Ok(res),
212 Err(err) if err.is_recoverable() => parser(fallback),
213 Err(err) => Err(err),
214 }
215 }
216 }
217
218 /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
219 ///
220 /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
221 /// somewhere down the line.
222 fn or_nonempty(
223 mut self,
224 mut parser: impl Parser<In, Out, Reason>,
225 ) -> impl Parser<In, Out, Reason> {
226 move |src| {
227 let fallback = src.clone();
228 match self(src) {
229 Ok(res) => Ok(res),
230 Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
231 Err(err) => Err(err),
232 }
233 }
234 }
235
236 /// Replaces a recoverable error with the transformed remains of the input.
237 /// If the rest of the input in the recoverable error is already empty, does nothing.
238 /// The returned remains of the input are an empty string.
239 fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
240 move |src| {
241 let fallback = src.clone();
242 match self(src) {
243 Ok(res) => Ok(res),
244 Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
245 Ok((In::default(), f(fallback)))
246 }
247 Err(err) => Err(err),
248 }
249 }
250 }
251
252 /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
253 ///
254 /// Be aware that `value` will be cloned every time it's to be returned.
255 ///
256 /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
257 fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
258 where
259 Out: Clone,
260 {
261 move |src| {
262 let fallback = src.clone();
263 match self(src) {
264 Ok(res) => Ok(res),
265 Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
266 Err(err) => Err(err),
267 }
268 }
269 }
270
271 /// Parses the rest of the input after the first parser, returning both outputs
272 /// & short-circuiting on an error.
273 ///
274 /// The reason for the errors of the first parser is adapted to the one of the second parser.
275 ///
276 /// See also [`Parser::add`], [`Parser::and_value`].
277 fn and<Other>(
278 mut self,
279 mut parser: impl Parser<In, Other, Reason>,
280 ) -> impl Parser<In, (Out, Other), Reason> {
281 move |src| {
282 let (rest, out) = self(src)?;
283 let (rest, new_out) = parser(rest)?;
284 Ok((rest, (out, new_out)))
285 }
286 }
287
288 /// Adds a value to the output of the parser
289 ///
290 /// Be aware that `value` will be cloned every time it's to be returned.
291 ///
292 /// See [`Parser::and`].
293 fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
294 move |src| {
295 let (rest, out) = self(src)?;
296 Ok((rest, (out, value.clone())))
297 }
298 }
299
300 /// Like [`Parser::and`], but specific to parsers that output a tuple:
301 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
302 fn add<New>(
303 mut self,
304 mut parser: impl Parser<In, New, Reason>,
305 ) -> impl Parser<In, Out::Appended<New>, Reason>
306 where
307 Out: Tuple,
308 {
309 move |src| {
310 let (rest, out) = self(src)?;
311 let (rest, new_out) = parser(rest)?;
312 Ok((rest, out.append(new_out)))
313 }
314 }
315
316 /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
317 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
318 fn add_value<Other: Clone>(
319 mut self,
320 value: Other,
321 ) -> impl Parser<In, Out::Appended<Other>, Reason>
322 where
323 Out: Tuple,
324 {
325 move |src| {
326 let (rest, out) = self(src)?;
327 Ok((rest, out.append(value.clone())))
328 }
329 }
330
331 /// Like [`Parser::and`], but discards the output of the first parser.
332 /// The reason for the errors of the first parser is adapted to the one of the second parser.
333 fn then<NewOut>(
334 mut self,
335 mut parser: impl Parser<In, NewOut, Reason>,
336 ) -> impl Parser<In, NewOut, Reason> {
337 move |src| {
338 let rest = self(src)?.0;
339 let (rest, out) = parser(rest)?;
340 Ok((rest, out))
341 }
342 }
343
344 /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
345 ///
346 /// Effectively, all this function does is advance the input to right after the second parser,
347 /// if it succeeds, otherwise the input stays as if only the first parser was called.
348 fn skip<Skipped>(
349 mut self,
350 mut parser: impl Parser<In, Skipped, Reason>,
351 ) -> impl Parser<In, Out, Reason> {
352 move |src| {
353 let (rest, out) = self(src)?;
354 let rest = match parser(rest) {
355 Ok((rest, _)) => rest,
356 Err(err) if err.is_recoverable() => err.rest,
357 Err(err) => return Err(err),
358 };
359 Ok((rest, out))
360 }
361 }
362
363 /// Sets the reason for errors returned from the parser, making all errors fatal.
364 fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
365 move |src| self(src).map_err(|e| e.reason(expected.clone()))
366 }
367
368 /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
369 /// nothing is changed.
370 fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
371 where
372 Reason: Clone,
373 {
374 move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
375 }
376
377 /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
378 ///
379 /// Be aware that `reason` is cloned every time it's to be returned.
380 fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
381 where
382 Reason: Clone,
383 {
384 move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
385 }
386
387 /// Adds the part of the input that was consumed by the parser to the outputs.
388 ///
389 /// If the input increased in length after the parser (which should not happen), an empty
390 /// string is added.
391 /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
392 fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
393 self.map_out(tuple).add_span()
394 }
395
396 /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
397 /// [`Tuple`] trait.
398 fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
399 where
400 Out: Tuple,
401 {
402 move |src| {
403 let (rest, out) = self(src.clone())?;
404 let end = src.len().saturating_sub(rest.len());
405 let consumed = src.before(end);
406 Ok((rest, out.append(consumed)))
407 }
408 }
409
410 /// Adds a copy of rest of the input to the output.
411 fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
412 self.map_out(tuple).add_rest()
413 }
414
415 /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
416 /// [`Tuple`] trait.
417 fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
418 where
419 Out: Tuple,
420 {
421 move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
422 }
423
424 /// Replaces a recoverable error with `None`, making the output optional.
425 fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
426 move |src| match self(src) {
427 Ok((rest, out)) => Ok((rest, Some(out))),
428 Err(err) if err.is_recoverable() => Ok((err.rest, None)),
429 Err(err) => Err(err),
430 }
431 }
432
433 /// Replaces the output with `true` and a recoverable error with `false`
434 fn ok(mut self) -> impl Parser<In, bool, Reason> {
435 move |src| match self(src) {
436 Ok((rest, _)) => Ok((rest, true)),
437 Err(err) if err.is_recoverable() => Ok((err.rest, false)),
438 Err(err) => Err(err),
439 }
440 }
441
442 /// Repeats the parser until an error is met, discarding all the output.
443 fn repeat(mut self) -> impl Parser<In, (), Reason> {
444 move |mut src| loop {
445 match self(src) {
446 Ok((rest, _)) => src = rest,
447 Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
448 Err(err) => return Err(err),
449 }
450 }
451 }
452
453 /// Applies the parser repeatedly, collecting the output into a collection, until an error is
454 /// met.
455 fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
456 move |mut src| {
457 let mut res = C::default();
458 loop {
459 match self(src) {
460 Ok((rest, new)) => {
461 res.extend([new]);
462 src = rest;
463 }
464 Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
465 Err(err) => return Err(err),
466 }
467 }
468 }
469 }
470
    /// Prints the result of the parser to stderr, prefixed with `label`, and returns the result
    /// unchanged.
    ///
    /// On success the output is printed using its `Debug` implementation; on error the reason
    /// of the error is printed instead. Either way the line ends with up to the first 16
    /// characters of the rest of the input (escaped with `escape_debug`), followed by `...`.
    fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
    where
        In: Input,
        Out: Debug,
        Reason: Debug,
    {
        move |src| match self(src) {
            Ok((rest, out)) => {
                // Byte index of the 17th character, or the whole length if the rest is shorter,
                // so that the slice below always ends on a character boundary.
                let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
                let r = &rest[..until].escape_debug();
                eprintln!("{label}: Ok({out:?}) : {r}...");
                Ok((rest, out))
            }
            Err(err) => {
                // Same preview computation, applied to the input stored in the error.
                let until = err
                    .rest
                    .char_indices()
                    .nth(16)
                    .map_or(err.rest.len(), |x| x.0);
                let r = &err.rest[..until].escape_debug();
                eprintln!("{label}: Err({:?}) : {r}...", err.reason);
                Err(err)
            }
        }
    }
498
499 /// Turns the parser into an iterator that yields output until the first recoverable error.
500 /// If an error is yielded from the iterator, it's guaranteed to be fatal.
501 fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
502 Iter {
503 input: Some(input),
504 parser: self,
505 _params: PhantomData,
506 }
507 }
508
509 /// Augments the parsing error, if present, with location in the `input`.
510 /// `path` is the reported path to the file where the error occured.
511 /// Note that the `input` passed here is only used for error reporting, not as the input to the
512 /// parser.
513 fn with_full_error<'a>(
514 mut self,
515 path: impl PathLike<'a>,
516 full_src: &'a str,
517 ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
518 where
519 In: Input,
520 {
521 move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
522 }
523}
524
// Blanket implementation: every `FnMut(In) -> ParsingResult<In, Out, Reason>` over an [`Input`]
// is a [`Parser`], which makes all the combinator methods available on plain functions & closures.
impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
where
    In: Input,
    F: FnMut(In) -> ParsingResult<In, Out, Reason>,
{
}
531
/// Iterator returned by [`Parser::iter`]
///
/// Every call to `next` runs the parser on the input left over by the previous call.
pub struct Iter<In, Out, Reason, P> {
    // The input that is yet to be parsed; `None` once the parser has returned an error.
    input: Option<In>,
    // The parser that is applied on every iteration.
    parser: P,
    // Ties the otherwise unused `Out` & `Reason` parameters to the type.
    _params: PhantomData<(Out, Reason)>,
}
538
539impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
540where
541 In: Input,
542 P: Parser<In, Out, Reason>,
543{
544 type Item = Result<Out, ParsingError<In, Reason>>;
545
546 fn next(&mut self) -> Option<Self::Item> {
547 let input = self.input.take()?;
548 match (self.parser)(input) {
549 Ok((rest, res)) => {
550 self.input = Some(rest);
551 Some(Ok(res))
552 }
553 Err(err) if err.is_recoverable() => None,
554 Err(err) => Some(Err(err)),
555 }
556 }
557}
558
// `next` takes the input out of `self.input` & only puts it back after a successful parse,
// so once it has returned `None`, every later call returns `None` as well.
impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
}
565
impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
where
    In: Input,
    P: Parser<In, Out, Reason>,
{
    /// Returns the part of the input that hasn't been processed by the parser yet.
    ///
    /// Returns `None` once the parser has returned an error, since the iterator keeps the input
    /// only after a successful parse.
    pub const fn remainder(&self) -> Option<&In> {
        self.input.as_ref()
    }
}
576
577/// Returns a parser that always returns the provided value.
578///
579/// Beware that the value is always cloned.
580pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
581 move |i| Ok((i, value.clone()))
582}
583
584/// Parses any 1 character from the input.
585///
586/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
587///
588/// # Errors
589/// Returns a recoverable error if the input is empty.
590pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
591 match input.chars().next() {
592 Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
593 None => Err(ParsingError::new_recoverable(input)),
594 }
595}
596
597/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
598/// that.
599///
600/// # Errors
601/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
602/// is empty.
603pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
604 let ws_len = input.len() - input.trim_start().len();
605 Ok(input.split_at(ws_len).rev())
606}
607
608/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
609/// that.
610///
611/// # Errors
612/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
613/// is empty.
614pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
615 let ws_len = input.len() - input.trim_ascii_start().len();
616 Ok(input.split_at(ws_len).rev())
617}