shrimple_parser/lib.rs
1//! Zero-dependency library with no-std support for writing parsers in a concise functional style
2//! & with rich error-reporting.
3//!
4//! Every parser is a function that maps an [`Input`]. Parsers can match on [`Pattern`]s.
5//!
6//! The basic form of the function is
7//!
8//! ```rust,ignore
9//! use shrimple_parser::{Input, ParsingResult};
10//!
11//! fn parse_foo<In: Input>(input: In) -> ParsingResult<In, Foo, FooParseError> { ... }
12//! ```
13//!
14//! If the parser is infallible, i.e. never returns an unrecoverable error, it's customary to make
15//! it generic over the reason type, to make combining it easier.
16//!
17//! ```rust,ignore
18//! fn parse_foo<In: Input, Reason>(input: In) -> ParsingResult<In, Foo, Reason> { ... }
19//! ```
20//!
//! Kinds of errors are distinguished via a user-defined `Reason` type, which signals what
//! the parser expected.
23//! A [`ParsingError`] can also have no reason, which will mean that the error is recoverable.
24//!
25//! Some built-in parsers can have [`core::convert::Infallible`] as their error reason,
26//! which means that any error the parser may ever return is recoverable.
27//!
28//! The distinction between recoverable & fatal errors is important for parsers that need to try
29//! multiple options.
30//!
31//! Error reporting with precise location in the source is facilitated by
//! constructing a [`FullParsingError`] with methods such as
//! [`Parser::with_full_error`] or [`ParsingError::with_src_loc`].
34
35#![cfg_attr(
36 feature = "nightly",
37 feature(unboxed_closures, fn_traits, tuple_trait, doc_auto_cfg)
38)]
39
40mod error;
41mod input;
42mod loc;
43pub mod pattern;
44pub mod tuple;
45pub mod utils;
46
47pub use {
48 error::{FullParsingError, ParsingError, ParsingResult},
49 input::Input,
50 loc::{FullLocation, Location},
51 pattern::Pattern,
52};
53
54use {
55 core::{
56 convert::Infallible,
57 fmt::{Debug, Display},
58 iter::FusedIterator,
59 marker::PhantomData,
60 mem::take,
61 },
62 tuple::{map_second, tuple, Tuple},
63 utils::PathLike,
64};
65
66/// A trait alias for a function that maps from the input & intermediate output to the rest of the
67/// input & a different output.
68///
69/// Used in [`Parser::map`].
70///
71/// See [`match_out`] for a convenient way to create such a mapper.
72pub trait MappingParser<In, Out, NewOut, Reason = Infallible>:
73 Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
74{
75}
76
77impl<In, Out, NewOut, Reason, F> MappingParser<In, Out, NewOut, Reason> for F where
78 F: Sized + FnMut(In, Out) -> ParsingResult<In, NewOut, Reason>
79{
80}
81
82/// A trait representing a function that takes some string-like input and
83/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
84pub trait Parser<In: Input, Out, Reason = Infallible>:
85 Sized + FnMut(In) -> ParsingResult<In, Out, Reason>
86{
87 /// Use the parser to produce the output.
88 #[expect(clippy::missing_errors_doc)]
89 fn parse(&mut self, input: In) -> ParsingResult<In, Out, Reason> {
90 self(input)
91 }
92
93 /// Turns output into a recoverable error if the output doesn't meet a condition.
94 fn filter(mut self, mut f: impl FnMut(&Out) -> bool) -> impl Parser<In, Out, Reason> {
95 move |src| match self(src.clone()) {
96 Ok((rest, res)) if f(&res) => Ok((rest, res)),
97 Ok(_) => Err(ParsingError::new_recoverable(src)),
98 Err(err) => Err(err),
99 }
100 }
101
102 /// Like [`Parser::filter`], but the possible error is instead fatal, with `reason`
103 // TODO: better name maybe?
104 fn filter_fatal(
105 mut self,
106 reason: Reason,
107 mut f: impl FnMut(&Out) -> bool,
108 ) -> impl Parser<In, Out, Reason>
109 where
110 Reason: Clone,
111 {
112 move |src| match self(src.clone()) {
113 Ok((rest, res)) if f(&res) => Ok((rest, res)),
114 Ok(_) => Err(ParsingError::new(src, reason.clone())),
115 Err(err) => Err(err),
116 }
117 }
118
119 /// Changes the error reason by passing it through `f`.
120 fn map_reason<NewReason>(
121 mut self,
122 mut f: impl FnMut(Reason) -> NewReason,
123 ) -> impl Parser<In, Out, NewReason> {
124 move |src| self(src).map_err(|e| e.map_reason(&mut f))
125 }
126
127 /// Converts the reason, if present, to another type using the [`From`] trait.
128 fn adapt_reason<NewReason>(mut self) -> impl Parser<In, Out, NewReason>
129 where
130 Infallible: From<Reason>,
131 {
132 move |i| self(i).map_err(ParsingError::adapt_reason)
133 }
134
135 /// Transforms the input & the output of the parser, if present.
136 ///
137 /// The argument is a function that maps the input & the current output of the parser to the
138 /// rest of the input & the new output.
139 ///
140 /// See [`match_out`]
141 fn map<NewOut>(
142 mut self,
143 mut parser: impl MappingParser<In, Out, NewOut, Reason>,
144 ) -> impl Parser<In, NewOut, Reason> {
145 move |src| self(src).and_then(|(i, o)| parser(i, o))
146 }
147
148 /// Like [`Parser::map`], but only maps the current output, if present.
149 fn map_out<NewOut>(
150 mut self,
151 mut f: impl FnMut(Out) -> NewOut,
152 ) -> impl Parser<In, NewOut, Reason> {
153 move |src| self(src).map(map_second(&mut f))
154 }
155
156 /// Tranforms the output of the parser, if present, or try parsing the next value.
157 fn map_until<NewOut>(
158 mut self,
159 mut f: impl FnMut(Out) -> Option<NewOut>,
160 ) -> impl Parser<In, NewOut, Reason> {
161 move |mut src| loop {
162 let (rest, value) = self(take(&mut src)).map(map_second(&mut f))?;
163 src = rest;
164 let Some(value) = value else {
165 continue;
166 };
167 return Ok((src, value));
168 }
169 }
170
/// Like [`Parser::map_out`], but calls the provided function using the Nightly
/// [`FnMut::call_mut`] method, effectively spreading the output as the arguments of the
/// function.
///
/// The following Nightly Rust code:
/// ```ignore
/// use shrimple_parser::Parser;
/// parser.call(u32::pow)
/// ```
/// is equivalent to the following stable Rust code:
/// ```ignore
/// use shrimple_parser::Parser;
/// parser.map_out(|(x, y)| u32::pow(x, y))
/// ```
/// `Out` for this method is constrained not by the [`crate::Tuple`] trait, but by the
/// unstable standard trait [`core::marker::Tuple`], which means that it can be a tuple of
/// absolutely any length.
///
/// See also: [`crate::call`], a macro for a stable alternative to this method.
#[cfg(feature = "nightly")]
fn call<F>(mut self, mut f: F) -> impl Parser<In, F::Output, Reason>
where
    F: FnMut<Out>,
    Out: core::marker::Tuple,
{
    move |src| {
        let (rest, args) = self(src)?;
        Ok((rest, f.call_mut(args)))
    }
}
197
198 /// Replaces a recoverable error with the result of `parser`.
199 ///
200 /// The input fed into the second parser is the rest of the input returned by the first parser.
201 ///
202 /// # Warning
203 /// Do not use this in combination with [`Parser::iter`]; Use [`Parser::or_nonempty`]
204 fn or(mut self, mut parser: impl Parser<In, Out, Reason>) -> impl Parser<In, Out, Reason> {
205 move |src| {
206 let fallback = src.clone();
207 match self(src) {
208 Ok(res) => Ok(res),
209 Err(err) if err.is_recoverable() => parser(fallback),
210 Err(err) => Err(err),
211 }
212 }
213 }
214
215 /// Like [`Parser::or`], but keeps the error if the rest of the input is empty.
216 ///
217 /// This allows to avoid slipping into an infinite loop, e.g. when using [`Parser::iter`]
218 /// somewhere down the line.
219 fn or_nonempty(
220 mut self,
221 mut parser: impl Parser<In, Out, Reason>,
222 ) -> impl Parser<In, Out, Reason> {
223 move |src| {
224 let fallback = src.clone();
225 match self(src) {
226 Ok(res) => Ok(res),
227 Err(err) if err.is_recoverable() && !err.rest.is_empty() => parser(fallback),
228 Err(err) => Err(err),
229 }
230 }
231 }
232
233 /// Replaces a recoverable error with the transformed remains of the input.
234 /// If the rest of the input in the recoverable error is already empty, does nothing.
235 /// The returned remains of the input are an empty string.
236 fn or_map_rest(mut self, mut f: impl FnMut(In) -> Out) -> impl Parser<In, Out, Reason> {
237 move |src| {
238 let fallback = src.clone();
239 match self(src) {
240 Ok(res) => Ok(res),
241 Err(err) if err.is_recoverable() && !err.rest.is_empty() => {
242 Ok((In::default(), f(fallback)))
243 }
244 Err(err) => Err(err),
245 }
246 }
247 }
248
249 /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
250 ///
251 /// Be aware that `value` will be cloned every time it's to be returned.
252 ///
253 /// See [`Parser::or`], [`Parser::or_nonempty`], [`Parser::or_map_rest`].
254 fn or_value(mut self, value: Out) -> impl Parser<In, Out, Reason>
255 where
256 Out: Clone,
257 {
258 move |src| {
259 let fallback = src.clone();
260 match self(src) {
261 Ok(res) => Ok(res),
262 Err(err) if err.is_recoverable() => Ok((fallback, value.clone())),
263 Err(err) => Err(err),
264 }
265 }
266 }
267
268 /// Parses the rest of the input after the first parser, returning both outputs
269 /// & short-circuiting on an error.
270 ///
271 /// The reason for the errors of the first parser is adapted to the one of the second parser.
272 ///
273 /// See also [`Parser::add`], [`Parser::and_value`].
274 fn and<Other>(
275 mut self,
276 mut parser: impl Parser<In, Other, Reason>,
277 ) -> impl Parser<In, (Out, Other), Reason> {
278 move |src| {
279 let (rest, out) = self(src)?;
280 let (rest, new_out) = parser(rest)?;
281 Ok((rest, (out, new_out)))
282 }
283 }
284
285 /// Adds a value to the output of the parser
286 ///
287 /// Be aware that `value` will be cloned every time it's to be returned.
288 ///
289 /// See [`Parser::and`].
290 fn and_value<Other: Clone>(mut self, value: Other) -> impl Parser<In, (Out, Other), Reason> {
291 move |src| {
292 let (rest, out) = self(src)?;
293 Ok((rest, (out, value.clone())))
294 }
295 }
296
297 /// Like [`Parser::and`], but specific to parsers that output a tuple:
298 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
299 fn add<New>(
300 mut self,
301 mut parser: impl Parser<In, New, Reason>,
302 ) -> impl Parser<In, Out::Appended<New>, Reason>
303 where
304 Out: Tuple,
305 {
306 move |src| {
307 let (rest, out) = self(src)?;
308 let (rest, new_out) = parser(rest)?;
309 Ok((rest, out.append(new_out)))
310 }
311 }
312
313 /// Like [`Parser::and_value`], but specific to parsers that output a tuple:
314 /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
315 fn add_value<Other: Clone>(
316 mut self,
317 value: Other,
318 ) -> impl Parser<In, Out::Appended<Other>, Reason>
319 where
320 Out: Tuple,
321 {
322 move |src| {
323 let (rest, out) = self(src)?;
324 Ok((rest, out.append(value.clone())))
325 }
326 }
327
328 /// Like [`Parser::and`], but discards the output of the first parser.
329 /// The reason for the errors of the first parser is adapted to the one of the second parser.
330 fn then<NewOut>(
331 mut self,
332 mut parser: impl Parser<In, NewOut, Reason>,
333 ) -> impl Parser<In, NewOut, Reason> {
334 move |src| {
335 let rest = self(src)?.0;
336 let (rest, out) = parser(rest)?;
337 Ok((rest, out))
338 }
339 }
340
341 /// Same as [`Parser::and`] but discards the output and the recoverable error of the second parser.
342 ///
343 /// Effectively, all this function does is advance the input to right after the second parser,
344 /// if it succeeds, otherwise the input stays as if only the first parser was called.
345 fn skip<Skipped>(
346 mut self,
347 mut parser: impl Parser<In, Skipped, Reason>,
348 ) -> impl Parser<In, Out, Reason> {
349 move |src| {
350 let (rest, out) = self(src)?;
351 let rest = match parser(rest) {
352 Ok((rest, _)) => rest,
353 Err(err) if err.is_recoverable() => err.rest,
354 Err(err) => return Err(err),
355 };
356 Ok((rest, out))
357 }
358 }
359
360 /// Sets the reason for errors returned from the parser, making all errors fatal.
361 fn expect<NewReason: Clone>(mut self, expected: NewReason) -> impl Parser<In, Out, NewReason> {
362 move |src| self(src).map_err(|e| e.reason(expected.clone()))
363 }
364
365 /// Makes a recoverable error fatal by giving it a reason. If the error is already fatal,
366 /// nothing is changed.
367 fn or_reason(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
368 where
369 Reason: Clone,
370 {
371 move |src| self(src).map_err(|e| e.or_reason(reason.clone()))
372 }
373
374 /// Like [`Parser::or_reason`] but does nothing if the rest of the input is empty.
375 ///
376 /// Be aware that `reason` is cloned every time it's to be returned.
377 fn or_reason_if_nonempty(mut self, reason: Reason) -> impl Parser<In, Out, Reason>
378 where
379 Reason: Clone,
380 {
381 move |src| self(src).map_err(|e| e.or_reason_if_nonempty(reason.clone()))
382 }
383
384 /// Adds the part of the input that was consumed by the parser to the outputs.
385 ///
386 /// If the input increased in length after the parser (which should not happen), an empty
387 /// string is added.
388 /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
389 fn get_span(self) -> impl Parser<In, (Out, In), Reason> {
390 self.map_out(tuple).add_span()
391 }
392
393 /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
394 /// [`Tuple`] trait.
395 fn add_span(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
396 where
397 Out: Tuple,
398 {
399 move |src| {
400 let (rest, out) = self(src.clone())?;
401 let end = src.len().saturating_sub(rest.len());
402 let consumed = src.before(end);
403 Ok((rest, out.append(consumed)))
404 }
405 }
406
407 /// Adds a copy of rest of the input to the output.
408 fn get_rest(self) -> impl Parser<In, (Out, In), Reason> {
409 self.map_out(tuple).add_rest()
410 }
411
412 /// Like [`Parser::get_rest`], but adds the input to the tuple of other outputs using the
413 /// [`Tuple`] trait.
414 fn add_rest(mut self) -> impl Parser<In, Out::Appended<In>, Reason>
415 where
416 Out: Tuple,
417 {
418 move |src| self(src).map(|(rest, out)| (rest.clone(), out.append(rest)))
419 }
420
421 /// Replaces a recoverable error with `None`, making the output optional.
422 fn maybe(mut self) -> impl Parser<In, Option<Out>, Reason> {
423 move |src| match self(src) {
424 Ok((rest, out)) => Ok((rest, Some(out))),
425 Err(err) if err.is_recoverable() => Ok((err.rest, None)),
426 Err(err) => Err(err),
427 }
428 }
429
430 /// Replaces the output with `true` and a recoverable error with `false`
431 fn ok(mut self) -> impl Parser<In, bool, Reason> {
432 move |src| match self(src) {
433 Ok((rest, _)) => Ok((rest, true)),
434 Err(err) if err.is_recoverable() => Ok((err.rest, false)),
435 Err(err) => Err(err),
436 }
437 }
438
439 /// Repeats the parser until an error is met, discarding all the output.
440 fn repeat(mut self) -> impl Parser<In, (), Reason> {
441 move |mut src| loop {
442 match self(src) {
443 Ok((rest, _)) => src = rest,
444 Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
445 Err(err) => return Err(err),
446 }
447 }
448 }
449
450 /// Applies the parser repeatedly, collecting the output into a collection, until an error is
451 /// met.
452 fn collect<C: Default + Extend<Out>>(mut self) -> impl Parser<In, C, Reason> {
453 move |mut src| {
454 let mut res = C::default();
455 loop {
456 match self(src) {
457 Ok((rest, new)) => {
458 res.extend([new]);
459 src = rest;
460 }
461 Err(err) if err.is_recoverable() => return Ok((err.rest, res)),
462 Err(err) => return Err(err),
463 }
464 }
465 }
466 }
467
468 /// Prints the output using its `Debug` implementation & the first 16 bytes of the rest of the
469 /// input, all along with a custom provided message.
470 fn dbg(mut self, label: impl Display) -> impl Parser<In, Out, Reason>
471 where
472 In: Input,
473 Out: Debug,
474 Reason: Debug,
475 {
476 move |src| match self(src) {
477 Ok((rest, out)) => {
478 let until = rest.char_indices().nth(16).map_or(rest.len(), |x| x.0);
479 let r = &rest[..until].escape_debug();
480 println!("{label}: Ok({out:?}) : {r}...");
481 Ok((rest, out))
482 }
483 Err(err) => {
484 let until = err
485 .rest
486 .char_indices()
487 .nth(16)
488 .map_or(err.rest.len(), |x| x.0);
489 let r = &err.rest[..until].escape_debug();
490 println!("{label}: Err({:?}) : {r}...", err.reason);
491 Err(err)
492 }
493 }
494 }
495
496 /// Turns the parser into an iterator that yields output until the first recoverable error.
497 /// If an error is yielded from the iterator, it's guaranteed to be fatal.
498 fn iter(self, input: In) -> Iter<In, Out, Reason, Self> {
499 Iter {
500 input: Some(input),
501 parser: self,
502 _params: PhantomData,
503 }
504 }
505
506 /// Augments the parsing error, if present, with location in the `input`.
507 /// `path` is the reported path to the file where the error occured.
508 /// Note that the `input` passed here is only used for error reporting, not as the input to the
509 /// parser.
510 fn with_full_error<'a>(
511 mut self,
512 path: impl PathLike<'a>,
513 full_src: &'a str,
514 ) -> impl FnOnce(In) -> Result<(In, Out), FullParsingError<'a, Reason>>
515 where
516 In: Input,
517 {
518 move |src| self(src).map_err(|e| e.with_src_loc(path, full_src))
519 }
520}
521
522impl<In, Out, Reason, F> Parser<In, Out, Reason> for F
523where
524 In: Input,
525 F: FnMut(In) -> ParsingResult<In, Out, Reason>,
526{
527}
528
529/// Iterator returned by [`Parser::iter`]
530pub struct Iter<In, Out, Reason, P> {
531 input: Option<In>,
532 parser: P,
533 _params: PhantomData<(Out, Reason)>,
534}
535
536impl<In, Out, Reason, P> Iterator for Iter<In, Out, Reason, P>
537where
538 In: Input,
539 P: Parser<In, Out, Reason>,
540{
541 type Item = Result<Out, ParsingError<In, Reason>>;
542
543 fn next(&mut self) -> Option<Self::Item> {
544 let input = self.input.take()?;
545 match (self.parser)(input) {
546 Ok((rest, res)) => {
547 self.input = Some(rest);
548 Some(Ok(res))
549 }
550 Err(err) if err.is_recoverable() => None,
551 Err(err) => Some(Err(err)),
552 }
553 }
554}
555
556impl<In, Out, Reason, P> FusedIterator for Iter<In, Out, Reason, P>
557where
558 In: Input,
559 P: Parser<In, Out, Reason>,
560{
561}
562
563impl<In, Out, Reason, P> Iter<In, Out, Reason, P>
564where
565 In: Input,
566 P: Parser<In, Out, Reason>,
567{
568 /// Returned the part of the input that hasn't been processed by the parser yet.
569 pub const fn remainder(&self) -> Option<&In> {
570 self.input.as_ref()
571 }
572}
573
574/// Returns a parser that always returns the provided value.
575///
576/// Beware that the value is always cloned.
577pub fn ready<In: Input, T: Clone, Reason>(value: T) -> impl Parser<In, T, Reason> {
578 move |i| Ok((i, value.clone()))
579}
580
581/// Parses any 1 character from the input.
582///
583/// A shorter equivalent of `pattern::parse(pattern::AnyChar)`.
584///
585/// # Errors
586/// Returns a recoverable error if the input is empty.
587pub fn parse_char<In: Input, Reason>(input: In) -> ParsingResult<In, char, Reason> {
588 match input.chars().next() {
589 Some(ch) => Ok((input.before(ch.len_utf8()), ch)),
590 None => Err(ParsingError::new_recoverable(input)),
591 }
592}
593
594/// Parses a sequence of Unicode whitespace. See [`char::is_whitespace`] for the definition of
595/// that.
596///
597/// # Errors
598/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
599/// is empty.
600pub fn parse_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
601 let ws_len = input.len() - input.trim_start().len();
602 Ok(input.split_at(ws_len).rev())
603}
604
605/// Parses a sequence of ASCII whitespace. See [`char::is_ascii_whitespace`] for the definition of
606/// that.
607///
608/// # Errors
609/// Never returns an error. If there's no whitespace at tbe start of the input, the returned string
610/// is empty.
611pub fn parse_ascii_whitespace<In: Input, Reason>(input: In) -> ParsingResult<In, In, Reason> {
612 let ws_len = input.len() - input.trim_ascii_start().len();
613 Ok(input.split_at(ws_len).rev())
614}