inpt/lib.rs
1//! Inpt is a derive crate for dumb type-level text parsing.
2//!
3//! # Introduction
4//! Imagine you need to chop up an annoying string and convert all the bits to useful types.
5//! You could write that sort of code by hand using `split` and `from_str`, but the boiler-plate
6//! of unwrapping and checking quickly loses all charm. Especially since that sort of parsing
7//! shows up a lot in timed programming competitions like [advent of code](https://adventofcode.com).
8//!
9//! Inpt tries to write that sort of parsing code for you, automatically splitting input strings
10//! based on field types and an optional regex. Inpt is absolutely _not_ performant, strict, or formal.
11//! Whenever possible, it does the obvious thing:
12//! <div style="display: flex; flex-direction: row; align-items: stretch; gap: 1em">
13//!
14//! ```rust, no_run
15//! #[inpt::main]
16//! fn main(x: f32, y: f32) {
17//! println!("{}", x * y);
18//! }
19//! ```
20//!
21//! ```text
22//! $ echo '6,7' | cargo run
23//! 42
24//! ```
25//!
26//! </div>
27//!
28//! ## Contents
29//! - [Introduction](crate#introduction)
30//! - [Example](crate#example)
31//! - [Struct Syntax](crate#struct-syntax)
32//! - [regex](crate#regex)
33//! - [from, try_from](crate#from-try_from)
34//! - [skip](crate#skip)
35//! - [option](crate#option)
36//! - [before/after](crate#before-after)
37//! - [bounds](crate#bounds)
38//! - [from_str](crate#from_str)
39//! - [from_iter](crate#from_iter)
40//! - [trim](crate#trim)
41//! - [split](crate#split)
42//! - [Enum Syntax](crate#enum-syntax)
43//! - [enum regex](crate#enum-regex)
44//! - [Main](crate#main)
45//!
46//! # Example
47//! ```rust
48//! use inpt::{Inpt, inpt};
49//!
50//! #[derive(Inpt)]
51//! #[inpt(regex = r"(.)=([-\d]+)\.\.([-\d]+),?")]
52//! struct Axis {
53//! name: char,
54//! start: i32,
55//! end: i32,
56//! }
57//!
58//! #[derive(Inpt)]
59//! #[inpt(regex = "target area:")]
60//! struct Target {
61//! #[inpt(after)]
62//! axes: Vec<Axis>,
63//! }
64//!
65//! impl Target {
66//! fn area(&self) -> i32 {
67//! self.axes.iter().map(|Axis { start, end, ..}| end - start).product()
68//! }
69//! }
70//!
71//!
72//! let target = inpt::<Target>("target area: x=119..176, y=-114..84").unwrap();
73//! assert_eq!(target.area(), 11286);
74//! ```
75//!
76//! # Struct Syntax
77//! The [`Inpt`](macro@Inpt) derive macro can do a few neat tricks, listed here. In its default setting,
78//! the fields of the struct are parsed in order, with each field consuming as much of the input as
79//! possible before moving on:
80//!
81//! ```rust
82//! # use inpt::{inpt, Inpt};
83//! #[derive(Inpt, Debug, PartialEq)]
84//! struct OrderedFields<'s>(char, i32, &'s str);
85//!
86//! assert_eq!(
87//! inpt::<OrderedFields>("A113 is a classroom").unwrap(),
88//! OrderedFields('A', 113, "is a classroom"),
89//! )
90//! ```
91//!
92//! This behavior is also implemented for arrays, tuples, and a number of collection types.
93//!
94//! ## regex
95//! When the `#[inpt(regex = r".*")]` struct attribute is given, the fields are no longer
96//! parsed one after another. Instead the regex is matched against the remaining input, and
97//! the fields are parsed from the regex's numbered capture groups. I recommend that regexes are given as
98//! [raw strings](https://doc.rust-lang.org/reference/tokens.html#raw-string-literals) to avoid
99//! double-escapes and quoting.
100//! ```rust
101//! # use inpt::{inpt, Inpt};
102//! #[derive(Inpt, Debug, PartialEq)]
103//! #[inpt(regex = r"(.*) number ([a-zA-Z])(\d+)")]
104//! struct RegexFields<'s>(&'s str, char, i32);
105//!
106//! assert_eq!(
107//! inpt::<RegexFields>("classroom number A113").unwrap(),
108//! RegexFields("classroom", 'A', 113),
109//! )
110//! ```
111//!
112//! Ungreedy/lazy repetitions can be very useful when splitting inputs. Like rewriting a while loop as an until loop,
113//! a regex `([^!]*)!` can be rewritten as `(.*?)!`. This is particularly helpful when we want to stop after finding multiple characters,
114//! like the 3 quotes that end a multi-line string in Python or Julia: `"""(.*?)"""`.
115//!
116//! Be aware that when such a regex is used multiple times to parse a sequence of fields,
117//! the last regex match is forced to parse all remaining input, even if normally lazy:
118//! ```rust
119//! # use inpt::{inpt, Inpt};
120//! #[derive(Inpt, Debug, PartialEq)]
121//! #[inpt(regex = r"(.+?),")]
122//! struct Part<'s>(&'s str);
123//!
124//! assert_eq!(
125//! inpt::<[Part; 3]>("my,list,of,many,words,").unwrap(),
126//! [Part("my"), Part("list"), Part("of,many,words")],
127//! )
128//! ```
129//!
130//! ## from, try_from
131//! When the `#[inpt(from = "T")]` or `#[inpt(try_from = "T")]` struct attributes are given, T is parsed instead
132//! of the struct itself, and the From or TryFrom traits are used to convert.
133//! ```rust
134//! # use inpt::{inpt, Inpt};
135//! # use std::{convert::TryFrom, fmt, error::Error};
136//! use inpt::split::{Group, Line};
137//!
138//! #[derive(Inpt)]
139//! #[inpt(try_from = "Group<Vec<Line<Vec<T>>>>")]
140//! struct Grid<T> {
141//! width: usize,
142//! table: Vec<T>,
143//! }
144//!
145//! #[derive(Debug)]
146//! struct UnevenGridError;
147//! impl fmt::Display for UnevenGridError {
148//! fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149//! f.write_str("grid rows must have even length")
150//! }
151//! }
152//! impl Error for UnevenGridError {}
153//!
154//! impl<'s, T> TryFrom<Group<Vec<Line<Vec<T>>>>> for Grid<T> {
155//! type Error = UnevenGridError;
156//!
157//! fn try_from(Group { inner: lines }: Group<Vec<Line<Vec<T>>>>)
158//! -> Result<Self, Self::Error>
159//! {
160//! let mut width = None;
161//! let mut table = Vec::new();
162//! for Line { inner: mut line } in lines {
163//! width = match width {
164//! Some(w) if w == line.len() => Some(w),
165//! Some(_) => return Err(UnevenGridError),
166//! None => Some(line.len()),
167//! };
168//! table.append(&mut line);
169//! }
170//! Ok(Grid {
171//! width: width.ok_or(UnevenGridError)?,
172//! table,
173//! })
174//! }
175//! }
176//!
177//! assert_eq!(inpt::<Grid<char>>("##\n##").unwrap().width, 2);
178//! ```
179//!
180//!
181//! ## skip
182//! The `#[inpt(skip)]` field attribute can be used to ignore fields when parsing
183//! and instead insert their `Default::default()`.
184//!
185//! ## option
186//! If a capture group corresponds to a field with type `Option`, the field will be set to `None` when the group is not captured
187//! by the match, rather than producing an error.
188//! ```rust
189//! # use inpt::{inpt, Inpt};
190//! #[derive(Inpt, Debug, PartialEq)]
191//! #[inpt(regex = r"(.*) letter ([a-zA-Z])(\d+)?")]
192//! struct RegexFields<'s>(&'s str, char, Option<i32>);
193//!
194//! assert_eq!(
195//! inpt::<RegexFields>("classroom letter A").unwrap(),
196//! RegexFields("classroom", 'A', None),
197//! )
198//! ```
199//!
200//! ## before, after
201//! Any fields marked with the `#[inpt(before)]` attribute will be parsed sequentially, consuming input prior to matching the given regex.
202//! After the regex is matched, remaining input is consumed by any fields marked `#[inpt(after)]`. Having such a field causes the regex
203//! to again behave lazily in the example above.
204//! ```rust
205//! # use inpt::{inpt, Inpt};
206//! #[derive(Inpt, Debug, PartialEq)]
207//! #[inpt(regex = r"is a")]
208//! struct RegexFields<'s>(
209//! #[inpt(before)] char,
210//! #[inpt(before)] i32,
211//! #[inpt(after)] &'s str,
212//! );
213//!
214//! assert_eq!(
215//! inpt::<RegexFields>("A113 is a classroom").unwrap(),
216//! RegexFields('A', 113, "classroom"),
217//! )
218//! ```
219//!
220//! ## bounds
221//! By default the derive macro adds `T: Inpt<'s>` bounds to every parsed field of a struct, as well as a `Self: 's` bound.
222//! This greatly improves error messages and improves the ergonomics around generic structs. However, it is sometimes necessary
223//! to replace those automatic bounds entirely. If you ever get
224//! "<code><b color="ff5555">error\[E0275\]</b><b>: overflow evaluating the requirement \`T: Inpt<'_>\`</b></code>", try solving it
225//! with a `#[inpt(bounds = "")]` attribute.
226//!
227//! ```rust
228//! # use inpt::{inpt, Inpt};
229//! use inpt::InptError;
230//!
231//! #[derive(Inpt)]
232//! #[inpt(regex = "(.)(.+)?")]
233//! #[inpt(bounds = "")]
234//! struct Recursive(char, Option<Box<Recursive>>);
235//!
236//! let chars: Recursive = inpt("abc").unwrap();
237//! # assert_eq!(chars.0, 'a');
238//! # assert_eq!(chars.1.as_ref().unwrap().0, 'b');
239//! # assert_eq!(chars.1.as_ref().unwrap().1.as_ref().unwrap().0, 'c');
240//! ```
241//!
242//! ## from_str
243//! Although Rust integers and strings all implement the `Inpt` trait, some types can only be parsed using `FromStr`.
244//! The derive macro can be told to use a type's [`FromStr`](std::str::FromStr) implementation with the `#[inpt(from_str)]` field attribute.
245//! Because the `from_str` function consumes an entire string instead of chopping off just the beginning, the attribute
246//! can only be placed on the last field of a struct, or on fields receiving regex capture groups.
247//!
248//! ```rust
249//! # use inpt::{inpt, Inpt};
250//! use std::net::{IpAddr};
251//!
252//! #[derive(Inpt, Debug, PartialEq)]
253//! #[inpt(regex = r"route from (\S+) to")]
254//! struct Routing {
255//! #[inpt(from_str)]
256//! from: IpAddr,
257//! #[inpt(from_str, after)]
258//! to: IpAddr,
259//! }
260//!
261//! let route: Routing = inpt("route from 192.168.1.2 to 127.0.0.1").unwrap();
262//! # assert_eq!(&route.from.to_string(), "192.168.1.2");
263//! # assert_eq!(&route.to.to_string(), "127.0.0.1");
264//! ```
265//!
266//! ## from_iter
267//! It is quite easy to repeatedly parse a type, either by using [`Vec`](std::vec::Vec)'s own inpt implementation,
268//! or parsing then collecting an [`InptIter`]. This can also be accessed inside the derive macro using the
269//! `#[inpt(from_iter = "T")]` field attribute, which calls into [`FromIterator<T>`](std::iter::FromIterator).
270//! The item type has to be specified because some collections can be built from multiple different item types
271//! (e.g. `String` can be collected from an iterator of `char`, an iterator of `&str`, or an iterator of `String`).
272//! Like the from_str attribute, the from_iter attribute consumes an entire string and so must appear at the end
273//! of the struct, or otherwise parse a regex capture group.
274//!
275//! ```rust
276//! # use inpt::{inpt, Inpt};
277//! use std::collections::HashMap;
278//!
279//! #[derive(Inpt, Debug, PartialEq)]
280//! struct Rooms {
281//! #[inpt(from_iter = "(char, u32)")]
282//! letter_to_number: HashMap<char, u32>,
283//! }
284//!
285//! assert_eq!(
286//! inpt::<Rooms>("B5 A113 F111").unwrap().letter_to_number,
287//! [('A', 113), ('B', 5), ('F', 111)].into_iter().collect::<HashMap<_, _>>(),
288//! )
289//! ```
290//!
291//! ## trim
292//!
293//! By default, inpt trims all whitespace between fields. However, some types implement more specific trimming rules.
294//! For example, all number types additionally trim adjacent commas and semicolons:
295//! ```rust
296//! # use inpt::inpt;
297//! assert_eq!(
298//! inpt::<Vec<i32>>("1,2;3 4").unwrap(),
299//! vec![1, 2, 3, 4],
300//! )
301//! ```
302//!
303//! Users of this crate can specify characters to trim with the `#[inpt(trim = r"\s")]` struct attribute. The attribute
304//! syntax is the same as for [regex character classes](https://docs.rs/regex/1/regex/#character-classes) including
305//! ranges, negation, intersection, and unicode class names.
306//! ```
307//! # use inpt::{inpt, Inpt};
308//! #[derive(Inpt)]
309//! #[inpt(trim = r"\p{Punctuation}")]
310//! struct Sentence<'s>(&'s str);
311//!
312//! assert_eq!(
313//! inpt::<Sentence>("¡I love regexes 😍!").unwrap().0,
314//! "I love regexes 😍",
315//! )
316//! ```
317//!
318//! _The trim attribute is also available on fields._ In this case, the attribute will forcibly override the trimming
319//! behavior of the field's immediate type. This works particularly well with the from_iter attribute.
320//! ```
321//! # use inpt::{inpt, Inpt};
322//! #[derive(Inpt)]
323//! struct PhoneNumber {
324//! #[inpt(from_iter = "u32", trim = r"+\-()\s")]
325//! digits: Vec<u32>,
326//! }
327//!
328//! assert_eq!(
329//! inpt::<PhoneNumber>("+(1)(425) 555-0100").unwrap().digits,
330//! vec![1, 425, 555, 0100],
331//! )
332//! ```
333//!
334//! Trimming can be broadly disabled by setting `trim = ""` on a wrapper struct (e.g. [`NoTrim`]), as the default
335//! trimmable character class is inherited by types deeper in the parse tree.
336//!
337//! ## split
338//!
339//! Sometimes a whole regex is overkill to separate fields, and you only need some kind of delimiter.
340//! The wrapper types in [`inpt::split`](mod@crate::split) accomplish exactly this: they stop consuming
341//! input as soon as the corresponding delimiter is reached.
342//! The field attribute `#[inpt(split = "T")]` is used to parse a field
343//! as if it were wrapped in the given type.
344//!
345//! ```rust
346//! # use inpt::{inpt, Inpt};
347//! #[derive(Inpt, Debug, PartialEq)]
348//! struct Request<'s> {
349//! #[inpt(split = "Line")]
350//! method: &'s str,
351//! body: &'s str,
352//! }
353//!
354//! assert_eq!(
355//! inpt::<Request>("
356//! PUT
357//! crabs are perfect animals
358//! ").unwrap(),
359//! Request {
360//! method: "PUT",
361//! body: "crabs are perfect animals",
362//! },
363//! )
364//! ```
365//!
366//! # Enum Syntax
367//!
368//! Structs and enums support all the same attributes, listed above. But the process of parsing an enum is
369//! somewhat different. Inpt will attempt to parse each variant, returning the first that is successfully parsed.
370//!
371//! ```
372//! # use inpt::{inpt, Inpt};
373//! #[derive(Inpt)]
374//! enum Math {
375//! #[inpt(regex = r"(.*)\+(.*)")]
376//! Add(f64, f64),
377//! #[inpt(regex = r"(.*)\*(.*)")]
378//! Mul(f64, f64),
379//! }
380//!
381//! impl Math {
382//! fn solve(self) -> f64 {
383//! match self {
384//! Math::Add(a, b) => a + b,
385//! Math::Mul(a, b) => a * b,
386//! }
387//! }
388//! }
389//!
390//! assert_eq!(inpt::<Math>("2.6+5.0").unwrap().solve(), 7.6);
391//! assert_eq!(inpt::<Math>("2.6*5.0").unwrap().solve(), 13.0);
392//!
393//! ```
394//!
395//! ## enum regex
396//!
397//! Although a `#[inpt(regex = r".*")]` attribute is not _required_ on every variant, it is strongly encouraged. Without
398//! a regex to pick the correct set of fields, inpt has to guess-and-check each individually. Not only can this
399//! cause parsing cost to explode exponentially, it makes bugs and errors almost impossible to track down.
400//!
401//! When a regex is specified:
402//! - if an error occurs before the regex match, the next variant may be tried
403//! - if the regex does not match, the next variant is always tried
404//! - if an error occurs inside a capture group or after the regex match, an error is immediately produced
405//!
406//! # Main
407//! Although inpt can be used with any source of text, it is most common to parse
408//! stdin and report errors on stderr. The [`#[inpt::main]`](macro@main) attribute macro is built
409//! to facilitate this. Applied to a function, it works exactly like `#[derive(Inpt)]` except
410//! arguments behave like fields, and the function as a whole behaves like a struct. The created function
411//! will have the same name, visibility, and return type, but will parse stdin instead of receiving arguments.
412//!
413//! ```rust,no_run
414//! #[inpt::main(regex = r"(?:my name is|i am) (.+)")]
415//! fn main(name: &'static str) {
416//! println!("hello {name}!");
417//! }
418//! ```
419//!
420//! If stdin cannot be parsed, the cause of the error is clearly reported by [`error::InptError::annotated_stderr`]:
421//!
422//! <code style="display: block; padding: 1em">$ echo 'call me sam' | cargo run -\-example hello
423//! <span color="ff5555">INPT ERROR</span> in stdin:1:1
424//! <b color="f1fa8c"><hello::main::Arguments></b><b color="ff5555">< </b><b color="bd93f9">/(?:my name is|i am) (.+)/</b><b color="ff5555"> ></b>call me sam<b color="ff5555"></regex></b><b color="f1fa8c"></hello::main::Arguments></b>
425//! </code>
426//!
427//! Note that lifetime elision does not currently work, so all borrows must use either `'static` or a generic lifetime.
428//!
429//!
430
431use once_cell::sync::OnceCell;
432use regex::{Regex, RegexBuilder};
433use std::any::type_name;
434use std::collections::HashSet;
435use std::error::Error as StdError;
436use std::marker::PhantomData;
437use std::ops::Deref;
438use std::{io, process};
439
440/// Apply to a main function to parse stdin.
441///
442/// See [the syntax documentation](crate#main) for more details.
443pub use inpt_macros::main;
444
445/// Apply to a struct so that it can be parsed.
446///
447/// See [the syntax documentation](crate#struct-syntax) for more details.
448pub use inpt_macros::Inpt;
449
450/// Parse a [regex character class](https://docs.rs/regex/1/regex/#character-classes),
451/// and return an instance of [`CharClass`].
452///
453/// The [ucd-generate](https://github.com/BurntSushi/ucd-generate) command line tool is the underlying source of truth
454/// for these tables, although the `char_class!` macro depends on it indirectly, via the
455/// [regex-syntax crate](https://docs.rs/regex-syntax/).
456pub use inpt_macros::char_class;
457
458mod error;
459mod impls;
460pub mod split;
461
462pub use error::ResultExt;
463pub use error::{InptError, InptResult};
464
465extern crate self as inpt;
466
467/// The output of a single parsing step.
468#[derive(Debug)]
469pub struct InptStep<'s, T> {
470 /// The parsed type if successful, or the cause of the error if not.
471 pub data: InptResult<'s, T>,
472 /// The remaining input to parse.
473 pub rest: &'s str,
474}
475
476impl<'s, T> InptStep<'s, T> {
477 /// Apply the given function to the successfully parsed type if any,
478 /// keeping the error and remaining input the same.
479 pub fn map<V>(self, f: impl FnOnce(T) -> V) -> InptStep<'s, V> {
480 InptStep {
481 data: self.data.map(f),
482 rest: self.rest,
483 }
484 }
485
486 /// Apply the given function to the successfully parsed type if any,
487 /// producing a converted error if the function fails.
488 pub fn try_map<V, E>(self, f: impl FnOnce(T) -> Result<V, E>) -> InptStep<'s, V>
489 where
490 E: StdError,
491 {
492 use self::error::InptContext;
493 InptStep {
494 data: self.data.and_then(|x| match f(x) {
495 Ok(x) => Ok(x),
496 Err(e) => Err(InptError {
497 context: vec![InptContext::AtStart, InptContext::Message(e.to_string())],
498 }),
499 }),
500 rest: self.rest,
501 }
502 }
503}
504
/// A set of characters used for trimming, built by the
/// [`char_class!`](macro@char_class) macro.
///
/// The wrapped table is a list of `(first, last)` character pairs.
#[derive(Clone, Copy)]
pub struct CharClass(#[doc(hidden)] pub &'static [(char, char)]);
508
509/// The "\s" character class, used by default to trim types during parsing.
510pub const WHITESPACE: CharClass = inpt_macros::char_class!(r"\s");
511
512impl CharClass {
513 /// Tests if the given char is a member of this class.
514 pub fn contains(self, c: char) -> bool {
515 match self.0.binary_search_by(|range| range.1.cmp(&c)) {
516 Ok(_) => true,
517 Err(index) => index < self.0.len() && self.0[index].0 <= c,
518 }
519 }
520
521 /// Trim this character class off the beginning/end of the given string.
522 pub fn trim(self, text: &str, end: bool) -> &str {
523 match end {
524 true => text.trim_matches(|c| self.contains(c)),
525 false => text.trim_start_matches(|c| self.contains(c)),
526 }
527 }
528}
529
530/// The core parsing trait.
531///
532/// Although this can be implemented manually without too much work,
533/// it is best to derive it as described in the [top-level documentation](crate#struct-syntax).
534pub trait Inpt<'s>: Sized + 's {
535 /// Consume a token of this type from the given text if possible, returning an error if not.
536 /// If `end` is false, all remaining text should also be returned. Otherwise the text should be
537 /// consumed entirely. All text must be trimmed with the given [`CharClass`] (typically but not
538 /// always returned from [`Inpt::default_trim`]) before being passed to this function.
539 ///
540 /// <b color="ff5555">Consider [deriving `Inpt`](crate#struct-syntax) or calling [`crate::inpt()`] instead of using this directly.</b>
541 fn step(
542 text: &'s str,
543 end: bool,
544 trimmed: CharClass,
545 guard: &mut RecursionGuard,
546 ) -> InptStep<'s, Self>;
547
548 /// Determine the class of characters which this type would like trimmed off of any text before parsing.
549 /// This is inherited from the parent type if not otherwise defined.
550 fn default_trim(inherited: CharClass) -> CharClass {
551 inherited
552 }
553}
554
555/// Lazily parse input as a sequence of type `T`.
556pub struct InptIter<'s, T> {
557 /// The remaining text to parse.
558 pub text: &'s str,
559 /// The first error encountered, if any.
560 pub outcome: Result<(), InptError<'s>>,
561 trim: CharClass,
562 _p: PhantomData<T>,
563}
564
565impl<'s, T: Inpt<'s>> InptIter<'s, T> {
566 /// Start parsing the given text, inheriting trim from the given class.
567 pub fn new(text: &'s str, trim: CharClass) -> Self {
568 Self {
569 text,
570 trim,
571 outcome: Ok(()),
572 _p: PhantomData,
573 }
574 }
575}
576
577impl<'s, T: Inpt<'s>> Iterator for InptIter<'s, T> {
578 type Item = T;
579
580 fn next(&mut self) -> Option<Self::Item> {
581 let rest = self.trim.trim(self.text, false);
582 // Really we should have previously trimmed with `true` in this case.
583 if rest.is_empty() {
584 return None;
585 }
586
587 let InptStep { data, rest } = T::step(rest, false, self.trim, &mut RecursionGuard::new());
588 self.text = rest;
589 match data {
590 Ok(data) => Some(data),
591 Err(err) => {
592 if self.outcome.is_ok() {
593 self.outcome = Err(err);
594 }
595 None
596 }
597 }
598 }
599}
600
601impl<'s, T: Inpt<'s>> Inpt<'s> for InptIter<'s, T> {
602 fn step(
603 text: &'s str,
604 _end: bool,
605 trimmed: CharClass,
606 _: &mut RecursionGuard,
607 ) -> InptStep<'s, Self> {
608 InptStep {
609 data: Ok(Self::new(text, trimmed)),
610 rest: &text[text.len()..],
611 }
612 }
613}
614
615/// Broadly disables the default whitespace trimming on the inner type.
616///
617/// Types with custom trimming rules (e.g. number types) are not effected.
618#[derive(Inpt, Debug, Copy, Clone)]
619#[inpt(trim = "")]
620pub struct NoTrim<T>(pub T);
621
622/// A const regex used internally by [`macro@Inpt`].
623pub struct LazyRegex {
624 source: &'static str,
625 re: OnceCell<Regex>,
626}
627
628impl LazyRegex {
629 pub const fn new(source: &'static str) -> Self {
630 Self {
631 source,
632 re: OnceCell::new(),
633 }
634 }
635}
636
637/// Prevents infinite parse trees.
638pub struct RecursionGuard {
639 hit: HashSet<(usize, fn() -> &'static str)>,
640}
641
642impl RecursionGuard {
643 /// Start with no `check` calls registered.
644 pub fn new() -> Self {
645 RecursionGuard {
646 hit: Default::default(),
647 }
648 }
649
650 /// Call the given function if `check::<T>` has not already been
651 /// performed at the start of `text`, otherwise produce an error.
652 pub fn check<'s, T>(
653 &mut self,
654 text: &'s str,
655 with: impl FnOnce(&mut Self) -> InptStep<'s, T>,
656 ) -> InptStep<'s, T> {
657 let key = (
658 text.as_ptr() as usize,
659 type_name::<T> as fn() -> &'static str,
660 );
661 if self.hit.insert(key) {
662 let res = with(self);
663 self.hit.remove(&key);
664 res
665 } else {
666 InptStep {
667 data: Err(InptError::recursion_at_start::<T>()),
668 rest: text,
669 }
670 }
671 }
672}
673
674impl Deref for LazyRegex {
675 type Target = Regex;
676
677 fn deref(&self) -> &Self::Target {
678 self.re
679 .get_or_init(|| RegexBuilder::new(self.source).build().unwrap())
680 }
681}
682
683/// *The point of this crate.* Parse `T` from the given string.
684pub fn inpt<'s, T: Inpt<'s>>(text: &'s str) -> InptResult<'s, T> {
685 let trimmed = T::default_trim(WHITESPACE);
686 T::step(
687 trimmed.trim(text, true),
688 true,
689 trimmed,
690 &mut RecursionGuard::new(),
691 )
692 .data
693}
694
695/// Parse `T` from the beginning of the given string.
696pub fn inpt_step<'s, T: Inpt<'s>>(text: &'s str) -> InptStep<'s, T> {
697 let trimmed = T::default_trim(WHITESPACE);
698 T::step(
699 trimmed.trim(text, false),
700 false,
701 trimmed,
702 &mut RecursionGuard::new(),
703 )
704}
705
706/// Parse `T` from stdin and print any errors on stderr.
707///
708/// If parsing fails, the process will be exited with status -1.
709///
710/// Note that the input text will be permanently allocated on the heap, allowing
711/// the parsed type to contain `&'static str` strings.
712/// This is the core function used by the [`#[inpt::main]` macro](crate#main).
713pub fn inpt_stdio<T>() -> T
714where
715 T: Inpt<'static>,
716{
717 let mut text = Box::new(String::new());
718 io::Read::read_to_string(&mut io::stdin(), &mut text)
719 .expect("could not read utf8 text from stdin");
720 match inpt::<T>(Box::leak(text).as_str()) {
721 Ok(value) => value,
722 Err(err) => {
723 err.annotated_stderr("stdin").unwrap();
724 process::exit(-1)
725 }
726 }
727}