inpt/
lib.rs

1//! Inpt is a derive crate for dumb type-level text parsing.
2//!
3//! # Introduction
4//! Imagine you need to chop up an annoying string and convert all the bits to useful types.
5//! You could write that sort of code by hand using `split` and `from_str`, but the boiler-plate
//! of unwrapping and checking quickly loses all charm. Especially since that sort of parsing
7//! shows up a lot in timed programming competitions like [advent of code](https://adventofcode.com).
8//!
9//! Inpt tries to write that sort of parsing code for you, automatically splitting input strings
10//! based on field types and an optional regex. Inpt is absolutely _not_ performant, strict, or formal.
11//! Whenever possible, it does the obvious thing:
12//! <div style="display: flex; flex-direction: row; align-items: stretch; gap: 1em">
13//!
14//! ```rust, no_run
15//! #[inpt::main]
16//! fn main(x: f32, y: f32) {
17//!     println!("{}", x * y);
18//! }
19//! ```
20//!
21//! ```text
22//! $ echo '6,7' | cargo run
23//! 42
24//! ```
25//!
26//! </div>
27//!
28//! ## Contents
29//! - [Introduction](crate#introduction)
30//! - [Example](crate#example)
31//! - [Struct Syntax](crate#struct-syntax)
32//!     - [regex](crate#regex)
33//!     - [from, try_from](crate#from-try_from)
34//!     - [skip](crate#skip)
35//!     - [option](crate#option)
36//!     - [before/after](crate#before-after)
37//!     - [bounds](crate#bounds)
38//!     - [from_str](crate#from_str)
39//!     - [from_iter](crate#from_iter)
40//!     - [trim](crate#trim)
41//!     - [split](crate#split)
42//! - [Enum Syntax](crate#enum-syntax)
43//!     - [enum regex](crate#enum-regex)
44//! - [Main](crate#main)
45//!
46//! # Example
47//! ```rust
48//! use inpt::{Inpt, inpt};
49//!
50//! #[derive(Inpt)]
51//! #[inpt(regex = r"(.)=([-\d]+)\.\.([-\d]+),?")]
52//! struct Axis {
53//!     name: char,
54//!     start: i32,
55//!     end: i32,
56//! }
57//!
58//! #[derive(Inpt)]
59//! #[inpt(regex = "target area:")]
60//! struct Target {
61//!     #[inpt(after)]
62//!     axes: Vec<Axis>,
63//! }
64//!
65//! impl Target {
66//!     fn area(&self) -> i32 {
67//!         self.axes.iter().map(|Axis { start, end, ..}| end - start).product()
68//!     }
69//! }
70//!
71//!
72//! let target = inpt::<Target>("target area: x=119..176, y=-114..84").unwrap();
73//! assert_eq!(target.area(), 11286);
74//! ```
75//!
76//! # Struct Syntax
77//! The [`Inpt`](macro@Inpt) derive macro can do a few neat tricks, listed here. In its default setting,
78//! the fields of the struct are parsed in order, with each field consuming as much of the input as
79//! possible before moving on:
80//!
81//! ```rust
82//! # use inpt::{inpt, Inpt};
83//! #[derive(Inpt, Debug, PartialEq)]
84//! struct OrderedFields<'s>(char, i32, &'s str);
85//!
86//! assert_eq!(
87//!     inpt::<OrderedFields>("A113 is a classroom").unwrap(),
88//!     OrderedFields('A', 113, "is a classroom"),
89//! )
90//! ```
91//!
92//! This behavior is also implemented for arrays, tuples, and a number of collection types.
93//!
94//! ## regex
95//! When the `#[inpt(regex = r".*")]` struct attribute is given, the fields are no longer
//! parsed one after another. Instead, the regex is matched against the remaining input, and
97//! the fields are parsed from the regex's numbered capture groups. I recommend that regexes are given as
98//! [raw strings](https://doc.rust-lang.org/reference/tokens.html#raw-string-literals) to avoid
99//! double-escapes and quoting.
100//! ```rust
101//! # use inpt::{inpt, Inpt};
102//! #[derive(Inpt, Debug, PartialEq)]
103//! #[inpt(regex = r"(.*) number ([a-zA-Z])(\d+)")]
104//! struct RegexFields<'s>(&'s str, char, i32);
105//!
106//! assert_eq!(
107//!     inpt::<RegexFields>("classroom number A113").unwrap(),
108//!     RegexFields("classroom", 'A', 113),
109//! )
110//! ```
111//!
112//! Ungreedy/lazy repetitions can be very useful when splitting inputs. Like rewriting a while loop as an until loop,
113//! a regex `([^!]*)!` can be rewritten as `(.*?)!`. This is particularly helpful when we want to stop after finding multiple characters,
114//! like the 3 quotes that end a multi-line string in Python or Julia: `"""(.*?)"""`.
115//!
116//! Be aware that when such a regex is used multiple times to parse a sequence of fields,
117//! the last regex match is forced to parse all remaining input, even if normally lazy:
118//! ```rust
119//! # use inpt::{inpt, Inpt};
120//! #[derive(Inpt, Debug, PartialEq)]
121//! #[inpt(regex = r"(.+?),")]
122//! struct Part<'s>(&'s str);
123//!
124//! assert_eq!(
125//!     inpt::<[Part; 3]>("my,list,of,many,words,").unwrap(),
126//!     [Part("my"), Part("list"), Part("of,many,words")],
127//! )
128//! ```
129//!
130//! ## from, try_from
131//! When the `#[inpt(from = "T")]` or `#[inpt(try_from = "T")]` struct attributes are given, T is parsed instead
132//! of the struct itself, and the From or TryFrom traits are used to convert.
133//! ```rust
134//! # use inpt::{inpt, Inpt};
135//! # use std::{convert::TryFrom, fmt, error::Error};
136//! use inpt::split::{Group, Line};
137//!
138//! #[derive(Inpt)]
139//! #[inpt(try_from = "Group<Vec<Line<Vec<T>>>>")]
140//! struct Grid<T> {
141//!     width: usize,
142//!     table: Vec<T>,
143//! }
144//!
145//! #[derive(Debug)]
146//! struct UnevenGridError;
147//! impl fmt::Display for UnevenGridError {
148//!     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
149//!         f.write_str("grid rows must have even length")
150//!     }
151//! }
152//! impl Error for UnevenGridError {}
153//!
154//! impl<'s, T> TryFrom<Group<Vec<Line<Vec<T>>>>> for Grid<T> {
155//!     type Error = UnevenGridError;
156//!
157//!     fn try_from(Group { inner: lines }: Group<Vec<Line<Vec<T>>>>)
158//!             -> Result<Self, Self::Error>
159//!     {
160//!         let mut width = None;
161//!         let mut table = Vec::new();
162//!         for Line { inner: mut line } in lines {
163//!             width = match width {
164//!                 Some(w) if w == line.len() => Some(w),
165//!                 Some(_) => return Err(UnevenGridError),
166//!                 None => Some(line.len()),
167//!             };
168//!             table.append(&mut line);
169//!         }
170//!         Ok(Grid {
171//!             width: width.ok_or(UnevenGridError)?,
172//!             table,
173//!         })
174//!     }
175//! }
176//!
177//! assert_eq!(inpt::<Grid<char>>("##\n##").unwrap().width, 2);
178//! ```
179//!
180//!
181//! ## skip
182//! The `#[inpt(skip)]` field attribute can be used to ignore fields when parsing
183//! and instead insert their `Default::default()`.
184//!
185//! ## option
186//! If a capture group corresponds to a field with type `Option`, the field will be set to `None` when the group is not captured
187//! by the match, rather than producing an error.
188//! ```rust
189//! # use inpt::{inpt, Inpt};
190//! #[derive(Inpt, Debug, PartialEq)]
191//! #[inpt(regex = r"(.*) letter ([a-zA-Z])(\d+)?")]
192//! struct RegexFields<'s>(&'s str, char, Option<i32>);
193//!
194//! assert_eq!(
195//!     inpt::<RegexFields>("classroom letter A").unwrap(),
196//!     RegexFields("classroom", 'A', None),
197//! )
198//! ```
199//!
200//! ## before, after
201//! Any fields marked with the `#[inpt(before)]` attribute will be parsed sequentially, consuming input prior to matching the given regex.
202//! After the regex is matched, remaining input is consumed by any fields marked `#[inpt(after)]`. Having such a field causes the regex
203//! to again behave lazily in the example above.
204//! ```rust
205//! # use inpt::{inpt, Inpt};
206//! #[derive(Inpt, Debug, PartialEq)]
207//! #[inpt(regex = r"is a")]
208//! struct RegexFields<'s>(
209//!     #[inpt(before)] char,
210//!     #[inpt(before)] i32,
211//!     #[inpt(after)] &'s str,
212//! );
213//!
214//! assert_eq!(
215//!     inpt::<RegexFields>("A113 is a classroom").unwrap(),
216//!     RegexFields('A', 113, "classroom"),
217//! )
218//! ```
219//!
220//! ## bounds
221//! By default the derive macro adds `T: Inpt<'s>` bounds to every parsed field of a struct, as well as a `Self: 's` bound.
222//! This greatly improves error messages and improves the ergonomics around generic structs. However, it is sometimes necessary
223//! to replace those automatic bounds entirely. If you ever get
224//! "<code><b color="ff5555">error\[E0275\]</b><b>: overflow evaluating the requirement \`T: Inpt&lt;'_&gt;\`</b></code>", try solving it
225//! with a `#[inpt(bounds = "")]` attribute.
226//!
227//! ```rust
228//! # use inpt::{inpt, Inpt};
229//! use inpt::InptError;
230//!
231//! #[derive(Inpt)]
232//! #[inpt(regex = "(.)(.+)?")]
233//! #[inpt(bounds = "")]
234//! struct Recursive(char, Option<Box<Recursive>>);
235//!
236//! let chars: Recursive = inpt("abc").unwrap();
237//! # assert_eq!(chars.0, 'a');
238//! # assert_eq!(chars.1.as_ref().unwrap().0, 'b');
239//! # assert_eq!(chars.1.as_ref().unwrap().1.as_ref().unwrap().0, 'c');
240//! ```
241//!
242//! ## from_str
243//! Although Rust integers and strings all implement the `Inpt` trait, some types can only be parsed using `FromStr`.
244//! The derive macro can be told to use a type's [`FromStr`](std::str::FromStr) implementation with the `#[inpt(from_str)]` field attribute.
245//! Because the `from_str` function consumes an entire string instead of chopping off just the beginning, the attribute
246//! can only be placed on the last field of a struct, or on fields receiving regex capture groups.
247//!
248//! ```rust
249//! # use inpt::{inpt, Inpt};
250//! use std::net::{IpAddr};
251//!
252//! #[derive(Inpt, Debug, PartialEq)]
253//! #[inpt(regex = r"route from (\S+) to")]
254//! struct Routing {
255//!     #[inpt(from_str)]
256//!     from: IpAddr,
257//!     #[inpt(from_str, after)]
258//!     to: IpAddr,
259//! }
260//!
261//! let route: Routing = inpt("route from 192.168.1.2 to 127.0.0.1").unwrap();
262//! # assert_eq!(&route.from.to_string(), "192.168.1.2");
263//! # assert_eq!(&route.to.to_string(), "127.0.0.1");
264//! ```
265//!
266//! ## from_iter
267//! It is quite easy to repeatedly parse a type, either by using [`Vec`](std::vec::Vec)'s own inpt implementation,
268//! or parsing then collecting a [`InptIter`]. This can also be accessed inside the derive macro using the
269//! `#[inpt(from_iter = "T")]` field attribute, which calls into [`FromIterator<T>`](std::iter::FromIterator).
270//! The item type has to be specified because some collections can be built from multiple different item types
271//! (e.g. `String` can be collected from an iterator of `char`, an iterator of `&str`, or an iterator of `String`).
272//! Like the from_str attribute, the from_iter attribute consumes an entire string and so must appear at the end
273//! of the struct, or otherwise parse a regex capture group.
274//!
275//! ```rust
276//! # use inpt::{inpt, Inpt};
277//! use std::collections::HashMap;
278//!
279//! #[derive(Inpt, Debug, PartialEq)]
280//! struct Rooms {
281//!     #[inpt(from_iter = "(char, u32)")]
282//!     letter_to_number: HashMap<char, u32>,
283//! }
284//!
285//! assert_eq!(
286//!     inpt::<Rooms>("B5 A113 F111").unwrap().letter_to_number,
287//!     [('A', 113), ('B', 5), ('F', 111)].into_iter().collect::<HashMap<_, _>>(),
288//! )
289//! ```
290//!
291//! ## trim
292//!
293//! By default, inpt trims all whitespace between fields. However, some types implement more specific trimming rules.
294//! For example, all number types additionally trim adjacent commas and semicolons:
295//! ```rust
296//! # use inpt::inpt;
297//! assert_eq!(
298//!     inpt::<Vec<i32>>("1,2;3 4").unwrap(),
299//!     vec![1, 2, 3, 4],
300//! )
301//! ```
302//!
303//! Users of this crate can specify characters to trim with the `#[inpt(trim = r"\s")]` struct attribute. The attribute
304//! syntax is the same as for [regex character classes](https://docs.rs/regex/1/regex/#character-classes) including
305//! ranges, negation, intersection, and unicode class names.
306//! ```
307//! # use inpt::{inpt, Inpt};
308//! #[derive(Inpt)]
309//! #[inpt(trim = r"\p{Punctuation}")]
310//! struct Sentence<'s>(&'s str);
311//!
312//! assert_eq!(
313//!     inpt::<Sentence>("¡I love regexes 💕!").unwrap().0,
314//!     "I love regexes 💕",
315//! )
316//! ```
317//!
318//! _The trim attribute is also available on fields._ In this case, the attribute will forcibly override the trimming
319//! behavior of the field's immediate type. This works particularly well with the from_iter attribute.
320//! ```
321//! # use inpt::{inpt, Inpt};
322//! #[derive(Inpt)]
323//! struct PhoneNumber {
324//!     #[inpt(from_iter = "u32", trim = r"+\-()\s")]
325//!     digits: Vec<u32>,
326//! }
327//!
328//! assert_eq!(
329//!     inpt::<PhoneNumber>("+(1)(425) 555-0100").unwrap().digits,
330//!     vec![1, 425, 555, 0100],
331//! )
332//! ```
333//!
334//! Trimming can be broadly disabled by setting `trim = ""` on a wrapper struct (e.g. [`NoTrim`]), as the default
335//! trimmable character class is inherited by types deeper in the parse tree.
336//!
337//! ## split
338//!
339//! Sometimes a whole regex is overkill to separate fields, and you only need some kind of delimiter.
340//! The wrapper types in [`inpt::split`](mod@crate::split) accomplish exactly this: they stop consuming
341//! input as soon as the corresponding delimiter is reached.
342//! The field attribute `#[inpt(split = "T")]` is used to parse a field
343//! as if it were wrapped in the given type.
344//!
345//! ```rust
346//! # use inpt::{inpt, Inpt};
347//! #[derive(Inpt, Debug, PartialEq)]
348//! struct Request<'s> {
349//!     #[inpt(split = "Line")]
350//!     method: &'s str,
351//!     body: &'s str,
352//! }
353//!
354//! assert_eq!(
355//!     inpt::<Request>("
356//!          PUT
357//!          crabs are perfect animals
358//!     ").unwrap(),
359//!     Request {
360//!         method: "PUT",
361//!         body: "crabs are perfect animals",
362//!     },
363//! )
364//! ````
365//!
366//! # Enum Syntax
367//!
368//! Structs and enums support all the same attributes, listed above. But the process of parsing an enum is
369//! somewhat different. Inpt will attempt to parse each variant, returning the first that is successfully parsed.
370//!
371//! ```
372//! # use inpt::{inpt, Inpt};
373//! #[derive(Inpt)]
374//! enum Math {
375//!     #[inpt(regex = r"(.*)\+(.*)")]
376//!     Add(f64, f64),
377//!     #[inpt(regex = r"(.*)\*(.*)")]
378//!     Mul(f64, f64),
379//! }
380//!
381//! impl Math {
382//!     fn solve(self) -> f64 {
383//!         match self {
384//!             Math::Add(a, b) => a + b,
385//!             Math::Mul(a, b) => a * b,
386//!         }
387//!     }
388//! }
389//!
390//! assert_eq!(inpt::<Math>("2.6+5.0").unwrap().solve(), 7.6);
391//! assert_eq!(inpt::<Math>("2.6*5.0").unwrap().solve(), 13.0);
392//!
393//! ```
394//!
395//! ## enum regex
396//!
//! Although a `#[inpt(regex = r".*")]` attribute is not _required_ on every variant, it is strongly encouraged. Without
398//! a regex to pick the correct set of fields, inpt has to guess-and-check each individually. Not only can this
399//! cause parsing cost to explode exponentially, it makes bugs and errors almost impossible to track down.
400//!
401//! When a regex is specified:
402//! - if an error occurs before the regex match, the next variant may be tried
403//! - if the regex does not match, the next variant is always tried
//! - if an error occurs inside a capture group or after the regex match, an error is immediately produced
405//!
406//! # Main
407//! Although inpt can be used with any source of text, it is most common to parse
408//! stdin and report errors on stderr. The [`#[inpt::main]`](macro@main) attribute macro is built
409//! to facilitate this. Applied to a function, it works exactly like `#[derive(Inpt)]` except
410//! arguments behave like fields, and the function as a whole behaves like a struct. The created function
411//! will have the same name, visibility, and return type, but will parse stdin instead of receiving arguments.
412//!
413//! ```rust,no_run
414//! #[inpt::main(regex = r"(?:my name is|i am) (.+)")]
415//! fn main(name: &'static str) {
416//!     println!("hello {name}!");
417//! }
418//! ```
419//!
//! If stdin can not be parsed, the cause of the error is clearly reported by [`error::InptError::annotated_stderr`].
421//!
422//! <code style="display: block; padding: 1em">$ echo 'call me sam' | cargo run -\-example hello
423//! <span color="ff5555">INPT ERROR</span> in stdin:1:1
424//! <b color="f1fa8c">&lt;hello::main::Arguments&gt;</b><b color="ff5555">&lt; </b><b color="bd93f9">/(?:my name is|i am) (.+)/</b><b color="ff5555"> &gt;</b>call me sam<b color="ff5555">&lt;/regex&gt;</b><b color="f1fa8c">&lt;/hello::main::Arguments&gt;</b>
425//! </code>
426//!
427//! Note that lifetime elision does not currently work, so all borrows must use either `'static` or a generic lifetime.
428//!
429//!
430
431use once_cell::sync::OnceCell;
432use regex::{Regex, RegexBuilder};
433use std::any::type_name;
434use std::collections::HashSet;
435use std::error::Error as StdError;
436use std::marker::PhantomData;
437use std::ops::Deref;
438use std::{io, process};
439
440/// Apply to a main function to parse stdin.
441///
442/// See [the syntax documentation](crate#main) for more details.
443pub use inpt_macros::main;
444
445/// Apply to a struct so that it can be parsed.
446///
447/// See [the syntax documentation](crate#struct-syntax) for more details.
448pub use inpt_macros::Inpt;
449
450/// Parse a [regex character class](https://docs.rs/regex/1/regex/#character-classes),
451/// and return an instance of [`CharClass`].
452///
453/// The [ucd-generate](https://github.com/BurntSushi/ucd-generate) command line tool is the underlying source of truth
454/// for these tables, although the `char_class!` macro depends on it indirectly, via the
455/// [regex-syntax crate](https://docs.rs/regex-syntax/).
456pub use inpt_macros::char_class;
457
458mod error;
459mod impls;
460pub mod split;
461
462pub use error::ResultExt;
463pub use error::{InptError, InptResult};
464
465extern crate self as inpt;
466
467/// The output of a single parsing step.
468#[derive(Debug)]
469pub struct InptStep<'s, T> {
470    /// The parsed type if successful, or the cause of the error if not.
471    pub data: InptResult<'s, T>,
472    /// The remaining input to parse.
473    pub rest: &'s str,
474}
475
476impl<'s, T> InptStep<'s, T> {
477    /// Apply the given function to the successfully parsed type if any,
478    /// keeping the error and remaining input the same.
479    pub fn map<V>(self, f: impl FnOnce(T) -> V) -> InptStep<'s, V> {
480        InptStep {
481            data: self.data.map(f),
482            rest: self.rest,
483        }
484    }
485
486    /// Apply the given function to the successfully parsed type if any,
487    /// producing a converted error if the function fails.
488    pub fn try_map<V, E>(self, f: impl FnOnce(T) -> Result<V, E>) -> InptStep<'s, V>
489    where
490        E: StdError,
491    {
492        use self::error::InptContext;
493        InptStep {
494            data: self.data.and_then(|x| match f(x) {
495                Ok(x) => Ok(x),
496                Err(e) => Err(InptError {
497                    context: vec![InptContext::AtStart, InptContext::Message(e.to_string())],
498                }),
499            }),
500            rest: self.rest,
501        }
502    }
503}
504
/// A class of characters, as defined by [`char_class!`](macro@char_class) and used for trimming.
///
/// The class is stored as a static table of `(first, last)` character pairs.
/// [`CharClass::contains`] binary-searches this table by the second element of each pair,
/// so the table is expected to be ordered accordingly.
#[derive(Clone, Copy, Debug)]
pub struct CharClass(#[doc(hidden)] pub &'static [(char, char)]);
508
/// The `\s` character class, used by default to trim types during parsing.
///
/// [`crate::inpt()`] and [`inpt_step`] hand this class to [`Inpt::default_trim`] as the
/// inherited class for the outermost type being parsed.
pub const WHITESPACE: CharClass = inpt_macros::char_class!(r"\s");
511
512impl CharClass {
513    /// Tests if the given char is a member of this class.
514    pub fn contains(self, c: char) -> bool {
515        match self.0.binary_search_by(|range| range.1.cmp(&c)) {
516            Ok(_) => true,
517            Err(index) => index < self.0.len() && self.0[index].0 <= c,
518        }
519    }
520
521    /// Trim this character class off the beginning/end of the given string.
522    pub fn trim(self, text: &str, end: bool) -> &str {
523        match end {
524            true => text.trim_matches(|c| self.contains(c)),
525            false => text.trim_start_matches(|c| self.contains(c)),
526        }
527    }
528}
529
/// The core parsing trait.
///
/// Although this can be implemented manually without too much work,
/// it is best to derive it as described in the [top-level documentation](crate#struct-syntax).
///
/// The lifetime `'s` is the lifetime of the input text; implementors must outlive it
/// (`Self: 's`), which allows parsed values to borrow from the input.
pub trait Inpt<'s>: Sized + 's {
    /// Consume a token of this type from the given text if possible, returning an error if not.
    /// If `end` is false, all remaining text should also be returned. Otherwise the text should be
    /// consumed entirely. All text must be trimmed with the given [`CharClass`] (typically but not
    /// always returned from [`Inpt::default_trim`]) before being passed to this function.
    ///
    /// The `guard` is threaded through nested parsing steps so that a [`RecursionGuard`]
    /// can report a type being parsed again at the same position instead of recursing forever.
    ///
    /// <b color="ff5555">Consider [deriving `Inpt`](crate#struct-syntax) or calling [`crate::inpt()`] instead of using this directly.</b>
    fn step(
        text: &'s str,
        end: bool,
        trimmed: CharClass,
        guard: &mut RecursionGuard,
    ) -> InptStep<'s, Self>;

    /// Determine the class of characters which this type would like trimmed off of any text before parsing.
    /// This is inherited from the parent type if not otherwise defined.
    ///
    /// The default implementation returns `inherited` unchanged.
    fn default_trim(inherited: CharClass) -> CharClass {
        inherited
    }
}
554
555/// Lazily parse input as a sequence of type `T`.
556pub struct InptIter<'s, T> {
557    /// The remaining text to parse.
558    pub text: &'s str,
559    /// The first error encountered, if any.
560    pub outcome: Result<(), InptError<'s>>,
561    trim: CharClass,
562    _p: PhantomData<T>,
563}
564
565impl<'s, T: Inpt<'s>> InptIter<'s, T> {
566    /// Start parsing the given text, inheriting trim from the given class.
567    pub fn new(text: &'s str, trim: CharClass) -> Self {
568        Self {
569            text,
570            trim,
571            outcome: Ok(()),
572            _p: PhantomData,
573        }
574    }
575}
576
577impl<'s, T: Inpt<'s>> Iterator for InptIter<'s, T> {
578    type Item = T;
579
580    fn next(&mut self) -> Option<Self::Item> {
581        let rest = self.trim.trim(self.text, false);
582        // Really we should have previously trimmed with `true` in this case.
583        if rest.is_empty() {
584            return None;
585        }
586
587        let InptStep { data, rest } = T::step(rest, false, self.trim, &mut RecursionGuard::new());
588        self.text = rest;
589        match data {
590            Ok(data) => Some(data),
591            Err(err) => {
592                if self.outcome.is_ok() {
593                    self.outcome = Err(err);
594                }
595                None
596            }
597        }
598    }
599}
600
601impl<'s, T: Inpt<'s>> Inpt<'s> for InptIter<'s, T> {
602    fn step(
603        text: &'s str,
604        _end: bool,
605        trimmed: CharClass,
606        _: &mut RecursionGuard,
607    ) -> InptStep<'s, Self> {
608        InptStep {
609            data: Ok(Self::new(text, trimmed)),
610            rest: &text[text.len()..],
611        }
612    }
613}
614
/// Broadly disables the default whitespace trimming on the inner type.
///
/// This works by declaring an empty trim class (`trim = ""`) on the wrapper.
/// Types with custom trimming rules (e.g. number types) are not affected.
#[derive(Inpt, Debug, Copy, Clone)]
#[inpt(trim = "")]
pub struct NoTrim<T>(pub T);
621
622/// A const regex used internally by [`macro@Inpt`].
623pub struct LazyRegex {
624    source: &'static str,
625    re: OnceCell<Regex>,
626}
627
628impl LazyRegex {
629    pub const fn new(source: &'static str) -> Self {
630        Self {
631            source,
632            re: OnceCell::new(),
633        }
634    }
635}
636
637/// Prevents infinite parse trees.
638pub struct RecursionGuard {
639    hit: HashSet<(usize, fn() -> &'static str)>,
640}
641
642impl RecursionGuard {
643    /// Start with no `check` calls registered.
644    pub fn new() -> Self {
645        RecursionGuard {
646            hit: Default::default(),
647        }
648    }
649
650    /// Call the given function if `check::<T>` has not already been
651    /// performed at the start of `text`, otherwise produce an error.
652    pub fn check<'s, T>(
653        &mut self,
654        text: &'s str,
655        with: impl FnOnce(&mut Self) -> InptStep<'s, T>,
656    ) -> InptStep<'s, T> {
657        let key = (
658            text.as_ptr() as usize,
659            type_name::<T> as fn() -> &'static str,
660        );
661        if self.hit.insert(key) {
662            let res = with(self);
663            self.hit.remove(&key);
664            res
665        } else {
666            InptStep {
667                data: Err(InptError::recursion_at_start::<T>()),
668                rest: text,
669            }
670        }
671    }
672}
673
674impl Deref for LazyRegex {
675    type Target = Regex;
676
677    fn deref(&self) -> &Self::Target {
678        self.re
679            .get_or_init(|| RegexBuilder::new(self.source).build().unwrap())
680    }
681}
682
683/// *The point of this crate.* Parse `T` from the given string.
684pub fn inpt<'s, T: Inpt<'s>>(text: &'s str) -> InptResult<'s, T> {
685    let trimmed = T::default_trim(WHITESPACE);
686    T::step(
687        trimmed.trim(text, true),
688        true,
689        trimmed,
690        &mut RecursionGuard::new(),
691    )
692    .data
693}
694
695/// Parse `T` from the beginning of the given string.
696pub fn inpt_step<'s, T: Inpt<'s>>(text: &'s str) -> InptStep<'s, T> {
697    let trimmed = T::default_trim(WHITESPACE);
698    T::step(
699        trimmed.trim(text, false),
700        false,
701        trimmed,
702        &mut RecursionGuard::new(),
703    )
704}
705
706/// Parse `T` from stdin and print any errors on stderr.
707///
708/// If parsing fails, the process will be exited with status -1.
709///
710/// Note that the input text will be permanently allocated on the heap, allowing
711/// the parsed type to contain `&'static str` strings.
712/// This is the core function used by the [`#[inpt::main]` macro](crate#main).
713pub fn inpt_stdio<T>() -> T
714where
715    T: Inpt<'static>,
716{
717    let mut text = Box::new(String::new());
718    io::Read::read_to_string(&mut io::stdin(), &mut text)
719        .expect("could not read utf8 text from stdin");
720    match inpt::<T>(Box::leak(text).as_str()) {
721        Ok(value) => value,
722        Err(err) => {
723            err.annotated_stderr("stdin").unwrap();
724            process::exit(-1)
725        }
726    }
727}
inpt/lib.rs

inpt/
lib.rs