1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
//! Inpt is a derive crate for dumb type-level text parsing.
//!
//! # Introduction
//! Imagine you need to chop up an annoying string and convert all the bits to useful types.
//! You could write that sort of code by hand using `split` and `from_str`, but the boiler-plate
//! of unwrapping and checking quickly loses all charm. Especially since that sort of parsing
//! shows up a lot in timed programming competitions like [advent of code](https://adventofcode.com).
//!
//! Inpt tries to write that sort of parsing code for you, automatically splitting input strings
//! based on field types and an optional regex. Inpt is absolutely _not_ performant, strict, or formal.
//! Whenever possible, it does the obvious thing:
//! <div style="display: flex; flex-direction: row; align-items: stretch; gap: 1em">
//!
//! ```rust, no_run
//! #[inpt::main]
//! fn main(x: f32, y: f32) {
//! println!("{}", x * y);
//! }
//! ```
//!
//! ```text
//! $ echo '6,7' | cargo run
//! 42
//! ```
//!
//! </div>
//!
//! ## Contents
//! - [Introduction](crate#introduction)
//! - [Example](crate#example)
//! - [Struct Syntax](crate#struct-syntax)
//! - [regex](crate#regex)
//! - [from, try_from](crate#from-try_from)
//! - [skip](crate#skip)
//! - [option](crate#option)
//! - [before, after](crate#before-after)
//! - [bounds](crate#bounds)
//! - [from_str](crate#from_str)
//! - [from_iter](crate#from_iter)
//! - [trim](crate#trim)
//! - [split](crate#split)
//! - [Enum Syntax](crate#enum-syntax)
//! - [enum regex](crate#enum-regex)
//! - [Main](crate#main)
//!
//! # Example
//! ```rust
//! use inpt::{Inpt, inpt};
//!
//! #[derive(Inpt)]
//! #[inpt(regex = r"(.)=([-\d]+)\.\.([-\d]+),?")]
//! struct Axis {
//! name: char,
//! start: i32,
//! end: i32,
//! }
//!
//! #[derive(Inpt)]
//! #[inpt(regex = "target area:")]
//! struct Target {
//! #[inpt(after)]
//! axes: Vec<Axis>,
//! }
//!
//! impl Target {
//! fn area(&self) -> i32 {
//! self.axes.iter().map(|Axis { start, end, ..}| end - start).product()
//! }
//! }
//!
//!
//! let target = inpt::<Target>("target area: x=119..176, y=-114..84").unwrap();
//! assert_eq!(target.area(), 11286);
//! ```
//!
//! # Struct Syntax
//! The [`Inpt`](macro@Inpt) derive macro can do a few neat tricks, listed here. In its default setting,
//! the fields of the struct are parsed in order, with each field consuming as much of the input as
//! possible before moving on:
//!
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! struct OrderedFields<'s>(char, i32, &'s str);
//!
//! assert_eq!(
//! inpt::<OrderedFields>("A113 is a classroom").unwrap(),
//! OrderedFields('A', 113, "is a classroom"),
//! )
//! ```
//!
//! This behavior is also implemented for arrays, tuples, and a number of collection types.
//!
//! ## regex
//! When the `#[inpt(regex = r".*")]` struct attribute is given, the fields are no longer
//! parsed one after another. Instead the regex is matched against the remaining input, and
//! the fields are parsed from the regex's numbered capture groups. I recommend that regexes are given as
//! [raw strings](https://doc.rust-lang.org/reference/tokens.html#raw-string-literals) to avoid
//! double-escapes and quoting.
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! #[inpt(regex = r"(.*) number ([a-zA-Z])(\d+)")]
//! struct RegexFields<'s>(&'s str, char, i32);
//!
//! assert_eq!(
//! inpt::<RegexFields>("classroom number A113").unwrap(),
//! RegexFields("classroom", 'A', 113),
//! )
//! ```
//!
//! Ungreedy/lazy repetitions can be very useful when splitting inputs. Like rewriting a while loop as an until loop,
//! a regex `([^!]*)!` can be rewritten as `(.*?)!`. This is particularly helpful when we want to stop after finding multiple characters,
//! like the 3 quotes that end a multi-line string in Python or Julia: `"""(.*?)"""`.
//!
//! Be aware that when such a regex is used multiple times to parse a sequence of fields,
//! the last regex match is forced to parse all remaining input, even if normally lazy:
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! #[inpt(regex = r"(.+?),")]
//! struct Part<'s>(&'s str);
//!
//! assert_eq!(
//! inpt::<[Part; 3]>("my,list,of,many,words,").unwrap(),
//! [Part("my"), Part("list"), Part("of,many,words")],
//! )
//! ```
//!
//! ## from, try_from
//! When the `#[inpt(from = "T")]` or `#[inpt(try_from = "T")]` struct attributes are given, T is parsed instead
//! of the struct itself, and the From or TryFrom traits are used to convert.
//! ```rust
//! # use inpt::{inpt, Inpt};
//! # use std::{convert::TryFrom, fmt, error::Error};
//! use inpt::split::{Group, Line};
//!
//! #[derive(Inpt)]
//! #[inpt(try_from = "Group<Vec<Line<Vec<T>>>>")]
//! struct Grid<T> {
//! width: usize,
//! table: Vec<T>,
//! }
//!
//! #[derive(Debug)]
//! struct UnevenGridError;
//! impl fmt::Display for UnevenGridError {
//! fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
//! f.write_str("grid rows must have even length")
//! }
//! }
//! impl Error for UnevenGridError {}
//!
//! impl<'s, T> TryFrom<Group<Vec<Line<Vec<T>>>>> for Grid<T> {
//! type Error = UnevenGridError;
//!
//! fn try_from(Group { inner: lines }: Group<Vec<Line<Vec<T>>>>)
//! -> Result<Self, Self::Error>
//! {
//! let mut width = None;
//! let mut table = Vec::new();
//! for Line { inner: mut line } in lines {
//! width = match width {
//! Some(w) if w == line.len() => Some(w),
//! Some(_) => return Err(UnevenGridError),
//! None => Some(line.len()),
//! };
//! table.append(&mut line);
//! }
//! Ok(Grid {
//! width: width.ok_or(UnevenGridError)?,
//! table,
//! })
//! }
//! }
//!
//! assert_eq!(inpt::<Grid<char>>("##\n##").unwrap().width, 2);
//! ```
//!
//!
//! ## skip
//! The `#[inpt(skip)]` field attribute can be used to ignore fields when parsing
//! and instead insert their `Default::default()`.
//!
//! ## option
//! If a capture group corresponds to a field with type `Option`, the field will be set to `None` when the group is not captured
//! by the match, rather than producing an error.
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! #[inpt(regex = r"(.*) letter ([a-zA-Z])(\d+)?")]
//! struct RegexFields<'s>(&'s str, char, Option<i32>);
//!
//! assert_eq!(
//! inpt::<RegexFields>("classroom letter A").unwrap(),
//! RegexFields("classroom", 'A', None),
//! )
//! ```
//!
//! ## before, after
//! Any fields marked with the `#[inpt(before)]` attribute will be parsed sequentially, consuming input prior to matching the given regex.
//! After the regex is matched, remaining input is consumed by any fields marked `#[inpt(after)]`. Having such a field causes the regex
//! to again behave lazily in the example above.
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! #[inpt(regex = r"is a")]
//! struct RegexFields<'s>(
//! #[inpt(before)] char,
//! #[inpt(before)] i32,
//! #[inpt(after)] &'s str,
//! );
//!
//! assert_eq!(
//! inpt::<RegexFields>("A113 is a classroom").unwrap(),
//! RegexFields('A', 113, "classroom"),
//! )
//! ```
//!
//! ## bounds
//! By default the derive macro adds `T: Inpt<'s>` bounds to every parsed field of a struct, as well as a `Self: 's` bound.
//! This greatly improves error messages and improves the ergonomics around generic structs. However, it is sometimes necessary
//! to replace those automatic bounds entirely. If you ever get
//! "<code><b color="ff5555">error\[E0275\]</b><b>: overflow evaluating the requirement \`T: Inpt<'_>\`</b></code>", try solving it
//! with a `#[inpt(bounds = "")]` attribute.
//!
//! ```rust
//! # use inpt::{inpt, Inpt};
//! use inpt::InptError;
//!
//! #[derive(Inpt)]
//! #[inpt(regex = "(.)(.+)?")]
//! #[inpt(bounds = "")]
//! struct Recursive(char, Option<Box<Recursive>>);
//!
//! let chars: Recursive = inpt("abc").unwrap();
//! # assert_eq!(chars.0, 'a');
//! # assert_eq!(chars.1.as_ref().unwrap().0, 'b');
//! # assert_eq!(chars.1.as_ref().unwrap().1.as_ref().unwrap().0, 'c');
//! ```
//!
//! ## from_str
//! Although Rust integers and strings all implement the `Inpt` trait, some types can only be parsed using `FromStr`.
//! The derive macro can be told to use a type's [`FromStr`](std::str::FromStr) implementation with the `#[inpt(from_str)]` field attribute.
//! Because the `from_str` function consumes an entire string instead of chopping off just the beginning, the attribute
//! can only be placed on the last field of a struct, or on fields receiving regex capture groups.
//!
//! ```rust
//! # use inpt::{inpt, Inpt};
//! use std::net::{IpAddr};
//!
//! #[derive(Inpt, Debug, PartialEq)]
//! #[inpt(regex = r"route from (\S+) to")]
//! struct Routing {
//! #[inpt(from_str)]
//! from: IpAddr,
//! #[inpt(from_str, after)]
//! to: IpAddr,
//! }
//!
//! let route: Routing = inpt("route from 192.168.1.2 to 127.0.0.1").unwrap();
//! # assert_eq!(&route.from.to_string(), "192.168.1.2");
//! # assert_eq!(&route.to.to_string(), "127.0.0.1");
//! ```
//!
//! ## from_iter
//! It is quite easy to repeatedly parse a type, either by using [`Vec`](std::vec::Vec)'s own inpt implementation,
//! or parsing then collecting an [`InptIter`]. This can also be accessed inside the derive macro using the
//! `#[inpt(from_iter = "T")]` field attribute, which calls into [`FromIterator<T>`](std::iter::FromIterator).
//! The item type has to be specified because some collections can be built from multiple different item types
//! (e.g. `String` can be collected from an iterator of `char`, an iterator of `&str`, or an iterator of `String`).
//! Like the from_str attribute, the from_iter attribute consumes an entire string and so must appear at the end
//! of the struct, or otherwise parse a regex capture group.
//!
//! ```rust
//! # use inpt::{inpt, Inpt};
//! use std::collections::HashMap;
//!
//! #[derive(Inpt, Debug, PartialEq)]
//! struct Rooms {
//! #[inpt(from_iter = "(char, u32)")]
//! letter_to_number: HashMap<char, u32>,
//! }
//!
//! assert_eq!(
//! inpt::<Rooms>("B5 A113 F111").unwrap().letter_to_number,
//! [('A', 113), ('B', 5), ('F', 111)].into_iter().collect::<HashMap<_, _>>(),
//! )
//! ```
//!
//! ## trim
//!
//! By default, inpt trims all whitespace between fields. However, some types implement more specific trimming rules.
//! For example, all number types additionally trim adjacent commas and semicolons:
//! ```rust
//! # use inpt::inpt;
//! assert_eq!(
//! inpt::<Vec<i32>>("1,2;3 4").unwrap(),
//! vec![1, 2, 3, 4],
//! )
//! ```
//!
//! Users of this crate can specify characters to trim with the `#[inpt(trim = r"\s")]` struct attribute. The attribute
//! syntax is the same as for [regex character classes](https://docs.rs/regex/1/regex/#character-classes) including
//! ranges, negation, intersection, and unicode class names.
//! ```
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt)]
//! #[inpt(trim = r"\p{Punctuation}")]
//! struct Sentence<'s>(&'s str);
//!
//! assert_eq!(
//! inpt::<Sentence>("¡I love regexes 💕!").unwrap().0,
//! "I love regexes 💕",
//! )
//! ```
//!
//! _The trim attribute is also available on fields._ In this case, the attribute will forcibly override the trimming
//! behavior of the field's immediate type. This works particularly well with the from_iter attribute.
//! ```
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt)]
//! struct PhoneNumber {
//! #[inpt(from_iter = "u32", trim = r"+\-()\s")]
//! digits: Vec<u32>,
//! }
//!
//! assert_eq!(
//! inpt::<PhoneNumber>("+(1)(425) 555-0100").unwrap().digits,
//! vec![1, 425, 555, 0100],
//! )
//! ```
//!
//! Trimming can be broadly disabled by setting `trim = ""` on a wrapper struct (e.g. [`NoTrim`]), as the default
//! trimmable character class is inherited by types deeper in the parse tree.
//!
//! ## split
//!
//! Sometimes a whole regex is overkill to separate fields, and you only need some kind of delimiter.
//! The wrapper types in [`inpt::split`](mod@crate::split) accomplish exactly this: they stop consuming
//! input as soon as the corresponding delimiter is reached.
//! The field attribute `#[inpt(split = "T")]` is used to parse a field
//! as if it were wrapped in the given type.
//!
//! ```rust
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt, Debug, PartialEq)]
//! struct Request<'s> {
//! #[inpt(split = "Line")]
//! method: &'s str,
//! body: &'s str,
//! }
//!
//! assert_eq!(
//! inpt::<Request>("
//! PUT
//! crabs are perfect animals
//! ").unwrap(),
//! Request {
//! method: "PUT",
//! body: "crabs are perfect animals",
//! },
//! )
//! ```
//!
//! # Enum Syntax
//!
//! Structs and enums support all the same attributes, listed above. But the process of parsing an enum is
//! somewhat different. Inpt will attempt to parse each variant, returning the first that is successfully parsed.
//!
//! ```
//! # use inpt::{inpt, Inpt};
//! #[derive(Inpt)]
//! enum Math {
//! #[inpt(regex = r"(.*)\+(.*)")]
//! Add(f64, f64),
//! #[inpt(regex = r"(.*)\*(.*)")]
//! Mul(f64, f64),
//! }
//!
//! impl Math {
//! fn solve(self) -> f64 {
//! match self {
//! Math::Add(a, b) => a + b,
//! Math::Mul(a, b) => a * b,
//! }
//! }
//! }
//!
//! assert_eq!(inpt::<Math>("2.6+5.0").unwrap().solve(), 7.6);
//! assert_eq!(inpt::<Math>("2.6*5.0").unwrap().solve(), 13.0);
//!
//! ```
//!
//! ## enum regex
//!
//! Although a `#[inpt(regex = r".*")]` attribute is not _required_ on every variant, it is strongly encouraged. Without
//! a regex to pick the correct set of fields, inpt has to guess-and-check each individually. Not only can this
//! cause parsing cost to explode exponentially, it makes bugs and errors almost impossible to track down.
//!
//! When a regex is specified:
//! - if an error occurs before the regex match, the next variant may be tried
//! - if the regex does not match, the next variant is always tried
//! - if an error occurs inside a capture group or after the regex match, an error is immediately produced
//!
//! # Main
//! Although inpt can be used with any source of text, it is most common to parse
//! stdin and report errors on stderr. The [`#[inpt::main]`](macro@main) attribute macro is built
//! to facilitate this. Applied to a function, it works exactly like `#[derive(Inpt)]` except
//! arguments behave like fields, and the function as a whole behaves like a struct. The created function
//! will have the same name, visibility, and return type, but will parse stdin instead of receiving arguments.
//!
//! ```rust,no_run
//! #[inpt::main(regex = r"(?:my name is|i am) (.+)")]
//! fn main(name: &'static str) {
//! println!("hello {name}!");
//! }
//! ```
//!
//! If stdin can not be parsed, the cause of the error is clearly reported by [`error::InptError::annotated_stderr`].
//!
//! <code style="display: block; padding: 1em">$ echo 'call me sam' | cargo run -\-example hello
//! <span color="ff5555">INPT ERROR</span> in stdin:1:1
//! <b color="f1fa8c"><hello::main::Arguments></b><b color="ff5555">< </b><b color="bd93f9">/(?:my name is|i am) (.+)/</b><b color="ff5555"> ></b>call me sam<b color="ff5555"></regex></b><b color="f1fa8c"></hello::main::Arguments></b>
//! </code>
//!
//! Note that lifetime elision does not currently work, so all borrows must use either `'static` or a generic lifetime.
//!
//!
use OnceCell;
use ;
use type_name;
use HashSet;
use Error as StdError;
use PhantomData;
use Deref;
use ;
/// Apply to a main function to parse stdin.
///
/// See [the syntax documentation](crate#main) for more details.
pub use main;
/// Apply to a struct so that it can be parsed.
///
/// See [the syntax documentation](crate#struct-syntax) for more details.
pub use Inpt;
/// Parse a [regex character class](https://docs.rs/regex/1/regex/#character-classes),
/// and return an instance of [`CharClass`].
///
/// The [ucd-generate](https://github.com/BurntSushi/ucd-generate) command line tool is the underlying source of truth
/// for these tables, although the `char_class!` macro depends on it indirectly, via the
/// [regex-syntax crate](https://docs.rs/regex-syntax/).
pub use char_class;
pub use ResultExt;
pub use ;
extern crate self as inpt;
/// The output of a single parsing step.
/// A class of characters, as defined by [`char_class!`](macro@char_class) and used for trimming.
] pub &'static );
/// The "\s" character class, used by default to trim types during parsing.
pub const WHITESPACE: CharClass = char_class!;
/// The core parsing trait.
///
/// Although this can be implemented manually without too much work,
/// it is best to derive it as described in the [top-level documentation](crate#struct-syntax).
/// Lazily parse input as a sequence of type `T`.
/// Broadly disables the default whitespace trimming on the inner type.
///
/// Types with custom trimming rules (e.g. number types) are not affected.
;
/// A const regex used internally by [`macro@Inpt`].
/// Prevents infinite parse trees.
/// *The point of this crate.* Parse `T` from the given string.
/// Parse `T` from the beginning of the given string.
/// Parse `T` from stdin and print any errors on stderr.
///
/// If parsing fails, the process will be exited with status -1.
///
/// Note that the input text will be permanently allocated on the heap, allowing
/// the parsed type to contain `&'static str` strings.
/// This is the core function used by the [`#[inpt::main]` macro](crate#main).