aoc_parse/parsers/
primitive.rs

1use std::{num::ParseIntError, str::FromStr};
2
3use lazy_static::lazy_static;
4use num_bigint::{BigInt, BigUint, ParseBigIntError};
5use num_traits::Num;
6use regex::Regex;
7
8use crate::{parsers::regex::RegexParser, ParseContext, ParseIter, Reported};
9
/// A [`ParseIter`] for parsers that present exactly one match.
///
/// Parsers such as `u64` have to fully convert the matched text just to
/// decide whether the match is valid. The converted value is kept here,
/// alongside the position where the match ends, so that the `convert` phase
/// can hand it back without repeating that work.
pub struct BasicParseIter<T> {
    /// Position at which the match ends; reported by `match_end`.
    pub(crate) end: usize,
    /// The pre-converted value; cloned out by `convert`.
    pub(crate) value: T,
}
20
21impl<'parse, T> ParseIter<'parse> for BasicParseIter<T>
22where
23    T: Clone,
24{
25    type RawOutput = (T,);
26
27    fn match_end(&self) -> usize {
28        self.end
29    }
30
31    fn backtrack(&mut self, _context: &mut ParseContext<'parse>) -> Result<(), Reported> {
32        Err(Reported)
33    }
34
35    fn convert(&self) -> (T,) {
36        (self.value.clone(),)
37    }
38}
39
40// --- Global regexes that are compiled on first use
41
// For each `name = pattern;` entry, defines `pub(crate) fn name()` returning
// a `&'static Regex`. The regex is held in a `lazy_static!` static that is
// private to that function; the `unwrap` panics if the pattern is not a
// valid regex.
macro_rules! regexes {
    ( $( $fn_name:ident = $pattern:expr ; )* ) => {
        $(
            pub(crate) fn $fn_name() -> &'static Regex {
                lazy_static! {
                    static ref COMPILED: Regex = Regex::new($pattern).unwrap();
                }
                &COMPILED
            }
        )*
    }
}
54
55regexes! {
56    uint_regex = r"\A[0-9]+";
57    int_regex = r"\A[+-]?[0-9]+";
58    bool_regex = r"\A(?:true|false)";
59    uint_bin_regex = r"\A[01]+";
60    int_bin_regex = r"\A[+-]?[01]+";
61    uint_hex_regex = r"\A[0-9A-Fa-f]+";
62    int_hex_regex = r"\A[+-]?[0-9A-Fa-f]+";
63    float_regex = r"(?i)\A[+-]?(?:infinity|inf|nan|(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:e[+-]?[0-9]+)?)";
64}
65
66// --- Parsers that use FromStr
67
// Declares one public parser constant per listed primitive type. Each
// constant is named after the type it parses and pairs the given regex
// function with that type's `FromStr::from_str`.
macro_rules! from_str_parse_impl {
    ( $( $prim:ident )+ , $regex_fn:ident ) => {
        $(
            /// Parse a value of a primitive type; the conversion is done by
            /// the type's `FromStr` implementation in the Rust standard
            /// library.
            // The constant shares its name with the type, hence lower case.
            #[allow(non_upper_case_globals)]
            pub const $prim: RegexParser<$prim, <$prim as FromStr>::Err> = RegexParser {
                parse_fn: <$prim as FromStr>::from_str,
                regex: $regex_fn,
            };
        )+
    };
}
82
83from_str_parse_impl!(u8 u16 u32 u64 u128 usize, uint_regex);
84from_str_parse_impl!(i8 i16 i32 i64 i128 isize, int_regex);
85from_str_parse_impl!(f32 f64, float_regex);
86from_str_parse_impl!(bool, bool_regex);
87
88/// Parse a BigUint (using its `FromStr` implementation in the `num-bigint`
89/// crate, except that underscores between digits are not accepted and a
90/// leading `+` sign is not accepted).
91#[allow(non_upper_case_globals)]
92pub const big_uint: RegexParser<BigUint, <BigUint as FromStr>::Err> = RegexParser {
93    regex: uint_regex,
94    parse_fn: <BigUint as FromStr>::from_str,
95};
96
97/// Parse a BigInt (using its `FromStr` implementation in the `num-bigint`
98/// crate, except that underscores between digits are not accepted).
99#[allow(non_upper_case_globals)]
100pub const big_int: RegexParser<BigInt, <BigInt as FromStr>::Err> = RegexParser {
101    regex: int_regex,
102    parse_fn: <BigInt as FromStr>::from_str,
103};
104
105// --- Parsers for `_bin` and `_hex` integers
106
// For every `(type, bin_name, hex_name)` triple, declares two public parser
// constants: `bin_name` pairs the binary regex with `from_str_radix(_, 2)`
// and `hex_name` pairs the hex regex with `from_str_radix(_, 16)`. The two
// regex functions are given once, after the `;`, and shared by all triples.
macro_rules! from_str_radix_parsers {
    (
        $( ( $int:ident , $bin_name:ident , $hex_name:ident ) ),* ;
        $bin_regex:ident , $hex_regex:ident
    ) => {
        $(
            /// Parse an integer written in base 2; the conversion is done by
            /// the `from_str_radix` static method from the Rust standard
            /// library.
            #[allow(non_upper_case_globals)]
            pub const $bin_name: RegexParser<$int, ParseIntError> = RegexParser {
                parse_fn: |digits| $int::from_str_radix(digits, 2),
                regex: $bin_regex,
            };

            /// Parse an integer written in base 16; the conversion is done by
            /// the `from_str_radix` static method from the Rust standard
            /// library.
            #[allow(non_upper_case_globals)]
            pub const $hex_name: RegexParser<$int, ParseIntError> = RegexParser {
                parse_fn: |digits| $int::from_str_radix(digits, 16),
                regex: $hex_regex,
            };
        )*
    }
}
128
129from_str_radix_parsers!(
130    (u8, u8_bin, u8_hex),
131    (u16, u16_bin, u16_hex),
132    (u32, u32_bin, u32_hex),
133    (u64, u64_bin, u64_hex),
134    (u128, u128_bin, u128_hex),
135    (usize, usize_bin, usize_hex);
136    uint_bin_regex,
137    uint_hex_regex
138);
139
140from_str_radix_parsers!(
141    (i8, i8_bin, i8_hex),
142    (i16, i16_bin, i16_hex),
143    (i32, i32_bin, i32_hex),
144    (i64, i64_bin, i64_hex),
145    (i128, i128_bin, i128_hex),
146    (isize, isize_bin, isize_hex);
147    int_bin_regex,
148    int_hex_regex
149);
150
151/// Parse a [`BigUint`] written in base 2 (using its [`Num`] impl from the
152/// `num-bigint` crate, except that underscores between digits are not
153/// accepted and a leading `+` sign is not accepted).
154#[allow(non_upper_case_globals)]
155pub const big_uint_bin: RegexParser<BigUint, ParseBigIntError> = RegexParser {
156    regex: uint_bin_regex,
157    parse_fn: |s| BigUint::from_str_radix(s, 2),
158};
159
160/// Parse a [`BigUint`] written in base 16 (using its [`Num`] impl from the
161/// `num-bigint` crate, except that underscores between digits are not
162/// accepted and a leading `+` sign is not accepted).
163#[allow(non_upper_case_globals)]
164pub const big_uint_hex: RegexParser<BigUint, ParseBigIntError> = RegexParser {
165    regex: uint_hex_regex,
166    parse_fn: |s| BigUint::from_str_radix(s, 16),
167};
168
169/// Parse a [`BigInt`] written in base 2 (using its [`Num`] impl from the
170/// `num-bigint` crate, except that underscores between digits are not
171/// accepted).
172#[allow(non_upper_case_globals)]
173pub const big_int_bin: RegexParser<BigInt, ParseBigIntError> = RegexParser {
174    regex: int_bin_regex,
175    parse_fn: |s| BigInt::from_str_radix(s, 2),
176};
177
178/// Parse a [`BigInt`] written in base 16 (using its [`Num`] impl from the
179/// `num-bigint` crate, except that underscores between digits are not
180/// accepted).
181#[allow(non_upper_case_globals)]
182pub const big_int_hex: RegexParser<BigInt, ParseBigIntError> = RegexParser {
183    regex: int_hex_regex,
184    parse_fn: |s| BigInt::from_str_radix(s, 16),
185};
186
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::*;

    /// `bool` accepts exactly `true` or `false`, with nothing before or after.
    #[test]
    fn test_bool() {
        assert_parse_eq(bool, "true", true);
        assert_parse_eq(bool, "false", false);
        assert_no_parse(bool, "t");
        assert_no_parse(bool, "");
        assert_no_parse(bool, " true");
        assert_no_parse(bool, "false ");
    }

    /// Hex parsers: sign handling, range limits, digit case and leading
    /// zeros.
    #[test]
    fn test_parse_hex() {
        // A sign on its own, or a sign separated from the digits by a space,
        // is not a number. Both signs are checked.
        assert_no_parse(&i32_hex, "+");
        assert_no_parse(&i32_hex, "-");
        assert_no_parse(&i32_hex, "+ 4");
        assert_no_parse(&i32_hex, "- 4");

        // Values at and just beyond the limits of `i32`.
        assert_parse_eq(&i32_hex, "7BCDEF01", 0x7bcdef01);
        assert_parse_eq(&i32_hex, "7fffffff", i32::MAX);
        assert_no_parse(&i32_hex, "80000000");
        assert_parse_eq(&i32_hex, "-80000000", i32::MIN);
        assert_no_parse(&i32_hex, "-80000001");

        // Two hex numbers back to back can only be told apart by a sign.
        let p = sequence(i32_hex, i32_hex);
        assert_no_parse(&p, "12");
        assert_no_parse(&p, "01230123ABCDABCD");
        assert_parse_eq(&p, "-1+1", (-1, 1));

        // Unsigned parsers reject any sign; digits may be in either case,
        // and leading zeros do not count towards overflow.
        assert_no_parse(&u32_hex, "-1");
        assert_no_parse(&u32_hex, "+d3d32e2e");
        assert_parse_eq(&u32_hex, "ffffffff", u32::MAX);
        assert_parse_eq(&u32_hex, "FFFFFFFF", u32::MAX);
        assert_parse_eq(
            &u32_hex,
            "0000000000000000000000000000000000000000000000000000000000000000ffffffff",
            u32::MAX,
        );
    }

    /// Arbitrary-precision parsers, in decimal, hex and binary.
    #[test]
    fn test_bigint() {
        // `big_uint` takes no sign at all.
        assert_no_parse(big_uint, "");
        assert_no_parse(big_uint, "+");
        assert_no_parse(big_uint, "+11");
        assert_no_parse(big_uint, "-");
        assert_parse_eq(big_uint, "0", BigUint::default());
        assert_parse_eq(
            big_uint,
            "982371952794802135871309821709317509287109324809324983409383209484381293480",
            "982371952794802135871309821709317509287109324809324983409383209484381293480"
                .parse::<BigUint>()
                .unwrap(),
        );

        // `big_int` takes at most one sign, which must be followed by digits.
        assert_no_parse(big_int, "");
        assert_no_parse(big_int, "+");
        assert_no_parse(big_int, "-");
        assert_parse_eq(big_int, "-0", BigInt::default());
        assert_parse_eq(big_int, "+0", BigInt::default());
        assert_parse_eq(big_int, "00", BigInt::default());
        assert_no_parse(big_int, "-+31");
        assert_no_parse(big_int, " 0");
        assert_parse_eq(
            big_int,
            "-4819487135612398473187093223859843207984094321710984370927309128460723598212",
            "-4819487135612398473187093223859843207984094321710984370927309128460723598212"
                .parse::<BigInt>()
                .unwrap(),
        );

        // Radix variants: leading zeros are fine, out-of-radix digits are not.
        assert_parse_eq(
            big_uint_hex,
            "0000000000000000000000000000000000000000000000000000000000000000ffffffff",
            BigUint::from(u32::MAX),
        );
        assert_no_parse(big_uint_hex, "13a4g3");
        assert_no_parse(big_uint_bin, "1001012");
        assert_no_parse(big_int_hex, "13A4G3");
        assert_no_parse(big_int_bin, "1001012");
    }

    /// Float parsers: ordinary numbers, special values and malformed input.
    #[test]
    fn test_float() {
        assert_parse_eq(f32, "1.25", 1.25_f32);
        assert_parse_eq(f64, "-0", -0.0);
        // Parenthesised so it is plain that the negation applies to 2^128.
        assert_parse_eq(f64, "-340282366.9209385e+30", -(2.0f64.powi(128)));
        assert_parse_eq(f64, "+Infinity", f64::INFINITY);
        assert_parse_eq(f32, ".375e9", 375_000_000.0);
        assert_no_parse(f32, "infin");
        assert_no_parse(f32, "-.");
        assert_no_parse(f32, ".");
        assert_no_parse(f32, "-");
        assert_no_parse(f32, "+");
        assert_no_parse(f32, "");
        assert_no_parse(f64, "e12");
        assert_no_parse(f64, "6.022e");
        assert_no_parse(f64, "6.022e+");
    }
}