nom_parse_trait/
lib.rs

1//! # nom-parse-trait
2//!
3//! This is an extension to the popular nom crate, that provides a `ParseFrom`
4//! trait that can be implemented on any data that can be parsed in a singular way.
5//! This means it should have a `parse` function available and the signature of
6//! that function is compatible with the `nom::Parser` trait.
7//!
8//! The main usage of this is to easily combine parsers of different types.
9//! To see the real power of this trait, take a look at the nom-parse-macros crate,
10//! which makes it possible to easily implement this trait on data types.
11//!
12//! ## Generic vs Specific parsers
13//!
14//! The `ParseFrom` trait is generic over the input type, which means that you can
15//! define it generically over any input type that nom supports. The downside of this
16//! is that you will need a bunch of restrictions to the input type in a `where` block.
17//! Also, using a generic parser implementation can be more annoying to use, since in
18//! some cases Rust can't infer the type of the input or error. See the
19//! [generic_input](examples/generic_input.rs) example for an example of this.
20//!
21//! If you already know what types of input and error you are going to use in the program,
22//! using a specific implementation can be more convenient. See the [simple](examples/simple.rs)
23//! example for an example of this.
24
25use branch::alt;
26use combinator::value;
27use nom::bytes::complete::tag;
28use nom::character::complete::space0;
29use nom::*;
30use std::collections::{HashMap, HashSet};
31use std::hash::{BuildHasher, Hash};
32
33/// A trait for types that can be parsed from the given input.
34pub trait ParseFrom<I, E = error::Error<I>>
35where
36    Self: Sized,
37{
38    /// A function that can act as a nom Parser type that parses some of the input and
39    /// returns an instance of this type.
40    fn parse(input: I) -> IResult<I, Self, E>;
41}
42
/// Convenience layer on top of [`ParseFrom`] that makes parsing a bit easier.
///
/// A blanket implementation in this file provides it for every `ParseFrom` type.
pub trait ParseFromExt<I, E>: Sized {
    /// Parses `input` and returns the bare value or the bare error, without the
    /// `IResult` wrapping that [`ParseFrom::parse`] uses.
    fn parse_complete(input: I) -> Result<Self, E>;
}
50
51impl<I, E, T: ParseFrom<I, E>> ParseFromExt<I, E> for T
52where
53    I: Input,
54    E: error::ParseError<I>,
55{
56    /// Parse the input and return the result if the input is fully consumed.
57    ///
58    /// If the input is not fully consumed, an "Eof" error is returned with the rest of the characters.
59    ///
60    /// # Panics
61    /// This function panics if the parser returns an "Incomplete" error. If you want to use this
62    /// on streaming parser, please make sure you handle the "Incomplete" error before calling this function.
63    fn parse_complete(input: I) -> Result<Self, E> {
64        match T::parse(input) {
65            Ok((rest, result)) if rest.input_len() == 0 => Ok(result),
66            Ok((rest, _)) => Err(E::from_error_kind(rest, error::ErrorKind::Eof)),
67            Err(Err::Incomplete(_)) => panic!("Parser returned an incomplete error"),
68            Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e),
69        }
70    }
71}
72
73macro_rules! unsigned_parsable {
74    ($($ty:tt)+) => {
75        $(
76        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
77        where
78            I: Input,
79            <I as Input>::Item: AsChar,
80        {
81            fn parse(input: I) -> nom::IResult<I, Self, E> {
82                nom::character::complete::$ty(input)
83            }
84        }
85        )*
86    }
87}
88
89unsigned_parsable!(u16 u32 u64 u128);
90
91macro_rules! signed_parsable {
92    ($($ty:tt)+) => {
93        $(
94        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
95        where
96            I: Input,
97            <I as Input>::Item: AsChar,
98            I: for <'a> Compare<&'a[u8]>,
99        {
100            fn parse(input: I) -> nom::IResult<I, Self, E> {
101                nom::character::complete::$ty(input)
102            }
103        }
104        )*
105    }
106}
107
108signed_parsable!(i8 i16 i32 i64 i128);
109
110macro_rules! floating_parsable {
111    ($($ty:tt)+) => {
112        $(
113        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
114        where
115            I: Input + Offset + AsBytes + Compare<&'static str>,
116            <I as Input>::Item: AsChar,
117            <I as Input>::Iter: Clone,
118            I: for<'a> Compare<&'a [u8]>,
119        {
120            fn parse(input: I) -> nom::IResult<I, Self, E> {
121                use std::str::FromStr;
122                use nom::number::complete::recognize_float_or_exceptions;
123                use std::str::from_utf8;
124
125                let (i, s) = recognize_float_or_exceptions(input)?;
126                match from_utf8(s.as_bytes()).ok().and_then(|s| $ty::from_str(s).ok()) {
127                    Some(f) => Ok((i, f)),
128                    None => Err(nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::Float))),
129                }
130            }
131        }
132        )*
133    }
134}
135
136floating_parsable!(f32 f64);
137
138/// Support reading the words "true" or "false" from the input and interpreting them as boolean values.
139impl<I, E: error::ParseError<I>> ParseFrom<I, E> for bool
140where
141    I: Input + Compare<&'static str>,
142{
143    fn parse(input: I) -> IResult<I, Self, E> {
144        alt((value(true, tag("true")), value(false, tag("false")))).parse(input)
145    }
146}
147
148/// Support reading a single character from the input.
149impl<I, E: error::ParseError<I>> ParseFrom<I, E> for char
150where
151    I: Input,
152    <I as Input>::Item: AsChar,
153{
154    fn parse(input: I) -> IResult<I, Self, E> {
155        let char = input
156            .iter_elements()
157            .next()
158            .ok_or_else(|| Err::Error(E::from_error_kind(input.clone(), error::ErrorKind::Eof)))?
159            .as_char();
160        let (rest, _) = input.take_split(char.len());
161        Ok((rest, char))
162    }
163}
164
165/// Support reading a single byte from the input. This is NOT a parsed number, but the raw byte value.
166impl<I, E: error::ParseError<I>> ParseFrom<I, E> for u8
167where
168    I: Input,
169    <I as Input>::Item: AsBytes,
170{
171    fn parse(input: I) -> IResult<I, Self, E> {
172        let item = input
173            .iter_elements()
174            .next()
175            .ok_or_else(|| Err::Error(E::from_error_kind(input.clone(), error::ErrorKind::Eof)))?;
176        let bytes = item.as_bytes();
177        if bytes.len() != 1 {
178            return Err(Err::Error(E::from_error_kind(
179                input,
180                error::ErrorKind::Char,
181            )));
182        }
183        let (rest, _) = input.take_split(bytes.len());
184        Ok((rest, bytes[0]))
185    }
186}
187
188/// Support parsing a vector of ParseFrom types from the input. This uses the line_ending parser
189/// to separate the items.
190impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for Vec<T>
191where
192    I: Input + Compare<&'static str>,
193{
194    fn parse(input: I) -> IResult<I, Self, E> {
195        multi::separated_list0(character::complete::line_ending, T::parse).parse(input)
196    }
197}
198
199/// Support parsing a HashSet of ParseFrom types from the input. This uses the line_ending parser
200/// to separate the items.
201impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>, S> ParseFrom<I, E> for HashSet<T, S>
202where
203    I: Input + Compare<&'static str>,
204    T: Eq + Hash,
205    S: BuildHasher + Default,
206{
207    fn parse(input: I) -> IResult<I, Self, E> {
208        combinator::map(
209            multi::separated_list0(character::complete::line_ending, T::parse),
210            |list| list.into_iter().collect(),
211        )
212        .parse(input)
213    }
214}
215
216/// Support parsing a HashMap of ParseFrom types from the input. This uses the line_ending parser
217/// to separate the items and the "=" sign to separate the key and value.
218impl<I, E: error::ParseError<I>, K: ParseFrom<I, E>, V: ParseFrom<I, E>, S> ParseFrom<I, E>
219    for HashMap<K, V, S>
220where
221    I: Input + Compare<&'static str>,
222    <I as Input>::Item: AsChar + Copy,
223    K: Eq + Hash,
224    S: BuildHasher + Default,
225{
226    fn parse(input: I) -> IResult<I, Self, E> {
227        combinator::map(
228            multi::separated_list0(
229                character::complete::line_ending,
230                sequence::separated_pair(K::parse, (space0, tag("="), space0), V::parse),
231            ),
232            |list| list.into_iter().collect(),
233        )
234        .parse(input)
235    }
236}
237
238impl<const N: usize, I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for [T; N]
239where
240    I: Input + Compare<&'static str>,
241    <I as Input>::Item: AsChar + Copy,
242{
243    fn parse(mut input: I) -> IResult<I, Self, E> {
244        use std::mem::*;
245        let mut arr: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() };
246        if N > 0 {
247            let mut separator = (space0, tag::<_, I, E>(","), space0);
248
249            let (rest, value) = T::parse(input)?;
250            arr[0].write(value);
251            input = rest;
252
253            for i in 1..N {
254                match separator.parse(input).map(|(rest, _)| T::parse(rest)) {
255                    Ok(Ok((rest, value))) => {
256                        arr[i].write(value);
257                        input = rest;
258                    }
259                    Ok(Err(e)) | Err(e) => {
260                        // There was an error parsing the separator or the value
261                        // We need to clean up the already initialized elements
262                        for j in 0..i {
263                            unsafe {
264                                arr[j].assume_init_drop();
265                            }
266                        }
267                        return Err(e);
268                    }
269                }
270            }
271        }
272        Ok((input, arr.map(|x| unsafe { x.assume_init() })))
273    }
274}
275
276#[cfg(test)]
277mod tests {
278    macro_rules! test_unsigned {
279        ($($ty:tt)+) => {
280            $(
281                mod $ty {
282                    use crate::*;
283                    use nom::error::*;
284
285                    #[test]
286                    fn test_normal_parsing() {
287                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete(b"123".as_ref()));
288                        assert_eq!(Ok::<_, Err<Error<_>>>((b"a".as_ref(), 999)), $ty::parse(b"999a".as_ref()));
289
290                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete("123"));
291                        assert_eq!(Ok::<_, Err<Error<_>>>(("a", 999)), $ty::parse("999a"));
292                    }
293
294                    #[test]
295                    fn test_overflow() {
296                        let too_big = format!("{}00", $ty::MAX);
297
298                        assert_eq!(
299                            Err(Error::from_error_kind(too_big.as_str(), ErrorKind::Digit)),
300                            u16::parse_complete(too_big.as_str())
301                        );
302                        assert_eq!(
303                            Err(Error::from_error_kind(too_big.as_bytes(), ErrorKind::Digit)),
304                            u16::parse_complete(too_big.as_bytes())
305                        );
306                    }
307                }
308            )*
309        };
310    }
311
312    test_unsigned!(u16 u32 u64 u128);
313    test_unsigned!(i16 i32 i64 i128);
314
315    mod floats {
316        use crate::*;
317
318        #[test]
319        fn parse_f32() {
320            assert_eq!(Ok::<_, ()>(6e8), f32::parse_complete("6e8"));
321            assert_eq!(
322                Ok::<_, ()>(3.14e-2),
323                f32::parse_complete(b"3.14e-2".as_ref())
324            );
325        }
326
327        #[test]
328        fn parse_f64() {
329            assert_eq!(Ok::<_, ()>(6e8), f64::parse_complete("6e8"));
330            assert_eq!(
331                Ok::<_, ()>(3.14e-2),
332                f64::parse_complete(b"3.14e-2".as_ref())
333            );
334        }
335    }
336
337    mod char {
338        use crate::*;
339        use nom::error::*;
340        use nom::multi::many1;
341
342        #[test]
343        fn read_characters() {
344            let input = "TðŒ🏃";
345
346            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();
347
348            assert_eq!(Ok(("", vec!['T', 'ð', 'Œ', '🏃'])), result);
349        }
350
351        #[test]
352        fn read_bytes() {
353            let input = b"1234".as_ref();
354
355            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();
356
357            assert_eq!(Ok((b"".as_ref(), vec!['1', '2', '3', '4'])), result);
358        }
359    }
360
361    mod collections {
362        use crate::*;
363        use nom::error::*;
364
365        #[test]
366        fn test_vec_of_numbers() {
367            let input = "1\n2\n3\n4\n5";
368            let expected = vec![1, 2, 3, 4, 5];
369
370            assert_eq!(
371                Ok::<_, Error<_>>(expected),
372                Vec::<u32>::parse_complete(input)
373            );
374        }
375
376        #[test]
377        fn test_set_of_numbers() {
378            let input = "1\n2\n3\n4\n5";
379            let expected = vec![1, 2, 3, 4, 5].into_iter().collect();
380
381            assert_eq!(
382                Ok::<_, Error<_>>(expected),
383                HashSet::<u32>::parse_complete(input)
384            );
385        }
386
387        #[test]
388        fn test_map_of_numbers() {
389            let input = "a = 1\nb = 2\nc = 3\nd = 4\ne = 5";
390            let expected = vec![('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]
391                .into_iter()
392                .collect();
393
394            assert_eq!(
395                Ok::<_, Error<_>>(expected),
396                HashMap::<char, u32>::parse_complete(input)
397            );
398        }
399
400        #[test]
401        fn test_array_of_numbers() {
402            let input = "1, 2, 3, 4, 5";
403            let expected = [1, 2, 3, 4, 5];
404
405            assert_eq!(
406                Ok::<_, Error<_>>(expected),
407                <[u32; 5]>::parse_complete(input)
408            )
409        }
410
411        #[test]
412        fn test_empty_array_of_numbers() {
413            let input = "";
414            let expected: [u32; 0] = [];
415
416            assert_eq!(
417                Ok::<_, Error<_>>(expected),
418                <[u32; 0]>::parse_complete(input)
419            );
420        }
421    }
422}