nom_parse_trait/
lib.rs

//! # nom-parse-trait
2//!
3//! This is an extension to the popular nom crate, that provides a `ParseFrom`
4//! trait that can be implemented on any data that can be parsed in a singular way.
5//! This means it should have a `parse` function available and the signature of
6//! that function is compatible with the `nom::Parser` trait.
7//!
8//! The main usage of this is to easily combine parsers of different types.
//! To see the real power of this trait, take a look at the nom-parse-macros crate,
//! which makes it easy to implement this trait on data types.
11//!
12//! ## Generic vs Specific parsers
13//!
14//! The `ParseFrom` trait is generic over the input type, which means that you can
15//! define it generically over any input type that nom supports. The downside of this
16//! is that you will need a bunch of restrictions to the input type in a `where` block.
17//! Also, using a generic parser implementation can be more annoying to use, since in
18//! some cases Rust can't infer the type of the input or error. See the
19//! [generic_input](examples/generic_input.rs) example for an example of this.
20//!
21//! If you already know what types of input and error you are going to use in the program,
22//! using a specific implementation can be more convenient. See the [simple](examples/simple.rs)
23//! example for an example of this.
24
25use branch::alt;
26use combinator::value;
27use nom::bytes::complete::tag;
28use nom::character::complete::space0;
29use nom::*;
30use std::collections::{HashMap, HashSet};
31use std::hash::{BuildHasher, Hash};
32
33/// A trait for types that can be parsed from the given input.
34pub trait ParseFrom<I, E = error::Error<I>>
35where
36    Self: Sized,
37{
38    /// A function that can act as a nom Parser type that parses some of the input and
39    /// returns an instance of this type.
40    fn parse(input: I) -> IResult<I, Self, E>;
41}
42
/// Convenience companion to `ParseFrom` that offers a simpler way to run a parse.
pub trait ParseFromExt<I, E>: Sized {
    /// Parses `input` into a value of this type, reporting failure as a plain `E` rather
    /// than as a nom `IResult`.
    fn parse_complete(input: I) -> Result<Self, E>;
}
50
51impl<I, E, T: ParseFrom<I, E>> ParseFromExt<I, E> for T
52where
53    I: Input,
54    E: error::ParseError<I>,
55{
56    /// Parse the input and return the result if the input is fully consumed.
57    ///
58    /// If the input is not fully consumed, an "Eof" error is returned with the rest of the characters.
59    ///
60    /// # Panics
61    /// This function panics if the parser returns an "Incomplete" error. If you want to use this
62    /// on streaming parser, please make sure you handle the "Incomplete" error before calling this function.
63    fn parse_complete(input: I) -> Result<Self, E> {
64        match T::parse(input) {
65            Ok((rest, result)) if rest.input_len() == 0 => Ok(result),
66            Ok((rest, _)) => Err(E::from_error_kind(rest, error::ErrorKind::Eof)),
67            Err(Err::Incomplete(_)) => panic!("Parser returned an incomplete error"),
68            Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e),
69        }
70    }
71}
72
73macro_rules! unsigned_parsable {
74    ($($ty:tt)+) => {
75        $(
76        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
77        where
78            I: Input,
79            <I as Input>::Item: AsChar,
80        {
81            fn parse(input: I) -> nom::IResult<I, Self, E> {
82                nom::character::complete::$ty(input)
83            }
84        }
85        )*
86    }
87}
88
89unsigned_parsable!(u16 u32 u64 u128);
90
91macro_rules! signed_parsable {
92    ($($ty:tt)+) => {
93        $(
94        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
95        where
96            I: Input,
97            <I as Input>::Item: AsChar,
98            I: for <'a> Compare<&'a[u8]>,
99        {
100            fn parse(input: I) -> nom::IResult<I, Self, E> {
101                nom::character::complete::$ty(input)
102            }
103        }
104        )*
105    }
106}
107
108signed_parsable!(i8 i16 i32 i64 i128);
109
110macro_rules! floating_parsable {
111    ($($ty:tt)+) => {
112        $(
113        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
114        where
115            I: Input + Offset + AsBytes + Compare<&'static str>,
116            <I as Input>::Item: AsChar,
117            <I as Input>::Iter: Clone,
118            I: for<'a> Compare<&'a [u8]>,
119        {
120            fn parse(input: I) -> nom::IResult<I, Self, E> {
121                use std::str::FromStr;
122                use nom::number::complete::recognize_float_or_exceptions;
123                use std::str::from_utf8;
124
125                let (i, s) = recognize_float_or_exceptions(input)?;
126                match from_utf8(s.as_bytes()).ok().and_then(|s| $ty::from_str(s).ok()) {
127                    Some(f) => Ok((i, f)),
128                    None => Err(nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::Float))),
129                }
130            }
131        }
132        )*
133    }
134}
135
136floating_parsable!(f32 f64);
137
138/// Support reading the words "true" or "false" from the input and interpreting them as boolean values.
139impl<I, E: error::ParseError<I>> ParseFrom<I, E> for bool
140where
141    I: Input + Compare<&'static str>,
142{
143    fn parse(input: I) -> IResult<I, Self, E> {
144        alt((value(true, tag("true")), value(false, tag("false")))).parse(input)
145    }
146}
147
148/// Support reading a single character from the input.
149impl<I, E: error::ParseError<I>> ParseFrom<I, E> for char
150where
151    I: Input,
152    <I as Input>::Item: AsChar,
153{
154    fn parse(input: I) -> IResult<I, Self, E> {
155        if input.input_len() == 0 {
156            return Err(Err::Error(E::from_error_kind(input, error::ErrorKind::Eof)));
157        }
158
159        let char = input.iter_elements().next().unwrap().as_char();
160        let (rest, _) = input.take_split(char.len());
161        Ok((rest, char))
162    }
163}
164
165/// Support reading a single byte from the input. This is NOT a parsed number, but the raw byte value.
166impl<I, E: error::ParseError<I>> ParseFrom<I, E> for u8
167where
168    I: Input,
169    <I as Input>::Item: AsChar,
170{
171    fn parse(input: I) -> IResult<I, Self, E> {
172        if input.input_len() == 0 {
173            return Err(Err::Error(E::from_error_kind(input, error::ErrorKind::Eof)));
174        }
175
176        let char = input.iter_elements().next().unwrap().as_char();
177        if char.len() != 1 {
178            return Err(Err::Error(E::from_error_kind(
179                input,
180                error::ErrorKind::Char,
181            )));
182        }
183
184        let (rest, _) = input.take_split(1);
185        Ok((rest, char as u8))
186    }
187}
188
189/// Support parsing a vector of ParseFrom types from the input. This uses the line_ending parser
190/// to separate the items.
191impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for Vec<T>
192where
193    I: Input + Compare<&'static str>,
194{
195    fn parse(input: I) -> IResult<I, Self, E> {
196        multi::separated_list0(character::complete::line_ending, T::parse).parse(input)
197    }
198}
199
200/// Support parsing a HashSet of ParseFrom types from the input. This uses the line_ending parser
201/// to separate the items.
202impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>, S> ParseFrom<I, E> for HashSet<T, S>
203where
204    I: Input + Compare<&'static str>,
205    T: Eq + Hash,
206    S: BuildHasher + Default,
207{
208    fn parse(input: I) -> IResult<I, Self, E> {
209        combinator::map(
210            multi::separated_list0(character::complete::line_ending, T::parse),
211            |list| list.into_iter().collect(),
212        )
213        .parse(input)
214    }
215}
216
217/// Support parsing a HashMap of ParseFrom types from the input. This uses the line_ending parser
218/// to separate the items and the "=" sign to separate the key and value.
219impl<I, E: error::ParseError<I>, K: ParseFrom<I, E>, V: ParseFrom<I, E>, S> ParseFrom<I, E>
220    for HashMap<K, V, S>
221where
222    I: Input + Compare<&'static str>,
223    <I as Input>::Item: AsChar + Copy,
224    K: Eq + Hash,
225    S: BuildHasher + Default,
226{
227    fn parse(input: I) -> IResult<I, Self, E> {
228        combinator::map(
229            multi::separated_list0(
230                character::complete::line_ending,
231                sequence::separated_pair(K::parse, (space0, tag("="), space0), V::parse),
232            ),
233            |list| list.into_iter().collect(),
234        )
235        .parse(input)
236    }
237}
238
#[cfg(test)]
mod tests {
    // Generates one test module per integer type. Each module checks parsing from both
    // `&str` and `&[u8]` input, and that a value too large for the type is rejected.
    macro_rules! test_integer {
        ($($ty:tt)+) => {
            $(
                mod $ty {
                    use crate::*;
                    use nom::error::*;

                    #[test]
                    fn test_normal_parsing() {
                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete(b"123".as_ref()));
                        assert_eq!(Ok::<_, Err<Error<_>>>((b"a".as_ref(), 999)), $ty::parse(b"999a".as_ref()));

                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete("123"));
                        assert_eq!(Ok::<_, Err<Error<_>>>(("a", 999)), $ty::parse("999a"));
                    }

                    #[test]
                    fn test_overflow() {
                        // Two extra digits behind the maximum always overflow the type.
                        let too_big = format!("{}00", $ty::MAX);

                        // The type under test has to be used here, not a fixed one,
                        // otherwise the wider types are never checked for overflow.
                        assert_eq!(
                            Err(Error::from_error_kind(too_big.as_str(), ErrorKind::Digit)),
                            $ty::parse_complete(too_big.as_str())
                        );
                        assert_eq!(
                            Err(Error::from_error_kind(too_big.as_bytes(), ErrorKind::Digit)),
                            $ty::parse_complete(too_big.as_bytes())
                        );
                    }
                }
            )*
        };
    }

    // `u8` is left out because its impl reads a raw byte; `i8` is left out because the
    // value 999 used in `test_normal_parsing` does not fit in it.
    test_integer!(u16 u32 u64 u128);
    test_integer!(i16 i32 i64 i128);

    mod floats {
        use crate::*;

        #[test]
        fn parse_f32() {
            assert_eq!(Ok::<_, ()>(6e8), f32::parse_complete("6e8"));
            assert_eq!(
                Ok::<_, ()>(3.14e-2),
                f32::parse_complete(b"3.14e-2".as_ref())
            );
        }

        #[test]
        fn parse_f64() {
            assert_eq!(Ok::<_, ()>(6e8), f64::parse_complete("6e8"));
            assert_eq!(
                Ok::<_, ()>(3.14e-2),
                f64::parse_complete(b"3.14e-2".as_ref())
            );
        }
    }

    mod char {
        use crate::*;
        use nom::error::*;
        use nom::multi::many1;

        #[test]
        fn read_characters() {
            // Characters of 1, 2, 2 and 4 bytes in UTF-8.
            let input = "TðŒ🏃";

            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();

            assert_eq!(Ok(("", vec!['T', 'ð', 'Œ', '🏃'])), result);
        }

        #[test]
        fn read_bytes() {
            let input = b"1234".as_ref();

            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();

            assert_eq!(Ok((b"".as_ref(), vec!['1', '2', '3', '4'])), result);
        }
    }

    mod collections {
        use crate::*;
        use nom::error::*;

        #[test]
        fn test_vec_of_numbers() {
            let input = "1\n2\n3\n4\n5";
            let expected = vec![1, 2, 3, 4, 5];

            assert_eq!(
                Ok::<_, Error<_>>(expected),
                Vec::<u32>::parse_complete(input)
            );
        }

        #[test]
        fn test_set_of_numbers() {
            let input = "1\n2\n3\n4\n5";
            let expected = vec![1, 2, 3, 4, 5].into_iter().collect();

            assert_eq!(
                Ok::<_, Error<_>>(expected),
                HashSet::<u32>::parse_complete(input)
            );
        }

        #[test]
        fn test_map_of_numbers() {
            let input = "a = 1\nb = 2\nc = 3\nd = 4\ne = 5";
            let expected = vec![('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]
                .into_iter()
                .collect();

            assert_eq!(
                Ok::<_, Error<_>>(expected),
                HashMap::<char, u32>::parse_complete(input)
            );
        }
    }
}