nom_parse_trait/
lib.rs

//! # nom-parse-trait
2//!
3//! This is an extension to the popular nom crate, that provides a `ParseFrom`
4//! trait that can be implemented on any data that can be parsed in a singular way.
5//! This means it should have a `parse` function available and the signature of
6//! that function is compatible with the `nom::Parser` trait.
7//!
8//! The main usage of this is to easily combine parsers of different types.
//! To see the real power of this trait, take a look at the nom-parse-macros crate,
//! which makes it possible to easily implement this trait on data types.
11//!
12//! ## Generic vs Specific parsers
13//!
14//! The `ParseFrom` trait is generic over the input type, which means that you can
15//! define it generically over any input type that nom supports. The downside of this
16//! is that you will need a bunch of restrictions to the input type in a `where` block.
17//! Also, using a generic parser implementation can be more annoying to use, since in
18//! some cases Rust can't infer the type of the input or error. See the
19//! [generic_input](examples/generic_input.rs) example for an example of this.
20//!
21//! If you already know what types of input and error you are going to use in the program,
22//! using a specific implementation can be more convenient. See the [simple](examples/simple.rs)
23//! example for an example of this.
24
25use branch::alt;
26use combinator::value;
27use nom::bytes::complete::tag;
28use nom::character::complete::space0;
29use nom::*;
30use std::collections::{HashMap, HashSet};
31use std::hash::{BuildHasher, Hash};
32
33/// A trait for types that can be parsed from the given input.
34pub trait ParseFrom<I, E = error::Error<I>>
35where
36    Self: Sized,
37{
38    /// A function that can act as a nom Parser type that parses some of the input and
39    /// returns an instance of this type.
40    fn parse(input: I) -> IResult<I, Self, E>;
41}
42
/// Convenience additions on top of [`ParseFrom`] that make common parsing tasks shorter.
pub trait ParseFromExt<I, E>: Sized {
    /// Parses `input` as a whole and returns the bare value (no remaining input) or
    /// the bare error (not wrapped in `nom::Err`).
    fn parse_complete(input: I) -> Result<Self, E>;
}
50
51impl<I, E, T: ParseFrom<I, E>> ParseFromExt<I, E> for T
52where
53    I: Input,
54    E: error::ParseError<I>,
55{
56    /// Parse the input and return the result if the input is fully consumed.
57    ///
58    /// If the input is not fully consumed, an "Eof" error is returned with the rest of the characters.
59    ///
60    /// # Panics
61    /// This function panics if the parser returns an "Incomplete" error. If you want to use this
62    /// on streaming parser, please make sure you handle the "Incomplete" error before calling this function.
63    fn parse_complete(input: I) -> Result<Self, E> {
64        match T::parse(input) {
65            Ok((rest, result)) if rest.input_len() == 0 => Ok(result),
66            Ok((rest, _)) => Err(E::from_error_kind(rest, error::ErrorKind::Eof)),
67            Err(Err::Incomplete(_)) => panic!("Parser returned an incomplete error"),
68            Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e),
69        }
70    }
71}
72
73macro_rules! wrapper_types {
74    ($($ty:ty),+) => {
75        $(
76            impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for $ty {
77                fn parse(input: I) -> IResult<I, Self, E> {
78                    combinator::map(T::parse, |val| Self::new(val)).parse(input)
79                }
80            }
81        )*
82    }
83}
84
85wrapper_types!(
86    Box<T>,
87    std::cell::Cell<T>,
88    std::cell::RefCell<T>,
89    std::rc::Rc<T>,
90    std::sync::Arc<T>,
91    std::sync::Mutex<T>,
92    std::sync::RwLock<T>
93);
94
95macro_rules! unsigned_parsable {
96    ($($ty:tt)+) => {
97        $(
98        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
99        where
100            I: Input,
101            <I as Input>::Item: AsChar,
102        {
103            fn parse(input: I) -> nom::IResult<I, Self, E> {
104                nom::character::complete::$ty(input)
105            }
106        }
107        )*
108    }
109}
110
111unsigned_parsable!(u16 u32 u64 u128);
112
113macro_rules! signed_parsable {
114    ($($ty:tt)+) => {
115        $(
116        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
117        where
118            I: Input,
119            <I as Input>::Item: AsChar,
120            I: for <'a> Compare<&'a[u8]>,
121        {
122            fn parse(input: I) -> nom::IResult<I, Self, E> {
123                nom::character::complete::$ty(input)
124            }
125        }
126        )*
127    }
128}
129
130signed_parsable!(i8 i16 i32 i64 i128);
131
132macro_rules! floating_parsable {
133    ($($ty:tt)+) => {
134        $(
135        impl<I, E: error::ParseError<I>> ParseFrom<I, E> for $ty
136        where
137            I: Input + Offset + AsBytes + Compare<&'static str>,
138            <I as Input>::Item: AsChar,
139            <I as Input>::Iter: Clone,
140            I: for<'a> Compare<&'a [u8]>,
141        {
142            fn parse(input: I) -> nom::IResult<I, Self, E> {
143                use std::str::FromStr;
144                use nom::number::complete::recognize_float_or_exceptions;
145                use std::str::from_utf8;
146
147                let (i, s) = recognize_float_or_exceptions(input)?;
148                match from_utf8(s.as_bytes()).ok().and_then(|s| $ty::from_str(s).ok()) {
149                    Some(f) => Ok((i, f)),
150                    None => Err(nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::Float))),
151                }
152            }
153        }
154        )*
155    }
156}
157
158floating_parsable!(f32 f64);
159
160/// Support reading the words "true" or "false" from the input and interpreting them as boolean values.
161impl<I, E: error::ParseError<I>> ParseFrom<I, E> for bool
162where
163    I: Input + Compare<&'static str>,
164{
165    fn parse(input: I) -> IResult<I, Self, E> {
166        alt((value(true, tag("true")), value(false, tag("false")))).parse(input)
167    }
168}
169
170/// Support reading a single character from the input.
171impl<I, E: error::ParseError<I>> ParseFrom<I, E> for char
172where
173    I: Input,
174    <I as Input>::Item: AsChar,
175{
176    fn parse(input: I) -> IResult<I, Self, E> {
177        let char = input
178            .iter_elements()
179            .next()
180            .ok_or_else(|| Err::Error(E::from_error_kind(input.clone(), error::ErrorKind::Eof)))?
181            .as_char();
182        let (rest, _) = input.take_split(char.len());
183        Ok((rest, char))
184    }
185}
186
187/// Support reading a single byte from the input. This is NOT a parsed number, but the raw byte value.
188impl<I, E: error::ParseError<I>> ParseFrom<I, E> for u8
189where
190    I: Input,
191    <I as Input>::Item: AsBytes,
192{
193    fn parse(input: I) -> IResult<I, Self, E> {
194        let item = input
195            .iter_elements()
196            .next()
197            .ok_or_else(|| Err::Error(E::from_error_kind(input.clone(), error::ErrorKind::Eof)))?;
198        let bytes = item.as_bytes();
199        if bytes.len() != 1 {
200            return Err(Err::Error(E::from_error_kind(
201                input,
202                error::ErrorKind::Char,
203            )));
204        }
205        let (rest, _) = input.take_split(bytes.len());
206        Ok((rest, bytes[0]))
207    }
208}
209
210/// Support parsing a vector of ParseFrom types from the input. This uses the line_ending parser
211/// to separate the items.
212impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for Vec<T>
213where
214    I: Input + Compare<&'static str>,
215{
216    fn parse(input: I) -> IResult<I, Self, E> {
217        multi::separated_list0(character::complete::line_ending, T::parse).parse(input)
218    }
219}
220
221/// Support parsing a HashSet of ParseFrom types from the input. This uses the line_ending parser
222/// to separate the items.
223impl<I, E: error::ParseError<I>, T: ParseFrom<I, E>, S> ParseFrom<I, E> for HashSet<T, S>
224where
225    I: Input + Compare<&'static str>,
226    T: Eq + Hash,
227    S: BuildHasher + Default,
228{
229    fn parse(input: I) -> IResult<I, Self, E> {
230        combinator::map(
231            multi::separated_list0(character::complete::line_ending, T::parse),
232            |list| list.into_iter().collect(),
233        )
234        .parse(input)
235    }
236}
237
238/// Support parsing a HashMap of ParseFrom types from the input. This uses the line_ending parser
239/// to separate the items and the "=" sign to separate the key and value.
240impl<I, E: error::ParseError<I>, K: ParseFrom<I, E>, V: ParseFrom<I, E>, S> ParseFrom<I, E>
241    for HashMap<K, V, S>
242where
243    I: Input + Compare<&'static str>,
244    <I as Input>::Item: AsChar + Copy,
245    K: Eq + Hash,
246    S: BuildHasher + Default,
247{
248    fn parse(input: I) -> IResult<I, Self, E> {
249        combinator::map(
250            multi::separated_list0(
251                character::complete::line_ending,
252                sequence::separated_pair(K::parse, (space0, tag("="), space0), V::parse),
253            ),
254            |list| list.into_iter().collect(),
255        )
256        .parse(input)
257    }
258}
259
260impl<const N: usize, I, E: error::ParseError<I>, T: ParseFrom<I, E>> ParseFrom<I, E> for [T; N]
261where
262    I: Input + Compare<&'static str>,
263    <I as Input>::Item: AsChar + Copy,
264{
265    fn parse(mut input: I) -> IResult<I, Self, E> {
266        use std::mem::*;
267        let mut arr: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() };
268        if N > 0 {
269            let mut separator = (space0, tag::<_, I, E>(","), space0);
270
271            let (rest, value) = T::parse(input)?;
272            arr[0].write(value);
273            input = rest;
274
275            for i in 1..N {
276                match separator.parse(input).map(|(rest, _)| T::parse(rest)) {
277                    Ok(Ok((rest, value))) => {
278                        arr[i].write(value);
279                        input = rest;
280                    }
281                    Ok(Err(e)) | Err(e) => {
282                        // There was an error parsing the separator or the value
283                        // We need to clean up the already initialized elements
284                        unsafe {
285                            arr[0..i].iter_mut().for_each(|it| it.assume_init_drop());
286                        }
287                        return Err(e);
288                    }
289                }
290            }
291        }
292        Ok((input, arr.map(|x| unsafe { x.assume_init() })))
293    }
294}
295
296#[cfg(test)]
297mod tests {
298    macro_rules! test_unsigned {
299        ($($ty:tt)+) => {
300            $(
301                mod $ty {
302                    use crate::*;
303                    use nom::error::*;
304
305                    #[test]
306                    fn test_normal_parsing() {
307                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete(b"123".as_ref()));
308                        assert_eq!(Ok::<_, Err<Error<_>>>((b"a".as_ref(), 999)), $ty::parse(b"999a".as_ref()));
309
310                        assert_eq!(Ok::<_, Error<_>>(123), $ty::parse_complete("123"));
311                        assert_eq!(Ok::<_, Err<Error<_>>>(("a", 999)), $ty::parse("999a"));
312                    }
313
314                    #[test]
315                    fn test_overflow() {
316                        let too_big = format!("{}00", $ty::MAX);
317
318                        assert_eq!(
319                            Err(Error::from_error_kind(too_big.as_str(), ErrorKind::Digit)),
320                            u16::parse_complete(too_big.as_str())
321                        );
322                        assert_eq!(
323                            Err(Error::from_error_kind(too_big.as_bytes(), ErrorKind::Digit)),
324                            u16::parse_complete(too_big.as_bytes())
325                        );
326                    }
327                }
328            )*
329        };
330    }
331
332    test_unsigned!(u16 u32 u64 u128);
333    test_unsigned!(i16 i32 i64 i128);
334
335    mod floats {
336        use crate::*;
337
338        #[test]
339        fn parse_f32() {
340            assert_eq!(Ok::<_, ()>(6e8), f32::parse_complete("6e8"));
341            assert_eq!(
342                Ok::<_, ()>(3.14e-2),
343                f32::parse_complete(b"3.14e-2".as_ref())
344            );
345        }
346
347        #[test]
348        fn parse_f64() {
349            assert_eq!(Ok::<_, ()>(6e8), f64::parse_complete("6e8"));
350            assert_eq!(
351                Ok::<_, ()>(3.14e-2),
352                f64::parse_complete(b"3.14e-2".as_ref())
353            );
354        }
355    }
356
357    mod char {
358        use crate::*;
359        use nom::error::*;
360        use nom::multi::many1;
361
362        #[test]
363        fn read_characters() {
364            let input = "TðŒ🏃";
365
366            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();
367
368            assert_eq!(Ok(("", vec!['T', 'ð', 'Œ', '🏃'])), result);
369        }
370
371        #[test]
372        fn read_bytes() {
373            let input = b"1234".as_ref();
374
375            let result: Result<_, Error<_>> = many1(char::parse).parse(input).finish();
376
377            assert_eq!(Ok((b"".as_ref(), vec!['1', '2', '3', '4'])), result);
378        }
379    }
380
381    mod collections {
382        use crate::*;
383        use nom::error::*;
384
385        #[test]
386        fn test_vec_of_numbers() {
387            let input = "1\n2\n3\n4\n5";
388            let expected = vec![1, 2, 3, 4, 5];
389
390            assert_eq!(
391                Ok::<_, Error<_>>(expected),
392                Vec::<u32>::parse_complete(input)
393            );
394        }
395
396        #[test]
397        fn test_set_of_numbers() {
398            let input = "1\n2\n3\n4\n5";
399            let expected = vec![1, 2, 3, 4, 5].into_iter().collect();
400
401            assert_eq!(
402                Ok::<_, Error<_>>(expected),
403                HashSet::<u32>::parse_complete(input)
404            );
405        }
406
407        #[test]
408        fn test_map_of_numbers() {
409            let input = "a = 1\nb = 2\nc = 3\nd = 4\ne = 5";
410            let expected = vec![('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]
411                .into_iter()
412                .collect();
413
414            assert_eq!(
415                Ok::<_, Error<_>>(expected),
416                HashMap::<char, u32>::parse_complete(input)
417            );
418        }
419
420        #[test]
421        fn test_array_of_numbers() {
422            let input = "1, 2, 3, 4, 5";
423            let expected = [1, 2, 3, 4, 5];
424
425            assert_eq!(
426                Ok::<_, Error<_>>(expected),
427                <[u32; 5]>::parse_complete(input)
428            )
429        }
430
431        #[test]
432        fn test_empty_array_of_numbers() {
433            let input = "";
434            let expected: [u32; 0] = [];
435
436            assert_eq!(
437                Ok::<_, Error<_>>(expected),
438                <[u32; 0]>::parse_complete(input)
439            );
440        }
441    }
442
443    mod wrapping {
444        use std::{rc::Rc, sync::Arc};
445
446        use crate::*;
447        use nom::error::*;
448
449        #[test]
450        fn test_box() {
451            let input = "12";
452            let expected = Box::new(12i32);
453
454            assert_eq!(
455                Ok::<_, Error<_>>(expected),
456                Box::<i32>::parse_complete(input)
457            )
458        }
459
460        #[test]
461        fn test_rc() {
462            let input = "12";
463            let expected = Rc::new(12i32);
464
465            assert_eq!(
466                Ok::<_, Error<_>>(expected),
467                Rc::<i32>::parse_complete(input)
468            )
469        }
470
471        #[test]
472        fn test_arc() {
473            let input = "12";
474            let expected = Arc::new(12i32);
475
476            assert_eq!(
477                Ok::<_, Error<_>>(expected),
478                Arc::<i32>::parse_complete(input)
479            )
480        }
481    }
482}