nom_xml/
parse.rs

1//parse.rs
2
3use crate::{transcode::Decode, IResult, Name};
4use nom::{
5    bytes::complete::tag,
6    character::complete::{char, satisfy},
7    combinator::{map, opt, recognize},
8    multi::{many0, many1, separated_list1},
9    sequence::tuple,
10    Offset,
11};
12type CaptureSpan<'a, O> = Box<dyn FnMut(&'a str) -> IResult<&'a str, (&'a str, O)> + 'a>;
13
14pub trait Parse<'a>: Sized {
15    type Args;
16    type Output; //TODO: refactor this to have default values when associated type defaults are stabilized
17    fn parse(_input: &'a str, _args: Self::Args) -> Self::Output {
18        unimplemented!()
19    }
20
21    // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
22    // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
23    fn is_char(c: char) -> bool {
24        matches!(c, '\u{9}' | '\u{A}' | '\u{D}' | '\u{20}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}')
25    }
26
27    fn parse_char(input: &str) -> IResult<&str, char> {
28        satisfy(Self::is_char)(input)
29    }
30
31    // [3] S ::= (#x20 | #x9 | #xD | #xA)+
32    // AKA [3] S ::= (' '  | '\t' | '\r' | '\n')+
33    fn is_whitespace(c: char) -> bool {
34        matches!(c, ' ' | '\t' | '\r' | '\n')
35    }
36
37    fn parse_multispace1(input: &str) -> IResult<&str, ()> {
38        let (input, _) = many1(satisfy(Self::is_whitespace))(input)?;
39        Ok((input, ()))
40    }
41
42    fn parse_multispace0(input: &str) -> IResult<&str, ()> {
43        let (input, _) = many0(satisfy(Self::is_whitespace))(input)?;
44        Ok((input, ()))
45    }
46
47    /*
48    [4] NameStartChar ::=
49        ":"                 | [A-Z]             | "_"           | [a-z]
50        | [#xC0-#xD6]       | [#xD8-#xF6]       | [#xF8-#x2FF]
51        | [#x370-#x37D]     | [#x37F-#x1FFF]    | [#x200C-#x200D]
52        | [#x2070-#x218F]   | [#x2C00-#x2FEF]   | [#x3001-#xD7FF]
53        | [#xF900-#xFDCF]   | [#xFDF0-#xFFFD]   | [#x10000-#xEFFFF]
54    */
55    fn is_name_start_char(c: char) -> bool {
56        matches!(c, ':' | 'A'..='Z' | '_' | 'a'..='z' |
57            '\u{C0}'..='\u{D6}' | '\u{D8}'..='\u{F6}' | '\u{F8}'..='\u{2FF}' |
58            '\u{370}'..='\u{37D}' | '\u{37F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' |
59            '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' |
60            '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}')
61    }
62
63    /*  [4a] NameChar ::=
64                NameStartChar |
65                "-" | "." | [0-9] | #xB7 |
66                [#x0300-#x036F] | [#x203F-#x2040]
67    */
68    fn is_name_char(c: char) -> bool {
69        Self::is_name_start_char(c)
70            || matches!(c, '-' | '.' | '0'..='9' | '\u{B7}' |
71            '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}')
72    }
73
74    fn parse_name_char(input: &str) -> IResult<&str, char> {
75        satisfy(Self::is_name_char)(input)
76    }
77
78    fn parse_name_start_char(input: &str) -> IResult<&str, char> {
79        satisfy(Self::is_name_start_char)(input)
80    }
81
82    // [7] Nmtoken ::= (NameChar)+
83    fn parse_nmtoken(input: &str) -> IResult<&str, String> {
84        let (input, result) = recognize(many1(Self::parse_name_char))(input)?;
85        Ok((input, result.to_string()))
86    }
87
88    // [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
89    fn parse_nmtokens(input: &str) -> IResult<&str, Vec<String>> {
90        separated_list1(char(' '), Self::parse_nmtoken)(input)
91    }
92
93    // [5] Name ::= NameStartChar (NameChar)*
94    fn parse_name(input: &str) -> IResult<&str, Name> {
95        map(
96            tuple((Self::parse_name_start_char, opt(Self::parse_nmtoken))),
97            |(start_char, rest_chars)| {
98                let mut name = start_char.to_string();
99                if let Some(rest) = rest_chars {
100                    name.push_str(&rest);
101                }
102
103                let name_clone = name.clone();
104                // Attempt to decode the cloned name.
105                let local_part = match name_clone.decode() {
106                    Ok(decoded) => decoded.into_owned(),
107                    Err(_) => name,
108                };
109                Name {
110                    prefix: None,
111                    local_part,
112                }
113            },
114        )(input)
115    }
116
117    // [6] Names ::= Name (#x20 Name)*
118    fn parse_names(input: &str) -> IResult<&str, Vec<Name>> {
119        separated_list1(char(' '), Self::parse_name)(input)
120    }
121
122    //[25] Eq ::=  S? '=' S?
123    fn parse_eq(input: &str) -> IResult<&str, ()> {
124        let (input, _) = Self::parse_multispace0(input)?;
125        let (input, _) = tag("=")(input)?;
126        let (input, _) = Self::parse_multispace0(input)?;
127        Ok((input, ()))
128    }
129
130    fn capture_span<O, F>(mut f: F) -> CaptureSpan<'a, O>
131    where
132        F: FnMut(&'a str) -> IResult<&'a str, O> + 'a,
133    {
134        Box::new(move |input: &'a str| {
135            let (remaining, result) = f(input)?;
136            let offset = input.offset(remaining);
137            Ok((remaining, (&input[..offset], result)))
138        })
139    }
140}