Skip to main content

xrust/parser/combinators/
whitespace.rs

1use std::cmp::Ordering;
2
3use crate::item::Node;
4use crate::parser::combinators::alt::alt4;
5use crate::parser::combinators::many::{many0, many1};
6use crate::parser::combinators::map::map;
7use crate::parser::combinators::tag::tag;
8use crate::parser::combinators::tuple::tuple3;
9use crate::parser::{ParseError, ParseInput, StaticState};
10use qualname::{NamespacePrefix, NamespaceUri};
11
12pub fn whitespace0<'a, N: Node, L>()
13-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
14where
15    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
16{
17    //TODO add support for xml:space
18    map(
19        many0(alt4(tag(" "), tag("\t"), tag("\r"), tag("\n"))),
20        |_| (),
21    )
22}
23
24pub(crate) fn whitespace1<'a, N: Node, L>()
25-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
26where
27    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
28{
29    //TODO add support for xml:space
30    map(
31        many1(alt4(tag(" "), tag("\t"), tag("\r"), tag("\n"))),
32        |_| (),
33    )
34}
35
36pub(crate) fn xpwhitespace<'a, N: Node, L>()
37-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
38where
39    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
40{
41    map(
42        tuple3(
43            whitespace0(),
44            take_until_balanced("(:", ":)"),
45            whitespace0(),
46        ),
47        |_| (),
48    )
49}
50
51/// Parse nested input.
52///
53/// Inspired by 'take_until_unbalanced' from parse_hyperlinks crate.
54/// We can't use the parse_hyperlinks version since it only takes character delimiters.
55/// Also, this function does not need to consider escaped brackets.
56/// The function assumes that the open and close delimiters are the same length.
57///
58/// This function consumes the delimiters.
59/// The start delimiter must be the first token in the input. Finding this sets the bracket count to 1.
60/// After that there are 4 scenarios:
61///
62/// * The close delimiter is not found. This is an error.
63/// * There is no open delimiter. In this case, consume up to and including the close delimiter. If the bracket count is 1 then return Ok, otherwise error.
64/// * There is an open delimiter. If the open occurs after the close, then consume up to and including the close delimiter. If the bracket count is 1 then return Ok, otherwise error.
65/// * The open delimiter occurs before the close. In this case, increment the bracket count and continue after the open delimiter.
66fn take_until_balanced<'a, N: Node, L>(
67    open: &'static str,
68    close: &'static str,
69) -> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
70where
71    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
72{
73    move |(input, state), _ss| {
74        let mut pos = 0;
75        let mut counter = 0;
76        let mut bracket_counter = 0;
77
78        loop {
79            counter += 1;
80            if counter > 1000 {
81                return Err(ParseError::EntityDepth {
82                    row: 0,
83                    col: counter,
84                });
85            }
86            match (input[pos..].find(open), input[pos..].find(close)) {
87                (Some(0), _) => {
88                    bracket_counter += 1;
89                    pos += open.len();
90                    //let _: Vec<_> = (&mut input).take(open.len()).collect();
91                    match (input[pos..].find(open), input[pos..].find(close)) {
92                        (_, None) => {
93                            // Scenario 1
94                            return Err(ParseError::Unbalanced);
95                        }
96                        (Some(o), Some(c)) => {
97                            // Scenario 3/4
98                            if o > c {
99                                // Scenario 3
100                                if bracket_counter == 1 {
101                                    //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
102                                    pos += c + close.len();
103                                    return Ok(((&input[pos..], state), ()));
104                                } else {
105                                    return Err(ParseError::Unbalanced);
106                                }
107                            } else {
108                                // Scenario 4
109                                bracket_counter += 1;
110                                //let _: Vec<_> = (&mut input).take(o + open.len()).collect();
111                                pos += o + close.len();
112                            }
113                        }
114                        (_, Some(c)) => {
115                            // Scenario 2
116                            match bracket_counter.cmp(&1) {
117                                Ordering::Greater => {
118                                    bracket_counter -= 1;
119                                    //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
120                                    pos += c + close.len();
121                                }
122                                Ordering::Equal => {
123                                    //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
124                                    pos += c + close.len();
125                                    return Ok(((&input[pos..], state), ()));
126                                }
127                                Ordering::Less => {
128                                    return Err(ParseError::Unbalanced);
129                                }
130                            }
131                        }
132                    }
133                }
134                (None, Some(c)) => {
135                    // Scenario 2
136                    match bracket_counter.cmp(&1) {
137                        Ordering::Greater => {
138                            bracket_counter -= 1;
139                            //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
140                            pos += c + close.len();
141                        }
142                        Ordering::Equal => {
143                            //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
144                            pos += c + close.len();
145                            return Ok(((&input[pos..], state), ()));
146                        }
147                        Ordering::Less => {
148                            return Err(ParseError::Unbalanced);
149                        }
150                    }
151                }
152                _ => return Ok(((&input[pos..], state), ())),
153            }
154        }
155    }
156}