xrust/parser/combinators/whitespace.rs
1use std::cmp::Ordering;
2
3use crate::item::Node;
4use crate::parser::combinators::alt::alt4;
5use crate::parser::combinators::many::{many0, many1};
6use crate::parser::combinators::map::map;
7use crate::parser::combinators::tag::tag;
8use crate::parser::combinators::tuple::tuple3;
9use crate::parser::{ParseError, ParseInput, StaticState};
10use qualname::{NamespacePrefix, NamespaceUri};
11
12pub fn whitespace0<'a, N: Node, L>()
13-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
14where
15 L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
16{
17 //TODO add support for xml:space
18 map(
19 many0(alt4(tag(" "), tag("\t"), tag("\r"), tag("\n"))),
20 |_| (),
21 )
22}
23
24pub(crate) fn whitespace1<'a, N: Node, L>()
25-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
26where
27 L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
28{
29 //TODO add support for xml:space
30 map(
31 many1(alt4(tag(" "), tag("\t"), tag("\r"), tag("\n"))),
32 |_| (),
33 )
34}
35
36pub(crate) fn xpwhitespace<'a, N: Node, L>()
37-> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
38where
39 L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
40{
41 map(
42 tuple3(
43 whitespace0(),
44 take_until_balanced("(:", ":)"),
45 whitespace0(),
46 ),
47 |_| (),
48 )
49}
50
51/// Parse nested input.
52///
53/// Inspired by 'take_until_unbalanced' from parse_hyperlinks crate.
54/// We can't use the parse_hyperlinks version since it only takes character delimiters.
55/// Also, this function does not need to consider escaped brackets.
56/// The function assumes that the open and close delimiters are the same length.
57///
58/// This function consumes the delimiters.
59/// The start delimiter must be the first token in the input. Finding this sets the bracket count to 1.
60/// After that there are 4 scenarios:
61///
62/// * The close delimiter is not found. This is an error.
63/// * There is no open delimiter. In this case, consume up to and including the close delimiter. If the bracket count is 1 then return Ok, otherwise error.
64/// * There is an open delimiter. If the open occurs after the close, then consume up to and including the close delimiter. If the bracket count is 1 then return Ok, otherwise error.
65/// * The open delimiter occurs before the close. In this case, increment the bracket count and continue after the open delimiter.
66fn take_until_balanced<'a, N: Node, L>(
67 open: &'static str,
68 close: &'static str,
69) -> impl Fn(ParseInput<'a, N>, &mut StaticState<L>) -> Result<(ParseInput<'a, N>, ()), ParseError>
70where
71 L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError>,
72{
73 move |(input, state), _ss| {
74 let mut pos = 0;
75 let mut counter = 0;
76 let mut bracket_counter = 0;
77
78 loop {
79 counter += 1;
80 if counter > 1000 {
81 return Err(ParseError::EntityDepth {
82 row: 0,
83 col: counter,
84 });
85 }
86 match (input[pos..].find(open), input[pos..].find(close)) {
87 (Some(0), _) => {
88 bracket_counter += 1;
89 pos += open.len();
90 //let _: Vec<_> = (&mut input).take(open.len()).collect();
91 match (input[pos..].find(open), input[pos..].find(close)) {
92 (_, None) => {
93 // Scenario 1
94 return Err(ParseError::Unbalanced);
95 }
96 (Some(o), Some(c)) => {
97 // Scenario 3/4
98 if o > c {
99 // Scenario 3
100 if bracket_counter == 1 {
101 //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
102 pos += c + close.len();
103 return Ok(((&input[pos..], state), ()));
104 } else {
105 return Err(ParseError::Unbalanced);
106 }
107 } else {
108 // Scenario 4
109 bracket_counter += 1;
110 //let _: Vec<_> = (&mut input).take(o + open.len()).collect();
111 pos += o + close.len();
112 }
113 }
114 (_, Some(c)) => {
115 // Scenario 2
116 match bracket_counter.cmp(&1) {
117 Ordering::Greater => {
118 bracket_counter -= 1;
119 //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
120 pos += c + close.len();
121 }
122 Ordering::Equal => {
123 //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
124 pos += c + close.len();
125 return Ok(((&input[pos..], state), ()));
126 }
127 Ordering::Less => {
128 return Err(ParseError::Unbalanced);
129 }
130 }
131 }
132 }
133 }
134 (None, Some(c)) => {
135 // Scenario 2
136 match bracket_counter.cmp(&1) {
137 Ordering::Greater => {
138 bracket_counter -= 1;
139 //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
140 pos += c + close.len();
141 }
142 Ordering::Equal => {
143 //let _: Vec<_> = (&mut input).take(c + close.len()).collect();
144 pos += c + close.len();
145 return Ok(((&input[pos..], state), ()));
146 }
147 Ordering::Less => {
148 return Err(ParseError::Unbalanced);
149 }
150 }
151 }
152 _ => return Ok(((&input[pos..], state), ())),
153 }
154 }
155 }
156}