parse_wiki_text_2/
parse.rs1#[derive(Debug)]
9pub enum ParseError<'a> {
10 TimedOut {
12 execution_time: std::time::Duration,
14 output: crate::Output<'a>,
16 },
17}
18
19#[must_use]
20pub fn parse<'a>(
21 configuration: &crate::Configuration,
22 wiki_text: &'a str,
23 max_duration: std::time::Duration,
24) -> Result<crate::Output<'a>, ParseError<'a>> {
25 let mut state = crate::State {
26 flushed_position: 0,
27 nodes: vec![],
28 scan_position: 0,
29 stack: vec![],
30 warnings: vec![],
31 wiki_text,
32 };
33 {
35 let mut has_line_break = false;
36 let mut position = 0;
37 loop {
38 match state.get_byte(position) {
39 Some(b'\n') => {
40 if has_line_break {
41 state.warnings.push(crate::Warning {
42 end: position + 1,
43 message: crate::WarningMessage::RepeatedEmptyLine,
44 start: position,
45 });
46 }
47 has_line_break = true;
48 position += 1;
49 state.flushed_position = position;
50 state.scan_position = position;
51 }
52 Some(b' ') => position += 1,
53 Some(b'#') => {
54 crate::redirect::parse_redirect(
55 &mut state,
56 configuration,
57 position,
58 );
59 break;
60 }
61 _ => break,
62 }
63 }
64 }
65 let mut loop_counter = 0;
66 let start_time = std::time::Instant::now();
67
68 crate::line::parse_beginning_of_line(&mut state, None);
69 loop {
70 match state.get_byte(state.scan_position) {
71 None => {
72 crate::line::parse_end_of_line(&mut state);
73 if state.scan_position < state.wiki_text.len() {
74 continue;
75 }
76
77 if let Some(crate::OpenNode { nodes, start, .. }) =
79 state.stack.pop()
80 {
81 state.warnings.push(crate::Warning {
82 end: state.scan_position,
83 message: crate::WarningMessage::MissingEndTagRewinding,
84 start,
85 });
86 state.rewind(nodes, start);
87 } else {
88 break;
89 }
90 }
91 Some(0..=8 | 11..=31 | 127) => {
93 state.warnings.push(crate::Warning {
94 end: state.scan_position + 1,
95 message: crate::WarningMessage::InvalidCharacter,
96 start: state.scan_position,
97 });
98 state.scan_position += 1;
99 }
100 Some(b'\n') => {
101 crate::line::parse_end_of_line(&mut state);
102 }
103 Some(b'!')
104 if state.get_byte(state.scan_position + 1) == Some(b'!')
105 && match state.stack.last() {
106 Some(crate::OpenNode {
107 type_: crate::OpenNodeType::Table(..),
108 ..
109 }) => true,
110 _ => false,
111 } =>
112 {
113 crate::table::parse_heading_cell(&mut state);
114 }
115 Some(b'&') => crate::character_entity::parse_character_entity(
116 &mut state,
117 configuration,
118 ),
119 Some(b'\'') => {
120 if state.get_byte(state.scan_position + 1) == Some(b'\'') {
121 crate::bold_italic::parse_bold_italic(&mut state);
122 } else {
123 state.scan_position += 1;
124 }
125 }
126 Some(b'<') => match state.get_byte(state.scan_position + 1) {
127 Some(b'!')
128 if state.get_byte(state.scan_position + 2)
129 == Some(b'-') && state
130 .get_byte(state.scan_position + 3)
131 == Some(b'-') =>
132 {
133 crate::comment::parse_comment(&mut state)
134 }
135 Some(b'/') => {
136 crate::tag::parse_end_tag(&mut state, configuration)
137 }
138 _ => crate::tag::parse_start_tag(&mut state, configuration),
139 },
140 Some(b'=') => {
141 crate::template::parse_parameter_name_end(&mut state);
142 }
143 Some(b'[') => {
144 if state.get_byte(state.scan_position + 1) == Some(b'[') {
145 crate::link::parse_link_start(&mut state, configuration);
146 } else {
147 crate::external_link::parse_external_link_start(
148 &mut state,
149 configuration,
150 );
151 }
152 }
153 Some(b']') => match state.stack.pop() {
154 None => state.scan_position += 1,
155 Some(crate::OpenNode {
156 nodes,
157 start,
158 type_: crate::OpenNodeType::ExternalLink,
159 }) => {
160 crate::external_link::parse_external_link_end(
161 &mut state, start, nodes,
162 );
163 }
164 Some(crate::OpenNode {
165 nodes,
166 start,
167 type_: crate::OpenNodeType::Link { namespace, target },
168 }) => {
169 if state.get_byte(state.scan_position + 1) == Some(b']') {
170 crate::link::parse_link_end(
171 &mut state,
172 &configuration,
173 start,
174 nodes,
175 namespace,
176 target,
177 );
178 } else {
179 state.scan_position += 1;
180 state.stack.push(crate::OpenNode {
181 nodes,
182 start,
183 type_: crate::OpenNodeType::Link {
184 namespace,
185 target,
186 },
187 });
188 }
189 }
190 Some(open_node) => {
191 state.scan_position += 1;
192 state.stack.push(open_node);
193 }
194 },
195 Some(b'_') => {
196 if state.get_byte(state.scan_position + 1) == Some(b'_') {
197 crate::magic_word::parse_magic_word(
198 &mut state,
199 configuration,
200 );
201 } else {
202 state.scan_position += 1;
203 }
204 }
205 Some(b'{') => {
206 if state.get_byte(state.scan_position + 1) == Some(b'{') {
207 crate::template::parse_template_start(&mut state);
208 } else {
209 state.scan_position += 1;
210 }
211 }
212 Some(b'|') => match state.stack.last_mut() {
213 Some(crate::OpenNode {
214 type_: crate::OpenNodeType::Parameter { default: None, .. },
215 ..
216 }) => {
217 crate::template::parse_parameter_separator(&mut state);
218 }
219 Some(crate::OpenNode {
220 type_: crate::OpenNodeType::Table(..),
221 ..
222 }) => {
223 crate::table::parse_inline_token(&mut state);
224 }
225 Some(crate::OpenNode {
226 type_: crate::OpenNodeType::Template { .. },
227 ..
228 }) => {
229 crate::template::parse_template_separator(&mut state);
230 }
231 _ => state.scan_position += 1,
232 },
233 Some(b'}') => {
234 if state.get_byte(state.scan_position + 1) == Some(b'}') {
235 crate::template::parse_template_end(&mut state);
236 } else {
237 state.scan_position += 1;
238 }
239 }
240 _ => {
241 state.scan_position += 1;
242 }
243 }
244
245 if !max_duration.is_zero() && loop_counter == 10_000 {
246 loop_counter = 0;
247 if start_time.elapsed() > max_duration {
248 state.flush(state.scan_position);
249
250 return Err(ParseError::TimedOut {
251 execution_time: start_time.elapsed(),
252 output: crate::Output {
253 nodes: state.nodes,
254 warnings: state.warnings,
255 },
256 });
257 }
258 }
259
260 loop_counter += 1;
261 }
262
263 let end_position = state.skip_whitespace_backwards(wiki_text.len());
264 state.flush(end_position);
265 Ok(crate::Output {
266 nodes: state.nodes,
267 warnings: state.warnings,
268 })
269}