parse_wiki_text_2/
parse.rs

1// Copyright 2019 Fredrik Portström <https://portstrom.com>
2// This is free software distributed under the terms specified in
3// the file LICENSE at the top-level directory of this distribution.
4
5/// Get's returned if the parsing received an unrecoverable error.
6///
7/// At the moment, the only error that can occur is a timeout.
8#[derive(Debug)]
9pub enum ParseError<'a> {
10	/// The parsing took too long and was aborted.
11	TimedOut {
12		/// The time the parsing took before it was aborted.
13		execution_time: std::time::Duration,
14		/// The output that was generated before the parsing was aborted.
15		output: crate::Output<'a>,
16	},
17}
18
19#[must_use]
20pub fn parse<'a>(
21	configuration: &crate::Configuration,
22	wiki_text: &'a str,
23	max_duration: std::time::Duration,
24) -> Result<crate::Output<'a>, ParseError<'a>> {
25	let mut state = crate::State {
26		flushed_position: 0,
27		nodes: vec![],
28		scan_position: 0,
29		stack: vec![],
30		warnings: vec![],
31		wiki_text,
32	};
33	// parse redirects and warn about repeated newlines
34	{
35		let mut has_line_break = false;
36		let mut position = 0;
37		loop {
38			match state.get_byte(position) {
39				Some(b'\n') => {
40					if has_line_break {
41						state.warnings.push(crate::Warning {
42							end: position + 1,
43							message: crate::WarningMessage::RepeatedEmptyLine,
44							start: position,
45						});
46					}
47					has_line_break = true;
48					position += 1;
49					state.flushed_position = position;
50					state.scan_position = position;
51				}
52				Some(b' ') => position += 1,
53				Some(b'#') => {
54					crate::redirect::parse_redirect(
55						&mut state,
56						configuration,
57						position,
58					);
59					break;
60				}
61				_ => break,
62			}
63		}
64	}
65	let mut loop_counter = 0;
66	let start_time = std::time::Instant::now();
67
68	crate::line::parse_beginning_of_line(&mut state, None);
69	loop {
70		match state.get_byte(state.scan_position) {
71			None => {
72				crate::line::parse_end_of_line(&mut state);
73				if state.scan_position < state.wiki_text.len() {
74					continue;
75				}
76
77				// rewind until we find a text node
78				if let Some(crate::OpenNode { nodes, start, .. }) =
79					state.stack.pop()
80				{
81					state.warnings.push(crate::Warning {
82						end: state.scan_position,
83						message: crate::WarningMessage::MissingEndTagRewinding,
84						start,
85					});
86					state.rewind(nodes, start);
87				} else {
88					break;
89				}
90			}
91			// invalid characters
92			Some(0..=8 | 11..=31 | 127) => {
93				state.warnings.push(crate::Warning {
94					end: state.scan_position + 1,
95					message: crate::WarningMessage::InvalidCharacter,
96					start: state.scan_position,
97				});
98				state.scan_position += 1;
99			}
100			Some(b'\n') => {
101				crate::line::parse_end_of_line(&mut state);
102			}
103			Some(b'!')
104				if state.get_byte(state.scan_position + 1) == Some(b'!')
105					&& match state.stack.last() {
106						Some(crate::OpenNode {
107							type_: crate::OpenNodeType::Table(..),
108							..
109						}) => true,
110						_ => false,
111					} =>
112			{
113				crate::table::parse_heading_cell(&mut state);
114			}
115			Some(b'&') => crate::character_entity::parse_character_entity(
116				&mut state,
117				configuration,
118			),
119			Some(b'\'') => {
120				if state.get_byte(state.scan_position + 1) == Some(b'\'') {
121					crate::bold_italic::parse_bold_italic(&mut state);
122				} else {
123					state.scan_position += 1;
124				}
125			}
126			Some(b'<') => match state.get_byte(state.scan_position + 1) {
127				Some(b'!')
128					if state.get_byte(state.scan_position + 2)
129						== Some(b'-') && state
130						.get_byte(state.scan_position + 3)
131						== Some(b'-') =>
132				{
133					crate::comment::parse_comment(&mut state)
134				}
135				Some(b'/') => {
136					crate::tag::parse_end_tag(&mut state, configuration)
137				}
138				_ => crate::tag::parse_start_tag(&mut state, configuration),
139			},
140			Some(b'=') => {
141				crate::template::parse_parameter_name_end(&mut state);
142			}
143			Some(b'[') => {
144				if state.get_byte(state.scan_position + 1) == Some(b'[') {
145					crate::link::parse_link_start(&mut state, configuration);
146				} else {
147					crate::external_link::parse_external_link_start(
148						&mut state,
149						configuration,
150					);
151				}
152			}
153			Some(b']') => match state.stack.pop() {
154				None => state.scan_position += 1,
155				Some(crate::OpenNode {
156					nodes,
157					start,
158					type_: crate::OpenNodeType::ExternalLink,
159				}) => {
160					crate::external_link::parse_external_link_end(
161						&mut state, start, nodes,
162					);
163				}
164				Some(crate::OpenNode {
165					nodes,
166					start,
167					type_: crate::OpenNodeType::Link { namespace, target },
168				}) => {
169					if state.get_byte(state.scan_position + 1) == Some(b']') {
170						crate::link::parse_link_end(
171							&mut state,
172							&configuration,
173							start,
174							nodes,
175							namespace,
176							target,
177						);
178					} else {
179						state.scan_position += 1;
180						state.stack.push(crate::OpenNode {
181							nodes,
182							start,
183							type_: crate::OpenNodeType::Link {
184								namespace,
185								target,
186							},
187						});
188					}
189				}
190				Some(open_node) => {
191					state.scan_position += 1;
192					state.stack.push(open_node);
193				}
194			},
195			Some(b'_') => {
196				if state.get_byte(state.scan_position + 1) == Some(b'_') {
197					crate::magic_word::parse_magic_word(
198						&mut state,
199						configuration,
200					);
201				} else {
202					state.scan_position += 1;
203				}
204			}
205			Some(b'{') => {
206				if state.get_byte(state.scan_position + 1) == Some(b'{') {
207					crate::template::parse_template_start(&mut state);
208				} else {
209					state.scan_position += 1;
210				}
211			}
212			Some(b'|') => match state.stack.last_mut() {
213				Some(crate::OpenNode {
214					type_: crate::OpenNodeType::Parameter { default: None, .. },
215					..
216				}) => {
217					crate::template::parse_parameter_separator(&mut state);
218				}
219				Some(crate::OpenNode {
220					type_: crate::OpenNodeType::Table(..),
221					..
222				}) => {
223					crate::table::parse_inline_token(&mut state);
224				}
225				Some(crate::OpenNode {
226					type_: crate::OpenNodeType::Template { .. },
227					..
228				}) => {
229					crate::template::parse_template_separator(&mut state);
230				}
231				_ => state.scan_position += 1,
232			},
233			Some(b'}') => {
234				if state.get_byte(state.scan_position + 1) == Some(b'}') {
235					crate::template::parse_template_end(&mut state);
236				} else {
237					state.scan_position += 1;
238				}
239			}
240			_ => {
241				state.scan_position += 1;
242			}
243		}
244
245		if !max_duration.is_zero() && loop_counter == 10_000 {
246			loop_counter = 0;
247			if start_time.elapsed() > max_duration {
248				state.flush(state.scan_position);
249
250				return Err(ParseError::TimedOut {
251					execution_time: start_time.elapsed(),
252					output: crate::Output {
253						nodes: state.nodes,
254						warnings: state.warnings,
255					},
256				});
257			}
258		}
259
260		loop_counter += 1;
261	}
262
263	let end_position = state.skip_whitespace_backwards(wiki_text.len());
264	state.flush(end_position);
265	Ok(crate::Output {
266		nodes: state.nodes,
267		warnings: state.warnings,
268	})
269}