rustla/parser/state_machine/mod.rs
1/*!
2This module contains the `State` type and the different transition functions corresponding to each state
3in its submodules.
4
5Copyright © 2020 Santtu Söderholm
6*/
7
8// ===============================================
9// Submodules for namespacing transition functions
10// ===============================================
11pub mod aplus;
12pub mod aplus_questionnaire;
13pub mod block_quote;
14pub mod body;
15pub mod bullet_list;
16pub mod common;
17pub mod definition_list;
18pub mod enumerated_list;
19pub mod field_list;
20pub mod inline;
21pub mod literal_block;
22pub mod transitions;
23pub mod unknown_transitions;
24
25use std::collections::HashMap;
26use lazy_static::lazy_static;
27use regex;
28
29use super::*;
30
/// The states the parser's state machine can be in.
///
/// The variants double as keys of the static `TRANSITION_MAP`, whose values are
/// vectors of transitions.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum State {
    /// Used for parsing body nodes inside admonitions.
    Admonition,

    /// Detects reStructuredText & Sphinx body elements and, additionally, column breaks
    /// written as `::newcol`, for the A+ nodes that support them.
    /// The Point of Interest directive is one such node.
    AplusMultiCol,

    /// Recognizes the questionnaire sub-directives:
    ///
    /// 1. `pick-one`,
    /// 2. `pick-any` and
    /// 3. `freetext`
    AplusQuestionnaire,

    /// Detects choices and assignments inside the A+ questionnaire
    /// subdirective `pick-one`.
    AplusPickOne,

    /// Detects choices and assignments inside the A+ questionnaire
    /// subdirective `pick-any`.
    AplusPickAny,

    /// Recognizes body elements, such as lists or footnotes, when focused on the document root.
    Body,

    /// Detects body elements inside a section.
    Section,

    /// Recognizes body elements inside a block quote.
    /// On top of the normal body elements, attributions are
    /// recognized in this state as well.
    BlockQuote,

    /// Only empty lines and bullet list items are recognized in this state.
    BulletList,

    /// Citation nodes may contain arbitrary body elements,
    /// so this state is reserved for recognizing them when focused on a citation node.
    Citation,

    /// Definition lists may only contain empty lines and definition list items.
    DefinitionList,

    /// Only empty lines and enumerated list items are recognized in this state.
    EnumeratedList,

    /// NOTE(review): undocumented in the original source; presumably used while
    /// focused on a hyperlink target node. Confirm against the transition functions.
    HyperlinkTarget,

    /// List items of any type (enumerated, field, ...) can contain arbitrary body elements.
    /// This state is reserved for recognizing them when focused on one of the list item nodes.
    ListItem,

    /// When focused on a field list node, only empty lines and field list items are recognized.
    FieldList,

    /// NOTE(review): undocumented in the original source; presumably used while
    /// focused on a figure node. Confirm against the transition functions.
    Figure,

    /// Footnotes can contain arbitrary body elements.
    /// This state is reserved for recognizing them when focused on a footnote node.
    Footnote,

    /// Hyperlink targets come in 3 different types:
    ///
    /// 1. *internal*, which link to the body elements that directly follow them,
    /// 2. *external*, which reference external URIs and
    /// 3. *indirect*, which reference other hyperlink targets inside the same document.
    ///
    /// ??? Normally an external or indirect hyperlink target would simply be a node of its own
    /// that contains a reference label of some kind. Chained *internal* hyperlinks, however,
    /// all reference the same target node, so this state is reserved for parsing them until
    /// a node of a different kind (including other types of hyperlink targets) is encountered.
    /// Once that happens, all of the internal hyperlinks are set to point to that same
    /// target node. ???
    InternalHyperlinkTarget,

    /// When focused on an option list, only empty lines and option list items are recognized.
    /// This state is reserved for that purpose.
    OptionList,

    /// Empty lines and line block lines (lines beginning with '`|`') are recognized in this state.
    LineBlock,

    /// Recognizes bullet list items inside a ListTable.
    ListTable,

    /// Used for parsing field lists inside directives. Field lists located inside directive
    /// nodes work as directive parameters or settings.
    ExtensionOptions,

    /// Used for parsing section titles and document transitions
    /// (a.k.a. `\hrulefill` commands in LaTeX terms).
    Line,

    /// Used for parsing empty lines and literal blocks of text.
    /// Literal blocks are (non-contiguous) indented or "quoted" blocks of text that
    /// are preceded by a paragraph ending in a `::`.
    LiteralBlock,

    /// An explicit failure state, which allows transition failures to be signalled explicitly.
    Failure,

    /// An End of File state. It could also have been named EOI (end of input),
    /// since it is transitioned to when a parser reaches the end of its source input.
    /// During nested parsing sessions this does not necessarily correspond to the end of the
    /// given file, as nested parsers are usually limited to parsing a single block of text
    /// behind a node identifier.
    EOF,
}
142
143// ====================
144// Statemachine methods
145// ====================
146impl State {
147
148 /// Transitions a `StateMachine` into a `Failure` state using the From trait,
149 /// the implementation of which automatically implements the Into trait.
150 pub fn to_failure(self) -> Self {
151 match self {
152 _ => State::Failure,
153 }
154 }
155
156 /// Retrieves the list of transitions based on a given `StateMachine` variant
157 /// using a `match` statement. First checks for end states that don't contain transitions,
158 /// such as `EOF` or `Failure` and if these are not matched,
159 /// retrieves a list of transitions from the `TRANSITION_MAP`.
160 pub fn get_transitions(
161 &self,
162 line_cursor: &LineCursor,
163 ) -> Result<&Vec<Transition>, &'static str> {
164 match self {
165 State::EOF => Err("Already moved past EOF. No transitions to perform.\n"),
166 State::Failure => Err("Failure state has no transitions\n"),
167 State::Section
168 | State::ListItem
169 | State::Footnote
170 | State::Citation
171 | Self::Admonition
172 | Self::Figure => Ok(TRANSITION_MAP.get(&State::Body).unwrap()),
173 _ => {
174 if let Some(transition_table) = TRANSITION_MAP.get(self) {
175 Ok(transition_table)
176 } else {
177 panic!(
178 "Found no transition table for state {:#?} on line {}",
179 self,
180 line_cursor.sum_total()
181 )
182 }
183 }
184 }
185 }
186}
187
188/// =================================
189/// StateMachine associated functions
190/// =================================
191impl State {
192
193 /// Takes in a reference/slice to an associated array of uncompiled transitions
194 /// and compiles the regex patterns found. Returns a `Vec<Transition>` with compiled state machines
195 /// in palce of the regex patterns.
196 ///
197 /// Error handling needs to be added.
198 fn compile_state_transitions(transitions: &[UncompiledTransition]) -> Vec<Transition> {
199 let mut compiled_transitions = Vec::with_capacity(transitions.len());
200
201 for (pat_name, expr, fun) in transitions.iter() {
202 let r = regex::Regex::new(expr).unwrap();
203 compiled_transitions.push((*pat_name, r, *fun));
204 }
205
206 compiled_transitions
207 }
208}
209
210/// =================================
211/// StateMachine associated constants
212/// =================================
213impl State {}
214
215lazy_static! {
216
217 /// A static map of transititions for each `State` of the `Parser`.
218 ///
219 /// With this regexes are only compiled into automata once.
220 pub static ref TRANSITION_MAP: HashMap<State, Vec<(Pattern, regex::Regex, TransitionMethod)>> = {
221
222 let mut action_map = collections::HashMap::with_capacity(10);
223
224 let body_actions = State::compile_state_transitions(&State::BODY_TRANSITIONS);
225 action_map.insert(State::Body, body_actions);
226
227 let block_quote_actions = State::compile_state_transitions(&State::BLOCK_QUOTE_TRANSITIONS);
228 action_map.insert(State::BlockQuote, block_quote_actions);
229
230 let bullet_actions = State::compile_state_transitions(&State::BULLET_LIST_TRANSITIONS);
231 action_map.insert(State::BulletList, bullet_actions);
232
233 let definition_actions = State::compile_state_transitions(&State::DEFINITION_LIST_TRANSITIONS);
234 action_map.insert(State::DefinitionList, definition_actions);
235
236 let enumerated_actions = State::compile_state_transitions(&State::ENUMERATED_LIST_TRANSITIONS);
237 action_map.insert(State::EnumeratedList, enumerated_actions);
238
239 let field_actions = State::compile_state_transitions(&State::FIELD_LIST_TRANSITIONS);
240 action_map.insert(State::FieldList, field_actions);
241
242 let option_actions = State::compile_state_transitions(&State::OPTION_LIST_TRANSITIONS);
243 action_map.insert(State::OptionList, option_actions);
244
245 let line_block_actions = State::compile_state_transitions(&State::LINE_BLOCK_TRANSITIONS);
246 action_map.insert(State::LineBlock, line_block_actions);
247
248 let literal_block_actions = State::compile_state_transitions(&State::LITERAL_BLOCK_TRANSITIONS);
249 action_map.insert(State::LiteralBlock, literal_block_actions);
250
251 let extension_option_actions = State::compile_state_transitions(&State::EXTENSION_OPTION_TRANSITIONS);
252 action_map.insert(State::ExtensionOptions, extension_option_actions);
253
254 let line_actions = State::compile_state_transitions(&State::LINE_TRANSITIONS);
255 action_map.insert(State::Line, line_actions);
256
257 let list_table_actions = State::compile_state_transitions(&State::LIST_TABLE_TRANSITIONS);
258 action_map.insert(State::ListTable, list_table_actions);
259
260 // A+
261 let aplus_multicol_actions = State::compile_state_transitions(&State::APLUS_MULTICOL_TRANSITIONS);
262 action_map.insert(State::AplusMultiCol, aplus_multicol_actions);
263
264 let aplus_questionnaire_actions = State::compile_state_transitions(&State::APLUS_QUESTIONNAIRE_TRANSITIONS);
265 action_map.insert(State::AplusQuestionnaire, aplus_questionnaire_actions);
266
267 action_map
268
269 };
270
271 /// Inline text has different parsing requirements than (nested)
272 /// `Body` elements as they do not form blocks of text,
273 /// making detecting by source line impractical.
274 ///
275 /// Instead, a block of source text is given to `Parser::parse_inline_nodes`
276 /// which is then scanned with regular expressions.
277 pub static ref COMPILED_INLINE_TRANSITIONS: Vec<(Pattern, regex::Regex, InlineParsingMethod)> = {
278
279 let mut inline_transitions = Vec::with_capacity(State::INLINE_TRANSITIONS.len());
280
281 for (pat_name, expr, fun) in State::INLINE_TRANSITIONS.iter() {
282 let r = regex::Regex::new(expr).unwrap();
283 inline_transitions.push((*pat_name, r, *fun));
284 }
285
286 inline_transitions
287 };
288}
289
290impl Parser {
291 /// Checks whether the line following the current one allows for the construction of an enumerate list item.
292 /// Either the following line has to be blank, indented or the next enumerator in
293 /// the current list has to be constructable from it.
294 fn is_enumerated_list_item(
295 src_lines: &Vec<String>,
296 line_cursor: &mut LineCursor,
297 captures: ®ex::Captures,
298 section_level: &mut usize,
299 base_indent: usize,
300 detected_enumerator_indent: usize,
301 detected_number: usize,
302 detected_kind: EnumKind,
303 detected_delims: EnumDelims,
304 pattern_name: &Pattern,
305 list_kind: &EnumKind,
306 in_list_item: bool,
307 list_item_number: usize,
308 list_start_index: usize,
309 ) -> bool {
310
311 use crate::parser::automata::ENUMERATOR_AUTOMATON;
312
313 if let Some(next_line) = src_lines.get(line_cursor.relative_offset() + 1) {
314 let next_line_indent = next_line
315 .chars()
316 .take_while(|c| c.is_whitespace())
317 .count() + base_indent;
318
319 if next_line.trim().is_empty() || next_line_indent > detected_enumerator_indent {
320 return true
321 } else if next_line_indent == detected_enumerator_indent {
322 if let Some(next_captures) = ENUMERATOR_AUTOMATON.captures(next_line) {
323 let (next_number, next_kind, next_delims) = match converters::enum_captures_to_int_kind_and_delims(
324 &next_captures,
325 Some(list_kind),
326 in_list_item,
327 true,
328 list_item_number,
329 list_start_index
330 ) {
331 Some((number, kind, delims)) => (number, kind, delims),
332 None => return false
333 };
334 if ! (
335 next_number == detected_number + 1
336 && next_kind == detected_kind
337 && next_delims == detected_delims
338 ) {
339 eprintln!("Non-matching enumerator on next line...");
340 return false
341 } else {
342 true
343 }
344 } else {
345 return false
346 }
347 } else {
348 true
349 }
350 } else {
351 true
352 }
353 }
354}