pulldown_latex/
parser.rs

//! Contains the [`Parser`], which transforms input `LaTeX` into a stream of `Result<Event, ParserError>`.
//!
//! The parser is used as an iterator, and the events it generates can be rendered by a renderer.
//! The `mathml` renderer provided by this crate is available through [`push_mathml`] and [`write_mathml`].
//!
//! [`push_mathml`]: crate::mathml::push_mathml
//! [`write_mathml`]: crate::mathml::write_mathml
8pub mod error;
9mod lex;
10mod macros;
11mod primitives;
12mod state;
13pub mod storage;
14mod tables;
15
16use std::ops::Range;
17
18use macros::MacroContext;
19
20use crate::event::{Event, Grouping, ScriptPosition, ScriptType};
21
22use self::{state::ParserState, storage::Storage};
23
24pub(crate) use error::{ErrorKind, InnerResult, ParserError};
25
/// The parser completes the task of transforming the input `LaTeX` into a symbolic representation,
/// namely a stream of [`Event`]s.
///
/// Transforming the events into rendered math is a task for the
/// [`mathml`](crate::mathml) renderer.
///
/// The algorithm of the [`Parser`] is driven by the [`Parser::next`] method.
/// This method is provided through the [`Iterator`] trait implementation, thus an end user should
/// only need to use the [`Parser`] as an iterator of `Result<Event, ParserError>`.
#[derive(Debug)]
pub struct Parser<'store> {
    /// The next thing that should be parsed or output.
    ///
    /// When this is a string/substring, we should parse it. Some commands output
    /// multiple events, so we need to keep track of them and output them in the next
    /// iterations before continuing parsing.
    ///
    /// Instructions are stored backward in this stack, in the sense that the next instruction to
    /// be popped (the last element of the vector) is the next one to be processed.
    instruction_stack: Vec<Instruction<'store>>,

    /// This buffer serves as a staging area when parsing a command.
    ///
    /// When a token is parsed, its instructions are first pushed to this buffer, then scripts
    /// are checked (superscript and subscript), and finally the instructions are moved from the
    /// buffer to the instruction stack. The buffer is fully drained after every parsing step.
    buffer: Vec<Instruction<'store>>,

    /// Macro definitions.
    macro_context: MacroContext<'store>,

    /// Where macros are expanded if ever needed.
    storage: &'store bumpalo::Bump,

    /// A stack that serves to provide context when an error occurs.
    span_stack: SpanStack<'store>,
}
62
63impl<'store> Parser<'store> {
64    /// Create a new parser from the given input string and storage.
65    pub fn new<'input>(input: &'input str, storage: &'store Storage) -> Self
66    where
67        'input: 'store,
68    {
69        let mut instruction_stack = Vec::with_capacity(32);
70        instruction_stack.push(Instruction::SubGroup {
71            content: input,
72            allowed_alignment_count: None,
73        });
74        let buffer = Vec::with_capacity(16);
75        Self {
76            instruction_stack,
77            buffer,
78            macro_context: MacroContext::new(),
79            storage: &storage.0,
80            span_stack: SpanStack::from_input(input),
81        }
82    }
83}
84
85impl<'store> Iterator for Parser<'store> {
86    type Item = Result<Event<'store>, ParserError>;
87
88    fn next(&mut self) -> Option<Self::Item> {
89        match self.instruction_stack.last_mut() {
90            Some(Instruction::Event(_)) => Some(Ok(self
91                .instruction_stack
92                .pop()
93                .and_then(|i| match i {
94                    Instruction::Event(e) => Some(e),
95                    _ => None,
96                })
97                .expect("there is something in the stack"))),
98            Some(Instruction::SubGroup { content, .. }) if content.trim_start().is_empty() => {
99                self.instruction_stack.pop();
100                self.next()
101            }
102            Some(Instruction::SubGroup {
103                content,
104                allowed_alignment_count,
105                ..
106            }) => {
107                let state = ParserState {
108                    allowed_alignment_count: allowed_alignment_count.as_mut(),
109                    ..Default::default()
110                };
111
112                let inner = InnerParser {
113                    content,
114                    buffer: &mut self.buffer,
115                    state,
116                    macro_context: &mut self.macro_context,
117                    storage: self.storage,
118                    span_stack: &mut self.span_stack,
119                };
120
121                let (desc, rest) = inner.parse_next();
122                *content = rest;
123
124                let script_event = match desc {
125                    Err(e) => {
126                        let content_str = *content;
127                        return Some(Err(ParserError::new(
128                            e,
129                            content_str.as_ptr(),
130                            &mut self.span_stack,
131                        )));
132                    }
133                    Ok(Some((e, desc))) => {
134                        if desc.subscript_start > desc.superscript_start {
135                            let content = self.buffer.drain(desc.superscript_start..).rev();
136                            let added_len = content.len();
137
138                            self.instruction_stack.reserve(added_len);
139                            let spare =
140                                &mut self.instruction_stack.spare_capacity_mut()[..added_len];
141                            let mut idx = desc.subscript_start - desc.superscript_start;
142
143                            for e in content {
144                                if idx == added_len {
145                                    idx = 0;
146                                }
147                                spare[idx].write(e);
148                                idx += 1;
149                            }
150
151                            // Safety: The new length is less than the vector's capacity because we
152                            // reserved `added_len` previously. Every element in the vector up to
153                            // that new length is also initialized by the loop.
154                            unsafe {
155                                self.instruction_stack
156                                    .set_len(self.instruction_stack.len() + added_len)
157                            };
158                        } else {
159                            self.instruction_stack
160                                .extend(self.buffer.drain(desc.subscript_start..).rev());
161                        }
162                        Some(e)
163                    }
164                    Ok(None) => None,
165                };
166
167                self.instruction_stack.extend(self.buffer.drain(..).rev());
168                if let Some(e) = script_event {
169                    self.instruction_stack.push(Instruction::Event(e));
170                }
171                self.next()
172            }
173            None => None,
174        }
175    }
176}
177
/// A short-lived parser that performs a single parsing step on behalf of [`Parser`].
///
/// It borrows the long-lived pieces of the [`Parser`] and is consumed by
/// `InnerParser::parse_next`, which returns the result of the step along with the content that
/// remains to be parsed.
#[derive(Debug)]
struct InnerParser<'b, 'store> {
    /// The text that remains to be parsed; tokens are consumed from its front.
    content: &'store str,
    /// Staging area where the instructions generated by the current step are pushed.
    buffer: &'b mut Vec<Instruction<'store>>,
    /// State that is local to the current parsing step.
    state: ParserState<'b>,
    /// Macro definitions, consulted to expand control sequences.
    macro_context: &'b mut MacroContext<'store>,
    /// Arena handed to the macro context when expanding a macro.
    storage: &'store bumpalo::Bump,
    /// Records macro expansions so that errors can be mapped back to the input.
    span_stack: &'b mut SpanStack<'store>,
}
187
188impl<'b, 'store> InnerParser<'b, 'store> {
189    /// Parse an arugment and pushes the argument to the stack surrounded by a
190    /// group: [..., EndGroup, Argument, BeginGroup], when the argument is a subgroup.
191    /// Otherwise, it pushes the argument to the stack ungrouped.
192    fn handle_argument(&mut self, argument: Argument<'store>) -> InnerResult<()> {
193        match argument {
194            Argument::Token(token) => {
195                self.state.handling_argument = true;
196                match token {
197                    Token::ControlSequence(cs) => self.handle_primitive(cs)?,
198                    Token::Character(c) => self.handle_char_token(c)?,
199                };
200            }
201            Argument::Group(group) => {
202                self.buffer.extend([
203                    Instruction::Event(Event::Begin(Grouping::Normal)),
204                    Instruction::SubGroup {
205                        content: group,
206                        allowed_alignment_count: None,
207                    },
208                    Instruction::Event(Event::End),
209                ]);
210            }
211        };
212        Ok(())
213    }
214
215    /// ## Script parsing
216    ///
217    /// The script parser first checks for directives about script placement, i.e. `\limits` and `\nolimits`,
218    /// if the `allow_script_modifiers` flag is set on the parser state. If the flag is set, and if more than one directive is found,
219    /// the last one takes effect, as per the [`amsmath docs`][amsdocs] (section 7.3). If the flag is not set, and a limit modifying
220    /// directive is found, the parser emits an error.
221    ///
222    /// [amsdocs]: https://mirror.its.dal.ca/ctan/macros/latex/required/amsmath/amsldoc.pdf
223    fn parse(&mut self) -> InnerResult<Option<(Event<'store>, ScriptDescriptor)>> {
224        // 1. Parse the next token and output everything to the staging stack.
225        let original_content = self.content.trim_start();
226        let token = match lex::token(&mut self.content) {
227            Ok(token) => token,
228            Err(ErrorKind::Token) => return Ok(None),
229            Err(e) => return Err(e),
230        };
231        match token {
232            Token::ControlSequence(cs) => {
233                if let Some(result) =
234                    self.macro_context
235                        .try_expand_in(cs, self.content, self.storage)
236                {
237                    let (new_content, arguments_consumed_length) = result?;
238                    let call_site_length = cs.len() + arguments_consumed_length + 1;
239                    self.span_stack
240                        .add(new_content, original_content, call_site_length);
241
242                    self.content = new_content;
243                    return self.parse();
244                }
245
246                self.handle_primitive(cs)?
247            }
248            Token::Character(c) => self.handle_char_token(c)?,
249        };
250
251        // 2. Check for scripts, to complete the atom.
252        if self.state.skip_scripts {
253            return Ok(None);
254        }
255
256        if self.state.allow_script_modifiers {
257            if let Some(limits) = lex::limit_modifiers(&mut self.content) {
258                if limits {
259                    self.state.script_position = ScriptPosition::AboveBelow;
260                } else {
261                    self.state.script_position = ScriptPosition::Right;
262                }
263            }
264        }
265
266        self.content = self.content.trim_start();
267        let subscript_first = match self.content.chars().next() {
268            Some('^') => false,
269            Some('_') => true,
270            _ => return Ok(None),
271        };
272        self.content = &self.content[1..];
273
274        let first_script_start = self.buffer.len();
275        let arg = lex::argument(&mut self.content)?;
276        self.handle_argument(arg)?;
277        let second_script_start = self.buffer.len();
278        let next_char = self.content.chars().next();
279        if (next_char == Some('_') && !subscript_first)
280            || (next_char == Some('^') && subscript_first)
281        {
282            self.content = &self.content[1..];
283            let arg = lex::argument(&mut self.content)?;
284            self.handle_argument(arg)?;
285
286            match self.content.chars().next() {
287                Some('_') => return Err(ErrorKind::DoubleSubscript),
288                Some('^') => return Err(ErrorKind::DoubleSuperscript),
289                _ => {}
290            }
291        } else if next_char == Some('_') || next_char == Some('^') {
292            return Err(if subscript_first {
293                ErrorKind::DoubleSubscript
294            } else {
295                ErrorKind::DoubleSuperscript
296            });
297        }
298        let second_script_end = self.buffer.len();
299
300        Ok(Some(if second_script_start == second_script_end {
301            if subscript_first {
302                (
303                    Event::Script {
304                        ty: ScriptType::Subscript,
305                        position: self.state.script_position,
306                    },
307                    ScriptDescriptor {
308                        subscript_start: first_script_start,
309                        superscript_start: second_script_start,
310                    },
311                )
312            } else {
313                (
314                    Event::Script {
315                        ty: ScriptType::Superscript,
316                        position: self.state.script_position,
317                    },
318                    ScriptDescriptor {
319                        subscript_start: second_script_start,
320                        superscript_start: first_script_start,
321                    },
322                )
323            }
324        } else {
325            (
326                Event::Script {
327                    ty: ScriptType::SubSuperscript,
328                    position: self.state.script_position,
329                },
330                if subscript_first {
331                    ScriptDescriptor {
332                        subscript_start: first_script_start,
333                        superscript_start: second_script_start,
334                    }
335                } else {
336                    ScriptDescriptor {
337                        subscript_start: second_script_start,
338                        superscript_start: first_script_start,
339                    }
340                },
341            )
342        }))
343    }
344
345    fn parse_next(
346        mut self,
347    ) -> (
348        InnerResult<Option<(Event<'store>, ScriptDescriptor)>>,
349        &'store str,
350    ) {
351        (self.parse(), self.content)
352    }
353}
354
/// Describes where the instructions of each script start in the staging buffer.
///
/// Both values are lengths of the buffer recorded while parsing the scripts. The script that was
/// written first in the input has the smaller index. When only one script is present, the index
/// of the missing script is the buffer length recorded right after the present script was staged.
struct ScriptDescriptor {
    /// Index in the staging buffer where the subscript's instructions begin.
    subscript_start: usize,
    /// Index in the staging buffer where the superscript's instructions begin.
    superscript_start: usize,
}
359
/// A single lexed token of the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum Token<'a> {
    /// A control sequence, identified by its name.
    // NOTE(review): presumably the name excludes the leading backslash (the parser adds 1 to
    // its length when computing a macro call-site length) — confirm against the lexer.
    ControlSequence(&'a str),
    /// A single character token.
    Character(CharToken<'a>),
}
365
/// A verified character that retains the string context.
///
/// The token keeps the string slice it was built from; the character it stands for is the first
/// character of that slice.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct CharToken<'a> {
    char: &'a str,
}

impl<'a> CharToken<'a> {
    /// Wraps `s`, whose first character is the token.
    ///
    /// `s` must not be empty; this is only checked in debug builds.
    fn from_str(s: &'a str) -> Self {
        debug_assert!(
            !s.is_empty(),
            "CharToken must be constructed from a non-empty string"
        );
        CharToken { char: s }
    }

    /// Returns the string slice the token was built from.
    fn as_str(&self) -> &'a str {
        let Self { char: context } = *self;
        context
    }
}

impl From<CharToken<'_>> for char {
    /// Extracts the token's character, i.e. the first character of its string slice.
    ///
    /// Panics if the token was built from an empty string.
    fn from(token: CharToken) -> char {
        let mut characters = token.char.chars();
        characters.next().unwrap()
    }
}
391
/// An argument, as handled by `InnerParser::handle_argument`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Argument<'a> {
    /// A single token; it is handled directly and is not wrapped in a group.
    Token(Token<'a>),
    /// The text of a group; it is staged as a sub-group surrounded by begin/end group events.
    // NOTE(review): presumably the text between the braces, braces excluded — confirm against
    // `lex::argument`.
    Group(&'a str),
}
397
/// A unit of work on the parser's instruction stack.
#[derive(Debug, Clone)]
enum Instruction<'a> {
    /// Send the event: it is popped from the stack and returned by the iterator as-is.
    Event(Event<'a>),
    /// Parse the substring: the parser advances through `content` one step at a time and drops
    /// the instruction once only whitespace remains.
    SubGroup {
        /// The text that remains to be parsed in this group.
        content: &'a str,
        /// Alignment bookkeeping forwarded to the parser state for each step in this group.
        // NOTE(review): presumably `None` means alignment characters are not allowed in this
        // group — confirm against the primitives that read it.
        allowed_alignment_count: Option<AlignmentCount>,
    },
}
408
/// A counter bounded by a maximum.
///
/// The bound is not enforced by [`AlignmentCount::increment`]; callers are expected to check
/// [`AlignmentCount::can_increment`] first.
#[derive(Debug, Clone)]
struct AlignmentCount {
    /// How many increments happened since creation or since the last reset.
    count: u16,
    /// The value `count` has to stay below for another increment to be allowed.
    max: u16,
}

impl AlignmentCount {
    /// Creates a counter starting at zero with the given maximum.
    fn new(max: u16) -> Self {
        AlignmentCount { max, count: 0 }
    }

    /// Sets the count back to zero, keeping the maximum.
    fn reset(&mut self) {
        self.count = 0u16;
    }

    /// Adds one to the count, without checking it against the maximum.
    fn increment(&mut self) {
        self.count = self.count + 1;
    }

    /// Returns `true` while the count is strictly below the maximum.
    fn can_increment(&self) -> bool {
        self.max > self.count
    }
}
432
/// For error reporting purposes.
///
/// Keeps track of the original input and of the macro expansions that are currently relevant, so
/// that a position in an expanded string can be related back to where it came from.
#[derive(Debug, Clone)]
struct SpanStack<'store> {
    /// The original input given to the parser.
    input: &'store str,
    /// Expansions of macros, the most recent one last.
    expansions: Vec<ExpansionSpan<'store>>,
}

impl<'store> SpanStack<'store> {
    /// Creates a stack for `input`, with no expansion recorded yet.
    fn from_input(input: &'store str) -> Self {
        SpanStack {
            expansions: Vec::new(),
            input,
        }
    }

    /// Records a macro expansion.
    ///
    /// - `full_expansion`: the expanded text followed by the unexpanded remainder.
    /// - `call_site`: the text starting at the macro invocation, in the string being expanded.
    /// - `call_site_length`: the length of the invocation at the start of `call_site`.
    ///
    /// Expansions that `call_site` does not belong to are popped first.
    fn add(&mut self, full_expansion: &'store str, call_site: &str, call_site_length: usize) {
        let origin_start = self.reach_original_call_site(call_site.as_ptr());

        // `call_site` and `full_expansion` share the same remainder, so their difference in
        // length is the difference between the invocation and what it expanded to.
        let length_difference = call_site.len() as isize - full_expansion.len() as isize;
        let expansion_length = (call_site_length as isize - length_difference) as usize;

        let span = ExpansionSpan {
            full_expansion,
            expansion_length,
            call_site_in_origin: origin_start..origin_start + call_site_length,
        };
        self.expansions.push(span);
    }

    /// Navigate down the stack until we reach the original span for the given substring. Returns
    /// the index of the beginning of the call-site in the top-most span in the stack.
    ///
    /// Expansions that do not contain `substr_start` are popped. If none contains it, the
    /// pointer must lie within the original input, otherwise this panics.
    fn reach_original_call_site(&mut self, substr_start: *const u8) -> usize {
        let ptr_val = substr_start as isize;

        loop {
            let (span_start, span_len) = match self.expansions.last() {
                Some(top) => (
                    top.full_expansion.as_ptr() as isize,
                    top.full_expansion.len() as isize,
                ),
                None => break,
            };

            // The one-past-the-end position counts as being inside the span.
            let is_inside = ptr_val >= span_start && ptr_val <= span_start + span_len;
            if is_inside {
                return (ptr_val - span_start) as usize;
            }
            self.expansions.pop();
        }

        let input_start = self.input.as_ptr() as isize;

        assert!(ptr_val >= input_start && ptr_val <= input_start + self.input.len() as isize);
        (ptr_val - input_start) as usize
    }
}

/// A span of the input string. Used for error reporting.
/// ```text
///         full_expansion: [ -- Expanded --- | -- Rest -- ]
///                        /                   \ < expansion_length
///        [ -- Before -- | ---- Call Site ---- | -- Rest -- ]
///                       ^---------------------^
///                         call_site_in_origin
/// ```
#[derive(Debug, Clone)]
struct ExpansionSpan<'a> {
    /// The fully expanded string which is allocated in storage.
    ///
    /// This includes the expanded part and the remainder that follows it.
    full_expansion: &'a str,
    /// The index where the expanded part ends and where the rest is equivalent to the rest of the
    /// original string.
    expansion_length: usize,
    /// What the expansion replaces in the original string (where the macro invocation is in the
    /// original string).
    ///
    /// The original string is the string coming before itself in the expansion stack.
    call_site_in_origin: Range<usize>,
}
510
// Unit tests that check the exact event stream produced by the parser for small inputs.
#[cfg(test)]
mod tests {
    use crate::event::{Content, DelimiterType, RelationContent, Visual};

    use super::*;

    // `\bar{y}` yields an above/below superscript script event, followed by the grouped `y` and
    // then the overline character.
    #[test]
    fn substr_instructions() {
        let store = Storage::new();
        let parser = Parser::new("\\bar{y}", &store);

        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();

        assert_eq!(
            events,
            vec![
                Event::Script {
                    ty: ScriptType::Superscript,
                    position: ScriptPosition::AboveBelow
                },
                Event::Begin(Grouping::Normal),
                Event::Content(Content::Ordinary {
                    content: 'y',
                    stretchy: false
                }),
                Event::End,
                Event::Content(Content::Ordinary {
                    content: '‾',
                    stretchy: false,
                }),
            ]
        );
    }

    // The superscript is written first in the input, yet the subscript's events come first in
    // the output.
    #[test]
    fn subsuperscript() {
        let store = Storage::new();
        let parser = Parser::new(r"a^{1+3}_2", &store);
        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();

        assert_eq!(
            events,
            vec![
                Event::Script {
                    ty: ScriptType::SubSuperscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Ordinary {
                    content: 'a',
                    stretchy: false,
                }),
                Event::Content(Content::Number("2")),
                Event::Begin(Grouping::Normal),
                Event::Content(Content::Number("1")),
                Event::Content(Content::BinaryOp {
                    content: '+',
                    small: false
                }),
                Event::Content(Content::Number("3")),
                Event::End,
            ]
        );
    }
    // Deeply nested subscripts: every nesting level opens a group that is closed at the very end.
    #[test]
    fn subscript_torture() {
        let store = Storage::new();
        let parser = Parser::new(r"a_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_5}}}}}}}}}}}", &store);
        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();

        assert_eq!(
            events,
            vec![
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Ordinary {
                    content: 'a',
                    stretchy: false,
                }),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Begin(Grouping::Normal),
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Number("5")),
                Event::Content(Content::Number("5")),
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
                Event::End,
            ]
        )
    }

    // Scripts attach to the whole fraction; the subscript is written first here and is also
    // emitted first.
    #[test]
    fn fraction() {
        let store = Storage::new();
        let parser = Parser::new(r"\frac{1}{2}_2^4", &store);
        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();

        assert_eq!(
            events,
            vec![
                Event::Script {
                    ty: ScriptType::SubSuperscript,
                    position: ScriptPosition::Right
                },
                Event::Visual(Visual::Fraction(None)),
                Event::Begin(Grouping::Normal),
                Event::Content(Content::Number("1")),
                Event::End,
                Event::Begin(Grouping::Normal),
                Event::Content(Content::Number("2")),
                Event::End,
                Event::Content(Content::Number("2")),
                Event::Content(Content::Number("4")),
            ]
        );
    }

    // Consecutive digits form a single number event.
    #[test]
    fn multidigit_number() {
        let store = Storage::new();
        let parser = Parser::new("123", &store);
        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();

        assert_eq!(events, vec![Event::Content(Content::Number("123"))]);
    }

    // An unknown control sequence (`\fra`) reached through nested macro expansions is reported
    // as an error on the first item.
    #[test]
    fn error() {
        let store = Storage::new();
        let parser = Parser::new(
            r"\def\blah#1#2{\fra#1#2} \def\abc#1{\blah{a}#1} \abc{b}",
            &store,
        );
        let events = parser.collect::<Vec<_>>();

        assert!(events[0].is_err());
    }

    // `\nolimits` places the subscript of `\lim` to the right.
    #[test]
    fn no_limits() {
        let store = Storage::new();
        let parser = Parser::new(r#"\lim \nolimits _{x \to 0} f(x)"#, &store);
        let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
        assert_eq!(
            events,
            vec![
                Event::Script {
                    ty: ScriptType::Subscript,
                    position: ScriptPosition::Right
                },
                Event::Content(Content::Function("lim")),
                Event::Begin(Grouping::Normal),
                Event::Content(Content::Ordinary {
                    content: 'x',
                    stretchy: false
                }),
                Event::Content(Content::Relation {
                    content: RelationContent::single_char('→'),
                    small: false
                }),
                Event::Content(Content::Number("0")),
                Event::End,
                Event::Content(Content::Ordinary {
                    content: 'f',
                    stretchy: false
                }),
                Event::Content(Content::Delimiter {
                    content: '(',
                    size: None,
                    ty: DelimiterType::Open
                }),
                Event::Content(Content::Ordinary {
                    content: 'x',
                    stretchy: false
                }),
                Event::Content(Content::Delimiter {
                    content: ')',
                    size: None,
                    ty: DelimiterType::Close
                }),
            ]
        );
    }

    // Expansions recorded inside a group must be gone from the span stack by the time an error
    // is reported for the top-level input.
    #[test]
    fn expansions_in_groups() {
        let store = Storage::new();
        let mut parser = Parser::new(
            r"\def\abc#1{#1} {\abc{a} + \abc{b}} = c \shoulderror",
            &store,
        );
        assert!(parser.by_ref().collect::<Result<Vec<_>, _>>().is_err());
        assert!(parser.span_stack.expansions.is_empty());
    }
}
777
778// Token parsing procedure, as per TeXbook p. 46-47.
779//
780// This is roughly what the lexer implementation will look like for text mode.
781//
782// 1. Trim any trailing whitespace from a line.
783//
784// 2. If '\' (escape character) is encountered, parse the next token.
//  'is_ascii_alphabetic' => parse until a non-ASCII-alphabetic character, and the name is the token
786//  '\n' => _The name is empty_???
787//  'otherwise' => parse next character, and the name is the symbol.
788//
789//  Go to SkipBlanks mode if the token is a word or a space symbol.
790//  Otherwise, go to MidLine mode.
791//
// 3. If `^^` is found:
//  - If the following are two characters of type ASCII lowercase letter or digit,
//  then `^^__` is converted to the corresponding ASCII value.
//  - If the following is a single ASCII character, then `^^_` is converted to the corresponding ASCII
//  value with the formula: if `c` is the character, then `c + 64` if the character has code
//  between 0 and 63, and `c - 64` if the character has code between 64 and 127.
798//
799//  __Note__: This rule takes precedence over escape character parsing. If such a sequence is found
800//  in an escape sequence, it is converted to the corresponding ASCII value.
801//
802// 4. If the token is a single character, go to MidLine mode.
803//
// 5. If the token is an end of line, go to the next line. If nothing was on the line (we were in the NewLine state), then the
//  `par` token is emitted, meaning that a new paragraph should be started.
806//  If the state was MidLine, then the newline is transformed into a space.
807//  If the state was SkipBlanks, then the newline is ignored.
808//
809// 6. Ignore characters from the `Ignore` category.
810//
811// 7. If the token is a space and the mode is MidLine, the space is transformed into a space token.
812//
813// 8. If the token is a comment, ignore the rest of the line, and go to the next line.
814//
815// 9. Go to newlines on the next line.