mathml_latex/parser/
mod.rs

1use crate::{block::LaTeXCommand, LaTeXBlock};
2
3use pex::{helpers::whitespace, ParseResult, ParseState, StopBecause};
4
5mod block;
6mod sup_sub;
7
8pub fn parse_latex(s: &str) -> Result<LaTeXNode, StopBecause> {
9    let state = ParseState::new(s.trim_end()).skip(whitespace);
10    match LaTeXNode::parse(state) {
11        ParseResult::Pending(state, compound) if state.is_empty() => Ok(compound),
12        ParseResult::Pending(state, ..) => Err(StopBecause::ExpectEof { position: state.start_offset }),
13        ParseResult::Stop(e) => Err(e),
14    }
15}
16
17#[derive(Clone, Debug)]
18pub enum LaTeXNode<'i> {
19    ArticleRoot {
20        children: Vec<LaTeXNode<'i>>,
21    },
22    ArticleText {
23        text: &'i str,
24    },
25    MathRoot {
26        children: Vec<LaTeXNode<'i>>,
27    },
28    Row {
29        children: Vec<LaTeXNode<'i>>,
30    },
31    Block(LaTeXBlock<'i>),
32    Command(LaTeXCommand<'i>),
33    MathText {
34        text: &'i str,
35    },
36    Number {
37        number: &'i str,
38    },
39    Operation {
40        operator: &'i str,
41    },
42    Superscript {
43        lhs: Box<LaTeXNode<'i>>,
44        rhs: Box<LaTeXNode<'i>>,
45    },
46    Letter {
47        identifier: &'i str,
48    },
49    /// `\\`
50    NewLine,
51    /// `&`
52    Ampersand,
53}
54
55impl<'i> LaTeXNode<'i> {
56    pub fn refine(self) -> Self {
57        match self {
58            LaTeXNode::Row { mut children } => {
59                if children.len() == 1 {
60                    children.remove(0)
61                }
62                else {
63                    LaTeXNode::Row { children }
64                }
65            }
66            _ => self,
67        }
68    }
69}
70
71impl<'i> LaTeXNode<'i> {
72    pub fn parse(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
73        let (state, node) = input.begin_choice().or_else(Self::parse_combined).or_else(Self::parse_row).end_choice()?;
74        state.finish(node)
75    }
76    fn parse_block(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
77        let (state, block) = LaTeXBlock::parse(input)?;
78        state.finish(LaTeXNode::Block(block))
79    }
80    fn parse_combined(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
81        let (state, lhs) = input.begin_choice().or_else(Self::parse_super_script).end_choice()?;
82        state.finish(lhs)
83    }
84    /// `group := '{' atomic* '}'`
85    fn parse_group(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
86        let (state, _) = input.match_char('{')?;
87        let (state, children) = state.match_repeats(LaTeXNode::parse_atomic)?;
88        let (state, _) = state.skip(whitespace).match_char('}')?;
89        state.finish(LaTeXNode::Row { children }.refine())
90    }
91    /// `row := atomic*`
92    fn parse_row(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
93        let (state, children) = input.match_repeats(LaTeXNode::parse_atomic)?;
94        state.finish(LaTeXNode::Row { children }.refine())
95    }
96    fn parse_command(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
97        let (state, _) = input.match_char('\\')?;
98        let (state, cmd) = state
99            .begin_choice()
100            .or_else(|state| state.match_char(' ').map_inner(|_| " "))
101            .or_else(|state| state.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA"))
102            .end_choice()?;
103        if cmd.eq("begin") {
104            Err(StopBecause::ShouldNotBe { message: "\\begin", position: state.start_offset })?;
105        }
106        if cmd.eq("end") {
107            Err(StopBecause::ShouldNotBe { message: "\\end", position: state.start_offset })?;
108        }
109        let (state, args) = state.match_repeats(|state| state.skip(whitespace).match_fn(LaTeXNode::parse_group))?;
110        state.finish(LaTeXNode::Command(LaTeXCommand { name: cmd, children: args }))
111    }
112    fn parse_atomic(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
113        let (state, node) = input
114            .skip(whitespace)
115            .begin_choice()
116            .or_else(Self::parse_block)
117            .or_else(Self::parse_group)
118            .or_else(Self::parse_command)
119            .or_else(Self::parse_letter)
120            .or_else(Self::parse_operator)
121            .or_else(Self::parse_number)
122            .or_else(Self::parse_special)
123            .end_choice()?;
124        state.finish(node)
125    }
126    // pub fn parse_maybe_digit(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
127    //     let (state, node) = input.begin_choice().or_else(Self::parse_number).or_else(Self::parse_letter).end_choice()?;
128    //     state.finish(node)
129    // }
130    // 1
131    // 1.0
132    fn parse_number(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
133        let (state, dec) = input.begin_choice().or_else(pex::helpers::dec_str).end_choice()?;
134        state.finish(LaTeXNode::Number { number: dec })
135    }
136    /// a
137    /// ax
138    pub fn parse_letter(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
139        let (state, x) = input.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA")?;
140        state.finish(LaTeXNode::Letter { identifier: x })
141    }
142    fn parse_operator(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
143        let (state, dec) = input
144            .begin_choice()
145            .or_else(|state| state.match_str("+", false))
146            .or_else(|state| state.match_str("-", false).map_inner(|_| "−"))
147            .end_choice()?;
148        state.finish(LaTeXNode::Operation { operator: dec })
149    }
150    fn parse_special(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
151        let (state, item) = input
152            .begin_choice()
153            .or_else(|state| state.match_str("\\\\", false).map_inner(|_| LaTeXNode::NewLine))
154            .or_else(|state| state.match_str("&", false).map_inner(|_| LaTeXNode::Ampersand))
155            .end_choice()?;
156        state.finish(item)
157    }
158}