mathml_latex/parser/
mod.rs1use crate::{block::LaTeXCommand, LaTeXBlock};
2
3use pex::{helpers::whitespace, ParseResult, ParseState, StopBecause};
4
5mod block;
6mod sup_sub;
7
8pub fn parse_latex(s: &str) -> Result<LaTeXNode, StopBecause> {
9 let state = ParseState::new(s.trim_end()).skip(whitespace);
10 match LaTeXNode::parse(state) {
11 ParseResult::Pending(state, compound) if state.is_empty() => Ok(compound),
12 ParseResult::Pending(state, ..) => Err(StopBecause::ExpectEof { position: state.start_offset }),
13 ParseResult::Stop(e) => Err(e),
14 }
15}
16
17#[derive(Clone, Debug)]
18pub enum LaTeXNode<'i> {
19 ArticleRoot {
20 children: Vec<LaTeXNode<'i>>,
21 },
22 ArticleText {
23 text: &'i str,
24 },
25 MathRoot {
26 children: Vec<LaTeXNode<'i>>,
27 },
28 Row {
29 children: Vec<LaTeXNode<'i>>,
30 },
31 Block(LaTeXBlock<'i>),
32 Command(LaTeXCommand<'i>),
33 MathText {
34 text: &'i str,
35 },
36 Number {
37 number: &'i str,
38 },
39 Operation {
40 operator: &'i str,
41 },
42 Superscript {
43 lhs: Box<LaTeXNode<'i>>,
44 rhs: Box<LaTeXNode<'i>>,
45 },
46 Letter {
47 identifier: &'i str,
48 },
49 NewLine,
51 Ampersand,
53}
54
55impl<'i> LaTeXNode<'i> {
56 pub fn refine(self) -> Self {
57 match self {
58 LaTeXNode::Row { mut children } => {
59 if children.len() == 1 {
60 children.remove(0)
61 }
62 else {
63 LaTeXNode::Row { children }
64 }
65 }
66 _ => self,
67 }
68 }
69}
70
71impl<'i> LaTeXNode<'i> {
72 pub fn parse(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
73 let (state, node) = input.begin_choice().or_else(Self::parse_combined).or_else(Self::parse_row).end_choice()?;
74 state.finish(node)
75 }
76 fn parse_block(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
77 let (state, block) = LaTeXBlock::parse(input)?;
78 state.finish(LaTeXNode::Block(block))
79 }
80 fn parse_combined(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
81 let (state, lhs) = input.begin_choice().or_else(Self::parse_super_script).end_choice()?;
82 state.finish(lhs)
83 }
84 fn parse_group(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
86 let (state, _) = input.match_char('{')?;
87 let (state, children) = state.match_repeats(LaTeXNode::parse_atomic)?;
88 let (state, _) = state.skip(whitespace).match_char('}')?;
89 state.finish(LaTeXNode::Row { children }.refine())
90 }
91 fn parse_row(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
93 let (state, children) = input.match_repeats(LaTeXNode::parse_atomic)?;
94 state.finish(LaTeXNode::Row { children }.refine())
95 }
96 fn parse_command(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
97 let (state, _) = input.match_char('\\')?;
98 let (state, cmd) = state
99 .begin_choice()
100 .or_else(|state| state.match_char(' ').map_inner(|_| " "))
101 .or_else(|state| state.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA"))
102 .end_choice()?;
103 if cmd.eq("begin") {
104 Err(StopBecause::ShouldNotBe { message: "\\begin", position: state.start_offset })?;
105 }
106 if cmd.eq("end") {
107 Err(StopBecause::ShouldNotBe { message: "\\end", position: state.start_offset })?;
108 }
109 let (state, args) = state.match_repeats(|state| state.skip(whitespace).match_fn(LaTeXNode::parse_group))?;
110 state.finish(LaTeXNode::Command(LaTeXCommand { name: cmd, children: args }))
111 }
112 fn parse_atomic(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
113 let (state, node) = input
114 .skip(whitespace)
115 .begin_choice()
116 .or_else(Self::parse_block)
117 .or_else(Self::parse_group)
118 .or_else(Self::parse_command)
119 .or_else(Self::parse_letter)
120 .or_else(Self::parse_operator)
121 .or_else(Self::parse_number)
122 .or_else(Self::parse_special)
123 .end_choice()?;
124 state.finish(node)
125 }
126 fn parse_number(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
133 let (state, dec) = input.begin_choice().or_else(pex::helpers::dec_str).end_choice()?;
134 state.finish(LaTeXNode::Number { number: dec })
135 }
136 pub fn parse_letter(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
139 let (state, x) = input.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA")?;
140 state.finish(LaTeXNode::Letter { identifier: x })
141 }
142 fn parse_operator(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
143 let (state, dec) = input
144 .begin_choice()
145 .or_else(|state| state.match_str("+", false))
146 .or_else(|state| state.match_str("-", false).map_inner(|_| "−"))
147 .end_choice()?;
148 state.finish(LaTeXNode::Operation { operator: dec })
149 }
150 fn parse_special(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
151 let (state, item) = input
152 .begin_choice()
153 .or_else(|state| state.match_str("\\\\", false).map_inner(|_| LaTeXNode::NewLine))
154 .or_else(|state| state.match_str("&", false).map_inner(|_| LaTeXNode::Ampersand))
155 .end_choice()?;
156 state.finish(item)
157 }
158}