1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
use crate::{block::LaTeXCommand, LaTeXBlock};

use pex::{helpers::whitespace, ParseResult, ParseState, StopBecause};

mod block;
mod sup_sub;

/// Parses a whole LaTeX source string into a single [`LaTeXNode`].
///
/// Trailing whitespace of `s` is trimmed and leading whitespace is skipped
/// before [`LaTeXNode::parse`] runs.
///
/// # Errors
///
/// - Returns the parser's own stop reason when [`LaTeXNode::parse`] fails.
/// - Returns [`StopBecause::ExpectEof`] (carrying the offset where parsing
///   stopped) when parsing succeeded but left unconsumed input behind.
pub fn parse_latex(s: &str) -> Result<LaTeXNode, StopBecause> {
    let input = ParseState::new(s.trim_end()).skip(whitespace);
    let (rest, node) = match LaTeXNode::parse(input) {
        ParseResult::Pending(rest, node) => (rest, node),
        ParseResult::Stop(reason) => return Err(reason),
    };
    // A successful parse only counts when nothing is left over.
    if rest.is_empty() {
        Ok(node)
    }
    else {
        Err(StopBecause::ExpectEof { position: rest.start_offset })
    }
}

/// A node of the tree produced by the LaTeX parser.
///
/// String fields are `&'i str` slices; the parsers in this file fill them
/// with slices of the input (or, for a few fixed symbols, string literals).
#[derive(Clone, Debug)]
pub enum LaTeXNode<'i> {
    /// Root node holding a list of children.
    ///
    /// NOTE(review): not constructed by the parsers visible in this file;
    /// judging by the name it is the root of text (article) content — confirm.
    ArticleRoot {
        children: Vec<LaTeXNode<'i>>,
    },
    /// A piece of text.
    ///
    /// NOTE(review): not constructed by the parsers visible in this file;
    /// presumably plain text inside an article — confirm.
    ArticleText {
        text: &'i str,
    },
    /// Root node holding a list of children.
    ///
    /// NOTE(review): not constructed by the parsers visible in this file;
    /// judging by the name it is the root of math content — confirm.
    MathRoot {
        children: Vec<LaTeXNode<'i>>,
    },
    /// An ordered sequence of nodes, produced for a `{ ... }` group or for a
    /// bare run of atoms. [`LaTeXNode::refine`] replaces a row that has
    /// exactly one child with that child.
    Row {
        children: Vec<LaTeXNode<'i>>,
    },
    /// A block parsed by `LaTeXBlock::parse`.
    Block(LaTeXBlock<'i>),
    /// A backslash command: its name plus the `{ ... }` groups that follow it.
    Command(LaTeXCommand<'i>),
    /// A piece of text.
    ///
    /// NOTE(review): not constructed by the parsers visible in this file;
    /// presumably text embedded in math — confirm.
    MathText {
        text: &'i str,
    },
    /// A number, kept as the text matched by `pex::helpers::dec_str`.
    Number {
        number: &'i str,
    },
    /// An operator symbol. The parser in this file emits `+` as written and
    /// stores `−` when the input contains an ASCII `-`.
    Operation {
        operator: &'i str,
    },
    /// A pair of nodes joined by a superscript.
    ///
    /// NOTE(review): built by `parse_super_script`, which is not shown here;
    /// presumably `lhs` is the base and `rhs` the raised part — confirm.
    Superscript {
        lhs: Box<LaTeXNode<'i>>,
        rhs: Box<LaTeXNode<'i>>,
    },
    /// A run of ASCII alphabetic characters, e.g. `a` or `ax`.
    Letter {
        identifier: &'i str,
    },
    /// The line-break token `\\`.
    NewLine,
    /// The separator token `&`.
    Ampersand,
}

impl<'i> LaTeXNode<'i> {
    /// Simplifies a node by unwrapping a trivial row.
    ///
    /// A [`LaTeXNode::Row`] with exactly one child is replaced by that child.
    /// Every other node, including empty rows and rows with several
    /// children, is returned unchanged. Only the outermost node is inspected.
    pub fn refine(self) -> Self {
        match self {
            // The guard runs before `children` is moved out, so a row that
            // does not qualify falls through to the next arm intact.
            LaTeXNode::Row { mut children } if children.len() == 1 => children.remove(0),
            other => other,
        }
    }
}

impl<'i> LaTeXNode<'i> {
    /// Parses one node from `input`.
    ///
    /// The combined form ([`Self::parse_combined`]) is tried first; when it
    /// does not match, the input is parsed as a row of atoms.
    pub fn parse(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let alternatives = input.begin_choice().or_else(Self::parse_combined).or_else(Self::parse_row);
        let (rest, parsed) = alternatives.end_choice()?;
        rest.finish(parsed)
    }
    /// Parses a [`LaTeXBlock`] and wraps it in [`LaTeXNode::Block`].
    fn parse_block(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (rest, inner) = LaTeXBlock::parse(input)?;
        rest.finish(LaTeXNode::Block(inner))
    }
    /// Parses a combined expression; the only alternative currently
    /// registered is `parse_super_script` (defined outside this block).
    fn parse_combined(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let alternatives = input.begin_choice().or_else(Self::parse_super_script);
        let (rest, combined) = alternatives.end_choice()?;
        rest.finish(combined)
    }
    /// `group := '{' atomic* '}'`
    ///
    /// Whitespace before the closing brace is skipped. The collected atoms
    /// form a [`LaTeXNode::Row`] that is passed through [`LaTeXNode::refine`].
    fn parse_group(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (after_open, _) = input.match_char('{')?;
        let (after_body, children) = after_open.match_repeats(LaTeXNode::parse_atomic)?;
        let (rest, _) = after_body.skip(whitespace).match_char('}')?;
        let group = LaTeXNode::Row { children };
        rest.finish(group.refine())
    }
    /// `row := atomic*`
    ///
    /// The collected atoms form a [`LaTeXNode::Row`] that is passed through
    /// [`LaTeXNode::refine`].
    fn parse_row(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (rest, children) = input.match_repeats(LaTeXNode::parse_atomic)?;
        let row = LaTeXNode::Row { children };
        rest.finish(row.refine())
    }
    /// Parses a backslash command followed by its `{ ... }` arguments.
    ///
    /// The name is either a single space (`\ `) or a run of ASCII letters.
    /// The names `begin` and `end` are rejected here with
    /// [`StopBecause::ShouldNotBe`]. Whitespace before each argument group
    /// is skipped.
    fn parse_command(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (after_slash, _) = input.match_char('\\')?;
        let (after_name, name) = after_slash
            .begin_choice()
            .or_else(|s| s.match_char(' ').map_inner(|_| " "))
            .or_else(|s| s.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA"))
            .end_choice()?;
        // Names that must not be treated as ordinary commands.
        let reserved = match name {
            "begin" => Some("\\begin"),
            "end" => Some("\\end"),
            _ => None,
        };
        if let Some(message) = reserved {
            Err(StopBecause::ShouldNotBe { message, position: after_name.start_offset })?;
        }
        let (rest, children) = after_name.match_repeats(|s| s.skip(whitespace).match_fn(LaTeXNode::parse_group))?;
        rest.finish(LaTeXNode::Command(LaTeXCommand { name, children }))
    }
    /// Parses a single atom after skipping leading whitespace.
    ///
    /// Alternatives are tried in this order: block, group, command, letter,
    /// operator, number, special token.
    fn parse_atomic(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let trimmed = input.skip(whitespace);
        let (rest, atom) = trimmed
            .begin_choice()
            .or_else(Self::parse_block)
            .or_else(Self::parse_group)
            .or_else(Self::parse_command)
            .or_else(Self::parse_letter)
            .or_else(Self::parse_operator)
            .or_else(Self::parse_number)
            .or_else(Self::parse_special)
            .end_choice()?;
        rest.finish(atom)
    }
    // pub fn parse_maybe_digit(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
    //     let (state, node) = input.begin_choice().or_else(Self::parse_number).or_else(Self::parse_letter).end_choice()?;
    //     state.finish(node)
    // }
    /// Parses a number with `pex::helpers::dec_str` into
    /// [`LaTeXNode::Number`], e.g. `1` or `1.0`.
    fn parse_number(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let alternatives = input.begin_choice().or_else(pex::helpers::dec_str);
        let (rest, number) = alternatives.end_choice()?;
        rest.finish(LaTeXNode::Number { number })
    }
    /// Parses a run of ASCII alphabetic characters, e.g. `a` or `ax`, into a
    /// single [`LaTeXNode::Letter`].
    pub fn parse_letter(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (rest, identifier) = input.match_str_if(|c| c.is_ascii_alphabetic(), "ASCII_ALPHA")?;
        rest.finish(LaTeXNode::Letter { identifier })
    }
    /// Parses `+` or `-` into [`LaTeXNode::Operation`].
    ///
    /// `+` is stored as matched; `-` is stored as `−`.
    fn parse_operator(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (rest, operator) = input
            .begin_choice()
            .or_else(|s| s.match_str("+", false))
            .or_else(|s| s.match_str("-", false).map_inner(|_| "−"))
            .end_choice()?;
        rest.finish(LaTeXNode::Operation { operator })
    }
    /// Parses the tokens `\\` ([`LaTeXNode::NewLine`]) and `&`
    /// ([`LaTeXNode::Ampersand`]).
    fn parse_special(input: ParseState<'i>) -> ParseResult<LaTeXNode<'i>> {
        let (rest, token) = input
            .begin_choice()
            .or_else(|s| s.match_str("\\\\", false).map_inner(|_| LaTeXNode::NewLine))
            .or_else(|s| s.match_str("&", false).map_inner(|_| LaTeXNode::Ampersand))
            .end_choice()?;
        rest.finish(token)
    }
}