Skip to main content

asciimath_parser/
parse.rs

1use crate::tree::{
2    Expression, Frac, Func, Group, Intermediate, Matrix, Script, ScriptFunc, Simple, SimpleBinary,
3    SimpleFunc, SimpleScript, SimpleUnary,
4};
5use crate::{Token, Tokenizer};
6
7fn next_simple<'a>(
8    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
9    stop: Option<Token>,
10) -> Option<Simple<'a>> {
11    let cloned = tokens.clone();
12    match tokens.next() {
13        Some((_, token)) if Some(token) == stop => {
14            *tokens = cloned; // rewind
15            None
16        }
17        Some((num, Token::Number)) => Some(Simple::Number(num)),
18        Some((text, Token::Text)) => Some(Simple::Text(text)),
19        Some((ident, Token::Ident)) => Some(Simple::Ident(ident)),
20        Some((symb, Token::Symbol)) => Some(Simple::Symbol(symb)),
21        Some((unary, Token::Unary)) => {
22            Some(SimpleUnary::new(unary, next_simple(tokens, None).unwrap_or_default()).into())
23        }
24        Some((func, Token::Function)) => {
25            Some(SimpleFunc::new(func, next_simple(tokens, None).unwrap_or_default()).into())
26        }
27        Some((binary, Token::Binary)) => Some(
28            SimpleBinary::new(
29                binary,
30                next_simple(tokens, None).unwrap_or_default(),
31                next_simple(tokens, None).unwrap_or_default(),
32            )
33            .into(),
34        ),
35        Some((_, Token::CloseBracket)) => {
36            // always stop on close bracket
37            *tokens = cloned; // rewind
38            None
39        }
40        Some((open, Token::OpenBracket)) => {
41            let cloned = tokens.clone();
42            // first try to parse matrix
43            Some(if let Some(matrix) = next_matrix(tokens, open) {
44                matrix.into()
45            } else {
46                *tokens = cloned; // rewind before matrix
47                next_open_group(tokens, open).into()
48            })
49        }
50        Some((open, Token::OpenCloseBracket)) => Some(next_open_close_group(tokens, open)),
51        Some((raw, Token::Frac | Token::Super | Token::Sub | Token::Sep)) => {
52            Some(Simple::Symbol(raw))
53        }
54        None => None,
55    }
56}
57
58fn next_open_group<'a>(
59    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
60    open: &'a str,
61) -> Group<'a> {
62    let expr = next_expression(tokens, None);
63    let close = match tokens.next() {
64        Some((bracket, Token::CloseBracket)) => bracket,
65        Some(_) => unreachable!("terminated on non-closing-bracket token"),
66        None => "",
67    };
68    Group::new(open, expr, close)
69}
70
71fn next_open_close_group<'a>(
72    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
73    open: &'a str,
74) -> Simple<'a> {
75    let cloned = tokens.clone();
76    if let Some(first) = next_intermediate(tokens, None) {
77        // Here we take the first token, even if it's another OpenCloseBracket
78        let mut inters = vec![first];
79        while let Some(inter) = next_intermediate(tokens, Some(Token::OpenCloseBracket)) {
80            inters.push(inter);
81        }
82        match tokens.next() {
83            Some((close, Token::OpenCloseBracket)) => {
84                Simple::Group(Group::new(open, inters, close))
85            }
86            Some((_, Token::CloseBracket)) | None => {
87                *tokens = cloned; // rewind
88                Simple::Symbol(open)
89            }
90            Some(_) => unreachable!("terminated on non-bracket token"),
91        }
92    } else {
93        // empty so must return symbol
94        Simple::Symbol(open)
95    }
96}
97
98fn next_expression<'a>(
99    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
100    stop: Option<Token>,
101) -> Expression<'a> {
102    let mut inters = Vec::new();
103    while let Some(inter) = next_intermediate(tokens, stop) {
104        inters.push(inter);
105    }
106    inters.into()
107}
108
109fn next_matrix_row<'a>(
110    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
111    exprs: &mut impl Extend<Expression<'a>>,
112) -> Option<(&'a str, usize, &'a str)> {
113    let open = match tokens.next() {
114        Some((open, Token::OpenBracket)) => Some(open),
115        _ => None,
116    }?;
117    let mut len = 1;
118    exprs.extend([next_expression(tokens, Some(Token::Sep))]);
119    loop {
120        match tokens.next() {
121            Some((_, Token::Sep)) => {
122                exprs.extend([next_expression(tokens, Some(Token::Sep))]);
123                len += 1;
124            }
125            Some((close, Token::CloseBracket)) => {
126                return Some((open, len, close));
127            }
128            _ => return None,
129        }
130    }
131}
132
133fn next_matrix<'a>(
134    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
135    left: &'a str,
136) -> Option<Matrix<'a>> {
137    let mut data = Vec::new();
138    let (open, num_cols, close) = next_matrix_row(tokens, &mut data)?;
139    loop {
140        match tokens.next() {
141            Some((_, Token::Sep)) => {
142                let (no, ncols, nc) = next_matrix_row(tokens, &mut data)?;
143                if no != open || ncols != num_cols || nc != close {
144                    return None;
145                }
146            }
147            Some((right, Token::CloseBracket))
148                if data.len() > 1 && open == left && close == right =>
149            {
150                return Some(Matrix::new(left, data, num_cols, right));
151            }
152            _ => return None,
153        }
154    }
155}
156
157fn next_script<'a>(tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone)) -> Script<'a> {
158    let cloned = tokens.clone();
159    match tokens.next() {
160        Some((_, Token::Super)) => Script::Super(next_simple(tokens, None).unwrap_or_default()),
161        Some((_, Token::Sub)) => {
162            let sub = next_simple(tokens, None).unwrap_or_default();
163            let cloned = tokens.clone();
164            if let Some((_, Token::Super)) = tokens.next() {
165                Script::Subsuper(sub, next_simple(tokens, None).unwrap_or_default())
166            } else {
167                *tokens = cloned; // rewind
168                Script::Sub(sub)
169            }
170        }
171        _ => {
172            *tokens = cloned; // rewind
173            Script::None
174        }
175    }
176}
177
178fn next_script_func<'a>(
179    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
180    stop: Option<Token>,
181) -> Option<ScriptFunc<'a>> {
182    let cloned = tokens.clone();
183    if let Some((func, Token::Function)) = tokens.next() {
184        Some(
185            Func::new(
186                func,
187                next_script(tokens),
188                next_script_func(tokens, None).unwrap_or_default(),
189            )
190            .into(),
191        )
192    } else {
193        *tokens = cloned; // rewind
194        next_simple(tokens, stop).map(|simp| SimpleScript::new(simp, next_script(tokens)).into())
195    }
196}
197
198fn next_intermediate<'a>(
199    tokens: &mut (impl Iterator<Item = (&'a str, Token)> + Clone),
200    stop: Option<Token>,
201) -> Option<Intermediate<'a>> {
202    next_script_func(tokens, stop).map(|base| {
203        let cloned = tokens.clone();
204        if let Some((_, Token::Frac)) = tokens.next() {
205            Intermediate::Frac(Frac::new(
206                base,
207                next_script_func(tokens, None).unwrap_or_default(),
208            ))
209        } else {
210            *tokens = cloned; // rewind
211            Intermediate::ScriptFunc(base)
212        }
213    })
214}
215
216/// Parse a tokenized expression
217pub fn parse_tokens<'a, T, I>(tokens: T) -> Expression<'a>
218where
219    I: Iterator<Item = (&'a str, Token)> + Clone,
220    T: IntoIterator<IntoIter = I>,
221{
222    let mut tokens = tokens.into_iter().fuse();
223    let mut inters = Vec::new();
224    while let Some((close, Token::CloseBracket)) = {
225        while let Some(inter) = next_intermediate(&mut tokens, None) {
226            inters.push(inter);
227        }
228        tokens.next()
229    } {
230        // NOTE we could insert the token as an extra symbol instead of closing with an invisible
231        // bracket
232        let group = Simple::Group(Group::new("", inters, close));
233        inters = vec![group.into()];
234    }
235    // NOTE this can still hide errors if the last token is unexpected
236    debug_assert!(tokens.next().is_none(), "didn't exhaust tokens");
237    Expression::from(inters)
238}
239
240/// Parse a string returning an asciimath expression
241///
242/// This uses an extended set of asciimath tokens that are accessible in [`crate::ASCIIMATH_TOKENS`].
243#[must_use]
244pub fn parse(inp: &str) -> Expression<'_> {
245    parse_tokens(Tokenizer::new(inp))
246}
247
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::tree::{
        Expression, Frac, Func, Group, Intermediate, Matrix, Simple, SimpleBinary, SimpleFunc,
        SimpleScript, SimpleUnary,
    };

    // Scripts bind tighter than function application, which binds tighter than `/`.
    #[test]
    fn complex_precedence() {
        let expected = Expression::from_iter([Frac::new(
            Func::with_subsuper(
                "sin",
                Simple::Ident("a"),
                Simple::Ident("b"),
                SimpleScript::with_sub(Simple::Ident("c"), Simple::Ident("d")),
            ),
            SimpleScript::with_subsuper(
                Group::from_iter("(", [SimpleUnary::new("abs", Simple::Ident("h"))], ")"),
                Simple::Ident("i"),
                Simple::Ident("j"),
            ),
        )]);
        assert_eq!(parse("sin_a^b c_d / (abs h)_i^j"), expected);
    }

    // A dangling `_` gets a missing subscript.
    #[test]
    fn missing_sub() {
        let expected =
            Expression::from_iter([SimpleScript::with_sub(Simple::Ident("a"), Simple::Missing)]);
        assert_eq!(parse("a_"), expected);
    }

    // A dangling `^` gets a missing superscript.
    #[test]
    fn missing_super() {
        let expected = Expression::from_iter([SimpleScript::with_super(
            Simple::Ident("a"),
            Simple::Missing,
        )]);
        assert_eq!(parse("a^"), expected);
    }

    // A close bracket ends the superscript operand instead of being consumed by it.
    #[test]
    fn missing_group_subsuper() {
        // NOTE crashes asciimath
        let expected = Expression::from_iter([Group::from_iter(
            "(",
            [SimpleScript::with_subsuper(
                Simple::Ident("a"),
                Simple::Ident("b"),
                Simple::Missing,
            )],
            ")",
        )]);
        assert_eq!(parse("(a_b^)"), expected);
    }

    // A close bracket ends the unary operand instead of being consumed by it.
    #[test]
    fn missing_group_unary() {
        // NOTE crashes asciimath
        let expected = Expression::from_iter([Group::from_iter(
            "(",
            [SimpleUnary::new("sqrt", Simple::Missing)],
            ")",
        )]);
        assert_eq!(parse("(sqrt)"), expected);
    }

    // An unmatched close bracket closes a group with an empty opening bracket.
    #[test]
    fn unmatched_close() {
        let expected = Expression::from_iter([Group::new("", Expression::default(), ")")]);
        assert_eq!(parse(")"), expected);
    }

    // A pair of bars around content forms a group.
    #[test]
    fn simple_bracket_matching() {
        let expected = Expression::from_iter([Group::from_iter("|", [Simple::Ident("a")], "|")]);
        assert_eq!(parse("|a|"), expected);
    }

    // Bars are paired as early as possible: "|:a:|b|:c:|"
    #[test]
    fn eager_bracket_matching() {
        let expected = Expression::from_iter([
            Group::from_iter("|", [Simple::Ident("a")], "|").into(),
            Simple::Ident("b"),
            Group::from_iter("|", [Simple::Ident("c")], "|").into(),
        ]);
        assert_eq!(parse("|a|b|c|"), expected);
    }

    // A bar never pairs across a close bracket: "(:a|b:)c|d" not "(a|:b)c:|d"
    #[test]
    fn close_bracket_matching() {
        let expected = Expression::from_iter([
            Group::from_iter(
                "(",
                [Simple::Ident("a"), Simple::Symbol("|"), Simple::Ident("b")],
                ")",
            )
            .into(),
            Simple::Ident("c"),
            Simple::Symbol("|"),
            Simple::Ident("d"),
        ]);
        assert_eq!(parse("(a|b)c|d"), expected);
    }

    // Bars with nothing between them stay plain symbols.
    #[test]
    fn open_close_nonempty() {
        let expected = Expression::from_iter([Simple::Symbol("|"), Simple::Symbol("|")]);
        assert_eq!(parse("| |"), expected);
    }

    // Doubled bars nest as a group inside a group.
    #[test]
    fn double_open_close() {
        let expected = Expression::from_iter([Group::from_iter(
            "|",
            [Group::from_iter("|", [Simple::Ident("x")], "|")],
            "|",
        )]);
        assert_eq!(parse("||x||"), expected);
    }

    // A function without scripts applied to one argument.
    #[test]
    fn simple_function() {
        let expected = Expression::from_iter([Func::without_scripts("sin", Simple::Ident("x"))]);
        assert_eq!(parse("sin x"), expected);
    }

    // A function inside a script takes a simple argument; the outer argument may itself be a
    // scripted function.
    #[test]
    fn complex_function() {
        let expected = Expression::from_iter([Func::with_sub(
            "sin",
            SimpleFunc::new("cos", Simple::Ident("a")),
            Func::with_super("cos", Simple::Ident("b"), Simple::Ident("c")),
        )]);
        assert_eq!(parse("sin_cos a cos^b c"), expected);
    }

    // The function argument keeps its own superscript, and the whole application is the
    // numerator of the fraction.
    #[test]
    fn unary_power_precidence() {
        // built through the array conversion so that path stays covered as well
        let expected = Expression::from([Intermediate::Frac(Frac::new(
            Func::with_sub(
                "sin",
                Simple::Ident("a"),
                SimpleScript::with_super(Simple::Ident("b"), Simple::Ident("c")),
            ),
            Simple::Ident("d"),
        ))]);
        assert_eq!(parse("sin_a b^c / d"), expected);
    }

    // Rows with matching brackets and equal lengths form a matrix stored in row-major order.
    #[test]
    fn matrix_parsing() {
        let expected = Expression::from_iter([Matrix::new(
            "[",
            [
                Expression::from_iter([Simple::Ident("a")]),
                Expression::from_iter([Simple::Ident("b")]),
                Expression::from_iter([Simple::Ident("c")]),
                Expression::from_iter([Simple::Ident("d")]),
            ],
            2,
            "]",
        )]);
        assert_eq!(parse("[[a, b], [c, d]]"), expected);
    }

    // A single cell is not a matrix; it stays as nested groups.
    #[test]
    fn no_singleton_matrix() {
        let expected = Expression::from_iter([Group::from_iter(
            "[",
            [Group::from_iter("[", [Simple::Ident("a")], "]")],
            "]",
        )]);
        assert_eq!(parse("[[a]]"), expected);
    }

    // asciimath special-cases sets; here matrix parsing is stricter (outer and row brackets
    // must match), so a set of tuples is parsed as plain groups.
    #[test]
    fn sets_as_groups() {
        let expected = Expression::from_iter([Group::from_iter(
            "{",
            [
                Group::from_iter(
                    "(",
                    [Simple::Ident("x"), Simple::Symbol(","), Simple::Ident("y")],
                    ")",
                )
                .into(),
                Simple::Symbol(","),
                Group::from_iter(
                    "(",
                    [Simple::Ident("a"), Simple::Symbol(","), Simple::Ident("b")],
                    ")",
                )
                .into(),
            ],
            "}",
        )]);
        assert_eq!(parse("{(x, y), (a, b)}"), expected);
    }

    // A binary operator with only one operand gets a missing second operand.
    #[test]
    fn simple_binary() {
        let expected = Expression::from_iter([SimpleBinary::new(
            "root",
            Simple::Number("3"),
            Simple::Missing,
        )]);
        assert_eq!(parse("root 3"), expected);
    }
}