1use crate::tokenizer::{Token, TokenStream};
3use crate::{
4 AccentKind, AlignRow, CaseRow, ColumnAlign, Delimiter, FracStyle, LimitStyle, MathExpr,
5 MathFont, MathOperator, MathSpace, MathStyle, MatrixDelimiters, OperatorKind, SmashMode,
6 symbols,
7};
8
9pub(crate) fn parse_expr(ts: &mut TokenStream) -> MathExpr {
14 let items = parse_row(ts);
15 flatten_row(items)
16}
17
18pub(crate) fn parse_row(ts: &mut TokenStream) -> Vec<MathExpr> {
23 let mut items: Vec<MathExpr> = Vec::new();
24
25 loop {
26 ts.skip_whitespace();
27
28 match ts.peek() {
29 Token::Eof
30 | Token::RBrace
31 | Token::End(_)
32 | Token::Ampersand
33 | Token::DoubleBackslash => break,
34
35 Token::Command(cmd) if cmd == "right" => break,
37
38 _ => {
39 let atom = parse_atom(ts);
40 let with_scripts = parse_scripts(ts, atom);
41 items.push(with_scripts);
42 }
43 }
44 }
45
46 items
47}
48
49fn parse_atom(ts: &mut TokenStream) -> MathExpr {
51 ts.skip_whitespace();
52
53 match ts.peek().clone() {
54 Token::LBrace => parse_group(ts),
55
56 Token::Letter(c) => {
57 ts.next();
58 MathExpr::Ident {
59 value: c.to_string(),
60 }
61 }
62
63 Token::Digit(c) => {
64 let mut s = String::new();
66 s.push(c);
67 ts.next();
68 loop {
69 match ts.peek() {
70 Token::Digit(d) => {
71 s.push(*d);
72 ts.next();
73 }
74 Token::Dot => {
75 if matches!(ts.peek_ahead(1), Token::Digit(_)) {
77 s.push('.');
78 ts.next(); } else {
80 break;
81 }
82 }
83 _ => break,
84 }
85 }
86 MathExpr::Number { value: s }
87 }
88
89 Token::Dot => {
90 ts.next();
91 MathExpr::Number {
92 value: ".".to_string(),
93 }
94 }
95
96 Token::Plus => {
97 ts.next();
98 MathExpr::Operator(MathOperator {
99 symbol: "+".to_string(),
100 kind: OperatorKind::Binary,
101 })
102 }
103
104 Token::Minus => {
105 ts.next();
106 MathExpr::Operator(MathOperator {
107 symbol: "-".to_string(),
108 kind: OperatorKind::Binary,
109 })
110 }
111
112 Token::Equals => {
113 ts.next();
114 MathExpr::Operator(MathOperator {
115 symbol: "=".to_string(),
116 kind: OperatorKind::Relation,
117 })
118 }
119
120 Token::LessThan => {
121 ts.next();
122 MathExpr::Operator(MathOperator {
123 symbol: "<".to_string(),
124 kind: OperatorKind::Relation,
125 })
126 }
127
128 Token::GreaterThan => {
129 ts.next();
130 MathExpr::Operator(MathOperator {
131 symbol: ">".to_string(),
132 kind: OperatorKind::Relation,
133 })
134 }
135
136 Token::Comma => {
137 ts.next();
138 MathExpr::Operator(MathOperator {
139 symbol: ",".to_string(),
140 kind: OperatorKind::Punctuation,
141 })
142 }
143
144 Token::Semicolon => {
145 ts.next();
146 MathExpr::Operator(MathOperator {
147 symbol: ";".to_string(),
148 kind: OperatorKind::Punctuation,
149 })
150 }
151
152 Token::Colon => {
153 ts.next();
154 MathExpr::Operator(MathOperator {
155 symbol: ":".to_string(),
156 kind: OperatorKind::Punctuation,
157 })
158 }
159
160 Token::Bang => {
161 ts.next();
162 MathExpr::Operator(MathOperator {
163 symbol: "!".to_string(),
164 kind: OperatorKind::Postfix,
165 })
166 }
167
168 Token::Prime => {
169 ts.next();
170 MathExpr::Ident {
172 value: "′".to_string(),
173 }
174 }
175
176 Token::Pipe => {
177 ts.next();
178 MathExpr::Operator(MathOperator {
179 symbol: "|".to_string(),
180 kind: OperatorKind::Binary,
181 })
182 }
183
184 Token::LParen => {
185 ts.next();
186 MathExpr::Operator(MathOperator {
187 symbol: "(".to_string(),
188 kind: OperatorKind::Prefix,
189 })
190 }
191
192 Token::RParen => {
193 ts.next();
194 MathExpr::Operator(MathOperator {
195 symbol: ")".to_string(),
196 kind: OperatorKind::Postfix,
197 })
198 }
199
200 Token::LBracket => {
201 ts.next();
202 MathExpr::Operator(MathOperator {
203 symbol: "[".to_string(),
204 kind: OperatorKind::Prefix,
205 })
206 }
207
208 Token::RBracket => {
209 ts.next();
210 MathExpr::Operator(MathOperator {
211 symbol: "]".to_string(),
212 kind: OperatorKind::Postfix,
213 })
214 }
215
216 Token::Tilde => {
217 ts.next();
218 MathExpr::Space(MathSpace::Thin)
219 }
220
221 Token::ThinSpace => {
222 ts.next();
223 MathExpr::Space(MathSpace::Thin)
224 }
225
226 Token::MedSpace => {
227 ts.next();
228 MathExpr::Space(MathSpace::Medium)
229 }
230
231 Token::NegThinSpace => {
232 ts.next();
233 MathExpr::Space(MathSpace::NegThin)
234 }
235
236 Token::Command(cmd) => {
237 ts.next();
238 parse_command(ts, &cmd)
239 }
240
241 Token::Begin(env) => {
242 ts.next();
243 parse_environment(ts, &env)
244 }
245
246 Token::Caret => {
248 ts.next();
249 let script = parse_script_arg(ts);
250 MathExpr::Superscript {
251 base: Box::new(MathExpr::Error {
252 raw: "^".to_string(),
253 message: "^ without a base".to_string(),
254 }),
255 script: Box::new(script),
256 }
257 }
258
259 Token::Underscore => {
260 ts.next();
261 let script = parse_script_arg(ts);
262 MathExpr::Subscript {
263 base: Box::new(MathExpr::Error {
264 raw: "_".to_string(),
265 message: "_ without a base".to_string(),
266 }),
267 script: Box::new(script),
268 }
269 }
270
271 Token::DoubleBackslash => {
273 let raw = "\\\\".to_string();
274 ts.next();
275 MathExpr::Error {
276 raw: raw.clone(),
277 message: "unexpected row separator \\\\".to_string(),
278 }
279 }
280
281 Token::Ampersand => {
282 ts.next();
283 MathExpr::Error {
284 raw: "&".to_string(),
285 message: "unexpected & outside environment".to_string(),
286 }
287 }
288
289 Token::Whitespace => {
290 ts.next();
291 MathExpr::Space(MathSpace::Thin)
292 }
293
294 Token::Eof | Token::RBrace | Token::End(_) => {
295 MathExpr::Error {
297 raw: String::new(),
298 message: "unexpected end of input".to_string(),
299 }
300 }
301 }
302}
303
304fn parse_scripts(ts: &mut TokenStream, base: MathExpr) -> MathExpr {
306 ts.skip_whitespace();
307
308 let has_sub = matches!(ts.peek(), Token::Underscore);
309 let has_sup = matches!(ts.peek(), Token::Caret);
310
311 if !has_sub && !has_sup {
312 return base;
313 }
314
315 let mut sub: Option<Box<MathExpr>> = None;
316 let mut sup: Option<Box<MathExpr>> = None;
317
318 for _ in 0..2 {
320 ts.skip_whitespace();
321 match ts.peek().clone() {
322 Token::Underscore if sub.is_none() => {
323 ts.next();
324 sub = Some(Box::new(parse_script_arg(ts)));
325 }
326 Token::Caret if sup.is_none() => {
327 ts.next();
328 sup = Some(Box::new(parse_script_arg(ts)));
329 }
330 _ => break,
331 }
332 }
333
334 match (sub, sup) {
335 (Some(s), None) => MathExpr::Subscript {
336 base: Box::new(base),
337 script: s,
338 },
339 (None, Some(s)) => MathExpr::Superscript {
340 base: Box::new(base),
341 script: s,
342 },
343 (Some(sub), Some(sup)) => MathExpr::Subsuperscript {
344 base: Box::new(base),
345 sub,
346 sup,
347 },
348 (None, None) => base, }
350}
351
352fn parse_script_arg(ts: &mut TokenStream) -> MathExpr {
355 ts.skip_whitespace();
356 if matches!(ts.peek(), Token::LBrace) {
357 parse_group(ts)
358 } else {
359 parse_atom(ts)
360 }
361}
362
363pub(crate) fn parse_group(ts: &mut TokenStream) -> MathExpr {
366 if !matches!(ts.peek(), Token::LBrace) {
368 return MathExpr::Error {
369 raw: String::new(),
370 message: "expected { but found something else".to_string(),
371 };
372 }
373 ts.next(); let items = parse_row(ts);
376
377 if matches!(ts.peek(), Token::RBrace) {
378 ts.next(); } else {
380 return MathExpr::Error {
382 raw: format!("{{{}", exprs_to_raw(&items)),
383 message: "unmatched { — missing }".to_string(),
384 };
385 }
386
387 flatten_row(items)
388}
389
390pub(crate) fn parse_command(ts: &mut TokenStream, cmd: &str) -> MathExpr {
394 match cmd {
396 "quad" => return MathExpr::Space(MathSpace::Quad),
397 "qquad" => return MathExpr::Space(MathSpace::QQuad),
398 "," => return MathExpr::Space(MathSpace::Thin),
399 ";" | ":" => return MathExpr::Space(MathSpace::Medium),
400 "!" => return MathExpr::Space(MathSpace::NegThin),
401 " " => return MathExpr::Space(MathSpace::Thin),
402 _ => {}
403 }
404
405 match cmd {
407 "infty" => {
408 return MathExpr::Ident {
409 value: "∞".to_string(),
410 };
411 }
412 "partial" => {
413 return MathExpr::Ident {
414 value: "∂".to_string(),
415 };
416 }
417 "nabla" => {
418 return MathExpr::Ident {
419 value: "∇".to_string(),
420 };
421 }
422 "ell" => {
423 return MathExpr::Ident {
424 value: "ℓ".to_string(),
425 };
426 }
427 "hbar" => {
428 return MathExpr::Ident {
429 value: "ℏ".to_string(),
430 };
431 }
432 "emptyset" => {
433 return MathExpr::Ident {
434 value: "∅".to_string(),
435 };
436 }
437 "varnothing" => {
438 return MathExpr::Ident {
439 value: "∅".to_string(),
440 };
441 }
442 "aleph" => {
443 return MathExpr::Ident {
444 value: "ℵ".to_string(),
445 };
446 }
447 "forall" => {
448 return MathExpr::Ident {
449 value: "∀".to_string(),
450 };
451 }
452 "exists" => {
453 return MathExpr::Ident {
454 value: "∃".to_string(),
455 };
456 }
457 _ => {}
458 }
459
460 if let Some(sym) = symbols::greek_letter(cmd) {
462 return MathExpr::Ident {
463 value: sym.to_string(),
464 };
465 }
466
467 if let Some((sym, kind)) = symbols::operator(cmd) {
469 return MathExpr::Operator(MathOperator {
470 symbol: sym.to_string(),
471 kind,
472 });
473 }
474
475 if let Some(sym) = symbols::large_operator(cmd) {
477 let mut lower: Option<Box<MathExpr>> = None;
478 let mut upper: Option<Box<MathExpr>> = None;
479
480 for _ in 0..2 {
482 ts.skip_whitespace();
483 match ts.peek().clone() {
484 Token::Underscore if lower.is_none() => {
485 ts.next();
486 lower = Some(Box::new(parse_script_arg(ts)));
487 }
488 Token::Caret if upper.is_none() => {
489 ts.next();
490 upper = Some(Box::new(parse_script_arg(ts)));
491 }
492 _ => break,
493 }
494 }
495
496 return MathExpr::BigOperator {
497 op: MathOperator {
498 symbol: sym.to_string(),
499 kind: OperatorKind::Large,
500 },
501 limits: LimitStyle::DisplayLimits,
502 lower,
503 upper,
504 };
505 }
506
507 if let Some(name) = symbols::named_operator(cmd) {
509 return MathExpr::Operator(MathOperator {
510 symbol: name.to_string(),
511 kind: OperatorKind::Prefix,
512 });
513 }
514
515 match cmd {
517 "frac" => {
518 let num = parse_group(ts);
519 let den = parse_group(ts);
520 return MathExpr::Frac {
521 numerator: Box::new(num),
522 denominator: Box::new(den),
523 style: FracStyle::Auto,
524 };
525 }
526 "dfrac" => {
527 let num = parse_group(ts);
528 let den = parse_group(ts);
529 return MathExpr::Frac {
530 numerator: Box::new(num),
531 denominator: Box::new(den),
532 style: FracStyle::Display,
533 };
534 }
535 "tfrac" => {
536 let num = parse_group(ts);
537 let den = parse_group(ts);
538 return MathExpr::Frac {
539 numerator: Box::new(num),
540 denominator: Box::new(den),
541 style: FracStyle::Text,
542 };
543 }
544 "binom" => {
545 let num = parse_group(ts);
547 let den = parse_group(ts);
548 return MathExpr::Fenced {
550 open: Delimiter::Paren,
551 close: Delimiter::Paren,
552 body: vec![MathExpr::Frac {
553 numerator: Box::new(num),
554 denominator: Box::new(den),
555 style: FracStyle::Auto,
556 }],
557 };
558 }
559 _ => {}
560 }
561
562 if cmd == "sqrt" {
564 ts.skip_whitespace();
565 let index = if matches!(ts.peek(), Token::LBracket) {
567 ts.next(); let idx_items = parse_until_rbracket(ts);
569 let idx = flatten_row(idx_items);
570 if matches!(ts.peek(), Token::RBracket) {
571 ts.next(); }
573 Some(Box::new(idx))
574 } else {
575 None
576 };
577
578 let body = parse_group(ts);
579 return MathExpr::Sqrt {
580 index,
581 body: Box::new(body),
582 };
583 }
584
585 if let Some(font) = symbols::font_override_command(cmd) {
587 if matches!(font, MathFont::Roman) && matches!(cmd, "text" | "mbox") {
589 ts.skip_whitespace();
590 let raw = ts.read_raw_brace_string().unwrap_or_default();
591 return MathExpr::Text { value: raw };
592 }
593 let body = parse_group(ts);
594 return MathExpr::FontOverride {
595 font,
596 body: Box::new(body),
597 };
598 }
599
600 if cmd == "left" {
602 return parse_delimited(ts);
603 }
604
605 if cmd == "right" {
607 return MathExpr::Error {
609 raw: "\\right".to_string(),
610 message: "\\right without matching \\left".to_string(),
611 };
612 }
613
614 if let Some(kind) = accent_kind(cmd) {
616 let body = parse_group(ts);
617 return MathExpr::Accent {
618 kind,
619 body: Box::new(body),
620 };
621 }
622
623 match cmd {
625 "overline" => {
626 let body = parse_group(ts);
627 return MathExpr::Overline {
628 body: Box::new(body),
629 };
630 }
631 "underline" => {
632 let body = parse_group(ts);
633 return MathExpr::Underline {
634 body: Box::new(body),
635 };
636 }
637 "overbrace" => {
638 let body = parse_group(ts);
639 return MathExpr::Overbrace {
640 body: Box::new(body),
641 annotation: None,
642 };
643 }
644 "underbrace" => {
645 let body = parse_group(ts);
646 return MathExpr::Underbrace {
647 body: Box::new(body),
648 annotation: None,
649 };
650 }
651 "overset" | "stackrel" => {
652 let above = parse_group(ts);
653 let base = parse_group(ts);
654 return MathExpr::Overset {
655 over: Box::new(above),
656 base: Box::new(base),
657 };
658 }
659 "underset" => {
660 let below = parse_group(ts);
661 let base = parse_group(ts);
662 return MathExpr::Underset {
663 under: Box::new(below),
664 base: Box::new(base),
665 };
666 }
667 _ => {}
668 }
669
670 match cmd {
672 "displaystyle" => {
673 return MathExpr::StyleOverride {
674 style: MathStyle::Display,
675 body: Box::new(parse_style_body(ts)),
676 };
677 }
678 "textstyle" => {
679 return MathExpr::StyleOverride {
680 style: MathStyle::Text,
681 body: Box::new(parse_style_body(ts)),
682 };
683 }
684 "scriptstyle" => {
685 return MathExpr::StyleOverride {
686 style: MathStyle::Script,
687 body: Box::new(parse_style_body(ts)),
688 };
689 }
690 "scriptscriptstyle" => {
691 return MathExpr::StyleOverride {
692 style: MathStyle::ScriptScript,
693 body: Box::new(parse_style_body(ts)),
694 };
695 }
696 _ => {}
697 }
698
699 match cmd {
701 "phantom" => {
702 let body = parse_group(ts);
703 return MathExpr::Phantom {
704 body: Box::new(body),
705 };
706 }
707 "hphantom" => {
708 let body = parse_group(ts);
709 return MathExpr::HPhantom {
710 body: Box::new(body),
711 };
712 }
713 "vphantom" => {
714 let body = parse_group(ts);
715 return MathExpr::VPhantom {
716 body: Box::new(body),
717 };
718 }
719 "smash" => {
720 ts.skip_whitespace();
722 let mode = if matches!(ts.peek(), Token::LBracket) {
723 ts.next(); let mode_str = collect_until_rbracket_str(ts);
725 if matches!(ts.peek(), Token::RBracket) {
726 ts.next();
727 }
728 match mode_str.trim() {
729 "t" => SmashMode::Top,
730 "b" => SmashMode::Bottom,
731 _ => SmashMode::Both,
732 }
733 } else {
734 SmashMode::Both
735 };
736 let body = parse_group(ts);
737 return MathExpr::Smash {
738 mode,
739 body: Box::new(body),
740 };
741 }
742 _ => {}
743 }
744
745 match cmd {
747 "color" | "textcolor" => {
748 let color_group = parse_group(ts);
749 let color_name = extract_text_content(&color_group);
750 let body = parse_group(ts);
751 return MathExpr::Color {
752 color: color_name,
753 body: Box::new(body),
754 };
755 }
756 _ => {}
757 }
758
759 if cmd == "operatorname" {
761 let name_group = parse_group(ts);
762 let name = extract_text_content(&name_group);
763 return MathExpr::Operator(MathOperator {
764 symbol: name,
765 kind: OperatorKind::Prefix,
766 });
767 }
768
769 if cmd == "ce" {
771 let body = parse_group(ts);
772 let raw = extract_text_content(&body);
773 return MathExpr::Chem { value: raw };
774 }
775
776 match cmd {
779 "langle" => {
780 return MathExpr::Ident {
781 value: "⟨".to_string(),
782 };
783 }
784 "rangle" => {
785 return MathExpr::Ident {
786 value: "⟩".to_string(),
787 };
788 }
789 "lbrace" | "{" => {
790 return MathExpr::Ident {
791 value: "{".to_string(),
792 };
793 }
794 "rbrace" | "}" => {
795 return MathExpr::Ident {
796 value: "}".to_string(),
797 };
798 }
799 "lvert" | "|" => {
800 return MathExpr::Ident {
801 value: "|".to_string(),
802 };
803 }
804 "rvert" => {
805 return MathExpr::Ident {
806 value: "|".to_string(),
807 };
808 }
809 "lVert" => {
810 return MathExpr::Ident {
811 value: "‖".to_string(),
812 };
813 }
814 "rVert" => {
815 return MathExpr::Ident {
816 value: "‖".to_string(),
817 };
818 }
819 "lceil" => {
820 return MathExpr::Ident {
821 value: "⌈".to_string(),
822 };
823 }
824 "rceil" => {
825 return MathExpr::Ident {
826 value: "⌉".to_string(),
827 };
828 }
829 "lfloor" => {
830 return MathExpr::Ident {
831 value: "⌊".to_string(),
832 };
833 }
834 "rfloor" => {
835 return MathExpr::Ident {
836 value: "⌋".to_string(),
837 };
838 }
839 _ => {}
840 }
841
842 match cmd {
844 "ldots" | "dots" => {
845 return MathExpr::Ident {
846 value: "…".to_string(),
847 };
848 }
849 "cdots" => {
850 return MathExpr::Ident {
851 value: "⋯".to_string(),
852 };
853 }
854 "vdots" => {
855 return MathExpr::Ident {
856 value: "⋮".to_string(),
857 };
858 }
859 "ddots" => {
860 return MathExpr::Ident {
861 value: "⋱".to_string(),
862 };
863 }
864 "prime" => {
865 return MathExpr::Ident {
866 value: "′".to_string(),
867 };
868 }
869 "circ" => {
870 return MathExpr::Operator(MathOperator {
871 symbol: "∘".to_string(),
872 kind: OperatorKind::Binary,
873 });
874 }
875 "bullet" => {
876 return MathExpr::Operator(MathOperator {
877 symbol: "•".to_string(),
878 kind: OperatorKind::Binary,
879 });
880 }
881 "star" => {
882 return MathExpr::Operator(MathOperator {
883 symbol: "⋆".to_string(),
884 kind: OperatorKind::Binary,
885 });
886 }
887 "perp" => {
888 return MathExpr::Ident {
889 value: "⊥".to_string(),
890 };
891 }
892 "top" => {
893 return MathExpr::Ident {
894 value: "⊤".to_string(),
895 };
896 }
897 "angle" => {
898 return MathExpr::Ident {
899 value: "∠".to_string(),
900 };
901 }
902 "triangle" => {
903 return MathExpr::Ident {
904 value: "△".to_string(),
905 };
906 }
907 "square" => {
908 return MathExpr::Ident {
909 value: "□".to_string(),
910 };
911 }
912 "therefore" => {
913 return MathExpr::Ident {
914 value: "∴".to_string(),
915 };
916 }
917 "because" => {
918 return MathExpr::Ident {
919 value: "∵".to_string(),
920 };
921 }
922 "checkmark" => {
923 return MathExpr::Ident {
924 value: "✓".to_string(),
925 };
926 }
927 _ => {}
928 }
929
930 MathExpr::Error {
932 raw: format!("\\{}", cmd),
933 message: format!("unknown command: \\{}", cmd),
934 }
935}
936
937fn parse_delimited(ts: &mut TokenStream) -> MathExpr {
942 ts.skip_whitespace();
943 let open = parse_delimiter_token(ts);
944
945 let body_items = parse_row(ts);
946
947 ts.skip_whitespace();
948
949 let close = if matches!(ts.peek(), Token::Command(cmd) if cmd == "right") {
951 ts.next(); ts.skip_whitespace();
953 parse_delimiter_token(ts)
954 } else {
955 return MathExpr::Error {
957 raw: format!("\\left{}", delimiter_to_raw(open)),
958 message: "\\left without matching \\right".to_string(),
959 };
960 };
961
962 MathExpr::Fenced {
963 open,
964 close,
965 body: body_items,
966 }
967}
968
969fn parse_delimiter_token(ts: &mut TokenStream) -> Delimiter {
971 match ts.peek().clone() {
972 Token::LParen => {
973 ts.next();
974 Delimiter::Paren
975 }
976 Token::RParen => {
977 ts.next();
978 Delimiter::Paren
979 }
980 Token::LBracket => {
981 ts.next();
982 Delimiter::Bracket
983 }
984 Token::RBracket => {
985 ts.next();
986 Delimiter::Bracket
987 }
988 Token::Pipe => {
989 ts.next();
990 Delimiter::Pipe
991 }
992 Token::Command(cmd) => {
993 ts.next();
994 match cmd.as_str() {
995 "{" | "lbrace" => Delimiter::Brace,
996 "}" | "rbrace" => Delimiter::Brace,
997 "langle" => Delimiter::Angle,
998 "rangle" => Delimiter::Angle,
999 "|" | "lVert" | "rVert" | "Vert" => Delimiter::DoublePipe,
1000 "lvert" | "rvert" | "vert" => Delimiter::Pipe,
1001 "lceil" | "rceil" => Delimiter::Ceil,
1002 "lfloor" | "rfloor" => Delimiter::Floor,
1003 "." => Delimiter::None, _ => Delimiter::None,
1005 }
1006 }
1007 Token::LBrace => {
1008 ts.next();
1009 Delimiter::Brace
1010 }
1011 Token::RBrace => {
1012 ts.next();
1013 Delimiter::Brace
1014 }
1015 _ => {
1016 Delimiter::None
1018 }
1019 }
1020}
1021
1022fn delimiter_to_raw(d: Delimiter) -> &'static str {
1023 match d {
1024 Delimiter::Paren => "(",
1025 Delimiter::Bracket => "[",
1026 Delimiter::Brace => "\\{",
1027 Delimiter::Angle => "\\langle",
1028 Delimiter::Pipe => "|",
1029 Delimiter::DoublePipe => "\\|",
1030 Delimiter::Floor => "\\lfloor",
1031 Delimiter::Ceil => "\\lceil",
1032 Delimiter::None => ".",
1033 }
1034}
1035
1036pub(crate) fn parse_environment(ts: &mut TokenStream, env: &str) -> MathExpr {
1041 match env {
1042 "matrix" => parse_matrix_env(ts, env, MatrixDelimiters::Plain),
1044 "pmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::Paren),
1045 "bmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::Bracket),
1046 "Bmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::Brace),
1047 "vmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::Pipe),
1048 "Vmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::DoublePipe),
1049 "smallmatrix" => parse_matrix_env(ts, env, MatrixDelimiters::Plain),
1050
1051 "cases" | "cases*" => parse_cases_env(ts, env),
1053
1054 "align" | "align*" | "aligned" => parse_align_env(ts, env),
1056 "gather" | "gather*" | "gathered" => parse_gather_env(ts, env),
1057 "alignat" | "alignat*" => parse_align_env(ts, env),
1058
1059 "array" => parse_array_env(ts),
1061
1062 "CD" => {
1064 let raw = collect_until_end(ts, "CD");
1065 consume_end(ts, "CD");
1066 MathExpr::Error {
1067 raw: format!("\\begin{{CD}}{raw}\\end{{CD}}"),
1068 message: "commutative diagrams (\\begin{CD}) are not supported".to_string(),
1069 }
1070 }
1071
1072 _ => {
1074 let raw = collect_until_end(ts, env);
1075 consume_end(ts, env);
1076 MathExpr::Error {
1077 raw: format!("\\begin{{{env}}}{raw}\\end{{{env}}}"),
1078 message: format!("unknown environment: {env}"),
1079 }
1080 }
1081 }
1082}
1083
1084fn parse_matrix_env(ts: &mut TokenStream, env: &str, delimiters: MatrixDelimiters) -> MathExpr {
1087 let rows = parse_matrix_body(ts, env);
1088 MathExpr::Matrix { rows, delimiters }
1089}
1090
1091fn parse_matrix_body(ts: &mut TokenStream, env: &str) -> Vec<Vec<MathExpr>> {
1094 let mut all_rows: Vec<Vec<MathExpr>> = Vec::new();
1095 let mut current_cells: Vec<MathExpr> = Vec::new();
1096 let mut current_cell: Vec<MathExpr> = Vec::new();
1097
1098 loop {
1099 ts.skip_whitespace();
1100 match ts.peek().clone() {
1101 Token::Eof => break,
1102 Token::End(e) if e == env => {
1103 ts.next(); break;
1105 }
1106 Token::End(_) => break,
1107 Token::Ampersand => {
1108 ts.next();
1109 current_cells.push(flatten_row(std::mem::take(&mut current_cell)));
1110 }
1111 Token::DoubleBackslash => {
1112 ts.next();
1113 current_cells.push(flatten_row(std::mem::take(&mut current_cell)));
1115 all_rows.push(std::mem::take(&mut current_cells));
1116 }
1117 _ => {
1118 let atom = parse_atom(ts);
1119 let scripted = parse_scripts(ts, atom);
1120 current_cell.push(scripted);
1121 }
1122 }
1123 }
1124
1125 current_cells.push(flatten_row(current_cell));
1127 if !current_cells.is_empty() {
1128 let non_empty = current_cells
1130 .iter()
1131 .any(|e| !matches!(e, MathExpr::Row { children: r } if r.is_empty()));
1132 if non_empty {
1133 all_rows.push(current_cells);
1134 }
1135 }
1136
1137 all_rows
1138}
1139
1140fn parse_cases_env(ts: &mut TokenStream, env: &str) -> MathExpr {
1143 let mut rows: Vec<CaseRow> = Vec::new();
1144 let mut current_expr: Vec<MathExpr> = Vec::new();
1145 let mut current_cond: Option<Vec<MathExpr>> = None;
1146 let mut in_condition = false;
1147
1148 loop {
1149 ts.skip_whitespace();
1150 match ts.peek().clone() {
1151 Token::Eof => break,
1152 Token::End(e) if e == env => {
1153 ts.next();
1154 break;
1155 }
1156 Token::End(_) => break,
1157 Token::Ampersand => {
1158 ts.next();
1159 if !in_condition {
1161 in_condition = true;
1162 current_cond = Some(Vec::new());
1163 }
1164 }
1165 Token::DoubleBackslash => {
1166 ts.next();
1167 let cond = current_cond.take().map(flatten_row);
1168 rows.push(CaseRow {
1169 expr: flatten_row(std::mem::take(&mut current_expr)),
1170 condition: cond,
1171 });
1172 in_condition = false;
1173 }
1174 _ => {
1175 let atom = parse_atom(ts);
1176 let scripted = parse_scripts(ts, atom);
1177 if in_condition {
1178 current_cond.get_or_insert_with(Vec::new).push(scripted);
1179 } else {
1180 current_expr.push(scripted);
1181 }
1182 }
1183 }
1184 }
1185
1186 let cond = current_cond.map(flatten_row);
1188 if !current_expr.is_empty() || cond.is_some() {
1189 rows.push(CaseRow {
1190 expr: flatten_row(current_expr),
1191 condition: cond,
1192 });
1193 }
1194
1195 MathExpr::Cases { rows }
1196}
1197
1198fn parse_align_env(ts: &mut TokenStream, env: &str) -> MathExpr {
1201 let rows = parse_align_rows(ts, env);
1202 let numbered = !env.ends_with('*');
1203 MathExpr::Align { rows, numbered }
1204}
1205
1206fn parse_gather_env(ts: &mut TokenStream, env: &str) -> MathExpr {
1207 let align_rows = parse_align_rows(ts, env);
1208 let numbered = !env.ends_with('*');
1209 let rows: Vec<MathExpr> = align_rows
1211 .into_iter()
1212 .map(|row| flatten_row(row.cells))
1213 .collect();
1214 MathExpr::Gather { rows, numbered }
1215}
1216
1217fn parse_align_rows(ts: &mut TokenStream, env: &str) -> Vec<AlignRow> {
1218 let mut result: Vec<AlignRow> = Vec::new();
1219 let mut current_cells: Vec<MathExpr> = Vec::new();
1220 let mut current_cell: Vec<MathExpr> = Vec::new();
1221
1222 loop {
1223 ts.skip_whitespace();
1224 match ts.peek().clone() {
1225 Token::Eof => break,
1226 Token::End(e) if e == env => {
1227 ts.next();
1228 break;
1229 }
1230 Token::End(_) => break,
1231 Token::Ampersand => {
1232 ts.next();
1233 current_cells.push(flatten_row(std::mem::take(&mut current_cell)));
1234 }
1235 Token::DoubleBackslash => {
1236 ts.next();
1237 current_cells.push(flatten_row(std::mem::take(&mut current_cell)));
1238 result.push(AlignRow {
1239 cells: std::mem::take(&mut current_cells),
1240 label: None,
1241 });
1242 }
1243 Token::Command(cmd) if cmd == "label" => {
1244 ts.next();
1246 let label_body = parse_group(ts);
1247 let label = extract_text_content(&label_body);
1248 current_cells.push(flatten_row(std::mem::take(&mut current_cell)));
1250 result.push(AlignRow {
1251 cells: std::mem::take(&mut current_cells),
1252 label: Some(label),
1253 });
1254 }
1255 _ => {
1256 let atom = parse_atom(ts);
1257 let scripted = parse_scripts(ts, atom);
1258 current_cell.push(scripted);
1259 }
1260 }
1261 }
1262
1263 current_cells.push(flatten_row(current_cell));
1265 if current_cells
1266 .iter()
1267 .any(|e| !matches!(e, MathExpr::Row { children: r } if r.is_empty()))
1268 {
1269 result.push(AlignRow {
1270 cells: current_cells,
1271 label: None,
1272 });
1273 }
1274
1275 result
1276}
1277
1278fn parse_array_env(ts: &mut TokenStream) -> MathExpr {
1281 ts.skip_whitespace();
1283 let columns = if matches!(ts.peek(), Token::LBrace) {
1284 let spec_group = parse_group(ts);
1285 parse_column_spec(&extract_text_content(&spec_group))
1286 } else {
1287 Vec::new()
1288 };
1289
1290 let rows = parse_matrix_body(ts, "array");
1291 MathExpr::Array { columns, rows }
1292}
1293
1294fn parse_column_spec(spec: &str) -> Vec<ColumnAlign> {
1295 let mut result = Vec::new();
1296 for c in spec.chars() {
1297 match c {
1298 'l' => result.push(ColumnAlign::Left),
1299 'c' => result.push(ColumnAlign::Center),
1300 'r' => result.push(ColumnAlign::Right),
1301 _ => {} }
1303 }
1304 result
1305}
1306
1307fn parse_style_body(ts: &mut TokenStream) -> MathExpr {
1313 ts.skip_whitespace();
1314 if matches!(ts.peek(), Token::LBrace) {
1316 parse_group(ts)
1317 } else {
1318 let items = parse_row(ts);
1320 flatten_row(items)
1321 }
1322}
1323
1324fn collect_until_end(ts: &mut TokenStream, env: &str) -> String {
1328 let mut raw = String::new();
1329 loop {
1330 match ts.peek().clone() {
1331 Token::Eof => break,
1332 Token::End(e) if e == env => break,
1333 tok => {
1334 raw.push_str(&token_to_raw(&tok));
1335 ts.next();
1336 }
1337 }
1338 }
1339 raw
1340}
1341
1342fn consume_end(ts: &mut TokenStream, env: &str) {
1343 if matches!(ts.peek(), Token::End(e) if e == env) {
1344 ts.next();
1345 }
1346}
1347
1348fn parse_until_rbracket(ts: &mut TokenStream) -> Vec<MathExpr> {
1350 let mut items = Vec::new();
1351 loop {
1352 ts.skip_whitespace();
1353 match ts.peek() {
1354 Token::RBracket | Token::Eof => break,
1355 _ => {
1356 let atom = parse_atom(ts);
1357 let scripted = parse_scripts(ts, atom);
1358 items.push(scripted);
1359 }
1360 }
1361 }
1362 items
1363}
1364
1365fn collect_until_rbracket_str(ts: &mut TokenStream) -> String {
1367 let mut s = String::new();
1368 loop {
1369 match ts.peek().clone() {
1370 Token::RBracket | Token::Eof => break,
1371 tok => {
1372 s.push_str(&token_to_raw(&tok));
1373 ts.next();
1374 }
1375 }
1376 }
1377 s
1378}
1379
1380fn flatten_row(items: Vec<MathExpr>) -> MathExpr {
1382 match items.len() {
1383 0 => MathExpr::Row {
1384 children: Vec::new(),
1385 },
1386 1 => items.into_iter().next().unwrap(),
1387 _ => MathExpr::Row { children: items },
1388 }
1389}
1390
1391fn token_to_raw(tok: &Token) -> String {
1393 match tok {
1394 Token::LBrace => "{".to_string(),
1395 Token::RBrace => "}".to_string(),
1396 Token::Caret => "^".to_string(),
1397 Token::Underscore => "_".to_string(),
1398 Token::Ampersand => "&".to_string(),
1399 Token::Tilde => "~".to_string(),
1400 Token::LParen => "(".to_string(),
1401 Token::RParen => ")".to_string(),
1402 Token::LBracket => "[".to_string(),
1403 Token::RBracket => "]".to_string(),
1404 Token::Pipe => "|".to_string(),
1405 Token::Plus => "+".to_string(),
1406 Token::Minus => "-".to_string(),
1407 Token::Equals => "=".to_string(),
1408 Token::LessThan => "<".to_string(),
1409 Token::GreaterThan => ">".to_string(),
1410 Token::Comma => ",".to_string(),
1411 Token::Semicolon => ";".to_string(),
1412 Token::Colon => ":".to_string(),
1413 Token::Bang => "!".to_string(),
1414 Token::Prime => "'".to_string(),
1415 Token::Dot => ".".to_string(),
1416 Token::Command(c) => format!("\\{c}"),
1417 Token::DoubleBackslash => "\\\\".to_string(),
1418 Token::ThinSpace => "\\,".to_string(),
1419 Token::MedSpace => "\\;".to_string(),
1420 Token::NegThinSpace => "\\!".to_string(),
1421 Token::Letter(c) => c.to_string(),
1422 Token::Digit(c) => c.to_string(),
1423 Token::Begin(e) => format!("\\begin{{{e}}}"),
1424 Token::End(e) => format!("\\end{{{e}}}"),
1425 Token::Whitespace => " ".to_string(),
1426 Token::Eof => String::new(),
1427 }
1428}
1429
1430fn extract_text_content(expr: &MathExpr) -> String {
1432 match expr {
1433 MathExpr::Ident { value: s }
1434 | MathExpr::Number { value: s }
1435 | MathExpr::Text { value: s } => s.clone(),
1436 MathExpr::Row { children: items } => {
1437 items.iter().map(extract_text_content).collect::<String>()
1438 }
1439 MathExpr::Operator(op) => op.symbol.clone(),
1440 MathExpr::Space(_) => " ".to_string(),
1441 _ => String::new(),
1442 }
1443}
1444
1445fn exprs_to_raw(items: &[MathExpr]) -> String {
1447 items.iter().map(|_| "...").collect::<Vec<_>>().join("")
1448}
1449
1450fn accent_kind(cmd: &str) -> Option<AccentKind> {
1452 match cmd {
1453 "hat" => Some(AccentKind::Hat),
1454 "widehat" => Some(AccentKind::WideHat),
1455 "tilde" => Some(AccentKind::Tilde),
1456 "widetilde" => Some(AccentKind::WideTilde),
1457 "vec" => Some(AccentKind::Vec),
1458 "dot" => Some(AccentKind::Dot),
1459 "ddot" => Some(AccentKind::Ddot),
1460 "bar" => Some(AccentKind::Bar),
1461 "acute" => Some(AccentKind::Acute),
1462 "grave" => Some(AccentKind::Grave),
1463 "breve" => Some(AccentKind::Breve),
1464 "check" => Some(AccentKind::Check),
1465 _ => None,
1466 }
1467}
1468
#[cfg(test)]
mod tests {
    // Unit tests for the expression parser. Each test tokenizes a LaTeX
    // snippet, runs `parse_expr` over it, and checks the resulting `MathExpr`.
    use super::*;
    use crate::tokenizer::{TokenStream, tokenize};

    // Tokenizes `input` and parses the whole stream into a single expression.
    fn parse(input: &str) -> MathExpr {
        let mut stream = TokenStream::new(tokenize(input));
        parse_expr(&mut stream)
    }

    // Expected-value helper: an identifier node holding `name`.
    fn ident(name: &str) -> MathExpr {
        MathExpr::Ident {
            value: String::from(name),
        }
    }

    // Expected-value helper: a number node holding `digits`.
    fn number(digits: &str) -> MathExpr {
        MathExpr::Number {
            value: String::from(digits),
        }
    }

    // --- atoms ---------------------------------------------------------

    #[test]
    fn parse_single_letter() {
        assert_eq!(parse("x"), ident("x"));
    }

    #[test]
    fn parse_number() {
        assert_eq!(parse("42"), number("42"));
    }

    #[test]
    fn parse_decimal_number() {
        // The dot followed by a digit stays inside the same number.
        assert_eq!(parse("3.14"), number("3.14"));
    }

    // --- scripts -------------------------------------------------------

    #[test]
    fn parse_superscript_braced() {
        let expected = MathExpr::Superscript {
            base: Box::new(ident("x")),
            script: Box::new(number("2")),
        };
        assert_eq!(parse("x^{2}"), expected);
    }

    #[test]
    fn parse_superscript_unbraced() {
        let expected = MathExpr::Superscript {
            base: Box::new(ident("x")),
            script: Box::new(number("2")),
        };
        assert_eq!(parse("x^2"), expected);
    }

    #[test]
    fn parse_subscript_unbraced() {
        let expected = MathExpr::Subscript {
            base: Box::new(ident("x")),
            script: Box::new(ident("i")),
        };
        assert_eq!(parse("x_i"), expected);
    }

    #[test]
    fn parse_sub_superscript() {
        let expected = MathExpr::Subsuperscript {
            base: Box::new(ident("x")),
            sub: Box::new(ident("i")),
            sup: Box::new(number("2")),
        };
        assert_eq!(parse("x_i^2"), expected);
    }

    // --- fractions and roots -------------------------------------------

    #[test]
    fn parse_frac() {
        let expected = MathExpr::Frac {
            numerator: Box::new(ident("a")),
            denominator: Box::new(ident("b")),
            style: FracStyle::Auto,
        };
        assert_eq!(parse(r"\frac{a}{b}"), expected);
    }

    #[test]
    fn parse_dfrac() {
        let parsed = parse(r"\dfrac{a}{b}");
        assert!(matches!(
            parsed,
            MathExpr::Frac {
                style: FracStyle::Display,
                ..
            }
        ));
    }

    #[test]
    fn parse_sqrt() {
        let expected = MathExpr::Sqrt {
            index: None,
            body: Box::new(ident("x")),
        };
        assert_eq!(parse(r"\sqrt{x}"), expected);
    }

    #[test]
    fn parse_sqrt_with_index() {
        let parsed = parse(r"\sqrt[3]{x}");
        assert!(matches!(parsed, MathExpr::Sqrt { index: Some(_), .. }));
    }

    // --- symbols -------------------------------------------------------

    #[test]
    fn parse_greek_letter() {
        assert_eq!(parse(r"\alpha"), ident("α"));
    }

    #[test]
    fn parse_infty() {
        assert_eq!(parse(r"\infty"), ident("∞"));
    }

    #[test]
    fn parse_sum_with_limits() {
        let parsed = parse(r"\sum_{i=0}^{n}");
        assert!(matches!(
            parsed,
            MathExpr::BigOperator {
                lower: Some(_),
                upper: Some(_),
                ..
            }
        ));
    }

    #[test]
    fn parse_left_right_parens() {
        let parsed = parse(r"\left( x \right)");
        assert!(matches!(
            parsed,
            MathExpr::Fenced {
                open: Delimiter::Paren,
                close: Delimiter::Paren,
                ..
            }
        ));
    }

    #[test]
    fn parse_text_command() {
        let expected = MathExpr::Text {
            value: String::from("hello"),
        };
        assert_eq!(parse(r"\text{hello}"), expected);
    }

    #[test]
    fn parse_unknown_command_error() {
        let parsed = parse(r"\unknowncmd");
        assert!(matches!(parsed, MathExpr::Error { .. }));
    }

    #[test]
    fn parse_empty_input() {
        // Empty input is expected to give a row with no children.
        let expected = MathExpr::Row { children: vec![] };
        assert_eq!(parse(""), expected);
    }

    // --- spacing -------------------------------------------------------

    #[test]
    fn parse_spacing_commands() {
        assert_eq!(parse(r"\quad"), MathExpr::Space(MathSpace::Quad));
    }

    #[test]
    fn parse_thin_space() {
        assert_eq!(parse(r"\,"), MathExpr::Space(MathSpace::Thin));
    }

    // --- composite expressions -----------------------------------------

    #[test]
    fn parse_row_multiple_atoms() {
        let parsed = parse("a+b");
        assert!(matches!(parsed, MathExpr::Row { .. }));
    }

    #[test]
    fn parse_nested_frac() {
        let parsed = parse(r"\frac{\frac{a}{b}}{c}");
        assert!(matches!(parsed, MathExpr::Frac { .. }));
    }

    #[test]
    fn parse_mathbb() {
        let parsed = parse(r"\mathbb{R}");
        assert!(matches!(
            parsed,
            MathExpr::FontOverride {
                font: MathFont::Blackboard,
                ..
            }
        ));
    }

    #[test]
    fn parse_overline() {
        let parsed = parse(r"\overline{x}");
        assert!(matches!(parsed, MathExpr::Overline { .. }));
    }

    #[test]
    fn parse_hat_accent() {
        let parsed = parse(r"\hat{x}");
        assert!(matches!(
            parsed,
            MathExpr::Accent {
                kind: AccentKind::Hat,
                ..
            }
        ));
    }

    #[test]
    fn parse_leq_operator() {
        let parsed = parse(r"\leq");
        assert!(matches!(
            parsed,
            MathExpr::Operator(MathOperator {
                kind: OperatorKind::Relation,
                ..
            })
        ));
    }

    // --- malformed input -----------------------------------------------

    #[test]
    fn parse_frac_error_recovery() {
        // The outer node must still be a fraction, with the unknown command
        // showing up as an error node in the denominator.
        match parse(r"\frac{a}{\unknowncmd}") {
            MathExpr::Frac { denominator, .. } => {
                assert!(matches!(*denominator, MathExpr::Error { .. }));
            }
            other => panic!("expected MathExpr::Frac, got {other:?}"),
        }
    }

    #[test]
    fn parse_unmatched_brace_error() {
        // Nothing is asserted about the result: this test passes as long as
        // parsing input with a missing closing brace returns without panicking.
        let _ = parse(r"\frac{a}{b");
    }
}