1use crate::ast::*;
11
/// Width of the `EqNode::Space` inserted on each side of a spaced operator
/// (`+`, `=`, `<`, `>`, `*`, binary `-`, and the relation/arrow glyphs
/// accepted by `is_spaced_operator`).
// NOTE(review): the unit is not stated in this file; presumably the same unit
// as the other `EqNode::Space` widths (e.g. `\quad`) — confirm with the renderer.
const OP_SPACE: f32 = 4.0;
14
/// Command names (without the leading backslash) that are parsed as big
/// operators. The parser accepts them both as `\name` commands and as bare
/// words, and `big_op_symbol` maps each one to its glyph.
pub const BIG_OP_COMMANDS: &[&str] = &["sum", "prod", "int", "iint", "iiint", "oint", "coprod"];
16
/// Returns `true` when `ch` is one of the relation, arrow, or binary-operator
/// glyphs that should be rendered with operator spacing on both sides.
fn is_spaced_operator(ch: char) -> bool {
    const SPACED_GLYPHS: &[char] = &[
        // Relations and set membership.
        '\u{2264}', '\u{2265}', '\u{2260}', '\u{2248}', '\u{2261}',
        '\u{2208}', '\u{2282}', '\u{2283}', '\u{2286}', '\u{2287}',
        // Arrows.
        '\u{2192}', '\u{2190}', '\u{2194}', '\u{21D2}', '\u{21D0}', '\u{21D4}',
        // Ordering / similarity relations.
        '\u{227A}', '\u{227B}', '\u{223C}', '\u{2245}', '\u{226A}', '\u{226B}', '\u{221D}',
        // Binary operators.
        '\u{00B1}', '\u{2213}', '\u{00D7}', '\u{00F7}',
    ];
    SPACED_GLYPHS.contains(&ch)
}
28
29fn maybe_wrap_op_spacing(symbol: String) -> EqNode {
31 if let Some(ch) = symbol.chars().next() {
32 if is_spaced_operator(ch) {
33 return EqNode::Seq(vec![
34 EqNode::Space(OP_SPACE),
35 EqNode::Text(symbol),
36 EqNode::Space(OP_SPACE),
37 ]);
38 }
39 }
40 EqNode::Text(symbol)
41}
42
43pub fn is_big_op(name: &str) -> bool {
44 BIG_OP_COMMANDS.contains(&name)
45}
46
/// Maps a big-operator command name to its Unicode glyph.
///
/// Names that are not recognised fall back to the summation sign, so callers
/// should check `is_big_op` first if they need to tell the two cases apart.
pub fn big_op_symbol(name: &str) -> &'static str {
    const SUMMATION: &str = "\u{2211}";
    const GLYPHS: &[(&str, &str)] = &[
        ("sum", SUMMATION),
        ("prod", "\u{220F}"),
        ("coprod", "\u{2210}"),
        ("int", "\u{222B}"),
        ("iint", "\u{222C}"),
        ("iiint", "\u{222D}"),
        ("oint", "\u{222E}"),
    ];
    GLYPHS
        .iter()
        .find(|&&(command, _)| command == name)
        .map_or(SUMMATION, |&(_, glyph)| glyph)
}
59
60pub fn parse_equation(input: &str) -> EqNode {
82 let mut parser = EqParser::new(input);
83 parser.parse_sequence()
84}
85
/// Cursor-based recursive-descent parser over one equation string.
pub struct EqParser {
    /// The input decoded into `char`s, so that `pos` indexes whole characters
    /// rather than bytes.
    chars: Vec<char>,
    /// Index into `chars` of the next character to be read.
    pos: usize,
}
90
91impl EqParser {
92 pub fn new(input: &str) -> Self {
93 Self {
94 chars: input.chars().collect(),
95 pos: 0,
96 }
97 }
98
99 fn peek(&self) -> Option<char> {
100 self.chars.get(self.pos).copied()
101 }
102
103 fn advance(&mut self) -> Option<char> {
104 let ch = self.chars.get(self.pos).copied();
105 if ch.is_some() {
106 self.pos += 1;
107 }
108 ch
109 }
110
111 fn parse_sequence(&mut self) -> EqNode {
112 self.parse_sequence_until(false)
113 }
114
115 fn parse_sequence_until(&mut self, stop_on_paren: bool) -> EqNode {
116 self.parse_sequence_until_ex(stop_on_paren, false)
117 }
118
119 fn parse_sequence_until_ex(&mut self, stop_on_paren: bool, stop_on_right: bool) -> EqNode {
120 let mut nodes = Vec::new();
121 while self.pos < self.chars.len() {
122 let ch = match self.peek() {
123 Some(c) => c,
124 None => break,
125 };
126 if stop_on_right && ch == '\\' {
128 if self.lookahead_command("right") {
129 break;
130 }
131 }
132 if ch == '\\' && self.lookahead_command("end") {
134 break;
135 }
136 match ch {
137 '}' => break,
138 ')' if stop_on_paren => break,
139 '\\' if self.is_row_separator() => break,
141 '&' => break,
143 '/' => {
144 self.advance();
145 let numer = if let Some(n) = nodes.pop() { n } else { EqNode::Text(String::new()) };
147 let denom = self.parse_group_atom();
148 let denom = self.parse_postfix(denom);
149 nodes.push(EqNode::Frac(Box::new(numer), Box::new(denom)));
150 }
151 _ => {
152 let atom = self.parse_atom();
153 let node = self.parse_postfix(atom);
154 if let EqNode::Text(ref s) = node {
156 if s == "-" && !nodes.is_empty() {
157 let is_binary = match nodes.last() {
159 Some(EqNode::Space(_)) => false,
160 Some(_) => true,
161 None => false,
162 };
163 if is_binary {
164 nodes.push(EqNode::Space(OP_SPACE));
165 nodes.push(node);
166 nodes.push(EqNode::Space(OP_SPACE));
167 continue;
168 }
169 }
170 }
171 nodes.push(node);
172 }
173 }
174 }
175 match nodes.len() {
176 0 => EqNode::Text(String::new()),
177 1 => nodes.remove(0),
178 _ => EqNode::Seq(nodes),
179 }
180 }
181
182 fn is_row_separator(&self) -> bool {
184 self.pos + 1 < self.chars.len()
185 && self.chars[self.pos] == '\\'
186 && self.chars[self.pos + 1] == '\\'
187 }
188
189 fn lookahead_command(&self, name: &str) -> bool {
192 if self.pos >= self.chars.len() || self.chars[self.pos] != '\\' {
193 return false;
194 }
195 let name_chars: Vec<char> = name.chars().collect();
196 let start = self.pos + 1; if start + name_chars.len() > self.chars.len() {
198 return false;
199 }
200 for (i, &nc) in name_chars.iter().enumerate() {
201 if self.chars[start + i] != nc {
202 return false;
203 }
204 }
205 let after = start + name_chars.len();
207 if after < self.chars.len() && self.chars[after].is_ascii_alphabetic() {
208 return false;
209 }
210 true
211 }
212
213 fn parse_group_atom(&mut self) -> EqNode {
217 if self.peek() == Some('(') {
218 self.advance();
219 let mut nodes = Vec::new();
222 let mut depth = 0i32;
223 while self.pos < self.chars.len() {
224 match self.peek() {
225 Some(')') if depth == 0 => break,
226 Some(')') => {
227 depth -= 1;
228 self.advance();
229 nodes.push(EqNode::Text(")".into()));
230 }
231 Some('(') => {
232 depth += 1;
233 self.advance();
234 nodes.push(EqNode::Text("(".into()));
235 }
236 Some('}') => break,
237 None => break,
238 _ => {
239 let atom = self.parse_atom();
240 let node = self.parse_postfix(atom);
241 nodes.push(node);
242 }
243 }
244 }
245 if self.peek() == Some(')') {
246 self.advance();
247 }
248 return match nodes.len() {
249 0 => EqNode::Text(String::new()),
250 1 => nodes.remove(0),
251 _ => EqNode::Seq(nodes),
252 };
253 }
254 self.parse_atom()
255 }
256
257 fn parse_atom(&mut self) -> EqNode {
258 match self.peek() {
259 None => EqNode::Text(String::new()),
260 Some('{') => {
261 self.advance();
262 let inner = self.parse_sequence();
263 if self.peek() == Some('}') {
264 self.advance();
265 }
266 inner
267 }
268 Some('\\') => {
269 self.advance();
270 self.parse_command()
271 }
272 Some(ch) if ch.is_ascii_alphabetic() => {
273 if self.try_keyword("sqrt") {
275 let arg = self.parse_group_atom();
276 return EqNode::Sqrt(Box::new(arg));
277 }
278 for &kw in BIG_OP_COMMANDS {
280 if self.try_keyword(kw) {
281 let symbol = big_op_symbol(kw).to_string();
282 let mut upper: Option<EqNode> = None;
283 let mut lower: Option<EqNode> = None;
284 loop {
285 match self.peek() {
286 Some('^') if upper.is_none() => {
287 self.advance();
288 upper = Some(self.parse_atom());
289 }
290 Some('_') if lower.is_none() => {
291 self.advance();
292 lower = Some(self.parse_atom());
293 }
294 _ => break,
295 }
296 }
297 let op = EqNode::BigOp {
298 symbol,
299 lower: lower.map(Box::new),
300 upper: upper.map(Box::new),
301 };
302 if self.peek() == Some('(') {
304 let body = self.parse_group_atom();
305 return EqNode::Seq(vec![op, body]);
306 }
307 return op;
308 }
309 }
310 static BARE_LIMIT_OPS: &[&str] = &[
312 "liminf", "limsup", "lim",
313 "arcsin", "arccos", "arctan",
314 "sinh", "cosh", "tanh",
315 "sin", "cos", "tan", "cot", "sec", "csc",
316 "log", "ln", "exp",
317 "min", "max", "det", "dim", "ker", "gcd", "arg",
318 ];
319 for &kw in BARE_LIMIT_OPS {
320 if self.try_keyword(kw) {
321 let mut lower: Option<EqNode> = None;
322 if self.peek() == Some('_') {
323 self.advance();
324 lower = Some(self.parse_group_atom());
325 }
326 return EqNode::Limit {
327 name: kw.to_string(),
328 lower: lower.map(Box::new),
329 };
330 }
331 }
332 if let Some(node) = self.try_bareword_symbol() {
334 return node;
335 }
336 self.advance();
338 EqNode::Text(ch.to_string())
339 }
340 Some(ch) if ch.is_ascii_digit() || ch == '.' => {
341 let mut s = String::new();
342 while let Some(c) = self.peek() {
343 if c.is_ascii_digit() || c == '.' {
344 s.push(c);
345 self.advance();
346 } else {
347 break;
348 }
349 }
350 EqNode::Text(s)
351 }
352 Some(' ') => {
353 self.advance();
354 EqNode::Text(" ".into())
355 }
356 Some(ch @ ('+' | '=' | '<' | '>' | '*')) => {
357 self.advance();
358 let op = if ch == '*' { "\u{00B7}".to_string() } else { ch.to_string() };
359 EqNode::Seq(vec![
360 EqNode::Space(OP_SPACE),
361 EqNode::Text(op),
362 EqNode::Space(OP_SPACE),
363 ])
364 }
365 Some('-') => {
366 self.advance();
368 EqNode::Text("-".to_string())
369 }
370 Some(ch) => {
371 self.advance();
372 EqNode::Text(ch.to_string())
373 }
374 }
375 }
376
377 fn parse_postfix(&mut self, base: EqNode) -> EqNode {
378 let mut sup: Option<EqNode> = None;
380 let mut sub: Option<EqNode> = None;
381
382 loop {
383 match self.peek() {
384 Some('^') if sup.is_none() => {
385 self.advance();
386 sup = Some(self.parse_atom());
387 }
388 Some('_') if sub.is_none() => {
389 self.advance();
390 sub = Some(self.parse_atom());
391 }
392 _ => break,
393 }
394 }
395
396 match (sup, sub) {
397 (Some(s), Some(b)) => EqNode::SupSub(Box::new(base), Box::new(s), Box::new(b)),
398 (Some(s), None) => EqNode::Sup(Box::new(base), Box::new(s)),
399 (None, Some(b)) => EqNode::Sub(Box::new(base), Box::new(b)),
400 (None, None) => base,
401 }
402 }
403
404 fn try_keyword(&mut self, kw: &str) -> bool {
405 let kw_chars: Vec<char> = kw.chars().collect();
406 if self.pos + kw_chars.len() > self.chars.len() {
407 return false;
408 }
409 for (i, &kc) in kw_chars.iter().enumerate() {
410 if self.chars[self.pos + i] != kc {
411 return false;
412 }
413 }
414 let after = self.pos + kw_chars.len();
415 if after < self.chars.len() && self.chars[after].is_ascii_alphabetic() {
416 return false;
417 }
418 self.pos += kw_chars.len();
419 true
420 }
421
422 fn try_bareword_symbol(&mut self) -> Option<EqNode> {
425 static BAREWORDS: &[&str] = &[
427 "varepsilon", "rightarrow", "leftarrow", "Rightarrow", "Leftarrow",
428 "epsilon", "upsilon", "omicron", "lambda", "Lambda", "implies",
429 "partial", "emptyset",
430 "alpha", "beta", "gamma", "delta", "theta", "kappa", "sigma",
431 "omega", "Gamma", "Delta", "Theta", "Sigma", "Omega",
432 "infty", "nabla", "forall", "exists", "approx", "equiv",
433 "times", "cdot",
434 "zeta", "iota", "pi", "rho", "tau", "phi", "chi", "psi",
435 "eta", "nu", "xi", "mu", "Pi", "Phi", "Psi",
436 "pm",
437 ];
438 for &word in BAREWORDS {
439 if self.try_keyword(word) {
440 let symbol = latex_to_unicode(word)
441 .unwrap_or_else(|| word.to_string());
442 return Some(maybe_wrap_op_spacing(symbol));
443 }
444 }
445 None
446 }
447
448 fn parse_command(&mut self) -> EqNode {
449 if let Some(ch) = self.peek() {
452 if !ch.is_ascii_alphabetic() {
453 self.advance();
454 return match ch {
455 ',' => EqNode::Space(3.0), ':' | '>' => EqNode::Space(4.0), ';' => EqNode::Space(5.0), '!' => EqNode::Space(-3.0), '\\' => EqNode::Text(String::new()), '{' => EqNode::Text("{".to_string()),
461 '}' => EqNode::Text("}".to_string()),
462 ' ' => EqNode::Space(4.0), _ => EqNode::Text(ch.to_string()),
464 };
465 }
466 }
467
468 let mut name = String::new();
469 while let Some(ch) = self.peek() {
470 if ch.is_ascii_alphabetic() {
471 name.push(ch);
472 self.advance();
473 } else {
474 break;
475 }
476 }
477 if self.peek() == Some(' ') {
479 self.advance();
480 }
481
482 match name.as_str() {
484 "quad" => return EqNode::Space(18.0),
485 "qquad" => return EqNode::Space(36.0),
486 _ => {}
487 }
488
489 if name == "text" {
491 return self.parse_text_block();
492 }
493
494 if let Some(kind) = match name.as_str() {
496 "mathbf" => Some(MathFontKind::Bold),
497 "mathbb" => Some(MathFontKind::Blackboard),
498 "mathcal" => Some(MathFontKind::Calligraphic),
499 "mathrm" => Some(MathFontKind::Roman),
500 "mathfrak" => Some(MathFontKind::Fraktur),
501 "mathsf" => Some(MathFontKind::SansSerif),
502 "mathtt" => Some(MathFontKind::Monospace),
503 _ => None,
504 } {
505 let arg = self.parse_atom();
506 return EqNode::MathFont { kind, content: Box::new(arg) };
507 }
508
509 if name == "left" {
511 return self.parse_left_right();
512 }
513
514 if name == "begin" {
516 return self.parse_begin_env();
517 }
518
519 if name == "binom" {
521 let top = self.parse_group_atom();
522 let bot = self.parse_group_atom();
523 return EqNode::Binom(Box::new(top), Box::new(bot));
524 }
525
526 if name == "overbrace" {
528 let content = self.parse_atom();
529 let mut label = None;
530 if self.peek() == Some('^') {
531 self.advance();
532 label = Some(Box::new(self.parse_atom()));
533 }
534 return EqNode::Brace { content: Box::new(content), label, over: true };
535 }
536
537 if name == "underbrace" {
539 let content = self.parse_atom();
540 let mut label = None;
541 if self.peek() == Some('_') {
542 self.advance();
543 label = Some(Box::new(self.parse_atom()));
544 }
545 return EqNode::Brace { content: Box::new(content), label, over: false };
546 }
547
548 if name == "overset" || name == "stackrel" {
550 let annotation = self.parse_group_atom();
551 let base = self.parse_group_atom();
552 return EqNode::StackRel { base: Box::new(base), annotation: Box::new(annotation), over: true };
553 }
554
555 if name == "underset" {
557 let annotation = self.parse_group_atom();
558 let base = self.parse_group_atom();
559 return EqNode::StackRel { base: Box::new(base), annotation: Box::new(annotation), over: false };
560 }
561
562 if name == "frac" {
564 let numer = self.parse_group_atom();
565 let denom = self.parse_group_atom();
566 return EqNode::Frac(Box::new(numer), Box::new(denom));
567 }
568
569 if name == "sqrt" {
571 let arg = self.parse_group_atom();
572 return EqNode::Sqrt(Box::new(arg));
573 }
574
575 if let Some(kind) = match name.as_str() {
577 "hat" => Some(AccentKind::Hat),
578 "bar" | "overline" => Some(AccentKind::Bar),
579 "dot" => Some(AccentKind::Dot),
580 "ddot" => Some(AccentKind::DoubleDot),
581 "tilde" => Some(AccentKind::Tilde),
582 "vec" => Some(AccentKind::Vec),
583 _ => None,
584 } {
585 let arg = self.parse_atom();
586 return EqNode::Accent(Box::new(arg), kind);
587 }
588
589 static LIMIT_OPS: &[&str] = &[
591 "lim", "liminf", "limsup",
592 "min", "max", "sup", "inf",
593 "log", "ln", "exp",
594 "sin", "cos", "tan", "cot", "sec", "csc",
595 "arcsin", "arccos", "arctan",
596 "sinh", "cosh", "tanh",
597 "det", "dim", "ker", "deg", "gcd", "hom", "arg",
598 ];
599 if LIMIT_OPS.contains(&name.as_str()) {
600 let mut lower: Option<EqNode> = None;
601 if self.peek() == Some('_') {
602 self.advance();
603 lower = Some(self.parse_group_atom());
604 }
605 return EqNode::Limit {
606 name: name.clone(),
607 lower: lower.map(Box::new),
608 };
609 }
610
611 if is_big_op(&name) {
613 let symbol = big_op_symbol(&name).to_string();
614 let mut upper: Option<EqNode> = None;
616 let mut lower: Option<EqNode> = None;
617 loop {
618 match self.peek() {
619 Some('^') if upper.is_none() => {
620 self.advance();
621 upper = Some(self.parse_atom());
622 }
623 Some('_') if lower.is_none() => {
624 self.advance();
625 lower = Some(self.parse_atom());
626 }
627 _ => break,
628 }
629 }
630 return EqNode::BigOp {
631 symbol,
632 lower: lower.map(Box::new),
633 upper: upper.map(Box::new),
634 };
635 }
636
637 let symbol = latex_to_unicode(&name).unwrap_or_else(|| format!("\\{}", name));
639 maybe_wrap_op_spacing(symbol)
640 }
641
642 fn parse_text_block(&mut self) -> EqNode {
644 if self.peek() == Some('{') {
645 self.advance();
646 let mut text = String::new();
647 let mut depth = 1;
648 while let Some(ch) = self.advance() {
649 if ch == '{' {
650 depth += 1;
651 text.push(ch);
652 } else if ch == '}' {
653 depth -= 1;
654 if depth == 0 {
655 break;
656 }
657 text.push(ch);
658 } else {
659 text.push(ch);
660 }
661 }
662 EqNode::TextBlock(text)
663 } else {
664 EqNode::TextBlock(String::new())
665 }
666 }
667
668 fn parse_left_right(&mut self) -> EqNode {
670 let left_delim = match self.advance() {
672 Some('.') => String::new(), Some(ch) => ch.to_string(),
674 None => String::new(),
675 };
676
677 let content = self.parse_sequence_until_ex(false, true);
679
680 let right_delim = if self.lookahead_command("right") {
682 self.advance(); for _ in 0..5 { self.advance(); }
685 if self.peek() == Some(' ') { self.advance(); }
687 match self.advance() {
688 Some('.') => String::new(),
689 Some(ch) => ch.to_string(),
690 None => String::new(),
691 }
692 } else {
693 String::new()
694 };
695
696 EqNode::Delimited {
697 left: left_delim,
698 right: right_delim,
699 content: Box::new(content),
700 }
701 }
702
703 fn parse_begin_env(&mut self) -> EqNode {
705 let env_name = self.parse_brace_arg();
706
707 match env_name.as_str() {
708 "matrix" | "pmatrix" | "bmatrix" | "vmatrix" | "Bmatrix" | "Vmatrix" => {
709 let kind = match env_name.as_str() {
710 "matrix" => MatrixKind::Plain,
711 "pmatrix" => MatrixKind::Paren,
712 "bmatrix" => MatrixKind::Bracket,
713 "vmatrix" => MatrixKind::VBar,
714 "Vmatrix" => MatrixKind::DoubleVBar,
715 "Bmatrix" => MatrixKind::Brace,
716 _ => MatrixKind::Plain,
717 };
718 let rows = self.parse_matrix_body();
719 self.consume_end_env(&env_name);
720 EqNode::Matrix { kind, rows }
721 }
722 "cases" => {
723 let rows = self.parse_cases_body();
724 self.consume_end_env("cases");
725 EqNode::Cases { rows }
726 }
727 _ => {
728 EqNode::Text(format!("\\begin{{{}}}", env_name))
730 }
731 }
732 }
733
734 fn parse_brace_arg(&mut self) -> String {
736 if self.peek() == Some('{') {
737 self.advance();
738 let mut s = String::new();
739 while let Some(ch) = self.peek() {
740 if ch == '}' {
741 self.advance();
742 break;
743 }
744 s.push(ch);
745 self.advance();
746 }
747 s
748 } else {
749 String::new()
750 }
751 }
752
753 fn parse_matrix_body(&mut self) -> Vec<Vec<EqNode>> {
755 let mut rows = Vec::new();
756 loop {
757 let mut row = Vec::new();
758 loop {
759 let cell = self.parse_sequence_until_ex(false, false);
760 row.push(cell);
761 if self.peek() == Some('&') {
762 self.advance(); continue;
764 }
765 break;
766 }
767 rows.push(row);
768 if self.is_row_separator() {
770 self.advance(); self.advance(); if self.peek() == Some(' ') { self.advance(); }
774 if self.peek() == Some('\\') && self.lookahead_command("end") {
776 break;
777 }
778 continue;
779 }
780 break;
781 }
782 rows
783 }
784
785 fn parse_cases_body(&mut self) -> Vec<(EqNode, Option<EqNode>)> {
787 let mut rows = Vec::new();
788 loop {
789 let value = self.parse_sequence_until_ex(false, false);
790 let condition = if self.peek() == Some('&') {
791 self.advance();
792 Some(self.parse_sequence_until_ex(false, false))
793 } else {
794 None
795 };
796 rows.push((value, condition));
797 if self.is_row_separator() {
799 self.advance();
800 self.advance();
801 if self.peek() == Some(' ') { self.advance(); }
802 if self.peek() == Some('\\') && self.lookahead_command("end") {
803 break;
804 }
805 continue;
806 }
807 break;
808 }
809 rows
810 }
811
812 fn consume_end_env(&mut self, _expected: &str) {
814 if self.peek() == Some('\\') {
816 self.advance(); for _ in 0..3 {
819 self.advance();
820 }
821 if self.peek() == Some(' ') { self.advance(); }
823 self.parse_brace_arg();
825 }
826 }
827}
828
/// Translates a LaTeX symbol name (without the backslash) into its Unicode
/// glyph, returned as a one-character `String`.
///
/// Lookup is exact and case-sensitive. Several aliases and `var*` variants
/// share a glyph with their base name (e.g. `le`/`leq`, `to`/`rightarrow`,
/// `varepsilon`/`epsilon`). Returns `None` for names not in the table.
pub fn latex_to_unicode(name: &str) -> Option<String> {
    fn greek_lowercase(name: &str) -> Option<char> {
        Some(match name {
            "alpha" => '\u{03B1}',
            "beta" => '\u{03B2}',
            "gamma" => '\u{03B3}',
            "delta" => '\u{03B4}',
            "epsilon" | "varepsilon" => '\u{03B5}',
            "zeta" => '\u{03B6}',
            "eta" => '\u{03B7}',
            "theta" | "vartheta" => '\u{03B8}',
            "iota" => '\u{03B9}',
            "kappa" => '\u{03BA}',
            "lambda" => '\u{03BB}',
            "mu" => '\u{03BC}',
            "nu" => '\u{03BD}',
            "xi" => '\u{03BE}',
            "omicron" => '\u{03BF}',
            "pi" | "varpi" => '\u{03C0}',
            "rho" | "varrho" => '\u{03C1}',
            "sigma" | "varsigma" => '\u{03C3}',
            "tau" => '\u{03C4}',
            "upsilon" => '\u{03C5}',
            "phi" | "varphi" => '\u{03C6}',
            "chi" => '\u{03C7}',
            "psi" => '\u{03C8}',
            "omega" => '\u{03C9}',
            _ => return None,
        })
    }

    fn greek_uppercase(name: &str) -> Option<char> {
        Some(match name {
            "Alpha" => '\u{0391}',
            "Beta" => '\u{0392}',
            "Gamma" => '\u{0393}',
            "Delta" => '\u{0394}',
            "Epsilon" => '\u{0395}',
            "Zeta" => '\u{0396}',
            "Eta" => '\u{0397}',
            "Theta" => '\u{0398}',
            "Iota" => '\u{0399}',
            "Kappa" => '\u{039A}',
            "Lambda" => '\u{039B}',
            "Mu" => '\u{039C}',
            "Nu" => '\u{039D}',
            "Xi" => '\u{039E}',
            "Pi" => '\u{03A0}',
            "Rho" => '\u{03A1}',
            "Sigma" => '\u{03A3}',
            "Tau" => '\u{03A4}',
            "Upsilon" => '\u{03A5}',
            "Phi" => '\u{03A6}',
            "Chi" => '\u{03A7}',
            "Psi" => '\u{03A8}',
            "Omega" => '\u{03A9}',
            _ => return None,
        })
    }

    fn binary_operator(name: &str) -> Option<char> {
        Some(match name {
            "pm" | "plusminus" => '\u{00B1}',
            "mp" | "minusplus" => '\u{2213}',
            "times" => '\u{00D7}',
            "div" => '\u{00F7}',
            "cdot" => '\u{00B7}',
            "ast" => '\u{2217}',
            "star" => '\u{22C6}',
            "circ" => '\u{2218}',
            "bullet" => '\u{2022}',
            "oplus" => '\u{2295}',
            "otimes" => '\u{2297}',
            _ => return None,
        })
    }

    fn relation(name: &str) -> Option<char> {
        Some(match name {
            "leq" | "le" => '\u{2264}',
            "geq" | "ge" => '\u{2265}',
            "neq" | "ne" => '\u{2260}',
            "approx" => '\u{2248}',
            "equiv" => '\u{2261}',
            "sim" => '\u{223C}',
            "simeq" => '\u{2243}',
            "cong" => '\u{2245}',
            "propto" => '\u{221D}',
            "ll" => '\u{226A}',
            "gg" => '\u{226B}',
            "prec" => '\u{227A}',
            "succ" => '\u{227B}',
            "perp" => '\u{22A5}',
            "parallel" => '\u{2225}',
            _ => return None,
        })
    }

    fn logic_or_set(name: &str) -> Option<char> {
        Some(match name {
            "forall" => '\u{2200}',
            "exists" => '\u{2203}',
            "nexists" => '\u{2204}',
            "neg" | "lnot" => '\u{00AC}',
            "land" | "wedge" => '\u{2227}',
            "lor" | "vee" => '\u{2228}',
            "in" => '\u{2208}',
            "notin" => '\u{2209}',
            "ni" => '\u{220B}',
            "subset" => '\u{2282}',
            "supset" => '\u{2283}',
            "subseteq" => '\u{2286}',
            "supseteq" => '\u{2287}',
            "cup" => '\u{222A}',
            "cap" => '\u{2229}',
            "emptyset" | "varnothing" => '\u{2205}',
            "setminus" => '\u{2216}',
            _ => return None,
        })
    }

    fn arrow(name: &str) -> Option<char> {
        Some(match name {
            "rightarrow" | "to" => '\u{2192}',
            "leftarrow" | "gets" => '\u{2190}',
            "leftrightarrow" => '\u{2194}',
            "Rightarrow" | "implies" => '\u{21D2}',
            "Leftarrow" => '\u{21D0}',
            "Leftrightarrow" | "iff" => '\u{21D4}',
            "uparrow" => '\u{2191}',
            "downarrow" => '\u{2193}',
            "mapsto" => '\u{21A6}',
            "hookrightarrow" => '\u{21AA}',
            "hookleftarrow" => '\u{21A9}',
            _ => return None,
        })
    }

    fn miscellaneous(name: &str) -> Option<char> {
        Some(match name {
            "infty" | "inf" => '\u{221E}',
            "partial" => '\u{2202}',
            "nabla" => '\u{2207}',
            "hbar" => '\u{210F}',
            "ell" => '\u{2113}',
            "Re" => '\u{211C}',
            "Im" => '\u{2111}',
            "wp" => '\u{2118}',
            "aleph" => '\u{2135}',
            "angle" => '\u{2220}',
            "triangle" => '\u{25B3}',
            "degree" | "deg" => '\u{00B0}',
            "prime" => '\u{2032}',
            "dots" | "ldots" | "cdots" => '\u{22EF}',
            "vdots" => '\u{22EE}',
            "ddots" => '\u{22F1}',
            _ => return None,
        })
    }

    fn delimiter(name: &str) -> Option<char> {
        Some(match name {
            "langle" => '\u{27E8}',
            "rangle" => '\u{27E9}',
            "lceil" => '\u{2308}',
            "rceil" => '\u{2309}',
            "lfloor" => '\u{230A}',
            "rfloor" => '\u{230B}',
            _ => return None,
        })
    }

    // No name appears in more than one category, so the lookup order below
    // does not affect the result.
    greek_lowercase(name)
        .or_else(|| greek_uppercase(name))
        .or_else(|| binary_operator(name))
        .or_else(|| relation(name))
        .or_else(|| logic_or_set(name))
        .or_else(|| arrow(name))
        .or_else(|| miscellaneous(name))
        .or_else(|| delimiter(name))
        .map(|glyph| glyph.to_string())
}
985
986#[cfg(test)]
987mod tests {
988 use super::*;
989
990 fn is_text(node: &EqNode, expected: &str) -> bool {
993 matches!(node, EqNode::Text(s) if s == expected)
994 }
995
996 fn collect_text(node: &EqNode) -> String {
998 match node {
999 EqNode::Text(s) => s.clone(),
1000 EqNode::Space(_) => String::new(),
1001 EqNode::Seq(nodes) => nodes.iter().map(collect_text).collect(),
1002 EqNode::Sup(base, sup) => format!("{}^{}", collect_text(base), collect_text(sup)),
1003 EqNode::Sub(base, sub) => format!("{}_{}",collect_text(base), collect_text(sub)),
1004 EqNode::SupSub(base, sup, sub) => format!("{}^{}_{}", collect_text(base), collect_text(sup), collect_text(sub)),
1005 EqNode::Frac(n, d) => format!("({})/({})", collect_text(n), collect_text(d)),
1006 EqNode::Sqrt(inner) => format!("sqrt({})", collect_text(inner)),
1007 EqNode::BigOp { symbol, lower, upper } => {
1008 let mut s = symbol.clone();
1009 if let Some(l) = lower { s += &format!("_{}", collect_text(l)); }
1010 if let Some(u) = upper { s += &format!("^{}", collect_text(u)); }
1011 s
1012 }
1013 EqNode::Limit { name, lower } => {
1014 let mut s = name.clone();
1015 if let Some(l) = lower { s += &format!("_{}", collect_text(l)); }
1016 s
1017 }
1018 EqNode::Accent(inner, _) => collect_text(inner),
1019 EqNode::TextBlock(s) => s.clone(),
1020 EqNode::MathFont { content, .. } => collect_text(content),
1021 EqNode::Delimited { content, .. } => collect_text(content),
1022 EqNode::Matrix { rows, .. } => {
1023 rows.iter().map(|r| r.iter().map(collect_text).collect::<Vec<_>>().join("&")).collect::<Vec<_>>().join("\\\\")
1024 }
1025 EqNode::Cases { rows, .. } => {
1026 rows.iter().map(|(v, c)| {
1027 let mut s = collect_text(v);
1028 if let Some(cond) = c { s += &format!("&{}", collect_text(cond)); }
1029 s
1030 }).collect::<Vec<_>>().join("\\\\")
1031 }
1032 EqNode::Binom(a, b) => format!("binom({},{})", collect_text(a), collect_text(b)),
1033 EqNode::Brace { content, .. } => collect_text(content),
1034 EqNode::StackRel { base, annotation, .. } => format!("stack({},{})", collect_text(base), collect_text(annotation)),
1035 }
1036 }
1037
1038 #[test]
1041 fn parse_single_letter() {
1042 let node = parse_equation("x");
1043 assert!(is_text(&node, "x"));
1044 }
1045
1046 #[test]
1047 fn parse_number() {
1048 let node = parse_equation("42");
1049 assert!(is_text(&node, "42"));
1050 }
1051
1052 #[test]
1053 fn parse_decimal_number() {
1054 let node = parse_equation("3.14");
1055 assert!(is_text(&node, "3.14"));
1056 }
1057
1058 #[test]
1059 fn parse_empty_input() {
1060 let node = parse_equation("");
1061 assert!(is_text(&node, ""));
1062 }
1063
1064 #[test]
1067 fn parse_superscript() {
1068 let node = parse_equation("x^2");
1069 assert!(matches!(node, EqNode::Sup(_, _)));
1070 assert_eq!(collect_text(&node), "x^2");
1071 }
1072
1073 #[test]
1074 fn parse_subscript() {
1075 let node = parse_equation("x_1");
1076 assert!(matches!(node, EqNode::Sub(_, _)));
1077 assert_eq!(collect_text(&node), "x_1");
1078 }
1079
1080 #[test]
1081 fn parse_combined_sup_sub() {
1082 let node = parse_equation("x^2_3");
1083 assert!(matches!(node, EqNode::SupSub(_, _, _)));
1084 assert_eq!(collect_text(&node), "x^2_3");
1085 }
1086
1087 #[test]
1088 fn parse_combined_sub_sup_order() {
1089 let node = parse_equation("x_3^2");
1091 assert!(matches!(node, EqNode::SupSub(_, _, _)));
1092 }
1093
1094 #[test]
1095 fn parse_braced_superscript() {
1096 let node = parse_equation("x^{2n}");
1097 assert!(matches!(node, EqNode::Sup(_, _)));
1098 if let EqNode::Sup(_, sup) = &node {
1099 assert_eq!(collect_text(sup), "2n");
1100 }
1101 }
1102
1103 #[test]
1106 fn parse_inline_fraction() {
1107 let node = parse_equation("a/b");
1108 assert!(matches!(node, EqNode::Frac(_, _)));
1109 assert_eq!(collect_text(&node), "(a)/(b)");
1110 }
1111
1112 #[test]
1113 fn parse_frac_command() {
1114 let node = parse_equation("\\frac{a}{b}");
1115 assert!(matches!(node, EqNode::Frac(_, _)));
1116 assert_eq!(collect_text(&node), "(a)/(b)");
1117 }
1118
1119 #[test]
1120 fn parse_frac_with_parens() {
1121 let node = parse_equation("(x+1)/(x-1)");
1123 let text = collect_text(&node);
1125 assert!(text.contains("x+1"));
1126 assert!(text.contains("x-1"));
1127 }
1128
1129 #[test]
1130 fn parse_nested_paren_fraction() {
1131 let node = parse_equation("5/((3x+3))");
1133 let text = collect_text(&node);
1134 assert!(text.contains("(3x+3)"));
1135 }
1136
1137 #[test]
1140 fn parse_sqrt_bareword() {
1141 let node = parse_equation("sqrt(x)");
1142 assert!(matches!(node, EqNode::Sqrt(_)));
1143 assert_eq!(collect_text(&node), "sqrt(x)");
1144 }
1145
1146 #[test]
1147 fn parse_sqrt_command() {
1148 let node = parse_equation("\\sqrt{x}");
1149 assert!(matches!(node, EqNode::Sqrt(_)));
1150 }
1151
1152 #[test]
1153 fn parse_sqrt_no_visible_parens() {
1154 let node = parse_equation("sqrt(x^2)");
1156 assert!(matches!(node, EqNode::Sqrt(_)));
1157 if let EqNode::Sqrt(inner) = &node {
1159 assert!(matches!(**inner, EqNode::Sup(_, _)), "Inner should be Sup, got: {:?}", inner);
1160 }
1161 }
1162
1163 #[test]
1164 fn parse_sqrt_with_visible_parens() {
1165 let node = parse_equation("sqrt((x^2))");
1167 let text = collect_text(&node);
1168 assert!(text.contains("("));
1169 }
1170
1171 #[test]
1174 fn parse_greek_bareword() {
1175 let node = parse_equation("pi");
1176 assert!(is_text(&node, "\u{03C0}"));
1177 }
1178
1179 #[test]
1180 fn parse_greek_backslash() {
1181 let node = parse_equation("\\alpha");
1182 assert!(is_text(&node, "\u{03B1}"));
1183 }
1184
1185 #[test]
1186 fn parse_greek_uppercase() {
1187 let node = parse_equation("\\Omega");
1188 assert!(is_text(&node, "\u{03A9}"));
1189 }
1190
1191 #[test]
1192 fn parse_greek_in_expression() {
1193 let node = parse_equation("pi r^2");
1194 let text = collect_text(&node);
1195 assert!(text.contains("\u{03C0}"));
1196 assert!(text.contains("r^2"));
1197 }
1198
1199 #[test]
1202 fn parse_sum_with_limits() {
1203 let node = parse_equation("\\sum_{i=0}^{n}");
1204 assert!(matches!(node, EqNode::BigOp { .. }));
1205 if let EqNode::BigOp { lower, upper, .. } = &node {
1206 assert!(lower.is_some());
1207 assert!(upper.is_some());
1208 }
1209 }
1210
1211 #[test]
1212 fn parse_int_bareword() {
1213 let node = parse_equation("int_0^1");
1214 assert!(matches!(node, EqNode::BigOp { .. }));
1215 }
1216
1217 #[test]
1218 fn parse_int_with_body_parens() {
1219 let tree = parse_equation("int(x^2)");
1221 let text = collect_text(&tree);
1222 assert!(text.contains("x^2"));
1224 assert!(!text.contains("("));
1225 }
1226
1227 #[test]
1230 fn parse_lim_bareword() {
1231 let node = parse_equation("lim_{x \\to 0}");
1232 assert!(matches!(node, EqNode::Limit { .. }));
1233 if let EqNode::Limit { name, lower } = &node {
1234 assert_eq!(name, "lim");
1235 assert!(lower.is_some());
1236 }
1237 }
1238
1239 #[test]
1240 fn parse_sin_bareword() {
1241 let node = parse_equation("sin(x)");
1242 let text = collect_text(&node);
1244 assert!(text.contains("sin"));
1245 assert!(text.contains("x"));
1246 }
1247
1248 #[test]
1249 fn parse_log_with_subscript() {
1250 let node = parse_equation("log_2");
1251 assert!(matches!(node, EqNode::Limit { .. }));
1252 if let EqNode::Limit { name, lower } = &node {
1253 assert_eq!(name, "log");
1254 assert!(lower.is_some());
1255 }
1256 }
1257
1258 #[test]
1261 fn parse_plus_gets_spacing() {
1262 let node = parse_equation("a + b");
1263 fn has_space(node: &EqNode) -> bool {
1265 match node {
1266 EqNode::Space(_) => true,
1267 EqNode::Seq(nodes) => nodes.iter().any(has_space),
1268 _ => false,
1269 }
1270 }
1271 assert!(has_space(&node), "Plus should have space nodes in tree");
1272 }
1273
1274 #[test]
1275 fn parse_unary_minus_no_spacing() {
1276 let node = parse_equation("-x");
1277 let text = collect_text(&node);
1279 assert!(text.starts_with("-") || text.contains("-x"));
1280 }
1281
1282 #[test]
1283 fn parse_binary_minus_gets_spacing() {
1284 let node = parse_equation("a - b");
1285 if let EqNode::Seq(nodes) = &node {
1286 let space_count = nodes.iter().filter(|n| matches!(n, EqNode::Space(_))).count();
1288 assert!(space_count >= 2, "Binary minus should have spaces: got {space_count}");
1289 }
1290 }
1291
1292 #[test]
1295 fn parse_neq_gets_spacing() {
1296 let node = parse_equation("a \\neq b");
1297 fn has_space(node: &EqNode) -> bool {
1298 match node {
1299 EqNode::Space(_) => true,
1300 EqNode::Seq(nodes) => nodes.iter().any(has_space),
1301 _ => false,
1302 }
1303 }
1304 assert!(has_space(&node), "\\neq should get operator spacing");
1305 }
1306
1307 #[test]
1308 fn parse_leq_gets_spacing() {
1309 let node = parse_equation("x \\leq y");
1310 fn has_space(node: &EqNode) -> bool {
1311 match node {
1312 EqNode::Space(_) => true,
1313 EqNode::Seq(nodes) => nodes.iter().any(has_space),
1314 _ => false,
1315 }
1316 }
1317 assert!(has_space(&node), "\\leq should get operator spacing");
1318 }
1319
1320 #[test]
1323 fn parse_pmatrix() {
1324 let node = parse_equation("\\begin{pmatrix} a & b \\\\ c & d \\end{pmatrix}");
1325 assert!(matches!(node, EqNode::Matrix { .. }));
1326 if let EqNode::Matrix { kind, rows } = &node {
1327 assert!(matches!(kind, MatrixKind::Paren));
1328 assert_eq!(rows.len(), 2);
1329 assert_eq!(rows[0].len(), 2);
1330 }
1331 }
1332
1333 #[test]
1334 fn parse_cases() {
1335 let node = parse_equation("\\begin{cases} x^2 & x > 0 \\\\ 0 & x = 0 \\end{cases}");
1336 assert!(matches!(node, EqNode::Cases { .. }));
1337 if let EqNode::Cases { rows } = &node {
1338 assert_eq!(rows.len(), 2);
1339 }
1340 }
1341
1342 #[test]
1345 fn parse_text_block() {
1346 let node = parse_equation("\\text{hello world}");
1347 assert!(matches!(node, EqNode::TextBlock(_)));
1348 if let EqNode::TextBlock(s) = &node {
1349 assert_eq!(s, "hello world");
1350 }
1351 }
1352
1353 #[test]
1354 fn parse_mathbb() {
1355 let node = parse_equation("\\mathbb{R}");
1356 assert!(matches!(node, EqNode::MathFont { .. }));
1357 if let EqNode::MathFont { kind, .. } = &node {
1358 assert!(matches!(kind, MathFontKind::Blackboard));
1359 }
1360 }
1361
1362 #[test]
1363 fn parse_binom() {
1364 let node = parse_equation("\\binom{n}{k}");
1365 assert!(matches!(node, EqNode::Binom(_, _)));
1366 }
1367
1368 #[test]
1369 fn parse_left_right_delimiters() {
1370 let node = parse_equation("\\left( x \\right)");
1371 assert!(matches!(node, EqNode::Delimited { .. }));
1372 }
1373
1374 #[test]
1375 fn parse_overbrace() {
1376 let node = parse_equation("\\overbrace{a+b}^{n}");
1377 assert!(matches!(node, EqNode::Brace { over: true, .. }));
1378 }
1379
1380 #[test]
1381 fn parse_overset() {
1382 let node = parse_equation("\\overset{def}{=}");
1383 assert!(matches!(node, EqNode::StackRel { over: true, .. }));
1384 }
1385
1386 #[test]
1389 fn parse_quad_spacing() {
1390 let node = parse_equation("a \\quad b");
1391 if let EqNode::Seq(nodes) = &node {
1392 let has_wide_space = nodes.iter().any(|n| matches!(n, EqNode::Space(w) if *w >= 18.0));
1393 assert!(has_wide_space, "\\quad should produce 18pt space");
1394 }
1395 }
1396
1397 #[test]
1398 fn parse_thin_space() {
1399 let node = parse_equation("a\\,b");
1400 if let EqNode::Seq(nodes) = &node {
1401 let has_thin_space = nodes.iter().any(|n| matches!(n, EqNode::Space(w) if *w > 0.0 && *w < 5.0));
1402 assert!(has_thin_space, "\\, should produce thin space");
1403 }
1404 }
1405
1406 #[test]
1409 fn parse_hat_accent() {
1410 let node = parse_equation("\\hat{x}");
1411 assert!(matches!(node, EqNode::Accent(_, AccentKind::Hat)));
1412 }
1413
1414 #[test]
1415 fn parse_vec_accent() {
1416 let node = parse_equation("\\vec{v}");
1417 assert!(matches!(node, EqNode::Accent(_, AccentKind::Vec)));
1418 }
1419
1420 #[test]
1423 fn unicode_greek_lowercase() {
1424 assert_eq!(latex_to_unicode("alpha"), Some("\u{03B1}".to_string()));
1425 assert_eq!(latex_to_unicode("omega"), Some("\u{03C9}".to_string()));
1426 }
1427
1428 #[test]
1429 fn unicode_greek_uppercase() {
1430 assert_eq!(latex_to_unicode("Gamma"), Some("\u{0393}".to_string()));
1431 assert_eq!(latex_to_unicode("Sigma"), Some("\u{03A3}".to_string()));
1432 }
1433
1434 #[test]
1435 fn unicode_operators() {
1436 assert_eq!(latex_to_unicode("leq"), Some("\u{2264}".to_string()));
1437 assert_eq!(latex_to_unicode("geq"), Some("\u{2265}".to_string()));
1438 assert_eq!(latex_to_unicode("neq"), Some("\u{2260}".to_string()));
1439 assert_eq!(latex_to_unicode("infty"), Some("\u{221E}".to_string()));
1440 }
1441
1442 #[test]
1443 fn unicode_arrows() {
1444 assert_eq!(latex_to_unicode("rightarrow"), Some("\u{2192}".to_string()));
1445 assert_eq!(latex_to_unicode("implies"), Some("\u{21D2}".to_string()));
1446 }
1447
1448 #[test]
1449 fn unicode_unknown_returns_none() {
1450 assert_eq!(latex_to_unicode("notacommand"), None);
1451 }
1452
1453 #[test]
1456 fn parse_nested_fractions() {
1457 let node = parse_equation("\\frac{\\frac{a}{b}}{c}");
1458 assert!(matches!(node, EqNode::Frac(_, _)));
1459 if let EqNode::Frac(numer, _) = &node {
1460 assert!(matches!(**numer, EqNode::Frac(_, _)));
1461 }
1462 }
1463
1464 #[test]
1465 fn parse_nested_superscripts() {
1466 let node = parse_equation("x^{2^3}");
1467 assert!(matches!(node, EqNode::Sup(_, _)));
1468 if let EqNode::Sup(_, sup) = &node {
1469 assert!(matches!(**sup, EqNode::Sup(_, _)));
1470 }
1471 }
1472
1473 #[test]
1474 fn parse_complex_expression() {
1475 let _node = parse_equation("\\frac{-b \\pm sqrt(b^2 - 4ac)}{2a}");
1477 }
1478
1479 #[test]
1480 fn parse_complex_integral() {
1481 let _node = parse_equation("\\int_0^\\infty e^{-x^2} dx");
1482 }
1483
1484 #[test]
1485 fn parse_complex_matrix() {
1486 let _node = parse_equation("\\begin{pmatrix} \\cos\\theta & -\\sin\\theta \\\\ \\sin\\theta & \\cos\\theta \\end{pmatrix}");
1487 }
1488
1489 #[test]
1490 fn parse_euler_identity() {
1491 let _node = parse_equation("e^{i pi} + 1 = 0");
1492 }
1493
1494 #[test]
1495 fn parse_epsilon_delta() {
1496 let _node = parse_equation("\\forall \\epsilon > 0, \\exists \\delta > 0 : |x - a| < \\delta \\implies |f(x) - L| < \\epsilon");
1497 }
1498}