1use ratex_lexer::token::{SourceLocation, Token};
2use unicode_normalization::UnicodeNormalization;
3
4use crate::error::{ParseError, ParseResult};
5use crate::functions::{self, ArgType, FunctionContext, FUNCTIONS};
6use crate::macro_expander::{MacroExpander, IMPLICIT_COMMANDS};
7use crate::parse_node::{AtomFamily, Mode, ParseNode};
8
/// Token texts at which `Parser::parse_expression` stops collecting atoms.
///
/// The terminating token is left unconsumed so the caller can check for it.
static END_OF_EXPRESSION: &[&str] = &["}", "\\endgroup", "\\end", "\\right", "&"];
11
/// Recursive-descent parser that turns the token stream produced by a
/// [`MacroExpander`] into a list of [`ParseNode`]s.
pub struct Parser<'a> {
    /// Current parsing mode; `switch_mode` keeps it in sync with the gullet.
    pub mode: Mode,
    /// Macro expander ("gullet") that supplies expanded tokens.
    pub gullet: MacroExpander<'a>,
    /// Starts at 0; not touched in this file.
    /// NOTE(review): presumably the `\left`/`\right` nesting depth maintained
    /// by function handlers — confirm against the handlers.
    pub leftright_depth: i32,
    /// One-token lookahead: filled by `fetch`, cleared by `consume`.
    next_token: Option<Token>,
    /// Starts at 0; not touched in this file.
    /// NOTE(review): presumably counts numbered equations — confirm.
    pub equation_counter: usize,
}
28
29impl<'a> Parser<'a> {
30 pub fn new(input: &'a str) -> Self {
31 Self {
32 mode: Mode::Math,
33 gullet: MacroExpander::new(input, Mode::Math),
34 leftright_depth: 0,
35 next_token: None,
36 equation_counter: 0,
37 }
38 }
39
40 pub fn fetch(&mut self) -> ParseResult<Token> {
44 if self.next_token.is_none() {
45 self.next_token = Some(self.gullet.expand_next_token()?);
46 }
47 Ok(self.next_token.clone().unwrap())
48 }
49
50 pub fn consume(&mut self) {
52 self.next_token = None;
53 }
54
55 pub fn expect(&mut self, text: &str, do_consume: bool) -> ParseResult<()> {
57 let tok = self.fetch()?;
58 if tok.text != text {
59 return Err(ParseError::new(
60 format!("Expected '{}', got '{}'", text, tok.text),
61 Some(&tok),
62 ));
63 }
64 if do_consume {
65 self.consume();
66 }
67 Ok(())
68 }
69
70 pub fn consume_spaces(&mut self) -> ParseResult<()> {
72 loop {
73 let tok = self.fetch()?;
74 if tok.text == " " {
75 self.consume();
76 } else {
77 break;
78 }
79 }
80 Ok(())
81 }
82
83 pub fn switch_mode(&mut self, new_mode: Mode) {
85 self.mode = new_mode;
86 self.gullet.switch_mode(new_mode);
87 }
88
89 pub fn parse(&mut self) -> ParseResult<Vec<ParseNode>> {
93 self.gullet.begin_group();
94
95 let result = self.parse_expression(false, None);
96
97 match result {
98 Ok(parse) => {
99 self.expect("EOF", true)?;
100 self.gullet.end_group();
101 Ok(parse)
102 }
103 Err(e) => {
104 self.gullet.end_groups();
105 Err(e)
106 }
107 }
108 }
109
110 pub fn parse_expression(
114 &mut self,
115 break_on_infix: bool,
116 break_on_token_text: Option<&str>,
117 ) -> ParseResult<Vec<ParseNode>> {
118 let mut body = Vec::new();
119
120 loop {
121 if self.mode == Mode::Math {
122 self.consume_spaces()?;
123 }
124
125 let lex = self.fetch()?;
126
127 if END_OF_EXPRESSION.contains(&lex.text.as_str()) {
128 break;
129 }
130 if let Some(break_text) = break_on_token_text {
131 if lex.text == break_text {
132 break;
133 }
134 }
135 if break_on_infix {
136 if let Some(func) = FUNCTIONS.get(lex.text.as_str()) {
137 if func.infix {
138 break;
139 }
140 }
141 }
142
143 let atom = self.parse_atom(break_on_token_text)?;
144
145 match atom {
146 None => break,
147 Some(node) if node.type_name() == "internal" => continue,
148 Some(node) => body.push(node),
149 }
150 }
151
152 if self.mode == Mode::Text {
153 self.form_ligatures(&mut body);
154 }
155
156 self.handle_infix_nodes(body)
157 }
158
159 fn handle_infix_nodes(&mut self, body: Vec<ParseNode>) -> ParseResult<Vec<ParseNode>> {
161 let mut over_index: Option<usize> = None;
162 let mut func_name: Option<String> = None;
163
164 for (i, node) in body.iter().enumerate() {
165 if let ParseNode::Infix { replace_with, .. } = node {
166 if over_index.is_some() {
167 return Err(ParseError::msg("only one infix operator per group"));
168 }
169 over_index = Some(i);
170 func_name = Some(replace_with.clone());
171 }
172 }
173
174 if let (Some(idx), Some(fname)) = (over_index, func_name) {
175 let numer_body: Vec<ParseNode> = body[..idx].to_vec();
176 let denom_body: Vec<ParseNode> = body[idx + 1..].to_vec();
177
178 let numer = if numer_body.len() == 1 {
179 if let ParseNode::OrdGroup { .. } = &numer_body[0] {
180 numer_body.into_iter().next().unwrap()
181 } else {
182 ParseNode::OrdGroup {
183 mode: self.mode,
184 body: numer_body,
185 semisimple: None,
186 loc: None,
187 }
188 }
189 } else {
190 ParseNode::OrdGroup {
191 mode: self.mode,
192 body: numer_body,
193 semisimple: None,
194 loc: None,
195 }
196 };
197
198 let denom = if denom_body.len() == 1 {
199 if let ParseNode::OrdGroup { .. } = &denom_body[0] {
200 denom_body.into_iter().next().unwrap()
201 } else {
202 ParseNode::OrdGroup {
203 mode: self.mode,
204 body: denom_body,
205 semisimple: None,
206 loc: None,
207 }
208 }
209 } else {
210 ParseNode::OrdGroup {
211 mode: self.mode,
212 body: denom_body,
213 semisimple: None,
214 loc: None,
215 }
216 };
217
218 let node = if fname == "\\\\abovefrac" {
219 let infix_node = body[idx].clone();
221 self.call_function(&fname, vec![numer, infix_node, denom], vec![], None, None)?
222 } else {
223 self.call_function(&fname, vec![numer, denom], vec![], None, None)?
224 };
225 Ok(vec![node])
226 } else {
227 Ok(body)
228 }
229 }
230
231 fn form_ligatures(&self, group: &mut Vec<ParseNode>) {
233 let mut i = 0;
234 while i + 1 < group.len() {
235 let a_text = group[i].symbol_text().map(|s| s.to_string());
236 let b_text = group[i + 1].symbol_text().map(|s| s.to_string());
237
238 if let (Some(a), Some(b)) = (a_text, b_text) {
239 if group[i].type_name() == "textord" && group[i + 1].type_name() == "textord" {
240 if a == "-" && b == "-" {
241 if i + 2 < group.len() {
242 if let Some(c) = group[i + 2].symbol_text() {
243 if c == "-" && group[i + 2].type_name() == "textord" {
244 group[i] = ParseNode::TextOrd {
245 mode: Mode::Text,
246 text: "---".to_string(),
247 loc: None,
248 };
249 group.remove(i + 2);
250 group.remove(i + 1);
251 continue;
252 }
253 }
254 }
255 group[i] = ParseNode::TextOrd {
256 mode: Mode::Text,
257 text: "--".to_string(),
258 loc: None,
259 };
260 group.remove(i + 1);
261 continue;
262 }
263 if (a == "'" || a == "`") && b == a {
264 group[i] = ParseNode::TextOrd {
265 mode: Mode::Text,
266 text: format!("{}{}", a, a),
267 loc: None,
268 };
269 group.remove(i + 1);
270 continue;
271 }
272 }
273 }
274 i += 1;
275 }
276 }
277
278 pub fn parse_atom(
282 &mut self,
283 break_on_token_text: Option<&str>,
284 ) -> ParseResult<Option<ParseNode>> {
285 let mut base = self.parse_group("atom", break_on_token_text)?;
286
287 if let Some(ref b) = base {
288 if b.type_name() == "internal" {
289 return Ok(base);
290 }
291 }
292
293 if self.mode == Mode::Text {
294 return Ok(base);
295 }
296
297 let mut superscript: Option<ParseNode> = None;
298 let mut subscript: Option<ParseNode> = None;
299
300 loop {
301 self.consume_spaces()?;
302 let lex = self.fetch()?;
303
304 if lex.text == "\\limits" || lex.text == "\\nolimits" {
305 let is_limits = lex.text == "\\limits";
306 self.consume();
307 if let Some(base_node) = base.as_mut() {
308 match base_node {
309 ParseNode::Op {
310 limits,
311 always_handle_sup_sub,
312 ..
313 } => {
314 *limits = is_limits;
315 *always_handle_sup_sub = Some(is_limits);
316 }
317 ParseNode::OperatorName {
318 limits,
319 always_handle_sup_sub,
320 ..
321 } => {
322 *limits = is_limits;
323 *always_handle_sup_sub = is_limits;
324 }
325 _ => {}
326 }
327 }
328 } else if lex.text == "^" {
329 if superscript.is_some() {
330 return Err(ParseError::new("Double superscript", Some(&lex)));
331 }
332 superscript = Some(self.handle_sup_subscript("superscript")?);
333 } else if lex.text == "_" {
334 if subscript.is_some() {
335 return Err(ParseError::new("Double subscript", Some(&lex)));
336 }
337 subscript = Some(self.handle_sup_subscript("subscript")?);
338 } else if lex.text == "'" {
339 if superscript.is_some() {
340 return Err(ParseError::new("Double superscript", Some(&lex)));
341 }
342 let prime = ParseNode::TextOrd {
343 mode: self.mode,
344 text: "\\prime".to_string(),
345 loc: None,
346 };
347 let mut primes = vec![prime.clone()];
348 self.consume();
349 while self.fetch()?.text == "'" {
350 primes.push(prime.clone());
351 self.consume();
352 }
353 if self.fetch()?.text == "^" {
354 primes.push(self.handle_sup_subscript("superscript")?);
355 }
356 superscript = Some(ParseNode::OrdGroup {
357 mode: self.mode,
358 body: primes,
359 semisimple: None,
360 loc: None,
361 });
362 } else if let Some((mapped, is_sub)) = lex
363 .text
364 .chars()
365 .next()
366 .and_then(crate::unicode_sup_sub::unicode_sub_sup)
367 {
368 if is_sub && subscript.is_some() {
369 return Err(ParseError::new("Double subscript", Some(&lex)));
370 }
371 if !is_sub && superscript.is_some() {
372 return Err(ParseError::new("Double superscript", Some(&lex)));
373 }
374 let mut subsup_tokens = vec![Token::new(mapped, 0, 0)];
376 self.consume();
377 loop {
378 let tok = self.fetch()?;
379 match tok
380 .text
381 .chars()
382 .next()
383 .and_then(crate::unicode_sup_sub::unicode_sub_sup)
384 {
385 Some((m, sub)) if sub == is_sub => {
386 subsup_tokens.insert(0, Token::new(m, 0, 0));
387 self.consume();
388 }
389 _ => break,
390 }
391 }
392 let body = self.subparse(subsup_tokens)?;
393 let group = ParseNode::OrdGroup {
394 mode: Mode::Math,
395 body,
396 semisimple: None,
397 loc: None,
398 };
399 if is_sub {
400 subscript = Some(group);
401 } else {
402 superscript = Some(group);
403 }
404 } else {
405 break;
406 }
407 }
408
409 if superscript.is_some() || subscript.is_some() {
410 Ok(Some(ParseNode::SupSub {
411 mode: self.mode,
412 base: base.map(Box::new),
413 sup: superscript.map(Box::new),
414 sub: subscript.map(Box::new),
415 loc: None,
416 }))
417 } else {
418 Ok(base)
419 }
420 }
421
422 fn handle_sup_subscript(&mut self, name: &str) -> ParseResult<ParseNode> {
424 let symbol_token = self.fetch()?;
425 self.consume();
426 self.consume_spaces()?;
427
428 let group = self.parse_group(name, None)?;
429 match group {
430 Some(g) if g.type_name() != "internal" => Ok(g),
431 Some(_) => {
432 let g2 = self.parse_group(name, None)?;
434 g2.ok_or_else(|| {
435 ParseError::new(
436 format!("Expected group after '{}'", symbol_token.text),
437 Some(&symbol_token),
438 )
439 })
440 }
441 None => Err(ParseError::new(
442 format!("Expected group after '{}'", symbol_token.text),
443 Some(&symbol_token),
444 )),
445 }
446 }
447
448 pub fn parse_group(
452 &mut self,
453 name: &str,
454 break_on_token_text: Option<&str>,
455 ) -> ParseResult<Option<ParseNode>> {
456 let first_token = self.fetch()?;
457 let text = first_token.text.clone();
458
459 if text == "{" || text == "\\begingroup" {
460 self.consume();
461 let group_end = if text == "{" { "}" } else { "\\endgroup" };
462
463 self.gullet.begin_group();
464 let expression = self.parse_expression(false, Some(group_end))?;
465 let last_token = self.fetch()?;
466 self.expect(group_end, true)?;
467 self.gullet.end_group();
468
469 let loc = Some(SourceLocation::range(&first_token.loc, &last_token.loc));
470 let semisimple = if text == "\\begingroup" {
471 Some(true)
472 } else {
473 None
474 };
475
476 Ok(Some(ParseNode::OrdGroup {
477 mode: self.mode,
478 body: expression,
479 semisimple,
480 loc,
481 }))
482 } else {
483 let result = self
484 .parse_function(break_on_token_text, Some(name))?
485 .or_else(|| self.parse_symbol_inner().ok().flatten());
486
487 if result.is_none()
488 && text.starts_with('\\')
489 && !IMPLICIT_COMMANDS.contains(&text.as_str())
490 {
491 return Err(ParseError::new(
492 format!("Undefined control sequence: {}", text),
493 Some(&first_token),
494 ));
495 }
496
497 Ok(result)
498 }
499 }
500
501 pub fn parse_function(
505 &mut self,
506 break_on_token_text: Option<&str>,
507 name: Option<&str>,
508 ) -> ParseResult<Option<ParseNode>> {
509 let token = self.fetch()?;
510 let func = token.text.clone();
511
512 let func_data = match FUNCTIONS.get(func.as_str()) {
513 Some(f) => f,
514 None => return Ok(None),
515 };
516
517 self.consume();
518
519 if let Some(n) = name {
520 if n != "atom" && !func_data.allowed_in_argument {
521 return Err(ParseError::new(
522 format!("Got function '{}' with no arguments as {}", func, n),
523 Some(&token),
524 ));
525 }
526 }
527
528 functions::check_mode_compatibility(func_data, self.mode, &func, Some(&token))?;
529
530 if func == "\\hspace" {
534 self.gullet.consume_spaces();
535 if self.gullet.future().text == "*" {
536 self.gullet.pop_token();
537 }
538 }
539
540 let (args, opt_args) = self.parse_arguments(&func, func_data)?;
541
542 self.call_function(
543 &func,
544 args,
545 opt_args,
546 Some(token),
547 break_on_token_text.map(|s| s.to_string()).as_deref(),
548 )
549 .map(Some)
550 }
551
552 pub fn call_function(
554 &mut self,
555 name: &str,
556 args: Vec<ParseNode>,
557 opt_args: Vec<Option<ParseNode>>,
558 token: Option<Token>,
559 break_on_token_text: Option<&str>,
560 ) -> ParseResult<ParseNode> {
561 let func = FUNCTIONS.get(name).ok_or_else(|| {
562 ParseError::msg(format!("No function handler for {}", name))
563 })?;
564
565 let mut ctx = FunctionContext {
566 func_name: name.to_string(),
567 parser: self,
568 token: token.clone(),
569 break_on_token_text: break_on_token_text.map(|s| s.to_string()),
570 };
571
572 (func.handler)(&mut ctx, args, opt_args)
573 }
574
575 pub fn parse_arguments(
577 &mut self,
578 func: &str,
579 func_data: &functions::FunctionSpec,
580 ) -> ParseResult<(Vec<ParseNode>, Vec<Option<ParseNode>>)> {
581 let total_args = func_data.num_args + func_data.num_optional_args;
582 if total_args == 0 {
583 return Ok((Vec::new(), Vec::new()));
584 }
585
586 let mut args = Vec::new();
587 let mut opt_args = Vec::new();
588
589 for i in 0..total_args {
590 let arg_type = func_data
591 .arg_types
592 .as_ref()
593 .and_then(|types| types.get(i).copied());
594 let is_optional = i < func_data.num_optional_args;
595
596 let effective_type = if (func_data.primitive && arg_type.is_none())
597 || (func_data.node_type == "sqrt" && i == 1
598 && opt_args.first().is_some_and(|o: &Option<ParseNode>| o.is_none()))
599 {
600 Some(ArgType::Primitive)
601 } else {
602 arg_type
603 };
604
605 let arg = self.parse_group_of_type(
606 &format!("argument to '{}'", func),
607 effective_type,
608 is_optional,
609 )?;
610
611 if is_optional {
612 opt_args.push(arg);
613 } else if let Some(a) = arg {
614 args.push(a);
615 } else {
616 return Err(ParseError::msg("Null argument, please report this as a bug"));
617 }
618 }
619
620 Ok((args, opt_args))
621 }
622
623 fn parse_group_of_type(
625 &mut self,
626 name: &str,
627 arg_type: Option<ArgType>,
628 optional: bool,
629 ) -> ParseResult<Option<ParseNode>> {
630 match arg_type {
631 Some(ArgType::Color) => self.parse_color_group(optional),
632 Some(ArgType::Size) => self.parse_size_group(optional),
633 Some(ArgType::Primitive) => {
634 if optional {
635 return Err(ParseError::msg("A primitive argument cannot be optional"));
636 }
637 let group = self.parse_group(name, None)?;
638 match group {
639 Some(g) => Ok(Some(g)),
640 None => Err(ParseError::new(
641 format!("Expected group as {}", name),
642 None,
643 )),
644 }
645 }
646 Some(ArgType::Math) | Some(ArgType::Text) => {
647 let mode = match arg_type {
648 Some(ArgType::Math) => Some(Mode::Math),
649 Some(ArgType::Text) => Some(Mode::Text),
650 _ => None,
651 };
652 self.parse_argument_group(optional, mode)
653 }
654 Some(ArgType::HBox) => {
655 let group = self.parse_argument_group(optional, Some(Mode::Text))?;
656 match group {
657 Some(g) => Ok(Some(ParseNode::Styling {
658 mode: g.mode(),
659 style: crate::parse_node::StyleStr::Text,
660 body: vec![g],
661 loc: None,
662 })),
663 None => Ok(None),
664 }
665 }
666 Some(ArgType::Raw) => {
667 let token = self.parse_string_group("raw", optional)?;
668 match token {
669 Some(t) => Ok(Some(ParseNode::Raw {
670 mode: Mode::Text,
671 string: t.text,
672 loc: None,
673 })),
674 None => Ok(None),
675 }
676 }
677 Some(ArgType::Url) => self.parse_url_group(optional),
678 None | Some(ArgType::Original) => self.parse_argument_group(optional, None),
679 }
680 }
681
682 fn parse_color_group(&mut self, optional: bool) -> ParseResult<Option<ParseNode>> {
684 let res = self.parse_string_group("color", optional)?;
685 match res {
686 None => Ok(None),
687 Some(token) => {
688 let text = token.text.trim().to_string();
689 let re = regex_lite::Regex::new(
690 r"^(#[a-fA-F0-9]{3,4}|#[a-fA-F0-9]{6}|#[a-fA-F0-9]{8}|[a-fA-F0-9]{6}|[a-zA-Z]+|\d+(\.\d+)?(,\d+(\.\d+)?)*)$",
691 )
692 .unwrap();
693
694 if !re.is_match(&text) {
695 return Err(ParseError::new(
696 format!("Invalid color: '{}'", text),
697 Some(&token),
698 ));
699 }
700 let mut color = text;
701 if regex_lite::Regex::new(r"^[0-9a-fA-F]{6}$")
702 .unwrap()
703 .is_match(&color)
704 {
705 color = format!("#{}", color);
706 }
707
708 Ok(Some(ParseNode::ColorToken {
709 mode: self.mode,
710 color,
711 loc: None,
712 }))
713 }
714 }
715 }
716
717 pub fn parse_size_group(&mut self, optional: bool) -> ParseResult<Option<ParseNode>> {
719 let mut is_blank = false;
720
721 self.gullet.consume_spaces();
722 let res = if !optional && self.gullet.future().text != "{" {
723 Some(self.parse_regex_group(
724 ®ex_lite::Regex::new(r"^[-+]? *(?:$|\d+|\d+\.\d*|\.\d*) *[a-z]{0,2} *$")
725 .unwrap(),
726 "size",
727 )?)
728 } else {
729 self.parse_string_group("size", optional)?
730 };
731
732 let res = match res {
733 Some(r) => r,
734 None => return Ok(None),
735 };
736
737 let mut text = res.text.clone();
738 if !optional && text.is_empty() {
739 text = "0pt".to_string();
740 is_blank = true;
741 }
742
743 let size_re =
744 regex_lite::Regex::new(r"([-+]?) *(\d+(?:\.\d*)?|\.\d+) *([a-z]{2})").unwrap();
745 let m = size_re.captures(&text).ok_or_else(|| {
746 ParseError::new(format!("Invalid size: '{}'", text), Some(&res))
747 })?;
748
749 let sign = m.get(1).map_or("", |m| m.as_str());
750 let magnitude = m.get(2).map_or("", |m| m.as_str());
751 let unit = m.get(3).map_or("", |m| m.as_str());
752
753 let number: f64 = format!("{}{}", sign, magnitude).parse().unwrap_or(0.0);
754
755 if !is_valid_unit(unit) {
756 return Err(ParseError::new(
757 format!("Invalid unit: '{}'", unit),
758 Some(&res),
759 ));
760 }
761
762 Ok(Some(ParseNode::Size {
763 mode: self.mode,
764 value: crate::parse_node::Measurement {
765 number,
766 unit: unit.to_string(),
767 },
768 is_blank,
769 loc: None,
770 }))
771 }
772
773 fn parse_url_group(&mut self, optional: bool) -> ParseResult<Option<ParseNode>> {
776 self.gullet.lexer.set_catcode('%', 13);
777 self.gullet.lexer.set_catcode('~', 12);
778 let res = self.parse_string_group("url", optional);
779 self.gullet.lexer.set_catcode('%', 14);
780 self.gullet.lexer.set_catcode('~', 13);
781 let res = res?;
782 match res {
783 None => Ok(None),
784 Some(token) => {
785 let url = token.text;
786 Ok(Some(ParseNode::Url {
787 mode: self.mode,
788 url,
789 loc: None,
790 }))
791 }
792 }
793 }
794
795 fn parse_string_group(
797 &mut self,
798 _mode_name: &str,
799 optional: bool,
800 ) -> ParseResult<Option<Token>> {
801 let arg_token = self.gullet.scan_argument(optional)?;
802 let arg_token = match arg_token {
803 Some(t) => t,
804 None => return Ok(None),
805 };
806
807 let mut s = String::new();
808 loop {
809 let next = self.fetch()?;
810 if next.text == "EOF" {
811 break;
812 }
813 s.push_str(&next.text);
814 self.consume();
815 }
816 self.consume(); let mut result = arg_token;
819 result.text = s;
820 Ok(Some(result))
821 }
822
823 fn parse_regex_group(
825 &mut self,
826 regex: ®ex_lite::Regex,
827 mode_name: &str,
828 ) -> ParseResult<Token> {
829 let first_token = self.fetch()?;
830 let mut last_token = first_token.clone();
831 let mut s = String::new();
832
833 loop {
834 let next = self.fetch()?;
835 if next.text == "EOF" {
836 break;
837 }
838 let candidate = format!("{}{}", s, next.text);
839 if regex.is_match(&candidate) {
840 last_token = next;
841 s = candidate;
842 self.consume();
843 } else {
844 break;
845 }
846 }
847
848 if s.is_empty() {
849 return Err(ParseError::new(
850 format!("Invalid {}: '{}'", mode_name, first_token.text),
851 Some(&first_token),
852 ));
853 }
854
855 Ok(first_token.range(&last_token, s))
856 }
857
858 pub fn parse_argument_group(
860 &mut self,
861 optional: bool,
862 mode: Option<Mode>,
863 ) -> ParseResult<Option<ParseNode>> {
864 let arg_token = self.gullet.scan_argument(optional)?;
865 let arg_token = match arg_token {
866 Some(t) => t,
867 None => return Ok(None),
868 };
869
870 let outer_mode = self.mode;
871 if let Some(m) = mode {
872 self.switch_mode(m);
873 }
874
875 self.gullet.begin_group();
876 let expression = self.parse_expression(false, Some("EOF"))?;
877 self.expect("EOF", true)?;
878 self.gullet.end_group();
879
880 let result = ParseNode::OrdGroup {
881 mode: self.mode,
882 loc: Some(arg_token.loc.clone()),
883 body: expression,
884 semisimple: None,
885 };
886
887 if mode.is_some() {
888 self.switch_mode(outer_mode);
889 }
890
891 Ok(Some(result))
892 }
893
894 fn parse_symbol_inner(&mut self) -> ParseResult<Option<ParseNode>> {
898 let nucleus = self.fetch()?;
899 let text = nucleus.text.clone();
900
901 if let Some(stripped) = text.strip_prefix("\\verb") {
902 self.consume();
903 let arg = stripped.to_string();
904 let star = arg.starts_with('*');
905 let arg = if star { &arg[1..] } else { &arg };
906
907 if arg.len() < 2 {
908 return Err(ParseError::new("\\verb assertion failed", Some(&nucleus)));
909 }
910 let body = arg[1..arg.len() - 1].to_string();
911 return Ok(Some(ParseNode::Verb {
912 mode: Mode::Text,
913 body,
914 star,
915 loc: Some(nucleus.loc.clone()),
916 }));
917 }
918
919 let font_mode = match self.mode {
920 Mode::Math => ratex_font::symbols::Mode::Math,
921 Mode::Text => ratex_font::symbols::Mode::Text,
922 };
923
924 if text == "^" || text == "_" {
926 return Ok(None);
927 }
928
929 if text == "\\" {
931 return Ok(None);
932 }
933
934 if let Some(sym_info) = ratex_font::symbols::get_symbol(&text, font_mode) {
935 let loc = Some(SourceLocation::range(&nucleus.loc, &nucleus.loc));
936 let group = sym_info.group;
937
938 let node = if group.is_atom() {
939 let family = match group {
940 ratex_font::symbols::Group::Bin => AtomFamily::Bin,
941 ratex_font::symbols::Group::Close => AtomFamily::Close,
942 ratex_font::symbols::Group::Inner => AtomFamily::Inner,
943 ratex_font::symbols::Group::Open => AtomFamily::Open,
944 ratex_font::symbols::Group::Punct => AtomFamily::Punct,
945 ratex_font::symbols::Group::Rel => AtomFamily::Rel,
946 _ => unreachable!(),
947 };
948 ParseNode::Atom {
949 mode: self.mode,
950 family,
951 text: text.clone(),
952 loc,
953 }
954 } else {
955 match group {
956 ratex_font::symbols::Group::MathOrd => ParseNode::MathOrd {
957 mode: self.mode,
958 text: text.clone(),
959 loc,
960 },
961 ratex_font::symbols::Group::TextOrd => ParseNode::TextOrd {
962 mode: self.mode,
963 text: text.clone(),
964 loc,
965 },
966 ratex_font::symbols::Group::OpToken => ParseNode::OpToken {
967 mode: self.mode,
968 text: text.clone(),
969 loc,
970 },
971 ratex_font::symbols::Group::AccentToken => ParseNode::AccentToken {
972 mode: self.mode,
973 text: text.clone(),
974 loc,
975 },
976 ratex_font::symbols::Group::Spacing => ParseNode::SpacingNode {
977 mode: self.mode,
978 text: text.clone(),
979 loc,
980 },
981 _ => ParseNode::MathOrd {
982 mode: self.mode,
983 text: text.clone(),
984 loc,
985 },
986 }
987 };
988
989 self.consume();
990 return Ok(Some(node));
991 }
992
993 if let Some(node) = self.try_parse_unicode_accent(&text, &nucleus)? {
996 self.consume();
997 return Ok(Some(node));
998 }
999
1000 let first_char = text.chars().next();
1003 if let Some(ch) = first_char {
1004 if ch as u32 >= 0x80 {
1005 let node = ParseNode::TextOrd {
1006 mode: Mode::Text,
1007 text: text.clone(),
1008 loc: Some(SourceLocation::range(&nucleus.loc, &nucleus.loc)),
1009 };
1010 self.consume();
1011 return Ok(Some(node));
1012 }
1013 }
1014
1015 Ok(None)
1016 }
1017
1018 fn try_parse_unicode_accent(
1022 &self,
1023 text: &str,
1024 nucleus: &Token,
1025 ) -> ParseResult<Option<ParseNode>> {
1026 let nfd: String = text.nfd().collect();
1027 let chars: Vec<char> = nfd.chars().collect();
1028
1029 if chars.len() < 2 {
1030 return Ok(None);
1031 }
1032
1033 let mut split_idx = chars.len() - 1;
1035 while split_idx > 0 && is_supported_combining_accent(chars[split_idx]) {
1036 split_idx -= 1;
1037 }
1038
1039 if split_idx == chars.len() - 1 {
1041 return Ok(None);
1042 }
1043
1044 let base_char = chars[0];
1046 if !is_latin_base_char(base_char) {
1047 return Ok(None);
1048 }
1049
1050 let loc = Some(SourceLocation::range(&nucleus.loc, &nucleus.loc));
1051
1052 let mut base_str: String = chars[..split_idx + 1].iter().collect();
1054
1055 if base_str.len() == 1 {
1057 match base_str.as_str() {
1058 "i" => base_str = "\u{0131}".to_string(), "j" => base_str = "\u{0237}".to_string(), _ => {}
1061 }
1062 }
1063
1064 let font_mode = match self.mode {
1065 Mode::Math => ratex_font::symbols::Mode::Math,
1066 Mode::Text => ratex_font::symbols::Mode::Text,
1067 };
1068
1069 let mut node = if base_str.chars().count() == 1 {
1070 let ch = base_str.chars().next().unwrap();
1071 if let Some(sym) = ratex_font::symbols::get_symbol(&base_str, font_mode) {
1072 match sym.group {
1073 ratex_font::symbols::Group::TextOrd => ParseNode::TextOrd {
1074 mode: self.mode,
1075 text: base_str.clone(),
1076 loc: loc.clone(),
1077 },
1078 _ => ParseNode::MathOrd {
1079 mode: self.mode,
1080 text: base_str.clone(),
1081 loc: loc.clone(),
1082 },
1083 }
1084 } else if (ch as u32) >= 0x80 {
1085 ParseNode::TextOrd {
1087 mode: Mode::Text,
1088 text: base_str.clone(),
1089 loc: loc.clone(),
1090 }
1091 } else {
1092 ParseNode::MathOrd {
1093 mode: self.mode,
1094 text: base_str.clone(),
1095 loc: loc.clone(),
1096 }
1097 }
1098 } else {
1099 return self.try_parse_unicode_accent(&base_str, nucleus).map(|opt| {
1100 opt.or_else(|| {
1101 Some(ParseNode::TextOrd {
1102 mode: Mode::Text,
1103 text: base_str.clone(),
1104 loc: loc.clone(),
1105 })
1106 })
1107 });
1108 };
1109
1110 for &combining in &chars[split_idx + 1..] {
1112 let label = combining_to_accent_label(combining, self.mode);
1113 node = ParseNode::Accent {
1114 mode: self.mode,
1115 label,
1116 is_stretchy: Some(false),
1117 is_shifty: Some(true),
1118 base: Box::new(node),
1119 loc: loc.clone(),
1120 };
1121 }
1122
1123 Ok(Some(node))
1124 }
1125
1126 pub fn subparse(&mut self, tokens: Vec<Token>) -> ParseResult<Vec<ParseNode>> {
1128 let old_token = self.next_token.take();
1129
1130 self.gullet
1131 .push_token(Token::new("}", 0, 0));
1132 self.gullet.push_tokens(tokens);
1133 let parse = self.parse_expression(false, None)?;
1134 self.expect("}", true)?;
1135
1136 self.next_token = old_token;
1137 Ok(parse)
1138 }
1139}
1140
/// Returns `true` for characters accepted as the base of a Unicode accent:
/// ASCII letters, dotless i/j, and a fixed set of Latin-1 letters
/// (Æ Ð Ø Þ ß æ ð ø þ).
fn is_latin_base_char(ch: char) -> bool {
    const EXTRA_BASES: [char; 11] = [
        '\u{0131}', '\u{0237}', '\u{00C6}', '\u{00D0}', '\u{00D8}', '\u{00DE}', '\u{00DF}',
        '\u{00E6}', '\u{00F0}', '\u{00F8}', '\u{00FE}',
    ];
    ch.is_ascii_alphabetic() || EXTRA_BASES.contains(&ch)
}
1157
/// Returns `true` for the combining marks that can be turned into accent
/// nodes: U+0300–U+0304, U+0306–U+0308, U+030A–U+030C and U+0327.
fn is_supported_combining_accent(ch: char) -> bool {
    ('\u{0300}'..='\u{0304}').contains(&ch)
        || ('\u{0306}'..='\u{0308}').contains(&ch)
        || ('\u{030A}'..='\u{030C}').contains(&ch)
        || ch == '\u{0327}'
}
1166
1167fn combining_to_accent_label(ch: char, mode: Mode) -> String {
1168 match mode {
1169 Mode::Math => match ch {
1170 '\u{0300}' => "\\grave".to_string(),
1171 '\u{0301}' => "\\acute".to_string(),
1172 '\u{0302}' => "\\hat".to_string(),
1173 '\u{0303}' => "\\tilde".to_string(),
1174 '\u{0304}' => "\\bar".to_string(),
1175 '\u{0306}' => "\\breve".to_string(),
1176 '\u{0307}' => "\\dot".to_string(),
1177 '\u{0308}' => "\\ddot".to_string(),
1178 '\u{030A}' => "\\mathring".to_string(),
1179 '\u{030B}' => "\\H".to_string(),
1180 '\u{030C}' => "\\check".to_string(),
1181 '\u{0327}' => "\\c".to_string(),
1182 _ => format!("\\char\"{:X}", ch as u32),
1183 },
1184 Mode::Text => match ch {
1185 '\u{0300}' => "\\`".to_string(),
1186 '\u{0301}' => "\\'".to_string(),
1187 '\u{0302}' => "\\^".to_string(),
1188 '\u{0303}' => "\\~".to_string(),
1189 '\u{0304}' => "\\=".to_string(),
1190 '\u{0306}' => "\\u".to_string(),
1191 '\u{0307}' => "\\.".to_string(),
1192 '\u{0308}' => "\\\"".to_string(),
1193 '\u{030A}' => "\\r".to_string(),
1194 '\u{030B}' => "\\H".to_string(),
1195 '\u{030C}' => "\\v".to_string(),
1196 '\u{0327}' => "\\c".to_string(),
1197 _ => format!("\\char\"{:X}", ch as u32),
1198 },
1199 }
1200}
1201
/// Returns `true` when `unit` is one of the accepted two-letter length units.
fn is_valid_unit(unit: &str) -> bool {
    const UNITS: [&str; 15] = [
        "pt", "mm", "cm", "in", "bp", "pc", "dd", "cc", "nd", "nc", "sp", "px", "ex", "em", "mu",
    ];
    UNITS.contains(&unit)
}
1209
/// Trims `input` and removes one pair of enclosing math delimiters.
///
/// A surrounding `$$...$$` pair is tried first, then `$...$`; the inner text
/// is trimmed again. Input without such a pair is returned trimmed.
fn strip_outer_math_delimiters(input: &str) -> &str {
    let s = input.trim();

    if s.len() >= 4 {
        let display = s.strip_prefix("$$").and_then(|rest| rest.strip_suffix("$$"));
        if let Some(inner) = display {
            return inner.trim();
        }
    }

    if s.len() >= 2 {
        let inline = s.strip_prefix('$').and_then(|rest| rest.strip_suffix('$'));
        if let Some(inner) = inline {
            return inner.trim();
        }
    }

    s
}
1223
1224pub fn parse(input: &str) -> ParseResult<Vec<ParseNode>> {
1226 Parser::new(strip_outer_math_delimiters(input)).parse()
1227}
1228
#[cfg(test)]
mod tests {
    use super::*;

    /// Type names of the top-level nodes, in order.
    fn kinds(nodes: &[ParseNode]) -> Vec<String> {
        nodes.iter().map(|n| n.type_name().to_string()).collect()
    }

    /// Parses `src` (which must succeed) and returns the top-level type names.
    fn parsed_kinds(src: &str) -> Vec<String> {
        kinds(&parse(src).unwrap())
    }

    #[test]
    fn test_parse_single_char() {
        assert_eq!(parsed_kinds("x"), ["mathord"]);
    }

    #[test]
    fn test_parse_strips_outer_dollar_inline_math() {
        let inner = r"C_p[\ce{H2O(l)}] = \pu{75.3 J // mol K}";
        let wrapped = format!("${inner}$");
        let with_dollars = parse(&wrapped).expect("wrapped");
        let without_dollars = parse(inner).expect("inner");
        // Same number of nodes and the same type name at every position.
        assert_eq!(kinds(&with_dollars), kinds(&without_dollars));
    }

    #[test]
    fn test_parse_addition() {
        assert_eq!(parsed_kinds("a+b"), ["mathord", "atom", "mathord"]);
    }

    #[test]
    fn test_parse_superscript() {
        assert_eq!(parsed_kinds("x^2"), ["supsub"]);
    }

    #[test]
    fn test_parse_subscript() {
        assert_eq!(parsed_kinds("a_i"), ["supsub"]);
    }

    #[test]
    fn test_parse_supsub() {
        let nodes = parse("x^2_i").unwrap();
        assert_eq!(kinds(&nodes), ["supsub"]);
        match &nodes[0] {
            ParseNode::SupSub { sup, sub, .. } => {
                assert!(sup.is_some());
                assert!(sub.is_some());
            }
            _ => panic!("Expected SupSub"),
        }
    }

    #[test]
    fn test_parse_group() {
        assert_eq!(parsed_kinds("{a+b}"), ["ordgroup"]);
    }

    #[test]
    fn test_parse_frac() {
        assert_eq!(parsed_kinds("\\frac{a}{b}"), ["genfrac"]);
    }

    #[test]
    fn test_parse_sqrt() {
        assert_eq!(parsed_kinds("\\sqrt{x}"), ["sqrt"]);
    }

    #[test]
    fn test_parse_sqrt_optional() {
        let nodes = parse("\\sqrt[3]{x}").unwrap();
        assert_eq!(nodes.len(), 1);
        match &nodes[0] {
            ParseNode::Sqrt { index, .. } => assert!(index.is_some()),
            _ => panic!("Expected Sqrt"),
        }
    }

    #[test]
    fn test_parse_nested() {
        assert_eq!(parsed_kinds("\\frac{\\sqrt{a^2+b^2}}{c}"), ["genfrac"]);
    }

    #[test]
    fn test_parse_empty() {
        assert!(parse("").unwrap().is_empty());
    }

    #[test]
    fn test_parse_double_superscript_error() {
        assert!(parse("x^2^3").is_err());
    }

    #[test]
    fn test_parse_unclosed_brace_error() {
        assert!(parse("{x").is_err());
    }

    #[test]
    fn test_parse_json_output() {
        let nodes = parse("x^2").unwrap();
        let json = serde_json::to_string_pretty(&nodes).unwrap();
        assert!(json.contains("supsub"));
    }
}