1use oxyl_diagnostics::{DiagSpan, Diagnostic};
19use oxyl_lexer::{Span, Token, TokenKind};
20
21fn diag_span(s: Span) -> DiagSpan {
22 DiagSpan::new(s.start, s.end)
23}
24
25fn is_display_math_close(k: &TokenKind) -> bool {
27 matches!(k, TokenKind::ControlSeq(s) if s == "]")
28}
29
30fn is_end_control_seq(k: &TokenKind) -> bool {
32 matches!(k, TokenKind::ControlSeq(s) if s == "end")
33}
34
35fn find_env_name(args: &[Arg]) -> Option<(usize, String)> {
40 for (i, arg) in args.iter().enumerate() {
41 if let Arg::Mandatory(children) = arg {
42 let mut name = String::new();
43 for child in children {
44 if let Node::Text(t, _) = child {
45 name.push_str(t);
46 } else {
47 return None;
48 }
49 }
50 let trimmed = name.trim().to_owned();
51 if !trimmed.is_empty() {
52 return Some((i, trimmed));
53 }
54 }
55 }
56 None
57}
58
59#[derive(Debug, Clone)]
68pub struct Document {
69 pub body: Vec<Node>,
70}
71
72#[derive(Debug, Clone)]
74pub enum Node {
75 Text(String, Span),
77
78 ParagraphBreak(Span),
80
81 Command {
83 name: String ,
85 args: Vec<Arg>,
86 span: Span,
87 },
88
89 Group(Vec<Node>, Span),
91
92 Math(Vec<Node>, Span),
94
95 DisplayMath(Vec<Node>, Span),
97
98 Comment(String, Span),
102
103 AlignTab(Span),
105
106 Tilde(Span),
109
110 Environment {
114 name: String,
115 args: Vec<Arg>,
116 body: Vec<Node>,
117 span: Span,
118 },
119}
120
121impl Node {
122 pub fn span(&self) -> Span {
123 match self {
124 Node::Text(_, s) => *s,
125 Node::ParagraphBreak(s) => *s,
126 Node::Command { span, .. } => *span,
127 Node::Group(_, s) => *s,
128 Node::Math(_, s) => *s,
129 Node::DisplayMath(_, s) => *s,
130 Node::Comment(_, s) => *s,
131 Node::AlignTab(s) => *s,
132 Node::Tilde(s) => *s,
133 Node::Environment{ span, .. } => *span,
134 }
135 }
136}
137
138#[derive(Debug, Clone)]
140pub enum Arg {
141 Mandatory(Vec<Node>),
142 Optional(Vec<Node>),
143}
144
145#[derive(Debug)]
152pub struct ParseResult {
153 pub document: Document,
154 pub errors: Vec<Diagnostic>,
155}
156
157pub struct Parser {
162 tokens: Vec<Token>,
163 pos: usize,
164 errors: Vec<Diagnostic>,
165}
166
167impl Parser {
168 pub fn new(tokens: Vec<Token>) -> Self {
169 Self { tokens, pos: 0, errors: Vec::new() }
170 }
171
172 pub fn parse(mut self) -> ParseResult {
174 let body = self.parse_nodes(|_| false);
175 ParseResult { document: Document { body }, errors: self.errors }
176 }
177
178 fn peek(&self) -> Option<&Token> {
179 self.tokens.get(self.pos)
180 }
181
182 fn peek_kind(&self) -> Option<&TokenKind> {
183 self.peek().map(|t| &t.kind)
184 }
185
186 fn bump(&mut self) -> Option<Token> {
187 if self.pos < self.tokens.len() {
188 let tok = self.tokens[self.pos].clone();
189 self.pos += 1;
190 Some(tok)
191 } else {
192 None
193 }
194 }
195
196 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
203 let mut nodes: Vec<Node> = Vec::new();
204
205 loop {
206 match self.peek() {
207 None => break,
208 Some(tok) if stop(&tok.kind) => break,
209 _ => {}
210 }
211
212 let tok = self.bump().unwrap();
213
214 match tok.kind {
215 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
216 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
217
218 TokenKind::ParagraphBreak => {
219 nodes.push(Node::ParagraphBreak(tok.span));
220 }
221
222 TokenKind::Comment(body) => {
223 nodes.push(Node::Comment(body, tok.span));
224 }
225
226 TokenKind::ControlSeq(ref name) if name == "begin" => {
228 let env = self.parse_environment(tok.span);
229 nodes.push(env);
230 }
231
232 TokenKind::ControlSeq(ref name) if name == "end" => {
234 self.errors.push(
235 Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
236 .with_span(diag_span(tok.span)),
237 );
238 let _ = self.parse_args();
240 }
241
242 TokenKind::ControlSeq(ref name) if name == "[" => {
244 let open_span = tok.span;
245 let children = self.parse_nodes(is_display_math_close);
246 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
247 let close = self.bump().unwrap();
248 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
249 } else {
250 self.errors.push(
251 Diagnostic::error("E031", "unclosed '\\[' (display math)")
252 .with_span(diag_span(open_span)),
253 );
254 nodes.push(Node::DisplayMath(children, open_span));
255 }
256 }
257
258 TokenKind::ControlSeq(ref name) if name == "]" => {
260 self.errors.push(
261 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
262 .with_span(diag_span(tok.span)),
263 );
264 }
265
266 TokenKind::ControlSeq(name) => {
267 let cmd_span = tok.span;
268 let args = self.parse_args();
269 let full_span = args.last()
271 .and_then(|a| match a {
272 Arg::Mandatory(children) => children.last().map(|n| n.span()),
273 Arg::Optional(children) => children.last().map(|n| n.span()),
274 })
275 .map(|s| cmd_span.merge(s))
276 .unwrap_or(cmd_span);
277 nodes.push(Node::Command { name, args, span: full_span });
278 }
279
280 TokenKind::BeginGroup => {
281 let open_span = tok.span;
282 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
283 if self.peek_kind() == Some(&TokenKind::EndGroup) {
284 let close = self.bump().unwrap();
285 nodes.push(Node::Group(children, open_span.merge(close.span)));
286 } else {
287 self.errors.push(
289 Diagnostic::error("E020", "unclosed '{'")
290 .with_span(diag_span(open_span)),
291 );
292 nodes.push(Node::Group(children, open_span));
293 }
294 }
295
296 TokenKind::MathShift => {
297 let open_span = tok.span;
298 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
299 if self.peek_kind() == Some(&TokenKind::MathShift) {
300 let close = self.bump().unwrap();
301 nodes.push(Node::Math(children, open_span.merge(close.span)));
302 } else {
303 self.errors.push(
304 Diagnostic::error("E030", "unclosed '$' (math mode)")
305 .with_span(diag_span(open_span)),
306 );
307 nodes.push(Node::Math(children, open_span));
308 }
309 }
310
311 TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
312 TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
313
314 _ => {}
316 }
317 }
318
319 nodes
320 }
321 fn parse_args(&mut self) -> Vec<Arg> {
327 let mut args = Vec::new();
328
329 loop {
330 if self.peek_kind() == Some(&TokenKind::Space) {
332 self.bump();
333 }
334
335 match self.peek_kind() {
336 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
337 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
338 _ => break,
339 }
340 }
341 args
342
343 }
344
345 fn parse_mandatory_arg(&mut self) -> Arg {
346 let open_span = self.bump().unwrap().span;
348 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
349 if self.peek_kind() == Some(&TokenKind::EndGroup) {
350 self.bump();
351 } else {
352 self.errors.push(
353 Diagnostic::error("E021","unclosed mandatory argument")
354 .with_span(diag_span(open_span)),
355 );
356 }
357 Arg::Mandatory(children)
358 }
359
360 fn parse_environment(&mut self, begin_span: Span) -> Node {
363 let mut args = self.parse_args();
364
365 let (name_idx, env_name) = match find_env_name(&args) {
369 Some(x) => x,
370 None => {
371 self.errors.push(
372 Diagnostic::error("E040", "'\\begin' missing environment name")
373 .with_span(diag_span(begin_span)),
374 );
375 return Node::Command {
376 name: "begin".to_owned(),
377 args,
378 span: begin_span,
379 };
380 }
381 };
382 args.remove(name_idx);
383
384 let body = self.parse_nodes(is_end_control_seq);
385
386 let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
388 let end_tok = self.bump().unwrap();
389 let end_args = self.parse_args();
390 let close_name = find_env_name(&end_args).map(|(_, n)| n);
391
392 if close_name.as_deref() != Some(env_name.as_str()) {
393 self.errors.push(
394 Diagnostic::error("E042", format!(
395 "'\\end{{{}}}' does not match '\\begin{{{}}}'",
396 close_name.as_deref().unwrap_or(""), env_name,
397 ))
398 .with_span(diag_span(end_tok.span)),
399 );
400 }
401
402 end_args.last()
404 .and_then(|a| match a {
405 Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
406 })
407 .map(|s| end_tok.span.merge(s))
408 .unwrap_or(end_tok.span)
409 } else {
410 self.errors.push(
411 Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
412 .with_span(diag_span(begin_span)),
413 );
414 body.last().map(|n| n.span()).unwrap_or(begin_span)
415 };
416
417 Node::Environment {
418 name: env_name,
419 args,
420 body,
421 span: begin_span.merge(close_span),
422 }
423 }
424
425 fn parse_optional_arg(&mut self) -> Arg {
426 let open_span = self.bump().unwrap().span;
428 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
429 if self.peek_kind() == Some(&TokenKind::Char(']')) {
430 self.bump();
431 } else {
432 self.errors.push(
433 Diagnostic::error("E022","unclosed optional argument")
434 .with_span(diag_span(open_span)),
435 );
436 }
437 Arg::Optional(children)
438 }
439
440 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
442 match nodes.last_mut() {
443 Some(Node::Text(s, existing)) => {
444 s.push(c);
445 *existing = existing.merge(span);
446 }
447 _ => nodes.push(Node::Text(c.to_string(), span)),
448 }
449 }
450}
451
452
453
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Lexes `src` and runs the parser over the resulting tokens.
    fn parse(src: &str) -> ParseResult {
        let lexed = Lexer::new(src).tokenise();
        Parser::new(lexed.tokens).parse()
    }

    /// Returns the name and arguments of the first top-level command in `src`.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        let found = parse(src).document.body.into_iter().find_map(|node| match node {
            Node::Command { name, args, .. } => Some((name, args)),
            _ => None,
        });
        match found {
            Some(cmd) => cmd,
            None => panic!("no command found in: {src}"),
        }
    }

    /// `true` when `node` is a text node whose content equals `expected`.
    fn is_text(node: &Node, expected: &str) -> bool {
        matches!(node, Node::Text(s, _) if s == expected)
    }

    /// `true` when any recorded diagnostic carries `code`.
    fn has_error(result: &ParseResult, code: &'static str) -> bool {
        result.errors.iter().any(|e| e.code == code)
    }

    // ---- commands and arguments -------------------------------------------

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Mandatory(children) if is_text(&children[0], "hello")));
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    #[test]
    fn unclosed_arg_produces_error() {
        let errors = parse("\\cmd{oops").errors;
        assert!(!errors.is_empty());
    }

    #[test]
    fn paragraph_break_still_works() {
        let body = parse("line one\n\nline two").document.body;
        let has_par = body.iter().any(|n| matches!(n, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    #[test]
    fn nested_command_in_arg() {
        let r = parse("\\outer{\\inner{x}}");
        assert!(r.errors.is_empty());
        let args = match &r.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let inner = match &args[0] {
            Arg::Mandatory(inner) => inner,
            _ => panic!("expected mandatory arg"),
        };
        assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
    }

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(children) if is_text(&children[0], "3")));
        assert!(matches!(&args[1], Arg::Mandatory(children) if is_text(&children[0], "27")));
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let errors = parse("\\cmd[oops").errors;
        assert!(!errors.is_empty());
    }

    #[test]
    fn bracket_outside_command_is_text() {
        let r = parse("hello [world]");
        assert!(r.errors.is_empty());
        assert!(is_text(&r.document.body[0], "hello [world]"));
    }

    // ---- math ---------------------------------------------------------------

    #[test]
    fn inline_math_simple() {
        let r = parse("$x+1$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::Math(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn inline_math_with_command() {
        let r = parse("$\\alpha + \\beta$");
        assert!(r.errors.is_empty());
        let children = match &r.document.body[0] {
            Node::Math(children, _) => children,
            _ => panic!("expected math node"),
        };
        let mut names = Vec::new();
        for child in children {
            if let Node::Command { name, .. } = child {
                names.push(name.as_str());
            }
        }
        assert_eq!(names, vec!["alpha", "beta"]);
    }

    #[test]
    fn unclosed_math_produces_error() {
        let errors = parse("text $oops").errors;
        assert!(!errors.is_empty());
    }

    #[test]
    fn parser_errors_carry_spans() {
        let cases = ["\\cmd{oops", "\\cmd[oops", "{", "$oops"];
        for src in cases {
            let errors = parse(src).errors;
            assert!(!errors.is_empty(), "expected error for {src:?}");
            for e in &errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let r = parse("hello $x$");
        assert!(r.errors.is_empty());
        let body = &r.document.body;
        assert_eq!(body.len(), 2);
        assert!(is_text(&body[0], "hello "));
        assert!(matches!(&body[1], Node::Math(_, _)));
    }

    #[test]
    fn display_math_simple() {
        let r = parse("\\[x+1\\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn display_math_with_command() {
        let r = parse("\\[ \\sum_{i=0}^n i \\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
    }

    #[test]
    fn unclosed_display_math_produces_error() {
        assert!(has_error(&parse("\\[ a + b"), "E031"));
    }

    #[test]
    fn stray_close_display_math_produces_error() {
        assert!(has_error(&parse("oops \\] more"), "E032"));
    }

    // ---- comments -----------------------------------------------------------

    #[test]
    fn comment_preserved() {
        let r = parse("% hello\nworld");
        assert!(r.errors.is_empty());
        let body = &r.document.body;
        assert!(matches!(&body[0], Node::Comment(s, _) if s == " hello"));
        assert!(is_text(&body[1], "world"));
    }

    #[test]
    fn comment_inside_command_arg() {
        let r = parse("\\textbf{foo % drop?\nbar}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        let args = match &r.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let children = match &args[0] {
            Arg::Mandatory(children) => children,
            _ => panic!("expected mandatory arg"),
        };
        assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
    }

    // ---- environments -------------------------------------------------------

    #[test]
    fn environment_simple() {
        let r = parse("\\begin{quote}hello\\end{quote}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, body, .. } => {
                assert_eq!(name, "quote");
                assert!(args.is_empty());
                assert!(is_text(&body[0], "hello"));
            }
            other => panic!("expected environment, got {:?}", other),
        }
    }

    #[test]
    fn environment_with_starred_name() {
        let r = parse("\\begin{equation*}x = 1\\end{equation*}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        let first = &r.document.body[0];
        assert!(matches!(first, Node::Environment { name, .. } if name == "equation*"));
    }

    #[test]
    fn environment_with_extra_args() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, .. } => {
                assert_eq!(name, "tabular");
                assert_eq!(args.len(), 1);
                assert!(matches!(&args[0], Arg::Mandatory(_)));
            }
            _ => panic!("expected environment"),
        }
    }

    #[test]
    fn nested_environments() {
        let r = parse("\\begin{outer}\\begin{inner}x\\end{inner}\\end{outer}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, body, .. } => {
                assert_eq!(name, "outer");
                assert!(matches!(&body[0], Node::Environment { name, .. } if name == "inner"));
            }
            _ => panic!("expected outer environment"),
        }
    }

    #[test]
    fn mismatched_end_produces_error() {
        assert!(has_error(&parse("\\begin{a}x\\end{b}"), "E042"));
    }

    #[test]
    fn unclosed_begin_produces_error() {
        assert!(has_error(&parse("\\begin{a}body"), "E041"));
    }

    #[test]
    fn stray_end_produces_error() {
        assert!(has_error(&parse("\\end{a}"), "E043"));
    }

    #[test]
    fn begin_without_name_produces_error() {
        assert!(has_error(&parse("\\begin foo"), "E040"));
    }

    // ---- special characters -------------------------------------------------

    #[test]
    fn align_tab_becomes_node() {
        let r = parse("a & b");
        assert!(r.errors.is_empty());
        let mut kinds = Vec::new();
        for node in &r.document.body {
            kinds.push(match node {
                Node::Text(s, _) => format!("T({s})"),
                Node::AlignTab(_) => "&".to_owned(),
                other => format!("{other:?}"),
            });
        }
        assert_eq!(kinds, vec!["T(a )", "&", "T( b)"]);
    }

    #[test]
    fn tilde_becomes_node() {
        let r = parse("oxyl.~isthebest");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[1], Node::Tilde(_)));
    }

    #[test]
    fn align_tab_inside_tabular_body() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { body, .. } => {
                assert!(body.iter().any(|n| matches!(n, Node::AlignTab(_))));
            }
            _ => panic!("expected environment"),
        }
    }
}