1use oxyl_diagnostics::{DiagSpan, Diagnostic};
24use oxyl_lexer::{Span, Token, TokenKind};
25
26use crate::ast::{Arg, Document, Node};
27
28fn diag_span(s: Span) -> DiagSpan {
29 DiagSpan::new(s.start, s.end)
30}
31
32fn is_display_math_close(k: &TokenKind) -> bool {
34 matches!(k, TokenKind::ControlSeq(s) if s == "]")
35}
36
37fn is_end_control_seq(k: &TokenKind) -> bool {
39 matches!(k, TokenKind::ControlSeq(s) if s == "end")
40}
41
42fn find_env_name(args: &[Arg]) -> Option<(usize, String)> {
47 for (i, arg) in args.iter().enumerate() {
48 if let Arg::Mandatory(children) = arg {
49 let mut name = String::new();
50 for child in children {
51 if let Node::Text(t, _) = child {
52 name.push_str(t);
53 } else {
54 return None;
55 }
56 }
57 let trimmed = name.trim().to_owned();
58 if !trimmed.is_empty() {
59 return Some((i, trimmed));
60 }
61 }
62 }
63 None
64}
65
66
/// Outcome of [`Parser::parse`]: the parsed document together with every
/// diagnostic recorded while parsing.
#[derive(Debug)]
pub struct ParseResult {
    /// The parsed document; produced even when `errors` is non-empty.
    pub document: Document,
    /// Diagnostics pushed by the parser, in the order they were recorded.
    pub errors: Vec<Diagnostic>,
}
74
75
/// Recursive-descent parser that turns a token list into a [`Document`].
pub struct Parser {
    /// The full token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Diagnostics accumulated so far; handed back in [`ParseResult`].
    errors: Vec<Diagnostic>,
}
81
82impl Parser {
83 pub fn new(tokens: Vec<Token>) -> Self {
84 Self { tokens, pos: 0, errors: Vec::new() }
85 }
86
87 pub fn parse(mut self) -> ParseResult {
89 let body = self.parse_nodes(|_| false);
90 ParseResult { document: Document { body }, errors: self.errors }
91 }
92
93 fn peek(&self) -> Option<&Token> {
94 self.tokens.get(self.pos)
95 }
96
97 fn peek_kind(&self) -> Option<&TokenKind> {
98 self.peek().map(|t| &t.kind)
99 }
100
101 fn bump(&mut self) -> Option<Token> {
102 if self.pos < self.tokens.len() {
103 let tok = self.tokens[self.pos].clone();
104 self.pos += 1;
105 Some(tok)
106 } else {
107 None
108 }
109 }
110
111 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
118 let mut nodes: Vec<Node> = Vec::new();
119
120 loop {
121 match self.peek() {
122 None => break,
123 Some(tok) if stop(&tok.kind) => break,
124 _ => {}
125 }
126
127 let tok = self.bump().unwrap();
128
129 match tok.kind {
130 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
131 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
132
133 TokenKind::ParagraphBreak => {
134 nodes.push(Node::ParagraphBreak(tok.span));
135 }
136
137 TokenKind::Comment(body) => {
138 nodes.push(Node::Comment(body, tok.span));
139 }
140
141 TokenKind::ControlSeq(ref name) if name == "begin" => {
143 let env = self.parse_environment(tok.span);
144 nodes.push(env);
145 }
146
147 TokenKind::ControlSeq(ref name) if name == "end" => {
149 self.errors.push(
150 Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
151 .with_span(diag_span(tok.span)),
152 );
153 let _ = self.parse_args();
155 }
156
157 TokenKind::ControlSeq(ref name) if name == "[" => {
159 let open_span = tok.span;
160 let children = self.parse_nodes(is_display_math_close);
161 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
162 let close = self.bump().unwrap();
163 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
164 } else {
165 self.errors.push(
166 Diagnostic::error("E031", "unclosed '\\[' (display math)")
167 .with_span(diag_span(open_span)),
168 );
169 nodes.push(Node::DisplayMath(children, open_span));
170 }
171 }
172
173 TokenKind::ControlSeq(ref name) if name == "]" => {
175 self.errors.push(
176 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
177 .with_span(diag_span(tok.span)),
178 );
179 }
180
181 TokenKind::ControlSeq(name) => {
182 let cmd_span = tok.span;
183 let args = self.parse_args();
184 let full_span = args.last()
186 .and_then(|a| match a {
187 Arg::Mandatory(children) => children.last().map(|n| n.span()),
188 Arg::Optional(children) => children.last().map(|n| n.span()),
189 })
190 .map(|s| cmd_span.merge(s))
191 .unwrap_or(cmd_span);
192 nodes.push(Node::Command { name, args, span: full_span });
193 }
194
195 TokenKind::BeginGroup => {
196 let open_span = tok.span;
197 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
198 if self.peek_kind() == Some(&TokenKind::EndGroup) {
199 let close = self.bump().unwrap();
200 nodes.push(Node::Group(children, open_span.merge(close.span)));
201 } else {
202 self.errors.push(
204 Diagnostic::error("E020", "unclosed '{'")
205 .with_span(diag_span(open_span)),
206 );
207 nodes.push(Node::Group(children, open_span));
208 }
209 }
210
211 TokenKind::MathShift => {
212 let open_span = tok.span;
213 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
214 if self.peek_kind() == Some(&TokenKind::MathShift) {
215 let close = self.bump().unwrap();
216 nodes.push(Node::Math(children, open_span.merge(close.span)));
217 } else {
218 self.errors.push(
219 Diagnostic::error("E030", "unclosed '$' (math mode)")
220 .with_span(diag_span(open_span)),
221 );
222 nodes.push(Node::Math(children, open_span));
223 }
224 }
225
226 TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
227 TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
228
229 _ => {}
231 }
232 }
233
234 nodes
235 }
236 fn parse_args(&mut self) -> Vec<Arg> {
242 let mut args = Vec::new();
243
244 loop {
245 if self.peek_kind() == Some(&TokenKind::Space) {
247 self.bump();
248 }
249
250 match self.peek_kind() {
251 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
252 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
253 _ => break,
254 }
255 }
256 args
257
258 }
259
260 fn parse_mandatory_arg(&mut self) -> Arg {
261 let open_span = self.bump().unwrap().span;
263 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
264 if self.peek_kind() == Some(&TokenKind::EndGroup) {
265 self.bump();
266 } else {
267 self.errors.push(
268 Diagnostic::error("E021","unclosed mandatory argument")
269 .with_span(diag_span(open_span)),
270 );
271 }
272 Arg::Mandatory(children)
273 }
274
275 fn parse_environment(&mut self, begin_span: Span) -> Node {
278 let mut args = self.parse_args();
279
280 let (name_idx, env_name) = match find_env_name(&args) {
284 Some(x) => x,
285 None => {
286 self.errors.push(
287 Diagnostic::error("E040", "'\\begin' missing environment name")
288 .with_span(diag_span(begin_span)),
289 );
290 return Node::Command {
291 name: "begin".to_owned(),
292 args,
293 span: begin_span,
294 };
295 }
296 };
297 args.remove(name_idx);
298
299 let body = self.parse_nodes(is_end_control_seq);
300
301 let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
303 let end_tok = self.bump().unwrap();
304 let end_args = self.parse_args();
305 let close_name = find_env_name(&end_args).map(|(_, n)| n);
306
307 if close_name.as_deref() != Some(env_name.as_str()) {
308 self.errors.push(
309 Diagnostic::error("E042", format!(
310 "'\\end{{{}}}' does not match '\\begin{{{}}}'",
311 close_name.as_deref().unwrap_or(""), env_name,
312 ))
313 .with_span(diag_span(end_tok.span))
314 .with_note(format!("the matching '\\begin' opened the '{env_name}' environment")),
315 );
316 }
317
318 end_args.last()
320 .and_then(|a| match a {
321 Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
322 })
323 .map(|s| end_tok.span.merge(s))
324 .unwrap_or(end_tok.span)
325 } else {
326 self.errors.push(
327 Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
328 .with_span(diag_span(begin_span)),
329 );
330 body.last().map(|n| n.span()).unwrap_or(begin_span)
331 };
332
333 Node::Environment {
334 name: env_name,
335 args,
336 body,
337 span: begin_span.merge(close_span),
338 }
339 }
340
341 fn parse_optional_arg(&mut self) -> Arg {
342 let open_span = self.bump().unwrap().span;
344 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
345 if self.peek_kind() == Some(&TokenKind::Char(']')) {
346 self.bump();
347 } else {
348 self.errors.push(
349 Diagnostic::error("E022","unclosed optional argument")
350 .with_span(diag_span(open_span)),
351 );
352 }
353 Arg::Optional(children)
354 }
355
356 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
358 match nodes.last_mut() {
359 Some(Node::Text(s, existing)) => {
360 s.push(c);
361 *existing = existing.merge(span);
362 }
363 _ => nodes.push(Node::Text(c.to_string(), span)),
364 }
365 }
366}
367
368
369
// Unit tests: each test lexes a small source string, parses it, and checks
// the resulting nodes and/or diagnostics.
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    // Lexes `src` and runs the parser over the resulting tokens.
    fn parse(src: &str) -> ParseResult {
        let tokens = Lexer::new(src).tokenise().tokens;
        Parser::new(tokens).parse()
    }

    // Returns the name and arguments of the first top-level command in `src`;
    // panics if the parsed body contains no command node.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        let r = parse(src);
        for node in &r.document.body {
            if let Node::Command { name, args, .. } = node {
                return (name.clone(), args.clone());
            }
        }
        panic!("no command found in: {src}");
    }

    // A bare control sequence is a command with no arguments.
    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    // A single `{...}` becomes one mandatory argument holding its text.
    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Mandatory(children)
            if matches!(&children[0], Node::Text(s, _) if s == "hello")));
    }

    // Adjacent `{...}{...}` are both attached to the same command.
    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    // A mandatory argument with no closing brace yields at least one diagnostic.
    #[test]
    fn unclosed_arg_produces_error() {
        let r = parse("\\cmd{oops");
        assert!(!r.errors.is_empty());
    }

    // A blank line between text runs shows up as a paragraph-break node.
    #[test]
    fn paragraph_break_still_works() {
        let r = parse("line one\n\nline two");
        let has_par = r.document.body.iter().any(|n| matches!(n, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    // Commands nest: the inner command is the first child of the outer's argument.
    #[test]
    fn nested_command_in_arg() {
        let r = parse("\\outer{\\inner{x}}");
        assert!(r.errors.is_empty());
        if let Node::Command { args, .. } = &r.document.body[0] {
            if let Arg::Mandatory(inner) = &args[0] {
                assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
            } else { panic!("expected mandatory arg"); }
        } else { panic!("expected command"); }
    }

    // `[...]` directly after a command is an optional argument; order is preserved.
    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(children)
            if matches!(&children[0], Node::Text(s, _) if s == "3")));
        assert!(matches!(&args[1], Arg::Mandatory(children)
            if matches!(&children[0], Node::Text(s, _) if s == "27")));
    }

    // An optional argument is accepted even when no mandatory one follows.
    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    // Mixed argument kinds are collected in source order.
    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    // An optional argument with no closing bracket yields at least one diagnostic.
    #[test]
    fn unclosed_optional_arg_produces_error() {
        let r = parse("\\cmd[oops");
        assert!(!r.errors.is_empty());
    }

    // Brackets that do not follow a command are ordinary text characters.
    #[test]
    fn bracket_outside_command_is_text() {
        let r = parse("hello [world]");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello [world]"));
    }

    // `$...$` produces a single math node wrapping its contents.
    #[test]
    fn inline_math_simple() {
        let r = parse("$x+1$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::Math(children, _)
            if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
    }

    // Commands inside inline math are parsed as command nodes, in order.
    #[test]
    fn inline_math_with_command() {
        let r = parse("$\\alpha + \\beta$");
        assert!(r.errors.is_empty());
        if let Node::Math(children, _) = &r.document.body[0] {
            let names: Vec<_> = children.iter().filter_map(|n| match n {
                Node::Command { name, .. } => Some(name.as_str()),
                _ => None,
            }).collect();
            assert_eq!(names, vec!["alpha", "beta"]);
        } else {
            panic!("expected math node");
        }
    }

    // A `$` with no closing `$` yields at least one diagnostic.
    #[test]
    fn unclosed_math_produces_error() {
        let r = parse("text $oops");
        assert!(!r.errors.is_empty());
    }

    // Every diagnostic for these unclosed constructs must carry a span.
    #[test]
    fn parser_errors_carry_spans() {
        let cases = [
            "\\cmd{oops", "\\cmd[oops", "{", "$oops", ];
        for src in cases {
            let r = parse(src);
            assert!(!r.errors.is_empty(), "expected error for {src:?}");
            for e in &r.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    // Text before math stays a separate node and keeps its trailing space.
    #[test]
    fn math_after_text() {
        let r = parse("hello $x$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 2);
        assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello "));
        assert!(matches!(&r.document.body[1], Node::Math(_, _)));
    }

    // `\[...\]` produces a single display-math node wrapping its contents.
    #[test]
    fn display_math_simple() {
        let r = parse("\\[x+1\\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
            if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
    }

    // Display math containing a command and groups parses without diagnostics.
    #[test]
    fn display_math_with_command() {
        let r = parse("\\[ \\sum_{i=0}^n i \\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
    }

    // `\[` with no `\]` is reported as E031.
    #[test]
    fn unclosed_display_math_produces_error() {
        let r = parse("\\[ a + b");
        assert!(r.errors.iter().any(|e| e.code == "E031"));
    }

    // `\]` with no opening `\[` is reported as E032.
    #[test]
    fn stray_close_display_math_produces_error() {
        let r = parse("oops \\] more");
        assert!(r.errors.iter().any(|e| e.code == "E032"));
    }

    // Comments are kept as nodes (text after `%`), followed by the next line's text.
    #[test]
    fn comment_preserved() {
        let r = parse("% hello\nworld");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Comment(s, _) if s == " hello"));
        assert!(matches!(&r.document.body[1], Node::Text(s, _) if s == "world"));
    }

    // A comment inside an argument is kept as a child and does not break the
    // closing brace on the following line.
    #[test]
    fn comment_inside_command_arg() {
        let r = parse("\\textbf{foo % drop?\nbar}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Command { args, .. } = &r.document.body[0] {
            if let Arg::Mandatory(children) = &args[0] {
                assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
            } else { panic!("expected mandatory arg"); }
        } else { panic!("expected command"); }
    }

    // `\begin{x}...\end{x}` becomes an environment; the name argument is
    // removed from `args`.
    #[test]
    fn environment_simple() {
        let r = parse("\\begin{quote}hello\\end{quote}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, args, body, .. } = &r.document.body[0] {
            assert_eq!(name, "quote");
            assert!(args.is_empty());
            assert!(matches!(&body[0], Node::Text(s, _) if s == "hello"));
        } else {
            panic!("expected environment, got {:?}", r.document.body[0]);
        }
    }

    // A trailing `*` is part of the environment name.
    #[test]
    fn environment_with_starred_name() {
        let r = parse("\\begin{equation*}x = 1\\end{equation*}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::Environment { name, .. } if name == "equation*"));
    }

    // Arguments after the name stay on the environment node.
    #[test]
    fn environment_with_extra_args() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, args, .. } = &r.document.body[0] {
            assert_eq!(name, "tabular");
            assert_eq!(args.len(), 1);
            assert!(matches!(&args[0], Arg::Mandatory(_)));
        } else { panic!("expected environment"); }
    }

    // An inner environment is the first body node of the outer one.
    #[test]
    fn nested_environments() {
        let r = parse("\\begin{outer}\\begin{inner}x\\end{inner}\\end{outer}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, body, .. } = &r.document.body[0] {
            assert_eq!(name, "outer");
            assert!(matches!(&body[0], Node::Environment {name, .. } if name == "inner"));
        } else { panic!("expected outer environment"); }
    }

    // `\end` naming a different environment than `\begin` is reported as E042.
    #[test]
    fn mismatched_end_produces_error() {
        let r = parse("\\begin{a}x\\end{b}");
        assert!(r.errors.iter().any(|e| e.code == "E042"));
    }

    // `\begin` with no `\end` before end of input is reported as E041.
    #[test]
    fn unclosed_begin_produces_error() {
        let r = parse("\\begin{a}body");
        assert!(r.errors.iter().any(|e| e.code == "E041"));
    }

    // `\end` with no open environment is reported as E043.
    #[test]
    fn stray_end_produces_error() {
        let r = parse("\\end{a}");
        assert!(r.errors.iter().any(|e| e.code == "E043"));
    }

    // `\begin` not followed by a `{name}` argument is reported as E040.
    #[test]
    fn begin_without_name_produces_error() {
        let r = parse("\\begin foo");
        assert!(r.errors.iter().any(|e| e.code == "E040"));
    }

    // `&` becomes its own node and splits the surrounding text, spaces included.
    #[test]
    fn align_tab_becomes_node() {
        let r = parse("a & b");
        assert!(r.errors.is_empty());
        let kinds: Vec<_> = r.document.body.iter().map(|n| match n {
            Node::Text(s, _) => format!("T({s})"),
            Node::AlignTab(_) => "&".to_owned(),
            other => format!("{other:?}"),
        }).collect();
        assert_eq!(kinds, vec!["T(a )", "&", "T( b)"]);
    }

    // `~` becomes its own node between the two text runs.
    #[test]
    fn tilde_becomes_node() {
        let r = parse("oxyl.~isthebest");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[1], Node::Tilde(_)));
    }

    // Alignment tabs are also recognised inside an environment body.
    #[test]
    fn align_tab_inside_tabular_body() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { body, .. } = &r.document.body[0] {
            assert!(body.iter().any(|n| matches!(n, Node::AlignTab(_))));
        } else { panic!("expected environment"); }
    }
}
682}