1use oxyl_diagnostics::{DiagSpan, Diagnostic};
24use oxyl_lexer::{Span, Token, TokenKind};
25
26use crate::ast::{Arg, Document, Node};
27
28fn diag_span(s: Span) -> DiagSpan {
29 DiagSpan::new(s.start, s.end)
30}
31
32fn is_display_math_close(k: &TokenKind) -> bool {
34 matches!(k, TokenKind::ControlSeq(s) if s == "]")
35}
36
37fn is_end_control_seq(k: &TokenKind) -> bool {
39 matches!(k, TokenKind::ControlSeq(s) if s == "end")
40}
41
42fn find_env_name(args: &[Arg]) -> Option<(usize, String)> {
47 for (i, arg) in args.iter().enumerate() {
48 if let Arg::Mandatory(children) = arg {
49 let mut name = String::new();
50 for child in children {
51 if let Node::Text(t, _) = child {
52 name.push_str(t);
53 } else {
54 return None;
55 }
56 }
57 let trimmed = name.trim().to_owned();
58 if !trimmed.is_empty() {
59 return Some((i, trimmed));
60 }
61 }
62 }
63 None
64}
65
66
67#[derive(Debug)]
70pub struct ParseResult {
71 pub document: Document,
72 pub errors: Vec<Diagnostic>,
73}
74
75
76pub struct Parser {
77 tokens: Vec<Token>,
78 pos: usize,
79 errors: Vec<Diagnostic>,
80}
81
82impl Parser {
83 pub fn new(tokens: Vec<Token>) -> Self {
84 Self { tokens, pos: 0, errors: Vec::new() }
85 }
86
87 pub fn parse(mut self) -> ParseResult {
89 let body = self.parse_nodes(|_| false);
90 ParseResult { document: Document { body }, errors: self.errors }
91 }
92
93 fn peek(&self) -> Option<&Token> {
94 self.tokens.get(self.pos)
95 }
96
97 fn peek_kind(&self) -> Option<&TokenKind> {
98 self.peek().map(|t| &t.kind)
99 }
100
101 fn bump(&mut self) -> Option<Token> {
102 if self.pos < self.tokens.len() {
103 let tok = self.tokens[self.pos].clone();
104 self.pos += 1;
105 Some(tok)
106 } else {
107 None
108 }
109 }
110
111 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
118 let mut nodes: Vec<Node> = Vec::new();
119
120 loop {
121 match self.peek() {
122 None => break,
123 Some(tok) if stop(&tok.kind) => break,
124 _ => {}
125 }
126
127 let tok = self.bump().unwrap();
128
129 match tok.kind {
130 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
131 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
132
133 TokenKind::ParagraphBreak => {
134 nodes.push(Node::ParagraphBreak(tok.span));
135 }
136
137 TokenKind::Comment(body) => {
138 nodes.push(Node::Comment(body, tok.span));
139 }
140
141 TokenKind::ControlSeq(ref name) if name == "begin" => {
143 let env = self.parse_environment(tok.span);
144 nodes.push(env);
145 }
146
147 TokenKind::ControlSeq(ref name) if name == "end" => {
149 self.errors.push(
150 Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
151 .with_span(diag_span(tok.span)),
152 );
153 let _ = self.parse_args();
155 }
156
157 TokenKind::ControlSeq(ref name) if name == "[" => {
159 let open_span = tok.span;
160 let children = self.parse_nodes(is_display_math_close);
161 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
162 let close = self.bump().unwrap();
163 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
164 } else {
165 self.errors.push(
166 Diagnostic::error("E031", "unclosed '\\[' (display math)")
167 .with_span(diag_span(open_span)),
168 );
169 nodes.push(Node::DisplayMath(children, open_span));
170 }
171 }
172
173 TokenKind::ControlSeq(ref name) if name == "]" => {
175 self.errors.push(
176 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
177 .with_span(diag_span(tok.span)),
178 );
179 }
180
181 TokenKind::ControlSeq(name) => {
182 let cmd_span = tok.span;
183 let args = self.parse_args();
184 let full_span = args.last()
186 .and_then(|a| match a {
187 Arg::Mandatory(children) => children.last().map(|n| n.span()),
188 Arg::Optional(children) => children.last().map(|n| n.span()),
189 })
190 .map(|s| cmd_span.merge(s))
191 .unwrap_or(cmd_span);
192 nodes.push(Node::Command { name, args, span: full_span });
193 }
194
195 TokenKind::BeginGroup => {
196 let open_span = tok.span;
197 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
198 if self.peek_kind() == Some(&TokenKind::EndGroup) {
199 let close = self.bump().unwrap();
200 nodes.push(Node::Group(children, open_span.merge(close.span)));
201 } else {
202 self.errors.push(
204 Diagnostic::error("E020", "unclosed '{'")
205 .with_span(diag_span(open_span)),
206 );
207 nodes.push(Node::Group(children, open_span));
208 }
209 }
210
211 TokenKind::MathShift => {
212 let open_span = tok.span;
213 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
214 if self.peek_kind() == Some(&TokenKind::MathShift) {
215 let close = self.bump().unwrap();
216 nodes.push(Node::Math(children, open_span.merge(close.span)));
217 } else {
218 self.errors.push(
219 Diagnostic::error("E030", "unclosed '$' (math mode)")
220 .with_span(diag_span(open_span)),
221 );
222 nodes.push(Node::Math(children, open_span));
223 }
224 }
225
226 TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
227 TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
228
229 _ => {}
231 }
232 }
233
234 nodes
235 }
236 fn parse_args(&mut self) -> Vec<Arg> {
242 let mut args = Vec::new();
243
244 loop {
245 if self.peek_kind() == Some(&TokenKind::Space) {
247 self.bump();
248 }
249
250 match self.peek_kind() {
251 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
252 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
253 _ => break,
254 }
255 }
256 args
257
258 }
259
260 fn parse_mandatory_arg(&mut self) -> Arg {
261 let open_span = self.bump().unwrap().span;
263 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
264 if self.peek_kind() == Some(&TokenKind::EndGroup) {
265 self.bump();
266 } else {
267 self.errors.push(
268 Diagnostic::error("E021","unclosed mandatory argument")
269 .with_span(diag_span(open_span)),
270 );
271 }
272 Arg::Mandatory(children)
273 }
274
275 fn parse_environment(&mut self, begin_span: Span) -> Node {
278 let mut args = self.parse_args();
279
280 let (name_idx, env_name) = match find_env_name(&args) {
284 Some(x) => x,
285 None => {
286 self.errors.push(
287 Diagnostic::error("E040", "'\\begin' missing environment name")
288 .with_span(diag_span(begin_span)),
289 );
290 return Node::Command {
291 name: "begin".to_owned(),
292 args,
293 span: begin_span,
294 };
295 }
296 };
297 args.remove(name_idx);
298
299 let body = self.parse_nodes(is_end_control_seq);
300
301 let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
303 let end_tok = self.bump().unwrap();
304 let end_args = self.parse_args();
305 let close_name = find_env_name(&end_args).map(|(_, n)| n);
306
307 if close_name.as_deref() != Some(env_name.as_str()) {
308 self.errors.push(
309 Diagnostic::error("E042", format!(
310 "'\\end{{{}}}' does not match '\\begin{{{}}}'",
311 close_name.as_deref().unwrap_or(""), env_name,
312 ))
313 .with_span(diag_span(end_tok.span)),
314 );
315 }
316
317 end_args.last()
319 .and_then(|a| match a {
320 Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
321 })
322 .map(|s| end_tok.span.merge(s))
323 .unwrap_or(end_tok.span)
324 } else {
325 self.errors.push(
326 Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
327 .with_span(diag_span(begin_span)),
328 );
329 body.last().map(|n| n.span()).unwrap_or(begin_span)
330 };
331
332 Node::Environment {
333 name: env_name,
334 args,
335 body,
336 span: begin_span.merge(close_span),
337 }
338 }
339
340 fn parse_optional_arg(&mut self) -> Arg {
341 let open_span = self.bump().unwrap().span;
343 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
344 if self.peek_kind() == Some(&TokenKind::Char(']')) {
345 self.bump();
346 } else {
347 self.errors.push(
348 Diagnostic::error("E022","unclosed optional argument")
349 .with_span(diag_span(open_span)),
350 );
351 }
352 Arg::Optional(children)
353 }
354
355 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
357 match nodes.last_mut() {
358 Some(Node::Text(s, existing)) => {
359 s.push(c);
360 *existing = existing.merge(span);
361 }
362 _ => nodes.push(Node::Text(c.to_string(), span)),
363 }
364 }
365}
366
367
368
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Tokenises `src` and parses the resulting token stream.
    fn parse(src: &str) -> ParseResult {
        let lexed = Lexer::new(src).tokenise();
        Parser::new(lexed.tokens).parse()
    }

    /// Returns the name and arguments of the first top-level command in
    /// `src`, panicking when there is none.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        let r = parse(src);
        let found = r.document.body.iter().find_map(|node| match node {
            Node::Command { name, args, .. } => Some((name.clone(), args.clone())),
            _ => None,
        });
        match found {
            Some(cmd) => cmd,
            None => panic!("no command found in: {src}"),
        }
    }

    /// True when `node` is a text node whose content equals `expected`.
    fn is_text(node: &Node, expected: &str) -> bool {
        matches!(node, Node::Text(s, _) if s == expected)
    }

    // ---- commands and arguments -------------------------------------

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Mandatory(children)
            if is_text(&children[0], "hello")));
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    #[test]
    fn unclosed_arg_produces_error() {
        let r = parse("\\cmd{oops");
        assert!(!r.errors.is_empty());
    }

    #[test]
    fn paragraph_break_still_works() {
        let r = parse("line one\n\nline two");
        let has_par = r
            .document
            .body
            .iter()
            .any(|n| matches!(n, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    #[test]
    fn nested_command_in_arg() {
        let r = parse("\\outer{\\inner{x}}");
        assert!(r.errors.is_empty());
        let args = match &r.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let inner = match &args[0] {
            Arg::Mandatory(inner) => inner,
            _ => panic!("expected mandatory arg"),
        };
        assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
    }

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(children)
            if is_text(&children[0], "3")));
        assert!(matches!(&args[1], Arg::Mandatory(children)
            if is_text(&children[0], "27")));
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let r = parse("\\cmd[oops");
        assert!(!r.errors.is_empty());
    }

    #[test]
    fn bracket_outside_command_is_text() {
        let r = parse("hello [world]");
        assert!(r.errors.is_empty());
        assert!(is_text(&r.document.body[0], "hello [world]"));
    }

    // ---- inline math --------------------------------------------------

    #[test]
    fn inline_math_simple() {
        let r = parse("$x+1$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::Math(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn inline_math_with_command() {
        let r = parse("$\\alpha + \\beta$");
        assert!(r.errors.is_empty());
        let children = match &r.document.body[0] {
            Node::Math(children, _) => children,
            _ => panic!("expected math node"),
        };
        let mut names = Vec::new();
        for child in children {
            if let Node::Command { name, .. } = child {
                names.push(name.as_str());
            }
        }
        assert_eq!(names, vec!["alpha", "beta"]);
    }

    #[test]
    fn unclosed_math_produces_error() {
        let r = parse("text $oops");
        assert!(!r.errors.is_empty());
    }

    #[test]
    fn parser_errors_carry_spans() {
        let cases = ["\\cmd{oops", "\\cmd[oops", "{", "$oops"];
        for src in cases {
            let r = parse(src);
            assert!(!r.errors.is_empty(), "expected error for {src:?}");
            for e in &r.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let r = parse("hello $x$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 2);
        assert!(is_text(&r.document.body[0], "hello "));
        assert!(matches!(&r.document.body[1], Node::Math(_, _)));
    }

    // ---- display math -------------------------------------------------

    #[test]
    fn display_math_simple() {
        let r = parse("\\[x+1\\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn display_math_with_command() {
        let r = parse("\\[ \\sum_{i=0}^n i \\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
    }

    #[test]
    fn unclosed_display_math_produces_error() {
        let r = parse("\\[ a + b");
        assert!(r.errors.iter().any(|e| e.code == "E031"));
    }

    #[test]
    fn stray_close_display_math_produces_error() {
        let r = parse("oops \\] more");
        assert!(r.errors.iter().any(|e| e.code == "E032"));
    }

    // ---- comments -----------------------------------------------------

    #[test]
    fn comment_preserved() {
        let r = parse("% hello\nworld");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Comment(s, _) if s == " hello"));
        assert!(is_text(&r.document.body[1], "world"));
    }

    #[test]
    fn comment_inside_command_arg() {
        let r = parse("\\textbf{foo % drop?\nbar}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        let args = match &r.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let children = match &args[0] {
            Arg::Mandatory(children) => children,
            _ => panic!("expected mandatory arg"),
        };
        assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
    }

    // ---- environments -------------------------------------------------

    #[test]
    fn environment_simple() {
        let r = parse("\\begin{quote}hello\\end{quote}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, body, .. } => {
                assert_eq!(name, "quote");
                assert!(args.is_empty());
                assert!(is_text(&body[0], "hello"));
            }
            other => panic!("expected environment, got {:?}", other),
        }
    }

    #[test]
    fn environment_with_starred_name() {
        let r = parse("\\begin{equation*}x = 1\\end{equation*}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0],
            Node::Environment { name, .. } if name == "equation*"));
    }

    #[test]
    fn environment_with_extra_args() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, .. } => {
                assert_eq!(name, "tabular");
                assert_eq!(args.len(), 1);
                assert!(matches!(&args[0], Arg::Mandatory(_)));
            }
            _ => panic!("expected environment"),
        }
    }

    #[test]
    fn nested_environments() {
        let r = parse("\\begin{outer}\\begin{inner}x\\end{inner}\\end{outer}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, body, .. } => {
                assert_eq!(name, "outer");
                assert!(matches!(&body[0],
                    Node::Environment { name, .. } if name == "inner"));
            }
            _ => panic!("expected outer environment"),
        }
    }

    #[test]
    fn mismatched_end_produces_error() {
        let r = parse("\\begin{a}x\\end{b}");
        assert!(r.errors.iter().any(|e| e.code == "E042"));
    }

    #[test]
    fn unclosed_begin_produces_error() {
        let r = parse("\\begin{a}body");
        assert!(r.errors.iter().any(|e| e.code == "E041"));
    }

    #[test]
    fn stray_end_produces_error() {
        let r = parse("\\end{a}");
        assert!(r.errors.iter().any(|e| e.code == "E043"));
    }

    #[test]
    fn begin_without_name_produces_error() {
        let r = parse("\\begin foo");
        assert!(r.errors.iter().any(|e| e.code == "E040"));
    }

    // ---- special single-token nodes -----------------------------------

    #[test]
    fn align_tab_becomes_node() {
        let r = parse("a & b");
        assert!(r.errors.is_empty());
        let mut kinds = Vec::new();
        for n in &r.document.body {
            kinds.push(match n {
                Node::Text(s, _) => format!("T({s})"),
                Node::AlignTab(_) => "&".to_owned(),
                other => format!("{other:?}"),
            });
        }
        assert_eq!(kinds, vec!["T(a )", "&", "T( b)"]);
    }

    #[test]
    fn tilde_becomes_node() {
        let r = parse("oxyl.~isthebest");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[1], Node::Tilde(_)));
    }

    #[test]
    fn align_tab_inside_tabular_body() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { body, .. } => {
                assert!(body.iter().any(|n| matches!(n, Node::AlignTab(_))));
            }
            _ => panic!("expected environment"),
        }
    }
}