1use oxyl_diagnostics::{DiagSpan, Diagnostic};
19use oxyl_lexer::{Span, Token, TokenKind};
20
21fn diag_span(s: Span) -> DiagSpan {
22 DiagSpan::new(s.start, s.end)
23}
24
25fn is_display_math_close(k: &TokenKind) -> bool {
27 matches!(k, TokenKind::ControlSeq(s) if s == "]")
28}
29
30fn is_end_control_seq(k: &TokenKind) -> bool {
32 matches!(k, TokenKind::ControlSeq(s) if s == "end")
33}
34
35fn find_env_name(args: &[Arg]) -> Option<(usize, String)> {
40 for (i, arg) in args.iter().enumerate() {
41 if let Arg::Mandatory(children) = arg {
42 let mut name = String::new();
43 for child in children {
44 if let Node::Text(t, _) = child {
45 name.push_str(t);
46 } else {
47 return None;
48 }
49 }
50 let trimmed = name.trim().to_owned();
51 if !trimmed.is_empty() {
52 return Some((i, trimmed));
53 }
54 }
55 }
56 None
57}
58
59#[derive(Debug, Clone)]
68pub struct Document {
69 pub body: Vec<Node>,
70}
71
72#[derive(Debug, Clone)]
74pub enum Node {
75 Text(String, Span),
77
78 ParagraphBreak(Span),
80
81 Command {
83 name: String ,
85 args: Vec<Arg>,
86 span: Span,
87 },
88
89 Group(Vec<Node>, Span),
91
92 Math(Vec<Node>, Span),
94
95 DisplayMath(Vec<Node>, Span),
97
98 Comment(String, Span),
102
103 Environment {
107 name: String,
108 args: Vec<Arg>,
109 body: Vec<Node>,
110 span: Span,
111 },
112}
113
114impl Node {
115 pub fn span(&self) -> Span {
116 match self {
117 Node::Text(_, s) => *s,
118 Node::ParagraphBreak(s) => *s,
119 Node::Command { span, .. } => *span,
120 Node::Group(_, s) => *s,
121 Node::Math(_, s) => *s,
122 Node::DisplayMath(_, s) => *s,
123 Node::Comment(_, s) => *s,
124 Node::Environment{ span, .. } => *span,
125 }
126 }
127}
128
129#[derive(Debug, Clone)]
131pub enum Arg {
132 Mandatory(Vec<Node>),
133 Optional(Vec<Node>),
134}
135
136#[derive(Debug)]
143pub struct ParseResult {
144 pub document: Document,
145 pub errors: Vec<Diagnostic>,
146}
147
148pub struct Parser {
153 tokens: Vec<Token>,
154 pos: usize,
155 errors: Vec<Diagnostic>,
156}
157
158impl Parser {
159 pub fn new(tokens: Vec<Token>) -> Self {
160 Self { tokens, pos: 0, errors: Vec::new() }
161 }
162
163 pub fn parse(mut self) -> ParseResult {
165 let body = self.parse_nodes(|_| false);
166 ParseResult { document: Document { body }, errors: self.errors }
167 }
168
169 fn peek(&self) -> Option<&Token> {
170 self.tokens.get(self.pos)
171 }
172
173 fn peek_kind(&self) -> Option<&TokenKind> {
174 self.peek().map(|t| &t.kind)
175 }
176
177 fn bump(&mut self) -> Option<Token> {
178 if self.pos < self.tokens.len() {
179 let tok = self.tokens[self.pos].clone();
180 self.pos += 1;
181 Some(tok)
182 } else {
183 None
184 }
185 }
186
187 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
194 let mut nodes: Vec<Node> = Vec::new();
195
196 loop {
197 match self.peek() {
198 None => break,
199 Some(tok) if stop(&tok.kind) => break,
200 _ => {}
201 }
202
203 let tok = self.bump().unwrap();
204
205 match tok.kind {
206 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
207 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
208
209 TokenKind::ParagraphBreak => {
210 nodes.push(Node::ParagraphBreak(tok.span));
211 }
212
213 TokenKind::Comment(body) => {
214 nodes.push(Node::Comment(body, tok.span));
215 }
216
217 TokenKind::ControlSeq(ref name) if name == "begin" => {
219 let env = self.parse_environment(tok.span);
220 nodes.push(env);
221 }
222
223 TokenKind::ControlSeq(ref name) if name == "end" => {
225 self.errors.push(
226 Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
227 .with_span(diag_span(tok.span)),
228 );
229 let _ = self.parse_args();
231 }
232
233 TokenKind::ControlSeq(ref name) if name == "[" => {
235 let open_span = tok.span;
236 let children = self.parse_nodes(is_display_math_close);
237 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
238 let close = self.bump().unwrap();
239 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
240 } else {
241 self.errors.push(
242 Diagnostic::error("E031", "unclosed '\\[' (display math)")
243 .with_span(diag_span(open_span)),
244 );
245 nodes.push(Node::DisplayMath(children, open_span));
246 }
247 }
248
249 TokenKind::ControlSeq(ref name) if name == "]" => {
251 self.errors.push(
252 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
253 .with_span(diag_span(tok.span)),
254 );
255 }
256
257 TokenKind::ControlSeq(name) => {
258 let cmd_span = tok.span;
259 let args = self.parse_args();
260 let full_span = args.last()
262 .and_then(|a| match a {
263 Arg::Mandatory(children) => children.last().map(|n| n.span()),
264 Arg::Optional(children) => children.last().map(|n| n.span()),
265 })
266 .map(|s| cmd_span.merge(s))
267 .unwrap_or(cmd_span);
268 nodes.push(Node::Command { name, args, span: full_span });
269 }
270
271 TokenKind::BeginGroup => {
272 let open_span = tok.span;
273 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
274 if self.peek_kind() == Some(&TokenKind::EndGroup) {
275 let close = self.bump().unwrap();
276 nodes.push(Node::Group(children, open_span.merge(close.span)));
277 } else {
278 self.errors.push(
280 Diagnostic::error("E020", "unclosed '{'")
281 .with_span(diag_span(open_span)),
282 );
283 nodes.push(Node::Group(children, open_span));
284 }
285 }
286
287 TokenKind::MathShift => {
288 let open_span = tok.span;
289 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
290 if self.peek_kind() == Some(&TokenKind::MathShift) {
291 let close = self.bump().unwrap();
292 nodes.push(Node::Math(children, open_span.merge(close.span)));
293 } else {
294 self.errors.push(
295 Diagnostic::error("E030", "unclosed '$' (math mode)")
296 .with_span(diag_span(open_span)),
297 );
298 nodes.push(Node::Math(children, open_span));
299 }
300 }
301 _ => {}
303 }
304 }
305
306 nodes
307 }
308 fn parse_args(&mut self) -> Vec<Arg> {
314 let mut args = Vec::new();
315
316 loop {
317 if self.peek_kind() == Some(&TokenKind::Space) {
319 self.bump();
320 }
321
322 match self.peek_kind() {
323 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
324 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
325 _ => break,
326 }
327 }
328 args
329
330 }
331
332 fn parse_mandatory_arg(&mut self) -> Arg {
333 let open_span = self.bump().unwrap().span;
335 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
336 if self.peek_kind() == Some(&TokenKind::EndGroup) {
337 self.bump();
338 } else {
339 self.errors.push(
340 Diagnostic::error("E021","unclosed mandatory argument")
341 .with_span(diag_span(open_span)),
342 );
343 }
344 Arg::Mandatory(children)
345 }
346
347 fn parse_environment(&mut self, begin_span: Span) -> Node {
350 let mut args = self.parse_args();
351
352 let (name_idx, env_name) = match find_env_name(&args) {
356 Some(x) => x,
357 None => {
358 self.errors.push(
359 Diagnostic::error("E040", "'\\begin' missing environment name")
360 .with_span(diag_span(begin_span)),
361 );
362 return Node::Command {
363 name: "begin".to_owned(),
364 args,
365 span: begin_span,
366 };
367 }
368 };
369 args.remove(name_idx);
370
371 let body = self.parse_nodes(is_end_control_seq);
372
373 let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
375 let end_tok = self.bump().unwrap();
376 let end_args = self.parse_args();
377 let close_name = find_env_name(&end_args).map(|(_, n)| n);
378
379 if close_name.as_deref() != Some(env_name.as_str()) {
380 self.errors.push(
381 Diagnostic::error("E042", format!(
382 "'\\end{{{}}}' does not match '\\begin{{{}}}'",
383 close_name.as_deref().unwrap_or(""), env_name,
384 ))
385 .with_span(diag_span(end_tok.span)),
386 );
387 }
388
389 end_args.last()
391 .and_then(|a| match a {
392 Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
393 })
394 .map(|s| end_tok.span.merge(s))
395 .unwrap_or(end_tok.span)
396 } else {
397 self.errors.push(
398 Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
399 .with_span(diag_span(begin_span)),
400 );
401 body.last().map(|n| n.span()).unwrap_or(begin_span)
402 };
403
404 Node::Environment {
405 name: env_name,
406 args,
407 body,
408 span: begin_span.merge(close_span),
409 }
410 }
411
412 fn parse_optional_arg(&mut self) -> Arg {
413 let open_span = self.bump().unwrap().span;
415 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
416 if self.peek_kind() == Some(&TokenKind::Char(']')) {
417 self.bump();
418 } else {
419 self.errors.push(
420 Diagnostic::error("E022","unclosed optional argument")
421 .with_span(diag_span(open_span)),
422 );
423 }
424 Arg::Optional(children)
425 }
426
427 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
429 match nodes.last_mut() {
430 Some(Node::Text(s, existing)) => {
431 s.push(c);
432 *existing = existing.merge(span);
433 }
434 _ => nodes.push(Node::Text(c.to_string(), span)),
435 }
436 }
437}
438
439
440
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Lexes and parses `src`.
    fn parse(src: &str) -> ParseResult {
        let tokens = Lexer::new(src).tokenise().tokens;
        Parser::new(tokens).parse()
    }

    /// Returns the name and arguments of the first top-level command in `src`.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        let r = parse(src);
        for node in &r.document.body {
            if let Node::Command { name, args, .. } = node {
                return (name.clone(), args.clone());
            }
        }
        panic!("no command found in: {src}");
    }

    /// True when `r` contains a diagnostic with the given code.
    fn has_code(r: &ParseResult, code: &'static str) -> bool {
        r.errors.iter().any(|e| e.code == code)
    }

    /// True when `node` is a text node whose content equals `expected`.
    fn is_text(node: &Node, expected: &str) -> bool {
        matches!(node, Node::Text(s, _) if s == expected)
    }

    /// Children of a mandatory argument; panics for an optional one.
    fn mandatory_children(arg: &Arg) -> &[Node] {
        match arg {
            Arg::Mandatory(children) => children,
            Arg::Optional(_) => panic!("expected mandatory arg"),
        }
    }

    /// Arguments of a command node; panics for any other node.
    fn command_args(node: &Node) -> &[Arg] {
        match node {
            Node::Command { args, .. } => args,
            other => panic!("expected command, got {other:?}"),
        }
    }

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(is_text(&mandatory_children(&args[0])[0], "hello"));
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    #[test]
    fn unclosed_arg_produces_error() {
        let r = parse("\\cmd{oops");
        // Pin the specific diagnostic, not merely "some error".
        assert!(has_code(&r, "E021"), "{:?}", r.errors);
    }

    #[test]
    fn paragraph_break_still_works() {
        let r = parse("line one\n\nline two");
        let has_par = r
            .document
            .body
            .iter()
            .any(|n| matches!(n, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    #[test]
    fn nested_command_in_arg() {
        let r = parse("\\outer{\\inner{x}}");
        assert!(r.errors.is_empty());
        let args = command_args(&r.document.body[0]);
        let inner = mandatory_children(&args[0]);
        assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
    }

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(children) if is_text(&children[0], "3")));
        assert!(matches!(&args[1], Arg::Mandatory(children) if is_text(&children[0], "27")));
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let r = parse("\\cmd[oops");
        assert!(has_code(&r, "E022"), "{:?}", r.errors);
    }

    #[test]
    fn bracket_outside_command_is_text() {
        let r = parse("hello [world]");
        assert!(r.errors.is_empty());
        assert!(is_text(&r.document.body[0], "hello [world]"));
    }

    #[test]
    fn inline_math_simple() {
        let r = parse("$x+1$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::Math(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn inline_math_with_command() {
        let r = parse("$\\alpha + \\beta$");
        assert!(r.errors.is_empty());
        let children = match &r.document.body[0] {
            Node::Math(children, _) => children,
            other => panic!("expected math node, got {other:?}"),
        };
        let names: Vec<_> = children
            .iter()
            .filter_map(|n| match n {
                Node::Command { name, .. } => Some(name.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(names, vec!["alpha", "beta"]);
    }

    #[test]
    fn unclosed_math_produces_error() {
        let r = parse("text $oops");
        assert!(has_code(&r, "E030"), "{:?}", r.errors);
    }

    #[test]
    fn parser_errors_carry_spans() {
        let cases = ["\\cmd{oops", "\\cmd[oops", "{", "$oops"];
        for src in cases {
            let r = parse(src);
            assert!(!r.errors.is_empty(), "expected error for {src:?}");
            for e in &r.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let r = parse("hello $x$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 2);
        assert!(is_text(&r.document.body[0], "hello "));
        assert!(matches!(&r.document.body[1], Node::Math(_, _)));
    }

    #[test]
    fn display_math_simple() {
        let r = parse("\\[x+1\\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
            if is_text(&children[0], "x+1")));
    }

    #[test]
    fn display_math_with_command() {
        let r = parse("\\[ \\sum_{i=0}^n i \\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
    }

    #[test]
    fn unclosed_display_math_produces_error() {
        let r = parse("\\[ a + b");
        assert!(has_code(&r, "E031"));
    }

    #[test]
    fn stray_close_display_math_produces_error() {
        let r = parse("oops \\] more");
        assert!(has_code(&r, "E032"));
    }

    #[test]
    fn comment_preserved() {
        let r = parse("% hello\nworld");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Comment(s, _) if s == " hello"));
        assert!(is_text(&r.document.body[1], "world"));
    }

    #[test]
    fn comment_inside_command_arg() {
        let r = parse("\\textbf{foo % drop?\nbar}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        let args = command_args(&r.document.body[0]);
        let children = mandatory_children(&args[0]);
        assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
    }

    #[test]
    fn environment_simple() {
        let r = parse("\\begin{quote}hello\\end{quote}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, body, .. } => {
                assert_eq!(name, "quote");
                assert!(args.is_empty());
                assert!(is_text(&body[0], "hello"));
            }
            other => panic!("expected environment, got {other:?}"),
        }
    }

    #[test]
    fn environment_with_starred_name() {
        let r = parse("\\begin{equation*}x = 1\\end{equation*}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0],
            Node::Environment { name, .. } if name == "equation*"));
    }

    #[test]
    fn environment_with_extra_args() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, args, .. } => {
                assert_eq!(name, "tabular");
                assert_eq!(args.len(), 1);
                assert!(matches!(&args[0], Arg::Mandatory(_)));
            }
            other => panic!("expected environment, got {other:?}"),
        }
    }

    #[test]
    fn nested_environments() {
        let r = parse("\\begin{outer}\\begin{inner}x\\end{inner}\\end{outer}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        match &r.document.body[0] {
            Node::Environment { name, body, .. } => {
                assert_eq!(name, "outer");
                assert!(matches!(&body[0],
                    Node::Environment { name, .. } if name == "inner"));
            }
            other => panic!("expected outer environment, got {other:?}"),
        }
    }

    #[test]
    fn mismatched_end_produces_error() {
        let r = parse("\\begin{a}x\\end{b}");
        assert!(has_code(&r, "E042"));
    }

    #[test]
    fn unclosed_begin_produces_error() {
        let r = parse("\\begin{a}body");
        assert!(has_code(&r, "E041"));
    }

    #[test]
    fn stray_end_produces_error() {
        let r = parse("\\end{a}");
        assert!(has_code(&r, "E043"));
    }

    #[test]
    fn begin_without_name_produces_error() {
        let r = parse("\\begin foo");
        assert!(has_code(&r, "E040"));
    }
}