Skip to main content

oxyl_parser/
lib.rs

// oxyl-parser
// TODO - put all this in docs
// Builds a Document of Nodes from the lexer's token stream.
//
// - Commands greedily pick up [...] and {...} until the next token
//   is neither of those.
// - A pair of $ tokens wraps a math node.
// - A \[...\] pair wraps a display math node. Inline and display math children
//   are parsed with the same machinery as ordinary text;
//   will do atoms and scripts later - TODO
// - Comments are preserved so source-fidelity tools (e.g. formatters and
//   linters) can round-trip them.
// - Every error carries a DiagSpan pointing at the token that triggered it
//   (the unmatched bracket or dollar sign) so the CLI can render source
//   context without having to extract it from the message string.
16
17
18use oxyl_diagnostics::{DiagSpan, Diagnostic};
19use oxyl_lexer::{Span, Token, TokenKind};
20
21fn diag_span(s: Span) -> DiagSpan {
22    DiagSpan::new(s.start, s.end)
23}
24
25/// Stop predicate for `parse_nodes` when scanning the body of `\[ ... \]`.
26fn is_display_math_close(k: &TokenKind) -> bool {
27    matches!(k, TokenKind::ControlSeq(s) if s == "]")
28}
29
30/// Stop predicate for `parse_nodes` when scanning the body of an environment.
31fn is_end_control_seq(k: &TokenKind) -> bool {
32    matches!(k, TokenKind::ControlSeq(s) if s == "end")
33}
34
35/// Find the first `Arg::Mandatory` whose children are all `Node::Text`,
36/// concatenate that text and return its index along with the trimmed name.
37/// This is how the environment name is recovered (either from the begin 
38/// statement or the end one).
39fn find_env_name(args: &[Arg]) -> Option<(usize, String)> {
40    for (i, arg) in args.iter().enumerate() {
41        if let Arg::Mandatory(children) = arg {
42            let mut name = String::new();
43            for child in children {
44                if let Node::Text(t, _) = child {
45                    name.push_str(t);
46                } else {
47                    return None;
48                }
49            }
50            let trimmed = name.trim().to_owned();
51            if !trimmed.is_empty() {
52                return Some((i, trimmed));
53            }
54        }
55    }
56    None 
57}
58
59// --- 
60// AST Types 
61//
62
/// The root of a parsed LaTeX document.
///
/// For now we do not distinguish the preamble from the body - everything
/// lands in `body`. That split will be added once handling for
/// `\begin{document}` is done.
#[derive(Debug, Clone)]
pub struct Document {
    /// Top-level nodes, in the order the parser produced them.
    pub body: Vec<Node>,
}
71
/// A single node in the LaTeX AST.
///
/// Every variant records a `Span`; use [`Node::span`] to read it without
/// matching on the variant.
#[derive(Debug, Clone)]
pub enum Node {
    /// A run of plain text characters.
    Text(String, Span),

    /// A blank line in the source - signals a paragraph break.
    ParagraphBreak(Span),

    /// A LaTeX command and its arguments, e.g. `\textbf{hello}`.
    Command {
        /// Name without the leading backslash, e.g. `"textbf"`.
        name: String ,
        /// Arguments in source order.
        args: Vec<Arg>,
        /// Source span of the command.
        span: Span,
    },

    /// A braced group `{...}`.
    Group(Vec<Node>, Span),
    
    /// Inline math: `$ ... $`. The span covers both `$` delimiters.
    Math(Vec<Node>, Span),

    /// Display math: `\[ ... \]`. The span covers both delimiters.
    DisplayMath(Vec<Node>, Span),

    /// A `% ...` line comment. The string is the body without the leading
    /// `%` and without the trailing newline - the span covers the whole
    /// run, including both. Comments are kept in the AST since they can
    /// actually affect the produced PDF.
    Comment(String, Span),

    /// A `&` column separator inside `tabular`/`array`/`align` and other environments.
    AlignTab(Span),

    /// A `~` - a non-breaking space. Acts like a regular space for layout
    /// but forbids a line break at this point.
    Tilde(Span),

    /// `\begin{name} ... \end{name}`. `args` is everything after the
    /// environment name (optionals and additional mandatory groups). `body`
    /// holds the parsed children; the span also covers the entire construct.
    Environment {
        /// Environment name, e.g. `"tabular"` or `"equation*"`.
        name: String,
        /// Arguments given to `\begin`, with the name argument removed.
        args: Vec<Arg>,
        /// Nodes parsed between `\begin{...}` and `\end{...}`.
        body: Vec<Node>,
        /// Source span of the environment.
        span: Span,
    },
}
120
121impl Node {
122    pub fn span(&self) -> Span {
123        match self {
124            Node::Text(_, s) => *s,
125            Node::ParagraphBreak(s) => *s,
126            Node::Command { span, .. } => *span,
127            Node::Group(_, s) => *s,
128            Node::Math(_, s) => *s,
129            Node::DisplayMath(_, s) => *s,
130            Node::Comment(_, s) => *s,
131            Node::AlignTab(s) => *s,
132            Node::Tilde(s) => *s,
133            Node::Environment{ span, .. } => *span,
134        }
135    }
136}
137
/// A single argument to a command or environment.
///
/// Only the parsed children are stored; the delimiters themselves are not
/// part of the payload.
#[derive(Debug, Clone)]
pub enum Arg {
    /// A braced argument: `{ ... }`.
    Mandatory(Vec<Node>),
    /// A bracketed argument: `[ ... ]`.
    Optional(Vec<Node>),
}
144
145// --- 
146// Parser Result 
147// --- 
148
/// Returned by [`Parser::parse`]. The document is always produced; errors
/// are collected alongside it so the caller sees everything at once.
#[derive(Debug)]
pub struct ParseResult {
    /// The parsed document (possibly partial when `errors` is non-empty).
    pub document: Document,
    /// Every diagnostic recorded while parsing, in the order it was raised.
    pub errors: Vec<Diagnostic>,
}
156
157// --- 
158// Parser 
159// --- 
160
/// Recursive-descent parser that turns a lexer token stream into a
/// [`Document`], collecting diagnostics instead of aborting on errors.
pub struct Parser {
    /// The full token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Diagnostics recorded so far; handed back in [`ParseResult`].
    errors: Vec<Diagnostic>,
}
166
167impl Parser {
168    pub fn new(tokens: Vec<Token>) -> Self {
169        Self { tokens, pos: 0, errors: Vec::new() }
170    }
171    
172    /// Parse the token stream.
173    pub fn parse(mut self) -> ParseResult {
174        let body = self.parse_nodes(|_| false);
175        ParseResult { document: Document { body }, errors: self.errors }
176    }
177
178    fn peek(&self) -> Option<&Token> {
179        self.tokens.get(self.pos)
180    }
181
182    fn peek_kind(&self) -> Option<&TokenKind> {
183        self.peek().map(|t| &t.kind)
184    }
185
186    fn bump(&mut self) -> Option<Token> {
187        if self.pos < self.tokens.len() {
188            let tok = self.tokens[self.pos].clone();
189            self.pos += 1;
190            Some(tok)
191        } else {
192            None
193        }
194    }
195
196    /// Parse a run of nodes until the token stream is exhausted or 
197    /// `stop` returns true for the next token's kind. The stopping token is 
198    /// left unconsumed so it can be examined and bumped by the caller !
199    ///
200    /// `stop` is used by the group parser to halt at `}` - it is a function pointer 
201    /// rather than an `impl Fn` so the recursive calls don't blow up the parser.
202    fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
203        let mut nodes: Vec<Node> = Vec::new();
204        
205        loop {
206            match self.peek() {
207                None => break,
208                Some(tok) if stop(&tok.kind) => break,
209                _ => {}
210            }
211
212            let tok = self.bump().unwrap();
213
214            match tok.kind {
215                TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
216                TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
217
218                TokenKind::ParagraphBreak => {
219                    nodes.push(Node::ParagraphBreak(tok.span));
220                }
221                
222                TokenKind::Comment(body) => {
223                    nodes.push(Node::Comment(body, tok.span));
224                }
225               
226                // begin{name} opens an environment.
227                TokenKind::ControlSeq(ref name) if name == "begin" => {
228                    let env = self.parse_environment(tok.span);
229                    nodes.push(env);
230                }
231
232                // A bare \end outside an environment is a stray closer. :)
233                TokenKind::ControlSeq(ref name) if name == "end" => {
234                    self.errors.push(
235                        Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
236                            .with_span(diag_span(tok.span)),
237                    );
238                    // Eat its name arg so we don't cause a slippery slope of errors lol.
239                    let _ = self.parse_args();
240                }
241
242                // `\[` opens display math. 
243                TokenKind::ControlSeq(ref name) if name == "[" => {
244                    let open_span = tok.span;
245                    let children = self.parse_nodes(is_display_math_close);
246                    if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
247                        let close = self.bump().unwrap();
248                        nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
249                    } else {
250                        self.errors.push(
251                            Diagnostic::error("E031", "unclosed '\\[' (display math)")
252                                .with_span(diag_span(open_span)),
253                        );
254                        nodes.push(Node::DisplayMath(children, open_span));
255                    }
256                }
257
258                // A bare `\]` outside display math is a stray closer.
259                TokenKind::ControlSeq(ref name) if name == "]" => {
260                    self.errors.push(
261                        Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
262                            .with_span(diag_span(tok.span)),
263                    );
264                }
265
266                TokenKind::ControlSeq(name) => {
267                    let cmd_span = tok.span; 
268                    let args = self.parse_args();
269                    // Extend the span to cover the last argument. 
270                    let full_span = args.last()
271                        .and_then(|a| match a {
272                            Arg::Mandatory(children) => children.last().map(|n| n.span()),
273                            Arg::Optional(children) => children.last().map(|n| n.span()), 
274                        })
275                        .map(|s| cmd_span.merge(s))
276                        .unwrap_or(cmd_span);
277                    nodes.push(Node::Command { name, args, span: full_span });
278                }
279
280                TokenKind::BeginGroup => {
281                    let open_span = tok.span;
282                    let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
283                    if self.peek_kind() == Some(&TokenKind::EndGroup) {
284                        let close = self.bump().unwrap();
285                        nodes.push(Node::Group(children, open_span.merge(close.span)));
286                    } else {
287                        // Unclosed group - record the error, keep what we parsed.
288                        self.errors.push(
289                            Diagnostic::error("E020", "unclosed '{'")
290                                .with_span(diag_span(open_span)),
291                        );
292                        nodes.push(Node::Group(children, open_span));
293                    }
294                }
295                
296                TokenKind::MathShift => {
297                    let open_span = tok.span;
298                    let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
299                    if self.peek_kind() == Some(&TokenKind::MathShift) {
300                        let close = self.bump().unwrap();
301                        nodes.push(Node::Math(children, open_span.merge(close.span)));
302                    } else {
303                        self.errors.push(
304                            Diagnostic::error("E030", "unclosed '$' (math mode)")
305                                .with_span(diag_span(open_span)),
306                        );
307                        nodes.push(Node::Math(children, open_span));
308                    }
309                }
310
311                TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
312                TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
313
314                // Everything else is left unhandled for now so skip it.
315                _ => {}
316            }
317        }
318
319        nodes
320    }
321    /// Consume all immediately following `[...] and `{ ... }` groups as args.
322    ///
323    /// TeX commands pick up their arguments greedily; we skip spaces between
324    /// the command name and each argument to match TeX's behaviour. The loop
325    /// stops at the first token that is neither `[` nor `{`.
326    fn parse_args(&mut self) -> Vec<Arg> {
327        let mut args = Vec::new();
328        
329        loop {
330            // Skip spaces between the command and its next argument.
331            if self.peek_kind() == Some(&TokenKind::Space) {
332                self.bump();
333            }
334
335            match self.peek_kind() {
336                Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
337                Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
338                _ => break,
339            }
340        }
341        args
342
343    }    
344
345    fn parse_mandatory_arg(&mut self) -> Arg {
346        // Consume the opening brace, remembering its span for diagnostics.
347        let open_span = self.bump().unwrap().span;
348        let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
349        if self.peek_kind() == Some(&TokenKind::EndGroup) {
350            self.bump();
351        } else {
352            self.errors.push(
353                Diagnostic::error("E021","unclosed mandatory argument")
354                    .with_span(diag_span(open_span)),
355            );
356        }
357        Arg::Mandatory(children)
358    }
359
360    /// Parse `\begin{name} body \end{name}`. The opening `\begin` token has
361    /// already been consumed; `begin_span` is its span.
362    fn parse_environment(&mut self, begin_span: Span) -> Node {
363        let mut args = self.parse_args();
364
365        // First mandatory arg is the environment name. Without one we
366        // record the error and fall back to a plain cmd so the AST 
367        // still contains atleast something useful
368        let (name_idx, env_name) = match find_env_name(&args) {
369            Some(x) => x,
370            None => {
371                self.errors.push(
372                    Diagnostic::error("E040", "'\\begin' missing environment name")
373                        .with_span(diag_span(begin_span)),
374                );
375                return Node::Command {
376                    name: "begin".to_owned(),
377                    args,
378                    span: begin_span,
379                };
380            }
381        };
382        args.remove(name_idx);
383
384        let body = self.parse_nodes(is_end_control_seq);
385
386        // Try consume the matching \end
387        let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
388            let end_tok = self.bump().unwrap();
389            let end_args = self.parse_args();
390            let close_name = find_env_name(&end_args).map(|(_, n)| n);
391
392            if close_name.as_deref() != Some(env_name.as_str()) {
393                self.errors.push(
394                    Diagnostic::error("E042", format!(
395                            "'\\end{{{}}}' does not match '\\begin{{{}}}'",
396                            close_name.as_deref().unwrap_or(""), env_name,
397                    ))
398                    .with_span(diag_span(end_tok.span)),
399                );
400            }
401
402            // Stretch the span to the last argument of \end (if any)
403            end_args.last()
404                .and_then(|a| match a {
405                    Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
406                })
407                .map(|s| end_tok.span.merge(s))
408                .unwrap_or(end_tok.span)
409        } else {
410            self.errors.push(
411                Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
412                    .with_span(diag_span(begin_span)),
413            );
414            body.last().map(|n| n.span()).unwrap_or(begin_span)
415        };
416
417        Node::Environment {
418            name: env_name, 
419            args,
420            body,
421            span: begin_span.merge(close_span),
422        }
423    }
424
425    fn parse_optional_arg(&mut self) -> Arg {
426        // Consume the opening `[`, remembering its span for diagnostics.
427        let open_span = self.bump().unwrap().span;
428        let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
429        if self.peek_kind() == Some(&TokenKind::Char(']')) {
430            self.bump();
431        } else {
432            self.errors.push(
433                Diagnostic::error("E022","unclosed optional argument")
434                    .with_span(diag_span(open_span)),
435            );
436        }
437        Arg::Optional(children)
438    }
439    
440    /// Append a character to the last `Text` node, or start a new one.
441    fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
442        match nodes.last_mut() {
443            Some(Node::Text(s, existing)) => {
444                s.push(c);
445                *existing = existing.merge(span);
446            }
447            _ => nodes.push(Node::Text(c.to_string(), span)),
448        }
449    }
450}
451
452
453
454// Tests
455
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Lex and parse `src` in one step.
    fn parse(src: &str) -> ParseResult {
        let tokens = Lexer::new(src).tokenise().tokens;
        Parser::new(tokens).parse()
    }

    /// Parse `src` and return the name and args of the first top-level
    /// `Node::Command`; panics if there is none.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        let r = parse(src);
        for node in &r.document.body {
            if let Node::Command { name, args, .. } = node {
                return (name.clone(), args.clone());
            }
        }
        panic!("no command found in: {src}");
    }

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert!(args.is_empty());
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Mandatory(children)
            if matches!(&children[0], Node::Text(s, _) if s == "hello")));
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }
    
    #[test]
    fn unclosed_arg_produces_error() {
        let r = parse("\\cmd{oops");
        assert!(!r.errors.is_empty());
    }

    #[test]
    fn paragraph_break_still_works() {
        let r = parse("line one\n\nline two");
        let has_par = r.document.body.iter().any(|n| matches!(n, Node::ParagraphBreak(_)));
        assert!(has_par);
    }

    #[test]
    fn nested_command_in_arg() {
        let r = parse("\\outer{\\inner{x}}");
        assert!(r.errors.is_empty());
        if let Node::Command { args, .. } = &r.document.body[0] {
            if let Arg::Mandatory(inner) = &args[0] {
                assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
            } else { panic!("expected mandatory arg"); }
        } else { panic!("expected command"); }
    }

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        assert!(matches!(&args[0], Arg::Optional(children)
            if matches!(&children[0], Node::Text(s, _) if s == "3")));
        assert!(matches!(&args[1], Arg::Mandatory(children)
            if matches!(&children[0], Node::Text(s, _) if s == "27")));
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(&args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        // Both argument kinds together, checking that their order is preserved.
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(&args[0], Arg::Optional(_)));
        assert!(matches!(&args[1], Arg::Mandatory(_)));
        assert!(matches!(&args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let r = parse("\\cmd[oops");
        assert!(!r.errors.is_empty());
    }

    #[test]
    fn bracket_outside_command_is_text() {
        // A `[` not directly after a control sequence is just ordinary text.
        let r = parse("hello [world]");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello [world]"));
    }

    #[test]
    fn inline_math_simple() {
        let r = parse("$x+1$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::Math(children, _)
            if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
    }

    #[test]
    fn inline_math_with_command() {
        let r = parse("$\\alpha + \\beta$");
        assert!(r.errors.is_empty());
        if let Node::Math(children, _) = &r.document.body[0] {
            let names: Vec<_> = children.iter().filter_map(|n| match n {
                Node::Command { name, .. } => Some(name.as_str()),
                _ => None, 
            }).collect();
            assert_eq!(names, vec!["alpha", "beta"]);
        } else {
            panic!("expected math node");
        }
    }

    #[test]
    fn unclosed_math_produces_error() {
        let r = parse("text $oops");
        assert!(!r.errors.is_empty());
    }
    
    #[test]
    fn parser_errors_carry_spans() {
        // Every parser error must point at the offending opener so the CLI
        // can render the location from the diagnostic span instead of
        // picking it out of the message text.
        let cases = [
            "\\cmd{oops", // E021
            "\\cmd[oops", // E022
            "{", // E020
            "$oops", // E030
        ];
        for src in cases {
            let r = parse(src);
            assert!(!r.errors.is_empty(), "expected error for {src:?}");
            for e in &r.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let r = parse("hello $x$");
        assert!(r.errors.is_empty());
        assert_eq!(r.document.body.len(), 2);
        assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello "));
        assert!(matches!(&r.document.body[1], Node::Math(_, _)));
    }

    #[test]
    fn display_math_simple() {
        let r = parse("\\[x+1\\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert_eq!(r.document.body.len(), 1);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
            if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
    }

    #[test]
    fn display_math_with_command() {
        let r = parse("\\[ \\sum_{i=0}^n i \\]");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
    }

    #[test]
    fn unclosed_display_math_produces_error() {
        let r = parse("\\[ a + b");
        assert!(r.errors.iter().any(|e| e.code == "E031"));
    }

    #[test]
    fn stray_close_display_math_produces_error() {
        let r = parse("oops \\] more");
        assert!(r.errors.iter().any(|e| e.code == "E032"));
    }

    #[test]
    fn comment_preserved() {
        let r = parse("% hello\nworld");
        assert!(r.errors.is_empty());
        assert!(matches!(&r.document.body[0], Node::Comment(s, _) if s == " hello"));
        assert!(matches!(&r.document.body[1], Node::Text(s, _) if s == "world"));
    }

    #[test]
    fn comment_inside_command_arg() {
        let r = parse("\\textbf{foo % drop?\nbar}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Command { args, .. } = &r.document.body[0] {
            if let Arg::Mandatory(children) = &args[0] {
                assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
            } else { panic!("expected mandatory arg"); }
        } else { panic!("expected command"); }
    }

    #[test]
    fn environment_simple() {
        let r = parse("\\begin{quote}hello\\end{quote}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, args, body, .. } = &r.document.body[0] {
            assert_eq!(name, "quote");
            assert!(args.is_empty());
            assert!(matches!(&body[0], Node::Text(s, _) if s == "hello"));
        } else {
            panic!("expected environment, got {:?}", r.document.body[0]);
        }
    }
    
    #[test]
    fn environment_with_starred_name() {
        let r = parse("\\begin{equation*}x = 1\\end{equation*}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        assert!(matches!(&r.document.body[0], Node::Environment { name, .. } if name == "equation*"));
    }

    #[test]
    fn environment_with_extra_args() {
        // \begin{tabular}{cc} keeps {cc} as env arg, not as the name.
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, args, .. } = &r.document.body[0] {
            assert_eq!(name, "tabular");
            assert_eq!(args.len(), 1);
            assert!(matches!(&args[0], Arg::Mandatory(_)));
        } else { panic!("expected environment"); }
    }

    #[test]
    fn nested_environments() {
        let r = parse("\\begin{outer}\\begin{inner}x\\end{inner}\\end{outer}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { name, body, .. } = &r.document.body[0] {
            assert_eq!(name, "outer");
            assert!(matches!(&body[0], Node::Environment {name, .. } if name == "inner"));
        } else { panic!("expected outer environment"); }
    }

    #[test]
    fn mismatched_end_produces_error() {
        let r = parse("\\begin{a}x\\end{b}");
        assert!(r.errors.iter().any(|e| e.code == "E042"));
    }

    #[test]
    fn unclosed_begin_produces_error() {
        let r = parse("\\begin{a}body");
        assert!(r.errors.iter().any(|e| e.code == "E041"));
    }

    #[test]
    fn stray_end_produces_error() {
        let r = parse("\\end{a}");
        assert!(r.errors.iter().any(|e| e.code == "E043"));
    }

    #[test]
    fn begin_without_name_produces_error() {
        let r = parse("\\begin foo");
        assert!(r.errors.iter().any(|e| e.code == "E040"));
    }

    #[test]
    fn align_tab_becomes_node() {
        let r = parse("a & b");
        assert!(r.errors.is_empty());
        let kinds: Vec<_> = r.document.body.iter().map(|n| match n {
            Node::Text(s, _) => format!("T({s})"),
            Node::AlignTab(_) => "&".to_owned(),
            other => format!("{other:?}"),
        }).collect();
        assert_eq!(kinds, vec!["T(a )", "&", "T( b)"]);
    }

    #[test]
    fn tilde_becomes_node() {
        let r = parse("oxyl.~isthebest");
        assert!(r.errors.is_empty());
        // Order should be oxyl. (text), tilde, isthebest (text)
        assert!(matches!(&r.document.body[1], Node::Tilde(_)));
    }

    #[test]
    fn align_tab_inside_tabular_body() {
        let r = parse("\\begin{tabular}{cc}A & B\\end{tabular}");
        assert!(r.errors.is_empty(), "{:?}", r.errors);
        if let Node::Environment { body, .. } = &r.document.body[0] {
            assert!(body.iter().any(|n| matches!(n, Node::AlignTab(_))));
        } else { panic!("expected environment"); }
    }
}