1use oxyl_diagnostics::{DiagSpan, Diagnostic};
19use oxyl_lexer::{Span, Token, TokenKind};
20
21fn diag_span(s: Span) -> DiagSpan {
22 DiagSpan::new(s.start, s.end)
23}
24
25fn is_display_math_close(k: &TokenKind) -> bool {
27 matches!(k, TokenKind::ControlSeq(s) if s == "]")
28}
29
30#[derive(Debug, Clone)]
39pub struct Document {
40 pub body: Vec<Node>,
41}
42
43#[derive(Debug, Clone)]
45pub enum Node {
46 Text(String, Span),
48
49 ParagraphBreak(Span),
51
52 Command {
54 name: String ,
56 args: Vec<Arg>,
57 span: Span,
58 },
59
60 Group(Vec<Node>, Span),
62
63 Math(Vec<Node>, Span),
65
66 DisplayMath(Vec<Node>, Span),
68
69 Comment(String, Span),
73}
74
75impl Node {
76 pub fn span(&self) -> Span {
77 match self {
78 Node::Text(_, s) => *s,
79 Node::ParagraphBreak(s) => *s,
80 Node::Command { span, .. } => *span,
81 Node::Group(_, s) => *s,
82 Node::Math(_, s) => *s,
83 Node::DisplayMath(_, s) => *s,
84 Node::Comment(_, s) => *s,
85 }
86 }
87}
88
89#[derive(Debug, Clone)]
91pub enum Arg {
92 Mandatory(Vec<Node>),
93 Optional(Vec<Node>),
94}
95
96#[derive(Debug)]
103pub struct ParseResult {
104 pub document: Document,
105 pub errors: Vec<Diagnostic>,
106}
107
108pub struct Parser {
113 tokens: Vec<Token>,
114 pos: usize,
115 errors: Vec<Diagnostic>,
116}
117
118impl Parser {
119 pub fn new(tokens: Vec<Token>) -> Self {
120 Self { tokens, pos: 0, errors: Vec::new() }
121 }
122
123 pub fn parse(mut self) -> ParseResult {
125 let body = self.parse_nodes(|_| false);
126 ParseResult { document: Document { body }, errors: self.errors }
127 }
128
129 fn peek(&self) -> Option<&Token> {
130 self.tokens.get(self.pos)
131 }
132
133 fn peek_kind(&self) -> Option<&TokenKind> {
134 self.peek().map(|t| &t.kind)
135 }
136
137 fn bump(&mut self) -> Option<Token> {
138 if self.pos < self.tokens.len() {
139 let tok = self.tokens[self.pos].clone();
140 self.pos += 1;
141 Some(tok)
142 } else {
143 None
144 }
145 }
146
147 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
154 let mut nodes: Vec<Node> = Vec::new();
155
156 loop {
157 match self.peek() {
158 None => break,
159 Some(tok) if stop(&tok.kind) => break,
160 _ => {}
161 }
162
163 let tok = self.bump().unwrap();
164
165 match tok.kind {
166 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
167 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
168
169 TokenKind::ParagraphBreak => {
170 nodes.push(Node::ParagraphBreak(tok.span));
171 }
172
173 TokenKind::Comment(body) => {
174 nodes.push(Node::Comment(body, tok.span));
175 }
176
177 TokenKind::ControlSeq(ref name) if name == "[" => {
179 let open_span = tok.span;
180 let children = self.parse_nodes(is_display_math_close);
181 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
182 let close = self.bump().unwrap();
183 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
184 } else {
185 self.errors.push(
186 Diagnostic::error("E031", "unclosed '\\[' (display math)")
187 .with_span(diag_span(open_span)),
188 );
189 nodes.push(Node::DisplayMath(children, open_span));
190 }
191 }
192
193 TokenKind::ControlSeq(ref name) if name == "]" => {
195 self.errors.push(
196 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
197 .with_span(diag_span(tok.span)),
198 );
199 }
200
201 TokenKind::ControlSeq(name) => {
202 let cmd_span = tok.span;
203 let args = self.parse_args();
204 let full_span = args.last()
206 .and_then(|a| match a {
207 Arg::Mandatory(children) => children.last().map(|n| n.span()),
208 Arg::Optional(children) => children.last().map(|n| n.span()),
209 })
210 .map(|s| cmd_span.merge(s))
211 .unwrap_or(cmd_span);
212 nodes.push(Node::Command { name, args, span: full_span });
213 }
214
215 TokenKind::BeginGroup => {
216 let open_span = tok.span;
217 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
218 if self.peek_kind() == Some(&TokenKind::EndGroup) {
219 let close = self.bump().unwrap();
220 nodes.push(Node::Group(children, open_span.merge(close.span)));
221 } else {
222 self.errors.push(
224 Diagnostic::error("E020", "unclosed '{'")
225 .with_span(diag_span(open_span)),
226 );
227 nodes.push(Node::Group(children, open_span));
228 }
229 }
230
231 TokenKind::MathShift => {
232 let open_span = tok.span;
233 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
234 if self.peek_kind() == Some(&TokenKind::MathShift) {
235 let close = self.bump().unwrap();
236 nodes.push(Node::Math(children, open_span.merge(close.span)));
237 } else {
238 self.errors.push(
239 Diagnostic::error("E030", "unclosed '$' (math mode)")
240 .with_span(diag_span(open_span)),
241 );
242 nodes.push(Node::Math(children, open_span));
243 }
244 }
245 _ => {}
247 }
248 }
249
250 nodes
251 }
252 fn parse_args(&mut self) -> Vec<Arg> {
258 let mut args = Vec::new();
259
260 loop {
261 if self.peek_kind() == Some(&TokenKind::Space) {
263 self.bump();
264 }
265
266 match self.peek_kind() {
267 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
268 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
269 _ => break,
270 }
271 }
272 args
273 }
274
275 fn parse_mandatory_arg(&mut self) -> Arg {
276 let open_span = self.bump().unwrap().span;
278 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
279 if self.peek_kind() == Some(&TokenKind::EndGroup) {
280 self.bump();
281 } else {
282 self.errors.push(
283 Diagnostic::error("E021","unclosed mandatory argument")
284 .with_span(diag_span(open_span)),
285 );
286 }
287 Arg::Mandatory(children)
288 }
289
290 fn parse_optional_arg(&mut self) -> Arg {
291 let open_span = self.bump().unwrap().span;
293 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
294 if self.peek_kind() == Some(&TokenKind::Char(']')) {
295 self.bump();
296 } else {
297 self.errors.push(
298 Diagnostic::error("E022","unclosed optional argument")
299 .with_span(diag_span(open_span)),
300 );
301 }
302 Arg::Optional(children)
303 }
304
305 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
307 match nodes.last_mut() {
308 Some(Node::Text(s, existing)) => {
309 s.push(c);
310 *existing = existing.merge(span);
311 }
312 _ => nodes.push(Node::Text(c.to_string(), span)),
313 }
314 }
315}
316
317
318
319#[cfg(test)]
322mod tests {
323 use super::*;
324 use oxyl_lexer::Lexer;
325
326 fn parse(src: &str) -> ParseResult {
327 let tokens = Lexer::new(src).tokenise().tokens;
328 Parser::new(tokens).parse()
329 }
330
331 fn first_command(src: &str) -> (String, Vec<Arg>) {
332 let r = parse(src);
333 for node in &r.document.body {
334 if let Node::Command { name, args, .. } = node {
335 return (name.clone(), args.clone());
336 }
337 }
338 panic!("no command found in: {src}");
339 }
340
341 #[test]
342 fn command_no_args() {
343 let (name, args) = first_command("\\LaTeX");
344 assert_eq!(name, "LaTeX");
345 assert!(args.is_empty());
346 }
347
348 #[test]
349 fn command_one_mandatory_arg() {
350 let (name, args) = first_command("\\textbf{hello}");
351 assert_eq!(name, "textbf");
352 assert_eq!(args.len(), 1);
353 assert!(matches!(&args[0], Arg::Mandatory(children)
354 if matches!(&children[0], Node::Text(s, _) if s == "hello")));
355 }
356
357 #[test]
358 fn command_two_mandatory_args() {
359 let (name, args) = first_command("\\frac{a}{b}");
360 assert_eq!(name, "frac");
361 assert_eq!(args.len(), 2);
362 }
363
364 #[test]
365 fn unclosed_arg_produces_error() {
366 let r = parse("\\cmd{oops");
367 assert!(!r.errors.is_empty());
368 }
369
370 #[test]
371 fn paragraph_break_still_works() {
372 let r = parse("line one\n\nline two");
373 let has_par = r.document.body.iter().any(|n| matches!(n, Node::ParagraphBreak(_)));
374 assert!(has_par);
375 }
376
377 #[test]
378 fn nested_command_in_arg() {
379 let r = parse("\\outer{\\inner{x}}");
380 assert!(r.errors.is_empty());
381 if let Node::Command { args, .. } = &r.document.body[0] {
382 if let Arg::Mandatory(inner) = &args[0] {
383 assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
384 } else { panic!("expected mandatory arg"); }
385 } else { panic!("expected command"); }
386 }
387
388 #[test]
389 fn command_with_optional_arg() {
390 let (name, args) = first_command("\\sqrt[3]{27}");
391 assert_eq!(name, "sqrt");
392 assert_eq!(args.len(), 2);
393 assert!(matches!(&args[0], Arg::Optional(children)
394 if matches!(&children[0], Node::Text(s, _) if s == "3")));
395 assert!(matches!(&args[1], Arg::Mandatory(children)
396 if matches!(&children[0], Node::Text(s, _) if s == "27")));
397 }
398
399 #[test]
400 fn command_with_only_optional_arg() {
401 let (name, args) = first_command("\\foo[opt]");
402 assert_eq!(name, "foo");
403 assert_eq!(args.len(), 1);
404 assert!(matches!(&args[0], Arg::Optional(_)));
405 }
406
407 #[test]
408 fn optional_then_two_mandatory() {
409 let (_, args) = first_command("\\section[short]{long}{extra}");
411 assert_eq!(args.len(), 3);
412 assert!(matches!(&args[0], Arg::Optional(_)));
413 assert!(matches!(&args[1], Arg::Mandatory(_)));
414 assert!(matches!(&args[2], Arg::Mandatory(_)));
415 }
416
417 #[test]
418 fn unclosed_optional_arg_produces_error() {
419 let r = parse("\\cmd[oops");
420 assert!(!r.errors.is_empty());
421 }
422
423 #[test]
424 fn bracket_outside_command_is_text() {
425 let r = parse("hello [world]");
427 assert!(r.errors.is_empty());
428 assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello [world]"));
429 }
430
431 #[test]
432 fn inline_math_simple() {
433 let r = parse("$x+1$");
434 assert!(r.errors.is_empty());
435 assert_eq!(r.document.body.len(), 1);
436 assert!(matches!(&r.document.body[0], Node::Math(children, _)
437 if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
438 }
439
440 #[test]
441 fn inline_math_with_command() {
442 let r = parse("$\\alpha + \\beta$");
443 assert!(r.errors.is_empty());
444 if let Node::Math(children, _) = &r.document.body[0] {
445 let names: Vec<_> = children.iter().filter_map(|n| match n {
446 Node::Command { name, .. } => Some(name.as_str()),
447 _ => None,
448 }).collect();
449 assert_eq!(names, vec!["alpha", "beta"]);
450 } else {
451 panic!("expected math node");
452 }
453 }
454
455 #[test]
456 fn unclosed_math_produces_error() {
457 let r = parse("text $oops");
458 assert!(!r.errors.is_empty());
459 }
460
461 #[test]
462 fn parser_errors_carry_spans() {
463 let cases = [
467 "\\cmd{oops", "\\cmd[oops", "{", "$oops", ];
472 for src in cases {
473 let r = parse(src);
474 assert!(!r.errors.is_empty(), "expected error for {src:?}");
475 for e in &r.errors {
476 assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
477 }
478 }
479 }
480
481 #[test]
482 fn math_after_text() {
483 let r = parse("hello $x$");
484 assert!(r.errors.is_empty());
485 assert_eq!(r.document.body.len(), 2);
486 assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello "));
487 assert!(matches!(&r.document.body[1], Node::Math(_, _)));
488 }
489
490 #[test]
491 fn display_math_simple() {
492 let r = parse("\\[x+1\\]");
493 assert!(r.errors.is_empty(), "{:?}", r.errors);
494 assert_eq!(r.document.body.len(), 1);
495 assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
496 if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
497 }
498
499 #[test]
500 fn display_math_with_command() {
501 let r = parse("\\[ \\sum_{i=0}^n i \\]");
502 assert!(r.errors.is_empty(), "{:?}", r.errors);
503 assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
504 }
505
506 #[test]
507 fn unclosed_display_math_produces_error() {
508 let r = parse("\\[ a + b");
509 assert!(r.errors.iter().any(|e| e.code == "E031"));
510 }
511
512 #[test]
513 fn stray_close_display_math_produces_error() {
514 let r = parse("oops \\] more");
515 assert!(r.errors.iter().any(|e| e.code == "E032"));
516 }
517
518 #[test]
519 fn comment_preserved() {
520 let r = parse("% hello\nworld");
521 assert!(r.errors.is_empty());
522 assert!(matches!(&r.document.body[0], Node::Comment(s, _) if s == " hello"));
523 assert!(matches!(&r.document.body[1], Node::Text(s, _) if s == "world"));
524 }
525
526 #[test]
527 fn comment_inside_command_arg() {
528 let r = parse("\\textbf{foo % drop?\nbar}");
529 assert!(r.errors.is_empty(), "{:?}", r.errors);
530 if let Node::Command { args, .. } = &r.document.body[0] {
531 if let Arg::Mandatory(children) = &args[0] {
532 assert!(children.iter().any(|n| matches!(n, Node::Comment(_, _))));
533 } else { panic!("expected mandatory arg"); }
534 } else { panic!("expected command"); }
535 }
536}