1use oxyl_diagnostics::{DiagSpan, Diagnostic};
17use oxyl_lexer::{Span, Token, TokenKind};
18
19fn diag_span(s: Span) -> DiagSpan {
20 DiagSpan::new(s.start, s.end)
21}
22
23fn is_display_math_close(k: &TokenKind) -> bool {
25 matches!(k, TokenKind::ControlSeq(s) if s == "]")
26}
27
28#[derive(Debug, Clone)]
37pub struct Document {
38 pub body: Vec<Node>,
39}
40
41#[derive(Debug, Clone)]
43pub enum Node {
44 Text(String, Span),
46
47 ParagraphBreak(Span),
49
50 Command {
52 name: String ,
54 args: Vec<Arg>,
55 span: Span,
56 },
57
58 Group(Vec<Node>, Span),
60
61 Math(Vec<Node>, Span),
63
64 DisplayMath(Vec<Node>, Span),
66}
67
68impl Node {
69 pub fn span(&self) -> Span {
70 match self {
71 Node::Text(_, s) => *s,
72 Node::ParagraphBreak(s) => *s,
73 Node::Command { span, .. } => *span,
74 Node::Group(_, s) => *s,
75 Node::Math(_, s) => *s,
76 Node::DisplayMath(_, s) => *s,
77 }
78 }
79}
80
81#[derive(Debug, Clone)]
83pub enum Arg {
84 Mandatory(Vec<Node>),
85 Optional(Vec<Node>),
86}
87
88#[derive(Debug)]
95pub struct ParseResult {
96 pub document: Document,
97 pub errors: Vec<Diagnostic>,
98}
99
100pub struct Parser {
105 tokens: Vec<Token>,
106 pos: usize,
107 errors: Vec<Diagnostic>,
108}
109
110impl Parser {
111 pub fn new(tokens: Vec<Token>) -> Self {
112 Self { tokens, pos: 0, errors: Vec::new() }
113 }
114
115 pub fn parse(mut self) -> ParseResult {
117 let body = self.parse_nodes(|_| false);
118 ParseResult { document: Document { body }, errors: self.errors }
119 }
120
121 fn peek(&self) -> Option<&Token> {
122 self.tokens.get(self.pos)
123 }
124
125 fn peek_kind(&self) -> Option<&TokenKind> {
126 self.peek().map(|t| &t.kind)
127 }
128
129 fn bump(&mut self) -> Option<Token> {
130 if self.pos < self.tokens.len() {
131 let tok = self.tokens[self.pos].clone();
132 self.pos += 1;
133 Some(tok)
134 } else {
135 None
136 }
137 }
138
139 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
146 let mut nodes: Vec<Node> = Vec::new();
147
148 loop {
149 match self.peek() {
150 None => break,
151 Some(tok) if stop(&tok.kind) => break,
152 _ => {}
153 }
154
155 let tok = self.bump().unwrap();
156
157 match tok.kind {
158 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
159 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
160
161 TokenKind::ParagraphBreak => {
162 nodes.push(Node::ParagraphBreak(tok.span));
163 }
164
165 TokenKind::ControlSeq(ref name) if name == "[" => {
167 let open_span = tok.span;
168 let children = self.parse_nodes(is_display_math_close);
169 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
170 let close = self.bump().unwrap();
171 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
172 } else {
173 self.errors.push(
174 Diagnostic::error("E031", "unclosed '\\[' (display math)")
175 .with_span(diag_span(open_span)),
176 );
177 nodes.push(Node::DisplayMath(children, open_span));
178 }
179 }
180
181 TokenKind::ControlSeq(ref name) if name == "]" => {
183 self.errors.push(
184 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
185 .with_span(diag_span(tok.span)),
186 );
187 }
188
189 TokenKind::ControlSeq(name) => {
190 let cmd_span = tok.span;
191 let args = self.parse_args();
192 let full_span = args.last()
194 .and_then(|a| match a {
195 Arg::Mandatory(children) => children.last().map(|n| n.span()),
196 Arg::Optional(children) => children.last().map(|n| n.span()),
197 })
198 .map(|s| cmd_span.merge(s))
199 .unwrap_or(cmd_span);
200 nodes.push(Node::Command { name, args, span: full_span });
201 }
202
203 TokenKind::BeginGroup => {
204 let open_span = tok.span;
205 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
206 if self.peek_kind() == Some(&TokenKind::EndGroup) {
207 let close = self.bump().unwrap();
208 nodes.push(Node::Group(children, open_span.merge(close.span)));
209 } else {
210 self.errors.push(
212 Diagnostic::error("E020", "unclosed '{'")
213 .with_span(diag_span(open_span)),
214 );
215 nodes.push(Node::Group(children, open_span));
216 }
217 }
218
219 TokenKind::MathShift => {
220 let open_span = tok.span;
221 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
222 if self.peek_kind() == Some(&TokenKind::MathShift) {
223 let close = self.bump().unwrap();
224 nodes.push(Node::Math(children, open_span.merge(close.span)));
225 } else {
226 self.errors.push(
227 Diagnostic::error("E030", "unclosed '$' (math mode)")
228 .with_span(diag_span(open_span)),
229 );
230 nodes.push(Node::Math(children, open_span));
231 }
232 }
233 _ => {}
235 }
236 }
237
238 nodes
239 }
240 fn parse_args(&mut self) -> Vec<Arg> {
246 let mut args = Vec::new();
247
248 loop {
249 if self.peek_kind() == Some(&TokenKind::Space) {
251 self.bump();
252 }
253
254 match self.peek_kind() {
255 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
256 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
257 _ => break,
258 }
259 }
260 args
261 }
262
263 fn parse_mandatory_arg(&mut self) -> Arg {
264 let open_span = self.bump().unwrap().span;
266 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
267 if self.peek_kind() == Some(&TokenKind::EndGroup) {
268 self.bump();
269 } else {
270 self.errors.push(
271 Diagnostic::error("E021","unclosed mandatory argument")
272 .with_span(diag_span(open_span)),
273 );
274 }
275 Arg::Mandatory(children)
276 }
277
278 fn parse_optional_arg(&mut self) -> Arg {
279 let open_span = self.bump().unwrap().span;
281 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
282 if self.peek_kind() == Some(&TokenKind::Char(']')) {
283 self.bump();
284 } else {
285 self.errors.push(
286 Diagnostic::error("E022","unclosed optional argument")
287 .with_span(diag_span(open_span)),
288 );
289 }
290 Arg::Optional(children)
291 }
292
293 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
295 match nodes.last_mut() {
296 Some(Node::Text(s, existing)) => {
297 s.push(c);
298 *existing = existing.merge(span);
299 }
300 _ => nodes.push(Node::Text(c.to_string(), span)),
301 }
302 }
303}
304
305
306
307#[cfg(test)]
310mod tests {
311 use super::*;
312 use oxyl_lexer::Lexer;
313
314 fn parse(src: &str) -> ParseResult {
315 let tokens = Lexer::new(src).tokenise().tokens;
316 Parser::new(tokens).parse()
317 }
318
319 fn first_command(src: &str) -> (String, Vec<Arg>) {
320 let r = parse(src);
321 for node in &r.document.body {
322 if let Node::Command { name, args, .. } = node {
323 return (name.clone(), args.clone());
324 }
325 }
326 panic!("no command found in: {src}");
327 }
328
329 #[test]
330 fn command_no_args() {
331 let (name, args) = first_command("\\LaTeX");
332 assert_eq!(name, "LaTeX");
333 assert!(args.is_empty());
334 }
335
336 #[test]
337 fn command_one_mandatory_arg() {
338 let (name, args) = first_command("\\textbf{hello}");
339 assert_eq!(name, "textbf");
340 assert_eq!(args.len(), 1);
341 assert!(matches!(&args[0], Arg::Mandatory(children)
342 if matches!(&children[0], Node::Text(s, _) if s == "hello")));
343 }
344
345 #[test]
346 fn command_two_mandatory_args() {
347 let (name, args) = first_command("\\frac{a}{b}");
348 assert_eq!(name, "frac");
349 assert_eq!(args.len(), 2);
350 }
351
352 #[test]
353 fn unclosed_arg_produces_error() {
354 let r = parse("\\cmd{oops");
355 assert!(!r.errors.is_empty());
356 }
357
358 #[test]
359 fn paragraph_break_still_works() {
360 let r = parse("line one\n\nline two");
361 let has_par = r.document.body.iter().any(|n| matches!(n, Node::ParagraphBreak(_)));
362 assert!(has_par);
363 }
364
365 #[test]
366 fn nested_command_in_arg() {
367 let r = parse("\\outer{\\inner{x}}");
368 assert!(r.errors.is_empty());
369 if let Node::Command { args, .. } = &r.document.body[0] {
370 if let Arg::Mandatory(inner) = &args[0] {
371 assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
372 } else { panic!("expected mandatory arg"); }
373 } else { panic!("expected command"); }
374 }
375
376 #[test]
377 fn command_with_optional_arg() {
378 let (name, args) = first_command("\\sqrt[3]{27}");
379 assert_eq!(name, "sqrt");
380 assert_eq!(args.len(), 2);
381 assert!(matches!(&args[0], Arg::Optional(children)
382 if matches!(&children[0], Node::Text(s, _) if s == "3")));
383 assert!(matches!(&args[1], Arg::Mandatory(children)
384 if matches!(&children[0], Node::Text(s, _) if s == "27")));
385 }
386
387 #[test]
388 fn command_with_only_optional_arg() {
389 let (name, args) = first_command("\\foo[opt]");
390 assert_eq!(name, "foo");
391 assert_eq!(args.len(), 1);
392 assert!(matches!(&args[0], Arg::Optional(_)));
393 }
394
395 #[test]
396 fn optional_then_two_mandatory() {
397 let (_, args) = first_command("\\section[short]{long}{extra}");
399 assert_eq!(args.len(), 3);
400 assert!(matches!(&args[0], Arg::Optional(_)));
401 assert!(matches!(&args[1], Arg::Mandatory(_)));
402 assert!(matches!(&args[2], Arg::Mandatory(_)));
403 }
404
405 #[test]
406 fn unclosed_optional_arg_produces_error() {
407 let r = parse("\\cmd[oops");
408 assert!(!r.errors.is_empty());
409 }
410
411 #[test]
412 fn bracket_outside_command_is_text() {
413 let r = parse("hello [world]");
415 assert!(r.errors.is_empty());
416 assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello [world]"));
417 }
418
419 #[test]
420 fn inline_math_simple() {
421 let r = parse("$x+1$");
422 assert!(r.errors.is_empty());
423 assert_eq!(r.document.body.len(), 1);
424 assert!(matches!(&r.document.body[0], Node::Math(children, _)
425 if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
426 }
427
428 #[test]
429 fn inline_math_with_command() {
430 let r = parse("$\\alpha + \\beta$");
431 assert!(r.errors.is_empty());
432 if let Node::Math(children, _) = &r.document.body[0] {
433 let names: Vec<_> = children.iter().filter_map(|n| match n {
434 Node::Command { name, .. } => Some(name.as_str()),
435 _ => None,
436 }).collect();
437 assert_eq!(names, vec!["alpha", "beta"]);
438 } else {
439 panic!("expected math node");
440 }
441 }
442
443 #[test]
444 fn unclosed_math_produces_error() {
445 let r = parse("text $oops");
446 assert!(!r.errors.is_empty());
447 }
448
449 #[test]
450 fn parser_errors_carry_spans() {
451 let cases = [
455 "\\cmd{oops", "\\cmd[oops", "{", "$oops", ];
460 for src in cases {
461 let r = parse(src);
462 assert!(!r.errors.is_empty(), "expected error for {src:?}");
463 for e in &r.errors {
464 assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
465 }
466 }
467 }
468
469 #[test]
470 fn math_after_text() {
471 let r = parse("hello $x$");
472 assert!(r.errors.is_empty());
473 assert_eq!(r.document.body.len(), 2);
474 assert!(matches!(&r.document.body[0], Node::Text(s, _) if s == "hello "));
475 assert!(matches!(&r.document.body[1], Node::Math(_, _)));
476 }
477
478 #[test]
479 fn display_math_simple() {
480 let r = parse("\\[x+1\\]");
481 assert!(r.errors.is_empty(), "{:?}", r.errors);
482 assert_eq!(r.document.body.len(), 1);
483 assert!(matches!(&r.document.body[0], Node::DisplayMath(children, _)
484 if matches!(&children[0], Node::Text(s, _) if s == "x+1")));
485 }
486
487 #[test]
488 fn display_math_with_command() {
489 let r = parse("\\[ \\sum_{i=0}^n i \\]");
490 assert!(r.errors.is_empty(), "{:?}", r.errors);
491 assert!(matches!(&r.document.body[0], Node::DisplayMath(_, _)));
492 }
493
494 #[test]
495 fn unclosed_display_math_produces_error() {
496 let r = parse("\\[ a + b");
497 assert!(r.errors.iter().any(|e| e.code == "E031"));
498 }
499
500 #[test]
501 fn stray_close_display_math_produces_error() {
502 let r = parse("oops \\] more");
503 assert!(r.errors.iter().any(|e| e.code == "E032"));
504 }
505}