1use oxyl_diagnostics::{DiagSpan, Diagnostic};
16use oxyl_lexer::{Span, Token, TokenKind};
17
18fn diag_span(s: Span) -> DiagSpan {
19 DiagSpan::new(s.start, s.end)
20}
21#[derive(Debug, Clone)]
30pub struct Document {
31 pub body: Vec<Node>,
32}
33
34#[derive(Debug, Clone)]
36pub enum Node {
37 Text(String, Span),
39
40 ParagraphBreak(Span),
42
43 Command {
45 name: String ,
47 args: Vec<Arg>,
48 span: Span,
49 },
50
51 Group(Vec<Node>, Span),
53
54 Math(Vec<Node>, Span),
56}
57
58impl Node {
59 pub fn span(&self) -> Span {
60 match self {
61 Node::Text(_,s) => *s,
62 Node::ParagraphBreak(s) => *s,
63 Node::Command { span, .. } => *span,
64 Node::Group(_, s) => *s,
65 Node::Math(_, s) => *s,
66 }
67 }
68}
69
70#[derive(Debug, Clone)]
72pub enum Arg {
73 Mandatory(Vec<Node>),
74 Optional(Vec<Node>),
75}
76
77#[derive(Debug)]
84pub struct ParseResult {
85 pub document: Document,
86 pub errors: Vec<Diagnostic>,
87}
88
89pub struct Parser {
94 tokens: Vec<Token>,
95 pos: usize,
96 errors: Vec<Diagnostic>,
97}
98
99impl Parser {
100 pub fn new(tokens: Vec<Token>) -> Self {
101 Self { tokens, pos: 0, errors: Vec::new() }
102 }
103
104 pub fn parse(mut self) -> ParseResult {
106 let body = self.parse_nodes(None);
107 ParseResult { document: Document { body }, errors: self.errors }
108 }
109
110 fn peek(&self) -> Option<&Token> {
111 self.tokens.get(self.pos)
112 }
113
114 fn peek_kind(&self) -> Option<&TokenKind> {
115 self.peek().map(|t| &t.kind)
116 }
117
118 fn bump(&mut self) -> Option<Token> {
119 if self.pos < self.tokens.len() {
120 let tok = self.tokens[self.pos].clone();
121 self.pos += 1;
122 Some(tok)
123 } else {
124 None
125 }
126 }
127
128 fn parse_nodes(&mut self, stop: Option<&TokenKind>) -> Vec<Node> {
132 let mut nodes: Vec<Node> = Vec::new();
133
134 loop {
135 match self.peek() {
136 None => break,
137 Some(tok) if stop.map_or(false, |s| &tok.kind == s) => break,
138 _ => {}
139 }
140
141 let tok = self.bump().unwrap();
142
143 match tok.kind {
144 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
145 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
146
147 TokenKind::ParagraphBreak => nodes.push(Node::ParagraphBreak(tok.span)),
148
149 TokenKind::ControlSeq(name) => {
150 let cmd_span = tok.span;
151 let args = self.parse_args();
152 let full_span = args.last()
154 .and_then(|a| match a {
155 Arg::Mandatory(children) => children.last().map(|n| n.span()),
156 Arg::Optional(children) => children.last().map(|n| n.span()),
157 })
158 .map(|s| cmd_span.merge(s))
159 .unwrap_or(cmd_span);
160 nodes.push(Node::Command { name, args, span: full_span });
161 }
162
163 TokenKind::BeginGroup => {
164 let open_span = tok.span;
165 let children = self.parse_nodes(Some(&TokenKind::EndGroup));
166 if self.peek_kind() == Some(&TokenKind::EndGroup) {
167 let close = self.bump().unwrap();
168 nodes.push(Node::Group(children, open_span.merge(close.span)));
169 } else {
170 self.errors.push(
172 Diagnostic::error("E020", "unclosed '{'")
173 .with_span(diag_span(open_span)),
174 );
175 nodes.push(Node::Group(children, open_span));
176 }
177 }
178
179 TokenKind::MathShift => {
180 let open_span = tok.span;
181 let children = self.parse_nodes(Some(&TokenKind::MathShift));
182 if self.peek_kind() == Some(&TokenKind::MathShift) {
183 let close = self.bump().unwrap();
184 nodes.push(Node::Math(children, open_span.merge(close.span)));
185 } else {
186 self.errors.push(
187 Diagnostic::error("E030", "unclosed '$' (math mode)")
188 .with_span(diag_span(open_span)),
189 );
190 }
191 }
192 _ => {}
194 }
195 }
196
197 nodes
198 }
199 fn parse_args(&mut self) -> Vec<Arg> {
205 let mut args = Vec::new();
206
207 loop {
208 if self.peek_kind() == Some(&TokenKind::Space) {
210 self.bump();
211 }
212
213 match self.peek_kind() {
214 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
215 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
216 _ => break,
217 }
218 }
219 args
220 }
221
222 fn parse_mandatory_arg(&mut self) -> Arg {
223 let open_span = self.bump().unwrap().span;
225 let children = self.parse_nodes(Some(&TokenKind::EndGroup));
226 if self.peek_kind() == Some(&TokenKind::EndGroup) {
227 self.bump();
228 } else {
229 self.errors.push(
230 Diagnostic::error("E021","unclosed mandatory argument")
231 .with_span(diag_span(open_span)),
232 );
233 }
234 Arg::Mandatory(children)
235 }
236
237 fn parse_optional_arg(&mut self) -> Arg {
238 let open_span = self.bump().unwrap().span;
240 let children = self.parse_nodes(Some(&TokenKind::Char(']')));
241 if self.peek_kind() == Some(&TokenKind::Char(']')) {
242 self.bump();
243 } else {
244 self.errors.push(
245 Diagnostic::error("E022","unclosed optional argument")
246 .with_span(diag_span(open_span)),
247 );
248 }
249 Arg::Optional(children)
250 }
251
252 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
254 match nodes.last_mut() {
255 Some(Node::Text(s, existing)) => {
256 s.push(c);
257 *existing = existing.merge(span);
258 }
259 _ => nodes.push(Node::Text(c.to_string(), span)),
260 }
261 }
262}
263
264
265
#[cfg(test)]
mod tests {
    use super::*;
    use oxyl_lexer::Lexer;

    /// Lexes `src` and runs the parser over the resulting tokens.
    fn parse(src: &str) -> ParseResult {
        let lexed = Lexer::new(src).tokenise();
        Parser::new(lexed.tokens).parse()
    }

    /// Returns the name and arguments of the first top-level command in
    /// `src`, panicking if there is none.
    fn first_command(src: &str) -> (String, Vec<Arg>) {
        parse(src)
            .document
            .body
            .into_iter()
            .find_map(|node| match node {
                Node::Command { name, args, .. } => Some((name, args)),
                _ => None,
            })
            .unwrap_or_else(|| panic!("no command found in: {src}"))
    }

    /// True when `node` is a text node whose content equals `expected`.
    fn is_text(node: &Node, expected: &str) -> bool {
        matches!(node, Node::Text(content, _) if content == expected)
    }

    // ----- commands and mandatory arguments -----

    #[test]
    fn command_no_args() {
        let (name, args) = first_command("\\LaTeX");
        assert_eq!(name, "LaTeX");
        assert_eq!(args.len(), 0);
    }

    #[test]
    fn command_one_mandatory_arg() {
        let (name, args) = first_command("\\textbf{hello}");
        assert_eq!(name, "textbf");
        assert_eq!(args.len(), 1);
        match &args[0] {
            Arg::Mandatory(children) => assert!(is_text(&children[0], "hello")),
            Arg::Optional(_) => panic!("expected a mandatory argument"),
        }
    }

    #[test]
    fn command_two_mandatory_args() {
        let (name, args) = first_command("\\frac{a}{b}");
        assert_eq!(name, "frac");
        assert_eq!(args.len(), 2);
    }

    #[test]
    fn unclosed_arg_produces_error() {
        let result = parse("\\cmd{oops");
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn paragraph_break_still_works() {
        let result = parse("line one\n\nline two");
        let breaks = result
            .document
            .body
            .iter()
            .filter(|node| matches!(node, Node::ParagraphBreak(_)))
            .count();
        assert!(breaks > 0);
    }

    #[test]
    fn nested_command_in_arg() {
        let result = parse("\\outer{\\inner{x}}");
        assert!(result.errors.is_empty());

        let args = match &result.document.body[0] {
            Node::Command { args, .. } => args,
            _ => panic!("expected command"),
        };
        let inner = match &args[0] {
            Arg::Mandatory(inner) => inner,
            Arg::Optional(_) => panic!("expected mandatory arg"),
        };
        assert!(matches!(&inner[0], Node::Command { name, .. } if name == "inner"));
    }

    // ----- optional arguments -----

    #[test]
    fn command_with_optional_arg() {
        let (name, args) = first_command("\\sqrt[3]{27}");
        assert_eq!(name, "sqrt");
        assert_eq!(args.len(), 2);
        match &args[0] {
            Arg::Optional(children) => assert!(is_text(&children[0], "3")),
            Arg::Mandatory(_) => panic!("expected an optional argument first"),
        }
        match &args[1] {
            Arg::Mandatory(children) => assert!(is_text(&children[0], "27")),
            Arg::Optional(_) => panic!("expected a mandatory argument second"),
        }
    }

    #[test]
    fn command_with_only_optional_arg() {
        let (name, args) = first_command("\\foo[opt]");
        assert_eq!(name, "foo");
        assert_eq!(args.len(), 1);
        assert!(matches!(args[0], Arg::Optional(_)));
    }

    #[test]
    fn optional_then_two_mandatory() {
        let (_, args) = first_command("\\section[short]{long}{extra}");
        assert_eq!(args.len(), 3);
        assert!(matches!(args[0], Arg::Optional(_)));
        assert!(matches!(args[1], Arg::Mandatory(_)));
        assert!(matches!(args[2], Arg::Mandatory(_)));
    }

    #[test]
    fn unclosed_optional_arg_produces_error() {
        let result = parse("\\cmd[oops");
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn bracket_outside_command_is_text() {
        let result = parse("hello [world]");
        assert!(result.errors.is_empty());
        assert!(is_text(&result.document.body[0], "hello [world]"));
    }

    // ----- math mode -----

    #[test]
    fn inline_math_simple() {
        let result = parse("$x+1$");
        assert!(result.errors.is_empty());
        assert_eq!(result.document.body.len(), 1);
        match &result.document.body[0] {
            Node::Math(children, _) => assert!(is_text(&children[0], "x+1")),
            other => panic!("expected math node, got {other:?}"),
        }
    }

    #[test]
    fn inline_math_with_command() {
        let result = parse("$\\alpha + \\beta$");
        assert!(result.errors.is_empty());

        let children = match &result.document.body[0] {
            Node::Math(children, _) => children,
            _ => panic!("expected math node"),
        };
        let mut names = Vec::new();
        for child in children {
            if let Node::Command { name, .. } = child {
                names.push(name.as_str());
            }
        }
        assert_eq!(names, ["alpha", "beta"]);
    }

    #[test]
    fn unclosed_math_produces_error() {
        let result = parse("text $oops");
        assert!(!result.errors.is_empty());
    }

    // ----- diagnostics -----

    #[test]
    fn parser_errors_carry_spans() {
        // One input per "unclosed delimiter" diagnostic the parser can emit.
        for src in ["\\cmd{oops", "\\cmd[oops", "{", "$oops"] {
            let result = parse(src);
            assert!(!result.errors.is_empty(), "expected error for {src:?}");
            for e in &result.errors {
                assert!(e.span.is_some(), "error for {src:?} has no span: {e:?}");
            }
        }
    }

    #[test]
    fn math_after_text() {
        let result = parse("hello $x$");
        assert!(result.errors.is_empty());

        let body = &result.document.body;
        assert_eq!(body.len(), 2);
        assert!(is_text(&body[0], "hello "));
        assert!(matches!(body[1], Node::Math(..)));
    }
}