1use oxyl_diagnostics::Diagnostic;
24use oxyl_lexer::{Span, Token, TokenKind};
25
26use crate::ast::{Arg, Document, Node};
27
28mod helpers;
29use helpers::{diag_span, find_env_name, is_display_math_close, is_end_control_seq};
30
31#[cfg(test)]
32mod tests;
33
34
/// Outcome of [`Parser::parse`]: the parsed document together with every
/// diagnostic that was recorded while parsing it.
#[derive(Debug)]
pub struct ParseResult {
    /// Document built from the token stream.
    pub document: Document,
    /// Diagnostics pushed by the parser, in the order they were recorded.
    pub errors: Vec<Diagnostic>,
}
42
43
44pub struct Parser {
45 tokens: Vec<Token>,
46 pos: usize,
47 errors: Vec<Diagnostic>,
48}
49
50impl Parser {
51 pub fn new(tokens: Vec<Token>) -> Self {
52 Self { tokens, pos: 0, errors: Vec::new() }
53 }
54
55 pub fn parse(mut self) -> ParseResult {
57 let body = self.parse_nodes(|_| false);
58 ParseResult { document: Document { body }, errors: self.errors }
59 }
60
61 fn peek(&self) -> Option<&Token> {
62 self.tokens.get(self.pos)
63 }
64
65 fn peek_kind(&self) -> Option<&TokenKind> {
66 self.peek().map(|t| &t.kind)
67 }
68
69 fn bump(&mut self) -> Option<Token> {
70 if self.pos < self.tokens.len() {
71 let tok = self.tokens[self.pos].clone();
72 self.pos += 1;
73 Some(tok)
74 } else {
75 None
76 }
77 }
78
79 fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
86 let mut nodes: Vec<Node> = Vec::new();
87
88 loop {
89 match self.peek() {
90 None => break,
91 Some(tok) if stop(&tok.kind) => break,
92 _ => {}
93 }
94
95 let tok = self.bump().unwrap();
96
97 match tok.kind {
98 TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
99 TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
100
101 TokenKind::ParagraphBreak => {
102 nodes.push(Node::ParagraphBreak(tok.span));
103 }
104
105 TokenKind::Comment(body) => {
106 nodes.push(Node::Comment(body, tok.span));
107 }
108
109 TokenKind::ControlSeq(ref name) if name == "begin" => {
111 let env = self.parse_environment(tok.span);
112 nodes.push(env);
113 }
114
115 TokenKind::ControlSeq(ref name) if name == "end" => {
117 self.errors.push(
118 Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
119 .with_span(diag_span(tok.span)),
120 );
121 let _ = self.parse_args();
123 }
124
125 TokenKind::ControlSeq(ref name) if name == "[" => {
127 let open_span = tok.span;
128 let children = self.parse_nodes(is_display_math_close);
129 if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
130 let close = self.bump().unwrap();
131 nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
132 } else {
133 self.errors.push(
134 Diagnostic::error("E031", "unclosed '\\[' (display math)")
135 .with_span(diag_span(open_span)),
136 );
137 nodes.push(Node::DisplayMath(children, open_span));
138 }
139 }
140
141 TokenKind::ControlSeq(ref name) if name == "]" => {
143 self.errors.push(
144 Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
145 .with_span(diag_span(tok.span)),
146 );
147 }
148
149 TokenKind::ControlSeq(name) => {
150 let cmd_span = tok.span;
151 let args = self.parse_args();
152 let full_span = args.last()
154 .and_then(|a| match a {
155 Arg::Mandatory(children) => children.last().map(|n| n.span()),
156 Arg::Optional(children) => children.last().map(|n| n.span()),
157 })
158 .map(|s| cmd_span.merge(s))
159 .unwrap_or(cmd_span);
160 nodes.push(Node::Command { name, args, span: full_span });
161 }
162
163 TokenKind::BeginGroup => {
164 let open_span = tok.span;
165 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
166 if self.peek_kind() == Some(&TokenKind::EndGroup) {
167 let close = self.bump().unwrap();
168 nodes.push(Node::Group(children, open_span.merge(close.span)));
169 } else {
170 self.errors.push(
172 Diagnostic::error("E020", "unclosed '{'")
173 .with_span(diag_span(open_span)),
174 );
175 nodes.push(Node::Group(children, open_span));
176 }
177 }
178
179 TokenKind::MathShift => {
180 let open_span = tok.span;
181 let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
182 if self.peek_kind() == Some(&TokenKind::MathShift) {
183 let close = self.bump().unwrap();
184 nodes.push(Node::Math(children, open_span.merge(close.span)));
185 } else {
186 self.errors.push(
187 Diagnostic::error("E030", "unclosed '$' (math mode)")
188 .with_span(diag_span(open_span)),
189 );
190 nodes.push(Node::Math(children, open_span));
191 }
192 }
193
194 TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
195 TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
196
197 _ => {}
199 }
200 }
201
202 nodes
203 }
204 fn parse_args(&mut self) -> Vec<Arg> {
210 let mut args = Vec::new();
211
212 loop {
213 if self.peek_kind() == Some(&TokenKind::Space) {
215 self.bump();
216 }
217
218 match self.peek_kind() {
219 Some(&TokenKind::BeginGroup) => args.push(self.parse_mandatory_arg()),
220 Some(&TokenKind::Char('[')) => args.push(self.parse_optional_arg()),
221 _ => break,
222 }
223 }
224 args
225
226 }
227
228 fn parse_mandatory_arg(&mut self) -> Arg {
229 let open_span = self.bump().unwrap().span;
231 let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
232 if self.peek_kind() == Some(&TokenKind::EndGroup) {
233 self.bump();
234 } else {
235 self.errors.push(
236 Diagnostic::error("E021","unclosed mandatory argument")
237 .with_span(diag_span(open_span)),
238 );
239 }
240 Arg::Mandatory(children)
241 }
242
243 fn parse_environment(&mut self, begin_span: Span) -> Node {
246 let mut args = self.parse_args();
247
248 let (name_idx, env_name) = match find_env_name(&args) {
252 Some(x) => x,
253 None => {
254 self.errors.push(
255 Diagnostic::error("E040", "'\\begin' missing environment name")
256 .with_span(diag_span(begin_span)),
257 );
258 return Node::Command {
259 name: "begin".to_owned(),
260 args,
261 span: begin_span,
262 };
263 }
264 };
265 args.remove(name_idx);
266
267 let body = self.parse_nodes(is_end_control_seq);
268
269 let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
271 let end_tok = self.bump().unwrap();
272 let end_args = self.parse_args();
273 let close_name = find_env_name(&end_args).map(|(_, n)| n);
274
275 if close_name.as_deref() != Some(env_name.as_str()) {
276 self.errors.push(
277 Diagnostic::error("E042", format!(
278 "'\\end{{{}}}' does not match '\\begin{{{}}}'",
279 close_name.as_deref().unwrap_or(""), env_name,
280 ))
281 .with_span(diag_span(end_tok.span))
282 .with_note(format!("the matching '\\begin' opened the '{env_name}' environment")),
283 );
284 }
285
286 end_args.last()
288 .and_then(|a| match a {
289 Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
290 })
291 .map(|s| end_tok.span.merge(s))
292 .unwrap_or(end_tok.span)
293 } else {
294 self.errors.push(
295 Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
296 .with_span(diag_span(begin_span)),
297 );
298 body.last().map(|n| n.span()).unwrap_or(begin_span)
299 };
300
301 Node::Environment {
302 name: env_name,
303 args,
304 body,
305 span: begin_span.merge(close_span),
306 }
307 }
308
309 fn parse_optional_arg(&mut self) -> Arg {
310 let open_span = self.bump().unwrap().span;
312 let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
313 if self.peek_kind() == Some(&TokenKind::Char(']')) {
314 self.bump();
315 } else {
316 self.errors.push(
317 Diagnostic::error("E022","unclosed optional argument")
318 .with_span(diag_span(open_span)),
319 );
320 }
321 Arg::Optional(children)
322 }
323
324 fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
326 match nodes.last_mut() {
327 Some(Node::Text(s, existing)) => {
328 s.push(c);
329 *existing = existing.merge(span);
330 }
331 _ => nodes.push(Node::Text(c.to_string(), span)),
332 }
333 }
334}
335