use oxyl_diagnostics::Diagnostic;
use oxyl_lexer::{Span, Token, TokenKind};
use crate::ast::{Arg, Document, Node};
mod helpers;
use helpers::{diag_span, find_env_name, is_display_math_close, is_end_control_seq};
#[cfg(test)]
mod tests;
/// Outcome of [`Parser::parse`]: the document that was built together with
/// every diagnostic recorded while building it.
#[derive(Debug)]
pub struct ParseResult {
/// The parsed document.
pub document: Document,
/// Diagnostics collected by the parser, in the order they were recorded.
pub errors: Vec<Diagnostic>,
}
/// Parser that turns a token stream into a tree of [`Node`]s, recording
/// diagnostics for malformed input instead of aborting.
pub struct Parser {
/// The complete token stream being parsed.
tokens: Vec<Token>,
/// Index into `tokens` of the next token to be consumed.
pos: usize,
/// Diagnostics recorded so far.
errors: Vec<Diagnostic>,
}
impl Parser {
/// Creates a parser positioned at the first token of `tokens`, with no
/// diagnostics recorded yet.
pub fn new(tokens: Vec<Token>) -> Self {
    let errors = Vec::new();
    Self {
        tokens,
        pos: 0,
        errors,
    }
}
pub fn parse(mut self) -> ParseResult {
let body = self.parse_nodes(|_| false);
ParseResult { document: Document { body }, errors: self.errors }
}
fn peek(&self) -> Option<&Token> {
self.tokens.get(self.pos)
}
/// Returns the kind of the token at the current position without consuming
/// it, or `None` once the input is exhausted.
fn peek_kind(&self) -> Option<&TokenKind> {
    let tok = self.peek()?;
    Some(&tok.kind)
}
/// Consumes the current token and returns a clone of it, advancing the
/// position by one.
///
/// Returns `None` and leaves the position untouched when no tokens remain.
fn bump(&mut self) -> Option<Token> {
    let tok = self.peek()?.clone();
    self.pos += 1;
    Some(tok)
}
fn parse_nodes(&mut self, stop: fn(&TokenKind) -> bool) -> Vec<Node> {
let mut nodes: Vec<Node> = Vec::new();
loop {
match self.peek() {
None => break,
Some(tok) if stop(&tok.kind) => break,
_ => {}
}
let tok = self.bump().unwrap();
match tok.kind {
TokenKind::Char(c) => self.push_char(&mut nodes, c, tok.span),
TokenKind::Space => self.push_char(&mut nodes, ' ', tok.span),
TokenKind::ParagraphBreak => {
nodes.push(Node::ParagraphBreak(tok.span));
}
TokenKind::Comment(body) => {
nodes.push(Node::Comment(body, tok.span));
}
TokenKind::ControlSeq(ref name) if name == "begin" => {
let env = self.parse_environment(tok.span);
nodes.push(env);
}
TokenKind::ControlSeq(ref name) if name == "end" => {
self.errors.push(
Diagnostic::error("E043", "stray '\\end' (no matching '\\begin')")
.with_span(diag_span(tok.span)),
);
let _ = self.parse_args();
}
TokenKind::ControlSeq(ref name) if name == "[" => {
let open_span = tok.span;
let children = self.parse_nodes(is_display_math_close);
if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "]") {
let close = self.bump().unwrap();
nodes.push(Node::DisplayMath(children, open_span.merge(close.span)));
} else {
self.errors.push(
Diagnostic::error("E031", "unclosed '\\[' (display math)")
.with_span(diag_span(open_span)),
);
nodes.push(Node::DisplayMath(children, open_span));
}
}
TokenKind::ControlSeq(ref name) if name == "]" => {
self.errors.push(
Diagnostic::error("E032", "stray '\\]' (no matching '\\[')")
.with_span(diag_span(tok.span)),
);
}
TokenKind::ControlSeq(name) => {
let cmd_span = tok.span;
let args = self.parse_args();
let full_span = args.last()
.and_then(|a| match a {
Arg::Mandatory(children) => children.last().map(|n| n.span()),
Arg::Optional(children) => children.last().map(|n| n.span()),
})
.map(|s| cmd_span.merge(s))
.unwrap_or(cmd_span);
nodes.push(Node::Command { name, args, span: full_span });
}
TokenKind::BeginGroup => {
let open_span = tok.span;
let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
if self.peek_kind() == Some(&TokenKind::EndGroup) {
let close = self.bump().unwrap();
nodes.push(Node::Group(children, open_span.merge(close.span)));
} else {
self.errors.push(
Diagnostic::error("E020", "unclosed '{'")
.with_span(diag_span(open_span)),
);
nodes.push(Node::Group(children, open_span));
}
}
TokenKind::MathShift => {
let open_span = tok.span;
let children = self.parse_nodes(|k| matches!(k, TokenKind::MathShift));
if self.peek_kind() == Some(&TokenKind::MathShift) {
let close = self.bump().unwrap();
nodes.push(Node::Math(children, open_span.merge(close.span)));
} else {
self.errors.push(
Diagnostic::error("E030", "unclosed '$' (math mode)")
.with_span(diag_span(open_span)),
);
nodes.push(Node::Math(children, open_span));
}
}
TokenKind::AlignTab => nodes.push(Node::AlignTab(tok.span)),
TokenKind::Tilde => nodes.push(Node::Tilde(tok.span)),
_ => {}
}
}
nodes
}
/// Parses the run of arguments that follows a control sequence.
///
/// A `BeginGroup` token starts a mandatory argument and a `[` character
/// starts an optional one; parsing stops at the first token that opens
/// neither. A single space token may separate the control sequence from its
/// first argument and one argument from the next.
///
/// A space that follows the last argument is left in the input: it
/// separates the command from the following text and must not be swallowed
/// (`\textbf{a} b` keeps the space before `b`). A space directly after the
/// control sequence, before any argument has been read, is still consumed
/// even when no argument follows.
fn parse_args(&mut self) -> Vec<Arg> {
    let mut args = Vec::new();
    loop {
        // Look past at most one space without committing to consuming it.
        let mut next = self.pos;
        if matches!(self.tokens.get(next).map(|t| &t.kind), Some(TokenKind::Space)) {
            next += 1;
        }
        match self.tokens.get(next).map(|t| &t.kind) {
            Some(TokenKind::BeginGroup) => {
                self.pos = next;
                args.push(self.parse_mandatory_arg());
            }
            Some(TokenKind::Char('[')) => {
                self.pos = next;
                args.push(self.parse_optional_arg());
            }
            _ => {
                // Only the space right after the control sequence itself is
                // dropped; one after an argument stays for the caller.
                if args.is_empty() {
                    self.pos = next;
                }
                break;
            }
        }
    }
    args
}
fn parse_mandatory_arg(&mut self) -> Arg {
let open_span = self.bump().unwrap().span;
let children = self.parse_nodes(|k| matches!(k, TokenKind::EndGroup));
if self.peek_kind() == Some(&TokenKind::EndGroup) {
self.bump();
} else {
self.errors.push(
Diagnostic::error("E021","unclosed mandatory argument")
.with_span(diag_span(open_span)),
);
}
Arg::Mandatory(children)
}
fn parse_environment(&mut self, begin_span: Span) -> Node {
let mut args = self.parse_args();
let (name_idx, env_name) = match find_env_name(&args) {
Some(x) => x,
None => {
self.errors.push(
Diagnostic::error("E040", "'\\begin' missing environment name")
.with_span(diag_span(begin_span)),
);
return Node::Command {
name: "begin".to_owned(),
args,
span: begin_span,
};
}
};
args.remove(name_idx);
let body = self.parse_nodes(is_end_control_seq);
let close_span = if matches!(self.peek_kind(), Some(TokenKind::ControlSeq(s)) if s == "end") {
let end_tok = self.bump().unwrap();
let end_args = self.parse_args();
let close_name = find_env_name(&end_args).map(|(_, n)| n);
if close_name.as_deref() != Some(env_name.as_str()) {
self.errors.push(
Diagnostic::error("E042", format!(
"'\\end{{{}}}' does not match '\\begin{{{}}}'",
close_name.as_deref().unwrap_or(""), env_name,
))
.with_span(diag_span(end_tok.span))
.with_note(format!("the matching '\\begin' opened the '{env_name}' environment")),
);
}
end_args.last()
.and_then(|a| match a {
Arg::Mandatory(c) | Arg::Optional(c) => c.last().map(|n| n.span()),
})
.map(|s| end_tok.span.merge(s))
.unwrap_or(end_tok.span)
} else {
self.errors.push(
Diagnostic::error("E041", format!("unclosed '\\begin{{{}}}'", env_name))
.with_span(diag_span(begin_span)),
);
body.last().map(|n| n.span()).unwrap_or(begin_span)
};
Node::Environment {
name: env_name,
args,
body,
span: begin_span.merge(close_span),
}
}
fn parse_optional_arg(&mut self) -> Arg {
let open_span = self.bump().unwrap().span;
let children = self.parse_nodes(|k| matches!(k, TokenKind::Char(']')));
if self.peek_kind() == Some(&TokenKind::Char(']')) {
self.bump();
} else {
self.errors.push(
Diagnostic::error("E022","unclosed optional argument")
.with_span(diag_span(open_span)),
);
}
Arg::Optional(children)
}
/// Adds the character `c` (covering `span`) to `nodes` as text.
///
/// If the last node is already a text node, the character is appended to it
/// and its span is widened to include `span`; otherwise a new text node is
/// started.
fn push_char(&self, nodes: &mut Vec<Node>, c: char, span: Span) {
    if let Some(Node::Text(text, text_span)) = nodes.last_mut() {
        text.push(c);
        *text_span = text_span.merge(span);
    } else {
        nodes.push(Node::Text(String::from(c), span));
    }
}
}