use crate::parser::core::SyntaxError;
use crate::parser::events::Event;
use crate::parser::lexer::{Token, is_verbatim_environment};
use crate::syntax::SyntaxKind;
/// Text of the control word that opens an environment.
const BEGIN_CMD: &str = "\\begin";
/// Text of the control word that closes an environment.
const END_CMD: &str = "\\end";
/// The kind of block whose contents are being split into paragraphs.
///
/// The variant decides what ends the block: the document only ends at the end
/// of the token stream, while an environment body also ends at an `\end`
/// control word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Block {
    /// Top-level input; only the end of input terminates it.
    Document,
    /// Body of an environment; an `\end` control word terminates it as well.
    Environment,
}
pub(crate) fn parse(tokens: &[Token]) -> (Vec<Event>, Vec<SyntaxError>) {
let mut p = Parser::new(tokens);
p.document();
(p.events, p.errors)
}
/// Parser state: a cursor over the token slice plus the output collected so far.
struct Parser<'t> {
/// Tokens being parsed.
tokens: &'t [Token],
/// Running sum of `text.len()` before each token, followed by one final entry
/// holding the total; used as start/end offsets when reporting errors.
starts: Vec<usize>,
/// Index into `tokens` of the current token.
pos: usize,
/// Events emitted so far.
events: Vec<Event>,
/// Syntax errors recorded so far.
errors: Vec<SyntaxError>,
}
impl<'t> Parser<'t> {
/// Creates a parser positioned at the first token.
///
/// Precomputes `starts`: entry `i` is the summed text length of all tokens
/// before token `i`, and the final entry is the total length.
fn new(tokens: &'t [Token]) -> Self {
    let mut starts = Vec::with_capacity(tokens.len() + 1);
    starts.push(0);
    starts.extend(tokens.iter().scan(0, |end, tok| {
        *end += tok.text.len();
        Some(*end)
    }));
    Self {
        tokens,
        starts,
        pos: 0,
        events: Vec::new(),
        errors: Vec::new(),
    }
}
/// Kind of the current token, or `None` when the cursor is past the last token.
fn kind(&self) -> Option<SyntaxKind> {
    let current = self.tokens.get(self.pos)?;
    Some(current.kind)
}
/// Kind of the token `n` positions after the current one, or `None` if there
/// is no such token.
fn nth_kind(&self, n: usize) -> Option<SyntaxKind> {
    let ahead = self.tokens.get(self.pos + n)?;
    Some(ahead.kind)
}
/// Text of the current token, or the empty string when there is none.
fn text(&self) -> &str {
    match self.tokens.get(self.pos) {
        Some(current) => current.text.as_str(),
        None => "",
    }
}
/// Whether the cursor has moved past the last token.
fn at_end(&self) -> bool {
    self.tokens.get(self.pos).is_none()
}
/// Whether the current token is a control word whose text equals `name`.
fn at_command(&self, name: &str) -> bool {
    match self.tokens.get(self.pos) {
        Some(tok) => tok.kind == SyntaxKind::CONTROL_WORD && tok.text.as_str() == name,
        None => false,
    }
}
/// Whether `k` is whitespace, a newline, or a comment.
fn is_trivia(k: SyntaxKind) -> bool {
    [
        SyntaxKind::WHITESPACE,
        SyntaxKind::NEWLINE,
        SyntaxKind::COMMENT,
    ]
    .contains(&k)
}
/// Emits a `Tok` event for the current token and advances the cursor by one.
///
/// In debug builds this asserts that the cursor is not already at the end.
fn bump(&mut self) {
    debug_assert!(!self.at_end(), "bump past end of input");
    let index = self.pos;
    self.events.push(Event::Tok(index));
    self.pos = index + 1;
}
/// Emits a `Start` event that opens a node of the given `kind`.
fn open(&mut self, kind: SyntaxKind) {
    let start = Event::Start(kind);
    self.events.push(start);
}
/// Emits a `Finish` event, closing the most recently opened node.
fn close(&mut self) {
    let finish = Event::Finish;
    self.events.push(finish);
}
/// Records a syntax error located at the current token.
///
/// The span covers the current token's offsets from `starts`; at the end of
/// input it is an empty span at the final offset.
fn error(&mut self, message: impl Into<String>) {
    let span = if self.pos < self.tokens.len() {
        (self.starts[self.pos], self.starts[self.pos + 1])
    } else {
        let eof = *self.starts.last().expect("starts is non-empty");
        (eof, eof)
    };
    let (start, end) = span;
    self.errors.push(SyntaxError {
        message: message.into(),
        start,
        end,
    });
}
/// Consumes every trivia token (whitespace, newline, comment) at the cursor.
fn skip_trivia(&mut self) {
    while let Some(k) = self.kind() {
        if !Self::is_trivia(k) {
            break;
        }
        self.bump();
    }
}
/// Looks past the trivia at the cursor without consuming anything.
///
/// Returns the kind of the first non-trivia token (`None` if the input ends
/// first) and whether the skipped trivia contained at least two newlines.
fn peek_meaningful(&self) -> (Option<SyntaxKind>, bool) {
    let mut newline_count = 0;
    for tok in self.tokens.iter().skip(self.pos) {
        match tok.kind {
            SyntaxKind::WHITESPACE | SyntaxKind::COMMENT => {}
            SyntaxKind::NEWLINE => newline_count += 1,
            meaningful => return (Some(meaningful), newline_count >= 2),
        }
    }
    (None, newline_count >= 2)
}
fn at_paragraph_break(&self) -> bool {
let mut i = self.pos;
let mut newlines = 0;
while let Some(t) = self.tokens.get(i) {
match t.kind {
SyntaxKind::NEWLINE => {
newlines += 1;
if newlines >= 2 {
return true;
}
}
SyntaxKind::WHITESPACE | SyntaxKind::COMMENT => {}
_ => return false,
}
i += 1;
}
false
}
/// Parses the whole input as a document-level block.
fn document(&mut self) {
    let top_level = Block::Document;
    self.parse_block(top_level);
}
/// Parses the contents of `block` as a sequence of `PARAGRAPH` nodes.
///
/// Trivia runs that act as separators (see `trivia_run_is_separator`) are
/// consumed between paragraphs rather than inside them. Parsing stops, without
/// consuming anything further, once `at_block_end` holds.
fn parse_block(&mut self, block: Block) {
    while !self.at_block_end(block) {
        let on_separator =
            self.kind().is_some_and(Self::is_trivia) && self.trivia_run_is_separator(block);
        if on_separator {
            self.skip_trivia();
            continue;
        }

        self.open(SyntaxKind::PARAGRAPH);
        // We are neither at the block end nor on a separator here, so the
        // paragraph always receives at least one element.
        loop {
            self.element();
            if self.at_block_end(block) {
                break;
            }
            if self.kind().is_some_and(Self::is_trivia) && self.trivia_run_is_separator(block) {
                break;
            }
        }
        self.close();
    }
}
/// Whether the cursor is at the end of `block`: the end of input for any
/// block, or additionally an `\end` control word for an environment body.
fn at_block_end(&self, block: Block) -> bool {
    if self.at_end() {
        return true;
    }
    match block {
        Block::Environment => self.at_command(END_CMD),
        Block::Document => false,
    }
}
fn trivia_run_is_separator(&self, block: Block) -> bool {
let mut i = self.pos;
let mut newlines = 0;
while let Some(t) = self.tokens.get(i) {
match t.kind {
SyntaxKind::NEWLINE => newlines += 1,
SyntaxKind::WHITESPACE | SyntaxKind::COMMENT => {}
SyntaxKind::CONTROL_WORD if block == Block::Environment && t.text == END_CMD => {
return true;
}
_ => return newlines >= 2,
}
i += 1;
}
true
}
/// Parses a single element starting at the current token.
///
/// Dispatches on the token kind: trivia and unrecognised tokens are consumed
/// as-is; control words become an environment, a stray `\end`, or a command;
/// control symbols open delimited math or a line break, or report an unmatched
/// closer; `{` opens a group; a lone `}` is reported and consumed; `$` opens
/// dollar-delimited math. Does nothing at the end of input.
fn element(&mut self) {
    // Copy the token-slice reference out of `self` so that the token text
    // borrowed below is tied to the slice rather than to `self`. The match
    // arms can then call `&mut self` methods without first cloning the text
    // into a `String`.
    let tokens = self.tokens;
    let Some(tok) = tokens.get(self.pos) else {
        return;
    };
    match tok.kind {
        SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::COMMENT => self.bump(),
        SyntaxKind::CONTROL_WORD => {
            if self.at_command(BEGIN_CMD) {
                self.environment();
            } else if self.at_command(END_CMD) {
                self.stray_end();
            } else {
                self.command();
            }
        }
        SyntaxKind::CONTROL_SYMBOL => match tok.text.as_str() {
            "\\[" => self.delim_math(SyntaxKind::DISPLAY_MATH, "\\[", "\\]"),
            "\\(" => self.delim_math(SyntaxKind::INLINE_MATH, "\\(", "\\)"),
            sym @ ("\\]" | "\\)") => {
                // A math closer with no matching opener: report it, keep the token.
                self.error(format!("unmatched `{sym}`"));
                self.bump();
            }
            "\\\\" => self.line_break(),
            _ => self.bump(),
        },
        SyntaxKind::L_BRACE => self.group(),
        SyntaxKind::R_BRACE => {
            self.error("unmatched `}`");
            self.bump();
        }
        SyntaxKind::DOLLAR => self.dollar_math(),
        _ => self.bump(),
    }
}
/// Parses a `COMMAND` node: the control word at the cursor followed by any
/// arguments that `attach_arguments` accepts.
fn command(&mut self) {
    self.open(SyntaxKind::COMMAND);
    // The control word itself.
    self.bump();
    self.attach_arguments();
    self.close();
}
/// Parses a `LINE_BREAK` node: the `\\` symbol, an immediately following `*`
/// word token if present, and then an immediately following optional argument
/// if present.
fn line_break(&mut self) {
    self.open(SyntaxKind::LINE_BREAK);
    self.bump();
    let starred = self.kind() == Some(SyntaxKind::WORD) && self.text() == "*";
    if starred {
        self.bump();
    }
    if matches!(self.kind(), Some(SyntaxKind::L_BRACKET)) {
        self.optional();
    }
    self.close();
}
/// Consumes the `{…}` groups and `[…]` optionals that follow the cursor.
///
/// Trivia before each argument is consumed together with it. Scanning stops at
/// a paragraph break, at the end of input, or at any other token; in those
/// cases the pending trivia is left untouched.
fn attach_arguments(&mut self) {
    while let (Some(next), false) = self.peek_meaningful() {
        match next {
            SyntaxKind::L_BRACE => {
                self.skip_trivia();
                self.group();
            }
            SyntaxKind::L_BRACKET => {
                self.skip_trivia();
                self.optional();
            }
            _ => break,
        }
    }
}
/// Parses a `GROUP` node starting at the `{` under the cursor.
///
/// Elements are parsed until the matching `}` is consumed. If the input ends
/// first, an error is recorded and the node is closed anyway.
fn group(&mut self) {
    debug_assert_eq!(self.kind(), Some(SyntaxKind::L_BRACE));
    self.open(SyntaxKind::GROUP);
    self.bump();
    loop {
        let Some(k) = self.kind() else {
            self.error("unclosed `{`");
            break;
        };
        if k == SyntaxKind::R_BRACE {
            self.bump();
            break;
        }
        self.element();
    }
    self.close();
}
/// Parses an `OPTIONAL` node starting at the `[` under the cursor.
///
/// Elements are parsed until the closing `]` is consumed. The argument is
/// reported as unclosed, leaving the offending token in place, at the end of
/// input, at a `}`, at a `\begin` or `\end` control word, or at a paragraph
/// break.
fn optional(&mut self) {
    debug_assert_eq!(self.kind(), Some(SyntaxKind::L_BRACKET));
    self.open(SyntaxKind::OPTIONAL);
    self.bump();
    loop {
        let interrupted = match self.kind() {
            Some(SyntaxKind::R_BRACKET) => {
                self.bump();
                break;
            }
            None | Some(SyntaxKind::R_BRACE) => true,
            Some(SyntaxKind::CONTROL_WORD) => {
                self.at_command(BEGIN_CMD) || self.at_command(END_CMD)
            }
            Some(_) => false,
        };
        if interrupted || self.at_paragraph_break() {
            self.error("unclosed `[`");
            break;
        }
        self.element();
    }
    self.close();
}
/// Parses `$…$` as `INLINE_MATH` or `$$…$$` as `DISPLAY_MATH`.
///
/// The form is chosen by whether the token after the opening `$` is also a
/// `$`. Inside display math a single `$` is consumed as content. The node is
/// reported as unclosed, leaving the offending token in place, at the end of
/// input, at a `}`, at an `\end` control word, or at a paragraph break.
fn dollar_math(&mut self) {
    let display = self.nth_kind(1) == Some(SyntaxKind::DOLLAR);
    let (kind, label) = match display {
        true => (SyntaxKind::DISPLAY_MATH, "$$"),
        false => (SyntaxKind::INLINE_MATH, "$"),
    };
    self.open(kind);
    self.bump();
    if display {
        self.bump();
    }
    loop {
        let interrupted = match self.kind() {
            Some(SyntaxKind::DOLLAR) => {
                // Inline math closes on any `$`; display math needs `$$`.
                let closes = !display || self.nth_kind(1) == Some(SyntaxKind::DOLLAR);
                self.bump();
                if !closes {
                    continue;
                }
                if display {
                    self.bump();
                }
                break;
            }
            None | Some(SyntaxKind::R_BRACE) => true,
            Some(SyntaxKind::CONTROL_WORD) => self.at_command(END_CMD),
            Some(_) => self.at_paragraph_break(),
        };
        if interrupted {
            self.error(format!("unclosed `{label}`"));
            break;
        }
        self.element();
    }
    self.close();
}
/// Parses math delimited by control symbols into a node of the given `kind`.
///
/// The opener under the cursor is consumed, then elements are parsed until a
/// control symbol whose text equals `closer` is consumed. The node is reported
/// as unclosed (naming `opener`), leaving the offending token in place, at the
/// end of input, at a `}`, at an `\end` control word, or at a paragraph break.
fn delim_math(&mut self, kind: SyntaxKind, opener: &str, closer: &str) {
    self.open(kind);
    self.bump();
    loop {
        let interrupted = match self.kind() {
            Some(SyntaxKind::CONTROL_SYMBOL) if self.text() == closer => {
                self.bump();
                break;
            }
            None | Some(SyntaxKind::R_BRACE) => true,
            Some(SyntaxKind::CONTROL_WORD) => self.at_command(END_CMD),
            Some(_) => self.at_paragraph_break(),
        };
        if interrupted {
            self.error(format!("unclosed `{opener}`"));
            break;
        }
        self.element();
    }
    self.close();
}
fn environment(&mut self) {
self.open(SyntaxKind::ENVIRONMENT);
self.open(SyntaxKind::BEGIN);
self.bump(); let name = self.name_group();
self.attach_arguments(); self.close();
if name.as_deref().is_some_and(is_verbatim_environment) {
self.verbatim_body(name.as_deref().expect("verbatim name"));
} else {
self.parse_block(Block::Environment);
}
self.finish_environment(&name);
}
/// Handles the end of an environment body and closes the `ENVIRONMENT` node.
///
/// `name` is the name read from the opening `\begin`, if any. At the end of
/// input the environment is reported as unclosed. Otherwise the upcoming end
/// name is peeked: when the opening name is missing or equals it, an `END`
/// node (`\end` plus name group) is consumed; when it differs, an error is
/// recorded and the tokens are left for the caller.
fn finish_environment(&mut self, name: &Option<String>) {
    let opened = name.as_deref().unwrap_or("");
    if self.at_end() {
        self.error(format!("unclosed environment `{}`", opened));
    } else {
        let end_name = peek_end_name(self.tokens, self.pos);
        let matches_begin = name.is_none() || *name == end_name;
        if matches_begin {
            self.open(SyntaxKind::END);
            self.bump();
            self.name_group();
            self.close();
        } else {
            self.error(format!(
                "unclosed environment `{}` (found `\\end{{{}}}`)",
                opened,
                end_name.as_deref().unwrap_or("")
            ));
        }
    }
    // Closes the ENVIRONMENT node opened by `environment`.
    self.close();
}
fn verbatim_body(&mut self, name: &str) {
loop {
match self.kind() {
None => break,
Some(SyntaxKind::CONTROL_WORD)
if self.at_command(END_CMD)
&& peek_end_name(self.tokens, self.pos).as_deref() == Some(name) =>
{
break;
}
_ => self.bump(),
}
}
}
/// Handles an `\end` that does not close any open environment: records an
/// error, then consumes `\end` and its name group as an `END` node.
fn stray_end(&mut self) {
    self.error("`\\end` without matching `\\begin`");
    self.open(SyntaxKind::END);
    // The `\end` control word itself.
    self.bump();
    self.name_group();
    self.close();
}
/// Parses the `{name}` group of a `\begin` or `\end`.
///
/// Leading trivia is consumed first. If no `{` follows, an error is recorded
/// and `None` is returned. Otherwise a `NAME_GROUP` node is emitted and the
/// concatenated text of the tokens between the braces is returned, trimmed.
/// Reaching the end of input before `}` records an error but still returns
/// the text collected so far.
fn name_group(&mut self) -> Option<String> {
    self.skip_trivia();
    if !matches!(self.kind(), Some(SyntaxKind::L_BRACE)) {
        self.error("expected `{` for environment name");
        return None;
    }
    self.open(SyntaxKind::NAME_GROUP);
    self.bump();
    let mut collected = String::new();
    loop {
        let Some(k) = self.kind() else {
            self.error("unclosed environment name");
            break;
        };
        let closing = k == SyntaxKind::R_BRACE;
        if !closing {
            collected.push_str(self.text());
        }
        self.bump();
        if closing {
            break;
        }
    }
    self.close();
    Some(collected.trim().to_owned())
}
}
/// Reads the environment name that follows the token at `end_pos`, without
/// consuming anything.
///
/// Trivia after `end_pos` is skipped. If the next token is not `{`, returns
/// `None`. Otherwise returns the trimmed concatenation of the token texts up
/// to the next `}` (or up to the end of the tokens if there is none).
fn peek_end_name(tokens: &[Token], end_pos: usize) -> Option<String> {
    let mut rest = tokens
        .iter()
        .skip(end_pos + 1)
        .skip_while(|tok| Parser::is_trivia(tok.kind));
    match rest.next() {
        Some(tok) if tok.kind == SyntaxKind::L_BRACE => {}
        _ => return None,
    }
    let name: String = rest
        .take_while(|tok| tok.kind != SyntaxKind::R_BRACE)
        .map(|tok| tok.text.as_str())
        .collect();
    Some(name.trim().to_owned())
}