use crate::bib::core::SyntaxError;
use crate::bib::events::Event;
use crate::bib::lexer::Token;
use crate::bib::syntax::SyntaxKind;
pub(crate) fn parse(tokens: &[Token]) -> (Vec<Event>, Vec<SyntaxError>) {
let mut p = Parser::new(tokens);
p.file();
(p.events, p.errors)
}
/// Event-producing parser over a borrowed token slice.
///
/// The parser does not build a tree itself: it records `Event`s and
/// `SyntaxError`s, which `parse` returns to its caller.
struct Parser<'t> {
/// The tokens being parsed.
tokens: &'t [Token],
/// `starts[i]` is the sum of `text.len()` over all tokens before index `i`;
/// one extra trailing entry holds the total length of the input.
starts: Vec<usize>,
/// Index into `tokens` of the current (not yet consumed) token.
pos: usize,
/// Events recorded so far, in emission order.
events: Vec<Event>,
/// Syntax errors recorded so far.
errors: Vec<SyntaxError>,
}
impl<'t> Parser<'t> {
/// Creates a parser positioned at the first token.
///
/// Precomputes `starts`: the running sum of `text.len()` before each token,
/// followed by one final entry holding the total length, so `starts` always
/// has `tokens.len() + 1` elements.
fn new(tokens: &'t [Token]) -> Self {
    let mut starts = Vec::with_capacity(tokens.len() + 1);
    let total = tokens.iter().fold(0usize, |offset, token| {
        starts.push(offset);
        offset + token.text.len()
    });
    // Sentinel entry: the offset just past the last token.
    starts.push(total);
    Self {
        tokens,
        starts,
        pos: 0,
        events: Vec::new(),
        errors: Vec::new(),
    }
}
/// Returns the kind of the current token, or `None` when the input is
/// exhausted.
fn kind(&self) -> Option<SyntaxKind> {
    let token = self.tokens.get(self.pos)?;
    Some(token.kind)
}
/// Returns `true` once every token has been consumed.
fn at_end(&self) -> bool {
    self.tokens.len() <= self.pos
}
/// Returns `true` for the token kinds treated as trivia: `WHITESPACE` and
/// `NEWLINE`.
fn is_trivia(k: SyntaxKind) -> bool {
    k == SyntaxKind::WHITESPACE || k == SyntaxKind::NEWLINE
}
/// Returns `true` when the current token can be part of a name, i.e. it is a
/// `WORD` or a `NUMBER`.
fn at_name(&self) -> bool {
    self.kind()
        .is_some_and(|k| matches!(k, SyntaxKind::WORD | SyntaxKind::NUMBER))
}
/// Returns the kind of the first non-trivia token at or after the current
/// position without consuming anything, or `None` if only trivia remains.
fn peek_past_trivia(&self) -> Option<SyntaxKind> {
    self.tokens
        .iter()
        .skip(self.pos)
        .map(|token| token.kind)
        .find(|&kind| !Self::is_trivia(kind))
}
/// Consumes the current token: records an `Event::Tok` carrying its index
/// and advances the position by one.
///
/// Callers must not call this at end of input; debug builds assert it.
fn bump(&mut self) {
    debug_assert!(!self.at_end(), "bump past end of input");
    let index = self.pos;
    self.pos = index + 1;
    self.events.push(Event::Tok(index));
}
/// Records the start of a node of the given `kind`; each call is paired with
/// a later `close`.
fn open(&mut self, kind: SyntaxKind) {
    let start = Event::Start(kind);
    self.events.push(start);
}
/// Records the end of the most recently opened node.
fn close(&mut self) {
    let finish = Event::Finish;
    self.events.push(finish);
}
/// Records a syntax error with the given message.
///
/// The error spans the current token (`starts[pos]..starts[pos + 1]`). At
/// end of input it is an empty span located at the total input length.
fn error(&mut self, message: impl Into<String>) {
    // `starts` has one more entry than `tokens`, so this two-element window
    // exists exactly when a current token exists.
    let (start, end) = match self.starts.get(self.pos..=self.pos + 1) {
        Some(&[start, end]) => (start, end),
        _ => {
            let eof = *self.starts.last().expect("starts is non-empty");
            (eof, eof)
        }
    };
    let message = message.into();
    self.errors.push(SyntaxError {
        message,
        start,
        end,
    });
}
/// Consumes consecutive trivia tokens (whitespace and newlines), recording
/// each one as a token event.
fn skip_trivia(&mut self) {
    while let Some(kind) = self.kind() {
        if !Self::is_trivia(kind) {
            break;
        }
        self.bump();
    }
}
/// Parses the whole input: a sequence of `@` entries, with any other
/// non-trivia material between them wrapped in `JUNK` nodes.
fn file(&mut self) {
    self.skip_trivia();
    while let Some(kind) = self.kind() {
        if kind == SyntaxKind::AT {
            self.entry();
        } else {
            self.junk();
        }
        self.skip_trivia();
    }
}
/// Wraps everything up to the next `AT` token (or end of input) in a `JUNK`
/// node.
fn junk(&mut self) {
    self.open(SyntaxKind::JUNK);
    while let Some(kind) = self.kind() {
        if kind == SyntaxKind::AT {
            break;
        }
        self.bump();
    }
    self.close();
}
/// Parses one `@…` entry starting at the current token.
///
/// The node kind is chosen from the entry type name, compared
/// ASCII-case-insensitively: `string`, `preamble` and `comment` get their
/// own kinds, anything else is a plain `ENTRY`. If the type name or the
/// opening `{` / `(` is missing, an error is recorded and the node is closed
/// immediately.
fn entry(&mut self) {
    let type_name = self.peek_entry_type().to_ascii_lowercase();
    let node_kind = match type_name.as_str() {
        "string" => SyntaxKind::STRING_ENTRY,
        "preamble" => SyntaxKind::PREAMBLE_ENTRY,
        "comment" => SyntaxKind::COMMENT_ENTRY,
        _ => SyntaxKind::ENTRY,
    };
    self.open(node_kind);
    // Consume the first token of the entry (`file` dispatches here on `AT`).
    self.bump();
    self.skip_trivia();

    if self.kind() != Some(SyntaxKind::WORD) {
        self.error("expected an entry type after `@`");
        self.close();
        return;
    }
    self.open(SyntaxKind::ENTRY_TYPE);
    self.bump();
    self.close();
    self.skip_trivia();

    // The opening delimiter determines which closing delimiter ends the body.
    let closer = match self.kind() {
        Some(SyntaxKind::L_BRACE) => SyntaxKind::R_BRACE,
        Some(SyntaxKind::L_PAREN) => SyntaxKind::R_PAREN,
        _ => {
            self.error("expected `{` or `(` after the entry type");
            self.close();
            return;
        }
    };
    // Consume the opening delimiter itself.
    self.bump();

    match node_kind {
        SyntaxKind::STRING_ENTRY => self.string_body(closer),
        SyntaxKind::PREAMBLE_ENTRY => self.preamble_body(closer),
        SyntaxKind::COMMENT_ENTRY => self.comment_body(closer),
        _ => self.entry_body(closer),
    }
    self.close();
}
/// Looks ahead, without consuming, past the current token and any trivia
/// after it, and returns the text of the next token if it is a `WORD`.
///
/// Returns an empty string when that token is missing or is not a `WORD`.
fn peek_entry_type(&self) -> String {
    self.tokens
        .iter()
        .skip(self.pos + 1)
        .find(|token| !Self::is_trivia(token.kind))
        .filter(|token| token.kind == SyntaxKind::WORD)
        .map(|token| token.text.to_string())
        .unwrap_or_default()
}
/// Parses the body of a regular entry: an optional citation key followed by
/// comma-separated fields, up to and including `closer`.
///
/// A leading run of name tokens is treated as the `KEY` unless it is
/// directly followed by `=`, in which case it is already the first field.
///
/// Errors recorded here:
/// - "unterminated entry" when the input ends, or a new `@` begins, before
///   `closer` is seen (the `@` is left for the caller);
/// - a missing-comma error when a field starts right after the key or right
///   after a complete field without a `,` in between.
fn entry_body(&mut self, closer: SyntaxKind) {
    self.skip_trivia();
    let has_key = self.at_name() && !self.looks_like_field();
    if has_key {
        self.open(SyntaxKind::KEY);
        while self.at_name() {
            self.bump();
        }
        self.close();
    }

    // `Some(message)` while a `,` is required before the next field; the
    // message names what the comma has to follow. A key needs a separating
    // comma just like a field does.
    let mut missing_comma: Option<&'static str> = if has_key {
        Some("expected `,` after the entry key")
    } else {
        None
    };

    loop {
        self.skip_trivia();
        match self.kind() {
            None => {
                self.error("unterminated entry");
                break;
            }
            Some(k) if k == closer => {
                self.bump();
                break;
            }
            Some(SyntaxKind::AT) => {
                // The next entry is starting: stop without consuming `@`.
                self.error("unterminated entry");
                break;
            }
            Some(SyntaxKind::COMMA) => {
                self.bump();
                missing_comma = None;
            }
            Some(_) => {
                if let Some(message) = missing_comma {
                    self.error(message);
                }
                // Only a complete field demands a comma before the next one;
                // a malformed field has already produced its own error.
                missing_comma = if self.field(closer) {
                    Some("expected `,` between fields")
                } else {
                    None
                };
            }
        }
    }
}
/// Returns `true` when the upcoming tokens have the shape `name … =`: a
/// (possibly empty) run of `WORD`/`NUMBER` tokens, then optional trivia,
/// then `EQ`. Nothing is consumed.
fn looks_like_field(&self) -> bool {
    let mut rest = self
        .tokens
        .iter()
        .skip(self.pos)
        .map(|token| token.kind)
        .skip_while(|&kind| matches!(kind, SyntaxKind::WORD | SyntaxKind::NUMBER))
        .skip_while(|&kind| Self::is_trivia(kind));
    rest.next() == Some(SyntaxKind::EQ)
}
/// Parses one `name = value` field into a `FIELD` node.
///
/// Returns `true` only when the field had both a name and an `=` (the value
/// itself may still have produced errors). If the current token cannot
/// start a name, it is consumed into the `FIELD` node together with an
/// error, so the caller always makes progress.
fn field(&mut self, closer: SyntaxKind) -> bool {
    self.open(SyntaxKind::FIELD);

    if self.at_name() {
        self.open(SyntaxKind::FIELD_NAME);
        while self.at_name() {
            self.bump();
        }
        self.close();
        self.skip_trivia();

        let has_eq = self.kind() == Some(SyntaxKind::EQ);
        if has_eq {
            self.bump();
            self.skip_trivia();
            self.value(closer);
        } else {
            self.error("expected `=` after the field name");
        }
        self.close();
        has_eq
    } else {
        self.error("expected a field name");
        // Swallow the offending token so the caller's loop advances.
        self.bump();
        self.close();
        false
    }
}
/// Parses a field value into a `VALUE` node: one or more value pieces
/// joined by `#`.
///
/// Parsing stops after a piece that is not followed (past trivia) by `#`.
/// A `#` that is followed directly by `closer` or by the end of input has no
/// right-hand operand; that is recorded as an error instead of being
/// silently accepted, which matches how a leading `#` is already reported
/// through `value_piece`.
fn value(&mut self, closer: SyntaxKind) {
    self.open(SyntaxKind::VALUE);
    loop {
        self.value_piece();
        if self.peek_past_trivia() != Some(SyntaxKind::HASH) {
            break;
        }
        self.skip_trivia();
        // Consume the `#` found by the lookahead above.
        self.bump();
        self.skip_trivia();
        if self.at_end() || self.kind() == Some(closer) {
            // Dangling concatenation operator: nothing left to join.
            self.error("expected a value after `#`");
            break;
        }
    }
    self.close();
}
/// Parses a single operand of a value: a brace group, a quoted string, or a
/// bare `WORD`/`NUMBER` wrapped in a `LITERAL` node.
///
/// If the current token starts none of these, an error is recorded and
/// nothing is consumed.
fn value_piece(&mut self) {
    let kind = self.kind();
    if kind == Some(SyntaxKind::L_BRACE) {
        self.brace_group();
    } else if kind == Some(SyntaxKind::QUOTE) {
        self.quoted();
    } else if self.at_name() {
        self.open(SyntaxKind::LITERAL);
        self.bump();
        self.close();
    } else {
        self.error("expected a value");
    }
}
/// Parses a balanced `{ … }` group, including nested groups, into
/// `BRACE_GROUP` nodes. Must be called with the current token on the
/// opening `{`.
///
/// Nesting is tracked with a depth counter rather than recursion, so the
/// nesting depth of the input cannot exhaust the call stack. The events and
/// errors produced are the same as for a recursive descent: every `{` opens
/// a node, every matching `}` closes one, and at end of input each group
/// that is still open records its own "unterminated `{`" error and is
/// closed.
fn brace_group(&mut self) {
    self.open(SyntaxKind::BRACE_GROUP);
    // Consume the opening `{` the caller positioned us on.
    self.bump();
    let mut depth: usize = 1;

    while depth > 0 {
        match self.kind() {
            None => {
                // End of input: report and close every group still open.
                for _ in 0..depth {
                    self.error("unterminated `{`");
                    self.close();
                }
                return;
            }
            Some(SyntaxKind::R_BRACE) => {
                self.bump();
                self.close();
                depth -= 1;
            }
            Some(SyntaxKind::L_BRACE) => {
                self.open(SyntaxKind::BRACE_GROUP);
                self.bump();
                depth += 1;
            }
            Some(_) => self.bump(),
        }
    }
}
/// Parses a `"…"` string into a `QUOTED` node. Must be called with the
/// current token on the opening quote.
///
/// Brace groups inside the string are parsed as nested `BRACE_GROUP` nodes,
/// so a `QUOTE` token inside braces does not end the string. Reaching the
/// end of input before the closing quote records an error.
fn quoted(&mut self) {
    self.open(SyntaxKind::QUOTED);
    // Consume the opening quote.
    self.bump();

    let mut terminated = false;
    while let Some(kind) = self.kind() {
        if kind == SyntaxKind::QUOTE {
            self.bump();
            terminated = true;
            break;
        }
        if kind == SyntaxKind::L_BRACE {
            self.brace_group();
        } else {
            self.bump();
        }
    }
    if !terminated {
        self.error("unterminated `\"`");
    }
    self.close();
}
fn string_body(&mut self, closer: SyntaxKind) {
self.skip_trivia();
if self.at_name() {
self.field(closer);
} else if self.kind() != Some(closer) && !self.at_end() {
self.error("expected `name = value` in @string");
}
self.expect_close(closer);
}
/// Parses the body of an `@preamble` entry: an optional value followed by
/// the closing delimiter.
fn preamble_body(&mut self, closer: SyntaxKind) {
    self.skip_trivia();
    let has_value = self.kind().is_some_and(|kind| kind != closer);
    if has_value {
        self.value(closer);
    }
    self.expect_close(closer);
}
/// Consumes the body of an `@comment` entry up to and including `closer`.
///
/// Nested `{ … }` groups are parsed as `BRACE_GROUP` nodes; every other
/// token is consumed as-is. Reaching the end of input first records an
/// error.
fn comment_body(&mut self, closer: SyntaxKind) {
    loop {
        let Some(kind) = self.kind() else {
            self.error("unterminated @comment");
            return;
        };
        // The closer is checked first, before treating `{` as a nested group.
        if kind == closer {
            self.bump();
            return;
        }
        if kind == SyntaxKind::L_BRACE {
            self.brace_group();
        } else {
            self.bump();
        }
    }
}
/// Skips trivia and consumes `closer` if it is the next token.
///
/// Otherwise records an error and consumes nothing: "unterminated entry" at
/// end of input, "expected the closing delimiter" for any other token.
fn expect_close(&mut self, closer: SyntaxKind) {
    self.skip_trivia();
    if self.at_end() {
        self.error("unterminated entry");
    } else if self.kind() == Some(closer) {
        self.bump();
    } else {
        self.error("expected the closing delimiter");
    }
}
}