use std::{cmp::Reverse, mem, ops::Range};
use indexmap::IndexSet;
use itertools::Itertools;
use rowan::{Checkpoint, GreenNodeBuilder, Language};
use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
use rue_lexer::{Token, TokenKind};
use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
/// A lexer token after it has been translated to the parser's [`SyntaxKind`].
#[derive(Debug)]
struct ParseToken {
/// Range of the token inside the source text; used to slice `source.text`.
span: Range<usize>,
/// Syntax kind that the lexer's token kind was mapped to in `Parser::new`.
kind: SyntaxKind,
}
/// The outcome of a parse: the syntax tree together with everything that was reported.
#[derive(Debug, Clone)]
pub struct ParseResult {
/// Diagnostics collected while converting tokens and while parsing.
pub diagnostics: Vec<Diagnostic>,
/// Root node created from the finished green tree.
pub node: SyntaxNode,
}
/// Token-stream parser that records diagnostics and feeds a rowan green tree builder.
#[derive(Debug)]
pub struct Parser {
/// The source being parsed; provides the text for tokens and the location of diagnostics.
source: Source,
/// All tokens (trivia included), already converted to syntax kinds.
parse_tokens: Vec<ParseToken>,
/// Index into `parse_tokens` of the next token that has not been consumed yet.
pos: usize,
/// Kinds that were tested with `at` since the last bump, skip, or error;
/// reported as the expected set when an unexpected token is skipped.
expected: IndexSet<SyntaxKind>,
/// Diagnostics gathered so far.
diagnostics: Vec<Diagnostic>,
/// Builder that accumulates the green tree.
builder: GreenNodeBuilder<'static>,
}
impl Parser {
/// Creates a parser over `tokens`, which were lexed from `source`.
///
/// Every lexer token is translated to its [`SyntaxKind`] up front. Unterminated
/// block comments, strings, and hex/binary/octal literals, as well as unknown
/// tokens, are reported as diagnostics at the span of the offending token.
/// Unknown tokens are mapped to [`SyntaxKind::Error`].
pub fn new(source: Source, tokens: Vec<Token>) -> Self {
    let mut diagnostics = Vec::new();

    let parse_tokens: Vec<ParseToken> = tokens
        .into_iter()
        .map(|token| {
            // First decide whether this token carries a lexical problem to report.
            let problem = match token.kind {
                TokenKind::BlockComment {
                    is_terminated: false,
                } => Some(DiagnosticKind::UnterminatedBlockComment),
                TokenKind::String {
                    is_terminated: false,
                } => Some(DiagnosticKind::UnterminatedString),
                TokenKind::Hex {
                    is_terminated: false,
                } => Some(DiagnosticKind::UnterminatedHex),
                TokenKind::Binary {
                    is_terminated: false,
                } => Some(DiagnosticKind::UnterminatedBinary),
                TokenKind::Octal {
                    is_terminated: false,
                } => Some(DiagnosticKind::UnterminatedOctal),
                TokenKind::Unknown => Some(DiagnosticKind::UnknownToken(
                    source.text[token.span.clone()].to_string(),
                )),
                _ => None,
            };

            if let Some(problem) = problem {
                diagnostics.push(Diagnostic::new(
                    SrcLoc::new(source.clone(), token.span.clone()),
                    problem,
                ));
            }

            // Then translate the lexer kind into the parser's syntax kind.
            let kind = match token.kind {
                TokenKind::Whitespace => SyntaxKind::Whitespace,
                TokenKind::LineComment => SyntaxKind::LineComment,
                TokenKind::BlockComment { .. } => SyntaxKind::BlockComment,
                TokenKind::String { .. } => SyntaxKind::String,
                TokenKind::Hex { .. } => SyntaxKind::Hex,
                TokenKind::Binary { .. } => SyntaxKind::Binary,
                TokenKind::Octal { .. } => SyntaxKind::Octal,
                TokenKind::Integer => SyntaxKind::Integer,
                TokenKind::Ident => SyntaxKind::Ident,
                TokenKind::Nil => T![nil],
                TokenKind::True => T![true],
                TokenKind::False => T![false],
                TokenKind::Import => T![import],
                TokenKind::Export => T![export],
                TokenKind::Extern => T![extern],
                TokenKind::Inline => T![inline],
                TokenKind::Test => T![test],
                TokenKind::Mod => T![mod],
                TokenKind::Fn => T![fn],
                TokenKind::Const => T![const],
                TokenKind::Type => T![type],
                TokenKind::Struct => T![struct],
                TokenKind::Let => T![let],
                TokenKind::If => T![if],
                TokenKind::Else => T![else],
                TokenKind::Return => T![return],
                TokenKind::Assert => T![assert],
                TokenKind::Raise => T![raise],
                TokenKind::Debug => T![debug],
                TokenKind::Is => T![is],
                TokenKind::As => T![as],
                TokenKind::Super => T![super],
                TokenKind::OpenParen => T!['('],
                TokenKind::CloseParen => T![')'],
                TokenKind::OpenBrace => T!['{'],
                TokenKind::CloseBrace => T!['}'],
                TokenKind::OpenBracket => T!['['],
                TokenKind::CloseBracket => T![']'],
                TokenKind::Plus => T![+],
                TokenKind::Minus => T![-],
                TokenKind::Star => T![*],
                TokenKind::Slash => T![/],
                TokenKind::Percent => T![%],
                TokenKind::Equals => T![=],
                TokenKind::LessThan => T![<],
                TokenKind::GreaterThan => T![>],
                TokenKind::Not => T![!],
                TokenKind::And => T![&],
                TokenKind::Or => T![|],
                TokenKind::Tilde => T![~],
                TokenKind::Xor => T![^],
                TokenKind::Dot => T![.],
                TokenKind::Comma => T![,],
                TokenKind::Colon => T![:],
                TokenKind::Semicolon => T![;],
                TokenKind::Unknown => SyntaxKind::Error,
            };

            ParseToken {
                span: token.span,
                kind,
            }
        })
        .collect();

    Self {
        source,
        parse_tokens,
        pos: 0,
        expected: IndexSet::new(),
        diagnostics,
        builder: GreenNodeBuilder::new(),
    }
}
pub fn parse(mut self) -> ParseResult {
document(&mut self);
ParseResult {
diagnostics: self.diagnostics,
node: SyntaxNode::new_root(self.builder.finish()),
}
}
/// Test-only variant of `parse` that finishes the builder as-is, without
/// running the `document` rule first.
#[cfg(test)]
pub(crate) fn parse_raw(self) -> ParseResult {
    let Self {
        diagnostics,
        builder,
        ..
    } = self;
    let node = SyntaxNode::new_root(builder.finish());

    ParseResult { diagnostics, node }
}
/// Consumes any pending trivia, then returns a builder checkpoint for the
/// current position. The checkpoint can later be passed to `start_at`.
pub(crate) fn checkpoint(&mut self) -> Checkpoint {
self.eat_trivia();
self.builder.checkpoint()
}
/// Starts a node of `kind` immediately, without consuming trivia first, so any
/// trivia consumed afterwards is added inside the new node.
pub(crate) fn start_including_trivia(&mut self, kind: SyntaxKind) {
self.builder.start_node(RueLang::kind_to_raw(kind));
}
/// Consumes any pending trivia, then starts a node of `kind`.
/// Must be balanced by a later call to `finish`.
pub(crate) fn start(&mut self, kind: SyntaxKind) {
// Trivia is consumed before the node is opened, so it ends up outside of it.
self.eat_trivia();
self.builder.start_node(RueLang::kind_to_raw(kind));
}
/// Starts a node of `kind` at a previously taken `checkpoint`
/// (delegates to the builder's `start_node_at`).
pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
self.builder
.start_node_at(checkpoint, RueLang::kind_to_raw(kind));
}
/// Finishes the node that was started most recently on the builder.
pub(crate) fn finish(&mut self) {
self.builder.finish_node();
}
/// Returns the first of `kinds` that the parser is currently positioned at.
///
/// Candidates are tried in order of descending `split()` length, so a kind made
/// of more tokens is preferred over a shorter one. Ties keep their original
/// relative order. Every candidate that is tried is recorded as expected by `at`.
pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
    let mut candidates = kinds.to_vec();
    // `sort_by_key` is stable, so equally long kinds stay in caller order.
    candidates.sort_by_key(|candidate| Reverse(candidate.split().len()));

    candidates
        .into_iter()
        .find(|&candidate| self.at(candidate))
}
/// Reports whether the upcoming tokens match `kind`.
///
/// Pending trivia is consumed first and `kind` is recorded in the expected set
/// regardless of the outcome. The kinds returned by `kind.split()` must match
/// the next tokens one by one, starting at the current position.
pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
    self.eat_trivia();
    self.expected.insert(kind);

    let parts = kind.split();
    parts
        .iter()
        .enumerate()
        .all(|(offset, part)| self.nth(offset) == *part)
}
/// Consumes `kind` if the parser is positioned at it.
///
/// Returns `true` when the token was consumed and `false` otherwise; nothing
/// is reported when there is no match.
pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
    let matched = self.at(kind);
    if matched {
        self.bump(kind);
    }
    matched
}
/// Consumes `kind` if the parser is positioned at it; otherwise reports an
/// unexpected token and skips the current one.
pub(crate) fn expect(&mut self, kind: SyntaxKind) {
    if !self.at(kind) {
        self.skip();
        return;
    }
    self.bump(kind);
}
/// Returns the kind of the token `n` positions ahead of the current one,
/// or `SyntaxKind::Eof` when that position is past the last token.
fn nth(&self, n: usize) -> SyntaxKind {
    match self.parse_tokens.get(self.pos + n) {
        Some(token) => token.kind,
        None => SyntaxKind::Eof,
    }
}
/// Consumes tokens for as long as the current one is trivia, adding each of
/// them to the tree at the current builder position.
pub(crate) fn eat_trivia(&mut self) {
    loop {
        let current = self.nth(0);
        if !current.is_trivia() {
            break;
        }
        self.bump(current);
    }
}
/// Reports the current token as unexpected and moves past it.
///
/// The diagnostic lists every kind recorded as expected so far; the expected
/// set is emptied in the process. At end of input the diagnostic points at an
/// empty span at the end of the source text and the position is left unchanged.
///
/// NOTE(review): the skipped token is not handed to the tree builder, so its
/// text does not appear in the resulting tree — confirm this is intended.
pub(crate) fn skip(&mut self) {
    let expected = mem::take(&mut self.expected);

    let has_token = self.pos < self.parse_tokens.len();
    let span = match self.parse_tokens.get(self.pos) {
        Some(token) => token.span.clone(),
        None => {
            let end = self.source.text.len();
            end..end
        }
    };

    let found = self.nth(0).to_string();
    let location = SrcLoc::new(self.source.clone(), span);
    self.diagnostics.push(Diagnostic::new(
        location,
        DiagnosticKind::UnexpectedToken(
            found,
            expected.iter().map(ToString::to_string).collect(),
        ),
    ));

    // Never advance beyond the end of the token list.
    if has_token {
        self.pos += 1;
    }
}
/// Records a diagnostic of `kind` at the current token without consuming it.
///
/// The expected set is cleared. At end of input the diagnostic points at an
/// empty span at the end of the source text.
pub(crate) fn error(&mut self, kind: DiagnosticKind) {
    self.expected.clear();

    let span = match self.parse_tokens.get(self.pos) {
        Some(token) => token.span.clone(),
        None => {
            let end = self.source.text.len();
            end..end
        }
    };

    let location = SrcLoc::new(self.source.clone(), span);
    self.diagnostics.push(Diagnostic::new(location, kind));
}
/// Consumes the tokens that make up `kind` and adds them to the tree as a
/// single token of that kind.
///
/// The token text runs from the start of the first consumed token to the end
/// of the last one (`kind.split().len()` tokens in total). The expected set is
/// cleared. Panics if the required tokens are not available, so callers check
/// with `at`/`nth` beforehand.
fn bump(&mut self, kind: SyntaxKind) {
    self.expected.clear();

    let width = kind.split().len();
    let first = &self.parse_tokens[self.pos];
    let last = &self.parse_tokens[self.pos + width - 1];
    let text = &self.source.text[first.span.start..last.span.end];

    self.builder.token(RueLang::kind_to_raw(kind), text);
    self.pos += width;
}
}