use drop_bomb::DropBomb;
use rowan::GreenNode;
use thiserror::Error;
use crate::{
SyntaxKind, SyntaxNode,
builder::SyntaxTreeBuilder,
lexer::{LexToken, lex},
};
#[derive(Debug, Clone)]
pub struct Parse {
green: GreenNode,
errors: Vec<SyntaxError>,
}
impl Parse {
#[must_use]
pub fn syntax(&self) -> SyntaxNode {
SyntaxNode::new_root(self.green.clone())
}
#[must_use]
pub fn green(&self) -> &GreenNode {
&self.green
}
#[must_use]
pub fn errors(&self) -> &[SyntaxError] {
&self.errors
}
#[cfg(feature = "incremental")]
pub(crate) fn from_parts(green: GreenNode, errors: Vec<SyntaxError>) -> Self {
Parse { green, errors }
}
}
#[must_use]
pub fn parse(src: &str) -> Parse {
let tokens = lex(src);
let mut parser = Parser::new(&tokens);
crate::grammar::source_file(&mut parser);
let events = parser.into_events();
let (green, errors) = SyntaxTreeBuilder::build(&tokens, events);
Parse { green, errors }
}
#[derive(Debug, Clone, Error)]
#[error("[E{code:04}] {message}")]
pub struct SyntaxError {
pub code: u16,
pub message: String,
pub offset: text_size::TextSize,
}
#[derive(Debug)]
pub(crate) enum Event {
Start {
kind: SyntaxKind,
forward_parent: Option<u32>,
},
Finish,
Token {
kind: SyntaxKind,
},
Error {
code: u16,
msg: String,
offset: text_size::TextSize,
},
Abandoned,
}
impl Event {
pub(crate) fn tombstone() -> Self {
Event::Abandoned
}
}
#[derive(Copy, Clone, Debug)]
pub(crate) struct TokenSet([u128; TOKEN_SET_WORDS]);
const TOKEN_SET_WORDS: usize = 8;
#[allow(clippy::cast_possible_truncation)]
const TOKEN_SET_CAPACITY: u16 = (TOKEN_SET_WORDS as u16) * 128;
impl TokenSet {
pub(crate) const EMPTY: TokenSet = TokenSet([0; TOKEN_SET_WORDS]);
pub(crate) const fn new(kinds: &[SyntaxKind]) -> Self {
let mut mask = [0u128; TOKEN_SET_WORDS];
let mut i = 0;
while i < kinds.len() {
let raw = kinds[i] as u16;
assert!(raw < TOKEN_SET_CAPACITY, "TokenSet: kind out of range");
let word = (raw / 128) as usize;
let bit = raw % 128;
mask[word] |= 1u128 << bit;
i += 1;
}
TokenSet(mask)
}
pub(crate) const fn union(self, other: TokenSet) -> TokenSet {
let mut out = [0u128; TOKEN_SET_WORDS];
let mut i = 0;
while i < TOKEN_SET_WORDS {
out[i] = self.0[i] | other.0[i];
i += 1;
}
TokenSet(out)
}
pub(crate) const fn contains(self, kind: SyntaxKind) -> bool {
let raw = kind as u16;
if raw >= TOKEN_SET_CAPACITY {
return false;
}
let word = (raw / 128) as usize;
let bit = raw % 128;
(self.0[word] & (1u128 << bit)) != 0
}
}
impl Default for TokenSet {
fn default() -> Self {
TokenSet::EMPTY
}
}
pub(crate) struct Parser<'a> {
tokens: &'a [LexToken],
pos: usize,
events: Vec<Event>,
}
impl<'a> Parser<'a> {
pub(crate) fn new(tokens: &'a [LexToken]) -> Self {
Self {
tokens,
pos: skip_trivia(tokens, 0),
events: Vec::new(),
}
}
pub(crate) fn current(&self) -> SyntaxKind {
self.tokens
.get(self.pos)
.map_or(SyntaxKind::EOF, |t| t.kind)
}
#[allow(dead_code)]
pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
let mut idx = self.pos;
let mut remaining = n;
loop {
if idx >= self.tokens.len() {
return SyntaxKind::EOF;
}
if self.tokens[idx].kind.is_trivia() {
idx += 1;
continue;
}
if remaining == 0 {
return self.tokens[idx].kind;
}
remaining -= 1;
idx += 1;
}
}
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
self.current() == kind
}
pub(crate) fn at_ts(&self, set: TokenSet) -> bool {
set.contains(self.current())
}
#[allow(dead_code)]
pub(crate) fn at_contextual(&self, ident: &str) -> bool {
self.current() == SyntaxKind::IDENT
&& self
.tokens
.get(self.pos)
.is_some_and(|t| t.text.eq_ignore_ascii_case(ident))
}
fn current_offset(&self) -> text_size::TextSize {
self.tokens.get(self.pos).map_or_else(
|| {
self.tokens
.last()
.map_or(text_size::TextSize::from(0), |t| t.range.end())
},
|t| t.range.start(),
)
}
pub(crate) fn bump_any(&mut self) {
let kind = self.current();
if kind == SyntaxKind::EOF {
return;
}
self.events.push(Event::Token { kind });
self.pos += 1;
self.pos = skip_trivia(self.tokens, self.pos);
}
pub(crate) fn bump(&mut self, kind: SyntaxKind) {
debug_assert!(
self.at(kind),
"bump: expected {kind:?}, got {:?}",
self.current()
);
self.bump_any();
}
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
if self.at(kind) {
self.bump(kind);
true
} else {
false
}
}
pub(crate) fn error_code(&mut self, code: u16, msg: impl Into<String>) {
let offset = self.current_offset();
self.events.push(Event::Error {
code,
msg: msg.into(),
offset,
});
}
pub(crate) fn error(&mut self, msg: impl Into<String>) {
self.error_code(syntax_codes::GENERIC_SYNTAX_ERROR, msg);
}
#[allow(dead_code)]
pub(crate) fn err_and_bump(&mut self, msg: impl Into<String>) {
let m = self.start();
self.error(msg);
self.bump_any();
m.complete(self, SyntaxKind::ERROR);
}
#[allow(dead_code)]
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
if self.eat(kind) {
true
} else {
self.error_code(syntax_codes::EXPECTED_TOKEN, format!("expected {kind:?}"));
false
}
}
pub(crate) fn recover_until(&mut self, set: TokenSet) {
let stop = set.union(RECOVERY_STOP);
if stop.contains(self.current()) || self.current() == SyntaxKind::EOF {
return;
}
let m = self.start();
while !stop.contains(self.current()) && self.current() != SyntaxKind::EOF {
self.bump_any();
}
m.complete(self, SyntaxKind::ERROR);
}
pub(crate) fn start(&mut self) -> Marker {
let pos = self.events.len();
self.events.push(Event::Start {
kind: SyntaxKind::ERROR, forward_parent: None,
});
Marker::new(pos)
}
pub(crate) fn position(&self) -> usize {
self.pos
}
pub(crate) fn into_events(self) -> Vec<Event> {
self.events
}
}
pub(crate) mod syntax_codes {
pub(crate) const GENERIC_SYNTAX_ERROR: u16 = 1;
pub(crate) const UNEXPECTED_TOKEN: u16 = 2;
pub(crate) const EXPECTED_TOKEN: u16 = 3;
pub(crate) const UNCLOSED_STRING: u16 = 4;
pub(crate) const UNCLOSED_BLOCK_COMMENT: u16 = 5;
pub(crate) const INVALID_NUMERIC_LITERAL: u16 = 6;
pub(crate) const EXPECTED_STATEMENT: u16 = 7;
pub(crate) const EXPECTED_SEMICOLON_OR_EOF: u16 = 8;
pub(crate) const EXPECTED_LPAREN_NODE: u16 = 9;
pub(crate) const EXPECTED_NODE_AFTER_REL: u16 = 10;
pub(crate) const EXPECTED_RPAREN_NODE: u16 = 11;
pub(crate) const EXPECTED_DASH_REL_START: u16 = 12;
pub(crate) const EXPECTED_DASH_REL_CLOSE: u16 = 13;
pub(crate) const EXPECTED_DASH_OR_ARROW: u16 = 14;
pub(crate) const EXPECTED_RBRACK_REL: u16 = 15;
pub(crate) const EXPECTED_LABEL: u16 = 16;
pub(crate) const EXPECTED_REL_TYPE: u16 = 17;
pub(crate) const EXPECTED_RBRACE_PROP: u16 = 18;
pub(crate) const EXPECTED_PROP_KEY: u16 = 19;
pub(crate) const EXPECTED_COLON_PROP: u16 = 20;
pub(crate) const EXPECTED_PROP_VALUE: u16 = 21;
pub(crate) const EXPECTED_IDENT: u16 = 22;
pub(crate) const EXPR_NESTING_LIMIT: u16 = 23;
pub(crate) const EXPECTED_UNARY_OPERAND: u16 = 24;
pub(crate) const EXPECTED_NULL_AFTER_IS: u16 = 25;
pub(crate) const EXPECTED_BINOP_RHS: u16 = 26;
pub(crate) const EXPECTED_EXPR_IN_PARENS: u16 = 27;
pub(crate) const EXPECTED_RPAREN_EXPR: u16 = 28;
pub(crate) const EXPECTED_WITH_AFTER_STARTS: u16 = 29;
pub(crate) const EXPECTED_WITH_AFTER_ENDS: u16 = 30;
pub(crate) const EXPECTED_PROP_KEY_AFTER_DOT: u16 = 31;
pub(crate) const EXPECTED_INDEX_EXPR: u16 = 32;
pub(crate) const EXPECTED_RBRACK_INDEX: u16 = 33;
pub(crate) const EXPECTED_RPAREN_CALL: u16 = 34;
pub(crate) const EXPECTED_CALL_ARG: u16 = 35;
pub(crate) const EXPECTED_RETURN_EXPR: u16 = 36;
pub(crate) const EXPECTED_IDENT_AFTER_AS: u16 = 37;
pub(crate) const EXPECTED_BY_AFTER_ORDER: u16 = 38;
pub(crate) const EXPECTED_ORDERBY_EXPR: u16 = 39;
pub(crate) const EXPECTED_SKIP_EXPR: u16 = 40;
pub(crate) const EXPECTED_LIMIT_EXPR: u16 = 41;
pub(crate) const EXPECTED_MATCH_AFTER_OPTIONAL: u16 = 42;
pub(crate) const EXPECTED_WHERE_EXPR: u16 = 43;
#[allow(dead_code)]
pub(crate) const UNIMPLEMENTED_CLAUSE_RESERVED: u16 = 44;
pub(crate) const EXPECTED_CLAUSE: u16 = 45;
pub(crate) const INVALID_ESCAPE: u16 = 46;
pub(crate) const EXPECTED_LIST_ELEM: u16 = 47;
pub(crate) const EXPECTED_RBRACK_LIST: u16 = 48;
pub(crate) const EXPECTED_MAP_KEY: u16 = 49;
pub(crate) const EXPECTED_COLON_MAP: u16 = 50;
pub(crate) const EXPECTED_MAP_VALUE: u16 = 51;
pub(crate) const EXPECTED_RBRACE_MAP: u16 = 52;
pub(crate) const EXPECTED_UNWIND_EXPR: u16 = 53;
pub(crate) const EXPECTED_AS_UNWIND: u16 = 54;
pub(crate) const EXPECTED_CREATE_PATTERN: u16 = 55;
pub(crate) const EXPECTED_MERGE_PATTERN: u16 = 56;
pub(crate) const EXPECTED_SET_ITEM: u16 = 57;
pub(crate) const EXPECTED_REMOVE_ITEM: u16 = 58;
pub(crate) const EXPECTED_DELETE_EXPR: u16 = 59;
pub(crate) const EXPECTED_DELETE_AFTER_DETACH: u16 = 60;
pub(crate) const EXPECTED_ON_ACTION: u16 = 61;
#[allow(dead_code)]
pub(crate) const EXPECTED_RBRACK_HOP: u16 = 62;
#[allow(dead_code)]
pub(crate) const EXPECTED_EQ_PATH_BIND: u16 = 63;
pub(crate) const UNCLOSED_INDEX_BRACKET: u16 = 64;
pub(crate) const EXPECTED_LPAREN_LIST_PREDICATE: u16 = 65;
pub(crate) const EXPECTED_IN_LIST_PREDICATE: u16 = 66;
pub(crate) const EXPECTED_RPAREN_LIST_PREDICATE: u16 = 67;
pub(crate) const EXPECTED_IN_LIST_COMP: u16 = 68;
pub(crate) const EXPECTED_PIPE_OR_RBRACK_LIST_COMP: u16 = 69;
pub(crate) const EXPECTED_THEN_CASE: u16 = 70;
pub(crate) const EXPECTED_END_CASE: u16 = 71;
pub(crate) const EXPECTED_RPAREN_EXISTS: u16 = 72;
pub(crate) const EXPECTED_PROCEDURE_NAME: u16 = 73;
pub(crate) const EXPECTED_RPAREN_CALL_ARGS: u16 = 74;
pub(crate) const EXPECTED_YIELD_ITEM: u16 = 75;
pub(crate) const EXPECTED_RPAREN_SHORTEST_PATH: u16 = 77;
pub(crate) const EXPECTED_RBRACE_MAP_PROJECTION: u16 = 78;
pub(crate) const EXPECTED_PROP_OR_STAR_AFTER_DOT_IN_PROJECTION: u16 = 79;
pub(crate) const EXPECTED_COLON_MAP_PROJECTION: u16 = 80;
pub(crate) const EXPECTED_VALUE_MAP_PROJECTION: u16 = 81;
pub(crate) const EXPECTED_MAP_PROJECTION_ITEM: u16 = 82;
pub(crate) const EXISTS_BLOCK_DEFERRED: u16 = 4017;
}
fn skip_trivia(tokens: &[LexToken], mut idx: usize) -> usize {
while idx < tokens.len() && tokens[idx].kind.is_trivia() {
idx += 1;
}
idx
}
pub(crate) const RECOVERY_STOP: TokenSet = TokenSet::new(&[
SyntaxKind::MATCH_KW,
SyntaxKind::OPTIONAL_KW,
SyntaxKind::WHERE_KW,
SyntaxKind::WITH_KW,
SyntaxKind::RETURN_KW,
SyntaxKind::CREATE_KW,
SyntaxKind::MERGE_KW,
SyntaxKind::SET_KW,
SyntaxKind::REMOVE_KW,
SyntaxKind::DELETE_KW,
SyntaxKind::DETACH_KW,
SyntaxKind::UNWIND_KW,
SyntaxKind::CALL_KW,
SyntaxKind::UNION_KW,
SyntaxKind::SEMI,
]);
#[must_use = "Marker must be completed or abandoned"]
pub(crate) struct Marker {
pos: u32,
bomb: DropBomb,
}
impl Marker {
fn new(pos: usize) -> Self {
Self {
pos: u32::try_from(pos).expect("event index fits u32"),
bomb: DropBomb::new("Marker must be completed or abandoned"),
}
}
pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
self.bomb.defuse();
let idx = self.pos as usize;
match &mut p.events[idx] {
Event::Start { kind: slot, .. } => *slot = kind,
_ => unreachable!("marker index does not point to a Start event"),
}
p.events.push(Event::Finish);
CompletedMarker {
pos: self.pos,
kind,
}
}
#[allow(dead_code)]
pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
self.bomb.defuse();
let idx = self.pos as usize;
if idx + 1 == p.events.len() {
p.events.pop();
} else {
p.events[idx] = Event::Abandoned;
}
}
}
#[derive(Debug, Copy, Clone)]
pub(crate) struct CompletedMarker {
pos: u32,
#[allow(dead_code)]
kind: SyntaxKind,
}
impl CompletedMarker {
pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
let new_marker = p.start();
let old_idx = self.pos as usize;
let new_idx = new_marker.pos as usize;
let delta = u32::try_from(new_idx - old_idx).expect("forward-parent delta fits u32");
match &mut p.events[old_idx] {
Event::Start { forward_parent, .. } => {
debug_assert!(forward_parent.is_none(), "forward_parent already set");
*forward_parent = Some(delta);
}
_ => unreachable!("CompletedMarker pos does not point to a Start"),
}
new_marker
}
}
#[cfg(test)]
mod tests {
use super::parse;
use crate::SyntaxKind;
#[test]
fn parse_empty_is_lossless() {
let p = parse("");
assert_eq!(p.syntax().to_string(), "");
}
#[test]
fn parse_simple_is_lossless() {
let src = "MATCH (n) RETURN n";
let p = parse(src);
assert_eq!(p.syntax().to_string(), src);
}
#[test]
fn parse_with_comments_is_lossless() {
let src = "// leading\nMATCH (n) /* inline */ RETURN n";
let p = parse(src);
assert_eq!(p.syntax().to_string(), src);
}
#[test]
fn root_kind_is_source_file() {
let p = parse("MATCH (n) RETURN n");
assert_eq!(p.syntax().kind(), SyntaxKind::SOURCE_FILE);
}
use proptest::prelude::*;
proptest! {
#[test]
fn lossless_on_arbitrary_utf8(s in ".*") {
let p = parse(&s);
prop_assert_eq!(p.syntax().to_string(), s);
}
#[test]
fn parser_is_total_on_random_utf8(s in ".*") {
let _ = parse(&s);
}
}
}