use drop_bomb::DropBomb;
use event::Event;
use grammar::OPERATOR_FIRST;
use std::cell::Cell;
use token_set::TokenSet;
mod event;
mod generated;
mod grammar;
mod input;
mod lexed_str;
mod output;
mod shortcuts;
mod syntax_kind;
mod token_set;
pub use crate::{
lexed_str::LexedStr,
shortcuts::StrStep,
syntax_kind::SyntaxKind,
};
use crate::input::Input;
pub use crate::output::Output;
/// Handle for a syntax node that has been opened but not yet closed.
///
/// `pos` is the index, in the parser's event list, of the placeholder
/// `Start` event pushed by `Parser::start`. The marker carries a drop bomb
/// armed with "Marker must be either completed or abandoned"; both
/// [`Marker::complete`] and [`Marker::abandon`] defuse it.
pub(crate) struct Marker {
    pos: u32,
    bomb: DropBomb,
}

impl Marker {
    /// Creates a marker for the `Start` event stored at index `pos`.
    fn new(pos: u32) -> Marker {
        let bomb = DropBomb::new("Marker must be either completed or abandoned");
        Marker { pos, bomb }
    }

    /// Closes the node opened by this marker and gives it the kind `kind`.
    ///
    /// The placeholder `Start` event at `self.pos` has its kind overwritten,
    /// then a `Finish` event is appended.
    ///
    /// # Panics
    ///
    /// Panics if the event at `self.pos` is not a `Start` event.
    pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
        self.bomb.defuse();
        if let Event::Start { kind: slot, .. } = &mut p.events[self.pos as usize] {
            *slot = kind;
        } else {
            unreachable!();
        }
        p.push_event(Event::Finish);
        CompletedMarker::new(self.pos, kind)
    }

    /// Gives up on the node opened by this marker.
    ///
    /// When the placeholder is still the most recent event it is removed
    /// from the event list; otherwise the event list is left unchanged.
    ///
    /// # Panics
    ///
    /// Panics if the removed event is not an untouched tombstone `Start`.
    pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
        self.bomb.defuse();
        if p.events.len() - 1 != self.pos as usize {
            return;
        }
        let popped = p.events.pop();
        let is_pristine_tombstone = matches!(
            popped,
            Some(Event::Start {
                kind: SyntaxKind::TOMBSTONE,
                forward_parent: None,
            })
        );
        if !is_pristine_tombstone {
            unreachable!();
        }
    }
}
/// Handle for a syntax node that has already been closed.
///
/// `pos` is the index of the node's `Start` event in the parser's event
/// list and `kind` is the kind the node was completed with.
pub(crate) struct CompletedMarker {
    pos: u32,
    kind: SyntaxKind,
}

impl CompletedMarker {
    /// Bundles the `Start` event index and node kind into a handle.
    fn new(pos: u32, kind: SyntaxKind) -> Self {
        Self { pos, kind }
    }

    /// Opens a new node that becomes the parent of this completed node.
    ///
    /// A fresh marker is started, and this node's `Start` event records the
    /// distance to it in `forward_parent`. The new marker is returned and
    /// must itself be completed or abandoned.
    ///
    /// # Panics
    ///
    /// Panics if the event at `self.pos` is not a `Start` event.
    pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
        let parent = p.start();
        if let Event::Start { forward_parent, .. } = &mut p.events[self.pos as usize] {
            *forward_parent = Some(parent.pos - self.pos);
        } else {
            unreachable!();
        }
        parent
    }

    /// Links the `Start` event of the open marker `m` forward to this node.
    ///
    /// `m` is defused and consumed; its `Start` event gets a
    /// `forward_parent` offset of `self.pos - m.pos`. Returns `self`.
    ///
    /// # Panics
    ///
    /// Panics if the event at `m.pos` is not a `Start` event.
    pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
        m.bomb.defuse();
        let offset = self.pos - m.pos;
        if let Event::Start { forward_parent, .. } = &mut p.events[m.pos as usize] {
            *forward_parent = Some(offset);
        } else {
            unreachable!();
        }
        self
    }

    /// Returns the kind this node was completed with.
    pub(crate) fn kind(&self) -> SyntaxKind {
        self.kind
    }
}
/// Parses `input` and returns the resulting [`Output`].
///
/// Runs the grammar entry point over a fresh parser, then hands the
/// recorded events to `event::process`.
pub fn parse(input: &Input) -> Output {
    let mut parser = Parser::new(input);
    grammar::entry_point(&mut parser);
    event::process(parser.finish())
}
/// Parser state: a cursor over the input tokens plus the events recorded so far.
pub(crate) struct Parser<'t> {
// Token input being parsed.
inp: &'t Input,
// Index into `inp` of the current token.
pos: usize,
// Events recorded so far; returned by `finish`.
events: Vec<Event>,
// Lookups made through `nth` since the last bump; checked against
// `PARSER_STEP_LIMIT` to detect a parser that is not making progress.
steps: Cell<u32>,
}
// Maximum number of `nth` lookups allowed without a token being bumped.
// Reaching it makes `nth` panic with "the parser seems stuck".
const PARSER_STEP_LIMIT: usize = 15_000_000;
// Whether the two tokens of a composite checked by `at_composite2` may be
// separated.
enum TrivaBetween {
// The first token must be reported as joint by `Input::is_joint`
// (presumably: nothing between the two tokens — confirm against `Input`).
NotAllowed,
// Only the token kinds are compared.
Allowed,
}
// `+` and `-`: `op_len` strips these from the end of an operator run that
// contains no token from `SPECIAL_OP_CHARS`.
const OPERATOR_SIGN: TokenSet = TokenSet::new(&[SyntaxKind::PLUS, SyntaxKind::MINUS]);
// Operator tokens whose presence lets an operator run keep a trailing `+`
// or `-` in `op_len`.
// NOTE(review): this looks like PostgreSQL's rule for multi-character
// operator names ending in `+`/`-` — confirm against the PostgreSQL docs.
const SPECIAL_OP_CHARS: TokenSet = TokenSet::new(&[
SyntaxKind::TILDE,
SyntaxKind::BANG,
SyntaxKind::AT,
SyntaxKind::POUND,
SyntaxKind::PERCENT,
SyntaxKind::CARET,
SyntaxKind::AMP,
SyntaxKind::PIPE,
SyntaxKind::BACKTICK,
SyntaxKind::QUESTION,
]);
impl<'t> Parser<'t> {
fn new(inp: &'t Input) -> Parser<'t> {
Parser {
inp,
pos: 0,
events: vec![],
steps: Cell::new(0),
}
}
/// Consumes `kind` if the parser is currently at it.
///
/// Returns `false` without touching any state when the parser is not at
/// `kind`, and `true` after consuming it otherwise.
///
/// * Glued two-character punctuation (`COLON_EQ`, `NEQ`, `NEQB`, `LTEQ`,
///   `FAT_ARROW`, `GTEQ`) is recorded as one token spanning two raw tokens.
/// * Composite kinds (multi-keyword operators, `COLON_COLON`,
///   `OPERATOR_CALL`, `CUSTOM_OP`, ...) are recorded as a node of that kind
///   wrapping the individually bumped constituent tokens.
/// * Every other kind is recorded as a single token.
pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
    if !self.at(kind) {
        return false;
    }
    match kind {
        SyntaxKind::COLON_EQ
        | SyntaxKind::NEQ
        | SyntaxKind::NEQB
        | SyntaxKind::LTEQ
        | SyntaxKind::FAT_ARROW
        | SyntaxKind::GTEQ => self.do_bump(kind, 2),

        // Fixed token sequences wrapped in a node.
        SyntaxKind::SIMILAR_TO => {
            self.bump_composite(kind, &[SyntaxKind::SIMILAR_KW, SyntaxKind::TO_KW])
        }
        SyntaxKind::AT_TIME_ZONE => self.bump_composite(
            kind,
            &[SyntaxKind::AT_KW, SyntaxKind::TIME_KW, SyntaxKind::ZONE_KW],
        ),
        SyntaxKind::AT_LOCAL => {
            self.bump_composite(kind, &[SyntaxKind::AT_KW, SyntaxKind::LOCAL_KW])
        }
        SyntaxKind::COLON_COLON => {
            self.bump_composite(kind, &[SyntaxKind::COLON, SyntaxKind::COLON])
        }
        SyntaxKind::NOT_SIMILAR_TO => self.bump_composite(
            kind,
            &[
                SyntaxKind::NOT_KW,
                SyntaxKind::SIMILAR_KW,
                SyntaxKind::TO_KW,
            ],
        ),
        SyntaxKind::IS_NOT_DISTINCT_FROM => self.bump_composite(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::NOT_KW,
                SyntaxKind::DISTINCT_KW,
                SyntaxKind::FROM_KW,
            ],
        ),
        SyntaxKind::IS_DISTINCT_FROM => self.bump_composite(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::DISTINCT_KW,
                SyntaxKind::FROM_KW,
            ],
        ),
        SyntaxKind::NOT_LIKE => {
            self.bump_composite(kind, &[SyntaxKind::NOT_KW, SyntaxKind::LIKE_KW])
        }
        SyntaxKind::NOT_ILIKE => {
            self.bump_composite(kind, &[SyntaxKind::NOT_KW, SyntaxKind::ILIKE_KW])
        }
        SyntaxKind::NOT_IN => {
            self.bump_composite(kind, &[SyntaxKind::NOT_KW, SyntaxKind::IN_KW])
        }
        SyntaxKind::IS_NOT => {
            self.bump_composite(kind, &[SyntaxKind::IS_KW, SyntaxKind::NOT_KW])
        }

        // `IS [NOT] [<form>] NORMALIZED`
        SyntaxKind::IS_NOT_NORMALIZED => {
            self.bump_normalized_predicate(kind, &[SyntaxKind::IS_KW, SyntaxKind::NOT_KW])
        }
        SyntaxKind::IS_NORMALIZED => {
            self.bump_normalized_predicate(kind, &[SyntaxKind::IS_KW])
        }

        // `IS [NOT] JSON [OBJECT | ARRAY | VALUE | SCALAR]` followed by the
        // optional keys-unique clause.
        SyntaxKind::IS_JSON => {
            self.bump_json_predicate(kind, &[SyntaxKind::IS_KW, SyntaxKind::JSON_KW])
        }
        SyntaxKind::IS_NOT_JSON => self.bump_json_predicate(
            kind,
            &[SyntaxKind::IS_KW, SyntaxKind::NOT_KW, SyntaxKind::JSON_KW],
        ),
        SyntaxKind::IS_NOT_JSON_OBJECT => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::NOT_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::OBJECT_KW,
            ],
        ),
        SyntaxKind::IS_NOT_JSON_ARRAY => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::NOT_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::ARRAY_KW,
            ],
        ),
        SyntaxKind::IS_NOT_JSON_VALUE => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::NOT_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::VALUE_KW,
            ],
        ),
        SyntaxKind::IS_NOT_JSON_SCALAR => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::NOT_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::SCALAR_KW,
            ],
        ),
        SyntaxKind::IS_JSON_OBJECT => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::OBJECT_KW,
            ],
        ),
        SyntaxKind::IS_JSON_ARRAY => self.bump_json_predicate(
            kind,
            &[SyntaxKind::IS_KW, SyntaxKind::JSON_KW, SyntaxKind::ARRAY_KW],
        ),
        SyntaxKind::IS_JSON_VALUE => self.bump_json_predicate(
            kind,
            &[SyntaxKind::IS_KW, SyntaxKind::JSON_KW, SyntaxKind::VALUE_KW],
        ),
        SyntaxKind::IS_JSON_SCALAR => self.bump_json_predicate(
            kind,
            &[
                SyntaxKind::IS_KW,
                SyntaxKind::JSON_KW,
                SyntaxKind::SCALAR_KW,
            ],
        ),

        SyntaxKind::OPERATOR_CALL => self.bump_operator_call(),

        SyntaxKind::CUSTOM_OP => {
            let m = self.start();
            // The length is computed once, before any token is consumed.
            let len = self.op_len();
            for _ in 0..len {
                self.bump_any();
            }
            m.complete(self, SyntaxKind::CUSTOM_OP);
        }

        _ => self.do_bump(kind, 1),
    }
    true
}

/// Bumps each kind in `parts`, in order, inside a new node of kind `node`.
fn bump_composite(&mut self, node: SyntaxKind, parts: &[SyntaxKind]) {
    let m = self.start();
    for &part in parts {
        self.bump(part);
    }
    m.complete(self, node);
}

/// Like `bump_composite`, but also parses the optional JSON keys-unique
/// clause before closing the node.
fn bump_json_predicate(&mut self, node: SyntaxKind, parts: &[SyntaxKind]) {
    let m = self.start();
    for &part in parts {
        self.bump(part);
    }
    grammar::opt_json_keys_unique_clause(self);
    m.complete(self, node);
}

/// Bumps `prefix`, an optional Unicode normal form keyword (wrapped in its
/// own `UNICODE_NORMAL_FORM` node) and `NORMALIZED`, inside a node of kind `node`.
fn bump_normalized_predicate(&mut self, node: SyntaxKind, prefix: &[SyntaxKind]) {
    let m = self.start();
    for &part in prefix {
        self.bump(part);
    }
    if matches!(
        self.current(),
        SyntaxKind::NFC_KW | SyntaxKind::NFD_KW | SyntaxKind::NFKC_KW | SyntaxKind::NFKD_KW
    ) {
        let fm = self.start();
        self.bump_any();
        fm.complete(self, SyntaxKind::UNICODE_NORMAL_FORM);
    }
    self.bump(SyntaxKind::NORMALIZED_KW);
    m.complete(self, node);
}

/// Bumps `OPERATOR ( [ident .] [ident .] <operator> )` inside an
/// `OPERATOR_CALL` node, reporting errors for missing pieces.
fn bump_operator_call(&mut self) {
    let m = self.start();
    self.bump(SyntaxKind::OPERATOR_KW);
    self.bump(SyntaxKind::L_PAREN);
    // Up to two `ident .` qualifiers may precede the operator.
    for _ in 0..2 {
        if self.eat(SyntaxKind::IDENT) {
            self.expect(SyntaxKind::DOT);
        }
    }
    if let Some(op) = grammar::current_operator(self) {
        self.bump(op);
    } else {
        self.error("expected operator");
    }
    self.expect(SyntaxKind::R_PAREN);
    m.complete(self, SyntaxKind::OPERATOR_CALL);
}
/// Checks whether the tokens at offsets `n` and `n + 1` have kinds `k1` and `k2`.
///
/// With `TrivaBetween::NotAllowed` the first token must additionally be
/// joint (per `Input::is_joint`) and `next_not_joined_op(n + 1)` must hold.
fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, triva: TrivaBetween) -> bool {
    let first = self.pos + n;
    if self.inp.kind(first) != k1 || self.inp.kind(first + 1) != k2 {
        return false;
    }
    match triva {
        TrivaBetween::Allowed => true,
        TrivaBetween::NotAllowed => {
            self.inp.is_joint(first) && self.next_not_joined_op(n + 1)
        }
    }
}
/// Checks whether the three tokens starting at offset `n` have kinds
/// `k1`, `k2`, `k3`, in that order.
fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
    let base = self.pos + n;
    [k1, k2, k3]
        .iter()
        .enumerate()
        .all(|(offset, &expected)| self.inp.kind(base + offset) == expected)
}
/// Checks whether the four tokens starting at offset `n` have kinds
/// `k1`, `k2`, `k3`, `k4`, in that order.
fn at_composite4(
    &self,
    n: usize,
    k1: SyntaxKind,
    k2: SyntaxKind,
    k3: SyntaxKind,
    k4: SyntaxKind,
) -> bool {
    let base = self.pos + n;
    [k1, k2, k3, k4]
        .iter()
        .enumerate()
        .all(|(offset, &expected)| self.inp.kind(base + offset) == expected)
}
/// Decides whether the token at offset `n` ends its operator.
///
/// Returns `true` when the token at `n + 1` is not in `OPERATOR_FIRST`,
/// or the token at `n` is not joint with its successor, or the operator
/// run measured by `op_len` is exactly `n + 1` tokens long.
fn next_not_joined_op(&self, n: usize) -> bool {
    !self.nth_at_ts(n + 1, OPERATOR_FIRST)
        || !self.inp.is_joint(self.pos + n)
        || self.op_len() == n + 1
}
/// Returns how many tokens, starting at the current one, make up a single
/// operator. Returns 0 when the current token is not in `OPERATOR_FIRST`.
fn op_len(&self) -> usize {
if !self.at_ts(OPERATOR_FIRST) {
return 0;
}
let mut len = 1;
let mut has_special = self.at_ts(SPECIAL_OP_CHARS);
// Extend the run while the last accepted token is joint with its
// successor and that successor is in `OPERATOR_FIRST`.
while self.inp.is_joint(self.pos + len - 1) && self.nth_at_ts(len, OPERATOR_FIRST) {
has_special |= self.nth_at_ts(len, SPECIAL_OP_CHARS);
len += 1;
}
// Without any token from `SPECIAL_OP_CHARS`, trailing `+`/`-` tokens are
// dropped from the run; the first token is always kept.
if !has_special {
while len > 1 && self.nth_at_ts(len - 1, OPERATOR_SIGN) {
len -= 1;
}
}
len
}
/// Checks whether the current token's kind is in `kinds`.
pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
    let current = self.nth(0);
    kinds.contains(current)
}
/// Opens a new syntax node at the current position.
///
/// A tombstone event is appended to the event list and a [`Marker`]
/// pointing at its index is returned; the marker must later be completed
/// or abandoned.
pub(crate) fn start(&mut self) -> Marker {
    let marker = Marker::new(self.events.len() as u32);
    self.push_event(Event::tombstone());
    marker
}
/// Consumes `kind`, which the parser must currently be at.
///
/// # Panics
///
/// Panics if `eat(kind)` returns `false`.
pub(crate) fn bump(&mut self, kind: SyntaxKind) {
assert!(self.eat(kind));
}
/// Consumes the current token as a single raw token, whatever its kind.
///
/// Does nothing when the current token is `EOF`.
pub(crate) fn bump_any(&mut self) {
    match self.nth(0) {
        SyntaxKind::EOF => {}
        kind => self.do_bump(kind, 1),
    }
}
/// Consumes `kind` if present; otherwise records an "expected ..." error.
///
/// Returns whether the token was consumed. Nothing is consumed on failure.
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
    let eaten = self.eat(kind);
    if !eaten {
        self.error(format!("expected {kind:?}"));
    }
    eaten
}
/// Records `message` as an error via `err_recover` with an empty recovery set.
// NOTE(review): assuming `TokenSet::EMPTY` contains no kinds, the current
// token is always wrapped in an `ERROR` node — confirm against `TokenSet`.
pub(crate) fn err_and_bump(&mut self, message: &str) {
self.err_recover(message, TokenSet::EMPTY);
}
/// Records `message` as an error, skipping the current token unless it is
/// in `recovery`.
///
/// When the current token is in `recovery`, only the error is recorded and
/// nothing is consumed. Otherwise the error and the current token are
/// wrapped together in an `ERROR` node.
pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
    if !self.at_ts(recovery) {
        let m = self.start();
        self.error(message);
        self.bump_any();
        m.complete(self, SyntaxKind::ERROR);
    } else {
        self.error(message);
    }
}
/// Advances the cursor by `n_raw_tokens` and records them as one token of
/// kind `kind`.
///
/// Also resets the step counter, since consuming input counts as progress.
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
    self.steps.set(0);
    self.pos += usize::from(n_raw_tokens);
    self.push_event(Event::Token { kind, n_raw_tokens });
}
/// Appends `event` to the end of the event list.
fn push_event(&mut self, event: Event) {
self.events.push(event);
}
/// Consumes the parser and returns the events it recorded.
fn finish(self) -> Vec<Event> {
self.events
}
pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
let msg = message.into();
self.push_event(Event::Error { msg });
}
/// Checks whether the parser is currently at `kind`; shorthand for
/// `nth_at(0, kind)`.
#[must_use]
pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
self.nth_at(0, kind)
}
/// Checks whether the kind of the token `n` positions ahead is in `kinds`.
#[must_use]
pub(crate) fn nth_at_ts(&self, n: usize, kinds: TokenSet) -> bool {
    let kind = self.nth(n);
    kinds.contains(kind)
}
/// Checks whether `kind` starts at the token `n` positions ahead.
///
/// Composite kinds are recognised by looking at several consecutive raw
/// tokens:
///
/// * glued punctuation (`=>`, `:=`, `::`, `!=`, `<>`, `<=`, `>=`) requires
///   the two tokens to be joint (`TrivaBetween::NotAllowed`);
/// * multi-keyword operators only compare token kinds;
/// * `IS [NOT] NORMALIZED` accepts an optional normal form keyword
///   (`NFC`, `NFD`, `NFKC`, `NFKD`) before `NORMALIZED`;
/// * `CUSTOM_OP` matches any token in `OPERATOR_FIRST`.
///
/// Every other kind is compared directly against the token at offset `n`.
/// All arms honour `n`, so lookahead with `n > 0` is consistent.
#[must_use]
pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
    let glued = |k1: SyntaxKind, k2: SyntaxKind| {
        self.at_composite2(n, k1, k2, TrivaBetween::NotAllowed)
    };
    let pair = |k1: SyntaxKind, k2: SyntaxKind| {
        self.at_composite2(n, k1, k2, TrivaBetween::Allowed)
    };
    let triple = |k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind| {
        self.at_composite3(n, k1, k2, k3)
    };
    let quad = |k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind, k4: SyntaxKind| {
        self.at_composite4(n, k1, k2, k3, k4)
    };
    // `[<normal form>] NORMALIZED` starting `start` tokens ahead.
    let normalized_at = |start: usize| {
        let has_form = matches!(
            self.nth(start),
            SyntaxKind::NFC_KW | SyntaxKind::NFD_KW | SyntaxKind::NFKC_KW | SyntaxKind::NFKD_KW
        );
        let normalized_pos = if has_form { start + 1 } else { start };
        self.nth_at(normalized_pos, SyntaxKind::NORMALIZED_KW)
    };

    match kind {
        SyntaxKind::FAT_ARROW => glued(SyntaxKind::EQ, SyntaxKind::R_ANGLE),
        SyntaxKind::COLON_EQ => glued(SyntaxKind::COLON, SyntaxKind::EQ),
        SyntaxKind::COLON_COLON => glued(SyntaxKind::COLON, SyntaxKind::COLON),
        SyntaxKind::NEQ => glued(SyntaxKind::BANG, SyntaxKind::EQ),
        SyntaxKind::NEQB => glued(SyntaxKind::L_ANGLE, SyntaxKind::R_ANGLE),
        SyntaxKind::IS_NOT => pair(SyntaxKind::IS_KW, SyntaxKind::NOT_KW),
        SyntaxKind::NOT_LIKE => pair(SyntaxKind::NOT_KW, SyntaxKind::LIKE_KW),
        SyntaxKind::NOT_ILIKE => pair(SyntaxKind::NOT_KW, SyntaxKind::ILIKE_KW),
        SyntaxKind::NOT_IN => pair(SyntaxKind::NOT_KW, SyntaxKind::IN_KW),
        SyntaxKind::AT_TIME_ZONE => triple(
            SyntaxKind::AT_KW,
            SyntaxKind::TIME_KW,
            SyntaxKind::ZONE_KW,
        ),
        SyntaxKind::AT_LOCAL => pair(SyntaxKind::AT_KW, SyntaxKind::LOCAL_KW),
        SyntaxKind::IS_DISTINCT_FROM => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::DISTINCT_KW,
            SyntaxKind::FROM_KW,
        ),
        SyntaxKind::IS_NOT_DISTINCT_FROM => quad(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::DISTINCT_KW,
            SyntaxKind::FROM_KW,
        ),
        // These two arms previously ignored `n` and always inspected the
        // current position.
        SyntaxKind::IS_NORMALIZED => {
            self.nth_at(n, SyntaxKind::IS_KW) && normalized_at(n + 1)
        }
        SyntaxKind::IS_NOT_NORMALIZED => {
            self.nth_at(n, SyntaxKind::IS_KW)
                && self.nth_at(n + 1, SyntaxKind::NOT_KW)
                && normalized_at(n + 2)
        }
        SyntaxKind::NOT_SIMILAR_TO => triple(
            SyntaxKind::NOT_KW,
            SyntaxKind::SIMILAR_KW,
            SyntaxKind::TO_KW,
        ),
        SyntaxKind::SIMILAR_TO => pair(SyntaxKind::SIMILAR_KW, SyntaxKind::TO_KW),
        SyntaxKind::OPERATOR_CALL => pair(SyntaxKind::OPERATOR_KW, SyntaxKind::L_PAREN),
        SyntaxKind::IS_JSON => pair(SyntaxKind::IS_KW, SyntaxKind::JSON_KW),
        SyntaxKind::IS_NOT_JSON => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::JSON_KW,
        ),
        SyntaxKind::IS_NOT_JSON_OBJECT => quad(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::OBJECT_KW,
        ),
        SyntaxKind::IS_NOT_JSON_ARRAY => quad(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::ARRAY_KW,
        ),
        SyntaxKind::IS_NOT_JSON_VALUE => quad(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::VALUE_KW,
        ),
        SyntaxKind::IS_NOT_JSON_SCALAR => quad(
            SyntaxKind::IS_KW,
            SyntaxKind::NOT_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::SCALAR_KW,
        ),
        SyntaxKind::IS_JSON_OBJECT => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::OBJECT_KW,
        ),
        SyntaxKind::IS_JSON_ARRAY => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::ARRAY_KW,
        ),
        SyntaxKind::IS_JSON_VALUE => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::VALUE_KW,
        ),
        SyntaxKind::IS_JSON_SCALAR => triple(
            SyntaxKind::IS_KW,
            SyntaxKind::JSON_KW,
            SyntaxKind::SCALAR_KW,
        ),
        SyntaxKind::LTEQ => glued(SyntaxKind::L_ANGLE, SyntaxKind::EQ),
        SyntaxKind::GTEQ => glued(SyntaxKind::R_ANGLE, SyntaxKind::EQ),
        // Previously checked the current token regardless of `n`.
        SyntaxKind::CUSTOM_OP => self.nth_at_ts(n, OPERATOR_FIRST),
        _ => self.inp.kind(self.pos + n) == kind,
    }
}
/// Returns the kind of the current token; shorthand for `nth(0)`, so it
/// also counts as one lookup step.
#[must_use]
pub(crate) fn current(&self) -> SyntaxKind {
self.nth(0)
}
/// Returns the kind of the token `n` positions ahead of the cursor.
///
/// Each call increments the step counter, which `do_bump` resets.
///
/// # Panics
///
/// Panics with "the parser seems stuck" once `PARSER_STEP_LIMIT` lookups
/// have been made without consuming a token.
#[must_use]
fn nth(&self, n: usize) -> SyntaxKind {
    let taken = self.steps.get();
    assert!(
        (taken as usize) < PARSER_STEP_LIMIT,
        "the parser seems stuck"
    );
    self.steps.set(taken + 1);
    let target = self.pos + n;
    self.inp.kind(target)
}
}