mod generated;
mod language;
mod syntax_tree;
mod token_text;
pub(crate) mod grammar;
use crate::cst::Document;
use crate::cst::SelectionSet;
use crate::cst::Type;
use crate::lexer::Lexer;
use crate::Error;
use crate::LimitTracker;
use crate::Token;
use crate::TokenKind;
pub use generated::syntax_kind::SyntaxKind;
pub use language::SyntaxElement;
pub use language::SyntaxNode;
pub use language::SyntaxNodeChildren;
pub use language::SyntaxNodePtr;
pub use language::SyntaxToken;
use std::cell::RefCell;
use std::ops::ControlFlow;
use std::rc::Rc;
pub use syntax_tree::SyntaxTree;
pub(crate) use syntax_tree::SyntaxTreeBuilder;
pub(crate) use token_text::TokenText;
#[derive(Debug)]
pub struct Parser<'input> {
lexer: Lexer<'input>,
current_token: Option<Token<'input>>,
builder: Rc<RefCell<SyntaxTreeBuilder>>,
pending: Vec<PendingToken<'input>>,
errors: Vec<crate::Error>,
recursion_limit: LimitTracker,
accept_errors: bool,
}
#[derive(Debug)]
enum PendingToken<'input> {
Ignored(Token<'input>),
Error(String),
}
const DEFAULT_RECURSION_LIMIT: usize = 500;
impl<'input> Parser<'input> {
pub fn new(input: &'input str) -> Self {
let lexer = Lexer::new(input);
Self {
lexer,
current_token: None,
builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
pending: vec![],
errors: Vec::new(),
recursion_limit: LimitTracker::new(DEFAULT_RECURSION_LIMIT),
accept_errors: true,
}
}
pub fn recursion_limit(mut self, recursion_limit: usize) -> Self {
self.recursion_limit = LimitTracker::new(recursion_limit);
self
}
pub fn token_limit(mut self, token_limit: usize) -> Self {
self.lexer = self.lexer.with_limit(token_limit);
self
}
pub fn parse(mut self) -> SyntaxTree<Document> {
grammar::document::document(&mut self);
let builder = Rc::try_unwrap(self.builder)
.expect("More than one reference to builder left")
.into_inner();
let builder =
builder.finish_document(self.errors, self.recursion_limit, self.lexer.limit_tracker);
match builder {
syntax_tree::SyntaxTreeWrapper::Document(tree) => tree,
syntax_tree::SyntaxTreeWrapper::Type(_)
| syntax_tree::SyntaxTreeWrapper::FieldSet(_) => {
unreachable!("parse constructor can only construct a document")
}
}
}
pub fn parse_selection_set(mut self) -> SyntaxTree<SelectionSet> {
grammar::selection::field_set(&mut self);
let builder = Rc::try_unwrap(self.builder)
.expect("More than one reference to builder left")
.into_inner();
let builder = builder.finish_selection_set(
self.errors,
self.recursion_limit,
self.lexer.limit_tracker,
);
match builder {
syntax_tree::SyntaxTreeWrapper::FieldSet(tree) => tree,
syntax_tree::SyntaxTreeWrapper::Document(_)
| syntax_tree::SyntaxTreeWrapper::Type(_) => {
unreachable!("parse_selection_set constructor can only construct a selection set")
}
}
}
pub fn parse_type(mut self) -> SyntaxTree<Type> {
grammar::ty::ty(&mut self);
let builder = Rc::try_unwrap(self.builder)
.expect("More than one reference to builder left")
.into_inner();
let builder =
builder.finish_type(self.errors, self.recursion_limit, self.lexer.limit_tracker);
match builder {
syntax_tree::SyntaxTreeWrapper::Type(tree) => tree,
syntax_tree::SyntaxTreeWrapper::FieldSet(_)
| syntax_tree::SyntaxTreeWrapper::Document(_) => {
unreachable!("parse_type constructor can only construct a type")
}
}
}
pub(crate) fn at(&mut self, token: TokenKind) -> bool {
if let Some(t) = self.peek() {
if t == token {
return true;
}
return false;
}
false
}
pub(crate) fn bump(&mut self, kind: SyntaxKind) {
self.eat(kind);
self.skip_ignored();
}
pub(crate) fn skip_ignored(&mut self) {
while let Some(TokenKind::Comment | TokenKind::Whitespace | TokenKind::Comma) = self.peek()
{
let token = self.pop();
self.pending.push(PendingToken::Ignored(token));
}
}
pub(crate) fn push_ignored(&mut self) {
let pending = std::mem::take(&mut self.pending);
for item in pending {
match item {
PendingToken::Ignored(token) => {
let syntax_kind = match token.kind {
TokenKind::Comment => SyntaxKind::COMMENT,
TokenKind::Whitespace => SyntaxKind::WHITESPACE,
TokenKind::Comma => SyntaxKind::COMMA,
_ => unreachable!(),
};
self.push_token(syntax_kind, token);
}
PendingToken::Error(data) => {
self.builder.borrow_mut().token(SyntaxKind::ERROR, &data);
}
}
}
}
pub(crate) fn current(&mut self) -> Option<&Token<'input>> {
self.peek_token()
}
fn eat(&mut self, kind: SyntaxKind) {
self.push_ignored();
if self.current().is_none() {
return;
}
let token = self.pop();
self.push_token(kind, token);
}
pub(crate) fn limit_err<S: Into<String>>(&mut self, message: S) {
let current = if let Some(current) = self.current() {
current
} else {
return;
};
let err = Error::limit(message, current.index());
self.push_err(err);
self.accept_errors = false;
}
pub(crate) fn err_at_token(&mut self, current: &Token<'_>, message: &str) {
let err = if current.kind == TokenKind::Eof {
Error::eof(message, current.index())
} else {
Error::with_loc(message, current.data().to_string(), current.index())
};
self.push_err(err);
}
pub(crate) fn err(&mut self, message: &str) {
let current = if let Some(current) = self.current() {
current
} else {
return;
};
let err = if current.kind == TokenKind::Eof {
Error::eof(message, current.index())
} else {
Error::with_loc(message, current.data().to_string(), current.index())
};
self.push_err(err);
}
pub(crate) fn err_and_pop(&mut self, message: &str) {
self.push_ignored();
if self.current().is_none() {
return;
}
let current = self.pop();
let err = if current.kind == TokenKind::Eof {
Error::eof(message, current.index())
} else {
Error::with_loc(message, current.data().to_string(), current.index())
};
self.push_token(SyntaxKind::ERROR, current);
self.push_err(err);
self.skip_ignored();
}
pub(crate) fn expect(&mut self, token: TokenKind, kind: SyntaxKind) {
let Some(current) = self.current() else {
return;
};
let is_eof = current.kind == TokenKind::Eof;
let data = current.data();
let index = current.index();
if self.at(token) {
self.bump(kind);
return;
}
let err = if is_eof {
let message = format!("expected {kind:?}, got EOF");
Error::eof(message, index)
} else {
let message = format!("expected {kind:?}, got {data}");
Error::with_loc(message, data.to_string(), index)
};
self.push_err(err);
}
pub(crate) fn push_err(&mut self, err: crate::error::Error) {
if self.accept_errors {
self.errors.push(err);
}
}
fn next_token(&mut self) -> Option<Token<'input>> {
for res in &mut self.lexer {
match res {
Err(err) => {
if err.is_limit() {
self.accept_errors = false;
}
let data = err.data();
if !data.is_empty() {
self.pending.push(PendingToken::Error(data.to_owned()));
}
self.errors.push(err);
}
Ok(token) => {
return Some(token);
}
}
}
None
}
pub(crate) fn pop(&mut self) -> Token<'input> {
if let Some(token) = self.current_token.take() {
return token;
}
self.next_token()
.expect("Could not pop a token from the lexer")
}
pub(crate) fn push_token(&mut self, kind: SyntaxKind, token: Token) {
self.builder.borrow_mut().token(kind, token.data())
}
pub(crate) fn start_node(&mut self, kind: SyntaxKind) -> NodeGuard {
self.push_ignored();
self.builder.borrow_mut().start_node(kind);
let guard = NodeGuard::new(self.builder.clone());
self.skip_ignored();
guard
}
pub(crate) fn checkpoint_node(&mut self) -> Checkpoint {
self.push_ignored();
let checkpoint = self.builder.borrow().checkpoint();
Checkpoint::new(self.builder.clone(), checkpoint)
}
pub(crate) fn peek(&mut self) -> Option<TokenKind> {
self.peek_token().map(|token| token.kind())
}
pub(crate) fn peek_while(
&mut self,
mut run: impl FnMut(&mut Parser, TokenKind) -> ControlFlow<()>,
) {
while let Some(kind) = self.peek() {
let before = self.current_token.clone();
match run(self, kind) {
ControlFlow::Break(()) => break,
ControlFlow::Continue(()) => {
debug_assert!(
before != self.current_token,
"peek_while() iteration must advance parsing"
);
}
}
}
}
pub(crate) fn peek_while_kind(&mut self, expect: TokenKind, mut run: impl FnMut(&mut Parser)) {
while let Some(kind) = self.peek() {
if kind != expect {
break;
}
let before = self.current_token.clone();
run(self);
debug_assert!(
before != self.current_token,
"peek_while_kind() iteration must advance parsing"
);
}
}
pub(crate) fn parse_separated_list(
&mut self,
separator: TokenKind,
separator_syntax: SyntaxKind,
mut run: impl FnMut(&mut Parser),
) {
if matches!(self.peek(), Some(kind) if kind == separator) {
self.bump(separator_syntax);
}
run(self);
self.peek_while_kind(separator, |p| {
p.bump(separator_syntax);
run(p);
});
}
pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> {
if self.current_token.is_none() {
self.current_token = self.next_token();
}
self.current_token.as_ref()
}
pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token<'input>> {
self.peek_n_inner(n)
}
pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
self.peek_n_inner(n).map(|token| token.kind())
}
fn peek_n_inner(&self, n: usize) -> Option<Token<'input>> {
self.current_token
.iter()
.cloned()
.map(Result::Ok)
.chain(self.lexer.clone())
.filter_map(Result::ok)
.filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
.nth(n - 1)
}
pub(crate) fn peek_data(&mut self) -> Option<&'input str> {
self.peek_token().map(|token| token.data())
}
pub(crate) fn peek_data_n(&self, n: usize) -> Option<&'input str> {
self.peek_token_n(n).map(|token| token.data())
}
}
#[must_use]
pub(crate) struct NodeGuard {
builder: Rc<RefCell<SyntaxTreeBuilder>>,
}
impl NodeGuard {
fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>) -> Self {
Self { builder }
}
pub(crate) fn finish_node(self) {
drop(self);
}
}
impl Drop for NodeGuard {
fn drop(&mut self) {
self.builder.borrow_mut().finish_node();
}
}
pub(crate) struct Checkpoint {
builder: Rc<RefCell<SyntaxTreeBuilder>>,
checkpoint: rowan::Checkpoint,
}
impl Checkpoint {
fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>, checkpoint: rowan::Checkpoint) -> Self {
Self {
builder,
checkpoint,
}
}
pub(crate) fn wrap_node(self, kind: SyntaxKind) -> NodeGuard {
self.builder.borrow_mut().wrap_node(self.checkpoint, kind);
NodeGuard::new(self.builder)
}
}
#[cfg(test)]
mod tests {
use super::DEFAULT_RECURSION_LIMIT;
use crate::cst;
use crate::Error;
use crate::Parser;
use crate::SyntaxTree;
use expect_test::expect;
#[test]
fn limited_mid_node() {
let source = r#"
type Query {
field(arg1: Int, arg2: Int, arg3: Int, arg4: Int, arg5: Int, arg6: Int): Int
}
"#;
let parser = Parser::new(source)
.token_limit(18);
let tree = parser.parse();
let mut errors = tree.errors();
assert_eq!(
errors.next(),
Some(&Error::limit("token limit reached, aborting lexing", 65))
);
assert_eq!(errors.next(), None);
}
#[test]
fn multiple_limits() {
let source = r#"
query {
a {
a {
a {
a
}
}
}
}
"#;
let parser = Parser::new(source).recursion_limit(10).token_limit(22);
let cst = parser.parse();
let errors = cst.errors().collect::<Vec<_>>();
assert_eq!(
errors,
&[&Error::limit("token limit reached, aborting lexing", 170),]
);
let parser = Parser::new(source).recursion_limit(3).token_limit(200);
let cst = parser.parse();
let errors = cst.errors().collect::<Vec<_>>();
assert_eq!(
errors,
&[&Error::limit("parser recursion limit reached", 121),]
);
}
#[test]
fn syntax_errors_and_limits() {
let source = r#"
type Query {
field(arg1: Int, missing_arg): Int
# limit reached here
field2: !String
} and then some garbage
"#;
let parser = Parser::new(source).token_limit(22);
let cst = parser.parse();
let mut errors = cst.errors();
assert_eq!(
errors.next(),
Some(&Error::with_loc("expected a Name", ")".to_string(), 70))
);
assert_eq!(
errors.next(),
Some(&Error::limit("token limit reached, aborting lexing", 113))
);
assert_eq!(errors.next(), None);
let tree = expect![[r##"
DOCUMENT@0..113
WHITESPACE@0..13 "\n "
OBJECT_TYPE_DEFINITION@13..76
type_KW@13..17 "type"
WHITESPACE@17..18 " "
NAME@18..23
IDENT@18..23 "Query"
WHITESPACE@23..24 " "
FIELDS_DEFINITION@24..76
L_CURLY@24..25 "{"
WHITESPACE@25..42 "\n "
FIELD_DEFINITION@42..76
NAME@42..47
IDENT@42..47 "field"
ARGUMENTS_DEFINITION@47..71
L_PAREN@47..48 "("
INPUT_VALUE_DEFINITION@48..57
NAME@48..52
IDENT@48..52 "arg1"
COLON@52..53 ":"
WHITESPACE@53..54 " "
NAMED_TYPE@54..57
NAME@54..57
IDENT@54..57 "Int"
COMMA@57..58 ","
WHITESPACE@58..59 " "
INPUT_VALUE_DEFINITION@59..70
NAME@59..70
IDENT@59..70 "missing_arg"
R_PAREN@70..71 ")"
COLON@71..72 ":"
WHITESPACE@72..73 " "
NAMED_TYPE@73..76
NAME@73..76
IDENT@73..76 "Int"
WHITESPACE@76..93 "\n "
COMMENT@93..113 "# limit reached here"
"##]];
tree.assert_eq(&format!("{:#?}", cst.document().syntax));
}
#[test]
fn tree_with_syntax_errors() {
use crate::cst::Definition;
let source = r#"
garbage type Query implements X {
field(arg: Int): Int
} garbage :,, (|) interface X {}
"#;
let cst = Parser::new(source).parse();
let mut definitions = cst.document().definitions();
let query_def = definitions.next().unwrap();
let interface_def = definitions.next().unwrap();
assert_eq!(definitions.next(), None);
assert!(matches!(query_def, Definition::ObjectTypeDefinition(_)));
assert!(matches!(
interface_def,
Definition::InterfaceTypeDefinition(_)
));
}
#[test]
fn token_limit() {
let cst = Parser::new("type Query { a a a a a a a a a }")
.token_limit(100)
.parse();
assert_eq!(cst.token_limit().high, 26);
}
#[test]
#[allow(clippy::single_char_add_str)]
fn recursion_limit() {
const SMASH_THE_STACK_FACTOR: usize = 50;
wide(2, |ast| assert_eq!(ast.errors, []));
wide(DEFAULT_RECURSION_LIMIT - 2, |ast| {
assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
});
wide(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |_ast| {
});
deep(2, |ast| assert_eq!(ast.errors, []));
deep(DEFAULT_RECURSION_LIMIT - 2, |ast| {
assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
});
deep(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |ast| {
assert_eq!(ast.errors.len(), 1);
assert!(ast.errors[0].message.contains("recursion limit reached"));
});
fn deep(count: usize, each: impl Fn(SyntaxTree)) {
let check = |input: String| each(Parser::new(&input).parse());
let mut doc = String::new();
doc.push_str("type O { field: ");
doc.push_str(&"[".repeat(count));
doc.push_str("Int");
doc.push_str(&"]".repeat(count));
doc.push_str(" }");
check(doc);
let mut doc = String::new();
doc.push_str("type O { field(arg: T = ");
doc.push_str(&"[".repeat(count));
doc.push_str("0");
doc.push_str(&"]".repeat(count));
doc.push_str("): Int }");
check(doc);
let mut doc = String::new();
doc.push_str("type O { field(arg: T = ");
doc.push_str(&"{f: ".repeat(count));
doc.push_str("0");
doc.push_str(&"}".repeat(count));
doc.push_str("): Int }");
check(doc);
let mut doc = String::new();
doc.push_str("query { ");
doc.push_str(&"f { ".repeat(count));
doc.push_str("f ");
doc.push_str(&"}".repeat(count));
doc.push_str("}");
check(doc);
}
fn wide(count: usize, each: impl Fn(SyntaxTree)) {
let check = |input: String| each(Parser::new(&input).parse());
let mut doc = String::new();
doc.push_str(&"directive @d on FIELD ".repeat(count));
check(doc);
let mut doc = String::new();
doc.push_str("scalar Url");
doc.push_str(&" @d".repeat(count));
check(doc);
let mut doc = String::new();
doc.push_str("schema {");
doc.push_str(&" query: Q".repeat(count));
doc.push_str(" }");
check(doc);
let mut doc = String::new();
doc.push_str("type O implements");
doc.push_str(&" & I".repeat(count));
check(doc);
let mut doc = String::new();
doc.push_str("type O {");
doc.push_str(&" f: T".repeat(count));
doc.push_str("}");
check(doc);
let mut doc = String::new();
doc.push_str("enum E {");
doc.push_str(&" V".repeat(count));
doc.push_str("}");
check(doc);
let mut doc = String::new();
doc.push_str("union U = ");
doc.push_str(&" | T".repeat(count));
check(doc);
let mut doc = String::new();
doc.push_str("input In {");
doc.push_str(&" f: T".repeat(count));
doc.push_str("}");
check(doc);
let mut doc = String::new();
doc.push_str("type O { field(arg: T = {");
doc.push_str(&" f: 0".repeat(count));
doc.push_str(" }): Int }");
check(doc);
let mut doc = String::new();
doc.push_str("type O { field(arg: T = [");
doc.push_str(&" 0,".repeat(count));
doc.push_str(" ]): Int }");
check(doc);
let mut doc = String::new();
doc.push_str("type O { field(");
doc.push_str(&"a: T ".repeat(count));
doc.push_str("): Int }");
check(doc);
let mut doc = String::new();
doc.push_str("query {");
doc.push_str(&" f".repeat(count));
doc.push_str(" }");
check(doc);
let mut doc = String::new();
doc.push_str("query { f(");
doc.push_str(&" a: 0".repeat(count));
doc.push_str(") }");
check(doc);
let mut doc = String::new();
doc.push_str("query Q(");
doc.push_str(&" $v: Int".repeat(count));
doc.push_str(" ) { f }");
check(doc);
}
}
#[test]
fn parse_field_set() {
let source = r#"{ a }"#;
let parser = Parser::new(source);
let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
let errors = cst.errors().collect::<Vec<_>>();
assert_eq!(errors.len(), 0);
let sel_set: cst::SelectionSet = cst.field_set();
let _ = sel_set.selections().map(|sel| {
if let cst::Selection::Field(f) = sel {
assert_eq!(f.name().unwrap().text().as_ref(), "a")
} else {
panic!("no field a in field set selection")
}
});
let source = r#"a { a }"#;
let parser = Parser::new(source);
let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
let errors = cst.errors().collect::<Vec<_>>();
assert_eq!(errors.len(), 0);
let sel_set: cst::SelectionSet = cst.field_set();
let _ = sel_set.selections().map(|sel| {
if let cst::Selection::Field(f) = sel {
assert_eq!(f.name().unwrap().text().as_ref(), "a")
} else {
panic!("no field a in field set selection")
}
});
}
#[test]
fn no_infinite_loop() {
let source = r#"{ ..."#;
let parser = Parser::new(source).token_limit(3);
let _cst = parser.parse();
}
fn check_char_boundaries(node: &crate::SyntaxNode, source: &str) {
let range = node.text_range();
let start: usize = range.start().into();
let end: usize = range.end().into();
assert!(
source.is_char_boundary(start),
"Node {:?} start {} is not a char boundary",
node.kind(),
start
);
assert!(
source.is_char_boundary(end),
"Node {:?} end {} is not a char boundary",
node.kind(),
end
);
for child in node.children_with_tokens() {
match child {
rowan::NodeOrToken::Node(n) => check_char_boundaries(&n, source),
rowan::NodeOrToken::Token(t) => {
let range = t.text_range();
let start: usize = range.start().into();
let end: usize = range.end().into();
assert!(
source.is_char_boundary(start),
"Token {:?} start {} is not a char boundary",
t.kind(),
start
);
assert!(
source.is_char_boundary(end),
"Token {:?} end {} is not a char boundary",
t.kind(),
end
);
}
}
}
}
#[test]
fn lexer_error_cjk_preserves_byte_positions() {
use crate::cst::CstNode;
let source = "type Query { field: 中文类型 }";
let cst = Parser::new(source).parse();
assert!(!cst.errors().collect::<Vec<_>>().is_empty());
check_char_boundaries(cst.document().syntax(), source);
}
#[test]
fn lexer_error_mixed_preserves_byte_positions() {
use crate::cst::CstNode;
let source = "type Query { f1: @#$ f2: 日本語 f3: !!! }";
let cst = Parser::new(source).parse();
assert!(!cst.errors().collect::<Vec<_>>().is_empty());
check_char_boundaries(cst.document().syntax(), source);
}
#[test]
fn lexer_error_emoji_preserves_byte_positions() {
use crate::cst::CstNode;
let source = "type Query { field: 🚀🌍 }";
let cst = Parser::new(source).parse();
assert!(!cst.errors().collect::<Vec<_>>().is_empty());
check_char_boundaries(cst.document().syntax(), source);
}
}