use crate::syntax_kind::SyntaxKind;
use rowan::GreenNode;
use rowan::GreenNodeBuilder;
/// A syntax error recorded by the parser.
///
/// Errors are collected in a list and returned next to the tree by [`parse`];
/// recording one does not stop parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyntaxError {
/// Human-readable description of the problem.
pub message: String,
/// Total byte length of the token text that had been consumed when the
/// error was recorded (a byte offset into the input, provided the tokens
/// cover the input without gaps).
pub offset: usize,
}
pub fn parse(tokens: Vec<(SyntaxKind, &str)>) -> (GreenNode, Vec<SyntaxError>) {
let mut parser = Parser::new(tokens);
parser.parse_root();
(parser.builder.finish(), parser.errors)
}
/// Parser state: a cursor over the token list together with the tree builder
/// and the errors collected so far.
struct Parser<'a> {
/// Input tokens as `(kind, text)` pairs.
tokens: Vec<(SyntaxKind, &'a str)>,
/// Index into `tokens` of the current (not yet consumed) token.
pos: usize,
/// Builder that receives every node and token as it is consumed.
builder: GreenNodeBuilder<'static>,
/// Syntax errors recorded so far, in the order they were found.
errors: Vec<SyntaxError>,
/// Sum of the byte lengths of all tokens consumed so far; stamped onto
/// each recorded error.
offset: usize,
}
impl<'a> Parser<'a> {
    /// Creates a parser positioned on the first token, with an empty tree
    /// builder and no recorded errors.
    fn new(tokens: Vec<(SyntaxKind, &'a str)>) -> Self {
        Self {
            tokens,
            pos: 0,
            builder: GreenNodeBuilder::new(),
            errors: Vec::new(),
            offset: 0,
        }
    }

    /// Returns `true` for the token kinds that are skipped between the parts
    /// of a directive: whitespace and newlines.
    fn is_trivia(kind: SyntaxKind) -> bool {
        matches!(kind, SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE)
    }

    /// Kind of the token at `index`, or `None` when `index` is out of range.
    fn kind_at(&self, index: usize) -> Option<SyntaxKind> {
        self.tokens.get(index).map(|&(kind, _)| kind)
    }

    /// Kind of the current token, or `None` at end of input.
    fn current(&self) -> Option<SyntaxKind> {
        self.kind_at(self.pos)
    }

    /// Text of the current token, or the empty string at end of input.
    fn current_text(&self) -> &'a str {
        self.tokens
            .get(self.pos)
            .map(|&(_, text)| text)
            .unwrap_or("")
    }

    /// Returns `true` when the current token has the given kind.
    fn at(&self, kind: SyntaxKind) -> bool {
        self.current() == Some(kind)
    }

    /// Returns `true` once every token has been consumed.
    fn at_end(&self) -> bool {
        self.pos >= self.tokens.len()
    }

    /// Consumes the current token: adds it to the tree under the node that is
    /// currently open and advances both the cursor and the byte offset.
    ///
    /// Does nothing at end of input.
    fn bump(&mut self) {
        if let Some(&(kind, text)) = self.tokens.get(self.pos) {
            self.builder.token(kind.into(), text);
            self.offset += text.len();
            self.pos += 1;
        }
    }

    /// Consumes consecutive whitespace and newline tokens.
    fn eat_trivia(&mut self) {
        while matches!(self.current(), Some(kind) if Self::is_trivia(kind)) {
            self.bump();
        }
    }

    /// Kind of the first token at or after the cursor that is neither
    /// whitespace nor a newline, without consuming anything.
    fn peek_non_trivia(&self) -> Option<SyntaxKind> {
        self.tokens
            .iter()
            .skip(self.pos)
            .map(|&(kind, _)| kind)
            .find(|&kind| !Self::is_trivia(kind))
    }

    /// Records a syntax error at the current byte offset.
    fn error(&mut self, message: impl Into<String>) {
        self.errors.push(SyntaxError {
            message: message.into(),
            offset: self.offset,
        });
    }

    /// Consumes the current token wrapped in its own `ERROR` node.
    fn bump_as_error_node(&mut self) {
        self.builder.start_node(SyntaxKind::ERROR.into());
        self.bump();
        self.builder.finish_node();
    }

    /// Parses the whole input as the children of a single `ROOT` node.
    fn parse_root(&mut self) {
        self.builder.start_node(SyntaxKind::ROOT.into());
        self.parse_items(false);
        self.builder.finish_node();
    }

    /// Parses a sequence of items (directives, comments, blank lines, stray
    /// tokens) until end of input.
    ///
    /// When `in_block` is `true` the loop also stops in front of a `}` and
    /// leaves it for the caller to consume. Otherwise a `}` is reported as an
    /// error and wrapped in an `ERROR` node.
    fn parse_items(&mut self, in_block: bool) {
        while let Some(kind) = self.current() {
            match kind {
                SyntaxKind::R_BRACE if in_block => break,
                SyntaxKind::R_BRACE => {
                    self.error("unexpected '}'");
                    self.bump_as_error_node();
                }
                SyntaxKind::WHITESPACE if self.is_blank_line_start() => self.parse_blank_line(),
                SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::COMMENT => self.bump(),
                kind if is_directive_start(kind) => self.parse_directive(),
                // The token already has kind ERROR, so it is added as-is
                // rather than wrapped in an additional ERROR node.
                SyntaxKind::ERROR => {
                    self.error("unexpected token");
                    self.bump();
                }
                other => {
                    self.error(format!("unexpected token: {:?}", other));
                    self.bump_as_error_node();
                }
            }
        }
    }

    /// Returns `true` when the current token is whitespace that makes up an
    /// entire line: it is directly followed by a newline and is either the
    /// very first token or directly preceded by a newline.
    fn is_blank_line_start(&self) -> bool {
        self.at(SyntaxKind::WHITESPACE)
            && self.kind_at(self.pos + 1) == Some(SyntaxKind::NEWLINE)
            && (self.pos == 0 || self.kind_at(self.pos - 1) == Some(SyntaxKind::NEWLINE))
    }

    /// Wraps the current whitespace token and the newline after it in a
    /// `BLANK_LINE` node.
    ///
    /// Callers are expected to have checked [`Self::is_blank_line_start`].
    fn parse_blank_line(&mut self) {
        self.builder.start_node(SyntaxKind::BLANK_LINE.into());
        self.bump(); // whitespace
        self.bump(); // newline
        self.builder.finish_node();
    }

    /// Parses one directive into a `DIRECTIVE` node: its name, its arguments,
    /// and then either a terminating `;` (plus an optional trailing comment)
    /// or a `{ ... }` block.
    ///
    /// A directive whose name ends in `_by_lua_block` has its block consumed
    /// verbatim by [`Self::parse_raw_block`]. If neither `;` nor `{` follows
    /// the arguments, an error is recorded and the node is closed as it is.
    fn parse_directive(&mut self) {
        self.builder.start_node(SyntaxKind::DIRECTIVE.into());

        // Inspect the name before it is consumed; only this yes/no answer is
        // needed later, so no copy of the text is made.
        let is_lua_block = self.current_text().ends_with("_by_lua_block");
        self.bump();
        self.parse_arguments();

        match self.peek_non_trivia() {
            Some(SyntaxKind::SEMICOLON) => {
                self.eat_trivia();
                self.bump();
                self.eat_trailing_comment();
            }
            Some(SyntaxKind::L_BRACE) => {
                self.eat_trivia();
                if is_lua_block {
                    self.parse_raw_block();
                } else {
                    self.parse_block();
                }
            }
            // Trivia is deliberately left unconsumed here so the enclosing
            // item loop handles it.
            _ => self.error("expected ';' or '{'"),
        }

        self.builder.finish_node();
    }

    /// Consumes the directive's arguments together with the trivia in front
    /// of each one.
    ///
    /// Stops, without consuming the trivia, as soon as the next non-trivia
    /// token is not an argument kind or the input ends.
    fn parse_arguments(&mut self) {
        while matches!(self.peek_non_trivia(), Some(kind) if is_argument_kind(kind)) {
            self.eat_trivia();
            self.bump();
        }
    }

    /// Consumes a whitespace token followed by a comment, if that is exactly
    /// what comes next, so that the comment lands inside the open node.
    ///
    /// A comment that is not preceded by a whitespace token is left alone.
    fn eat_trailing_comment(&mut self) {
        let comment_after_space = self.at(SyntaxKind::WHITESPACE)
            && self.kind_at(self.pos + 1) == Some(SyntaxKind::COMMENT);
        if comment_after_space {
            self.bump(); // whitespace
            self.bump(); // comment
        }
    }

    /// Parses a `{ ... }` block into a `BLOCK` node; the cursor must be on the
    /// opening brace.
    ///
    /// Records an error when the closing brace is missing.
    fn parse_block(&mut self) {
        self.builder.start_node(SyntaxKind::BLOCK.into());
        self.bump(); // opening brace
        self.parse_items(true);
        if self.at(SyntaxKind::R_BRACE) {
            self.bump();
        } else {
            self.error("expected '}'");
        }
        self.builder.finish_node();
    }

    /// Consumes a `{ ... }` block verbatim into a `BLOCK` node, tracking only
    /// brace depth to find the matching closing brace; the cursor must be on
    /// the opening brace.
    ///
    /// Every token up to and including the matching `}` is added flat, with no
    /// nested nodes. Records an error when the input ends first.
    fn parse_raw_block(&mut self) {
        self.builder.start_node(SyntaxKind::BLOCK.into());
        self.bump(); // opening brace

        let mut depth: u32 = 1;
        while depth > 0 && !self.at_end() {
            match self.current() {
                Some(SyntaxKind::L_BRACE) => depth += 1,
                Some(SyntaxKind::R_BRACE) => depth -= 1,
                _ => {}
            }
            // Every token is kept, including the brace that closes the block.
            self.bump();
        }

        if depth > 0 {
            self.error("expected '}' for lua block");
        }
        self.builder.finish_node();
    }
}
/// Reports whether `kind` is one of the token kinds accepted as a directive
/// argument.
fn is_argument_kind(kind: SyntaxKind) -> bool {
    const ARGUMENT_KINDS: [SyntaxKind; 5] = [
        SyntaxKind::ARGUMENT,
        SyntaxKind::IDENT,
        SyntaxKind::VARIABLE,
        SyntaxKind::DOUBLE_QUOTED_STRING,
        SyntaxKind::SINGLE_QUOTED_STRING,
    ];
    ARGUMENT_KINDS.contains(&kind)
}
/// Reports whether a token of this kind may begin a directive, i.e. act as
/// the directive's name.
fn is_directive_start(kind: SyntaxKind) -> bool {
    kind == SyntaxKind::IDENT
        || kind == SyntaxKind::ARGUMENT
        || kind == SyntaxKind::VARIABLE
        || kind == SyntaxKind::DOUBLE_QUOTED_STRING
        || kind == SyntaxKind::SINGLE_QUOTED_STRING
}
// Parser tests: each one tokenizes a source string with `tokenize`, parses it
// with `parse`, and checks the resulting tree and/or the collected errors.
#[cfg(test)]
mod tests {
use super::*;
use crate::lexer_rowan::tokenize;
use crate::syntax_kind::SyntaxNode;
/// Tokenizes and parses `source`, returning the root syntax node and the
/// errors collected by the parser.
fn parse_source(source: &str) -> (SyntaxNode, Vec<SyntaxError>) {
let tokens = tokenize(source);
let (green, errors) = parse(tokens);
(SyntaxNode::new_root(green), errors)
}
/// Asserts that the text of the parsed tree equals `source` exactly.
/// Parse errors are ignored here.
fn assert_lossless(source: &str) {
let (root, _) = parse_source(source);
assert_eq!(
root.text().to_string(),
source,
"lossless round-trip failed"
);
}
/// Parses `source`, asserts that no errors were recorded, and returns the
/// root node for further inspection.
fn assert_no_errors(source: &str) -> SyntaxNode {
let (root, errors) = parse_source(source);
assert!(errors.is_empty(), "unexpected errors: {:?}", errors);
root
}
/// Returns the first direct child of `root` whose kind is `DIRECTIVE`.
/// Panics when there is none.
fn first_directive(root: &SyntaxNode) -> SyntaxNode {
root.children()
.find(|n| n.kind() == SyntaxKind::DIRECTIVE)
.expect("no DIRECTIVE node found")
}
/// Kinds of the direct children of `node`, child nodes and tokens alike,
/// in tree order.
fn child_kinds(node: &SyntaxNode) -> Vec<SyntaxKind> {
node.children_with_tokens()
.map(|child| child.kind())
.collect()
}
// A name, one argument and `;` become the direct children of one DIRECTIVE.
#[test]
fn simple_directive() {
let source = "listen 80;";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert_eq!(
kinds,
vec![
SyntaxKind::IDENT,
SyntaxKind::WHITESPACE,
SyntaxKind::ARGUMENT,
SyntaxKind::SEMICOLON
]
);
}
// NOTE(review): despite the name, this source carries one argument (`on`);
// the assertion expects it as a second IDENT token inside the directive.
#[test]
fn directive_no_args() {
let source = "accept_mutex on;";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert_eq!(
kinds,
vec![
SyntaxKind::IDENT,
SyntaxKind::WHITESPACE,
SyntaxKind::IDENT,
SyntaxKind::SEMICOLON
]
);
}
// A directive followed by `{ ... }` contains a BLOCK child.
#[test]
fn block_directive() {
let source = "server { listen 80; }";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::IDENT));
assert!(kinds.contains(&SyntaxKind::BLOCK));
}
// Blocks nested inside blocks parse without errors and round-trip.
#[test]
fn nested_blocks() {
let source = "http { server { listen 80; } }";
assert_no_errors(source);
assert_lossless(source);
}
// Same nesting, spread over several lines.
#[test]
fn multiline_config() {
let source = "http {\n server {\n listen 80;\n }\n}";
assert_no_errors(source);
assert_lossless(source);
}
// A comment on its own line ahead of a directive is accepted.
#[test]
fn comment_standalone() {
let source = "# this is a comment\nlisten 80;";
assert_no_errors(source);
assert_lossless(source);
}
// A comment after `;` and a space ends up inside the DIRECTIVE node.
#[test]
fn comment_after_directive() {
let source = "listen 80; # port";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::COMMENT));
}
// A double-quoted string is kept as an argument token of its own kind.
#[test]
fn double_quoted_string_arg() {
let source = r#"return 200 "hello world";"#;
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::DOUBLE_QUOTED_STRING));
}
// A single-quoted string is kept as an argument token of its own kind.
#[test]
fn single_quoted_string_arg() {
let source = "return 200 'hello world';";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::SINGLE_QUOTED_STRING));
}
// A `$name` argument shows up as a VARIABLE token inside the directive.
#[test]
fn variable_arg() {
let source = "set $var value;";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::VARIABLE));
}
// A directive whose name ends in `_by_lua_block` still yields a BLOCK child.
#[test]
fn lua_block() {
let source = "content_by_lua_block {\n ngx.say(\"hello\")\n}";
let root = assert_no_errors(source);
assert_lossless(source);
let dir = first_directive(&root);
let kinds = child_kinds(&dir);
assert!(kinds.contains(&SyntaxKind::BLOCK));
}
// Braces inside the Lua body must not end the block early.
#[test]
fn lua_block_nested_braces() {
let source =
"content_by_lua_block {\n if true then\n local t = {1, 2}\n end\n}";
assert_no_errors(source);
assert_lossless(source);
}
// Input ending without `;` is reported as an error but still round-trips.
#[test]
fn missing_semicolon() {
let source = "listen 80";
let (_root, errors) = parse_source(source);
assert_lossless(source);
assert!(!errors.is_empty(), "should report missing semicolon");
}
// An unclosed block is reported as an error but still round-trips.
#[test]
fn missing_closing_brace() {
let source = "server { listen 80;";
let (_root, errors) = parse_source(source);
assert_lossless(source);
assert!(!errors.is_empty(), "should report missing '}}'");
}
// A `}` at the top level is reported as an error but still round-trips.
#[test]
fn unexpected_closing_brace() {
let source = "} listen 80;";
let (_root, errors) = parse_source(source);
assert_lossless(source);
assert!(!errors.is_empty(), "should report unexpected '}}'");
}
// Empty input round-trips.
#[test]
fn lossless_empty() {
assert_lossless("");
}
// Input made only of whitespace and newlines round-trips.
#[test]
fn lossless_whitespace_only() {
assert_lossless(" \n \n");
}
// A multi-level configuration with a comment round-trips without errors.
#[test]
fn lossless_complex_config() {
let source = r#"http {
# Main server
server {
listen 80;
server_name example.com;
location / {
proxy_pass http://backend;
}
}
}
"#;
assert_lossless(source);
assert_no_errors(source);
}
// An empty line between two directives is preserved.
#[test]
fn lossless_blank_lines() {
let source = "listen 80;\n\nlisten 443;\n";
assert_lossless(source);
assert_no_errors(source);
}
// Multi-byte UTF-8 text inside a comment is preserved.
#[test]
fn lossless_utf8() {
let source = "# これは日本語コメント\nlisten 80;\n";
assert_lossless(source);
assert_no_errors(source);
}
// Regex-style arguments ahead of a block are accepted as arguments.
#[test]
fn location_with_regex() {
let source = "location ~ ^/api/(.*) {\n proxy_pass http://backend;\n}";
assert_no_errors(source);
assert_lossless(source);
}
// A `;`-terminated directive followed by a block directive parses cleanly.
#[test]
fn multiple_directives() {
let source = "worker_processes auto;\nevents {\n worker_connections 1024;\n}\n";
assert_no_errors(source);
assert_lossless(source);
}
}