// ######################################
// Readme
// ######################################
// You will notice the pattern ( X ~ Y | X )
// instead of X ~ Y?. This is because we do not
// want the concat operator ~ between X and Y
// to consume any whitespace after X, if Y is not present.
// This is how PEG grammars work:
// https://pest.rs/book/grammars/peg.html
// This is the basic syntax of Pest grammar files:
// https://pest.rs/book/grammars/syntax.html#cheat-sheet
// ######################################
// Schema - the root of all rules
// ######################################
// Entry rule. The whole input, from SOI to EOI, is matched as a run of
// top-level items. Alternatives are tried in the order listed and the first
// one that matches wins, so CATCH_ALL (which accepts any non-empty line)
// has to remain the last alternative.
schema = {
    SOI
    ~ (
        model_declaration
      | enum_declaration
      | config_block
      | type_alias
      | arbitrary_block
      | comment_block
      | empty_lines
      | CATCH_ALL
    )*
    ~ EOI
}
// ######################################
// Model and composite types
// ######################################
// At the syntax level, models and composite types are the same.
// A block introduced by `model`, `type` or `view`: keyword, name, then
// model_contents enclosed between BLOCK_OPEN and BLOCK_CLOSE.
model_declaration = {
    (MODEL_KEYWORD | TYPE_KEYWORD | VIEW_KEYWORD) ~ identifier
    ~ BLOCK_OPEN ~ model_contents ~ BLOCK_CLOSE
}
// One field line: a name, then (all optional) a legacy colon, a type, any
// number of field attributes and a trailing comment. The line has to be
// terminated by NEWLINE.
field_declaration = {
    identifier ~ LEGACY_COLON? ~ field_type? ~ field_attribute*
    ~ trailing_comment? ~ NEWLINE
}
// Body of a model block: any mix of field lines, block attribute lines,
// comments and blank lines. BLOCK_LEVEL_CATCH_ALL comes last so that it only
// picks up lines none of the other alternatives could match.
model_contents = {
    (
        field_declaration
      | (block_attribute ~ NEWLINE)
      | comment_block
      | empty_lines
      | BLOCK_LEVEL_CATCH_ALL
    )*
}
// ######################################
// Field Type
// ######################################
// Pest is greedy, order is very important here.
// Alternatives are tried left to right and the first match wins, so every
// suffixed form is listed before the shorter form it starts with:
// `T[]?` before `T[]`, and `T[]`, `T?`, `T!` before the bare base_type.
field_type = { unsupported_optional_list_type | list_type | optional_type | legacy_required_type | legacy_list_type | base_type }
// `Unsupported("...")`: the literal prefix wrapping a string literal.
unsupported_type = { "Unsupported(" ~ string_literal ~ ")" }
// Either an Unsupported(...) type or a plain identifier.
base_type = { unsupported_type | identifier } // Named base_type because `type` is a Rust keyword
// `T[]?` -- NOTE(review): the name suggests this form is parsed only so it can
// be rejected with a clear error; confirm in the code that consumes the tree.
unsupported_optional_list_type = { base_type ~ "[]" ~ "?" }
// `T[]`
list_type = { base_type ~ "[]" }
// `T?`
optional_type = { base_type ~ "?" }
// `T!`
legacy_required_type = { base_type ~ "!" }
// `[T]`
legacy_list_type = { "[" ~ base_type ~ "]" }
// ######################################
// Type Alias
// ######################################
// Alias form of `type`: keyword, name, `=`, a base type and any number of
// field attributes. Unlike model_declaration there is no block body.
type_alias = { TYPE_KEYWORD ~ identifier ~ "=" ~ base_type ~ field_attribute* }
// ######################################
// Configuration blocks
// ######################################
// A `datasource` or `generator` block: keyword, name, then config_contents
// enclosed between BLOCK_OPEN and BLOCK_CLOSE.
config_block = {
    (DATASOURCE_KEYWORD | GENERATOR_KEYWORD) ~ identifier
    ~ BLOCK_OPEN ~ config_contents ~ BLOCK_CLOSE
}
// `name = expression`, optionally followed by a trailing comment. The
// expression itself is optional, so `name =` on its own also matches.
key_value = { identifier ~ "=" ~ expression? ~ trailing_comment? }
// Body of a config block: key/value lines, comments and blank lines.
// BLOCK_LEVEL_CATCH_ALL comes last so that it only picks up lines none of the
// other alternatives could match.
config_contents = {
    (
        (key_value ~ NEWLINE)
      | comment_block
      | empty_lines
      | BLOCK_LEVEL_CATCH_ALL
    )*
}
// A block definition without a keyword in front of it. This is not valid
// input; the rule only acts as a catch so the parser can display a nice error.
// After the opening line, the body is any run of characters up to the first `}`.
arbitrary_block = { identifier ~ BLOCK_OPEN ~ ((!BLOCK_CLOSE ~ ANY) | NEWLINE)* ~ BLOCK_CLOSE }
// ######################################
// Enum
// ######################################
// An `enum` block: keyword, name, then enum_contents enclosed between
// BLOCK_OPEN and BLOCK_CLOSE.
enum_declaration = {
    ENUM_KEYWORD ~ identifier
    ~ BLOCK_OPEN ~ enum_contents ~ BLOCK_CLOSE
}
// One enum value line: a name, any number of field attributes, an optional
// trailing comment, and the terminating NEWLINE.
enum_value_declaration = { identifier ~ field_attribute* ~ trailing_comment? ~ NEWLINE }
// Body of an enum block: value lines, block attribute lines, comments and
// blank lines. BLOCK_LEVEL_CATCH_ALL comes last so that it only picks up
// lines none of the other alternatives could match.
enum_contents = {
    (
        enum_value_declaration
      | (block_attribute ~ NEWLINE)
      | comment_block
      | empty_lines
      | BLOCK_LEVEL_CATCH_ALL
    )*
}
// ######################################
// Attributes
// ######################################
// `@@path` with an optional argument list and an optional trailing comment.
// The terminating NEWLINE is matched by the *_contents rules that use this.
block_attribute = { "@@" ~ path ~ arguments_list? ~ trailing_comment? }
// `@path` with an optional argument list.
field_attribute = { "@" ~ path ~ arguments_list? }
// ######################################
// Arguments
// ######################################
// Parenthesised, comma-separated arguments. The list may be empty, and a
// single trailing comma before `)` is accepted.
arguments_list = { "(" ~ (argument ~ ("," ~ argument)*)? ~ trailing_comma? ~ ")" }
// Silent rule: only the matched alternative shows up in the parse output.
// named_argument is tried before empty_argument because empty_argument
// (`name:`) is a prefix of named_argument (`name: expression`).
argument = _{ named_argument | empty_argument | expression }
// `name:` with no value after the colon.
empty_argument = { identifier ~ ":" }
// `name: expression`
named_argument = { identifier ~ ":" ~ expression }
// The optional comma after the last argument, as its own named rule.
trailing_comma = @{ "," }
// ######################################
// Comments and Documentation Comments
// ######################################
// comment_block and trailing_comment are compound-atomic ($): no implicit
// whitespace is skipped inside them. Presumably this is why the individual
// comment rules below match their leading WHITESPACE explicitly.

// One or more consecutive comments, each optionally followed by a newline.
comment_block = ${
    ((doc_comment | comment | multi_line_comment) ~ NEWLINE?)+
}
// A single comment of any kind; the line break after it is not consumed here.
trailing_comment = ${ doc_comment | comment | multi_line_comment }
// Triple-slash comment.
doc_comment = { WHITESPACE* ~ "///" ~ doc_content }
// Double-slash comment. The negative lookahead rejects a triple slash, so this
// rule and doc_comment never match the same text.
comment = { WHITESPACE* ~ !"///" ~ "//" ~ doc_content }
// The text of a comment: everything up to, but not including, the line break.
doc_content = @{ (!NEWLINE ~ ANY)* }
// Block comment: opening marker, any characters (line breaks included) up to
// the first closing marker, then the closing marker.
multi_line_comment = { WHITESPACE* ~ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// ######################################
// shared building blocks
// ######################################
// A single Unicode letter or ASCII digit.
unicode_alphanumeric = { LETTER | ASCII_DIGIT }
// Starts with a letter or digit; later characters may also be `_` or `-`.
// Atomic, so no whitespace is allowed inside an identifier.
identifier = @{ unicode_alphanumeric ~ ( "_" | "-" | unicode_alphanumeric)* }
// Dot-separated identifiers such as `a.b.c`. As written, a dot does not have
// to be followed by another segment, so a trailing dot (`a.`) also matches.
path = @{ identifier ~ ("." ~ path?)* }
// Implicit whitespace skipped between tokens in non-atomic rules. Line breaks
// are not part of it, so rules have to match NEWLINE explicitly.
WHITESPACE = _{ SPACE_SEPARATOR | "\t" } // tabs are also whitespace
// A line break: LF, CRLF, or a lone CR.
NEWLINE = _{ "\n" | "\r\n" | "\r" }
// One or more lines that contain nothing but whitespace.
empty_lines = @{ (WHITESPACE* ~ NEWLINE)+ }
// Opening brace of a block. Everything after `{` up to the end of the line is
// consumed as well, and the line break itself is mandatory.
// The `ANY` part is there so that a comment next to the opening brace does not
// crash the parser; see test `parse_comments_without_crasing_or_loosing_info`.
BLOCK_OPEN = { "{" ~ (!NEWLINE ~ ANY)* ~ NEWLINE }
// Closing brace of a block.
BLOCK_CLOSE = { "}" }
// Keyword tokens, each matched as a plain string by its own named rule.
// NOTE(review): nothing here requires whitespace after a keyword, so input
// such as `modelFoo {` appears to parse as model `Foo` -- confirm this is intended.
ENUM_KEYWORD = { "enum" }
MODEL_KEYWORD = { "model" }
TYPE_KEYWORD = { "type" }
VIEW_KEYWORD = { "view" }
GENERATOR_KEYWORD = { "generator" }
DATASOURCE_KEYWORD = { "datasource" }
// Colon that field_declaration accepts between a field name and its type.
LEGACY_COLON = { ":" }
// Fallback: one or more characters up to the end of the line, plus the line
// break if there is one. Used as the last alternative in `schema`.
CATCH_ALL = { (!NEWLINE ~ ANY)+ ~ NEWLINE? }
// Same fallback for use inside blocks; it refuses to start at a closing brace
// so that BLOCK_CLOSE can still end the block.
BLOCK_LEVEL_CATCH_ALL = { !BLOCK_CLOSE ~ CATCH_ALL }
// ######################################
// Expressions & Functions
// ######################################
// A path followed by an argument list, e.g. `name(...)`.
function_call = { path ~ arguments_list }
// `[a, b, c]`; the list may be empty. No trailing comma is accepted here.
array_expression = { "[" ~ (expression ~ ( "," ~ expression )*)? ~ "]" }
// Order matters: function_call precedes path because it starts with a path, and
// numeric_literal precedes path because an identifier may start with a digit.
expression = { function_call | array_expression | numeric_literal | string_literal | path }
// ######################################
// Literals / Values
// ######################################
// Optional minus sign, one or more digits, and an optional fractional part
// (a dot followed by at least one digit). Atomic: no whitespace inside.
numeric_literal = @{ "-"? ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? }

// String literals. The grammar is modelled on JSON strings.
// References:
// - https://datatracker.ietf.org/doc/html/rfc8259
// - https://www.json.org/json-en.html
ASCII_CONTROL_CHARACTER = _{ '\u{0000}'..'\u{001F}' }
// A backslash followed by any single character.
string_escape = _{ "\\" ~ ANY }
// Zero or more items, each either an escape sequence or a character that is
// neither a double quote nor an ASCII control character.
string_content = @{
    (
        string_escape
      | (!("\"" | ASCII_CONTROL_CHARACTER) ~ ANY)
    )*
}
// Double-quoted string. Compound-atomic, so whitespace between the quotes
// belongs to string_content instead of being skipped.
string_literal = ${ "\"" ~ string_content ~ "\"" }