/* NOTE: this grammar should be matched case-insensitively. */
/**
* A type derivation program consists of zero or more statements followed by
* the final pattern that should evaluate to the derived data type.
*/
program ::= statement-separator* ( statement statement-separator+ )* pattern statement-separator*;
/**
* Statements are separated from each other and from the final derivation
* expression using newlines or a semicolon.
*/
statement-separator :== [#r#n;]+ ;
/**
* Statements manipulate the state of the type derivation interpreter before
* the final derivation expression is evaluated. They look like assignment
* statements at first glance, but act more like equality or set containment
* assertions: the right-hand side is evaluated like an expression as you
* might expect, but the left-hand side acts just like the patterns that are
* used to match function argument types. While this is perhaps not the most
* intuitive ruleset, it is extremely easy to implement (it only reuses
* features we already needed anyway), while also being a much more powerful
* primitive than a simple assignment statement, because it can also be used
* for bounds checking and other assertions. For example, if we have a
* function like `fn(VARCHAR(a), VARCHAR(b))` and the implementation of the
* function requires that a + b equals 10, we can simply write "10 = a + b".
* This works, because the pattern "10" will only match the value 10, and
* a pattern mismatch at any point during the matching and evaluation process
* indicates that the implementation is incompatible with the given argument
* types. If you find this syntax confusing, you may also write
* "assert a + b matches 10" or "assert a + b == 10"; the former does the
* exact same thing, while the latter reduces to "true = a + b == 10", which is
* functionally the same thing.
*
* Note that when you use these statements like assignment statements, you can
* only ever reassign a binding to the same value. For example, "a = 10; a = 20"
* will always fail, because a cannot both be 10 and 20 at the same time (more
* accurately, a is bound to 10, so the second statement behaves like
* "10 = 20", and 20 does not match 10).
*/
statement
::= pattern "=" pattern -> normal
| "assert" pattern "matches" pattern -> match
| "assert" pattern -> assert
;
/**
* Patterns are at the core of the type derivation interpreter; they are used
* both for matching and as expressions. However, note that not all types of
* patterns work in both contexts.
*/
pattern ::= pattern-or;
/**
* Lazily-evaluated boolean OR expression. Maps to builtin or() function if
* more than one pattern is parsed.
*/
pattern-or ::= pattern-and ( "||" pattern-and )* ;
/**
* Lazily-evaluated boolean AND expression. Maps to builtin and() function if
* more than one pattern is parsed.
*/
pattern-and ::= pattern-eq-neq ( "&&" pattern-eq-neq )* ;
/**
* Equality and not-equality expressions. These map to the builtin equal()
* and not_equal() functions in left-to-right order.
*/
pattern-eq-neq ::= pattern-ineq ( op-eq-neq pattern-ineq )* ;
op-eq-neq ::= "==" | "!=" ;
/**
* Integer inequality expressions. These map to the builtin greater_than(),
* less_than(), greater_equal(), and less_equal() functions in left-to-right
* order.
*/
pattern-ineq ::= pattern-add-sub ( op-ineq pattern-add-sub )* ;
op-ineq ::= "<" | "<=" | ">" | ">=" ;
/**
* Integer addition and subtraction. These map to the builtin add() and
* subtract() functions in left-to-right order.
*/
pattern-add-sub ::= pattern-mul-div ( op-add-sub pattern-mul-div )* ;
op-add-sub ::= "+" | "-" ;
/**
* Integer multiplication and division. These map to the builtin multiply() and
* divide() functions in left-to-right order.
*/
pattern-mul-div ::= pattern-misc ( op-mul-div pattern-misc )* ;
op-mul-div ::= "*" | "/" ;
/**
* Miscellaneous patterns that don't need special rules for precedence or
* avoiding left-recursion.
*/
pattern-misc
/**
* Parentheses for overriding operator precedence.
*/
::= "(" pattern ")" -> parentheses
/**
* If-then-else pattern. Can only be evaluated. The first pattern must
* evaluate to a boolean. The second or third pattern is then evaluated
* based on that boolean and returned. The branch that is not selected is
* also not evaluated (i.e. evaluation is lazy).
*/
| "if" pattern "then" pattern "else" pattern -> if-then-else
/**
* Unary not function. Can only be evaluated and can only be applied to
* booleans.
*/
| "!" pattern -> unary-not
/**
* The "anything" pattern. This matches everything, and cannot be evaluated.
* It's primarily intended for matching (parts of) argument types, when you
* don't need or want a binding. For example, `equals(?, ?) -> boolean` would
* allow for any combination of argument types. This distinguishes it from
* `equals(any1, any1) -> boolean`, which only accepts equal types; instead
* it behaves like `equals(any1, any2) -> boolean`. `?` is especially useful
* when you want this type of behavior for a variadic function; for example,
* `serialize(?...) -> binary` will match any number and combination of
* argument types, while `serialize(any1...) -> binary` would only accept any
* number of any *one* data type.
*/
| "?" -> any
/**
* Matches any boolean value. Cannot be evaluated.
*/
| "metabool" -> bool-any
/**
* Matches and evaluates to the boolean value "true".
*/
| "true" -> bool-true
/**
* Matches and evaluates to the boolean value "false".
*/
| "false" -> bool-false
/**
* Matches any integer value. Cannot be evaluated.
*/
| "metaint" -> int-any
/**
* Matches any integer value within the specified inclusive range. Can only
* be evaluated if the two bounds are equal, in which case it reduces to just
* a single integer.
*/
| integer ".." integer -> int-range
/**
* Matches any integer value that equals at least the given number. Cannot be
* evaluated.
*/
| integer ".." -> int-at-least
/**
* Matches any integer value that equals at most the given number. Cannot be
* evaluated.
*/
| ".." integer -> int-at-most
/**
* Matches and evaluates to exactly the given integer.
*/
| integer -> int-exactly
/**
* Matches any enumeration constant.
*/
| "metaenum" -> enum-any
/**
* Matches an enumeration constant in the given set. If only a single
* constant is specified, the pattern evaluates to that constant, otherwise
* it cannot be evaluated.
*/
| "{" identifier ("," identifier)* "}" -> enum-set
/**
* Matches any string.
*/
| "metastr" -> str-any
/**
* Matches and evaluates to exactly the given string.
*/
| string -> str-exactly
/**
* Matches any data type.
*/
| "typename" -> dt-any
/**
* Evaluates a function. Cannot be matched. The following functions are
* currently available:
*
* - "not(metabool) -> metabool": boolean NOT.
* - "and(metabool*) -> metabool": boolean AND. Evaluated lazily from left
* to right.
* - "or(metabool*) -> metabool": boolean OR. Evaluated lazily from left to
* right.
* - "negate(metaint) -> metaint": integer negation. 64-bit two's complement
* overflow must be detected, and implies that the function implementation
* that the program belongs to does not match the given argument types.
* - "add(metaint*) -> metaint": integer sum. Overflow handled as above.
* - "subtract(metaint, metaint) -> metaint": subtracts an integer from
* another. Overflow handled as above.
* - "multiply(metaint*) -> metaint": integer product. Overflow handled as
* above.
* - "divide(metaint, metaint) -> metaint": divides an integer over
* another. Overflow and division by zero handled as above.
* - "min(metaint+) -> metaint": return the minimum integer value.
* - "max(metaint+) -> metaint": return the maximum integer value.
* - "equal(T, T) -> metabool": return whether the two values are equal.
* - "not_equal(T, T) -> metabool": return whether the two values are not
* equal.
* - "greater_than(metaint, metaint) -> metabool": return whether the left
* integer is greater than the right.
* - "less_than(metaint, metaint) -> metabool": return whether the left
* integer is less than the right.
* - "greater_equal(metaint, metaint) -> metabool": return whether the left
* integer is greater than or equal to the right.
* - "less_equal(metaint, metaint) -> metabool": return whether the left
* integer is less than or equal to the right.
* - "covers(value, pattern) -> metabool": return whether the left value
* matches the pattern. The pattern may make use of bindings that were
* previously defined, but it does NOT capture new bindings regardless
* of whether the pattern match succeeded.
* - "if_then_else(metabool, T, T) -> T": if-then-else expression. Evaluated
* lazily.
*
* Note that many of the functions also have corresponding expressions. These
* expressions are simply syntactic sugar for calling the functions directly.
*/
| identifier "(" ( pattern ("," pattern)* )? ")" -> function
/**
* This pattern matches one of three things, which are too context-sensitive
* to distinguish at this time:
*
* - a data type pattern;
* - a binding; or
* - an enum constant.
*
* The type depends on the identifier path, and must be disambiguated in a
* three-step process:
*
* - Gather all identifiers that match a builtin type class or an in-scope
* user-defined type class.
* - Gather all enumeration parameter constants that these types declare.
* - Now disambiguate as follows: if an identifier path matches a type
* class, it's a type pattern; if it matches an enumeration parameter
* constant, it's an enum constant pattern; otherwise, it's a binding.
*
* Two types of bindings exist, with different behavior:
*
* - Normal bindings. The subset of the data type pattern syntax used for
* these is just a single identifier with no suffix. When matched the
* first time, this matches anything and binds the identifier to the
* matched value. The next time it will only match the previously bound
* value, and once bound, it will evaluate to the bound value.
* - Implicit-OR bindings. The subset of the data type pattern syntax used
* for these is just a single identifier with exactly and only a "?"
* suffix. These will always match both true and false, and will evaluate
* to whether any true value was matched. This is useful to model
* nullability behavior. For example, `add(i8?n?, i8?n?) -> i8?n?` will
* match any combination of nullabilities for the arguments, and return
* a nullable type if and only if either argument is nullable.
*
* Enum constants only match a single identifier. If a dt-binding-constant
* AST node resolves to a binding or an enum constant, an error should be
* emitted if illegal syntax was used.
*/
| identifier-path nullability? variation? parameters? -> dt-binding-constant
/**
* Unary negation function. Can only be evaluated and can only be applied to
* integers. Note that this is all the way at the back because signed integer
* literals should be preferred, since those can also be matched, and can
* deal with -2^63 without overflow.
*/
| "-" pattern -> unary-negate
;
/**
* Nullability suffix for a data type pattern.
*
* - If there is no such suffix, the pattern matches only non-nullable types,
* and also evaluates to a non-nullable type if applicable.
* - If this suffix is just "?", the pattern matches only nullable types,
* and also evaluates to a nullable type if applicable.
* - If this suffix is a "?" followed by a pattern, the pattern is matched
* against false for non-nullable and true for nullable types. Likewise for
* evaluation; if the pattern evaluates to false the type will be
* non-nullable, if it evaluates to true it will be nullable.
*
* The "?" is also used for implicit-OR bindings.
*/
nullability ::= "?" pattern? ;
/**
* Type variation suffix.
*
* - If there is no such suffix, the pattern matches any variation that is
* marked as compatible with the system-preferred variation via the function
* behavior option of the variation, as well as the system-preferred
* variation itself. It will evaluate to the system-preferred variation.
* - If the suffix is [0], the pattern matches and evaluates to the
* system-preferred variation exactly.
* - If the suffix is [ident], the pattern matches and evaluates to the named
* variation exactly. The variation must be in scope.
*/
variation ::= "[" variation-body "]" ;
variation-body ::= "?" | zero | identifier-path ;
/**
* Type parameter pack suffix.
*
* - If there is no such suffix, the pattern accepts any number of parameters
* for the type (assuming that the type class accepts this as well), and
* will attempt to evaluate to a type with no parameters.
* - If there is a "<>" suffix, the pattern accepts only types with zero
* parameters, and will attempt to evaluate to a type with no parameters.
* - If parameters are specified, the pattern accepts only types with exactly
* the specified number of parameters, and will attempt to evaluate to a
* type with exactly those parameters.
*/
parameters ::= "<" ( parameter ("," parameter)* )? ">" ;
/**
* Type parameter pattern. The name prefix is only used when evaluated (it is
* never matched), and is currently only accepted by the NSTRUCT (pseudo)type.
*/
parameter ::= ( ident-or-string ":" )? optional-pattern ;
/**
* A pattern for matching potentially-optional parameter values. "null" may be
* used to match or evaluate to explicitly-skipped optional parameters;
* otherwise, the given pattern is used for the parameter value. The "?" (any)
* pattern is special-cased to also match explicitly-skipped parameter slots.
*/
optional-pattern ::= "null" | pattern ;
/**
* An identifier or a string (so the syntax allows for both).
*/
ident-or-string ::= string | identifier ;
/**
* An identifier path, separated by periods.
*/
identifier-path ::= ( identifier "." )* identifier ;
/**
* An identifier. Note that $ signs are legal in identifiers, and note that all
* identifier matching is case-insensitive.
*/
identifier :== [a-zA-Z_$] [a-zA-Z0-9_$]* ;
/**
* A string literal.
*/
string :== '"' [^"]+ '"' ;
/**
* An integer literal.
*/
integer ::= sign? unsigned ;
/**
* The (optional) sign of a signed integer.
*/
sign ::= "-" | "+" ;
/**
* An unsigned integer.
*/
unsigned ::= zero | nonzero ;
/**
* The number zero.
*/
zero :== "0" ;
/**
* A natural+ number.
*/
nonzero :== [1-9] [0-9]* ;
/**
* Ignore spaces, tabs, and # end-of-line comments.
*/
_ :== [ #t]+ | ## [^#n#r]+ [#r#n]+ ;