// =============================================================================
// FlowLog GRAMMAR DEFINITION
// =============================================================================
// This grammar defines the syntax for FlowLog, a Datalog language engine
// for declarative data processing and rule-based queries.
// Main grammar entry point
main_grammar = {
SOI ~
(declaration | type_alias_decl | extern_fn | include_directive | input_directive | output_directive | printsize_directive | comp_decl | init_decl | fixpoint_block | loop_block | fact | rule)* ~
EOI
}
// =============================================================================
// BASIC ELEMENTS (IDENTIFIERS, LITERALS, TYPES)
// =============================================================================
// Identifier pattern - must start with letter or underscore,
// can contain letters, numbers, underscores
// Valid examples: x, _temp, myVar123, _private_var
// Invalid examples: 1x, x-yz (hyphen not allowed)
identifier = @{ "_"? ~ ASCII_ALPHA+ ~ (ASCII_ALPHANUMERIC | "_")* }
// Variable reference - variables used in rules and queries
variable = { identifier }
// Attribute name - names of relation attributes (columns)
attribute_name = { identifier }
// Relation name as it appears at a *defining* site (`.decl`). Always
// a single identifier — dots are reserved for inliner-produced names.
relation_name = @{ identifier }
// Relation reference at a *use* site (atom, head, directive target,
// loop condition, iterative list). Accepts the dotted form so user
// code can refer to inliner-produced relations like `c.Holds`.
relation_ref = @{ identifier ~ ("." ~ identifier)* }
// Type at a `.decl` attribute. `data_type` must come first so reserved
// keywords (`number`, `symbol`, …) win over the catch-all identifier.
type_ref = { data_type | alias_name }
// A user-declared type name at a *use* site. Resolved against the
// [`TypeRegistry`] after parsing — includes top-level `.type` aliases
// and per-instance prefixed types registered by the component inliner
// (`a.cfg.context`, etc.). Dotted forms support comp-type-parameter
// references like `.decl R(c: cfg.Context)`.
alias_name = @{ identifier ~ ("." ~ identifier)* }
// Data type specification
data_type = { integer8_type | integer16_type | integer32_type | integer64_type
| uinteger8_type | uinteger16_type | uinteger32_type | uinteger64_type
| float32_type | float64_type
| string_type | bool_type }
// 8-bit signed integer data type
integer8_type = { "int8" }
// 16-bit signed integer data type
integer16_type = { "int16" }
// 32-bit signed integer data type ("number" is an alias for "int32")
integer32_type = { "int32" | "number" }
// 64-bit signed integer data type
integer64_type = { "int64" }
// 8-bit unsigned integer data type
uinteger8_type = { "uint8" }
// 16-bit unsigned integer data type
uinteger16_type = { "uint16" }
// 32-bit unsigned integer data type ("unsigned" is an alias for "uint32")
uinteger32_type = { "uint32" | "unsigned" }
// 64-bit unsigned integer data type
uinteger64_type = { "uint64" }
// 32-bit floating point data type ("float" is an alias for "f32")
float32_type = { "float" | "f32" }
// 64-bit floating point data type
float64_type = { "f64" }
// Text data type for strings ("symbol" is an alias for "string")
string_type = { "string" | "symbol" }
// Boolean data type
bool_type = { "bool" }
// Constant values
constant = { boolean | float | integer | string }
// Float with optional sign, must contain a decimal point
// Examples: 1.5, -3.14, +0.0
float = @{ ("+" | "-")? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// Integer with optional sign
// Examples: 42, -17, +100
integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }
// String enclosed in double quotes
// Example: "hello world"
string = { "\"" ~ (!"\"" ~ ANY)* ~ "\"" }
// Placeholder for unused variables (underscore)
// Used when you don't need to reference a variable
placeholder = { "_" }
// Boolean literal values
boolean = { ("True" | "False") ~ !(ASCII_ALPHANUMERIC | "_") }
// =============================================================================
// RELATION DECLARATIONS
// =============================================================================
// Relations are the core data structures in FlowLog. They can be:
// - EDB (Extensional Database): Input relations with external data
// - IDB (Intentional Database): Computed relations defined by rules
// Declaration only defines the relation schema. I/O directives are
// separate and can appear anywhere in the program. Optional trailing
// `overridable` is only meaningful inside a `.comp` body.
declaration = {
".decl" ~ relation_name ~ "(" ~ attributes_decl? ~ ")" ~ overridable_kw?
}
// Lets a subcomponent's `.override` replace this relation's rules.
overridable_kw = @{ "overridable" ~ !(ASCII_ALPHANUMERIC | "_") }
// Input directive for EDB relations - separate from declaration
// Example: .input Relation(IO="file", filename="data.csv", delimiter=",")
input_directive = {
".input" ~ relation_ref ~ "(" ~ io_params? ~ ")"
}
// Output directive for relations
// Example: .output Relation
// Example: .output Relation(delimiter="|")
output_directive = {
".output" ~ relation_ref ~ ("(" ~ io_params? ~ ")")?
}
// Shared key=value parameter list used by both input and output directives
io_params = { io_param ~ ("," ~ io_param)* }
// Individual parameter
io_param = { parameter_name ~ "=" ~ parameter_value }
// Parameter name (IO, filename, delimiter, etc.)
parameter_name = { identifier }
// Parameter value (quoted string)
parameter_value = { string }
// Print size directive for relations - separate from declaration
// Example: .printsize Relation
printsize_directive = {
".printsize" ~ relation_ref
}
// Include directive — pull another .dl file into the current program.
// Path is resolved relative to the including file's directory.
// Example: .include "lib/base.dl"
include_directive = { ".include" ~ string }
// Comma-separated list of attribute declarations
// Example: src: number, dst: number, weight: string
attributes_decl = { attribute_decl ~ ("," ~ attribute_decl)* }
// Single attribute with name and type. Type is either a built-in primitive
// or a `.type`-declared alias name (resolved transitively at parse time).
// Example: "src: number" or "src: NodeId" where `NodeId` is a `.type` alias.
attribute_decl = { attribute_name ~ ":" ~ type_ref }
// =============================================================================
// USER-DEFINED EXTERNAL FUNCTION DECLARATIONS
// =============================================================================
// .extern fn name(param1: type1, ...) -> ret_type
extern_fn = {
".extern" ~ "fn" ~ identifier ~
"(" ~ extern_fn_params? ~ ")" ~
"->" ~ data_type
}
extern_fn_params = { extern_fn_param ~ ("," ~ extern_fn_param)* }
extern_fn_param = { identifier ~ ":" ~ data_type }
// =============================================================================
// ARITHMETIC EXPRESSIONS
// =============================================================================
// Mathematical expressions with basic operators
// Simple arithmetic without precedence
// Note: "x + y * z" is parsed left-to-right as "(x + y) * z"
arithmetic_expr = { factor ~ (arithmetic_op ~ factor)* }
// `as_cast` first so `as` wins over a UDF named `as` (reserved keyword).
factor = { as_cast | builtin_fn_call | fn_call_expr | constant | variable }
// `as(factor, TargetType)` — explicit cast, runtime no-op. Inner is a
// single `factor` (not `arithmetic_expr`) so the typechecker can lower
// `as(...)` away after validation; for `as(x + 1, T)`, bind first.
as_cast = { as_kw ~ "(" ~ factor ~ "," ~ type_ref ~ ")" }
as_kw = _{ "as" ~ !(ASCII_ALPHANUMERIC | "_") }
// Arithmetic operators
arithmetic_op = {
plus | // +
minus | // -
times | // *
divide | // /
modulo | // %
cat // string concatenation
}
// Individual arithmetic operators
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
modulo = { "%" }
cat = { "cat" }
// =============================================================================
// BUILT-IN FUNCTIONS
// =============================================================================
// Reserved keyword call sites lowered inline by the codegen — they are
// NOT user functions and share no infrastructure with `.extern fn`
builtin_fn_call = {
builtin_op ~ "(" ~ (arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~ ")"
}
builtin_op = {
strlen_op | substr_op | ord_op | contains_op | to_string_op | to_number_op
}
strlen_op = @{ "strlen" ~ !(ASCII_ALPHANUMERIC | "_") }
substr_op = @{ "substr" ~ !(ASCII_ALPHANUMERIC | "_") }
ord_op = @{ "ord" ~ !(ASCII_ALPHANUMERIC | "_") }
contains_op = @{ "contains" ~ !(ASCII_ALPHANUMERIC | "_") }
to_string_op = @{ "to_string" ~ !(ASCII_ALPHANUMERIC | "_") }
to_number_op = @{ "to_number" ~ !(ASCII_ALPHANUMERIC | "_") }
// =============================================================================
// COMPARISON EXPRESSIONS
// =============================================================================
// Binary comparisons between arithmetic expressions
// Binary comparison between arithmetic expressions
// Examples: x + y > z + w, count(x) = 5
compare_expr = { arithmetic_expr ~ compare_op ~ arithmetic_expr }
// Comparison operators
compare_op = {
equal | // =
not_equal | // !=
greater_equal_than | // >=
greater_than | // >
less_equal_than | // <=
less_than // <
}
// Individual comparison operators
equal = { "=" }
not_equal = { "!=" }
greater_equal_than = { ">=" }
greater_than = { ">" }
less_equal_than = { "<=" }
less_than = { "<" }
// =============================================================================
// AGGREGATE EXPRESSIONS
// =============================================================================
// Aggregation functions for computing statistics over groups
// Aggregation operation on a value
// Examples: count(x), avg(x + y), sum(weight)
aggregate_expr = {
aggregate_op ~
"(" ~
arithmetic_expr ~
")"
}
// Supported aggregate operators
aggregate_op = { count | average | sum | min | max }
count = { "count" | "COUNT" } // Count of elements
average = { "average" | "AVG" } // Average value
sum = { "sum" | "SUM" } // Sum of values
min = { "min" | "MIN" } // Minimum value
max = { "max" | "MAX" } // Maximum value
// =============================================================================
// ATOMS (RELATION REFERENCES)
// =============================================================================
// Atoms represent relation instances with specific arguments
// Relation with arguments
// Example: edge(x, y), person("Alice", 25)
atom = {
relation_ref ~
"(" ~
(atom_arg ~ ("," ~ atom_arg)*)? ~
")"
}
// Negative atom for negative conditions
// Example: !edge(x, y)
negative_atom = { "!" ~ atom }
// Arguments in atoms can be constants, variables, or placeholders
atom_arg = { constant | variable | placeholder }
// =============================================================================
// RULES SECTION
// =============================================================================
// Rules define how output relations are computed from input relations
// Format: head :- body.
// Example: reachable(x, z) :- edge(x, y), reachable(y, z).
// A fact is a ground tuple inserted directly into a relation.
// Example: edge(1, 2).
fact = { head ~ "." }
// Rule with optional multi-head and multi-body (Souffle-style).
// Commas separate alternative heads, semicolons seperate alternative bodies:
// h1, h2 :- b1; b2. → h1:-b1. h1:-b2. h2:-b1. h2:-b2.
rule = { rule_heads ~ ":-" ~ rule_bodies ~ "." }
// One or more commas-separated heads
rule_heads = { head ~ ("," ~ head)* }
// One or more semicolon-separated bodies (disjunction)
rule_bodies = { predicates ~ (";" ~ predicates)* }
// Left side of rule (before :-)
// Example: reachable(x, z), stats(count(x))
head = {
relation_ref ~
"(" ~
(head_arg ~ ("," ~ head_arg)*)? ~
")"
}
// Can be aggregate functions or arithmetic expressions (UDF calls are parsed as factors within arithmetic)
head_arg = { aggregate_expr | arithmetic_expr }
// User-defined function call in rule head
// Example: my_udf(x, y + 1)
fn_call_expr = {
identifier ~
"(" ~
(arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~
")"
}
// Comma-separated list of predicates in the rule body
predicates = { predicate ~ ("," ~ predicate)* }
// Types of predicates allowed in rule bodies
// Note: compare_expr must come before atom because compare_expr can start with
// fn_call_expr (e.g., "cost(x) > 5") which atom would greedily consume.
predicate = {
compare_expr | // Comparison: x > y, cost(x) > 5
atom | // Positive atom: edge(x, y)
negative_atom | // Negative atom: !edge(x, y)
disjunction_group | // Nested disjunction: (A ; B)
fn_call_expr // UDF predicate: is_valid(x + 1, y)
}
// Parenthesised disjunction inside a body: `(A ; B)`.
disjunction_group = { "(" ~ rule_bodies ~ ")" }
// =============================================================================
// LOOP BLOCK / FIXPOINT BLOCK
// =============================================================================
// A fixpoint block evaluates rules until no new tuples are derived (delta
// becomes empty). A loop block evaluates rules with bounded/conditional
// iteration. Both act as hard evaluation barriers — the stratifier cannot
// move rules across their boundaries.
//
// Syntax:
// fixpoint { rules... }
// loop <condition> { rules... }
//
// The condition is composed of at most one `while` clause and one `until`
// clause, in either order:
//
// while { iter_expr } — keep looping while the iteration constraint holds.
// iter_expr is one or more "@it op N" sub-conditions (joined by and/or)
// that describe allowed iteration windows.
// @it <= 6 → run iterations 0..=6
// @it >= 5 and @it <= 10 → run iterations 5..=10
// @it < 5 or @it > 10 → run iterations 0..=4 and 11..
//
// until { rel_expr } — halt when the listed bool relation(s) become true.
// rel_expr is one or more nullary relation names joined by and/or.
// until { Done } → stop when Done holds
// until { Done1 or Done2 } → stop when either holds
//
// Combining while and until:
// while W until U — stop as soon as EITHER condition triggers (min)
// while W or until U — stop only when BOTH conditions trigger (max)
//
// Relations using replacement (iterative) semantics are declared inside the
// block with `.iterative <relation_name>` directives. This is scoped per-block:
// the same relation can be iterative in one block and accumulative in another.
//
// Examples:
// fixpoint { ... }
// fixpoint { .iterative X ... }
// loop while { @it <= 10 } { ... }
// loop until { Done } { .iterative X ... }
// loop while { @it <= 100 } until { Done } { ... }
// loop while { @it <= 100 } or until { Done } { ... }
// loop until { Done } while { @it >= 5 and @it <= 10 } { ... }
fixpoint_block = {
"fixpoint" ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}
loop_block = {
"loop" ~ loop_condition ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}
// .iterative <relation_name> — marks a relation as using replacement (new_from)
// semantics within this block. Unlisted relations default to accumulative (new).
// Scoped per-block: the same relation can be iterative in one block and
// accumulative in another.
iterative_directive = { ".iterative" ~ relation_ref }
// At most one while clause and one until clause, in either order.
// If both are present without "or", stop when EITHER fires (min / and semantics).
// If joined by "or", stop when BOTH fire (max / or semantics).
loop_condition = {
loop_while ~ (loop_or? ~ loop_until)?
| loop_until ~ (loop_or? ~ loop_while)?
}
// while { iter_expr } — loop while iteration is in the specified window(s).
loop_while = { "while" ~ "{" ~ loop_iter_expr ~ "}" }
// until { rel_expr } — stop when the listed relation(s) become true.
loop_until = { "until" ~ "{" ~ loop_stop_group ~ "}" }
// One or more bool relations joined by and/or inside the until clause.
loop_stop_group = { loop_bool_relation ~ (loop_connective ~ loop_bool_relation)* }
// One or more "@it op N" sub-conditions joined by and/or.
// Resolved at parse time into a list of allowed iteration windows Vec<(u16,u16)>.
loop_iter_expr = {
"@it" ~ loop_iter_compare_op ~ integer
~ (loop_connective ~ "@it" ~ loop_iter_compare_op ~ integer)*
}
// Comparison operators for iteration sub-conditions.
// Longer tokens must come before shorter ones to avoid premature match.
loop_iter_compare_op = { "<=" | ">=" | "==" | "<" | ">" }
// A single nullary (boolean) relation name used in an until clause.
loop_bool_relation = { relation_ref }
// Boolean connective joining clauses or sub-conditions.
loop_connective = { loop_and | loop_or }
loop_and = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }
loop_or = @{ "or" ~ !(ASCII_ALPHANUMERIC | "_") }
// =============================================================================
// TYPE DECLARATIONS (.type)
// =============================================================================
// .type NodeId = number alias (transparent equivalence)
// .type UserId <: number subtype (distinct compile-time identity)
//
// Both forms are global, position-independent, and resolved at parse
// time to a primitive root. Subtypes are zero-cost: only the type-
// checker distinguishes them.
type_alias_decl = { ".type" ~ identifier ~ type_decl_op ~ type_ref }
type_decl_op = { subtype_op | alias_op }
subtype_op = { "<:" }
alias_op = { "=" }
// =============================================================================
// COMPONENT DECLARATIONS (.comp / .init)
// =============================================================================
//
// .comp Container<T> {
// .decl Holds(x: T)
// Holds(x) :- Source(x).
// }
// .init c = Container<symbol>
//
// `.comp` declares a parameterized template; `.init` instantiates it
// under a name (e.g. `c`). The component inliner pass eliminates both
// before typechecking, leaving only prefixed primitive forms
// (`.decl c.holds(x: symbol)` etc.).
comp_decl = {
".comp" ~ identifier ~ comp_type_params? ~ comp_supertype? ~ "{" ~ comp_body_item* ~ "}"
}
// `<T>` or `<T, U>` — names of types this component is parameterized by.
comp_type_params = { "<" ~ identifier ~ ("," ~ identifier)* ~ ">" }
// `: BaseComp<args>` — inherit and splice another component's body.
comp_supertype = { ":" ~ identifier ~ comp_type_args? }
// `<symbol, NodeId>` — concrete type arguments. `type_ref` so dotted
// forms like `cfg.Context` work.
comp_type_args = { "<" ~ type_ref ~ ("," ~ type_ref)* ~ ">" }
// `.init c = Container<symbol>`
init_decl = { ".init" ~ identifier ~ "=" ~ identifier ~ comp_type_args? }
// Items allowed inside a component body. Mirrors the top-level grammar
// minus fixpoint/loop blocks and include directives (which are
// resolved at file-load time), plus `override_directive` which is
// comp-body-only.
comp_body_item = {
declaration | type_alias_decl | input_directive | output_directive
| printsize_directive | rule | fact | comp_decl | init_decl
| override_directive
}
// Drop inherited rules and facts for `Name`; the parent's `.decl Name`
// (which must be `overridable`) is kept.
override_directive = { ".override" ~ identifier }
// =============================================================================
// WHITESPACE AND COMMENTS
// =============================================================================
// Rules for handling whitespace and comments (ignored during parsing)
// Whitespace and comments are ignored during parsing
WHITESPACE = _{ " " | "\t" | NEWLINE }
// Comment starts with # or // and continues until end of line
// Examples: # This is a comment, // Another comment
COMMENT = _{ ("#" | "//") ~ (!NEWLINE ~ ANY)* }