// =============================================================================
// FlowLog GRAMMAR DEFINITION
// =============================================================================
// This grammar defines the syntax for FlowLog, a Datalog language engine
// for declarative data processing and rule-based queries.
// Main grammar entry point
main_grammar = {
SOI ~
(declaration | extern_fn | include_directive | input_directive | output_directive | printsize_directive | fixpoint_block | loop_block | fact | rule)* ~
EOI
}
// =============================================================================
// BASIC ELEMENTS (IDENTIFIERS, LITERALS, TYPES)
// =============================================================================
// Identifier pattern - must start with letter or underscore,
// can contain letters, numbers, underscores
// Valid examples: x, _temp, myVar123, _private_var
// Invalid examples: 1x, x-yz (hyphen not allowed)
identifier = @{ "_"? ~ ASCII_ALPHA+ ~ (ASCII_ALPHANUMERIC | "_")* }
// Variable reference - variables used in rules and queries
variable = { identifier }
// Attribute name - names of relation attributes (columns)
attribute_name = { identifier }
// Relation name - names of relations (tables)
relation_name = { identifier }
// Data type specification
data_type = { integer8_type | integer16_type | integer32_type | integer64_type
| uinteger8_type | uinteger16_type | uinteger32_type | uinteger64_type
| float32_type | float64_type
| string_type | bool_type }
// 8-bit signed integer data type
integer8_type = { "int8" }
// 16-bit signed integer data type
integer16_type = { "int16" }
// 32-bit signed integer data type ("number" is an alias for "int32")
integer32_type = { "int32" | "number" }
// 64-bit signed integer data type
integer64_type = { "int64" }
// 8-bit unsigned integer data type
uinteger8_type = { "uint8" }
// 16-bit unsigned integer data type
uinteger16_type = { "uint16" }
// 32-bit unsigned integer data type ("unsigned" is an alias for "uint32")
uinteger32_type = { "uint32" | "unsigned" }
// 64-bit unsigned integer data type
uinteger64_type = { "uint64" }
// 32-bit floating point data type ("float" is an alias for "f32")
float32_type = { "float" | "f32" }
// 64-bit floating point data type
float64_type = { "f64" }
// Text data type for strings ("symbol" is an alias for "string")
string_type = { "string" | "symbol" }
// Boolean data type
bool_type = { "bool" }
// Constant values
constant = { boolean | float | integer | string }
// Float with optional sign, must contain a decimal point
// Examples: 1.5, -3.14, +0.0
float = @{ ("+" | "-")? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// Integer with optional sign
// Examples: 42, -17, +100
integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }
// String enclosed in double quotes
// Example: "hello world"
string = { "\"" ~ (!"\"" ~ ANY)* ~ "\"" }
// Placeholder for unused variables (underscore)
// Used when you don't need to reference a variable
placeholder = { "_" }
// Boolean literal values
boolean = { ("True" | "False") ~ !(ASCII_ALPHANUMERIC | "_") }
// =============================================================================
// RELATION DECLARATIONS
// =============================================================================
// Relations are the core data structures in FlowLog. They can be:
// - EDB (Extensional Database): Input relations with external data
// - IDB (Intentional Database): Computed relations defined by rules
// Declaration only defines the relation schema
// I/O directives are separate and can appear anywhere in the program
declaration = {
".decl" ~ relation_name ~ "(" ~ attributes_decl? ~ ")"
}
// Input directive for EDB relations - separate from declaration
// Example: .input Relation(IO="file", filename="data.csv", delimiter=",")
input_directive = {
".input" ~ relation_name ~ "(" ~ io_params? ~ ")"
}
// Output directive for relations
// Example: .output Relation
// Example: .output Relation(delimiter="|")
output_directive = {
".output" ~ relation_name ~ ("(" ~ io_params? ~ ")")?
}
// Shared key=value parameter list used by both input and output directives
io_params = { io_param ~ ("," ~ io_param)* }
// Individual parameter
io_param = { parameter_name ~ "=" ~ parameter_value }
// Parameter name (IO, filename, delimiter, etc.)
parameter_name = { identifier }
// Parameter value (quoted string)
parameter_value = { string }
// Print size directive for relations - separate from declaration
// Example: .printsize Relation
printsize_directive = {
".printsize" ~ relation_name
}
// Include directive — pull another .dl file into the current program.
// Path is resolved relative to the including file's directory.
// Example: .include "lib/base.dl"
include_directive = { ".include" ~ string }
// Comma-separated list of attribute declarations
// Example: src: number, dst: number, weight: string
attributes_decl = { attribute_decl ~ ("," ~ attribute_decl)* }
// Single attribute with name and data type
// Example: "src: number"
attribute_decl = { attribute_name ~ ":" ~ data_type }
// =============================================================================
// USER-DEFINED EXTERNAL FUNCTION DECLARATIONS
// =============================================================================
// .extern fn name(param1: type1, ...) -> ret_type
extern_fn = {
".extern" ~ "fn" ~ identifier ~
"(" ~ extern_fn_params? ~ ")" ~
"->" ~ data_type
}
extern_fn_params = { extern_fn_param ~ ("," ~ extern_fn_param)* }
extern_fn_param = { identifier ~ ":" ~ data_type }
// =============================================================================
// ARITHMETIC EXPRESSIONS
// =============================================================================
// Mathematical expressions with basic operators
// Simple arithmetic without precedence
// Note: "x + y * z" is parsed left-to-right as "(x + y) * z"
arithmetic_expr = { factor ~ (arithmetic_op ~ factor)* }
// Basic elements in arithmetic expressions
factor = { fn_call_expr | constant | variable }
// Arithmetic operators
arithmetic_op = {
plus | // +
minus | // -
times | // *
divide | // /
modulo | // %
cat // string concatenation
}
// Individual arithmetic operators
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
modulo = { "%" }
cat = { "cat" }
// =============================================================================
// COMPARISON EXPRESSIONS
// =============================================================================
// Binary comparisons between arithmetic expressions
// Binary comparison between arithmetic expressions
// Examples: x + y > z + w, count(x) = 5
compare_expr = { arithmetic_expr ~ compare_op ~ arithmetic_expr }
// Comparison operators
compare_op = {
equal | // =
not_equal | // !=
greater_equal_than | // >=
greater_than | // >
less_equal_than | // <=
less_than // <
}
// Individual comparison operators
equal = { "=" }
not_equal = { "!=" }
greater_equal_than = { ">=" }
greater_than = { ">" }
less_equal_than = { "<=" }
less_than = { "<" }
// =============================================================================
// AGGREGATE EXPRESSIONS
// =============================================================================
// Aggregation functions for computing statistics over groups
// Aggregation operation on a value
// Examples: count(x), avg(x + y), sum(weight)
aggregate_expr = {
aggregate_op ~
"(" ~
arithmetic_expr ~
")"
}
// Supported aggregate operators
aggregate_op = { count | average | sum | min | max }
count = { "count" | "COUNT" } // Count of elements
average = { "average" | "AVG" } // Average value
sum = { "sum" | "SUM" } // Sum of values
min = { "min" | "MIN" } // Minimum value
max = { "max" | "MAX" } // Maximum value
// =============================================================================
// ATOMS (RELATION REFERENCES)
// =============================================================================
// Atoms represent relation instances with specific arguments
// Relation with arguments
// Example: edge(x, y), person("Alice", 25)
atom = {
relation_name ~
"(" ~
(atom_arg ~ ("," ~ atom_arg)*)? ~
")"
}
// Negative atom for negative conditions
// Example: !edge(x, y)
negative_atom = { "!" ~ atom }
// Arguments in atoms can be constants, variables, or placeholders
atom_arg = { constant | variable | placeholder }
// =============================================================================
// RULES SECTION
// =============================================================================
// Rules define how output relations are computed from input relations
// Format: head :- body.
// Example: reachable(x, z) :- edge(x, y), reachable(y, z).
// A fact is a ground tuple inserted directly into a relation.
// Example: edge(1, 2).
fact = { head ~ "." }
// Rule with optional multi-head and multi-body (Souffle-style).
// Semicolons separate alternative heads and alternative bodies:
// h1; h2 :- b1; b2. → h1:-b1. h1:-b2. h2:-b1. h2:-b2.
rule = { rule_heads ~ ":-" ~ rule_bodies ~ "." }
// One or more semicolon-separated heads
rule_heads = { head ~ (";" ~ head)* }
// One or more semicolon-separated bodies (disjunction)
rule_bodies = { predicates ~ (";" ~ predicates)* }
// Left side of rule (before :-)
// Example: reachable(x, z), stats(count(x))
head = {
relation_name ~
"(" ~
(head_arg ~ ("," ~ head_arg)*)? ~
")"
}
// Can be aggregate functions or arithmetic expressions (UDF calls are parsed as factors within arithmetic)
head_arg = { aggregate_expr | arithmetic_expr }
// User-defined function call in rule head
// Example: my_udf(x, y + 1)
fn_call_expr = {
identifier ~
"(" ~
(arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~
")"
}
// Comma-separated list of predicates in the rule body
predicates = { predicate ~ ("," ~ predicate)* }
// Types of predicates allowed in rule bodies
// Note: compare_expr must come before atom because compare_expr can start with
// fn_call_expr (e.g., "cost(x) > 5") which atom would greedily consume.
predicate = {
compare_expr | // Comparison: x > y, cost(x) > 5
atom | // Positive atom: edge(x, y)
negative_atom | // Negative atom: !edge(x, y)
fn_call_expr // UDF predicate: is_valid(x + 1, y)
}
// =============================================================================
// LOOP BLOCK / FIXPOINT BLOCK
// =============================================================================
// A fixpoint block evaluates rules until no new tuples are derived (delta
// becomes empty). A loop block evaluates rules with bounded/conditional
// iteration. Both act as hard evaluation barriers — the stratifier cannot
// move rules across their boundaries.
//
// Syntax:
// fixpoint { rules... }
// loop <condition> { rules... }
//
// The condition is composed of at most one `while` clause and one `until`
// clause, in either order:
//
// while { iter_expr } — keep looping while the iteration constraint holds.
// iter_expr is one or more "@it op N" sub-conditions (joined by and/or)
// that describe allowed iteration windows.
// @it <= 6 → run iterations 0..=6
// @it >= 5 and @it <= 10 → run iterations 5..=10
// @it < 5 or @it > 10 → run iterations 0..=4 and 11..
//
// until { rel_expr } — halt when the listed bool relation(s) become true.
// rel_expr is one or more nullary relation names joined by and/or.
// until { Done } → stop when Done holds
// until { Done1 or Done2 } → stop when either holds
//
// Combining while and until:
// while W until U — stop as soon as EITHER condition triggers (min)
// while W or until U — stop only when BOTH conditions trigger (max)
//
// Relations using replacement (iterative) semantics are declared inside the
// block with `.iterative <relation_name>` directives. This is scoped per-block:
// the same relation can be iterative in one block and accumulative in another.
//
// Examples:
// fixpoint { ... }
// fixpoint { .iterative X ... }
// loop while { @it <= 10 } { ... }
// loop until { Done } { .iterative X ... }
// loop while { @it <= 100 } until { Done } { ... }
// loop while { @it <= 100 } or until { Done } { ... }
// loop until { Done } while { @it >= 5 and @it <= 10 } { ... }
fixpoint_block = {
"fixpoint" ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}
loop_block = {
"loop" ~ loop_condition ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}
// .iterative <relation_name> — marks a relation as using replacement (new_from)
// semantics within this block. Unlisted relations default to accumulative (new).
// Scoped per-block: the same relation can be iterative in one block and
// accumulative in another.
iterative_directive = { ".iterative" ~ relation_name }
// At most one while clause and one until clause, in either order.
// If both are present without "or", stop when EITHER fires (min / and semantics).
// If joined by "or", stop when BOTH fire (max / or semantics).
loop_condition = {
loop_while ~ (loop_or? ~ loop_until)?
| loop_until ~ (loop_or? ~ loop_while)?
}
// while { iter_expr } — loop while iteration is in the specified window(s).
loop_while = { "while" ~ "{" ~ loop_iter_expr ~ "}" }
// until { rel_expr } — stop when the listed relation(s) become true.
loop_until = { "until" ~ "{" ~ loop_stop_group ~ "}" }
// One or more bool relations joined by and/or inside the until clause.
loop_stop_group = { loop_bool_relation ~ (loop_connective ~ loop_bool_relation)* }
// One or more "@it op N" sub-conditions joined by and/or.
// Resolved at parse time into a list of allowed iteration windows Vec<(u16,u16)>.
loop_iter_expr = {
"@it" ~ loop_iter_compare_op ~ integer
~ (loop_connective ~ "@it" ~ loop_iter_compare_op ~ integer)*
}
// Comparison operators for iteration sub-conditions.
// Longer tokens must come before shorter ones to avoid premature match.
loop_iter_compare_op = { "<=" | ">=" | "==" | "<" | ">" }
// A single nullary (boolean) relation name used in an until clause.
loop_bool_relation = { relation_name }
// Boolean connective joining clauses or sub-conditions.
loop_connective = { loop_and | loop_or }
loop_and = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }
loop_or = @{ "or" ~ !(ASCII_ALPHANUMERIC | "_") }
// =============================================================================
// WHITESPACE AND COMMENTS
// =============================================================================
// Rules for handling whitespace and comments (ignored during parsing)
// Whitespace and comments are ignored during parsing
WHITESPACE = _{ " " | "\t" | NEWLINE }
// Comment starts with # or // and continues until end of line
// Examples: # This is a comment, // Another comment
COMMENT = _{ ("#" | "//") ~ (!NEWLINE ~ ANY)* }