flowlog-build 0.2.2

// =============================================================================
// FlowLog GRAMMAR DEFINITION
// =============================================================================
// This grammar defines the syntax for FlowLog, a Datalog language engine
// for declarative data processing and rule-based queries.

// Main grammar entry point
main_grammar = {
    SOI ~
    (declaration | extern_fn | include_directive | input_directive | output_directive | printsize_directive | fixpoint_block | loop_block | fact | rule)* ~
    EOI
}

// =============================================================================
// BASIC ELEMENTS (IDENTIFIERS, LITERALS, TYPES)
// =============================================================================

// Identifier pattern - must start with letter or underscore, 
// can contain letters, numbers, underscores
// Valid examples: x, _temp, myVar123, _private_var
// Invalid examples: 1x, x-yz (hyphen not allowed)
identifier = @{ "_"? ~ ASCII_ALPHA+ ~ (ASCII_ALPHANUMERIC | "_")* }

// Variable reference - variables used in rules and queries
variable = { identifier }

// Attribute name - names of relation attributes (columns)
attribute_name = { identifier }

// Relation name - names of relations (tables)
relation_name = { identifier }

// Data type specification
data_type = { integer8_type | integer16_type | integer32_type | integer64_type
            | uinteger8_type | uinteger16_type | uinteger32_type | uinteger64_type
            | float32_type | float64_type
            | string_type | bool_type }

// 8-bit signed integer data type
integer8_type = { "int8" }

// 16-bit signed integer data type
integer16_type = { "int16" }

// 32-bit signed integer data type ("number" is an alias for "int32")
integer32_type = { "int32" | "number" }

// 64-bit signed integer data type
integer64_type = { "int64" }

// 8-bit unsigned integer data type
uinteger8_type = { "uint8" }

// 16-bit unsigned integer data type
uinteger16_type = { "uint16" }

// 32-bit unsigned integer data type ("unsigned" is an alias for "uint32")
uinteger32_type = { "uint32" | "unsigned" }

// 64-bit unsigned integer data type
uinteger64_type = { "uint64" }

// 32-bit floating point data type ("float" is an alias for "f32")
float32_type = { "float" | "f32" }

// 64-bit floating point data type
float64_type = { "f64" }

// Text data type for strings ("symbol" is an alias for "string")
string_type = { "string" | "symbol" }

// Boolean data type
bool_type = { "bool" }

// Constant values
constant = { boolean | float | integer | string } 

// Float with optional sign, must contain a decimal point
// Examples: 1.5, -3.14, +0.0
float = @{ ("+" | "-")? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }

// Integer with optional sign
// Examples: 42, -17, +100
integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }

// String enclosed in double quotes
// Example: "hello world"
string = { "\"" ~ (!"\"" ~ ANY)* ~ "\"" }

// Placeholder for unused variables (underscore)
// Used when you don't need to reference a variable
placeholder = { "_" } 

// Boolean literal values
boolean = { ("True" | "False") ~ !(ASCII_ALPHANUMERIC | "_") } 

// =============================================================================
// RELATION DECLARATIONS
// =============================================================================
// Relations are the core data structures in FlowLog. They can be:
// - EDB (Extensional Database): Input relations with external data
// - IDB (Intentional Database): Computed relations defined by rules

// Declaration only defines the relation schema
// I/O directives are separate and can appear anywhere in the program
declaration = { 
    ".decl" ~ relation_name ~ "(" ~ attributes_decl? ~ ")"
}

// Input directive for EDB relations - separate from declaration
// Example: .input Relation(IO="file", filename="data.csv", delimiter=",")
input_directive = {
    ".input" ~ relation_name ~ "(" ~ io_params? ~ ")"
}

// Output directive for relations
// Example: .output Relation
// Example: .output Relation(delimiter="|")
output_directive = {
    ".output" ~ relation_name ~ ("(" ~ io_params? ~ ")")?
}

// Shared key=value parameter list used by both input and output directives
io_params = { io_param ~ ("," ~ io_param)* }

// Individual parameter
io_param = { parameter_name ~ "=" ~ parameter_value }

// Parameter name (IO, filename, delimiter, etc.)
parameter_name = { identifier }

// Parameter value (quoted string)
parameter_value = { string }

// Print size directive for relations - separate from declaration
// Example: .printsize Relation
printsize_directive = {
    ".printsize" ~ relation_name
}

// Include directive — pull another .dl file into the current program.
// Path is resolved relative to the including file's directory.
// Example: .include "lib/base.dl"
include_directive = { ".include" ~ string }

// Comma-separated list of attribute declarations
// Example: src: number, dst: number, weight: string
attributes_decl = { attribute_decl ~ ("," ~ attribute_decl)* }

// Single attribute with name and data type
// Example: "src: number" 
attribute_decl = { attribute_name ~ ":" ~ data_type }

// =============================================================================
// USER-DEFINED EXTERNAL FUNCTION DECLARATIONS
// =============================================================================
//   .extern fn   name(param1: type1, ...) -> ret_type

extern_fn = {
    ".extern" ~ "fn" ~ identifier ~
    "(" ~ extern_fn_params? ~ ")" ~
    "->" ~ data_type
}

extern_fn_params = { extern_fn_param ~ ("," ~ extern_fn_param)* }

extern_fn_param = { identifier ~ ":" ~ data_type }

// =============================================================================
// ARITHMETIC EXPRESSIONS
// =============================================================================
// Mathematical expressions with basic operators

// Simple arithmetic without precedence
// Note: "x + y * z" is parsed left-to-right as "(x + y) * z"
arithmetic_expr = { factor ~ (arithmetic_op ~ factor)* }

// Basic elements in arithmetic expressions
factor = { fn_call_expr | constant | variable }

// Arithmetic operators
arithmetic_op = { 
    plus |    // +
    minus |   // -
    times |   // *
    divide |  // /
    modulo |  // %
    cat       // string concatenation
}

// Individual arithmetic operators
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
modulo = { "%" }
cat = { "cat" }

// =============================================================================
// COMPARISON EXPRESSIONS  
// =============================================================================
// Binary comparisons between arithmetic expressions

// Binary comparison between arithmetic expressions
// Examples: x + y > z + w, count(x) = 5
compare_expr = { arithmetic_expr ~ compare_op ~ arithmetic_expr }

// Comparison operators
compare_op = { 
    equal |               // =
    not_equal |           // !=
    greater_equal_than |  // >=
    greater_than |        // >
    less_equal_than |     // <=
    less_than             // <
}

// Individual comparison operators
equal = { "=" } 
not_equal = { "!=" }
greater_equal_than = { ">=" }
greater_than = { ">" }
less_equal_than = { "<=" }
less_than = { "<" }

// =============================================================================
// AGGREGATE EXPRESSIONS
// =============================================================================
// Aggregation functions for computing statistics over groups

// Aggregation operation on a value
// Examples: count(x), avg(x + y), sum(weight)
aggregate_expr = { 
    aggregate_op ~ 
    "(" ~ 
    arithmetic_expr ~
    ")" 
} 

// Supported aggregate operators
aggregate_op = { count | average | sum | min | max }
count = { "count" | "COUNT" }       // Count of elements
average = { "average" | "AVG" }     // Average value
sum = { "sum" | "SUM" }             // Sum of values
min = { "min" | "MIN" }             // Minimum value
max = { "max" | "MAX" }             // Maximum value

// =============================================================================
// ATOMS (RELATION REFERENCES)
// =============================================================================
// Atoms represent relation instances with specific arguments

// Relation with arguments
// Example: edge(x, y), person("Alice", 25)
atom = { 
    relation_name ~ 
    "(" ~ 
    (atom_arg ~ ("," ~ atom_arg)*)? ~ 
    ")" 
}

// Negative atom for negative conditions
// Example: !edge(x, y)
negative_atom = { "!" ~ atom }

// Arguments in atoms can be constants, variables, or placeholders
atom_arg = { constant | variable | placeholder } 

// =============================================================================
// RULES SECTION
// =============================================================================
// Rules define how output relations are computed from input relations
// Format: head :- body.
// Example: reachable(x, z) :- edge(x, y), reachable(y, z).

// A fact is a ground tuple inserted directly into a relation.
// Example: edge(1, 2).
fact = { head ~ "." }

// Rule with optional multi-head and multi-body (Souffle-style).
// Semicolons separate alternative heads and alternative bodies:
//   h1; h2 :- b1; b2.  →  h1:-b1. h1:-b2. h2:-b1. h2:-b2.
rule = { rule_heads ~ ":-" ~ rule_bodies ~ "." }

// One or more semicolon-separated heads
rule_heads = { head ~ (";" ~ head)* }

// One or more semicolon-separated bodies (disjunction)
rule_bodies = { predicates ~ (";" ~ predicates)* }

// Left side of rule (before :-)
// Example: reachable(x, z), stats(count(x))
head = {
    relation_name ~ 
    "(" ~ 
    (head_arg ~ ("," ~ head_arg)*)? ~
    ")" 
}

// Can be aggregate functions or arithmetic expressions (UDF calls are parsed as factors within arithmetic)
head_arg = { aggregate_expr | arithmetic_expr }

// User-defined function call in rule head
// Example: my_udf(x, y + 1)
fn_call_expr = {
    identifier ~
    "(" ~
    (arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~
    ")"
}

// Comma-separated list of predicates in the rule body
predicates = { predicate ~ ("," ~ predicate)* }

// Types of predicates allowed in rule bodies
// Note: compare_expr must come before atom because compare_expr can start with
// fn_call_expr (e.g., "cost(x) > 5") which atom would greedily consume.
predicate = {
    compare_expr |  // Comparison: x > y, cost(x) > 5
    atom |          // Positive atom: edge(x, y)
    negative_atom | // Negative atom: !edge(x, y)
    fn_call_expr    // UDF predicate: is_valid(x + 1, y)
}


// =============================================================================
// LOOP BLOCK / FIXPOINT BLOCK
// =============================================================================
// A fixpoint block evaluates rules until no new tuples are derived (delta
// becomes empty).  A loop block evaluates rules with bounded/conditional
// iteration.  Both act as hard evaluation barriers — the stratifier cannot
// move rules across their boundaries.
//
// Syntax:
//   fixpoint { rules... }
//   loop <condition> { rules... }
//
// The condition is composed of at most one `while` clause and one `until`
// clause, in either order:
//
//   while { iter_expr }  — keep looping while the iteration constraint holds.
//       iter_expr is one or more "@it op N" sub-conditions (joined by and/or)
//       that describe allowed iteration windows.
//         @it <= 6                  → run iterations 0..=6
//         @it >= 5 and @it <= 10   → run iterations 5..=10
//         @it < 5 or @it > 10      → run iterations 0..=4 and 11..
//
//   until { rel_expr }   — halt when the listed bool relation(s) become true.
//       rel_expr is one or more nullary relation names joined by and/or.
//         until { Done }            → stop when Done holds
//         until { Done1 or Done2 }  → stop when either holds
//
//   Combining while and until:
//     while W until U          — stop as soon as EITHER condition triggers (min)
//     while W or until U       — stop only when BOTH conditions trigger (max)
//
// Relations using replacement (iterative) semantics are declared inside the
// block with `.iterative <relation_name>` directives.  This is scoped per-block:
// the same relation can be iterative in one block and accumulative in another.
//
// Examples:
//   fixpoint { ... }
//   fixpoint { .iterative X  ... }
//   loop while { @it <= 10 } { ... }
//   loop until { Done } { .iterative X  ... }
//   loop while { @it <= 100 } until { Done } { ... }
//   loop while { @it <= 100 } or until { Done } { ... }
//   loop until { Done } while { @it >= 5 and @it <= 10 } { ... }

fixpoint_block = {
    "fixpoint" ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

loop_block = {
    "loop" ~ loop_condition ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

// .iterative <relation_name> — marks a relation as using replacement (new_from)
// semantics within this block.  Unlisted relations default to accumulative (new).
// Scoped per-block: the same relation can be iterative in one block and
// accumulative in another.
iterative_directive = { ".iterative" ~ relation_name }

// At most one while clause and one until clause, in either order.
// If both are present without "or", stop when EITHER fires (min / and semantics).
// If joined by "or", stop when BOTH fire (max / or semantics).
loop_condition = {
    loop_while ~ (loop_or? ~ loop_until)?
  | loop_until ~ (loop_or? ~ loop_while)?
}

// while { iter_expr } — loop while iteration is in the specified window(s).
loop_while = { "while" ~ "{" ~ loop_iter_expr ~ "}" }

// until { rel_expr } — stop when the listed relation(s) become true.
loop_until = { "until" ~ "{" ~ loop_stop_group ~ "}" }

// One or more bool relations joined by and/or inside the until clause.
loop_stop_group = { loop_bool_relation ~ (loop_connective ~ loop_bool_relation)* }

// One or more "@it op N" sub-conditions joined by and/or.
// Resolved at parse time into a list of allowed iteration windows Vec<(u16,u16)>.
loop_iter_expr = {
    "@it" ~ loop_iter_compare_op ~ integer
    ~ (loop_connective ~ "@it" ~ loop_iter_compare_op ~ integer)*
}

// Comparison operators for iteration sub-conditions.
// Longer tokens must come before shorter ones to avoid premature match.
loop_iter_compare_op = { "<=" | ">=" | "==" | "<" | ">" }

// A single nullary (boolean) relation name used in an until clause.
loop_bool_relation = { relation_name }

// Boolean connective joining clauses or sub-conditions.
loop_connective = { loop_and | loop_or }
loop_and        = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }
loop_or         = @{ "or"  ~ !(ASCII_ALPHANUMERIC | "_") }

// =============================================================================
// WHITESPACE AND COMMENTS
// =============================================================================
// Rules for handling whitespace and comments (ignored during parsing)

// Whitespace and comments are ignored during parsing
WHITESPACE = _{ " " | "\t" | NEWLINE }

// Comment starts with # or // and continues until end of line
// Examples: # This is a comment, // Another comment
COMMENT = _{ ("#" | "//") ~ (!NEWLINE ~ ANY)* }