flowlog-build 0.2.2

Build-time FlowLog compiler for library mode.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
// =============================================================================
// FlowLog GRAMMAR DEFINITION
// =============================================================================
// This grammar defines the syntax for FlowLog, a Datalog language engine
// for declarative data processing and rule-based queries.

// Main grammar entry point
main_grammar = {
    SOI ~
    (declaration | extern_fn | include_directive | input_directive | output_directive | printsize_directive | fixpoint_block | loop_block | fact | rule)* ~
    EOI
}

// =============================================================================
// BASIC ELEMENTS (IDENTIFIERS, LITERALS, TYPES)
// =============================================================================

// Identifier pattern - must start with letter or underscore, 
// can contain letters, numbers, underscores
// Valid examples: x, _temp, myVar123, _private_var
// Invalid examples: 1x, x-yz (hyphen not allowed)
identifier = @{ "_"? ~ ASCII_ALPHA+ ~ (ASCII_ALPHANUMERIC | "_")* }

// Variable reference - variables used in rules and queries
variable = { identifier }

// Attribute name - names of relation attributes (columns)
attribute_name = { identifier }

// Relation name - names of relations (tables)
relation_name = { identifier }

// Data type specification
data_type = { integer8_type | integer16_type | integer32_type | integer64_type
            | uinteger8_type | uinteger16_type | uinteger32_type | uinteger64_type
            | float32_type | float64_type
            | string_type | bool_type }

// 8-bit signed integer data type
integer8_type = { "int8" }

// 16-bit signed integer data type
integer16_type = { "int16" }

// 32-bit signed integer data type ("number" is an alias for "int32")
integer32_type = { "int32" | "number" }

// 64-bit signed integer data type
integer64_type = { "int64" }

// 8-bit unsigned integer data type
uinteger8_type = { "uint8" }

// 16-bit unsigned integer data type
uinteger16_type = { "uint16" }

// 32-bit unsigned integer data type ("unsigned" is an alias for "uint32")
uinteger32_type = { "uint32" | "unsigned" }

// 64-bit unsigned integer data type
uinteger64_type = { "uint64" }

// 32-bit floating point data type ("float" is an alias for "f32")
float32_type = { "float" | "f32" }

// 64-bit floating point data type
float64_type = { "f64" }

// Text data type for strings ("symbol" is an alias for "string")
string_type = { "string" | "symbol" }

// Boolean data type
bool_type = { "bool" }

// Constant values
constant = { boolean | float | integer | string } 

// Float with optional sign, must contain a decimal point
// Examples: 1.5, -3.14, +0.0
float = @{ ("+" | "-")? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }

// Integer with optional sign
// Examples: 42, -17, +100
integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }

// String enclosed in double quotes
// Example: "hello world"
string = { "\"" ~ (!"\"" ~ ANY)* ~ "\"" }

// Placeholder for unused variables (underscore)
// Used when you don't need to reference a variable
placeholder = { "_" } 

// Boolean literal values
boolean = { ("True" | "False") ~ !(ASCII_ALPHANUMERIC | "_") } 

// =============================================================================
// RELATION DECLARATIONS
// =============================================================================
// Relations are the core data structures in FlowLog. They can be:
// - EDB (Extensional Database): Input relations with external data
// - IDB (Intentional Database): Computed relations defined by rules

// Declaration only defines the relation schema
// I/O directives are separate and can appear anywhere in the program
declaration = { 
    ".decl" ~ relation_name ~ "(" ~ attributes_decl? ~ ")"
}

// Input directive for EDB relations - separate from declaration
// Example: .input Relation(IO="file", filename="data.csv", delimiter=",")
input_directive = {
    ".input" ~ relation_name ~ "(" ~ io_params? ~ ")"
}

// Output directive for relations
// Example: .output Relation
// Example: .output Relation(delimiter="|")
output_directive = {
    ".output" ~ relation_name ~ ("(" ~ io_params? ~ ")")?
}

// Shared key=value parameter list used by both input and output directives
io_params = { io_param ~ ("," ~ io_param)* }

// Individual parameter
io_param = { parameter_name ~ "=" ~ parameter_value }

// Parameter name (IO, filename, delimiter, etc.)
parameter_name = { identifier }

// Parameter value (quoted string)
parameter_value = { string }

// Print size directive for relations - separate from declaration
// Example: .printsize Relation
printsize_directive = {
    ".printsize" ~ relation_name
}

// Include directive — pull another .dl file into the current program.
// Path is resolved relative to the including file's directory.
// Example: .include "lib/base.dl"
include_directive = { ".include" ~ string }

// Comma-separated list of attribute declarations
// Example: src: number, dst: number, weight: string
attributes_decl = { attribute_decl ~ ("," ~ attribute_decl)* }

// Single attribute with name and data type
// Example: "src: number" 
attribute_decl = { attribute_name ~ ":" ~ data_type }

// =============================================================================
// USER-DEFINED EXTERNAL FUNCTION DECLARATIONS
// =============================================================================
//   .extern fn   name(param1: type1, ...) -> ret_type

extern_fn = {
    ".extern" ~ "fn" ~ identifier ~
    "(" ~ extern_fn_params? ~ ")" ~
    "->" ~ data_type
}

extern_fn_params = { extern_fn_param ~ ("," ~ extern_fn_param)* }

extern_fn_param = { identifier ~ ":" ~ data_type }

// =============================================================================
// ARITHMETIC EXPRESSIONS
// =============================================================================
// Mathematical expressions with basic operators

// Simple arithmetic without precedence
// Note: "x + y * z" is parsed left-to-right as "(x + y) * z"
arithmetic_expr = { factor ~ (arithmetic_op ~ factor)* }

// Basic elements in arithmetic expressions
factor = { fn_call_expr | constant | variable }

// Arithmetic operators
arithmetic_op = { 
    plus |    // +
    minus |   // -
    times |   // *
    divide |  // /
    modulo |  // %
    cat       // string concatenation
}

// Individual arithmetic operators
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
modulo = { "%" }
cat = { "cat" }

// =============================================================================
// COMPARISON EXPRESSIONS  
// =============================================================================
// Binary comparisons between arithmetic expressions

// Binary comparison between arithmetic expressions
// Examples: x + y > z + w, count(x) = 5
compare_expr = { arithmetic_expr ~ compare_op ~ arithmetic_expr }

// Comparison operators
compare_op = { 
    equal |               // =
    not_equal |           // !=
    greater_equal_than |  // >=
    greater_than |        // >
    less_equal_than |     // <=
    less_than             // <
}

// Individual comparison operators
equal = { "=" } 
not_equal = { "!=" }
greater_equal_than = { ">=" }
greater_than = { ">" }
less_equal_than = { "<=" }
less_than = { "<" }

// =============================================================================
// AGGREGATE EXPRESSIONS
// =============================================================================
// Aggregation functions for computing statistics over groups

// Aggregation operation on a value
// Examples: count(x), avg(x + y), sum(weight)
aggregate_expr = { 
    aggregate_op ~ 
    "(" ~ 
    arithmetic_expr ~
    ")" 
} 

// Supported aggregate operators
aggregate_op = { count | average | sum | min | max }
count = { "count" | "COUNT" }       // Count of elements
average = { "average" | "AVG" }     // Average value
sum = { "sum" | "SUM" }             // Sum of values
min = { "min" | "MIN" }             // Minimum value
max = { "max" | "MAX" }             // Maximum value

// =============================================================================
// ATOMS (RELATION REFERENCES)
// =============================================================================
// Atoms represent relation instances with specific arguments

// Relation with arguments
// Example: edge(x, y), person("Alice", 25)
atom = { 
    relation_name ~ 
    "(" ~ 
    (atom_arg ~ ("," ~ atom_arg)*)? ~ 
    ")" 
}

// Negative atom for negative conditions
// Example: !edge(x, y)
negative_atom = { "!" ~ atom }

// Arguments in atoms can be constants, variables, or placeholders
atom_arg = { constant | variable | placeholder } 

// =============================================================================
// RULES SECTION
// =============================================================================
// Rules define how output relations are computed from input relations
// Format: head :- body.
// Example: reachable(x, z) :- edge(x, y), reachable(y, z).

// A fact is a ground tuple inserted directly into a relation.
// Example: edge(1, 2).
fact = { head ~ "." }

// Rule with optional multi-head and multi-body (Souffle-style).
// Semicolons separate alternative heads and alternative bodies:
//   h1; h2 :- b1; b2.  →  h1:-b1. h1:-b2. h2:-b1. h2:-b2.
rule = { rule_heads ~ ":-" ~ rule_bodies ~ "." }

// One or more semicolon-separated heads
rule_heads = { head ~ (";" ~ head)* }

// One or more semicolon-separated bodies (disjunction)
rule_bodies = { predicates ~ (";" ~ predicates)* }

// Left side of rule (before :-)
// Example: reachable(x, z), stats(count(x))
head = {
    relation_name ~ 
    "(" ~ 
    (head_arg ~ ("," ~ head_arg)*)? ~
    ")" 
}

// Can be aggregate functions or arithmetic expressions (UDF calls are parsed as factors within arithmetic)
head_arg = { aggregate_expr | arithmetic_expr }

// User-defined function call in rule head
// Example: my_udf(x, y + 1)
fn_call_expr = {
    identifier ~
    "(" ~
    (arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~
    ")"
}

// Comma-separated list of predicates in the rule body
predicates = { predicate ~ ("," ~ predicate)* }

// Types of predicates allowed in rule bodies
// Note: compare_expr must come before atom because compare_expr can start with
// fn_call_expr (e.g., "cost(x) > 5") which atom would greedily consume.
predicate = {
    compare_expr |  // Comparison: x > y, cost(x) > 5
    atom |          // Positive atom: edge(x, y)
    negative_atom | // Negative atom: !edge(x, y)
    fn_call_expr    // UDF predicate: is_valid(x + 1, y)
}


// =============================================================================
// LOOP BLOCK / FIXPOINT BLOCK
// =============================================================================
// A fixpoint block evaluates rules until no new tuples are derived (delta
// becomes empty).  A loop block evaluates rules with bounded/conditional
// iteration.  Both act as hard evaluation barriers — the stratifier cannot
// move rules across their boundaries.
//
// Syntax:
//   fixpoint { rules... }
//   loop <condition> { rules... }
//
// The condition is composed of at most one `while` clause and one `until`
// clause, in either order:
//
//   while { iter_expr }  — keep looping while the iteration constraint holds.
//       iter_expr is one or more "@it op N" sub-conditions (joined by and/or)
//       that describe allowed iteration windows.
//         @it <= 6                  → run iterations 0..=6
//         @it >= 5 and @it <= 10   → run iterations 5..=10
//         @it < 5 or @it > 10      → run iterations 0..=4 and 11..
//
//   until { rel_expr }   — halt when the listed bool relation(s) become true.
//       rel_expr is one or more nullary relation names joined by and/or.
//         until { Done }            → stop when Done holds
//         until { Done1 or Done2 }  → stop when either holds
//
//   Combining while and until:
//     while W until U          — stop as soon as EITHER condition triggers (min)
//     while W or until U       — stop only when BOTH conditions trigger (max)
//
// Relations using replacement (iterative) semantics are declared inside the
// block with `.iterative <relation_name>` directives.  This is scoped per-block:
// the same relation can be iterative in one block and accumulative in another.
//
// Examples:
//   fixpoint { ... }
//   fixpoint { .iterative X  ... }
//   loop while { @it <= 10 } { ... }
//   loop until { Done } { .iterative X  ... }
//   loop while { @it <= 100 } until { Done } { ... }
//   loop while { @it <= 100 } or until { Done } { ... }
//   loop until { Done } while { @it >= 5 and @it <= 10 } { ... }

fixpoint_block = {
    "fixpoint" ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

loop_block = {
    "loop" ~ loop_condition ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

// .iterative <relation_name> — marks a relation as using replacement (new_from)
// semantics within this block.  Unlisted relations default to accumulative (new).
// Scoped per-block: the same relation can be iterative in one block and
// accumulative in another.
iterative_directive = { ".iterative" ~ relation_name }

// At most one while clause and one until clause, in either order.
// If both are present without "or", stop when EITHER fires (min / and semantics).
// If joined by "or", stop when BOTH fire (max / or semantics).
loop_condition = {
    loop_while ~ (loop_or? ~ loop_until)?
  | loop_until ~ (loop_or? ~ loop_while)?
}

// while { iter_expr } — loop while iteration is in the specified window(s).
loop_while = { "while" ~ "{" ~ loop_iter_expr ~ "}" }

// until { rel_expr } — stop when the listed relation(s) become true.
loop_until = { "until" ~ "{" ~ loop_stop_group ~ "}" }

// One or more bool relations joined by and/or inside the until clause.
loop_stop_group = { loop_bool_relation ~ (loop_connective ~ loop_bool_relation)* }

// One or more "@it op N" sub-conditions joined by and/or.
// Resolved at parse time into a list of allowed iteration windows Vec<(u16,u16)>.
loop_iter_expr = {
    "@it" ~ loop_iter_compare_op ~ integer
    ~ (loop_connective ~ "@it" ~ loop_iter_compare_op ~ integer)*
}

// Comparison operators for iteration sub-conditions.
// Longer tokens must come before shorter ones to avoid premature match.
loop_iter_compare_op = { "<=" | ">=" | "==" | "<" | ">" }

// A single nullary (boolean) relation name used in an until clause.
loop_bool_relation = { relation_name }

// Boolean connective joining clauses or sub-conditions.
loop_connective = { loop_and | loop_or }
loop_and        = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }
loop_or         = @{ "or"  ~ !(ASCII_ALPHANUMERIC | "_") }

// =============================================================================
// WHITESPACE AND COMMENTS
// =============================================================================
// Rules for handling whitespace and comments (ignored during parsing)

// Whitespace and comments are ignored during parsing
WHITESPACE = _{ " " | "\t" | NEWLINE }

// Comment starts with # or // and continues until end of line
// Examples: # This is a comment, // Another comment
COMMENT = _{ ("#" | "//") ~ (!NEWLINE ~ ANY)* }