flowlog-build 0.3.0

Build-time FlowLog compiler for library mode.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
// =============================================================================
// FlowLog GRAMMAR DEFINITION
// =============================================================================
// This grammar defines the syntax for FlowLog, a Datalog language engine
// for declarative data processing and rule-based queries.

// Main grammar entry point
main_grammar = {
    SOI ~
    (declaration | type_alias_decl | extern_fn | include_directive | input_directive | output_directive | printsize_directive | comp_decl | init_decl | fixpoint_block | loop_block | fact | rule)* ~
    EOI
}

// =============================================================================
// BASIC ELEMENTS (IDENTIFIERS, LITERALS, TYPES)
// =============================================================================

// Identifier pattern - must start with letter or underscore, 
// can contain letters, numbers, underscores
// Valid examples: x, _temp, myVar123, _private_var
// Invalid examples: 1x, x-yz (hyphen not allowed)
identifier = @{ "_"? ~ ASCII_ALPHA+ ~ (ASCII_ALPHANUMERIC | "_")* }

// Variable reference - variables used in rules and queries
variable = { identifier }

// Attribute name - names of relation attributes (columns)
attribute_name = { identifier }

// Relation name as it appears at a *defining* site (`.decl`). Always
// a single identifier — dots are reserved for inliner-produced names.
relation_name = @{ identifier }

// Relation reference at a *use* site (atom, head, directive target,
// loop condition, iterative list). Accepts the dotted form so user
// code can refer to inliner-produced relations like `c.Holds`.
relation_ref = @{ identifier ~ ("." ~ identifier)* }

// Type at a `.decl` attribute. `data_type` must come first so reserved
// keywords (`number`, `symbol`, …) win over the catch-all identifier.
type_ref = { data_type | alias_name }

// A user-declared type name at a *use* site. Resolved against the
// [`TypeRegistry`] after parsing — includes top-level `.type` aliases
// and per-instance prefixed types registered by the component inliner
// (`a.cfg.context`, etc.). Dotted forms support comp-type-parameter
// references like `.decl R(c: cfg.Context)`.
alias_name = @{ identifier ~ ("." ~ identifier)* }

// Data type specification
data_type = { integer8_type | integer16_type | integer32_type | integer64_type
            | uinteger8_type | uinteger16_type | uinteger32_type | uinteger64_type
            | float32_type | float64_type
            | string_type | bool_type }

// 8-bit signed integer data type
integer8_type = { "int8" }

// 16-bit signed integer data type
integer16_type = { "int16" }

// 32-bit signed integer data type ("number" is an alias for "int32")
integer32_type = { "int32" | "number" }

// 64-bit signed integer data type
integer64_type = { "int64" }

// 8-bit unsigned integer data type
uinteger8_type = { "uint8" }

// 16-bit unsigned integer data type
uinteger16_type = { "uint16" }

// 32-bit unsigned integer data type ("unsigned" is an alias for "uint32")
uinteger32_type = { "uint32" | "unsigned" }

// 64-bit unsigned integer data type
uinteger64_type = { "uint64" }

// 32-bit floating point data type ("float" is an alias for "f32")
float32_type = { "float" | "f32" }

// 64-bit floating point data type
float64_type = { "f64" }

// Text data type for strings ("symbol" is an alias for "string")
string_type = { "string" | "symbol" }

// Boolean data type
bool_type = { "bool" }

// Constant values
constant = { boolean | float | integer | string } 

// Float with optional sign, must contain a decimal point
// Examples: 1.5, -3.14, +0.0
float = @{ ("+" | "-")? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }

// Integer with optional sign
// Examples: 42, -17, +100
integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }

// String enclosed in double quotes
// Example: "hello world"
string = { "\"" ~ (!"\"" ~ ANY)* ~ "\"" }

// Placeholder for unused variables (underscore)
// Used when you don't need to reference a variable
placeholder = { "_" } 

// Boolean literal values
boolean = { ("True" | "False") ~ !(ASCII_ALPHANUMERIC | "_") } 

// =============================================================================
// RELATION DECLARATIONS
// =============================================================================
// Relations are the core data structures in FlowLog. They can be:
// - EDB (Extensional Database): Input relations with external data
// - IDB (Intentional Database): Computed relations defined by rules

// Declaration only defines the relation schema. I/O directives are
// separate and can appear anywhere in the program. Optional trailing
// `overridable` is only meaningful inside a `.comp` body.
declaration = {
    ".decl" ~ relation_name ~ "(" ~ attributes_decl? ~ ")" ~ overridable_kw?
}

// Lets a subcomponent's `.override` replace this relation's rules.
overridable_kw = @{ "overridable" ~ !(ASCII_ALPHANUMERIC | "_") }

// Input directive for EDB relations - separate from declaration
// Example: .input Relation(IO="file", filename="data.csv", delimiter=",")
input_directive = {
    ".input" ~ relation_ref ~ "(" ~ io_params? ~ ")"
}

// Output directive for relations
// Example: .output Relation
// Example: .output Relation(delimiter="|")
output_directive = {
    ".output" ~ relation_ref ~ ("(" ~ io_params? ~ ")")?
}

// Shared key=value parameter list used by both input and output directives
io_params = { io_param ~ ("," ~ io_param)* }

// Individual parameter
io_param = { parameter_name ~ "=" ~ parameter_value }

// Parameter name (IO, filename, delimiter, etc.)
parameter_name = { identifier }

// Parameter value (quoted string)
parameter_value = { string }

// Print size directive for relations - separate from declaration
// Example: .printsize Relation
printsize_directive = {
    ".printsize" ~ relation_ref
}

// Include directive — pull another .dl file into the current program.
// Path is resolved relative to the including file's directory.
// Example: .include "lib/base.dl"
include_directive = { ".include" ~ string }

// Comma-separated list of attribute declarations
// Example: src: number, dst: number, weight: string
attributes_decl = { attribute_decl ~ ("," ~ attribute_decl)* }

// Single attribute with name and type. Type is either a built-in primitive
// or a `.type`-declared alias name (resolved transitively at parse time).
// Example: "src: number" or "src: NodeId" where `NodeId` is a `.type` alias.
attribute_decl = { attribute_name ~ ":" ~ type_ref }

// =============================================================================
// USER-DEFINED EXTERNAL FUNCTION DECLARATIONS
// =============================================================================
//   .extern fn   name(param1: type1, ...) -> ret_type

extern_fn = {
    ".extern" ~ "fn" ~ identifier ~
    "(" ~ extern_fn_params? ~ ")" ~
    "->" ~ data_type
}

extern_fn_params = { extern_fn_param ~ ("," ~ extern_fn_param)* }

extern_fn_param = { identifier ~ ":" ~ data_type }

// =============================================================================
// ARITHMETIC EXPRESSIONS
// =============================================================================
// Mathematical expressions with basic operators

// Simple arithmetic without precedence
// Note: "x + y * z" is parsed left-to-right as "(x + y) * z"
arithmetic_expr = { factor ~ (arithmetic_op ~ factor)* }

// `as_cast` first so `as` wins over a UDF named `as` (reserved keyword).
factor = { as_cast | builtin_fn_call | fn_call_expr | constant | variable }

// `as(factor, TargetType)` — explicit cast, runtime no-op. Inner is a
// single `factor` (not `arithmetic_expr`) so the typechecker can lower
// `as(...)` away after validation; for `as(x + 1, T)`, bind first.
as_cast = { as_kw ~ "(" ~ factor ~ "," ~ type_ref ~ ")" }
as_kw = _{ "as" ~ !(ASCII_ALPHANUMERIC | "_") }

// Arithmetic operators
arithmetic_op = { 
    plus |    // +
    minus |   // -
    times |   // *
    divide |  // /
    modulo |  // %
    cat       // string concatenation
}

// Individual arithmetic operators
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
modulo = { "%" }
cat = { "cat" }

// =============================================================================
// BUILT-IN FUNCTIONS
// =============================================================================
// Reserved keyword call sites lowered inline by the codegen — they are
// NOT user functions and share no infrastructure with `.extern fn`

builtin_fn_call = {
    builtin_op ~ "(" ~ (arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~ ")"
}

builtin_op = {
    strlen_op | substr_op | ord_op | contains_op | to_string_op | to_number_op
}

strlen_op    = @{ "strlen"    ~ !(ASCII_ALPHANUMERIC | "_") }
substr_op    = @{ "substr"    ~ !(ASCII_ALPHANUMERIC | "_") }
ord_op       = @{ "ord"       ~ !(ASCII_ALPHANUMERIC | "_") }
contains_op  = @{ "contains"  ~ !(ASCII_ALPHANUMERIC | "_") }
to_string_op = @{ "to_string" ~ !(ASCII_ALPHANUMERIC | "_") }
to_number_op = @{ "to_number" ~ !(ASCII_ALPHANUMERIC | "_") }

// =============================================================================
// COMPARISON EXPRESSIONS  
// =============================================================================
// Binary comparisons between arithmetic expressions

// Binary comparison between arithmetic expressions
// Examples: x + y > z + w, count(x) = 5
compare_expr = { arithmetic_expr ~ compare_op ~ arithmetic_expr }

// Comparison operators
compare_op = { 
    equal |               // =
    not_equal |           // !=
    greater_equal_than |  // >=
    greater_than |        // >
    less_equal_than |     // <=
    less_than             // <
}

// Individual comparison operators
equal = { "=" } 
not_equal = { "!=" }
greater_equal_than = { ">=" }
greater_than = { ">" }
less_equal_than = { "<=" }
less_than = { "<" }

// =============================================================================
// AGGREGATE EXPRESSIONS
// =============================================================================
// Aggregation functions for computing statistics over groups

// Aggregation operation on a value
// Examples: count(x), avg(x + y), sum(weight)
aggregate_expr = { 
    aggregate_op ~ 
    "(" ~ 
    arithmetic_expr ~
    ")" 
} 

// Supported aggregate operators
aggregate_op = { count | average | sum | min | max }
count = { "count" | "COUNT" }       // Count of elements
average = { "average" | "AVG" }     // Average value
sum = { "sum" | "SUM" }             // Sum of values
min = { "min" | "MIN" }             // Minimum value
max = { "max" | "MAX" }             // Maximum value

// =============================================================================
// ATOMS (RELATION REFERENCES)
// =============================================================================
// Atoms represent relation instances with specific arguments

// Relation with arguments
// Example: edge(x, y), person("Alice", 25)
atom = {
    relation_ref ~
    "(" ~
    (atom_arg ~ ("," ~ atom_arg)*)? ~
    ")"
}

// Negative atom for negative conditions
// Example: !edge(x, y)
negative_atom = { "!" ~ atom }

// Arguments in atoms can be constants, variables, or placeholders
atom_arg = { constant | variable | placeholder } 

// =============================================================================
// RULES SECTION
// =============================================================================
// Rules define how output relations are computed from input relations
// Format: head :- body.
// Example: reachable(x, z) :- edge(x, y), reachable(y, z).

// A fact is a ground tuple inserted directly into a relation.
// Example: edge(1, 2).
fact = { head ~ "." }

// Rule with optional multi-head and multi-body (Souffle-style).
// Commas separate alternative heads, semicolons seperate alternative bodies:
//   h1, h2 :- b1; b2.  →  h1:-b1. h1:-b2. h2:-b1. h2:-b2.
rule = { rule_heads ~ ":-" ~ rule_bodies ~ "." }

// One or more commas-separated heads
rule_heads = { head ~ ("," ~ head)* }

// One or more semicolon-separated bodies (disjunction)
rule_bodies = { predicates ~ (";" ~ predicates)* }

// Left side of rule (before :-)
// Example: reachable(x, z), stats(count(x))
head = {
    relation_ref ~
    "(" ~
    (head_arg ~ ("," ~ head_arg)*)? ~
    ")"
}

// Can be aggregate functions or arithmetic expressions (UDF calls are parsed as factors within arithmetic)
head_arg = { aggregate_expr | arithmetic_expr }

// User-defined function call in rule head
// Example: my_udf(x, y + 1)
fn_call_expr = {
    identifier ~
    "(" ~
    (arithmetic_expr ~ ("," ~ arithmetic_expr)*)? ~
    ")"
}

// Comma-separated list of predicates in the rule body
predicates = { predicate ~ ("," ~ predicate)* }

// Types of predicates allowed in rule bodies
// Note: compare_expr must come before atom because compare_expr can start with
// fn_call_expr (e.g., "cost(x) > 5") which atom would greedily consume.
predicate = {
    compare_expr |  // Comparison: x > y, cost(x) > 5
    atom |          // Positive atom: edge(x, y)
    negative_atom | // Negative atom: !edge(x, y)
    disjunction_group | // Nested disjunction: (A ; B)
    fn_call_expr    // UDF predicate: is_valid(x + 1, y)
}

// Parenthesised disjunction inside a body: `(A ; B)`.
disjunction_group = { "(" ~ rule_bodies ~ ")" }


// =============================================================================
// LOOP BLOCK / FIXPOINT BLOCK
// =============================================================================
// A fixpoint block evaluates rules until no new tuples are derived (delta
// becomes empty).  A loop block evaluates rules with bounded/conditional
// iteration.  Both act as hard evaluation barriers — the stratifier cannot
// move rules across their boundaries.
//
// Syntax:
//   fixpoint { rules... }
//   loop <condition> { rules... }
//
// The condition is composed of at most one `while` clause and one `until`
// clause, in either order:
//
//   while { iter_expr }  — keep looping while the iteration constraint holds.
//       iter_expr is one or more "@it op N" sub-conditions (joined by and/or)
//       that describe allowed iteration windows.
//         @it <= 6                  → run iterations 0..=6
//         @it >= 5 and @it <= 10   → run iterations 5..=10
//         @it < 5 or @it > 10      → run iterations 0..=4 and 11..
//
//   until { rel_expr }   — halt when the listed bool relation(s) become true.
//       rel_expr is one or more nullary relation names joined by and/or.
//         until { Done }            → stop when Done holds
//         until { Done1 or Done2 }  → stop when either holds
//
//   Combining while and until:
//     while W until U          — stop as soon as EITHER condition triggers (min)
//     while W or until U       — stop only when BOTH conditions trigger (max)
//
// Relations using replacement (iterative) semantics are declared inside the
// block with `.iterative <relation_name>` directives.  This is scoped per-block:
// the same relation can be iterative in one block and accumulative in another.
//
// Examples:
//   fixpoint { ... }
//   fixpoint { .iterative X  ... }
//   loop while { @it <= 10 } { ... }
//   loop until { Done } { .iterative X  ... }
//   loop while { @it <= 100 } until { Done } { ... }
//   loop while { @it <= 100 } or until { Done } { ... }
//   loop until { Done } while { @it >= 5 and @it <= 10 } { ... }

fixpoint_block = {
    "fixpoint" ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

loop_block = {
    "loop" ~ loop_condition ~ "{" ~ iterative_directive* ~ rule* ~ "}"
}

// .iterative <relation_name> — marks a relation as using replacement (new_from)
// semantics within this block.  Unlisted relations default to accumulative (new).
// Scoped per-block: the same relation can be iterative in one block and
// accumulative in another.
iterative_directive = { ".iterative" ~ relation_ref }

// At most one while clause and one until clause, in either order.
// If both are present without "or", stop when EITHER fires (min / and semantics).
// If joined by "or", stop when BOTH fire (max / or semantics).
loop_condition = {
    loop_while ~ (loop_or? ~ loop_until)?
  | loop_until ~ (loop_or? ~ loop_while)?
}

// while { iter_expr } — loop while iteration is in the specified window(s).
loop_while = { "while" ~ "{" ~ loop_iter_expr ~ "}" }

// until { rel_expr } — stop when the listed relation(s) become true.
loop_until = { "until" ~ "{" ~ loop_stop_group ~ "}" }

// One or more bool relations joined by and/or inside the until clause.
loop_stop_group = { loop_bool_relation ~ (loop_connective ~ loop_bool_relation)* }

// One or more "@it op N" sub-conditions joined by and/or.
// Resolved at parse time into a list of allowed iteration windows Vec<(u16,u16)>.
loop_iter_expr = {
    "@it" ~ loop_iter_compare_op ~ integer
    ~ (loop_connective ~ "@it" ~ loop_iter_compare_op ~ integer)*
}

// Comparison operators for iteration sub-conditions.
// Longer tokens must come before shorter ones to avoid premature match.
loop_iter_compare_op = { "<=" | ">=" | "==" | "<" | ">" }

// A single nullary (boolean) relation name used in an until clause.
loop_bool_relation = { relation_ref }

// Boolean connective joining clauses or sub-conditions.
loop_connective = { loop_and | loop_or }
loop_and        = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }
loop_or         = @{ "or"  ~ !(ASCII_ALPHANUMERIC | "_") }

// =============================================================================
// TYPE DECLARATIONS (.type)
// =============================================================================
//   .type NodeId  = number    alias (transparent equivalence)
//   .type UserId <: number    subtype (distinct compile-time identity)
//
// Both forms are global, position-independent, and resolved at parse
// time to a primitive root. Subtypes are zero-cost: only the type-
// checker distinguishes them.

type_alias_decl = { ".type" ~ identifier ~ type_decl_op ~ type_ref }
type_decl_op = { subtype_op | alias_op }
subtype_op = { "<:" }
alias_op = { "=" }

// =============================================================================
// COMPONENT DECLARATIONS (.comp / .init)
// =============================================================================
//
//   .comp Container<T> {
//     .decl Holds(x: T)
//     Holds(x) :- Source(x).
//   }
//   .init c = Container<symbol>
//
// `.comp` declares a parameterized template; `.init` instantiates it
// under a name (e.g. `c`). The component inliner pass eliminates both
// before typechecking, leaving only prefixed primitive forms
// (`.decl c.holds(x: symbol)` etc.).

comp_decl = {
    ".comp" ~ identifier ~ comp_type_params? ~ comp_supertype? ~ "{" ~ comp_body_item* ~ "}"
}

// `<T>` or `<T, U>` — names of types this component is parameterized by.
comp_type_params = { "<" ~ identifier ~ ("," ~ identifier)* ~ ">" }

// `: BaseComp<args>` — inherit and splice another component's body.
comp_supertype = { ":" ~ identifier ~ comp_type_args? }

// `<symbol, NodeId>` — concrete type arguments. `type_ref` so dotted
// forms like `cfg.Context` work.
comp_type_args = { "<" ~ type_ref ~ ("," ~ type_ref)* ~ ">" }

// `.init c = Container<symbol>`
init_decl = { ".init" ~ identifier ~ "=" ~ identifier ~ comp_type_args? }

// Items allowed inside a component body. Mirrors the top-level grammar
// minus fixpoint/loop blocks and include directives (which are
// resolved at file-load time), plus `override_directive` which is
// comp-body-only.
comp_body_item = {
    declaration | type_alias_decl | input_directive | output_directive
    | printsize_directive | rule | fact | comp_decl | init_decl
    | override_directive
}

// Drop inherited rules and facts for `Name`; the parent's `.decl Name`
// (which must be `overridable`) is kept.
override_directive = { ".override" ~ identifier }

// =============================================================================
// WHITESPACE AND COMMENTS
// =============================================================================
// Rules for handling whitespace and comments (ignored during parsing)

// Whitespace and comments are ignored during parsing
WHITESPACE = _{ " " | "\t" | NEWLINE }

// Comment starts with # or // and continues until end of line
// Examples: # This is a comment, // Another comment
COMMENT = _{ ("#" | "//") ~ (!NEWLINE ~ ANY)* }