// xlog-logic 0.5.0
//
// Parser, compiler, and optimizer for XLOG logic programs
// Documentation
// XLOG Grammar for Datalog-style logic programs

// Implicit trivia. Both rules are silent (`_`), so they never appear in the
// parse tree; pest skips them automatically between the elements of `~`
// sequences and repetitions inside non-atomic rules.
// WHITESPACE: a single space, tab, carriage return or newline.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// COMMENT: "//" followed by everything up to (not including) the next newline.
COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }

// Identifiers: a lowercase ASCII letter followed by any number of ASCII
// letters, digits or underscores. Atomic (`@`): no implicit whitespace inside.
ident = @{ ASCII_ALPHA_LOWER ~ (ASCII_ALPHANUMERIC | "_")* }
// Anonymous wildcard: exactly one underscore character.
// NOTE(review): a name such as `_Tmp` is not covered by either rule below;
// `anonymous` consumes only the leading "_".
anonymous = @{ "_" }
// Variables: an uppercase ASCII letter followed by any number of ASCII
// letters, digits or underscores.
variable = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
// Either kind of variable position; the wildcard is tried first.
var_or_anon = { anonymous | variable }

// Module path: one or more identifiers joined by "/", e.g. graph,
// utils/math or deep/nested/module. Atomic, so no whitespace may surround
// the slashes.
module_path = @{ ident ~ ("/" ~ ident)* }

// Import list: a non-empty, comma-separated list of identifiers in braces,
// e.g. {edge, reach, node}
import_list = { "{" ~ ident ~ ("," ~ ident)* ~ "}" }

// Use statement: "use", a module path, an optional "::" + import list, and a
// terminating ".". Examples: `use graph.` or `use utils/math::{abs, clamp}.`
// NOTE(review): "use" is a plain literal without a word-boundary check, and
// implicit whitespace may be empty, so `usegraph.` is accepted as well.
use_stmt = { "use" ~ module_path ~ ("::" ~ import_list)? ~ "." }

// Literals (all atomic except bool_lit)
// integer: optional leading "-" followed by one or more ASCII digits.
integer = @{ "-"? ~ ASCII_DIGIT+ }
// float_num: optional "-", then digits, ".", digits. Digits are required on
// both sides of the point, so "1." and ".5" do not match.
float_num = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// string_lit: double-quoted text ending at the first `"` encountered; there
// is no escape-sequence handling, so the content cannot contain a quote.
string_lit = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" }
// bool_lit: the literal text "true" or "false".
bool_lit = { "true" | "false" }

// Probabilistic annotations
// prob_num: unsigned digits with an optional fractional part. The grammar
// does not restrict the value to any range (e.g. "7.5" matches).
prob_num = @{ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? }

// Terms
// Ordered choice: float_num is tried before integer so that "1.5" is not
// consumed as the integer "1"; ident (a lowercase constant) comes last.
term = { var_or_anon | float_num | integer | string_lit | ident }

// Atoms
// A predicate name followed by a parenthesised, possibly empty argument
// list. The parentheses are mandatory even when there are no arguments.
atom = { ident ~ "(" ~ term_list? ~ ")" }
// One or more comma-separated terms.
term_list = { term ~ ("," ~ term)* }

// Aggregate expressions
// agg_op: the recognised aggregate operator names.
agg_op = { "count" | "sum" | "min" | "max" | "logsumexp" }
// aggregate: an operator applied to exactly one variable, e.g. count(X).
aggregate = { agg_op ~ "(" ~ variable ~ ")" }
// agg_term: an aggregate if one matches, otherwise an ordinary term. An
// identifier that merely starts with an operator name (e.g. "summary") fails
// the aggregate alternative at "(" and falls back to term.
agg_term = { aggregate | term }

// Comparison operators
// Two-character operators are listed before the one-character operators
// that are their prefixes ("<=" before "<", "==" before "="), as ordered
// choice requires.
cmp_op = { "==" | "!=" | "<=" | ">=" | "<" | ">" | "=" }
// A comparison relates two terms; arithmetic expressions are not accepted
// as operands here (see cond_test for the arithmetic variant).
comparison = { term ~ cmp_op ~ term }

// Arithmetic operators, one rule per precedence tier:
// multiplicative operators bind tighter than additive ones.
arith_op_mul = { "*" | "/" | "%" }
arith_op_add = { "+" | "-" }

// Names that get the dedicated built-in call form in arith_primary
builtin_fn = { "abs" | "min" | "max" | "pow" | "cast" }

// Call of a named function with zero or more arithmetic arguments,
// e.g. square(X) or dist(X1, Y1, X2, Y2)
func_call = {
    ident ~ "(" ~ (arith_expr ~ ("," ~ arith_expr)*)? ~ ")"
}

// Operand of an arithmetic expression. Alternatives are tried top to bottom:
// the built-in call form (whose extra arguments may also be a type_spec, as
// in cast(X, f64)), a general function call, a parenthesised sub-expression,
// a variable, and finally a numeric literal (float before integer).
arith_primary = {
    builtin_fn ~ "(" ~ arith_expr ~ ("," ~ (arith_expr | type_spec))* ~ ")"
    | func_call
    | "(" ~ arith_expr ~ ")"
    | variable
    | float_num
    | integer
}

// Product level: primaries chained with multiplicative operators.
arith_term = {
    arith_primary ~ (arith_op_mul ~ arith_primary)*
}
// Sum level: products chained with additive operators.
arith_expr = {
    arith_term ~ (arith_op_add ~ arith_term)*
}

// The 'is' construct: a variable, the keyword "is", and an arithmetic
// expression, e.g. `Y is X + 1`.
// NOTE(review): "is" is a plain literal with no word-boundary check.
is_expr = { variable ~ "is" ~ arith_expr }

// Body literals
//
// negated_atom: the keyword "not" followed by an atom, e.g. `not edge(X, Y)`.
// The leading `!atom` lookahead makes "not" a whole-word keyword: a bare
// "not" literal would also match the first three letters of a predicate such
// as notify(X) and silently parse it as the negation of ify(X). If the input
// at this position already forms an atom (notify(X), or not(X) as a
// predicate named "not"), this rule is rejected and body_literal falls
// through to its `atom` alternative. `not foo(X)` is unaffected, because
// "not" followed by whitespace and another identifier is not an atom.
// A lookahead consumes no input and adds no parse-tree node, so the shape of
// negated_atom pairs is unchanged.
negated_atom = { !atom ~ "not" ~ atom }
// body_literal: ordered choice; negation is tried first, then a plain atom,
// then a comparison, then an `is` expression.
body_literal = { negated_atom | atom | comparison | is_expr }
// body: one or more comma-separated body literals.
body = { body_literal ~ ("," ~ body_literal)* }

// Head with optional aggregate
//
// head_term: one argument position of a rule head. agg_term already matches
// either an aggregate or a plain term, so a separate `| term` alternative
// here could never be reached and has been dropped; the resulting parse tree
// (head_term -> agg_term -> aggregate | term) is the same as before.
head_term = { agg_term }
// head_term_list: one or more comma-separated head terms.
head_term_list = { head_term ~ ("," ~ head_term)* }
// head: predicate name with a parenthesised, possibly empty argument list.
head = { ident ~ "(" ~ head_term_list? ~ ")" }

// Rules and facts
// rule_def: head, ":-", a non-empty body, and a terminating ".".
rule_def = { head ~ ":-" ~ body ~ "." }
// fact: a single atom terminated by ".".
fact = { atom ~ "." }
// constraint: a headless rule, i.e. ":-" followed by a body and ".".
constraint = { ":-" ~ body ~ "." }

// Probabilistic facts / annotated disjunctions
// prob_choice: a probability annotation attached to an atom, e.g. 0.3::rain(x).
prob_choice = { prob_num ~ "::" ~ atom }
// prob_fact: exactly one choice terminated by ".".
prob_fact = { prob_choice ~ "." }
// annotated_disjunction: two or more choices separated by ";" and terminated
// by "." (the `+` requires at least one ";"-separated continuation).
annotated_disjunction = { prob_choice ~ (";" ~ prob_choice)+ ~ "." }

// Neural predicate declarations
// nn(network, [inputs], output, [labels]) :: pred(args).
// nn(network, [inputs], embedding) :: pred(args).  (embedding mode, no labels)
// neural_label: a single label, either an integer or an identifier.
neural_label = { integer | ident }
// neural_label_list: bracketed, comma-separated labels; may be empty ("[]").
neural_label_list = { "[" ~ (neural_label ~ ("," ~ neural_label)*)? ~ "]" }
// neural_input_list: bracketed, comma-separated variables; may be empty.
neural_input_list = { "[" ~ (variable ~ ("," ~ variable)*)? ~ "]" }
// neural_pred_decl: "nn(" network identifier, input list, one variable and
// an optional label list ")", then "::", the declared atom and ".".
neural_pred_decl = {
    "nn" ~ "(" ~ ident ~ "," ~ neural_input_list ~ "," ~ variable ~ ("," ~ neural_label_list)? ~ ")"
    ~ "::" ~ atom ~ "."
}

// Queries
// query: "?-" followed by an atom and ".".
query = { "?-" ~ atom ~ "." }
// prob_query: query(<atom>).
prob_query = { "query" ~ "(" ~ atom ~ ")" ~ "." }
// evidence_stmt: evidence(<atom>, true|false).
evidence_stmt = { "evidence" ~ "(" ~ atom ~ "," ~ bool_lit ~ ")" ~ "." }

// Pragmas / directives
// Each pragma has the form `#pragma <name> = <value>`; unlike other
// statements there is no terminating ".".
// Accepted values for prob_engine.
prob_engine_value = { "exact_ddnnf" | "mc" }
// Accepted values for prob_cache.
prob_cache_value = { "on" | "off" }
pragma_prob_engine = { "#pragma" ~ "prob_engine" ~ "=" ~ prob_engine_value }
pragma_prob_cache = { "#pragma" ~ "prob_cache" ~ "=" ~ prob_cache_value }
// NOTE(review): the value uses `integer`, which also admits a leading "-".
pragma_max_recursion = { "#pragma" ~ "max_recursion_depth" ~ "=" ~ integer }
// pragma: any one of the three directives above.
pragma = { pragma_prob_engine | pragma_prob_cache | pragma_max_recursion }

// Domain declarations: domain <name> : <type>.
domain_decl = { "domain" ~ ident ~ ":" ~ type_spec ~ "." }
// type_spec: the closed set of type names accepted by the grammar.
type_spec = { "u32" | "u64" | "i32" | "i64" | "f32" | "f64" | "bool" | "symbol" }

// Type annotation for function parameters: the ": f64" part of `X: f64`
type_annotation = { ":" ~ type_spec }

// Return type annotation: -> f64
return_type = { "->" ~ type_spec }

// Function parameter: a variable with an optional type annotation,
// e.g. X or X: f64
func_param = { variable ~ type_annotation? }

// Parameter list: one or more comma-separated parameters,
// e.g. X, Y, Z or X: f64, Y: f64
func_params = { func_param ~ ("," ~ func_param)* }

// Condition test: two arithmetic expressions joined by a comparison
// operator, e.g. X < 0 or N == 1
cond_test = { arith_expr ~ cmp_op ~ arith_expr }

// Conditional expression: if X < 0 then 0 - X else X
// Both branches are full func_body productions, so conditionals can nest.
cond_expr = { "if" ~ cond_test ~ "then" ~ func_body ~ "else" ~ func_body }

// Arithmetic function body (includes conditionals); cond_expr is tried
// before a plain arithmetic expression.
func_body_arith = { cond_expr | arith_expr }

// Predicate-based function body: a variable, ":-", and a rule body,
// e.g. P :- parent(X, P)
func_body_pred = { variable ~ ":-" ~ body }

// Function body - either arithmetic or predicate-based. The predicate form
// is tried first; it fails at ":-" for arithmetic bodies that merely start
// with a variable, and the parser then falls back to func_body_arith.
func_body = { func_body_pred | func_body_arith }

// Function definition: func square(X) = X * X.
// Optional "private" modifier, "func", the name, a parenthesised and
// possibly empty parameter list, an optional return type, "=", the body and
// a terminating ".".
func_def = {
    private_mod? ~ "func" ~ ident ~ "(" ~ func_params? ~ ")" ~ return_type?
    ~ "=" ~ func_body ~ "."
}

// Private modifier: the literal keyword "private".
private_mod = { "private" }

// Predicate declarations: [private] pred <name>(<types>).
//
// The leading `!atom` lookahead makes "pred" a whole-word keyword. A bare
// "pred" literal would also match the first four letters of an ordinary
// predicate name, so facts such as `predecessor().` or `prediction(f64).`
// were parsed as declarations of "ecessor" / "iction". If the input at this
// position already forms an atom, the rule is rejected and the statement is
// left for the fact / rule alternatives. A real declaration such as
// `pred edge(u32, u32).` is never an atom, because the name "pred" (or
// "private") is followed by another identifier rather than "(".
// A lookahead consumes no input and adds no parse-tree node.
pred_decl = { !atom ~ private_mod? ~ "pred" ~ ident ~ "(" ~ type_list? ~ ")" ~ "." }
// type_list: one or more comma-separated type names.
type_list = { type_spec ~ ("," ~ type_spec)* }

// Learnable rule: parameterized by a named tensor mask
// mask_name starts with ASCII_ALPHA (upper OR lower), unlike ident which
// requires ASCII_ALPHA_LOWER. This allows W_mask, w_mask, W, etc.
mask_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// learnable_rule: learnable(<mask_name>) :: <head> :- <body>.
// The part after "::" has the same shape as rule_def.
learnable_rule = {
    "learnable" ~ "(" ~ mask_name ~ ")" ~ "::" ~ head ~ ":-" ~ body ~ "."
}

// Program structure
//
// statement: ordered choice over every top-level construct. The order is
// significant: keyword-introduced forms come first, rule_def precedes fact
// (a rule starts with something that looks like a fact's atom), and
// prob_fact precedes annotated_disjunction (it fails at ";" and lets the
// disjunction match).
statement = {
    func_def | use_stmt | domain_decl | pred_decl | pragma |
    neural_pred_decl | learnable_rule |
    rule_def | prob_fact | annotated_disjunction |
    evidence_stmt | prob_query |
    fact | constraint | query
}

// program: the whole input, from start (SOI) to end (EOI), as a sequence of
// zero or more statements.
program = {
    SOI ~ statement* ~ EOI
}