// xlog-logic 0.9.2

// XLOG Grammar for Datalog-style logic programs

// Implicit trivia. pest inserts these two silent rules between the elements of
// every `~` sequence and repetition inside non-atomic rules.
// WHITESPACE: a single space, tab, carriage return, or line feed.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// COMMENT: `//` followed by everything up to, but not including, the next line feed.
COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }

// Identifiers
// ident_continue: one "tail" character of a name (ASCII letter, digit, or "_").
// Silent, so it never yields a pair of its own; also used as a word-boundary
// probe (`!ident_continue`) by keyword-like rules.
ident_continue = _{ ASCII_ALPHANUMERIC | "_" }
// ident: a lowercase ASCII letter followed by any number of tail characters.
ident = @{ ASCII_ALPHA_LOWER ~ ident_continue* }
// Variables: an uppercase ASCII letter followed by tail characters, OR the
// anonymous wildcard, which is exactly the single character "_".
anonymous = @{ "_" }
variable = @{ ASCII_ALPHA_UPPER ~ ident_continue* }
// The wildcard is tried first; a named variable can never start with "_".
var_or_anon = { anonymous | variable }

// Module path: graph or utils/math or deep/nested/module
// Atomic: no whitespace or comments are accepted around the "/" separators.
module_path = @{ ident ~ ("/" ~ ident)* }

// Import list: {edge, reach, node}
// At least one name is required; an empty `{}` does not match.
import_list = { "{" ~ ident ~ ("," ~ ident)* ~ "}" }

// Use statement: use graph. or use utils/math::{abs, clamp}.
// The `::{...}` import list is optional; the terminating "." is mandatory.
use_stmt = { "use" ~ module_path ~ ("::" ~ import_list)? ~ "." }

// Literals
// integer: optional leading "-" followed by one or more ASCII digits.
integer = @{ "-"? ~ ASCII_DIGIT+ }
// float_num: digits are required on both sides of the "." (`1.` and `.5` do
// not match); an optional leading "-" is accepted.
float_num = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// string_lit: double-quoted text. The body is every character up to the next
// `"`, so there is no escape sequence for embedding a quote.
string_lit = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" }
// bool_lit: the bare words `true` / `false`.
bool_lit = { "true" | "false" }

// Probabilistic annotations
// prob_num: unsigned decimal, i.e. digits with an optional fractional part
// (`1`, `0.25`). The grammar accepts no sign and performs no range check.
prob_num = @{ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? }

// Terms
// list_literal: bracketed, comma-separated terms; the empty list `[]` is allowed.
list_literal = { "[" ~ (term ~ ("," ~ term)*)? ~ "]" }
// cons_pattern: head/tail form `[H | T]` with exactly one term on each side of "|".
cons_pattern = { "[" ~ term ~ "|" ~ term ~ "]" }
// compound_term: a functor applied to an optional argument list, e.g. f(X, a) or f().
compound_term = { ident ~ "(" ~ term_list? ~ ")" }
// term: the first matching alternative wins. compound_term must stay ahead of
// ident (otherwise `f(...)` would stop at `f`), and float_num ahead of integer
// (otherwise `1.5` would stop at `1`).
term = { var_or_anon | cons_pattern | list_literal | compound_term | float_num | integer | string_lit | ident }

// Atoms
// atom: same surface syntax as compound_term, but reported under its own rule name.
atom = { ident ~ "(" ~ term_list? ~ ")" }
// term_list: one or more comma-separated terms.
term_list = { term ~ ("," ~ term)* }

// Aggregate expressions
// agg_op: the recognised aggregate operator names.
agg_op = { "count" | "sum" | "min" | "max" | "logsumexp" }
// aggregate: an operator applied to exactly one variable, e.g. count(X).
aggregate = { agg_op ~ "(" ~ variable ~ ")" }
// agg_term: an aggregate when one matches at this position, otherwise a plain term.
agg_term = { aggregate | term }

// Comparison operators
// Two-character operators are listed ahead of their one-character prefixes
// ("<=" before "<", ">=" before ">", "==" before "=") so the longer spelling
// is matched first.
cmp_op = { "==" | "!=" | "<=" | ">=" | "<" | ">" | "=" }
// comparison: two terms joined by a single operator, e.g. X < Y.
comparison = { term ~ cmp_op ~ term }

// Arithmetic operators
// Split into two rules so that the expression rules below can give "*", "/",
// "%" tighter binding than "+" and "-".
arith_op_mul = { "*" | "/" | "%" }
arith_op_add = { "+" | "-" }

// Built-in functions
// Names that take the dedicated built-in call form in arith_primary.
builtin_fn = { "abs" | "min" | "max" | "pow" | "cast" }

// Function call: square(X) or dist(X1, Y1, X2, Y2)
// Zero or more comma-separated arithmetic expressions as arguments.
func_call = { ident ~ "(" ~ (arith_expr ~ ("," ~ arith_expr)*)? ~ ")" }

// Arithmetic expressions with precedence
// arith_primary alternatives, in the order they are tried:
//   1. built-in call: the first argument must be an arithmetic expression;
//      each later argument may be an arithmetic expression or a type_spec
//   2. ordinary function call (also reached when a name merely starts with a
//      built-in name, because alternative 1 then fails at the "(")
//   3. parenthesised sub-expression
//   4. variable, 5. float_num, 6. integer (float before integer so `1.5` is
//      not cut short at `1`)
arith_primary = {
    builtin_fn ~ "(" ~ arith_expr ~ ("," ~ (arith_expr | type_spec))* ~ ")" |
    func_call |
    "(" ~ arith_expr ~ ")" |
    variable |
    float_num |
    integer
}

// arith_term: primaries chained by multiplicative operators.
arith_term = { arith_primary ~ (arith_op_mul ~ arith_primary)* }
// arith_expr: terms chained by additive operators.
arith_expr = { arith_term ~ (arith_op_add ~ arith_term)* }

// The 'is' construct
// A variable, the word `is`, then an arithmetic expression, e.g. Y is X + 1.
// The left-hand side must be a named variable; the `_` wildcard is not accepted.
is_expr = { variable ~ "is" ~ arith_expr }

// Body literals
// epistemic_op: `know` or `possible` as a whole word. The trailing
// `!ident_continue` keeps longer names such as `knowledge` from matching.
epistemic_op = @{ ("know" | "possible") ~ !ident_continue }
// A named `not` keyword so the parser builder can see EXACTLY where each
// negation sits in a modal chain (leading / interior / atom-adjacent). It is
// captured as a pair only inside `nested_modal_chain`, so interior vs leading
// negations are structurally distinguishable for the S5/KD45 collapse. The
// single-level rules below still consume the bare `"not"` literal (their pair
// shape is unchanged) and use `&not_kw` purely as a non-capturing
// word-boundary check.
not_kw = @{ "not" ~ !ident_continue }
// A nested modal chain is a SEQUENCE of two-or-more epistemic operators
// (`know`/`possible`), with `not` allowed before any operator and before the
// final atom, applied to a single atom (e.g. `know possible p()`,
// `not know possible p()`). It is NOT rejected at parse time: the parser
// builder collapses it to a single epistemic literal using the modal-logic
// (KD45/S5) equivalence in which the operator ADJACENT to the atom wins
// (`know possible p ≡ possible p`, `know know p ≡ know p`). Leading,
// interior, and atom-adjacent `not` tokens are accepted in the finite chain and
// folded by the parser builder's parity/duality rules.
nested_modal_chain = {
    not_kw? ~ epistemic_op ~ (not_kw? ~ epistemic_op)+ ~ not_kw? ~ atom
}
// epistemic_atom: exactly one epistemic operator applied to an atom.
epistemic_atom = { epistemic_op ~ atom }
// Single-level negations. The `&not_kw` lookahead requires `not` to be a whole
// word before the literal is consumed. Without it, a predicate whose name
// merely starts with "not" (e.g. `notify(X)`) would be read as `not ify(X)`,
// because negated_atom is tried before atom in body_literal. Lookaheads
// produce no pairs, so both rules still expose only their inner atom pair.
negated_epistemic_atom = { &not_kw ~ "not" ~ epistemic_atom }
negated_atom = { &not_kw ~ "not" ~ atom }
// univ: two terms joined by "=..", e.g. T =.. L.
univ = { term ~ "=.." ~ term }
// body_literal: one conjunct of a rule body. Alternatives are tried top to
// bottom and the first match wins, so every form that can START with an atom
// or compound term (univ, comparison) is listed before the bare `atom`.
// With `atom` first, a comparison whose left operand is a compound term
// (e.g. `f(X) = Y`) would match `f(X)` as an atom and then fail at the
// operator with no way back into `comparison`. univ stays ahead of comparison
// so "=.." is not read as the "=" operator.
body_literal = {
    nested_modal_chain
    | negated_epistemic_atom
    | epistemic_atom
    | negated_atom
    | univ
    | comparison
    | atom
    | is_expr
}
// body: one or more comma-separated body literals.
body = { body_literal ~ ("," ~ body_literal)* }

// Head with optional aggregate
// head_term: a single head argument. agg_term already falls back to a plain
// term when no aggregate matches, so it covers both cases on its own.
head_term = { agg_term }
// head_term_list: one or more comma-separated head arguments.
head_term_list = { head_term ~ ("," ~ head_term)* }
// head: predicate name followed by a parenthesised, possibly empty, argument list.
head = { ident ~ "(" ~ head_term_list? ~ ")" }

// Rules and facts
// rule_def: `head :- body.`
rule_def = { head ~ ":-" ~ body ~ "." }
// fact: a single atom terminated by "."
fact = { atom ~ "." }
// constraint: a headless rule, `:- body.`
constraint = { ":-" ~ body ~ "." }

// Probabilistic facts / annotated disjunctions
// prob_choice: `p::atom`, where p is a prob_num.
prob_choice = { prob_num ~ "::" ~ atom }
// prob_fact: a single choice terminated by "."
prob_fact = { prob_choice ~ "." }
// annotated_disjunction: two or more choices separated by ";" and terminated
// by "." (a lone choice is a prob_fact instead).
annotated_disjunction = { prob_choice ~ (";" ~ prob_choice)+ ~ "." }

// Neural predicate declarations
// nn(network, [inputs], output, [labels]) :: pred(args).
// nn(network, [inputs], embedding) :: pred(args).  (embedding mode, no labels)
// neural_label: one entry of the label list, an integer or an identifier.
neural_label = { integer | ident }
// Both bracketed lists may be empty (`[]`).
neural_label_list = { "[" ~ (neural_label ~ ("," ~ neural_label)*)? ~ "]" }
neural_input_list = { "[" ~ (variable ~ ("," ~ variable)*)? ~ "]" }
// The network name is an ident, the third argument is a variable, and the
// fourth argument (the label list) is optional.
neural_pred_decl = {
    "nn" ~ "(" ~ ident ~ "," ~ neural_input_list ~ "," ~ variable ~ ("," ~ neural_label_list)? ~ ")"
    ~ "::" ~ atom ~ "."
}

// Queries
// query: `?- atom.`
query = { "?-" ~ atom ~ "." }
// prob_query: `query(atom).`
prob_query = { "query" ~ "(" ~ atom ~ ")" ~ "." }
// evidence_stmt: `evidence(atom, true).` or `evidence(atom, false).`
evidence_stmt = { "evidence" ~ "(" ~ atom ~ "," ~ bool_lit ~ ")" ~ "." }

// Pragmas / directives
// Every pragma has the shape `#pragma <name> = <value>` and, unlike the other
// statements in this grammar, takes no terminating ".".
// Closed sets of keyword values accepted by the keyword-valued pragmas:
prob_engine_value = { "exact_ddnnf" | "mc" }
prob_cache_value = { "on" | "off" }
epistemic_mode_value = { "g91" | "faeel" }
magic_sets_value = { "auto" | "on" | "off" }
prob_method_value = { "rejection" | "evidence_clamping" }
// One rule per pragma name, so each gets its own pair in the parse tree.
pragma_prob_engine = { "#pragma" ~ "prob_engine" ~ "=" ~ prob_engine_value }
pragma_prob_cache = { "#pragma" ~ "prob_cache" ~ "=" ~ prob_cache_value }
pragma_epistemic_mode = { "#pragma" ~ "epistemic_mode" ~ "=" ~ epistemic_mode_value }
// Numeric pragmas reuse `integer`, which also admits a leading "-"; the
// grammar itself does not range-check the value.
pragma_prob_samples = { "#pragma" ~ "prob_samples" ~ "=" ~ integer }
pragma_prob_seed = { "#pragma" ~ "prob_seed" ~ "=" ~ integer }
// Requires a float_num: a value without a fractional part (e.g. `1`) does not match.
pragma_prob_confidence = { "#pragma" ~ "prob_confidence" ~ "=" ~ float_num }
pragma_prob_method = { "#pragma" ~ "prob_method" ~ "=" ~ prob_method_value }
pragma_prob_max_nonmonotone_iterations = { "#pragma" ~ "prob_max_nonmonotone_iterations" ~ "=" ~ integer }
pragma_max_recursion = { "#pragma" ~ "max_recursion_depth" ~ "=" ~ integer }
pragma_magic_sets = { "#pragma" ~ "magic_sets" ~ "=" ~ magic_sets_value }
// pragma: any one of the directives above.
pragma = {
    pragma_prob_engine
    | pragma_prob_cache
    | pragma_epistemic_mode
    | pragma_prob_samples
    | pragma_prob_seed
    | pragma_prob_confidence
    | pragma_prob_method
    | pragma_prob_max_nonmonotone_iterations
    | pragma_max_recursion
    | pragma_magic_sets
}

// Domain declarations
// domain_decl: `domain name: type.`
domain_decl = { "domain" ~ ident ~ ":" ~ type_spec ~ "." }
// scalar_type: a built-in scalar type name as a whole word. Atomic with a
// trailing `!ident_continue` so that a user-defined name which merely starts
// with a scalar name (e.g. `symbol_table`, `boolean`) is not cut short here
// and can still reach the `ident` alternative of type_spec. The pair's text
// is still exactly the keyword.
scalar_type = @{ ("u32" | "u64" | "i32" | "i64" | "f32" | "f64" | "bool" | "symbol") ~ !ident_continue }
// list_type: `list<T>` for any type_spec T (nesting is allowed).
list_type = { "list" ~ "<" ~ type_spec ~ ">" }
// type_spec: first match wins. The bare keywords `term`, `compound`, and
// `predref` carry the same word-boundary check as scalar_type, so a name such
// as `terminal` falls through to `ident` instead of matching `term` and then
// failing the enclosing declaration.
type_spec = { list_type | ("term" | "compound" | "predref") ~ !ident_continue | scalar_type | ident }

// Type annotation for function parameters: X: f64
// Matches only the `: type` suffix; the variable itself belongs to func_param.
type_annotation = { ":" ~ type_spec }

// Return type annotation: -> f64
return_type = { "->" ~ type_spec }

// Function parameter: X or X: f64
// The annotation is optional, so typed and untyped parameters can be mixed.
func_param = { variable ~ type_annotation? }

// Parameter list: X, Y, Z or X: f64, Y: f64
// One or more parameters; an empty list is expressed by omitting func_params.
func_params = { func_param ~ ("," ~ func_param)* }

// Condition test: X < 0 or N == 1
// Exactly one comparison between two arithmetic expressions; there is no
// syntax here for combining several tests.
cond_test = { arith_expr ~ cmp_op ~ arith_expr }

// Conditional expression: if X < 0 then 0 - X else X
// Both branches are full func_body productions, so conditionals can nest, and
// the `else` branch is mandatory.
cond_expr = { "if" ~ cond_test ~ "then" ~ func_body ~ "else" ~ func_body }

// Arithmetic function body (includes conditionals)
// cond_expr is tried first; anything else must be a plain arithmetic expression.
func_body_arith = { cond_expr | arith_expr }

// Predicate-based function body: P :- parent(X, P).
// A variable, ":-", then a rule body.
func_body_pred = { variable ~ ":-" ~ body }

// Function body - either arithmetic or predicate-based
// The predicate form is tried first; if no ":-" follows the leading variable
// it fails and the arithmetic form is tried from the same position.
func_body = { func_body_pred | func_body_arith }

// Function definition: func square(X) = X * X.
// Shape: [private] func name([params]) [-> type] = body .
// The `private` modifier, the parameter list, and the return type are each optional.
func_def = {
    private_mod? ~ "func" ~ ident ~ "(" ~ func_params? ~ ")" ~ return_type?
    ~ "=" ~ func_body ~ "."
}

// Private modifier
// A named rule (rather than an inline literal) so its presence shows up as a
// pair in the parse tree.
private_mod = { "private" }

// Predicate declarations
// pred_keyword: the word `pred` on its own, i.e. not the prefix of a longer
// name. Used only inside a lookahead, so it never appears in the parse tree.
pred_keyword = @{ "pred" ~ !ident_continue }
// pred_decl: `[private] pred name(col, ...).`
// The `&pred_keyword` guard stops a statement whose predicate name merely
// starts with "pred" (e.g. the fact `predecessor(a, b).`) from being read as
// a declaration of `ecessor`; such input now fails here and falls through to
// the later statement alternatives.
pred_decl = { private_mod? ~ &pred_keyword ~ "pred" ~ ident ~ "(" ~ type_list? ~ ")" ~ "." }
// pred_column: either a named column `name: type` or just a type.
pred_column = { ident ~ ":" ~ type_spec | type_spec }
// type_list: one or more comma-separated columns.
type_list = { pred_column ~ ("," ~ pred_column)* }

// Learnable rule: an ordinary `head :- body.` rule prefixed with
// `learnable(<mask>) ::`, where <mask> names a tensor mask.
// mask_name may begin with ANY ASCII letter (upper or lower case), whereas
// ident insists on a lowercase first letter; the remaining characters follow
// the shared name-tail class. So W_mask, w_mask and W are all accepted.
mask_name = @{ ASCII_ALPHA ~ ident_continue* }
learnable_rule = {
    "learnable" ~ "(" ~ mask_name ~ ")"
    ~ "::" ~ head
    ~ ":-" ~ body
    ~ "."
}

// Program structure
// statement: one top-level item. Alternatives are tried in the order listed
// and the first one that matches is taken; a failed alternative backtracks to
// the start of the statement. Consequences visible in this ordering:
//   - rule_def precedes fact: a rule needs ":-" after its head, so a plain
//     `atom.` fails rule_def and is picked up by fact.
//   - prob_fact precedes annotated_disjunction: both begin with a
//     prob_choice; a ";" after the first choice fails prob_fact and the
//     disjunction is tried next.
//   - evidence_stmt and prob_query precede fact, so `evidence(a(), true).`
//     and `query(a()).` are read as those statements rather than as facts.
statement = {
    func_def
    | use_stmt
    | domain_decl
    | pred_decl
    | pragma
    | neural_pred_decl
    | learnable_rule
    | rule_def
    | prob_fact
    | annotated_disjunction
    | evidence_stmt
    | prob_query
    | fact
    | constraint
    | query
}
// program: the whole input, i.e. zero or more statements anchored between
// start-of-input and end-of-input, so trailing unparsed text is an error.
program = { SOI ~ statement* ~ EOI }