flowlog-build 0.3.4

Build-time FlowLog compiler for library mode.
Documentation
//! Equality-as-assignment desugaring.
//!
//! Soufflé and standard Datalog treat a body literal `v = expr` as an
//! *assignment* that grounds `v` whenever `v` is otherwise unbound and every
//! variable in `expr` is already bound. FlowLog's planner grounds variables
//! only through positive atoms, so this pass eliminates assignment literals up
//! front by substituting the bound variable into the head and the remaining
//! body, before catalog construction and planning ever see the rule:
//!
//! ```text
//! R(t)        :- A(x), t = x + 1.        =>  R(x + 1) :- A(x).
//! D(d)        :- M(rt), d = cat(rt,"()").=>  D(cat(rt, "()")) :- M(rt).
//! R(y)        :- A(x), y = x.            =>  R(x) :- A(x).
//! isModifier(x) :- x = "abstract".       =>  fact  isModifier("abstract").
//! ```
//!
//! A `v = expr` whose `v` already occurs in a positive atom is a genuine
//! equality *filter* (both sides are bound), and is left untouched — only an
//! otherwise-unbound `v` is treated as an assignment.
//!
//! Chained assignments (`a = x + 1, b = a + 2`) are resolved to a fixpoint and
//! back-substituted so each bound variable is expressed purely over
//! positive-atom variables and constants.
//!
//! Rules whose body becomes empty and whose head is fully ground are emitted as
//! inline facts (reusing the existing fact path) rather than being handed to the
//! planner, which requires at least one positive atom.

use std::collections::{HashMap, HashSet};

use super::segment::Segment;
use crate::parser::error::ParseError;
use crate::parser::{
    Arithmetic, ArithmeticOperator, AtomArg, ComparisonOperator, ConstType, Factor, FlowLogRule,
    HeadArg, Predicate,
};

/// Rewrite every rule in `segments`, removing equality assignments by
/// substitution. Rules that reduce to a fully-ground fact are appended to
/// `raw_facts` instead of being kept in their segment.
pub(crate) fn desugar_equality_assignments(
    segments: &mut [Segment],
    raw_facts: &mut Vec<FlowLogRule>,
) -> Result<(), ParseError> {
    for seg in segments.iter_mut() {
        let rules: &mut Vec<FlowLogRule> = match seg {
            Segment::Plain(rules) => rules,
            Segment::Loop(block) | Segment::Fixpoint(block) => block.rules_mut(),
        };

        let mut kept = Vec::with_capacity(rules.len());
        for mut rule in std::mem::take(rules) {
            if desugar_rule(&mut rule)? {
                raw_facts.push(rule);
            } else {
                kept.push(rule);
            }
        }
        *rules = kept;
    }
    Ok(())
}

/// Desugar a single rule in place. Returns `true` when the rule became a
/// fully-ground fact and should be moved to the fact set.
fn desugar_rule(rule: &mut FlowLogRule) -> Result<bool, ParseError> {
    // Variables grounded by a positive atom — never treated as assignments.
    let mut bound: HashSet<String> = HashSet::new();
    for pred in rule.rhs() {
        if let Predicate::PositiveAtom(atom) = pred {
            for arg in atom.arguments() {
                if let AtomArg::Var(v) = arg {
                    bound.insert(v.clone());
                }
            }
        }
    }

    // Discover assignment comparisons to a fixpoint. `assignment_idx` records
    // which `rhs` slots are assignments (to drop later); `order` preserves
    // discovery order so chains resolve correctly.
    let mut assignment_idx: HashSet<usize> = HashSet::new();
    let mut order: Vec<(String, Arithmetic)> = Vec::new();
    loop {
        let mut progressed = false;
        for (i, pred) in rule.rhs().iter().enumerate() {
            if assignment_idx.contains(&i) {
                continue;
            }
            let Predicate::Compare(expr) = pred else {
                continue;
            };
            if *expr.operator() != ComparisonOperator::Equal {
                continue;
            }
            if let Some((var, value)) = as_assignment(expr.left(), expr.right(), &bound) {
                bound.insert(var.clone());
                assignment_idx.insert(i);
                order.push((var, value));
                progressed = true;
            }
        }
        if !progressed {
            break;
        }
    }

    if order.is_empty() {
        return Ok(false);
    }

    // Back-substitute so each binding is expressed only over positive-atom
    // variables and constants, then freeze into a resolved map.
    let mut resolved: HashMap<String, Arithmetic> = HashMap::new();
    let mut resolved_order: Vec<String> = Vec::new();
    for (var, mut value) in order {
        for prior in &resolved_order {
            subst_arith(&mut value, prior, &resolved[prior]);
        }
        resolved_order.push(var.clone());
        resolved.insert(var, value);
    }

    // Substitute into the head.
    for var in &resolved_order {
        subst_head(rule, var, &resolved[var]);
    }

    // Substitute into every non-assignment body predicate.
    let mut new_rhs: Vec<Predicate> = Vec::with_capacity(rule.rhs().len());
    for (i, pred) in rule.rhs_mut().iter_mut().enumerate() {
        if assignment_idx.contains(&i) {
            continue;
        }
        for var in &resolved_order {
            subst_predicate(pred, var, &resolved[var])?;
        }
        new_rhs.push(pred.clone());
    }
    rule.set_rhs(new_rhs);

    // An emptied body must leave the segment (the planner needs ≥1 positive
    // atom): fold ground integer arithmetic (`x = 1 + 2` → fact `P(3)`),
    // reject anything non-constant (unbound head var, float/string/builtin)
    // instead of panicking the planner downstream.
    if !rule.rhs().is_empty() {
        return Ok(false);
    }
    let span = rule.head().span();
    for arg in rule.head_mut().head_arguments_mut() {
        match arg {
            HeadArg::Arith(a) if a.is_const() => {}
            HeadArg::Arith(a) => {
                let folded = fold_const_int(a).ok_or(ParseError::GroundRuleNotConst { span })?;
                *a = Arithmetic::new(Factor::Const(ConstType::Int(folded)), vec![]);
            }
            HeadArg::Var(_) | HeadArg::Aggregation(_) => {
                return Err(ParseError::GroundRuleNotConst { span });
            }
        }
    }
    Ok(true)
}

/// Fold a ground arithmetic expression over polymorphic integer literals
/// (`1 + 2` → `3`). Returns `None` for any non-integer factor (variable,
/// float, string, builtin, UDF call) or on overflow / division by zero —
/// the caller rejects those.
fn fold_const_int(a: &Arithmetic) -> Option<i64> {
    fn factor_value(f: &Factor) -> Option<i64> {
        match f {
            Factor::Const(ConstType::Int(v)) => Some(*v),
            Factor::Group(inner) => fold_const_int(inner),
            _ => None,
        }
    }
    let mut acc = factor_value(a.init())?;
    for (op, f) in a.rest() {
        let v = factor_value(f)?;
        acc = match op {
            ArithmeticOperator::Plus => acc.checked_add(v)?,
            ArithmeticOperator::Minus => acc.checked_sub(v)?,
            ArithmeticOperator::Multiply => acc.checked_mul(v)?,
            ArithmeticOperator::Divide => acc.checked_div(v)?,
            ArithmeticOperator::Modulo => acc.checked_rem(v)?,
        };
    }
    Some(acc)
}

/// Recognise `lhs = rhs` as an assignment that grounds a fresh variable.
///
/// Returns `(var, value)` when exactly one side is a single still-unbound
/// variable and the other side's variables are all bound. The variable must not
/// also appear on the value side (`v = v + 1` is not groundable).
fn as_assignment(
    lhs: &Arithmetic,
    rhs: &Arithmetic,
    bound: &HashSet<String>,
) -> Option<(String, Arithmetic)> {
    let try_side =
        |var_side: &Arithmetic, value_side: &Arithmetic| -> Option<(String, Arithmetic)> {
            if !var_side.is_var() {
                return None;
            }
            let var = var_side.vars().into_iter().next()?.clone();
            if bound.contains(&var) {
                return None;
            }
            let value_vars = value_side.vars();
            if value_vars.iter().any(|v| **v == var) {
                return None;
            }
            if value_vars.iter().all(|v| bound.contains(*v)) {
                Some((var, value_side.clone()))
            } else {
                None
            }
        };

    try_side(lhs, rhs).or_else(|| try_side(rhs, lhs))
}

/// `Factor` form of a resolved value: a lone factor is inlined directly; a
/// multi-term expression is wrapped in a group so left-to-right folding keeps
/// its meaning when spliced into a larger expression.
fn value_factor(value: &Arithmetic) -> Factor {
    if value.rest().is_empty() {
        value.init().clone()
    } else {
        Factor::Group(Box::new(value.clone()))
    }
}

fn subst_head(rule: &mut FlowLogRule, var: &str, value: &Arithmetic) {
    for arg in rule.head_mut().head_arguments_mut() {
        match arg {
            HeadArg::Var(v) if v == var => {
                *arg = if value.is_var() {
                    HeadArg::Var(value.vars()[0].clone())
                } else {
                    HeadArg::Arith(value.clone())
                };
            }
            HeadArg::Var(_) => {}
            HeadArg::Arith(a) => subst_arith(a, var, value),
            HeadArg::Aggregation(agg) => subst_arith(agg.arithmetic_mut(), var, value),
        }
    }
}

fn subst_predicate(pred: &mut Predicate, var: &str, value: &Arithmetic) -> Result<(), ParseError> {
    match pred {
        // The assignment variable is never grounded by a positive atom, so it
        // cannot appear in one.
        Predicate::PositiveAtom(_) => {}
        Predicate::NegativeAtom(atom) => {
            let span = atom.span();
            for arg in atom.arguments_mut() {
                if let AtomArg::Var(v) = arg
                    && v == var
                {
                    *arg = atom_arg_from_value(value).ok_or_else(|| {
                        ParseError::AssignmentVarInNegation {
                            span,
                            var: var.to_string(),
                        }
                    })?;
                }
            }
        }
        Predicate::Compare(expr) => {
            subst_arith(expr.left_mut(), var, value);
            subst_arith(expr.right_mut(), var, value);
        }
        Predicate::FnCall(fc) => {
            for arg in fc.args_mut() {
                subst_arith(arg, var, value);
            }
        }
    }
    Ok(())
}

/// A negated atom argument can only receive a bare variable or constant — not a
/// computed expression. Returns `None` for anything else.
fn atom_arg_from_value(value: &Arithmetic) -> Option<AtomArg> {
    if !value.rest().is_empty() {
        return None;
    }
    match value.init() {
        Factor::Var(v) => Some(AtomArg::Var(v.clone())),
        Factor::Const(c) => Some(AtomArg::Const(c.clone())),
        _ => None,
    }
}

fn subst_arith(arith: &mut Arithmetic, var: &str, value: &Arithmetic) {
    subst_factor(arith.init_mut(), var, value);
    for (_, factor) in arith.rest_mut() {
        subst_factor(factor, var, value);
    }
}

fn subst_factor(factor: &mut Factor, var: &str, value: &Arithmetic) {
    match factor {
        Factor::Var(v) if v == var => *factor = value_factor(value),
        Factor::Var(_) | Factor::Const(_) => {}
        Factor::FnCall(fc) => {
            for arg in fc.args_mut() {
                subst_arith(arg, var, value);
            }
        }
        Factor::Builtin(bc) => {
            for arg in bc.args_mut() {
                subst_arith(arg, var, value);
            }
        }
        Factor::Cast(c) => subst_factor(c.inner_mut(), var, value),
        Factor::Group(a) => subst_arith(a, var, value),
    }
}