patch-prolog-compiler 0.2.0

Standalone Prolog compiler (plgc) — compiles .pl to native binaries via LLVM
//! Whole-program assembly: group clauses into predicates, emit runtime
//! declarations, the atom table, the registry, every predicate's
//! functions, and the thin `main`.

use super::{CgSource, CodeGen};
use plg_frontend::{CgClause, ProgramDirectives};
use plg_shared::StringInterner;
use std::fmt::Write;

/// LLVM `declare` lines for the runtime functions generated code calls (the
/// plg_rt_* ABI — see docs/design/RUNTIME_ABI.md; signatures mirror
/// crates/runtime/src/abi.rs).
///
/// `codegen_program` copies this text verbatim into the module, directly
/// after the header. Declarations for the rows of
/// `super::lower::DET_BUILTINS` are not listed here; `codegen_program`
/// generates those separately from the table.
///
/// The `plg_rt_init` and `plg_rt_main` signatures below must stay in step
/// with the calls `codegen_program` writes into the generated `main`.
const RUNTIME_DECLS: &str = "\
declare ptr @plg_rt_init(ptr, i32, ptr, i32, ptr, i32, ptr, i32)
declare i32 @plg_rt_main(ptr, i32, ptr)
declare i32 @plg_rt_step(ptr)
declare i64 @plg_rt_new_var(ptr)
declare i64 @plg_rt_frame_alloc(ptr, i32)
declare void @plg_rt_frame_set(ptr, i64, i32, i64)
declare i64 @plg_rt_frame_get(ptr, i64, i32)
declare i64 @plg_rt_areg_get(ptr, i32)
declare void @plg_rt_areg_set(ptr, i32, i64)
declare void @plg_rt_breg_set(ptr, i32, i64)
declare i64 @plg_rt_put_struct(ptr, i32, i32)
declare i64 @plg_rt_put_list(ptr, i64, i64)
declare i64 @plg_rt_put_float(ptr, i64)
declare i32 @plg_rt_unify(ptr, i64, i64)
declare void @plg_rt_set_k(ptr, i64, i64)
declare i64 @plg_rt_k_fn(ptr)
declare i64 @plg_rt_k_env(ptr)
declare void @plg_rt_push_cp(ptr, i64, i64)
declare i32 @plg_rt_pred_fail(ptr, i64)
declare i32 @plg_rt_existence_error(ptr, i32, i32, i32)
declare i64 @plg_rt_cp_top(ptr)
declare void @plg_rt_cut(ptr, i64)
declare i64 @plg_rt_deref(ptr, i64)
declare i64 @plg_rt_str_key(ptr, i64)
declare i32 @plg_rt_b_is(ptr, i64, i64, i32)
declare i32 @plg_rt_b_arith_cmp(ptr, i32, i64, i64, i32)
declare i32 @plg_rt_b_neq(ptr, i64, i64)
declare i32 @plg_rt_b_term_cmp(ptr, i32, i64, i64)
declare i32 @plg_rt_b_compare(ptr, i64, i64, i64)
declare i64 @plg_rt_put_big(ptr, i64)
declare i32 @plg_rt_metacall(ptr, i64)
declare i32 @plg_rt_b_throw_1(ptr, i64)
declare i32 @plg_rt_b_catch_3(ptr, i64, i64, i64)
declare i32 @plg_rt_b_findall_3(ptr, i64, i64, i64)
declare i32 @plg_rt_pred_between_3(ptr, i64)
declare i32 @plg_rt_fact_first(ptr, i64, i64, i64, i64, i64, i64, i64)
declare i32 @plg_rt_fact_next(ptr, i64)
";

/// Target triple of the host (plgc compiles for the machine it runs on).
///
/// Only the four Linux/macOS × x86_64/aarch64 combinations are mapped; every
/// other host yields an empty string, which the caller treats as "emit no
/// `target triple` line".
fn target_triple() -> &'static str {
    match (std::env::consts::OS, std::env::consts::ARCH) {
        ("linux", "x86_64") => "x86_64-unknown-linux-gnu",
        ("linux", "aarch64") => "aarch64-unknown-linux-gnu",
        ("macos", "aarch64") => "arm64-apple-macosx11.0.0",
        ("macos", "x86_64") => "x86_64-apple-macosx11.0.0",
        _ => "", // let clang infer
    }
}

/// Assembles the complete module text for a program and returns it.
///
/// Emission order: header comment and optional target triple, the fixed
/// runtime declarations, one declaration per `DET_BUILTINS` row, the atom
/// table, the registry, every predicate, the source-location side-table,
/// and finally `main`.
///
/// # Errors
///
/// Returns `Err` when a clause head is neither an atom nor a compound, when
/// a predicate's arity is above `crate::MAX_GOAL_ARITY`, or when emitting a
/// predicate fails.
pub fn codegen_program(
    clauses: &[CgClause],
    directives: &ProgramDirectives,
    interner: &StringInterner,
    sources: &[CgSource],
) -> Result<String, String> {
    let mut cg = CodeGen::new(interner, sources);

    // Bucket every clause under its (functor, arity) key. Clauses are
    // appended to their bucket in the order they appear in `clauses`.
    for clause in clauses {
        let Some((f, a)) = clause.head.functor_arity() else {
            return Err("clause head must be an atom or compound".to_string());
        };
        if a > crate::MAX_GOAL_ARITY {
            return Err(format!(
                "predicate {}/{a} exceeds the supported maximum arity {}",
                interner.resolve(f),
                crate::MAX_GOAL_ARITY
            ));
        }
        let bucket = cg.predicates.entry((f, a as u32)).or_default();
        bucket.push(clause.clone());
    }

    // A dynamic declaration with no clauses of its own is recorded
    // separately so it becomes a silent-fail row.
    for &(f, a) in &directives.dynamic {
        let key = (f, a as u32);
        if cg.predicates.contains_key(&key) {
            continue;
        }
        cg.dynamic_only.push(key);
    }

    // --- Module header: banner, then the triple line unless it is empty.
    writeln!(cg.out, "; generated by plgc {}", env!("CARGO_PKG_VERSION")).unwrap();
    match target_triple() {
        "" => {}
        triple => writeln!(cg.out, "target triple = \"{triple}\"").unwrap(),
    }
    cg.out.push('\n');
    cg.out.push_str(RUNTIME_DECLS);

    // Declarations for the deterministic builtins come straight from the
    // table, so the IR and the lowering can never disagree.
    for (_, arity, sym, raises) in super::lower::DET_BUILTINS {
        // One `i64` per argument after the leading `ptr`.
        let mut args = ", i64".repeat(*arity as usize);
        if *raises {
            args.push_str(", i32"); // trailing site_id (SPANS.md Layer 3)
        }
        writeln!(cg.out, "declare i32 @{sym}(ptr{args})").unwrap();
    }
    cg.out.push('\n');

    cg.emit_atom_table();
    let registry_len = cg.emit_registry();
    cg.out.push('\n');

    // --- Predicates. Copy the groups out first so the loop does not hold a
    // borrow of `cg.predicates` while calling the emitters on `cg`.
    let snapshot: Vec<_> = cg
        .predicates
        .iter()
        .map(|(key, group)| (*key, group.clone()))
        .collect();
    for ((f, a), group) in snapshot {
        if super::facts::is_fact_predicate(&group) {
            cg.emit_fact_predicate(f, a, &group)?;
        } else {
            cg.emit_predicate(f, a, &group)?;
        }
        cg.out.push('\n');
    }

    // --- Source-location side-table (SPANS.md Layer 3). This has to follow
    // the predicates, which populate it via `site_id`.
    let (srcmap_len, files_len) = cg.emit_provenance();
    cg.out.push('\n');

    // --- Thin main: initialise the runtime with the emitted tables, hand
    // control to it, and return its exit code.
    let atom_count = interner.len();
    writeln!(cg.out, "define i32 @main(i32 %argc, ptr %argv) {{").unwrap();
    writeln!(cg.out, "entry:").unwrap();
    writeln!(
        cg.out,
        "  %m = call ptr @plg_rt_init(ptr @plg_atom_strs, i32 {}, ptr @plg_registry, i32 {registry_len}, ptr @plg_srcmap, i32 {srcmap_len}, ptr @plg_files, i32 {files_len})",
        atom_count
    )
    .unwrap();
    writeln!(
        cg.out,
        "  %code = call i32 @plg_rt_main(ptr %m, i32 %argc, ptr %argv)"
    )
    .unwrap();
    writeln!(cg.out, "  ret i32 %code").unwrap();
    writeln!(cg.out, "}}").unwrap();

    Ok(cg.out)
}