// patch-prolog-compiler 0.3.0
//
// Standalone Prolog compiler (plgc) — compiles .pl to native binaries via LLVM
//! Worker glue emission (Tier 2, docs/design/done/WASM_TIER2_PLAN.md D1g).
//!
//! `plgc build --target worker prog.pl` drops four overridable scaffolding
//! files next to the reactor `.wasm` so it "just works" (D2):
//!
//! - `reactor.mjs` — the buffer-ABI marshalling (`runQuery`/`assertExports`).
//!   This is the ONE copy of the ABI dance; both the deployed `worker.js` and
//!   the `wasm-reactor-smoke` test driver import it, so the tested code is the
//!   shipped code and the 5-arg `plg_rt_run_query` call can't drift between them.
//! - `worker.js` — a Cloudflare/`workerd` fetch handler that instantiates the
//!   module and calls `reactor.mjs`'s `runQuery`.
//! - `wrangler.toml` — Cloudflare deploy config (`wrangler deploy`).
//! - `config.capnp` — local `workerd serve` config (the dev/test loop).
//!
//! These are SCAFFOLDING: each is written only if absent, so a rebuild never
//! clobbers your edits (the `.wasm` itself is always regenerated). Delete one
//! to get a fresh copy.

use std::path::Path;

/// Cloudflare compatibility date baked into the generated configs. A fixed,
/// conservative past date every recent runtime supports; overridable.
///
/// `emit` substitutes this value for the `__DATE__` placeholder, which appears
/// in the `wrangler.toml` and `config.capnp` templates. Because the generated
/// files are never rewritten once they exist, a user can change the date by
/// editing them directly.
const COMPAT_DATE: &str = "2024-01-01";

/// Cloudflare worker-name length cap (kept well under the platform limit).
///
/// `worker_name` keeps at most this many characters of the sanitized name.
/// NOTE(review): the "well under the platform limit" claim is not verifiable
/// from this file — confirm against Cloudflare's current worker-name rules.
const MAX_NAME_LEN: usize = 63;

/// Template for the generated `reactor.mjs`.
///
/// The JavaScript module exports `REACTOR_EXPORTS`, `assertExports` and
/// `runQuery`. It contains none of the `__WASM_FILE__` / `__APP_NAME__` /
/// `__DATE__` placeholders, so `emit`'s substitution pass leaves it unchanged.
/// The text is written to disk verbatim — anything inside the raw string,
/// including its `//` comments, is part of the shipped file.
const REACTOR_MJS: &str = r#"// Reactor buffer-ABI marshalling (generated by plgc). The SINGLE source of the
// host-call dance: imported by worker.js (deploy) AND scripts/reactor-smoke.mjs
// (test), so the tested code is the shipped code. Keep in lockstep with
// `plg_rt_run_query`'s signature — that is the whole point of having one copy.

export const REACTOR_EXPORTS = [
  "plg_init",
  "plg_rt_run_query",
  "plg_rt_alloc",
  "plg_rt_free",
  "memory",
];

// Under `wasm-ld --allow-undefined` a missing/renamed export degrades to a
// silent import rather than a link error, so check at instantiation time.
export function assertExports(ex) {
  for (const name of REACTOR_EXPORTS) {
    if (!(name in ex)) throw new Error(`reactor module missing export: ${name}`);
  }
}

// limit/stepLimit/depthLimit map to the per-request ABI knobs; 0 = the module
// default (WASM_TIER2_PLAN.md A3). stepLimit is i64, hence the BigInt.
export function runQuery(ex, query, { limit = 0, stepLimit = 0n, depthLimit = 0 } = {}) {
  const bytes = new TextEncoder().encode(query);
  const qptr = ex.plg_rt_alloc(bytes.length);
  new Uint8Array(ex.memory.buffer, qptr, bytes.length).set(bytes);
  const packed = ex.plg_rt_run_query(qptr, bytes.length, limit, BigInt(stepLimit), depthLimit);
  ex.plg_rt_free(qptr, bytes.length);
  // Packed (len << 32) | ptr — the i64 return is a BigInt. Read the result view
  // AFTER the call so a memory growth during solving can't leave it detached.
  const len = Number(packed >> 32n);
  const ptr = Number(packed & 0xffffffffn);
  const json = new TextDecoder().decode(new Uint8Array(ex.memory.buffer, ptr, len));
  ex.plg_rt_free(ptr, len);
  return json;
}
"#;

/// Template for the generated `worker.js` fetch handler.
///
/// Placeholder: `__WASM_FILE__` (the import specifier of the compiled module),
/// replaced by `emit` with the reactor's file name. The handler takes the goal
/// from the `query` URL parameter, or from the request body on a POST when the
/// parameter is missing or blank, and answers 400 when neither supplies one.
const WORKER_JS: &str = r#"// Cloudflare / workerd glue for a patch-prolog reactor module (generated by
// plgc — edit freely; it is not regenerated once it exists).
//
// Build the Machine once per isolate (`plg_init`), then drive the buffer ABI
// per request via `reactor.mjs`. One in-flight query per isolate — the
// reactor's concurrency contract (WASM_TIER2_PLAN.md D3) — holds because
// `runQuery` never yields (the only await is reading the POST body, before it).
import { runQuery, assertExports } from "./reactor.mjs";
import reactorModule from "./__WASM_FILE__";

let cached;
function reactor() {
  if (!cached) {
    const instance = new WebAssembly.Instance(reactorModule, {});
    assertExports(instance.exports);
    instance.exports.plg_init();
    cached = instance.exports;
  }
  return cached;
}

export default {
  async fetch(request) {
    const url = new URL(request.url);
    let query = url.searchParams.get("query")?.trim();
    if (!query && request.method === "POST") {
      query = (await request.text()).trim();
    }
    const headers = { "content-type": "application/json" };
    if (!query) {
      return new Response(
        '{"error":"missing query (use ?query=<goal> or POST the goal)"}',
        { status: 400, headers },
      );
    }
    return new Response(runQuery(reactor(), query), { headers });
  },
};
"#;

/// Template for the generated `wrangler.toml` deploy config.
///
/// Placeholders: `__APP_NAME__` (the sanitized worker name, used both in the
/// `name` key and in the example URL comment) and `__DATE__` (the
/// compatibility date). Both are filled in by `emit`.
const WRANGLER_TOML: &str = r#"# Cloudflare deploy config for a patch-prolog reactor (generated by plgc).
# Deploy:  wrangler deploy
# Then:    curl 'https://__APP_NAME__.<your-subdomain>.workers.dev/?query=<goal>'
name = "__APP_NAME__"
main = "worker.js"
compatibility_date = "__DATE__"

# Import the compiled reactor as a WebAssembly module.
[[rules]]
globs = ["**/*.wasm"]
type = "CompiledWasm"
"#;

/// Template for the generated `config.capnp` (local `workerd serve` config).
///
/// Placeholders: `__WASM_FILE__` (used as both the module name and the
/// embedded file path of the compiled reactor) and `__DATE__` (the
/// compatibility date). Both are filled in by `emit`. The config lists
/// `worker.js`, `reactor.mjs` and the wasm file as modules and binds an HTTP
/// socket on `*:8080`.
const CONFIG_CAPNP: &str = r#"# Local workerd config for a patch-prolog reactor (generated by plgc).
# Serve:  workerd serve config.capnp
# Then:   curl 'http://localhost:8080/?query=<goal>'
using Workerd = import "/workerd/workerd.capnp";

const config :Workerd.Config = (
  services = [ (name = "main", worker = .mainWorker) ],
  sockets = [ (name = "http", address = "*:8080", http = (), service = "main") ],
);

const mainWorker :Workerd.Worker = (
  modules = [
    (name = "worker.js", esModule = embed "worker.js"),
    (name = "reactor.mjs", esModule = embed "reactor.mjs"),
    (name = "__WASM_FILE__", wasm = embed "__WASM_FILE__"),
  ],
  compatibilityDate = "__DATE__",
);
"#;

/// Emit the glue files next to `wasm_path` (the just-linked reactor module).
///
/// Writes `reactor.mjs`, `worker.js`, `wrangler.toml` and `config.capnp` into
/// the directory that contains `wasm_path`. Each file is scaffolding: it is
/// written only when absent, so a rebuild never overwrites user edits.
///
/// Returns the list of files actually written (those that didn't already exist)
/// so the CLI can report what landed vs. what it preserved.
///
/// The wasm file name is spliced verbatim into quoted strings in the generated
/// JavaScript and Cap'n Proto text; characters such as `"` or `\` in the name
/// are not escaped.
///
/// # Errors
///
/// Returns a message when `wasm_path` has no file-name component, or when
/// writing one of the glue files fails. Files written before a failure are
/// left in place.
pub fn emit(wasm_path: &Path) -> Result<Vec<String>, String> {
    let dir = wasm_path.parent().unwrap_or(Path::new("."));
    let wasm_file = wasm_path
        .file_name()
        .ok_or("reactor output path has no file name")?
        .to_string_lossy()
        .into_owned();
    let app_name = worker_name(&wasm_file);

    // Substitute the caller-supplied file name LAST. `app_name` is restricted
    // to `[a-z0-9-]` and `COMPAT_DATE` is a fixed date, so neither can contain
    // a placeholder; the file name can (e.g. `__DATE__.wasm`), and replacing
    // it first would let the later passes rewrite the name itself.
    let fill = |template: &str| {
        template
            .replace("__APP_NAME__", &app_name)
            .replace("__DATE__", COMPAT_DATE)
            .replace("__WASM_FILE__", &wasm_file)
    };

    let mut written = Vec::new();
    for (name, template) in [
        ("reactor.mjs", REACTOR_MJS),
        ("worker.js", WORKER_JS),
        ("wrangler.toml", WRANGLER_TOML),
        ("config.capnp", CONFIG_CAPNP),
    ] {
        let path = dir.join(name);
        // Scaffolding: never clobber an existing (possibly user-edited) file.
        if path.exists() {
            continue;
        }
        // Render only the templates that are actually going to be written.
        std::fs::write(&path, fill(template))
            .map_err(|e| format!("failed to write {name}: {e}"))?;
        written.push(name.to_string());
    }
    Ok(written)
}

/// Turn a wasm file name into a worker name made only of `[a-z0-9-]`.
///
/// Steps: drop a trailing `.wasm`, then a trailing `.worker`; lowercase the
/// ASCII letters and digits; replace every run of any other characters with a
/// single `-`; remove `-` from both ends; keep at most `MAX_NAME_LEN`
/// characters and drop a `-` left dangling by that cut. An input with no
/// usable characters yields `prolog-worker`.
fn worker_name(wasm_file: &str) -> String {
    let base = wasm_file.strip_suffix(".wasm").unwrap_or(wasm_file);
    let base = base.strip_suffix(".worker").unwrap_or(base);

    // Splitting on every non-alphanumeric and discarding the empty pieces
    // collapses separator runs and removes leading/trailing separators at once.
    let mut slug = base
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .collect::<Vec<_>>()
        .join("-")
        .to_ascii_lowercase();

    // `slug` is pure ASCII here, so a byte cut is also a character cut.
    slug.truncate(MAX_NAME_LEN);

    // The cut may have landed right after a separator.
    match slug.trim_end_matches('-') {
        "" => "prolog-worker".to_string(),
        capped => capped.to_string(),
    }
}

/// Unit tests for worker-name derivation and glue-file emission.
#[cfg(test)]
mod tests {
    use super::*;

    /// Suffix stripping, separator collapsing and the empty-name fallback.
    #[test]
    fn worker_name_strips_suffixes_and_sanitizes() {
        assert_eq!(worker_name("deps.worker.wasm"), "deps");
        assert_eq!(worker_name("my_app.worker.wasm"), "my-app");
        assert_eq!(worker_name("plain.wasm"), "plain");
        // Runs of non-alphanumerics collapse to a single hyphen.
        assert_eq!(worker_name("my__app.worker.wasm"), "my-app");
        // Degenerate names still yield something Cloudflare accepts.
        assert_eq!(worker_name(".worker.wasm"), "prolog-worker");
    }

    /// Over-long stems are cut to exactly `MAX_NAME_LEN` characters.
    #[test]
    fn worker_name_caps_length() {
        let long = format!("{}.worker.wasm", "a".repeat(200));
        assert_eq!(worker_name(&long).len(), MAX_NAME_LEN);
    }

    /// Edge cases the sanitizer documents but the tests above do not reach.
    #[test]
    fn worker_name_handles_edge_cases() {
        // Upper-case ASCII is lowered.
        assert_eq!(worker_name("My-App.worker.wasm"), "my-app");
        // Leading and trailing separators are trimmed.
        assert_eq!(worker_name("--x--.wasm"), "x");
        // Non-ASCII characters count as separators.
        assert_eq!(worker_name("caf\u{e9}.wasm"), "caf");
        // When the length cap lands right after a separator, the dangling
        // hyphen is dropped, so the result may be shorter than the cap.
        let edge = format!("{}_{}.wasm", "a".repeat(MAX_NAME_LEN - 1), "b".repeat(10));
        assert_eq!(worker_name(&edge), "a".repeat(MAX_NAME_LEN - 1));
    }

    /// First `emit` writes all four files with placeholders filled; a second
    /// `emit` writes nothing and leaves an edited file untouched.
    #[test]
    fn emit_writes_glue_then_preserves_it() {
        let dir = tempfile::tempdir().unwrap();
        let wasm = dir.path().join("deps.worker.wasm");
        std::fs::write(&wasm, b"\0asm").unwrap();

        let written = emit(&wasm).unwrap();
        assert_eq!(
            written,
            ["reactor.mjs", "worker.js", "wrangler.toml", "config.capnp"]
        );

        // The marshalling lives once in reactor.mjs; worker.js imports it.
        let abi = std::fs::read_to_string(dir.path().join("reactor.mjs")).unwrap();
        assert!(
            abi.contains(
                "ex.plg_rt_run_query(qptr, bytes.length, limit, BigInt(stepLimit), depthLimit)"
            ),
            "{abi}"
        );
        let js = std::fs::read_to_string(dir.path().join("worker.js")).unwrap();
        assert!(
            js.contains(r#"import { runQuery, assertExports } from "./reactor.mjs""#),
            "{js}"
        );
        assert!(
            js.contains(r#"import reactorModule from "./deps.worker.wasm""#),
            "{js}"
        );
        let toml = std::fs::read_to_string(dir.path().join("wrangler.toml")).unwrap();
        assert!(toml.contains("name = \"deps\""), "{toml}");
        let capnp = std::fs::read_to_string(dir.path().join("config.capnp")).unwrap();
        assert!(capnp.contains(r#"embed "deps.worker.wasm""#), "{capnp}");
        assert!(capnp.contains(r#"embed "reactor.mjs""#), "{capnp}");

        // A second emit preserves the (possibly edited) files — writes nothing.
        std::fs::write(dir.path().join("worker.js"), "// edited").unwrap();
        let again = emit(&wasm).unwrap();
        assert!(again.is_empty(), "rebuild must not clobber glue: {again:?}");
        assert_eq!(
            std::fs::read_to_string(dir.path().join("worker.js")).unwrap(),
            "// edited"
        );
    }
}