patch-prolog-compiler 0.4.1

Standalone Prolog compiler (plgc) — compiles .pl to native binaries via LLVM
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
//! Worker glue emission (Tier 2, docs/design/done/WASM_TIER2_PLAN.md D1g).
//!
//! `plgc build --target worker prog.pl` drops four overrideable scaffolding
//! files next to the reactor `.wasm` so it "just works" (D2):
//!
//! - `reactor.mjs` — the buffer-ABI marshalling (`runQuery`/`assertExports`).
//!   This is the ONE copy of the ABI dance; both the deployed `worker.js` and
//!   the `wasm-reactor-smoke` test driver import it, so the tested code is the
//!   shipped code and the 5-arg `plg_rt_run_query` call can't drift between them.
//! - `worker.js` — a Cloudflare/`workerd` fetch handler that instantiates the
//!   module and calls `reactor.mjs`'s `runQuery`.
//! - `wrangler.toml` — Cloudflare deploy config (`wrangler deploy`).
//! - `config.capnp` — local `workerd serve` config (the dev/test loop).
//!
//! These are SCAFFOLDING: each is written only if absent, so a rebuild never
//! clobbers your edits (the `.wasm` itself is always regenerated). Delete one
//! to get a fresh copy.

use std::path::Path;

/// Cloudflare compatibility date baked into the generated configs. A fixed,
/// conservative past date every recent runtime supports; overrideable.
///
/// `emit` substitutes this value for the `__DATE__` placeholder found in the
/// `wrangler.toml` and `config.capnp` templates.
const COMPAT_DATE: &str = "2024-01-01";

/// Cloudflare worker-name length cap (kept well under the platform limit).
///
/// `worker_name` keeps at most this many characters of the sanitized name.
const MAX_NAME_LEN: usize = 63;

/// Contents of the generated `reactor.mjs`: the JavaScript side of the reactor
/// buffer ABI.
///
/// The module exports `REACTOR_EXPORTS`, `assertExports` and `runQuery`.
/// `runQuery` copies the query text into wasm memory, calls
/// `plg_rt_run_query`, copies the returned bson bytes out, frees both buffers
/// and returns the decoded envelope as a JSON string. The text contains none
/// of the `__…__` placeholders that `emit` substitutes, so it is written out
/// unchanged.
///
/// NOTE(review): `resolveAtom` unpacks the i64 from `plg_rt_atom_name` as
/// pointer-in-high-half / length-in-low-half, while `runQuery` unpacks the i64
/// from `plg_rt_run_query` as length-in-high-half / pointer-in-low-half.
/// Confirm that both layouts match the runtime's actual return conventions.
const REACTOR_MJS: &str = r#"// Reactor buffer-ABI marshalling (generated by plgc). The SINGLE source of the
// host-call dance: imported by worker.js (deploy) AND scripts/reactor-smoke.mjs
// (test), so the tested code is the shipped code. Keep in lockstep with
// `plg_rt_run_query`'s signature — that is the whole point of having one copy.

export const REACTOR_EXPORTS = [
  "plg_init",
  "plg_rt_run_query",
  "plg_rt_alloc",
  "plg_rt_free",
  "plg_rt_atom_name",
  "memory",
];

// Under `wasm-ld --allow-undefined` a missing/renamed export degrades to a
// silent import rather than a link error, so check at instantiation time.
export function assertExports(ex) {
  for (const name of REACTOR_EXPORTS) {
    if (!(name in ex)) throw new Error(`reactor module missing export: ${name}`);
  }
}

// The reactor emits a bson envelope; the engine has no JSON. This glue decodes
// bson→JSON host-side (docs/design/WASM_HOST_GLUE.md): bson term values are
// BinData TermBuf with atom-IDS, resolved via plg_rt_atom_name (which reads the
// runtime interner — program AND query atoms). All conversion logic is host-side.

// Atom-id → name cache, scoped per module instance (a WeakMap keyed on the
// exports object, which is unique per wasm instance). Atoms are immutable and
// only added during solve, so a per-instance cache is valid across queries and
// grows monotonically — and different programs (different atom tables) can't
// conflate, which a module-level cache would.
const _atomCaches = new WeakMap();
function resolveAtom(ex, id) {
  let cache = _atomCaches.get(ex);
  if (!cache) { cache = new Map(); _atomCaches.set(ex, cache); }
  let name = cache.get(id);
  if (name !== undefined) return name;
  const packed = ex.plg_rt_atom_name(id);
  if (packed === 0n) return undefined; // out of range (shouldn't happen)
  const ptr = Number(packed >> 32n);
  const len = Number(packed & 0xffffffffn);
  name = new TextDecoder().decode(new Uint8Array(ex.memory.buffer, ptr, len));
  cache.set(id, name);
  return name;
}

// limit/stepLimit/depthLimit map to the per-request ABI knobs; 0 = the module
// default (WASM_TIER2_PLAN.md A3). stepLimit is i64, hence the BigInt.
export function runQuery(ex, query, { limit = 0, stepLimit = 0n, depthLimit = 0 } = {}) {
  const bytes = new TextEncoder().encode(query);
  const qptr = ex.plg_rt_alloc(bytes.length);
  new Uint8Array(ex.memory.buffer, qptr, bytes.length).set(bytes);
  const packed = ex.plg_rt_run_query(qptr, bytes.length, limit, BigInt(stepLimit), depthLimit);
  ex.plg_rt_free(qptr, bytes.length);
  // Packed (len << 32) | ptr — the i64 return is a BigInt. Copy the result
  // bytes BEFORE freeing so parsing is independent of wasm memory growth.
  const len = Number(packed >> 32n);
  const ptr = Number(packed & 0xffffffffn);
  const bson = new Uint8Array(ex.memory.buffer, ptr, len).slice();
  ex.plg_rt_free(ptr, len);
  return envelopeToJson(bson, ex);
}

// ── bson document decode (the subset the reactor emits) ───────────────────
function rdI32(b, o) { return new DataView(b.buffer, b.byteOffset, b.byteLength).getInt32(o, true); }
function rdCStr(b, o) { let e = o; while (b[e] !== 0) e++; return [new TextDecoder().decode(b.subarray(o, e)), e + 1]; }
function rdStr(b, o) {
  const n = rdI32(b, o); const s = new TextDecoder().decode(b.subarray(o + 4, o + 4 + n - 1));
  return [s, o + 4 + n];
}
// parseDoc returns [obj, endOff]; endOff = start + total (the doc is self-delimiting).
function parseDoc(b, o) {
  const start = o, total = rdI32(b, o); o += 4;
  const obj = {};
  while (b[o] !== 0) {
    const ty = b[o++]; let k; [k, o] = rdCStr(b, o);
    let v; [v, o] = rdValue(b, o, ty); obj[k] = v;
  }
  return [obj, start + total];
}
function parseArr(b, o) {
  const start = o, total = rdI32(b, o); o += 4;
  const tmp = {};
  while (b[o] !== 0) {
    const ty = b[o++]; let k; [k, o] = rdCStr(b, o);
    let v; [v, o] = rdValue(b, o, ty); tmp[k] = v;
  }
  const arr = []; for (let i = 0; String(i) in tmp; i++) arr.push(tmp[String(i)]);
  return [arr, start + total];
}
function rdValue(b, o, ty) {
  switch (ty) {
    case 0x02: return rdStr(b, o);                       // string
    case 0x03: return parseDoc(b, o);                    // document
    case 0x04: return parseArr(b, o);                    // array
    case 0x05: { const n = rdI32(b, o); return [b.subarray(o + 5, o + 5 + n), o + 5 + n]; } // binary
    case 0x08: return [b[o] !== 0, o + 1];               // bool
    case 0x10: return [rdI32(b, o), o + 4];              // int32
    default: throw new Error(`bson: unsupported element type ${ty}`);
  }
}

// ── envelope assembly + term rendering ────────────────────────────────────
function envelopeToJson(bson, ex) {
  const [doc] = parseDoc(bson, 0);
  if ("error" in doc) return JSON.stringify({ error: doc.error });
  const env = { count: doc.count, exhausted: doc.exhausted };
  if ("output" in doc) env.output = doc.output;
  env.solutions = doc.solutions.map((sol) => {
    const o = {};
    for (const k in sol) o[k] = decodeTermBuf(sol[k], ex);
    return o;
  });
  return JSON.stringify(env);
}

// TermBuf BinData payload: [ver:u8=1][cell_count:u32 LE][root:u64 LE][cells…].
// Cell ABI mirrors plg-shared::cell: tag = word & 7, payload = word >> 3.
const TAG_ATOM = 1n, TAG_INT = 2n, TAG_STR = 3n, TAG_LST = 4n, TAG_FLT = 5n, TAG_BIG = 6n;
function decodeTermBuf(b, ex) {
  const dv = new DataView(b.buffer, b.byteOffset, b.byteLength);
  const cellCount = dv.getUint32(1, true);
  const root = dv.getBigUint64(5, true);
  const cells = new Array(cellCount);
  for (let i = 0; i < cellCount; i++) cells[i] = dv.getBigUint64(13 + i * 8, true);
  return renderWord(root, cells, ex, new Set());
}
function bitsToFloat(bits) {
  const buf = new ArrayBuffer(8); new DataView(buf).setBigUint64(0, bits, true);
  return new DataView(buf).getFloat64(0, true);
}
function asI64(u) { return u >= (1n << 63n) ? u - (1n << 64n) : u; }
function isNilName(name) { return name === "[]"; }

// Render a cell word as a native JS value (→ JSON): atom→string (nil→[]),
// int/float/big→number, compound→{functor,args}, proper list→array, improper
// list→{head,tail}, unbound/cycle→"_". `visiting` holds STR/LST buffer indices
// on the render stack so shared subterms render fully but cycles cut to "_".
function renderWord(w, cells, ex, visiting) {
  const tag = w & 7n, payload = w >> 3n;
  if (tag === TAG_ATOM) {
    const name = resolveAtom(ex, Number(payload));
    return isNilName(name) ? [] : name;
  }
  if (tag === TAG_INT) return Number(asI64(w) >> 3n);
  if (tag === TAG_FLT) return bitsToFloat(cells[Number(payload)]);
  if (tag === TAG_BIG) return Number(asI64(cells[Number(payload)]));
  if (tag === TAG_STR) {
    const idx = Number(payload);
    if (visiting.has(idx)) return "_";
    visiting.add(idx);
    const header = cells[idx];
    const functor = resolveAtom(ex, Number(header >> 32n));
    const arity = Number(header & 0xffffffffn);
    const args = [];
    for (let k = 0; k < arity; k++) args.push(renderWord(cells[idx + 1 + k], cells, ex, visiting));
    visiting.delete(idx);
    return { functor, args };
  }
  if (tag === TAG_LST) {
    const elems = [], added = [];
    let cur = w, cut = false;
    while ((cur & 7n) === TAG_LST) {
      const ci = Number(cur >> 3n);
      if (visiting.has(ci)) { cut = true; break; }
      visiting.add(ci); added.push(ci);
      elems.push(renderWord(cells[ci], cells, ex, visiting)); // head
      cur = cells[ci + 1];                                                              // tail
    }
    for (const ci of added) visiting.delete(ci);
    if (cut) return { head: elems, tail: "_" };
    if ((cur & 7n) === TAG_ATOM && isNilName(resolveAtom(ex, Number(cur >> 3n)))) return elems;
    return { head: elems, tail: renderWord(cur, cells, ex, visiting) };
  }
  return "_"; // REF (unbound) or unknown
}
"#;

/// Template for the generated `worker.js`: a fetch handler that lazily
/// instantiates the reactor module once, then answers each request by passing
/// the `?query=` parameter (or, failing that, the POST body) to `runQuery`
/// from `reactor.mjs`. A missing query yields a 400 JSON error.
///
/// Placeholder: `__WASM_FILE__` is replaced by `emit` with the reactor
/// module's file name.
const WORKER_JS: &str = r#"// Cloudflare / workerd glue for a patch-prolog reactor module (generated by
// plgc — edit freely; it is not regenerated once it exists).
//
// Build the Machine once per isolate (`plg_init`), then drive the buffer ABI
// per request via `reactor.mjs`. One in-flight query per isolate — the
// reactor's concurrency contract (WASM_TIER2_PLAN.md D3) — holds because
// `runQuery` never yields (the only await is reading the POST body, before it).
import { runQuery, assertExports } from "./reactor.mjs";
import reactorModule from "./__WASM_FILE__";

let cached;
function reactor() {
  if (!cached) {
    const instance = new WebAssembly.Instance(reactorModule, {});
    assertExports(instance.exports);
    instance.exports.plg_init();
    cached = instance.exports;
  }
  return cached;
}

export default {
  async fetch(request) {
    const url = new URL(request.url);
    let query = url.searchParams.get("query")?.trim();
    if (!query && request.method === "POST") {
      query = (await request.text()).trim();
    }
    const headers = { "content-type": "application/json" };
    if (!query) {
      return new Response(
        '{"error":"missing query (use ?query=<goal> or POST the goal)"}',
        { status: 400, headers },
      );
    }
    return new Response(runQuery(reactor(), query), { headers });
  },
};
"#;

/// Template for the generated `wrangler.toml` (Cloudflare deploy config).
///
/// Placeholders replaced by `emit`: `__APP_NAME__` (the sanitized worker name
/// from `worker_name`) and `__DATE__` (`COMPAT_DATE`).
const WRANGLER_TOML: &str = r#"# Cloudflare deploy config for a patch-prolog reactor (generated by plgc).
# Deploy:  wrangler deploy
# Then:    curl 'https://__APP_NAME__.<your-subdomain>.workers.dev/?query=<goal>'
name = "__APP_NAME__"
main = "worker.js"
compatibility_date = "__DATE__"

# Import the compiled reactor as a WebAssembly module.
[[rules]]
globs = ["**/*.wasm"]
type = "CompiledWasm"
"#;

/// Template for the generated `config.capnp` (local `workerd serve` config).
/// It declares one service listening on `*:8080` whose worker embeds
/// `worker.js`, `reactor.mjs` and the reactor wasm module.
///
/// Placeholders replaced by `emit`: `__WASM_FILE__` (the reactor module's file
/// name, used both as module name and embed path) and `__DATE__`
/// (`COMPAT_DATE`).
const CONFIG_CAPNP: &str = r#"# Local workerd config for a patch-prolog reactor (generated by plgc).
# Serve:  workerd serve config.capnp
# Then:   curl 'http://localhost:8080/?query=<goal>'
using Workerd = import "/workerd/workerd.capnp";

const config :Workerd.Config = (
  services = [ (name = "main", worker = .mainWorker) ],
  sockets = [ (name = "http", address = "*:8080", http = (), service = "main") ],
);

const mainWorker :Workerd.Worker = (
  modules = [
    (name = "worker.js", esModule = embed "worker.js"),
    (name = "reactor.mjs", esModule = embed "reactor.mjs"),
    (name = "__WASM_FILE__", wasm = embed "__WASM_FILE__"),
  ],
  compatibilityDate = "__DATE__",
);
"#;

/// Emit the glue files next to `wasm_path` (the just-linked reactor module).
///
/// Four scaffolding files are considered, in this order: `reactor.mjs`,
/// `worker.js`, `wrangler.toml`, `config.capnp`. Each is created in the
/// directory containing `wasm_path` (or `.` if the path has no parent) with
/// the `__WASM_FILE__`, `__APP_NAME__` and `__DATE__` placeholders filled in.
///
/// A file that already exists is left untouched. Creation uses an atomic
/// "create only if absent" open, so an existing (possibly user-edited) file is
/// never truncated, even if it appears between a check and the write.
///
/// # Returns
///
/// The names of the files actually written (those that didn't already exist),
/// in emission order, so the CLI can report what landed vs. what it preserved.
///
/// # Errors
///
/// Returns a message if `wasm_path` has no file name, or if a glue file could
/// not be created or written for any reason other than already existing.
pub fn emit(wasm_path: &Path) -> Result<Vec<String>, String> {
    use std::fs::OpenOptions;
    use std::io::{ErrorKind, Write};

    let dir = wasm_path.parent().unwrap_or(Path::new("."));
    let wasm_file = wasm_path
        .file_name()
        .ok_or("reactor output path has no file name")?
        .to_string_lossy()
        .into_owned();
    let app_name = worker_name(&wasm_file);

    let fill = |t: &str| {
        t.replace("__WASM_FILE__", &wasm_file)
            .replace("__APP_NAME__", &app_name)
            .replace("__DATE__", COMPAT_DATE)
    };

    let mut written = Vec::new();
    for (name, template) in [
        ("reactor.mjs", REACTOR_MJS),
        ("worker.js", WORKER_JS),
        ("wrangler.toml", WRANGLER_TOML),
        ("config.capnp", CONFIG_CAPNP),
    ] {
        let path = dir.join(name);
        // Scaffolding: never clobber an existing (possibly user-edited) file.
        // `create_new` makes "absent?" and "create" a single atomic step.
        let mut file = match OpenOptions::new().write(true).create_new(true).open(&path) {
            Ok(file) => file,
            Err(e) if e.kind() == ErrorKind::AlreadyExists => continue,
            Err(e) => return Err(format!("failed to write {name}: {e}")),
        };
        // Fill the template only now that we know the file will be written.
        if let Err(e) = file.write_all(fill(template).as_bytes()) {
            // Best-effort cleanup: a truncated scaffold would otherwise be
            // "preserved" by every later build. We created this file ourselves
            // just above, so removing it cannot discard user edits.
            drop(file);
            let _ = std::fs::remove_file(&path);
            return Err(format!("failed to write {name}: {e}"));
        }
        written.push(name.to_string());
    }
    Ok(written)
}

/// Turn a wasm file name into a name usable as a Cloudflare worker name.
///
/// The transformation, in order:
/// 1. drop a trailing `.wasm`, then a trailing `.worker`;
/// 2. keep ASCII letters and digits (lowercased) and replace every run of
///    other characters with a single `-`, with no `-` at either end;
/// 3. keep at most `MAX_NAME_LEN` characters, dropping a `-` left dangling by
///    the cut.
///
/// If nothing is left, the result is `prolog-worker`.
fn worker_name(wasm_file: &str) -> String {
    let base = wasm_file.strip_suffix(".wasm").unwrap_or(wasm_file);
    let base = base.strip_suffix(".worker").unwrap_or(base);

    // Splitting on every non-alphanumeric and discarding the empty pieces both
    // collapses separator runs and trims the ends; joining puts back one `-`
    // between the surviving pieces.
    let mut slug = base
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .collect::<Vec<_>>()
        .join("-")
        .to_ascii_lowercase();

    // The slug is pure ASCII at this point, so bytes and characters coincide.
    slug.truncate(MAX_NAME_LEN);
    // Separators are never adjacent, so the cut leaves at most one trailing `-`.
    if slug.ends_with('-') {
        slug.pop();
    }

    if slug.is_empty() {
        "prolog-worker".to_string()
    } else {
        slug
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Suffix stripping, sanitizing, run collapsing and the fallback name.
    #[test]
    fn worker_name_strips_suffixes_and_sanitizes() {
        let cases = [
            ("deps.worker.wasm", "deps"),
            ("my_app.worker.wasm", "my-app"),
            ("plain.wasm", "plain"),
            // Runs of non-alphanumerics collapse to a single hyphen.
            ("my__app.worker.wasm", "my-app"),
            // Degenerate names still yield something Cloudflare accepts.
            (".worker.wasm", "prolog-worker"),
        ];
        for (input, expected) in cases {
            assert_eq!(worker_name(input), expected);
        }
    }

    /// An over-long stem is cut down to exactly the cap.
    #[test]
    fn worker_name_caps_length() {
        let stem = "a".repeat(200);
        let derived = worker_name(&format!("{stem}.worker.wasm"));
        assert_eq!(derived.len(), MAX_NAME_LEN);
    }

    /// First emit writes all four files with placeholders filled; a second
    /// emit writes nothing and leaves edited files alone.
    #[test]
    fn emit_writes_glue_then_preserves_it() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let wasm = root.join("deps.worker.wasm");
        std::fs::write(&wasm, b"\0asm").unwrap();
        let read = |file: &str| std::fs::read_to_string(root.join(file)).unwrap();

        let first = emit(&wasm).unwrap();
        assert_eq!(
            first,
            ["reactor.mjs", "worker.js", "wrangler.toml", "config.capnp"]
        );

        // The marshalling lives once in reactor.mjs; worker.js imports it.
        let expectations = [
            (
                "reactor.mjs",
                "ex.plg_rt_run_query(qptr, bytes.length, limit, BigInt(stepLimit), depthLimit)",
            ),
            (
                "worker.js",
                r#"import { runQuery, assertExports } from "./reactor.mjs""#,
            ),
            (
                "worker.js",
                r#"import reactorModule from "./deps.worker.wasm""#,
            ),
            ("wrangler.toml", "name = \"deps\""),
            ("config.capnp", r#"embed "deps.worker.wasm""#),
            ("config.capnp", r#"embed "reactor.mjs""#),
        ];
        for (file, needle) in expectations {
            let text = read(file);
            assert!(text.contains(needle), "{text}");
        }

        // A second emit preserves the (possibly edited) files — writes nothing.
        std::fs::write(root.join("worker.js"), "// edited").unwrap();
        let second = emit(&wasm).unwrap();
        assert!(second.is_empty(), "rebuild must not clobber glue: {second:?}");
        assert_eq!(read("worker.js"), "// edited");
    }
}