Skip to main content

alef_e2e/codegen/
wasm.rs

1//! WebAssembly e2e test generator using vitest.
2//!
3//! Reuses the TypeScript test renderer for both HTTP and non-HTTP fixtures,
4//! configured with the `@kreuzberg/wasm` (or equivalent) package as the import
5//! path and `wasm` as the language key for skip/override resolution. Adds
6//! wasm-specific scaffolding: vite-plugin-wasm + top-level-await for vitest,
7//! a `setup.ts` chdir to `test_documents/` so file_path fixtures resolve, and
8//! a `globalSetup.ts` that spawns the mock-server for HTTP fixtures.
9
10use crate::config::E2eConfig;
11use crate::escape::sanitize_filename;
12use crate::field_access::FieldResolver;
13use crate::fixture::{Fixture, FixtureGroup};
14use alef_core::backend::GeneratedFile;
15use alef_core::config::ResolvedCrateConfig;
16use alef_core::hash::{self, CommentStyle};
17use alef_core::template_versions as tv;
18use anyhow::Result;
19use std::fmt::Write as FmtWrite;
20use std::path::PathBuf;
21
22use super::E2eCodegen;
23
24/// WebAssembly e2e code generator.
25pub struct WasmCodegen;
26
27impl E2eCodegen for WasmCodegen {
28    fn generate(
29        &self,
30        groups: &[FixtureGroup],
31        e2e_config: &E2eConfig,
32        config: &ResolvedCrateConfig,
33        type_defs: &[alef_core::ir::TypeDef],
34    ) -> Result<Vec<GeneratedFile>> {
35        let lang = self.language_name();
36        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
37        let tests_base = output_base.join("tests");
38
39        let mut files = Vec::new();
40
41        // Resolve call config with wasm-specific overrides.
42        let call = &e2e_config.call;
43        let overrides = call.overrides.get(lang);
44        let module_path = overrides
45            .and_then(|o| o.module.as_ref())
46            .cloned()
47            .unwrap_or_else(|| call.module.clone());
48        let function_name = overrides
49            .and_then(|o| o.function.as_ref())
50            .cloned()
51            .unwrap_or_else(|| snake_to_camel(&call.function));
52        let client_factory = overrides.and_then(|o| o.client_factory.as_deref());
53
54        // Resolve package config — defaults to a co-located pkg/ directory shipped
55        // by `wasm-pack build` next to the wasm crate.
56        // When `[crates.output] wasm` is set explicitly, derive the pkg path from
57        // that value so that renamed WASM crates resolve correctly without any
58        // hardcoded special cases.
59        let wasm_pkg = e2e_config.resolve_package("wasm");
60        let pkg_path = wasm_pkg
61            .as_ref()
62            .and_then(|p| p.path.as_ref())
63            .cloned()
64            .unwrap_or_else(|| config.wasm_crate_path());
65        let pkg_name = wasm_pkg
66            .as_ref()
67            .and_then(|p| p.name.as_ref())
68            .cloned()
69            .unwrap_or_else(|| {
70                // Default: derive from WASM crate name (config.name + "-wasm")
71                // wasm-pack transforms the crate name to the package name by replacing
72                // dashes with the crate separator in Cargo (e.g., kreuzberg-wasm -> kreuzberg_wasm).
73                // However, the published npm package might use the module name, which is typically
74                // the crate name without "-wasm". Fall back to the module path.
75                module_path.clone()
76            });
77        let pkg_version = wasm_pkg
78            .as_ref()
79            .and_then(|p| p.version.as_ref())
80            .cloned()
81            .or_else(|| config.resolved_version())
82            .unwrap_or_else(|| "0.1.0".to_string());
83
84        // Determine which auxiliary scaffolding files we need based on the active
85        // fixture set. Doing this once up front lets us emit a self-contained vitest
86        // config that wires only the setup files we'll actually generate.
87        let active_per_group: Vec<Vec<&Fixture>> = groups
88            .iter()
89            .map(|group| {
90                group
91                    .fixtures
92                    .iter()
93                    .filter(|f| super::should_include_fixture(f, lang, e2e_config))
94                    // Honor per-call `skip_languages`: when the resolved call's
95                    // `skip_languages` contains `wasm`, the wasm binding doesn't
96                    // export that function and any test file referencing it
97                    // would fail TS resolution. Drop the fixture entirely.
98                    .filter(|f| {
99                        let cc = e2e_config.resolve_call_for_fixture(f.call.as_deref(), &f.input);
100                        !cc.skip_languages.iter().any(|l| l == lang)
101                    })
102                    .filter(|f| {
103                        // Node fetch (undici) rejects pre-set Content-Length that
104                        // doesn't match the real body length — skip fixtures that
105                        // intentionally send a mismatched header.
106                        f.http.as_ref().is_none_or(|h| {
107                            !h.request
108                                .headers
109                                .iter()
110                                .any(|(k, _)| k.eq_ignore_ascii_case("content-length"))
111                        })
112                    })
113                    .filter(|f| {
114                        // Node fetch only supports a fixed set of HTTP methods;
115                        // TRACE and CONNECT throw before reaching the server.
116                        f.http.as_ref().is_none_or(|h| {
117                            let m = h.request.method.to_ascii_uppercase();
118                            m != "TRACE" && m != "CONNECT"
119                        })
120                    })
121                    .collect()
122            })
123            .collect();
124
125        let any_fixtures = active_per_group.iter().flat_map(|g| g.iter());
126        // The wasm globalSetup spawns the mock server. It must run for any fixture
127        // that interpolates `${process.env.MOCK_SERVER_URL}` into a base URL —
128        // i.e. anything with `mock_response` (liter-llm shape) or `http`
129        // (kreuzberg/kreuzcrawl shape), not just raw `is_http_test`. The
130        // comment block below this line states the same intent; the previous
131        // condition (`f.is_http_test()`) only detected the consumer-style
132        // `http: { ... }` shape and missed the entire liter-llm fixture set.
133        let has_http_fixtures = any_fixtures.clone().any(|f| f.needs_mock_server());
134        // file_path / bytes args are read off disk by the generated code at runtime;
135        // we add a setup.ts chdir to test_documents so relative paths resolve.
136        let has_file_fixtures = active_per_group.iter().flatten().any(|f| {
137            let cc = e2e_config.resolve_call_for_fixture(f.call.as_deref(), &f.input);
138            cc.args
139                .iter()
140                .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
141        });
142
143        // Generate package.json — adds vite-plugin-wasm + top-level-await on top
144        // of the standard vitest dev deps so that `import init, { … } from
145        // '@kreuzberg/wasm'` resolves and instantiates the wasm module before tests
146        // run.
147        files.push(GeneratedFile {
148            path: output_base.join("package.json"),
149            content: render_package_json(&pkg_name, &pkg_path, &pkg_version, e2e_config.dep_mode),
150            generated_header: false,
151        });
152
153        // Generate vitest.config.ts — needs vite-plugin-wasm + topLevelAwait, plus
154        // optional globalSetup (for HTTP fixtures and any function-call test that
155        // hits the mock server via MOCK_SERVER_URL) and setupFiles (for chdir).
156        // Function-call e2e tests construct request URLs via
157        // `${process.env.MOCK_SERVER_URL}/fixtures/<id>`, so the mock server must
158        // be running and the env var set even when no raw HTTP fixtures exist.
159        let needs_global_setup = has_http_fixtures;
160        files.push(GeneratedFile {
161            path: output_base.join("vitest.config.ts"),
162            content: render_vitest_config(needs_global_setup, has_file_fixtures),
163            generated_header: true,
164        });
165
166        // Generate globalSetup.ts when any fixture requires the mock server —
167        // either an HTTP fixture (the original consumer) or any function-call
168        // fixture that interpolates `${process.env.MOCK_SERVER_URL}` into a
169        // base URL. It spawns the rust mock-server binary.
170        if needs_global_setup {
171            files.push(GeneratedFile {
172                path: output_base.join("globalSetup.ts"),
173                content: render_global_setup(),
174                generated_header: true,
175            });
176        }
177
178        // Generate setup.ts when any active fixture takes a file_path / bytes arg.
179        // This chdir's to test_documents/ so relative fixture paths resolve.
180        if has_file_fixtures {
181            files.push(GeneratedFile {
182                path: output_base.join("setup.ts"),
183                content: render_file_setup(&e2e_config.test_documents_dir),
184                generated_header: true,
185            });
186        }
187
188        // Generate tsconfig.json — prevents Vite from walking up to a project-level
189        // tsconfig and pulling in unrelated compiler options.
190        files.push(GeneratedFile {
191            path: output_base.join("tsconfig.json"),
192            content: render_tsconfig(),
193            generated_header: false,
194        });
195
196        // Resolve options_type from override (e.g. `WasmExtractionConfig`).
197        let options_type = overrides.and_then(|o| o.options_type.clone());
198        let field_resolver = FieldResolver::new(
199            &e2e_config.fields,
200            &e2e_config.fields_optional,
201            &e2e_config.result_fields,
202            &e2e_config.fields_array,
203            &std::collections::HashSet::new(),
204        );
205
206        // Generate test files per category. We delegate the per-fixture rendering
207        // to the typescript codegen (`render_test_file`), which already handles
208        // both HTTP and function-call fixtures correctly. Passing `lang = "wasm"`
209        // routes per-fixture override resolution and skip checks through the wasm
210        // language key. We then inject Node.js WASM initialization code to load
211        // the WASM binary from the pkg directory using fs.readFileSync.
212        for (group, active) in groups.iter().zip(active_per_group.iter()) {
213            if active.is_empty() {
214                continue;
215            }
216            let filename = format!("{}.test.ts", sanitize_filename(&group.category));
217            let content = super::typescript::render_test_file(
218                lang,
219                &group.category,
220                active,
221                &module_path,
222                &pkg_name,
223                &function_name,
224                &e2e_config.call.args,
225                options_type.as_deref(),
226                &field_resolver,
227                client_factory,
228                e2e_config,
229                type_defs,
230            );
231
232            // The local `pkg/` directory produced by `wasm-pack build --target nodejs`
233            // is already a Node-friendly self-initializing CJS module — `pkg/package.json`
234            // sets `"main"` to the JS entry, so test files can import the package by name
235            // (`from "<pkg_name>"`) with no subpath. The historical `dist-node` rewrite
236            // assumed a multi-distribution layout (`dist/`, `dist-node/`, `dist-web/`)
237            // that the alef-managed `wasm-pack build` does not produce; it is therefore
238            // intentionally absent here.
239            let _ = (&pkg_path, &config.name); // keep variables alive for future use
240
241            files.push(GeneratedFile {
242                path: tests_base.join(filename),
243                content,
244                generated_header: true,
245            });
246        }
247
248        Ok(files)
249    }
250
251    fn language_name(&self) -> &'static str {
252        "wasm"
253    }
254}
255
/// Converts a `snake_case` identifier to `camelCase`.
///
/// Underscores are dropped and the first character following a run of
/// underscores is ASCII-uppercased; every other character is copied as-is.
/// A leading underscore therefore capitalises the first letter, and trailing
/// underscores simply disappear.
fn snake_to_camel(s: &str) -> String {
    let mut camel = String::with_capacity(s.len());
    for (idx, segment) in s.split('_').enumerate() {
        let mut letters = segment.chars();
        match letters.next() {
            // Any non-empty segment after the first underscore gets its
            // initial character uppercased.
            Some(initial) if idx > 0 => {
                camel.push(initial.to_ascii_uppercase());
                camel.push_str(letters.as_str());
            }
            // First segment (or an empty one produced by `__`): copy verbatim.
            _ => camel.push_str(segment),
        }
    }
    camel
}
271
272fn render_package_json(
273    pkg_name: &str,
274    pkg_path: &str,
275    pkg_version: &str,
276    dep_mode: crate::config::DependencyMode,
277) -> String {
278    let dep_value = match dep_mode {
279        crate::config::DependencyMode::Registry => pkg_version.to_string(),
280        crate::config::DependencyMode::Local => format!("file:{pkg_path}"),
281    };
282    crate::template_env::render(
283        "wasm/package.json.jinja",
284        minijinja::context! {
285            pkg_name => pkg_name,
286            dep_value => dep_value,
287            rollup => tv::npm::ROLLUP,
288            vite_plugin_wasm => tv::npm::VITE_PLUGIN_WASM,
289            vitest => tv::npm::VITEST,
290        },
291    )
292}
293
294fn render_vitest_config(with_global_setup: bool, with_file_setup: bool) -> String {
295    let header = hash::header(CommentStyle::DoubleSlash);
296    crate::template_env::render(
297        "wasm/vitest.config.ts.jinja",
298        minijinja::context! {
299            header => header,
300            with_global_setup => with_global_setup,
301            with_file_setup => with_file_setup,
302        },
303    )
304}
305
306fn render_file_setup(test_documents_dir: &str) -> String {
307    let header = hash::header(CommentStyle::DoubleSlash);
308    let mut out = header;
309    out.push_str("import { createRequire } from 'module';\n");
310    out.push_str("import { fileURLToPath } from 'url';\n");
311    out.push_str("import { dirname, join } from 'path';\n\n");
312    out.push_str("// Patch CommonJS `require('env')` and `require('wasi_snapshot_preview1')` to\n");
313    out.push_str("// return shim objects. wasm-pack `--target nodejs` emits bare `require()`\n");
314    out.push_str("// calls for these from getrandom/wasi transitives, but they are not real\n");
315    out.push_str("// Node modules — the WASM module imports them by name and the host is\n");
316    out.push_str("// expected to satisfy them. Patch Module._load BEFORE the wasm bundle is\n");
317    out.push_str("// imported by any test file.\n");
318    out.push_str("// Note: setupFiles run per-test-worker; vitest imports the test files\n");
319    out.push_str("// AFTER setupFiles complete, so this hook installs in time.\n");
320    out.push_str("{\n");
321    out.push_str("  const _require = createRequire(import.meta.url);\n");
322    out.push_str("  const Module = _require('module');\n");
323    out.push_str("  // env.system / env.mkstemp come from C-runtime calls embedded in some\n");
324    out.push_str("  // WASM-compiled deps (e.g. tesseract-wasm). Tests that don't exercise\n");
325    out.push_str("  // those paths only need the imports to be callable for module instantiation.\n");
326    out.push_str("  const env = {\n");
327    out.push_str("    system: (_cmd: number) => -1,\n");
328    out.push_str("    mkstemp: (_template: number) => -1,\n");
329    out.push_str("  };\n");
330    out.push_str("  // WASI shims. Critical: clock_time_get and random_get must produce realistic\n");
331    out.push_str("  // values — returning 0 for all clock calls causes WASM-side timing loops to\n");
332    out.push_str("  // spin forever (e.g. getrandom's spin-until-elapsed retry), and zero-filled\n");
333    out.push_str("  // random buffers can cause init loops in deps expecting non-zero entropy.\n");
334    out.push_str("  const _wasiMemoryView = (): DataView | null => {\n");
335    out.push_str("    // Imports are wired before the WASM is instantiated; the bundle stashes\n");
336    out.push_str("    // its instance on a runtime-known global once available. We try to grab\n");
337    out.push_str("    // it lazily so writes to wasm memory go to the right place.\n");
338    out.push_str("    const g = globalThis as unknown as { __kreuzberg_wasm_memory__?: WebAssembly.Memory };\n");
339    out.push_str("    return g.__kreuzberg_wasm_memory__ ? new DataView(g.__kreuzberg_wasm_memory__.buffer) : null;\n");
340    out.push_str("  };\n");
341    out.push_str("  const _cryptoFill = (buf: Uint8Array) => {\n");
342    out.push_str("    const c = globalThis.crypto;\n");
343    out.push_str("    if (c && typeof c.getRandomValues === 'function') c.getRandomValues(buf);\n");
344    out.push_str("    else for (let i = 0; i < buf.length; i++) buf[i] = Math.floor(Math.random() * 256);\n");
345    out.push_str("  };\n");
346    out.push_str("  const wasi_snapshot_preview1 = {\n");
347    out.push_str("    proc_exit: () => {},\n");
348    out.push_str("    environ_get: () => 0,\n");
349    out.push_str("    environ_sizes_get: (countOut: number, _sizeOut: number) => {\n");
350    out.push_str("      const v = _wasiMemoryView();\n");
351    out.push_str("      if (v) v.setUint32(countOut, 0, true);\n");
352    out.push_str("      return 0;\n");
353    out.push_str("    },\n");
354    out.push_str("    // WASI fd_write must update `nwritten_ptr` with the total bytes consumed,\n");
355    out.push_str("    // otherwise libc-style callers (e.g. tesseract-compiled-to-wasm fputs)\n");
356    out.push_str("    // see 0 of N bytes written and retry forever, hanging the host.\n");
357    out.push_str("    fd_write: (_fd: number, iovsPtr: number, iovsLen: number, nwrittenPtr: number) => {\n");
358    out.push_str("      const v = _wasiMemoryView();\n");
359    out.push_str("      if (!v) return 0;\n");
360    out.push_str("      let total = 0;\n");
361    out.push_str("      for (let i = 0; i < iovsLen; i++) {\n");
362    out.push_str("        const off = iovsPtr + i * 8;\n");
363    out.push_str("        total += v.getUint32(off + 4, true);\n");
364    out.push_str("      }\n");
365    out.push_str("      v.setUint32(nwrittenPtr, total, true);\n");
366    out.push_str("      return 0;\n");
367    out.push_str("    },\n");
368    out.push_str("    // Mirror fd_write: callers retry on partial reads. Reporting 0 bytes\n");
369    out.push_str("    // read (EOF) is fine; just make sure `nread_ptr` is written.\n");
370    out.push_str("    fd_read: (_fd: number, _iovsPtr: number, _iovsLen: number, nreadPtr: number) => {\n");
371    out.push_str("      const v = _wasiMemoryView();\n");
372    out.push_str("      if (v) v.setUint32(nreadPtr, 0, true);\n");
373    out.push_str("      return 0;\n");
374    out.push_str("    },\n");
375    out.push_str("    fd_seek: () => 0,\n");
376    out.push_str("    fd_close: () => 0,\n");
377    out.push_str("    fd_prestat_get: () => 8, // EBADF — no preopens.\n");
378    out.push_str("    fd_prestat_dir_name: () => 0,\n");
379    out.push_str("    fd_fdstat_get: () => 0,\n");
380    out.push_str("    fd_fdstat_set_flags: () => 0,\n");
381    out.push_str("    path_open: () => 44, // ENOENT.\n");
382    out.push_str("    path_create_directory: () => 0,\n");
383    out.push_str("    path_remove_directory: () => 0,\n");
384    out.push_str("    path_unlink_file: () => 0,\n");
385    out.push_str("    path_filestat_get: () => 44, // ENOENT.\n");
386    out.push_str("    path_rename: () => 0,\n");
387    out.push_str("    clock_time_get: (_clockId: number, _precision: bigint, timeOut: number) => {\n");
388    out.push_str("      const ns = BigInt(Date.now()) * 1_000_000n + BigInt(performance.now() | 0) % 1_000_000n;\n");
389    out.push_str("      const v = _wasiMemoryView();\n");
390    out.push_str("      if (v) v.setBigUint64(timeOut, ns, true);\n");
391    out.push_str("      return 0;\n");
392    out.push_str("    },\n");
393    out.push_str("    clock_res_get: (_clockId: number, resOut: number) => {\n");
394    out.push_str("      const v = _wasiMemoryView();\n");
395    out.push_str("      if (v) v.setBigUint64(resOut, 1_000n, true);\n");
396    out.push_str("      return 0;\n");
397    out.push_str("    },\n");
398    out.push_str("    random_get: (bufPtr: number, bufLen: number) => {\n");
399    out.push_str("      const g = globalThis as unknown as { __kreuzberg_wasm_memory__?: WebAssembly.Memory };\n");
400    out.push_str("      if (!g.__kreuzberg_wasm_memory__) return 0;\n");
401    out.push_str("      _cryptoFill(new Uint8Array(g.__kreuzberg_wasm_memory__.buffer, bufPtr, bufLen));\n");
402    out.push_str("      return 0;\n");
403    out.push_str("    },\n");
404    out.push_str("    args_get: () => 0,\n");
405    out.push_str("    args_sizes_get: (countOut: number, _sizeOut: number) => {\n");
406    out.push_str("      const v = _wasiMemoryView();\n");
407    out.push_str("      if (v) v.setUint32(countOut, 0, true);\n");
408    out.push_str("      return 0;\n");
409    out.push_str("    },\n");
410    out.push_str("    poll_oneoff: () => 0,\n");
411    out.push_str("    sched_yield: () => 0,\n");
412    out.push_str("  };\n");
413    out.push_str("  const _origResolve = Module._resolveFilename;\n");
414    out.push_str("  Module._resolveFilename = function(request: string, parent: unknown, ...rest: unknown[]) {\n");
415    out.push_str("    if (request === 'env' || request === 'wasi_snapshot_preview1') return request;\n");
416    out.push_str("    return _origResolve.call(this, request, parent, ...rest);\n");
417    out.push_str("  };\n");
418    out.push_str("  const _origLoad = Module._load;\n");
419    out.push_str("  Module._load = function(request: string, parent: unknown, ...rest: unknown[]) {\n");
420    out.push_str("    if (request === 'env') return env;\n");
421    out.push_str("    if (request === 'wasi_snapshot_preview1') return wasi_snapshot_preview1;\n");
422    out.push_str("    return _origLoad.call(this, request, parent, ...rest);\n");
423    out.push_str("  };\n");
424    out.push_str("  // Capture the WASM linear memory at instantiation time so the WASI shims\n");
425    out.push_str("  // can read/write into it. Without this, every shim that needs memory\n");
426    out.push_str("  // (fd_write nwritten, clock_time_get, random_get, etc.) silently no-ops\n");
427    out.push_str("  // and the host-side C runtime hangs in a retry loop.\n");
428    out.push_str("  const _OrigInstance = WebAssembly.Instance;\n");
429    out.push_str("  const PatchedInstance = function(this: WebAssembly.Instance, mod: WebAssembly.Module, imports?: WebAssembly.Imports) {\n");
430    out.push_str("    const inst = new _OrigInstance(mod, imports);\n");
431    out.push_str("    const exportsMem = (inst.exports as Record<string, unknown>).memory;\n");
432    out.push_str("    if (exportsMem instanceof WebAssembly.Memory) {\n");
433    out.push_str("      (globalThis as unknown as { __kreuzberg_wasm_memory__?: WebAssembly.Memory }).__kreuzberg_wasm_memory__ = exportsMem;\n");
434    out.push_str("    }\n");
435    out.push_str("    return inst;\n");
436    out.push_str("  } as unknown as typeof WebAssembly.Instance;\n");
437    out.push_str("  PatchedInstance.prototype = _OrigInstance.prototype;\n");
438    out.push_str(
439        "  (WebAssembly as unknown as { Instance: typeof WebAssembly.Instance }).Instance = PatchedInstance;\n",
440    );
441    out.push_str("}\n\n");
442    out.push_str("// Change to the configured test-documents directory so that fixture file paths like\n");
443    out.push_str("// \"pdf/fake_memo.pdf\" resolve correctly when vitest runs from e2e/wasm/.\n");
444    out.push_str("// setup.ts lives in e2e/wasm/; the fixtures dir lives at the repository root,\n");
445    out.push_str("// two directories up: e2e/wasm/ -> e2e/ -> repo root.\n");
446    out.push_str("const __filename = fileURLToPath(import.meta.url);\n");
447    out.push_str("const __dirname = dirname(__filename);\n");
448    let _ = writeln!(
449        out,
450        "const testDocumentsDir = join(__dirname, '..', '..', '{test_documents_dir}');"
451    );
452    out.push_str("process.chdir(testDocumentsDir);\n");
453    out
454}
455
456fn render_global_setup() -> String {
457    let header = hash::header(CommentStyle::DoubleSlash);
458    crate::template_env::render(
459        "wasm/globalSetup.ts.jinja",
460        minijinja::context! {
461            header => header,
462        },
463    )
464}
465
466fn render_tsconfig() -> String {
467    crate::template_env::render("wasm/tsconfig.jinja", minijinja::context! {})
468}
469
470// The historical `inject_wasm_init` post-processor rewrote test imports to a
471// `<pkg>/dist-node` subpath. It was removed because the alef-managed
472// `wasm-pack build --target nodejs` artifact is a flat self-initializing CJS
473// module — its `package.json` already sets `"main"` to the JS entry, so the
474// emitted `import … from "<pkg>"` resolves directly.