Skip to main content

alef_e2e/codegen/
wasm.rs

1//! WebAssembly e2e test generator using vitest.
2//!
3//! Reuses the TypeScript test renderer for both HTTP and non-HTTP fixtures,
4//! configured with the `@kreuzberg/wasm` (or equivalent) package as the import
5//! path and `wasm` as the language key for skip/override resolution. Adds
6//! wasm-specific scaffolding: vite-plugin-wasm + top-level-await for vitest,
7//! a `setup.ts` chdir to `test_documents/` so file_path fixtures resolve, and
8//! a `globalSetup.ts` that spawns the mock-server for HTTP fixtures.
9
10use crate::config::E2eConfig;
11use crate::escape::sanitize_filename;
12use crate::field_access::FieldResolver;
13use crate::fixture::{Fixture, FixtureGroup};
14use alef_core::backend::GeneratedFile;
15use alef_core::config::ResolvedCrateConfig;
16use alef_core::hash::{self, CommentStyle};
17use alef_core::template_versions as tv;
18use anyhow::Result;
19use std::path::PathBuf;
20
21use super::E2eCodegen;
22
/// WebAssembly e2e code generator.
///
/// Stateless unit type: all inputs arrive through the [`E2eCodegen::generate`]
/// arguments. The implementation registers itself under the `wasm` language
/// key and emits a vitest project (package.json, vitest config, tsconfig,
/// optional setup files, and one test file per fixture category).
pub struct WasmCodegen;
25
26impl E2eCodegen for WasmCodegen {
27    fn generate(
28        &self,
29        groups: &[FixtureGroup],
30        e2e_config: &E2eConfig,
31        config: &ResolvedCrateConfig,
32    ) -> Result<Vec<GeneratedFile>> {
33        let lang = self.language_name();
34        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
35        let tests_base = output_base.join("tests");
36
37        let mut files = Vec::new();
38
39        // Resolve call config with wasm-specific overrides.
40        let call = &e2e_config.call;
41        let overrides = call.overrides.get(lang);
42        let module_path = overrides
43            .and_then(|o| o.module.as_ref())
44            .cloned()
45            .unwrap_or_else(|| call.module.clone());
46        let function_name = overrides
47            .and_then(|o| o.function.as_ref())
48            .cloned()
49            .unwrap_or_else(|| snake_to_camel(&call.function));
50        let client_factory = overrides.and_then(|o| o.client_factory.as_deref());
51
52        // Resolve package config — defaults to a co-located pkg/ directory shipped
53        // by `wasm-pack build` next to the wasm crate.
54        // For projects with a core library name different from the package name,
55        // try both {config.name}-wasm and ts-pack-core-wasm (for tree-sitter-language-pack).
56        let wasm_pkg = e2e_config.resolve_package("wasm");
57        let pkg_path = wasm_pkg
58            .as_ref()
59            .and_then(|p| p.path.as_ref())
60            .cloned()
61            .unwrap_or_else(|| {
62                let default_name = format!("../../crates/{}-wasm/pkg", config.name);
63                // Special case: tree-sitter-language-pack uses ts-pack-core-wasm
64                if config.name == "tree-sitter-language-pack" {
65                    "../../crates/ts-pack-core-wasm/pkg".to_string()
66                } else {
67                    default_name
68                }
69            });
70        let pkg_name = wasm_pkg
71            .as_ref()
72            .and_then(|p| p.name.as_ref())
73            .cloned()
74            .unwrap_or_else(|| {
75                // Default: derive from WASM crate name (config.name + "-wasm")
76                // wasm-pack transforms the crate name to the package name by replacing
77                // dashes with the crate separator in Cargo (e.g., kreuzberg-wasm -> kreuzberg_wasm).
78                // However, the published npm package might use the module name, which is typically
79                // the crate name without "-wasm". Fall back to the module path.
80                module_path.clone()
81            });
82        let pkg_version = wasm_pkg
83            .as_ref()
84            .and_then(|p| p.version.as_ref())
85            .cloned()
86            .or_else(|| config.resolved_version())
87            .unwrap_or_else(|| "0.1.0".to_string());
88
89        // Determine which auxiliary scaffolding files we need based on the active
90        // fixture set. Doing this once up front lets us emit a self-contained vitest
91        // config that wires only the setup files we'll actually generate.
92        let active_per_group: Vec<Vec<&Fixture>> = groups
93            .iter()
94            .map(|group| {
95                group
96                    .fixtures
97                    .iter()
98                    .filter(|f| super::should_include_fixture(f, lang, e2e_config))
99                    // Honor per-call `skip_languages`: when the resolved call's
100                    // `skip_languages` contains `wasm`, the wasm binding doesn't
101                    // export that function and any test file referencing it
102                    // would fail TS resolution. Drop the fixture entirely.
103                    .filter(|f| {
104                        let cc = e2e_config.resolve_call(f.call.as_deref());
105                        !cc.skip_languages.iter().any(|l| l == lang)
106                    })
107                    .filter(|f| {
108                        // Node fetch (undici) rejects pre-set Content-Length that
109                        // doesn't match the real body length — skip fixtures that
110                        // intentionally send a mismatched header.
111                        f.http.as_ref().is_none_or(|h| {
112                            !h.request
113                                .headers
114                                .iter()
115                                .any(|(k, _)| k.eq_ignore_ascii_case("content-length"))
116                        })
117                    })
118                    .filter(|f| {
119                        // Node fetch only supports a fixed set of HTTP methods;
120                        // TRACE and CONNECT throw before reaching the server.
121                        f.http.as_ref().is_none_or(|h| {
122                            let m = h.request.method.to_ascii_uppercase();
123                            m != "TRACE" && m != "CONNECT"
124                        })
125                    })
126                    .collect()
127            })
128            .collect();
129
130        let any_fixtures = active_per_group.iter().flat_map(|g| g.iter());
131        let has_http_fixtures = any_fixtures.clone().any(|f| f.is_http_test());
132        let has_non_http_fixtures = any_fixtures
133            .clone()
134            .any(|f| !f.is_http_test() && !f.assertions.is_empty());
135        // file_path / bytes args are read off disk by the generated code at runtime;
136        // we add a setup.ts chdir to test_documents so relative paths resolve.
137        let has_file_fixtures = active_per_group.iter().flatten().any(|f| {
138            let cc = e2e_config.resolve_call(f.call.as_deref());
139            cc.args
140                .iter()
141                .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
142        });
143
144        // Generate package.json — adds vite-plugin-wasm + top-level-await on top
145        // of the standard vitest dev deps so that `import init, { … } from
146        // '@kreuzberg/wasm'` resolves and instantiates the wasm module before tests
147        // run.
148        files.push(GeneratedFile {
149            path: output_base.join("package.json"),
150            content: render_package_json(&pkg_name, &pkg_path, &pkg_version, e2e_config.dep_mode),
151            generated_header: false,
152        });
153
154        // Generate vitest.config.ts — needs vite-plugin-wasm + topLevelAwait, plus
155        // optional globalSetup (for HTTP fixtures) and setupFiles (for chdir).
156        files.push(GeneratedFile {
157            path: output_base.join("vitest.config.ts"),
158            content: render_vitest_config(has_http_fixtures, has_file_fixtures),
159            generated_header: true,
160        });
161
162        // Generate globalSetup.ts only when at least one HTTP fixture is in scope —
163        // it spawns the rust mock-server.
164        if has_http_fixtures {
165            files.push(GeneratedFile {
166                path: output_base.join("globalSetup.ts"),
167                content: render_global_setup(),
168                generated_header: true,
169            });
170        }
171
172        // Generate setup.ts when any active fixture takes a file_path / bytes arg.
173        // This chdir's to test_documents/ so relative fixture paths resolve.
174        if has_file_fixtures {
175            files.push(GeneratedFile {
176                path: output_base.join("setup.ts"),
177                content: render_file_setup(),
178                generated_header: true,
179            });
180        }
181
182        // Generate tsconfig.json — prevents Vite from walking up to a project-level
183        // tsconfig and pulling in unrelated compiler options.
184        files.push(GeneratedFile {
185            path: output_base.join("tsconfig.json"),
186            content: render_tsconfig(),
187            generated_header: false,
188        });
189
190        // Suppress the unused-variable warning when no non-HTTP fixtures exist.
191        let _ = has_non_http_fixtures;
192
193        // Resolve options_type from override (e.g. `WasmExtractionConfig`).
194        let options_type = overrides.and_then(|o| o.options_type.clone());
195        let field_resolver = FieldResolver::new(
196            &e2e_config.fields,
197            &e2e_config.fields_optional,
198            &e2e_config.result_fields,
199            &e2e_config.fields_array,
200            &std::collections::HashSet::new(),
201        );
202
203        // Generate test files per category. We delegate the per-fixture rendering
204        // to the typescript codegen (`render_test_file`), which already handles
205        // both HTTP and function-call fixtures correctly. Passing `lang = "wasm"`
206        // routes per-fixture override resolution and skip checks through the wasm
207        // language key. We then inject Node.js WASM initialization code to load
208        // the WASM binary from the pkg directory using fs.readFileSync.
209        for (group, active) in groups.iter().zip(active_per_group.iter()) {
210            if active.is_empty() {
211                continue;
212            }
213            let filename = format!("{}.test.ts", sanitize_filename(&group.category));
214            let mut content = super::typescript::render_test_file(
215                lang,
216                &group.category,
217                active,
218                &module_path,
219                &pkg_name,
220                &function_name,
221                &e2e_config.call.args,
222                options_type.as_deref(),
223                &field_resolver,
224                client_factory,
225                e2e_config,
226            );
227
228            // Inject WASM initialization code for Node.js environments.
229            // Pass the WASM crate name (e.g., "html-to-markdown-wasm") instead of the core crate name.
230            let wasm_crate_name = format!("{}-wasm", config.name);
231            content = inject_wasm_init(&content, &pkg_name, &wasm_crate_name);
232
233            files.push(GeneratedFile {
234                path: tests_base.join(filename),
235                content,
236                generated_header: true,
237            });
238        }
239
240        Ok(files)
241    }
242
243    fn language_name(&self) -> &'static str {
244        "wasm"
245    }
246}
247
/// Converts a `snake_case` identifier to `camelCase`.
///
/// Every underscore is dropped, and the first character following a run of
/// underscores is ASCII-uppercased. The first segment is copied verbatim, so a
/// leading underscore capitalises the first letter and a trailing underscore
/// simply disappears.
fn snake_to_camel(s: &str) -> String {
    let mut segments = s.split('_');
    let mut camel = String::with_capacity(s.len());

    // The text before the first underscore keeps its original casing.
    camel.push_str(segments.next().unwrap_or(""));

    for segment in segments {
        let mut rest = segment.chars();
        // Empty segments come from consecutive or trailing underscores.
        if let Some(head) = rest.next() {
            camel.push(head.to_ascii_uppercase());
            camel.push_str(rest.as_str());
        }
    }
    camel
}
263
264fn render_package_json(
265    pkg_name: &str,
266    pkg_path: &str,
267    pkg_version: &str,
268    dep_mode: crate::config::DependencyMode,
269) -> String {
270    let dep_value = match dep_mode {
271        crate::config::DependencyMode::Registry => pkg_version.to_string(),
272        crate::config::DependencyMode::Local => format!("file:{pkg_path}"),
273    };
274    format!(
275        r#"{{
276  "name": "{pkg_name}-e2e-wasm",
277  "version": "0.1.0",
278  "private": true,
279  "type": "module",
280  "scripts": {{
281    "test": "vitest run"
282  }},
283  "devDependencies": {{
284    "{pkg_name}": "{dep_value}",
285    "rollup": "{rollup}",
286    "vite-plugin-wasm": "{vite_plugin_wasm}",
287    "vitest": "{vitest}"
288  }}
289}}
290"#,
291        rollup = tv::npm::ROLLUP,
292        vite_plugin_wasm = tv::npm::VITE_PLUGIN_WASM,
293        vitest = tv::npm::VITEST,
294    )
295}
296
297fn render_vitest_config(with_global_setup: bool, with_file_setup: bool) -> String {
298    let header = hash::header(CommentStyle::DoubleSlash);
299    let setup_files_line = if with_file_setup {
300        "    setupFiles: ['./setup.ts'],\n"
301    } else {
302        ""
303    };
304    let global_setup_line = if with_global_setup {
305        "    globalSetup: './globalSetup.ts',\n"
306    } else {
307        ""
308    };
309    format!(
310        r#"{header}import {{ defineConfig }} from 'vitest/config';
311import wasm from 'vite-plugin-wasm';
312
313export default defineConfig({{
314  plugins: [wasm()],
315  test: {{
316    include: ['tests/**/*.test.ts'],
317{global_setup_line}{setup_files_line}  }},
318}});
319"#
320    )
321}
322
323fn render_file_setup() -> String {
324    let header = hash::header(CommentStyle::DoubleSlash);
325    header
326        + r#"import { fileURLToPath } from 'url';
327import { dirname, join } from 'path';
328
329// Change to the test_documents directory so that fixture file paths like
330// "pdf/fake_memo.pdf" resolve correctly when vitest runs from e2e/wasm/.
331// setup.ts lives in e2e/wasm/; test_documents lives at the repository root,
332// two directories up: e2e/wasm/ -> e2e/ -> repo root -> test_documents/.
333const __filename = fileURLToPath(import.meta.url);
334const __dirname = dirname(__filename);
335const testDocumentsDir = join(__dirname, '..', '..', 'test_documents');
336process.chdir(testDocumentsDir);
337"#
338}
339
340fn render_global_setup() -> String {
341    let header = hash::header(CommentStyle::DoubleSlash);
342    format!(
343        r#"{header}import {{ spawn }} from 'child_process';
344import {{ resolve }} from 'path';
345
346let serverProcess: any;
347
348export async function setup() {{
349  // Mock server binary must be pre-built (e.g. by CI or `cargo build --manifest-path e2e/rust/Cargo.toml --bin mock-server --release`)
350  serverProcess = spawn(
351    resolve(__dirname, '../rust/target/release/mock-server'),
352    [resolve(__dirname, '../../fixtures')],
353    {{ stdio: ['pipe', 'pipe', 'inherit'] }}
354  );
355
356  const url = await new Promise<string>((resolve, reject) => {{
357    serverProcess.stdout.on('data', (data: Buffer) => {{
358      const match = data.toString().match(/MOCK_SERVER_URL=(.*)/);
359      if (match) resolve(match[1].trim());
360    }});
361    setTimeout(() => reject(new Error('Mock server startup timeout')), 30000);
362  }});
363
364  process.env.MOCK_SERVER_URL = url;
365}}
366
367export async function teardown() {{
368  if (serverProcess) {{
369    serverProcess.stdin.end();
370    serverProcess.kill();
371  }}
372}}
373"#
374    )
375}
376
/// Renders the static `tsconfig.json` for the wasm e2e project.
///
/// The content is fixed: ES2022 target, ESNext modules with bundler
/// resolution, strict mode with null checks relaxed, covering the test files
/// and `vitest.config.ts`.
fn render_tsconfig() -> String {
    const TSCONFIG_JSON: &str = r#"{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "strict": true,
    "strictNullChecks": false,
    "esModuleInterop": true,
    "skipLibCheck": true
  },
  "include": ["tests/**/*.ts", "vitest.config.ts"]
}
"#;
    String::from(TSCONFIG_JSON)
}
393
/// Inject WASM initialization code for Node.js environments.
///
/// Locates the `import { … } from '<pkg_name>';` statement in `content` and:
///
/// 1. prepends `import { initSync } from '<pkg_name>';` to the file, and
/// 2. inserts, right after the located import, a preamble that changes the
///    working directory to `test_documents/` (three levels above the test
///    file), resolves the wasm binary inside the package via
///    `import.meta.resolve`, reads it with `readFileSync`, and calls
///    `initSync` with the bytes.
///
/// The wasm binary name follows the wasm-pack convention of the crate name
/// with dashes replaced by underscores plus a `_bg.wasm` suffix
/// (e.g. `html-to-markdown-wasm` → `html_to_markdown_wasm_bg.wasm`).
///
/// The content is returned unchanged when no import from `pkg_name` is found,
/// when that import already uses the `import init, { … }` form, or when the
/// content already starts with the `initSync` import this function adds (so
/// applying it twice is a no-op).
///
/// # Arguments
/// * `content` — the generated TypeScript test file content
/// * `pkg_name` — the npm package name (e.g., "kreuzberg" or "@org/kreuzberg")
/// * `crate_name` — the Rust wasm crate name (e.g., "kreuzberg-wasm"), used to
///   derive the `*_bg.wasm` file name
fn inject_wasm_init(content: &str, pkg_name: &str, crate_name: &str) -> String {
    let init_code = format!("import {{ initSync }} from '{pkg_name}';\n");

    // Our own output always starts with the initSync import; don't patch twice.
    if content.starts_with(&init_code) {
        return content.to_string();
    }

    // The TypeScript renderer generates single-quoted imports; match both styles for robustness.
    let from_marker_sq = format!("}} from '{pkg_name}';");
    let from_marker = if content.contains(&from_marker_sq) {
        from_marker_sq
    } else {
        format!("}} from \"{pkg_name}\";")
    };

    // Find the closing `} from "pkg_name";` marker, then search backward for the
    // matching `import {` to avoid accidentally patching an earlier import
    // statement (e.g. `import { ... } from "vitest"`).
    let Some(from_pos) = content.find(&from_marker) else {
        return content.to_string();
    };
    let import_end = from_pos + from_marker.len();
    let before_from = &content[..from_pos];
    let Some(import_pos) = before_from
        .rfind("import {")
        .or_else(|| before_from.rfind("import init, {"))
    else {
        return content.to_string();
    };

    // The import already pulls in the default `init` export — leave it alone.
    if content[import_pos..import_end].contains("import init,") {
        return content.to_string();
    }

    // wasm-pack names the binary after the crate, with `-` mapped to `_`.
    let wasm_file = format!("{}_bg.wasm", crate_name.replace('-', "_"));

    // For Node.js test environments (vitest), use initSync with the bundled WASM
    // binary, located through import.meta.resolve.
    let setup_code = format!(
        "import {{ fileURLToPath }} from \"url\";\n\
         import {{ dirname, join }} from \"path\";\n\
         import {{ readFileSync }} from \"fs\";\n\
         const __filename = fileURLToPath(import.meta.url);\n\
         const __dirname = dirname(__filename);\n\
         const testDocumentsDir = join(__dirname, \"..\", \"..\", \"..\", \"test_documents\");\n\
         globalThis.process.chdir(testDocumentsDir);\n\
         const wasmUrl = await import.meta.resolve('{pkg_name}/{wasm_file}');\n\
         const wasmPath = fileURLToPath(wasmUrl);\n\
         const wasmBuffer = readFileSync(wasmPath);\n\
         initSync(wasmBuffer);\n"
    );

    let mut patched = String::with_capacity(init_code.len() + content.len() + setup_code.len() + 1);
    patched.push_str(&init_code);
    patched.push_str(&content[..import_end]);
    patched.push('\n');
    patched.push_str(&setup_code);
    patched.push_str(&content[import_end..]);
    patched
}