Skip to main content

alef_e2e/codegen/
java.rs

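//! Java e2e test generator using JUnit 5.
//!
//! Generates `<output>/java/pom.xml` and
//! `<output>/java/src/test/java/<group id as path>/e2e/{Category}Test.java`
//! files from JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
//! The `<group id as path>` segment mirrors the configured Java group id
//! (for example `dev.kreuzberg` becomes `dev/kreuzberg`).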
5
6use crate::config::E2eConfig;
7use crate::escape::{escape_java, sanitize_filename};
8use crate::field_access::FieldResolver;
9use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, HttpFixture};
10use alef_core::backend::GeneratedFile;
11use alef_core::config::ResolvedCrateConfig;
12use alef_core::hash::{self, CommentStyle};
13use alef_core::template_versions as tv;
14use anyhow::Result;
15use heck::{ToLowerCamelCase, ToUpperCamelCase};
16use std::path::PathBuf;
17
18use super::E2eCodegen;
19use super::client;
20
/// Java e2e code generator.
///
/// A stateless unit type: all inputs arrive through the [`E2eCodegen`]
/// trait methods, so the value itself carries no configuration and is free
/// to copy, compare by debug output, or default-construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct JavaCodegen;
23
24impl E2eCodegen for JavaCodegen {
25    fn generate(
26        &self,
27        groups: &[FixtureGroup],
28        e2e_config: &E2eConfig,
29        config: &ResolvedCrateConfig,
30        _type_defs: &[alef_core::ir::TypeDef],
31    ) -> Result<Vec<GeneratedFile>> {
32        let lang = self.language_name();
33        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
34
35        let mut files = Vec::new();
36
37        // Resolve call config with overrides.
38        let call = &e2e_config.call;
39        let overrides = call.overrides.get(lang);
40        let _module_path = overrides
41            .and_then(|o| o.module.as_ref())
42            .cloned()
43            .unwrap_or_else(|| call.module.clone());
44        let function_name = overrides
45            .and_then(|o| o.function.as_ref())
46            .cloned()
47            .unwrap_or_else(|| call.function.clone());
48        let class_name = overrides
49            .and_then(|o| o.class.as_ref())
50            .cloned()
51            .unwrap_or_else(|| config.name.to_upper_camel_case());
52        let result_is_simple = overrides.is_some_and(|o| o.result_is_simple);
53        let result_var = &call.result_var;
54
55        // Resolve package config.
56        let java_pkg = e2e_config.resolve_package("java");
57        let pkg_name = java_pkg
58            .as_ref()
59            .and_then(|p| p.name.as_ref())
60            .cloned()
61            .unwrap_or_else(|| config.name.clone());
62
63        // Resolve Java package info for the dependency.
64        let java_group_id = config.java_group_id();
65        let binding_pkg = config.java_package();
66        let pkg_version = config.resolved_version().unwrap_or_else(|| "0.1.0".to_string());
67
68        // Generate pom.xml.
69        files.push(GeneratedFile {
70            path: output_base.join("pom.xml"),
71            content: render_pom_xml(&pkg_name, &java_group_id, &pkg_version, e2e_config.dep_mode),
72            generated_header: false,
73        });
74
75        // Detect whether any fixture needs the mock-server (HTTP fixtures or
76        // fixtures with a `mock_response`). When present, emit a
77        // JUnit Platform LauncherSessionListener that spawns the mock-server
78        // before any test runs and a META-INF/services SPI manifest registering
79        // it. Without this, every fixture-bound test failed with
80        // `LiterLlmRsException: error sending request for url` because
81        // `System.getenv("MOCK_SERVER_URL")` was null.
82        let needs_mock_server = groups
83            .iter()
84            .flat_map(|g| g.fixtures.iter())
85            .any(|f| f.needs_mock_server());
86
87        // Generate test files per category. Path mirrors the configured Java
88        // package — `dev.myorg` becomes `dev/myorg`, etc. — so the package
89        // declaration in each test file matches its filesystem location.
90        let mut test_base = output_base.join("src").join("test").join("java");
91        for segment in java_group_id.split('.') {
92            test_base = test_base.join(segment);
93        }
94        let test_base = test_base.join("e2e");
95
96        if needs_mock_server {
97            files.push(GeneratedFile {
98                path: test_base.join("MockServerListener.java"),
99                content: render_mock_server_listener(&java_group_id),
100                generated_header: true,
101            });
102            files.push(GeneratedFile {
103                path: output_base
104                    .join("src")
105                    .join("test")
106                    .join("resources")
107                    .join("META-INF")
108                    .join("services")
109                    .join("org.junit.platform.launcher.LauncherSessionListener"),
110                content: format!("{java_group_id}.e2e.MockServerListener\n"),
111                generated_header: false,
112            });
113        }
114
115        // Resolve options_type from override.
116        let options_type = overrides.and_then(|o| o.options_type.clone());
117
118        // Resolve enum_fields and nested_types from Java override config.
119        static EMPTY_ENUM_FIELDS: std::sync::LazyLock<std::collections::HashMap<String, String>> =
120            std::sync::LazyLock::new(std::collections::HashMap::new);
121        let _enum_fields = overrides.map(|o| &o.enum_fields).unwrap_or(&EMPTY_ENUM_FIELDS);
122
123        // Build effective nested_types by merging defaults with configured overrides.
124        let mut effective_nested_types = default_java_nested_types();
125        if let Some(overrides_map) = overrides.map(|o| &o.nested_types) {
126            effective_nested_types.extend(overrides_map.clone());
127        }
128
129        // Resolve nested_types_optional from override (defaults to true for backward compatibility).
130        let nested_types_optional = overrides.map(|o| o.nested_types_optional).unwrap_or(true);
131
132        let field_resolver = FieldResolver::new(
133            &e2e_config.fields,
134            &e2e_config.fields_optional,
135            &e2e_config.result_fields,
136            &e2e_config.fields_array,
137            &std::collections::HashSet::new(),
138        );
139
140        for group in groups {
141            let active: Vec<&Fixture> = group
142                .fixtures
143                .iter()
144                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
145                .collect();
146
147            if active.is_empty() {
148                continue;
149            }
150
151            let class_file_name = format!("{}Test.java", sanitize_filename(&group.category).to_upper_camel_case());
152            let content = render_test_file(
153                &group.category,
154                &active,
155                &class_name,
156                &function_name,
157                &java_group_id,
158                &binding_pkg,
159                result_var,
160                &e2e_config.call.args,
161                options_type.as_deref(),
162                &field_resolver,
163                result_is_simple,
164                &e2e_config.fields_enum,
165                e2e_config,
166                &effective_nested_types,
167                nested_types_optional,
168            );
169            files.push(GeneratedFile {
170                path: test_base.join(class_file_name),
171                content,
172                generated_header: true,
173            });
174        }
175
176        Ok(files)
177    }
178
179    fn language_name(&self) -> &'static str {
180        "java"
181    }
182}
183
184// ---------------------------------------------------------------------------
185// Rendering
186// ---------------------------------------------------------------------------
187
188fn render_pom_xml(
189    pkg_name: &str,
190    java_group_id: &str,
191    pkg_version: &str,
192    dep_mode: crate::config::DependencyMode,
193) -> String {
194    // pkg_name may be in "groupId:artifactId" Maven format; split accordingly.
195    let (dep_group_id, dep_artifact_id) = if let Some((g, a)) = pkg_name.split_once(':') {
196        (g, a)
197    } else {
198        (java_group_id, pkg_name)
199    };
200    let artifact_id = format!("{dep_artifact_id}-e2e-java");
201    let dep_block = match dep_mode {
202        crate::config::DependencyMode::Registry => {
203            format!(
204                r#"        <dependency>
205            <groupId>{dep_group_id}</groupId>
206            <artifactId>{dep_artifact_id}</artifactId>
207            <version>{pkg_version}</version>
208        </dependency>"#
209            )
210        }
211        crate::config::DependencyMode::Local => {
212            format!(
213                r#"        <dependency>
214            <groupId>{dep_group_id}</groupId>
215            <artifactId>{dep_artifact_id}</artifactId>
216            <version>{pkg_version}</version>
217            <scope>system</scope>
218            <systemPath>${{project.basedir}}/../../packages/java/target/{dep_artifact_id}-{pkg_version}.jar</systemPath>
219        </dependency>"#
220            )
221        }
222    };
223    crate::template_env::render(
224        "java/pom.xml.jinja",
225        minijinja::context! {
226            artifact_id => artifact_id,
227            java_group_id => java_group_id,
228            dep_block => dep_block,
229            junit_version => tv::maven::JUNIT,
230            jackson_version => tv::maven::JACKSON_E2E,
231            build_helper_version => tv::maven::BUILD_HELPER_MAVEN_PLUGIN,
232            maven_surefire_version => tv::maven::MAVEN_SUREFIRE_PLUGIN_E2E,
233        },
234    )
235}
236
237/// Render the JUnit Platform LauncherSessionListener that spawns the
238/// mock-server binary once per launcher session and tears it down on close.
239///
240/// Mirrors the Ruby `spec_helper.rb` and Python `conftest.py` patterns. The
241/// URL is exposed as a JVM system property `mockServerUrl`; generated test
242/// bodies prefer it over the `MOCK_SERVER_URL` env var so external overrides
243/// (e.g. CI exporting MOCK_SERVER_URL) still work without rerouting through
244/// JNI's lack of `setenv`.
245fn render_mock_server_listener(java_group_id: &str) -> String {
246    let header = hash::header(CommentStyle::DoubleSlash);
247    let mut out = header;
248    out.push_str(&format!("package {java_group_id}.e2e;\n\n"));
249    out.push_str("import java.io.BufferedReader;\n");
250    out.push_str("import java.io.File;\n");
251    out.push_str("import java.io.IOException;\n");
252    out.push_str("import java.io.InputStreamReader;\n");
253    out.push_str("import java.nio.charset.StandardCharsets;\n");
254    out.push_str("import java.nio.file.Path;\n");
255    out.push_str("import java.nio.file.Paths;\n");
256    out.push_str("import org.junit.platform.launcher.LauncherSession;\n");
257    out.push_str("import org.junit.platform.launcher.LauncherSessionListener;\n");
258    out.push('\n');
259    out.push_str("/**\n");
260    out.push_str(" * Spawns the mock-server binary once per JUnit launcher session and\n");
261    out.push_str(" * exposes its URL as the `mockServerUrl` system property. Generated\n");
262    out.push_str(" * test bodies read the property (with `MOCK_SERVER_URL` env-var\n");
263    out.push_str(" * fallback) so tests can run via plain `mvn test` without any external\n");
264    out.push_str(" * mock-server orchestration. Mirrors the Ruby spec_helper / Python\n");
265    out.push_str(" * conftest spawn pattern. Honors a pre-set MOCK_SERVER_URL by\n");
266    out.push_str(" * skipping the spawn entirely.\n");
267    out.push_str(" */\n");
268    out.push_str("public class MockServerListener implements LauncherSessionListener {\n");
269    out.push_str("    private Process mockServer;\n");
270    out.push('\n');
271    out.push_str("    @Override\n");
272    out.push_str("    public void launcherSessionOpened(LauncherSession session) {\n");
273    out.push_str("        String preset = System.getenv(\"MOCK_SERVER_URL\");\n");
274    out.push_str("        if (preset != null && !preset.isEmpty()) {\n");
275    out.push_str("            System.setProperty(\"mockServerUrl\", preset);\n");
276    out.push_str("            return;\n");
277    out.push_str("        }\n");
278    out.push_str("        Path repoRoot = locateRepoRoot();\n");
279    out.push_str("        if (repoRoot == null) {\n");
280    out.push_str("            throw new IllegalStateException(\"MockServerListener: could not locate repo root (looked for fixtures/ in ancestors of \" + System.getProperty(\"user.dir\") + \")\");\n");
281    out.push_str("        }\n");
282    out.push_str("        String binName = System.getProperty(\"os.name\", \"\").toLowerCase().contains(\"win\") ? \"mock-server.exe\" : \"mock-server\";\n");
283    out.push_str("        File bin = repoRoot.resolve(\"e2e\").resolve(\"rust\").resolve(\"target\").resolve(\"release\").resolve(binName).toFile();\n");
284    out.push_str("        File fixturesDir = repoRoot.resolve(\"fixtures\").toFile();\n");
285    out.push_str("        if (!bin.exists()) {\n");
286    out.push_str("            throw new IllegalStateException(\"MockServerListener: mock-server binary not found at \" + bin + \" — run: cargo build --manifest-path e2e/rust/Cargo.toml --bin mock-server --release\");\n");
287    out.push_str("        }\n");
288    out.push_str(
289        "        ProcessBuilder pb = new ProcessBuilder(bin.getAbsolutePath(), fixturesDir.getAbsolutePath())\n",
290    );
291    out.push_str("            .redirectErrorStream(false);\n");
292    out.push_str("        try {\n");
293    out.push_str("            mockServer = pb.start();\n");
294    out.push_str("        } catch (IOException e) {\n");
295    out.push_str(
296        "            throw new IllegalStateException(\"MockServerListener: failed to start mock-server\", e);\n",
297    );
298    out.push_str("        }\n");
299    out.push_str("        // Read until we see the MOCK_SERVER_URL=... line. Cap the loop so a\n");
300    out.push_str("        // misbehaving mock-server cannot block the launcher indefinitely.\n");
301    out.push_str("        BufferedReader stdout = new BufferedReader(new InputStreamReader(mockServer.getInputStream(), StandardCharsets.UTF_8));\n");
302    out.push_str("        String url = null;\n");
303    out.push_str("        try {\n");
304    out.push_str("            for (int i = 0; i < 16; i++) {\n");
305    out.push_str("                String line = stdout.readLine();\n");
306    out.push_str("                if (line == null) break;\n");
307    out.push_str("                if (line.startsWith(\"MOCK_SERVER_URL=\")) {\n");
308    out.push_str("                    url = line.substring(\"MOCK_SERVER_URL=\".length()).trim();\n");
309    out.push_str("                    break;\n");
310    out.push_str("                }\n");
311    out.push_str("            }\n");
312    out.push_str("        } catch (IOException e) {\n");
313    out.push_str("            mockServer.destroyForcibly();\n");
314    out.push_str(
315        "            throw new IllegalStateException(\"MockServerListener: failed to read mock-server stdout\", e);\n",
316    );
317    out.push_str("        }\n");
318    out.push_str("        if (url == null || url.isEmpty()) {\n");
319    out.push_str("            mockServer.destroyForcibly();\n");
320    out.push_str("            throw new IllegalStateException(\"MockServerListener: mock-server did not emit MOCK_SERVER_URL\");\n");
321    out.push_str("        }\n");
322    out.push_str("        // TCP-readiness probe: ensure axum::serve is accepting before tests start.\n");
323    out.push_str("        // The mock-server binds the TcpListener synchronously then prints the URL\n");
324    out.push_str("        // before tokio::spawn(axum::serve(...)) is polled, so under Surefire\n");
325    out.push_str("        // parallel mode tests can race startup. Poll-connect (max 5s, 50ms backoff)\n");
326    out.push_str("        // until success.\n");
327    out.push_str("        java.net.URI healthUri = java.net.URI.create(url);\n");
328    out.push_str("        String host = healthUri.getHost();\n");
329    out.push_str("        int port = healthUri.getPort();\n");
330    out.push_str("        long deadline = System.nanoTime() + 5_000_000_000L;\n");
331    out.push_str("        while (System.nanoTime() < deadline) {\n");
332    out.push_str("            try (java.net.Socket s = new java.net.Socket()) {\n");
333    out.push_str("                s.connect(new java.net.InetSocketAddress(host, port), 100);\n");
334    out.push_str("                break;\n");
335    out.push_str("            } catch (java.io.IOException ignored) {\n");
336    out.push_str("                try { Thread.sleep(50); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); break; }\n");
337    out.push_str("            }\n");
338    out.push_str("        }\n");
339    out.push_str("        System.setProperty(\"mockServerUrl\", url);\n");
340    out.push_str("        // Drain remaining stdout/stderr in daemon threads so a full pipe\n");
341    out.push_str("        // does not block the child.\n");
342    out.push_str("        Process server = mockServer;\n");
343    out.push_str("        Thread drainOut = new Thread(() -> drain(stdout));\n");
344    out.push_str("        drainOut.setDaemon(true);\n");
345    out.push_str("        drainOut.start();\n");
346    out.push_str("        Thread drainErr = new Thread(() -> drain(new BufferedReader(new InputStreamReader(server.getErrorStream(), StandardCharsets.UTF_8))));\n");
347    out.push_str("        drainErr.setDaemon(true);\n");
348    out.push_str("        drainErr.start();\n");
349    out.push_str("    }\n");
350    out.push('\n');
351    out.push_str("    @Override\n");
352    out.push_str("    public void launcherSessionClosed(LauncherSession session) {\n");
353    out.push_str("        if (mockServer == null) return;\n");
354    out.push_str("        try { mockServer.getOutputStream().close(); } catch (IOException ignored) {}\n");
355    out.push_str("        try {\n");
356    out.push_str("            if (!mockServer.waitFor(2, java.util.concurrent.TimeUnit.SECONDS)) {\n");
357    out.push_str("                mockServer.destroyForcibly();\n");
358    out.push_str("            }\n");
359    out.push_str("        } catch (InterruptedException ignored) {\n");
360    out.push_str("            Thread.currentThread().interrupt();\n");
361    out.push_str("            mockServer.destroyForcibly();\n");
362    out.push_str("        }\n");
363    out.push_str("    }\n");
364    out.push('\n');
365    out.push_str("    private static Path locateRepoRoot() {\n");
366    out.push_str("        Path dir = Paths.get(\"\").toAbsolutePath();\n");
367    out.push_str("        while (dir != null) {\n");
368    out.push_str("            if (dir.resolve(\"fixtures\").toFile().isDirectory()\n");
369    out.push_str("                && dir.resolve(\"e2e\").toFile().isDirectory()) {\n");
370    out.push_str("                return dir;\n");
371    out.push_str("            }\n");
372    out.push_str("            dir = dir.getParent();\n");
373    out.push_str("        }\n");
374    out.push_str("        return null;\n");
375    out.push_str("    }\n");
376    out.push('\n');
377    out.push_str("    private static void drain(BufferedReader reader) {\n");
378    out.push_str("        try {\n");
379    out.push_str("            char[] buf = new char[1024];\n");
380    out.push_str("            while (reader.read(buf) >= 0) { /* drain */ }\n");
381    out.push_str("        } catch (IOException ignored) {}\n");
382    out.push_str("    }\n");
383    out.push_str("}\n");
384    out
385}
386
387#[allow(clippy::too_many_arguments)]
388fn render_test_file(
389    category: &str,
390    fixtures: &[&Fixture],
391    class_name: &str,
392    function_name: &str,
393    java_group_id: &str,
394    binding_pkg: &str,
395    result_var: &str,
396    args: &[crate::config::ArgMapping],
397    options_type: Option<&str>,
398    field_resolver: &FieldResolver,
399    result_is_simple: bool,
400    enum_fields: &std::collections::HashSet<String>,
401    e2e_config: &E2eConfig,
402    nested_types: &std::collections::HashMap<String, String>,
403    nested_types_optional: bool,
404) -> String {
405    let header = hash::header(CommentStyle::DoubleSlash);
406    let test_class_name = format!("{}Test", sanitize_filename(category).to_upper_camel_case());
407
408    // If the class_name is fully qualified (contains '.'), import it and use
409    // only the simple name for method calls.  Otherwise use it as-is.
410    let (import_path, simple_class) = if class_name.contains('.') {
411        let simple = class_name.rsplit('.').next().unwrap_or(class_name);
412        (class_name, simple)
413    } else {
414        ("", class_name)
415    };
416
417    // Check if any fixture (with its resolved call) will emit MAPPER usage.
418    let lang_for_om = "java";
419    let needs_object_mapper_for_handle = fixtures.iter().any(|f| {
420        args.iter().filter(|a| a.arg_type == "handle").any(|a| {
421            let v = f.input.get(&a.field).unwrap_or(&serde_json::Value::Null);
422            !(v.is_null() || v.is_object() && v.as_object().is_some_and(|o| o.is_empty()))
423        })
424    });
425    // HTTP fixtures always need ObjectMapper for JSON body comparison.
426    let has_http_fixtures = fixtures.iter().any(|f| f.http.is_some());
427    let needs_object_mapper = needs_object_mapper_for_handle || has_http_fixtures;
428
429    // Collect all options_type values used (class-level + per-fixture call overrides).
430    let mut all_options_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
431    if let Some(t) = options_type {
432        all_options_types.insert(t.to_string());
433    }
434    for f in fixtures.iter() {
435        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
436        if let Some(ov) = call_cfg.overrides.get(lang_for_om) {
437            if let Some(t) = &ov.options_type {
438                all_options_types.insert(t.clone());
439            }
440        }
441        // Auto-fallback: when the Java override does not declare an options_type
442        // but another non-prefixed binding (csharp/c/go/php/python) does, mirror
443        // that name into the import set so the auto-emitted `Type.fromJson(json)`
444        // expression compiles. The Java POJO class name matches the Rust source
445        // type name for these backends.
446        let java_has_type = call_cfg
447            .overrides
448            .get(lang_for_om)
449            .and_then(|o| o.options_type.as_deref())
450            .is_some();
451        if !java_has_type {
452            for cand in ["csharp", "c", "go", "php", "python"] {
453                if let Some(o) = call_cfg.overrides.get(cand) {
454                    if let Some(t) = &o.options_type {
455                        all_options_types.insert(t.clone());
456                        break;
457                    }
458                }
459            }
460        }
461        // Detect batch item types used in this fixture
462        for arg in &call_cfg.args {
463            if let Some(elem_type) = &arg.element_type {
464                if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
465                    all_options_types.insert(elem_type.clone());
466                }
467            }
468        }
469    }
470
471    // Collect nested config types actually referenced in fixture builder expressions.
472    // Note: enum types don't need explicit imports since they're in the same package.
473    let mut nested_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
474    for f in fixtures.iter() {
475        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
476        for arg in &call_cfg.args {
477            if arg.arg_type == "json_object" {
478                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
479                if let Some(val) = f.input.get(field) {
480                    if !val.is_null() && !val.is_array() {
481                        if let Some(obj) = val.as_object() {
482                            collect_nested_type_names(obj, nested_types, &mut nested_types_used);
483                        }
484                    }
485                }
486            }
487        }
488    }
489
490    // Effective binding package for FQN imports of binding types
491    // (ChatCompletionRequest, etc.). Prefer the explicit `[crates.java] package`
492    // wired in via `binding_pkg`; fall back to the package derived from a
493    // fully-qualified `class_name` when present.
494    let binding_pkg_for_imports: String = if !binding_pkg.is_empty() {
495        binding_pkg.to_string()
496    } else if !import_path.is_empty() {
497        import_path
498            .rsplit_once('.')
499            .map(|(p, _)| p.to_string())
500            .unwrap_or_default()
501    } else {
502        String::new()
503    };
504
505    // Build imports list
506    let mut imports: Vec<String> = Vec::new();
507    imports.push("import org.junit.jupiter.api.Test;".to_string());
508    imports.push("import static org.junit.jupiter.api.Assertions.*;".to_string());
509
510    // Import the test entry-point class itself when it is fully-qualified or
511    // when we know the binding package — emit the FQN so javac resolves it.
512    if !import_path.is_empty() {
513        imports.push(format!("import {import_path};"));
514    } else if !binding_pkg_for_imports.is_empty() && !class_name.is_empty() {
515        imports.push(format!("import {binding_pkg_for_imports}.{class_name};"));
516    }
517
518    if needs_object_mapper {
519        imports.push("import com.fasterxml.jackson.databind.ObjectMapper;".to_string());
520        imports.push("import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;".to_string());
521    }
522
523    // Import all options types used across fixtures (for builder expressions and MAPPER).
524    if !all_options_types.is_empty() {
525        for opts_type in &all_options_types {
526            let qualified = if binding_pkg_for_imports.is_empty() {
527                opts_type.clone()
528            } else {
529                format!("{binding_pkg_for_imports}.{opts_type}")
530            };
531            imports.push(format!("import {qualified};"));
532        }
533    }
534
535    // Import nested options types
536    if !nested_types_used.is_empty() && !binding_pkg_for_imports.is_empty() {
537        for type_name in &nested_types_used {
538            imports.push(format!("import {binding_pkg_for_imports}.{type_name};"));
539        }
540    }
541
542    // Import CrawlConfig when handle args need JSON deserialization.
543    if needs_object_mapper_for_handle && !binding_pkg_for_imports.is_empty() {
544        imports.push(format!("import {binding_pkg_for_imports}.CrawlConfig;"));
545    }
546
547    // Import visitor types when any fixture uses visitor callbacks.
548    let has_visitor_fixtures = fixtures.iter().any(|f| f.visitor.is_some());
549    if has_visitor_fixtures && !binding_pkg_for_imports.is_empty() {
550        imports.push(format!("import {binding_pkg_for_imports}.Visitor;"));
551        imports.push(format!("import {binding_pkg_for_imports}.NodeContext;"));
552        imports.push(format!("import {binding_pkg_for_imports}.VisitResult;"));
553    }
554
555    // Import Optional when using builder expressions with optional fields
556    if !all_options_types.is_empty() {
557        imports.push("import java.util.Optional;".to_string());
558    }
559
560    // Render all test methods
561    let mut fixtures_body = String::new();
562    for (i, fixture) in fixtures.iter().enumerate() {
563        render_test_method(
564            &mut fixtures_body,
565            fixture,
566            simple_class,
567            function_name,
568            result_var,
569            args,
570            options_type,
571            field_resolver,
572            result_is_simple,
573            enum_fields,
574            e2e_config,
575            nested_types,
576            nested_types_optional,
577        );
578        if i + 1 < fixtures.len() {
579            fixtures_body.push('\n');
580        }
581    }
582
583    // Render template
584    crate::template_env::render(
585        "java/test_file.jinja",
586        minijinja::context! {
587            header => header,
588            java_group_id => java_group_id,
589            test_class_name => test_class_name,
590            category => category,
591            imports => imports,
592            needs_object_mapper => needs_object_mapper,
593            fixtures_body => fixtures_body,
594        },
595    )
596}
597
598// ---------------------------------------------------------------------------
599// HTTP test rendering — shared-driver integration
600// ---------------------------------------------------------------------------
601
/// Thin renderer that emits JUnit 5 test methods targeting a mock server via
/// `java.net.http.HttpClient`. Satisfies [`client::TestClientRenderer`] so the
/// shared [`client::http_call::render_http_test`] driver drives the call sequence.
///
/// The type is a stateless unit struct: every value it needs is passed to the
/// trait methods, and the method open/close fragments come from the
/// `java/http_test_open.jinja` and `java/http_test_close.jinja` templates.
struct JavaTestClientRenderer;
606
607impl client::TestClientRenderer for JavaTestClientRenderer {
608    fn language_name(&self) -> &'static str {
609        "java"
610    }
611
612    /// Convert a fixture id to the UpperCamelCase suffix appended to `test`.
613    ///
614    /// The emitted method name is `test{fn_name}`, matching the pre-existing shape.
615    fn sanitize_test_name(&self, id: &str) -> String {
616        id.to_upper_camel_case()
617    }
618
619    /// Emit `@Test void test{fn_name}() throws Exception {`.
620    ///
621    /// When `skip_reason` is `Some`, the body is a single
622    /// `Assumptions.assumeTrue(false, ...)` call and `render_test_close` closes
623    /// the brace symmetrically.
624    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
625        let escaped_reason = skip_reason.map(escape_java);
626        let rendered = crate::template_env::render(
627            "java/http_test_open.jinja",
628            minijinja::context! {
629                fn_name => fn_name,
630                description => description,
631                skip_reason => escaped_reason,
632            },
633        );
634        out.push_str(&rendered);
635    }
636
637    /// Emit the closing `}` for a test method.
638    fn render_test_close(&self, out: &mut String) {
639        let rendered = crate::template_env::render("java/http_test_close.jinja", minijinja::context! {});
640        out.push_str(&rendered);
641    }
642
643    /// Emit a `java.net.http.HttpClient` request to `baseUrl + path`.
644    ///
645    /// Binds the response to `response` (the `ctx.response_var`). Java's
646    /// `HttpClient` disallows a fixed set of restricted headers; those are
647    /// silently dropped so the test compiles.
648    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
649        // Java's HttpClient throws IllegalArgumentException for these headers.
650        const JAVA_RESTRICTED_HEADERS: &[&str] = &["connection", "content-length", "expect", "host", "upgrade"];
651
652        let method = ctx.method.to_uppercase();
653
654        // Build the path, appending query params when present.
655        let path = if ctx.query_params.is_empty() {
656            ctx.path.to_string()
657        } else {
658            let pairs: Vec<String> = ctx
659                .query_params
660                .iter()
661                .map(|(k, v)| {
662                    let val_str = match v {
663                        serde_json::Value::String(s) => s.clone(),
664                        other => other.to_string(),
665                    };
666                    format!("{}={}", k, escape_java(&val_str))
667                })
668                .collect();
669            format!("{}?{}", ctx.path, pairs.join("&"))
670        };
671
672        let body_publisher = if let Some(body) = ctx.body {
673            let json = serde_json::to_string(body).unwrap_or_default();
674            let escaped = escape_java(&json);
675            format!("java.net.http.HttpRequest.BodyPublishers.ofString(\"{escaped}\")")
676        } else {
677            "java.net.http.HttpRequest.BodyPublishers.noBody()".to_string()
678        };
679
680        // Content-Type header — only when a body is present.
681        let content_type = if ctx.body.is_some() {
682            let ct = ctx.content_type.unwrap_or("application/json");
683            // Only emit when not already in ctx.headers (avoid duplicate Content-Type).
684            if !ctx.headers.keys().any(|k| k.to_lowercase() == "content-type") {
685                Some(ct.to_string())
686            } else {
687                None
688            }
689        } else {
690            None
691        };
692
693        // Build header lines — skip Java-restricted ones.
694        let mut headers_lines: Vec<String> = Vec::new();
695        for (name, value) in ctx.headers {
696            if JAVA_RESTRICTED_HEADERS.contains(&name.to_lowercase().as_str()) {
697                continue;
698            }
699            let escaped_name = escape_java(name);
700            let escaped_value = escape_java(value);
701            headers_lines.push(format!(
702                "builder = builder.header(\"{escaped_name}\", \"{escaped_value}\");"
703            ));
704        }
705
706        // Cookies as a single `Cookie` header.
707        let cookies_line = if !ctx.cookies.is_empty() {
708            let cookie_str: Vec<String> = ctx.cookies.iter().map(|(k, v)| format!("{k}={v}")).collect();
709            let cookie_header = escape_java(&cookie_str.join("; "));
710            Some(format!("builder = builder.header(\"Cookie\", \"{cookie_header}\");"))
711        } else {
712            None
713        };
714
715        let rendered = crate::template_env::render(
716            "java/http_request.jinja",
717            minijinja::context! {
718                method => method,
719                path => path,
720                body_publisher => body_publisher,
721                content_type => content_type,
722                headers_lines => headers_lines,
723                cookies_line => cookies_line,
724                response_var => ctx.response_var,
725            },
726        );
727        out.push_str(&rendered);
728    }
729
730    /// Emit `assertEquals(status, response.statusCode(), ...)`.
731    fn render_assert_status(&self, out: &mut String, response_var: &str, status: u16) {
732        let rendered = crate::template_env::render(
733            "java/http_assertions.jinja",
734            minijinja::context! {
735                response_var => response_var,
736                status_code => status,
737                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
738                body_assertion => String::new(),
739                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
740                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
741            },
742        );
743        out.push_str(&rendered);
744    }
745
746    /// Emit a header assertion using `response.headers().firstValue(...)`.
747    ///
748    /// Handles special tokens: `<<present>>`, `<<absent>>`, `<<uuid>>`.
749    fn render_assert_header(&self, out: &mut String, response_var: &str, name: &str, expected: &str) {
750        let escaped_name = escape_java(name);
751        let assertion_code = match expected {
752            "<<present>>" => {
753                format!(
754                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isPresent(), \"header {escaped_name} should be present\");"
755                )
756            }
757            "<<absent>>" => {
758                format!(
759                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isEmpty(), \"header {escaped_name} should be absent\");"
760                )
761            }
762            "<<uuid>>" => {
763                format!(
764                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").matches(\"[0-9a-fA-F]{{8}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{12}}\"), \"header {escaped_name} should be a UUID\");"
765                )
766            }
767            literal => {
768                let escaped_value = escape_java(literal);
769                format!(
770                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").contains(\"{escaped_value}\"), \"header {escaped_name} mismatch\");"
771                )
772            }
773        };
774
775        let mut headers = vec![std::collections::HashMap::new()];
776        headers[0].insert("assertion_code", assertion_code);
777
778        let rendered = crate::template_env::render(
779            "java/http_assertions.jinja",
780            minijinja::context! {
781                response_var => response_var,
782                status_code => 0u16,
783                headers => headers,
784                body_assertion => String::new(),
785                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
786                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
787            },
788        );
789        out.push_str(&rendered);
790    }
791
792    /// Emit a JSON body equality assertion using Jackson's `MAPPER.readTree`.
793    fn render_assert_json_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
794        let body_assertion = match expected {
795            serde_json::Value::Object(_) | serde_json::Value::Array(_) => {
796                let json_str = serde_json::to_string(expected).unwrap_or_default();
797                let escaped = escape_java(&json_str);
798                format!(
799                    "var bodyJson = MAPPER.readTree({response_var}.body());\n        var expectedJson = MAPPER.readTree(\"{escaped}\");\n        assertEquals(expectedJson, bodyJson, \"body mismatch\");"
800                )
801            }
802            serde_json::Value::String(s) => {
803                let escaped = escape_java(s);
804                format!("assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");")
805            }
806            other => {
807                let escaped = escape_java(&other.to_string());
808                format!("assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");")
809            }
810        };
811
812        let rendered = crate::template_env::render(
813            "java/http_assertions.jinja",
814            minijinja::context! {
815                response_var => response_var,
816                status_code => 0u16,
817                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
818                body_assertion => body_assertion,
819                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
820                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
821            },
822        );
823        out.push_str(&rendered);
824    }
825
826    /// Emit partial JSON body assertions: parse once, then assert each expected field.
827    fn render_assert_partial_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
828        if let Some(obj) = expected.as_object() {
829            let mut partial_body: Vec<std::collections::HashMap<&str, String>> = Vec::new();
830            for (key, val) in obj {
831                let escaped_key = escape_java(key);
832                let json_str = serde_json::to_string(val).unwrap_or_default();
833                let escaped_val = escape_java(&json_str);
834                let assertion_code = format!(
835                    "assertEquals(MAPPER.readTree(\"{escaped_val}\"), partialJson.get(\"{escaped_key}\"), \"body field '{escaped_key}' mismatch\");"
836                );
837                let mut entry = std::collections::HashMap::new();
838                entry.insert("assertion_code", assertion_code);
839                partial_body.push(entry);
840            }
841
842            let rendered = crate::template_env::render(
843                "java/http_assertions.jinja",
844                minijinja::context! {
845                    response_var => response_var,
846                    status_code => 0u16,
847                    headers => Vec::<std::collections::HashMap<&str, String>>::new(),
848                    body_assertion => String::new(),
849                    partial_body => partial_body,
850                    validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
851                },
852            );
853            out.push_str(&rendered);
854        }
855    }
856
857    /// Emit validation-error assertions: parse the body and check each expected message.
858    fn render_assert_validation_errors(
859        &self,
860        out: &mut String,
861        response_var: &str,
862        errors: &[crate::fixture::ValidationErrorExpectation],
863    ) {
864        let mut validation_errors: Vec<std::collections::HashMap<&str, String>> = Vec::new();
865        for err in errors {
866            let escaped_msg = escape_java(&err.msg);
867            let assertion_code = format!(
868                "assertTrue(veBody.contains(\"{escaped_msg}\"), \"expected validation error message: {escaped_msg}\");"
869            );
870            let mut entry = std::collections::HashMap::new();
871            entry.insert("assertion_code", assertion_code);
872            validation_errors.push(entry);
873        }
874
875        let rendered = crate::template_env::render(
876            "java/http_assertions.jinja",
877            minijinja::context! {
878                response_var => response_var,
879                status_code => 0u16,
880                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
881                body_assertion => String::new(),
882                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
883                validation_errors => validation_errors,
884            },
885        );
886        out.push_str(&rendered);
887    }
888}
889
890/// Render an HTTP server test method using `java.net.http.HttpClient` against
891/// `MOCK_SERVER_URL`. Delegates to the shared
892/// [`client::http_call::render_http_test`] driver via [`JavaTestClientRenderer`].
893///
894/// The one Java-specific pre-condition — HTTP 101 (WebSocket upgrade) causing an
895/// `EOFException` in `HttpClient` — is handled here before delegating.
896fn render_http_test_method(out: &mut String, fixture: &Fixture, http: &HttpFixture) {
897    // HTTP 101 (WebSocket upgrade) causes Java's HttpClient to throw EOFException.
898    // Emit an assumeTrue(false, ...) stub so the test is skipped rather than failing.
899    if http.expected_response.status_code == 101 {
900        let method_name = fixture.id.to_upper_camel_case();
901        let description = &fixture.description;
902        out.push_str(&crate::template_env::render(
903            "java/http_test_skip_101.jinja",
904            minijinja::context! {
905                method_name => method_name,
906                description => description,
907            },
908        ));
909        return;
910    }
911
912    client::http_call::render_http_test(out, &JavaTestClientRenderer, fixture);
913}
914
915#[allow(clippy::too_many_arguments)]
916fn render_test_method(
917    out: &mut String,
918    fixture: &Fixture,
919    class_name: &str,
920    _function_name: &str,
921    _result_var: &str,
922    _args: &[crate::config::ArgMapping],
923    options_type: Option<&str>,
924    field_resolver: &FieldResolver,
925    result_is_simple: bool,
926    enum_fields: &std::collections::HashSet<String>,
927    e2e_config: &E2eConfig,
928    nested_types: &std::collections::HashMap<String, String>,
929    nested_types_optional: bool,
930) {
931    // Delegate HTTP fixtures to the HTTP-specific renderer.
932    if let Some(http) = &fixture.http {
933        render_http_test_method(out, fixture, http);
934        return;
935    }
936
937    // Resolve per-fixture call config (supports named calls via fixture.call field).
938    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
939    let lang = "java";
940    let call_overrides = call_config.overrides.get(lang);
941    let effective_function_name = call_overrides
942        .and_then(|o| o.function.as_ref())
943        .cloned()
944        .unwrap_or_else(|| call_config.function.to_lower_camel_case());
945    let effective_result_var = &call_config.result_var;
946    let effective_args = &call_config.args;
947    let function_name = effective_function_name.as_str();
948    let result_var = effective_result_var.as_str();
949    let args: &[crate::config::ArgMapping] = effective_args.as_slice();
950
951    let method_name = fixture.id.to_upper_camel_case();
952    let description = &fixture.description;
953    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
954
955    // Resolve per-fixture options_type: prefer the java call override, fall back to
956    // class-level, then to any other language's options_type for the same call (the
957    // generated Java POJO class name matches the Rust type name across bindings, so
958    // mirroring the C/csharp/go option lets us auto-emit `Type.fromJson(json)` without
959    // requiring an explicit Java override per call).
960    let effective_options_type: Option<String> = call_overrides
961        .and_then(|o| o.options_type.clone())
962        .or_else(|| options_type.map(|s| s.to_string()))
963        .or_else(|| {
964            // Borrow from any other backend's options_type. Prefer non-language-prefixed
965            // names (csharp/c/go/php/python) over wasm or ruby which use prefixed types
966            // like `WasmCreateBatchRequest` or `LiterLlm::CreateBatchRequest`.
967            for cand in ["csharp", "c", "go", "php", "python"] {
968                if let Some(o) = call_config.overrides.get(cand) {
969                    if let Some(t) = &o.options_type {
970                        return Some(t.clone());
971                    }
972                }
973            }
974            None
975        });
976    let effective_options_type = effective_options_type.as_deref();
977    // When options_type is resolvable but no explicit options_via is given for Java,
978    // default to "from_json" so the typed-request arg is emitted as
979    // `Type.fromJson(json)` rather than the raw JSON string. The Java backend exposes
980    // a static `fromJson(String)` factory on every record type (Stage A).
981    let auto_from_json = effective_options_type.is_some()
982        && call_overrides.and_then(|o| o.options_via.as_deref()).is_none()
983        && e2e_config
984            .call
985            .overrides
986            .get(lang)
987            .and_then(|o| o.options_via.as_deref())
988            .is_none();
989
990    // Resolve client_factory: prefer call-level java override, fall back to file-level java override.
991    let client_factory: Option<String> = call_overrides.and_then(|o| o.client_factory.clone()).or_else(|| {
992        e2e_config
993            .call
994            .overrides
995            .get(lang)
996            .and_then(|o| o.client_factory.clone())
997    });
998
999    // Resolve options_via: "kwargs" (default), "from_json", "json", "dict".
1000    // Auto-default to "from_json" when an options_type is resolvable and no explicit
1001    // options_via is configured — this lets typed-request args emit `Type.fromJson(json)`
1002    // even when alef.toml only declares the type in another binding's override block.
1003    let options_via: String = call_overrides
1004        .and_then(|o| o.options_via.clone())
1005        .or_else(|| e2e_config.call.overrides.get(lang).and_then(|o| o.options_via.clone()))
1006        .unwrap_or_else(|| {
1007            if auto_from_json {
1008                "from_json".to_string()
1009            } else {
1010                "kwargs".to_string()
1011            }
1012        });
1013
1014    // Resolve per-fixture result_is_simple and result_is_bytes from the call override.
1015    let effective_result_is_simple =
1016        call_overrides.is_some_and(|o| o.result_is_simple) || call_config.result_is_simple || result_is_simple;
1017    let effective_result_is_bytes = call_overrides.is_some_and(|o| o.result_is_bytes);
1018
1019    // Check if this test needs ObjectMapper deserialization for json_object args.
1020    let needs_deser = effective_options_type.is_some()
1021        && args.iter().any(|arg| {
1022            if arg.arg_type != "json_object" {
1023                return false;
1024            }
1025            let val = super::resolve_field(&fixture.input, &arg.field);
1026            !val.is_null() && !val.is_array()
1027        });
1028
1029    // Emit builder expressions for json_object args.
1030    let mut builder_expressions = String::new();
1031    if let (true, Some(opts_type)) = (needs_deser, effective_options_type) {
1032        for arg in args {
1033            if arg.arg_type == "json_object" {
1034                let val = super::resolve_field(&fixture.input, &arg.field);
1035                if !val.is_null() && !val.is_array() {
1036                    if options_via == "from_json" {
1037                        // Build the typed POJO via static fromJson(String) method.
1038                        let json_str = serde_json::to_string(val).unwrap_or_default();
1039                        let escaped = escape_java(&json_str);
1040                        let var_name = &arg.name;
1041                        builder_expressions.push_str(&format!(
1042                            "        var {var_name} = {opts_type}.fromJson(\"{escaped}\");\n",
1043                        ));
1044                    } else if let Some(obj) = val.as_object() {
1045                        // Generate builder expression: TypeName.builder().withFieldName(value)...build()
1046                        let empty_path_fields: Vec<String> = Vec::new();
1047                        let path_fields = call_overrides.map(|o| &o.path_fields).unwrap_or(&empty_path_fields);
1048                        let builder_expr = java_builder_expression(
1049                            obj,
1050                            opts_type,
1051                            enum_fields,
1052                            nested_types,
1053                            nested_types_optional,
1054                            path_fields,
1055                        );
1056                        let var_name = &arg.name;
1057                        builder_expressions.push_str(&format!("        var {} = {};\n", var_name, builder_expr));
1058                    }
1059                }
1060            }
1061        }
1062    }
1063
1064    let (mut setup_lines, args_str) =
1065        build_args_and_setup(&fixture.input, args, class_name, effective_options_type, &fixture.id);
1066
1067    // Per-language `extra_args` from call overrides — verbatim trailing
1068    // expressions appended after the configured args (e.g. `null` for an
1069    // optional trailing parameter the fixture cannot supply). Mirrors the
1070    // TypeScript and C# implementations.
1071    let extra_args_slice: &[String] = call_overrides.map_or(&[], |o| o.extra_args.as_slice());
1072
1073    // Build visitor if present and add to setup
1074    let mut visitor_var = String::new();
1075    let mut has_visitor_fixture = false;
1076    if let Some(visitor_spec) = &fixture.visitor {
1077        visitor_var = build_java_visitor(&mut setup_lines, visitor_spec, class_name);
1078        has_visitor_fixture = true;
1079    }
1080
1081    // When visitor is present, attach it to the options parameter
1082    let mut final_args = if has_visitor_fixture {
1083        if args_str.is_empty() {
1084            format!("new ConversionOptions().withVisitor({})", visitor_var)
1085        } else if args_str.contains("new ConversionOptions")
1086            || args_str.contains("ConversionOptionsBuilder")
1087            || args_str.contains(".builder()")
1088        {
1089            // Options are being built (either new ConversionOptions(), builder pattern, or .builder().build())
1090            // append .withVisitor() call before .build() if present
1091            if args_str.contains(".build()") {
1092                let idx = args_str.rfind(".build()").unwrap();
1093                format!("{}.withVisitor({}){}", &args_str[..idx], visitor_var, &args_str[idx..])
1094            } else {
1095                format!("{}.withVisitor({})", args_str, visitor_var)
1096            }
1097        } else if args_str.ends_with(", null") {
1098            let base = &args_str[..args_str.len() - 6];
1099            format!("{}, new ConversionOptions().withVisitor({})", base, visitor_var)
1100        } else {
1101            format!("{}, new ConversionOptions().withVisitor({})", args_str, visitor_var)
1102        }
1103    } else {
1104        args_str
1105    };
1106
1107    if !extra_args_slice.is_empty() {
1108        let extra_str = extra_args_slice.join(", ");
1109        final_args = if final_args.is_empty() {
1110            extra_str
1111        } else {
1112            format!("{final_args}, {extra_str}")
1113        };
1114    }
1115
1116    // Render assertions_body
1117    let mut assertions_body = String::new();
1118
1119    // Emit a `source` variable for run_query assertions that need the raw bytes.
1120    let needs_source_var = fixture
1121        .assertions
1122        .iter()
1123        .any(|a| a.assertion_type == "method_result" && a.method.as_deref() == Some("run_query"));
1124    if needs_source_var {
1125        if let Some(source_arg) = args.iter().find(|a| a.field == "source_code") {
1126            let field = source_arg.field.strip_prefix("input.").unwrap_or(&source_arg.field);
1127            if let Some(val) = fixture.input.get(field) {
1128                let java_val = json_to_java(val);
1129                assertions_body.push_str(&format!("        var source = {}.getBytes();\n", java_val));
1130            }
1131        }
1132    }
1133
1134    // Merge per-call java enum_fields with the file-level java enum_fields so that
1135    // call-specific enum-typed result fields (e.g. `choices[0].finish_reason` for
1136    // chat) trigger Optional<Enum> coercion even when the global override block
1137    // does not list them. Per-call entries take precedence.
1138    // Combine global enum_fields (HashSet) with per-call overrides (HashMap).
1139    let mut effective_enum_fields: std::collections::HashSet<String> = enum_fields.clone();
1140    if let Some(co) = call_overrides {
1141        for k in co.enum_fields.keys() {
1142            effective_enum_fields.insert(k.clone());
1143        }
1144    }
1145
1146    for assertion in &fixture.assertions {
1147        render_assertion(
1148            &mut assertions_body,
1149            assertion,
1150            result_var,
1151            class_name,
1152            field_resolver,
1153            effective_result_is_simple,
1154            effective_result_is_bytes,
1155            &effective_enum_fields,
1156        );
1157    }
1158
1159    let throws_clause = " throws Exception";
1160
1161    // When client_factory is set, instantiate a client and dispatch the call as
1162    // a method on the client; otherwise call the static helper on `class_name`.
1163    let (client_setup_lines, call_target) = if let Some(factory) = client_factory.as_deref() {
1164        let factory_name = factory.to_lower_camel_case();
1165        let fixture_id = &fixture.id;
1166        let mut setup: Vec<String> = Vec::new();
1167        if fixture.mock_response.is_some() || fixture.http.is_some() {
1168            setup.push(format!(
1169                "String mockUrl = System.getProperty(\"mockServerUrl\", System.getenv(\"MOCK_SERVER_URL\")) + \"/fixtures/{fixture_id}\";"
1170            ));
1171            setup.push(format!(
1172                "var client = {class_name}.{factory_name}(\"test-key\", mockUrl, null, null, null);"
1173            ));
1174        } else if let Some(api_key_var) = fixture.env.as_ref().and_then(|e| e.api_key_var.as_deref()) {
1175            setup.push(format!("String apiKey = System.getenv(\"{api_key_var}\");"));
1176            setup.push(format!(
1177                "org.junit.jupiter.api.Assumptions.assumeTrue(apiKey != null && !apiKey.isEmpty(), \"{api_key_var} not set\");"
1178            ));
1179            setup.push(format!("var client = {class_name}.{factory_name}(apiKey);"));
1180        } else {
1181            setup.push(format!("var client = {class_name}.{factory_name}(\"test-key\");"));
1182        }
1183        (setup, "client".to_string())
1184    } else {
1185        (Vec::new(), class_name.to_string())
1186    };
1187
1188    // Prepend client setup before any other setup_lines.
1189    let combined_setup: Vec<String> = client_setup_lines.into_iter().chain(setup_lines).collect();
1190
1191    let call_expr = format!("{call_target}.{function_name}({final_args})");
1192
1193    let rendered = crate::template_env::render(
1194        "java/test_method.jinja",
1195        minijinja::context! {
1196            method_name => method_name,
1197            description => description,
1198            builder_expressions => builder_expressions,
1199            setup_lines => combined_setup,
1200            throws_clause => throws_clause,
1201            expects_error => expects_error,
1202            call_expr => call_expr,
1203            result_var => result_var,
1204            assertions_body => assertions_body,
1205        },
1206    );
1207    out.push_str(&rendered);
1208}
1209
1210/// Build setup lines (e.g. handle creation) and the argument list for the function call.
1211///
1212/// Returns `(setup_lines, args_string)`.
1213fn build_args_and_setup(
1214    input: &serde_json::Value,
1215    args: &[crate::config::ArgMapping],
1216    class_name: &str,
1217    options_type: Option<&str>,
1218    fixture_id: &str,
1219) -> (Vec<String>, String) {
1220    if args.is_empty() {
1221        return (Vec::new(), String::new());
1222    }
1223
1224    let mut setup_lines: Vec<String> = Vec::new();
1225    let mut parts: Vec<String> = Vec::new();
1226
1227    for arg in args {
1228        if arg.arg_type == "mock_url" {
1229            setup_lines.push(format!(
1230                "String {} = System.getProperty(\"mockServerUrl\", System.getenv(\"MOCK_SERVER_URL\")) + \"/fixtures/{fixture_id}\";",
1231                arg.name,
1232            ));
1233            parts.push(arg.name.clone());
1234            continue;
1235        }
1236
1237        if arg.arg_type == "handle" {
1238            // Generate a createEngine (or equivalent) call and pass the variable.
1239            let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
1240            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
1241            let config_value = input.get(field).unwrap_or(&serde_json::Value::Null);
1242            if config_value.is_null()
1243                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
1244            {
1245                setup_lines.push(format!("var {} = {class_name}.{constructor_name}(null);", arg.name,));
1246            } else {
1247                let json_str = serde_json::to_string(config_value).unwrap_or_default();
1248                let name = &arg.name;
1249                setup_lines.push(format!(
1250                    "var {name}Config = MAPPER.readValue(\"{}\", CrawlConfig.class);",
1251                    escape_java(&json_str),
1252                ));
1253                setup_lines.push(format!(
1254                    "var {} = {class_name}.{constructor_name}({name}Config);",
1255                    arg.name,
1256                    name = name,
1257                ));
1258            }
1259            parts.push(arg.name.clone());
1260            continue;
1261        }
1262
1263        let resolved = super::resolve_field(input, &arg.field);
1264        let val = if resolved.is_null() { None } else { Some(resolved) };
1265        match val {
1266            None | Some(serde_json::Value::Null) if arg.optional => {
1267                // Optional arg with no fixture value: emit positional null/default so the call
1268                // has the right arity. For json_object optional args, build an empty default object
1269                // so we get the right type rather than a raw null.
1270                if arg.arg_type == "json_object" {
1271                    if let Some(opts_type) = options_type {
1272                        parts.push(format!("{opts_type}.builder().build()"));
1273                    } else {
1274                        parts.push("null".to_string());
1275                    }
1276                } else {
1277                    parts.push("null".to_string());
1278                }
1279            }
1280            None | Some(serde_json::Value::Null) => {
1281                // Required arg with no fixture value: pass a language-appropriate default.
1282                let default_val = match arg.arg_type.as_str() {
1283                    "string" | "file_path" => "\"\"".to_string(),
1284                    "int" | "integer" => "0".to_string(),
1285                    "float" | "number" => "0.0d".to_string(),
1286                    "bool" | "boolean" => "false".to_string(),
1287                    _ => "null".to_string(),
1288                };
1289                parts.push(default_val);
1290            }
1291            Some(v) => {
1292                if arg.arg_type == "json_object" {
1293                    // Array json_object args: emit inline Java list expression.
1294                    // Check for batch item arrays first (element_type = BatchBytesItem/BatchFileItem).
1295                    if v.is_array() {
1296                        if let Some(elem_type) = &arg.element_type {
1297                            if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
1298                                parts.push(emit_java_batch_item_array(v, elem_type));
1299                                continue;
1300                            }
1301                        }
1302                        // Otherwise use element_type to emit the correct numeric literal suffix (f vs d).
1303                        let elem_type = arg.element_type.as_deref();
1304                        parts.push(json_to_java_typed(v, elem_type));
1305                        continue;
1306                    }
1307                    // Object json_object args with options_type: use pre-deserialized variable.
1308                    if options_type.is_some() {
1309                        parts.push(arg.name.clone());
1310                        continue;
1311                    }
1312                    parts.push(json_to_java(v));
1313                    continue;
1314                }
1315                // bytes args must be passed as byte[], not String.
1316                if arg.arg_type == "bytes" {
1317                    let val = json_to_java(v);
1318                    parts.push(format!("{val}.getBytes()"));
1319                    continue;
1320                }
1321                // file_path args must be wrapped in java.nio.file.Path.of().
1322                if arg.arg_type == "file_path" {
1323                    let val = json_to_java(v);
1324                    parts.push(format!("java.nio.file.Path.of({val})"));
1325                    continue;
1326                }
1327                parts.push(json_to_java(v));
1328            }
1329        }
1330    }
1331
1332    (setup_lines, parts.join(", "))
1333}
1334
1335#[allow(clippy::too_many_arguments)]
1336fn render_assertion(
1337    out: &mut String,
1338    assertion: &Assertion,
1339    result_var: &str,
1340    class_name: &str,
1341    field_resolver: &FieldResolver,
1342    result_is_simple: bool,
1343    result_is_bytes: bool,
1344    enum_fields: &std::collections::HashSet<String>,
1345) {
1346    // Byte-buffer returns: emit length-based assertions instead of struct-field
1347    // accessors. The result is `byte[]`, which has no `isEmpty()`/struct-field methods.
1348    // Field paths on byte-buffer results (e.g. `audio`, `content`) are pseudo-fields
1349    // referencing the buffer itself — treat them the same as no-field assertions.
1350    if result_is_bytes {
1351        match assertion.assertion_type.as_str() {
1352            "not_empty" => {
1353                out.push_str(&format!(
1354                    "        assertTrue({result_var}.length > 0, \"expected non-empty value\");\n"
1355                ));
1356                return;
1357            }
1358            "is_empty" => {
1359                out.push_str(&format!(
1360                    "        assertEquals(0, {result_var}.length, \"expected empty value\");\n"
1361                ));
1362                return;
1363            }
1364            "count_equals" | "length_equals" => {
1365                if let Some(n) = assertion.value.as_ref().and_then(|v| v.as_u64()) {
1366                    out.push_str(&format!("        assertEquals({n}, {result_var}.length);\n"));
1367                }
1368                return;
1369            }
1370            "count_min" | "length_min" => {
1371                if let Some(n) = assertion.value.as_ref().and_then(|v| v.as_u64()) {
1372                    out.push_str(&format!(
1373                        "        assertTrue({result_var}.length >= {n}, \"expected length >= {n}\");\n"
1374                    ));
1375                }
1376                return;
1377            }
1378            _ => {
1379                out.push_str(&format!(
1380                    "        // skipped: assertion type '{}' not supported on byte[] result\n",
1381                    assertion.assertion_type
1382                ));
1383                return;
1384            }
1385        }
1386    }
1387
1388    // Handle synthetic/virtual fields that are computed rather than direct record accessors.
1389    if let Some(f) = &assertion.field {
1390        match f.as_str() {
1391            // ---- ExtractionResult chunk-level computed predicates ----
1392            "chunks_have_content" => {
1393                let pred = format!(
1394                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.content() != null && !c.content().isBlank())"
1395                );
1396                out.push_str(&crate::template_env::render(
1397                    "java/synthetic_assertion.jinja",
1398                    minijinja::context! {
1399                        assertion_kind => "chunks_content",
1400                        assertion_type => assertion.assertion_type.as_str(),
1401                        pred => pred,
1402                        field_name => f,
1403                    },
1404                ));
1405                return;
1406            }
1407            "chunks_have_heading_context" => {
1408                let pred = format!(
1409                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.metadata().headingContext().isPresent())"
1410                );
1411                out.push_str(&crate::template_env::render(
1412                    "java/synthetic_assertion.jinja",
1413                    minijinja::context! {
1414                        assertion_kind => "chunks_heading_context",
1415                        assertion_type => assertion.assertion_type.as_str(),
1416                        pred => pred,
1417                        field_name => f,
1418                    },
1419                ));
1420                return;
1421            }
1422            "chunks_have_embeddings" => {
1423                let pred = format!(
1424                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.embedding() != null && !c.embedding().isEmpty())"
1425                );
1426                out.push_str(&crate::template_env::render(
1427                    "java/synthetic_assertion.jinja",
1428                    minijinja::context! {
1429                        assertion_kind => "chunks_embeddings",
1430                        assertion_type => assertion.assertion_type.as_str(),
1431                        pred => pred,
1432                        field_name => f,
1433                    },
1434                ));
1435                return;
1436            }
1437            "first_chunk_starts_with_heading" => {
1438                let pred = format!(
1439                    "{result_var}.chunks().orElse(java.util.List.of()).stream().findFirst().map(c -> c.metadata().headingContext().isPresent()).orElse(false)"
1440                );
1441                out.push_str(&crate::template_env::render(
1442                    "java/synthetic_assertion.jinja",
1443                    minijinja::context! {
1444                        assertion_kind => "first_chunk_heading",
1445                        assertion_type => assertion.assertion_type.as_str(),
1446                        pred => pred,
1447                        field_name => f,
1448                    },
1449                ));
1450                return;
1451            }
1452            // ---- EmbedResponse virtual fields ----
1453            // When result_is_simple=true the result IS List<List<Float>> (the raw embeddings list).
1454            // When result_is_simple=false the result has an .embeddings() accessor.
1455            "embedding_dimensions" => {
1456                // Dimension = size of the first embedding vector in the list.
1457                let embed_list = if result_is_simple {
1458                    result_var.to_string()
1459                } else {
1460                    format!("{result_var}.embeddings()")
1461                };
1462                let expr = format!("({embed_list}.isEmpty() ? 0 : {embed_list}.get(0).size())");
1463                let java_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();
1464                out.push_str(&crate::template_env::render(
1465                    "java/synthetic_assertion.jinja",
1466                    minijinja::context! {
1467                        assertion_kind => "embedding_dimensions",
1468                        assertion_type => assertion.assertion_type.as_str(),
1469                        expr => expr,
1470                        java_val => java_val,
1471                        field_name => f,
1472                    },
1473                ));
1474                return;
1475            }
1476            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1477                // These are validation predicates that require iterating the embedding matrix.
1478                let embed_list = if result_is_simple {
1479                    result_var.to_string()
1480                } else {
1481                    format!("{result_var}.embeddings()")
1482                };
1483                let pred = match f.as_str() {
1484                    "embeddings_valid" => {
1485                        format!("{embed_list}.stream().allMatch(e -> e != null && !e.isEmpty())")
1486                    }
1487                    "embeddings_finite" => {
1488                        format!("{embed_list}.stream().flatMap(java.util.Collection::stream).allMatch(Float::isFinite)")
1489                    }
1490                    "embeddings_non_zero" => {
1491                        format!("{embed_list}.stream().allMatch(e -> e.stream().anyMatch(v -> v != 0.0f))")
1492                    }
1493                    "embeddings_normalized" => format!(
1494                        "{embed_list}.stream().allMatch(e -> {{ double n = e.stream().mapToDouble(v -> v * v).sum(); return Math.abs(n - 1.0) < 1e-3; }})"
1495                    ),
1496                    _ => unreachable!(),
1497                };
1498                let assertion_kind = format!("embeddings_{}", f.strip_prefix("embeddings_").unwrap_or(f));
1499                out.push_str(&crate::template_env::render(
1500                    "java/synthetic_assertion.jinja",
1501                    minijinja::context! {
1502                        assertion_kind => assertion_kind,
1503                        assertion_type => assertion.assertion_type.as_str(),
1504                        pred => pred,
1505                        field_name => f,
1506                    },
1507                ));
1508                return;
1509            }
1510            // ---- Fields not present on the Java ExtractionResult ----
1511            "keywords" | "keywords_count" => {
1512                out.push_str(&crate::template_env::render(
1513                    "java/synthetic_assertion.jinja",
1514                    minijinja::context! {
1515                        assertion_kind => "keywords",
1516                        field_name => f,
1517                    },
1518                ));
1519                return;
1520            }
1521            // ---- metadata not_empty / is_empty: Metadata is a required record, not Optional ----
1522            // Metadata has no .isEmpty() method; check that at least one optional field is present.
1523            "metadata" => {
1524                match assertion.assertion_type.as_str() {
1525                    "not_empty" | "is_empty" => {
1526                        out.push_str(&crate::template_env::render(
1527                            "java/synthetic_assertion.jinja",
1528                            minijinja::context! {
1529                                assertion_kind => "metadata",
1530                                assertion_type => assertion.assertion_type.as_str(),
1531                                result_var => result_var,
1532                            },
1533                        ));
1534                        return;
1535                    }
1536                    _ => {} // fall through to normal handling
1537                }
1538            }
1539            _ => {}
1540        }
1541    }
1542
1543    // Skip assertions on fields that don't exist on the result type.
1544    if let Some(f) = &assertion.field {
1545        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1546            out.push_str(&crate::template_env::render(
1547                "java/synthetic_assertion.jinja",
1548                minijinja::context! {
1549                    assertion_kind => "skipped",
1550                    field_name => f,
1551                },
1552            ));
1553            return;
1554        }
1555    }
1556
1557    // Determine if this field is an enum type (no `.contains()` on enums in Java).
1558    // Check both the raw fixture field path and the resolved (aliased) path so that
1559    // `fields_enum` entries can use either form (e.g., `"assets[].category"` or the
1560    // resolved `"assets[].asset_category"`).
1561    let field_is_enum = assertion
1562        .field
1563        .as_deref()
1564        .is_some_and(|f| enum_fields.contains(f) || enum_fields.contains(field_resolver.resolve(f)));
1565
1566    // Determine if this field is an array (List<T>) — needed to choose .toString() for
1567    // contains assertions, since List.contains(Object) uses equals() which won't match
1568    // strings against complex record types like StructureItem.
1569    let field_is_array = assertion
1570        .field
1571        .as_deref()
1572        .is_some_and(|f| field_resolver.is_array(field_resolver.resolve(f)));
1573
1574    let field_expr = if result_is_simple {
1575        result_var.to_string()
1576    } else {
1577        match &assertion.field {
1578            Some(f) if !f.is_empty() => {
1579                let accessor = field_resolver.accessor(f, "java", result_var);
1580                let resolved = field_resolver.resolve(f);
1581                // Unwrap Optional fields with a type-appropriate fallback.
1582                // Map.get() returns nullable, not Optional, so skip .orElse() for map access.
1583                // NOTE: is_optional() means the field is in optional_fields, but that doesn't
1584                // guarantee it returns Optional<T> in Java — nested fields like metadata.twitterCard
1585                // return @Nullable String, not Optional<String>. We detect this by checking
1586                // if the field path contains a dot (nested access).
1587                if field_resolver.is_optional(resolved) && !field_resolver.has_map_access(f) {
1588                    // All nullable fields in the Java binding return @Nullable types, not Optional<T>.
1589                    // Wrap them in Optional.ofNullable() so e2e tests can use .orElse() fallbacks.
1590                    let optional_expr = format!("java.util.Optional.ofNullable({accessor})");
1591                    // Enum-typed optional fields need .map(v -> v.getValue()) to coerce to String
1592                    // before the orElse("") fallback can type-check (Optional<Enum>.orElse("") would
1593                    // be a type mismatch — Optional<String>.orElse("") is the only safe form).
1594                    if field_is_enum {
1595                        match assertion.assertion_type.as_str() {
1596                            "not_empty" | "is_empty" => optional_expr,
1597                            _ => format!("{optional_expr}.map(v -> v.getValue()).orElse(\"\")"),
1598                        }
1599                    } else {
1600                        match assertion.assertion_type.as_str() {
1601                            // For not_empty / is_empty on Optional fields, return the raw Optional
1602                            // so the assertion arms can call isPresent()/isEmpty().
1603                            "not_empty" | "is_empty" => optional_expr,
1604                            // For size/count assertions on Optional<List<T>> fields, use List.of() fallback.
1605                            "count_min" | "count_equals" => {
1606                                format!("{optional_expr}.orElse(java.util.List.of())")
1607                            }
1608                            // For numeric comparisons on Optional<Long/Integer> fields, use 0L.
1609                            "greater_than" | "less_than" | "greater_than_or_equal" | "less_than_or_equal" => {
1610                                if field_resolver.is_array(resolved) {
1611                                    format!("{optional_expr}.orElse(java.util.List.of())")
1612                                } else {
1613                                    format!("{optional_expr}.orElse(0L)")
1614                                }
1615                            }
1616                            // For equals on Optional fields, determine fallback based on whether value is numeric.
1617                            // If the fixture value is a number, use 0L; otherwise use "".
1618                            "equals" => {
1619                                if let Some(expected) = &assertion.value {
1620                                    if expected.is_number() {
1621                                        format!("{optional_expr}.orElse(0L)")
1622                                    } else {
1623                                        format!("{optional_expr}.orElse(\"\")")
1624                                    }
1625                                } else {
1626                                    format!("{optional_expr}.orElse(\"\")")
1627                                }
1628                            }
1629                            _ if field_resolver.is_array(resolved) => {
1630                                format!("{optional_expr}.orElse(java.util.List.of())")
1631                            }
1632                            _ => format!("{optional_expr}.orElse(\"\")"),
1633                        }
1634                    }
1635                } else {
1636                    accessor
1637                }
1638            }
1639            _ => result_var.to_string(),
1640        }
1641    };
1642
1643    // For enum fields, string-based assertions need .getValue() to convert the enum to
1644    // its serde-serialized lowercase string value (e.g., AssetCategory.Image -> "image").
1645    // All alef-generated Java enums expose a getValue() method annotated with @JsonValue.
1646    // Optional enum fields are already coerced to String via `.map(v -> v.getValue()).orElse("")`
1647    // upstream in field_expr; in that case the value is already a String and we must not
1648    // call .getValue() again. Detect by looking for `.map(v -> v.getValue())` in the expr.
1649    let string_expr = if field_is_enum && !field_expr.contains(".map(v -> v.getValue())") {
1650        format!("{field_expr}.getValue()")
1651    } else {
1652        field_expr.clone()
1653    };
1654
1655    // Pre-compute context for template
1656    let assertion_type = assertion.assertion_type.as_str();
1657    let java_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();
1658    let is_string_val = assertion.value.as_ref().is_some_and(|v| v.is_string());
1659    let is_numeric_val = assertion.value.as_ref().is_some_and(|v| v.is_number());
1660
1661    let values_java: Vec<String> = assertion
1662        .values
1663        .as_ref()
1664        .map(|values| values.iter().map(json_to_java).collect())
1665        .unwrap_or_default();
1666
1667    let contains_any_expr = if !values_java.is_empty() {
1668        values_java
1669            .iter()
1670            .map(|v| format!("{string_expr}.contains({v})"))
1671            .collect::<Vec<_>>()
1672            .join(" || ")
1673    } else {
1674        String::new()
1675    };
1676
1677    let length_expr = if result_is_bytes {
1678        format!("{field_expr}.length")
1679    } else {
1680        format!("{field_expr}.length()")
1681    };
1682
1683    let n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);
1684
1685    let call_expr = if let Some(method_name) = &assertion.method {
1686        build_java_method_call(result_var, method_name, assertion.args.as_ref(), class_name)
1687    } else {
1688        String::new()
1689    };
1690
1691    let check = assertion.check.as_deref().unwrap_or("is_true");
1692
1693    let java_check_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();
1694
1695    let check_n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);
1696
1697    let is_bool_val = assertion.value.as_ref().is_some_and(|v| v.is_boolean());
1698    let bool_is_true = assertion.value.as_ref().is_some_and(|v| v.as_bool() == Some(true));
1699
1700    let method_returns_collection = assertion
1701        .method
1702        .as_ref()
1703        .is_some_and(|m| matches!(m.as_str(), "find_nodes_by_type" | "findNodesByType"));
1704
1705    let rendered = crate::template_env::render(
1706        "java/assertion.jinja",
1707        minijinja::context! {
1708            assertion_type,
1709            java_val,
1710            string_expr,
1711            field_expr,
1712            field_is_enum,
1713            field_is_array,
1714            is_string_val,
1715            is_numeric_val,
1716            values_java => values_java,
1717            contains_any_expr,
1718            length_expr,
1719            n,
1720            call_expr,
1721            check,
1722            java_check_val,
1723            check_n,
1724            is_bool_val,
1725            bool_is_true,
1726            method_returns_collection,
1727        },
1728    );
1729    out.push_str(&rendered);
1730}
1731
1732/// Build a Java call expression for a `method_result` assertion on a tree-sitter Tree.
1733///
1734/// Maps method names to the appropriate Java static/instance method calls.
1735fn build_java_method_call(
1736    result_var: &str,
1737    method_name: &str,
1738    args: Option<&serde_json::Value>,
1739    class_name: &str,
1740) -> String {
1741    match method_name {
1742        "root_child_count" => format!("{result_var}.rootNode().childCount()"),
1743        "root_node_type" => format!("{result_var}.rootNode().kind()"),
1744        "named_children_count" => format!("{result_var}.rootNode().namedChildCount()"),
1745        "has_error_nodes" => format!("{class_name}.treeHasErrorNodes({result_var})"),
1746        "error_count" | "tree_error_count" => format!("{class_name}.treeErrorCount({result_var})"),
1747        "tree_to_sexp" => format!("{class_name}.treeToSexp({result_var})"),
1748        "contains_node_type" => {
1749            let node_type = args
1750                .and_then(|a| a.get("node_type"))
1751                .and_then(|v| v.as_str())
1752                .unwrap_or("");
1753            format!("{class_name}.treeContainsNodeType({result_var}, \"{node_type}\")")
1754        }
1755        "find_nodes_by_type" => {
1756            let node_type = args
1757                .and_then(|a| a.get("node_type"))
1758                .and_then(|v| v.as_str())
1759                .unwrap_or("");
1760            format!("{class_name}.findNodesByType({result_var}, \"{node_type}\")")
1761        }
1762        "run_query" => {
1763            let query_source = args
1764                .and_then(|a| a.get("query_source"))
1765                .and_then(|v| v.as_str())
1766                .unwrap_or("");
1767            let language = args
1768                .and_then(|a| a.get("language"))
1769                .and_then(|v| v.as_str())
1770                .unwrap_or("");
1771            let escaped_query = escape_java(query_source);
1772            format!("{class_name}.runQuery({result_var}, \"{language}\", \"{escaped_query}\", source)")
1773        }
1774        _ => {
1775            format!("{result_var}.{}()", method_name.to_lower_camel_case())
1776        }
1777    }
1778}
1779
1780/// Convert a `serde_json::Value` to a Java literal string.
1781fn json_to_java(value: &serde_json::Value) -> String {
1782    json_to_java_typed(value, None)
1783}
1784
1785/// Convert a JSON value to a Java literal, optionally overriding number type for array elements.
1786/// `element_type` controls how numeric array elements are emitted: "f32" → `1.0f`, otherwise `1.0d`.
1787/// Emit Java batch item constructors for BatchBytesItem or BatchFileItem arrays.
1788fn emit_java_batch_item_array(arr: &serde_json::Value, elem_type: &str) -> String {
1789    if let Some(items) = arr.as_array() {
1790        let item_strs: Vec<String> = items
1791            .iter()
1792            .filter_map(|item| {
1793                if let Some(obj) = item.as_object() {
1794                    match elem_type {
1795                        "BatchBytesItem" => {
1796                            let content = obj.get("content").and_then(|v| v.as_array());
1797                            let mime_type = obj.get("mime_type").and_then(|v| v.as_str()).unwrap_or("text/plain");
1798                            let content_code = if let Some(arr) = content {
1799                                let bytes: Vec<String> = arr
1800                                    .iter()
1801                                    .filter_map(|v| v.as_u64().map(|n| format!("(byte) {}", n)))
1802                                    .collect();
1803                                format!("new byte[] {{{}}}", bytes.join(", "))
1804                            } else {
1805                                "new byte[] {}".to_string()
1806                            };
1807                            Some(format!("new {}({}, \"{}\", null)", elem_type, content_code, mime_type))
1808                        }
1809                        "BatchFileItem" => {
1810                            let path = obj.get("path").and_then(|v| v.as_str()).unwrap_or("");
1811                            Some(format!(
1812                                "new {}(java.nio.file.Paths.get(\"{}\"), null)",
1813                                elem_type, path
1814                            ))
1815                        }
1816                        _ => None,
1817                    }
1818                } else {
1819                    None
1820                }
1821            })
1822            .collect();
1823        format!("java.util.Arrays.asList({})", item_strs.join(", "))
1824    } else {
1825        "java.util.List.of()".to_string()
1826    }
1827}
1828
1829fn json_to_java_typed(value: &serde_json::Value, element_type: Option<&str>) -> String {
1830    match value {
1831        serde_json::Value::String(s) => format!("\"{}\"", escape_java(s)),
1832        serde_json::Value::Bool(b) => b.to_string(),
1833        serde_json::Value::Number(n) => {
1834            if n.is_f64() {
1835                match element_type {
1836                    Some("f32" | "float" | "Float") => format!("{}f", n),
1837                    _ => format!("{}d", n),
1838                }
1839            } else {
1840                n.to_string()
1841            }
1842        }
1843        serde_json::Value::Null => "null".to_string(),
1844        serde_json::Value::Array(arr) => {
1845            let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, element_type)).collect();
1846            format!("java.util.List.of({})", items.join(", "))
1847        }
1848        serde_json::Value::Object(_) => {
1849            let json_str = serde_json::to_string(value).unwrap_or_default();
1850            format!("\"{}\"", escape_java(&json_str))
1851        }
1852    }
1853}
1854
1855/// Generate a Java builder expression for a JSON object.
1856/// E.g., `obj = {"language": "abl", "chunk_max_size": 50}`
1857/// becomes: `TypeName.builder().withLanguage("abl").withChunkMaxSize(50L).build()`
1858///
1859/// For enums: emit `EnumType.VariantName` (detected via camelCase lookup in enum_fields)
1860/// For strings and bools: use the value directly
1861/// For plain numbers: emit the literal with type suffix (long uses L, double uses d)
1862/// For nested objects: recurse with Options suffix
1863/// When `nested_types_optional` is false, nested builders are passed directly without
1864/// Optional.of() wrapping, allowing non-optional nested config types.
1865fn java_builder_expression(
1866    obj: &serde_json::Map<String, serde_json::Value>,
1867    type_name: &str,
1868    enum_fields: &std::collections::HashSet<String>,
1869    nested_types: &std::collections::HashMap<String, String>,
1870    nested_types_optional: bool,
1871    path_fields: &[String],
1872) -> String {
1873    let mut expr = format!("{}.builder()", type_name);
1874    for (key, val) in obj {
1875        // Convert snake_case key to camelCase for method name
1876        let camel_key = key.to_lower_camel_case();
1877        let method_name = format!("with{}", camel_key.to_upper_camel_case());
1878
1879        let java_val = match val {
1880            serde_json::Value::String(s) => {
1881                // Check if this field is an enum type by checking enum_fields.
1882                // Infer enum type name from camelCase field name by converting to UpperCamelCase.
1883                if enum_fields.contains(&camel_key) {
1884                    // Enum field: infer type name from field name (e.g., "codeBlockStyle" -> "CodeBlockStyle")
1885                    let enum_type_name = camel_key.to_upper_camel_case();
1886                    let variant_name = s.to_upper_camel_case();
1887                    format!("{}.{}", enum_type_name, variant_name)
1888                } else if camel_key == "preset" && type_name == "PreprocessingOptions" {
1889                    // Special case: preset field in PreprocessingOptions maps to PreprocessingPreset
1890                    let variant_name = s.to_upper_camel_case();
1891                    format!("PreprocessingPreset.{}", variant_name)
1892                } else if path_fields.contains(key) {
1893                    // Path field: wrap in Optional.of(java.nio.file.Path.of(...))
1894                    format!("Optional.of(java.nio.file.Path.of(\"{}\"))", escape_java(s))
1895                } else {
1896                    // String field: emit as a quoted literal
1897                    format!("\"{}\"", escape_java(s))
1898                }
1899            }
1900            serde_json::Value::Bool(b) => b.to_string(),
1901            serde_json::Value::Null => "null".to_string(),
1902            serde_json::Value::Number(n) => {
1903                // Number field: emit literal with type suffix.
1904                // Java records/classes use either `long` (primitive, not nullable) or
1905                // `Optional<Long>` (nullable). The codegen wraps in `Optional.of(...)`
1906                // by default since most options builder fields are Optional, but several
1907                // record types (e.g. SecurityLimits) use primitive `long` throughout.
1908                // Skip the wrap for: (a) known-primitive top-level fields and (b) any
1909                // method on a record type whose builder methods take primitives only.
1910                let camel_key = key.to_lower_camel_case();
1911                let is_plain_field = matches!(camel_key.as_str(), "listIndentWidth" | "wrapWidth");
1912                // Builders for typed-record nested config classes use primitives
1913                // throughout — they're not the optional-options pattern.
1914                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1915
1916                if is_plain_field || is_primitive_builder {
1917                    // Plain numeric field: no Optional wrapper
1918                    if n.is_f64() {
1919                        format!("{}d", n)
1920                    } else {
1921                        format!("{}L", n)
1922                    }
1923                } else {
1924                    // Optional numeric field: wrap in Optional.of()
1925                    if n.is_f64() {
1926                        format!("Optional.of({}d)", n)
1927                    } else {
1928                        format!("Optional.of({}L)", n)
1929                    }
1930                }
1931            }
1932            serde_json::Value::Array(arr) => {
1933                let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, None)).collect();
1934                format!("java.util.List.of({})", items.join(", "))
1935            }
1936            serde_json::Value::Object(nested) => {
1937                // Recurse with the type from nested_types mapping, or default to snake_case → PascalCase + "Options".
1938                let nested_type = nested_types
1939                    .get(key.as_str())
1940                    .cloned()
1941                    .unwrap_or_else(|| format!("{}Options", key.to_upper_camel_case()));
1942                let inner = java_builder_expression(
1943                    nested,
1944                    &nested_type,
1945                    enum_fields,
1946                    nested_types,
1947                    nested_types_optional,
1948                    &[],
1949                );
1950                // Top-level config builders (e.g. ExtractionConfigBuilder) declare nested
1951                // record fields as `Optional<T>` (since they are nullable). Primitive-fields
1952                // builders (SecurityLimitsBuilder etc.) take the bare type directly.
1953                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1954                if is_primitive_builder || !nested_types_optional {
1955                    inner
1956                } else {
1957                    format!("Optional.of({inner})")
1958                }
1959            }
1960        };
1961        expr.push_str(&format!(".{}({})", method_name, java_val));
1962    }
1963    expr.push_str(".build()");
1964    expr
1965}
1966
/// Default mapping from nested config field names to Java record type names.
///
/// Keys are the snake_case field names as they appear in fixtures; values are
/// the PascalCase Java types used to build those nested objects. The table lets
/// e2e codegen handle the known Kreuzberg/Kreuzcrawl config objects without any
/// explicit configuration in alef.toml. User-provided overrides take precedence.
fn default_java_nested_types() -> std::collections::HashMap<String, String> {
    const DEFAULTS: &[(&str, &str)] = &[
        ("chunking", "ChunkingConfig"),
        ("ocr", "OcrConfig"),
        ("images", "ImageExtractionConfig"),
        ("html_output", "HtmlOutputConfig"),
        ("language_detection", "LanguageDetectionConfig"),
        ("postprocessor", "PostProcessorConfig"),
        ("acceleration", "AccelerationConfig"),
        ("email", "EmailConfig"),
        ("pages", "PageConfig"),
        ("pdf_options", "PdfConfig"),
        ("layout", "LayoutDetectionConfig"),
        ("tree_sitter", "TreeSitterConfig"),
        ("structured_extraction", "StructuredExtractionConfig"),
        ("content_filter", "ContentFilterConfig"),
        ("token_reduction", "TokenReductionOptions"),
        ("security_limits", "SecurityLimits"),
    ];

    let mut mapping = std::collections::HashMap::with_capacity(DEFAULTS.len());
    for &(field, java_type) in DEFAULTS {
        mapping.insert(field.to_owned(), java_type.to_owned());
    }
    mapping
}
1996
1997// ---------------------------------------------------------------------------
1998// Import collection helpers
1999// ---------------------------------------------------------------------------
2000
2001/// Recursively collect enum types and nested option types used in a builder expression.
2002/// Enums are keyed in the enum_fields map by camelCase names (e.g., "codeBlockStyle" → "CodeBlockStyle").
2003#[allow(dead_code)]
2004fn collect_enum_and_nested_types(
2005    obj: &serde_json::Map<String, serde_json::Value>,
2006    enum_fields: &std::collections::HashMap<String, String>,
2007    types_out: &mut std::collections::BTreeSet<String>,
2008) {
2009    for (key, val) in obj {
2010        // enum_fields is keyed by camelCase, not snake_case.
2011        let camel_key = key.to_lower_camel_case();
2012        if let Some(enum_type) = enum_fields.get(&camel_key) {
2013            // Add the enum type from the mapping (e.g., "CodeBlockStyle").
2014            types_out.insert(enum_type.clone());
2015        } else if camel_key == "preset" {
2016            // Special case: preset field uses PreprocessingPreset enum.
2017            types_out.insert("PreprocessingPreset".to_string());
2018        }
2019        // Recurse into nested objects to find their nested enum types.
2020        if let Some(nested) = val.as_object() {
2021            collect_enum_and_nested_types(nested, enum_fields, types_out);
2022        }
2023    }
2024}
2025
2026fn collect_nested_type_names(
2027    obj: &serde_json::Map<String, serde_json::Value>,
2028    nested_types: &std::collections::HashMap<String, String>,
2029    types_out: &mut std::collections::BTreeSet<String>,
2030) {
2031    for (key, val) in obj {
2032        if let Some(type_name) = nested_types.get(key.as_str()) {
2033            types_out.insert(type_name.clone());
2034        }
2035        if let Some(nested) = val.as_object() {
2036            collect_nested_type_names(nested, nested_types, types_out);
2037        }
2038    }
2039}
2040
2041// ---------------------------------------------------------------------------
2042// Visitor generation
2043// ---------------------------------------------------------------------------
2044
2045/// Build a Java visitor class and add setup lines. Returns the visitor variable name.
2046fn build_java_visitor(
2047    setup_lines: &mut Vec<String>,
2048    visitor_spec: &crate::fixture::VisitorSpec,
2049    class_name: &str,
2050) -> String {
2051    setup_lines.push("class _TestVisitor implements Visitor {".to_string());
2052    for (method_name, action) in &visitor_spec.callbacks {
2053        emit_java_visitor_method(setup_lines, method_name, action, class_name);
2054    }
2055    setup_lines.push("}".to_string());
2056    setup_lines.push("var visitor = new _TestVisitor();".to_string());
2057    "visitor".to_string()
2058}
2059
2060/// Emit a Java visitor method for a callback action.
2061fn emit_java_visitor_method(
2062    setup_lines: &mut Vec<String>,
2063    method_name: &str,
2064    action: &CallbackAction,
2065    _class_name: &str,
2066) {
2067    let camel_method = method_to_camel(method_name);
2068    let params = match method_name {
2069        "visit_link" => "NodeContext ctx, String href, String text, String title",
2070        "visit_image" => "NodeContext ctx, String src, String alt, String title",
2071        "visit_heading" => "NodeContext ctx, int level, String text, String id",
2072        "visit_code_block" => "NodeContext ctx, String lang, String code",
2073        "visit_code_inline"
2074        | "visit_strong"
2075        | "visit_emphasis"
2076        | "visit_strikethrough"
2077        | "visit_underline"
2078        | "visit_subscript"
2079        | "visit_superscript"
2080        | "visit_mark"
2081        | "visit_button"
2082        | "visit_summary"
2083        | "visit_figcaption"
2084        | "visit_definition_term"
2085        | "visit_definition_description" => "NodeContext ctx, String text",
2086        "visit_text" => "NodeContext ctx, String text",
2087        "visit_list_item" => "NodeContext ctx, boolean ordered, String marker, String text",
2088        "visit_blockquote" => "NodeContext ctx, String content, long depth",
2089        "visit_table_row" => "NodeContext ctx, java.util.List<String> cells, boolean isHeader",
2090        "visit_custom_element" => "NodeContext ctx, String tagName, String html",
2091        "visit_form" => "NodeContext ctx, String actionUrl, String method",
2092        "visit_input" => "NodeContext ctx, String inputType, String name, String value",
2093        "visit_audio" | "visit_video" | "visit_iframe" => "NodeContext ctx, String src",
2094        "visit_details" => "NodeContext ctx, boolean isOpen",
2095        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
2096            "NodeContext ctx, String output"
2097        }
2098        "visit_list_start" => "NodeContext ctx, boolean ordered",
2099        "visit_list_end" => "NodeContext ctx, boolean ordered, String output",
2100        _ => "NodeContext ctx",
2101    };
2102
2103    // Determine action type and values for template
2104    let (action_type, action_value, format_args) = match action {
2105        CallbackAction::Skip => ("skip", String::new(), Vec::new()),
2106        CallbackAction::Continue => ("continue", String::new(), Vec::new()),
2107        CallbackAction::PreserveHtml => ("preserve_html", String::new(), Vec::new()),
2108        CallbackAction::Custom { output } => ("custom_literal", escape_java(output), Vec::new()),
2109        CallbackAction::CustomTemplate { template } => {
2110            // Extract {placeholder} names from the template (in order of appearance).
2111            let mut format_str = String::with_capacity(template.len());
2112            let mut format_args: Vec<String> = Vec::new();
2113            let mut chars = template.chars().peekable();
2114            while let Some(ch) = chars.next() {
2115                if ch == '{' {
2116                    // Collect identifier chars until '}'.
2117                    let mut name = String::new();
2118                    let mut closed = false;
2119                    for inner in chars.by_ref() {
2120                        if inner == '}' {
2121                            closed = true;
2122                            break;
2123                        }
2124                        name.push(inner);
2125                    }
2126                    if closed && !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2127                        let camel_name = name.as_str().to_lower_camel_case();
2128                        format_args.push(camel_name);
2129                        format_str.push_str("%s");
2130                    } else {
2131                        // Not a simple placeholder — emit literally.
2132                        format_str.push('{');
2133                        format_str.push_str(&name);
2134                        if closed {
2135                            format_str.push('}');
2136                        }
2137                    }
2138                } else {
2139                    format_str.push(ch);
2140                }
2141            }
2142            let escaped = escape_java(&format_str);
2143            if format_args.is_empty() {
2144                ("custom_literal", escaped, Vec::new())
2145            } else {
2146                ("custom_formatted", escaped, format_args)
2147            }
2148        }
2149    };
2150
2151    let params = params.to_string();
2152
2153    let rendered = crate::template_env::render(
2154        "java/visitor_method.jinja",
2155        minijinja::context! {
2156            camel_method,
2157            params,
2158            action_type,
2159            action_value,
2160            format_args => format_args,
2161        },
2162    );
2163    setup_lines.push(rendered);
2164}
2165
2166/// Convert snake_case method names to Java camelCase.
2167fn method_to_camel(snake: &str) -> String {
2168    snake.to_lower_camel_case()
2169}