Skip to main content

alef_e2e/codegen/
java.rs

1//! Java e2e test generator using JUnit 5.
2//!
3//! Generates `e2e/java/pom.xml` and `src/test/java/dev/kreuzberg/e2e/{Category}Test.java`
4//! files from JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
5
6use crate::config::E2eConfig;
7use crate::escape::{escape_java, sanitize_filename};
8use crate::field_access::FieldResolver;
9use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, HttpFixture};
10use alef_core::backend::GeneratedFile;
11use alef_core::config::ResolvedCrateConfig;
12use alef_core::hash::{self, CommentStyle};
13use alef_core::template_versions as tv;
14use anyhow::Result;
15use heck::{ToLowerCamelCase, ToUpperCamelCase};
16use std::fmt::Write as FmtWrite;
17use std::path::PathBuf;
18
19use super::E2eCodegen;
20use super::client;
21
22/// Java e2e code generator.
23pub struct JavaCodegen;
24
25impl E2eCodegen for JavaCodegen {
26    fn generate(
27        &self,
28        groups: &[FixtureGroup],
29        e2e_config: &E2eConfig,
30        config: &ResolvedCrateConfig,
31    ) -> Result<Vec<GeneratedFile>> {
32        let lang = self.language_name();
33        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
34
35        let mut files = Vec::new();
36
37        // Resolve call config with overrides.
38        let call = &e2e_config.call;
39        let overrides = call.overrides.get(lang);
40        let _module_path = overrides
41            .and_then(|o| o.module.as_ref())
42            .cloned()
43            .unwrap_or_else(|| call.module.clone());
44        let function_name = overrides
45            .and_then(|o| o.function.as_ref())
46            .cloned()
47            .unwrap_or_else(|| call.function.clone());
48        let class_name = overrides
49            .and_then(|o| o.class.as_ref())
50            .cloned()
51            .unwrap_or_else(|| config.name.to_upper_camel_case());
52        let result_is_simple = overrides.is_some_and(|o| o.result_is_simple);
53        let result_var = &call.result_var;
54
55        // Resolve package config.
56        let java_pkg = e2e_config.resolve_package("java");
57        let pkg_name = java_pkg
58            .as_ref()
59            .and_then(|p| p.name.as_ref())
60            .cloned()
61            .unwrap_or_else(|| config.name.clone());
62
63        // Resolve Java package info for the dependency.
64        let java_group_id = config.java_group_id();
65        let pkg_version = config.resolved_version().unwrap_or_else(|| "0.1.0".to_string());
66
67        // Generate pom.xml.
68        files.push(GeneratedFile {
69            path: output_base.join("pom.xml"),
70            content: render_pom_xml(&pkg_name, &java_group_id, &pkg_version, e2e_config.dep_mode),
71            generated_header: false,
72        });
73
74        // Generate test files per category. Path mirrors the configured Java
75        // package — `dev.myorg` becomes `dev/myorg`, etc. — so the package
76        // declaration in each test file matches its filesystem location.
77        let mut test_base = output_base.join("src").join("test").join("java");
78        for segment in java_group_id.split('.') {
79            test_base = test_base.join(segment);
80        }
81        let test_base = test_base.join("e2e");
82
83        // Resolve options_type from override.
84        let options_type = overrides.and_then(|o| o.options_type.clone());
85
86        // Get Java-specific enum_fields from override (required for correct enum handling).
87        let empty_enum_fields = std::collections::HashMap::new();
88        let java_enum_fields = overrides.as_ref().map(|o| &o.enum_fields).unwrap_or(&empty_enum_fields);
89
90        // Build effective nested_types by merging defaults with configured overrides.
91        let mut effective_nested_types = default_java_nested_types();
92        if let Some(overrides_map) = overrides.map(|o| &o.nested_types) {
93            effective_nested_types.extend(overrides_map.clone());
94        }
95
96        // Resolve nested_types_optional from override (defaults to true for backward compatibility).
97        let nested_types_optional = overrides.map(|o| o.nested_types_optional).unwrap_or(true);
98
99        let field_resolver = FieldResolver::new(
100            &e2e_config.fields,
101            &e2e_config.fields_optional,
102            &e2e_config.result_fields,
103            &e2e_config.fields_array,
104            &std::collections::HashSet::new(),
105        );
106
107        for group in groups {
108            let active: Vec<&Fixture> = group
109                .fixtures
110                .iter()
111                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
112                .collect();
113
114            if active.is_empty() {
115                continue;
116            }
117
118            let class_file_name = format!("{}Test.java", sanitize_filename(&group.category).to_upper_camel_case());
119            let content = render_test_file(
120                &group.category,
121                &active,
122                &class_name,
123                &function_name,
124                &java_group_id,
125                result_var,
126                &e2e_config.call.args,
127                options_type.as_deref(),
128                &field_resolver,
129                result_is_simple,
130                java_enum_fields,
131                e2e_config,
132                &effective_nested_types,
133                nested_types_optional,
134            );
135            files.push(GeneratedFile {
136                path: test_base.join(class_file_name),
137                content,
138                generated_header: true,
139            });
140        }
141
142        Ok(files)
143    }
144
145    fn language_name(&self) -> &'static str {
146        "java"
147    }
148}
149
150// ---------------------------------------------------------------------------
151// Rendering
152// ---------------------------------------------------------------------------
153
154fn render_pom_xml(
155    pkg_name: &str,
156    java_group_id: &str,
157    pkg_version: &str,
158    dep_mode: crate::config::DependencyMode,
159) -> String {
160    // pkg_name may be in "groupId:artifactId" Maven format; split accordingly.
161    let (dep_group_id, dep_artifact_id) = if let Some((g, a)) = pkg_name.split_once(':') {
162        (g, a)
163    } else {
164        (java_group_id, pkg_name)
165    };
166    let artifact_id = format!("{dep_artifact_id}-e2e-java");
167    let dep_block = match dep_mode {
168        crate::config::DependencyMode::Registry => {
169            format!(
170                r#"        <dependency>
171            <groupId>{dep_group_id}</groupId>
172            <artifactId>{dep_artifact_id}</artifactId>
173            <version>{pkg_version}</version>
174        </dependency>"#
175            )
176        }
177        crate::config::DependencyMode::Local => {
178            format!(
179                r#"        <dependency>
180            <groupId>{dep_group_id}</groupId>
181            <artifactId>{dep_artifact_id}</artifactId>
182            <version>{pkg_version}</version>
183            <scope>system</scope>
184            <systemPath>${{project.basedir}}/../../packages/java/target/{dep_artifact_id}-{pkg_version}.jar</systemPath>
185        </dependency>"#
186            )
187        }
188    };
189    format!(
190        r#"<?xml version="1.0" encoding="UTF-8"?>
191<project xmlns="http://maven.apache.org/POM/4.0.0"
192         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
193         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
194    <modelVersion>4.0.0</modelVersion>
195
196    <groupId>{java_group_id}</groupId>
197    <artifactId>{artifact_id}</artifactId>
198    <version>0.1.0</version>
199
200    <properties>
201        <maven.compiler.source>25</maven.compiler.source>
202        <maven.compiler.target>25</maven.compiler.target>
203        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
204        <junit.version>{junit}</junit.version>
205    </properties>
206
207    <dependencies>
208{dep_block}
209        <dependency>
210            <groupId>com.fasterxml.jackson.core</groupId>
211            <artifactId>jackson-databind</artifactId>
212            <version>{jackson}</version>
213        </dependency>
214        <dependency>
215            <groupId>com.fasterxml.jackson.datatype</groupId>
216            <artifactId>jackson-datatype-jdk8</artifactId>
217            <version>{jackson}</version>
218        </dependency>
219        <dependency>
220            <groupId>org.jetbrains</groupId>
221            <artifactId>annotations</artifactId>
222            <version>24.1.0</version>
223        </dependency>
224        <dependency>
225            <groupId>org.junit.jupiter</groupId>
226            <artifactId>junit-jupiter</artifactId>
227            <version>${{junit.version}}</version>
228            <scope>test</scope>
229        </dependency>
230    </dependencies>
231
232    <build>
233        <plugins>
234            <plugin>
235                <groupId>org.codehaus.mojo</groupId>
236                <artifactId>build-helper-maven-plugin</artifactId>
237                <version>{build_helper}</version>
238                <executions>
239                    <execution>
240                        <id>add-test-source</id>
241                        <phase>generate-test-sources</phase>
242                        <goals>
243                            <goal>add-test-source</goal>
244                        </goals>
245                        <configuration>
246                            <sources>
247                                <source>src/test/java</source>
248                            </sources>
249                        </configuration>
250                    </execution>
251                </executions>
252            </plugin>
253            <plugin>
254                <groupId>org.apache.maven.plugins</groupId>
255                <artifactId>maven-surefire-plugin</artifactId>
256                <version>{maven_surefire}</version>
257                <configuration>
258                    <argLine>--enable-preview --enable-native-access=ALL-UNNAMED -Djava.library.path=${{project.basedir}}/../../target/release</argLine>
259                    <workingDirectory>${{project.basedir}}/../../test_documents</workingDirectory>
260                </configuration>
261            </plugin>
262        </plugins>
263    </build>
264</project>
265"#,
266        junit = tv::maven::JUNIT,
267        jackson = tv::maven::JACKSON_E2E,
268        build_helper = tv::maven::BUILD_HELPER_MAVEN_PLUGIN,
269        maven_surefire = tv::maven::MAVEN_SUREFIRE_PLUGIN_E2E,
270    )
271}
272
273#[allow(clippy::too_many_arguments)]
274fn render_test_file(
275    category: &str,
276    fixtures: &[&Fixture],
277    class_name: &str,
278    function_name: &str,
279    java_group_id: &str,
280    result_var: &str,
281    args: &[crate::config::ArgMapping],
282    options_type: Option<&str>,
283    field_resolver: &FieldResolver,
284    result_is_simple: bool,
285    enum_fields: &std::collections::HashMap<String, String>,
286    e2e_config: &E2eConfig,
287    nested_types: &std::collections::HashMap<String, String>,
288    nested_types_optional: bool,
289) -> String {
290    let mut out = String::new();
291    out.push_str(&hash::header(CommentStyle::DoubleSlash));
292    let test_class_name = format!("{}Test", sanitize_filename(category).to_upper_camel_case());
293
294    // If the class_name is fully qualified (contains '.'), import it and use
295    // only the simple name for method calls.  Otherwise use it as-is.
296    let (import_path, simple_class) = if class_name.contains('.') {
297        let simple = class_name.rsplit('.').next().unwrap_or(class_name);
298        (class_name, simple)
299    } else {
300        ("", class_name)
301    };
302
303    let _ = writeln!(out, "package {java_group_id}.e2e;");
304    let _ = writeln!(out);
305
306    // Check if any fixture (with its resolved call) will emit MAPPER usage.
307    // Note: we no longer use MAPPER for json_object options (using builder pattern instead).
308    // But we still need it for handle args and HTTP fixtures.
309    let lang_for_om = "java";
310    let _needs_object_mapper_for_options = false;
311    // Also need ObjectMapper when a handle arg has a non-null config.
312    let needs_object_mapper_for_handle = fixtures.iter().any(|f| {
313        args.iter().filter(|a| a.arg_type == "handle").any(|a| {
314            let v = f.input.get(&a.field).unwrap_or(&serde_json::Value::Null);
315            !(v.is_null() || v.is_object() && v.as_object().is_some_and(|o| o.is_empty()))
316        })
317    });
318    // HTTP fixtures always need ObjectMapper for JSON body comparison.
319    let has_http_fixtures = fixtures.iter().any(|f| f.http.is_some());
320    let needs_object_mapper = needs_object_mapper_for_handle || has_http_fixtures;
321
322    // Collect all options_type values used (class-level + per-fixture call overrides).
323    let mut all_options_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
324    if let Some(t) = options_type {
325        all_options_types.insert(t.to_string());
326    }
327    for f in fixtures.iter() {
328        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
329        if let Some(ov) = call_cfg.overrides.get(lang_for_om) {
330            if let Some(t) = &ov.options_type {
331                all_options_types.insert(t.clone());
332            }
333        }
334        // Detect batch item types used in this fixture
335        for arg in &call_cfg.args {
336            if let Some(elem_type) = &arg.element_type {
337                if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
338                    all_options_types.insert(elem_type.clone());
339                }
340            }
341        }
342    }
343
344    let _ = writeln!(out, "import org.junit.jupiter.api.Test;");
345    let _ = writeln!(out, "import static org.junit.jupiter.api.Assertions.*;");
346    if !import_path.is_empty() {
347        let _ = writeln!(out, "import {import_path};");
348    }
349    if needs_object_mapper {
350        let _ = writeln!(out, "import com.fasterxml.jackson.databind.ObjectMapper;");
351        let _ = writeln!(out, "import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;");
352    }
353    // Collect all enum types used in builder expressions across all fixtures.
354    let mut enum_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
355    // Collect nested config types actually referenced in fixture builder expressions
356    // (rather than importing all defaults unconditionally, which causes javac errors
357    // when a type like ChunkingConfig doesn't exist in the binding's package).
358    let mut nested_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
359    for f in fixtures.iter() {
360        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
361        for arg in &call_cfg.args {
362            if arg.arg_type == "json_object" {
363                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
364                if let Some(val) = f.input.get(field) {
365                    if !val.is_null() && !val.is_array() {
366                        if let Some(obj) = val.as_object() {
367                            collect_enum_and_nested_types(obj, enum_fields, &mut enum_types_used);
368                            collect_nested_type_names(obj, nested_types, &mut nested_types_used);
369                        }
370                    }
371                }
372            }
373        }
374    }
375
376    // Import all options types used across fixtures (for builder expressions and MAPPER).
377    if !all_options_types.is_empty() {
378        let opts_pkg = if !import_path.is_empty() {
379            import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("")
380        } else {
381            ""
382        };
383        for opts_type in &all_options_types {
384            let qualified = if opts_pkg.is_empty() {
385                opts_type.clone()
386            } else {
387                format!("{opts_pkg}.{opts_type}")
388            };
389            let _ = writeln!(out, "import {qualified};");
390        }
391    }
392
393    // Import all enum types used in builder expressions
394    if !enum_types_used.is_empty() && !import_path.is_empty() {
395        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
396        for enum_type in &enum_types_used {
397            let _ = writeln!(out, "import {binding_pkg}.{enum_type};");
398        }
399    }
400
401    // Import only the nested options types that are actually referenced in fixture
402    // builder expressions. Using `nested_types_used` (populated above) rather than
403    // all `nested_types.values()` avoids javac `cannot find symbol` errors for types
404    // like ChunkingConfig that exist in other Kreuzberg bindings but not this one.
405    if !nested_types_used.is_empty() && !import_path.is_empty() {
406        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
407        for type_name in &nested_types_used {
408            let _ = writeln!(out, "import {binding_pkg}.{type_name};");
409        }
410    }
411
412    // Import CrawlConfig when handle args need JSON deserialization.
413    if needs_object_mapper_for_handle && !import_path.is_empty() {
414        let pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
415        let _ = writeln!(out, "import {pkg}.CrawlConfig;");
416    }
417    // Import visitor types when any fixture uses visitor callbacks.
418    let has_visitor_fixtures = fixtures.iter().any(|f| f.visitor.is_some());
419    if has_visitor_fixtures && !import_path.is_empty() {
420        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
421        if !binding_pkg.is_empty() {
422            let _ = writeln!(out, "import {binding_pkg}.Visitor;");
423            let _ = writeln!(out, "import {binding_pkg}.NodeContext;");
424            let _ = writeln!(out, "import {binding_pkg}.VisitResult;");
425        }
426    }
427    // Import Optional when using builder expressions with optional fields
428    if !all_options_types.is_empty() {
429        let _ = writeln!(out, "import java.util.Optional;");
430    }
431    let _ = writeln!(out);
432
433    let _ = writeln!(out, "/** E2e tests for category: {category}. */");
434    let _ = writeln!(out, "@SuppressWarnings(\"checkstyle:LineLength\")");
435    let _ = writeln!(out, "class {test_class_name} {{");
436
437    if needs_object_mapper {
438        let _ = writeln!(out);
439        let _ = writeln!(
440            out,
441            "    private static final ObjectMapper MAPPER = new ObjectMapper().registerModule(new Jdk8Module());"
442        );
443    }
444
445    for fixture in fixtures {
446        render_test_method(
447            &mut out,
448            fixture,
449            simple_class,
450            function_name,
451            result_var,
452            args,
453            options_type,
454            field_resolver,
455            result_is_simple,
456            enum_fields,
457            e2e_config,
458            nested_types,
459            nested_types_optional,
460        );
461        let _ = writeln!(out);
462    }
463
464    let _ = writeln!(out, "}}");
465    out
466}
467
468// ---------------------------------------------------------------------------
469// HTTP test rendering — shared-driver integration
470// ---------------------------------------------------------------------------
471
472/// Thin renderer that emits JUnit 5 test methods targeting a mock server via
473/// `java.net.http.HttpClient`. Satisfies [`client::TestClientRenderer`] so the
474/// shared [`client::http_call::render_http_test`] driver drives the call sequence.
475struct JavaTestClientRenderer;
476
477impl client::TestClientRenderer for JavaTestClientRenderer {
478    fn language_name(&self) -> &'static str {
479        "java"
480    }
481
482    /// Convert a fixture id to the UpperCamelCase suffix appended to `test`.
483    ///
484    /// The emitted method name is `test{fn_name}`, matching the pre-existing shape.
485    fn sanitize_test_name(&self, id: &str) -> String {
486        id.to_upper_camel_case()
487    }
488
489    /// Emit `@Test void test{fn_name}() throws Exception {`.
490    ///
491    /// When `skip_reason` is `Some`, the body is a single
492    /// `Assumptions.assumeTrue(false, ...)` call and `render_test_close` closes
493    /// the brace symmetrically.
494    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
495        let _ = writeln!(out, "    @Test");
496        if let Some(reason) = skip_reason {
497            let escaped_reason = escape_java(reason);
498            let _ = writeln!(out, "    void test{fn_name}() {{");
499            let _ = writeln!(out, "        // {description}");
500            let _ = writeln!(
501                out,
502                "        org.junit.jupiter.api.Assumptions.assumeTrue(false, \"{escaped_reason}\");"
503            );
504        } else {
505            let _ = writeln!(out, "    void test{fn_name}() throws Exception {{");
506            let _ = writeln!(out, "        // {description}");
507            // Resolve base URL once at the top of every non-skipped test.
508            let _ = writeln!(out, "        String baseUrl = System.getenv(\"MOCK_SERVER_URL\");");
509            let _ = writeln!(out, "        if (baseUrl == null) baseUrl = \"http://localhost:8080\";");
510        }
511    }
512
513    /// Emit the closing `}` for a test method.
514    fn render_test_close(&self, out: &mut String) {
515        let _ = writeln!(out, "    }}");
516    }
517
518    /// Emit a `java.net.http.HttpClient` request to `baseUrl + path`.
519    ///
520    /// Binds the response to `response` (the `ctx.response_var`). Java's
521    /// `HttpClient` disallows a fixed set of restricted headers; those are
522    /// silently dropped so the test compiles.
523    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
524        // Java's HttpClient throws IllegalArgumentException for these headers.
525        const JAVA_RESTRICTED_HEADERS: &[&str] = &["connection", "content-length", "expect", "host", "upgrade"];
526
527        let method = ctx.method.to_uppercase();
528
529        // Build the path, appending query params when present.
530        let path = if ctx.query_params.is_empty() {
531            ctx.path.to_string()
532        } else {
533            let pairs: Vec<String> = ctx
534                .query_params
535                .iter()
536                .map(|(k, v)| {
537                    let val_str = match v {
538                        serde_json::Value::String(s) => s.clone(),
539                        other => other.to_string(),
540                    };
541                    format!("{}={}", k, escape_java(&val_str))
542                })
543                .collect();
544            format!("{}?{}", ctx.path, pairs.join("&"))
545        };
546        let _ = writeln!(
547            out,
548            "        java.net.URI uri = java.net.URI.create(baseUrl + \"{path}\");"
549        );
550
551        let body_publisher = if let Some(body) = ctx.body {
552            let json = serde_json::to_string(body).unwrap_or_default();
553            let escaped = escape_java(&json);
554            format!("java.net.http.HttpRequest.BodyPublishers.ofString(\"{escaped}\")")
555        } else {
556            "java.net.http.HttpRequest.BodyPublishers.noBody()".to_string()
557        };
558
559        let _ = writeln!(out, "        var builder = java.net.http.HttpRequest.newBuilder(uri)");
560        let _ = writeln!(out, "            .method(\"{method}\", {body_publisher});");
561
562        // Content-Type header — only when a body is present.
563        if ctx.body.is_some() {
564            let content_type = ctx.content_type.unwrap_or("application/json");
565            // Only emit when not already in ctx.headers (avoid duplicate Content-Type).
566            if !ctx.headers.keys().any(|k| k.to_lowercase() == "content-type") {
567                let _ = writeln!(
568                    out,
569                    "        builder = builder.header(\"Content-Type\", \"{content_type}\");"
570                );
571            }
572        }
573
574        // Explicit request headers — skip Java-restricted ones.
575        for (name, value) in ctx.headers {
576            if JAVA_RESTRICTED_HEADERS.contains(&name.to_lowercase().as_str()) {
577                continue;
578            }
579            let escaped_name = escape_java(name);
580            let escaped_value = escape_java(value);
581            let _ = writeln!(
582                out,
583                "        builder = builder.header(\"{escaped_name}\", \"{escaped_value}\");"
584            );
585        }
586
587        // Cookies as a single `Cookie` header.
588        if !ctx.cookies.is_empty() {
589            let cookie_str: Vec<String> = ctx.cookies.iter().map(|(k, v)| format!("{k}={v}")).collect();
590            let cookie_header = escape_java(&cookie_str.join("; "));
591            let _ = writeln!(
592                out,
593                "        builder = builder.header(\"Cookie\", \"{cookie_header}\");"
594            );
595        }
596
597        let response_var = ctx.response_var;
598        let _ = writeln!(
599            out,
600            "        var {response_var} = java.net.http.HttpClient.newHttpClient()"
601        );
602        let _ = writeln!(
603            out,
604            "            .send(builder.build(), java.net.http.HttpResponse.BodyHandlers.ofString());"
605        );
606    }
607
608    /// Emit `assertEquals(status, response.statusCode(), ...)`.
609    fn render_assert_status(&self, out: &mut String, response_var: &str, status: u16) {
610        let _ = writeln!(
611            out,
612            "        assertEquals({status}, {response_var}.statusCode(), \"status code mismatch\");"
613        );
614    }
615
616    /// Emit a header assertion using `response.headers().firstValue(...)`.
617    ///
618    /// Handles special tokens: `<<present>>`, `<<absent>>`, `<<uuid>>`.
619    fn render_assert_header(&self, out: &mut String, response_var: &str, name: &str, expected: &str) {
620        let escaped_name = escape_java(name);
621        match expected {
622            "<<present>>" => {
623                let _ = writeln!(
624                    out,
625                    "        assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isPresent(), \"header {escaped_name} should be present\");"
626                );
627            }
628            "<<absent>>" => {
629                let _ = writeln!(
630                    out,
631                    "        assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isEmpty(), \"header {escaped_name} should be absent\");"
632                );
633            }
634            "<<uuid>>" => {
635                let _ = writeln!(
636                    out,
637                    "        assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").matches(\"[0-9a-fA-F]{{8}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{12}}\"), \"header {escaped_name} should be a UUID\");"
638                );
639            }
640            literal => {
641                let escaped_value = escape_java(literal);
642                let _ = writeln!(
643                    out,
644                    "        assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").contains(\"{escaped_value}\"), \"header {escaped_name} mismatch\");"
645                );
646            }
647        }
648    }
649
650    /// Emit a JSON body equality assertion using Jackson's `MAPPER.readTree`.
651    fn render_assert_json_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
652        match expected {
653            serde_json::Value::Object(_) | serde_json::Value::Array(_) => {
654                let json_str = serde_json::to_string(expected).unwrap_or_default();
655                let escaped = escape_java(&json_str);
656                let _ = writeln!(out, "        var bodyJson = MAPPER.readTree({response_var}.body());");
657                let _ = writeln!(out, "        var expectedJson = MAPPER.readTree(\"{escaped}\");");
658                let _ = writeln!(out, "        assertEquals(expectedJson, bodyJson, \"body mismatch\");");
659            }
660            serde_json::Value::String(s) => {
661                let escaped = escape_java(s);
662                let _ = writeln!(
663                    out,
664                    "        assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");"
665                );
666            }
667            other => {
668                let escaped = escape_java(&other.to_string());
669                let _ = writeln!(
670                    out,
671                    "        assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");"
672                );
673            }
674        }
675    }
676
677    /// Emit partial JSON body assertions: parse once, then assert each expected field.
678    fn render_assert_partial_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
679        if let Some(obj) = expected.as_object() {
680            let _ = writeln!(out, "        var partialJson = MAPPER.readTree({response_var}.body());");
681            for (key, val) in obj {
682                let escaped_key = escape_java(key);
683                let json_str = serde_json::to_string(val).unwrap_or_default();
684                let escaped_val = escape_java(&json_str);
685                let _ = writeln!(
686                    out,
687                    "        assertEquals(MAPPER.readTree(\"{escaped_val}\"), partialJson.get(\"{escaped_key}\"), \"body field '{escaped_key}' mismatch\");"
688                );
689            }
690        }
691    }
692
693    /// Emit validation-error assertions: parse the body and check each expected message.
694    fn render_assert_validation_errors(
695        &self,
696        out: &mut String,
697        response_var: &str,
698        errors: &[crate::fixture::ValidationErrorExpectation],
699    ) {
700        let _ = writeln!(out, "        var veBody = {response_var}.body();");
701        for err in errors {
702            let escaped_msg = escape_java(&err.msg);
703            let _ = writeln!(
704                out,
705                "        assertTrue(veBody.contains(\"{escaped_msg}\"), \"expected validation error message: {escaped_msg}\");"
706            );
707        }
708    }
709}
710
711/// Render an HTTP server test method using `java.net.http.HttpClient` against
712/// `MOCK_SERVER_URL`. Delegates to the shared
713/// [`client::http_call::render_http_test`] driver via [`JavaTestClientRenderer`].
714///
715/// The one Java-specific pre-condition — HTTP 101 (WebSocket upgrade) causing an
716/// `EOFException` in `HttpClient` — is handled here before delegating.
717fn render_http_test_method(out: &mut String, fixture: &Fixture, http: &HttpFixture) {
718    // HTTP 101 (WebSocket upgrade) causes Java's HttpClient to throw EOFException.
719    // Emit an assumeTrue(false, ...) stub so the test is skipped rather than failing.
720    if http.expected_response.status_code == 101 {
721        let method_name = fixture.id.to_upper_camel_case();
722        let description = &fixture.description;
723        let _ = writeln!(out, "    @Test");
724        let _ = writeln!(out, "    void test{method_name}() {{");
725        let _ = writeln!(out, "        // {description}");
726        let _ = writeln!(
727            out,
728            "        org.junit.jupiter.api.Assumptions.assumeTrue(false, \"Skipped: Java HttpClient cannot handle 101 Switching Protocols responses\");"
729        );
730        let _ = writeln!(out, "    }}");
731        return;
732    }
733
734    client::http_call::render_http_test(out, &JavaTestClientRenderer, fixture);
735}
736
737#[allow(clippy::too_many_arguments)]
738fn render_test_method(
739    out: &mut String,
740    fixture: &Fixture,
741    class_name: &str,
742    _function_name: &str,
743    _result_var: &str,
744    _args: &[crate::config::ArgMapping],
745    options_type: Option<&str>,
746    field_resolver: &FieldResolver,
747    result_is_simple: bool,
748    enum_fields: &std::collections::HashMap<String, String>,
749    e2e_config: &E2eConfig,
750    nested_types: &std::collections::HashMap<String, String>,
751    nested_types_optional: bool,
752) {
753    // Delegate HTTP fixtures to the HTTP-specific renderer.
754    if let Some(http) = &fixture.http {
755        render_http_test_method(out, fixture, http);
756        return;
757    }
758
759    // Resolve per-fixture call config (supports named calls via fixture.call field).
760    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
761    let lang = "java";
762    let call_overrides = call_config.overrides.get(lang);
763    let effective_function_name = call_overrides
764        .and_then(|o| o.function.as_ref())
765        .cloned()
766        .unwrap_or_else(|| call_config.function.to_lower_camel_case());
767    let effective_result_var = &call_config.result_var;
768    let effective_args = &call_config.args;
769    let function_name = effective_function_name.as_str();
770    let result_var = effective_result_var.as_str();
771    let args: &[crate::config::ArgMapping] = effective_args.as_slice();
772
773    let method_name = fixture.id.to_upper_camel_case();
774    let description = &fixture.description;
775    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
776
777    // Resolve per-fixture options_type: prefer the java call override, fall back to class-level.
778    let effective_options_type: Option<String> = call_overrides
779        .and_then(|o| o.options_type.clone())
780        .or_else(|| options_type.map(|s| s.to_string()));
781    let effective_options_type = effective_options_type.as_deref();
782
783    // Resolve per-fixture result_is_simple and result_is_bytes from the call override.
784    let effective_result_is_simple =
785        call_overrides.is_some_and(|o| o.result_is_simple) || call_config.result_is_simple || result_is_simple;
786    let effective_result_is_bytes = call_overrides.is_some_and(|o| o.result_is_bytes);
787
788    // Check if this test needs ObjectMapper deserialization for json_object args.
789    // Strip "input." prefix when looking up field in fixture.input.
790    let needs_deser = effective_options_type.is_some()
791        && args.iter().any(|arg| {
792            if arg.arg_type != "json_object" {
793                return false;
794            }
795            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
796            fixture.input.get(field).is_some_and(|v| !v.is_null() && !v.is_array())
797        });
798
799    // Always add throws Exception since the convert method may throw checked exceptions.
800    let throws_clause = " throws Exception";
801
802    let _ = writeln!(out, "    @Test");
803    let _ = writeln!(out, "    void test{method_name}(){throws_clause} {{");
804    let _ = writeln!(out, "        // {description}");
805
806    // Emit builder expressions for json_object args.
807    if let (true, Some(opts_type)) = (needs_deser, effective_options_type) {
808        for arg in args {
809            if arg.arg_type == "json_object" {
810                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
811                if let Some(val) = fixture.input.get(field) {
812                    if !val.is_null() && !val.is_array() {
813                        if let Some(obj) = val.as_object() {
814                            // Generate builder expression: TypeName.builder().withFieldName(value)...build()
815                            let empty_path_fields: Vec<String> = Vec::new();
816                            let path_fields = call_overrides.map(|o| &o.path_fields).unwrap_or(&empty_path_fields);
817                            let builder_expr = java_builder_expression(
818                                obj,
819                                opts_type,
820                                enum_fields,
821                                nested_types,
822                                nested_types_optional,
823                                path_fields,
824                            );
825                            let var_name = &arg.name;
826                            let _ = writeln!(out, "        var {var_name} = {builder_expr};");
827                        }
828                    }
829                }
830            }
831        }
832    }
833
834    let (mut setup_lines, args_str) =
835        build_args_and_setup(&fixture.input, args, class_name, effective_options_type, &fixture.id);
836
837    // Build visitor if present and add to setup
838    let mut visitor_var = String::new();
839    let mut has_visitor_fixture = false;
840    if let Some(visitor_spec) = &fixture.visitor {
841        visitor_var = build_java_visitor(&mut setup_lines, visitor_spec, class_name);
842        has_visitor_fixture = true;
843    }
844
845    for line in &setup_lines {
846        let _ = writeln!(out, "        {line}");
847    }
848
849    // When visitor is present, attach it to the options parameter
850    let final_args = if has_visitor_fixture {
851        if args_str.is_empty() {
852            // No arguments: just create ConversionOptions with visitor
853            format!("new ConversionOptions().withVisitor({})", visitor_var)
854        } else if args_str.contains("new ConversionOptions")
855            || args_str.contains("ConversionOptionsBuilder")
856            || args_str.contains(".builder()")
857        {
858            // Options are being built (either new ConversionOptions(), builder pattern, or .builder().build())
859            // append .withVisitor() call before .build() if present
860            if args_str.contains(".build()") {
861                // Insert .withVisitor() before the final .build()
862                let idx = args_str.rfind(".build()").unwrap();
863                format!("{}.withVisitor({}){}", &args_str[..idx], visitor_var, &args_str[idx..])
864            } else {
865                // Already a chain, just append
866                format!("{}.withVisitor({})", args_str, visitor_var)
867            }
868        } else if args_str.ends_with(", null") {
869            // Replace trailing null options with ConversionOptions containing visitor
870            let base = &args_str[..args_str.len() - 6];
871            format!("{}, new ConversionOptions().withVisitor({})", base, visitor_var)
872        } else {
873            // args_str is just the html argument(s) — append new ConversionOptions with visitor
874            format!("{}, new ConversionOptions().withVisitor({})", args_str, visitor_var)
875        }
876    } else {
877        args_str
878    };
879
880    if expects_error {
881        let _ = writeln!(
882            out,
883            "        assertThrows(Exception.class, () -> {class_name}.{function_name}({final_args}));"
884        );
885        let _ = writeln!(out, "    }}");
886        return;
887    }
888
889    if call_config.returns_void {
890        let _ = writeln!(out, "        {class_name}.{function_name}({final_args});");
891        let _ = writeln!(out, "    }}");
892        return;
893    }
894
895    let _ = writeln!(
896        out,
897        "        var {result_var} = {class_name}.{function_name}({final_args});"
898    );
899
900    // Emit a `source` variable for run_query assertions that need the raw bytes.
901    let needs_source_var = fixture
902        .assertions
903        .iter()
904        .any(|a| a.assertion_type == "method_result" && a.method.as_deref() == Some("run_query"));
905    if needs_source_var {
906        // Find the source_code arg to emit a `source` binding.
907        if let Some(source_arg) = args.iter().find(|a| a.field == "source_code") {
908            let field = source_arg.field.strip_prefix("input.").unwrap_or(&source_arg.field);
909            if let Some(val) = fixture.input.get(field) {
910                let java_val = json_to_java(val);
911                let _ = writeln!(out, "        var source = {java_val}.getBytes();");
912            }
913        }
914    }
915
916    for assertion in &fixture.assertions {
917        render_assertion(
918            out,
919            assertion,
920            result_var,
921            class_name,
922            field_resolver,
923            effective_result_is_simple,
924            effective_result_is_bytes,
925            enum_fields,
926        );
927    }
928
929    let _ = writeln!(out, "    }}");
930}
931
932/// Build setup lines (e.g. handle creation) and the argument list for the function call.
933///
934/// Returns `(setup_lines, args_string)`.
935fn build_args_and_setup(
936    input: &serde_json::Value,
937    args: &[crate::config::ArgMapping],
938    class_name: &str,
939    options_type: Option<&str>,
940    fixture_id: &str,
941) -> (Vec<String>, String) {
942    if args.is_empty() {
943        return (Vec::new(), String::new());
944    }
945
946    let mut setup_lines: Vec<String> = Vec::new();
947    let mut parts: Vec<String> = Vec::new();
948
949    for arg in args {
950        if arg.arg_type == "mock_url" {
951            setup_lines.push(format!(
952                "String {} = System.getenv(\"MOCK_SERVER_URL\") + \"/fixtures/{fixture_id}\";",
953                arg.name,
954            ));
955            parts.push(arg.name.clone());
956            continue;
957        }
958
959        if arg.arg_type == "handle" {
960            // Generate a createEngine (or equivalent) call and pass the variable.
961            let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
962            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
963            let config_value = input.get(field).unwrap_or(&serde_json::Value::Null);
964            if config_value.is_null()
965                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
966            {
967                setup_lines.push(format!("var {} = {class_name}.{constructor_name}(null);", arg.name,));
968            } else {
969                let json_str = serde_json::to_string(config_value).unwrap_or_default();
970                let name = &arg.name;
971                setup_lines.push(format!(
972                    "var {name}Config = MAPPER.readValue(\"{}\", CrawlConfig.class);",
973                    escape_java(&json_str),
974                ));
975                setup_lines.push(format!(
976                    "var {} = {class_name}.{constructor_name}({name}Config);",
977                    arg.name,
978                    name = name,
979                ));
980            }
981            parts.push(arg.name.clone());
982            continue;
983        }
984
985        let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
986        let val = input.get(field);
987        match val {
988            None | Some(serde_json::Value::Null) if arg.optional => {
989                // Optional arg with no fixture value: emit positional null/default so the call
990                // has the right arity. For json_object optional args, build an empty default object
991                // so we get the right type rather than a raw null.
992                if arg.arg_type == "json_object" {
993                    if let Some(opts_type) = options_type {
994                        parts.push(format!("{opts_type}.builder().build()"));
995                    } else {
996                        parts.push("null".to_string());
997                    }
998                } else {
999                    parts.push("null".to_string());
1000                }
1001            }
1002            None | Some(serde_json::Value::Null) => {
1003                // Required arg with no fixture value: pass a language-appropriate default.
1004                let default_val = match arg.arg_type.as_str() {
1005                    "string" | "file_path" => "\"\"".to_string(),
1006                    "int" | "integer" => "0".to_string(),
1007                    "float" | "number" => "0.0d".to_string(),
1008                    "bool" | "boolean" => "false".to_string(),
1009                    _ => "null".to_string(),
1010                };
1011                parts.push(default_val);
1012            }
1013            Some(v) => {
1014                if arg.arg_type == "json_object" {
1015                    // Array json_object args: emit inline Java list expression.
1016                    // Check for batch item arrays first (element_type = BatchBytesItem/BatchFileItem).
1017                    if v.is_array() {
1018                        if let Some(elem_type) = &arg.element_type {
1019                            if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
1020                                parts.push(emit_java_batch_item_array(v, elem_type));
1021                                continue;
1022                            }
1023                        }
1024                        // Otherwise use element_type to emit the correct numeric literal suffix (f vs d).
1025                        let elem_type = arg.element_type.as_deref();
1026                        parts.push(json_to_java_typed(v, elem_type));
1027                        continue;
1028                    }
1029                    // Object json_object args with options_type: use pre-deserialized variable.
1030                    if options_type.is_some() {
1031                        parts.push(arg.name.clone());
1032                        continue;
1033                    }
1034                    parts.push(json_to_java(v));
1035                    continue;
1036                }
1037                // bytes args must be passed as byte[], not String.
1038                if arg.arg_type == "bytes" {
1039                    let val = json_to_java(v);
1040                    parts.push(format!("{val}.getBytes()"));
1041                    continue;
1042                }
1043                // file_path args must be wrapped in java.nio.file.Path.of().
1044                if arg.arg_type == "file_path" {
1045                    let val = json_to_java(v);
1046                    parts.push(format!("java.nio.file.Path.of({val})"));
1047                    continue;
1048                }
1049                parts.push(json_to_java(v));
1050            }
1051        }
1052    }
1053
1054    (setup_lines, parts.join(", "))
1055}
1056
1057#[allow(clippy::too_many_arguments)]
1058fn render_assertion(
1059    out: &mut String,
1060    assertion: &Assertion,
1061    result_var: &str,
1062    class_name: &str,
1063    field_resolver: &FieldResolver,
1064    result_is_simple: bool,
1065    result_is_bytes: bool,
1066    enum_fields: &std::collections::HashMap<String, String>,
1067) {
1068    // Handle synthetic/virtual fields that are computed rather than direct record accessors.
1069    if let Some(f) = &assertion.field {
1070        match f.as_str() {
1071            // ---- ExtractionResult chunk-level computed predicates ----
1072            "chunks_have_content" => {
1073                let pred = format!(
1074                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.content() != null && !c.content().isBlank())"
1075                );
1076                match assertion.assertion_type.as_str() {
1077                    "is_true" => {
1078                        let _ = writeln!(out, "        assertTrue({pred}, \"expected true\");");
1079                    }
1080                    "is_false" => {
1081                        let _ = writeln!(out, "        assertFalse({pred}, \"expected false\");");
1082                    }
1083                    _ => {
1084                        let _ = writeln!(
1085                            out,
1086                            "        // skipped: unsupported assertion on synthetic field '{f}'"
1087                        );
1088                    }
1089                }
1090                return;
1091            }
1092            "chunks_have_heading_context" => {
1093                let pred = format!(
1094                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.metadata().headingContext().isPresent())"
1095                );
1096                match assertion.assertion_type.as_str() {
1097                    "is_true" => {
1098                        let _ = writeln!(out, "        assertTrue({pred}, \"expected true\");");
1099                    }
1100                    "is_false" => {
1101                        let _ = writeln!(out, "        assertFalse({pred}, \"expected false\");");
1102                    }
1103                    _ => {
1104                        let _ = writeln!(
1105                            out,
1106                            "        // skipped: unsupported assertion on synthetic field '{f}'"
1107                        );
1108                    }
1109                }
1110                return;
1111            }
1112            "chunks_have_embeddings" => {
1113                let pred = format!(
1114                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.embedding() != null && !c.embedding().isEmpty())"
1115                );
1116                match assertion.assertion_type.as_str() {
1117                    "is_true" => {
1118                        let _ = writeln!(out, "        assertTrue({pred}, \"expected true\");");
1119                    }
1120                    "is_false" => {
1121                        let _ = writeln!(out, "        assertFalse({pred}, \"expected false\");");
1122                    }
1123                    _ => {
1124                        let _ = writeln!(
1125                            out,
1126                            "        // skipped: unsupported assertion on synthetic field '{f}'"
1127                        );
1128                    }
1129                }
1130                return;
1131            }
1132            "first_chunk_starts_with_heading" => {
1133                let pred = format!(
1134                    "{result_var}.chunks().orElse(java.util.List.of()).stream().findFirst().map(c -> c.metadata().headingContext().isPresent()).orElse(false)"
1135                );
1136                match assertion.assertion_type.as_str() {
1137                    "is_true" => {
1138                        let _ = writeln!(out, "        assertTrue({pred}, \"expected true\");");
1139                    }
1140                    "is_false" => {
1141                        let _ = writeln!(out, "        assertFalse({pred}, \"expected false\");");
1142                    }
1143                    _ => {
1144                        let _ = writeln!(
1145                            out,
1146                            "        // skipped: unsupported assertion on synthetic field '{f}'"
1147                        );
1148                    }
1149                }
1150                return;
1151            }
1152            // ---- EmbedResponse virtual fields ----
1153            // When result_is_simple=true the result IS List<List<Float>> (the raw embeddings list).
1154            // When result_is_simple=false the result has an .embeddings() accessor.
1155            "embedding_dimensions" => {
1156                // Dimension = size of the first embedding vector in the list.
1157                let embed_list = if result_is_simple {
1158                    result_var.to_string()
1159                } else {
1160                    format!("{result_var}.embeddings()")
1161                };
1162                let expr = format!("({embed_list}.isEmpty() ? 0 : {embed_list}.get(0).size())");
1163                match assertion.assertion_type.as_str() {
1164                    "equals" => {
1165                        if let Some(val) = &assertion.value {
1166                            let java_val = json_to_java(val);
1167                            let _ = writeln!(out, "        assertEquals({java_val}, {expr});");
1168                        }
1169                    }
1170                    "greater_than" => {
1171                        if let Some(val) = &assertion.value {
1172                            let java_val = json_to_java(val);
1173                            let _ = writeln!(
1174                                out,
1175                                "        assertTrue({expr} > {java_val}, \"expected > {java_val}\");"
1176                            );
1177                        }
1178                    }
1179                    _ => {
1180                        let _ = writeln!(out, "        // skipped: unsupported assertion on '{f}'");
1181                    }
1182                }
1183                return;
1184            }
1185            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1186                // These are validation predicates that require iterating the embedding matrix.
1187                let embed_list = if result_is_simple {
1188                    result_var.to_string()
1189                } else {
1190                    format!("{result_var}.embeddings()")
1191                };
1192                let pred = match f.as_str() {
1193                    "embeddings_valid" => {
1194                        format!("{embed_list}.stream().allMatch(e -> e != null && !e.isEmpty())")
1195                    }
1196                    "embeddings_finite" => {
1197                        format!("{embed_list}.stream().flatMap(java.util.Collection::stream).allMatch(Float::isFinite)")
1198                    }
1199                    "embeddings_non_zero" => {
1200                        format!("{embed_list}.stream().allMatch(e -> e.stream().anyMatch(v -> v != 0.0f))")
1201                    }
1202                    "embeddings_normalized" => format!(
1203                        "{embed_list}.stream().allMatch(e -> {{ double n = e.stream().mapToDouble(v -> v * v).sum(); return Math.abs(n - 1.0) < 1e-3; }})"
1204                    ),
1205                    _ => unreachable!(),
1206                };
1207                match assertion.assertion_type.as_str() {
1208                    "is_true" => {
1209                        let _ = writeln!(out, "        assertTrue({pred}, \"expected true\");");
1210                    }
1211                    "is_false" => {
1212                        let _ = writeln!(out, "        assertFalse({pred}, \"expected false\");");
1213                    }
1214                    _ => {
1215                        let _ = writeln!(out, "        // skipped: unsupported assertion on '{f}'");
1216                    }
1217                }
1218                return;
1219            }
1220            // ---- Fields not present on the Java ExtractionResult ----
1221            "keywords" | "keywords_count" => {
1222                let _ = writeln!(
1223                    out,
1224                    "        // skipped: field '{f}' not available on Java ExtractionResult"
1225                );
1226                return;
1227            }
1228            // ---- metadata not_empty / is_empty: Metadata is a required record, not Optional ----
1229            // Metadata has no .isEmpty() method; check that at least one optional field is present.
1230            "metadata" => {
1231                match assertion.assertion_type.as_str() {
1232                    "not_empty" => {
1233                        let _ = writeln!(
1234                            out,
1235                            "        assertTrue({result_var}.metadata().title().isPresent() || {result_var}.metadata().subject().isPresent() || !{result_var}.metadata().additional().isEmpty(), \"expected non-empty value\");"
1236                        );
1237                        return;
1238                    }
1239                    "is_empty" => {
1240                        let _ = writeln!(
1241                            out,
1242                            "        assertFalse({result_var}.metadata().title().isPresent() || {result_var}.metadata().subject().isPresent() || !{result_var}.metadata().additional().isEmpty(), \"expected empty value\");"
1243                        );
1244                        return;
1245                    }
1246                    _ => {} // fall through to normal handling
1247                }
1248            }
1249            _ => {}
1250        }
1251    }
1252
1253    // Skip assertions on fields that don't exist on the result type.
1254    if let Some(f) = &assertion.field {
1255        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1256            let _ = writeln!(out, "        // skipped: field '{f}' not available on result type");
1257            return;
1258        }
1259    }
1260
1261    // Determine if this field is an enum type (no `.contains()` on enums in Java).
1262    // Check both the raw fixture field path and the resolved (aliased) path so that
1263    // `fields_enum` entries can use either form (e.g., `"assets[].category"` or the
1264    // resolved `"assets[].asset_category"`).
1265    let field_is_enum = assertion
1266        .field
1267        .as_deref()
1268        .is_some_and(|f| enum_fields.contains_key(f) || enum_fields.contains_key(field_resolver.resolve(f)));
1269
1270    // Determine if this field is an array (List<T>) — needed to choose .toString() for
1271    // contains assertions, since List.contains(Object) uses equals() which won't match
1272    // strings against complex record types like StructureItem.
1273    let field_is_array = assertion
1274        .field
1275        .as_deref()
1276        .is_some_and(|f| field_resolver.is_array(field_resolver.resolve(f)));
1277
1278    let field_expr = if result_is_simple {
1279        result_var.to_string()
1280    } else {
1281        match &assertion.field {
1282            Some(f) if !f.is_empty() => {
1283                let accessor = field_resolver.accessor(f, "java", result_var);
1284                let resolved = field_resolver.resolve(f);
1285                // Unwrap Optional fields with a type-appropriate fallback.
1286                // Map.get() returns nullable, not Optional, so skip .orElse() for map access.
1287                // NOTE: is_optional() means the field is in optional_fields, but that doesn't
1288                // guarantee it returns Optional<T> in Java — nested fields like metadata.twitterCard
1289                // return @Nullable String, not Optional<String>. We detect this by checking
1290                // if the field path contains a dot (nested access).
1291                if field_resolver.is_optional(resolved) && !field_resolver.has_map_access(f) {
1292                    // All nullable fields in the Java binding return @Nullable types, not Optional<T>.
1293                    // Wrap them in Optional.ofNullable() so e2e tests can use .orElse() fallbacks.
1294                    let optional_expr = format!("java.util.Optional.ofNullable({accessor})");
1295                    match assertion.assertion_type.as_str() {
1296                        // For not_empty / is_empty on Optional fields, return the raw Optional
1297                        // so the assertion arms can call isPresent()/isEmpty().
1298                        "not_empty" | "is_empty" => optional_expr,
1299                        // For size/count assertions on Optional<List<T>> fields, use List.of() fallback.
1300                        "count_min" | "count_equals" => {
1301                            format!("{optional_expr}.orElse(java.util.List.of())")
1302                        }
1303                        // For numeric comparisons on Optional<Long/Integer> fields, use 0L.
1304                        "greater_than" | "less_than" | "greater_than_or_equal" | "less_than_or_equal" => {
1305                            if field_resolver.is_array(resolved) {
1306                                format!("{optional_expr}.orElse(java.util.List.of())")
1307                            } else {
1308                                format!("{optional_expr}.orElse(0L)")
1309                            }
1310                        }
1311                        // For equals on Optional fields, determine fallback based on whether value is numeric.
1312                        // If the fixture value is a number, use 0L; otherwise use "".
1313                        "equals" => {
1314                            if let Some(expected) = &assertion.value {
1315                                if expected.is_number() {
1316                                    format!("{optional_expr}.orElse(0L)")
1317                                } else {
1318                                    format!("{optional_expr}.orElse(\"\")")
1319                                }
1320                            } else {
1321                                format!("{optional_expr}.orElse(\"\")")
1322                            }
1323                        }
1324                        _ if field_resolver.is_array(resolved) => {
1325                            format!("{optional_expr}.orElse(java.util.List.of())")
1326                        }
1327                        _ => format!("{optional_expr}.orElse(\"\")"),
1328                    }
1329                } else {
1330                    accessor
1331                }
1332            }
1333            _ => result_var.to_string(),
1334        }
1335    };
1336
1337    // For enum fields, string-based assertions need .getValue() to convert the enum to
1338    // its serde-serialized lowercase string value (e.g., AssetCategory.Image -> "image").
1339    // All alef-generated Java enums expose a getValue() method annotated with @JsonValue.
1340    let string_expr = if field_is_enum {
1341        format!("{field_expr}.getValue()")
1342    } else {
1343        field_expr.clone()
1344    };
1345
1346    match assertion.assertion_type.as_str() {
1347        "equals" => {
1348            if let Some(expected) = &assertion.value {
1349                let java_val = json_to_java(expected);
1350                if expected.is_string() {
1351                    let _ = writeln!(out, "        assertEquals({java_val}, {string_expr}.trim());");
1352                } else if expected.is_number() && field_expr.contains(".orElse(\"\")") {
1353                    // For numeric "equals" on Optional fields with string fallback,
1354                    // the field must be Optional<Long/Integer>, not Optional<String>.
1355                    // Replace the string fallback with a numeric one.
1356                    let fixed_expr = field_expr.replace(".orElse(\"\")", ".orElse(0L)");
1357                    let _ = writeln!(out, "        assertEquals({java_val}, {fixed_expr});");
1358                } else {
1359                    let _ = writeln!(out, "        assertEquals({java_val}, {field_expr});");
1360                }
1361            }
1362        }
1363        "contains" => {
1364            if let Some(expected) = &assertion.value {
1365                let java_val = json_to_java(expected);
1366                // For array fields of complex objects (e.g. List<StructureItem>), use .toString()
1367                // because List.contains(Object) uses equals(), which won't match a String against
1368                // a record type. Java records produce toString() like "StructureItem[kind=Function, ...]".
1369                let check_expr = if field_is_array {
1370                    format!("{string_expr}.toString()")
1371                } else {
1372                    string_expr.clone()
1373                };
1374                let _ = writeln!(
1375                    out,
1376                    "        assertTrue({check_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
1377                );
1378            }
1379        }
1380        "contains_all" => {
1381            if let Some(values) = &assertion.values {
1382                for val in values {
1383                    let java_val = json_to_java(val);
1384                    let check_expr = if field_is_array {
1385                        format!("{string_expr}.toString()")
1386                    } else {
1387                        string_expr.clone()
1388                    };
1389                    let _ = writeln!(
1390                        out,
1391                        "        assertTrue({check_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
1392                    );
1393                }
1394            }
1395        }
1396        "not_contains" => {
1397            if let Some(expected) = &assertion.value {
1398                let java_val = json_to_java(expected);
1399                let check_expr = if field_is_array {
1400                    format!("{string_expr}.toString()")
1401                } else {
1402                    string_expr.clone()
1403                };
1404                let _ = writeln!(
1405                    out,
1406                    "        assertFalse({check_expr}.contains({java_val}), \"expected NOT to contain: \" + {java_val});"
1407                );
1408            }
1409        }
1410        "not_empty" => {
1411            let _ = writeln!(
1412                out,
1413                "        assertFalse({field_expr} == null || {field_expr}.isEmpty(), \"expected non-empty value\");"
1414            );
1415        }
1416        "is_empty" => {
1417            let _ = writeln!(
1418                out,
1419                "        assertTrue({field_expr} == null || {field_expr}.isEmpty(), \"expected empty value\");"
1420            );
1421        }
1422        "contains_any" => {
1423            if let Some(values) = &assertion.values {
1424                let checks: Vec<String> = values
1425                    .iter()
1426                    .map(|v| {
1427                        let java_val = json_to_java(v);
1428                        format!("{string_expr}.contains({java_val})")
1429                    })
1430                    .collect();
1431                let joined = checks.join(" || ");
1432                let _ = writeln!(
1433                    out,
1434                    "        assertTrue({joined}, \"expected to contain at least one of the specified values\");"
1435                );
1436            }
1437        }
1438        "greater_than" => {
1439            if let Some(val) = &assertion.value {
1440                let java_val = json_to_java(val);
1441                let _ = writeln!(
1442                    out,
1443                    "        assertTrue({field_expr} > {java_val}, \"expected > {java_val}\");"
1444                );
1445            }
1446        }
1447        "less_than" => {
1448            if let Some(val) = &assertion.value {
1449                let java_val = json_to_java(val);
1450                let _ = writeln!(
1451                    out,
1452                    "        assertTrue({field_expr} < {java_val}, \"expected < {java_val}\");"
1453                );
1454            }
1455        }
1456        "greater_than_or_equal" => {
1457            if let Some(val) = &assertion.value {
1458                let java_val = json_to_java(val);
1459                let _ = writeln!(
1460                    out,
1461                    "        assertTrue({field_expr} >= {java_val}, \"expected >= {java_val}\");"
1462                );
1463            }
1464        }
1465        "less_than_or_equal" => {
1466            if let Some(val) = &assertion.value {
1467                let java_val = json_to_java(val);
1468                let _ = writeln!(
1469                    out,
1470                    "        assertTrue({field_expr} <= {java_val}, \"expected <= {java_val}\");"
1471                );
1472            }
1473        }
1474        "starts_with" => {
1475            if let Some(expected) = &assertion.value {
1476                let java_val = json_to_java(expected);
1477                let _ = writeln!(
1478                    out,
1479                    "        assertTrue({string_expr}.startsWith({java_val}), \"expected to start with: \" + {java_val});"
1480                );
1481            }
1482        }
1483        "ends_with" => {
1484            if let Some(expected) = &assertion.value {
1485                let java_val = json_to_java(expected);
1486                let _ = writeln!(
1487                    out,
1488                    "        assertTrue({string_expr}.endsWith({java_val}), \"expected to end with: \" + {java_val});"
1489                );
1490            }
1491        }
1492        "min_length" => {
1493            if let Some(val) = &assertion.value {
1494                if let Some(n) = val.as_u64() {
1495                    // byte[] uses `.length` (array field), String uses `.length()` (method).
1496                    let len_expr = if result_is_bytes {
1497                        format!("{field_expr}.length")
1498                    } else {
1499                        format!("{field_expr}.length()")
1500                    };
1501                    let _ = writeln!(
1502                        out,
1503                        "        assertTrue({len_expr} >= {n}, \"expected length >= {n}\");"
1504                    );
1505                }
1506            }
1507        }
1508        "max_length" => {
1509            if let Some(val) = &assertion.value {
1510                if let Some(n) = val.as_u64() {
1511                    let len_expr = if result_is_bytes {
1512                        format!("{field_expr}.length")
1513                    } else {
1514                        format!("{field_expr}.length()")
1515                    };
1516                    let _ = writeln!(
1517                        out,
1518                        "        assertTrue({len_expr} <= {n}, \"expected length <= {n}\");"
1519                    );
1520                }
1521            }
1522        }
1523        "count_min" => {
1524            if let Some(val) = &assertion.value {
1525                if let Some(n) = val.as_u64() {
1526                    let _ = writeln!(
1527                        out,
1528                        "        assertTrue({field_expr}.size() >= {n}, \"expected at least {n} elements\");"
1529                    );
1530                }
1531            }
1532        }
1533        "count_equals" => {
1534            if let Some(val) = &assertion.value {
1535                if let Some(n) = val.as_u64() {
1536                    let _ = writeln!(
1537                        out,
1538                        "        assertEquals({n}, {field_expr}.size(), \"expected exactly {n} elements\");"
1539                    );
1540                }
1541            }
1542        }
1543        "is_true" => {
1544            let _ = writeln!(out, "        assertTrue({field_expr}, \"expected true\");");
1545        }
1546        "is_false" => {
1547            let _ = writeln!(out, "        assertFalse({field_expr}, \"expected false\");");
1548        }
1549        "method_result" => {
1550            if let Some(method_name) = &assertion.method {
1551                let call_expr = build_java_method_call(result_var, method_name, assertion.args.as_ref(), class_name);
1552                let check = assertion.check.as_deref().unwrap_or("is_true");
1553                // Methods that return a collection (List) rather than a scalar.
1554                let method_returns_collection =
1555                    matches!(method_name.as_str(), "find_nodes_by_type" | "findNodesByType");
1556                match check {
1557                    "equals" => {
1558                        if let Some(val) = &assertion.value {
1559                            if val.is_boolean() {
1560                                if val.as_bool() == Some(true) {
1561                                    let _ = writeln!(out, "        assertTrue({call_expr});");
1562                                } else {
1563                                    let _ = writeln!(out, "        assertFalse({call_expr});");
1564                                }
1565                            } else if method_returns_collection {
1566                                let java_val = json_to_java(val);
1567                                let _ = writeln!(out, "        assertEquals({java_val}, {call_expr}.size());");
1568                            } else {
1569                                let java_val = json_to_java(val);
1570                                let _ = writeln!(out, "        assertEquals({java_val}, {call_expr});");
1571                            }
1572                        }
1573                    }
1574                    "is_true" => {
1575                        let _ = writeln!(out, "        assertTrue({call_expr});");
1576                    }
1577                    "is_false" => {
1578                        let _ = writeln!(out, "        assertFalse({call_expr});");
1579                    }
1580                    "greater_than_or_equal" => {
1581                        if let Some(val) = &assertion.value {
1582                            let n = val.as_u64().unwrap_or(0);
1583                            let _ = writeln!(out, "        assertTrue({call_expr} >= {n}, \"expected >= {n}\");");
1584                        }
1585                    }
1586                    "count_min" => {
1587                        if let Some(val) = &assertion.value {
1588                            let n = val.as_u64().unwrap_or(0);
1589                            let _ = writeln!(
1590                                out,
1591                                "        assertTrue({call_expr}.size() >= {n}, \"expected at least {n} elements\");"
1592                            );
1593                        }
1594                    }
1595                    "is_error" => {
1596                        let _ = writeln!(out, "        assertThrows(Exception.class, () -> {{ {call_expr}; }});");
1597                    }
1598                    "contains" => {
1599                        if let Some(val) = &assertion.value {
1600                            let java_val = json_to_java(val);
1601                            let _ = writeln!(
1602                                out,
1603                                "        assertTrue({call_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
1604                            );
1605                        }
1606                    }
1607                    other_check => {
1608                        panic!("Java e2e generator: unsupported method_result check type: {other_check}");
1609                    }
1610                }
1611            } else {
1612                panic!("Java e2e generator: method_result assertion missing 'method' field");
1613            }
1614        }
1615        "matches_regex" => {
1616            if let Some(expected) = &assertion.value {
1617                let java_val = json_to_java(expected);
1618                let _ = writeln!(
1619                    out,
1620                    "        assertTrue({string_expr}.matches({java_val}), \"expected value to match regex: \" + {java_val});"
1621                );
1622            }
1623        }
1624        "not_error" => {
1625            // Already handled by the call succeeding without exception.
1626        }
1627        "error" => {
1628            // Handled at the test method level.
1629        }
1630        other => {
1631            panic!("Java e2e generator: unsupported assertion type: {other}");
1632        }
1633    }
1634}
1635
1636/// Build a Java call expression for a `method_result` assertion on a tree-sitter Tree.
1637///
1638/// Maps method names to the appropriate Java static/instance method calls.
1639fn build_java_method_call(
1640    result_var: &str,
1641    method_name: &str,
1642    args: Option<&serde_json::Value>,
1643    class_name: &str,
1644) -> String {
1645    match method_name {
1646        "root_child_count" => format!("{result_var}.rootNode().childCount()"),
1647        "root_node_type" => format!("{result_var}.rootNode().kind()"),
1648        "named_children_count" => format!("{result_var}.rootNode().namedChildCount()"),
1649        "has_error_nodes" => format!("{class_name}.treeHasErrorNodes({result_var})"),
1650        "error_count" | "tree_error_count" => format!("{class_name}.treeErrorCount({result_var})"),
1651        "tree_to_sexp" => format!("{class_name}.treeToSexp({result_var})"),
1652        "contains_node_type" => {
1653            let node_type = args
1654                .and_then(|a| a.get("node_type"))
1655                .and_then(|v| v.as_str())
1656                .unwrap_or("");
1657            format!("{class_name}.treeContainsNodeType({result_var}, \"{node_type}\")")
1658        }
1659        "find_nodes_by_type" => {
1660            let node_type = args
1661                .and_then(|a| a.get("node_type"))
1662                .and_then(|v| v.as_str())
1663                .unwrap_or("");
1664            format!("{class_name}.findNodesByType({result_var}, \"{node_type}\")")
1665        }
1666        "run_query" => {
1667            let query_source = args
1668                .and_then(|a| a.get("query_source"))
1669                .and_then(|v| v.as_str())
1670                .unwrap_or("");
1671            let language = args
1672                .and_then(|a| a.get("language"))
1673                .and_then(|v| v.as_str())
1674                .unwrap_or("");
1675            let escaped_query = escape_java(query_source);
1676            format!("{class_name}.runQuery({result_var}, \"{language}\", \"{escaped_query}\", source)")
1677        }
1678        _ => {
1679            format!("{result_var}.{}()", method_name.to_lower_camel_case())
1680        }
1681    }
1682}
1683
1684/// Convert a `serde_json::Value` to a Java literal string.
1685fn json_to_java(value: &serde_json::Value) -> String {
1686    json_to_java_typed(value, None)
1687}
1688
// NOTE: `json_to_java_typed` (defined further below) converts a JSON value to a Java literal,
// optionally overriding the number type. Its `element_type` controls how floating-point
// numbers are emitted: "f32", "float" or "Float" → `1.0f`, otherwise `1.0d`.
1691/// Emit Java batch item constructors for BatchBytesItem or BatchFileItem arrays.
1692fn emit_java_batch_item_array(arr: &serde_json::Value, elem_type: &str) -> String {
1693    if let Some(items) = arr.as_array() {
1694        let item_strs: Vec<String> = items
1695            .iter()
1696            .filter_map(|item| {
1697                if let Some(obj) = item.as_object() {
1698                    match elem_type {
1699                        "BatchBytesItem" => {
1700                            let content = obj.get("content").and_then(|v| v.as_array());
1701                            let mime_type = obj.get("mime_type").and_then(|v| v.as_str()).unwrap_or("text/plain");
1702                            let content_code = if let Some(arr) = content {
1703                                let bytes: Vec<String> = arr
1704                                    .iter()
1705                                    .filter_map(|v| v.as_u64().map(|n| format!("(byte) {}", n)))
1706                                    .collect();
1707                                format!("new byte[] {{{}}}", bytes.join(", "))
1708                            } else {
1709                                "new byte[] {}".to_string()
1710                            };
1711                            Some(format!("new {}({}, \"{}\", null)", elem_type, content_code, mime_type))
1712                        }
1713                        "BatchFileItem" => {
1714                            let path = obj.get("path").and_then(|v| v.as_str()).unwrap_or("");
1715                            Some(format!(
1716                                "new {}(java.nio.file.Paths.get(\"{}\"), null)",
1717                                elem_type, path
1718                            ))
1719                        }
1720                        _ => None,
1721                    }
1722                } else {
1723                    None
1724                }
1725            })
1726            .collect();
1727        format!("java.util.Arrays.asList({})", item_strs.join(", "))
1728    } else {
1729        "java.util.List.of()".to_string()
1730    }
1731}
1732
1733fn json_to_java_typed(value: &serde_json::Value, element_type: Option<&str>) -> String {
1734    match value {
1735        serde_json::Value::String(s) => format!("\"{}\"", escape_java(s)),
1736        serde_json::Value::Bool(b) => b.to_string(),
1737        serde_json::Value::Number(n) => {
1738            if n.is_f64() {
1739                match element_type {
1740                    Some("f32" | "float" | "Float") => format!("{}f", n),
1741                    _ => format!("{}d", n),
1742                }
1743            } else {
1744                n.to_string()
1745            }
1746        }
1747        serde_json::Value::Null => "null".to_string(),
1748        serde_json::Value::Array(arr) => {
1749            let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, element_type)).collect();
1750            format!("java.util.List.of({})", items.join(", "))
1751        }
1752        serde_json::Value::Object(_) => {
1753            let json_str = serde_json::to_string(value).unwrap_or_default();
1754            format!("\"{}\"", escape_java(&json_str))
1755        }
1756    }
1757}
1758
1759/// Generate a Java builder expression for a JSON object.
1760/// E.g., `obj = {"language": "abl", "chunk_max_size": 50}`
1761/// becomes: `TypeName.builder().withLanguage("abl").withChunkMaxSize(50L).build()`
1762///
1763/// For enums: emit `EnumType.VariantName` (detected via camelCase lookup in enum_fields)
1764/// For strings and bools: use the value directly
1765/// For plain numbers: emit the literal with type suffix (long uses L, double uses d)
1766/// For nested objects: recurse with Options suffix
1767/// When `nested_types_optional` is false, nested builders are passed directly without
1768/// Optional.of() wrapping, allowing non-optional nested config types.
1769fn java_builder_expression(
1770    obj: &serde_json::Map<String, serde_json::Value>,
1771    type_name: &str,
1772    enum_fields: &std::collections::HashMap<String, String>,
1773    nested_types: &std::collections::HashMap<String, String>,
1774    nested_types_optional: bool,
1775    path_fields: &[String],
1776) -> String {
1777    let mut expr = format!("{}.builder()", type_name);
1778    for (key, val) in obj {
1779        // Convert snake_case key to camelCase for method name
1780        let camel_key = key.to_lower_camel_case();
1781        let method_name = format!("with{}", camel_key.to_upper_camel_case());
1782
1783        let java_val = match val {
1784            serde_json::Value::String(s) => {
1785                // Check if this field is an enum type by looking up in enum_fields.
1786                // enum_fields is keyed by camelCase names (e.g., "codeBlockStyle"), not snake_case.
1787                if let Some(enum_type_name) = enum_fields.get(&camel_key) {
1788                    // Enum field: use the mapped enum type name from the config
1789                    let variant_name = s.to_upper_camel_case();
1790                    format!("{}.{}", enum_type_name, variant_name)
1791                } else if camel_key == "preset" && type_name == "PreprocessingOptions" {
1792                    // Special case: preset field in PreprocessingOptions maps to PreprocessingPreset
1793                    let variant_name = s.to_upper_camel_case();
1794                    format!("PreprocessingPreset.{}", variant_name)
1795                } else if path_fields.contains(key) {
1796                    // Path field: wrap in Optional.of(java.nio.file.Path.of(...))
1797                    format!("Optional.of(java.nio.file.Path.of(\"{}\"))", escape_java(s))
1798                } else {
1799                    // String field: emit as a quoted literal
1800                    format!("\"{}\"", escape_java(s))
1801                }
1802            }
1803            serde_json::Value::Bool(b) => b.to_string(),
1804            serde_json::Value::Null => "null".to_string(),
1805            serde_json::Value::Number(n) => {
1806                // Number field: emit literal with type suffix.
1807                // Java records/classes use either `long` (primitive, not nullable) or
1808                // `Optional<Long>` (nullable). The codegen wraps in `Optional.of(...)`
1809                // by default since most options builder fields are Optional, but several
1810                // record types (e.g. SecurityLimits) use primitive `long` throughout.
1811                // Skip the wrap for: (a) known-primitive top-level fields and (b) any
1812                // method on a record type whose builder methods take primitives only.
1813                let camel_key = key.to_lower_camel_case();
1814                let is_plain_field = matches!(camel_key.as_str(), "listIndentWidth" | "wrapWidth");
1815                // Builders for typed-record nested config classes use primitives
1816                // throughout — they're not the optional-options pattern.
1817                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1818
1819                if is_plain_field || is_primitive_builder {
1820                    // Plain numeric field: no Optional wrapper
1821                    if n.is_f64() {
1822                        format!("{}d", n)
1823                    } else {
1824                        format!("{}L", n)
1825                    }
1826                } else {
1827                    // Optional numeric field: wrap in Optional.of()
1828                    if n.is_f64() {
1829                        format!("Optional.of({}d)", n)
1830                    } else {
1831                        format!("Optional.of({}L)", n)
1832                    }
1833                }
1834            }
1835            serde_json::Value::Array(arr) => {
1836                let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, None)).collect();
1837                format!("java.util.List.of({})", items.join(", "))
1838            }
1839            serde_json::Value::Object(nested) => {
1840                // Recurse with the type from nested_types mapping, or default to snake_case → PascalCase + "Options".
1841                let nested_type = nested_types
1842                    .get(key.as_str())
1843                    .cloned()
1844                    .unwrap_or_else(|| format!("{}Options", key.to_upper_camel_case()));
1845                let inner = java_builder_expression(
1846                    nested,
1847                    &nested_type,
1848                    enum_fields,
1849                    nested_types,
1850                    nested_types_optional,
1851                    &[],
1852                );
1853                // Top-level config builders (e.g. ExtractionConfigBuilder) declare nested
1854                // record fields as `Optional<T>` (since they are nullable). Primitive-fields
1855                // builders (SecurityLimitsBuilder etc.) take the bare type directly.
1856                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1857                if is_primitive_builder || !nested_types_optional {
1858                    inner
1859                } else {
1860                    format!("Optional.of({inner})")
1861                }
1862            }
1863        };
1864        expr.push_str(&format!(".{}({})", method_name, java_val));
1865    }
1866    expr.push_str(".build()");
1867    expr
1868}
1869
/// Build the default nested type mappings for Java extraction config types.
///
/// Keys are config field names in snake_case; values are the Java type names used
/// when a nested config object is rendered as a builder expression. These defaults
/// let e2e codegen handle nested config objects without explicit configuration in
/// alef.toml; user-provided overrides take precedence.
fn default_java_nested_types() -> std::collections::HashMap<String, String> {
    const DEFAULTS: &[(&str, &str)] = &[
        ("chunking", "ChunkingConfig"),
        ("ocr", "OcrConfig"),
        ("images", "ImageExtractionConfig"),
        ("html_output", "HtmlOutputConfig"),
        ("language_detection", "LanguageDetectionConfig"),
        ("postprocessor", "PostProcessorConfig"),
        ("acceleration", "AccelerationConfig"),
        ("email", "EmailConfig"),
        ("pages", "PageConfig"),
        ("pdf_options", "PdfConfig"),
        ("layout", "LayoutDetectionConfig"),
        ("tree_sitter", "TreeSitterConfig"),
        ("structured_extraction", "StructuredExtractionConfig"),
        ("content_filter", "ContentFilterConfig"),
        ("token_reduction", "TokenReductionOptions"),
        ("security_limits", "SecurityLimits"),
    ];

    let mut mapping = std::collections::HashMap::with_capacity(DEFAULTS.len());
    for &(field, java_type) in DEFAULTS {
        mapping.insert(field.to_owned(), java_type.to_owned());
    }
    mapping
}
1899
1900// ---------------------------------------------------------------------------
1901// Import collection helpers
1902// ---------------------------------------------------------------------------
1903
1904/// Recursively collect enum types and nested option types used in a builder expression.
1905/// Enums are keyed in the enum_fields map by camelCase names (e.g., "codeBlockStyle" → "CodeBlockStyle").
1906fn collect_enum_and_nested_types(
1907    obj: &serde_json::Map<String, serde_json::Value>,
1908    enum_fields: &std::collections::HashMap<String, String>,
1909    types_out: &mut std::collections::BTreeSet<String>,
1910) {
1911    for (key, val) in obj {
1912        // enum_fields is keyed by camelCase, not snake_case.
1913        let camel_key = key.to_lower_camel_case();
1914        if let Some(enum_type) = enum_fields.get(&camel_key) {
1915            // Add the enum type from the mapping (e.g., "CodeBlockStyle").
1916            types_out.insert(enum_type.clone());
1917        } else if camel_key == "preset" {
1918            // Special case: preset field uses PreprocessingPreset enum.
1919            types_out.insert("PreprocessingPreset".to_string());
1920        }
1921        // Recurse into nested objects to find their nested enum types.
1922        if let Some(nested) = val.as_object() {
1923            collect_enum_and_nested_types(nested, enum_fields, types_out);
1924        }
1925    }
1926}
1927
1928fn collect_nested_type_names(
1929    obj: &serde_json::Map<String, serde_json::Value>,
1930    nested_types: &std::collections::HashMap<String, String>,
1931    types_out: &mut std::collections::BTreeSet<String>,
1932) {
1933    for (key, val) in obj {
1934        if let Some(type_name) = nested_types.get(key.as_str()) {
1935            types_out.insert(type_name.clone());
1936        }
1937        if let Some(nested) = val.as_object() {
1938            collect_nested_type_names(nested, nested_types, types_out);
1939        }
1940    }
1941}
1942
1943// ---------------------------------------------------------------------------
1944// Visitor generation
1945// ---------------------------------------------------------------------------
1946
1947/// Build a Java visitor class and add setup lines. Returns the visitor variable name.
1948fn build_java_visitor(
1949    setup_lines: &mut Vec<String>,
1950    visitor_spec: &crate::fixture::VisitorSpec,
1951    class_name: &str,
1952) -> String {
1953    setup_lines.push("class _TestVisitor implements Visitor {".to_string());
1954    for (method_name, action) in &visitor_spec.callbacks {
1955        emit_java_visitor_method(setup_lines, method_name, action, class_name);
1956    }
1957    setup_lines.push("}".to_string());
1958    setup_lines.push("var visitor = new _TestVisitor();".to_string());
1959    "visitor".to_string()
1960}
1961
1962/// Emit a Java visitor method for a callback action.
1963fn emit_java_visitor_method(
1964    setup_lines: &mut Vec<String>,
1965    method_name: &str,
1966    action: &CallbackAction,
1967    _class_name: &str,
1968) {
1969    let camel_method = method_to_camel(method_name);
1970    let params = match method_name {
1971        "visit_link" => "NodeContext ctx, String href, String text, String title",
1972        "visit_image" => "NodeContext ctx, String src, String alt, String title",
1973        "visit_heading" => "NodeContext ctx, int level, String text, String id",
1974        "visit_code_block" => "NodeContext ctx, String lang, String code",
1975        "visit_code_inline"
1976        | "visit_strong"
1977        | "visit_emphasis"
1978        | "visit_strikethrough"
1979        | "visit_underline"
1980        | "visit_subscript"
1981        | "visit_superscript"
1982        | "visit_mark"
1983        | "visit_button"
1984        | "visit_summary"
1985        | "visit_figcaption"
1986        | "visit_definition_term"
1987        | "visit_definition_description" => "NodeContext ctx, String text",
1988        "visit_text" => "NodeContext ctx, String text",
1989        "visit_list_item" => "NodeContext ctx, boolean ordered, String marker, String text",
1990        "visit_blockquote" => "NodeContext ctx, String content, long depth",
1991        "visit_table_row" => "NodeContext ctx, java.util.List<String> cells, boolean isHeader",
1992        "visit_custom_element" => "NodeContext ctx, String tagName, String html",
1993        "visit_form" => "NodeContext ctx, String actionUrl, String method",
1994        "visit_input" => "NodeContext ctx, String inputType, String name, String value",
1995        "visit_audio" | "visit_video" | "visit_iframe" => "NodeContext ctx, String src",
1996        "visit_details" => "NodeContext ctx, boolean isOpen",
1997        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
1998            "NodeContext ctx, String output"
1999        }
2000        "visit_list_start" => "NodeContext ctx, boolean ordered",
2001        "visit_list_end" => "NodeContext ctx, boolean ordered, String output",
2002        _ => "NodeContext ctx",
2003    };
2004
2005    setup_lines.push(format!("    @Override public VisitResult {camel_method}({params}) {{"));
2006    match action {
2007        CallbackAction::Skip => {
2008            setup_lines.push("        return VisitResult.skip();".to_string());
2009        }
2010        CallbackAction::Continue => {
2011            setup_lines.push("        return VisitResult.continue_();".to_string());
2012        }
2013        CallbackAction::PreserveHtml => {
2014            setup_lines.push("        return VisitResult.preserveHtml();".to_string());
2015        }
2016        CallbackAction::Custom { output } => {
2017            let escaped = escape_java(output);
2018            setup_lines.push(format!("        return VisitResult.custom(\"{escaped}\");"));
2019        }
2020        CallbackAction::CustomTemplate { template } => {
2021            // Extract {placeholder} names from the template (in order of appearance).
2022            // Convert each snake_case placeholder to the camelCase Java variable name,
2023            // then replace each {placeholder} with %s for String.format.
2024            let mut format_str = String::with_capacity(template.len());
2025            let mut format_args: Vec<String> = Vec::new();
2026            let mut chars = template.chars().peekable();
2027            while let Some(ch) = chars.next() {
2028                if ch == '{' {
2029                    // Collect identifier chars until '}'.
2030                    let mut name = String::new();
2031                    let mut closed = false;
2032                    for inner in chars.by_ref() {
2033                        if inner == '}' {
2034                            closed = true;
2035                            break;
2036                        }
2037                        name.push(inner);
2038                    }
2039                    if closed && !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_') {
2040                        let camel_name = name.as_str().to_lower_camel_case();
2041                        format_args.push(camel_name);
2042                        format_str.push_str("%s");
2043                    } else {
2044                        // Not a simple placeholder — emit literally.
2045                        format_str.push('{');
2046                        format_str.push_str(&name);
2047                        if closed {
2048                            format_str.push('}');
2049                        }
2050                    }
2051                } else {
2052                    format_str.push(ch);
2053                }
2054            }
2055            let escaped = escape_java(&format_str);
2056            if format_args.is_empty() {
2057                setup_lines.push(format!("        return VisitResult.custom(\"{escaped}\");"));
2058            } else {
2059                let args_str = format_args.join(", ");
2060                setup_lines.push(format!(
2061                    "        return VisitResult.custom(String.format(\"{escaped}\", {args_str}));"
2062                ));
2063            }
2064        }
2065    }
2066    setup_lines.push("    }".to_string());
2067}
2068
2069/// Convert snake_case method names to Java camelCase.
2070fn method_to_camel(snake: &str) -> String {
2071    snake.to_lower_camel_case()
2072}