Skip to main content

alef_e2e/codegen/
java.rs

1//! Java e2e test generator using JUnit 5.
2//!
3//! Generates `e2e/java/pom.xml` and `src/test/java/dev/kreuzberg/e2e/{Category}Test.java`
4//! files from JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
5
6use crate::config::E2eConfig;
7use crate::escape::{escape_java, sanitize_filename};
8use crate::field_access::FieldResolver;
9use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, HttpFixture};
10use alef_core::backend::GeneratedFile;
11use alef_core::config::ResolvedCrateConfig;
12use alef_core::hash::{self, CommentStyle};
13use alef_core::template_versions as tv;
14use anyhow::Result;
15use heck::{ToLowerCamelCase, ToUpperCamelCase};
16use std::path::PathBuf;
17
18use super::E2eCodegen;
19use super::client;
20
21/// Java e2e code generator.
22pub struct JavaCodegen;
23
24impl E2eCodegen for JavaCodegen {
25    fn generate(
26        &self,
27        groups: &[FixtureGroup],
28        e2e_config: &E2eConfig,
29        config: &ResolvedCrateConfig,
30    ) -> Result<Vec<GeneratedFile>> {
31        let lang = self.language_name();
32        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
33
34        let mut files = Vec::new();
35
36        // Resolve call config with overrides.
37        let call = &e2e_config.call;
38        let overrides = call.overrides.get(lang);
39        let _module_path = overrides
40            .and_then(|o| o.module.as_ref())
41            .cloned()
42            .unwrap_or_else(|| call.module.clone());
43        let function_name = overrides
44            .and_then(|o| o.function.as_ref())
45            .cloned()
46            .unwrap_or_else(|| call.function.clone());
47        let class_name = overrides
48            .and_then(|o| o.class.as_ref())
49            .cloned()
50            .unwrap_or_else(|| config.name.to_upper_camel_case());
51        let result_is_simple = overrides.is_some_and(|o| o.result_is_simple);
52        let result_var = &call.result_var;
53
54        // Resolve package config.
55        let java_pkg = e2e_config.resolve_package("java");
56        let pkg_name = java_pkg
57            .as_ref()
58            .and_then(|p| p.name.as_ref())
59            .cloned()
60            .unwrap_or_else(|| config.name.clone());
61
62        // Resolve Java package info for the dependency.
63        let java_group_id = config.java_group_id();
64        let pkg_version = config.resolved_version().unwrap_or_else(|| "0.1.0".to_string());
65
66        // Generate pom.xml.
67        files.push(GeneratedFile {
68            path: output_base.join("pom.xml"),
69            content: render_pom_xml(&pkg_name, &java_group_id, &pkg_version, e2e_config.dep_mode),
70            generated_header: false,
71        });
72
73        // Generate test files per category. Path mirrors the configured Java
74        // package — `dev.myorg` becomes `dev/myorg`, etc. — so the package
75        // declaration in each test file matches its filesystem location.
76        let mut test_base = output_base.join("src").join("test").join("java");
77        for segment in java_group_id.split('.') {
78            test_base = test_base.join(segment);
79        }
80        let test_base = test_base.join("e2e");
81
82        // Resolve options_type from override.
83        let options_type = overrides.and_then(|o| o.options_type.clone());
84
85        // Get Java-specific enum_fields from override (required for correct enum handling).
86        let empty_enum_fields = std::collections::HashMap::new();
87        let java_enum_fields = overrides.as_ref().map(|o| &o.enum_fields).unwrap_or(&empty_enum_fields);
88
89        // Build effective nested_types by merging defaults with configured overrides.
90        let mut effective_nested_types = default_java_nested_types();
91        if let Some(overrides_map) = overrides.map(|o| &o.nested_types) {
92            effective_nested_types.extend(overrides_map.clone());
93        }
94
95        // Resolve nested_types_optional from override (defaults to true for backward compatibility).
96        let nested_types_optional = overrides.map(|o| o.nested_types_optional).unwrap_or(true);
97
98        let field_resolver = FieldResolver::new(
99            &e2e_config.fields,
100            &e2e_config.fields_optional,
101            &e2e_config.result_fields,
102            &e2e_config.fields_array,
103            &std::collections::HashSet::new(),
104        );
105
106        for group in groups {
107            let active: Vec<&Fixture> = group
108                .fixtures
109                .iter()
110                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
111                .collect();
112
113            if active.is_empty() {
114                continue;
115            }
116
117            let class_file_name = format!("{}Test.java", sanitize_filename(&group.category).to_upper_camel_case());
118            let content = render_test_file(
119                &group.category,
120                &active,
121                &class_name,
122                &function_name,
123                &java_group_id,
124                result_var,
125                &e2e_config.call.args,
126                options_type.as_deref(),
127                &field_resolver,
128                result_is_simple,
129                java_enum_fields,
130                e2e_config,
131                &effective_nested_types,
132                nested_types_optional,
133            );
134            files.push(GeneratedFile {
135                path: test_base.join(class_file_name),
136                content,
137                generated_header: true,
138            });
139        }
140
141        Ok(files)
142    }
143
144    fn language_name(&self) -> &'static str {
145        "java"
146    }
147}
148
149// ---------------------------------------------------------------------------
150// Rendering
151// ---------------------------------------------------------------------------
152
153fn render_pom_xml(
154    pkg_name: &str,
155    java_group_id: &str,
156    pkg_version: &str,
157    dep_mode: crate::config::DependencyMode,
158) -> String {
159    // pkg_name may be in "groupId:artifactId" Maven format; split accordingly.
160    let (dep_group_id, dep_artifact_id) = if let Some((g, a)) = pkg_name.split_once(':') {
161        (g, a)
162    } else {
163        (java_group_id, pkg_name)
164    };
165    let artifact_id = format!("{dep_artifact_id}-e2e-java");
166    let dep_block = match dep_mode {
167        crate::config::DependencyMode::Registry => {
168            format!(
169                r#"        <dependency>
170            <groupId>{dep_group_id}</groupId>
171            <artifactId>{dep_artifact_id}</artifactId>
172            <version>{pkg_version}</version>
173        </dependency>"#
174            )
175        }
176        crate::config::DependencyMode::Local => {
177            format!(
178                r#"        <dependency>
179            <groupId>{dep_group_id}</groupId>
180            <artifactId>{dep_artifact_id}</artifactId>
181            <version>{pkg_version}</version>
182            <scope>system</scope>
183            <systemPath>${{project.basedir}}/../../packages/java/target/{dep_artifact_id}-{pkg_version}.jar</systemPath>
184        </dependency>"#
185            )
186        }
187    };
188    crate::template_env::render(
189        "java/pom.xml.jinja",
190        minijinja::context! {
191            artifact_id => artifact_id,
192            java_group_id => java_group_id,
193            dep_block => dep_block,
194            junit_version => tv::maven::JUNIT,
195            jackson_version => tv::maven::JACKSON_E2E,
196            build_helper_version => tv::maven::BUILD_HELPER_MAVEN_PLUGIN,
197            maven_surefire_version => tv::maven::MAVEN_SUREFIRE_PLUGIN_E2E,
198        },
199    )
200}
201
202#[allow(clippy::too_many_arguments)]
203fn render_test_file(
204    category: &str,
205    fixtures: &[&Fixture],
206    class_name: &str,
207    function_name: &str,
208    java_group_id: &str,
209    result_var: &str,
210    args: &[crate::config::ArgMapping],
211    options_type: Option<&str>,
212    field_resolver: &FieldResolver,
213    result_is_simple: bool,
214    enum_fields: &std::collections::HashMap<String, String>,
215    e2e_config: &E2eConfig,
216    nested_types: &std::collections::HashMap<String, String>,
217    nested_types_optional: bool,
218) -> String {
219    let header = hash::header(CommentStyle::DoubleSlash);
220    let test_class_name = format!("{}Test", sanitize_filename(category).to_upper_camel_case());
221
222    // If the class_name is fully qualified (contains '.'), import it and use
223    // only the simple name for method calls.  Otherwise use it as-is.
224    let (import_path, simple_class) = if class_name.contains('.') {
225        let simple = class_name.rsplit('.').next().unwrap_or(class_name);
226        (class_name, simple)
227    } else {
228        ("", class_name)
229    };
230
231    // Check if any fixture (with its resolved call) will emit MAPPER usage.
232    let lang_for_om = "java";
233    let needs_object_mapper_for_handle = fixtures.iter().any(|f| {
234        args.iter().filter(|a| a.arg_type == "handle").any(|a| {
235            let v = f.input.get(&a.field).unwrap_or(&serde_json::Value::Null);
236            !(v.is_null() || v.is_object() && v.as_object().is_some_and(|o| o.is_empty()))
237        })
238    });
239    // HTTP fixtures always need ObjectMapper for JSON body comparison.
240    let has_http_fixtures = fixtures.iter().any(|f| f.http.is_some());
241    let needs_object_mapper = needs_object_mapper_for_handle || has_http_fixtures;
242
243    // Collect all options_type values used (class-level + per-fixture call overrides).
244    let mut all_options_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
245    if let Some(t) = options_type {
246        all_options_types.insert(t.to_string());
247    }
248    for f in fixtures.iter() {
249        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
250        if let Some(ov) = call_cfg.overrides.get(lang_for_om) {
251            if let Some(t) = &ov.options_type {
252                all_options_types.insert(t.clone());
253            }
254        }
255        // Detect batch item types used in this fixture
256        for arg in &call_cfg.args {
257            if let Some(elem_type) = &arg.element_type {
258                if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
259                    all_options_types.insert(elem_type.clone());
260                }
261            }
262        }
263    }
264
265    // Collect all enum types used in builder expressions across all fixtures.
266    let mut enum_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
267    // Collect nested config types actually referenced in fixture builder expressions
268    let mut nested_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
269    for f in fixtures.iter() {
270        let call_cfg = e2e_config.resolve_call(f.call.as_deref());
271        for arg in &call_cfg.args {
272            if arg.arg_type == "json_object" {
273                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
274                if let Some(val) = f.input.get(field) {
275                    if !val.is_null() && !val.is_array() {
276                        if let Some(obj) = val.as_object() {
277                            collect_enum_and_nested_types(obj, enum_fields, &mut enum_types_used);
278                            collect_nested_type_names(obj, nested_types, &mut nested_types_used);
279                        }
280                    }
281                }
282            }
283        }
284    }
285
286    // Build imports list
287    let mut imports: Vec<String> = Vec::new();
288    imports.push("import org.junit.jupiter.api.Test;".to_string());
289    imports.push("import static org.junit.jupiter.api.Assertions.*;".to_string());
290
291    if !import_path.is_empty() {
292        imports.push(format!("import {import_path};"));
293    }
294
295    if needs_object_mapper {
296        imports.push("import com.fasterxml.jackson.databind.ObjectMapper;".to_string());
297        imports.push("import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;".to_string());
298    }
299
300    // Import all options types used across fixtures (for builder expressions and MAPPER).
301    if !all_options_types.is_empty() {
302        let opts_pkg = if !import_path.is_empty() {
303            import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("")
304        } else {
305            ""
306        };
307        for opts_type in &all_options_types {
308            let qualified = if opts_pkg.is_empty() {
309                opts_type.clone()
310            } else {
311                format!("{opts_pkg}.{opts_type}")
312            };
313            imports.push(format!("import {qualified};"));
314        }
315    }
316
317    // Import all enum types used in builder expressions
318    if !enum_types_used.is_empty() && !import_path.is_empty() {
319        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
320        for enum_type in &enum_types_used {
321            imports.push(format!("import {binding_pkg}.{enum_type};"));
322        }
323    }
324
325    // Import nested options types
326    if !nested_types_used.is_empty() && !import_path.is_empty() {
327        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
328        for type_name in &nested_types_used {
329            imports.push(format!("import {binding_pkg}.{type_name};"));
330        }
331    }
332
333    // Import CrawlConfig when handle args need JSON deserialization.
334    if needs_object_mapper_for_handle && !import_path.is_empty() {
335        let pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
336        imports.push(format!("import {pkg}.CrawlConfig;"));
337    }
338
339    // Import visitor types when any fixture uses visitor callbacks.
340    let has_visitor_fixtures = fixtures.iter().any(|f| f.visitor.is_some());
341    if has_visitor_fixtures && !import_path.is_empty() {
342        let binding_pkg = import_path.rsplit_once('.').map(|(p, _)| p).unwrap_or("");
343        if !binding_pkg.is_empty() {
344            imports.push(format!("import {binding_pkg}.Visitor;"));
345            imports.push(format!("import {binding_pkg}.NodeContext;"));
346            imports.push(format!("import {binding_pkg}.VisitResult;"));
347        }
348    }
349
350    // Import Optional when using builder expressions with optional fields
351    if !all_options_types.is_empty() {
352        imports.push("import java.util.Optional;".to_string());
353    }
354
355    // Render all test methods
356    let mut fixtures_body = String::new();
357    for (i, fixture) in fixtures.iter().enumerate() {
358        render_test_method(
359            &mut fixtures_body,
360            fixture,
361            simple_class,
362            function_name,
363            result_var,
364            args,
365            options_type,
366            field_resolver,
367            result_is_simple,
368            enum_fields,
369            e2e_config,
370            nested_types,
371            nested_types_optional,
372        );
373        if i + 1 < fixtures.len() {
374            fixtures_body.push('\n');
375        }
376    }
377
378    // Render template
379    crate::template_env::render(
380        "java/test_file.jinja",
381        minijinja::context! {
382            header => header,
383            java_group_id => java_group_id,
384            test_class_name => test_class_name,
385            category => category,
386            imports => imports,
387            needs_object_mapper => needs_object_mapper,
388            fixtures_body => fixtures_body,
389        },
390    )
391}
392
393// ---------------------------------------------------------------------------
394// HTTP test rendering — shared-driver integration
395// ---------------------------------------------------------------------------
396
397/// Thin renderer that emits JUnit 5 test methods targeting a mock server via
398/// `java.net.http.HttpClient`. Satisfies [`client::TestClientRenderer`] so the
399/// shared [`client::http_call::render_http_test`] driver drives the call sequence.
400struct JavaTestClientRenderer;
401
402impl client::TestClientRenderer for JavaTestClientRenderer {
403    fn language_name(&self) -> &'static str {
404        "java"
405    }
406
407    /// Convert a fixture id to the UpperCamelCase suffix appended to `test`.
408    ///
409    /// The emitted method name is `test{fn_name}`, matching the pre-existing shape.
410    fn sanitize_test_name(&self, id: &str) -> String {
411        id.to_upper_camel_case()
412    }
413
414    /// Emit `@Test void test{fn_name}() throws Exception {`.
415    ///
416    /// When `skip_reason` is `Some`, the body is a single
417    /// `Assumptions.assumeTrue(false, ...)` call and `render_test_close` closes
418    /// the brace symmetrically.
419    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
420        let escaped_reason = skip_reason.map(escape_java);
421        let rendered = crate::template_env::render(
422            "java/http_test_open.jinja",
423            minijinja::context! {
424                fn_name => fn_name,
425                description => description,
426                skip_reason => escaped_reason,
427            },
428        );
429        out.push_str(&rendered);
430    }
431
432    /// Emit the closing `}` for a test method.
433    fn render_test_close(&self, out: &mut String) {
434        let rendered = crate::template_env::render("java/http_test_close.jinja", minijinja::context! {});
435        out.push_str(&rendered);
436    }
437
438    /// Emit a `java.net.http.HttpClient` request to `baseUrl + path`.
439    ///
440    /// Binds the response to `response` (the `ctx.response_var`). Java's
441    /// `HttpClient` disallows a fixed set of restricted headers; those are
442    /// silently dropped so the test compiles.
443    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
444        // Java's HttpClient throws IllegalArgumentException for these headers.
445        const JAVA_RESTRICTED_HEADERS: &[&str] = &["connection", "content-length", "expect", "host", "upgrade"];
446
447        let method = ctx.method.to_uppercase();
448
449        // Build the path, appending query params when present.
450        let path = if ctx.query_params.is_empty() {
451            ctx.path.to_string()
452        } else {
453            let pairs: Vec<String> = ctx
454                .query_params
455                .iter()
456                .map(|(k, v)| {
457                    let val_str = match v {
458                        serde_json::Value::String(s) => s.clone(),
459                        other => other.to_string(),
460                    };
461                    format!("{}={}", k, escape_java(&val_str))
462                })
463                .collect();
464            format!("{}?{}", ctx.path, pairs.join("&"))
465        };
466
467        let body_publisher = if let Some(body) = ctx.body {
468            let json = serde_json::to_string(body).unwrap_or_default();
469            let escaped = escape_java(&json);
470            format!("java.net.http.HttpRequest.BodyPublishers.ofString(\"{escaped}\")")
471        } else {
472            "java.net.http.HttpRequest.BodyPublishers.noBody()".to_string()
473        };
474
475        // Content-Type header — only when a body is present.
476        let content_type = if ctx.body.is_some() {
477            let ct = ctx.content_type.unwrap_or("application/json");
478            // Only emit when not already in ctx.headers (avoid duplicate Content-Type).
479            if !ctx.headers.keys().any(|k| k.to_lowercase() == "content-type") {
480                Some(ct.to_string())
481            } else {
482                None
483            }
484        } else {
485            None
486        };
487
488        // Build header lines — skip Java-restricted ones.
489        let mut headers_lines: Vec<String> = Vec::new();
490        for (name, value) in ctx.headers {
491            if JAVA_RESTRICTED_HEADERS.contains(&name.to_lowercase().as_str()) {
492                continue;
493            }
494            let escaped_name = escape_java(name);
495            let escaped_value = escape_java(value);
496            headers_lines.push(format!(
497                "builder = builder.header(\"{escaped_name}\", \"{escaped_value}\");"
498            ));
499        }
500
501        // Cookies as a single `Cookie` header.
502        let cookies_line = if !ctx.cookies.is_empty() {
503            let cookie_str: Vec<String> = ctx.cookies.iter().map(|(k, v)| format!("{k}={v}")).collect();
504            let cookie_header = escape_java(&cookie_str.join("; "));
505            Some(format!("builder = builder.header(\"Cookie\", \"{cookie_header}\");"))
506        } else {
507            None
508        };
509
510        let rendered = crate::template_env::render(
511            "java/http_request.jinja",
512            minijinja::context! {
513                method => method,
514                path => path,
515                body_publisher => body_publisher,
516                content_type => content_type,
517                headers_lines => headers_lines,
518                cookies_line => cookies_line,
519                response_var => ctx.response_var,
520            },
521        );
522        out.push_str(&rendered);
523    }
524
525    /// Emit `assertEquals(status, response.statusCode(), ...)`.
526    fn render_assert_status(&self, out: &mut String, response_var: &str, status: u16) {
527        let rendered = crate::template_env::render(
528            "java/http_assertions.jinja",
529            minijinja::context! {
530                response_var => response_var,
531                status_code => status,
532                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
533                body_assertion => String::new(),
534                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
535                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
536            },
537        );
538        out.push_str(&rendered);
539    }
540
541    /// Emit a header assertion using `response.headers().firstValue(...)`.
542    ///
543    /// Handles special tokens: `<<present>>`, `<<absent>>`, `<<uuid>>`.
544    fn render_assert_header(&self, out: &mut String, response_var: &str, name: &str, expected: &str) {
545        let escaped_name = escape_java(name);
546        let assertion_code = match expected {
547            "<<present>>" => {
548                format!(
549                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isPresent(), \"header {escaped_name} should be present\");"
550                )
551            }
552            "<<absent>>" => {
553                format!(
554                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").isEmpty(), \"header {escaped_name} should be absent\");"
555                )
556            }
557            "<<uuid>>" => {
558                format!(
559                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").matches(\"[0-9a-fA-F]{{8}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{12}}\"), \"header {escaped_name} should be a UUID\");"
560                )
561            }
562            literal => {
563                let escaped_value = escape_java(literal);
564                format!(
565                    "assertTrue({response_var}.headers().firstValue(\"{escaped_name}\").orElse(\"\").contains(\"{escaped_value}\"), \"header {escaped_name} mismatch\");"
566                )
567            }
568        };
569
570        let mut headers = vec![std::collections::HashMap::new()];
571        headers[0].insert("assertion_code", assertion_code);
572
573        let rendered = crate::template_env::render(
574            "java/http_assertions.jinja",
575            minijinja::context! {
576                response_var => response_var,
577                status_code => 0u16,
578                headers => headers,
579                body_assertion => String::new(),
580                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
581                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
582            },
583        );
584        out.push_str(&rendered);
585    }
586
587    /// Emit a JSON body equality assertion using Jackson's `MAPPER.readTree`.
588    fn render_assert_json_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
589        let body_assertion = match expected {
590            serde_json::Value::Object(_) | serde_json::Value::Array(_) => {
591                let json_str = serde_json::to_string(expected).unwrap_or_default();
592                let escaped = escape_java(&json_str);
593                format!(
594                    "var bodyJson = MAPPER.readTree({response_var}.body());\n        var expectedJson = MAPPER.readTree(\"{escaped}\");\n        assertEquals(expectedJson, bodyJson, \"body mismatch\");"
595                )
596            }
597            serde_json::Value::String(s) => {
598                let escaped = escape_java(s);
599                format!("assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");")
600            }
601            other => {
602                let escaped = escape_java(&other.to_string());
603                format!("assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");")
604            }
605        };
606
607        let rendered = crate::template_env::render(
608            "java/http_assertions.jinja",
609            minijinja::context! {
610                response_var => response_var,
611                status_code => 0u16,
612                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
613                body_assertion => body_assertion,
614                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
615                validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
616            },
617        );
618        out.push_str(&rendered);
619    }
620
621    /// Emit partial JSON body assertions: parse once, then assert each expected field.
622    fn render_assert_partial_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
623        if let Some(obj) = expected.as_object() {
624            let mut partial_body: Vec<std::collections::HashMap<&str, String>> = Vec::new();
625            for (key, val) in obj {
626                let escaped_key = escape_java(key);
627                let json_str = serde_json::to_string(val).unwrap_or_default();
628                let escaped_val = escape_java(&json_str);
629                let assertion_code = format!(
630                    "assertEquals(MAPPER.readTree(\"{escaped_val}\"), partialJson.get(\"{escaped_key}\"), \"body field '{escaped_key}' mismatch\");"
631                );
632                let mut entry = std::collections::HashMap::new();
633                entry.insert("assertion_code", assertion_code);
634                partial_body.push(entry);
635            }
636
637            let rendered = crate::template_env::render(
638                "java/http_assertions.jinja",
639                minijinja::context! {
640                    response_var => response_var,
641                    status_code => 0u16,
642                    headers => Vec::<std::collections::HashMap<&str, String>>::new(),
643                    body_assertion => String::new(),
644                    partial_body => partial_body,
645                    validation_errors => Vec::<std::collections::HashMap<&str, String>>::new(),
646                },
647            );
648            out.push_str(&rendered);
649        }
650    }
651
652    /// Emit validation-error assertions: parse the body and check each expected message.
653    fn render_assert_validation_errors(
654        &self,
655        out: &mut String,
656        response_var: &str,
657        errors: &[crate::fixture::ValidationErrorExpectation],
658    ) {
659        let mut validation_errors: Vec<std::collections::HashMap<&str, String>> = Vec::new();
660        for err in errors {
661            let escaped_msg = escape_java(&err.msg);
662            let assertion_code = format!(
663                "assertTrue(veBody.contains(\"{escaped_msg}\"), \"expected validation error message: {escaped_msg}\");"
664            );
665            let mut entry = std::collections::HashMap::new();
666            entry.insert("assertion_code", assertion_code);
667            validation_errors.push(entry);
668        }
669
670        let rendered = crate::template_env::render(
671            "java/http_assertions.jinja",
672            minijinja::context! {
673                response_var => response_var,
674                status_code => 0u16,
675                headers => Vec::<std::collections::HashMap<&str, String>>::new(),
676                body_assertion => String::new(),
677                partial_body => Vec::<std::collections::HashMap<&str, String>>::new(),
678                validation_errors => validation_errors,
679            },
680        );
681        out.push_str(&rendered);
682    }
683}
684
685/// Render an HTTP server test method using `java.net.http.HttpClient` against
686/// `MOCK_SERVER_URL`. Delegates to the shared
687/// [`client::http_call::render_http_test`] driver via [`JavaTestClientRenderer`].
688///
689/// The one Java-specific pre-condition — HTTP 101 (WebSocket upgrade) causing an
690/// `EOFException` in `HttpClient` — is handled here before delegating.
691fn render_http_test_method(out: &mut String, fixture: &Fixture, http: &HttpFixture) {
692    // HTTP 101 (WebSocket upgrade) causes Java's HttpClient to throw EOFException.
693    // Emit an assumeTrue(false, ...) stub so the test is skipped rather than failing.
694    if http.expected_response.status_code == 101 {
695        let method_name = fixture.id.to_upper_camel_case();
696        let description = &fixture.description;
697        out.push_str(&crate::template_env::render(
698            "java/http_test_skip_101.jinja",
699            minijinja::context! {
700                method_name => method_name,
701                description => description,
702            },
703        ));
704        return;
705    }
706
707    client::http_call::render_http_test(out, &JavaTestClientRenderer, fixture);
708}
709
710#[allow(clippy::too_many_arguments)]
711fn render_test_method(
712    out: &mut String,
713    fixture: &Fixture,
714    class_name: &str,
715    _function_name: &str,
716    _result_var: &str,
717    _args: &[crate::config::ArgMapping],
718    options_type: Option<&str>,
719    field_resolver: &FieldResolver,
720    result_is_simple: bool,
721    enum_fields: &std::collections::HashMap<String, String>,
722    e2e_config: &E2eConfig,
723    nested_types: &std::collections::HashMap<String, String>,
724    nested_types_optional: bool,
725) {
726    // Delegate HTTP fixtures to the HTTP-specific renderer.
727    if let Some(http) = &fixture.http {
728        render_http_test_method(out, fixture, http);
729        return;
730    }
731
732    // Resolve per-fixture call config (supports named calls via fixture.call field).
733    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
734    let lang = "java";
735    let call_overrides = call_config.overrides.get(lang);
736    let effective_function_name = call_overrides
737        .and_then(|o| o.function.as_ref())
738        .cloned()
739        .unwrap_or_else(|| call_config.function.to_lower_camel_case());
740    let effective_result_var = &call_config.result_var;
741    let effective_args = &call_config.args;
742    let function_name = effective_function_name.as_str();
743    let result_var = effective_result_var.as_str();
744    let args: &[crate::config::ArgMapping] = effective_args.as_slice();
745
746    let method_name = fixture.id.to_upper_camel_case();
747    let description = &fixture.description;
748    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
749
750    // Emit a compilable stub for non-HTTP fixtures that have no call override.
751    if call_overrides.is_none() {
752        let skip_msg = format!("TODO: implement Java e2e test for fixture '{}'", fixture.id);
753        out.push_str(&format!(
754            "    @Test\n    void test{}() {{\n        // {}\n        org.junit.jupiter.api.Assumptions.assumeTrue(false, \"{}\");\n    }}\n",
755            method_name, description, skip_msg
756        ));
757        return;
758    }
759
760    // Resolve per-fixture options_type: prefer the java call override, fall back to class-level.
761    let effective_options_type: Option<String> = call_overrides
762        .and_then(|o| o.options_type.clone())
763        .or_else(|| options_type.map(|s| s.to_string()));
764    let effective_options_type = effective_options_type.as_deref();
765
766    // Resolve per-fixture result_is_simple and result_is_bytes from the call override.
767    let effective_result_is_simple =
768        call_overrides.is_some_and(|o| o.result_is_simple) || call_config.result_is_simple || result_is_simple;
769    let effective_result_is_bytes = call_overrides.is_some_and(|o| o.result_is_bytes);
770
771    // Check if this test needs ObjectMapper deserialization for json_object args.
772    let needs_deser = effective_options_type.is_some()
773        && args.iter().any(|arg| {
774            if arg.arg_type != "json_object" {
775                return false;
776            }
777            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
778            fixture.input.get(field).is_some_and(|v| !v.is_null() && !v.is_array())
779        });
780
781    // Emit builder expressions for json_object args.
782    let mut builder_expressions = String::new();
783    if let (true, Some(opts_type)) = (needs_deser, effective_options_type) {
784        for arg in args {
785            if arg.arg_type == "json_object" {
786                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
787                if let Some(val) = fixture.input.get(field) {
788                    if !val.is_null() && !val.is_array() {
789                        if let Some(obj) = val.as_object() {
790                            // Generate builder expression: TypeName.builder().withFieldName(value)...build()
791                            let empty_path_fields: Vec<String> = Vec::new();
792                            let path_fields = call_overrides.map(|o| &o.path_fields).unwrap_or(&empty_path_fields);
793                            let builder_expr = java_builder_expression(
794                                obj,
795                                opts_type,
796                                enum_fields,
797                                nested_types,
798                                nested_types_optional,
799                                path_fields,
800                            );
801                            let var_name = &arg.name;
802                            builder_expressions.push_str(&format!("        var {} = {};\n", var_name, builder_expr));
803                        }
804                    }
805                }
806            }
807        }
808    }
809
810    let (mut setup_lines, args_str) =
811        build_args_and_setup(&fixture.input, args, class_name, effective_options_type, &fixture.id);
812
813    // Build visitor if present and add to setup
814    let mut visitor_var = String::new();
815    let mut has_visitor_fixture = false;
816    if let Some(visitor_spec) = &fixture.visitor {
817        visitor_var = build_java_visitor(&mut setup_lines, visitor_spec, class_name);
818        has_visitor_fixture = true;
819    }
820
821    // When visitor is present, attach it to the options parameter
822    let final_args = if has_visitor_fixture {
823        if args_str.is_empty() {
824            format!("new ConversionOptions().withVisitor({})", visitor_var)
825        } else if args_str.contains("new ConversionOptions")
826            || args_str.contains("ConversionOptionsBuilder")
827            || args_str.contains(".builder()")
828        {
829            // Options are being built (either new ConversionOptions(), builder pattern, or .builder().build())
830            // append .withVisitor() call before .build() if present
831            if args_str.contains(".build()") {
832                let idx = args_str.rfind(".build()").unwrap();
833                format!("{}.withVisitor({}){}", &args_str[..idx], visitor_var, &args_str[idx..])
834            } else {
835                format!("{}.withVisitor({})", args_str, visitor_var)
836            }
837        } else if args_str.ends_with(", null") {
838            let base = &args_str[..args_str.len() - 6];
839            format!("{}, new ConversionOptions().withVisitor({})", base, visitor_var)
840        } else {
841            format!("{}, new ConversionOptions().withVisitor({})", args_str, visitor_var)
842        }
843    } else {
844        args_str
845    };
846
847    // Render assertions_body
848    let mut assertions_body = String::new();
849
850    // Emit a `source` variable for run_query assertions that need the raw bytes.
851    let needs_source_var = fixture
852        .assertions
853        .iter()
854        .any(|a| a.assertion_type == "method_result" && a.method.as_deref() == Some("run_query"));
855    if needs_source_var {
856        if let Some(source_arg) = args.iter().find(|a| a.field == "source_code") {
857            let field = source_arg.field.strip_prefix("input.").unwrap_or(&source_arg.field);
858            if let Some(val) = fixture.input.get(field) {
859                let java_val = json_to_java(val);
860                assertions_body.push_str(&format!("        var source = {}.getBytes();\n", java_val));
861            }
862        }
863    }
864
865    for assertion in &fixture.assertions {
866        render_assertion(
867            &mut assertions_body,
868            assertion,
869            result_var,
870            class_name,
871            field_resolver,
872            effective_result_is_simple,
873            effective_result_is_bytes,
874            enum_fields,
875        );
876    }
877
878    let throws_clause = " throws Exception";
879    let call_expr = format!("{class_name}.{function_name}({final_args})");
880
881    let rendered = crate::template_env::render(
882        "java/test_method.jinja",
883        minijinja::context! {
884            method_name => method_name,
885            description => description,
886            builder_expressions => builder_expressions,
887            setup_lines => setup_lines,
888            throws_clause => throws_clause,
889            expects_error => expects_error,
890            call_expr => call_expr,
891            result_var => result_var,
892            assertions_body => assertions_body,
893        },
894    );
895    out.push_str(&rendered);
896}
897
898/// Build setup lines (e.g. handle creation) and the argument list for the function call.
899///
900/// Returns `(setup_lines, args_string)`.
901fn build_args_and_setup(
902    input: &serde_json::Value,
903    args: &[crate::config::ArgMapping],
904    class_name: &str,
905    options_type: Option<&str>,
906    fixture_id: &str,
907) -> (Vec<String>, String) {
908    if args.is_empty() {
909        return (Vec::new(), String::new());
910    }
911
912    let mut setup_lines: Vec<String> = Vec::new();
913    let mut parts: Vec<String> = Vec::new();
914
915    for arg in args {
916        if arg.arg_type == "mock_url" {
917            setup_lines.push(format!(
918                "String {} = System.getenv(\"MOCK_SERVER_URL\") + \"/fixtures/{fixture_id}\";",
919                arg.name,
920            ));
921            parts.push(arg.name.clone());
922            continue;
923        }
924
925        if arg.arg_type == "handle" {
926            // Generate a createEngine (or equivalent) call and pass the variable.
927            let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
928            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
929            let config_value = input.get(field).unwrap_or(&serde_json::Value::Null);
930            if config_value.is_null()
931                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
932            {
933                setup_lines.push(format!("var {} = {class_name}.{constructor_name}(null);", arg.name,));
934            } else {
935                let json_str = serde_json::to_string(config_value).unwrap_or_default();
936                let name = &arg.name;
937                setup_lines.push(format!(
938                    "var {name}Config = MAPPER.readValue(\"{}\", CrawlConfig.class);",
939                    escape_java(&json_str),
940                ));
941                setup_lines.push(format!(
942                    "var {} = {class_name}.{constructor_name}({name}Config);",
943                    arg.name,
944                    name = name,
945                ));
946            }
947            parts.push(arg.name.clone());
948            continue;
949        }
950
951        let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
952        let val = input.get(field);
953        match val {
954            None | Some(serde_json::Value::Null) if arg.optional => {
955                // Optional arg with no fixture value: emit positional null/default so the call
956                // has the right arity. For json_object optional args, build an empty default object
957                // so we get the right type rather than a raw null.
958                if arg.arg_type == "json_object" {
959                    if let Some(opts_type) = options_type {
960                        parts.push(format!("{opts_type}.builder().build()"));
961                    } else {
962                        parts.push("null".to_string());
963                    }
964                } else {
965                    parts.push("null".to_string());
966                }
967            }
968            None | Some(serde_json::Value::Null) => {
969                // Required arg with no fixture value: pass a language-appropriate default.
970                let default_val = match arg.arg_type.as_str() {
971                    "string" | "file_path" => "\"\"".to_string(),
972                    "int" | "integer" => "0".to_string(),
973                    "float" | "number" => "0.0d".to_string(),
974                    "bool" | "boolean" => "false".to_string(),
975                    _ => "null".to_string(),
976                };
977                parts.push(default_val);
978            }
979            Some(v) => {
980                if arg.arg_type == "json_object" {
981                    // Array json_object args: emit inline Java list expression.
982                    // Check for batch item arrays first (element_type = BatchBytesItem/BatchFileItem).
983                    if v.is_array() {
984                        if let Some(elem_type) = &arg.element_type {
985                            if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
986                                parts.push(emit_java_batch_item_array(v, elem_type));
987                                continue;
988                            }
989                        }
990                        // Otherwise use element_type to emit the correct numeric literal suffix (f vs d).
991                        let elem_type = arg.element_type.as_deref();
992                        parts.push(json_to_java_typed(v, elem_type));
993                        continue;
994                    }
995                    // Object json_object args with options_type: use pre-deserialized variable.
996                    if options_type.is_some() {
997                        parts.push(arg.name.clone());
998                        continue;
999                    }
1000                    parts.push(json_to_java(v));
1001                    continue;
1002                }
1003                // bytes args must be passed as byte[], not String.
1004                if arg.arg_type == "bytes" {
1005                    let val = json_to_java(v);
1006                    parts.push(format!("{val}.getBytes()"));
1007                    continue;
1008                }
1009                // file_path args must be wrapped in java.nio.file.Path.of().
1010                if arg.arg_type == "file_path" {
1011                    let val = json_to_java(v);
1012                    parts.push(format!("java.nio.file.Path.of({val})"));
1013                    continue;
1014                }
1015                parts.push(json_to_java(v));
1016            }
1017        }
1018    }
1019
1020    (setup_lines, parts.join(", "))
1021}
1022
/// Render the Java code for one fixture assertion and append it to `out`.
///
/// The assertion is handled by the first of these that applies:
///
/// 1. A synthetic/virtual field name (chunk predicates, embedding predicates, `keywords`,
///    `metadata` emptiness) is rendered through `java/synthetic_assertion.jinja`.
/// 2. A non-empty field that `field_resolver.is_valid_for_result` rejects is rendered through
///    the same template with `assertion_kind => "skipped"`.
/// 3. Everything else is rendered through `java/assertion.jinja`, with the field expression,
///    expected value(s) and helper flags computed below.
///
/// * `result_var`: name of the Java variable holding the call result.
/// * `class_name`: Java class used when building `method_result` call expressions.
/// * `result_is_simple`: when true the result variable itself is used as the field expression
///   and the assertion's field path is not resolved.
/// * `result_is_bytes`: when true the length expression uses `.length` instead of `.length()`.
/// * `enum_fields`: field paths (raw or resolved) whose values are enums; string comparisons
///   on them go through `.getValue()`.
#[allow(clippy::too_many_arguments)]
fn render_assertion(
    out: &mut String,
    assertion: &Assertion,
    result_var: &str,
    class_name: &str,
    field_resolver: &FieldResolver,
    result_is_simple: bool,
    result_is_bytes: bool,
    enum_fields: &std::collections::HashMap<String, String>,
) {
    // Handle synthetic/virtual fields that are computed rather than direct record accessors.
    if let Some(f) = &assertion.field {
        match f.as_str() {
            // ---- ExtractionResult chunk-level computed predicates ----
            "chunks_have_content" => {
                let pred = format!(
                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.content() != null && !c.content().isBlank())"
                );
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "chunks_content",
                        assertion_type => assertion.assertion_type.as_str(),
                        pred => pred,
                        field_name => f,
                    },
                ));
                return;
            }
            "chunks_have_heading_context" => {
                let pred = format!(
                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.metadata().headingContext().isPresent())"
                );
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "chunks_heading_context",
                        assertion_type => assertion.assertion_type.as_str(),
                        pred => pred,
                        field_name => f,
                    },
                ));
                return;
            }
            "chunks_have_embeddings" => {
                let pred = format!(
                    "{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.embedding() != null && !c.embedding().isEmpty())"
                );
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "chunks_embeddings",
                        assertion_type => assertion.assertion_type.as_str(),
                        pred => pred,
                        field_name => f,
                    },
                ));
                return;
            }
            "first_chunk_starts_with_heading" => {
                let pred = format!(
                    "{result_var}.chunks().orElse(java.util.List.of()).stream().findFirst().map(c -> c.metadata().headingContext().isPresent()).orElse(false)"
                );
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "first_chunk_heading",
                        assertion_type => assertion.assertion_type.as_str(),
                        pred => pred,
                        field_name => f,
                    },
                ));
                return;
            }
            // ---- EmbedResponse virtual fields ----
            // When result_is_simple=true the result IS List<List<Float>> (the raw embeddings list).
            // When result_is_simple=false the result has an .embeddings() accessor.
            "embedding_dimensions" => {
                // Dimension = size of the first embedding vector in the list.
                let embed_list = if result_is_simple {
                    result_var.to_string()
                } else {
                    format!("{result_var}.embeddings()")
                };
                let expr = format!("({embed_list}.isEmpty() ? 0 : {embed_list}.get(0).size())");
                let java_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "embedding_dimensions",
                        assertion_type => assertion.assertion_type.as_str(),
                        expr => expr,
                        java_val => java_val,
                        field_name => f,
                    },
                ));
                return;
            }
            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
                // These are validation predicates that require iterating the embedding matrix.
                let embed_list = if result_is_simple {
                    result_var.to_string()
                } else {
                    format!("{result_var}.embeddings()")
                };
                let pred = match f.as_str() {
                    "embeddings_valid" => {
                        format!("{embed_list}.stream().allMatch(e -> e != null && !e.isEmpty())")
                    }
                    "embeddings_finite" => {
                        format!("{embed_list}.stream().flatMap(java.util.Collection::stream).allMatch(Float::isFinite)")
                    }
                    "embeddings_non_zero" => {
                        format!("{embed_list}.stream().allMatch(e -> e.stream().anyMatch(v -> v != 0.0f))")
                    }
                    // The generated predicate sums the squared components and requires the
                    // sum to be within 1e-3 of 1.0.
                    "embeddings_normalized" => format!(
                        "{embed_list}.stream().allMatch(e -> {{ double n = e.stream().mapToDouble(v -> v * v).sum(); return Math.abs(n - 1.0) < 1e-3; }})"
                    ),
                    // The outer arm only admits the four names matched above.
                    _ => unreachable!(),
                };
                // Strips and re-adds the `embeddings_` prefix, so for the four names admitted
                // here the kind is the field name itself.
                let assertion_kind = format!("embeddings_{}", f.strip_prefix("embeddings_").unwrap_or(f));
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => assertion_kind,
                        assertion_type => assertion.assertion_type.as_str(),
                        pred => pred,
                        field_name => f,
                    },
                ));
                return;
            }
            // ---- Fields not present on the Java ExtractionResult ----
            "keywords" | "keywords_count" => {
                out.push_str(&crate::template_env::render(
                    "java/synthetic_assertion.jinja",
                    minijinja::context! {
                        assertion_kind => "keywords",
                        field_name => f,
                    },
                ));
                return;
            }
            // ---- metadata not_empty / is_empty: Metadata is a required record, not Optional ----
            // Metadata has no .isEmpty() method; check that at least one optional field is present.
            "metadata" => {
                match assertion.assertion_type.as_str() {
                    "not_empty" | "is_empty" => {
                        out.push_str(&crate::template_env::render(
                            "java/synthetic_assertion.jinja",
                            minijinja::context! {
                                assertion_kind => "metadata",
                                assertion_type => assertion.assertion_type.as_str(),
                                result_var => result_var,
                            },
                        ));
                        return;
                    }
                    _ => {} // fall through to normal handling
                }
            }
            _ => {}
        }
    }

    // Skip assertions on fields that don't exist on the result type.
    if let Some(f) = &assertion.field {
        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
            out.push_str(&crate::template_env::render(
                "java/synthetic_assertion.jinja",
                minijinja::context! {
                    assertion_kind => "skipped",
                    field_name => f,
                },
            ));
            return;
        }
    }

    // Determine if this field is an enum type (no `.contains()` on enums in Java).
    // Check both the raw fixture field path and the resolved (aliased) path so that
    // `fields_enum` entries can use either form (e.g., `"assets[].category"` or the
    // resolved `"assets[].asset_category"`).
    let field_is_enum = assertion
        .field
        .as_deref()
        .is_some_and(|f| enum_fields.contains_key(f) || enum_fields.contains_key(field_resolver.resolve(f)));

    // Determine if this field is an array (List<T>) — needed to choose .toString() for
    // contains assertions, since List.contains(Object) uses equals() which won't match
    // strings against complex record types like StructureItem.
    let field_is_array = assertion
        .field
        .as_deref()
        .is_some_and(|f| field_resolver.is_array(field_resolver.resolve(f)));

    // Java expression that the assertion inspects. With result_is_simple the field path is
    // ignored and the result variable is used directly.
    let field_expr = if result_is_simple {
        result_var.to_string()
    } else {
        match &assertion.field {
            Some(f) if !f.is_empty() => {
                let accessor = field_resolver.accessor(f, "java", result_var);
                let resolved = field_resolver.resolve(f);
                // Unwrap Optional fields with a type-appropriate fallback.
                // Map.get() returns nullable, not Optional, so skip .orElse() for map access.
                // NOTE(review): an earlier version of this comment described detecting nested
                // @Nullable fields by looking for a dot in the field path. No such check exists
                // here: every field that is_optional() and not a map access takes the
                // Optional.ofNullable() path below.
                if field_resolver.is_optional(resolved) && !field_resolver.has_map_access(f) {
                    // All nullable fields in the Java binding return @Nullable types, not Optional<T>.
                    // Wrap them in Optional.ofNullable() so e2e tests can use .orElse() fallbacks.
                    let optional_expr = format!("java.util.Optional.ofNullable({accessor})");
                    match assertion.assertion_type.as_str() {
                        // For not_empty / is_empty on Optional fields, return the raw Optional
                        // so the assertion arms can call isPresent()/isEmpty().
                        "not_empty" | "is_empty" => optional_expr,
                        // For size/count assertions on Optional<List<T>> fields, use List.of() fallback.
                        "count_min" | "count_equals" => {
                            format!("{optional_expr}.orElse(java.util.List.of())")
                        }
                        // For numeric comparisons on Optional<Long/Integer> fields, use 0L.
                        "greater_than" | "less_than" | "greater_than_or_equal" | "less_than_or_equal" => {
                            if field_resolver.is_array(resolved) {
                                format!("{optional_expr}.orElse(java.util.List.of())")
                            } else {
                                format!("{optional_expr}.orElse(0L)")
                            }
                        }
                        // For equals on Optional fields, determine fallback based on whether value is numeric.
                        // If the fixture value is a number, use 0L; otherwise use "".
                        "equals" => {
                            if let Some(expected) = &assertion.value {
                                if expected.is_number() {
                                    format!("{optional_expr}.orElse(0L)")
                                } else {
                                    format!("{optional_expr}.orElse(\"\")")
                                }
                            } else {
                                format!("{optional_expr}.orElse(\"\")")
                            }
                        }
                        // Any other assertion type: empty list for arrays, empty string otherwise.
                        _ if field_resolver.is_array(resolved) => {
                            format!("{optional_expr}.orElse(java.util.List.of())")
                        }
                        _ => format!("{optional_expr}.orElse(\"\")"),
                    }
                } else {
                    accessor
                }
            }
            // No field (or an empty one): assert on the result variable itself.
            _ => result_var.to_string(),
        }
    };

    // For enum fields, string-based assertions need .getValue() to convert the enum to
    // its serde-serialized lowercase string value (e.g., AssetCategory.Image -> "image").
    // All alef-generated Java enums expose a getValue() method annotated with @JsonValue.
    let string_expr = if field_is_enum {
        format!("{field_expr}.getValue()")
    } else {
        field_expr.clone()
    };

    // Pre-compute context for template
    // NOTE: the shorthand entries in the `context!` call below use these local names as the
    // template keys, so renaming a local here changes what the template receives.
    let assertion_type = assertion.assertion_type.as_str();
    let java_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();
    let is_string_val = assertion.value.as_ref().is_some_and(|v| v.is_string());
    let is_numeric_val = assertion.value.as_ref().is_some_and(|v| v.is_number());

    // Java literals for the multi-value `values` list (empty when the assertion has none).
    let values_java: Vec<String> = assertion
        .values
        .as_ref()
        .map(|values| values.iter().map(json_to_java).collect())
        .unwrap_or_default();

    // `a.contains(v1) || a.contains(v2) || ...` over all values; empty when there are none.
    let contains_any_expr = if !values_java.is_empty() {
        values_java
            .iter()
            .map(|v| format!("{string_expr}.contains({v})"))
            .collect::<Vec<_>>()
            .join(" || ")
    } else {
        String::new()
    };

    // Java arrays expose `.length` as a field; other results use the `.length()` method.
    let length_expr = if result_is_bytes {
        format!("{field_expr}.length")
    } else {
        format!("{field_expr}.length()")
    };

    // Expected value as an unsigned integer; 0 when absent or not a u64.
    let n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);

    // Call expression for method-based assertions; empty when no method is given.
    let call_expr = if let Some(method_name) = &assertion.method {
        build_java_method_call(result_var, method_name, assertion.args.as_ref(), class_name)
    } else {
        String::new()
    };

    // Check to apply to the method result; defaults to "is_true".
    let check = assertion.check.as_deref().unwrap_or("is_true");

    // Same computation as `java_val` above, exposed to the template under a separate key.
    let java_check_val = assertion.value.as_ref().map(json_to_java).unwrap_or_default();

    // Same computation as `n` above, exposed to the template under a separate key.
    let check_n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);

    let is_bool_val = assertion.value.as_ref().is_some_and(|v| v.is_boolean());
    let bool_is_true = assertion.value.as_ref().is_some_and(|v| v.as_bool() == Some(true));

    // True only for the node-lookup methods, in either snake_case or camelCase spelling.
    let method_returns_collection = assertion
        .method
        .as_ref()
        .is_some_and(|m| matches!(m.as_str(), "find_nodes_by_type" | "findNodesByType"));

    let rendered = crate::template_env::render(
        "java/assertion.jinja",
        minijinja::context! {
            assertion_type,
            java_val,
            string_expr,
            field_expr,
            field_is_enum,
            field_is_array,
            is_string_val,
            is_numeric_val,
            values_java => values_java,
            contains_any_expr,
            length_expr,
            n,
            call_expr,
            check,
            java_check_val,
            check_n,
            is_bool_val,
            bool_is_true,
            method_returns_collection,
        },
    );
    out.push_str(&rendered);
}
1364
1365/// Build a Java call expression for a `method_result` assertion on a tree-sitter Tree.
1366///
1367/// Maps method names to the appropriate Java static/instance method calls.
1368fn build_java_method_call(
1369    result_var: &str,
1370    method_name: &str,
1371    args: Option<&serde_json::Value>,
1372    class_name: &str,
1373) -> String {
1374    match method_name {
1375        "root_child_count" => format!("{result_var}.rootNode().childCount()"),
1376        "root_node_type" => format!("{result_var}.rootNode().kind()"),
1377        "named_children_count" => format!("{result_var}.rootNode().namedChildCount()"),
1378        "has_error_nodes" => format!("{class_name}.treeHasErrorNodes({result_var})"),
1379        "error_count" | "tree_error_count" => format!("{class_name}.treeErrorCount({result_var})"),
1380        "tree_to_sexp" => format!("{class_name}.treeToSexp({result_var})"),
1381        "contains_node_type" => {
1382            let node_type = args
1383                .and_then(|a| a.get("node_type"))
1384                .and_then(|v| v.as_str())
1385                .unwrap_or("");
1386            format!("{class_name}.treeContainsNodeType({result_var}, \"{node_type}\")")
1387        }
1388        "find_nodes_by_type" => {
1389            let node_type = args
1390                .and_then(|a| a.get("node_type"))
1391                .and_then(|v| v.as_str())
1392                .unwrap_or("");
1393            format!("{class_name}.findNodesByType({result_var}, \"{node_type}\")")
1394        }
1395        "run_query" => {
1396            let query_source = args
1397                .and_then(|a| a.get("query_source"))
1398                .and_then(|v| v.as_str())
1399                .unwrap_or("");
1400            let language = args
1401                .and_then(|a| a.get("language"))
1402                .and_then(|v| v.as_str())
1403                .unwrap_or("");
1404            let escaped_query = escape_java(query_source);
1405            format!("{class_name}.runQuery({result_var}, \"{language}\", \"{escaped_query}\", source)")
1406        }
1407        _ => {
1408            format!("{result_var}.{}()", method_name.to_lower_camel_case())
1409        }
1410    }
1411}
1412
1413/// Convert a `serde_json::Value` to a Java literal string.
1414fn json_to_java(value: &serde_json::Value) -> String {
1415    json_to_java_typed(value, None)
1416}
1417
1418/// Convert a JSON value to a Java literal, optionally overriding number type for array elements.
1419/// `element_type` controls how numeric array elements are emitted: "f32" → `1.0f`, otherwise `1.0d`.
1420/// Emit Java batch item constructors for BatchBytesItem or BatchFileItem arrays.
1421fn emit_java_batch_item_array(arr: &serde_json::Value, elem_type: &str) -> String {
1422    if let Some(items) = arr.as_array() {
1423        let item_strs: Vec<String> = items
1424            .iter()
1425            .filter_map(|item| {
1426                if let Some(obj) = item.as_object() {
1427                    match elem_type {
1428                        "BatchBytesItem" => {
1429                            let content = obj.get("content").and_then(|v| v.as_array());
1430                            let mime_type = obj.get("mime_type").and_then(|v| v.as_str()).unwrap_or("text/plain");
1431                            let content_code = if let Some(arr) = content {
1432                                let bytes: Vec<String> = arr
1433                                    .iter()
1434                                    .filter_map(|v| v.as_u64().map(|n| format!("(byte) {}", n)))
1435                                    .collect();
1436                                format!("new byte[] {{{}}}", bytes.join(", "))
1437                            } else {
1438                                "new byte[] {}".to_string()
1439                            };
1440                            Some(format!("new {}({}, \"{}\", null)", elem_type, content_code, mime_type))
1441                        }
1442                        "BatchFileItem" => {
1443                            let path = obj.get("path").and_then(|v| v.as_str()).unwrap_or("");
1444                            Some(format!(
1445                                "new {}(java.nio.file.Paths.get(\"{}\"), null)",
1446                                elem_type, path
1447                            ))
1448                        }
1449                        _ => None,
1450                    }
1451                } else {
1452                    None
1453                }
1454            })
1455            .collect();
1456        format!("java.util.Arrays.asList({})", item_strs.join(", "))
1457    } else {
1458        "java.util.List.of()".to_string()
1459    }
1460}
1461
1462fn json_to_java_typed(value: &serde_json::Value, element_type: Option<&str>) -> String {
1463    match value {
1464        serde_json::Value::String(s) => format!("\"{}\"", escape_java(s)),
1465        serde_json::Value::Bool(b) => b.to_string(),
1466        serde_json::Value::Number(n) => {
1467            if n.is_f64() {
1468                match element_type {
1469                    Some("f32" | "float" | "Float") => format!("{}f", n),
1470                    _ => format!("{}d", n),
1471                }
1472            } else {
1473                n.to_string()
1474            }
1475        }
1476        serde_json::Value::Null => "null".to_string(),
1477        serde_json::Value::Array(arr) => {
1478            let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, element_type)).collect();
1479            format!("java.util.List.of({})", items.join(", "))
1480        }
1481        serde_json::Value::Object(_) => {
1482            let json_str = serde_json::to_string(value).unwrap_or_default();
1483            format!("\"{}\"", escape_java(&json_str))
1484        }
1485    }
1486}
1487
1488/// Generate a Java builder expression for a JSON object.
1489/// E.g., `obj = {"language": "abl", "chunk_max_size": 50}`
1490/// becomes: `TypeName.builder().withLanguage("abl").withChunkMaxSize(50L).build()`
1491///
1492/// For enums: emit `EnumType.VariantName` (detected via camelCase lookup in enum_fields)
1493/// For strings and bools: use the value directly
1494/// For plain numbers: emit the literal with type suffix (long uses L, double uses d)
1495/// For nested objects: recurse with Options suffix
1496/// When `nested_types_optional` is false, nested builders are passed directly without
1497/// Optional.of() wrapping, allowing non-optional nested config types.
1498fn java_builder_expression(
1499    obj: &serde_json::Map<String, serde_json::Value>,
1500    type_name: &str,
1501    enum_fields: &std::collections::HashMap<String, String>,
1502    nested_types: &std::collections::HashMap<String, String>,
1503    nested_types_optional: bool,
1504    path_fields: &[String],
1505) -> String {
1506    let mut expr = format!("{}.builder()", type_name);
1507    for (key, val) in obj {
1508        // Convert snake_case key to camelCase for method name
1509        let camel_key = key.to_lower_camel_case();
1510        let method_name = format!("with{}", camel_key.to_upper_camel_case());
1511
1512        let java_val = match val {
1513            serde_json::Value::String(s) => {
1514                // Check if this field is an enum type by looking up in enum_fields.
1515                // enum_fields is keyed by camelCase names (e.g., "codeBlockStyle"), not snake_case.
1516                if let Some(enum_type_name) = enum_fields.get(&camel_key) {
1517                    // Enum field: use the mapped enum type name from the config
1518                    let variant_name = s.to_upper_camel_case();
1519                    format!("{}.{}", enum_type_name, variant_name)
1520                } else if camel_key == "preset" && type_name == "PreprocessingOptions" {
1521                    // Special case: preset field in PreprocessingOptions maps to PreprocessingPreset
1522                    let variant_name = s.to_upper_camel_case();
1523                    format!("PreprocessingPreset.{}", variant_name)
1524                } else if path_fields.contains(key) {
1525                    // Path field: wrap in Optional.of(java.nio.file.Path.of(...))
1526                    format!("Optional.of(java.nio.file.Path.of(\"{}\"))", escape_java(s))
1527                } else {
1528                    // String field: emit as a quoted literal
1529                    format!("\"{}\"", escape_java(s))
1530                }
1531            }
1532            serde_json::Value::Bool(b) => b.to_string(),
1533            serde_json::Value::Null => "null".to_string(),
1534            serde_json::Value::Number(n) => {
1535                // Number field: emit literal with type suffix.
1536                // Java records/classes use either `long` (primitive, not nullable) or
1537                // `Optional<Long>` (nullable). The codegen wraps in `Optional.of(...)`
1538                // by default since most options builder fields are Optional, but several
1539                // record types (e.g. SecurityLimits) use primitive `long` throughout.
1540                // Skip the wrap for: (a) known-primitive top-level fields and (b) any
1541                // method on a record type whose builder methods take primitives only.
1542                let camel_key = key.to_lower_camel_case();
1543                let is_plain_field = matches!(camel_key.as_str(), "listIndentWidth" | "wrapWidth");
1544                // Builders for typed-record nested config classes use primitives
1545                // throughout — they're not the optional-options pattern.
1546                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1547
1548                if is_plain_field || is_primitive_builder {
1549                    // Plain numeric field: no Optional wrapper
1550                    if n.is_f64() {
1551                        format!("{}d", n)
1552                    } else {
1553                        format!("{}L", n)
1554                    }
1555                } else {
1556                    // Optional numeric field: wrap in Optional.of()
1557                    if n.is_f64() {
1558                        format!("Optional.of({}d)", n)
1559                    } else {
1560                        format!("Optional.of({}L)", n)
1561                    }
1562                }
1563            }
1564            serde_json::Value::Array(arr) => {
1565                let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, None)).collect();
1566                format!("java.util.List.of({})", items.join(", "))
1567            }
1568            serde_json::Value::Object(nested) => {
1569                // Recurse with the type from nested_types mapping, or default to snake_case → PascalCase + "Options".
1570                let nested_type = nested_types
1571                    .get(key.as_str())
1572                    .cloned()
1573                    .unwrap_or_else(|| format!("{}Options", key.to_upper_camel_case()));
1574                let inner = java_builder_expression(
1575                    nested,
1576                    &nested_type,
1577                    enum_fields,
1578                    nested_types,
1579                    nested_types_optional,
1580                    &[],
1581                );
1582                // Top-level config builders (e.g. ExtractionConfigBuilder) declare nested
1583                // record fields as `Optional<T>` (since they are nullable). Primitive-fields
1584                // builders (SecurityLimitsBuilder etc.) take the bare type directly.
1585                let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
1586                if is_primitive_builder || !nested_types_optional {
1587                    inner
1588                } else {
1589                    format!("Optional.of({inner})")
1590                }
1591            }
1592        };
1593        expr.push_str(&format!(".{}({})", method_name, java_val));
1594    }
1595    expr.push_str(".build()");
1596    expr
1597}
1598
/// Build the default nested-type mapping for Java extraction config types.
///
/// Keys are config field names in snake_case; values are the Java type names
/// used when generating a nested builder for that field. The returned map is
/// freshly allocated on every call.
fn default_java_nested_types() -> std::collections::HashMap<String, String> {
    const DEFAULTS: &[(&str, &str)] = &[
        ("chunking", "ChunkingConfig"),
        ("ocr", "OcrConfig"),
        ("images", "ImageExtractionConfig"),
        ("html_output", "HtmlOutputConfig"),
        ("language_detection", "LanguageDetectionConfig"),
        ("postprocessor", "PostProcessorConfig"),
        ("acceleration", "AccelerationConfig"),
        ("email", "EmailConfig"),
        ("pages", "PageConfig"),
        ("pdf_options", "PdfConfig"),
        ("layout", "LayoutDetectionConfig"),
        ("tree_sitter", "TreeSitterConfig"),
        ("structured_extraction", "StructuredExtractionConfig"),
        ("content_filter", "ContentFilterConfig"),
        ("token_reduction", "TokenReductionOptions"),
        ("security_limits", "SecurityLimits"),
    ];

    let mut mapping = std::collections::HashMap::with_capacity(DEFAULTS.len());
    for &(field, java_type) in DEFAULTS {
        mapping.insert(field.to_owned(), java_type.to_owned());
    }
    mapping
}
1628
1629// ---------------------------------------------------------------------------
1630// Import collection helpers
1631// ---------------------------------------------------------------------------
1632
1633/// Recursively collect enum types and nested option types used in a builder expression.
1634/// Enums are keyed in the enum_fields map by camelCase names (e.g., "codeBlockStyle" → "CodeBlockStyle").
1635fn collect_enum_and_nested_types(
1636    obj: &serde_json::Map<String, serde_json::Value>,
1637    enum_fields: &std::collections::HashMap<String, String>,
1638    types_out: &mut std::collections::BTreeSet<String>,
1639) {
1640    for (key, val) in obj {
1641        // enum_fields is keyed by camelCase, not snake_case.
1642        let camel_key = key.to_lower_camel_case();
1643        if let Some(enum_type) = enum_fields.get(&camel_key) {
1644            // Add the enum type from the mapping (e.g., "CodeBlockStyle").
1645            types_out.insert(enum_type.clone());
1646        } else if camel_key == "preset" {
1647            // Special case: preset field uses PreprocessingPreset enum.
1648            types_out.insert("PreprocessingPreset".to_string());
1649        }
1650        // Recurse into nested objects to find their nested enum types.
1651        if let Some(nested) = val.as_object() {
1652            collect_enum_and_nested_types(nested, enum_fields, types_out);
1653        }
1654    }
1655}
1656
1657fn collect_nested_type_names(
1658    obj: &serde_json::Map<String, serde_json::Value>,
1659    nested_types: &std::collections::HashMap<String, String>,
1660    types_out: &mut std::collections::BTreeSet<String>,
1661) {
1662    for (key, val) in obj {
1663        if let Some(type_name) = nested_types.get(key.as_str()) {
1664            types_out.insert(type_name.clone());
1665        }
1666        if let Some(nested) = val.as_object() {
1667            collect_nested_type_names(nested, nested_types, types_out);
1668        }
1669    }
1670}
1671
1672// ---------------------------------------------------------------------------
1673// Visitor generation
1674// ---------------------------------------------------------------------------
1675
1676/// Build a Java visitor class and add setup lines. Returns the visitor variable name.
1677fn build_java_visitor(
1678    setup_lines: &mut Vec<String>,
1679    visitor_spec: &crate::fixture::VisitorSpec,
1680    class_name: &str,
1681) -> String {
1682    setup_lines.push("class _TestVisitor implements Visitor {".to_string());
1683    for (method_name, action) in &visitor_spec.callbacks {
1684        emit_java_visitor_method(setup_lines, method_name, action, class_name);
1685    }
1686    setup_lines.push("}".to_string());
1687    setup_lines.push("var visitor = new _TestVisitor();".to_string());
1688    "visitor".to_string()
1689}
1690
1691/// Emit a Java visitor method for a callback action.
1692fn emit_java_visitor_method(
1693    setup_lines: &mut Vec<String>,
1694    method_name: &str,
1695    action: &CallbackAction,
1696    _class_name: &str,
1697) {
1698    let camel_method = method_to_camel(method_name);
1699    let params = match method_name {
1700        "visit_link" => "NodeContext ctx, String href, String text, String title",
1701        "visit_image" => "NodeContext ctx, String src, String alt, String title",
1702        "visit_heading" => "NodeContext ctx, int level, String text, String id",
1703        "visit_code_block" => "NodeContext ctx, String lang, String code",
1704        "visit_code_inline"
1705        | "visit_strong"
1706        | "visit_emphasis"
1707        | "visit_strikethrough"
1708        | "visit_underline"
1709        | "visit_subscript"
1710        | "visit_superscript"
1711        | "visit_mark"
1712        | "visit_button"
1713        | "visit_summary"
1714        | "visit_figcaption"
1715        | "visit_definition_term"
1716        | "visit_definition_description" => "NodeContext ctx, String text",
1717        "visit_text" => "NodeContext ctx, String text",
1718        "visit_list_item" => "NodeContext ctx, boolean ordered, String marker, String text",
1719        "visit_blockquote" => "NodeContext ctx, String content, long depth",
1720        "visit_table_row" => "NodeContext ctx, java.util.List<String> cells, boolean isHeader",
1721        "visit_custom_element" => "NodeContext ctx, String tagName, String html",
1722        "visit_form" => "NodeContext ctx, String actionUrl, String method",
1723        "visit_input" => "NodeContext ctx, String inputType, String name, String value",
1724        "visit_audio" | "visit_video" | "visit_iframe" => "NodeContext ctx, String src",
1725        "visit_details" => "NodeContext ctx, boolean isOpen",
1726        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
1727            "NodeContext ctx, String output"
1728        }
1729        "visit_list_start" => "NodeContext ctx, boolean ordered",
1730        "visit_list_end" => "NodeContext ctx, boolean ordered, String output",
1731        _ => "NodeContext ctx",
1732    };
1733
1734    // Determine action type and values for template
1735    let (action_type, action_value, format_args) = match action {
1736        CallbackAction::Skip => ("skip", String::new(), Vec::new()),
1737        CallbackAction::Continue => ("continue", String::new(), Vec::new()),
1738        CallbackAction::PreserveHtml => ("preserve_html", String::new(), Vec::new()),
1739        CallbackAction::Custom { output } => ("custom_literal", escape_java(output), Vec::new()),
1740        CallbackAction::CustomTemplate { template } => {
1741            // Extract {placeholder} names from the template (in order of appearance).
1742            let mut format_str = String::with_capacity(template.len());
1743            let mut format_args: Vec<String> = Vec::new();
1744            let mut chars = template.chars().peekable();
1745            while let Some(ch) = chars.next() {
1746                if ch == '{' {
1747                    // Collect identifier chars until '}'.
1748                    let mut name = String::new();
1749                    let mut closed = false;
1750                    for inner in chars.by_ref() {
1751                        if inner == '}' {
1752                            closed = true;
1753                            break;
1754                        }
1755                        name.push(inner);
1756                    }
1757                    if closed && !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1758                        let camel_name = name.as_str().to_lower_camel_case();
1759                        format_args.push(camel_name);
1760                        format_str.push_str("%s");
1761                    } else {
1762                        // Not a simple placeholder — emit literally.
1763                        format_str.push('{');
1764                        format_str.push_str(&name);
1765                        if closed {
1766                            format_str.push('}');
1767                        }
1768                    }
1769                } else {
1770                    format_str.push(ch);
1771                }
1772            }
1773            let escaped = escape_java(&format_str);
1774            if format_args.is_empty() {
1775                ("custom_literal", escaped, Vec::new())
1776            } else {
1777                ("custom_formatted", escaped, format_args)
1778            }
1779        }
1780    };
1781
1782    let params = params.to_string();
1783
1784    let rendered = crate::template_env::render(
1785        "java/visitor_method.jinja",
1786        minijinja::context! {
1787            camel_method,
1788            params,
1789            action_type,
1790            action_value,
1791            format_args => format_args,
1792        },
1793    );
1794    setup_lines.push(rendered);
1795}
1796
1797/// Convert snake_case method names to Java camelCase.
1798fn method_to_camel(snake: &str) -> String {
1799    snake.to_lower_camel_case()
1800}