use crate::config::E2eConfig;
use crate::escape::{escape_java, sanitize_filename};
use crate::field_access::FieldResolver;
use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, HttpFixture};
use alef_core::backend::GeneratedFile;
use alef_core::config::ResolvedCrateConfig;
use alef_core::hash::{self, CommentStyle};
use alef_core::template_versions as tv;
use anyhow::Result;
use heck::{ToLowerCamelCase, ToUpperCamelCase};
use std::fmt::Write as FmtWrite;
use std::path::PathBuf;
use super::E2eCodegen;
use super::client;
/// Generator for the Java end-to-end test project: a Maven `pom.xml` plus one JUnit test
/// class per fixture category.
pub struct JavaCodegen;

impl E2eCodegen for JavaCodegen {
    /// Produces every generated file of the Java e2e project.
    ///
    /// The result starts with `<output>/java/pom.xml` and is followed by one
    /// `<Category>Test.java` for each fixture group that still has fixtures after
    /// `should_include_fixture` filtering. Test classes are placed under
    /// `src/test/java/<group id split on '.'>/e2e`.
    fn generate(
        &self,
        groups: &[FixtureGroup],
        e2e_config: &E2eConfig,
        config: &ResolvedCrateConfig,
    ) -> Result<Vec<GeneratedFile>> {
        let lang = self.language_name();
        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
        let call = &e2e_config.call;
        let overrides = call.overrides.get(lang);

        // Computed but not consumed anywhere in this generator (hence the underscore).
        let _module_path = match overrides.and_then(|o| o.module.as_ref()) {
            Some(module) => module.clone(),
            None => call.module.clone(),
        };
        // Language-specific overrides win over the shared call configuration.
        let function_name = match overrides.and_then(|o| o.function.as_ref()) {
            Some(name) => name.clone(),
            None => call.function.clone(),
        };
        let class_name = match overrides.and_then(|o| o.class.as_ref()) {
            Some(name) => name.clone(),
            None => config.name.to_upper_camel_case(),
        };
        let result_is_simple = matches!(overrides, Some(o) if o.result_is_simple);
        let result_var = &call.result_var;

        let java_pkg = e2e_config.resolve_package("java");
        let pkg_name = match java_pkg.as_ref().and_then(|p| p.name.as_ref()) {
            Some(name) => name.clone(),
            None => config.name.clone(),
        };
        let java_group_id = config.java_group_id();
        let pkg_version = match config.resolved_version() {
            Some(version) => version,
            None => String::from("0.1.0"),
        };

        // The POM is always emitted, even when no fixture group survives filtering.
        let mut files = vec![GeneratedFile {
            path: output_base.join("pom.xml"),
            content: render_pom_xml(&pkg_name, &java_group_id, &pkg_version, e2e_config.dep_mode),
            generated_header: false,
        }];

        // src/test/java/<one directory per group-id segment>/e2e
        let test_base = java_group_id
            .split('.')
            .fold(output_base.join("src").join("test").join("java"), |dir, segment| {
                dir.join(segment)
            })
            .join("e2e");

        let options_type = overrides.and_then(|o| o.options_type.clone());
        let empty_enum_fields = std::collections::HashMap::new();
        let java_enum_fields = match overrides {
            Some(o) => &o.enum_fields,
            None => &empty_enum_fields,
        };

        // Built-in nested-type mappings, extended (and overridden) by configured ones.
        let mut effective_nested_types = default_java_nested_types();
        if let Some(o) = overrides {
            effective_nested_types.extend(o.nested_types.clone());
        }
        let nested_types_optional = match overrides {
            Some(o) => o.nested_types_optional,
            None => true,
        };

        let field_resolver = FieldResolver::new(
            &e2e_config.fields,
            &e2e_config.fields_optional,
            &e2e_config.result_fields,
            &e2e_config.fields_array,
            &std::collections::HashSet::new(),
        );

        let test_files = groups.iter().filter_map(|group| {
            let active: Vec<&Fixture> = group
                .fixtures
                .iter()
                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
                .collect();
            // Groups without any applicable fixture produce no test class.
            if active.is_empty() {
                return None;
            }

            let class_file_name = format!("{}Test.java", sanitize_filename(&group.category).to_upper_camel_case());
            let content = render_test_file(
                &group.category,
                &active,
                &class_name,
                &function_name,
                &java_group_id,
                result_var,
                &e2e_config.call.args,
                options_type.as_deref(),
                &field_resolver,
                result_is_simple,
                java_enum_fields,
                e2e_config,
                &effective_nested_types,
                nested_types_optional,
            );
            Some(GeneratedFile {
                path: test_base.join(class_file_name),
                content,
                generated_header: true,
            })
        });
        files.extend(test_files);

        Ok(files)
    }

    /// Language key used for the output directory and for override lookups.
    fn language_name(&self) -> &'static str {
        "java"
    }
}
/// Renders the Maven `pom.xml` of the generated Java e2e project.
///
/// * `pkg_name` — binding package; a `group:artifact` value supplies both dependency
///   coordinates, otherwise it is the artifact id and `java_group_id` is the group id.
/// * `java_group_id` — group id of the generated e2e project itself.
/// * `pkg_version` — version of the binding dependency.
/// * `dep_mode` — `Registry` emits a plain dependency; `Local` emits a `system`-scoped
///   dependency whose `systemPath` points at the jar under `packages/java/target`.
///
/// The project's artifact id is `<dependency artifact>-e2e-java`; its own version is the
/// literal `0.1.0` from the template.
fn render_pom_xml(
    pkg_name: &str,
    java_group_id: &str,
    pkg_version: &str,
    dep_mode: crate::config::DependencyMode,
) -> String {
    // Split explicit `group:artifact` coordinates, falling back to the project group id.
    let (dep_group_id, dep_artifact_id) = pkg_name.split_once(':').unwrap_or((java_group_id, pkg_name));
    let artifact_id = format!("{dep_artifact_id}-e2e-java");

    // NOTE: the templates below are whitespace-sensitive output; keep them verbatim.
    let dep_block = match dep_mode {
        crate::config::DependencyMode::Registry => format!(
            r#" <dependency>
<groupId>{dep_group_id}</groupId>
<artifactId>{dep_artifact_id}</artifactId>
<version>{pkg_version}</version>
</dependency>"#
        ),
        crate::config::DependencyMode::Local => format!(
            r#" <dependency>
<groupId>{dep_group_id}</groupId>
<artifactId>{dep_artifact_id}</artifactId>
<version>{pkg_version}</version>
<scope>system</scope>
<systemPath>${{project.basedir}}/../../packages/java/target/{dep_artifact_id}-{pkg_version}.jar</systemPath>
</dependency>"#
        ),
    };

    // Tool versions are read from the shared template-version constants.
    let junit = tv::maven::JUNIT;
    let jackson = tv::maven::JACKSON_E2E;
    let build_helper = tv::maven::BUILD_HELPER_MAVEN_PLUGIN;
    let maven_surefire = tv::maven::MAVEN_SUREFIRE_PLUGIN_E2E;

    format!(
        r#"<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>{java_group_id}</groupId>
<artifactId>{artifact_id}</artifactId>
<version>0.1.0</version>
<properties>
<maven.compiler.source>25</maven.compiler.source>
<maven.compiler.target>25</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>{junit}</junit.version>
</properties>
<dependencies>
{dep_block}
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>{jackson}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jdk8</artifactId>
<version>{jackson}</version>
</dependency>
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>24.1.0</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>${{junit.version}}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>{build_helper}</version>
<executions>
<execution>
<id>add-test-source</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>src/test/java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>{maven_surefire}</version>
<configuration>
<argLine>--enable-preview --enable-native-access=ALL-UNNAMED -Djava.library.path=${{project.basedir}}/../../target/release</argLine>
<workingDirectory>${{project.basedir}}/../../test_documents</workingDirectory>
</configuration>
</plugin>
</plugins>
</build>
</project>
"#
    )
}
/// Renders the full Java source of the JUnit test class for one fixture category.
///
/// The output contains the generated-file header, the `package` declaration, the imports the
/// fixtures need, an optional shared `MAPPER` field, and one test method per fixture (emitted
/// by `render_test_method`).
///
/// * `category` — fixture category; determines the `<Category>Test` class name.
/// * `class_name` — binding class. When fully qualified it is imported, and its package is
///   used to qualify every other binding type that gets imported.
/// * `function_name`, `result_var`, `args`, `options_type`, `result_is_simple` — defaults that
///   are forwarded to `render_test_method`.
/// * `enum_fields` / `nested_types` — lookup tables used to discover which enum and nested
///   types the fixture inputs reference, so that they can be imported.
///
/// Returns the complete file content.
#[allow(clippy::too_many_arguments)]
fn render_test_file(
    category: &str,
    fixtures: &[&Fixture],
    class_name: &str,
    function_name: &str,
    java_group_id: &str,
    result_var: &str,
    args: &[crate::config::ArgMapping],
    options_type: Option<&str>,
    field_resolver: &FieldResolver,
    result_is_simple: bool,
    enum_fields: &std::collections::HashMap<String, String>,
    e2e_config: &E2eConfig,
    nested_types: &std::collections::HashMap<String, String>,
    nested_types_optional: bool,
) -> String {
    let lang_for_om = "java";

    let mut out = String::new();
    out.push_str(&hash::header(CommentStyle::DoubleSlash));
    let test_class_name = format!("{}Test", sanitize_filename(category).to_upper_camel_case());

    // A dotted class name is imported by its full path and referenced by its simple name.
    let (import_path, simple_class) = if class_name.contains('.') {
        (class_name, class_name.rsplit('.').next().unwrap_or(class_name))
    } else {
        ("", class_name)
    };
    // Package of the binding class; empty when the class name is unqualified.
    let binding_pkg = import_path.rsplit_once('.').map_or("", |(pkg, _)| pkg);

    let _ = writeln!(out, "package {java_group_id}.e2e;");
    let _ = writeln!(out);

    // A handle argument with a non-empty config is deserialized through `MAPPER` by
    // `build_args_and_setup`. Detect that with the same rules that function applies: use the
    // fixture's *resolved* call arguments and strip the optional `input.` prefix, otherwise
    // the generated class can reference `MAPPER`/`CrawlConfig` without declaring them.
    let needs_object_mapper_for_handle = fixtures.iter().any(|fixture| {
        let call_cfg = e2e_config.resolve_call(fixture.call.as_deref());
        call_cfg.args.iter().filter(|arg| arg.arg_type == "handle").any(|arg| {
            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
            match fixture.input.get(field) {
                None | Some(serde_json::Value::Null) => false,
                Some(serde_json::Value::Object(map)) => !map.is_empty(),
                Some(_) => true,
            }
        })
    });
    let has_http_fixtures = fixtures.iter().any(|f| f.http.is_some());
    let needs_object_mapper = needs_object_mapper_for_handle || has_http_fixtures;

    // One pass over the fixtures collects every binding type that has to be imported.
    // BTreeSets keep the emitted import order deterministic.
    let mut all_options_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    let mut enum_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    let mut nested_types_used: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
    if let Some(t) = options_type {
        all_options_types.insert(t.to_string());
    }
    for fixture in fixtures {
        let call_cfg = e2e_config.resolve_call(fixture.call.as_deref());
        if let Some(t) = call_cfg
            .overrides
            .get(lang_for_om)
            .and_then(|ov| ov.options_type.as_ref())
        {
            all_options_types.insert(t.clone());
        }
        for arg in &call_cfg.args {
            if let Some(elem_type) = &arg.element_type {
                if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" {
                    all_options_types.insert(elem_type.clone());
                }
            }
            if arg.arg_type == "json_object" {
                let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
                // Only object-valued inputs are turned into builders, so only those can
                // reference enum or nested types.
                if let Some(obj) = fixture.input.get(field).and_then(serde_json::Value::as_object) {
                    collect_enum_and_nested_types(obj, enum_fields, &mut enum_types_used);
                    collect_nested_type_names(obj, nested_types, &mut nested_types_used);
                }
            }
        }
    }

    let _ = writeln!(out, "import org.junit.jupiter.api.Test;");
    let _ = writeln!(out, "import static org.junit.jupiter.api.Assertions.*;");
    if !import_path.is_empty() {
        let _ = writeln!(out, "import {import_path};");
    }
    if needs_object_mapper {
        let _ = writeln!(out, "import com.fasterxml.jackson.databind.ObjectMapper;");
        let _ = writeln!(out, "import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;");
    }
    for opts_type in &all_options_types {
        if binding_pkg.is_empty() {
            let _ = writeln!(out, "import {opts_type};");
        } else {
            let _ = writeln!(out, "import {binding_pkg}.{opts_type};");
        }
    }
    // The remaining binding types are only imported when the binding class is qualified.
    if !import_path.is_empty() {
        for enum_type in &enum_types_used {
            let _ = writeln!(out, "import {binding_pkg}.{enum_type};");
        }
        for type_name in &nested_types_used {
            let _ = writeln!(out, "import {binding_pkg}.{type_name};");
        }
        if needs_object_mapper_for_handle {
            let _ = writeln!(out, "import {binding_pkg}.CrawlConfig;");
        }
        let has_visitor_fixtures = fixtures.iter().any(|f| f.visitor.is_some());
        if has_visitor_fixtures && !binding_pkg.is_empty() {
            let _ = writeln!(out, "import {binding_pkg}.Visitor;");
            let _ = writeln!(out, "import {binding_pkg}.NodeContext;");
            let _ = writeln!(out, "import {binding_pkg}.VisitResult;");
        }
    }
    if !all_options_types.is_empty() {
        let _ = writeln!(out, "import java.util.Optional;");
    }

    let _ = writeln!(out);
    let _ = writeln!(out, "/** E2e tests for category: {category}. */");
    let _ = writeln!(out, "class {test_class_name} {{");
    if needs_object_mapper {
        let _ = writeln!(out);
        let _ = writeln!(
            out,
            " private static final ObjectMapper MAPPER = new ObjectMapper().registerModule(new Jdk8Module());"
        );
    }
    for fixture in fixtures {
        render_test_method(
            &mut out,
            fixture,
            simple_class,
            function_name,
            result_var,
            args,
            options_type,
            field_resolver,
            result_is_simple,
            enum_fields,
            e2e_config,
            nested_types,
            nested_types_optional,
        );
        let _ = writeln!(out);
    }
    let _ = writeln!(out, "}}");
    out
}
/// Renders mock-server HTTP fixtures as JUnit tests that use `java.net.http.HttpClient`.
struct JavaTestClientRenderer;

impl client::TestClientRenderer for JavaTestClientRenderer {
    /// Language key of this renderer.
    fn language_name(&self) -> &'static str {
        "java"
    }

    /// Converts a fixture id into the UpperCamelCase suffix that follows `test` in method names.
    fn sanitize_test_name(&self, id: &str) -> String {
        id.to_upper_camel_case()
    }

    /// Opens a `@Test` method.
    ///
    /// With a `skip_reason` the body starts with a failing JUnit assumption, which aborts the
    /// test as skipped. Otherwise the method declares `throws Exception` and resolves `baseUrl`
    /// from `MOCK_SERVER_URL`, falling back to `http://localhost:8080`.
    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
        let _ = writeln!(out, " @Test");
        match skip_reason {
            Some(reason) => {
                let escaped_reason = escape_java(reason);
                let _ = writeln!(out, " void test{fn_name}() {{");
                let _ = writeln!(out, " // {description}");
                let _ = writeln!(
                    out,
                    " org.junit.jupiter.api.Assumptions.assumeTrue(false, \"{escaped_reason}\");"
                );
            }
            None => {
                let _ = writeln!(out, " void test{fn_name}() throws Exception {{");
                let _ = writeln!(out, " // {description}");
                let _ = writeln!(out, " String baseUrl = System.getenv(\"MOCK_SERVER_URL\");");
                let _ = writeln!(out, " if (baseUrl == null) baseUrl = \"http://localhost:8080\";");
            }
        }
    }

    /// Closes the method opened by `render_test_open`.
    fn render_test_close(&self, out: &mut String) {
        let _ = writeln!(out, " }}");
    }

    /// Emits the request construction and the `send` call, binding the response to
    /// `ctx.response_var`.
    ///
    /// Every fixture-provided value that lands inside a Java string literal (request target,
    /// content type, header names/values, cookies, body) is passed through `escape_java`.
    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
        // Header names that are skipped here and never set on the request builder.
        const JAVA_RESTRICTED_HEADERS: &[&str] = &["connection", "content-length", "expect", "host", "upgrade"];

        let method = ctx.method.to_uppercase();

        // Build `path?k=v&k2=v2` first and escape it once as a whole, so that the path and the
        // query keys are escaped too (previously only the query values were).
        let mut target = ctx.path.to_string();
        for (idx, (key, value)) in ctx.query_params.iter().enumerate() {
            target.push(if idx == 0 { '?' } else { '&' });
            let rendered = match value {
                serde_json::Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            let _ = write!(target, "{key}={rendered}");
        }
        let path = escape_java(&target);
        let _ = writeln!(
            out,
            " java.net.URI uri = java.net.URI.create(baseUrl + \"{path}\");"
        );

        let body_publisher = match ctx.body {
            Some(body) => {
                let json = serde_json::to_string(body).unwrap_or_default();
                let escaped = escape_java(&json);
                format!("java.net.http.HttpRequest.BodyPublishers.ofString(\"{escaped}\")")
            }
            None => "java.net.http.HttpRequest.BodyPublishers.noBody()".to_string(),
        };
        let _ = writeln!(out, " var builder = java.net.http.HttpRequest.newBuilder(uri)");
        let _ = writeln!(out, " .method(\"{method}\", {body_publisher});");

        // Requests with a body get a Content-Type unless the fixture supplies its own header.
        let has_explicit_content_type = ctx.headers.keys().any(|k| k.eq_ignore_ascii_case("content-type"));
        if ctx.body.is_some() && !has_explicit_content_type {
            let content_type = escape_java(ctx.content_type.unwrap_or("application/json"));
            let _ = writeln!(
                out,
                " builder = builder.header(\"Content-Type\", \"{content_type}\");"
            );
        }

        for (name, value) in ctx.headers {
            if JAVA_RESTRICTED_HEADERS.iter().any(|h| name.eq_ignore_ascii_case(h)) {
                continue;
            }
            let escaped_name = escape_java(name);
            let escaped_value = escape_java(value);
            let _ = writeln!(
                out,
                " builder = builder.header(\"{escaped_name}\", \"{escaped_value}\");"
            );
        }

        if !ctx.cookies.is_empty() {
            let cookie_str: Vec<String> = ctx.cookies.iter().map(|(k, v)| format!("{k}={v}")).collect();
            let cookie_header = escape_java(&cookie_str.join("; "));
            let _ = writeln!(
                out,
                " builder = builder.header(\"Cookie\", \"{cookie_header}\");"
            );
        }

        let response_var = ctx.response_var;
        let _ = writeln!(
            out,
            " var {response_var} = java.net.http.HttpClient.newHttpClient()"
        );
        let _ = writeln!(
            out,
            " .send(builder.build(), java.net.http.HttpResponse.BodyHandlers.ofString());"
        );
    }

    /// Asserts the exact HTTP status code of the response.
    fn render_assert_status(&self, out: &mut String, response_var: &str, status: u16) {
        let _ = writeln!(
            out,
            " assertEquals({status}, {response_var}.statusCode(), \"status code mismatch\");"
        );
    }

    /// Asserts on a response header.
    ///
    /// `expected` is either one of the markers `<<present>>`, `<<absent>>`, `<<uuid>>`, or a
    /// literal that the header value must contain.
    fn render_assert_header(&self, out: &mut String, response_var: &str, name: &str, expected: &str) {
        let escaped_name = escape_java(name);
        let lookup = format!("{response_var}.headers().firstValue(\"{escaped_name}\")");
        let (check, failure) = match expected {
            "<<present>>" => (format!("{lookup}.isPresent()"), "should be present"),
            "<<absent>>" => (format!("{lookup}.isEmpty()"), "should be absent"),
            "<<uuid>>" => (
                format!(
                    "{lookup}.orElse(\"\").matches(\"[0-9a-fA-F]{{8}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{4}}-[0-9a-fA-F]{{12}}\")"
                ),
                "should be a UUID",
            ),
            literal => {
                let escaped_value = escape_java(literal);
                (
                    format!("{lookup}.orElse(\"\").contains(\"{escaped_value}\")"),
                    "mismatch",
                )
            }
        };
        let _ = writeln!(out, " assertTrue({check}, \"header {escaped_name} {failure}\");");
    }

    /// Asserts the whole response body: objects and arrays are compared as parsed JSON trees
    /// via `MAPPER`, every other value is compared against the trimmed body text.
    fn render_assert_json_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
        match expected {
            serde_json::Value::Object(_) | serde_json::Value::Array(_) => {
                let json_str = serde_json::to_string(expected).unwrap_or_default();
                let escaped = escape_java(&json_str);
                let _ = writeln!(out, " var bodyJson = MAPPER.readTree({response_var}.body());");
                let _ = writeln!(out, " var expectedJson = MAPPER.readTree(\"{escaped}\");");
                let _ = writeln!(out, " assertEquals(expectedJson, bodyJson, \"body mismatch\");");
            }
            serde_json::Value::String(s) => {
                let escaped = escape_java(s);
                let _ = writeln!(
                    out,
                    " assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");"
                );
            }
            other => {
                let escaped = escape_java(&other.to_string());
                let _ = writeln!(
                    out,
                    " assertEquals(\"{escaped}\", {response_var}.body().trim(), \"body mismatch\");"
                );
            }
        }
    }

    /// Asserts that each top-level key of `expected` has the same JSON value in the response
    /// body. Emits nothing when `expected` is not an object.
    fn render_assert_partial_body(&self, out: &mut String, response_var: &str, expected: &serde_json::Value) {
        let Some(obj) = expected.as_object() else {
            return;
        };
        let _ = writeln!(out, " var partialJson = MAPPER.readTree({response_var}.body());");
        for (key, val) in obj {
            let escaped_key = escape_java(key);
            let json_str = serde_json::to_string(val).unwrap_or_default();
            let escaped_val = escape_java(&json_str);
            let _ = writeln!(
                out,
                " assertEquals(MAPPER.readTree(\"{escaped_val}\"), partialJson.get(\"{escaped_key}\"), \"body field '{escaped_key}' mismatch\");"
            );
        }
    }

    /// Asserts that the raw response body contains the message of every expected validation
    /// error.
    fn render_assert_validation_errors(
        &self,
        out: &mut String,
        response_var: &str,
        errors: &[crate::fixture::ValidationErrorExpectation],
    ) {
        let _ = writeln!(out, " var veBody = {response_var}.body();");
        for err in errors {
            let escaped_msg = escape_java(&err.msg);
            let _ = writeln!(
                out,
                " assertTrue(veBody.contains(\"{escaped_msg}\"), \"expected validation error message: {escaped_msg}\");"
            );
        }
    }
}
/// Renders the test method for an HTTP fixture.
///
/// Fixtures that expect status 101 are emitted as a test that is skipped through a failing
/// JUnit assumption; every other fixture is delegated to the shared HTTP test renderer with
/// `JavaTestClientRenderer`.
fn render_http_test_method(out: &mut String, fixture: &Fixture, http: &HttpFixture) {
    if http.expected_response.status_code != 101 {
        client::http_call::render_http_test(out, &JavaTestClientRenderer, fixture);
        return;
    }

    // 101 Switching Protocols: emit a stub that reports itself as skipped.
    let test_name = fixture.id.to_upper_camel_case();
    out.push_str(" @Test\n");
    out.push_str(&format!(" void test{test_name}() {{\n"));
    out.push_str(&format!(" // {}\n", fixture.description));
    out.push_str(
        " org.junit.jupiter.api.Assumptions.assumeTrue(false, \"Skipped: Java HttpClient cannot handle 101 Switching Protocols responses\");\n",
    );
    out.push_str(" }\n");
}
#[allow(clippy::too_many_arguments)]
fn render_test_method(
out: &mut String,
fixture: &Fixture,
class_name: &str,
_function_name: &str,
_result_var: &str,
_args: &[crate::config::ArgMapping],
options_type: Option<&str>,
field_resolver: &FieldResolver,
result_is_simple: bool,
enum_fields: &std::collections::HashMap<String, String>,
e2e_config: &E2eConfig,
nested_types: &std::collections::HashMap<String, String>,
nested_types_optional: bool,
) {
if let Some(http) = &fixture.http {
render_http_test_method(out, fixture, http);
return;
}
let call_config = e2e_config.resolve_call(fixture.call.as_deref());
let lang = "java";
let call_overrides = call_config.overrides.get(lang);
let effective_function_name = call_overrides
.and_then(|o| o.function.as_ref())
.cloned()
.unwrap_or_else(|| call_config.function.to_lower_camel_case());
let effective_result_var = &call_config.result_var;
let effective_args = &call_config.args;
let function_name = effective_function_name.as_str();
let result_var = effective_result_var.as_str();
let args: &[crate::config::ArgMapping] = effective_args.as_slice();
let method_name = fixture.id.to_upper_camel_case();
let description = &fixture.description;
let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
let effective_options_type: Option<String> = call_overrides
.and_then(|o| o.options_type.clone())
.or_else(|| options_type.map(|s| s.to_string()));
let effective_options_type = effective_options_type.as_deref();
let effective_result_is_simple =
call_overrides.is_some_and(|o| o.result_is_simple) || call_config.result_is_simple || result_is_simple;
let effective_result_is_bytes = call_overrides.is_some_and(|o| o.result_is_bytes);
let needs_deser = effective_options_type.is_some()
&& args.iter().any(|arg| {
if arg.arg_type != "json_object" {
return false;
}
let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
fixture.input.get(field).is_some_and(|v| !v.is_null() && !v.is_array())
});
let throws_clause = " throws Exception";
let _ = writeln!(out, " @Test");
let _ = writeln!(out, " void test{method_name}(){throws_clause} {{");
let _ = writeln!(out, " // {description}");
if let (true, Some(opts_type)) = (needs_deser, effective_options_type) {
for arg in args {
if arg.arg_type == "json_object" {
let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
if let Some(val) = fixture.input.get(field) {
if !val.is_null() && !val.is_array() {
if let Some(obj) = val.as_object() {
let empty_path_fields: Vec<String> = Vec::new();
let path_fields = call_overrides.map(|o| &o.path_fields).unwrap_or(&empty_path_fields);
let builder_expr = java_builder_expression(
obj,
opts_type,
enum_fields,
nested_types,
nested_types_optional,
path_fields,
);
let var_name = &arg.name;
let _ = writeln!(out, " var {var_name} = {builder_expr};");
}
}
}
}
}
}
let (mut setup_lines, args_str) =
build_args_and_setup(&fixture.input, args, class_name, effective_options_type, &fixture.id);
let mut visitor_var = String::new();
let mut has_visitor_fixture = false;
if let Some(visitor_spec) = &fixture.visitor {
visitor_var = build_java_visitor(&mut setup_lines, visitor_spec, class_name);
has_visitor_fixture = true;
}
for line in &setup_lines {
let _ = writeln!(out, " {line}");
}
let final_args = if has_visitor_fixture {
if args_str.is_empty() {
format!("new ConversionOptions().withVisitor({})", visitor_var)
} else if args_str.contains("new ConversionOptions")
|| args_str.contains("ConversionOptionsBuilder")
|| args_str.contains(".builder()")
{
if args_str.contains(".build()") {
let idx = args_str.rfind(".build()").unwrap();
format!("{}.withVisitor({}){}", &args_str[..idx], visitor_var, &args_str[idx..])
} else {
format!("{}.withVisitor({})", args_str, visitor_var)
}
} else if args_str.ends_with(", null") {
let base = &args_str[..args_str.len() - 6];
format!("{}, new ConversionOptions().withVisitor({})", base, visitor_var)
} else {
format!("{}, new ConversionOptions().withVisitor({})", args_str, visitor_var)
}
} else {
args_str
};
if expects_error {
let _ = writeln!(
out,
" assertThrows(Exception.class, () -> {class_name}.{function_name}({final_args}));"
);
let _ = writeln!(out, " }}");
return;
}
if call_config.returns_void {
let _ = writeln!(out, " {class_name}.{function_name}({final_args});");
let _ = writeln!(out, " }}");
return;
}
let _ = writeln!(
out,
" var {result_var} = {class_name}.{function_name}({final_args});"
);
let needs_source_var = fixture
.assertions
.iter()
.any(|a| a.assertion_type == "method_result" && a.method.as_deref() == Some("run_query"));
if needs_source_var {
if let Some(source_arg) = args.iter().find(|a| a.field == "source_code") {
let field = source_arg.field.strip_prefix("input.").unwrap_or(&source_arg.field);
if let Some(val) = fixture.input.get(field) {
let java_val = json_to_java(val);
let _ = writeln!(out, " var source = {java_val}.getBytes();");
}
}
}
for assertion in &fixture.assertions {
render_assertion(
out,
assertion,
result_var,
class_name,
field_resolver,
effective_result_is_simple,
effective_result_is_bytes,
enum_fields,
);
}
let _ = writeln!(out, " }}");
}
/// Translates a fixture's input into the Java call-site arguments.
///
/// Returns `(setup_lines, args)`: `setup_lines` are Java statements to emit before the call
/// (mock-server URL and handle construction) and `args` is the comma-separated argument list,
/// one entry per mapping in `args` order. Both are empty when `args` is empty.
///
/// Per argument type:
/// * `mock_url` — a local `String` built from `MOCK_SERVER_URL` and `fixture_id`.
/// * `handle` — a local created through `<class_name>.create<Name>(..)`, receiving `null` for
///   a missing/null/empty-object config, otherwise a `CrawlConfig` read through `MAPPER`.
/// * anything else — a literal derived from the input value, or a default / `null` when the
///   value is missing.
fn build_args_and_setup(
    input: &serde_json::Value,
    args: &[crate::config::ArgMapping],
    class_name: &str,
    options_type: Option<&str>,
    fixture_id: &str,
) -> (Vec<String>, String) {
    let mut setup_lines: Vec<String> = Vec::new();
    let mut call_args: Vec<String> = Vec::with_capacity(args.len());

    for arg in args {
        let name = &arg.name;
        let kind = arg.arg_type.as_str();
        // Field paths may carry an `input.` prefix; lookups use the bare key.
        let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);

        let expr = match kind {
            "mock_url" => {
                setup_lines.push(format!(
                    "String {name} = System.getenv(\"MOCK_SERVER_URL\") + \"/fixtures/{fixture_id}\";"
                ));
                name.clone()
            }
            "handle" => {
                let constructor_name = format!("create{}", name.to_upper_camel_case());
                // Missing, null and `{}` configs all mean "no configuration".
                let config = match input.get(field) {
                    None | Some(serde_json::Value::Null) => None,
                    Some(serde_json::Value::Object(map)) if map.is_empty() => None,
                    Some(value) => Some(value),
                };
                match config {
                    None => {
                        setup_lines.push(format!("var {name} = {class_name}.{constructor_name}(null);"));
                    }
                    Some(config_value) => {
                        let json_str = serde_json::to_string(config_value).unwrap_or_default();
                        let escaped_json = escape_java(&json_str);
                        setup_lines.push(format!(
                            "var {name}Config = MAPPER.readValue(\"{escaped_json}\", CrawlConfig.class);"
                        ));
                        setup_lines.push(format!(
                            "var {name} = {class_name}.{constructor_name}({name}Config);"
                        ));
                    }
                }
                name.clone()
            }
            _ => match input.get(field) {
                // No usable value and the argument is optional.
                None | Some(serde_json::Value::Null) if arg.optional => match options_type {
                    Some(opts_type) if kind == "json_object" => format!("{opts_type}.builder().build()"),
                    _ => "null".to_string(),
                },
                // No usable value for a required argument: fall back to a type default.
                None | Some(serde_json::Value::Null) => {
                    let default_val = match kind {
                        "string" | "file_path" => "\"\"",
                        "int" | "integer" => "0",
                        "float" | "number" => "0.0d",
                        "bool" | "boolean" => "false",
                        _ => "null",
                    };
                    default_val.to_string()
                }
                Some(value) => match kind {
                    "json_object" if value.is_array() => match &arg.element_type {
                        Some(elem_type) if elem_type == "BatchBytesItem" || elem_type == "BatchFileItem" => {
                            emit_java_batch_item_array(value, elem_type)
                        }
                        _ => json_to_java_typed(value, arg.element_type.as_deref()),
                    },
                    // With an options type the argument refers to a local of the same name.
                    "json_object" if options_type.is_some() => name.clone(),
                    "bytes" => format!("{}.getBytes()", json_to_java(value)),
                    "file_path" => format!("java.nio.file.Path.of({})", json_to_java(value)),
                    _ => json_to_java(value),
                },
            },
        };
        call_args.push(expr);
    }

    (setup_lines, call_args.join(", "))
}
#[allow(clippy::too_many_arguments)]
fn render_assertion(
out: &mut String,
assertion: &Assertion,
result_var: &str,
class_name: &str,
field_resolver: &FieldResolver,
result_is_simple: bool,
result_is_bytes: bool,
enum_fields: &std::collections::HashMap<String, String>,
) {
if let Some(f) = &assertion.field {
match f.as_str() {
"chunks_have_content" => {
let pred = format!(
"{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.content() != null && !c.content().isBlank())"
);
match assertion.assertion_type.as_str() {
"is_true" => {
let _ = writeln!(out, " assertTrue({pred}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({pred}, \"expected false\");");
}
_ => {
let _ = writeln!(
out,
" // skipped: unsupported assertion on synthetic field '{f}'"
);
}
}
return;
}
"chunks_have_heading_context" => {
let pred = format!(
"{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.metadata().headingContext().isPresent())"
);
match assertion.assertion_type.as_str() {
"is_true" => {
let _ = writeln!(out, " assertTrue({pred}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({pred}, \"expected false\");");
}
_ => {
let _ = writeln!(
out,
" // skipped: unsupported assertion on synthetic field '{f}'"
);
}
}
return;
}
"chunks_have_embeddings" => {
let pred = format!(
"{result_var}.chunks().orElse(java.util.List.of()).stream().allMatch(c -> c.embedding() != null && !c.embedding().isEmpty())"
);
match assertion.assertion_type.as_str() {
"is_true" => {
let _ = writeln!(out, " assertTrue({pred}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({pred}, \"expected false\");");
}
_ => {
let _ = writeln!(
out,
" // skipped: unsupported assertion on synthetic field '{f}'"
);
}
}
return;
}
"first_chunk_starts_with_heading" => {
let pred = format!(
"{result_var}.chunks().orElse(java.util.List.of()).stream().findFirst().map(c -> c.metadata().headingContext().isPresent()).orElse(false)"
);
match assertion.assertion_type.as_str() {
"is_true" => {
let _ = writeln!(out, " assertTrue({pred}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({pred}, \"expected false\");");
}
_ => {
let _ = writeln!(
out,
" // skipped: unsupported assertion on synthetic field '{f}'"
);
}
}
return;
}
"embedding_dimensions" => {
let embed_list = if result_is_simple {
result_var.to_string()
} else {
format!("{result_var}.embeddings()")
};
let expr = format!("({embed_list}.isEmpty() ? 0 : {embed_list}.get(0).size())");
match assertion.assertion_type.as_str() {
"equals" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(out, " assertEquals({java_val}, {expr});");
}
}
"greater_than" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({expr} > {java_val}, \"expected > {java_val}\");"
);
}
}
_ => {
let _ = writeln!(out, " // skipped: unsupported assertion on '{f}'");
}
}
return;
}
"embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
let embed_list = if result_is_simple {
result_var.to_string()
} else {
format!("{result_var}.embeddings()")
};
let pred = match f.as_str() {
"embeddings_valid" => {
format!("{embed_list}.stream().allMatch(e -> e != null && !e.isEmpty())")
}
"embeddings_finite" => {
format!("{embed_list}.stream().flatMap(java.util.Collection::stream).allMatch(Float::isFinite)")
}
"embeddings_non_zero" => {
format!("{embed_list}.stream().allMatch(e -> e.stream().anyMatch(v -> v != 0.0f))")
}
"embeddings_normalized" => format!(
"{embed_list}.stream().allMatch(e -> {{ double n = e.stream().mapToDouble(v -> v * v).sum(); return Math.abs(n - 1.0) < 1e-3; }})"
),
_ => unreachable!(),
};
match assertion.assertion_type.as_str() {
"is_true" => {
let _ = writeln!(out, " assertTrue({pred}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({pred}, \"expected false\");");
}
_ => {
let _ = writeln!(out, " // skipped: unsupported assertion on '{f}'");
}
}
return;
}
"keywords" | "keywords_count" => {
let _ = writeln!(
out,
" // skipped: field '{f}' not available on Java ExtractionResult"
);
return;
}
"metadata" => {
match assertion.assertion_type.as_str() {
"not_empty" => {
let _ = writeln!(
out,
" assertTrue({result_var}.metadata().title().isPresent() || {result_var}.metadata().subject().isPresent() || !{result_var}.metadata().additional().isEmpty(), \"expected non-empty value\");"
);
return;
}
"is_empty" => {
let _ = writeln!(
out,
" assertFalse({result_var}.metadata().title().isPresent() || {result_var}.metadata().subject().isPresent() || !{result_var}.metadata().additional().isEmpty(), \"expected empty value\");"
);
return;
}
_ => {} }
}
_ => {}
}
}
if let Some(f) = &assertion.field {
if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
let _ = writeln!(out, " // skipped: field '{f}' not available on result type");
return;
}
}
let field_is_enum = assertion
.field
.as_deref()
.is_some_and(|f| enum_fields.contains_key(f) || enum_fields.contains_key(field_resolver.resolve(f)));
let field_is_array = assertion
.field
.as_deref()
.is_some_and(|f| field_resolver.is_array(field_resolver.resolve(f)));
let field_expr = if result_is_simple {
result_var.to_string()
} else {
match &assertion.field {
Some(f) if !f.is_empty() => {
let accessor = field_resolver.accessor(f, "java", result_var);
let resolved = field_resolver.resolve(f);
if field_resolver.is_optional(resolved) && !field_resolver.has_map_access(f) {
let optional_expr = format!("java.util.Optional.ofNullable({accessor})");
match assertion.assertion_type.as_str() {
"not_empty" | "is_empty" => optional_expr,
"count_min" | "count_equals" => {
format!("{optional_expr}.orElse(java.util.List.of())")
}
"greater_than" | "less_than" | "greater_than_or_equal" | "less_than_or_equal" => {
if field_resolver.is_array(resolved) {
format!("{optional_expr}.orElse(java.util.List.of())")
} else {
format!("{optional_expr}.orElse(0L)")
}
}
"equals" => {
if let Some(expected) = &assertion.value {
if expected.is_number() {
format!("{optional_expr}.orElse(0L)")
} else {
format!("{optional_expr}.orElse(\"\")")
}
} else {
format!("{optional_expr}.orElse(\"\")")
}
}
_ if field_resolver.is_array(resolved) => {
format!("{optional_expr}.orElse(java.util.List.of())")
}
_ => format!("{optional_expr}.orElse(\"\")"),
}
} else {
accessor
}
}
_ => result_var.to_string(),
}
};
let string_expr = if field_is_enum {
format!("{field_expr}.getValue()")
} else {
field_expr.clone()
};
match assertion.assertion_type.as_str() {
"equals" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
if expected.is_string() {
let _ = writeln!(out, " assertEquals({java_val}, {string_expr}.trim());");
} else if expected.is_number() && field_expr.contains(".orElse(\"\")") {
let fixed_expr = field_expr.replace(".orElse(\"\")", ".orElse(0L)");
let _ = writeln!(out, " assertEquals({java_val}, {fixed_expr});");
} else {
let _ = writeln!(out, " assertEquals({java_val}, {field_expr});");
}
}
}
"contains" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
let check_expr = if field_is_array {
format!("{string_expr}.toString()")
} else {
string_expr.clone()
};
let _ = writeln!(
out,
" assertTrue({check_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
);
}
}
"contains_all" => {
if let Some(values) = &assertion.values {
for val in values {
let java_val = json_to_java(val);
let check_expr = if field_is_array {
format!("{string_expr}.toString()")
} else {
string_expr.clone()
};
let _ = writeln!(
out,
" assertTrue({check_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
);
}
}
}
"not_contains" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
let check_expr = if field_is_array {
format!("{string_expr}.toString()")
} else {
string_expr.clone()
};
let _ = writeln!(
out,
" assertFalse({check_expr}.contains({java_val}), \"expected NOT to contain: \" + {java_val});"
);
}
}
"not_empty" => {
let _ = writeln!(
out,
" assertFalse({field_expr} == null || {field_expr}.isEmpty(), \"expected non-empty value\");"
);
}
"is_empty" => {
let _ = writeln!(
out,
" assertTrue({field_expr} == null || {field_expr}.isEmpty(), \"expected empty value\");"
);
}
"contains_any" => {
if let Some(values) = &assertion.values {
let checks: Vec<String> = values
.iter()
.map(|v| {
let java_val = json_to_java(v);
format!("{string_expr}.contains({java_val})")
})
.collect();
let joined = checks.join(" || ");
let _ = writeln!(
out,
" assertTrue({joined}, \"expected to contain at least one of the specified values\");"
);
}
}
"greater_than" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({field_expr} > {java_val}, \"expected > {java_val}\");"
);
}
}
"less_than" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({field_expr} < {java_val}, \"expected < {java_val}\");"
);
}
}
"greater_than_or_equal" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({field_expr} >= {java_val}, \"expected >= {java_val}\");"
);
}
}
"less_than_or_equal" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({field_expr} <= {java_val}, \"expected <= {java_val}\");"
);
}
}
"starts_with" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
let _ = writeln!(
out,
" assertTrue({string_expr}.startsWith({java_val}), \"expected to start with: \" + {java_val});"
);
}
}
"ends_with" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
let _ = writeln!(
out,
" assertTrue({string_expr}.endsWith({java_val}), \"expected to end with: \" + {java_val});"
);
}
}
"min_length" => {
if let Some(val) = &assertion.value {
if let Some(n) = val.as_u64() {
let len_expr = if result_is_bytes {
format!("{field_expr}.length")
} else {
format!("{field_expr}.length()")
};
let _ = writeln!(
out,
" assertTrue({len_expr} >= {n}, \"expected length >= {n}\");"
);
}
}
}
"max_length" => {
if let Some(val) = &assertion.value {
if let Some(n) = val.as_u64() {
let len_expr = if result_is_bytes {
format!("{field_expr}.length")
} else {
format!("{field_expr}.length()")
};
let _ = writeln!(
out,
" assertTrue({len_expr} <= {n}, \"expected length <= {n}\");"
);
}
}
}
"count_min" => {
if let Some(val) = &assertion.value {
if let Some(n) = val.as_u64() {
let _ = writeln!(
out,
" assertTrue({field_expr}.size() >= {n}, \"expected at least {n} elements\");"
);
}
}
}
"count_equals" => {
if let Some(val) = &assertion.value {
if let Some(n) = val.as_u64() {
let _ = writeln!(
out,
" assertEquals({n}, {field_expr}.size(), \"expected exactly {n} elements\");"
);
}
}
}
"is_true" => {
let _ = writeln!(out, " assertTrue({field_expr}, \"expected true\");");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({field_expr}, \"expected false\");");
}
"method_result" => {
if let Some(method_name) = &assertion.method {
let call_expr = build_java_method_call(result_var, method_name, assertion.args.as_ref(), class_name);
let check = assertion.check.as_deref().unwrap_or("is_true");
let method_returns_collection =
matches!(method_name.as_str(), "find_nodes_by_type" | "findNodesByType");
match check {
"equals" => {
if let Some(val) = &assertion.value {
if val.is_boolean() {
if val.as_bool() == Some(true) {
let _ = writeln!(out, " assertTrue({call_expr});");
} else {
let _ = writeln!(out, " assertFalse({call_expr});");
}
} else if method_returns_collection {
let java_val = json_to_java(val);
let _ = writeln!(out, " assertEquals({java_val}, {call_expr}.size());");
} else {
let java_val = json_to_java(val);
let _ = writeln!(out, " assertEquals({java_val}, {call_expr});");
}
}
}
"is_true" => {
let _ = writeln!(out, " assertTrue({call_expr});");
}
"is_false" => {
let _ = writeln!(out, " assertFalse({call_expr});");
}
"greater_than_or_equal" => {
if let Some(val) = &assertion.value {
let n = val.as_u64().unwrap_or(0);
let _ = writeln!(out, " assertTrue({call_expr} >= {n}, \"expected >= {n}\");");
}
}
"count_min" => {
if let Some(val) = &assertion.value {
let n = val.as_u64().unwrap_or(0);
let _ = writeln!(
out,
" assertTrue({call_expr}.size() >= {n}, \"expected at least {n} elements\");"
);
}
}
"is_error" => {
let _ = writeln!(out, " assertThrows(Exception.class, () -> {{ {call_expr}; }});");
}
"contains" => {
if let Some(val) = &assertion.value {
let java_val = json_to_java(val);
let _ = writeln!(
out,
" assertTrue({call_expr}.contains({java_val}), \"expected to contain: \" + {java_val});"
);
}
}
other_check => {
panic!("Java e2e generator: unsupported method_result check type: {other_check}");
}
}
} else {
panic!("Java e2e generator: method_result assertion missing 'method' field");
}
}
"matches_regex" => {
if let Some(expected) = &assertion.value {
let java_val = json_to_java(expected);
let _ = writeln!(
out,
" assertTrue({string_expr}.matches({java_val}), \"expected value to match regex: \" + {java_val});"
);
}
}
"not_error" => {
}
"error" => {
}
other => {
panic!("Java e2e generator: unsupported assertion type: {other}");
}
}
}
/// Builds the Java expression that invokes `method_name` on the value held in `result_var`.
///
/// A fixed set of fixture method names maps either onto `rootNode()` accessors of the
/// result or onto static helpers of `class_name`. Any other name becomes a zero-argument
/// instance call whose name is the lowerCamelCase form of `method_name`.
///
/// * `result_var` - Java identifier holding the result under test.
/// * `method_name` - method name as written in the fixture.
/// * `args` - optional JSON arguments; only the string entries `node_type`,
///   `query_source` and `language` are read, and a missing or non-string entry is
///   emitted as an empty Java string.
/// * `class_name` - Java class that exposes the static helper methods.
///
/// Returns the Java expression as source text (no trailing semicolon).
fn build_java_method_call(
    result_var: &str,
    method_name: &str,
    args: Option<&serde_json::Value>,
    class_name: &str,
) -> String {
    /// Looks up a string argument by key, falling back to the empty string.
    fn str_arg<'a>(args: Option<&'a serde_json::Value>, key: &str) -> &'a str {
        args.and_then(|a| a.get(key)).and_then(|v| v.as_str()).unwrap_or("")
    }

    match method_name {
        "root_child_count" => format!("{result_var}.rootNode().childCount()"),
        "root_node_type" => format!("{result_var}.rootNode().kind()"),
        "named_children_count" => format!("{result_var}.rootNode().namedChildCount()"),
        "has_error_nodes" => format!("{class_name}.treeHasErrorNodes({result_var})"),
        "error_count" | "tree_error_count" => format!("{class_name}.treeErrorCount({result_var})"),
        "tree_to_sexp" => format!("{class_name}.treeToSexp({result_var})"),
        // Every fixture-provided string below lands inside a Java string literal, so it
        // is escaped first; a quote or backslash would otherwise yield Java that does
        // not compile.
        "contains_node_type" => {
            let node_type = escape_java(str_arg(args, "node_type"));
            format!("{class_name}.treeContainsNodeType({result_var}, \"{node_type}\")")
        }
        "find_nodes_by_type" => {
            let node_type = escape_java(str_arg(args, "node_type"));
            format!("{class_name}.findNodesByType({result_var}, \"{node_type}\")")
        }
        "run_query" => {
            let escaped_query = escape_java(str_arg(args, "query_source"));
            let language = escape_java(str_arg(args, "language"));
            // `source` is emitted as a bare identifier; presumably the generated test
            // declares it before this call - TODO confirm against the caller.
            format!("{class_name}.runQuery({result_var}, \"{language}\", \"{escaped_query}\", source)")
        }
        _ => {
            format!("{result_var}.{}()", method_name.to_lower_camel_case())
        }
    }
}
/// Renders a JSON value as Java source text without any element-type hint.
///
/// Delegates to `json_to_java_typed` with the hint set to `None`.
fn json_to_java(value: &serde_json::Value) -> String {
    let no_type_hint: Option<&str> = None;
    json_to_java_typed(value, no_type_hint)
}
fn emit_java_batch_item_array(arr: &serde_json::Value, elem_type: &str) -> String {
if let Some(items) = arr.as_array() {
let item_strs: Vec<String> = items
.iter()
.filter_map(|item| {
if let Some(obj) = item.as_object() {
match elem_type {
"BatchBytesItem" => {
let content = obj.get("content").and_then(|v| v.as_array());
let mime_type = obj.get("mime_type").and_then(|v| v.as_str()).unwrap_or("text/plain");
let content_code = if let Some(arr) = content {
let bytes: Vec<String> = arr
.iter()
.filter_map(|v| v.as_u64().map(|n| format!("(byte) {}", n)))
.collect();
format!("new byte[] {{{}}}", bytes.join(", "))
} else {
"new byte[] {}".to_string()
};
Some(format!("new {}({}, \"{}\", null)", elem_type, content_code, mime_type))
}
"BatchFileItem" => {
let path = obj.get("path").and_then(|v| v.as_str()).unwrap_or("");
Some(format!(
"new {}(java.nio.file.Paths.get(\"{}\"), null)",
elem_type, path
))
}
_ => None,
}
} else {
None
}
})
.collect();
format!("java.util.Arrays.asList({})", item_strs.join(", "))
} else {
"java.util.List.of()".to_string()
}
}
fn json_to_java_typed(value: &serde_json::Value, element_type: Option<&str>) -> String {
match value {
serde_json::Value::String(s) => format!("\"{}\"", escape_java(s)),
serde_json::Value::Bool(b) => b.to_string(),
serde_json::Value::Number(n) => {
if n.is_f64() {
match element_type {
Some("f32" | "float" | "Float") => format!("{}f", n),
_ => format!("{}d", n),
}
} else {
n.to_string()
}
}
serde_json::Value::Null => "null".to_string(),
serde_json::Value::Array(arr) => {
let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, element_type)).collect();
format!("java.util.List.of({})", items.join(", "))
}
serde_json::Value::Object(_) => {
let json_str = serde_json::to_string(value).unwrap_or_default();
format!("\"{}\"", escape_java(&json_str))
}
}
}
fn java_builder_expression(
obj: &serde_json::Map<String, serde_json::Value>,
type_name: &str,
enum_fields: &std::collections::HashMap<String, String>,
nested_types: &std::collections::HashMap<String, String>,
nested_types_optional: bool,
path_fields: &[String],
) -> String {
let mut expr = format!("{}.builder()", type_name);
for (key, val) in obj {
let camel_key = key.to_lower_camel_case();
let method_name = format!("with{}", camel_key.to_upper_camel_case());
let java_val = match val {
serde_json::Value::String(s) => {
if let Some(enum_type_name) = enum_fields.get(&camel_key) {
let variant_name = s.to_upper_camel_case();
format!("{}.{}", enum_type_name, variant_name)
} else if camel_key == "preset" && type_name == "PreprocessingOptions" {
let variant_name = s.to_upper_camel_case();
format!("PreprocessingPreset.{}", variant_name)
} else if path_fields.contains(key) {
format!("Optional.of(java.nio.file.Path.of(\"{}\"))", escape_java(s))
} else {
format!("\"{}\"", escape_java(s))
}
}
serde_json::Value::Bool(b) => b.to_string(),
serde_json::Value::Null => "null".to_string(),
serde_json::Value::Number(n) => {
let camel_key = key.to_lower_camel_case();
let is_plain_field = matches!(camel_key.as_str(), "listIndentWidth" | "wrapWidth");
let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
if is_plain_field || is_primitive_builder {
if n.is_f64() {
format!("{}d", n)
} else {
format!("{}L", n)
}
} else {
if n.is_f64() {
format!("Optional.of({}d)", n)
} else {
format!("Optional.of({}L)", n)
}
}
}
serde_json::Value::Array(arr) => {
let items: Vec<String> = arr.iter().map(|v| json_to_java_typed(v, None)).collect();
format!("java.util.List.of({})", items.join(", "))
}
serde_json::Value::Object(nested) => {
let nested_type = nested_types
.get(key.as_str())
.cloned()
.unwrap_or_else(|| format!("{}Options", key.to_upper_camel_case()));
let inner = java_builder_expression(
nested,
&nested_type,
enum_fields,
nested_types,
nested_types_optional,
&[],
);
let is_primitive_builder = matches!(type_name, "SecurityLimits" | "SecurityLimitsBuilder");
if is_primitive_builder || !nested_types_optional {
inner
} else {
format!("Optional.of({inner})")
}
}
};
expr.push_str(&format!(".{}({})", method_name, java_val));
}
expr.push_str(".build()");
expr
}
/// Returns the built-in mapping from nested configuration keys (as spelled in fixtures)
/// to the Java type name used when rendering that nested object.
fn default_java_nested_types() -> std::collections::HashMap<String, String> {
    const DEFAULTS: [(&str, &str); 16] = [
        ("chunking", "ChunkingConfig"),
        ("ocr", "OcrConfig"),
        ("images", "ImageExtractionConfig"),
        ("html_output", "HtmlOutputConfig"),
        ("language_detection", "LanguageDetectionConfig"),
        ("postprocessor", "PostProcessorConfig"),
        ("acceleration", "AccelerationConfig"),
        ("email", "EmailConfig"),
        ("pages", "PageConfig"),
        ("pdf_options", "PdfConfig"),
        ("layout", "LayoutDetectionConfig"),
        ("tree_sitter", "TreeSitterConfig"),
        ("structured_extraction", "StructuredExtractionConfig"),
        ("content_filter", "ContentFilterConfig"),
        ("token_reduction", "TokenReductionOptions"),
        ("security_limits", "SecurityLimits"),
    ];

    let mut mapping = std::collections::HashMap::with_capacity(DEFAULTS.len());
    for (config_key, java_type) in DEFAULTS {
        mapping.insert(config_key.to_owned(), java_type.to_owned());
    }
    mapping
}
/// Walks a JSON object recursively and records the Java enum type names it refers to.
///
/// For each key, the lowerCamelCase form is looked up in `enum_fields`; a hit inserts the
/// mapped type name into `types_out`. A key that is not mapped but equals `preset`
/// inserts `PreprocessingPreset`. Values that are JSON objects are visited recursively.
fn collect_enum_and_nested_types(
    obj: &serde_json::Map<String, serde_json::Value>,
    enum_fields: &std::collections::HashMap<String, String>,
    types_out: &mut std::collections::BTreeSet<String>,
) {
    for (field, child) in obj {
        let camel = field.to_lower_camel_case();
        match enum_fields.get(&camel) {
            Some(enum_type) => {
                types_out.insert(enum_type.clone());
            }
            None if camel == "preset" => {
                types_out.insert("PreprocessingPreset".to_string());
            }
            None => {}
        }
        if let serde_json::Value::Object(inner) = child {
            collect_enum_and_nested_types(inner, enum_fields, types_out);
        }
    }
}
/// Walks a JSON object recursively and records the Java type name of every key that has
/// an entry in `nested_types`.
///
/// Keys are looked up exactly as spelled in the JSON. Values that are JSON objects are
/// visited recursively, whether or not their own key was mapped.
fn collect_nested_type_names(
    obj: &serde_json::Map<String, serde_json::Value>,
    nested_types: &std::collections::HashMap<String, String>,
    types_out: &mut std::collections::BTreeSet<String>,
) {
    for (field, child) in obj {
        // `extend` with an `Option` inserts the mapped name when present, nothing otherwise.
        types_out.extend(nested_types.get(field.as_str()).cloned());
        if let serde_json::Value::Object(inner) = child {
            collect_nested_type_names(inner, nested_types, types_out);
        }
    }
}
/// Appends the Java source for a local `_TestVisitor` class to `setup_lines` and returns
/// the name of the variable that holds its instance.
///
/// One overriding method is emitted per entry in `visitor_spec.callbacks`, followed by
/// the closing brace and a `var visitor = new _TestVisitor();` declaration.
fn build_java_visitor(
    setup_lines: &mut Vec<String>,
    visitor_spec: &crate::fixture::VisitorSpec,
    class_name: &str,
) -> String {
    setup_lines.push(String::from("class _TestVisitor implements Visitor {"));
    for (callback, behaviour) in &visitor_spec.callbacks {
        emit_java_visitor_method(setup_lines, callback, behaviour, class_name);
    }
    setup_lines.extend([
        String::from("}"),
        String::from("var visitor = new _TestVisitor();"),
    ]);
    String::from("visitor")
}
/// Appends one `@Override` visitor method to `setup_lines`.
///
/// * `setup_lines` - receives the method header, a single `return` statement and the
///   closing brace.
/// * `method_name` - snake_case callback name; it selects the Java parameter list and is
///   converted to lowerCamelCase for the Java method name. Unknown names get only the
///   `NodeContext ctx` parameter.
/// * `action` - decides which `VisitResult` the method returns.
/// * `_class_name` - unused.
///
/// For `CallbackAction::CustomTemplate`, `{name}` placeholders whose name consists only of
/// alphanumerics and underscores are replaced by `%s` and the lowerCamelCase name is
/// passed as a `String.format` argument (presumably matching one of the method's
/// parameters - confirm against the fixtures). Anything else in braces is kept as
/// literal text.
fn emit_java_visitor_method(
    setup_lines: &mut Vec<String>,
    method_name: &str,
    action: &CallbackAction,
    _class_name: &str,
) {
    let camel_method = method_to_camel(method_name);
    let params = match method_name {
        "visit_link" => "NodeContext ctx, String href, String text, String title",
        "visit_image" => "NodeContext ctx, String src, String alt, String title",
        "visit_heading" => "NodeContext ctx, int level, String text, String id",
        "visit_code_block" => "NodeContext ctx, String lang, String code",
        "visit_code_inline"
        | "visit_strong"
        | "visit_emphasis"
        | "visit_strikethrough"
        | "visit_underline"
        | "visit_subscript"
        | "visit_superscript"
        | "visit_mark"
        | "visit_button"
        | "visit_summary"
        | "visit_figcaption"
        | "visit_definition_term"
        | "visit_definition_description"
        | "visit_text" => "NodeContext ctx, String text",
        "visit_list_item" => "NodeContext ctx, boolean ordered, String marker, String text",
        "visit_blockquote" => "NodeContext ctx, String content, long depth",
        "visit_table_row" => "NodeContext ctx, java.util.List<String> cells, boolean isHeader",
        "visit_custom_element" => "NodeContext ctx, String tagName, String html",
        "visit_form" => "NodeContext ctx, String actionUrl, String method",
        "visit_input" => "NodeContext ctx, String inputType, String name, String value",
        "visit_audio" | "visit_video" | "visit_iframe" => "NodeContext ctx, String src",
        "visit_details" => "NodeContext ctx, boolean isOpen",
        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
            "NodeContext ctx, String output"
        }
        "visit_list_start" => "NodeContext ctx, boolean ordered",
        "visit_list_end" => "NodeContext ctx, boolean ordered, String output",
        _ => "NodeContext ctx",
    };
    setup_lines.push(format!(" @Override public VisitResult {camel_method}({params}) {{"));
    match action {
        CallbackAction::Skip => {
            setup_lines.push(" return VisitResult.skip();".to_string());
        }
        CallbackAction::Continue => {
            setup_lines.push(" return VisitResult.continue_();".to_string());
        }
        CallbackAction::PreserveHtml => {
            setup_lines.push(" return VisitResult.preserveHtml();".to_string());
        }
        CallbackAction::Custom { output } => {
            let escaped = escape_java(output);
            setup_lines.push(format!(" return VisitResult.custom(\"{escaped}\");"));
        }
        CallbackAction::CustomTemplate { template } => {
            // `format_str` is only used as a `String.format` pattern, so literal `%`
            // characters are doubled while it is built; otherwise Java would read them
            // as format specifiers.
            let mut format_str = String::with_capacity(template.len());
            let mut format_args: Vec<String> = Vec::new();
            let mut chars = template.chars();
            while let Some(ch) = chars.next() {
                match ch {
                    '{' => {
                        // Consume up to the matching `}` (or the end of the template).
                        let mut name = String::new();
                        let mut closed = false;
                        for inner in chars.by_ref() {
                            if inner == '}' {
                                closed = true;
                                break;
                            }
                            name.push(inner);
                        }
                        let is_placeholder = closed
                            && !name.is_empty()
                            && name.chars().all(|c| c.is_alphanumeric() || c == '_');
                        if is_placeholder {
                            format_args.push(name.as_str().to_lower_camel_case());
                            format_str.push_str("%s");
                        } else {
                            // Not a placeholder: keep the braces and their content as text.
                            format_str.push('{');
                            format_str.push_str(&name.replace('%', "%%"));
                            if closed {
                                format_str.push('}');
                            }
                        }
                    }
                    '%' => format_str.push_str("%%"),
                    other => format_str.push(other),
                }
            }
            if format_args.is_empty() {
                // No placeholders: nothing was substituted, so the template itself is the
                // output and is emitted as a plain literal (no `String.format`, hence no
                // `%` doubling).
                let escaped = escape_java(template);
                setup_lines.push(format!(" return VisitResult.custom(\"{escaped}\");"));
            } else {
                let escaped = escape_java(&format_str);
                let args_str = format_args.join(", ");
                setup_lines.push(format!(
                    " return VisitResult.custom(String.format(\"{escaped}\", {args_str}));"
                ));
            }
        }
    }
    setup_lines.push(" }".to_string());
}
fn method_to_camel(snake: &str) -> String {
snake.to_lower_camel_case()
}