Skip to main content

alef_backend_java/gen_bindings/
mod.rs

1use ahash::AHashSet;
2use alef_codegen::naming::to_class_name;
3use alef_core::backend::{Backend, BuildConfig, BuildDependency, Capabilities, GeneratedFile};
4use alef_core::config::{BridgeBinding, Language, ResolvedCrateConfig};
5use alef_core::ir::{ApiSurface, TypeRef};
6use std::collections::HashSet;
7use std::path::PathBuf;
8
9mod facade;
10mod ffi_class;
11mod helpers;
12mod line_wrap;
13mod marshal;
14mod native_lib;
15mod trait_bridge;
16mod types;
17
18use facade::gen_facade_class;
19use ffi_class::gen_main_class;
20use helpers::{gen_exception_class, gen_infrastructure_exception_class};
21use native_lib::gen_native_lib;
22use types::{gen_builder_class, gen_byte_array_serializer, gen_enum_class, gen_opaque_handle_class, gen_record_type};
23
24pub struct JavaBackend;
25
26impl JavaBackend {
27    /// Convert crate name to main class name (PascalCase + "Rs" suffix).
28    ///
29    /// The "Rs" suffix ensures the raw FFI wrapper class has a distinct name from
30    /// the public facade class (which strips the "Rs" suffix). Without this, the
31    /// facade would delegate to itself, causing infinite recursion.
32    fn resolve_main_class(api: &ApiSurface) -> String {
33        let base = to_class_name(&api.crate_name.replace('-', "_"));
34        if base.ends_with("Rs") {
35            base
36        } else {
37            format!("{}Rs", base)
38        }
39    }
40}
41
42impl Backend for JavaBackend {
43    fn name(&self) -> &str {
44        "java"
45    }
46
47    fn language(&self) -> Language {
48        Language::Java
49    }
50
51    fn capabilities(&self) -> Capabilities {
52        Capabilities {
53            supports_async: true,
54            supports_classes: true,
55            supports_enums: true,
56            supports_option: true,
57            supports_result: true,
58            ..Capabilities::default()
59        }
60    }
61
62    fn generate_bindings(&self, api: &ApiSurface, config: &ResolvedCrateConfig) -> anyhow::Result<Vec<GeneratedFile>> {
63        let package = config.java_package();
64        let prefix = config.ffi_prefix();
65        let main_class = Self::resolve_main_class(api);
66        let package_path = package.replace('.', "/");
67
68        let output_dir = config
69            .output_for("java")
70            .map(|p| p.to_string_lossy().into_owned())
71            .unwrap_or_else(|| "packages/java/src/main/java/".to_string());
72
73        // If output_dir already ends with the package path (user configured the full path),
74        // use it as-is. Otherwise, append the package path.
75        let base_path = if output_dir.ends_with(&package_path) || output_dir.ends_with(&format!("{}/", package_path)) {
76            PathBuf::from(&output_dir)
77        } else {
78            PathBuf::from(&output_dir).join(&package_path)
79        };
80
81        // Collect bridge param names and type aliases so we can strip them from generated
82        // function signatures and emit convertWithVisitor instead.
83        let bridge_param_names: HashSet<String> = config
84            .trait_bridges
85            .iter()
86            .filter_map(|b| b.param_name.clone())
87            .collect();
88        let bridge_type_aliases: HashSet<String> = config
89            .trait_bridges
90            .iter()
91            .filter_map(|b| b.type_alias.clone())
92            .collect();
93        // Generate visitor support when visitor_callbacks is enabled in FFI config (canonical check),
94        // OR when any trait bridge is bound via options_field (Java-specific activation path).
95        let has_visitor_pattern = config.ffi.as_ref().map(|f| f.visitor_callbacks).unwrap_or(false)
96            || config
97                .trait_bridges
98                .iter()
99                .any(|b| b.bind_via == BridgeBinding::OptionsField);
100        let bridge_associated_types = config.bridge_associated_types();
101
102        let mut files = Vec::new();
103
104        // 0. package-info.java - required by Checkstyle
105        let description = config
106            .scaffold
107            .as_ref()
108            .and_then(|s| s.description.as_deref())
109            .unwrap_or("High-performance HTML to Markdown converter.");
110        files.push(GeneratedFile {
111            path: base_path.join("package-info.java"),
112            content: format!(
113                "/**\n * {description}\n */\npackage {package};\n",
114                description = description,
115                package = package,
116            ),
117            generated_header: true,
118        });
119
120        // 1. NativeLib.java - FFI method handles
121        files.push(GeneratedFile {
122            path: base_path.join("NativeLib.java"),
123            content: gen_native_lib(api, config, &package, &prefix, has_visitor_pattern),
124            generated_header: true,
125        });
126
127        // 2. Main wrapper class
128        files.push(GeneratedFile {
129            path: base_path.join(format!("{}.java", main_class)),
130            content: gen_main_class(
131                api,
132                config,
133                &package,
134                &main_class,
135                &prefix,
136                &bridge_param_names,
137                &bridge_type_aliases,
138                has_visitor_pattern,
139            ),
140            generated_header: true,
141        });
142
143        // 3. Exception class
144        files.push(GeneratedFile {
145            path: base_path.join(format!("{}Exception.java", main_class)),
146            content: gen_exception_class(&package, &main_class),
147            generated_header: true,
148        });
149
150        // 3b. Infrastructure exception classes for FFI error codes 1 and 2.
151        // These are always emitted because checkLastError() hardcodes:
152        //   case 1 -> throw new InvalidInputException(msg);
153        //   case 2 -> throw new ConversionErrorException(msg);
154        // Code 1 = null pointer / invalid UTF-8 in an input arg (invalid input).
155        // Code 2 = JSON serialisation/deserialisation failure (type conversion).
156        for (class_name, code, doc) in [
157            (
158                "InvalidInputException",
159                1i32,
160                "Exception thrown when input validation fails.",
161            ),
162            (
163                "ConversionErrorException",
164                2i32,
165                "Exception thrown when type conversion fails.",
166            ),
167        ] {
168            files.push(GeneratedFile {
169                path: base_path.join(format!("{}.java", class_name)),
170                content: gen_infrastructure_exception_class(&package, &main_class, class_name, code, doc),
171                generated_header: true,
172            });
173        }
174
175        // Untagged unions with data variants now emit as JsonNode-wrapper classes
176        // (see gen_java_untagged_wrapper). The set is intentionally empty so that
177        // record fields keep their wrapper type instead of being downcast to Object.
178        let complex_enums: AHashSet<String> = AHashSet::new();
179
180        // Collect sealed union types with unwrapped/tuple variants that need custom deserializers.
181        // When a record field references one of these types, we need to add a @JsonDeserialize
182        // annotation to the field so Jackson uses the custom deserializer.
183        let sealed_unions_with_unwrapped: AHashSet<String> = api
184            .enums
185            .iter()
186            .filter(|e| {
187                e.serde_tag.is_some()
188                    && e.variants
189                        .iter()
190                        .any(|v| v.fields.len() == 1 && helpers::is_tuple_field_name(&v.fields[0].name))
191            })
192            .map(|e| e.name.clone())
193            .collect();
194
195        // Resolve language-level serde rename strategy (always wins over IR type-level).
196        let lang_rename_all = config.serde_rename_all_for_language(Language::Java);
197
198        // 4. Record types
199        // Include non-opaque types that either have fields OR are serializable unit structs
200        // (has_serde + has_default, empty fields). Unit structs like `ExcelMetadata` need a
201        // concrete Java class so they can be referenced as record components in tagged-union
202        // variant records (e.g. FormatMetadata.Excel(@JsonUnwrapped ExcelMetadata value)).
203        for typ in api.types.iter().filter(|typ| !typ.is_trait) {
204            let is_unit_serde = !typ.is_opaque && typ.fields.is_empty() && typ.has_serde;
205            if !typ.is_opaque && (!typ.fields.is_empty() || is_unit_serde) {
206                // Skip types that gen_visitor handles with richer visitor-specific versions
207                if has_visitor_pattern && bridge_associated_types.contains(typ.name.as_str()) {
208                    continue;
209                }
210                files.push(GeneratedFile {
211                    path: base_path.join(format!("{}.java", typ.name)),
212                    content: gen_record_type(
213                        &package,
214                        typ,
215                        &complex_enums,
216                        &sealed_unions_with_unwrapped,
217                        &lang_rename_all,
218                        has_visitor_pattern,
219                        &main_class,
220                    ),
221                    generated_header: true,
222                });
223                // Generate builder class for types with defaults
224                if typ.has_default {
225                    files.push(GeneratedFile {
226                        path: base_path.join(format!("{}Builder.java", typ.name)),
227                        content: gen_builder_class(&package, typ, has_visitor_pattern),
228                        generated_header: true,
229                    });
230                }
231            }
232        }
233
234        // 4a. Utility serializer for byte[] → JSON int-array (needed when any record
235        // has a non-optional Bytes field). Jackson's default byte[] serialiser emits
236        // base64, which Rust's serde Vec<u8> cannot accept. Emit the class once.
237        let needs_bytes_serializer = api
238            .types
239            .iter()
240            .any(|t| !t.is_opaque && t.fields.iter().any(|f| !f.optional && matches!(f.ty, TypeRef::Bytes)));
241        if needs_bytes_serializer {
242            files.push(GeneratedFile {
243                path: base_path.join("ByteArrayToIntArraySerializer.java"),
244                content: gen_byte_array_serializer(&package),
245                generated_header: true,
246            });
247        }
248
249        // Collect builder class names generated from record types with defaults,
250        // so we can skip opaque types that would collide with them.
251        let builder_class_names: AHashSet<String> = api
252            .types
253            .iter()
254            .filter(|t| !t.is_opaque && (!t.fields.is_empty() || (t.has_serde && t.fields.is_empty())) && t.has_default)
255            .map(|t| format!("{}Builder", t.name))
256            .collect();
257
258        // 4b. Opaque handle types (skip if a pure-Java builder already covers this name)
259        for typ in api.types.iter().filter(|typ| !typ.is_trait) {
260            if typ.is_opaque && !builder_class_names.contains(&typ.name) {
261                files.push(GeneratedFile {
262                    path: base_path.join(format!("{}.java", typ.name)),
263                    content: gen_opaque_handle_class(&package, typ, &prefix, &config.adapters, &main_class),
264                    generated_header: true,
265                });
266            }
267        }
268
269        // 5. Enums
270        for enum_def in &api.enums {
271            // Skip enums that gen_visitor handles with richer visitor-specific versions
272            if has_visitor_pattern && bridge_associated_types.contains(enum_def.name.as_str()) {
273                continue;
274            }
275            files.push(GeneratedFile {
276                path: base_path.join(format!("{}.java", enum_def.name)),
277                content: gen_enum_class(&package, enum_def, &main_class),
278                generated_header: true,
279            });
280        }
281
282        // 6. Error exception classes
283        //
284        // Filter out variants whose generated class name collides with the FFI infrastructure
285        // exceptions emitted at step 3b. Both paths target the same .java file; without this
286        // filter, the gen_java_error_types content was overwriting (or worse, mangling — the
287        // InvalidInputException file ended up with a duplicate constructor block appended
288        // after the closing brace) the canonical infrastructure-emitted class.
289        let infrastructure_exception_names: AHashSet<&str> = ["InvalidInputException", "ConversionErrorException"]
290            .into_iter()
291            .collect();
292        for error in &api.errors {
293            for (class_name, content) in alef_codegen::error_gen::gen_java_error_types(error, &package) {
294                if infrastructure_exception_names.contains(class_name.as_str()) {
295                    continue;
296                }
297                files.push(GeneratedFile {
298                    path: base_path.join(format!("{}.java", class_name)),
299                    content,
300                    generated_header: true,
301                });
302            }
303        }
304
305        // 7. Visitor support files (only when ConversionOptions/ConversionResult types exist)
306        if has_visitor_pattern {
307            for (filename, content) in crate::gen_visitor::gen_visitor_files(&package, &main_class) {
308                files.push(GeneratedFile {
309                    path: base_path.join(filename),
310                    content,
311                    generated_header: false, // already has header comment
312                });
313            }
314        }
315
316        // 8. Trait bridge plugin registration files
317        // Emits two files per trait: I{Trait}.java (managed interface) and
318        // {Trait}Bridge.java (Panama upcall stubs + register/unregister helpers).
319        //
320        // Set of struct + enum names that get a generated companion Java class.
321        // Trait method signatures referencing types outside this set (e.g. excluded
322        // internal types like `InternalDocument`) are JSON-bridged as Strings.
323        let visible_type_names: HashSet<&str> = api
324            .types
325            .iter()
326            .filter(|t| !t.is_trait)
327            .map(|t| t.name.as_str())
328            .chain(api.enums.iter().map(|e| e.name.as_str()))
329            .collect();
330        for bridge_cfg in &config.trait_bridges {
331            if bridge_cfg.exclude_languages.contains(&Language::Java.to_string()) {
332                continue;
333            }
334
335            // When visitor_callbacks is active, visitor traits bound via options_field are
336            // surfaced through Visitor.java + VisitorBridge.java (generated by gen_visitor_files).
337            // The raw trait bridge I{Trait}.java emitted here would be an unreferenced orphan
338            // with snake_case method names. Suppress it for options_field-bound visitor traits.
339            if has_visitor_pattern && bridge_cfg.bind_via == BridgeBinding::OptionsField {
340                continue;
341            }
342
343            if let Some(trait_def) = api.types.iter().find(|t| t.name == bridge_cfg.trait_name && t.is_trait) {
344                let has_super_trait = bridge_cfg.super_trait.is_some();
345                let trait_bridge::BridgeFiles {
346                    interface_content,
347                    bridge_content,
348                } = trait_bridge::gen_trait_bridge_files(
349                    trait_def,
350                    &prefix,
351                    &package,
352                    has_super_trait,
353                    bridge_cfg.unregister_fn.as_deref(),
354                    bridge_cfg.clear_fn.as_deref(),
355                    &visible_type_names,
356                );
357
358                files.push(GeneratedFile {
359                    path: base_path.join(format!("I{}.java", trait_def.name)),
360                    content: interface_content,
361                    generated_header: true,
362                });
363                files.push(GeneratedFile {
364                    path: base_path.join(format!("{}Bridge.java", trait_def.name)),
365                    content: bridge_content,
366                    generated_header: true,
367                });
368            }
369        }
370
371        // Apply downstream Checkstyle line-length wrapping to every generated
372        // Java source. The templates emit some compound statements on one line;
373        // this pass splits at logical points (annotation lists, call args,
374        // method signatures) without changing semantics.
375        for file in &mut files {
376            file.content = line_wrap::wrap_long_java_lines(&file.content);
377        }
378
379        Ok(files)
380    }
381
382    fn generate_public_api(
383        &self,
384        api: &ApiSurface,
385        config: &ResolvedCrateConfig,
386    ) -> anyhow::Result<Vec<GeneratedFile>> {
387        let package = config.java_package();
388        let prefix = config.ffi_prefix();
389        let main_class = Self::resolve_main_class(api);
390        let package_path = package.replace('.', "/");
391
392        let output_dir = config
393            .output_for("java")
394            .map(|p| p.to_string_lossy().into_owned())
395            .unwrap_or_else(|| "packages/java/src/main/java/".to_string());
396
397        // If output_dir already ends with the package path (user configured the full path),
398        // use it as-is. Otherwise, append the package path.
399        let base_path = if output_dir.ends_with(&package_path) || output_dir.ends_with(&format!("{}/", package_path)) {
400            PathBuf::from(&output_dir)
401        } else {
402            PathBuf::from(&output_dir).join(&package_path)
403        };
404
405        // Collect bridge param names/aliases to strip from the public facade.
406        let bridge_param_names: HashSet<String> = config
407            .trait_bridges
408            .iter()
409            .filter_map(|b| b.param_name.clone())
410            .collect();
411        let bridge_type_aliases: HashSet<String> = config
412            .trait_bridges
413            .iter()
414            .filter_map(|b| b.type_alias.clone())
415            .collect();
416        let has_visitor_pattern = config.ffi.as_ref().map(|f| f.visitor_callbacks).unwrap_or(false)
417            || config
418                .trait_bridges
419                .iter()
420                .any(|b| b.bind_via == BridgeBinding::OptionsField);
421        // Generate a high-level public API class that wraps the raw FFI class.
422        // Class name = main_class without "Rs" suffix (e.g., HtmlToMarkdownRs -> HtmlToMarkdown)
423        let public_class = main_class.trim_end_matches("Rs").to_string();
424        let facade_content = gen_facade_class(
425            api,
426            &package,
427            &public_class,
428            &main_class,
429            &prefix,
430            &bridge_param_names,
431            &bridge_type_aliases,
432            has_visitor_pattern,
433        );
434
435        Ok(vec![GeneratedFile {
436            path: base_path.join(format!("{}.java", public_class)),
437            content: line_wrap::wrap_long_java_lines(&facade_content),
438            generated_header: true,
439        }])
440    }
441
442    fn build_config(&self) -> Option<BuildConfig> {
443        Some(BuildConfig {
444            tool: "mvn",
445            crate_suffix: "",
446            build_dep: BuildDependency::Ffi,
447            post_build: vec![],
448        })
449    }
450}