1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
use serde::{Deserialize, Serialize};
/// Settings that drive generation of trait bridge code, i.e. the glue that lets an
/// object written in a foreign language act as an implementor of a Rust trait over FFI.
///
/// `trait_name` is the only field without `#[serde(default)]`; every other field may be
/// left out of the serialized form and then takes its `Default` value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TraitBridgeConfig {
    /// The Rust trait being bridged, for instance `"OcrBackend"`.
    pub trait_name: String,

    /// Super-trait whose forwarding is required, for instance `"Plugin"`.
    ///
    /// If present, the bridge also emits an `impl SuperTrait for Wrapper` block.
    #[serde(default)]
    pub super_trait: Option<String>,

    /// Rust path of the function returning the registry, for instance
    /// `"kreuzberg::plugins::registry::get_ocr_backend_registry"`.
    ///
    /// Optional. If present, the generated registration function inserts the bridge
    /// into that registry.
    #[serde(default)]
    pub registry_getter: Option<String>,

    /// Name given to the generated registration function, for instance
    /// `"register_ocr_backend"`.
    ///
    /// Optional. If present, a `#[pyfunction]` registration function is generated.
    /// If absent, output is limited to the wrapper struct and the trait impl
    /// (the per-call bridge pattern).
    #[serde(default)]
    pub register_fn: Option<String>,

    /// Name given to the generated unregister function, for instance
    /// `"unregister_ocr_backend"`.
    ///
    /// Optional. If present, a host-language wrapper is emitted next to `register_fn`
    /// that removes a previously registered plugin from the registry. The generated
    /// function receives the plugin name as a string.
    #[serde(default)]
    pub unregister_fn: Option<String>,

    /// Name given to the generated clear function, for instance `"clear_ocr_backends"`.
    ///
    /// Optional. If present, a host-language wrapper is emitted next to `register_fn`
    /// that removes ALL registered plugins of this type. The generated function has no
    /// arguments; its typical use is test teardown.
    #[serde(default)]
    pub clear_fn: Option<String>,

    /// Named type alias in the IR that corresponds to this bridge, for instance
    /// `"VisitorHandle"`.
    ///
    /// Whenever a function parameter carries a `TypeRef::Named` equal to this alias,
    /// code generators swap the parameter type for the language-native callback object
    /// (e.g., `Py<PyAny>` for Python) and emit the wrapping code that builds the bridge.
    #[serde(default)]
    pub type_alias: Option<String>,

    /// Parameter-name override used to find the bridged parameter once the extractor
    /// has sanitized its type.
    ///
    /// Sanitizing can turn, e.g., `VisitorHandle` into `String`, because it is a type
    /// alias over `Rc<RefCell<dyn Trait>>`. In that situation the parameter name is
    /// matched instead of the IR type. With `param_name = "visitor"`, a sanitized
    /// `visitor: Option<String>` parameter is still handled as a bridge param for this
    /// trait.
    #[serde(default)]
    pub param_name: Option<String>,

    /// Additional arguments appended to the `registry.register(arc, ...)` call.
    ///
    /// For example, `"0"` yields `registry.register(arc, 0)`.
    #[serde(default)]
    pub register_extra_args: Option<String>,

    /// Language backends for which this trait bridge must NOT be generated.
    ///
    /// Entries are backend names as reported by `Backend::name()`, e.g.
    /// `["elixir", "wasm"]`. A backend listed here omits the bridge struct and all
    /// related code from its output.
    #[serde(default)]
    pub exclude_languages: Vec<String>,

    /// Methods the FFI backend must NOT forward through the vtable.
    ///
    /// Such methods use the trait's default implementation instead. This helps with
    /// methods whose signatures involve trait-object references (`&dyn Trait`), which
    /// cannot traverse the C FFI boundary.
    #[serde(default)]
    pub ffi_skip_methods: Vec<String>,

    /// Where the bridge attaches to the public API.
    ///
    /// - `"function_param"` (default): the bridge object is a function argument, taken
    ///   at the position of any parameter matching `param_name`. This is the legacy mode.
    /// - `"options_field"`: the bridge object is a field on a configured options struct,
    ///   and that struct is what arrives as a function argument. Backends emit a
    ///   host-language field on the struct rather than a separate function parameter,
    ///   and attach the bridge object to `options.<field>` before the underlying core
    ///   call.
    #[serde(default)]
    pub bind_via: BridgeBinding,

    /// IR type name owning the bridge field when `bind_via = "options_field"`, for
    /// instance `"ConversionOptions"`.
    ///
    /// Required in that mode; ignored in any other.
    #[serde(default)]
    pub options_type: Option<String>,

    /// Field on `options_type` holding the bridge handle when
    /// `bind_via = "options_field"`, for instance `"visitor"`.
    ///
    /// Defaults to `param_name` when omitted. Ignored when
    /// `bind_via = "function_param"`.
    #[serde(default)]
    pub options_field: Option<String>,

    /// IR type name of the trait's context associated type, for instance
    /// `"NodeContext"`.
    ///
    /// If present, backends skip generic record/enum codegen for this type and emit the
    /// richer visitor-specific version instead. This supersedes the former literal
    /// `"NodeContext"` string comparisons scattered across backends.
    #[serde(default)]
    pub context_type: Option<String>,

    /// IR type name of the trait's result associated type, for instance
    /// `"VisitResult"`.
    ///
    /// If present, backends skip generic record/enum codegen for this type and emit the
    /// richer visitor-specific version instead. This supersedes the former literal
    /// `"VisitResult"` string comparisons scattered across backends.
    #[serde(default)]
    pub result_type: Option<String>,
}
/// Selects where a trait bridge object enters the public API.
///
/// Variants are (de)serialized in `snake_case`. The user-facing explanation of each
/// mode is on [`TraitBridgeConfig::bind_via`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BridgeBinding {
    /// The bridge is passed as a positional function argument. This is the legacy
    /// behavior and the `Default` variant.
    #[default]
    FunctionParam,
    /// The bridge is stored in a field of a configured options struct.
    OptionsField,
}
impl TraitBridgeConfig {
    /// Name of the field on `options_type` in which this bridge is stored.
    ///
    /// An explicit [`Self::options_field`] takes precedence. Without one, the value of
    /// [`Self::param_name`] is used, following the convention that field name and
    /// parameter name usually coincide. Yields `None` when both are unset.
    pub fn resolved_options_field(&self) -> Option<&str> {
        if let Some(explicit) = self.options_field.as_deref() {
            return Some(explicit);
        }
        self.param_name.as_deref()
    }

    /// Collect the configured associated type names: `context_type` first, then
    /// `result_type`, leaving out whichever is unset.
    ///
    /// Backends consult this list to bypass generic record/enum codegen for the named
    /// types and hand them to the visitor-specific generators instead.
    pub fn associated_type_names(&self) -> Vec<&str> {
        self.context_type
            .iter()
            .chain(self.result_type.iter())
            .map(String::as_str)
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize a TOML snippet into a `TraitBridgeConfig`, panicking if it does not parse.
    fn parse(src: &str) -> TraitBridgeConfig {
        toml::from_str(src).unwrap()
    }

    /// Build a visitor-bridge TOML snippet whose `bind_via` key is set to the given value.
    fn sample_toml(bind_via: &str) -> String {
        format!(
            r#"
trait_name = "HtmlVisitor"
type_alias = "VisitorHandle"
param_name = "visitor"
bind_via = "{bind_via}"
options_type = "ConversionOptions"
"#
        )
    }

    // `bind_via = "options_field"` selects the matching variant and the field name
    // is taken from `param_name`.
    #[test]
    fn parses_options_field_binding() {
        let snippet = sample_toml("options_field");
        let parsed = parse(&snippet);

        assert_eq!(parsed.bind_via, BridgeBinding::OptionsField);
        assert_eq!(parsed.options_type.as_deref(), Some("ConversionOptions"));
        assert_eq!(parsed.resolved_options_field(), Some("visitor"));
    }

    // Leaving `bind_via` out yields the legacy function-parameter mode.
    #[test]
    fn defaults_to_function_param_when_omitted() {
        let parsed = parse(
            r#"
trait_name = "OcrBackend"
type_alias = "BackendHandle"
"#,
        );

        assert_eq!(parsed.bind_via, BridgeBinding::FunctionParam);
        assert!(parsed.options_type.is_none());
    }

    // Without an explicit `options_field`, the resolved name is `param_name`.
    #[test]
    fn options_field_falls_back_to_param_name() {
        let parsed = parse(
            r#"
trait_name = "HtmlVisitor"
param_name = "visitor"
bind_via = "options_field"
options_type = "ConversionOptions"
"#,
        );

        assert_eq!(parsed.resolved_options_field(), Some("visitor"));
    }

    // Both optional function names are read from the config when present.
    #[test]
    fn parses_unregister_and_clear_fns() {
        let parsed = parse(
            r#"
trait_name = "OcrBackend"
register_fn = "register_ocr_backend"
unregister_fn = "unregister_ocr_backend"
clear_fn = "clear_ocr_backends"
"#,
        );

        assert_eq!(parsed.unregister_fn.as_deref(), Some("unregister_ocr_backend"));
        assert_eq!(parsed.clear_fn.as_deref(), Some("clear_ocr_backends"));
    }

    // Both optional function names are `None` when the keys are missing.
    #[test]
    fn unregister_and_clear_default_to_none() {
        let parsed = parse(
            r#"
trait_name = "OcrBackend"
"#,
        );

        assert!(parsed.unregister_fn.is_none());
        assert!(parsed.clear_fn.is_none());
    }

    // An explicit `options_field` wins over `param_name`.
    #[test]
    fn explicit_options_field_overrides_param_name() {
        let parsed = parse(
            r#"
trait_name = "HtmlVisitor"
param_name = "visitor"
bind_via = "options_field"
options_type = "ConversionOptions"
options_field = "callback"
"#,
        );

        assert_eq!(parsed.resolved_options_field(), Some("callback"));
    }

    // Configured associated types are reported context-first, then result.
    #[test]
    fn associated_type_names_returns_configured_names() {
        let parsed = parse(
            r#"
trait_name = "HtmlVisitor"
context_type = "NodeContext"
result_type = "VisitResult"
"#,
        );

        assert_eq!(parsed.context_type.as_deref(), Some("NodeContext"));
        assert_eq!(parsed.result_type.as_deref(), Some("VisitResult"));

        let reported = parsed.associated_type_names();
        assert_eq!(reported, vec!["NodeContext", "VisitResult"]);
    }

    // No associated types configured means an empty list.
    #[test]
    fn associated_type_names_empty_when_not_set() {
        let parsed = parse(
            r#"
trait_name = "OcrBackend"
"#,
        );

        assert!(parsed.associated_type_names().is_empty());
    }
}