1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
use crate::codegen::shared::binding_fields;
use crate::codegen::type_mapper::TypeMapper;
use crate::core::config::TraitBridgeConfig;
use crate::core::ir::{ApiSurface, FieldDef, TypeDef, TypeRef};
use std::collections::HashMap;
/// Replace the constructor in an impl block with one that honors serde renames.
///
/// Builds a field-based `#[new]` constructor for `typ` whose parameter names are the
/// serde/config wire names (falling back to the Rust field name when the wire name is
/// not a valid identifier), then splices it over the existing constructor found in
/// `impl_block`. An extra trailing parameter is added for an options-field trait
/// bridge (e.g. a visitor) when one targets this type.
///
/// # Parameters
/// - `impl_block`: source text of the generated impl block to patch.
/// - `typ`: IR definition of the type whose constructor is being generated.
/// - `mapper`: maps IR field types to the type strings used in the signature.
/// - `config`: binding configuration; only `option_duration_on_defaults` is read here.
/// - `config_renames`: optional field-name → wire-name overrides, consulted when a
///   field has no `serde_rename`.
/// - `trait_bridges`: bridge configurations; the first one bound via `OptionsField`
///   whose `options_type` equals `typ.name` contributes the bridge parameter.
/// - `never_skip_cfg_field_names`: cfg-gated fields that must still be constructor
///   parameters.
/// - `api`: full API surface, used to look up whether a nested named type has a default.
///
/// # Returns
/// The patched impl block. The input is returned unchanged when the type declares an
/// explicit static `new` method, or when no `#[pyo3(signature = (` attribute followed
/// by a brace-balanced `pub fn new(` can be located.
// Eight parameters are required by the call sites; grouping them would change the interface.
#[allow(clippy::too_many_arguments)]
pub(super) fn replace_constructor_with_serde_rename(
    impl_block: &str,
    typ: &TypeDef,
    mapper: &dyn TypeMapper,
    config: &crate::codegen::generators::RustBindingConfig,
    config_renames: Option<&HashMap<String, String>>,
    trait_bridges: &[TraitBridgeConfig],
    never_skip_cfg_field_names: &[String],
    api: &ApiSurface,
) -> String {
    use crate::core::keywords::{is_valid_rust_ident_chars, python_ident, rust_raw_ident};

    // When the type already has an explicit static `new()` method in its IR, do not
    // emit a second field-based `#[new]` constructor — the static method will be emitted
    // as `#[staticmethod] pub fn new(...)` and PyO3 forbids two `new` registrations in
    // the same impl block (E0592 duplicate definitions).
    if typ.methods.iter().any(|m| m.is_static && m.name == "new") {
        return impl_block.to_string();
    }

    /// Check if a field should be emitted as `Option<T>` to accept `None` (BLK-5 fix).
    ///
    /// True only when the parent type has a default, the field is neither flagged
    /// optional nor already `TypeRef::Optional`, the field type is `TypeRef::Named`,
    /// and the first type in `api.types` with that name also has a default.
    fn should_option_for_nested_default(typ: &TypeDef, field: &FieldDef, api: &ApiSurface) -> bool {
        if !typ.has_default || field.optional || matches!(&field.ty, TypeRef::Optional(_)) {
            return false;
        }
        match &field.ty {
            TypeRef::Named(type_name) => api
                .types
                .iter()
                .find(|t| t.name == *type_name)
                .map_or(false, |t| t.has_default),
            _ => false,
        }
    }

    /// Decide whether a field's constructor parameter accepts `None`.
    ///
    /// Single source of truth for both the parameter type (`Option<T>`) and the
    /// signature default (`=None`), so the two can never disagree. Covers fields that
    /// are optional in the IR, `Duration` fields on has-default types when
    /// `option_duration_on_defaults` is set, and nested has-default structs (BLK-5).
    fn is_optional_param(
        typ: &TypeDef,
        field: &FieldDef,
        config: &crate::codegen::generators::RustBindingConfig,
        api: &ApiSurface,
    ) -> bool {
        let force_optional = config.option_duration_on_defaults
            && typ.has_default
            && !field.optional
            && matches!(field.ty, TypeRef::Duration);
        field.optional || force_optional || should_option_for_nested_default(typ, field, api)
    }

    /// Resolve the constructor parameter identifier for a field.
    ///
    /// Prefers the serde rename (or config rename) over the bare Rust field name, but only
    /// when the resolved name passes `is_valid_rust_ident_chars`. Names like `"self-harm"`
    /// or `"self-harm/intent"` (containing hyphens or slashes) are not valid Rust
    /// identifiers even with `r#` escaping, so the function falls back to the Rust field
    /// name in those cases. A valid name is passed through `rust_raw_ident`, which escapes
    /// Rust keywords as raw identifiers (e.g. `"type"` → `r#type`).
    fn resolve_param_ident<'a>(
        field_name: &'a str,
        serde_rename: Option<&'a String>,
        config_renames: Option<&HashMap<String, String>>,
    ) -> String {
        let wire_name = serde_rename
            .map(|s| s.as_str())
            .or_else(|| config_renames.and_then(|r| r.get(field_name)).map(|s| s.as_str()))
            .unwrap_or(field_name);
        if is_valid_rust_ident_chars(wire_name) {
            rust_raw_ident(wire_name)
        } else {
            // The Rust field name is used as-is: it is already an identifier in the IR.
            field_name.to_string()
        }
    }

    /// Find the byte offset just past the closing brace of the function starting at
    /// `fn_new_pos`.
    ///
    /// Counts `{` / `}` from `fn_new_pos` and stops when the depth returns to zero.
    /// Offsets come from `char_indices`, so they are byte offsets that stay valid for
    /// slicing even when the text contains multi-byte characters. Braces inside string
    /// literals or comments are not distinguished from structural braces.
    fn constructor_end(impl_block: &str, fn_new_pos: usize) -> Option<usize> {
        let mut depth = 0usize;
        for (offset, c) in impl_block[fn_new_pos..].char_indices() {
            match c {
                '{' => depth += 1,
                // A `}` seen before any `{` is ignored rather than underflowing the depth.
                '}' if depth > 0 => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(fn_new_pos + offset + c.len_utf8());
                    }
                }
                _ => {}
            }
        }
        None
    }

    // Locate the options-field bridge for this type once (e.g. ParseOptions.visitor).
    // Its field is appended separately as the bridge parameter, so it is filtered out
    // of the regular field lists to avoid emitting it twice when the field is
    // force-restored through `never_skip_cfg_field_names`.
    let options_bridge = trait_bridges.iter().find(|b| {
        b.bind_via == crate::core::config::BridgeBinding::OptionsField
            && b.options_type.as_deref() == Some(&typ.name)
    });
    let bridge_field_name = options_bridge.and_then(|b| b.resolved_options_field());
    let bridge_param = options_bridge.and_then(|b| {
        let param_name = b.param_name.as_deref()?;
        Some((param_name, b.type_alias.as_deref().unwrap_or("object")))
    });
    let is_bridge_field = |name: &str| bridge_field_name == Some(name);

    // Constructor parameters: binding fields that are not excluded, plus cfg-gated
    // fields the consumer has force-restored via `never_skip_cfg_field_names`.
    // The stable sort moves IR-optional fields after required ones while keeping
    // declaration order within each group.
    let mut sorted_fields: Vec<_> = binding_fields(&typ.fields)
        .filter(|f| !f.binding_excluded && (f.cfg.is_none() || never_skip_cfg_field_names.contains(&f.name)))
        .filter(|f| !is_bridge_field(f.name.as_str()))
        .collect();
    sorted_fields.sort_by_key(|f| f.optional);

    // `name: Type` entries for the Rust function signature.
    let mut params: Vec<String> = sorted_fields
        .iter()
        .map(|f| {
            let param_ident = resolve_param_ident(&f.name, f.serde_rename.as_ref(), config_renames);
            // The IR unwraps `TypeRef::Optional` to mark fields as optional, so the base
            // type is re-wrapped here. Re-wrapping is skipped when the IR type is
            // *already* Optional (a source `Option<Option<T>>` peels to
            // `optional = true, ty = Optional(T)`), to avoid a double `Option`.
            let wrap_in_option =
                is_optional_param(typ, f, config, api) && !matches!(f.ty, TypeRef::Optional(_));
            let mapped = mapper.map_type(&f.ty);
            if wrap_in_option {
                format!("{}: Option<{}>", param_ident, mapped)
            } else {
                format!("{}: {}", param_ident, mapped)
            }
        })
        .collect();

    // Entries for `#[pyo3(signature = (...))]`.
    // PyO3 strips the `r#` prefix when deriving the Python-facing keyword argument
    // name, so `r#type` in the signature → Python `type`.
    let mut defaults: Vec<String> = sorted_fields
        .iter()
        .map(|f| {
            let param_ident = resolve_param_ident(&f.name, f.serde_rename.as_ref(), config_renames);
            if is_optional_param(typ, f, config, api) {
                format!("{}=None", param_ident)
            } else if typ.has_default {
                // Non-optional fields on has_default types default to the corresponding
                // field of `Self::default()`. `Self` is the binding struct, whose field
                // names are python-keyword-escaped, so the escaped name is used here.
                format!("{}=Self::default().{}", param_ident, python_ident(&f.name))
            } else {
                // Required keyword argument: no default in the signature.
                param_ident
            }
        })
        .collect();

    // Struct-literal entries. The LEFT side is always the binding struct's field name,
    // which is python-keyword-escaped (e.g. `from` -> `from_`), not the core IR name.
    let mut assignments: Vec<String> = typ
        .fields
        .iter()
        .filter(|f| !f.binding_excluded && !is_bridge_field(f.name.as_str()))
        .map(|f| {
            let binding_field = python_ident(&f.name);
            if f.cfg.is_some() && !never_skip_cfg_field_names.contains(&f.name) {
                // Cfg-gated field that was NOT force-restored: it is not a constructor
                // parameter, so it is initialized with a default value.
                let fallback = if f.optional { "None" } else { "Default::default()" };
                return format!("{}: {}", binding_field, fallback);
            }
            // Resolve the identifier exactly as the parameter list did so the struct
            // literal references the same variable as the parameter declaration.
            let param_ident = resolve_param_ident(&f.name, f.serde_rename.as_ref(), config_renames);
            if should_option_for_nested_default(typ, f, api) {
                // BLK-5: the parameter is `Option<T>` but the field is `T`; fall back to
                // the default instance's value when the caller passed `None`.
                format!(
                    "{}: {}.unwrap_or_else(|| Self::default().{})",
                    binding_field, param_ident, binding_field
                )
            } else if param_ident != binding_field {
                // Parameter name differs from the field name: use the explicit form.
                format!("{}: {}", binding_field, param_ident)
            } else {
                // Names match: use field-init shorthand.
                binding_field
            }
        })
        .collect();

    // Append the bridge parameter, its default, and its struct-literal entry.
    //
    // The bridge parameter's *type* must match the struct field it ultimately
    // populates — the struct literal emits a plain `field: param`, which would fail
    // to compile if the parameter type and field type differ. Look up the matching
    // field's mapped type and use it; fall back to the bridge's `type_alias` only
    // when the bridge field cannot be located in the struct.
    if let Some((param_name, type_alias)) = bridge_param {
        let field_type = bridge_field_name
            .and_then(|fname| typ.fields.iter().find(|f| f.name == fname))
            .map(|f| mapper.map_type(&f.ty))
            .unwrap_or_else(|| type_alias.to_string());
        // The mapped type may already be `Option<...>`; avoid double-wrapping.
        let param_type = if field_type.starts_with("Option<") {
            field_type
        } else {
            format!("Option<{}>", field_type)
        };
        params.push(format!("{}: {}", param_name, param_type));
        defaults.push(format!("{}=None", param_name));
        if let Some(field_name) = bridge_field_name {
            assignments.push(format!("{}: {}", field_name, param_name));
        }
    }

    // Break the parameter list onto separate lines once the one-line form exceeds
    // 100 bytes.
    let one_line_params = params.join(", ");
    let param_list = if one_line_params.len() > 100 {
        format!("\n {},\n ", params.join(",\n "))
    } else {
        one_line_params
    };

    // The new constructor method (without impl wrapper — it is injected into the
    // existing impl block below).
    let new_constructor = format!(
        " #[allow(clippy::too_many_arguments)]\n \
        #[must_use]\n \
        #[pyo3(signature = ({}))]#[new]\n \
        pub fn new({}) -> Self {{\n \
        Self {{ {} }}\n \
        }}",
        defaults.join(", "),
        param_list,
        assignments.join(", ")
    );

    // Splice: replace everything from the start of the line holding the first
    // `#[pyo3(signature = (` attribute through the closing brace of the `pub fn new(`
    // that follows it. The `pub fn new(` search is anchored at the attribute so an
    // earlier occurrence can never produce overlapping `before`/`after` slices.
    let spliced = impl_block.find("#[pyo3(signature = (").and_then(|sig_pos| {
        let line_start = impl_block[..sig_pos].rfind('\n')? + 1;
        let fn_new_pos = sig_pos + impl_block[sig_pos..].find("pub fn new(")?;
        let end = constructor_end(impl_block, fn_new_pos)?;
        Some(format!(
            "{}{}{}",
            &impl_block[..line_start],
            new_constructor,
            &impl_block[end..]
        ))
    });

    // Fallback: if the constructor cannot be located, return the original unchanged.
    spliced.unwrap_or_else(|| impl_block.to_string())
}