1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
//! E2E test generation configuration types.
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
/// Root e2e configuration from `[e2e]` section of alef.toml.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct E2eConfig {
/// Directory containing fixture JSON files (default: "fixtures").
#[serde(default = "default_fixtures_dir")]
pub fixtures: String,
/// Output directory for generated e2e test projects (default: "e2e").
#[serde(default = "default_output_dir")]
pub output: String,
/// Languages to generate e2e tests for. Defaults to top-level `languages` list.
#[serde(default)]
pub languages: Vec<String>,
/// Default function call configuration.
pub call: CallConfig,
/// Named additional call configurations for multi-function testing.
/// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
#[serde(default)]
pub calls: HashMap<String, CallConfig>,
/// Per-language package reference overrides.
#[serde(default)]
pub packages: HashMap<String, PackageRef>,
/// Per-language formatter commands.
#[serde(default)]
pub format: HashMap<String, String>,
/// Field path aliases: maps fixture field paths to actual API struct paths.
/// E.g., "metadata.title" -> "metadata.document.title"
/// Supports struct access (foo.bar), map access (foo[key]), direct fields.
#[serde(default)]
pub fields: HashMap<String, String>,
/// Fields that are Optional/nullable in the return type.
/// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
#[serde(default)]
pub fields_optional: HashSet<String>,
/// Fields that are arrays/Vecs on the result type.
/// When a fixture path like `json_ld.name` traverses an array field, the
/// accessor adds `[0]` (or language equivalent) to index into the first element.
#[serde(default)]
pub fields_array: HashSet<String>,
/// Known top-level fields on the result type.
///
/// When non-empty, assertions whose resolved field path starts with a
/// segment that is NOT in this set are emitted as comments (skipped)
/// instead of executable assertions. This prevents broken assertions
/// when fixtures reference fields from a different operation (e.g.,
/// `batch.completed_count` on a `ScrapeResult`).
#[serde(default)]
pub result_fields: HashSet<String>,
/// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
/// PascalCase return type name (without prefix).
///
/// Used by the C e2e generator to emit chained FFI accessor calls for
/// nested field paths. The root type is always `conversion_result`.
///
/// Example:
/// ```toml
/// [e2e.fields_c_types]
/// "conversion_result.metadata" = "HtmlMetadata"
/// "html_metadata.document" = "DocumentMetadata"
/// ```
#[serde(default)]
pub fields_c_types: HashMap<String, String>,
/// Fields whose resolved type is an enum in the generated bindings.
///
/// When a `contains` / `contains_all` / etc. assertion targets one of these
/// fields, language generators that cannot call `.contains()` directly on an
/// enum (e.g., Java) will emit a string-conversion call first. For Java,
/// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
/// method that all alef-generated Java enums expose — to obtain the lowercase
/// serde string before performing the string comparison.
///
/// Both the raw fixture field path (before alias resolution) and the resolved
/// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
/// use either form:
///
/// ```toml
/// # Raw fixture field:
/// fields_enum = ["links[].link_type", "assets[].category"]
/// # …or the resolved (aliased) field name:
/// fields_enum = ["links[].link_type", "assets[].asset_category"]
/// ```
#[serde(default)]
pub fields_enum: HashSet<String>,
}
impl E2eConfig {
    /// Pick the call configuration a fixture should use.
    ///
    /// With `Some(name)`, the entry stored under `name` in `calls` is returned
    /// when one exists. In every other case — no name given, or no entry under
    /// that name — the default `call` configuration is returned.
    pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
        call_name
            .and_then(|wanted| self.calls.get(wanted))
            .unwrap_or(&self.call)
    }
}
/// Serde default for the fixtures directory: `"fixtures"`.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
/// Serde default for the generated-project output directory: `"e2e"`.
fn default_output_dir() -> String {
    String::from("e2e")
}
/// Configuration for the function call in each test.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CallConfig {
/// The function name (alef applies language naming conventions).
#[serde(default)]
pub function: String,
/// The module/package where the function lives.
#[serde(default)]
pub module: String,
/// Variable name for the return value (default: "result").
#[serde(default = "default_result_var")]
pub result_var: String,
/// Whether the function is async.
#[serde(default)]
pub r#async: bool,
/// How fixture `input` fields map to function arguments.
#[serde(default)]
pub args: Vec<ArgMapping>,
/// Per-language overrides for module/function/etc.
#[serde(default)]
pub overrides: HashMap<String, CallOverride>,
}
/// Serde default for the result variable name: `"result"`.
fn default_result_var() -> String {
    String::from("result")
}
/// Describes how one fixture input field becomes one function argument.
///
/// `name` and `field` are required when deserializing; `type` and `optional`
/// may be omitted.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ArgMapping {
    /// Name of the argument in the function signature.
    pub name: String,
    /// Path of the JSON field inside the fixture's `input` object.
    pub field: String,
    /// Type hint used during code generation.
    ///
    /// Read from / written to the key `type`; falls back to the value of
    /// `default_arg_type` when the key is missing.
    #[serde(default = "default_arg_type", rename = "type")]
    pub arg_type: String,
    /// Marks the argument as optional (`false` when omitted).
    #[serde(default)]
    pub optional: bool,
}
/// Serde default for an argument's type hint: `"string"`.
fn default_arg_type() -> String {
    String::from("string")
}
/// Language-specific overrides applied on top of a call configuration.
///
/// The container-level `#[serde(default)]` makes every key optional: any field
/// missing from the input takes its value from the derived `Default`
/// (`None`, `false`, or an empty collection).
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
#[serde(default)]
pub struct CallOverride {
    /// Replacement module/import path.
    pub module: Option<String>,
    /// Replacement function name.
    pub function: Option<String>,
    /// Replacement crate name (Rust only).
    pub crate_name: Option<String>,
    /// Replacement class name (Java/C# only).
    pub class: Option<String>,
    /// Import alias (Go only, e.g., `htmd`).
    pub alias: Option<String>,
    /// Name of the C header file (C only).
    pub header: Option<String>,
    /// FFI symbol prefix (C only).
    pub prefix: Option<String>,
    /// Constructor to use for json_object args instead of a raw dict/object.
    ///
    /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
    /// `new ConversionOptions(options)` in TypeScript.
    pub options_type: Option<String>,
    /// Strategy for passing json_object args: "kwargs" (default), "dict", or "json".
    ///
    /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
    /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
    /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
    pub options_via: Option<String>,
    /// Fixture option field name -> enum type name.
    ///
    /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
    /// The generator imports these types and maps string values to enum constants.
    pub enum_fields: HashMap<String, String>,
    /// Module to import enum types from, when it differs from the main module.
    ///
    /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
    pub enum_module: Option<String>,
    /// Set to `true` when the function returns a simple type (e.g., `String`)
    /// rather than a struct.
    ///
    /// Generators that would normally emit `result.content` (or an equivalent
    /// field access) will use the result variable directly.
    pub result_is_simple: bool,
    /// Handle config field name -> Python type constructor name.
    ///
    /// When the handle config object contains a nested dict-valued field, the
    /// generator will wrap it in the specified type using keyword arguments.
    /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
    /// instead of `{"mode": "auto"}`.
    pub handle_nested_types: HashMap<String, String>,
    /// Handle config fields whose type constructor takes a single dict argument
    /// rather than keyword arguments.
    ///
    /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
    /// `AuthConfig(type="basic", ...)`.
    pub handle_dict_types: HashSet<String>,
    /// Elixir struct module name for the handle config argument.
    ///
    /// When set, the generated Elixir handle config uses struct literal syntax
    /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
    /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
    ///
    /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
    pub handle_struct_type: Option<String>,
    /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
    ///
    /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
    /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
    /// List the field names here so the generator emits atom literals instead of strings.
    ///
    /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
    pub handle_atom_list_fields: HashSet<String>,
}
/// Where a language's e2e project finds the package under test.
///
/// The container-level `#[serde(default)]` makes every key optional: any field
/// missing from the input is `None`, as supplied by the derived `Default`.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
#[serde(default)]
pub struct PackageRef {
    /// Name of the package/crate/gem/module.
    pub name: Option<String>,
    /// Path to the package, relative to e2e/{lang}/.
    pub path: Option<String>,
    /// Go module path.
    pub module: Option<String>,
    /// Version of the package (e.g., for go.mod require directives).
    pub version: Option<String>,
}