alef_core/config/e2e.rs
1//! E2E test generation configuration types.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6/// Controls whether generated e2e test projects reference the package under
7/// test via a local path (for development) or a registry version string
8/// (for standalone `test_apps` that consumers can run without the monorepo).
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
10#[serde(rename_all = "lowercase")]
11pub enum DependencyMode {
12 /// Local path dependency (default) — used during normal e2e development.
13 #[default]
14 Local,
15 /// Registry dependency — generates standalone test apps that pull the
16 /// package from its published registry (PyPI, npm, crates.io, etc.).
17 Registry,
18}
19
20/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct RegistryConfig {
23 /// Output directory for registry-mode test apps (default: "test_apps").
24 #[serde(default = "default_test_apps_dir")]
25 pub output: String,
26 /// Per-language package overrides used only in registry mode.
27 /// Merged on top of the base `[e2e.packages]` entries.
28 #[serde(default)]
29 pub packages: HashMap<String, PackageRef>,
30 /// When non-empty, only fixture categories in this list are included in
31 /// registry-mode generation (useful for shipping a curated subset).
32 #[serde(default)]
33 pub categories: Vec<String>,
34}
35
36impl Default for RegistryConfig {
37 fn default() -> Self {
38 Self {
39 output: default_test_apps_dir(),
40 packages: HashMap::new(),
41 categories: Vec::new(),
42 }
43 }
44}
45
/// Serde default for `RegistryConfig::output`: the `test_apps` directory.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
49
50/// Root e2e configuration from `[e2e]` section of alef.toml.
51#[derive(Debug, Clone, Serialize, Deserialize, Default)]
52pub struct E2eConfig {
53 /// Directory containing fixture JSON files (default: "fixtures").
54 #[serde(default = "default_fixtures_dir")]
55 pub fixtures: String,
56 /// Output directory for generated e2e test projects (default: "e2e").
57 #[serde(default = "default_output_dir")]
58 pub output: String,
59 /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
60 #[serde(default)]
61 pub languages: Vec<String>,
62 /// Default function call configuration.
63 pub call: CallConfig,
64 /// Named additional call configurations for multi-function testing.
65 /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
66 #[serde(default)]
67 pub calls: HashMap<String, CallConfig>,
68 /// Per-language package reference overrides.
69 #[serde(default)]
70 pub packages: HashMap<String, PackageRef>,
71 /// Per-language formatter commands.
72 #[serde(default)]
73 pub format: HashMap<String, String>,
74 /// Field path aliases: maps fixture field paths to actual API struct paths.
75 /// E.g., "metadata.title" -> "metadata.document.title"
76 /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
77 #[serde(default)]
78 pub fields: HashMap<String, String>,
79 /// Fields that are Optional/nullable in the return type.
80 /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
81 #[serde(default)]
82 pub fields_optional: HashSet<String>,
83 /// Fields that are arrays/Vecs on the result type.
84 /// When a fixture path like `json_ld.name` traverses an array field, the
85 /// accessor adds `[0]` (or language equivalent) to index into the first element.
86 #[serde(default)]
87 pub fields_array: HashSet<String>,
88 /// Known top-level fields on the result type.
89 ///
90 /// When non-empty, assertions whose resolved field path starts with a
91 /// segment that is NOT in this set are emitted as comments (skipped)
92 /// instead of executable assertions. This prevents broken assertions
93 /// when fixtures reference fields from a different operation (e.g.,
94 /// `batch.completed_count` on a `ScrapeResult`).
95 #[serde(default)]
96 pub result_fields: HashSet<String>,
97 /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
98 /// PascalCase return type name (without prefix).
99 ///
100 /// Used by the C e2e generator to emit chained FFI accessor calls for
101 /// nested field paths. The root type is always `conversion_result`.
102 ///
103 /// Example:
104 /// ```toml
105 /// [e2e.fields_c_types]
106 /// "conversion_result.metadata" = "HtmlMetadata"
107 /// "html_metadata.document" = "DocumentMetadata"
108 /// ```
109 #[serde(default)]
110 pub fields_c_types: HashMap<String, String>,
111 /// Fields whose resolved type is an enum in the generated bindings.
112 ///
113 /// When a `contains` / `contains_all` / etc. assertion targets one of these
114 /// fields, language generators that cannot call `.contains()` directly on an
115 /// enum (e.g., Java) will emit a string-conversion call first. For Java,
116 /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
117 /// method that all alef-generated Java enums expose — to obtain the lowercase
118 /// serde string before performing the string comparison.
119 ///
120 /// Both the raw fixture field path (before alias resolution) and the resolved
121 /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
122 /// use either form:
123 ///
124 /// ```toml
125 /// # Raw fixture field:
126 /// fields_enum = ["links[].link_type", "assets[].category"]
127 /// # …or the resolved (aliased) field name:
128 /// fields_enum = ["links[].link_type", "assets[].asset_category"]
129 /// ```
130 #[serde(default)]
131 pub fields_enum: HashSet<String>,
132 /// Dependency mode: `Local` (default) or `Registry`.
133 /// Set at runtime via `--registry` CLI flag; not serialized from TOML.
134 #[serde(skip)]
135 pub dep_mode: DependencyMode,
136 /// Registry-mode configuration from `[e2e.registry]`.
137 #[serde(default)]
138 pub registry: RegistryConfig,
139}
140
141impl E2eConfig {
142 /// Resolve the call config for a fixture. Uses the named call if specified,
143 /// otherwise falls back to the default `[e2e.call]`.
144 pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
145 match call_name {
146 Some(name) => self.calls.get(name).unwrap_or(&self.call),
147 None => &self.call,
148 }
149 }
150
151 /// Resolve the effective package reference for a language.
152 ///
153 /// In registry mode, entries from `[e2e.registry.packages]` are merged on
154 /// top of the base `[e2e.packages]` — registry overrides win for any field
155 /// that is `Some`.
156 pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
157 let base = self.packages.get(lang);
158 if self.dep_mode == DependencyMode::Registry {
159 let reg = self.registry.packages.get(lang);
160 match (base, reg) {
161 (Some(b), Some(r)) => Some(PackageRef {
162 name: r.name.clone().or_else(|| b.name.clone()),
163 path: r.path.clone().or_else(|| b.path.clone()),
164 module: r.module.clone().or_else(|| b.module.clone()),
165 version: r.version.clone().or_else(|| b.version.clone()),
166 }),
167 (None, Some(r)) => Some(r.clone()),
168 (Some(b), None) => Some(b.clone()),
169 (None, None) => None,
170 }
171 } else {
172 base.cloned()
173 }
174 }
175
176 /// Return the effective output directory: `registry.output` in registry
177 /// mode, `output` otherwise.
178 pub fn effective_output(&self) -> &str {
179 if self.dep_mode == DependencyMode::Registry {
180 &self.registry.output
181 } else {
182 &self.output
183 }
184 }
185}
186
/// Serde default for `E2eConfig::fixtures`: the `fixtures` directory.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
190
/// Serde default for `E2eConfig::output`: the `e2e` directory.
fn default_output_dir() -> String {
    "e2e".to_owned()
}
194
195/// Configuration for the function call in each test.
196#[derive(Debug, Clone, Serialize, Deserialize, Default)]
197pub struct CallConfig {
198 /// The function name (alef applies language naming conventions).
199 #[serde(default)]
200 pub function: String,
201 /// The module/package where the function lives.
202 #[serde(default)]
203 pub module: String,
204 /// Variable name for the return value (default: "result").
205 #[serde(default = "default_result_var")]
206 pub result_var: String,
207 /// Whether the function is async.
208 #[serde(default)]
209 pub r#async: bool,
210 /// How fixture `input` fields map to function arguments.
211 #[serde(default)]
212 pub args: Vec<ArgMapping>,
213 /// Per-language overrides for module/function/etc.
214 #[serde(default)]
215 pub overrides: HashMap<String, CallOverride>,
216}
217
/// Serde default for `CallConfig::result_var`: the variable name `result`.
fn default_result_var() -> String {
    String::from("result")
}
221
222/// Maps a fixture input field to a function argument.
223#[derive(Debug, Clone, Serialize, Deserialize)]
224pub struct ArgMapping {
225 /// Argument name in the function signature.
226 pub name: String,
227 /// JSON field path in the fixture's `input` object.
228 pub field: String,
229 /// Type hint for code generation.
230 #[serde(rename = "type", default = "default_arg_type")]
231 pub arg_type: String,
232 /// Whether this argument is optional.
233 #[serde(default)]
234 pub optional: bool,
235}
236
/// Serde default for `ArgMapping::arg_type`: the `string` type hint.
fn default_arg_type() -> String {
    "string".to_owned()
}
240
241/// Per-language override for function call configuration.
242#[derive(Debug, Clone, Serialize, Deserialize, Default)]
243pub struct CallOverride {
244 /// Override the module/import path.
245 #[serde(default)]
246 pub module: Option<String>,
247 /// Override the function name.
248 #[serde(default)]
249 pub function: Option<String>,
250 /// Override the crate name (Rust only).
251 #[serde(default)]
252 pub crate_name: Option<String>,
253 /// Override the class name (Java/C# only).
254 #[serde(default)]
255 pub class: Option<String>,
256 /// Import alias (Go only, e.g., `htmd`).
257 #[serde(default)]
258 pub alias: Option<String>,
259 /// C header file name (C only).
260 #[serde(default)]
261 pub header: Option<String>,
262 /// FFI symbol prefix (C only).
263 #[serde(default)]
264 pub prefix: Option<String>,
265 /// For json_object args: the constructor to use instead of raw dict/object.
266 /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
267 /// `new ConversionOptions(options)` in TypeScript.
268 #[serde(default)]
269 pub options_type: Option<String>,
270 /// How to pass json_object args: "kwargs" (default), "dict", or "json".
271 ///
272 /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
273 /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
274 /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
275 #[serde(default)]
276 pub options_via: Option<String>,
277 /// Maps fixture option field names to their enum type names.
278 /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
279 /// The generator imports these types and maps string values to enum constants.
280 #[serde(default)]
281 pub enum_fields: HashMap<String, String>,
282 /// Module to import enum types from (if different from the main module).
283 /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
284 #[serde(default)]
285 pub enum_module: Option<String>,
286 /// When `true`, the function returns a simple type (e.g., `String`) rather
287 /// than a struct. Generators that would normally emit `result.content`
288 /// (or equivalent field access) will use the result variable directly.
289 #[serde(default)]
290 pub result_is_simple: bool,
291 /// Maps handle config field names to their Python type constructor names.
292 ///
293 /// When the handle config object contains a nested dict-valued field, the
294 /// generator will wrap it in the specified type using keyword arguments.
295 /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
296 /// instead of `{"mode": "auto"}`.
297 #[serde(default)]
298 pub handle_nested_types: HashMap<String, String>,
299 /// Handle config fields whose type constructor takes a single dict argument
300 /// instead of keyword arguments.
301 ///
302 /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
303 /// `AuthConfig(type="basic", ...)`.
304 #[serde(default)]
305 pub handle_dict_types: HashSet<String>,
306 /// Elixir struct module name for the handle config argument.
307 ///
308 /// When set, the generated Elixir handle config uses struct literal syntax
309 /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
310 /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
311 ///
312 /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
313 #[serde(default)]
314 pub handle_struct_type: Option<String>,
315 /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
316 ///
317 /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
318 /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
319 /// List the field names here so the generator emits atom literals instead of strings.
320 ///
321 /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
322 #[serde(default)]
323 pub handle_atom_list_fields: HashSet<String>,
324}
325
326/// Per-language package reference configuration.
327#[derive(Debug, Clone, Serialize, Deserialize, Default)]
328pub struct PackageRef {
329 /// Package/crate/gem/module name.
330 #[serde(default)]
331 pub name: Option<String>,
332 /// Relative path from e2e/{lang}/ to the package.
333 #[serde(default)]
334 pub path: Option<String>,
335 /// Go module path.
336 #[serde(default)]
337 pub module: Option<String>,
338 /// Package version (e.g., for go.mod require directives).
339 #[serde(default)]
340 pub version: Option<String>,
341}