Skip to main content

alef_core/config/
e2e.rs

1//! E2E test generation configuration types.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6/// Controls whether generated e2e test projects reference the package under
7/// test via a local path (for development) or a registry version string
8/// (for standalone `test_apps` that consumers can run without the monorepo).
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
10#[serde(rename_all = "lowercase")]
11pub enum DependencyMode {
12    /// Local path dependency (default) — used during normal e2e development.
13    #[default]
14    Local,
15    /// Registry dependency — generates standalone test apps that pull the
16    /// package from its published registry (PyPI, npm, crates.io, etc.).
17    Registry,
18}
19
20/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct RegistryConfig {
23    /// Output directory for registry-mode test apps (default: "test_apps").
24    #[serde(default = "default_test_apps_dir")]
25    pub output: String,
26    /// Per-language package overrides used only in registry mode.
27    /// Merged on top of the base `[e2e.packages]` entries.
28    #[serde(default)]
29    pub packages: HashMap<String, PackageRef>,
30    /// When non-empty, only fixture categories in this list are included in
31    /// registry-mode generation (useful for shipping a curated subset).
32    #[serde(default)]
33    pub categories: Vec<String>,
34    /// GitHub repository URL for downloading prebuilt artifacts (e.g., FFI
35    /// shared libraries) from GitHub Releases.
36    ///
37    /// Falls back to `[scaffold] repository` when not set, then to
38    /// `https://github.com/kreuzberg-dev/{crate.name}`.
39    #[serde(default)]
40    pub github_repo: Option<String>,
41}
42
43impl Default for RegistryConfig {
44    fn default() -> Self {
45        Self {
46            output: default_test_apps_dir(),
47            packages: HashMap::new(),
48            categories: Vec::new(),
49            github_repo: None,
50        }
51    }
52}
53
/// Serde default for `RegistryConfig::output`: the `test_apps` directory.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
57
58/// Root e2e configuration from `[e2e]` section of alef.toml.
59#[derive(Debug, Clone, Serialize, Deserialize, Default)]
60pub struct E2eConfig {
61    /// Directory containing fixture JSON files (default: "fixtures").
62    #[serde(default = "default_fixtures_dir")]
63    pub fixtures: String,
64    /// Output directory for generated e2e test projects (default: "e2e").
65    #[serde(default = "default_output_dir")]
66    pub output: String,
67    /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
68    #[serde(default)]
69    pub languages: Vec<String>,
70    /// Default function call configuration.
71    pub call: CallConfig,
72    /// Named additional call configurations for multi-function testing.
73    /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
74    #[serde(default)]
75    pub calls: HashMap<String, CallConfig>,
76    /// Per-language package reference overrides.
77    #[serde(default)]
78    pub packages: HashMap<String, PackageRef>,
79    /// Per-language formatter commands.
80    #[serde(default)]
81    pub format: HashMap<String, String>,
82    /// Field path aliases: maps fixture field paths to actual API struct paths.
83    /// E.g., "metadata.title" -> "metadata.document.title"
84    /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
85    #[serde(default)]
86    pub fields: HashMap<String, String>,
87    /// Fields that are Optional/nullable in the return type.
88    /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
89    #[serde(default)]
90    pub fields_optional: HashSet<String>,
91    /// Fields that are arrays/Vecs on the result type.
92    /// When a fixture path like `json_ld.name` traverses an array field, the
93    /// accessor adds `[0]` (or language equivalent) to index into the first element.
94    #[serde(default)]
95    pub fields_array: HashSet<String>,
96    /// Known top-level fields on the result type.
97    ///
98    /// When non-empty, assertions whose resolved field path starts with a
99    /// segment that is NOT in this set are emitted as comments (skipped)
100    /// instead of executable assertions.  This prevents broken assertions
101    /// when fixtures reference fields from a different operation (e.g.,
102    /// `batch.completed_count` on a `ScrapeResult`).
103    #[serde(default)]
104    pub result_fields: HashSet<String>,
105    /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
106    /// PascalCase return type name (without prefix).
107    ///
108    /// Used by the C e2e generator to emit chained FFI accessor calls for
109    /// nested field paths. The root type is always `conversion_result`.
110    ///
111    /// Example:
112    /// ```toml
113    /// [e2e.fields_c_types]
114    /// "conversion_result.metadata" = "HtmlMetadata"
115    /// "html_metadata.document" = "DocumentMetadata"
116    /// ```
117    #[serde(default)]
118    pub fields_c_types: HashMap<String, String>,
119    /// Fields whose resolved type is an enum in the generated bindings.
120    ///
121    /// When a `contains` / `contains_all` / etc. assertion targets one of these
122    /// fields, language generators that cannot call `.contains()` directly on an
123    /// enum (e.g., Java) will emit a string-conversion call first.  For Java,
124    /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
125    /// method that all alef-generated Java enums expose — to obtain the lowercase
126    /// serde string before performing the string comparison.
127    ///
128    /// Both the raw fixture field path (before alias resolution) and the resolved
129    /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
130    /// use either form:
131    ///
132    /// ```toml
133    /// # Raw fixture field:
134    /// fields_enum = ["links[].link_type", "assets[].category"]
135    /// # …or the resolved (aliased) field name:
136    /// fields_enum = ["links[].link_type", "assets[].asset_category"]
137    /// ```
138    #[serde(default)]
139    pub fields_enum: HashSet<String>,
140    /// Dependency mode: `Local` (default) or `Registry`.
141    /// Set at runtime via `--registry` CLI flag; not serialized from TOML.
142    #[serde(skip)]
143    pub dep_mode: DependencyMode,
144    /// Registry-mode configuration from `[e2e.registry]`.
145    #[serde(default)]
146    pub registry: RegistryConfig,
147}
148
149impl E2eConfig {
150    /// Resolve the call config for a fixture. Uses the named call if specified,
151    /// otherwise falls back to the default `[e2e.call]`.
152    pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
153        match call_name {
154            Some(name) => self.calls.get(name).unwrap_or(&self.call),
155            None => &self.call,
156        }
157    }
158
159    /// Resolve the effective package reference for a language.
160    ///
161    /// In registry mode, entries from `[e2e.registry.packages]` are merged on
162    /// top of the base `[e2e.packages]` — registry overrides win for any field
163    /// that is `Some`.
164    pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
165        let base = self.packages.get(lang);
166        if self.dep_mode == DependencyMode::Registry {
167            let reg = self.registry.packages.get(lang);
168            match (base, reg) {
169                (Some(b), Some(r)) => Some(PackageRef {
170                    name: r.name.clone().or_else(|| b.name.clone()),
171                    path: r.path.clone().or_else(|| b.path.clone()),
172                    module: r.module.clone().or_else(|| b.module.clone()),
173                    version: r.version.clone().or_else(|| b.version.clone()),
174                }),
175                (None, Some(r)) => Some(r.clone()),
176                (Some(b), None) => Some(b.clone()),
177                (None, None) => None,
178            }
179        } else {
180            base.cloned()
181        }
182    }
183
184    /// Return the effective output directory: `registry.output` in registry
185    /// mode, `output` otherwise.
186    pub fn effective_output(&self) -> &str {
187        if self.dep_mode == DependencyMode::Registry {
188            &self.registry.output
189        } else {
190            &self.output
191        }
192    }
193}
194
/// Serde default for `E2eConfig::fixtures`: the `fixtures` directory.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
198
/// Serde default for `E2eConfig::output`: the `e2e` directory.
fn default_output_dir() -> String {
    String::from("e2e")
}
202
203/// Configuration for the function call in each test.
204#[derive(Debug, Clone, Serialize, Deserialize, Default)]
205pub struct CallConfig {
206    /// The function name (alef applies language naming conventions).
207    #[serde(default)]
208    pub function: String,
209    /// The module/package where the function lives.
210    #[serde(default)]
211    pub module: String,
212    /// Variable name for the return value (default: "result").
213    #[serde(default = "default_result_var")]
214    pub result_var: String,
215    /// Whether the function is async.
216    #[serde(default)]
217    pub r#async: bool,
218    /// HTTP endpoint path for mock server routing (e.g., `"/v1/chat/completions"`).
219    ///
220    /// Required when fixtures use `mock_response`. The Rust e2e generator uses
221    /// this to build the `MockRoute` that the mock server matches against.
222    #[serde(default)]
223    pub path: Option<String>,
224    /// HTTP method for mock server routing (default: `"POST"`).
225    ///
226    /// Used together with `path` when building `MockRoute` entries.
227    #[serde(default)]
228    pub method: Option<String>,
229    /// How fixture `input` fields map to function arguments.
230    #[serde(default)]
231    pub args: Vec<ArgMapping>,
232    /// Per-language overrides for module/function/etc.
233    #[serde(default)]
234    pub overrides: HashMap<String, CallOverride>,
235    /// Whether the function returns `Result<T, E>` in its native binding.
236    /// Defaults to `true`. When `false`, generators that distinguish Result-returning
237    /// from non-Result-returning calls (currently Rust) will skip the
238    /// `.expect("should succeed")` unwrap and bind the raw return value directly.
239    #[serde(default = "default_returns_result")]
240    pub returns_result: bool,
241    /// Whether the function returns only an error/unit — i.e., `Result<(), E>`.
242    ///
243    /// When combined with `returns_result = true`, Go generators emit `err := func()`
244    /// (single return value) rather than `_, err := func()` (two return values).
245    /// This is needed for functions like `validate_host` that return only `error` in Go.
246    #[serde(default)]
247    pub returns_void: bool,
248    /// skip_languages
249    #[serde(default)]
250    pub skip_languages: Vec<String>,
251}
252
/// Serde default for `CallConfig::result_var`: the variable name `result`.
fn default_result_var() -> String {
    String::from("result")
}
256
/// Serde default for `CallConfig::returns_result`.
///
/// Returns `true`: a call is assumed to return `Result<T, E>` unless the
/// configuration explicitly sets `returns_result = false`. This matches the
/// documented default of the field; returning `false` here would silently
/// drop the result unwrap for every call that omits the key.
fn default_returns_result() -> bool {
    true
}
260
261/// Maps a fixture input field to a function argument.
262#[derive(Debug, Clone, Serialize, Deserialize)]
263pub struct ArgMapping {
264    /// Argument name in the function signature.
265    pub name: String,
266    /// JSON field path in the fixture's `input` object.
267    pub field: String,
268    /// Type hint for code generation.
269    #[serde(rename = "type", default = "default_arg_type")]
270    pub arg_type: String,
271    /// Whether this argument is optional.
272    #[serde(default)]
273    pub optional: bool,
274    /// When `true`, the Rust codegen passes this argument by value (owned) rather than
275    /// by reference. Use for `Vec<T>` parameters that do not accept `&Vec<T>`.
276    #[serde(default)]
277    pub owned: bool,
278    /// For `json_object` args targeting `&[T]` Rust parameters, set to the element type
279    /// (e.g. `"f32"`, `"String"`) so the codegen emits `Vec<element_type>` annotation.
280    #[serde(default)]
281    pub element_type: Option<String>,
282}
283
/// Serde default for `ArgMapping::arg_type`: the `string` type hint.
fn default_arg_type() -> String {
    String::from("string")
}
287
288/// Per-language override for function call configuration.
289#[derive(Debug, Clone, Serialize, Deserialize, Default)]
290pub struct CallOverride {
291    /// Override the module/import path.
292    #[serde(default)]
293    pub module: Option<String>,
294    /// Override the function name.
295    #[serde(default)]
296    pub function: Option<String>,
297    /// Maps canonical argument names to language-specific argument names.
298    ///
299    /// Used when a language binding uses a different parameter name than the
300    /// canonical `args` list in `CallConfig`. For example, if the canonical
301    /// arg name is `doc` but the Python binding uses `html`, specify:
302    ///
303    /// ```toml
304    /// [e2e.call.overrides.python]
305    /// arg_name_map = { doc = "html" }
306    /// ```
307    ///
308    /// The key is the canonical name (from `args[].name`) and the value is the
309    /// name to use when emitting the keyword argument in generated tests.
310    #[serde(default)]
311    pub arg_name_map: HashMap<String, String>,
312    /// Override the crate name (Rust only).
313    #[serde(default)]
314    pub crate_name: Option<String>,
315    /// Override the class name (Java/C# only).
316    #[serde(default)]
317    pub class: Option<String>,
318    /// Import alias (Go only, e.g., `htmd`).
319    #[serde(default)]
320    pub alias: Option<String>,
321    /// C header file name (C only).
322    #[serde(default)]
323    pub header: Option<String>,
324    /// FFI symbol prefix (C only).
325    #[serde(default)]
326    pub prefix: Option<String>,
327    /// For json_object args: the constructor to use instead of raw dict/object.
328    /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
329    /// `new ConversionOptions(options)` in TypeScript.
330    #[serde(default)]
331    pub options_type: Option<String>,
332    /// How to pass json_object args: "kwargs" (default), "dict", or "json".
333    ///
334    /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
335    /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
336    /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
337    #[serde(default)]
338    pub options_via: Option<String>,
339    /// Maps fixture option field names to their enum type names.
340    /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
341    /// The generator imports these types and maps string values to enum constants.
342    #[serde(default)]
343    pub enum_fields: HashMap<String, String>,
344    /// Module to import enum types from (if different from the main module).
345    /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
346    #[serde(default)]
347    pub enum_module: Option<String>,
348    /// When `true`, the function returns a simple type (e.g., `String`) rather
349    /// than a struct.  Generators that would normally emit `result.content`
350    /// (or equivalent field access) will use the result variable directly.
351    #[serde(default)]
352    pub result_is_simple: bool,
353    /// When `true` (and combined with `result_is_simple`), the simple result is
354    /// a slice/array type (e.g., `[]string` in Go, `Vec<String>` in Rust).
355    /// The Go generator uses `strings.Join(value, " ")` for `contains` assertions
356    /// instead of `string(value)`.
357    #[serde(default)]
358    pub result_is_array: bool,
359    /// When `true`, the function returns `Vec<T>` rather than a single value.
360    /// Field-path assertions are emitted as `.iter().all(|r| <accessor>)` so
361    /// every element is checked. (Rust generator.)
362    #[serde(default)]
363    pub result_is_vec: bool,
364    /// When `true`, the function returns a raw byte array (e.g., `byte[]` in Java,
365    /// `[]byte` in Go). Used by generators to select the correct length accessor
366    /// (field `.length` vs method `.length()`).
367    #[serde(default)]
368    pub result_is_bytes: bool,
369    /// When `true`, the function returns `Option<T>`. The result is unwrapped
370    /// before any non-`is_none`/`is_some` assertion runs; `is_empty`/`not_empty`
371    /// assertions map to `is_none()`/`is_some()`. (Rust generator.)
372    #[serde(default)]
373    pub result_is_option: bool,
374    /// When `true`, the Rust generator wraps the `json_object` argument expression
375    /// in `Some(...).clone()` to match an owned `Option<T>` parameter slot rather
376    /// than passing `&options`. (Rust generator only.)
377    #[serde(default)]
378    pub wrap_options_in_some: bool,
379    /// Trailing positional arguments appended verbatim after the configured
380    /// `args`. Used when the target function takes additional positional slots
381    /// (e.g. visitor) the fixture cannot supply directly. (Rust generator only.)
382    #[serde(default)]
383    pub extra_args: Vec<String>,
384    /// Per-rust override of the call-level `returns_result`. When set, takes
385    /// precedence over `CallConfig.returns_result` for the Rust generator only.
386    /// Useful when one binding is fallible while others are not.
387    #[serde(default)]
388    pub returns_result: Option<bool>,
389    /// Maps handle config field names to their Python type constructor names.
390    ///
391    /// When the handle config object contains a nested dict-valued field, the
392    /// generator will wrap it in the specified type using keyword arguments.
393    /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
394    /// instead of `{"mode": "auto"}`.
395    #[serde(default)]
396    pub handle_nested_types: HashMap<String, String>,
397    /// Handle config fields whose type constructor takes a single dict argument
398    /// instead of keyword arguments.
399    ///
400    /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
401    /// `AuthConfig(type="basic", ...)`.
402    #[serde(default)]
403    pub handle_dict_types: HashSet<String>,
404    /// Elixir struct module name for the handle config argument.
405    ///
406    /// When set, the generated Elixir handle config uses struct literal syntax
407    /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
408    /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
409    ///
410    /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
411    #[serde(default)]
412    pub handle_struct_type: Option<String>,
413    /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
414    ///
415    /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
416    /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
417    /// List the field names here so the generator emits atom literals instead of strings.
418    ///
419    /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
420    #[serde(default)]
421    pub handle_atom_list_fields: HashSet<String>,
422    /// WASM config class name for handle args (WASM generator only).
423    ///
424    /// When set, handle args are constructed using `ConfigType.default()` + setters
425    /// instead of passing a plain JS object (which fails `_assertClass` validation).
426    ///
427    /// E.g., `"WasmCrawlConfig"` generates:
428    /// ```js
429    /// const engineConfig = WasmCrawlConfig.default();
430    /// engineConfig.maxDepth = 1;
431    /// const engine = createEngine(engineConfig);
432    /// ```
433    #[serde(default)]
434    pub handle_config_type: Option<String>,
435    /// PHP client factory method name (PHP generator only).
436    ///
437    /// When set, the generated PHP test instantiates a client via
438    /// `ClassName::factory_method('test-key')` and calls methods on the instance
439    /// instead of using static facade calls.
440    ///
441    /// E.g., `"createClient"` generates:
442    /// ```php
443    /// $client = LiterLlm::createClient('test-key');
444    /// $result = $client->chat($request);
445    /// ```
446    #[serde(default)]
447    pub php_client_factory: Option<String>,
448    /// Client factory function name for instance-method languages (WASM, etc.).
449    ///
450    /// When set, the generated test imports this function, creates a client,
451    /// and calls API methods on the instance instead of as top-level functions.
452    ///
453    /// E.g., `"createClient"` generates:
454    /// ```typescript
455    /// import { createClient } from 'pkg';
456    /// const client = createClient('test-key');
457    /// const result = await client.chat(request);
458    /// ```
459    #[serde(default)]
460    pub client_factory: Option<String>,
461    /// Fields on the options object that require `BigInt()` wrapping (WASM only).
462    ///
463    /// `wasm_bindgen` maps Rust `u64`/`i64` to JavaScript `BigInt`. Numeric
464    /// values assigned to these setters must be wrapped with `BigInt(n)`.
465    ///
466    /// List camelCase field names, e.g.:
467    /// ```toml
468    /// [e2e.call.overrides.wasm]
469    /// bigint_fields = ["maxTokens", "seed"]
470    /// ```
471    #[serde(default)]
472    pub bigint_fields: Vec<String>,
473    /// Static CLI arguments appended to every invocation (brew/CLI generator only).
474    ///
475    /// E.g., `["--format", "json"]` appends `--format json` to every CLI call.
476    #[serde(default)]
477    pub cli_args: Vec<String>,
478    /// Maps fixture config field names to CLI flag names (brew/CLI generator only).
479    ///
480    /// E.g., `{"output_format": "--format"}` generates `--format <value>` from
481    /// the fixture's `output_format` input field.
482    #[serde(default)]
483    pub cli_flags: HashMap<String, String>,
484    /// C FFI opaque result type name (C only).
485    ///
486    /// The PascalCase name of the result struct, without the prefix.
487    /// E.g., `"ChatCompletionResponse"` for `LiterllmChatCompletionResponse*`.
488    /// If not set, defaults to the function name in PascalCase.
489    #[serde(default)]
490    pub result_type: Option<String>,
491    /// Override the argument order for this language binding.
492    ///
493    /// Lists argument names from `args` in the order they should be passed
494    /// to the target function. Useful when a language binding reorders parameters
495    /// relative to the canonical `args` list in `CallConfig`.
496    ///
497    /// E.g., if `args = [path, mime_type, config]` but the Node.js binding
498    /// takes `(path, config, mime_type?)`, specify:
499    /// ```toml
500    /// [e2e.call.overrides.node]
501    /// arg_order = ["path", "config", "mime_type"]
502    /// ```
503    #[serde(default)]
504    pub arg_order: Vec<String>,
505}
506
507/// Per-language package reference configuration.
508#[derive(Debug, Clone, Serialize, Deserialize, Default)]
509pub struct PackageRef {
510    /// Package/crate/gem/module name.
511    #[serde(default)]
512    pub name: Option<String>,
513    /// Relative path from e2e/{lang}/ to the package.
514    #[serde(default)]
515    pub path: Option<String>,
516    /// Go module path.
517    #[serde(default)]
518    pub module: Option<String>,
519    /// Package version (e.g., for go.mod require directives).
520    #[serde(default)]
521    pub version: Option<String>,
522}