Skip to main content

alef_core/config/
e2e.rs

1//! E2E test generation configuration types.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
6/// Controls whether generated e2e test projects reference the package under
7/// test via a local path (for development) or a registry version string
8/// (for standalone `test_apps` that consumers can run without the monorepo).
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
10#[serde(rename_all = "lowercase")]
11pub enum DependencyMode {
12    /// Local path dependency (default) — used during normal e2e development.
13    #[default]
14    Local,
15    /// Registry dependency — generates standalone test apps that pull the
16    /// package from its published registry (PyPI, npm, crates.io, etc.).
17    Registry,
18}
19
20/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct RegistryConfig {
23    /// Output directory for registry-mode test apps (default: "test_apps").
24    #[serde(default = "default_test_apps_dir")]
25    pub output: String,
26    /// Per-language package overrides used only in registry mode.
27    /// Merged on top of the base `[e2e.packages]` entries.
28    #[serde(default)]
29    pub packages: HashMap<String, PackageRef>,
30    /// When non-empty, only fixture categories in this list are included in
31    /// registry-mode generation (useful for shipping a curated subset).
32    #[serde(default)]
33    pub categories: Vec<String>,
34    /// GitHub repository URL for downloading prebuilt artifacts (e.g., FFI
35    /// shared libraries) from GitHub Releases.
36    ///
37    /// Falls back to `[scaffold] repository` when not set, then to
38    /// `https://github.com/kreuzberg-dev/{crate.name}`.
39    #[serde(default)]
40    pub github_repo: Option<String>,
41}
42
43impl Default for RegistryConfig {
44    fn default() -> Self {
45        Self {
46            output: default_test_apps_dir(),
47            packages: HashMap::new(),
48            categories: Vec::new(),
49            github_repo: None,
50        }
51    }
52}
53
/// Serde default for [`RegistryConfig::output`]: the `"test_apps"` directory.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
57
58/// Root e2e configuration from `[e2e]` section of alef.toml.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct E2eConfig {
61    /// Directory containing fixture JSON files (default: "fixtures").
62    #[serde(default = "default_fixtures_dir")]
63    pub fixtures: String,
64    /// Output directory for generated e2e test projects (default: "e2e").
65    #[serde(default = "default_output_dir")]
66    pub output: String,
67    /// Repo-root-relative directory holding binary file fixtures referenced by
68    /// `file_path` / `bytes` fixture args (default: "test_documents").
69    ///
70    /// Backends that emit chdir / setup hooks for file-based fixtures resolve
71    /// the relative path from the test-emission directory via
72    /// [`E2eConfig::test_documents_relative_from`]. The default matches the
73    /// kreuzberg convention; downstream crates whose fixtures don't reference
74    /// files (e.g. liter-llm, which uses pure mock-server fixtures) can leave
75    /// the default in place — backends conditionally emit the setup only when
76    /// fixtures actually need it.
77    #[serde(default = "default_test_documents_dir")]
78    pub test_documents_dir: String,
79    /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
80    #[serde(default)]
81    pub languages: Vec<String>,
82    /// Default function call configuration.
83    pub call: CallConfig,
84    /// Named additional call configurations for multi-function testing.
85    /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
86    #[serde(default)]
87    pub calls: HashMap<String, CallConfig>,
88    /// Per-language package reference overrides.
89    #[serde(default)]
90    pub packages: HashMap<String, PackageRef>,
91    /// Per-language formatter commands.
92    #[serde(default)]
93    pub format: HashMap<String, String>,
94    /// Field path aliases: maps fixture field paths to actual API struct paths.
95    /// E.g., "metadata.title" -> "metadata.document.title"
96    /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
97    #[serde(default)]
98    pub fields: HashMap<String, String>,
99    /// Fields that are Optional/nullable in the return type.
100    /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
101    #[serde(default)]
102    pub fields_optional: HashSet<String>,
103    /// Fields that are arrays/Vecs on the result type.
104    /// When a fixture path like `json_ld.name` traverses an array field, the
105    /// accessor adds `[0]` (or language equivalent) to index into the first element.
106    #[serde(default)]
107    pub fields_array: HashSet<String>,
108    /// Fields where the accessor is a method call (appends `()`) rather than a field access.
109    /// Rust-specific: Java always uses `()`, Python/PHP use field access.
110    /// Listed as the full resolved field path (after alias resolution).
111    /// E.g., `"metadata.format.excel"` means `.excel` should be emitted as `.excel()`.
112    #[serde(default)]
113    pub fields_method_calls: HashSet<String>,
114    /// Known top-level fields on the result type.
115    ///
116    /// When non-empty, assertions whose resolved field path starts with a
117    /// segment that is NOT in this set are emitted as comments (skipped)
118    /// instead of executable assertions.  This prevents broken assertions
119    /// when fixtures reference fields from a different operation (e.g.,
120    /// `batch.completed_count` on a `ScrapeResult`).
121    #[serde(default)]
122    pub result_fields: HashSet<String>,
123    /// Fixture categories excluded from cross-language e2e codegen.
124    ///
125    /// Fixtures whose resolved category matches an entry in this set are
126    /// skipped by every per-language e2e generator — no test is emitted at
127    /// all (no skip directive, no commented-out body). The fixture files stay
128    /// on disk and remain available to Rust integration tests inside the
129    /// consumer crate's own `tests/` directory.
130    ///
131    /// Use this to keep fixtures that exercise internal middleware (cache,
132    /// proxy, budget, hooks, etc.) out of bindings whose public surface does
133    /// not expose those layers.
134    ///
135    /// Example:
136    /// ```toml
137    /// [e2e]
138    /// exclude_categories = ["cache", "proxy", "budget", "hooks"]
139    /// ```
140    #[serde(default)]
141    pub exclude_categories: HashSet<String>,
142    /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
143    /// PascalCase return type name (without prefix).
144    ///
145    /// Used by the C e2e generator to emit chained FFI accessor calls for
146    /// nested field paths. The root type is always `conversion_result`.
147    ///
148    /// Example:
149    /// ```toml
150    /// [e2e.fields_c_types]
151    /// "conversion_result.metadata" = "HtmlMetadata"
152    /// "html_metadata.document" = "DocumentMetadata"
153    /// ```
154    #[serde(default)]
155    pub fields_c_types: HashMap<String, String>,
156    /// Fields whose resolved type is an enum in the generated bindings.
157    ///
158    /// When a `contains` / `contains_all` / etc. assertion targets one of these
159    /// fields, language generators that cannot call `.contains()` directly on an
160    /// enum (e.g., Java) will emit a string-conversion call first.  For Java,
161    /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
162    /// method that all alef-generated Java enums expose — to obtain the lowercase
163    /// serde string before performing the string comparison.
164    ///
165    /// Both the raw fixture field path (before alias resolution) and the resolved
166    /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
167    /// use either form:
168    ///
169    /// ```toml
170    /// # Raw fixture field:
171    /// fields_enum = ["links[].link_type", "assets[].category"]
172    /// # …or the resolved (aliased) field name:
173    /// fields_enum = ["links[].link_type", "assets[].asset_category"]
174    /// ```
175    #[serde(default)]
176    pub fields_enum: HashSet<String>,
177    /// Dependency mode: `Local` (default) or `Registry`.
178    /// Set at runtime via `--registry` CLI flag; not serialized from TOML.
179    #[serde(skip)]
180    pub dep_mode: DependencyMode,
181    /// Registry-mode configuration from `[e2e.registry]`.
182    #[serde(default)]
183    pub registry: RegistryConfig,
184}
185
186impl E2eConfig {
187    /// Resolve the call config for a fixture. Uses the named call if specified,
188    /// otherwise falls back to the default `[e2e.call]`.
189    pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
190        match call_name {
191            Some(name) => self.calls.get(name).unwrap_or(&self.call),
192            None => &self.call,
193        }
194    }
195
196    /// Resolve the call config for a fixture, applying `select_when` auto-routing.
197    ///
198    /// When the fixture has an explicit `call` name, that named config is returned
199    /// (same as [`resolve_call`]).  When the fixture has no explicit call, the method
200    /// scans named calls for a [`SelectWhen`] condition that matches the fixture input
201    /// and returns the first match.  If no condition matches, it falls back to the
202    /// default `[e2e.call]`.
203    pub fn resolve_call_for_fixture(&self, call_name: Option<&str>, fixture_input: &serde_json::Value) -> &CallConfig {
204        if let Some(name) = call_name {
205            return self.calls.get(name).unwrap_or(&self.call);
206        }
207        // Auto-route by select_when condition.
208        for call_config in self.calls.values() {
209            if let Some(SelectWhen::InputHas(key)) = &call_config.select_when {
210                let val = fixture_input.get(key.as_str()).unwrap_or(&serde_json::Value::Null);
211                if !val.is_null() {
212                    return call_config;
213                }
214            }
215        }
216        &self.call
217    }
218
219    /// Resolve the effective package reference for a language.
220    ///
221    /// In registry mode, entries from `[e2e.registry.packages]` are merged on
222    /// top of the base `[e2e.packages]` — registry overrides win for any field
223    /// that is `Some`.
224    pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
225        let base = self.packages.get(lang);
226        if self.dep_mode == DependencyMode::Registry {
227            let reg = self.registry.packages.get(lang);
228            match (base, reg) {
229                (Some(b), Some(r)) => Some(PackageRef {
230                    name: r.name.clone().or_else(|| b.name.clone()),
231                    path: r.path.clone().or_else(|| b.path.clone()),
232                    module: r.module.clone().or_else(|| b.module.clone()),
233                    version: r.version.clone().or_else(|| b.version.clone()),
234                }),
235                (None, Some(r)) => Some(r.clone()),
236                (Some(b), None) => Some(b.clone()),
237                (None, None) => None,
238            }
239        } else {
240            base.cloned()
241        }
242    }
243
244    /// Return the effective output directory: `registry.output` in registry
245    /// mode, `output` otherwise.
246    pub fn effective_output(&self) -> &str {
247        if self.dep_mode == DependencyMode::Registry {
248            &self.registry.output
249        } else {
250            &self.output
251        }
252    }
253
254    /// Relative path from a backend's emission directory to the
255    /// `test_documents_dir` at the repo root.
256    ///
257    /// `emission_depth` counts the number of additional `../` segments needed
258    /// to reach `<output>/<lang>/` from where the file is being emitted:
259    ///
260    /// * `0` — emitted directly at `e2e/<lang>/` (e.g. dart, zig `build.zig`)
261    /// * `1` — emitted at `e2e/<lang>/<sub>/` (e.g. ruby `spec/`, R `tests/`)
262    /// * `2` — emitted at `e2e/<lang>/<sub1>/<sub2>/`
263    ///
264    /// The base prefix is two segments above `<output>/<lang>/` (i.e.
265    /// `../../`), matching the canonical layout where `<output>` (default
266    /// `"e2e"`) sits at the repo root next to the configured
267    /// `test_documents_dir`.
268    pub fn test_documents_relative_from(&self, emission_depth: usize) -> String {
269        let mut up = String::from("../../");
270        for _ in 0..emission_depth {
271            up.push_str("../");
272        }
273        format!("{up}{}", self.test_documents_dir)
274    }
275}
276
/// Serde default for [`E2eConfig::fixtures`]: the `"fixtures"` directory.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
280
/// Serde default for [`E2eConfig::output`]: the `"e2e"` directory.
fn default_output_dir() -> String {
    String::from("e2e")
}
284
/// Serde default for [`E2eConfig::test_documents_dir`]: the
/// `"test_documents"` directory.
fn default_test_documents_dir() -> String {
    String::from("test_documents")
}
288
289/// Hand-rolled `Default` so the `test_documents_dir` field receives its
290/// `default_test_documents_dir()` value (`"test_documents"`) when callers use
291/// `..Default::default()` to construct an `E2eConfig` literally rather than
292/// going through `serde::Deserialize`. Without this, `derive(Default)` would
293/// fall back to `String::default()` (i.e. the empty string), and any backend
294/// computing `test_documents_relative_from(0)` would emit `"../../"` (no dir
295/// component), breaking generated chdir hooks.
296impl Default for E2eConfig {
297    fn default() -> Self {
298        Self {
299            fixtures: default_fixtures_dir(),
300            output: default_output_dir(),
301            test_documents_dir: default_test_documents_dir(),
302            languages: Vec::new(),
303            call: CallConfig::default(),
304            calls: HashMap::new(),
305            packages: HashMap::new(),
306            format: HashMap::new(),
307            fields: HashMap::new(),
308            fields_optional: HashSet::new(),
309            fields_array: HashSet::new(),
310            fields_method_calls: HashSet::new(),
311            result_fields: HashSet::new(),
312            exclude_categories: HashSet::new(),
313            fields_c_types: HashMap::new(),
314            fields_enum: HashSet::new(),
315            dep_mode: DependencyMode::default(),
316            registry: RegistryConfig::default(),
317        }
318    }
319}
320
321/// Configuration for the function call in each test.
322#[derive(Debug, Clone, Serialize, Deserialize, Default)]
323pub struct CallConfig {
324    /// The function name (alef applies language naming conventions).
325    #[serde(default)]
326    pub function: String,
327    /// The module/package where the function lives.
328    #[serde(default)]
329    pub module: String,
330    /// Variable name for the return value (default: "result").
331    #[serde(default = "default_result_var")]
332    pub result_var: String,
333    /// Whether the function is async.
334    #[serde(default)]
335    pub r#async: bool,
336    /// HTTP endpoint path for mock server routing (e.g., `"/v1/chat/completions"`).
337    ///
338    /// Required when fixtures use `mock_response`. The Rust e2e generator uses
339    /// this to build the `MockRoute` that the mock server matches against.
340    #[serde(default)]
341    pub path: Option<String>,
342    /// HTTP method for mock server routing (default: `"POST"`).
343    ///
344    /// Used together with `path` when building `MockRoute` entries.
345    #[serde(default)]
346    pub method: Option<String>,
347    /// How fixture `input` fields map to function arguments.
348    #[serde(default)]
349    pub args: Vec<ArgMapping>,
350    /// Per-language overrides for module/function/etc.
351    #[serde(default)]
352    pub overrides: HashMap<String, CallOverride>,
353    /// Whether the function returns `Result<T, E>` in its native binding.
354    /// Defaults to `true`. When `false`, generators that distinguish Result-returning
355    /// from non-Result-returning calls (currently Rust) will skip the
356    /// `.expect("should succeed")` unwrap and bind the raw return value directly.
357    #[serde(default = "default_returns_result")]
358    pub returns_result: bool,
359    /// Whether the function returns only an error/unit — i.e., `Result<(), E>`.
360    ///
361    /// When combined with `returns_result = true`, Go generators emit `err := func()`
362    /// (single return value) rather than `_, err := func()` (two return values).
363    /// This is needed for functions like `validate_host` that return only `error` in Go.
364    #[serde(default)]
365    pub returns_void: bool,
366    /// skip_languages
367    #[serde(default)]
368    pub skip_languages: Vec<String>,
369    /// When `true`, the function returns a primitive (e.g. `String`, `bool`,
370    /// `i32`) rather than a struct.  Generators that would otherwise emit
371    /// `result.<field>` will fall back to the bare result variable.
372    ///
373    /// This is a property of the Rust core's return type and therefore identical
374    /// across every binding — set it on the call, not in per-language overrides.
375    /// The same flag is also accepted under `[e2e.calls.<name>.overrides.<lang>]`
376    /// for backwards compatibility, but the call-level value takes precedence.
377    #[serde(default)]
378    pub result_is_simple: bool,
379    /// When `true`, the function returns `Vec<T>` / `Array<T>`.  Generators that
380    /// support per-element field assertions (rust, csharp) iterate or index into
381    /// the result; the typescript codegen indexes `[0]` to mirror csharp.
382    ///
383    /// As with `result_is_simple`, this is a Rust-side property — set it on the
384    /// call, not on per-language overrides. Per-language overrides remain
385    /// supported for backwards compatibility.
386    #[serde(default)]
387    pub result_is_vec: bool,
388    /// When `true` (combined with `result_is_simple`), the simple return is a
389    /// slice/array (e.g., `Vec<String>` → `string[]` in TS).
390    #[serde(default)]
391    pub result_is_array: bool,
392    /// When `true`, the function returns a raw byte array (`Vec<u8>` →
393    /// `Uint8Array` / `[]byte` / `byte[]`).
394    #[serde(default)]
395    pub result_is_bytes: bool,
396    /// Three-valued opt-in/out for streaming-virtual-field auto-detection.
397    ///
398    /// - `Some(true)`: force streaming semantics regardless of fixture shape.
399    /// - `Some(false)`: disable streaming auto-detection — assertions referencing
400    ///   fields like `chunks` / `chunks.length` / `tool_calls` / `finish_reason`
401    ///   are treated as plain field accessors on the result, not streaming
402    ///   adapters. Use this when your API has a `chunks` field that is a regular
403    ///   list (not an async stream).
404    /// - `None` (default): auto-detect — treat as streaming when either the
405    ///   fixture provides a streaming `mock_response` or any assertion references
406    ///   a hard-coded streaming-virtual-field name.
407    #[serde(default)]
408    pub streaming: Option<bool>,
409    /// When `true`, the function returns `Option<T>`.
410    #[serde(default)]
411    pub result_is_option: bool,
412    /// Automatic fixture-routing condition.
413    ///
414    /// When set, a fixture whose `call` field is `None` is routed to this named call config
415    /// if the condition is satisfied.  This avoids the need to tag every fixture with
416    /// `"call": "batch_scrape"` when the fixture shape already identifies the call.
417    ///
418    /// Example (`alef.toml`):
419    /// ```toml
420    /// [e2e.calls.batch_scrape]
421    /// select_when = { input_has = "batch_urls" }
422    /// ```
423    #[serde(default)]
424    pub select_when: Option<SelectWhen>,
425}
426
/// Serde default for `CallConfig::result_var`: the variable name `"result"`.
fn default_result_var() -> String {
    String::from("result")
}
430
/// Serde default for `CallConfig::returns_result`.
///
/// The field is documented as defaulting to `true`: a call is assumed to
/// return `Result<T, E>` unless the config explicitly says otherwise, so
/// generators keep emitting the unwrap when the key is omitted.
fn default_returns_result() -> bool {
    true
}
434
435/// Condition for auto-selecting a named call config when the fixture matches.
436///
437/// When a fixture does not specify `"call"`, the codegen normally uses the default
438/// `[e2e.call]`.  A `SelectWhen` condition on a named call allows automatic routing
439/// based on the fixture's input shape:
440///
441/// ```toml
442/// [e2e.calls.batch_scrape]
443/// select_when = { input_has = "batch_urls" }
444/// ```
445#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
446#[serde(rename_all = "snake_case")]
447pub enum SelectWhen {
448    /// Select this call when the fixture input contains the named key with a non-null value.
449    InputHas(String),
450}
451
452/// Maps a fixture input field to a function argument.
453#[derive(Debug, Clone, Serialize, Deserialize)]
454pub struct ArgMapping {
455    /// Argument name in the function signature.
456    pub name: String,
457    /// JSON field path in the fixture's `input` object.
458    pub field: String,
459    /// Type hint for code generation.
460    #[serde(rename = "type", default = "default_arg_type")]
461    pub arg_type: String,
462    /// Whether this argument is optional.
463    #[serde(default)]
464    pub optional: bool,
465    /// When `true`, the Rust codegen passes this argument by value (owned) rather than
466    /// by reference. Use for `Vec<T>` parameters that do not accept `&Vec<T>`.
467    #[serde(default)]
468    pub owned: bool,
469    /// For `json_object` args targeting `&[T]` Rust parameters, set to the element type
470    /// (e.g. `"f32"`, `"String"`) so the codegen emits `Vec<element_type>` annotation.
471    #[serde(default)]
472    pub element_type: Option<String>,
473    /// Override the Go slice element type for `json_object` array args.
474    ///
475    /// When set, the Go e2e codegen uses this as the element type instead of the default
476    /// derived from `element_type`. Use Go-idiomatic type names including the import alias
477    /// prefix where needed, e.g. `"kreuzberg.BatchBytesItem"` or `"string"`.
478    #[serde(default)]
479    pub go_type: Option<String>,
480}
481
/// Serde default for `ArgMapping::arg_type`: the `"string"` type hint.
fn default_arg_type() -> String {
    String::from("string")
}
485
486/// Per-language override for function call configuration.
487#[derive(Debug, Clone, Serialize, Deserialize, Default)]
488pub struct CallOverride {
489    /// Override the module/import path.
490    #[serde(default)]
491    pub module: Option<String>,
492    /// Override the function name.
493    #[serde(default)]
494    pub function: Option<String>,
495    /// Maps canonical argument names to language-specific argument names.
496    ///
497    /// Used when a language binding uses a different parameter name than the
498    /// canonical `args` list in `CallConfig`. For example, if the canonical
499    /// arg name is `doc` but the Python binding uses `html`, specify:
500    ///
501    /// ```toml
502    /// [e2e.call.overrides.python]
503    /// arg_name_map = { doc = "html" }
504    /// ```
505    ///
506    /// The key is the canonical name (from `args[].name`) and the value is the
507    /// name to use when emitting the keyword argument in generated tests.
508    #[serde(default)]
509    pub arg_name_map: HashMap<String, String>,
510    /// Override the crate name (Rust only).
511    #[serde(default)]
512    pub crate_name: Option<String>,
513    /// Override the class name (Java/C# only).
514    #[serde(default)]
515    pub class: Option<String>,
516    /// Import alias (Go only, e.g., `htmd`).
517    #[serde(default)]
518    pub alias: Option<String>,
519    /// C header file name (C only).
520    #[serde(default)]
521    pub header: Option<String>,
522    /// FFI symbol prefix (C only).
523    #[serde(default)]
524    pub prefix: Option<String>,
525    /// For json_object args: the constructor to use instead of raw dict/object.
526    /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
527    /// `new ConversionOptions(options)` in TypeScript.
528    #[serde(default)]
529    pub options_type: Option<String>,
530    /// How to pass json_object args: "kwargs" (default), "dict", "json", or "from_json".
531    ///
532    /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
533    /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
534    /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
535    /// - `"from_json"`: call `OptionsType.from_json('...')` (Python only, PyO3 native types).
536    #[serde(default)]
537    pub options_via: Option<String>,
538    /// Module to import `options_type` from when `options_via = "from_json"`.
539    ///
540    /// When set, a separate `from {from_json_module} import {options_type}` line
541    /// is emitted instead of including the type in the main module import.
542    /// E.g., `"liter_llm._internal_bindings"` for PyO3 native types.
543    #[serde(default)]
544    pub from_json_module: Option<String>,
545    /// Override whether the call is async for this language.
546    ///
547    /// When set, takes precedence over the call-level `async` flag.
548    /// Useful when a language binding uses a different async model — for example,
549    /// a Python binding that returns a sync iterator from a function marked
550    /// `async = true` at the call level.
551    #[serde(default, rename = "async")]
552    pub r#async: Option<bool>,
553    /// Maps fixture option field names to their enum type names.
554    /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
555    /// The generator imports these types and maps string values to enum constants.
556    #[serde(default)]
557    pub enum_fields: HashMap<String, String>,
558    /// Maps result-type field names to their enum type names for assertion routing.
559    /// Per-call so e.g. `BatchObject.status` (enum) and `ResponseObject.status` (string)
560    /// can be disambiguated.
561    #[serde(default)]
562    pub assert_enum_fields: HashMap<String, String>,
563    /// Module to import enum types from (if different from the main module).
564    /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
565    #[serde(default)]
566    pub enum_module: Option<String>,
567    /// Maps nested fixture object field names to their C# type names.
568    /// Used to generate `JsonSerializer.Deserialize<NestedType>(...)` for nested objects.
569    /// E.g., `{"preprocessing": "PreprocessingOptions"}`.
570    #[serde(default)]
571    pub nested_types: HashMap<String, String>,
572    /// When `false`, nested config builder results are passed directly to builder methods
573    /// without wrapping in `Optional.of(...)`. Set to `false` for bindings where nested
574    /// option types are non-optional (e.g., html-to-markdown Java).
575    /// Defaults to `true` for backward compatibility.
576    #[serde(default = "default_true")]
577    pub nested_types_optional: bool,
578    /// When `true`, the function returns a simple type (e.g., `String`) rather
579    /// than a struct.  Generators that would normally emit `result.content`
580    /// (or equivalent field access) will use the result variable directly.
581    #[serde(default)]
582    pub result_is_simple: bool,
583    /// When `true` (and combined with `result_is_simple`), the simple result is
584    /// a slice/array type (e.g., `[]string` in Go, `Vec<String>` in Rust).
585    /// The Go generator uses `strings.Join(value, " ")` for `contains` assertions
586    /// instead of `string(value)`.
587    #[serde(default)]
588    pub result_is_array: bool,
589    /// When `true`, the function returns `Vec<T>` rather than a single value.
590    /// Field-path assertions are emitted as `.iter().all(|r| <accessor>)` so
591    /// every element is checked. (Rust generator.)
592    #[serde(default)]
593    pub result_is_vec: bool,
594    /// When `true`, the function returns a raw byte array (e.g., `byte[]` in Java,
595    /// `[]byte` in Go). Used by generators to select the correct length accessor
596    /// (field `.length` vs method `.length()`).
597    #[serde(default)]
598    pub result_is_bytes: bool,
599    /// When `true`, the function returns `Option<T>`. The result is unwrapped
600    /// before any non-`is_none`/`is_some` assertion runs; `is_empty`/`not_empty`
601    /// assertions map to `is_none()`/`is_some()`. (Rust generator.)
602    #[serde(default)]
603    pub result_is_option: bool,
604    /// When `true`, the R generator emits the call result directly without wrapping
605    /// in `jsonlite::fromJSON()`. Use when the R binding already returns a native
606    /// R list (`Robj`) rather than a JSON string. Field-path assertions still use
607    /// `result$field` accessor syntax (i.e. `result_is_simple` behaviour is NOT
608    /// implied — only the JSON parse wrapper is suppressed). (R generator only.)
609    #[serde(default)]
610    pub result_is_r_list: bool,
611    /// When `true`, the Zig generator treats the result as a `[]u8` JSON string
612    /// representing a struct value (e.g., `ExtractionResult` serialized via the
613    /// FFI `_to_json` helper). The generator parses the JSON with
614    /// `std.json.parseFromSlice(std.json.Value, ...)` before emitting field
615    /// assertions, traversing the dynamic JSON object for each field path.
616    /// (Zig generator only.)
617    #[serde(default)]
618    pub result_is_json_struct: bool,
619    /// When `true`, the Rust generator wraps the `json_object` argument expression
620    /// in `Some(...).clone()` to match an owned `Option<T>` parameter slot rather
621    /// than passing `&options`. (Rust generator only.)
622    #[serde(default)]
623    pub wrap_options_in_some: bool,
624    /// Trailing positional arguments appended verbatim after the configured
625    /// `args`. Used when the target function takes additional positional slots
626    /// (e.g. visitor) the fixture cannot supply directly. (Rust generator only.)
627    #[serde(default)]
628    pub extra_args: Vec<String>,
629    /// Per-rust override of the call-level `returns_result`. When set, takes
630    /// precedence over `CallConfig.returns_result` for the Rust generator only.
631    /// Useful when one binding is fallible while others are not.
632    #[serde(default)]
633    pub returns_result: Option<bool>,
634    /// Maps handle config field names to their Python type constructor names.
635    ///
636    /// When the handle config object contains a nested dict-valued field, the
637    /// generator will wrap it in the specified type using keyword arguments.
638    /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
639    /// instead of `{"mode": "auto"}`.
640    #[serde(default)]
641    pub handle_nested_types: HashMap<String, String>,
642    /// Handle config fields whose type constructor takes a single dict argument
643    /// instead of keyword arguments.
644    ///
645    /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
646    /// `AuthConfig(type="basic", ...)`.
647    #[serde(default)]
648    pub handle_dict_types: HashSet<String>,
649    /// Elixir struct module name for the handle config argument.
650    ///
651    /// When set, the generated Elixir handle config uses struct literal syntax
652    /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
653    /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
654    ///
655    /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
656    #[serde(default)]
657    pub handle_struct_type: Option<String>,
658    /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
659    ///
660    /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
661    /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
662    /// List the field names here so the generator emits atom literals instead of strings.
663    ///
664    /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
665    #[serde(default)]
666    pub handle_atom_list_fields: HashSet<String>,
667    /// WASM config class name for handle args (WASM generator only).
668    ///
669    /// When set, handle args are constructed using `ConfigType.default()` + setters
670    /// instead of passing a plain JS object (which fails `_assertClass` validation).
671    ///
672    /// E.g., `"WasmCrawlConfig"` generates:
673    /// ```js
674    /// const engineConfig = WasmCrawlConfig.default();
675    /// engineConfig.maxDepth = 1;
676    /// const engine = createEngine(engineConfig);
677    /// ```
678    #[serde(default)]
679    pub handle_config_type: Option<String>,
680    /// PHP client factory method name (PHP generator only).
681    ///
682    /// When set, the generated PHP test instantiates a client via
683    /// `ClassName::factory_method('test-key')` and calls methods on the instance
684    /// instead of using static facade calls.
685    ///
686    /// E.g., `"createClient"` generates:
687    /// ```php
688    /// $client = LiterLlm::createClient('test-key');
689    /// $result = $client->chat($request);
690    /// ```
691    #[serde(default)]
692    pub php_client_factory: Option<String>,
693    /// Client factory function name for instance-method languages (WASM, etc.).
694    ///
695    /// When set, the generated test imports this function, creates a client,
696    /// and calls API methods on the instance instead of as top-level functions.
697    ///
698    /// E.g., `"createClient"` generates:
699    /// ```typescript
700    /// import { createClient } from 'pkg';
701    /// const client = createClient('test-key');
702    /// const result = await client.chat(request);
703    /// ```
704    #[serde(default)]
705    pub client_factory: Option<String>,
706    /// Verbatim trailing arguments appended after the fixed `("test-key", ...)` pair
707    /// when calling the `client_factory` function.
708    ///
709    /// Use this when the factory function takes additional positional parameters
710    /// beyond the API key and optional base URL that the generator would otherwise
711    /// emit.  Each element is emitted verbatim, separated by `, `.
712    ///
713    /// Example — Gleam `create_client` takes five positional arguments:
714    /// `(api_key, base_url, timeout_secs, max_retries, model_hint)`.  Set:
715    /// ```toml
716    /// [e2e.call.overrides.gleam]
717    /// client_factory = "create_client"
718    /// client_factory_trailing_args = ["option.None", "option.None", "option.None"]
719    /// ```
720    /// to produce `create_client("test-key", option.Some(url), option.None, option.None, option.None)`.
721    #[serde(default)]
722    pub client_factory_trailing_args: Vec<String>,
723    /// Fields on the options object that require `BigInt()` wrapping (WASM only).
724    ///
725    /// `wasm_bindgen` maps Rust `u64`/`i64` to JavaScript `BigInt`. Numeric
726    /// values assigned to these setters must be wrapped with `BigInt(n)`.
727    ///
728    /// List camelCase field names, e.g.:
729    /// ```toml
730    /// [e2e.call.overrides.wasm]
731    /// bigint_fields = ["maxTokens", "seed"]
732    /// ```
733    #[serde(default)]
734    pub bigint_fields: Vec<String>,
735    /// Static CLI arguments appended to every invocation (brew/CLI generator only).
736    ///
737    /// E.g., `["--format", "json"]` appends `--format json` to every CLI call.
738    #[serde(default)]
739    pub cli_args: Vec<String>,
740    /// Maps fixture config field names to CLI flag names (brew/CLI generator only).
741    ///
742    /// E.g., `{"output_format": "--format"}` generates `--format <value>` from
743    /// the fixture's `output_format` input field.
744    #[serde(default)]
745    pub cli_flags: HashMap<String, String>,
746    /// C FFI opaque result type name (C only).
747    ///
748    /// The PascalCase name of the result struct, without the prefix.
749    /// E.g., `"ChatCompletionResponse"` for `LiterllmChatCompletionResponse*`.
750    /// If not set, defaults to the function name in PascalCase.
751    #[serde(default)]
752    pub result_type: Option<String>,
753    /// Override the argument order for this language binding.
754    ///
755    /// Lists argument names from `args` in the order they should be passed
756    /// to the target function. Useful when a language binding reorders parameters
757    /// relative to the canonical `args` list in `CallConfig`.
758    ///
759    /// E.g., if `args = [path, mime_type, config]` but the Node.js binding
760    /// takes `(path, config, mime_type?)`, specify:
761    /// ```toml
762    /// [e2e.call.overrides.node]
763    /// arg_order = ["path", "config", "mime_type"]
764    /// ```
765    #[serde(default)]
766    pub arg_order: Vec<String>,
767    /// When `true`, `json_object` args with an `options_type` are passed as a
768    /// pointer (`*OptionsType`) rather than a value.  Use for Go bindings where
769    /// the options parameter is `*ConversionOptions` (nil-able pointer) rather
770    /// than a plain struct.
771    ///
772    /// Absent options are passed as `nil`; present options are unmarshalled into
773    /// a local variable and passed as `&optionsVar`.
774    #[serde(default)]
775    pub options_ptr: bool,
776    /// Alternative function name to use when the fixture includes a `visitor`.
777    ///
778    /// Some bindings expose two entry points: `Convert(html, opts)` for the
779    /// plain case and `ConvertWithVisitor(html, opts, visitor)` when a visitor
780    /// is involved.  Set this to the visitor-accepting function name so the
781    /// generator can pick the right symbol automatically.
782    ///
783    /// E.g., `"ConvertWithVisitor"` makes the Go generator emit:
784    /// ```go
785    /// result, err := htmd.ConvertWithVisitor(html, nil, visitor)
786    /// ```
787    /// instead of `htmd.Convert(html, nil, visitor)` (which would not compile).
788    #[serde(default)]
789    pub visitor_function: Option<String>,
790    /// Rust trait names to import when `client_factory` is set (Rust generator only).
791    ///
792    /// When `client_factory` is set, the generated test creates a client object and
793    /// calls methods on it. Those methods are defined on traits (e.g. `LlmClient`,
794    /// `FileClient`) that must be in scope. List the trait names here and the Rust
795    /// generator will emit `use {module}::{trait_name};` for each.
796    ///
797    /// E.g.:
798    /// ```toml
799    /// [e2e.call.overrides.rust]
800    /// client_factory = "create_client"
801    /// trait_imports = ["LlmClient", "FileClient", "BatchClient", "ResponseClient"]
802    /// ```
803    #[serde(default)]
804    pub trait_imports: Vec<String>,
805    /// Raw C return type, used verbatim instead of `{PREFIX}Type*` (C only).
806    ///
807    /// Valid values: `"char*"`, `"int32_t"`, `"uintptr_t"`.
808    /// When set, the C generator skips options handle construction and uses the
809    /// raw type directly. Free logic is adjusted accordingly.
810    #[serde(default)]
811    pub raw_c_result_type: Option<String>,
812    /// Free function for raw `char*` C results (C only).
813    ///
814    /// Defaults to `{prefix}_free_string` when unset and `raw_c_result_type == "char*"`.
815    #[serde(default)]
816    pub c_free_fn: Option<String>,
817    /// C FFI engine factory pattern (C only).
818    ///
819    /// When set, the C generator wraps each test call in a
820    /// `{prefix}_create_engine(config)` / `{prefix}_crawl_engine_handle_free(engine)`
821    /// prologue/epilogue using the named config type as the "arg 0" handle type.
822    ///
823    /// The value is the PascalCase config type name (without prefix), e.g.
824    /// `"CrawlConfig"`. The generator will emit:
825    /// ```c
826    /// KCRAWLCrawlConfig* config_handle = kcrawl_crawl_config_from_json("{json}");
827    /// KCRAWLCrawlEngineHandle* engine = kcrawl_create_engine(config_handle);
828    /// kcrawl_crawl_config_free(config_handle);
829    /// KCRAWLScrapeResult* result = kcrawl_scrape(engine, url);
830    /// // ... assertions ...
831    /// kcrawl_scrape_result_free(result);
832    /// kcrawl_crawl_engine_handle_free(engine);
833    /// ```
834    #[serde(default)]
835    pub c_engine_factory: Option<String>,
836    /// Fields in a `json_object` arg that must be wrapped in `java.nio.file.Path.of()`
837    /// (Java generator only).
838    ///
839    /// E.g., `["cache_dir"]` wraps the string value of `cache_dir` so the builder
840    /// receives `java.nio.file.Path.of("/tmp/dir")` instead of a plain string.
841    #[serde(default)]
842    pub path_fields: Vec<String>,
843    /// Trait name for the visitor pattern (Rust e2e tests only).
844    ///
845    /// When a fixture declares a `visitor` block, the Rust e2e generator emits
846    /// `impl <trait_name> for _TestVisitor { ... }` and imports the trait from
847    /// `{module}::visitor`. When unset, no visitor block is emitted and fixtures
848    /// that declare a visitor will cause a codegen error.
849    ///
850    /// E.g., `"HtmlVisitor"` generates:
851    /// ```rust,ignore
852    /// use html_to_markdown_rs::visitor::{HtmlVisitor, NodeContext, VisitResult};
853    /// // ...
854    /// impl HtmlVisitor for _TestVisitor { ... }
855    /// ```
856    #[serde(default)]
857    pub visitor_trait: Option<String>,
858    /// Maps result field paths to their wasm-bindgen enum class names.
859    ///
860    /// wasm-bindgen exposes Rust enums as numeric discriminants in JavaScript
861    /// (`WasmFinishReason.Stop === 0`), not string variants. When an `equals`
862    /// assertion targets a field listed here, the WASM generator emits
863    /// `expect(result.choices[0].finishReason).toBe(WasmFinishReason.Stop)`
864    /// instead of attempting `(value ?? "").trim()`.
865    ///
866    /// The fixture's expected string value is converted to PascalCase to look
867    /// up the variant (e.g. `"tool_calls"` -> `ToolCalls`).
868    ///
869    /// Example:
870    /// ```toml
871    /// [e2e.calls.chat.overrides.wasm]
872    /// result_enum_fields = { "choices[0].finish_reason" = "WasmFinishReason", "status" = "WasmBatchStatus" }
873    /// ```
874    #[serde(default)]
875    pub result_enum_fields: HashMap<String, String>,
876}
877
/// Always yields `true`.
///
/// NOTE(review): presumably wired up through `#[serde(default = "default_true")]`
/// on a boolean field that should be enabled when omitted — the use site is not
/// visible from this part of the file, so confirm before relying on that.
fn default_true() -> bool {
    true
}
881
882/// Per-language package reference configuration.
883#[derive(Debug, Clone, Serialize, Deserialize, Default)]
884pub struct PackageRef {
885    /// Package/crate/gem/module name.
886    #[serde(default)]
887    pub name: Option<String>,
888    /// Relative path from e2e/{lang}/ to the package.
889    #[serde(default)]
890    pub path: Option<String>,
891    /// Go module path.
892    #[serde(default)]
893    pub module: Option<String>,
894    /// Package version (e.g., for go.mod require directives).
895    #[serde(default)]
896    pub version: Option<String>,
897}
898
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory name used by most tests below; the first test also expects it
    /// to be the value `E2eConfig` falls back to when the key is omitted.
    const DEFAULT_DIR: &str = "test_documents";

    /// Returns an `E2eConfig` that is default in every respect except for
    /// `test_documents_dir`, which is set to `dir`.
    fn config_with_dir(dir: &str) -> E2eConfig {
        E2eConfig {
            test_documents_dir: String::from(dir),
            ..E2eConfig::default()
        }
    }

    /// Omitting `test_documents_dir` from the TOML yields the default name.
    #[test]
    fn test_documents_dir_default_is_test_documents() {
        let source = "[call]\nfunction = \"f\"\n";
        let parsed: E2eConfig = toml::from_str(source).expect("minimal TOML must deserialize");
        assert_eq!(parsed.test_documents_dir, DEFAULT_DIR);
    }

    /// An explicit `test_documents_dir` key replaces the default name.
    #[test]
    fn test_documents_dir_explicit_override_wins() {
        let source = "test_documents_dir = \"fixture_files\"\n[call]\nfunction = \"f\"\n";
        let parsed: E2eConfig = toml::from_str(source).expect("explicit override must deserialize");
        assert_eq!(parsed.test_documents_dir, "fixture_files");
    }

    /// Depth 0 resolves through two parent directories.
    #[test]
    fn test_documents_relative_from_at_lang_root_returns_two_dots_up() {
        let relative = config_with_dir(DEFAULT_DIR).test_documents_relative_from(0);
        assert_eq!(relative, "../../test_documents");
    }

    /// Depth 1 resolves through three parent directories.
    #[test]
    fn test_documents_relative_from_at_spec_depth_returns_three_dots_up() {
        let relative = config_with_dir(DEFAULT_DIR).test_documents_relative_from(1);
        assert_eq!(relative, "../../../test_documents");
    }

    /// Depth 2 resolves through four parent directories.
    #[test]
    fn test_documents_relative_from_at_two_subdirs_deep_returns_four_dots_up() {
        let relative = config_with_dir(DEFAULT_DIR).test_documents_relative_from(2);
        assert_eq!(relative, "../../../../test_documents");
    }

    /// The configured directory name, not the default one, ends the path.
    #[test]
    fn test_documents_relative_uses_configured_dir_name() {
        let custom = config_with_dir("fixture_files");
        let at_root = custom.test_documents_relative_from(0);
        let one_deeper = custom.test_documents_relative_from(1);
        assert_eq!(at_root, "../../fixture_files");
        assert_eq!(one_deeper, "../../../fixture_files");
    }
}