Skip to main content

alef_core/config/
e2e.rs

1//! E2E test generation configuration types.
2
3use crate::config::manifest_extras::ManifestExtras;
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
/// Controls whether generated e2e test projects reference the package under
/// test via a local path (for development) or a registry version string
/// (for standalone `test_apps` that consumers can run without the monorepo).
///
/// Variants are (de)serialized in lowercase (`"local"` / `"registry"`) because
/// of `#[serde(rename_all = "lowercase")]`. `Local` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DependencyMode {
    /// Local path dependency (default) — used during normal e2e development.
    #[default]
    Local,
    /// Registry dependency — generates standalone test apps that pull the
    /// package from its published registry (PyPI, npm, crates.io, etc.).
    Registry,
}
20
/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
///
/// Every field has a serde default, so an empty `[e2e.registry]` table (or a
/// missing one, where the parent field is itself defaulted) deserializes to
/// the same values as [`RegistryConfig::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistryConfig {
    /// Output directory for registry-mode test apps (default: "test_apps").
    #[serde(default = "default_test_apps_dir")]
    pub output: String,
    /// Per-language package overrides used only in registry mode.
    /// Merged on top of the base `[e2e.packages]` entries.
    #[serde(default)]
    pub packages: HashMap<String, PackageRef>,
    /// When non-empty, only fixture categories in this list are included in
    /// registry-mode generation (useful for shipping a curated subset).
    #[serde(default)]
    pub categories: Vec<String>,
    /// GitHub repository URL for downloading prebuilt artifacts (e.g., FFI
    /// shared libraries) from GitHub Releases.
    ///
    /// Falls back to `[scaffold] repository` when not set, then to
    /// `https://github.com/kreuzberg-dev/{crate.name}`.
    // NOTE(review): the fallback chain is applied by the consumer of this
    // field, not in this file — confirm against the generator code.
    #[serde(default)]
    pub github_repo: Option<String>,
}
43
44impl Default for RegistryConfig {
45    fn default() -> Self {
46        Self {
47            output: default_test_apps_dir(),
48            packages: HashMap::new(),
49            categories: Vec::new(),
50            github_repo: None,
51        }
52    }
53}
54
/// Serde default for `RegistryConfig::output`.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
58
/// Root e2e configuration from `[e2e]` section of alef.toml.
///
/// Most fields carry a serde default and may be omitted. The exceptions are
/// `call`, which has no `#[serde(default)]` and therefore must be present when
/// deserializing, and `dep_mode`, which is `#[serde(skip)]` and is never read
/// from or written to the serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct E2eConfig {
    /// Directory containing fixture JSON files (default: "fixtures").
    #[serde(default = "default_fixtures_dir")]
    pub fixtures: String,
    /// Output directory for generated e2e test projects (default: "e2e").
    #[serde(default = "default_output_dir")]
    pub output: String,
    /// Repo-root-relative directory holding binary file fixtures referenced by
    /// `file_path` / `bytes` fixture args (default: "test_documents").
    ///
    /// Backends that emit chdir / setup hooks for file-based fixtures resolve
    /// the relative path from the test-emission directory via
    /// [`E2eConfig::test_documents_relative_from`]. The default matches the
    /// kreuzberg convention; downstream crates whose fixtures don't reference
    /// files (e.g. liter-llm, which uses pure mock-server fixtures) can leave
    /// the default in place — backends conditionally emit the setup only when
    /// fixtures actually need it.
    #[serde(default = "default_test_documents_dir")]
    pub test_documents_dir: String,
    /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
    // NOTE(review): the serde default here is an empty list; the fallback to
    // the top-level `languages` is presumably applied by the caller — confirm.
    #[serde(default)]
    pub languages: Vec<String>,
    /// Default function call configuration (`[e2e.call]`).
    ///
    /// Used whenever a fixture names no call, names an unknown call, or matches
    /// no `select_when` condition (see [`E2eConfig::resolve_call_for_fixture`]).
    pub call: CallConfig,
    /// Named additional call configurations for multi-function testing.
    /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
    #[serde(default)]
    pub calls: HashMap<String, CallConfig>,
    /// Per-language package reference overrides.
    #[serde(default)]
    pub packages: HashMap<String, PackageRef>,
    /// Per-language extra dependencies to splice into the e2e harness's
    /// language-native manifest (`e2e/<lang>/package.json` for node/wasm,
    /// `e2e/python/pyproject.toml` for Python, etc.). Distinct from the
    /// Rust-binding `extra_dependencies` knob — this one targets the
    /// host-language test-harness manifest. Keys are canonical language
    /// names (`node`, `wasm`, `python`, …).
    #[serde(default)]
    pub harness_extras: HashMap<String, ManifestExtras>,
    /// Per-language formatter commands, keyed by language name.
    #[serde(default)]
    pub format: HashMap<String, String>,
    /// Field path aliases: maps fixture field paths to actual API struct paths.
    /// E.g., "metadata.title" -> "metadata.document.title"
    /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
    #[serde(default)]
    pub fields: HashMap<String, String>,
    /// Fields that are Optional/nullable in the return type.
    /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
    #[serde(default)]
    pub fields_optional: HashSet<String>,
    /// Fields that are arrays/Vecs on the result type.
    /// When a fixture path like `json_ld.name` traverses an array field, the
    /// accessor adds `[0]` (or language equivalent) to index into the first element.
    #[serde(default)]
    pub fields_array: HashSet<String>,
    /// Fields where the accessor is a method call (appends `()`) rather than a field access.
    /// Rust-specific: Java always uses `()`, Python/PHP use field access.
    /// Listed as the full resolved field path (after alias resolution).
    /// E.g., `"metadata.format.excel"` means `.excel` should be emitted as `.excel()`.
    #[serde(default)]
    pub fields_method_calls: HashSet<String>,
    /// Known top-level fields on the result type.
    ///
    /// When non-empty, assertions whose resolved field path starts with a
    /// segment that is NOT in this set are emitted as comments (skipped)
    /// instead of executable assertions.  This prevents broken assertions
    /// when fixtures reference fields from a different operation (e.g.,
    /// `batch.completed_count` on a `ScrapeResult`).
    #[serde(default)]
    pub result_fields: HashSet<String>,
    /// Fixture categories excluded from cross-language e2e codegen.
    ///
    /// Fixtures whose resolved category matches an entry in this set are
    /// skipped by every per-language e2e generator — no test is emitted at
    /// all (no skip directive, no commented-out body). The fixture files stay
    /// on disk and remain available to Rust integration tests inside the
    /// consumer crate's own `tests/` directory.
    ///
    /// Use this to keep fixtures that exercise internal middleware (cache,
    /// proxy, budget, hooks, etc.) out of bindings whose public surface does
    /// not expose those layers.
    ///
    /// Example:
    /// ```toml
    /// [e2e]
    /// exclude_categories = ["cache", "proxy", "budget", "hooks"]
    /// ```
    #[serde(default)]
    pub exclude_categories: HashSet<String>,
    /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
    /// PascalCase return type name (without prefix).
    ///
    /// Used by the C e2e generator to emit chained FFI accessor calls for
    /// nested field paths. The root type is always `conversion_result`.
    ///
    /// Example:
    /// ```toml
    /// [e2e.fields_c_types]
    /// "conversion_result.metadata" = "HtmlMetadata"
    /// "html_metadata.document" = "DocumentMetadata"
    /// ```
    #[serde(default)]
    pub fields_c_types: HashMap<String, String>,
    /// Fields whose resolved type is an enum in the generated bindings.
    ///
    /// When a `contains` / `contains_all` / etc. assertion targets one of these
    /// fields, language generators that cannot call `.contains()` directly on an
    /// enum (e.g., Java) will emit a string-conversion call first.  For Java,
    /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
    /// method that all alef-generated Java enums expose — to obtain the lowercase
    /// serde string before performing the string comparison.
    ///
    /// Both the raw fixture field path (before alias resolution) and the resolved
    /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
    /// use either form:
    ///
    /// ```toml
    /// # Raw fixture field:
    /// fields_enum = ["links[].link_type", "assets[].category"]
    /// # …or the resolved (aliased) field name:
    /// fields_enum = ["links[].link_type", "assets[].asset_category"]
    /// ```
    #[serde(default)]
    pub fields_enum: HashSet<String>,
    /// Dependency mode: `Local` (default) or `Registry`.
    /// Set at runtime via `--registry` CLI flag; not serialized from TOML.
    ///
    /// Because of `#[serde(skip)]`, a deserialized config always starts with
    /// `DependencyMode::default()` (i.e. `Local`).
    #[serde(skip)]
    pub dep_mode: DependencyMode,
    /// Registry-mode configuration from `[e2e.registry]`.
    #[serde(default)]
    pub registry: RegistryConfig,
}
194
195impl E2eConfig {
196    /// Resolve the call config for a fixture. Uses the named call if specified,
197    /// otherwise falls back to the default `[e2e.call]`.
198    pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
199        match call_name {
200            Some(name) => self.calls.get(name).unwrap_or(&self.call),
201            None => &self.call,
202        }
203    }
204
205    /// Resolve the call config for a fixture, applying `select_when` auto-routing.
206    ///
207    /// When the fixture has an explicit `call` name, that named config is returned
208    /// (same as [`resolve_call`]).  When the fixture has no explicit call, the method
209    /// scans named calls for a [`SelectWhen`] condition that matches the fixture's
210    /// shape (id, category, tags, input) and returns the first match.  If no condition
211    /// matches, it falls back to the default `[e2e.call]`.
212    ///
213    /// All non-`None` discriminators on a `SelectWhen` must match (logical AND) for
214    /// the condition to fire. A `SelectWhen` with every field `None` never matches —
215    /// at least one discriminator must be set.
216    pub fn resolve_call_for_fixture(
217        &self,
218        call_name: Option<&str>,
219        fixture_id: &str,
220        fixture_category: &str,
221        fixture_tags: &[String],
222        fixture_input: &serde_json::Value,
223    ) -> &CallConfig {
224        if let Some(name) = call_name {
225            return self.calls.get(name).unwrap_or(&self.call);
226        }
227        // Auto-route by select_when condition. Deterministic order: sort by call name.
228        let mut names: Vec<&String> = self.calls.keys().collect();
229        names.sort();
230        for name in names {
231            let call_config = &self.calls[name];
232            if let Some(sel) = &call_config.select_when {
233                if sel.matches(fixture_id, fixture_category, fixture_tags, fixture_input) {
234                    return call_config;
235                }
236            }
237        }
238        &self.call
239    }
240
241    /// Resolve the effective package reference for a language.
242    ///
243    /// In registry mode, entries from `[e2e.registry.packages]` are merged on
244    /// top of the base `[e2e.packages]` — registry overrides win for any field
245    /// that is `Some`.
246    pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
247        let base = self.packages.get(lang);
248        if self.dep_mode == DependencyMode::Registry {
249            let reg = self.registry.packages.get(lang);
250            match (base, reg) {
251                (Some(b), Some(r)) => Some(PackageRef {
252                    name: r.name.clone().or_else(|| b.name.clone()),
253                    path: r.path.clone().or_else(|| b.path.clone()),
254                    module: r.module.clone().or_else(|| b.module.clone()),
255                    version: r.version.clone().or_else(|| b.version.clone()),
256                }),
257                (None, Some(r)) => Some(r.clone()),
258                (Some(b), None) => Some(b.clone()),
259                (None, None) => None,
260            }
261        } else {
262            base.cloned()
263        }
264    }
265
266    /// Return the effective `result_fields` for `call`.
267    ///
268    /// Returns `call.result_fields` when non-empty, otherwise the global
269    /// `self.result_fields`.
270    pub fn effective_result_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
271        if !call.result_fields.is_empty() {
272            &call.result_fields
273        } else {
274            &self.result_fields
275        }
276    }
277
278    /// Return the effective `fields` alias map for `call`.
279    pub fn effective_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
280        if !call.fields.is_empty() {
281            &call.fields
282        } else {
283            &self.fields
284        }
285    }
286
287    /// Return the effective `fields_optional` for `call`.
288    pub fn effective_fields_optional<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
289        if !call.fields_optional.is_empty() {
290            &call.fields_optional
291        } else {
292            &self.fields_optional
293        }
294    }
295
296    /// Return the effective `fields_array` for `call`.
297    pub fn effective_fields_array<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
298        if !call.fields_array.is_empty() {
299            &call.fields_array
300        } else {
301            &self.fields_array
302        }
303    }
304
305    /// Return the effective `fields_method_calls` for `call`.
306    pub fn effective_fields_method_calls<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
307        if !call.fields_method_calls.is_empty() {
308            &call.fields_method_calls
309        } else {
310            &self.fields_method_calls
311        }
312    }
313
314    /// Return the effective `fields_enum` for `call`.
315    pub fn effective_fields_enum<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
316        if !call.fields_enum.is_empty() {
317            &call.fields_enum
318        } else {
319            &self.fields_enum
320        }
321    }
322
323    /// Return the effective `fields_c_types` for `call`.
324    pub fn effective_fields_c_types<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
325        if !call.fields_c_types.is_empty() {
326            &call.fields_c_types
327        } else {
328            &self.fields_c_types
329        }
330    }
331
332    /// Return the effective output directory: `registry.output` in registry
333    /// mode, `output` otherwise.
334    pub fn effective_output(&self) -> &str {
335        if self.dep_mode == DependencyMode::Registry {
336            &self.registry.output
337        } else {
338            &self.output
339        }
340    }
341
342    /// Relative path from a backend's emission directory to the
343    /// `test_documents_dir` at the repo root.
344    ///
345    /// `emission_depth` counts the number of additional `../` segments needed
346    /// to reach `<output>/<lang>/` from where the file is being emitted:
347    ///
348    /// * `0` — emitted directly at `e2e/<lang>/` (e.g. dart, zig `build.zig`)
349    /// * `1` — emitted at `e2e/<lang>/<sub>/` (e.g. ruby `spec/`, R `tests/`)
350    /// * `2` — emitted at `e2e/<lang>/<sub1>/<sub2>/`
351    ///
352    /// The base prefix is two segments above `<output>/<lang>/` (i.e.
353    /// `../../`), matching the canonical layout where `<output>` (default
354    /// `"e2e"`) sits at the repo root next to the configured
355    /// `test_documents_dir`.
356    pub fn test_documents_relative_from(&self, emission_depth: usize) -> String {
357        let mut up = String::from("../../");
358        for _ in 0..emission_depth {
359            up.push_str("../");
360        }
361        format!("{up}{}", self.test_documents_dir)
362    }
363}
364
/// Serde default for `E2eConfig::fixtures`.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
368
/// Serde default for `E2eConfig::output`.
fn default_output_dir() -> String {
    String::from("e2e")
}
372
/// Serde default for `E2eConfig::test_documents_dir`.
fn default_test_documents_dir() -> String {
    String::from("test_documents")
}
376
377/// Hand-rolled `Default` so the `test_documents_dir` field receives its
378/// `default_test_documents_dir()` value (`"test_documents"`) when callers use
379/// `..Default::default()` to construct an `E2eConfig` literally rather than
380/// going through `serde::Deserialize`. Without this, `derive(Default)` would
381/// fall back to `String::default()` (i.e. the empty string), and any backend
382/// computing `test_documents_relative_from(0)` would emit `"../../"` (no dir
383/// component), breaking generated chdir hooks.
384impl Default for E2eConfig {
385    fn default() -> Self {
386        Self {
387            fixtures: default_fixtures_dir(),
388            output: default_output_dir(),
389            test_documents_dir: default_test_documents_dir(),
390            languages: Vec::new(),
391            call: CallConfig::default(),
392            calls: HashMap::new(),
393            packages: HashMap::new(),
394            harness_extras: HashMap::new(),
395            format: HashMap::new(),
396            fields: HashMap::new(),
397            fields_optional: HashSet::new(),
398            fields_array: HashSet::new(),
399            fields_method_calls: HashSet::new(),
400            result_fields: HashSet::new(),
401            exclude_categories: HashSet::new(),
402            fields_c_types: HashMap::new(),
403            fields_enum: HashSet::new(),
404            dep_mode: DependencyMode::default(),
405            registry: RegistryConfig::default(),
406        }
407    }
408}
409
/// Configuration for the function call in each test.
///
/// All fields are optional in TOML. Note that `#[derive(Default)]` uses each
/// field type's own `Default` rather than the serde default functions, so
/// `CallConfig::default()` has an empty `result_var` and
/// `returns_result == false`, which can differ from a value deserialized from
/// an empty table.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CallConfig {
    /// Per-call override for `result_fields`.
    ///
    /// When non-empty, this set replaces the global `[e2e].result_fields` for
    /// fixtures routed to this call.  Use this when different API functions return
    /// differently-shaped structs so each call can gate its own field set.
    ///
    /// Example:
    /// ```toml
    /// [e2e.calls.crawl]
    /// result_fields = ["pages", "final_url", "stayed_on_domain"]
    /// ```
    #[serde(default)]
    pub result_fields: HashSet<String>,
    /// Per-call override for `[e2e].fields` alias map.
    ///
    /// When non-empty, replaces (not merges with) the global `fields` map for
    /// fixtures routed to this call.
    #[serde(default)]
    pub fields: HashMap<String, String>,
    /// Per-call override for `[e2e].fields_optional` (replaces the global set
    /// when non-empty).
    #[serde(default)]
    pub fields_optional: HashSet<String>,
    /// Per-call override for `[e2e].fields_array` (replaces the global set
    /// when non-empty).
    #[serde(default)]
    pub fields_array: HashSet<String>,
    /// Per-call override for `[e2e].fields_method_calls` (replaces the global
    /// set when non-empty).
    #[serde(default)]
    pub fields_method_calls: HashSet<String>,
    /// Per-call override for `[e2e].fields_enum` (replaces the global set when
    /// non-empty).
    #[serde(default)]
    pub fields_enum: HashSet<String>,
    /// Per-call override for `[e2e].fields_c_types` (replaces the global map
    /// when non-empty).
    #[serde(default)]
    pub fields_c_types: HashMap<String, String>,
    /// The function name (alef applies language naming conventions).
    #[serde(default)]
    pub function: String,
    /// The module/package where the function lives.
    #[serde(default)]
    pub module: String,
    /// Variable name for the return value (default: "result").
    #[serde(default = "default_result_var")]
    pub result_var: String,
    /// Whether the function is async.
    #[serde(default)]
    pub r#async: bool,
    /// HTTP endpoint path for mock server routing (e.g., `"/v1/chat/completions"`).
    ///
    /// Required when fixtures use `mock_response`. The Rust e2e generator uses
    /// this to build the `MockRoute` that the mock server matches against.
    #[serde(default)]
    pub path: Option<String>,
    /// HTTP method for mock server routing (default: `"POST"`).
    ///
    /// Used together with `path` when building `MockRoute` entries.
    // NOTE(review): the field itself defaults to `None`; the `"POST"` fallback
    // is presumably applied where the route is built — confirm.
    #[serde(default)]
    pub method: Option<String>,
    /// How fixture `input` fields map to function arguments.
    #[serde(default)]
    pub args: Vec<ArgMapping>,
    /// Per-language overrides for module/function/etc.
    #[serde(default)]
    pub overrides: HashMap<String, CallOverride>,
    /// Whether the function returns `Result<T, E>` in its native binding.
    ///
    /// The intended default is `true`; when the key is absent, deserialization
    /// takes the value from `default_returns_result()`. When `false`,
    /// generators that distinguish Result-returning from non-Result-returning
    /// calls (currently Rust) will skip the `.expect("should succeed")` unwrap
    /// and bind the raw return value directly.
    #[serde(default = "default_returns_result")]
    pub returns_result: bool,
    /// Whether the function returns only an error/unit — i.e., `Result<(), E>`.
    ///
    /// When combined with `returns_result = true`, Go generators emit `err := func()`
    /// (single return value) rather than `_, err := func()` (two return values).
    /// This is needed for functions like `validate_host` that return only `error` in Go.
    #[serde(default)]
    pub returns_void: bool,
    /// Language names for which this call is skipped.
    // NOTE(review): presumably generators emit no test for fixtures routed to
    // this call in the listed languages — confirm against the generators.
    #[serde(default)]
    pub skip_languages: Vec<String>,
    /// When `true`, the function returns a primitive (e.g. `String`, `bool`,
    /// `i32`) rather than a struct.  Generators that would otherwise emit
    /// `result.<field>` will fall back to the bare result variable.
    ///
    /// This is a property of the Rust core's return type and therefore identical
    /// across every binding — set it on the call, not in per-language overrides.
    /// The same flag is also accepted under `[e2e.calls.<name>.overrides.<lang>]`
    /// for backwards compatibility, but the call-level value takes precedence.
    #[serde(default)]
    pub result_is_simple: bool,
    /// When `true`, the function returns `Vec<T>` / `Array<T>`.  Generators that
    /// support per-element field assertions (rust, csharp) iterate or index into
    /// the result; the typescript codegen indexes `[0]` to mirror csharp.
    ///
    /// As with `result_is_simple`, this is a Rust-side property — set it on the
    /// call, not on per-language overrides. Per-language overrides remain
    /// supported for backwards compatibility.
    #[serde(default)]
    pub result_is_vec: bool,
    /// When `true` (combined with `result_is_simple`), the simple return is a
    /// slice/array (e.g., `Vec<String>` → `string[]` in TS).
    #[serde(default)]
    pub result_is_array: bool,
    /// When `true`, the function returns a raw byte array (`Vec<u8>` →
    /// `Uint8Array` / `[]byte` / `byte[]`).
    #[serde(default)]
    pub result_is_bytes: bool,
    /// Three-valued opt-in/out for streaming-virtual-field auto-detection.
    ///
    /// - `Some(true)`: force streaming semantics regardless of fixture shape.
    /// - `Some(false)`: disable streaming auto-detection — assertions referencing
    ///   fields like `chunks` / `chunks.length` / `tool_calls` / `finish_reason`
    ///   are treated as plain field accessors on the result, not streaming
    ///   adapters. Use this when your API has a `chunks` field that is a regular
    ///   list (not an async stream).
    /// - `None` (default): auto-detect — treat as streaming when either the
    ///   fixture provides a streaming `mock_response` or any assertion references
    ///   a hard-coded streaming-virtual-field name.
    #[serde(default)]
    pub streaming: Option<bool>,
    /// When `true`, the function returns `Option<T>`.
    #[serde(default)]
    pub result_is_option: bool,
    /// Automatic fixture-routing condition.
    ///
    /// When set, a fixture whose `call` field is `None` is routed to this named call config
    /// if the condition is satisfied.  This avoids the need to tag every fixture with
    /// `"call": "batch_scrape"` when the fixture shape already identifies the call.
    ///
    /// Example (`alef.toml`):
    /// ```toml
    /// [e2e.calls.batch_scrape]
    /// select_when = { input_has = "batch_urls" }
    /// ```
    #[serde(default)]
    pub select_when: Option<SelectWhen>,
}
549
/// Serde default for `CallConfig::result_var`.
fn default_result_var() -> String {
    String::from("result")
}
553
/// Serde default for `CallConfig::returns_result`.
///
/// Returns `true`: a call is assumed to return `Result<T, E>` unless the
/// configuration explicitly sets `returns_result = false`. This matches the
/// field's documented default; returning `false` here would make generators
/// treat every call without the key as non-Result-returning.
fn default_returns_result() -> bool {
    true
}
557
/// Condition for auto-selecting a named call config when the fixture matches.
///
/// When a fixture does not specify `"call"`, the codegen normally uses the default
/// `[e2e.call]`.  A `SelectWhen` condition on a named call allows automatic routing
/// based on the fixture's id, category, tags, or input shape.  All set fields must
/// match (logical AND); a condition with no fields set never matches.
///
/// ```toml
/// [e2e.calls.batch_scrape]
/// select_when = { input_has = "batch_urls" }
///
/// [e2e.calls.crawl]
/// select_when = { category = "crawl" }
///
/// [e2e.calls.batch_crawl_stream]
/// select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct SelectWhen {
    /// Match when the fixture's resolved category equals this string
    /// (exact, case-sensitive comparison).
    #[serde(default)]
    pub category: Option<String>,
    /// Match when the fixture's id starts with this prefix.
    #[serde(default)]
    pub id_prefix: Option<String>,
    /// Match when the fixture's id matches this simple glob.
    ///
    /// Only `*` (matches any run of characters, including none) is supported;
    /// a pattern without `*` must equal the id exactly. Use `id_prefix` for
    /// plain prefix matches.
    #[serde(default)]
    pub id_glob: Option<String>,
    /// Match when the fixture's tags include this tag (exact comparison).
    #[serde(default)]
    pub tag: Option<String>,
    /// Match when the fixture's input object contains this key with a non-null value.
    #[serde(default)]
    pub input_has: Option<String>,
}
596
597impl SelectWhen {
598    /// Returns true when every set discriminator matches the fixture.
599    ///
600    /// A `SelectWhen` with all fields `None` returns `false` — at least one
601    /// discriminator must be set for the condition to fire.
602    pub fn matches(
603        &self,
604        fixture_id: &str,
605        fixture_category: &str,
606        fixture_tags: &[String],
607        fixture_input: &serde_json::Value,
608    ) -> bool {
609        let any_set = self.category.is_some()
610            || self.id_prefix.is_some()
611            || self.id_glob.is_some()
612            || self.tag.is_some()
613            || self.input_has.is_some();
614        if !any_set {
615            return false;
616        }
617        if let Some(cat) = &self.category
618            && cat.as_str() != fixture_category
619        {
620            return false;
621        }
622        if let Some(prefix) = &self.id_prefix
623            && !fixture_id.starts_with(prefix.as_str())
624        {
625            return false;
626        }
627        if let Some(glob) = &self.id_glob
628            && !glob_matches(glob, fixture_id)
629        {
630            return false;
631        }
632        if let Some(tag) = &self.tag
633            && !fixture_tags.iter().any(|t| t == tag)
634        {
635            return false;
636        }
637        if let Some(key) = &self.input_has {
638            let val = fixture_input.get(key.as_str()).unwrap_or(&serde_json::Value::Null);
639            if val.is_null() {
640                return false;
641            }
642        }
643        true
644    }
645}
646
/// Minimal glob matcher in which `*` is the only wildcard and stands for any
/// run of characters (possibly empty).
///
/// A pattern without `*` must equal `text` exactly. Otherwise the literal
/// before the first `*` must be a prefix of `text`, a literal after the last
/// `*` must be a suffix of what remains, and every literal in between is
/// located left-to-right at its first occurrence.
fn glob_matches(pattern: &str, text: &str) -> bool {
    let Some((head, tail)) = pattern.split_once('*') else {
        return pattern == text;
    };

    // Anchor the leading literal (an empty `head` strips nothing).
    let Some(mut remaining) = text.strip_prefix(head) else {
        return false;
    };

    let mut literals = tail.split('*').peekable();
    while let Some(literal) = literals.next() {
        // Consecutive or trailing `*` produce empty pieces that match anything.
        if literal.is_empty() {
            continue;
        }
        // A non-empty final piece means the pattern does not end in `*`, so it
        // has to sit at the very end of the text.
        if literals.peek().is_none() {
            return remaining.ends_with(literal);
        }
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }
    true
}
674
/// Maps a fixture input field to a function argument.
///
/// `name` and `field` are required when deserializing; every other field has
/// a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArgMapping {
    /// Argument name in the function signature.
    pub name: String,
    /// JSON field path in the fixture's `input` object.
    pub field: String,
    /// Type hint for code generation.
    ///
    /// Serialized under the key `type`; defaults to `"string"` when omitted.
    #[serde(rename = "type", default = "default_arg_type")]
    pub arg_type: String,
    /// Whether this argument is optional.
    #[serde(default)]
    pub optional: bool,
    /// When `true`, the Rust codegen passes this argument by value (owned) rather than
    /// by reference. Use for `Vec<T>` parameters that do not accept `&Vec<T>`.
    #[serde(default)]
    pub owned: bool,
    /// For `json_object` args targeting `&[T]` Rust parameters, set to the element type
    /// (e.g. `"f32"`, `"String"`) so the codegen emits `Vec<element_type>` annotation.
    #[serde(default)]
    pub element_type: Option<String>,
    /// Override the Go slice element type for `json_object` array args.
    ///
    /// When set, the Go e2e codegen uses this as the element type instead of the default
    /// derived from `element_type`. Use Go-idiomatic type names including the import alias
    /// prefix where needed, e.g. `"kreuzberg.BatchBytesItem"` or `"string"`.
    #[serde(default)]
    pub go_type: Option<String>,
}
704
/// Serde default for `ArgMapping::arg_type`.
fn default_arg_type() -> String {
    String::from("string")
}
708
709/// Per-language override for function call configuration.
710#[derive(Debug, Clone, Serialize, Deserialize, Default)]
711pub struct CallOverride {
712    /// Override the module/import path.
713    #[serde(default)]
714    pub module: Option<String>,
715    /// Override the function name.
716    #[serde(default)]
717    pub function: Option<String>,
718    /// Maps canonical argument names to language-specific argument names.
719    ///
720    /// Used when a language binding uses a different parameter name than the
721    /// canonical `args` list in `CallConfig`. For example, if the canonical
722    /// arg name is `doc` but the Python binding uses `html`, specify:
723    ///
724    /// ```toml
725    /// [e2e.call.overrides.python]
726    /// arg_name_map = { doc = "html" }
727    /// ```
728    ///
729    /// The key is the canonical name (from `args[].name`) and the value is the
730    /// name to use when emitting the keyword argument in generated tests.
731    #[serde(default)]
732    pub arg_name_map: HashMap<String, String>,
733    /// Override the crate name (Rust only).
734    #[serde(default)]
735    pub crate_name: Option<String>,
736    /// Override the class name (Java/C# only).
737    #[serde(default)]
738    pub class: Option<String>,
739    /// Import alias (Go only, e.g., `htmd`).
740    #[serde(default)]
741    pub alias: Option<String>,
742    /// C header file name (C only).
743    #[serde(default)]
744    pub header: Option<String>,
745    /// FFI symbol prefix (C only).
746    #[serde(default)]
747    pub prefix: Option<String>,
748    /// For json_object args: the constructor to use instead of raw dict/object.
749    /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
750    /// `new ConversionOptions(options)` in TypeScript.
751    #[serde(default)]
752    pub options_type: Option<String>,
753    /// How to pass json_object args: "kwargs" (default), "dict", "json", or "from_json".
754    ///
755    /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
756    /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
757    /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
758    /// - `"from_json"`: call `OptionsType.from_json('...')` (Python only, PyO3 native types).
759    #[serde(default)]
760    pub options_via: Option<String>,
761    /// Module to import `options_type` from when `options_via = "from_json"`.
762    ///
763    /// When set, a separate `from {from_json_module} import {options_type}` line
764    /// is emitted instead of including the type in the main module import.
765    /// E.g., `"liter_llm._internal_bindings"` for PyO3 native types.
766    #[serde(default)]
767    pub from_json_module: Option<String>,
768    /// Override whether the call is async for this language.
769    ///
770    /// When set, takes precedence over the call-level `async` flag.
771    /// Useful when a language binding uses a different async model — for example,
772    /// a Python binding that returns a sync iterator from a function marked
773    /// `async = true` at the call level.
774    #[serde(default, rename = "async")]
775    pub r#async: Option<bool>,
776    /// Maps fixture option field names to their enum type names.
777    /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
778    /// The generator imports these types and maps string values to enum constants.
779    #[serde(default)]
780    pub enum_fields: HashMap<String, String>,
781    /// Maps result-type field names to their enum type names for assertion routing.
782    /// Per-call so e.g. `BatchObject.status` (enum) and `ResponseObject.status` (string)
783    /// can be disambiguated.
784    #[serde(default)]
785    pub assert_enum_fields: HashMap<String, String>,
786    /// Module to import enum types from (if different from the main module).
787    /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
788    #[serde(default)]
789    pub enum_module: Option<String>,
790    /// Maps nested fixture object field names to their C# type names.
791    /// Used to generate `JsonSerializer.Deserialize<NestedType>(...)` for nested objects.
792    /// E.g., `{"preprocessing": "PreprocessingOptions"}`.
793    #[serde(default)]
794    pub nested_types: HashMap<String, String>,
795    /// When `false`, nested config builder results are passed directly to builder methods
796    /// without wrapping in `Optional.of(...)`. Set to `false` for bindings where nested
797    /// option types are non-optional (e.g., html-to-markdown Java).
798    /// Defaults to `true` for backward compatibility.
799    #[serde(default = "default_true")]
800    pub nested_types_optional: bool,
801    /// When `true`, the function returns a simple type (e.g., `String`) rather
802    /// than a struct.  Generators that would normally emit `result.content`
803    /// (or equivalent field access) will use the result variable directly.
804    #[serde(default)]
805    pub result_is_simple: bool,
806    /// When `true` (and combined with `result_is_simple`), the simple result is
807    /// a slice/array type (e.g., `[]string` in Go, `Vec<String>` in Rust).
808    /// The Go generator uses `strings.Join(value, " ")` for `contains` assertions
809    /// instead of `string(value)`.
810    #[serde(default)]
811    pub result_is_array: bool,
812    /// When `true`, the function returns `Vec<T>` rather than a single value.
813    /// Field-path assertions are emitted as `.iter().all(|r| <accessor>)` so
814    /// every element is checked. (Rust generator.)
815    #[serde(default)]
816    pub result_is_vec: bool,
817    /// When `true`, the function returns a raw byte array (e.g., `byte[]` in Java,
818    /// `[]byte` in Go). Used by generators to select the correct length accessor
819    /// (field `.length` vs method `.length()`).
820    #[serde(default)]
821    pub result_is_bytes: bool,
822    /// When `true`, the function returns `Option<T>`. The result is unwrapped
823    /// before any non-`is_none`/`is_some` assertion runs; `is_empty`/`not_empty`
824    /// assertions map to `is_none()`/`is_some()`. (Rust generator.)
825    #[serde(default)]
826    pub result_is_option: bool,
827    /// When `true`, the R generator emits the call result directly without wrapping
828    /// in `jsonlite::fromJSON()`. Use when the R binding already returns a native
829    /// R list (`Robj`) rather than a JSON string. Field-path assertions still use
830    /// `result$field` accessor syntax (i.e. `result_is_simple` behaviour is NOT
831    /// implied — only the JSON parse wrapper is suppressed). (R generator only.)
832    #[serde(default)]
833    pub result_is_r_list: bool,
834    /// When `true`, the Zig generator treats the result as a `[]u8` JSON string
835    /// representing a struct value (e.g., `ExtractionResult` serialized via the
836    /// FFI `_to_json` helper). The generator parses the JSON with
837    /// `std.json.parseFromSlice(std.json.Value, ...)` before emitting field
838    /// assertions, traversing the dynamic JSON object for each field path.
839    /// (Zig generator only.)
840    #[serde(default)]
841    pub result_is_json_struct: bool,
842    /// When `true`, the Rust generator wraps the `json_object` argument expression
843    /// in `Some(...).clone()` to match an owned `Option<T>` parameter slot rather
844    /// than passing `&options`. (Rust generator only.)
845    #[serde(default)]
846    pub wrap_options_in_some: bool,
847    /// Trailing positional arguments appended verbatim after the configured
848    /// `args`. Used when the target function takes additional positional slots
849    /// (e.g. visitor) the fixture cannot supply directly. (Rust generator only.)
850    #[serde(default)]
851    pub extra_args: Vec<String>,
852    /// Per-rust override of the call-level `returns_result`. When set, takes
853    /// precedence over `CallConfig.returns_result` for the Rust generator only.
854    /// Useful when one binding is fallible while others are not.
855    #[serde(default)]
856    pub returns_result: Option<bool>,
857    /// Maps handle config field names to their Python type constructor names.
858    ///
859    /// When the handle config object contains a nested dict-valued field, the
860    /// generator will wrap it in the specified type using keyword arguments.
861    /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
862    /// instead of `{"mode": "auto"}`.
863    #[serde(default)]
864    pub handle_nested_types: HashMap<String, String>,
865    /// Handle config fields whose type constructor takes a single dict argument
866    /// instead of keyword arguments.
867    ///
868    /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
869    /// `AuthConfig(type="basic", ...)`.
870    #[serde(default)]
871    pub handle_dict_types: HashSet<String>,
872    /// Elixir struct module name for the handle config argument.
873    ///
874    /// When set, the generated Elixir handle config uses struct literal syntax
875    /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
876    /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
877    ///
878    /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
879    #[serde(default)]
880    pub handle_struct_type: Option<String>,
881    /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
882    ///
883    /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
884    /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
885    /// List the field names here so the generator emits atom literals instead of strings.
886    ///
887    /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
888    #[serde(default)]
889    pub handle_atom_list_fields: HashSet<String>,
890    /// WASM config class name for handle args (WASM generator only).
891    ///
892    /// When set, handle args are constructed using `ConfigType.default()` + setters
893    /// instead of passing a plain JS object (which fails `_assertClass` validation).
894    ///
895    /// E.g., `"WasmCrawlConfig"` generates:
896    /// ```js
897    /// const engineConfig = WasmCrawlConfig.default();
898    /// engineConfig.maxDepth = 1;
899    /// const engine = createEngine(engineConfig);
900    /// ```
901    #[serde(default)]
902    pub handle_config_type: Option<String>,
903    /// PHP client factory method name (PHP generator only).
904    ///
905    /// When set, the generated PHP test instantiates a client via
906    /// `ClassName::factory_method('test-key')` and calls methods on the instance
907    /// instead of using static facade calls.
908    ///
909    /// E.g., `"createClient"` generates:
910    /// ```php
911    /// $client = LiterLlm::createClient('test-key');
912    /// $result = $client->chat($request);
913    /// ```
914    #[serde(default)]
915    pub php_client_factory: Option<String>,
916    /// Client factory function name for instance-method languages (WASM, etc.).
917    ///
918    /// When set, the generated test imports this function, creates a client,
919    /// and calls API methods on the instance instead of as top-level functions.
920    ///
921    /// E.g., `"createClient"` generates:
922    /// ```typescript
923    /// import { createClient } from 'pkg';
924    /// const client = createClient('test-key');
925    /// const result = await client.chat(request);
926    /// ```
927    #[serde(default)]
928    pub client_factory: Option<String>,
929    /// Verbatim trailing arguments appended after the fixed `("test-key", ...)` pair
930    /// when calling the `client_factory` function.
931    ///
932    /// Use this when the factory function takes additional positional parameters
933    /// beyond the API key and optional base URL that the generator would otherwise
934    /// emit.  Each element is emitted verbatim, separated by `, `.
935    ///
936    /// Example — Gleam `create_client` takes five positional arguments:
937    /// `(api_key, base_url, timeout_secs, max_retries, model_hint)`.  Set:
938    /// ```toml
939    /// [e2e.call.overrides.gleam]
940    /// client_factory = "create_client"
941    /// client_factory_trailing_args = ["option.None", "option.None", "option.None"]
942    /// ```
943    /// to produce `create_client("test-key", option.Some(url), option.None, option.None, option.None)`.
944    #[serde(default)]
945    pub client_factory_trailing_args: Vec<String>,
946    /// Fields on the options object that require `BigInt()` wrapping (WASM only).
947    ///
948    /// `wasm_bindgen` maps Rust `u64`/`i64` to JavaScript `BigInt`. Numeric
949    /// values assigned to these setters must be wrapped with `BigInt(n)`.
950    ///
951    /// List camelCase field names, e.g.:
952    /// ```toml
953    /// [e2e.call.overrides.wasm]
954    /// bigint_fields = ["maxTokens", "seed"]
955    /// ```
956    #[serde(default)]
957    pub bigint_fields: Vec<String>,
958    /// Static CLI arguments appended to every invocation (brew/CLI generator only).
959    ///
960    /// E.g., `["--format", "json"]` appends `--format json` to every CLI call.
961    #[serde(default)]
962    pub cli_args: Vec<String>,
963    /// Maps fixture config field names to CLI flag names (brew/CLI generator only).
964    ///
965    /// E.g., `{"output_format": "--format"}` generates `--format <value>` from
966    /// the fixture's `output_format` input field.
967    #[serde(default)]
968    pub cli_flags: HashMap<String, String>,
969    /// C FFI opaque result type name (C only).
970    ///
971    /// The PascalCase name of the result struct, without the prefix.
972    /// E.g., `"ChatCompletionResponse"` for `LiterllmChatCompletionResponse*`.
973    /// If not set, defaults to the function name in PascalCase.
974    #[serde(default)]
975    pub result_type: Option<String>,
976    /// Override the argument order for this language binding.
977    ///
978    /// Lists argument names from `args` in the order they should be passed
979    /// to the target function. Useful when a language binding reorders parameters
980    /// relative to the canonical `args` list in `CallConfig`.
981    ///
982    /// E.g., if `args = [path, mime_type, config]` but the Node.js binding
983    /// takes `(path, config, mime_type?)`, specify:
984    /// ```toml
985    /// [e2e.call.overrides.node]
986    /// arg_order = ["path", "config", "mime_type"]
987    /// ```
988    #[serde(default)]
989    pub arg_order: Vec<String>,
990    /// When `true`, `json_object` args with an `options_type` are passed as a
991    /// pointer (`*OptionsType`) rather than a value.  Use for Go bindings where
992    /// the options parameter is `*ConversionOptions` (nil-able pointer) rather
993    /// than a plain struct.
994    ///
995    /// Absent options are passed as `nil`; present options are unmarshalled into
996    /// a local variable and passed as `&optionsVar`.
997    #[serde(default)]
998    pub options_ptr: bool,
999    /// Alternative function name to use when the fixture includes a `visitor`.
1000    ///
1001    /// Some bindings expose two entry points: `Convert(html, opts)` for the
1002    /// plain case and `ConvertWithVisitor(html, opts, visitor)` when a visitor
1003    /// is involved.  Set this to the visitor-accepting function name so the
1004    /// generator can pick the right symbol automatically.
1005    ///
1006    /// E.g., `"ConvertWithVisitor"` makes the Go generator emit:
1007    /// ```go
1008    /// result, err := htmd.ConvertWithVisitor(html, nil, visitor)
1009    /// ```
1010    /// instead of `htmd.Convert(html, nil, visitor)` (which would not compile).
1011    #[serde(default)]
1012    pub visitor_function: Option<String>,
1013    /// Rust trait names to import when `client_factory` is set (Rust generator only).
1014    ///
1015    /// When `client_factory` is set, the generated test creates a client object and
1016    /// calls methods on it. Those methods are defined on traits (e.g. `LlmClient`,
1017    /// `FileClient`) that must be in scope. List the trait names here and the Rust
1018    /// generator will emit `use {module}::{trait_name};` for each.
1019    ///
1020    /// E.g.:
1021    /// ```toml
1022    /// [e2e.call.overrides.rust]
1023    /// client_factory = "create_client"
1024    /// trait_imports = ["LlmClient", "FileClient", "BatchClient", "ResponseClient"]
1025    /// ```
1026    #[serde(default)]
1027    pub trait_imports: Vec<String>,
1028    /// Raw C return type, used verbatim instead of `{PREFIX}Type*` (C only).
1029    ///
1030    /// Valid values: `"char*"`, `"int32_t"`, `"uintptr_t"`.
1031    /// When set, the C generator skips options handle construction and uses the
1032    /// raw type directly. Free logic is adjusted accordingly.
1033    #[serde(default)]
1034    pub raw_c_result_type: Option<String>,
1035    /// Free function for raw `char*` C results (C only).
1036    ///
1037    /// Defaults to `{prefix}_free_string` when unset and `raw_c_result_type == "char*"`.
1038    #[serde(default)]
1039    pub c_free_fn: Option<String>,
1040    /// C FFI engine factory pattern (C only).
1041    ///
1042    /// When set, the C generator wraps each test call in a
1043    /// `{prefix}_create_engine(config)` / `{prefix}_crawl_engine_handle_free(engine)`
1044    /// prologue/epilogue using the named config type as the "arg 0" handle type.
1045    ///
1046    /// The value is the PascalCase config type name (without prefix), e.g.
1047    /// `"CrawlConfig"`. The generator will emit:
1048    /// ```c
1049    /// KCRAWLCrawlConfig* config_handle = kcrawl_crawl_config_from_json("{json}");
1050    /// KCRAWLCrawlEngineHandle* engine = kcrawl_create_engine(config_handle);
1051    /// kcrawl_crawl_config_free(config_handle);
1052    /// KCRAWLScrapeResult* result = kcrawl_scrape(engine, url);
1053    /// // ... assertions ...
1054    /// kcrawl_scrape_result_free(result);
1055    /// kcrawl_crawl_engine_handle_free(engine);
1056    /// ```
1057    #[serde(default)]
1058    pub c_engine_factory: Option<String>,
1059    /// Fields in a `json_object` arg that must be wrapped in `java.nio.file.Path.of()`
1060    /// (Java generator only).
1061    ///
1062    /// E.g., `["cache_dir"]` wraps the string value of `cache_dir` so the builder
1063    /// receives `java.nio.file.Path.of("/tmp/dir")` instead of a plain string.
1064    #[serde(default)]
1065    pub path_fields: Vec<String>,
1066    /// Trait name for the visitor pattern (Rust e2e tests only).
1067    ///
1068    /// When a fixture declares a `visitor` block, the Rust e2e generator emits
1069    /// `impl <trait_name> for _TestVisitor { ... }` and imports the trait from
1070    /// `{module}::visitor`. When unset, no visitor block is emitted and fixtures
1071    /// that declare a visitor will cause a codegen error.
1072    ///
1073    /// E.g., `"HtmlVisitor"` generates:
1074    /// ```rust,ignore
1075    /// use html_to_markdown_rs::visitor::{HtmlVisitor, NodeContext, VisitResult};
1076    /// // ...
1077    /// impl HtmlVisitor for _TestVisitor { ... }
1078    /// ```
1079    #[serde(default)]
1080    pub visitor_trait: Option<String>,
1081    /// Maps result field paths to their wasm-bindgen enum class names.
1082    ///
1083    /// wasm-bindgen exposes Rust enums as numeric discriminants in JavaScript
1084    /// (`WasmFinishReason.Stop === 0`), not string variants. When an `equals`
1085    /// assertion targets a field listed here, the WASM generator emits
1086    /// `expect(result.choices[0].finishReason).toBe(WasmFinishReason.Stop)`
1087    /// instead of attempting `(value ?? "").trim()`.
1088    ///
1089    /// The fixture's expected string value is converted to PascalCase to look
1090    /// up the variant (e.g. `"tool_calls"` -> `ToolCalls`).
1091    ///
1092    /// Example:
1093    /// ```toml
1094    /// [e2e.calls.chat.overrides.wasm]
1095    /// result_enum_fields = { "choices[0].finish_reason" = "WasmFinishReason", "status" = "WasmBatchStatus" }
1096    /// ```
1097    #[serde(default)]
1098    pub result_enum_fields: HashMap<String, String>,
1099    /// When `true`, indicates that the result is a pointer type (e.g., `*string` in Go,
1100    /// `*T` in Rust). The Go codegen will dereference it. When `false` (Go only), the
1101    /// result is a value type and should not be dereferenced.
1102    ///
1103    /// Used to distinguish between functions that return `(value, error)` where value
1104    /// is a scalar (string, uint, bool) as-is vs. those that return pointers.
1105    /// Defaults to `true` for backward compatibility with existing fixtures.
1106    #[serde(default = "default_true")]
1107    pub result_is_pointer: bool,
1108}
1109
/// Serde default helper that yields `true`.
///
/// Referenced via `#[serde(default = "default_true")]` on boolean fields that must
/// be enabled when the key is omitted from the configuration.
fn default_true() -> bool {
    true
}
1113
/// Per-language package reference configuration.
///
/// Every field is optional: a key that is omitted from the configuration
/// deserializes to `None`, so an entry only spells out the values it sets.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PackageRef {
    /// Package/crate/gem/module name.
    #[serde(default)]
    pub name: Option<String>,
    /// Relative path from e2e/{lang}/ to the package.
    #[serde(default)]
    pub path: Option<String>,
    /// Go module path.
    #[serde(default)]
    pub module: Option<String>,
    /// Package version (e.g., for go.mod require directives).
    #[serde(default)]
    pub version: Option<String>,
}
1130
1131#[cfg(test)]
1132mod tests {
1133    use super::*;
1134
1135    fn empty_e2e_with_test_documents(dir: &str) -> E2eConfig {
1136        E2eConfig {
1137            test_documents_dir: dir.to_string(),
1138            ..Default::default()
1139        }
1140    }
1141
1142    #[test]
1143    fn test_documents_dir_default_is_test_documents() {
1144        let cfg: E2eConfig = toml::from_str("[call]\nfunction = \"f\"\n").expect("minimal TOML must deserialize");
1145        assert_eq!(cfg.test_documents_dir, "test_documents");
1146    }
1147
1148    #[test]
1149    fn test_documents_dir_explicit_override_wins() {
1150        let cfg: E2eConfig = toml::from_str("test_documents_dir = \"fixture_files\"\n[call]\nfunction = \"f\"\n")
1151            .expect("explicit override must deserialize");
1152        assert_eq!(cfg.test_documents_dir, "fixture_files");
1153    }
1154
1155    #[test]
1156    fn test_documents_relative_from_at_lang_root_returns_two_dots_up() {
1157        let cfg = empty_e2e_with_test_documents("test_documents");
1158        assert_eq!(cfg.test_documents_relative_from(0), "../../test_documents");
1159    }
1160
1161    #[test]
1162    fn test_documents_relative_from_at_spec_depth_returns_three_dots_up() {
1163        let cfg = empty_e2e_with_test_documents("test_documents");
1164        assert_eq!(cfg.test_documents_relative_from(1), "../../../test_documents");
1165    }
1166
1167    #[test]
1168    fn test_documents_relative_from_at_two_subdirs_deep_returns_four_dots_up() {
1169        let cfg = empty_e2e_with_test_documents("test_documents");
1170        assert_eq!(cfg.test_documents_relative_from(2), "../../../../test_documents");
1171    }
1172
1173    #[test]
1174    fn test_documents_relative_uses_configured_dir_name() {
1175        let cfg = empty_e2e_with_test_documents("fixture_files");
1176        assert_eq!(cfg.test_documents_relative_from(0), "../../fixture_files");
1177        assert_eq!(cfg.test_documents_relative_from(1), "../../../fixture_files");
1178    }
1179
1180    #[test]
1181    fn select_when_with_no_discriminators_never_matches() {
1182        let sel = SelectWhen::default();
1183        assert!(!sel.matches("any_id", "any_category", &[], &serde_json::Value::Null));
1184    }
1185
1186    #[test]
1187    fn select_when_input_has_matches_non_null_key() {
1188        let sel = SelectWhen {
1189            input_has: Some("batch_urls".to_string()),
1190            ..Default::default()
1191        };
1192        let input = serde_json::json!({ "batch_urls": [] });
1193        assert!(sel.matches("fid", "cat", &[], &input));
1194        let empty_input = serde_json::json!({ "url": "x" });
1195        assert!(!sel.matches("fid", "cat", &[], &empty_input));
1196    }
1197
1198    #[test]
1199    fn select_when_category_matches_exactly() {
1200        let sel = SelectWhen {
1201            category: Some("crawl".to_string()),
1202            ..Default::default()
1203        };
1204        assert!(sel.matches("any_id", "crawl", &[], &serde_json::Value::Null));
1205        assert!(!sel.matches("any_id", "scrape", &[], &serde_json::Value::Null));
1206    }
1207
1208    #[test]
1209    fn select_when_id_prefix_matches() {
1210        let sel = SelectWhen {
1211            id_prefix: Some("batch_crawl_".to_string()),
1212            ..Default::default()
1213        };
1214        assert!(sel.matches("batch_crawl_events", "any", &[], &serde_json::Value::Null));
1215        assert!(!sel.matches("batch_scrape_basic", "any", &[], &serde_json::Value::Null));
1216    }
1217
1218    #[test]
1219    fn select_when_id_glob_handles_star() {
1220        let sel = SelectWhen {
1221            id_glob: Some("crawl_stream*".to_string()),
1222            ..Default::default()
1223        };
1224        assert!(sel.matches("crawl_stream_basic", "any", &[], &serde_json::Value::Null));
1225        assert!(!sel.matches("batch_crawl_stream", "any", &[], &serde_json::Value::Null));
1226    }
1227
1228    #[test]
1229    fn select_when_tag_matches_any_tag_in_list() {
1230        let sel = SelectWhen {
1231            tag: Some("streaming".to_string()),
1232            ..Default::default()
1233        };
1234        let tags = vec!["smoke".to_string(), "streaming".to_string()];
1235        assert!(sel.matches("fid", "cat", &tags, &serde_json::Value::Null));
1236        assert!(!sel.matches("fid", "cat", &["smoke".to_string()], &serde_json::Value::Null));
1237    }
1238
1239    #[test]
1240    fn select_when_multiple_discriminators_anded() {
1241        let sel = SelectWhen {
1242            category: Some("stream".to_string()),
1243            id_prefix: Some("batch_crawl_stream".to_string()),
1244            ..Default::default()
1245        };
1246        assert!(sel.matches("batch_crawl_stream_events", "stream", &[], &serde_json::Value::Null));
1247        // Wrong category fails even though prefix matches
1248        assert!(!sel.matches("batch_crawl_stream_events", "crawl", &[], &serde_json::Value::Null));
1249        // Wrong prefix fails even though category matches
1250        assert!(!sel.matches("crawl_stream_basic", "stream", &[], &serde_json::Value::Null));
1251    }
1252
1253    #[test]
1254    fn select_when_deserializes_legacy_input_has_only() {
1255        let toml_src = r#"
1256            [call]
1257            function = "scrape"
1258
1259            [calls.batch_scrape]
1260            function = "batch_scrape"
1261            select_when = { input_has = "batch_urls" }
1262        "#;
1263        let cfg: E2eConfig = toml::from_str(toml_src).expect("legacy input_has must deserialize");
1264        let sel = cfg.calls["batch_scrape"].select_when.as_ref().unwrap();
1265        assert_eq!(sel.input_has.as_deref(), Some("batch_urls"));
1266        assert!(sel.category.is_none());
1267        assert!(sel.id_prefix.is_none());
1268    }
1269
1270    #[test]
1271    fn select_when_deserializes_compound_discriminators() {
1272        let toml_src = r#"
1273            [call]
1274            function = "scrape"
1275
1276            [calls.batch_crawl_stream]
1277            function = "batch_crawl_stream"
1278            select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
1279        "#;
1280        let cfg: E2eConfig = toml::from_str(toml_src).expect("compound select_when must deserialize");
1281        let sel = cfg.calls["batch_crawl_stream"].select_when.as_ref().unwrap();
1282        assert_eq!(sel.category.as_deref(), Some("stream"));
1283        assert_eq!(sel.id_prefix.as_deref(), Some("batch_crawl_stream"));
1284    }
1285
1286    #[test]
1287    fn resolve_call_for_fixture_routes_by_category_then_falls_back() {
1288        let mut calls = HashMap::new();
1289        calls.insert(
1290            "crawl".to_string(),
1291            CallConfig {
1292                function: "crawl".to_string(),
1293                select_when: Some(SelectWhen {
1294                    category: Some("crawl".to_string()),
1295                    ..Default::default()
1296                }),
1297                ..Default::default()
1298            },
1299        );
1300        let cfg = E2eConfig {
1301            call: CallConfig {
1302                function: "scrape".to_string(),
1303                ..Default::default()
1304            },
1305            calls,
1306            ..Default::default()
1307        };
1308        let input = serde_json::json!({ "url": "https://example.com" });
1309        let resolved = cfg.resolve_call_for_fixture(None, "crawl_basic", "crawl", &[], &input);
1310        assert_eq!(resolved.function, "crawl");
1311        let resolved = cfg.resolve_call_for_fixture(None, "scrape_basic", "scrape", &[], &input);
1312        assert_eq!(resolved.function, "scrape");
1313    }
1314
1315    // --- effective_* resolver helpers ---
1316
1317    #[test]
1318    fn effective_result_fields_returns_global_when_call_is_empty() {
1319        let mut global = HashSet::new();
1320        global.insert("url".to_string());
1321        let cfg = E2eConfig {
1322            result_fields: global.clone(),
1323            ..Default::default()
1324        };
1325        let call = CallConfig::default();
1326        assert_eq!(cfg.effective_result_fields(&call), &global);
1327    }
1328
1329    #[test]
1330    fn effective_result_fields_call_override_wins_over_global() {
1331        let mut global = HashSet::new();
1332        global.insert("url".to_string());
1333        let mut per_call = HashSet::new();
1334        per_call.insert("pages".to_string());
1335        per_call.insert("final_url".to_string());
1336        let cfg = E2eConfig {
1337            result_fields: global,
1338            ..Default::default()
1339        };
1340        let call = CallConfig {
1341            result_fields: per_call.clone(),
1342            ..Default::default()
1343        };
1344        assert_eq!(cfg.effective_result_fields(&call), &per_call);
1345    }
1346
1347    #[test]
1348    fn effective_fields_returns_global_when_call_is_empty() {
1349        let mut global = HashMap::new();
1350        global.insert("metadata.title".to_string(), "metadata.document.title".to_string());
1351        let cfg = E2eConfig {
1352            fields: global.clone(),
1353            ..Default::default()
1354        };
1355        let call = CallConfig::default();
1356        assert_eq!(cfg.effective_fields(&call), &global);
1357    }
1358
1359    #[test]
1360    fn effective_fields_call_override_wins_over_global() {
1361        let mut global = HashMap::new();
1362        global.insert("a".to_string(), "b".to_string());
1363        let mut per_call = HashMap::new();
1364        per_call.insert("x".to_string(), "y".to_string());
1365        let cfg = E2eConfig {
1366            fields: global,
1367            ..Default::default()
1368        };
1369        let call = CallConfig {
1370            fields: per_call.clone(),
1371            ..Default::default()
1372        };
1373        assert_eq!(cfg.effective_fields(&call), &per_call);
1374    }
1375
1376    #[test]
1377    fn effective_fields_optional_returns_global_when_call_is_empty() {
1378        let mut global = HashSet::new();
1379        global.insert("segments".to_string());
1380        let cfg = E2eConfig {
1381            fields_optional: global.clone(),
1382            ..Default::default()
1383        };
1384        let call = CallConfig::default();
1385        assert_eq!(cfg.effective_fields_optional(&call), &global);
1386    }
1387
1388    #[test]
1389    fn effective_fields_optional_call_override_wins_over_global() {
1390        let mut global = HashSet::new();
1391        global.insert("segments".to_string());
1392        let mut per_call = HashSet::new();
1393        per_call.insert("pages".to_string());
1394        let cfg = E2eConfig {
1395            fields_optional: global,
1396            ..Default::default()
1397        };
1398        let call = CallConfig {
1399            fields_optional: per_call.clone(),
1400            ..Default::default()
1401        };
1402        assert_eq!(cfg.effective_fields_optional(&call), &per_call);
1403    }
1404
1405    #[test]
1406    fn effective_fields_array_returns_global_when_call_is_empty() {
1407        let mut global = HashSet::new();
1408        global.insert("choices".to_string());
1409        let cfg = E2eConfig {
1410            fields_array: global.clone(),
1411            ..Default::default()
1412        };
1413        let call = CallConfig::default();
1414        assert_eq!(cfg.effective_fields_array(&call), &global);
1415    }
1416
1417    #[test]
1418    fn effective_fields_array_call_override_wins_over_global() {
1419        let mut global = HashSet::new();
1420        global.insert("choices".to_string());
1421        let mut per_call = HashSet::new();
1422        per_call.insert("pages".to_string());
1423        let cfg = E2eConfig {
1424            fields_array: global,
1425            ..Default::default()
1426        };
1427        let call = CallConfig {
1428            fields_array: per_call.clone(),
1429            ..Default::default()
1430        };
1431        assert_eq!(cfg.effective_fields_array(&call), &per_call);
1432    }
1433
1434    #[test]
1435    fn effective_fields_method_calls_returns_global_when_call_is_empty() {
1436        let mut global = HashSet::new();
1437        global.insert("metadata.format".to_string());
1438        let cfg = E2eConfig {
1439            fields_method_calls: global.clone(),
1440            ..Default::default()
1441        };
1442        let call = CallConfig::default();
1443        assert_eq!(cfg.effective_fields_method_calls(&call), &global);
1444    }
1445
1446    #[test]
1447    fn effective_fields_method_calls_call_override_wins_over_global() {
1448        let mut global = HashSet::new();
1449        global.insert("metadata.format".to_string());
1450        let mut per_call = HashSet::new();
1451        per_call.insert("pages.status".to_string());
1452        let cfg = E2eConfig {
1453            fields_method_calls: global,
1454            ..Default::default()
1455        };
1456        let call = CallConfig {
1457            fields_method_calls: per_call.clone(),
1458            ..Default::default()
1459        };
1460        assert_eq!(cfg.effective_fields_method_calls(&call), &per_call);
1461    }
1462
1463    #[test]
1464    fn effective_fields_enum_returns_global_when_call_is_empty() {
1465        let mut global = HashSet::new();
1466        global.insert("choices.finish_reason".to_string());
1467        let cfg = E2eConfig {
1468            fields_enum: global.clone(),
1469            ..Default::default()
1470        };
1471        let call = CallConfig::default();
1472        assert_eq!(cfg.effective_fields_enum(&call), &global);
1473    }
1474
1475    #[test]
1476    fn effective_fields_enum_call_override_wins_over_global() {
1477        let mut global = HashSet::new();
1478        global.insert("choices.finish_reason".to_string());
1479        let mut per_call = HashSet::new();
1480        per_call.insert("assets.category".to_string());
1481        let cfg = E2eConfig {
1482            fields_enum: global,
1483            ..Default::default()
1484        };
1485        let call = CallConfig {
1486            fields_enum: per_call.clone(),
1487            ..Default::default()
1488        };
1489        assert_eq!(cfg.effective_fields_enum(&call), &per_call);
1490    }
1491
1492    #[test]
1493    fn effective_fields_c_types_returns_global_when_call_is_empty() {
1494        let mut global = HashMap::new();
1495        global.insert("conversion_result.metadata".to_string(), "HtmlMetadata".to_string());
1496        let cfg = E2eConfig {
1497            fields_c_types: global.clone(),
1498            ..Default::default()
1499        };
1500        let call = CallConfig::default();
1501        assert_eq!(cfg.effective_fields_c_types(&call), &global);
1502    }
1503
1504    #[test]
1505    fn effective_fields_c_types_call_override_wins_over_global() {
1506        let mut global = HashMap::new();
1507        global.insert("conversion_result.metadata".to_string(), "HtmlMetadata".to_string());
1508        let mut per_call = HashMap::new();
1509        per_call.insert("crawl_result.pages".to_string(), "PageResult".to_string());
1510        let cfg = E2eConfig {
1511            fields_c_types: global,
1512            ..Default::default()
1513        };
1514        let call = CallConfig {
1515            fields_c_types: per_call.clone(),
1516            ..Default::default()
1517        };
1518        assert_eq!(cfg.effective_fields_c_types(&call), &per_call);
1519    }
1520
1521    #[test]
1522    fn effective_resolver_helpers_deserialize_from_toml() {
1523        let toml = r#"
1524[call]
1525function = "scrape"
1526result_fields = ["url", "markdown"]
1527fields_enum = ["status"]
1528
1529[call.fields]
1530"meta.title" = "meta.document.title"
1531
1532[call.fields_c_types]
1533"scrape_result.meta" = "MetaResult"
1534"#;
1535        let cfg: E2eConfig = toml::from_str(toml).expect("must deserialize");
1536        let call = &cfg.call;
1537        assert!(cfg.effective_result_fields(call).contains("url"));
1538        assert!(cfg.effective_result_fields(call).contains("markdown"));
1539        assert!(cfg.effective_fields_enum(call).contains("status"));
1540        assert_eq!(
1541            cfg.effective_fields(call).get("meta.title").map(String::as_str),
1542            Some("meta.document.title")
1543        );
1544        assert_eq!(
1545            cfg.effective_fields_c_types(call)
1546                .get("scrape_result.meta")
1547                .map(String::as_str),
1548            Some("MetaResult")
1549        );
1550    }
1551}