Skip to main content

alef_core/config/
e2e.rs

1//! E2E test generation configuration types.
2
3use serde::{Deserialize, Serialize};
4use std::collections::{HashMap, HashSet};
5
/// Controls whether generated e2e test projects reference the package under
/// test via a local path (for development) or a registry version string
/// (for standalone `test_apps` that consumers can run without the monorepo).
///
/// Variants are (de)serialized as lowercase strings (`"local"` / `"registry"`)
/// because of `#[serde(rename_all = "lowercase")]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DependencyMode {
    /// Local path dependency (default) — used during normal e2e development.
    #[default]
    Local,
    /// Registry dependency — generates standalone test apps that pull the
    /// package from its published registry (PyPI, npm, crates.io, etc.).
    Registry,
}
19
/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
///
/// Every field has a serde default, so an empty `[e2e.registry]` table (or a
/// missing one, via [`Default`]) is valid.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegistryConfig {
    /// Output directory for registry-mode test apps (default: "test_apps").
    #[serde(default = "default_test_apps_dir")]
    pub output: String,
    /// Per-language package overrides used only in registry mode.
    /// Merged on top of the base `[e2e.packages]` entries.
    #[serde(default)]
    pub packages: HashMap<String, PackageRef>,
    /// When non-empty, only fixture categories in this list are included in
    /// registry-mode generation (useful for shipping a curated subset).
    #[serde(default)]
    pub categories: Vec<String>,
    /// GitHub repository URL for downloading prebuilt artifacts (e.g., FFI
    /// shared libraries) from GitHub Releases.
    ///
    /// Falls back to `[scaffold] repository` when not set, then to
    /// `https://github.com/kreuzberg-dev/{crate.name}`.
    #[serde(default)]
    pub github_repo: Option<String>,
}
42
43impl Default for RegistryConfig {
44    fn default() -> Self {
45        Self {
46            output: default_test_apps_dir(),
47            packages: HashMap::new(),
48            categories: Vec::new(),
49            github_repo: None,
50        }
51    }
52}
53
/// Serde default for `RegistryConfig::output`: the `"test_apps"` directory.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
57
/// Root e2e configuration from `[e2e]` section of alef.toml.
///
/// All fields except `call` carry a serde default (or are skipped), so `call`
/// is the only key that must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct E2eConfig {
    /// Directory containing fixture JSON files (default: "fixtures").
    #[serde(default = "default_fixtures_dir")]
    pub fixtures: String,
    /// Output directory for generated e2e test projects (default: "e2e").
    #[serde(default = "default_output_dir")]
    pub output: String,
    /// Repo-root-relative directory holding binary file fixtures referenced by
    /// `file_path` / `bytes` fixture args (default: "test_documents").
    ///
    /// Backends that emit chdir / setup hooks for file-based fixtures resolve
    /// the relative path from the test-emission directory via
    /// [`E2eConfig::test_documents_relative_from`]. The default matches the
    /// kreuzberg convention; downstream crates whose fixtures don't reference
    /// files (e.g. liter-llm, which uses pure mock-server fixtures) can leave
    /// the default in place — backends conditionally emit the setup only when
    /// fixtures actually need it.
    #[serde(default = "default_test_documents_dir")]
    pub test_documents_dir: String,
    /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
    #[serde(default)]
    pub languages: Vec<String>,
    /// Default function call configuration.
    ///
    /// Unlike the surrounding fields this one has no `#[serde(default)]`, so
    /// deserialization requires an `[e2e.call]` entry to be present.
    pub call: CallConfig,
    /// Named additional call configurations for multi-function testing.
    /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
    #[serde(default)]
    pub calls: HashMap<String, CallConfig>,
    /// Per-language package reference overrides.
    #[serde(default)]
    pub packages: HashMap<String, PackageRef>,
    /// Per-language formatter commands.
    #[serde(default)]
    pub format: HashMap<String, String>,
    /// Field path aliases: maps fixture field paths to actual API struct paths.
    /// E.g., "metadata.title" -> "metadata.document.title"
    /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
    #[serde(default)]
    pub fields: HashMap<String, String>,
    /// Fields that are Optional/nullable in the return type.
    /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
    #[serde(default)]
    pub fields_optional: HashSet<String>,
    /// Fields that are arrays/Vecs on the result type.
    /// When a fixture path like `json_ld.name` traverses an array field, the
    /// accessor adds `[0]` (or language equivalent) to index into the first element.
    #[serde(default)]
    pub fields_array: HashSet<String>,
    /// Fields where the accessor is a method call (appends `()`) rather than a field access.
    /// Rust-specific: Java always uses `()`, Python/PHP use field access.
    /// Listed as the full resolved field path (after alias resolution).
    /// E.g., `"metadata.format.excel"` means `.excel` should be emitted as `.excel()`.
    #[serde(default)]
    pub fields_method_calls: HashSet<String>,
    /// Known top-level fields on the result type.
    ///
    /// When non-empty, assertions whose resolved field path starts with a
    /// segment that is NOT in this set are emitted as comments (skipped)
    /// instead of executable assertions.  This prevents broken assertions
    /// when fixtures reference fields from a different operation (e.g.,
    /// `batch.completed_count` on a `ScrapeResult`).
    #[serde(default)]
    pub result_fields: HashSet<String>,
    /// Fixture categories excluded from cross-language e2e codegen.
    ///
    /// Fixtures whose resolved category matches an entry in this set are
    /// skipped by every per-language e2e generator — no test is emitted at
    /// all (no skip directive, no commented-out body). The fixture files stay
    /// on disk and remain available to Rust integration tests inside the
    /// consumer crate's own `tests/` directory.
    ///
    /// Use this to keep fixtures that exercise internal middleware (cache,
    /// proxy, budget, hooks, etc.) out of bindings whose public surface does
    /// not expose those layers.
    ///
    /// Example:
    /// ```toml
    /// [e2e]
    /// exclude_categories = ["cache", "proxy", "budget", "hooks"]
    /// ```
    #[serde(default)]
    pub exclude_categories: HashSet<String>,
    /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
    /// PascalCase return type name (without prefix).
    ///
    /// Used by the C e2e generator to emit chained FFI accessor calls for
    /// nested field paths. The root type is always `conversion_result`.
    ///
    /// Example:
    /// ```toml
    /// [e2e.fields_c_types]
    /// "conversion_result.metadata" = "HtmlMetadata"
    /// "html_metadata.document" = "DocumentMetadata"
    /// ```
    #[serde(default)]
    pub fields_c_types: HashMap<String, String>,
    /// Fields whose resolved type is an enum in the generated bindings.
    ///
    /// When a `contains` / `contains_all` / etc. assertion targets one of these
    /// fields, language generators that cannot call `.contains()` directly on an
    /// enum (e.g., Java) will emit a string-conversion call first.  For Java,
    /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
    /// method that all alef-generated Java enums expose — to obtain the lowercase
    /// serde string before performing the string comparison.
    ///
    /// Both the raw fixture field path (before alias resolution) and the resolved
    /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
    /// use either form:
    ///
    /// ```toml
    /// # Raw fixture field:
    /// fields_enum = ["links[].link_type", "assets[].category"]
    /// # …or the resolved (aliased) field name:
    /// fields_enum = ["links[].link_type", "assets[].asset_category"]
    /// ```
    #[serde(default)]
    pub fields_enum: HashSet<String>,
    /// Dependency mode: `Local` (default) or `Registry`.
    /// Set at runtime via `--registry` CLI flag. Because of `#[serde(skip)]`
    /// it is neither read from nor written to TOML; a deserialized config
    /// always starts with `DependencyMode::default()`.
    #[serde(skip)]
    pub dep_mode: DependencyMode,
    /// Registry-mode configuration from `[e2e.registry]`.
    #[serde(default)]
    pub registry: RegistryConfig,
}
185
186impl E2eConfig {
187    /// Resolve the call config for a fixture. Uses the named call if specified,
188    /// otherwise falls back to the default `[e2e.call]`.
189    pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
190        match call_name {
191            Some(name) => self.calls.get(name).unwrap_or(&self.call),
192            None => &self.call,
193        }
194    }
195
196    /// Resolve the call config for a fixture, applying `select_when` auto-routing.
197    ///
198    /// When the fixture has an explicit `call` name, that named config is returned
199    /// (same as [`resolve_call`]).  When the fixture has no explicit call, the method
200    /// scans named calls for a [`SelectWhen`] condition that matches the fixture's
201    /// shape (id, category, tags, input) and returns the first match.  If no condition
202    /// matches, it falls back to the default `[e2e.call]`.
203    ///
204    /// All non-`None` discriminators on a `SelectWhen` must match (logical AND) for
205    /// the condition to fire. A `SelectWhen` with every field `None` never matches —
206    /// at least one discriminator must be set.
207    pub fn resolve_call_for_fixture(
208        &self,
209        call_name: Option<&str>,
210        fixture_id: &str,
211        fixture_category: &str,
212        fixture_tags: &[String],
213        fixture_input: &serde_json::Value,
214    ) -> &CallConfig {
215        if let Some(name) = call_name {
216            return self.calls.get(name).unwrap_or(&self.call);
217        }
218        // Auto-route by select_when condition. Deterministic order: sort by call name.
219        let mut names: Vec<&String> = self.calls.keys().collect();
220        names.sort();
221        for name in names {
222            let call_config = &self.calls[name];
223            if let Some(sel) = &call_config.select_when {
224                if sel.matches(fixture_id, fixture_category, fixture_tags, fixture_input) {
225                    return call_config;
226                }
227            }
228        }
229        &self.call
230    }
231
232    /// Resolve the effective package reference for a language.
233    ///
234    /// In registry mode, entries from `[e2e.registry.packages]` are merged on
235    /// top of the base `[e2e.packages]` — registry overrides win for any field
236    /// that is `Some`.
237    pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
238        let base = self.packages.get(lang);
239        if self.dep_mode == DependencyMode::Registry {
240            let reg = self.registry.packages.get(lang);
241            match (base, reg) {
242                (Some(b), Some(r)) => Some(PackageRef {
243                    name: r.name.clone().or_else(|| b.name.clone()),
244                    path: r.path.clone().or_else(|| b.path.clone()),
245                    module: r.module.clone().or_else(|| b.module.clone()),
246                    version: r.version.clone().or_else(|| b.version.clone()),
247                }),
248                (None, Some(r)) => Some(r.clone()),
249                (Some(b), None) => Some(b.clone()),
250                (None, None) => None,
251            }
252        } else {
253            base.cloned()
254        }
255    }
256
257    /// Return the effective `result_fields` for `call`.
258    ///
259    /// Returns `call.result_fields` when non-empty, otherwise the global
260    /// `self.result_fields`.
261    pub fn effective_result_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
262        if !call.result_fields.is_empty() {
263            &call.result_fields
264        } else {
265            &self.result_fields
266        }
267    }
268
269    /// Return the effective `fields` alias map for `call`.
270    pub fn effective_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
271        if !call.fields.is_empty() {
272            &call.fields
273        } else {
274            &self.fields
275        }
276    }
277
278    /// Return the effective `fields_optional` for `call`.
279    pub fn effective_fields_optional<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
280        if !call.fields_optional.is_empty() {
281            &call.fields_optional
282        } else {
283            &self.fields_optional
284        }
285    }
286
287    /// Return the effective `fields_array` for `call`.
288    pub fn effective_fields_array<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
289        if !call.fields_array.is_empty() {
290            &call.fields_array
291        } else {
292            &self.fields_array
293        }
294    }
295
296    /// Return the effective `fields_method_calls` for `call`.
297    pub fn effective_fields_method_calls<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
298        if !call.fields_method_calls.is_empty() {
299            &call.fields_method_calls
300        } else {
301            &self.fields_method_calls
302        }
303    }
304
305    /// Return the effective `fields_enum` for `call`.
306    pub fn effective_fields_enum<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
307        if !call.fields_enum.is_empty() {
308            &call.fields_enum
309        } else {
310            &self.fields_enum
311        }
312    }
313
314    /// Return the effective `fields_c_types` for `call`.
315    pub fn effective_fields_c_types<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
316        if !call.fields_c_types.is_empty() {
317            &call.fields_c_types
318        } else {
319            &self.fields_c_types
320        }
321    }
322
323    /// Return the effective output directory: `registry.output` in registry
324    /// mode, `output` otherwise.
325    pub fn effective_output(&self) -> &str {
326        if self.dep_mode == DependencyMode::Registry {
327            &self.registry.output
328        } else {
329            &self.output
330        }
331    }
332
333    /// Relative path from a backend's emission directory to the
334    /// `test_documents_dir` at the repo root.
335    ///
336    /// `emission_depth` counts the number of additional `../` segments needed
337    /// to reach `<output>/<lang>/` from where the file is being emitted:
338    ///
339    /// * `0` — emitted directly at `e2e/<lang>/` (e.g. dart, zig `build.zig`)
340    /// * `1` — emitted at `e2e/<lang>/<sub>/` (e.g. ruby `spec/`, R `tests/`)
341    /// * `2` — emitted at `e2e/<lang>/<sub1>/<sub2>/`
342    ///
343    /// The base prefix is two segments above `<output>/<lang>/` (i.e.
344    /// `../../`), matching the canonical layout where `<output>` (default
345    /// `"e2e"`) sits at the repo root next to the configured
346    /// `test_documents_dir`.
347    pub fn test_documents_relative_from(&self, emission_depth: usize) -> String {
348        let mut up = String::from("../../");
349        for _ in 0..emission_depth {
350            up.push_str("../");
351        }
352        format!("{up}{}", self.test_documents_dir)
353    }
354}
355
/// Serde default for `E2eConfig::fixtures`: the `"fixtures"` directory.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
359
/// Serde default for `E2eConfig::output`: the `"e2e"` directory.
fn default_output_dir() -> String {
    String::from("e2e")
}
363
/// Serde default for `E2eConfig::test_documents_dir`: `"test_documents"`.
fn default_test_documents_dir() -> String {
    String::from("test_documents")
}
367
368/// Hand-rolled `Default` so the `test_documents_dir` field receives its
369/// `default_test_documents_dir()` value (`"test_documents"`) when callers use
370/// `..Default::default()` to construct an `E2eConfig` literally rather than
371/// going through `serde::Deserialize`. Without this, `derive(Default)` would
372/// fall back to `String::default()` (i.e. the empty string), and any backend
373/// computing `test_documents_relative_from(0)` would emit `"../../"` (no dir
374/// component), breaking generated chdir hooks.
375impl Default for E2eConfig {
376    fn default() -> Self {
377        Self {
378            fixtures: default_fixtures_dir(),
379            output: default_output_dir(),
380            test_documents_dir: default_test_documents_dir(),
381            languages: Vec::new(),
382            call: CallConfig::default(),
383            calls: HashMap::new(),
384            packages: HashMap::new(),
385            format: HashMap::new(),
386            fields: HashMap::new(),
387            fields_optional: HashSet::new(),
388            fields_array: HashSet::new(),
389            fields_method_calls: HashSet::new(),
390            result_fields: HashSet::new(),
391            exclude_categories: HashSet::new(),
392            fields_c_types: HashMap::new(),
393            fields_enum: HashSet::new(),
394            dep_mode: DependencyMode::default(),
395            registry: RegistryConfig::default(),
396        }
397    }
398}
399
/// Configuration for the function call in each test.
///
/// NOTE: `Default` is derived, so `CallConfig::default()` uses each field
/// type's own default (e.g. an empty `result_var`, `returns_result = false`).
/// The `#[serde(default = "...")]` functions below apply only during
/// deserialization.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CallConfig {
    /// Per-call override for `result_fields`.
    ///
    /// When non-empty, this set replaces the global `[e2e].result_fields` for
    /// fixtures routed to this call.  Use this when different API functions return
    /// differently-shaped structs so each call can gate its own field set.
    ///
    /// Example:
    /// ```toml
    /// [e2e.calls.crawl]
    /// result_fields = ["pages", "final_url", "stayed_on_domain"]
    /// ```
    #[serde(default)]
    pub result_fields: HashSet<String>,
    /// Per-call override for `[e2e].fields` alias map.
    ///
    /// When non-empty, replaces (not merges with) the global `fields` map for
    /// fixtures routed to this call.
    #[serde(default)]
    pub fields: HashMap<String, String>,
    /// Per-call override for `[e2e].fields_optional`.
    #[serde(default)]
    pub fields_optional: HashSet<String>,
    /// Per-call override for `[e2e].fields_array`.
    #[serde(default)]
    pub fields_array: HashSet<String>,
    /// Per-call override for `[e2e].fields_method_calls`.
    #[serde(default)]
    pub fields_method_calls: HashSet<String>,
    /// Per-call override for `[e2e].fields_enum`.
    #[serde(default)]
    pub fields_enum: HashSet<String>,
    /// Per-call override for `[e2e].fields_c_types`.
    #[serde(default)]
    pub fields_c_types: HashMap<String, String>,
    /// The function name (alef applies language naming conventions).
    #[serde(default)]
    pub function: String,
    /// The module/package where the function lives.
    #[serde(default)]
    pub module: String,
    /// Variable name for the return value (default: "result").
    #[serde(default = "default_result_var")]
    pub result_var: String,
    /// Whether the function is async.
    #[serde(default)]
    pub r#async: bool,
    /// HTTP endpoint path for mock server routing (e.g., `"/v1/chat/completions"`).
    ///
    /// Required when fixtures use `mock_response`. The Rust e2e generator uses
    /// this to build the `MockRoute` that the mock server matches against.
    #[serde(default)]
    pub path: Option<String>,
    /// HTTP method for mock server routing (default: `"POST"`).
    ///
    /// Used together with `path` when building `MockRoute` entries.
    #[serde(default)]
    pub method: Option<String>,
    /// How fixture `input` fields map to function arguments.
    #[serde(default)]
    pub args: Vec<ArgMapping>,
    /// Per-language overrides for module/function/etc.
    #[serde(default)]
    pub overrides: HashMap<String, CallOverride>,
    /// Whether the function returns `Result<T, E>` in its native binding.
    /// Defaults to `true`. When `false`, generators that distinguish Result-returning
    /// from non-Result-returning calls (currently Rust) will skip the
    /// `.expect("should succeed")` unwrap and bind the raw return value directly.
    #[serde(default = "default_returns_result")]
    pub returns_result: bool,
    /// Whether the function returns only an error/unit — i.e., `Result<(), E>`.
    ///
    /// When combined with `returns_result = true`, Go generators emit `err := func()`
    /// (single return value) rather than `_, err := func()` (two return values).
    /// This is needed for functions like `validate_host` that return only `error` in Go.
    #[serde(default)]
    pub returns_void: bool,
    /// Language names for which this call is skipped (empty by default).
    ///
    /// NOTE(review): presumably generators for the listed languages emit no
    /// test for fixtures routed to this call — the consuming code is not in
    /// this file; confirm against the per-language generators.
    #[serde(default)]
    pub skip_languages: Vec<String>,
    /// When `true`, the function returns a primitive (e.g. `String`, `bool`,
    /// `i32`) rather than a struct.  Generators that would otherwise emit
    /// `result.<field>` will fall back to the bare result variable.
    ///
    /// This is a property of the Rust core's return type and therefore identical
    /// across every binding — set it on the call, not in per-language overrides.
    /// The same flag is also accepted under `[e2e.calls.<name>.overrides.<lang>]`
    /// for backwards compatibility, but the call-level value takes precedence.
    #[serde(default)]
    pub result_is_simple: bool,
    /// When `true`, the function returns `Vec<T>` / `Array<T>`.  Generators that
    /// support per-element field assertions (rust, csharp) iterate or index into
    /// the result; the typescript codegen indexes `[0]` to mirror csharp.
    ///
    /// As with `result_is_simple`, this is a Rust-side property — set it on the
    /// call, not on per-language overrides. Per-language overrides remain
    /// supported for backwards compatibility.
    #[serde(default)]
    pub result_is_vec: bool,
    /// When `true` (combined with `result_is_simple`), the simple return is a
    /// slice/array (e.g., `Vec<String>` → `string[]` in TS).
    #[serde(default)]
    pub result_is_array: bool,
    /// When `true`, the function returns a raw byte array (`Vec<u8>` →
    /// `Uint8Array` / `[]byte` / `byte[]`).
    #[serde(default)]
    pub result_is_bytes: bool,
    /// Three-valued opt-in/out for streaming-virtual-field auto-detection.
    ///
    /// - `Some(true)`: force streaming semantics regardless of fixture shape.
    /// - `Some(false)`: disable streaming auto-detection — assertions referencing
    ///   fields like `chunks` / `chunks.length` / `tool_calls` / `finish_reason`
    ///   are treated as plain field accessors on the result, not streaming
    ///   adapters. Use this when your API has a `chunks` field that is a regular
    ///   list (not an async stream).
    /// - `None` (default): auto-detect — treat as streaming when either the
    ///   fixture provides a streaming `mock_response` or any assertion references
    ///   a hard-coded streaming-virtual-field name.
    #[serde(default)]
    pub streaming: Option<bool>,
    /// When `true`, the function returns `Option<T>`.
    #[serde(default)]
    pub result_is_option: bool,
    /// Automatic fixture-routing condition.
    ///
    /// When set, a fixture whose `call` field is `None` is routed to this named call config
    /// if the condition is satisfied.  This avoids the need to tag every fixture with
    /// `"call": "batch_scrape"` when the fixture shape already identifies the call.
    ///
    /// Example (`alef.toml`):
    /// ```toml
    /// [e2e.calls.batch_scrape]
    /// select_when = { input_has = "batch_urls" }
    /// ```
    #[serde(default)]
    pub select_when: Option<SelectWhen>,
}
539
/// Serde default for `CallConfig::result_var`: the variable name `"result"`.
fn default_result_var() -> String {
    String::from("result")
}
543
/// Serde default for `CallConfig::returns_result`.
///
/// The field is documented as defaulting to `true`: a call is assumed to
/// return `Result<T, E>` unless the config explicitly opts out with
/// `returns_result = false`. Returning `false` here would silently make every
/// call that omits the key non-`Result`, contradicting that contract.
fn default_returns_result() -> bool {
    true
}
547
/// Condition for auto-selecting a named call config when the fixture matches.
///
/// When a fixture does not specify `"call"`, the codegen normally uses the default
/// `[e2e.call]`.  A `SelectWhen` condition on a named call allows automatic routing
/// based on the fixture's id, category, tags, or input shape.  All set fields must
/// match (logical AND); a condition with no fields set never matches.
///
/// Every field is optional and defaults to `None` (unset).
///
/// ```toml
/// [e2e.calls.batch_scrape]
/// select_when = { input_has = "batch_urls" }
///
/// [e2e.calls.crawl]
/// select_when = { category = "crawl" }
///
/// [e2e.calls.batch_crawl_stream]
/// select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct SelectWhen {
    /// Match when the fixture's resolved category equals this string.
    #[serde(default)]
    pub category: Option<String>,
    /// Match when the fixture's id starts with this prefix.
    #[serde(default)]
    pub id_prefix: Option<String>,
    /// Match when the fixture's id matches this simple glob.
    ///
    /// Only `*` (matches any run of characters) is supported. Use `id_prefix`
    /// for plain prefix matches. The pattern is matched against the whole id:
    /// a pattern without `*` must equal the id exactly.
    #[serde(default)]
    pub id_glob: Option<String>,
    /// Match when the fixture's tags include this tag.
    #[serde(default)]
    pub tag: Option<String>,
    /// Match when the fixture's input object contains this key with a non-null value.
    #[serde(default)]
    pub input_has: Option<String>,
}
586
587impl SelectWhen {
588    /// Returns true when every set discriminator matches the fixture.
589    ///
590    /// A `SelectWhen` with all fields `None` returns `false` — at least one
591    /// discriminator must be set for the condition to fire.
592    pub fn matches(
593        &self,
594        fixture_id: &str,
595        fixture_category: &str,
596        fixture_tags: &[String],
597        fixture_input: &serde_json::Value,
598    ) -> bool {
599        let any_set = self.category.is_some()
600            || self.id_prefix.is_some()
601            || self.id_glob.is_some()
602            || self.tag.is_some()
603            || self.input_has.is_some();
604        if !any_set {
605            return false;
606        }
607        if let Some(cat) = &self.category
608            && cat.as_str() != fixture_category
609        {
610            return false;
611        }
612        if let Some(prefix) = &self.id_prefix
613            && !fixture_id.starts_with(prefix.as_str())
614        {
615            return false;
616        }
617        if let Some(glob) = &self.id_glob
618            && !glob_matches(glob, fixture_id)
619        {
620            return false;
621        }
622        if let Some(tag) = &self.tag
623            && !fixture_tags.iter().any(|t| t == tag)
624        {
625            return false;
626        }
627        if let Some(key) = &self.input_has {
628            let val = fixture_input.get(key.as_str()).unwrap_or(&serde_json::Value::Null);
629            if val.is_null() {
630                return false;
631            }
632        }
633        true
634    }
635}
636
/// Minimal glob matcher in which `*` stands for any (possibly empty) run of
/// characters; no other metacharacter is recognised.
///
/// The whole of `text` must match: a pattern without `*` is compared for
/// equality, text before the first `*` is an anchored prefix, and text after
/// the last `*` is an anchored suffix. Segments between two `*` are located
/// left to right at their first occurrence.
fn glob_matches(pattern: &str, text: &str) -> bool {
    // No wildcard at all: plain equality.
    let Some((prefix, tail)) = pattern.split_once('*') else {
        return pattern == text;
    };

    // Everything before the first `*` must open the text.
    let Some(mut remaining) = text.strip_prefix(prefix) else {
        return false;
    };

    let mut segments = tail.split('*').peekable();
    while let Some(segment) = segments.next() {
        // Empty segments come from a trailing `*` or from `**`; they match nothing.
        if segment.is_empty() {
            continue;
        }
        // A non-empty final segment means the pattern does not end in `*`,
        // so it has to close the text.
        if segments.peek().is_none() {
            return remaining.ends_with(segment);
        }
        match remaining.find(segment) {
            Some(at) => remaining = &remaining[at + segment.len()..],
            None => return false,
        }
    }
    true
}
664
/// Maps a fixture input field to a function argument.
///
/// `name` and `field` have no serde default and are therefore required in
/// every `args` entry; all other keys are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArgMapping {
    /// Argument name in the function signature.
    pub name: String,
    /// JSON field path in the fixture's `input` object.
    pub field: String,
    /// Type hint for code generation.
    ///
    /// Spelled `type` in the config file (serde rename); defaults to
    /// `"string"` when omitted.
    #[serde(rename = "type", default = "default_arg_type")]
    pub arg_type: String,
    /// Whether this argument is optional.
    #[serde(default)]
    pub optional: bool,
    /// When `true`, the Rust codegen passes this argument by value (owned) rather than
    /// by reference. Use for `Vec<T>` parameters that do not accept `&Vec<T>`.
    #[serde(default)]
    pub owned: bool,
    /// For `json_object` args targeting `&[T]` Rust parameters, set to the element type
    /// (e.g. `"f32"`, `"String"`) so the codegen emits `Vec<element_type>` annotation.
    #[serde(default)]
    pub element_type: Option<String>,
    /// Override the Go slice element type for `json_object` array args.
    ///
    /// When set, the Go e2e codegen uses this as the element type instead of the default
    /// derived from `element_type`. Use Go-idiomatic type names including the import alias
    /// prefix where needed, e.g. `"kreuzberg.BatchBytesItem"` or `"string"`.
    #[serde(default)]
    pub go_type: Option<String>,
}
694
/// Serde default for `ArgMapping::arg_type`: the `"string"` type hint.
fn default_arg_type() -> String {
    String::from("string")
}
698
699/// Per-language override for function call configuration.
700#[derive(Debug, Clone, Serialize, Deserialize, Default)]
701pub struct CallOverride {
702    /// Override the module/import path.
703    #[serde(default)]
704    pub module: Option<String>,
705    /// Override the function name.
706    #[serde(default)]
707    pub function: Option<String>,
708    /// Maps canonical argument names to language-specific argument names.
709    ///
710    /// Used when a language binding uses a different parameter name than the
711    /// canonical `args` list in `CallConfig`. For example, if the canonical
712    /// arg name is `doc` but the Python binding uses `html`, specify:
713    ///
714    /// ```toml
715    /// [e2e.call.overrides.python]
716    /// arg_name_map = { doc = "html" }
717    /// ```
718    ///
719    /// The key is the canonical name (from `args[].name`) and the value is the
720    /// name to use when emitting the keyword argument in generated tests.
721    #[serde(default)]
722    pub arg_name_map: HashMap<String, String>,
723    /// Override the crate name (Rust only).
724    #[serde(default)]
725    pub crate_name: Option<String>,
726    /// Override the class name (Java/C# only).
727    #[serde(default)]
728    pub class: Option<String>,
729    /// Import alias (Go only, e.g., `htmd`).
730    #[serde(default)]
731    pub alias: Option<String>,
732    /// C header file name (C only).
733    #[serde(default)]
734    pub header: Option<String>,
735    /// FFI symbol prefix (C only).
736    #[serde(default)]
737    pub prefix: Option<String>,
738    /// For json_object args: the constructor to use instead of raw dict/object.
739    /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
740    /// `new ConversionOptions(options)` in TypeScript.
741    #[serde(default)]
742    pub options_type: Option<String>,
743    /// How to pass json_object args: "kwargs" (default), "dict", "json", or "from_json".
744    ///
745    /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
746    /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
747    /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
748    /// - `"from_json"`: call `OptionsType.from_json('...')` (Python only, PyO3 native types).
749    #[serde(default)]
750    pub options_via: Option<String>,
751    /// Module to import `options_type` from when `options_via = "from_json"`.
752    ///
753    /// When set, a separate `from {from_json_module} import {options_type}` line
754    /// is emitted instead of including the type in the main module import.
755    /// E.g., `"liter_llm._internal_bindings"` for PyO3 native types.
756    #[serde(default)]
757    pub from_json_module: Option<String>,
758    /// Override whether the call is async for this language.
759    ///
760    /// When set, takes precedence over the call-level `async` flag.
761    /// Useful when a language binding uses a different async model — for example,
762    /// a Python binding that returns a sync iterator from a function marked
763    /// `async = true` at the call level.
764    #[serde(default, rename = "async")]
765    pub r#async: Option<bool>,
766    /// Maps fixture option field names to their enum type names.
767    /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
768    /// The generator imports these types and maps string values to enum constants.
769    #[serde(default)]
770    pub enum_fields: HashMap<String, String>,
771    /// Maps result-type field names to their enum type names for assertion routing.
772    /// Per-call so e.g. `BatchObject.status` (enum) and `ResponseObject.status` (string)
773    /// can be disambiguated.
774    #[serde(default)]
775    pub assert_enum_fields: HashMap<String, String>,
776    /// Module to import enum types from (if different from the main module).
777    /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
778    #[serde(default)]
779    pub enum_module: Option<String>,
780    /// Maps nested fixture object field names to their C# type names.
781    /// Used to generate `JsonSerializer.Deserialize<NestedType>(...)` for nested objects.
782    /// E.g., `{"preprocessing": "PreprocessingOptions"}`.
783    #[serde(default)]
784    pub nested_types: HashMap<String, String>,
785    /// When `false`, nested config builder results are passed directly to builder methods
786    /// without wrapping in `Optional.of(...)`. Set to `false` for bindings where nested
787    /// option types are non-optional (e.g., html-to-markdown Java).
788    /// Defaults to `true` for backward compatibility.
789    #[serde(default = "default_true")]
790    pub nested_types_optional: bool,
791    /// When `true`, the function returns a simple type (e.g., `String`) rather
792    /// than a struct.  Generators that would normally emit `result.content`
793    /// (or equivalent field access) will use the result variable directly.
794    #[serde(default)]
795    pub result_is_simple: bool,
796    /// When `true` (and combined with `result_is_simple`), the simple result is
797    /// a slice/array type (e.g., `[]string` in Go, `Vec<String>` in Rust).
798    /// The Go generator uses `strings.Join(value, " ")` for `contains` assertions
799    /// instead of `string(value)`.
800    #[serde(default)]
801    pub result_is_array: bool,
802    /// When `true`, the function returns `Vec<T>` rather than a single value.
803    /// Field-path assertions are emitted as `.iter().all(|r| <accessor>)` so
804    /// every element is checked. (Rust generator.)
805    #[serde(default)]
806    pub result_is_vec: bool,
807    /// When `true`, the function returns a raw byte array (e.g., `byte[]` in Java,
808    /// `[]byte` in Go). Used by generators to select the correct length accessor
809    /// (field `.length` vs method `.length()`).
810    #[serde(default)]
811    pub result_is_bytes: bool,
812    /// When `true`, the function returns `Option<T>`. The result is unwrapped
813    /// before any non-`is_none`/`is_some` assertion runs; `is_empty`/`not_empty`
814    /// assertions map to `is_none()`/`is_some()`. (Rust generator.)
815    #[serde(default)]
816    pub result_is_option: bool,
817    /// When `true`, the R generator emits the call result directly without wrapping
818    /// in `jsonlite::fromJSON()`. Use when the R binding already returns a native
819    /// R list (`Robj`) rather than a JSON string. Field-path assertions still use
820    /// `result$field` accessor syntax (i.e. `result_is_simple` behaviour is NOT
821    /// implied — only the JSON parse wrapper is suppressed). (R generator only.)
822    #[serde(default)]
823    pub result_is_r_list: bool,
824    /// When `true`, the Zig generator treats the result as a `[]u8` JSON string
825    /// representing a struct value (e.g., `ExtractionResult` serialized via the
826    /// FFI `_to_json` helper). The generator parses the JSON with
827    /// `std.json.parseFromSlice(std.json.Value, ...)` before emitting field
828    /// assertions, traversing the dynamic JSON object for each field path.
829    /// (Zig generator only.)
830    #[serde(default)]
831    pub result_is_json_struct: bool,
832    /// When `true`, the Rust generator wraps the `json_object` argument expression
833    /// in `Some(...).clone()` to match an owned `Option<T>` parameter slot rather
834    /// than passing `&options`. (Rust generator only.)
835    #[serde(default)]
836    pub wrap_options_in_some: bool,
837    /// Trailing positional arguments appended verbatim after the configured
838    /// `args`. Used when the target function takes additional positional slots
839    /// (e.g. visitor) the fixture cannot supply directly. (Rust generator only.)
840    #[serde(default)]
841    pub extra_args: Vec<String>,
842    /// Per-rust override of the call-level `returns_result`. When set, takes
843    /// precedence over `CallConfig.returns_result` for the Rust generator only.
844    /// Useful when one binding is fallible while others are not.
845    #[serde(default)]
846    pub returns_result: Option<bool>,
847    /// Maps handle config field names to their Python type constructor names.
848    ///
849    /// When the handle config object contains a nested dict-valued field, the
850    /// generator will wrap it in the specified type using keyword arguments.
851    /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
852    /// instead of `{"mode": "auto"}`.
853    #[serde(default)]
854    pub handle_nested_types: HashMap<String, String>,
855    /// Handle config fields whose type constructor takes a single dict argument
856    /// instead of keyword arguments.
857    ///
858    /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
859    /// `AuthConfig(type="basic", ...)`.
860    #[serde(default)]
861    pub handle_dict_types: HashSet<String>,
862    /// Elixir struct module name for the handle config argument.
863    ///
864    /// When set, the generated Elixir handle config uses struct literal syntax
865    /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
866    /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
867    ///
868    /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
869    #[serde(default)]
870    pub handle_struct_type: Option<String>,
871    /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
872    ///
873    /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
874    /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
875    /// List the field names here so the generator emits atom literals instead of strings.
876    ///
877    /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
878    #[serde(default)]
879    pub handle_atom_list_fields: HashSet<String>,
880    /// WASM config class name for handle args (WASM generator only).
881    ///
882    /// When set, handle args are constructed using `ConfigType.default()` + setters
883    /// instead of passing a plain JS object (which fails `_assertClass` validation).
884    ///
885    /// E.g., `"WasmCrawlConfig"` generates:
886    /// ```js
887    /// const engineConfig = WasmCrawlConfig.default();
888    /// engineConfig.maxDepth = 1;
889    /// const engine = createEngine(engineConfig);
890    /// ```
891    #[serde(default)]
892    pub handle_config_type: Option<String>,
893    /// PHP client factory method name (PHP generator only).
894    ///
895    /// When set, the generated PHP test instantiates a client via
896    /// `ClassName::factory_method('test-key')` and calls methods on the instance
897    /// instead of using static facade calls.
898    ///
899    /// E.g., `"createClient"` generates:
900    /// ```php
901    /// $client = LiterLlm::createClient('test-key');
902    /// $result = $client->chat($request);
903    /// ```
904    #[serde(default)]
905    pub php_client_factory: Option<String>,
906    /// Client factory function name for instance-method languages (WASM, etc.).
907    ///
908    /// When set, the generated test imports this function, creates a client,
909    /// and calls API methods on the instance instead of as top-level functions.
910    ///
911    /// E.g., `"createClient"` generates:
912    /// ```typescript
913    /// import { createClient } from 'pkg';
914    /// const client = createClient('test-key');
915    /// const result = await client.chat(request);
916    /// ```
917    #[serde(default)]
918    pub client_factory: Option<String>,
919    /// Verbatim trailing arguments appended after the fixed `("test-key", ...)` pair
920    /// when calling the `client_factory` function.
921    ///
922    /// Use this when the factory function takes additional positional parameters
923    /// beyond the API key and optional base URL that the generator would otherwise
924    /// emit.  Each element is emitted verbatim, separated by `, `.
925    ///
926    /// Example — Gleam `create_client` takes five positional arguments:
927    /// `(api_key, base_url, timeout_secs, max_retries, model_hint)`.  Set:
928    /// ```toml
929    /// [e2e.call.overrides.gleam]
930    /// client_factory = "create_client"
931    /// client_factory_trailing_args = ["option.None", "option.None", "option.None"]
932    /// ```
933    /// to produce `create_client("test-key", option.Some(url), option.None, option.None, option.None)`.
934    #[serde(default)]
935    pub client_factory_trailing_args: Vec<String>,
936    /// Fields on the options object that require `BigInt()` wrapping (WASM only).
937    ///
938    /// `wasm_bindgen` maps Rust `u64`/`i64` to JavaScript `BigInt`. Numeric
939    /// values assigned to these setters must be wrapped with `BigInt(n)`.
940    ///
941    /// List camelCase field names, e.g.:
942    /// ```toml
943    /// [e2e.call.overrides.wasm]
944    /// bigint_fields = ["maxTokens", "seed"]
945    /// ```
946    #[serde(default)]
947    pub bigint_fields: Vec<String>,
948    /// Static CLI arguments appended to every invocation (brew/CLI generator only).
949    ///
950    /// E.g., `["--format", "json"]` appends `--format json` to every CLI call.
951    #[serde(default)]
952    pub cli_args: Vec<String>,
953    /// Maps fixture config field names to CLI flag names (brew/CLI generator only).
954    ///
955    /// E.g., `{"output_format": "--format"}` generates `--format <value>` from
956    /// the fixture's `output_format` input field.
957    #[serde(default)]
958    pub cli_flags: HashMap<String, String>,
959    /// C FFI opaque result type name (C only).
960    ///
961    /// The PascalCase name of the result struct, without the prefix.
962    /// E.g., `"ChatCompletionResponse"` for `LiterllmChatCompletionResponse*`.
963    /// If not set, defaults to the function name in PascalCase.
964    #[serde(default)]
965    pub result_type: Option<String>,
966    /// Override the argument order for this language binding.
967    ///
968    /// Lists argument names from `args` in the order they should be passed
969    /// to the target function. Useful when a language binding reorders parameters
970    /// relative to the canonical `args` list in `CallConfig`.
971    ///
972    /// E.g., if `args = [path, mime_type, config]` but the Node.js binding
973    /// takes `(path, config, mime_type?)`, specify:
974    /// ```toml
975    /// [e2e.call.overrides.node]
976    /// arg_order = ["path", "config", "mime_type"]
977    /// ```
978    #[serde(default)]
979    pub arg_order: Vec<String>,
980    /// When `true`, `json_object` args with an `options_type` are passed as a
981    /// pointer (`*OptionsType`) rather than a value.  Use for Go bindings where
982    /// the options parameter is `*ConversionOptions` (nil-able pointer) rather
983    /// than a plain struct.
984    ///
985    /// Absent options are passed as `nil`; present options are unmarshalled into
986    /// a local variable and passed as `&optionsVar`.
987    #[serde(default)]
988    pub options_ptr: bool,
989    /// Alternative function name to use when the fixture includes a `visitor`.
990    ///
991    /// Some bindings expose two entry points: `Convert(html, opts)` for the
992    /// plain case and `ConvertWithVisitor(html, opts, visitor)` when a visitor
993    /// is involved.  Set this to the visitor-accepting function name so the
994    /// generator can pick the right symbol automatically.
995    ///
996    /// E.g., `"ConvertWithVisitor"` makes the Go generator emit:
997    /// ```go
998    /// result, err := htmd.ConvertWithVisitor(html, nil, visitor)
999    /// ```
1000    /// instead of `htmd.Convert(html, nil, visitor)` (which would not compile).
1001    #[serde(default)]
1002    pub visitor_function: Option<String>,
1003    /// Rust trait names to import when `client_factory` is set (Rust generator only).
1004    ///
1005    /// When `client_factory` is set, the generated test creates a client object and
1006    /// calls methods on it. Those methods are defined on traits (e.g. `LlmClient`,
1007    /// `FileClient`) that must be in scope. List the trait names here and the Rust
1008    /// generator will emit `use {module}::{trait_name};` for each.
1009    ///
1010    /// E.g.:
1011    /// ```toml
1012    /// [e2e.call.overrides.rust]
1013    /// client_factory = "create_client"
1014    /// trait_imports = ["LlmClient", "FileClient", "BatchClient", "ResponseClient"]
1015    /// ```
1016    #[serde(default)]
1017    pub trait_imports: Vec<String>,
1018    /// Raw C return type, used verbatim instead of `{PREFIX}Type*` (C only).
1019    ///
1020    /// Valid values: `"char*"`, `"int32_t"`, `"uintptr_t"`.
1021    /// When set, the C generator skips options handle construction and uses the
1022    /// raw type directly. Free logic is adjusted accordingly.
1023    #[serde(default)]
1024    pub raw_c_result_type: Option<String>,
1025    /// Free function for raw `char*` C results (C only).
1026    ///
1027    /// Defaults to `{prefix}_free_string` when unset and `raw_c_result_type == "char*"`.
1028    #[serde(default)]
1029    pub c_free_fn: Option<String>,
1030    /// C FFI engine factory pattern (C only).
1031    ///
1032    /// When set, the C generator wraps each test call in a
1033    /// `{prefix}_create_engine(config)` / `{prefix}_crawl_engine_handle_free(engine)`
1034    /// prologue/epilogue using the named config type as the "arg 0" handle type.
1035    ///
1036    /// The value is the PascalCase config type name (without prefix), e.g.
1037    /// `"CrawlConfig"`. The generator will emit:
1038    /// ```c
1039    /// KCRAWLCrawlConfig* config_handle = kcrawl_crawl_config_from_json("{json}");
1040    /// KCRAWLCrawlEngineHandle* engine = kcrawl_create_engine(config_handle);
1041    /// kcrawl_crawl_config_free(config_handle);
1042    /// KCRAWLScrapeResult* result = kcrawl_scrape(engine, url);
1043    /// // ... assertions ...
1044    /// kcrawl_scrape_result_free(result);
1045    /// kcrawl_crawl_engine_handle_free(engine);
1046    /// ```
1047    #[serde(default)]
1048    pub c_engine_factory: Option<String>,
1049    /// Fields in a `json_object` arg that must be wrapped in `java.nio.file.Path.of()`
1050    /// (Java generator only).
1051    ///
1052    /// E.g., `["cache_dir"]` wraps the string value of `cache_dir` so the builder
1053    /// receives `java.nio.file.Path.of("/tmp/dir")` instead of a plain string.
1054    #[serde(default)]
1055    pub path_fields: Vec<String>,
1056    /// Trait name for the visitor pattern (Rust e2e tests only).
1057    ///
1058    /// When a fixture declares a `visitor` block, the Rust e2e generator emits
1059    /// `impl <trait_name> for _TestVisitor { ... }` and imports the trait from
1060    /// `{module}::visitor`. When unset, no visitor block is emitted and fixtures
1061    /// that declare a visitor will cause a codegen error.
1062    ///
1063    /// E.g., `"HtmlVisitor"` generates:
1064    /// ```rust,ignore
1065    /// use html_to_markdown_rs::visitor::{HtmlVisitor, NodeContext, VisitResult};
1066    /// // ...
1067    /// impl HtmlVisitor for _TestVisitor { ... }
1068    /// ```
1069    #[serde(default)]
1070    pub visitor_trait: Option<String>,
1071    /// Maps result field paths to their wasm-bindgen enum class names.
1072    ///
1073    /// wasm-bindgen exposes Rust enums as numeric discriminants in JavaScript
1074    /// (`WasmFinishReason.Stop === 0`), not string variants. When an `equals`
1075    /// assertion targets a field listed here, the WASM generator emits
1076    /// `expect(result.choices[0].finishReason).toBe(WasmFinishReason.Stop)`
1077    /// instead of attempting `(value ?? "").trim()`.
1078    ///
1079    /// The fixture's expected string value is converted to PascalCase to look
1080    /// up the variant (e.g. `"tool_calls"` -> `ToolCalls`).
1081    ///
1082    /// Example:
1083    /// ```toml
1084    /// [e2e.calls.chat.overrides.wasm]
1085    /// result_enum_fields = { "choices[0].finish_reason" = "WasmFinishReason", "status" = "WasmBatchStatus" }
1086    /// ```
1087    #[serde(default)]
1088    pub result_enum_fields: HashMap<String, String>,
1089}
1090
/// Serde default helper for boolean fields that are enabled unless the
/// config explicitly turns them off.
fn default_true() -> bool {
    true
}
1094
/// Per-language package reference configuration.
///
/// Every field is optional: a key that is absent from the config
/// deserializes to `None` (each field is `#[serde(default)]`), and the
/// derived `Default` likewise leaves all fields unset.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PackageRef {
    /// Package/crate/gem/module name.
    #[serde(default)]
    pub name: Option<String>,
    /// Relative path from `e2e/{lang}/` to the package.
    #[serde(default)]
    pub path: Option<String>,
    /// Go module path.
    #[serde(default)]
    pub module: Option<String>,
    /// Package version (e.g., for go.mod require directives).
    #[serde(default)]
    pub version: Option<String>,
}
1111
1112#[cfg(test)]
1113mod tests {
1114    use super::*;
1115
1116    fn empty_e2e_with_test_documents(dir: &str) -> E2eConfig {
1117        E2eConfig {
1118            test_documents_dir: dir.to_string(),
1119            ..Default::default()
1120        }
1121    }
1122
1123    #[test]
1124    fn test_documents_dir_default_is_test_documents() {
1125        let cfg: E2eConfig = toml::from_str("[call]\nfunction = \"f\"\n").expect("minimal TOML must deserialize");
1126        assert_eq!(cfg.test_documents_dir, "test_documents");
1127    }
1128
1129    #[test]
1130    fn test_documents_dir_explicit_override_wins() {
1131        let cfg: E2eConfig = toml::from_str("test_documents_dir = \"fixture_files\"\n[call]\nfunction = \"f\"\n")
1132            .expect("explicit override must deserialize");
1133        assert_eq!(cfg.test_documents_dir, "fixture_files");
1134    }
1135
1136    #[test]
1137    fn test_documents_relative_from_at_lang_root_returns_two_dots_up() {
1138        let cfg = empty_e2e_with_test_documents("test_documents");
1139        assert_eq!(cfg.test_documents_relative_from(0), "../../test_documents");
1140    }
1141
1142    #[test]
1143    fn test_documents_relative_from_at_spec_depth_returns_three_dots_up() {
1144        let cfg = empty_e2e_with_test_documents("test_documents");
1145        assert_eq!(cfg.test_documents_relative_from(1), "../../../test_documents");
1146    }
1147
1148    #[test]
1149    fn test_documents_relative_from_at_two_subdirs_deep_returns_four_dots_up() {
1150        let cfg = empty_e2e_with_test_documents("test_documents");
1151        assert_eq!(cfg.test_documents_relative_from(2), "../../../../test_documents");
1152    }
1153
1154    #[test]
1155    fn test_documents_relative_uses_configured_dir_name() {
1156        let cfg = empty_e2e_with_test_documents("fixture_files");
1157        assert_eq!(cfg.test_documents_relative_from(0), "../../fixture_files");
1158        assert_eq!(cfg.test_documents_relative_from(1), "../../../fixture_files");
1159    }
1160
1161    #[test]
1162    fn select_when_with_no_discriminators_never_matches() {
1163        let sel = SelectWhen::default();
1164        assert!(!sel.matches("any_id", "any_category", &[], &serde_json::Value::Null));
1165    }
1166
1167    #[test]
1168    fn select_when_input_has_matches_non_null_key() {
1169        let sel = SelectWhen {
1170            input_has: Some("batch_urls".to_string()),
1171            ..Default::default()
1172        };
1173        let input = serde_json::json!({ "batch_urls": [] });
1174        assert!(sel.matches("fid", "cat", &[], &input));
1175        let empty_input = serde_json::json!({ "url": "x" });
1176        assert!(!sel.matches("fid", "cat", &[], &empty_input));
1177    }
1178
1179    #[test]
1180    fn select_when_category_matches_exactly() {
1181        let sel = SelectWhen {
1182            category: Some("crawl".to_string()),
1183            ..Default::default()
1184        };
1185        assert!(sel.matches("any_id", "crawl", &[], &serde_json::Value::Null));
1186        assert!(!sel.matches("any_id", "scrape", &[], &serde_json::Value::Null));
1187    }
1188
1189    #[test]
1190    fn select_when_id_prefix_matches() {
1191        let sel = SelectWhen {
1192            id_prefix: Some("batch_crawl_".to_string()),
1193            ..Default::default()
1194        };
1195        assert!(sel.matches("batch_crawl_events", "any", &[], &serde_json::Value::Null));
1196        assert!(!sel.matches("batch_scrape_basic", "any", &[], &serde_json::Value::Null));
1197    }
1198
1199    #[test]
1200    fn select_when_id_glob_handles_star() {
1201        let sel = SelectWhen {
1202            id_glob: Some("crawl_stream*".to_string()),
1203            ..Default::default()
1204        };
1205        assert!(sel.matches("crawl_stream_basic", "any", &[], &serde_json::Value::Null));
1206        assert!(!sel.matches("batch_crawl_stream", "any", &[], &serde_json::Value::Null));
1207    }
1208
1209    #[test]
1210    fn select_when_tag_matches_any_tag_in_list() {
1211        let sel = SelectWhen {
1212            tag: Some("streaming".to_string()),
1213            ..Default::default()
1214        };
1215        let tags = vec!["smoke".to_string(), "streaming".to_string()];
1216        assert!(sel.matches("fid", "cat", &tags, &serde_json::Value::Null));
1217        assert!(!sel.matches("fid", "cat", &["smoke".to_string()], &serde_json::Value::Null));
1218    }
1219
1220    #[test]
1221    fn select_when_multiple_discriminators_anded() {
1222        let sel = SelectWhen {
1223            category: Some("stream".to_string()),
1224            id_prefix: Some("batch_crawl_stream".to_string()),
1225            ..Default::default()
1226        };
1227        assert!(sel.matches("batch_crawl_stream_events", "stream", &[], &serde_json::Value::Null));
1228        // Wrong category fails even though prefix matches
1229        assert!(!sel.matches("batch_crawl_stream_events", "crawl", &[], &serde_json::Value::Null));
1230        // Wrong prefix fails even though category matches
1231        assert!(!sel.matches("crawl_stream_basic", "stream", &[], &serde_json::Value::Null));
1232    }
1233
1234    #[test]
1235    fn select_when_deserializes_legacy_input_has_only() {
1236        let toml_src = r#"
1237            [call]
1238            function = "scrape"
1239
1240            [calls.batch_scrape]
1241            function = "batch_scrape"
1242            select_when = { input_has = "batch_urls" }
1243        "#;
1244        let cfg: E2eConfig = toml::from_str(toml_src).expect("legacy input_has must deserialize");
1245        let sel = cfg.calls["batch_scrape"].select_when.as_ref().unwrap();
1246        assert_eq!(sel.input_has.as_deref(), Some("batch_urls"));
1247        assert!(sel.category.is_none());
1248        assert!(sel.id_prefix.is_none());
1249    }
1250
1251    #[test]
1252    fn select_when_deserializes_compound_discriminators() {
1253        let toml_src = r#"
1254            [call]
1255            function = "scrape"
1256
1257            [calls.batch_crawl_stream]
1258            function = "batch_crawl_stream"
1259            select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
1260        "#;
1261        let cfg: E2eConfig = toml::from_str(toml_src).expect("compound select_when must deserialize");
1262        let sel = cfg.calls["batch_crawl_stream"].select_when.as_ref().unwrap();
1263        assert_eq!(sel.category.as_deref(), Some("stream"));
1264        assert_eq!(sel.id_prefix.as_deref(), Some("batch_crawl_stream"));
1265    }
1266
1267    #[test]
1268    fn resolve_call_for_fixture_routes_by_category_then_falls_back() {
1269        let mut calls = HashMap::new();
1270        calls.insert(
1271            "crawl".to_string(),
1272            CallConfig {
1273                function: "crawl".to_string(),
1274                select_when: Some(SelectWhen {
1275                    category: Some("crawl".to_string()),
1276                    ..Default::default()
1277                }),
1278                ..Default::default()
1279            },
1280        );
1281        let cfg = E2eConfig {
1282            call: CallConfig {
1283                function: "scrape".to_string(),
1284                ..Default::default()
1285            },
1286            calls,
1287            ..Default::default()
1288        };
1289        let input = serde_json::json!({ "url": "https://example.com" });
1290        let resolved = cfg.resolve_call_for_fixture(None, "crawl_basic", "crawl", &[], &input);
1291        assert_eq!(resolved.function, "crawl");
1292        let resolved = cfg.resolve_call_for_fixture(None, "scrape_basic", "scrape", &[], &input);
1293        assert_eq!(resolved.function, "scrape");
1294    }
1295
1296    // --- effective_* resolver helpers ---
1297
1298    #[test]
1299    fn effective_result_fields_returns_global_when_call_is_empty() {
1300        let mut global = HashSet::new();
1301        global.insert("url".to_string());
1302        let cfg = E2eConfig {
1303            result_fields: global.clone(),
1304            ..Default::default()
1305        };
1306        let call = CallConfig::default();
1307        assert_eq!(cfg.effective_result_fields(&call), &global);
1308    }
1309
1310    #[test]
1311    fn effective_result_fields_call_override_wins_over_global() {
1312        let mut global = HashSet::new();
1313        global.insert("url".to_string());
1314        let mut per_call = HashSet::new();
1315        per_call.insert("pages".to_string());
1316        per_call.insert("final_url".to_string());
1317        let cfg = E2eConfig {
1318            result_fields: global,
1319            ..Default::default()
1320        };
1321        let call = CallConfig {
1322            result_fields: per_call.clone(),
1323            ..Default::default()
1324        };
1325        assert_eq!(cfg.effective_result_fields(&call), &per_call);
1326    }
1327
1328    #[test]
1329    fn effective_fields_returns_global_when_call_is_empty() {
1330        let mut global = HashMap::new();
1331        global.insert("metadata.title".to_string(), "metadata.document.title".to_string());
1332        let cfg = E2eConfig {
1333            fields: global.clone(),
1334            ..Default::default()
1335        };
1336        let call = CallConfig::default();
1337        assert_eq!(cfg.effective_fields(&call), &global);
1338    }
1339
1340    #[test]
1341    fn effective_fields_call_override_wins_over_global() {
1342        let mut global = HashMap::new();
1343        global.insert("a".to_string(), "b".to_string());
1344        let mut per_call = HashMap::new();
1345        per_call.insert("x".to_string(), "y".to_string());
1346        let cfg = E2eConfig {
1347            fields: global,
1348            ..Default::default()
1349        };
1350        let call = CallConfig {
1351            fields: per_call.clone(),
1352            ..Default::default()
1353        };
1354        assert_eq!(cfg.effective_fields(&call), &per_call);
1355    }
1356
1357    #[test]
1358    fn effective_fields_optional_returns_global_when_call_is_empty() {
1359        let mut global = HashSet::new();
1360        global.insert("segments".to_string());
1361        let cfg = E2eConfig {
1362            fields_optional: global.clone(),
1363            ..Default::default()
1364        };
1365        let call = CallConfig::default();
1366        assert_eq!(cfg.effective_fields_optional(&call), &global);
1367    }
1368
1369    #[test]
1370    fn effective_fields_optional_call_override_wins_over_global() {
1371        let mut global = HashSet::new();
1372        global.insert("segments".to_string());
1373        let mut per_call = HashSet::new();
1374        per_call.insert("pages".to_string());
1375        let cfg = E2eConfig {
1376            fields_optional: global,
1377            ..Default::default()
1378        };
1379        let call = CallConfig {
1380            fields_optional: per_call.clone(),
1381            ..Default::default()
1382        };
1383        assert_eq!(cfg.effective_fields_optional(&call), &per_call);
1384    }
1385
1386    #[test]
1387    fn effective_fields_array_returns_global_when_call_is_empty() {
1388        let mut global = HashSet::new();
1389        global.insert("choices".to_string());
1390        let cfg = E2eConfig {
1391            fields_array: global.clone(),
1392            ..Default::default()
1393        };
1394        let call = CallConfig::default();
1395        assert_eq!(cfg.effective_fields_array(&call), &global);
1396    }
1397
1398    #[test]
1399    fn effective_fields_array_call_override_wins_over_global() {
1400        let mut global = HashSet::new();
1401        global.insert("choices".to_string());
1402        let mut per_call = HashSet::new();
1403        per_call.insert("pages".to_string());
1404        let cfg = E2eConfig {
1405            fields_array: global,
1406            ..Default::default()
1407        };
1408        let call = CallConfig {
1409            fields_array: per_call.clone(),
1410            ..Default::default()
1411        };
1412        assert_eq!(cfg.effective_fields_array(&call), &per_call);
1413    }
1414
1415    #[test]
1416    fn effective_fields_method_calls_returns_global_when_call_is_empty() {
1417        let mut global = HashSet::new();
1418        global.insert("metadata.format".to_string());
1419        let cfg = E2eConfig {
1420            fields_method_calls: global.clone(),
1421            ..Default::default()
1422        };
1423        let call = CallConfig::default();
1424        assert_eq!(cfg.effective_fields_method_calls(&call), &global);
1425    }
1426
1427    #[test]
1428    fn effective_fields_method_calls_call_override_wins_over_global() {
1429        let mut global = HashSet::new();
1430        global.insert("metadata.format".to_string());
1431        let mut per_call = HashSet::new();
1432        per_call.insert("pages.status".to_string());
1433        let cfg = E2eConfig {
1434            fields_method_calls: global,
1435            ..Default::default()
1436        };
1437        let call = CallConfig {
1438            fields_method_calls: per_call.clone(),
1439            ..Default::default()
1440        };
1441        assert_eq!(cfg.effective_fields_method_calls(&call), &per_call);
1442    }
1443
1444    #[test]
1445    fn effective_fields_enum_returns_global_when_call_is_empty() {
1446        let mut global = HashSet::new();
1447        global.insert("choices.finish_reason".to_string());
1448        let cfg = E2eConfig {
1449            fields_enum: global.clone(),
1450            ..Default::default()
1451        };
1452        let call = CallConfig::default();
1453        assert_eq!(cfg.effective_fields_enum(&call), &global);
1454    }
1455
1456    #[test]
1457    fn effective_fields_enum_call_override_wins_over_global() {
1458        let mut global = HashSet::new();
1459        global.insert("choices.finish_reason".to_string());
1460        let mut per_call = HashSet::new();
1461        per_call.insert("assets.category".to_string());
1462        let cfg = E2eConfig {
1463            fields_enum: global,
1464            ..Default::default()
1465        };
1466        let call = CallConfig {
1467            fields_enum: per_call.clone(),
1468            ..Default::default()
1469        };
1470        assert_eq!(cfg.effective_fields_enum(&call), &per_call);
1471    }
1472
1473    #[test]
1474    fn effective_fields_c_types_returns_global_when_call_is_empty() {
1475        let mut global = HashMap::new();
1476        global.insert("conversion_result.metadata".to_string(), "HtmlMetadata".to_string());
1477        let cfg = E2eConfig {
1478            fields_c_types: global.clone(),
1479            ..Default::default()
1480        };
1481        let call = CallConfig::default();
1482        assert_eq!(cfg.effective_fields_c_types(&call), &global);
1483    }
1484
1485    #[test]
1486    fn effective_fields_c_types_call_override_wins_over_global() {
1487        let mut global = HashMap::new();
1488        global.insert("conversion_result.metadata".to_string(), "HtmlMetadata".to_string());
1489        let mut per_call = HashMap::new();
1490        per_call.insert("crawl_result.pages".to_string(), "PageResult".to_string());
1491        let cfg = E2eConfig {
1492            fields_c_types: global,
1493            ..Default::default()
1494        };
1495        let call = CallConfig {
1496            fields_c_types: per_call.clone(),
1497            ..Default::default()
1498        };
1499        assert_eq!(cfg.effective_fields_c_types(&call), &per_call);
1500    }
1501
1502    #[test]
1503    fn effective_resolver_helpers_deserialize_from_toml() {
1504        let toml = r#"
1505[call]
1506function = "scrape"
1507result_fields = ["url", "markdown"]
1508fields_enum = ["status"]
1509
1510[call.fields]
1511"meta.title" = "meta.document.title"
1512
1513[call.fields_c_types]
1514"scrape_result.meta" = "MetaResult"
1515"#;
1516        let cfg: E2eConfig = toml::from_str(toml).expect("must deserialize");
1517        let call = &cfg.call;
1518        assert!(cfg.effective_result_fields(call).contains("url"));
1519        assert!(cfg.effective_result_fields(call).contains("markdown"));
1520        assert!(cfg.effective_fields_enum(call).contains("status"));
1521        assert_eq!(
1522            cfg.effective_fields(call).get("meta.title").map(String::as_str),
1523            Some("meta.document.title")
1524        );
1525        assert_eq!(
1526            cfg.effective_fields_c_types(call)
1527                .get("scrape_result.meta")
1528                .map(String::as_str),
1529            Some("MetaResult")
1530        );
1531    }
1532}