alef_core/config/e2e.rs
1//! E2E test generation configuration types.
2
3use crate::config::manifest_extras::ManifestExtras;
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7/// Controls whether generated e2e test projects reference the package under
8/// test via a local path (for development) or a registry version string
9/// (for standalone `test_apps` that consumers can run without the monorepo).
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
11#[serde(rename_all = "lowercase")]
12pub enum DependencyMode {
13 /// Local path dependency (default) — used during normal e2e development.
14 #[default]
15 Local,
16 /// Registry dependency — generates standalone test apps that pull the
17 /// package from its published registry (PyPI, npm, crates.io, etc.).
18 Registry,
19}
20
21/// Configuration for registry-mode e2e generation (`alef e2e generate --registry`).
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct RegistryConfig {
24 /// Output directory for registry-mode test apps (default: "test_apps").
25 #[serde(default = "default_test_apps_dir")]
26 pub output: String,
27 /// Per-language package overrides used only in registry mode.
28 /// Merged on top of the base `[e2e.packages]` entries.
29 #[serde(default)]
30 pub packages: HashMap<String, PackageRef>,
31 /// When non-empty, only fixture categories in this list are included in
32 /// registry-mode generation (useful for shipping a curated subset).
33 #[serde(default)]
34 pub categories: Vec<String>,
35 /// GitHub repository URL for downloading prebuilt artifacts (e.g., FFI
36 /// shared libraries) from GitHub Releases.
37 ///
38 /// Falls back to `[scaffold] repository` when not set, then to
39 /// `https://github.com/kreuzberg-dev/{crate.name}`.
40 #[serde(default)]
41 pub github_repo: Option<String>,
42}
43
44impl Default for RegistryConfig {
45 fn default() -> Self {
46 Self {
47 output: default_test_apps_dir(),
48 packages: HashMap::new(),
49 categories: Vec::new(),
50 github_repo: None,
51 }
52 }
53}
54
/// Serde default for `RegistryConfig::output`.
fn default_test_apps_dir() -> String {
    String::from("test_apps")
}
58
59/// Root e2e configuration from `[e2e]` section of alef.toml.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct E2eConfig {
62 /// Directory containing fixture JSON files (default: "fixtures").
63 #[serde(default = "default_fixtures_dir")]
64 pub fixtures: String,
65 /// Output directory for generated e2e test projects (default: "e2e").
66 #[serde(default = "default_output_dir")]
67 pub output: String,
68 /// Repo-root-relative directory holding binary file fixtures referenced by
69 /// `file_path` / `bytes` fixture args (default: "test_documents").
70 ///
71 /// Backends that emit chdir / setup hooks for file-based fixtures resolve
72 /// the relative path from the test-emission directory via
73 /// [`E2eConfig::test_documents_relative_from`]. The default matches the
74 /// kreuzberg convention; downstream crates whose fixtures don't reference
75 /// files (e.g. liter-llm, which uses pure mock-server fixtures) can leave
76 /// the default in place — backends conditionally emit the setup only when
77 /// fixtures actually need it.
78 #[serde(default = "default_test_documents_dir")]
79 pub test_documents_dir: String,
80 /// Languages to generate e2e tests for. Defaults to top-level `languages` list.
81 #[serde(default)]
82 pub languages: Vec<String>,
83 /// Default function call configuration.
84 pub call: CallConfig,
85 /// Named additional call configurations for multi-function testing.
86 /// Fixtures reference these via the `call` field, e.g. `"call": "embed"`.
87 #[serde(default)]
88 pub calls: HashMap<String, CallConfig>,
89 /// Per-language package reference overrides.
90 #[serde(default)]
91 pub packages: HashMap<String, PackageRef>,
92 /// Per-language extra dependencies to splice into the e2e harness's
93 /// language-native manifest (`e2e/<lang>/package.json` for node/wasm,
94 /// `e2e/python/pyproject.toml` for Python, etc.). Distinct from the
95 /// Rust-binding `extra_dependencies` knob — this one targets the
96 /// host-language test-harness manifest. Keys are canonical language
97 /// names (`node`, `wasm`, `python`, …).
98 #[serde(default)]
99 pub harness_extras: HashMap<String, ManifestExtras>,
100 /// Per-language formatter commands.
101 #[serde(default)]
102 pub format: HashMap<String, String>,
103 /// Field path aliases: maps fixture field paths to actual API struct paths.
104 /// E.g., "metadata.title" -> "metadata.document.title"
105 /// Supports struct access (foo.bar), map access (foo[key]), direct fields.
106 #[serde(default)]
107 pub fields: HashMap<String, String>,
108 /// Fields that are Optional/nullable in the return type.
109 /// Rust generators use .as_deref().unwrap_or("") for strings, .is_some() for structs.
110 #[serde(default)]
111 pub fields_optional: HashSet<String>,
112 /// Fields that are arrays/Vecs on the result type.
113 /// When a fixture path like `json_ld.name` traverses an array field, the
114 /// accessor adds `[0]` (or language equivalent) to index into the first element.
115 #[serde(default)]
116 pub fields_array: HashSet<String>,
117 /// Fields where the accessor is a method call (appends `()`) rather than a field access.
118 /// Rust-specific: Java always uses `()`, Python/PHP use field access.
119 /// Listed as the full resolved field path (after alias resolution).
120 /// E.g., `"metadata.format.excel"` means `.excel` should be emitted as `.excel()`.
121 #[serde(default)]
122 pub fields_method_calls: HashSet<String>,
123 /// Known top-level fields on the result type.
124 ///
125 /// When non-empty, assertions whose resolved field path starts with a
126 /// segment that is NOT in this set are emitted as comments (skipped)
127 /// instead of executable assertions. This prevents broken assertions
128 /// when fixtures reference fields from a different operation (e.g.,
129 /// `batch.completed_count` on a `ScrapeResult`).
130 #[serde(default)]
131 pub result_fields: HashSet<String>,
132 /// Fixture categories excluded from cross-language e2e codegen.
133 ///
134 /// Fixtures whose resolved category matches an entry in this set are
135 /// skipped by every per-language e2e generator — no test is emitted at
136 /// all (no skip directive, no commented-out body). The fixture files stay
137 /// on disk and remain available to Rust integration tests inside the
138 /// consumer crate's own `tests/` directory.
139 ///
140 /// Use this to keep fixtures that exercise internal middleware (cache,
141 /// proxy, budget, hooks, etc.) out of bindings whose public surface does
142 /// not expose those layers.
143 ///
144 /// Example:
145 /// ```toml
146 /// [e2e]
147 /// exclude_categories = ["cache", "proxy", "budget", "hooks"]
148 /// ```
149 #[serde(default)]
150 pub exclude_categories: HashSet<String>,
151 /// C FFI accessor type chain: maps `"{parent_snake_type}.{field}"` to the
152 /// PascalCase return type name (without prefix).
153 ///
154 /// Used by the C e2e generator to emit chained FFI accessor calls for
155 /// nested field paths. The root type is always `conversion_result`.
156 ///
157 /// Example:
158 /// ```toml
159 /// [e2e.fields_c_types]
160 /// "conversion_result.metadata" = "HtmlMetadata"
161 /// "html_metadata.document" = "DocumentMetadata"
162 /// ```
163 #[serde(default)]
164 pub fields_c_types: HashMap<String, String>,
165 /// Fields whose resolved type is an enum in the generated bindings.
166 ///
167 /// When a `contains` / `contains_all` / etc. assertion targets one of these
168 /// fields, language generators that cannot call `.contains()` directly on an
169 /// enum (e.g., Java) will emit a string-conversion call first. For Java,
170 /// the generated assertion calls `.getValue()` on the enum — the `@JsonValue`
171 /// method that all alef-generated Java enums expose — to obtain the lowercase
172 /// serde string before performing the string comparison.
173 ///
174 /// Both the raw fixture field path (before alias resolution) and the resolved
175 /// path (after alias resolution via `[e2e.fields]`) are accepted, so you can
176 /// use either form:
177 ///
178 /// ```toml
179 /// # Raw fixture field:
180 /// fields_enum = ["links[].link_type", "assets[].category"]
181 /// # …or the resolved (aliased) field name:
182 /// fields_enum = ["links[].link_type", "assets[].asset_category"]
183 /// ```
184 #[serde(default)]
185 pub fields_enum: HashSet<String>,
186 /// Dependency mode: `Local` (default) or `Registry`.
187 /// Set at runtime via `--registry` CLI flag; not serialized from TOML.
188 #[serde(skip)]
189 pub dep_mode: DependencyMode,
190 /// Registry-mode configuration from `[e2e.registry]`.
191 #[serde(default)]
192 pub registry: RegistryConfig,
193}
194
195impl E2eConfig {
196 /// Resolve the call config for a fixture. Uses the named call if specified,
197 /// otherwise falls back to the default `[e2e.call]`.
198 pub fn resolve_call(&self, call_name: Option<&str>) -> &CallConfig {
199 match call_name {
200 Some(name) => self.calls.get(name).unwrap_or(&self.call),
201 None => &self.call,
202 }
203 }
204
205 /// Resolve the call config for a fixture, applying `select_when` auto-routing.
206 ///
207 /// When the fixture has an explicit `call` name, that named config is returned
208 /// (same as [`resolve_call`]). When the fixture has no explicit call, the method
209 /// scans named calls for a [`SelectWhen`] condition that matches the fixture's
210 /// shape (id, category, tags, input) and returns the first match. If no condition
211 /// matches, it falls back to the default `[e2e.call]`.
212 ///
213 /// All non-`None` discriminators on a `SelectWhen` must match (logical AND) for
214 /// the condition to fire. A `SelectWhen` with every field `None` never matches —
215 /// at least one discriminator must be set.
216 pub fn resolve_call_for_fixture(
217 &self,
218 call_name: Option<&str>,
219 fixture_id: &str,
220 fixture_category: &str,
221 fixture_tags: &[String],
222 fixture_input: &serde_json::Value,
223 ) -> &CallConfig {
224 if let Some(name) = call_name {
225 return self.calls.get(name).unwrap_or(&self.call);
226 }
227 // Auto-route by select_when condition. Deterministic order: sort by call name.
228 let mut names: Vec<&String> = self.calls.keys().collect();
229 names.sort();
230 for name in names {
231 let call_config = &self.calls[name];
232 if let Some(sel) = &call_config.select_when {
233 if sel.matches(fixture_id, fixture_category, fixture_tags, fixture_input) {
234 return call_config;
235 }
236 }
237 }
238 &self.call
239 }
240
241 /// Resolve the effective package reference for a language.
242 ///
243 /// In registry mode, entries from `[e2e.registry.packages]` are merged on
244 /// top of the base `[e2e.packages]` — registry overrides win for any field
245 /// that is `Some`.
246 pub fn resolve_package(&self, lang: &str) -> Option<PackageRef> {
247 let base = self.packages.get(lang);
248 if self.dep_mode == DependencyMode::Registry {
249 let reg = self.registry.packages.get(lang);
250 match (base, reg) {
251 (Some(b), Some(r)) => Some(PackageRef {
252 name: r.name.clone().or_else(|| b.name.clone()),
253 path: r.path.clone().or_else(|| b.path.clone()),
254 module: r.module.clone().or_else(|| b.module.clone()),
255 version: r.version.clone().or_else(|| b.version.clone()),
256 }),
257 (None, Some(r)) => Some(r.clone()),
258 (Some(b), None) => Some(b.clone()),
259 (None, None) => None,
260 }
261 } else {
262 base.cloned()
263 }
264 }
265
266 /// Return the effective `result_fields` for `call`.
267 ///
268 /// Returns `call.result_fields` when non-empty, otherwise the global
269 /// `self.result_fields`.
270 pub fn effective_result_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
271 if !call.result_fields.is_empty() {
272 &call.result_fields
273 } else {
274 &self.result_fields
275 }
276 }
277
278 /// Return the effective `fields` alias map for `call`.
279 pub fn effective_fields<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
280 if !call.fields.is_empty() {
281 &call.fields
282 } else {
283 &self.fields
284 }
285 }
286
287 /// Return the effective `fields_optional` for `call`.
288 pub fn effective_fields_optional<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
289 if !call.fields_optional.is_empty() {
290 &call.fields_optional
291 } else {
292 &self.fields_optional
293 }
294 }
295
296 /// Return the effective `fields_array` for `call`.
297 pub fn effective_fields_array<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
298 if !call.fields_array.is_empty() {
299 &call.fields_array
300 } else {
301 &self.fields_array
302 }
303 }
304
305 /// Return the effective `fields_method_calls` for `call`.
306 pub fn effective_fields_method_calls<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
307 if !call.fields_method_calls.is_empty() {
308 &call.fields_method_calls
309 } else {
310 &self.fields_method_calls
311 }
312 }
313
314 /// Return the effective `fields_enum` for `call`.
315 pub fn effective_fields_enum<'a>(&'a self, call: &'a CallConfig) -> &'a HashSet<String> {
316 if !call.fields_enum.is_empty() {
317 &call.fields_enum
318 } else {
319 &self.fields_enum
320 }
321 }
322
323 /// Return the effective `fields_c_types` for `call`.
324 pub fn effective_fields_c_types<'a>(&'a self, call: &'a CallConfig) -> &'a HashMap<String, String> {
325 if !call.fields_c_types.is_empty() {
326 &call.fields_c_types
327 } else {
328 &self.fields_c_types
329 }
330 }
331
332 /// Return the effective output directory: `registry.output` in registry
333 /// mode, `output` otherwise.
334 pub fn effective_output(&self) -> &str {
335 if self.dep_mode == DependencyMode::Registry {
336 &self.registry.output
337 } else {
338 &self.output
339 }
340 }
341
342 /// Relative path from a backend's emission directory to the
343 /// `test_documents_dir` at the repo root.
344 ///
345 /// `emission_depth` counts the number of additional `../` segments needed
346 /// to reach `<output>/<lang>/` from where the file is being emitted:
347 ///
348 /// * `0` — emitted directly at `e2e/<lang>/` (e.g. dart, zig `build.zig`)
349 /// * `1` — emitted at `e2e/<lang>/<sub>/` (e.g. ruby `spec/`, R `tests/`)
350 /// * `2` — emitted at `e2e/<lang>/<sub1>/<sub2>/`
351 ///
352 /// The base prefix is two segments above `<output>/<lang>/` (i.e.
353 /// `../../`), matching the canonical layout where `<output>` (default
354 /// `"e2e"`) sits at the repo root next to the configured
355 /// `test_documents_dir`.
356 pub fn test_documents_relative_from(&self, emission_depth: usize) -> String {
357 let mut up = String::from("../../");
358 for _ in 0..emission_depth {
359 up.push_str("../");
360 }
361 format!("{up}{}", self.test_documents_dir)
362 }
363}
364
/// Serde default for `E2eConfig::fixtures`.
fn default_fixtures_dir() -> String {
    String::from("fixtures")
}
368
/// Serde default for `E2eConfig::output`.
fn default_output_dir() -> String {
    String::from("e2e")
}
372
/// Serde default for `E2eConfig::test_documents_dir`.
fn default_test_documents_dir() -> String {
    String::from("test_documents")
}
376
377/// Hand-rolled `Default` so the `test_documents_dir` field receives its
378/// `default_test_documents_dir()` value (`"test_documents"`) when callers use
379/// `..Default::default()` to construct an `E2eConfig` literally rather than
380/// going through `serde::Deserialize`. Without this, `derive(Default)` would
381/// fall back to `String::default()` (i.e. the empty string), and any backend
382/// computing `test_documents_relative_from(0)` would emit `"../../"` (no dir
383/// component), breaking generated chdir hooks.
384impl Default for E2eConfig {
385 fn default() -> Self {
386 Self {
387 fixtures: default_fixtures_dir(),
388 output: default_output_dir(),
389 test_documents_dir: default_test_documents_dir(),
390 languages: Vec::new(),
391 call: CallConfig::default(),
392 calls: HashMap::new(),
393 packages: HashMap::new(),
394 harness_extras: HashMap::new(),
395 format: HashMap::new(),
396 fields: HashMap::new(),
397 fields_optional: HashSet::new(),
398 fields_array: HashSet::new(),
399 fields_method_calls: HashSet::new(),
400 result_fields: HashSet::new(),
401 exclude_categories: HashSet::new(),
402 fields_c_types: HashMap::new(),
403 fields_enum: HashSet::new(),
404 dep_mode: DependencyMode::default(),
405 registry: RegistryConfig::default(),
406 }
407 }
408}
409
410/// Configuration for the function call in each test.
411#[derive(Debug, Clone, Serialize, Deserialize, Default)]
412pub struct CallConfig {
413 /// Per-call override for `result_fields`.
414 ///
415 /// When non-empty, this set replaces the global `[e2e].result_fields` for
416 /// fixtures routed to this call. Use this when different API functions return
417 /// differently-shaped structs so each call can gate its own field set.
418 ///
419 /// Example:
420 /// ```toml
421 /// [e2e.calls.crawl]
422 /// result_fields = ["pages", "final_url", "stayed_on_domain"]
423 /// ```
424 #[serde(default)]
425 pub result_fields: HashSet<String>,
426 /// Per-call override for `[e2e].fields` alias map.
427 ///
428 /// When non-empty, replaces (not merges with) the global `fields` map for
429 /// fixtures routed to this call.
430 #[serde(default)]
431 pub fields: HashMap<String, String>,
432 /// Per-call override for `[e2e].fields_optional`.
433 #[serde(default)]
434 pub fields_optional: HashSet<String>,
435 /// Per-call override for `[e2e].fields_array`.
436 #[serde(default)]
437 pub fields_array: HashSet<String>,
438 /// Per-call override for `[e2e].fields_method_calls`.
439 #[serde(default)]
440 pub fields_method_calls: HashSet<String>,
441 /// Per-call override for `[e2e].fields_enum`.
442 #[serde(default)]
443 pub fields_enum: HashSet<String>,
444 /// Per-call override for `[e2e].fields_c_types`.
445 #[serde(default)]
446 pub fields_c_types: HashMap<String, String>,
447 /// The function name (alef applies language naming conventions).
448 #[serde(default)]
449 pub function: String,
450 /// The module/package where the function lives.
451 #[serde(default)]
452 pub module: String,
453 /// Variable name for the return value (default: "result").
454 #[serde(default = "default_result_var")]
455 pub result_var: String,
456 /// Whether the function is async.
457 #[serde(default)]
458 pub r#async: bool,
459 /// HTTP endpoint path for mock server routing (e.g., `"/v1/chat/completions"`).
460 ///
461 /// Required when fixtures use `mock_response`. The Rust e2e generator uses
462 /// this to build the `MockRoute` that the mock server matches against.
463 #[serde(default)]
464 pub path: Option<String>,
465 /// HTTP method for mock server routing (default: `"POST"`).
466 ///
467 /// Used together with `path` when building `MockRoute` entries.
468 #[serde(default)]
469 pub method: Option<String>,
470 /// How fixture `input` fields map to function arguments.
471 #[serde(default)]
472 pub args: Vec<ArgMapping>,
473 /// Per-language overrides for module/function/etc.
474 #[serde(default)]
475 pub overrides: HashMap<String, CallOverride>,
476 /// Whether the function returns `Result<T, E>` in its native binding.
477 /// Defaults to `true`. When `false`, generators that distinguish Result-returning
478 /// from non-Result-returning calls (currently Rust) will skip the
479 /// `.expect("should succeed")` unwrap and bind the raw return value directly.
480 #[serde(default = "default_returns_result")]
481 pub returns_result: bool,
482 /// Whether the function returns only an error/unit — i.e., `Result<(), E>`.
483 ///
484 /// When combined with `returns_result = true`, Go generators emit `err := func()`
485 /// (single return value) rather than `_, err := func()` (two return values).
486 /// This is needed for functions like `validate_host` that return only `error` in Go.
487 #[serde(default)]
488 pub returns_void: bool,
489 /// skip_languages
490 #[serde(default)]
491 pub skip_languages: Vec<String>,
492 /// Per-backend exclusion: backends listed here will emit a skip comment instead of
493 /// a failing test, with the documented reason that the call is unsupported on that
494 /// backend (e.g., "brew: interact requires complex JSON serialization of PageAction enums").
495 ///
496 /// Use this when a backend structurally cannot support a call (e.g., CLI-based
497 /// backends that lack certain features). Unlike `skip_languages`, unsupported calls
498 /// are documented in the generated test files with rationale comments.
499 ///
500 /// Example:
501 /// ```toml
502 /// [e2e.calls.interact]
503 /// unsupported_in = { brew = "interact requires serializing Vec<PageAction> enums to JSON CLI args" }
504 /// ```
505 #[serde(default)]
506 pub unsupported_in: HashMap<String, String>,
507 /// When `true`, the function returns a primitive (e.g. `String`, `bool`,
508 /// `i32`) rather than a struct. Generators that would otherwise emit
509 /// `result.<field>` will fall back to the bare result variable.
510 ///
511 /// This is a property of the Rust core's return type and therefore identical
512 /// across every binding — set it on the call, not in per-language overrides.
513 /// The same flag is also accepted under `[e2e.calls.<name>.overrides.<lang>]`
514 /// for backwards compatibility, but the call-level value takes precedence.
515 #[serde(default)]
516 pub result_is_simple: bool,
517 /// When `true`, the function returns `Vec<T>` / `Array<T>`. Generators that
518 /// support per-element field assertions (rust, csharp) iterate or index into
519 /// the result; the typescript codegen indexes `[0]` to mirror csharp.
520 ///
521 /// As with `result_is_simple`, this is a Rust-side property — set it on the
522 /// call, not on per-language overrides. Per-language overrides remain
523 /// supported for backwards compatibility.
524 #[serde(default)]
525 pub result_is_vec: bool,
526 /// When `true` (combined with `result_is_simple`), the simple return is a
527 /// slice/array (e.g., `Vec<String>` → `string[]` in TS).
528 #[serde(default)]
529 pub result_is_array: bool,
530 /// When `true`, the function returns a raw byte array (`Vec<u8>` →
531 /// `Uint8Array` / `[]byte` / `byte[]`).
532 #[serde(default)]
533 pub result_is_bytes: bool,
534 /// Three-valued opt-in/out for streaming-virtual-field auto-detection.
535 ///
536 /// - `Some(true)`: force streaming semantics regardless of fixture shape.
537 /// - `Some(false)`: disable streaming auto-detection — assertions referencing
538 /// fields like `chunks` / `chunks.length` / `tool_calls` / `finish_reason`
539 /// are treated as plain field accessors on the result, not streaming
540 /// adapters. Use this when your API has a `chunks` field that is a regular
541 /// list (not an async stream).
542 /// - `None` (default): auto-detect — treat as streaming when either the
543 /// fixture provides a streaming `mock_response` or any assertion references
544 /// a hard-coded streaming-virtual-field name.
545 #[serde(default)]
546 pub streaming: Option<bool>,
547 /// When `true`, the function returns `Option<T>`.
548 #[serde(default)]
549 pub result_is_option: bool,
550 /// When `true` (combined with `result_is_simple` + `result_is_array`),
551 /// signals that the result is `Vec<String>` returned to the host as a
552 /// native string array (e.g., Swift `[String]`) rather than an opaque
553 /// `RustVec<RustString>` requiring `.asStr().toString()` per element.
554 ///
555 /// Generators that emit per-element coercion for opaque RustVec types
556 /// (currently Swift) drop the coercion and operate on the elements as
557 /// native strings when this flag is set.
558 #[serde(default)]
559 pub result_element_is_string: bool,
560 /// Automatic fixture-routing condition.
561 ///
562 /// When set, a fixture whose `call` field is `None` is routed to this named call config
563 /// if the condition is satisfied. This avoids the need to tag every fixture with
564 /// `"call": "batch_scrape"` when the fixture shape already identifies the call.
565 ///
566 /// Example (`alef.toml`):
567 /// ```toml
568 /// [e2e.calls.batch_scrape]
569 /// select_when = { input_has = "batch_urls" }
570 /// ```
571 #[serde(default)]
572 pub select_when: Option<SelectWhen>,
573 /// Call-level constructor type for `json_object` config args.
574 ///
575 /// This is the type of the function's config parameter (e.g. `EmbeddingConfig`
576 /// vs `ExtractionConfig`) and is therefore identical across every binding — set
577 /// it on the call, not in per-language overrides. Per-language overrides
578 /// (`[e2e.calls.<name>.overrides.<lang>].options_type`) still take precedence
579 /// when a binding exposes a language-specific wrapper type (e.g. `JsExtractionConfig`).
580 #[serde(default)]
581 pub options_type: Option<String>,
582}
583
/// Serde default for `CallConfig::result_var`.
fn default_result_var() -> String {
    String::from("result")
}
587
/// Serde default for `CallConfig::returns_result`.
///
/// The field is documented as defaulting to `true`: a call is assumed to
/// return `Result<T, E>` unless the configuration explicitly says otherwise.
/// Returning `false` here would make every call that omits the key skip the
/// Result unwrap, contradicting that documented default.
fn default_returns_result() -> bool {
    true
}
591
592/// Condition for auto-selecting a named call config when the fixture matches.
593///
594/// When a fixture does not specify `"call"`, the codegen normally uses the default
595/// `[e2e.call]`. A `SelectWhen` condition on a named call allows automatic routing
596/// based on the fixture's id, category, tags, or input shape. All set fields must
597/// match (logical AND); a condition with no fields set never matches.
598///
599/// ```toml
600/// [e2e.calls.batch_scrape]
601/// select_when = { input_has = "batch_urls" }
602///
603/// [e2e.calls.crawl]
604/// select_when = { category = "crawl" }
605///
606/// [e2e.calls.batch_crawl_stream]
607/// select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
608/// ```
609#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
610pub struct SelectWhen {
611 /// Match when the fixture's resolved category equals this string.
612 #[serde(default)]
613 pub category: Option<String>,
614 /// Match when the fixture's id starts with this prefix.
615 #[serde(default)]
616 pub id_prefix: Option<String>,
617 /// Match when the fixture's id matches this simple glob.
618 ///
619 /// Only `*` (matches any run of characters) is supported. Use `id_prefix`
620 /// for plain prefix matches.
621 #[serde(default)]
622 pub id_glob: Option<String>,
623 /// Match when the fixture's tags include this tag.
624 #[serde(default)]
625 pub tag: Option<String>,
626 /// Match when the fixture's input object contains this key with a non-null value.
627 #[serde(default)]
628 pub input_has: Option<String>,
629}
630
631impl SelectWhen {
632 /// Returns true when every set discriminator matches the fixture.
633 ///
634 /// A `SelectWhen` with all fields `None` returns `false` — at least one
635 /// discriminator must be set for the condition to fire.
636 pub fn matches(
637 &self,
638 fixture_id: &str,
639 fixture_category: &str,
640 fixture_tags: &[String],
641 fixture_input: &serde_json::Value,
642 ) -> bool {
643 let any_set = self.category.is_some()
644 || self.id_prefix.is_some()
645 || self.id_glob.is_some()
646 || self.tag.is_some()
647 || self.input_has.is_some();
648 if !any_set {
649 return false;
650 }
651 if let Some(cat) = &self.category
652 && cat.as_str() != fixture_category
653 {
654 return false;
655 }
656 if let Some(prefix) = &self.id_prefix
657 && !fixture_id.starts_with(prefix.as_str())
658 {
659 return false;
660 }
661 if let Some(glob) = &self.id_glob
662 && !glob_matches(glob, fixture_id)
663 {
664 return false;
665 }
666 if let Some(tag) = &self.tag
667 && !fixture_tags.iter().any(|t| t == tag)
668 {
669 return false;
670 }
671 if let Some(key) = &self.input_has {
672 let val = fixture_input.get(key.as_str()).unwrap_or(&serde_json::Value::Null);
673 if val.is_null() {
674 return false;
675 }
676 }
677 true
678 }
679}
680
/// Tiny glob matcher whose only wildcard is `*` (any run of characters,
/// including none).
///
/// A pattern without `*` must equal `text` exactly. Otherwise the literal
/// segments between the wildcards must occur in `text` in order, without
/// overlapping. A leading segment is anchored at the start of `text` and a
/// trailing segment at its end.
fn glob_matches(pattern: &str, text: &str) -> bool {
    if !pattern.contains('*') {
        return pattern == text;
    }

    let segments: Vec<&str> = pattern.split('*').collect();
    let final_index = segments.len() - 1;
    // Portion of `text` not yet consumed by an earlier segment.
    let mut remaining = text;

    for (position, segment) in segments.iter().copied().enumerate() {
        // Empty segments arise from leading, trailing or doubled `*`.
        if segment.is_empty() {
            continue;
        }
        if position == 0 {
            // Literal before the first `*`: anchored at the start.
            match remaining.strip_prefix(segment) {
                Some(tail) => remaining = tail,
                None => return false,
            }
        } else if position == final_index {
            // A non-empty final segment means the pattern has no trailing
            // `*`, so the unconsumed text has to end with it.
            return remaining.ends_with(segment);
        } else {
            // Interior literal: consume through its first occurrence.
            match remaining.find(segment) {
                Some(at) => remaining = &remaining[at + segment.len()..],
                None => return false,
            }
        }
    }
    true
}
708
709/// Maps a fixture input field to a function argument.
710#[derive(Debug, Clone, Serialize, Deserialize)]
711pub struct ArgMapping {
712 /// Argument name in the function signature.
713 pub name: String,
714 /// JSON field path in the fixture's `input` object.
715 pub field: String,
716 /// Type hint for code generation.
717 #[serde(rename = "type", default = "default_arg_type")]
718 pub arg_type: String,
719 /// Whether this argument is optional.
720 #[serde(default)]
721 pub optional: bool,
722 /// When `true`, the Rust codegen passes this argument by value (owned) rather than
723 /// by reference. Use for `Vec<T>` parameters that do not accept `&Vec<T>`.
724 #[serde(default)]
725 pub owned: bool,
726 /// For `json_object` args targeting `&[T]` Rust parameters, set to the element type
727 /// (e.g. `"f32"`, `"String"`) so the codegen emits `Vec<element_type>` annotation.
728 #[serde(default)]
729 pub element_type: Option<String>,
730 /// Override the Go slice element type for `json_object` array args.
731 ///
732 /// When set, the Go e2e codegen uses this as the element type instead of the default
733 /// derived from `element_type`. Use Go-idiomatic type names including the import alias
734 /// prefix where needed, e.g. `"kreuzberg.BatchBytesItem"` or `"string"`.
735 #[serde(default)]
736 pub go_type: Option<String>,
737}
738
/// Serde default for `ArgMapping::arg_type`.
fn default_arg_type() -> String {
    String::from("string")
}
742
743/// Per-language override for function call configuration.
744#[derive(Debug, Clone, Serialize, Deserialize, Default)]
745pub struct CallOverride {
746 /// Override the module/import path.
747 #[serde(default)]
748 pub module: Option<String>,
749 /// Override the function name.
750 #[serde(default)]
751 pub function: Option<String>,
752 /// Maps canonical argument names to language-specific argument names.
753 ///
754 /// Used when a language binding uses a different parameter name than the
755 /// canonical `args` list in `CallConfig`. For example, if the canonical
756 /// arg name is `doc` but the Python binding uses `html`, specify:
757 ///
758 /// ```toml
759 /// [e2e.call.overrides.python]
760 /// arg_name_map = { doc = "html" }
761 /// ```
762 ///
763 /// The key is the canonical name (from `args[].name`) and the value is the
764 /// name to use when emitting the keyword argument in generated tests.
765 #[serde(default)]
766 pub arg_name_map: HashMap<String, String>,
767 /// Override the crate name (Rust only).
768 #[serde(default)]
769 pub crate_name: Option<String>,
770 /// Override the class name (Java/C# only).
771 #[serde(default)]
772 pub class: Option<String>,
773 /// Import alias (Go only, e.g., `htmd`).
774 #[serde(default)]
775 pub alias: Option<String>,
776 /// C header file name (C only).
777 #[serde(default)]
778 pub header: Option<String>,
779 /// FFI symbol prefix (C only).
780 #[serde(default)]
781 pub prefix: Option<String>,
782 /// For json_object args: the constructor to use instead of raw dict/object.
783 /// E.g., "ConversionOptions" — generates `ConversionOptions(**options)` in Python,
784 /// `new ConversionOptions(options)` in TypeScript.
785 #[serde(default)]
786 pub options_type: Option<String>,
787 /// How to pass json_object args: "kwargs" (default), "dict", "json", or "from_json".
788 ///
789 /// - `"kwargs"`: construct `OptionsType(key=val, ...)` (requires `options_type`).
790 /// - `"dict"`: pass as a plain dict/object literal `{"key": "val"}`.
791 /// - `"json"`: pass via `json.loads('...')` / `JSON.parse('...')`.
792 /// - `"from_json"`: call `OptionsType.from_json('...')` (Python only, PyO3 native types).
793 #[serde(default)]
794 pub options_via: Option<String>,
795 /// Module to import `options_type` from when `options_via = "from_json"`.
796 ///
797 /// When set, a separate `from {from_json_module} import {options_type}` line
798 /// is emitted instead of including the type in the main module import.
799 /// E.g., `"liter_llm._internal_bindings"` for PyO3 native types.
800 #[serde(default)]
801 pub from_json_module: Option<String>,
802 /// Override whether the call is async for this language.
803 ///
804 /// When set, takes precedence over the call-level `async` flag.
805 /// Useful when a language binding uses a different async model — for example,
806 /// a Python binding that returns a sync iterator from a function marked
807 /// `async = true` at the call level.
808 #[serde(default, rename = "async")]
809 pub r#async: Option<bool>,
810 /// Maps fixture option field names to their enum type names.
811 /// E.g., `{"headingStyle": "HeadingStyle", "codeBlockStyle": "CodeBlockStyle"}`.
812 /// The generator imports these types and maps string values to enum constants.
813 #[serde(default)]
814 pub enum_fields: HashMap<String, String>,
815 /// Maps result-type field names to their enum type names for assertion routing.
816 /// Per-call so e.g. `BatchObject.status` (enum) and `ResponseObject.status` (string)
817 /// can be disambiguated.
818 #[serde(default)]
819 pub assert_enum_fields: HashMap<String, String>,
820 /// Module to import enum types from (if different from the main module).
821 /// E.g., "html_to_markdown._html_to_markdown" for PyO3 native enums.
822 #[serde(default)]
823 pub enum_module: Option<String>,
824 /// Maps nested fixture object field names to their C# type names.
825 /// Used to generate `JsonSerializer.Deserialize<NestedType>(...)` for nested objects.
826 /// E.g., `{"preprocessing": "PreprocessingOptions"}`.
827 #[serde(default)]
828 pub nested_types: HashMap<String, String>,
829 /// When `false`, nested config builder results are passed directly to builder methods
830 /// without wrapping in `Optional.of(...)`. Set to `false` for bindings where nested
831 /// option types are non-optional (e.g., html-to-markdown Java).
832 /// Defaults to `true` for backward compatibility.
833 #[serde(default = "default_true")]
834 pub nested_types_optional: bool,
835 /// When `true`, the function returns a simple type (e.g., `String`) rather
836 /// than a struct. Generators that would normally emit `result.content`
837 /// (or equivalent field access) will use the result variable directly.
838 #[serde(default)]
839 pub result_is_simple: bool,
840 /// When `true` (and combined with `result_is_simple`), the simple result is
841 /// a slice/array type (e.g., `[]string` in Go, `Vec<String>` in Rust).
842 /// The Go generator uses `strings.Join(value, " ")` for `contains` assertions
843 /// instead of `string(value)`.
844 #[serde(default)]
845 pub result_is_array: bool,
846 /// When `true`, the function returns `Vec<T>` rather than a single value.
847 /// Field-path assertions are emitted as `.iter().all(|r| <accessor>)` so
848 /// every element is checked. (Rust generator.)
849 #[serde(default)]
850 pub result_is_vec: bool,
851 /// When `true`, the function returns a raw byte array (e.g., `byte[]` in Java,
852 /// `[]byte` in Go). Used by generators to select the correct length accessor
853 /// (field `.length` vs method `.length()`).
854 #[serde(default)]
855 pub result_is_bytes: bool,
856 /// When `true`, the function returns `Option<T>`. The result is unwrapped
857 /// before any non-`is_none`/`is_some` assertion runs; `is_empty`/`not_empty`
858 /// assertions map to `is_none()`/`is_some()`. (Rust generator.)
859 #[serde(default)]
860 pub result_is_option: bool,
861 /// When `true`, the R generator emits the call result directly without wrapping
862 /// in `jsonlite::fromJSON()`. Use when the R binding already returns a native
863 /// R list (`Robj`) rather than a JSON string. Field-path assertions still use
864 /// `result$field` accessor syntax (i.e. `result_is_simple` behaviour is NOT
865 /// implied — only the JSON parse wrapper is suppressed). (R generator only.)
866 #[serde(default)]
867 pub result_is_r_list: bool,
868 /// When `true`, the Zig generator treats the result as a `[]u8` JSON string
869 /// representing a struct value (e.g., `ExtractionResult` serialized via the
870 /// FFI `_to_json` helper). The generator parses the JSON with
871 /// `std.json.parseFromSlice(std.json.Value, ...)` before emitting field
872 /// assertions, traversing the dynamic JSON object for each field path.
873 /// (Zig generator only.)
874 #[serde(default)]
875 pub result_is_json_struct: bool,
876 /// When `true`, the Rust generator wraps the `json_object` argument expression
877 /// in `Some(...).clone()` to match an owned `Option<T>` parameter slot rather
878 /// than passing `&options`. (Rust generator only.)
879 #[serde(default)]
880 pub wrap_options_in_some: bool,
881 /// Trailing positional arguments appended verbatim after the configured
882 /// `args`. Used when the target function takes additional positional slots
883 /// (e.g. visitor) the fixture cannot supply directly. (Rust generator only.)
884 #[serde(default)]
885 pub extra_args: Vec<String>,
886 /// Per-rust override of the call-level `returns_result`. When set, takes
887 /// precedence over `CallConfig.returns_result` for the Rust generator only.
888 /// Useful when one binding is fallible while others are not.
889 #[serde(default)]
890 pub returns_result: Option<bool>,
891 /// Maps handle config field names to their Python type constructor names.
892 ///
893 /// When the handle config object contains a nested dict-valued field, the
894 /// generator will wrap it in the specified type using keyword arguments.
895 /// E.g., `{"browser": "BrowserConfig"}` generates `BrowserConfig(mode="auto")`
896 /// instead of `{"mode": "auto"}`.
897 #[serde(default)]
898 pub handle_nested_types: HashMap<String, String>,
899 /// Handle config fields whose type constructor takes a single dict argument
900 /// instead of keyword arguments.
901 ///
902 /// E.g., `["auth"]` means `AuthConfig({"type": "basic", ...})` instead of
903 /// `AuthConfig(type="basic", ...)`.
904 #[serde(default)]
905 pub handle_dict_types: HashSet<String>,
906 /// Elixir struct module name for the handle config argument.
907 ///
908 /// When set, the generated Elixir handle config uses struct literal syntax
909 /// (`%Module.StructType{key: val}`) instead of a plain string-keyed map.
910 /// Rustler `NifStruct` requires a proper Elixir struct — plain maps are rejected.
911 ///
912 /// E.g., `"CrawlConfig"` generates `%Kreuzcrawl.CrawlConfig{download_assets: true}`.
913 #[serde(default)]
914 pub handle_struct_type: Option<String>,
915 /// Handle config fields whose list values are Elixir atoms (Rustler NifUnitEnum).
916 ///
917 /// When a config field is a `Vec<EnumType>` in Rust, the Elixir side must pass
918 /// a list of atoms (e.g., `[:image, :document]`) not strings (`["image"]`).
919 /// List the field names here so the generator emits atom literals instead of strings.
920 ///
921 /// E.g., `["asset_types"]` generates `asset_types: [:image]` instead of `["image"]`.
922 #[serde(default)]
923 pub handle_atom_list_fields: HashSet<String>,
924 /// WASM config class name for handle args (WASM generator only).
925 ///
926 /// When set, handle args are constructed using `ConfigType.default()` + setters
927 /// instead of passing a plain JS object (which fails `_assertClass` validation).
928 ///
929 /// E.g., `"WasmCrawlConfig"` generates:
930 /// ```js
931 /// const engineConfig = WasmCrawlConfig.default();
932 /// engineConfig.maxDepth = 1;
933 /// const engine = createEngine(engineConfig);
934 /// ```
935 #[serde(default)]
936 pub handle_config_type: Option<String>,
937 /// PHP client factory method name (PHP generator only).
938 ///
939 /// When set, the generated PHP test instantiates a client via
940 /// `ClassName::factory_method('test-key')` and calls methods on the instance
941 /// instead of using static facade calls.
942 ///
943 /// E.g., `"createClient"` generates:
944 /// ```php
945 /// $client = LiterLlm::createClient('test-key');
946 /// $result = $client->chat($request);
947 /// ```
948 #[serde(default)]
949 pub php_client_factory: Option<String>,
950 /// Client factory function name for instance-method languages (WASM, etc.).
951 ///
952 /// When set, the generated test imports this function, creates a client,
953 /// and calls API methods on the instance instead of as top-level functions.
954 ///
955 /// E.g., `"createClient"` generates:
956 /// ```typescript
957 /// import { createClient } from 'pkg';
958 /// const client = createClient('test-key');
959 /// const result = await client.chat(request);
960 /// ```
961 #[serde(default)]
962 pub client_factory: Option<String>,
963 /// Verbatim trailing arguments appended after the fixed `("test-key", ...)` pair
964 /// when calling the `client_factory` function.
965 ///
966 /// Use this when the factory function takes additional positional parameters
967 /// beyond the API key and optional base URL that the generator would otherwise
968 /// emit. Each element is emitted verbatim, separated by `, `.
969 ///
970 /// Example — Gleam `create_client` takes five positional arguments:
971 /// `(api_key, base_url, timeout_secs, max_retries, model_hint)`. Set:
972 /// ```toml
973 /// [e2e.call.overrides.gleam]
974 /// client_factory = "create_client"
975 /// client_factory_trailing_args = ["option.None", "option.None", "option.None"]
976 /// ```
977 /// to produce `create_client("test-key", option.Some(url), option.None, option.None, option.None)`.
978 #[serde(default)]
979 pub client_factory_trailing_args: Vec<String>,
980 /// Fields on the options object that require `BigInt()` wrapping (WASM only).
981 ///
982 /// `wasm_bindgen` maps Rust `u64`/`i64` to JavaScript `BigInt`. Numeric
983 /// values assigned to these setters must be wrapped with `BigInt(n)`.
984 ///
985 /// List camelCase field names, e.g.:
986 /// ```toml
987 /// [e2e.call.overrides.wasm]
988 /// bigint_fields = ["maxTokens", "seed"]
989 /// ```
990 #[serde(default)]
991 pub bigint_fields: Vec<String>,
992 /// Static CLI arguments appended to every invocation (brew/CLI generator only).
993 ///
994 /// E.g., `["--format", "json"]` appends `--format json` to every CLI call.
995 #[serde(default)]
996 pub cli_args: Vec<String>,
997 /// Maps fixture config field names to CLI flag names (brew/CLI generator only).
998 ///
999 /// E.g., `{"output_format": "--format"}` generates `--format <value>` from
1000 /// the fixture's `output_format` input field.
1001 #[serde(default)]
1002 pub cli_flags: HashMap<String, String>,
1003 /// C FFI opaque result type name (C only).
1004 ///
1005 /// The PascalCase name of the result struct, without the prefix.
1006 /// E.g., `"ChatCompletionResponse"` for `LiterllmChatCompletionResponse*`.
1007 /// If not set, defaults to the function name in PascalCase.
1008 #[serde(default)]
1009 pub result_type: Option<String>,
1010 /// Override the argument order for this language binding.
1011 ///
1012 /// Lists argument names from `args` in the order they should be passed
1013 /// to the target function. Useful when a language binding reorders parameters
1014 /// relative to the canonical `args` list in `CallConfig`.
1015 ///
1016 /// E.g., if `args = [path, mime_type, config]` but the Node.js binding
1017 /// takes `(path, config, mime_type?)`, specify:
1018 /// ```toml
1019 /// [e2e.call.overrides.node]
1020 /// arg_order = ["path", "config", "mime_type"]
1021 /// ```
1022 #[serde(default)]
1023 pub arg_order: Vec<String>,
1024 /// When `true`, `json_object` args with an `options_type` are passed as a
1025 /// pointer (`*OptionsType`) rather than a value. Use for Go bindings where
1026 /// the options parameter is `*ConversionOptions` (nil-able pointer) rather
1027 /// than a plain struct.
1028 ///
1029 /// Absent options are passed as `nil`; present options are unmarshalled into
1030 /// a local variable and passed as `&optionsVar`.
1031 #[serde(default)]
1032 pub options_ptr: bool,
1033 /// Alternative function name to use when the fixture includes a `visitor`.
1034 ///
1035 /// Some bindings expose two entry points: `Convert(html, opts)` for the
1036 /// plain case and `ConvertWithVisitor(html, opts, visitor)` when a visitor
1037 /// is involved. Set this to the visitor-accepting function name so the
1038 /// generator can pick the right symbol automatically.
1039 ///
1040 /// E.g., `"ConvertWithVisitor"` makes the Go generator emit:
1041 /// ```go
1042 /// result, err := htmd.ConvertWithVisitor(html, nil, visitor)
1043 /// ```
1044 /// instead of `htmd.Convert(html, nil, visitor)` (which would not compile).
1045 #[serde(default)]
1046 pub visitor_function: Option<String>,
1047 /// Rust trait names to import when `client_factory` is set (Rust generator only).
1048 ///
1049 /// When `client_factory` is set, the generated test creates a client object and
1050 /// calls methods on it. Those methods are defined on traits (e.g. `LlmClient`,
1051 /// `FileClient`) that must be in scope. List the trait names here and the Rust
1052 /// generator will emit `use {module}::{trait_name};` for each.
1053 ///
1054 /// E.g.:
1055 /// ```toml
1056 /// [e2e.call.overrides.rust]
1057 /// client_factory = "create_client"
1058 /// trait_imports = ["LlmClient", "FileClient", "BatchClient", "ResponseClient"]
1059 /// ```
1060 #[serde(default)]
1061 pub trait_imports: Vec<String>,
1062 /// Raw C return type, used verbatim instead of `{PREFIX}Type*` (C only).
1063 ///
1064 /// Valid values: `"char*"`, `"int32_t"`, `"uintptr_t"`.
1065 /// When set, the C generator skips options handle construction and uses the
1066 /// raw type directly. Free logic is adjusted accordingly.
1067 #[serde(default)]
1068 pub raw_c_result_type: Option<String>,
1069 /// Free function for raw `char*` C results (C only).
1070 ///
1071 /// Defaults to `{prefix}_free_string` when unset and `raw_c_result_type == "char*"`.
1072 #[serde(default)]
1073 pub c_free_fn: Option<String>,
1074 /// C FFI engine factory pattern (C only).
1075 ///
1076 /// When set, the C generator wraps each test call in a
1077 /// `{prefix}_create_engine(config)` / `{prefix}_crawl_engine_handle_free(engine)`
1078 /// prologue/epilogue using the named config type as the "arg 0" handle type.
1079 ///
1080 /// The value is the PascalCase config type name (without prefix), e.g.
1081 /// `"CrawlConfig"`. The generator will emit:
1082 /// ```c
1083 /// KCRAWLCrawlConfig* config_handle = kcrawl_crawl_config_from_json("{json}");
1084 /// KCRAWLCrawlEngineHandle* engine = kcrawl_create_engine(config_handle);
1085 /// kcrawl_crawl_config_free(config_handle);
1086 /// KCRAWLScrapeResult* result = kcrawl_scrape(engine, url);
1087 /// // ... assertions ...
1088 /// kcrawl_scrape_result_free(result);
1089 /// kcrawl_crawl_engine_handle_free(engine);
1090 /// ```
1091 #[serde(default)]
1092 pub c_engine_factory: Option<String>,
1093 /// Fields in a `json_object` arg that must be wrapped in `java.nio.file.Path.of()`
1094 /// (Java generator only).
1095 ///
1096 /// E.g., `["cache_dir"]` wraps the string value of `cache_dir` so the builder
1097 /// receives `java.nio.file.Path.of("/tmp/dir")` instead of a plain string.
1098 #[serde(default)]
1099 pub path_fields: Vec<String>,
1100 /// Trait name for the visitor pattern (Rust e2e tests only).
1101 ///
1102 /// When a fixture declares a `visitor` block, the Rust e2e generator emits
1103 /// `impl <trait_name> for _TestVisitor { ... }` and imports the trait from
1104 /// `{module}::visitor`. When unset, no visitor block is emitted and fixtures
1105 /// that declare a visitor will cause a codegen error.
1106 ///
1107 /// E.g., `"HtmlVisitor"` generates:
1108 /// ```rust,ignore
1109 /// use html_to_markdown_rs::visitor::{HtmlVisitor, NodeContext, VisitResult};
1110 /// // ...
1111 /// impl HtmlVisitor for _TestVisitor { ... }
1112 /// ```
1113 #[serde(default)]
1114 pub visitor_trait: Option<String>,
1115 /// Maps result field paths to their wasm-bindgen enum class names.
1116 ///
1117 /// wasm-bindgen exposes Rust enums as numeric discriminants in JavaScript
1118 /// (`WasmFinishReason.Stop === 0`), not string variants. When an `equals`
1119 /// assertion targets a field listed here, the WASM generator emits
1120 /// `expect(result.choices[0].finishReason).toBe(WasmFinishReason.Stop)`
1121 /// instead of attempting `(value ?? "").trim()`.
1122 ///
1123 /// The fixture's expected string value is converted to PascalCase to look
1124 /// up the variant (e.g. `"tool_calls"` -> `ToolCalls`).
1125 ///
1126 /// Example:
1127 /// ```toml
1128 /// [e2e.calls.chat.overrides.wasm]
1129 /// result_enum_fields = { "choices[0].finish_reason" = "WasmFinishReason", "status" = "WasmBatchStatus" }
1130 /// ```
1131 #[serde(default)]
1132 pub result_enum_fields: HashMap<String, String>,
1133 /// When `true`, indicates that the result is a pointer type (e.g., `*string` in Go,
1134 /// `*T` in Rust). The Go codegen will dereference it. When `false` (Go only), the
1135 /// result is a value type and should not be dereferenced.
1136 ///
1137 /// Used to distinguish between functions that return `(value, error)` where value
1138 /// is a scalar (string, uint, bool) as-is vs. those that return pointers.
1139 /// Defaults to `true` for backward compatibility with existing fixtures.
1140 #[serde(default = "default_true")]
1141 pub result_is_pointer: bool,
1142 /// Per-language override mirroring `CallConfig.result_element_is_string`.
1143 ///
1144 /// Set this on a per-language override when only one host's binding exposes
1145 /// the result as a native string array; otherwise prefer the call-level flag.
1146 #[serde(default)]
1147 pub result_element_is_string: bool,
1148 /// Maps array-typed result fields to the method name on each element that
1149 /// yields a string used in `contains` / `contains_all` assertions.
1150 ///
1151 /// Used when the array element is an opaque struct (e.g., a swift-bridge
1152 /// `type X;` declaration) and the element's "name" accessor is not the
1153 /// default `as_str` — for instance, `StructureItem` exposes `kind() -> String`
1154 /// instead of `as_str()`. The codegen consults this map when emitting
1155 /// `.map { $0.<accessor>().toString() }` so the closure compiles.
1156 ///
1157 /// Example:
1158 /// ```toml
1159 /// [e2e.call.overrides.swift]
1160 /// result_field_accessor = { "structure" = "kind" }
1161 /// ```
1162 #[serde(default)]
1163 pub result_field_accessor: HashMap<String, String>,
1164 /// Argument indices (0-based) that should be passed without labels in Swift
1165 /// (i.e., using `(_` parameter syntax instead of `name:`).
1166 ///
1167 /// Swift allows unnamed first parameters: `func f(_ x: Int)` vs `func f(x: Int)`.
1168 /// When the generated test call should match this signature, list the indices here.
1169 ///
1170 /// E.g., `[0]` for a single unnamed first parameter:
1171 /// ```toml
1172 /// [e2e.call.overrides.swift]
1173 /// unnamed_arg_indices = [0]
1174 /// ```
1175 /// generates `f(contentVec)` instead of `f(content: contentVec)`.
1176 #[serde(default)]
1177 pub unnamed_arg_indices: Vec<usize>,
1178}
1179
/// Serde default helper that always yields `true`.
///
/// Referenced via `#[serde(default = "default_true")]` by the
/// `nested_types_optional` and `result_is_pointer` fields so that both are
/// enabled when the key is absent from the configuration.
fn default_true() -> bool {
    true
}
1183
1184/// Per-language package reference configuration.
1185#[derive(Debug, Clone, Serialize, Deserialize, Default)]
1186pub struct PackageRef {
1187 /// Package/crate/gem/module name.
1188 #[serde(default)]
1189 pub name: Option<String>,
1190 /// Relative path from e2e/{lang}/ to the package.
1191 #[serde(default)]
1192 pub path: Option<String>,
1193 /// Go module path.
1194 #[serde(default)]
1195 pub module: Option<String>,
1196 /// Package version (e.g., for go.mod require directives).
1197 #[serde(default)]
1198 pub version: Option<String>,
1199}
1200
1201#[cfg(test)]
1202mod tests {
1203 use super::*;
1204
1205 fn empty_e2e_with_test_documents(dir: &str) -> E2eConfig {
1206 E2eConfig {
1207 test_documents_dir: dir.to_string(),
1208 ..Default::default()
1209 }
1210 }
1211
/// A config that omits `test_documents_dir` deserializes with the value
/// `"test_documents"`.
#[test]
fn test_documents_dir_default_is_test_documents() {
    let cfg: E2eConfig = toml::from_str("[call]\nfunction = \"f\"\n").expect("minimal TOML must deserialize");
    assert_eq!(cfg.test_documents_dir, "test_documents");
}
1217
/// An explicit `test_documents_dir` key in the TOML replaces the default value.
#[test]
fn test_documents_dir_explicit_override_wins() {
    let cfg: E2eConfig = toml::from_str("test_documents_dir = \"fixture_files\"\n[call]\nfunction = \"f\"\n")
        .expect("explicit override must deserialize");
    assert_eq!(cfg.test_documents_dir, "fixture_files");
}
1224
/// With an argument of `0`, `test_documents_relative_from` climbs two
/// directories before naming the documents directory.
#[test]
fn test_documents_relative_from_at_lang_root_returns_two_dots_up() {
    let cfg = empty_e2e_with_test_documents("test_documents");
    assert_eq!(cfg.test_documents_relative_from(0), "../../test_documents");
}
1230
/// With an argument of `1`, `test_documents_relative_from` climbs three
/// directories before naming the documents directory.
#[test]
fn test_documents_relative_from_at_spec_depth_returns_three_dots_up() {
    let cfg = empty_e2e_with_test_documents("test_documents");
    assert_eq!(cfg.test_documents_relative_from(1), "../../../test_documents");
}
1236
/// With an argument of `2`, `test_documents_relative_from` climbs four
/// directories before naming the documents directory.
#[test]
fn test_documents_relative_from_at_two_subdirs_deep_returns_four_dots_up() {
    let cfg = empty_e2e_with_test_documents("test_documents");
    assert_eq!(cfg.test_documents_relative_from(2), "../../../../test_documents");
}
1242
/// The relative path ends in the configured directory name rather than a
/// hard-coded `test_documents`.
#[test]
fn test_documents_relative_uses_configured_dir_name() {
    let cfg = empty_e2e_with_test_documents("fixture_files");
    assert_eq!(cfg.test_documents_relative_from(0), "../../fixture_files");
    assert_eq!(cfg.test_documents_relative_from(1), "../../../fixture_files");
}
1249
/// A default `SelectWhen` (no discriminator set) does not match a fixture.
#[test]
fn select_when_with_no_discriminators_never_matches() {
    let sel = SelectWhen::default();
    assert!(!sel.matches("any_id", "any_category", &[], &serde_json::Value::Null));
}
1255
/// `input_has` matches when the fixture input contains the named key (here
/// holding an empty array) and does not match when the key is absent.
#[test]
fn select_when_input_has_matches_non_null_key() {
    let sel = SelectWhen {
        input_has: Some("batch_urls".to_string()),
        ..Default::default()
    };
    let input = serde_json::json!({ "batch_urls": [] });
    assert!(sel.matches("fid", "cat", &[], &input));
    let empty_input = serde_json::json!({ "url": "x" });
    assert!(!sel.matches("fid", "cat", &[], &empty_input));
}
1267
/// `category` matches the identical category string and rejects a different one.
#[test]
fn select_when_category_matches_exactly() {
    let sel = SelectWhen {
        category: Some("crawl".to_string()),
        ..Default::default()
    };
    assert!(sel.matches("any_id", "crawl", &[], &serde_json::Value::Null));
    assert!(!sel.matches("any_id", "scrape", &[], &serde_json::Value::Null));
}
1277
/// `id_prefix` matches a fixture id that starts with the prefix and rejects
/// one that does not.
#[test]
fn select_when_id_prefix_matches() {
    let sel = SelectWhen {
        id_prefix: Some("batch_crawl_".to_string()),
        ..Default::default()
    };
    assert!(sel.matches("batch_crawl_events", "any", &[], &serde_json::Value::Null));
    assert!(!sel.matches("batch_scrape_basic", "any", &[], &serde_json::Value::Null));
}
1287
/// `id_glob` with a trailing `*` accepts `crawl_stream_basic` but rejects
/// `batch_crawl_stream`, where the literal part is not at the start of the id.
#[test]
fn select_when_id_glob_handles_star() {
    let sel = SelectWhen {
        id_glob: Some("crawl_stream*".to_string()),
        ..Default::default()
    };
    assert!(sel.matches("crawl_stream_basic", "any", &[], &serde_json::Value::Null));
    assert!(!sel.matches("batch_crawl_stream", "any", &[], &serde_json::Value::Null));
}
1297
/// `tag` matches when the fixture's tag list contains the configured tag and
/// does not match when the list lacks it.
#[test]
fn select_when_tag_matches_any_tag_in_list() {
    let sel = SelectWhen {
        tag: Some("streaming".to_string()),
        ..Default::default()
    };
    let tags = vec!["smoke".to_string(), "streaming".to_string()];
    assert!(sel.matches("fid", "cat", &tags, &serde_json::Value::Null));
    assert!(!sel.matches("fid", "cat", &["smoke".to_string()], &serde_json::Value::Null));
}
1308
/// When several discriminators are set, all of them must hold for a match.
#[test]
fn select_when_multiple_discriminators_anded() {
    let sel = SelectWhen {
        category: Some("stream".to_string()),
        id_prefix: Some("batch_crawl_stream".to_string()),
        ..Default::default()
    };
    assert!(sel.matches("batch_crawl_stream_events", "stream", &[], &serde_json::Value::Null));
    // Wrong category fails even though prefix matches
    assert!(!sel.matches("batch_crawl_stream_events", "crawl", &[], &serde_json::Value::Null));
    // Wrong prefix fails even though category matches
    assert!(!sel.matches("crawl_stream_basic", "stream", &[], &serde_json::Value::Null));
}
1322
/// A `select_when` table carrying only `input_has` deserializes, leaving
/// `category` and `id_prefix` unset.
#[test]
fn select_when_deserializes_legacy_input_has_only() {
    let toml_src = r#"
        [call]
        function = "scrape"

        [calls.batch_scrape]
        function = "batch_scrape"
        select_when = { input_has = "batch_urls" }
    "#;
    let cfg: E2eConfig = toml::from_str(toml_src).expect("legacy input_has must deserialize");
    let sel = cfg.calls["batch_scrape"]
        .select_when
        .as_ref()
        .expect("batch_scrape must carry a select_when");
    assert_eq!(sel.input_has.as_deref(), Some("batch_urls"));
    assert!(sel.category.is_none());
    assert!(sel.id_prefix.is_none());
}
1339
/// A `select_when` table with both `category` and `id_prefix` deserializes
/// with both values populated.
#[test]
fn select_when_deserializes_compound_discriminators() {
    let toml_src = r#"
        [call]
        function = "scrape"

        [calls.batch_crawl_stream]
        function = "batch_crawl_stream"
        select_when = { category = "stream", id_prefix = "batch_crawl_stream" }
    "#;
    let cfg: E2eConfig = toml::from_str(toml_src).expect("compound select_when must deserialize");
    let sel = cfg.calls["batch_crawl_stream"]
        .select_when
        .as_ref()
        .expect("batch_crawl_stream must carry a select_when");
    assert_eq!(sel.category.as_deref(), Some("stream"));
    assert_eq!(sel.id_prefix.as_deref(), Some("batch_crawl_stream"));
}
1355
/// A fixture in the `crawl` category resolves to the named call whose
/// `select_when` targets that category; a fixture in another category
/// resolves to the default `call`.
#[test]
fn resolve_call_for_fixture_routes_by_category_then_falls_back() {
    let crawl_call = CallConfig {
        function: "crawl".to_string(),
        select_when: Some(SelectWhen {
            category: Some("crawl".to_string()),
            ..Default::default()
        }),
        ..Default::default()
    };
    let cfg = E2eConfig {
        call: CallConfig {
            function: "scrape".to_string(),
            ..Default::default()
        },
        calls: HashMap::from([("crawl".to_string(), crawl_call)]),
        ..Default::default()
    };
    let input = serde_json::json!({ "url": "https://example.com" });
    let resolved = cfg.resolve_call_for_fixture(None, "crawl_basic", "crawl", &[], &input);
    assert_eq!(resolved.function, "crawl");
    let resolved = cfg.resolve_call_for_fixture(None, "scrape_basic", "scrape", &[], &input);
    assert_eq!(resolved.function, "scrape");
}
1384
1385 // --- effective_* resolver helpers ---
1386
/// With a default `CallConfig`, `effective_result_fields` yields the
/// config-level `result_fields` set.
#[test]
fn effective_result_fields_returns_global_when_call_is_empty() {
    let global = HashSet::from(["url".to_string()]);
    let cfg = E2eConfig {
        result_fields: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_result_fields(&call), &global);
}
1398
/// When the call sets its own `result_fields`, `effective_result_fields`
/// yields that set instead of the config-level one.
#[test]
fn effective_result_fields_call_override_wins_over_global() {
    let global = HashSet::from(["url".to_string()]);
    let per_call = HashSet::from(["pages".to_string(), "final_url".to_string()]);
    let cfg = E2eConfig {
        result_fields: global,
        ..Default::default()
    };
    let call = CallConfig {
        result_fields: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_result_fields(&call), &per_call);
}
1416
/// With a default `CallConfig`, `effective_fields` yields the config-level
/// `fields` map.
#[test]
fn effective_fields_returns_global_when_call_is_empty() {
    let global = HashMap::from([("metadata.title".to_string(), "metadata.document.title".to_string())]);
    let cfg = E2eConfig {
        fields: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields(&call), &global);
}
1428
/// When the call sets its own `fields`, `effective_fields` yields that map
/// instead of the config-level one.
#[test]
fn effective_fields_call_override_wins_over_global() {
    let global = HashMap::from([("a".to_string(), "b".to_string())]);
    let per_call = HashMap::from([("x".to_string(), "y".to_string())]);
    let cfg = E2eConfig {
        fields: global,
        ..Default::default()
    };
    let call = CallConfig {
        fields: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields(&call), &per_call);
}
1445
/// With a default `CallConfig`, `effective_fields_optional` yields the
/// config-level `fields_optional` set.
#[test]
fn effective_fields_optional_returns_global_when_call_is_empty() {
    let global = HashSet::from(["segments".to_string()]);
    let cfg = E2eConfig {
        fields_optional: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields_optional(&call), &global);
}
1457
/// When the call sets its own `fields_optional`, `effective_fields_optional`
/// yields that set instead of the config-level one.
#[test]
fn effective_fields_optional_call_override_wins_over_global() {
    let global = HashSet::from(["segments".to_string()]);
    let per_call = HashSet::from(["pages".to_string()]);
    let cfg = E2eConfig {
        fields_optional: global,
        ..Default::default()
    };
    let call = CallConfig {
        fields_optional: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields_optional(&call), &per_call);
}
1474
/// With a default `CallConfig`, `effective_fields_array` yields the
/// config-level `fields_array` set.
#[test]
fn effective_fields_array_returns_global_when_call_is_empty() {
    let global = HashSet::from(["choices".to_string()]);
    let cfg = E2eConfig {
        fields_array: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields_array(&call), &global);
}
1486
/// When the call sets its own `fields_array`, `effective_fields_array` yields
/// that set instead of the config-level one.
#[test]
fn effective_fields_array_call_override_wins_over_global() {
    let global = HashSet::from(["choices".to_string()]);
    let per_call = HashSet::from(["pages".to_string()]);
    let cfg = E2eConfig {
        fields_array: global,
        ..Default::default()
    };
    let call = CallConfig {
        fields_array: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields_array(&call), &per_call);
}
1503
/// With a default `CallConfig`, `effective_fields_method_calls` yields the
/// config-level `fields_method_calls` set.
#[test]
fn effective_fields_method_calls_returns_global_when_call_is_empty() {
    let global = HashSet::from(["metadata.format".to_string()]);
    let cfg = E2eConfig {
        fields_method_calls: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields_method_calls(&call), &global);
}
1515
/// When the call sets its own `fields_method_calls`,
/// `effective_fields_method_calls` yields that set instead of the
/// config-level one.
#[test]
fn effective_fields_method_calls_call_override_wins_over_global() {
    let global = HashSet::from(["metadata.format".to_string()]);
    let per_call = HashSet::from(["pages.status".to_string()]);
    let cfg = E2eConfig {
        fields_method_calls: global,
        ..Default::default()
    };
    let call = CallConfig {
        fields_method_calls: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields_method_calls(&call), &per_call);
}
1532
/// With a default `CallConfig`, `effective_fields_enum` yields the
/// config-level `fields_enum` set.
#[test]
fn effective_fields_enum_returns_global_when_call_is_empty() {
    let global = HashSet::from(["choices.finish_reason".to_string()]);
    let cfg = E2eConfig {
        fields_enum: global.clone(),
        ..Default::default()
    };
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields_enum(&call), &global);
}
1544
/// When the call sets its own `fields_enum`, `effective_fields_enum` yields
/// that set instead of the config-level one.
#[test]
fn effective_fields_enum_call_override_wins_over_global() {
    let global = HashSet::from(["choices.finish_reason".to_string()]);
    let per_call = HashSet::from(["assets.category".to_string()]);
    let cfg = E2eConfig {
        fields_enum: global,
        ..Default::default()
    };
    let call = CallConfig {
        fields_enum: per_call.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields_enum(&call), &per_call);
}
1561
/// A default `CallConfig` must resolve to the `fields_c_types` map
/// configured on the `E2eConfig` itself.
#[test]
fn effective_fields_c_types_returns_global_when_call_is_empty() {
    let expected = HashMap::from([(
        "conversion_result.metadata".to_string(),
        "HtmlMetadata".to_string(),
    )]);
    let cfg = E2eConfig {
        fields_c_types: expected.clone(),
        ..Default::default()
    };
    // Named binding (not a temporary) so the returned reference may borrow from it.
    let call = CallConfig::default();
    assert_eq!(cfg.effective_fields_c_types(&call), &expected);
}
1573
/// When the call carries its own `fields_c_types`, that map is what
/// `effective_fields_c_types` must return, not the config-level one.
#[test]
fn effective_fields_c_types_call_override_wins_over_global() {
    // The two maps hold different entries so the assertion can tell them apart.
    let cfg = E2eConfig {
        fields_c_types: HashMap::from([(
            "conversion_result.metadata".to_string(),
            "HtmlMetadata".to_string(),
        )]),
        ..Default::default()
    };
    let expected = HashMap::from([(
        "crawl_result.pages".to_string(),
        "PageResult".to_string(),
    )]);
    let call = CallConfig {
        fields_c_types: expected.clone(),
        ..Default::default()
    };
    assert_eq!(cfg.effective_fields_c_types(&call), &expected);
}
1590
/// Parses an `E2eConfig` from a TOML document that only contains a `[call]`
/// table, then checks that each `effective_*` helper, given `cfg.call`,
/// exposes the values written in that table.
#[test]
fn effective_resolver_helpers_deserialize_from_toml() {
    let manifest = r#"
[call]
function = "scrape"
result_fields = ["url", "markdown"]
fields_enum = ["status"]

[call.fields]
"meta.title" = "meta.document.title"

[call.fields_c_types]
"scrape_result.meta" = "MetaResult"
"#;
    let cfg: E2eConfig = toml::from_str(manifest).expect("must deserialize");
    let call = &cfg.call;

    // Both entries listed under `result_fields` must be visible through the resolver.
    for expected_field in ["url", "markdown"] {
        assert!(cfg.effective_result_fields(call).contains(expected_field));
    }
    assert!(cfg.effective_fields_enum(call).contains("status"));

    // Alias declared in the `[call.fields]` table.
    let alias = cfg.effective_fields(call).get("meta.title").map(|s| s.as_str());
    assert_eq!(alias, Some("meta.document.title"));

    // Type mapping declared in the `[call.fields_c_types]` table.
    let c_type = cfg
        .effective_fields_c_types(call)
        .get("scrape_result.meta")
        .map(|s| s.as_str());
    assert_eq!(c_type, Some("MetaResult"));
}
1621}