Skip to main content

alef_e2e/codegen/
python.rs

1//! Python e2e test code generator.
2//!
3//! Generates `e2e/python/conftest.py` and `tests/test_{category}.py` files from
4//! JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
5
6use crate::codegen::resolve_field;
7use crate::config::E2eConfig;
8use crate::escape::{escape_python, sanitize_filename, sanitize_ident};
9use crate::field_access::FieldResolver;
10use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
11use alef_core::backend::GeneratedFile;
12use alef_core::config::AlefConfig;
13use alef_core::hash::{self, CommentStyle};
14use anyhow::Result;
15use heck::{ToShoutySnakeCase, ToSnakeCase};
16use std::collections::HashMap;
17use std::fmt::Write as FmtWrite;
18use std::path::PathBuf;
19
20/// Python e2e test code generator.
21pub struct PythonE2eCodegen;
22
23impl super::E2eCodegen for PythonE2eCodegen {
24    fn generate(
25        &self,
26        groups: &[FixtureGroup],
27        e2e_config: &E2eConfig,
28        _alef_config: &AlefConfig,
29    ) -> Result<Vec<GeneratedFile>> {
30        let mut files = Vec::new();
31        let output_base = PathBuf::from(e2e_config.effective_output()).join("python");
32
33        // conftest.py
34        files.push(GeneratedFile {
35            path: output_base.join("conftest.py"),
36            content: render_conftest(e2e_config, groups),
37            generated_header: true,
38        });
39
40        // Root __init__.py (prevents ruff INP001).
41        files.push(GeneratedFile {
42            path: output_base.join("__init__.py"),
43            content: "\n".to_string(),
44            generated_header: false,
45        });
46
47        // tests/__init__.py
48        files.push(GeneratedFile {
49            path: output_base.join("tests").join("__init__.py"),
50            content: "\n".to_string(),
51            generated_header: false,
52        });
53
54        // pyproject.toml for standalone uv resolution
55        let python_pkg = e2e_config.resolve_package("python");
56        let pkg_name = python_pkg
57            .as_ref()
58            .and_then(|p| p.name.as_deref())
59            .unwrap_or("kreuzcrawl");
60        let pkg_path = python_pkg
61            .as_ref()
62            .and_then(|p| p.path.as_deref())
63            .unwrap_or("../../packages/python");
64        let pkg_version = python_pkg
65            .as_ref()
66            .and_then(|p| p.version.as_deref())
67            .unwrap_or("0.1.0");
68        files.push(GeneratedFile {
69            path: output_base.join("pyproject.toml"),
70            content: render_pyproject(pkg_name, pkg_path, pkg_version, e2e_config.dep_mode),
71            generated_header: true,
72        });
73
74        // Per-category test files.
75        for group in groups {
76            let fixtures: Vec<&Fixture> = group.fixtures.iter().collect();
77
78            if fixtures.is_empty() {
79                continue;
80            }
81
82            // Skip emitting the file entirely when every fixture is skipped for
83            // python — there's nothing to run, and emitting imports of
84            // not-bound APIs causes module-level ImportError that masks the
85            // skip marker.
86            if fixtures.iter().all(|f| is_skipped(f, "python")) {
87                continue;
88            }
89
90            let filename = format!("test_{}.py", sanitize_filename(&group.category));
91            let content = render_test_file(&group.category, &fixtures, e2e_config);
92
93            files.push(GeneratedFile {
94                path: output_base.join("tests").join(filename),
95                content,
96                generated_header: true,
97            });
98        }
99
100        Ok(files)
101    }
102
103    fn language_name(&self) -> &'static str {
104        "python"
105    }
106}
107
108// ---------------------------------------------------------------------------
109// pyproject.toml
110// ---------------------------------------------------------------------------
111
112fn render_pyproject(
113    pkg_name: &str,
114    pkg_path: &str,
115    pkg_version: &str,
116    dep_mode: crate::config::DependencyMode,
117) -> String {
118    // Generate in pyproject-fmt canonical form so the pre-commit hook is a no-op.
119    // pyproject-fmt sorts deps alphabetically, uses spaces inside brackets, dotted
120    // tool keys, and injects Python classifiers.
121    let (deps_line, uv_sources_block) = match dep_mode {
122        crate::config::DependencyMode::Registry => (
123            format!(
124                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}{pkg_version}\" ]"
125            ),
126            String::new(),
127        ),
128        crate::config::DependencyMode::Local => (
129            format!(
130                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}\" ]"
131            ),
132            format!(
133                "\n[tool.uv]\nsources.{pkg_name} = {{ path = \"{pkg_path}\" }}\n",
134                pkg_path = pkg_path
135            ),
136        ),
137    };
138
139    format!(
140        r#"[build-system]
141build-backend = "setuptools.build_meta"
142requires = [ "setuptools>=68", "wheel" ]
143
144[project]
145name = "{pkg_name}-e2e-tests"
146version = "0.0.0"
147description = "End-to-end tests"
148requires-python = ">=3.10"
149classifiers = [
150  "Programming Language :: Python :: 3 :: Only",
151  "Programming Language :: Python :: 3.10",
152  "Programming Language :: Python :: 3.11",
153  "Programming Language :: Python :: 3.12",
154  "Programming Language :: Python :: 3.13",
155  "Programming Language :: Python :: 3.14",
156]
157{deps_line}
158
159[tool.setuptools]
160packages = [  ]
161{uv_sources_block}
162[tool.ruff]
163lint.ignore = [ "PLR2004" ]
164lint.per-file-ignores."tests/**" = [ "B017", "PT011", "S101", "S108" ]
165
166[tool.pytest]
167ini_options.asyncio_mode = "auto"
168ini_options.testpaths = [ "tests" ]
169ini_options.python_files = "test_*.py"
170ini_options.python_functions = "test_*"
171ini_options.addopts = "-v --strict-markers --tb=short"
172ini_options.timeout = 300
173"#
174    )
175}
176
177// ---------------------------------------------------------------------------
178// Config resolution helpers
179// ---------------------------------------------------------------------------
180
181fn resolve_function_name(e2e_config: &E2eConfig) -> String {
182    resolve_function_name_for_call(&e2e_config.call)
183}
184
185fn resolve_function_name_for_call(call_config: &crate::config::CallConfig) -> String {
186    call_config
187        .overrides
188        .get("python")
189        .and_then(|o| o.function.clone())
190        .unwrap_or_else(|| call_config.function.clone())
191}
192
193fn resolve_module(e2e_config: &E2eConfig) -> String {
194    e2e_config
195        .call
196        .overrides
197        .get("python")
198        .and_then(|o| o.module.clone())
199        .unwrap_or_else(|| e2e_config.call.module.replace('-', "_"))
200}
201
202fn resolve_options_type(e2e_config: &E2eConfig) -> Option<String> {
203    e2e_config
204        .call
205        .overrides
206        .get("python")
207        .and_then(|o| o.options_type.clone())
208}
209
210/// Resolve how json_object args are passed: "kwargs" (default), "dict", or "json".
211fn resolve_options_via(e2e_config: &E2eConfig) -> &str {
212    e2e_config
213        .call
214        .overrides
215        .get("python")
216        .and_then(|o| o.options_via.as_deref())
217        .unwrap_or("kwargs")
218}
219
220/// Resolve enum field mappings from the Python override config.
221fn resolve_enum_fields(e2e_config: &E2eConfig) -> &HashMap<String, String> {
222    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
223    e2e_config
224        .call
225        .overrides
226        .get("python")
227        .map(|o| &o.enum_fields)
228        .unwrap_or(&EMPTY)
229}
230
231/// Resolve handle nested type mappings from the Python override config.
232/// Maps config field names to their Python constructor type names.
233fn resolve_handle_nested_types(e2e_config: &E2eConfig) -> &HashMap<String, String> {
234    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
235    e2e_config
236        .call
237        .overrides
238        .get("python")
239        .map(|o| &o.handle_nested_types)
240        .unwrap_or(&EMPTY)
241}
242
243/// Resolve handle dict type set from the Python override config.
244/// Fields in this set use `TypeName({...})` instead of `TypeName(key=val, ...)`.
245fn resolve_handle_dict_types(e2e_config: &E2eConfig) -> &std::collections::HashSet<String> {
246    static EMPTY: std::sync::LazyLock<std::collections::HashSet<String>> =
247        std::sync::LazyLock::new(std::collections::HashSet::new);
248    e2e_config
249        .call
250        .overrides
251        .get("python")
252        .map(|o| &o.handle_dict_types)
253        .unwrap_or(&EMPTY)
254}
255
256fn is_skipped(fixture: &Fixture, language: &str) -> bool {
257    fixture.skip.as_ref().is_some_and(|s| s.should_skip(language))
258}
259
260// ---------------------------------------------------------------------------
261// Rendering
262// ---------------------------------------------------------------------------
263
264fn render_conftest(e2e_config: &E2eConfig, groups: &[FixtureGroup]) -> String {
265    let module = resolve_module(e2e_config);
266    let has_http_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| f.is_http_test());
267
268    let header = hash::header(CommentStyle::Hash);
269    if has_http_fixtures {
270        format!(
271            r#"{header}"""Pytest configuration for e2e tests."""
272from __future__ import annotations
273
274import os
275import subprocess
276import threading
277from pathlib import Path
278from typing import Generator
279
280import pytest
281
282# Ensure the package is importable.
283# The {module} package is expected to be installed in the current environment.
284
285_HERE = Path(__file__).parent
286_E2E_DIR = _HERE.parent
287_MOCK_SERVER_BIN = _E2E_DIR / "rust" / "target" / "release" / "mock-server"
288_FIXTURES_DIR = _E2E_DIR.parent / "fixtures"
289
290
291@pytest.fixture(scope="session", autouse=True)
292def mock_server() -> Generator[str, None, None]:
293    """Spawn the mock HTTP server binary and set MOCK_SERVER_URL."""
294    proc = subprocess.Popen(  # noqa: S603
295        [str(_MOCK_SERVER_BIN), str(_FIXTURES_DIR)],
296        stdout=subprocess.PIPE,
297        stderr=None,
298        stdin=subprocess.PIPE,
299    )
300    url = ""
301    assert proc.stdout is not None
302    for raw_line in proc.stdout:
303        line = raw_line.decode().strip()
304        if line.startswith("MOCK_SERVER_URL="):
305            url = line.split("=", 1)[1]
306            break
307    os.environ["MOCK_SERVER_URL"] = url
308    # Drain stdout in background so the server never blocks.
309    threading.Thread(target=proc.stdout.read, daemon=True).start()
310    yield url
311    if proc.stdin:
312        proc.stdin.close()
313    proc.terminate()
314    proc.wait()
315
316
317def _make_request(method: str, path: str, **kwargs: object) -> object:
318    """Make an HTTP request to the mock server."""
319    import urllib.request  # noqa: PLC0415
320
321    base_url = os.environ.get("MOCK_SERVER_URL", "http://localhost:8080")
322    url = f"{{base_url}}{{path}}"
323    data = kwargs.pop("json", None)
324    if data is not None:
325        import json  # noqa: PLC0415
326
327        body = json.dumps(data).encode()
328        headers = dict(kwargs.pop("headers", {{}}))
329        headers.setdefault("Content-Type", "application/json")
330        req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
331    else:
332        headers = dict(kwargs.pop("headers", {{}}))
333        req = urllib.request.Request(url, headers=headers, method=method.upper())
334    try:
335        with urllib.request.urlopen(req) as resp:  # noqa: S310
336            return resp
337    except urllib.error.HTTPError as exc:
338        return exc
339
340
341@pytest.fixture(scope="session")
342def app(mock_server: str) -> object:  # noqa: ARG001
343    """Return a simple HTTP helper bound to the mock server URL."""
344
345    class _App:
346        def request(self, path: str, **kwargs: object) -> object:
347            method = str(kwargs.pop("method", "GET"))
348            return _make_request(method, path, **kwargs)
349
350    return _App()
351"#
352        )
353    } else {
354        format!(
355            r#"{header}"""Pytest configuration for e2e tests."""
356# Ensure the package is importable.
357# The {module} package is expected to be installed in the current environment.
358"#
359        )
360    }
361}
362
363fn render_test_file(category: &str, fixtures: &[&Fixture], e2e_config: &E2eConfig) -> String {
364    let mut out = String::new();
365    out.push_str(&hash::header(CommentStyle::Hash));
366    let _ = writeln!(out, "\"\"\"E2e tests for category: {category}.\"\"\"");
367
368    let module = resolve_module(e2e_config);
369    let function_name = resolve_function_name(e2e_config);
370    let options_type = resolve_options_type(e2e_config);
371    let options_via = resolve_options_via(e2e_config);
372    let enum_fields = resolve_enum_fields(e2e_config);
373    let handle_nested_types = resolve_handle_nested_types(e2e_config);
374    let handle_dict_types = resolve_handle_dict_types(e2e_config);
375    let field_resolver = FieldResolver::new(
376        &e2e_config.fields,
377        &e2e_config.fields_optional,
378        &e2e_config.result_fields,
379        &e2e_config.fields_array,
380    );
381
382    let has_error_test = fixtures
383        .iter()
384        .any(|f| f.assertions.iter().any(|a| a.assertion_type == "error"));
385    let has_skipped = fixtures.iter().any(|f| is_skipped(f, "python"));
386    let has_http_tests = fixtures.iter().any(|f| f.is_http_test());
387
388    // Check if any fixture in this file uses an async call.
389    let is_async = fixtures.iter().any(|f| {
390        let cc = e2e_config.resolve_call(f.call.as_deref());
391        cc.r#async
392    }) || e2e_config.call.r#async;
393    let needs_pytest = has_error_test || has_skipped || is_async;
394
395    // "json" mode needs `import json`.
396    let needs_json_import = options_via == "json"
397        && fixtures.iter().any(|f| {
398            e2e_config
399                .call
400                .args
401                .iter()
402                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
403        });
404
405    // mock_url args need `import os`.
406    let needs_os_import = e2e_config.call.args.iter().any(|arg| arg.arg_type == "mock_url");
407
408    // HTTP tests handle `import re` inline (per-test), so no top-level re import is needed.
409    let needs_re_import = false;
410    let _ = has_http_tests; // used indirectly via inline imports in render_http_test_function
411
412    // Only import options_type when using "kwargs" mode.
413    let needs_options_type = options_via == "kwargs"
414        && options_type.is_some()
415        && fixtures.iter().any(|f| {
416            e2e_config
417                .call
418                .args
419                .iter()
420                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
421        });
422
423    // Collect enum types actually used across all fixtures in this file.
424    let mut used_enum_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
425    if needs_options_type && !enum_fields.is_empty() {
426        for fixture in fixtures.iter() {
427            for arg in &e2e_config.call.args {
428                if arg.arg_type == "json_object" {
429                    let value = resolve_field(&fixture.input, &arg.field);
430                    if let Some(obj) = value.as_object() {
431                        for key in obj.keys() {
432                            if let Some(enum_type) = enum_fields.get(key) {
433                                used_enum_types.insert(enum_type.clone());
434                            }
435                        }
436                    }
437                }
438            }
439        }
440    }
441
442    // Collect imports sorted per isort/ruff I001: stdlib group, then
443    // third-party group, separated by a blank line. Within each group
444    // `import X` lines come before `from X import Y` lines, both sorted.
445    let mut stdlib_imports: Vec<String> = Vec::new();
446    let mut thirdparty_bare: Vec<String> = Vec::new();
447    let mut thirdparty_from: Vec<String> = Vec::new();
448
449    if needs_json_import {
450        stdlib_imports.push("import json".to_string());
451    }
452
453    if needs_os_import {
454        stdlib_imports.push("import os".to_string());
455    }
456
457    if needs_re_import {
458        stdlib_imports.push("import re".to_string());
459    }
460
461    if needs_pytest {
462        // F401 (unused-import) suppression: pytest is needed at module level for
463        // its fixture decorators and `pytest.mark.*` annotations, but ruff cannot
464        // statically tell whether a generated test file references those — so we
465        // hint to ruff that the import is intentional.
466        thirdparty_bare.push("import pytest  # noqa: F401".to_string());
467    }
468
469    // For non-HTTP fixtures, build the normal function imports.
470    // Only count fixtures that are not skipped and have assertions (need to call the function).
471    let has_non_http_fixtures = fixtures
472        .iter()
473        .any(|f| !f.is_http_test() && !is_skipped(f, "python") && !f.assertions.is_empty());
474    if has_non_http_fixtures {
475        // Collect handle constructor function names that need to be imported.
476        let handle_constructors: Vec<String> = e2e_config
477            .call
478            .args
479            .iter()
480            .filter(|arg| arg.arg_type == "handle")
481            .map(|arg| format!("create_{}", arg.name.to_snake_case()))
482            .collect();
483
484        // Collect all unique function names actually used across all fixtures in this file.
485        // Do not seed with the default function_name — only include it when at least one
486        // fixture resolves to it, to avoid unused-import (F401) warnings from ruff.
487        let mut import_names: Vec<String> = Vec::new();
488        for fixture in fixtures.iter() {
489            let cc = e2e_config.resolve_call(fixture.call.as_deref());
490            let fn_name = resolve_function_name_for_call(cc);
491            if !import_names.contains(&fn_name) {
492                import_names.push(fn_name);
493            }
494        }
495        // Safety net: should not occur since the group is non-empty, but ensures
496        // import_names is never empty if all fixtures use the default call.
497        if import_names.is_empty() {
498            import_names.push(function_name.clone());
499        }
500        for ctor in &handle_constructors {
501            if !import_names.contains(ctor) {
502                import_names.push(ctor.clone());
503            }
504        }
505
506        // If any handle arg has config, import the config class (CrawlConfig or options_type).
507        let needs_config_import = e2e_config.call.args.iter().any(|arg| {
508            arg.arg_type == "handle"
509                && fixtures.iter().any(|f| {
510                    let val = resolve_field(&f.input, &arg.field);
511                    !val.is_null() && val.as_object().is_some_and(|o| !o.is_empty())
512                })
513        });
514        if needs_config_import {
515            let config_class = options_type.as_deref().unwrap_or("CrawlConfig");
516            if !import_names.contains(&config_class.to_string()) {
517                import_names.push(config_class.to_string());
518            }
519        }
520
521        // Import any nested handle config types actually used in this file.
522        if !handle_nested_types.is_empty() {
523            let mut used_nested_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
524            for fixture in fixtures.iter() {
525                for arg in &e2e_config.call.args {
526                    if arg.arg_type == "handle" {
527                        let config_value = resolve_field(&fixture.input, &arg.field);
528                        if let Some(obj) = config_value.as_object() {
529                            for key in obj.keys() {
530                                if let Some(type_name) = handle_nested_types.get(key) {
531                                    if obj[key].is_object() {
532                                        used_nested_types.insert(type_name.clone());
533                                    }
534                                }
535                            }
536                        }
537                    }
538                }
539            }
540            for type_name in used_nested_types {
541                if !import_names.contains(&type_name) {
542                    import_names.push(type_name);
543                }
544            }
545        }
546
547        // Collect method_result helper function imports.
548        for fixture in fixtures.iter() {
549            for assertion in &fixture.assertions {
550                if assertion.assertion_type == "method_result" {
551                    if let Some(method_name) = &assertion.method {
552                        let import = python_method_helper_import(method_name);
553                        if let Some(name) = import {
554                            if !import_names.contains(&name) {
555                                import_names.push(name);
556                            }
557                        }
558                    }
559                }
560            }
561        }
562
563        if let (true, Some(opts_type)) = (needs_options_type, &options_type) {
564            import_names.push(opts_type.clone());
565            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
566            // Import enum types from enum_module (if specified) or main module.
567            if !used_enum_types.is_empty() {
568                let enum_mod = e2e_config
569                    .call
570                    .overrides
571                    .get("python")
572                    .and_then(|o| o.enum_module.as_deref())
573                    .unwrap_or(&module);
574                let enum_names: Vec<&String> = used_enum_types.iter().collect();
575                thirdparty_from.push(format!(
576                    "from {enum_mod} import {}",
577                    enum_names.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
578                ));
579            }
580        } else {
581            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
582        }
583    }
584
585    stdlib_imports.sort();
586    thirdparty_bare.sort();
587    thirdparty_from.sort();
588
589    // Emit sorted import groups with blank lines between groups per PEP 8.
590    if !stdlib_imports.is_empty() {
591        for imp in &stdlib_imports {
592            let _ = writeln!(out, "{imp}");
593        }
594        let _ = writeln!(out);
595    }
596    // Third-party: bare imports then from-imports, no blank line between them.
597    for imp in &thirdparty_bare {
598        let _ = writeln!(out, "{imp}");
599    }
600    for imp in &thirdparty_from {
601        let _ = writeln!(out, "{imp}");
602    }
603    // Two blank lines after imports (PEP 8 / ruff I001).
604    let _ = writeln!(out);
605    let _ = writeln!(out);
606
607    for fixture in fixtures {
608        if fixture.is_http_test() {
609            render_http_test_function(&mut out, fixture);
610        } else if !is_skipped(fixture, "python") && fixture.assertions.is_empty() {
611            // Non-HTTP fixture with no assertions: generate a skipped placeholder.
612            let fn_name = sanitize_ident(&fixture.id);
613            let description = &fixture.description;
614            let desc_with_period = if description.ends_with('.') {
615                description.to_string()
616            } else {
617                format!("{description}.")
618            };
619            let _ = writeln!(
620                out,
621                "@pytest.mark.skip(reason=\"no assertions configured for this fixture in python e2e\")"
622            );
623            let _ = writeln!(out, "def test_{fn_name}() -> None:");
624            let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
625        } else {
626            render_test_function(
627                &mut out,
628                fixture,
629                e2e_config,
630                options_type.as_deref(),
631                options_via,
632                enum_fields,
633                handle_nested_types,
634                handle_dict_types,
635                &field_resolver,
636            );
637        }
638        let _ = writeln!(out);
639    }
640
641    out
642}
643
644// ---------------------------------------------------------------------------
645// HTTP server test rendering
646// ---------------------------------------------------------------------------
647
648/// Render a pytest test function for an HTTP server fixture.
649///
650/// The generated test:
651/// 1. Receives a `client` fixture from conftest.py (the test server client).
652/// 2. Sends the configured request.
653/// 3. Asserts status code, body (exact or partial), headers, and validation errors.
654fn render_http_test_function(out: &mut String, fixture: &Fixture) {
655    let Some(http) = &fixture.http else {
656        return;
657    };
658
659    let fn_name = sanitize_ident(&fixture.id);
660    let description = &fixture.description;
661    let desc_with_period = if description.ends_with('.') {
662        description.to_string()
663    } else {
664        format!("{description}.")
665    };
666
667    // HTTP 101 (WebSocket upgrade) — urllib cannot handle upgrade responses.
668    let status = http.expected_response.status_code;
669    if status == 101 {
670        let _ = writeln!(
671            out,
672            "@pytest.mark.skip(reason=\"HTTP 101 WebSocket upgrade cannot be tested via urllib\")"
673        );
674        let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
675        let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
676        let _ = writeln!(out, "    ...");
677        let _ = writeln!(out);
678        return;
679    }
680
681    if is_skipped(fixture, "python") {
682        let reason = fixture
683            .skip
684            .as_ref()
685            .and_then(|s| s.reason.as_deref())
686            .unwrap_or("skipped for python");
687        let escaped = escape_python(reason);
688        let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
689    }
690
691    let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
692    let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
693    let _ = writeln!(out, "    import os  # noqa: PLC0415");
694    let _ = writeln!(out, "    import urllib.request  # noqa: PLC0415");
695    let _ = writeln!(out, "    base = os.environ.get(\"MOCK_SERVER_URL\", mock_server)");
696    let fixture_id = fixture.id.as_str();
697    let _ = writeln!(out, "    url = f\"{{base}}/fixtures/{fixture_id}\"");
698
699    // Build the request call using urllib.
700    let method = http.request.method.to_uppercase();
701
702    // Build headers dict.
703    let mut header_entries: Vec<String> = Vec::new();
704    for (k, v) in &http.request.headers {
705        header_entries.push(format!("        \"{}\": \"{}\",", escape_python(k), escape_python(v)));
706    }
707    let headers_py = if header_entries.is_empty() {
708        "{}".to_string()
709    } else {
710        format!("{{\n{}\n    }}", header_entries.join("\n"))
711    };
712
713    if let Some(body) = &http.request.body {
714        let py_body = json_to_python_literal(body);
715        let _ = writeln!(out, "    import json  # noqa: PLC0415");
716        let _ = writeln!(out, "    _headers = {headers_py}");
717        let _ = writeln!(out, "    _headers.setdefault(\"Content-Type\", \"application/json\")");
718        let _ = writeln!(out, "    _body = json.dumps({py_body}).encode()");
719        let _ = writeln!(
720            out,
721            "    _req = urllib.request.Request(url, data=_body, headers=_headers, method=\"{method}\")"
722        );
723    } else {
724        let _ = writeln!(out, "    _headers = {headers_py}");
725        let _ = writeln!(
726            out,
727            "    _req = urllib.request.Request(url, headers=_headers, method=\"{method}\")"
728        );
729    }
730    // Determine which response variables are actually needed.
731    // Exclude the empty-string body sentinel ("") and null — those mean "no body".
732    let body_has_content = matches!(&http.expected_response.body, Some(v)
733        if !(v.is_null() || (v.is_string() && v.as_str() == Some(""))));
734    let needs_body = body_has_content
735        || http.expected_response.body_partial.is_some()
736        || http
737            .expected_response
738            .validation_errors
739            .as_ref()
740            .is_some_and(|v| !v.is_empty());
741    // content-encoding is skipped (mock server strips it), so only consider other headers.
742    let needs_headers = http
743        .expected_response
744        .headers
745        .iter()
746        .any(|(k, _)| k.to_lowercase() != "content-encoding");
747
748    // Build an opener that does NOT follow redirects so we can assert on 3xx responses.
749    let _ = writeln!(
750        out,
751        "    class _NoRedirect(urllib.request.HTTPRedirectHandler):  # noqa: N801"
752    );
753    let _ = writeln!(
754        out,
755        "        def redirect_request(self, *args, **kwargs): return None  # noqa: E704"
756    );
757    let _ = writeln!(out, "    _opener = urllib.request.build_opener(_NoRedirect())");
758    let _ = writeln!(out, "    try:");
759    let _ = writeln!(out, "        response = _opener.open(_req)  # noqa: S310");
760    let _ = writeln!(out, "        status_code = response.status");
761    if needs_body {
762        let _ = writeln!(out, "        resp_body = response.read()");
763    }
764    if needs_headers {
765        let _ = writeln!(out, "        resp_headers = dict(response.headers)");
766    }
767    let _ = writeln!(out, "    except urllib.error.HTTPError as _exc:");
768    let _ = writeln!(out, "        status_code = _exc.code");
769    if needs_body {
770        let _ = writeln!(out, "        resp_body = _exc.read()");
771    }
772    if needs_headers {
773        let _ = writeln!(out, "        resp_headers = dict(_exc.headers)");
774    }
775
776    // Status code assertion.
777    let status = http.expected_response.status_code;
778    let _ = writeln!(out, "    assert status_code == {status}  # noqa: S101");
779
780    // Body assertions.
781    if let Some(expected_body) = &http.expected_response.body {
782        // Empty-string sentinel means no body — skip assertion.
783        if !(expected_body.is_null() || expected_body.is_string() && expected_body.as_str() == Some("")) {
784            if let serde_json::Value::String(s) = expected_body {
785                // Plain-string body: mock server returns raw text, compare decoded bytes directly.
786                let py_val = format!("\"{}\"", escape_python(s));
787                let _ = writeln!(out, "    assert resp_body.decode() == {py_val}  # noqa: S101");
788            } else {
789                let py_val = json_to_python_literal(expected_body);
790                let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
791                let _ = writeln!(out, "    data = _json.loads(resp_body)");
792                let _ = writeln!(out, "    assert data == {py_val}  # noqa: S101");
793            }
794        }
795    } else if let Some(partial) = &http.expected_response.body_partial {
796        let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
797        let _ = writeln!(out, "    data = _json.loads(resp_body)");
798        if let Some(obj) = partial.as_object() {
799            for (key, val) in obj {
800                let py_val = json_to_python_literal(val);
801                let escaped_key = escape_python(key);
802                let _ = writeln!(out, "    assert data[\"{escaped_key}\"] == {py_val}  # noqa: S101");
803            }
804        }
805    }
806
807    // Header assertions.
808    for (header_name, header_value) in &http.expected_response.headers {
809        let lower_name = header_name.to_lowercase();
810        // The mock server strips content-encoding headers because it returns uncompressed bodies.
811        if lower_name == "content-encoding" {
812            continue;
813        }
814        let escaped_name = escape_python(&lower_name);
815        match header_value.as_str() {
816            "<<present>>" => {
817                let _ = writeln!(out, "    assert \"{escaped_name}\" in resp_headers  # noqa: S101");
818            }
819            "<<absent>>" => {
820                let _ = writeln!(
821                    out,
822                    "    assert resp_headers.get(\"{escaped_name}\") is None  # noqa: S101"
823                );
824            }
825            "<<uuid>>" => {
826                let _ = writeln!(out, "    import re  # noqa: PLC0415");
827                let _ = writeln!(
828                    out,
829                    "    assert re.match(r'^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$', resp_headers[\"{escaped_name}\"])  # noqa: S101"
830                );
831            }
832            exact => {
833                let escaped_val = escape_python(exact);
834                let _ = writeln!(
835                    out,
836                    "    assert resp_headers[\"{escaped_name}\"] == \"{escaped_val}\"  # noqa: S101"
837                );
838            }
839        }
840    }
841
842    // Validation error assertions — skip when a full body assertEquals is already generated
843    // (it is redundant and avoids miss-keying "detail" vs "errors").
844    if let Some(validation_errors) = &http.expected_response.validation_errors {
845        if !validation_errors.is_empty() && !body_has_content {
846            let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
847            let _ = writeln!(out, "    _data = _json.loads(resp_body)");
848            let _ = writeln!(out, "    errors = _data.get(\"errors\", [])");
849            for ve in validation_errors {
850                let loc_py: Vec<String> = ve.loc.iter().map(|s| format!("\"{}\"", escape_python(s))).collect();
851                let loc_str = loc_py.join(", ");
852                let escaped_msg = escape_python(&ve.msg);
853                let _ = writeln!(
854                    out,
855                    "    assert any(e[\"loc\"] == [{loc_str}] and \"{escaped_msg}\" in e[\"msg\"] for e in errors)  # noqa: S101"
856                );
857            }
858        }
859    }
860}
861
862// ---------------------------------------------------------------------------
863// Function-call test rendering
864// ---------------------------------------------------------------------------
865
866#[allow(clippy::too_many_arguments)]
867fn render_test_function(
868    out: &mut String,
869    fixture: &Fixture,
870    e2e_config: &E2eConfig,
871    options_type: Option<&str>,
872    options_via: &str,
873    enum_fields: &HashMap<String, String>,
874    handle_nested_types: &HashMap<String, String>,
875    handle_dict_types: &std::collections::HashSet<String>,
876    field_resolver: &FieldResolver,
877) {
878    let fn_name = sanitize_ident(&fixture.id);
879    let description = &fixture.description;
880    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
881    let function_name = resolve_function_name_for_call(call_config);
882    let result_var = &call_config.result_var;
883
884    let desc_with_period = if description.ends_with('.') {
885        description.to_string()
886    } else {
887        format!("{description}.")
888    };
889
890    // Emit pytest.mark.skip for fixtures that should be skipped for python.
891    if is_skipped(fixture, "python") {
892        let reason = fixture
893            .skip
894            .as_ref()
895            .and_then(|s| s.reason.as_deref())
896            .unwrap_or("skipped for python");
897        let escaped = escape_python(reason);
898        let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
899    }
900
901    let is_async = call_config.r#async;
902    if is_async {
903        let _ = writeln!(out, "@pytest.mark.asyncio");
904        let _ = writeln!(out, "async def test_{fn_name}() -> None:");
905    } else {
906        let _ = writeln!(out, "def test_{fn_name}() -> None:");
907    }
908    let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
909
910    // Check if any assertion is an error assertion.
911    let has_error_assertion = fixture.assertions.iter().any(|a| a.assertion_type == "error");
912
913    // Build argument expressions from config.
914    let mut arg_bindings = Vec::new();
915    let mut kwarg_exprs = Vec::new();
916    for arg in &call_config.args {
917        let var_name = &arg.name;
918
919        if arg.arg_type == "handle" {
920            // Generate a create_engine (or equivalent) call and pass the variable.
921            // If there's config data, construct a CrawlConfig with kwargs.
922            let constructor_name = format!("create_{}", arg.name.to_snake_case());
923            let config_value = resolve_field(&fixture.input, &arg.field);
924            if config_value.is_null()
925                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
926            {
927                arg_bindings.push(format!("    {var_name} = {constructor_name}(None)"));
928            } else if let Some(obj) = config_value.as_object() {
929                // Build kwargs for the config constructor (CrawlConfig(key=val, ...)).
930                // For fields with a nested type mapping, wrap the dict value in the
931                // appropriate typed constructor instead of passing a plain dict.
932                let kwargs: Vec<String> = obj
933                    .iter()
934                    .map(|(k, v)| {
935                        let snake_key = k.to_snake_case();
936                        let py_val = if let Some(type_name) = handle_nested_types.get(k) {
937                            // Wrap the nested dict in the typed constructor.
938                            if let Some(nested_obj) = v.as_object() {
939                                if nested_obj.is_empty() {
940                                    // Empty dict: use the default constructor.
941                                    format!("{type_name}()")
942                                } else if handle_dict_types.contains(k) {
943                                    // The outer Python config type (e.g. CrawlConfig) accepts a
944                                    // plain dict for this field (e.g. `auth: dict | None`).
945                                    // The binding-layer wrapper (e.g. api.py) creates the typed
946                                    // object internally, so we must NOT pre-wrap it here.
947                                    json_to_python_literal(v)
948                                } else {
949                                    // Type takes keyword arguments.
950                                    let nested_kwargs: Vec<String> = nested_obj
951                                        .iter()
952                                        .map(|(nk, nv)| {
953                                            let nested_snake_key = nk.to_snake_case();
954                                            format!("{nested_snake_key}={}", json_to_python_literal(nv))
955                                        })
956                                        .collect();
957                                    format!("{type_name}({})", nested_kwargs.join(", "))
958                                }
959                            } else {
960                                // Non-object value: use as-is.
961                                json_to_python_literal(v)
962                            }
963                        } else if k == "request_timeout" {
964                            // The Python binding converts request_timeout with Duration::from_secs
965                            // (seconds) while fixtures specify values in milliseconds. Divide by
966                            // 1000 to compensate: e.g., 1 ms → 0 s (immediate timeout),
967                            // 5000 ms → 5 s. This keeps test semantics consistent with the
968                            // fixture intent.
969                            if let Some(ms) = v.as_u64() {
970                                format!("{}", ms / 1000)
971                            } else {
972                                json_to_python_literal(v)
973                            }
974                        } else {
975                            json_to_python_literal(v)
976                        };
977                        format!("{snake_key}={py_val}")
978                    })
979                    .collect();
980                // Use the options_type if configured, otherwise "CrawlConfig".
981                let config_class = options_type.unwrap_or("CrawlConfig");
982                let single_line = format!("    {var_name}_config = {config_class}({})", kwargs.join(", "));
983                if single_line.len() <= 120 {
984                    arg_bindings.push(single_line);
985                } else {
986                    // Split into multi-line for readability and E501 compliance.
987                    let mut lines = format!("    {var_name}_config = {config_class}(\n");
988                    for kw in &kwargs {
989                        lines.push_str(&format!("        {kw},\n"));
990                    }
991                    lines.push_str("    )");
992                    arg_bindings.push(lines);
993                }
994                arg_bindings.push(format!("    {var_name} = {constructor_name}({var_name}_config)"));
995            } else {
996                let literal = json_to_python_literal(config_value);
997                arg_bindings.push(format!("    {var_name} = {constructor_name}({literal})"));
998            }
999            kwarg_exprs.push(format!("{var_name}={var_name}"));
1000            continue;
1001        }
1002
1003        if arg.arg_type == "mock_url" {
1004            let fixture_id = &fixture.id;
1005            arg_bindings.push(format!(
1006                "    {var_name} = os.environ['MOCK_SERVER_URL'] + '/fixtures/{fixture_id}'"
1007            ));
1008            kwarg_exprs.push(format!("{var_name}={var_name}"));
1009            continue;
1010        }
1011
1012        let value = resolve_field(&fixture.input, &arg.field);
1013
1014        if value.is_null() && arg.optional {
1015            continue;
1016        }
1017
1018        // For json_object args, use the configured options_via strategy.
1019        // A1 fix: when optional=true and value is non-null, pass T directly (not Optional[T]).
1020        if arg.arg_type == "json_object" && !value.is_null() {
1021            match options_via {
1022                "dict" => {
1023                    // Pass as a plain Python dict literal.
1024                    let literal = json_to_python_literal(value);
1025                    let noqa = if literal.contains("/tmp/") {
1026                        "  # noqa: S108"
1027                    } else {
1028                        ""
1029                    };
1030                    arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1031                    kwarg_exprs.push(format!("{var_name}={var_name}"));
1032                    continue;
1033                }
1034                "json" => {
1035                    // Pass via json.loads() with the raw JSON string.
1036                    let json_str = serde_json::to_string(value).unwrap_or_default();
1037                    let escaped = escape_python(&json_str);
1038                    arg_bindings.push(format!("    {var_name} = json.loads(\"{escaped}\")"));
1039                    kwarg_exprs.push(format!("{var_name}={var_name}"));
1040                    continue;
1041                }
1042                _ => {
1043                    // "kwargs" (default): construct OptionsType(key=val, ...).
1044                    if let (Some(opts_type), Some(obj)) = (options_type, value.as_object()) {
1045                        let kwargs: Vec<String> = obj
1046                            .iter()
1047                            .map(|(k, v)| {
1048                                let snake_key = k.to_snake_case();
1049                                let py_val = if let Some(enum_type) = enum_fields.get(k) {
1050                                    // Map string value to enum constant.
1051                                    if let Some(s) = v.as_str() {
1052                                        let upper_val = s.to_shouty_snake_case();
1053                                        format!("{enum_type}.{upper_val}")
1054                                    } else {
1055                                        json_to_python_literal(v)
1056                                    }
1057                                } else {
1058                                    json_to_python_literal(v)
1059                                };
1060                                format!("{snake_key}={py_val}")
1061                            })
1062                            .collect();
1063                        let constructor = format!("{opts_type}({})", kwargs.join(", "));
1064                        arg_bindings.push(format!("    {var_name} = {constructor}"));
1065                        kwarg_exprs.push(format!("{var_name}={var_name}"));
1066                        continue;
1067                    }
1068                }
1069            }
1070        }
1071
1072        // When optional=true but fixture value is null, skip the argument entirely.
1073        // The function signature expects Optional[T] — Python's default keyword behavior handles None.
1074        if arg.optional && value.is_null() {
1075            continue;
1076        }
1077
1078        // For required args with no fixture value, use a language-appropriate default.
1079        if value.is_null() && !arg.optional {
1080            let default_val = match arg.arg_type.as_str() {
1081                "string" => "\"\"".to_string(),
1082                "int" | "integer" => "0".to_string(),
1083                "float" | "number" => "0.0".to_string(),
1084                "bool" | "boolean" => "False".to_string(),
1085                _ => "None".to_string(),
1086            };
1087            arg_bindings.push(format!("    {var_name} = {default_val}"));
1088            kwarg_exprs.push(format!("{var_name}={var_name}"));
1089            continue;
1090        }
1091
1092        let literal = json_to_python_literal(value);
1093        let noqa = if literal.contains("/tmp/") {
1094            "  # noqa: S108"
1095        } else {
1096            ""
1097        };
1098        arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1099        kwarg_exprs.push(format!("{var_name}={var_name}"));
1100    }
1101
1102    // Generate visitor class if the fixture has a visitor spec.
1103    if let Some(visitor_spec) = &fixture.visitor {
1104        let _ = writeln!(out, "    class _TestVisitor:");
1105        for (method_name, action) in &visitor_spec.callbacks {
1106            emit_python_visitor_method(out, method_name, action);
1107        }
1108        kwarg_exprs.push("visitor=_TestVisitor()".to_string());
1109    }
1110
1111    for binding in &arg_bindings {
1112        let _ = writeln!(out, "{binding}");
1113    }
1114
1115    let call_args = kwarg_exprs.join(", ");
1116    let await_prefix = if is_async { "await " } else { "" };
1117    let call_expr = format!("{await_prefix}{function_name}({call_args})");
1118
1119    if has_error_assertion {
1120        // Find error assertion for optional message check.
1121        let error_assertion = fixture.assertions.iter().find(|a| a.assertion_type == "error");
1122        let has_message = error_assertion
1123            .and_then(|a| a.value.as_ref())
1124            .and_then(|v| v.as_str())
1125            .is_some();
1126
1127        if has_message {
1128            let _ = writeln!(out, "    with pytest.raises(Exception) as exc_info:  # noqa: B017");
1129            let _ = writeln!(out, "        {call_expr}");
1130            if let Some(msg) = error_assertion.and_then(|a| a.value.as_ref()).and_then(|v| v.as_str()) {
1131                let escaped = escape_python(msg);
1132                let _ = writeln!(out, "    assert \"{escaped}\" in str(exc_info.value)  # noqa: S101");
1133            }
1134        } else {
1135            let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1136            let _ = writeln!(out, "        {call_expr}");
1137        }
1138
1139        // Skip non-error assertions: `result` is not defined outside the
1140        // `pytest.raises` block, so referencing it would trigger ruff F821.
1141        return;
1142    }
1143
1144    // Non-error path.
1145    // A2 fix: respect returns_result=false (non-Result returns don't need error handling).
1146    let has_usable_assertion = fixture.assertions.iter().any(|a| {
1147        if a.assertion_type == "not_error" || a.assertion_type == "error" {
1148            return false;
1149        }
1150        match &a.field {
1151            Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
1152            _ => true,
1153        }
1154    });
1155    let py_result_var = if has_usable_assertion {
1156        result_var.to_string()
1157    } else {
1158        "_".to_string()
1159    };
1160    let _ = writeln!(out, "    {py_result_var} = {call_expr}");
1161
1162    let fields_enum = &e2e_config.fields_enum;
1163    for assertion in &fixture.assertions {
1164        if assertion.assertion_type == "not_error" {
1165            // A2: When returns_result=false, the call doesn't return Result<T, E>,
1166            // so there's no error to check. Skip the assertion entirely.
1167            if !call_config.returns_result {
1168                continue;
1169            }
1170            // The call already raises on error in Python.
1171            continue;
1172        }
1173        render_assertion(out, assertion, result_var, field_resolver, fields_enum);
1174    }
1175}
1176
1177// ---------------------------------------------------------------------------
1178// Argument rendering
1179// ---------------------------------------------------------------------------
1180
1181fn json_to_python_literal(value: &serde_json::Value) -> String {
1182    match value {
1183        serde_json::Value::Null => "None".to_string(),
1184        serde_json::Value::Bool(true) => "True".to_string(),
1185        serde_json::Value::Bool(false) => "False".to_string(),
1186        serde_json::Value::Number(n) => n.to_string(),
1187        serde_json::Value::String(s) => python_string_literal(s),
1188        serde_json::Value::Array(arr) => {
1189            let items: Vec<String> = arr.iter().map(json_to_python_literal).collect();
1190            format!("[{}]", items.join(", "))
1191        }
1192        serde_json::Value::Object(map) => {
1193            let items: Vec<String> = map
1194                .iter()
1195                .map(|(k, v)| format!("\"{}\": {}", escape_python(k), json_to_python_literal(v)))
1196                .collect();
1197            format!("{{{}}}", items.join(", "))
1198        }
1199    }
1200}
1201
1202// ---------------------------------------------------------------------------
1203// Assertion rendering
1204// ---------------------------------------------------------------------------
1205
1206fn render_assertion(
1207    out: &mut String,
1208    assertion: &Assertion,
1209    result_var: &str,
1210    field_resolver: &FieldResolver,
1211    fields_enum: &std::collections::HashSet<String>,
1212) {
1213    // Handle synthetic / derived fields before the is_valid_for_result check
1214    // so they are never treated as struct attribute accesses on the result.
1215    if let Some(f) = &assertion.field {
1216        match f.as_str() {
1217            "chunks_have_content" => {
1218                let pred = format!("all(c.content for c in ({result_var}.chunks or []))");
1219                match assertion.assertion_type.as_str() {
1220                    "is_true" => {
1221                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1222                    }
1223                    "is_false" => {
1224                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1225                    }
1226                    _ => {
1227                        let _ = writeln!(
1228                            out,
1229                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1230                        );
1231                    }
1232                }
1233                return;
1234            }
1235            "chunks_have_embeddings" => {
1236                let pred = format!(
1237                    "all(c.embedding is not None and len(c.embedding) > 0 for c in ({result_var}.chunks or []))"
1238                );
1239                match assertion.assertion_type.as_str() {
1240                    "is_true" => {
1241                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1242                    }
1243                    "is_false" => {
1244                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1245                    }
1246                    _ => {
1247                        let _ = writeln!(
1248                            out,
1249                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1250                        );
1251                    }
1252                }
1253                return;
1254            }
1255            // ---- EmbedResponse virtual fields ----
1256            // embed_texts returns list[list[float]] in Python — no wrapper struct.
1257            // result_var is the embedding matrix; use it directly.
1258            "embeddings" => {
1259                match assertion.assertion_type.as_str() {
1260                    "count_equals" => {
1261                        if let Some(val) = &assertion.value {
1262                            if let Some(n) = val.as_u64() {
1263                                let _ = writeln!(out, "    assert len({result_var}) == {n}  # noqa: S101");
1264                            }
1265                        }
1266                    }
1267                    "count_min" => {
1268                        if let Some(val) = &assertion.value {
1269                            if let Some(n) = val.as_u64() {
1270                                let _ = writeln!(out, "    assert len({result_var}) >= {n}  # noqa: S101");
1271                            }
1272                        }
1273                    }
1274                    "not_empty" => {
1275                        let _ = writeln!(out, "    assert len({result_var}) > 0  # noqa: S101");
1276                    }
1277                    "is_empty" => {
1278                        let _ = writeln!(out, "    assert len({result_var}) == 0  # noqa: S101");
1279                    }
1280                    _ => {
1281                        let _ = writeln!(
1282                            out,
1283                            "    # skipped: unsupported assertion type on synthetic field 'embeddings'"
1284                        );
1285                    }
1286                }
1287                return;
1288            }
1289            "embedding_dimensions" => {
1290                let expr = format!("(len({result_var}[0]) if {result_var} else 0)");
1291                match assertion.assertion_type.as_str() {
1292                    "equals" => {
1293                        if let Some(val) = &assertion.value {
1294                            let py_val = value_to_python_string(val);
1295                            let _ = writeln!(out, "    assert {expr} == {py_val}  # noqa: S101");
1296                        }
1297                    }
1298                    "greater_than" => {
1299                        if let Some(val) = &assertion.value {
1300                            let py_val = value_to_python_string(val);
1301                            let _ = writeln!(out, "    assert {expr} > {py_val}  # noqa: S101");
1302                        }
1303                    }
1304                    _ => {
1305                        let _ = writeln!(
1306                            out,
1307                            "    # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
1308                        );
1309                    }
1310                }
1311                return;
1312            }
1313            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1314                let pred = match f.as_str() {
1315                    "embeddings_valid" => {
1316                        format!("all(bool(e) for e in {result_var})")
1317                    }
1318                    "embeddings_finite" => {
1319                        format!("all(v == v and abs(v) != float('inf') for e in {result_var} for v in e)")
1320                    }
1321                    "embeddings_non_zero" => {
1322                        format!("all(any(v != 0.0 for v in e) for e in {result_var})")
1323                    }
1324                    "embeddings_normalized" => {
1325                        format!("all(abs(sum(v * v for v in e) - 1.0) < 1e-3 for e in {result_var})")
1326                    }
1327                    _ => unreachable!(),
1328                };
1329                match assertion.assertion_type.as_str() {
1330                    "is_true" => {
1331                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1332                    }
1333                    "is_false" => {
1334                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1335                    }
1336                    _ => {
1337                        let _ = writeln!(
1338                            out,
1339                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1340                        );
1341                    }
1342                }
1343                return;
1344            }
1345            // ---- keywords / keywords_count ----
1346            // Python ExtractionResult does not expose extracted_keywords; skip.
1347            "keywords" | "keywords_count" => {
1348                let _ = writeln!(
1349                    out,
1350                    "    # skipped: field '{f}' not available on Python ExtractionResult"
1351                );
1352                return;
1353            }
1354            _ => {}
1355        }
1356    }
1357
1358    // Skip assertions on fields that don't exist on the result type.
1359    if let Some(f) = &assertion.field {
1360        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1361            let _ = writeln!(out, "    # skipped: field '{f}' not available on result type");
1362            return;
1363        }
1364    }
1365
1366    let field_access = match &assertion.field {
1367        Some(f) if !f.is_empty() => field_resolver.accessor(f, "python", result_var),
1368        _ => result_var.to_string(),
1369    };
1370
1371    // Determine whether this field should be compared as an enum string.
1372    //
1373    // PyO3 integer-based enums (`#[pyclass(eq, eq_int)]`) are NOT iterable, so
1374    // `"value" in enum_field` raises TypeError.  Use `str(enum_field).lower()`
1375    // instead, which for a variant like `LinkType.Anchor` gives `"linktype.anchor"`,
1376    // making `"anchor" in str(LinkType.Anchor).lower()` evaluate to True.
1377    //
1378    // We apply this to fields explicitly listed in `fields_enum` (using both the
1379    // fixture field path and the resolved path) and to any field whose accessor
1380    // involves array-element indexing (`[0]`) which typically holds typed enums.
1381    let field_is_enum = assertion.field.as_deref().is_some_and(|f| {
1382        if fields_enum.contains(f) {
1383            return true;
1384        }
1385        let resolved = field_resolver.resolve(f);
1386        if fields_enum.contains(resolved) {
1387            return true;
1388        }
1389        // Also treat fields accessed via array indexing as potentially enum-typed
1390        // (e.g., `result.links[0].link_type`, `result.assets[0].asset_category`).
1391        // This is safe because `str(string_value).lower()` is idempotent for
1392        // plain string fields, and all fixture `contains` values are lowercase.
1393        field_resolver.accessor(f, "python", result_var).contains("[0]")
1394    });
1395
1396    // Check whether the field path (or any prefix of it) is optional so we can
1397    // guard `in` / `not in` expressions against None.
1398    let field_is_optional = match &assertion.field {
1399        Some(f) if !f.is_empty() => {
1400            let resolved = field_resolver.resolve(f);
1401            field_resolver.is_optional(resolved)
1402        }
1403        _ => false,
1404    };
1405
1406    match assertion.assertion_type.as_str() {
1407        "error" | "not_error" => {
1408            // Handled at call site.
1409        }
1410        "equals" => {
1411            if let Some(val) = &assertion.value {
1412                let expected = value_to_python_string(val);
1413                // Use `is` for boolean/None comparisons (ruff E712).
1414                let op = if val.is_boolean() || val.is_null() { "is" } else { "==" };
1415                // For string equality, strip trailing whitespace to handle trailing newlines
1416                // from the converter.
1417                if val.is_string() {
1418                    let _ = writeln!(out, "    assert {field_access}.strip() {op} {expected}  # noqa: S101");
1419                } else {
1420                    let _ = writeln!(out, "    assert {field_access} {op} {expected}  # noqa: S101");
1421                }
1422            }
1423        }
1424        "contains" => {
1425            if let Some(val) = &assertion.value {
1426                let expected = value_to_python_string(val);
1427                // For enum fields, convert to lowercase string for comparison.
1428                let cmp_expr = if field_is_enum && val.is_string() {
1429                    format!("str({field_access}).lower()")
1430                } else {
1431                    field_access.clone()
1432                };
1433                if field_is_optional {
1434                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1435                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1436                } else {
1437                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1438                }
1439            }
1440        }
1441        "contains_all" => {
1442            if let Some(values) = &assertion.values {
1443                for val in values {
1444                    let expected = value_to_python_string(val);
1445                    // For enum fields, convert to lowercase string for comparison.
1446                    let cmp_expr = if field_is_enum && val.is_string() {
1447                        format!("str({field_access}).lower()")
1448                    } else {
1449                        field_access.clone()
1450                    };
1451                    if field_is_optional {
1452                        let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1453                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1454                    } else {
1455                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1456                    }
1457                }
1458            }
1459        }
1460        "not_contains" => {
1461            if let Some(val) = &assertion.value {
1462                let expected = value_to_python_string(val);
1463                // For enum fields, convert to lowercase string for comparison.
1464                let cmp_expr = if field_is_enum && val.is_string() {
1465                    format!("str({field_access}).lower()")
1466                } else {
1467                    field_access.clone()
1468                };
1469                if field_is_optional {
1470                    let _ = writeln!(
1471                        out,
1472                        "    assert {field_access} is None or {expected} not in {cmp_expr}  # noqa: S101"
1473                    );
1474                } else {
1475                    let _ = writeln!(out, "    assert {expected} not in {cmp_expr}  # noqa: S101");
1476                }
1477            }
1478        }
1479        "not_empty" => {
1480            let _ = writeln!(out, "    assert {field_access}  # noqa: S101");
1481        }
1482        "is_empty" => {
1483            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1484        }
1485        "contains_any" => {
1486            if let Some(values) = &assertion.values {
1487                let items: Vec<String> = values.iter().map(value_to_python_string).collect();
1488                let list_str = items.join(", ");
1489                // For enum fields, convert to lowercase string for comparison.
1490                let cmp_expr = if field_is_enum {
1491                    format!("str({field_access}).lower()")
1492                } else {
1493                    field_access.clone()
1494                };
1495                if field_is_optional {
1496                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1497                    let _ = writeln!(
1498                        out,
1499                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1500                    );
1501                } else {
1502                    let _ = writeln!(
1503                        out,
1504                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1505                    );
1506                }
1507            }
1508        }
1509        "greater_than" => {
1510            if let Some(val) = &assertion.value {
1511                let expected = value_to_python_string(val);
1512                let _ = writeln!(out, "    assert {field_access} > {expected}  # noqa: S101");
1513            }
1514        }
1515        "less_than" => {
1516            if let Some(val) = &assertion.value {
1517                let expected = value_to_python_string(val);
1518                let _ = writeln!(out, "    assert {field_access} < {expected}  # noqa: S101");
1519            }
1520        }
1521        "greater_than_or_equal" | "min" => {
1522            if let Some(val) = &assertion.value {
1523                let expected = value_to_python_string(val);
1524                let _ = writeln!(out, "    assert {field_access} >= {expected}  # noqa: S101");
1525            }
1526        }
1527        "less_than_or_equal" | "max" => {
1528            if let Some(val) = &assertion.value {
1529                let expected = value_to_python_string(val);
1530                let _ = writeln!(out, "    assert {field_access} <= {expected}  # noqa: S101");
1531            }
1532        }
1533        "starts_with" => {
1534            if let Some(val) = &assertion.value {
1535                let expected = value_to_python_string(val);
1536                let _ = writeln!(out, "    assert {field_access}.startswith({expected})  # noqa: S101");
1537            }
1538        }
1539        "ends_with" => {
1540            if let Some(val) = &assertion.value {
1541                let expected = value_to_python_string(val);
1542                let _ = writeln!(out, "    assert {field_access}.endswith({expected})  # noqa: S101");
1543            }
1544        }
1545        "min_length" => {
1546            if let Some(val) = &assertion.value {
1547                if let Some(n) = val.as_u64() {
1548                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1549                }
1550            }
1551        }
1552        "max_length" => {
1553            if let Some(val) = &assertion.value {
1554                if let Some(n) = val.as_u64() {
1555                    let _ = writeln!(out, "    assert len({field_access}) <= {n}  # noqa: S101");
1556                }
1557            }
1558        }
1559        "count_min" => {
1560            if let Some(val) = &assertion.value {
1561                if let Some(n) = val.as_u64() {
1562                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1563                }
1564            }
1565        }
1566        "count_equals" => {
1567            if let Some(val) = &assertion.value {
1568                if let Some(n) = val.as_u64() {
1569                    let _ = writeln!(out, "    assert len({field_access}) == {n}  # noqa: S101");
1570                }
1571            }
1572        }
1573        "is_true" => {
1574            let _ = writeln!(out, "    assert {field_access} is True  # noqa: S101");
1575        }
1576        "is_false" => {
1577            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1578        }
1579        "method_result" => {
1580            if let Some(method_name) = &assertion.method {
1581                let call_expr = build_python_method_call(result_var, method_name, assertion.args.as_ref());
1582                let check = assertion.check.as_deref().unwrap_or("is_true");
1583                match check {
1584                    "equals" => {
1585                        if let Some(val) = &assertion.value {
1586                            if val.is_boolean() {
1587                                if val.as_bool() == Some(true) {
1588                                    let _ = writeln!(out, "    assert {call_expr} is True  # noqa: S101");
1589                                } else {
1590                                    let _ = writeln!(out, "    assert {call_expr} is False  # noqa: S101");
1591                                }
1592                            } else {
1593                                let expected = value_to_python_string(val);
1594                                let _ = writeln!(out, "    assert {call_expr} == {expected}  # noqa: S101");
1595                            }
1596                        }
1597                    }
1598                    "is_true" => {
1599                        let _ = writeln!(out, "    assert {call_expr}  # noqa: S101");
1600                    }
1601                    "is_false" => {
1602                        let _ = writeln!(out, "    assert not {call_expr}  # noqa: S101");
1603                    }
1604                    "greater_than_or_equal" => {
1605                        if let Some(val) = &assertion.value {
1606                            let n = val.as_u64().unwrap_or(0);
1607                            let _ = writeln!(out, "    assert {call_expr} >= {n}  # noqa: S101");
1608                        }
1609                    }
1610                    "count_min" => {
1611                        if let Some(val) = &assertion.value {
1612                            let n = val.as_u64().unwrap_or(0);
1613                            let _ = writeln!(out, "    assert len({call_expr}) >= {n}  # noqa: S101");
1614                        }
1615                    }
1616                    "contains" => {
1617                        if let Some(val) = &assertion.value {
1618                            let expected = value_to_python_string(val);
1619                            let _ = writeln!(out, "    assert {expected} in {call_expr}  # noqa: S101");
1620                        }
1621                    }
1622                    "is_error" => {
1623                        let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1624                        let _ = writeln!(out, "        {call_expr}");
1625                    }
1626                    other_check => {
1627                        panic!("unsupported method_result check type: {other_check}");
1628                    }
1629                }
1630            } else {
1631                panic!("method_result assertion missing 'method' field");
1632            }
1633        }
1634        "matches_regex" => {
1635            if let Some(val) = &assertion.value {
1636                let expected = value_to_python_string(val);
1637                let _ = writeln!(out, "    import re  # noqa: PLC0415");
1638                let _ = writeln!(
1639                    out,
1640                    "    assert re.search({expected}, {field_access}) is not None  # noqa: S101"
1641                );
1642            }
1643        }
1644        other => {
1645            panic!("unsupported assertion type: {other}");
1646        }
1647    }
1648}
1649
1650/// Build a Python call expression for a method_result assertion on a tree-sitter Tree.
1651/// Maps method names to the appropriate Python function calls.
1652fn build_python_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1653    match method_name {
1654        "root_child_count" => format!("{result_var}.root_node().child_count()"),
1655        "root_node_type" => format!("{result_var}.root_node().kind()"),
1656        "named_children_count" => format!("{result_var}.root_node().named_child_count()"),
1657        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1658        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1659        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1660        "contains_node_type" => {
1661            let node_type = args
1662                .and_then(|a| a.get("node_type"))
1663                .and_then(|v| v.as_str())
1664                .unwrap_or("");
1665            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1666        }
1667        "find_nodes_by_type" => {
1668            let node_type = args
1669                .and_then(|a| a.get("node_type"))
1670                .and_then(|v| v.as_str())
1671                .unwrap_or("");
1672            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1673        }
1674        "run_query" => {
1675            let query_source = args
1676                .and_then(|a| a.get("query_source"))
1677                .and_then(|v| v.as_str())
1678                .unwrap_or("");
1679            let language = args
1680                .and_then(|a| a.get("language"))
1681                .and_then(|v| v.as_str())
1682                .unwrap_or("");
1683            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1684        }
1685        _ => {
1686            if let Some(args_val) = args {
1687                let arg_str = args_val
1688                    .as_object()
1689                    .map(|obj| {
1690                        obj.iter()
1691                            .map(|(k, v)| format!("{}={}", k, value_to_python_string(v)))
1692                            .collect::<Vec<_>>()
1693                            .join(", ")
1694                    })
1695                    .unwrap_or_default();
1696                format!("{result_var}.{method_name}({arg_str})")
1697            } else {
1698                format!("{result_var}.{method_name}()")
1699            }
1700        }
1701    }
1702}
1703
/// Name of the module-level Python helper that backs a `method_result`
/// method, if there is one.
///
/// Returns `Some(name)` for methods that are emitted as free-function calls
/// and therefore need an import, and `None` for every other method name
/// (those are reached through the result object itself).
fn python_method_helper_import(method_name: &str) -> Option<String> {
    let helper = match method_name {
        "has_error_nodes" => "tree_has_error_nodes",
        "contains_node_type" => "tree_contains_node_type",
        "error_count" | "tree_error_count" => "tree_error_count",
        // These helpers share their name with the method.
        "tree_to_sexp" | "find_nodes_by_type" | "run_query" => method_name,
        // Anything else is called on the result variable; nothing to import.
        _ => return None,
    };
    Some(helper.to_owned())
}
1718
1719fn value_to_python_string(value: &serde_json::Value) -> String {
1720    match value {
1721        serde_json::Value::String(s) => python_string_literal(s),
1722        serde_json::Value::Bool(true) => "True".to_string(),
1723        serde_json::Value::Bool(false) => "False".to_string(),
1724        serde_json::Value::Number(n) => n.to_string(),
1725        serde_json::Value::Null => "None".to_string(),
1726        other => python_string_literal(&other.to_string()),
1727    }
1728}
1729
1730/// Produce a quoted Python string literal, choosing single or double quotes
1731/// to avoid unnecessary escaping (ruff Q003).
1732fn python_string_literal(s: &str) -> String {
1733    if s.contains('"') && !s.contains('\'') {
1734        // Use single quotes to avoid escaping double quotes.
1735        let escaped = s
1736            .replace('\\', "\\\\")
1737            .replace('\'', "\\'")
1738            .replace('\n', "\\n")
1739            .replace('\r', "\\r")
1740            .replace('\t', "\\t");
1741        format!("'{escaped}'")
1742    } else {
1743        format!("\"{}\"", escape_python(s))
1744    }
1745}
1746
1747/// Emit a Python visitor method for a callback action.
1748fn emit_python_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1749    let params = match method_name {
1750        "visit_link" => "self, ctx, href, text, title",
1751        "visit_image" => "self, ctx, src, alt, title",
1752        "visit_heading" => "self, ctx, level, text, id",
1753        "visit_code_block" => "self, ctx, lang, code",
1754        "visit_code_inline"
1755        | "visit_strong"
1756        | "visit_emphasis"
1757        | "visit_strikethrough"
1758        | "visit_underline"
1759        | "visit_subscript"
1760        | "visit_superscript"
1761        | "visit_mark"
1762        | "visit_button"
1763        | "visit_summary"
1764        | "visit_figcaption"
1765        | "visit_definition_term"
1766        | "visit_definition_description" => "self, ctx, text",
1767        "visit_text" => "self, ctx, text",
1768        "visit_list_item" => "self, ctx, ordered, marker, text",
1769        "visit_blockquote" => "self, ctx, content, depth",
1770        "visit_table_row" => "self, ctx, cells, is_header",
1771        "visit_custom_element" => "self, ctx, tag_name, html",
1772        "visit_form" => "self, ctx, action_url, method",
1773        "visit_input" => "self, ctx, input_type, name, value",
1774        "visit_audio" | "visit_video" | "visit_iframe" => "self, ctx, src",
1775        "visit_details" => "self, ctx, is_open",
1776        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
1777            "self, ctx, output, *args"
1778        }
1779        "visit_list_start" => "self, ctx, ordered, *args",
1780        "visit_list_end" => "self, ctx, ordered, output, *args",
1781        _ => "self, ctx, *args",
1782    };
1783
1784    let _ = writeln!(
1785        out,
1786        "        def {method_name}({params}):  # noqa: A002, ANN001, ANN202, ARG002"
1787    );
1788    match action {
1789        CallbackAction::Skip => {
1790            let _ = writeln!(out, "            return \"skip\"");
1791        }
1792        CallbackAction::Continue => {
1793            let _ = writeln!(out, "            return \"continue\"");
1794        }
1795        CallbackAction::PreserveHtml => {
1796            let _ = writeln!(out, "            return \"preserve_html\"");
1797        }
1798        CallbackAction::Custom { output } => {
1799            let escaped = escape_python(output);
1800            let _ = writeln!(out, "            return {{\"custom\": \"{escaped}\"}}");
1801        }
1802        CallbackAction::CustomTemplate { template } => {
1803            // Use single-quoted f-string so that double quotes inside the template
1804            // (e.g. `QUOTE: "{text}"`) are not misinterpreted as string delimiters.
1805            // Escape newlines/tabs/backslashes/single quotes so the template stays
1806            // on a single line in the generated source.
1807            let escaped_template = template
1808                .replace('\\', "\\\\")
1809                .replace('\'', "\\'")
1810                .replace('\n', "\\n")
1811                .replace('\r', "\\r")
1812                .replace('\t', "\\t");
1813            let _ = writeln!(out, "            return {{\"custom\": f'{escaped_template}'}}");
1814        }
1815    }
1816}