Skip to main content

alef_e2e/codegen/
python.rs

1//! Python e2e test code generator.
2//!
3//! Generates `e2e/python/conftest.py` and `tests/test_{category}.py` files from
4//! JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
5
6use super::client;
7use crate::codegen::resolve_field;
8use crate::config::E2eConfig;
9use crate::escape::{escape_python, sanitize_filename, sanitize_ident};
10use crate::field_access::FieldResolver;
11use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, ValidationErrorExpectation};
12use alef_core::backend::GeneratedFile;
13use alef_core::config::AlefConfig;
14use alef_core::hash::{self, CommentStyle};
15use anyhow::Result;
16use heck::{ToShoutySnakeCase, ToSnakeCase};
17use std::collections::HashMap;
18use std::fmt::Write as FmtWrite;
19use std::path::PathBuf;
20
21/// Python e2e test code generator.
22pub struct PythonE2eCodegen;
23
24impl super::E2eCodegen for PythonE2eCodegen {
25    fn generate(
26        &self,
27        groups: &[FixtureGroup],
28        e2e_config: &E2eConfig,
29        _alef_config: &AlefConfig,
30    ) -> Result<Vec<GeneratedFile>> {
31        let mut files = Vec::new();
32        let output_base = PathBuf::from(e2e_config.effective_output()).join("python");
33
34        // conftest.py
35        files.push(GeneratedFile {
36            path: output_base.join("conftest.py"),
37            content: render_conftest(e2e_config, groups),
38            generated_header: true,
39        });
40
41        // Root __init__.py (prevents ruff INP001).
42        files.push(GeneratedFile {
43            path: output_base.join("__init__.py"),
44            content: "\n".to_string(),
45            generated_header: false,
46        });
47
48        // tests/__init__.py
49        files.push(GeneratedFile {
50            path: output_base.join("tests").join("__init__.py"),
51            content: "\n".to_string(),
52            generated_header: false,
53        });
54
55        // pyproject.toml for standalone uv resolution
56        let python_pkg = e2e_config.resolve_package("python");
57        let pkg_name = python_pkg
58            .as_ref()
59            .and_then(|p| p.name.as_deref())
60            .unwrap_or("kreuzcrawl");
61        let pkg_path = python_pkg
62            .as_ref()
63            .and_then(|p| p.path.as_deref())
64            .unwrap_or("../../packages/python");
65        let pkg_version = python_pkg
66            .as_ref()
67            .and_then(|p| p.version.as_deref())
68            .unwrap_or("0.1.0");
69        files.push(GeneratedFile {
70            path: output_base.join("pyproject.toml"),
71            content: render_pyproject(pkg_name, pkg_path, pkg_version, e2e_config.dep_mode),
72            generated_header: true,
73        });
74
75        // Per-category test files.
76        for group in groups {
77            let fixtures: Vec<&Fixture> = group.fixtures.iter().collect();
78
79            if fixtures.is_empty() {
80                continue;
81            }
82
83            // Skip emitting the file entirely when every fixture is skipped for
84            // python — there's nothing to run, and emitting imports of
85            // not-bound APIs causes module-level ImportError that masks the
86            // skip marker.
87            if fixtures.iter().all(|f| is_skipped(f, "python")) {
88                continue;
89            }
90
91            let filename = format!("test_{}.py", sanitize_filename(&group.category));
92            let content = render_test_file(&group.category, &fixtures, e2e_config);
93
94            files.push(GeneratedFile {
95                path: output_base.join("tests").join(filename),
96                content,
97                generated_header: true,
98            });
99        }
100
101        Ok(files)
102    }
103
104    fn language_name(&self) -> &'static str {
105        "python"
106    }
107}
108
109// ---------------------------------------------------------------------------
110// pyproject.toml
111// ---------------------------------------------------------------------------
112
113fn render_pyproject(
114    pkg_name: &str,
115    pkg_path: &str,
116    pkg_version: &str,
117    dep_mode: crate::config::DependencyMode,
118) -> String {
119    // Generate in pyproject-fmt canonical form so the pre-commit hook is a no-op.
120    // pyproject-fmt sorts deps alphabetically, uses spaces inside brackets, dotted
121    // tool keys, and injects Python classifiers.
122    let (deps_line, uv_sources_block) = match dep_mode {
123        crate::config::DependencyMode::Registry => (
124            format!(
125                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}{pkg_version}\" ]"
126            ),
127            String::new(),
128        ),
129        crate::config::DependencyMode::Local => (
130            format!(
131                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}\" ]"
132            ),
133            format!(
134                "\n[tool.uv]\nsources.{pkg_name} = {{ path = \"{pkg_path}\" }}\n",
135                pkg_path = pkg_path
136            ),
137        ),
138    };
139
140    format!(
141        r#"[build-system]
142build-backend = "setuptools.build_meta"
143requires = [ "setuptools>=68", "wheel" ]
144
145[project]
146name = "{pkg_name}-e2e-tests"
147version = "0.0.0"
148description = "End-to-end tests"
149requires-python = ">=3.10"
150classifiers = [
151  "Programming Language :: Python :: 3 :: Only",
152  "Programming Language :: Python :: 3.10",
153  "Programming Language :: Python :: 3.11",
154  "Programming Language :: Python :: 3.12",
155  "Programming Language :: Python :: 3.13",
156  "Programming Language :: Python :: 3.14",
157]
158{deps_line}
159
160[tool.setuptools]
161packages = [  ]
162{uv_sources_block}
163[tool.ruff]
164lint.ignore = [ "PLR2004" ]
165lint.per-file-ignores."tests/**" = [ "B017", "PT011", "S101", "S108" ]
166
167[tool.pytest]
168ini_options.asyncio_mode = "auto"
169ini_options.testpaths = [ "tests" ]
170ini_options.python_files = "test_*.py"
171ini_options.python_functions = "test_*"
172ini_options.addopts = "-v --strict-markers --tb=short"
173ini_options.timeout = 300
174"#
175    )
176}
177
178// ---------------------------------------------------------------------------
179// Config resolution helpers
180// ---------------------------------------------------------------------------
181
182fn resolve_function_name(e2e_config: &E2eConfig) -> String {
183    resolve_function_name_for_call(&e2e_config.call)
184}
185
186fn resolve_function_name_for_call(call_config: &crate::config::CallConfig) -> String {
187    call_config
188        .overrides
189        .get("python")
190        .and_then(|o| o.function.clone())
191        .unwrap_or_else(|| call_config.function.clone())
192}
193
194fn resolve_module(e2e_config: &E2eConfig) -> String {
195    e2e_config
196        .call
197        .overrides
198        .get("python")
199        .and_then(|o| o.module.clone())
200        .unwrap_or_else(|| e2e_config.call.module.replace('-', "_"))
201}
202
203fn resolve_options_type(e2e_config: &E2eConfig) -> Option<String> {
204    e2e_config
205        .call
206        .overrides
207        .get("python")
208        .and_then(|o| o.options_type.clone())
209}
210
211/// Resolve how json_object args are passed: "kwargs" (default), "dict", or "json".
212fn resolve_options_via(e2e_config: &E2eConfig) -> &str {
213    e2e_config
214        .call
215        .overrides
216        .get("python")
217        .and_then(|o| o.options_via.as_deref())
218        .unwrap_or("kwargs")
219}
220
221/// Resolve enum field mappings from the Python override config.
222fn resolve_enum_fields(e2e_config: &E2eConfig) -> &HashMap<String, String> {
223    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
224    e2e_config
225        .call
226        .overrides
227        .get("python")
228        .map(|o| &o.enum_fields)
229        .unwrap_or(&EMPTY)
230}
231
232/// Resolve handle nested type mappings from the Python override config.
233/// Maps config field names to their Python constructor type names.
234fn resolve_handle_nested_types(e2e_config: &E2eConfig) -> &HashMap<String, String> {
235    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
236    e2e_config
237        .call
238        .overrides
239        .get("python")
240        .map(|o| &o.handle_nested_types)
241        .unwrap_or(&EMPTY)
242}
243
244/// Resolve handle dict type set from the Python override config.
245/// Fields in this set use `TypeName({...})` instead of `TypeName(key=val, ...)`.
246fn resolve_handle_dict_types(e2e_config: &E2eConfig) -> &std::collections::HashSet<String> {
247    static EMPTY: std::sync::LazyLock<std::collections::HashSet<String>> =
248        std::sync::LazyLock::new(std::collections::HashSet::new);
249    e2e_config
250        .call
251        .overrides
252        .get("python")
253        .map(|o| &o.handle_dict_types)
254        .unwrap_or(&EMPTY)
255}
256
257fn is_skipped(fixture: &Fixture, language: &str) -> bool {
258    fixture.skip.as_ref().is_some_and(|s| s.should_skip(language))
259}
260
261// ---------------------------------------------------------------------------
262// Rendering
263// ---------------------------------------------------------------------------
264
265fn render_conftest(e2e_config: &E2eConfig, groups: &[FixtureGroup]) -> String {
266    let module = resolve_module(e2e_config);
267    let has_http_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| f.is_http_test());
268
269    // Detect whether any fixture uses file_path or bytes args — if so we need to
270    // chdir to the test_documents directory so relative paths resolve correctly.
271    let has_file_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| {
272        let cc = e2e_config.resolve_call(f.call.as_deref());
273        cc.args
274            .iter()
275            .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
276    });
277
278    let header = hash::header(CommentStyle::Hash);
279    if has_http_fixtures {
280        format!(
281            r#"{header}"""Pytest configuration for e2e tests."""
282from __future__ import annotations
283
284import os
285import subprocess
286import threading
287from pathlib import Path
288from typing import Generator
289
290import pytest
291
292# Ensure the package is importable.
293# The {module} package is expected to be installed in the current environment.
294
295_HERE = Path(__file__).parent
296_E2E_DIR = _HERE.parent
297_MOCK_SERVER_BIN = _E2E_DIR / "rust" / "target" / "release" / "mock-server"
298_FIXTURES_DIR = _E2E_DIR.parent / "fixtures"
299
300
301@pytest.fixture(scope="session", autouse=True)
302def mock_server() -> Generator[str, None, None]:
303    """Spawn the mock HTTP server binary and set MOCK_SERVER_URL."""
304    proc = subprocess.Popen(  # noqa: S603
305        [str(_MOCK_SERVER_BIN), str(_FIXTURES_DIR)],
306        stdout=subprocess.PIPE,
307        stderr=None,
308        stdin=subprocess.PIPE,
309    )
310    url = ""
311    assert proc.stdout is not None
312    for raw_line in proc.stdout:
313        line = raw_line.decode().strip()
314        if line.startswith("MOCK_SERVER_URL="):
315            url = line.split("=", 1)[1]
316            break
317    os.environ["MOCK_SERVER_URL"] = url
318    # Drain stdout in background so the server never blocks.
319    threading.Thread(target=proc.stdout.read, daemon=True).start()
320    yield url
321    if proc.stdin:
322        proc.stdin.close()
323    proc.terminate()
324    proc.wait()
325
326
327def _make_request(method: str, path: str, **kwargs: object) -> object:
328    """Make an HTTP request to the mock server."""
329    import urllib.request  # noqa: PLC0415
330
331    base_url = os.environ.get("MOCK_SERVER_URL", "http://localhost:8080")
332    url = f"{{base_url}}{{path}}"
333    data = kwargs.pop("json", None)
334    if data is not None:
335        import json  # noqa: PLC0415
336
337        body = json.dumps(data).encode()
338        headers = dict(kwargs.pop("headers", {{}}))
339        headers.setdefault("Content-Type", "application/json")
340        req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
341    else:
342        headers = dict(kwargs.pop("headers", {{}}))
343        req = urllib.request.Request(url, headers=headers, method=method.upper())
344    try:
345        with urllib.request.urlopen(req) as resp:  # noqa: S310
346            return resp
347    except urllib.error.HTTPError as exc:
348        return exc
349
350
351@pytest.fixture(scope="session")
352def app(mock_server: str) -> object:  # noqa: ARG001
353    """Return a simple HTTP helper bound to the mock server URL."""
354
355    class _App:
356        def request(self, path: str, **kwargs: object) -> object:
357            method = str(kwargs.pop("method", "GET"))
358            return _make_request(method, path, **kwargs)
359
360    return _App()
361"#
362        )
363    } else if has_file_fixtures {
364        format!(
365            r#"{header}"""Pytest configuration for e2e tests."""
366import os
367from pathlib import Path
368
369# Ensure the package is importable.
370# The {module} package is expected to be installed in the current environment.
371
372# Change to the test_documents directory so that fixture file paths like
373# "pdf/fake_memo.pdf" resolve correctly when running pytest from e2e/python/.
374_TEST_DOCUMENTS = Path(__file__).parent.parent.parent / "test_documents"
375if _TEST_DOCUMENTS.is_dir():
376    os.chdir(_TEST_DOCUMENTS)
377
378# On macOS, Pdfium is a separate dylib not on the default library path in dev builds.
379# Search common locations (Cargo build output, staged target/release) and extend
380# DYLD_LIBRARY_PATH / LD_LIBRARY_PATH so the extension can load the library.
381_REPO_ROOT = Path(__file__).parent.parent.parent
382
383
384def _find_pdfium_dir() -> str | None:
385    """Find the directory containing libpdfium, searching Cargo build outputs."""
386    for _candidate in sorted(_REPO_ROOT.glob("target/*/release/build/*/out/libpdfium*")):
387        return str(_candidate.parent)
388    for _candidate in sorted(_REPO_ROOT.glob("target/release/build/*/out/libpdfium*")):
389        return str(_candidate.parent)
390    return None
391
392
393_pdfium_dir = _find_pdfium_dir()
394if _pdfium_dir is not None:
395    for _var in ("DYLD_LIBRARY_PATH", "LD_LIBRARY_PATH"):
396        _existing = os.environ.get(_var, "")
397        if _pdfium_dir not in _existing:
398            os.environ[_var] = f"{{_pdfium_dir}}:{{_existing}}" if _existing else _pdfium_dir
399"#
400        )
401    } else {
402        format!(
403            r#"{header}"""Pytest configuration for e2e tests."""
404# Ensure the package is importable.
405# The {module} package is expected to be installed in the current environment.
406"#
407        )
408    }
409}
410
411fn render_test_file(category: &str, fixtures: &[&Fixture], e2e_config: &E2eConfig) -> String {
412    let mut out = String::new();
413    out.push_str(&hash::header(CommentStyle::Hash));
414    let _ = writeln!(out, "\"\"\"E2e tests for category: {category}.\"\"\"");
415
416    let module = resolve_module(e2e_config);
417    let function_name = resolve_function_name(e2e_config);
418    let options_type = resolve_options_type(e2e_config);
419    let options_via = resolve_options_via(e2e_config);
420    let enum_fields = resolve_enum_fields(e2e_config);
421    let handle_nested_types = resolve_handle_nested_types(e2e_config);
422    let handle_dict_types = resolve_handle_dict_types(e2e_config);
423    let field_resolver = FieldResolver::new(
424        &e2e_config.fields,
425        &e2e_config.fields_optional,
426        &e2e_config.result_fields,
427        &e2e_config.fields_array,
428    );
429
430    let has_error_test = fixtures
431        .iter()
432        .any(|f| f.assertions.iter().any(|a| a.assertion_type == "error"));
433    let has_skipped = fixtures.iter().any(|f| is_skipped(f, "python"));
434    let has_http_tests = fixtures.iter().any(|f| f.is_http_test());
435
436    // Check if any fixture in this file uses an async call.
437    let is_async = fixtures.iter().any(|f| {
438        let cc = e2e_config.resolve_call(f.call.as_deref());
439        cc.r#async
440    }) || e2e_config.call.r#async;
441    let needs_pytest = has_error_test || has_skipped || is_async;
442
443    // "json" mode needs `import json`.
444    let needs_json_import = options_via == "json"
445        && fixtures.iter().any(|f| {
446            e2e_config
447                .call
448                .args
449                .iter()
450                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
451        });
452
453    // mock_url args need `import os`.
454    let needs_os_import = e2e_config.call.args.iter().any(|arg| arg.arg_type == "mock_url");
455
456    // bytes args need `from pathlib import Path` when any fixture value is a file path.
457    // bytes args need `import base64` when any fixture value is a base64 blob.
458    let needs_path_import = fixtures.iter().any(|f| {
459        let cc = e2e_config.resolve_call(f.call.as_deref());
460        cc.args.iter().any(|arg| {
461            if arg.arg_type != "bytes" {
462                return false;
463            }
464            let val = resolve_field(&f.input, &arg.field);
465            val.as_str()
466                .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::FilePath))
467        })
468    });
469    let needs_base64_import = fixtures.iter().any(|f| {
470        let cc = e2e_config.resolve_call(f.call.as_deref());
471        cc.args.iter().any(|arg| {
472            if arg.arg_type != "bytes" {
473                return false;
474            }
475            let val = resolve_field(&f.input, &arg.field);
476            val.as_str()
477                .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::Base64))
478        })
479    });
480
481    // HTTP tests handle `import re` inline (per-test), so no top-level re import is needed.
482    let needs_re_import = false;
483    let _ = has_http_tests; // used indirectly via inline imports in render_http_test_function
484
485    // Only import options_type when using "kwargs" mode.
486    let needs_options_type = options_via == "kwargs"
487        && options_type.is_some()
488        && fixtures.iter().any(|f| {
489            e2e_config
490                .call
491                .args
492                .iter()
493                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
494        });
495
496    // Collect enum types actually used across all fixtures in this file.
497    let mut used_enum_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
498    if needs_options_type && !enum_fields.is_empty() {
499        for fixture in fixtures.iter() {
500            for arg in &e2e_config.call.args {
501                if arg.arg_type == "json_object" {
502                    let value = resolve_field(&fixture.input, &arg.field);
503                    if let Some(obj) = value.as_object() {
504                        for key in obj.keys() {
505                            if let Some(enum_type) = enum_fields.get(key) {
506                                used_enum_types.insert(enum_type.clone());
507                            }
508                        }
509                    }
510                }
511            }
512        }
513    }
514
515    // Collect imports sorted per isort/ruff I001: stdlib group, then
516    // third-party group, separated by a blank line. Within each group
517    // `import X` lines come before `from X import Y` lines, both sorted.
518    let mut stdlib_imports: Vec<String> = Vec::new();
519    let mut thirdparty_bare: Vec<String> = Vec::new();
520    let mut thirdparty_from: Vec<String> = Vec::new();
521
522    if needs_base64_import {
523        stdlib_imports.push("import base64".to_string());
524    }
525
526    if needs_json_import {
527        stdlib_imports.push("import json".to_string());
528    }
529
530    if needs_os_import {
531        stdlib_imports.push("import os".to_string());
532    }
533
534    if needs_path_import {
535        stdlib_imports.push("from pathlib import Path".to_string());
536    }
537
538    if needs_re_import {
539        stdlib_imports.push("import re".to_string());
540    }
541
542    if needs_pytest {
543        // F401 (unused-import) suppression: pytest is needed at module level for
544        // its fixture decorators and `pytest.mark.*` annotations, but ruff cannot
545        // statically tell whether a generated test file references those — so we
546        // hint to ruff that the import is intentional.
547        thirdparty_bare.push("import pytest  # noqa: F401".to_string());
548    }
549
550    // For non-HTTP fixtures, build the normal function imports.
551    // Only count fixtures that are not skipped and have assertions (need to call the function).
552    let has_non_http_fixtures = fixtures
553        .iter()
554        .any(|f| !f.is_http_test() && !is_skipped(f, "python") && !f.assertions.is_empty());
555    if has_non_http_fixtures {
556        // Collect handle constructor function names that need to be imported.
557        let handle_constructors: Vec<String> = e2e_config
558            .call
559            .args
560            .iter()
561            .filter(|arg| arg.arg_type == "handle")
562            .map(|arg| format!("create_{}", arg.name.to_snake_case()))
563            .collect();
564
565        // Collect all unique function names actually used across all fixtures in this file.
566        // Do not seed with the default function_name — only include it when at least one
567        // fixture resolves to it, to avoid unused-import (F401) warnings from ruff.
568        let mut import_names: Vec<String> = Vec::new();
569        for fixture in fixtures.iter() {
570            let cc = e2e_config.resolve_call(fixture.call.as_deref());
571            let fn_name = resolve_function_name_for_call(cc);
572            if !import_names.contains(&fn_name) {
573                import_names.push(fn_name);
574            }
575        }
576        // Safety net: should not occur since the group is non-empty, but ensures
577        // import_names is never empty if all fixtures use the default call.
578        if import_names.is_empty() {
579            import_names.push(function_name.clone());
580        }
581        for ctor in &handle_constructors {
582            if !import_names.contains(ctor) {
583                import_names.push(ctor.clone());
584            }
585        }
586
587        // If any handle arg has config, import the config class (CrawlConfig or options_type).
588        let needs_config_import = e2e_config.call.args.iter().any(|arg| {
589            arg.arg_type == "handle"
590                && fixtures.iter().any(|f| {
591                    let val = resolve_field(&f.input, &arg.field);
592                    !val.is_null() && val.as_object().is_some_and(|o| !o.is_empty())
593                })
594        });
595        if needs_config_import {
596            let config_class = options_type.as_deref().unwrap_or("CrawlConfig");
597            if !import_names.contains(&config_class.to_string()) {
598                import_names.push(config_class.to_string());
599            }
600        }
601
602        // Import any nested handle config types actually used in this file.
603        if !handle_nested_types.is_empty() {
604            let mut used_nested_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
605            for fixture in fixtures.iter() {
606                for arg in &e2e_config.call.args {
607                    if arg.arg_type == "handle" {
608                        let config_value = resolve_field(&fixture.input, &arg.field);
609                        if let Some(obj) = config_value.as_object() {
610                            for key in obj.keys() {
611                                if let Some(type_name) = handle_nested_types.get(key) {
612                                    if obj[key].is_object() {
613                                        used_nested_types.insert(type_name.clone());
614                                    }
615                                }
616                            }
617                        }
618                    }
619                }
620            }
621            for type_name in used_nested_types {
622                if !import_names.contains(&type_name) {
623                    import_names.push(type_name);
624                }
625            }
626        }
627
628        // Collect method_result helper function imports.
629        for fixture in fixtures.iter() {
630            for assertion in &fixture.assertions {
631                if assertion.assertion_type == "method_result" {
632                    if let Some(method_name) = &assertion.method {
633                        let import = python_method_helper_import(method_name);
634                        if let Some(name) = import {
635                            if !import_names.contains(&name) {
636                                import_names.push(name);
637                            }
638                        }
639                    }
640                }
641            }
642        }
643
644        if let (true, Some(opts_type)) = (needs_options_type, &options_type) {
645            import_names.push(opts_type.clone());
646            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
647            // Import enum types from enum_module (if specified) or main module.
648            if !used_enum_types.is_empty() {
649                let enum_mod = e2e_config
650                    .call
651                    .overrides
652                    .get("python")
653                    .and_then(|o| o.enum_module.as_deref())
654                    .unwrap_or(&module);
655                let enum_names: Vec<&String> = used_enum_types.iter().collect();
656                thirdparty_from.push(format!(
657                    "from {enum_mod} import {}",
658                    enum_names.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
659                ));
660            }
661        } else {
662            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
663        }
664    }
665
666    stdlib_imports.sort();
667    thirdparty_bare.sort();
668    thirdparty_from.sort();
669
670    // Emit sorted import groups with blank lines between groups per PEP 8.
671    if !stdlib_imports.is_empty() {
672        for imp in &stdlib_imports {
673            let _ = writeln!(out, "{imp}");
674        }
675        let _ = writeln!(out);
676    }
677    // Third-party: bare imports then from-imports, no blank line between them.
678    for imp in &thirdparty_bare {
679        let _ = writeln!(out, "{imp}");
680    }
681    for imp in &thirdparty_from {
682        let _ = writeln!(out, "{imp}");
683    }
684    // Two blank lines after imports (PEP 8 / ruff I001).
685    let _ = writeln!(out);
686    let _ = writeln!(out);
687
688    for fixture in fixtures {
689        if fixture.is_http_test() {
690            render_http_test_function(&mut out, fixture);
691        } else if !is_skipped(fixture, "python") && fixture.assertions.is_empty() {
692            // Non-HTTP fixture with no assertions: generate a skipped placeholder.
693            let fn_name = sanitize_ident(&fixture.id);
694            let description = &fixture.description;
695            let desc_with_period = if description.ends_with('.') {
696                description.to_string()
697            } else {
698                format!("{description}.")
699            };
700            let _ = writeln!(
701                out,
702                "@pytest.mark.skip(reason=\"no assertions configured for this fixture in python e2e\")"
703            );
704            let _ = writeln!(out, "def test_{fn_name}() -> None:");
705            let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
706        } else {
707            render_test_function(
708                &mut out,
709                fixture,
710                e2e_config,
711                options_type.as_deref(),
712                options_via,
713                enum_fields,
714                handle_nested_types,
715                handle_dict_types,
716                &field_resolver,
717            );
718        }
719        let _ = writeln!(out);
720    }
721
722    out
723}
724
725// ---------------------------------------------------------------------------
726// HTTP server test rendering — PythonTestClientRenderer
727// ---------------------------------------------------------------------------
728
/// Pytest/urllib test renderer.
///
/// Stateless unit struct that implements `client::TestClientRenderer` for
/// Python. Python HTTP e2e tests use `urllib.request` directly against the
/// mock server binary (not a `TestClient` over FFI). The trait primitives emit
/// the urllib request-build + response-capture scaffolding that the existing
/// monolithic renderer produced, so generated output is unchanged after the
/// migration.
struct PythonTestClientRenderer;
736
737impl client::TestClientRenderer for PythonTestClientRenderer {
738    fn language_name(&self) -> &'static str {
739        "python"
740    }
741
742    /// Emit `@pytest.mark.skip` (if skipped), function signature, and docstring.
743    ///
744    /// Skipped tests still get a stub body (`...`) so pytest can collect them.
745    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
746        let desc_with_period = if description.ends_with('.') {
747            description.to_string()
748        } else {
749            format!("{description}.")
750        };
751
752        if let Some(reason) = skip_reason {
753            let escaped = escape_python(reason);
754            let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
755        }
756        let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
757        let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
758        if skip_reason.is_some() {
759            let _ = writeln!(out, "    ...");
760        }
761    }
762
763    /// No-op: Python functions are not wrapped in a block, so no closing token
764    /// is needed. The blank line between tests is emitted by the call site
765    /// (`render_test_file`) after every fixture, which keeps the separator
766    /// consistent with non-HTTP fixtures.
767    fn render_test_close(&self, _out: &mut String) {}
768
769    /// Emit the urllib request scaffolding that drives the mock server.
770    ///
771    /// Emits:
772    /// - Inline imports for `os`, `urllib.request` (and optionally `json`).
773    /// - URL construction from the fixture path.
774    /// - Headers dict, optional JSON body, and `urllib.request.Request` build.
775    /// - A `_NoRedirect` opener + try/except that captures `status_code`,
776    ///   `resp_body`, and `resp_headers`.
777    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
778        let _ = writeln!(out, "    import os  # noqa: PLC0415");
779        let _ = writeln!(out, "    import urllib.request  # noqa: PLC0415");
780        let _ = writeln!(out, "    base = os.environ.get(\"MOCK_SERVER_URL\", mock_server)");
781        let _ = writeln!(out, "    url = f\"{{base}}{}\"", ctx.path);
782
783        let method = ctx.method.to_uppercase();
784
785        // Build headers dict literal.
786        let mut header_entries: Vec<String> = ctx
787            .headers
788            .iter()
789            .map(|(k, v)| format!("        \"{}\": \"{}\",", escape_python(k), escape_python(v)))
790            .collect();
791        header_entries.sort(); // deterministic output
792        let headers_py = if header_entries.is_empty() {
793            "{}".to_string()
794        } else {
795            format!("{{\n{}\n    }}", header_entries.join("\n"))
796        };
797
798        if let Some(body) = ctx.body {
799            let py_body = json_to_python_literal(body);
800            let _ = writeln!(out, "    import json  # noqa: PLC0415");
801            let _ = writeln!(out, "    _headers = {headers_py}");
802            let _ = writeln!(out, "    _headers.setdefault(\"Content-Type\", \"application/json\")");
803            let _ = writeln!(out, "    _body = json.dumps({py_body}).encode()");
804            let _ = writeln!(
805                out,
806                "    _req = urllib.request.Request(url, data=_body, headers=_headers, method=\"{method}\")"
807            );
808        } else {
809            let _ = writeln!(out, "    _headers = {headers_py}");
810            let _ = writeln!(
811                out,
812                "    _req = urllib.request.Request(url, headers=_headers, method=\"{method}\")"
813            );
814        }
815
816        // Build a no-redirect opener and capture the response.
817        // Both `resp_body` and `resp_headers` are always bound so that
818        // `render_assert_*` primitives can reference them unconditionally.
819        let _ = writeln!(
820            out,
821            "    class _NoRedirect(urllib.request.HTTPRedirectHandler):  # noqa: N801"
822        );
823        let _ = writeln!(
824            out,
825            "        def redirect_request(self, *args, **kwargs): return None  # noqa: E704"
826        );
827        let _ = writeln!(out, "    _opener = urllib.request.build_opener(_NoRedirect())");
828        let _ = writeln!(out, "    try:");
829        let _ = writeln!(out, "        response = _opener.open(_req)  # noqa: S310");
830        let _ = writeln!(out, "        status_code = response.status");
831        let _ = writeln!(out, "        resp_body = response.read()  # noqa: F841");
832        let _ = writeln!(out, "        resp_headers = dict(response.headers)  # noqa: F841");
833        let _ = writeln!(out, "    except urllib.error.HTTPError as _exc:");
834        let _ = writeln!(out, "        status_code = _exc.code");
835        let _ = writeln!(out, "        resp_body = _exc.read()  # noqa: F841");
836        let _ = writeln!(out, "        resp_headers = dict(_exc.headers)  # noqa: F841");
837    }
838
839    fn render_assert_status(&self, out: &mut String, _response_var: &str, status: u16) {
840        let _ = writeln!(out, "    assert status_code == {status}  # noqa: S101");
841    }
842
843    /// Emit a single header assertion, handling special tokens `<<present>>`,
844    /// `<<absent>>`, and `<<uuid>>`.
845    fn render_assert_header(&self, out: &mut String, _response_var: &str, name: &str, expected: &str) {
846        let escaped_name = escape_python(&name.to_lowercase());
847        match expected {
848            "<<present>>" => {
849                let _ = writeln!(out, "    assert \"{escaped_name}\" in resp_headers  # noqa: S101");
850            }
851            "<<absent>>" => {
852                let _ = writeln!(
853                    out,
854                    "    assert resp_headers.get(\"{escaped_name}\") is None  # noqa: S101"
855                );
856            }
857            "<<uuid>>" => {
858                let _ = writeln!(out, "    import re  # noqa: PLC0415");
859                let _ = writeln!(
860                    out,
861                    "    assert re.match(r'^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$', resp_headers[\"{escaped_name}\"])  # noqa: S101"
862                );
863            }
864            exact => {
865                let escaped_val = escape_python(exact);
866                let _ = writeln!(
867                    out,
868                    "    assert resp_headers[\"{escaped_name}\"] == \"{escaped_val}\"  # noqa: S101"
869                );
870            }
871        }
872    }
873
874    /// Emit an exact-equality body assertion.
875    ///
876    /// String bodies are compared as decoded text; structured JSON bodies are
877    /// compared via `json.loads()`.
878    fn render_assert_json_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
879        if let serde_json::Value::String(s) = expected {
880            let py_val = format!("\"{}\"", escape_python(s));
881            let _ = writeln!(out, "    assert resp_body.decode() == {py_val}  # noqa: S101");
882        } else {
883            let py_val = json_to_python_literal(expected);
884            let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
885            let _ = writeln!(out, "    data = _json.loads(resp_body)");
886            let _ = writeln!(out, "    assert data == {py_val}  # noqa: S101");
887        }
888    }
889
890    /// Emit partial-body assertions — every key in `expected` must match the
891    /// corresponding value in the parsed JSON response.
892    fn render_assert_partial_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
893        let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
894        let _ = writeln!(out, "    data = _json.loads(resp_body)");
895        if let Some(obj) = expected.as_object() {
896            for (key, val) in obj {
897                let py_val = json_to_python_literal(val);
898                let escaped_key = escape_python(key);
899                let _ = writeln!(out, "    assert data[\"{escaped_key}\"] == {py_val}  # noqa: S101");
900            }
901        }
902    }
903
904    /// Emit validation-error assertions for 422 responses.
905    ///
906    /// The driver only calls this when `body` is absent (fixture has no exact
907    /// body assertion) — if a full body assertion was already emitted the driver
908    /// skips validation errors because the body already covers them.
909    fn render_assert_validation_errors(
910        &self,
911        out: &mut String,
912        _response_var: &str,
913        errors: &[ValidationErrorExpectation],
914    ) {
915        let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
916        let _ = writeln!(out, "    _data = _json.loads(resp_body)");
917        let _ = writeln!(out, "    errors = _data.get(\"errors\", [])");
918        for ve in errors {
919            let loc_py: Vec<String> = ve.loc.iter().map(|s| format!("\"{}\"", escape_python(s))).collect();
920            let loc_str = loc_py.join(", ");
921            let escaped_msg = escape_python(&ve.msg);
922            let _ = writeln!(
923                out,
924                "    assert any(e[\"loc\"] == [{loc_str}] and \"{escaped_msg}\" in e[\"msg\"] for e in errors)  # noqa: S101"
925            );
926        }
927    }
928}
929
930/// Render a pytest test function for an HTTP server fixture.
931///
932/// Delegates to [`client::http_call::render_http_test`] via [`PythonTestClientRenderer`].
933/// HTTP 101 (WebSocket upgrade) is handled as a pre-hook: urllib cannot drive
934/// upgrade responses, so those fixtures are emitted as skip-stubs before the
935/// shared driver is invoked.
936fn render_http_test_function(out: &mut String, fixture: &Fixture) {
937    // HTTP 101 (WebSocket upgrade) — urllib cannot handle upgrade responses.
938    // Emit a skip stub independently of the shared driver.
939    if let Some(http) = &fixture.http {
940        if http.expected_response.status_code == 101 {
941            let fn_name = sanitize_ident(&fixture.id);
942            let description = &fixture.description;
943            let desc_with_period = if description.ends_with('.') {
944                description.to_string()
945            } else {
946                format!("{description}.")
947            };
948            let _ = writeln!(
949                out,
950                "@pytest.mark.skip(reason=\"HTTP 101 WebSocket upgrade cannot be tested via urllib\")"
951            );
952            let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
953            let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
954            let _ = writeln!(out, "    ...");
955            let _ = writeln!(out);
956            return;
957        }
958    }
959
960    client::http_call::render_http_test(out, &PythonTestClientRenderer, fixture);
961}
962
963// ---------------------------------------------------------------------------
964// Function-call test rendering
965// ---------------------------------------------------------------------------
966
967#[allow(clippy::too_many_arguments)]
968fn render_test_function(
969    out: &mut String,
970    fixture: &Fixture,
971    e2e_config: &E2eConfig,
972    options_type: Option<&str>,
973    options_via: &str,
974    enum_fields: &HashMap<String, String>,
975    handle_nested_types: &HashMap<String, String>,
976    handle_dict_types: &std::collections::HashSet<String>,
977    field_resolver: &FieldResolver,
978) {
979    let fn_name = sanitize_ident(&fixture.id);
980    let description = &fixture.description;
981    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
982    let function_name = resolve_function_name_for_call(call_config);
983    let result_var = &call_config.result_var;
984
985    // Resolve Python-specific override settings.
986    let python_override = call_config.overrides.get("python");
987    let result_is_simple = python_override.is_some_and(|o| o.result_is_simple);
988    let arg_name_map = python_override.map(|o| &o.arg_name_map);
989
990    let desc_with_period = if description.ends_with('.') {
991        description.to_string()
992    } else {
993        format!("{description}.")
994    };
995
996    // Emit pytest.mark.skip for fixtures that should be skipped for python.
997    if is_skipped(fixture, "python") {
998        let reason = fixture
999            .skip
1000            .as_ref()
1001            .and_then(|s| s.reason.as_deref())
1002            .unwrap_or("skipped for python");
1003        let escaped = escape_python(reason);
1004        let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
1005    }
1006
1007    let is_async = call_config.r#async;
1008    if is_async {
1009        let _ = writeln!(out, "@pytest.mark.asyncio");
1010        let _ = writeln!(out, "async def test_{fn_name}() -> None:");
1011    } else {
1012        let _ = writeln!(out, "def test_{fn_name}() -> None:");
1013    }
1014    let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
1015
1016    // Check if any assertion is an error assertion.
1017    let has_error_assertion = fixture.assertions.iter().any(|a| a.assertion_type == "error");
1018
1019    // Build argument expressions from config.
1020    let mut arg_bindings = Vec::new();
1021    let mut kwarg_exprs = Vec::new();
1022    for arg in &call_config.args {
1023        let var_name = &arg.name;
1024        // Resolve the kwarg name: use the arg_name_map override if present.
1025        let kwarg_name = arg_name_map
1026            .and_then(|m| m.get(var_name.as_str()))
1027            .map(|s| s.as_str())
1028            .unwrap_or(var_name.as_str());
1029
1030        if arg.arg_type == "handle" {
1031            // Generate a create_engine (or equivalent) call and pass the variable.
1032            // If there's config data, construct a CrawlConfig with kwargs.
1033            let constructor_name = format!("create_{}", arg.name.to_snake_case());
1034            let config_value = resolve_field(&fixture.input, &arg.field);
1035            if config_value.is_null()
1036                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
1037            {
1038                arg_bindings.push(format!("    {var_name} = {constructor_name}(None)"));
1039            } else if let Some(obj) = config_value.as_object() {
1040                // Build kwargs for the config constructor (CrawlConfig(key=val, ...)).
1041                // For fields with a nested type mapping, wrap the dict value in the
1042                // appropriate typed constructor instead of passing a plain dict.
1043                let kwargs: Vec<String> = obj
1044                    .iter()
1045                    .map(|(k, v)| {
1046                        let snake_key = k.to_snake_case();
1047                        let py_val = if let Some(type_name) = handle_nested_types.get(k) {
1048                            // Wrap the nested dict in the typed constructor.
1049                            if let Some(nested_obj) = v.as_object() {
1050                                if nested_obj.is_empty() {
1051                                    // Empty dict: use the default constructor.
1052                                    format!("{type_name}()")
1053                                } else if handle_dict_types.contains(k) {
1054                                    // The outer Python config type (e.g. CrawlConfig) accepts a
1055                                    // plain dict for this field (e.g. `auth: dict | None`).
1056                                    // The binding-layer wrapper (e.g. api.py) creates the typed
1057                                    // object internally, so we must NOT pre-wrap it here.
1058                                    json_to_python_literal(v)
1059                                } else {
1060                                    // Type takes keyword arguments.
1061                                    let nested_kwargs: Vec<String> = nested_obj
1062                                        .iter()
1063                                        .map(|(nk, nv)| {
1064                                            let nested_snake_key = nk.to_snake_case();
1065                                            format!("{nested_snake_key}={}", json_to_python_literal(nv))
1066                                        })
1067                                        .collect();
1068                                    format!("{type_name}({})", nested_kwargs.join(", "))
1069                                }
1070                            } else {
1071                                // Non-object value: use as-is.
1072                                json_to_python_literal(v)
1073                            }
1074                        } else if k == "request_timeout" {
1075                            // The Python binding converts request_timeout with Duration::from_secs
1076                            // (seconds) while fixtures specify values in milliseconds. Divide by
1077                            // 1000 to compensate: e.g., 1 ms → 0 s (immediate timeout),
1078                            // 5000 ms → 5 s. This keeps test semantics consistent with the
1079                            // fixture intent.
1080                            if let Some(ms) = v.as_u64() {
1081                                format!("{}", ms / 1000)
1082                            } else {
1083                                json_to_python_literal(v)
1084                            }
1085                        } else {
1086                            json_to_python_literal(v)
1087                        };
1088                        format!("{snake_key}={py_val}")
1089                    })
1090                    .collect();
1091                // Use the options_type if configured, otherwise "CrawlConfig".
1092                let config_class = options_type.unwrap_or("CrawlConfig");
1093                let single_line = format!("    {var_name}_config = {config_class}({})", kwargs.join(", "));
1094                if single_line.len() <= 120 {
1095                    arg_bindings.push(single_line);
1096                } else {
1097                    // Split into multi-line for readability and E501 compliance.
1098                    let mut lines = format!("    {var_name}_config = {config_class}(\n");
1099                    for kw in &kwargs {
1100                        lines.push_str(&format!("        {kw},\n"));
1101                    }
1102                    lines.push_str("    )");
1103                    arg_bindings.push(lines);
1104                }
1105                arg_bindings.push(format!("    {var_name} = {constructor_name}({var_name}_config)"));
1106            } else {
1107                let literal = json_to_python_literal(config_value);
1108                arg_bindings.push(format!("    {var_name} = {constructor_name}({literal})"));
1109            }
1110            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1111            continue;
1112        }
1113
1114        if arg.arg_type == "mock_url" {
1115            let fixture_id = &fixture.id;
1116            arg_bindings.push(format!(
1117                "    {var_name} = os.environ['MOCK_SERVER_URL'] + '/fixtures/{fixture_id}'"
1118            ));
1119            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1120            continue;
1121        }
1122
1123        let value = resolve_field(&fixture.input, &arg.field);
1124
1125        if value.is_null() && arg.optional {
1126            continue;
1127        }
1128
1129        // For json_object args, use the configured options_via strategy.
1130        // A1 fix: when optional=true and value is non-null, pass T directly (not Optional[T]).
1131        if arg.arg_type == "json_object" && !value.is_null() {
1132            match options_via {
1133                "dict" => {
1134                    // Pass as a plain Python dict literal.
1135                    let literal = json_to_python_literal(value);
1136                    let noqa = if literal.contains("/tmp/") {
1137                        "  # noqa: S108"
1138                    } else {
1139                        ""
1140                    };
1141                    arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1142                    kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1143                    continue;
1144                }
1145                "json" => {
1146                    // Pass via json.loads() with the raw JSON string.
1147                    let json_str = serde_json::to_string(value).unwrap_or_default();
1148                    let escaped = escape_python(&json_str);
1149                    arg_bindings.push(format!("    {var_name} = json.loads(\"{escaped}\")"));
1150                    kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1151                    continue;
1152                }
1153                _ => {
1154                    // "kwargs" (default): construct OptionsType(key=val, ...).
1155                    if let (Some(opts_type), Some(obj)) = (options_type, value.as_object()) {
1156                        let kwargs: Vec<String> = obj
1157                            .iter()
1158                            .map(|(k, v)| {
1159                                let snake_key = k.to_snake_case();
1160                                let py_val = if let Some(enum_type) = enum_fields.get(k) {
1161                                    // Map string value to enum constant.
1162                                    if let Some(s) = v.as_str() {
1163                                        let upper_val = s.to_shouty_snake_case();
1164                                        format!("{enum_type}.{upper_val}")
1165                                    } else {
1166                                        json_to_python_literal(v)
1167                                    }
1168                                } else {
1169                                    json_to_python_literal(v)
1170                                };
1171                                format!("{snake_key}={py_val}")
1172                            })
1173                            .collect();
1174                        let constructor = format!("{opts_type}({})", kwargs.join(", "));
1175                        arg_bindings.push(format!("    {var_name} = {constructor}"));
1176                        kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1177                        continue;
1178                    }
1179                }
1180            }
1181        }
1182
1183        // When optional=true but fixture value is null, skip the argument entirely.
1184        // The function signature expects Optional[T] — Python's default keyword behavior handles None.
1185        if arg.optional && value.is_null() {
1186            continue;
1187        }
1188
1189        // For required args with no fixture value, use a language-appropriate default.
1190        if value.is_null() && !arg.optional {
1191            let default_val = match arg.arg_type.as_str() {
1192                "string" => "\"\"".to_string(),
1193                "int" | "integer" => "0".to_string(),
1194                "float" | "number" => "0.0".to_string(),
1195                "bool" | "boolean" => "False".to_string(),
1196                _ => "None".to_string(),
1197            };
1198            arg_bindings.push(format!("    {var_name} = {default_val}"));
1199            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1200            continue;
1201        }
1202
1203        // bytes args: classify the fixture value and emit the appropriate expression.
1204        //
1205        // Three patterns appear in fixtures:
1206        //   1. File path   — "pdf/fake_memo.pdf", "images/hello_world.png"
1207        //                    Starts with a word character followed by more word/slash/dot chars
1208        //                    and a file extension.  Emit `Path("...").read_bytes()`.
1209        //   2. Inline text — "<!DOCTYPE html>...", "{...}", text with spaces
1210        //                    Starts with '<', '{', or contains whitespace.
1211        //                    Emit `b"..."` bytes literal.
1212        //   3. Base64      — "/9j/4AAQ" (JPEG magic), other short opaque strings
1213        //                    Everything else.  Emit `base64.b64decode("...")`.
1214        if arg.arg_type == "bytes" {
1215            if let Some(raw) = value.as_str() {
1216                match classify_bytes_value(raw) {
1217                    BytesKind::FilePath => {
1218                        let escaped = escape_python(raw);
1219                        arg_bindings.push(format!("    {var_name} = Path(\"{escaped}\").read_bytes()"));
1220                    }
1221                    BytesKind::InlineText => {
1222                        // Emit a bytes literal.  For short single-line values we can embed
1223                        // them directly; use repr-like escaping of non-printable bytes.
1224                        let escaped = escape_python(raw);
1225                        arg_bindings.push(format!("    {var_name} = b\"{escaped}\""));
1226                    }
1227                    BytesKind::Base64 => {
1228                        let escaped = escape_python(raw);
1229                        arg_bindings.push(format!("    {var_name} = base64.b64decode(\"{escaped}\")"));
1230                    }
1231                }
1232            } else {
1233                arg_bindings.push(format!("    {var_name} = None"));
1234            }
1235            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1236            continue;
1237        }
1238
1239        let literal = json_to_python_literal(value);
1240        let noqa = if literal.contains("/tmp/") {
1241            "  # noqa: S108"
1242        } else {
1243            ""
1244        };
1245        arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1246        kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1247    }
1248
1249    // Generate visitor class if the fixture has a visitor spec.
1250    if let Some(visitor_spec) = &fixture.visitor {
1251        let _ = writeln!(out, "    class _TestVisitor:");
1252        for (method_name, action) in &visitor_spec.callbacks {
1253            emit_python_visitor_method(out, method_name, action);
1254        }
1255        kwarg_exprs.push("visitor=_TestVisitor()".to_string());
1256    }
1257
1258    for binding in &arg_bindings {
1259        let _ = writeln!(out, "{binding}");
1260    }
1261
1262    let call_args = kwarg_exprs.join(", ");
1263    let await_prefix = if is_async { "await " } else { "" };
1264    let call_expr = format!("{await_prefix}{function_name}({call_args})");
1265
1266    if has_error_assertion {
1267        // Find error assertion for optional message check.
1268        let error_assertion = fixture.assertions.iter().find(|a| a.assertion_type == "error");
1269        let has_message = error_assertion
1270            .and_then(|a| a.value.as_ref())
1271            .and_then(|v| v.as_str())
1272            .is_some();
1273
1274        if has_message {
1275            let _ = writeln!(out, "    with pytest.raises(Exception) as exc_info:  # noqa: B017");
1276            let _ = writeln!(out, "        {call_expr}");
1277            if let Some(msg) = error_assertion.and_then(|a| a.value.as_ref()).and_then(|v| v.as_str()) {
1278                let escaped = escape_python(msg);
1279                let _ = writeln!(out, "    assert \"{escaped}\" in str(exc_info.value)  # noqa: S101");
1280            }
1281        } else {
1282            let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1283            let _ = writeln!(out, "        {call_expr}");
1284        }
1285
1286        // Skip non-error assertions: `result` is not defined outside the
1287        // `pytest.raises` block, so referencing it would trigger ruff F821.
1288        return;
1289    }
1290
1291    // Non-error path.
1292    // A2 fix: respect returns_result=false (non-Result returns don't need error handling).
1293    let has_usable_assertion = fixture.assertions.iter().any(|a| {
1294        if a.assertion_type == "not_error" || a.assertion_type == "error" {
1295            return false;
1296        }
1297        if result_is_simple {
1298            // When the result is a simple type, only assertions whose field is
1299            // NOT in the skipped-for-simple-result set will produce real code.
1300            if let Some(f) = &a.field {
1301                let f_lower = f.to_lowercase();
1302                if !f.is_empty()
1303                    && f_lower != "content"
1304                    && f_lower != "result"
1305                    && (f_lower.starts_with("metadata")
1306                        || f_lower.starts_with("document")
1307                        || f_lower.starts_with("structure")
1308                        || f_lower.starts_with("pages")
1309                        || f_lower.starts_with("chunks")
1310                        || f_lower.starts_with("tables")
1311                        || f_lower.starts_with("images")
1312                        || f_lower.starts_with("mime_type")
1313                        || f_lower.starts_with("is_")
1314                        || f_lower == "byte_length"
1315                        || f_lower == "page_count"
1316                        || f_lower == "output_format"
1317                        || f_lower == "extraction_method")
1318                {
1319                    return false; // this assertion will be skipped
1320                }
1321            }
1322            return true;
1323        }
1324        match &a.field {
1325            Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
1326            _ => true,
1327        }
1328    });
1329    let py_result_var = if has_usable_assertion {
1330        result_var.to_string()
1331    } else {
1332        "_".to_string()
1333    };
1334    let _ = writeln!(out, "    {py_result_var} = {call_expr}");
1335
1336    let fields_enum = &e2e_config.fields_enum;
1337    for assertion in &fixture.assertions {
1338        if assertion.assertion_type == "not_error" {
1339            // A2: When returns_result=false, the call doesn't return Result<T, E>,
1340            // so there's no error to check. Skip the assertion entirely.
1341            if !call_config.returns_result {
1342                continue;
1343            }
1344            // The call already raises on error in Python.
1345            continue;
1346        }
1347        render_assertion(
1348            out,
1349            assertion,
1350            result_var,
1351            field_resolver,
1352            fields_enum,
1353            result_is_simple,
1354        );
1355    }
1356}
1357
1358// ---------------------------------------------------------------------------
1359// Bytes value classification
1360// ---------------------------------------------------------------------------
1361
/// How to represent a fixture `type = "bytes"` string value in generated Python.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BytesKind {
    /// A relative file path like `"pdf/fake_memo.pdf"` — read with `Path(...).read_bytes()`.
    FilePath,
    /// Inline text content like `"<!DOCTYPE html>..."` — encode to `b"..."`.
    InlineText,
    /// A base64-encoded blob like `"/9j/4AAQ"` — decode with `base64.b64decode(...)`.
    Base64,
}

/// Classify a fixture string value that maps to a `bytes` argument.
///
/// Rules (in order):
/// 1. Starts with `<`, `{`, or `[`, or contains any whitespace (space, tab,
///    newline, ...) → inline text.
/// 2. First character is an ASCII letter/digit/underscore AND the text after
///    the first `/` contains a `.` → file path.
/// 3. Everything else (including the empty string) → base64.
fn classify_bytes_value(s: &str) -> BytesKind {
    // Rule 1: obvious inline content markers. Any whitespace counts, not just
    // a plain space: tab- or newline-separated text is never valid base64.
    if s.starts_with(['<', '{', '[']) || s.contains(char::is_whitespace) {
        return BytesKind::InlineText;
    }

    // Rule 2: looks like "dir/file.ext" — starts with a word character and has
    // a dot somewhere after the first slash.
    let starts_with_word_char = s
        .chars()
        .next()
        .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_');
    let has_dotted_tail = s.split_once('/').is_some_and(|(_, tail)| tail.contains('.'));
    if starts_with_word_char && has_dotted_tail {
        return BytesKind::FilePath;
    }

    // Rule 3: everything else is treated as base64.
    BytesKind::Base64
}
1403
1404// ---------------------------------------------------------------------------
1405// Argument rendering
1406// ---------------------------------------------------------------------------
1407
1408fn json_to_python_literal(value: &serde_json::Value) -> String {
1409    match value {
1410        serde_json::Value::Null => "None".to_string(),
1411        serde_json::Value::Bool(true) => "True".to_string(),
1412        serde_json::Value::Bool(false) => "False".to_string(),
1413        serde_json::Value::Number(n) => n.to_string(),
1414        serde_json::Value::String(s) => python_string_literal(s),
1415        serde_json::Value::Array(arr) => {
1416            let items: Vec<String> = arr.iter().map(json_to_python_literal).collect();
1417            format!("[{}]", items.join(", "))
1418        }
1419        serde_json::Value::Object(map) => {
1420            let items: Vec<String> = map
1421                .iter()
1422                .map(|(k, v)| format!("\"{}\": {}", escape_python(k), json_to_python_literal(v)))
1423                .collect();
1424            format!("{{{}}}", items.join(", "))
1425        }
1426    }
1427}
1428
1429// ---------------------------------------------------------------------------
1430// Assertion rendering
1431// ---------------------------------------------------------------------------
1432
1433fn render_assertion(
1434    out: &mut String,
1435    assertion: &Assertion,
1436    result_var: &str,
1437    field_resolver: &FieldResolver,
1438    fields_enum: &std::collections::HashSet<String>,
1439    result_is_simple: bool,
1440) {
1441    // When result_is_simple, the result IS the content — skip fields that
1442    // reference struct sub-fields (metadata, document, structure, pages, etc.)
1443    // which don't exist on a plain string/bool/bytes value.
1444    if result_is_simple {
1445        if let Some(f) = &assertion.field {
1446            let f_lower = f.to_lowercase();
1447            if !f.is_empty()
1448                && f_lower != "content"
1449                && f_lower != "result"
1450                && (f_lower.starts_with("metadata")
1451                    || f_lower.starts_with("document")
1452                    || f_lower.starts_with("structure")
1453                    || f_lower.starts_with("pages")
1454                    || f_lower.starts_with("chunks")
1455                    || f_lower.starts_with("tables")
1456                    || f_lower.starts_with("images")
1457                    || f_lower.starts_with("mime_type")
1458                    || f_lower.starts_with("is_")
1459                    || f_lower == "byte_length"
1460                    || f_lower == "page_count"
1461                    || f_lower == "output_format"
1462                    || f_lower == "extraction_method")
1463            {
1464                let _ = writeln!(out, "    # skipped: field '{f}' not applicable for simple result type");
1465                return;
1466            }
1467        }
1468    }
1469
1470    // Handle synthetic / derived fields before the is_valid_for_result check
1471    // so they are never treated as struct attribute accesses on the result.
1472    if let Some(f) = &assertion.field {
1473        match f.as_str() {
1474            "chunks_have_content" => {
1475                let pred = format!("all(c.content for c in ({result_var}.chunks or []))");
1476                match assertion.assertion_type.as_str() {
1477                    "is_true" => {
1478                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1479                    }
1480                    "is_false" => {
1481                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1482                    }
1483                    _ => {
1484                        let _ = writeln!(
1485                            out,
1486                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1487                        );
1488                    }
1489                }
1490                return;
1491            }
1492            "chunks_have_embeddings" => {
1493                let pred = format!(
1494                    "all(c.embedding is not None and len(c.embedding) > 0 for c in ({result_var}.chunks or []))"
1495                );
1496                match assertion.assertion_type.as_str() {
1497                    "is_true" => {
1498                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1499                    }
1500                    "is_false" => {
1501                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1502                    }
1503                    _ => {
1504                        let _ = writeln!(
1505                            out,
1506                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1507                        );
1508                    }
1509                }
1510                return;
1511            }
1512            // ---- EmbedResponse virtual fields ----
1513            // embed_texts returns list[list[float]] in Python — no wrapper struct.
1514            // result_var is the embedding matrix; use it directly.
1515            "embeddings" => {
1516                match assertion.assertion_type.as_str() {
1517                    "count_equals" => {
1518                        if let Some(val) = &assertion.value {
1519                            if let Some(n) = val.as_u64() {
1520                                let _ = writeln!(out, "    assert len({result_var}) == {n}  # noqa: S101");
1521                            }
1522                        }
1523                    }
1524                    "count_min" => {
1525                        if let Some(val) = &assertion.value {
1526                            if let Some(n) = val.as_u64() {
1527                                let _ = writeln!(out, "    assert len({result_var}) >= {n}  # noqa: S101");
1528                            }
1529                        }
1530                    }
1531                    "not_empty" => {
1532                        let _ = writeln!(out, "    assert len({result_var}) > 0  # noqa: S101");
1533                    }
1534                    "is_empty" => {
1535                        let _ = writeln!(out, "    assert len({result_var}) == 0  # noqa: S101");
1536                    }
1537                    _ => {
1538                        let _ = writeln!(
1539                            out,
1540                            "    # skipped: unsupported assertion type on synthetic field 'embeddings'"
1541                        );
1542                    }
1543                }
1544                return;
1545            }
1546            "embedding_dimensions" => {
1547                let expr = format!("(len({result_var}[0]) if {result_var} else 0)");
1548                match assertion.assertion_type.as_str() {
1549                    "equals" => {
1550                        if let Some(val) = &assertion.value {
1551                            let py_val = value_to_python_string(val);
1552                            let _ = writeln!(out, "    assert {expr} == {py_val}  # noqa: S101");
1553                        }
1554                    }
1555                    "greater_than" => {
1556                        if let Some(val) = &assertion.value {
1557                            let py_val = value_to_python_string(val);
1558                            let _ = writeln!(out, "    assert {expr} > {py_val}  # noqa: S101");
1559                        }
1560                    }
1561                    _ => {
1562                        let _ = writeln!(
1563                            out,
1564                            "    # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
1565                        );
1566                    }
1567                }
1568                return;
1569            }
1570            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1571                let pred = match f.as_str() {
1572                    "embeddings_valid" => {
1573                        format!("all(bool(e) for e in {result_var})")
1574                    }
1575                    "embeddings_finite" => {
1576                        format!("all(v == v and abs(v) != float('inf') for e in {result_var} for v in e)")
1577                    }
1578                    "embeddings_non_zero" => {
1579                        format!("all(any(v != 0.0 for v in e) for e in {result_var})")
1580                    }
1581                    "embeddings_normalized" => {
1582                        format!("all(abs(sum(v * v for v in e) - 1.0) < 1e-3 for e in {result_var})")
1583                    }
1584                    _ => unreachable!(),
1585                };
1586                match assertion.assertion_type.as_str() {
1587                    "is_true" => {
1588                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1589                    }
1590                    "is_false" => {
1591                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1592                    }
1593                    _ => {
1594                        let _ = writeln!(
1595                            out,
1596                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1597                        );
1598                    }
1599                }
1600                return;
1601            }
1602            // ---- keywords / keywords_count ----
1603            // Python ExtractionResult does not expose extracted_keywords; skip.
1604            "keywords" | "keywords_count" => {
1605                let _ = writeln!(
1606                    out,
1607                    "    # skipped: field '{f}' not available on Python ExtractionResult"
1608                );
1609                return;
1610            }
1611            _ => {}
1612        }
1613    }
1614
1615    // Skip assertions on fields that don't exist on the result type.
1616    if !result_is_simple {
1617        if let Some(f) = &assertion.field {
1618            if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1619                let _ = writeln!(out, "    # skipped: field '{f}' not available on result type");
1620                return;
1621            }
1622        }
1623    }
1624
1625    // For simple results, the result variable IS the value — map `content`/`result`
1626    // fields (and empty/absent fields) to the result variable directly.
1627    let field_access = if result_is_simple {
1628        result_var.to_string()
1629    } else {
1630        match &assertion.field {
1631            Some(f) if !f.is_empty() => field_resolver.accessor(f, "python", result_var),
1632            _ => result_var.to_string(),
1633        }
1634    };
1635
1636    // Determine whether this field should be compared as an enum string.
1637    //
1638    // PyO3 integer-based enums (`#[pyclass(eq, eq_int)]`) are NOT iterable, so
1639    // `"value" in enum_field` raises TypeError.  Use `str(enum_field).lower()`
1640    // instead, which for a variant like `LinkType.Anchor` gives `"linktype.anchor"`,
1641    // making `"anchor" in str(LinkType.Anchor).lower()` evaluate to True.
1642    //
1643    // We apply this to fields explicitly listed in `fields_enum` (using both the
1644    // fixture field path and the resolved path) and to any field whose accessor
1645    // involves array-element indexing (`[0]`) which typically holds typed enums.
1646    let field_is_enum = assertion.field.as_deref().is_some_and(|f| {
1647        if fields_enum.contains(f) {
1648            return true;
1649        }
1650        let resolved = field_resolver.resolve(f);
1651        if fields_enum.contains(resolved) {
1652            return true;
1653        }
1654        // Also treat fields accessed via array indexing as potentially enum-typed
1655        // (e.g., `result.links[0].link_type`, `result.assets[0].asset_category`).
1656        // This is safe because `str(string_value).lower()` is idempotent for
1657        // plain string fields, and all fixture `contains` values are lowercase.
1658        field_resolver.accessor(f, "python", result_var).contains("[0]")
1659    });
1660
1661    // Check whether the field path (or any prefix of it) is optional so we can
1662    // guard `in` / `not in` expressions against None.
1663    let field_is_optional = match &assertion.field {
1664        Some(f) if !f.is_empty() => {
1665            let resolved = field_resolver.resolve(f);
1666            field_resolver.is_optional(resolved)
1667        }
1668        _ => false,
1669    };
1670
1671    match assertion.assertion_type.as_str() {
1672        "error" | "not_error" => {
1673            // Handled at call site.
1674        }
1675        "equals" => {
1676            if let Some(val) = &assertion.value {
1677                let expected = value_to_python_string(val);
1678                // Use `is` for boolean/None comparisons (ruff E712).
1679                let op = if val.is_boolean() || val.is_null() { "is" } else { "==" };
1680                // For string equality, strip trailing whitespace to handle trailing newlines
1681                // from the converter.
1682                if val.is_string() {
1683                    let _ = writeln!(out, "    assert {field_access}.strip() {op} {expected}  # noqa: S101");
1684                } else {
1685                    let _ = writeln!(out, "    assert {field_access} {op} {expected}  # noqa: S101");
1686                }
1687            }
1688        }
1689        "contains" => {
1690            if let Some(val) = &assertion.value {
1691                let expected = value_to_python_string(val);
1692                // For enum fields, convert to lowercase string for comparison.
1693                let cmp_expr = if field_is_enum && val.is_string() {
1694                    format!("str({field_access}).lower()")
1695                } else {
1696                    field_access.clone()
1697                };
1698                if field_is_optional {
1699                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1700                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1701                } else {
1702                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1703                }
1704            }
1705        }
1706        "contains_all" => {
1707            if let Some(values) = &assertion.values {
1708                for val in values {
1709                    let expected = value_to_python_string(val);
1710                    // For enum fields, convert to lowercase string for comparison.
1711                    let cmp_expr = if field_is_enum && val.is_string() {
1712                        format!("str({field_access}).lower()")
1713                    } else {
1714                        field_access.clone()
1715                    };
1716                    if field_is_optional {
1717                        let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1718                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1719                    } else {
1720                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1721                    }
1722                }
1723            }
1724        }
1725        "not_contains" => {
1726            if let Some(val) = &assertion.value {
1727                let expected = value_to_python_string(val);
1728                // For enum fields, convert to lowercase string for comparison.
1729                let cmp_expr = if field_is_enum && val.is_string() {
1730                    format!("str({field_access}).lower()")
1731                } else {
1732                    field_access.clone()
1733                };
1734                if field_is_optional {
1735                    let _ = writeln!(
1736                        out,
1737                        "    assert {field_access} is None or {expected} not in {cmp_expr}  # noqa: S101"
1738                    );
1739                } else {
1740                    let _ = writeln!(out, "    assert {expected} not in {cmp_expr}  # noqa: S101");
1741                }
1742            }
1743        }
1744        "not_empty" => {
1745            let _ = writeln!(out, "    assert {field_access}  # noqa: S101");
1746        }
1747        "is_empty" => {
1748            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1749        }
1750        "contains_any" => {
1751            if let Some(values) = &assertion.values {
1752                let items: Vec<String> = values.iter().map(value_to_python_string).collect();
1753                let list_str = items.join(", ");
1754                // For enum fields, convert to lowercase string for comparison.
1755                let cmp_expr = if field_is_enum {
1756                    format!("str({field_access}).lower()")
1757                } else {
1758                    field_access.clone()
1759                };
1760                if field_is_optional {
1761                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1762                    let _ = writeln!(
1763                        out,
1764                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1765                    );
1766                } else {
1767                    let _ = writeln!(
1768                        out,
1769                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1770                    );
1771                }
1772            }
1773        }
1774        "greater_than" => {
1775            if let Some(val) = &assertion.value {
1776                let expected = value_to_python_string(val);
1777                let _ = writeln!(out, "    assert {field_access} > {expected}  # noqa: S101");
1778            }
1779        }
1780        "less_than" => {
1781            if let Some(val) = &assertion.value {
1782                let expected = value_to_python_string(val);
1783                let _ = writeln!(out, "    assert {field_access} < {expected}  # noqa: S101");
1784            }
1785        }
1786        "greater_than_or_equal" | "min" => {
1787            if let Some(val) = &assertion.value {
1788                let expected = value_to_python_string(val);
1789                let _ = writeln!(out, "    assert {field_access} >= {expected}  # noqa: S101");
1790            }
1791        }
1792        "less_than_or_equal" | "max" => {
1793            if let Some(val) = &assertion.value {
1794                let expected = value_to_python_string(val);
1795                let _ = writeln!(out, "    assert {field_access} <= {expected}  # noqa: S101");
1796            }
1797        }
1798        "starts_with" => {
1799            if let Some(val) = &assertion.value {
1800                let expected = value_to_python_string(val);
1801                let _ = writeln!(out, "    assert {field_access}.startswith({expected})  # noqa: S101");
1802            }
1803        }
1804        "ends_with" => {
1805            if let Some(val) = &assertion.value {
1806                let expected = value_to_python_string(val);
1807                let _ = writeln!(out, "    assert {field_access}.endswith({expected})  # noqa: S101");
1808            }
1809        }
1810        "min_length" => {
1811            if let Some(val) = &assertion.value {
1812                if let Some(n) = val.as_u64() {
1813                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1814                }
1815            }
1816        }
1817        "max_length" => {
1818            if let Some(val) = &assertion.value {
1819                if let Some(n) = val.as_u64() {
1820                    let _ = writeln!(out, "    assert len({field_access}) <= {n}  # noqa: S101");
1821                }
1822            }
1823        }
1824        "count_min" => {
1825            if let Some(val) = &assertion.value {
1826                if let Some(n) = val.as_u64() {
1827                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1828                }
1829            }
1830        }
1831        "count_equals" => {
1832            if let Some(val) = &assertion.value {
1833                if let Some(n) = val.as_u64() {
1834                    let _ = writeln!(out, "    assert len({field_access}) == {n}  # noqa: S101");
1835                }
1836            }
1837        }
1838        "is_true" => {
1839            let _ = writeln!(out, "    assert {field_access} is True  # noqa: S101");
1840        }
1841        "is_false" => {
1842            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1843        }
1844        "method_result" => {
1845            if let Some(method_name) = &assertion.method {
1846                let call_expr = build_python_method_call(result_var, method_name, assertion.args.as_ref());
1847                let check = assertion.check.as_deref().unwrap_or("is_true");
1848                match check {
1849                    "equals" => {
1850                        if let Some(val) = &assertion.value {
1851                            if val.is_boolean() {
1852                                if val.as_bool() == Some(true) {
1853                                    let _ = writeln!(out, "    assert {call_expr} is True  # noqa: S101");
1854                                } else {
1855                                    let _ = writeln!(out, "    assert {call_expr} is False  # noqa: S101");
1856                                }
1857                            } else {
1858                                let expected = value_to_python_string(val);
1859                                let _ = writeln!(out, "    assert {call_expr} == {expected}  # noqa: S101");
1860                            }
1861                        }
1862                    }
1863                    "is_true" => {
1864                        let _ = writeln!(out, "    assert {call_expr}  # noqa: S101");
1865                    }
1866                    "is_false" => {
1867                        let _ = writeln!(out, "    assert not {call_expr}  # noqa: S101");
1868                    }
1869                    "greater_than_or_equal" => {
1870                        if let Some(val) = &assertion.value {
1871                            let n = val.as_u64().unwrap_or(0);
1872                            let _ = writeln!(out, "    assert {call_expr} >= {n}  # noqa: S101");
1873                        }
1874                    }
1875                    "count_min" => {
1876                        if let Some(val) = &assertion.value {
1877                            let n = val.as_u64().unwrap_or(0);
1878                            let _ = writeln!(out, "    assert len({call_expr}) >= {n}  # noqa: S101");
1879                        }
1880                    }
1881                    "contains" => {
1882                        if let Some(val) = &assertion.value {
1883                            let expected = value_to_python_string(val);
1884                            let _ = writeln!(out, "    assert {expected} in {call_expr}  # noqa: S101");
1885                        }
1886                    }
1887                    "is_error" => {
1888                        let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1889                        let _ = writeln!(out, "        {call_expr}");
1890                    }
1891                    other_check => {
1892                        panic!("unsupported method_result check type: {other_check}");
1893                    }
1894                }
1895            } else {
1896                panic!("method_result assertion missing 'method' field");
1897            }
1898        }
1899        "matches_regex" => {
1900            if let Some(val) = &assertion.value {
1901                let expected = value_to_python_string(val);
1902                let _ = writeln!(out, "    import re  # noqa: PLC0415");
1903                let _ = writeln!(
1904                    out,
1905                    "    assert re.search({expected}, {field_access}) is not None  # noqa: S101"
1906                );
1907            }
1908        }
1909        other => {
1910            panic!("unsupported assertion type: {other}");
1911        }
1912    }
1913}
1914
1915/// Build a Python call expression for a method_result assertion on a tree-sitter Tree.
1916/// Maps method names to the appropriate Python function calls.
1917fn build_python_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1918    match method_name {
1919        "root_child_count" => format!("{result_var}.root_node().child_count()"),
1920        "root_node_type" => format!("{result_var}.root_node().kind()"),
1921        "named_children_count" => format!("{result_var}.root_node().named_child_count()"),
1922        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1923        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1924        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1925        "contains_node_type" => {
1926            let node_type = args
1927                .and_then(|a| a.get("node_type"))
1928                .and_then(|v| v.as_str())
1929                .unwrap_or("");
1930            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1931        }
1932        "find_nodes_by_type" => {
1933            let node_type = args
1934                .and_then(|a| a.get("node_type"))
1935                .and_then(|v| v.as_str())
1936                .unwrap_or("");
1937            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1938        }
1939        "run_query" => {
1940            let query_source = args
1941                .and_then(|a| a.get("query_source"))
1942                .and_then(|v| v.as_str())
1943                .unwrap_or("");
1944            let language = args
1945                .and_then(|a| a.get("language"))
1946                .and_then(|v| v.as_str())
1947                .unwrap_or("");
1948            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1949        }
1950        _ => {
1951            if let Some(args_val) = args {
1952                let arg_str = args_val
1953                    .as_object()
1954                    .map(|obj| {
1955                        obj.iter()
1956                            .map(|(k, v)| format!("{}={}", k, value_to_python_string(v)))
1957                            .collect::<Vec<_>>()
1958                            .join(", ")
1959                    })
1960                    .unwrap_or_default();
1961                format!("{result_var}.{method_name}({arg_str})")
1962            } else {
1963                format!("{result_var}.{method_name}()")
1964            }
1965        }
1966    }
1967}
1968
/// Name of the module-level Python helper that must be imported for a
/// `method_result` method, if there is one.
///
/// Returns `None` for every other method name; those are called on the result
/// object itself (e.g. `tree.root_node().child_count()`) and need no import.
fn python_method_helper_import(method_name: &str) -> Option<String> {
    let helper = match method_name {
        "has_error_nodes" => "tree_has_error_nodes",
        "error_count" | "tree_error_count" => "tree_error_count",
        "contains_node_type" => "tree_contains_node_type",
        // These helpers are imported under the method's own name.
        "tree_to_sexp" | "find_nodes_by_type" | "run_query" => method_name,
        _ => return None,
    };
    Some(helper.to_owned())
}
1983
1984fn value_to_python_string(value: &serde_json::Value) -> String {
1985    match value {
1986        serde_json::Value::String(s) => python_string_literal(s),
1987        serde_json::Value::Bool(true) => "True".to_string(),
1988        serde_json::Value::Bool(false) => "False".to_string(),
1989        serde_json::Value::Number(n) => n.to_string(),
1990        serde_json::Value::Null => "None".to_string(),
1991        other => python_string_literal(&other.to_string()),
1992    }
1993}
1994
1995/// Produce a quoted Python string literal, choosing single or double quotes
1996/// to avoid unnecessary escaping (ruff Q003).
1997fn python_string_literal(s: &str) -> String {
1998    if s.contains('"') && !s.contains('\'') {
1999        // Use single quotes to avoid escaping double quotes.
2000        let escaped = s
2001            .replace('\\', "\\\\")
2002            .replace('\'', "\\'")
2003            .replace('\n', "\\n")
2004            .replace('\r', "\\r")
2005            .replace('\t', "\\t");
2006        format!("'{escaped}'")
2007    } else {
2008        format!("\"{}\"", escape_python(s))
2009    }
2010}
2011
2012/// Emit a Python visitor method for a callback action.
2013fn emit_python_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
2014    let params = match method_name {
2015        "visit_link" => "self, ctx, href, text, title",
2016        "visit_image" => "self, ctx, src, alt, title",
2017        "visit_heading" => "self, ctx, level, text, id",
2018        "visit_code_block" => "self, ctx, lang, code",
2019        "visit_code_inline"
2020        | "visit_strong"
2021        | "visit_emphasis"
2022        | "visit_strikethrough"
2023        | "visit_underline"
2024        | "visit_subscript"
2025        | "visit_superscript"
2026        | "visit_mark"
2027        | "visit_button"
2028        | "visit_summary"
2029        | "visit_figcaption"
2030        | "visit_definition_term"
2031        | "visit_definition_description" => "self, ctx, text",
2032        "visit_text" => "self, ctx, text",
2033        "visit_list_item" => "self, ctx, ordered, marker, text",
2034        "visit_blockquote" => "self, ctx, content, depth",
2035        "visit_table_row" => "self, ctx, cells, is_header",
2036        "visit_custom_element" => "self, ctx, tag_name, html",
2037        "visit_form" => "self, ctx, action_url, method",
2038        "visit_input" => "self, ctx, input_type, name, value",
2039        "visit_audio" | "visit_video" | "visit_iframe" => "self, ctx, src",
2040        "visit_details" => "self, ctx, is_open",
2041        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
2042            "self, ctx, output, *args"
2043        }
2044        "visit_list_start" => "self, ctx, ordered, *args",
2045        "visit_list_end" => "self, ctx, ordered, output, *args",
2046        _ => "self, ctx, *args",
2047    };
2048
2049    let _ = writeln!(
2050        out,
2051        "        def {method_name}({params}):  # noqa: A002, ANN001, ANN202, ARG002"
2052    );
2053    match action {
2054        CallbackAction::Skip => {
2055            let _ = writeln!(out, "            return \"skip\"");
2056        }
2057        CallbackAction::Continue => {
2058            let _ = writeln!(out, "            return \"continue\"");
2059        }
2060        CallbackAction::PreserveHtml => {
2061            let _ = writeln!(out, "            return \"preserve_html\"");
2062        }
2063        CallbackAction::Custom { output } => {
2064            let escaped = escape_python(output);
2065            let _ = writeln!(out, "            return {{\"custom\": \"{escaped}\"}}");
2066        }
2067        CallbackAction::CustomTemplate { template } => {
2068            // Use single-quoted f-string so that double quotes inside the template
2069            // (e.g. `QUOTE: "{text}"`) are not misinterpreted as string delimiters.
2070            // Escape newlines/tabs/backslashes/single quotes so the template stays
2071            // on a single line in the generated source.
2072            let escaped_template = template
2073                .replace('\\', "\\\\")
2074                .replace('\'', "\\'")
2075                .replace('\n', "\\n")
2076                .replace('\r', "\\r")
2077                .replace('\t', "\\t");
2078            let _ = writeln!(out, "            return {{\"custom\": f'{escaped_template}'}}");
2079        }
2080    }
2081}