Skip to main content

alef_e2e/codegen/
python.rs

1//! Python e2e test code generator.
2//!
3//! Generates `e2e/python/conftest.py` and `tests/test_{category}.py` files from
4//! JSON fixtures, driven entirely by `E2eConfig` and `CallConfig`.
5
6use crate::codegen::resolve_field;
7use crate::config::E2eConfig;
8use crate::escape::{escape_python, sanitize_filename, sanitize_ident};
9use crate::field_access::FieldResolver;
10use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
11use alef_core::backend::GeneratedFile;
12use alef_core::config::AlefConfig;
13use alef_core::hash::{self, CommentStyle};
14use anyhow::Result;
15use heck::{ToShoutySnakeCase, ToSnakeCase};
16use std::collections::HashMap;
17use std::fmt::Write as FmtWrite;
18use std::path::PathBuf;
19
/// Python e2e test code generator.
///
/// A field-less unit struct: it carries no state of its own, and everything it
/// emits is derived from the arguments passed to its `E2eCodegen::generate`
/// implementation.
pub struct PythonE2eCodegen;
22
23impl super::E2eCodegen for PythonE2eCodegen {
24    fn generate(
25        &self,
26        groups: &[FixtureGroup],
27        e2e_config: &E2eConfig,
28        _alef_config: &AlefConfig,
29    ) -> Result<Vec<GeneratedFile>> {
30        let mut files = Vec::new();
31        let output_base = PathBuf::from(e2e_config.effective_output()).join("python");
32
33        // conftest.py
34        files.push(GeneratedFile {
35            path: output_base.join("conftest.py"),
36            content: render_conftest(e2e_config, groups),
37            generated_header: true,
38        });
39
40        // Root __init__.py (prevents ruff INP001).
41        files.push(GeneratedFile {
42            path: output_base.join("__init__.py"),
43            content: "\n".to_string(),
44            generated_header: false,
45        });
46
47        // tests/__init__.py
48        files.push(GeneratedFile {
49            path: output_base.join("tests").join("__init__.py"),
50            content: "\n".to_string(),
51            generated_header: false,
52        });
53
54        // pyproject.toml for standalone uv resolution
55        let python_pkg = e2e_config.resolve_package("python");
56        let pkg_name = python_pkg
57            .as_ref()
58            .and_then(|p| p.name.as_deref())
59            .unwrap_or("kreuzcrawl");
60        let pkg_path = python_pkg
61            .as_ref()
62            .and_then(|p| p.path.as_deref())
63            .unwrap_or("../../packages/python");
64        let pkg_version = python_pkg
65            .as_ref()
66            .and_then(|p| p.version.as_deref())
67            .unwrap_or("0.1.0");
68        files.push(GeneratedFile {
69            path: output_base.join("pyproject.toml"),
70            content: render_pyproject(pkg_name, pkg_path, pkg_version, e2e_config.dep_mode),
71            generated_header: true,
72        });
73
74        // Per-category test files.
75        for group in groups {
76            let fixtures: Vec<&Fixture> = group.fixtures.iter().collect();
77
78            if fixtures.is_empty() {
79                continue;
80            }
81
82            // Skip emitting the file entirely when every fixture is skipped for
83            // python — there's nothing to run, and emitting imports of
84            // not-bound APIs causes module-level ImportError that masks the
85            // skip marker.
86            if fixtures.iter().all(|f| is_skipped(f, "python")) {
87                continue;
88            }
89
90            let filename = format!("test_{}.py", sanitize_filename(&group.category));
91            let content = render_test_file(&group.category, &fixtures, e2e_config);
92
93            files.push(GeneratedFile {
94                path: output_base.join("tests").join(filename),
95                content,
96                generated_header: true,
97            });
98        }
99
100        Ok(files)
101    }
102
103    fn language_name(&self) -> &'static str {
104        "python"
105    }
106}
107
108// ---------------------------------------------------------------------------
109// pyproject.toml
110// ---------------------------------------------------------------------------
111
112fn render_pyproject(
113    pkg_name: &str,
114    pkg_path: &str,
115    pkg_version: &str,
116    dep_mode: crate::config::DependencyMode,
117) -> String {
118    // Generate in pyproject-fmt canonical form so the pre-commit hook is a no-op.
119    // pyproject-fmt sorts deps alphabetically, uses spaces inside brackets, dotted
120    // tool keys, and injects Python classifiers.
121    let (deps_line, uv_sources_block) = match dep_mode {
122        crate::config::DependencyMode::Registry => (
123            format!(
124                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}{pkg_version}\" ]"
125            ),
126            String::new(),
127        ),
128        crate::config::DependencyMode::Local => (
129            format!(
130                "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}\" ]"
131            ),
132            format!(
133                "\n[tool.uv]\nsources.{pkg_name} = {{ path = \"{pkg_path}\" }}\n",
134                pkg_path = pkg_path
135            ),
136        ),
137    };
138
139    format!(
140        r#"[build-system]
141build-backend = "setuptools.build_meta"
142requires = [ "setuptools>=68", "wheel" ]
143
144[project]
145name = "{pkg_name}-e2e-tests"
146version = "0.0.0"
147description = "End-to-end tests"
148requires-python = ">=3.10"
149classifiers = [
150  "Programming Language :: Python :: 3 :: Only",
151  "Programming Language :: Python :: 3.10",
152  "Programming Language :: Python :: 3.11",
153  "Programming Language :: Python :: 3.12",
154  "Programming Language :: Python :: 3.13",
155  "Programming Language :: Python :: 3.14",
156]
157{deps_line}
158
159[tool.setuptools]
160packages = [  ]
161{uv_sources_block}
162[tool.ruff]
163lint.ignore = [ "PLR2004" ]
164lint.per-file-ignores."tests/**" = [ "B017", "PT011", "S101", "S108" ]
165
166[tool.pytest]
167ini_options.asyncio_mode = "auto"
168ini_options.testpaths = [ "tests" ]
169ini_options.python_files = "test_*.py"
170ini_options.python_functions = "test_*"
171ini_options.addopts = "-v --strict-markers --tb=short"
172ini_options.timeout = 300
173"#
174    )
175}
176
177// ---------------------------------------------------------------------------
178// Config resolution helpers
179// ---------------------------------------------------------------------------
180
181fn resolve_function_name(e2e_config: &E2eConfig) -> String {
182    resolve_function_name_for_call(&e2e_config.call)
183}
184
185fn resolve_function_name_for_call(call_config: &crate::config::CallConfig) -> String {
186    call_config
187        .overrides
188        .get("python")
189        .and_then(|o| o.function.clone())
190        .unwrap_or_else(|| call_config.function.clone())
191}
192
193fn resolve_module(e2e_config: &E2eConfig) -> String {
194    e2e_config
195        .call
196        .overrides
197        .get("python")
198        .and_then(|o| o.module.clone())
199        .unwrap_or_else(|| e2e_config.call.module.replace('-', "_"))
200}
201
202fn resolve_options_type(e2e_config: &E2eConfig) -> Option<String> {
203    e2e_config
204        .call
205        .overrides
206        .get("python")
207        .and_then(|o| o.options_type.clone())
208}
209
210/// Resolve how json_object args are passed: "kwargs" (default), "dict", or "json".
211fn resolve_options_via(e2e_config: &E2eConfig) -> &str {
212    e2e_config
213        .call
214        .overrides
215        .get("python")
216        .and_then(|o| o.options_via.as_deref())
217        .unwrap_or("kwargs")
218}
219
220/// Resolve enum field mappings from the Python override config.
221fn resolve_enum_fields(e2e_config: &E2eConfig) -> &HashMap<String, String> {
222    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
223    e2e_config
224        .call
225        .overrides
226        .get("python")
227        .map(|o| &o.enum_fields)
228        .unwrap_or(&EMPTY)
229}
230
231/// Resolve handle nested type mappings from the Python override config.
232/// Maps config field names to their Python constructor type names.
233fn resolve_handle_nested_types(e2e_config: &E2eConfig) -> &HashMap<String, String> {
234    static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
235    e2e_config
236        .call
237        .overrides
238        .get("python")
239        .map(|o| &o.handle_nested_types)
240        .unwrap_or(&EMPTY)
241}
242
243/// Resolve handle dict type set from the Python override config.
244/// Fields in this set use `TypeName({...})` instead of `TypeName(key=val, ...)`.
245fn resolve_handle_dict_types(e2e_config: &E2eConfig) -> &std::collections::HashSet<String> {
246    static EMPTY: std::sync::LazyLock<std::collections::HashSet<String>> =
247        std::sync::LazyLock::new(std::collections::HashSet::new);
248    e2e_config
249        .call
250        .overrides
251        .get("python")
252        .map(|o| &o.handle_dict_types)
253        .unwrap_or(&EMPTY)
254}
255
256fn is_skipped(fixture: &Fixture, language: &str) -> bool {
257    fixture.skip.as_ref().is_some_and(|s| s.should_skip(language))
258}
259
260// ---------------------------------------------------------------------------
261// Rendering
262// ---------------------------------------------------------------------------
263
264fn render_conftest(e2e_config: &E2eConfig, groups: &[FixtureGroup]) -> String {
265    let module = resolve_module(e2e_config);
266    let has_http_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| f.is_http_test());
267
268    // Detect whether any fixture uses file_path or bytes args — if so we need to
269    // chdir to the test_documents directory so relative paths resolve correctly.
270    let has_file_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| {
271        let cc = e2e_config.resolve_call(f.call.as_deref());
272        cc.args
273            .iter()
274            .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
275    });
276
277    let header = hash::header(CommentStyle::Hash);
278    if has_http_fixtures {
279        format!(
280            r#"{header}"""Pytest configuration for e2e tests."""
281from __future__ import annotations
282
283import os
284import subprocess
285import threading
286from pathlib import Path
287from typing import Generator
288
289import pytest
290
291# Ensure the package is importable.
292# The {module} package is expected to be installed in the current environment.
293
294_HERE = Path(__file__).parent
295_E2E_DIR = _HERE.parent
296_MOCK_SERVER_BIN = _E2E_DIR / "rust" / "target" / "release" / "mock-server"
297_FIXTURES_DIR = _E2E_DIR.parent / "fixtures"
298
299
300@pytest.fixture(scope="session", autouse=True)
301def mock_server() -> Generator[str, None, None]:
302    """Spawn the mock HTTP server binary and set MOCK_SERVER_URL."""
303    proc = subprocess.Popen(  # noqa: S603
304        [str(_MOCK_SERVER_BIN), str(_FIXTURES_DIR)],
305        stdout=subprocess.PIPE,
306        stderr=None,
307        stdin=subprocess.PIPE,
308    )
309    url = ""
310    assert proc.stdout is not None
311    for raw_line in proc.stdout:
312        line = raw_line.decode().strip()
313        if line.startswith("MOCK_SERVER_URL="):
314            url = line.split("=", 1)[1]
315            break
316    os.environ["MOCK_SERVER_URL"] = url
317    # Drain stdout in background so the server never blocks.
318    threading.Thread(target=proc.stdout.read, daemon=True).start()
319    yield url
320    if proc.stdin:
321        proc.stdin.close()
322    proc.terminate()
323    proc.wait()
324
325
326def _make_request(method: str, path: str, **kwargs: object) -> object:
327    """Make an HTTP request to the mock server."""
328    import urllib.request  # noqa: PLC0415
329
330    base_url = os.environ.get("MOCK_SERVER_URL", "http://localhost:8080")
331    url = f"{{base_url}}{{path}}"
332    data = kwargs.pop("json", None)
333    if data is not None:
334        import json  # noqa: PLC0415
335
336        body = json.dumps(data).encode()
337        headers = dict(kwargs.pop("headers", {{}}))
338        headers.setdefault("Content-Type", "application/json")
339        req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
340    else:
341        headers = dict(kwargs.pop("headers", {{}}))
342        req = urllib.request.Request(url, headers=headers, method=method.upper())
343    try:
344        with urllib.request.urlopen(req) as resp:  # noqa: S310
345            return resp
346    except urllib.error.HTTPError as exc:
347        return exc
348
349
350@pytest.fixture(scope="session")
351def app(mock_server: str) -> object:  # noqa: ARG001
352    """Return a simple HTTP helper bound to the mock server URL."""
353
354    class _App:
355        def request(self, path: str, **kwargs: object) -> object:
356            method = str(kwargs.pop("method", "GET"))
357            return _make_request(method, path, **kwargs)
358
359    return _App()
360"#
361        )
362    } else if has_file_fixtures {
363        format!(
364            r#"{header}"""Pytest configuration for e2e tests."""
365import os
366from pathlib import Path
367
368# Ensure the package is importable.
369# The {module} package is expected to be installed in the current environment.
370
371# Change to the test_documents directory so that fixture file paths like
372# "pdf/fake_memo.pdf" resolve correctly when running pytest from e2e/python/.
373_TEST_DOCUMENTS = Path(__file__).parent.parent.parent / "test_documents"
374if _TEST_DOCUMENTS.is_dir():
375    os.chdir(_TEST_DOCUMENTS)
376
377# On macOS, Pdfium is a separate dylib not on the default library path in dev builds.
378# Search common locations (Cargo build output, staged target/release) and extend
379# DYLD_LIBRARY_PATH / LD_LIBRARY_PATH so the extension can load the library.
380_REPO_ROOT = Path(__file__).parent.parent.parent
381
382
383def _find_pdfium_dir() -> str | None:
384    """Find the directory containing libpdfium, searching Cargo build outputs."""
385    for _candidate in sorted(_REPO_ROOT.glob("target/*/release/build/*/out/libpdfium*")):
386        return str(_candidate.parent)
387    for _candidate in sorted(_REPO_ROOT.glob("target/release/build/*/out/libpdfium*")):
388        return str(_candidate.parent)
389    return None
390
391
392_pdfium_dir = _find_pdfium_dir()
393if _pdfium_dir is not None:
394    for _var in ("DYLD_LIBRARY_PATH", "LD_LIBRARY_PATH"):
395        _existing = os.environ.get(_var, "")
396        if _pdfium_dir not in _existing:
397            os.environ[_var] = f"{{_pdfium_dir}}:{{_existing}}" if _existing else _pdfium_dir
398"#
399        )
400    } else {
401        format!(
402            r#"{header}"""Pytest configuration for e2e tests."""
403# Ensure the package is importable.
404# The {module} package is expected to be installed in the current environment.
405"#
406        )
407    }
408}
409
410fn render_test_file(category: &str, fixtures: &[&Fixture], e2e_config: &E2eConfig) -> String {
411    let mut out = String::new();
412    out.push_str(&hash::header(CommentStyle::Hash));
413    let _ = writeln!(out, "\"\"\"E2e tests for category: {category}.\"\"\"");
414
415    let module = resolve_module(e2e_config);
416    let function_name = resolve_function_name(e2e_config);
417    let options_type = resolve_options_type(e2e_config);
418    let options_via = resolve_options_via(e2e_config);
419    let enum_fields = resolve_enum_fields(e2e_config);
420    let handle_nested_types = resolve_handle_nested_types(e2e_config);
421    let handle_dict_types = resolve_handle_dict_types(e2e_config);
422    let field_resolver = FieldResolver::new(
423        &e2e_config.fields,
424        &e2e_config.fields_optional,
425        &e2e_config.result_fields,
426        &e2e_config.fields_array,
427    );
428
429    let has_error_test = fixtures
430        .iter()
431        .any(|f| f.assertions.iter().any(|a| a.assertion_type == "error"));
432    let has_skipped = fixtures.iter().any(|f| is_skipped(f, "python"));
433    let has_http_tests = fixtures.iter().any(|f| f.is_http_test());
434
435    // Check if any fixture in this file uses an async call.
436    let is_async = fixtures.iter().any(|f| {
437        let cc = e2e_config.resolve_call(f.call.as_deref());
438        cc.r#async
439    }) || e2e_config.call.r#async;
440    let needs_pytest = has_error_test || has_skipped || is_async;
441
442    // "json" mode needs `import json`.
443    let needs_json_import = options_via == "json"
444        && fixtures.iter().any(|f| {
445            e2e_config
446                .call
447                .args
448                .iter()
449                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
450        });
451
452    // mock_url args need `import os`.
453    let needs_os_import = e2e_config.call.args.iter().any(|arg| arg.arg_type == "mock_url");
454
455    // bytes args need `from pathlib import Path` when any fixture value is a file path.
456    // bytes args need `import base64` when any fixture value is a base64 blob.
457    let needs_path_import = fixtures.iter().any(|f| {
458        let cc = e2e_config.resolve_call(f.call.as_deref());
459        cc.args.iter().any(|arg| {
460            if arg.arg_type != "bytes" {
461                return false;
462            }
463            let val = resolve_field(&f.input, &arg.field);
464            val.as_str()
465                .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::FilePath))
466        })
467    });
468    let needs_base64_import = fixtures.iter().any(|f| {
469        let cc = e2e_config.resolve_call(f.call.as_deref());
470        cc.args.iter().any(|arg| {
471            if arg.arg_type != "bytes" {
472                return false;
473            }
474            let val = resolve_field(&f.input, &arg.field);
475            val.as_str()
476                .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::Base64))
477        })
478    });
479
480    // HTTP tests handle `import re` inline (per-test), so no top-level re import is needed.
481    let needs_re_import = false;
482    let _ = has_http_tests; // used indirectly via inline imports in render_http_test_function
483
484    // Only import options_type when using "kwargs" mode.
485    let needs_options_type = options_via == "kwargs"
486        && options_type.is_some()
487        && fixtures.iter().any(|f| {
488            e2e_config
489                .call
490                .args
491                .iter()
492                .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
493        });
494
495    // Collect enum types actually used across all fixtures in this file.
496    let mut used_enum_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
497    if needs_options_type && !enum_fields.is_empty() {
498        for fixture in fixtures.iter() {
499            for arg in &e2e_config.call.args {
500                if arg.arg_type == "json_object" {
501                    let value = resolve_field(&fixture.input, &arg.field);
502                    if let Some(obj) = value.as_object() {
503                        for key in obj.keys() {
504                            if let Some(enum_type) = enum_fields.get(key) {
505                                used_enum_types.insert(enum_type.clone());
506                            }
507                        }
508                    }
509                }
510            }
511        }
512    }
513
514    // Collect imports sorted per isort/ruff I001: stdlib group, then
515    // third-party group, separated by a blank line. Within each group
516    // `import X` lines come before `from X import Y` lines, both sorted.
517    let mut stdlib_imports: Vec<String> = Vec::new();
518    let mut thirdparty_bare: Vec<String> = Vec::new();
519    let mut thirdparty_from: Vec<String> = Vec::new();
520
521    if needs_base64_import {
522        stdlib_imports.push("import base64".to_string());
523    }
524
525    if needs_json_import {
526        stdlib_imports.push("import json".to_string());
527    }
528
529    if needs_os_import {
530        stdlib_imports.push("import os".to_string());
531    }
532
533    if needs_path_import {
534        stdlib_imports.push("from pathlib import Path".to_string());
535    }
536
537    if needs_re_import {
538        stdlib_imports.push("import re".to_string());
539    }
540
541    if needs_pytest {
542        // F401 (unused-import) suppression: pytest is needed at module level for
543        // its fixture decorators and `pytest.mark.*` annotations, but ruff cannot
544        // statically tell whether a generated test file references those — so we
545        // hint to ruff that the import is intentional.
546        thirdparty_bare.push("import pytest  # noqa: F401".to_string());
547    }
548
549    // For non-HTTP fixtures, build the normal function imports.
550    // Only count fixtures that are not skipped and have assertions (need to call the function).
551    let has_non_http_fixtures = fixtures
552        .iter()
553        .any(|f| !f.is_http_test() && !is_skipped(f, "python") && !f.assertions.is_empty());
554    if has_non_http_fixtures {
555        // Collect handle constructor function names that need to be imported.
556        let handle_constructors: Vec<String> = e2e_config
557            .call
558            .args
559            .iter()
560            .filter(|arg| arg.arg_type == "handle")
561            .map(|arg| format!("create_{}", arg.name.to_snake_case()))
562            .collect();
563
564        // Collect all unique function names actually used across all fixtures in this file.
565        // Do not seed with the default function_name — only include it when at least one
566        // fixture resolves to it, to avoid unused-import (F401) warnings from ruff.
567        let mut import_names: Vec<String> = Vec::new();
568        for fixture in fixtures.iter() {
569            let cc = e2e_config.resolve_call(fixture.call.as_deref());
570            let fn_name = resolve_function_name_for_call(cc);
571            if !import_names.contains(&fn_name) {
572                import_names.push(fn_name);
573            }
574        }
575        // Safety net: should not occur since the group is non-empty, but ensures
576        // import_names is never empty if all fixtures use the default call.
577        if import_names.is_empty() {
578            import_names.push(function_name.clone());
579        }
580        for ctor in &handle_constructors {
581            if !import_names.contains(ctor) {
582                import_names.push(ctor.clone());
583            }
584        }
585
586        // If any handle arg has config, import the config class (CrawlConfig or options_type).
587        let needs_config_import = e2e_config.call.args.iter().any(|arg| {
588            arg.arg_type == "handle"
589                && fixtures.iter().any(|f| {
590                    let val = resolve_field(&f.input, &arg.field);
591                    !val.is_null() && val.as_object().is_some_and(|o| !o.is_empty())
592                })
593        });
594        if needs_config_import {
595            let config_class = options_type.as_deref().unwrap_or("CrawlConfig");
596            if !import_names.contains(&config_class.to_string()) {
597                import_names.push(config_class.to_string());
598            }
599        }
600
601        // Import any nested handle config types actually used in this file.
602        if !handle_nested_types.is_empty() {
603            let mut used_nested_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
604            for fixture in fixtures.iter() {
605                for arg in &e2e_config.call.args {
606                    if arg.arg_type == "handle" {
607                        let config_value = resolve_field(&fixture.input, &arg.field);
608                        if let Some(obj) = config_value.as_object() {
609                            for key in obj.keys() {
610                                if let Some(type_name) = handle_nested_types.get(key) {
611                                    if obj[key].is_object() {
612                                        used_nested_types.insert(type_name.clone());
613                                    }
614                                }
615                            }
616                        }
617                    }
618                }
619            }
620            for type_name in used_nested_types {
621                if !import_names.contains(&type_name) {
622                    import_names.push(type_name);
623                }
624            }
625        }
626
627        // Collect method_result helper function imports.
628        for fixture in fixtures.iter() {
629            for assertion in &fixture.assertions {
630                if assertion.assertion_type == "method_result" {
631                    if let Some(method_name) = &assertion.method {
632                        let import = python_method_helper_import(method_name);
633                        if let Some(name) = import {
634                            if !import_names.contains(&name) {
635                                import_names.push(name);
636                            }
637                        }
638                    }
639                }
640            }
641        }
642
643        if let (true, Some(opts_type)) = (needs_options_type, &options_type) {
644            import_names.push(opts_type.clone());
645            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
646            // Import enum types from enum_module (if specified) or main module.
647            if !used_enum_types.is_empty() {
648                let enum_mod = e2e_config
649                    .call
650                    .overrides
651                    .get("python")
652                    .and_then(|o| o.enum_module.as_deref())
653                    .unwrap_or(&module);
654                let enum_names: Vec<&String> = used_enum_types.iter().collect();
655                thirdparty_from.push(format!(
656                    "from {enum_mod} import {}",
657                    enum_names.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
658                ));
659            }
660        } else {
661            thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
662        }
663    }
664
665    stdlib_imports.sort();
666    thirdparty_bare.sort();
667    thirdparty_from.sort();
668
669    // Emit sorted import groups with blank lines between groups per PEP 8.
670    if !stdlib_imports.is_empty() {
671        for imp in &stdlib_imports {
672            let _ = writeln!(out, "{imp}");
673        }
674        let _ = writeln!(out);
675    }
676    // Third-party: bare imports then from-imports, no blank line between them.
677    for imp in &thirdparty_bare {
678        let _ = writeln!(out, "{imp}");
679    }
680    for imp in &thirdparty_from {
681        let _ = writeln!(out, "{imp}");
682    }
683    // Two blank lines after imports (PEP 8 / ruff I001).
684    let _ = writeln!(out);
685    let _ = writeln!(out);
686
687    for fixture in fixtures {
688        if fixture.is_http_test() {
689            render_http_test_function(&mut out, fixture);
690        } else if !is_skipped(fixture, "python") && fixture.assertions.is_empty() {
691            // Non-HTTP fixture with no assertions: generate a skipped placeholder.
692            let fn_name = sanitize_ident(&fixture.id);
693            let description = &fixture.description;
694            let desc_with_period = if description.ends_with('.') {
695                description.to_string()
696            } else {
697                format!("{description}.")
698            };
699            let _ = writeln!(
700                out,
701                "@pytest.mark.skip(reason=\"no assertions configured for this fixture in python e2e\")"
702            );
703            let _ = writeln!(out, "def test_{fn_name}() -> None:");
704            let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
705        } else {
706            render_test_function(
707                &mut out,
708                fixture,
709                e2e_config,
710                options_type.as_deref(),
711                options_via,
712                enum_fields,
713                handle_nested_types,
714                handle_dict_types,
715                &field_resolver,
716            );
717        }
718        let _ = writeln!(out);
719    }
720
721    out
722}
723
724// ---------------------------------------------------------------------------
725// HTTP server test rendering
726// ---------------------------------------------------------------------------
727
728/// Render a pytest test function for an HTTP server fixture.
729///
730/// The generated test:
/// 1. Receives the `mock_server` fixture from conftest.py (the mock server's base URL).
732/// 2. Sends the configured request.
733/// 3. Asserts status code, body (exact or partial), headers, and validation errors.
734fn render_http_test_function(out: &mut String, fixture: &Fixture) {
735    let Some(http) = &fixture.http else {
736        return;
737    };
738
739    let fn_name = sanitize_ident(&fixture.id);
740    let description = &fixture.description;
741    let desc_with_period = if description.ends_with('.') {
742        description.to_string()
743    } else {
744        format!("{description}.")
745    };
746
747    // HTTP 101 (WebSocket upgrade) — urllib cannot handle upgrade responses.
748    let status = http.expected_response.status_code;
749    if status == 101 {
750        let _ = writeln!(
751            out,
752            "@pytest.mark.skip(reason=\"HTTP 101 WebSocket upgrade cannot be tested via urllib\")"
753        );
754        let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
755        let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
756        let _ = writeln!(out, "    ...");
757        let _ = writeln!(out);
758        return;
759    }
760
761    if is_skipped(fixture, "python") {
762        let reason = fixture
763            .skip
764            .as_ref()
765            .and_then(|s| s.reason.as_deref())
766            .unwrap_or("skipped for python");
767        let escaped = escape_python(reason);
768        let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
769    }
770
771    let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
772    let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
773    let _ = writeln!(out, "    import os  # noqa: PLC0415");
774    let _ = writeln!(out, "    import urllib.request  # noqa: PLC0415");
775    let _ = writeln!(out, "    base = os.environ.get(\"MOCK_SERVER_URL\", mock_server)");
776    let fixture_id = fixture.id.as_str();
777    let _ = writeln!(out, "    url = f\"{{base}}/fixtures/{fixture_id}\"");
778
779    // Build the request call using urllib.
780    let method = http.request.method.to_uppercase();
781
782    // Build headers dict.
783    let mut header_entries: Vec<String> = Vec::new();
784    for (k, v) in &http.request.headers {
785        header_entries.push(format!("        \"{}\": \"{}\",", escape_python(k), escape_python(v)));
786    }
787    let headers_py = if header_entries.is_empty() {
788        "{}".to_string()
789    } else {
790        format!("{{\n{}\n    }}", header_entries.join("\n"))
791    };
792
793    if let Some(body) = &http.request.body {
794        let py_body = json_to_python_literal(body);
795        let _ = writeln!(out, "    import json  # noqa: PLC0415");
796        let _ = writeln!(out, "    _headers = {headers_py}");
797        let _ = writeln!(out, "    _headers.setdefault(\"Content-Type\", \"application/json\")");
798        let _ = writeln!(out, "    _body = json.dumps({py_body}).encode()");
799        let _ = writeln!(
800            out,
801            "    _req = urllib.request.Request(url, data=_body, headers=_headers, method=\"{method}\")"
802        );
803    } else {
804        let _ = writeln!(out, "    _headers = {headers_py}");
805        let _ = writeln!(
806            out,
807            "    _req = urllib.request.Request(url, headers=_headers, method=\"{method}\")"
808        );
809    }
810    // Determine which response variables are actually needed.
811    // Exclude the empty-string body sentinel ("") and null — those mean "no body".
812    let body_has_content = matches!(&http.expected_response.body, Some(v)
813        if !(v.is_null() || (v.is_string() && v.as_str() == Some(""))));
814    let needs_body = body_has_content
815        || http.expected_response.body_partial.is_some()
816        || http
817            .expected_response
818            .validation_errors
819            .as_ref()
820            .is_some_and(|v| !v.is_empty());
821    // content-encoding is skipped (mock server strips it), so only consider other headers.
822    let needs_headers = http
823        .expected_response
824        .headers
825        .iter()
826        .any(|(k, _)| k.to_lowercase() != "content-encoding");
827
828    // Build an opener that does NOT follow redirects so we can assert on 3xx responses.
829    let _ = writeln!(
830        out,
831        "    class _NoRedirect(urllib.request.HTTPRedirectHandler):  # noqa: N801"
832    );
833    let _ = writeln!(
834        out,
835        "        def redirect_request(self, *args, **kwargs): return None  # noqa: E704"
836    );
837    let _ = writeln!(out, "    _opener = urllib.request.build_opener(_NoRedirect())");
838    let _ = writeln!(out, "    try:");
839    let _ = writeln!(out, "        response = _opener.open(_req)  # noqa: S310");
840    let _ = writeln!(out, "        status_code = response.status");
841    if needs_body {
842        let _ = writeln!(out, "        resp_body = response.read()");
843    }
844    if needs_headers {
845        let _ = writeln!(out, "        resp_headers = dict(response.headers)");
846    }
847    let _ = writeln!(out, "    except urllib.error.HTTPError as _exc:");
848    let _ = writeln!(out, "        status_code = _exc.code");
849    if needs_body {
850        let _ = writeln!(out, "        resp_body = _exc.read()");
851    }
852    if needs_headers {
853        let _ = writeln!(out, "        resp_headers = dict(_exc.headers)");
854    }
855
856    // Status code assertion.
857    let status = http.expected_response.status_code;
858    let _ = writeln!(out, "    assert status_code == {status}  # noqa: S101");
859
860    // Body assertions.
861    if let Some(expected_body) = &http.expected_response.body {
862        // Empty-string sentinel means no body — skip assertion.
863        if !(expected_body.is_null() || expected_body.is_string() && expected_body.as_str() == Some("")) {
864            if let serde_json::Value::String(s) = expected_body {
865                // Plain-string body: mock server returns raw text, compare decoded bytes directly.
866                let py_val = format!("\"{}\"", escape_python(s));
867                let _ = writeln!(out, "    assert resp_body.decode() == {py_val}  # noqa: S101");
868            } else {
869                let py_val = json_to_python_literal(expected_body);
870                let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
871                let _ = writeln!(out, "    data = _json.loads(resp_body)");
872                let _ = writeln!(out, "    assert data == {py_val}  # noqa: S101");
873            }
874        }
875    } else if let Some(partial) = &http.expected_response.body_partial {
876        let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
877        let _ = writeln!(out, "    data = _json.loads(resp_body)");
878        if let Some(obj) = partial.as_object() {
879            for (key, val) in obj {
880                let py_val = json_to_python_literal(val);
881                let escaped_key = escape_python(key);
882                let _ = writeln!(out, "    assert data[\"{escaped_key}\"] == {py_val}  # noqa: S101");
883            }
884        }
885    }
886
887    // Header assertions.
888    for (header_name, header_value) in &http.expected_response.headers {
889        let lower_name = header_name.to_lowercase();
890        // The mock server strips content-encoding headers because it returns uncompressed bodies.
891        if lower_name == "content-encoding" {
892            continue;
893        }
894        let escaped_name = escape_python(&lower_name);
895        match header_value.as_str() {
896            "<<present>>" => {
897                let _ = writeln!(out, "    assert \"{escaped_name}\" in resp_headers  # noqa: S101");
898            }
899            "<<absent>>" => {
900                let _ = writeln!(
901                    out,
902                    "    assert resp_headers.get(\"{escaped_name}\") is None  # noqa: S101"
903                );
904            }
905            "<<uuid>>" => {
906                let _ = writeln!(out, "    import re  # noqa: PLC0415");
907                let _ = writeln!(
908                    out,
909                    "    assert re.match(r'^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$', resp_headers[\"{escaped_name}\"])  # noqa: S101"
910                );
911            }
912            exact => {
913                let escaped_val = escape_python(exact);
914                let _ = writeln!(
915                    out,
916                    "    assert resp_headers[\"{escaped_name}\"] == \"{escaped_val}\"  # noqa: S101"
917                );
918            }
919        }
920    }
921
922    // Validation error assertions — skip when a full body assertEquals is already generated
923    // (it is redundant and avoids miss-keying "detail" vs "errors").
924    if let Some(validation_errors) = &http.expected_response.validation_errors {
925        if !validation_errors.is_empty() && !body_has_content {
926            let _ = writeln!(out, "    import json as _json  # noqa: PLC0415");
927            let _ = writeln!(out, "    _data = _json.loads(resp_body)");
928            let _ = writeln!(out, "    errors = _data.get(\"errors\", [])");
929            for ve in validation_errors {
930                let loc_py: Vec<String> = ve.loc.iter().map(|s| format!("\"{}\"", escape_python(s))).collect();
931                let loc_str = loc_py.join(", ");
932                let escaped_msg = escape_python(&ve.msg);
933                let _ = writeln!(
934                    out,
935                    "    assert any(e[\"loc\"] == [{loc_str}] and \"{escaped_msg}\" in e[\"msg\"] for e in errors)  # noqa: S101"
936                );
937            }
938        }
939    }
940}
941
942// ---------------------------------------------------------------------------
943// Function-call test rendering
944// ---------------------------------------------------------------------------
945
946#[allow(clippy::too_many_arguments)]
947fn render_test_function(
948    out: &mut String,
949    fixture: &Fixture,
950    e2e_config: &E2eConfig,
951    options_type: Option<&str>,
952    options_via: &str,
953    enum_fields: &HashMap<String, String>,
954    handle_nested_types: &HashMap<String, String>,
955    handle_dict_types: &std::collections::HashSet<String>,
956    field_resolver: &FieldResolver,
957) {
958    let fn_name = sanitize_ident(&fixture.id);
959    let description = &fixture.description;
960    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
961    let function_name = resolve_function_name_for_call(call_config);
962    let result_var = &call_config.result_var;
963
964    // Resolve Python-specific override settings.
965    let python_override = call_config.overrides.get("python");
966    let result_is_simple = python_override.is_some_and(|o| o.result_is_simple);
967    let arg_name_map = python_override.map(|o| &o.arg_name_map);
968
969    let desc_with_period = if description.ends_with('.') {
970        description.to_string()
971    } else {
972        format!("{description}.")
973    };
974
975    // Emit pytest.mark.skip for fixtures that should be skipped for python.
976    if is_skipped(fixture, "python") {
977        let reason = fixture
978            .skip
979            .as_ref()
980            .and_then(|s| s.reason.as_deref())
981            .unwrap_or("skipped for python");
982        let escaped = escape_python(reason);
983        let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
984    }
985
986    let is_async = call_config.r#async;
987    if is_async {
988        let _ = writeln!(out, "@pytest.mark.asyncio");
989        let _ = writeln!(out, "async def test_{fn_name}() -> None:");
990    } else {
991        let _ = writeln!(out, "def test_{fn_name}() -> None:");
992    }
993    let _ = writeln!(out, "    \"\"\"{desc_with_period}\"\"\"");
994
995    // Check if any assertion is an error assertion.
996    let has_error_assertion = fixture.assertions.iter().any(|a| a.assertion_type == "error");
997
998    // Build argument expressions from config.
999    let mut arg_bindings = Vec::new();
1000    let mut kwarg_exprs = Vec::new();
1001    for arg in &call_config.args {
1002        let var_name = &arg.name;
1003        // Resolve the kwarg name: use the arg_name_map override if present.
1004        let kwarg_name = arg_name_map
1005            .and_then(|m| m.get(var_name.as_str()))
1006            .map(|s| s.as_str())
1007            .unwrap_or(var_name.as_str());
1008
1009        if arg.arg_type == "handle" {
1010            // Generate a create_engine (or equivalent) call and pass the variable.
1011            // If there's config data, construct a CrawlConfig with kwargs.
1012            let constructor_name = format!("create_{}", arg.name.to_snake_case());
1013            let config_value = resolve_field(&fixture.input, &arg.field);
1014            if config_value.is_null()
1015                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
1016            {
1017                arg_bindings.push(format!("    {var_name} = {constructor_name}(None)"));
1018            } else if let Some(obj) = config_value.as_object() {
1019                // Build kwargs for the config constructor (CrawlConfig(key=val, ...)).
1020                // For fields with a nested type mapping, wrap the dict value in the
1021                // appropriate typed constructor instead of passing a plain dict.
1022                let kwargs: Vec<String> = obj
1023                    .iter()
1024                    .map(|(k, v)| {
1025                        let snake_key = k.to_snake_case();
1026                        let py_val = if let Some(type_name) = handle_nested_types.get(k) {
1027                            // Wrap the nested dict in the typed constructor.
1028                            if let Some(nested_obj) = v.as_object() {
1029                                if nested_obj.is_empty() {
1030                                    // Empty dict: use the default constructor.
1031                                    format!("{type_name}()")
1032                                } else if handle_dict_types.contains(k) {
1033                                    // The outer Python config type (e.g. CrawlConfig) accepts a
1034                                    // plain dict for this field (e.g. `auth: dict | None`).
1035                                    // The binding-layer wrapper (e.g. api.py) creates the typed
1036                                    // object internally, so we must NOT pre-wrap it here.
1037                                    json_to_python_literal(v)
1038                                } else {
1039                                    // Type takes keyword arguments.
1040                                    let nested_kwargs: Vec<String> = nested_obj
1041                                        .iter()
1042                                        .map(|(nk, nv)| {
1043                                            let nested_snake_key = nk.to_snake_case();
1044                                            format!("{nested_snake_key}={}", json_to_python_literal(nv))
1045                                        })
1046                                        .collect();
1047                                    format!("{type_name}({})", nested_kwargs.join(", "))
1048                                }
1049                            } else {
1050                                // Non-object value: use as-is.
1051                                json_to_python_literal(v)
1052                            }
1053                        } else if k == "request_timeout" {
1054                            // The Python binding converts request_timeout with Duration::from_secs
1055                            // (seconds) while fixtures specify values in milliseconds. Divide by
1056                            // 1000 to compensate: e.g., 1 ms → 0 s (immediate timeout),
1057                            // 5000 ms → 5 s. This keeps test semantics consistent with the
1058                            // fixture intent.
1059                            if let Some(ms) = v.as_u64() {
1060                                format!("{}", ms / 1000)
1061                            } else {
1062                                json_to_python_literal(v)
1063                            }
1064                        } else {
1065                            json_to_python_literal(v)
1066                        };
1067                        format!("{snake_key}={py_val}")
1068                    })
1069                    .collect();
1070                // Use the options_type if configured, otherwise "CrawlConfig".
1071                let config_class = options_type.unwrap_or("CrawlConfig");
1072                let single_line = format!("    {var_name}_config = {config_class}({})", kwargs.join(", "));
1073                if single_line.len() <= 120 {
1074                    arg_bindings.push(single_line);
1075                } else {
1076                    // Split into multi-line for readability and E501 compliance.
1077                    let mut lines = format!("    {var_name}_config = {config_class}(\n");
1078                    for kw in &kwargs {
1079                        lines.push_str(&format!("        {kw},\n"));
1080                    }
1081                    lines.push_str("    )");
1082                    arg_bindings.push(lines);
1083                }
1084                arg_bindings.push(format!("    {var_name} = {constructor_name}({var_name}_config)"));
1085            } else {
1086                let literal = json_to_python_literal(config_value);
1087                arg_bindings.push(format!("    {var_name} = {constructor_name}({literal})"));
1088            }
1089            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1090            continue;
1091        }
1092
1093        if arg.arg_type == "mock_url" {
1094            let fixture_id = &fixture.id;
1095            arg_bindings.push(format!(
1096                "    {var_name} = os.environ['MOCK_SERVER_URL'] + '/fixtures/{fixture_id}'"
1097            ));
1098            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1099            continue;
1100        }
1101
1102        let value = resolve_field(&fixture.input, &arg.field);
1103
1104        if value.is_null() && arg.optional {
1105            continue;
1106        }
1107
1108        // For json_object args, use the configured options_via strategy.
1109        // A1 fix: when optional=true and value is non-null, pass T directly (not Optional[T]).
1110        if arg.arg_type == "json_object" && !value.is_null() {
1111            match options_via {
1112                "dict" => {
1113                    // Pass as a plain Python dict literal.
1114                    let literal = json_to_python_literal(value);
1115                    let noqa = if literal.contains("/tmp/") {
1116                        "  # noqa: S108"
1117                    } else {
1118                        ""
1119                    };
1120                    arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1121                    kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1122                    continue;
1123                }
1124                "json" => {
1125                    // Pass via json.loads() with the raw JSON string.
1126                    let json_str = serde_json::to_string(value).unwrap_or_default();
1127                    let escaped = escape_python(&json_str);
1128                    arg_bindings.push(format!("    {var_name} = json.loads(\"{escaped}\")"));
1129                    kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1130                    continue;
1131                }
1132                _ => {
1133                    // "kwargs" (default): construct OptionsType(key=val, ...).
1134                    if let (Some(opts_type), Some(obj)) = (options_type, value.as_object()) {
1135                        let kwargs: Vec<String> = obj
1136                            .iter()
1137                            .map(|(k, v)| {
1138                                let snake_key = k.to_snake_case();
1139                                let py_val = if let Some(enum_type) = enum_fields.get(k) {
1140                                    // Map string value to enum constant.
1141                                    if let Some(s) = v.as_str() {
1142                                        let upper_val = s.to_shouty_snake_case();
1143                                        format!("{enum_type}.{upper_val}")
1144                                    } else {
1145                                        json_to_python_literal(v)
1146                                    }
1147                                } else {
1148                                    json_to_python_literal(v)
1149                                };
1150                                format!("{snake_key}={py_val}")
1151                            })
1152                            .collect();
1153                        let constructor = format!("{opts_type}({})", kwargs.join(", "));
1154                        arg_bindings.push(format!("    {var_name} = {constructor}"));
1155                        kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1156                        continue;
1157                    }
1158                }
1159            }
1160        }
1161
1162        // When optional=true but fixture value is null, skip the argument entirely.
1163        // The function signature expects Optional[T] — Python's default keyword behavior handles None.
1164        if arg.optional && value.is_null() {
1165            continue;
1166        }
1167
1168        // For required args with no fixture value, use a language-appropriate default.
1169        if value.is_null() && !arg.optional {
1170            let default_val = match arg.arg_type.as_str() {
1171                "string" => "\"\"".to_string(),
1172                "int" | "integer" => "0".to_string(),
1173                "float" | "number" => "0.0".to_string(),
1174                "bool" | "boolean" => "False".to_string(),
1175                _ => "None".to_string(),
1176            };
1177            arg_bindings.push(format!("    {var_name} = {default_val}"));
1178            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1179            continue;
1180        }
1181
1182        // bytes args: classify the fixture value and emit the appropriate expression.
1183        //
1184        // Three patterns appear in fixtures:
1185        //   1. File path   — "pdf/fake_memo.pdf", "images/hello_world.png"
1186        //                    Starts with a word character followed by more word/slash/dot chars
1187        //                    and a file extension.  Emit `Path("...").read_bytes()`.
1188        //   2. Inline text — "<!DOCTYPE html>...", "{...}", text with spaces
1189        //                    Starts with '<', '{', or contains whitespace.
1190        //                    Emit `b"..."` bytes literal.
1191        //   3. Base64      — "/9j/4AAQ" (JPEG magic), other short opaque strings
1192        //                    Everything else.  Emit `base64.b64decode("...")`.
1193        if arg.arg_type == "bytes" {
1194            if let Some(raw) = value.as_str() {
1195                match classify_bytes_value(raw) {
1196                    BytesKind::FilePath => {
1197                        let escaped = escape_python(raw);
1198                        arg_bindings.push(format!("    {var_name} = Path(\"{escaped}\").read_bytes()"));
1199                    }
1200                    BytesKind::InlineText => {
1201                        // Emit a bytes literal.  For short single-line values we can embed
1202                        // them directly; use repr-like escaping of non-printable bytes.
1203                        let escaped = escape_python(raw);
1204                        arg_bindings.push(format!("    {var_name} = b\"{escaped}\""));
1205                    }
1206                    BytesKind::Base64 => {
1207                        let escaped = escape_python(raw);
1208                        arg_bindings.push(format!("    {var_name} = base64.b64decode(\"{escaped}\")"));
1209                    }
1210                }
1211            } else {
1212                arg_bindings.push(format!("    {var_name} = None"));
1213            }
1214            kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1215            continue;
1216        }
1217
1218        let literal = json_to_python_literal(value);
1219        let noqa = if literal.contains("/tmp/") {
1220            "  # noqa: S108"
1221        } else {
1222            ""
1223        };
1224        arg_bindings.push(format!("    {var_name} = {literal}{noqa}"));
1225        kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1226    }
1227
1228    // Generate visitor class if the fixture has a visitor spec.
1229    if let Some(visitor_spec) = &fixture.visitor {
1230        let _ = writeln!(out, "    class _TestVisitor:");
1231        for (method_name, action) in &visitor_spec.callbacks {
1232            emit_python_visitor_method(out, method_name, action);
1233        }
1234        kwarg_exprs.push("visitor=_TestVisitor()".to_string());
1235    }
1236
1237    for binding in &arg_bindings {
1238        let _ = writeln!(out, "{binding}");
1239    }
1240
1241    let call_args = kwarg_exprs.join(", ");
1242    let await_prefix = if is_async { "await " } else { "" };
1243    let call_expr = format!("{await_prefix}{function_name}({call_args})");
1244
1245    if has_error_assertion {
1246        // Find error assertion for optional message check.
1247        let error_assertion = fixture.assertions.iter().find(|a| a.assertion_type == "error");
1248        let has_message = error_assertion
1249            .and_then(|a| a.value.as_ref())
1250            .and_then(|v| v.as_str())
1251            .is_some();
1252
1253        if has_message {
1254            let _ = writeln!(out, "    with pytest.raises(Exception) as exc_info:  # noqa: B017");
1255            let _ = writeln!(out, "        {call_expr}");
1256            if let Some(msg) = error_assertion.and_then(|a| a.value.as_ref()).and_then(|v| v.as_str()) {
1257                let escaped = escape_python(msg);
1258                let _ = writeln!(out, "    assert \"{escaped}\" in str(exc_info.value)  # noqa: S101");
1259            }
1260        } else {
1261            let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1262            let _ = writeln!(out, "        {call_expr}");
1263        }
1264
1265        // Skip non-error assertions: `result` is not defined outside the
1266        // `pytest.raises` block, so referencing it would trigger ruff F821.
1267        return;
1268    }
1269
1270    // Non-error path.
1271    // A2 fix: respect returns_result=false (non-Result returns don't need error handling).
1272    let has_usable_assertion = fixture.assertions.iter().any(|a| {
1273        if a.assertion_type == "not_error" || a.assertion_type == "error" {
1274            return false;
1275        }
1276        if result_is_simple {
1277            // When the result is a simple type, only assertions whose field is
1278            // NOT in the skipped-for-simple-result set will produce real code.
1279            if let Some(f) = &a.field {
1280                let f_lower = f.to_lowercase();
1281                if !f.is_empty()
1282                    && f_lower != "content"
1283                    && f_lower != "result"
1284                    && (f_lower.starts_with("metadata")
1285                        || f_lower.starts_with("document")
1286                        || f_lower.starts_with("structure")
1287                        || f_lower.starts_with("pages")
1288                        || f_lower.starts_with("chunks")
1289                        || f_lower.starts_with("tables")
1290                        || f_lower.starts_with("images")
1291                        || f_lower.starts_with("mime_type")
1292                        || f_lower.starts_with("is_")
1293                        || f_lower == "byte_length"
1294                        || f_lower == "page_count"
1295                        || f_lower == "output_format"
1296                        || f_lower == "extraction_method")
1297                {
1298                    return false; // this assertion will be skipped
1299                }
1300            }
1301            return true;
1302        }
1303        match &a.field {
1304            Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
1305            _ => true,
1306        }
1307    });
1308    let py_result_var = if has_usable_assertion {
1309        result_var.to_string()
1310    } else {
1311        "_".to_string()
1312    };
1313    let _ = writeln!(out, "    {py_result_var} = {call_expr}");
1314
1315    let fields_enum = &e2e_config.fields_enum;
1316    for assertion in &fixture.assertions {
1317        if assertion.assertion_type == "not_error" {
1318            // A2: When returns_result=false, the call doesn't return Result<T, E>,
1319            // so there's no error to check. Skip the assertion entirely.
1320            if !call_config.returns_result {
1321                continue;
1322            }
1323            // The call already raises on error in Python.
1324            continue;
1325        }
1326        render_assertion(
1327            out,
1328            assertion,
1329            result_var,
1330            field_resolver,
1331            fields_enum,
1332            result_is_simple,
1333        );
1334    }
1335}
1336
1337// ---------------------------------------------------------------------------
1338// Bytes value classification
1339// ---------------------------------------------------------------------------
1340
/// How to represent a fixture `type = "bytes"` string value in generated Python.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BytesKind {
    /// A relative file path like `"pdf/fake_memo.pdf"` — read with `Path(...).read_bytes()`.
    FilePath,
    /// Inline text content like `"<!DOCTYPE html>..."` — encode to `b"..."`.
    InlineText,
    /// A base64-encoded blob like `"/9j/4AAQ"` — decode with `base64.b64decode(...)`.
    Base64,
}

/// Classify a fixture string value that maps to a `bytes` argument.
///
/// Rules (in order):
/// 1. Starts with `<`, `{`, or `[`, or contains any whitespace (spaces, tabs,
///    newlines, ...) → inline text.
/// 2. First character is an ASCII letter/digit/underscore AND the value contains
///    a `/` AND the segment after the last `/` contains a `.` → file path.
/// 3. Everything else (including the empty string) → base64.
fn classify_bytes_value(s: &str) -> BytesKind {
    // Rule 1: obvious inline content markers.
    if s.starts_with(['<', '{', '[']) || s.contains(char::is_whitespace) {
        return BytesKind::InlineText;
    }

    // Rule 2: looks like "dir/file.ext". Because the first character is a word
    // character, any slash found is necessarily preceded by at least one character.
    let starts_with_word_char = s
        .chars()
        .next()
        .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_');
    // Only the final path segment is inspected, so a dot in a directory name
    // ("v1.2/readme") does not count as a file extension.
    let file_name_has_dot = s
        .rsplit_once('/')
        .is_some_and(|(_, file_name)| file_name.contains('.'));
    if starts_with_word_char && file_name_has_dot {
        return BytesKind::FilePath;
    }

    // Rule 3: everything else is treated as base64.
    BytesKind::Base64
}
1382
1383// ---------------------------------------------------------------------------
1384// Argument rendering
1385// ---------------------------------------------------------------------------
1386
1387fn json_to_python_literal(value: &serde_json::Value) -> String {
1388    match value {
1389        serde_json::Value::Null => "None".to_string(),
1390        serde_json::Value::Bool(true) => "True".to_string(),
1391        serde_json::Value::Bool(false) => "False".to_string(),
1392        serde_json::Value::Number(n) => n.to_string(),
1393        serde_json::Value::String(s) => python_string_literal(s),
1394        serde_json::Value::Array(arr) => {
1395            let items: Vec<String> = arr.iter().map(json_to_python_literal).collect();
1396            format!("[{}]", items.join(", "))
1397        }
1398        serde_json::Value::Object(map) => {
1399            let items: Vec<String> = map
1400                .iter()
1401                .map(|(k, v)| format!("\"{}\": {}", escape_python(k), json_to_python_literal(v)))
1402                .collect();
1403            format!("{{{}}}", items.join(", "))
1404        }
1405    }
1406}
1407
1408// ---------------------------------------------------------------------------
1409// Assertion rendering
1410// ---------------------------------------------------------------------------
1411
1412fn render_assertion(
1413    out: &mut String,
1414    assertion: &Assertion,
1415    result_var: &str,
1416    field_resolver: &FieldResolver,
1417    fields_enum: &std::collections::HashSet<String>,
1418    result_is_simple: bool,
1419) {
1420    // When result_is_simple, the result IS the content — skip fields that
1421    // reference struct sub-fields (metadata, document, structure, pages, etc.)
1422    // which don't exist on a plain string/bool/bytes value.
1423    if result_is_simple {
1424        if let Some(f) = &assertion.field {
1425            let f_lower = f.to_lowercase();
1426            if !f.is_empty()
1427                && f_lower != "content"
1428                && f_lower != "result"
1429                && (f_lower.starts_with("metadata")
1430                    || f_lower.starts_with("document")
1431                    || f_lower.starts_with("structure")
1432                    || f_lower.starts_with("pages")
1433                    || f_lower.starts_with("chunks")
1434                    || f_lower.starts_with("tables")
1435                    || f_lower.starts_with("images")
1436                    || f_lower.starts_with("mime_type")
1437                    || f_lower.starts_with("is_")
1438                    || f_lower == "byte_length"
1439                    || f_lower == "page_count"
1440                    || f_lower == "output_format"
1441                    || f_lower == "extraction_method")
1442            {
1443                let _ = writeln!(out, "    # skipped: field '{f}' not applicable for simple result type");
1444                return;
1445            }
1446        }
1447    }
1448
1449    // Handle synthetic / derived fields before the is_valid_for_result check
1450    // so they are never treated as struct attribute accesses on the result.
1451    if let Some(f) = &assertion.field {
1452        match f.as_str() {
1453            "chunks_have_content" => {
1454                let pred = format!("all(c.content for c in ({result_var}.chunks or []))");
1455                match assertion.assertion_type.as_str() {
1456                    "is_true" => {
1457                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1458                    }
1459                    "is_false" => {
1460                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1461                    }
1462                    _ => {
1463                        let _ = writeln!(
1464                            out,
1465                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1466                        );
1467                    }
1468                }
1469                return;
1470            }
1471            "chunks_have_embeddings" => {
1472                let pred = format!(
1473                    "all(c.embedding is not None and len(c.embedding) > 0 for c in ({result_var}.chunks or []))"
1474                );
1475                match assertion.assertion_type.as_str() {
1476                    "is_true" => {
1477                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1478                    }
1479                    "is_false" => {
1480                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1481                    }
1482                    _ => {
1483                        let _ = writeln!(
1484                            out,
1485                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1486                        );
1487                    }
1488                }
1489                return;
1490            }
1491            // ---- EmbedResponse virtual fields ----
1492            // embed_texts returns list[list[float]] in Python — no wrapper struct.
1493            // result_var is the embedding matrix; use it directly.
1494            "embeddings" => {
1495                match assertion.assertion_type.as_str() {
1496                    "count_equals" => {
1497                        if let Some(val) = &assertion.value {
1498                            if let Some(n) = val.as_u64() {
1499                                let _ = writeln!(out, "    assert len({result_var}) == {n}  # noqa: S101");
1500                            }
1501                        }
1502                    }
1503                    "count_min" => {
1504                        if let Some(val) = &assertion.value {
1505                            if let Some(n) = val.as_u64() {
1506                                let _ = writeln!(out, "    assert len({result_var}) >= {n}  # noqa: S101");
1507                            }
1508                        }
1509                    }
1510                    "not_empty" => {
1511                        let _ = writeln!(out, "    assert len({result_var}) > 0  # noqa: S101");
1512                    }
1513                    "is_empty" => {
1514                        let _ = writeln!(out, "    assert len({result_var}) == 0  # noqa: S101");
1515                    }
1516                    _ => {
1517                        let _ = writeln!(
1518                            out,
1519                            "    # skipped: unsupported assertion type on synthetic field 'embeddings'"
1520                        );
1521                    }
1522                }
1523                return;
1524            }
1525            "embedding_dimensions" => {
1526                let expr = format!("(len({result_var}[0]) if {result_var} else 0)");
1527                match assertion.assertion_type.as_str() {
1528                    "equals" => {
1529                        if let Some(val) = &assertion.value {
1530                            let py_val = value_to_python_string(val);
1531                            let _ = writeln!(out, "    assert {expr} == {py_val}  # noqa: S101");
1532                        }
1533                    }
1534                    "greater_than" => {
1535                        if let Some(val) = &assertion.value {
1536                            let py_val = value_to_python_string(val);
1537                            let _ = writeln!(out, "    assert {expr} > {py_val}  # noqa: S101");
1538                        }
1539                    }
1540                    _ => {
1541                        let _ = writeln!(
1542                            out,
1543                            "    # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
1544                        );
1545                    }
1546                }
1547                return;
1548            }
1549            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1550                let pred = match f.as_str() {
1551                    "embeddings_valid" => {
1552                        format!("all(bool(e) for e in {result_var})")
1553                    }
1554                    "embeddings_finite" => {
1555                        format!("all(v == v and abs(v) != float('inf') for e in {result_var} for v in e)")
1556                    }
1557                    "embeddings_non_zero" => {
1558                        format!("all(any(v != 0.0 for v in e) for e in {result_var})")
1559                    }
1560                    "embeddings_normalized" => {
1561                        format!("all(abs(sum(v * v for v in e) - 1.0) < 1e-3 for e in {result_var})")
1562                    }
1563                    _ => unreachable!(),
1564                };
1565                match assertion.assertion_type.as_str() {
1566                    "is_true" => {
1567                        let _ = writeln!(out, "    assert {pred}  # noqa: S101");
1568                    }
1569                    "is_false" => {
1570                        let _ = writeln!(out, "    assert not ({pred})  # noqa: S101");
1571                    }
1572                    _ => {
1573                        let _ = writeln!(
1574                            out,
1575                            "    # skipped: unsupported assertion type on synthetic field '{f}'"
1576                        );
1577                    }
1578                }
1579                return;
1580            }
1581            // ---- keywords / keywords_count ----
1582            // Python ExtractionResult does not expose extracted_keywords; skip.
1583            "keywords" | "keywords_count" => {
1584                let _ = writeln!(
1585                    out,
1586                    "    # skipped: field '{f}' not available on Python ExtractionResult"
1587                );
1588                return;
1589            }
1590            _ => {}
1591        }
1592    }
1593
1594    // Skip assertions on fields that don't exist on the result type.
1595    if !result_is_simple {
1596        if let Some(f) = &assertion.field {
1597            if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1598                let _ = writeln!(out, "    # skipped: field '{f}' not available on result type");
1599                return;
1600            }
1601        }
1602    }
1603
1604    // For simple results, the result variable IS the value — map `content`/`result`
1605    // fields (and empty/absent fields) to the result variable directly.
1606    let field_access = if result_is_simple {
1607        result_var.to_string()
1608    } else {
1609        match &assertion.field {
1610            Some(f) if !f.is_empty() => field_resolver.accessor(f, "python", result_var),
1611            _ => result_var.to_string(),
1612        }
1613    };
1614
1615    // Determine whether this field should be compared as an enum string.
1616    //
1617    // PyO3 integer-based enums (`#[pyclass(eq, eq_int)]`) are NOT iterable, so
1618    // `"value" in enum_field` raises TypeError.  Use `str(enum_field).lower()`
1619    // instead, which for a variant like `LinkType.Anchor` gives `"linktype.anchor"`,
1620    // making `"anchor" in str(LinkType.Anchor).lower()` evaluate to True.
1621    //
1622    // We apply this to fields explicitly listed in `fields_enum` (using both the
1623    // fixture field path and the resolved path) and to any field whose accessor
1624    // involves array-element indexing (`[0]`) which typically holds typed enums.
1625    let field_is_enum = assertion.field.as_deref().is_some_and(|f| {
1626        if fields_enum.contains(f) {
1627            return true;
1628        }
1629        let resolved = field_resolver.resolve(f);
1630        if fields_enum.contains(resolved) {
1631            return true;
1632        }
1633        // Also treat fields accessed via array indexing as potentially enum-typed
1634        // (e.g., `result.links[0].link_type`, `result.assets[0].asset_category`).
1635        // This is safe because `str(string_value).lower()` is idempotent for
1636        // plain string fields, and all fixture `contains` values are lowercase.
1637        field_resolver.accessor(f, "python", result_var).contains("[0]")
1638    });
1639
1640    // Check whether the field path (or any prefix of it) is optional so we can
1641    // guard `in` / `not in` expressions against None.
1642    let field_is_optional = match &assertion.field {
1643        Some(f) if !f.is_empty() => {
1644            let resolved = field_resolver.resolve(f);
1645            field_resolver.is_optional(resolved)
1646        }
1647        _ => false,
1648    };
1649
1650    match assertion.assertion_type.as_str() {
1651        "error" | "not_error" => {
1652            // Handled at call site.
1653        }
1654        "equals" => {
1655            if let Some(val) = &assertion.value {
1656                let expected = value_to_python_string(val);
1657                // Use `is` for boolean/None comparisons (ruff E712).
1658                let op = if val.is_boolean() || val.is_null() { "is" } else { "==" };
1659                // For string equality, strip trailing whitespace to handle trailing newlines
1660                // from the converter.
1661                if val.is_string() {
1662                    let _ = writeln!(out, "    assert {field_access}.strip() {op} {expected}  # noqa: S101");
1663                } else {
1664                    let _ = writeln!(out, "    assert {field_access} {op} {expected}  # noqa: S101");
1665                }
1666            }
1667        }
1668        "contains" => {
1669            if let Some(val) = &assertion.value {
1670                let expected = value_to_python_string(val);
1671                // For enum fields, convert to lowercase string for comparison.
1672                let cmp_expr = if field_is_enum && val.is_string() {
1673                    format!("str({field_access}).lower()")
1674                } else {
1675                    field_access.clone()
1676                };
1677                if field_is_optional {
1678                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1679                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1680                } else {
1681                    let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1682                }
1683            }
1684        }
1685        "contains_all" => {
1686            if let Some(values) = &assertion.values {
1687                for val in values {
1688                    let expected = value_to_python_string(val);
1689                    // For enum fields, convert to lowercase string for comparison.
1690                    let cmp_expr = if field_is_enum && val.is_string() {
1691                        format!("str({field_access}).lower()")
1692                    } else {
1693                        field_access.clone()
1694                    };
1695                    if field_is_optional {
1696                        let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1697                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1698                    } else {
1699                        let _ = writeln!(out, "    assert {expected} in {cmp_expr}  # noqa: S101");
1700                    }
1701                }
1702            }
1703        }
1704        "not_contains" => {
1705            if let Some(val) = &assertion.value {
1706                let expected = value_to_python_string(val);
1707                // For enum fields, convert to lowercase string for comparison.
1708                let cmp_expr = if field_is_enum && val.is_string() {
1709                    format!("str({field_access}).lower()")
1710                } else {
1711                    field_access.clone()
1712                };
1713                if field_is_optional {
1714                    let _ = writeln!(
1715                        out,
1716                        "    assert {field_access} is None or {expected} not in {cmp_expr}  # noqa: S101"
1717                    );
1718                } else {
1719                    let _ = writeln!(out, "    assert {expected} not in {cmp_expr}  # noqa: S101");
1720                }
1721            }
1722        }
1723        "not_empty" => {
1724            let _ = writeln!(out, "    assert {field_access}  # noqa: S101");
1725        }
1726        "is_empty" => {
1727            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1728        }
1729        "contains_any" => {
1730            if let Some(values) = &assertion.values {
1731                let items: Vec<String> = values.iter().map(value_to_python_string).collect();
1732                let list_str = items.join(", ");
1733                // For enum fields, convert to lowercase string for comparison.
1734                let cmp_expr = if field_is_enum {
1735                    format!("str({field_access}).lower()")
1736                } else {
1737                    field_access.clone()
1738                };
1739                if field_is_optional {
1740                    let _ = writeln!(out, "    assert {field_access} is not None  # noqa: S101");
1741                    let _ = writeln!(
1742                        out,
1743                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1744                    );
1745                } else {
1746                    let _ = writeln!(
1747                        out,
1748                        "    assert any(v in {cmp_expr} for v in [{list_str}])  # noqa: S101"
1749                    );
1750                }
1751            }
1752        }
1753        "greater_than" => {
1754            if let Some(val) = &assertion.value {
1755                let expected = value_to_python_string(val);
1756                let _ = writeln!(out, "    assert {field_access} > {expected}  # noqa: S101");
1757            }
1758        }
1759        "less_than" => {
1760            if let Some(val) = &assertion.value {
1761                let expected = value_to_python_string(val);
1762                let _ = writeln!(out, "    assert {field_access} < {expected}  # noqa: S101");
1763            }
1764        }
1765        "greater_than_or_equal" | "min" => {
1766            if let Some(val) = &assertion.value {
1767                let expected = value_to_python_string(val);
1768                let _ = writeln!(out, "    assert {field_access} >= {expected}  # noqa: S101");
1769            }
1770        }
1771        "less_than_or_equal" | "max" => {
1772            if let Some(val) = &assertion.value {
1773                let expected = value_to_python_string(val);
1774                let _ = writeln!(out, "    assert {field_access} <= {expected}  # noqa: S101");
1775            }
1776        }
1777        "starts_with" => {
1778            if let Some(val) = &assertion.value {
1779                let expected = value_to_python_string(val);
1780                let _ = writeln!(out, "    assert {field_access}.startswith({expected})  # noqa: S101");
1781            }
1782        }
1783        "ends_with" => {
1784            if let Some(val) = &assertion.value {
1785                let expected = value_to_python_string(val);
1786                let _ = writeln!(out, "    assert {field_access}.endswith({expected})  # noqa: S101");
1787            }
1788        }
1789        "min_length" => {
1790            if let Some(val) = &assertion.value {
1791                if let Some(n) = val.as_u64() {
1792                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1793                }
1794            }
1795        }
1796        "max_length" => {
1797            if let Some(val) = &assertion.value {
1798                if let Some(n) = val.as_u64() {
1799                    let _ = writeln!(out, "    assert len({field_access}) <= {n}  # noqa: S101");
1800                }
1801            }
1802        }
1803        "count_min" => {
1804            if let Some(val) = &assertion.value {
1805                if let Some(n) = val.as_u64() {
1806                    let _ = writeln!(out, "    assert len({field_access}) >= {n}  # noqa: S101");
1807                }
1808            }
1809        }
1810        "count_equals" => {
1811            if let Some(val) = &assertion.value {
1812                if let Some(n) = val.as_u64() {
1813                    let _ = writeln!(out, "    assert len({field_access}) == {n}  # noqa: S101");
1814                }
1815            }
1816        }
1817        "is_true" => {
1818            let _ = writeln!(out, "    assert {field_access} is True  # noqa: S101");
1819        }
1820        "is_false" => {
1821            let _ = writeln!(out, "    assert not {field_access}  # noqa: S101");
1822        }
1823        "method_result" => {
1824            if let Some(method_name) = &assertion.method {
1825                let call_expr = build_python_method_call(result_var, method_name, assertion.args.as_ref());
1826                let check = assertion.check.as_deref().unwrap_or("is_true");
1827                match check {
1828                    "equals" => {
1829                        if let Some(val) = &assertion.value {
1830                            if val.is_boolean() {
1831                                if val.as_bool() == Some(true) {
1832                                    let _ = writeln!(out, "    assert {call_expr} is True  # noqa: S101");
1833                                } else {
1834                                    let _ = writeln!(out, "    assert {call_expr} is False  # noqa: S101");
1835                                }
1836                            } else {
1837                                let expected = value_to_python_string(val);
1838                                let _ = writeln!(out, "    assert {call_expr} == {expected}  # noqa: S101");
1839                            }
1840                        }
1841                    }
1842                    "is_true" => {
1843                        let _ = writeln!(out, "    assert {call_expr}  # noqa: S101");
1844                    }
1845                    "is_false" => {
1846                        let _ = writeln!(out, "    assert not {call_expr}  # noqa: S101");
1847                    }
1848                    "greater_than_or_equal" => {
1849                        if let Some(val) = &assertion.value {
1850                            let n = val.as_u64().unwrap_or(0);
1851                            let _ = writeln!(out, "    assert {call_expr} >= {n}  # noqa: S101");
1852                        }
1853                    }
1854                    "count_min" => {
1855                        if let Some(val) = &assertion.value {
1856                            let n = val.as_u64().unwrap_or(0);
1857                            let _ = writeln!(out, "    assert len({call_expr}) >= {n}  # noqa: S101");
1858                        }
1859                    }
1860                    "contains" => {
1861                        if let Some(val) = &assertion.value {
1862                            let expected = value_to_python_string(val);
1863                            let _ = writeln!(out, "    assert {expected} in {call_expr}  # noqa: S101");
1864                        }
1865                    }
1866                    "is_error" => {
1867                        let _ = writeln!(out, "    with pytest.raises(Exception):  # noqa: B017");
1868                        let _ = writeln!(out, "        {call_expr}");
1869                    }
1870                    other_check => {
1871                        panic!("unsupported method_result check type: {other_check}");
1872                    }
1873                }
1874            } else {
1875                panic!("method_result assertion missing 'method' field");
1876            }
1877        }
1878        "matches_regex" => {
1879            if let Some(val) = &assertion.value {
1880                let expected = value_to_python_string(val);
1881                let _ = writeln!(out, "    import re  # noqa: PLC0415");
1882                let _ = writeln!(
1883                    out,
1884                    "    assert re.search({expected}, {field_access}) is not None  # noqa: S101"
1885                );
1886            }
1887        }
1888        other => {
1889            panic!("unsupported assertion type: {other}");
1890        }
1891    }
1892}
1893
1894/// Build a Python call expression for a method_result assertion on a tree-sitter Tree.
1895/// Maps method names to the appropriate Python function calls.
1896fn build_python_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1897    match method_name {
1898        "root_child_count" => format!("{result_var}.root_node().child_count()"),
1899        "root_node_type" => format!("{result_var}.root_node().kind()"),
1900        "named_children_count" => format!("{result_var}.root_node().named_child_count()"),
1901        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1902        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1903        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1904        "contains_node_type" => {
1905            let node_type = args
1906                .and_then(|a| a.get("node_type"))
1907                .and_then(|v| v.as_str())
1908                .unwrap_or("");
1909            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1910        }
1911        "find_nodes_by_type" => {
1912            let node_type = args
1913                .and_then(|a| a.get("node_type"))
1914                .and_then(|v| v.as_str())
1915                .unwrap_or("");
1916            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1917        }
1918        "run_query" => {
1919            let query_source = args
1920                .and_then(|a| a.get("query_source"))
1921                .and_then(|v| v.as_str())
1922                .unwrap_or("");
1923            let language = args
1924                .and_then(|a| a.get("language"))
1925                .and_then(|v| v.as_str())
1926                .unwrap_or("");
1927            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1928        }
1929        _ => {
1930            if let Some(args_val) = args {
1931                let arg_str = args_val
1932                    .as_object()
1933                    .map(|obj| {
1934                        obj.iter()
1935                            .map(|(k, v)| format!("{}={}", k, value_to_python_string(v)))
1936                            .collect::<Vec<_>>()
1937                            .join(", ")
1938                    })
1939                    .unwrap_or_default();
1940                format!("{result_var}.{method_name}({arg_str})")
1941            } else {
1942                format!("{result_var}.{method_name}()")
1943            }
1944        }
1945    }
1946}
1947
/// Maps a `method_result` method name to the module-level Python helper that
/// must be imported to evaluate it.
///
/// Yields `None` for methods that are invoked on the result object itself
/// (e.g. `tree.root_node().child_count()`), since those need no import.
fn python_method_helper_import(method_name: &str) -> Option<String> {
    let helper = match method_name {
        "has_error_nodes" => "tree_has_error_nodes",
        "contains_node_type" => "tree_contains_node_type",
        "error_count" | "tree_error_count" => "tree_error_count",
        // These helpers carry the same name as the fixture method.
        "tree_to_sexp" | "find_nodes_by_type" | "run_query" => method_name,
        _ => return None,
    };
    Some(helper.to_owned())
}
1962
1963fn value_to_python_string(value: &serde_json::Value) -> String {
1964    match value {
1965        serde_json::Value::String(s) => python_string_literal(s),
1966        serde_json::Value::Bool(true) => "True".to_string(),
1967        serde_json::Value::Bool(false) => "False".to_string(),
1968        serde_json::Value::Number(n) => n.to_string(),
1969        serde_json::Value::Null => "None".to_string(),
1970        other => python_string_literal(&other.to_string()),
1971    }
1972}
1973
1974/// Produce a quoted Python string literal, choosing single or double quotes
1975/// to avoid unnecessary escaping (ruff Q003).
1976fn python_string_literal(s: &str) -> String {
1977    if s.contains('"') && !s.contains('\'') {
1978        // Use single quotes to avoid escaping double quotes.
1979        let escaped = s
1980            .replace('\\', "\\\\")
1981            .replace('\'', "\\'")
1982            .replace('\n', "\\n")
1983            .replace('\r', "\\r")
1984            .replace('\t', "\\t");
1985        format!("'{escaped}'")
1986    } else {
1987        format!("\"{}\"", escape_python(s))
1988    }
1989}
1990
1991/// Emit a Python visitor method for a callback action.
1992fn emit_python_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1993    let params = match method_name {
1994        "visit_link" => "self, ctx, href, text, title",
1995        "visit_image" => "self, ctx, src, alt, title",
1996        "visit_heading" => "self, ctx, level, text, id",
1997        "visit_code_block" => "self, ctx, lang, code",
1998        "visit_code_inline"
1999        | "visit_strong"
2000        | "visit_emphasis"
2001        | "visit_strikethrough"
2002        | "visit_underline"
2003        | "visit_subscript"
2004        | "visit_superscript"
2005        | "visit_mark"
2006        | "visit_button"
2007        | "visit_summary"
2008        | "visit_figcaption"
2009        | "visit_definition_term"
2010        | "visit_definition_description" => "self, ctx, text",
2011        "visit_text" => "self, ctx, text",
2012        "visit_list_item" => "self, ctx, ordered, marker, text",
2013        "visit_blockquote" => "self, ctx, content, depth",
2014        "visit_table_row" => "self, ctx, cells, is_header",
2015        "visit_custom_element" => "self, ctx, tag_name, html",
2016        "visit_form" => "self, ctx, action_url, method",
2017        "visit_input" => "self, ctx, input_type, name, value",
2018        "visit_audio" | "visit_video" | "visit_iframe" => "self, ctx, src",
2019        "visit_details" => "self, ctx, is_open",
2020        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
2021            "self, ctx, output, *args"
2022        }
2023        "visit_list_start" => "self, ctx, ordered, *args",
2024        "visit_list_end" => "self, ctx, ordered, output, *args",
2025        _ => "self, ctx, *args",
2026    };
2027
2028    let _ = writeln!(
2029        out,
2030        "        def {method_name}({params}):  # noqa: A002, ANN001, ANN202, ARG002"
2031    );
2032    match action {
2033        CallbackAction::Skip => {
2034            let _ = writeln!(out, "            return \"skip\"");
2035        }
2036        CallbackAction::Continue => {
2037            let _ = writeln!(out, "            return \"continue\"");
2038        }
2039        CallbackAction::PreserveHtml => {
2040            let _ = writeln!(out, "            return \"preserve_html\"");
2041        }
2042        CallbackAction::Custom { output } => {
2043            let escaped = escape_python(output);
2044            let _ = writeln!(out, "            return {{\"custom\": \"{escaped}\"}}");
2045        }
2046        CallbackAction::CustomTemplate { template } => {
2047            // Use single-quoted f-string so that double quotes inside the template
2048            // (e.g. `QUOTE: "{text}"`) are not misinterpreted as string delimiters.
2049            // Escape newlines/tabs/backslashes/single quotes so the template stays
2050            // on a single line in the generated source.
2051            let escaped_template = template
2052                .replace('\\', "\\\\")
2053                .replace('\'', "\\'")
2054                .replace('\n', "\\n")
2055                .replace('\r', "\\r")
2056                .replace('\t', "\\t");
2057            let _ = writeln!(out, "            return {{\"custom\": f'{escaped_template}'}}");
2058        }
2059    }
2060}