1use crate::codegen::resolve_field;
7use crate::config::E2eConfig;
8use crate::escape::{escape_python, sanitize_filename, sanitize_ident};
9use crate::field_access::FieldResolver;
10use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
11use alef_core::backend::GeneratedFile;
12use alef_core::config::AlefConfig;
13use alef_core::hash::{self, CommentStyle};
14use anyhow::Result;
15use heck::{ToShoutySnakeCase, ToSnakeCase};
16use std::collections::HashMap;
17use std::fmt::Write as FmtWrite;
18use std::path::PathBuf;
19
20pub struct PythonE2eCodegen;
22
23impl super::E2eCodegen for PythonE2eCodegen {
24 fn generate(
25 &self,
26 groups: &[FixtureGroup],
27 e2e_config: &E2eConfig,
28 _alef_config: &AlefConfig,
29 ) -> Result<Vec<GeneratedFile>> {
30 let mut files = Vec::new();
31 let output_base = PathBuf::from(e2e_config.effective_output()).join("python");
32
33 files.push(GeneratedFile {
35 path: output_base.join("conftest.py"),
36 content: render_conftest(e2e_config, groups),
37 generated_header: true,
38 });
39
40 files.push(GeneratedFile {
42 path: output_base.join("__init__.py"),
43 content: "\n".to_string(),
44 generated_header: false,
45 });
46
47 files.push(GeneratedFile {
49 path: output_base.join("tests").join("__init__.py"),
50 content: "\n".to_string(),
51 generated_header: false,
52 });
53
54 let python_pkg = e2e_config.resolve_package("python");
56 let pkg_name = python_pkg
57 .as_ref()
58 .and_then(|p| p.name.as_deref())
59 .unwrap_or("kreuzcrawl");
60 let pkg_path = python_pkg
61 .as_ref()
62 .and_then(|p| p.path.as_deref())
63 .unwrap_or("../../packages/python");
64 let pkg_version = python_pkg
65 .as_ref()
66 .and_then(|p| p.version.as_deref())
67 .unwrap_or("0.1.0");
68 files.push(GeneratedFile {
69 path: output_base.join("pyproject.toml"),
70 content: render_pyproject(pkg_name, pkg_path, pkg_version, e2e_config.dep_mode),
71 generated_header: true,
72 });
73
74 for group in groups {
76 let fixtures: Vec<&Fixture> = group.fixtures.iter().collect();
77
78 if fixtures.is_empty() {
79 continue;
80 }
81
82 if fixtures.iter().all(|f| is_skipped(f, "python")) {
87 continue;
88 }
89
90 let filename = format!("test_{}.py", sanitize_filename(&group.category));
91 let content = render_test_file(&group.category, &fixtures, e2e_config);
92
93 files.push(GeneratedFile {
94 path: output_base.join("tests").join(filename),
95 content,
96 generated_header: true,
97 });
98 }
99
100 Ok(files)
101 }
102
103 fn language_name(&self) -> &'static str {
104 "python"
105 }
106}
107
108fn render_pyproject(
113 pkg_name: &str,
114 pkg_path: &str,
115 pkg_version: &str,
116 dep_mode: crate::config::DependencyMode,
117) -> String {
118 let (deps_line, uv_sources_block) = match dep_mode {
122 crate::config::DependencyMode::Registry => (
123 format!(
124 "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}{pkg_version}\" ]"
125 ),
126 String::new(),
127 ),
128 crate::config::DependencyMode::Local => (
129 format!(
130 "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}\" ]"
131 ),
132 format!(
133 "\n[tool.uv]\nsources.{pkg_name} = {{ path = \"{pkg_path}\" }}\n",
134 pkg_path = pkg_path
135 ),
136 ),
137 };
138
139 format!(
140 r#"[build-system]
141build-backend = "setuptools.build_meta"
142requires = [ "setuptools>=68", "wheel" ]
143
144[project]
145name = "{pkg_name}-e2e-tests"
146version = "0.0.0"
147description = "End-to-end tests"
148requires-python = ">=3.10"
149classifiers = [
150 "Programming Language :: Python :: 3 :: Only",
151 "Programming Language :: Python :: 3.10",
152 "Programming Language :: Python :: 3.11",
153 "Programming Language :: Python :: 3.12",
154 "Programming Language :: Python :: 3.13",
155 "Programming Language :: Python :: 3.14",
156]
157{deps_line}
158
159[tool.setuptools]
160packages = [ ]
161{uv_sources_block}
162[tool.ruff]
163lint.ignore = [ "PLR2004" ]
164lint.per-file-ignores."tests/**" = [ "B017", "PT011", "S101", "S108" ]
165
166[tool.pytest]
167ini_options.asyncio_mode = "auto"
168ini_options.testpaths = [ "tests" ]
169ini_options.python_files = "test_*.py"
170ini_options.python_functions = "test_*"
171ini_options.addopts = "-v --strict-markers --tb=short"
172ini_options.timeout = 300
173"#
174 )
175}
176
177fn resolve_function_name(e2e_config: &E2eConfig) -> String {
182 resolve_function_name_for_call(&e2e_config.call)
183}
184
185fn resolve_function_name_for_call(call_config: &crate::config::CallConfig) -> String {
186 call_config
187 .overrides
188 .get("python")
189 .and_then(|o| o.function.clone())
190 .unwrap_or_else(|| call_config.function.clone())
191}
192
193fn resolve_module(e2e_config: &E2eConfig) -> String {
194 e2e_config
195 .call
196 .overrides
197 .get("python")
198 .and_then(|o| o.module.clone())
199 .unwrap_or_else(|| e2e_config.call.module.replace('-', "_"))
200}
201
202fn resolve_options_type(e2e_config: &E2eConfig) -> Option<String> {
203 e2e_config
204 .call
205 .overrides
206 .get("python")
207 .and_then(|o| o.options_type.clone())
208}
209
210fn resolve_options_via(e2e_config: &E2eConfig) -> &str {
212 e2e_config
213 .call
214 .overrides
215 .get("python")
216 .and_then(|o| o.options_via.as_deref())
217 .unwrap_or("kwargs")
218}
219
220fn resolve_enum_fields(e2e_config: &E2eConfig) -> &HashMap<String, String> {
222 static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
223 e2e_config
224 .call
225 .overrides
226 .get("python")
227 .map(|o| &o.enum_fields)
228 .unwrap_or(&EMPTY)
229}
230
231fn resolve_handle_nested_types(e2e_config: &E2eConfig) -> &HashMap<String, String> {
234 static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
235 e2e_config
236 .call
237 .overrides
238 .get("python")
239 .map(|o| &o.handle_nested_types)
240 .unwrap_or(&EMPTY)
241}
242
243fn resolve_handle_dict_types(e2e_config: &E2eConfig) -> &std::collections::HashSet<String> {
246 static EMPTY: std::sync::LazyLock<std::collections::HashSet<String>> =
247 std::sync::LazyLock::new(std::collections::HashSet::new);
248 e2e_config
249 .call
250 .overrides
251 .get("python")
252 .map(|o| &o.handle_dict_types)
253 .unwrap_or(&EMPTY)
254}
255
256fn is_skipped(fixture: &Fixture, language: &str) -> bool {
257 fixture.skip.as_ref().is_some_and(|s| s.should_skip(language))
258}
259
260fn render_conftest(e2e_config: &E2eConfig, groups: &[FixtureGroup]) -> String {
265 let module = resolve_module(e2e_config);
266 let has_http_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| f.is_http_test());
267
268 let has_file_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| {
271 let cc = e2e_config.resolve_call(f.call.as_deref());
272 cc.args
273 .iter()
274 .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
275 });
276
277 let header = hash::header(CommentStyle::Hash);
278 if has_http_fixtures {
279 format!(
280 r#"{header}"""Pytest configuration for e2e tests."""
281from __future__ import annotations
282
283import os
284import subprocess
285import threading
286from pathlib import Path
287from typing import Generator
288
289import pytest
290
291# Ensure the package is importable.
292# The {module} package is expected to be installed in the current environment.
293
294_HERE = Path(__file__).parent
295_E2E_DIR = _HERE.parent
296_MOCK_SERVER_BIN = _E2E_DIR / "rust" / "target" / "release" / "mock-server"
297_FIXTURES_DIR = _E2E_DIR.parent / "fixtures"
298
299
300@pytest.fixture(scope="session", autouse=True)
301def mock_server() -> Generator[str, None, None]:
302 """Spawn the mock HTTP server binary and set MOCK_SERVER_URL."""
303 proc = subprocess.Popen( # noqa: S603
304 [str(_MOCK_SERVER_BIN), str(_FIXTURES_DIR)],
305 stdout=subprocess.PIPE,
306 stderr=None,
307 stdin=subprocess.PIPE,
308 )
309 url = ""
310 assert proc.stdout is not None
311 for raw_line in proc.stdout:
312 line = raw_line.decode().strip()
313 if line.startswith("MOCK_SERVER_URL="):
314 url = line.split("=", 1)[1]
315 break
316 os.environ["MOCK_SERVER_URL"] = url
317 # Drain stdout in background so the server never blocks.
318 threading.Thread(target=proc.stdout.read, daemon=True).start()
319 yield url
320 if proc.stdin:
321 proc.stdin.close()
322 proc.terminate()
323 proc.wait()
324
325
326def _make_request(method: str, path: str, **kwargs: object) -> object:
327 """Make an HTTP request to the mock server."""
328 import urllib.request # noqa: PLC0415
329
330 base_url = os.environ.get("MOCK_SERVER_URL", "http://localhost:8080")
331 url = f"{{base_url}}{{path}}"
332 data = kwargs.pop("json", None)
333 if data is not None:
334 import json # noqa: PLC0415
335
336 body = json.dumps(data).encode()
337 headers = dict(kwargs.pop("headers", {{}}))
338 headers.setdefault("Content-Type", "application/json")
339 req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
340 else:
341 headers = dict(kwargs.pop("headers", {{}}))
342 req = urllib.request.Request(url, headers=headers, method=method.upper())
343 try:
344 with urllib.request.urlopen(req) as resp: # noqa: S310
345 return resp
346 except urllib.error.HTTPError as exc:
347 return exc
348
349
350@pytest.fixture(scope="session")
351def app(mock_server: str) -> object: # noqa: ARG001
352 """Return a simple HTTP helper bound to the mock server URL."""
353
354 class _App:
355 def request(self, path: str, **kwargs: object) -> object:
356 method = str(kwargs.pop("method", "GET"))
357 return _make_request(method, path, **kwargs)
358
359 return _App()
360"#
361 )
362 } else if has_file_fixtures {
363 format!(
364 r#"{header}"""Pytest configuration for e2e tests."""
365import os
366from pathlib import Path
367
368# Ensure the package is importable.
369# The {module} package is expected to be installed in the current environment.
370
371# Change to the test_documents directory so that fixture file paths like
372# "pdf/fake_memo.pdf" resolve correctly when running pytest from e2e/python/.
373_TEST_DOCUMENTS = Path(__file__).parent.parent.parent / "test_documents"
374if _TEST_DOCUMENTS.is_dir():
375 os.chdir(_TEST_DOCUMENTS)
376
377# On macOS, Pdfium is a separate dylib not on the default library path in dev builds.
378# Search common locations (Cargo build output, staged target/release) and extend
379# DYLD_LIBRARY_PATH / LD_LIBRARY_PATH so the extension can load the library.
380_REPO_ROOT = Path(__file__).parent.parent.parent
381
382
383def _find_pdfium_dir() -> str | None:
384 """Find the directory containing libpdfium, searching Cargo build outputs."""
385 for _candidate in sorted(_REPO_ROOT.glob("target/*/release/build/*/out/libpdfium*")):
386 return str(_candidate.parent)
387 for _candidate in sorted(_REPO_ROOT.glob("target/release/build/*/out/libpdfium*")):
388 return str(_candidate.parent)
389 return None
390
391
392_pdfium_dir = _find_pdfium_dir()
393if _pdfium_dir is not None:
394 for _var in ("DYLD_LIBRARY_PATH", "LD_LIBRARY_PATH"):
395 _existing = os.environ.get(_var, "")
396 if _pdfium_dir not in _existing:
397 os.environ[_var] = f"{{_pdfium_dir}}:{{_existing}}" if _existing else _pdfium_dir
398"#
399 )
400 } else {
401 format!(
402 r#"{header}"""Pytest configuration for e2e tests."""
403# Ensure the package is importable.
404# The {module} package is expected to be installed in the current environment.
405"#
406 )
407 }
408}
409
410fn render_test_file(category: &str, fixtures: &[&Fixture], e2e_config: &E2eConfig) -> String {
411 let mut out = String::new();
412 out.push_str(&hash::header(CommentStyle::Hash));
413 let _ = writeln!(out, "\"\"\"E2e tests for category: {category}.\"\"\"");
414
415 let module = resolve_module(e2e_config);
416 let function_name = resolve_function_name(e2e_config);
417 let options_type = resolve_options_type(e2e_config);
418 let options_via = resolve_options_via(e2e_config);
419 let enum_fields = resolve_enum_fields(e2e_config);
420 let handle_nested_types = resolve_handle_nested_types(e2e_config);
421 let handle_dict_types = resolve_handle_dict_types(e2e_config);
422 let field_resolver = FieldResolver::new(
423 &e2e_config.fields,
424 &e2e_config.fields_optional,
425 &e2e_config.result_fields,
426 &e2e_config.fields_array,
427 );
428
429 let has_error_test = fixtures
430 .iter()
431 .any(|f| f.assertions.iter().any(|a| a.assertion_type == "error"));
432 let has_skipped = fixtures.iter().any(|f| is_skipped(f, "python"));
433 let has_http_tests = fixtures.iter().any(|f| f.is_http_test());
434
435 let is_async = fixtures.iter().any(|f| {
437 let cc = e2e_config.resolve_call(f.call.as_deref());
438 cc.r#async
439 }) || e2e_config.call.r#async;
440 let needs_pytest = has_error_test || has_skipped || is_async;
441
442 let needs_json_import = options_via == "json"
444 && fixtures.iter().any(|f| {
445 e2e_config
446 .call
447 .args
448 .iter()
449 .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
450 });
451
452 let needs_os_import = e2e_config.call.args.iter().any(|arg| arg.arg_type == "mock_url");
454
455 let needs_path_import = fixtures.iter().any(|f| {
458 let cc = e2e_config.resolve_call(f.call.as_deref());
459 cc.args.iter().any(|arg| {
460 if arg.arg_type != "bytes" {
461 return false;
462 }
463 let val = resolve_field(&f.input, &arg.field);
464 val.as_str()
465 .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::FilePath))
466 })
467 });
468 let needs_base64_import = fixtures.iter().any(|f| {
469 let cc = e2e_config.resolve_call(f.call.as_deref());
470 cc.args.iter().any(|arg| {
471 if arg.arg_type != "bytes" {
472 return false;
473 }
474 let val = resolve_field(&f.input, &arg.field);
475 val.as_str()
476 .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::Base64))
477 })
478 });
479
480 let needs_re_import = false;
482 let _ = has_http_tests; let needs_options_type = options_via == "kwargs"
486 && options_type.is_some()
487 && fixtures.iter().any(|f| {
488 e2e_config
489 .call
490 .args
491 .iter()
492 .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
493 });
494
495 let mut used_enum_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
497 if needs_options_type && !enum_fields.is_empty() {
498 for fixture in fixtures.iter() {
499 for arg in &e2e_config.call.args {
500 if arg.arg_type == "json_object" {
501 let value = resolve_field(&fixture.input, &arg.field);
502 if let Some(obj) = value.as_object() {
503 for key in obj.keys() {
504 if let Some(enum_type) = enum_fields.get(key) {
505 used_enum_types.insert(enum_type.clone());
506 }
507 }
508 }
509 }
510 }
511 }
512 }
513
514 let mut stdlib_imports: Vec<String> = Vec::new();
518 let mut thirdparty_bare: Vec<String> = Vec::new();
519 let mut thirdparty_from: Vec<String> = Vec::new();
520
521 if needs_base64_import {
522 stdlib_imports.push("import base64".to_string());
523 }
524
525 if needs_json_import {
526 stdlib_imports.push("import json".to_string());
527 }
528
529 if needs_os_import {
530 stdlib_imports.push("import os".to_string());
531 }
532
533 if needs_path_import {
534 stdlib_imports.push("from pathlib import Path".to_string());
535 }
536
537 if needs_re_import {
538 stdlib_imports.push("import re".to_string());
539 }
540
541 if needs_pytest {
542 thirdparty_bare.push("import pytest # noqa: F401".to_string());
547 }
548
549 let has_non_http_fixtures = fixtures
552 .iter()
553 .any(|f| !f.is_http_test() && !is_skipped(f, "python") && !f.assertions.is_empty());
554 if has_non_http_fixtures {
555 let handle_constructors: Vec<String> = e2e_config
557 .call
558 .args
559 .iter()
560 .filter(|arg| arg.arg_type == "handle")
561 .map(|arg| format!("create_{}", arg.name.to_snake_case()))
562 .collect();
563
564 let mut import_names: Vec<String> = Vec::new();
568 for fixture in fixtures.iter() {
569 let cc = e2e_config.resolve_call(fixture.call.as_deref());
570 let fn_name = resolve_function_name_for_call(cc);
571 if !import_names.contains(&fn_name) {
572 import_names.push(fn_name);
573 }
574 }
575 if import_names.is_empty() {
578 import_names.push(function_name.clone());
579 }
580 for ctor in &handle_constructors {
581 if !import_names.contains(ctor) {
582 import_names.push(ctor.clone());
583 }
584 }
585
586 let needs_config_import = e2e_config.call.args.iter().any(|arg| {
588 arg.arg_type == "handle"
589 && fixtures.iter().any(|f| {
590 let val = resolve_field(&f.input, &arg.field);
591 !val.is_null() && val.as_object().is_some_and(|o| !o.is_empty())
592 })
593 });
594 if needs_config_import {
595 let config_class = options_type.as_deref().unwrap_or("CrawlConfig");
596 if !import_names.contains(&config_class.to_string()) {
597 import_names.push(config_class.to_string());
598 }
599 }
600
601 if !handle_nested_types.is_empty() {
603 let mut used_nested_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
604 for fixture in fixtures.iter() {
605 for arg in &e2e_config.call.args {
606 if arg.arg_type == "handle" {
607 let config_value = resolve_field(&fixture.input, &arg.field);
608 if let Some(obj) = config_value.as_object() {
609 for key in obj.keys() {
610 if let Some(type_name) = handle_nested_types.get(key) {
611 if obj[key].is_object() {
612 used_nested_types.insert(type_name.clone());
613 }
614 }
615 }
616 }
617 }
618 }
619 }
620 for type_name in used_nested_types {
621 if !import_names.contains(&type_name) {
622 import_names.push(type_name);
623 }
624 }
625 }
626
627 for fixture in fixtures.iter() {
629 for assertion in &fixture.assertions {
630 if assertion.assertion_type == "method_result" {
631 if let Some(method_name) = &assertion.method {
632 let import = python_method_helper_import(method_name);
633 if let Some(name) = import {
634 if !import_names.contains(&name) {
635 import_names.push(name);
636 }
637 }
638 }
639 }
640 }
641 }
642
643 if let (true, Some(opts_type)) = (needs_options_type, &options_type) {
644 import_names.push(opts_type.clone());
645 thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
646 if !used_enum_types.is_empty() {
648 let enum_mod = e2e_config
649 .call
650 .overrides
651 .get("python")
652 .and_then(|o| o.enum_module.as_deref())
653 .unwrap_or(&module);
654 let enum_names: Vec<&String> = used_enum_types.iter().collect();
655 thirdparty_from.push(format!(
656 "from {enum_mod} import {}",
657 enum_names.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
658 ));
659 }
660 } else {
661 thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
662 }
663 }
664
665 stdlib_imports.sort();
666 thirdparty_bare.sort();
667 thirdparty_from.sort();
668
669 if !stdlib_imports.is_empty() {
671 for imp in &stdlib_imports {
672 let _ = writeln!(out, "{imp}");
673 }
674 let _ = writeln!(out);
675 }
676 for imp in &thirdparty_bare {
678 let _ = writeln!(out, "{imp}");
679 }
680 for imp in &thirdparty_from {
681 let _ = writeln!(out, "{imp}");
682 }
683 let _ = writeln!(out);
685 let _ = writeln!(out);
686
687 for fixture in fixtures {
688 if fixture.is_http_test() {
689 render_http_test_function(&mut out, fixture);
690 } else if !is_skipped(fixture, "python") && fixture.assertions.is_empty() {
691 let fn_name = sanitize_ident(&fixture.id);
693 let description = &fixture.description;
694 let desc_with_period = if description.ends_with('.') {
695 description.to_string()
696 } else {
697 format!("{description}.")
698 };
699 let _ = writeln!(
700 out,
701 "@pytest.mark.skip(reason=\"no assertions configured for this fixture in python e2e\")"
702 );
703 let _ = writeln!(out, "def test_{fn_name}() -> None:");
704 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
705 } else {
706 render_test_function(
707 &mut out,
708 fixture,
709 e2e_config,
710 options_type.as_deref(),
711 options_via,
712 enum_fields,
713 handle_nested_types,
714 handle_dict_types,
715 &field_resolver,
716 );
717 }
718 let _ = writeln!(out);
719 }
720
721 out
722}
723
724fn render_http_test_function(out: &mut String, fixture: &Fixture) {
735 let Some(http) = &fixture.http else {
736 return;
737 };
738
739 let fn_name = sanitize_ident(&fixture.id);
740 let description = &fixture.description;
741 let desc_with_period = if description.ends_with('.') {
742 description.to_string()
743 } else {
744 format!("{description}.")
745 };
746
747 let status = http.expected_response.status_code;
749 if status == 101 {
750 let _ = writeln!(
751 out,
752 "@pytest.mark.skip(reason=\"HTTP 101 WebSocket upgrade cannot be tested via urllib\")"
753 );
754 let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
755 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
756 let _ = writeln!(out, " ...");
757 let _ = writeln!(out);
758 return;
759 }
760
761 if is_skipped(fixture, "python") {
762 let reason = fixture
763 .skip
764 .as_ref()
765 .and_then(|s| s.reason.as_deref())
766 .unwrap_or("skipped for python");
767 let escaped = escape_python(reason);
768 let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
769 }
770
771 let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
772 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
773 let _ = writeln!(out, " import os # noqa: PLC0415");
774 let _ = writeln!(out, " import urllib.request # noqa: PLC0415");
775 let _ = writeln!(out, " base = os.environ.get(\"MOCK_SERVER_URL\", mock_server)");
776 let fixture_id = fixture.id.as_str();
777 let _ = writeln!(out, " url = f\"{{base}}/fixtures/{fixture_id}\"");
778
779 let method = http.request.method.to_uppercase();
781
782 let mut header_entries: Vec<String> = Vec::new();
784 for (k, v) in &http.request.headers {
785 header_entries.push(format!(" \"{}\": \"{}\",", escape_python(k), escape_python(v)));
786 }
787 let headers_py = if header_entries.is_empty() {
788 "{}".to_string()
789 } else {
790 format!("{{\n{}\n }}", header_entries.join("\n"))
791 };
792
793 if let Some(body) = &http.request.body {
794 let py_body = json_to_python_literal(body);
795 let _ = writeln!(out, " import json # noqa: PLC0415");
796 let _ = writeln!(out, " _headers = {headers_py}");
797 let _ = writeln!(out, " _headers.setdefault(\"Content-Type\", \"application/json\")");
798 let _ = writeln!(out, " _body = json.dumps({py_body}).encode()");
799 let _ = writeln!(
800 out,
801 " _req = urllib.request.Request(url, data=_body, headers=_headers, method=\"{method}\")"
802 );
803 } else {
804 let _ = writeln!(out, " _headers = {headers_py}");
805 let _ = writeln!(
806 out,
807 " _req = urllib.request.Request(url, headers=_headers, method=\"{method}\")"
808 );
809 }
810 let body_has_content = matches!(&http.expected_response.body, Some(v)
813 if !(v.is_null() || (v.is_string() && v.as_str() == Some(""))));
814 let needs_body = body_has_content
815 || http.expected_response.body_partial.is_some()
816 || http
817 .expected_response
818 .validation_errors
819 .as_ref()
820 .is_some_and(|v| !v.is_empty());
821 let needs_headers = http
823 .expected_response
824 .headers
825 .iter()
826 .any(|(k, _)| k.to_lowercase() != "content-encoding");
827
828 let _ = writeln!(
830 out,
831 " class _NoRedirect(urllib.request.HTTPRedirectHandler): # noqa: N801"
832 );
833 let _ = writeln!(
834 out,
835 " def redirect_request(self, *args, **kwargs): return None # noqa: E704"
836 );
837 let _ = writeln!(out, " _opener = urllib.request.build_opener(_NoRedirect())");
838 let _ = writeln!(out, " try:");
839 let _ = writeln!(out, " response = _opener.open(_req) # noqa: S310");
840 let _ = writeln!(out, " status_code = response.status");
841 if needs_body {
842 let _ = writeln!(out, " resp_body = response.read()");
843 }
844 if needs_headers {
845 let _ = writeln!(out, " resp_headers = dict(response.headers)");
846 }
847 let _ = writeln!(out, " except urllib.error.HTTPError as _exc:");
848 let _ = writeln!(out, " status_code = _exc.code");
849 if needs_body {
850 let _ = writeln!(out, " resp_body = _exc.read()");
851 }
852 if needs_headers {
853 let _ = writeln!(out, " resp_headers = dict(_exc.headers)");
854 }
855
856 let status = http.expected_response.status_code;
858 let _ = writeln!(out, " assert status_code == {status} # noqa: S101");
859
860 if let Some(expected_body) = &http.expected_response.body {
862 if !(expected_body.is_null() || expected_body.is_string() && expected_body.as_str() == Some("")) {
864 if let serde_json::Value::String(s) = expected_body {
865 let py_val = format!("\"{}\"", escape_python(s));
867 let _ = writeln!(out, " assert resp_body.decode() == {py_val} # noqa: S101");
868 } else {
869 let py_val = json_to_python_literal(expected_body);
870 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
871 let _ = writeln!(out, " data = _json.loads(resp_body)");
872 let _ = writeln!(out, " assert data == {py_val} # noqa: S101");
873 }
874 }
875 } else if let Some(partial) = &http.expected_response.body_partial {
876 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
877 let _ = writeln!(out, " data = _json.loads(resp_body)");
878 if let Some(obj) = partial.as_object() {
879 for (key, val) in obj {
880 let py_val = json_to_python_literal(val);
881 let escaped_key = escape_python(key);
882 let _ = writeln!(out, " assert data[\"{escaped_key}\"] == {py_val} # noqa: S101");
883 }
884 }
885 }
886
887 for (header_name, header_value) in &http.expected_response.headers {
889 let lower_name = header_name.to_lowercase();
890 if lower_name == "content-encoding" {
892 continue;
893 }
894 let escaped_name = escape_python(&lower_name);
895 match header_value.as_str() {
896 "<<present>>" => {
897 let _ = writeln!(out, " assert \"{escaped_name}\" in resp_headers # noqa: S101");
898 }
899 "<<absent>>" => {
900 let _ = writeln!(
901 out,
902 " assert resp_headers.get(\"{escaped_name}\") is None # noqa: S101"
903 );
904 }
905 "<<uuid>>" => {
906 let _ = writeln!(out, " import re # noqa: PLC0415");
907 let _ = writeln!(
908 out,
909 " assert re.match(r'^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$', resp_headers[\"{escaped_name}\"]) # noqa: S101"
910 );
911 }
912 exact => {
913 let escaped_val = escape_python(exact);
914 let _ = writeln!(
915 out,
916 " assert resp_headers[\"{escaped_name}\"] == \"{escaped_val}\" # noqa: S101"
917 );
918 }
919 }
920 }
921
922 if let Some(validation_errors) = &http.expected_response.validation_errors {
925 if !validation_errors.is_empty() && !body_has_content {
926 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
927 let _ = writeln!(out, " _data = _json.loads(resp_body)");
928 let _ = writeln!(out, " errors = _data.get(\"errors\", [])");
929 for ve in validation_errors {
930 let loc_py: Vec<String> = ve.loc.iter().map(|s| format!("\"{}\"", escape_python(s))).collect();
931 let loc_str = loc_py.join(", ");
932 let escaped_msg = escape_python(&ve.msg);
933 let _ = writeln!(
934 out,
935 " assert any(e[\"loc\"] == [{loc_str}] and \"{escaped_msg}\" in e[\"msg\"] for e in errors) # noqa: S101"
936 );
937 }
938 }
939 }
940}
941
942#[allow(clippy::too_many_arguments)]
947fn render_test_function(
948 out: &mut String,
949 fixture: &Fixture,
950 e2e_config: &E2eConfig,
951 options_type: Option<&str>,
952 options_via: &str,
953 enum_fields: &HashMap<String, String>,
954 handle_nested_types: &HashMap<String, String>,
955 handle_dict_types: &std::collections::HashSet<String>,
956 field_resolver: &FieldResolver,
957) {
958 let fn_name = sanitize_ident(&fixture.id);
959 let description = &fixture.description;
960 let call_config = e2e_config.resolve_call(fixture.call.as_deref());
961 let function_name = resolve_function_name_for_call(call_config);
962 let result_var = &call_config.result_var;
963
964 let python_override = call_config.overrides.get("python");
966 let result_is_simple = python_override.is_some_and(|o| o.result_is_simple);
967 let arg_name_map = python_override.map(|o| &o.arg_name_map);
968
969 let desc_with_period = if description.ends_with('.') {
970 description.to_string()
971 } else {
972 format!("{description}.")
973 };
974
975 if is_skipped(fixture, "python") {
977 let reason = fixture
978 .skip
979 .as_ref()
980 .and_then(|s| s.reason.as_deref())
981 .unwrap_or("skipped for python");
982 let escaped = escape_python(reason);
983 let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
984 }
985
986 let is_async = call_config.r#async;
987 if is_async {
988 let _ = writeln!(out, "@pytest.mark.asyncio");
989 let _ = writeln!(out, "async def test_{fn_name}() -> None:");
990 } else {
991 let _ = writeln!(out, "def test_{fn_name}() -> None:");
992 }
993 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
994
995 let has_error_assertion = fixture.assertions.iter().any(|a| a.assertion_type == "error");
997
998 let mut arg_bindings = Vec::new();
1000 let mut kwarg_exprs = Vec::new();
1001 for arg in &call_config.args {
1002 let var_name = &arg.name;
1003 let kwarg_name = arg_name_map
1005 .and_then(|m| m.get(var_name.as_str()))
1006 .map(|s| s.as_str())
1007 .unwrap_or(var_name.as_str());
1008
1009 if arg.arg_type == "handle" {
1010 let constructor_name = format!("create_{}", arg.name.to_snake_case());
1013 let config_value = resolve_field(&fixture.input, &arg.field);
1014 if config_value.is_null()
1015 || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
1016 {
1017 arg_bindings.push(format!(" {var_name} = {constructor_name}(None)"));
1018 } else if let Some(obj) = config_value.as_object() {
1019 let kwargs: Vec<String> = obj
1023 .iter()
1024 .map(|(k, v)| {
1025 let snake_key = k.to_snake_case();
1026 let py_val = if let Some(type_name) = handle_nested_types.get(k) {
1027 if let Some(nested_obj) = v.as_object() {
1029 if nested_obj.is_empty() {
1030 format!("{type_name}()")
1032 } else if handle_dict_types.contains(k) {
1033 json_to_python_literal(v)
1038 } else {
1039 let nested_kwargs: Vec<String> = nested_obj
1041 .iter()
1042 .map(|(nk, nv)| {
1043 let nested_snake_key = nk.to_snake_case();
1044 format!("{nested_snake_key}={}", json_to_python_literal(nv))
1045 })
1046 .collect();
1047 format!("{type_name}({})", nested_kwargs.join(", "))
1048 }
1049 } else {
1050 json_to_python_literal(v)
1052 }
1053 } else if k == "request_timeout" {
1054 if let Some(ms) = v.as_u64() {
1060 format!("{}", ms / 1000)
1061 } else {
1062 json_to_python_literal(v)
1063 }
1064 } else {
1065 json_to_python_literal(v)
1066 };
1067 format!("{snake_key}={py_val}")
1068 })
1069 .collect();
1070 let config_class = options_type.unwrap_or("CrawlConfig");
1072 let single_line = format!(" {var_name}_config = {config_class}({})", kwargs.join(", "));
1073 if single_line.len() <= 120 {
1074 arg_bindings.push(single_line);
1075 } else {
1076 let mut lines = format!(" {var_name}_config = {config_class}(\n");
1078 for kw in &kwargs {
1079 lines.push_str(&format!(" {kw},\n"));
1080 }
1081 lines.push_str(" )");
1082 arg_bindings.push(lines);
1083 }
1084 arg_bindings.push(format!(" {var_name} = {constructor_name}({var_name}_config)"));
1085 } else {
1086 let literal = json_to_python_literal(config_value);
1087 arg_bindings.push(format!(" {var_name} = {constructor_name}({literal})"));
1088 }
1089 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1090 continue;
1091 }
1092
1093 if arg.arg_type == "mock_url" {
1094 let fixture_id = &fixture.id;
1095 arg_bindings.push(format!(
1096 " {var_name} = os.environ['MOCK_SERVER_URL'] + '/fixtures/{fixture_id}'"
1097 ));
1098 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1099 continue;
1100 }
1101
1102 let value = resolve_field(&fixture.input, &arg.field);
1103
1104 if value.is_null() && arg.optional {
1105 continue;
1106 }
1107
1108 if arg.arg_type == "json_object" && !value.is_null() {
1111 match options_via {
1112 "dict" => {
1113 let literal = json_to_python_literal(value);
1115 let noqa = if literal.contains("/tmp/") {
1116 " # noqa: S108"
1117 } else {
1118 ""
1119 };
1120 arg_bindings.push(format!(" {var_name} = {literal}{noqa}"));
1121 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1122 continue;
1123 }
1124 "json" => {
1125 let json_str = serde_json::to_string(value).unwrap_or_default();
1127 let escaped = escape_python(&json_str);
1128 arg_bindings.push(format!(" {var_name} = json.loads(\"{escaped}\")"));
1129 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1130 continue;
1131 }
1132 _ => {
1133 if let (Some(opts_type), Some(obj)) = (options_type, value.as_object()) {
1135 let kwargs: Vec<String> = obj
1136 .iter()
1137 .map(|(k, v)| {
1138 let snake_key = k.to_snake_case();
1139 let py_val = if let Some(enum_type) = enum_fields.get(k) {
1140 if let Some(s) = v.as_str() {
1142 let upper_val = s.to_shouty_snake_case();
1143 format!("{enum_type}.{upper_val}")
1144 } else {
1145 json_to_python_literal(v)
1146 }
1147 } else {
1148 json_to_python_literal(v)
1149 };
1150 format!("{snake_key}={py_val}")
1151 })
1152 .collect();
1153 let constructor = format!("{opts_type}({})", kwargs.join(", "));
1154 arg_bindings.push(format!(" {var_name} = {constructor}"));
1155 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1156 continue;
1157 }
1158 }
1159 }
1160 }
1161
1162 if arg.optional && value.is_null() {
1165 continue;
1166 }
1167
1168 if value.is_null() && !arg.optional {
1170 let default_val = match arg.arg_type.as_str() {
1171 "string" => "\"\"".to_string(),
1172 "int" | "integer" => "0".to_string(),
1173 "float" | "number" => "0.0".to_string(),
1174 "bool" | "boolean" => "False".to_string(),
1175 _ => "None".to_string(),
1176 };
1177 arg_bindings.push(format!(" {var_name} = {default_val}"));
1178 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1179 continue;
1180 }
1181
1182 if arg.arg_type == "bytes" {
1194 if let Some(raw) = value.as_str() {
1195 match classify_bytes_value(raw) {
1196 BytesKind::FilePath => {
1197 let escaped = escape_python(raw);
1198 arg_bindings.push(format!(" {var_name} = Path(\"{escaped}\").read_bytes()"));
1199 }
1200 BytesKind::InlineText => {
1201 let escaped = escape_python(raw);
1204 arg_bindings.push(format!(" {var_name} = b\"{escaped}\""));
1205 }
1206 BytesKind::Base64 => {
1207 let escaped = escape_python(raw);
1208 arg_bindings.push(format!(" {var_name} = base64.b64decode(\"{escaped}\")"));
1209 }
1210 }
1211 } else {
1212 arg_bindings.push(format!(" {var_name} = None"));
1213 }
1214 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1215 continue;
1216 }
1217
1218 let literal = json_to_python_literal(value);
1219 let noqa = if literal.contains("/tmp/") {
1220 " # noqa: S108"
1221 } else {
1222 ""
1223 };
1224 arg_bindings.push(format!(" {var_name} = {literal}{noqa}"));
1225 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1226 }
1227
1228 if let Some(visitor_spec) = &fixture.visitor {
1230 let _ = writeln!(out, " class _TestVisitor:");
1231 for (method_name, action) in &visitor_spec.callbacks {
1232 emit_python_visitor_method(out, method_name, action);
1233 }
1234 kwarg_exprs.push("visitor=_TestVisitor()".to_string());
1235 }
1236
1237 for binding in &arg_bindings {
1238 let _ = writeln!(out, "{binding}");
1239 }
1240
1241 let call_args = kwarg_exprs.join(", ");
1242 let await_prefix = if is_async { "await " } else { "" };
1243 let call_expr = format!("{await_prefix}{function_name}({call_args})");
1244
1245 if has_error_assertion {
1246 let error_assertion = fixture.assertions.iter().find(|a| a.assertion_type == "error");
1248 let has_message = error_assertion
1249 .and_then(|a| a.value.as_ref())
1250 .and_then(|v| v.as_str())
1251 .is_some();
1252
1253 if has_message {
1254 let _ = writeln!(out, " with pytest.raises(Exception) as exc_info: # noqa: B017");
1255 let _ = writeln!(out, " {call_expr}");
1256 if let Some(msg) = error_assertion.and_then(|a| a.value.as_ref()).and_then(|v| v.as_str()) {
1257 let escaped = escape_python(msg);
1258 let _ = writeln!(out, " assert \"{escaped}\" in str(exc_info.value) # noqa: S101");
1259 }
1260 } else {
1261 let _ = writeln!(out, " with pytest.raises(Exception): # noqa: B017");
1262 let _ = writeln!(out, " {call_expr}");
1263 }
1264
1265 return;
1268 }
1269
1270 let has_usable_assertion = fixture.assertions.iter().any(|a| {
1273 if a.assertion_type == "not_error" || a.assertion_type == "error" {
1274 return false;
1275 }
1276 if result_is_simple {
1277 if let Some(f) = &a.field {
1280 let f_lower = f.to_lowercase();
1281 if !f.is_empty()
1282 && f_lower != "content"
1283 && f_lower != "result"
1284 && (f_lower.starts_with("metadata")
1285 || f_lower.starts_with("document")
1286 || f_lower.starts_with("structure")
1287 || f_lower.starts_with("pages")
1288 || f_lower.starts_with("chunks")
1289 || f_lower.starts_with("tables")
1290 || f_lower.starts_with("images")
1291 || f_lower.starts_with("mime_type")
1292 || f_lower.starts_with("is_")
1293 || f_lower == "byte_length"
1294 || f_lower == "page_count"
1295 || f_lower == "output_format"
1296 || f_lower == "extraction_method")
1297 {
1298 return false; }
1300 }
1301 return true;
1302 }
1303 match &a.field {
1304 Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
1305 _ => true,
1306 }
1307 });
1308 let py_result_var = if has_usable_assertion {
1309 result_var.to_string()
1310 } else {
1311 "_".to_string()
1312 };
1313 let _ = writeln!(out, " {py_result_var} = {call_expr}");
1314
1315 let fields_enum = &e2e_config.fields_enum;
1316 for assertion in &fixture.assertions {
1317 if assertion.assertion_type == "not_error" {
1318 if !call_config.returns_result {
1321 continue;
1322 }
1323 continue;
1325 }
1326 render_assertion(
1327 out,
1328 assertion,
1329 result_var,
1330 field_resolver,
1331 fields_enum,
1332 result_is_simple,
1333 );
1334 }
1335}
1336
/// How a string fixture value destined for a `bytes` argument should be rendered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BytesKind {
    /// A relative file path; the caller renders it as `Path("...").read_bytes()`.
    FilePath,
    /// Literal text; the caller renders it as a `b"..."` literal.
    InlineText,
    /// Base64 payload; the caller renders it as `base64.b64decode("...")`.
    Base64,
}

/// Classifies a string fixture value for a `bytes` argument.
///
/// Rules, applied in order:
/// 1. Values starting with `<`, `{` or `[`, or containing a space, are inline text.
/// 2. Values starting with an ASCII alphanumeric character or `_` whose remainder
///    after the first `/` contains a `.` are file paths (e.g. `pdf/sample.pdf`).
/// 3. Everything else, including the empty string, is treated as base64.
fn classify_bytes_value(s: &str) -> BytesKind {
    let first = s.chars().next();

    if matches!(first, Some('<' | '{' | '[')) || s.contains(' ') {
        return BytesKind::InlineText;
    }

    let path_like_start = first.is_some_and(|c| c.is_ascii_alphanumeric() || c == '_');
    // A path-like start can never be '/', so the first '/' is always preceded by a
    // non-empty directory component; only the part after it needs inspecting.
    let has_dotted_tail = s.split_once('/').is_some_and(|(_, tail)| tail.contains('.'));

    if path_like_start && has_dotted_tail {
        BytesKind::FilePath
    } else {
        BytesKind::Base64
    }
}
1382
1383fn json_to_python_literal(value: &serde_json::Value) -> String {
1388 match value {
1389 serde_json::Value::Null => "None".to_string(),
1390 serde_json::Value::Bool(true) => "True".to_string(),
1391 serde_json::Value::Bool(false) => "False".to_string(),
1392 serde_json::Value::Number(n) => n.to_string(),
1393 serde_json::Value::String(s) => python_string_literal(s),
1394 serde_json::Value::Array(arr) => {
1395 let items: Vec<String> = arr.iter().map(json_to_python_literal).collect();
1396 format!("[{}]", items.join(", "))
1397 }
1398 serde_json::Value::Object(map) => {
1399 let items: Vec<String> = map
1400 .iter()
1401 .map(|(k, v)| format!("\"{}\": {}", escape_python(k), json_to_python_literal(v)))
1402 .collect();
1403 format!("{{{}}}", items.join(", "))
1404 }
1405 }
1406}
1407
1408fn render_assertion(
1413 out: &mut String,
1414 assertion: &Assertion,
1415 result_var: &str,
1416 field_resolver: &FieldResolver,
1417 fields_enum: &std::collections::HashSet<String>,
1418 result_is_simple: bool,
1419) {
1420 if result_is_simple {
1424 if let Some(f) = &assertion.field {
1425 let f_lower = f.to_lowercase();
1426 if !f.is_empty()
1427 && f_lower != "content"
1428 && f_lower != "result"
1429 && (f_lower.starts_with("metadata")
1430 || f_lower.starts_with("document")
1431 || f_lower.starts_with("structure")
1432 || f_lower.starts_with("pages")
1433 || f_lower.starts_with("chunks")
1434 || f_lower.starts_with("tables")
1435 || f_lower.starts_with("images")
1436 || f_lower.starts_with("mime_type")
1437 || f_lower.starts_with("is_")
1438 || f_lower == "byte_length"
1439 || f_lower == "page_count"
1440 || f_lower == "output_format"
1441 || f_lower == "extraction_method")
1442 {
1443 let _ = writeln!(out, " # skipped: field '{f}' not applicable for simple result type");
1444 return;
1445 }
1446 }
1447 }
1448
1449 if let Some(f) = &assertion.field {
1452 match f.as_str() {
1453 "chunks_have_content" => {
1454 let pred = format!("all(c.content for c in ({result_var}.chunks or []))");
1455 match assertion.assertion_type.as_str() {
1456 "is_true" => {
1457 let _ = writeln!(out, " assert {pred} # noqa: S101");
1458 }
1459 "is_false" => {
1460 let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
1461 }
1462 _ => {
1463 let _ = writeln!(
1464 out,
1465 " # skipped: unsupported assertion type on synthetic field '{f}'"
1466 );
1467 }
1468 }
1469 return;
1470 }
1471 "chunks_have_embeddings" => {
1472 let pred = format!(
1473 "all(c.embedding is not None and len(c.embedding) > 0 for c in ({result_var}.chunks or []))"
1474 );
1475 match assertion.assertion_type.as_str() {
1476 "is_true" => {
1477 let _ = writeln!(out, " assert {pred} # noqa: S101");
1478 }
1479 "is_false" => {
1480 let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
1481 }
1482 _ => {
1483 let _ = writeln!(
1484 out,
1485 " # skipped: unsupported assertion type on synthetic field '{f}'"
1486 );
1487 }
1488 }
1489 return;
1490 }
1491 "embeddings" => {
1495 match assertion.assertion_type.as_str() {
1496 "count_equals" => {
1497 if let Some(val) = &assertion.value {
1498 if let Some(n) = val.as_u64() {
1499 let _ = writeln!(out, " assert len({result_var}) == {n} # noqa: S101");
1500 }
1501 }
1502 }
1503 "count_min" => {
1504 if let Some(val) = &assertion.value {
1505 if let Some(n) = val.as_u64() {
1506 let _ = writeln!(out, " assert len({result_var}) >= {n} # noqa: S101");
1507 }
1508 }
1509 }
1510 "not_empty" => {
1511 let _ = writeln!(out, " assert len({result_var}) > 0 # noqa: S101");
1512 }
1513 "is_empty" => {
1514 let _ = writeln!(out, " assert len({result_var}) == 0 # noqa: S101");
1515 }
1516 _ => {
1517 let _ = writeln!(
1518 out,
1519 " # skipped: unsupported assertion type on synthetic field 'embeddings'"
1520 );
1521 }
1522 }
1523 return;
1524 }
1525 "embedding_dimensions" => {
1526 let expr = format!("(len({result_var}[0]) if {result_var} else 0)");
1527 match assertion.assertion_type.as_str() {
1528 "equals" => {
1529 if let Some(val) = &assertion.value {
1530 let py_val = value_to_python_string(val);
1531 let _ = writeln!(out, " assert {expr} == {py_val} # noqa: S101");
1532 }
1533 }
1534 "greater_than" => {
1535 if let Some(val) = &assertion.value {
1536 let py_val = value_to_python_string(val);
1537 let _ = writeln!(out, " assert {expr} > {py_val} # noqa: S101");
1538 }
1539 }
1540 _ => {
1541 let _ = writeln!(
1542 out,
1543 " # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
1544 );
1545 }
1546 }
1547 return;
1548 }
1549 "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
1550 let pred = match f.as_str() {
1551 "embeddings_valid" => {
1552 format!("all(bool(e) for e in {result_var})")
1553 }
1554 "embeddings_finite" => {
1555 format!("all(v == v and abs(v) != float('inf') for e in {result_var} for v in e)")
1556 }
1557 "embeddings_non_zero" => {
1558 format!("all(any(v != 0.0 for v in e) for e in {result_var})")
1559 }
1560 "embeddings_normalized" => {
1561 format!("all(abs(sum(v * v for v in e) - 1.0) < 1e-3 for e in {result_var})")
1562 }
1563 _ => unreachable!(),
1564 };
1565 match assertion.assertion_type.as_str() {
1566 "is_true" => {
1567 let _ = writeln!(out, " assert {pred} # noqa: S101");
1568 }
1569 "is_false" => {
1570 let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
1571 }
1572 _ => {
1573 let _ = writeln!(
1574 out,
1575 " # skipped: unsupported assertion type on synthetic field '{f}'"
1576 );
1577 }
1578 }
1579 return;
1580 }
1581 "keywords" | "keywords_count" => {
1584 let _ = writeln!(
1585 out,
1586 " # skipped: field '{f}' not available on Python ExtractionResult"
1587 );
1588 return;
1589 }
1590 _ => {}
1591 }
1592 }
1593
1594 if !result_is_simple {
1596 if let Some(f) = &assertion.field {
1597 if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
1598 let _ = writeln!(out, " # skipped: field '{f}' not available on result type");
1599 return;
1600 }
1601 }
1602 }
1603
1604 let field_access = if result_is_simple {
1607 result_var.to_string()
1608 } else {
1609 match &assertion.field {
1610 Some(f) if !f.is_empty() => field_resolver.accessor(f, "python", result_var),
1611 _ => result_var.to_string(),
1612 }
1613 };
1614
1615 let field_is_enum = assertion.field.as_deref().is_some_and(|f| {
1626 if fields_enum.contains(f) {
1627 return true;
1628 }
1629 let resolved = field_resolver.resolve(f);
1630 if fields_enum.contains(resolved) {
1631 return true;
1632 }
1633 field_resolver.accessor(f, "python", result_var).contains("[0]")
1638 });
1639
1640 let field_is_optional = match &assertion.field {
1643 Some(f) if !f.is_empty() => {
1644 let resolved = field_resolver.resolve(f);
1645 field_resolver.is_optional(resolved)
1646 }
1647 _ => false,
1648 };
1649
1650 match assertion.assertion_type.as_str() {
1651 "error" | "not_error" => {
1652 }
1654 "equals" => {
1655 if let Some(val) = &assertion.value {
1656 let expected = value_to_python_string(val);
1657 let op = if val.is_boolean() || val.is_null() { "is" } else { "==" };
1659 if val.is_string() {
1662 let _ = writeln!(out, " assert {field_access}.strip() {op} {expected} # noqa: S101");
1663 } else {
1664 let _ = writeln!(out, " assert {field_access} {op} {expected} # noqa: S101");
1665 }
1666 }
1667 }
1668 "contains" => {
1669 if let Some(val) = &assertion.value {
1670 let expected = value_to_python_string(val);
1671 let cmp_expr = if field_is_enum && val.is_string() {
1673 format!("str({field_access}).lower()")
1674 } else {
1675 field_access.clone()
1676 };
1677 if field_is_optional {
1678 let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
1679 let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
1680 } else {
1681 let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
1682 }
1683 }
1684 }
1685 "contains_all" => {
1686 if let Some(values) = &assertion.values {
1687 for val in values {
1688 let expected = value_to_python_string(val);
1689 let cmp_expr = if field_is_enum && val.is_string() {
1691 format!("str({field_access}).lower()")
1692 } else {
1693 field_access.clone()
1694 };
1695 if field_is_optional {
1696 let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
1697 let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
1698 } else {
1699 let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
1700 }
1701 }
1702 }
1703 }
1704 "not_contains" => {
1705 if let Some(val) = &assertion.value {
1706 let expected = value_to_python_string(val);
1707 let cmp_expr = if field_is_enum && val.is_string() {
1709 format!("str({field_access}).lower()")
1710 } else {
1711 field_access.clone()
1712 };
1713 if field_is_optional {
1714 let _ = writeln!(
1715 out,
1716 " assert {field_access} is None or {expected} not in {cmp_expr} # noqa: S101"
1717 );
1718 } else {
1719 let _ = writeln!(out, " assert {expected} not in {cmp_expr} # noqa: S101");
1720 }
1721 }
1722 }
1723 "not_empty" => {
1724 let _ = writeln!(out, " assert {field_access} # noqa: S101");
1725 }
1726 "is_empty" => {
1727 let _ = writeln!(out, " assert not {field_access} # noqa: S101");
1728 }
1729 "contains_any" => {
1730 if let Some(values) = &assertion.values {
1731 let items: Vec<String> = values.iter().map(value_to_python_string).collect();
1732 let list_str = items.join(", ");
1733 let cmp_expr = if field_is_enum {
1735 format!("str({field_access}).lower()")
1736 } else {
1737 field_access.clone()
1738 };
1739 if field_is_optional {
1740 let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
1741 let _ = writeln!(
1742 out,
1743 " assert any(v in {cmp_expr} for v in [{list_str}]) # noqa: S101"
1744 );
1745 } else {
1746 let _ = writeln!(
1747 out,
1748 " assert any(v in {cmp_expr} for v in [{list_str}]) # noqa: S101"
1749 );
1750 }
1751 }
1752 }
1753 "greater_than" => {
1754 if let Some(val) = &assertion.value {
1755 let expected = value_to_python_string(val);
1756 let _ = writeln!(out, " assert {field_access} > {expected} # noqa: S101");
1757 }
1758 }
1759 "less_than" => {
1760 if let Some(val) = &assertion.value {
1761 let expected = value_to_python_string(val);
1762 let _ = writeln!(out, " assert {field_access} < {expected} # noqa: S101");
1763 }
1764 }
1765 "greater_than_or_equal" | "min" => {
1766 if let Some(val) = &assertion.value {
1767 let expected = value_to_python_string(val);
1768 let _ = writeln!(out, " assert {field_access} >= {expected} # noqa: S101");
1769 }
1770 }
1771 "less_than_or_equal" | "max" => {
1772 if let Some(val) = &assertion.value {
1773 let expected = value_to_python_string(val);
1774 let _ = writeln!(out, " assert {field_access} <= {expected} # noqa: S101");
1775 }
1776 }
1777 "starts_with" => {
1778 if let Some(val) = &assertion.value {
1779 let expected = value_to_python_string(val);
1780 let _ = writeln!(out, " assert {field_access}.startswith({expected}) # noqa: S101");
1781 }
1782 }
1783 "ends_with" => {
1784 if let Some(val) = &assertion.value {
1785 let expected = value_to_python_string(val);
1786 let _ = writeln!(out, " assert {field_access}.endswith({expected}) # noqa: S101");
1787 }
1788 }
1789 "min_length" => {
1790 if let Some(val) = &assertion.value {
1791 if let Some(n) = val.as_u64() {
1792 let _ = writeln!(out, " assert len({field_access}) >= {n} # noqa: S101");
1793 }
1794 }
1795 }
1796 "max_length" => {
1797 if let Some(val) = &assertion.value {
1798 if let Some(n) = val.as_u64() {
1799 let _ = writeln!(out, " assert len({field_access}) <= {n} # noqa: S101");
1800 }
1801 }
1802 }
1803 "count_min" => {
1804 if let Some(val) = &assertion.value {
1805 if let Some(n) = val.as_u64() {
1806 let _ = writeln!(out, " assert len({field_access}) >= {n} # noqa: S101");
1807 }
1808 }
1809 }
1810 "count_equals" => {
1811 if let Some(val) = &assertion.value {
1812 if let Some(n) = val.as_u64() {
1813 let _ = writeln!(out, " assert len({field_access}) == {n} # noqa: S101");
1814 }
1815 }
1816 }
1817 "is_true" => {
1818 let _ = writeln!(out, " assert {field_access} is True # noqa: S101");
1819 }
1820 "is_false" => {
1821 let _ = writeln!(out, " assert not {field_access} # noqa: S101");
1822 }
1823 "method_result" => {
1824 if let Some(method_name) = &assertion.method {
1825 let call_expr = build_python_method_call(result_var, method_name, assertion.args.as_ref());
1826 let check = assertion.check.as_deref().unwrap_or("is_true");
1827 match check {
1828 "equals" => {
1829 if let Some(val) = &assertion.value {
1830 if val.is_boolean() {
1831 if val.as_bool() == Some(true) {
1832 let _ = writeln!(out, " assert {call_expr} is True # noqa: S101");
1833 } else {
1834 let _ = writeln!(out, " assert {call_expr} is False # noqa: S101");
1835 }
1836 } else {
1837 let expected = value_to_python_string(val);
1838 let _ = writeln!(out, " assert {call_expr} == {expected} # noqa: S101");
1839 }
1840 }
1841 }
1842 "is_true" => {
1843 let _ = writeln!(out, " assert {call_expr} # noqa: S101");
1844 }
1845 "is_false" => {
1846 let _ = writeln!(out, " assert not {call_expr} # noqa: S101");
1847 }
1848 "greater_than_or_equal" => {
1849 if let Some(val) = &assertion.value {
1850 let n = val.as_u64().unwrap_or(0);
1851 let _ = writeln!(out, " assert {call_expr} >= {n} # noqa: S101");
1852 }
1853 }
1854 "count_min" => {
1855 if let Some(val) = &assertion.value {
1856 let n = val.as_u64().unwrap_or(0);
1857 let _ = writeln!(out, " assert len({call_expr}) >= {n} # noqa: S101");
1858 }
1859 }
1860 "contains" => {
1861 if let Some(val) = &assertion.value {
1862 let expected = value_to_python_string(val);
1863 let _ = writeln!(out, " assert {expected} in {call_expr} # noqa: S101");
1864 }
1865 }
1866 "is_error" => {
1867 let _ = writeln!(out, " with pytest.raises(Exception): # noqa: B017");
1868 let _ = writeln!(out, " {call_expr}");
1869 }
1870 other_check => {
1871 panic!("unsupported method_result check type: {other_check}");
1872 }
1873 }
1874 } else {
1875 panic!("method_result assertion missing 'method' field");
1876 }
1877 }
1878 "matches_regex" => {
1879 if let Some(val) = &assertion.value {
1880 let expected = value_to_python_string(val);
1881 let _ = writeln!(out, " import re # noqa: PLC0415");
1882 let _ = writeln!(
1883 out,
1884 " assert re.search({expected}, {field_access}) is not None # noqa: S101"
1885 );
1886 }
1887 }
1888 other => {
1889 panic!("unsupported assertion type: {other}");
1890 }
1891 }
1892}
1893
1894fn build_python_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1897 match method_name {
1898 "root_child_count" => format!("{result_var}.root_node().child_count()"),
1899 "root_node_type" => format!("{result_var}.root_node().kind()"),
1900 "named_children_count" => format!("{result_var}.root_node().named_child_count()"),
1901 "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1902 "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1903 "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1904 "contains_node_type" => {
1905 let node_type = args
1906 .and_then(|a| a.get("node_type"))
1907 .and_then(|v| v.as_str())
1908 .unwrap_or("");
1909 format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1910 }
1911 "find_nodes_by_type" => {
1912 let node_type = args
1913 .and_then(|a| a.get("node_type"))
1914 .and_then(|v| v.as_str())
1915 .unwrap_or("");
1916 format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1917 }
1918 "run_query" => {
1919 let query_source = args
1920 .and_then(|a| a.get("query_source"))
1921 .and_then(|v| v.as_str())
1922 .unwrap_or("");
1923 let language = args
1924 .and_then(|a| a.get("language"))
1925 .and_then(|v| v.as_str())
1926 .unwrap_or("");
1927 format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1928 }
1929 _ => {
1930 if let Some(args_val) = args {
1931 let arg_str = args_val
1932 .as_object()
1933 .map(|obj| {
1934 obj.iter()
1935 .map(|(k, v)| format!("{}={}", k, value_to_python_string(v)))
1936 .collect::<Vec<_>>()
1937 .join(", ")
1938 })
1939 .unwrap_or_default();
1940 format!("{result_var}.{method_name}({arg_str})")
1941 } else {
1942 format!("{result_var}.{method_name}()")
1943 }
1944 }
1945 }
1946}
1947
/// Returns the name of the Python helper function that backs `method_name`,
/// or `None` when the method needs no helper.
fn python_method_helper_import(method_name: &str) -> Option<String> {
    let helper = match method_name {
        "has_error_nodes" => "tree_has_error_nodes",
        "contains_node_type" => "tree_contains_node_type",
        "error_count" | "tree_error_count" => "tree_error_count",
        // These helpers share their name with the method.
        "tree_to_sexp" | "find_nodes_by_type" | "run_query" => method_name,
        _ => return None,
    };
    Some(helper.to_owned())
}
1962
1963fn value_to_python_string(value: &serde_json::Value) -> String {
1964 match value {
1965 serde_json::Value::String(s) => python_string_literal(s),
1966 serde_json::Value::Bool(true) => "True".to_string(),
1967 serde_json::Value::Bool(false) => "False".to_string(),
1968 serde_json::Value::Number(n) => n.to_string(),
1969 serde_json::Value::Null => "None".to_string(),
1970 other => python_string_literal(&other.to_string()),
1971 }
1972}
1973
1974fn python_string_literal(s: &str) -> String {
1977 if s.contains('"') && !s.contains('\'') {
1978 let escaped = s
1980 .replace('\\', "\\\\")
1981 .replace('\'', "\\'")
1982 .replace('\n', "\\n")
1983 .replace('\r', "\\r")
1984 .replace('\t', "\\t");
1985 format!("'{escaped}'")
1986 } else {
1987 format!("\"{}\"", escape_python(s))
1988 }
1989}
1990
1991fn emit_python_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1993 let params = match method_name {
1994 "visit_link" => "self, ctx, href, text, title",
1995 "visit_image" => "self, ctx, src, alt, title",
1996 "visit_heading" => "self, ctx, level, text, id",
1997 "visit_code_block" => "self, ctx, lang, code",
1998 "visit_code_inline"
1999 | "visit_strong"
2000 | "visit_emphasis"
2001 | "visit_strikethrough"
2002 | "visit_underline"
2003 | "visit_subscript"
2004 | "visit_superscript"
2005 | "visit_mark"
2006 | "visit_button"
2007 | "visit_summary"
2008 | "visit_figcaption"
2009 | "visit_definition_term"
2010 | "visit_definition_description" => "self, ctx, text",
2011 "visit_text" => "self, ctx, text",
2012 "visit_list_item" => "self, ctx, ordered, marker, text",
2013 "visit_blockquote" => "self, ctx, content, depth",
2014 "visit_table_row" => "self, ctx, cells, is_header",
2015 "visit_custom_element" => "self, ctx, tag_name, html",
2016 "visit_form" => "self, ctx, action_url, method",
2017 "visit_input" => "self, ctx, input_type, name, value",
2018 "visit_audio" | "visit_video" | "visit_iframe" => "self, ctx, src",
2019 "visit_details" => "self, ctx, is_open",
2020 "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
2021 "self, ctx, output, *args"
2022 }
2023 "visit_list_start" => "self, ctx, ordered, *args",
2024 "visit_list_end" => "self, ctx, ordered, output, *args",
2025 _ => "self, ctx, *args",
2026 };
2027
2028 let _ = writeln!(
2029 out,
2030 " def {method_name}({params}): # noqa: A002, ANN001, ANN202, ARG002"
2031 );
2032 match action {
2033 CallbackAction::Skip => {
2034 let _ = writeln!(out, " return \"skip\"");
2035 }
2036 CallbackAction::Continue => {
2037 let _ = writeln!(out, " return \"continue\"");
2038 }
2039 CallbackAction::PreserveHtml => {
2040 let _ = writeln!(out, " return \"preserve_html\"");
2041 }
2042 CallbackAction::Custom { output } => {
2043 let escaped = escape_python(output);
2044 let _ = writeln!(out, " return {{\"custom\": \"{escaped}\"}}");
2045 }
2046 CallbackAction::CustomTemplate { template } => {
2047 let escaped_template = template
2052 .replace('\\', "\\\\")
2053 .replace('\'', "\\'")
2054 .replace('\n', "\\n")
2055 .replace('\r', "\\r")
2056 .replace('\t', "\\t");
2057 let _ = writeln!(out, " return {{\"custom\": f'{escaped_template}'}}");
2058 }
2059 }
2060}