1use super::client;
7use crate::codegen::resolve_field;
8use crate::config::E2eConfig;
9use crate::escape::{escape_python, sanitize_filename, sanitize_ident};
10use crate::field_access::FieldResolver;
11use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, ValidationErrorExpectation};
12use alef_core::backend::GeneratedFile;
13use alef_core::config::AlefConfig;
14use alef_core::hash::{self, CommentStyle};
15use anyhow::Result;
16use heck::{ToShoutySnakeCase, ToSnakeCase};
17use std::collections::HashMap;
18use std::fmt::Write as FmtWrite;
19use std::path::PathBuf;
20
/// E2e code generator for the `python` target.
///
/// Stateless marker type; all work happens in its `E2eCodegen` implementation,
/// which emits a pytest project under the `python` output directory.
pub struct PythonE2eCodegen;
23
24impl super::E2eCodegen for PythonE2eCodegen {
25 fn generate(
26 &self,
27 groups: &[FixtureGroup],
28 e2e_config: &E2eConfig,
29 _alef_config: &AlefConfig,
30 ) -> Result<Vec<GeneratedFile>> {
31 let mut files = Vec::new();
32 let output_base = PathBuf::from(e2e_config.effective_output()).join("python");
33
34 files.push(GeneratedFile {
36 path: output_base.join("conftest.py"),
37 content: render_conftest(e2e_config, groups),
38 generated_header: true,
39 });
40
41 files.push(GeneratedFile {
43 path: output_base.join("__init__.py"),
44 content: "\n".to_string(),
45 generated_header: false,
46 });
47
48 files.push(GeneratedFile {
50 path: output_base.join("tests").join("__init__.py"),
51 content: "\n".to_string(),
52 generated_header: false,
53 });
54
55 let python_pkg = e2e_config.resolve_package("python");
57 let pkg_name = python_pkg
58 .as_ref()
59 .and_then(|p| p.name.as_deref())
60 .unwrap_or("kreuzcrawl");
61 let pkg_path = python_pkg
62 .as_ref()
63 .and_then(|p| p.path.as_deref())
64 .unwrap_or("../../packages/python");
65 let pkg_version = python_pkg
66 .as_ref()
67 .and_then(|p| p.version.as_deref())
68 .unwrap_or("0.1.0");
69 files.push(GeneratedFile {
70 path: output_base.join("pyproject.toml"),
71 content: render_pyproject(pkg_name, pkg_path, pkg_version, e2e_config.dep_mode),
72 generated_header: true,
73 });
74
75 for group in groups {
77 let fixtures: Vec<&Fixture> = group.fixtures.iter().collect();
78
79 if fixtures.is_empty() {
80 continue;
81 }
82
83 if fixtures.iter().all(|f| is_skipped(f, "python")) {
88 continue;
89 }
90
91 let filename = format!("test_{}.py", sanitize_filename(&group.category));
92 let content = render_test_file(&group.category, &fixtures, e2e_config);
93
94 files.push(GeneratedFile {
95 path: output_base.join("tests").join(filename),
96 content,
97 generated_header: true,
98 });
99 }
100
101 Ok(files)
102 }
103
104 fn language_name(&self) -> &'static str {
105 "python"
106 }
107}
108
109fn render_pyproject(
114 pkg_name: &str,
115 pkg_path: &str,
116 pkg_version: &str,
117 dep_mode: crate::config::DependencyMode,
118) -> String {
119 let (deps_line, uv_sources_block) = match dep_mode {
123 crate::config::DependencyMode::Registry => (
124 format!(
125 "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}{pkg_version}\" ]"
126 ),
127 String::new(),
128 ),
129 crate::config::DependencyMode::Local => (
130 format!(
131 "dependencies = [ \"pytest>=7.4\", \"pytest-asyncio>=0.23\", \"pytest-timeout>=2.1\", \"{pkg_name}\" ]"
132 ),
133 format!(
134 "\n[tool.uv]\nsources.{pkg_name} = {{ path = \"{pkg_path}\" }}\n",
135 pkg_path = pkg_path
136 ),
137 ),
138 };
139
140 format!(
141 r#"[build-system]
142build-backend = "setuptools.build_meta"
143requires = [ "setuptools>=68", "wheel" ]
144
145[project]
146name = "{pkg_name}-e2e-tests"
147version = "0.0.0"
148description = "End-to-end tests"
149requires-python = ">=3.10"
150classifiers = [
151 "Programming Language :: Python :: 3 :: Only",
152 "Programming Language :: Python :: 3.10",
153 "Programming Language :: Python :: 3.11",
154 "Programming Language :: Python :: 3.12",
155 "Programming Language :: Python :: 3.13",
156 "Programming Language :: Python :: 3.14",
157]
158{deps_line}
159
160[tool.setuptools]
161packages = [ ]
162{uv_sources_block}
163[tool.ruff]
164lint.ignore = [ "PLR2004" ]
165lint.per-file-ignores."tests/**" = [ "B017", "PT011", "S101", "S108" ]
166
167[tool.pytest]
168ini_options.asyncio_mode = "auto"
169ini_options.testpaths = [ "tests" ]
170ini_options.python_files = "test_*.py"
171ini_options.python_functions = "test_*"
172ini_options.addopts = "-v --strict-markers --tb=short"
173ini_options.timeout = 300
174"#
175 )
176}
177
178fn resolve_function_name(e2e_config: &E2eConfig) -> String {
183 resolve_function_name_for_call(&e2e_config.call)
184}
185
186fn resolve_function_name_for_call(call_config: &crate::config::CallConfig) -> String {
187 call_config
188 .overrides
189 .get("python")
190 .and_then(|o| o.function.clone())
191 .unwrap_or_else(|| call_config.function.clone())
192}
193
194fn resolve_module(e2e_config: &E2eConfig) -> String {
195 e2e_config
196 .call
197 .overrides
198 .get("python")
199 .and_then(|o| o.module.clone())
200 .unwrap_or_else(|| e2e_config.call.module.replace('-', "_"))
201}
202
203fn resolve_options_type(e2e_config: &E2eConfig) -> Option<String> {
204 e2e_config
205 .call
206 .overrides
207 .get("python")
208 .and_then(|o| o.options_type.clone())
209}
210
211fn resolve_options_via(e2e_config: &E2eConfig) -> &str {
213 e2e_config
214 .call
215 .overrides
216 .get("python")
217 .and_then(|o| o.options_via.as_deref())
218 .unwrap_or("kwargs")
219}
220
221fn resolve_enum_fields(e2e_config: &E2eConfig) -> &HashMap<String, String> {
223 static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
224 e2e_config
225 .call
226 .overrides
227 .get("python")
228 .map(|o| &o.enum_fields)
229 .unwrap_or(&EMPTY)
230}
231
232fn resolve_handle_nested_types(e2e_config: &E2eConfig) -> &HashMap<String, String> {
235 static EMPTY: std::sync::LazyLock<HashMap<String, String>> = std::sync::LazyLock::new(HashMap::new);
236 e2e_config
237 .call
238 .overrides
239 .get("python")
240 .map(|o| &o.handle_nested_types)
241 .unwrap_or(&EMPTY)
242}
243
244fn resolve_handle_dict_types(e2e_config: &E2eConfig) -> &std::collections::HashSet<String> {
247 static EMPTY: std::sync::LazyLock<std::collections::HashSet<String>> =
248 std::sync::LazyLock::new(std::collections::HashSet::new);
249 e2e_config
250 .call
251 .overrides
252 .get("python")
253 .map(|o| &o.handle_dict_types)
254 .unwrap_or(&EMPTY)
255}
256
257fn is_skipped(fixture: &Fixture, language: &str) -> bool {
258 fixture.skip.as_ref().is_some_and(|s| s.should_skip(language))
259}
260
/// Renders `conftest.py` for the generated Python e2e project.
///
/// One of three templates is chosen, in this order of precedence:
///
/// 1. Any fixture is an HTTP test: a conftest that spawns the `mock-server`
///    binary, exports `MOCK_SERVER_URL` and provides an `app` request helper.
/// 2. Any fixture's resolved call takes a `file_path` or `bytes` argument:
///    a conftest that changes into `test_documents` and extends the dynamic
///    library search path so libpdfium can be found.
/// 3. Otherwise: a minimal conftest containing only the header and docstring.
///
/// NOTE(review): when a project has both HTTP and file fixtures only the HTTP
/// template is emitted — confirm that this is intended.
fn render_conftest(e2e_config: &E2eConfig, groups: &[FixtureGroup]) -> String {
    // Only used inside the templates' explanatory comment.
    let module = resolve_module(e2e_config);
    let has_http_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| f.is_http_test());

    // File-based fixtures are detected from the argument types of the call
    // each fixture resolves to, not from the fixture input itself.
    let has_file_fixtures = groups.iter().flat_map(|g| g.fixtures.iter()).any(|f| {
        let cc = e2e_config.resolve_call(f.call.as_deref());
        cc.args
            .iter()
            .any(|a| a.arg_type == "file_path" || a.arg_type == "bytes")
    });

    let header = hash::header(CommentStyle::Hash);
    // The templates are `format!` raw strings: `{header}` and `{module}` are
    // interpolated, doubled braces are literal braces in the emitted Python.
    if has_http_fixtures {
        format!(
            r#"{header}"""Pytest configuration for e2e tests."""
from __future__ import annotations

import os
import subprocess
import threading
from pathlib import Path
from typing import Generator

import pytest

# Ensure the package is importable.
# The {module} package is expected to be installed in the current environment.

_HERE = Path(__file__).parent
_E2E_DIR = _HERE.parent
_MOCK_SERVER_BIN = _E2E_DIR / "rust" / "target" / "release" / "mock-server"
_FIXTURES_DIR = _E2E_DIR.parent / "fixtures"


@pytest.fixture(scope="session", autouse=True)
def mock_server() -> Generator[str, None, None]:
    """Spawn the mock HTTP server binary and set MOCK_SERVER_URL."""
    proc = subprocess.Popen(  # noqa: S603
        [str(_MOCK_SERVER_BIN), str(_FIXTURES_DIR)],
        stdout=subprocess.PIPE,
        stderr=None,
        stdin=subprocess.PIPE,
    )
    url = ""
    assert proc.stdout is not None
    for raw_line in proc.stdout:
        line = raw_line.decode().strip()
        if line.startswith("MOCK_SERVER_URL="):
            url = line.split("=", 1)[1]
            break
    os.environ["MOCK_SERVER_URL"] = url
    # Drain stdout in background so the server never blocks.
    threading.Thread(target=proc.stdout.read, daemon=True).start()
    yield url
    if proc.stdin:
        proc.stdin.close()
    proc.terminate()
    proc.wait()


def _make_request(method: str, path: str, **kwargs: object) -> object:
    """Make an HTTP request to the mock server."""
    import urllib.request  # noqa: PLC0415

    base_url = os.environ.get("MOCK_SERVER_URL", "http://localhost:8080")
    url = f"{{base_url}}{{path}}"
    data = kwargs.pop("json", None)
    if data is not None:
        import json  # noqa: PLC0415

        body = json.dumps(data).encode()
        headers = dict(kwargs.pop("headers", {{}}))
        headers.setdefault("Content-Type", "application/json")
        req = urllib.request.Request(url, data=body, headers=headers, method=method.upper())
    else:
        headers = dict(kwargs.pop("headers", {{}}))
        req = urllib.request.Request(url, headers=headers, method=method.upper())
    try:
        with urllib.request.urlopen(req) as resp:  # noqa: S310
            return resp
    except urllib.error.HTTPError as exc:
        return exc


@pytest.fixture(scope="session")
def app(mock_server: str) -> object:  # noqa: ARG001
    """Return a simple HTTP helper bound to the mock server URL."""

    class _App:
        def request(self, path: str, **kwargs: object) -> object:
            method = str(kwargs.pop("method", "GET"))
            return _make_request(method, path, **kwargs)

    return _App()
"#
        )
    } else if has_file_fixtures {
        format!(
            r#"{header}"""Pytest configuration for e2e tests."""
import os
from pathlib import Path

# Ensure the package is importable.
# The {module} package is expected to be installed in the current environment.

# Change to the test_documents directory so that fixture file paths like
# "pdf/fake_memo.pdf" resolve correctly when running pytest from e2e/python/.
_TEST_DOCUMENTS = Path(__file__).parent.parent.parent / "test_documents"
if _TEST_DOCUMENTS.is_dir():
    os.chdir(_TEST_DOCUMENTS)

# On macOS, Pdfium is a separate dylib not on the default library path in dev builds.
# Search common locations (Cargo build output, staged target/release) and extend
# DYLD_LIBRARY_PATH / LD_LIBRARY_PATH so the extension can load the library.
_REPO_ROOT = Path(__file__).parent.parent.parent


def _find_pdfium_dir() -> str | None:
    """Find the directory containing libpdfium, searching Cargo build outputs."""
    for _candidate in sorted(_REPO_ROOT.glob("target/*/release/build/*/out/libpdfium*")):
        return str(_candidate.parent)
    for _candidate in sorted(_REPO_ROOT.glob("target/release/build/*/out/libpdfium*")):
        return str(_candidate.parent)
    return None


_pdfium_dir = _find_pdfium_dir()
if _pdfium_dir is not None:
    for _var in ("DYLD_LIBRARY_PATH", "LD_LIBRARY_PATH"):
        _existing = os.environ.get(_var, "")
        if _pdfium_dir not in _existing:
            os.environ[_var] = f"{{_pdfium_dir}}:{{_existing}}" if _existing else _pdfium_dir
"#
        )
    } else {
        format!(
            r#"{header}"""Pytest configuration for e2e tests."""
# Ensure the package is importable.
# The {module} package is expected to be installed in the current environment.
"#
        )
    }
}
410
411fn render_test_file(category: &str, fixtures: &[&Fixture], e2e_config: &E2eConfig) -> String {
412 let mut out = String::new();
413 out.push_str(&hash::header(CommentStyle::Hash));
414 let _ = writeln!(out, "\"\"\"E2e tests for category: {category}.\"\"\"");
415
416 let module = resolve_module(e2e_config);
417 let function_name = resolve_function_name(e2e_config);
418 let options_type = resolve_options_type(e2e_config);
419 let options_via = resolve_options_via(e2e_config);
420 let enum_fields = resolve_enum_fields(e2e_config);
421 let handle_nested_types = resolve_handle_nested_types(e2e_config);
422 let handle_dict_types = resolve_handle_dict_types(e2e_config);
423 let field_resolver = FieldResolver::new(
424 &e2e_config.fields,
425 &e2e_config.fields_optional,
426 &e2e_config.result_fields,
427 &e2e_config.fields_array,
428 );
429
430 let has_error_test = fixtures
431 .iter()
432 .any(|f| f.assertions.iter().any(|a| a.assertion_type == "error"));
433 let has_skipped = fixtures.iter().any(|f| is_skipped(f, "python"));
434 let has_http_tests = fixtures.iter().any(|f| f.is_http_test());
435
436 let is_async = fixtures.iter().any(|f| {
438 let cc = e2e_config.resolve_call(f.call.as_deref());
439 cc.r#async
440 }) || e2e_config.call.r#async;
441 let needs_pytest = has_error_test || has_skipped || is_async;
442
443 let needs_json_import = options_via == "json"
445 && fixtures.iter().any(|f| {
446 e2e_config
447 .call
448 .args
449 .iter()
450 .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
451 });
452
453 let needs_os_import = e2e_config.call.args.iter().any(|arg| arg.arg_type == "mock_url");
455
456 let needs_path_import = fixtures.iter().any(|f| {
459 let cc = e2e_config.resolve_call(f.call.as_deref());
460 cc.args.iter().any(|arg| {
461 if arg.arg_type != "bytes" {
462 return false;
463 }
464 let val = resolve_field(&f.input, &arg.field);
465 val.as_str()
466 .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::FilePath))
467 })
468 });
469 let needs_base64_import = fixtures.iter().any(|f| {
470 let cc = e2e_config.resolve_call(f.call.as_deref());
471 cc.args.iter().any(|arg| {
472 if arg.arg_type != "bytes" {
473 return false;
474 }
475 let val = resolve_field(&f.input, &arg.field);
476 val.as_str()
477 .is_some_and(|s| matches!(classify_bytes_value(s), BytesKind::Base64))
478 })
479 });
480
481 let needs_re_import = false;
483 let _ = has_http_tests; let needs_options_type = options_via == "kwargs"
487 && options_type.is_some()
488 && fixtures.iter().any(|f| {
489 e2e_config
490 .call
491 .args
492 .iter()
493 .any(|arg| arg.arg_type == "json_object" && !resolve_field(&f.input, &arg.field).is_null())
494 });
495
496 let mut used_enum_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
498 if needs_options_type && !enum_fields.is_empty() {
499 for fixture in fixtures.iter() {
500 for arg in &e2e_config.call.args {
501 if arg.arg_type == "json_object" {
502 let value = resolve_field(&fixture.input, &arg.field);
503 if let Some(obj) = value.as_object() {
504 for key in obj.keys() {
505 if let Some(enum_type) = enum_fields.get(key) {
506 used_enum_types.insert(enum_type.clone());
507 }
508 }
509 }
510 }
511 }
512 }
513 }
514
515 let mut stdlib_imports: Vec<String> = Vec::new();
519 let mut thirdparty_bare: Vec<String> = Vec::new();
520 let mut thirdparty_from: Vec<String> = Vec::new();
521
522 if needs_base64_import {
523 stdlib_imports.push("import base64".to_string());
524 }
525
526 if needs_json_import {
527 stdlib_imports.push("import json".to_string());
528 }
529
530 if needs_os_import {
531 stdlib_imports.push("import os".to_string());
532 }
533
534 if needs_path_import {
535 stdlib_imports.push("from pathlib import Path".to_string());
536 }
537
538 if needs_re_import {
539 stdlib_imports.push("import re".to_string());
540 }
541
542 if needs_pytest {
543 thirdparty_bare.push("import pytest # noqa: F401".to_string());
548 }
549
550 let has_non_http_fixtures = fixtures
553 .iter()
554 .any(|f| !f.is_http_test() && !is_skipped(f, "python") && !f.assertions.is_empty());
555 if has_non_http_fixtures {
556 let handle_constructors: Vec<String> = e2e_config
558 .call
559 .args
560 .iter()
561 .filter(|arg| arg.arg_type == "handle")
562 .map(|arg| format!("create_{}", arg.name.to_snake_case()))
563 .collect();
564
565 let mut import_names: Vec<String> = Vec::new();
569 for fixture in fixtures.iter() {
570 let cc = e2e_config.resolve_call(fixture.call.as_deref());
571 let fn_name = resolve_function_name_for_call(cc);
572 if !import_names.contains(&fn_name) {
573 import_names.push(fn_name);
574 }
575 }
576 if import_names.is_empty() {
579 import_names.push(function_name.clone());
580 }
581 for ctor in &handle_constructors {
582 if !import_names.contains(ctor) {
583 import_names.push(ctor.clone());
584 }
585 }
586
587 let needs_config_import = e2e_config.call.args.iter().any(|arg| {
589 arg.arg_type == "handle"
590 && fixtures.iter().any(|f| {
591 let val = resolve_field(&f.input, &arg.field);
592 !val.is_null() && val.as_object().is_some_and(|o| !o.is_empty())
593 })
594 });
595 if needs_config_import {
596 let config_class = options_type.as_deref().unwrap_or("CrawlConfig");
597 if !import_names.contains(&config_class.to_string()) {
598 import_names.push(config_class.to_string());
599 }
600 }
601
602 if !handle_nested_types.is_empty() {
604 let mut used_nested_types: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
605 for fixture in fixtures.iter() {
606 for arg in &e2e_config.call.args {
607 if arg.arg_type == "handle" {
608 let config_value = resolve_field(&fixture.input, &arg.field);
609 if let Some(obj) = config_value.as_object() {
610 for key in obj.keys() {
611 if let Some(type_name) = handle_nested_types.get(key) {
612 if obj[key].is_object() {
613 used_nested_types.insert(type_name.clone());
614 }
615 }
616 }
617 }
618 }
619 }
620 }
621 for type_name in used_nested_types {
622 if !import_names.contains(&type_name) {
623 import_names.push(type_name);
624 }
625 }
626 }
627
628 for fixture in fixtures.iter() {
630 for assertion in &fixture.assertions {
631 if assertion.assertion_type == "method_result" {
632 if let Some(method_name) = &assertion.method {
633 let import = python_method_helper_import(method_name);
634 if let Some(name) = import {
635 if !import_names.contains(&name) {
636 import_names.push(name);
637 }
638 }
639 }
640 }
641 }
642 }
643
644 if let (true, Some(opts_type)) = (needs_options_type, &options_type) {
645 import_names.push(opts_type.clone());
646 thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
647 if !used_enum_types.is_empty() {
649 let enum_mod = e2e_config
650 .call
651 .overrides
652 .get("python")
653 .and_then(|o| o.enum_module.as_deref())
654 .unwrap_or(&module);
655 let enum_names: Vec<&String> = used_enum_types.iter().collect();
656 thirdparty_from.push(format!(
657 "from {enum_mod} import {}",
658 enum_names.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
659 ));
660 }
661 } else {
662 thirdparty_from.push(format!("from {module} import {}", import_names.join(", ")));
663 }
664 }
665
666 stdlib_imports.sort();
667 thirdparty_bare.sort();
668 thirdparty_from.sort();
669
670 if !stdlib_imports.is_empty() {
672 for imp in &stdlib_imports {
673 let _ = writeln!(out, "{imp}");
674 }
675 let _ = writeln!(out);
676 }
677 for imp in &thirdparty_bare {
679 let _ = writeln!(out, "{imp}");
680 }
681 for imp in &thirdparty_from {
682 let _ = writeln!(out, "{imp}");
683 }
684 let _ = writeln!(out);
686 let _ = writeln!(out);
687
688 for fixture in fixtures {
689 if fixture.is_http_test() {
690 render_http_test_function(&mut out, fixture);
691 } else if !is_skipped(fixture, "python") && fixture.assertions.is_empty() {
692 let fn_name = sanitize_ident(&fixture.id);
694 let description = &fixture.description;
695 let desc_with_period = if description.ends_with('.') {
696 description.to_string()
697 } else {
698 format!("{description}.")
699 };
700 let _ = writeln!(
701 out,
702 "@pytest.mark.skip(reason=\"no assertions configured for this fixture in python e2e\")"
703 );
704 let _ = writeln!(out, "def test_{fn_name}() -> None:");
705 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
706 } else {
707 render_test_function(
708 &mut out,
709 fixture,
710 e2e_config,
711 options_type.as_deref(),
712 options_via,
713 enum_fields,
714 handle_nested_types,
715 handle_dict_types,
716 &field_resolver,
717 );
718 }
719 let _ = writeln!(out);
720 }
721
722 out
723}
724
/// Stateless renderer that emits the Python source of HTTP fixture tests.
///
/// Used through its `client::TestClientRenderer` implementation; the emitted
/// tests talk to the mock server with `urllib.request`.
struct PythonTestClientRenderer;
736
737impl client::TestClientRenderer for PythonTestClientRenderer {
738 fn language_name(&self) -> &'static str {
739 "python"
740 }
741
742 fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
746 let desc_with_period = if description.ends_with('.') {
747 description.to_string()
748 } else {
749 format!("{description}.")
750 };
751
752 if let Some(reason) = skip_reason {
753 let escaped = escape_python(reason);
754 let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
755 }
756 let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
757 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
758 if skip_reason.is_some() {
759 let _ = writeln!(out, " ...");
760 }
761 }
762
763 fn render_test_close(&self, _out: &mut String) {}
768
769 fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
778 let _ = writeln!(out, " import os # noqa: PLC0415");
779 let _ = writeln!(out, " import urllib.request # noqa: PLC0415");
780 let _ = writeln!(out, " base = os.environ.get(\"MOCK_SERVER_URL\", mock_server)");
781 let _ = writeln!(out, " url = f\"{{base}}{}\"", ctx.path);
782
783 let method = ctx.method.to_uppercase();
784
785 let mut header_entries: Vec<String> = ctx
787 .headers
788 .iter()
789 .map(|(k, v)| format!(" \"{}\": \"{}\",", escape_python(k), escape_python(v)))
790 .collect();
791 header_entries.sort(); let headers_py = if header_entries.is_empty() {
793 "{}".to_string()
794 } else {
795 format!("{{\n{}\n }}", header_entries.join("\n"))
796 };
797
798 if let Some(body) = ctx.body {
799 let py_body = json_to_python_literal(body);
800 let _ = writeln!(out, " import json # noqa: PLC0415");
801 let _ = writeln!(out, " _headers = {headers_py}");
802 let _ = writeln!(out, " _headers.setdefault(\"Content-Type\", \"application/json\")");
803 let _ = writeln!(out, " _body = json.dumps({py_body}).encode()");
804 let _ = writeln!(
805 out,
806 " _req = urllib.request.Request(url, data=_body, headers=_headers, method=\"{method}\")"
807 );
808 } else {
809 let _ = writeln!(out, " _headers = {headers_py}");
810 let _ = writeln!(
811 out,
812 " _req = urllib.request.Request(url, headers=_headers, method=\"{method}\")"
813 );
814 }
815
816 let _ = writeln!(
820 out,
821 " class _NoRedirect(urllib.request.HTTPRedirectHandler): # noqa: N801"
822 );
823 let _ = writeln!(
824 out,
825 " def redirect_request(self, *args, **kwargs): return None # noqa: E704"
826 );
827 let _ = writeln!(out, " _opener = urllib.request.build_opener(_NoRedirect())");
828 let _ = writeln!(out, " try:");
829 let _ = writeln!(out, " response = _opener.open(_req) # noqa: S310");
830 let _ = writeln!(out, " status_code = response.status");
831 let _ = writeln!(out, " resp_body = response.read() # noqa: F841");
832 let _ = writeln!(out, " resp_headers = dict(response.headers) # noqa: F841");
833 let _ = writeln!(out, " except urllib.error.HTTPError as _exc:");
834 let _ = writeln!(out, " status_code = _exc.code");
835 let _ = writeln!(out, " resp_body = _exc.read() # noqa: F841");
836 let _ = writeln!(out, " resp_headers = dict(_exc.headers) # noqa: F841");
837 }
838
839 fn render_assert_status(&self, out: &mut String, _response_var: &str, status: u16) {
840 let _ = writeln!(out, " assert status_code == {status} # noqa: S101");
841 }
842
843 fn render_assert_header(&self, out: &mut String, _response_var: &str, name: &str, expected: &str) {
846 let escaped_name = escape_python(&name.to_lowercase());
847 match expected {
848 "<<present>>" => {
849 let _ = writeln!(out, " assert \"{escaped_name}\" in resp_headers # noqa: S101");
850 }
851 "<<absent>>" => {
852 let _ = writeln!(
853 out,
854 " assert resp_headers.get(\"{escaped_name}\") is None # noqa: S101"
855 );
856 }
857 "<<uuid>>" => {
858 let _ = writeln!(out, " import re # noqa: PLC0415");
859 let _ = writeln!(
860 out,
861 " assert re.match(r'^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$', resp_headers[\"{escaped_name}\"]) # noqa: S101"
862 );
863 }
864 exact => {
865 let escaped_val = escape_python(exact);
866 let _ = writeln!(
867 out,
868 " assert resp_headers[\"{escaped_name}\"] == \"{escaped_val}\" # noqa: S101"
869 );
870 }
871 }
872 }
873
874 fn render_assert_json_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
879 if let serde_json::Value::String(s) = expected {
880 let py_val = format!("\"{}\"", escape_python(s));
881 let _ = writeln!(out, " assert resp_body.decode() == {py_val} # noqa: S101");
882 } else {
883 let py_val = json_to_python_literal(expected);
884 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
885 let _ = writeln!(out, " data = _json.loads(resp_body)");
886 let _ = writeln!(out, " assert data == {py_val} # noqa: S101");
887 }
888 }
889
890 fn render_assert_partial_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
893 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
894 let _ = writeln!(out, " data = _json.loads(resp_body)");
895 if let Some(obj) = expected.as_object() {
896 for (key, val) in obj {
897 let py_val = json_to_python_literal(val);
898 let escaped_key = escape_python(key);
899 let _ = writeln!(out, " assert data[\"{escaped_key}\"] == {py_val} # noqa: S101");
900 }
901 }
902 }
903
904 fn render_assert_validation_errors(
910 &self,
911 out: &mut String,
912 _response_var: &str,
913 errors: &[ValidationErrorExpectation],
914 ) {
915 let _ = writeln!(out, " import json as _json # noqa: PLC0415");
916 let _ = writeln!(out, " _data = _json.loads(resp_body)");
917 let _ = writeln!(out, " errors = _data.get(\"errors\", [])");
918 for ve in errors {
919 let loc_py: Vec<String> = ve.loc.iter().map(|s| format!("\"{}\"", escape_python(s))).collect();
920 let loc_str = loc_py.join(", ");
921 let escaped_msg = escape_python(&ve.msg);
922 let _ = writeln!(
923 out,
924 " assert any(e[\"loc\"] == [{loc_str}] and \"{escaped_msg}\" in e[\"msg\"] for e in errors) # noqa: S101"
925 );
926 }
927 }
928}
929
930fn render_http_test_function(out: &mut String, fixture: &Fixture) {
937 if let Some(http) = &fixture.http {
940 if http.expected_response.status_code == 101 {
941 let fn_name = sanitize_ident(&fixture.id);
942 let description = &fixture.description;
943 let desc_with_period = if description.ends_with('.') {
944 description.to_string()
945 } else {
946 format!("{description}.")
947 };
948 let _ = writeln!(
949 out,
950 "@pytest.mark.skip(reason=\"HTTP 101 WebSocket upgrade cannot be tested via urllib\")"
951 );
952 let _ = writeln!(out, "def test_{fn_name}(mock_server: str) -> None:");
953 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
954 let _ = writeln!(out, " ...");
955 let _ = writeln!(out);
956 return;
957 }
958 }
959
960 client::http_call::render_http_test(out, &PythonTestClientRenderer, fixture);
961}
962
963#[allow(clippy::too_many_arguments)]
968fn render_test_function(
969 out: &mut String,
970 fixture: &Fixture,
971 e2e_config: &E2eConfig,
972 options_type: Option<&str>,
973 options_via: &str,
974 enum_fields: &HashMap<String, String>,
975 handle_nested_types: &HashMap<String, String>,
976 handle_dict_types: &std::collections::HashSet<String>,
977 field_resolver: &FieldResolver,
978) {
979 let fn_name = sanitize_ident(&fixture.id);
980 let description = &fixture.description;
981 let call_config = e2e_config.resolve_call(fixture.call.as_deref());
982 let function_name = resolve_function_name_for_call(call_config);
983 let result_var = &call_config.result_var;
984
985 let python_override = call_config.overrides.get("python");
987 let result_is_simple = python_override.is_some_and(|o| o.result_is_simple);
988 let arg_name_map = python_override.map(|o| &o.arg_name_map);
989
990 let desc_with_period = if description.ends_with('.') {
991 description.to_string()
992 } else {
993 format!("{description}.")
994 };
995
996 if is_skipped(fixture, "python") {
998 let reason = fixture
999 .skip
1000 .as_ref()
1001 .and_then(|s| s.reason.as_deref())
1002 .unwrap_or("skipped for python");
1003 let escaped = escape_python(reason);
1004 let _ = writeln!(out, "@pytest.mark.skip(reason=\"{escaped}\")");
1005 }
1006
1007 let is_async = call_config.r#async;
1008 if is_async {
1009 let _ = writeln!(out, "@pytest.mark.asyncio");
1010 let _ = writeln!(out, "async def test_{fn_name}() -> None:");
1011 } else {
1012 let _ = writeln!(out, "def test_{fn_name}() -> None:");
1013 }
1014 let _ = writeln!(out, " \"\"\"{desc_with_period}\"\"\"");
1015
1016 let has_error_assertion = fixture.assertions.iter().any(|a| a.assertion_type == "error");
1018
1019 let mut arg_bindings = Vec::new();
1021 let mut kwarg_exprs = Vec::new();
1022 for arg in &call_config.args {
1023 let var_name = &arg.name;
1024 let kwarg_name = arg_name_map
1026 .and_then(|m| m.get(var_name.as_str()))
1027 .map(|s| s.as_str())
1028 .unwrap_or(var_name.as_str());
1029
1030 if arg.arg_type == "handle" {
1031 let constructor_name = format!("create_{}", arg.name.to_snake_case());
1034 let config_value = resolve_field(&fixture.input, &arg.field);
1035 if config_value.is_null()
1036 || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
1037 {
1038 arg_bindings.push(format!(" {var_name} = {constructor_name}(None)"));
1039 } else if let Some(obj) = config_value.as_object() {
1040 let kwargs: Vec<String> = obj
1044 .iter()
1045 .map(|(k, v)| {
1046 let snake_key = k.to_snake_case();
1047 let py_val = if let Some(type_name) = handle_nested_types.get(k) {
1048 if let Some(nested_obj) = v.as_object() {
1050 if nested_obj.is_empty() {
1051 format!("{type_name}()")
1053 } else if handle_dict_types.contains(k) {
1054 json_to_python_literal(v)
1059 } else {
1060 let nested_kwargs: Vec<String> = nested_obj
1062 .iter()
1063 .map(|(nk, nv)| {
1064 let nested_snake_key = nk.to_snake_case();
1065 format!("{nested_snake_key}={}", json_to_python_literal(nv))
1066 })
1067 .collect();
1068 format!("{type_name}({})", nested_kwargs.join(", "))
1069 }
1070 } else {
1071 json_to_python_literal(v)
1073 }
1074 } else if k == "request_timeout" {
1075 if let Some(ms) = v.as_u64() {
1081 format!("{}", ms / 1000)
1082 } else {
1083 json_to_python_literal(v)
1084 }
1085 } else {
1086 json_to_python_literal(v)
1087 };
1088 format!("{snake_key}={py_val}")
1089 })
1090 .collect();
1091 let config_class = options_type.unwrap_or("CrawlConfig");
1093 let single_line = format!(" {var_name}_config = {config_class}({})", kwargs.join(", "));
1094 if single_line.len() <= 120 {
1095 arg_bindings.push(single_line);
1096 } else {
1097 let mut lines = format!(" {var_name}_config = {config_class}(\n");
1099 for kw in &kwargs {
1100 lines.push_str(&format!(" {kw},\n"));
1101 }
1102 lines.push_str(" )");
1103 arg_bindings.push(lines);
1104 }
1105 arg_bindings.push(format!(" {var_name} = {constructor_name}({var_name}_config)"));
1106 } else {
1107 let literal = json_to_python_literal(config_value);
1108 arg_bindings.push(format!(" {var_name} = {constructor_name}({literal})"));
1109 }
1110 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1111 continue;
1112 }
1113
1114 if arg.arg_type == "mock_url" {
1115 let fixture_id = &fixture.id;
1116 arg_bindings.push(format!(
1117 " {var_name} = os.environ['MOCK_SERVER_URL'] + '/fixtures/{fixture_id}'"
1118 ));
1119 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1120 continue;
1121 }
1122
1123 let value = resolve_field(&fixture.input, &arg.field);
1124
1125 if value.is_null() && arg.optional {
1126 continue;
1127 }
1128
1129 if arg.arg_type == "json_object" && !value.is_null() {
1132 match options_via {
1133 "dict" => {
1134 let literal = json_to_python_literal(value);
1136 let noqa = if literal.contains("/tmp/") {
1137 " # noqa: S108"
1138 } else {
1139 ""
1140 };
1141 arg_bindings.push(format!(" {var_name} = {literal}{noqa}"));
1142 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1143 continue;
1144 }
1145 "json" => {
1146 let json_str = serde_json::to_string(value).unwrap_or_default();
1148 let escaped = escape_python(&json_str);
1149 arg_bindings.push(format!(" {var_name} = json.loads(\"{escaped}\")"));
1150 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1151 continue;
1152 }
1153 _ => {
1154 if let (Some(opts_type), Some(obj)) = (options_type, value.as_object()) {
1156 let kwargs: Vec<String> = obj
1157 .iter()
1158 .map(|(k, v)| {
1159 let snake_key = k.to_snake_case();
1160 let py_val = if let Some(enum_type) = enum_fields.get(k) {
1161 if let Some(s) = v.as_str() {
1163 let upper_val = s.to_shouty_snake_case();
1164 format!("{enum_type}.{upper_val}")
1165 } else {
1166 json_to_python_literal(v)
1167 }
1168 } else {
1169 json_to_python_literal(v)
1170 };
1171 format!("{snake_key}={py_val}")
1172 })
1173 .collect();
1174 let constructor = format!("{opts_type}({})", kwargs.join(", "));
1175 arg_bindings.push(format!(" {var_name} = {constructor}"));
1176 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1177 continue;
1178 }
1179 }
1180 }
1181 }
1182
1183 if arg.optional && value.is_null() {
1186 continue;
1187 }
1188
1189 if value.is_null() && !arg.optional {
1191 let default_val = match arg.arg_type.as_str() {
1192 "string" => "\"\"".to_string(),
1193 "int" | "integer" => "0".to_string(),
1194 "float" | "number" => "0.0".to_string(),
1195 "bool" | "boolean" => "False".to_string(),
1196 _ => "None".to_string(),
1197 };
1198 arg_bindings.push(format!(" {var_name} = {default_val}"));
1199 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1200 continue;
1201 }
1202
1203 if arg.arg_type == "bytes" {
1215 if let Some(raw) = value.as_str() {
1216 match classify_bytes_value(raw) {
1217 BytesKind::FilePath => {
1218 let escaped = escape_python(raw);
1219 arg_bindings.push(format!(" {var_name} = Path(\"{escaped}\").read_bytes()"));
1220 }
1221 BytesKind::InlineText => {
1222 let escaped = escape_python(raw);
1225 arg_bindings.push(format!(" {var_name} = b\"{escaped}\""));
1226 }
1227 BytesKind::Base64 => {
1228 let escaped = escape_python(raw);
1229 arg_bindings.push(format!(" {var_name} = base64.b64decode(\"{escaped}\")"));
1230 }
1231 }
1232 } else {
1233 arg_bindings.push(format!(" {var_name} = None"));
1234 }
1235 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1236 continue;
1237 }
1238
1239 let literal = json_to_python_literal(value);
1240 let noqa = if literal.contains("/tmp/") {
1241 " # noqa: S108"
1242 } else {
1243 ""
1244 };
1245 arg_bindings.push(format!(" {var_name} = {literal}{noqa}"));
1246 kwarg_exprs.push(format!("{kwarg_name}={var_name}"));
1247 }
1248
1249 if let Some(visitor_spec) = &fixture.visitor {
1251 let _ = writeln!(out, " class _TestVisitor:");
1252 for (method_name, action) in &visitor_spec.callbacks {
1253 emit_python_visitor_method(out, method_name, action);
1254 }
1255 kwarg_exprs.push("visitor=_TestVisitor()".to_string());
1256 }
1257
1258 for binding in &arg_bindings {
1259 let _ = writeln!(out, "{binding}");
1260 }
1261
1262 let call_args = kwarg_exprs.join(", ");
1263 let await_prefix = if is_async { "await " } else { "" };
1264 let call_expr = format!("{await_prefix}{function_name}({call_args})");
1265
1266 if has_error_assertion {
1267 let error_assertion = fixture.assertions.iter().find(|a| a.assertion_type == "error");
1269 let has_message = error_assertion
1270 .and_then(|a| a.value.as_ref())
1271 .and_then(|v| v.as_str())
1272 .is_some();
1273
1274 if has_message {
1275 let _ = writeln!(out, " with pytest.raises(Exception) as exc_info: # noqa: B017");
1276 let _ = writeln!(out, " {call_expr}");
1277 if let Some(msg) = error_assertion.and_then(|a| a.value.as_ref()).and_then(|v| v.as_str()) {
1278 let escaped = escape_python(msg);
1279 let _ = writeln!(out, " assert \"{escaped}\" in str(exc_info.value) # noqa: S101");
1280 }
1281 } else {
1282 let _ = writeln!(out, " with pytest.raises(Exception): # noqa: B017");
1283 let _ = writeln!(out, " {call_expr}");
1284 }
1285
1286 return;
1289 }
1290
1291 let has_usable_assertion = fixture.assertions.iter().any(|a| {
1294 if a.assertion_type == "not_error" || a.assertion_type == "error" {
1295 return false;
1296 }
1297 if result_is_simple {
1298 if let Some(f) = &a.field {
1301 let f_lower = f.to_lowercase();
1302 if !f.is_empty()
1303 && f_lower != "content"
1304 && f_lower != "result"
1305 && (f_lower.starts_with("metadata")
1306 || f_lower.starts_with("document")
1307 || f_lower.starts_with("structure")
1308 || f_lower.starts_with("pages")
1309 || f_lower.starts_with("chunks")
1310 || f_lower.starts_with("tables")
1311 || f_lower.starts_with("images")
1312 || f_lower.starts_with("mime_type")
1313 || f_lower.starts_with("is_")
1314 || f_lower == "byte_length"
1315 || f_lower == "page_count"
1316 || f_lower == "output_format"
1317 || f_lower == "extraction_method")
1318 {
1319 return false; }
1321 }
1322 return true;
1323 }
1324 match &a.field {
1325 Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
1326 _ => true,
1327 }
1328 });
1329 let py_result_var = if has_usable_assertion {
1330 result_var.to_string()
1331 } else {
1332 "_".to_string()
1333 };
1334 let _ = writeln!(out, " {py_result_var} = {call_expr}");
1335
1336 let fields_enum = &e2e_config.fields_enum;
1337 for assertion in &fixture.assertions {
1338 if assertion.assertion_type == "not_error" {
1339 if !call_config.returns_result {
1342 continue;
1343 }
1344 continue;
1346 }
1347 render_assertion(
1348 out,
1349 assertion,
1350 result_var,
1351 field_resolver,
1352 fields_enum,
1353 result_is_simple,
1354 );
1355 }
1356}
1357
/// How a string fixture value for a `bytes` argument is turned into Python code.
///
/// Produced by `classify_bytes_value`; the derives make the classification
/// cheap to copy, comparable and printable in diagnostics and tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BytesKind {
    /// The value is a file path; the generated test reads it with `Path(...).read_bytes()`.
    FilePath,
    /// The value is literal text; the generated test embeds it as a `b"..."` literal.
    InlineText,
    /// The value is a base64 payload; the generated test decodes it with `base64.b64decode(...)`.
    Base64,
}
1371
1372fn classify_bytes_value(s: &str) -> BytesKind {
1381 if s.starts_with('<') || s.starts_with('{') || s.starts_with('[') || s.contains(' ') {
1383 return BytesKind::InlineText;
1384 }
1385
1386 let first = s.chars().next().unwrap_or('\0');
1389 if first.is_ascii_alphanumeric() || first == '_' {
1390 if let Some(slash_pos) = s.find('/') {
1391 if slash_pos > 0 {
1392 let after_slash = &s[slash_pos + 1..];
1393 if after_slash.contains('.') && !after_slash.is_empty() {
1394 return BytesKind::FilePath;
1395 }
1396 }
1397 }
1398 }
1399
1400 BytesKind::Base64
1402}
1403
1404fn json_to_python_literal(value: &serde_json::Value) -> String {
1409 match value {
1410 serde_json::Value::Null => "None".to_string(),
1411 serde_json::Value::Bool(true) => "True".to_string(),
1412 serde_json::Value::Bool(false) => "False".to_string(),
1413 serde_json::Value::Number(n) => n.to_string(),
1414 serde_json::Value::String(s) => python_string_literal(s),
1415 serde_json::Value::Array(arr) => {
1416 let items: Vec<String> = arr.iter().map(json_to_python_literal).collect();
1417 format!("[{}]", items.join(", "))
1418 }
1419 serde_json::Value::Object(map) => {
1420 let items: Vec<String> = map
1421 .iter()
1422 .map(|(k, v)| format!("\"{}\": {}", escape_python(k), json_to_python_literal(v)))
1423 .collect();
1424 format!("{{{}}}", items.join(", "))
1425 }
1426 }
1427}
1428
/// Append the Python statements that check one fixture `assertion` to `out`.
///
/// Depending on the assertion this writes an `assert ...` line (with a lint
/// suppression comment), a `# skipped: ...` comment, or nothing at all (for
/// example when the expected `value` is missing or has the wrong JSON type).
///
/// * `result_var` - name of the Python variable holding the call result.
/// * `field_resolver` - validates fixture field paths, reports optionality and
///   turns a path into a Python accessor expression.
/// * `fields_enum` - field names (as written or after resolution) that are
///   compared through `str(...).lower()` in the membership assertions.
/// * `result_is_simple` - when `true`, the result variable itself is asserted
///   on and assertions on the struct-like field names listed below are skipped.
///
/// # Panics
///
/// Panics on an unknown assertion type, on an unknown `method_result` check,
/// and when a `method_result` assertion carries no `method`.
fn render_assertion(
    out: &mut String,
    assertion: &Assertion,
    result_var: &str,
    field_resolver: &FieldResolver,
    fields_enum: &std::collections::HashSet<String>,
    result_is_simple: bool,
) {
    // Simple results: assertions that name one of these struct-like fields are
    // replaced by a comment. `content` and `result` are never skipped here.
    if result_is_simple {
        if let Some(f) = &assertion.field {
            let f_lower = f.to_lowercase();
            if !f.is_empty()
                && f_lower != "content"
                && f_lower != "result"
                && (f_lower.starts_with("metadata")
                    || f_lower.starts_with("document")
                    || f_lower.starts_with("structure")
                    || f_lower.starts_with("pages")
                    || f_lower.starts_with("chunks")
                    || f_lower.starts_with("tables")
                    || f_lower.starts_with("images")
                    || f_lower.starts_with("mime_type")
                    || f_lower.starts_with("is_")
                    || f_lower == "byte_length"
                    || f_lower == "page_count"
                    || f_lower == "output_format"
                    || f_lower == "extraction_method")
            {
                let _ = writeln!(out, " # skipped: field '{f}' not applicable for simple result type");
                return;
            }
        }
    }

    // Synthetic fields: names that are answered with a hand-written Python
    // expression instead of an accessor from `field_resolver`. Each arm
    // returns, so none of the generic handling below runs for them.
    if let Some(f) = &assertion.field {
        match f.as_str() {
            "chunks_have_content" => {
                // True when every chunk has truthy content; a missing chunk list counts as empty.
                let pred = format!("all(c.content for c in ({result_var}.chunks or []))");
                match assertion.assertion_type.as_str() {
                    "is_true" => {
                        let _ = writeln!(out, " assert {pred} # noqa: S101");
                    }
                    "is_false" => {
                        let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
                    }
                    _ => {
                        let _ = writeln!(
                            out,
                            " # skipped: unsupported assertion type on synthetic field '{f}'"
                        );
                    }
                }
                return;
            }
            "chunks_have_embeddings" => {
                // True when every chunk carries a non-empty embedding.
                let pred = format!(
                    "all(c.embedding is not None and len(c.embedding) > 0 for c in ({result_var}.chunks or []))"
                );
                match assertion.assertion_type.as_str() {
                    "is_true" => {
                        let _ = writeln!(out, " assert {pred} # noqa: S101");
                    }
                    "is_false" => {
                        let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
                    }
                    _ => {
                        let _ = writeln!(
                            out,
                            " # skipped: unsupported assertion type on synthetic field '{f}'"
                        );
                    }
                }
                return;
            }
            "embeddings" => {
                // The result variable itself is the collection whose length is checked.
                // Count assertions with a missing or non-integer value emit nothing.
                match assertion.assertion_type.as_str() {
                    "count_equals" => {
                        if let Some(val) = &assertion.value {
                            if let Some(n) = val.as_u64() {
                                let _ = writeln!(out, " assert len({result_var}) == {n} # noqa: S101");
                            }
                        }
                    }
                    "count_min" => {
                        if let Some(val) = &assertion.value {
                            if let Some(n) = val.as_u64() {
                                let _ = writeln!(out, " assert len({result_var}) >= {n} # noqa: S101");
                            }
                        }
                    }
                    "not_empty" => {
                        let _ = writeln!(out, " assert len({result_var}) > 0 # noqa: S101");
                    }
                    "is_empty" => {
                        let _ = writeln!(out, " assert len({result_var}) == 0 # noqa: S101");
                    }
                    _ => {
                        let _ = writeln!(
                            out,
                            " # skipped: unsupported assertion type on synthetic field 'embeddings'"
                        );
                    }
                }
                return;
            }
            "embedding_dimensions" => {
                // Length of the first element of the result, or 0 when the result is falsy.
                let expr = format!("(len({result_var}[0]) if {result_var} else 0)");
                match assertion.assertion_type.as_str() {
                    "equals" => {
                        if let Some(val) = &assertion.value {
                            let py_val = value_to_python_string(val);
                            let _ = writeln!(out, " assert {expr} == {py_val} # noqa: S101");
                        }
                    }
                    "greater_than" => {
                        if let Some(val) = &assertion.value {
                            let py_val = value_to_python_string(val);
                            let _ = writeln!(out, " assert {expr} > {py_val} # noqa: S101");
                        }
                    }
                    _ => {
                        let _ = writeln!(
                            out,
                            " # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
                        );
                    }
                }
                return;
            }
            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
                // Pick the Python predicate over the result for the specific quality check.
                let pred = match f.as_str() {
                    "embeddings_valid" => {
                        // Every element is truthy.
                        format!("all(bool(e) for e in {result_var})")
                    }
                    "embeddings_finite" => {
                        // `v == v` is false only for NaN; the second test rejects +/- infinity.
                        format!("all(v == v and abs(v) != float('inf') for e in {result_var} for v in e)")
                    }
                    "embeddings_non_zero" => {
                        // Every element has at least one non-zero component.
                        format!("all(any(v != 0.0 for v in e) for e in {result_var})")
                    }
                    "embeddings_normalized" => {
                        // Sum of squares of every element is within 1e-3 of 1.0.
                        format!("all(abs(sum(v * v for v in e) - 1.0) < 1e-3 for e in {result_var})")
                    }
                    // The outer arm only matches the four names handled above.
                    _ => unreachable!(),
                };
                match assertion.assertion_type.as_str() {
                    "is_true" => {
                        let _ = writeln!(out, " assert {pred} # noqa: S101");
                    }
                    "is_false" => {
                        let _ = writeln!(out, " assert not ({pred}) # noqa: S101");
                    }
                    _ => {
                        let _ = writeln!(
                            out,
                            " # skipped: unsupported assertion type on synthetic field '{f}'"
                        );
                    }
                }
                return;
            }
            "keywords" | "keywords_count" => {
                // Always skipped, regardless of assertion type.
                let _ = writeln!(
                    out,
                    " # skipped: field '{f}' not available on Python ExtractionResult"
                );
                return;
            }
            _ => {}
        }
    }

    // Structured results: skip fields the resolver does not accept for the result type.
    if !result_is_simple {
        if let Some(f) = &assertion.field {
            if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
                let _ = writeln!(out, " # skipped: field '{f}' not available on result type");
                return;
            }
        }
    }

    // Python expression under test: the bare result variable for simple results
    // or when no field is given, otherwise the resolver's accessor for the field.
    let field_access = if result_is_simple {
        result_var.to_string()
    } else {
        match &assertion.field {
            Some(f) if !f.is_empty() => field_resolver.accessor(f, "python", result_var),
            _ => result_var.to_string(),
        }
    };

    // A field is treated as enum-like when its raw or resolved name is listed in
    // `fields_enum`, or when its Python accessor contains a `[0]` index.
    // NOTE(review): the `[0]` heuristic presumably targets first-element
    // accessors that yield enum values - confirm against FieldResolver.
    let field_is_enum = assertion.field.as_deref().is_some_and(|f| {
        if fields_enum.contains(f) {
            return true;
        }
        let resolved = field_resolver.resolve(f);
        if fields_enum.contains(resolved) {
            return true;
        }
        field_resolver.accessor(f, "python", result_var).contains("[0]")
    });

    // Optional fields get an explicit `is not None` guard in the membership assertions.
    let field_is_optional = match &assertion.field {
        Some(f) if !f.is_empty() => {
            let resolved = field_resolver.resolve(f);
            field_resolver.is_optional(resolved)
        }
        _ => false,
    };

    match assertion.assertion_type.as_str() {
        "error" | "not_error" => {
            // Nothing is emitted for these two types here.
        }
        "equals" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                // Identity comparison for booleans and None, equality otherwise.
                let op = if val.is_boolean() || val.is_null() { "is" } else { "==" };
                if val.is_string() {
                    // String expectations are compared against the stripped actual value.
                    let _ = writeln!(out, " assert {field_access}.strip() {op} {expected} # noqa: S101");
                } else {
                    let _ = writeln!(out, " assert {field_access} {op} {expected} # noqa: S101");
                }
            }
        }
        "contains" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                // Enum-like fields are matched against their lower-cased string form.
                let cmp_expr = if field_is_enum && val.is_string() {
                    format!("str({field_access}).lower()")
                } else {
                    field_access.clone()
                };
                if field_is_optional {
                    let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
                    let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
                } else {
                    let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
                }
            }
        }
        "contains_all" => {
            // One membership assertion per expected value.
            if let Some(values) = &assertion.values {
                for val in values {
                    let expected = value_to_python_string(val);
                    let cmp_expr = if field_is_enum && val.is_string() {
                        format!("str({field_access}).lower()")
                    } else {
                        field_access.clone()
                    };
                    if field_is_optional {
                        let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
                        let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
                    } else {
                        let _ = writeln!(out, " assert {expected} in {cmp_expr} # noqa: S101");
                    }
                }
            }
        }
        "not_contains" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let cmp_expr = if field_is_enum && val.is_string() {
                    format!("str({field_access}).lower()")
                } else {
                    field_access.clone()
                };
                if field_is_optional {
                    // A missing optional value trivially satisfies "not contains".
                    let _ = writeln!(
                        out,
                        " assert {field_access} is None or {expected} not in {cmp_expr} # noqa: S101"
                    );
                } else {
                    let _ = writeln!(out, " assert {expected} not in {cmp_expr} # noqa: S101");
                }
            }
        }
        "not_empty" => {
            // Relies on Python truthiness of the value.
            let _ = writeln!(out, " assert {field_access} # noqa: S101");
        }
        "is_empty" => {
            let _ = writeln!(out, " assert not {field_access} # noqa: S101");
        }
        "contains_any" => {
            if let Some(values) = &assertion.values {
                let items: Vec<String> = values.iter().map(value_to_python_string).collect();
                let list_str = items.join(", ");
                // Unlike `contains`, the enum conversion here does not depend on the value type.
                let cmp_expr = if field_is_enum {
                    format!("str({field_access}).lower()")
                } else {
                    field_access.clone()
                };
                if field_is_optional {
                    let _ = writeln!(out, " assert {field_access} is not None # noqa: S101");
                    let _ = writeln!(
                        out,
                        " assert any(v in {cmp_expr} for v in [{list_str}]) # noqa: S101"
                    );
                } else {
                    let _ = writeln!(
                        out,
                        " assert any(v in {cmp_expr} for v in [{list_str}]) # noqa: S101"
                    );
                }
            }
        }
        "greater_than" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access} > {expected} # noqa: S101");
            }
        }
        "less_than" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access} < {expected} # noqa: S101");
            }
        }
        "greater_than_or_equal" | "min" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access} >= {expected} # noqa: S101");
            }
        }
        "less_than_or_equal" | "max" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access} <= {expected} # noqa: S101");
            }
        }
        "starts_with" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access}.startswith({expected}) # noqa: S101");
            }
        }
        "ends_with" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                let _ = writeln!(out, " assert {field_access}.endswith({expected}) # noqa: S101");
            }
        }
        // The four length/count assertions below only emit output when the
        // expected value is a non-negative integer.
        "min_length" => {
            if let Some(val) = &assertion.value {
                if let Some(n) = val.as_u64() {
                    let _ = writeln!(out, " assert len({field_access}) >= {n} # noqa: S101");
                }
            }
        }
        "max_length" => {
            if let Some(val) = &assertion.value {
                if let Some(n) = val.as_u64() {
                    let _ = writeln!(out, " assert len({field_access}) <= {n} # noqa: S101");
                }
            }
        }
        "count_min" => {
            if let Some(val) = &assertion.value {
                if let Some(n) = val.as_u64() {
                    let _ = writeln!(out, " assert len({field_access}) >= {n} # noqa: S101");
                }
            }
        }
        "count_equals" => {
            if let Some(val) = &assertion.value {
                if let Some(n) = val.as_u64() {
                    let _ = writeln!(out, " assert len({field_access}) == {n} # noqa: S101");
                }
            }
        }
        "is_true" => {
            // Strict identity with True; note that `is_false` below only checks falsiness.
            let _ = writeln!(out, " assert {field_access} is True # noqa: S101");
        }
        "is_false" => {
            let _ = writeln!(out, " assert not {field_access} # noqa: S101");
        }
        "method_result" => {
            // Assert on the outcome of calling a method/helper on the result
            // rather than on a field; `check` defaults to "is_true".
            if let Some(method_name) = &assertion.method {
                let call_expr = build_python_method_call(result_var, method_name, assertion.args.as_ref());
                let check = assertion.check.as_deref().unwrap_or("is_true");
                match check {
                    "equals" => {
                        if let Some(val) = &assertion.value {
                            if val.is_boolean() {
                                if val.as_bool() == Some(true) {
                                    let _ = writeln!(out, " assert {call_expr} is True # noqa: S101");
                                } else {
                                    let _ = writeln!(out, " assert {call_expr} is False # noqa: S101");
                                }
                            } else {
                                let expected = value_to_python_string(val);
                                let _ = writeln!(out, " assert {call_expr} == {expected} # noqa: S101");
                            }
                        }
                    }
                    "is_true" => {
                        let _ = writeln!(out, " assert {call_expr} # noqa: S101");
                    }
                    "is_false" => {
                        let _ = writeln!(out, " assert not {call_expr} # noqa: S101");
                    }
                    "greater_than_or_equal" => {
                        if let Some(val) = &assertion.value {
                            // A value that is not a non-negative integer falls back to 0.
                            let n = val.as_u64().unwrap_or(0);
                            let _ = writeln!(out, " assert {call_expr} >= {n} # noqa: S101");
                        }
                    }
                    "count_min" => {
                        if let Some(val) = &assertion.value {
                            // A value that is not a non-negative integer falls back to 0.
                            let n = val.as_u64().unwrap_or(0);
                            let _ = writeln!(out, " assert len({call_expr}) >= {n} # noqa: S101");
                        }
                    }
                    "contains" => {
                        if let Some(val) = &assertion.value {
                            let expected = value_to_python_string(val);
                            let _ = writeln!(out, " assert {expected} in {call_expr} # noqa: S101");
                        }
                    }
                    "is_error" => {
                        // The call itself is expected to raise.
                        let _ = writeln!(out, " with pytest.raises(Exception): # noqa: B017");
                        let _ = writeln!(out, " {call_expr}");
                    }
                    other_check => {
                        panic!("unsupported method_result check type: {other_check}");
                    }
                }
            } else {
                panic!("method_result assertion missing 'method' field");
            }
        }
        "matches_regex" => {
            if let Some(val) = &assertion.value {
                let expected = value_to_python_string(val);
                // `re` is imported inline, directly before the assertion that uses it.
                let _ = writeln!(out, " import re # noqa: PLC0415");
                let _ = writeln!(
                    out,
                    " assert re.search({expected}, {field_access}) is not None # noqa: S101"
                );
            }
        }
        other => {
            panic!("unsupported assertion type: {other}");
        }
    }
}
1914
1915fn build_python_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1918 match method_name {
1919 "root_child_count" => format!("{result_var}.root_node().child_count()"),
1920 "root_node_type" => format!("{result_var}.root_node().kind()"),
1921 "named_children_count" => format!("{result_var}.root_node().named_child_count()"),
1922 "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1923 "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1924 "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1925 "contains_node_type" => {
1926 let node_type = args
1927 .and_then(|a| a.get("node_type"))
1928 .and_then(|v| v.as_str())
1929 .unwrap_or("");
1930 format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1931 }
1932 "find_nodes_by_type" => {
1933 let node_type = args
1934 .and_then(|a| a.get("node_type"))
1935 .and_then(|v| v.as_str())
1936 .unwrap_or("");
1937 format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1938 }
1939 "run_query" => {
1940 let query_source = args
1941 .and_then(|a| a.get("query_source"))
1942 .and_then(|v| v.as_str())
1943 .unwrap_or("");
1944 let language = args
1945 .and_then(|a| a.get("language"))
1946 .and_then(|v| v.as_str())
1947 .unwrap_or("");
1948 format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1949 }
1950 _ => {
1951 if let Some(args_val) = args {
1952 let arg_str = args_val
1953 .as_object()
1954 .map(|obj| {
1955 obj.iter()
1956 .map(|(k, v)| format!("{}={}", k, value_to_python_string(v)))
1957 .collect::<Vec<_>>()
1958 .join(", ")
1959 })
1960 .unwrap_or_default();
1961 format!("{result_var}.{method_name}({arg_str})")
1962 } else {
1963 format!("{result_var}.{method_name}()")
1964 }
1965 }
1966 }
1967}
1968
/// Name of the Python helper function used for `method_name`, if any.
///
/// Returns `None` for method names that have no helper-function mapping here.
fn python_method_helper_import(method_name: &str) -> Option<String> {
    let helper = match method_name {
        "has_error_nodes" => "tree_has_error_nodes",
        "contains_node_type" => "tree_contains_node_type",
        "error_count" | "tree_error_count" => "tree_error_count",
        // These helpers share their name with the method.
        "tree_to_sexp" | "find_nodes_by_type" | "run_query" => method_name,
        _ => return None,
    };
    Some(helper.to_owned())
}
1983
1984fn value_to_python_string(value: &serde_json::Value) -> String {
1985 match value {
1986 serde_json::Value::String(s) => python_string_literal(s),
1987 serde_json::Value::Bool(true) => "True".to_string(),
1988 serde_json::Value::Bool(false) => "False".to_string(),
1989 serde_json::Value::Number(n) => n.to_string(),
1990 serde_json::Value::Null => "None".to_string(),
1991 other => python_string_literal(&other.to_string()),
1992 }
1993}
1994
1995fn python_string_literal(s: &str) -> String {
1998 if s.contains('"') && !s.contains('\'') {
1999 let escaped = s
2001 .replace('\\', "\\\\")
2002 .replace('\'', "\\'")
2003 .replace('\n', "\\n")
2004 .replace('\r', "\\r")
2005 .replace('\t', "\\t");
2006 format!("'{escaped}'")
2007 } else {
2008 format!("\"{}\"", escape_python(s))
2009 }
2010}
2011
2012fn emit_python_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
2014 let params = match method_name {
2015 "visit_link" => "self, ctx, href, text, title",
2016 "visit_image" => "self, ctx, src, alt, title",
2017 "visit_heading" => "self, ctx, level, text, id",
2018 "visit_code_block" => "self, ctx, lang, code",
2019 "visit_code_inline"
2020 | "visit_strong"
2021 | "visit_emphasis"
2022 | "visit_strikethrough"
2023 | "visit_underline"
2024 | "visit_subscript"
2025 | "visit_superscript"
2026 | "visit_mark"
2027 | "visit_button"
2028 | "visit_summary"
2029 | "visit_figcaption"
2030 | "visit_definition_term"
2031 | "visit_definition_description" => "self, ctx, text",
2032 "visit_text" => "self, ctx, text",
2033 "visit_list_item" => "self, ctx, ordered, marker, text",
2034 "visit_blockquote" => "self, ctx, content, depth",
2035 "visit_table_row" => "self, ctx, cells, is_header",
2036 "visit_custom_element" => "self, ctx, tag_name, html",
2037 "visit_form" => "self, ctx, action_url, method",
2038 "visit_input" => "self, ctx, input_type, name, value",
2039 "visit_audio" | "visit_video" | "visit_iframe" => "self, ctx, src",
2040 "visit_details" => "self, ctx, is_open",
2041 "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
2042 "self, ctx, output, *args"
2043 }
2044 "visit_list_start" => "self, ctx, ordered, *args",
2045 "visit_list_end" => "self, ctx, ordered, output, *args",
2046 _ => "self, ctx, *args",
2047 };
2048
2049 let _ = writeln!(
2050 out,
2051 " def {method_name}({params}): # noqa: A002, ANN001, ANN202, ARG002"
2052 );
2053 match action {
2054 CallbackAction::Skip => {
2055 let _ = writeln!(out, " return \"skip\"");
2056 }
2057 CallbackAction::Continue => {
2058 let _ = writeln!(out, " return \"continue\"");
2059 }
2060 CallbackAction::PreserveHtml => {
2061 let _ = writeln!(out, " return \"preserve_html\"");
2062 }
2063 CallbackAction::Custom { output } => {
2064 let escaped = escape_python(output);
2065 let _ = writeln!(out, " return {{\"custom\": \"{escaped}\"}}");
2066 }
2067 CallbackAction::CustomTemplate { template } => {
2068 let escaped_template = template
2073 .replace('\\', "\\\\")
2074 .replace('\'', "\\'")
2075 .replace('\n', "\\n")
2076 .replace('\r', "\\r")
2077 .replace('\t', "\\t");
2078 let _ = writeln!(out, " return {{\"custom\": f'{escaped_template}'}}");
2079 }
2080 }
2081}