use crate::config::E2eConfig;
use crate::escape::{escape_php, sanitize_filename};
use crate::field_access::FieldResolver;
use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, HttpFixture, ValidationErrorExpectation};
use alef_backend_php::naming::php_autoload_namespace;
use alef_core::backend::GeneratedFile;
use alef_core::config::ResolvedCrateConfig;
use alef_core::hash::{self, CommentStyle};
use alef_core::template_versions as tv;
use anyhow::Result;
use heck::{ToLowerCamelCase, ToSnakeCase, ToUpperCamelCase};
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::path::PathBuf;
use super::E2eCodegen;
use super::client;
/// Code generator that emits a PHPUnit-based end-to-end test project for PHP.
pub struct PhpCodegen;

impl E2eCodegen for PhpCodegen {
    /// Generates every file of the PHP e2e project.
    ///
    /// The result contains `composer.json`, `phpunit.xml`, `bootstrap.php`,
    /// `run_tests.php` and one `tests/<Category>Test.php` per fixture group that
    /// still has fixtures after filtering through `should_include_fixture`.
    /// All paths are rooted at `<effective_output>/<language_name>`.
    fn generate(
        &self,
        groups: &[FixtureGroup],
        e2e_config: &E2eConfig,
        config: &ResolvedCrateConfig,
    ) -> Result<Vec<GeneratedFile>> {
        let lang = self.language_name();
        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
        let call = &e2e_config.call;
        let overrides = call.overrides.get(lang);
        let extension_name = config.php_extension_name();

        // A class override may be namespace-qualified; only its last segment is
        // used as the class name.
        let class_name = match overrides.and_then(|o| o.class.as_ref()) {
            Some(qualified) => qualified
                .split('\\')
                .next_back()
                .unwrap_or(qualified.as_str())
                .to_string(),
            None => extension_name.to_upper_camel_case(),
        };

        // Without a module override, every `_`-separated part of the extension
        // name becomes one namespace segment (a name without `_` is one segment).
        let namespace = match overrides.and_then(|o| o.module.as_ref()) {
            Some(module) => module.clone(),
            None => extension_name
                .split('_')
                .map(|part| part.to_upper_camel_case())
                .collect::<Vec<_>>()
                .join("\\"),
        };

        let empty_enum_fields = HashMap::new();
        let enum_fields = overrides.map_or(&empty_enum_fields, |o| &o.enum_fields);
        let result_is_simple = overrides.map(|o| o.result_is_simple).unwrap_or(false);
        let php_client_factory = overrides.and_then(|o| o.php_client_factory.as_deref());
        let options_via = overrides.and_then(|o| o.options_via.as_deref()).unwrap_or("array");

        // Package coordinates: explicit package config wins, otherwise fall back
        // to values derived from the crate configuration.
        let php_pkg = e2e_config.resolve_package("php");
        let pkg_name = match php_pkg.as_ref().and_then(|p| p.name.as_ref()) {
            Some(name) => name.clone(),
            None => {
                let org = config
                    .try_github_repo()
                    .ok()
                    .as_deref()
                    .and_then(alef_core::config::derive_repo_org)
                    .unwrap_or_else(|| config.name.clone());
                format!("{org}/{}", call.module.replace('_', "-"))
            }
        };
        let pkg_path = match php_pkg.as_ref().and_then(|p| p.path.as_ref()) {
            Some(path) => path.clone(),
            None => "../../packages/php".to_string(),
        };
        let pkg_version = php_pkg
            .as_ref()
            .and_then(|p| p.version.as_ref())
            .cloned()
            .or_else(|| config.resolved_version())
            .unwrap_or_else(|| "0.1.0".to_string());

        // The e2e package lives under the same vendor as the package under test.
        let e2e_vendor = pkg_name.split('/').next().unwrap_or(&pkg_name).to_string();
        let e2e_pkg_name = format!("{e2e_vendor}/e2e-php");
        // Backslashes are doubled because the namespace ends up inside JSON.
        let php_namespace_escaped = php_autoload_namespace(config).replace('\\', "\\\\");
        let e2e_autoload_ns = format!("{php_namespace_escaped}\\\\E2e\\\\");

        let all_fixtures = || groups.iter().flat_map(|g| g.fixtures.iter());
        let has_http_fixtures = all_fixtures().any(|f| f.needs_mock_server());
        let has_file_fixtures = all_fixtures().any(|f| {
            let resolved = e2e_config.resolve_call(f.call.as_deref());
            resolved
                .args
                .iter()
                .any(|a| matches!(a.arg_type.as_str(), "file_path" | "bytes"))
        });

        let mut files = vec![
            GeneratedFile {
                path: output_base.join("composer.json"),
                content: render_composer_json(
                    &e2e_pkg_name,
                    &e2e_autoload_ns,
                    &pkg_name,
                    &pkg_path,
                    &pkg_version,
                    e2e_config.dep_mode,
                ),
                generated_header: false,
            },
            GeneratedFile {
                path: output_base.join("phpunit.xml"),
                content: render_phpunit_xml(),
                generated_header: false,
            },
            GeneratedFile {
                path: output_base.join("bootstrap.php"),
                content: render_bootstrap(&pkg_path, has_http_fixtures, has_file_fixtures),
                generated_header: true,
            },
            GeneratedFile {
                path: output_base.join("run_tests.php"),
                content: render_run_tests_php(&extension_name, config.php_cargo_crate_name()),
                generated_header: true,
            },
        ];

        let tests_base = output_base.join("tests");
        let field_resolver = FieldResolver::new(
            &e2e_config.fields,
            &e2e_config.fields_optional,
            &e2e_config.result_fields,
            &e2e_config.fields_array,
            &std::collections::HashSet::new(),
        );

        for group in groups {
            let active: Vec<&Fixture> = group
                .fixtures
                .iter()
                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
                .collect();
            // Groups with nothing left after filtering produce no test file.
            if active.is_empty() {
                continue;
            }
            let test_class = format!("{}Test", sanitize_filename(&group.category).to_upper_camel_case());
            let content = render_test_file(
                &group.category,
                &active,
                e2e_config,
                lang,
                &namespace,
                &class_name,
                &test_class,
                &field_resolver,
                enum_fields,
                result_is_simple,
                php_client_factory,
                options_via,
            );
            files.push(GeneratedFile {
                path: tests_base.join(format!("{test_class}.php")),
                content,
                generated_header: true,
            });
        }

        Ok(files)
    }

    /// Language key used for override lookup and as the output sub-directory.
    fn language_name(&self) -> &'static str {
        "php"
    }
}
fn render_composer_json(
e2e_pkg_name: &str,
e2e_autoload_ns: &str,
pkg_name: &str,
pkg_path: &str,
pkg_version: &str,
dep_mode: crate::config::DependencyMode,
) -> String {
let (require_section, autoload_section) = match dep_mode {
crate::config::DependencyMode::Registry => {
let require = format!(
r#" "require": {{
"{pkg_name}": "{pkg_version}"
}},
"require-dev": {{
"phpunit/phpunit": "{phpunit}",
"guzzlehttp/guzzle": "{guzzle}"
}},"#,
phpunit = tv::packagist::PHPUNIT,
guzzle = tv::packagist::GUZZLE,
);
(require, String::new())
}
crate::config::DependencyMode::Local => {
let require = format!(
r#" "require-dev": {{
"phpunit/phpunit": "{phpunit}",
"guzzlehttp/guzzle": "{guzzle}"
}},"#,
phpunit = tv::packagist::PHPUNIT,
guzzle = tv::packagist::GUZZLE,
);
let pkg_namespace = pkg_name
.split('/')
.nth(1)
.unwrap_or(pkg_name)
.split('-')
.map(heck::ToUpperCamelCase::to_upper_camel_case)
.collect::<Vec<_>>()
.join("\\");
let autoload = format!(
r#"
"autoload": {{
"psr-4": {{
"{}\\": "{}/src/"
}}
}},"#,
pkg_namespace.replace('\\', "\\\\"),
pkg_path
);
(require, autoload)
}
};
crate::template_env::render(
"php/composer.json.jinja",
minijinja::context! {
e2e_pkg_name => e2e_pkg_name,
e2e_autoload_ns => e2e_autoload_ns,
require_section => require_section,
autoload_section => autoload_section,
},
)
}
/// Renders `phpunit.xml` from its template; the template takes no variables.
fn render_phpunit_xml() -> String {
    let template_name = "php/phpunit.xml.jinja";
    crate::template_env::render(template_name, minijinja::context! {})
}
/// Renders `bootstrap.php` from `php/bootstrap.php.jinja`.
///
/// The template receives the `//`-style generated-file header, the path of the
/// package under test, and two flags telling it whether any fixture needs the
/// mock HTTP server or file/bytes inputs.
fn render_bootstrap(pkg_path: &str, has_http_fixtures: bool, has_file_fixtures: bool) -> String {
    let template_ctx = minijinja::context! {
        header => hash::header(CommentStyle::DoubleSlash),
        pkg_path => pkg_path,
        has_http_fixtures => has_http_fixtures,
        has_file_fixtures => has_file_fixtures,
    };
    crate::template_env::render("php/bootstrap.php.jinja", template_ctx)
}
/// Renders `run_tests.php`, the PHP launcher script for the generated suite.
///
/// The library file name embedded in the script is `lib<crate>` (with `-`
/// replaced by `_`) when `cargo_crate_name` is given, and
/// `lib<extension_name>_php` otherwise. The script itself re-executes PHP with
/// the locally built extension when that file exists, then hands over to
/// PHPUnit.
fn render_run_tests_php(extension_name: &str, cargo_crate_name: Option<&str>) -> String {
    let header = hash::header(CommentStyle::DoubleSlash);
    let ext_lib_name = match cargo_crate_name {
        Some(crate_name) => format!("lib{}", crate_name.replace('-', "_")),
        None => format!("lib{extension_name}_php"),
    };
    format!(
        r#"#!/usr/bin/env php
<?php
{header}
declare(strict_types=1);
// Determine platform-specific extension suffix.
$extSuffix = match (PHP_OS_FAMILY) {{
'Darwin' => '.dylib',
default => '.so',
}};
$extPath = __DIR__ . '/../../target/release/{ext_lib_name}' . $extSuffix;
// If the locally-built extension exists and we have not already restarted with it,
// re-exec PHP with no system ini (-n) to avoid conflicts with any system-installed
// version of the extension, then load the local build explicitly.
if (file_exists($extPath) && !getenv('ALEF_PHP_LOCAL_EXT_LOADED')) {{
putenv('ALEF_PHP_LOCAL_EXT_LOADED=1');
$php = PHP_BINARY;
$phpunitPath = __DIR__ . '/vendor/bin/phpunit';
$cmd = array_merge(
[$php, '-n', '-d', 'extension=' . $extPath],
[$phpunitPath],
array_slice($GLOBALS['argv'], 1)
);
passthru(implode(' ', array_map('escapeshellarg', $cmd)), $exitCode);
exit($exitCode);
}}
// Extension is now loaded (via the restart above with -n flag).
// Invoke PHPUnit normally.
$phpunitPath = __DIR__ . '/vendor/bin/phpunit';
if (!file_exists($phpunitPath)) {{
echo "PHPUnit not found at $phpunitPath. Run 'composer install' first.\n";
exit(1);
}}
require $phpunitPath;
"#
    )
}
/// Renders one PHPUnit test class file for a fixture category.
///
/// Collects the `use` statements the file needs (`CrawlConfig` when a `handle`
/// argument carries a non-empty config, plus every options type and
/// non-reserved element type referenced by the fixtures' calls), renders each
/// fixture as a test method, and feeds everything into `php/test_file.jinja`.
// The renderer needs the full per-language context; bundling it into a struct
// would change the signature callers rely on.
#[allow(clippy::too_many_arguments)]
fn render_test_file(
    category: &str,
    fixtures: &[&Fixture],
    e2e_config: &E2eConfig,
    lang: &str,
    namespace: &str,
    class_name: &str,
    test_class: &str,
    field_resolver: &FieldResolver,
    enum_fields: &HashMap<String, String>,
    result_is_simple: bool,
    php_client_factory: Option<&str>,
    options_via: &str,
) -> String {
    let header = hash::header(CommentStyle::DoubleSlash);

    // A handle argument only needs CrawlConfig when its input value is neither
    // missing/null nor an empty object.
    let needs_crawl_config_import = fixtures.iter().any(|fixture| {
        let call = e2e_config.resolve_call(fixture.call.as_deref());
        call.args.iter().any(|arg| {
            if arg.arg_type != "handle" {
                return false;
            }
            match fixture.input.get(&arg.field) {
                None | Some(serde_json::Value::Null) => false,
                Some(serde_json::Value::Object(map)) => !map.is_empty(),
                Some(_) => true,
            }
        })
    });
    let has_http_tests = fixtures.iter().any(|fixture| fixture.is_http_test());

    // Ordered set: de-duplicates the type names and yields them sorted.
    let options_type_imports: std::collections::BTreeSet<String> = fixtures
        .iter()
        .flat_map(|fixture| {
            let call = e2e_config.resolve_call(fixture.call.as_deref());
            // Per-call options type wins over the top-level call's one.
            let opt_type = call
                .overrides
                .get(lang)
                .and_then(|o| o.options_type.as_deref())
                .or_else(|| {
                    e2e_config
                        .call
                        .overrides
                        .get(lang)
                        .and_then(|o| o.options_type.as_deref())
                });
            let mut type_names: Vec<String> = Vec::new();
            if let Some(name) = opt_type {
                type_names.push(name.to_string());
            }
            type_names.extend(
                call.args
                    .iter()
                    .filter_map(|a| a.element_type.as_ref().map(|t| t.to_string()))
                    .filter(|t| !is_php_reserved_type(t)),
            );
            type_names
        })
        .collect();

    let mut imports_use: Vec<String> = Vec::new();
    if needs_crawl_config_import {
        imports_use.push(format!("use {namespace}\\CrawlConfig;"));
    }
    // The main class is never imported here.
    imports_use.extend(
        options_type_imports
            .iter()
            .filter(|type_name| *type_name != class_name)
            .map(|type_name| format!("use {namespace}\\{type_name};")),
    );

    let mut fixtures_body = String::new();
    for (index, fixture) in fixtures.iter().enumerate() {
        // Methods are separated by exactly one blank line.
        if index > 0 {
            fixtures_body.push('\n');
        }
        if fixture.is_http_test() {
            let http = fixture.http.as_ref().unwrap();
            render_http_test_method(&mut fixtures_body, fixture, http);
            continue;
        }
        render_test_method(
            &mut fixtures_body,
            fixture,
            e2e_config,
            lang,
            namespace,
            class_name,
            field_resolver,
            enum_fields,
            result_is_simple,
            php_client_factory,
            options_via,
        );
    }

    crate::template_env::render(
        "php/test_file.jinja",
        minijinja::context! {
            header => header,
            namespace => namespace,
            class_name => class_name,
            test_class => test_class,
            category => category,
            imports_use => imports_use,
            has_http_tests => has_http_tests,
            fixtures_body => fixtures_body,
        },
    )
}
/// PHP renderer for HTTP fixture tests, driven by
/// `client::http_call::render_http_test`.
struct PhpTestClientRenderer;

impl PhpTestClientRenderer {
    /// Renders `php/http_assertions.jinja` and appends the result to `out`.
    ///
    /// Each trait method fills in exactly one of the assertion kinds and leaves
    /// the rest empty (`0` status, empty string, empty lists). Every assertion
    /// snippet is passed to the template as a map with a single
    /// `assertion_code` entry. `response_var` is always passed as an empty
    /// string; the generated snippets refer to `$response` directly.
    fn push_http_assertions(
        out: &mut String,
        status_code: u16,
        headers: Vec<String>,
        body_assertion: String,
        partial_body: Vec<String>,
        validation_errors: Vec<String>,
    ) {
        let wrap = |codes: Vec<String>| -> Vec<std::collections::HashMap<&'static str, String>> {
            codes
                .into_iter()
                .map(|code| std::collections::HashMap::from([("assertion_code", code)]))
                .collect()
        };
        let rendered = crate::template_env::render(
            "php/http_assertions.jinja",
            minijinja::context! {
                response_var => "",
                status_code => status_code,
                headers => wrap(headers),
                body_assertion => body_assertion,
                partial_body => wrap(partial_body),
                validation_errors => wrap(validation_errors),
            },
        );
        out.push_str(&rendered);
    }
}

impl client::TestClientRenderer for PhpTestClientRenderer {
    fn language_name(&self) -> &'static str {
        "php"
    }

    /// Test method names reuse the file-name sanitiser.
    fn sanitize_test_name(&self, id: &str) -> String {
        sanitize_filename(id)
    }

    /// Opens a test method; a skip reason, if any, is escaped for PHP first.
    fn render_test_open(&self, out: &mut String, fn_name: &str, description: &str, skip_reason: Option<&str>) {
        let escaped_reason = skip_reason.map(escape_php);
        let rendered = crate::template_env::render(
            "php/http_test_open.jinja",
            minijinja::context! {
                fn_name => fn_name,
                description => description,
                skip_reason => escaped_reason,
            },
        );
        out.push_str(&rendered);
    }

    /// Closes the test method opened by `render_test_open`.
    fn render_test_close(&self, out: &mut String) {
        let rendered = crate::template_env::render("php/http_test_close.jinja", minijinja::context! {});
        out.push_str(&rendered);
    }

    /// Renders the HTTP request for a fixture.
    ///
    /// Builds the request option entries (`'json'`, `'headers'`, `'query'`) and
    /// passes them, with the upper-cased method and the quoted path, to
    /// `php/http_request.jinja`.
    fn render_call(&self, out: &mut String, ctx: &client::CallCtx<'_>) {
        let method = ctx.method.to_uppercase();
        let mut opts: Vec<String> = Vec::new();

        if let Some(body) = ctx.body {
            let php_body = json_to_php(body);
            opts.push(format!("'json' => {php_body}"));
        }

        let mut header_pairs: Vec<String> = Vec::new();
        if let Some(ct) = ctx.content_type {
            // An explicit Content-Type header in the fixture takes precedence.
            if !ctx.headers.keys().any(|k| k.to_lowercase() == "content-type") {
                header_pairs.push(format!("\"Content-Type\" => \"{}\"", escape_php(ct)));
            }
        }
        for (k, v) in ctx.headers {
            header_pairs.push(format!("\"{}\" => \"{}\"", escape_php(k), escape_php(v)));
        }
        // Cookies must live in the SAME 'headers' array as the other headers:
        // a second 'headers' key in the options array literal would overwrite
        // the first one in PHP and silently drop every header rendered above.
        if !ctx.cookies.is_empty() {
            let cookie_str = ctx
                .cookies
                .iter()
                .map(|(k, v)| format!("{}={}", k, v))
                .collect::<Vec<_>>()
                .join("; ");
            header_pairs.push(format!("'Cookie' => \"{}\"", escape_php(&cookie_str)));
        }
        if !header_pairs.is_empty() {
            opts.push(format!("'headers' => [{}]", header_pairs.join(", ")));
        }

        if !ctx.query_params.is_empty() {
            let pairs: Vec<String> = ctx
                .query_params
                .iter()
                .map(|(k, v)| {
                    // Strings are used verbatim; other JSON values use their
                    // JSON text form.
                    let val_str = match v {
                        serde_json::Value::String(s) => s.clone(),
                        other => other.to_string(),
                    };
                    format!("\"{}\" => \"{}\"", escape_php(k), escape_php(&val_str))
                })
                .collect();
            opts.push(format!("'query' => [{}]", pairs.join(", ")));
        }

        let path_lit = format!("\"{}\"", escape_php(ctx.path));
        let rendered = crate::template_env::render(
            "php/http_request.jinja",
            minijinja::context! {
                method => method,
                path => path_lit,
                opts => opts,
                response_var => ctx.response_var,
            },
        );
        out.push_str(&rendered);
    }

    /// Asserts the response status code.
    fn render_assert_status(&self, out: &mut String, _response_var: &str, status: u16) {
        Self::push_http_assertions(out, status, Vec::new(), String::new(), Vec::new(), Vec::new());
    }

    /// Asserts a single response header.
    ///
    /// `expected` may be one of the markers `<<present>>`, `<<absent>>` or
    /// `<<uuid>>`; anything else is compared literally. The header name is
    /// lower-cased before it is used for the lookup.
    fn render_assert_header(&self, out: &mut String, _response_var: &str, name: &str, expected: &str) {
        let header_key = name.to_lowercase();
        let header_key_lit = format!("\"{}\"", escape_php(&header_key));
        let assertion_code = match expected {
            "<<present>>" => {
                format!("$this->assertTrue($response->hasHeader({header_key_lit}));")
            }
            "<<absent>>" => {
                format!("$this->assertFalse($response->hasHeader({header_key_lit}));")
            }
            "<<uuid>>" => {
                format!(
                    "$this->assertMatchesRegularExpression('/^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$/i', $response->getHeaderLine({header_key_lit}));"
                )
            }
            literal => {
                let val_lit = format!("\"{}\"", escape_php(literal));
                format!("$this->assertEquals({val_lit}, $response->getHeaderLine({header_key_lit}));")
            }
        };
        Self::push_http_assertions(out, 0, vec![assertion_code], String::new(), Vec::new(), Vec::new());
    }

    /// Asserts the whole response body.
    ///
    /// A non-empty JSON string is compared against the raw body text; every
    /// other value is compared against the JSON-decoded body.
    fn render_assert_json_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
        let body_assertion = match expected {
            serde_json::Value::String(s) if !s.is_empty() => {
                let php_val = format!("\"{}\"", escape_php(s));
                format!("$this->assertEquals({php_val}, (string) $response->getBody());")
            }
            _ => {
                let php_val = json_to_php(expected);
                format!(
                    "$body = json_decode((string) $response->getBody(), true, 512, JSON_THROW_ON_ERROR);\n $this->assertEquals({php_val}, $body);"
                )
            }
        };
        Self::push_http_assertions(out, 0, Vec::new(), body_assertion, Vec::new(), Vec::new());
    }

    /// Asserts selected top-level keys of the decoded body.
    ///
    /// Nothing is rendered when `expected` is not a JSON object.
    fn render_assert_partial_body(&self, out: &mut String, _response_var: &str, expected: &serde_json::Value) {
        let Some(obj) = expected.as_object() else {
            return;
        };
        let partial_body: Vec<String> = obj
            .iter()
            .map(|(key, val)| {
                let php_key = format!("\"{}\"", escape_php(key));
                let php_val = json_to_php(val);
                format!("$this->assertEquals({php_val}, $body[{php_key}]);")
            })
            .collect();
        Self::push_http_assertions(out, 0, Vec::new(), String::new(), partial_body, Vec::new());
    }

    /// Asserts that each expected validation message occurs somewhere in the
    /// JSON-encoded body.
    fn render_assert_validation_errors(
        &self,
        out: &mut String,
        _response_var: &str,
        errors: &[ValidationErrorExpectation],
    ) {
        let validation_errors: Vec<String> = errors
            .iter()
            .map(|err| {
                let msg_lit = format!("\"{}\"", escape_php(&err.msg));
                format!("$this->assertStringContainsString({msg_lit}, json_encode($body, JSON_UNESCAPED_SLASHES));")
            })
            .collect();
        Self::push_http_assertions(out, 0, Vec::new(), String::new(), Vec::new(), validation_errors);
    }
}
/// Renders one HTTP fixture as a test method.
///
/// Fixtures that expect status 101 are rendered through the dedicated
/// `php/http_test_skip_101.jinja` template; all others go through the shared
/// HTTP test driver with the PHP renderer.
fn render_http_test_method(out: &mut String, fixture: &Fixture, http: &HttpFixture) {
    let expects_switching_protocols = http.expected_response.status_code == 101;
    if !expects_switching_protocols {
        client::http_call::render_http_test(out, &PhpTestClientRenderer, fixture);
        return;
    }

    let method_name = sanitize_filename(&fixture.id);
    let description = &fixture.description;
    let rendered = crate::template_env::render(
        "php/http_test_skip_101.jinja",
        minijinja::context! {
            method_name => method_name,
            description => description,
        },
    );
    out.push_str(&rendered);
}
/// Renders one non-HTTP fixture as a PHPUnit test method.
///
/// Resolves the call configuration for the fixture, derives the PHP function
/// name, builds setup lines and the argument list, optionally prepares a
/// client via `php_client_factory`, renders every assertion and finally feeds
/// the pieces into `php/test_method.jinja`.
// The renderer needs the full per-language context; bundling it into a struct
// would change the signature callers rely on.
#[allow(clippy::too_many_arguments)]
fn render_test_method(
    out: &mut String,
    fixture: &Fixture,
    e2e_config: &E2eConfig,
    lang: &str,
    namespace: &str,
    class_name: &str,
    field_resolver: &FieldResolver,
    enum_fields: &HashMap<String, String>,
    result_is_simple: bool,
    php_client_factory: Option<&str>,
    options_via: &str,
) {
    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
    let call_overrides = call_config.overrides.get(lang);
    let result_is_simple = result_is_simple || call_overrides.is_some_and(|o| o.result_is_simple);

    // An explicit function override is used verbatim. Otherwise the configured
    // name gets an `_async` suffix for async calls and is converted to
    // lowerCamelCase.
    let function_name = match call_overrides.and_then(|o| o.function.as_ref()) {
        Some(explicit) => explicit.clone(),
        None => {
            let base = &call_config.function;
            let suffixed = if call_config.r#async && !base.ends_with("_async") {
                format!("{base}_async")
            } else {
                base.clone()
            };
            suffixed.to_lower_camel_case()
        }
    };

    let result_var = &call_config.result_var;
    let method_name = sanitize_filename(&fixture.id);
    let description = &fixture.description;
    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");

    // Per-call options type wins over the top-level call's one.
    let call_options_type = call_overrides
        .and_then(|o| o.options_type.as_deref())
        .or_else(|| {
            e2e_config
                .call
                .overrides
                .get(lang)
                .and_then(|o| o.options_type.as_deref())
        });
    let (mut setup_lines, args_str) = build_args_and_setup(
        &fixture.input,
        &call_config.args,
        class_name,
        enum_fields,
        &fixture.id,
        options_via,
        call_options_type,
    );

    // Calls skipped for PHP still get a method, rendered in skip mode.
    if call_config.skip_languages.iter().any(|l| l == "php") {
        let skipped = crate::template_env::render(
            "php/test_method.jinja",
            minijinja::context! {
                method_name => method_name,
                description => description,
                client_factory => String::new(),
                setup_lines => Vec::<String>::new(),
                expects_error => false,
                skip_test => true,
                has_usable_assertions => false,
                call_expr => String::new(),
                result_var => result_var,
                assertions_body => String::new(),
            },
        );
        out.push_str(&skipped);
        return;
    }

    // `$options` already exists when it is the sole argument produced above.
    let mut options_already_created = args_str == "$options";
    if let Some(visitor_spec) = &fixture.visitor {
        build_php_visitor(&mut setup_lines, visitor_spec);
        if !options_already_created {
            setup_lines.push("$builder = \\HtmlToMarkdown\\ConversionOptions::builder();".to_string());
            setup_lines.push("$options = $builder->visitor($visitor)->build();".to_string());
            options_already_created = true;
        }
    }

    let final_args = if !options_already_created {
        args_str
    } else if args_str.is_empty() || args_str == "$options" {
        "$options".to_string()
    } else {
        format!("{args_str}, $options")
    };

    // With a client factory the call is an instance call on `$client`,
    // otherwise a static call on the main class.
    let call_expr = match php_client_factory {
        Some(_) => format!("$client->{function_name}({final_args})"),
        None => format!("{class_name}::{function_name}({final_args})"),
    };

    let has_mock = fixture.mock_response.is_some() || fixture.http.is_some();
    let api_key_var = fixture.env.as_ref().and_then(|e| e.api_key_var.as_deref());
    let client_factory = match php_client_factory {
        None => String::new(),
        Some(factory) => {
            let fixture_id = &fixture.id;
            if has_mock {
                format!(
                    "$client = \\{namespace}\\{class_name}::{factory}('test-key', getenv('MOCK_SERVER_URL') . '/fixtures/{fixture_id}');"
                )
            } else if let Some(var) = api_key_var {
                format!(
                    "$apiKey = getenv('{var}');\n if (!$apiKey) {{ $this->markTestSkipped('{var} not set'); return; }}\n $client = \\{namespace}\\{class_name}::{factory}($apiKey);"
                )
            } else {
                format!("$client = \\{namespace}\\{class_name}::{factory}('test-key');")
            }
        }
    };

    // An assertion is usable unless it is an error/not_error marker or targets
    // a field the resolver rejects for the result.
    let has_usable_assertions = fixture.assertions.iter().any(|a| {
        if matches!(a.assertion_type.as_str(), "error" | "not_error") {
            return false;
        }
        match &a.field {
            Some(f) if !f.is_empty() => field_resolver.is_valid_for_result(f),
            _ => true,
        }
    });

    let mut assertions_body = String::new();
    for assertion in &fixture.assertions {
        render_assertion(
            &mut assertions_body,
            assertion,
            result_var,
            field_resolver,
            result_is_simple,
            call_config.result_is_array,
        );
    }

    let rendered = crate::template_env::render(
        "php/test_method.jinja",
        minijinja::context! {
            method_name => method_name,
            description => description,
            client_factory => client_factory,
            setup_lines => setup_lines,
            expects_error => expects_error,
            skip_test => fixture.assertions.is_empty(),
            has_usable_assertions => has_usable_assertions,
            call_expr => call_expr,
            result_var => result_var,
            assertions_body => assertions_body,
        },
    );
    out.push_str(&rendered);
}
fn emit_php_batch_item_array(arr: &serde_json::Value, elem_type: &str) -> String {
if let Some(items) = arr.as_array() {
let item_strs: Vec<String> = items
.iter()
.filter_map(|item| {
if let Some(obj) = item.as_object() {
match elem_type {
"BatchBytesItem" => {
let content = obj.get("content").and_then(|v| v.as_array());
let mime_type = obj.get("mime_type").and_then(|v| v.as_str()).unwrap_or("text/plain");
let content_code = if let Some(arr) = content {
let bytes: Vec<String> = arr
.iter()
.filter_map(|v| v.as_u64())
.map(|n| format!("\\x{:02x}", n))
.collect();
format!("\"{}\"", bytes.join(""))
} else {
"\"\"".to_string()
};
Some(format!(
"new {}(content: {}, mimeType: \"{}\")",
elem_type, content_code, mime_type
))
}
"BatchFileItem" => {
let path = obj.get("path").and_then(|v| v.as_str()).unwrap_or("");
Some(format!("new {}(path: \"{}\")", elem_type, path))
}
_ => None,
}
} else {
None
}
})
.collect();
format!("[{}]", item_strs.join(", "))
} else {
"[]".to_string()
}
}
fn build_args_and_setup(
input: &serde_json::Value,
args: &[crate::config::ArgMapping],
class_name: &str,
_enum_fields: &HashMap<String, String>,
fixture_id: &str,
options_via: &str,
options_type: Option<&str>,
) -> (Vec<String>, String) {
if args.is_empty() {
let is_empty_input = match input {
serde_json::Value::Null => true,
serde_json::Value::Object(m) => m.is_empty(),
_ => false,
};
if is_empty_input {
return (Vec::new(), String::new());
}
return (Vec::new(), json_to_php(input));
}
let mut setup_lines: Vec<String> = Vec::new();
let mut parts: Vec<String> = Vec::new();
let arg_has_emission = |arg: &crate::config::ArgMapping| -> bool {
let val = if arg.field == "input" {
Some(input)
} else {
let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
input.get(field)
};
match val {
None | Some(serde_json::Value::Null) => !arg.optional,
Some(_) => true,
}
};
let any_later_has_emission = |from_idx: usize| -> bool { args[from_idx..].iter().any(arg_has_emission) };
for (idx, arg) in args.iter().enumerate() {
if arg.arg_type == "mock_url" {
setup_lines.push(format!(
"${} = getenv('MOCK_SERVER_URL') . '/fixtures/{fixture_id}';",
arg.name,
));
parts.push(format!("${}", arg.name));
continue;
}
if arg.arg_type == "handle" {
let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
let config_value = if arg.field == "input" {
input
} else {
let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
input.get(field).unwrap_or(&serde_json::Value::Null)
};
if config_value.is_null()
|| config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
{
setup_lines.push(format!("${} = {class_name}::{constructor_name}(null);", arg.name,));
} else {
let name = &arg.name;
let filtered_config = filter_empty_enum_strings(config_value);
setup_lines.push(format!(
"${name}_config = CrawlConfig::from_json(json_encode({}));",
json_to_php(&filtered_config)
));
setup_lines.push(format!(
"${} = {class_name}::{constructor_name}(${name}_config);",
arg.name,
));
}
parts.push(format!("${}", arg.name));
continue;
}
let val = if arg.field == "input" {
Some(input)
} else {
let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
input.get(field)
};
if arg.arg_type == "bytes" {
match val {
None | Some(serde_json::Value::Null) => {
if arg.optional {
parts.push("null".to_string());
} else {
parts.push("\"\"".to_string());
}
}
Some(serde_json::Value::String(s)) => {
let var_name = format!("{}Bytes", arg.name);
setup_lines.push(format!(
"${var_name} = file_get_contents(\"{path}\");\n if (${var_name} === false) {{ $this->fail(\"failed to read fixture: {path}\"); }}",
path = s.replace('"', "\\\"")
));
parts.push(format!("${var_name}"));
}
Some(serde_json::Value::Array(arr)) => {
let bytes: String = arr
.iter()
.filter_map(|v| v.as_u64())
.map(|n| format!("\\x{:02x}", n))
.collect();
parts.push(format!("\"{bytes}\""));
}
Some(other) => {
parts.push(json_to_php(other));
}
}
continue;
}
match val {
None | Some(serde_json::Value::Null) if arg.arg_type == "json_object" && arg.name == "config" => {
let type_name = if arg.name == "config" {
"ExtractionConfig".to_string()
} else {
format!("{}Config", arg.name.to_upper_camel_case())
};
parts.push(format!("{type_name}::from_json('{{}}')"));
continue;
}
None | Some(serde_json::Value::Null) if arg.optional => {
if any_later_has_emission(idx + 1) {
parts.push("null".to_string());
}
continue;
}
None | Some(serde_json::Value::Null) => {
let default_val = match arg.arg_type.as_str() {
"string" => "\"\"".to_string(),
"int" | "integer" => "0".to_string(),
"float" | "number" => "0.0".to_string(),
"bool" | "boolean" => "false".to_string(),
"json_object" if options_via == "json" => "null".to_string(),
_ => "null".to_string(),
};
parts.push(default_val);
}
Some(v) => {
if arg.arg_type == "json_object" && !v.is_null() {
if let Some(elem_type) = &arg.element_type {
if (elem_type == "BatchBytesItem" || elem_type == "BatchFileItem") && v.is_array() {
parts.push(emit_php_batch_item_array(v, elem_type));
continue;
}
if v.is_array() && is_php_reserved_type(elem_type) {
parts.push(json_to_php(v));
continue;
}
}
match options_via {
"json" => {
let filtered_v = filter_empty_enum_strings(v);
if let serde_json::Value::Object(obj) = &filtered_v {
if obj.is_empty() {
parts.push("null".to_string());
continue;
}
}
parts.push(format!("json_encode({})", json_to_php_camel_keys(&filtered_v)));
continue;
}
_ => {
if let Some(type_name) = options_type {
let filtered_v = filter_empty_enum_strings(v);
if let serde_json::Value::Object(obj) = &filtered_v {
if obj.is_empty() {
let arg_var = format!("${}", arg.name);
setup_lines.push(format!("{arg_var} = {type_name}::from_json('{{}}');"));
parts.push(arg_var);
continue;
}
}
let arg_var = format!("${}", arg.name);
setup_lines.push(format!(
"{arg_var} = {type_name}::from_json(json_encode({}));",
json_to_php(&filtered_v)
));
parts.push(arg_var);
continue;
}
if let Some(obj) = v.as_object() {
setup_lines.push("$builder = $this->createDefaultOptionsBuilder();".to_string());
for (k, vv) in obj {
let snake_key = k.to_snake_case();
if snake_key == "preprocessing" {
if let Some(prep_obj) = vv.as_object() {
let enabled =
prep_obj.get("enabled").and_then(|v| v.as_bool()).unwrap_or(true);
let preset =
prep_obj.get("preset").and_then(|v| v.as_str()).unwrap_or("Minimal");
let remove_navigation = prep_obj
.get("remove_navigation")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let remove_forms =
prep_obj.get("remove_forms").and_then(|v| v.as_bool()).unwrap_or(true);
setup_lines.push(format!(
"$preprocessing = $this->createPreprocessingOptions({}, {}, {}, {});",
if enabled { "true" } else { "false" },
json_to_php(&serde_json::Value::String(preset.to_string())),
if remove_navigation { "true" } else { "false" },
if remove_forms { "true" } else { "false" }
));
setup_lines.push(
"$builder = $builder->preprocessing($preprocessing);".to_string(),
);
}
}
}
setup_lines.push("$options = $builder->build();".to_string());
parts.push("$options".to_string());
continue;
}
}
}
}
parts.push(json_to_php(v));
}
}
}
(setup_lines, parts.join(", "))
}
fn render_assertion(
out: &mut String,
assertion: &Assertion,
result_var: &str,
field_resolver: &FieldResolver,
result_is_simple: bool,
result_is_array: bool,
) {
if let Some(f) = &assertion.field {
match f.as_str() {
"chunks_have_content" => {
let pred = format!(
"array_reduce(${result_var}->chunks ?? [], fn($carry, $c) => $carry && !empty($c->content), true)"
);
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "chunks_content",
assertion_type => assertion.assertion_type.as_str(),
pred => pred,
field_name => f,
},
));
return;
}
"chunks_have_embeddings" => {
let pred = format!(
"array_reduce(${result_var}->chunks ?? [], fn($carry, $c) => $carry && !empty($c->embedding), true)"
);
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "chunks_embeddings",
assertion_type => assertion.assertion_type.as_str(),
pred => pred,
field_name => f,
},
));
return;
}
"embeddings" => {
let php_val = assertion.value.as_ref().map(json_to_php).unwrap_or_default();
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "embeddings",
assertion_type => assertion.assertion_type.as_str(),
php_val => php_val,
result_var => result_var,
},
));
return;
}
"embedding_dimensions" => {
let expr = format!("(empty(${result_var}) ? 0 : count(${result_var}[0]))");
let php_val = assertion.value.as_ref().map(json_to_php).unwrap_or_default();
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "embedding_dimensions",
assertion_type => assertion.assertion_type.as_str(),
expr => expr,
php_val => php_val,
},
));
return;
}
"embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
let pred = match f.as_str() {
"embeddings_valid" => {
format!("array_reduce(${result_var}, fn($carry, $e) => $carry && count($e) > 0, true)")
}
"embeddings_finite" => {
format!(
"array_reduce(${result_var}, fn($carry, $e) => $carry && array_reduce($e, fn($c, $v) => $c && is_finite($v), true), true)"
)
}
"embeddings_non_zero" => {
format!(
"array_reduce(${result_var}, fn($carry, $e) => $carry && count(array_filter($e, fn($v) => $v !== 0.0)) > 0, true)"
)
}
"embeddings_normalized" => {
format!(
"array_reduce(${result_var}, fn($carry, $e) => $carry && abs(array_sum(array_map(fn($v) => $v * $v, $e)) - 1.0) < 1e-3, true)"
)
}
_ => unreachable!(),
};
let assertion_kind = format!("embeddings_{}", f.strip_prefix("embeddings_").unwrap_or(f));
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => assertion_kind,
assertion_type => assertion.assertion_type.as_str(),
pred => pred,
field_name => f,
},
));
return;
}
"keywords" | "keywords_count" => {
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "keywords",
field_name => f,
},
));
return;
}
_ => {}
}
}
if let Some(f) = &assertion.field {
if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "skipped",
field_name => f,
},
));
return;
}
}
if result_is_simple {
if let Some(f) = &assertion.field {
let f_lower = f.to_lowercase();
if !f.is_empty()
&& f_lower != "content"
&& (f_lower.starts_with("metadata")
|| f_lower.starts_with("document")
|| f_lower.starts_with("structure"))
{
out.push_str(&crate::template_env::render(
"php/synthetic_assertion.jinja",
minijinja::context! {
assertion_kind => "result_is_simple",
field_name => f,
},
));
return;
}
}
}
let field_expr = match &assertion.field {
_ if result_is_simple => format!("${result_var}"),
Some(f) if !f.is_empty() => field_resolver.accessor(f, "php", &format!("${result_var}")),
_ => format!("${result_var}"),
};
let field_is_array = assertion.field.as_ref().map_or(result_is_array, |f| {
if f.is_empty() {
result_is_array
} else {
field_resolver.is_array(f)
}
});
let trimmed_field_expr_for = |expected: &serde_json::Value| -> String {
if expected.is_string() {
format!("trim({})", field_expr)
} else {
field_expr.clone()
}
};
let assertion_type = assertion.assertion_type.as_str();
let has_php_val = assertion.value.is_some();
let php_val = match assertion.value.as_ref() {
Some(v) => json_to_php(v),
None if assertion_type == "equals" => "null".to_string(),
None => String::new(),
};
let trimmed_field_expr = trimmed_field_expr_for(assertion.value.as_ref().unwrap_or(&serde_json::Value::Null));
let is_string_val = assertion.value.as_ref().is_some_and(|v| v.is_string());
let values_php: Vec<String> = assertion
.values
.as_ref()
.map_or(Vec::new(), |vals| vals.iter().map(json_to_php).collect());
let contains_any_checks: Vec<String> = assertion
.values
.as_ref()
.map_or(Vec::new(), |vals| vals.iter().map(json_to_php).collect());
let n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);
let call_expr = if let Some(method_name) = &assertion.method {
build_php_method_call(result_var, method_name, assertion.args.as_ref())
} else {
String::new()
};
let check = assertion.check.as_deref().unwrap_or("is_true");
let has_php_check_val = matches!(assertion.assertion_type.as_str(), "method_result") && assertion.value.is_some();
let php_check_val = if matches!(assertion.assertion_type.as_str(), "method_result") {
assertion.value.as_ref().map(json_to_php).unwrap_or_default()
} else {
String::new()
};
let check_n = assertion.value.as_ref().and_then(|v| v.as_u64()).unwrap_or(0);
let is_bool_val = assertion.value.as_ref().is_some_and(|v| v.is_boolean());
let bool_is_true = assertion.value.as_ref().and_then(|v| v.as_bool()).unwrap_or(false);
if matches!(assertion_type, "not_error" | "error") {
if assertion_type == "not_error" {
}
return;
}
let rendered = crate::template_env::render(
"php/assertion.jinja",
minijinja::context! {
assertion_type => assertion_type,
field_expr => field_expr,
php_val => php_val,
has_php_val => has_php_val,
trimmed_field_expr => trimmed_field_expr,
is_string_val => is_string_val,
field_is_array => field_is_array,
values_php => values_php,
contains_any_checks => contains_any_checks,
n => n,
call_expr => call_expr,
check => check,
php_check_val => php_check_val,
has_php_check_val => has_php_check_val,
check_n => check_n,
is_bool_val => is_bool_val,
bool_is_true => bool_is_true,
},
);
let _ = write!(out, " {}", rendered);
}
/// Builds the PHP expression that calls `method_name` on `$<result_var>`.
///
/// When `args` is a JSON object, each of its values becomes one positional
/// argument, in the order the object iterates them; the keys are not emitted.
/// `None`, or any JSON value that is not an object, yields a call with an
/// empty argument list.
///
/// Argument rendering:
/// - strings become PHP double-quoted literals;
/// - booleans, numbers and null become the matching PHP literals;
/// - arrays and nested objects are passed as a double-quoted literal holding
///   their JSON text.
fn build_php_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
    /// Wraps `raw` in a PHP double-quoted literal.
    ///
    /// Backslash is escaped first so the backslashes added for `"` and `$`
    /// are not doubled again. `$` must be escaped because PHP expands
    /// `$name` / `{$expr}` inside double-quoted strings, which would replace
    /// fixture data with the value of an (undefined) PHP variable.
    fn php_quote(raw: &str) -> String {
        format!(
            "\"{}\"",
            raw.replace('\\', "\\\\").replace('"', "\\\"").replace('$', "\\$")
        )
    }

    let extra_args = args
        .and_then(|args_val| args_val.as_object())
        .map(|obj| {
            obj.values()
                .map(|v| match v {
                    serde_json::Value::String(s) => php_quote(s.as_str()),
                    serde_json::Value::Bool(b) => b.to_string(),
                    serde_json::Value::Number(n) => n.to_string(),
                    serde_json::Value::Null => "null".to_string(),
                    other => php_quote(other.to_string().as_str()),
                })
                .collect::<Vec<_>>()
                .join(", ")
        })
        .unwrap_or_default();

    // An empty argument list renders as `()`, so no separate branch is needed.
    format!("${result_var}->{method_name}({extra_args})")
}
/// Returns a deep copy of `value` in which objects have lost every entry
/// whose value is the empty string.
///
/// The pruning applies to objects at any depth. Array elements are processed
/// recursively but never removed, so an empty string sitting directly in an
/// array is kept. All other values are cloned unchanged.
fn filter_empty_enum_strings(value: &serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::Object(entries) => {
            let mut kept: serde_json::Map<String, serde_json::Value> = serde_json::Map::new();
            for (key, entry) in entries {
                // Only a string that is exactly "" is dropped; everything else recurses.
                let is_blank_string = matches!(entry, serde_json::Value::String(text) if text.is_empty());
                if !is_blank_string {
                    kept.insert(key.clone(), filter_empty_enum_strings(entry));
                }
            }
            serde_json::Value::Object(kept)
        }
        serde_json::Value::Array(elements) => {
            serde_json::Value::Array(elements.iter().map(filter_empty_enum_strings).collect())
        }
        scalar => scalar.clone(),
    }
}
/// Renders a JSON value as PHP source text.
///
/// - `null`, booleans and numbers map to the corresponding PHP literals;
/// - strings become double-quoted literals whose content is passed through
///   `escape_php`;
/// - arrays become `[a, b, ...]`;
/// - objects become `["key" => value, ...]`, with keys also passed through
///   `escape_php`.
///
/// Nested arrays and objects are rendered recursively.
fn json_to_php(value: &serde_json::Value) -> String {
    match value {
        serde_json::Value::Null => String::from("null"),
        serde_json::Value::Bool(flag) => String::from(if *flag { "true" } else { "false" }),
        serde_json::Value::Number(num) => num.to_string(),
        serde_json::Value::String(text) => format!("\"{}\"", escape_php(text)),
        serde_json::Value::Array(elements) => {
            let rendered = elements.iter().map(json_to_php).collect::<Vec<String>>().join(", ");
            format!("[{rendered}]")
        }
        serde_json::Value::Object(entries) => {
            let mut pairs: Vec<String> = Vec::with_capacity(entries.len());
            for (key, entry) in entries {
                pairs.push(format!("\"{}\" => {}", escape_php(key), json_to_php(entry)));
            }
            format!("[{}]", pairs.join(", "))
        }
    }
}
/// Renders a JSON value as PHP source text, rewriting every object key to
/// lowerCamelCase.
///
/// Objects and arrays are walked recursively so keys at any depth are
/// converted. Any other value is delegated to `json_to_php` unchanged.
fn json_to_php_camel_keys(value: &serde_json::Value) -> String {
    match value {
        serde_json::Value::Array(elements) => {
            let rendered: Vec<String> = elements.iter().map(json_to_php_camel_keys).collect();
            format!("[{}]", rendered.join(", "))
        }
        serde_json::Value::Object(entries) => {
            let mut pairs: Vec<String> = Vec::with_capacity(entries.len());
            for (key, entry) in entries {
                // The key is converted first, then escaped for the PHP literal.
                let php_key = escape_php(&key.to_lower_camel_case());
                let php_value = json_to_php_camel_keys(entry);
                pairs.push(format!("\"{php_key}\" => {php_value}"));
            }
            format!("[{}]", pairs.join(", "))
        }
        scalar => json_to_php(scalar),
    }
}
/// Appends to `setup_lines` the PHP source for an anonymous class assigned to
/// `$visitor`.
///
/// The class body holds one method per entry in `visitor_spec.callbacks`,
/// each emitted by `emit_php_visitor_method`, between the opening
/// `$visitor = new class {` line and the closing `};` line.
fn build_php_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
    setup_lines.push(String::from("$visitor = new class {"));
    for (callback_name, callback_action) in &visitor_spec.callbacks {
        emit_php_visitor_method(setup_lines, callback_name, callback_action);
    }
    setup_lines.push(String::from("};"));
}
/// Appends the PHP source of one visitor callback method to `setup_lines`.
///
/// The PHP parameter list is chosen from `method_name`; names not listed
/// below fall back to `$ctx` alone. `action` is reduced to an
/// `(action_type, action_value)` pair, where the value is the `escape_php`-ed
/// payload for the two custom variants and empty otherwise. Both are handed
/// to the `php/visitor_method.jinja` template, and each line of the rendered
/// text is pushed as its own entry.
fn emit_php_visitor_method(setup_lines: &mut Vec<String>, method_name: &str, action: &CallbackAction) {
    // Arms are grouped by parameter list; the patterns are disjoint string
    // literals, so their order does not affect the result.
    let signature = match method_name {
        "visit_text"
        | "visit_code_inline"
        | "visit_strong"
        | "visit_emphasis"
        | "visit_strikethrough"
        | "visit_underline"
        | "visit_subscript"
        | "visit_superscript"
        | "visit_mark"
        | "visit_button"
        | "visit_summary"
        | "visit_figcaption"
        | "visit_definition_term"
        | "visit_definition_description" => "$ctx, $text",
        "visit_audio" | "visit_video" | "visit_iframe" => "$ctx, $src",
        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => {
            "$ctx, $output"
        }
        "visit_details" => "$ctx, $isOpen",
        "visit_list_start" => "$ctx, $ordered",
        "visit_list_end" => "$ctx, $ordered, $output",
        "visit_code_block" => "$ctx, $lang, $code",
        "visit_blockquote" => "$ctx, $content, $depth",
        "visit_table_row" => "$ctx, $cells, $isHeader",
        "visit_custom_element" => "$ctx, $tagName, $html",
        "visit_form" => "$ctx, $actionUrl, $method",
        "visit_link" => "$ctx, $href, $text, $title",
        "visit_image" => "$ctx, $src, $alt, $title",
        "visit_heading" => "$ctx, $level, $text, $id",
        "visit_list_item" => "$ctx, $ordered, $marker, $text",
        "visit_input" => "$ctx, $input_type, $name, $value",
        _ => "$ctx",
    };

    let (kind, payload) = match action {
        CallbackAction::Custom { output } => ("custom", escape_php(output)),
        CallbackAction::CustomTemplate { template } => ("custom_template", escape_php(template)),
        CallbackAction::Skip => ("skip", String::new()),
        CallbackAction::Continue => ("continue", String::new()),
        CallbackAction::PreserveHtml => ("preserve_html", String::new()),
    };

    let method_source = crate::template_env::render(
        "php/visitor_method.jinja",
        minijinja::context! {
            method_name => method_name,
            params => signature,
            action_type => kind,
            action_value => payload,
        },
    );
    setup_lines.extend(method_source.lines().map(String::from));
}
/// Reports whether `name` is one of the PHP type keywords listed below.
///
/// The comparison ignores ASCII case, so `String` and `MIXED` match just like
/// their lowercase spellings. The whole name must match; surrounding
/// whitespace or extra characters make it a non-match.
fn is_php_reserved_type(name: &str) -> bool {
    // All entries are lowercase ASCII, so an ASCII-case-insensitive comparison
    // is the same as lowercasing `name` and comparing exactly.
    const RESERVED_TYPE_NAMES: &[&str] = &[
        "string", "int", "integer", "float", "double", "bool", "boolean", "array", "object", "null",
        "void", "callable", "iterable", "never", "self", "parent", "static", "true", "false", "mixed",
    ];

    RESERVED_TYPE_NAMES
        .iter()
        .any(|keyword| name.eq_ignore_ascii_case(keyword))
}