Skip to main content

alef_e2e/codegen/
php.rs

//! PHP e2e test generator using PHPUnit.
//!
//! Generates `e2e/php/composer.json`, `e2e/php/phpunit.xml`,
//! `e2e/php/bootstrap.php`, and `tests/{Category}Test.php` files from JSON
//! fixtures, driven entirely by `E2eConfig` and `CallConfig`.
6
7use crate::config::E2eConfig;
8use crate::escape::{escape_php, sanitize_filename};
9use crate::field_access::FieldResolver;
10use crate::fixture::{Assertion, Fixture, FixtureGroup};
11use alef_core::backend::GeneratedFile;
12use alef_core::config::AlefConfig;
13use anyhow::Result;
14use heck::{ToSnakeCase, ToUpperCamelCase};
15use std::collections::HashMap;
16use std::fmt::Write as FmtWrite;
17use std::path::PathBuf;
18
19use super::E2eCodegen;
20
21/// PHP e2e code generator.
22pub struct PhpCodegen;
23
24impl E2eCodegen for PhpCodegen {
25    fn generate(
26        &self,
27        groups: &[FixtureGroup],
28        e2e_config: &E2eConfig,
29        alef_config: &AlefConfig,
30    ) -> Result<Vec<GeneratedFile>> {
31        let lang = self.language_name();
32        let output_base = PathBuf::from(&e2e_config.output).join(lang);
33
34        let mut files = Vec::new();
35
36        // Resolve call config with overrides.
37        let call = &e2e_config.call;
38        let overrides = call.overrides.get(lang);
39        let function_name = overrides
40            .and_then(|o| o.function.as_ref())
41            .cloned()
42            .unwrap_or_else(|| call.function.clone());
43        let class_name = overrides
44            .and_then(|o| o.class.as_ref())
45            .cloned()
46            .unwrap_or_else(|| alef_config.crate_config.name.to_upper_camel_case());
47        let namespace = overrides.and_then(|o| o.module.as_ref()).cloned().unwrap_or_else(|| {
48            if call.module.is_empty() {
49                "Kreuzberg".to_string()
50            } else {
51                call.module.to_upper_camel_case()
52            }
53        });
54        let empty_enum_fields = HashMap::new();
55        let enum_fields = overrides.map(|o| &o.enum_fields).unwrap_or(&empty_enum_fields);
56        let result_is_simple = overrides.is_some_and(|o| o.result_is_simple);
57        let result_var = &call.result_var;
58
59        // Resolve package config.
60        let php_pkg = e2e_config.packages.get("php");
61        let pkg_name = php_pkg
62            .and_then(|p| p.name.as_ref())
63            .cloned()
64            .unwrap_or_else(|| format!("kreuzberg/{}", call.module.replace('_', "-")));
65        let pkg_path = php_pkg
66            .and_then(|p| p.path.as_ref())
67            .cloned()
68            .unwrap_or_else(|| "../../packages/php".to_string());
69
70        // Generate composer.json.
71        files.push(GeneratedFile {
72            path: output_base.join("composer.json"),
73            content: render_composer_json(&pkg_name, &pkg_path),
74            generated_header: false,
75        });
76
77        // Generate phpunit.xml.
78        files.push(GeneratedFile {
79            path: output_base.join("phpunit.xml"),
80            content: render_phpunit_xml(),
81            generated_header: false,
82        });
83
84        // Generate bootstrap.php that loads both autoloaders.
85        files.push(GeneratedFile {
86            path: output_base.join("bootstrap.php"),
87            content: render_bootstrap(&pkg_path),
88            generated_header: true,
89        });
90
91        // Generate test files per category.
92        let tests_base = output_base.join("tests");
93        let field_resolver = FieldResolver::new(
94            &e2e_config.fields,
95            &e2e_config.fields_optional,
96            &e2e_config.result_fields,
97            &e2e_config.fields_array,
98        );
99
100        for group in groups {
101            let active: Vec<&Fixture> = group
102                .fixtures
103                .iter()
104                .filter(|f| f.skip.as_ref().is_none_or(|s| !s.should_skip(lang)))
105                .collect();
106
107            if active.is_empty() {
108                continue;
109            }
110
111            let test_class = format!("{}Test", sanitize_filename(&group.category).to_upper_camel_case());
112            let filename = format!("{test_class}.php");
113            let content = render_test_file(
114                &group.category,
115                &active,
116                &namespace,
117                &class_name,
118                &function_name,
119                result_var,
120                &test_class,
121                &e2e_config.call.args,
122                &field_resolver,
123                enum_fields,
124                result_is_simple,
125            );
126            files.push(GeneratedFile {
127                path: tests_base.join(filename),
128                content,
129                generated_header: true,
130            });
131        }
132
133        Ok(files)
134    }
135
136    fn language_name(&self) -> &'static str {
137        "php"
138    }
139}
140
141// ---------------------------------------------------------------------------
142// Rendering
143// ---------------------------------------------------------------------------
144
/// Returns the static `composer.json` of the e2e project.
///
/// Both parameters are currently unused: the manifest only pulls in PHPUnit
/// and registers the `Kreuzberg\E2e\` PSR-4 prefix for `tests/`.
fn render_composer_json(_pkg_name: &str, _pkg_path: &str) -> String {
    const MANIFEST_LINES: [&str; 13] = [
        r#"{"#,
        r#"  "name": "kreuzberg/e2e-php","#,
        r#"  "description": "E2e tests for PHP bindings","#,
        r#"  "type": "project","#,
        r#"  "require-dev": {"#,
        r#"    "phpunit/phpunit": "^11.0""#,
        r#"  },"#,
        r#"  "autoload-dev": {"#,
        r#"    "psr-4": {"#,
        r#"      "Kreuzberg\\E2e\\": "tests/""#,
        r#"    }"#,
        r#"  }"#,
        r#"}"#,
    ];

    let mut manifest = String::new();
    for line in MANIFEST_LINES {
        manifest.push_str(line);
        manifest.push('\n');
    }
    manifest
}
162
/// Returns the static PHPUnit 11 configuration: a single `e2e` suite rooted at
/// `tests`, bootstrapped through `bootstrap.php`, failing on risky tests and warnings.
fn render_phpunit_xml() -> String {
    const CONFIG_LINES: [&str; 13] = [
        r#"<?xml version="1.0" encoding="UTF-8"?>"#,
        r#"<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance""#,
        r#"         xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/11.0/phpunit.xsd""#,
        r#"         bootstrap="bootstrap.php""#,
        r#"         colors="true""#,
        r#"         failOnRisky="true""#,
        r#"         failOnWarning="true">"#,
        r#"    <testsuites>"#,
        r#"        <testsuite name="e2e">"#,
        r#"            <directory>tests</directory>"#,
        r#"        </testsuite>"#,
        r#"    </testsuites>"#,
        r#"</phpunit>"#,
    ];

    let mut xml = String::new();
    for line in CONFIG_LINES {
        xml.push_str(line);
        xml.push('\n');
    }
    xml
}
180
/// Renders `bootstrap.php`, which loads the e2e project's Composer autoloader
/// and, when present, the autoloader of the PHP binding package.
///
/// `pkg_path` is the package directory relative to the generated bootstrap
/// file. It is embedded in a PHP single-quoted string literal, so backslashes
/// and single quotes are escaped to keep the generated file syntactically
/// valid for any path.
fn render_bootstrap(pkg_path: &str) -> String {
    // In PHP single-quoted strings only `\\` and `\'` are escape sequences;
    // escape the backslash first so the quote escapes are not doubled.
    let pkg_path = pkg_path.replace('\\', "\\\\").replace('\'', "\\'");
    format!(
        r#"<?php
// This file is auto-generated by alef. DO NOT EDIT.

declare(strict_types=1);

// Load the e2e project autoloader (PHPUnit, test helpers).
require_once __DIR__ . '/vendor/autoload.php';

// Load the PHP binding package classes via its Composer autoloader.
// The package's autoloader is separate from the e2e project's autoloader
// since the php-ext type prevents direct composer path dependency.
$pkgAutoloader = __DIR__ . '/{pkg_path}/vendor/autoload.php';
if (file_exists($pkgAutoloader)) {{
    require_once $pkgAutoloader;
}}
"#
    )
}
201
202#[allow(clippy::too_many_arguments)]
203fn render_test_file(
204    category: &str,
205    fixtures: &[&Fixture],
206    namespace: &str,
207    class_name: &str,
208    function_name: &str,
209    result_var: &str,
210    test_class: &str,
211    args: &[crate::config::ArgMapping],
212    field_resolver: &FieldResolver,
213    enum_fields: &HashMap<String, String>,
214    result_is_simple: bool,
215) -> String {
216    let mut out = String::new();
217    let _ = writeln!(out, "<?php");
218    let _ = writeln!(out, "// This file is auto-generated by alef. DO NOT EDIT.");
219    let _ = writeln!(out);
220    let _ = writeln!(out, "declare(strict_types=1);");
221    let _ = writeln!(out);
222    let _ = writeln!(out, "namespace Kreuzberg\\E2e;");
223    let _ = writeln!(out);
224    // Determine if any handle arg has a non-null config (needs CrawlConfig import).
225    let needs_crawl_config_import = fixtures.iter().any(|f| {
226        args.iter().filter(|a| a.arg_type == "handle").any(|a| {
227            let v = f.input.get(&a.field).unwrap_or(&serde_json::Value::Null);
228            !(v.is_null() || v.is_object() && v.as_object().is_some_and(|o| o.is_empty()))
229        })
230    });
231
232    let _ = writeln!(out, "use PHPUnit\\Framework\\TestCase;");
233    if !result_is_simple {
234        let _ = writeln!(out, "use {namespace}\\{class_name};");
235    }
236    if needs_crawl_config_import {
237        let _ = writeln!(out, "use {namespace}\\CrawlConfig;");
238    }
239    let _ = writeln!(out);
240    let _ = writeln!(out, "/** E2e tests for category: {category}. */");
241    let _ = writeln!(out, "final class {test_class} extends TestCase");
242    let _ = writeln!(out, "{{");
243
244    for (i, fixture) in fixtures.iter().enumerate() {
245        render_test_method(
246            &mut out,
247            fixture,
248            class_name,
249            function_name,
250            result_var,
251            args,
252            field_resolver,
253            enum_fields,
254            result_is_simple,
255        );
256        if i + 1 < fixtures.len() {
257            let _ = writeln!(out);
258        }
259    }
260
261    let _ = writeln!(out, "}}");
262    out
263}
264
265#[allow(clippy::too_many_arguments)]
266fn render_test_method(
267    out: &mut String,
268    fixture: &Fixture,
269    class_name: &str,
270    function_name: &str,
271    result_var: &str,
272    args: &[crate::config::ArgMapping],
273    field_resolver: &FieldResolver,
274    enum_fields: &HashMap<String, String>,
275    result_is_simple: bool,
276) {
277    let method_name = sanitize_filename(&fixture.id);
278    let description = &fixture.description;
279    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
280
281    let (setup_lines, args_str) = build_args_and_setup(&fixture.input, args, class_name, enum_fields, &fixture.id);
282
283    // When result_is_simple, emit a simple function call instead of a class method.
284    let call_expr = if result_is_simple {
285        format!("html_to_markdown_convert({args_str})")
286    } else {
287        format!("{class_name}::{function_name}({args_str})")
288    };
289
290    let _ = writeln!(out, "    /** {description} */");
291    let _ = writeln!(out, "    public function test_{method_name}(): void");
292    let _ = writeln!(out, "    {{");
293
294    for line in &setup_lines {
295        let _ = writeln!(out, "        {line}");
296    }
297
298    if expects_error {
299        let _ = writeln!(out, "        $this->expectException(\\Exception::class);");
300        let _ = writeln!(out, "        {call_expr};");
301        let _ = writeln!(out, "    }}");
302        return;
303    }
304
305    let _ = writeln!(out, "        ${result_var} = {call_expr};");
306
307    for assertion in &fixture.assertions {
308        render_assertion(out, assertion, result_var, field_resolver, result_is_simple);
309    }
310
311    let _ = writeln!(out, "    }}");
312}
313
314/// Build setup lines (e.g. handle creation) and the argument list for the function call.
315///
316/// Returns `(setup_lines, args_string)`.
317fn build_args_and_setup(
318    input: &serde_json::Value,
319    args: &[crate::config::ArgMapping],
320    class_name: &str,
321    enum_fields: &HashMap<String, String>,
322    fixture_id: &str,
323) -> (Vec<String>, String) {
324    if args.is_empty() {
325        return (Vec::new(), json_to_php(input));
326    }
327
328    let mut setup_lines: Vec<String> = Vec::new();
329    let mut parts: Vec<String> = Vec::new();
330
331    for arg in args {
332        if arg.arg_type == "mock_url" {
333            setup_lines.push(format!(
334                "${} = getenv('MOCK_SERVER_URL') . '/fixtures/{fixture_id}';",
335                arg.name,
336            ));
337            parts.push(format!("${}", arg.name));
338            continue;
339        }
340
341        if arg.arg_type == "handle" {
342            // Generate a createEngine (or equivalent) call and pass the variable.
343            let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
344            let config_value = input.get(&arg.field).unwrap_or(&serde_json::Value::Null);
345            if config_value.is_null()
346                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
347            {
348                setup_lines.push(format!("${} = {class_name}::{constructor_name}(null);", arg.name,));
349            } else {
350                let name = &arg.name;
351                // Check if config has complex fields (objects/maps) that can't be
352                // set via PHP property assignment. If so, use createEngineFromJson
353                // which deserializes via core serde (handles auth, browser, proxy etc.).
354                let has_complex = config_value
355                    .as_object()
356                    .is_some_and(|obj| obj.values().any(|v| v.is_object() || v.is_array()));
357                if has_complex {
358                    let json_str = serde_json::to_string(config_value).unwrap_or_default();
359                    let escaped = json_str.replace('\'', "\\'");
360                    setup_lines.push(format!(
361                        "${} = {class_name}::createEngineFromJson('{escaped}');",
362                        arg.name,
363                    ));
364                } else {
365                    setup_lines.push(format!("${name}_config = CrawlConfig::default();"));
366                    if let Some(obj) = config_value.as_object() {
367                        for (key, val) in obj {
368                            let php_val = json_to_php(val);
369                            setup_lines.push(format!("${name}_config->{key} = {php_val};"));
370                        }
371                    }
372                    setup_lines.push(format!(
373                        "${} = {class_name}::{constructor_name}(${name}_config);",
374                        arg.name,
375                        name = name,
376                    ));
377                }
378            }
379            parts.push(format!("${}", arg.name));
380            continue;
381        }
382
383        let val = input.get(&arg.field);
384        match val {
385            None | Some(serde_json::Value::Null) if arg.optional => {
386                // Optional arg with no fixture value: skip entirely.
387                continue;
388            }
389            None | Some(serde_json::Value::Null) => {
390                // Required arg with no fixture value: pass a language-appropriate default.
391                let default_val = match arg.arg_type.as_str() {
392                    "string" => "\"\"".to_string(),
393                    "int" | "integer" => "0".to_string(),
394                    "float" | "number" => "0.0".to_string(),
395                    "bool" | "boolean" => "false".to_string(),
396                    _ => "null".to_string(),
397                };
398                parts.push(default_val);
399            }
400            Some(v) => {
401                // For json_object args, convert keys to snake_case and enum values appropriately.
402                if arg.arg_type == "json_object" && !v.is_null() {
403                    if let Some(obj) = v.as_object() {
404                        let items: Vec<String> = obj
405                            .iter()
406                            .map(|(k, vv)| {
407                                let snake_key = k.to_snake_case();
408                                let php_val = if enum_fields.contains_key(k) {
409                                    if let Some(s) = vv.as_str() {
410                                        let snake_val = s.to_snake_case();
411                                        format!("\"{}\"", escape_php(&snake_val))
412                                    } else {
413                                        json_to_php(vv)
414                                    }
415                                } else {
416                                    json_to_php(vv)
417                                };
418                                format!("\"{}\" => {}", escape_php(&snake_key), php_val)
419                            })
420                            .collect();
421                        parts.push(format!("[{}]", items.join(", ")));
422                        continue;
423                    }
424                }
425                parts.push(json_to_php(v));
426            }
427        }
428    }
429
430    (setup_lines, parts.join(", "))
431}
432
433fn render_assertion(
434    out: &mut String,
435    assertion: &Assertion,
436    result_var: &str,
437    field_resolver: &FieldResolver,
438    result_is_simple: bool,
439) {
440    // Skip assertions on fields that don't exist on the result type.
441    if let Some(f) = &assertion.field {
442        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
443            let _ = writeln!(out, "        // skipped: field '{f}' not available on result type");
444            return;
445        }
446    }
447
448    // When result_is_simple, skip assertions that reference non-content fields
449    // (e.g., metadata, document, structure) since the binding returns a plain value.
450    if result_is_simple {
451        if let Some(f) = &assertion.field {
452            let f_lower = f.to_lowercase();
453            if !f.is_empty()
454                && f_lower != "content"
455                && (f_lower.starts_with("metadata")
456                    || f_lower.starts_with("document")
457                    || f_lower.starts_with("structure"))
458            {
459                let _ = writeln!(out, "        // TODO: skipped (result_is_simple, field: {f})");
460                return;
461            }
462        }
463    }
464
465    let field_expr = if result_is_simple {
466        format!("${result_var}")
467    } else {
468        match &assertion.field {
469            Some(f) if !f.is_empty() => field_resolver.accessor(f, "php", &format!("${result_var}")),
470            _ => format!("${result_var}"),
471        }
472    };
473
474    // For string equality, trim trailing whitespace to handle trailing newlines.
475    let trimmed_field_expr = if result_is_simple {
476        format!("trim(${result_var})")
477    } else {
478        field_expr.clone()
479    };
480
481    match assertion.assertion_type.as_str() {
482        "equals" => {
483            if let Some(expected) = &assertion.value {
484                let php_val = json_to_php(expected);
485                let _ = writeln!(out, "        $this->assertEquals({php_val}, {trimmed_field_expr});");
486            }
487        }
488        "contains" => {
489            if let Some(expected) = &assertion.value {
490                let php_val = json_to_php(expected);
491                let _ = writeln!(
492                    out,
493                    "        $this->assertStringContainsString({php_val}, {field_expr});"
494                );
495            }
496        }
497        "contains_all" => {
498            if let Some(values) = &assertion.values {
499                for val in values {
500                    let php_val = json_to_php(val);
501                    let _ = writeln!(
502                        out,
503                        "        $this->assertStringContainsString({php_val}, {field_expr});"
504                    );
505                }
506            }
507        }
508        "not_contains" => {
509            if let Some(expected) = &assertion.value {
510                let php_val = json_to_php(expected);
511                let _ = writeln!(
512                    out,
513                    "        $this->assertStringNotContainsString({php_val}, {field_expr});"
514                );
515            }
516        }
517        "not_empty" => {
518            let _ = writeln!(out, "        $this->assertNotEmpty({field_expr});");
519        }
520        "is_empty" => {
521            let _ = writeln!(out, "        $this->assertEmpty({trimmed_field_expr});");
522        }
523        "contains_any" => {
524            if let Some(values) = &assertion.values {
525                let _ = writeln!(out, "        $found = false;");
526                for val in values {
527                    let php_val = json_to_php(val);
528                    let _ = writeln!(
529                        out,
530                        "        if (str_contains({field_expr}, {php_val})) {{ $found = true; }}"
531                    );
532                }
533                let _ = writeln!(
534                    out,
535                    "        $this->assertTrue($found, 'expected to contain at least one of the specified values');"
536                );
537            }
538        }
539        "greater_than" => {
540            if let Some(val) = &assertion.value {
541                let php_val = json_to_php(val);
542                let _ = writeln!(out, "        $this->assertGreaterThan({php_val}, {field_expr});");
543            }
544        }
545        "less_than" => {
546            if let Some(val) = &assertion.value {
547                let php_val = json_to_php(val);
548                let _ = writeln!(out, "        $this->assertLessThan({php_val}, {field_expr});");
549            }
550        }
551        "greater_than_or_equal" => {
552            if let Some(val) = &assertion.value {
553                let php_val = json_to_php(val);
554                let _ = writeln!(out, "        $this->assertGreaterThanOrEqual({php_val}, {field_expr});");
555            }
556        }
557        "less_than_or_equal" => {
558            if let Some(val) = &assertion.value {
559                let php_val = json_to_php(val);
560                let _ = writeln!(out, "        $this->assertLessThanOrEqual({php_val}, {field_expr});");
561            }
562        }
563        "starts_with" => {
564            if let Some(expected) = &assertion.value {
565                let php_val = json_to_php(expected);
566                let _ = writeln!(out, "        $this->assertStringStartsWith({php_val}, {field_expr});");
567            }
568        }
569        "ends_with" => {
570            if let Some(expected) = &assertion.value {
571                let php_val = json_to_php(expected);
572                let _ = writeln!(out, "        $this->assertStringEndsWith({php_val}, {field_expr});");
573            }
574        }
575        "min_length" => {
576            if let Some(val) = &assertion.value {
577                if let Some(n) = val.as_u64() {
578                    let _ = writeln!(
579                        out,
580                        "        $this->assertGreaterThanOrEqual({n}, strlen({field_expr}));"
581                    );
582                }
583            }
584        }
585        "max_length" => {
586            if let Some(val) = &assertion.value {
587                if let Some(n) = val.as_u64() {
588                    let _ = writeln!(out, "        $this->assertLessThanOrEqual({n}, strlen({field_expr}));");
589                }
590            }
591        }
592        "count_min" => {
593            if let Some(val) = &assertion.value {
594                if let Some(n) = val.as_u64() {
595                    let _ = writeln!(
596                        out,
597                        "        $this->assertGreaterThanOrEqual({n}, count({field_expr}));"
598                    );
599                }
600            }
601        }
602        "not_error" => {
603            // Already handled by the call succeeding without exception.
604        }
605        "error" => {
606            // Handled at the test method level.
607        }
608        other => {
609            let _ = writeln!(out, "        // TODO: unsupported assertion type: {other}");
610        }
611    }
612}
613
614/// Convert a `serde_json::Value` to a PHP literal string.
615fn json_to_php(value: &serde_json::Value) -> String {
616    match value {
617        serde_json::Value::String(s) => format!("\"{}\"", escape_php(s)),
618        serde_json::Value::Bool(true) => "true".to_string(),
619        serde_json::Value::Bool(false) => "false".to_string(),
620        serde_json::Value::Number(n) => n.to_string(),
621        serde_json::Value::Null => "null".to_string(),
622        serde_json::Value::Array(arr) => {
623            let items: Vec<String> = arr.iter().map(json_to_php).collect();
624            format!("[{}]", items.join(", "))
625        }
626        serde_json::Value::Object(map) => {
627            let items: Vec<String> = map
628                .iter()
629                .map(|(k, v)| format!("\"{}\" => {}", escape_php(k), json_to_php(v)))
630                .collect();
631            format!("[{}]", items.join(", "))
632        }
633    }
634}