Skip to main content

alef_e2e/codegen/
php.rs

//! PHP e2e test generator using PHPUnit.
//!
//! Generates `e2e/php/composer.json`, `e2e/php/phpunit.xml`,
//! `e2e/php/bootstrap.php`, and `tests/{Category}Test.php` files from JSON
//! fixtures, driven entirely by `E2eConfig` and `CallConfig`.
6
7use crate::config::E2eConfig;
8use crate::escape::{escape_php, sanitize_filename};
9use crate::field_access::FieldResolver;
10use crate::fixture::{Assertion, Fixture, FixtureGroup};
11use alef_core::backend::GeneratedFile;
12use alef_core::config::AlefConfig;
13use anyhow::Result;
14use heck::{ToSnakeCase, ToUpperCamelCase};
15use std::collections::HashMap;
16use std::fmt::Write as FmtWrite;
17use std::path::PathBuf;
18
19use super::E2eCodegen;
20
21/// PHP e2e code generator.
22pub struct PhpCodegen;
23
24impl E2eCodegen for PhpCodegen {
25    fn generate(
26        &self,
27        groups: &[FixtureGroup],
28        e2e_config: &E2eConfig,
29        alef_config: &AlefConfig,
30    ) -> Result<Vec<GeneratedFile>> {
31        let lang = self.language_name();
32        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
33
34        let mut files = Vec::new();
35
36        // Resolve call config with overrides.
37        let call = &e2e_config.call;
38        let overrides = call.overrides.get(lang);
39        let function_name = overrides
40            .and_then(|o| o.function.as_ref())
41            .cloned()
42            .unwrap_or_else(|| call.function.clone());
43        let class_name = overrides
44            .and_then(|o| o.class.as_ref())
45            .cloned()
46            .unwrap_or_else(|| alef_config.crate_config.name.to_upper_camel_case());
47        let namespace = overrides.and_then(|o| o.module.as_ref()).cloned().unwrap_or_else(|| {
48            if call.module.is_empty() {
49                "Kreuzberg".to_string()
50            } else {
51                call.module.to_upper_camel_case()
52            }
53        });
54        let empty_enum_fields = HashMap::new();
55        let enum_fields = overrides.map(|o| &o.enum_fields).unwrap_or(&empty_enum_fields);
56        let result_is_simple = overrides.is_some_and(|o| o.result_is_simple);
57        let result_var = &call.result_var;
58
59        // Resolve package config.
60        let php_pkg = e2e_config.resolve_package("php");
61        let pkg_name = php_pkg
62            .as_ref()
63            .and_then(|p| p.name.as_ref())
64            .cloned()
65            .unwrap_or_else(|| format!("kreuzberg/{}", call.module.replace('_', "-")));
66        let pkg_path = php_pkg
67            .as_ref()
68            .and_then(|p| p.path.as_ref())
69            .cloned()
70            .unwrap_or_else(|| "../../packages/php".to_string());
71        let pkg_version = php_pkg
72            .as_ref()
73            .and_then(|p| p.version.as_ref())
74            .cloned()
75            .unwrap_or_else(|| "0.1.0".to_string());
76
77        // Generate composer.json.
78        files.push(GeneratedFile {
79            path: output_base.join("composer.json"),
80            content: render_composer_json(&pkg_name, &pkg_path, &pkg_version, e2e_config.dep_mode),
81            generated_header: false,
82        });
83
84        // Generate phpunit.xml.
85        files.push(GeneratedFile {
86            path: output_base.join("phpunit.xml"),
87            content: render_phpunit_xml(),
88            generated_header: false,
89        });
90
91        // Generate bootstrap.php that loads both autoloaders.
92        files.push(GeneratedFile {
93            path: output_base.join("bootstrap.php"),
94            content: render_bootstrap(&pkg_path),
95            generated_header: true,
96        });
97
98        // Generate test files per category.
99        let tests_base = output_base.join("tests");
100        let field_resolver = FieldResolver::new(
101            &e2e_config.fields,
102            &e2e_config.fields_optional,
103            &e2e_config.result_fields,
104            &e2e_config.fields_array,
105        );
106
107        for group in groups {
108            let active: Vec<&Fixture> = group
109                .fixtures
110                .iter()
111                .filter(|f| f.skip.as_ref().is_none_or(|s| !s.should_skip(lang)))
112                .collect();
113
114            if active.is_empty() {
115                continue;
116            }
117
118            let test_class = format!("{}Test", sanitize_filename(&group.category).to_upper_camel_case());
119            let filename = format!("{test_class}.php");
120            let content = render_test_file(
121                &group.category,
122                &active,
123                &namespace,
124                &class_name,
125                &function_name,
126                result_var,
127                &test_class,
128                &e2e_config.call.args,
129                &field_resolver,
130                enum_fields,
131                result_is_simple,
132            );
133            files.push(GeneratedFile {
134                path: tests_base.join(filename),
135                content,
136                generated_header: true,
137            });
138        }
139
140        Ok(files)
141    }
142
143    fn language_name(&self) -> &'static str {
144        "php"
145    }
146}
147
148// ---------------------------------------------------------------------------
149// Rendering
150// ---------------------------------------------------------------------------
151
152fn render_composer_json(
153    pkg_name: &str,
154    _pkg_path: &str,
155    pkg_version: &str,
156    dep_mode: crate::config::DependencyMode,
157) -> String {
158    let require_section = match dep_mode {
159        crate::config::DependencyMode::Registry => {
160            format!(
161                r#"  "require": {{
162    "{pkg_name}": "{pkg_version}"
163  }},
164  "require-dev": {{
165    "phpunit/phpunit": "^11.0"
166  }},"#
167            )
168        }
169        crate::config::DependencyMode::Local => r#"  "require-dev": {
170    "phpunit/phpunit": "^11.0"
171  },"#
172        .to_string(),
173    };
174
175    format!(
176        r#"{{
177  "name": "kreuzberg/e2e-php",
178  "description": "E2e tests for PHP bindings",
179  "type": "project",
180{require_section}
181  "autoload-dev": {{
182    "psr-4": {{
183      "Kreuzberg\\E2e\\": "tests/"
184    }}
185  }}
186}}
187"#
188    )
189}
190
/// Render the static `phpunit.xml`: PHPUnit 11 schema, `bootstrap.php` as the
/// bootstrap script, and a single `e2e` test suite rooted at `tests`.
fn render_phpunit_xml() -> String {
    const LINES: [&str; 13] = [
        r#"<?xml version="1.0" encoding="UTF-8"?>"#,
        r#"<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance""#,
        r#"         xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/11.0/phpunit.xsd""#,
        r#"         bootstrap="bootstrap.php""#,
        r#"         colors="true""#,
        r#"         failOnRisky="true""#,
        r#"         failOnWarning="true">"#,
        "    <testsuites>",
        r#"        <testsuite name="e2e">"#,
        "            <directory>tests</directory>",
        "        </testsuite>",
        "    </testsuites>",
        "</phpunit>",
    ];

    let mut xml = LINES.join("\n");
    xml.push('\n');
    xml
}
208
/// Render `bootstrap.php`, the PHPUnit bootstrap script.
///
/// The script always requires the e2e project's own `vendor/autoload.php`,
/// then requires the binding package's autoloader at
/// `__DIR__ . '/{pkg_path}/vendor/autoload.php'` if that file exists.
/// `pkg_path` is inserted verbatim into a single-quoted PHP string.
fn render_bootstrap(pkg_path: &str) -> String {
    let autoloader_line = format!("$pkgAutoloader = __DIR__ . '/{pkg_path}/vendor/autoload.php';");

    let lines = [
        "<?php",
        "// This file is auto-generated by alef. DO NOT EDIT.",
        "",
        "declare(strict_types=1);",
        "",
        "// Load the e2e project autoloader (PHPUnit, test helpers).",
        "require_once __DIR__ . '/vendor/autoload.php';",
        "",
        "// Load the PHP binding package classes via its Composer autoloader.",
        "// The package's autoloader is separate from the e2e project's autoloader",
        "// since the php-ext type prevents direct composer path dependency.",
        autoloader_line.as_str(),
        "if (file_exists($pkgAutoloader)) {",
        "    require_once $pkgAutoloader;",
        "}",
    ];

    let mut php = lines.join("\n");
    php.push('\n');
    php
}
229
230#[allow(clippy::too_many_arguments)]
231fn render_test_file(
232    category: &str,
233    fixtures: &[&Fixture],
234    namespace: &str,
235    class_name: &str,
236    function_name: &str,
237    result_var: &str,
238    test_class: &str,
239    args: &[crate::config::ArgMapping],
240    field_resolver: &FieldResolver,
241    enum_fields: &HashMap<String, String>,
242    result_is_simple: bool,
243) -> String {
244    let mut out = String::new();
245    let _ = writeln!(out, "<?php");
246    let _ = writeln!(out, "// This file is auto-generated by alef. DO NOT EDIT.");
247    let _ = writeln!(out);
248    let _ = writeln!(out, "declare(strict_types=1);");
249    let _ = writeln!(out);
250    let _ = writeln!(out, "namespace Kreuzberg\\E2e;");
251    let _ = writeln!(out);
252    // Determine if any handle arg has a non-null config (needs CrawlConfig import).
253    let needs_crawl_config_import = fixtures.iter().any(|f| {
254        args.iter().filter(|a| a.arg_type == "handle").any(|a| {
255            let v = f.input.get(&a.field).unwrap_or(&serde_json::Value::Null);
256            !(v.is_null() || v.is_object() && v.as_object().is_some_and(|o| o.is_empty()))
257        })
258    });
259
260    let _ = writeln!(out, "use PHPUnit\\Framework\\TestCase;");
261    if !result_is_simple {
262        let _ = writeln!(out, "use {namespace}\\{class_name};");
263    }
264    if needs_crawl_config_import {
265        let _ = writeln!(out, "use {namespace}\\CrawlConfig;");
266    }
267    let _ = writeln!(out);
268    let _ = writeln!(out, "/** E2e tests for category: {category}. */");
269    let _ = writeln!(out, "final class {test_class} extends TestCase");
270    let _ = writeln!(out, "{{");
271
272    for (i, fixture) in fixtures.iter().enumerate() {
273        render_test_method(
274            &mut out,
275            fixture,
276            class_name,
277            function_name,
278            result_var,
279            args,
280            field_resolver,
281            enum_fields,
282            result_is_simple,
283        );
284        if i + 1 < fixtures.len() {
285            let _ = writeln!(out);
286        }
287    }
288
289    let _ = writeln!(out, "}}");
290    out
291}
292
293#[allow(clippy::too_many_arguments)]
294fn render_test_method(
295    out: &mut String,
296    fixture: &Fixture,
297    class_name: &str,
298    function_name: &str,
299    result_var: &str,
300    args: &[crate::config::ArgMapping],
301    field_resolver: &FieldResolver,
302    enum_fields: &HashMap<String, String>,
303    result_is_simple: bool,
304) {
305    let method_name = sanitize_filename(&fixture.id);
306    let description = &fixture.description;
307    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
308
309    let (setup_lines, args_str) = build_args_and_setup(&fixture.input, args, class_name, enum_fields, &fixture.id);
310
311    // When result_is_simple, emit a simple function call instead of a class method.
312    let call_expr = if result_is_simple {
313        format!("html_to_markdown_convert({args_str})")
314    } else {
315        format!("{class_name}::{function_name}({args_str})")
316    };
317
318    let _ = writeln!(out, "    /** {description} */");
319    let _ = writeln!(out, "    public function test_{method_name}(): void");
320    let _ = writeln!(out, "    {{");
321
322    for line in &setup_lines {
323        let _ = writeln!(out, "        {line}");
324    }
325
326    if expects_error {
327        let _ = writeln!(out, "        $this->expectException(\\Exception::class);");
328        let _ = writeln!(out, "        {call_expr};");
329        let _ = writeln!(out, "    }}");
330        return;
331    }
332
333    let _ = writeln!(out, "        ${result_var} = {call_expr};");
334
335    for assertion in &fixture.assertions {
336        render_assertion(out, assertion, result_var, field_resolver, result_is_simple);
337    }
338
339    let _ = writeln!(out, "    }}");
340}
341
342/// Build setup lines (e.g. handle creation) and the argument list for the function call.
343///
344/// Returns `(setup_lines, args_string)`.
345fn build_args_and_setup(
346    input: &serde_json::Value,
347    args: &[crate::config::ArgMapping],
348    class_name: &str,
349    enum_fields: &HashMap<String, String>,
350    fixture_id: &str,
351) -> (Vec<String>, String) {
352    if args.is_empty() {
353        return (Vec::new(), json_to_php(input));
354    }
355
356    let mut setup_lines: Vec<String> = Vec::new();
357    let mut parts: Vec<String> = Vec::new();
358
359    for arg in args {
360        if arg.arg_type == "mock_url" {
361            setup_lines.push(format!(
362                "${} = getenv('MOCK_SERVER_URL') . '/fixtures/{fixture_id}';",
363                arg.name,
364            ));
365            parts.push(format!("${}", arg.name));
366            continue;
367        }
368
369        if arg.arg_type == "handle" {
370            // Generate a createEngine (or equivalent) call and pass the variable.
371            let constructor_name = format!("create{}", arg.name.to_upper_camel_case());
372            let config_value = input.get(&arg.field).unwrap_or(&serde_json::Value::Null);
373            if config_value.is_null()
374                || config_value.is_object() && config_value.as_object().is_some_and(|o| o.is_empty())
375            {
376                setup_lines.push(format!("${} = {class_name}::{constructor_name}(null);", arg.name,));
377            } else {
378                let name = &arg.name;
379                // Check if config has complex fields (objects/maps) that can't be
380                // set via PHP property assignment. If so, use createEngineFromJson
381                // which deserializes via core serde (handles auth, browser, proxy etc.).
382                let has_complex = config_value
383                    .as_object()
384                    .is_some_and(|obj| obj.values().any(|v| v.is_object() || v.is_array()));
385                if has_complex {
386                    let json_str = serde_json::to_string(config_value).unwrap_or_default();
387                    let escaped = json_str.replace('\'', "\\'");
388                    setup_lines.push(format!(
389                        "${} = {class_name}::createEngineFromJson('{escaped}');",
390                        arg.name,
391                    ));
392                } else {
393                    setup_lines.push(format!("${name}_config = CrawlConfig::default();"));
394                    if let Some(obj) = config_value.as_object() {
395                        for (key, val) in obj {
396                            let php_val = json_to_php(val);
397                            setup_lines.push(format!("${name}_config->{key} = {php_val};"));
398                        }
399                    }
400                    setup_lines.push(format!(
401                        "${} = {class_name}::{constructor_name}(${name}_config);",
402                        arg.name,
403                        name = name,
404                    ));
405                }
406            }
407            parts.push(format!("${}", arg.name));
408            continue;
409        }
410
411        let val = input.get(&arg.field);
412        match val {
413            None | Some(serde_json::Value::Null) if arg.optional => {
414                // Optional arg with no fixture value: skip entirely.
415                continue;
416            }
417            None | Some(serde_json::Value::Null) => {
418                // Required arg with no fixture value: pass a language-appropriate default.
419                let default_val = match arg.arg_type.as_str() {
420                    "string" => "\"\"".to_string(),
421                    "int" | "integer" => "0".to_string(),
422                    "float" | "number" => "0.0".to_string(),
423                    "bool" | "boolean" => "false".to_string(),
424                    _ => "null".to_string(),
425                };
426                parts.push(default_val);
427            }
428            Some(v) => {
429                // For json_object args, convert keys to snake_case and enum values appropriately.
430                if arg.arg_type == "json_object" && !v.is_null() {
431                    if let Some(obj) = v.as_object() {
432                        let items: Vec<String> = obj
433                            .iter()
434                            .map(|(k, vv)| {
435                                let snake_key = k.to_snake_case();
436                                let php_val = if enum_fields.contains_key(k) {
437                                    if let Some(s) = vv.as_str() {
438                                        let snake_val = s.to_snake_case();
439                                        format!("\"{}\"", escape_php(&snake_val))
440                                    } else {
441                                        json_to_php(vv)
442                                    }
443                                } else {
444                                    json_to_php(vv)
445                                };
446                                format!("\"{}\" => {}", escape_php(&snake_key), php_val)
447                            })
448                            .collect();
449                        parts.push(format!("[{}]", items.join(", ")));
450                        continue;
451                    }
452                }
453                parts.push(json_to_php(v));
454            }
455        }
456    }
457
458    (setup_lines, parts.join(", "))
459}
460
461fn render_assertion(
462    out: &mut String,
463    assertion: &Assertion,
464    result_var: &str,
465    field_resolver: &FieldResolver,
466    result_is_simple: bool,
467) {
468    // Skip assertions on fields that don't exist on the result type.
469    if let Some(f) = &assertion.field {
470        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
471            let _ = writeln!(out, "        // skipped: field '{f}' not available on result type");
472            return;
473        }
474    }
475
476    // When result_is_simple, skip assertions that reference non-content fields
477    // (e.g., metadata, document, structure) since the binding returns a plain value.
478    if result_is_simple {
479        if let Some(f) = &assertion.field {
480            let f_lower = f.to_lowercase();
481            if !f.is_empty()
482                && f_lower != "content"
483                && (f_lower.starts_with("metadata")
484                    || f_lower.starts_with("document")
485                    || f_lower.starts_with("structure"))
486            {
487                let _ = writeln!(out, "        // TODO: skipped (result_is_simple, field: {f})");
488                return;
489            }
490        }
491    }
492
493    let field_expr = if result_is_simple {
494        format!("${result_var}")
495    } else {
496        match &assertion.field {
497            Some(f) if !f.is_empty() => field_resolver.accessor(f, "php", &format!("${result_var}")),
498            _ => format!("${result_var}"),
499        }
500    };
501
502    // For string equality, trim trailing whitespace to handle trailing newlines.
503    let trimmed_field_expr = if result_is_simple {
504        format!("trim(${result_var})")
505    } else {
506        field_expr.clone()
507    };
508
509    match assertion.assertion_type.as_str() {
510        "equals" => {
511            if let Some(expected) = &assertion.value {
512                let php_val = json_to_php(expected);
513                let _ = writeln!(out, "        $this->assertEquals({php_val}, {trimmed_field_expr});");
514            }
515        }
516        "contains" => {
517            if let Some(expected) = &assertion.value {
518                let php_val = json_to_php(expected);
519                let _ = writeln!(
520                    out,
521                    "        $this->assertStringContainsString({php_val}, {field_expr});"
522                );
523            }
524        }
525        "contains_all" => {
526            if let Some(values) = &assertion.values {
527                for val in values {
528                    let php_val = json_to_php(val);
529                    let _ = writeln!(
530                        out,
531                        "        $this->assertStringContainsString({php_val}, {field_expr});"
532                    );
533                }
534            }
535        }
536        "not_contains" => {
537            if let Some(expected) = &assertion.value {
538                let php_val = json_to_php(expected);
539                let _ = writeln!(
540                    out,
541                    "        $this->assertStringNotContainsString({php_val}, {field_expr});"
542                );
543            }
544        }
545        "not_empty" => {
546            let _ = writeln!(out, "        $this->assertNotEmpty({field_expr});");
547        }
548        "is_empty" => {
549            let _ = writeln!(out, "        $this->assertEmpty({trimmed_field_expr});");
550        }
551        "contains_any" => {
552            if let Some(values) = &assertion.values {
553                let _ = writeln!(out, "        $found = false;");
554                for val in values {
555                    let php_val = json_to_php(val);
556                    let _ = writeln!(
557                        out,
558                        "        if (str_contains({field_expr}, {php_val})) {{ $found = true; }}"
559                    );
560                }
561                let _ = writeln!(
562                    out,
563                    "        $this->assertTrue($found, 'expected to contain at least one of the specified values');"
564                );
565            }
566        }
567        "greater_than" => {
568            if let Some(val) = &assertion.value {
569                let php_val = json_to_php(val);
570                let _ = writeln!(out, "        $this->assertGreaterThan({php_val}, {field_expr});");
571            }
572        }
573        "less_than" => {
574            if let Some(val) = &assertion.value {
575                let php_val = json_to_php(val);
576                let _ = writeln!(out, "        $this->assertLessThan({php_val}, {field_expr});");
577            }
578        }
579        "greater_than_or_equal" => {
580            if let Some(val) = &assertion.value {
581                let php_val = json_to_php(val);
582                let _ = writeln!(out, "        $this->assertGreaterThanOrEqual({php_val}, {field_expr});");
583            }
584        }
585        "less_than_or_equal" => {
586            if let Some(val) = &assertion.value {
587                let php_val = json_to_php(val);
588                let _ = writeln!(out, "        $this->assertLessThanOrEqual({php_val}, {field_expr});");
589            }
590        }
591        "starts_with" => {
592            if let Some(expected) = &assertion.value {
593                let php_val = json_to_php(expected);
594                let _ = writeln!(out, "        $this->assertStringStartsWith({php_val}, {field_expr});");
595            }
596        }
597        "ends_with" => {
598            if let Some(expected) = &assertion.value {
599                let php_val = json_to_php(expected);
600                let _ = writeln!(out, "        $this->assertStringEndsWith({php_val}, {field_expr});");
601            }
602        }
603        "min_length" => {
604            if let Some(val) = &assertion.value {
605                if let Some(n) = val.as_u64() {
606                    let _ = writeln!(
607                        out,
608                        "        $this->assertGreaterThanOrEqual({n}, strlen({field_expr}));"
609                    );
610                }
611            }
612        }
613        "max_length" => {
614            if let Some(val) = &assertion.value {
615                if let Some(n) = val.as_u64() {
616                    let _ = writeln!(out, "        $this->assertLessThanOrEqual({n}, strlen({field_expr}));");
617                }
618            }
619        }
620        "count_min" => {
621            if let Some(val) = &assertion.value {
622                if let Some(n) = val.as_u64() {
623                    let _ = writeln!(
624                        out,
625                        "        $this->assertGreaterThanOrEqual({n}, count({field_expr}));"
626                    );
627                }
628            }
629        }
630        "not_error" => {
631            // Already handled by the call succeeding without exception.
632        }
633        "error" => {
634            // Handled at the test method level.
635        }
636        other => {
637            let _ = writeln!(out, "        // TODO: unsupported assertion type: {other}");
638        }
639    }
640}
641
642/// Convert a `serde_json::Value` to a PHP literal string.
643fn json_to_php(value: &serde_json::Value) -> String {
644    match value {
645        serde_json::Value::String(s) => format!("\"{}\"", escape_php(s)),
646        serde_json::Value::Bool(true) => "true".to_string(),
647        serde_json::Value::Bool(false) => "false".to_string(),
648        serde_json::Value::Number(n) => n.to_string(),
649        serde_json::Value::Null => "null".to_string(),
650        serde_json::Value::Array(arr) => {
651            let items: Vec<String> = arr.iter().map(json_to_php).collect();
652            format!("[{}]", items.join(", "))
653        }
654        serde_json::Value::Object(map) => {
655            let items: Vec<String> = map
656                .iter()
657                .map(|(k, v)| format!("\"{}\" => {}", escape_php(k), json_to_php(v)))
658                .collect();
659            format!("[{}]", items.join(", "))
660        }
661    }
662}