Skip to main content

alef_e2e/codegen/
r.rs

1//! R e2e test generator using testthat.
2
3use crate::config::E2eConfig;
4use crate::escape::{escape_r, r_template_to_paste0, sanitize_filename, sanitize_ident};
5use crate::field_access::FieldResolver;
6use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
7use alef_core::backend::GeneratedFile;
8use alef_core::config::ResolvedCrateConfig;
9use alef_core::hash::{self, CommentStyle};
10use anyhow::Result;
11use std::fmt::Write as FmtWrite;
12use std::path::PathBuf;
13
14use super::E2eCodegen;
15
16/// R e2e code generator.
17pub struct RCodegen;
18
19impl E2eCodegen for RCodegen {
20    fn generate(
21        &self,
22        groups: &[FixtureGroup],
23        e2e_config: &E2eConfig,
24        _config: &ResolvedCrateConfig,
25    ) -> Result<Vec<GeneratedFile>> {
26        let lang = self.language_name();
27        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
28
29        let mut files = Vec::new();
30
31        // Resolve call config with overrides.
32        let call = &e2e_config.call;
33        let overrides = call.overrides.get(lang);
34        let module_path = overrides
35            .and_then(|o| o.module.as_ref())
36            .cloned()
37            .unwrap_or_else(|| call.module.clone());
38        let _function_name = overrides
39            .and_then(|o| o.function.as_ref())
40            .cloned()
41            .unwrap_or_else(|| call.function.clone());
42        let result_is_simple = call.result_is_simple || overrides.is_some_and(|o| o.result_is_simple);
43        let result_is_r_list = overrides.is_some_and(|o| o.result_is_r_list);
44        let _result_var = &call.result_var;
45
46        // Resolve package config.
47        let r_pkg = e2e_config.resolve_package("r");
48        let pkg_name = r_pkg
49            .as_ref()
50            .and_then(|p| p.name.as_ref())
51            .cloned()
52            .unwrap_or_else(|| module_path.clone());
53        let pkg_path = r_pkg
54            .as_ref()
55            .and_then(|p| p.path.as_ref())
56            .cloned()
57            .unwrap_or_else(|| "../../packages/r".to_string());
58        let pkg_version = r_pkg
59            .as_ref()
60            .and_then(|p| p.version.as_ref())
61            .cloned()
62            .unwrap_or_else(|| "0.1.0".to_string());
63
64        // Generate DESCRIPTION file.
65        files.push(GeneratedFile {
66            path: output_base.join("DESCRIPTION"),
67            content: render_description(&pkg_name, &pkg_version, e2e_config.dep_mode),
68            generated_header: false,
69        });
70
71        // Generate test runner script.
72        files.push(GeneratedFile {
73            path: output_base.join("run_tests.R"),
74            content: render_test_runner(&pkg_path, e2e_config.dep_mode),
75            generated_header: true,
76        });
77
78        // setup-fixtures.R — testthat sources `setup-*.R` files in the tests
79        // directory once before any tests run, with the working directory set
80        // to the tests/ folder. We use this hook to chdir into the repo's
81        // shared `test_documents/` directory so that fixture paths like
82        // `pdf/fake_memo.pdf` resolve at extraction time.
83        files.push(GeneratedFile {
84            path: output_base.join("tests").join("setup-fixtures.R"),
85            content: render_setup_fixtures(),
86            generated_header: true,
87        });
88
89        // Generate test files per category.
90        for group in groups {
91            let active: Vec<&Fixture> = group
92                .fixtures
93                .iter()
94                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
95                .collect();
96
97            if active.is_empty() {
98                continue;
99            }
100
101            let filename = format!("test_{}.R", sanitize_filename(&group.category));
102            let field_resolver = FieldResolver::new(
103                &e2e_config.fields,
104                &e2e_config.fields_optional,
105                &e2e_config.result_fields,
106                &e2e_config.fields_array,
107                &std::collections::HashSet::new(),
108            );
109            let content = render_test_file(
110                &group.category,
111                &active,
112                &field_resolver,
113                result_is_simple,
114                result_is_r_list,
115                e2e_config,
116            );
117            files.push(GeneratedFile {
118                path: output_base.join("tests").join(filename),
119                content,
120                generated_header: true,
121            });
122        }
123
124        Ok(files)
125    }
126
127    fn language_name(&self) -> &'static str {
128        "r"
129    }
130}
131
132fn render_description(pkg_name: &str, pkg_version: &str, dep_mode: crate::config::DependencyMode) -> String {
133    let dep_line = match dep_mode {
134        crate::config::DependencyMode::Registry => {
135            format!("Imports: {pkg_name} ({pkg_version})\n")
136        }
137        crate::config::DependencyMode::Local => String::new(),
138    };
139    format!(
140        r#"Package: e2e.r
141Title: E2E Tests for {pkg_name}
142Version: 0.1.0
143Description: End-to-end test suite.
144{dep_line}Suggests: testthat (>= 3.0.0)
145Config/testthat/edition: 3
146"#
147    )
148}
149
150fn render_setup_fixtures() -> String {
151    let mut out = String::new();
152    out.push_str(&hash::header(CommentStyle::Hash));
153    let _ = writeln!(out);
154    let _ = writeln!(
155        out,
156        "# Resolve fixture paths against the repo's `test_documents/` directory."
157    );
158    let _ = writeln!(
159        out,
160        "# testthat sources setup-*.R with the working directory at tests/,"
161    );
162    let _ = writeln!(
163        out,
164        "# so test_documents lives three directories up: tests/ -> e2e/r/ -> e2e/ -> repo root."
165    );
166    let _ = writeln!(
167        out,
168        "# Each `test_that()` block has its working directory reset back to tests/, so"
169    );
170    let _ = writeln!(
171        out,
172        "# fixture lookups must be performed via this helper rather than relying on `setwd`."
173    );
174    let _ = writeln!(
175        out,
176        ".kreuzberg_test_documents <- normalizePath(\"../../../test_documents\", mustWork = FALSE)"
177    );
178    let _ = writeln!(out, ".resolve_fixture <- function(path) {{");
179    let _ = writeln!(out, "  if (dir.exists(.kreuzberg_test_documents)) {{");
180    let _ = writeln!(out, "    file.path(.kreuzberg_test_documents, path)");
181    let _ = writeln!(out, "  }} else {{");
182    let _ = writeln!(out, "    path");
183    let _ = writeln!(out, "  }}");
184    let _ = writeln!(out, "}}");
185    out
186}
187
188fn render_test_runner(pkg_path: &str, dep_mode: crate::config::DependencyMode) -> String {
189    let mut out = String::new();
190    out.push_str(&hash::header(CommentStyle::Hash));
191    let _ = writeln!(out, "library(testthat)");
192    match dep_mode {
193        crate::config::DependencyMode::Registry => {
194            // In registry mode, require the installed CRAN package directly.
195            let _ = writeln!(out, "# Package loaded via library() from CRAN install.");
196        }
197        crate::config::DependencyMode::Local => {
198            // Use devtools::load_all() to load the local R package without requiring
199            // a full install, matching the e2e test runner convention.
200            let _ = writeln!(out, "devtools::load_all(\"{pkg_path}\")");
201        }
202    }
203    let _ = writeln!(out);
204    // Surface every failure rather than aborting at the default max_fails=10 —
205    // partial pass counts are essential for triage during e2e bring-up.
206    let _ = writeln!(out, "testthat::set_max_fails(Inf)");
207    // Resolve the tests/ directory relative to this script. testthat reads
208    // setup-*.R from there before each file runs, where path resolution
209    // against test_documents/ is handled by the `.resolve_fixture` helper.
210    let _ = writeln!(
211        out,
212        ".script_dir <- tryCatch(dirname(normalizePath(sys.frame(1)$ofile)), error = function(e) getwd())"
213    );
214    let _ = writeln!(out, "test_dir(file.path(.script_dir, \"tests\"))");
215    out
216}
217
218fn render_test_file(
219    category: &str,
220    fixtures: &[&Fixture],
221    field_resolver: &FieldResolver,
222    result_is_simple: bool,
223    result_is_r_list: bool,
224    e2e_config: &E2eConfig,
225) -> String {
226    let mut out = String::new();
227    out.push_str(&hash::header(CommentStyle::Hash));
228    let _ = writeln!(out, "# E2e tests for category: {category}");
229    let _ = writeln!(out);
230
231    for (i, fixture) in fixtures.iter().enumerate() {
232        render_test_case(
233            &mut out,
234            fixture,
235            e2e_config,
236            field_resolver,
237            result_is_simple,
238            result_is_r_list,
239        );
240        if i + 1 < fixtures.len() {
241            let _ = writeln!(out);
242        }
243    }
244
245    // Clean up trailing newlines.
246    while out.ends_with("\n\n") {
247        out.pop();
248    }
249    if !out.ends_with('\n') {
250        out.push('\n');
251    }
252    out
253}
254
255fn render_test_case(
256    out: &mut String,
257    fixture: &Fixture,
258    e2e_config: &E2eConfig,
259    field_resolver: &FieldResolver,
260    default_result_is_simple: bool,
261    default_result_is_r_list: bool,
262) {
263    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
264    let function_name = &call_config.function;
265    let result_var = &call_config.result_var;
266    // Per-fixture call configs (e.g. `list_document_extractors`) may set
267    // `result_is_simple = true` even when the default `[e2e.call]` does not.
268    // Without this lookup the registry/detection wrappers (which return scalar
269    // strings or character vectors directly) get wrapped in
270    // `jsonlite::fromJSON(...)` and the parser fails on non-JSON output.
271    let r_override = call_config.overrides.get("r");
272    let result_is_simple = if fixture.call.is_some() {
273        call_config.result_is_simple || r_override.is_some_and(|o| o.result_is_simple)
274    } else {
275        default_result_is_simple
276    };
277    // Per-fixture override: when the R binding already returns a native R list
278    // (not a JSON string), suppress `jsonlite::fromJSON` wrapping while still
279    // using field-path (`result$field`) accessors in assertions.
280    let result_is_r_list = if fixture.call.is_some() {
281        r_override.is_some_and(|o| o.result_is_r_list)
282    } else {
283        default_result_is_r_list
284    };
285
286    let test_name = sanitize_ident(&fixture.id);
287    let description = fixture.description.replace('"', "\\\"");
288
289    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
290
291    // Allow per-call R overrides to remap fixture argument names. Many calls
292    // (e.g. `extract_bytes`, `batch_extract_files`) use language-neutral
293    // fixture field names (`data`, `paths`) that the R extendr binding
294    // exposes under different identifiers (`content`, `items`).
295    let arg_name_map = r_override.map(|o| &o.arg_name_map);
296    let args_str = build_args_string(&fixture.input, &call_config.args, arg_name_map);
297
298    // Build visitor setup and args if present
299    let mut setup_lines = Vec::new();
300    let final_args = if let Some(visitor_spec) = &fixture.visitor {
301        build_r_visitor(&mut setup_lines, visitor_spec);
302        // Strip any `options = NULL` placeholder that build_args_string may have emitted
303        // for the optional options arg — we replace it with the visitor options list.
304        let base = args_str
305            .replace(", options = NULL", "")
306            .replace("options = NULL, ", "")
307            .replace("options = NULL", "");
308        let visitor_opts = "options = list(visitor = visitor)";
309        let trimmed = base.trim_matches([' ', ',']);
310        if trimmed.is_empty() {
311            visitor_opts.to_string()
312        } else {
313            format!("{trimmed}, {visitor_opts}")
314        }
315    } else {
316        args_str
317    };
318
319    if expects_error {
320        let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
321        for line in &setup_lines {
322            let _ = writeln!(out, "  {line}");
323        }
324        let _ = writeln!(out, "  expect_error({function_name}({final_args}))");
325        let _ = writeln!(out, "}})");
326        return;
327    }
328
329    let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
330    for line in &setup_lines {
331        let _ = writeln!(out, "  {line}");
332    }
333    // The extendr extraction wrappers return JSON strings carrying the
334    // serialized core result; parse into an R list so tests can use `$`
335    // accessors. `result_is_simple` calls (e.g. `convert_html_to_markdown`)
336    // already return scalar values and must be passed through verbatim.
337    // `result_is_r_list` signals the binding returns a native R list (Robj),
338    // not a JSON string — skip `jsonlite::fromJSON` but keep `$` accessors.
339    if result_is_simple || result_is_r_list {
340        let _ = writeln!(out, "  {result_var} <- {function_name}({final_args})");
341    } else {
342        let _ = writeln!(
343            out,
344            "  {result_var} <- jsonlite::fromJSON({function_name}({final_args}), simplifyVector = FALSE)"
345        );
346    }
347
348    for assertion in &fixture.assertions {
349        render_assertion(out, assertion, result_var, field_resolver, result_is_simple, e2e_config);
350    }
351
352    let _ = writeln!(out, "}})");
353}
354
355fn build_args_string(
356    input: &serde_json::Value,
357    args: &[crate::config::ArgMapping],
358    arg_name_map: Option<&std::collections::HashMap<String, String>>,
359) -> String {
360    if args.is_empty() {
361        // No declared args means the wrapper takes zero parameters; emitting
362        // `list()` here would trigger an `unused argument (list())` error in R.
363        // Likewise, fall through to nothing if the fixture's input is empty.
364        if matches!(input, serde_json::Value::Null) || input.as_object().is_some_and(|m| m.is_empty()) {
365            return String::new();
366        }
367        return json_to_r(input, true);
368    }
369
370    let parts: Vec<String> = args
371        .iter()
372        .filter_map(|arg| {
373            // Apply per-language argument renames before emitting the call.
374            let arg_name: &str = arg_name_map
375                .and_then(|m| m.get(&arg.name).map(String::as_str))
376                .unwrap_or(&arg.name);
377
378            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
379            let val = input.get(field);
380            // R extendr-generated wrappers do not preserve Option<T> defaults from
381            // the Rust signature — every parameter is positional and required at
382            // the R level. To keep generated calls valid we must pass a placeholder
383            // (`NULL` for `Option<T>`, `ExtractionConfig$default()` for typed
384            // configs) whenever the fixture omits an optional value.
385            let val = match val {
386                Some(v) if !(v.is_null() && arg.optional) => v,
387                _ => {
388                    if !arg.optional {
389                        return None;
390                    }
391                    if arg.arg_type == "json_object" {
392                        let r_value = r_default_for_config_arg(arg_name);
393                        return Some(format!("{arg_name} = {r_value}"));
394                    }
395                    return Some(format!("{arg_name} = NULL"));
396                }
397            };
398            // The extendr bindings expect owned PORs (ExternalPtr) for typed
399            // config arguments — passing an R `list()` raises
400            // `Expected ExternalPtr got List`. The fixtures don't carry the
401            // option fields needed to round-trip through ExtractionConfig$new,
402            // so emit `ExtractionConfig$default()` whenever a `json_object` arg
403            // resolves to an empty / object-shaped JSON value.
404            if arg.arg_type == "json_object" && (val.is_null() || val.as_object().is_some_and(|m| m.is_empty())) {
405                let r_value = r_default_for_config_arg(arg_name);
406                return Some(format!("{arg_name} = {r_value}"));
407            }
408            // Non-empty json_object for typed config args (those whose default is a
409            // `$default()` constructor): use `TypeName$from_json(jsonlite::toJSON(...))`
410            // so the Rust function receives a proper ExternalPtr, not a list.
411            // For `options`-style args (default = NULL) emit as a plain R list.
412            if arg.arg_type == "json_object" && val.is_object() {
413                let default_expr = r_default_for_config_arg(arg_name);
414                if default_expr.ends_with("$default()") {
415                    // Extract the type name from "TypeName$default()"
416                    let type_name = default_expr.trim_end_matches("$default()");
417                    let r_list = json_to_r(val, true);
418                    let r_value = format!("{type_name}$from_json(jsonlite::toJSON({r_list}, auto_unbox = TRUE))");
419                    return Some(format!("{arg_name} = {r_value}"));
420                }
421                let r_value = json_to_r(val, true);
422                return Some(format!("{arg_name} = {r_value}"));
423            }
424            // `json_object` arrays are passed to extendr functions whose Rust
425            // signature is `items: String` (JSON-serialized batch items). The
426            // wrapper has no R-list → JSON conversion, so we must serialize the
427            // fixture value to a literal JSON string at test-emit time.
428            if arg.arg_type == "json_object" && val.is_array() {
429                let json_literal = serde_json::to_string(val).unwrap_or_else(|_| "[]".to_string());
430                let escaped = escape_r(&json_literal);
431                return Some(format!("{arg_name} = \"{escaped}\""));
432            }
433            // `bytes` arg type: convert string fixture values into runtime
434            // `readBin(...)` calls so the wrapper receives raw bytes instead
435            // of an R character vector. This mirrors the Python emit_bytes_arg
436            // helper and is what the extendr binding for Vec<u8> expects.
437            if arg.arg_type == "bytes" {
438                if let Some(raw) = val.as_str() {
439                    let r_value = render_bytes_value(raw);
440                    return Some(format!("{arg_name} = {r_value}"));
441                }
442            }
443            // `file_path` arg type: fixtures encode relative paths that resolve
444            // against the repo's `test_documents/` directory. Using a runtime
445            // helper that anchors paths to that directory avoids fragility from
446            // testthat resetting the working directory between files.
447            if arg.arg_type == "file_path" {
448                if let Some(raw) = val.as_str() {
449                    if !raw.starts_with('/') && !raw.is_empty() {
450                        let escaped = escape_r(raw);
451                        return Some(format!("{arg_name} = .resolve_fixture(\"{escaped}\")"));
452                    }
453                }
454            }
455            Some(format!("{arg_name} = {}", json_to_r(val, true)))
456        })
457        .collect();
458
459    parts.join(", ")
460}
461
462/// Render a `bytes` fixture value as the R expression that produces a raw
463/// vector at test time. Mirrors python's `emit_bytes_arg` classifier so we can
464/// support both file-path style fixtures (`"pdf/fake_memo.pdf"`) and inline
465/// text payloads (`"<html>..."`). The resulting expression is dropped directly
466/// into the call site, e.g. `content = readBin(.resolve_fixture("pdf/fake_memo.pdf"), ...)`.
467fn render_bytes_value(raw: &str) -> String {
468    if raw.starts_with('<') || raw.starts_with('{') || raw.starts_with('[') || raw.contains(' ') {
469        // Inline text payload — encode to raw via charToRaw.
470        let escaped = escape_r(raw);
471        return format!("charToRaw(\"{escaped}\")");
472    }
473    let first = raw.chars().next().unwrap_or('\0');
474    if first.is_ascii_alphanumeric() || first == '_' {
475        if let Some(slash) = raw.find('/') {
476            if slash > 0 {
477                let after = &raw[slash + 1..];
478                if after.contains('.') && !after.is_empty() {
479                    let escaped = escape_r(raw);
480                    return format!(
481                        "readBin(.resolve_fixture(\"{escaped}\"), what = \"raw\", n = file.info(.resolve_fixture(\"{escaped}\"))$size)"
482                    );
483                }
484            }
485        }
486    }
487    // Default to inline text encoding — matches Python's InlineText branch.
488    let escaped = escape_r(raw);
489    format!("charToRaw(\"{escaped}\")")
490}
491
/// Return the R expression used as the default value for a config-style
/// argument, looked up by the argument's R name.
///
/// Known names map to their `*Config$default()` constructor, except
/// `options`, which maps to `NULL`. Any other name yields `list()`.
fn r_default_for_config_arg(arg_name: &str) -> String {
    const KNOWN_DEFAULTS: &[(&str, &str)] = &[
        ("config", "ExtractionConfig$default()"),
        ("options", "NULL"),
        ("html_output", "HtmlOutputConfig$default()"),
        ("chunking", "ChunkingConfig$default()"),
        ("ocr", "OcrConfig$default()"),
        ("image", "ImageExtractionConfig$default()"),
        ("images", "ImageExtractionConfig$default()"),
        ("language_detection", "LanguageDetectionConfig$default()"),
    ];

    KNOWN_DEFAULTS
        .iter()
        .find(|&&(name, _)| name == arg_name)
        .map_or("list()", |&(_, expr)| expr)
        .to_string()
}
507
508fn render_assertion(
509    out: &mut String,
510    assertion: &Assertion,
511    result_var: &str,
512    field_resolver: &FieldResolver,
513    result_is_simple: bool,
514    _e2e_config: &E2eConfig,
515) {
516    // Handle synthetic / derived fields before the is_valid_for_result check
517    // so they are never treated as struct attribute accesses on the result.
518    if let Some(f) = &assertion.field {
519        match f.as_str() {
520            "chunks_have_content" => {
521                let pred = format!("all(sapply({result_var}$chunks %||% list(), function(c) nchar(c$content) > 0))");
522                match assertion.assertion_type.as_str() {
523                    "is_true" => {
524                        let _ = writeln!(out, "  expect_true({pred})");
525                    }
526                    "is_false" => {
527                        let _ = writeln!(out, "  expect_false({pred})");
528                    }
529                    _ => {
530                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
531                    }
532                }
533                return;
534            }
535            "chunks_have_embeddings" => {
536                let pred = format!(
537                    "all(sapply({result_var}$chunks %||% list(), function(c) !is.null(c$embedding) && length(c$embedding) > 0))"
538                );
539                match assertion.assertion_type.as_str() {
540                    "is_true" => {
541                        let _ = writeln!(out, "  expect_true({pred})");
542                    }
543                    "is_false" => {
544                        let _ = writeln!(out, "  expect_false({pred})");
545                    }
546                    _ => {
547                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
548                    }
549                }
550                return;
551            }
552            // ---- EmbedResponse virtual fields ----
553            // embed_texts returns list of numeric vectors in R — no wrapper object.
554            // result_var is the embedding matrix; use it directly.
555            "embeddings" => {
556                match assertion.assertion_type.as_str() {
557                    "count_equals" => {
558                        if let Some(val) = &assertion.value {
559                            let r_val = json_to_r(val, false);
560                            let _ = writeln!(out, "  expect_equal(length({result_var}), {r_val})");
561                        }
562                    }
563                    "count_min" => {
564                        if let Some(val) = &assertion.value {
565                            let r_val = json_to_r(val, false);
566                            let _ = writeln!(out, "  expect_gte(length({result_var}), {r_val})");
567                        }
568                    }
569                    "not_empty" => {
570                        let _ = writeln!(out, "  expect_gt(length({result_var}), 0)");
571                    }
572                    "is_empty" => {
573                        let _ = writeln!(out, "  expect_equal(length({result_var}), 0)");
574                    }
575                    _ => {
576                        let _ = writeln!(
577                            out,
578                            "  # skipped: unsupported assertion type on synthetic field 'embeddings'"
579                        );
580                    }
581                }
582                return;
583            }
584            "embedding_dimensions" => {
585                let expr = format!("(if (length({result_var}) == 0) 0L else length({result_var}[[1]]))");
586                match assertion.assertion_type.as_str() {
587                    "equals" => {
588                        if let Some(val) = &assertion.value {
589                            let r_val = json_to_r(val, false);
590                            let _ = writeln!(out, "  expect_equal({expr}, {r_val})");
591                        }
592                    }
593                    "greater_than" => {
594                        if let Some(val) = &assertion.value {
595                            let r_val = json_to_r(val, false);
596                            let _ = writeln!(out, "  expect_gt({expr}, {r_val})");
597                        }
598                    }
599                    _ => {
600                        let _ = writeln!(
601                            out,
602                            "  # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
603                        );
604                    }
605                }
606                return;
607            }
608            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
609                let pred = match f.as_str() {
610                    "embeddings_valid" => {
611                        format!("all(sapply({result_var}, function(e) length(e) > 0))")
612                    }
613                    "embeddings_finite" => {
614                        format!("all(sapply({result_var}, function(e) all(is.finite(e))))")
615                    }
616                    "embeddings_non_zero" => {
617                        format!("all(sapply({result_var}, function(e) any(e != 0.0)))")
618                    }
619                    "embeddings_normalized" => {
620                        format!("all(sapply({result_var}, function(e) abs(sum(e * e) - 1.0) < 1e-3))")
621                    }
622                    _ => unreachable!(),
623                };
624                match assertion.assertion_type.as_str() {
625                    "is_true" => {
626                        let _ = writeln!(out, "  expect_true({pred})");
627                    }
628                    "is_false" => {
629                        let _ = writeln!(out, "  expect_false({pred})");
630                    }
631                    _ => {
632                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
633                    }
634                }
635                return;
636            }
637            // ---- keywords / keywords_count ----
638            // R ExtractionResult does not expose extracted_keywords; skip.
639            "keywords" | "keywords_count" => {
640                let _ = writeln!(out, "  # skipped: field '{f}' not available on R ExtractionResult");
641                return;
642            }
643            _ => {}
644        }
645    }
646
647    // Skip assertions on fields that don't exist on the result type.
648    if let Some(f) = &assertion.field {
649        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
650            let _ = writeln!(out, "  # skipped: field '{f}' not available on result type");
651            return;
652        }
653    }
654
655    // When result_is_simple, skip assertions that reference non-content fields
656    // (e.g., metadata, document, structure) since the binding returns a plain value.
657    if result_is_simple {
658        if let Some(f) = &assertion.field {
659            let f_lower = f.to_lowercase();
660            if !f.is_empty()
661                && f_lower != "content"
662                && (f_lower.starts_with("metadata")
663                    || f_lower.starts_with("document")
664                    || f_lower.starts_with("structure"))
665            {
666                let _ = writeln!(
667                    out,
668                    "  # skipped: result_is_simple for field '{f}' not available on result type"
669                );
670                return;
671            }
672        }
673    }
674
675    let field_expr = if result_is_simple {
676        result_var.to_string()
677    } else {
678        match &assertion.field {
679            Some(f) if !f.is_empty() => field_resolver.accessor(f, "r", result_var),
680            _ => result_var.to_string(),
681        }
682    };
683
684    match assertion.assertion_type.as_str() {
685        "equals" => {
686            if let Some(expected) = &assertion.value {
687                let r_val = json_to_r(expected, false);
688                let _ = writeln!(out, "  expect_equal(trimws({field_expr}), {r_val})");
689            }
690        }
691        "contains" => {
692            if let Some(expected) = &assertion.value {
693                let r_val = json_to_r(expected, false);
694                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}, fixed = TRUE))");
695            }
696        }
697        "contains_all" => {
698            if let Some(values) = &assertion.values {
699                for val in values {
700                    let r_val = json_to_r(val, false);
701                    let _ = writeln!(out, "  expect_true(any(grepl({r_val}, {field_expr}, fixed = TRUE)))");
702                }
703            }
704        }
705        "not_contains" => {
706            if let Some(expected) = &assertion.value {
707                let r_val = json_to_r(expected, false);
708                let _ = writeln!(out, "  expect_false(grepl({r_val}, {field_expr}, fixed = TRUE))");
709            }
710        }
711        "not_empty" => {
712            let _ = writeln!(
713                out,
714                "  expect_true(if (is.character({field_expr})) nchar({field_expr}) > 0 else length({field_expr}) > 0)"
715            );
716        }
717        "is_empty" => {
718            let _ = writeln!(out, "  expect_equal({field_expr}, \"\")");
719        }
720        "contains_any" => {
721            if let Some(values) = &assertion.values {
722                let items: Vec<String> = values.iter().map(|v| json_to_r(v, false)).collect();
723                let vec_str = items.join(", ");
724                let _ = writeln!(
725                    out,
726                    "  expect_true(any(sapply(c({vec_str}), function(v) grepl(v, {field_expr}, fixed = TRUE))))"
727                );
728            }
729        }
730        "greater_than" => {
731            if let Some(val) = &assertion.value {
732                let r_val = json_to_r(val, false);
733                let _ = writeln!(out, "  expect_true({field_expr} > {r_val})");
734            }
735        }
736        "less_than" => {
737            if let Some(val) = &assertion.value {
738                let r_val = json_to_r(val, false);
739                let _ = writeln!(out, "  expect_true({field_expr} < {r_val})");
740            }
741        }
742        "greater_than_or_equal" => {
743            if let Some(val) = &assertion.value {
744                let r_val = json_to_r(val, false);
745                let _ = writeln!(out, "  expect_true({field_expr} >= {r_val})");
746            }
747        }
748        "less_than_or_equal" => {
749            if let Some(val) = &assertion.value {
750                let r_val = json_to_r(val, false);
751                let _ = writeln!(out, "  expect_true({field_expr} <= {r_val})");
752            }
753        }
754        "starts_with" => {
755            if let Some(expected) = &assertion.value {
756                let r_val = json_to_r(expected, false);
757                let _ = writeln!(out, "  expect_true(startsWith({field_expr}, {r_val}))");
758            }
759        }
760        "ends_with" => {
761            if let Some(expected) = &assertion.value {
762                let r_val = json_to_r(expected, false);
763                let _ = writeln!(out, "  expect_true(endsWith({field_expr}, {r_val}))");
764            }
765        }
766        "min_length" => {
767            if let Some(val) = &assertion.value {
768                if let Some(n) = val.as_u64() {
769                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) >= {n})");
770                }
771            }
772        }
773        "max_length" => {
774            if let Some(val) = &assertion.value {
775                if let Some(n) = val.as_u64() {
776                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) <= {n})");
777                }
778            }
779        }
780        "count_min" => {
781            if let Some(val) = &assertion.value {
782                if let Some(n) = val.as_u64() {
783                    let _ = writeln!(out, "  expect_true(length({field_expr}) >= {n})");
784                }
785            }
786        }
787        "count_equals" => {
788            if let Some(val) = &assertion.value {
789                if let Some(n) = val.as_u64() {
790                    let _ = writeln!(out, "  expect_equal(length({field_expr}), {n})");
791                }
792            }
793        }
794        "is_true" => {
795            let _ = writeln!(out, "  expect_true({field_expr})");
796        }
797        "is_false" => {
798            let _ = writeln!(out, "  expect_false({field_expr})");
799        }
800        "method_result" => {
801            if let Some(method_name) = &assertion.method {
802                let call_expr = build_r_method_call(result_var, method_name, assertion.args.as_ref());
803                let check = assertion.check.as_deref().unwrap_or("is_true");
804                match check {
805                    "equals" => {
806                        if let Some(val) = &assertion.value {
807                            if val.is_boolean() {
808                                if val.as_bool() == Some(true) {
809                                    let _ = writeln!(out, "  expect_true({call_expr})");
810                                } else {
811                                    let _ = writeln!(out, "  expect_false({call_expr})");
812                                }
813                            } else {
814                                let r_val = json_to_r(val, false);
815                                let _ = writeln!(out, "  expect_equal({call_expr}, {r_val})");
816                            }
817                        }
818                    }
819                    "is_true" => {
820                        let _ = writeln!(out, "  expect_true({call_expr})");
821                    }
822                    "is_false" => {
823                        let _ = writeln!(out, "  expect_false({call_expr})");
824                    }
825                    "greater_than_or_equal" => {
826                        if let Some(val) = &assertion.value {
827                            let r_val = json_to_r(val, false);
828                            let _ = writeln!(out, "  expect_true({call_expr} >= {r_val})");
829                        }
830                    }
831                    "count_min" => {
832                        if let Some(val) = &assertion.value {
833                            let n = val.as_u64().unwrap_or(0);
834                            let _ = writeln!(out, "  expect_true(length({call_expr}) >= {n})");
835                        }
836                    }
837                    "is_error" => {
838                        let _ = writeln!(out, "  expect_error({call_expr})");
839                    }
840                    "contains" => {
841                        if let Some(val) = &assertion.value {
842                            let r_val = json_to_r(val, false);
843                            let _ = writeln!(out, "  expect_true(grepl({r_val}, {call_expr}, fixed = TRUE))");
844                        }
845                    }
846                    other_check => {
847                        panic!("R e2e generator: unsupported method_result check type: {other_check}");
848                    }
849                }
850            } else {
851                panic!("R e2e generator: method_result assertion missing 'method' field");
852            }
853        }
854        "matches_regex" => {
855            if let Some(expected) = &assertion.value {
856                let r_val = json_to_r(expected, false);
857                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}))");
858            }
859        }
860        "not_error" => {
861            // The call itself stops the test on error; emit an explicit
862            // `expect_true(TRUE)` so testthat doesn't report the test as
863            // empty when this is the only assertion.
864            let _ = writeln!(out, "  expect_true(TRUE)");
865        }
866        "error" => {
867            // Handled at the test level.
868        }
869        other => {
870            panic!("R e2e generator: unsupported assertion type: {other}");
871        }
872    }
873}
874
/// Convert a PascalCase string to snake_case.
///
/// Every uppercase character except the very first one is preceded by an
/// underscore, and all characters are lowercased.
/// e.g. "DoubleEqual" → "double_equal", "Backticks" → "backticks"
fn pascal_to_snake_case(s: &str) -> String {
    // A few spare bytes so the inserted underscores rarely force a regrow.
    let mut snake = String::with_capacity(s.len() + 4);
    let mut at_start = true;
    for ch in s.chars() {
        if !at_start && ch.is_uppercase() {
            snake.push('_');
        }
        // `to_lowercase` may yield more than one char for some code points.
        snake.extend(ch.to_lowercase());
        at_start = false;
    }
    snake
}
895
896/// * `lowercase_enum_values` - If true, convert PascalCase strings to snake_case (for enum values).
897///   If false, preserve original case (for assertion expected values).
898fn json_to_r(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
899    match value {
900        serde_json::Value::String(s) => {
901            // Convert PascalCase enum values to snake_case only if requested.
902            // e.g. "Backticks" → "backticks", "DoubleEqual" → "double_equal"
903            let normalized = if lowercase_enum_values && s.chars().next().is_some_and(|c| c.is_uppercase()) {
904                pascal_to_snake_case(s)
905            } else {
906                s.clone()
907            };
908            format!("\"{}\"", escape_r(&normalized))
909        }
910        serde_json::Value::Bool(true) => "TRUE".to_string(),
911        serde_json::Value::Bool(false) => "FALSE".to_string(),
912        serde_json::Value::Number(n) => n.to_string(),
913        serde_json::Value::Null => "NULL".to_string(),
914        serde_json::Value::Array(arr) => {
915            let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
916            format!("c({})", items.join(", "))
917        }
918        serde_json::Value::Object(map) => {
919            let entries: Vec<String> = map
920                .iter()
921                .map(|(k, v)| format!("\"{}\" = {}", escape_r(k), json_to_r(v, lowercase_enum_values)))
922                .collect();
923            format!("list({})", entries.join(", "))
924        }
925    }
926}
927
928/// Build an R visitor list and add setup line.
929fn build_r_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
930    use std::fmt::Write as FmtWrite;
931    // Collect each callback as a separate string, then join with ",\n" to avoid
932    // trailing commas — R's list() does not accept a trailing comma.
933    let methods: Vec<String> = visitor_spec
934        .callbacks
935        .iter()
936        .map(|(method_name, action)| {
937            let mut buf = String::new();
938            emit_r_visitor_method(&mut buf, method_name, action);
939            // strip the trailing ",\n" added by emit_r_visitor_method
940            buf.trim_end_matches(['\n', ',']).to_string()
941        })
942        .collect();
943    let mut visitor_obj = String::new();
944    let _ = writeln!(visitor_obj, "list(");
945    let _ = write!(visitor_obj, "{}", methods.join(",\n"));
946    let _ = writeln!(visitor_obj);
947    let _ = writeln!(visitor_obj, "  )");
948
949    setup_lines.push(format!("visitor <- {visitor_obj}"));
950}
951
952/// Build an R call expression for a `method_result` assertion.
953/// Maps method names to the appropriate R function or method calls.
954fn build_r_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
955    match method_name {
956        "root_child_count" => format!("{result_var}$root_child_count()"),
957        "root_node_type" => format!("{result_var}$root_node_type()"),
958        "named_children_count" => format!("{result_var}$named_children_count()"),
959        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
960        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
961        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
962        "contains_node_type" => {
963            let node_type = args
964                .and_then(|a| a.get("node_type"))
965                .and_then(|v| v.as_str())
966                .unwrap_or("");
967            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
968        }
969        "find_nodes_by_type" => {
970            let node_type = args
971                .and_then(|a| a.get("node_type"))
972                .and_then(|v| v.as_str())
973                .unwrap_or("");
974            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
975        }
976        "run_query" => {
977            let query_source = args
978                .and_then(|a| a.get("query_source"))
979                .and_then(|v| v.as_str())
980                .unwrap_or("");
981            let language = args
982                .and_then(|a| a.get("language"))
983                .and_then(|v| v.as_str())
984                .unwrap_or("");
985            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
986        }
987        _ => {
988            if let Some(args_val) = args {
989                let arg_str = args_val
990                    .as_object()
991                    .map(|obj| {
992                        obj.iter()
993                            .map(|(k, v)| {
994                                let r_val = json_to_r(v, false);
995                                format!("{k} = {r_val}")
996                            })
997                            .collect::<Vec<_>>()
998                            .join(", ")
999                    })
1000                    .unwrap_or_default();
1001                format!("{result_var}${method_name}({arg_str})")
1002            } else {
1003                format!("{result_var}${method_name}()")
1004            }
1005        }
1006    }
1007}
1008
1009/// Emit an R visitor method for a callback action.
1010fn emit_r_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1011    use std::fmt::Write as FmtWrite;
1012
1013    // R uses visit_ prefix (matches binding signature)
1014    let params = match method_name {
1015        "visit_link" => "ctx, href, text, title",
1016        "visit_image" => "ctx, src, alt, title",
1017        "visit_heading" => "ctx, level, text, id",
1018        "visit_code_block" => "ctx, lang, code",
1019        "visit_code_inline"
1020        | "visit_strong"
1021        | "visit_emphasis"
1022        | "visit_strikethrough"
1023        | "visit_underline"
1024        | "visit_subscript"
1025        | "visit_superscript"
1026        | "visit_mark"
1027        | "visit_button"
1028        | "visit_summary"
1029        | "visit_figcaption"
1030        | "visit_definition_term"
1031        | "visit_definition_description" => "ctx, text",
1032        "visit_text" => "ctx, text",
1033        "visit_list_item" => "ctx, ordered, marker, text",
1034        "visit_blockquote" => "ctx, content, depth",
1035        "visit_table_row" => "ctx, cells, is_header",
1036        "visit_custom_element" => "ctx, tag_name, html",
1037        "visit_form" => "ctx, action_url, method",
1038        "visit_input" => "ctx, input_type, name, value",
1039        "visit_audio" | "visit_video" | "visit_iframe" => "ctx, src",
1040        "visit_details" => "ctx, open",
1041        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => "ctx, output",
1042        "visit_list_start" => "ctx, ordered",
1043        "visit_list_end" => "ctx, ordered, output",
1044        _ => "ctx",
1045    };
1046
1047    let _ = writeln!(out, "    {method_name} = function({params}) {{");
1048    match action {
1049        CallbackAction::Skip => {
1050            let _ = writeln!(out, "      \"skip\"");
1051        }
1052        CallbackAction::Continue => {
1053            let _ = writeln!(out, "      \"continue\"");
1054        }
1055        CallbackAction::PreserveHtml => {
1056            let _ = writeln!(out, "      \"preserve_html\"");
1057        }
1058        CallbackAction::Custom { output } => {
1059            let escaped = escape_r(output);
1060            let _ = writeln!(out, "      list(custom = \"{escaped}\")");
1061        }
1062        CallbackAction::CustomTemplate { template } => {
1063            let r_expr = r_template_to_paste0(template);
1064            let _ = writeln!(out, "      list(custom = {r_expr})");
1065        }
1066    }
1067    let _ = writeln!(out, "    }},");
1068}