Skip to main content

alef_e2e/codegen/
r.rs

1//! R e2e test generator using testthat.
2
3use crate::config::E2eConfig;
4use crate::escape::{escape_r, r_template_to_paste0, sanitize_filename, sanitize_ident};
5use crate::field_access::FieldResolver;
6use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, TemplateReturnForm};
7use alef_core::backend::GeneratedFile;
8use alef_core::config::ResolvedCrateConfig;
9use alef_core::hash::{self, CommentStyle};
10use anyhow::Result;
11use std::fmt::Write as FmtWrite;
12use std::path::PathBuf;
13
14use super::E2eCodegen;
15
/// Code generator that emits a testthat-based end-to-end suite for the R binding.
///
/// Stateless: all inputs arrive through [`E2eCodegen::generate`].
pub struct RCodegen;
18
19impl E2eCodegen for RCodegen {
20    fn generate(
21        &self,
22        groups: &[FixtureGroup],
23        e2e_config: &E2eConfig,
24        config: &ResolvedCrateConfig,
25        _type_defs: &[alef_core::ir::TypeDef],
26        _enums: &[alef_core::ir::EnumDef],
27    ) -> Result<Vec<GeneratedFile>> {
28        let lang = self.language_name();
29        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
30
31        let mut files = Vec::new();
32
33        // Resolve call config with overrides.
34        let call = &e2e_config.call;
35        let overrides = call.overrides.get(lang);
36        let module_path = overrides
37            .and_then(|o| o.module.as_ref())
38            .cloned()
39            .unwrap_or_else(|| call.module.clone());
40        let _function_name = overrides
41            .and_then(|o| o.function.as_ref())
42            .cloned()
43            .unwrap_or_else(|| call.function.clone());
44        let result_is_simple = call.result_is_simple || overrides.is_some_and(|o| o.result_is_simple);
45        let result_is_r_list = overrides.is_some_and(|o| o.result_is_r_list);
46        let _result_var = &call.result_var;
47
48        // Resolve package config.
49        let r_pkg = e2e_config.resolve_package("r");
50        let pkg_name = r_pkg
51            .as_ref()
52            .and_then(|p| p.name.as_ref())
53            .cloned()
54            .unwrap_or_else(|| module_path.clone());
55        let pkg_path = r_pkg
56            .as_ref()
57            .and_then(|p| p.path.as_ref())
58            .cloned()
59            .unwrap_or_else(|| "../../packages/r".to_string());
60        let pkg_version = r_pkg
61            .as_ref()
62            .and_then(|p| p.version.as_ref())
63            .cloned()
64            .or_else(|| config.resolved_version())
65            .unwrap_or_else(|| "0.1.0".to_string());
66
67        // Generate DESCRIPTION file.
68        files.push(GeneratedFile {
69            path: output_base.join("DESCRIPTION"),
70            content: render_description(&pkg_name, &pkg_version, e2e_config.dep_mode),
71            generated_header: false,
72        });
73
74        // Generate test runner script.
75        files.push(GeneratedFile {
76            path: output_base.join("run_tests.R"),
77            content: render_test_runner(&pkg_path, e2e_config.dep_mode),
78            generated_header: true,
79        });
80
81        // setup-fixtures.R — testthat sources `setup-*.R` files in the tests
82        // directory once before any tests run, with the working directory set
83        // to the tests/ folder. We use this hook to chdir into the repo's
84        // shared `test_documents/` directory so that fixture paths like
85        // `pdf/fake_memo.pdf` resolve at extraction time.
86        files.push(GeneratedFile {
87            path: output_base.join("tests").join("setup-fixtures.R"),
88            content: render_setup_fixtures(&e2e_config.test_documents_relative_from(1)),
89            generated_header: true,
90        });
91
92        // Generate test files per category.
93        for group in groups {
94            let active: Vec<&Fixture> = group
95                .fixtures
96                .iter()
97                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
98                .collect();
99
100            if active.is_empty() {
101                continue;
102            }
103
104            let filename = format!("test_{}.R", sanitize_filename(&group.category));
105            let content = render_test_file(&group.category, &active, result_is_simple, result_is_r_list, e2e_config);
106            files.push(GeneratedFile {
107                path: output_base.join("tests").join(filename),
108                content,
109                generated_header: true,
110            });
111        }
112
113        Ok(files)
114    }
115
116    fn language_name(&self) -> &'static str {
117        "r"
118    }
119}
120
121fn render_description(pkg_name: &str, pkg_version: &str, dep_mode: crate::config::DependencyMode) -> String {
122    let dep_line = match dep_mode {
123        crate::config::DependencyMode::Registry => {
124            format!("Imports: {pkg_name} ({pkg_version})\n")
125        }
126        crate::config::DependencyMode::Local => String::new(),
127    };
128    format!(
129        r#"Package: e2e.r
130Title: E2E Tests for {pkg_name}
131Version: 0.1.0
132Description: End-to-end test suite.
133{dep_line}Suggests: testthat (>= 3.0.0)
134Config/testthat/edition: 3
135"#
136    )
137}
138
139fn render_setup_fixtures(test_documents_path: &str) -> String {
140    let mut out = String::new();
141    out.push_str(&hash::header(CommentStyle::Hash));
142    let _ = writeln!(out);
143    let _ = writeln!(
144        out,
145        "# Resolve fixture paths against the repo's `test_documents/` directory."
146    );
147    let _ = writeln!(
148        out,
149        "# testthat sources setup-*.R with the working directory at tests/,"
150    );
151    let _ = writeln!(
152        out,
153        "# so test_documents lives three directories up: tests/ -> e2e/r/ -> e2e/ -> repo root."
154    );
155    let _ = writeln!(
156        out,
157        "# Each `test_that()` block has its working directory reset back to tests/, so"
158    );
159    let _ = writeln!(
160        out,
161        "# fixture lookups must be performed via this helper rather than relying on `setwd`."
162    );
163    let _ = writeln!(
164        out,
165        ".alef_test_documents <- normalizePath(\"{test_documents_path}\", mustWork = FALSE)"
166    );
167    let _ = writeln!(out, ".resolve_fixture <- function(path) {{");
168    let _ = writeln!(out, "  if (dir.exists(.alef_test_documents)) {{");
169    let _ = writeln!(out, "    file.path(.alef_test_documents, path)");
170    let _ = writeln!(out, "  }} else {{");
171    let _ = writeln!(out, "    path");
172    let _ = writeln!(out, "  }}");
173    let _ = writeln!(out, "}}");
174    let _ = writeln!(out);
175    // FormatMetadata is an internally-tagged enum (serde tag = "format_type")
176    // so the JSON shape varies. `simplifyVector = FALSE` hands us a per-variant
177    // list — keyed by the snake_case variant name (`image`, `excel`, ...) — that
178    // points at the inner metadata struct, with all other variants set to NULL.
179    // Collapse both shapes here so terminal `metadata$format` assertions see
180    // the human-readable format string (e.g. "PNG") instead of the wrapper list.
181    let _ = writeln!(
182        out,
183        ".alef_format_value <- function(x) {{
184  if (is.list(x)) {{
185    for (variant in names(x)) {{
186      v <- x[[variant]]
187      if (is.list(v) && !is.null(v[[\"format\"]]) && is.character(v[[\"format\"]])) {{
188        return(v[[\"format\"]])
189      }}
190    }}
191    if (!is.null(x[[\"format\"]]) && is.character(x[[\"format\"]])) {{
192      return(x[[\"format\"]])
193    }}
194    if (!is.null(x[[\"format_type\"]])) {{
195      return(x[[\"format_type\"]])
196    }}
197  }}
198  x
199}}"
200    );
201    out
202}
203
204fn render_test_runner(pkg_path: &str, dep_mode: crate::config::DependencyMode) -> String {
205    let mut out = String::new();
206    out.push_str(&hash::header(CommentStyle::Hash));
207    let _ = writeln!(out, "library(testthat)");
208    match dep_mode {
209        crate::config::DependencyMode::Registry => {
210            // In registry mode, require the installed CRAN package directly.
211            let _ = writeln!(out, "# Package loaded via library() from CRAN install.");
212        }
213        crate::config::DependencyMode::Local => {
214            // Use devtools::load_all() to load the local R package without requiring
215            // a full install, matching the e2e test runner convention.
216            let _ = writeln!(out, "devtools::load_all(\"{pkg_path}\")");
217        }
218    }
219    let _ = writeln!(out);
220    // Surface every failure rather than aborting at the default max_fails=10 —
221    // partial pass counts are essential for triage during e2e bring-up.
222    let _ = writeln!(out, "testthat::set_max_fails(Inf)");
223    // Resolve the tests/ directory relative to this script. testthat reads
224    // setup-*.R from there before each file runs, where path resolution
225    // against test_documents/ is handled by the `.resolve_fixture` helper.
226    let _ = writeln!(
227        out,
228        ".script_dir <- tryCatch(dirname(normalizePath(sys.frame(1)$ofile)), error = function(e) getwd())"
229    );
230    let _ = writeln!(out, "test_dir(file.path(.script_dir, \"tests\"))");
231    out
232}
233
234fn render_test_file(
235    category: &str,
236    fixtures: &[&Fixture],
237    result_is_simple: bool,
238    result_is_r_list: bool,
239    e2e_config: &E2eConfig,
240) -> String {
241    let mut out = String::new();
242    out.push_str(&hash::header(CommentStyle::Hash));
243    let _ = writeln!(out, "# E2e tests for category: {category}");
244    let _ = writeln!(out);
245
246    for (i, fixture) in fixtures.iter().enumerate() {
247        render_test_case(&mut out, fixture, e2e_config, result_is_simple, result_is_r_list);
248        if i + 1 < fixtures.len() {
249            let _ = writeln!(out);
250        }
251    }
252
253    // Clean up trailing newlines.
254    while out.ends_with("\n\n") {
255        out.pop();
256    }
257    if !out.ends_with('\n') {
258        out.push('\n');
259    }
260    out
261}
262
263fn render_test_case(
264    out: &mut String,
265    fixture: &Fixture,
266    e2e_config: &E2eConfig,
267    default_result_is_simple: bool,
268    default_result_is_r_list: bool,
269) {
270    let call_config = e2e_config.resolve_call_for_fixture(
271        fixture.call.as_deref(),
272        &fixture.id,
273        &fixture.resolved_category(),
274        &fixture.tags,
275        &fixture.input,
276    );
277    let call_field_resolver = FieldResolver::new(
278        e2e_config.effective_fields(call_config),
279        e2e_config.effective_fields_optional(call_config),
280        e2e_config.effective_result_fields(call_config),
281        e2e_config.effective_fields_array(call_config),
282        &std::collections::HashSet::new(),
283    );
284    let field_resolver = &call_field_resolver;
285    // Resolve `function` via the R override when present. The default
286    // `call_config.function` can be empty (e.g. trait-bridge calls like
287    // `clear_document_extractors` set `function = ""` at the top level and
288    // expose the real binding name only through per-language overrides);
289    // emitting it verbatim produces invalid `result <- ()` calls.
290    let function_name = call_config
291        .overrides
292        .get("r")
293        .and_then(|o| o.function.as_ref())
294        .cloned()
295        .unwrap_or_else(|| call_config.function.clone());
296    let result_var = &call_config.result_var;
297    // Per-fixture call configs (e.g. `list_document_extractors`) may set
298    // `result_is_simple = true` even when the default `[e2e.call]` does not.
299    // Without this lookup the registry/detection wrappers (which return scalar
300    // strings or character vectors directly) get wrapped in
301    // `jsonlite::fromJSON(...)` and the parser fails on non-JSON output.
302    let r_override = call_config.overrides.get("r");
303    let result_is_simple = if fixture.call.is_some() {
304        call_config.result_is_simple || r_override.is_some_and(|o| o.result_is_simple)
305    } else {
306        default_result_is_simple
307    };
308    // Per-fixture override: when the R binding already returns a native R list
309    // (not a JSON string), suppress `jsonlite::fromJSON` wrapping while still
310    // using field-path (`result$field`) accessors in assertions.
311    let result_is_r_list = if fixture.call.is_some() {
312        r_override.is_some_and(|o| o.result_is_r_list)
313    } else {
314        default_result_is_r_list
315    };
316
317    let test_name = sanitize_ident(&fixture.id);
318    let description = fixture.description.replace('"', "\\\"");
319
320    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
321
322    // Allow per-call R overrides to remap fixture argument names. Many calls
323    // (e.g. `extract_bytes`, `batch_extract_files`) use language-neutral
324    // fixture field names (`data`, `paths`) that the R extendr binding
325    // exposes under different identifiers (`content`, `items`).
326    let arg_name_map = r_override.map(|o| &o.arg_name_map);
327    // Resolve `options_type` for typed config args. When set (e.g. via the
328    // C#/Java override that pins the `config` arg of `embed_texts` to
329    // `EmbeddingConfig`), we use it instead of the heuristic in
330    // `r_default_for_config_arg` so the extendr binding receives the right
331    // ExternalPtr type rather than a default `ExtractionConfig`.
332    let options_type = r_override.and_then(|o| o.options_type.as_deref()).or_else(|| {
333        // Fall back to any other language's override that pins the type —
334        // R doesn't define its own override list yet for most embed calls,
335        // and the underlying Rust signature is the same regardless of
336        // binding, so reusing csharp/java/go/php options_type is safe.
337        //
338        // Skip `Js`-prefixed types from the Node/wasm bindings: those are
339        // NAPI/wasm-bindgen specific wrapper types, while extendr exposes the
340        // bare Rust type names (e.g. `ExtractionConfig`, not `JsExtractionConfig`).
341        call_config
342            .overrides
343            .values()
344            .filter_map(|o| o.options_type.as_deref())
345            .find(|name| !name.starts_with("Js"))
346    });
347    let args_str = build_args_string(&fixture.input, &call_config.args, arg_name_map, options_type);
348
349    // Per-call R extra_args: positional trailing arguments appended verbatim.
350    // Used when the extendr wrapper has more parameters than the fixture
351    // declares (e.g. `render_pdf_page_to_png(pdf_bytes, page_index, dpi,
352    // password)` where `dpi`/`password` are optional in Rust but extendr
353    // surfaces them as required R parameters with no defaults).
354    let r_extra_args: Vec<String> = r_override.map(|o| o.extra_args.clone()).unwrap_or_default();
355    let args_with_extra = if r_extra_args.is_empty() {
356        args_str
357    } else {
358        let extra = r_extra_args.join(", ");
359        if args_str.is_empty() {
360            extra
361        } else {
362            format!("{args_str}, {extra}")
363        }
364    };
365
366    // Build visitor setup and args if present
367    let mut setup_lines = Vec::new();
368    let final_args = if let Some(visitor_spec) = &fixture.visitor {
369        build_r_visitor(&mut setup_lines, visitor_spec);
370        // R rejects duplicated named arguments ("matched by multiple actual arguments"), so
371        // strip any existing `options = ...` arg before appending the visitor-options list.
372        // Handles `options = NULL` (when no default) and `options = ConversionOptions$default()`
373        // (when build_args_string emits a default placeholder for an optional options arg).
374        let base = strip_options_arg(&args_with_extra);
375        let visitor_opts = "options = list(visitor = visitor)";
376        let trimmed = base.trim_matches([' ', ',']);
377        if trimmed.is_empty() {
378            visitor_opts.to_string()
379        } else {
380            format!("{trimmed}, {visitor_opts}")
381        }
382    } else {
383        args_with_extra
384    };
385
386    if expects_error {
387        let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
388        for line in &setup_lines {
389            let _ = writeln!(out, "  {line}");
390        }
391        let _ = writeln!(out, "  expect_error({function_name}({final_args}))");
392        let _ = writeln!(out, "}})");
393        return;
394    }
395
396    let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
397    for line in &setup_lines {
398        let _ = writeln!(out, "  {line}");
399    }
400    // The extendr extraction wrappers return JSON strings carrying the
401    // serialized core result; parse into an R list so tests can use `$`
402    // accessors. `result_is_simple` calls (e.g. `convert_html_to_markdown`)
403    // already return scalar values and must be passed through verbatim.
404    // `result_is_r_list` signals the binding returns a native R list (Robj),
405    // not a JSON string — skip `jsonlite::fromJSON` but keep `$` accessors.
406    // `returns_void` calls (trait-bridge `clear_*` wrappers that return `()`
407    // in Rust → `NULL` in R) must not bind a `result` variable: the previous
408    // emission of `result <- {function_name}(...)` was already correct when
409    // `function_name` resolved, but parsers flag a stray `result` for void
410    // calls. Use `invisible(...)` to make the void contract explicit.
411    if call_config.returns_void {
412        let _ = writeln!(out, "  invisible({function_name}({final_args}))");
413    } else if result_is_simple || result_is_r_list {
414        let _ = writeln!(out, "  {result_var} <- {function_name}({final_args})");
415    } else {
416        let _ = writeln!(
417            out,
418            "  {result_var} <- jsonlite::fromJSON({function_name}({final_args}), simplifyVector = FALSE)"
419        );
420    }
421
422    let result_is_bytes = call_config.result_is_bytes || r_override.is_some_and(|o| o.result_is_bytes);
423    // Resolve assert_enum_fields from the R-language override so the assertion renderer
424    // can identify fields that require the `.alef_format_value` wrapper rather than
425    // matching against the literal field path "metadata.format".
426    static EMPTY_ASSERT_ENUM_FIELDS: std::sync::LazyLock<std::collections::HashMap<String, String>> =
427        std::sync::LazyLock::new(std::collections::HashMap::new);
428    let assert_enum_fields = r_override
429        .map(|o| &o.assert_enum_fields)
430        .unwrap_or(&EMPTY_ASSERT_ENUM_FIELDS);
431    for assertion in &fixture.assertions {
432        let context = RAssertionContext {
433            field_resolver,
434            result_is_simple,
435            result_is_bytes,
436            assert_enum_fields,
437        };
438        render_assertion(out, assertion, result_var, &context);
439    }
440
441    let _ = writeln!(out, "}})");
442}
443
/// Remove the named `options = …` argument (if any) from an R call-args string.
///
/// The string is split on top-level commas only: commas nested inside
/// `()`, `[]`, `{}` or inside single-/double-quoted string literals do not
/// terminate an argument. Backslash escapes inside string literals are
/// honoured, so an escaped quote (`\"`) does not end the literal.
///
/// Each argument is trimmed, arguments whose name (the text before the first
/// `=`) is exactly `options` are dropped, and the rest are re-joined with
/// `", "`. Callers append a fresh `options = …` argument afterwards.
fn strip_options_arg(args_str: &str) -> String {
    /// True when `part` is a named argument whose name is `options`.
    fn is_options_arg(part: &str) -> bool {
        part.split_once('=')
            .is_some_and(|(name, _)| name.trim_end() == "options")
    }

    let mut parts: Vec<&str> = Vec::new();
    let mut start = 0usize;
    let mut depth: i32 = 0;
    // `Some(q)` while inside a string literal opened with quote char `q`.
    let mut quote: Option<char> = None;
    // True when the previous char inside a literal was an unescaped backslash.
    let mut escaped = false;

    for (idx, c) in args_str.char_indices() {
        if let Some(open) = quote {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == open {
                quote = None;
            }
            continue;
        }
        match c {
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' => depth -= 1,
            '\'' | '"' => quote = Some(c),
            ',' if depth == 0 => {
                parts.push(args_str[start..idx].trim());
                // `,` is a single byte, so `idx + 1` is a char boundary.
                start = idx + 1;
            }
            _ => {}
        }
    }
    let tail = args_str[start..].trim();
    if !tail.is_empty() {
        parts.push(tail);
    }

    parts
        .into_iter()
        .filter(|part| !is_options_arg(part))
        .collect::<Vec<_>>()
        .join(", ")
}
486
487fn build_args_string(
488    input: &serde_json::Value,
489    args: &[crate::config::ArgMapping],
490    arg_name_map: Option<&std::collections::HashMap<String, String>>,
491    options_type: Option<&str>,
492) -> String {
493    if args.is_empty() {
494        // No declared args means the wrapper takes zero parameters. Always
495        // emit an empty arg list — fixtures may carry harness metadata under
496        // `input` (e.g. `setup.lazy_init_required` for Go's eager-init shim)
497        // that must not leak into the R call site as a positional `list(...)`.
498        return String::new();
499    }
500
501    let parts: Vec<String> = args
502        .iter()
503        .filter_map(|arg| {
504            // Apply per-language argument renames before emitting the call.
505            let arg_name: &str = arg_name_map
506                .and_then(|m| m.get(&arg.name).map(String::as_str))
507                .unwrap_or(&arg.name);
508
509            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
510            let val = input.get(field);
511            // R extendr-generated wrappers do not preserve Option<T> defaults from
512            // the Rust signature — every parameter is positional and required at
513            // the R level. To keep generated calls valid we must pass a placeholder
514            // (`NULL` for `Option<T>`, `ExtractionConfig$default()` for typed
515            // configs) whenever the fixture omits an optional value.
516            let val = match val {
517                Some(v) if !(v.is_null() && arg.optional) => v,
518                _ => {
519                    if !arg.optional {
520                        return None;
521                    }
522                    if arg.arg_type == "json_object" {
523                        let r_value = r_default_for_config_arg(arg_name, options_type);
524                        return Some(format!("{arg_name} = {r_value}"));
525                    }
526                    return Some(format!("{arg_name} = NULL"));
527                }
528            };
529            // The extendr bindings expect owned PORs (ExternalPtr) for typed
530            // config arguments — passing an R `list()` raises
531            // `Expected ExternalPtr got List`. The fixtures don't carry the
532            // option fields needed to round-trip through ExtractionConfig$new,
533            // so emit `ExtractionConfig$default()` whenever a `json_object` arg
534            // resolves to an empty / object-shaped JSON value.
535            if arg.arg_type == "json_object" && (val.is_null() || val.as_object().is_some_and(|m| m.is_empty())) {
536                let r_value = r_default_for_config_arg(arg_name, options_type);
537                return Some(format!("{arg_name} = {r_value}"));
538            }
539            // Non-empty json_object for typed config args (those whose default is a
540            // `$default()` constructor): use `TypeName$from_json(jsonlite::toJSON(...))`
541            // so the Rust function receives a proper ExternalPtr, not a list.
542            // For `options`-style args (default = NULL) emit as a plain R list.
543            if arg.arg_type == "json_object" && val.is_object() {
544                let default_expr = r_default_for_config_arg(arg_name, options_type);
545                if default_expr.ends_with("$default()") {
546                    // Extract the type name from "TypeName$default()"
547                    let type_name = default_expr.trim_end_matches("$default()");
548                    // Use the `I(...)` (AsIs) wrapper for array-valued fields so
549                    // `jsonlite::toJSON(..., auto_unbox = TRUE)` preserves them as
550                    // JSON arrays. Without this, single-element vectors get
551                    // unboxed to scalars (e.g. `c("foo")` → `"foo"`) and serde
552                    // rejects them when deserializing `Vec<T>` fields.
553                    let r_list = json_to_r_preserve_arrays(val, true);
554                    let r_value = format!("{type_name}$from_json(jsonlite::toJSON({r_list}, auto_unbox = TRUE))");
555                    return Some(format!("{arg_name} = {r_value}"));
556                }
557                let r_value = json_to_r(val, true);
558                return Some(format!("{arg_name} = {r_value}"));
559            }
560            // `json_object` arrays are passed to extendr functions whose Rust
561            // signature is `items: String` (JSON-serialized batch items). The
562            // wrapper has no R-list → JSON conversion, so we must serialize the
563            // fixture value to a literal JSON string at test-emit time.
564            //
565            // Exception: when `element_type = "String"` the Rust signature is
566            // `Vec<String>` (e.g. `embed_texts(texts: Vec<String>, ...)`), which
567            // extendr binds as a native R character vector. Passing a JSON
568            // literal there would land as a single-element character vector
569            // containing the literal bytes `["a","b"]`, which is not what the
570            // caller intended. Emit a plain `c("a","b")` literal instead.
571            if arg.arg_type == "json_object" && val.is_array() {
572                if arg.element_type.as_deref() == Some("String") {
573                    // `c()` is `NULL` in R, which extendr rejects with
574                    // `Expected Strings got Null` when the Rust signature is
575                    // `Vec<String>`. Emit a typed empty char vector for the
576                    // empty-input case so the binding sees `character(0)`.
577                    let r_value = if val.as_array().is_some_and(|arr| arr.is_empty()) {
578                        "character(0)".to_string()
579                    } else {
580                        json_to_r(val, false)
581                    };
582                    return Some(format!("{arg_name} = {r_value}"));
583                }
584                let json_literal = serde_json::to_string(val).unwrap_or_else(|_| "[]".to_string());
585                let escaped = escape_r(&json_literal);
586                return Some(format!("{arg_name} = \"{escaped}\""));
587            }
588            // `bytes` arg type: convert string fixture values into runtime
589            // `readBin(...)` calls so the wrapper receives raw bytes instead
590            // of an R character vector. This mirrors the Python emit_bytes_arg
591            // helper and is what the extendr binding for Vec<u8> expects.
592            if arg.arg_type == "bytes" {
593                if let Some(raw) = val.as_str() {
594                    let r_value = render_bytes_value(raw);
595                    return Some(format!("{arg_name} = {r_value}"));
596                }
597            }
598            // `file_path` arg type: fixtures encode relative paths that resolve
599            // against the repo's `test_documents/` directory. Using a runtime
600            // helper that anchors paths to that directory avoids fragility from
601            // testthat resetting the working directory between files.
602            if arg.arg_type == "file_path" {
603                if let Some(raw) = val.as_str() {
604                    if !raw.starts_with('/') && !raw.is_empty() {
605                        let escaped = escape_r(raw);
606                        return Some(format!("{arg_name} = .resolve_fixture(\"{escaped}\")"));
607                    }
608                }
609            }
610            Some(format!("{arg_name} = {}", json_to_r(val, true)))
611        })
612        .collect();
613
614    parts.join(", ")
615}
616
617/// Render a `bytes` fixture value as the R expression that produces a raw
618/// vector at test time. Mirrors python's `emit_bytes_arg` classifier so we can
619/// support both file-path style fixtures (`"pdf/fake_memo.pdf"`) and inline
620/// text payloads (`"<html>..."`). The resulting expression is dropped directly
621/// into the call site, e.g. `content = readBin(.resolve_fixture("pdf/fake_memo.pdf"), ...)`.
622fn render_bytes_value(raw: &str) -> String {
623    if raw.starts_with('<') || raw.starts_with('{') || raw.starts_with('[') || raw.contains(' ') {
624        // Inline text payload — encode to raw via charToRaw.
625        let escaped = escape_r(raw);
626        return format!("charToRaw(\"{escaped}\")");
627    }
628    let first = raw.chars().next().unwrap_or('\0');
629    if first.is_ascii_alphanumeric() || first == '_' {
630        if let Some(slash) = raw.find('/') {
631            if slash > 0 {
632                let after = &raw[slash + 1..];
633                if after.contains('.') && !after.is_empty() {
634                    let escaped = escape_r(raw);
635                    return format!(
636                        "readBin(.resolve_fixture(\"{escaped}\"), what = \"raw\", n = file.info(.resolve_fixture(\"{escaped}\"))$size)"
637                    );
638                }
639            }
640        }
641    }
642    // Default to inline text encoding — matches Python's InlineText branch.
643    let escaped = escape_r(raw);
644    format!("charToRaw(\"{escaped}\")")
645}
646
/// Pick the R placeholder expression for a config-style argument.
///
/// A pinned `options_type` always wins and yields `<Type>$default()`.
/// Without one, the argument name selects a known `*Config$default()`
/// constructor; `options` maps to `NULL`, and any unrecognised name falls
/// back to `list()`.
fn r_default_for_config_arg(arg_name: &str, options_type: Option<&str>) -> String {
    if let Some(pinned) = options_type {
        return format!("{pinned}$default()");
    }
    let config_type = match arg_name {
        "options" => return "NULL".to_owned(),
        "config" => "ExtractionConfig",
        "html_output" => "HtmlOutputConfig",
        "chunking" => "ChunkingConfig",
        "ocr" => "OcrConfig",
        "image" | "images" => "ImageExtractionConfig",
        "language_detection" => "LanguageDetectionConfig",
        _ => return "list()".to_owned(),
    };
    format!("{config_type}$default()")
}
670
671struct RAssertionContext<'a> {
672    field_resolver: &'a FieldResolver,
673    result_is_simple: bool,
674    result_is_bytes: bool,
675    assert_enum_fields: &'a std::collections::HashMap<String, String>,
676}
677
678fn render_assertion(out: &mut String, assertion: &Assertion, result_var: &str, context: &RAssertionContext<'_>) {
679    // Handle synthetic / derived fields before the is_valid_for_result check
680    // so they are never treated as struct attribute accesses on the result.
681    if let Some(f) = &assertion.field {
682        match f.as_str() {
683            "chunks_have_content" => {
684                let pred = format!("all(sapply({result_var}$chunks %||% list(), function(c) nchar(c$content) > 0))");
685                match assertion.assertion_type.as_str() {
686                    "is_true" => {
687                        let _ = writeln!(out, "  expect_true({pred})");
688                    }
689                    "is_false" => {
690                        let _ = writeln!(out, "  expect_false({pred})");
691                    }
692                    _ => {
693                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
694                    }
695                }
696                return;
697            }
698            "chunks_have_embeddings" => {
699                let pred = format!(
700                    "all(sapply({result_var}$chunks %||% list(), function(c) !is.null(c$embedding) && length(c$embedding) > 0))"
701                );
702                match assertion.assertion_type.as_str() {
703                    "is_true" => {
704                        let _ = writeln!(out, "  expect_true({pred})");
705                    }
706                    "is_false" => {
707                        let _ = writeln!(out, "  expect_false({pred})");
708                    }
709                    _ => {
710                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
711                    }
712                }
713                return;
714            }
715            "chunks_have_heading_context" => {
716                // prepend_heading_context adds heading text to chunk content, so verify chunks
717                // exist and every chunk has non-empty content.
718                let pred_true = format!(
719                    "!is.null({result_var}$chunks) && length({result_var}$chunks) > 0 && all(sapply({result_var}$chunks, function(c) nchar(c$content) > 0))"
720                );
721                let pred_false = format!("is.null({result_var}$chunks) || length({result_var}$chunks) == 0");
722                match assertion.assertion_type.as_str() {
723                    "is_true" => {
724                        let _ = writeln!(out, "  expect_true({pred_true})");
725                    }
726                    "is_false" => {
727                        let _ = writeln!(out, "  expect_true({pred_false})");
728                    }
729                    _ => {
730                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
731                    }
732                }
733                return;
734            }
735            "first_chunk_starts_with_heading" => {
736                // First chunk's content should start with a markdown heading marker (`#`)
737                // when prepend_heading_context is enabled.
738                let pred_true = format!(
739                    "!is.null({result_var}$chunks) && length({result_var}$chunks) > 0 && startsWith(trimws({result_var}$chunks[[1]]$content), \"#\")"
740                );
741                let pred_false = format!(
742                    "is.null({result_var}$chunks) || length({result_var}$chunks) == 0 || !startsWith(trimws({result_var}$chunks[[1]]$content), \"#\")"
743                );
744                match assertion.assertion_type.as_str() {
745                    "is_true" => {
746                        let _ = writeln!(out, "  expect_true({pred_true})");
747                    }
748                    "is_false" => {
749                        let _ = writeln!(out, "  expect_true({pred_false})");
750                    }
751                    _ => {
752                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
753                    }
754                }
755                return;
756            }
757            // ---- EmbedResponse virtual fields ----
758            // The extendr binding cannot return `Vec<Vec<f32>>` directly (extendr's
759            // Robj conversion has no impl for nested numeric vectors), so the
760            // wrapper serializes the result to a JSON string at the FFI boundary.
761            // Parse it on demand here so length/index assertions operate on the
762            // matrix structure rather than on the single string scalar.
763            "embeddings" => {
764                let parsed = format!(
765                    "(if (is.character({result_var}) && length({result_var}) == 1) jsonlite::fromJSON({result_var}, simplifyVector = FALSE) else {result_var})"
766                );
767                match assertion.assertion_type.as_str() {
768                    "count_equals" => {
769                        if let Some(val) = &assertion.value {
770                            let r_val = json_to_r(val, false);
771                            let _ = writeln!(out, "  expect_equal(length({parsed}), {r_val})");
772                        }
773                    }
774                    "count_min" => {
775                        if let Some(val) = &assertion.value {
776                            let r_val = json_to_r(val, false);
777                            let _ = writeln!(out, "  expect_gte(length({parsed}), {r_val})");
778                        }
779                    }
780                    "not_empty" => {
781                        let _ = writeln!(out, "  expect_gt(length({parsed}), 0)");
782                    }
783                    "is_empty" => {
784                        let _ = writeln!(out, "  expect_equal(length({parsed}), 0)");
785                    }
786                    _ => {
787                        let _ = writeln!(
788                            out,
789                            "  # skipped: unsupported assertion type on synthetic field 'embeddings'"
790                        );
791                    }
792                }
793                return;
794            }
795            "embedding_dimensions" => {
796                let expr = format!("(if (length({result_var}) == 0) 0L else length({result_var}[[1]]))");
797                match assertion.assertion_type.as_str() {
798                    "equals" => {
799                        if let Some(val) = &assertion.value {
800                            let r_val = json_to_r(val, false);
801                            let _ = writeln!(out, "  expect_equal({expr}, {r_val})");
802                        }
803                    }
804                    "greater_than" => {
805                        if let Some(val) = &assertion.value {
806                            let r_val = json_to_r(val, false);
807                            let _ = writeln!(out, "  expect_gt({expr}, {r_val})");
808                        }
809                    }
810                    _ => {
811                        let _ = writeln!(
812                            out,
813                            "  # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
814                        );
815                    }
816                }
817                return;
818            }
819            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
820                let pred = match f.as_str() {
821                    "embeddings_valid" => {
822                        format!("all(sapply({result_var}, function(e) length(e) > 0))")
823                    }
824                    "embeddings_finite" => {
825                        format!("all(sapply({result_var}, function(e) all(is.finite(e))))")
826                    }
827                    "embeddings_non_zero" => {
828                        format!("all(sapply({result_var}, function(e) any(e != 0.0)))")
829                    }
830                    "embeddings_normalized" => {
831                        format!("all(sapply({result_var}, function(e) abs(sum(e * e) - 1.0) < 1e-3))")
832                    }
833                    _ => unreachable!(),
834                };
835                match assertion.assertion_type.as_str() {
836                    "is_true" => {
837                        let _ = writeln!(out, "  expect_true({pred})");
838                    }
839                    "is_false" => {
840                        let _ = writeln!(out, "  expect_false({pred})");
841                    }
842                    _ => {
843                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
844                    }
845                }
846                return;
847            }
848            // ---- keywords / keywords_count ----
849            // R ExtractionResult does not expose extracted_keywords; skip.
850            "keywords" | "keywords_count" => {
851                let _ = writeln!(out, "  # skipped: field '{f}' not available on R ExtractionResult");
852                return;
853            }
854            _ => {}
855        }
856    }
857
858    // Skip assertions on fields that don't exist on the result type.
859    // Exception: for result_is_simple, "result" is valid because it refers to the
860    // result variable directly (which holds the plain string/array value).
861    if let Some(f) = &assertion.field {
862        if !f.is_empty() && !context.field_resolver.is_valid_for_result(f) {
863            // Allow "result" field on simple-type returns
864            if !(context.result_is_simple && f == "result") {
865                let _ = writeln!(out, "  # skipped: field '{f}' not available on result type");
866                return;
867            }
868        }
869    }
870
871    // When result_is_simple, skip assertions that reference non-content fields
872    // (e.g., metadata, document, structure) since the binding returns a plain value.
873    if context.result_is_simple {
874        if let Some(f) = &assertion.field {
875            let f_lower = f.to_lowercase();
876            if !f.is_empty()
877                && f_lower != "content"
878                && (f_lower.starts_with("metadata")
879                    || f_lower.starts_with("document")
880                    || f_lower.starts_with("structure"))
881            {
882                let _ = writeln!(
883                    out,
884                    "  # skipped: result_is_simple for field '{f}' not available on result type"
885                );
886                return;
887            }
888        }
889    }
890
891    let field_expr = if context.result_is_simple {
892        result_var.to_string()
893    } else {
894        match &assertion.field {
895            Some(f) if !f.is_empty() => context.field_resolver.accessor(f, "r", result_var),
896            _ => result_var.to_string(),
897        }
898    };
899
900    // Fields declared in `assert_enum_fields` map to sealed/internally-tagged enum
901    // types.  Under `simplifyVector = FALSE`, such fields deserialize as named lists
902    // keyed by the active variant.  Wrap the accessor with `.alef_format_value`
903    // (defined in setup-fixtures.R) so the assertion sees the display string rather
904    // than the raw list structure.
905    let field_expr = match &assertion.field {
906        Some(f) if context.assert_enum_fields.contains_key(f.as_str()) => {
907            format!(".alef_format_value({field_expr})")
908        }
909        _ => field_expr,
910    };
911
912    match assertion.assertion_type.as_str() {
913        "equals" => {
914            if let Some(expected) = &assertion.value {
915                let r_val = json_to_r(expected, false);
916                let _ = writeln!(out, "  expect_equal(trimws({field_expr}), {r_val})");
917            }
918        }
919        "contains" => {
920            if let Some(expected) = &assertion.value {
921                let r_val = json_to_r(expected, false);
922                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}, fixed = TRUE))");
923            }
924        }
925        "contains_all" => {
926            if let Some(values) = &assertion.values {
927                for val in values {
928                    let r_val = json_to_r(val, false);
929                    let _ = writeln!(out, "  expect_true(any(grepl({r_val}, {field_expr}, fixed = TRUE)))");
930                }
931            }
932        }
933        "not_contains" => {
934            if let Some(expected) = &assertion.value {
935                let r_val = json_to_r(expected, false);
936                let _ = writeln!(out, "  expect_false(grepl({r_val}, {field_expr}, fixed = TRUE))");
937            }
938        }
939        "not_empty" => {
940            // Multi-element character vectors (e.g. `list_embedding_presets`)
941            // would otherwise evaluate `nchar(x) > 0` element-wise and fail
942            // `expect_true`'s scalar-logical contract. Reduce with `any()` so
943            // the predicate stays a single TRUE/FALSE regardless of length,
944            // and treat zero-length vectors as empty.
945            let _ = writeln!(
946                out,
947                "  expect_true(if (is.character({field_expr})) length({field_expr}) > 0 && any(nchar({field_expr}) > 0) else length({field_expr}) > 0)"
948            );
949        }
950        "is_empty" => {
951            // Rust `Option<String>::None` surfaces as `NA_character_` through
952            // extendr, and `Vec<...>` empties as a zero-length vector. Treat
953            // NULL, NA, "", and zero-length collections as "empty" so the same
954            // assertion works for scalar Option returns (`get_embedding_preset`)
955            // and collection returns alike.
956            let _ = writeln!(
957                out,
958                "  expect_true(is.null({field_expr}) || length({field_expr}) == 0 || (length({field_expr}) == 1 && (is.na({field_expr}) || identical({field_expr}, \"\"))))"
959            );
960        }
961        "contains_any" => {
962            if let Some(values) = &assertion.values {
963                let items: Vec<String> = values.iter().map(|v| json_to_r(v, false)).collect();
964                let vec_str = items.join(", ");
965                let _ = writeln!(
966                    out,
967                    "  expect_true(any(sapply(c({vec_str}), function(v) grepl(v, {field_expr}, fixed = TRUE))))"
968                );
969            }
970        }
971        "greater_than" => {
972            if let Some(val) = &assertion.value {
973                let r_val = json_to_r(val, false);
974                let _ = writeln!(out, "  expect_true({field_expr} > {r_val})");
975            }
976        }
977        "less_than" => {
978            if let Some(val) = &assertion.value {
979                let r_val = json_to_r(val, false);
980                let _ = writeln!(out, "  expect_true({field_expr} < {r_val})");
981            }
982        }
983        "greater_than_or_equal" => {
984            if let Some(val) = &assertion.value {
985                let r_val = json_to_r(val, false);
986                let _ = writeln!(out, "  expect_true({field_expr} >= {r_val})");
987            }
988        }
989        "less_than_or_equal" => {
990            if let Some(val) = &assertion.value {
991                let r_val = json_to_r(val, false);
992                let _ = writeln!(out, "  expect_true({field_expr} <= {r_val})");
993            }
994        }
995        "starts_with" => {
996            if let Some(expected) = &assertion.value {
997                let r_val = json_to_r(expected, false);
998                let _ = writeln!(out, "  expect_true(startsWith({field_expr}, {r_val}))");
999            }
1000        }
1001        "ends_with" => {
1002            if let Some(expected) = &assertion.value {
1003                let r_val = json_to_r(expected, false);
1004                let _ = writeln!(out, "  expect_true(endsWith({field_expr}, {r_val}))");
1005            }
1006        }
1007        "min_length" => {
1008            if let Some(val) = &assertion.value {
1009                if let Some(n) = val.as_u64() {
1010                    // Raw byte returns (`result_is_bytes`) come back as an R
1011                    // raw vector; `nchar()` element-wises and breaks the
1012                    // expect_true scalar contract. Use `length()` to compare
1013                    // the byte count instead.
1014                    let size_fn = if context.result_is_bytes { "length" } else { "nchar" };
1015                    let _ = writeln!(out, "  expect_true({size_fn}({field_expr}) >= {n})");
1016                }
1017            }
1018        }
1019        "max_length" => {
1020            if let Some(val) = &assertion.value {
1021                if let Some(n) = val.as_u64() {
1022                    let size_fn = if context.result_is_bytes { "length" } else { "nchar" };
1023                    let _ = writeln!(out, "  expect_true({size_fn}({field_expr}) <= {n})");
1024                }
1025            }
1026        }
1027        "count_min" => {
1028            if let Some(val) = &assertion.value {
1029                if let Some(n) = val.as_u64() {
1030                    let _ = writeln!(out, "  expect_true(length({field_expr}) >= {n})");
1031                }
1032            }
1033        }
1034        "count_equals" => {
1035            if let Some(val) = &assertion.value {
1036                if let Some(n) = val.as_u64() {
1037                    let _ = writeln!(out, "  expect_equal(length({field_expr}), {n})");
1038                }
1039            }
1040        }
1041        "is_true" => {
1042            let _ = writeln!(out, "  expect_true({field_expr})");
1043        }
1044        "is_false" => {
1045            let _ = writeln!(out, "  expect_false({field_expr})");
1046        }
1047        "method_result" => {
1048            if let Some(method_name) = &assertion.method {
1049                let call_expr = build_r_method_call(result_var, method_name, assertion.args.as_ref());
1050                let check = assertion.check.as_deref().unwrap_or("is_true");
1051                match check {
1052                    "equals" => {
1053                        if let Some(val) = &assertion.value {
1054                            if val.is_boolean() {
1055                                if val.as_bool() == Some(true) {
1056                                    let _ = writeln!(out, "  expect_true({call_expr})");
1057                                } else {
1058                                    let _ = writeln!(out, "  expect_false({call_expr})");
1059                                }
1060                            } else {
1061                                let r_val = json_to_r(val, false);
1062                                let _ = writeln!(out, "  expect_equal({call_expr}, {r_val})");
1063                            }
1064                        }
1065                    }
1066                    "is_true" => {
1067                        let _ = writeln!(out, "  expect_true({call_expr})");
1068                    }
1069                    "is_false" => {
1070                        let _ = writeln!(out, "  expect_false({call_expr})");
1071                    }
1072                    "greater_than_or_equal" => {
1073                        if let Some(val) = &assertion.value {
1074                            let r_val = json_to_r(val, false);
1075                            let _ = writeln!(out, "  expect_true({call_expr} >= {r_val})");
1076                        }
1077                    }
1078                    "count_min" => {
1079                        if let Some(val) = &assertion.value {
1080                            let n = val.as_u64().unwrap_or(0);
1081                            let _ = writeln!(out, "  expect_true(length({call_expr}) >= {n})");
1082                        }
1083                    }
1084                    "is_error" => {
1085                        let _ = writeln!(out, "  expect_error({call_expr})");
1086                    }
1087                    "contains" => {
1088                        if let Some(val) = &assertion.value {
1089                            let r_val = json_to_r(val, false);
1090                            let _ = writeln!(out, "  expect_true(grepl({r_val}, {call_expr}, fixed = TRUE))");
1091                        }
1092                    }
1093                    other_check => {
1094                        panic!("R e2e generator: unsupported method_result check type: {other_check}");
1095                    }
1096                }
1097            } else {
1098                panic!("R e2e generator: method_result assertion missing 'method' field");
1099            }
1100        }
1101        "matches_regex" => {
1102            if let Some(expected) = &assertion.value {
1103                let r_val = json_to_r(expected, false);
1104                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}))");
1105            }
1106        }
1107        "not_error" => {
1108            // The call itself stops the test on error; emit an explicit
1109            // `expect_true(TRUE)` so testthat doesn't report the test as
1110            // empty when this is the only assertion.
1111            let _ = writeln!(out, "  expect_true(TRUE)");
1112        }
1113        "error" => {
1114            // Handled at the test level.
1115        }
1116        other => {
1117            panic!("R e2e generator: unsupported assertion type: {other}");
1118        }
1119    }
1120}
1121
/// Convert a PascalCase string to snake_case.
///
/// Every uppercase character except the very first one is preceded by an
/// underscore, and all characters are lowercased,
/// e.g. "DoubleEqual" → "double_equal", "Backticks" → "backticks".
fn pascal_to_snake_case(s: &str) -> String {
    let mut snake = String::with_capacity(s.len() + 4);
    let mut rest = s.chars();

    // The leading character never gets a separator in front of it.
    if let Some(first) = rest.next() {
        snake.extend(first.to_lowercase());
    }
    for ch in rest {
        if ch.is_uppercase() {
            snake.push('_');
        }
        snake.extend(ch.to_lowercase());
    }
    snake
}
1142
1143/// Convert a JSON value to an R expression suitable for embedding inside a
1144/// `list(...)` that will be passed to `jsonlite::toJSON(..., auto_unbox = TRUE)`.
1145///
1146/// Differs from [`json_to_r`] in that any array-valued field is wrapped with
1147/// `I(...)` (jsonlite's `AsIs` marker) so it remains a JSON array after the
1148/// `auto_unbox` transform. Empty arrays become `I(list())` (→ `[]`) and
1149/// non-empty arrays become `I(c(...))` (→ `[..]`). Without this wrapping,
1150/// `Vec<String>` fields like `exclude_selectors` get unboxed to scalars and
1151/// serde deserialization on the Rust side fails with
1152/// `invalid type: string "foo", expected a sequence`.
1153fn json_to_r_preserve_arrays(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
1154    match value {
1155        serde_json::Value::Array(arr) => {
1156            if arr.is_empty() {
1157                "I(list())".to_string()
1158            } else {
1159                let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
1160                format!("I(c({}))", items.join(", "))
1161            }
1162        }
1163        serde_json::Value::Object(map) => {
1164            let entries: Vec<String> = map
1165                .iter()
1166                .map(|(k, v)| {
1167                    format!(
1168                        "\"{}\" = {}",
1169                        escape_r(k),
1170                        json_to_r_preserve_arrays(v, lowercase_enum_values)
1171                    )
1172                })
1173                .collect();
1174            format!("list({})", entries.join(", "))
1175        }
1176        _ => json_to_r(value, lowercase_enum_values),
1177    }
1178}
1179
1180/// * `lowercase_enum_values` - If true, convert PascalCase strings to snake_case (for enum values).
1181///   If false, preserve original case (for assertion expected values).
1182fn json_to_r(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
1183    match value {
1184        serde_json::Value::String(s) => {
1185            // Convert PascalCase enum values to snake_case only if requested.
1186            // e.g. "Backticks" → "backticks", "DoubleEqual" → "double_equal"
1187            let normalized = if lowercase_enum_values && s.chars().next().is_some_and(|c| c.is_uppercase()) {
1188                pascal_to_snake_case(s)
1189            } else {
1190                s.clone()
1191            };
1192            format!("\"{}\"", escape_r(&normalized))
1193        }
1194        serde_json::Value::Bool(true) => "TRUE".to_string(),
1195        serde_json::Value::Bool(false) => "FALSE".to_string(),
1196        serde_json::Value::Number(n) => n.to_string(),
1197        serde_json::Value::Null => "NULL".to_string(),
1198        serde_json::Value::Array(arr) => {
1199            let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
1200            format!("c({})", items.join(", "))
1201        }
1202        serde_json::Value::Object(map) => {
1203            let entries: Vec<String> = map
1204                .iter()
1205                .map(|(k, v)| format!("\"{}\" = {}", escape_r(k), json_to_r(v, lowercase_enum_values)))
1206                .collect();
1207            format!("list({})", entries.join(", "))
1208        }
1209    }
1210}
1211
1212/// Build an R visitor list and add setup line.
1213fn build_r_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
1214    use std::fmt::Write as FmtWrite;
1215    // Collect each callback as a separate string, then join with ",\n" to avoid
1216    // trailing commas — R's list() does not accept a trailing comma.
1217    let methods: Vec<String> = visitor_spec
1218        .callbacks
1219        .iter()
1220        .map(|(method_name, action)| {
1221            let mut buf = String::new();
1222            emit_r_visitor_method(&mut buf, method_name, action);
1223            // strip the trailing ",\n" added by emit_r_visitor_method
1224            buf.trim_end_matches(['\n', ',']).to_string()
1225        })
1226        .collect();
1227    let mut visitor_obj = String::new();
1228    let _ = writeln!(visitor_obj, "list(");
1229    let _ = write!(visitor_obj, "{}", methods.join(",\n"));
1230    let _ = writeln!(visitor_obj);
1231    let _ = writeln!(visitor_obj, "  )");
1232
1233    setup_lines.push(format!("visitor <- {visitor_obj}"));
1234}
1235
1236/// Build an R call expression for a `method_result` assertion.
1237/// Maps method names to the appropriate R function or method calls.
1238fn build_r_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1239    match method_name {
1240        "root_child_count" => format!("{result_var}$root_child_count()"),
1241        "root_node_type" => format!("{result_var}$root_node_type()"),
1242        "named_children_count" => format!("{result_var}$named_children_count()"),
1243        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1244        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1245        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1246        "contains_node_type" => {
1247            let node_type = args
1248                .and_then(|a| a.get("node_type"))
1249                .and_then(|v| v.as_str())
1250                .unwrap_or("");
1251            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1252        }
1253        "find_nodes_by_type" => {
1254            let node_type = args
1255                .and_then(|a| a.get("node_type"))
1256                .and_then(|v| v.as_str())
1257                .unwrap_or("");
1258            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1259        }
1260        "run_query" => {
1261            let query_source = args
1262                .and_then(|a| a.get("query_source"))
1263                .and_then(|v| v.as_str())
1264                .unwrap_or("");
1265            let language = args
1266                .and_then(|a| a.get("language"))
1267                .and_then(|v| v.as_str())
1268                .unwrap_or("");
1269            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1270        }
1271        _ => {
1272            if let Some(args_val) = args {
1273                let arg_str = args_val
1274                    .as_object()
1275                    .map(|obj| {
1276                        obj.iter()
1277                            .map(|(k, v)| {
1278                                let r_val = json_to_r(v, false);
1279                                format!("{k} = {r_val}")
1280                            })
1281                            .collect::<Vec<_>>()
1282                            .join(", ")
1283                    })
1284                    .unwrap_or_default();
1285                format!("{result_var}${method_name}({arg_str})")
1286            } else {
1287                format!("{result_var}${method_name}()")
1288            }
1289        }
1290    }
1291}
1292
1293/// Emit an R visitor method for a callback action.
1294fn emit_r_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1295    use std::fmt::Write as FmtWrite;
1296
1297    // R uses visit_ prefix (matches binding signature)
1298    let params = match method_name {
1299        "visit_link" => "ctx, href, text, title",
1300        "visit_image" => "ctx, src, alt, title",
1301        "visit_heading" => "ctx, level, text, id",
1302        "visit_code_block" => "ctx, lang, code",
1303        "visit_code_inline"
1304        | "visit_strong"
1305        | "visit_emphasis"
1306        | "visit_strikethrough"
1307        | "visit_underline"
1308        | "visit_subscript"
1309        | "visit_superscript"
1310        | "visit_mark"
1311        | "visit_button"
1312        | "visit_summary"
1313        | "visit_figcaption"
1314        | "visit_definition_term"
1315        | "visit_definition_description" => "ctx, text",
1316        "visit_text" => "ctx, text",
1317        "visit_list_item" => "ctx, ordered, marker, text",
1318        "visit_blockquote" => "ctx, content, depth",
1319        "visit_table_row" => "ctx, cells, is_header",
1320        "visit_custom_element" => "ctx, tag_name, html",
1321        "visit_form" => "ctx, action_url, method",
1322        "visit_input" => "ctx, input_type, name, value",
1323        "visit_audio" | "visit_video" | "visit_iframe" => "ctx, src",
1324        "visit_details" => "ctx, open",
1325        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => "ctx, output",
1326        "visit_list_start" => "ctx, ordered",
1327        "visit_list_end" => "ctx, ordered, output",
1328        _ => "ctx",
1329    };
1330
1331    let _ = writeln!(out, "    {method_name} = function({params}) {{");
1332    match action {
1333        CallbackAction::Skip => {
1334            let _ = writeln!(out, "      \"skip\"");
1335        }
1336        CallbackAction::Continue => {
1337            let _ = writeln!(out, "      \"continue\"");
1338        }
1339        CallbackAction::PreserveHtml => {
1340            let _ = writeln!(out, "      \"preserve_html\"");
1341        }
1342        CallbackAction::Custom { output } => {
1343            let escaped = escape_r(output);
1344            let _ = writeln!(out, "      list(custom = \"{escaped}\")");
1345        }
1346        CallbackAction::CustomTemplate { template, return_form } => {
1347            let r_expr = r_template_to_paste0(template);
1348            match return_form {
1349                TemplateReturnForm::BareString => {
1350                    let _ = writeln!(out, "      {r_expr}");
1351                }
1352                TemplateReturnForm::Dict => {
1353                    let _ = writeln!(out, "      list(custom = {r_expr})");
1354                }
1355            }
1356        }
1357    }
1358    let _ = writeln!(out, "    }},");
1359}