Skip to main content

alef_e2e/codegen/
r.rs

1//! R e2e test generator using testthat.
2
3use crate::config::E2eConfig;
4use crate::escape::{escape_r, r_template_to_paste0, sanitize_filename, sanitize_ident};
5use crate::field_access::FieldResolver;
6use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
7use alef_core::backend::GeneratedFile;
8use alef_core::config::ResolvedCrateConfig;
9use alef_core::hash::{self, CommentStyle};
10use anyhow::Result;
11use std::fmt::Write as FmtWrite;
12use std::path::PathBuf;
13
14use super::E2eCodegen;
15
16/// R e2e code generator.
17pub struct RCodegen;
18
19impl E2eCodegen for RCodegen {
20    fn generate(
21        &self,
22        groups: &[FixtureGroup],
23        e2e_config: &E2eConfig,
24        config: &ResolvedCrateConfig,
25    ) -> Result<Vec<GeneratedFile>> {
26        let lang = self.language_name();
27        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
28
29        let mut files = Vec::new();
30
31        // Resolve call config with overrides.
32        let call = &e2e_config.call;
33        let overrides = call.overrides.get(lang);
34        let module_path = overrides
35            .and_then(|o| o.module.as_ref())
36            .cloned()
37            .unwrap_or_else(|| call.module.clone());
38        let _function_name = overrides
39            .and_then(|o| o.function.as_ref())
40            .cloned()
41            .unwrap_or_else(|| call.function.clone());
42        let result_is_simple = call.result_is_simple || overrides.is_some_and(|o| o.result_is_simple);
43        let result_is_r_list = overrides.is_some_and(|o| o.result_is_r_list);
44        let _result_var = &call.result_var;
45
46        // Resolve package config.
47        let r_pkg = e2e_config.resolve_package("r");
48        let pkg_name = r_pkg
49            .as_ref()
50            .and_then(|p| p.name.as_ref())
51            .cloned()
52            .unwrap_or_else(|| module_path.clone());
53        let pkg_path = r_pkg
54            .as_ref()
55            .and_then(|p| p.path.as_ref())
56            .cloned()
57            .unwrap_or_else(|| "../../packages/r".to_string());
58        let pkg_version = r_pkg
59            .as_ref()
60            .and_then(|p| p.version.as_ref())
61            .cloned()
62            .or_else(|| config.resolved_version())
63            .unwrap_or_else(|| "0.1.0".to_string());
64
65        // Generate DESCRIPTION file.
66        files.push(GeneratedFile {
67            path: output_base.join("DESCRIPTION"),
68            content: render_description(&pkg_name, &pkg_version, e2e_config.dep_mode),
69            generated_header: false,
70        });
71
72        // Generate test runner script.
73        files.push(GeneratedFile {
74            path: output_base.join("run_tests.R"),
75            content: render_test_runner(&pkg_path, e2e_config.dep_mode),
76            generated_header: true,
77        });
78
79        // setup-fixtures.R — testthat sources `setup-*.R` files in the tests
80        // directory once before any tests run, with the working directory set
81        // to the tests/ folder. We use this hook to chdir into the repo's
82        // shared `test_documents/` directory so that fixture paths like
83        // `pdf/fake_memo.pdf` resolve at extraction time.
84        files.push(GeneratedFile {
85            path: output_base.join("tests").join("setup-fixtures.R"),
86            content: render_setup_fixtures(),
87            generated_header: true,
88        });
89
90        // Generate test files per category.
91        for group in groups {
92            let active: Vec<&Fixture> = group
93                .fixtures
94                .iter()
95                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
96                .collect();
97
98            if active.is_empty() {
99                continue;
100            }
101
102            let filename = format!("test_{}.R", sanitize_filename(&group.category));
103            let field_resolver = FieldResolver::new(
104                &e2e_config.fields,
105                &e2e_config.fields_optional,
106                &e2e_config.result_fields,
107                &e2e_config.fields_array,
108                &std::collections::HashSet::new(),
109            );
110            let content = render_test_file(
111                &group.category,
112                &active,
113                &field_resolver,
114                result_is_simple,
115                result_is_r_list,
116                e2e_config,
117            );
118            files.push(GeneratedFile {
119                path: output_base.join("tests").join(filename),
120                content,
121                generated_header: true,
122            });
123        }
124
125        Ok(files)
126    }
127
128    fn language_name(&self) -> &'static str {
129        "r"
130    }
131}
132
133fn render_description(pkg_name: &str, pkg_version: &str, dep_mode: crate::config::DependencyMode) -> String {
134    let dep_line = match dep_mode {
135        crate::config::DependencyMode::Registry => {
136            format!("Imports: {pkg_name} ({pkg_version})\n")
137        }
138        crate::config::DependencyMode::Local => String::new(),
139    };
140    format!(
141        r#"Package: e2e.r
142Title: E2E Tests for {pkg_name}
143Version: 0.1.0
144Description: End-to-end test suite.
145{dep_line}Suggests: testthat (>= 3.0.0)
146Config/testthat/edition: 3
147"#
148    )
149}
150
151fn render_setup_fixtures() -> String {
152    let mut out = String::new();
153    out.push_str(&hash::header(CommentStyle::Hash));
154    let _ = writeln!(out);
155    let _ = writeln!(
156        out,
157        "# Resolve fixture paths against the repo's `test_documents/` directory."
158    );
159    let _ = writeln!(
160        out,
161        "# testthat sources setup-*.R with the working directory at tests/,"
162    );
163    let _ = writeln!(
164        out,
165        "# so test_documents lives three directories up: tests/ -> e2e/r/ -> e2e/ -> repo root."
166    );
167    let _ = writeln!(
168        out,
169        "# Each `test_that()` block has its working directory reset back to tests/, so"
170    );
171    let _ = writeln!(
172        out,
173        "# fixture lookups must be performed via this helper rather than relying on `setwd`."
174    );
175    let _ = writeln!(
176        out,
177        ".kreuzberg_test_documents <- normalizePath(\"../../../test_documents\", mustWork = FALSE)"
178    );
179    let _ = writeln!(out, ".resolve_fixture <- function(path) {{");
180    let _ = writeln!(out, "  if (dir.exists(.kreuzberg_test_documents)) {{");
181    let _ = writeln!(out, "    file.path(.kreuzberg_test_documents, path)");
182    let _ = writeln!(out, "  }} else {{");
183    let _ = writeln!(out, "    path");
184    let _ = writeln!(out, "  }}");
185    let _ = writeln!(out, "}}");
186    out
187}
188
189fn render_test_runner(pkg_path: &str, dep_mode: crate::config::DependencyMode) -> String {
190    let mut out = String::new();
191    out.push_str(&hash::header(CommentStyle::Hash));
192    let _ = writeln!(out, "library(testthat)");
193    match dep_mode {
194        crate::config::DependencyMode::Registry => {
195            // In registry mode, require the installed CRAN package directly.
196            let _ = writeln!(out, "# Package loaded via library() from CRAN install.");
197        }
198        crate::config::DependencyMode::Local => {
199            // Use devtools::load_all() to load the local R package without requiring
200            // a full install, matching the e2e test runner convention.
201            let _ = writeln!(out, "devtools::load_all(\"{pkg_path}\")");
202        }
203    }
204    let _ = writeln!(out);
205    // Surface every failure rather than aborting at the default max_fails=10 —
206    // partial pass counts are essential for triage during e2e bring-up.
207    let _ = writeln!(out, "testthat::set_max_fails(Inf)");
208    // Resolve the tests/ directory relative to this script. testthat reads
209    // setup-*.R from there before each file runs, where path resolution
210    // against test_documents/ is handled by the `.resolve_fixture` helper.
211    let _ = writeln!(
212        out,
213        ".script_dir <- tryCatch(dirname(normalizePath(sys.frame(1)$ofile)), error = function(e) getwd())"
214    );
215    let _ = writeln!(out, "test_dir(file.path(.script_dir, \"tests\"))");
216    out
217}
218
219fn render_test_file(
220    category: &str,
221    fixtures: &[&Fixture],
222    field_resolver: &FieldResolver,
223    result_is_simple: bool,
224    result_is_r_list: bool,
225    e2e_config: &E2eConfig,
226) -> String {
227    let mut out = String::new();
228    out.push_str(&hash::header(CommentStyle::Hash));
229    let _ = writeln!(out, "# E2e tests for category: {category}");
230    let _ = writeln!(out);
231
232    for (i, fixture) in fixtures.iter().enumerate() {
233        render_test_case(
234            &mut out,
235            fixture,
236            e2e_config,
237            field_resolver,
238            result_is_simple,
239            result_is_r_list,
240        );
241        if i + 1 < fixtures.len() {
242            let _ = writeln!(out);
243        }
244    }
245
246    // Clean up trailing newlines.
247    while out.ends_with("\n\n") {
248        out.pop();
249    }
250    if !out.ends_with('\n') {
251        out.push('\n');
252    }
253    out
254}
255
256fn render_test_case(
257    out: &mut String,
258    fixture: &Fixture,
259    e2e_config: &E2eConfig,
260    field_resolver: &FieldResolver,
261    default_result_is_simple: bool,
262    default_result_is_r_list: bool,
263) {
264    let call_config = e2e_config.resolve_call(fixture.call.as_deref());
265    let function_name = &call_config.function;
266    let result_var = &call_config.result_var;
267    // Per-fixture call configs (e.g. `list_document_extractors`) may set
268    // `result_is_simple = true` even when the default `[e2e.call]` does not.
269    // Without this lookup the registry/detection wrappers (which return scalar
270    // strings or character vectors directly) get wrapped in
271    // `jsonlite::fromJSON(...)` and the parser fails on non-JSON output.
272    let r_override = call_config.overrides.get("r");
273    let result_is_simple = if fixture.call.is_some() {
274        call_config.result_is_simple || r_override.is_some_and(|o| o.result_is_simple)
275    } else {
276        default_result_is_simple
277    };
278    // Per-fixture override: when the R binding already returns a native R list
279    // (not a JSON string), suppress `jsonlite::fromJSON` wrapping while still
280    // using field-path (`result$field`) accessors in assertions.
281    let result_is_r_list = if fixture.call.is_some() {
282        r_override.is_some_and(|o| o.result_is_r_list)
283    } else {
284        default_result_is_r_list
285    };
286
287    let test_name = sanitize_ident(&fixture.id);
288    let description = fixture.description.replace('"', "\\\"");
289
290    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
291
292    // Allow per-call R overrides to remap fixture argument names. Many calls
293    // (e.g. `extract_bytes`, `batch_extract_files`) use language-neutral
294    // fixture field names (`data`, `paths`) that the R extendr binding
295    // exposes under different identifiers (`content`, `items`).
296    let arg_name_map = r_override.map(|o| &o.arg_name_map);
297    let args_str = build_args_string(&fixture.input, &call_config.args, arg_name_map);
298
299    // Build visitor setup and args if present
300    let mut setup_lines = Vec::new();
301    let final_args = if let Some(visitor_spec) = &fixture.visitor {
302        build_r_visitor(&mut setup_lines, visitor_spec);
303        // Strip any `options = NULL` placeholder that build_args_string may have emitted
304        // for the optional options arg — we replace it with the visitor options list.
305        let base = args_str
306            .replace(", options = NULL", "")
307            .replace("options = NULL, ", "")
308            .replace("options = NULL", "");
309        let visitor_opts = "options = list(visitor = visitor)";
310        let trimmed = base.trim_matches([' ', ',']);
311        if trimmed.is_empty() {
312            visitor_opts.to_string()
313        } else {
314            format!("{trimmed}, {visitor_opts}")
315        }
316    } else {
317        args_str
318    };
319
320    if expects_error {
321        let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
322        for line in &setup_lines {
323            let _ = writeln!(out, "  {line}");
324        }
325        let _ = writeln!(out, "  expect_error({function_name}({final_args}))");
326        let _ = writeln!(out, "}})");
327        return;
328    }
329
330    let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
331    for line in &setup_lines {
332        let _ = writeln!(out, "  {line}");
333    }
334    // The extendr extraction wrappers return JSON strings carrying the
335    // serialized core result; parse into an R list so tests can use `$`
336    // accessors. `result_is_simple` calls (e.g. `convert_html_to_markdown`)
337    // already return scalar values and must be passed through verbatim.
338    // `result_is_r_list` signals the binding returns a native R list (Robj),
339    // not a JSON string — skip `jsonlite::fromJSON` but keep `$` accessors.
340    if result_is_simple || result_is_r_list {
341        let _ = writeln!(out, "  {result_var} <- {function_name}({final_args})");
342    } else {
343        let _ = writeln!(
344            out,
345            "  {result_var} <- jsonlite::fromJSON({function_name}({final_args}), simplifyVector = FALSE)"
346        );
347    }
348
349    for assertion in &fixture.assertions {
350        render_assertion(out, assertion, result_var, field_resolver, result_is_simple, e2e_config);
351    }
352
353    let _ = writeln!(out, "}})");
354}
355
356fn build_args_string(
357    input: &serde_json::Value,
358    args: &[crate::config::ArgMapping],
359    arg_name_map: Option<&std::collections::HashMap<String, String>>,
360) -> String {
361    if args.is_empty() {
362        // No declared args means the wrapper takes zero parameters; emitting
363        // `list()` here would trigger an `unused argument (list())` error in R.
364        // Likewise, fall through to nothing if the fixture's input is empty.
365        if matches!(input, serde_json::Value::Null) || input.as_object().is_some_and(|m| m.is_empty()) {
366            return String::new();
367        }
368        return json_to_r(input, true);
369    }
370
371    let parts: Vec<String> = args
372        .iter()
373        .filter_map(|arg| {
374            // Apply per-language argument renames before emitting the call.
375            let arg_name: &str = arg_name_map
376                .and_then(|m| m.get(&arg.name).map(String::as_str))
377                .unwrap_or(&arg.name);
378
379            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
380            let val = input.get(field);
381            // R extendr-generated wrappers do not preserve Option<T> defaults from
382            // the Rust signature — every parameter is positional and required at
383            // the R level. To keep generated calls valid we must pass a placeholder
384            // (`NULL` for `Option<T>`, `ExtractionConfig$default()` for typed
385            // configs) whenever the fixture omits an optional value.
386            let val = match val {
387                Some(v) if !(v.is_null() && arg.optional) => v,
388                _ => {
389                    if !arg.optional {
390                        return None;
391                    }
392                    if arg.arg_type == "json_object" {
393                        let r_value = r_default_for_config_arg(arg_name);
394                        return Some(format!("{arg_name} = {r_value}"));
395                    }
396                    return Some(format!("{arg_name} = NULL"));
397                }
398            };
399            // The extendr bindings expect owned PORs (ExternalPtr) for typed
400            // config arguments — passing an R `list()` raises
401            // `Expected ExternalPtr got List`. The fixtures don't carry the
402            // option fields needed to round-trip through ExtractionConfig$new,
403            // so emit `ExtractionConfig$default()` whenever a `json_object` arg
404            // resolves to an empty / object-shaped JSON value.
405            if arg.arg_type == "json_object" && (val.is_null() || val.as_object().is_some_and(|m| m.is_empty())) {
406                let r_value = r_default_for_config_arg(arg_name);
407                return Some(format!("{arg_name} = {r_value}"));
408            }
409            // Non-empty json_object for typed config args (those whose default is a
410            // `$default()` constructor): use `TypeName$from_json(jsonlite::toJSON(...))`
411            // so the Rust function receives a proper ExternalPtr, not a list.
412            // For `options`-style args (default = NULL) emit as a plain R list.
413            if arg.arg_type == "json_object" && val.is_object() {
414                let default_expr = r_default_for_config_arg(arg_name);
415                if default_expr.ends_with("$default()") {
416                    // Extract the type name from "TypeName$default()"
417                    let type_name = default_expr.trim_end_matches("$default()");
418                    let r_list = json_to_r(val, true);
419                    let r_value = format!("{type_name}$from_json(jsonlite::toJSON({r_list}, auto_unbox = TRUE))");
420                    return Some(format!("{arg_name} = {r_value}"));
421                }
422                let r_value = json_to_r(val, true);
423                return Some(format!("{arg_name} = {r_value}"));
424            }
425            // `json_object` arrays are passed to extendr functions whose Rust
426            // signature is `items: String` (JSON-serialized batch items). The
427            // wrapper has no R-list → JSON conversion, so we must serialize the
428            // fixture value to a literal JSON string at test-emit time.
429            if arg.arg_type == "json_object" && val.is_array() {
430                let json_literal = serde_json::to_string(val).unwrap_or_else(|_| "[]".to_string());
431                let escaped = escape_r(&json_literal);
432                return Some(format!("{arg_name} = \"{escaped}\""));
433            }
434            // `bytes` arg type: convert string fixture values into runtime
435            // `readBin(...)` calls so the wrapper receives raw bytes instead
436            // of an R character vector. This mirrors the Python emit_bytes_arg
437            // helper and is what the extendr binding for Vec<u8> expects.
438            if arg.arg_type == "bytes" {
439                if let Some(raw) = val.as_str() {
440                    let r_value = render_bytes_value(raw);
441                    return Some(format!("{arg_name} = {r_value}"));
442                }
443            }
444            // `file_path` arg type: fixtures encode relative paths that resolve
445            // against the repo's `test_documents/` directory. Using a runtime
446            // helper that anchors paths to that directory avoids fragility from
447            // testthat resetting the working directory between files.
448            if arg.arg_type == "file_path" {
449                if let Some(raw) = val.as_str() {
450                    if !raw.starts_with('/') && !raw.is_empty() {
451                        let escaped = escape_r(raw);
452                        return Some(format!("{arg_name} = .resolve_fixture(\"{escaped}\")"));
453                    }
454                }
455            }
456            Some(format!("{arg_name} = {}", json_to_r(val, true)))
457        })
458        .collect();
459
460    parts.join(", ")
461}
462
463/// Render a `bytes` fixture value as the R expression that produces a raw
464/// vector at test time. Mirrors python's `emit_bytes_arg` classifier so we can
465/// support both file-path style fixtures (`"pdf/fake_memo.pdf"`) and inline
466/// text payloads (`"<html>..."`). The resulting expression is dropped directly
467/// into the call site, e.g. `content = readBin(.resolve_fixture("pdf/fake_memo.pdf"), ...)`.
468fn render_bytes_value(raw: &str) -> String {
469    if raw.starts_with('<') || raw.starts_with('{') || raw.starts_with('[') || raw.contains(' ') {
470        // Inline text payload — encode to raw via charToRaw.
471        let escaped = escape_r(raw);
472        return format!("charToRaw(\"{escaped}\")");
473    }
474    let first = raw.chars().next().unwrap_or('\0');
475    if first.is_ascii_alphanumeric() || first == '_' {
476        if let Some(slash) = raw.find('/') {
477            if slash > 0 {
478                let after = &raw[slash + 1..];
479                if after.contains('.') && !after.is_empty() {
480                    let escaped = escape_r(raw);
481                    return format!(
482                        "readBin(.resolve_fixture(\"{escaped}\"), what = \"raw\", n = file.info(.resolve_fixture(\"{escaped}\"))$size)"
483                    );
484                }
485            }
486        }
487    }
488    // Default to inline text encoding — matches Python's InlineText branch.
489    let escaped = escape_r(raw);
490    format!("charToRaw(\"{escaped}\")")
491}
492
/// Return the R expression used as the default value for a config-style
/// argument, chosen by argument name.
///
/// Known typed configs map to their `*Config$default()` constructor,
/// `options` maps to `NULL`, and any other name falls back to `list()` — the
/// extendr binding will then error with a clear message, which is preferable
/// to silently passing a wrong type.
fn r_default_for_config_arg(arg_name: &str) -> String {
    // (argument name, default R expression)
    const KNOWN_DEFAULTS: &[(&str, &str)] = &[
        ("config", "ExtractionConfig$default()"),
        ("options", "NULL"),
        ("html_output", "HtmlOutputConfig$default()"),
        ("chunking", "ChunkingConfig$default()"),
        ("ocr", "OcrConfig$default()"),
        ("image", "ImageExtractionConfig$default()"),
        ("images", "ImageExtractionConfig$default()"),
        ("language_detection", "LanguageDetectionConfig$default()"),
    ];

    KNOWN_DEFAULTS
        .iter()
        .find(|(known, _)| *known == arg_name)
        .map_or("list()", |&(_, expr)| expr)
        .to_string()
}
508
509fn render_assertion(
510    out: &mut String,
511    assertion: &Assertion,
512    result_var: &str,
513    field_resolver: &FieldResolver,
514    result_is_simple: bool,
515    _e2e_config: &E2eConfig,
516) {
517    // Handle synthetic / derived fields before the is_valid_for_result check
518    // so they are never treated as struct attribute accesses on the result.
519    if let Some(f) = &assertion.field {
520        match f.as_str() {
521            "chunks_have_content" => {
522                let pred = format!("all(sapply({result_var}$chunks %||% list(), function(c) nchar(c$content) > 0))");
523                match assertion.assertion_type.as_str() {
524                    "is_true" => {
525                        let _ = writeln!(out, "  expect_true({pred})");
526                    }
527                    "is_false" => {
528                        let _ = writeln!(out, "  expect_false({pred})");
529                    }
530                    _ => {
531                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
532                    }
533                }
534                return;
535            }
536            "chunks_have_embeddings" => {
537                let pred = format!(
538                    "all(sapply({result_var}$chunks %||% list(), function(c) !is.null(c$embedding) && length(c$embedding) > 0))"
539                );
540                match assertion.assertion_type.as_str() {
541                    "is_true" => {
542                        let _ = writeln!(out, "  expect_true({pred})");
543                    }
544                    "is_false" => {
545                        let _ = writeln!(out, "  expect_false({pred})");
546                    }
547                    _ => {
548                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
549                    }
550                }
551                return;
552            }
553            // ---- EmbedResponse virtual fields ----
554            // embed_texts returns list of numeric vectors in R — no wrapper object.
555            // result_var is the embedding matrix; use it directly.
556            "embeddings" => {
557                match assertion.assertion_type.as_str() {
558                    "count_equals" => {
559                        if let Some(val) = &assertion.value {
560                            let r_val = json_to_r(val, false);
561                            let _ = writeln!(out, "  expect_equal(length({result_var}), {r_val})");
562                        }
563                    }
564                    "count_min" => {
565                        if let Some(val) = &assertion.value {
566                            let r_val = json_to_r(val, false);
567                            let _ = writeln!(out, "  expect_gte(length({result_var}), {r_val})");
568                        }
569                    }
570                    "not_empty" => {
571                        let _ = writeln!(out, "  expect_gt(length({result_var}), 0)");
572                    }
573                    "is_empty" => {
574                        let _ = writeln!(out, "  expect_equal(length({result_var}), 0)");
575                    }
576                    _ => {
577                        let _ = writeln!(
578                            out,
579                            "  # skipped: unsupported assertion type on synthetic field 'embeddings'"
580                        );
581                    }
582                }
583                return;
584            }
585            "embedding_dimensions" => {
586                let expr = format!("(if (length({result_var}) == 0) 0L else length({result_var}[[1]]))");
587                match assertion.assertion_type.as_str() {
588                    "equals" => {
589                        if let Some(val) = &assertion.value {
590                            let r_val = json_to_r(val, false);
591                            let _ = writeln!(out, "  expect_equal({expr}, {r_val})");
592                        }
593                    }
594                    "greater_than" => {
595                        if let Some(val) = &assertion.value {
596                            let r_val = json_to_r(val, false);
597                            let _ = writeln!(out, "  expect_gt({expr}, {r_val})");
598                        }
599                    }
600                    _ => {
601                        let _ = writeln!(
602                            out,
603                            "  # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
604                        );
605                    }
606                }
607                return;
608            }
609            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
610                let pred = match f.as_str() {
611                    "embeddings_valid" => {
612                        format!("all(sapply({result_var}, function(e) length(e) > 0))")
613                    }
614                    "embeddings_finite" => {
615                        format!("all(sapply({result_var}, function(e) all(is.finite(e))))")
616                    }
617                    "embeddings_non_zero" => {
618                        format!("all(sapply({result_var}, function(e) any(e != 0.0)))")
619                    }
620                    "embeddings_normalized" => {
621                        format!("all(sapply({result_var}, function(e) abs(sum(e * e) - 1.0) < 1e-3))")
622                    }
623                    _ => unreachable!(),
624                };
625                match assertion.assertion_type.as_str() {
626                    "is_true" => {
627                        let _ = writeln!(out, "  expect_true({pred})");
628                    }
629                    "is_false" => {
630                        let _ = writeln!(out, "  expect_false({pred})");
631                    }
632                    _ => {
633                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
634                    }
635                }
636                return;
637            }
638            // ---- keywords / keywords_count ----
639            // R ExtractionResult does not expose extracted_keywords; skip.
640            "keywords" | "keywords_count" => {
641                let _ = writeln!(out, "  # skipped: field '{f}' not available on R ExtractionResult");
642                return;
643            }
644            _ => {}
645        }
646    }
647
648    // Skip assertions on fields that don't exist on the result type.
649    if let Some(f) = &assertion.field {
650        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
651            let _ = writeln!(out, "  # skipped: field '{f}' not available on result type");
652            return;
653        }
654    }
655
656    // When result_is_simple, skip assertions that reference non-content fields
657    // (e.g., metadata, document, structure) since the binding returns a plain value.
658    if result_is_simple {
659        if let Some(f) = &assertion.field {
660            let f_lower = f.to_lowercase();
661            if !f.is_empty()
662                && f_lower != "content"
663                && (f_lower.starts_with("metadata")
664                    || f_lower.starts_with("document")
665                    || f_lower.starts_with("structure"))
666            {
667                let _ = writeln!(
668                    out,
669                    "  # skipped: result_is_simple for field '{f}' not available on result type"
670                );
671                return;
672            }
673        }
674    }
675
676    let field_expr = if result_is_simple {
677        result_var.to_string()
678    } else {
679        match &assertion.field {
680            Some(f) if !f.is_empty() => field_resolver.accessor(f, "r", result_var),
681            _ => result_var.to_string(),
682        }
683    };
684
685    match assertion.assertion_type.as_str() {
686        "equals" => {
687            if let Some(expected) = &assertion.value {
688                let r_val = json_to_r(expected, false);
689                let _ = writeln!(out, "  expect_equal(trimws({field_expr}), {r_val})");
690            }
691        }
692        "contains" => {
693            if let Some(expected) = &assertion.value {
694                let r_val = json_to_r(expected, false);
695                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}, fixed = TRUE))");
696            }
697        }
698        "contains_all" => {
699            if let Some(values) = &assertion.values {
700                for val in values {
701                    let r_val = json_to_r(val, false);
702                    let _ = writeln!(out, "  expect_true(any(grepl({r_val}, {field_expr}, fixed = TRUE)))");
703                }
704            }
705        }
706        "not_contains" => {
707            if let Some(expected) = &assertion.value {
708                let r_val = json_to_r(expected, false);
709                let _ = writeln!(out, "  expect_false(grepl({r_val}, {field_expr}, fixed = TRUE))");
710            }
711        }
712        "not_empty" => {
713            let _ = writeln!(
714                out,
715                "  expect_true(if (is.character({field_expr})) nchar({field_expr}) > 0 else length({field_expr}) > 0)"
716            );
717        }
718        "is_empty" => {
719            let _ = writeln!(out, "  expect_equal({field_expr}, \"\")");
720        }
721        "contains_any" => {
722            if let Some(values) = &assertion.values {
723                let items: Vec<String> = values.iter().map(|v| json_to_r(v, false)).collect();
724                let vec_str = items.join(", ");
725                let _ = writeln!(
726                    out,
727                    "  expect_true(any(sapply(c({vec_str}), function(v) grepl(v, {field_expr}, fixed = TRUE))))"
728                );
729            }
730        }
731        "greater_than" => {
732            if let Some(val) = &assertion.value {
733                let r_val = json_to_r(val, false);
734                let _ = writeln!(out, "  expect_true({field_expr} > {r_val})");
735            }
736        }
737        "less_than" => {
738            if let Some(val) = &assertion.value {
739                let r_val = json_to_r(val, false);
740                let _ = writeln!(out, "  expect_true({field_expr} < {r_val})");
741            }
742        }
743        "greater_than_or_equal" => {
744            if let Some(val) = &assertion.value {
745                let r_val = json_to_r(val, false);
746                let _ = writeln!(out, "  expect_true({field_expr} >= {r_val})");
747            }
748        }
749        "less_than_or_equal" => {
750            if let Some(val) = &assertion.value {
751                let r_val = json_to_r(val, false);
752                let _ = writeln!(out, "  expect_true({field_expr} <= {r_val})");
753            }
754        }
755        "starts_with" => {
756            if let Some(expected) = &assertion.value {
757                let r_val = json_to_r(expected, false);
758                let _ = writeln!(out, "  expect_true(startsWith({field_expr}, {r_val}))");
759            }
760        }
761        "ends_with" => {
762            if let Some(expected) = &assertion.value {
763                let r_val = json_to_r(expected, false);
764                let _ = writeln!(out, "  expect_true(endsWith({field_expr}, {r_val}))");
765            }
766        }
767        "min_length" => {
768            if let Some(val) = &assertion.value {
769                if let Some(n) = val.as_u64() {
770                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) >= {n})");
771                }
772            }
773        }
774        "max_length" => {
775            if let Some(val) = &assertion.value {
776                if let Some(n) = val.as_u64() {
777                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) <= {n})");
778                }
779            }
780        }
781        "count_min" => {
782            if let Some(val) = &assertion.value {
783                if let Some(n) = val.as_u64() {
784                    let _ = writeln!(out, "  expect_true(length({field_expr}) >= {n})");
785                }
786            }
787        }
788        "count_equals" => {
789            if let Some(val) = &assertion.value {
790                if let Some(n) = val.as_u64() {
791                    let _ = writeln!(out, "  expect_equal(length({field_expr}), {n})");
792                }
793            }
794        }
795        "is_true" => {
796            let _ = writeln!(out, "  expect_true({field_expr})");
797        }
798        "is_false" => {
799            let _ = writeln!(out, "  expect_false({field_expr})");
800        }
801        "method_result" => {
802            if let Some(method_name) = &assertion.method {
803                let call_expr = build_r_method_call(result_var, method_name, assertion.args.as_ref());
804                let check = assertion.check.as_deref().unwrap_or("is_true");
805                match check {
806                    "equals" => {
807                        if let Some(val) = &assertion.value {
808                            if val.is_boolean() {
809                                if val.as_bool() == Some(true) {
810                                    let _ = writeln!(out, "  expect_true({call_expr})");
811                                } else {
812                                    let _ = writeln!(out, "  expect_false({call_expr})");
813                                }
814                            } else {
815                                let r_val = json_to_r(val, false);
816                                let _ = writeln!(out, "  expect_equal({call_expr}, {r_val})");
817                            }
818                        }
819                    }
820                    "is_true" => {
821                        let _ = writeln!(out, "  expect_true({call_expr})");
822                    }
823                    "is_false" => {
824                        let _ = writeln!(out, "  expect_false({call_expr})");
825                    }
826                    "greater_than_or_equal" => {
827                        if let Some(val) = &assertion.value {
828                            let r_val = json_to_r(val, false);
829                            let _ = writeln!(out, "  expect_true({call_expr} >= {r_val})");
830                        }
831                    }
832                    "count_min" => {
833                        if let Some(val) = &assertion.value {
834                            let n = val.as_u64().unwrap_or(0);
835                            let _ = writeln!(out, "  expect_true(length({call_expr}) >= {n})");
836                        }
837                    }
838                    "is_error" => {
839                        let _ = writeln!(out, "  expect_error({call_expr})");
840                    }
841                    "contains" => {
842                        if let Some(val) = &assertion.value {
843                            let r_val = json_to_r(val, false);
844                            let _ = writeln!(out, "  expect_true(grepl({r_val}, {call_expr}, fixed = TRUE))");
845                        }
846                    }
847                    other_check => {
848                        panic!("R e2e generator: unsupported method_result check type: {other_check}");
849                    }
850                }
851            } else {
852                panic!("R e2e generator: method_result assertion missing 'method' field");
853            }
854        }
855        "matches_regex" => {
856            if let Some(expected) = &assertion.value {
857                let r_val = json_to_r(expected, false);
858                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}))");
859            }
860        }
861        "not_error" => {
862            // The call itself stops the test on error; emit an explicit
863            // `expect_true(TRUE)` so testthat doesn't report the test as
864            // empty when this is the only assertion.
865            let _ = writeln!(out, "  expect_true(TRUE)");
866        }
867        "error" => {
868            // Handled at the test level.
869        }
870        other => {
871            panic!("R e2e generator: unsupported assertion type: {other}");
872        }
873    }
874}
875
/// Convert a PascalCase string to snake_case.
///
/// An underscore is inserted before every uppercase character except the
/// first one, and every character is lowercased, e.g.
/// `"DoubleEqual"` → `"double_equal"` and `"Backticks"` → `"backticks"`.
///
/// Each uppercase character is treated as its own word boundary, so a run of
/// capitals is split letter by letter (`"HTTPServer"` → `"h_t_t_p_server"`).
/// Input that contains no uppercase characters is returned unchanged.
fn pascal_to_snake_case(s: &str) -> String {
    // Reserve a little headroom for the underscores that get inserted.
    let mut result = String::with_capacity(s.len() + 4);
    for (i, ch) in s.chars().enumerate() {
        if i > 0 && ch.is_uppercase() {
            result.push('_');
        }
        // `to_lowercase` yields an iterator because some characters lowercase
        // to more than one `char`.
        result.extend(ch.to_lowercase());
    }
    result
}
896
897/// * `lowercase_enum_values` - If true, convert PascalCase strings to snake_case (for enum values).
898///   If false, preserve original case (for assertion expected values).
899fn json_to_r(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
900    match value {
901        serde_json::Value::String(s) => {
902            // Convert PascalCase enum values to snake_case only if requested.
903            // e.g. "Backticks" → "backticks", "DoubleEqual" → "double_equal"
904            let normalized = if lowercase_enum_values && s.chars().next().is_some_and(|c| c.is_uppercase()) {
905                pascal_to_snake_case(s)
906            } else {
907                s.clone()
908            };
909            format!("\"{}\"", escape_r(&normalized))
910        }
911        serde_json::Value::Bool(true) => "TRUE".to_string(),
912        serde_json::Value::Bool(false) => "FALSE".to_string(),
913        serde_json::Value::Number(n) => n.to_string(),
914        serde_json::Value::Null => "NULL".to_string(),
915        serde_json::Value::Array(arr) => {
916            let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
917            format!("c({})", items.join(", "))
918        }
919        serde_json::Value::Object(map) => {
920            let entries: Vec<String> = map
921                .iter()
922                .map(|(k, v)| format!("\"{}\" = {}", escape_r(k), json_to_r(v, lowercase_enum_values)))
923                .collect();
924            format!("list({})", entries.join(", "))
925        }
926    }
927}
928
929/// Build an R visitor list and add setup line.
930fn build_r_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
931    use std::fmt::Write as FmtWrite;
932    // Collect each callback as a separate string, then join with ",\n" to avoid
933    // trailing commas — R's list() does not accept a trailing comma.
934    let methods: Vec<String> = visitor_spec
935        .callbacks
936        .iter()
937        .map(|(method_name, action)| {
938            let mut buf = String::new();
939            emit_r_visitor_method(&mut buf, method_name, action);
940            // strip the trailing ",\n" added by emit_r_visitor_method
941            buf.trim_end_matches(['\n', ',']).to_string()
942        })
943        .collect();
944    let mut visitor_obj = String::new();
945    let _ = writeln!(visitor_obj, "list(");
946    let _ = write!(visitor_obj, "{}", methods.join(",\n"));
947    let _ = writeln!(visitor_obj);
948    let _ = writeln!(visitor_obj, "  )");
949
950    setup_lines.push(format!("visitor <- {visitor_obj}"));
951}
952
953/// Build an R call expression for a `method_result` assertion.
954/// Maps method names to the appropriate R function or method calls.
955fn build_r_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
956    match method_name {
957        "root_child_count" => format!("{result_var}$root_child_count()"),
958        "root_node_type" => format!("{result_var}$root_node_type()"),
959        "named_children_count" => format!("{result_var}$named_children_count()"),
960        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
961        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
962        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
963        "contains_node_type" => {
964            let node_type = args
965                .and_then(|a| a.get("node_type"))
966                .and_then(|v| v.as_str())
967                .unwrap_or("");
968            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
969        }
970        "find_nodes_by_type" => {
971            let node_type = args
972                .and_then(|a| a.get("node_type"))
973                .and_then(|v| v.as_str())
974                .unwrap_or("");
975            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
976        }
977        "run_query" => {
978            let query_source = args
979                .and_then(|a| a.get("query_source"))
980                .and_then(|v| v.as_str())
981                .unwrap_or("");
982            let language = args
983                .and_then(|a| a.get("language"))
984                .and_then(|v| v.as_str())
985                .unwrap_or("");
986            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
987        }
988        _ => {
989            if let Some(args_val) = args {
990                let arg_str = args_val
991                    .as_object()
992                    .map(|obj| {
993                        obj.iter()
994                            .map(|(k, v)| {
995                                let r_val = json_to_r(v, false);
996                                format!("{k} = {r_val}")
997                            })
998                            .collect::<Vec<_>>()
999                            .join(", ")
1000                    })
1001                    .unwrap_or_default();
1002                format!("{result_var}${method_name}({arg_str})")
1003            } else {
1004                format!("{result_var}${method_name}()")
1005            }
1006        }
1007    }
1008}
1009
1010/// Emit an R visitor method for a callback action.
1011fn emit_r_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1012    use std::fmt::Write as FmtWrite;
1013
1014    // R uses visit_ prefix (matches binding signature)
1015    let params = match method_name {
1016        "visit_link" => "ctx, href, text, title",
1017        "visit_image" => "ctx, src, alt, title",
1018        "visit_heading" => "ctx, level, text, id",
1019        "visit_code_block" => "ctx, lang, code",
1020        "visit_code_inline"
1021        | "visit_strong"
1022        | "visit_emphasis"
1023        | "visit_strikethrough"
1024        | "visit_underline"
1025        | "visit_subscript"
1026        | "visit_superscript"
1027        | "visit_mark"
1028        | "visit_button"
1029        | "visit_summary"
1030        | "visit_figcaption"
1031        | "visit_definition_term"
1032        | "visit_definition_description" => "ctx, text",
1033        "visit_text" => "ctx, text",
1034        "visit_list_item" => "ctx, ordered, marker, text",
1035        "visit_blockquote" => "ctx, content, depth",
1036        "visit_table_row" => "ctx, cells, is_header",
1037        "visit_custom_element" => "ctx, tag_name, html",
1038        "visit_form" => "ctx, action_url, method",
1039        "visit_input" => "ctx, input_type, name, value",
1040        "visit_audio" | "visit_video" | "visit_iframe" => "ctx, src",
1041        "visit_details" => "ctx, open",
1042        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => "ctx, output",
1043        "visit_list_start" => "ctx, ordered",
1044        "visit_list_end" => "ctx, ordered, output",
1045        _ => "ctx",
1046    };
1047
1048    let _ = writeln!(out, "    {method_name} = function({params}) {{");
1049    match action {
1050        CallbackAction::Skip => {
1051            let _ = writeln!(out, "      \"skip\"");
1052        }
1053        CallbackAction::Continue => {
1054            let _ = writeln!(out, "      \"continue\"");
1055        }
1056        CallbackAction::PreserveHtml => {
1057            let _ = writeln!(out, "      \"preserve_html\"");
1058        }
1059        CallbackAction::Custom { output } => {
1060            let escaped = escape_r(output);
1061            let _ = writeln!(out, "      list(custom = \"{escaped}\")");
1062        }
1063        CallbackAction::CustomTemplate { template } => {
1064            let r_expr = r_template_to_paste0(template);
1065            let _ = writeln!(out, "      list(custom = {r_expr})");
1066        }
1067    }
1068    let _ = writeln!(out, "    }},");
1069}