Skip to main content

alef_e2e/codegen/
r.rs

1//! R e2e test generator using testthat.
2
3use crate::config::E2eConfig;
4use crate::escape::{escape_r, r_template_to_paste0, sanitize_filename, sanitize_ident};
5use crate::field_access::FieldResolver;
6use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup};
7use alef_core::backend::GeneratedFile;
8use alef_core::config::ResolvedCrateConfig;
9use alef_core::hash::{self, CommentStyle};
10use anyhow::Result;
11use std::fmt::Write as FmtWrite;
12use std::path::PathBuf;
13
14use super::E2eCodegen;
15
16/// R e2e code generator.
17pub struct RCodegen;
18
19impl E2eCodegen for RCodegen {
20    fn generate(
21        &self,
22        groups: &[FixtureGroup],
23        e2e_config: &E2eConfig,
24        config: &ResolvedCrateConfig,
25        _type_defs: &[alef_core::ir::TypeDef],
26    ) -> Result<Vec<GeneratedFile>> {
27        let lang = self.language_name();
28        let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
29
30        let mut files = Vec::new();
31
32        // Resolve call config with overrides.
33        let call = &e2e_config.call;
34        let overrides = call.overrides.get(lang);
35        let module_path = overrides
36            .and_then(|o| o.module.as_ref())
37            .cloned()
38            .unwrap_or_else(|| call.module.clone());
39        let _function_name = overrides
40            .and_then(|o| o.function.as_ref())
41            .cloned()
42            .unwrap_or_else(|| call.function.clone());
43        let result_is_simple = call.result_is_simple || overrides.is_some_and(|o| o.result_is_simple);
44        let result_is_r_list = overrides.is_some_and(|o| o.result_is_r_list);
45        let _result_var = &call.result_var;
46
47        // Resolve package config.
48        let r_pkg = e2e_config.resolve_package("r");
49        let pkg_name = r_pkg
50            .as_ref()
51            .and_then(|p| p.name.as_ref())
52            .cloned()
53            .unwrap_or_else(|| module_path.clone());
54        let pkg_path = r_pkg
55            .as_ref()
56            .and_then(|p| p.path.as_ref())
57            .cloned()
58            .unwrap_or_else(|| "../../packages/r".to_string());
59        let pkg_version = r_pkg
60            .as_ref()
61            .and_then(|p| p.version.as_ref())
62            .cloned()
63            .or_else(|| config.resolved_version())
64            .unwrap_or_else(|| "0.1.0".to_string());
65
66        // Generate DESCRIPTION file.
67        files.push(GeneratedFile {
68            path: output_base.join("DESCRIPTION"),
69            content: render_description(&pkg_name, &pkg_version, e2e_config.dep_mode),
70            generated_header: false,
71        });
72
73        // Generate test runner script.
74        files.push(GeneratedFile {
75            path: output_base.join("run_tests.R"),
76            content: render_test_runner(&pkg_path, e2e_config.dep_mode),
77            generated_header: true,
78        });
79
80        // setup-fixtures.R — testthat sources `setup-*.R` files in the tests
81        // directory once before any tests run, with the working directory set
82        // to the tests/ folder. We use this hook to chdir into the repo's
83        // shared `test_documents/` directory so that fixture paths like
84        // `pdf/fake_memo.pdf` resolve at extraction time.
85        files.push(GeneratedFile {
86            path: output_base.join("tests").join("setup-fixtures.R"),
87            content: render_setup_fixtures(&e2e_config.test_documents_relative_from(1)),
88            generated_header: true,
89        });
90
91        // Generate test files per category.
92        for group in groups {
93            let active: Vec<&Fixture> = group
94                .fixtures
95                .iter()
96                .filter(|f| super::should_include_fixture(f, lang, e2e_config))
97                .collect();
98
99            if active.is_empty() {
100                continue;
101            }
102
103            let filename = format!("test_{}.R", sanitize_filename(&group.category));
104            let field_resolver = FieldResolver::new(
105                &e2e_config.fields,
106                &e2e_config.fields_optional,
107                &e2e_config.result_fields,
108                &e2e_config.fields_array,
109                &std::collections::HashSet::new(),
110            );
111            let content = render_test_file(
112                &group.category,
113                &active,
114                &field_resolver,
115                result_is_simple,
116                result_is_r_list,
117                e2e_config,
118            );
119            files.push(GeneratedFile {
120                path: output_base.join("tests").join(filename),
121                content,
122                generated_header: true,
123            });
124        }
125
126        Ok(files)
127    }
128
129    fn language_name(&self) -> &'static str {
130        "r"
131    }
132}
133
134fn render_description(pkg_name: &str, pkg_version: &str, dep_mode: crate::config::DependencyMode) -> String {
135    let dep_line = match dep_mode {
136        crate::config::DependencyMode::Registry => {
137            format!("Imports: {pkg_name} ({pkg_version})\n")
138        }
139        crate::config::DependencyMode::Local => String::new(),
140    };
141    format!(
142        r#"Package: e2e.r
143Title: E2E Tests for {pkg_name}
144Version: 0.1.0
145Description: End-to-end test suite.
146{dep_line}Suggests: testthat (>= 3.0.0)
147Config/testthat/edition: 3
148"#
149    )
150}
151
152fn render_setup_fixtures(test_documents_path: &str) -> String {
153    let mut out = String::new();
154    out.push_str(&hash::header(CommentStyle::Hash));
155    let _ = writeln!(out);
156    let _ = writeln!(
157        out,
158        "# Resolve fixture paths against the repo's `test_documents/` directory."
159    );
160    let _ = writeln!(
161        out,
162        "# testthat sources setup-*.R with the working directory at tests/,"
163    );
164    let _ = writeln!(
165        out,
166        "# so test_documents lives three directories up: tests/ -> e2e/r/ -> e2e/ -> repo root."
167    );
168    let _ = writeln!(
169        out,
170        "# Each `test_that()` block has its working directory reset back to tests/, so"
171    );
172    let _ = writeln!(
173        out,
174        "# fixture lookups must be performed via this helper rather than relying on `setwd`."
175    );
176    let _ = writeln!(
177        out,
178        ".alef_test_documents <- normalizePath(\"{test_documents_path}\", mustWork = FALSE)"
179    );
180    let _ = writeln!(out, ".resolve_fixture <- function(path) {{");
181    let _ = writeln!(out, "  if (dir.exists(.alef_test_documents)) {{");
182    let _ = writeln!(out, "    file.path(.alef_test_documents, path)");
183    let _ = writeln!(out, "  }} else {{");
184    let _ = writeln!(out, "    path");
185    let _ = writeln!(out, "  }}");
186    let _ = writeln!(out, "}}");
187    out
188}
189
190fn render_test_runner(pkg_path: &str, dep_mode: crate::config::DependencyMode) -> String {
191    let mut out = String::new();
192    out.push_str(&hash::header(CommentStyle::Hash));
193    let _ = writeln!(out, "library(testthat)");
194    match dep_mode {
195        crate::config::DependencyMode::Registry => {
196            // In registry mode, require the installed CRAN package directly.
197            let _ = writeln!(out, "# Package loaded via library() from CRAN install.");
198        }
199        crate::config::DependencyMode::Local => {
200            // Use devtools::load_all() to load the local R package without requiring
201            // a full install, matching the e2e test runner convention.
202            let _ = writeln!(out, "devtools::load_all(\"{pkg_path}\")");
203        }
204    }
205    let _ = writeln!(out);
206    // Surface every failure rather than aborting at the default max_fails=10 —
207    // partial pass counts are essential for triage during e2e bring-up.
208    let _ = writeln!(out, "testthat::set_max_fails(Inf)");
209    // Resolve the tests/ directory relative to this script. testthat reads
210    // setup-*.R from there before each file runs, where path resolution
211    // against test_documents/ is handled by the `.resolve_fixture` helper.
212    let _ = writeln!(
213        out,
214        ".script_dir <- tryCatch(dirname(normalizePath(sys.frame(1)$ofile)), error = function(e) getwd())"
215    );
216    let _ = writeln!(out, "test_dir(file.path(.script_dir, \"tests\"))");
217    out
218}
219
220fn render_test_file(
221    category: &str,
222    fixtures: &[&Fixture],
223    field_resolver: &FieldResolver,
224    result_is_simple: bool,
225    result_is_r_list: bool,
226    e2e_config: &E2eConfig,
227) -> String {
228    let mut out = String::new();
229    out.push_str(&hash::header(CommentStyle::Hash));
230    let _ = writeln!(out, "# E2e tests for category: {category}");
231    let _ = writeln!(out);
232
233    for (i, fixture) in fixtures.iter().enumerate() {
234        render_test_case(
235            &mut out,
236            fixture,
237            e2e_config,
238            field_resolver,
239            result_is_simple,
240            result_is_r_list,
241        );
242        if i + 1 < fixtures.len() {
243            let _ = writeln!(out);
244        }
245    }
246
247    // Clean up trailing newlines.
248    while out.ends_with("\n\n") {
249        out.pop();
250    }
251    if !out.ends_with('\n') {
252        out.push('\n');
253    }
254    out
255}
256
257fn render_test_case(
258    out: &mut String,
259    fixture: &Fixture,
260    e2e_config: &E2eConfig,
261    field_resolver: &FieldResolver,
262    default_result_is_simple: bool,
263    default_result_is_r_list: bool,
264) {
265    let call_config = e2e_config.resolve_call_for_fixture(fixture.call.as_deref(), &fixture.input);
266    let function_name = &call_config.function;
267    let result_var = &call_config.result_var;
268    // Per-fixture call configs (e.g. `list_document_extractors`) may set
269    // `result_is_simple = true` even when the default `[e2e.call]` does not.
270    // Without this lookup the registry/detection wrappers (which return scalar
271    // strings or character vectors directly) get wrapped in
272    // `jsonlite::fromJSON(...)` and the parser fails on non-JSON output.
273    let r_override = call_config.overrides.get("r");
274    let result_is_simple = if fixture.call.is_some() {
275        call_config.result_is_simple || r_override.is_some_and(|o| o.result_is_simple)
276    } else {
277        default_result_is_simple
278    };
279    // Per-fixture override: when the R binding already returns a native R list
280    // (not a JSON string), suppress `jsonlite::fromJSON` wrapping while still
281    // using field-path (`result$field`) accessors in assertions.
282    let result_is_r_list = if fixture.call.is_some() {
283        r_override.is_some_and(|o| o.result_is_r_list)
284    } else {
285        default_result_is_r_list
286    };
287
288    let test_name = sanitize_ident(&fixture.id);
289    let description = fixture.description.replace('"', "\\\"");
290
291    let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
292
293    // Allow per-call R overrides to remap fixture argument names. Many calls
294    // (e.g. `extract_bytes`, `batch_extract_files`) use language-neutral
295    // fixture field names (`data`, `paths`) that the R extendr binding
296    // exposes under different identifiers (`content`, `items`).
297    let arg_name_map = r_override.map(|o| &o.arg_name_map);
298    let args_str = build_args_string(&fixture.input, &call_config.args, arg_name_map);
299
300    // Build visitor setup and args if present
301    let mut setup_lines = Vec::new();
302    let final_args = if let Some(visitor_spec) = &fixture.visitor {
303        build_r_visitor(&mut setup_lines, visitor_spec);
304        // Strip any `options = NULL` placeholder that build_args_string may have emitted
305        // for the optional options arg — we replace it with the visitor options list.
306        let base = args_str
307            .replace(", options = NULL", "")
308            .replace("options = NULL, ", "")
309            .replace("options = NULL", "");
310        let visitor_opts = "options = list(visitor = visitor)";
311        let trimmed = base.trim_matches([' ', ',']);
312        if trimmed.is_empty() {
313            visitor_opts.to_string()
314        } else {
315            format!("{trimmed}, {visitor_opts}")
316        }
317    } else {
318        args_str
319    };
320
321    if expects_error {
322        let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
323        for line in &setup_lines {
324            let _ = writeln!(out, "  {line}");
325        }
326        let _ = writeln!(out, "  expect_error({function_name}({final_args}))");
327        let _ = writeln!(out, "}})");
328        return;
329    }
330
331    let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
332    for line in &setup_lines {
333        let _ = writeln!(out, "  {line}");
334    }
335    // The extendr extraction wrappers return JSON strings carrying the
336    // serialized core result; parse into an R list so tests can use `$`
337    // accessors. `result_is_simple` calls (e.g. `convert_html_to_markdown`)
338    // already return scalar values and must be passed through verbatim.
339    // `result_is_r_list` signals the binding returns a native R list (Robj),
340    // not a JSON string — skip `jsonlite::fromJSON` but keep `$` accessors.
341    if result_is_simple || result_is_r_list {
342        let _ = writeln!(out, "  {result_var} <- {function_name}({final_args})");
343    } else {
344        let _ = writeln!(
345            out,
346            "  {result_var} <- jsonlite::fromJSON({function_name}({final_args}), simplifyVector = FALSE)"
347        );
348    }
349
350    for assertion in &fixture.assertions {
351        render_assertion(out, assertion, result_var, field_resolver, result_is_simple, e2e_config);
352    }
353
354    let _ = writeln!(out, "}})");
355}
356
357fn build_args_string(
358    input: &serde_json::Value,
359    args: &[crate::config::ArgMapping],
360    arg_name_map: Option<&std::collections::HashMap<String, String>>,
361) -> String {
362    if args.is_empty() {
363        // No declared args means the wrapper takes zero parameters; emitting
364        // `list()` here would trigger an `unused argument (list())` error in R.
365        // Likewise, fall through to nothing if the fixture's input is empty.
366        if matches!(input, serde_json::Value::Null) || input.as_object().is_some_and(|m| m.is_empty()) {
367            return String::new();
368        }
369        return json_to_r(input, true);
370    }
371
372    let parts: Vec<String> = args
373        .iter()
374        .filter_map(|arg| {
375            // Apply per-language argument renames before emitting the call.
376            let arg_name: &str = arg_name_map
377                .and_then(|m| m.get(&arg.name).map(String::as_str))
378                .unwrap_or(&arg.name);
379
380            let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
381            let val = input.get(field);
382            // R extendr-generated wrappers do not preserve Option<T> defaults from
383            // the Rust signature — every parameter is positional and required at
384            // the R level. To keep generated calls valid we must pass a placeholder
385            // (`NULL` for `Option<T>`, `ExtractionConfig$default()` for typed
386            // configs) whenever the fixture omits an optional value.
387            let val = match val {
388                Some(v) if !(v.is_null() && arg.optional) => v,
389                _ => {
390                    if !arg.optional {
391                        return None;
392                    }
393                    if arg.arg_type == "json_object" {
394                        let r_value = r_default_for_config_arg(arg_name);
395                        return Some(format!("{arg_name} = {r_value}"));
396                    }
397                    return Some(format!("{arg_name} = NULL"));
398                }
399            };
400            // The extendr bindings expect owned PORs (ExternalPtr) for typed
401            // config arguments — passing an R `list()` raises
402            // `Expected ExternalPtr got List`. The fixtures don't carry the
403            // option fields needed to round-trip through ExtractionConfig$new,
404            // so emit `ExtractionConfig$default()` whenever a `json_object` arg
405            // resolves to an empty / object-shaped JSON value.
406            if arg.arg_type == "json_object" && (val.is_null() || val.as_object().is_some_and(|m| m.is_empty())) {
407                let r_value = r_default_for_config_arg(arg_name);
408                return Some(format!("{arg_name} = {r_value}"));
409            }
410            // Non-empty json_object for typed config args (those whose default is a
411            // `$default()` constructor): use `TypeName$from_json(jsonlite::toJSON(...))`
412            // so the Rust function receives a proper ExternalPtr, not a list.
413            // For `options`-style args (default = NULL) emit as a plain R list.
414            if arg.arg_type == "json_object" && val.is_object() {
415                let default_expr = r_default_for_config_arg(arg_name);
416                if default_expr.ends_with("$default()") {
417                    // Extract the type name from "TypeName$default()"
418                    let type_name = default_expr.trim_end_matches("$default()");
419                    let r_list = json_to_r(val, true);
420                    let r_value = format!("{type_name}$from_json(jsonlite::toJSON({r_list}, auto_unbox = TRUE))");
421                    return Some(format!("{arg_name} = {r_value}"));
422                }
423                let r_value = json_to_r(val, true);
424                return Some(format!("{arg_name} = {r_value}"));
425            }
426            // `json_object` arrays are passed to extendr functions whose Rust
427            // signature is `items: String` (JSON-serialized batch items). The
428            // wrapper has no R-list → JSON conversion, so we must serialize the
429            // fixture value to a literal JSON string at test-emit time.
430            if arg.arg_type == "json_object" && val.is_array() {
431                let json_literal = serde_json::to_string(val).unwrap_or_else(|_| "[]".to_string());
432                let escaped = escape_r(&json_literal);
433                return Some(format!("{arg_name} = \"{escaped}\""));
434            }
435            // `bytes` arg type: convert string fixture values into runtime
436            // `readBin(...)` calls so the wrapper receives raw bytes instead
437            // of an R character vector. This mirrors the Python emit_bytes_arg
438            // helper and is what the extendr binding for Vec<u8> expects.
439            if arg.arg_type == "bytes" {
440                if let Some(raw) = val.as_str() {
441                    let r_value = render_bytes_value(raw);
442                    return Some(format!("{arg_name} = {r_value}"));
443                }
444            }
445            // `file_path` arg type: fixtures encode relative paths that resolve
446            // against the repo's `test_documents/` directory. Using a runtime
447            // helper that anchors paths to that directory avoids fragility from
448            // testthat resetting the working directory between files.
449            if arg.arg_type == "file_path" {
450                if let Some(raw) = val.as_str() {
451                    if !raw.starts_with('/') && !raw.is_empty() {
452                        let escaped = escape_r(raw);
453                        return Some(format!("{arg_name} = .resolve_fixture(\"{escaped}\")"));
454                    }
455                }
456            }
457            Some(format!("{arg_name} = {}", json_to_r(val, true)))
458        })
459        .collect();
460
461    parts.join(", ")
462}
463
464/// Render a `bytes` fixture value as the R expression that produces a raw
465/// vector at test time. Mirrors python's `emit_bytes_arg` classifier so we can
466/// support both file-path style fixtures (`"pdf/fake_memo.pdf"`) and inline
467/// text payloads (`"<html>..."`). The resulting expression is dropped directly
468/// into the call site, e.g. `content = readBin(.resolve_fixture("pdf/fake_memo.pdf"), ...)`.
469fn render_bytes_value(raw: &str) -> String {
470    if raw.starts_with('<') || raw.starts_with('{') || raw.starts_with('[') || raw.contains(' ') {
471        // Inline text payload — encode to raw via charToRaw.
472        let escaped = escape_r(raw);
473        return format!("charToRaw(\"{escaped}\")");
474    }
475    let first = raw.chars().next().unwrap_or('\0');
476    if first.is_ascii_alphanumeric() || first == '_' {
477        if let Some(slash) = raw.find('/') {
478            if slash > 0 {
479                let after = &raw[slash + 1..];
480                if after.contains('.') && !after.is_empty() {
481                    let escaped = escape_r(raw);
482                    return format!(
483                        "readBin(.resolve_fixture(\"{escaped}\"), what = \"raw\", n = file.info(.resolve_fixture(\"{escaped}\"))$size)"
484                    );
485                }
486            }
487        }
488    }
489    // Default to inline text encoding — matches Python's InlineText branch.
490    let escaped = escape_r(raw);
491    format!("charToRaw(\"{escaped}\")")
492}
493
/// Return the R expression used as the default value for a config-style
/// argument, chosen by argument name.
///
/// Known typed configs map to their `*Config$default()` constructor and
/// `options` maps to `NULL`. Any other name yields `list()`; per the original
/// note, the binding is expected to reject that with a clear message rather
/// than silently accept a wrong type.
fn r_default_for_config_arg(arg_name: &str) -> String {
    let default_expr: &'static str = match arg_name {
        "config" => "ExtractionConfig$default()",
        "options" => "NULL",
        "html_output" => "HtmlOutputConfig$default()",
        "chunking" => "ChunkingConfig$default()",
        "ocr" => "OcrConfig$default()",
        "image" | "images" => "ImageExtractionConfig$default()",
        "language_detection" => "LanguageDetectionConfig$default()",
        _ => "list()",
    };
    default_expr.to_owned()
}
509
510fn render_assertion(
511    out: &mut String,
512    assertion: &Assertion,
513    result_var: &str,
514    field_resolver: &FieldResolver,
515    result_is_simple: bool,
516    _e2e_config: &E2eConfig,
517) {
518    // Handle synthetic / derived fields before the is_valid_for_result check
519    // so they are never treated as struct attribute accesses on the result.
520    if let Some(f) = &assertion.field {
521        match f.as_str() {
522            "chunks_have_content" => {
523                let pred = format!("all(sapply({result_var}$chunks %||% list(), function(c) nchar(c$content) > 0))");
524                match assertion.assertion_type.as_str() {
525                    "is_true" => {
526                        let _ = writeln!(out, "  expect_true({pred})");
527                    }
528                    "is_false" => {
529                        let _ = writeln!(out, "  expect_false({pred})");
530                    }
531                    _ => {
532                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
533                    }
534                }
535                return;
536            }
537            "chunks_have_embeddings" => {
538                let pred = format!(
539                    "all(sapply({result_var}$chunks %||% list(), function(c) !is.null(c$embedding) && length(c$embedding) > 0))"
540                );
541                match assertion.assertion_type.as_str() {
542                    "is_true" => {
543                        let _ = writeln!(out, "  expect_true({pred})");
544                    }
545                    "is_false" => {
546                        let _ = writeln!(out, "  expect_false({pred})");
547                    }
548                    _ => {
549                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
550                    }
551                }
552                return;
553            }
554            // ---- EmbedResponse virtual fields ----
555            // embed_texts returns list of numeric vectors in R — no wrapper object.
556            // result_var is the embedding matrix; use it directly.
557            "embeddings" => {
558                match assertion.assertion_type.as_str() {
559                    "count_equals" => {
560                        if let Some(val) = &assertion.value {
561                            let r_val = json_to_r(val, false);
562                            let _ = writeln!(out, "  expect_equal(length({result_var}), {r_val})");
563                        }
564                    }
565                    "count_min" => {
566                        if let Some(val) = &assertion.value {
567                            let r_val = json_to_r(val, false);
568                            let _ = writeln!(out, "  expect_gte(length({result_var}), {r_val})");
569                        }
570                    }
571                    "not_empty" => {
572                        let _ = writeln!(out, "  expect_gt(length({result_var}), 0)");
573                    }
574                    "is_empty" => {
575                        let _ = writeln!(out, "  expect_equal(length({result_var}), 0)");
576                    }
577                    _ => {
578                        let _ = writeln!(
579                            out,
580                            "  # skipped: unsupported assertion type on synthetic field 'embeddings'"
581                        );
582                    }
583                }
584                return;
585            }
586            "embedding_dimensions" => {
587                let expr = format!("(if (length({result_var}) == 0) 0L else length({result_var}[[1]]))");
588                match assertion.assertion_type.as_str() {
589                    "equals" => {
590                        if let Some(val) = &assertion.value {
591                            let r_val = json_to_r(val, false);
592                            let _ = writeln!(out, "  expect_equal({expr}, {r_val})");
593                        }
594                    }
595                    "greater_than" => {
596                        if let Some(val) = &assertion.value {
597                            let r_val = json_to_r(val, false);
598                            let _ = writeln!(out, "  expect_gt({expr}, {r_val})");
599                        }
600                    }
601                    _ => {
602                        let _ = writeln!(
603                            out,
604                            "  # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
605                        );
606                    }
607                }
608                return;
609            }
610            "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
611                let pred = match f.as_str() {
612                    "embeddings_valid" => {
613                        format!("all(sapply({result_var}, function(e) length(e) > 0))")
614                    }
615                    "embeddings_finite" => {
616                        format!("all(sapply({result_var}, function(e) all(is.finite(e))))")
617                    }
618                    "embeddings_non_zero" => {
619                        format!("all(sapply({result_var}, function(e) any(e != 0.0)))")
620                    }
621                    "embeddings_normalized" => {
622                        format!("all(sapply({result_var}, function(e) abs(sum(e * e) - 1.0) < 1e-3))")
623                    }
624                    _ => unreachable!(),
625                };
626                match assertion.assertion_type.as_str() {
627                    "is_true" => {
628                        let _ = writeln!(out, "  expect_true({pred})");
629                    }
630                    "is_false" => {
631                        let _ = writeln!(out, "  expect_false({pred})");
632                    }
633                    _ => {
634                        let _ = writeln!(out, "  # skipped: unsupported assertion type on synthetic field '{f}'");
635                    }
636                }
637                return;
638            }
639            // ---- keywords / keywords_count ----
640            // R ExtractionResult does not expose extracted_keywords; skip.
641            "keywords" | "keywords_count" => {
642                let _ = writeln!(out, "  # skipped: field '{f}' not available on R ExtractionResult");
643                return;
644            }
645            _ => {}
646        }
647    }
648
649    // Skip assertions on fields that don't exist on the result type.
650    if let Some(f) = &assertion.field {
651        if !f.is_empty() && !field_resolver.is_valid_for_result(f) {
652            let _ = writeln!(out, "  # skipped: field '{f}' not available on result type");
653            return;
654        }
655    }
656
657    // When result_is_simple, skip assertions that reference non-content fields
658    // (e.g., metadata, document, structure) since the binding returns a plain value.
659    if result_is_simple {
660        if let Some(f) = &assertion.field {
661            let f_lower = f.to_lowercase();
662            if !f.is_empty()
663                && f_lower != "content"
664                && (f_lower.starts_with("metadata")
665                    || f_lower.starts_with("document")
666                    || f_lower.starts_with("structure"))
667            {
668                let _ = writeln!(
669                    out,
670                    "  # skipped: result_is_simple for field '{f}' not available on result type"
671                );
672                return;
673            }
674        }
675    }
676
677    let field_expr = if result_is_simple {
678        result_var.to_string()
679    } else {
680        match &assertion.field {
681            Some(f) if !f.is_empty() => field_resolver.accessor(f, "r", result_var),
682            _ => result_var.to_string(),
683        }
684    };
685
686    match assertion.assertion_type.as_str() {
687        "equals" => {
688            if let Some(expected) = &assertion.value {
689                let r_val = json_to_r(expected, false);
690                let _ = writeln!(out, "  expect_equal(trimws({field_expr}), {r_val})");
691            }
692        }
693        "contains" => {
694            if let Some(expected) = &assertion.value {
695                let r_val = json_to_r(expected, false);
696                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}, fixed = TRUE))");
697            }
698        }
699        "contains_all" => {
700            if let Some(values) = &assertion.values {
701                for val in values {
702                    let r_val = json_to_r(val, false);
703                    let _ = writeln!(out, "  expect_true(any(grepl({r_val}, {field_expr}, fixed = TRUE)))");
704                }
705            }
706        }
707        "not_contains" => {
708            if let Some(expected) = &assertion.value {
709                let r_val = json_to_r(expected, false);
710                let _ = writeln!(out, "  expect_false(grepl({r_val}, {field_expr}, fixed = TRUE))");
711            }
712        }
713        "not_empty" => {
714            let _ = writeln!(
715                out,
716                "  expect_true(if (is.character({field_expr})) nchar({field_expr}) > 0 else length({field_expr}) > 0)"
717            );
718        }
719        "is_empty" => {
720            let _ = writeln!(out, "  expect_equal({field_expr}, \"\")");
721        }
722        "contains_any" => {
723            if let Some(values) = &assertion.values {
724                let items: Vec<String> = values.iter().map(|v| json_to_r(v, false)).collect();
725                let vec_str = items.join(", ");
726                let _ = writeln!(
727                    out,
728                    "  expect_true(any(sapply(c({vec_str}), function(v) grepl(v, {field_expr}, fixed = TRUE))))"
729                );
730            }
731        }
732        "greater_than" => {
733            if let Some(val) = &assertion.value {
734                let r_val = json_to_r(val, false);
735                let _ = writeln!(out, "  expect_true({field_expr} > {r_val})");
736            }
737        }
738        "less_than" => {
739            if let Some(val) = &assertion.value {
740                let r_val = json_to_r(val, false);
741                let _ = writeln!(out, "  expect_true({field_expr} < {r_val})");
742            }
743        }
744        "greater_than_or_equal" => {
745            if let Some(val) = &assertion.value {
746                let r_val = json_to_r(val, false);
747                let _ = writeln!(out, "  expect_true({field_expr} >= {r_val})");
748            }
749        }
750        "less_than_or_equal" => {
751            if let Some(val) = &assertion.value {
752                let r_val = json_to_r(val, false);
753                let _ = writeln!(out, "  expect_true({field_expr} <= {r_val})");
754            }
755        }
756        "starts_with" => {
757            if let Some(expected) = &assertion.value {
758                let r_val = json_to_r(expected, false);
759                let _ = writeln!(out, "  expect_true(startsWith({field_expr}, {r_val}))");
760            }
761        }
762        "ends_with" => {
763            if let Some(expected) = &assertion.value {
764                let r_val = json_to_r(expected, false);
765                let _ = writeln!(out, "  expect_true(endsWith({field_expr}, {r_val}))");
766            }
767        }
768        "min_length" => {
769            if let Some(val) = &assertion.value {
770                if let Some(n) = val.as_u64() {
771                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) >= {n})");
772                }
773            }
774        }
775        "max_length" => {
776            if let Some(val) = &assertion.value {
777                if let Some(n) = val.as_u64() {
778                    let _ = writeln!(out, "  expect_true(nchar({field_expr}) <= {n})");
779                }
780            }
781        }
782        "count_min" => {
783            if let Some(val) = &assertion.value {
784                if let Some(n) = val.as_u64() {
785                    let _ = writeln!(out, "  expect_true(length({field_expr}) >= {n})");
786                }
787            }
788        }
789        "count_equals" => {
790            if let Some(val) = &assertion.value {
791                if let Some(n) = val.as_u64() {
792                    let _ = writeln!(out, "  expect_equal(length({field_expr}), {n})");
793                }
794            }
795        }
796        "is_true" => {
797            let _ = writeln!(out, "  expect_true({field_expr})");
798        }
799        "is_false" => {
800            let _ = writeln!(out, "  expect_false({field_expr})");
801        }
802        "method_result" => {
803            if let Some(method_name) = &assertion.method {
804                let call_expr = build_r_method_call(result_var, method_name, assertion.args.as_ref());
805                let check = assertion.check.as_deref().unwrap_or("is_true");
806                match check {
807                    "equals" => {
808                        if let Some(val) = &assertion.value {
809                            if val.is_boolean() {
810                                if val.as_bool() == Some(true) {
811                                    let _ = writeln!(out, "  expect_true({call_expr})");
812                                } else {
813                                    let _ = writeln!(out, "  expect_false({call_expr})");
814                                }
815                            } else {
816                                let r_val = json_to_r(val, false);
817                                let _ = writeln!(out, "  expect_equal({call_expr}, {r_val})");
818                            }
819                        }
820                    }
821                    "is_true" => {
822                        let _ = writeln!(out, "  expect_true({call_expr})");
823                    }
824                    "is_false" => {
825                        let _ = writeln!(out, "  expect_false({call_expr})");
826                    }
827                    "greater_than_or_equal" => {
828                        if let Some(val) = &assertion.value {
829                            let r_val = json_to_r(val, false);
830                            let _ = writeln!(out, "  expect_true({call_expr} >= {r_val})");
831                        }
832                    }
833                    "count_min" => {
834                        if let Some(val) = &assertion.value {
835                            let n = val.as_u64().unwrap_or(0);
836                            let _ = writeln!(out, "  expect_true(length({call_expr}) >= {n})");
837                        }
838                    }
839                    "is_error" => {
840                        let _ = writeln!(out, "  expect_error({call_expr})");
841                    }
842                    "contains" => {
843                        if let Some(val) = &assertion.value {
844                            let r_val = json_to_r(val, false);
845                            let _ = writeln!(out, "  expect_true(grepl({r_val}, {call_expr}, fixed = TRUE))");
846                        }
847                    }
848                    other_check => {
849                        panic!("R e2e generator: unsupported method_result check type: {other_check}");
850                    }
851                }
852            } else {
853                panic!("R e2e generator: method_result assertion missing 'method' field");
854            }
855        }
856        "matches_regex" => {
857            if let Some(expected) = &assertion.value {
858                let r_val = json_to_r(expected, false);
859                let _ = writeln!(out, "  expect_true(grepl({r_val}, {field_expr}))");
860            }
861        }
862        "not_error" => {
863            // The call itself stops the test on error; emit an explicit
864            // `expect_true(TRUE)` so testthat doesn't report the test as
865            // empty when this is the only assertion.
866            let _ = writeln!(out, "  expect_true(TRUE)");
867        }
868        "error" => {
869            // Handled at the test level.
870        }
871        other => {
872            panic!("R e2e generator: unsupported assertion type: {other}");
873        }
874    }
875}
876
/// Convert a PascalCase string to snake_case.
///
/// An underscore is inserted before every uppercase character except the
/// first character of the string, and every character is lowercased.
/// Consecutive uppercase letters are therefore split one by one
/// (`"HTML"` becomes `"h_t_m_l"`); input that contains no uppercase
/// characters is returned unchanged.
///
/// e.g. "DoubleEqual" → "double_equal", "Backticks" → "backticks"
fn pascal_to_snake_case(s: &str) -> String {
    // A few spare bytes for the underscores that get inserted.
    let mut snake = String::with_capacity(s.len() + 4);
    for (idx, ch) in s.chars().enumerate() {
        if idx > 0 && ch.is_uppercase() {
            snake.push('_');
        }
        // `to_lowercase` may yield more than one char for some code points.
        snake.extend(ch.to_lowercase());
    }
    snake
}
897
898/// * `lowercase_enum_values` - If true, convert PascalCase strings to snake_case (for enum values).
899///   If false, preserve original case (for assertion expected values).
900fn json_to_r(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
901    match value {
902        serde_json::Value::String(s) => {
903            // Convert PascalCase enum values to snake_case only if requested.
904            // e.g. "Backticks" → "backticks", "DoubleEqual" → "double_equal"
905            let normalized = if lowercase_enum_values && s.chars().next().is_some_and(|c| c.is_uppercase()) {
906                pascal_to_snake_case(s)
907            } else {
908                s.clone()
909            };
910            format!("\"{}\"", escape_r(&normalized))
911        }
912        serde_json::Value::Bool(true) => "TRUE".to_string(),
913        serde_json::Value::Bool(false) => "FALSE".to_string(),
914        serde_json::Value::Number(n) => n.to_string(),
915        serde_json::Value::Null => "NULL".to_string(),
916        serde_json::Value::Array(arr) => {
917            let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
918            format!("c({})", items.join(", "))
919        }
920        serde_json::Value::Object(map) => {
921            let entries: Vec<String> = map
922                .iter()
923                .map(|(k, v)| format!("\"{}\" = {}", escape_r(k), json_to_r(v, lowercase_enum_values)))
924                .collect();
925            format!("list({})", entries.join(", "))
926        }
927    }
928}
929
930/// Build an R visitor list and add setup line.
931fn build_r_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
932    use std::fmt::Write as FmtWrite;
933    // Collect each callback as a separate string, then join with ",\n" to avoid
934    // trailing commas — R's list() does not accept a trailing comma.
935    let methods: Vec<String> = visitor_spec
936        .callbacks
937        .iter()
938        .map(|(method_name, action)| {
939            let mut buf = String::new();
940            emit_r_visitor_method(&mut buf, method_name, action);
941            // strip the trailing ",\n" added by emit_r_visitor_method
942            buf.trim_end_matches(['\n', ',']).to_string()
943        })
944        .collect();
945    let mut visitor_obj = String::new();
946    let _ = writeln!(visitor_obj, "list(");
947    let _ = write!(visitor_obj, "{}", methods.join(",\n"));
948    let _ = writeln!(visitor_obj);
949    let _ = writeln!(visitor_obj, "  )");
950
951    setup_lines.push(format!("visitor <- {visitor_obj}"));
952}
953
954/// Build an R call expression for a `method_result` assertion.
955/// Maps method names to the appropriate R function or method calls.
956fn build_r_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
957    match method_name {
958        "root_child_count" => format!("{result_var}$root_child_count()"),
959        "root_node_type" => format!("{result_var}$root_node_type()"),
960        "named_children_count" => format!("{result_var}$named_children_count()"),
961        "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
962        "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
963        "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
964        "contains_node_type" => {
965            let node_type = args
966                .and_then(|a| a.get("node_type"))
967                .and_then(|v| v.as_str())
968                .unwrap_or("");
969            format!("tree_contains_node_type({result_var}, \"{node_type}\")")
970        }
971        "find_nodes_by_type" => {
972            let node_type = args
973                .and_then(|a| a.get("node_type"))
974                .and_then(|v| v.as_str())
975                .unwrap_or("");
976            format!("find_nodes_by_type({result_var}, \"{node_type}\")")
977        }
978        "run_query" => {
979            let query_source = args
980                .and_then(|a| a.get("query_source"))
981                .and_then(|v| v.as_str())
982                .unwrap_or("");
983            let language = args
984                .and_then(|a| a.get("language"))
985                .and_then(|v| v.as_str())
986                .unwrap_or("");
987            format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
988        }
989        _ => {
990            if let Some(args_val) = args {
991                let arg_str = args_val
992                    .as_object()
993                    .map(|obj| {
994                        obj.iter()
995                            .map(|(k, v)| {
996                                let r_val = json_to_r(v, false);
997                                format!("{k} = {r_val}")
998                            })
999                            .collect::<Vec<_>>()
1000                            .join(", ")
1001                    })
1002                    .unwrap_or_default();
1003                format!("{result_var}${method_name}({arg_str})")
1004            } else {
1005                format!("{result_var}${method_name}()")
1006            }
1007        }
1008    }
1009}
1010
1011/// Emit an R visitor method for a callback action.
1012fn emit_r_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1013    use std::fmt::Write as FmtWrite;
1014
1015    // R uses visit_ prefix (matches binding signature)
1016    let params = match method_name {
1017        "visit_link" => "ctx, href, text, title",
1018        "visit_image" => "ctx, src, alt, title",
1019        "visit_heading" => "ctx, level, text, id",
1020        "visit_code_block" => "ctx, lang, code",
1021        "visit_code_inline"
1022        | "visit_strong"
1023        | "visit_emphasis"
1024        | "visit_strikethrough"
1025        | "visit_underline"
1026        | "visit_subscript"
1027        | "visit_superscript"
1028        | "visit_mark"
1029        | "visit_button"
1030        | "visit_summary"
1031        | "visit_figcaption"
1032        | "visit_definition_term"
1033        | "visit_definition_description" => "ctx, text",
1034        "visit_text" => "ctx, text",
1035        "visit_list_item" => "ctx, ordered, marker, text",
1036        "visit_blockquote" => "ctx, content, depth",
1037        "visit_table_row" => "ctx, cells, is_header",
1038        "visit_custom_element" => "ctx, tag_name, html",
1039        "visit_form" => "ctx, action_url, method",
1040        "visit_input" => "ctx, input_type, name, value",
1041        "visit_audio" | "visit_video" | "visit_iframe" => "ctx, src",
1042        "visit_details" => "ctx, open",
1043        "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => "ctx, output",
1044        "visit_list_start" => "ctx, ordered",
1045        "visit_list_end" => "ctx, ordered, output",
1046        _ => "ctx",
1047    };
1048
1049    let _ = writeln!(out, "    {method_name} = function({params}) {{");
1050    match action {
1051        CallbackAction::Skip => {
1052            let _ = writeln!(out, "      \"skip\"");
1053        }
1054        CallbackAction::Continue => {
1055            let _ = writeln!(out, "      \"continue\"");
1056        }
1057        CallbackAction::PreserveHtml => {
1058            let _ = writeln!(out, "      \"preserve_html\"");
1059        }
1060        CallbackAction::Custom { output } => {
1061            let escaped = escape_r(output);
1062            let _ = writeln!(out, "      list(custom = \"{escaped}\")");
1063        }
1064        CallbackAction::CustomTemplate { template } => {
1065            let r_expr = r_template_to_paste0(template);
1066            let _ = writeln!(out, "      list(custom = {r_expr})");
1067        }
1068    }
1069    let _ = writeln!(out, "    }},");
1070}