1use crate::config::E2eConfig;
4use crate::escape::{escape_r, r_template_to_paste0, sanitize_filename, sanitize_ident};
5use crate::field_access::FieldResolver;
6use crate::fixture::{Assertion, CallbackAction, Fixture, FixtureGroup, TemplateReturnForm};
7use alef_core::backend::GeneratedFile;
8use alef_core::config::ResolvedCrateConfig;
9use alef_core::hash::{self, CommentStyle};
10use anyhow::Result;
11use std::fmt::Write as FmtWrite;
12use std::path::PathBuf;
13
14use super::E2eCodegen;
15
/// End-to-end test code generator for R.
///
/// Implements [`E2eCodegen`] with the language name `"r"`. The generated
/// project consists of a `DESCRIPTION` file, a `run_tests.R` runner, a
/// `tests/setup-fixtures.R` helper and one `tests/test_<category>.R` file per
/// fixture group that has at least one included fixture.
pub struct RCodegen;
18
19impl E2eCodegen for RCodegen {
20 fn generate(
21 &self,
22 groups: &[FixtureGroup],
23 e2e_config: &E2eConfig,
24 config: &ResolvedCrateConfig,
25 _type_defs: &[alef_core::ir::TypeDef],
26 _enums: &[alef_core::ir::EnumDef],
27 ) -> Result<Vec<GeneratedFile>> {
28 let lang = self.language_name();
29 let output_base = PathBuf::from(e2e_config.effective_output()).join(lang);
30
31 let mut files = Vec::new();
32
33 let call = &e2e_config.call;
35 let overrides = call.overrides.get(lang);
36 let module_path = overrides
37 .and_then(|o| o.module.as_ref())
38 .cloned()
39 .unwrap_or_else(|| call.module.clone());
40 let _function_name = overrides
41 .and_then(|o| o.function.as_ref())
42 .cloned()
43 .unwrap_or_else(|| call.function.clone());
44 let result_is_simple = call.result_is_simple || overrides.is_some_and(|o| o.result_is_simple);
45 let result_is_r_list = overrides.is_some_and(|o| o.result_is_r_list);
46 let _result_var = &call.result_var;
47
48 let r_pkg = e2e_config.resolve_package("r");
50 let pkg_name = r_pkg
51 .as_ref()
52 .and_then(|p| p.name.as_ref())
53 .cloned()
54 .unwrap_or_else(|| module_path.clone());
55 let pkg_path = r_pkg
56 .as_ref()
57 .and_then(|p| p.path.as_ref())
58 .cloned()
59 .unwrap_or_else(|| "../../packages/r".to_string());
60 let pkg_version = r_pkg
61 .as_ref()
62 .and_then(|p| p.version.as_ref())
63 .cloned()
64 .or_else(|| config.resolved_version())
65 .unwrap_or_else(|| "0.1.0".to_string());
66
67 files.push(GeneratedFile {
69 path: output_base.join("DESCRIPTION"),
70 content: render_description(&pkg_name, &pkg_version, e2e_config.dep_mode),
71 generated_header: false,
72 });
73
74 files.push(GeneratedFile {
76 path: output_base.join("run_tests.R"),
77 content: render_test_runner(&pkg_path, e2e_config.dep_mode),
78 generated_header: true,
79 });
80
81 files.push(GeneratedFile {
87 path: output_base.join("tests").join("setup-fixtures.R"),
88 content: render_setup_fixtures(&e2e_config.test_documents_relative_from(1)),
89 generated_header: true,
90 });
91
92 for group in groups {
94 let active: Vec<&Fixture> = group
95 .fixtures
96 .iter()
97 .filter(|f| super::should_include_fixture(f, lang, e2e_config))
98 .collect();
99
100 if active.is_empty() {
101 continue;
102 }
103
104 let filename = format!("test_{}.R", sanitize_filename(&group.category));
105 let content = render_test_file(&group.category, &active, result_is_simple, result_is_r_list, e2e_config);
106 files.push(GeneratedFile {
107 path: output_base.join("tests").join(filename),
108 content,
109 generated_header: true,
110 });
111 }
112
113 Ok(files)
114 }
115
116 fn language_name(&self) -> &'static str {
117 "r"
118 }
119}
120
121fn render_description(pkg_name: &str, pkg_version: &str, dep_mode: crate::config::DependencyMode) -> String {
122 let dep_line = match dep_mode {
123 crate::config::DependencyMode::Registry => {
124 format!("Imports: {pkg_name} ({pkg_version})\n")
125 }
126 crate::config::DependencyMode::Local => String::new(),
127 };
128 format!(
129 r#"Package: e2e.r
130Title: E2E Tests for {pkg_name}
131Version: 0.1.0
132Description: End-to-end test suite.
133{dep_line}Suggests: testthat (>= 3.0.0)
134Config/testthat/edition: 3
135"#
136 )
137}
138
139fn render_setup_fixtures(test_documents_path: &str) -> String {
140 let mut out = String::new();
141 out.push_str(&hash::header(CommentStyle::Hash));
142 let _ = writeln!(out);
143 let _ = writeln!(
144 out,
145 "# Resolve fixture paths against the repo's `test_documents/` directory."
146 );
147 let _ = writeln!(
148 out,
149 "# testthat sources setup-*.R with the working directory at tests/,"
150 );
151 let _ = writeln!(
152 out,
153 "# so test_documents lives three directories up: tests/ -> e2e/r/ -> e2e/ -> repo root."
154 );
155 let _ = writeln!(
156 out,
157 "# Each `test_that()` block has its working directory reset back to tests/, so"
158 );
159 let _ = writeln!(
160 out,
161 "# fixture lookups must be performed via this helper rather than relying on `setwd`."
162 );
163 let _ = writeln!(
164 out,
165 ".alef_test_documents <- normalizePath(\"{test_documents_path}\", mustWork = FALSE)"
166 );
167 let _ = writeln!(out, ".resolve_fixture <- function(path) {{");
168 let _ = writeln!(out, " if (dir.exists(.alef_test_documents)) {{");
169 let _ = writeln!(out, " file.path(.alef_test_documents, path)");
170 let _ = writeln!(out, " }} else {{");
171 let _ = writeln!(out, " path");
172 let _ = writeln!(out, " }}");
173 let _ = writeln!(out, "}}");
174 let _ = writeln!(out);
175 let _ = writeln!(
182 out,
183 ".alef_format_value <- function(x) {{
184 if (is.list(x)) {{
185 for (variant in names(x)) {{
186 v <- x[[variant]]
187 if (is.list(v) && !is.null(v[[\"format\"]]) && is.character(v[[\"format\"]])) {{
188 return(v[[\"format\"]])
189 }}
190 }}
191 if (!is.null(x[[\"format\"]]) && is.character(x[[\"format\"]])) {{
192 return(x[[\"format\"]])
193 }}
194 if (!is.null(x[[\"format_type\"]])) {{
195 return(x[[\"format_type\"]])
196 }}
197 }}
198 x
199}}"
200 );
201 out
202}
203
204fn render_test_runner(pkg_path: &str, dep_mode: crate::config::DependencyMode) -> String {
205 let mut out = String::new();
206 out.push_str(&hash::header(CommentStyle::Hash));
207 let _ = writeln!(out, "library(testthat)");
208 match dep_mode {
209 crate::config::DependencyMode::Registry => {
210 let _ = writeln!(out, "# Package loaded via library() from CRAN install.");
212 }
213 crate::config::DependencyMode::Local => {
214 let _ = writeln!(out, "devtools::load_all(\"{pkg_path}\")");
217 }
218 }
219 let _ = writeln!(out);
220 let _ = writeln!(out, "testthat::set_max_fails(Inf)");
223 let _ = writeln!(
227 out,
228 ".script_dir <- tryCatch(dirname(normalizePath(sys.frame(1)$ofile)), error = function(e) getwd())"
229 );
230 let _ = writeln!(out, "test_dir(file.path(.script_dir, \"tests\"))");
231 out
232}
233
234fn render_test_file(
235 category: &str,
236 fixtures: &[&Fixture],
237 result_is_simple: bool,
238 result_is_r_list: bool,
239 e2e_config: &E2eConfig,
240) -> String {
241 let mut out = String::new();
242 out.push_str(&hash::header(CommentStyle::Hash));
243 let _ = writeln!(out, "# E2e tests for category: {category}");
244 let _ = writeln!(out);
245
246 for (i, fixture) in fixtures.iter().enumerate() {
247 render_test_case(&mut out, fixture, e2e_config, result_is_simple, result_is_r_list);
248 if i + 1 < fixtures.len() {
249 let _ = writeln!(out);
250 }
251 }
252
253 while out.ends_with("\n\n") {
255 out.pop();
256 }
257 if !out.ends_with('\n') {
258 out.push('\n');
259 }
260 out
261}
262
263fn render_test_case(
264 out: &mut String,
265 fixture: &Fixture,
266 e2e_config: &E2eConfig,
267 default_result_is_simple: bool,
268 default_result_is_r_list: bool,
269) {
270 let call_config = e2e_config.resolve_call_for_fixture(
271 fixture.call.as_deref(),
272 &fixture.id,
273 &fixture.resolved_category(),
274 &fixture.tags,
275 &fixture.input,
276 );
277 let call_field_resolver = FieldResolver::new(
278 e2e_config.effective_fields(call_config),
279 e2e_config.effective_fields_optional(call_config),
280 e2e_config.effective_result_fields(call_config),
281 e2e_config.effective_fields_array(call_config),
282 &std::collections::HashSet::new(),
283 );
284 let field_resolver = &call_field_resolver;
285 let function_name = call_config
291 .overrides
292 .get("r")
293 .and_then(|o| o.function.as_ref())
294 .cloned()
295 .unwrap_or_else(|| call_config.function.clone());
296 let result_var = &call_config.result_var;
297 let r_override = call_config.overrides.get("r");
303 let result_is_simple = if fixture.call.is_some() {
304 call_config.result_is_simple || r_override.is_some_and(|o| o.result_is_simple)
305 } else {
306 default_result_is_simple
307 };
308 let result_is_r_list = if fixture.call.is_some() {
312 r_override.is_some_and(|o| o.result_is_r_list)
313 } else {
314 default_result_is_r_list
315 };
316
317 let test_name = sanitize_ident(&fixture.id);
318 let description = fixture.description.replace('"', "\\\"");
319
320 let expects_error = fixture.assertions.iter().any(|a| a.assertion_type == "error");
321
322 let arg_name_map = r_override.map(|o| &o.arg_name_map);
327 let options_type = r_override.and_then(|o| o.options_type.as_deref()).or_else(|| {
333 call_config
342 .overrides
343 .values()
344 .filter_map(|o| o.options_type.as_deref())
345 .find(|name| !name.starts_with("Js"))
346 });
347 let args_str = build_args_string(&fixture.input, &call_config.args, arg_name_map, options_type);
348
349 let r_extra_args: Vec<String> = r_override.map(|o| o.extra_args.clone()).unwrap_or_default();
355 let args_with_extra = if r_extra_args.is_empty() {
356 args_str
357 } else {
358 let extra = r_extra_args.join(", ");
359 if args_str.is_empty() {
360 extra
361 } else {
362 format!("{args_str}, {extra}")
363 }
364 };
365
366 let mut setup_lines = Vec::new();
368 let final_args = if let Some(visitor_spec) = &fixture.visitor {
369 build_r_visitor(&mut setup_lines, visitor_spec);
370 let base = strip_options_arg(&args_with_extra);
375 let visitor_opts = "options = list(visitor = visitor)";
376 let trimmed = base.trim_matches([' ', ',']);
377 if trimmed.is_empty() {
378 visitor_opts.to_string()
379 } else {
380 format!("{trimmed}, {visitor_opts}")
381 }
382 } else {
383 args_with_extra
384 };
385
386 if expects_error {
387 let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
388 for line in &setup_lines {
389 let _ = writeln!(out, " {line}");
390 }
391 let _ = writeln!(out, " expect_error({function_name}({final_args}))");
392 let _ = writeln!(out, "}})");
393 return;
394 }
395
396 let _ = writeln!(out, "test_that(\"{test_name}: {description}\", {{");
397 for line in &setup_lines {
398 let _ = writeln!(out, " {line}");
399 }
400 if call_config.returns_void {
412 let _ = writeln!(out, " invisible({function_name}({final_args}))");
413 } else if result_is_simple || result_is_r_list {
414 let _ = writeln!(out, " {result_var} <- {function_name}({final_args})");
415 } else {
416 let _ = writeln!(
417 out,
418 " {result_var} <- jsonlite::fromJSON({function_name}({final_args}), simplifyVector = FALSE)"
419 );
420 }
421
422 let result_is_bytes = call_config.result_is_bytes || r_override.is_some_and(|o| o.result_is_bytes);
423 static EMPTY_ASSERT_ENUM_FIELDS: std::sync::LazyLock<std::collections::HashMap<String, String>> =
427 std::sync::LazyLock::new(std::collections::HashMap::new);
428 let assert_enum_fields = r_override
429 .map(|o| &o.assert_enum_fields)
430 .unwrap_or(&EMPTY_ASSERT_ENUM_FIELDS);
431 for assertion in &fixture.assertions {
432 let context = RAssertionContext {
433 field_resolver,
434 result_is_simple,
435 result_is_bytes,
436 assert_enum_fields,
437 };
438 render_assertion(out, assertion, result_var, &context);
439 }
440
441 let _ = writeln!(out, "}})");
442}
443
/// Removes every top-level `options = ...` argument from an R argument list.
///
/// `args_str` is split on commas that are outside brackets and outside quoted
/// strings; each part is trimmed, parts starting with `options ` or `options=`
/// are dropped, and the rest are re-joined with `", "`.
///
/// Backslash escapes inside quoted strings are honoured, so an escaped quote
/// such as `\"` does not end the string. Without that, a value containing an
/// escaped quote followed by a comma was split in the wrong place.
fn strip_options_arg(args_str: &str) -> String {
    let mut parts: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut bracket_depth: i32 = 0;
    // `Some(q)` while inside a string opened with quote character `q`.
    let mut open_quote: Option<char> = None;
    // True when the previous character inside a string was an unescaped backslash.
    let mut escaped = false;

    for c in args_str.chars() {
        match open_quote {
            Some(quote) => {
                if escaped {
                    escaped = false;
                } else if c == '\\' {
                    escaped = true;
                } else if c == quote {
                    open_quote = None;
                }
            }
            None => match c {
                '(' | '[' | '{' => bracket_depth += 1,
                ')' | ']' | '}' => bracket_depth -= 1,
                '\'' | '"' => open_quote = Some(c),
                ',' if bracket_depth == 0 => {
                    parts.push(current.trim().to_string());
                    current.clear();
                    continue;
                }
                _ => {}
            },
        }
        current.push(c);
    }

    let tail = current.trim();
    if !tail.is_empty() {
        parts.push(tail.to_string());
    }

    parts
        .into_iter()
        .filter(|p| !p.starts_with("options ") && !p.starts_with("options="))
        .collect::<Vec<_>>()
        .join(", ")
}
486
487fn build_args_string(
488 input: &serde_json::Value,
489 args: &[crate::config::ArgMapping],
490 arg_name_map: Option<&std::collections::HashMap<String, String>>,
491 options_type: Option<&str>,
492) -> String {
493 if args.is_empty() {
494 return String::new();
499 }
500
501 let parts: Vec<String> = args
502 .iter()
503 .filter_map(|arg| {
504 let arg_name: &str = arg_name_map
506 .and_then(|m| m.get(&arg.name).map(String::as_str))
507 .unwrap_or(&arg.name);
508
509 let field = arg.field.strip_prefix("input.").unwrap_or(&arg.field);
510 let val = input.get(field);
511 let val = match val {
517 Some(v) if !(v.is_null() && arg.optional) => v,
518 _ => {
519 if !arg.optional {
520 return None;
521 }
522 if arg.arg_type == "json_object" {
523 let r_value = r_default_for_config_arg(arg_name, options_type);
524 return Some(format!("{arg_name} = {r_value}"));
525 }
526 return Some(format!("{arg_name} = NULL"));
527 }
528 };
529 if arg.arg_type == "json_object" && (val.is_null() || val.as_object().is_some_and(|m| m.is_empty())) {
536 let r_value = r_default_for_config_arg(arg_name, options_type);
537 return Some(format!("{arg_name} = {r_value}"));
538 }
539 if arg.arg_type == "json_object" && val.is_object() {
544 let default_expr = r_default_for_config_arg(arg_name, options_type);
545 if default_expr.ends_with("$default()") {
546 let type_name = default_expr.trim_end_matches("$default()");
548 let r_list = json_to_r_preserve_arrays(val, true);
554 let r_value = format!("{type_name}$from_json(jsonlite::toJSON({r_list}, auto_unbox = TRUE))");
555 return Some(format!("{arg_name} = {r_value}"));
556 }
557 let r_value = json_to_r(val, true);
558 return Some(format!("{arg_name} = {r_value}"));
559 }
560 if arg.arg_type == "json_object" && val.is_array() {
572 if arg.element_type.as_deref() == Some("String") {
573 let r_value = if val.as_array().is_some_and(|arr| arr.is_empty()) {
578 "character(0)".to_string()
579 } else {
580 json_to_r(val, false)
581 };
582 return Some(format!("{arg_name} = {r_value}"));
583 }
584 let json_literal = serde_json::to_string(val).unwrap_or_else(|_| "[]".to_string());
585 let escaped = escape_r(&json_literal);
586 return Some(format!("{arg_name} = \"{escaped}\""));
587 }
588 if arg.arg_type == "bytes" {
593 if let Some(raw) = val.as_str() {
594 let r_value = render_bytes_value(raw);
595 return Some(format!("{arg_name} = {r_value}"));
596 }
597 }
598 if arg.arg_type == "file_path" {
603 if let Some(raw) = val.as_str() {
604 if !raw.starts_with('/') && !raw.is_empty() {
605 let escaped = escape_r(raw);
606 return Some(format!("{arg_name} = .resolve_fixture(\"{escaped}\")"));
607 }
608 }
609 }
610 Some(format!("{arg_name} = {}", json_to_r(val, true)))
611 })
612 .collect();
613
614 parts.join(", ")
615}
616
617fn render_bytes_value(raw: &str) -> String {
623 if raw.starts_with('<') || raw.starts_with('{') || raw.starts_with('[') || raw.contains(' ') {
624 let escaped = escape_r(raw);
626 return format!("charToRaw(\"{escaped}\")");
627 }
628 let first = raw.chars().next().unwrap_or('\0');
629 if first.is_ascii_alphanumeric() || first == '_' {
630 if let Some(slash) = raw.find('/') {
631 if slash > 0 {
632 let after = &raw[slash + 1..];
633 if after.contains('.') && !after.is_empty() {
634 let escaped = escape_r(raw);
635 return format!(
636 "readBin(.resolve_fixture(\"{escaped}\"), what = \"raw\", n = file.info(.resolve_fixture(\"{escaped}\"))$size)"
637 );
638 }
639 }
640 }
641 }
642 let escaped = escape_r(raw);
644 format!("charToRaw(\"{escaped}\")")
645}
646
/// Returns the R expression used as the default for a config-like argument.
///
/// An explicit `options_type` always wins and yields `<Type>$default()`.
/// Otherwise the expression is chosen from the argument name, falling back to
/// an empty `list()` for unknown names.
fn r_default_for_config_arg(arg_name: &str, options_type: Option<&str>) -> String {
    options_type.map_or_else(
        || {
            let by_name = match arg_name {
                "config" => "ExtractionConfig$default()",
                "options" => "NULL",
                "html_output" => "HtmlOutputConfig$default()",
                "chunking" => "ChunkingConfig$default()",
                "ocr" => "OcrConfig$default()",
                "image" | "images" => "ImageExtractionConfig$default()",
                "language_detection" => "LanguageDetectionConfig$default()",
                _ => "list()",
            };
            by_name.to_owned()
        },
        |type_name| format!("{type_name}$default()"),
    )
}
670
/// Per-test-case settings shared by every assertion rendered for one fixture.
struct RAssertionContext<'a> {
    /// Validates assertion fields against the result type and builds the R
    /// accessor expression for a field.
    field_resolver: &'a FieldResolver,
    /// When true, assertions are rendered against the result variable itself
    /// instead of a field accessor.
    result_is_simple: bool,
    /// When true, `min_length` / `max_length` measure with `length` instead of `nchar`.
    result_is_bytes: bool,
    /// Fields whose accessor is wrapped in `.alef_format_value(...)`; only the
    /// keys are consulted when rendering assertions.
    assert_enum_fields: &'a std::collections::HashMap<String, String>,
}
677
678fn render_assertion(out: &mut String, assertion: &Assertion, result_var: &str, context: &RAssertionContext<'_>) {
679 if let Some(f) = &assertion.field {
682 match f.as_str() {
683 "chunks_have_content" => {
684 let pred = format!("all(sapply({result_var}$chunks %||% list(), function(c) nchar(c$content) > 0))");
685 match assertion.assertion_type.as_str() {
686 "is_true" => {
687 let _ = writeln!(out, " expect_true({pred})");
688 }
689 "is_false" => {
690 let _ = writeln!(out, " expect_false({pred})");
691 }
692 _ => {
693 let _ = writeln!(out, " # skipped: unsupported assertion type on synthetic field '{f}'");
694 }
695 }
696 return;
697 }
698 "chunks_have_embeddings" => {
699 let pred = format!(
700 "all(sapply({result_var}$chunks %||% list(), function(c) !is.null(c$embedding) && length(c$embedding) > 0))"
701 );
702 match assertion.assertion_type.as_str() {
703 "is_true" => {
704 let _ = writeln!(out, " expect_true({pred})");
705 }
706 "is_false" => {
707 let _ = writeln!(out, " expect_false({pred})");
708 }
709 _ => {
710 let _ = writeln!(out, " # skipped: unsupported assertion type on synthetic field '{f}'");
711 }
712 }
713 return;
714 }
715 "chunks_have_heading_context" => {
716 let pred_true = format!(
719 "!is.null({result_var}$chunks) && length({result_var}$chunks) > 0 && all(sapply({result_var}$chunks, function(c) nchar(c$content) > 0))"
720 );
721 let pred_false = format!("is.null({result_var}$chunks) || length({result_var}$chunks) == 0");
722 match assertion.assertion_type.as_str() {
723 "is_true" => {
724 let _ = writeln!(out, " expect_true({pred_true})");
725 }
726 "is_false" => {
727 let _ = writeln!(out, " expect_true({pred_false})");
728 }
729 _ => {
730 let _ = writeln!(out, " # skipped: unsupported assertion type on synthetic field '{f}'");
731 }
732 }
733 return;
734 }
735 "first_chunk_starts_with_heading" => {
736 let pred_true = format!(
739 "!is.null({result_var}$chunks) && length({result_var}$chunks) > 0 && startsWith(trimws({result_var}$chunks[[1]]$content), \"#\")"
740 );
741 let pred_false = format!(
742 "is.null({result_var}$chunks) || length({result_var}$chunks) == 0 || !startsWith(trimws({result_var}$chunks[[1]]$content), \"#\")"
743 );
744 match assertion.assertion_type.as_str() {
745 "is_true" => {
746 let _ = writeln!(out, " expect_true({pred_true})");
747 }
748 "is_false" => {
749 let _ = writeln!(out, " expect_true({pred_false})");
750 }
751 _ => {
752 let _ = writeln!(out, " # skipped: unsupported assertion type on synthetic field '{f}'");
753 }
754 }
755 return;
756 }
757 "embeddings" => {
764 let parsed = format!(
765 "(if (is.character({result_var}) && length({result_var}) == 1) jsonlite::fromJSON({result_var}, simplifyVector = FALSE) else {result_var})"
766 );
767 match assertion.assertion_type.as_str() {
768 "count_equals" => {
769 if let Some(val) = &assertion.value {
770 let r_val = json_to_r(val, false);
771 let _ = writeln!(out, " expect_equal(length({parsed}), {r_val})");
772 }
773 }
774 "count_min" => {
775 if let Some(val) = &assertion.value {
776 let r_val = json_to_r(val, false);
777 let _ = writeln!(out, " expect_gte(length({parsed}), {r_val})");
778 }
779 }
780 "not_empty" => {
781 let _ = writeln!(out, " expect_gt(length({parsed}), 0)");
782 }
783 "is_empty" => {
784 let _ = writeln!(out, " expect_equal(length({parsed}), 0)");
785 }
786 _ => {
787 let _ = writeln!(
788 out,
789 " # skipped: unsupported assertion type on synthetic field 'embeddings'"
790 );
791 }
792 }
793 return;
794 }
795 "embedding_dimensions" => {
796 let expr = format!("(if (length({result_var}) == 0) 0L else length({result_var}[[1]]))");
797 match assertion.assertion_type.as_str() {
798 "equals" => {
799 if let Some(val) = &assertion.value {
800 let r_val = json_to_r(val, false);
801 let _ = writeln!(out, " expect_equal({expr}, {r_val})");
802 }
803 }
804 "greater_than" => {
805 if let Some(val) = &assertion.value {
806 let r_val = json_to_r(val, false);
807 let _ = writeln!(out, " expect_gt({expr}, {r_val})");
808 }
809 }
810 _ => {
811 let _ = writeln!(
812 out,
813 " # skipped: unsupported assertion type on synthetic field 'embedding_dimensions'"
814 );
815 }
816 }
817 return;
818 }
819 "embeddings_valid" | "embeddings_finite" | "embeddings_non_zero" | "embeddings_normalized" => {
820 let pred = match f.as_str() {
821 "embeddings_valid" => {
822 format!("all(sapply({result_var}, function(e) length(e) > 0))")
823 }
824 "embeddings_finite" => {
825 format!("all(sapply({result_var}, function(e) all(is.finite(e))))")
826 }
827 "embeddings_non_zero" => {
828 format!("all(sapply({result_var}, function(e) any(e != 0.0)))")
829 }
830 "embeddings_normalized" => {
831 format!("all(sapply({result_var}, function(e) abs(sum(e * e) - 1.0) < 1e-3))")
832 }
833 _ => unreachable!(),
834 };
835 match assertion.assertion_type.as_str() {
836 "is_true" => {
837 let _ = writeln!(out, " expect_true({pred})");
838 }
839 "is_false" => {
840 let _ = writeln!(out, " expect_false({pred})");
841 }
842 _ => {
843 let _ = writeln!(out, " # skipped: unsupported assertion type on synthetic field '{f}'");
844 }
845 }
846 return;
847 }
848 "keywords" | "keywords_count" => {
851 let _ = writeln!(out, " # skipped: field '{f}' not available on R ExtractionResult");
852 return;
853 }
854 _ => {}
855 }
856 }
857
858 if let Some(f) = &assertion.field {
862 if !f.is_empty() && !context.field_resolver.is_valid_for_result(f) {
863 if !(context.result_is_simple && f == "result") {
865 let _ = writeln!(out, " # skipped: field '{f}' not available on result type");
866 return;
867 }
868 }
869 }
870
871 if context.result_is_simple {
874 if let Some(f) = &assertion.field {
875 let f_lower = f.to_lowercase();
876 if !f.is_empty()
877 && f_lower != "content"
878 && (f_lower.starts_with("metadata")
879 || f_lower.starts_with("document")
880 || f_lower.starts_with("structure"))
881 {
882 let _ = writeln!(
883 out,
884 " # skipped: result_is_simple for field '{f}' not available on result type"
885 );
886 return;
887 }
888 }
889 }
890
891 let field_expr = if context.result_is_simple {
892 result_var.to_string()
893 } else {
894 match &assertion.field {
895 Some(f) if !f.is_empty() => context.field_resolver.accessor(f, "r", result_var),
896 _ => result_var.to_string(),
897 }
898 };
899
900 let field_expr = match &assertion.field {
906 Some(f) if context.assert_enum_fields.contains_key(f.as_str()) => {
907 format!(".alef_format_value({field_expr})")
908 }
909 _ => field_expr,
910 };
911
912 match assertion.assertion_type.as_str() {
913 "equals" => {
914 if let Some(expected) = &assertion.value {
915 let r_val = json_to_r(expected, false);
916 let _ = writeln!(out, " expect_equal(trimws({field_expr}), {r_val})");
917 }
918 }
919 "contains" => {
920 if let Some(expected) = &assertion.value {
921 let r_val = json_to_r(expected, false);
922 let _ = writeln!(out, " expect_true(grepl({r_val}, {field_expr}, fixed = TRUE))");
923 }
924 }
925 "contains_all" => {
926 if let Some(values) = &assertion.values {
927 for val in values {
928 let r_val = json_to_r(val, false);
929 let _ = writeln!(out, " expect_true(any(grepl({r_val}, {field_expr}, fixed = TRUE)))");
930 }
931 }
932 }
933 "not_contains" => {
934 if let Some(expected) = &assertion.value {
935 let r_val = json_to_r(expected, false);
936 let _ = writeln!(out, " expect_false(grepl({r_val}, {field_expr}, fixed = TRUE))");
937 }
938 }
939 "not_empty" => {
940 let _ = writeln!(
946 out,
947 " expect_true(if (is.character({field_expr})) length({field_expr}) > 0 && any(nchar({field_expr}) > 0) else length({field_expr}) > 0)"
948 );
949 }
950 "is_empty" => {
951 let _ = writeln!(
957 out,
958 " expect_true(is.null({field_expr}) || length({field_expr}) == 0 || (length({field_expr}) == 1 && (is.na({field_expr}) || identical({field_expr}, \"\"))))"
959 );
960 }
961 "contains_any" => {
962 if let Some(values) = &assertion.values {
963 let items: Vec<String> = values.iter().map(|v| json_to_r(v, false)).collect();
964 let vec_str = items.join(", ");
965 let _ = writeln!(
966 out,
967 " expect_true(any(sapply(c({vec_str}), function(v) grepl(v, {field_expr}, fixed = TRUE))))"
968 );
969 }
970 }
971 "greater_than" => {
972 if let Some(val) = &assertion.value {
973 let r_val = json_to_r(val, false);
974 let _ = writeln!(out, " expect_true({field_expr} > {r_val})");
975 }
976 }
977 "less_than" => {
978 if let Some(val) = &assertion.value {
979 let r_val = json_to_r(val, false);
980 let _ = writeln!(out, " expect_true({field_expr} < {r_val})");
981 }
982 }
983 "greater_than_or_equal" => {
984 if let Some(val) = &assertion.value {
985 let r_val = json_to_r(val, false);
986 let _ = writeln!(out, " expect_true({field_expr} >= {r_val})");
987 }
988 }
989 "less_than_or_equal" => {
990 if let Some(val) = &assertion.value {
991 let r_val = json_to_r(val, false);
992 let _ = writeln!(out, " expect_true({field_expr} <= {r_val})");
993 }
994 }
995 "starts_with" => {
996 if let Some(expected) = &assertion.value {
997 let r_val = json_to_r(expected, false);
998 let _ = writeln!(out, " expect_true(startsWith({field_expr}, {r_val}))");
999 }
1000 }
1001 "ends_with" => {
1002 if let Some(expected) = &assertion.value {
1003 let r_val = json_to_r(expected, false);
1004 let _ = writeln!(out, " expect_true(endsWith({field_expr}, {r_val}))");
1005 }
1006 }
1007 "min_length" => {
1008 if let Some(val) = &assertion.value {
1009 if let Some(n) = val.as_u64() {
1010 let size_fn = if context.result_is_bytes { "length" } else { "nchar" };
1015 let _ = writeln!(out, " expect_true({size_fn}({field_expr}) >= {n})");
1016 }
1017 }
1018 }
1019 "max_length" => {
1020 if let Some(val) = &assertion.value {
1021 if let Some(n) = val.as_u64() {
1022 let size_fn = if context.result_is_bytes { "length" } else { "nchar" };
1023 let _ = writeln!(out, " expect_true({size_fn}({field_expr}) <= {n})");
1024 }
1025 }
1026 }
1027 "count_min" => {
1028 if let Some(val) = &assertion.value {
1029 if let Some(n) = val.as_u64() {
1030 let _ = writeln!(out, " expect_true(length({field_expr}) >= {n})");
1031 }
1032 }
1033 }
1034 "count_equals" => {
1035 if let Some(val) = &assertion.value {
1036 if let Some(n) = val.as_u64() {
1037 let _ = writeln!(out, " expect_equal(length({field_expr}), {n})");
1038 }
1039 }
1040 }
1041 "is_true" => {
1042 let _ = writeln!(out, " expect_true({field_expr})");
1043 }
1044 "is_false" => {
1045 let _ = writeln!(out, " expect_false({field_expr})");
1046 }
1047 "method_result" => {
1048 if let Some(method_name) = &assertion.method {
1049 let call_expr = build_r_method_call(result_var, method_name, assertion.args.as_ref());
1050 let check = assertion.check.as_deref().unwrap_or("is_true");
1051 match check {
1052 "equals" => {
1053 if let Some(val) = &assertion.value {
1054 if val.is_boolean() {
1055 if val.as_bool() == Some(true) {
1056 let _ = writeln!(out, " expect_true({call_expr})");
1057 } else {
1058 let _ = writeln!(out, " expect_false({call_expr})");
1059 }
1060 } else {
1061 let r_val = json_to_r(val, false);
1062 let _ = writeln!(out, " expect_equal({call_expr}, {r_val})");
1063 }
1064 }
1065 }
1066 "is_true" => {
1067 let _ = writeln!(out, " expect_true({call_expr})");
1068 }
1069 "is_false" => {
1070 let _ = writeln!(out, " expect_false({call_expr})");
1071 }
1072 "greater_than_or_equal" => {
1073 if let Some(val) = &assertion.value {
1074 let r_val = json_to_r(val, false);
1075 let _ = writeln!(out, " expect_true({call_expr} >= {r_val})");
1076 }
1077 }
1078 "count_min" => {
1079 if let Some(val) = &assertion.value {
1080 let n = val.as_u64().unwrap_or(0);
1081 let _ = writeln!(out, " expect_true(length({call_expr}) >= {n})");
1082 }
1083 }
1084 "is_error" => {
1085 let _ = writeln!(out, " expect_error({call_expr})");
1086 }
1087 "contains" => {
1088 if let Some(val) = &assertion.value {
1089 let r_val = json_to_r(val, false);
1090 let _ = writeln!(out, " expect_true(grepl({r_val}, {call_expr}, fixed = TRUE))");
1091 }
1092 }
1093 other_check => {
1094 panic!("R e2e generator: unsupported method_result check type: {other_check}");
1095 }
1096 }
1097 } else {
1098 panic!("R e2e generator: method_result assertion missing 'method' field");
1099 }
1100 }
1101 "matches_regex" => {
1102 if let Some(expected) = &assertion.value {
1103 let r_val = json_to_r(expected, false);
1104 let _ = writeln!(out, " expect_true(grepl({r_val}, {field_expr}))");
1105 }
1106 }
1107 "not_error" => {
1108 let _ = writeln!(out, " expect_true(TRUE)");
1112 }
1113 "error" => {
1114 }
1116 other => {
1117 panic!("R e2e generator: unsupported assertion type: {other}");
1118 }
1119 }
1120}
1121
/// Converts a PascalCase / camelCase identifier to snake_case.
///
/// Every uppercase character except the very first character of the input is
/// preceded by `_`, and all characters are lowercased. Consecutive capitals
/// are each split (`"HTML"` becomes `"h_t_m_l"`).
fn pascal_to_snake_case(s: &str) -> String {
    s.chars()
        .enumerate()
        .fold(String::with_capacity(s.len() + 4), |mut snake, (position, ch)| {
            if position != 0 && ch.is_uppercase() {
                snake.push('_');
            }
            snake.extend(ch.to_lowercase());
            snake
        })
}
1142
1143fn json_to_r_preserve_arrays(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
1154 match value {
1155 serde_json::Value::Array(arr) => {
1156 if arr.is_empty() {
1157 "I(list())".to_string()
1158 } else {
1159 let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
1160 format!("I(c({}))", items.join(", "))
1161 }
1162 }
1163 serde_json::Value::Object(map) => {
1164 let entries: Vec<String> = map
1165 .iter()
1166 .map(|(k, v)| {
1167 format!(
1168 "\"{}\" = {}",
1169 escape_r(k),
1170 json_to_r_preserve_arrays(v, lowercase_enum_values)
1171 )
1172 })
1173 .collect();
1174 format!("list({})", entries.join(", "))
1175 }
1176 _ => json_to_r(value, lowercase_enum_values),
1177 }
1178}
1179
1180fn json_to_r(value: &serde_json::Value, lowercase_enum_values: bool) -> String {
1183 match value {
1184 serde_json::Value::String(s) => {
1185 let normalized = if lowercase_enum_values && s.chars().next().is_some_and(|c| c.is_uppercase()) {
1188 pascal_to_snake_case(s)
1189 } else {
1190 s.clone()
1191 };
1192 format!("\"{}\"", escape_r(&normalized))
1193 }
1194 serde_json::Value::Bool(true) => "TRUE".to_string(),
1195 serde_json::Value::Bool(false) => "FALSE".to_string(),
1196 serde_json::Value::Number(n) => n.to_string(),
1197 serde_json::Value::Null => "NULL".to_string(),
1198 serde_json::Value::Array(arr) => {
1199 let items: Vec<String> = arr.iter().map(|v| json_to_r(v, lowercase_enum_values)).collect();
1200 format!("c({})", items.join(", "))
1201 }
1202 serde_json::Value::Object(map) => {
1203 let entries: Vec<String> = map
1204 .iter()
1205 .map(|(k, v)| format!("\"{}\" = {}", escape_r(k), json_to_r(v, lowercase_enum_values)))
1206 .collect();
1207 format!("list({})", entries.join(", "))
1208 }
1209 }
1210}
1211
1212fn build_r_visitor(setup_lines: &mut Vec<String>, visitor_spec: &crate::fixture::VisitorSpec) {
1214 use std::fmt::Write as FmtWrite;
1215 let methods: Vec<String> = visitor_spec
1218 .callbacks
1219 .iter()
1220 .map(|(method_name, action)| {
1221 let mut buf = String::new();
1222 emit_r_visitor_method(&mut buf, method_name, action);
1223 buf.trim_end_matches(['\n', ',']).to_string()
1225 })
1226 .collect();
1227 let mut visitor_obj = String::new();
1228 let _ = writeln!(visitor_obj, "list(");
1229 let _ = write!(visitor_obj, "{}", methods.join(",\n"));
1230 let _ = writeln!(visitor_obj);
1231 let _ = writeln!(visitor_obj, " )");
1232
1233 setup_lines.push(format!("visitor <- {visitor_obj}"));
1234}
1235
1236fn build_r_method_call(result_var: &str, method_name: &str, args: Option<&serde_json::Value>) -> String {
1239 match method_name {
1240 "root_child_count" => format!("{result_var}$root_child_count()"),
1241 "root_node_type" => format!("{result_var}$root_node_type()"),
1242 "named_children_count" => format!("{result_var}$named_children_count()"),
1243 "has_error_nodes" => format!("tree_has_error_nodes({result_var})"),
1244 "error_count" | "tree_error_count" => format!("tree_error_count({result_var})"),
1245 "tree_to_sexp" => format!("tree_to_sexp({result_var})"),
1246 "contains_node_type" => {
1247 let node_type = args
1248 .and_then(|a| a.get("node_type"))
1249 .and_then(|v| v.as_str())
1250 .unwrap_or("");
1251 format!("tree_contains_node_type({result_var}, \"{node_type}\")")
1252 }
1253 "find_nodes_by_type" => {
1254 let node_type = args
1255 .and_then(|a| a.get("node_type"))
1256 .and_then(|v| v.as_str())
1257 .unwrap_or("");
1258 format!("find_nodes_by_type({result_var}, \"{node_type}\")")
1259 }
1260 "run_query" => {
1261 let query_source = args
1262 .and_then(|a| a.get("query_source"))
1263 .and_then(|v| v.as_str())
1264 .unwrap_or("");
1265 let language = args
1266 .and_then(|a| a.get("language"))
1267 .and_then(|v| v.as_str())
1268 .unwrap_or("");
1269 format!("run_query({result_var}, \"{language}\", \"{query_source}\", source)")
1270 }
1271 _ => {
1272 if let Some(args_val) = args {
1273 let arg_str = args_val
1274 .as_object()
1275 .map(|obj| {
1276 obj.iter()
1277 .map(|(k, v)| {
1278 let r_val = json_to_r(v, false);
1279 format!("{k} = {r_val}")
1280 })
1281 .collect::<Vec<_>>()
1282 .join(", ")
1283 })
1284 .unwrap_or_default();
1285 format!("{result_var}${method_name}({arg_str})")
1286 } else {
1287 format!("{result_var}${method_name}()")
1288 }
1289 }
1290 }
1291}
1292
1293fn emit_r_visitor_method(out: &mut String, method_name: &str, action: &CallbackAction) {
1295 use std::fmt::Write as FmtWrite;
1296
1297 let params = match method_name {
1299 "visit_link" => "ctx, href, text, title",
1300 "visit_image" => "ctx, src, alt, title",
1301 "visit_heading" => "ctx, level, text, id",
1302 "visit_code_block" => "ctx, lang, code",
1303 "visit_code_inline"
1304 | "visit_strong"
1305 | "visit_emphasis"
1306 | "visit_strikethrough"
1307 | "visit_underline"
1308 | "visit_subscript"
1309 | "visit_superscript"
1310 | "visit_mark"
1311 | "visit_button"
1312 | "visit_summary"
1313 | "visit_figcaption"
1314 | "visit_definition_term"
1315 | "visit_definition_description" => "ctx, text",
1316 "visit_text" => "ctx, text",
1317 "visit_list_item" => "ctx, ordered, marker, text",
1318 "visit_blockquote" => "ctx, content, depth",
1319 "visit_table_row" => "ctx, cells, is_header",
1320 "visit_custom_element" => "ctx, tag_name, html",
1321 "visit_form" => "ctx, action_url, method",
1322 "visit_input" => "ctx, input_type, name, value",
1323 "visit_audio" | "visit_video" | "visit_iframe" => "ctx, src",
1324 "visit_details" => "ctx, open",
1325 "visit_element_end" | "visit_table_end" | "visit_definition_list_end" | "visit_figure_end" => "ctx, output",
1326 "visit_list_start" => "ctx, ordered",
1327 "visit_list_end" => "ctx, ordered, output",
1328 _ => "ctx",
1329 };
1330
1331 let _ = writeln!(out, " {method_name} = function({params}) {{");
1332 match action {
1333 CallbackAction::Skip => {
1334 let _ = writeln!(out, " \"skip\"");
1335 }
1336 CallbackAction::Continue => {
1337 let _ = writeln!(out, " \"continue\"");
1338 }
1339 CallbackAction::PreserveHtml => {
1340 let _ = writeln!(out, " \"preserve_html\"");
1341 }
1342 CallbackAction::Custom { output } => {
1343 let escaped = escape_r(output);
1344 let _ = writeln!(out, " list(custom = \"{escaped}\")");
1345 }
1346 CallbackAction::CustomTemplate { template, return_form } => {
1347 let r_expr = r_template_to_paste0(template);
1348 match return_form {
1349 TemplateReturnForm::BareString => {
1350 let _ = writeln!(out, " {r_expr}");
1351 }
1352 TemplateReturnForm::Dict => {
1353 let _ = writeln!(out, " list(custom = {r_expr})");
1354 }
1355 }
1356 }
1357 }
1358 let _ = writeln!(out, " }},");
1359}