1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// Regression coverage for the generic `OP_CALL_BUILTIN_TREE` bridge that
// lets tree-only builtins (rgx, rgxall, fmt variadic, 2-arg rd, rdb) run
// under `--vm` and `--jit` via interpreter fallback.
//
// Pre-fix, these all failed at VM compile time with
// `Compile error: undefined function: <name>` because the VM emitter fell
// through to OP_CALL's user-function lookup. The bridge routes them through
// the same `interpreter::call_function` the tree engine uses, so every
// engine produces identical output for the same source.
//
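// Every test runs under each engine flag listed in `ENGINES` (`--vm` and
// `--jit`) and asserts the output equals one fixed expected value, so those
// engines must agree on the result. NOTE(review): the tree engine is not in
// `ENGINES`, so it is not exercised directly here; confirm that is intended.
// That agreement is the contract: future native lowerings can graduate any
// specific builtin off the bridge without changing user-visible behaviour.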
use std::process::Command;
/// Engine-selection flags handed to the `ilo` binary; `check` runs each test
/// source once per entry in this list.
const ENGINES: &[&str] = &["--vm", "--jit"];
/// Builds a fresh `Command` pointing at the `ilo` binary Cargo compiled for
/// this integration test (path supplied via `CARGO_BIN_EXE_ilo`).
fn ilo() -> Command {
    let binary_path = env!("CARGO_BIN_EXE_ilo");
    Command::new(binary_path)
}
/// Runs `src` through the `ilo` binary with the given `engine` flag, passing
/// `f` as the trailing argument, and returns stdout with surrounding
/// whitespace trimmed.
///
/// # Panics
/// Panics if the process cannot be spawned, or if it exits unsuccessfully
/// (the message then carries the captured stderr).
fn run_engine(src: &str, engine: &str) -> String {
    let mut cmd = ilo();
    cmd.args([src, engine, "f"]);
    let output = cmd.output().expect("failed to run ilo");

    if !output.status.success() {
        // Only decode stderr on the failure path, where it is needed.
        let stderr_text = String::from_utf8_lossy(&output.stderr);
        panic!("ilo {engine} failed for `{src}`: stderr={}", stderr_text);
    }

    let stdout_text = String::from_utf8_lossy(&output.stdout);
    stdout_text.trim().to_owned()
}
/// Runs `src` once per flag in `ENGINES` and panics on the first engine whose
/// trimmed stdout is not exactly `expected`.
fn check(src: &str, expected: &str) {
    ENGINES.iter().copied().for_each(|engine| {
        let got = run_engine(src, engine);
        assert_eq!(
            got, expected,
            "engine={engine}, src=`{src}`: got `{got}`, expected `{expected}`"
        );
    });
}
// ── rgx ────────────────────────────────────────────────────────────────
#[test]
fn rgx_no_group_returns_all_matches() {
    // A pattern without a capture group yields every match. Before the
    // bridge, the tree engine printed `[1, 2, 3]` while VM/Cranelift stopped
    // with `Compile error: undefined function: rgx`.
    let src = r#"f>L t;rgx "\d+" "a1 b2 c3""#;
    let expected = "[1, 2, 3]";
    check(src, expected);
}
#[test]
fn rgx_with_group_returns_first_match_groups() {
    // When the pattern has capture groups, `rgx` reports only the groups of
    // the first match; `rgxall` is the variant that covers every match.
    // The expectation is identical for each engine.
    let src = r#"f>L t;rgx "(\w+)=(\d+)" "x=1 y=22 z=333""#;
    let expected = "[x, 1]";
    check(src, expected);
}
#[test]
fn rgx_no_match_returns_empty_list() {
    // No digits in the haystack, so the result prints as an empty list.
    let src = r#"f>L t;rgx "\d+" "no digits""#;
    let expected = "[]";
    check(src, expected);
}
// ── rgxall ─────────────────────────────────────────────────────────────
#[test]
fn rgxall_html_extraction_cross_engine() {
    // HTML-scrape scenario that motivated `rgxall`: one capture group,
    // three `<h2>` elements, one single-element group list per match.
    // Cranelift and the VM have to match the tree output byte-for-byte.
    let src = r#"f>L (L t);rgxall "<h2>([^<]+)</h2>" "<h2>a</h2> <h2>b</h2> <h2>c</h2>""#;
    let expected = "[[a], [b], [c]]";
    check(src, expected);
}
#[test]
fn rgxall_two_groups_cross_engine() {
    // Two capture groups per match: each match contributes a two-element
    // inner list, in the order the matches appear in the input.
    let src = r#"f>L (L t);rgxall "(\w+)=(\d+)" "x=1 y=22 z=333""#;
    let expected = "[[x, 1], [y, 22], [z, 333]]";
    check(src, expected);
}
// ── fmt (variadic) ─────────────────────────────────────────────────────
#[test]
fn fmt_zero_holes() {
    // A template with no `{}` slots and no value args prints unchanged.
    let src = r#"f>t;fmt "literal""#;
    let expected = "literal";
    check(src, expected);
}
#[test]
fn fmt_one_hole() {
    // Single `{}` slot filled by a single numeric argument.
    let src = r#"f>t;fmt "x={}" 42"#;
    let expected = "x=42";
    check(src, expected);
}
#[test]
fn fmt_three_holes_mixed_types() {
    // Three slots fed by number, text, number. Mixed-type variadic calls
    // like this are why `fmt` has a variadic shape rather than a
    // fixed-arity opcode.
    let src = r#"f>t;fmt "{} {} {}" 1 "two" 3"#;
    let expected = "1 two 3";
    check(src, expected);
}
#[test]
fn fmt_extra_holes_rejected_at_verify() {
    // Pre-0.12.1, when a literal template had more `{}`s than value args,
    // the extra placeholders silently passed through as literal `{}`.
    // That was the persona footgun — `fmt "x={} y={}" [a, b]` silently
    // mis-filled (list bound to first slot, second slot left literal).
    // Verify now rejects any literal-template fmt where slot count !=
    // value-arg count with ILO-T013.
    //
    // The rejection is checked under every flag in `ENGINES`, the same set
    // the rest of this file uses, so the test covers each engine rather
    // than a single hard-coded one.
    let src = r#"f>t;fmt "{} {}" 1"#;
    for &engine in ENGINES {
        let out = ilo()
            .args([src, engine, "f"])
            .output()
            .expect("failed to run ilo");
        assert!(
            !out.status.success(),
            "engine={engine}: expected verify error, ilo succeeded"
        );

        // The diagnostic may land on either stream, so search both.
        let stderr = String::from_utf8_lossy(&out.stderr);
        let stdout = String::from_utf8_lossy(&out.stdout);
        let combined = format!("{stderr}{stdout}");
        assert!(
            combined.contains("ILO-T013"),
            "engine={engine}: expected ILO-T013, got: {combined}"
        );
        assert!(
            combined.contains("`{}` slot") || combined.contains("'fmt'"),
            "engine={engine}: expected fmt slot diagnostic, got: {combined}"
        );
    }
}
// ── rd (2-arg) ─────────────────────────────────────────────────────────
#[test]
fn rd_csv_two_arg_in_block_function() {
    // 2-arg `rd path fmt` returns `R (L (L t)) t`, so `f` is declared with
    // that same Result return type.
    use std::io::Write;

    /// Removes the fixture file when dropped, so the temp directory is
    /// cleaned up even if an assertion below panics.
    struct TempCsv(std::path::PathBuf);
    impl Drop for TempCsv {
        fn drop(&mut self) {
            // Best-effort cleanup: a missing file is not a test failure.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    // The process id keeps the name distinct across concurrent test runs.
    let file_name = format!("ilo-bridge-rd-{}.csv", std::process::id());
    let fixture = TempCsv(std::env::temp_dir().join(file_name));
    {
        // Scoped so the handle is closed before `ilo` reads the file.
        let mut f = std::fs::File::create(&fixture.0).expect("failed to create CSV fixture");
        writeln!(f, "a,1").expect("failed to write CSV fixture");
        writeln!(f, "b,2").expect("failed to write CSV fixture");
    }

    let path_text = fixture
        .0
        .to_str()
        .expect("temp fixture path is not valid UTF-8");
    let src = format!(r#"f>R (L (L t)) t;rd "{}" "csv""#, path_text);
    // Top-level Value::Ok prints bare (no `~` prefix) per the symmetric
    // stdout/stderr split — see regression_main_ok_stdout_bare.rs.
    check(&src, "[[a, 1], [b, 2]]");
}
// ── rdb ────────────────────────────────────────────────────────────────
#[test]
fn rdb_csv_cross_engine() {
    // `rdb` parses an in-memory buffer in the requested format. It goes
    // through the same dispatcher as `rd`, so the bridge has to cope with
    // both. The two CSV rows are separated by the line break embedded in
    // the raw string below, which must stay exactly as written.
    // A top-level Value::Ok prints bare (no `~` prefix).
    let src = r#"f>R (L (L t)) t;rdb "a,1
b,2" "csv""#;
    let expected = "[[a, 1], [b, 2]]";
    check(src, expected);
}
#[test]
fn rdb_csv_with_bang_unwrap() {
    // `!` auto-unwrap applied to a Result-returning bridge call: the
    // enclosing function has to return a Result too, and `!` pulls out the
    // Ok payload. The printed form carries no wrap markers because only the
    // inner list remains. The line break inside the raw string separates
    // the two CSV rows and must stay exactly as written.
    let src = r#"f>R (L (L t)) t;rdb! "a,1
b,2" "csv""#;
    let expected = "[[a, 1], [b, 2]]";
    check(src, expected);
}
// ── Cross-engine determinism guard ────────────────────────────────────
#[test]
fn engines_agree_on_chained_bridge_calls() {
    // One bridge call feeds another: the `rgx` result is bound to `hits`
    // and its length is formatted by `fmt`. This is meant to surface
    // state-leak bugs, such as a stale RC or NanVal tag surviving from one
    // bridge invocation into the next.
    let src = r#"f>t;hits=rgx "\d+" "a1 b2 c3";fmt "{} hits" len hits"#;
    let expected = "3 hits";
    check(src, expected);
}