1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
use crate::region::{wrap_anonymous, wrap_child};
use vyre::ir::{BufferAccess, BufferDecl, DataType, Expr, Node, Program};
use vyre_foundation::ir::model::expr::GeneratorRef;
/// SIMT Control Flow Graph (CFG) & Goto Resolver.
///
/// Linux-kernel C uses `goto` pervasively for error unwind. This pass
/// walks the flat SSA node array, registers every `label:` site into
/// a lock-free open-addressing hash table (`goto_labels_*`), and
/// resolves every `goto target;` site against that table to a CFG
/// edge written into `out_cfg_blocks`.
///
/// The hash insert / lookup use linear probing on a 4096-slot table
/// with `AtomicCompareExchange` for first-writer-wins registration
/// and a bounded linear scan for lookup. No external primitive
/// dependency - the kernel is self-contained.
///
/// Opcode sentinels (ASCII tag bytes): `0x4C41424C` = "LABL" (label
/// definition), `0x474F544F` = "GOTO" (goto site).
#[must_use]
pub fn c11_build_cfg_and_gotos(
ssa_nodes: &str,
out_cfg_blocks: &str,
goto_labels: &str,
num_ssa: Expr,
) -> Program {
let t = Expr::InvocationId { axis: 0 };
const TABLE_CAP: u32 = 4096;
const EMPTY_SLOT: u32 = 0xFFFF_FFFF;
// Shared-across-branches: a bounded linear-probe body that
// scans up to `TABLE_CAP` slots starting at `start_slot`.
let hash_slot = |key: Expr| -> Expr {
// Fibonacci hash, capped to TABLE_CAP.
Expr::bitand(
Expr::mul(key, Expr::u32(2_654_435_769)),
Expr::u32(TABLE_CAP - 1),
)
};
let label_phase = vec![
Node::let_bind("opcode", Expr::load(ssa_nodes, t.clone())),
// 1. Label definition: register `label_hash -> ssa_index` in the
// open-addressing hash table. Linear probe with atomic CAS
// keyed on EMPTY_SLOT so only the first writer wins.
Node::if_then(
Expr::eq(Expr::var("opcode"), Expr::u32(0x4C41424C)),
vec![
Node::let_bind(
"label_hash",
Expr::load(ssa_nodes, Expr::add(t.clone(), Expr::u32(1))),
),
Node::let_bind("slot", hash_slot(Expr::var("label_hash"))),
Node::let_bind("inserted", Expr::u32(0)),
Node::loop_for(
"probe",
Expr::u32(0),
Expr::u32(TABLE_CAP),
vec![Node::if_then(
Expr::eq(Expr::var("inserted"), Expr::u32(0)),
vec![
Node::let_bind(
"prev",
Expr::atomic_compare_exchange(
"goto_labels_keys",
Expr::var("slot"),
Expr::u32(EMPTY_SLOT),
Expr::var("label_hash"),
),
),
Node::if_then(
Expr::eq(Expr::var("prev"), Expr::u32(EMPTY_SLOT)),
vec![
Node::store("goto_labels_vals", Expr::var("slot"), t.clone()),
Node::assign("inserted", Expr::u32(1)),
],
),
Node::assign(
"slot",
Expr::bitand(
Expr::add(Expr::var("slot"), Expr::u32(1)),
Expr::u32(TABLE_CAP - 1),
),
),
],
)],
),
],
),
];
let goto_phase = vec![
Node::let_bind("opcode", Expr::load(ssa_nodes, t.clone())),
// 2. Goto site: look up `target_hash` in the same table, write
// the resolved SSA index (or EMPTY_SLOT on miss) into
// `out_cfg_blocks[t]`.
//
// Lookup uses first-match semantics: the first slot whose key
// equals `target_hash` wins. Two invariants that allow early exit:
// (a) `found == 1` after the first match — skip all further slots.
// (b) `k == EMPTY_SLOT` means no further entries exist in a
// linear-probe table, so the target is absent.
// Both guards are implemented under `Expr::eq(found, 0)` because
// the IR `loop_for` has no break node.
Node::if_then(
Expr::eq(Expr::var("opcode"), Expr::u32(0x474F544F)),
vec![
Node::let_bind(
"target_hash",
Expr::load(ssa_nodes, Expr::add(t.clone(), Expr::u32(1))),
),
Node::let_bind("lk_slot", hash_slot(Expr::var("target_hash"))),
Node::let_bind("resolved", Expr::u32(EMPTY_SLOT)),
Node::let_bind("found", Expr::u32(0)),
Node::loop_for(
"probe",
Expr::u32(0),
Expr::u32(TABLE_CAP),
vec![
// Only probe further when we have not yet found a match.
Node::if_then(
Expr::eq(Expr::var("found"), Expr::u32(0)),
vec![
Node::let_bind(
"k",
Expr::load("goto_labels_keys", Expr::var("lk_slot")),
),
// Stop on the first key match — first-writer-wins.
Node::if_then(
Expr::eq(Expr::var("k"), Expr::var("target_hash")),
vec![
Node::assign(
"resolved",
Expr::load(
"goto_labels_vals",
Expr::var("lk_slot"),
),
),
Node::assign("found", Expr::u32(1)),
],
),
// Stop also when we hit an empty slot: no further
// entries can follow in a linear-probe table.
Node::if_then(
Expr::eq(Expr::var("k"), Expr::u32(EMPTY_SLOT)),
vec![Node::assign("found", Expr::u32(1))],
),
Node::assign(
"lk_slot",
Expr::bitand(
Expr::add(Expr::var("lk_slot"), Expr::u32(1)),
Expr::u32(TABLE_CAP - 1),
),
),
],
),
],
),
Node::store(out_cfg_blocks, t.clone(), Expr::var("resolved")),
],
),
];
let n_ssa = match &num_ssa {
Expr::LitU32(n) => *n,
_ => 1,
};
Program::wrapped(
vec![
BufferDecl::storage(ssa_nodes, 0, BufferAccess::ReadOnly, DataType::U32)
.with_count(n_ssa),
BufferDecl::storage(out_cfg_blocks, 1, BufferAccess::ReadWrite, DataType::U32)
.with_count(n_ssa),
BufferDecl::storage(goto_labels, 2, BufferAccess::ReadWrite, DataType::U32)
.with_count(n_ssa),
BufferDecl::storage(
"goto_labels_keys",
3,
BufferAccess::ReadWrite,
DataType::U32,
)
.with_count(TABLE_CAP)
.with_output_byte_range(0..0),
BufferDecl::storage(
"goto_labels_vals",
4,
BufferAccess::ReadWrite,
DataType::U32,
)
.with_count(TABLE_CAP)
.with_output_byte_range(0..0),
],
[256, 1, 1],
vec![wrap_anonymous(
"vyre-libs::parsing::c11_build_cfg_and_gotos",
vec![wrap_child(
vyre_primitives::graph::csr_forward_traverse::OP_ID,
GeneratorRef {
name: "vyre-libs::parsing::c11_build_cfg_and_gotos".to_string(),
},
vec![
Node::if_then(Expr::lt(t.clone(), num_ssa.clone()), label_phase),
Node::barrier(),
Node::if_then(Expr::lt(t.clone(), num_ssa), goto_phase),
],
)],
)],
)
}
#[cfg(test)]
mod tests {
use super::*;
use vyre_reference::value::Value;
/// Regression: two LABL entries with the same label_hash (collision in the
/// linear-probe table) must resolve a GOTO to the FIRST inserted label, not
/// the last one. Before the fix the lookup loop had no early-exit, so the
/// second colliding slot would overwrite `resolved` and the wrong SSA index
/// was emitted.
///
/// Layout (7 invocations, each thread t reads ssa_nodes[t] as opcode):
/// t=0 NOP (0)
/// t=1 LABL opcode, hash at t=2 -> SSA index of this label is 1
/// t=2 7 (label_hash for t=1's LABL -- t=2 itself is not an opcode thread)
/// t=3 LABL opcode, hash at t=4 -> same hash 7, second writer -> linear-probes to slot+1
/// t=4 7
/// t=5 GOTO opcode, target_hash at t=6
/// t=6 7 (resolves via first-match -> t=1, i.e. SSA index 1)
#[test]
fn goto_collision_resolves_to_first_label_not_last() {
// Build with num_ssa=7 so all 7 threads are in-bounds.
let program = c11_build_cfg_and_gotos("ssa", "cfg", "labels", Expr::u32(7));
let mut ssa = vec![0u8; 7 * 4];
for (i, v) in [0u32, 0x4C41424C, 7, 0x4C41424C, 7, 0x474F544F, 7]
.iter()
.enumerate()
{
ssa[i * 4..(i + 1) * 4].copy_from_slice(&v.to_le_bytes());
}
// goto_labels_keys initialised to EMPTY_SLOT (0xFFFFFFFF).
let keys_init = vec![0xFFu8; 4 * 4096];
let vals_init = vec![0u8; 4 * 4096];
let inputs = vec![
Value::from(ssa),
Value::from(vec![0u8; 7 * 4]), // out_cfg_blocks
Value::from(vec![0u8; 7 * 4]), // goto_labels (labels alias buffer)
Value::from(keys_init),
Value::from(vals_init),
];
let outputs =
vyre_reference::reference_eval(&program, &inputs).expect("cfg eval must succeed");
let cfg = vyre_primitives::wire::decode_u32_le_bytes_all(&outputs[0].to_bytes());
// t=5 is the GOTO thread. With first-match semantics it must resolve to
// the SSA index of the FIRST LABL with hash 7, which is t=1.
assert_eq!(
cfg[5], 1,
"goto must resolve to first label (SSA index 1), not last colliding label (SSA index 3); \
got cfg[5]={}",
cfg[5]
);
}
}
inventory::submit! {
crate::harness::OpEntry {
id: "vyre-libs::parsing::c11_build_cfg_and_gotos",
build: || c11_build_cfg_and_gotos("ssa", "cfg", "labels", Expr::u32(5)),
test_inputs: Some(|| {
let mut ssa = Vec::with_capacity(5 * 4);
for value in [0u32, 0x4C41424C, 7, 0x474F544F, 7] {
ssa.extend_from_slice(&value.to_le_bytes());
}
vec![vec![
ssa,
vec![0u8; 5 * 4],
vec![0u8; 5 * 4],
vec![0xFFu8; 4 * 4096],
vec![0u8; 4 * 4096],
]]
}),
expected_output: Some(|| {
let mut cfg = Vec::with_capacity(5 * 4);
for value in [0u32, 0, 0, 1, 0] {
cfg.extend_from_slice(&value.to_le_bytes());
}
let labels = vec![0u8; 5 * 4];
vec![vec![cfg, labels, Vec::new(), Vec::new()]]
}),
category: Some("compiler"),
}
}