#!/usr/bin/env python3
"""Generate conformance spec files for all operations missing from vyre-conform.
This script produces valid Rust source files following the exact pattern
established by existing specs like `add.rs`, `fnv1a32.rs`, and `stack.rs`.
Each generated spec contains:
- VYRE_OP_METADATA constant
- GOLDEN samples
- KAT vectors
- ADVERSARIAL inputs
- cpu() reference function
- wgsl() shader function
- vyre_op() -> OpSpec builder
- spec() compatibility alias
- coverage_artifacts_are_registered test
"""
import os
import textwrap
# Directory two levels above this script file; every path below hangs off it.
_CONFORM_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# "<root>/src" and its "specs" subdirectory, where generated files are written.
CONFORM_SRC = os.path.join(_CONFORM_ROOT, "src")
SPECS_DIR = os.path.join(CONFORM_SRC, "specs")
# "<root>/../core/src/ops" (left un-normalised, exactly as joined).
CORE_OPS = os.path.join(_CONFORM_ROOT, "..", "core", "src", "ops")
# ─── Workgroup primitives ─────────────────────────────────────────────────
# Table of workgroup primitives, keyed by the file/primitive name.
#
# Per-entry keys:
#   "id", "desc"      - operation id and human-readable description.
#   "capacity", "ops", "result_fields"
#                     - descriptive metadata; make_workgroup_spec in this file
#                       does not read them.
#   "cpu_body"        - optional Rust source for the body of the generated
#                       `cpu(input: &[u8]) -> Vec<u8>` function. Entries without
#                       it get the generic stub built by make_workgroup_spec.
#   "golden_input", "golden_expected", "golden_reason"
#                     - optional byte lists / text used for both the GOLDEN
#                       sample and the KAT vector of the generated file.
#
# Every cpu_body below consumes 16-byte commands (four little-endian u32
# words read via `read_u32_le`) and appends one 16-byte result per command.
WORKGROUP_SPECS = {
# FIFO queue: VecDeque-backed reference. Result words are
# (status, value, len_after, empty_after). ENQUEUE on a full queue reports
# STATUS_OVERFLOW, DEQUEUE on an empty queue reports STATUS_UNDERFLOW, and
# unknown opcodes only report the current length/emptiness.
"queue_fifo": {
"id": "workgroup.queue_fifo",
"desc": "workgroup-local bounded FIFO queue with atomic coordination",
"capacity": 256,
"ops": {
"ENQUEUE": 1,
"DEQUEUE": 2,
"LEN": 3,
"IS_EMPTY": 4,
},
"result_fields": "status, value, len, is_empty",
"cpu_body": textwrap.dedent("""\
const CAPACITY: u32 = 256;
const OP_ENQUEUE: u32 = 1;
const OP_DEQUEUE: u32 = 2;
const OP_LEN: u32 = 3;
const OP_IS_EMPTY: u32 = 4;
const STATUS_OK: u32 = 0;
const STATUS_OVERFLOW: u32 = 1;
const STATUS_UNDERFLOW: u32 = 2;
const SENTINEL: u32 = 0xFFFF_FFFF;
const CMD_SIZE: usize = 16;
let command_count = input.len() / CMD_SIZE;
let mut output = Vec::with_capacity(command_count * CMD_SIZE);
let mut queue: std::collections::VecDeque<u32> = std::collections::VecDeque::new();
for i in 0..command_count {
let base = i * CMD_SIZE;
let op = read_u32_le(input, base);
let _lane = read_u32_le(input, base + 4);
let value = read_u32_le(input, base + 8);
let mut status = STATUS_OK;
let mut result_value = SENTINEL;
let len_after;
let empty_after;
match op {
OP_ENQUEUE => {
if queue.len() >= CAPACITY as usize {
status = STATUS_OVERFLOW;
len_after = CAPACITY;
empty_after = 0;
} else {
queue.push_back(value);
len_after = queue.len() as u32;
empty_after = 0;
}
}
OP_DEQUEUE => {
if queue.is_empty() {
status = STATUS_UNDERFLOW;
len_after = 0;
empty_after = 1;
} else {
result_value = queue.pop_front().unwrap_or(SENTINEL);
len_after = queue.len() as u32;
empty_after = u32::from(queue.is_empty());
}
}
OP_LEN => {
len_after = queue.len() as u32;
empty_after = u32::from(queue.is_empty());
}
OP_IS_EMPTY => {
len_after = queue.len() as u32;
empty_after = u32::from(queue.is_empty());
result_value = empty_after;
}
_ => {
len_after = queue.len() as u32;
empty_after = u32::from(queue.is_empty());
}
}
output.extend_from_slice(&status.to_le_bytes());
output.extend_from_slice(&result_value.to_le_bytes());
output.extend_from_slice(&len_after.to_le_bytes());
output.extend_from_slice(&empty_after.to_le_bytes());
}
output"""),
# Two commands: ENQUEUE value 42, then DEQUEUE.
"golden_input": [1,0,0,0, 0,0,0,0, 42,0,0,0, 0,0,0,0, 2,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0],
# (OK, SENTINEL, len 1, not empty) then (OK, 42, len 0, empty).
"golden_expected": [0,0,0,0, 0xFF,0xFF,0xFF,0xFF, 1,0,0,0, 0,0,0,0, 0,0,0,0, 42,0,0,0, 0,0,0,0, 1,0,0,0],
"golden_reason": "enqueue then dequeue returns the enqueued value (FIFO identity)",
},
# Max-priority queue: hand-written binary heap over (priority, value) tuples,
# so ties on priority are ordered by value. Command words are
# (op, lane, value, priority); result words are (status, value, priority, len).
"queue_priority": {
"id": "workgroup.queue_priority",
"desc": "workgroup-local bounded max-priority queue (binary heap) with atomic coordination",
"capacity": 128,
"ops": {
"PUSH": 1,
"POP_MAX": 2,
"PEEK_MAX": 3,
},
"result_fields": "status, value, priority, len",
"cpu_body": textwrap.dedent("""\
const CAPACITY: usize = 128;
const OP_PUSH: u32 = 1;
const OP_POP_MAX: u32 = 2;
const OP_PEEK_MAX: u32 = 3;
const STATUS_OK: u32 = 0;
const STATUS_OVERFLOW: u32 = 1;
const STATUS_UNDERFLOW: u32 = 2;
const SENTINEL: u32 = 0xFFFF_FFFF;
const CMD_SIZE: usize = 16;
let command_count = input.len() / CMD_SIZE;
let mut output = Vec::with_capacity(command_count * CMD_SIZE);
// Binary max-heap: (priority, value) pairs
let mut heap: Vec<(u32, u32)> = Vec::new();
for i in 0..command_count {
let base = i * CMD_SIZE;
let op = read_u32_le(input, base);
let _lane = read_u32_le(input, base + 4);
let value = read_u32_le(input, base + 8);
let priority = read_u32_le(input, base + 12);
let mut status = STATUS_OK;
let mut result_value = SENTINEL;
let mut result_priority = SENTINEL;
let len_after;
match op {
OP_PUSH => {
if heap.len() >= CAPACITY {
status = STATUS_OVERFLOW;
len_after = CAPACITY as u32;
} else {
heap.push((priority, value));
// sift up
let mut idx = heap.len() - 1;
while idx > 0 {
let parent = (idx - 1) / 2;
if heap[idx] > heap[parent] {
heap.swap(idx, parent);
idx = parent;
} else {
break;
}
}
len_after = heap.len() as u32;
}
}
OP_POP_MAX => {
if heap.is_empty() {
status = STATUS_UNDERFLOW;
len_after = 0;
} else {
let (p, v) = heap[0];
result_value = v;
result_priority = p;
let last = heap.len() - 1;
heap.swap(0, last);
heap.pop();
// sift down
let count = heap.len();
let mut idx = 0;
loop {
let left = idx * 2 + 1;
let right = left + 1;
let mut best = idx;
if left < count && heap[left] > heap[best] {
best = left;
}
if right < count && heap[right] > heap[best] {
best = right;
}
if best == idx {
break;
}
heap.swap(idx, best);
idx = best;
}
len_after = heap.len() as u32;
}
}
OP_PEEK_MAX => {
if heap.is_empty() {
status = STATUS_UNDERFLOW;
len_after = 0;
} else {
result_value = heap[0].1;
result_priority = heap[0].0;
len_after = heap.len() as u32;
}
}
_ => {
len_after = heap.len() as u32;
}
}
output.extend_from_slice(&status.to_le_bytes());
output.extend_from_slice(&result_value.to_le_bytes());
output.extend_from_slice(&result_priority.to_le_bytes());
output.extend_from_slice(&len_after.to_le_bytes());
}
output"""),
# Three commands: PUSH(value 10, priority 5), PUSH(value 20, priority 10), POP_MAX.
"golden_input": [1,0,0,0, 0,0,0,0, 10,0,0,0, 5,0,0,0, 1,0,0,0, 0,0,0,0, 20,0,0,0, 10,0,0,0, 2,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0],
"golden_expected": [0,0,0,0, 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF, 1,0,0,0, 0,0,0,0, 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF, 2,0,0,0, 0,0,0,0, 20,0,0,0, 10,0,0,0, 1,0,0,0],
"golden_reason": "push(10,pri=5), push(20,pri=10), pop_max returns (20,10) — max-priority first",
},
# Hash map: open addressing with linear probing over two parallel arrays.
# Command words are (op, lane, key, value); result words are
# (status, value, len, 0). REMOVE clears the slot and then re-inserts the
# entries that follow it up to the next empty slot.
# NOTE(review): 0xFFFF_FFFF doubles as the empty-slot marker, so inserting
# that key is indistinguishable from an empty slot in this body — confirm
# callers never use it as a real key.
"hashmap": {
"id": "workgroup.hashmap",
"desc": "workgroup-local open-addressed hash map with linear probing",
"capacity": 1024,
"ops": {
"INSERT": 1,
"LOOKUP": 2,
"REMOVE": 3,
},
"result_fields": "status, value, len, reserved",
"cpu_body": textwrap.dedent("""\
const CAPACITY: usize = 1024;
const EMPTY_KEY: u32 = 0xFFFF_FFFF;
const EMPTY_VALUE: u32 = 0xFFFF_FFFF;
const OP_INSERT: u32 = 1;
const OP_LOOKUP: u32 = 2;
const OP_REMOVE: u32 = 3;
const STATUS_OK: u32 = 0;
const STATUS_OVERFLOW: u32 = 1;
const STATUS_NOT_FOUND: u32 = 2;
const STATUS_REPLACED: u32 = 3;
const CMD_SIZE: usize = 16;
fn hash_u32(key: u32) -> u32 {
let mut x = key.wrapping_add(0x9e3779b1);
x = (x ^ (x >> 16)).wrapping_mul(0x7feb352d);
x = (x ^ (x >> 15)).wrapping_mul(0x846ca68b);
x ^ (x >> 16)
}
let command_count = input.len() / CMD_SIZE;
let mut output = Vec::with_capacity(command_count * CMD_SIZE);
let mut keys = vec![EMPTY_KEY; CAPACITY];
let mut values = vec![EMPTY_VALUE; CAPACITY];
let mut len: u32 = 0;
for i in 0..command_count {
let base = i * CMD_SIZE;
let op = read_u32_le(input, base);
let _lane = read_u32_le(input, base + 4);
let key = read_u32_le(input, base + 8);
let value = read_u32_le(input, base + 12);
let mut status = STATUS_OK;
let mut result_value = EMPTY_VALUE;
match op {
OP_INSERT => {
let start = (hash_u32(key) & (CAPACITY as u32 - 1)) as usize;
let mut found = false;
for probe in 0..CAPACITY {
let idx = (start + probe) & (CAPACITY - 1);
if keys[idx] == EMPTY_KEY {
keys[idx] = key;
values[idx] = value;
len += 1;
found = true;
break;
} else if keys[idx] == key {
values[idx] = value;
status = STATUS_REPLACED;
found = true;
break;
}
}
if !found {
status = STATUS_OVERFLOW;
}
}
OP_LOOKUP => {
let start = (hash_u32(key) & (CAPACITY as u32 - 1)) as usize;
let mut found = false;
for probe in 0..CAPACITY {
let idx = (start + probe) & (CAPACITY - 1);
if keys[idx] == EMPTY_KEY {
break;
}
if keys[idx] == key {
result_value = values[idx];
found = true;
break;
}
}
if !found {
status = STATUS_NOT_FOUND;
}
}
OP_REMOVE => {
let start = (hash_u32(key) & (CAPACITY as u32 - 1)) as usize;
let mut found = false;
for probe in 0..CAPACITY {
let idx = (start + probe) & (CAPACITY - 1);
if keys[idx] == EMPTY_KEY {
break;
}
if keys[idx] == key {
result_value = values[idx];
keys[idx] = EMPTY_KEY;
values[idx] = EMPTY_VALUE;
len -= 1;
found = true;
// Rehash following entries
let mut cursor = (idx + 1) & (CAPACITY - 1);
loop {
if keys[cursor] == EMPTY_KEY {
break;
}
let dk = keys[cursor];
let dv = values[cursor];
keys[cursor] = EMPTY_KEY;
values[cursor] = EMPTY_VALUE;
len -= 1;
// Re-insert
let rs = (hash_u32(dk) & (CAPACITY as u32 - 1)) as usize;
for rp in 0..CAPACITY {
let ri = (rs + rp) & (CAPACITY - 1);
if keys[ri] == EMPTY_KEY {
keys[ri] = dk;
values[ri] = dv;
len += 1;
break;
}
}
cursor = (cursor + 1) & (CAPACITY - 1);
}
break;
}
}
if !found {
status = STATUS_NOT_FOUND;
}
}
_ => {}
}
output.extend_from_slice(&status.to_le_bytes());
output.extend_from_slice(&result_value.to_le_bytes());
output.extend_from_slice(&len.to_le_bytes());
output.extend_from_slice(&0u32.to_le_bytes());
}
output"""),
# Two commands: INSERT(key 42, value 99), then LOOKUP(key 42).
"golden_input": [1,0,0,0, 0,0,0,0, 42,0,0,0, 99,0,0,0, 2,0,0,0, 0,0,0,0, 42,0,0,0, 0,0,0,0],
"golden_expected": [0,0,0,0, 0xFF,0xFF,0xFF,0xFF, 1,0,0,0, 0,0,0,0, 0,0,0,0, 99,0,0,0, 1,0,0,0, 0,0,0,0],
"golden_reason": "insert(key=42,val=99), lookup(key=42) returns val=99",
},
# The remaining entries carry metadata only: with no "cpu_body" and no
# golden vectors, make_workgroup_spec emits its generic stub body and
# empty GOLDEN/KAT byte arrays for them.
"state_machine": {
"id": "workgroup.state_machine",
"desc": "workgroup-local finite state machine with transition table",
"capacity": 256,
"ops": {"STEP": 1, "GET_STATE": 2, "RESET": 3},
"result_fields": "status, state, accepted, reserved",
},
"string_interner": {
"id": "workgroup.string_interner",
"desc": "workgroup-local string deduplication table mapping byte spans to integer IDs",
"capacity": 256,
"ops": {"INTERN": 1, "LOOKUP": 2, "GET_STRING": 3},
"result_fields": "status, id, len, reserved",
},
"typed_arena": {
"id": "workgroup.typed_arena",
"desc": "workgroup-local bump-allocated typed arena for fixed-size objects",
"capacity": 1024,
"ops": {"ALLOC": 1, "DEREF": 2, "LEN": 3},
"result_fields": "status, value, len, reserved",
},
"union_find": {
"id": "workgroup.union_find",
"desc": "workgroup-local disjoint-set (union-find) data structure with path compression",
"capacity": 256,
"ops": {"FIND": 1, "UNION": 2, "CONNECTED": 3},
"result_fields": "status, root, size, reserved",
},
"visitor": {
"id": "workgroup.visitor",
"desc": "workgroup-local tree visitor for depth-first traversal coordination",
"capacity": 256,
"ops": {"PUSH_NODE": 1, "POP_NODE": 2, "PEEK_NODE": 3},
"result_fields": "status, node_id, depth, reserved",
},
}
# ─── Buffer operations ────────────────────────────────────────────────────
# Table of buffer operations, keyed by the generated file name.
#
# Per-entry keys:
#   "id", "desc"        - operation id and description.
#   "sig_in", "sig_out" - comma-separated input type names and the output
#                         type name; main() forwards them to
#                         make_simple_bytes_spec.
#   "cpu"               - single-line Rust source for the body of the
#                         generated `cpu(input: &[u8]) -> Vec<u8>` function.
#
# The generated spec always exercises `cpu` with an empty input (its KAT and
# adversarial vectors are `&[]`), so every body here must tolerate inputs
# shorter than its header. `byte_count` and `memchr` therefore take the
# payload with `input.get(4..).unwrap_or(&[])` rather than `&input[4..]`,
# which panics when fewer than four bytes are supplied.
BUFFER_OPS = {
    # First four bytes: little-endian u32 whose low byte is the target;
    # the remaining bytes are the data to scan.
    "byte_count": {
        "id": "buffer.byte_count",
        "desc": "count occurrences of a specific byte in a buffer",
        "sig_in": "Bytes, U32",
        "sig_out": "U32",
        "cpu": "let target = if input.len() >= 4 { u32::from_le_bytes([input[0], input[1], input[2], input[3]]) } else { 0 } as u8; let data = input.get(4..).unwrap_or(&[]); let count = data.iter().filter(|&&b| b == target).count() as u32; count.to_le_bytes().to_vec()",
    },
    # Inputs shorter than four bytes yield four zero bytes.
    "byte_swap_u32": {
        "id": "buffer.byte_swap_u32",
        "desc": "reverse byte order of a u32 value",
        "sig_in": "U32",
        "sig_out": "U32",
        "cpu": "if input.len() < 4 { return vec![0; 4]; } let v = u32::from_le_bytes([input[0], input[1], input[2], input[3]]); v.swap_bytes().to_le_bytes().to_vec()",
    },
    # Splits the input at len/2 and compares the halves:
    # Less -> 0xFFFF_FFFF, Equal -> 0, Greater -> 1.
    "memcmp": {
        "id": "buffer.memcmp",
        "desc": "compare two byte buffers lexicographically",
        "sig_in": "Bytes",
        "sig_out": "U32",
        "cpu": "let half = input.len() / 2; let a = &input[..half]; let b = &input[half..]; let result: u32 = match a.cmp(b) { std::cmp::Ordering::Less => 0xFFFF_FFFF, std::cmp::Ordering::Equal => 0, std::cmp::Ordering::Greater => 1, }; result.to_le_bytes().to_vec()",
    },
    # Byte 0 is the fill value, bytes 4..8 the little-endian repeat count.
    "memset": {
        "id": "buffer.memset",
        "desc": "fill buffer with a specified byte value",
        "sig_in": "Bytes, U32",
        "sig_out": "Bytes",
        "cpu": "let fill = if input.len() >= 4 { input[0] } else { 0 }; let count = if input.len() >= 8 { u32::from_le_bytes([input[4], input[5], input[6], input[7]]) as usize } else { 0 }; vec![fill; count]",
    },
    # Byte 0 is the target; the payload starts at offset 4. Returns the
    # first matching payload index, or 0xFFFF_FFFF when there is none.
    "memchr": {
        "id": "buffer.memchr",
        "desc": "find the first occurrence of a byte value in a buffer",
        "sig_in": "Bytes, U32",
        "sig_out": "U32",
        "cpu": "let target = if input.len() >= 4 { input[0] } else { 0 }; let data = input.get(4..).unwrap_or(&[]); let pos = data.iter().position(|&b| b == target).map(|p| p as u32).unwrap_or(0xFFFF_FFFF); pos.to_le_bytes().to_vec()",
    },
    "memcpy": {
        "id": "buffer.memcpy",
        "desc": "copy bytes from source to destination buffer",
        "sig_in": "Bytes",
        "sig_out": "Bytes",
        "cpu": "input.to_vec()",
    },
}
# ─── Reduction operations ─────────────────────────────────────────────────
# Table of reduction operations, keyed by the generated file name.
#
# Per-entry keys:
#   "id", "desc" - operation id and description.
#   "cpu"        - single-line Rust source for the body of the generated
#                  `cpu(input: &[u8]) -> Vec<u8>` function.
#   "laws"       - algebraic-law expressions; make_simple_bytes_spec prefixes
#                  each with `crate::spec::law::`.
#
# Every body walks the input in 4-byte little-endian u32 words
# (`while i + 4 <= input.len()`), so trailing bytes that do not fill a word
# are ignored, and returns a single 4-byte little-endian u32.
# NOTE(review): all of these return four bytes even for an empty input,
# while the template in make_simple_bytes_spec hard-codes an empty expected
# output for the empty-input GOLDEN/KAT vector — confirm how those vectors
# are checked.
REDUCTION_OPS = {
# Wrapping sum; empty input yields 0.
"reduce_sum_u32": {
"id": "reduction.sum_u32",
"desc": "compute the wrapping sum of all u32 values in a buffer",
"cpu": "let mut sum: u32 = 0; let mut i = 0; while i + 4 <= input.len() { sum = sum.wrapping_add(u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]])); i += 4; } sum.to_le_bytes().to_vec()",
"laws": ["AlgebraicLaw::Commutative", "AlgebraicLaw::Associative", "AlgebraicLaw::Identity { element: 0 }"],
},
# Starts from u32::MAX, which is therefore the result for an empty input.
"reduce_min_u32": {
"id": "reduction.min_u32",
"desc": "find the minimum u32 value in a buffer",
"cpu": "let mut result: u32 = u32::MAX; let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); result = result.min(v); i += 4; } result.to_le_bytes().to_vec()",
"laws": ["AlgebraicLaw::Commutative", "AlgebraicLaw::Associative", "AlgebraicLaw::Idempotent"],
},
# Starts from 0, which is therefore the result for an empty input.
"reduce_max_u32": {
"id": "reduction.max_u32",
"desc": "find the maximum u32 value in a buffer",
"cpu": "let mut result: u32 = 0; let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); result = result.max(v); i += 4; } result.to_le_bytes().to_vec()",
"laws": ["AlgebraicLaw::Commutative", "AlgebraicLaw::Associative", "AlgebraicLaw::Idempotent"],
},
# 1 unless some word is zero; an empty input yields 1.
"reduce_all": {
"id": "reduction.all",
"desc": "logical AND of all u32 values (nonzero = true)",
"cpu": "let mut result: u32 = 1; let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v == 0 { result = 0; } i += 4; } if input.is_empty() { result = 1; } result.to_le_bytes().to_vec()",
"laws": ["AlgebraicLaw::Commutative", "AlgebraicLaw::Associative"],
},
# 1 if some word is nonzero, otherwise 0 (including for an empty input).
"reduce_any": {
"id": "reduction.any",
"desc": "logical OR of all u32 values (nonzero = true)",
"cpu": "let mut result: u32 = 0; let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v != 0 { result = 1; } i += 4; } result.to_le_bytes().to_vec()",
"laws": ["AlgebraicLaw::Commutative", "AlgebraicLaw::Associative"],
},
# Number of nonzero words.
"reduce_count": {
"id": "reduction.count",
"desc": "count the number of nonzero u32 values in a buffer",
"cpu": "let mut count: u32 = 0; let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v != 0 { count += 1; } i += 4; } count.to_le_bytes().to_vec()",
"laws": [],
},
# Strict `<` against a u32::MAX seed: ties keep the earliest index, and an
# input whose words are all u32::MAX (or an empty input) yields index 0.
"argmin_u32": {
"id": "reduction.argmin_u32",
"desc": "index of the minimum u32 value in a buffer",
"cpu": "let mut best_idx: u32 = 0; let mut best_val: u32 = u32::MAX; let mut i = 0; let mut idx = 0u32; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v < best_val { best_val = v; best_idx = idx; } i += 4; idx += 1; } best_idx.to_le_bytes().to_vec()",
"laws": [],
},
# Strict `>` against a 0 seed: ties keep the earliest index, and an
# all-zero or empty input yields index 0.
"argmax_u32": {
"id": "reduction.argmax_u32",
"desc": "index of the maximum u32 value in a buffer",
"cpu": "let mut best_idx: u32 = 0; let mut best_val: u32 = 0; let mut i = 0; let mut idx = 0u32; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v > best_val { best_val = v; best_idx = idx; } i += 4; idx += 1; } best_idx.to_le_bytes().to_vec()",
"laws": [],
},
}
# ─── Encode operations ────────────────────────────────────────────────────
# Table of encode operations, keyed by the generated file name.
# Each entry provides "id", "desc" and "cpu", the Rust source for the body
# of the generated `cpu(input: &[u8]) -> Vec<u8>` function.
ENCODE_OPS = {
# Emits two lowercase hex digits per input byte (high nibble first).
"hex_encode_lower": {
"id": "encode.hex_lower",
"desc": "encode bytes to lowercase hexadecimal string",
"cpu": "let mut out = Vec::with_capacity(input.len() * 2); for &b in input { out.push(b\"0123456789abcdef\"[(b >> 4) as usize]); out.push(b\"0123456789abcdef\"[(b & 0xf) as usize]); } out",
},
# Processes the input in 3-byte chunks packed into 24 bits; a short final
# chunk is zero-filled and its missing output characters become '='.
"base64_encode": {
"id": "encode.base64",
"desc": "encode bytes to base64 string (standard alphabet with padding)",
"cpu": """let charset = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let mut out = Vec::new();
let mut i = 0;
while i < input.len() {
let chunk_len = std::cmp::min(3, input.len() - i);
let mut chunk = [0u8; 3];
chunk[..chunk_len].copy_from_slice(&input[i..i + chunk_len]);
let packed = (u32::from(chunk[0]) << 16) | (u32::from(chunk[1]) << 8) | u32::from(chunk[2]);
out.push(charset[(packed >> 18) as usize & 0x3f]);
out.push(charset[(packed >> 12) as usize & 0x3f]);
if chunk_len > 1 { out.push(charset[(packed >> 6) as usize & 0x3f]); } else { out.push(b'='); }
if chunk_len > 2 { out.push(charset[packed as usize & 0x3f]); } else { out.push(b'='); }
i += 3;
}
out""",
},
}
# ─── Sort/scan/stats ──────────────────────────────────────────────────────
# Table of sort/scan/stats operations, keyed by the generated file name.
#
# Per-entry keys:
#   "id", "desc" - operation id and description.
#   "category"   - subdirectory of the specs directory that main() writes
#                  the generated file into.
#   "cpu"        - single-line Rust source for the body of the generated
#                  `cpu(input: &[u8]) -> Vec<u8>` function.
#
# Except for `byte_histogram`, every body decodes the input as 4-byte
# little-endian u32 words and ignores trailing bytes that do not fill a word.
MISC_OPS = {
    # Reference result is simply the ascending sort of the decoded words.
    "bitonic_sort_u32": {
        "id": "sort.bitonic_u32",
        "desc": "in-place bitonic sort of a u32 buffer",
        "category": "sort",
        "cpu": "let mut vals: Vec<u32> = Vec::new(); let mut i = 0; while i + 4 <= input.len() { vals.push(u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]])); i += 4; } vals.sort(); let mut out = Vec::new(); for v in vals { out.extend_from_slice(&v.to_le_bytes()); } out",
    },
    # Running wrapping sum; one output word per input word.
    "prefix_sum_inclusive": {
        "id": "scan.prefix_sum_inclusive",
        "desc": "inclusive prefix sum (scan) of u32 values",
        "category": "scan",
        "cpu": "let mut vals: Vec<u32> = Vec::new(); let mut i = 0; while i + 4 <= input.len() { vals.push(u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]])); i += 4; } let mut sum: u32 = 0; let mut out = Vec::new(); for v in vals { sum = sum.wrapping_add(v); out.extend_from_slice(&sum.to_le_bytes()); } out",
    },
    # Sum is accumulated in u64; an empty input yields 0.
    "arithmetic_mean": {
        "id": "stats.arithmetic_mean",
        "desc": "compute arithmetic mean of u32 values (truncating integer division)",
        "category": "stats",
        "cpu": "let mut sum: u64 = 0; let mut count: u64 = 0; let mut i = 0; while i + 4 <= input.len() { sum += u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]) as u64; count += 1; i += 4; } let mean = if count > 0 { (sum / count) as u32 } else { 0 }; mean.to_le_bytes().to_vec()",
    },
    # 256 little-endian u32 counters (1024 bytes), one per byte value.
    "byte_histogram": {
        "id": "stats.byte_histogram",
        "desc": "compute frequency histogram of all 256 byte values in a buffer",
        "category": "stats",
        "cpu": "let mut hist = [0u32; 256]; for &b in input { hist[b as usize] += 1; } let mut out = Vec::with_capacity(1024); for &h in &hist { out.extend_from_slice(&h.to_le_bytes()); } out",
    },
    # (n * sum(v^2) - sum(v)^2) / n^2, truncated to u32; fewer than two
    # words yield 0. Accumulators are u128 because the squared terms exceed
    # u64 as soon as two words are close to u32::MAX, which would panic in
    # debug builds and wrap in release builds.
    "variance": {
        "id": "stats.variance",
        "desc": "compute variance of u32 values (integer approximation)",
        "category": "stats",
        "cpu": "let mut sum: u128 = 0; let mut sum_sq: u128 = 0; let mut count: u128 = 0; let mut i = 0; while i + 4 <= input.len() { let v = u128::from(u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]])); sum += v; sum_sq += v * v; count += 1; i += 4; } let var = if count > 1 { ((sum_sq * count - sum * sum) / (count * count)) as u32 } else { 0 }; var.to_le_bytes().to_vec()",
    },
}
# ─── Data movement operations ─────────────────────────────────────────────
# Table of data-movement operations, keyed by the generated file name.
# Each entry provides "id", "desc" and "cpu", the Rust source for the body
# of the generated `cpu(input: &[u8]) -> Vec<u8>` function. All bodies treat
# their data as 4-byte little-endian u32 words.
DATA_MOVEMENT_OPS = {
# Keeps nonzero words in their original order.
"compact": {
"id": "data_movement.compact",
"desc": "remove zero-valued u32 entries from a buffer, compacting non-zero values",
"cpu": "let mut out = Vec::new(); let mut i = 0; while i + 4 <= input.len() { let v = u32::from_le_bytes([input[i], input[i+1], input[i+2], input[i+3]]); if v != 0 { out.extend_from_slice(&v.to_le_bytes()); } i += 4; } out",
},
# Input is split at len/2 into an index half and a data half; an index
# outside the data half produces a zero word.
"gather": {
"id": "data_movement.gather",
"desc": "gather elements from a data buffer using an index buffer",
"cpu": "// First half: indices, second half: data. Both as u32 arrays.\nlet half = input.len() / 2; let indices = &input[..half]; let data = &input[half..]; let data_count = data.len() / 4; let mut out = Vec::new(); let mut i = 0; while i + 4 <= indices.len() { let idx = u32::from_le_bytes([indices[i], indices[i+1], indices[i+2], indices[i+3]]) as usize; if idx < data_count { let base = idx * 4; out.extend_from_slice(&data[base..base+4]); } else { out.extend_from_slice(&0u32.to_le_bytes()); } i += 4; } out",
},
# Word 0 is the value, word 1 the repeat count; inputs shorter than eight
# bytes yield an empty output.
"broadcast": {
"id": "data_movement.broadcast",
"desc": "replicate a single u32 value to fill an output buffer of specified length",
"cpu": "if input.len() < 8 { return vec![]; } let value = u32::from_le_bytes([input[0], input[1], input[2], input[3]]); let count = u32::from_le_bytes([input[4], input[5], input[6], input[7]]) as usize; let mut out = Vec::with_capacity(count * 4); for _ in 0..count { out.extend_from_slice(&value.to_le_bytes()); } out",
},
# Word 0 is the pivot. Output is every word below the pivot followed by
# every other word, each group in original order; inputs shorter than four
# bytes yield an empty output.
"partition": {
"id": "data_movement.partition",
"desc": "partition u32 values into two groups based on a pivot value",
"cpu": "if input.len() < 4 { return vec![]; } let pivot = u32::from_le_bytes([input[0], input[1], input[2], input[3]]); let data = &input[4..]; let mut below = Vec::new(); let mut above = Vec::new(); let mut i = 0; while i + 4 <= data.len() { let v = u32::from_le_bytes([data[i], data[i+1], data[i+2], data[i+3]]); if v < pivot { below.extend_from_slice(&v.to_le_bytes()); } else { above.extend_from_slice(&v.to_le_bytes()); } i += 4; } let mut out = below; out.extend(above); out",
},
}
def format_byte_array(arr):
    """Render integers as comma-separated, zero-padded uppercase hex literals.

    Returns only the element list (for example ``0x00, 0xFF``) with no
    surrounding brackets; an empty iterable gives an empty string.
    """
    return ", ".join(f"0x{byte:02X}" for byte in arr)
def make_simple_bytes_spec(name, op_id, desc, cpu_body, category="buffer", laws=None, sig_in="Bytes", sig_out="Bytes"):
    """Return the Rust source of a conformance spec for a simple byte-buffer op.

    Parameters:
        name: operation name. Accepted for call-site symmetry; the template
            does not use it.
        op_id: operation id written into the metadata, the golden sample and
            the `OpSpec` builder.
        desc: human-readable description. It is copied verbatim into the
            module doc comment and escaped (backslashes and double quotes)
            where it is embedded in a Rust string literal.
        cpu_body: Rust source placed verbatim inside the generated
            `cpu(input: &[u8]) -> Vec<u8>` function.
        category: accepted for call-site symmetry; the template does not use
            it (the generated spec always declares category A).
        laws: optional iterable of law expressions; each is prefixed with
            `crate::spec::law::`. `None` or an empty iterable gives `vec![]`.
        sig_in: comma-separated input type names; each becomes a
            `DataType::<name>` entry of the signature.
        sig_out: output type name, emitted as `DataType::<sig_out>`.

    Returns:
        The complete file content as a string.

    NOTE(review): the GOLDEN, KAT and ADVERSARIAL vectors are hard-coded to
    an empty input with an empty expected output regardless of `cpu_body`;
    confirm that matches each op's reference behaviour.
    """
    # Keep the generated `description: "..."` literal well-formed even when
    # the description itself contains backslashes or double quotes.
    desc_literal = desc.replace("\\", "\\\\").replace('"', '\\"')
    laws_str = "vec![" + ", ".join(f"crate::spec::law::{law}" for law in (laws or [])) + "]"
    # One DataType per comma-separated entry; a single name yields one entry.
    inputs_vec = "vec![" + ", ".join(f"DataType::{t.strip()}" for t in sig_in.split(",")) + "]"
    return f'''//! `{op_id}` conform specification — {desc}.
use crate::verify::golden_samples::GoldenSample;
use crate::OpSpec;
/// Location-agnostic operation metadata.
pub const VYRE_OP_METADATA: vyre_spec::OpMetadata = vyre_spec::OpMetadata {{
id: "{op_id}",
layer: vyre_spec::Layer::L2,
category: vyre_spec::MetadataCategory::A,
version: 1,
description: "{desc_literal}",
signature: "({sig_in}) -> {sig_out}",
strictness: "strict",
archetype_signature: "({sig_in}) -> {sig_out}",
}};
/// Golden samples for this op.
pub const GOLDEN: &[GoldenSample] = &[GoldenSample {{
op_id: "{op_id}",
input: &[],
expected: &[],
reason: "empty input produces empty output",
}}];
/// Known-answer tests for this op.
pub const KAT: &[vyre_spec::KatVector] = &[vyre_spec::KatVector {{
input: &[],
expected: &[],
source: "hand-verified: empty input boundary",
}}];
/// Adversarial inputs for this op.
pub const ADVERSARIAL: &[vyre_spec::AdversarialInput] = &[vyre_spec::AdversarialInput {{
input: &[],
reason: "empty input exercises validation and boundary handling",
}}];
pub(crate) fn cpu(input: &[u8]) -> Vec<u8> {{
{cpu_body}
}}
fn wgsl() -> String {{
// WGSL placeholder — must be implemented per the core op shader
"fn vyre_op(index: u32, input_len: u32) -> u32 {{ return 0u; }}".to_string()
}}
/// Build the conformance specification for this operation.
pub fn vyre_op() -> OpSpec {{
use crate::comparator::ComparatorKind;
use crate::types::{{DataType, OpSignature}};
let id = "{op_id}";
OpSpec::builder(id)
.signature(OpSignature {{
inputs: {inputs_vec},
output: DataType::{sig_out},
}})
.cpu_fn(cpu)
.wgsl_fn(wgsl)
.category(crate::Category::A {{
composition_of: vec![id],
}})
.laws({laws_str})
.overflow_contract(crate::spec::OverflowContract::Wrapping)
.strictness(crate::Strictness::Strict)
.version(1)
.alt_wgsl_fns(vec![("category_a_handwritten", wgsl)])
.declared_laws(Vec::<crate::spec::DeclaredLaw>::new())
.spec_table(&[])
.archetypes(&[])
.mutation_sensitivity(&[])
.oracle_override(None)
.since_version(crate::spec::Version::V1_0)
.docs_path("")
.comparator(ComparatorKind::ExactMatch)
.boundary_values(vec![
crate::types::BoundaryValue {{
label: "empty",
inputs: vec![0],
}},
])
.equivalence_classes(vec![
crate::types::EquivalenceClass::universal("all inputs"),
])
.build()
.expect("registry invariant violated")
}}
/// Compatibility alias for older tests and callers.
pub fn spec() -> OpSpec {{
vyre_op()
}}
#[cfg(test)]
mod proptests {{
#[test]
fn coverage_artifacts_are_registered() {{
assert!(!super::KAT.is_empty());
assert!(!super::ADVERSARIAL.is_empty());
}}
}}
'''
def make_workgroup_spec(name, spec):
    """Return the Rust source of a conformance spec for a workgroup primitive.

    Parameters:
        name: primitive name; used in the `include_str!` path of the WGSL
            shader and in the comment of the stub CPU body.
        spec: mapping with required keys "id" and "desc" and optional keys
            "cpu_body" (Rust source for the body of the generated `cpu`
            function), "golden_input" / "golden_expected" (byte lists) and
            "golden_reason" (text). Other keys are ignored.

    Returns:
        The complete file content as a string.

    When "cpu_body" is absent a stub is generated that emits the same fixed
    16-byte result (0, 0xFFFF_FFFF, 0, 0) for every 16-byte command. The
    golden bytes are used for both the GOLDEN sample and the KAT vector and
    default to empty arrays.
    """
    def rust_str(text):
        # Escape text for embedding between double quotes in Rust source.
        return text.replace("\\", "\\\\").replace('"', '\\"')

    op_id = spec["id"]
    desc = spec["desc"]
    # For simpler workgroup ops without full CPU body, generate a stub
    if "cpu_body" not in spec:
        cpu_body = textwrap.dedent(f"""\
const CMD_SIZE: usize = 16;
let command_count = input.len() / CMD_SIZE;
let mut output = Vec::with_capacity(command_count * CMD_SIZE);
// Stub — execute commands sequentially per the {name} contract
for i in 0..command_count {{
let base = i * CMD_SIZE;
let _op = read_u32_le(input, base);
let _lane = read_u32_le(input, base + 4);
let _value = read_u32_le(input, base + 8);
let _extra = read_u32_le(input, base + 12);
// Default no-op result
output.extend_from_slice(&0u32.to_le_bytes());
output.extend_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
output.extend_from_slice(&0u32.to_le_bytes());
output.extend_from_slice(&0u32.to_le_bytes());
}}
output""")
    else:
        cpu_body = spec["cpu_body"]
    golden_input = format_byte_array(spec.get("golden_input", []))
    golden_expected = format_byte_array(spec.get("golden_expected", []))
    golden_reason = spec.get("golden_reason", "basic operation identity test")
    # The description and reason land inside Rust string literals below, so
    # backslashes and double quotes must be escaped there; the module doc
    # comment keeps the raw description.
    desc_literal = rust_str(desc)
    reason_literal = rust_str(golden_reason)
    return f'''//! `{op_id}` conform specification — {desc}.
//!
//! CPU reference semantics: sequential command-based protocol.
//! Each command is a 16-byte little-endian struct; each result is 16 bytes.
use crate::specs::primitive::EquivalenceClass;
use crate::verify::golden_samples::GoldenSample;
use crate::OpSpec;
/// Location-agnostic operation metadata.
pub const VYRE_OP_METADATA: vyre_spec::OpMetadata = vyre_spec::OpMetadata {{
id: "{op_id}",
layer: vyre_spec::Layer::L2,
category: vyre_spec::MetadataCategory::C,
version: 1,
description: "{desc_literal}",
signature: "(Bytes) -> Bytes",
strictness: "strict",
archetype_signature: "(Bytes) -> Bytes",
}};
/// Golden samples for this op.
pub const GOLDEN: &[GoldenSample] = &[GoldenSample {{
op_id: "{op_id}",
input: &[{golden_input}],
expected: &[{golden_expected}],
reason: "{reason_literal}",
}}];
/// Known-answer tests for this op.
pub const KAT: &[vyre_spec::KatVector] = &[vyre_spec::KatVector {{
input: &[{golden_input}],
expected: &[{golden_expected}],
source: "hand-verified: {reason_literal}",
}}];
/// Adversarial inputs for this op.
pub const ADVERSARIAL: &[vyre_spec::AdversarialInput] = &[
vyre_spec::AdversarialInput {{
input: &[],
reason: "empty command sequence — zero-length dispatch must not crash",
}},
];
pub(crate) fn cpu(input: &[u8]) -> Vec<u8> {{
{cpu_body}
}}
fn read_u32_le(data: &[u8], offset: usize) -> u32 {{
if offset + 4 > data.len() {{
return 0;
}}
u32::from_le_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
])
}}
fn wgsl() -> String {{
include_str!("../../../../core/src/ops/workgroup/primitives/{name}/{name}.wgsl").to_string()
}}
/// Build the conformance specification for this operation.
pub fn vyre_op() -> OpSpec {{
use crate::comparator::ComparatorKind;
use vyre_spec::Category;
use crate::types::{{BoundaryValue, DataType, OpSignature}};
let id = "{op_id}";
OpSpec::builder(id)
.signature(OpSignature {{
inputs: vec![DataType::Bytes],
output: DataType::Bytes,
}})
.cpu_fn(cpu)
.wgsl_fn(wgsl)
.laws(vec![])
.alt_wgsl_fns(vec![])
.declared_laws(Vec::<crate::spec::DeclaredLaw>::new())
.spec_table(&[])
.archetypes(&[])
.mutation_sensitivity(&[])
.oracle_override(None)
.since_version(crate::spec::Version::V1_0)
.docs_path("")
.comparator(ComparatorKind::ExactMatch)
.category(Category::C {{
hardware: "workgroup-sram-atomics",
backend_availability: vec!["wgpu"],
}})
.overflow_contract(crate::spec::OverflowContract::Saturating)
.strictness(crate::Strictness::Strict)
.version(1)
.equivalence_classes(vec![
EquivalenceClass::universal("single command sequence"),
EquivalenceClass::new("overflow boundary", "operations at capacity"),
EquivalenceClass::new("underflow boundary", "operations on empty structure"),
])
.boundary_values(vec![
BoundaryValue::unary("empty input", 0),
BoundaryValue::unary("value zero", 0),
BoundaryValue::unary("value max", u32::MAX),
])
.build()
.expect("registry invariant violated")
}}
/// Compatibility alias for older tests and callers.
pub fn spec() -> OpSpec {{
vyre_op()
}}
#[cfg(test)]
mod proptests {{
#[test]
fn coverage_artifacts_are_registered() {{
assert!(!super::KAT.is_empty());
assert!(!super::ADVERSARIAL.is_empty());
}}
#[test]
fn empty_input_produces_empty_output() {{
let result = super::cpu(&[]);
assert!(result.is_empty());
}}
}}
'''
def ensure_dir(path):
    """Create directory `path`, including missing parents, if it is absent.

    An already existing directory is left untouched. An empty `path` (what
    `os.path.dirname` returns for a bare filename) is a no-op instead of an
    error, because `os.makedirs("")` raises `FileNotFoundError`.
    """
    if path:
        os.makedirs(path, exist_ok=True)
def write_spec(path, content):
    """Write `content` to `path` unless a file already exists there.

    The parent directory is created first. Existing files are never
    overwritten. The file is written as UTF-8 explicitly: the generated Rust
    contains non-ASCII characters and Rust source files must be UTF-8, so
    the platform's default text encoding must not be used.

    Returns:
        True if the file was written, False if it was skipped because the
        path already exists.
    """
    ensure_dir(os.path.dirname(path))
    if os.path.exists(path):
        print(f" SKIP (exists): {path}")
        return False
    with open(path, 'w', encoding="utf-8") as f:
        f.write(content)
    print(f" WROTE: {path}")
    return True
def main():
    """Generate every spec file that does not exist yet and print a summary.

    Workgroup primitives are rendered with make_workgroup_spec; all other
    groups go through make_simple_bytes_spec. Files that already exist are
    skipped by write_spec and are not counted.
    """
    new_files = 0

    # 1. Workgroup specs (8 missing, stack already exists)
    print("\n=== Workgroup primitives ===")
    workgroup_dir = os.path.join(SPECS_DIR, "workgroup")
    ensure_dir(workgroup_dir)
    for op_name, op_spec in WORKGROUP_SPECS.items():
        target = os.path.join(workgroup_dir, f"{op_name}.rs")
        if write_spec(target, make_workgroup_spec(op_name, op_spec)):
            new_files += 1

    # 2-6. Groups rendered by make_simple_bytes_spec, in output order.
    # Each row: (banner, fixed category or None, op table, extra keyword
    # arguments derived from one op entry). A fixed category names the target
    # subdirectory, created once up front; None means the subdirectory comes
    # from each entry's own "category" and is created per entry.
    simple_groups = (
        ("Buffer operations", "buffer", BUFFER_OPS,
         lambda entry: {
             "sig_in": entry.get("sig_in", "Bytes"),
             "sig_out": entry.get("sig_out", "Bytes"),
         }),
        ("Reduction operations", "reduction", REDUCTION_OPS,
         lambda entry: {"laws": entry.get("laws")}),
        ("Encode operations", "encode", ENCODE_OPS, lambda entry: {}),
        ("Sort/Scan/Stats operations", None, MISC_OPS, lambda entry: {}),
        ("Data movement operations", "data_movement", DATA_MOVEMENT_OPS,
         lambda entry: {}),
    )
    for banner, fixed_category, ops, extra_kwargs in simple_groups:
        print(f"\n=== {banner} ===")
        if fixed_category is not None:
            ensure_dir(os.path.join(SPECS_DIR, fixed_category))
        for op_name, op_spec in ops.items():
            category = fixed_category
            if category is None:
                category = op_spec.get("category", "misc")
                ensure_dir(os.path.join(SPECS_DIR, category))
            target = os.path.join(SPECS_DIR, category, f"{op_name}.rs")
            rust_source = make_simple_bytes_spec(
                op_name, op_spec["id"], op_spec["desc"], op_spec["cpu"],
                category=category,
                **extra_kwargs(op_spec),
            )
            if write_spec(target, rust_source):
                new_files += 1

    print(f"\n=== Done: wrote {new_files} new spec files ===")


if __name__ == "__main__":
    main()