#![cfg(test)]
mod common;
use common::{bytes_u32, u32_bytes, with_live_backend};
use vyre::DispatchConfig;
use vyre_primitives::bitset::popcount::{bitset_popcount, cpu_ref as popcount_cpu};
use vyre_primitives::decode::rle_segment_lengths::{rle_segment_lengths, rle_segment_lengths_cpu};
use vyre_primitives::graph::program_graph::ProgramGraphShape;
use vyre_primitives::parsing::line_splice_classify::{
line_splice_classify, reference_line_splice_classify,
};
use vyre_primitives::parsing::planar_rewrite::{
planar_rewrite_schedule, reference_planar_rewrite_schedule,
};
use vyre_primitives::predicate::edge::{cpu_ref as edge_cpu, edge};
use vyre_primitives::predicate::edge_kind;
fn run_popcount(input: &[u32]) -> Vec<u32> {
let words = input.len() as u32;
let program = bitset_popcount("input", "count_words", words);
let inputs: Vec<Vec<u8>> = vec![u32_bytes(input), vec![0u8; words as usize * 4]];
let mut config = DispatchConfig::default();
let workgroup_x = 256u32;
let grid_x = ((words + workgroup_x - 1) / workgroup_x).max(1);
config.grid_override = Some([grid_x, 1, 1]);
let outputs = with_live_backend("bitset popcount batch", |backend| {
backend
.dispatch(&program, &inputs, &config)
.unwrap_or_else(|error| panic!("Fix: CUDA bitset-popcount dispatch failed: {error}"))
});
let mut out = bytes_u32(&outputs[0]);
out.truncate(words as usize);
out
}
/// Popcount of four mixed words must match both the CPU reference and the
/// hand-computed counts `[32, 0, 4, 8]`.
#[test]
fn cuda_bitset_popcount_basic() {
    let words = vec![0xFFFF_FFFFu32, 0u32, 0b1010_1010_u32, 0xAA55u32];

    let reference = popcount_cpu(&words);
    let dispatched = run_popcount(&words);

    assert_eq!(dispatched, reference);
    assert_eq!(dispatched, vec![32, 0, 4, 8]);
}
/// Sixteen all-zero words must produce sixteen zero counts, agreeing with the
/// CPU reference.
#[test]
fn cuda_bitset_popcount_all_zero() {
    let zeros = vec![0u32; 16];

    let reference = popcount_cpu(&zeros);
    let dispatched = run_popcount(&zeros);

    assert_eq!(dispatched, reference);
    // Every count is zero, so the expected output equals the input itself.
    assert_eq!(dispatched, zeros);
}
/// Dispatches the `edge` predicate program on a live backend and returns the
/// resulting frontier words.
///
/// Seven buffers are bound, in order: zeroed node data, `edge_offsets`,
/// `edge_targets`, `edge_kind_mask`, zeroed node tags, `frontier`, and a
/// zero-filled output of `ceil(node_count / 32)` words (at least one). The
/// first buffer returned by the dispatch is decoded as `u32`s and cut down to
/// that word count.
///
/// # Panics
///
/// Panics if the dispatch fails or returns no output buffer.
fn run_edge(
    node_count: u32,
    edge_offsets: &[u32],
    edge_targets: &[u32],
    edge_kind_mask: &[u32],
    frontier: &[u32],
    allow_mask: u32,
) -> Vec<u32> {
    // The shape is built with an edge count of at least one, even for an empty edge list.
    let shape = ProgramGraphShape::new(node_count, (edge_targets.len() as u32).max(1));
    let program = edge(shape, "frontier_in", "frontier_out", allow_mask);

    // One bit per node, rounded up to whole words, never fewer than one word.
    let frontier_words = node_count.div_ceil(32).max(1) as usize;
    // Node data and node tags are both all-zero and of equal length, so one
    // vector supplies the bytes for both slots.
    let zeroed_nodes = vec![0u32; node_count as usize];

    let inputs: Vec<Vec<u8>> = vec![
        u32_bytes(&zeroed_nodes), // pg_nodes
        u32_bytes(edge_offsets),
        u32_bytes(edge_targets),
        u32_bytes(edge_kind_mask),
        u32_bytes(&zeroed_nodes), // pg_node_tags
        u32_bytes(frontier),
        vec![0u8; frontier_words * 4],
    ];

    let mut config = DispatchConfig::default();
    config.grid_override = Some([node_count.max(1), 1, 1]);

    let outputs = with_live_backend("predicate edge batch", |backend| {
        match backend.dispatch(&program, &inputs, &config) {
            Ok(buffers) => buffers,
            Err(error) => panic!("Fix: CUDA predicate-edge dispatch failed: {error}"),
        }
    });

    bytes_u32(&outputs[0])
        .into_iter()
        .take(frontier_words)
        .collect()
}
/// With frontier bit 0 set and the single edge's kind allowed, the output
/// frontier must be `0b10` and agree with the CPU reference.
#[test]
fn cuda_predicate_edge_one_step() {
    // NOTE(review): offsets look CSR-style (one edge leaving node 0, targeting node 1) — confirm.
    let offsets = vec![0u32, 1, 1];
    let targets = vec![1u32];
    let kinds = vec![edge_kind::ASSIGNMENT];
    let seed = vec![0b01u32];
    let allowed = edge_kind::ASSIGNMENT;

    let reference = edge_cpu(2, &offsets, &targets, &kinds, &seed, allowed);
    let dispatched = run_edge(2, &offsets, &targets, &kinds, &seed, allowed);

    assert_eq!(dispatched, reference);
    assert_eq!(dispatched, vec![0b10u32]);
}
/// When the allow mask (`CALL_ARG`) differs from the edge's kind
/// (`ASSIGNMENT`), the output frontier must stay empty and agree with the CPU
/// reference.
#[test]
fn cuda_predicate_edge_kind_mask_skips() {
    let offsets = vec![0u32, 1, 1];
    let targets = vec![1u32];
    let kinds = vec![edge_kind::ASSIGNMENT];
    let seed = vec![0b01u32];
    // Deliberately a different kind from the one stored on the edge.
    let allowed = edge_kind::CALL_ARG;

    let reference = edge_cpu(2, &offsets, &targets, &kinds, &seed, allowed);
    let dispatched = run_edge(2, &offsets, &targets, &kinds, &seed, allowed);

    assert_eq!(dispatched, reference);
    assert_eq!(dispatched, vec![0u32]);
}
/// Packs `bytes` into little-endian `u32` words, zero-padding the final word
/// when the length is not a multiple of four.
///
/// An empty slice yields an empty vector.
fn pack_bytes(bytes: &[u8]) -> Vec<u32> {
    bytes
        .chunks(4)
        .map(|chunk| {
            // A short trailing chunk leaves the high bytes at zero.
            let mut word = [0u8; 4];
            word[..chunk.len()].copy_from_slice(chunk);
            u32::from_le_bytes(word)
        })
        .collect()
}
fn run_line_splice(source: &[u8]) -> Vec<u32> {
let byte_count = source.len() as u32;
let words = pack_bytes(source);
let program = line_splice_classify(byte_count);
let inputs: Vec<Vec<u8>> = vec![u32_bytes(&words), vec![0u8; byte_count.max(1) as usize * 4]];
let mut config = DispatchConfig::default();
let workgroup_x = 256u32;
let grid_x = ((byte_count + workgroup_x - 1) / workgroup_x).max(1);
config.grid_override = Some([grid_x, 1, 1]);
let outputs = with_live_backend("line splice classify", |backend| {
backend
.dispatch(&program, &inputs, &config)
.unwrap_or_else(|error| {
panic!("Fix: CUDA line-splice classify dispatch failed: {error}")
})
});
let mut out = bytes_u32(&outputs[0]);
out.truncate(byte_count as usize);
out
}
/// Text with no splice sequence must classify every byte as `1`, agreeing with
/// the CPU reference.
#[test]
fn cuda_line_splice_classify_keeps_plain_text() {
    let text = b"abcd";

    let reference = reference_line_splice_classify(text);
    let dispatched = run_line_splice(text);

    assert_eq!(dispatched, reference);
    assert_eq!(dispatched, vec![1u32, 1, 1, 1]);
}
/// A backslash followed by a line feed in the middle of the text must be
/// classified identically by the backend and the CPU reference.
#[test]
fn cuda_line_splice_classify_drops_backslash_lf() {
    // Bytes: 'a', 'b', '\\', '\n', 'c', 'd'.
    let text = b"ab\\\ncd";

    let reference = reference_line_splice_classify(text);
    let dispatched = run_line_splice(text);

    assert_eq!(dispatched, reference);
}
/// Dispatches the `planar_rewrite_schedule` program for an `h` × `w` grid on a
/// live backend.
///
/// Two buffers are bound, in order: the bytes of `candidates` and a
/// zero-filled buffer with one `u32` slot per grid cell. The first buffer
/// returned by the dispatch is decoded as `u32`s and truncated to `h * w`
/// entries. `k` is forwarded to the program builder unchanged.
///
/// # Panics
///
/// Panics if `h * w` overflows `u32`, if the dispatch fails, or if the
/// dispatch returns no output buffer.
fn run_planar(candidates: &[u32], h: u32, w: u32, k: u32) -> Vec<u32> {
    // NOTE(review): assumes the program's workgroup width is 256 — confirm against the kernel.
    const WORKGROUP_X: u32 = 256;

    // Checked multiply: in release builds a plain `h * w` would wrap silently
    // and undersize both the output buffer and the grid.
    let cell_count = h
        .checked_mul(w)
        .expect("Fix: planar-rewrite grid size h * w overflows u32");
    let cells = cell_count as usize;

    let program = planar_rewrite_schedule("c", "ch", h, w, k);
    let inputs: Vec<Vec<u8>> = vec![u32_bytes(candidates), vec![0u8; cells * 4]];

    let mut config = DispatchConfig::default();
    // `div_ceil` cannot overflow the way `(cells + 255) / 256` can.
    config.grid_override = Some([cell_count.div_ceil(WORKGROUP_X).max(1), 1, 1]);

    let outputs = with_live_backend("planar rewrite schedule", |backend| {
        backend
            .dispatch(&program, &inputs, &config)
            .unwrap_or_else(|error| panic!("Fix: CUDA planar-rewrite dispatch failed: {error}"))
    });

    let mut out = bytes_u32(&outputs[0]);
    out.truncate(cells);
    out
}
/// A 3 × 3 grid with no candidates set must produce an all-zero schedule,
/// agreeing with the CPU reference.
#[test]
fn cuda_planar_rewrite_schedule_no_candidates() {
    let (grid_h, grid_w, k) = (3u32, 3u32, 1u32);
    let cell_total = (grid_h * grid_w) as usize;
    let empty = vec![0u32; cell_total];

    let reference = reference_planar_rewrite_schedule(&empty, grid_h, grid_w, k);
    let dispatched = run_planar(&empty, grid_h, grid_w, k);

    assert_eq!(dispatched, reference);
    assert_eq!(dispatched, vec![0u32; cell_total]);
}
/// A 4 × 4 grid with candidates at flat indices 0 and 10 must be scheduled
/// identically by the backend and the CPU reference.
#[test]
fn cuda_planar_rewrite_schedule_isolated_candidates() {
    let (grid_h, grid_w, k) = (4u32, 4u32, 1u32);

    let mut marked = vec![0u32; (grid_h * grid_w) as usize];
    for index in [0usize, 10] {
        marked[index] = 1;
    }

    let reference = reference_planar_rewrite_schedule(&marked, grid_h, grid_w, k);
    let dispatched = run_planar(&marked, grid_h, grid_w, k);

    assert_eq!(dispatched, reference);
}
fn run_rle(segments: &[u32]) -> (Vec<u32>, Vec<u32>) {
let count = segments.len() as u32;
let program = rle_segment_lengths(count);
let inputs: Vec<Vec<u8>> = vec![
u32_bytes(segments),
vec![0u8; count as usize * 4],
vec![0u8; count as usize * 4],
];
let mut config = DispatchConfig::default();
let workgroup_x = 256u32;
let grid_x = ((count + workgroup_x - 1) / workgroup_x).max(1);
config.grid_override = Some([grid_x, 1, 1]);
let outputs = with_live_backend("RLE segment lengths", |backend| {
backend
.dispatch(&program, &inputs, &config)
.unwrap_or_else(|error| panic!("Fix: CUDA RLE segment-length dispatch failed: {error}"))
});
let mut lengths = bytes_u32(&outputs[0]);
let mut values = bytes_u32(&outputs[1]);
lengths.truncate(count as usize);
values.truncate(count as usize);
(lengths, values)
}
/// Two segments encoded as `(length << 8) | value` must decode to lengths
/// `[5, 10]` and values `[0xAA, 0x55]`, agreeing with the CPU reference.
#[test]
fn cuda_rle_segment_lengths_basic() {
    let segments: Vec<u32> = [(5u32, 0xAAu32), (10, 0x55)]
        .iter()
        .map(|&(length, value)| (length << 8) | value)
        .collect();

    let (ref_lengths, ref_values) = rle_segment_lengths_cpu(&segments);
    let (got_lengths, got_values) = run_rle(&segments);

    assert_eq!(got_lengths, ref_lengths);
    assert_eq!(got_values, ref_values);
    assert_eq!(got_lengths, vec![5, 10]);
    assert_eq!(got_values, vec![0xAA, 0x55]);
}
/// An all-zero segment followed by a length-1 segment must decode to lengths
/// `[0, 1]` and values `[0, 0xFF]`, agreeing with the CPU reference.
#[test]
fn cuda_rle_segment_lengths_zero_length() {
    let segments: Vec<u32> = [(0u32, 0u32), (1, 0xFF)]
        .iter()
        .map(|&(length, value)| (length << 8) | value)
        .collect();

    let (ref_lengths, ref_values) = rle_segment_lengths_cpu(&segments);
    let (got_lengths, got_values) = run_rle(&segments);

    assert_eq!(got_lengths, ref_lengths);
    assert_eq!(got_values, ref_values);
    assert_eq!(got_lengths, vec![0, 1]);
    assert_eq!(got_values, vec![0, 0xFF]);
}