use vyre_foundation::ir::{BufferAccess, BufferDecl, DataType, Expr, Node, Program};
/// Stable identifier of this operation; attached to the built program as its entry op id.
pub const OP_ID: &str = "vyre-primitives::parsing::line_splice_classify";

/// Binding slot of the `bytes_in` storage buffer (source bytes packed into `u32` words).
pub const BINDING_BYTES_IN: u32 = 0;

/// Binding slot of the `kept_mask_out` storage buffer (one `u32` flag per source byte).
pub const BINDING_KEPT_MASK_OUT: u32 = 1;

/// Byte value of `\`, widened to the `u32` lane type used by the IR comparisons.
const BACKSLASH: u32 = b'\\' as u32;

/// Byte value of line feed (`\n`), widened to `u32`.
const LF: u32 = b'\n' as u32;

/// Byte value of carriage return (`\r`), widened to `u32`.
const CR: u32 = b'\r' as u32;
#[must_use]
pub fn line_splice_classify(byte_count: u32) -> Program {
let i = Expr::var("i");
let load_u32 = |addr: Expr| -> Expr {
let word_idx = Expr::div(addr.clone(), Expr::u32(4));
let byte_in_word = Expr::rem(addr, Expr::u32(4));
let word = Expr::cast(DataType::U32, Expr::load("bytes_in", word_idx));
let shift = Expr::mul(byte_in_word, Expr::u32(8));
Expr::bitand(Expr::shr(word, shift), Expr::u32(0xFF))
};
let load = |off: i32| -> Expr {
match off {
0 => load_u32(i.clone()),
1 => Expr::select(
Expr::lt(Expr::add(i.clone(), Expr::u32(1)), Expr::u32(byte_count)),
load_u32(Expr::add(i.clone(), Expr::u32(1))),
Expr::u32(0),
),
2 => Expr::select(
Expr::lt(Expr::add(i.clone(), Expr::u32(2)), Expr::u32(byte_count)),
load_u32(Expr::add(i.clone(), Expr::u32(2))),
Expr::u32(0),
),
-1 => Expr::select(
Expr::ge(i.clone(), Expr::u32(1)),
load_u32(Expr::sub(i.clone(), Expr::u32(1))),
Expr::u32(0),
),
-2 => Expr::select(
Expr::ge(i.clone(), Expr::u32(2)),
load_u32(Expr::sub(i.clone(), Expr::u32(2))),
Expr::u32(0),
),
_ => unreachable!("line_splice_classify only uses offsets in [-2, 2]"),
}
};
let body = vec![
Node::let_bind("i", Expr::InvocationId { axis: 0 }),
Node::if_then(
Expr::lt(i.clone(), Expr::u32(byte_count)),
vec![
Node::let_bind("b_m2", load(-2)),
Node::let_bind("b_m1", load(-1)),
Node::let_bind("b_0", load(0)),
Node::let_bind("b_p1", load(1)),
Node::let_bind(
"case1",
Expr::and(
Expr::eq(Expr::var("b_0"), Expr::u32(BACKSLASH)),
Expr::eq(Expr::var("b_p1"), Expr::u32(LF)),
),
),
Node::let_bind(
"case2",
Expr::and(
Expr::eq(Expr::var("b_0"), Expr::u32(BACKSLASH)),
Expr::eq(Expr::var("b_p1"), Expr::u32(CR)),
),
),
Node::let_bind(
"case3",
Expr::and(
Expr::eq(Expr::var("b_m1"), Expr::u32(BACKSLASH)),
Expr::eq(Expr::var("b_0"), Expr::u32(LF)),
),
),
Node::let_bind(
"case4",
Expr::and(
Expr::eq(Expr::var("b_m1"), Expr::u32(BACKSLASH)),
Expr::eq(Expr::var("b_0"), Expr::u32(CR)),
),
),
Node::let_bind(
"case5",
Expr::and(
Expr::eq(Expr::var("b_m2"), Expr::u32(BACKSLASH)),
Expr::and(
Expr::eq(Expr::var("b_m1"), Expr::u32(CR)),
Expr::eq(Expr::var("b_0"), Expr::u32(LF)),
),
),
),
Node::let_bind(
"any_drop",
Expr::or(
Expr::or(
Expr::or(Expr::var("case1"), Expr::var("case2")),
Expr::or(Expr::var("case3"), Expr::var("case4")),
),
Expr::var("case5"),
),
),
Node::let_bind(
"kept",
Expr::select(Expr::var("any_drop"), Expr::u32(0), Expr::u32(1)),
),
Node::store("kept_mask_out", i.clone(), Expr::var("kept")),
],
),
];
Program::wrapped(
vec![
BufferDecl::storage(
"bytes_in",
BINDING_BYTES_IN,
BufferAccess::ReadOnly,
DataType::U32,
)
.with_count(byte_count.div_ceil(4).max(1)),
BufferDecl::storage(
"kept_mask_out",
BINDING_KEPT_MASK_OUT,
BufferAccess::ReadWrite,
DataType::U32,
)
.with_count(byte_count.max(1)),
],
[256, 1, 1],
body,
)
.with_entry_op_id(OP_ID)
}
/// CPU reference for the line-splice classifier: returns one `u32` per byte of `source`,
/// `0` for bytes that are part of a backslash-newline splice and `1` for bytes that are
/// kept.
///
/// # Panics
///
/// Panics if [`try_reference_line_splice_classify_into`] returns an error.
#[must_use]
#[cfg(any(test, feature = "cpu-parity"))]
pub fn reference_line_splice_classify(source: &[u8]) -> Vec<u32> {
    let mut kept_mask = Vec::new();
    let outcome = try_reference_line_splice_classify_into(source, &mut kept_mask);
    outcome.expect("Fix: replace expect with fallible API or document caller precondition; panic only on programmer error - line-splice classifier reference allocation failed");
    kept_mask
}
/// CPU reference for the line-splice classifier that writes into a caller-supplied
/// vector: afterwards `out` holds one `u32` per byte of `source` (`0` = dropped by a
/// splice, `1` = kept).
///
/// # Panics
///
/// Panics if [`try_reference_line_splice_classify_into`] returns an error.
#[cfg(any(test, feature = "cpu-parity"))]
pub fn reference_line_splice_classify_into(source: &[u8], out: &mut Vec<u32>) {
    let outcome = try_reference_line_splice_classify_into(source, out);
    outcome.expect("Fix: replace expect with fallible API or document caller precondition; panic only on programmer error - line-splice classifier reference allocation failed");
}
/// Fallible CPU reference for the line-splice classifier.
///
/// On success `out` holds exactly `source.len()` entries: entry `i` is `0` when byte `i`
/// is part of a `\` LF, `\` CR or `\` CR LF sequence and `1` otherwise. Previous contents
/// of `out` are discarded; its allocation is reused when it is already large enough.
///
/// # Errors
///
/// Returns a description of the failure when the space for `source.len()` output words
/// cannot be reserved. In that case `out` is left untouched.
#[cfg(any(test, feature = "cpu-parity"))]
pub fn try_reference_line_splice_classify_into(
    source: &[u8],
    out: &mut Vec<u32>,
) -> Result<(), String> {
    // `try_reserve_exact(additional)` guarantees `capacity >= len + additional`, so the
    // request must be measured from the current length. Measuring it from the capacity
    // under-reserves whenever `out` has spare capacity, and the writes below would then
    // grow the vector through the infallible path.
    let additional = source.len().saturating_sub(out.len());
    out.try_reserve_exact(additional).map_err(|err| {
        format!(
            "line-splice classifier reference could not reserve {} output words: {err}",
            source.len()
        )
    })?;
    out.clear();

    // Neighbours that fall outside the buffer read as 0, which is none of `\`, CR or LF.
    let byte_at =
        |idx: Option<usize>| -> u8 { idx.and_then(|j| source.get(j)).copied().unwrap_or(0) };

    out.extend((0..source.len()).map(|i| {
        let b_m2 = byte_at(i.checked_sub(2));
        let b_m1 = byte_at(i.checked_sub(1));
        let b_0 = source[i];
        let b_p1 = byte_at(i.checked_add(1));

        // This byte is the backslash that opens a splice.
        let opens_splice = b_0 == b'\\' && (b_p1 == b'\n' || b_p1 == b'\r');
        // This byte is the LF or CR directly after a backslash.
        let follows_backslash = b_m1 == b'\\' && (b_0 == b'\n' || b_0 == b'\r');
        // This byte is the LF that closes a `\` CR LF triple.
        let closes_crlf = b_m2 == b'\\' && b_m1 == b'\r' && b_0 == b'\n';

        let dropped = opens_splice || follows_backslash || closes_crlf;
        u32::from(!dropped)
    }));
    Ok(())
}
// Harness registration: a 256-byte program plus one fixture whose input starts with
// `a`, `\`, LF, `b` and is otherwise filled with `x`; only bytes 1 and 2 are dropped.
#[cfg(feature = "inventory-registry")]
inventory::submit! {
    crate::harness::OpEntry::new(
        OP_ID,
        || line_splice_classify(256),
        Some(|| {
            let pack = |words: &[u32]| crate::wire::pack_u32_slice(words);
            // 64 words of four `x` bytes each = 256 source bytes.
            let mut packed_source = vec![u32::from_le_bytes(*b"xxxx"); 64];
            // First word carries the splice; byte 0 sits in the lowest 8 bits.
            packed_source[0] = u32::from_le_bytes(*b"a\\\nb");
            let zeroed_mask = [0u32; 256];
            vec![vec![pack(&packed_source), pack(&zeroed_mask)]]
        }),
        Some(|| {
            let pack = |words: &[u32]| crate::wire::pack_u32_slice(words);
            let mut kept_mask = vec![1u32; 256];
            // The backslash and the line feed after it are the only dropped bytes.
            kept_mask[1] = 0;
            kept_mask[2] = 0;
            vec![vec![pack(&kept_mask)]]
        }),
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the CPU reference on `src` and compares the kept-mask with `expected`.
    fn assert_mask(src: &[u8], expected: &[u32]) {
        assert_eq!(reference_line_splice_classify(src).as_slice(), expected);
    }

    // ---- CPU reference: boundary inputs ----

    #[test]
    fn empty_input_emits_empty_output() {
        let mask = reference_line_splice_classify(b"");
        assert!(mask.is_empty());
    }

    #[test]
    fn classify_into_reuses_output_and_clears_stale_tail() {
        // Start from a full vector so both the stale contents and the allocation are
        // observable afterwards.
        let mut reused = Vec::with_capacity(16);
        reused.resize(16, u32::MAX);
        let allocation = reused.as_ptr();
        try_reference_line_splice_classify_into(b"a\\\nB", &mut reused).unwrap();
        assert_eq!(reused, vec![1, 0, 0, 1]);
        assert_eq!(reused.as_ptr(), allocation);
    }

    // ---- CPU reference: classification rules ----

    #[test]
    fn no_backslashes_keeps_every_byte() {
        let src = b"int main(void) { return 0; }";
        assert_mask(src, &vec![1u32; src.len()]);
    }

    #[test]
    fn lone_backslash_with_no_newline_is_kept() {
        assert_mask(b"a\\ b", &[1, 1, 1, 1]);
    }

    #[test]
    fn backslash_lf_pair_drops_both_bytes() {
        assert_mask(b"a\\\nb", &[1, 0, 0, 1]);
    }

    #[test]
    fn backslash_cr_lf_triple_drops_all_three() {
        assert_mask(b"a\\\r\nb", &[1, 0, 0, 0, 1]);
    }

    #[test]
    fn backslash_cr_alone_drops_both_bytes() {
        assert_mask(b"a\\\rb", &[1, 0, 0, 1]);
    }

    #[test]
    fn back_to_back_splices_each_drop_their_pair() {
        assert_mask(b"a\\\nb\\\nc", &[1, 0, 0, 1, 0, 0, 1]);
    }

    #[test]
    fn splice_at_start_of_buffer_is_handled() {
        assert_mask(b"\\\nx", &[0, 0, 1]);
    }

    #[test]
    fn splice_at_end_of_buffer_is_handled() {
        assert_mask(b"x\\\n", &[1, 0, 0]);
    }

    #[test]
    fn lone_backslash_at_eof_is_kept() {
        assert_mask(b"x\\", &[1, 1]);
    }

    #[test]
    fn double_backslash_before_newline_only_drops_the_pair() {
        assert_mask(b"a\\\\\nb", &[1, 1, 0, 0, 1]);
    }

    #[test]
    fn cr_alone_without_backslash_is_kept() {
        assert_mask(b"a\rb", &[1, 1, 1]);
    }

    #[test]
    fn lf_alone_without_backslash_is_kept() {
        assert_mask(b"a\nb", &[1, 1, 1]);
    }

    // ---- Public constants ----

    #[test]
    fn op_id_is_canonical_and_stable() {
        assert_eq!(OP_ID, "vyre-primitives::parsing::line_splice_classify");
    }

    #[test]
    fn binding_indices_are_canonical_and_stable() {
        assert_eq!(BINDING_BYTES_IN, 0);
        assert_eq!(BINDING_KEPT_MASK_OUT, 1);
    }

    // ---- Program builder ----

    #[test]
    fn build_program_returns_well_formed_program() {
        let program = line_splice_classify(64);
        assert_eq!(program.buffers().len(), 2, "bytes_in + kept_mask_out");
        assert_eq!(program.workgroup_size(), [256, 1, 1]);
    }

    #[test]
    fn build_program_is_deterministic_across_calls() {
        let first = line_splice_classify(128);
        let second = line_splice_classify(128);
        assert_eq!(first.buffers().len(), second.buffers().len());
        assert_eq!(first.workgroup_size(), second.workgroup_size());
    }

    // ---- CPU reference: repeatability and buffer reuse ----

    #[test]
    fn cpu_reference_is_deterministic() {
        let src = b"a\\\nb\\\r\nc\\\rd";
        let first = reference_line_splice_classify(src);
        let second = reference_line_splice_classify(src);
        assert_eq!(first, second);
    }

    #[test]
    fn classify_into_reuses_output_capacity() {
        let mut reused = Vec::with_capacity(64);
        let capacity_before = reused.capacity();
        reference_line_splice_classify_into(b"a\\\nb", &mut reused);
        assert_eq!(reused, vec![1, 0, 0, 1]);
        assert_eq!(reused.capacity(), capacity_before);
    }
}