use crate::parsing::c::parse::gnu_builtins::gpu_builtin_hash_table_words;
use crate::parsing::c::preprocess::gpu_define_parse::gpu_define_parse;
use crate::parsing::c::preprocess::gpu_if_expression::gpu_if_expression;
use crate::parsing::c::preprocess::gpu_ifdef_value::gpu_ifdef_value;
use crate::parsing::c::preprocess::gpu_include_parse::gpu_include_parse;
use crate::parsing::c::preprocess::gpu_undef_parse::gpu_undef_parse;
use vyre::execution_plan::fusion::fuse_programs;
use super::buffers::{
bucket_pow2, pack_u32_words_into, pad_to_u32_words_into, unpack_u32_words_exact_into,
};
use super::tokenization::reject_invalid_if_expression_values;
use super::{ClassifiedTokens, GpuDispatcher};
/// Per-token result of directive payload extraction.
///
/// The variant is selected from the token's entry in `ClassifiedTokens::directive_kinds`
/// by `gpu_extract_directive_payloads_impl`; byte payloads are copied out of the
/// classified source buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DirectivePayload {
/// Token whose directive kind is `0`.
None,
/// Token whose directive kind is `TOK_PP_DEFINE`.
Define {
/// Source bytes of the span `name_start .. name_start + name_len`.
name: Vec<u8>,
/// Name span start, as reported by the define parse table.
name_start: u32,
/// Name span length, as reported by the define parse table.
name_len: u32,
/// Source bytes of the argument span; empty when `args_len` is `0`.
args: Vec<u8>,
/// Argument span start, as reported by the define parse table.
args_start: u32,
/// Argument span length, as reported by the define parse table.
args_len: u32,
/// Source bytes of the body span; empty when `body_len` is `0`.
body: Vec<u8>,
/// Body span start, as reported by the define parse table.
body_start: u32,
/// Body span length, as reported by the define parse table.
body_len: u32,
/// `true` when the parse table's `is_function_like` word equals `1`.
is_function_like: bool,
},
/// Token whose directive kind is `TOK_PP_UNDEF`.
Undef {
/// Source bytes of the name span; empty when the reported name length is `0`.
name: Vec<u8>,
},
/// Token whose directive kind is `TOK_PP_INCLUDE` or `TOK_PP_INCLUDE_NEXT` and whose
/// reported path length is non-zero (a zero-length path yields `Other` instead).
Include {
/// Source bytes of the reported path span.
path: Vec<u8>,
/// `true` when the parse table's `is_system` word equals `1`.
is_system: bool,
/// `true` when the directive kind is `TOK_PP_INCLUDE_NEXT`.
is_next: bool,
},
/// Token whose directive kind is `TOK_PP_IFDEF` or `TOK_PP_IFNDEF`.
Ifdef {
/// Word from the ifdef value table; `0` when condition evaluation was skipped.
value: u32,
/// `true` for `TOK_PP_IFNDEF`, `false` for `TOK_PP_IFDEF`.
negated: bool,
},
/// Token whose directive kind is `TOK_PP_IF` or `TOK_PP_ELIF`.
IfExpr {
/// Word from the if-expression value table; `0` when condition evaluation was skipped.
value: u32,
/// `true` for `TOK_PP_ELIF`, `false` for `TOK_PP_IF`.
is_elif: bool,
},
/// Token whose directive kind is `TOK_PP_ELSE`.
Else,
/// Token whose directive kind is `TOK_PP_ENDIF`.
Endif,
/// Any other non-zero directive kind, and include directives with a zero-length path.
Other,
}
/// Reusable host-side buffers for directive payload extraction.
///
/// Every field is cleared or overwritten by `gpu_extract_directive_payloads_impl`
/// before it is read, so one instance can be passed to repeated extraction calls.
#[derive(Default)]
pub(super) struct DirectiveExtractionScratch {
// Packed bytes of `classified.tok_starts`, padded to the token bucket size.
starts_b: Vec<u8>,
// Packed bytes of `classified.tok_lens`, padded to the token bucket size.
lens_b: Vec<u8>,
// Packed bytes of `classified.directive_kinds`, padded to the token bucket size.
kinds_b: Vec<u8>,
// Source bytes after `pad_to_u32_words_into`.
src_pad: Vec<u8>,
// Zero-filled staging buffer (see `prepare_zero_init`), passed in the trailing
// input slots of each dispatch.
zero_init: Vec<u8>,
// Concatenated defined-macro names, zero-padded to a multiple of 4 bytes (minimum 4).
macro_names: Vec<u8>,
// Packed bytes of the macro name offsets (leading 0, then the running end offset
// after each name).
macro_offsets_b: Vec<u8>,
// Builtin hash-table words followed by one little-endian `u32` per macro slot:
// 1 for each defined macro, or a single 0 slot when no macros are supplied.
macro_values_b: Vec<u8>,
// Raw outputs of the fused define/include/undef parse dispatch (12 expected).
parse_out: Vec<Vec<u8>>,
// Raw output of the ifdef-value dispatch, then reused for the if-expression dispatch.
condition_out: Vec<Vec<u8>>,
// Unpacked parse output 0: define name start per token.
name_s: Vec<u32>,
// Unpacked parse output 1: define name length per token.
name_l: Vec<u32>,
// Unpacked parse output 2: define args start per token.
args_s: Vec<u32>,
// Unpacked parse output 3: define args length per token.
args_l: Vec<u32>,
// Unpacked parse output 4: define body start per token.
body_s: Vec<u32>,
// Unpacked parse output 5: define body length per token.
body_l: Vec<u32>,
// Unpacked parse output 6: define is-function-like flag per token (1 means true).
is_func: Vec<u32>,
// Unpacked parse output 7: include path start per token.
path_s: Vec<u32>,
// Unpacked parse output 8: include path length per token.
path_l: Vec<u32>,
// Unpacked parse output 9: include is-system flag per token (1 means true).
is_system: Vec<u32>,
// Unpacked parse output 10: undef name start per token.
undef_name_s: Vec<u32>,
// Unpacked parse output 11: undef name length per token.
undef_name_l: Vec<u32>,
// Ifdef values per token; all zero when condition evaluation is skipped.
ifdef_values: Vec<u32>,
// If-expression values per token; all zero when condition evaluation is skipped.
if_values: Vec<u32>,
}
impl DirectiveExtractionScratch {
    /// Resets the zero staging buffer so it holds exactly `byte_len` zero bytes.
    ///
    /// Previous contents are discarded first; capacity is requested fallibly so an
    /// allocation failure is reported instead of aborting.
    ///
    /// # Errors
    /// Returns a message when `byte_len` bytes cannot be reserved.
    fn prepare_zero_init(&mut self, byte_len: usize) -> Result<(), String> {
        let staging = &mut self.zero_init;
        staging.clear();
        if let Err(error) = staging.try_reserve_exact(byte_len) {
            return Err(format!(
                "gpu directive parse zero-init staging could not reserve {byte_len} bytes: {error:?}. Fix: shard preprocessing before directive payload extraction."
            ));
        }
        // Capacity is already in place, so this only writes zeros.
        staging.resize(byte_len, 0);
        Ok(())
    }
}
/// Converts a count of `u32` words into a byte length.
///
/// `label` names the table being sized and only appears in the error text.
///
/// # Errors
/// Returns a message when `word_count * 4` does not fit in `usize`.
fn directive_word_bytes(word_count: usize, label: &'static str) -> Result<usize, String> {
    const BYTES_PER_WORD: usize = 4;
    match word_count.checked_mul(BYTES_PER_WORD) {
        Some(byte_len) => Ok(byte_len),
        None => Err(format!(
            "gpu directive parse {label} word count {word_count} overflows host byte sizing. Fix: shard preprocessing before directive payload extraction."
        )),
    }
}
/// Rounds `byte_len` up to the next multiple of four bytes, with a floor of four.
///
/// `label` names the buffer being sized and only appears in the error text.
///
/// # Errors
/// Returns a message when `byte_len + 3` does not fit in `usize`.
fn directive_padded_u32_bytes(byte_len: usize, label: &'static str) -> Result<usize, String> {
    const WORD: usize = 4;
    match byte_len.checked_add(WORD - 1) {
        // Dropping the remainder can only shrink the bumped value, so once the
        // addition succeeded nothing else can overflow.
        Some(bumped) => Ok((bumped - bumped % WORD).max(WORD)),
        None => Err(format!(
            "gpu directive parse {label} byte length {byte_len} overflows u32 padding. Fix: shard preprocessing before directive payload extraction."
        )),
    }
}
/// Fallibly reserves room for `additional` more elements in `out`.
///
/// `label` describes what is being reserved and only appears in the error text.
///
/// # Errors
/// Returns a message carrying the allocator's error when the reservation fails.
fn reserve_directive_vec<T>(
    out: &mut Vec<T>,
    additional: usize,
    label: &'static str,
) -> Result<(), String> {
    match out.try_reserve_exact(additional) {
        Ok(()) => Ok(()),
        Err(error) => Err(format!(
            "gpu directive parse could not reserve {additional} {label}: {error:?}. Fix: shard preprocessing before directive payload extraction."
        )),
    }
}
/// Extracts one [`DirectivePayload`] per token, evaluating `#ifdef`/`#if` condition values
/// against `defined_macros`.
///
/// A fresh scratch buffer set is created for the call and dropped afterwards. When the
/// classified tokens carry no directives, the result is one `DirectivePayload::None` per
/// token and nothing is dispatched.
///
/// # Errors
/// Propagates the error string produced by the shared extraction implementation.
pub fn gpu_extract_directive_payloads(
    dispatcher: &dyn GpuDispatcher,
    classified: &ClassifiedTokens,
    defined_macros: &[&[u8]],
) -> Result<Vec<DirectivePayload>, String> {
    let evaluate_condition_values = true;
    let mut throwaway_scratch = DirectiveExtractionScratch::default();
    gpu_extract_directive_payloads_impl(
        dispatcher,
        classified,
        defined_macros,
        evaluate_condition_values,
        &mut throwaway_scratch,
    )
}
/// Driver-facing extraction that reuses caller-owned scratch buffers and skips
/// condition-value evaluation.
///
/// No macro table is supplied; `Ifdef`/`IfExpr` payloads carry a value of `0`. When the
/// classified tokens carry no directives the result is an empty vector and nothing is
/// dispatched.
///
/// # Errors
/// Propagates the error string produced by the shared extraction implementation.
pub(super) fn gpu_extract_directive_payloads_for_driver_with_scratch(
    dispatcher: &dyn GpuDispatcher,
    classified: &ClassifiedTokens,
    scratch: &mut DirectiveExtractionScratch,
) -> Result<Vec<DirectivePayload>, String> {
    let no_defined_macros: &[&[u8]] = &[];
    let evaluate_condition_values = false;
    gpu_extract_directive_payloads_impl(
        dispatcher,
        classified,
        no_defined_macros,
        evaluate_condition_values,
        scratch,
    )
}
/// Shared implementation behind the public and driver-facing directive payload extractors.
///
/// Behaviour, in order:
/// 1. No tokens: returns an empty vector.
/// 2. `classified.has_directives()` is false: returns without dispatching — an empty
///    vector when `evaluate_condition_values` is false, otherwise one
///    `DirectivePayload::None` per token.
/// 3. Otherwise the token tables and source are staged into `scratch`, the fused
///    define/include/undef parse program is dispatched, and its 12 output tables are
///    unpacked.
/// 4. When `evaluate_condition_values` is true, a macro table is built from
///    `defined_macros` and the ifdef-value and if-expression programs are dispatched;
///    otherwise both value tables are zero-filled.
/// 5. One payload per token is assembled from `classified.directive_kinds` and the tables.
///
/// # Errors
/// Returns a descriptive string when a size does not fit its target integer type, a
/// staging allocation fails, a dispatch fails or returns an unexpected number of outputs,
/// an output table cannot be unpacked, if-expression validation rejects the values, or a
/// reported span lies outside the source.
///
/// # Panics
/// Indexes `classified.directive_kinds` by token index, so it assumes that table has at
/// least `classified.tok_types.len()` entries — NOTE(review): confirm `ClassifiedTokens`
/// guarantees this.
fn gpu_extract_directive_payloads_impl(
    dispatcher: &dyn GpuDispatcher,
    classified: &ClassifiedTokens,
    defined_macros: &[&[u8]],
    evaluate_condition_values: bool,
    scratch: &mut DirectiveExtractionScratch,
) -> Result<Vec<DirectivePayload>, String> {
    use crate::parsing::c::lex::tokens::{
        TOK_PP_DEFINE, TOK_PP_ELIF, TOK_PP_ELSE, TOK_PP_ENDIF, TOK_PP_IF, TOK_PP_IFDEF,
        TOK_PP_IFNDEF, TOK_PP_INCLUDE, TOK_PP_INCLUDE_NEXT, TOK_PP_UNDEF,
    };

    let n = classified.tok_types.len();
    if n == 0 {
        return Ok(Vec::new());
    }
    if !classified.has_directives() {
        // Directive-free input never dispatches; the two entry points differ only in
        // whether they want a per-token `None` vector or an empty one.
        if !evaluate_condition_values {
            return Ok(Vec::new());
        }
        return Ok(vec![DirectivePayload::None; n]);
    }

    let n_bucket = bucket_pow2(n.max(1), 64);
    let n_pad = n_bucket;
    let source_len = u32::try_from(classified.source.len()).map_err(|_| {
        format!(
            "gpu directive parse source length {} exceeds u32 address space. Fix: shard preprocessing before directive payload extraction.",
            classified.source.len()
        )
    })?;
    // The program builders take the bucket as u32; convert once with a check instead of
    // letting an `as` cast truncate silently.
    let n_bucket_u32 = u32::try_from(n_bucket).map_err(|_| {
        format!(
            "gpu directive parse token bucket {n_bucket} exceeds u32 address space. Fix: shard preprocessing before directive payload extraction."
        )
    })?;

    // Stage the per-token tables and the source as u32-word-aligned byte buffers.
    pack_u32_words_into(&mut scratch.starts_b, &classified.tok_starts, n_pad)?;
    pack_u32_words_into(&mut scratch.lens_b, &classified.tok_lens, n_pad)?;
    pack_u32_words_into(&mut scratch.kinds_b, &classified.directive_kinds, n_pad)?;
    pad_to_u32_words_into(&mut scratch.src_pad, &classified.source)?;

    let dp = gpu_define_parse(n_bucket_u32, source_len);
    let ip = gpu_include_parse(n_bucket_u32, source_len);
    let up = gpu_undef_parse(n_bucket_u32, source_len);
    let parse_fused = fuse_programs(&[dp, ip, up])
        .map_err(|e| format!("fuse define+include+undef parse: {e}"))?
        .with_entry_op_id("vyre-libs::parsing::c::preprocess::define_include_undef_parse_fused");

    let zero_init_bytes = directive_word_bytes(n_pad, "zero-init")?;
    scratch.prepare_zero_init(zero_init_bytes)?;

    // Four real inputs followed by twelve zero-filled slots, one per expected output table.
    let parse_inputs = [
        scratch.starts_b.as_slice(),
        scratch.lens_b.as_slice(),
        scratch.kinds_b.as_slice(),
        scratch.src_pad.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
        scratch.zero_init.as_slice(),
    ];
    dispatcher
        .dispatch_borrowed_into(&parse_fused, &parse_inputs, &mut scratch.parse_out)
        .map_err(|e| format!("gpu_define+include+undef_parse fused: {e}"))?;
    if scratch.parse_out.len() != 12 {
        return Err(format!(
            "gpu_define+include+undef_parse fused: expected exactly 12 outputs, got {}. Fix: backend must return the declared directive parse tables and no extras.",
            scratch.parse_out.len()
        ));
    }

    // Outputs 0..=6 belong to #define, 7..=9 to #include, 10..=11 to #undef.
    unpack_u32_words_exact_into(
        &scratch.parse_out[0],
        n_pad,
        "define name_start",
        &mut scratch.name_s,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[1],
        n_pad,
        "define name_len",
        &mut scratch.name_l,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[2],
        n_pad,
        "define args_start",
        &mut scratch.args_s,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[3],
        n_pad,
        "define args_len",
        &mut scratch.args_l,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[4],
        n_pad,
        "define body_start",
        &mut scratch.body_s,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[5],
        n_pad,
        "define body_len",
        &mut scratch.body_l,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[6],
        n_pad,
        "define is_function_like",
        &mut scratch.is_func,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[7],
        n_pad,
        "include path_start",
        &mut scratch.path_s,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[8],
        n_pad,
        "include path_len",
        &mut scratch.path_l,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[9],
        n_pad,
        "include is_system",
        &mut scratch.is_system,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[10],
        n_pad,
        "undef name_start",
        &mut scratch.undef_name_s,
    )?;
    unpack_u32_words_exact_into(
        &scratch.parse_out[11],
        n_pad,
        "undef name_len",
        &mut scratch.undef_name_l,
    )?;

    if evaluate_condition_values {
        // Macro name table: all names concatenated, plus an offsets table with a leading 0
        // and the running end offset after each name.
        let macro_name_bytes = defined_macros.iter().try_fold(0usize, |total, name| {
            total.checked_add(name.len()).ok_or_else(|| {
                "gpu directive parse macro-name byte total overflows usize. Fix: shard preprocessing before directive payload extraction.".to_string()
            })
        })?;
        scratch.macro_names.clear();
        reserve_directive_vec(
            &mut scratch.macro_names,
            macro_name_bytes,
            "macro-name bytes",
        )?;
        let macro_offset_slots = defined_macros.len().checked_add(1).ok_or_else(|| {
            "gpu directive parse macro-offset slot count overflows usize. Fix: shard preprocessing before directive payload extraction.".to_string()
        })?;
        let mut macro_offsets: Vec<u32> = Vec::new();
        reserve_directive_vec(&mut macro_offsets, macro_offset_slots, "macro-offset slots")?;
        macro_offsets.push(0);
        for name in defined_macros {
            scratch.macro_names.extend_from_slice(name);
            macro_offsets.push(u32::try_from(scratch.macro_names.len()).map_err(|_| {
                format!(
                    "gpu directive parse macro-name byte offset {} exceeds u32 address space. Fix: shard preprocessing before directive payload extraction.",
                    scratch.macro_names.len()
                )
            })?);
        }

        // Zero-pad the name bytes to a whole number of u32 words (at least one word).
        let padded = directive_padded_u32_bytes(scratch.macro_names.len(), "macro names")?;
        let macro_name_padding = padded
            .checked_sub(scratch.macro_names.len())
            .ok_or_else(|| {
                "gpu directive parse macro-name padded length underflowed. Fix: repair directive padding sizing.".to_string()
            })?;
        reserve_directive_vec(
            &mut scratch.macro_names,
            macro_name_padding,
            "macro-name padding bytes",
        )?;
        scratch.macro_names.resize(padded, 0);
        pack_u32_words_into(
            &mut scratch.macro_offsets_b,
            &macro_offsets,
            macro_offsets.len(),
        )?;

        // Macro values: the builtin hash-table words, then one word per macro slot
        // (1 for every defined macro; a single 0 slot when no macros were supplied).
        let count = defined_macros.len().max(1);
        scratch.macro_values_b.clear();
        let builtin_hashes = gpu_builtin_hash_table_words();
        let macro_value_words = count.checked_add(builtin_hashes.len()).ok_or_else(|| {
            "gpu directive parse macro-value word count overflows usize. Fix: shard preprocessing before directive payload extraction.".to_string()
        })?;
        let macro_value_bytes = directive_word_bytes(macro_value_words, "macro values")?;
        reserve_directive_vec(
            &mut scratch.macro_values_b,
            macro_value_bytes,
            "macro-value bytes",
        )?;
        vyre_primitives::wire::append_u32_slice_le_bytes(
            &builtin_hashes,
            &mut scratch.macro_values_b,
        );
        for idx in 0..count {
            let value = u32::from(idx < defined_macros.len());
            scratch
                .macro_values_b
                .extend_from_slice(&value.to_le_bytes());
        }

        let iv = gpu_ifdef_value(n_bucket_u32, source_len);
        let iv_inputs = [
            scratch.starts_b.as_slice(),
            scratch.lens_b.as_slice(),
            scratch.kinds_b.as_slice(),
            scratch.src_pad.as_slice(),
            scratch.macro_names.as_slice(),
            scratch.macro_offsets_b.as_slice(),
            scratch.zero_init.as_slice(),
        ];
        dispatcher
            .dispatch_borrowed_into(&iv, &iv_inputs, &mut scratch.condition_out)
            .map_err(|e| format!("gpu_ifdef_value: {e}"))?;
        if scratch.condition_out.len() != 1 {
            return Err(format!(
                "gpu_ifdef_value: expected exactly 1 output, got {}. Fix: backend must return only the ifdef values table.",
                scratch.condition_out.len()
            ));
        }
        unpack_u32_words_exact_into(
            &scratch.condition_out[0],
            n_pad,
            "ifdef values",
            &mut scratch.ifdef_values,
        )?;

        // `condition_out` is reused for the second dispatch; the ifdef values were
        // already unpacked above.
        let ie = gpu_if_expression(n_bucket_u32, source_len);
        let ie_inputs = [
            scratch.starts_b.as_slice(),
            scratch.lens_b.as_slice(),
            scratch.kinds_b.as_slice(),
            scratch.src_pad.as_slice(),
            scratch.macro_names.as_slice(),
            scratch.macro_offsets_b.as_slice(),
            scratch.macro_values_b.as_slice(),
            scratch.zero_init.as_slice(),
        ];
        dispatcher
            .dispatch_borrowed_into(&ie, &ie_inputs, &mut scratch.condition_out)
            .map_err(|e| format!("gpu_if_expression: {e}"))?;
        if scratch.condition_out.len() != 1 {
            return Err(format!(
                "gpu_if_expression: expected exactly 1 output, got {}. Fix: backend must return only the #if expression values table.",
                scratch.condition_out.len()
            ));
        }
        unpack_u32_words_exact_into(
            &scratch.condition_out[0],
            n_pad,
            "if expression values",
            &mut scratch.if_values,
        )?;
        reject_invalid_if_expression_values(&scratch.if_values, classified)?;
    } else {
        // Condition evaluation skipped: every ifdef/if value reads as 0. Capacity is
        // reserved fallibly first so the resize cannot abort on allocation failure.
        scratch.ifdef_values.clear();
        reserve_directive_vec(&mut scratch.ifdef_values, n, "ifdef value slots")?;
        scratch.ifdef_values.resize(n, 0);
        scratch.if_values.clear();
        reserve_directive_vec(&mut scratch.if_values, n, "if-expression value slots")?;
        scratch.if_values.resize(n, 0);
    }

    let mut out = Vec::new();
    reserve_directive_vec(&mut out, n, "directive payload slots")?;
    for i in 0..n {
        let kind = classified.directive_kinds[i];
        let payload = match kind {
            0 => DirectivePayload::None,
            k if k == TOK_PP_DEFINE => {
                let nb = scratch.name_s[i] as usize;
                let nl = scratch.name_l[i] as usize;
                let ab = scratch.args_s[i] as usize;
                let al = scratch.args_l[i] as usize;
                let bb = scratch.body_s[i] as usize;
                let bl = scratch.body_l[i] as usize;
                let name = payload_span_bytes(&classified.source, nb, nl, i, "define name")?;
                // Zero-length args/body spans are not range-checked; their start is ignored.
                let args = if al == 0 {
                    Vec::new()
                } else {
                    payload_span_bytes(&classified.source, ab, al, i, "define args")?
                };
                let body = if bl == 0 {
                    Vec::new()
                } else {
                    payload_span_bytes(&classified.source, bb, bl, i, "define body")?
                };
                DirectivePayload::Define {
                    name,
                    name_start: scratch.name_s[i],
                    name_len: scratch.name_l[i],
                    args,
                    args_start: scratch.args_s[i],
                    args_len: scratch.args_l[i],
                    body,
                    body_start: scratch.body_s[i],
                    body_len: scratch.body_l[i],
                    is_function_like: scratch.is_func[i] == 1,
                }
            }
            k if k == TOK_PP_UNDEF => {
                let nb = scratch.undef_name_s[i] as usize;
                let nl = scratch.undef_name_l[i] as usize;
                if nl == 0 {
                    DirectivePayload::Undef { name: Vec::new() }
                } else {
                    DirectivePayload::Undef {
                        name: payload_span_bytes(&classified.source, nb, nl, i, "undef name")?,
                    }
                }
            }
            k if k == TOK_PP_INCLUDE || k == TOK_PP_INCLUDE_NEXT => {
                let pb = scratch.path_s[i] as usize;
                let pl = scratch.path_l[i] as usize;
                if pl == 0 {
                    // No path was reported for this include; surface it as `Other`.
                    DirectivePayload::Other
                } else {
                    DirectivePayload::Include {
                        path: payload_span_bytes(&classified.source, pb, pl, i, "include path")?,
                        is_system: scratch.is_system[i] == 1,
                        is_next: k == TOK_PP_INCLUDE_NEXT,
                    }
                }
            }
            k if k == TOK_PP_IFDEF => DirectivePayload::Ifdef {
                value: scratch.ifdef_values[i],
                negated: false,
            },
            k if k == TOK_PP_IFNDEF => DirectivePayload::Ifdef {
                value: scratch.ifdef_values[i],
                negated: true,
            },
            k if k == TOK_PP_IF => DirectivePayload::IfExpr {
                value: scratch.if_values[i],
                is_elif: false,
            },
            k if k == TOK_PP_ELIF => DirectivePayload::IfExpr {
                value: scratch.if_values[i],
                is_elif: true,
            },
            k if k == TOK_PP_ELSE => DirectivePayload::Else,
            k if k == TOK_PP_ENDIF => DirectivePayload::Endif,
            _ => DirectivePayload::Other,
        };
        out.push(payload);
    }
    Ok(out)
}
/// Copies `len` bytes starting at `start` out of `source`.
///
/// `token_index` and `label` identify the offending token and field in error text only.
///
/// # Errors
/// Returns a message when `start + len` overflows `usize` or the span is not fully
/// inside `source`.
fn payload_span_bytes(
    source: &[u8],
    start: usize,
    len: usize,
    token_index: usize,
    label: &str,
) -> Result<Vec<u8>, String> {
    let end = match start.checked_add(len) {
        Some(end) => end,
        None => {
            return Err(format!(
                "vyre-libs::gpu_pipeline: {label} span at token {token_index} overflows usize. Fix: repair GPU directive payload span emission."
            ));
        }
    };
    match source.get(start..end) {
        Some(span) => Ok(span.to_vec()),
        None => Err(format!(
            "vyre-libs::gpu_pipeline: {label} span {start}..{end} at token {token_index} is outside source length {}. Fix: repair GPU directive payload span emission.",
            source.len()
        )),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use vyre::ir::Program;

    /// Dispatcher stub that fails every dispatch, so a test passes only when the code
    /// under test never reaches the GPU.
    struct NoDispatch;

    impl GpuDispatcher for NoDispatch {
        fn dispatch(
            &self,
            _program: &Program,
            _inputs: &[Vec<u8>],
        ) -> Result<Vec<Vec<u8>>, String> {
            Err(String::from("directive-free fixture must not dispatch"))
        }
    }

    /// Three tokens over the source `int x`, none of them tagged as a directive.
    fn directive_free_classified() -> ClassifiedTokens {
        let source: &[u8] = b"int x";
        ClassifiedTokens {
            tok_types: vec![1, 1, 1],
            tok_starts: vec![0, 4, 5],
            tok_lens: vec![3, 1, 1],
            directive_kinds: vec![0, 0, 0],
            directive_count: 0,
            source: Arc::from(source),
        }
    }

    #[test]
    fn driver_payload_extraction_uses_empty_slice_for_directive_free_inputs() {
        let mut scratch = DirectiveExtractionScratch::default();
        let tokens = directive_free_classified();
        let extracted = gpu_extract_directive_payloads_for_driver_with_scratch(
            &NoDispatch,
            &tokens,
            &mut scratch,
        );
        let payloads =
            extracted.expect("Fix: directive-free production extraction must not dispatch");
        assert!(
            payloads.is_empty(),
            "production driver should use empty payload slices for directive-free inputs"
        );
    }

    #[test]
    fn compatibility_payload_extraction_preserves_per_token_none_contract() {
        let tokens = directive_free_classified();
        let no_macros: &[&[u8]] = &[];
        let payloads = gpu_extract_directive_payloads(&NoDispatch, &tokens, no_macros)
            .expect("Fix: compatibility extraction must not dispatch on directive-free inputs");
        let expected = vec![DirectivePayload::None; 3];
        assert_eq!(payloads, expected);
    }

    #[test]
    fn directive_staging_sizing_is_checked_and_fallible() {
        // Word-to-byte sizing.
        let small_table_bytes =
            directive_word_bytes(3, "test").expect("Fix: small directive table should fit");
        assert_eq!(small_table_bytes, 12);
        assert!(
            directive_word_bytes(usize::MAX, "test").is_err(),
            "Fix: directive word-to-byte sizing must reject usize overflow"
        );

        // Padding to whole u32 words.
        let empty_padded = directive_padded_u32_bytes(0, "test")
            .expect("Fix: empty macro-name table should pad to one u32");
        assert_eq!(empty_padded, 4);
        let small_padded = directive_padded_u32_bytes(5, "test")
            .expect("Fix: small macro-name table should pad to u32 bytes");
        assert_eq!(small_padded, 8);
        assert!(
            directive_padded_u32_bytes(usize::MAX, "test").is_err(),
            "Fix: directive padding must reject usize overflow"
        );

        // Zero staging buffer.
        let mut scratch = DirectiveExtractionScratch::default();
        scratch
            .prepare_zero_init(8)
            .expect("Fix: small directive zero staging should fit");
        assert_eq!(scratch.zero_init, vec![0u8; 8]);
    }
}