use rustc_hash::FxHashSet;
use std::sync::Arc;
use vyre::ir::{BufferDecl, Node, Program};
use vyre_foundation::ir::model::expr::GeneratorRef;
use super::core::{c11_lexer, c11_lexer_regular};
use super::digraphs::c11_lex_digraphs;
use crate::region::wrap;
// Identifier of the fused lexer + digraph op. It is used as the wrapping region's id,
// as the `GeneratorRef` name, and as the `id` of the harness `OpEntry` registered below.
const OP_ID: &str = "vyre-libs::parsing::c11_lex_single_pass";
// Identifier of the fused op built on `c11_lexer_regular`. It is used as the wrapping
// region's id and as the `GeneratorRef` name in `c11_lex_regular_single_pass`.
const REGULAR_OP_ID: &str = "vyre-libs::parsing::c11_lex_regular_single_pass";
/// Fuses an already-built lexer program and digraph program into one [`Program`]
/// wrapped in a single region identified by `op_id`.
///
/// * **Buffers** — every buffer of `lex_program`, in its original order, followed by
///   each buffer of `digraph_program` whose name has not been seen yet. Only the name
///   is compared: when both programs declare the same name, the lexer's declaration
///   is kept and the digraph declaration is dropped without further checks.
/// * **Workgroup size** — taken from `lex_program`; the digraph program's workgroup
///   size is not consulted.
///   NOTE(review): this presumably relies on both programs agreeing — confirm.
/// * **Body** — two `Node::Block`s, lexer entry first and digraph entry second,
///   passed to `wrap` together with a `GeneratorRef` carrying the same `op_id`.
fn fuse_lex_and_digraph_programs(
    op_id: &'static str,
    lex_program: Program,
    digraph_program: Program,
) -> Program {
    // Must be read before `lex_program` is consumed by `into_entry_vec` below.
    let workgroup_size = lex_program.workgroup_size();

    let mut buffers: Vec<BufferDecl> = lex_program.buffers().to_vec();
    let mut seen_names: FxHashSet<Arc<str>> = buffers
        .iter()
        .map(|buffer| Arc::clone(&buffer.name))
        .collect();
    // `insert` returns `true` only for names not yet in the set, so each name is
    // declared at most once in the merged table.
    buffers.extend(
        digraph_program
            .buffers()
            .iter()
            .filter(|buffer| seen_names.insert(Arc::clone(&buffer.name)))
            .cloned(),
    );

    let combined_body = vec![
        Node::Block(lex_program.into_entry_vec()),
        Node::Block(digraph_program.into_entry_vec()),
    ];

    Program::wrapped(
        buffers,
        workgroup_size,
        vec![wrap(
            op_id,
            combined_body,
            Some(GeneratorRef {
                name: op_id.to_string(),
            }),
        )],
    )
}

/// Builds a single [`Program`] that contains the `c11_lexer` program followed by the
/// `c11_lex_digraphs` program, wrapped in one region tagged with
/// `"vyre-libs::parsing::c11_lex_single_pass"`.
///
/// # Parameters
///
/// * `haystack`, `out_tok_types`, `out_tok_starts`, `out_tok_lens`, `out_counts`,
///   `haystack_len` — forwarded unchanged to `c11_lexer`.
///   NOTE(review): the string arguments are presumably buffer names — confirm against
///   `c11_lexer`.
/// * `out_tok_types`, `out_tok_starts`, `out_tok_lens` — additionally forwarded to
///   `c11_lex_digraphs`, so both stages refer to the same names.
/// * `digraph_capacity` — forwarded to `c11_lex_digraphs`.
///
/// # Returns
///
/// The fused program: a de-duplicated (by name) buffer table, the lexer program's
/// workgroup size, and one wrapping region whose body is the lexer block followed by
/// the digraph block.
#[must_use]
pub fn c11_lex_single_pass(
    haystack: &str,
    out_tok_types: &str,
    out_tok_starts: &str,
    out_tok_lens: &str,
    out_counts: &str,
    haystack_len: u32,
    digraph_capacity: u32,
) -> Program {
    let lex_program = c11_lexer(
        haystack,
        out_tok_types,
        out_tok_starts,
        out_tok_lens,
        out_counts,
        haystack_len,
    );
    let digraph_program = c11_lex_digraphs(
        out_tok_types,
        out_tok_starts,
        out_tok_lens,
        digraph_capacity,
    );
    fuse_lex_and_digraph_programs(OP_ID, lex_program, digraph_program)
}

/// Same fusion as [`c11_lex_single_pass`], but the first stage is built with
/// `c11_lexer_regular` and the wrapping region is tagged with
/// `"vyre-libs::parsing::c11_lex_regular_single_pass"`.
///
/// # Parameters
///
/// * `haystack`, `out_tok_types`, `out_tok_starts`, `out_tok_lens`, `out_counts`,
///   `haystack_len` — forwarded unchanged to `c11_lexer_regular`.
/// * `out_tok_types`, `out_tok_starts`, `out_tok_lens`, `digraph_capacity` —
///   forwarded to `c11_lex_digraphs`.
///
/// # Returns
///
/// The fused program: a de-duplicated (by name) buffer table, the lexer program's
/// workgroup size, and one wrapping region whose body is the lexer block followed by
/// the digraph block.
#[must_use]
pub fn c11_lex_regular_single_pass(
    haystack: &str,
    out_tok_types: &str,
    out_tok_starts: &str,
    out_tok_lens: &str,
    out_counts: &str,
    haystack_len: u32,
    digraph_capacity: u32,
) -> Program {
    let lex_program = c11_lexer_regular(
        haystack,
        out_tok_types,
        out_tok_starts,
        out_tok_lens,
        out_counts,
        haystack_len,
    );
    let digraph_program = c11_lex_digraphs(
        out_tok_types,
        out_tok_starts,
        out_tok_lens,
        digraph_capacity,
    );
    fuse_lex_and_digraph_programs(REGULAR_OP_ID, lex_program, digraph_program)
}
// Submits an `OpEntry` for the fused single-pass op (`OP_ID`) through `inventory`.
// No entry for `REGULAR_OP_ID` is submitted in this section.
inventory::submit! {
crate::harness::OpEntry {
id: OP_ID,
// Builds the fused program with fixed string arguments and 64 for both
// `haystack_len` and `digraph_capacity`.
build: || c11_lex_single_pass(
"haystack",
"tok_types",
"tok_starts",
"tok_lens",
"tok_counts",
64,
64,
),
// Fixture functions defined right below this registration.
test_inputs: Some(single_pass_inputs),
expected_output: Some(single_pass_expected),
category: Some("parsing"),
}
}
/// Input fixture for the registered `OpEntry`: a single case made of five byte
/// buffers.
///
/// The first buffer is 256 bytes of `b'a'`, the next three are 256 zero bytes each,
/// and the last is 4 zero bytes.
/// NOTE(review): the order presumably mirrors the `haystack`, `tok_types`,
/// `tok_starts`, `tok_lens`, `tok_counts` arguments of the registered build — confirm
/// against the harness.
fn single_pass_inputs() -> Vec<Vec<Vec<u8>>> {
    // 64 entries of 4 bytes each; 64 matches the sizes passed at registration.
    const TABLE_BYTES: usize = 64 * 4;

    let zeroed_table = || vec![0u8; TABLE_BYTES];
    let case = vec![
        vec![b'a'; TABLE_BYTES],
        zeroed_table(),
        zeroed_table(),
        zeroed_table(),
        vec![0u8; 4],
    ];
    vec![case]
}
/// Expected-output fixture for the registered `OpEntry`: a single case made of four
/// byte buffers.
///
/// 1. 256 bytes, zero except the first four, which hold `TOK_IDENTIFIER` in
///    little-endian order.
/// 2. 256 zero bytes.
/// 3. 256 bytes, zero except the first four, which hold `64u32` in little-endian
///    order.
/// 4. 4 bytes holding `1u32` in little-endian order.
///
/// NOTE(review): these presumably correspond to the token types, starts, lengths and
/// count of one 64-byte identifier token — confirm against the lexer.
fn single_pass_expected() -> Vec<Vec<Vec<u8>>> {
    const TABLE_BYTES: usize = 64 * 4;

    /// Returns a zeroed table whose first four bytes are overwritten with `head`.
    /// Panics if `head` is not exactly four bytes long.
    fn table_with_head(head: &[u8]) -> Vec<u8> {
        let mut table = vec![0u8; TABLE_BYTES];
        table[..4].copy_from_slice(head);
        table
    }

    let tok_types =
        table_with_head(&crate::parsing::c::lex::tokens::TOK_IDENTIFIER.to_le_bytes());
    let tok_starts = vec![0u8; TABLE_BYTES];
    let tok_lens = table_with_head(&64u32.to_le_bytes());
    let tok_counts = 1u32.to_le_bytes().to_vec();

    vec![vec![tok_types, tok_starts, tok_lens, tok_counts]]
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use vyre::ir::Node;

    /// Builds the fused program with the same arguments the registered `OpEntry` uses.
    fn fused_fixture() -> Program {
        c11_lex_single_pass(
            "haystack",
            "tok_types",
            "tok_starts",
            "tok_lens",
            "tok_counts",
            64,
            64,
        )
    }

    /// The fused program must be one region tagged with `OP_ID` whose body length
    /// equals the summed entry lengths of the two stand-alone programs, and its
    /// buffer table must contain every buffer of both.
    #[test]
    fn c11_lex_single_pass_matches_two_program_sequence() {
        let fused = fused_fixture();
        let lex = c11_lexer(
            "haystack",
            "tok_types",
            "tok_starts",
            "tok_lens",
            "tok_counts",
            64,
        );
        let digraphs = c11_lex_digraphs("tok_types", "tok_starts", "tok_lens", 64);

        let entry = fused.entry();
        assert_eq!(
            entry.len(),
            1,
            "fused must have exactly one wrapping Region at the entry top"
        );
        let Node::Region {
            generator, body, ..
        } = &entry[0]
        else {
            panic!("fused entry's top-level must be a Region");
        };
        assert_eq!(generator.as_str(), OP_ID);

        let body_len: usize = lex.entry().len().saturating_add(digraphs.entry().len());
        assert_eq!(body.as_ref().len(), body_len);

        // Borrowed names are enough for membership checks.
        let fused_names: HashSet<&str> = fused.buffers().iter().map(|b| &*b.name).collect();
        for buf in lex.buffers() {
            assert!(
                fused_names.contains(&*buf.name),
                "fused buffer table must include lex buffer `{}`",
                buf.name
            );
        }
        for buf in digraphs.buffers() {
            assert!(
                fused_names.contains(&*buf.name),
                "fused buffer table must include digraph buffer `{}`",
                buf.name
            );
        }
    }

    /// The wrapping region's generator must be the literal single-pass op id.
    #[test]
    fn fused_region_uses_l1_op_id() {
        let fused = fused_fixture();
        match &fused.entry()[0] {
            Node::Region { generator, .. } => assert_eq!(
                generator.as_str(),
                "vyre-libs::parsing::c11_lex_single_pass"
            ),
            _ => panic!("entry must be Region"),
        }
    }

    /// No buffer name may appear twice in the fused buffer table.
    #[test]
    fn fused_buffer_table_has_no_duplicates() {
        let fused = fused_fixture();
        let mut seen: HashSet<&str> = HashSet::new();
        for buf in fused.buffers() {
            let name: &str = &buf.name;
            assert!(
                seen.insert(name),
                "duplicate buffer `{name}` in fused table"
            );
        }
    }
}