use vyre::ir::{BufferAccess, BufferDecl, DataType, Expr, Node, Program};
use crate::region::wrap_anonymous;
pub const SCAN_SUBSTRING_OP_ID: &str = "vyre-libs::scan::substring_search";
pub(crate) const LEGACY_MATCHING_SUBSTRING_OP_ID: &str = "vyre-libs::matching::substring_search";
#[must_use]
pub fn substring_search(
haystack: &str,
needle: &str,
matches: &str,
haystack_len: u32,
needle_len: u32,
) -> Program {
substring_search_with_op_id(
SCAN_SUBSTRING_OP_ID,
haystack,
needle,
matches,
haystack_len,
needle_len,
)
}
#[must_use]
pub(crate) fn substring_search_with_op_id(
op_id: &str,
haystack: &str,
needle: &str,
matches: &str,
haystack_len: u32,
needle_len: u32,
) -> Program {
let counted_storage = |name: &str, binding, count| {
let decl = BufferDecl::storage(name, binding, BufferAccess::ReadOnly, DataType::U32);
if count == 0 {
decl
} else {
decl.with_count(count)
}
};
let output_count = haystack_len.max(1);
let visible_output_bytes = (haystack_len as usize).saturating_mul(4);
let output = BufferDecl::output(matches, 2, DataType::U32)
.with_count(output_count)
.with_output_byte_range(0..visible_output_bytes);
let i = Expr::var("i");
let mut check_body: Vec<Node> = vec![Node::let_bind("ok", Expr::u32(1))];
check_body.push(Node::loop_for(
"k",
Expr::u32(0),
Expr::u32(needle_len),
vec![Node::assign(
"ok",
Expr::bitand(
Expr::var("ok"),
Expr::select(
Expr::eq(
Expr::load(haystack, Expr::add(i.clone(), Expr::var("k"))),
Expr::load(needle, Expr::var("k")),
),
Expr::u32(1),
Expr::u32(0),
),
),
)],
));
check_body.push(Node::Store {
buffer: matches.into(),
index: i.clone(),
value: Expr::var("ok"),
});
let body = vec![
Node::let_bind("i", Expr::InvocationId { axis: 0 }),
Node::let_bind("haystack_len", Expr::buf_len(haystack)),
Node::if_then(
Expr::and(
Expr::le(Expr::u32(needle_len), Expr::var("haystack_len")),
Expr::le(
i.clone(),
Expr::sub(Expr::var("haystack_len"), Expr::u32(needle_len)),
),
),
check_body,
),
];
Program::wrapped(
vec![
counted_storage(haystack, 0, haystack_len),
counted_storage(needle, 1, needle_len),
output,
],
[64, 1, 1],
vec![wrap_anonymous(op_id, body)],
)
}
inventory::submit! {
crate::harness::OpEntry {
id: SCAN_SUBSTRING_OP_ID,
build: || substring_search("haystack", "needle", "matches", 8, 3),
test_inputs: Some(|| {
let to_u32_vec = |s: &str| s.bytes().map(u32::from).collect::<Vec<_>>();
vec![
vec![
crate::test_support::byte_pack::u32_bytes(&to_u32_vec("abcabc++")),
crate::test_support::byte_pack::u32_bytes(&to_u32_vec("abc")),
],
vec![
crate::test_support::byte_pack::u32_bytes(&to_u32_vec("xyzxyzxy")),
crate::test_support::byte_pack::u32_bytes(&to_u32_vec("xyz")),
]
]
}),
expected_output: Some(|| {
let case0 = crate::test_support::byte_pack::u32_bytes(&[1u32, 0, 0, 1, 0, 0, 0, 0]);
let case1 = crate::test_support::byte_pack::u32_bytes(&[1u32, 0, 0, 1, 0, 0, 0, 0]);
vec![vec![case0], vec![case1]]
}),
category: Some("scan"),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn canonical_scan_builder_uses_scan_op_id_not_matching_id() {
let program = substring_search("haystack", "needle", "matches", 8, 3);
let [Node::Region { generator, .. }] = program.entry() else {
panic!("expected substring search to emit one scan region");
};
assert_eq!(generator.as_str(), SCAN_SUBSTRING_OP_ID);
assert_ne!(generator.as_str(), LEGACY_MATCHING_SUBSTRING_OP_ID);
}
#[test]
fn explicit_compatibility_builder_preserves_legacy_op_id() {
let program = substring_search_with_op_id(
LEGACY_MATCHING_SUBSTRING_OP_ID,
"haystack",
"needle",
"matches",
8,
3,
);
let [Node::Region { generator, .. }] = program.entry() else {
panic!("expected substring compatibility search to emit one region");
};
assert_eq!(generator.as_str(), LEGACY_MATCHING_SUBSTRING_OP_ID);
}
#[test]
#[allow(deprecated)]
fn legacy_matching_public_path_preserves_old_id_without_polluting_scan_identity() {
let program =
crate::matching::substring::substring_search("haystack", "needle", "matches", 8, 3);
let [Node::Region { generator, .. }] = program.entry() else {
panic!("expected legacy substring search to emit one region");
};
assert_eq!(generator.as_str(), LEGACY_MATCHING_SUBSTRING_OP_ID);
assert_eq!(
crate::matching::substring::substring::CANONICAL_SUBSTRING_MODULE,
"vyre_libs::scan::substring"
);
assert_eq!(
crate::matching::substring::substring::LEGACY_SUBSTRING_MODULE,
"vyre_libs::matching::substring"
);
}
#[test]
fn source_boundary_keeps_matching_identity_out_of_canonical_builder() {
let source = include_str!("substring.rs");
let canonical_builder = source
.split("pub fn substring_search(")
.nth(1)
.expect("Fix: canonical substring builder must exist")
.split("/// Build a substring Program with an explicit compatibility op id.")
.next()
.expect("Fix: compatibility builder must follow canonical substring builder");
assert!(canonical_builder.contains("SCAN_SUBSTRING_OP_ID"));
assert!(!canonical_builder.contains("LEGACY_MATCHING_SUBSTRING_OP_ID"));
assert!(!canonical_builder.contains("vyre-libs::matching::substring_search"));
}
}