use aho_corasick::AhoCorasick;
use memchr;
/// Returns the index of the first occurrence of `needle` in `haystack`,
/// or `None` when the byte does not occur.
///
/// Thin wrapper that delegates directly to `memchr::memchr`.
#[inline]
pub fn find_byte_simd(haystack: &[u8], needle: u8) -> Option<usize> {
memchr::memchr(needle, haystack)
}
/// Returns the index of the first byte in `haystack` that equals either
/// `byte1` or `byte2`, or `None` when neither occurs.
///
/// Thin wrapper that delegates directly to `memchr::memchr2`.
#[inline]
pub fn find_either_byte_simd(haystack: &[u8], byte1: u8, byte2: u8) -> Option<usize> {
memchr::memchr2(byte1, byte2, haystack)
}
/// Returns the index of the first byte in `haystack` that equals any of
/// `byte1`, `byte2` or `byte3`, or `None` when none of them occurs.
///
/// Thin wrapper that delegates directly to `memchr::memchr3`.
#[inline]
pub fn find_any_of_three_simd(haystack: &[u8], byte1: u8, byte2: u8, byte3: u8) -> Option<usize> {
memchr::memchr3(byte1, byte2, byte3, haystack)
}
/// Returns the index of the first line-terminator byte (`\r` or `\n`) in
/// `haystack`, or `None` when the slice contains neither.
///
/// A `\r\n` pair is reported at the position of the `\r`.
#[inline]
pub fn find_newline_simd(haystack: &[u8]) -> Option<usize> {
memchr::memchr2(b'\r', b'\n', haystack)
}
/// Locates the first `rule <name>` header in `text` and extracts the rule name.
///
/// The keyword `rule` must stand alone as a word: it is rejected when the byte
/// directly before or after it is an ASCII letter, digit or underscore, so
/// identifiers such as `my_rule` or `rule_set` are not mistaken for the keyword.
///
/// The name may be written in two forms:
/// * quoted (`rule "My Rule"`) — everything up to the next `"` is the name;
/// * bare (`rule MyRule`) — the longest run of alphanumeric characters and `_`.
///
/// # Returns
///
/// `Some((name, consumed))` where `consumed` is the byte offset in `text` just
/// past the name (past the closing quote for quoted names). Returns `None` when
/// no header is found, when only whitespace follows the keyword, or when a
/// quoted name is never closed.
pub fn parse_rule_header_simd(text: &str) -> Option<(String, usize)> {
    const KEYWORD: &[u8] = b"rule";

    /// Bytes that can be part of an identifier and therefore cannot border a keyword.
    fn is_ident_byte(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_'
    }

    let bytes = text.as_bytes();
    let mut pos = 0;
    while pos < bytes.len() {
        // Jump straight to the next candidate 'r'; no 'r' left means no header.
        pos += memchr::memchr(b'r', &bytes[pos..])?;
        let keyword_end = pos + KEYWORD.len();

        let is_keyword = bytes[pos..].starts_with(KEYWORD)
            && (pos == 0 || !is_ident_byte(bytes[pos - 1]))
            && bytes.get(keyword_end).map_or(true, |&b| !is_ident_byte(b));

        if is_keyword {
            // `keyword_end` follows four ASCII bytes, so it is a char boundary.
            let after_rule = &text[keyword_end..];
            let name_start = after_rule.find(|c: char| !c.is_whitespace())?;
            let after_ws = &after_rule[name_start..];

            if let Some(quoted) = after_ws.strip_prefix('"') {
                let end_quote = memchr::memchr(b'"', quoted.as_bytes())?;
                let name = quoted[..end_quote].to_string();
                // +2 accounts for the opening and the closing quote.
                let consumed = keyword_end + name_start + end_quote + 2;
                return Some((name, consumed));
            }

            let name_end = after_ws
                .find(|c: char| !c.is_alphanumeric() && c != '_')
                .unwrap_or(after_ws.len());
            if name_end > 0 {
                let name = after_ws[..name_end].to_string();
                return Some((name, keyword_end + name_start + name_end));
            }
            // `rule` followed by punctuation (e.g. `rule {`): keep searching.
        }
        pos += 1;
    }
    None
}
/// Finds the byte offset of the first top-level `then` keyword in `text`.
///
/// A `then` counts only when all of the following hold:
/// * it is outside any double-quoted string (a backslash inside a string
///   escapes the byte that follows it, so `\"` and `\\` are handled);
/// * it is not nested inside `{ ... }` or `( ... )`;
/// * it is a whole word, i.e. not bordered by an ASCII letter, digit or `_`.
///
/// Unbalanced closing braces or parentheses are ignored rather than driving the
/// nesting depth negative, so a stray `)` or `}` cannot hide a later keyword.
///
/// The scan is a single byte-wise pass: six different delimiters have to be
/// tracked, which is more than a three-needle `memchr3` search can look for.
///
/// # Returns
///
/// `Some(offset)` of the `t` in `then`, or `None` when no such keyword exists.
pub fn find_then_keyword_simd(text: &str) -> Option<usize> {
    const KEYWORD: &[u8] = b"then";

    /// Bytes that can be part of an identifier and therefore cannot border a keyword.
    fn is_ident_byte(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_'
    }

    let bytes = text.as_bytes();
    let mut brace_depth = 0usize;
    let mut paren_depth = 0usize;
    let mut in_string = false;
    let mut pos = 0;

    while pos < bytes.len() {
        let byte = bytes[pos];
        if in_string {
            match byte {
                // Skip the escaped byte as well (on top of the increment below).
                b'\\' => pos += 1,
                b'"' => in_string = false,
                _ => {}
            }
        } else {
            match byte {
                b'"' => in_string = true,
                b'{' => brace_depth += 1,
                b'}' => brace_depth = brace_depth.saturating_sub(1),
                b'(' => paren_depth += 1,
                b')' => paren_depth = paren_depth.saturating_sub(1),
                b't' if brace_depth == 0
                    && paren_depth == 0
                    && bytes[pos..].starts_with(KEYWORD) =>
                {
                    let before_ok = pos == 0 || !is_ident_byte(bytes[pos - 1]);
                    let after_ok = bytes
                        .get(pos + KEYWORD.len())
                        .map_or(true, |&b| !is_ident_byte(b));
                    if before_ok && after_ok {
                        return Some(pos);
                    }
                }
                _ => {}
            }
        }
        pos += 1;
    }
    None
}
/// Reports every occurrence of any of `keywords` inside `text`.
///
/// An Aho-Corasick automaton is built from `keywords` on each call and run
/// over `text`; the matches are returned in the order the automaton's
/// `find_iter` yields them.
///
/// # Returns
///
/// A vector of `(byte_offset, keyword)` pairs, where `keyword` borrows from
/// the `keywords` slice. An empty `keywords` slice yields an empty vector
/// without building an automaton.
///
/// # Panics
///
/// Panics if the automaton cannot be constructed from `keywords`.
pub fn find_keywords_simd<'a>(text: &str, keywords: &'a [&str]) -> Vec<(usize, &'a str)> {
    let mut hits = Vec::new();
    if !keywords.is_empty() {
        let automaton = AhoCorasick::new(keywords).unwrap();
        for found in automaton.find_iter(text) {
            let keyword = keywords[found.pattern().as_usize()];
            hits.push((found.start(), keyword));
        }
    }
    hits
}
/// Counts the line terminators in `text`.
///
/// `\n`, a lone `\r`, and the two-byte sequence `\r\n` each count as exactly
/// one terminator. Text without any terminator therefore yields `0`, and a
/// final line that is not terminated is not counted.
pub fn count_lines_simd(text: &str) -> usize {
    let bytes = text.as_bytes();
    let mut terminators = 0;
    let mut cursor = 0;

    // `cursor` never exceeds `bytes.len()`, so the slice below is always valid;
    // an empty tail simply produces `None` and ends the loop.
    while let Some(offset) = memchr::memchr2(b'\r', b'\n', &bytes[cursor..]) {
        let hit = cursor + offset;
        let is_crlf = bytes[hit] == b'\r' && bytes.get(hit + 1) == Some(&b'\n');
        cursor = hit + if is_crlf { 2 } else { 1 };
        terminators += 1;
    }

    terminators
}
/// Returns the byte offset of the first character in `text` that is not one
/// of space, tab, carriage return or line feed.
///
/// When `text` is empty or consists solely of those four bytes, the result is
/// `text.len()`.
pub fn skip_whitespace_simd(text: &str) -> usize {
    text.bytes()
        .position(|b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
        .unwrap_or(text.len())
}
/// Extracts the identifier that `text` starts with.
///
/// An identifier begins with an ASCII letter or `_` and continues with ASCII
/// letters, digits and `_`.
///
/// # Returns
///
/// The leading identifier as an owned `String`, or `None` when `text` is empty
/// or its first byte cannot start an identifier.
pub fn extract_identifier_simd(text: &str) -> Option<String> {
    let is_ident_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let first = text.bytes().next()?;
    // A valid start is any identifier byte except a digit.
    if first.is_ascii_digit() || !is_ident_byte(first) {
        return None;
    }

    // Every counted byte is ASCII, so `len` is always a char boundary.
    let len = text.bytes().take_while(|&b| is_ident_byte(b)).count();
    Some(text[..len].to_owned())
}
/// Splits GRL source text into the text of its individual rules.
///
/// A rule starts at the keyword `rule` and ends at the `}` that matches the
/// first `{` following the keyword (as determined by
/// `find_matching_brace_simd`). The keyword must be a whole word: it may not be
/// preceded by an ASCII letter, digit or `_` (so `overrule {…}` is ignored) and
/// must be followed by ASCII whitespace (space, tab or a line break).
///
/// Scanning resumes right after each extracted rule, so text inside a rule
/// body is never searched for further rules. A candidate whose braces are
/// never balanced is skipped.
///
/// # Returns
///
/// The rules in source order, each spanning from `rule` through its closing
/// brace inclusive.
pub fn split_into_rules_simd(grl_text: &str) -> Vec<String> {
    const KEYWORD: &[u8] = b"rule";

    /// Bytes that can be part of an identifier and therefore cannot border a keyword.
    fn is_ident_byte(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_'
    }

    let bytes = grl_text.as_bytes();
    let mut rules = Vec::new();
    let mut pos = 0;

    while let Some(offset) = memchr::memchr(b'r', &bytes[pos..]) {
        let rule_pos = pos + offset;
        // Unless a complete rule is extracted below, resume just past this 'r'.
        pos = rule_pos + 1;

        let keyword_end = rule_pos + KEYWORD.len();
        let is_rule_start = bytes[rule_pos..].starts_with(KEYWORD)
            && (rule_pos == 0 || !is_ident_byte(bytes[rule_pos - 1]))
            && bytes
                .get(keyword_end)
                .map_or(false, |b| b.is_ascii_whitespace());
        if !is_rule_start {
            continue;
        }

        let brace_pos = match memchr::memchr(b'{', &bytes[keyword_end..]) {
            Some(brace_offset) => keyword_end + brace_offset,
            // No '{' remains anywhere, so no further rule can be completed.
            None => break,
        };

        if let Some(close_pos) = find_matching_brace_simd(grl_text, brace_pos) {
            // Both ends sit on ASCII bytes, so the slice is on char boundaries.
            rules.push(grl_text[rule_pos..=close_pos].to_string());
            pos = close_pos + 1;
        }
    }

    rules
}
/// Finds the `}` that closes the `{` located at byte offset `open_pos`.
///
/// Nested braces are balanced, and braces inside double-quoted strings are
/// ignored. Inside a string a backslash escapes exactly the byte that follows
/// it, so `\"` does not end the string and sequences such as `\n` or `\\` do
/// not disturb the detection of the real closing quote.
///
/// # Returns
///
/// `Some(offset)` of the matching closing brace, or `None` when `open_pos` is
/// out of range, does not point at a `{`, or the brace is never closed.
pub fn find_matching_brace_simd(text: &str, open_pos: usize) -> Option<usize> {
    let bytes = text.as_bytes();
    if bytes.get(open_pos) != Some(&b'{') {
        return None;
    }

    let mut depth = 1usize;
    let mut pos = open_pos + 1;
    let mut in_string = false;

    while pos < bytes.len() {
        // Inside a string only quotes and backslashes matter; outside, only
        // braces and quotes do. Nothing left to find means the brace is unclosed.
        let offset = if in_string {
            memchr::memchr2(b'"', b'\\', &bytes[pos..])
        } else {
            memchr::memchr3(b'{', b'}', b'"', &bytes[pos..])
        }?;
        pos += offset;

        match bytes[pos] {
            b'\\' if in_string => {
                // Skip the backslash together with the byte it escapes.
                pos += 2;
                continue;
            }
            b'"' => in_string = !in_string,
            b'{' if !in_string => depth += 1,
            b'}' if !in_string => {
                depth -= 1;
                if depth == 0 {
                    return Some(pos);
                }
            }
            _ => {}
        }
        pos += 1;
    }
    None
}
/// Unit tests for the search helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_find_byte_simd() {
        assert_eq!(find_byte_simd(b"hello world", b'w'), Some(6));
        assert_eq!(find_byte_simd(b"hello world", b'x'), None);
    }

    #[test]
    fn test_find_either_byte_simd() {
        assert_eq!(find_either_byte_simd(b"hello world", b'w', b'l'), Some(2));
        assert_eq!(find_either_byte_simd(b"hello world", b'x', b'y'), None);
    }

    #[test]
    fn test_parse_rule_header_simd() {
        let (name, _) = parse_rule_header_simd(r#"rule "MyRule" {"#).unwrap();
        assert_eq!(name, "MyRule");
        let (name2, _) = parse_rule_header_simd("rule SimpleRule {").unwrap();
        assert_eq!(name2, "SimpleRule");
    }

    #[test]
    fn test_find_then_keyword_simd() {
        let text = "when X > 5 then Y = 10";
        let pos = find_then_keyword_simd(text).unwrap();
        assert_eq!(&text[pos..pos + 4], "then");
    }

    #[test]
    fn test_count_lines_simd() {
        assert_eq!(count_lines_simd("line1\nline2\nline3"), 2);
        assert_eq!(count_lines_simd("line1\r\nline2\r\nline3"), 2);
        assert_eq!(count_lines_simd("single line"), 0);
    }

    #[test]
    fn test_skip_whitespace_simd() {
        // Three leading spaces, so the first non-whitespace byte is at offset 3.
        assert_eq!(skip_whitespace_simd("   hello"), 3);
        // Tab, newline and two spaces: four whitespace bytes before `world`.
        assert_eq!(skip_whitespace_simd("\t\n  world"), 4);
        assert_eq!(skip_whitespace_simd("no_space"), 0);
    }

    #[test]
    fn test_extract_identifier_simd() {
        assert_eq!(
            extract_identifier_simd("hello world"),
            Some("hello".to_string())
        );
        assert_eq!(
            extract_identifier_simd("_test123"),
            Some("_test123".to_string())
        );
        assert_eq!(extract_identifier_simd("123invalid"), None);
    }

    #[test]
    fn test_split_into_rules_simd() {
        let grl = r#"
rule "Rule1" { when X > 5 then Y = 10 }
rule "Rule2" { when A < 3 then B = 7 }
"#;
        let rules = split_into_rules_simd(grl);
        assert_eq!(rules.len(), 2);
        assert!(rules[0].contains("Rule1"));
        assert!(rules[1].contains("Rule2"));
    }

    #[test]
    fn test_find_matching_brace_simd() {
        let text = "{ nested { braces } here }";
        let close = find_matching_brace_simd(text, 0).unwrap();
        assert_eq!(text.chars().nth(close).unwrap(), '}');
        assert_eq!(close, text.len() - 1);
    }

    #[test]
    fn test_find_keywords_simd() {
        let text = "when X > 5 then Y = 10 and Z = 20";
        let keywords = vec!["when", "then", "and"];
        let matches = find_keywords_simd(text, &keywords);
        assert_eq!(matches.len(), 3);
        assert_eq!(matches[0].1, "when");
        assert_eq!(matches[1].1, "then");
        assert_eq!(matches[2].1, "and");
    }
}