use rayon::prelude::*;
use std::collections::BTreeSet;
use std::sync::OnceLock;
use crate::rules::{is_word_byte, AcMeta, ResidualShard, RuleSet};
use crate::scan_format::{build_line_index, emit_hit};
/// Scans `content` against every rule in `rs` and returns the collected hit strings.
///
/// The scan runs in four stages, and hits are appended in this stage order:
///
/// 1. The case-sensitive automaton (`rs.ac`): literal matches are emitted directly
///    (subject to their word-boundary flags); regex-prefix matches only record the
///    position of the regex rule to run later.
/// 2. The case-insensitive automaton (`rs.ac_ci`): only regex-prefix matches are
///    acted upon.
/// 3. Every regex rule whose prefix was seen in stage 1 or 2 is evaluated in parallel.
/// 4. Every residual shard is evaluated: `Single` shards always, `Combined` shards
///    only when their gate matches (or the gate itself fails).
///
/// Each hit is formatted by `emit_hit`; a regex engine failure is reported as a
/// `"<path>: rule=<idx> engine error"` entry instead of being dropped.
///
/// # Panics
///
/// Panics if a pattern id or rule position stored in `rs` is out of range for the
/// corresponding table (`ac_meta`, `ac_meta_ci`, `regex_rules`).
pub fn scan_content(path: &str, content: &[u8], rs: &RuleSet) -> Vec<String> {
    let mut hits: Vec<String> = Vec::new();
    // Positions (into `rs.regex_rules`) of regex rules whose literal prefix was seen.
    // A BTreeSet dedups them and yields them in ascending order.
    let mut prefix_matched: BTreeSet<usize> = BTreeSet::new();
    // Built at most once, and only if some hit actually has to be formatted.
    let line_index: OnceLock<Vec<usize>> = OnceLock::new();

    if let Some(ac) = &rs.ac {
        for m in ac.find_overlapping_iter(content) {
            let pid = m.pattern().as_usize();
            match &rs.ac_meta[pid] {
                AcMeta::Literal { idx, bound_left, bound_right } => {
                    // A bounded side must not be adjacent to a word byte; the edge of
                    // the buffer always counts as a boundary.
                    if *bound_left && m.start() > 0 && is_word_byte(content[m.start() - 1]) {
                        continue;
                    }
                    if *bound_right
                        && m.end() < content.len()
                        && is_word_byte(content[m.end()])
                    {
                        continue;
                    }
                    let li = line_index.get_or_init(|| build_line_index(content));
                    hits.push(emit_hit(li, path, m.start(), m.end(), *idx));
                }
                AcMeta::RegexPrefix { rule_pos } => {
                    prefix_matched.insert(*rule_pos);
                }
            }
        }
    }

    if let Some(ac_ci) = &rs.ac_ci {
        for m in ac_ci.find_overlapping_iter(content) {
            let pid = m.pattern().as_usize();
            match &rs.ac_meta_ci[pid] {
                // NOTE(review): literal matches from the case-insensitive automaton are
                // discarded here, unlike the case-sensitive path above. Confirm this is
                // intentional (e.g. the CI automaton only ever holds regex prefixes).
                AcMeta::Literal { .. } => {}
                AcMeta::RegexPrefix { rule_pos } => {
                    prefix_matched.insert(*rule_pos);
                }
            }
        }
    }

    if !prefix_matched.is_empty() {
        let positions: Vec<usize> = prefix_matched.iter().copied().collect();
        let regex_hits: Vec<String> = positions
            .par_iter()
            .flat_map_iter(|&pos| collect_regex_rule_hits(path, content, rs, pos, &line_index))
            .collect();
        hits.extend(regex_hits);
    }

    for shard in &rs.residual_shards {
        match shard {
            ResidualShard::Single { rule_pos } => {
                hits.extend(collect_regex_rule_hits(
                    path,
                    content,
                    rs,
                    *rule_pos,
                    &line_index,
                ));
            }
            ResidualShard::Combined { gate, positions } => {
                // Fail open: if the gate cannot be evaluated, run every rule it guards
                // rather than silently skipping them.
                let gate_result = gate.is_match(content);
                let should_evaluate = matches!(gate_result, Ok(true) | Err(()));
                if should_evaluate {
                    let regex_hits: Vec<String> = positions
                        .par_iter()
                        .flat_map_iter(|&pos| {
                            collect_regex_rule_hits(path, content, rs, pos, &line_index)
                        })
                        .collect();
                    hits.extend(regex_hits);
                }
            }
        }
    }

    hits
}

/// Runs the regex rule at `rs.regex_rules[pos]` over `content` and formats its hits.
///
/// Zero-width matches are skipped. If the engine reports an error, the result is a
/// single `"<path>: rule=<idx> engine error"` entry. The shared `line_index` is only
/// initialised when there is at least one hit to format, so rules that match nothing
/// never trigger the line-index build.
fn collect_regex_rule_hits(
    path: &str,
    content: &[u8],
    rs: &RuleSet,
    pos: usize,
    line_index: &OnceLock<Vec<usize>>,
) -> Vec<String> {
    let rr = &rs.regex_rules[pos];
    let mut local: Vec<String> = Vec::new();
    match rr.re.find_all(content) {
        Ok(matches) => {
            for m in matches {
                if m.start == m.end {
                    continue;
                }
                let li = line_index.get_or_init(|| build_line_index(content));
                local.push(emit_hit(li, path, m.start, m.end, rr.idx));
            }
        }
        Err(()) => {
            local.push(format!("{}: rule={} engine error", path, rr.idx));
        }
    }
    local
}