use std::path::Path;
use memchr::memmem;
use regex::bytes::Regex;
use crate::index::is_binary;
use crate::SearchMatch;
use super::lines::for_each_line;
pub fn verify_literal(pattern: &str, path: &Path, content: &[u8]) -> Vec<SearchMatch> {
if is_binary(content) {
return Vec::new(); }
let finder = memmem::Finder::new(pattern.as_bytes());
collect_line_matches(path, content, |line| {
finder
.find(line)
.map(|start| (start, start + pattern.len()))
})
}
pub fn verify_regex(re: &Regex, path: &Path, content: &[u8]) -> Vec<SearchMatch> {
if is_binary(content) {
return Vec::new(); }
collect_line_matches(path, content, |line| {
re.find(line).map(|m| (m.start(), m.end()))
})
}
fn collect_line_matches(
path: &Path,
content: &[u8],
mut predicate: impl FnMut(&[u8]) -> Option<(usize, usize)>,
) -> Vec<SearchMatch> {
let mut matches = Vec::new();
for_each_line(content, |line_num, line_start, line| {
if let Some((match_start, match_end)) = predicate(line) {
matches.push(SearchMatch {
path: path.to_path_buf(),
line_number: line_num,
line_content: line.to_vec(),
byte_offset: (line_start + match_start) as u64,
submatch_start: match_start,
submatch_end: match_end,
});
}
});
matches
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn literal_reports_match_start_offset() {
let matches = verify_literal("needle", Path::new("file.txt"), b"prefix needle suffix\n");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].byte_offset, 7);
assert_eq!(matches[0].submatch_start, 7);
assert_eq!(matches[0].submatch_end, 13);
}
#[test]
fn regex_reports_match_start_offset() {
let re = Regex::new("needle").unwrap();
let matches = verify_regex(&re, Path::new("file.txt"), b"prefix needle suffix\n");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].byte_offset, 7);
assert_eq!(matches[0].submatch_start, 7);
assert_eq!(matches[0].submatch_end, 13);
}
#[test]
fn crlf_offsets_include_line_break_bytes_before_match() {
let matches = verify_literal("needle", Path::new("file.txt"), b"one\r\ntwo needle\r\n");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].line_number, 2);
assert_eq!(matches[0].byte_offset, 9);
assert_eq!(matches[0].line_content, b"two needle");
}
#[test]
fn regex_matches_invalid_utf8_line_bytes() {
let re = Regex::new(r"(?-u)\xFF").unwrap();
let matches = verify_regex(&re, Path::new("file.bin"), b"prefix\xFFsuffix\n");
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].line_content, b"prefix\xFFsuffix");
assert_eq!(matches[0].submatch_start, 6);
assert_eq!(matches[0].submatch_end, 7);
}
}