use super::types::{SearchConfig, SearchMatch};
use regex::{Regex, RegexBuilder};
pub struct SearchEngine {
cached_regex: Option<(String, bool, Regex)>, }
impl Default for SearchEngine {
fn default() -> Self {
Self::new()
}
}
impl SearchEngine {
pub fn new() -> Self {
Self { cached_regex: None }
}
pub fn search<I>(&mut self, lines: I, query: &str, config: &SearchConfig) -> Vec<SearchMatch>
where
I: Iterator<Item = (usize, String)>,
{
if query.is_empty() {
return Vec::new();
}
let mut matches = Vec::new();
if config.use_regex {
self.search_regex(lines, query, config, &mut matches);
} else {
self.search_plain(lines, query, config, &mut matches);
}
matches
}
fn search_plain<I>(
&self,
lines: I,
query: &str,
config: &SearchConfig,
matches: &mut Vec<SearchMatch>,
) where
I: Iterator<Item = (usize, String)>,
{
let query_lower = if config.case_sensitive {
query.to_string()
} else {
query.to_lowercase()
};
let query_char_len = query.chars().count();
for (line_idx, line) in lines {
let search_line = if config.case_sensitive {
line.clone()
} else {
line.to_lowercase()
};
let mut start_byte = 0;
while let Some(pos) = search_line[start_byte..].find(&query_lower) {
let byte_offset = start_byte + pos;
let char_column = Self::byte_offset_to_char_offset(&search_line, byte_offset);
if config.whole_word
&& !Self::is_whole_word_static(&line, byte_offset, query_lower.len())
{
start_byte = byte_offset + 1;
continue;
}
matches.push(SearchMatch::new(line_idx, char_column, query_char_len));
start_byte = byte_offset + query_lower.len().max(1);
if query.is_empty() {
break;
}
}
}
}
fn search_regex<I>(
&mut self,
lines: I,
query: &str,
config: &SearchConfig,
matches: &mut Vec<SearchMatch>,
) where
I: Iterator<Item = (usize, String)>,
{
let regex = match self.get_or_compile_regex(query, config.case_sensitive) {
Ok(re) => re.clone(), Err(e) => {
log::debug!("Invalid regex pattern '{}': {}", query, e);
return;
}
};
for (line_idx, line) in lines {
for mat in regex.find_iter(&line) {
let byte_start = mat.start();
let byte_end = mat.end();
let char_column = Self::byte_offset_to_char_offset(&line, byte_start);
let char_length = Self::byte_offset_to_char_offset(&line, byte_end) - char_column;
if config.whole_word
&& !Self::is_whole_word_static(&line, byte_start, byte_end - byte_start)
{
continue;
}
matches.push(SearchMatch::new(line_idx, char_column, char_length));
}
}
}
fn byte_offset_to_char_offset(s: &str, byte_offset: usize) -> usize {
s[..byte_offset].chars().count()
}
fn get_or_compile_regex(
&mut self,
pattern: &str,
case_sensitive: bool,
) -> Result<&Regex, regex::Error> {
let needs_recompile = match &self.cached_regex {
Some((cached_pattern, cached_case, _)) => {
cached_pattern != pattern || *cached_case != case_sensitive
}
None => true,
};
if needs_recompile {
let regex = RegexBuilder::new(pattern)
.case_insensitive(!case_sensitive)
.build()?;
self.cached_regex = Some((pattern.to_string(), case_sensitive, regex));
}
Ok(&self
.cached_regex
.as_ref()
.expect("cached_regex was just set to Some above if it was None")
.2)
}
fn is_whole_word_static(line: &str, start: usize, length: usize) -> bool {
let end = start + length;
if start > 0
&& let Some(c) = line[..start].chars().last()
&& (c.is_alphanumeric() || c == '_')
{
return false;
}
if end < line.len()
&& let Some(c) = line[end..].chars().next()
&& (c.is_alphanumeric() || c == '_')
{
return false;
}
true
}
pub fn clear_cache(&mut self) {
self.cached_regex = None;
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_lines<'a>(texts: &'a [&'a str]) -> impl Iterator<Item = (usize, String)> + 'a {
texts.iter().enumerate().map(|(i, s)| (i, s.to_string()))
}
#[test]
fn test_plain_search_case_insensitive() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["Hello World", "hello there", "HELLO WORLD"];
let config = SearchConfig::default();
let matches = engine.search(make_lines(&lines), "hello", &config);
assert_eq!(matches.len(), 3);
assert_eq!(matches[0], SearchMatch::new(0, 0, 5));
assert_eq!(matches[1], SearchMatch::new(1, 0, 5));
assert_eq!(matches[2], SearchMatch::new(2, 0, 5));
}
#[test]
fn test_plain_search_case_sensitive() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["Hello World", "hello there", "HELLO WORLD"];
let config = SearchConfig {
case_sensitive: true,
..Default::default()
};
let matches = engine.search(make_lines(&lines), "hello", &config);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0], SearchMatch::new(1, 0, 5));
}
#[test]
fn test_plain_search_multiple_matches_per_line() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["foo bar foo baz foo"];
let config = SearchConfig::default();
let matches = engine.search(make_lines(&lines), "foo", &config);
assert_eq!(matches.len(), 3);
assert_eq!(matches[0], SearchMatch::new(0, 0, 3));
assert_eq!(matches[1], SearchMatch::new(0, 8, 3));
assert_eq!(matches[2], SearchMatch::new(0, 16, 3));
}
#[test]
fn test_whole_word_matching() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["foobar foo barfoo"];
let config = SearchConfig {
whole_word: true,
..Default::default()
};
let matches = engine.search(make_lines(&lines), "foo", &config);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0], SearchMatch::new(0, 7, 3));
}
#[test]
fn test_regex_search() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec![
"error: something failed",
"warning: check this",
"error: again",
];
let config = SearchConfig {
use_regex: true,
..Default::default()
};
let matches = engine.search(make_lines(&lines), "error:", &config);
assert_eq!(matches.len(), 2);
assert_eq!(matches[0], SearchMatch::new(0, 0, 6));
assert_eq!(matches[1], SearchMatch::new(2, 0, 6));
}
#[test]
fn test_regex_pattern() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["test123", "test456", "notest"];
let config = SearchConfig {
use_regex: true,
..Default::default()
};
let matches = engine.search(make_lines(&lines), r"test\d+", &config);
assert_eq!(matches.len(), 2);
assert_eq!(matches[0], SearchMatch::new(0, 0, 7));
assert_eq!(matches[1], SearchMatch::new(1, 0, 7));
}
#[test]
fn test_empty_query() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["some text"];
let config = SearchConfig::default();
let matches = engine.search(make_lines(&lines), "", &config);
assert!(matches.is_empty());
}
#[test]
fn test_unicode_character_offsets() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["📁 Downloads", "normal text"];
let config = SearchConfig::default();
let matches = engine.search(make_lines(&lines), "down", &config);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].line, 0);
assert_eq!(matches[0].column, 2); assert_eq!(matches[0].length, 4);
}
#[test]
fn test_unicode_multiple_emoji() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["🎉🎊🎁 party time"];
let config = SearchConfig::default();
let matches = engine.search(make_lines(&lines), "party", &config);
assert_eq!(matches.len(), 1);
assert_eq!(matches[0].column, 4);
assert_eq!(matches[0].length, 5);
}
#[test]
fn test_invalid_regex() {
let mut engine = SearchEngine::new();
let lines: Vec<&str> = vec!["some text"];
let config = SearchConfig {
use_regex: true,
..Default::default()
};
let matches = engine.search(make_lines(&lines), "[invalid", &config);
assert!(matches.is_empty());
}
}