use crate::error::{Result, SearchError};
use grep_matcher::Matcher;
use grep_regex::RegexMatcherBuilder;
use grep_searcher::sinks::UTF8;
use grep_searcher::SearcherBuilder;
use ignore::overrides::OverrideBuilder;
use ignore::WalkBuilder;
use std::path::PathBuf;
use std::sync::mpsc;
/// A single matching line found by [`TextSearcher::search`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// Path of the file that contains the matching line.
    pub file: PathBuf,
    /// Line number of the match as reported by the searcher (the first line is 1).
    pub line: usize,
    /// Text of the matching line with trailing whitespace removed.
    pub content: String,
    /// Lines immediately preceding the match, in file order.
    pub context_before: Vec<String>,
    /// Lines immediately following the match, in file order.
    pub context_after: Vec<String>,
}
/// Configurable text search over the files beneath a base directory.
///
/// Instances are configured through the chainable setter methods and then
/// executed with `search`.
#[derive(Debug, Clone)]
pub struct TextSearcher {
    /// Whether `.gitignore`, global gitignore and `.git/info/exclude` rules are honoured.
    respect_gitignore: bool,
    /// Whether matching is case sensitive.
    case_sensitive: bool,
    /// Whether the pattern must match on word boundaries.
    word_match: bool,
    /// Whether the search text is a regular expression rather than a literal string.
    is_regex: bool,
    /// Glob patterns restricting which files are searched.
    globs: Vec<String>,
    /// Patterns describing paths to leave out of the search.
    exclusions: Vec<String>,
    /// Directory at which the file walk starts.
    base_dir: PathBuf,
    /// Number of lines of context captured before and after each match.
    context_lines: usize,
}
impl TextSearcher {
pub fn new(base_dir: PathBuf) -> Self {
Self {
respect_gitignore: true,
case_sensitive: false,
word_match: false,
is_regex: false,
globs: Vec::new(),
exclusions: Vec::new(),
base_dir,
context_lines: 2, }
}
pub fn respect_gitignore(mut self, value: bool) -> Self {
self.respect_gitignore = value;
self
}
pub fn case_sensitive(mut self, value: bool) -> Self {
self.case_sensitive = value;
self
}
pub fn word_match(mut self, value: bool) -> Self {
self.word_match = value;
self
}
pub fn is_regex(mut self, value: bool) -> Self {
self.is_regex = value;
self
}
pub fn add_globs(mut self, globs: Vec<String>) -> Self {
self.globs.extend(globs);
self
}
pub fn add_exclusions(mut self, exclusions: Vec<String>) -> Self {
self.exclusions.extend(exclusions);
self
}
pub fn context_lines(mut self, lines: usize) -> Self {
self.context_lines = lines;
self
}
pub fn search(&self, text: &str) -> Result<Vec<Match>> {
let matcher = RegexMatcherBuilder::new()
.case_insensitive(!self.case_sensitive)
.word(self.word_match)
.fixed_strings(!self.is_regex) .build(text)
.map_err(|e| SearchError::Generic(format!("Failed to build matcher: {}", e)))?;
let _searcher = SearcherBuilder::new()
.before_context(self.context_lines)
.after_context(self.context_lines)
.line_number(true)
.build();
let (tx, rx) = mpsc::channel();
let mut builder = WalkBuilder::new(&self.base_dir);
let mut walk_builder = builder
.git_ignore(self.respect_gitignore)
.git_global(self.respect_gitignore)
.git_exclude(self.respect_gitignore)
.hidden(false);
if !self.globs.is_empty() {
let mut override_builder = OverrideBuilder::new(&self.base_dir);
for glob in &self.globs {
if let Err(e) = override_builder.add(glob) {
return Err(SearchError::Generic(format!(
"Invalid glob pattern '{}': {}",
glob, e
)));
}
}
if let Ok(overrides) = override_builder.build() {
walk_builder = walk_builder.overrides(overrides);
}
}
walk_builder.build_parallel().run(|| {
let tx = tx.clone();
let matcher = matcher.clone();
let context_lines = self.context_lines;
Box::new(move |entry| {
use ignore::WalkState;
let entry = match entry {
Ok(e) => e,
Err(_) => return WalkState::Continue,
};
if entry.file_type().is_none_or(|ft| ft.is_dir()) {
return WalkState::Continue;
}
let path = entry.path();
let path_buf = path.to_path_buf();
let mut file_matches = Vec::new();
let mut searcher = SearcherBuilder::new()
.before_context(context_lines)
.after_context(context_lines)
.line_number(true)
.build();
let result = searcher.search_path(
&matcher,
path,
UTF8(|line_num, line_content| {
let line_str = line_content;
file_matches.push(Match {
file: path_buf.clone(),
line: line_num as usize,
content: line_str.trim_end().to_string(),
context_before: Vec::new(), context_after: Vec::new(), });
Ok(true) }),
);
if result.is_ok() && !file_matches.is_empty() {
let _ = tx.send(file_matches);
}
WalkState::Continue
})
});
drop(tx);
let mut all_matches = Vec::new();
for file_matches in rx {
all_matches.extend(file_matches);
}
self.add_context_to_matches(&mut all_matches, &matcher)?;
Ok(all_matches)
}
fn add_context_to_matches(&self, matches: &mut [Match], _matcher: &impl Matcher) -> Result<()> {
use std::collections::HashMap;
let mut matches_by_file: HashMap<PathBuf, Vec<usize>> = HashMap::new();
for (idx, m) in matches.iter().enumerate() {
matches_by_file.entry(m.file.clone()).or_default().push(idx);
}
for (file_path, match_indices) in matches_by_file {
if let Ok(content) = std::fs::read_to_string(&file_path) {
let lines: Vec<&str> = content.lines().collect();
for &match_idx in &match_indices {
let match_ref = &mut matches[match_idx];
let line_idx = match_ref.line.saturating_sub(1);
if line_idx < lines.len() {
let context_start = line_idx.saturating_sub(self.context_lines);
let context_end =
std::cmp::min(line_idx + self.context_lines + 1, lines.len());
match_ref.context_before = lines[context_start..line_idx]
.iter()
.map(|s| s.to_string())
.collect();
match_ref.context_after = lines[line_idx + 1..context_end]
.iter()
.map(|s| s.to_string())
.collect();
}
}
}
}
Ok(())
}
}
impl Default for TextSearcher {
    /// Builds a searcher rooted at the process's current working directory,
    /// falling back to the relative path `.` when that directory cannot be
    /// determined.
    fn default() -> Self {
        let base_dir = match std::env::current_dir() {
            Ok(dir) => dir,
            Err(_) => PathBuf::from("."),
        };
        Self::new(base_dir)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a temporary directory containing the given `(name, contents)` files.
    fn fixture(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for &(name, contents) in files {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Creates a temporary directory with a `.git` directory, a `.gitignore`
    /// listing `ignored.txt`, and two files that both contain the word "target".
    fn git_fixture() -> TempDir {
        let dir = TempDir::new().unwrap();
        fs::create_dir(dir.path().join(".git")).unwrap();
        for &(name, contents) in &[
            (".gitignore", "ignored.txt\n"),
            ("ignored.txt", "target content"),
            ("tracked.txt", "target content"),
        ] {
            fs::write(dir.path().join(name), contents).unwrap();
        }
        dir
    }

    /// Builds a searcher with default settings rooted at `dir`.
    fn searcher_for(dir: &TempDir) -> TextSearcher {
        TextSearcher::new(dir.path().to_path_buf())
    }

    #[test]
    fn test_basic_search() {
        let dir = fixture(&[("test.txt", "hello world\nfoo bar\nhello again")]);
        let found = searcher_for(&dir).search("hello").unwrap();
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].content, "hello world");
        assert_eq!(found[1].line, 3);
        assert_eq!(found[1].content, "hello again");
    }

    #[test]
    fn test_case_insensitive_default() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);
        let found = searcher_for(&dir).search("hello").unwrap();
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn test_case_sensitive() {
        let dir = fixture(&[("test.txt", "Hello World\nHELLO\nhello")]);
        let found = searcher_for(&dir)
            .case_sensitive(true)
            .search("hello")
            .unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].content, "hello");
    }

    #[test]
    fn test_no_matches() {
        let dir = fixture(&[("test.txt", "foo bar baz")]);
        let found = searcher_for(&dir).search("notfound").unwrap();
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_multiple_files() {
        let dir = fixture(&[
            ("file1.txt", "target line 1"),
            ("file2.txt", "target line 2"),
            ("file3.txt", "other content"),
        ]);
        let found = searcher_for(&dir).search("target").unwrap();
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_gitignore_respected() {
        let dir = git_fixture();
        let found = searcher_for(&dir)
            .respect_gitignore(true)
            .search("target")
            .unwrap();
        assert_eq!(found.len(), 1);
        assert!(found[0].file.ends_with("tracked.txt"));
    }

    #[test]
    fn test_gitignore_disabled() {
        let dir = git_fixture();
        let found = searcher_for(&dir)
            .respect_gitignore(false)
            .search("target")
            .unwrap();
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_builder_pattern() {
        let cwd = std::env::current_dir().unwrap();
        let configured = TextSearcher::new(cwd)
            .case_sensitive(true)
            .respect_gitignore(false);
        assert!(configured.case_sensitive);
        assert!(!configured.respect_gitignore);
    }

    #[test]
    fn test_default() {
        let defaults = TextSearcher::default();
        assert!(!defaults.case_sensitive);
        assert!(defaults.respect_gitignore);
        assert_eq!(defaults.context_lines, 2);
    }

    #[test]
    fn test_special_characters() {
        let dir = fixture(&[("test.txt", "price: $19.99\nurl: http://example.com")]);
        let searcher = searcher_for(&dir);
        // Regex metacharacters must be treated literally by default.
        let price_hits = searcher.search("$19.99").unwrap();
        assert_eq!(price_hits.len(), 1);
        let url_hits = searcher.search("http://").unwrap();
        assert_eq!(url_hits.len(), 1);
    }

    #[test]
    fn test_line_numbers_accurate() {
        let body = "line 1\nline 2\ntarget line 3\nline 4\ntarget line 5\nline 6";
        let dir = fixture(&[("test.txt", body)]);
        let found = searcher_for(&dir).search("target").unwrap();
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 3);
        assert_eq!(found[1].line, 5);
    }
}