use crate::{
config::Config,
errors::Message,
match_system::{Directory, File, Line, Match, Matches, wrap_dirs, wrap_file},
mes,
};
use crossbeam_channel::{Receiver, Sender};
use grep::{
matcher::LineTerminator,
regex::{RegexMatcher, RegexMatcherBuilder},
searcher::{
BinaryDetection, MmapChoice, Searcher, SearcherBuilder, Sink, SinkContext, SinkMatch,
},
};
use ignore::{WalkBuilder, WalkState, overrides::OverrideBuilder};
use std::{
collections::HashMap,
ffi::OsString,
io,
path::{Path, PathBuf},
sync::{
Arc,
atomic::{AtomicBool, Ordering},
},
};
/// Compiled forms of the user-supplied search patterns.
///
/// The same pattern list is compiled twice: once into a single matcher that
/// is handed to the searcher, and once per pattern so that individual match
/// spans can be attributed to the pattern that produced them.
struct Matcher {
/// Matcher built from all patterns at once; passed to `Searcher::search_path`.
combined: RegexMatcher,
/// One byte-oriented regex per pattern, in input order. The position of a
/// regex in this vector is used as its pattern id when recording matches.
individual: Vec<regex::bytes::Regex>,
}
impl Matcher {
    /// Compiles `patterns` into both the combined matcher and the list of
    /// per-pattern regexes.
    ///
    /// # Errors
    ///
    /// Returns a `Message` if the combined matcher cannot be built, or if any
    /// single pattern fails to compile on its own.
    fn new(patterns: &[String]) -> Result<Self, Message> {
        // The combined matcher is built first so that its (more detailed)
        // error message is the one reported for an invalid pattern set.
        let mut builder = RegexMatcherBuilder::new();
        builder.line_terminator(Some(b'\n'));
        let combined = builder
            .build_many(patterns)
            .map_err(|e| mes!("regex expression is invalid: {}", e))?;

        // Compile every pattern separately, preserving input order so that a
        // regex's index doubles as its pattern id.
        let mut individual = Vec::with_capacity(patterns.len());
        for pattern in patterns {
            let compiled = regex::bytes::Regex::new(pattern)
                .map_err(|_| mes!("regex expression `{}` is invalid", pattern))?;
            individual.push(compiled);
        }

        Ok(Self {
            combined,
            individual,
        })
    }
}
/// Sink that collects the matching (and context) lines reported by the
/// searcher for a single file.
struct MatchSink<'a> {
/// Lines collected so far, in the order the searcher reported them.
lines: Vec<Line>,
/// Compiled patterns; the per-pattern regexes are re-run on each matched
/// line to find the individual match spans.
matcher: &'a Matcher,
/// Scratch buffer for the spans of the line currently being processed.
match_buf: Vec<Match>,
}
/// Returns `bytes` without a single trailing line terminator.
///
/// A trailing `\r\n` is removed as a unit; otherwise a trailing `\n` is
/// removed. A lone trailing `\r` is left untouched, and at most one
/// terminator is stripped.
fn strip_line_ending(bytes: &[u8]) -> &[u8] {
    match bytes {
        // CRLF must be tested first so the `\r` is not left behind.
        [head @ .., b'\r', b'\n'] => head,
        [head @ .., b'\n'] => head,
        _ => bytes,
    }
}
impl<'a> Sink for MatchSink<'a> {
    type Error = io::Error;

    /// Records a line reported as matching by the searcher.
    ///
    /// Every per-pattern regex is run over the line (with its terminator
    /// removed) and each hit is stored together with the index of the pattern
    /// that produced it. If none of the per-pattern regexes hits, the line is
    /// not recorded. Always returns `Ok(true)`.
    fn matched(&mut self, _searcher: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, io::Error> {
        let text = strip_line_ending(mat.bytes());
        // Falls back to 0 when the searcher supplies no line number.
        let number = mat.line_number().unwrap_or(0) as usize;

        self.match_buf.clear();
        for (pattern_id, regex) in self.matcher.individual.iter().enumerate() {
            let spans = regex
                .find_iter(text)
                .map(|hit| Match::new(pattern_id, hit.start(), hit.end()));
            self.match_buf.extend(spans);
        }
        if self.match_buf.is_empty() {
            return Ok(true);
        }

        // Hand the filled buffer to the line and leave a fresh one behind.
        let spans = std::mem::replace(&mut self.match_buf, Vec::with_capacity(8));
        let content = String::from_utf8_lossy(text).into_owned();
        self.lines.push(Line::new(content, spans, number));
        Ok(true)
    }

    /// Records a context line (one surrounding a match) without any spans.
    /// Always returns `Ok(true)`.
    fn context(&mut self, _searcher: &Searcher, ctx: &SinkContext<'_>) -> Result<bool, io::Error> {
        let text = strip_line_ending(ctx.bytes());
        let content = String::from_utf8_lossy(text).into_owned();
        // Falls back to 0 when the searcher supplies no line number.
        let number = ctx.line_number().unwrap_or(0) as usize;
        self.lines.push(Line::new_context(content, number));
        Ok(true)
    }
}
/// Entry point: runs the search described by `config`.
///
/// Compiles the configured patterns, builds a line-oriented searcher and then
/// either walks `config.path` as a directory tree (when `config.is_dir` is
/// set) or searches it as a single file. The raw results are passed through
/// `wrap_dirs` / `wrap_file` to produce the return value.
///
/// `abort` is only consulted by the directory walk, which stops visiting
/// entries once the flag is set.
///
/// # Errors
///
/// Returns a `Message` if a pattern fails to compile or if the underlying
/// file/directory search reports an error.
pub fn search(abort: Arc<AtomicBool>, config: Arc<Config>) -> Result<Option<Matches>, Message> {
    let matchers = Matcher::new(&config.regexps)?;

    let mut builder = SearcherBuilder::new();
    builder
        .line_number(true)
        .line_terminator(LineTerminator::byte(b'\n'))
        // Stop searching a file as soon as a NUL byte is seen.
        .binary_detection(BinaryDetection::quit(b'\x00'))
        // NOTE(review): `MmapChoice::auto` is an `unsafe fn`; presumably the
        // contract is that searched files are not truncated or mutated while
        // mapped — confirm against the grep-searcher documentation.
        .memory_map(unsafe { MmapChoice::auto() })
        .before_context(config.before_context)
        .after_context(config.after_context);
    let mut searcher = builder.build();

    if !config.is_dir {
        let target = config.path.to_path_buf();
        let found = search_file(target, &matchers, &mut searcher, &config)?;
        // With `files` set and no patterns the file itself is the result.
        let listing_only = config.files && config.regexps.is_empty();
        return Ok(wrap_file(found, listing_only));
    }

    // Walker threads each need their own handle on the compiled patterns.
    let shared = Arc::new(matchers);
    let directories = search_dir(&shared, &mut searcher, abort, config)?;
    Ok(wrap_dirs(directories))
}
/// Searches the directory tree rooted at `config.path`, restricted by
/// `config.globs`.
///
/// The root path and glob list are copied out of `config` first because the
/// `Arc<Config>` itself is moved into `search_dir_impl`.
fn search_dir(
    matchers: &Arc<Matcher>,
    searcher: &mut Searcher,
    abort: Arc<AtomicBool>,
    config: Arc<Config>,
) -> Result<Vec<Directory>, Message> {
    let (root, patterns) = (config.path.clone(), config.globs.clone());
    search_dir_impl(&root, &patterns, matchers, searcher, abort, config)
}
/// Walks `path` in parallel, searches every regular file found, and groups
/// the files that produced results into a directory tree.
///
/// * `globs` are added as overrides on the walker.
/// * `searcher` is cloned once per walker thread; the original is not used
///   for searching here.
/// * `abort` is polled before each entry; once set, the walk quits.
///
/// Entries that cannot be read, and files whose search fails or yields
/// nothing, are skipped silently.
///
/// # Errors
///
/// Returns a `Message` if a glob is invalid, if the overrides cannot be
/// built, or if assembling the directory tree fails.
fn search_dir_impl(
    path: &PathBuf,
    globs: &[String],
    matchers: &Arc<Matcher>,
    searcher: &mut Searcher,
    abort: Arc<AtomicBool>,
    config: Arc<Config>,
) -> Result<Vec<Directory>, Message> {
    let mut glob_rules = OverrideBuilder::new(path);
    for pattern in globs {
        glob_rules
            .add(pattern)
            .map_err(|_| mes!("glob {} is invalid", pattern))?;
    }
    let overrides = glob_rules
        .build()
        .map_err(|_| mes!("failed to build override builder with given globs"))?;

    let mut walk = WalkBuilder::new(path);
    walk.hidden(!config.hidden)
        .max_depth(config.max_depth)
        .follow_links(config.links)
        // A single config switch drives every ignore-file source.
        .ignore(config.ignore)
        .git_global(config.ignore)
        .git_ignore(config.ignore)
        .git_exclude(config.ignore)
        .require_git(false)
        .threads(config.core.threads)
        .overrides(overrides);
    let walker = walk.build_parallel();

    let (tx, rx) = crossbeam_channel::unbounded::<File>();
    walker.run(|| {
        // Per-thread state: each worker owns its own sender, searcher and
        // handles on the shared data.
        let sender = tx.clone();
        let patterns = Arc::clone(matchers);
        let mut local_searcher = searcher.clone();
        let stop = Arc::clone(&abort);
        let settings = Arc::clone(&config);
        Box::new(move |entry_result| {
            if stop.load(Ordering::Relaxed) {
                return WalkState::Quit;
            }
            let Ok(entry) = entry_result else {
                return WalkState::Continue;
            };
            if !entry.file_type().is_some_and(|ft| ft.is_file()) {
                return WalkState::Continue;
            }
            match search_file(entry.into_path(), &patterns, &mut local_searcher, &settings) {
                Ok(Some(file)) => {
                    let _ = sender.send(file);
                }
                Ok(None) | Err(_) => {}
            }
            WalkState::Continue
        })
    });
    // Drop the original sender so draining `rx` terminates once the
    // per-thread clones are gone.
    drop(tx);

    build_directory_tree(rx, path, config.links)
}
/// Searches one file and returns it together with its collected lines.
///
/// * When `config.files` is set and there are no patterns, no search is run
///   and the file is returned as-is.
/// * Returns `Ok(None)` when the searcher reports an error for the file or
///   when no lines were collected.
/// * When any context is requested, `Line::compute_context_offsets` is run on
///   the collected lines before they are attached to the file.
///
/// # Errors
///
/// Returns a `Message` only if `File::from_pathbuf` fails.
fn search_file(
    pb: PathBuf,
    matcher: &Matcher,
    searcher: &mut Searcher,
    config: &Config,
) -> Result<Option<File>, Message> {
    let listing_only = config.files && config.regexps.is_empty();
    if listing_only {
        let file = File::from_pathbuf(pb, config.links)?;
        return Ok(Some(file));
    }

    let mut sink = MatchSink {
        lines: Vec::with_capacity(32),
        match_buf: Vec::with_capacity(8),
        matcher,
    };
    let outcome = searcher.search_path(&matcher.combined, &pb, &mut sink);
    // A failed search is treated the same as "nothing found".
    if outcome.is_err() || sink.lines.is_empty() {
        return Ok(None);
    }

    let wants_context = config.before_context > 0 || config.after_context > 0;
    if wants_context {
        Line::compute_context_offsets(&mut sink.lines);
    }

    let mut file = File::from_pathbuf(pb, config.links)?;
    file.lines = sink.lines;
    Ok(Some(file))
}
/// Drains `rx` and groups the received files by their parent directory.
///
/// The returned vector is a flat arena: index 0 is always the directory for
/// `root_path`, and further directories are appended as files inside them
/// arrive. Files whose path has no parent are dropped.
///
/// # Errors
///
/// Returns a `Message` if any `Directory` cannot be constructed.
fn build_directory_tree(
    rx: Receiver<File>,
    root_path: &Path,
    links: bool,
) -> Result<Vec<Directory>, Message> {
    let root = Directory::new(root_path, links)?;
    let mut directories: Vec<Directory> = vec![root];
    // Maps a directory path to its slot in `directories`.
    let mut path_to_index: HashMap<OsString, usize> =
        HashMap::from([(root_path.as_os_str().to_owned(), 0)]);

    for file in rx {
        let Some(dir_path) = file.path.parent() else {
            continue;
        };
        let slot = get_or_create_directory(
            &mut path_to_index,
            &mut directories,
            dir_path,
            root_path,
            links,
        )?;
        directories[slot].files.push(file);
    }
    Ok(directories)
}
/// Returns the index in `directories` of the entry for `dir_path`, creating
/// it — and, recursively, any ancestors not yet present — on first use.
///
/// * `path_to_index` maps a directory path to its slot in `directories`; it
///   is updated whenever a directory is created.
/// * `root_path` is the search root. A path that compares equal to it is
///   always looked up under the root's own spelling, because `Path` equality
///   is component-wise (`src` == `src/`) while the map is keyed by raw
///   `OsString`s. Without this, a root given with a trailing separator was
///   never found: files directly under it produced a duplicate root, and
///   first-level subdirectories were not linked to the root.
/// * A newly created directory is appended to its parent's `children` when
///   the parent is known.
///
/// # Errors
///
/// Returns a `Message` if `Directory::new` fails for `dir_path` or for one of
/// its ancestors. Nothing is registered for a directory whose construction
/// failed.
fn get_or_create_directory(
    path_to_index: &mut HashMap<OsString, usize>,
    directories: &mut Vec<Directory>,
    dir_path: &std::path::Path,
    root_path: &Path,
    links: bool,
) -> Result<usize, Message> {
    // Normalise "equal to root" onto the key the root was registered under.
    let key = if dir_path == root_path {
        root_path.as_os_str()
    } else {
        dir_path.as_os_str()
    };
    if let Some(&idx) = path_to_index.get(key) {
        return Ok(idx);
    }

    let parent_idx = match dir_path.parent() {
        None => None,
        // The root is never created here; if the caller has not registered
        // it, the new directory simply has no parent link.
        Some(parent) if parent == root_path => {
            path_to_index.get(root_path.as_os_str()).copied()
        }
        Some(parent) => match path_to_index.get(parent.as_os_str()).copied() {
            Some(idx) => Some(idx),
            None => Some(get_or_create_directory(
                path_to_index,
                directories,
                parent,
                root_path,
                links,
            )?),
        },
    };

    // Build the directory before registering it so a failure cannot leave a
    // map entry pointing at a slot that was never filled.
    let directory = Directory::new(dir_path, links)?;
    let new_idx = directories.len();
    path_to_index.insert(key.to_owned(), new_idx);
    directories.push(directory);
    if let Some(p_idx) = parent_idx {
        directories[p_idx].children.push(new_idx);
    }
    Ok(new_idx)
}