use crate::binary::is_binary;
use crate::config::{split_csv, Config};
use crate::error::{FsearchError, FsearchResult};
use glob::Pattern;
use rayon::prelude::*;
use std::fs;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use walkdir::WalkDir;
/// A single content hit: the 1-based line number paired with the text of the matching line.
pub type LineMatch = (usize, String);
/// One search result, produced either by matching entry names or by matching file contents.
#[derive(Debug, Clone)]
pub enum SearchMatch {
/// An entry whose name matched the search pattern.
Path(PathBuf),
/// A file in which one or more lines matched the search pattern.
Content {
/// Location of the file that was searched.
path: PathBuf,
/// The matching lines as `(line number, text)` pairs, in the order they were read.
lines: Vec<LineMatch>,
},
}
impl SearchMatch {
    /// Returns the filesystem path this match refers to, regardless of variant.
    pub fn path(&self) -> &Path {
        // Both variants carry exactly one path, so a single arm covers them.
        match self {
            Self::Path(location) | Self::Content { path: location, .. } => location,
        }
    }

    /// Reports whether this match came from searching file contents
    /// (`true`) rather than from matching an entry name (`false`).
    pub fn is_content(&self) -> bool {
        match self {
            Self::Content { .. } => true,
            Self::Path(_) => false,
        }
    }
}
/// All settings that control a single search run.
#[derive(Debug, Clone)]
pub struct SearchOptions {
/// Directory the search starts from; it must exist and be a directory.
pub base_dir: PathBuf,
/// Text to look for: matched against entry names, or against file lines
/// when `search_in_files` is set.
pub pattern: String,
/// How many directory levels below `base_dir` are visited; `0` means only
/// the direct children of `base_dir`.
pub max_depth: u32,
/// Whether directories whose names match may be reported in name-search mode.
pub include_dirs: bool,
/// When set, names, lines and the pattern are lower-cased before comparison.
pub case_insensitive: bool,
/// When set, file contents are searched instead of entry names.
pub search_in_files: bool,
/// Glob patterns a file name must match to be considered; an empty list
/// accepts every file.
pub include_patterns: Vec<String>,
/// Directory names (or glob patterns for them) that are skipped entirely.
pub exclude_dirs: Vec<String>,
/// Lines longer than this many bytes are ignored during content search.
pub max_line_length: usize,
/// Forwarded to the binary-file check that runs before a content search;
/// presumably the number of leading bytes inspected (confirm against `is_binary`).
pub binary_check_bytes: usize,
/// Maximum number of results returned; `0` means no limit.
pub max_results: usize,
}
impl SearchOptions {
    /// Creates options for searching `pattern` under `base_dir`, taking every
    /// other setting from `cfg`.
    ///
    /// Content search is always switched off in the returned value; callers
    /// that want it must set `search_in_files` afterwards.
    pub fn from_config(cfg: &Config, base_dir: PathBuf, pattern: String) -> Self {
        // Derived list settings are computed first so the literal below reads
        // as a plain field-by-field mapping.
        let include_patterns = split_csv(&cfg.default_include);
        let exclude_dirs = cfg.excluded_dirs();

        Self {
            search_in_files: false,
            base_dir,
            pattern,
            include_patterns,
            exclude_dirs,
            max_depth: cfg.default_depth,
            include_dirs: cfg.include_dirs,
            case_insensitive: cfg.case_insensitive,
            max_line_length: cfg.max_line_length,
            binary_check_bytes: cfg.binary_check_bytes,
            max_results: cfg.max_results,
        }
    }

    /// Starts a [`SearchOptionsBuilder`] for `pattern`, pre-filled with the
    /// builder's built-in defaults.
    pub fn builder(pattern: impl Into<String>) -> SearchOptionsBuilder {
        let pattern: String = pattern.into();
        SearchOptionsBuilder::new(pattern)
    }
}
/// Fluent builder for [`SearchOptions`], obtained through [`SearchOptions::builder`].
///
/// Every setter consumes the builder and returns it, so calls can be chained
/// and finished with [`SearchOptionsBuilder::build`].
pub struct SearchOptionsBuilder(SearchOptions);

impl SearchOptionsBuilder {
    /// Creates a builder for `pattern` with the built-in defaults: search in
    /// `"."`, depth 1, directories included, case-insensitive name search, no
    /// include filter, a fixed list of excluded directories, a 10 000-byte
    /// line limit, a 1024-byte binary check and no result limit.
    fn new(pattern: String) -> Self {
        let exclude_dirs: Vec<String> = [
            ".git",
            "node_modules",
            "target",
            ".svn",
            "__pycache__",
            ".hg",
            ".cache",
        ]
        .iter()
        .map(|dir| dir.to_string())
        .collect();

        Self(SearchOptions {
            base_dir: PathBuf::from("."),
            pattern,
            max_depth: 1,
            include_dirs: true,
            case_insensitive: true,
            search_in_files: false,
            include_patterns: Vec::new(),
            exclude_dirs,
            max_line_length: 10_000,
            binary_check_bytes: 1024,
            max_results: 0,
        })
    }

    /// Sets the directory the search starts from.
    pub fn base_dir(self, p: impl Into<PathBuf>) -> Self {
        Self(SearchOptions {
            base_dir: p.into(),
            ..self.0
        })
    }

    /// Sets how many directory levels below the base directory are visited.
    pub fn max_depth(self, d: u32) -> Self {
        Self(SearchOptions {
            max_depth: d,
            ..self.0
        })
    }

    /// Chooses whether matching directories may be reported.
    pub fn include_dirs(self, v: bool) -> Self {
        Self(SearchOptions {
            include_dirs: v,
            ..self.0
        })
    }

    /// Chooses whether comparisons ignore letter case.
    pub fn case_insensitive(self, v: bool) -> Self {
        Self(SearchOptions {
            case_insensitive: v,
            ..self.0
        })
    }

    /// Chooses between content search (`true`) and name search (`false`).
    pub fn search_in_files(self, v: bool) -> Self {
        Self(SearchOptions {
            search_in_files: v,
            ..self.0
        })
    }

    /// Replaces the list of file-name glob patterns to include.
    pub fn include_patterns(self, p: Vec<String>) -> Self {
        Self(SearchOptions {
            include_patterns: p,
            ..self.0
        })
    }

    /// Replaces the list of directories to skip (the defaults are discarded).
    pub fn exclude_dirs(self, d: Vec<String>) -> Self {
        Self(SearchOptions {
            exclude_dirs: d,
            ..self.0
        })
    }

    /// Sets the maximum number of results; `0` leaves the result set uncapped.
    pub fn max_results(self, n: usize) -> Self {
        Self(SearchOptions {
            max_results: n,
            ..self.0
        })
    }

    /// Finishes the builder and returns the configured options.
    pub fn build(self) -> SearchOptions {
        let Self(options) = self;
        options
    }
}
/// Splits a comma-separated pattern list into its individual patterns.
///
/// Each piece is trimmed, empty pieces are dropped, and when
/// `case_insensitive` is set every pattern is lower-cased.
pub fn parse_patterns(raw: &str, case_insensitive: bool) -> Vec<String> {
    let mut patterns = Vec::new();
    for piece in raw.split(',') {
        let piece = piece.trim();
        if piece.is_empty() {
            continue;
        }
        let normalized = if case_insensitive {
            piece.to_lowercase()
        } else {
            piece.to_owned()
        };
        patterns.push(normalized);
    }
    patterns
}
/// Decides whether a file name passes the include filter.
///
/// Returns `true` when `patterns` is empty (no filter) or when `name` matches
/// at least one of the glob `patterns`. A pattern that is not valid glob
/// syntax never matches.
///
/// With `ci` set, both the name and each pattern are lower-cased before
/// matching, so patterns do not have to be normalised by the caller.
fn matches_include(name: &str, patterns: &[String], ci: bool) -> bool {
    use std::borrow::Cow;

    if patterns.is_empty() {
        return true;
    }

    let candidate: Cow<'_, str> = if ci {
        Cow::Owned(name.to_lowercase())
    } else {
        Cow::Borrowed(name)
    };

    patterns.iter().any(|raw| {
        // Fold the pattern the same way as the name; otherwise a pattern such
        // as `*.RS` could never match the lower-cased `main.rs`.
        let glob_src: Cow<'_, str> = if ci {
            Cow::Owned(raw.to_lowercase())
        } else {
            Cow::Borrowed(raw.as_str())
        };
        Pattern::new(&glob_src)
            .map(|glob| glob.matches(&candidate))
            .unwrap_or(false)
    })
}
/// Tells whether the directory `name` is covered by any entry of `excludes`.
///
/// An entry applies when it matches `name` as a glob pattern or is exactly
/// equal to it; the equality check still lets entries that are not valid glob
/// syntax work as literal names.
fn is_excluded_dir(name: &str, excludes: &[String]) -> bool {
    for rule in excludes {
        let glob_hit = match Pattern::new(rule) {
            Ok(glob) => glob.matches(name),
            Err(_) => false,
        };
        if glob_hit || rule == name {
            return true;
        }
    }
    false
}
/// Checks an entry name against the search pattern.
///
/// A pattern containing `*` or `?` is treated as a glob that must match the
/// whole name (an invalid glob matches nothing); any other pattern is a plain
/// substring test. With `ci` set, both sides are lower-cased first.
fn name_matches(entry_name: &str, pattern: &str, ci: bool) -> bool {
    let haystack = if ci {
        entry_name.to_lowercase()
    } else {
        entry_name.to_owned()
    };
    let needle = if ci {
        pattern.to_lowercase()
    } else {
        pattern.to_owned()
    };

    let is_glob = needle.chars().any(|c| matches!(c, '*' | '?'));
    if !is_glob {
        return haystack.contains(needle.as_str());
    }

    match Pattern::new(&needle) {
        Ok(glob) => glob.matches(&haystack),
        Err(_) => false,
    }
}
/// Scans one file for lines containing `pattern` and returns them with their
/// 1-based line numbers.
///
/// * `ci` - compare lower-cased copies of the line and the pattern.
/// * `max_line` - lines longer than this many bytes are skipped.
/// * `check_bytes` - forwarded to `is_binary`; files it reports as binary
///   yield no matches.
///
/// The search is best-effort: a file that cannot be opened yields an empty
/// result, lines that are not valid UTF-8 are skipped (they still count for
/// line numbering), and any other read error ends the scan, keeping the
/// matches found so far.
fn search_in_file(
    path: &Path,
    pattern: &str,
    ci: bool,
    max_line: usize,
    check_bytes: usize,
) -> Vec<LineMatch> {
    if is_binary(path, check_bytes) {
        return Vec::new();
    }
    let file = match fs::File::open(path) {
        Ok(f) => f,
        Err(_) => return Vec::new(),
    };
    let needle = if ci {
        pattern.to_lowercase()
    } else {
        pattern.to_owned()
    };

    let mut hits = Vec::new();
    for (index, read) in BufReader::new(file).lines().enumerate() {
        let line = match read {
            Ok(line) => line,
            // The reader has already consumed the undecodable line, so the
            // next iteration continues with the following one.
            Err(err) if err.kind() == std::io::ErrorKind::InvalidData => continue,
            // Other I/O errors (for example when the path turns out to be a
            // directory) can be returned again on every read; skipping them
            // would loop forever, so stop here.
            Err(_) => break,
        };
        if line.len() > max_line {
            continue;
        }
        let found = if ci {
            line.to_lowercase().contains(&needle)
        } else {
            line.contains(&needle)
        };
        if found {
            hits.push((index + 1, line));
        }
    }
    hits
}
/// Searches `opts.base_dir` by first collecting all candidate entries with
/// `WalkDir` and then evaluating them in parallel.
///
/// In name-search mode an entry is reported as [`SearchMatch::Path`] when its
/// name matches the pattern (directories only if `include_dirs` is set). In
/// content-search mode directories are ignored and each remaining entry is
/// scanned, yielding [`SearchMatch::Content`] for files with matching lines.
/// Non-directory entries must also pass the include patterns; excluded
/// directories are not descended into. Symbolic links are not followed
/// during the walk.
///
/// Setting `interrupted` stops both the directory walk and the evaluation
/// early; whatever was found up to that point is returned. The result is
/// truncated to `opts.max_results` when that limit is non-zero.
///
/// # Errors
///
/// Returns the error produced by `validate` when `opts.base_dir` does not
/// exist or is not a directory.
pub fn fast_find(
    opts: &SearchOptions,
    interrupted: Arc<AtomicBool>,
) -> FsearchResult<Vec<SearchMatch>> {
    validate(opts)?;

    // WalkDir counts the root itself as depth 0, so the direct children that
    // `max_depth == 0` refers to live at depth 1. Saturate instead of
    // overflowing on targets where `usize` is only 32 bits wide.
    let walk_depth = (opts.max_depth as usize).saturating_add(1);

    let entries: Vec<_> = WalkDir::new(&opts.base_dir)
        .max_depth(walk_depth)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| {
            // The root (depth 0) is never pruned, whatever its name is.
            let prune = e.depth() > 0
                && e.file_type().is_dir()
                && is_excluded_dir(&e.file_name().to_string_lossy(), &opts.exclude_dirs);
            !prune
        })
        // Let an interrupt cut the walk short as well; on a large tree the
        // traversal can dominate the run time.
        .take_while(|_| !interrupted.load(Ordering::Relaxed))
        // Entries that could not be read are skipped (best-effort search).
        .filter_map(|e| e.ok())
        .filter(|e| e.depth() > 0)
        .collect();

    let results: Vec<SearchMatch> = entries
        .into_par_iter()
        .filter_map(|entry| {
            if interrupted.load(Ordering::Relaxed) {
                return None;
            }
            let is_dir = entry.file_type().is_dir();
            let name = entry.file_name().to_string_lossy();

            // Include patterns filter files only; directories are handled below.
            if !is_dir && !matches_include(&name, &opts.include_patterns, opts.case_insensitive) {
                return None;
            }

            if opts.search_in_files {
                if is_dir {
                    return None;
                }
                let path = entry.path().to_path_buf();
                let lines = search_in_file(
                    &path,
                    &opts.pattern,
                    opts.case_insensitive,
                    opts.max_line_length,
                    opts.binary_check_bytes,
                );
                if lines.is_empty() {
                    None
                } else {
                    Some(SearchMatch::Content { path, lines })
                }
            } else {
                if is_dir && !opts.include_dirs {
                    return None;
                }
                if name_matches(&name, &opts.pattern, opts.case_insensitive) {
                    Some(SearchMatch::Path(entry.path().to_path_buf()))
                } else {
                    None
                }
            }
        })
        .collect();

    Ok(cap(results, opts.max_results))
}
/// Searches `opts.base_dir` with a sequential recursive directory walk.
///
/// Matches are gathered by `walk_dir` starting at depth 0 and then truncated
/// to `opts.max_results` when that limit is non-zero. Setting `interrupted`
/// stops the walk early and returns what was found so far.
///
/// # Errors
///
/// Returns the error produced by `validate` when `opts.base_dir` does not
/// exist or is not a directory.
pub fn recursive_find(
    opts: &SearchOptions,
    interrupted: Arc<AtomicBool>,
) -> FsearchResult<Vec<SearchMatch>> {
    validate(opts)?;

    let mut found: Vec<SearchMatch> = Vec::new();
    walk_dir(opts.base_dir.as_path(), opts, 0, &mut found, &*interrupted);

    let limited = cap(found, opts.max_results);
    Ok(limited)
}
/// Recursively visits `dir`, appending every match to `matches`.
///
/// `depth` is the level of `dir` below the search root (the root is 0);
/// directories deeper than `opts.max_depth` are not read. Unreadable
/// directories and entries are skipped silently. The walk stops as soon as
/// `interrupted` is set.
///
/// A matching directory is recorded before its contents are visited. Entries
/// that are neither directories nor regular files are ignored.
fn walk_dir(
    dir: &Path,
    opts: &SearchOptions,
    depth: u32,
    matches: &mut Vec<SearchMatch>,
    interrupted: &AtomicBool,
) {
    let stop_requested = || interrupted.load(Ordering::Relaxed);

    if depth > opts.max_depth || stop_requested() {
        return;
    }
    let listing = match fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };
    let ci = opts.case_insensitive;

    for entry in listing.filter_map(Result::ok) {
        if stop_requested() {
            break;
        }
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(_) => continue,
        };
        let name = entry.file_name().to_string_lossy().into_owned();

        if kind.is_dir() {
            if is_excluded_dir(&name, &opts.exclude_dirs) {
                continue;
            }
            // Directories are only reported in name-search mode.
            let report_dir = !opts.search_in_files
                && opts.include_dirs
                && name_matches(&name, &opts.pattern, ci);
            if report_dir {
                matches.push(SearchMatch::Path(path.clone()));
            }
            walk_dir(&path, opts, depth + 1, matches, interrupted);
            continue;
        }

        if !kind.is_file() {
            continue;
        }
        if !matches_include(&name, &opts.include_patterns, ci) {
            continue;
        }

        if !opts.search_in_files {
            if name_matches(&name, &opts.pattern, ci) {
                matches.push(SearchMatch::Path(path));
            }
            continue;
        }

        let lines = search_in_file(
            &path,
            &opts.pattern,
            ci,
            opts.max_line_length,
            opts.binary_check_bytes,
        );
        if !lines.is_empty() {
            matches.push(SearchMatch::Content { path, lines });
        }
    }
}
/// Checks that the search root is usable before any walking starts.
///
/// # Errors
///
/// * `FsearchError::DirectoryNotFound` when `opts.base_dir` does not exist.
/// * `FsearchError::NotADirectory` when it exists but is not a directory.
///
/// Both errors carry the displayed form of the offending path.
fn validate(opts: &SearchOptions) -> FsearchResult<()> {
    let root = opts.base_dir.as_path();
    if !root.exists() {
        Err(FsearchError::DirectoryNotFound(root.display().to_string()))
    } else if !root.is_dir() {
        Err(FsearchError::NotADirectory(root.display().to_string()))
    } else {
        Ok(())
    }
}
/// Limits `v` to at most `limit` results, keeping the leading ones.
///
/// A `limit` of `0` means "no limit" and returns `v` untouched.
fn cap(mut v: Vec<SearchMatch>, limit: usize) -> Vec<SearchMatch> {
    match limit {
        0 => {}
        // `truncate` leaves the vector alone when it is already short enough.
        max => v.truncate(max),
    }
    v
}