/// Decides whether a file should be left out of the search.
///
/// A file is skipped when either of these holds:
/// * `lang_extensions` is `Some` and the file's extension (an empty string when
///   the path has no UTF-8 extension) is not in that list;
/// * `exclude_glob` is `Some` and it matches `relative_path`.
///
/// With both filters set to `None` nothing is skipped.
fn should_skip_file(
    path: &Path,
    relative_path: &str,
    lang_extensions: &Option<Vec<&str>>,
    exclude_glob: &Option<globset::GlobSet>,
) -> bool {
    let wrong_language = lang_extensions.as_ref().map_or(false, |allowed| {
        let file_ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
        !allowed.contains(&file_ext)
    });

    // `||` short-circuits, so the glob is only consulted for files that passed
    // the language filter.
    wrong_language
        || exclude_glob
            .as_ref()
            .map_or(false, |excluded| excluded.is_match(relative_path))
}
/// Mutable state threaded through the file walk; each search mode fills one
/// of the collections below.
struct FileMatchAccumulator {
// Per-line hits, appended by `collect_line_matches`.
results: Vec<RawSearchResult>,
// Relative paths of files with at least one matching line
// (appended by `collect_files_with_matches`).
file_matches: Vec<String>,
// Per-file match counts, appended by `collect_count_matches`.
file_counts: Vec<FileMatchCount>,
// Number of entries pushed to `results`; compared against `options.limit`.
total_results: usize,
}
/// Returns `true` when `line` matches `regex` and is not vetoed by
/// `exclude_regex`.
///
/// The exclusion pattern is only evaluated for lines that already matched
/// `regex`.
fn line_matches(line: &str, regex: &Regex, exclude_regex: &Option<Regex>) -> bool {
    let wanted = regex.is_match(line);
    wanted
        && exclude_regex
            .as_ref()
            .map_or(true, |veto| !veto.is_match(line))
}
/// Builds the result record for the match at index `i` of `lines`.
///
/// * `lines` – every line of the file being searched.
/// * `relative_path` – path stored in the result's `file_path`.
/// * `i` – zero-based index of the matching line; the reported
///   `line_number` is `i + 1`.
/// * `before_ctx` / `after_ctx` – maximum number of context lines to copy
///   before / after the match. Both windows are clamped to the bounds of
///   `lines`, so arbitrarily large values are accepted.
///
/// # Panics
///
/// Panics if `i >= lines.len()`.
fn build_match_result(
    lines: &[&str],
    relative_path: &str,
    i: usize,
    before_ctx: usize,
    after_ctx: usize,
) -> RawSearchResult {
    let before_start = i.saturating_sub(before_ctx);
    // Saturating arithmetic: a huge `after_ctx` (e.g. `usize::MAX` meaning
    // "everything") must clamp to the end of the file instead of overflowing.
    let after_start = i.saturating_add(1);
    let after_end = after_start.saturating_add(after_ctx).min(lines.len());

    let context_before: Vec<String> = lines[before_start..i]
        .iter()
        .map(|s| (*s).to_string())
        .collect();
    // `get` yields `None` only for an out-of-range window, which becomes an
    // empty context.
    let context_after: Vec<String> = lines
        .get(after_start..after_end)
        .map(|window| window.iter().map(|s| (*s).to_string()).collect())
        .unwrap_or_default();

    RawSearchResult {
        file_path: relative_path.to_string(),
        line_number: i + 1,
        line_content: lines[i].to_string(),
        context_before,
        context_after,
    }
}
/// Files-with-matches mode: records `relative_path` in `acc.file_matches`
/// when at least one line of the file matches.
///
/// Scanning stops at the first matching line; the path is pushed at most once.
fn collect_files_with_matches(
    lines: &[&str],
    relative_path: &str,
    regex: &Regex,
    exclude_regex: &Option<Regex>,
    acc: &mut FileMatchAccumulator,
) {
    for text in lines {
        if line_matches(text, regex, exclude_regex) {
            acc.file_matches.push(relative_path.to_string());
            return;
        }
    }
}
/// Count mode: tallies the matching lines of one file and, when the tally is
/// non-zero, appends a `FileMatchCount` for `relative_path` to
/// `acc.file_counts`.
fn collect_count_matches(
    lines: &[&str],
    relative_path: &str,
    regex: &Regex,
    exclude_regex: &Option<Regex>,
    acc: &mut FileMatchAccumulator,
) {
    let mut hits = 0usize;
    for text in lines {
        if line_matches(text, regex, exclude_regex) {
            hits += 1;
        }
    }

    // Files without any match are not reported at all.
    if hits == 0 {
        return;
    }
    acc.file_counts.push(FileMatchCount {
        file_path: relative_path.to_string(),
        count: hits,
    });
}
/// Line mode: appends one `RawSearchResult` (with context) to `acc.results`
/// for every matching line of the file, honouring `options.limit`.
///
/// A limit of `0` means "no limit". Returns `true` once `acc.total_results`
/// has reached the limit, `false` otherwise.
fn collect_line_matches(
    lines: &[&str],
    relative_path: &str,
    regex: &Regex,
    exclude_regex: &Option<Regex>,
    options: &RawSearchOptions,
    acc: &mut FileMatchAccumulator,
) -> bool {
    let cap = options.limit;
    let cap_reached = |so_far: usize| cap > 0 && so_far >= cap;

    let hits = lines
        .iter()
        .enumerate()
        .filter(|(_, text)| line_matches(text, regex, exclude_regex));

    for (idx, _) in hits {
        // The cap is checked before recording, so a file visited after the
        // cap was already hit contributes nothing.
        if cap_reached(acc.total_results) {
            return true;
        }
        let hit = build_match_result(
            lines,
            relative_path,
            idx,
            options.before_context,
            options.after_context,
        );
        acc.results.push(hit);
        acc.total_results += 1;
    }

    cap_reached(acc.total_results)
}
/// Routes one file's lines to the collector for the active output mode.
///
/// `options.files_with_matches` takes precedence over `options.count_mode`;
/// with neither set, individual line matches are collected. Only line mode
/// can report that the result limit was reached, so the two summary modes
/// always return `false`.
fn collect_file_matches(
    lines: &[&str],
    relative_path: &str,
    regex: &Regex,
    exclude_regex: &Option<Regex>,
    options: &RawSearchOptions,
    acc: &mut FileMatchAccumulator,
) -> bool {
    let summary_mode = options.files_with_matches || options.count_mode;
    if !summary_mode {
        return collect_line_matches(lines, relative_path, regex, exclude_regex, options, acc);
    }

    if options.files_with_matches {
        collect_files_with_matches(lines, relative_path, regex, exclude_regex, acc);
    } else {
        collect_count_matches(lines, relative_path, regex, exclude_regex, acc);
    }
    false
}
/// Compiles the main search regex and the optional exclusion regex from
/// `options`.
///
/// * The main pattern is escaped first when `options.literal` is set and is
///   prefixed with `(?i)` when `options.case_insensitive` is set.
/// * The exclusion pattern, when present, is always escaped and always
///   compiled with the `(?i)` prefix.
///
/// # Errors
///
/// Returns a human-readable message when either pattern fails to compile.
fn build_search_patterns(options: &RawSearchOptions) -> Result<(Regex, Option<Regex>), String> {
    let body = if options.literal {
        regex::escape(options.pattern)
    } else {
        options.pattern.to_string()
    };
    let source = if options.case_insensitive {
        format!("(?i){}", body)
    } else {
        body
    };
    let regex = Regex::new(&source).map_err(|e| format!("Invalid regex pattern: {e}"))?;

    let exclude_regex = match options.exclude_pattern {
        Some(raw) => {
            let exclude_source = format!("(?i){}", regex::escape(raw));
            let compiled = Regex::new(&exclude_source)
                .map_err(|e| format!("Invalid exclude pattern: {e}"))?;
            Some(compiled)
        }
        None => None,
    };

    Ok((regex, exclude_regex))
}
/// Builds the case-insensitive glob set used to exclude files by path.
///
/// The user-supplied fragment is wrapped as `**<fragment>**` before
/// compilation (presumably so it can match anywhere in the relative path;
/// confirm against globset's `**` rules).
///
/// Returns `None` when no exclusion pattern was given, and also when the
/// glob or the glob set fails to build: an invalid pattern is silently
/// treated as "no exclusion".
fn build_exclude_glob(options: &RawSearchOptions) -> Option<globset::GlobSet> {
    let fragment = options.exclude_file_pattern?;
    let wrapped = format!("**{fragment}**");

    let glob = globset::GlobBuilder::new(&wrapped)
        .case_insensitive(true)
        .build()
        .ok()?;

    let mut set = globset::GlobSetBuilder::new();
    set.add(glob);
    set.build().ok()
}
/// Walks `project_root`, reads every eligible file and feeds its lines to
/// [`collect_file_matches`], accumulating into `acc`.
///
/// The walker is configured with `hidden(true)`, git ignore rules (local and
/// global) and a custom `.pmatignore` ignore file. On top of that a file is
/// skipped when:
/// * it is not a regular file;
/// * its path *relative to `project_root`* contains one of the directory
///   names rejected by `is_search_ignored_dir`;
/// * `should_skip_file` rejects it (language / exclude-glob filters);
/// * it cannot be read as UTF-8 text (read errors are ignored, best effort).
///
/// The walk stops as soon as a collector reports that the result limit was
/// reached.
fn walk_and_collect(
    project_root: &Path,
    regex: &Regex,
    exclude_regex: &Option<Regex>,
    lang_extensions: &Option<Vec<&str>>,
    exclude_glob: &Option<globset::GlobSet>,
    options: &RawSearchOptions,
    acc: &mut FileMatchAccumulator,
) {
    let walker = WalkBuilder::new(project_root)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .add_custom_ignore_filename(".pmatignore")
        .build();

    // Entries the walker could not read are dropped.
    for entry in walker.filter_map(|e| e.ok()) {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }

        // Test only the part of the path below the project root. Checking the
        // absolute path would discard every file of a project that itself
        // lives under a directory called e.g. `vendor`, `dist` or `target`.
        let relative = path.strip_prefix(project_root).unwrap_or(path);
        if is_search_ignored_dir(relative) {
            continue;
        }

        let relative_path = relative.to_string_lossy().into_owned();
        if should_skip_file(path, &relative_path, lang_extensions, exclude_glob) {
            continue;
        }

        // Binary / non-UTF-8 / unreadable files are skipped rather than
        // aborting the whole search.
        let content = match fs::read_to_string(path) {
            Ok(text) => text,
            Err(_) => continue,
        };
        let lines: Vec<&str> = content.lines().collect();

        let limit_reached =
            collect_file_matches(&lines, &relative_path, regex, exclude_regex, options, acc);
        if limit_reached {
            break;
        }
    }
}
/// Runs a raw text search over the files under `project_path`.
///
/// The root is canonicalized when possible; if canonicalization fails the
/// path is used as given. The shape of the output follows the mode selected
/// in `options`:
/// * `files_with_matches` → [`RawSearchOutput::Files`], the relative paths of
///   files containing at least one match;
/// * `count_mode` → [`RawSearchOutput::Counts`], per-file match counts sorted
///   by count in descending order;
/// * otherwise → [`RawSearchOutput::Lines`], the individual line matches.
///
/// `files_with_matches` wins when both mode flags are set. An exclude-file
/// glob that fails to compile is ignored rather than reported.
///
/// # Errors
///
/// Returns an error message when the search pattern or the exclusion pattern
/// cannot be compiled into a regex.
pub fn raw_search(
    project_path: &Path,
    options: &RawSearchOptions,
) -> Result<RawSearchOutput, String> {
    let project_root = project_path
        .canonicalize()
        .unwrap_or_else(|_| project_path.to_path_buf());
    let (regex, exclude_regex) = build_search_patterns(options)?;
    let exclude_glob = build_exclude_glob(options);
    let lang_extensions = options.language_filter.map(language_to_extensions);

    let mut acc = FileMatchAccumulator {
        results: Vec::new(),
        file_matches: Vec::new(),
        file_counts: Vec::new(),
        total_results: 0,
    };
    walk_and_collect(
        &project_root,
        &regex,
        &exclude_regex,
        &lang_extensions,
        &exclude_glob,
        options,
        &mut acc,
    );

    if options.files_with_matches {
        Ok(RawSearchOutput::Files(acc.file_matches))
    } else if options.count_mode {
        // Files with the most matches come first.
        acc.file_counts.sort_by(|a, b| b.count.cmp(&a.count));
        Ok(RawSearchOutput::Counts(acc.file_counts))
    } else {
        Ok(RawSearchOutput::Lines(acc.results))
    }
}
/// Reports whether line `raw_line` of `raw_file` falls inside any of the
/// `indexed_results`.
///
/// An entry covers the line when its `file_path` equals `raw_file` and
/// `raw_line` lies within `start_line..=end_line` (both ends inclusive).
pub fn is_within_indexed_function(
    raw_file: &str,
    raw_line: usize,
    indexed_results: &[super::types::QueryResult],
) -> bool {
    for entry in indexed_results {
        if entry.file_path != raw_file {
            continue;
        }
        if (entry.start_line..=entry.end_line).contains(&raw_line) {
            return true;
        }
    }
    false
}
/// Returns `true` when any component of `path` is one of the directory names
/// that are never searched (build output, dependency and cache folders).
///
/// Every component is inspected, including the final one. Components that
/// are not valid UTF-8 never match.
fn is_search_ignored_dir(path: &Path) -> bool {
    const IGNORED_NAMES: [&str; 14] = [
        "target",
        "node_modules",
        ".git",
        ".pmat",
        "__pycache__",
        "venv",
        ".venv",
        "dist",
        ".next",
        ".cache",
        "vendor",
        "third_party",
        "third-party",
        ".cargo",
    ];

    for component in path.components() {
        let is_ignored = component
            .as_os_str()
            .to_str()
            .map_or(false, |name| IGNORED_NAMES.contains(&name));
        if is_ignored {
            return true;
        }
    }
    false
}
/// Maps a language name or alias (case-insensitive) to the file extensions,
/// without the leading dot, that belong to it.
///
/// Unrecognized names yield an empty vector.
fn language_to_extensions(lang: &str) -> Vec<&'static str> {
    let normalized = lang.to_lowercase();
    let extensions: &[&'static str] = match normalized.as_str() {
        "rust" | "rs" => &["rs"],
        "python" | "py" => &["py", "pyi"],
        "typescript" | "ts" => &["ts", "tsx"],
        "javascript" | "js" => &["js", "jsx", "mjs"],
        "go" => &["go"],
        "java" => &["java"],
        "c" => &["c", "h"],
        "cpp" | "c++" | "cxx" => &["cpp", "cxx", "cc", "hpp", "hxx", "h"],
        "ruby" | "rb" => &["rb"],
        "toml" => &["toml"],
        "yaml" | "yml" => &["yaml", "yml"],
        "json" => &["json"],
        "markdown" | "md" => &["md", "markdown"],
        "shell" | "bash" | "sh" => &["sh", "bash"],
        "makefile" | "make" => &["mk", "makefile"],
        _ => &[],
    };
    extensions.to_vec()
}