use std::collections::BTreeMap;
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::process::{Command, Stdio};
use crate::cli::{Args, SearchMode};
/// Upper bound on the number of match lines parsed in one `search` call; once this many
/// have been seen, `search` kills ripgrep and flags the result as `scan_limited`.
const MAX_SCAN_MATCHES: usize = 50_000;
/// Upper bound on the number of matches kept in `SearchResult::matches`; matches beyond
/// this are still counted (per path and in total) but not stored.
const MAX_STORED_MATCHES: usize = 5_000;
/// Glob filters handed to ripgrep by `rg_command`, each through its own `--glob` flag.
///
/// Every entry begins with `!`, which ripgrep treats as "exclude paths matching this glob".
const EXCLUDES: &[&str] = &[
// Directory trees that are skipped entirely.
"!.git/**",
"!target/**",
"!node_modules/**",
"!vendor/**",
"!dist/**",
"!build/**",
"!coverage/**",
"!scratch/**",
"!tmp/**",
"!generated/**",
// File-name patterns that are skipped wherever they appear.
"!*.log",
"!*.jsonl",
"!*.xml",
"!*.min.js",
"!*.map",
];
/// One match line parsed from ripgrep's output (see `parse_match`).
#[derive(Clone, Debug)]
pub(crate) struct Match {
/// File path as printed by ripgrep: the bytes before the NUL separator, decoded lossily as UTF-8.
pub(crate) path: String,
/// Line number as reported by ripgrep (`--line-number`).
pub(crate) line: usize,
/// Column as reported by ripgrep (`--column`).
pub(crate) column: usize,
/// Remainder of the output line after `line:column:`, with trailing CR/LF removed.
pub(crate) text: String,
}
/// Aggregated outcome of one `search` call.
#[derive(Debug)]
pub(crate) struct SearchResult {
/// Parsed matches in the order ripgrep emitted them, capped at `MAX_STORED_MATCHES`.
pub(crate) matches: Vec<Match>,
/// Number of parsed matches per file path; includes matches that were not stored.
pub(crate) counts: BTreeMap<String, usize>,
/// Total number of match lines successfully parsed.
pub(crate) scanned_matches: usize,
/// `true` when scanning stopped early because `MAX_SCAN_MATCHES` was reached.
pub(crate) scan_limited: bool,
}
/// Assembles the `rg` invocation for `query` rooted at `path`.
///
/// Argument order is: fixed output-shaping flags, mode-specific flags, one `--glob`
/// pair per entry of `EXCLUDES`, then `--`, the pattern, and the search path.
/// Both stdout and stderr of the returned command are configured as pipes.
fn rg_command(args: &Args, query: &str, path: &Path) -> Command {
    // Flags that fix the output format; `--null` puts a NUL after the file path,
    // which is the separator `parse_match` looks for.
    const OUTPUT_FLAGS: &[&str] = &[
        "--null",
        "--line-number",
        "--column",
        "--no-heading",
        "--with-filename",
        "--color",
        "never",
        "--no-messages",
        "--sort",
        "path",
        "--max-columns",
        "300",
        "--max-columns-preview",
    ];

    let mut rg = Command::new("rg");
    rg.args(OUTPUT_FLAGS);

    // Holds the generated regex when (and only when) identifier mode needs one,
    // so that `pattern` can borrow from it.
    let identifier_regex;
    let pattern = match args.mode {
        SearchMode::Fixed => {
            rg.arg("--fixed-strings");
            query
        }
        SearchMode::Word => {
            rg.args(["--fixed-strings", "--word-regexp"]);
            query
        }
        SearchMode::Identifier => {
            identifier_regex = identifier_pattern(query);
            identifier_regex.as_str()
        }
        SearchMode::Regex => query,
    };

    for glob in EXCLUDES.iter().copied() {
        rg.arg("--glob").arg(glob);
    }

    // `--` ends option parsing so the pattern is never read as a flag.
    rg.arg("--").arg(pattern).arg(path);
    rg.stdout(Stdio::piped());
    rg.stderr(Stdio::piped());
    rg
}
/// Builds a regex that matches `value` literally, but only where it is delimited on
/// both sides by a line edge or a character outside `[A-Za-z0-9_]`.
fn identifier_pattern(value: &str) -> String {
    let literal = regex_escape(value);
    ["(^|[^A-Za-z0-9_])", literal.as_str(), "([^A-Za-z0-9_]|$)"].concat()
}
/// Returns `value` with a backslash inserted before each regex metacharacter
/// (`\ . + * ? ( ) | [ ] { } ^ $`); every other character is copied unchanged.
fn regex_escape(value: &str) -> String {
    // The characters that receive a leading backslash.
    const META: &str = r"\.+*?()|[]{}^$";

    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut out, ch| {
            if META.contains(ch) {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
/// Runs ripgrep for `query` under `path` and collects the parsed matches.
///
/// Output is read line by line. Every line that `parse_match` accepts is counted
/// (in total and per path); only the first `MAX_STORED_MATCHES` are stored. When
/// `MAX_SCAN_MATCHES` parsed matches have been seen, ripgrep is killed and the result
/// is marked `scan_limited`.
///
/// # Errors
///
/// Returns a message when ripgrep cannot be started, its output cannot be read, it
/// cannot be waited on, or — unless the scan was cut short — it exits with a status
/// other than 0 or 1. In the last case the message is the first line of ripgrep's
/// stderr, or a generic "ripgrep exited with ..." text when stderr is empty.
pub(crate) fn search(args: &Args, query: &str, path: &Path) -> Result<SearchResult, String> {
    let mut child = rg_command(args, query, path)
        .spawn()
        .map_err(|error| format!("failed to start ripgrep: {error}"))?;
    let stdout = match child.stdout.take() {
        Some(stdout) => stdout,
        None => {
            // A `Child` is not cleaned up when dropped, so stop and reap it
            // before bailing out.
            let _ = child.kill();
            let _ = child.wait();
            return Err("failed to read ripgrep output".to_string());
        }
    };
    let mut reader = BufReader::new(stdout);
    // Reused line buffer; ripgrep output is handled as bytes because paths and
    // matched text need not be valid UTF-8.
    let mut raw = Vec::new();
    let mut matches = Vec::new();
    let mut counts = BTreeMap::new();
    let mut scanned_matches = 0;
    let mut scan_limited = false;
    loop {
        raw.clear();
        let read = match reader.read_until(b'\n', &mut raw) {
            Ok(read) => read,
            Err(error) => {
                // Returning here without kill + wait would leave the ripgrep
                // process unreaped for the lifetime of this program.
                let _ = child.kill();
                let _ = child.wait();
                return Err(format!("failed to read ripgrep output: {error}"));
            }
        };
        if read == 0 {
            break;
        }
        // Lines that do not parse as `path\0line:column:text` are skipped.
        if let Some(item) = parse_match(&raw) {
            scanned_matches += 1;
            *counts.entry(item.path.clone()).or_insert(0) += 1;
            if matches.len() < MAX_STORED_MATCHES {
                matches.push(item);
            }
            if scanned_matches >= MAX_SCAN_MATCHES {
                scan_limited = true;
                // Best effort: the process may already have exited.
                let _ = child.kill();
                break;
            }
        }
    }
    let status = child
        .wait()
        .map_err(|error| format!("failed to wait for ripgrep: {error}"))?;
    // After a deliberate kill the exit status is meaningless, so it is only checked
    // for complete scans. Codes 0 and 1 are both treated as success here.
    if !scan_limited && !matches!(status.code(), Some(0 | 1)) {
        let stderr = child
            .stderr
            .take()
            .map(|stream| {
                // Only the first stderr line is reported.
                let mut text = String::new();
                let _ = BufReader::new(stream).read_line(&mut text);
                text.trim().to_string()
            })
            .unwrap_or_default();
        return Err(if stderr.is_empty() {
            format!("ripgrep exited with {status}")
        } else {
            stderr
        });
    }
    Ok(SearchResult {
        matches,
        counts,
        scanned_matches,
        scan_limited,
    })
}
/// Parses one ripgrep output line of the form `path\0line:column:text`.
///
/// Returns `None` when the line has no NUL byte, lacks either of the two `:`
/// separators after it, or when the line/column fields are not valid numbers.
/// Path and text are decoded lossily as UTF-8; trailing CR/LF is dropped from the text.
fn parse_match(raw: &[u8]) -> Option<Match> {
    let separator = raw.iter().position(|&byte| byte == 0)?;
    // `tail` starts at the NUL itself, so skip that one byte.
    let (path_bytes, tail) = raw.split_at(separator);
    let record = String::from_utf8_lossy(&tail[1..]);
    let record = record.trim_end_matches(['\r', '\n']);

    // Split on the first two colons only; the matched text may contain more.
    let (line, after_line) = record.split_once(':')?;
    let (column, text) = after_line.split_once(':')?;

    Some(Match {
        path: String::from_utf8_lossy(path_bytes).into_owned(),
        line: line.parse().ok()?,
        column: column.parse().ok()?,
        text: text.to_string(),
    })
}