use crate::types::GrepOutput;
use crate::types::OutputMode;
use crate::walker::BUILTIN_IGNORES;
use crate::walker::{self};
use agentic_tools_core::ToolError;
use globset::Glob;
use globset::GlobSet;
use globset::GlobSetBuilder;
use ignore::WalkBuilder;
use regex::Regex;
use std::collections::HashSet;
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
use std::path::Path;
#[derive(Debug)]
pub struct GrepConfig {
pub root: String,
pub pattern: String,
pub mode: OutputMode,
pub include_globs: Vec<String>,
pub ignore_globs: Vec<String>,
pub include_hidden: bool,
pub case_insensitive: bool,
pub multiline: bool,
pub line_numbers: bool,
pub context: Option<u32>,
pub context_before: Option<u32>,
pub context_after: Option<u32>,
pub include_binary: bool,
pub head_limit: usize,
pub offset: usize,
}
const MAX_HEAD_LIMIT: usize = 1000;
const BINARY_CHECK_SIZE: usize = 8192;
fn is_binary_file(path: &Path) -> std::io::Result<bool> {
let mut file = File::open(path)?;
let mut buffer = vec![0u8; BINARY_CHECK_SIZE];
let bytes_read = file.read(&mut buffer)?;
Ok(buffer[..bytes_read].contains(&0))
}
fn build_include_globset(patterns: &[String]) -> Result<Option<GlobSet>, ToolError> {
if patterns.is_empty() {
return Ok(None);
}
let mut builder = GlobSetBuilder::new();
for p in patterns {
let g = Glob::new(p)
.map_err(|e| ToolError::invalid_input(format!("Invalid include glob '{p}': {e}")))?;
builder.add(g);
}
let gs = builder
.build()
.map_err(|e| ToolError::internal(format!("Failed to build include globset: {e}")))?;
Ok(Some(gs))
}
#[derive(Debug)]
struct FileMatch {
rel_path: String,
lines: Vec<(usize, String)>,
match_count: usize,
}
fn search_file_lines(
path: &Path,
rel_path: &str,
regex: &Regex,
cfg: &GrepConfig,
) -> std::io::Result<Option<FileMatch>> {
let file = File::open(path)?;
let reader = BufReader::new(file);
let mut matched_lines: Vec<(usize, String)> = Vec::new();
let mut match_count = 0;
let ctx_before = cfg.context_before.or(cfg.context).unwrap_or(0) as usize;
let ctx_after = cfg.context_after.or(cfg.context).unwrap_or(0) as usize;
let mut before_buffer: Vec<(usize, String)> = Vec::with_capacity(ctx_before);
let mut after_countdown: usize = 0;
let mut last_matched_line: usize = 0;
for (idx, line_result) in reader.lines().enumerate() {
let line = line_result?;
let line_num = idx + 1;
if regex.is_match(&line) {
match_count += regex.find_iter(&line).count();
#[expect(
clippy::iter_with_drain,
reason = "drain() clears buffer in-place; into_iter() would require reassignment"
)]
for (ln, content) in before_buffer.drain(..) {
if matched_lines.is_empty() || ln > last_matched_line {
matched_lines.push((ln, content));
}
}
matched_lines.push((line_num, line.clone()));
last_matched_line = line_num;
after_countdown = ctx_after;
} else if after_countdown > 0 {
matched_lines.push((line_num, line.clone()));
last_matched_line = line_num;
after_countdown -= 1;
} else if ctx_before > 0 {
if before_buffer.len() >= ctx_before {
before_buffer.remove(0);
}
before_buffer.push((line_num, line));
}
}
if match_count == 0 {
return Ok(None);
}
Ok(Some(FileMatch {
rel_path: rel_path.to_string(),
lines: matched_lines,
match_count,
}))
}
fn search_file_multiline(
path: &Path,
rel_path: &str,
regex: &Regex,
) -> std::io::Result<Option<FileMatch>> {
let mut file = File::open(path)?;
let mut content = String::new();
file.read_to_string(&mut content)?;
let matches: Vec<_> = regex.find_iter(&content).collect();
if matches.is_empty() {
return Ok(None);
}
let match_count = matches.len();
let mut matched_lines: Vec<(usize, String)> = Vec::new();
for m in &matches {
let start = m.start();
let line_num = content[..start].matches('\n').count() + 1;
let matched_text = m.as_str().replace('\n', "\\n");
matched_lines.push((line_num, matched_text));
}
Ok(Some(FileMatch {
rel_path: rel_path.to_string(),
lines: matched_lines,
match_count,
}))
}
pub fn run(cfg: GrepConfig) -> Result<GrepOutput, ToolError> {
let root_path = Path::new(&cfg.root);
if !root_path.exists() {
return Err(ToolError::invalid_input(format!(
"Path does not exist: {}",
cfg.root
)));
}
let mut rb = regex::RegexBuilder::new(&cfg.pattern);
rb.case_insensitive(cfg.case_insensitive);
if cfg.multiline {
rb.multi_line(true).dot_matches_new_line(true);
}
let regex = rb
.build()
.map_err(|e| ToolError::invalid_input(format!("Invalid regex: {e}")))?;
let include_gs = build_include_globset(&cfg.include_globs)?;
let ignore_gs = walker::build_ignore_globset(&cfg.ignore_globs)?;
let head_limit = cfg.head_limit.min(MAX_HEAD_LIMIT);
let mut warnings: Vec<String> = Vec::new();
let mut all_matches: Vec<FileMatch> = Vec::new();
let mut binary_skipped = 0usize;
if root_path.is_file() {
let rel_path = root_path
.file_name()
.map_or_else(|| cfg.root.clone(), |s| s.to_string_lossy().to_string());
if cfg.include_binary {
let result = if cfg.multiline {
search_file_multiline(root_path, &rel_path, ®ex)
} else {
search_file_lines(root_path, &rel_path, ®ex, &cfg)
};
match result {
Ok(Some(m)) => all_matches.push(m),
Ok(None) => {}
Err(e) => warnings.push(format!("Could not read {rel_path}: {e}")),
}
} else {
match is_binary_file(root_path) {
Ok(true) => {
binary_skipped = 1;
}
Ok(false) => {
let result = if cfg.multiline {
search_file_multiline(root_path, &rel_path, ®ex)
} else {
search_file_lines(root_path, &rel_path, ®ex, &cfg)
};
if let Ok(Some(m)) = result {
all_matches.push(m);
}
}
Err(e) => {
warnings.push(format!("Could not read {rel_path}: {e}"));
}
}
}
} else {
let mut builder = WalkBuilder::new(root_path);
builder.hidden(!cfg.include_hidden);
builder.git_ignore(true);
builder.git_global(true);
builder.git_exclude(true);
builder.parents(false);
builder.follow_links(false);
let root_clone = root_path.to_path_buf();
let gs_clone = ignore_gs.clone();
builder.filter_entry(move |entry| {
let rel = entry
.path()
.strip_prefix(&root_clone)
.map(|p| p.to_string_lossy().replace('\\', "/"))
.unwrap_or_default();
if rel.is_empty() {
return true;
}
!gs_clone.is_match(&rel)
});
for result in builder.build() {
match result {
Ok(entry) => {
let path = entry.path();
if path.is_dir() {
continue;
}
let rel_path = path.strip_prefix(root_path).map_or_else(
|_| path.to_string_lossy().to_string(),
|p| p.to_string_lossy().replace('\\', "/"),
);
if ignore_gs.is_match(&rel_path) {
continue;
}
let matches_builtin = BUILTIN_IGNORES.iter().any(|pattern| {
if let Ok(g) = Glob::new(pattern) {
g.compile_matcher().is_match(&rel_path)
} else {
false
}
});
if matches_builtin {
continue;
}
if let Some(ref inc_gs) = include_gs
&& !inc_gs.is_match(&rel_path)
{
continue;
}
if !cfg.include_binary {
match is_binary_file(path) {
Ok(true) => {
binary_skipped += 1;
continue;
}
Ok(false) => {}
Err(_) => continue,
}
}
let search_result = if cfg.multiline {
search_file_multiline(path, &rel_path, ®ex)
} else {
search_file_lines(path, &rel_path, ®ex, &cfg)
};
match search_result {
Ok(Some(m)) => all_matches.push(m),
Ok(None) => {}
Err(e) => {
warnings.push(format!("Could not read {rel_path}: {e}"));
}
}
}
Err(e) => {
warnings.push(format!("Walk error: {e}"));
}
}
}
}
if binary_skipped > 0 {
warnings.push(format!(
"{} binary file{} skipped (use include_binary=true to search)",
binary_skipped,
if binary_skipped == 1 { "" } else { "s" }
));
}
let (lines, summary, total_count) = match cfg.mode {
OutputMode::Files => {
let mut seen: HashSet<String> = HashSet::new();
let mut file_paths: Vec<String> = Vec::new();
for m in &all_matches {
if seen.insert(m.rel_path.clone()) {
file_paths.push(m.rel_path.clone());
}
}
let total = file_paths.len();
(file_paths, None, total)
}
OutputMode::Content => {
let mut output_lines: Vec<String> = Vec::new();
for m in &all_matches {
for (line_num, content) in &m.lines {
if cfg.line_numbers {
output_lines.push(format!("{}:{}: {}", m.rel_path, line_num, content));
} else {
output_lines.push(format!("{}: {}", m.rel_path, content));
}
}
}
let total = output_lines.len();
(output_lines, None, total)
}
OutputMode::Count => {
let total: usize = all_matches.iter().map(|m| m.match_count).sum();
let summary = format!("Total matches: {total}");
(vec![], Some(summary), total)
}
};
let offset = cfg.offset;
let paginated: Vec<String> = lines.into_iter().skip(offset).take(head_limit).collect();
let has_more = total_count > offset + paginated.len();
Ok(GrepOutput {
root: cfg.root,
mode: cfg.mode,
lines: paginated,
has_more,
warnings,
summary,
})
}