use std::path::{Path, PathBuf};
use async_trait::async_trait;
use regex::Regex;
use serde_json::{json, Value};
use crate::traits::{
Tool, ToolCallSemantics, ToolCapabilities, ToolRole, ToolTargetHintKind, ToolVerificationMode,
};
use super::fs_utils;
/// Stateless tool that searches a directory tree for files by filename glob
/// and/or by a regex matched against file contents.
pub struct SearchFilesTool;
/// Upper bound applied to the caller-supplied `max_results` argument.
const MAX_RESULTS: usize = 200;
/// Result limit used when the caller does not supply `max_results`.
const DEFAULT_MAX_RESULTS: usize = 50;
/// The walk stops once this many files have been counted as scanned
/// (only regular files that pass the filename filter are counted).
const MAX_FILES_SCANNED: usize = 10_000;
/// Directories nested more than this many levels below the search root
/// (which is depth 0) are not visited.
const MAX_DEPTH: usize = 20;
/// Files larger than this many bytes (1 MiB) are skipped during content search.
const MAX_CONTENT_SEARCH_FILE_SIZE: u64 = 1024 * 1024;
#[async_trait]
impl Tool for SearchFilesTool {
fn name(&self) -> &str {
"search_files"
}
fn description(&self) -> &str {
"Search for files by name pattern or content regex"
}
fn schema(&self) -> Value {
json!({
"name": "search_files",
"description": "Search for files by name pattern (glob) and/or content (regex). Use this instead of terminal find/grep. Automatically skips .git, node_modules, target, etc.",
"parameters": {
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "Regex pattern to search file contents for"
},
"glob": {
"type": "string",
"description": "Filename glob pattern (e.g., '*.rs', '*.ts', 'Cargo.*')"
},
"path": {
"type": "string",
"description": "Directory to search in (default: current directory)"
},
"max_results": {
"type": "integer",
"description": "Maximum results to return (default: 50, max: 200)"
}
},
"additionalProperties": false
}
})
}
fn tool_role(&self) -> ToolRole {
ToolRole::Action
}
fn capabilities(&self) -> ToolCapabilities {
ToolCapabilities {
read_only: true,
external_side_effect: false,
needs_approval: false,
idempotent: true,
high_impact_write: false,
}
}
fn call_semantics(&self, arguments: &str) -> ToolCallSemantics {
let path = serde_json::from_str::<Value>(arguments)
.ok()
.and_then(|args| {
args.get("path")
.and_then(|value| value.as_str())
.map(str::to_string)
})
.unwrap_or_default();
ToolCallSemantics::observation()
.with_verification_mode(ToolVerificationMode::ResultContent)
.with_target_hint(ToolTargetHintKind::Path, path)
}
async fn call(&self, arguments: &str) -> anyhow::Result<String> {
let args: Value = serde_json::from_str(arguments)?;
let content_pattern = args["pattern"].as_str();
let glob_pattern = args["glob"].as_str();
let path_value = args.get("path").and_then(|v| v.as_str());
let path_str = path_value.unwrap_or(".");
let used_default_path = path_value.is_none();
let max_results = args["max_results"]
.as_u64()
.map(|n| (n as usize).min(MAX_RESULTS))
.unwrap_or(DEFAULT_MAX_RESULTS);
if content_pattern.is_none() && glob_pattern.is_none() {
anyhow::bail!("At least one of 'pattern' (content regex) or 'glob' (filename pattern) is required");
}
let search_dir = fs_utils::validate_path(path_str)?;
if !search_dir.exists() {
anyhow::bail!("Directory not found: {}", search_dir.display());
}
if !search_dir.is_dir() {
anyhow::bail!("Not a directory: {}", search_dir.display());
}
let content_regex = if let Some(pat) = content_pattern {
Some(Regex::new(pat).map_err(|e| anyhow::anyhow!("Invalid regex '{}': {}", pat, e))?)
} else {
None
};
let glob_regex = if let Some(g) = glob_pattern {
Some(glob_to_regex(g)?)
} else {
None
};
let mut results = Vec::new();
let mut stats = SearchStats::default();
walk_dir(
&search_dir,
&content_regex,
&glob_regex,
max_results,
0,
&mut results,
&mut stats,
)
.await;
let default_path_note = used_default_path.then(|| {
format!(
"Note: no 'path' was provided, so search_files defaulted to current directory: {}",
search_dir.display()
)
});
if results.is_empty() {
let mut output = format!(
"No matches found ({} files scanned in {})",
stats.files_scanned,
search_dir.display()
);
if let Some(note) = &default_path_note {
output.push('\n');
output.push_str(note);
}
if stats.oversized_files_skipped > 0 {
output.push('\n');
output.push_str(&oversized_note(stats.oversized_files_skipped));
}
return Ok(output);
}
let mut output = String::new();
if let Some(note) = &default_path_note {
output.push_str(note);
output.push_str("\n\n");
}
output.push_str(&format!(
"Found {} match{} ({} files scanned in {}):\n\n",
results.len(),
if results.len() == 1 { "" } else { "es" },
stats.files_scanned,
search_dir.display()
));
if stats.oversized_files_skipped > 0 {
output.push_str(&oversized_note(stats.oversized_files_skipped));
output.push_str("\n\n");
}
for result in &results {
output.push_str(&result.format());
output.push('\n');
}
if results.len() >= max_results {
output.push_str(&format!(
"\n(Results capped at {}. Use a more specific pattern or glob to narrow results.)",
max_results
));
}
Ok(output)
}
}
/// Builds the one-line notice reporting how many files were skipped during
/// content search for exceeding `MAX_CONTENT_SEARCH_FILE_SIZE`.
fn oversized_note(count: usize) -> String {
    // Only a count of exactly one takes the singular form.
    let plural = match count {
        1 => "",
        _ => "s",
    };
    format!(
        "Skipped {} oversized file{} larger than {} bytes during content search.",
        count, plural, MAX_CONTENT_SEARCH_FILE_SIZE
    )
}
/// One file reported by the search.
struct SearchResult {
/// Path of the matching file, as produced by the directory walk.
path: PathBuf,
/// `(1-based line number, line text)` pairs for content matches; the walk
/// records at most five per file. Empty when the file matched by name only.
matches: Vec<(usize, String)>, }
/// Counters accumulated over one directory walk.
#[derive(Default)]
struct SearchStats {
/// Number of regular files that passed the filename filter (when one is set)
/// and were therefore counted by the walk.
files_scanned: usize,
/// Number of files skipped during content search because they were larger
/// than `MAX_CONTENT_SEARCH_FILE_SIZE`.
oversized_files_skipped: usize,
}
impl SearchResult {
    /// Renders this result for the textual report.
    ///
    /// A name-only match is just the path. A content match is the path
    /// followed by `:` and one line per hit, each showing the right-aligned
    /// line number and the line text passed through
    /// `crate::utils::truncate_str` with a limit of 203.
    fn format(&self) -> String {
        let mut rendered = self.path.display().to_string();
        if self.matches.is_empty() {
            return rendered;
        }
        rendered.push(':');
        for (line_no, text) in self.matches.iter() {
            let clipped = crate::utils::truncate_str(text, 203);
            rendered.push_str(&format!("\n {:>4}: {}", line_no, clipped));
        }
        rendered
    }
}
fn glob_to_regex(glob: &str) -> anyhow::Result<Regex> {
let mut regex = String::from("^");
for c in glob.chars() {
match c {
'*' => regex.push_str(".*"),
'?' => regex.push('.'),
'.' => regex.push_str("\\."),
'[' => regex.push('['),
']' => regex.push(']'),
'{' => regex.push('('),
'}' => regex.push(')'),
',' => regex.push('|'),
c => regex.push(c),
}
}
regex.push('$');
Regex::new(®ex).map_err(|e| anyhow::anyhow!("Invalid glob pattern '{}': {}", glob, e))
}
fn walk_dir<'a>(
dir: &'a Path,
content_regex: &'a Option<Regex>,
glob_regex: &'a Option<Regex>,
max_results: usize,
depth: usize,
results: &'a mut Vec<SearchResult>,
stats: &'a mut SearchStats,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send + 'a>> {
Box::pin(async move {
if depth > MAX_DEPTH
|| results.len() >= max_results
|| stats.files_scanned >= MAX_FILES_SCANNED
{
return;
}
let mut entries = match tokio::fs::read_dir(dir).await {
Ok(e) => e,
Err(_) => return,
};
let mut subdirs = Vec::new();
while let Ok(Some(entry)) = entries.next_entry().await {
if results.len() >= max_results || stats.files_scanned >= MAX_FILES_SCANNED {
break;
}
let path = entry.path();
let file_name = entry.file_name().to_string_lossy().to_string();
if let Ok(file_type) = entry.file_type().await {
if file_type.is_dir() {
if !fs_utils::should_skip_dir(&file_name) && !file_name.starts_with('.') {
subdirs.push(path);
}
continue;
}
if !file_type.is_file() {
continue;
}
} else {
continue;
}
if let Some(ref glob_re) = glob_regex {
if !glob_re.is_match(&file_name) {
continue;
}
}
stats.files_scanned += 1;
if let Some(ref content_re) = content_regex {
if let Ok(metadata) = entry.metadata().await {
if metadata.len() > MAX_CONTENT_SEARCH_FILE_SIZE {
stats.oversized_files_skipped += 1;
continue;
}
}
if let Ok(content) = tokio::fs::read_to_string(&path).await {
let mut matches = Vec::new();
for (i, line) in content.lines().enumerate() {
if content_re.is_match(line) {
matches.push((i + 1, line.to_string()));
if matches.len() >= 5 {
break; }
}
}
if !matches.is_empty() {
results.push(SearchResult {
path: path.clone(),
matches,
});
}
}
} else {
results.push(SearchResult {
path: path.clone(),
matches: vec![],
});
}
}
for subdir in subdirs {
walk_dir(
&subdir,
content_regex,
glob_regex,
max_results,
depth + 1,
results,
stats,
)
.await;
}
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `args` to JSON text and invokes the tool with it.
    async fn search(args: Value) -> anyhow::Result<String> {
        let raw = args.to_string();
        SearchFilesTool.call(&raw).await
    }

    /// Writes `contents` to a file called `name` directly inside `dir`.
    fn put(dir: &Path, name: &str, contents: impl AsRef<[u8]>) {
        std::fs::write(dir.join(name), contents).unwrap();
    }

    #[test]
    fn test_schema_has_required_fields() {
        let schema = SearchFilesTool.schema();
        assert_eq!(schema["name"], "search_files");
        let description = schema["description"].as_str().unwrap();
        assert!(!description.is_empty());
    }

    #[test]
    fn test_glob_to_regex() {
        let rust_files = glob_to_regex("*.rs").unwrap();
        assert!(rust_files.is_match("main.rs"));
        assert!(!rust_files.is_match("main.py"));

        let cargo_files = glob_to_regex("Cargo.*").unwrap();
        assert!(cargo_files.is_match("Cargo.toml"));
        assert!(cargo_files.is_match("Cargo.lock"));
    }

    #[tokio::test]
    async fn test_search_by_glob() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        put(root, "test.rs", "fn main() {}");
        put(root, "test.py", "def main(): pass");

        let report = search(json!({
            "glob": "*.rs",
            "path": root.to_str().unwrap()
        }))
        .await
        .unwrap();

        assert!(report.contains("test.rs"));
        assert!(!report.contains("test.py"));
    }

    #[tokio::test]
    async fn test_search_by_content() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        put(root, "a.txt", "hello world\nfoo bar\n");
        put(root, "b.txt", "goodbye world\n");

        let report = search(json!({
            "pattern": "hello",
            "path": root.to_str().unwrap()
        }))
        .await
        .unwrap();

        assert!(report.contains("a.txt"));
        assert!(report.contains("hello world"));
        assert!(!report.contains("b.txt"));
    }

    #[tokio::test]
    async fn test_content_search_skips_oversized_files() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        // One byte over the 1 MiB content-search limit, then the needle.
        let mut big = Vec::new();
        big.resize(1_048_577, b'a');
        big.extend_from_slice(b"\nneedle_in_large_file\n");
        put(root, "large.log", big);

        let report = search(json!({
            "pattern": "needle_in_large_file",
            "path": root.to_str().unwrap()
        }))
        .await
        .unwrap();

        assert!(!report.contains("needle_in_large_file"));
        assert!(report.contains("Skipped 1 oversized file"));
    }

    #[tokio::test]
    async fn test_search_no_results() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        put(root, "a.txt", "hello\n");

        let report = search(json!({
            "pattern": "nonexistent_pattern_xyz",
            "path": root.to_str().unwrap()
        }))
        .await
        .unwrap();

        assert!(report.contains("No matches"));
        assert!(report.contains(root.to_str().unwrap()));
    }

    #[tokio::test]
    async fn test_search_requires_pattern_or_glob() {
        let outcome = search(json!({"path": "/tmp"})).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_search_skips_ignored_dirs() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        let ignored = root.join("node_modules");
        std::fs::create_dir(&ignored).unwrap();
        put(&ignored, "hidden.js", "should not find");
        put(root, "visible.js", "should find");

        let report = search(json!({
            "glob": "*.js",
            "path": root.to_str().unwrap()
        }))
        .await
        .unwrap();

        assert!(report.contains("visible.js"));
        assert!(!report.contains("hidden.js"));
    }

    #[tokio::test]
    async fn test_search_warns_when_path_omitted() {
        let report = search(json!({
            "glob": "*",
            "max_results": 1
        }))
        .await
        .unwrap();

        assert!(report.contains("defaulted to current directory"));
    }
}