1use std::cmp::Reverse;
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::time::{Instant, SystemTime};
8
9use agent_client_protocol_schema::{
10 Content, ContentBlock, TextContent, ToolCallContent, ToolCallLocation, ToolCallUpdateFields,
11 ToolKind,
12};
13use defect_agent::error::BoxError;
14use defect_agent::tool::{
15 SafetyClass, Tool, ToolCallDescription, ToolContext, ToolError, ToolEvent, ToolSchema,
16 ToolStream,
17};
18use defect_config::SearchToolConfig;
19use futures::future::BoxFuture;
20use futures::stream;
21use grep_regex::RegexMatcherBuilder;
22use grep_searcher::{BinaryDetection, SearcherBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use serde_json::json;
26use tokio_util::sync::CancellationToken;
27
28mod content;
29mod files;
30mod glob;
31
32#[cfg(test)]
33mod tests;
34
/// Maximum number of characters of a pattern or path shown in a tool-call title
/// before `truncate_for_title` cuts it off and appends `…`.
const TITLE_TRUNC: usize = 80;
/// Maximum byte length (4 KiB) of a line kept by `truncate_match_line` before it
/// is cut off and suffixed with `…`.
const MAX_MATCH_LINE: usize = 4 * 1024;
37
/// Workspace search tool exposing a `content` (regex) mode and a `files` (glob) mode.
pub struct SearchTool {
    /// Tool schema (name, description, JSON input schema) handed out by `Tool::schema`.
    schema: ToolSchema,
    /// Configuration whose head limits were normalised in `SearchTool::from_config`.
    config: SearchToolConfig,
}
44
45impl SearchTool {
46 pub fn new() -> Self {
48 Self::from_config(&SearchToolConfig::default())
49 }
50
51 pub fn from_config(config: &SearchToolConfig) -> Self {
54 let default_head_limit = config.default_head_limit.max(1);
55 let max_head_limit = config.max_head_limit.max(default_head_limit);
56 let mut effective = config.clone();
57 effective.default_head_limit = default_head_limit;
58 effective.max_head_limit = max_head_limit;
59
60 let description = format!(
61 "Search the workspace. \
62 In `content` mode (default) runs a regex over file contents and returns \
63 matching lines as `<path> / L<line>: <text>`; \
64 in `files` mode lists workspace files matching a glob pattern. \
65 Respects .gitignore by default; binary files are skipped in content mode. \
66 Results are truncated at `head_limit` (default {default_head_limit}; max {max_head_limit}); \
67 files-mode results are sorted by mtime (newest first)."
68 );
69
70 let schema = ToolSchema {
71 name: "search".to_string(),
72 description,
73 input_schema: json!({
74 "type": "object",
75 "properties": {
76 "mode": {
77 "type": "string",
78 "enum": ["content", "files"],
79 "description": "`content` greps file contents (regex over `pattern`); \
80 `files` lists files matching `pattern` as a glob. \
81 Defaults to `content`."
82 },
83 "pattern": {
84 "type": "string",
85 "description": "**Required.** What to search for. \
86 In `content` mode (default): a Rust regex (RE2 syntax) — e.g. `\"pub struct \"`, `\"TODO|FIXME\"`. \
87 In `files` mode: a glob — e.g. `\"**/*.rs\"`, `\"src/**/foo.{ts,tsx}\"`. \
88 To narrow which files content-mode scans, use `path_glob` (not this field)."
89 },
90 "path": {
91 "type": "string",
92 "description": "Optional sub-path under the workspace root. \
93 Relative paths resolve against the session cwd. \
94 Must resolve inside the workspace."
95 },
96 "path_glob": {
97 "type": "string",
98 "description": "Content mode only. Optional glob restricting **which files** to scan \
99 (e.g. `**/*.rs`). This selects the file set; `pattern` is the regex \
100 applied to their contents. Ignored in `files` mode—use `pattern` directly."
101 },
102 "case_insensitive": {
103 "type": "boolean",
104 "description": "Content mode only. Defaults to false."
105 },
106 "multiline": {
107 "type": "boolean",
108 "description": "Content mode only. Lets `.` and the regex engine span line breaks. \
109 Defaults to false."
110 },
111 "before": {
112 "type": "integer",
113 "minimum": 0,
114 "maximum": 50,
115 "description": "Content mode only. Number of context lines before each match (like grep -B)."
116 },
117 "after": {
118 "type": "integer",
119 "minimum": 0,
120 "maximum": 50,
121 "description": "Content mode only. Number of context lines after each match (like grep -A)."
122 },
123 "head_limit": {
124 "type": "integer",
125 "minimum": 1,
126 "maximum": max_head_limit as i64,
127 "description": format!(
128 "Maximum number of matches (content mode) or files (files mode) to return. \
129 Defaults to {default_head_limit}; clamped at {max_head_limit}."
130 )
131 },
132 "respect_gitignore": {
133 "type": "boolean",
134 "description": "When true (default) honors .gitignore / .ignore / hidden-file rules. \
135 Set to false to search the full tree."
136 }
137 },
138 "required": ["pattern"]
139 }),
140 };
141 Self {
142 schema,
143 config: effective,
144 }
145 }
146}
147
/// `Default` simply delegates to `SearchTool::new`, i.e. a tool built from the
/// default `SearchToolConfig`.
impl Default for SearchTool {
    fn default() -> Self {
        Self::new()
    }
}
153
/// Which kind of search to run; deserialised from the snake_case strings
/// `"content"` and `"files"`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
enum SearchMode {
    /// Regex search over file contents; used when `mode` is omitted.
    #[default]
    Content,
    /// Glob-based listing of files.
    Files,
}
161
162#[derive(Debug, Deserialize)]
163struct SearchArgs {
164 pattern: String,
165 #[serde(default)]
166 mode: Option<SearchMode>,
167 #[serde(default)]
168 path: Option<String>,
169 #[serde(default, rename = "path_glob")]
170 path_glob: Option<String>,
171 #[serde(default)]
172 case_insensitive: Option<bool>,
173 #[serde(default)]
174 multiline: Option<bool>,
175 #[serde(default)]
176 before: Option<u32>,
177 #[serde(default)]
178 after: Option<u32>,
179 #[serde(default)]
180 head_limit: Option<u32>,
181 #[serde(default)]
182 respect_gitignore: Option<bool>,
183}
184
/// Summary of a finished search; `make_completed` serialises it to JSON and
/// attaches it to the tool call as `raw_output`.
///
/// NOTE(review): the fields are populated by the `content` / `files` submodules,
/// which are not visible here — the per-field notes below are inferred from the
/// field names and should be confirmed against those modules.
#[derive(Debug, Serialize)]
pub(crate) struct SearchOutput {
    /// Presumably the name of the mode that produced the result — TODO confirm.
    pub(crate) mode: &'static str,
    /// Presumably the number of files visited by the walk — TODO confirm.
    pub(crate) files_scanned: u64,
    /// Presumably the number of files with at least one hit — TODO confirm.
    pub(crate) files_matched: u32,
    /// Presumably the number of hits found — TODO confirm.
    pub(crate) matches_total: u32,
    /// Presumably whether results were cut off at `head_limit` — TODO confirm.
    pub(crate) truncated: bool,
    /// Presumably the wall-clock duration in milliseconds (see `elapsed_ms`) — TODO confirm.
    pub(crate) elapsed_ms: u64,
    /// Presumably the effective head limit applied to this search — TODO confirm.
    pub(crate) head_limit: u32,
}
195
196impl Tool for SearchTool {
197 fn schema(&self) -> &ToolSchema {
198 &self.schema
199 }
200
201 fn safety_hint(&self, _args: &serde_json::Value) -> SafetyClass {
202 SafetyClass::ReadOnly
203 }
204
205 fn describe<'a>(
206 &'a self,
207 args: &'a serde_json::Value,
208 _ctx: ToolContext<'a>,
209 ) -> BoxFuture<'a, ToolCallDescription> {
210 Box::pin(async move {
211 let mode = args
212 .get("mode")
213 .and_then(|v| v.as_str())
214 .unwrap_or("content");
215 let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
216 let path = args.get("path").and_then(|v| v.as_str());
217
218 let title = format_title(mode, pattern, path);
219 let mut fields = ToolCallUpdateFields::default();
220 fields.title = Some(title);
221 fields.kind = Some(ToolKind::Search);
222 if let Some(p) = path {
223 fields.locations = Some(vec![ToolCallLocation::new(PathBuf::from(p))]);
224 }
225 ToolCallDescription { fields }
226 })
227 }
228
229 fn execute(&self, args: serde_json::Value, ctx: ToolContext<'_>) -> ToolStream {
230 let cancel = ctx.cancel.clone();
231 let cwd = ctx.cwd.to_path_buf();
232 let config = self.config.clone();
233 let fut = async move { run_search(args, cwd, cancel, config).await };
234 let s: Pin<Box<dyn futures::Stream<Item = ToolEvent> + Send>> = Box::pin(stream::once(fut));
235 s
236 }
237}
238
239async fn run_search(
240 args: serde_json::Value,
241 cwd: PathBuf,
242 cancel: CancellationToken,
243 config: SearchToolConfig,
244) -> ToolEvent {
245 let parsed: SearchArgs = match serde_json::from_value(args) {
246 Ok(v) => v,
247 Err(err) => return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(err))),
248 };
249
250 if parsed.pattern.is_empty() {
251 return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
252 std::io::ErrorKind::InvalidInput,
253 "pattern must not be empty",
254 ))));
255 }
256
257 let mode = parsed.mode.unwrap_or_default();
258 let head_limit = parsed
259 .head_limit
260 .unwrap_or(config.default_head_limit)
261 .min(config.max_head_limit)
262 .max(1);
263 let respect_gitignore = parsed
264 .respect_gitignore
265 .unwrap_or(config.respect_gitignore_default);
266
267 let start_dir = match resolve_search_path(&cwd, parsed.path.as_deref()) {
268 Ok(p) => p,
269 Err(e) => return ToolEvent::Failed(e),
270 };
271
272 let cancel_for_task = cancel.clone();
276 let cwd_for_task = cwd.clone();
277 let join = tokio::task::spawn_blocking(move || {
278 run_search_blocking(
279 mode,
280 parsed,
281 start_dir,
282 cwd_for_task,
283 head_limit,
284 respect_gitignore,
285 cancel_for_task,
286 config,
287 )
288 });
289
290 match join.await {
291 Ok(event) => event,
292 Err(err) => ToolEvent::Failed(ToolError::Execution(BoxError::new(err))),
293 }
294}
295
296#[allow(clippy::too_many_arguments)]
297fn run_search_blocking(
298 mode: SearchMode,
299 parsed: SearchArgs,
300 start_dir: PathBuf,
301 cwd: PathBuf,
302 head_limit: u32,
303 respect_gitignore: bool,
304 cancel: CancellationToken,
305 config: SearchToolConfig,
306) -> ToolEvent {
307 let started = Instant::now();
308 match mode {
309 SearchMode::Content => {
310 let matcher_build = RegexMatcherBuilder::new()
311 .case_insensitive(parsed.case_insensitive.unwrap_or(false))
312 .multi_line(parsed.multiline.unwrap_or(false))
313 .build(&parsed.pattern);
314 let matcher = match matcher_build {
315 Ok(m) => m,
316 Err(err) => {
317 return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
318 std::io::Error::new(
319 std::io::ErrorKind::InvalidInput,
320 format!("invalid regex pattern: {err}"),
321 ),
322 )));
323 }
324 };
325
326 let content_glob = match parsed.path_glob.as_deref() {
327 Some(spec) => match glob::build_globset(spec) {
328 Ok(set) => Some(set),
329 Err(err) => {
330 return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
331 std::io::Error::new(
332 std::io::ErrorKind::InvalidInput,
333 format!("invalid glob pattern: {err}"),
334 ),
335 )));
336 }
337 },
338 None => None,
339 };
340
341 let walker = build_walker(&start_dir, respect_gitignore, &config);
342 let searcher = SearcherBuilder::new()
343 .binary_detection(BinaryDetection::quit(0))
344 .before_context(parsed.before.unwrap_or(0) as usize)
345 .after_context(parsed.after.unwrap_or(0) as usize)
346 .multi_line(parsed.multiline.unwrap_or(false))
347 .build();
348
349 content::run(
350 walker,
351 searcher,
352 matcher,
353 content_glob,
354 &cwd,
355 head_limit,
356 &cancel,
357 &config,
358 started,
359 )
360 }
361 SearchMode::Files => {
362 let glob_set = match glob::build_globset(&parsed.pattern) {
363 Ok(set) => set,
364 Err(err) => {
365 return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
366 std::io::Error::new(
367 std::io::ErrorKind::InvalidInput,
368 format!("invalid glob pattern: {err}"),
369 ),
370 )));
371 }
372 };
373 let walker = build_walker(&start_dir, respect_gitignore, &config);
374 files::run(
375 walker, glob_set, &cwd, head_limit, &cancel, &config, started,
376 )
377 }
378 }
379}
380
381fn build_walker(start: &Path, respect_gitignore: bool, config: &SearchToolConfig) -> ignore::Walk {
382 let mut builder = WalkBuilder::new(start);
383 builder
384 .standard_filters(respect_gitignore)
385 .require_git(false)
386 .max_filesize(Some(config.max_file_size_bytes))
387 .threads(1);
388 builder.build()
389}
390
391fn resolve_search_path(cwd: &Path, requested: Option<&str>) -> Result<PathBuf, ToolError> {
392 let target = match requested {
393 None | Some("") => cwd.to_path_buf(),
394 Some(s) => {
395 let p = Path::new(s);
396 if p.is_absolute() {
397 p.to_path_buf()
398 } else {
399 cwd.join(p)
400 }
401 }
402 };
403
404 let canon_target = std::fs::canonicalize(&target).map_err(|e| {
405 ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
406 std::io::ErrorKind::InvalidInput,
407 format!("path {} cannot be resolved: {e}", target.display()),
408 )))
409 })?;
410 let canon_cwd = std::fs::canonicalize(cwd).unwrap_or_else(|_| cwd.to_path_buf());
411
412 if !canon_target.starts_with(&canon_cwd) {
413 return Err(ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
414 std::io::ErrorKind::PermissionDenied,
415 format!(
416 "path {} escapes workspace root {}",
417 canon_target.display(),
418 canon_cwd.display()
419 ),
420 ))));
421 }
422
423 Ok(canon_target)
424}
425
426fn format_title(mode: &str, pattern: &str, path: Option<&str>) -> String {
427 let verb = if mode == "files" { "Find" } else { "Search" };
428 let pat = truncate_for_title(pattern);
429 match path {
430 Some(p) if !p.is_empty() => {
431 let p = truncate_for_title(p);
432 format!("{verb} \"{pat}\" in {p}")
433 }
434 _ => format!("{verb} \"{pat}\""),
435 }
436}
437
438fn truncate_for_title(s: &str) -> String {
439 if s.chars().count() <= TITLE_TRUNC {
440 return s.to_string();
441 }
442 let truncated: String = s.chars().take(TITLE_TRUNC).collect();
443 format!("{truncated}…")
444}
445
/// Renders `path` relative to `cwd` for display.
///
/// When `path` is not located under `cwd` it is rendered unchanged. Non-UTF-8
/// components are converted lossily.
pub(crate) fn display_relative(cwd: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(cwd) {
        Ok(relative) => relative,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}
453
454pub(crate) fn truncate_match_line(line: &str) -> String {
455 if line.len() <= MAX_MATCH_LINE {
456 return line.to_string();
457 }
458 let mut end = MAX_MATCH_LINE;
459 while !line.is_char_boundary(end) && end > 0 {
460 end -= 1;
461 }
462 let mut out = String::with_capacity(end + 1);
463 out.push_str(line.get(..end).unwrap_or(""));
464 out.push('…');
465 out
466}
467
/// Milliseconds elapsed since `started`, saturating at `u64::MAX`.
pub(crate) fn elapsed_ms(started: Instant) -> u64 {
    // `as_millis` yields a `u128`; narrow it without wrapping.
    u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX)
}
476
477pub(crate) fn make_completed(text: String, output: SearchOutput) -> ToolEvent {
478 let raw_output = serde_json::to_value(&output).unwrap_or(serde_json::Value::Null);
479 let mut fields = ToolCallUpdateFields::default();
480 fields.content = Some(vec![ToolCallContent::Content(Content::new(
481 ContentBlock::Text(TextContent::new(text)),
482 ))]);
483 fields.raw_output = Some(raw_output);
484 ToolEvent::Completed(fields)
485}
486
/// Sorts `hits` in place by modification time, newest first.
///
/// Entries without an mtime (`None`) end up last. The sort is stable, so
/// entries with equal mtimes keep their relative order.
pub(crate) fn sort_by_mtime_desc(hits: &mut [(PathBuf, Option<SystemTime>)]) {
    hits.sort_by(|(_, left), (_, right)| Reverse(left).cmp(&Reverse(right)));
}