Skip to main content

defect_tools/
search.rs

1//! Built-in `search` tool: grep file contents (content mode) or list files matching a
2//! glob (files mode) within the workspace.
3
4use std::cmp::Reverse;
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::time::{Instant, SystemTime};
8
9use agent_client_protocol_schema::{
10    Content, ContentBlock, TextContent, ToolCallContent, ToolCallLocation, ToolCallUpdateFields,
11    ToolKind,
12};
13use defect_agent::error::BoxError;
14use defect_agent::tool::{
15    SafetyClass, Tool, ToolCallDescription, ToolContext, ToolError, ToolEvent, ToolSchema,
16    ToolStream,
17};
18use defect_config::SearchToolConfig;
19use futures::future::BoxFuture;
20use futures::stream;
21use grep_regex::RegexMatcherBuilder;
22use grep_searcher::{BinaryDetection, SearcherBuilder};
23use ignore::WalkBuilder;
24use serde::{Deserialize, Serialize};
25use serde_json::json;
26use tokio_util::sync::CancellationToken;
27
28mod content;
29mod files;
30mod glob;
31
32#[cfg(test)]
33mod tests;
34
/// Maximum number of characters of a pattern or path shown in a tool-call title
/// before it is cut and suffixed with an ellipsis.
const TITLE_TRUNC: usize = 80;

/// Maximum number of bytes (4 KiB) of a matched line kept before it is cut and
/// suffixed with an ellipsis.
const MAX_MATCH_LINE: usize = 4096;
37
38/// Built-in implementation of the `search` tool. No runtime state — the parameterized
39/// schema and limits are fixed at construction time.
40pub struct SearchTool {
41    schema: ToolSchema,
42    config: SearchToolConfig,
43}
44
45impl SearchTool {
46    /// Constructs using [`SearchToolConfig::default`].
47    pub fn new() -> Self {
48        Self::from_config(&SearchToolConfig::default())
49    }
50
51    /// Constructs from a [`SearchToolConfig`]. The `max_head_limit` is reflected in the
52    /// schema's `head_limit` upper bound.
53    pub fn from_config(config: &SearchToolConfig) -> Self {
54        let default_head_limit = config.default_head_limit.max(1);
55        let max_head_limit = config.max_head_limit.max(default_head_limit);
56        let mut effective = config.clone();
57        effective.default_head_limit = default_head_limit;
58        effective.max_head_limit = max_head_limit;
59
60        let description = format!(
61            "Search the workspace. \
62             In `content` mode (default) runs a regex over file contents and returns \
63             matching lines as `<path> / L<line>: <text>`; \
64             in `files` mode lists workspace files matching a glob pattern. \
65             Respects .gitignore by default; binary files are skipped in content mode. \
66             Results are truncated at `head_limit` (default {default_head_limit}; max {max_head_limit}); \
67             files-mode results are sorted by mtime (newest first)."
68        );
69
70        let schema = ToolSchema {
71            name: "search".to_string(),
72            description,
73            input_schema: json!({
74                "type": "object",
75                "properties": {
76                    "mode": {
77                        "type": "string",
78                        "enum": ["content", "files"],
79                        "description": "`content` greps file contents (regex over `pattern`); \
80                                        `files` lists files matching `pattern` as a glob. \
81                                        Defaults to `content`."
82                    },
83                    "pattern": {
84                        "type": "string",
85                        "description": "**Required.** What to search for. \
86                                        In `content` mode (default): a Rust regex (RE2 syntax) — e.g. `\"pub struct \"`, `\"TODO|FIXME\"`. \
87                                        In `files` mode: a glob — e.g. `\"**/*.rs\"`, `\"src/**/foo.{ts,tsx}\"`. \
88                                        To narrow which files content-mode scans, use `path_glob` (not this field)."
89                    },
90                    "path": {
91                        "type": "string",
92                        "description": "Optional sub-path under the workspace root. \
93                                        Relative paths resolve against the session cwd. \
94                                        Must resolve inside the workspace."
95                    },
96                    "path_glob": {
97                        "type": "string",
98                        "description": "Content mode only. Optional glob restricting **which files** to scan \
99                                        (e.g. `**/*.rs`). This selects the file set; `pattern` is the regex \
100                                        applied to their contents. Ignored in `files` mode—use `pattern` directly."
101                    },
102                    "case_insensitive": {
103                        "type": "boolean",
104                        "description": "Content mode only. Defaults to false."
105                    },
106                    "multiline": {
107                        "type": "boolean",
108                        "description": "Content mode only. Lets `.` and the regex engine span line breaks. \
109                                        Defaults to false."
110                    },
111                    "before": {
112                        "type": "integer",
113                        "minimum": 0,
114                        "maximum": 50,
115                        "description": "Content mode only. Number of context lines before each match (like grep -B)."
116                    },
117                    "after": {
118                        "type": "integer",
119                        "minimum": 0,
120                        "maximum": 50,
121                        "description": "Content mode only. Number of context lines after each match (like grep -A)."
122                    },
123                    "head_limit": {
124                        "type": "integer",
125                        "minimum": 1,
126                        "maximum": max_head_limit as i64,
127                        "description": format!(
128                            "Maximum number of matches (content mode) or files (files mode) to return. \
129                             Defaults to {default_head_limit}; clamped at {max_head_limit}."
130                        )
131                    },
132                    "respect_gitignore": {
133                        "type": "boolean",
134                        "description": "When true (default) honors .gitignore / .ignore / hidden-file rules. \
135                                        Set to false to search the full tree."
136                    }
137                },
138                "required": ["pattern"]
139            }),
140        };
141        Self {
142            schema,
143            config: effective,
144        }
145    }
146}
147
148impl Default for SearchTool {
149    fn default() -> Self {
150        Self::new()
151    }
152}
153
154#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize)]
155#[serde(rename_all = "snake_case")]
156enum SearchMode {
157    #[default]
158    Content,
159    Files,
160}
161
162#[derive(Debug, Deserialize)]
163struct SearchArgs {
164    pattern: String,
165    #[serde(default)]
166    mode: Option<SearchMode>,
167    #[serde(default)]
168    path: Option<String>,
169    #[serde(default, rename = "path_glob")]
170    path_glob: Option<String>,
171    #[serde(default)]
172    case_insensitive: Option<bool>,
173    #[serde(default)]
174    multiline: Option<bool>,
175    #[serde(default)]
176    before: Option<u32>,
177    #[serde(default)]
178    after: Option<u32>,
179    #[serde(default)]
180    head_limit: Option<u32>,
181    #[serde(default)]
182    respect_gitignore: Option<bool>,
183}
184
185#[derive(Debug, Serialize)]
186pub(crate) struct SearchOutput {
187    pub(crate) mode: &'static str,
188    pub(crate) files_scanned: u64,
189    pub(crate) files_matched: u32,
190    pub(crate) matches_total: u32,
191    pub(crate) truncated: bool,
192    pub(crate) elapsed_ms: u64,
193    pub(crate) head_limit: u32,
194}
195
196impl Tool for SearchTool {
197    fn schema(&self) -> &ToolSchema {
198        &self.schema
199    }
200
201    fn safety_hint(&self, _args: &serde_json::Value) -> SafetyClass {
202        SafetyClass::ReadOnly
203    }
204
205    fn describe<'a>(
206        &'a self,
207        args: &'a serde_json::Value,
208        _ctx: ToolContext<'a>,
209    ) -> BoxFuture<'a, ToolCallDescription> {
210        Box::pin(async move {
211            let mode = args
212                .get("mode")
213                .and_then(|v| v.as_str())
214                .unwrap_or("content");
215            let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
216            let path = args.get("path").and_then(|v| v.as_str());
217
218            let title = format_title(mode, pattern, path);
219            let mut fields = ToolCallUpdateFields::default();
220            fields.title = Some(title);
221            fields.kind = Some(ToolKind::Search);
222            if let Some(p) = path {
223                fields.locations = Some(vec![ToolCallLocation::new(PathBuf::from(p))]);
224            }
225            ToolCallDescription { fields }
226        })
227    }
228
229    fn execute(&self, args: serde_json::Value, ctx: ToolContext<'_>) -> ToolStream {
230        let cancel = ctx.cancel.clone();
231        let cwd = ctx.cwd.to_path_buf();
232        let config = self.config.clone();
233        let fut = async move { run_search(args, cwd, cancel, config).await };
234        let s: Pin<Box<dyn futures::Stream<Item = ToolEvent> + Send>> = Box::pin(stream::once(fut));
235        s
236    }
237}
238
239async fn run_search(
240    args: serde_json::Value,
241    cwd: PathBuf,
242    cancel: CancellationToken,
243    config: SearchToolConfig,
244) -> ToolEvent {
245    let parsed: SearchArgs = match serde_json::from_value(args) {
246        Ok(v) => v,
247        Err(err) => return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(err))),
248    };
249
250    if parsed.pattern.is_empty() {
251        return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
252            std::io::ErrorKind::InvalidInput,
253            "pattern must not be empty",
254        ))));
255    }
256
257    let mode = parsed.mode.unwrap_or_default();
258    let head_limit = parsed
259        .head_limit
260        .unwrap_or(config.default_head_limit)
261        .min(config.max_head_limit)
262        .max(1);
263    let respect_gitignore = parsed
264        .respect_gitignore
265        .unwrap_or(config.respect_gitignore_default);
266
267    let start_dir = match resolve_search_path(&cwd, parsed.path.as_deref()) {
268        Ok(p) => p,
269        Err(e) => return ToolEvent::Failed(e),
270    };
271
272    // Run the walker/grep on a blocking thread — `ignore` and `grep-searcher` both
273    // perform synchronous I/O, so running them on the main runtime would block other
274    // tasks.
275    let cancel_for_task = cancel.clone();
276    let cwd_for_task = cwd.clone();
277    let join = tokio::task::spawn_blocking(move || {
278        run_search_blocking(
279            mode,
280            parsed,
281            start_dir,
282            cwd_for_task,
283            head_limit,
284            respect_gitignore,
285            cancel_for_task,
286            config,
287        )
288    });
289
290    match join.await {
291        Ok(event) => event,
292        Err(err) => ToolEvent::Failed(ToolError::Execution(BoxError::new(err))),
293    }
294}
295
296#[allow(clippy::too_many_arguments)]
297fn run_search_blocking(
298    mode: SearchMode,
299    parsed: SearchArgs,
300    start_dir: PathBuf,
301    cwd: PathBuf,
302    head_limit: u32,
303    respect_gitignore: bool,
304    cancel: CancellationToken,
305    config: SearchToolConfig,
306) -> ToolEvent {
307    let started = Instant::now();
308    match mode {
309        SearchMode::Content => {
310            let matcher_build = RegexMatcherBuilder::new()
311                .case_insensitive(parsed.case_insensitive.unwrap_or(false))
312                .multi_line(parsed.multiline.unwrap_or(false))
313                .build(&parsed.pattern);
314            let matcher = match matcher_build {
315                Ok(m) => m,
316                Err(err) => {
317                    return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
318                        std::io::Error::new(
319                            std::io::ErrorKind::InvalidInput,
320                            format!("invalid regex pattern: {err}"),
321                        ),
322                    )));
323                }
324            };
325
326            let content_glob = match parsed.path_glob.as_deref() {
327                Some(spec) => match glob::build_globset(spec) {
328                    Ok(set) => Some(set),
329                    Err(err) => {
330                        return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
331                            std::io::Error::new(
332                                std::io::ErrorKind::InvalidInput,
333                                format!("invalid glob pattern: {err}"),
334                            ),
335                        )));
336                    }
337                },
338                None => None,
339            };
340
341            let walker = build_walker(&start_dir, respect_gitignore, &config);
342            let searcher = SearcherBuilder::new()
343                .binary_detection(BinaryDetection::quit(0))
344                .before_context(parsed.before.unwrap_or(0) as usize)
345                .after_context(parsed.after.unwrap_or(0) as usize)
346                .multi_line(parsed.multiline.unwrap_or(false))
347                .build();
348
349            content::run(
350                walker,
351                searcher,
352                matcher,
353                content_glob,
354                &cwd,
355                head_limit,
356                &cancel,
357                &config,
358                started,
359            )
360        }
361        SearchMode::Files => {
362            let glob_set = match glob::build_globset(&parsed.pattern) {
363                Ok(set) => set,
364                Err(err) => {
365                    return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(
366                        std::io::Error::new(
367                            std::io::ErrorKind::InvalidInput,
368                            format!("invalid glob pattern: {err}"),
369                        ),
370                    )));
371                }
372            };
373            let walker = build_walker(&start_dir, respect_gitignore, &config);
374            files::run(
375                walker, glob_set, &cwd, head_limit, &cancel, &config, started,
376            )
377        }
378    }
379}
380
381fn build_walker(start: &Path, respect_gitignore: bool, config: &SearchToolConfig) -> ignore::Walk {
382    let mut builder = WalkBuilder::new(start);
383    builder
384        .standard_filters(respect_gitignore)
385        .require_git(false)
386        .max_filesize(Some(config.max_file_size_bytes))
387        .threads(1);
388    builder.build()
389}
390
391fn resolve_search_path(cwd: &Path, requested: Option<&str>) -> Result<PathBuf, ToolError> {
392    let target = match requested {
393        None | Some("") => cwd.to_path_buf(),
394        Some(s) => {
395            let p = Path::new(s);
396            if p.is_absolute() {
397                p.to_path_buf()
398            } else {
399                cwd.join(p)
400            }
401        }
402    };
403
404    let canon_target = std::fs::canonicalize(&target).map_err(|e| {
405        ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
406            std::io::ErrorKind::InvalidInput,
407            format!("path {} cannot be resolved: {e}", target.display()),
408        )))
409    })?;
410    let canon_cwd = std::fs::canonicalize(cwd).unwrap_or_else(|_| cwd.to_path_buf());
411
412    if !canon_target.starts_with(&canon_cwd) {
413        return Err(ToolError::InvalidArgs(BoxError::new(std::io::Error::new(
414            std::io::ErrorKind::PermissionDenied,
415            format!(
416                "path {} escapes workspace root {}",
417                canon_target.display(),
418                canon_cwd.display()
419            ),
420        ))));
421    }
422
423    Ok(canon_target)
424}
425
426fn format_title(mode: &str, pattern: &str, path: Option<&str>) -> String {
427    let verb = if mode == "files" { "Find" } else { "Search" };
428    let pat = truncate_for_title(pattern);
429    match path {
430        Some(p) if !p.is_empty() => {
431            let p = truncate_for_title(p);
432            format!("{verb} \"{pat}\" in {p}")
433        }
434        _ => format!("{verb} \"{pat}\""),
435    }
436}
437
438fn truncate_for_title(s: &str) -> String {
439    if s.chars().count() <= TITLE_TRUNC {
440        return s.to_string();
441    }
442    let truncated: String = s.chars().take(TITLE_TRUNC).collect();
443    format!("{truncated}…")
444}
445
/// Renders `path` for display: relative to `cwd` when `cwd` is a prefix of it, otherwise
/// the path exactly as given. Non-UTF-8 components are converted lossily.
pub(crate) fn display_relative(cwd: &Path, path: &Path) -> String {
    let shown = path.strip_prefix(cwd).unwrap_or(path);
    shown.to_string_lossy().into_owned()
}
453
454pub(crate) fn truncate_match_line(line: &str) -> String {
455    if line.len() <= MAX_MATCH_LINE {
456        return line.to_string();
457    }
458    let mut end = MAX_MATCH_LINE;
459    while !line.is_char_boundary(end) && end > 0 {
460        end -= 1;
461    }
462    let mut out = String::with_capacity(end + 1);
463    out.push_str(line.get(..end).unwrap_or(""));
464    out.push('…');
465    out
466}
467
/// Whole milliseconds elapsed since `started`, saturating at `u64::MAX`.
pub(crate) fn elapsed_ms(started: Instant) -> u64 {
    u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX)
}
476
477pub(crate) fn make_completed(text: String, output: SearchOutput) -> ToolEvent {
478    let raw_output = serde_json::to_value(&output).unwrap_or(serde_json::Value::Null);
479    let mut fields = ToolCallUpdateFields::default();
480    fields.content = Some(vec![ToolCallContent::Content(Content::new(
481        ContentBlock::Text(TextContent::new(text)),
482    ))]);
483    fields.raw_output = Some(raw_output);
484    ToolEvent::Completed(fields)
485}
486
/// Sorts `hits` newest-first by modification time.
///
/// Entries without an mtime (`None`) end up last, and the sort is stable, so entries
/// with equal mtimes keep their relative order.
pub(crate) fn sort_by_mtime_desc(hits: &mut [(PathBuf, Option<SystemTime>)]) {
    hits.sort_by(|(_, left), (_, right)| Reverse(*left).cmp(&Reverse(*right)));
}