Skip to main content

lash_tools/
search.rs

1use std::collections::{HashMap, VecDeque};
2use std::path::{Path, PathBuf};
3use std::sync::{
4    Arc, Mutex, MutexGuard, OnceLock,
5    atomic::{AtomicBool, Ordering},
6};
7use std::time::{Duration, Instant};
8
9use fff_search::git::format_git_status_opt;
10use fff_search::grep::{GrepMode, GrepSearchOptions, has_regex_metacharacters, is_import_line};
11use fff_search::{
12    AiGrepConfig, ContentCacheBudget, FFFMode, FileItem, FilePicker, FilePickerOptions,
13    FuzzySearchOptions, GrepMatch, PaginationArgs, QueryParser, SharedFrecency, SharedPicker,
14};
15use serde_json::json;
16
17use lash_core::{
18    ToolCall, ToolDefinition, ToolFailureClass, ToolResult, ToolRetryPolicy, ToolScheduling,
19};
20
21use lash_tool_support::{
22    StaticToolExecute, StaticToolProvider, canonicalize_under, object_schema,
23    parse_optional_usize_arg, require_str,
24};
25
/// Default for the `limit` tool argument, as advertised in the input schema.
const DEFAULT_MAX_RESULTS: usize = 20;
// NOTE(review): presumably the capacity of the cursor store — confirm at the use site.
const MAX_CURSORS: usize = 20;
// NOTE(review): presumably the maximum excerpt length per match — confirm at the use site.
const MAX_LINE_LEN: usize = 180;
// NOTE(review): byte cap for fuzzy queries derived from a u16 limit; the
// 16 * 50 divisor presumably mirrors an fff-search internal — confirm.
const MAX_FFF_FUZZY_QUERY_BYTES: usize = (u16::MAX as usize) / (16 * 50);
/// Wall-clock bound used for the initial index scan wait and for awaiting
/// the blocking grep workers.
const GREP_WALL_TIMEOUT: Duration = Duration::from_secs(5);
/// Budget handed to `GrepRunControl` for the indexed search stages.
const FFF_SEARCH_BUDGET: Duration = Duration::from_secs(3);
// NOTE(review): presumably the size cap for single-file scans (10 MiB) — confirm at the use site.
const DIRECT_FILE_MAX_SIZE: u64 = 10 * 1024 * 1024;
33
/// Search file contents using an indexed fff-search backend.
pub struct Grep {
    /// Default search root, or the error message recorded when it could not
    /// be resolved at construction time.
    base_path: Result<PathBuf, String>,
    /// Lazily initialized backend for `base_path`; the outcome of the first
    /// initialization (backend or error message) is kept for later calls.
    backend: OnceLock<Result<Arc<GrepBackend>, String>>,
    /// Cursor store shared with the blocking grep worker.
    cursor_store: Arc<Mutex<CursorStore>>,
}
40
impl Grep {
    /// Create a `Grep` rooted at the process's current directory.
    ///
    /// If the current directory cannot be resolved, the error message is
    /// stored in `base_path` instead of failing here.
    pub fn new() -> Self {
        match std::env::current_dir() {
            Ok(path) => Self::with_base_path(path),
            Err(err) => {
                Self::with_init_error(format!("failed to resolve current directory: {err}"))
            }
        }
    }

    /// Build an instance whose `base_path` holds the initialization error `message`.
    fn with_init_error(message: String) -> Self {
        Self {
            base_path: Err(message),
            backend: OnceLock::new(),
            cursor_store: Arc::new(Mutex::new(CursorStore::new())),
        }
    }

    /// Build an instance rooted at `base_path` with an uninitialized backend.
    fn with_base_path(base_path: PathBuf) -> Self {
        Self {
            base_path: Ok(base_path),
            backend: OnceLock::new(),
            cursor_store: Arc::new(Mutex::new(CursorStore::new())),
        }
    }

    /// Return the default backend, initializing it on first use, once
    /// `wait_for_scan` reports success within `GREP_WALL_TIMEOUT`.
    ///
    /// # Errors
    /// Returns a `ToolResult` error when backend initialization failed, or a
    /// timeout result (stage `index_scan`) when `wait_for_scan` returns false.
    fn ensure_ready_for_query(&self, query: &str) -> Result<Arc<GrepBackend>, ToolResult> {
        let backend = self
            .backend
            .get_or_init(|| self.shared_backend())
            .as_ref()
            .map_err(|err| ToolResult::err_fmt(format_args!("{err}")))?;
        if !backend.picker.wait_for_scan(GREP_WALL_TIMEOUT) {
            return Err(timeout_grep_result(
                query,
                "index_scan",
                GREP_WALL_TIMEOUT,
                "fff-search initial scan timed out",
            ));
        }
        Ok(Arc::clone(backend))
    }

    /// Resolve the backend for `base_path`, propagating the stored
    /// initialization error message if the base path is unavailable.
    fn shared_backend(&self) -> Result<Arc<GrepBackend>, String> {
        let base_path = self.base_path.as_ref().map_err(Clone::clone)?;
        backend_for_base(base_path)
    }

    /// Lock the cursor store, mapping a failed lock to a `ToolResult` error.
    fn lock_cursors(
        cursor_store: &Mutex<CursorStore>,
    ) -> Result<MutexGuard<'_, CursorStore>, ToolResult> {
        cursor_store
            .lock()
            .map_err(|_| ToolResult::err_fmt(format_args!("Failed to acquire cursor store lock")))
    }

    /// Run a grep against `backend` and build the structured JSON result.
    ///
    /// `control.check` is called before each search stage so a cancelled or
    /// over-budget run stops between stages. When the primary search finds
    /// nothing and no cursor offset is in effect, the fallbacks below are
    /// tried in order:
    ///
    /// 1. drop the first word (unless it looks like a constraint token) and
    ///    search for the rest, accepting only 1 to 10 matches;
    /// 2. fuzzy grep over a cleaned-up query, reported as `approximate`;
    /// 3. for queries containing `/`, a fuzzy file search whose top hit is
    ///    returned as `suggested_path` when its score is high enough;
    /// 4. otherwise an empty result.
    ///
    /// # Errors
    /// Returns the `ToolResult` produced by `control.check`, or an error when
    /// the picker lock or cursor store lock cannot be acquired or the picker
    /// is not initialized.
    fn perform_grep(
        backend: &GrepBackend,
        cursor_store: &Mutex<CursorStore>,
        query: &str,
        mode: GrepMode,
        max_results: usize,
        cursor_id: Option<&str>,
        control: &GrepRunControl,
    ) -> Result<serde_json::Value, ToolResult> {
        control.check(query)?;
        // Falls back to offset 0 when no cursor was supplied, the store lock
        // fails, or the lookup yields nothing.
        let file_offset = cursor_id
            .and_then(|id| cursor_store.lock().ok()?.get(id))
            .unwrap_or(0);

        let (options, auto_expand) = make_grep_options(mode, file_offset, control);

        // The read guard is held for the rest of the function; every search
        // below goes through `picker`.
        let guard = backend.picker.read().map_err(|err| {
            ToolResult::err_fmt(format_args!("Failed to acquire picker lock: {err}"))
        })?;
        let picker = guard
            .as_ref()
            .ok_or_else(|| ToolResult::err_fmt(format_args!("File picker not initialized")))?;

        let parser = QueryParser::new(AiGrepConfig);
        let parsed = parser.parse(query);
        control.check(query)?;
        let result = picker.grep(&parsed, &options);

        // Fallbacks only apply to the first page (no cursor offset).
        if result.matches.is_empty() && file_offset == 0 {
            control.check(query)?;
            let parts = query.split_whitespace().collect::<Vec<_>>();
            if parts.len() >= 2 {
                let first_word = parts[0];
                // A leading `!…`, `*…` or `…/` token is treated as an
                // intentional constraint and is never dropped.
                let is_valid_constraint = first_word.starts_with('!')
                    || first_word.starts_with('*')
                    || first_word.ends_with('/');

                if !is_valid_constraint {
                    let rest_query = parts[1..].join(" ");
                    let rest_parsed = parser.parse(&rest_query);
                    let rest_text = rest_parsed.grep_text();
                    let retry_mode = if has_regex_metacharacters(&rest_text) {
                        GrepMode::Regex
                    } else {
                        mode
                    };
                    let (retry_options, retry_auto_expand) =
                        make_grep_options(retry_mode, 0, control);
                    control.check(query)?;
                    let retry_result = picker.grep(&rest_parsed, &retry_options);

                    // Only accept the broadened query when it yields a small
                    // (1..=10) number of matches.
                    if !retry_result.matches.is_empty() && retry_result.matches.len() <= 10 {
                        let mut cursors = Self::lock_cursors(cursor_store)?;
                        return Ok(structured_grep_result(
                            StructuredGrepInput {
                                query,
                                query_used: &rest_query,
                                matches: &retry_result.matches,
                                files: &retry_result.files,
                                total_matched: retry_result.matches.len(),
                                files_with_matches: retry_result.files_with_matches,
                                next_file_offset: retry_result.next_file_offset,
                                regex_fallback_error: retry_result.regex_fallback_error.as_deref(),
                                max_results,
                                auto_expand_defs: retry_auto_expand,
                                broadened_from: Some(query),
                                approximate: false,
                                picker,
                            },
                            &mut cursors,
                        ));
                    }
                }
            }

            // Fuzzy fallback over the cleaned-up query.
            let fuzzy_query = cleanup_fuzzy_query(query);
            let (fuzzy_options, fuzzy_auto_expand) = make_grep_options(GrepMode::Fuzzy, 0, control);
            let fuzzy_parsed = parser.parse(&fuzzy_query);
            control.check(query)?;
            let fuzzy_result = picker.grep(&fuzzy_parsed, &fuzzy_options);
            if !fuzzy_result.matches.is_empty() {
                let mut cursors = Self::lock_cursors(cursor_store)?;
                return Ok(structured_grep_result(
                    StructuredGrepInput {
                        query,
                        query_used: &fuzzy_query,
                        matches: &fuzzy_result.matches,
                        files: &fuzzy_result.files,
                        total_matched: fuzzy_result.matches.len(),
                        files_with_matches: fuzzy_result.files_with_matches,
                        next_file_offset: fuzzy_result.next_file_offset,
                        regex_fallback_error: fuzzy_result.regex_fallback_error.as_deref(),
                        max_results,
                        auto_expand_defs: fuzzy_auto_expand,
                        broadened_from: None,
                        approximate: true,
                        picker,
                    },
                    &mut cursors,
                ));
            }

            // The query may have been meant as a file path: look up the single
            // best fuzzy file match and offer it as `suggested_path`.
            if query.contains('/') {
                let file_query = QueryParser::default().parse(query);
                control.check(query)?;
                let file_result = picker.fuzzy_search(
                    &file_query,
                    None,
                    FuzzySearchOptions {
                        max_threads: 0,
                        current_file: None,
                        project_path: Some(picker.base_path()),
                        combo_boost_score_multiplier: 100,
                        min_combo_count: 3,
                        pagination: PaginationArgs {
                            offset: 0,
                            limit: 1,
                        },
                    },
                );
                if let (Some(top), Some(score)) =
                    (file_result.items.first(), file_result.scores.first())
                {
                    // Suggest the path only when the score exceeds ten points
                    // per query byte.
                    let query_len = query.len() as i32;
                    if score.base_score > query_len * 10 {
                        return Ok(json!({
                            "query": query,
                            "query_used": query,
                            "matches": [],
                            "files": [],
                            "count": 0,
                            "shown": 0,
                            "files_with_matches": 0,
                            "truncated": false,
                            "cursor": null,
                            "suggested_path": top.relative_path(picker),
                            "approximate": false,
                            "broadened_from": null,
                            "regex_fallback_error": null,
                            "timed_out": false,
                            "cancelled": false,
                            "error": null,
                        }));
                    }
                }
            }

            return Ok(empty_grep_result(query));
        }

        // A cursor-based page with no matches gets an empty result without
        // any fallback.
        if result.matches.is_empty() {
            return Ok(empty_grep_result(query));
        }

        let mut cursors = Self::lock_cursors(cursor_store)?;
        Ok(structured_grep_result(
            StructuredGrepInput {
                query,
                query_used: query,
                matches: &result.matches,
                files: &result.files,
                total_matched: result.matches.len(),
                files_with_matches: result.files_with_matches,
                next_file_offset: result.next_file_offset,
                regex_fallback_error: result.regex_fallback_error.as_deref(),
                max_results,
                auto_expand_defs: auto_expand,
                broadened_from: None,
                approximate: false,
                picker,
            },
            &mut cursors,
        ))
    }
}
272
273impl Default for Grep {
274    fn default() -> Self {
275        Self::new()
276    }
277}
278
279/// Build the cached `grep` tool provider rooted at the current workspace.
280pub fn grep_provider() -> StaticToolProvider<Grep> {
281    StaticToolProvider::new(vec![grep_tool_definition()], Grep::new())
282}
283
284#[async_trait::async_trait]
285impl StaticToolExecute for Grep {
286    async fn execute(&self, call: ToolCall<'_>) -> ToolResult {
287        let cancellation_token = call.context.cancellation_token().cloned();
288        self.execute_inner(call.args, cancellation_token).await
289    }
290}
291
292fn grep_tool_definition() -> ToolDefinition {
293    ToolDefinition::raw(
294                "tool:grep",
295                "grep",
296                "Search file contents. Search for bare identifiers (e.g. 'InProgressQuote', 'ActorAuth'), NOT code syntax or regex. By default searches the current workspace. Pass `path` to point the search at a specific file or directory anywhere on the filesystem (including outside the workspace). If `query` accidentally starts with an obvious filesystem path followed by search text, grep treats that prefix as `path`. Within a search root, use inline constraints in the query as a leading token: `*.rs term` (extension), `src/ term` (path segment), `**/foo/* term` (glob), `!*.test.ts term` (negate). Constraints AND together; one search term per query.",
297                object_schema(
298                    json!({
299                        "query": {
300                            "type": "string",
301                            "description": "Search text or regex query with optional constraint prefixes. Pattern is matched within a single line (no cross-line matches). Use a literal token, a short phrase, or a regex — not a multi-clause natural-language query."
302                        },
303                        "path": {
304                            "type": "string",
305                            "description": "Optional file or directory to search within. Accepts absolute paths or paths relative to the workspace root. A directory becomes the search root; a file searches that one file only. When omitted, searches the current workspace."
306                        },
307                        "limit": {
308                            "type": "integer",
309                            "minimum": 1,
310                            "default": DEFAULT_MAX_RESULTS,
311                            "description": "Max matching lines (default 20)."
312                        },
313                        "cursor": {
314                            "type": "string",
315                            "description": "Cursor from a previous grep result. Only use if previous results were not sufficient."
316                        }
317                    }),
318                    &["query"],
319                ),
320                grep_output_schema(),
321            )
322            .with_examples(vec![
323                r#"await files.grep({ query: "ToolProvider", path: "crates/lash/src" })?"#.into(),
324                r#"await files.grep({ query: "*.rs apply_patch", path: "." })?"#.into(),
325                r#"await files.grep({ query: "current_query" })?"#.into(),
326            ])
327            .with_agent_surface(lash_tool_support::agent_surface(
328                ["files"],
329                "grep",
330                &["search_files", "ripgrep"],
331            ))
332            .with_scheduling(ToolScheduling::Parallel)
333            .with_retry_policy(ToolRetryPolicy::safe(2, 50, 150))
334}
335
336fn grep_output_schema() -> serde_json::Value {
337    json!({
338        "type": "object",
339        "properties": {
340            "query": { "type": "string" },
341            "query_used": {
342                "type": "string",
343                "description": "The concrete query executed after path/constraint/fuzzy broadening."
344            },
345            "broadened_from": nullable_schema(json!({ "type": "string" })),
346            "regex_fallback_error": nullable_schema(json!({ "type": "string" })),
347            "matches": {
348                "type": "array",
349                "items": grep_match_output_schema()
350            },
351            "files": {
352                "type": "array",
353                "items": grep_file_output_schema()
354            },
355            "count": {
356                "type": "integer",
357                "minimum": 0,
358                "description": "Total matching lines found, including results not shown due to limit/cursor."
359            },
360            "shown": {
361                "type": "integer",
362                "minimum": 0,
363                "description": "Number of match records included in this response."
364            },
365            "files_with_matches": { "type": "integer", "minimum": 0 },
366            "truncated": { "type": "boolean" },
367            "cursor": nullable_schema(json!({ "type": "string" })),
368            "suggested_path": nullable_schema(json!({ "type": "string" })),
369            "approximate": {
370                "type": "boolean",
371                "description": "True when a fuzzy fallback produced the matches."
372            },
373            "timed_out": { "type": "boolean" },
374            "cancelled": { "type": "boolean" },
375            "error": nullable_schema(json!({
376                "type": "object",
377                "properties": {
378                    "kind": { "type": "string" },
379                    "message": { "type": "string" },
380                    "stage": { "type": "string" }
381                },
382                "required": ["kind", "message", "stage"],
383                "additionalProperties": true
384            }))
385        },
386        "required": [
387            "query",
388            "query_used",
389            "broadened_from",
390            "regex_fallback_error",
391            "matches",
392            "files",
393            "count",
394            "shown",
395            "files_with_matches",
396            "truncated",
397            "cursor",
398            "suggested_path",
399            "approximate",
400            "timed_out",
401            "cancelled",
402            "error"
403        ],
404        "additionalProperties": false
405    })
406}
407
408fn grep_match_output_schema() -> serde_json::Value {
409    json!({
410        "type": "object",
411        "properties": {
412            "path": { "type": "string" },
413            "line": { "type": "integer", "minimum": 1 },
414            "column": { "type": "integer", "minimum": 1 },
415            "byte_column": { "type": "integer", "minimum": 0 },
416            "excerpt": { "type": "string" },
417            "match": { "type": "string" },
418            "ranges": {
419                "type": "array",
420                "items": {
421                    "type": "object",
422                    "properties": {
423                        "start": { "type": "integer", "minimum": 0 },
424                        "end": { "type": "integer", "minimum": 0 }
425                    },
426                    "required": ["start", "end"],
427                    "additionalProperties": false
428                }
429            },
430            "is_definition": { "type": "boolean" }
431        },
432        "required": [
433            "path",
434            "line",
435            "column",
436            "byte_column",
437            "excerpt",
438            "match",
439            "ranges",
440            "is_definition"
441        ],
442        "additionalProperties": false
443    })
444}
445
446fn grep_file_output_schema() -> serde_json::Value {
447    json!({
448        "type": "object",
449        "properties": {
450            "path": { "type": "string" },
451            "count": { "type": "integer", "minimum": 0 },
452            "size_bytes": { "type": "integer", "minimum": 0 },
453            "is_binary": { "type": "boolean" },
454            "git_status": nullable_schema(json!({ "type": "string" }))
455        },
456        "required": ["path", "count", "size_bytes", "is_binary", "git_status"],
457        "additionalProperties": false
458    })
459}
460
461fn nullable_schema(schema: serde_json::Value) -> serde_json::Value {
462    json!({ "anyOf": [schema, { "type": "null" }] })
463}
464
465impl Grep {
466    async fn execute_inner(
467        &self,
468        args: &serde_json::Value,
469        cancellation_token: Option<tokio_util::sync::CancellationToken>,
470    ) -> ToolResult {
471        let raw_query = match require_str(args, "query") {
472            Ok(query) => query,
473            Err(err) => return err,
474        };
475        let max_results = match parse_limit(args) {
476            Ok(max_results) => max_results,
477            Err(err) => return err,
478        };
479        let cursor = args.get("cursor").and_then(|value| value.as_str());
480        let path_arg = args
481            .get("path")
482            .and_then(|value| value.as_str())
483            .map(str::trim)
484            .filter(|value| !value.is_empty());
485
486        let default_base = self.base_path.as_ref().cloned().ok();
487        let inferred_scope = path_arg
488            .is_none()
489            .then(|| infer_path_prefix(default_base.as_deref(), raw_query))
490            .flatten();
491        let path_arg_owned;
492        let query_owned;
493        let (path_arg, raw_query) = if let Some((path, query)) = inferred_scope {
494            path_arg_owned = path;
495            query_owned = query;
496            (Some(path_arg_owned.as_str()), query_owned.as_str())
497        } else {
498            (path_arg, raw_query)
499        };
500
501        let (backend, query) = match path_arg {
502            Some(path) => match resolve_path_scope(default_base.as_deref(), path) {
503                Ok(PathScope::File(file_path)) => {
504                    return direct_file_grep(
505                        raw_query,
506                        &file_path,
507                        default_base.as_deref(),
508                        max_results,
509                        cancellation_token,
510                    )
511                    .await;
512                }
513                Ok(PathScope::Directory(base_path)) => {
514                    let backend = match backend_for_base(&base_path) {
515                        Ok(backend) => backend,
516                        Err(err) => return ToolResult::err_fmt(format_args!("{err}")),
517                    };
518                    if !backend.picker.wait_for_scan(GREP_WALL_TIMEOUT) {
519                        return timeout_grep_result(
520                            raw_query,
521                            "index_scan",
522                            GREP_WALL_TIMEOUT,
523                            &format!(
524                                "fff-search initial scan timed out for {}",
525                                base_path.display()
526                            ),
527                        );
528                    }
529                    (backend, raw_query.to_string())
530                }
531                Err(err) => return err,
532            },
533            None => match self.ensure_ready_for_query(raw_query) {
534                Ok(backend) => (backend, raw_query.to_string()),
535                Err(err) => return err,
536            },
537        };
538
539        let grep_text = QueryParser::new(AiGrepConfig).parse(&query).grep_text();
540        let mode = if has_regex_metacharacters(&grep_text) {
541            GrepMode::Regex
542        } else {
543            GrepMode::PlainText
544        };
545
546        bounded_indexed_grep(
547            Arc::clone(&backend),
548            Arc::clone(&self.cursor_store),
549            query,
550            mode,
551            max_results,
552            cursor.map(str::to_string),
553            cancellation_token,
554        )
555        .await
556    }
557}
558
/// What a resolved `path` argument points at.
enum PathScope {
    /// A directory: used as the root of an indexed search.
    Directory(PathBuf),
    /// Anything that is not a directory: scanned directly as a single file.
    File(PathBuf),
}
563
/// Cancellation flag and time budget checked between grep stages.
#[derive(Clone)]
struct GrepRunControl {
    /// Set when the run is cancelled or the deadline has been observed as passed.
    abort_signal: Arc<AtomicBool>,
    /// Instant at which the budget runs out (creation time plus `budget`).
    deadline: Instant,
    /// The original budget, reported in timeout results.
    budget: Duration,
}
570
571impl GrepRunControl {
572    fn new(abort_signal: Arc<AtomicBool>, budget: Duration) -> Self {
573        Self {
574            abort_signal,
575            deadline: Instant::now() + budget,
576            budget,
577        }
578    }
579
580    fn check(&self, query: &str) -> Result<(), ToolResult> {
581        if self.abort_signal.load(Ordering::Relaxed) {
582            return Err(cancelled_grep_result(query));
583        }
584        if Instant::now() >= self.deadline {
585            self.abort_signal.store(true, Ordering::Relaxed);
586            return Err(timeout_grep_result(
587                query,
588                "fff_search",
589                self.budget,
590                "grep search timed out",
591            ));
592        }
593        Ok(())
594    }
595
596    fn remaining_budget_ms(&self) -> u64 {
597        self.deadline
598            .saturating_duration_since(Instant::now())
599            .as_millis()
600            .max(1) as u64
601    }
602}
603
604async fn bounded_indexed_grep(
605    backend: Arc<GrepBackend>,
606    cursor_store: Arc<Mutex<CursorStore>>,
607    query: String,
608    mode: GrepMode,
609    max_results: usize,
610    cursor: Option<String>,
611    cancellation_token: Option<tokio_util::sync::CancellationToken>,
612) -> ToolResult {
613    let abort_signal = Arc::new(AtomicBool::new(false));
614    let cancellation_watcher = cancellation_token.map(|token| {
615        let abort_signal = Arc::clone(&abort_signal);
616        tokio::spawn(async move {
617            token.cancelled().await;
618            abort_signal.store(true, Ordering::Relaxed);
619        })
620    });
621    let control = GrepRunControl::new(Arc::clone(&abort_signal), FFF_SEARCH_BUDGET);
622    let timeout_query = query.clone();
623    let handle = tokio::task::spawn_blocking(move || {
624        Grep::perform_grep(
625            &backend,
626            &cursor_store,
627            &query,
628            mode,
629            max_results,
630            cursor.as_deref(),
631            &control,
632        )
633    });
634
635    let result = match tokio::time::timeout(GREP_WALL_TIMEOUT, handle).await {
636        Ok(Ok(Ok(value))) => ToolResult::ok(value),
637        Ok(Ok(Err(err))) => err,
638        Ok(Err(err)) => ToolResult::err(serde_json::json!({
639            "query": timeout_query,
640            "query_used": timeout_query,
641            "matches": [],
642            "files": [],
643            "count": 0,
644            "shown": 0,
645            "files_with_matches": 0,
646            "truncated": false,
647            "cursor": null,
648            "suggested_path": null,
649            "approximate": false,
650            "timed_out": false,
651            "cancelled": false,
652            "error": {
653                "kind": "panic",
654                "message": format!("grep worker failed: {err}"),
655                "stage": "fff_search",
656            },
657        })),
658        Err(_) => {
659            abort_signal.store(true, Ordering::Relaxed);
660            timeout_grep_result(
661                &timeout_query,
662                "fff_search",
663                GREP_WALL_TIMEOUT,
664                "grep search timed out",
665            )
666        }
667    };
668    if let Some(watcher) = cancellation_watcher {
669        watcher.abort();
670    }
671    result
672}
673
674async fn direct_file_grep(
675    query: &str,
676    file_path: &Path,
677    default_base: Option<&Path>,
678    max_results: usize,
679    cancellation_token: Option<tokio_util::sync::CancellationToken>,
680) -> ToolResult {
681    let query = query.to_string();
682    let file_path = file_path.to_path_buf();
683    let default_base = default_base.map(Path::to_path_buf);
684    let abort_signal = Arc::new(AtomicBool::new(false));
685    let cancellation_watcher = cancellation_token.map(|token| {
686        let abort_signal = Arc::clone(&abort_signal);
687        tokio::spawn(async move {
688            token.cancelled().await;
689            abort_signal.store(true, Ordering::Relaxed);
690        })
691    });
692    let worker_abort = Arc::clone(&abort_signal);
693    let timeout_query = query.clone();
694    let handle = tokio::task::spawn_blocking(move || {
695        direct_file_grep_sync(
696            &query,
697            &file_path,
698            default_base.as_deref(),
699            max_results,
700            &worker_abort,
701        )
702    });
703    let result = match tokio::time::timeout(GREP_WALL_TIMEOUT, handle).await {
704        Ok(Ok(result)) => result,
705        Ok(Err(err)) => ToolResult::err(serde_json::json!({
706            "query": timeout_query,
707            "query_used": timeout_query,
708            "matches": [],
709            "files": [],
710            "count": 0,
711            "shown": 0,
712            "files_with_matches": 0,
713            "truncated": false,
714            "cursor": null,
715            "suggested_path": null,
716            "approximate": false,
717            "timed_out": false,
718            "cancelled": false,
719            "error": {
720                "kind": "panic",
721                "message": format!("direct grep worker failed: {err}"),
722                "stage": "direct_file",
723            },
724        })),
725        Err(_) => {
726            abort_signal.store(true, Ordering::Relaxed);
727            timeout_grep_result(
728                &timeout_query,
729                "direct_file",
730                GREP_WALL_TIMEOUT,
731                "direct file grep timed out",
732            )
733        }
734    };
735    if let Some(watcher) = cancellation_watcher {
736        watcher.abort();
737    }
738    result
739}
740
741/// Resolve a user-supplied `path` into either an indexed directory search root
742/// or a direct single-file scan. Relative paths resolve against the workspace
743/// root when available and fall back to the current directory otherwise.
744fn resolve_path_scope(
745    default_base: Option<&Path>,
746    requested: &str,
747) -> Result<PathScope, ToolResult> {
748    let candidate = Path::new(requested);
749    // Resolve relative paths against the search base (falling back to the
750    // process cwd) and then canonicalize on disk: search needs a real,
751    // existence-checked path so it can distinguish a file scan from a
752    // directory index and surface a clear error for missing paths.
753    let base = match default_base {
754        Some(base) => base.to_path_buf(),
755        None => std::env::current_dir().map_err(|err| {
756            ToolResult::err_fmt(format_args!("failed to resolve current directory: {err}"))
757        })?,
758    };
759    let canonical = canonicalize_under(&base, candidate).map_err(|err| {
760        ToolResult::err_fmt(format_args!(
761            "`path` {requested} does not exist or is not accessible: {err}"
762        ))
763    })?;
764    if canonical.is_dir() {
765        Ok(PathScope::Directory(canonical))
766    } else {
767        Ok(PathScope::File(canonical))
768    }
769}
770
771fn infer_path_prefix(default_base: Option<&Path>, query: &str) -> Option<(String, String)> {
772    let trimmed = query.trim();
773    let (candidate, rest) = split_first_query_token(trimmed)?;
774    let candidate = candidate.trim_matches(['"', '\'']);
775    if candidate.is_empty() || rest.trim().is_empty() || !looks_like_path(candidate) {
776        return None;
777    }
778
779    let path = Path::new(candidate);
780    let absolute = if path.is_absolute() {
781        path.to_path_buf()
782    } else {
783        default_base?.join(path)
784    };
785    absolute
786        .exists()
787        .then(|| (candidate.to_string(), rest.trim().to_string()))
788}
789
/// Split `query` into its first token and the remaining text.
///
/// A token opening with `"` or `'` runs up to and including the matching
/// closing quote (quotes are kept in the token); an unterminated quote yields
/// `None`. Otherwise the token ends at the first whitespace character, and a
/// query without any whitespace yields `None`. Leading whitespace is trimmed
/// from the remainder.
fn split_first_query_token(query: &str) -> Option<(&str, &str)> {
    let opening = query.chars().next()?;

    if matches!(opening, '"' | '\'') {
        // Both quote characters are one byte wide, so the search for the
        // closing quote starts at byte 1.
        let closing = 1 + query[1..].find(opening)?;
        let (token, remainder) = query.split_at(closing + 1);
        return Some((token, remainder.trim_start()));
    }

    let boundary = query.find(char::is_whitespace)?;
    let (token, remainder) = query.split_at(boundary);
    Some((token, remainder.trim_start()))
}
808
/// Returns `true` when `value` contains a `/`.
///
/// This also covers the absolute (`/...`) and explicitly relative (`./...`,
/// `../...`) spellings, since each of them necessarily contains a slash.
fn looks_like_path(value: &str) -> bool {
    value.contains('/')
}
815
816/// Look up — or create — a shared fff-search backend rooted at
817/// `base_path`. Reuses the process-wide backend cache so repeat
818/// searches against the same path avoid the initial scan cost.
819fn backend_for_base(base_path: &Path) -> Result<Arc<GrepBackend>, String> {
820    let cache_key = std::fs::canonicalize(base_path).unwrap_or_else(|_| base_path.to_path_buf());
821    let cache = shared_backend_cache();
822    let mut cache = cache
823        .lock()
824        .map_err(|_| "failed to lock shared grep backend cache".to_string())?;
825    if let Some(existing) = cache.get(&cache_key) {
826        return existing.clone();
827    }
828    let backend = initialize_backend_at(base_path).map(Arc::new);
829    cache.insert(cache_key, backend.clone());
830    backend
831}
832
833fn initialize_backend_at(base_path: &Path) -> Result<GrepBackend, String> {
834    let picker = SharedPicker::default();
835    FilePicker::new_with_shared_state(
836        picker.clone(),
837        SharedFrecency::default(),
838        FilePickerOptions {
839            base_path: base_path.to_string_lossy().into_owned(),
840            enable_mmap_cache: false,
841            enable_content_indexing: false,
842            mode: FFFMode::Ai,
843            cache_budget: Some(grep_content_cache_budget()),
844            watch: false,
845        },
846    )
847    .map_err(|err| format!("failed to initialize indexed grep backend: {err}"))?;
848    Ok(GrepBackend { picker })
849}
850
851struct GrepBackend {
852    picker: SharedPicker,
853}
854
855type SharedBackendCache = Mutex<HashMap<PathBuf, Result<Arc<GrepBackend>, String>>>;
856
857fn shared_backend_cache() -> &'static SharedBackendCache {
858    static CACHE: OnceLock<SharedBackendCache> = OnceLock::new();
859    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
860}
861
862fn grep_content_cache_budget() -> ContentCacheBudget {
863    ContentCacheBudget {
864        max_files: 0,
865        max_bytes: 0,
866        max_file_size: DIRECT_FILE_MAX_SIZE,
867        cached_count: Default::default(),
868        cached_bytes: Default::default(),
869    }
870}
871
872fn direct_file_grep_sync(
873    query: &str,
874    file_path: &Path,
875    default_base: Option<&Path>,
876    max_results: usize,
877    abort_signal: &AtomicBool,
878) -> ToolResult {
879    if abort_signal.load(Ordering::Relaxed) {
880        return cancelled_grep_result(query);
881    }
882    let metadata = match std::fs::metadata(file_path) {
883        Ok(metadata) => metadata,
884        Err(err) => {
885            return ToolResult::err(serde_json::json!({
886                "query": query,
887                "query_used": query,
888                "matches": [],
889                "files": [],
890                "count": 0,
891                "shown": 0,
892                "files_with_matches": 0,
893                "truncated": false,
894                "cursor": null,
895                "suggested_path": null,
896                "approximate": false,
897                "timed_out": false,
898                "cancelled": false,
899                "error": {
900                    "kind": "io",
901                    "message": format!("failed to stat file: {err}"),
902                    "stage": "direct_file",
903                },
904            }));
905        }
906    };
907    if !metadata.is_file() {
908        return ToolResult::err(serde_json::json!({
909            "query": query,
910            "query_used": query,
911            "matches": [],
912            "files": [],
913            "count": 0,
914            "shown": 0,
915            "files_with_matches": 0,
916            "truncated": false,
917            "cursor": null,
918            "suggested_path": null,
919            "approximate": false,
920            "timed_out": false,
921            "cancelled": false,
922            "error": {
923                "kind": "not_a_file",
924                "message": "path is not a regular file",
925                "stage": "direct_file",
926            },
927        }));
928    }
929    if metadata.len() > DIRECT_FILE_MAX_SIZE {
930        return ToolResult::err(serde_json::json!({
931            "query": query,
932            "query_used": query,
933            "matches": [],
934            "files": [],
935            "count": 0,
936            "shown": 0,
937            "files_with_matches": 0,
938            "truncated": false,
939            "cursor": null,
940            "suggested_path": null,
941            "approximate": false,
942            "timed_out": false,
943            "cancelled": false,
944            "error": {
945                "kind": "file_too_large",
946                "message": format!("file exceeds grep limit of {DIRECT_FILE_MAX_SIZE} bytes"),
947                "stage": "direct_file",
948                "size_bytes": metadata.len(),
949                "max_size_bytes": DIRECT_FILE_MAX_SIZE,
950            },
951        }));
952    }
953
954    let parsed = QueryParser::new(AiGrepConfig).parse(query);
955    let grep_text = parsed.grep_text();
956    if grep_text.is_empty() {
957        return ToolResult::ok(empty_grep_result(query));
958    }
959
960    let bytes = match std::fs::read(file_path) {
961        Ok(bytes) => bytes,
962        Err(err) => {
963            return ToolResult::err(serde_json::json!({
964                "query": query,
965                "query_used": grep_text,
966                "matches": [],
967                "files": [],
968                "count": 0,
969                "shown": 0,
970                "files_with_matches": 0,
971                "truncated": false,
972                "cursor": null,
973                "suggested_path": null,
974                "approximate": false,
975                "timed_out": false,
976                "cancelled": false,
977                "error": {
978                    "kind": "io",
979                    "message": format!("failed to read file: {err}"),
980                    "stage": "direct_file",
981                },
982            }));
983        }
984    };
985    if abort_signal.load(Ordering::Relaxed) {
986        return cancelled_grep_result(query);
987    }
988
989    let display_path = display_path_for_direct_file(file_path, default_base);
990    let matcher = match DirectMatcher::new(&grep_text) {
991        Ok(matcher) => matcher,
992        Err(regex_error) => DirectMatcher::literal_with_error(&grep_text, regex_error),
993    };
994
995    let text = String::from_utf8_lossy(&bytes);
996    let mut matches = Vec::new();
997    let mut total_matches = 0usize;
998    for (line_index, segment) in text.split_inclusive('\n').enumerate() {
999        if abort_signal.load(Ordering::Relaxed) {
1000            return cancelled_grep_result(query);
1001        }
1002        let line = segment.trim_end_matches(['\r', '\n']);
1003        let ranges = matcher.ranges(line);
1004        if !ranges.is_empty() {
1005            total_matches += 1;
1006            if matches.len() < max_results {
1007                let first = ranges[0];
1008                let json_ranges = ranges
1009                    .iter()
1010                    .map(|(start, end)| {
1011                        json!({
1012                            "start": start,
1013                            "end": end,
1014                        })
1015                    })
1016                    .collect::<Vec<_>>();
1017                let match_text =
1018                    direct_match_text(line, first.0 as usize, first.1 as usize).to_string();
1019                matches.push(json!({
1020                    "path": display_path.clone(),
1021                    "line": (line_index + 1) as u64,
1022                    "column": first.0.saturating_add(1),
1023                    "byte_column": first.0,
1024                    "excerpt": truncate_line_for_ai(line, Some(ranges.as_slice()), MAX_LINE_LEN),
1025                    "match": match_text,
1026                    "ranges": json_ranges,
1027                    "is_definition": looks_like_definition_line(line),
1028                }));
1029            }
1030        }
1031    }
1032
1033    let shown = matches.len();
1034    let files = if total_matches > 0 {
1035        vec![json!({
1036            "path": display_path.clone(),
1037            "count": total_matches,
1038            "size_bytes": metadata.len(),
1039            "is_binary": bytes.contains(&0),
1040            "git_status": null,
1041        })]
1042    } else {
1043        Vec::new()
1044    };
1045
1046    ToolResult::ok(json!({
1047        "query": query,
1048        "query_used": grep_text,
1049        "broadened_from": null,
1050        "regex_fallback_error": matcher.regex_error(),
1051        "matches": matches,
1052        "files": files,
1053        "count": total_matches,
1054        "shown": shown,
1055        "files_with_matches": if total_matches > 0 { 1 } else { 0 },
1056        "truncated": total_matches > shown,
1057        "cursor": null,
1058        "suggested_path": if total_matches > 0 { Some(display_path) } else { None },
1059        "approximate": false,
1060        "timed_out": false,
1061        "cancelled": false,
1062        "error": null,
1063    }))
1064}
1065
1066enum DirectMatcher {
1067    Literal {
1068        needle: String,
1069        case_insensitive: bool,
1070        regex_error: Option<String>,
1071    },
1072    Regex(regex::Regex),
1073}
1074
1075impl DirectMatcher {
1076    fn new(pattern: &str) -> Result<Self, regex::Error> {
1077        if has_regex_metacharacters(pattern) {
1078            let case_insensitive = !pattern.chars().any(|ch| ch.is_uppercase());
1079            let regex = regex::RegexBuilder::new(pattern)
1080                .case_insensitive(case_insensitive)
1081                .build()?;
1082            Ok(Self::Regex(regex))
1083        } else {
1084            Ok(Self::Literal {
1085                needle: pattern.to_string(),
1086                case_insensitive: !pattern.chars().any(|ch| ch.is_uppercase()),
1087                regex_error: None,
1088            })
1089        }
1090    }
1091
1092    fn literal_with_error(pattern: &str, error: regex::Error) -> Self {
1093        Self::Literal {
1094            needle: pattern.to_string(),
1095            case_insensitive: !pattern.chars().any(|ch| ch.is_uppercase()),
1096            regex_error: Some(error.to_string()),
1097        }
1098    }
1099
1100    fn regex_error(&self) -> Option<&str> {
1101        match self {
1102            Self::Literal { regex_error, .. } => regex_error.as_deref(),
1103            Self::Regex(_) => None,
1104        }
1105    }
1106
1107    fn ranges(&self, line: &str) -> Vec<(u32, u32)> {
1108        match self {
1109            Self::Literal {
1110                needle,
1111                case_insensitive,
1112                ..
1113            } => literal_ranges(line, needle, *case_insensitive),
1114            Self::Regex(regex) => regex
1115                .find_iter(line)
1116                .take(16)
1117                .map(|matched| (matched.start() as u32, matched.end() as u32))
1118                .collect(),
1119        }
1120    }
1121}
1122
/// Finds non-overlapping occurrences of `needle` in `line`.
///
/// Returns at most 16 `(start, end)` byte ranges, in left-to-right order.
/// An empty `needle` matches nothing.
///
/// With `case_insensitive` set, both strings are folded with ASCII
/// lowercasing only. That folding never changes byte lengths, so the
/// returned offsets are valid for the original `line`; non-ASCII letters
/// are still compared exactly.
fn literal_ranges(line: &str, needle: &str, case_insensitive: bool) -> Vec<(u32, u32)> {
    use std::borrow::Cow;

    const MAX_RANGES: usize = 16;

    if needle.is_empty() {
        return Vec::new();
    }
    // Only the case-insensitive path needs owned, folded copies; the
    // case-sensitive path searches the inputs in place.
    let (haystack, needle): (Cow<'_, str>, Cow<'_, str>) = if case_insensitive {
        (
            Cow::Owned(line.to_ascii_lowercase()),
            Cow::Owned(needle.to_ascii_lowercase()),
        )
    } else {
        (Cow::Borrowed(line), Cow::Borrowed(needle))
    };
    haystack
        .match_indices(needle.as_ref())
        .take(MAX_RANGES)
        .map(|(start, matched)| (start as u32, (start + matched.len()) as u32))
        .collect()
}
1150
/// Chooses the path string reported for a directly grepped file.
///
/// Preference order: the path relative to `default_base` when the file lies
/// under it, then the bare file name, and finally the full path as given
/// when there is no file name component.
fn display_path_for_direct_file(file_path: &Path, default_base: Option<&Path>) -> String {
    let relative = default_base.and_then(|base| file_path.strip_prefix(base).ok());
    match (relative, file_path.file_name()) {
        (Some(relative), _) => relative.to_string_lossy().to_string(),
        (None, Some(name)) => name.to_string_lossy().to_string(),
        (None, None) => file_path.display().to_string(),
    }
}
1162
1163fn direct_match_text(line: &str, start: usize, end: usize) -> &str {
1164    let start = floor_char_boundary(line, start);
1165    let end = ceil_char_boundary(line, end);
1166    &line[start..end]
1167}
1168
/// Heuristically reports whether `line` starts a definition.
///
/// Leading whitespace is ignored; the line qualifies when it begins with one
/// of a fixed set of definition keywords followed by a space.
fn looks_like_definition_line(line: &str) -> bool {
    const DEFINITION_PREFIXES: [&str; 10] = [
        "fn ",
        "pub fn ",
        "async fn ",
        "def ",
        "class ",
        "struct ",
        "enum ",
        "trait ",
        "impl ",
        "function ",
    ];

    let body = line.trim_start();
    for prefix in DEFINITION_PREFIXES {
        if body.starts_with(prefix) {
            return true;
        }
    }
    false
}
1186
1187fn parse_limit(args: &serde_json::Value) -> Result<usize, ToolResult> {
1188    Ok(
1189        parse_optional_usize_arg(args, "limit", Some(DEFAULT_MAX_RESULTS), false, 1)?
1190            .unwrap_or(DEFAULT_MAX_RESULTS),
1191    )
1192}
1193
1194fn cleanup_fuzzy_query(input: &str) -> String {
1195    let mut output = String::with_capacity(input.len().min(MAX_FFF_FUZZY_QUERY_BYTES));
1196    for ch in input.chars() {
1197        if !matches!(ch, ':' | '-' | '_') {
1198            for lower in ch.to_lowercase() {
1199                let next_len = output.len() + lower.len_utf8();
1200                if next_len > MAX_FFF_FUZZY_QUERY_BYTES {
1201                    return output;
1202                }
1203                output.push(lower);
1204            }
1205        }
1206    }
1207    output
1208}
1209
1210fn make_grep_options(
1211    mode: GrepMode,
1212    file_offset: usize,
1213    control: &GrepRunControl,
1214) -> (GrepSearchOptions, bool) {
1215    let max_matches_per_file = 10;
1216    let before_context = 0;
1217    let auto_expand_defs = before_context == 0;
1218    let after_context = if auto_expand_defs { 8 } else { before_context };
1219
1220    (
1221        GrepSearchOptions {
1222            max_file_size: 10 * 1024 * 1024,
1223            max_matches_per_file,
1224            smart_case: true,
1225            file_offset,
1226            page_limit: 50,
1227            mode,
1228            time_budget_ms: control.remaining_budget_ms(),
1229            before_context,
1230            after_context,
1231            classify_definitions: true,
1232            trim_whitespace: false,
1233            abort_signal: Some(Arc::clone(&control.abort_signal)),
1234        },
1235        auto_expand_defs,
1236    )
1237}
1238
1239fn timeout_grep_result(query: &str, stage: &str, budget: Duration, message: &str) -> ToolResult {
1240    let raw = json!({
1241        "query": query,
1242        "query_used": query,
1243        "broadened_from": null,
1244        "regex_fallback_error": null,
1245        "matches": [],
1246        "files": [],
1247        "count": 0,
1248        "shown": 0,
1249        "files_with_matches": 0,
1250        "truncated": false,
1251        "cursor": null,
1252        "suggested_path": null,
1253        "approximate": false,
1254        "timed_out": true,
1255        "cancelled": false,
1256        "error": {
1257            "kind": "timeout",
1258            "message": message,
1259            "stage": stage,
1260            "budget_ms": budget.as_millis() as u64,
1261        },
1262    });
1263    let mut failure = lash_core::ToolFailure::safe_retry(
1264        ToolFailureClass::Timeout,
1265        "grep_timeout",
1266        message,
1267        Some(50),
1268    );
1269    failure.raw = Some(lash_core::ToolValue::from(raw));
1270    ToolResult::failure(failure)
1271}
1272
1273fn cancelled_grep_result(query: &str) -> ToolResult {
1274    ToolResult::cancelled_with_raw(
1275        "grep cancelled",
1276        json!({
1277            "query": query,
1278            "query_used": query,
1279            "broadened_from": null,
1280            "regex_fallback_error": null,
1281            "matches": [],
1282            "files": [],
1283            "count": 0,
1284            "shown": 0,
1285            "files_with_matches": 0,
1286            "truncated": false,
1287            "cursor": null,
1288            "suggested_path": null,
1289            "approximate": false,
1290            "timed_out": false,
1291            "cancelled": true,
1292            "error": {
1293                "kind": "cancelled",
1294                "message": "grep cancelled",
1295                "stage": "grep",
1296            },
1297        }),
1298    )
1299}
1300
1301#[derive(Default)]
1302struct CursorStore {
1303    counter: u64,
1304    cursors: HashMap<String, usize>,
1305    insertion_order: VecDeque<String>,
1306}
1307
1308impl CursorStore {
1309    fn new() -> Self {
1310        Self::default()
1311    }
1312
1313    fn store(&mut self, file_offset: usize) -> String {
1314        self.counter = self.counter.wrapping_add(1);
1315        let id = self.counter.to_string();
1316        self.cursors.insert(id.clone(), file_offset);
1317        self.insertion_order.push_back(id.clone());
1318        while self.cursors.len() > MAX_CURSORS {
1319            if let Some(oldest) = self.insertion_order.pop_front() {
1320                self.cursors.remove(&oldest);
1321            }
1322        }
1323        id
1324    }
1325
1326    fn get(&self, id: &str) -> Option<usize> {
1327        self.cursors.get(id).copied()
1328    }
1329}
1330
1331fn truncate_line_for_ai(line: &str, match_ranges: Option<&[(u32, u32)]>, max_len: usize) -> String {
1332    let trimmed = line.trim_end();
1333    if trimmed.is_empty() {
1334        return String::new();
1335    }
1336    if trimmed.len() <= max_len {
1337        return trimmed.to_string();
1338    }
1339
1340    if let Some(ranges) = match_ranges
1341        && let Some(&(match_start, match_end)) = ranges.first()
1342    {
1343        let match_start = match_start as usize;
1344        let match_end = match_end as usize;
1345        let match_len = match_end.saturating_sub(match_start);
1346        let budget = max_len.saturating_sub(match_len);
1347        let before = budget / 3;
1348        let after = budget - before;
1349        let win_start = floor_char_boundary(trimmed, match_start.saturating_sub(before));
1350        let win_end = ceil_char_boundary(trimmed, (match_end + after).min(trimmed.len()));
1351
1352        let mut result = trimmed[win_start..win_end].to_string();
1353        if win_start > 0 {
1354            result.insert_str(0, "...");
1355        }
1356        if win_end < trimmed.len() {
1357            result.push_str("...");
1358        }
1359        return result;
1360    }
1361
1362    let end = ceil_char_boundary(trimmed, max_len);
1363    format!("{}...", &trimmed[..end])
1364}
1365
/// Returns the largest character boundary of `text` that is `<= index`.
///
/// Indices at or past the end of `text` clamp to `text.len()`.
fn floor_char_boundary(text: &str, index: usize) -> usize {
    if index >= text.len() {
        return text.len();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=index)
        .rev()
        .find(|&candidate| text.is_char_boundary(candidate))
        .unwrap_or(0)
}
1376
/// Returns the smallest character boundary of `text` that is `>= index`.
///
/// Indices at or past the end of `text` clamp to `text.len()`.
fn ceil_char_boundary(text: &str, index: usize) -> usize {
    if index >= text.len() {
        return text.len();
    }
    // `text.len()` is always a boundary, so the search cannot come up empty.
    (index..=text.len())
        .find(|&candidate| text.is_char_boundary(candidate))
        .unwrap_or(text.len())
}
1387
1388struct StructuredGrepInput<'a> {
1389    query: &'a str,
1390    query_used: &'a str,
1391    matches: &'a [GrepMatch],
1392    files: &'a [&'a FileItem],
1393    total_matched: usize,
1394    files_with_matches: usize,
1395    next_file_offset: usize,
1396    regex_fallback_error: Option<&'a str>,
1397    max_results: usize,
1398    auto_expand_defs: bool,
1399    broadened_from: Option<&'a str>,
1400    approximate: bool,
1401    picker: &'a FilePicker,
1402}
1403
1404fn structured_grep_result(
1405    input: StructuredGrepInput<'_>,
1406    cursor_store: &mut CursorStore,
1407) -> serde_json::Value {
1408    let mut indices = (0..input.matches.len()).collect::<Vec<_>>();
1409    if input.auto_expand_defs {
1410        indices.sort_unstable_by_key(|&index| {
1411            if input.matches[index].is_definition {
1412                0
1413            } else if is_import_line(&input.matches[index].line_content) {
1414                2
1415            } else {
1416                1
1417            }
1418        });
1419    }
1420    indices.truncate(input.max_results);
1421
1422    let cursor = (input.next_file_offset > 0).then(|| cursor_store.store(input.next_file_offset));
1423    let mut per_file: HashMap<String, usize> = HashMap::new();
1424    let mut file_order: Vec<String> = Vec::new();
1425    let mut suggested_path = None::<String>;
1426    let matches = indices
1427        .iter()
1428        .map(|&index| {
1429            let matched = &input.matches[index];
1430            let file = input.files[matched.file_index];
1431            let path = file.relative_path(input.picker);
1432            let count = per_file.entry(path.clone()).or_insert_with(|| {
1433                file_order.push(path.clone());
1434                0
1435            });
1436            *count += 1;
1437            if suggested_path.is_none() || matched.is_definition {
1438                suggested_path = Some(path.clone());
1439            }
1440            let ranges = matched
1441                .match_byte_offsets
1442                .iter()
1443                .map(|(start, end)| {
1444                    json!({
1445                        "start": start,
1446                        "end": end,
1447                    })
1448                })
1449                .collect::<Vec<_>>();
1450            json!({
1451                "path": path,
1452                "line": matched.line_number,
1453                "column": matched.col.saturating_add(1),
1454                "byte_column": matched.col,
1455                "excerpt": truncate_line_for_ai(
1456                    &matched.line_content,
1457                    Some(matched.match_byte_offsets.as_ref()),
1458                    MAX_LINE_LEN
1459                ),
1460                "match": first_match_text(matched),
1461                "ranges": ranges,
1462                "is_definition": matched.is_definition,
1463            })
1464        })
1465        .collect::<Vec<_>>();
1466
1467    let files = file_order
1468        .into_iter()
1469        .map(|path| {
1470            let file = input
1471                .files
1472                .iter()
1473                .find(|file| file.relative_path(input.picker) == path)
1474                .expect("file_order only contains known files");
1475            json!({
1476                "path": path,
1477                "count": per_file[&path],
1478                "size_bytes": file.size,
1479                "is_binary": file.is_binary(),
1480                "git_status": format_git_status_opt(file.git_status),
1481            })
1482        })
1483        .collect::<Vec<_>>();
1484
1485    json!({
1486        "query": input.query,
1487        "query_used": input.query_used,
1488        "broadened_from": input.broadened_from,
1489        "approximate": input.approximate,
1490        "matches": matches,
1491        "files": files,
1492        "count": input.total_matched,
1493        "shown": indices.len(),
1494        "files_with_matches": input.files_with_matches,
1495        "truncated": input.total_matched > indices.len() || input.next_file_offset > 0,
1496        "cursor": cursor,
1497        "suggested_path": suggested_path,
1498        "regex_fallback_error": input.regex_fallback_error,
1499        "timed_out": false,
1500        "cancelled": false,
1501        "error": null,
1502    })
1503}
1504
1505fn empty_grep_result(query: &str) -> serde_json::Value {
1506    json!({
1507        "query": query,
1508        "query_used": query,
1509        "broadened_from": null,
1510        "regex_fallback_error": null,
1511        "matches": [],
1512        "files": [],
1513        "count": 0,
1514        "shown": 0,
1515        "files_with_matches": 0,
1516        "truncated": false,
1517        "cursor": null,
1518        "suggested_path": null,
1519        "approximate": false,
1520        "timed_out": false,
1521        "cancelled": false,
1522        "error": null,
1523    })
1524}
1525
1526fn first_match_text(matched: &GrepMatch) -> String {
1527    let Some((start, end)) = matched.match_byte_offsets.first().copied() else {
1528        return String::new();
1529    };
1530    let start = floor_char_boundary(&matched.line_content, start as usize);
1531    let end = ceil_char_boundary(&matched.line_content, end as usize);
1532    matched.line_content[start..end].to_string()
1533}
1534
1535#[cfg(test)]
1536mod tests {
1537    use super::*;
1538    use serde_json::json;
1539    use tempfile::TempDir;
1540
1541    fn grep_provider_with_base_path(base_path: std::path::PathBuf) -> StaticToolProvider<Grep> {
1542        StaticToolProvider::new(
1543            vec![grep_tool_definition()],
1544            Grep::with_base_path(base_path),
1545        )
1546    }
1547
1548    #[test]
1549    fn grep_uses_limit_argument_in_model_contract() {
1550        let definition = grep_tool_definition();
1551        let properties = definition
1552            .contract
1553            .input_schema
1554            .get("properties")
1555            .and_then(serde_json::Value::as_object)
1556            .expect("object properties");
1557
1558        assert!(properties.contains_key("limit"));
1559        assert!(!properties.contains_key("maxResults"));
1560        assert_eq!(properties["limit"]["default"], serde_json::json!(20));
1561    }
1562
1563    #[test]
1564    fn grep_contract_documents_result_shape() {
1565        let definition = grep_tool_definition();
1566
1567        assert_eq!(definition.contract.output_schema["type"], json!("object"));
1568        assert!(definition.contract.output_schema["properties"]["matches"].is_object());
1569        assert!(definition.contract.output_schema["properties"]["count"].is_object());
1570        assert!(definition.contract.output_schema["properties"]["cursor"].is_object());
1571        let rendered = definition.compact_contract().render_signature();
1572        assert!(rendered.contains("matches"), "{rendered}");
1573        assert!(rendered.contains("count"), "{rendered}");
1574    }
1575
1576    #[tokio::test]
1577    async fn test_grep_matches_with_query() {
1578        let dir = TempDir::new().unwrap();
1579        std::fs::write(
1580            dir.path().join("test.txt"),
1581            "hello world\nfoo bar\nhello again\n",
1582        )
1583        .unwrap();
1584
1585        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1586        let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "hello"})).await;
1587        assert!(result.is_success());
1588        assert_eq!(result.value_for_projection()["count"], 2);
1589        assert_eq!(
1590            result.value_for_projection()["matches"][0]["path"],
1591            "test.txt"
1592        );
1593        assert_eq!(
1594            result.value_for_projection()["matches"][0]["excerpt"],
1595            "hello world"
1596        );
1597        assert_eq!(
1598            result.value_for_projection()["matches"][1]["excerpt"],
1599            "hello again"
1600        );
1601    }
1602
1603    #[tokio::test]
1604    async fn test_grep_returns_structured_file_summaries() {
1605        let dir = TempDir::new().unwrap();
1606        std::fs::write(dir.path().join("alpha.rs"), "fn thing() {}\n").unwrap();
1607
1608        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1609        let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "thing"})).await;
1610        assert!(result.is_success());
1611        assert_eq!(
1612            result.value_for_projection()["files"][0]["path"],
1613            "alpha.rs"
1614        );
1615        assert_eq!(result.value_for_projection()["files"][0]["count"], 1);
1616        assert_eq!(result.value_for_projection()["suggested_path"], "alpha.rs");
1617    }
1618
1619    #[tokio::test]
1620    async fn test_grep_structured_counts() {
1621        let dir = TempDir::new().unwrap();
1622        std::fs::write(dir.path().join("alpha.rs"), "ctx\nctx\n").unwrap();
1623
1624        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1625        let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "ctx"})).await;
1626        assert!(result.is_success());
1627        assert_eq!(result.value_for_projection()["count"], 2);
1628        assert_eq!(result.value_for_projection()["files"][0]["count"], 2);
1629    }
1630
1631    #[tokio::test]
1632    async fn test_grep_empty_result_keeps_structured_metadata() {
1633        let dir = TempDir::new().unwrap();
1634        std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1635
1636        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1637        let result =
1638            lash_core::testing::run_tool(&tool, "grep", &json!({"query": "missing"})).await;
1639        assert!(result.is_success());
1640        assert_eq!(
1641            result.value_for_projection()["matches"]
1642                .as_array()
1643                .unwrap()
1644                .len(),
1645            0
1646        );
1647        assert!(result.value_for_projection()["broadened_from"].is_null());
1648        assert!(result.value_for_projection()["regex_fallback_error"].is_null());
1649    }
1650
1651    #[tokio::test]
1652    async fn test_grep_long_query_does_not_panic_in_fuzzy_fallback() {
1653        let dir = TempDir::new().unwrap();
1654        std::fs::write(dir.path().join("alpha.rs"), "short searchable content\n").unwrap();
1655
1656        let query = "definitely missing ".repeat(20);
1657        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1658        let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": query})).await;
1659
1660        assert!(
1661            result.is_success(),
1662            "long query should not panic or fail: {:?}",
1663            result.value_for_projection()
1664        );
1665    }
1666
1667    #[test]
1668    fn test_cleanup_fuzzy_query_caps_to_fff_score_limit() {
1669        let query = "Ä".repeat(MAX_FFF_FUZZY_QUERY_BYTES + 10);
1670        let cleaned = cleanup_fuzzy_query(&query);
1671
1672        assert!(cleaned.len() <= MAX_FFF_FUZZY_QUERY_BYTES);
1673        assert!(cleaned.is_char_boundary(cleaned.len()));
1674    }
1675
1676    #[tokio::test]
1677    async fn test_grep_initializes_backend_lazily() {
1678        let dir = TempDir::new().unwrap();
1679        std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1680
1681        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1682        assert!(tool.executor().backend.get().is_none());
1683
1684        let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "ctx"})).await;
1685        assert!(result.is_success());
1686        assert!(tool.executor().backend.get().is_some());
1687    }
1688
1689    #[tokio::test]
1690    async fn test_grep_path_scopes_search_to_subdirectory() {
1691        let dir = TempDir::new().unwrap();
1692        std::fs::create_dir(dir.path().join("inner")).unwrap();
1693        std::fs::write(dir.path().join("outer.txt"), "banana at root\n").unwrap();
1694        std::fs::write(dir.path().join("inner/inner.txt"), "banana in inner\n").unwrap();
1695
1696        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1697        let result = lash_core::testing::run_tool(
1698            &tool,
1699            "grep",
1700            &json!({"query": "banana", "path": "inner"}),
1701        )
1702        .await;
1703        assert!(result.is_success());
1704        assert!(
1705            result.value_for_projection()["matches"]
1706                .as_array()
1707                .unwrap()
1708                .iter()
1709                .any(|item| item["path"] == "inner.txt"),
1710            "expected inner.txt match, got {:?}",
1711            result.value_for_projection()
1712        );
1713        assert!(
1714            !result.value_for_projection()["matches"]
1715                .as_array()
1716                .unwrap()
1717                .iter()
1718                .any(|item| item["path"] == "outer.txt"),
1719            "path scope should exclude outer.txt, got {:?}",
1720            result.value_for_projection()
1721        );
1722    }
1723
1724    #[tokio::test]
1725    async fn test_grep_path_constrains_search_to_single_file() {
1726        let dir = TempDir::new().unwrap();
1727        std::fs::write(dir.path().join("notes.txt"), "banana\n").unwrap();
1728        std::fs::write(dir.path().join("other.txt"), "banana\n").unwrap();
1729
1730        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1731        let result = lash_core::testing::run_tool(
1732            &tool,
1733            "grep",
1734            &json!({"query": "banana", "path": "notes.txt"}),
1735        )
1736        .await;
1737        assert!(result.is_success());
1738        assert!(
1739            result.value_for_projection()["matches"]
1740                .as_array()
1741                .unwrap()
1742                .iter()
1743                .any(|item| item["path"] == "notes.txt"),
1744            "expected notes.txt match, got {:?}",
1745            result.value_for_projection()
1746        );
1747        assert!(
1748            !result.value_for_projection()["matches"]
1749                .as_array()
1750                .unwrap()
1751                .iter()
1752                .any(|item| item["path"] == "other.txt"),
1753            "file path should exclude other.txt"
1754        );
1755        assert!(
1756            tool.executor().backend.get().is_none(),
1757            "single-file grep should bypass the indexed backend"
1758        );
1759        assert_eq!(result.value_for_projection()["timed_out"], false);
1760        assert_eq!(
1761            result.value_for_projection()["error"],
1762            serde_json::Value::Null
1763        );
1764    }
1765
1766    #[tokio::test]
1767    async fn test_grep_file_path_uses_direct_scan_for_multiword_query() {
1768        let dir = TempDir::new().unwrap();
1769        std::fs::write(
1770            dir.path().join("bottle.py"),
1771            "header cookie static_file abort redirect request response\nunrelated\n",
1772        )
1773        .unwrap();
1774        std::fs::write(
1775            dir.path().join("other.py"),
1776            "header cookie static_file abort redirect request response\n",
1777        )
1778        .unwrap();
1779
1780        let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1781        let result = lash_core::testing::run_tool(
1782            &tool,
1783            "grep",
1784            &json!({
1785                "query": "header cookie static_file abort redirect request response",
1786                "path": "bottle.py",
1787                "limit": 80,
1788            }),
1789        )
1790        .await;
1791
1792        assert!(
1793            result.is_success(),
1794            "direct grep failed: {:?}",
1795            result.value_for_projection()
1796        );
1797        assert_eq!(result.value_for_projection()["count"], 1);
1798        assert_eq!(result.value_for_projection()["shown"], 1);
1799        assert_eq!(
1800            result.value_for_projection()["matches"][0]["path"],
1801            "bottle.py"
1802        );
1803        assert_eq!(
1804            result.value_for_projection()["matches"][0]["match"],
1805            "header cookie static_file abort redirect request response"
1806        );
1807        assert!(
1808            tool.executor().backend.get().is_none(),
1809            "single-file grep should not initialize fff"
1810        );
1811        assert_eq!(result.value_for_projection()["timed_out"], false);
1812        assert_eq!(
1813            result.value_for_projection()["error"],
1814            serde_json::Value::Null
1815        );
1816    }
1817
1818    #[tokio::test]
1819    async fn test_grep_path_can_search_outside_workspace() {
1820        let workspace = TempDir::new().unwrap();
1821        let outside = TempDir::new().unwrap();
1822        std::fs::write(outside.path().join("external.txt"), "banana\n").unwrap();
1823
1824        let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1825        let result = lash_core::testing::run_tool(
1826            &tool,
1827            "grep",
1828            &json!({
1829                "query": "banana",
1830                "path": outside.path().to_string_lossy(),
1831            }),
1832        )
1833        .await;
1834        assert!(
1835            result.is_success(),
1836            "expected search outside workspace to succeed, got {:?}",
1837            result.value_for_projection()
1838        );
1839        assert!(
1840            result.value_for_projection()["matches"]
1841                .as_array()
1842                .unwrap()
1843                .iter()
1844                .any(|item| item["path"] == "external.txt"),
1845            "expected external.txt match, got {:?}",
1846            result.value_for_projection()
1847        );
1848    }
1849
1850    #[tokio::test]
1851    async fn test_grep_infers_obvious_path_prefix_from_query() {
1852        let workspace = TempDir::new().unwrap();
1853        let outside = TempDir::new().unwrap();
1854        std::fs::write(outside.path().join("external.txt"), "banana\n").unwrap();
1855
1856        let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1857        let result = lash_core::testing::run_tool(
1858            &tool,
1859            "grep",
1860            &json!({"query": format!("{} banana", outside.path().display())}),
1861        )
1862        .await;
1863        assert!(result.is_success());
1864        assert!(
1865            result.value_for_projection()["matches"]
1866                .as_array()
1867                .unwrap()
1868                .iter()
1869                .any(|item| item["path"] == "external.txt"),
1870            "expected inferred path search to find external.txt, got {:?}",
1871            result.value_for_projection()
1872        );
1873    }
1874
1875    #[tokio::test]
1876    async fn test_grep_infers_obvious_file_prefix_without_indexing() {
1877        let workspace = TempDir::new().unwrap();
1878        let outside = TempDir::new().unwrap();
1879        let file = outside.path().join("external.txt");
1880        std::fs::write(&file, "banana split\n").unwrap();
1881
1882        let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1883        let result = lash_core::testing::run_tool(
1884            &tool,
1885            "grep",
1886            &json!({"query": format!("{} banana", file.display())}),
1887        )
1888        .await;
1889        assert!(result.is_success());
1890        assert_eq!(
1891            result.value_for_projection()["matches"][0]["path"],
1892            "external.txt"
1893        );
1894        assert!(
1895            tool.executor().backend.get().is_none(),
1896            "inferred single-file grep should bypass fff"
1897        );
1898    }
1899
/// When the abort flag is already set before the scan starts,
/// `direct_file_grep_sync` returns a failed result marked as cancelled.
#[test]
fn test_direct_file_grep_observes_pre_cancelled_abort_signal() {
    let workspace = TempDir::new().unwrap();
    let target = workspace.path().join("notes.txt");
    std::fs::write(&target, "banana\n").unwrap();

    // The flag is raised up front, before any scanning can happen.
    let already_aborted = AtomicBool::new(true);
    let result = direct_file_grep_sync(
        "banana",
        &target,
        Some(workspace.path()),
        20,
        &already_aborted,
    );

    assert!(!result.is_success());

    // The projected value carries the cancellation markers.
    let projected = result.value_for_projection();
    assert_eq!(projected["cancelled"], true);
    assert_eq!(projected["error"]["kind"], "cancelled");

    // The output form carries the message and its source.
    let output = result.as_output().value_for_projection();
    assert_eq!(output["message"], "grep cancelled");
    assert_eq!(output["source"], "cancellation");
}
1917
1918    #[tokio::test]
1919    async fn test_grep_path_missing_returns_clear_error() {
1920        let workspace = TempDir::new().unwrap();
1921        let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1922        let result = lash_core::testing::run_tool(
1923            &tool,
1924            "grep",
1925            &json!({"query": "banana", "path": "/nonexistent/totally/fake"}),
1926        )
1927        .await;
1928        assert!(!result.is_success());
1929        let value = result.value_for_projection();
1930        let message = value.as_str().unwrap_or("");
1931        assert!(
1932            message.contains("does not exist"),
1933            "expected missing-path error, got {message:?}"
1934        );
1935    }
1936
1937    #[tokio::test]
1938    async fn test_grep_backend_is_shared_process_wide_for_same_workspace() {
1939        let dir = TempDir::new().unwrap();
1940        std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1941
1942        let left = Grep::with_base_path(dir.path().to_path_buf());
1943        let right = Grep::with_base_path(dir.path().to_path_buf());
1944
1945        let left_backend = left.ensure_ready_for_query("ctx").expect("left backend");
1946        let right_backend = right.ensure_ready_for_query("ctx").expect("right backend");
1947
1948        assert!(Arc::ptr_eq(&left_backend, &right_backend));
1949    }
1950}