1use std::collections::{HashMap, VecDeque};
2use std::path::{Path, PathBuf};
3use std::sync::{
4 Arc, Mutex, MutexGuard, OnceLock,
5 atomic::{AtomicBool, Ordering},
6};
7use std::time::{Duration, Instant};
8
9use fff_search::git::format_git_status_opt;
10use fff_search::grep::{GrepMode, GrepSearchOptions, has_regex_metacharacters, is_import_line};
11use fff_search::{
12 AiGrepConfig, ContentCacheBudget, FFFMode, FileItem, FilePicker, FilePickerOptions,
13 FuzzySearchOptions, GrepMatch, PaginationArgs, QueryParser, SharedFrecency, SharedPicker,
14};
15use serde_json::json;
16
17use lash_core::{
18 ToolCall, ToolDefinition, ToolFailureClass, ToolResult, ToolRetryPolicy, ToolScheduling,
19};
20
21use lash_tool_support::{
22 StaticToolExecute, StaticToolProvider, canonicalize_under, object_schema,
23 parse_optional_usize_arg, require_str,
24};
25
/// Number of matching lines returned when the caller does not pass `limit`.
const DEFAULT_MAX_RESULTS: usize = 20;
// NOTE(review): presumably the cap on stored pagination cursors; the `CursorStore`
// implementation is not visible in this part of the file — confirm.
const MAX_CURSORS: usize = 20;
/// Maximum excerpt length handed to `truncate_line_for_ai` for each match line.
const MAX_LINE_LEN: usize = 180;
/// Byte cap used when sizing the cleaned-up fuzzy query buffer (evaluates to 81).
// NOTE(review): the `16 * 50` divisor presumably mirrors a scoring limit inside
// fff-search — confirm against that crate.
const MAX_FFF_FUZZY_QUERY_BYTES: usize = (u16::MAX as usize) / (16 * 50);
/// Wall-clock limit for waiting on the initial index scan and on a blocking grep worker.
const GREP_WALL_TIMEOUT: Duration = Duration::from_secs(5);
/// Search budget given to `GrepRunControl` for an indexed grep; shorter than the wall timeout.
const FFF_SEARCH_BUDGET: Duration = Duration::from_secs(3);
/// Largest file (10 MiB) the direct single-file grep will read; also the per-file cap
/// in the content cache budget.
const DIRECT_FILE_MAX_SIZE: u64 = 10 * 1024 * 1024;
33
/// Content-search tool state: the default search root, its lazily created backend,
/// and the pagination cursor store.
pub struct Grep {
    /// Default search root, or the error message produced while resolving it.
    base_path: Result<PathBuf, String>,
    /// Backend for `base_path`, created on first use; an initialisation error is kept too.
    backend: OnceLock<Result<Arc<GrepBackend>, String>>,
    /// Cursor state shared with the blocking grep worker.
    cursor_store: Arc<Mutex<CursorStore>>,
}
40
impl Grep {
    /// Creates a tool rooted at the process's current directory.
    ///
    /// If the current directory cannot be resolved, the error message is stored and
    /// surfaced later when the default backend is requested.
    pub fn new() -> Self {
        match std::env::current_dir() {
            Ok(path) => Self::with_base_path(path),
            Err(err) => {
                Self::with_init_error(format!("failed to resolve current directory: {err}"))
            }
        }
    }

    /// Builds an instance whose default root is unavailable; `message` is the stored error.
    fn with_init_error(message: String) -> Self {
        Self {
            base_path: Err(message),
            backend: OnceLock::new(),
            cursor_store: Arc::new(Mutex::new(CursorStore::new())),
        }
    }

    /// Builds an instance rooted at `base_path`; the backend is not created yet.
    fn with_base_path(base_path: PathBuf) -> Self {
        Self {
            base_path: Ok(base_path),
            backend: OnceLock::new(),
            cursor_store: Arc::new(Mutex::new(CursorStore::new())),
        }
    }

    /// Returns the default backend once its initial scan has completed.
    ///
    /// # Errors
    /// Returns an error `ToolResult` when backend initialisation failed, or a timeout
    /// result (stage `index_scan`) when `wait_for_scan` reports that the scan did not
    /// finish within `GREP_WALL_TIMEOUT`.
    // NOTE(review): `wait_for_scan` looks like a blocking wait and this is reached from
    // an async fn — confirm whether it should run under `spawn_blocking`.
    fn ensure_ready_for_query(&self, query: &str) -> Result<Arc<GrepBackend>, ToolResult> {
        let backend = self
            .backend
            .get_or_init(|| self.shared_backend())
            .as_ref()
            .map_err(|err| ToolResult::err_fmt(format_args!("{err}")))?;
        if !backend.picker.wait_for_scan(GREP_WALL_TIMEOUT) {
            return Err(timeout_grep_result(
                query,
                "index_scan",
                GREP_WALL_TIMEOUT,
                "fff-search initial scan timed out",
            ));
        }
        Ok(Arc::clone(backend))
    }

    /// Looks up (or creates) the process-wide backend for this instance's base path,
    /// propagating the stored base-path error if there is one.
    fn shared_backend(&self) -> Result<Arc<GrepBackend>, String> {
        let base_path = self.base_path.as_ref().map_err(Clone::clone)?;
        backend_for_base(base_path)
    }

    /// Locks the cursor store, mapping a poisoned lock to an error `ToolResult`.
    fn lock_cursors(
        cursor_store: &Mutex<CursorStore>,
    ) -> Result<MutexGuard<'_, CursorStore>, ToolResult> {
        cursor_store
            .lock()
            .map_err(|_| ToolResult::err_fmt(format_args!("Failed to acquire cursor store lock")))
    }

    /// Runs one indexed grep and, when the first page is empty, a cascade of fallbacks.
    ///
    /// Order of attempts:
    /// 1. The query as given, in `mode`, starting at the file offset stored for `cursor_id`.
    /// 2. If there are no matches and the offset is 0: retry without the first word
    ///    (only when that word is not a constraint token), accepted when it yields
    ///    between 1 and 10 matches.
    /// 3. A fuzzy grep over the cleaned-up query.
    /// 4. If the query contains `/`, a fuzzy file-name search whose top hit is returned
    ///    as `suggested_path` when its score is high enough.
    /// 5. Otherwise an empty result.
    ///
    /// `control.check` is called before each search so cancellation and the deadline are
    /// observed between stages.
    ///
    /// # Errors
    /// Returns the cancellation/timeout result from `control.check`, or an error result
    /// when the picker or cursor-store lock cannot be acquired or the picker is missing.
    fn perform_grep(
        backend: &GrepBackend,
        cursor_store: &Mutex<CursorStore>,
        query: &str,
        mode: GrepMode,
        max_results: usize,
        cursor_id: Option<&str>,
        control: &GrepRunControl,
    ) -> Result<serde_json::Value, ToolResult> {
        control.check(query)?;
        // A missing cursor, an unknown cursor id, or a poisoned lock all fall back to offset 0.
        let file_offset = cursor_id
            .and_then(|id| cursor_store.lock().ok()?.get(id))
            .unwrap_or(0);

        let (options, auto_expand) = make_grep_options(mode, file_offset, control);

        // The read guard is held for the rest of the function, covering every fallback search.
        let guard = backend.picker.read().map_err(|err| {
            ToolResult::err_fmt(format_args!("Failed to acquire picker lock: {err}"))
        })?;
        let picker = guard
            .as_ref()
            .ok_or_else(|| ToolResult::err_fmt(format_args!("File picker not initialized")))?;

        let parser = QueryParser::new(AiGrepConfig);
        let parsed = parser.parse(query);
        control.check(query)?;
        let result = picker.grep(&parsed, &options);

        // Fallbacks only apply to the first page; a cursor continuation that runs dry is
        // simply reported as empty further down.
        if result.matches.is_empty() && file_offset == 0 {
            control.check(query)?;
            let parts = query.split_whitespace().collect::<Vec<_>>();
            if parts.len() >= 2 {
                let first_word = parts[0];
                // Tokens shaped like `!…`, `*…` or `…/` are inline constraints and must not
                // be dropped.
                let is_valid_constraint = first_word.starts_with('!')
                    || first_word.starts_with('*')
                    || first_word.ends_with('/');

                if !is_valid_constraint {
                    let rest_query = parts[1..].join(" ");
                    let rest_parsed = parser.parse(&rest_query);
                    let rest_text = rest_parsed.grep_text();
                    let retry_mode = if has_regex_metacharacters(&rest_text) {
                        GrepMode::Regex
                    } else {
                        mode
                    };
                    let (retry_options, retry_auto_expand) =
                        make_grep_options(retry_mode, 0, control);
                    control.check(query)?;
                    let retry_result = picker.grep(&rest_parsed, &retry_options);

                    // Only accept the broadened query when it stays small (at most 10 matches).
                    if !retry_result.matches.is_empty() && retry_result.matches.len() <= 10 {
                        let mut cursors = Self::lock_cursors(cursor_store)?;
                        return Ok(structured_grep_result(
                            StructuredGrepInput {
                                query,
                                query_used: &rest_query,
                                matches: &retry_result.matches,
                                files: &retry_result.files,
                                total_matched: retry_result.matches.len(),
                                files_with_matches: retry_result.files_with_matches,
                                next_file_offset: retry_result.next_file_offset,
                                regex_fallback_error: retry_result.regex_fallback_error.as_deref(),
                                max_results,
                                auto_expand_defs: retry_auto_expand,
                                broadened_from: Some(query),
                                approximate: false,
                                picker,
                            },
                            &mut cursors,
                        ));
                    }
                }
            }

            // Fuzzy fallback over the cleaned-up query; results are flagged `approximate`.
            let fuzzy_query = cleanup_fuzzy_query(query);
            let (fuzzy_options, fuzzy_auto_expand) = make_grep_options(GrepMode::Fuzzy, 0, control);
            let fuzzy_parsed = parser.parse(&fuzzy_query);
            control.check(query)?;
            let fuzzy_result = picker.grep(&fuzzy_parsed, &fuzzy_options);
            if !fuzzy_result.matches.is_empty() {
                let mut cursors = Self::lock_cursors(cursor_store)?;
                return Ok(structured_grep_result(
                    StructuredGrepInput {
                        query,
                        query_used: &fuzzy_query,
                        matches: &fuzzy_result.matches,
                        files: &fuzzy_result.files,
                        total_matched: fuzzy_result.matches.len(),
                        files_with_matches: fuzzy_result.files_with_matches,
                        next_file_offset: fuzzy_result.next_file_offset,
                        regex_fallback_error: fuzzy_result.regex_fallback_error.as_deref(),
                        max_results,
                        auto_expand_defs: fuzzy_auto_expand,
                        broadened_from: None,
                        approximate: true,
                        picker,
                    },
                    &mut cursors,
                ));
            }

            // A query containing `/` may really be a file path: look for the best fuzzy
            // file-name match and surface it as `suggested_path` instead of content matches.
            if query.contains('/') {
                let file_query = QueryParser::default().parse(query);
                control.check(query)?;
                let file_result = picker.fuzzy_search(
                    &file_query,
                    None,
                    FuzzySearchOptions {
                        max_threads: 0,
                        current_file: None,
                        project_path: Some(picker.base_path()),
                        combo_boost_score_multiplier: 100,
                        min_combo_count: 3,
                        pagination: PaginationArgs {
                            offset: 0,
                            limit: 1,
                        },
                    },
                );
                if let (Some(top), Some(score)) =
                    (file_result.items.first(), file_result.scores.first())
                {
                    // Require the base score to exceed ten points per query byte before
                    // suggesting the file.
                    let query_len = query.len() as i32;
                    if score.base_score > query_len * 10 {
                        return Ok(json!({
                            "query": query,
                            "query_used": query,
                            "matches": [],
                            "files": [],
                            "count": 0,
                            "shown": 0,
                            "files_with_matches": 0,
                            "truncated": false,
                            "cursor": null,
                            "suggested_path": top.relative_path(picker),
                            "approximate": false,
                            "broadened_from": null,
                            "regex_fallback_error": null,
                            "timed_out": false,
                            "cancelled": false,
                            "error": null,
                        }));
                    }
                }
            }

            return Ok(empty_grep_result(query));
        }

        // Reached only with a non-zero file offset: a continuation page with nothing left.
        if result.matches.is_empty() {
            return Ok(empty_grep_result(query));
        }

        let mut cursors = Self::lock_cursors(cursor_store)?;
        Ok(structured_grep_result(
            StructuredGrepInput {
                query,
                query_used: query,
                matches: &result.matches,
                files: &result.files,
                total_matched: result.matches.len(),
                files_with_matches: result.files_with_matches,
                next_file_offset: result.next_file_offset,
                regex_fallback_error: result.regex_fallback_error.as_deref(),
                max_results,
                auto_expand_defs: auto_expand,
                broadened_from: None,
                approximate: false,
                picker,
            },
            &mut cursors,
        ))
    }
}
272
273impl Default for Grep {
274 fn default() -> Self {
275 Self::new()
276 }
277}
278
279pub fn grep_provider() -> StaticToolProvider<Grep> {
281 StaticToolProvider::new(vec![grep_tool_definition()], Grep::new())
282}
283
284#[async_trait::async_trait]
285impl StaticToolExecute for Grep {
286 async fn execute(&self, call: ToolCall<'_>) -> ToolResult {
287 let cancellation_token = call.context.cancellation_token().cloned();
288 self.execute_inner(call.args, cancellation_token).await
289 }
290}
291
/// Describes the `grep` tool: its id and name, the model-facing description, the input
/// schema (`query` required; `path`, `limit`, `cursor` optional), the output schema,
/// usage examples, the agent surface aliases, and its scheduling and retry policy.
fn grep_tool_definition() -> ToolDefinition {
    ToolDefinition::raw(
        "tool:grep",
        "grep",
        "Search file contents. Search for bare identifiers (e.g. 'InProgressQuote', 'ActorAuth'), NOT code syntax or regex. By default searches the current workspace. Pass `path` to point the search at a specific file or directory anywhere on the filesystem (including outside the workspace). If `query` accidentally starts with an obvious filesystem path followed by search text, grep treats that prefix as `path`. Within a search root, use inline constraints in the query as a leading token: `*.rs term` (extension), `src/ term` (path segment), `**/foo/* term` (glob), `!*.test.ts term` (negate). Constraints AND together; one search term per query.",
        object_schema(
            json!({
                "query": {
                    "type": "string",
                    "description": "Search text or regex query with optional constraint prefixes. Pattern is matched within a single line (no cross-line matches). Use a literal token, a short phrase, or a regex — not a multi-clause natural-language query."
                },
                "path": {
                    "type": "string",
                    "description": "Optional file or directory to search within. Accepts absolute paths or paths relative to the workspace root. A directory becomes the search root; a file searches that one file only. When omitted, searches the current workspace."
                },
                "limit": {
                    "type": "integer",
                    "minimum": 1,
                    "default": DEFAULT_MAX_RESULTS,
                    "description": "Max matching lines (default 20)."
                },
                "cursor": {
                    "type": "string",
                    "description": "Cursor from a previous grep result. Only use if previous results were not sufficient."
                }
            }),
            // Only `query` is mandatory.
            &["query"],
        ),
        grep_output_schema(),
    )
    .with_examples(vec![
        r#"await files.grep({ query: "ToolProvider", path: "crates/lash/src" })?"#.into(),
        r#"await files.grep({ query: "*.rs apply_patch", path: "." })?"#.into(),
        r#"await files.grep({ query: "current_query" })?"#.into(),
    ])
    .with_agent_surface(lash_tool_support::agent_surface(
        ["files"],
        "grep",
        &["search_files", "ripgrep"],
    ))
    .with_scheduling(ToolScheduling::Parallel)
    .with_retry_policy(ToolRetryPolicy::safe(2, 50, 150))
}
335
/// JSON Schema for the grep result object.
///
/// Every listed property is required and no additional top-level properties are
/// allowed; optional values are modelled as nullable rather than absent. The `error`
/// object, when present, must carry `kind`, `message` and `stage` and may carry extras.
fn grep_output_schema() -> serde_json::Value {
    json!({
        "type": "object",
        "properties": {
            "query": { "type": "string" },
            "query_used": {
                "type": "string",
                "description": "The concrete query executed after path/constraint/fuzzy broadening."
            },
            "broadened_from": nullable_schema(json!({ "type": "string" })),
            "regex_fallback_error": nullable_schema(json!({ "type": "string" })),
            "matches": {
                "type": "array",
                "items": grep_match_output_schema()
            },
            "files": {
                "type": "array",
                "items": grep_file_output_schema()
            },
            "count": {
                "type": "integer",
                "minimum": 0,
                "description": "Total matching lines found, including results not shown due to limit/cursor."
            },
            "shown": {
                "type": "integer",
                "minimum": 0,
                "description": "Number of match records included in this response."
            },
            "files_with_matches": { "type": "integer", "minimum": 0 },
            "truncated": { "type": "boolean" },
            "cursor": nullable_schema(json!({ "type": "string" })),
            "suggested_path": nullable_schema(json!({ "type": "string" })),
            "approximate": {
                "type": "boolean",
                "description": "True when a fuzzy fallback produced the matches."
            },
            "timed_out": { "type": "boolean" },
            "cancelled": { "type": "boolean" },
            "error": nullable_schema(json!({
                "type": "object",
                "properties": {
                    "kind": { "type": "string" },
                    "message": { "type": "string" },
                    "stage": { "type": "string" }
                },
                "required": ["kind", "message", "stage"],
                "additionalProperties": true
            }))
        },
        "required": [
            "query",
            "query_used",
            "broadened_from",
            "regex_fallback_error",
            "matches",
            "files",
            "count",
            "shown",
            "files_with_matches",
            "truncated",
            "cursor",
            "suggested_path",
            "approximate",
            "timed_out",
            "cancelled",
            "error"
        ],
        "additionalProperties": false
    })
}
407
/// JSON Schema for one entry of the result's `matches` array: the file path, 1-based
/// `line` and `column`, 0-based `byte_column`, the excerpt and matched text, the list
/// of `{start, end}` ranges, and the `is_definition` flag. All fields are required.
fn grep_match_output_schema() -> serde_json::Value {
    json!({
        "type": "object",
        "properties": {
            "path": { "type": "string" },
            "line": { "type": "integer", "minimum": 1 },
            "column": { "type": "integer", "minimum": 1 },
            "byte_column": { "type": "integer", "minimum": 0 },
            "excerpt": { "type": "string" },
            "match": { "type": "string" },
            "ranges": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "start": { "type": "integer", "minimum": 0 },
                        "end": { "type": "integer", "minimum": 0 }
                    },
                    "required": ["start", "end"],
                    "additionalProperties": false
                }
            },
            "is_definition": { "type": "boolean" }
        },
        "required": [
            "path",
            "line",
            "column",
            "byte_column",
            "excerpt",
            "match",
            "ranges",
            "is_definition"
        ],
        "additionalProperties": false
    })
}
445
/// JSON Schema for one entry of the result's `files` array: path, per-file match
/// count, size in bytes, binary flag, and a nullable git status string. All fields
/// are required.
fn grep_file_output_schema() -> serde_json::Value {
    json!({
        "type": "object",
        "properties": {
            "path": { "type": "string" },
            "count": { "type": "integer", "minimum": 0 },
            "size_bytes": { "type": "integer", "minimum": 0 },
            "is_binary": { "type": "boolean" },
            "git_status": nullable_schema(json!({ "type": "string" }))
        },
        "required": ["path", "count", "size_bytes", "is_binary", "git_status"],
        "additionalProperties": false
    })
}
460
461fn nullable_schema(schema: serde_json::Value) -> serde_json::Value {
462 json!({ "anyOf": [schema, { "type": "null" }] })
463}
464
impl Grep {
    /// Validates the tool arguments, resolves the search scope, and dispatches the search.
    ///
    /// Scope resolution:
    /// - An explicit, non-blank `path` wins.
    /// - Otherwise, if the query starts with a token that looks like an existing path
    ///   followed by more text, that token becomes the path and the remainder the query.
    /// - A file path is searched directly; a directory path gets its own backend;
    ///   no path uses the default backend for this instance's base path.
    ///
    /// Returns the search result, or an error/timeout `ToolResult` when an argument is
    /// invalid, the path cannot be resolved, the backend cannot be created, or the
    /// initial index scan does not finish within `GREP_WALL_TIMEOUT`.
    async fn execute_inner(
        &self,
        args: &serde_json::Value,
        cancellation_token: Option<tokio_util::sync::CancellationToken>,
    ) -> ToolResult {
        let raw_query = match require_str(args, "query") {
            Ok(query) => query,
            Err(err) => return err,
        };
        let max_results = match parse_limit(args) {
            Ok(max_results) => max_results,
            Err(err) => return err,
        };
        let cursor = args.get("cursor").and_then(|value| value.as_str());
        // A blank or whitespace-only `path` is treated as absent.
        let path_arg = args
            .get("path")
            .and_then(|value| value.as_str())
            .map(str::trim)
            .filter(|value| !value.is_empty());

        let default_base = self.base_path.as_ref().cloned().ok();
        // Path inference from the query only happens when no explicit `path` was given.
        let inferred_scope = path_arg
            .is_none()
            .then(|| infer_path_prefix(default_base.as_deref(), raw_query))
            .flatten();
        // Declared up front so the inferred strings outlive the `&str` views taken below.
        let path_arg_owned;
        let query_owned;
        let (path_arg, raw_query) = if let Some((path, query)) = inferred_scope {
            path_arg_owned = path;
            query_owned = query;
            (Some(path_arg_owned.as_str()), query_owned.as_str())
        } else {
            (path_arg, raw_query)
        };

        let (backend, query) = match path_arg {
            Some(path) => match resolve_path_scope(default_base.as_deref(), path) {
                // A single file bypasses the index entirely.
                Ok(PathScope::File(file_path)) => {
                    return direct_file_grep(
                        raw_query,
                        &file_path,
                        default_base.as_deref(),
                        max_results,
                        cancellation_token,
                    )
                    .await;
                }
                Ok(PathScope::Directory(base_path)) => {
                    let backend = match backend_for_base(&base_path) {
                        Ok(backend) => backend,
                        Err(err) => return ToolResult::err_fmt(format_args!("{err}")),
                    };
                    // NOTE(review): `wait_for_scan` looks like a blocking wait inside an
                    // async fn — confirm whether it should run under `spawn_blocking`.
                    if !backend.picker.wait_for_scan(GREP_WALL_TIMEOUT) {
                        return timeout_grep_result(
                            raw_query,
                            "index_scan",
                            GREP_WALL_TIMEOUT,
                            &format!(
                                "fff-search initial scan timed out for {}",
                                base_path.display()
                            ),
                        );
                    }
                    (backend, raw_query.to_string())
                }
                Err(err) => return err,
            },
            None => match self.ensure_ready_for_query(raw_query) {
                Ok(backend) => (backend, raw_query.to_string()),
                Err(err) => return err,
            },
        };

        // Regex mode is chosen only when the search text (constraints stripped by the
        // parser) contains regex metacharacters.
        let grep_text = QueryParser::new(AiGrepConfig).parse(&query).grep_text();
        let mode = if has_regex_metacharacters(&grep_text) {
            GrepMode::Regex
        } else {
            GrepMode::PlainText
        };

        bounded_indexed_grep(
            Arc::clone(&backend),
            Arc::clone(&self.cursor_store),
            query,
            mode,
            max_results,
            cursor.map(str::to_string),
            cancellation_token,
        )
        .await
    }
}
558
/// What a resolved `path` argument points at.
enum PathScope {
    /// A directory, used as the root of an indexed search.
    Directory(PathBuf),
    /// Anything that is not a directory; searched directly as a single file.
    File(PathBuf),
}
563
/// Cancellation flag and deadline consulted by a running search.
#[derive(Clone)]
struct GrepRunControl {
    /// Set when the search should stop (external cancellation or deadline reached).
    abort_signal: Arc<AtomicBool>,
    /// Instant after which `check` reports a timeout.
    deadline: Instant,
    /// The budget `deadline` was computed from; reported in timeout results.
    budget: Duration,
}
570
571impl GrepRunControl {
572 fn new(abort_signal: Arc<AtomicBool>, budget: Duration) -> Self {
573 Self {
574 abort_signal,
575 deadline: Instant::now() + budget,
576 budget,
577 }
578 }
579
580 fn check(&self, query: &str) -> Result<(), ToolResult> {
581 if self.abort_signal.load(Ordering::Relaxed) {
582 return Err(cancelled_grep_result(query));
583 }
584 if Instant::now() >= self.deadline {
585 self.abort_signal.store(true, Ordering::Relaxed);
586 return Err(timeout_grep_result(
587 query,
588 "fff_search",
589 self.budget,
590 "grep search timed out",
591 ));
592 }
593 Ok(())
594 }
595
596 fn remaining_budget_ms(&self) -> u64 {
597 self.deadline
598 .saturating_duration_since(Instant::now())
599 .as_millis()
600 .max(1) as u64
601 }
602}
603
604async fn bounded_indexed_grep(
605 backend: Arc<GrepBackend>,
606 cursor_store: Arc<Mutex<CursorStore>>,
607 query: String,
608 mode: GrepMode,
609 max_results: usize,
610 cursor: Option<String>,
611 cancellation_token: Option<tokio_util::sync::CancellationToken>,
612) -> ToolResult {
613 let abort_signal = Arc::new(AtomicBool::new(false));
614 let cancellation_watcher = cancellation_token.map(|token| {
615 let abort_signal = Arc::clone(&abort_signal);
616 tokio::spawn(async move {
617 token.cancelled().await;
618 abort_signal.store(true, Ordering::Relaxed);
619 })
620 });
621 let control = GrepRunControl::new(Arc::clone(&abort_signal), FFF_SEARCH_BUDGET);
622 let timeout_query = query.clone();
623 let handle = tokio::task::spawn_blocking(move || {
624 Grep::perform_grep(
625 &backend,
626 &cursor_store,
627 &query,
628 mode,
629 max_results,
630 cursor.as_deref(),
631 &control,
632 )
633 });
634
635 let result = match tokio::time::timeout(GREP_WALL_TIMEOUT, handle).await {
636 Ok(Ok(Ok(value))) => ToolResult::ok(value),
637 Ok(Ok(Err(err))) => err,
638 Ok(Err(err)) => ToolResult::err(serde_json::json!({
639 "query": timeout_query,
640 "query_used": timeout_query,
641 "matches": [],
642 "files": [],
643 "count": 0,
644 "shown": 0,
645 "files_with_matches": 0,
646 "truncated": false,
647 "cursor": null,
648 "suggested_path": null,
649 "approximate": false,
650 "timed_out": false,
651 "cancelled": false,
652 "error": {
653 "kind": "panic",
654 "message": format!("grep worker failed: {err}"),
655 "stage": "fff_search",
656 },
657 })),
658 Err(_) => {
659 abort_signal.store(true, Ordering::Relaxed);
660 timeout_grep_result(
661 &timeout_query,
662 "fff_search",
663 GREP_WALL_TIMEOUT,
664 "grep search timed out",
665 )
666 }
667 };
668 if let Some(watcher) = cancellation_watcher {
669 watcher.abort();
670 }
671 result
672}
673
674async fn direct_file_grep(
675 query: &str,
676 file_path: &Path,
677 default_base: Option<&Path>,
678 max_results: usize,
679 cancellation_token: Option<tokio_util::sync::CancellationToken>,
680) -> ToolResult {
681 let query = query.to_string();
682 let file_path = file_path.to_path_buf();
683 let default_base = default_base.map(Path::to_path_buf);
684 let abort_signal = Arc::new(AtomicBool::new(false));
685 let cancellation_watcher = cancellation_token.map(|token| {
686 let abort_signal = Arc::clone(&abort_signal);
687 tokio::spawn(async move {
688 token.cancelled().await;
689 abort_signal.store(true, Ordering::Relaxed);
690 })
691 });
692 let worker_abort = Arc::clone(&abort_signal);
693 let timeout_query = query.clone();
694 let handle = tokio::task::spawn_blocking(move || {
695 direct_file_grep_sync(
696 &query,
697 &file_path,
698 default_base.as_deref(),
699 max_results,
700 &worker_abort,
701 )
702 });
703 let result = match tokio::time::timeout(GREP_WALL_TIMEOUT, handle).await {
704 Ok(Ok(result)) => result,
705 Ok(Err(err)) => ToolResult::err(serde_json::json!({
706 "query": timeout_query,
707 "query_used": timeout_query,
708 "matches": [],
709 "files": [],
710 "count": 0,
711 "shown": 0,
712 "files_with_matches": 0,
713 "truncated": false,
714 "cursor": null,
715 "suggested_path": null,
716 "approximate": false,
717 "timed_out": false,
718 "cancelled": false,
719 "error": {
720 "kind": "panic",
721 "message": format!("direct grep worker failed: {err}"),
722 "stage": "direct_file",
723 },
724 })),
725 Err(_) => {
726 abort_signal.store(true, Ordering::Relaxed);
727 timeout_grep_result(
728 &timeout_query,
729 "direct_file",
730 GREP_WALL_TIMEOUT,
731 "direct file grep timed out",
732 )
733 }
734 };
735 if let Some(watcher) = cancellation_watcher {
736 watcher.abort();
737 }
738 result
739}
740
741fn resolve_path_scope(
745 default_base: Option<&Path>,
746 requested: &str,
747) -> Result<PathScope, ToolResult> {
748 let candidate = Path::new(requested);
749 let base = match default_base {
754 Some(base) => base.to_path_buf(),
755 None => std::env::current_dir().map_err(|err| {
756 ToolResult::err_fmt(format_args!("failed to resolve current directory: {err}"))
757 })?,
758 };
759 let canonical = canonicalize_under(&base, candidate).map_err(|err| {
760 ToolResult::err_fmt(format_args!(
761 "`path` {requested} does not exist or is not accessible: {err}"
762 ))
763 })?;
764 if canonical.is_dir() {
765 Ok(PathScope::Directory(canonical))
766 } else {
767 Ok(PathScope::File(canonical))
768 }
769}
770
771fn infer_path_prefix(default_base: Option<&Path>, query: &str) -> Option<(String, String)> {
772 let trimmed = query.trim();
773 let (candidate, rest) = split_first_query_token(trimmed)?;
774 let candidate = candidate.trim_matches(['"', '\'']);
775 if candidate.is_empty() || rest.trim().is_empty() || !looks_like_path(candidate) {
776 return None;
777 }
778
779 let path = Path::new(candidate);
780 let absolute = if path.is_absolute() {
781 path.to_path_buf()
782 } else {
783 default_base?.join(path)
784 };
785 absolute
786 .exists()
787 .then(|| (candidate.to_string(), rest.trim().to_string()))
788}
789
/// Splits `query` into its first token and the rest (leading whitespace removed).
///
/// A token that opens with `"` or `'` runs through the matching closing quote, quotes
/// included. Otherwise the token ends at the first whitespace character. Returns `None`
/// for an empty query, an unterminated quote, or an unquoted query without whitespace.
fn split_first_query_token(query: &str) -> Option<(&str, &str)> {
    let opening = query.chars().next()?;

    if matches!(opening, '"' | '\'') {
        // Both quote characters are one byte, so the search starts right after the opener.
        let after_opening = opening.len_utf8();
        let closing = query[after_opening..].find(opening)? + after_opening;
        let token = &query[..=closing];
        let remainder = query[closing + opening.len_utf8()..].trim_start();
        return Some((token, remainder));
    }

    let boundary = query.find(char::is_whitespace)?;
    Some((&query[..boundary], query[boundary..].trim_start()))
}
808
/// Reports whether `value` is shaped like a filesystem path, i.e. contains a `/`.
///
/// Absolute paths and `./` / `../` prefixes all contain a slash, so one check covers them.
fn looks_like_path(value: &str) -> bool {
    value.contains('/')
}
815
816fn backend_for_base(base_path: &Path) -> Result<Arc<GrepBackend>, String> {
820 let cache_key = std::fs::canonicalize(base_path).unwrap_or_else(|_| base_path.to_path_buf());
821 let cache = shared_backend_cache();
822 let mut cache = cache
823 .lock()
824 .map_err(|_| "failed to lock shared grep backend cache".to_string())?;
825 if let Some(existing) = cache.get(&cache_key) {
826 return existing.clone();
827 }
828 let backend = initialize_backend_at(base_path).map(Arc::new);
829 cache.insert(cache_key, backend.clone());
830 backend
831}
832
833fn initialize_backend_at(base_path: &Path) -> Result<GrepBackend, String> {
834 let picker = SharedPicker::default();
835 FilePicker::new_with_shared_state(
836 picker.clone(),
837 SharedFrecency::default(),
838 FilePickerOptions {
839 base_path: base_path.to_string_lossy().into_owned(),
840 enable_mmap_cache: false,
841 enable_content_indexing: false,
842 mode: FFFMode::Ai,
843 cache_budget: Some(grep_content_cache_budget()),
844 watch: false,
845 },
846 )
847 .map_err(|err| format!("failed to initialize indexed grep backend: {err}"))?;
848 Ok(GrepBackend { picker })
849}
850
/// Indexed search backend for one search root.
struct GrepBackend {
    /// Shared handle to the file picker used for scanning and searching.
    picker: SharedPicker,
}
854
/// Map from search root to the outcome of initialising its backend, guarded by a mutex.
type SharedBackendCache = Mutex<HashMap<PathBuf, Result<Arc<GrepBackend>, String>>>;
856
857fn shared_backend_cache() -> &'static SharedBackendCache {
858 static CACHE: OnceLock<SharedBackendCache> = OnceLock::new();
859 CACHE.get_or_init(|| Mutex::new(HashMap::new()))
860}
861
/// Content-cache budget used for grep backends: zero files and zero bytes, with the
/// per-file size limit set to `DIRECT_FILE_MAX_SIZE`.
// NOTE(review): zero `max_files`/`max_bytes` presumably disables content caching while
// keeping the per-file size limit — confirm against fff-search.
fn grep_content_cache_budget() -> ContentCacheBudget {
    ContentCacheBudget {
        max_files: 0,
        max_bytes: 0,
        max_file_size: DIRECT_FILE_MAX_SIZE,
        cached_count: Default::default(),
        cached_bytes: Default::default(),
    }
}
871
872fn direct_file_grep_sync(
873 query: &str,
874 file_path: &Path,
875 default_base: Option<&Path>,
876 max_results: usize,
877 abort_signal: &AtomicBool,
878) -> ToolResult {
879 if abort_signal.load(Ordering::Relaxed) {
880 return cancelled_grep_result(query);
881 }
882 let metadata = match std::fs::metadata(file_path) {
883 Ok(metadata) => metadata,
884 Err(err) => {
885 return ToolResult::err(serde_json::json!({
886 "query": query,
887 "query_used": query,
888 "matches": [],
889 "files": [],
890 "count": 0,
891 "shown": 0,
892 "files_with_matches": 0,
893 "truncated": false,
894 "cursor": null,
895 "suggested_path": null,
896 "approximate": false,
897 "timed_out": false,
898 "cancelled": false,
899 "error": {
900 "kind": "io",
901 "message": format!("failed to stat file: {err}"),
902 "stage": "direct_file",
903 },
904 }));
905 }
906 };
907 if !metadata.is_file() {
908 return ToolResult::err(serde_json::json!({
909 "query": query,
910 "query_used": query,
911 "matches": [],
912 "files": [],
913 "count": 0,
914 "shown": 0,
915 "files_with_matches": 0,
916 "truncated": false,
917 "cursor": null,
918 "suggested_path": null,
919 "approximate": false,
920 "timed_out": false,
921 "cancelled": false,
922 "error": {
923 "kind": "not_a_file",
924 "message": "path is not a regular file",
925 "stage": "direct_file",
926 },
927 }));
928 }
929 if metadata.len() > DIRECT_FILE_MAX_SIZE {
930 return ToolResult::err(serde_json::json!({
931 "query": query,
932 "query_used": query,
933 "matches": [],
934 "files": [],
935 "count": 0,
936 "shown": 0,
937 "files_with_matches": 0,
938 "truncated": false,
939 "cursor": null,
940 "suggested_path": null,
941 "approximate": false,
942 "timed_out": false,
943 "cancelled": false,
944 "error": {
945 "kind": "file_too_large",
946 "message": format!("file exceeds grep limit of {DIRECT_FILE_MAX_SIZE} bytes"),
947 "stage": "direct_file",
948 "size_bytes": metadata.len(),
949 "max_size_bytes": DIRECT_FILE_MAX_SIZE,
950 },
951 }));
952 }
953
954 let parsed = QueryParser::new(AiGrepConfig).parse(query);
955 let grep_text = parsed.grep_text();
956 if grep_text.is_empty() {
957 return ToolResult::ok(empty_grep_result(query));
958 }
959
960 let bytes = match std::fs::read(file_path) {
961 Ok(bytes) => bytes,
962 Err(err) => {
963 return ToolResult::err(serde_json::json!({
964 "query": query,
965 "query_used": grep_text,
966 "matches": [],
967 "files": [],
968 "count": 0,
969 "shown": 0,
970 "files_with_matches": 0,
971 "truncated": false,
972 "cursor": null,
973 "suggested_path": null,
974 "approximate": false,
975 "timed_out": false,
976 "cancelled": false,
977 "error": {
978 "kind": "io",
979 "message": format!("failed to read file: {err}"),
980 "stage": "direct_file",
981 },
982 }));
983 }
984 };
985 if abort_signal.load(Ordering::Relaxed) {
986 return cancelled_grep_result(query);
987 }
988
989 let display_path = display_path_for_direct_file(file_path, default_base);
990 let matcher = match DirectMatcher::new(&grep_text) {
991 Ok(matcher) => matcher,
992 Err(regex_error) => DirectMatcher::literal_with_error(&grep_text, regex_error),
993 };
994
995 let text = String::from_utf8_lossy(&bytes);
996 let mut matches = Vec::new();
997 let mut total_matches = 0usize;
998 for (line_index, segment) in text.split_inclusive('\n').enumerate() {
999 if abort_signal.load(Ordering::Relaxed) {
1000 return cancelled_grep_result(query);
1001 }
1002 let line = segment.trim_end_matches(['\r', '\n']);
1003 let ranges = matcher.ranges(line);
1004 if !ranges.is_empty() {
1005 total_matches += 1;
1006 if matches.len() < max_results {
1007 let first = ranges[0];
1008 let json_ranges = ranges
1009 .iter()
1010 .map(|(start, end)| {
1011 json!({
1012 "start": start,
1013 "end": end,
1014 })
1015 })
1016 .collect::<Vec<_>>();
1017 let match_text =
1018 direct_match_text(line, first.0 as usize, first.1 as usize).to_string();
1019 matches.push(json!({
1020 "path": display_path.clone(),
1021 "line": (line_index + 1) as u64,
1022 "column": first.0.saturating_add(1),
1023 "byte_column": first.0,
1024 "excerpt": truncate_line_for_ai(line, Some(ranges.as_slice()), MAX_LINE_LEN),
1025 "match": match_text,
1026 "ranges": json_ranges,
1027 "is_definition": looks_like_definition_line(line),
1028 }));
1029 }
1030 }
1031 }
1032
1033 let shown = matches.len();
1034 let files = if total_matches > 0 {
1035 vec![json!({
1036 "path": display_path.clone(),
1037 "count": total_matches,
1038 "size_bytes": metadata.len(),
1039 "is_binary": bytes.contains(&0),
1040 "git_status": null,
1041 })]
1042 } else {
1043 Vec::new()
1044 };
1045
1046 ToolResult::ok(json!({
1047 "query": query,
1048 "query_used": grep_text,
1049 "broadened_from": null,
1050 "regex_fallback_error": matcher.regex_error(),
1051 "matches": matches,
1052 "files": files,
1053 "count": total_matches,
1054 "shown": shown,
1055 "files_with_matches": if total_matches > 0 { 1 } else { 0 },
1056 "truncated": total_matches > shown,
1057 "cursor": null,
1058 "suggested_path": if total_matches > 0 { Some(display_path) } else { None },
1059 "approximate": false,
1060 "timed_out": false,
1061 "cancelled": false,
1062 "error": null,
1063 }))
1064}
1065
/// Line matcher used by the direct single-file grep.
enum DirectMatcher {
    /// Substring search.
    Literal {
        /// Text to look for.
        needle: String,
        /// Whether the comparison ignores ASCII case.
        case_insensitive: bool,
        /// Regex compile error, set when this literal matcher stands in for a pattern
        /// that failed to compile.
        regex_error: Option<String>,
    },
    /// Compiled regular expression.
    Regex(regex::Regex),
}
1074
1075impl DirectMatcher {
1076 fn new(pattern: &str) -> Result<Self, regex::Error> {
1077 if has_regex_metacharacters(pattern) {
1078 let case_insensitive = !pattern.chars().any(|ch| ch.is_uppercase());
1079 let regex = regex::RegexBuilder::new(pattern)
1080 .case_insensitive(case_insensitive)
1081 .build()?;
1082 Ok(Self::Regex(regex))
1083 } else {
1084 Ok(Self::Literal {
1085 needle: pattern.to_string(),
1086 case_insensitive: !pattern.chars().any(|ch| ch.is_uppercase()),
1087 regex_error: None,
1088 })
1089 }
1090 }
1091
1092 fn literal_with_error(pattern: &str, error: regex::Error) -> Self {
1093 Self::Literal {
1094 needle: pattern.to_string(),
1095 case_insensitive: !pattern.chars().any(|ch| ch.is_uppercase()),
1096 regex_error: Some(error.to_string()),
1097 }
1098 }
1099
1100 fn regex_error(&self) -> Option<&str> {
1101 match self {
1102 Self::Literal { regex_error, .. } => regex_error.as_deref(),
1103 Self::Regex(_) => None,
1104 }
1105 }
1106
1107 fn ranges(&self, line: &str) -> Vec<(u32, u32)> {
1108 match self {
1109 Self::Literal {
1110 needle,
1111 case_insensitive,
1112 ..
1113 } => literal_ranges(line, needle, *case_insensitive),
1114 Self::Regex(regex) => regex
1115 .find_iter(line)
1116 .take(16)
1117 .map(|matched| (matched.start() as u32, matched.end() as u32))
1118 .collect(),
1119 }
1120 }
1121}
1122
/// Finds non-overlapping occurrences of `needle` in `line`, returning up to 16
/// `(start, end)` byte ranges in order of appearance.
///
/// With `case_insensitive` set, both sides are compared after ASCII lowercasing; that
/// keeps byte lengths unchanged, so the returned offsets index into the original `line`.
/// An empty needle, or one longer than the line, yields no ranges.
fn literal_ranges(line: &str, needle: &str, case_insensitive: bool) -> Vec<(u32, u32)> {
    use std::borrow::Cow;

    // A needle longer than the line cannot match; bail out before allocating anything.
    if needle.is_empty() || needle.len() > line.len() {
        return Vec::new();
    }

    // Only the case-insensitive path needs owned, lowercased copies; the case-sensitive
    // path searches the caller's strings directly.
    let (haystack, needle): (Cow<'_, str>, Cow<'_, str>) = if case_insensitive {
        (
            Cow::Owned(line.to_ascii_lowercase()),
            Cow::Owned(needle.to_ascii_lowercase()),
        )
    } else {
        (Cow::Borrowed(line), Cow::Borrowed(needle))
    };

    let mut ranges = Vec::new();
    let mut offset = 0usize;
    while let Some(found) = haystack[offset..].find(needle.as_ref()) {
        let start = offset + found;
        let end = start + needle.len();
        ranges.push((start as u32, end as u32));
        if ranges.len() >= 16 {
            break;
        }
        // The needle is non-empty, so resuming at `end` always makes progress.
        offset = end;
    }
    ranges
}
1150
/// Picks the path string shown to the caller for a directly scanned file.
///
/// Preference order: the path relative to `default_base` when the file lives
/// under it, then the bare file name, then the full path as displayed.
fn display_path_for_direct_file(file_path: &Path, default_base: Option<&Path>) -> String {
    let relative = default_base.and_then(|base| file_path.strip_prefix(base).ok());
    match relative {
        Some(inside_base) => inside_base.to_string_lossy().to_string(),
        None => match file_path.file_name() {
            Some(name) => name.to_string_lossy().to_string(),
            None => file_path.display().to_string(),
        },
    }
}
1162
1163fn direct_match_text(line: &str, start: usize, end: usize) -> &str {
1164 let start = floor_char_boundary(line, start);
1165 let end = ceil_char_boundary(line, end);
1166 &line[start..end]
1167}
1168
/// Heuristic: does `line`, ignoring leading whitespace, start with one of a
/// fixed set of definition keywords (each followed by a space)?
fn looks_like_definition_line(line: &str) -> bool {
    const DEFINITION_PREFIXES: [&str; 10] = [
        "fn ",
        "pub fn ",
        "async fn ",
        "def ",
        "class ",
        "struct ",
        "enum ",
        "trait ",
        "impl ",
        "function ",
    ];
    let candidate = line.trim_start();
    for prefix in &DEFINITION_PREFIXES {
        if candidate.starts_with(*prefix) {
            return true;
        }
    }
    false
}
1186
1187fn parse_limit(args: &serde_json::Value) -> Result<usize, ToolResult> {
1188 Ok(
1189 parse_optional_usize_arg(args, "limit", Some(DEFAULT_MAX_RESULTS), false, 1)?
1190 .unwrap_or(DEFAULT_MAX_RESULTS),
1191 )
1192}
1193
1194fn cleanup_fuzzy_query(input: &str) -> String {
1195 let mut output = String::with_capacity(input.len().min(MAX_FFF_FUZZY_QUERY_BYTES));
1196 for ch in input.chars() {
1197 if !matches!(ch, ':' | '-' | '_') {
1198 for lower in ch.to_lowercase() {
1199 let next_len = output.len() + lower.len_utf8();
1200 if next_len > MAX_FFF_FUZZY_QUERY_BYTES {
1201 return output;
1202 }
1203 output.push(lower);
1204 }
1205 }
1206 }
1207 output
1208}
1209
1210fn make_grep_options(
1211 mode: GrepMode,
1212 file_offset: usize,
1213 control: &GrepRunControl,
1214) -> (GrepSearchOptions, bool) {
1215 let max_matches_per_file = 10;
1216 let before_context = 0;
1217 let auto_expand_defs = before_context == 0;
1218 let after_context = if auto_expand_defs { 8 } else { before_context };
1219
1220 (
1221 GrepSearchOptions {
1222 max_file_size: 10 * 1024 * 1024,
1223 max_matches_per_file,
1224 smart_case: true,
1225 file_offset,
1226 page_limit: 50,
1227 mode,
1228 time_budget_ms: control.remaining_budget_ms(),
1229 before_context,
1230 after_context,
1231 classify_definitions: true,
1232 trim_whitespace: false,
1233 abort_signal: Some(Arc::clone(&control.abort_signal)),
1234 },
1235 auto_expand_defs,
1236 )
1237}
1238
1239fn timeout_grep_result(query: &str, stage: &str, budget: Duration, message: &str) -> ToolResult {
1240 let raw = json!({
1241 "query": query,
1242 "query_used": query,
1243 "broadened_from": null,
1244 "regex_fallback_error": null,
1245 "matches": [],
1246 "files": [],
1247 "count": 0,
1248 "shown": 0,
1249 "files_with_matches": 0,
1250 "truncated": false,
1251 "cursor": null,
1252 "suggested_path": null,
1253 "approximate": false,
1254 "timed_out": true,
1255 "cancelled": false,
1256 "error": {
1257 "kind": "timeout",
1258 "message": message,
1259 "stage": stage,
1260 "budget_ms": budget.as_millis() as u64,
1261 },
1262 });
1263 let mut failure = lash_core::ToolFailure::safe_retry(
1264 ToolFailureClass::Timeout,
1265 "grep_timeout",
1266 message,
1267 Some(50),
1268 );
1269 failure.raw = Some(lash_core::ToolValue::from(raw));
1270 ToolResult::failure(failure)
1271}
1272
1273fn cancelled_grep_result(query: &str) -> ToolResult {
1274 ToolResult::cancelled_with_raw(
1275 "grep cancelled",
1276 json!({
1277 "query": query,
1278 "query_used": query,
1279 "broadened_from": null,
1280 "regex_fallback_error": null,
1281 "matches": [],
1282 "files": [],
1283 "count": 0,
1284 "shown": 0,
1285 "files_with_matches": 0,
1286 "truncated": false,
1287 "cursor": null,
1288 "suggested_path": null,
1289 "approximate": false,
1290 "timed_out": false,
1291 "cancelled": true,
1292 "error": {
1293 "kind": "cancelled",
1294 "message": "grep cancelled",
1295 "stage": "grep",
1296 },
1297 }),
1298 )
1299}
1300
1301#[derive(Default)]
1302struct CursorStore {
1303 counter: u64,
1304 cursors: HashMap<String, usize>,
1305 insertion_order: VecDeque<String>,
1306}
1307
1308impl CursorStore {
1309 fn new() -> Self {
1310 Self::default()
1311 }
1312
1313 fn store(&mut self, file_offset: usize) -> String {
1314 self.counter = self.counter.wrapping_add(1);
1315 let id = self.counter.to_string();
1316 self.cursors.insert(id.clone(), file_offset);
1317 self.insertion_order.push_back(id.clone());
1318 while self.cursors.len() > MAX_CURSORS {
1319 if let Some(oldest) = self.insertion_order.pop_front() {
1320 self.cursors.remove(&oldest);
1321 }
1322 }
1323 id
1324 }
1325
1326 fn get(&self, id: &str) -> Option<usize> {
1327 self.cursors.get(id).copied()
1328 }
1329}
1330
1331fn truncate_line_for_ai(line: &str, match_ranges: Option<&[(u32, u32)]>, max_len: usize) -> String {
1332 let trimmed = line.trim_end();
1333 if trimmed.is_empty() {
1334 return String::new();
1335 }
1336 if trimmed.len() <= max_len {
1337 return trimmed.to_string();
1338 }
1339
1340 if let Some(ranges) = match_ranges
1341 && let Some(&(match_start, match_end)) = ranges.first()
1342 {
1343 let match_start = match_start as usize;
1344 let match_end = match_end as usize;
1345 let match_len = match_end.saturating_sub(match_start);
1346 let budget = max_len.saturating_sub(match_len);
1347 let before = budget / 3;
1348 let after = budget - before;
1349 let win_start = floor_char_boundary(trimmed, match_start.saturating_sub(before));
1350 let win_end = ceil_char_boundary(trimmed, (match_end + after).min(trimmed.len()));
1351
1352 let mut result = trimmed[win_start..win_end].to_string();
1353 if win_start > 0 {
1354 result.insert_str(0, "...");
1355 }
1356 if win_end < trimmed.len() {
1357 result.push_str("...");
1358 }
1359 return result;
1360 }
1361
1362 let end = ceil_char_boundary(trimmed, max_len);
1363 format!("{}...", &trimmed[..end])
1364}
1365
/// Returns the largest character boundary in `text` that is `<= index`.
///
/// Indices at or past the end of `text` map to `text.len()`.
fn floor_char_boundary(text: &str, index: usize) -> usize {
    if index >= text.len() {
        return text.len();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=index)
        .rev()
        .find(|&pos| text.is_char_boundary(pos))
        .unwrap_or(0)
}
1376
/// Returns the smallest character boundary in `text` that is `>= index`.
///
/// Indices at or past the end of `text` map to `text.len()`.
fn ceil_char_boundary(text: &str, index: usize) -> usize {
    let len = text.len();
    if index >= len {
        return len;
    }
    // `len` itself is always a boundary, so the search cannot come up empty.
    (index..=len)
        .find(|&pos| text.is_char_boundary(pos))
        .unwrap_or(len)
}
1387
1388struct StructuredGrepInput<'a> {
1389 query: &'a str,
1390 query_used: &'a str,
1391 matches: &'a [GrepMatch],
1392 files: &'a [&'a FileItem],
1393 total_matched: usize,
1394 files_with_matches: usize,
1395 next_file_offset: usize,
1396 regex_fallback_error: Option<&'a str>,
1397 max_results: usize,
1398 auto_expand_defs: bool,
1399 broadened_from: Option<&'a str>,
1400 approximate: bool,
1401 picker: &'a FilePicker,
1402}
1403
1404fn structured_grep_result(
1405 input: StructuredGrepInput<'_>,
1406 cursor_store: &mut CursorStore,
1407) -> serde_json::Value {
1408 let mut indices = (0..input.matches.len()).collect::<Vec<_>>();
1409 if input.auto_expand_defs {
1410 indices.sort_unstable_by_key(|&index| {
1411 if input.matches[index].is_definition {
1412 0
1413 } else if is_import_line(&input.matches[index].line_content) {
1414 2
1415 } else {
1416 1
1417 }
1418 });
1419 }
1420 indices.truncate(input.max_results);
1421
1422 let cursor = (input.next_file_offset > 0).then(|| cursor_store.store(input.next_file_offset));
1423 let mut per_file: HashMap<String, usize> = HashMap::new();
1424 let mut file_order: Vec<String> = Vec::new();
1425 let mut suggested_path = None::<String>;
1426 let matches = indices
1427 .iter()
1428 .map(|&index| {
1429 let matched = &input.matches[index];
1430 let file = input.files[matched.file_index];
1431 let path = file.relative_path(input.picker);
1432 let count = per_file.entry(path.clone()).or_insert_with(|| {
1433 file_order.push(path.clone());
1434 0
1435 });
1436 *count += 1;
1437 if suggested_path.is_none() || matched.is_definition {
1438 suggested_path = Some(path.clone());
1439 }
1440 let ranges = matched
1441 .match_byte_offsets
1442 .iter()
1443 .map(|(start, end)| {
1444 json!({
1445 "start": start,
1446 "end": end,
1447 })
1448 })
1449 .collect::<Vec<_>>();
1450 json!({
1451 "path": path,
1452 "line": matched.line_number,
1453 "column": matched.col.saturating_add(1),
1454 "byte_column": matched.col,
1455 "excerpt": truncate_line_for_ai(
1456 &matched.line_content,
1457 Some(matched.match_byte_offsets.as_ref()),
1458 MAX_LINE_LEN
1459 ),
1460 "match": first_match_text(matched),
1461 "ranges": ranges,
1462 "is_definition": matched.is_definition,
1463 })
1464 })
1465 .collect::<Vec<_>>();
1466
1467 let files = file_order
1468 .into_iter()
1469 .map(|path| {
1470 let file = input
1471 .files
1472 .iter()
1473 .find(|file| file.relative_path(input.picker) == path)
1474 .expect("file_order only contains known files");
1475 json!({
1476 "path": path,
1477 "count": per_file[&path],
1478 "size_bytes": file.size,
1479 "is_binary": file.is_binary(),
1480 "git_status": format_git_status_opt(file.git_status),
1481 })
1482 })
1483 .collect::<Vec<_>>();
1484
1485 json!({
1486 "query": input.query,
1487 "query_used": input.query_used,
1488 "broadened_from": input.broadened_from,
1489 "approximate": input.approximate,
1490 "matches": matches,
1491 "files": files,
1492 "count": input.total_matched,
1493 "shown": indices.len(),
1494 "files_with_matches": input.files_with_matches,
1495 "truncated": input.total_matched > indices.len() || input.next_file_offset > 0,
1496 "cursor": cursor,
1497 "suggested_path": suggested_path,
1498 "regex_fallback_error": input.regex_fallback_error,
1499 "timed_out": false,
1500 "cancelled": false,
1501 "error": null,
1502 })
1503}
1504
1505fn empty_grep_result(query: &str) -> serde_json::Value {
1506 json!({
1507 "query": query,
1508 "query_used": query,
1509 "broadened_from": null,
1510 "regex_fallback_error": null,
1511 "matches": [],
1512 "files": [],
1513 "count": 0,
1514 "shown": 0,
1515 "files_with_matches": 0,
1516 "truncated": false,
1517 "cursor": null,
1518 "suggested_path": null,
1519 "approximate": false,
1520 "timed_out": false,
1521 "cancelled": false,
1522 "error": null,
1523 })
1524}
1525
1526fn first_match_text(matched: &GrepMatch) -> String {
1527 let Some((start, end)) = matched.match_byte_offsets.first().copied() else {
1528 return String::new();
1529 };
1530 let start = floor_char_boundary(&matched.line_content, start as usize);
1531 let end = ceil_char_boundary(&matched.line_content, end as usize);
1532 matched.line_content[start..end].to_string()
1533}
1534
1535#[cfg(test)]
1536mod tests {
1537 use super::*;
1538 use serde_json::json;
1539 use tempfile::TempDir;
1540
1541 fn grep_provider_with_base_path(base_path: std::path::PathBuf) -> StaticToolProvider<Grep> {
1542 StaticToolProvider::new(
1543 vec![grep_tool_definition()],
1544 Grep::with_base_path(base_path),
1545 )
1546 }
1547
1548 #[test]
1549 fn grep_uses_limit_argument_in_model_contract() {
1550 let definition = grep_tool_definition();
1551 let properties = definition
1552 .contract
1553 .input_schema
1554 .get("properties")
1555 .and_then(serde_json::Value::as_object)
1556 .expect("object properties");
1557
1558 assert!(properties.contains_key("limit"));
1559 assert!(!properties.contains_key("maxResults"));
1560 assert_eq!(properties["limit"]["default"], serde_json::json!(20));
1561 }
1562
1563 #[test]
1564 fn grep_contract_documents_result_shape() {
1565 let definition = grep_tool_definition();
1566
1567 assert_eq!(definition.contract.output_schema["type"], json!("object"));
1568 assert!(definition.contract.output_schema["properties"]["matches"].is_object());
1569 assert!(definition.contract.output_schema["properties"]["count"].is_object());
1570 assert!(definition.contract.output_schema["properties"]["cursor"].is_object());
1571 let rendered = definition.compact_contract().render_signature();
1572 assert!(rendered.contains("matches"), "{rendered}");
1573 assert!(rendered.contains("count"), "{rendered}");
1574 }
1575
1576 #[tokio::test]
1577 async fn test_grep_matches_with_query() {
1578 let dir = TempDir::new().unwrap();
1579 std::fs::write(
1580 dir.path().join("test.txt"),
1581 "hello world\nfoo bar\nhello again\n",
1582 )
1583 .unwrap();
1584
1585 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1586 let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "hello"})).await;
1587 assert!(result.is_success());
1588 assert_eq!(result.value_for_projection()["count"], 2);
1589 assert_eq!(
1590 result.value_for_projection()["matches"][0]["path"],
1591 "test.txt"
1592 );
1593 assert_eq!(
1594 result.value_for_projection()["matches"][0]["excerpt"],
1595 "hello world"
1596 );
1597 assert_eq!(
1598 result.value_for_projection()["matches"][1]["excerpt"],
1599 "hello again"
1600 );
1601 }
1602
1603 #[tokio::test]
1604 async fn test_grep_returns_structured_file_summaries() {
1605 let dir = TempDir::new().unwrap();
1606 std::fs::write(dir.path().join("alpha.rs"), "fn thing() {}\n").unwrap();
1607
1608 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1609 let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "thing"})).await;
1610 assert!(result.is_success());
1611 assert_eq!(
1612 result.value_for_projection()["files"][0]["path"],
1613 "alpha.rs"
1614 );
1615 assert_eq!(result.value_for_projection()["files"][0]["count"], 1);
1616 assert_eq!(result.value_for_projection()["suggested_path"], "alpha.rs");
1617 }
1618
1619 #[tokio::test]
1620 async fn test_grep_structured_counts() {
1621 let dir = TempDir::new().unwrap();
1622 std::fs::write(dir.path().join("alpha.rs"), "ctx\nctx\n").unwrap();
1623
1624 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1625 let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "ctx"})).await;
1626 assert!(result.is_success());
1627 assert_eq!(result.value_for_projection()["count"], 2);
1628 assert_eq!(result.value_for_projection()["files"][0]["count"], 2);
1629 }
1630
1631 #[tokio::test]
1632 async fn test_grep_empty_result_keeps_structured_metadata() {
1633 let dir = TempDir::new().unwrap();
1634 std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1635
1636 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1637 let result =
1638 lash_core::testing::run_tool(&tool, "grep", &json!({"query": "missing"})).await;
1639 assert!(result.is_success());
1640 assert_eq!(
1641 result.value_for_projection()["matches"]
1642 .as_array()
1643 .unwrap()
1644 .len(),
1645 0
1646 );
1647 assert!(result.value_for_projection()["broadened_from"].is_null());
1648 assert!(result.value_for_projection()["regex_fallback_error"].is_null());
1649 }
1650
1651 #[tokio::test]
1652 async fn test_grep_long_query_does_not_panic_in_fuzzy_fallback() {
1653 let dir = TempDir::new().unwrap();
1654 std::fs::write(dir.path().join("alpha.rs"), "short searchable content\n").unwrap();
1655
1656 let query = "definitely missing ".repeat(20);
1657 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1658 let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": query})).await;
1659
1660 assert!(
1661 result.is_success(),
1662 "long query should not panic or fail: {:?}",
1663 result.value_for_projection()
1664 );
1665 }
1666
1667 #[test]
1668 fn test_cleanup_fuzzy_query_caps_to_fff_score_limit() {
1669 let query = "Ä".repeat(MAX_FFF_FUZZY_QUERY_BYTES + 10);
1670 let cleaned = cleanup_fuzzy_query(&query);
1671
1672 assert!(cleaned.len() <= MAX_FFF_FUZZY_QUERY_BYTES);
1673 assert!(cleaned.is_char_boundary(cleaned.len()));
1674 }
1675
1676 #[tokio::test]
1677 async fn test_grep_initializes_backend_lazily() {
1678 let dir = TempDir::new().unwrap();
1679 std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1680
1681 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1682 assert!(tool.executor().backend.get().is_none());
1683
1684 let result = lash_core::testing::run_tool(&tool, "grep", &json!({"query": "ctx"})).await;
1685 assert!(result.is_success());
1686 assert!(tool.executor().backend.get().is_some());
1687 }
1688
1689 #[tokio::test]
1690 async fn test_grep_path_scopes_search_to_subdirectory() {
1691 let dir = TempDir::new().unwrap();
1692 std::fs::create_dir(dir.path().join("inner")).unwrap();
1693 std::fs::write(dir.path().join("outer.txt"), "banana at root\n").unwrap();
1694 std::fs::write(dir.path().join("inner/inner.txt"), "banana in inner\n").unwrap();
1695
1696 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1697 let result = lash_core::testing::run_tool(
1698 &tool,
1699 "grep",
1700 &json!({"query": "banana", "path": "inner"}),
1701 )
1702 .await;
1703 assert!(result.is_success());
1704 assert!(
1705 result.value_for_projection()["matches"]
1706 .as_array()
1707 .unwrap()
1708 .iter()
1709 .any(|item| item["path"] == "inner.txt"),
1710 "expected inner.txt match, got {:?}",
1711 result.value_for_projection()
1712 );
1713 assert!(
1714 !result.value_for_projection()["matches"]
1715 .as_array()
1716 .unwrap()
1717 .iter()
1718 .any(|item| item["path"] == "outer.txt"),
1719 "path scope should exclude outer.txt, got {:?}",
1720 result.value_for_projection()
1721 );
1722 }
1723
1724 #[tokio::test]
1725 async fn test_grep_path_constrains_search_to_single_file() {
1726 let dir = TempDir::new().unwrap();
1727 std::fs::write(dir.path().join("notes.txt"), "banana\n").unwrap();
1728 std::fs::write(dir.path().join("other.txt"), "banana\n").unwrap();
1729
1730 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1731 let result = lash_core::testing::run_tool(
1732 &tool,
1733 "grep",
1734 &json!({"query": "banana", "path": "notes.txt"}),
1735 )
1736 .await;
1737 assert!(result.is_success());
1738 assert!(
1739 result.value_for_projection()["matches"]
1740 .as_array()
1741 .unwrap()
1742 .iter()
1743 .any(|item| item["path"] == "notes.txt"),
1744 "expected notes.txt match, got {:?}",
1745 result.value_for_projection()
1746 );
1747 assert!(
1748 !result.value_for_projection()["matches"]
1749 .as_array()
1750 .unwrap()
1751 .iter()
1752 .any(|item| item["path"] == "other.txt"),
1753 "file path should exclude other.txt"
1754 );
1755 assert!(
1756 tool.executor().backend.get().is_none(),
1757 "single-file grep should bypass the indexed backend"
1758 );
1759 assert_eq!(result.value_for_projection()["timed_out"], false);
1760 assert_eq!(
1761 result.value_for_projection()["error"],
1762 serde_json::Value::Null
1763 );
1764 }
1765
1766 #[tokio::test]
1767 async fn test_grep_file_path_uses_direct_scan_for_multiword_query() {
1768 let dir = TempDir::new().unwrap();
1769 std::fs::write(
1770 dir.path().join("bottle.py"),
1771 "header cookie static_file abort redirect request response\nunrelated\n",
1772 )
1773 .unwrap();
1774 std::fs::write(
1775 dir.path().join("other.py"),
1776 "header cookie static_file abort redirect request response\n",
1777 )
1778 .unwrap();
1779
1780 let tool = grep_provider_with_base_path(dir.path().to_path_buf());
1781 let result = lash_core::testing::run_tool(
1782 &tool,
1783 "grep",
1784 &json!({
1785 "query": "header cookie static_file abort redirect request response",
1786 "path": "bottle.py",
1787 "limit": 80,
1788 }),
1789 )
1790 .await;
1791
1792 assert!(
1793 result.is_success(),
1794 "direct grep failed: {:?}",
1795 result.value_for_projection()
1796 );
1797 assert_eq!(result.value_for_projection()["count"], 1);
1798 assert_eq!(result.value_for_projection()["shown"], 1);
1799 assert_eq!(
1800 result.value_for_projection()["matches"][0]["path"],
1801 "bottle.py"
1802 );
1803 assert_eq!(
1804 result.value_for_projection()["matches"][0]["match"],
1805 "header cookie static_file abort redirect request response"
1806 );
1807 assert!(
1808 tool.executor().backend.get().is_none(),
1809 "single-file grep should not initialize fff"
1810 );
1811 assert_eq!(result.value_for_projection()["timed_out"], false);
1812 assert_eq!(
1813 result.value_for_projection()["error"],
1814 serde_json::Value::Null
1815 );
1816 }
1817
1818 #[tokio::test]
1819 async fn test_grep_path_can_search_outside_workspace() {
1820 let workspace = TempDir::new().unwrap();
1821 let outside = TempDir::new().unwrap();
1822 std::fs::write(outside.path().join("external.txt"), "banana\n").unwrap();
1823
1824 let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1825 let result = lash_core::testing::run_tool(
1826 &tool,
1827 "grep",
1828 &json!({
1829 "query": "banana",
1830 "path": outside.path().to_string_lossy(),
1831 }),
1832 )
1833 .await;
1834 assert!(
1835 result.is_success(),
1836 "expected search outside workspace to succeed, got {:?}",
1837 result.value_for_projection()
1838 );
1839 assert!(
1840 result.value_for_projection()["matches"]
1841 .as_array()
1842 .unwrap()
1843 .iter()
1844 .any(|item| item["path"] == "external.txt"),
1845 "expected external.txt match, got {:?}",
1846 result.value_for_projection()
1847 );
1848 }
1849
1850 #[tokio::test]
1851 async fn test_grep_infers_obvious_path_prefix_from_query() {
1852 let workspace = TempDir::new().unwrap();
1853 let outside = TempDir::new().unwrap();
1854 std::fs::write(outside.path().join("external.txt"), "banana\n").unwrap();
1855
1856 let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1857 let result = lash_core::testing::run_tool(
1858 &tool,
1859 "grep",
1860 &json!({"query": format!("{} banana", outside.path().display())}),
1861 )
1862 .await;
1863 assert!(result.is_success());
1864 assert!(
1865 result.value_for_projection()["matches"]
1866 .as_array()
1867 .unwrap()
1868 .iter()
1869 .any(|item| item["path"] == "external.txt"),
1870 "expected inferred path search to find external.txt, got {:?}",
1871 result.value_for_projection()
1872 );
1873 }
1874
1875 #[tokio::test]
1876 async fn test_grep_infers_obvious_file_prefix_without_indexing() {
1877 let workspace = TempDir::new().unwrap();
1878 let outside = TempDir::new().unwrap();
1879 let file = outside.path().join("external.txt");
1880 std::fs::write(&file, "banana split\n").unwrap();
1881
1882 let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1883 let result = lash_core::testing::run_tool(
1884 &tool,
1885 "grep",
1886 &json!({"query": format!("{} banana", file.display())}),
1887 )
1888 .await;
1889 assert!(result.is_success());
1890 assert_eq!(
1891 result.value_for_projection()["matches"][0]["path"],
1892 "external.txt"
1893 );
1894 assert!(
1895 tool.executor().backend.get().is_none(),
1896 "inferred single-file grep should bypass fff"
1897 );
1898 }
1899
1900 #[test]
1901 fn test_direct_file_grep_observes_pre_cancelled_abort_signal() {
1902 let dir = TempDir::new().unwrap();
1903 let file = dir.path().join("notes.txt");
1904 std::fs::write(&file, "banana\n").unwrap();
1905 let abort = AtomicBool::new(true);
1906
1907 let result = direct_file_grep_sync("banana", &file, Some(dir.path()), 20, &abort);
1908
1909 assert!(!result.is_success());
1910 let value = result.value_for_projection();
1911 assert_eq!(value["cancelled"], true);
1912 assert_eq!(value["error"]["kind"], "cancelled");
1913 let output = result.as_output().value_for_projection();
1914 assert_eq!(output["message"], "grep cancelled");
1915 assert_eq!(output["source"], "cancellation");
1916 }
1917
1918 #[tokio::test]
1919 async fn test_grep_path_missing_returns_clear_error() {
1920 let workspace = TempDir::new().unwrap();
1921 let tool = grep_provider_with_base_path(workspace.path().to_path_buf());
1922 let result = lash_core::testing::run_tool(
1923 &tool,
1924 "grep",
1925 &json!({"query": "banana", "path": "/nonexistent/totally/fake"}),
1926 )
1927 .await;
1928 assert!(!result.is_success());
1929 let value = result.value_for_projection();
1930 let message = value.as_str().unwrap_or("");
1931 assert!(
1932 message.contains("does not exist"),
1933 "expected missing-path error, got {message:?}"
1934 );
1935 }
1936
1937 #[tokio::test]
1938 async fn test_grep_backend_is_shared_process_wide_for_same_workspace() {
1939 let dir = TempDir::new().unwrap();
1940 std::fs::write(dir.path().join("alpha.rs"), "ctx\n").unwrap();
1941
1942 let left = Grep::with_base_path(dir.path().to_path_buf());
1943 let right = Grep::with_base_path(dir.path().to_path_buf());
1944
1945 let left_backend = left.ensure_ready_for_query("ctx").expect("left backend");
1946 let right_backend = right.ensure_ready_for_query("ctx").expect("right backend");
1947
1948 assert!(Arc::ptr_eq(&left_backend, &right_backend));
1949 }
1950}