Skip to main content

nika_engine/tools/
grep.rs

1//! Grep Tool - Search file contents with regex
2//!
3//! Fast content search with:
4//! - Full regex support
5//! - Multiple output modes (content, files, count)
6//! - Context lines (-B/-A/-C)
7//! - File type filtering
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use ignore::WalkBuilder;
13use regex::RegexBuilder;
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value};
16use tokio::fs;
17
18use super::context::{ToolContext, ToolEvent};
19use super::{FileTool, ToolErrorCode, ToolOutput};
20use crate::error::NikaError;
21
22// ═══════════════════════════════════════════════════════════════════════════
23// PARAMETERS & RESULT
24// ═══════════════════════════════════════════════════════════════════════════
25
26/// Output mode for grep results
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
28#[serde(rename_all = "snake_case")]
29pub enum GrepOutputMode {
30    /// Show matching lines with content
31    Content,
32    /// Show only file paths (default)
33    #[default]
34    FilesWithMatches,
35    /// Show match count per file
36    Count,
37}
38
39/// Parameters for the Grep tool
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct GrepParams {
42    /// Regex pattern to search for
43    pub pattern: String,
44
45    /// Base path to search in (default: working directory)
46    #[serde(default)]
47    pub path: Option<String>,
48
49    /// Glob pattern to filter files (e.g., "*.rs")
50    #[serde(default)]
51    pub glob: Option<String>,
52
53    /// Output mode
54    #[serde(default)]
55    pub output_mode: GrepOutputMode,
56
57    /// Case-insensitive search
58    #[serde(default)]
59    pub case_insensitive: bool,
60
61    /// Lines of context before match
62    #[serde(default, rename = "context_before")]
63    pub context_before: Option<usize>,
64
65    /// Lines of context after match
66    #[serde(default, rename = "context_after")]
67    pub context_after: Option<usize>,
68
69    /// Lines of context before and after
70    #[serde(default, rename = "context")]
71    pub context: Option<usize>,
72
73    /// Limit results
74    #[serde(default)]
75    pub limit: Option<usize>,
76}
77
78/// A single match in a file
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct GrepMatch {
81    /// File path
82    pub file: String,
83    /// Line number (1-indexed)
84    pub line_number: usize,
85    /// Line content
86    pub content: String,
87    /// Context lines before (if requested)
88    #[serde(skip_serializing_if = "Vec::is_empty")]
89    pub context_before: Vec<String>,
90    /// Context lines after (if requested)
91    #[serde(skip_serializing_if = "Vec::is_empty")]
92    pub context_after: Vec<String>,
93}
94
95/// Result from grep search
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct GrepResult {
98    /// Total matches found
99    pub total_matches: usize,
100
101    /// Files searched
102    pub files_searched: usize,
103
104    /// Files with matches
105    pub files_with_matches: usize,
106
107    /// Matches (for Content mode) or file paths (for FilesWithMatches mode)
108    pub matches: Vec<GrepMatch>,
109
110    /// Match counts per file (for Count mode)
111    #[serde(skip_serializing_if = "Vec::is_empty")]
112    pub counts: Vec<(String, usize)>,
113}
114
115// ═══════════════════════════════════════════════════════════════════════════
116// GREP TOOL
117// ═══════════════════════════════════════════════════════════════════════════
118
119/// Grep tool for searching file contents
120///
121/// # Features
122///
123/// - Full regex support via `regex` crate
124/// - Multiple output modes
125/// - Context lines for surrounding content
126/// - File type filtering via glob
127/// - Respects .gitignore
128pub struct GrepTool {
129    ctx: Arc<ToolContext>,
130}
131
132impl GrepTool {
133    /// Maximum files to search
134    pub const MAX_FILES: usize = 10000;
135
136    /// Maximum matches to return
137    pub const MAX_MATCHES: usize = 1000;
138
139    /// Create a new Grep tool
140    pub fn new(ctx: Arc<ToolContext>) -> Self {
141        Self { ctx }
142    }
143
144    /// Execute the grep search
145    pub async fn execute(&self, params: GrepParams) -> Result<GrepResult, NikaError> {
146        // Determine base path
147        let base_path = match params.path {
148            Some(ref p) => self.ctx.validate_path(p)?,
149            None => self.ctx.working_dir().to_path_buf(),
150        };
151
152        // Build regex
153        let regex = RegexBuilder::new(&params.pattern)
154            .case_insensitive(params.case_insensitive)
155            .multi_line(true)
156            .build()
157            .map_err(|e| NikaError::ToolError {
158                code: ToolErrorCode::InvalidRegex.code(),
159                message: format!("Invalid regex pattern '{}': {}", params.pattern, e),
160            })?;
161
162        // Build glob filter if provided
163        let glob_filter = if let Some(ref glob_pattern) = params.glob {
164            Some(
165                globset::GlobBuilder::new(glob_pattern)
166                    .literal_separator(true)
167                    .build()
168                    .map_err(|e| NikaError::ToolError {
169                        code: ToolErrorCode::InvalidGlobPattern.code(),
170                        message: format!("Invalid glob pattern '{}': {}", glob_pattern, e),
171                    })?
172                    .compile_matcher(),
173            )
174        } else {
175            None
176        };
177
178        // Context lines
179        let context_before = params.context_before.or(params.context).unwrap_or(0);
180        let context_after = params.context_after.or(params.context).unwrap_or(0);
181
182        // Walk files and search
183        let mut matches: Vec<GrepMatch> = Vec::new();
184        let mut counts: Vec<(String, usize)> = Vec::new();
185        let mut files_searched = 0;
186        let mut files_with_matches = 0;
187        let mut total_matches = 0;
188
189        let limit = params.limit.unwrap_or(Self::MAX_MATCHES);
190
191        let walker = WalkBuilder::new(&base_path)
192            .hidden(false)
193            .git_ignore(true)
194            .git_global(true)
195            .git_exclude(true)
196            .build();
197
198        for entry in walker.filter_map(Result::ok) {
199            let path = entry.path();
200
201            // Skip directories
202            if path.is_dir() {
203                continue;
204            }
205
206            // Apply glob filter
207            if let Some(ref glob) = glob_filter {
208                let relative = path.strip_prefix(&base_path).unwrap_or(path);
209                if !glob.is_match(relative) && !glob.is_match(path) {
210                    continue;
211                }
212            }
213
214            // Read file
215            let content = match fs::read_to_string(path).await {
216                Ok(c) => c,
217                Err(_) => continue, // Skip unreadable files
218            };
219
220            files_searched += 1;
221
222            if files_searched > Self::MAX_FILES {
223                break;
224            }
225
226            // Search for matches
227            let lines: Vec<&str> = content.lines().collect();
228            let mut file_matches = 0;
229
230            for (line_idx, line) in lines.iter().enumerate() {
231                if regex.is_match(line) {
232                    file_matches += 1;
233                    total_matches += 1;
234
235                    if total_matches > limit {
236                        continue; // Still count but don't store
237                    }
238
239                    // Build context
240                    let ctx_before: Vec<String> = if context_before > 0 {
241                        let start = line_idx.saturating_sub(context_before);
242                        lines[start..line_idx]
243                            .iter()
244                            .map(|s| s.to_string())
245                            .collect()
246                    } else {
247                        Vec::new()
248                    };
249
250                    let ctx_after: Vec<String> = if context_after > 0 {
251                        let end = (line_idx + 1 + context_after).min(lines.len());
252                        lines[line_idx + 1..end]
253                            .iter()
254                            .map(|s| s.to_string())
255                            .collect()
256                    } else {
257                        Vec::new()
258                    };
259
260                    matches.push(GrepMatch {
261                        file: path.to_string_lossy().to_string(),
262                        line_number: line_idx + 1,
263                        content: line.to_string(),
264                        context_before: ctx_before,
265                        context_after: ctx_after,
266                    });
267                }
268            }
269
270            if file_matches > 0 {
271                files_with_matches += 1;
272                counts.push((path.to_string_lossy().to_string(), file_matches));
273            }
274        }
275
276        // Emit event
277        self.ctx
278            .emit(ToolEvent::GrepSearch {
279                pattern: params.pattern,
280                files_searched,
281                matches: total_matches,
282            })
283            .await;
284
285        Ok(GrepResult {
286            total_matches,
287            files_searched,
288            files_with_matches,
289            matches,
290            counts,
291        })
292    }
293
294    /// Format output based on mode
295    fn format_output(&self, result: &GrepResult, mode: GrepOutputMode) -> String {
296        match mode {
297            GrepOutputMode::Content => {
298                if result.matches.is_empty() {
299                    return "No matches found".to_string();
300                }
301
302                result
303                    .matches
304                    .iter()
305                    .map(|m| {
306                        let mut output = String::new();
307
308                        // Context before
309                        for (i, ctx) in m.context_before.iter().enumerate() {
310                            let line_num = m.line_number - m.context_before.len() + i;
311                            output.push_str(&format!("{}:{}: {}\n", m.file, line_num, ctx));
312                        }
313
314                        // Matching line
315                        output.push_str(&format!("{}:{}> {}\n", m.file, m.line_number, m.content));
316
317                        // Context after
318                        for (i, ctx) in m.context_after.iter().enumerate() {
319                            let line_num = m.line_number + 1 + i;
320                            output.push_str(&format!("{}:{}: {}\n", m.file, line_num, ctx));
321                        }
322
323                        output
324                    })
325                    .collect::<Vec<_>>()
326                    .join("--\n")
327            }
328            GrepOutputMode::FilesWithMatches => {
329                if result.files_with_matches == 0 {
330                    return "No matching files found".to_string();
331                }
332
333                // Deduplicate file paths
334                let mut files: Vec<&str> = result.matches.iter().map(|m| m.file.as_str()).collect();
335                files.sort();
336                files.dedup();
337
338                format!("Found {} files:\n{}", files.len(), files.join("\n"))
339            }
340            GrepOutputMode::Count => {
341                if result.counts.is_empty() {
342                    return "No matches found".to_string();
343                }
344
345                let counts_str = result
346                    .counts
347                    .iter()
348                    .map(|(file, count)| format!("{}: {}", file, count))
349                    .collect::<Vec<_>>()
350                    .join("\n");
351
352                format!(
353                    "Total: {} matches in {} files\n{}",
354                    result.total_matches, result.files_with_matches, counts_str
355                )
356            }
357        }
358    }
359}
360
361#[async_trait]
362impl FileTool for GrepTool {
363    fn name(&self) -> &'static str {
364        "grep"
365    }
366
367    fn description(&self) -> &'static str {
368        "Search file contents with regex patterns. Supports multiple output modes: \
369         'content' shows matching lines, 'files_with_matches' shows file paths, \
370         'count' shows match counts. Use context_before/context_after for surrounding lines. \
371         Use glob parameter to filter by file pattern."
372    }
373
374    fn parameters_schema(&self) -> Value {
375        json!({
376            "type": "object",
377            "properties": {
378                "pattern": {
379                    "type": "string",
380                    "description": "Regex pattern to search for"
381                },
382                "path": {
383                    "type": "string",
384                    "description": "Base path to search in (default: working directory)"
385                },
386                "glob": {
387                    "type": "string",
388                    "description": "Glob pattern to filter files (e.g., '*.rs', '**/*.ts')"
389                },
390                "output_mode": {
391                    "type": "string",
392                    "enum": ["content", "files_with_matches", "count"],
393                    "description": "Output format (default: files_with_matches)"
394                },
395                "case_insensitive": {
396                    "type": "boolean",
397                    "description": "Case-insensitive search (default: false)"
398                },
399                "context_before": {
400                    "type": "integer",
401                    "description": "Lines of context before match"
402                },
403                "context_after": {
404                    "type": "integer",
405                    "description": "Lines of context after match"
406                },
407                "context": {
408                    "type": "integer",
409                    "description": "Lines of context before and after (shorthand)"
410                },
411                "limit": {
412                    "type": "integer",
413                    "description": "Maximum matches to return"
414                }
415            },
416            "required": ["pattern"],
417            "additionalProperties": false
418        })
419    }
420
421    async fn call(&self, params: Value) -> Result<ToolOutput, NikaError> {
422        let params: GrepParams =
423            serde_json::from_value(params.clone()).map_err(|e| NikaError::ToolError {
424                code: ToolErrorCode::InvalidRegex.code(),
425                message: format!("Invalid parameters: {}", e),
426            })?;
427
428        let output_mode = params.output_mode;
429        let result = self.execute(params).await?;
430        let content = self.format_output(&result, output_mode);
431
432        Ok(ToolOutput::success_with_data(
433            content,
434            serde_json::to_value(&result).unwrap_or_default(),
435        ))
436    }
437}
438
439// ═══════════════════════════════════════════════════════════════════════════
440// TESTS
441// ═══════════════════════════════════════════════════════════════════════════
442
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::context::testing::{create_test_tree, setup_test};

    /// Create standard test files for grep tests
    async fn create_test_files(temp_dir: &tempfile::TempDir) {
        create_test_tree(
            temp_dir,
            &[
                (
                    "src/main.rs",
                    "fn main() {\n    println!(\"Hello\");\n    println!(\"World\");\n}",
                ),
                ("src/lib.rs", "pub fn hello() {\n    // Hello function\n}"),
                ("README.md", "# Hello World\n\nThis is a test."),
            ],
        )
        .await;
    }

    /// Baseline parameters: search the working directory with every optional
    /// setting left at its default. Tests override fields via struct update.
    fn search_params(pattern: &str, output_mode: GrepOutputMode) -> GrepParams {
        GrepParams {
            pattern: pattern.to_string(),
            path: None,
            glob: None,
            output_mode,
            case_insensitive: false,
            context_before: None,
            context_after: None,
            context: None,
            limit: None,
        }
    }

    #[tokio::test]
    async fn test_grep_simple_pattern() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(search_params("Hello", GrepOutputMode::Content))
            .await
            .unwrap();

        assert!(result.total_matches >= 2);
        assert!(result.files_with_matches >= 2);
    }

    #[tokio::test]
    async fn test_grep_with_glob_filter() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(GrepParams {
                glob: Some("**/*.rs".to_string()),
                ..search_params("fn", GrepOutputMode::FilesWithMatches)
            })
            .await
            .unwrap();

        assert_eq!(result.files_with_matches, 2);
        assert!(result.matches.iter().all(|m| m.file.ends_with(".rs")));
    }

    #[tokio::test]
    async fn test_grep_case_insensitive() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(GrepParams {
                case_insensitive: true,
                ..search_params("hello", GrepOutputMode::Content)
            })
            .await
            .unwrap();

        // Should match "Hello" in multiple files
        assert!(result.total_matches >= 2);
    }

    #[tokio::test]
    async fn test_grep_with_context() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(GrepParams {
                // `**/` is needed to reach the files under `src/`; a bare
                // `*.rs` would select nothing and make the test vacuous.
                glob: Some("**/*.rs".to_string()),
                context_before: Some(1),
                context_after: Some(1),
                ..search_params("println", GrepOutputMode::Content)
            })
            .await
            .unwrap();

        // No match may carry more context than requested
        for m in &result.matches {
            assert!(m.context_before.len() <= 1);
            assert!(m.context_after.len() <= 1);
        }

        // main.rs has two println lines, each with a neighbour on both sides
        let main_matches: Vec<&GrepMatch> = result
            .matches
            .iter()
            .filter(|m| m.file.ends_with("main.rs"))
            .collect();
        assert_eq!(main_matches.len(), 2);

        assert_eq!(main_matches[0].line_number, 2);
        assert_eq!(main_matches[0].context_before, ["fn main() {"]);
        assert_eq!(main_matches[0].context_after, ["    println!(\"World\");"]);

        assert_eq!(main_matches[1].line_number, 3);
        assert_eq!(main_matches[1].context_before, ["    println!(\"Hello\");"]);
        assert_eq!(main_matches[1].context_after, ["}"]);
    }

    #[tokio::test]
    async fn test_grep_count_mode() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(search_params("println", GrepOutputMode::Count))
            .await
            .unwrap();

        // main.rs has 2 println calls
        assert!(result
            .counts
            .iter()
            .any(|(f, c)| f.contains("main.rs") && *c == 2));
    }

    #[tokio::test]
    async fn test_grep_invalid_regex() {
        let (_temp_dir, ctx) = setup_test().await;

        let tool = GrepTool::new(ctx);
        let result = tool
            .execute(search_params("[invalid", GrepOutputMode::Content))
            .await;

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Invalid regex"));
    }

    #[tokio::test]
    async fn test_file_tool_trait() {
        let (temp_dir, ctx) = setup_test().await;
        create_test_files(&temp_dir).await;

        let tool = GrepTool::new(ctx);

        assert_eq!(tool.name(), "grep");
        assert!(tool.description().contains("Search file contents"));

        let result = tool
            .call(json!({
                "pattern": "fn",
                "glob": "**/*.rs"
            }))
            .await
            .unwrap();

        assert!(!result.is_error);
        assert!(result.content.contains("Found"));
    }
}